Index: head/sys/dev/an/if_an.c =================================================================== --- head/sys/dev/an/if_an.c (revision 331796) +++ head/sys/dev/an/if_an.c (revision 331797) @@ -1,3812 +1,3818 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1997, 1998, 1999 * Bill Paul . All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Bill Paul. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
*/ /* * Aironet 4500/4800 802.11 PCMCIA/ISA/PCI driver for FreeBSD. * * Written by Bill Paul * Electrical Engineering Department * Columbia University, New York City */ #include __FBSDID("$FreeBSD$"); /* * The Aironet 4500/4800 series cards come in PCMCIA, ISA and PCI form. * This driver supports all three device types (PCI devices are supported * through an extra PCI shim: /sys/dev/an/if_an_pci.c). ISA devices can be * supported either using hard-coded IO port/IRQ settings or via Plug * and Play. The 4500 series devices support 1Mbps and 2Mbps data rates. * The 4800 devices support 1, 2, 5.5 and 11Mbps rates. * * Like the WaveLAN/IEEE cards, the Aironet NICs are all essentially * PCMCIA devices. The ISA and PCI cards are a combination of a PCMCIA * device and a PCMCIA to ISA or PCMCIA to PCI adapter card. There are * a couple of important differences though: * * - Lucent ISA card looks to the host like a PCMCIA controller with * a PCMCIA WaveLAN card inserted. This means that even desktop * machines need to be configured with PCMCIA support in order to * use WaveLAN/IEEE ISA cards. The Aironet cards on the other hand * actually look like normal ISA and PCI devices to the host, so * no PCMCIA controller support is needed * * The latter point results in a small gotcha. The Aironet PCMCIA * cards can be configured for one of two operating modes depending * on how the Vpp1 and Vpp2 programming voltages are set when the * card is activated. In order to put the card in proper PCMCIA * operation (where the CIS table is visible and the interface is * programmed for PCMCIA operation), both Vpp1 and Vpp2 have to be * set to 5 volts. FreeBSD by default doesn't set the Vpp voltages, * which leaves the card in ISA/PCI mode, which prevents it from * being activated as an PCMCIA device. * * Note that some PCMCIA controller software packages for Windows NT * fail to set the voltages as well. * * The Aironet devices can operate in both station mode and access point * mode. 
Typically, when programmed for station mode, the card can be set * to automatically perform encapsulation/decapsulation of Ethernet II * and 802.3 frames within 802.11 frames so that the host doesn't have * to do it itself. This driver doesn't program the card that way: the * driver handles all of the encapsulation/decapsulation itself. */ #include "opt_inet.h" #ifdef INET #define ANCACHE /* enable signal strength cache */ #endif #include #include #include #include #include #include #include #include #include #ifdef ANCACHE #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #include #include #endif #include #include #include #include /* These are global because we need them in sys/pci/if_an_p.c. */ static void an_reset(struct an_softc *); static int an_init_mpi350_desc(struct an_softc *); static int an_ioctl(struct ifnet *, u_long, caddr_t); static void an_init(void *); static void an_init_locked(struct an_softc *); static int an_init_tx_ring(struct an_softc *); static void an_start(struct ifnet *); static void an_start_locked(struct ifnet *); static void an_watchdog(struct an_softc *); static void an_rxeof(struct an_softc *); static void an_txeof(struct an_softc *, int); static void an_promisc(struct an_softc *, int); static int an_cmd(struct an_softc *, int, int); static int an_cmd_struct(struct an_softc *, struct an_command *, struct an_reply *); static int an_read_record(struct an_softc *, struct an_ltv_gen *); static int an_write_record(struct an_softc *, struct an_ltv_gen *); static int an_read_data(struct an_softc *, int, int, caddr_t, int); static int an_write_data(struct an_softc *, int, int, caddr_t, int); static int an_seek(struct an_softc *, int, int, int); static int an_alloc_nicmem(struct an_softc *, int, int *); static int an_dma_malloc(struct an_softc *, bus_size_t, struct an_dma_alloc *, int); 
static void an_dma_free(struct an_softc *, struct an_dma_alloc *); static void an_dma_malloc_cb(void *, bus_dma_segment_t *, int, int); static void an_stats_update(void *); static void an_setdef(struct an_softc *, struct an_req *); #ifdef ANCACHE static void an_cache_store(struct an_softc *, struct ether_header *, struct mbuf *, u_int8_t, u_int8_t); #endif /* function definitions for use with the Cisco's Linux configuration utilities */ static int readrids(struct ifnet*, struct aironet_ioctl*); static int writerids(struct ifnet*, struct aironet_ioctl*); static int flashcard(struct ifnet*, struct aironet_ioctl*); static int cmdreset(struct ifnet *); static int setflashmode(struct ifnet *); static int flashgchar(struct ifnet *,int,int); static int flashpchar(struct ifnet *,int,int); static int flashputbuf(struct ifnet *); static int flashrestart(struct ifnet *); static int WaitBusy(struct ifnet *, int); static int unstickbusy(struct ifnet *); static void an_dump_record (struct an_softc *,struct an_ltv_gen *, char *); static int an_media_change (struct ifnet *); static void an_media_status (struct ifnet *, struct ifmediareq *); static int an_dump = 0; static int an_cache_mode = 0; #define DBM 0 #define PERCENT 1 #define RAW 2 static char an_conf[256]; static char an_conf_cache[256]; /* sysctl vars */ static SYSCTL_NODE(_hw, OID_AUTO, an, CTLFLAG_RD, 0, "Wireless driver parameters"); /* XXX violate ethernet/netgraph callback hooks */ extern void (*ng_ether_attach_p)(struct ifnet *ifp); extern void (*ng_ether_detach_p)(struct ifnet *ifp); static int sysctl_an_dump(SYSCTL_HANDLER_ARGS) { int error, r, last; char *s = an_conf; last = an_dump; switch (an_dump) { case 0: strcpy(an_conf, "off"); break; case 1: strcpy(an_conf, "type"); break; case 2: strcpy(an_conf, "dump"); break; default: snprintf(an_conf, 5, "%x", an_dump); break; } error = sysctl_handle_string(oidp, an_conf, sizeof(an_conf), req); if (strncmp(an_conf,"off", 3) == 0) { an_dump = 0; } if 
(strncmp(an_conf,"dump", 4) == 0) { an_dump = 1; } if (strncmp(an_conf,"type", 4) == 0) { an_dump = 2; } if (*s == 'f') { r = 0; for (;;s++) { if ((*s >= '0') && (*s <= '9')) { r = r * 16 + (*s - '0'); } else if ((*s >= 'a') && (*s <= 'f')) { r = r * 16 + (*s - 'a' + 10); } else { break; } } an_dump = r; } if (an_dump != last) printf("Sysctl changed for Aironet driver\n"); return error; } SYSCTL_PROC(_hw_an, OID_AUTO, an_dump, CTLTYPE_STRING | CTLFLAG_RW, 0, sizeof(an_conf), sysctl_an_dump, "A", ""); static int sysctl_an_cache_mode(SYSCTL_HANDLER_ARGS) { int error; switch (an_cache_mode) { case 1: strcpy(an_conf_cache, "per"); break; case 2: strcpy(an_conf_cache, "raw"); break; default: strcpy(an_conf_cache, "dbm"); break; } error = sysctl_handle_string(oidp, an_conf_cache, sizeof(an_conf_cache), req); if (strncmp(an_conf_cache,"dbm", 3) == 0) { an_cache_mode = 0; } if (strncmp(an_conf_cache,"per", 3) == 0) { an_cache_mode = 1; } if (strncmp(an_conf_cache,"raw", 3) == 0) { an_cache_mode = 2; } return error; } SYSCTL_PROC(_hw_an, OID_AUTO, an_cache_mode, CTLTYPE_STRING | CTLFLAG_RW, 0, sizeof(an_conf_cache), sysctl_an_cache_mode, "A", ""); /* * We probe for an Aironet 4500/4800 card by attempting to * read the default SSID list. On reset, the first entry in * the SSID list will contain the name "tsunami." If we don't * find this, then there's no card present. */ int an_probe(device_t dev) { struct an_softc *sc = device_get_softc(dev); struct an_ltv_ssidlist_new ssid; int error; bzero((char *)&ssid, sizeof(ssid)); error = an_alloc_port(dev, 0, AN_IOSIZ); if (error != 0) return (0); /* can't do autoprobing */ if (rman_get_start(sc->port_res) == -1) return(0); /* * We need to fake up a softc structure long enough * to be able to issue commands and call some of the * other routines. */ ssid.an_len = sizeof(ssid); ssid.an_type = AN_RID_SSIDLIST; /* Make sure interrupts are disabled. 
*/ sc->mpi350 = 0; CSR_WRITE_2(sc, AN_INT_EN(sc->mpi350), 0); CSR_WRITE_2(sc, AN_EVENT_ACK(sc->mpi350), 0xFFFF); sc->an_dev = dev; mtx_init(&sc->an_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF); AN_LOCK(sc); an_reset(sc); if (an_cmd(sc, AN_CMD_READCFG, 0)) { AN_UNLOCK(sc); goto fail; } if (an_read_record(sc, (struct an_ltv_gen *)&ssid)) { AN_UNLOCK(sc); goto fail; } /* See if the ssid matches what we expect ... but doesn't have to */ if (strcmp(ssid.an_entry[0].an_ssid, AN_DEF_SSID)) { AN_UNLOCK(sc); goto fail; } AN_UNLOCK(sc); return(AN_IOSIZ); fail: mtx_destroy(&sc->an_mtx); return(0); } /* * Allocate a port resource with the given resource id. */ int an_alloc_port(device_t dev, int rid, int size) { struct an_softc *sc = device_get_softc(dev); struct resource *res; res = bus_alloc_resource_anywhere(dev, SYS_RES_IOPORT, &rid, size, RF_ACTIVE); if (res) { sc->port_rid = rid; sc->port_res = res; return (0); } else { return (ENOENT); } } /* * Allocate a memory resource with the given resource id. */ int an_alloc_memory(device_t dev, int rid, int size) { struct an_softc *sc = device_get_softc(dev); struct resource *res; res = bus_alloc_resource_anywhere(dev, SYS_RES_MEMORY, &rid, size, RF_ACTIVE); if (res) { sc->mem_rid = rid; sc->mem_res = res; sc->mem_used = size; return (0); } else { return (ENOENT); } } /* * Allocate a auxiliary memory resource with the given resource id. */ int an_alloc_aux_memory(device_t dev, int rid, int size) { struct an_softc *sc = device_get_softc(dev); struct resource *res; res = bus_alloc_resource_anywhere(dev, SYS_RES_MEMORY, &rid, size, RF_ACTIVE); if (res) { sc->mem_aux_rid = rid; sc->mem_aux_res = res; sc->mem_aux_used = size; return (0); } else { return (ENOENT); } } /* * Allocate an irq resource with the given resource id. 
*/ int an_alloc_irq(device_t dev, int rid, int flags) { struct an_softc *sc = device_get_softc(dev); struct resource *res; res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, (RF_ACTIVE | flags)); if (res) { sc->irq_rid = rid; sc->irq_res = res; return (0); } else { return (ENOENT); } } static void an_dma_malloc_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { bus_addr_t *paddr = (bus_addr_t*) arg; *paddr = segs->ds_addr; } /* * Alloc DMA memory and set the pointer to it */ static int an_dma_malloc(struct an_softc *sc, bus_size_t size, struct an_dma_alloc *dma, int mapflags) { int r; r = bus_dmamem_alloc(sc->an_dtag, (void**) &dma->an_dma_vaddr, BUS_DMA_NOWAIT, &dma->an_dma_map); if (r != 0) goto fail_1; r = bus_dmamap_load(sc->an_dtag, dma->an_dma_map, dma->an_dma_vaddr, size, an_dma_malloc_cb, &dma->an_dma_paddr, mapflags | BUS_DMA_NOWAIT); if (r != 0) goto fail_2; dma->an_dma_size = size; return (0); fail_2: bus_dmamap_unload(sc->an_dtag, dma->an_dma_map); fail_1: bus_dmamem_free(sc->an_dtag, dma->an_dma_vaddr, dma->an_dma_map); return (r); } static void an_dma_free(struct an_softc *sc, struct an_dma_alloc *dma) { bus_dmamap_unload(sc->an_dtag, dma->an_dma_map); bus_dmamem_free(sc->an_dtag, dma->an_dma_vaddr, dma->an_dma_map); dma->an_dma_vaddr = 0; } /* * Release all resources */ void an_release_resources(device_t dev) { struct an_softc *sc = device_get_softc(dev); int i; if (sc->port_res) { bus_release_resource(dev, SYS_RES_IOPORT, sc->port_rid, sc->port_res); sc->port_res = 0; } if (sc->mem_res) { bus_release_resource(dev, SYS_RES_MEMORY, sc->mem_rid, sc->mem_res); sc->mem_res = 0; } if (sc->mem_aux_res) { bus_release_resource(dev, SYS_RES_MEMORY, sc->mem_aux_rid, sc->mem_aux_res); sc->mem_aux_res = 0; } if (sc->irq_res) { bus_release_resource(dev, SYS_RES_IRQ, sc->irq_rid, sc->irq_res); sc->irq_res = 0; } if (sc->an_rid_buffer.an_dma_paddr) { an_dma_free(sc, &sc->an_rid_buffer); } for (i = 0; i < AN_MAX_RX_DESC; i++) if 
(sc->an_rx_buffer[i].an_dma_paddr) { an_dma_free(sc, &sc->an_rx_buffer[i]); } for (i = 0; i < AN_MAX_TX_DESC; i++) if (sc->an_tx_buffer[i].an_dma_paddr) { an_dma_free(sc, &sc->an_tx_buffer[i]); } if (sc->an_dtag) { bus_dma_tag_destroy(sc->an_dtag); } } int an_init_mpi350_desc(struct an_softc *sc) { struct an_command cmd_struct; struct an_reply reply; struct an_card_rid_desc an_rid_desc; struct an_card_rx_desc an_rx_desc; struct an_card_tx_desc an_tx_desc; int i, desc; AN_LOCK_ASSERT(sc); if(!sc->an_rid_buffer.an_dma_paddr) an_dma_malloc(sc, AN_RID_BUFFER_SIZE, &sc->an_rid_buffer, 0); for (i = 0; i < AN_MAX_RX_DESC; i++) if(!sc->an_rx_buffer[i].an_dma_paddr) an_dma_malloc(sc, AN_RX_BUFFER_SIZE, &sc->an_rx_buffer[i], 0); for (i = 0; i < AN_MAX_TX_DESC; i++) if(!sc->an_tx_buffer[i].an_dma_paddr) an_dma_malloc(sc, AN_TX_BUFFER_SIZE, &sc->an_tx_buffer[i], 0); /* * Allocate RX descriptor */ bzero(&reply,sizeof(reply)); cmd_struct.an_cmd = AN_CMD_ALLOC_DESC; cmd_struct.an_parm0 = AN_DESCRIPTOR_RX; cmd_struct.an_parm1 = AN_RX_DESC_OFFSET; cmd_struct.an_parm2 = AN_MAX_RX_DESC; if (an_cmd_struct(sc, &cmd_struct, &reply)) { if_printf(sc->an_ifp, "failed to allocate RX descriptor\n"); return(EIO); } for (desc = 0; desc < AN_MAX_RX_DESC; desc++) { bzero(&an_rx_desc, sizeof(an_rx_desc)); an_rx_desc.an_valid = 1; an_rx_desc.an_len = AN_RX_BUFFER_SIZE; an_rx_desc.an_done = 0; an_rx_desc.an_phys = sc->an_rx_buffer[desc].an_dma_paddr; for (i = 0; i < sizeof(an_rx_desc) / 4; i++) CSR_MEM_AUX_WRITE_4(sc, AN_RX_DESC_OFFSET + (desc * sizeof(an_rx_desc)) + (i * 4), ((u_int32_t *)(void *)&an_rx_desc)[i]); } /* * Allocate TX descriptor */ bzero(&reply,sizeof(reply)); cmd_struct.an_cmd = AN_CMD_ALLOC_DESC; cmd_struct.an_parm0 = AN_DESCRIPTOR_TX; cmd_struct.an_parm1 = AN_TX_DESC_OFFSET; cmd_struct.an_parm2 = AN_MAX_TX_DESC; if (an_cmd_struct(sc, &cmd_struct, &reply)) { if_printf(sc->an_ifp, "failed to allocate TX descriptor\n"); return(EIO); } for (desc = 0; desc < AN_MAX_TX_DESC; desc++) { 
bzero(&an_tx_desc, sizeof(an_tx_desc)); an_tx_desc.an_offset = 0; an_tx_desc.an_eoc = 0; an_tx_desc.an_valid = 0; an_tx_desc.an_len = 0; an_tx_desc.an_phys = sc->an_tx_buffer[desc].an_dma_paddr; for (i = 0; i < sizeof(an_tx_desc) / 4; i++) CSR_MEM_AUX_WRITE_4(sc, AN_TX_DESC_OFFSET + (desc * sizeof(an_tx_desc)) + (i * 4), ((u_int32_t *)(void *)&an_tx_desc)[i]); } /* * Allocate RID descriptor */ bzero(&reply,sizeof(reply)); cmd_struct.an_cmd = AN_CMD_ALLOC_DESC; cmd_struct.an_parm0 = AN_DESCRIPTOR_HOSTRW; cmd_struct.an_parm1 = AN_HOST_DESC_OFFSET; cmd_struct.an_parm2 = 1; if (an_cmd_struct(sc, &cmd_struct, &reply)) { if_printf(sc->an_ifp, "failed to allocate host descriptor\n"); return(EIO); } bzero(&an_rid_desc, sizeof(an_rid_desc)); an_rid_desc.an_valid = 1; an_rid_desc.an_len = AN_RID_BUFFER_SIZE; an_rid_desc.an_rid = 0; an_rid_desc.an_phys = sc->an_rid_buffer.an_dma_paddr; for (i = 0; i < sizeof(an_rid_desc) / 4; i++) CSR_MEM_AUX_WRITE_4(sc, AN_HOST_DESC_OFFSET + i * 4, ((u_int32_t *)(void *)&an_rid_desc)[i]); return(0); } int an_attach(struct an_softc *sc, int flags) { struct ifnet *ifp; int error = EIO; int i, nrate, mword; u_int8_t r; ifp = sc->an_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(sc->an_dev, "can not if_alloc()\n"); goto fail; } ifp->if_softc = sc; if_initname(ifp, device_get_name(sc->an_dev), device_get_unit(sc->an_dev)); sc->an_gone = 0; sc->an_associated = 0; sc->an_monitor = 0; sc->an_was_monitor = 0; sc->an_flash_buffer = NULL; /* Reset the NIC. 
*/ AN_LOCK(sc); an_reset(sc); if (sc->mpi350) { error = an_init_mpi350_desc(sc); if (error) goto fail; } /* Load factory config */ if (an_cmd(sc, AN_CMD_READCFG, 0)) { device_printf(sc->an_dev, "failed to load config data\n"); goto fail; } /* Read the current configuration */ sc->an_config.an_type = AN_RID_GENCONFIG; sc->an_config.an_len = sizeof(struct an_ltv_genconfig); if (an_read_record(sc, (struct an_ltv_gen *)&sc->an_config)) { device_printf(sc->an_dev, "read record failed\n"); goto fail; } /* Read the card capabilities */ sc->an_caps.an_type = AN_RID_CAPABILITIES; sc->an_caps.an_len = sizeof(struct an_ltv_caps); if (an_read_record(sc, (struct an_ltv_gen *)&sc->an_caps)) { device_printf(sc->an_dev, "read record failed\n"); goto fail; } /* Read ssid list */ sc->an_ssidlist.an_type = AN_RID_SSIDLIST; sc->an_ssidlist.an_len = sizeof(struct an_ltv_ssidlist_new); if (an_read_record(sc, (struct an_ltv_gen *)&sc->an_ssidlist)) { device_printf(sc->an_dev, "read record failed\n"); goto fail; } /* Read AP list */ sc->an_aplist.an_type = AN_RID_APLIST; sc->an_aplist.an_len = sizeof(struct an_ltv_aplist); if (an_read_record(sc, (struct an_ltv_gen *)&sc->an_aplist)) { device_printf(sc->an_dev, "read record failed\n"); goto fail; } #ifdef ANCACHE /* Read the RSSI <-> dBm map */ sc->an_have_rssimap = 0; if (sc->an_caps.an_softcaps & 8) { sc->an_rssimap.an_type = AN_RID_RSSI_MAP; sc->an_rssimap.an_len = sizeof(struct an_ltv_rssi_map); if (an_read_record(sc, (struct an_ltv_gen *)&sc->an_rssimap)) { device_printf(sc->an_dev, "unable to get RSSI <-> dBM map\n"); } else { device_printf(sc->an_dev, "got RSSI <-> dBM map\n"); sc->an_have_rssimap = 1; } } else { device_printf(sc->an_dev, "no RSSI <-> dBM map\n"); } #endif AN_UNLOCK(sc); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = an_ioctl; ifp->if_start = an_start; ifp->if_init = an_init; ifp->if_baudrate = 10000000; IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); ifp->if_snd.ifq_drv_maxlen = ifqmaxlen; 
IFQ_SET_READY(&ifp->if_snd); bzero(sc->an_config.an_nodename, sizeof(sc->an_config.an_nodename)); bcopy(AN_DEFAULT_NODENAME, sc->an_config.an_nodename, sizeof(AN_DEFAULT_NODENAME) - 1); bzero(sc->an_ssidlist.an_entry[0].an_ssid, sizeof(sc->an_ssidlist.an_entry[0].an_ssid)); bcopy(AN_DEFAULT_NETNAME, sc->an_ssidlist.an_entry[0].an_ssid, sizeof(AN_DEFAULT_NETNAME) - 1); sc->an_ssidlist.an_entry[0].an_len = strlen(AN_DEFAULT_NETNAME); sc->an_config.an_opmode = AN_OPMODE_INFRASTRUCTURE_STATION; sc->an_tx_rate = 0; bzero((char *)&sc->an_stats, sizeof(sc->an_stats)); nrate = 8; ifmedia_init(&sc->an_ifmedia, 0, an_media_change, an_media_status); if_printf(ifp, "supported rates: "); #define ADD(s, o) ifmedia_add(&sc->an_ifmedia, \ IFM_MAKEWORD(IFM_IEEE80211, (s), (o), 0), 0, NULL) ADD(IFM_AUTO, 0); ADD(IFM_AUTO, IFM_IEEE80211_ADHOC); for (i = 0; i < nrate; i++) { r = sc->an_caps.an_rates[i]; mword = ieee80211_rate2media(NULL, r, IEEE80211_MODE_AUTO); if (mword == 0) continue; printf("%s%d%sMbps", (i != 0 ? " " : ""), (r & IEEE80211_RATE_VAL) / 2, ((r & 0x1) != 0 ? ".5" : "")); ADD(mword, 0); ADD(mword, IFM_IEEE80211_ADHOC); } printf("\n"); ifmedia_set(&sc->an_ifmedia, IFM_MAKEWORD(IFM_IEEE80211, IFM_AUTO, 0, 0)); #undef ADD /* * Call MI attach routine. 
*/ ether_ifattach(ifp, sc->an_caps.an_oemaddr); callout_init_mtx(&sc->an_stat_ch, &sc->an_mtx, 0); return(0); fail: AN_UNLOCK(sc); mtx_destroy(&sc->an_mtx); if (ifp != NULL) if_free(ifp); return(error); } int an_detach(device_t dev) { struct an_softc *sc = device_get_softc(dev); struct ifnet *ifp = sc->an_ifp; if (sc->an_gone) { device_printf(dev,"already unloaded\n"); return(0); } AN_LOCK(sc); an_stop(sc); sc->an_gone = 1; ifmedia_removeall(&sc->an_ifmedia); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; AN_UNLOCK(sc); ether_ifdetach(ifp); bus_teardown_intr(dev, sc->irq_res, sc->irq_handle); callout_drain(&sc->an_stat_ch); if_free(ifp); an_release_resources(dev); mtx_destroy(&sc->an_mtx); return (0); } static void an_rxeof(struct an_softc *sc) { struct ifnet *ifp; struct ether_header *eh; struct ieee80211_frame *ih; struct an_rxframe rx_frame; struct an_rxframe_802_3 rx_frame_802_3; struct mbuf *m; int len, id, error = 0, i, count = 0; int ieee80211_header_len; u_char *bpf_buf; u_short fc1; struct an_card_rx_desc an_rx_desc; u_int8_t *buf; AN_LOCK_ASSERT(sc); ifp = sc->an_ifp; if (!sc->mpi350) { id = CSR_READ_2(sc, AN_RX_FID); if (sc->an_monitor && (ifp->if_flags & IFF_PROMISC)) { /* read raw 802.11 packet */ bpf_buf = sc->buf_802_11; /* read header */ if (an_read_data(sc, id, 0x0, (caddr_t)&rx_frame, sizeof(rx_frame))) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return; } /* * skip beacon by default since this increases the * system load a lot */ if (!(sc->an_monitor & AN_MONITOR_INCLUDE_BEACON) && (rx_frame.an_frame_ctl & IEEE80211_FC0_SUBTYPE_BEACON)) { return; } if (sc->an_monitor & AN_MONITOR_AIRONET_HEADER) { len = rx_frame.an_rx_payload_len + sizeof(rx_frame); /* Check for insane frame length */ if (len > sizeof(sc->buf_802_11)) { if_printf(ifp, "oversized packet " "received (%d, %d)\n", len, MCLBYTES); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return; } bcopy((char *)&rx_frame, bpf_buf, sizeof(rx_frame)); error = an_read_data(sc, id, sizeof(rx_frame), 
(caddr_t)bpf_buf+sizeof(rx_frame), rx_frame.an_rx_payload_len); } else { fc1=rx_frame.an_frame_ctl >> 8; ieee80211_header_len = sizeof(struct ieee80211_frame); if ((fc1 & IEEE80211_FC1_DIR_TODS) && (fc1 & IEEE80211_FC1_DIR_FROMDS)) { ieee80211_header_len += ETHER_ADDR_LEN; } len = rx_frame.an_rx_payload_len + ieee80211_header_len; /* Check for insane frame length */ if (len > sizeof(sc->buf_802_11)) { if_printf(ifp, "oversized packet " "received (%d, %d)\n", len, MCLBYTES); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return; } ih = (struct ieee80211_frame *)bpf_buf; bcopy((char *)&rx_frame.an_frame_ctl, (char *)ih, ieee80211_header_len); error = an_read_data(sc, id, sizeof(rx_frame) + rx_frame.an_gaplen, (caddr_t)ih +ieee80211_header_len, rx_frame.an_rx_payload_len); } /* dump raw 802.11 packet to bpf and skip ip stack */ BPF_TAP(ifp, bpf_buf, len); } else { MGETHDR(m, M_NOWAIT, MT_DATA); if (m == NULL) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return; } if (!(MCLGET(m, M_NOWAIT))) { m_freem(m); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return; } m->m_pkthdr.rcvif = ifp; /* Read Ethernet encapsulated packet */ #ifdef ANCACHE /* Read NIC frame header */ if (an_read_data(sc, id, 0, (caddr_t)&rx_frame, sizeof(rx_frame))) { m_freem(m); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return; } #endif /* Read in the 802_3 frame header */ if (an_read_data(sc, id, 0x34, (caddr_t)&rx_frame_802_3, sizeof(rx_frame_802_3))) { m_freem(m); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return; } if (rx_frame_802_3.an_rx_802_3_status != 0) { m_freem(m); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return; } /* Check for insane frame length */ len = rx_frame_802_3.an_rx_802_3_payload_len; if (len > sizeof(sc->buf_802_11)) { m_freem(m); if_printf(ifp, "oversized packet " "received (%d, %d)\n", len, MCLBYTES); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return; } m->m_pkthdr.len = m->m_len = rx_frame_802_3.an_rx_802_3_payload_len + 12; eh = mtod(m, struct ether_header *); bcopy((char 
*)&rx_frame_802_3.an_rx_dst_addr, (char *)&eh->ether_dhost, ETHER_ADDR_LEN); bcopy((char *)&rx_frame_802_3.an_rx_src_addr, (char *)&eh->ether_shost, ETHER_ADDR_LEN); /* in mbuf header type is just before payload */ error = an_read_data(sc, id, 0x44, (caddr_t)&(eh->ether_type), rx_frame_802_3.an_rx_802_3_payload_len); if (error) { m_freem(m); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return; } if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); /* Receive packet. */ #ifdef ANCACHE an_cache_store(sc, eh, m, rx_frame.an_rx_signal_strength, rx_frame.an_rsvd0); #endif AN_UNLOCK(sc); (*ifp->if_input)(ifp, m); AN_LOCK(sc); } } else { /* MPI-350 */ for (count = 0; count < AN_MAX_RX_DESC; count++){ for (i = 0; i < sizeof(an_rx_desc) / 4; i++) ((u_int32_t *)(void *)&an_rx_desc)[i] = CSR_MEM_AUX_READ_4(sc, AN_RX_DESC_OFFSET + (count * sizeof(an_rx_desc)) + (i * 4)); if (an_rx_desc.an_done && !an_rx_desc.an_valid) { buf = sc->an_rx_buffer[count].an_dma_vaddr; MGETHDR(m, M_NOWAIT, MT_DATA); if (m == NULL) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return; } if (!(MCLGET(m, M_NOWAIT))) { m_freem(m); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return; } m->m_pkthdr.rcvif = ifp; /* Read Ethernet encapsulated packet */ /* * No ANCACHE support since we just get back * an Ethernet packet no 802.11 info */ #if 0 #ifdef ANCACHE /* Read NIC frame header */ bcopy(buf, (caddr_t)&rx_frame, sizeof(rx_frame)); #endif #endif /* Check for insane frame length */ len = an_rx_desc.an_len + 12; if (len > MCLBYTES) { m_freem(m); if_printf(ifp, "oversized packet " "received (%d, %d)\n", len, MCLBYTES); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return; } m->m_pkthdr.len = m->m_len = an_rx_desc.an_len + 12; eh = mtod(m, struct ether_header *); bcopy(buf, (char *)eh, m->m_pkthdr.len); if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); /* Receive packet. 
*/ #if 0 #ifdef ANCACHE an_cache_store(sc, eh, m, rx_frame.an_rx_signal_strength, rx_frame.an_rsvd0); #endif #endif AN_UNLOCK(sc); (*ifp->if_input)(ifp, m); AN_LOCK(sc); an_rx_desc.an_valid = 1; an_rx_desc.an_len = AN_RX_BUFFER_SIZE; an_rx_desc.an_done = 0; an_rx_desc.an_phys = sc->an_rx_buffer[count].an_dma_paddr; for (i = 0; i < sizeof(an_rx_desc) / 4; i++) CSR_MEM_AUX_WRITE_4(sc, AN_RX_DESC_OFFSET + (count * sizeof(an_rx_desc)) + (i * 4), ((u_int32_t *)(void *)&an_rx_desc)[i]); } else { if_printf(ifp, "Didn't get valid RX packet " "%x %x %d\n", an_rx_desc.an_done, an_rx_desc.an_valid, an_rx_desc.an_len); } } } } static void an_txeof(struct an_softc *sc, int status) { struct ifnet *ifp; int id, i; AN_LOCK_ASSERT(sc); ifp = sc->an_ifp; sc->an_timer = 0; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; if (!sc->mpi350) { id = CSR_READ_2(sc, AN_TX_CMP_FID(sc->mpi350)); if (status & AN_EV_TX_EXC) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } else if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); for (i = 0; i < AN_TX_RING_CNT; i++) { if (id == sc->an_rdata.an_tx_ring[i]) { sc->an_rdata.an_tx_ring[i] = 0; break; } } AN_INC(sc->an_rdata.an_tx_cons, AN_TX_RING_CNT); } else { /* MPI 350 */ id = CSR_READ_2(sc, AN_TX_CMP_FID(sc->mpi350)); if (!sc->an_rdata.an_tx_empty){ if (status & AN_EV_TX_EXC) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } else if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); AN_INC(sc->an_rdata.an_tx_cons, AN_MAX_TX_DESC); if (sc->an_rdata.an_tx_prod == sc->an_rdata.an_tx_cons) sc->an_rdata.an_tx_empty = 1; } } return; } /* * We abuse the stats updater to check the current NIC status. This * is important because we don't want to allow transmissions until * the NIC has synchronized to the current cell (either as the master * in an ad-hoc group, or as a station connected to an access point). * * Note that this function will be called via callout(9) with a lock held. 
*/
static void
an_stats_update(void *xsc)
{
	struct an_softc		*sc;
	struct ifnet		*ifp;

	sc = xsc;
	AN_LOCK_ASSERT(sc);
	ifp = sc->an_ifp;
	/* Count the watchdog down; fire it when it reaches zero. */
	if (sc->an_timer > 0 && --sc->an_timer == 0)
		an_watchdog(sc);

	sc->an_status.an_type = AN_RID_STATUS;
	sc->an_status.an_len = sizeof(struct an_ltv_status);

	/* NOTE: a failed read returns without re-arming the callout. */
	if (an_read_record(sc, (struct an_ltv_gen *)&sc->an_status))
		return;

	if (sc->an_status.an_opmode & AN_STATUS_OPMODE_IN_SYNC)
		sc->an_associated = 1;
	else
		sc->an_associated = 0;

	/* Don't do this while we're transmitting */
	if (ifp->if_drv_flags & IFF_DRV_OACTIVE) {
		callout_reset(&sc->an_stat_ch, hz, an_stats_update, sc);
		return;
	}

	sc->an_stats.an_len = sizeof(struct an_ltv_stats);
	sc->an_stats.an_type = AN_RID_32BITS_CUM;

	/* NOTE: as above, a failed read leaves the callout un-armed. */
	if (an_read_record(sc, (struct an_ltv_gen *)&sc->an_stats.an_len))
		return;

	/* Run again in one second (hz ticks). */
	callout_reset(&sc->an_stat_ch, hz, an_stats_update, sc);

	return;
}

/*
 * Interrupt handler.
 *
 * Takes the softc lock, masks the card's interrupts, reads the event
 * status once and then services and acknowledges each event found in it:
 * MIC, link status, RX, TX copy (MPI-350 only), TX, TX exception and
 * ALLOC.  Interrupts are then unmasked again and, if the interface is up
 * and the send queue is not empty, transmission is restarted.  Does
 * nothing when the softc is marked gone.
 */
void
an_intr(void *xsc)
{
	struct an_softc		*sc;
	struct ifnet		*ifp;
	u_int16_t		status;

	sc = (struct an_softc*)xsc;

	AN_LOCK(sc);

	if (sc->an_gone) {
		AN_UNLOCK(sc);
		return;
	}

	ifp = sc->an_ifp;

	/* Disable interrupts. */
	CSR_WRITE_2(sc, AN_INT_EN(sc->mpi350), 0);

	status = CSR_READ_2(sc, AN_EVENT_STAT(sc->mpi350));
	/* Ack every event bit that is not part of AN_INTRS. */
	CSR_WRITE_2(sc, AN_EVENT_ACK(sc->mpi350), ~AN_INTRS(sc->mpi350));

	if (status & AN_EV_MIC) {
		CSR_WRITE_2(sc, AN_EVENT_ACK(sc->mpi350), AN_EV_MIC);
	}

	if (status & AN_EV_LINKSTAT) {
		if (CSR_READ_2(sc, AN_LINKSTAT(sc->mpi350))
		    == AN_LINKSTAT_ASSOCIATED)
			sc->an_associated = 1;
		else
			sc->an_associated = 0;
		CSR_WRITE_2(sc, AN_EVENT_ACK(sc->mpi350), AN_EV_LINKSTAT);
	}

	if (status & AN_EV_RX) {
		an_rxeof(sc);
		CSR_WRITE_2(sc, AN_EVENT_ACK(sc->mpi350), AN_EV_RX);
	}

	if (sc->mpi350 && status & AN_EV_TX_CPY) {
		an_txeof(sc, status);
		CSR_WRITE_2(sc, AN_EVENT_ACK(sc->mpi350), AN_EV_TX_CPY);
	}

	if (status & AN_EV_TX) {
		an_txeof(sc, status);
		CSR_WRITE_2(sc, AN_EVENT_ACK(sc->mpi350), AN_EV_TX);
	}

	if (status & AN_EV_TX_EXC) {
		an_txeof(sc, status);
		CSR_WRITE_2(sc, AN_EVENT_ACK(sc->mpi350), AN_EV_TX_EXC);
	}

	if (status & AN_EV_ALLOC)
		CSR_WRITE_2(sc, AN_EVENT_ACK(sc->mpi350), AN_EV_ALLOC);

	/* Re-enable interrupts. */
	CSR_WRITE_2(sc, AN_INT_EN(sc->mpi350), AN_INTRS(sc->mpi350));

	if ((ifp->if_flags & IFF_UP) && !IFQ_DRV_IS_EMPTY(&ifp->if_snd))
		an_start_locked(ifp);

	AN_UNLOCK(sc);

	return;
}

/*
 * Issue a command with three parameters and collect the full reply.
 *
 * Waits (up to AN_TIMEOUT polls, 1000us apart) for AN_CMD_BUSY to clear,
 * writes the parameters and the command, then polls the event status the
 * same way for AN_EV_CMD.  The three response registers and the status
 * register are copied into *reply, a still-busy command register is
 * cleared with AN_EV_CLR_STUCK_BUSY and the command event is acked.
 *
 * Must be called with the softc lock held.  Returns 0 on success and
 * ETIMEDOUT if the card stayed busy or never signalled completion; in
 * the latter case *reply is still filled in.
 */
static int
an_cmd_struct(struct an_softc *sc, struct an_command *cmd,
    struct an_reply *reply)
{
	int			i;

	AN_LOCK_ASSERT(sc);
	for (i = 0; i != AN_TIMEOUT; i++) {
		if (CSR_READ_2(sc, AN_COMMAND(sc->mpi350)) & AN_CMD_BUSY) {
			DELAY(1000);
		} else
			break;
	}

	if( i == AN_TIMEOUT) {
		printf("BUSY\n");
		return(ETIMEDOUT);
	}

	CSR_WRITE_2(sc, AN_PARAM0(sc->mpi350), cmd->an_parm0);
	CSR_WRITE_2(sc, AN_PARAM1(sc->mpi350), cmd->an_parm1);
	CSR_WRITE_2(sc, AN_PARAM2(sc->mpi350), cmd->an_parm2);
	CSR_WRITE_2(sc, AN_COMMAND(sc->mpi350), cmd->an_cmd);

	for (i = 0; i < AN_TIMEOUT; i++) {
		if (CSR_READ_2(sc, AN_EVENT_STAT(sc->mpi350)) & AN_EV_CMD)
			break;
		DELAY(1000);
	}

	reply->an_resp0 = CSR_READ_2(sc, AN_RESP0(sc->mpi350));
	reply->an_resp1 = CSR_READ_2(sc, AN_RESP1(sc->mpi350));
	reply->an_resp2 = CSR_READ_2(sc, AN_RESP2(sc->mpi350));
	reply->an_status = CSR_READ_2(sc, AN_STATUS(sc->mpi350));

	if (CSR_READ_2(sc, AN_COMMAND(sc->mpi350)) & AN_CMD_BUSY)
		CSR_WRITE_2(sc, AN_EVENT_ACK(sc->mpi350),
		    AN_EV_CLR_STUCK_BUSY);

	/* Ack the command */
	CSR_WRITE_2(sc, AN_EVENT_ACK(sc->mpi350), AN_EV_CMD);

	/* i still holds the count from the completion poll above. */
	if (i == AN_TIMEOUT)
		return(ETIMEDOUT);

	return(0);
}

/*
 * Issue a command with a single parameter (val goes to PARAM0, the other
 * two parameter registers are zeroed).
 *
 * The first loop polls for AN_EV_CMD and rewrites the command whenever
 * the command register still reads back as cmd.  The second loop reads
 * the response and status registers until the command code in the status
 * matches cmd.  Neither loop delays between polls.  Afterwards the
 * command event is acked and a stuck busy bit is cleared.
 *
 * Must be called with the softc lock held.  Returns 0 on success and
 * ETIMEDOUT when the status never matched; only the second loop decides
 * this, since i is reset between the loops.
 */
static int
an_cmd(struct an_softc *sc, int cmd, int val)
{
	int			i, s = 0;

	AN_LOCK_ASSERT(sc);
	CSR_WRITE_2(sc, AN_PARAM0(sc->mpi350), val);
	CSR_WRITE_2(sc, AN_PARAM1(sc->mpi350), 0);
	CSR_WRITE_2(sc, AN_PARAM2(sc->mpi350), 0);
	CSR_WRITE_2(sc, AN_COMMAND(sc->mpi350), cmd);

	for (i = 0; i < AN_TIMEOUT; i++) {
		if (CSR_READ_2(sc, AN_EVENT_STAT(sc->mpi350)) & AN_EV_CMD)
			break;
		else {
			if (CSR_READ_2(sc, AN_COMMAND(sc->mpi350)) == cmd)
				CSR_WRITE_2(sc, AN_COMMAND(sc->mpi350), cmd);
		}
	}

	for (i = 0; i < AN_TIMEOUT; i++) {
		/* The response registers are read and discarded. */
		CSR_READ_2(sc, AN_RESP0(sc->mpi350));
		CSR_READ_2(sc, AN_RESP1(sc->mpi350));
		CSR_READ_2(sc, AN_RESP2(sc->mpi350));
		s = CSR_READ_2(sc, AN_STATUS(sc->mpi350));
		if ((s & AN_STAT_CMD_CODE) == (cmd & AN_STAT_CMD_CODE))
			break;
	}

	/* Ack the command */
	CSR_WRITE_2(sc, AN_EVENT_ACK(sc->mpi350), AN_EV_CMD);

	if (CSR_READ_2(sc, AN_COMMAND(sc->mpi350)) & AN_CMD_BUSY)
		CSR_WRITE_2(sc, AN_EVENT_ACK(sc->mpi350), AN_EV_CLR_STUCK_BUSY);

	if (i == AN_TIMEOUT)
		return(ETIMEDOUT);

	return(0);
}

/*
 * This reset sequence may look a little strange, but this is the
 * most reliable method I've found to really kick the NIC in the
 * head and force it to reboot correctly.
 */
static void
an_reset(struct an_softc *sc)
{
	/* Nothing to do once the softc has been marked gone. */
	if (sc->an_gone)
		return;

	AN_LOCK_ASSERT(sc);
	an_cmd(sc, AN_CMD_ENABLE, 0);
	an_cmd(sc, AN_CMD_FW_RESTART, 0);
	an_cmd(sc, AN_CMD_NOOP2, 0);

	/* Only a timeout of this step is reported. */
	if (an_cmd(sc, AN_CMD_FORCE_SYNCLOSS, 0) == ETIMEDOUT)
		device_printf(sc->an_dev, "reset failed\n");

	an_cmd(sc, AN_CMD_DISABLE, 0);

	return;
}

/*
 * Read an LTV record from the NIC.
*/ static int an_read_record(struct an_softc *sc, struct an_ltv_gen *ltv) { struct an_ltv_gen *an_ltv; struct an_card_rid_desc an_rid_desc; struct an_command cmd; struct an_reply reply; struct ifnet *ifp; u_int16_t *ptr; u_int8_t *ptr2; int i, len; AN_LOCK_ASSERT(sc); if (ltv->an_len < 4 || ltv->an_type == 0) return(EINVAL); ifp = sc->an_ifp; if (!sc->mpi350){ /* Tell the NIC to enter record read mode. */ if (an_cmd(sc, AN_CMD_ACCESS|AN_ACCESS_READ, ltv->an_type)) { if_printf(ifp, "RID access failed\n"); return(EIO); } /* Seek to the record. */ if (an_seek(sc, ltv->an_type, 0, AN_BAP1)) { if_printf(ifp, "seek to record failed\n"); return(EIO); } /* * Read the length and record type and make sure they * match what we expect (this verifies that we have enough * room to hold all of the returned data). * Length includes type but not length. */ len = CSR_READ_2(sc, AN_DATA1); if (len > (ltv->an_len - 2)) { if_printf(ifp, "record length mismatch -- expected %d, " "got %d for Rid %x\n", ltv->an_len - 2, len, ltv->an_type); len = ltv->an_len - 2; } else { ltv->an_len = len + 2; } /* Now read the data. 
*/ len -= 2; /* skip the type */ ptr = <v->an_val; for (i = len; i > 1; i -= 2) *ptr++ = CSR_READ_2(sc, AN_DATA1); if (i) { ptr2 = (u_int8_t *)ptr; *ptr2 = CSR_READ_1(sc, AN_DATA1); } } else { /* MPI-350 */ if (!sc->an_rid_buffer.an_dma_vaddr) return(EIO); an_rid_desc.an_valid = 1; an_rid_desc.an_len = AN_RID_BUFFER_SIZE; an_rid_desc.an_rid = 0; an_rid_desc.an_phys = sc->an_rid_buffer.an_dma_paddr; bzero(sc->an_rid_buffer.an_dma_vaddr, AN_RID_BUFFER_SIZE); bzero(&cmd, sizeof(cmd)); bzero(&reply, sizeof(reply)); cmd.an_cmd = AN_CMD_ACCESS|AN_ACCESS_READ; cmd.an_parm0 = ltv->an_type; for (i = 0; i < sizeof(an_rid_desc) / 4; i++) CSR_MEM_AUX_WRITE_4(sc, AN_HOST_DESC_OFFSET + i * 4, ((u_int32_t *)(void *)&an_rid_desc)[i]); if (an_cmd_struct(sc, &cmd, &reply) || reply.an_status & AN_CMD_QUAL_MASK) { if_printf(ifp, "failed to read RID %x %x %x %x %x, %d\n", ltv->an_type, reply.an_status, reply.an_resp0, reply.an_resp1, reply.an_resp2, i); return(EIO); } an_ltv = (struct an_ltv_gen *)sc->an_rid_buffer.an_dma_vaddr; if (an_ltv->an_len + 2 < an_rid_desc.an_len) { an_rid_desc.an_len = an_ltv->an_len; } len = an_rid_desc.an_len; if (len > (ltv->an_len - 2)) { if_printf(ifp, "record length mismatch -- expected %d, " "got %d for Rid %x\n", ltv->an_len - 2, len, ltv->an_type); len = ltv->an_len - 2; } else { ltv->an_len = len + 2; } bcopy(&an_ltv->an_type, <v->an_val, len); } if (an_dump) an_dump_record(sc, ltv, "Read"); return(0); } /* * Same as read, except we inject data instead of reading it. */ static int an_write_record(struct an_softc *sc, struct an_ltv_gen *ltv) { struct an_card_rid_desc an_rid_desc; struct an_command cmd; struct an_reply reply; u_int16_t *ptr; u_int8_t *ptr2; int i, len; AN_LOCK_ASSERT(sc); if (an_dump) an_dump_record(sc, ltv, "Write"); if (!sc->mpi350){ if (an_cmd(sc, AN_CMD_ACCESS|AN_ACCESS_READ, ltv->an_type)) return(EIO); if (an_seek(sc, ltv->an_type, 0, AN_BAP1)) return(EIO); /* * Length includes type but not length. 
*/ len = ltv->an_len - 2; CSR_WRITE_2(sc, AN_DATA1, len); len -= 2; /* skip the type */ ptr = <v->an_val; for (i = len; i > 1; i -= 2) CSR_WRITE_2(sc, AN_DATA1, *ptr++); if (i) { ptr2 = (u_int8_t *)ptr; CSR_WRITE_1(sc, AN_DATA0, *ptr2); } if (an_cmd(sc, AN_CMD_ACCESS|AN_ACCESS_WRITE, ltv->an_type)) return(EIO); } else { /* MPI-350 */ for (i = 0; i != AN_TIMEOUT; i++) { if (CSR_READ_2(sc, AN_COMMAND(sc->mpi350)) & AN_CMD_BUSY) { DELAY(10); } else break; } if (i == AN_TIMEOUT) { printf("BUSY\n"); } an_rid_desc.an_valid = 1; an_rid_desc.an_len = ltv->an_len - 2; an_rid_desc.an_rid = ltv->an_type; an_rid_desc.an_phys = sc->an_rid_buffer.an_dma_paddr; bcopy(<v->an_type, sc->an_rid_buffer.an_dma_vaddr, an_rid_desc.an_len); bzero(&cmd,sizeof(cmd)); bzero(&reply,sizeof(reply)); cmd.an_cmd = AN_CMD_ACCESS|AN_ACCESS_WRITE; cmd.an_parm0 = ltv->an_type; for (i = 0; i < sizeof(an_rid_desc) / 4; i++) CSR_MEM_AUX_WRITE_4(sc, AN_HOST_DESC_OFFSET + i * 4, ((u_int32_t *)(void *)&an_rid_desc)[i]); DELAY(100000); if ((i = an_cmd_struct(sc, &cmd, &reply))) { if_printf(sc->an_ifp, "failed to write RID 1 %x %x %x %x %x, %d\n", ltv->an_type, reply.an_status, reply.an_resp0, reply.an_resp1, reply.an_resp2, i); return(EIO); } if (reply.an_status & AN_CMD_QUAL_MASK) { if_printf(sc->an_ifp, "failed to write RID 2 %x %x %x %x %x, %d\n", ltv->an_type, reply.an_status, reply.an_resp0, reply.an_resp1, reply.an_resp2, i); return(EIO); } DELAY(100000); } return(0); } static void an_dump_record(struct an_softc *sc, struct an_ltv_gen *ltv, char *string) { u_int8_t *ptr2; int len; int i; int count = 0; char buf[17], temp; len = ltv->an_len - 4; if_printf(sc->an_ifp, "RID %4x, Length %4d, Mode %s\n", ltv->an_type, ltv->an_len - 4, string); if (an_dump == 1 || (an_dump == ltv->an_type)) { if_printf(sc->an_ifp, "\t"); bzero(buf,sizeof(buf)); ptr2 = (u_int8_t *)<v->an_val; for (i = len; i > 0; i--) { printf("%02x ", *ptr2); temp = *ptr2++; if (isprint(temp)) buf[count] = temp; else buf[count] = '.'; if 
(++count == 16) { count = 0; printf("%s\n",buf); if_printf(sc->an_ifp, "\t"); bzero(buf,sizeof(buf)); } } for (; count != 16; count++) { printf(" "); } printf(" %s\n",buf); } } static int an_seek(struct an_softc *sc, int id, int off, int chan) { int i; int selreg, offreg; switch (chan) { case AN_BAP0: selreg = AN_SEL0; offreg = AN_OFF0; break; case AN_BAP1: selreg = AN_SEL1; offreg = AN_OFF1; break; default: if_printf(sc->an_ifp, "invalid data path: %x\n", chan); return(EIO); } CSR_WRITE_2(sc, selreg, id); CSR_WRITE_2(sc, offreg, off); for (i = 0; i < AN_TIMEOUT; i++) { if (!(CSR_READ_2(sc, offreg) & (AN_OFF_BUSY|AN_OFF_ERR))) break; } if (i == AN_TIMEOUT) return(ETIMEDOUT); return(0); } static int an_read_data(struct an_softc *sc, int id, int off, caddr_t buf, int len) { int i; u_int16_t *ptr; u_int8_t *ptr2; if (off != -1) { if (an_seek(sc, id, off, AN_BAP1)) return(EIO); } ptr = (u_int16_t *)buf; for (i = len; i > 1; i -= 2) *ptr++ = CSR_READ_2(sc, AN_DATA1); if (i) { ptr2 = (u_int8_t *)ptr; *ptr2 = CSR_READ_1(sc, AN_DATA1); } return(0); } static int an_write_data(struct an_softc *sc, int id, int off, caddr_t buf, int len) { int i; u_int16_t *ptr; u_int8_t *ptr2; if (off != -1) { if (an_seek(sc, id, off, AN_BAP0)) return(EIO); } ptr = (u_int16_t *)buf; for (i = len; i > 1; i -= 2) CSR_WRITE_2(sc, AN_DATA0, *ptr++); if (i) { ptr2 = (u_int8_t *)ptr; CSR_WRITE_1(sc, AN_DATA0, *ptr2); } return(0); } /* * Allocate a region of memory inside the NIC and zero * it out. 
*/ static int an_alloc_nicmem(struct an_softc *sc, int len, int *id) { int i; if (an_cmd(sc, AN_CMD_ALLOC_MEM, len)) { if_printf(sc->an_ifp, "failed to allocate %d bytes on NIC\n", len); return(ENOMEM); } for (i = 0; i < AN_TIMEOUT; i++) { if (CSR_READ_2(sc, AN_EVENT_STAT(sc->mpi350)) & AN_EV_ALLOC) break; } if (i == AN_TIMEOUT) return(ETIMEDOUT); CSR_WRITE_2(sc, AN_EVENT_ACK(sc->mpi350), AN_EV_ALLOC); *id = CSR_READ_2(sc, AN_ALLOC_FID); if (an_seek(sc, *id, 0, AN_BAP0)) return(EIO); for (i = 0; i < len / 2; i++) CSR_WRITE_2(sc, AN_DATA0, 0); return(0); } static void an_setdef(struct an_softc *sc, struct an_req *areq) { struct ifnet *ifp; struct an_ltv_genconfig *cfg; struct an_ltv_ssidlist_new *ssid; struct an_ltv_aplist *ap; struct an_ltv_gen *sp; ifp = sc->an_ifp; AN_LOCK_ASSERT(sc); switch (areq->an_type) { case AN_RID_GENCONFIG: cfg = (struct an_ltv_genconfig *)areq; bcopy((char *)&cfg->an_macaddr, IF_LLADDR(sc->an_ifp), ETHER_ADDR_LEN); bcopy((char *)cfg, (char *)&sc->an_config, sizeof(struct an_ltv_genconfig)); break; case AN_RID_SSIDLIST: ssid = (struct an_ltv_ssidlist_new *)areq; bcopy((char *)ssid, (char *)&sc->an_ssidlist, sizeof(struct an_ltv_ssidlist_new)); break; case AN_RID_APLIST: ap = (struct an_ltv_aplist *)areq; bcopy((char *)ap, (char *)&sc->an_aplist, sizeof(struct an_ltv_aplist)); break; case AN_RID_TX_SPEED: sp = (struct an_ltv_gen *)areq; sc->an_tx_rate = sp->an_val; /* Read the current configuration */ sc->an_config.an_type = AN_RID_GENCONFIG; sc->an_config.an_len = sizeof(struct an_ltv_genconfig); an_read_record(sc, (struct an_ltv_gen *)&sc->an_config); cfg = &sc->an_config; /* clear other rates and set the only one we want */ bzero(cfg->an_rates, sizeof(cfg->an_rates)); cfg->an_rates[0] = sc->an_tx_rate; /* Save the new rate */ sc->an_config.an_type = AN_RID_GENCONFIG; sc->an_config.an_len = sizeof(struct an_ltv_genconfig); break; case AN_RID_WEP_TEMP: /* Cache the temp keys */ bcopy(areq, &sc->an_temp_keys[((struct an_ltv_key 
*)areq)->kindex], sizeof(struct an_ltv_key)); case AN_RID_WEP_PERM: case AN_RID_LEAPUSERNAME: case AN_RID_LEAPPASSWORD: an_init_locked(sc); /* Disable the MAC. */ an_cmd(sc, AN_CMD_DISABLE, 0); /* Write the key */ an_write_record(sc, (struct an_ltv_gen *)areq); /* Turn the MAC back on. */ an_cmd(sc, AN_CMD_ENABLE, 0); break; case AN_RID_MONITOR_MODE: cfg = (struct an_ltv_genconfig *)areq; bpfdetach(ifp); if (ng_ether_detach_p != NULL) (*ng_ether_detach_p) (ifp); sc->an_monitor = cfg->an_len; if (sc->an_monitor & AN_MONITOR) { if (sc->an_monitor & AN_MONITOR_AIRONET_HEADER) { bpfattach(ifp, DLT_AIRONET_HEADER, sizeof(struct ether_header)); } else { bpfattach(ifp, DLT_IEEE802_11, sizeof(struct ether_header)); } } else { bpfattach(ifp, DLT_EN10MB, sizeof(struct ether_header)); if (ng_ether_attach_p != NULL) (*ng_ether_attach_p) (ifp); } break; default: if_printf(ifp, "unknown RID: %x\n", areq->an_type); return; } /* Reinitialize the card. */ if (ifp->if_flags) an_init_locked(sc); return; } /* * Derived from Linux driver to enable promiscious mode. */ static void an_promisc(struct an_softc *sc, int promisc) { AN_LOCK_ASSERT(sc); if (sc->an_was_monitor) { an_reset(sc); if (sc->mpi350) an_init_mpi350_desc(sc); } if (sc->an_monitor || sc->an_was_monitor) an_init_locked(sc); sc->an_was_monitor = sc->an_monitor; an_cmd(sc, AN_CMD_SET_MODE, promisc ? 
0xffff : 0); return; } static int an_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { int error = 0; int len; int i, max; struct an_softc *sc; struct ifreq *ifr; struct thread *td = curthread; struct ieee80211req *ireq; struct ieee80211_channel ch; u_int8_t tmpstr[IEEE80211_NWID_LEN*2]; u_int8_t *tmpptr; struct an_ltv_genconfig *config; struct an_ltv_key *key; struct an_ltv_status *status; struct an_ltv_ssidlist_new *ssids; int mode; struct aironet_ioctl l_ioctl; sc = ifp->if_softc; ifr = (struct ifreq *)data; ireq = (struct ieee80211req *)data; config = (struct an_ltv_genconfig *)&sc->areq; key = (struct an_ltv_key *)&sc->areq; status = (struct an_ltv_status *)&sc->areq; ssids = (struct an_ltv_ssidlist_new *)&sc->areq; if (sc->an_gone) { error = ENODEV; goto out; } switch (command) { case SIOCSIFFLAGS: AN_LOCK(sc); if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING && ifp->if_flags & IFF_PROMISC && !(sc->an_if_flags & IFF_PROMISC)) { an_promisc(sc, 1); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING && !(ifp->if_flags & IFF_PROMISC) && sc->an_if_flags & IFF_PROMISC) { an_promisc(sc, 0); } else an_init_locked(sc); } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) an_stop(sc); } sc->an_if_flags = ifp->if_flags; AN_UNLOCK(sc); error = 0; break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->an_ifmedia, command); break; case SIOCADDMULTI: case SIOCDELMULTI: /* The Aironet has no multicast filter. 
*/ error = 0; break; case SIOCGAIRONET: - error = copyin(ifr->ifr_data, &sc->areq, sizeof(sc->areq)); + error = copyin(ifr_data_get_ptr(ifr), &sc->areq, + sizeof(sc->areq)); if (error != 0) break; AN_LOCK(sc); #ifdef ANCACHE if (sc->areq.an_type == AN_RID_ZERO_CACHE) { error = priv_check(td, PRIV_DRIVER); if (error) break; sc->an_sigitems = sc->an_nextitem = 0; break; } else if (sc->areq.an_type == AN_RID_READ_CACHE) { char *pt = (char *)&sc->areq.an_val; bcopy((char *)&sc->an_sigitems, (char *)pt, sizeof(int)); pt += sizeof(int); sc->areq.an_len = sizeof(int) / 2; bcopy((char *)&sc->an_sigcache, (char *)pt, sizeof(struct an_sigcache) * sc->an_sigitems); sc->areq.an_len += ((sizeof(struct an_sigcache) * sc->an_sigitems) / 2) + 1; } else #endif if (an_read_record(sc, (struct an_ltv_gen *)&sc->areq)) { AN_UNLOCK(sc); error = EINVAL; break; } AN_UNLOCK(sc); - error = copyout(&sc->areq, ifr->ifr_data, sizeof(sc->areq)); + error = copyout(&sc->areq, ifr_data_get_ptr(ifr), + sizeof(sc->areq)); break; case SIOCSAIRONET: if ((error = priv_check(td, PRIV_DRIVER))) goto out; AN_LOCK(sc); - error = copyin(ifr->ifr_data, &sc->areq, sizeof(sc->areq)); + error = copyin(ifr_data_get_ptr(ifr), &sc->areq, + sizeof(sc->areq)); if (error != 0) break; an_setdef(sc, &sc->areq); AN_UNLOCK(sc); break; case SIOCGPRIVATE_0: /* used by Cisco client utility */ if ((error = priv_check(td, PRIV_DRIVER))) goto out; - error = copyin(ifr->ifr_data, &l_ioctl, sizeof(l_ioctl)); + error = copyin(ifr_data_get_ptr(ifr), &l_ioctl, + sizeof(l_ioctl)); if (error) goto out; mode = l_ioctl.command; AN_LOCK(sc); if (mode >= AIROGCAP && mode <= AIROGSTATSD32) { error = readrids(ifp, &l_ioctl); } else if (mode >= AIROPCAP && mode <= AIROPLEAPUSR) { error = writerids(ifp, &l_ioctl); } else if (mode >= AIROFLSHRST && mode <= AIRORESTART) { error = flashcard(ifp, &l_ioctl); } else { error =-1; } AN_UNLOCK(sc); if (!error) { /* copy out the updated command info */ - error = copyout(&l_ioctl, ifr->ifr_data, 
sizeof(l_ioctl)); + error = copyout(&l_ioctl, ifr_data_get_ptr(ifr), + sizeof(l_ioctl)); } break; case SIOCGPRIVATE_1: /* used by Cisco client utility */ if ((error = priv_check(td, PRIV_DRIVER))) goto out; - error = copyin(ifr->ifr_data, &l_ioctl, sizeof(l_ioctl)); + error = copyin(ifr_data_get_ptr(ifr), &l_ioctl, + sizeof(l_ioctl)); if (error) goto out; l_ioctl.command = 0; error = AIROMAGIC; (void) copyout(&error, l_ioctl.data, sizeof(error)); error = 0; break; case SIOCG80211: sc->areq.an_len = sizeof(sc->areq); /* was that a good idea DJA we are doing a short-cut */ switch (ireq->i_type) { case IEEE80211_IOC_SSID: AN_LOCK(sc); if (ireq->i_val == -1) { sc->areq.an_type = AN_RID_STATUS; if (an_read_record(sc, (struct an_ltv_gen *)&sc->areq)) { error = EINVAL; AN_UNLOCK(sc); break; } len = status->an_ssidlen; tmpptr = status->an_ssid; } else if (ireq->i_val >= 0) { sc->areq.an_type = AN_RID_SSIDLIST; if (an_read_record(sc, (struct an_ltv_gen *)&sc->areq)) { error = EINVAL; AN_UNLOCK(sc); break; } max = (sc->areq.an_len - 4) / sizeof(struct an_ltv_ssid_entry); if ( max > MAX_SSIDS ) { printf("To many SSIDs only using " "%d of %d\n", MAX_SSIDS, max); max = MAX_SSIDS; } if (ireq->i_val > max) { error = EINVAL; AN_UNLOCK(sc); break; } else { len = ssids->an_entry[ireq->i_val].an_len; tmpptr = ssids->an_entry[ireq->i_val].an_ssid; } } else { error = EINVAL; AN_UNLOCK(sc); break; } if (len > IEEE80211_NWID_LEN) { error = EINVAL; AN_UNLOCK(sc); break; } AN_UNLOCK(sc); ireq->i_len = len; bzero(tmpstr, IEEE80211_NWID_LEN); bcopy(tmpptr, tmpstr, len); error = copyout(tmpstr, ireq->i_data, IEEE80211_NWID_LEN); break; case IEEE80211_IOC_NUMSSIDS: AN_LOCK(sc); sc->areq.an_len = sizeof(sc->areq); sc->areq.an_type = AN_RID_SSIDLIST; if (an_read_record(sc, (struct an_ltv_gen *)&sc->areq)) { AN_UNLOCK(sc); error = EINVAL; break; } max = (sc->areq.an_len - 4) / sizeof(struct an_ltv_ssid_entry); AN_UNLOCK(sc); if ( max > MAX_SSIDS ) { printf("To many SSIDs only using " "%d of 
%d\n", MAX_SSIDS, max); max = MAX_SSIDS; } ireq->i_val = max; break; case IEEE80211_IOC_WEP: AN_LOCK(sc); sc->areq.an_type = AN_RID_ACTUALCFG; if (an_read_record(sc, (struct an_ltv_gen *)&sc->areq)) { error = EINVAL; AN_UNLOCK(sc); break; } AN_UNLOCK(sc); if (config->an_authtype & AN_AUTHTYPE_PRIVACY_IN_USE) { if (config->an_authtype & AN_AUTHTYPE_ALLOW_UNENCRYPTED) ireq->i_val = IEEE80211_WEP_MIXED; else ireq->i_val = IEEE80211_WEP_ON; } else { ireq->i_val = IEEE80211_WEP_OFF; } break; case IEEE80211_IOC_WEPKEY: /* * XXX: I'm not entierly convinced this is * correct, but it's what is implemented in * ancontrol so it will have to do until we get * access to actual Cisco code. */ if (ireq->i_val < 0 || ireq->i_val > 8) { error = EINVAL; break; } len = 0; if (ireq->i_val < 5) { AN_LOCK(sc); sc->areq.an_type = AN_RID_WEP_TEMP; for (i = 0; i < 5; i++) { if (an_read_record(sc, (struct an_ltv_gen *)&sc->areq)) { error = EINVAL; break; } if (key->kindex == 0xffff) break; if (key->kindex == ireq->i_val) len = key->klen; /* Required to get next entry */ sc->areq.an_type = AN_RID_WEP_PERM; } AN_UNLOCK(sc); if (error != 0) { break; } } /* We aren't allowed to read the value of the * key from the card so we just output zeros * like we would if we could read the card, but * denied the user access. */ bzero(tmpstr, len); ireq->i_len = len; error = copyout(tmpstr, ireq->i_data, len); break; case IEEE80211_IOC_NUMWEPKEYS: ireq->i_val = 9; /* include home key */ break; case IEEE80211_IOC_WEPTXKEY: /* * For some strange reason, you have to read all * keys before you can read the txkey. 
*/ AN_LOCK(sc); sc->areq.an_type = AN_RID_WEP_TEMP; for (i = 0; i < 5; i++) { if (an_read_record(sc, (struct an_ltv_gen *) &sc->areq)) { error = EINVAL; break; } if (key->kindex == 0xffff) { break; } /* Required to get next entry */ sc->areq.an_type = AN_RID_WEP_PERM; } if (error != 0) { AN_UNLOCK(sc); break; } sc->areq.an_type = AN_RID_WEP_PERM; key->kindex = 0xffff; if (an_read_record(sc, (struct an_ltv_gen *)&sc->areq)) { error = EINVAL; AN_UNLOCK(sc); break; } ireq->i_val = key->mac[0]; /* * Check for home mode. Map home mode into * 5th key since that is how it is stored on * the card */ sc->areq.an_len = sizeof(struct an_ltv_genconfig); sc->areq.an_type = AN_RID_GENCONFIG; if (an_read_record(sc, (struct an_ltv_gen *)&sc->areq)) { error = EINVAL; AN_UNLOCK(sc); break; } if (config->an_home_product & AN_HOME_NETWORK) ireq->i_val = 4; AN_UNLOCK(sc); break; case IEEE80211_IOC_AUTHMODE: AN_LOCK(sc); sc->areq.an_type = AN_RID_ACTUALCFG; if (an_read_record(sc, (struct an_ltv_gen *)&sc->areq)) { error = EINVAL; AN_UNLOCK(sc); break; } AN_UNLOCK(sc); if ((config->an_authtype & AN_AUTHTYPE_MASK) == AN_AUTHTYPE_NONE) { ireq->i_val = IEEE80211_AUTH_NONE; } else if ((config->an_authtype & AN_AUTHTYPE_MASK) == AN_AUTHTYPE_OPEN) { ireq->i_val = IEEE80211_AUTH_OPEN; } else if ((config->an_authtype & AN_AUTHTYPE_MASK) == AN_AUTHTYPE_SHAREDKEY) { ireq->i_val = IEEE80211_AUTH_SHARED; } else error = EINVAL; break; case IEEE80211_IOC_STATIONNAME: AN_LOCK(sc); sc->areq.an_type = AN_RID_ACTUALCFG; if (an_read_record(sc, (struct an_ltv_gen *)&sc->areq)) { error = EINVAL; AN_UNLOCK(sc); break; } AN_UNLOCK(sc); ireq->i_len = sizeof(config->an_nodename); tmpptr = config->an_nodename; bzero(tmpstr, IEEE80211_NWID_LEN); bcopy(tmpptr, tmpstr, ireq->i_len); error = copyout(tmpstr, ireq->i_data, IEEE80211_NWID_LEN); break; case IEEE80211_IOC_CHANNEL: AN_LOCK(sc); sc->areq.an_type = AN_RID_STATUS; if (an_read_record(sc, (struct an_ltv_gen *)&sc->areq)) { error = EINVAL; AN_UNLOCK(sc); break; 
} AN_UNLOCK(sc); ireq->i_val = status->an_cur_channel; break; case IEEE80211_IOC_CURCHAN: AN_LOCK(sc); sc->areq.an_type = AN_RID_STATUS; if (an_read_record(sc, (struct an_ltv_gen *)&sc->areq)) { error = EINVAL; AN_UNLOCK(sc); break; } AN_UNLOCK(sc); bzero(&ch, sizeof(ch)); ch.ic_freq = ieee80211_ieee2mhz(status->an_cur_channel, IEEE80211_CHAN_B); ch.ic_flags = IEEE80211_CHAN_B; ch.ic_ieee = status->an_cur_channel; error = copyout(&ch, ireq->i_data, sizeof(ch)); break; case IEEE80211_IOC_POWERSAVE: AN_LOCK(sc); sc->areq.an_type = AN_RID_ACTUALCFG; if (an_read_record(sc, (struct an_ltv_gen *)&sc->areq)) { error = EINVAL; AN_UNLOCK(sc); break; } AN_UNLOCK(sc); if (config->an_psave_mode == AN_PSAVE_NONE) { ireq->i_val = IEEE80211_POWERSAVE_OFF; } else if (config->an_psave_mode == AN_PSAVE_CAM) { ireq->i_val = IEEE80211_POWERSAVE_CAM; } else if (config->an_psave_mode == AN_PSAVE_PSP) { ireq->i_val = IEEE80211_POWERSAVE_PSP; } else if (config->an_psave_mode == AN_PSAVE_PSP_CAM) { ireq->i_val = IEEE80211_POWERSAVE_PSP_CAM; } else error = EINVAL; break; case IEEE80211_IOC_POWERSAVESLEEP: AN_LOCK(sc); sc->areq.an_type = AN_RID_ACTUALCFG; if (an_read_record(sc, (struct an_ltv_gen *)&sc->areq)) { error = EINVAL; AN_UNLOCK(sc); break; } AN_UNLOCK(sc); ireq->i_val = config->an_listen_interval; break; } break; case SIOCS80211: if ((error = priv_check(td, PRIV_NET80211_MANAGE))) goto out; AN_LOCK(sc); sc->areq.an_len = sizeof(sc->areq); /* * We need a config structure for everything but the WEP * key management and SSIDs so we get it now so avoid * duplicating this code every time. 
*/ if (ireq->i_type != IEEE80211_IOC_SSID && ireq->i_type != IEEE80211_IOC_WEPKEY && ireq->i_type != IEEE80211_IOC_WEPTXKEY) { sc->areq.an_type = AN_RID_GENCONFIG; if (an_read_record(sc, (struct an_ltv_gen *)&sc->areq)) { error = EINVAL; AN_UNLOCK(sc); break; } } switch (ireq->i_type) { case IEEE80211_IOC_SSID: sc->areq.an_len = sizeof(sc->areq); sc->areq.an_type = AN_RID_SSIDLIST; if (an_read_record(sc, (struct an_ltv_gen *)&sc->areq)) { error = EINVAL; AN_UNLOCK(sc); break; } if (ireq->i_len > IEEE80211_NWID_LEN) { error = EINVAL; AN_UNLOCK(sc); break; } max = (sc->areq.an_len - 4) / sizeof(struct an_ltv_ssid_entry); if ( max > MAX_SSIDS ) { printf("To many SSIDs only using " "%d of %d\n", MAX_SSIDS, max); max = MAX_SSIDS; } if (ireq->i_val > max) { error = EINVAL; AN_UNLOCK(sc); break; } else { error = copyin(ireq->i_data, ssids->an_entry[ireq->i_val].an_ssid, ireq->i_len); ssids->an_entry[ireq->i_val].an_len = ireq->i_len; sc->areq.an_len = sizeof(sc->areq); sc->areq.an_type = AN_RID_SSIDLIST; an_setdef(sc, &sc->areq); AN_UNLOCK(sc); break; } break; case IEEE80211_IOC_WEP: switch (ireq->i_val) { case IEEE80211_WEP_OFF: config->an_authtype &= ~(AN_AUTHTYPE_PRIVACY_IN_USE | AN_AUTHTYPE_ALLOW_UNENCRYPTED); break; case IEEE80211_WEP_ON: config->an_authtype |= AN_AUTHTYPE_PRIVACY_IN_USE; config->an_authtype &= ~AN_AUTHTYPE_ALLOW_UNENCRYPTED; break; case IEEE80211_WEP_MIXED: config->an_authtype |= AN_AUTHTYPE_PRIVACY_IN_USE | AN_AUTHTYPE_ALLOW_UNENCRYPTED; break; default: error = EINVAL; break; } if (error != EINVAL) an_setdef(sc, &sc->areq); AN_UNLOCK(sc); break; case IEEE80211_IOC_WEPKEY: if (ireq->i_val < 0 || ireq->i_val > 8 || ireq->i_len > 13) { error = EINVAL; AN_UNLOCK(sc); break; } error = copyin(ireq->i_data, tmpstr, 13); if (error != 0) { AN_UNLOCK(sc); break; } /* * Map the 9th key into the home mode * since that is how it is stored on * the card */ bzero(&sc->areq, sizeof(struct an_ltv_key)); sc->areq.an_len = sizeof(struct an_ltv_key); key->mac[0] = 1; 
/* The others are 0. */ if (ireq->i_val < 4) { sc->areq.an_type = AN_RID_WEP_TEMP; key->kindex = ireq->i_val; } else { sc->areq.an_type = AN_RID_WEP_PERM; key->kindex = ireq->i_val - 4; } key->klen = ireq->i_len; bcopy(tmpstr, key->key, key->klen); an_setdef(sc, &sc->areq); AN_UNLOCK(sc); break; case IEEE80211_IOC_WEPTXKEY: if (ireq->i_val < 0 || ireq->i_val > 4) { error = EINVAL; AN_UNLOCK(sc); break; } /* * Map the 5th key into the home mode * since that is how it is stored on * the card */ sc->areq.an_len = sizeof(struct an_ltv_genconfig); sc->areq.an_type = AN_RID_ACTUALCFG; if (an_read_record(sc, (struct an_ltv_gen *)&sc->areq)) { error = EINVAL; AN_UNLOCK(sc); break; } if (ireq->i_val == 4) { config->an_home_product |= AN_HOME_NETWORK; ireq->i_val = 0; } else { config->an_home_product &= ~AN_HOME_NETWORK; } sc->an_config.an_home_product = config->an_home_product; /* update configuration */ an_init_locked(sc); bzero(&sc->areq, sizeof(struct an_ltv_key)); sc->areq.an_len = sizeof(struct an_ltv_key); sc->areq.an_type = AN_RID_WEP_PERM; key->kindex = 0xffff; key->mac[0] = ireq->i_val; an_setdef(sc, &sc->areq); AN_UNLOCK(sc); break; case IEEE80211_IOC_AUTHMODE: switch (ireq->i_val) { case IEEE80211_AUTH_NONE: config->an_authtype = AN_AUTHTYPE_NONE | (config->an_authtype & ~AN_AUTHTYPE_MASK); break; case IEEE80211_AUTH_OPEN: config->an_authtype = AN_AUTHTYPE_OPEN | (config->an_authtype & ~AN_AUTHTYPE_MASK); break; case IEEE80211_AUTH_SHARED: config->an_authtype = AN_AUTHTYPE_SHAREDKEY | (config->an_authtype & ~AN_AUTHTYPE_MASK); break; default: error = EINVAL; } if (error != EINVAL) { an_setdef(sc, &sc->areq); } AN_UNLOCK(sc); break; case IEEE80211_IOC_STATIONNAME: if (ireq->i_len > 16) { error = EINVAL; AN_UNLOCK(sc); break; } bzero(config->an_nodename, 16); error = copyin(ireq->i_data, config->an_nodename, ireq->i_len); an_setdef(sc, &sc->areq); AN_UNLOCK(sc); break; case IEEE80211_IOC_CHANNEL: /* * The actual range is 1-14, but if you set it * to 0 you get the 
default so we let that work * too. */ if (ireq->i_val < 0 || ireq->i_val >14) { error = EINVAL; AN_UNLOCK(sc); break; } config->an_ds_channel = ireq->i_val; an_setdef(sc, &sc->areq); AN_UNLOCK(sc); break; case IEEE80211_IOC_POWERSAVE: switch (ireq->i_val) { case IEEE80211_POWERSAVE_OFF: config->an_psave_mode = AN_PSAVE_NONE; break; case IEEE80211_POWERSAVE_CAM: config->an_psave_mode = AN_PSAVE_CAM; break; case IEEE80211_POWERSAVE_PSP: config->an_psave_mode = AN_PSAVE_PSP; break; case IEEE80211_POWERSAVE_PSP_CAM: config->an_psave_mode = AN_PSAVE_PSP_CAM; break; default: error = EINVAL; break; } an_setdef(sc, &sc->areq); AN_UNLOCK(sc); break; case IEEE80211_IOC_POWERSAVESLEEP: config->an_listen_interval = ireq->i_val; an_setdef(sc, &sc->areq); AN_UNLOCK(sc); break; default: AN_UNLOCK(sc); break; } /* if (!error) { AN_LOCK(sc); an_setdef(sc, &sc->areq); AN_UNLOCK(sc); } */ break; default: error = ether_ioctl(ifp, command, data); break; } out: return(error != 0); } static int an_init_tx_ring(struct an_softc *sc) { int i; int id; if (sc->an_gone) return (0); if (!sc->mpi350) { for (i = 0; i < AN_TX_RING_CNT; i++) { if (an_alloc_nicmem(sc, 1518 + 0x44, &id)) return(ENOMEM); sc->an_rdata.an_tx_fids[i] = id; sc->an_rdata.an_tx_ring[i] = 0; } } sc->an_rdata.an_tx_prod = 0; sc->an_rdata.an_tx_cons = 0; sc->an_rdata.an_tx_empty = 1; return(0); } static void an_init(void *xsc) { struct an_softc *sc = xsc; AN_LOCK(sc); an_init_locked(sc); AN_UNLOCK(sc); } static void an_init_locked(struct an_softc *sc) { struct ifnet *ifp; AN_LOCK_ASSERT(sc); ifp = sc->an_ifp; if (sc->an_gone) return; if (ifp->if_drv_flags & IFF_DRV_RUNNING) an_stop(sc); sc->an_associated = 0; /* Allocate the TX buffers */ if (an_init_tx_ring(sc)) { an_reset(sc); if (sc->mpi350) an_init_mpi350_desc(sc); if (an_init_tx_ring(sc)) { if_printf(ifp, "tx buffer allocation failed\n"); return; } } /* Set our MAC address. 
*/ bcopy((char *)IF_LLADDR(sc->an_ifp), (char *)&sc->an_config.an_macaddr, ETHER_ADDR_LEN); if (ifp->if_flags & IFF_BROADCAST) sc->an_config.an_rxmode = AN_RXMODE_BC_ADDR; else sc->an_config.an_rxmode = AN_RXMODE_ADDR; if (ifp->if_flags & IFF_MULTICAST) sc->an_config.an_rxmode = AN_RXMODE_BC_MC_ADDR; if (ifp->if_flags & IFF_PROMISC) { if (sc->an_monitor & AN_MONITOR) { if (sc->an_monitor & AN_MONITOR_ANY_BSS) { sc->an_config.an_rxmode |= AN_RXMODE_80211_MONITOR_ANYBSS | AN_RXMODE_NO_8023_HEADER; } else { sc->an_config.an_rxmode |= AN_RXMODE_80211_MONITOR_CURBSS | AN_RXMODE_NO_8023_HEADER; } } } #ifdef ANCACHE if (sc->an_have_rssimap) sc->an_config.an_rxmode |= AN_RXMODE_NORMALIZED_RSSI; #endif /* Set the ssid list */ sc->an_ssidlist.an_type = AN_RID_SSIDLIST; sc->an_ssidlist.an_len = sizeof(struct an_ltv_ssidlist_new); if (an_write_record(sc, (struct an_ltv_gen *)&sc->an_ssidlist)) { if_printf(ifp, "failed to set ssid list\n"); return; } /* Set the AP list */ sc->an_aplist.an_type = AN_RID_APLIST; sc->an_aplist.an_len = sizeof(struct an_ltv_aplist); if (an_write_record(sc, (struct an_ltv_gen *)&sc->an_aplist)) { if_printf(ifp, "failed to set AP list\n"); return; } /* Set the configuration in the NIC */ sc->an_config.an_len = sizeof(struct an_ltv_genconfig); sc->an_config.an_type = AN_RID_GENCONFIG; if (an_write_record(sc, (struct an_ltv_gen *)&sc->an_config)) { if_printf(ifp, "failed to set configuration\n"); return; } /* Enable the MAC */ if (an_cmd(sc, AN_CMD_ENABLE, 0)) { if_printf(ifp, "failed to enable MAC\n"); return; } if (ifp->if_flags & IFF_PROMISC) an_cmd(sc, AN_CMD_SET_MODE, 0xffff); /* enable interrupts */ CSR_WRITE_2(sc, AN_INT_EN(sc->mpi350), AN_INTRS(sc->mpi350)); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; callout_reset(&sc->an_stat_ch, hz, an_stats_update, sc); return; } static void an_start(struct ifnet *ifp) { struct an_softc *sc; sc = ifp->if_softc; AN_LOCK(sc); an_start_locked(ifp); AN_UNLOCK(sc); } static void 
an_start_locked(struct ifnet *ifp) { struct an_softc *sc; struct mbuf *m0 = NULL; struct an_txframe_802_3 tx_frame_802_3; struct ether_header *eh; int id, idx, i; unsigned char txcontrol; struct an_card_tx_desc an_tx_desc; u_int8_t *buf; sc = ifp->if_softc; AN_LOCK_ASSERT(sc); if (sc->an_gone) return; if (ifp->if_drv_flags & IFF_DRV_OACTIVE) return; if (!sc->an_associated) return; /* We can't send in monitor mode so toss any attempts. */ if (sc->an_monitor && (ifp->if_flags & IFF_PROMISC)) { for (;;) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m0); if (m0 == NULL) break; m_freem(m0); } return; } idx = sc->an_rdata.an_tx_prod; if (!sc->mpi350) { bzero((char *)&tx_frame_802_3, sizeof(tx_frame_802_3)); while (sc->an_rdata.an_tx_ring[idx] == 0) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m0); if (m0 == NULL) break; id = sc->an_rdata.an_tx_fids[idx]; eh = mtod(m0, struct ether_header *); bcopy((char *)&eh->ether_dhost, (char *)&tx_frame_802_3.an_tx_dst_addr, ETHER_ADDR_LEN); bcopy((char *)&eh->ether_shost, (char *)&tx_frame_802_3.an_tx_src_addr, ETHER_ADDR_LEN); /* minus src/dest mac & type */ tx_frame_802_3.an_tx_802_3_payload_len = m0->m_pkthdr.len - 12; m_copydata(m0, sizeof(struct ether_header) - 2 , tx_frame_802_3.an_tx_802_3_payload_len, (caddr_t)&sc->an_txbuf); txcontrol = AN_TXCTL_8023 | AN_TXCTL_HW(sc->mpi350); /* write the txcontrol only */ an_write_data(sc, id, 0x08, (caddr_t)&txcontrol, sizeof(txcontrol)); /* 802_3 header */ an_write_data(sc, id, 0x34, (caddr_t)&tx_frame_802_3, sizeof(struct an_txframe_802_3)); /* in mbuf header type is just before payload */ an_write_data(sc, id, 0x44, (caddr_t)&sc->an_txbuf, tx_frame_802_3.an_tx_802_3_payload_len); /* * If there's a BPF listner, bounce a copy of * this frame to him. */ BPF_MTAP(ifp, m0); m_freem(m0); m0 = NULL; sc->an_rdata.an_tx_ring[idx] = id; if (an_cmd(sc, AN_CMD_TX, id)) if_printf(ifp, "xmit failed\n"); AN_INC(idx, AN_TX_RING_CNT); /* * Set a timeout in case the chip goes out to lunch. 
*/ sc->an_timer = 5; } } else { /* MPI-350 */ /* Disable interrupts. */ CSR_WRITE_2(sc, AN_INT_EN(sc->mpi350), 0); while (sc->an_rdata.an_tx_empty || idx != sc->an_rdata.an_tx_cons) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m0); if (m0 == NULL) { break; } buf = sc->an_tx_buffer[idx].an_dma_vaddr; eh = mtod(m0, struct ether_header *); /* DJA optimize this to limit bcopy */ bcopy((char *)&eh->ether_dhost, (char *)&tx_frame_802_3.an_tx_dst_addr, ETHER_ADDR_LEN); bcopy((char *)&eh->ether_shost, (char *)&tx_frame_802_3.an_tx_src_addr, ETHER_ADDR_LEN); /* minus src/dest mac & type */ tx_frame_802_3.an_tx_802_3_payload_len = m0->m_pkthdr.len - 12; m_copydata(m0, sizeof(struct ether_header) - 2 , tx_frame_802_3.an_tx_802_3_payload_len, (caddr_t)&sc->an_txbuf); txcontrol = AN_TXCTL_8023 | AN_TXCTL_HW(sc->mpi350); /* write the txcontrol only */ bcopy((caddr_t)&txcontrol, &buf[0x08], sizeof(txcontrol)); /* 802_3 header */ bcopy((caddr_t)&tx_frame_802_3, &buf[0x34], sizeof(struct an_txframe_802_3)); /* in mbuf header type is just before payload */ bcopy((caddr_t)&sc->an_txbuf, &buf[0x44], tx_frame_802_3.an_tx_802_3_payload_len); bzero(&an_tx_desc, sizeof(an_tx_desc)); an_tx_desc.an_offset = 0; an_tx_desc.an_eoc = 1; an_tx_desc.an_valid = 1; an_tx_desc.an_len = 0x44 + tx_frame_802_3.an_tx_802_3_payload_len; an_tx_desc.an_phys = sc->an_tx_buffer[idx].an_dma_paddr; for (i = sizeof(an_tx_desc) / 4 - 1; i >= 0; i--) { CSR_MEM_AUX_WRITE_4(sc, AN_TX_DESC_OFFSET /* zero for now */ + (0 * sizeof(an_tx_desc)) + (i * 4), ((u_int32_t *)(void *)&an_tx_desc)[i]); } /* * If there's a BPF listner, bounce a copy of * this frame to him. */ BPF_MTAP(ifp, m0); m_freem(m0); m0 = NULL; AN_INC(idx, AN_MAX_TX_DESC); sc->an_rdata.an_tx_empty = 0; CSR_WRITE_2(sc, AN_EVENT_ACK(sc->mpi350), AN_EV_ALLOC); /* * Set a timeout in case the chip goes out to lunch. */ sc->an_timer = 5; } /* Re-enable interrupts. 
*/
		CSR_WRITE_2(sc, AN_INT_EN(sc->mpi350), AN_INTRS(sc->mpi350));
	}

	if (m0 != NULL)
		ifp->if_drv_flags |= IFF_DRV_OACTIVE;

	sc->an_rdata.an_tx_prod = idx;

	return;
}

/*
 * Stop the adapter.  The caller must hold the softc lock (asserted below).
 * Does nothing once the device has been marked gone.  Otherwise it issues
 * FORCE_SYNCLOSS, masks all interrupts, disables the MAC, issues
 * DEALLOC_MEM for every entry of the TX FID ring, stops the an_stat_ch
 * callout, clears IFF_DRV_RUNNING/IFF_DRV_OACTIVE and releases any staged
 * flash buffer.
 */
void
an_stop(struct an_softc *sc)
{
	struct ifnet		*ifp;
	int			i;

	AN_LOCK_ASSERT(sc);

	if (sc->an_gone)
		return;

	ifp = sc->an_ifp;

	an_cmd(sc, AN_CMD_FORCE_SYNCLOSS, 0);
	/* Writing 0 to the interrupt-enable register masks every source. */
	CSR_WRITE_2(sc, AN_INT_EN(sc->mpi350), 0);
	an_cmd(sc, AN_CMD_DISABLE, 0);

	for (i = 0; i < AN_TX_RING_CNT; i++)
		an_cmd(sc, AN_CMD_DEALLOC_MEM, sc->an_rdata.an_tx_fids[i]);

	callout_stop(&sc->an_stat_ch);

	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING|IFF_DRV_OACTIVE);

	/* Drop the firmware image staged by the flash ioctls, if any. */
	if (sc->an_flash_buffer) {
		free(sc->an_flash_buffer, M_DEVBUF);
		sc->an_flash_buffer = NULL;
	}
}

/*
 * Transmit watchdog.  The caller must hold the softc lock.  Logs
 * "device timeout", resets the card, re-creates the MPI-350 descriptors
 * when applicable, re-initializes the interface and counts one output
 * error.  Does nothing once the device has been marked gone.
 */
static void
an_watchdog(struct an_softc *sc)
{
	struct ifnet *ifp;

	AN_LOCK_ASSERT(sc);

	if (sc->an_gone)
		return;

	ifp = sc->an_ifp;
	if_printf(ifp, "device timeout\n");

	an_reset(sc);
	if (sc->mpi350)
		an_init_mpi350_desc(sc);
	an_init_locked(sc);

	if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
}

/*
 * Device shutdown method: stop the adapter under the softc lock and mark
 * it gone so the guarded entry points above become no-ops.  Always
 * returns 0.
 */
int
an_shutdown(device_t dev)
{
	struct an_softc		*sc;

	sc = device_get_softc(dev);
	AN_LOCK(sc);
	an_stop(sc);
	sc->an_gone = 1;
	AN_UNLOCK(sc);

	return (0);
}

/*
 * Device resume method: clear the gone flag, reset and re-initialize the
 * card, push the four saved temporary keys back to it with an_setdef()
 * and restart transmission when the interface is flagged up.
 */
void
an_resume(device_t dev)
{
	struct an_softc		*sc;
	struct ifnet		*ifp;
	int			i;

	sc = device_get_softc(dev);
	AN_LOCK(sc);
	ifp = sc->an_ifp;

	sc->an_gone = 0;
	an_reset(sc);
	if (sc->mpi350)
		an_init_mpi350_desc(sc);
	an_init_locked(sc);

	/* Recovery temporary keys */
	for (i = 0; i < 4; i++) {
		sc->areq.an_type = AN_RID_WEP_TEMP;
		sc->areq.an_len = sizeof(struct an_ltv_key);
		/*
		 * NOTE(review): the copy targets the start of sc->areq, so it
		 * presumably overwrites the an_type/an_len assigned just
		 * above -- confirm the saved keys carry valid header fields.
		 */
		bcopy(&sc->an_temp_keys[i],
		    &sc->areq, sizeof(struct an_ltv_key));
		an_setdef(sc, &sc->areq);
	}

	if (ifp->if_flags & IFF_UP)
		an_start_locked(ifp);
	AN_UNLOCK(sc);

	return;
}

#ifdef ANCACHE
/* Aironet signal strength cache code.
 * store signal/noise/quality on per MAC src basis in
 * a small fixed cache.  The cache wraps if > MAX slots
 * used. The cache may be zeroed out to start over.
 * Two simple filters exist to reduce computation:
 * 1.
ip only (literally 0x800, ETHERTYPE_IP) which may be used
 *     to ignore some packets. It defaults to ip only.
 *     it could be used to focus on broadcast, non-IP 802.11 beacons.
 * 2. multicast/broadcast only. This may be used to
 *     ignore unicast packets and only cache signal strength
 *     for multicast/broadcast packets (beacons); e.g., Mobile-IP
 *     beacons and not unicast traffic.
 *
 * The cache stores (MAC src(index), IP src (major clue), signal,
 *	quality, noise)
 *
 * No apologies for storing IP src here. It's easy and saves much
 * trouble elsewhere.  The cache is assumed to be INET dependent,
 * although it need not be.
 *
 * Note: the Aironet only has a single byte of signal strength value
 * in the rx frame header, and it's not scaled to anything sensible.
 * This is kind of lame, but it's all we've got.
 */

#ifdef documentation

int an_sigitems;				/* number of cached entries */
struct an_sigcache an_sigcache[MAXANCACHE];	/* array of cache entries */
int an_nextitem;				/* index/# of entries */

#endif

/* Control variables for cache filtering.  The basic idea is
 * to reduce cost (e.g., to only Mobile-IP agent beacons
 * which are broadcast or multicast).  Still, you might
 * want to measure signal strength with unicast ping packets
 * on a point-to-point antenna setup.
 */
/* Set true if you want to limit cache items to broadcast/mcast
 * only packets (not unicast).  Useful for mobile-ip beacons which
 * are broadcast/multicast at network layer.  Default is all packets
 * so ping/unicast will work, say, with a point-to-point antenna setup.
 */
static int an_cache_mcastonly = 0;
SYSCTL_INT(_hw_an, OID_AUTO, an_cache_mcastonly, CTLFLAG_RW,
	&an_cache_mcastonly, 0, "");

/* Set true if you want to limit cache items to IP packets only. */
static int an_cache_iponly = 1;
SYSCTL_INT(_hw_an, OID_AUTO, an_cache_iponly, CTLFLAG_RW,
	&an_cache_iponly, 0, "");

/*
 * an_cache_store, per rx packet store signal
 * strength in MAC (src) indexed cache.
*/ static void an_cache_store(struct an_softc *sc, struct ether_header *eh, struct mbuf *m, u_int8_t rx_rssi, u_int8_t rx_quality) { struct ip *ip = NULL; int i; static int cache_slot = 0; /* use this cache entry */ static int wrapindex = 0; /* next "free" cache entry */ int type_ipv4 = 0; /* filters: * 1. ip only * 2. configurable filter to throw out unicast packets, * keep multicast only. */ if ((ntohs(eh->ether_type) == ETHERTYPE_IP)) { type_ipv4 = 1; } /* filter for ip packets only */ if ( an_cache_iponly && !type_ipv4) { return; } /* filter for broadcast/multicast only */ if (an_cache_mcastonly && ((eh->ether_dhost[0] & 1) == 0)) { return; } #ifdef SIGDEBUG if_printf(sc->an_ifp, "q value %x (MSB=0x%x, LSB=0x%x) \n", rx_rssi & 0xffff, rx_rssi >> 8, rx_rssi & 0xff); #endif /* find the ip header. we want to store the ip_src * address. */ if (type_ipv4) { ip = mtod(m, struct ip *); } /* do a linear search for a matching MAC address * in the cache table * . MAC address is 6 bytes, * . var w_nextitem holds total number of entries already cached */ for (i = 0; i < sc->an_nextitem; i++) { if (! bcmp(eh->ether_shost , sc->an_sigcache[i].macsrc, 6 )) { /* Match!, * so we already have this entry, * update the data */ break; } } /* did we find a matching mac address? * if yes, then overwrite a previously existing cache entry */ if (i < sc->an_nextitem ) { cache_slot = i; } /* else, have a new address entry,so * add this new entry, * if table full, then we need to replace LRU entry */ else { /* check for space in cache table * note: an_nextitem also holds number of entries * added in the cache table */ if ( sc->an_nextitem < MAXANCACHE ) { cache_slot = sc->an_nextitem; sc->an_nextitem++; sc->an_sigitems = sc->an_nextitem; } /* no space found, so simply wrap anth wrap index * and "zap" the next entry */ else { if (wrapindex == MAXANCACHE) { wrapindex = 0; } cache_slot = wrapindex++; } } /* invariant: cache_slot now points at some slot * in cache. 
*/ if (cache_slot < 0 || cache_slot >= MAXANCACHE) { log(LOG_ERR, "an_cache_store, bad index: %d of " "[0..%d], gross cache error\n", cache_slot, MAXANCACHE); return; } /* store items in cache * .ip source address * .mac src * .signal, etc. */ if (type_ipv4) { sc->an_sigcache[cache_slot].ipsrc = ip->ip_src.s_addr; } bcopy( eh->ether_shost, sc->an_sigcache[cache_slot].macsrc, 6); switch (an_cache_mode) { case DBM: if (sc->an_have_rssimap) { sc->an_sigcache[cache_slot].signal = - sc->an_rssimap.an_entries[rx_rssi].an_rss_dbm; sc->an_sigcache[cache_slot].quality = - sc->an_rssimap.an_entries[rx_quality].an_rss_dbm; } else { sc->an_sigcache[cache_slot].signal = rx_rssi - 100; sc->an_sigcache[cache_slot].quality = rx_quality - 100; } break; case PERCENT: if (sc->an_have_rssimap) { sc->an_sigcache[cache_slot].signal = sc->an_rssimap.an_entries[rx_rssi].an_rss_pct; sc->an_sigcache[cache_slot].quality = sc->an_rssimap.an_entries[rx_quality].an_rss_pct; } else { if (rx_rssi > 100) rx_rssi = 100; if (rx_quality > 100) rx_quality = 100; sc->an_sigcache[cache_slot].signal = rx_rssi; sc->an_sigcache[cache_slot].quality = rx_quality; } break; case RAW: sc->an_sigcache[cache_slot].signal = rx_rssi; sc->an_sigcache[cache_slot].quality = rx_quality; break; } sc->an_sigcache[cache_slot].noise = 0; return; } #endif static int an_media_change(struct ifnet *ifp) { struct an_softc *sc = ifp->if_softc; struct an_ltv_genconfig *cfg; int otype = sc->an_config.an_opmode; int orate = sc->an_tx_rate; AN_LOCK(sc); sc->an_tx_rate = ieee80211_media2rate( IFM_SUBTYPE(sc->an_ifmedia.ifm_cur->ifm_media)); if (sc->an_tx_rate < 0) sc->an_tx_rate = 0; if (orate != sc->an_tx_rate) { /* Read the current configuration */ sc->an_config.an_type = AN_RID_GENCONFIG; sc->an_config.an_len = sizeof(struct an_ltv_genconfig); an_read_record(sc, (struct an_ltv_gen *)&sc->an_config); cfg = &sc->an_config; /* clear other rates and set the only one we want */ bzero(cfg->an_rates, sizeof(cfg->an_rates)); 
cfg->an_rates[0] = sc->an_tx_rate; /* Save the new rate */ sc->an_config.an_type = AN_RID_GENCONFIG; sc->an_config.an_len = sizeof(struct an_ltv_genconfig); } if ((sc->an_ifmedia.ifm_cur->ifm_media & IFM_IEEE80211_ADHOC) != 0) sc->an_config.an_opmode &= ~AN_OPMODE_INFRASTRUCTURE_STATION; else sc->an_config.an_opmode |= AN_OPMODE_INFRASTRUCTURE_STATION; if (otype != sc->an_config.an_opmode || orate != sc->an_tx_rate) an_init_locked(sc); AN_UNLOCK(sc); return(0); } static void an_media_status(struct ifnet *ifp, struct ifmediareq *imr) { struct an_ltv_status status; struct an_softc *sc = ifp->if_softc; imr->ifm_active = IFM_IEEE80211; AN_LOCK(sc); status.an_len = sizeof(status); status.an_type = AN_RID_STATUS; if (an_read_record(sc, (struct an_ltv_gen *)&status)) { /* If the status read fails, just lie. */ imr->ifm_active = sc->an_ifmedia.ifm_cur->ifm_media; imr->ifm_status = IFM_AVALID|IFM_ACTIVE; } if (sc->an_tx_rate == 0) { imr->ifm_active = IFM_IEEE80211|IFM_AUTO; } if (sc->an_config.an_opmode == AN_OPMODE_IBSS_ADHOC) imr->ifm_active |= IFM_IEEE80211_ADHOC; imr->ifm_active |= ieee80211_rate2media(NULL, status.an_current_tx_rate, IEEE80211_MODE_AUTO); imr->ifm_status = IFM_AVALID; if (status.an_opmode & AN_STATUS_OPMODE_ASSOCIATED) imr->ifm_status |= IFM_ACTIVE; AN_UNLOCK(sc); } /********************** Cisco utility support routines *************/ /* * ReadRids & WriteRids derived from Cisco driver additions to Ben Reed's * Linux driver */ static int readrids(struct ifnet *ifp, struct aironet_ioctl *l_ioctl) { unsigned short rid; struct an_softc *sc; int error; switch (l_ioctl->command) { case AIROGCAP: rid = AN_RID_CAPABILITIES; break; case AIROGCFG: rid = AN_RID_GENCONFIG; break; case AIROGSLIST: rid = AN_RID_SSIDLIST; break; case AIROGVLIST: rid = AN_RID_APLIST; break; case AIROGDRVNAM: rid = AN_RID_DRVNAME; break; case AIROGEHTENC: rid = AN_RID_ENCAPPROTO; break; case AIROGWEPKTMP: rid = AN_RID_WEP_TEMP; break; case AIROGWEPKNV: rid = AN_RID_WEP_PERM; break; 
case AIROGSTAT: rid = AN_RID_STATUS; break; case AIROGSTATSD32: rid = AN_RID_32BITS_DELTA; break; case AIROGSTATSC32: rid = AN_RID_32BITS_CUM; break; default: rid = 999; break; } if (rid == 999) /* Is bad command */ return -EINVAL; sc = ifp->if_softc; sc->areq.an_len = AN_MAX_DATALEN; sc->areq.an_type = rid; an_read_record(sc, (struct an_ltv_gen *)&sc->areq); l_ioctl->len = sc->areq.an_len - 4; /* just data */ AN_UNLOCK(sc); /* the data contains the length at first */ if (copyout(&(sc->areq.an_len), l_ioctl->data, sizeof(sc->areq.an_len))) { error = -EFAULT; goto lock_exit; } /* Just copy the data back */ if (copyout(&(sc->areq.an_val), l_ioctl->data + 2, l_ioctl->len)) { error = -EFAULT; goto lock_exit; } error = 0; lock_exit: AN_LOCK(sc); return (error); } static int writerids(struct ifnet *ifp, struct aironet_ioctl *l_ioctl) { struct an_softc *sc; int rid, command, error; sc = ifp->if_softc; AN_LOCK_ASSERT(sc); rid = 0; command = l_ioctl->command; switch (command) { case AIROPSIDS: rid = AN_RID_SSIDLIST; break; case AIROPCAP: rid = AN_RID_CAPABILITIES; break; case AIROPAPLIST: rid = AN_RID_APLIST; break; case AIROPCFG: rid = AN_RID_GENCONFIG; break; case AIROPMACON: an_cmd(sc, AN_CMD_ENABLE, 0); return 0; break; case AIROPMACOFF: an_cmd(sc, AN_CMD_DISABLE, 0); return 0; break; case AIROPSTCLR: /* * This command merely clears the counts does not actually * store any data only reads rid. But as it changes the cards * state, I put it in the writerid routines. 
*/ rid = AN_RID_32BITS_DELTACLR; sc = ifp->if_softc; sc->areq.an_len = AN_MAX_DATALEN; sc->areq.an_type = rid; an_read_record(sc, (struct an_ltv_gen *)&sc->areq); l_ioctl->len = sc->areq.an_len - 4; /* just data */ AN_UNLOCK(sc); /* the data contains the length at first */ error = copyout(&(sc->areq.an_len), l_ioctl->data, sizeof(sc->areq.an_len)); if (error) { AN_LOCK(sc); return -EFAULT; } /* Just copy the data */ error = copyout(&(sc->areq.an_val), l_ioctl->data + 2, l_ioctl->len); AN_LOCK(sc); if (error) return -EFAULT; return 0; break; case AIROPWEPKEY: rid = AN_RID_WEP_TEMP; break; case AIROPWEPKEYNV: rid = AN_RID_WEP_PERM; break; case AIROPLEAPUSR: rid = AN_RID_LEAPUSERNAME; break; case AIROPLEAPPWD: rid = AN_RID_LEAPPASSWORD; break; default: return -EOPNOTSUPP; } if (rid) { if (l_ioctl->len > sizeof(sc->areq.an_val) + 4) return -EINVAL; sc->areq.an_len = l_ioctl->len + 4; /* add type & length */ sc->areq.an_type = rid; /* Just copy the data back */ AN_UNLOCK(sc); error = copyin((l_ioctl->data) + 2, &sc->areq.an_val, l_ioctl->len); AN_LOCK(sc); if (error) return -EFAULT; an_cmd(sc, AN_CMD_DISABLE, 0); an_write_record(sc, (struct an_ltv_gen *)&sc->areq); an_cmd(sc, AN_CMD_ENABLE, 0); return 0; } return -EOPNOTSUPP; } /* * General Flash utilities derived from Cisco driver additions to Ben Reed's * Linux driver */ #define FLASH_DELAY(_sc, x) msleep(ifp, &(_sc)->an_mtx, PZERO, \ "flash", ((x) / hz) + 1); #define FLASH_COMMAND 0x7e7e #define FLASH_SIZE 32 * 1024 static int unstickbusy(struct ifnet *ifp) { struct an_softc *sc = ifp->if_softc; if (CSR_READ_2(sc, AN_COMMAND(sc->mpi350)) & AN_CMD_BUSY) { CSR_WRITE_2(sc, AN_EVENT_ACK(sc->mpi350), AN_EV_CLR_STUCK_BUSY); return 1; } return 0; } /* * Wait for busy completion from card wait for delay uSec's Return true for * success meaning command reg is clear */ static int WaitBusy(struct ifnet *ifp, int uSec) { int statword = 0xffff; int delay = 0; struct an_softc *sc = ifp->if_softc; while ((statword & AN_CMD_BUSY) && 
delay <= (1000 * 100)) { FLASH_DELAY(sc, 10); delay += 10; statword = CSR_READ_2(sc, AN_COMMAND(sc->mpi350)); if ((AN_CMD_BUSY & statword) && (delay % 200)) { unstickbusy(ifp); } } return 0 == (AN_CMD_BUSY & statword); } /* * STEP 1) Disable MAC and do soft reset on card. */ static int cmdreset(struct ifnet *ifp) { int status; struct an_softc *sc = ifp->if_softc; AN_LOCK(sc); an_stop(sc); an_cmd(sc, AN_CMD_DISABLE, 0); if (!(status = WaitBusy(ifp, AN_TIMEOUT))) { if_printf(ifp, "Waitbusy hang b4 RESET =%d\n", status); AN_UNLOCK(sc); return -EBUSY; } CSR_WRITE_2(sc, AN_COMMAND(sc->mpi350), AN_CMD_FW_RESTART); FLASH_DELAY(sc, 1000); /* WAS 600 12/7/00 */ if (!(status = WaitBusy(ifp, 100))) { if_printf(ifp, "Waitbusy hang AFTER RESET =%d\n", status); AN_UNLOCK(sc); return -EBUSY; } AN_UNLOCK(sc); return 0; } /* * STEP 2) Put the card in legendary flash mode */ static int setflashmode(struct ifnet *ifp) { int status; struct an_softc *sc = ifp->if_softc; CSR_WRITE_2(sc, AN_SW0(sc->mpi350), FLASH_COMMAND); CSR_WRITE_2(sc, AN_SW1(sc->mpi350), FLASH_COMMAND); CSR_WRITE_2(sc, AN_SW0(sc->mpi350), FLASH_COMMAND); CSR_WRITE_2(sc, AN_COMMAND(sc->mpi350), FLASH_COMMAND); /* * mdelay(500); // 500ms delay */ FLASH_DELAY(sc, 500); if (!(status = WaitBusy(ifp, AN_TIMEOUT))) { printf("Waitbusy hang after setflash mode\n"); return -EIO; } return 0; } /* * Get a character from the card matching matchbyte Step 3) */ static int flashgchar(struct ifnet *ifp, int matchbyte, int dwelltime) { int rchar; unsigned char rbyte = 0; int success = -1; struct an_softc *sc = ifp->if_softc; do { rchar = CSR_READ_2(sc, AN_SW1(sc->mpi350)); if (dwelltime && !(0x8000 & rchar)) { dwelltime -= 10; FLASH_DELAY(sc, 10); continue; } rbyte = 0xff & rchar; if ((rbyte == matchbyte) && (0x8000 & rchar)) { CSR_WRITE_2(sc, AN_SW1(sc->mpi350), 0); success = 1; break; } if (rbyte == 0x81 || rbyte == 0x82 || rbyte == 0x83 || rbyte == 0x1a || 0xffff == rchar) break; CSR_WRITE_2(sc, AN_SW1(sc->mpi350), 0); } while 
(dwelltime > 0); return success; } /* * Put character to SWS0 wait for dwelltime x 50us for echo . */ static int flashpchar(struct ifnet *ifp, int byte, int dwelltime) { int echo; int pollbusy, waittime; struct an_softc *sc = ifp->if_softc; byte |= 0x8000; if (dwelltime == 0) dwelltime = 200; waittime = dwelltime; /* * Wait for busy bit d15 to go false indicating buffer empty */ do { pollbusy = CSR_READ_2(sc, AN_SW0(sc->mpi350)); if (pollbusy & 0x8000) { FLASH_DELAY(sc, 50); waittime -= 50; continue; } else break; } while (waittime >= 0); /* timeout for busy clear wait */ if (waittime <= 0) { if_printf(ifp, "flash putchar busywait timeout!\n"); return -1; } /* * Port is clear now write byte and wait for it to echo back */ do { CSR_WRITE_2(sc, AN_SW0(sc->mpi350), byte); FLASH_DELAY(sc, 50); dwelltime -= 50; echo = CSR_READ_2(sc, AN_SW1(sc->mpi350)); } while (dwelltime >= 0 && echo != byte); CSR_WRITE_2(sc, AN_SW1(sc->mpi350), 0); return echo == byte; } /* * Transfer 32k of firmware data from user buffer to our buffer and send to * the card */ static int flashputbuf(struct ifnet *ifp) { unsigned short *bufp; int nwords; struct an_softc *sc = ifp->if_softc; /* Write stuff */ bufp = sc->an_flash_buffer; if (!sc->mpi350) { CSR_WRITE_2(sc, AN_AUX_PAGE, 0x100); CSR_WRITE_2(sc, AN_AUX_OFFSET, 0); for (nwords = 0; nwords != FLASH_SIZE / 2; nwords++) { CSR_WRITE_2(sc, AN_AUX_DATA, bufp[nwords] & 0xffff); } } else { for (nwords = 0; nwords != FLASH_SIZE / 4; nwords++) { CSR_MEM_AUX_WRITE_4(sc, 0x8000, ((u_int32_t *)bufp)[nwords] & 0xffff); } } CSR_WRITE_2(sc, AN_SW0(sc->mpi350), 0x8000); return 0; } /* * After flashing restart the card. */ static int flashrestart(struct ifnet *ifp) { int status = 0; struct an_softc *sc = ifp->if_softc; FLASH_DELAY(sc, 1024); /* Added 12/7/00 */ an_init_locked(sc); FLASH_DELAY(sc, 1024); /* Added 12/7/00 */ return status; } /* * Entry point for flash ioclt. 
*/ static int flashcard(struct ifnet *ifp, struct aironet_ioctl *l_ioctl) { int z = 0, status; struct an_softc *sc; sc = ifp->if_softc; if (sc->mpi350) { if_printf(ifp, "flashing not supported on MPI 350 yet\n"); return(-1); } status = l_ioctl->command; switch (l_ioctl->command) { case AIROFLSHRST: return cmdreset(ifp); break; case AIROFLSHSTFL: if (sc->an_flash_buffer) { free(sc->an_flash_buffer, M_DEVBUF); sc->an_flash_buffer = NULL; } sc->an_flash_buffer = malloc(FLASH_SIZE, M_DEVBUF, M_WAITOK); if (sc->an_flash_buffer) return setflashmode(ifp); else return ENOBUFS; break; case AIROFLSHGCHR: /* Get char from aux */ if (l_ioctl->len > sizeof(sc->areq)) { return -EINVAL; } AN_UNLOCK(sc); status = copyin(l_ioctl->data, &sc->areq, l_ioctl->len); AN_LOCK(sc); if (status) return status; z = *(int *)&sc->areq; if ((status = flashgchar(ifp, z, 8000)) == 1) return 0; else return -1; case AIROFLSHPCHR: /* Send char to card. */ if (l_ioctl->len > sizeof(sc->areq)) { return -EINVAL; } AN_UNLOCK(sc); status = copyin(l_ioctl->data, &sc->areq, l_ioctl->len); AN_LOCK(sc); if (status) return status; z = *(int *)&sc->areq; if ((status = flashpchar(ifp, z, 8000)) == -1) return -EIO; else return 0; break; case AIROFLPUTBUF: /* Send 32k to card */ if (l_ioctl->len > FLASH_SIZE) { if_printf(ifp, "Buffer to big, %x %x\n", l_ioctl->len, FLASH_SIZE); return -EINVAL; } AN_UNLOCK(sc); status = copyin(l_ioctl->data, sc->an_flash_buffer, l_ioctl->len); AN_LOCK(sc); if (status) return status; if ((status = flashputbuf(ifp)) != 0) return -EIO; else return 0; break; case AIRORESTART: if ((status = flashrestart(ifp)) != 0) { if_printf(ifp, "FLASHRESTART returned %d\n", status); return -EIO; } else return 0; break; default: return -EINVAL; } return -EINVAL; } Index: head/sys/dev/ath/if_ath_ioctl.c =================================================================== --- head/sys/dev/ath/if_ath_ioctl.c (revision 331796) +++ head/sys/dev/ath/if_ath_ioctl.c (revision 331797) @@ -1,309 +1,309 @@ /*- * 
Copyright (c) 2002-2009 Sam Leffler, Errno Consulting * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any * redistribution must be conditioned upon including a substantially * similar Disclaimer requirement for further binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGES. */ #include __FBSDID("$FreeBSD$"); /* * Driver for the Atheros Wireless LAN controller. * * This software is derived from work of Atsushi Onoe; his contribution * is greatly appreciated. */ #include "opt_inet.h" #include "opt_ath.h" /* * This is needed for register operations which are performed * by the driver - eg, calls to ath_hal_gettsf32(). * * It's also required for any AH_DEBUG checks in here, eg the * module dependencies. 
*/ #include "opt_ah.h" #include "opt_wlan.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for mp_ncpus */ #include #include #include #include #include #include #include #include #include #include #include #ifdef IEEE80211_SUPPORT_SUPERG #include #endif #ifdef IEEE80211_SUPPORT_TDMA #include #endif #include #ifdef INET #include #include #endif #include #include /* XXX for softled */ #include #include #include #include #include #include #include #ifdef IEEE80211_SUPPORT_TDMA #include #endif #include /* * ioctl() related pieces. * * Some subsystems (eg spectral, dfs) have their own ioctl method which * we call. */ /* * Fetch the rate control statistics for the given node. */ static int ath_ioctl_ratestats(struct ath_softc *sc, struct ath_rateioctl *rs) { struct ath_node *an; struct ieee80211com *ic = &sc->sc_ic; struct ieee80211_node *ni; int error = 0; /* Perform a lookup on the given node */ ni = ieee80211_find_node(&ic->ic_sta, rs->is_u.macaddr); if (ni == NULL) { error = EINVAL; goto bad; } /* Lock the ath_node */ an = ATH_NODE(ni); ATH_NODE_LOCK(an); /* Fetch the rate control stats for this node */ error = ath_rate_fetch_node_stats(sc, an, rs); /* No matter what happens here, just drop through */ /* Unlock the ath_node */ ATH_NODE_UNLOCK(an); /* Unref the node */ ieee80211_node_decref(ni); bad: return (error); } #ifdef ATH_DIAGAPI /* * Diagnostic interface to the HAL. This is used by various * tools to do things like retrieve register contents for * debugging. The mechanism is intentionally opaque so that * it can change frequently w/o concern for compatibility. 
*/
/*
 * Run the HAL diagnostic request described by `ad'.
 *
 * When ATH_DIAG_IN is set in ad->ad_id, ad->ad_in_size bytes are copied
 * in from ad->ad_in_data.  When ATH_DIAG_DYN is set, a zeroed result
 * buffer of ad->ad_out_size bytes is allocated here.  The result is
 * copied out to ad->ad_out_data, and ad->ad_out_size is lowered when the
 * HAL reports a smaller size.
 *
 * Returns 0 on success, ENOMEM when an allocation fails, EINVAL when
 * ath_hal_getdiagstate() rejects the request, or the error from
 * copyin()/copyout().
 */
static int
ath_ioctl_diag(struct ath_softc *sc, struct ath_diag *ad)
{
	struct ath_hal *ah = sc->sc_ah;
	u_int id = ad->ad_id & ATH_DIAG_ID;
	void *indata = NULL;
	void *outdata = NULL;
	u_int32_t insize = ad->ad_in_size;
	u_int32_t outsize = ad->ad_out_size;
	int error = 0;

	if (ad->ad_id & ATH_DIAG_IN) {
		/*
		 * Copy in data.
		 */
		indata = malloc(insize, M_TEMP, M_NOWAIT);
		if (indata == NULL) {
			error = ENOMEM;
			goto bad;
		}
		error = copyin(ad->ad_in_data, indata, insize);
		if (error)
			goto bad;
	}
	if (ad->ad_id & ATH_DIAG_DYN) {
		/*
		 * Allocate a buffer for the results (otherwise the HAL
		 * returns a pointer to a buffer where we can read the
		 * results).  Note that we depend on the HAL leaving this
		 * pointer for us to use below in reclaiming the buffer;
		 * may want to be more defensive.
		 */
		outdata = malloc(outsize, M_TEMP, M_NOWAIT | M_ZERO);
		if (outdata == NULL) {
			error = ENOMEM;
			goto bad;
		}
	}

	/* Every request except HAL_DIAG_REGS forces the chip awake first. */
	ATH_LOCK(sc);
	if (id != HAL_DIAG_REGS)
		ath_power_set_power_state(sc, HAL_PM_AWAKE);
	ATH_UNLOCK(sc);

	if (ath_hal_getdiagstate(ah, id, indata, insize, &outdata, &outsize)) {
		/* Never report more than the HAL actually produced. */
		if (outsize < ad->ad_out_size)
			ad->ad_out_size = outsize;
		if (outdata != NULL)
			error = copyout(outdata, ad->ad_out_data,
					ad->ad_out_size);
	} else {
		error = EINVAL;
	}

	/* Undo the power-state change made above. */
	ATH_LOCK(sc);
	if (id != HAL_DIAG_REGS)
		ath_power_restore_power_state(sc);
	ATH_UNLOCK(sc);

bad:
	/* Only buffers allocated in this function are freed. */
	if ((ad->ad_id & ATH_DIAG_IN) && indata != NULL)
		free(indata, M_TEMP);
	if ((ad->ad_id & ATH_DIAG_DYN) && outdata != NULL)
		free(outdata, M_TEMP);
	return error;
}
#endif /* ATH_DIAGAPI */

/*
 * Driver ioctl handler.  Serves the SIOC*ATH* statistics and diagnostic
 * requests; any other command returns ENOTTY.
 */
int
ath_ioctl(struct ieee80211com *ic, u_long cmd, void *data)
{
	struct ifreq *ifr = data;
	struct ath_softc *sc = ic->ic_softc;

	switch (cmd) {
	case SIOCGATHSTATS: {
		struct ieee80211vap *vap;
		struct ifnet *ifp;
		const HAL_RATE_TABLE *rt;

		/* NB: embed these numbers to get a consistent view */
		sc->sc_stats.ast_tx_packets = 0;
		sc->sc_stats.ast_rx_packets = 0;
		/* Sum the packet counters of every vap on this device. */
		TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) {
			ifp = vap->iv_ifp;
			sc->sc_stats.ast_tx_packets += ifp->if_get_counter(ifp,
			    IFCOUNTER_OPACKETS);
			sc->sc_stats.ast_rx_packets += ifp->if_get_counter(ifp,
			    IFCOUNTER_IPACKETS);
		}
		sc->sc_stats.ast_tx_rssi = ATH_RSSI(sc->sc_halstats.ns_avgtxrssi);
		sc->sc_stats.ast_rx_rssi = ATH_RSSI(sc->sc_halstats.ns_avgrssi);
#ifdef IEEE80211_SUPPORT_TDMA
		sc->sc_stats.ast_tdma_tsfadjp = TDMA_AVG(sc->sc_avgtsfdeltap);
		sc->sc_stats.ast_tdma_tsfadjm = TDMA_AVG(sc->sc_avgtsfdeltam);
#endif
		rt = sc->sc_currates;
		/* Report the current TX rate without the "basic rate" flag. */
		sc->sc_stats.ast_tx_rate =
		    rt->info[sc->sc_txrix].dot11Rate &~ IEEE80211_RATE_BASIC;
		if (rt->info[sc->sc_txrix].phy & IEEE80211_T_HT)
			sc->sc_stats.ast_tx_rate |= IEEE80211_RATE_MCS;
-		return copyout(&sc->sc_stats,
-		    ifr->ifr_data, sizeof (sc->sc_stats));
+		return copyout(&sc->sc_stats, ifr_data_get_ptr(ifr),
+		    sizeof (sc->sc_stats));
	}
	case SIOCGATHAGSTATS:
-		return copyout(&sc->sc_aggr_stats,
-		    ifr->ifr_data, sizeof (sc->sc_aggr_stats));
+		return copyout(&sc->sc_aggr_stats, ifr_data_get_ptr(ifr),
+		    sizeof (sc->sc_aggr_stats));
	case SIOCZATHSTATS: {
		int error;

		/* Zeroing the counters requires the PRIV_DRIVER privilege. */
		error = priv_check(curthread, PRIV_DRIVER);
		if (error == 0) {
			memset(&sc->sc_stats, 0, sizeof(sc->sc_stats));
			memset(&sc->sc_aggr_stats, 0,
			    sizeof(sc->sc_aggr_stats));
			memset(&sc->sc_intr_stats, 0,
			    sizeof(sc->sc_intr_stats));
		}
		return (error);
	}
#ifdef ATH_DIAGAPI
	case SIOCGATHDIAG:
		return (ath_ioctl_diag(sc, data));
	case SIOCGATHPHYERR:
		return (ath_ioctl_phyerr(sc, data));
#endif
	case SIOCGATHSPECTRAL:
		return (ath_ioctl_spectral(sc, data));
	case SIOCGATHNODERATESTATS:
		return (ath_ioctl_ratestats(sc, data));
	case SIOCGATHBTCOEX:
		return (ath_btcoex_ioctl(sc, data));
	default:
		/*
		 * This signals the net80211 layer that we didn't handle this
		 * ioctl.
		 */
		return (ENOTTY);
	}
}
Index: head/sys/dev/cxgbe/t4_main.c
===================================================================
--- head/sys/dev/cxgbe/t4_main.c (revision 331796)
+++ head/sys/dev/cxgbe/t4_main.c (revision 331797)
@@ -1,10442 +1,10442 @@
/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011 Chelsio Communications, Inc.
* All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_inet.h" #include "opt_inet6.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RSS #include #endif #if defined(__i386__) || defined(__amd64__) #include #include #include #include #endif #include #ifdef DDB #include #include #endif #include "common/common.h" #include "common/t4_msg.h" #include "common/t4_regs.h" #include "common/t4_regs_values.h" #include "cudbg/cudbg.h" #include "t4_ioctl.h" #include "t4_l2t.h" #include "t4_mp_ring.h" #include "t4_if.h" /* T4 bus driver interface */ static int t4_probe(device_t); static int t4_attach(device_t); static int t4_detach(device_t); static int t4_ready(device_t); static int t4_read_port_device(device_t, int, device_t *); static device_method_t t4_methods[] = { DEVMETHOD(device_probe, t4_probe), DEVMETHOD(device_attach, t4_attach), DEVMETHOD(device_detach, t4_detach), DEVMETHOD(t4_is_main_ready, t4_ready), DEVMETHOD(t4_read_port_device, t4_read_port_device), DEVMETHOD_END }; static driver_t t4_driver = { "t4nex", t4_methods, sizeof(struct adapter) }; /* T4 port (cxgbe) interface */ static int cxgbe_probe(device_t); static int cxgbe_attach(device_t); static int cxgbe_detach(device_t); device_method_t cxgbe_methods[] = { DEVMETHOD(device_probe, cxgbe_probe), DEVMETHOD(device_attach, cxgbe_attach), DEVMETHOD(device_detach, cxgbe_detach), { 0, 0 } }; static driver_t cxgbe_driver = { "cxgbe", cxgbe_methods, sizeof(struct port_info) }; /* T4 VI (vcxgbe) interface */ static int vcxgbe_probe(device_t); static int vcxgbe_attach(device_t); static int vcxgbe_detach(device_t); static device_method_t vcxgbe_methods[] = { DEVMETHOD(device_probe, vcxgbe_probe), DEVMETHOD(device_attach, vcxgbe_attach), DEVMETHOD(device_detach, vcxgbe_detach), { 0, 0 } }; static driver_t 
vcxgbe_driver = { "vcxgbe", vcxgbe_methods, sizeof(struct vi_info) }; static d_ioctl_t t4_ioctl; static struct cdevsw t4_cdevsw = { .d_version = D_VERSION, .d_ioctl = t4_ioctl, .d_name = "t4nex", }; /* T5 bus driver interface */ static int t5_probe(device_t); static device_method_t t5_methods[] = { DEVMETHOD(device_probe, t5_probe), DEVMETHOD(device_attach, t4_attach), DEVMETHOD(device_detach, t4_detach), DEVMETHOD(t4_is_main_ready, t4_ready), DEVMETHOD(t4_read_port_device, t4_read_port_device), DEVMETHOD_END }; static driver_t t5_driver = { "t5nex", t5_methods, sizeof(struct adapter) }; /* T5 port (cxl) interface */ static driver_t cxl_driver = { "cxl", cxgbe_methods, sizeof(struct port_info) }; /* T5 VI (vcxl) interface */ static driver_t vcxl_driver = { "vcxl", vcxgbe_methods, sizeof(struct vi_info) }; /* T6 bus driver interface */ static int t6_probe(device_t); static device_method_t t6_methods[] = { DEVMETHOD(device_probe, t6_probe), DEVMETHOD(device_attach, t4_attach), DEVMETHOD(device_detach, t4_detach), DEVMETHOD(t4_is_main_ready, t4_ready), DEVMETHOD(t4_read_port_device, t4_read_port_device), DEVMETHOD_END }; static driver_t t6_driver = { "t6nex", t6_methods, sizeof(struct adapter) }; /* T6 port (cc) interface */ static driver_t cc_driver = { "cc", cxgbe_methods, sizeof(struct port_info) }; /* T6 VI (vcc) interface */ static driver_t vcc_driver = { "vcc", vcxgbe_methods, sizeof(struct vi_info) }; /* ifnet + media interface */ static void cxgbe_init(void *); static int cxgbe_ioctl(struct ifnet *, unsigned long, caddr_t); static int cxgbe_transmit(struct ifnet *, struct mbuf *); static void cxgbe_qflush(struct ifnet *); static int cxgbe_media_change(struct ifnet *); static void cxgbe_media_status(struct ifnet *, struct ifmediareq *); MALLOC_DEFINE(M_CXGBE, "cxgbe", "Chelsio T4/T5 Ethernet driver and services"); /* * Correct lock order when you need to acquire multiple locks is t4_list_lock, * then ADAPTER_LOCK, then t4_uld_list_lock. 
*/ static struct sx t4_list_lock; SLIST_HEAD(, adapter) t4_list; #ifdef TCP_OFFLOAD static struct sx t4_uld_list_lock; SLIST_HEAD(, uld_info) t4_uld_list; #endif /* * Tunables. See tweak_tunables() too. * * Each tunable is set to a default value here if it's known at compile-time. * Otherwise it is set to -n as an indication to tweak_tunables() that it should * provide a reasonable default (upto n) when the driver is loaded. * * Tunables applicable to both T4 and T5 are under hw.cxgbe. Those specific to * T5 are under hw.cxl. */ /* * Number of queues for tx and rx, NIC and offload. */ #define NTXQ 16 int t4_ntxq = -NTXQ; TUNABLE_INT("hw.cxgbe.ntxq", &t4_ntxq); TUNABLE_INT("hw.cxgbe.ntxq10g", &t4_ntxq); /* Old name, undocumented */ #define NRXQ 8 int t4_nrxq = -NRXQ; TUNABLE_INT("hw.cxgbe.nrxq", &t4_nrxq); TUNABLE_INT("hw.cxgbe.nrxq10g", &t4_nrxq); /* Old name, undocumented */ #define NTXQ_VI 1 static int t4_ntxq_vi = -NTXQ_VI; TUNABLE_INT("hw.cxgbe.ntxq_vi", &t4_ntxq_vi); #define NRXQ_VI 1 static int t4_nrxq_vi = -NRXQ_VI; TUNABLE_INT("hw.cxgbe.nrxq_vi", &t4_nrxq_vi); static int t4_rsrv_noflowq = 0; TUNABLE_INT("hw.cxgbe.rsrv_noflowq", &t4_rsrv_noflowq); #ifdef TCP_OFFLOAD #define NOFLDTXQ 8 static int t4_nofldtxq = -NOFLDTXQ; TUNABLE_INT("hw.cxgbe.nofldtxq", &t4_nofldtxq); #define NOFLDRXQ 2 static int t4_nofldrxq = -NOFLDRXQ; TUNABLE_INT("hw.cxgbe.nofldrxq", &t4_nofldrxq); #define NOFLDTXQ_VI 1 static int t4_nofldtxq_vi = -NOFLDTXQ_VI; TUNABLE_INT("hw.cxgbe.nofldtxq_vi", &t4_nofldtxq_vi); #define NOFLDRXQ_VI 1 static int t4_nofldrxq_vi = -NOFLDRXQ_VI; TUNABLE_INT("hw.cxgbe.nofldrxq_vi", &t4_nofldrxq_vi); #define TMR_IDX_OFLD 1 int t4_tmr_idx_ofld = TMR_IDX_OFLD; TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_ofld", &t4_tmr_idx_ofld); #define PKTC_IDX_OFLD (-1) int t4_pktc_idx_ofld = PKTC_IDX_OFLD; TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_ofld", &t4_pktc_idx_ofld); /* 0 means chip/fw default, non-zero number is value in microseconds */ static u_long t4_toe_keepalive_idle = 
0; TUNABLE_ULONG("hw.cxgbe.toe.keepalive_idle", &t4_toe_keepalive_idle); /* 0 means chip/fw default, non-zero number is value in microseconds */ static u_long t4_toe_keepalive_interval = 0; TUNABLE_ULONG("hw.cxgbe.toe.keepalive_interval", &t4_toe_keepalive_interval); /* 0 means chip/fw default, non-zero number is # of keepalives before abort */ static int t4_toe_keepalive_count = 0; TUNABLE_INT("hw.cxgbe.toe.keepalive_count", &t4_toe_keepalive_count); /* 0 means chip/fw default, non-zero number is value in microseconds */ static u_long t4_toe_rexmt_min = 0; TUNABLE_ULONG("hw.cxgbe.toe.rexmt_min", &t4_toe_rexmt_min); /* 0 means chip/fw default, non-zero number is value in microseconds */ static u_long t4_toe_rexmt_max = 0; TUNABLE_ULONG("hw.cxgbe.toe.rexmt_max", &t4_toe_rexmt_max); /* 0 means chip/fw default, non-zero number is # of rexmt before abort */ static int t4_toe_rexmt_count = 0; TUNABLE_INT("hw.cxgbe.toe.rexmt_count", &t4_toe_rexmt_count); /* -1 means chip/fw default, other values are raw backoff values to use */ static int t4_toe_rexmt_backoff[16] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }; TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.0", &t4_toe_rexmt_backoff[0]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.1", &t4_toe_rexmt_backoff[1]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.2", &t4_toe_rexmt_backoff[2]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.3", &t4_toe_rexmt_backoff[3]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.4", &t4_toe_rexmt_backoff[4]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.5", &t4_toe_rexmt_backoff[5]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.6", &t4_toe_rexmt_backoff[6]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.7", &t4_toe_rexmt_backoff[7]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.8", &t4_toe_rexmt_backoff[8]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.9", &t4_toe_rexmt_backoff[9]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.10", &t4_toe_rexmt_backoff[10]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.11", &t4_toe_rexmt_backoff[11]); 
TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.12", &t4_toe_rexmt_backoff[12]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.13", &t4_toe_rexmt_backoff[13]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.14", &t4_toe_rexmt_backoff[14]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.15", &t4_toe_rexmt_backoff[15]); #endif #ifdef DEV_NETMAP #define NNMTXQ_VI 2 static int t4_nnmtxq_vi = -NNMTXQ_VI; TUNABLE_INT("hw.cxgbe.nnmtxq_vi", &t4_nnmtxq_vi); #define NNMRXQ_VI 2 static int t4_nnmrxq_vi = -NNMRXQ_VI; TUNABLE_INT("hw.cxgbe.nnmrxq_vi", &t4_nnmrxq_vi); #endif /* * Holdoff parameters for ports. */ #define TMR_IDX 1 int t4_tmr_idx = TMR_IDX; TUNABLE_INT("hw.cxgbe.holdoff_timer_idx", &t4_tmr_idx); TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_10G", &t4_tmr_idx); /* Old name */ #define PKTC_IDX (-1) int t4_pktc_idx = PKTC_IDX; TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx", &t4_pktc_idx); TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_10G", &t4_pktc_idx); /* Old name */ /* * Size (# of entries) of each tx and rx queue. */ unsigned int t4_qsize_txq = TX_EQ_QSIZE; TUNABLE_INT("hw.cxgbe.qsize_txq", &t4_qsize_txq); unsigned int t4_qsize_rxq = RX_IQ_QSIZE; TUNABLE_INT("hw.cxgbe.qsize_rxq", &t4_qsize_rxq); /* * Interrupt types allowed (bits 0, 1, 2 = INTx, MSI, MSI-X respectively). */ int t4_intr_types = INTR_MSIX | INTR_MSI | INTR_INTX; TUNABLE_INT("hw.cxgbe.interrupt_types", &t4_intr_types); /* * Configuration file. */ #define DEFAULT_CF "default" #define FLASH_CF "flash" #define UWIRE_CF "uwire" #define FPGA_CF "fpga" static char t4_cfg_file[32] = DEFAULT_CF; TUNABLE_STR("hw.cxgbe.config_file", t4_cfg_file, sizeof(t4_cfg_file)); /* * PAUSE settings (bit 0, 1 = rx_pause, tx_pause respectively). * rx_pause = 1 to heed incoming PAUSE frames, 0 to ignore them. * tx_pause = 1 to emit PAUSE frames when the rx FIFO reaches its high water * mark or when signalled to do so, 0 to never emit PAUSE. 
*/ static int t4_pause_settings = PAUSE_TX | PAUSE_RX; TUNABLE_INT("hw.cxgbe.pause_settings", &t4_pause_settings); /* * Forward Error Correction settings (bit 0, 1, 2 = FEC_RS, FEC_BASER_RS, * FEC_RESERVED respectively). * -1 to run with the firmware default. * 0 to disable FEC. */ static int t4_fec = -1; TUNABLE_INT("hw.cxgbe.fec", &t4_fec); /* * Link autonegotiation. * -1 to run with the firmware default. * 0 to disable. * 1 to enable. */ static int t4_autoneg = -1; TUNABLE_INT("hw.cxgbe.autoneg", &t4_autoneg); /* * Firmware auto-install by driver during attach (0, 1, 2 = prohibited, allowed, * encouraged respectively). */ static unsigned int t4_fw_install = 1; TUNABLE_INT("hw.cxgbe.fw_install", &t4_fw_install); /* * ASIC features that will be used. Disable the ones you don't want so that the * chip resources aren't wasted on features that will not be used. */ static int t4_nbmcaps_allowed = 0; TUNABLE_INT("hw.cxgbe.nbmcaps_allowed", &t4_nbmcaps_allowed); static int t4_linkcaps_allowed = 0; /* No DCBX, PPP, etc. 
by default */ TUNABLE_INT("hw.cxgbe.linkcaps_allowed", &t4_linkcaps_allowed); static int t4_switchcaps_allowed = FW_CAPS_CONFIG_SWITCH_INGRESS | FW_CAPS_CONFIG_SWITCH_EGRESS; TUNABLE_INT("hw.cxgbe.switchcaps_allowed", &t4_switchcaps_allowed); static int t4_niccaps_allowed = FW_CAPS_CONFIG_NIC; TUNABLE_INT("hw.cxgbe.niccaps_allowed", &t4_niccaps_allowed); static int t4_toecaps_allowed = -1; TUNABLE_INT("hw.cxgbe.toecaps_allowed", &t4_toecaps_allowed); static int t4_rdmacaps_allowed = -1; TUNABLE_INT("hw.cxgbe.rdmacaps_allowed", &t4_rdmacaps_allowed); static int t4_cryptocaps_allowed = -1; TUNABLE_INT("hw.cxgbe.cryptocaps_allowed", &t4_cryptocaps_allowed); static int t4_iscsicaps_allowed = -1; TUNABLE_INT("hw.cxgbe.iscsicaps_allowed", &t4_iscsicaps_allowed); static int t4_fcoecaps_allowed = 0; TUNABLE_INT("hw.cxgbe.fcoecaps_allowed", &t4_fcoecaps_allowed); static int t5_write_combine = 1; TUNABLE_INT("hw.cxl.write_combine", &t5_write_combine); static int t4_num_vis = 1; TUNABLE_INT("hw.cxgbe.num_vis", &t4_num_vis); /* * PCIe Relaxed Ordering. * -1: driver should figure out a good value. * 0: disable RO. * 1: enable RO. * 2: leave RO alone. */ static int pcie_relaxed_ordering = -1; TUNABLE_INT("hw.cxgbe.pcie_relaxed_ordering", &pcie_relaxed_ordering); /* Functions used by VIs to obtain unique MAC addresses for each VI. */ static int vi_mac_funcs[] = { FW_VI_FUNC_ETH, FW_VI_FUNC_OFLD, FW_VI_FUNC_IWARP, FW_VI_FUNC_OPENISCSI, FW_VI_FUNC_OPENFCOE, FW_VI_FUNC_FOISCSI, FW_VI_FUNC_FOFCOE, }; struct intrs_and_queues { uint16_t intr_type; /* INTx, MSI, or MSI-X */ uint16_t num_vis; /* number of VIs for each port */ uint16_t nirq; /* Total # of vectors */ uint16_t ntxq; /* # of NIC txq's for each port */ uint16_t nrxq; /* # of NIC rxq's for each port */ uint16_t nofldtxq; /* # of TOE txq's for each port */ uint16_t nofldrxq; /* # of TOE rxq's for each port */ /* The vcxgbe/vcxl interfaces use these and not the ones above. 
*/ uint16_t ntxq_vi; /* # of NIC txq's */ uint16_t nrxq_vi; /* # of NIC rxq's */ uint16_t nofldtxq_vi; /* # of TOE txq's */ uint16_t nofldrxq_vi; /* # of TOE rxq's */ uint16_t nnmtxq_vi; /* # of netmap txq's */ uint16_t nnmrxq_vi; /* # of netmap rxq's */ }; struct filter_entry { uint32_t valid:1; /* filter allocated and valid */ uint32_t locked:1; /* filter is administratively locked */ uint32_t pending:1; /* filter action is pending firmware reply */ uint32_t smtidx:8; /* Source MAC Table index for smac */ struct l2t_entry *l2t; /* Layer Two Table entry for dmac */ struct t4_filter_specification fs; }; static void setup_memwin(struct adapter *); static void position_memwin(struct adapter *, int, uint32_t); static int rw_via_memwin(struct adapter *, int, uint32_t, uint32_t *, int, int); static inline int read_via_memwin(struct adapter *, int, uint32_t, uint32_t *, int); static inline int write_via_memwin(struct adapter *, int, uint32_t, const uint32_t *, int); static int validate_mem_range(struct adapter *, uint32_t, int); static int fwmtype_to_hwmtype(int); static int validate_mt_off_len(struct adapter *, int, uint32_t, int, uint32_t *); static int fixup_devlog_params(struct adapter *); static int cfg_itype_and_nqueues(struct adapter *, struct intrs_and_queues *); static int prep_firmware(struct adapter *); static int partition_resources(struct adapter *, const struct firmware *, const char *); static int get_params__pre_init(struct adapter *); static int get_params__post_init(struct adapter *); static int set_params__post_init(struct adapter *); static void t4_set_desc(struct adapter *); static void build_medialist(struct port_info *, struct ifmedia *); static void init_l1cfg(struct port_info *); static int cxgbe_init_synchronized(struct vi_info *); static int cxgbe_uninit_synchronized(struct vi_info *); static void quiesce_txq(struct adapter *, struct sge_txq *); static void quiesce_wrq(struct adapter *, struct sge_wrq *); static void quiesce_iq(struct adapter 
*, struct sge_iq *); static void quiesce_fl(struct adapter *, struct sge_fl *); static int t4_alloc_irq(struct adapter *, struct irq *, int rid, driver_intr_t *, void *, char *); static int t4_free_irq(struct adapter *, struct irq *); static void get_regs(struct adapter *, struct t4_regdump *, uint8_t *); static void vi_refresh_stats(struct adapter *, struct vi_info *); static void cxgbe_refresh_stats(struct adapter *, struct port_info *); static void cxgbe_tick(void *); static void cxgbe_vlan_config(void *, struct ifnet *, uint16_t); static void cxgbe_sysctls(struct port_info *); static int sysctl_int_array(SYSCTL_HANDLER_ARGS); static int sysctl_bitfield(SYSCTL_HANDLER_ARGS); static int sysctl_btphy(SYSCTL_HANDLER_ARGS); static int sysctl_noflowq(SYSCTL_HANDLER_ARGS); static int sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS); static int sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS); static int sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS); static int sysctl_qsize_txq(SYSCTL_HANDLER_ARGS); static int sysctl_pause_settings(SYSCTL_HANDLER_ARGS); static int sysctl_fec(SYSCTL_HANDLER_ARGS); static int sysctl_autoneg(SYSCTL_HANDLER_ARGS); static int sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS); static int sysctl_temperature(SYSCTL_HANDLER_ARGS); #ifdef SBUF_DRAIN static int sysctl_cctrl(SYSCTL_HANDLER_ARGS); static int sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS); static int sysctl_cim_la(SYSCTL_HANDLER_ARGS); static int sysctl_cim_la_t6(SYSCTL_HANDLER_ARGS); static int sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS); static int sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS); static int sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS); static int sysctl_cpl_stats(SYSCTL_HANDLER_ARGS); static int sysctl_ddp_stats(SYSCTL_HANDLER_ARGS); static int sysctl_devlog(SYSCTL_HANDLER_ARGS); static int sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS); static int sysctl_hw_sched(SYSCTL_HANDLER_ARGS); static int sysctl_lb_stats(SYSCTL_HANDLER_ARGS); static int sysctl_linkdnrc(SYSCTL_HANDLER_ARGS); static int 
sysctl_meminfo(SYSCTL_HANDLER_ARGS); static int sysctl_mps_tcam(SYSCTL_HANDLER_ARGS); static int sysctl_mps_tcam_t6(SYSCTL_HANDLER_ARGS); static int sysctl_path_mtus(SYSCTL_HANDLER_ARGS); static int sysctl_pm_stats(SYSCTL_HANDLER_ARGS); static int sysctl_rdma_stats(SYSCTL_HANDLER_ARGS); static int sysctl_tcp_stats(SYSCTL_HANDLER_ARGS); static int sysctl_tids(SYSCTL_HANDLER_ARGS); static int sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS); static int sysctl_tp_la_mask(SYSCTL_HANDLER_ARGS); static int sysctl_tp_la(SYSCTL_HANDLER_ARGS); static int sysctl_tx_rate(SYSCTL_HANDLER_ARGS); static int sysctl_ulprx_la(SYSCTL_HANDLER_ARGS); static int sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS); static int sysctl_tc_params(SYSCTL_HANDLER_ARGS); #endif #ifdef TCP_OFFLOAD static int sysctl_tls_rx_ports(SYSCTL_HANDLER_ARGS); static int sysctl_tp_tick(SYSCTL_HANDLER_ARGS); static int sysctl_tp_dack_timer(SYSCTL_HANDLER_ARGS); static int sysctl_tp_timer(SYSCTL_HANDLER_ARGS); static int sysctl_tp_shift_cnt(SYSCTL_HANDLER_ARGS); static int sysctl_tp_backoff(SYSCTL_HANDLER_ARGS); static int sysctl_holdoff_tmr_idx_ofld(SYSCTL_HANDLER_ARGS); static int sysctl_holdoff_pktc_idx_ofld(SYSCTL_HANDLER_ARGS); #endif static uint32_t fconf_iconf_to_mode(uint32_t, uint32_t); static uint32_t mode_to_fconf(uint32_t); static uint32_t mode_to_iconf(uint32_t); static int check_fspec_against_fconf_iconf(struct adapter *, struct t4_filter_specification *); static int get_filter_mode(struct adapter *, uint32_t *); static int set_filter_mode(struct adapter *, uint32_t); static inline uint64_t get_filter_hits(struct adapter *, uint32_t); static int get_filter(struct adapter *, struct t4_filter *); static int set_filter(struct adapter *, struct t4_filter *); static int del_filter(struct adapter *, struct t4_filter *); static void clear_filter(struct filter_entry *); static int set_filter_wr(struct adapter *, int); static int del_filter_wr(struct adapter *, int); static int set_tcb_rpl(struct sge_iq *, const struct 
rss_header *, struct mbuf *); static int get_sge_context(struct adapter *, struct t4_sge_context *); static int load_fw(struct adapter *, struct t4_data *); static int load_cfg(struct adapter *, struct t4_data *); static int load_boot(struct adapter *, struct t4_bootrom *); static int load_bootcfg(struct adapter *, struct t4_data *); static int cudbg_dump(struct adapter *, struct t4_cudbg_dump *); static int read_card_mem(struct adapter *, int, struct t4_mem_range *); static int read_i2c(struct adapter *, struct t4_i2c_data *); #ifdef TCP_OFFLOAD static int toe_capability(struct vi_info *, int); #endif static int mod_event(module_t, int, void *); static int notify_siblings(device_t, int); struct { uint16_t device; char *desc; } t4_pciids[] = { {0xa000, "Chelsio Terminator 4 FPGA"}, {0x4400, "Chelsio T440-dbg"}, {0x4401, "Chelsio T420-CR"}, {0x4402, "Chelsio T422-CR"}, {0x4403, "Chelsio T440-CR"}, {0x4404, "Chelsio T420-BCH"}, {0x4405, "Chelsio T440-BCH"}, {0x4406, "Chelsio T440-CH"}, {0x4407, "Chelsio T420-SO"}, {0x4408, "Chelsio T420-CX"}, {0x4409, "Chelsio T420-BT"}, {0x440a, "Chelsio T404-BT"}, {0x440e, "Chelsio T440-LP-CR"}, }, t5_pciids[] = { {0xb000, "Chelsio Terminator 5 FPGA"}, {0x5400, "Chelsio T580-dbg"}, {0x5401, "Chelsio T520-CR"}, /* 2 x 10G */ {0x5402, "Chelsio T522-CR"}, /* 2 x 10G, 2 X 1G */ {0x5403, "Chelsio T540-CR"}, /* 4 x 10G */ {0x5407, "Chelsio T520-SO"}, /* 2 x 10G, nomem */ {0x5409, "Chelsio T520-BT"}, /* 2 x 10GBaseT */ {0x540a, "Chelsio T504-BT"}, /* 4 x 1G */ {0x540d, "Chelsio T580-CR"}, /* 2 x 40G */ {0x540e, "Chelsio T540-LP-CR"}, /* 4 x 10G */ {0x5410, "Chelsio T580-LP-CR"}, /* 2 x 40G */ {0x5411, "Chelsio T520-LL-CR"}, /* 2 x 10G */ {0x5412, "Chelsio T560-CR"}, /* 1 x 40G, 2 x 10G */ {0x5414, "Chelsio T580-LP-SO-CR"}, /* 2 x 40G, nomem */ {0x5415, "Chelsio T502-BT"}, /* 2 x 1G */ #ifdef notyet {0x5404, "Chelsio T520-BCH"}, {0x5405, "Chelsio T540-BCH"}, {0x5406, "Chelsio T540-CH"}, {0x5408, "Chelsio T520-CX"}, {0x540b, "Chelsio 
B520-SR"}, {0x540c, "Chelsio B504-BT"}, {0x540f, "Chelsio Amsterdam"}, {0x5413, "Chelsio T580-CHR"}, #endif }, t6_pciids[] = { {0xc006, "Chelsio Terminator 6 FPGA"}, /* T6 PE10K6 FPGA (PF0) */ {0x6400, "Chelsio T6-DBG-25"}, /* 2 x 10/25G, debug */ {0x6401, "Chelsio T6225-CR"}, /* 2 x 10/25G */ {0x6402, "Chelsio T6225-SO-CR"}, /* 2 x 10/25G, nomem */ {0x6403, "Chelsio T6425-CR"}, /* 4 x 10/25G */ {0x6404, "Chelsio T6425-SO-CR"}, /* 4 x 10/25G, nomem */ {0x6405, "Chelsio T6225-OCP-SO"}, /* 2 x 10/25G, nomem */ {0x6406, "Chelsio T62100-OCP-SO"}, /* 2 x 40/50/100G, nomem */ {0x6407, "Chelsio T62100-LP-CR"}, /* 2 x 40/50/100G */ {0x6408, "Chelsio T62100-SO-CR"}, /* 2 x 40/50/100G, nomem */ {0x6409, "Chelsio T6210-BT"}, /* 2 x 10GBASE-T */ {0x640d, "Chelsio T62100-CR"}, /* 2 x 40/50/100G */ {0x6410, "Chelsio T6-DBG-100"}, /* 2 x 40/50/100G, debug */ {0x6411, "Chelsio T6225-LL-CR"}, /* 2 x 10/25G */ {0x6414, "Chelsio T61100-OCP-SO"}, /* 1 x 40/50/100G, nomem */ {0x6415, "Chelsio T6201-BT"}, /* 2 x 1000BASE-T */ /* Custom */ {0x6480, "Chelsio T6225 80"}, {0x6481, "Chelsio T62100 81"}, {0x6484, "Chelsio T62100 84"}, }; #ifdef TCP_OFFLOAD /* * service_iq() has an iq and needs the fl. Offset of fl from the iq should be * exactly the same for both rxq and ofld_rxq. 
*/ CTASSERT(offsetof(struct sge_ofld_rxq, iq) == offsetof(struct sge_rxq, iq)); CTASSERT(offsetof(struct sge_ofld_rxq, fl) == offsetof(struct sge_rxq, fl)); #endif CTASSERT(sizeof(struct cluster_metadata) <= CL_METADATA_SIZE); static int t4_probe(device_t dev) { int i; uint16_t v = pci_get_vendor(dev); uint16_t d = pci_get_device(dev); uint8_t f = pci_get_function(dev); if (v != PCI_VENDOR_ID_CHELSIO) return (ENXIO); /* Attach only to PF0 of the FPGA */ if (d == 0xa000 && f != 0) return (ENXIO); for (i = 0; i < nitems(t4_pciids); i++) { if (d == t4_pciids[i].device) { device_set_desc(dev, t4_pciids[i].desc); return (BUS_PROBE_DEFAULT); } } return (ENXIO); } static int t5_probe(device_t dev) { int i; uint16_t v = pci_get_vendor(dev); uint16_t d = pci_get_device(dev); uint8_t f = pci_get_function(dev); if (v != PCI_VENDOR_ID_CHELSIO) return (ENXIO); /* Attach only to PF0 of the FPGA */ if (d == 0xb000 && f != 0) return (ENXIO); for (i = 0; i < nitems(t5_pciids); i++) { if (d == t5_pciids[i].device) { device_set_desc(dev, t5_pciids[i].desc); return (BUS_PROBE_DEFAULT); } } return (ENXIO); } static int t6_probe(device_t dev) { int i; uint16_t v = pci_get_vendor(dev); uint16_t d = pci_get_device(dev); if (v != PCI_VENDOR_ID_CHELSIO) return (ENXIO); for (i = 0; i < nitems(t6_pciids); i++) { if (d == t6_pciids[i].device) { device_set_desc(dev, t6_pciids[i].desc); return (BUS_PROBE_DEFAULT); } } return (ENXIO); } static void t5_attribute_workaround(device_t dev) { device_t root_port; uint32_t v; /* * The T5 chips do not properly echo the No Snoop and Relaxed * Ordering attributes when replying to a TLP from a Root * Port. As a workaround, find the parent Root Port and * disable No Snoop and Relaxed Ordering. Note that this * affects all devices under this root port. 
*/ root_port = pci_find_pcie_root_port(dev); if (root_port == NULL) { device_printf(dev, "Unable to find parent root port\n"); return; } v = pcie_adjust_config(root_port, PCIER_DEVICE_CTL, PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE, 0, 2); if ((v & (PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE)) != 0) device_printf(dev, "Disabled No Snoop/Relaxed Ordering on %s\n", device_get_nameunit(root_port)); } static const struct devnames devnames[] = { { .nexus_name = "t4nex", .ifnet_name = "cxgbe", .vi_ifnet_name = "vcxgbe", .pf03_drv_name = "t4iov", .vf_nexus_name = "t4vf", .vf_ifnet_name = "cxgbev" }, { .nexus_name = "t5nex", .ifnet_name = "cxl", .vi_ifnet_name = "vcxl", .pf03_drv_name = "t5iov", .vf_nexus_name = "t5vf", .vf_ifnet_name = "cxlv" }, { .nexus_name = "t6nex", .ifnet_name = "cc", .vi_ifnet_name = "vcc", .pf03_drv_name = "t6iov", .vf_nexus_name = "t6vf", .vf_ifnet_name = "ccv" } }; void t4_init_devnames(struct adapter *sc) { int id; id = chip_id(sc); if (id >= CHELSIO_T4 && id - CHELSIO_T4 < nitems(devnames)) sc->names = &devnames[id - CHELSIO_T4]; else { device_printf(sc->dev, "chip id %d is not supported.\n", id); sc->names = NULL; } } static int t4_attach(device_t dev) { struct adapter *sc; int rc = 0, i, j, rqidx, tqidx, nports; struct make_dev_args mda; struct intrs_and_queues iaq; struct sge *s; uint32_t *buf; #ifdef TCP_OFFLOAD int ofld_rqidx, ofld_tqidx; #endif #ifdef DEV_NETMAP int nm_rqidx, nm_tqidx; #endif int num_vis; sc = device_get_softc(dev); sc->dev = dev; TUNABLE_INT_FETCH("hw.cxgbe.dflags", &sc->debug_flags); if ((pci_get_device(dev) & 0xff00) == 0x5400) t5_attribute_workaround(dev); pci_enable_busmaster(dev); if (pci_find_cap(dev, PCIY_EXPRESS, &i) == 0) { uint32_t v; pci_set_max_read_req(dev, 4096); v = pci_read_config(dev, i + PCIER_DEVICE_CTL, 2); sc->params.pci.mps = 128 << ((v & PCIEM_CTL_MAX_PAYLOAD) >> 5); if (pcie_relaxed_ordering == 0 && (v | PCIEM_CTL_RELAXED_ORD_ENABLE) != 0) { v &= 
~PCIEM_CTL_RELAXED_ORD_ENABLE; pci_write_config(dev, i + PCIER_DEVICE_CTL, v, 2); } else if (pcie_relaxed_ordering == 1 && (v & PCIEM_CTL_RELAXED_ORD_ENABLE) == 0) { v |= PCIEM_CTL_RELAXED_ORD_ENABLE; pci_write_config(dev, i + PCIER_DEVICE_CTL, v, 2); } } sc->sge_gts_reg = MYPF_REG(A_SGE_PF_GTS); sc->sge_kdoorbell_reg = MYPF_REG(A_SGE_PF_KDOORBELL); sc->traceq = -1; mtx_init(&sc->ifp_lock, sc->ifp_lockname, 0, MTX_DEF); snprintf(sc->ifp_lockname, sizeof(sc->ifp_lockname), "%s tracer", device_get_nameunit(dev)); snprintf(sc->lockname, sizeof(sc->lockname), "%s", device_get_nameunit(dev)); mtx_init(&sc->sc_lock, sc->lockname, 0, MTX_DEF); t4_add_adapter(sc); mtx_init(&sc->sfl_lock, "starving freelists", 0, MTX_DEF); TAILQ_INIT(&sc->sfl); callout_init_mtx(&sc->sfl_callout, &sc->sfl_lock, 0); mtx_init(&sc->reg_lock, "indirect register access", 0, MTX_DEF); rc = t4_map_bars_0_and_4(sc); if (rc != 0) goto done; /* error message displayed already */ memset(sc->chan_map, 0xff, sizeof(sc->chan_map)); /* Prepare the adapter for operation. */ buf = malloc(PAGE_SIZE, M_CXGBE, M_ZERO | M_WAITOK); rc = -t4_prep_adapter(sc, buf); free(buf, M_CXGBE); if (rc != 0) { device_printf(dev, "failed to prepare adapter: %d.\n", rc); goto done; } /* * This is the real PF# to which we're attaching. Works from within PCI * passthrough environments too, where pci_get_function() could return a * different PF# depending on the passthrough configuration. We need to * use the real PF# in all our communication with the firmware. */ j = t4_read_reg(sc, A_PL_WHOAMI); sc->pf = chip_id(sc) <= CHELSIO_T5 ? G_SOURCEPF(j) : G_T6_SOURCEPF(j); sc->mbox = sc->pf; t4_init_devnames(sc); if (sc->names == NULL) { rc = ENOTSUP; goto done; /* error message displayed already */ } /* * Do this really early, with the memory windows set up even before the * character device. The userland tool's register i/o and mem read * will work even in "recovery mode". 
*/ setup_memwin(sc); if (t4_init_devlog_params(sc, 0) == 0) fixup_devlog_params(sc); make_dev_args_init(&mda); mda.mda_devsw = &t4_cdevsw; mda.mda_uid = UID_ROOT; mda.mda_gid = GID_WHEEL; mda.mda_mode = 0600; mda.mda_si_drv1 = sc; rc = make_dev_s(&mda, &sc->cdev, "%s", device_get_nameunit(dev)); if (rc != 0) device_printf(dev, "failed to create nexus char device: %d.\n", rc); /* Go no further if recovery mode has been requested. */ if (TUNABLE_INT_FETCH("hw.cxgbe.sos", &i) && i != 0) { device_printf(dev, "recovery mode.\n"); goto done; } #if defined(__i386__) if ((cpu_feature & CPUID_CX8) == 0) { device_printf(dev, "64 bit atomics not available.\n"); rc = ENOTSUP; goto done; } #endif /* Prepare the firmware for operation */ rc = prep_firmware(sc); if (rc != 0) goto done; /* error message displayed already */ rc = get_params__post_init(sc); if (rc != 0) goto done; /* error message displayed already */ rc = set_params__post_init(sc); if (rc != 0) goto done; /* error message displayed already */ rc = t4_map_bar_2(sc); if (rc != 0) goto done; /* error message displayed already */ rc = t4_create_dma_tag(sc); if (rc != 0) goto done; /* error message displayed already */ /* * First pass over all the ports - allocate VIs and initialize some * basic parameters like mac address, port type, etc. */ for_each_port(sc, i) { struct port_info *pi; pi = malloc(sizeof(*pi), M_CXGBE, M_ZERO | M_WAITOK); sc->port[i] = pi; /* These must be set before t4_port_init */ pi->adapter = sc; pi->port_id = i; /* * XXX: vi[0] is special so we can't delay this allocation until * pi->nvi's final value is known. */ pi->vi = malloc(sizeof(struct vi_info) * t4_num_vis, M_CXGBE, M_ZERO | M_WAITOK); /* * Allocate the "main" VI and initialize parameters * like mac addr. 
*/ rc = -t4_port_init(sc, sc->mbox, sc->pf, 0, i); if (rc != 0) { device_printf(dev, "unable to initialize port %d: %d\n", i, rc); free(pi->vi, M_CXGBE); free(pi, M_CXGBE); sc->port[i] = NULL; goto done; } snprintf(pi->lockname, sizeof(pi->lockname), "%sp%d", device_get_nameunit(dev), i); mtx_init(&pi->pi_lock, pi->lockname, 0, MTX_DEF); sc->chan_map[pi->tx_chan] = i; /* All VIs on this port share this media. */ ifmedia_init(&pi->media, IFM_IMASK, cxgbe_media_change, cxgbe_media_status); pi->dev = device_add_child(dev, sc->names->ifnet_name, -1); if (pi->dev == NULL) { device_printf(dev, "failed to add device for port %d.\n", i); rc = ENXIO; goto done; } pi->vi[0].dev = pi->dev; device_set_softc(pi->dev, pi); } /* * Interrupt type, # of interrupts, # of rx/tx queues, etc. */ nports = sc->params.nports; rc = cfg_itype_and_nqueues(sc, &iaq); if (rc != 0) goto done; /* error message displayed already */ num_vis = iaq.num_vis; sc->intr_type = iaq.intr_type; sc->intr_count = iaq.nirq; s = &sc->sge; s->nrxq = nports * iaq.nrxq; s->ntxq = nports * iaq.ntxq; if (num_vis > 1) { s->nrxq += nports * (num_vis - 1) * iaq.nrxq_vi; s->ntxq += nports * (num_vis - 1) * iaq.ntxq_vi; } s->neq = s->ntxq + s->nrxq; /* the free list in an rxq is an eq */ s->neq += nports + 1;/* ctrl queues: 1 per port + 1 mgmt */ s->niq = s->nrxq + 1; /* 1 extra for firmware event queue */ #ifdef TCP_OFFLOAD if (is_offload(sc)) { s->nofldrxq = nports * iaq.nofldrxq; s->nofldtxq = nports * iaq.nofldtxq; if (num_vis > 1) { s->nofldrxq += nports * (num_vis - 1) * iaq.nofldrxq_vi; s->nofldtxq += nports * (num_vis - 1) * iaq.nofldtxq_vi; } s->neq += s->nofldtxq + s->nofldrxq; s->niq += s->nofldrxq; s->ofld_rxq = malloc(s->nofldrxq * sizeof(struct sge_ofld_rxq), M_CXGBE, M_ZERO | M_WAITOK); s->ofld_txq = malloc(s->nofldtxq * sizeof(struct sge_wrq), M_CXGBE, M_ZERO | M_WAITOK); } #endif #ifdef DEV_NETMAP if (num_vis > 1) { s->nnmrxq = nports * (num_vis - 1) * iaq.nnmrxq_vi; s->nnmtxq = nports * (num_vis - 1) * 
iaq.nnmtxq_vi; } s->neq += s->nnmtxq + s->nnmrxq; s->niq += s->nnmrxq; s->nm_rxq = malloc(s->nnmrxq * sizeof(struct sge_nm_rxq), M_CXGBE, M_ZERO | M_WAITOK); s->nm_txq = malloc(s->nnmtxq * sizeof(struct sge_nm_txq), M_CXGBE, M_ZERO | M_WAITOK); #endif s->ctrlq = malloc(nports * sizeof(struct sge_wrq), M_CXGBE, M_ZERO | M_WAITOK); s->rxq = malloc(s->nrxq * sizeof(struct sge_rxq), M_CXGBE, M_ZERO | M_WAITOK); s->txq = malloc(s->ntxq * sizeof(struct sge_txq), M_CXGBE, M_ZERO | M_WAITOK); s->iqmap = malloc(s->niq * sizeof(struct sge_iq *), M_CXGBE, M_ZERO | M_WAITOK); s->eqmap = malloc(s->neq * sizeof(struct sge_eq *), M_CXGBE, M_ZERO | M_WAITOK); sc->irq = malloc(sc->intr_count * sizeof(struct irq), M_CXGBE, M_ZERO | M_WAITOK); t4_init_l2t(sc, M_WAITOK); t4_init_tx_sched(sc); /* * Second pass over the ports. This time we know the number of rx and * tx queues that each port should get. */ rqidx = tqidx = 0; #ifdef TCP_OFFLOAD ofld_rqidx = ofld_tqidx = 0; #endif #ifdef DEV_NETMAP nm_rqidx = nm_tqidx = 0; #endif for_each_port(sc, i) { struct port_info *pi = sc->port[i]; struct vi_info *vi; if (pi == NULL) continue; pi->nvi = num_vis; for_each_vi(pi, j, vi) { vi->pi = pi; vi->qsize_rxq = t4_qsize_rxq; vi->qsize_txq = t4_qsize_txq; vi->first_rxq = rqidx; vi->first_txq = tqidx; vi->tmr_idx = t4_tmr_idx; vi->pktc_idx = t4_pktc_idx; vi->nrxq = j == 0 ? iaq.nrxq : iaq.nrxq_vi; vi->ntxq = j == 0 ? iaq.ntxq : iaq.ntxq_vi; rqidx += vi->nrxq; tqidx += vi->ntxq; if (j == 0 && vi->ntxq > 1) vi->rsrv_noflowq = t4_rsrv_noflowq ? 1 : 0; else vi->rsrv_noflowq = 0; #ifdef TCP_OFFLOAD vi->ofld_tmr_idx = t4_tmr_idx_ofld; vi->ofld_pktc_idx = t4_pktc_idx_ofld; vi->first_ofld_rxq = ofld_rqidx; vi->first_ofld_txq = ofld_tqidx; vi->nofldrxq = j == 0 ? iaq.nofldrxq : iaq.nofldrxq_vi; vi->nofldtxq = j == 0 ? 
iaq.nofldtxq : iaq.nofldtxq_vi; ofld_rqidx += vi->nofldrxq; ofld_tqidx += vi->nofldtxq; #endif #ifdef DEV_NETMAP if (j > 0) { vi->first_nm_rxq = nm_rqidx; vi->first_nm_txq = nm_tqidx; vi->nnmrxq = iaq.nnmrxq_vi; vi->nnmtxq = iaq.nnmtxq_vi; nm_rqidx += vi->nnmrxq; nm_tqidx += vi->nnmtxq; } #endif } } rc = t4_setup_intr_handlers(sc); if (rc != 0) { device_printf(dev, "failed to setup interrupt handlers: %d\n", rc); goto done; } rc = bus_generic_probe(dev); if (rc != 0) { device_printf(dev, "failed to probe child drivers: %d\n", rc); goto done; } /* * Ensure thread-safe mailbox access (in debug builds). * * So far this was the only thread accessing the mailbox but various * ifnets and sysctls are about to be created and their handlers/ioctls * will access the mailbox from different threads. */ sc->flags |= CHK_MBOX_ACCESS; rc = bus_generic_attach(dev); if (rc != 0) { device_printf(dev, "failed to attach all child ports: %d\n", rc); goto done; } device_printf(dev, "PCIe gen%d x%d, %d ports, %d %s interrupt%s, %d eq, %d iq\n", sc->params.pci.speed, sc->params.pci.width, sc->params.nports, sc->intr_count, sc->intr_type == INTR_MSIX ? "MSI-X" : (sc->intr_type == INTR_MSI ? "MSI" : "INTx"), sc->intr_count > 1 ? "s" : "", sc->sge.neq, sc->sge.niq); t4_set_desc(sc); notify_siblings(dev, 0); done: if (rc != 0 && sc->cdev) { /* cdev was created and so cxgbetool works; recover that way. 
*/ device_printf(dev, "error during attach, adapter is now in recovery mode.\n"); rc = 0; } if (rc != 0) t4_detach_common(dev); else t4_sysctls(sc); return (rc); } static int t4_ready(device_t dev) { struct adapter *sc; sc = device_get_softc(dev); if (sc->flags & FW_OK) return (0); return (ENXIO); } static int t4_read_port_device(device_t dev, int port, device_t *child) { struct adapter *sc; struct port_info *pi; sc = device_get_softc(dev); if (port < 0 || port >= MAX_NPORTS) return (EINVAL); pi = sc->port[port]; if (pi == NULL || pi->dev == NULL) return (ENXIO); *child = pi->dev; return (0); } static int notify_siblings(device_t dev, int detaching) { device_t sibling; int error, i; error = 0; for (i = 0; i < PCI_FUNCMAX; i++) { if (i == pci_get_function(dev)) continue; sibling = pci_find_dbsf(pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev), i); if (sibling == NULL || !device_is_attached(sibling)) continue; if (detaching) error = T4_DETACH_CHILD(sibling); else (void)T4_ATTACH_CHILD(sibling); if (error) break; } return (error); } /* * Idempotent */ static int t4_detach(device_t dev) { struct adapter *sc; int rc; sc = device_get_softc(dev); rc = notify_siblings(dev, 1); if (rc) { device_printf(dev, "failed to detach sibling devices: %d\n", rc); return (rc); } return (t4_detach_common(dev)); } int t4_detach_common(device_t dev) { struct adapter *sc; struct port_info *pi; int i, rc; sc = device_get_softc(dev); sc->flags &= ~CHK_MBOX_ACCESS; if (sc->flags & FULL_INIT_DONE) { if (!(sc->flags & IS_VF)) t4_intr_disable(sc); } if (sc->cdev) { destroy_dev(sc->cdev); sc->cdev = NULL; } if (device_is_attached(dev)) { rc = bus_generic_detach(dev); if (rc) { device_printf(dev, "failed to detach child devices: %d\n", rc); return (rc); } } for (i = 0; i < sc->intr_count; i++) t4_free_irq(sc, &sc->irq[i]); if ((sc->flags & (IS_VF | FW_OK)) == FW_OK) t4_free_tx_sched(sc); for (i = 0; i < MAX_NPORTS; i++) { pi = sc->port[i]; if (pi) { t4_free_vi(sc, sc->mbox, sc->pf, 0, 
pi->vi[0].viid); if (pi->dev) device_delete_child(dev, pi->dev); mtx_destroy(&pi->pi_lock); free(pi->vi, M_CXGBE); free(pi, M_CXGBE); } } device_delete_children(dev); if (sc->flags & FULL_INIT_DONE) adapter_full_uninit(sc); if ((sc->flags & (IS_VF | FW_OK)) == FW_OK) t4_fw_bye(sc, sc->mbox); if (sc->intr_type == INTR_MSI || sc->intr_type == INTR_MSIX) pci_release_msi(dev); if (sc->regs_res) bus_release_resource(dev, SYS_RES_MEMORY, sc->regs_rid, sc->regs_res); if (sc->udbs_res) bus_release_resource(dev, SYS_RES_MEMORY, sc->udbs_rid, sc->udbs_res); if (sc->msix_res) bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_rid, sc->msix_res); if (sc->l2t) t4_free_l2t(sc->l2t); #ifdef TCP_OFFLOAD free(sc->sge.ofld_rxq, M_CXGBE); free(sc->sge.ofld_txq, M_CXGBE); #endif #ifdef DEV_NETMAP free(sc->sge.nm_rxq, M_CXGBE); free(sc->sge.nm_txq, M_CXGBE); #endif free(sc->irq, M_CXGBE); free(sc->sge.rxq, M_CXGBE); free(sc->sge.txq, M_CXGBE); free(sc->sge.ctrlq, M_CXGBE); free(sc->sge.iqmap, M_CXGBE); free(sc->sge.eqmap, M_CXGBE); free(sc->tids.ftid_tab, M_CXGBE); free(sc->tt.tls_rx_ports, M_CXGBE); t4_destroy_dma_tag(sc); if (mtx_initialized(&sc->sc_lock)) { sx_xlock(&t4_list_lock); SLIST_REMOVE(&t4_list, sc, adapter, link); sx_xunlock(&t4_list_lock); mtx_destroy(&sc->sc_lock); } callout_drain(&sc->sfl_callout); if (mtx_initialized(&sc->tids.ftid_lock)) mtx_destroy(&sc->tids.ftid_lock); if (mtx_initialized(&sc->sfl_lock)) mtx_destroy(&sc->sfl_lock); if (mtx_initialized(&sc->ifp_lock)) mtx_destroy(&sc->ifp_lock); if (mtx_initialized(&sc->reg_lock)) mtx_destroy(&sc->reg_lock); for (i = 0; i < NUM_MEMWIN; i++) { struct memwin *mw = &sc->memwin[i]; if (rw_initialized(&mw->mw_lock)) rw_destroy(&mw->mw_lock); } bzero(sc, sizeof(*sc)); return (0); } static int cxgbe_probe(device_t dev) { char buf[128]; struct port_info *pi = device_get_softc(dev); snprintf(buf, sizeof(buf), "port %d", pi->port_id); device_set_desc_copy(dev, buf); return (BUS_PROBE_DEFAULT); } #define T4_CAP 
(IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \ IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \ IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWCSUM_IPV6 | IFCAP_HWSTATS) #define T4_CAP_ENABLE (T4_CAP) static int cxgbe_vi_attach(device_t dev, struct vi_info *vi) { struct ifnet *ifp; struct sbuf *sb; vi->xact_addr_filt = -1; callout_init(&vi->tick, 1); /* Allocate an ifnet and set it up */ ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "Cannot allocate ifnet\n"); return (ENOMEM); } vi->ifp = ifp; ifp->if_softc = vi; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_init = cxgbe_init; ifp->if_ioctl = cxgbe_ioctl; ifp->if_transmit = cxgbe_transmit; ifp->if_qflush = cxgbe_qflush; ifp->if_get_counter = cxgbe_get_counter; ifp->if_capabilities = T4_CAP; #ifdef TCP_OFFLOAD if (vi->nofldrxq != 0) ifp->if_capabilities |= IFCAP_TOE; #endif #ifdef DEV_NETMAP if (vi->nnmrxq != 0) ifp->if_capabilities |= IFCAP_NETMAP; #endif ifp->if_capenable = T4_CAP_ENABLE; ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO | CSUM_UDP_IPV6 | CSUM_TCP_IPV6; ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS; ifp->if_hw_tsomaxsegsize = 65536; vi->vlan_c = EVENTHANDLER_REGISTER(vlan_config, cxgbe_vlan_config, ifp, EVENTHANDLER_PRI_ANY); ether_ifattach(ifp, vi->hw_addr); #ifdef DEV_NETMAP if (ifp->if_capabilities & IFCAP_NETMAP) cxgbe_nm_attach(vi); #endif sb = sbuf_new_auto(); sbuf_printf(sb, "%d txq, %d rxq (NIC)", vi->ntxq, vi->nrxq); #ifdef TCP_OFFLOAD if (ifp->if_capabilities & IFCAP_TOE) sbuf_printf(sb, "; %d txq, %d rxq (TOE)", vi->nofldtxq, vi->nofldrxq); #endif #ifdef DEV_NETMAP if (ifp->if_capabilities & IFCAP_NETMAP) sbuf_printf(sb, "; %d txq, %d rxq (netmap)", vi->nnmtxq, vi->nnmrxq); #endif sbuf_finish(sb); device_printf(dev, "%s\n", sbuf_data(sb)); sbuf_delete(sb); vi_sysctls(vi); return (0); } 
static int cxgbe_attach(device_t dev) { struct port_info *pi = device_get_softc(dev); struct adapter *sc = pi->adapter; struct vi_info *vi; int i, rc; callout_init_mtx(&pi->tick, &pi->pi_lock, 0); rc = cxgbe_vi_attach(dev, &pi->vi[0]); if (rc) return (rc); for_each_vi(pi, i, vi) { if (i == 0) continue; vi->dev = device_add_child(dev, sc->names->vi_ifnet_name, -1); if (vi->dev == NULL) { device_printf(dev, "failed to add VI %d\n", i); continue; } device_set_softc(vi->dev, vi); } cxgbe_sysctls(pi); bus_generic_attach(dev); return (0); } static void cxgbe_vi_detach(struct vi_info *vi) { struct ifnet *ifp = vi->ifp; ether_ifdetach(ifp); if (vi->vlan_c) EVENTHANDLER_DEREGISTER(vlan_config, vi->vlan_c); /* Let detach proceed even if these fail. */ #ifdef DEV_NETMAP if (ifp->if_capabilities & IFCAP_NETMAP) cxgbe_nm_detach(vi); #endif cxgbe_uninit_synchronized(vi); callout_drain(&vi->tick); vi_full_uninit(vi); if_free(vi->ifp); vi->ifp = NULL; } static int cxgbe_detach(device_t dev) { struct port_info *pi = device_get_softc(dev); struct adapter *sc = pi->adapter; int rc; /* Detach the extra VIs first. 
*/ rc = bus_generic_detach(dev); if (rc) return (rc); device_delete_children(dev); doom_vi(sc, &pi->vi[0]); if (pi->flags & HAS_TRACEQ) { sc->traceq = -1; /* cloner should not create ifnet */ t4_tracer_port_detach(sc); } cxgbe_vi_detach(&pi->vi[0]); callout_drain(&pi->tick); ifmedia_removeall(&pi->media); end_synchronized_op(sc, 0); return (0); } static void cxgbe_init(void *arg) { struct vi_info *vi = arg; struct adapter *sc = vi->pi->adapter; if (begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4init") != 0) return; cxgbe_init_synchronized(vi); end_synchronized_op(sc, 0); } static int cxgbe_ioctl(struct ifnet *ifp, unsigned long cmd, caddr_t data) { int rc = 0, mtu, flags, can_sleep; struct vi_info *vi = ifp->if_softc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct ifreq *ifr = (struct ifreq *)data; uint32_t mask; switch (cmd) { case SIOCSIFMTU: mtu = ifr->ifr_mtu; if (mtu < ETHERMIN || mtu > MAX_MTU) return (EINVAL); rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4mtu"); if (rc) return (rc); ifp->if_mtu = mtu; if (vi->flags & VI_INIT_DONE) { t4_update_fl_bufsize(ifp); if (ifp->if_drv_flags & IFF_DRV_RUNNING) rc = update_mac_settings(ifp, XGMAC_MTU); } end_synchronized_op(sc, 0); break; case SIOCSIFFLAGS: can_sleep = 0; redo_sifflags: rc = begin_synchronized_op(sc, vi, can_sleep ? 
(SLEEP_OK | INTR_OK) : HOLD_LOCK, "t4flg"); if (rc) return (rc); if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { flags = vi->if_flags; if ((ifp->if_flags ^ flags) & (IFF_PROMISC | IFF_ALLMULTI)) { if (can_sleep == 1) { end_synchronized_op(sc, 0); can_sleep = 0; goto redo_sifflags; } rc = update_mac_settings(ifp, XGMAC_PROMISC | XGMAC_ALLMULTI); } } else { if (can_sleep == 0) { end_synchronized_op(sc, LOCK_HELD); can_sleep = 1; goto redo_sifflags; } rc = cxgbe_init_synchronized(vi); } vi->if_flags = ifp->if_flags; } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) { if (can_sleep == 0) { end_synchronized_op(sc, LOCK_HELD); can_sleep = 1; goto redo_sifflags; } rc = cxgbe_uninit_synchronized(vi); } end_synchronized_op(sc, can_sleep ? 0 : LOCK_HELD); break; case SIOCADDMULTI: case SIOCDELMULTI: /* these two are called with a mutex held :-( */ rc = begin_synchronized_op(sc, vi, HOLD_LOCK, "t4multi"); if (rc) return (rc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) rc = update_mac_settings(ifp, XGMAC_MCADDRS); end_synchronized_op(sc, LOCK_HELD); break; case SIOCSIFCAP: rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4cap"); if (rc) return (rc); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { ifp->if_capenable ^= IFCAP_TXCSUM; ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP); if (IFCAP_TSO4 & ifp->if_capenable && !(IFCAP_TXCSUM & ifp->if_capenable)) { ifp->if_capenable &= ~IFCAP_TSO4; if_printf(ifp, "tso4 disabled due to -txcsum.\n"); } } if (mask & IFCAP_TXCSUM_IPV6) { ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6); if (IFCAP_TSO6 & ifp->if_capenable && !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) { ifp->if_capenable &= ~IFCAP_TSO6; if_printf(ifp, "tso6 disabled due to -txcsum6.\n"); } } if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_RXCSUM_IPV6) ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; /* * Note that we leave CSUM_TSO alone (it is always set). 
The * kernel takes both IFCAP_TSOx and CSUM_TSO into account before * sending a TSO request our way, so it's sufficient to toggle * IFCAP_TSOx only. */ if (mask & IFCAP_TSO4) { if (!(IFCAP_TSO4 & ifp->if_capenable) && !(IFCAP_TXCSUM & ifp->if_capenable)) { if_printf(ifp, "enable txcsum first.\n"); rc = EAGAIN; goto fail; } ifp->if_capenable ^= IFCAP_TSO4; } if (mask & IFCAP_TSO6) { if (!(IFCAP_TSO6 & ifp->if_capenable) && !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) { if_printf(ifp, "enable txcsum6 first.\n"); rc = EAGAIN; goto fail; } ifp->if_capenable ^= IFCAP_TSO6; } if (mask & IFCAP_LRO) { #if defined(INET) || defined(INET6) int i; struct sge_rxq *rxq; ifp->if_capenable ^= IFCAP_LRO; for_each_rxq(vi, i, rxq) { if (ifp->if_capenable & IFCAP_LRO) rxq->iq.flags |= IQ_LRO_ENABLED; else rxq->iq.flags &= ~IQ_LRO_ENABLED; } #endif } #ifdef TCP_OFFLOAD if (mask & IFCAP_TOE) { int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE; rc = toe_capability(vi, enable); if (rc != 0) goto fail; ifp->if_capenable ^= mask; } #endif if (mask & IFCAP_VLAN_HWTAGGING) { ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (ifp->if_drv_flags & IFF_DRV_RUNNING) rc = update_mac_settings(ifp, XGMAC_VLANEX); } if (mask & IFCAP_VLAN_MTU) { ifp->if_capenable ^= IFCAP_VLAN_MTU; /* Need to find out how to disable auto-mtu-inflation */ } if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if (mask & IFCAP_VLAN_HWCSUM) ifp->if_capenable ^= IFCAP_VLAN_HWCSUM; #ifdef VLAN_CAPABILITIES VLAN_CAPABILITIES(ifp); #endif fail: end_synchronized_op(sc, 0); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: case SIOCGIFXMEDIA: ifmedia_ioctl(ifp, ifr, &pi->media, cmd); break; case SIOCGI2C: { struct ifi2creq i2c; - rc = copyin(ifr->ifr_data, &i2c, sizeof(i2c)); + rc = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c)); if (rc != 0) break; if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) { rc = EPERM; break; } if (i2c.len > sizeof(i2c.data)) { rc = EINVAL; break; } rc = begin_synchronized_op(sc, vi, SLEEP_OK | 
INTR_OK, "t4i2c"); if (rc) return (rc); rc = -t4_i2c_rd(sc, sc->mbox, pi->port_id, i2c.dev_addr, i2c.offset, i2c.len, &i2c.data[0]); end_synchronized_op(sc, 0); if (rc == 0) - rc = copyout(&i2c, ifr->ifr_data, sizeof(i2c)); + rc = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c)); break; } default: rc = ether_ioctl(ifp, cmd, data); } return (rc); } static int cxgbe_transmit(struct ifnet *ifp, struct mbuf *m) { struct vi_info *vi = ifp->if_softc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct sge_txq *txq; void *items[1]; int rc; M_ASSERTPKTHDR(m); MPASS(m->m_nextpkt == NULL); /* not quite ready for this yet */ if (__predict_false(pi->link_cfg.link_ok == 0)) { m_freem(m); return (ENETDOWN); } rc = parse_pkt(sc, &m); if (__predict_false(rc != 0)) { MPASS(m == NULL); /* was freed already */ atomic_add_int(&pi->tx_parse_error, 1); /* rare, atomic is ok */ return (rc); } /* Select a txq. */ txq = &sc->sge.txq[vi->first_txq]; if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) txq += ((m->m_pkthdr.flowid % (vi->ntxq - vi->rsrv_noflowq)) + vi->rsrv_noflowq); items[0] = m; rc = mp_ring_enqueue(txq->r, items, 1, 4096); if (__predict_false(rc != 0)) m_freem(m); return (rc); } static void cxgbe_qflush(struct ifnet *ifp) { struct vi_info *vi = ifp->if_softc; struct sge_txq *txq; int i; /* queues do not exist if !VI_INIT_DONE. 
*/ if (vi->flags & VI_INIT_DONE) { for_each_txq(vi, i, txq) { TXQ_LOCK(txq); txq->eq.flags |= EQ_QFLUSH; TXQ_UNLOCK(txq); while (!mp_ring_is_idle(txq->r)) { mp_ring_check_drainage(txq->r, 0); pause("qflush", 1); } TXQ_LOCK(txq); txq->eq.flags &= ~EQ_QFLUSH; TXQ_UNLOCK(txq); } } if_qflush(ifp); } static uint64_t vi_get_counter(struct ifnet *ifp, ift_counter c) { struct vi_info *vi = ifp->if_softc; struct fw_vi_stats_vf *s = &vi->stats; vi_refresh_stats(vi->pi->adapter, vi); switch (c) { case IFCOUNTER_IPACKETS: return (s->rx_bcast_frames + s->rx_mcast_frames + s->rx_ucast_frames); case IFCOUNTER_IERRORS: return (s->rx_err_frames); case IFCOUNTER_OPACKETS: return (s->tx_bcast_frames + s->tx_mcast_frames + s->tx_ucast_frames + s->tx_offload_frames); case IFCOUNTER_OERRORS: return (s->tx_drop_frames); case IFCOUNTER_IBYTES: return (s->rx_bcast_bytes + s->rx_mcast_bytes + s->rx_ucast_bytes); case IFCOUNTER_OBYTES: return (s->tx_bcast_bytes + s->tx_mcast_bytes + s->tx_ucast_bytes + s->tx_offload_bytes); case IFCOUNTER_IMCASTS: return (s->rx_mcast_frames); case IFCOUNTER_OMCASTS: return (s->tx_mcast_frames); case IFCOUNTER_OQDROPS: { uint64_t drops; drops = 0; if (vi->flags & VI_INIT_DONE) { int i; struct sge_txq *txq; for_each_txq(vi, i, txq) drops += counter_u64_fetch(txq->r->drops); } return (drops); } default: return (if_get_counter_default(ifp, c)); } } uint64_t cxgbe_get_counter(struct ifnet *ifp, ift_counter c) { struct vi_info *vi = ifp->if_softc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct port_stats *s = &pi->stats; if (pi->nvi > 1 || sc->flags & IS_VF) return (vi_get_counter(ifp, c)); cxgbe_refresh_stats(sc, pi); switch (c) { case IFCOUNTER_IPACKETS: return (s->rx_frames); case IFCOUNTER_IERRORS: return (s->rx_jabber + s->rx_runt + s->rx_too_long + s->rx_fcs_err + s->rx_len_err); case IFCOUNTER_OPACKETS: return (s->tx_frames); case IFCOUNTER_OERRORS: return (s->tx_error_frames); case IFCOUNTER_IBYTES: return (s->rx_octets); case 
IFCOUNTER_OBYTES: return (s->tx_octets); case IFCOUNTER_IMCASTS: return (s->rx_mcast_frames); case IFCOUNTER_OMCASTS: return (s->tx_mcast_frames); case IFCOUNTER_IQDROPS: return (s->rx_ovflow0 + s->rx_ovflow1 + s->rx_ovflow2 + s->rx_ovflow3 + s->rx_trunc0 + s->rx_trunc1 + s->rx_trunc2 + s->rx_trunc3 + pi->tnl_cong_drops); case IFCOUNTER_OQDROPS: { uint64_t drops; drops = s->tx_drop; if (vi->flags & VI_INIT_DONE) { int i; struct sge_txq *txq; for_each_txq(vi, i, txq) drops += counter_u64_fetch(txq->r->drops); } return (drops); } default: return (if_get_counter_default(ifp, c)); } } static int cxgbe_media_change(struct ifnet *ifp) { struct vi_info *vi = ifp->if_softc; device_printf(vi->dev, "%s unimplemented.\n", __func__); return (EOPNOTSUPP); } static void cxgbe_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) { struct vi_info *vi = ifp->if_softc; struct port_info *pi = vi->pi; struct ifmedia_entry *cur; struct link_config *lc = &pi->link_cfg; /* * If all the interfaces are administratively down the firmware does not * report transceiver changes. Refresh port info here so that ifconfig * displays accurate information at all times. */ if (begin_synchronized_op(pi->adapter, NULL, SLEEP_OK | INTR_OK, "t4med") == 0) { PORT_LOCK(pi); if (pi->up_vis == 0) { t4_update_port_info(pi); build_medialist(pi, &pi->media); } PORT_UNLOCK(pi); end_synchronized_op(pi->adapter, 0); } ifmr->ifm_status = IFM_AVALID; if (lc->link_ok == 0) return; ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active &= ~(IFM_ETH_TXPAUSE | IFM_ETH_RXPAUSE); if (lc->fc & PAUSE_RX) ifmr->ifm_active |= IFM_ETH_RXPAUSE; if (lc->fc & PAUSE_TX) ifmr->ifm_active |= IFM_ETH_TXPAUSE; /* active and current will differ iff current media is autoselect. 
*/ cur = pi->media.ifm_cur; if (cur != NULL && IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO) return; ifmr->ifm_active = IFM_ETHER | IFM_FDX; if (lc->fc & PAUSE_RX) ifmr->ifm_active |= IFM_ETH_RXPAUSE; if (lc->fc & PAUSE_TX) ifmr->ifm_active |= IFM_ETH_TXPAUSE; switch (lc->speed) { case 10000: ifmr->ifm_active |= IFM_10G_T; break; case 1000: ifmr->ifm_active |= IFM_1000_T; break; case 100: ifmr->ifm_active |= IFM_100_TX; break; case 10: ifmr->ifm_active |= IFM_10_T; break; default: device_printf(vi->dev, "link up but speed unknown (%u)\n", lc->speed); } } static int vcxgbe_probe(device_t dev) { char buf[128]; struct vi_info *vi = device_get_softc(dev); snprintf(buf, sizeof(buf), "port %d vi %td", vi->pi->port_id, vi - vi->pi->vi); device_set_desc_copy(dev, buf); return (BUS_PROBE_DEFAULT); } static int alloc_extra_vi(struct adapter *sc, struct port_info *pi, struct vi_info *vi) { int func, index, rc; uint32_t param, val; ASSERT_SYNCHRONIZED_OP(sc); index = vi - pi->vi; MPASS(index > 0); /* This function deals with _extra_ VIs only */ KASSERT(index < nitems(vi_mac_funcs), ("%s: VI %s doesn't have a MAC func", __func__, device_get_nameunit(vi->dev))); func = vi_mac_funcs[index]; rc = t4_alloc_vi_func(sc, sc->mbox, pi->tx_chan, sc->pf, 0, 1, vi->hw_addr, &vi->rss_size, func, 0); if (rc < 0) { device_printf(vi->dev, "failed to allocate virtual interface %d" "for port %d: %d\n", index, pi->port_id, -rc); return (-rc); } vi->viid = rc; if (chip_id(sc) <= CHELSIO_T5) vi->smt_idx = (rc & 0x7f) << 1; else vi->smt_idx = (rc & 0x7f); if (vi->rss_size == 1) { /* * This VI didn't get a slice of the RSS table. Reduce the * number of VIs being created (hw.cxgbe.num_vis) or modify the * configuration file (nvi, rssnvi for this PF) if this is a * problem. 
*/ device_printf(vi->dev, "RSS table not available.\n"); vi->rss_base = 0xffff; return (0); } param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_RSSINFO) | V_FW_PARAMS_PARAM_YZ(vi->viid); rc = t4_query_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); if (rc) vi->rss_base = 0xffff; else { MPASS((val >> 16) == vi->rss_size); vi->rss_base = val & 0xffff; } return (0); } static int vcxgbe_attach(device_t dev) { struct vi_info *vi; struct port_info *pi; struct adapter *sc; int rc; vi = device_get_softc(dev); pi = vi->pi; sc = pi->adapter; rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4via"); if (rc) return (rc); rc = alloc_extra_vi(sc, pi, vi); end_synchronized_op(sc, 0); if (rc) return (rc); rc = cxgbe_vi_attach(dev, vi); if (rc) { t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid); return (rc); } return (0); } static int vcxgbe_detach(device_t dev) { struct vi_info *vi; struct adapter *sc; vi = device_get_softc(dev); sc = vi->pi->adapter; doom_vi(sc, vi); cxgbe_vi_detach(vi); t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid); end_synchronized_op(sc, 0); return (0); } void t4_fatal_err(struct adapter *sc) { t4_set_reg_field(sc, A_SGE_CONTROL, F_GLOBALENABLE, 0); t4_intr_disable(sc); log(LOG_EMERG, "%s: encountered fatal error, adapter stopped.\n", device_get_nameunit(sc->dev)); } void t4_add_adapter(struct adapter *sc) { sx_xlock(&t4_list_lock); SLIST_INSERT_HEAD(&t4_list, sc, link); sx_xunlock(&t4_list_lock); } int t4_map_bars_0_and_4(struct adapter *sc) { sc->regs_rid = PCIR_BAR(0); sc->regs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, &sc->regs_rid, RF_ACTIVE); if (sc->regs_res == NULL) { device_printf(sc->dev, "cannot map registers.\n"); return (ENXIO); } sc->bt = rman_get_bustag(sc->regs_res); sc->bh = rman_get_bushandle(sc->regs_res); sc->mmio_len = rman_get_size(sc->regs_res); setbit(&sc->doorbells, DOORBELL_KDB); sc->msix_rid = PCIR_BAR(4); sc->msix_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, &sc->msix_rid, 
RF_ACTIVE); if (sc->msix_res == NULL) { device_printf(sc->dev, "cannot map MSI-X BAR.\n"); return (ENXIO); } return (0); } int t4_map_bar_2(struct adapter *sc) { /* * T4: only iWARP driver uses the userspace doorbells. There is no need * to map it if RDMA is disabled. */ if (is_t4(sc) && sc->rdmacaps == 0) return (0); sc->udbs_rid = PCIR_BAR(2); sc->udbs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, &sc->udbs_rid, RF_ACTIVE); if (sc->udbs_res == NULL) { device_printf(sc->dev, "cannot map doorbell BAR.\n"); return (ENXIO); } sc->udbs_base = rman_get_virtual(sc->udbs_res); if (chip_id(sc) >= CHELSIO_T5) { setbit(&sc->doorbells, DOORBELL_UDB); #if defined(__i386__) || defined(__amd64__) if (t5_write_combine) { int rc, mode; /* * Enable write combining on BAR2. This is the * userspace doorbell BAR and is split into 128B * (UDBS_SEG_SIZE) doorbell regions, each associated * with an egress queue. The first 64B has the doorbell * and the second 64B can be used to submit a tx work * request with an implicit doorbell. */ rc = pmap_change_attr((vm_offset_t)sc->udbs_base, rman_get_size(sc->udbs_res), PAT_WRITE_COMBINING); if (rc == 0) { clrbit(&sc->doorbells, DOORBELL_UDB); setbit(&sc->doorbells, DOORBELL_WCWR); setbit(&sc->doorbells, DOORBELL_UDBWC); } else { t5_write_combine = 0; device_printf(sc->dev, "couldn't enable write combining: %d\n", rc); } mode = is_t5(sc) ? 
V_STATMODE(0) : V_T6_STATMODE(0); t4_write_reg(sc, A_SGE_STAT_CFG, V_STATSOURCE_T5(7) | mode); } #else t5_write_combine = 0; #endif sc->iwt.wc_en = t5_write_combine; } return (0); } struct memwin_init { uint32_t base; uint32_t aperture; }; static const struct memwin_init t4_memwin[NUM_MEMWIN] = { { MEMWIN0_BASE, MEMWIN0_APERTURE }, { MEMWIN1_BASE, MEMWIN1_APERTURE }, { MEMWIN2_BASE_T4, MEMWIN2_APERTURE_T4 } }; static const struct memwin_init t5_memwin[NUM_MEMWIN] = { { MEMWIN0_BASE, MEMWIN0_APERTURE }, { MEMWIN1_BASE, MEMWIN1_APERTURE }, { MEMWIN2_BASE_T5, MEMWIN2_APERTURE_T5 }, }; static void setup_memwin(struct adapter *sc) { const struct memwin_init *mw_init; struct memwin *mw; int i; uint32_t bar0; if (is_t4(sc)) { /* * Read low 32b of bar0 indirectly via the hardware backdoor * mechanism. Works from within PCI passthrough environments * too, where rman_get_start() can return a different value. We * need to program the T4 memory window decoders with the actual * addresses that will be coming across the PCIe link. */ bar0 = t4_hw_pci_read_cfg4(sc, PCIR_BAR(0)); bar0 &= (uint32_t) PCIM_BAR_MEM_BASE; mw_init = &t4_memwin[0]; } else { /* T5+ use the relative offset inside the PCIe BAR */ bar0 = 0; mw_init = &t5_memwin[0]; } for (i = 0, mw = &sc->memwin[0]; i < NUM_MEMWIN; i++, mw_init++, mw++) { rw_init(&mw->mw_lock, "memory window access"); mw->mw_base = mw_init->base; mw->mw_aperture = mw_init->aperture; mw->mw_curpos = 0; t4_write_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, i), (mw->mw_base + bar0) | V_BIR(0) | V_WINDOW(ilog2(mw->mw_aperture) - 10)); rw_wlock(&mw->mw_lock); position_memwin(sc, i, 0); rw_wunlock(&mw->mw_lock); } /* flush */ t4_read_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, 2)); } /* * Positions the memory window at the given address in the card's address space. * There are some alignment requirements and the actual position may be at an * address prior to the requested address. 
mw->mw_curpos always has the actual * position of the window. */ static void position_memwin(struct adapter *sc, int idx, uint32_t addr) { struct memwin *mw; uint32_t pf; uint32_t reg; MPASS(idx >= 0 && idx < NUM_MEMWIN); mw = &sc->memwin[idx]; rw_assert(&mw->mw_lock, RA_WLOCKED); if (is_t4(sc)) { pf = 0; mw->mw_curpos = addr & ~0xf; /* start must be 16B aligned */ } else { pf = V_PFNUM(sc->pf); mw->mw_curpos = addr & ~0x7f; /* start must be 128B aligned */ } reg = PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, idx); t4_write_reg(sc, reg, mw->mw_curpos | pf); t4_read_reg(sc, reg); /* flush */ } static int rw_via_memwin(struct adapter *sc, int idx, uint32_t addr, uint32_t *val, int len, int rw) { struct memwin *mw; uint32_t mw_end, v; MPASS(idx >= 0 && idx < NUM_MEMWIN); /* Memory can only be accessed in naturally aligned 4 byte units */ if (addr & 3 || len & 3 || len <= 0) return (EINVAL); mw = &sc->memwin[idx]; while (len > 0) { rw_rlock(&mw->mw_lock); mw_end = mw->mw_curpos + mw->mw_aperture; if (addr >= mw_end || addr < mw->mw_curpos) { /* Will need to reposition the window */ if (!rw_try_upgrade(&mw->mw_lock)) { rw_runlock(&mw->mw_lock); rw_wlock(&mw->mw_lock); } rw_assert(&mw->mw_lock, RA_WLOCKED); position_memwin(sc, idx, addr); rw_downgrade(&mw->mw_lock); mw_end = mw->mw_curpos + mw->mw_aperture; } rw_assert(&mw->mw_lock, RA_RLOCKED); while (addr < mw_end && len > 0) { if (rw == 0) { v = t4_read_reg(sc, mw->mw_base + addr - mw->mw_curpos); *val++ = le32toh(v); } else { v = *val++; t4_write_reg(sc, mw->mw_base + addr - mw->mw_curpos, htole32(v)); } addr += 4; len -= 4; } rw_runlock(&mw->mw_lock); } return (0); } static inline int read_via_memwin(struct adapter *sc, int idx, uint32_t addr, uint32_t *val, int len) { return (rw_via_memwin(sc, idx, addr, val, len, 0)); } static inline int write_via_memwin(struct adapter *sc, int idx, uint32_t addr, const uint32_t *val, int len) { return (rw_via_memwin(sc, idx, addr, (void *)(uintptr_t)val, len, 1)); } static int 
t4_range_cmp(const void *a, const void *b) { return ((const struct t4_range *)a)->start - ((const struct t4_range *)b)->start; } /* * Verify that the memory range specified by the addr/len pair is valid within * the card's address space. */ static int validate_mem_range(struct adapter *sc, uint32_t addr, int len) { struct t4_range mem_ranges[4], *r, *next; uint32_t em, addr_len; int i, n, remaining; /* Memory can only be accessed in naturally aligned 4 byte units */ if (addr & 3 || len & 3 || len <= 0) return (EINVAL); /* Enabled memories */ em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE); r = &mem_ranges[0]; n = 0; bzero(r, sizeof(mem_ranges)); if (em & F_EDRAM0_ENABLE) { addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR); r->size = G_EDRAM0_SIZE(addr_len) << 20; if (r->size > 0) { r->start = G_EDRAM0_BASE(addr_len) << 20; if (addr >= r->start && addr + len <= r->start + r->size) return (0); r++; n++; } } if (em & F_EDRAM1_ENABLE) { addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR); r->size = G_EDRAM1_SIZE(addr_len) << 20; if (r->size > 0) { r->start = G_EDRAM1_BASE(addr_len) << 20; if (addr >= r->start && addr + len <= r->start + r->size) return (0); r++; n++; } } if (em & F_EXT_MEM_ENABLE) { addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR); r->size = G_EXT_MEM_SIZE(addr_len) << 20; if (r->size > 0) { r->start = G_EXT_MEM_BASE(addr_len) << 20; if (addr >= r->start && addr + len <= r->start + r->size) return (0); r++; n++; } } if (is_t5(sc) && em & F_EXT_MEM1_ENABLE) { addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR); r->size = G_EXT_MEM1_SIZE(addr_len) << 20; if (r->size > 0) { r->start = G_EXT_MEM1_BASE(addr_len) << 20; if (addr >= r->start && addr + len <= r->start + r->size) return (0); r++; n++; } } MPASS(n <= nitems(mem_ranges)); if (n > 1) { /* Sort and merge the ranges. */ qsort(mem_ranges, n, sizeof(struct t4_range), t4_range_cmp); /* Start from index 0 and examine the next n - 1 entries. 
*/ r = &mem_ranges[0]; for (remaining = n - 1; remaining > 0; remaining--, r++) { MPASS(r->size > 0); /* r is a valid entry. */ next = r + 1; MPASS(next->size > 0); /* and so is the next one. */ while (r->start + r->size >= next->start) { /* Merge the next one into the current entry. */ r->size = max(r->start + r->size, next->start + next->size) - r->start; n--; /* One fewer entry in total. */ if (--remaining == 0) goto done; /* short circuit */ next++; } if (next != r + 1) { /* * Some entries were merged into r and next * points to the first valid entry that couldn't * be merged. */ MPASS(next->size > 0); /* must be valid */ memcpy(r + 1, next, remaining * sizeof(*r)); #ifdef INVARIANTS /* * This so that the foo->size assertion in the * next iteration of the loop do the right * thing for entries that were pulled up and are * no longer valid. */ MPASS(n < nitems(mem_ranges)); bzero(&mem_ranges[n], (nitems(mem_ranges) - n) * sizeof(struct t4_range)); #endif } } done: /* Done merging the ranges. */ MPASS(n > 0); r = &mem_ranges[0]; for (i = 0; i < n; i++, r++) { if (addr >= r->start && addr + len <= r->start + r->size) return (0); } } return (EFAULT); } static int fwmtype_to_hwmtype(int mtype) { switch (mtype) { case FW_MEMTYPE_EDC0: return (MEM_EDC0); case FW_MEMTYPE_EDC1: return (MEM_EDC1); case FW_MEMTYPE_EXTMEM: return (MEM_MC0); case FW_MEMTYPE_EXTMEM1: return (MEM_MC1); default: panic("%s: cannot translate fw mtype %d.", __func__, mtype); } } /* * Verify that the memory range specified by the memtype/offset/len pair is * valid and lies entirely within the memtype specified. The global address of * the start of the range is returned in addr. 
*/ static int validate_mt_off_len(struct adapter *sc, int mtype, uint32_t off, int len, uint32_t *addr) { uint32_t em, addr_len, maddr; /* Memory can only be accessed in naturally aligned 4 byte units */ if (off & 3 || len & 3 || len == 0) return (EINVAL); em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE); switch (fwmtype_to_hwmtype(mtype)) { case MEM_EDC0: if (!(em & F_EDRAM0_ENABLE)) return (EINVAL); addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR); maddr = G_EDRAM0_BASE(addr_len) << 20; break; case MEM_EDC1: if (!(em & F_EDRAM1_ENABLE)) return (EINVAL); addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR); maddr = G_EDRAM1_BASE(addr_len) << 20; break; case MEM_MC: if (!(em & F_EXT_MEM_ENABLE)) return (EINVAL); addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR); maddr = G_EXT_MEM_BASE(addr_len) << 20; break; case MEM_MC1: if (!is_t5(sc) || !(em & F_EXT_MEM1_ENABLE)) return (EINVAL); addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR); maddr = G_EXT_MEM1_BASE(addr_len) << 20; break; default: return (EINVAL); } *addr = maddr + off; /* global address */ return (validate_mem_range(sc, *addr, len)); } static int fixup_devlog_params(struct adapter *sc) { struct devlog_params *dparams = &sc->params.devlog; int rc; rc = validate_mt_off_len(sc, dparams->memtype, dparams->start, dparams->size, &dparams->addr); return (rc); } static void update_nirq(struct intrs_and_queues *iaq, int nports) { int extra = T4_EXTRA_INTR; iaq->nirq = extra; iaq->nirq += nports * (iaq->nrxq + iaq->nofldrxq); iaq->nirq += nports * (iaq->num_vis - 1) * max(iaq->nrxq_vi, iaq->nnmrxq_vi); iaq->nirq += nports * (iaq->num_vis - 1) * iaq->nofldrxq_vi; } /* * Adjust requirements to fit the number of interrupts available. 
*/ static void calculate_iaq(struct adapter *sc, struct intrs_and_queues *iaq, int itype, int navail) { int old_nirq; const int nports = sc->params.nports; MPASS(nports > 0); MPASS(navail > 0); bzero(iaq, sizeof(*iaq)); iaq->intr_type = itype; iaq->num_vis = t4_num_vis; iaq->ntxq = t4_ntxq; iaq->ntxq_vi = t4_ntxq_vi; iaq->nrxq = t4_nrxq; iaq->nrxq_vi = t4_nrxq_vi; #ifdef TCP_OFFLOAD if (is_offload(sc)) { iaq->nofldtxq = t4_nofldtxq; iaq->nofldtxq_vi = t4_nofldtxq_vi; iaq->nofldrxq = t4_nofldrxq; iaq->nofldrxq_vi = t4_nofldrxq_vi; } #endif #ifdef DEV_NETMAP iaq->nnmtxq_vi = t4_nnmtxq_vi; iaq->nnmrxq_vi = t4_nnmrxq_vi; #endif update_nirq(iaq, nports); if (iaq->nirq <= navail && (itype != INTR_MSI || powerof2(iaq->nirq))) { /* * This is the normal case -- there are enough interrupts for * everything. */ goto done; } /* * If extra VIs have been configured try reducing their count and see if * that works. */ while (iaq->num_vis > 1) { iaq->num_vis--; update_nirq(iaq, nports); if (iaq->nirq <= navail && (itype != INTR_MSI || powerof2(iaq->nirq))) { device_printf(sc->dev, "virtual interfaces per port " "reduced to %d from %d. nrxq=%u, nofldrxq=%u, " "nrxq_vi=%u nofldrxq_vi=%u, nnmrxq_vi=%u. " "itype %d, navail %u, nirq %d.\n", iaq->num_vis, t4_num_vis, iaq->nrxq, iaq->nofldrxq, iaq->nrxq_vi, iaq->nofldrxq_vi, iaq->nnmrxq_vi, itype, navail, iaq->nirq); goto done; } } /* * Extra VIs will not be created. Log a message if they were requested. */ MPASS(iaq->num_vis == 1); iaq->ntxq_vi = iaq->nrxq_vi = 0; iaq->nofldtxq_vi = iaq->nofldrxq_vi = 0; iaq->nnmtxq_vi = iaq->nnmrxq_vi = 0; if (iaq->num_vis != t4_num_vis) { device_printf(sc->dev, "extra virtual interfaces disabled. " "nrxq=%u, nofldrxq=%u, nrxq_vi=%u nofldrxq_vi=%u, " "nnmrxq_vi=%u. 
itype %d, navail %u, nirq %d.\n", iaq->nrxq, iaq->nofldrxq, iaq->nrxq_vi, iaq->nofldrxq_vi, iaq->nnmrxq_vi, itype, navail, iaq->nirq); } /* * Keep reducing the number of NIC rx queues to the next lower power of * 2 (for even RSS distribution) and halving the TOE rx queues and see * if that works. */ do { if (iaq->nrxq > 1) { do { iaq->nrxq--; } while (!powerof2(iaq->nrxq)); } if (iaq->nofldrxq > 1) iaq->nofldrxq >>= 1; old_nirq = iaq->nirq; update_nirq(iaq, nports); if (iaq->nirq <= navail && (itype != INTR_MSI || powerof2(iaq->nirq))) { device_printf(sc->dev, "running with reduced number of " "rx queues because of shortage of interrupts. " "nrxq=%u, nofldrxq=%u. " "itype %d, navail %u, nirq %d.\n", iaq->nrxq, iaq->nofldrxq, itype, navail, iaq->nirq); goto done; } } while (old_nirq != iaq->nirq); /* One interrupt for everything. Ugh. */ device_printf(sc->dev, "running with minimal number of queues. " "itype %d, navail %u.\n", itype, navail); iaq->nirq = 1; MPASS(iaq->nrxq == 1); iaq->ntxq = 1; if (iaq->nofldrxq > 1) iaq->nofldtxq = 1; done: MPASS(iaq->num_vis > 0); if (iaq->num_vis > 1) { MPASS(iaq->nrxq_vi > 0); MPASS(iaq->ntxq_vi > 0); } MPASS(iaq->nirq > 0); MPASS(iaq->nrxq > 0); MPASS(iaq->ntxq > 0); if (itype == INTR_MSI) { MPASS(powerof2(iaq->nirq)); } } static int cfg_itype_and_nqueues(struct adapter *sc, struct intrs_and_queues *iaq) { int rc, itype, navail, nalloc; for (itype = INTR_MSIX; itype; itype >>= 1) { if ((itype & t4_intr_types) == 0) continue; /* not allowed */ if (itype == INTR_MSIX) navail = pci_msix_count(sc->dev); else if (itype == INTR_MSI) navail = pci_msi_count(sc->dev); else navail = 1; restart: if (navail == 0) continue; calculate_iaq(sc, iaq, itype, navail); nalloc = iaq->nirq; rc = 0; if (itype == INTR_MSIX) rc = pci_alloc_msix(sc->dev, &nalloc); else if (itype == INTR_MSI) rc = pci_alloc_msi(sc->dev, &nalloc); if (rc == 0 && nalloc > 0) { if (nalloc == iaq->nirq) return (0); /* * Didn't get the number requested. 
Use whatever number * the kernel is willing to allocate. */ device_printf(sc->dev, "fewer vectors than requested, " "type=%d, req=%d, rcvd=%d; will downshift req.\n", itype, iaq->nirq, nalloc); pci_release_msi(sc->dev); navail = nalloc; goto restart; } device_printf(sc->dev, "failed to allocate vectors:%d, type=%d, req=%d, rcvd=%d\n", itype, rc, iaq->nirq, nalloc); } device_printf(sc->dev, "failed to find a usable interrupt type. " "allowed=%d, msi-x=%d, msi=%d, intx=1", t4_intr_types, pci_msix_count(sc->dev), pci_msi_count(sc->dev)); return (ENXIO); } #define FW_VERSION(chip) ( \ V_FW_HDR_FW_VER_MAJOR(chip##FW_VERSION_MAJOR) | \ V_FW_HDR_FW_VER_MINOR(chip##FW_VERSION_MINOR) | \ V_FW_HDR_FW_VER_MICRO(chip##FW_VERSION_MICRO) | \ V_FW_HDR_FW_VER_BUILD(chip##FW_VERSION_BUILD)) #define FW_INTFVER(chip, intf) (chip##FW_HDR_INTFVER_##intf) struct fw_info { uint8_t chip; char *kld_name; char *fw_mod_name; struct fw_hdr fw_hdr; /* XXX: waste of space, need a sparse struct */ } fw_info[] = { { .chip = CHELSIO_T4, .kld_name = "t4fw_cfg", .fw_mod_name = "t4fw", .fw_hdr = { .chip = FW_HDR_CHIP_T4, .fw_ver = htobe32_const(FW_VERSION(T4)), .intfver_nic = FW_INTFVER(T4, NIC), .intfver_vnic = FW_INTFVER(T4, VNIC), .intfver_ofld = FW_INTFVER(T4, OFLD), .intfver_ri = FW_INTFVER(T4, RI), .intfver_iscsipdu = FW_INTFVER(T4, ISCSIPDU), .intfver_iscsi = FW_INTFVER(T4, ISCSI), .intfver_fcoepdu = FW_INTFVER(T4, FCOEPDU), .intfver_fcoe = FW_INTFVER(T4, FCOE), }, }, { .chip = CHELSIO_T5, .kld_name = "t5fw_cfg", .fw_mod_name = "t5fw", .fw_hdr = { .chip = FW_HDR_CHIP_T5, .fw_ver = htobe32_const(FW_VERSION(T5)), .intfver_nic = FW_INTFVER(T5, NIC), .intfver_vnic = FW_INTFVER(T5, VNIC), .intfver_ofld = FW_INTFVER(T5, OFLD), .intfver_ri = FW_INTFVER(T5, RI), .intfver_iscsipdu = FW_INTFVER(T5, ISCSIPDU), .intfver_iscsi = FW_INTFVER(T5, ISCSI), .intfver_fcoepdu = FW_INTFVER(T5, FCOEPDU), .intfver_fcoe = FW_INTFVER(T5, FCOE), }, }, { .chip = CHELSIO_T6, .kld_name = "t6fw_cfg", .fw_mod_name = "t6fw", 
.fw_hdr = { .chip = FW_HDR_CHIP_T6, .fw_ver = htobe32_const(FW_VERSION(T6)), .intfver_nic = FW_INTFVER(T6, NIC), .intfver_vnic = FW_INTFVER(T6, VNIC), .intfver_ofld = FW_INTFVER(T6, OFLD), .intfver_ri = FW_INTFVER(T6, RI), .intfver_iscsipdu = FW_INTFVER(T6, ISCSIPDU), .intfver_iscsi = FW_INTFVER(T6, ISCSI), .intfver_fcoepdu = FW_INTFVER(T6, FCOEPDU), .intfver_fcoe = FW_INTFVER(T6, FCOE), }, } }; static struct fw_info * find_fw_info(int chip) { int i; for (i = 0; i < nitems(fw_info); i++) { if (fw_info[i].chip == chip) return (&fw_info[i]); } return (NULL); } /* * Is the given firmware API compatible with the one the driver was compiled * with? */ static int fw_compatible(const struct fw_hdr *hdr1, const struct fw_hdr *hdr2) { /* short circuit if it's the exact same firmware version */ if (hdr1->chip == hdr2->chip && hdr1->fw_ver == hdr2->fw_ver) return (1); /* * XXX: Is this too conservative? Perhaps I should limit this to the * features that are supported in the driver. */ #define SAME_INTF(x) (hdr1->intfver_##x == hdr2->intfver_##x) if (hdr1->chip == hdr2->chip && SAME_INTF(nic) && SAME_INTF(vnic) && SAME_INTF(ofld) && SAME_INTF(ri) && SAME_INTF(iscsipdu) && SAME_INTF(iscsi) && SAME_INTF(fcoepdu) && SAME_INTF(fcoe)) return (1); #undef SAME_INTF return (0); } /* * The firmware in the KLD is usable, but should it be installed? This routine * explains itself in detail if it indicates the KLD firmware should be * installed. 
*/ static int should_install_kld_fw(struct adapter *sc, int card_fw_usable, int k, int c) { const char *reason; if (!card_fw_usable) { reason = "incompatible or unusable"; goto install; } if (k > c) { reason = "older than the version bundled with this driver"; goto install; } if (t4_fw_install == 2 && k != c) { reason = "different than the version bundled with this driver"; goto install; } return (0); install: if (t4_fw_install == 0) { device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, " "but the driver is prohibited from installing a different " "firmware on the card.\n", G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c), G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason); return (0); } device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, " "installing firmware %u.%u.%u.%u on card.\n", G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c), G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason, G_FW_HDR_FW_VER_MAJOR(k), G_FW_HDR_FW_VER_MINOR(k), G_FW_HDR_FW_VER_MICRO(k), G_FW_HDR_FW_VER_BUILD(k)); return (1); } /* * Establish contact with the firmware and determine if we are the master driver * or not, and whether we are responsible for chip initialization. */ static int prep_firmware(struct adapter *sc) { const struct firmware *fw = NULL, *default_cfg; int rc, pf, card_fw_usable, kld_fw_usable, need_fw_reset = 1; enum dev_state state; struct fw_info *fw_info; struct fw_hdr *card_fw; /* fw on the card */ const struct fw_hdr *kld_fw; /* fw in the KLD */ const struct fw_hdr *drv_fw; /* fw header the driver was compiled against */ /* This is the firmware whose headers the driver was compiled against */ fw_info = find_fw_info(chip_id(sc)); if (fw_info == NULL) { device_printf(sc->dev, "unable to look up firmware information for chip %d.\n", chip_id(sc)); return (EINVAL); } drv_fw = &fw_info->fw_hdr; /* * The firmware KLD contains many modules. The KLD name is also the * name of the module that contains the default config file. 
*/ default_cfg = firmware_get(fw_info->kld_name); /* This is the firmware in the KLD */ fw = firmware_get(fw_info->fw_mod_name); if (fw != NULL) { kld_fw = (const void *)fw->data; kld_fw_usable = fw_compatible(drv_fw, kld_fw); } else { kld_fw = NULL; kld_fw_usable = 0; } /* Read the header of the firmware on the card */ card_fw = malloc(sizeof(*card_fw), M_CXGBE, M_ZERO | M_WAITOK); rc = -t4_read_flash(sc, FLASH_FW_START, sizeof (*card_fw) / sizeof (uint32_t), (uint32_t *)card_fw, 1); if (rc == 0) { card_fw_usable = fw_compatible(drv_fw, (const void*)card_fw); if (card_fw->fw_ver == be32toh(0xffffffff)) { uint32_t d = be32toh(kld_fw->fw_ver); if (!kld_fw_usable) { device_printf(sc->dev, "no firmware on the card and no usable " "firmware bundled with the driver.\n"); rc = EIO; goto done; } else if (t4_fw_install == 0) { device_printf(sc->dev, "no firmware on the card and the driver " "is prohibited from installing new " "firmware.\n"); rc = EIO; goto done; } device_printf(sc->dev, "no firmware on the card, " "installing firmware %d.%d.%d.%d\n", G_FW_HDR_FW_VER_MAJOR(d), G_FW_HDR_FW_VER_MINOR(d), G_FW_HDR_FW_VER_MICRO(d), G_FW_HDR_FW_VER_BUILD(d)); rc = t4_fw_forceinstall(sc, fw->data, fw->datasize); if (rc < 0) { rc = -rc; device_printf(sc->dev, "firmware install failed: %d.\n", rc); goto done; } memcpy(card_fw, kld_fw, sizeof(*card_fw)); card_fw_usable = 1; need_fw_reset = 0; } } else { device_printf(sc->dev, "Unable to read card's firmware header: %d\n", rc); card_fw_usable = 0; } /* Contact firmware. */ rc = t4_fw_hello(sc, sc->mbox, sc->mbox, MASTER_MAY, &state); if (rc < 0 || state == DEV_STATE_ERR) { rc = -rc; device_printf(sc->dev, "failed to connect to the firmware: %d, %d.\n", rc, state); goto done; } pf = rc; if (pf == sc->mbox) sc->flags |= MASTER_PF; else if (state == DEV_STATE_UNINIT) { /* * We didn't get to be the master so we definitely won't be * configuring the chip. It's a bug if someone else hasn't * configured it already. 
*/ device_printf(sc->dev, "couldn't be master(%d), " "device not already initialized either(%d).\n", rc, state); rc = EPROTO; goto done; } if (card_fw_usable && card_fw->fw_ver == drv_fw->fw_ver && (!kld_fw_usable || kld_fw->fw_ver == drv_fw->fw_ver)) { /* * Common case: the firmware on the card is an exact match and * the KLD is an exact match too, or the KLD is * absent/incompatible. Note that t4_fw_install = 2 is ignored * here -- use cxgbetool loadfw if you want to reinstall the * same firmware as the one on the card. */ } else if (kld_fw_usable && state == DEV_STATE_UNINIT && should_install_kld_fw(sc, card_fw_usable, be32toh(kld_fw->fw_ver), be32toh(card_fw->fw_ver))) { rc = -t4_fw_upgrade(sc, sc->mbox, fw->data, fw->datasize, 0); if (rc != 0) { device_printf(sc->dev, "failed to install firmware: %d\n", rc); goto done; } /* Installed successfully, update the cached header too. */ memcpy(card_fw, kld_fw, sizeof(*card_fw)); card_fw_usable = 1; need_fw_reset = 0; /* already reset as part of load_fw */ } if (!card_fw_usable) { uint32_t d, c, k; d = ntohl(drv_fw->fw_ver); c = ntohl(card_fw->fw_ver); k = kld_fw ? 
ntohl(kld_fw->fw_ver) : 0; device_printf(sc->dev, "Cannot find a usable firmware: " "fw_install %d, chip state %d, " "driver compiled with %d.%d.%d.%d, " "card has %d.%d.%d.%d, KLD has %d.%d.%d.%d\n", t4_fw_install, state, G_FW_HDR_FW_VER_MAJOR(d), G_FW_HDR_FW_VER_MINOR(d), G_FW_HDR_FW_VER_MICRO(d), G_FW_HDR_FW_VER_BUILD(d), G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c), G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), G_FW_HDR_FW_VER_MAJOR(k), G_FW_HDR_FW_VER_MINOR(k), G_FW_HDR_FW_VER_MICRO(k), G_FW_HDR_FW_VER_BUILD(k)); rc = EINVAL; goto done; } /* Reset device */ if (need_fw_reset && (rc = -t4_fw_reset(sc, sc->mbox, F_PIORSTMODE | F_PIORST)) != 0) { device_printf(sc->dev, "firmware reset failed: %d.\n", rc); if (rc != ETIMEDOUT && rc != EIO) t4_fw_bye(sc, sc->mbox); goto done; } sc->flags |= FW_OK; rc = get_params__pre_init(sc); if (rc != 0) goto done; /* error message displayed already */ /* Partition adapter resources as specified in the config file. */ if (state == DEV_STATE_UNINIT) { KASSERT(sc->flags & MASTER_PF, ("%s: trying to change chip settings when not master.", __func__)); rc = partition_resources(sc, default_cfg, fw_info->kld_name); if (rc != 0) goto done; /* error message displayed already */ t4_tweak_chip_settings(sc); /* get basic stuff going */ rc = -t4_fw_initialize(sc, sc->mbox); if (rc != 0) { device_printf(sc->dev, "fw init failed: %d.\n", rc); goto done; } } else { snprintf(sc->cfg_file, sizeof(sc->cfg_file), "pf%d", pf); sc->cfcsum = 0; } done: free(card_fw, M_CXGBE); if (fw != NULL) firmware_put(fw, FIRMWARE_UNLOAD); if (default_cfg != NULL) firmware_put(default_cfg, FIRMWARE_UNLOAD); return (rc); } #define FW_PARAM_DEV(param) \ (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | \ V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param)) #define FW_PARAM_PFVF(param) \ (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | \ V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param)) /* * Partition chip resources for use between various PFs, VFs, etc. 
*/ static int partition_resources(struct adapter *sc, const struct firmware *default_cfg, const char *name_prefix) { const struct firmware *cfg = NULL; int rc = 0; struct fw_caps_config_cmd caps; uint32_t mtype, moff, finicsum, cfcsum; /* * Figure out what configuration file to use. Pick the default config * file for the card if the user hasn't specified one explicitly. */ snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", t4_cfg_file); if (strncmp(t4_cfg_file, DEFAULT_CF, sizeof(t4_cfg_file)) == 0) { /* Card specific overrides go here. */ if (pci_get_device(sc->dev) == 0x440a) snprintf(sc->cfg_file, sizeof(sc->cfg_file), UWIRE_CF); if (is_fpga(sc)) snprintf(sc->cfg_file, sizeof(sc->cfg_file), FPGA_CF); } /* * We need to load another module if the profile is anything except * "default" or "flash". */ if (strncmp(sc->cfg_file, DEFAULT_CF, sizeof(sc->cfg_file)) != 0 && strncmp(sc->cfg_file, FLASH_CF, sizeof(sc->cfg_file)) != 0) { char s[32]; snprintf(s, sizeof(s), "%s_%s", name_prefix, sc->cfg_file); cfg = firmware_get(s); if (cfg == NULL) { if (default_cfg != NULL) { device_printf(sc->dev, "unable to load module \"%s\" for " "configuration profile \"%s\", will use " "the default config file instead.\n", s, sc->cfg_file); snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", DEFAULT_CF); } else { device_printf(sc->dev, "unable to load module \"%s\" for " "configuration profile \"%s\", will use " "the config file on the card's flash " "instead.\n", s, sc->cfg_file); snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", FLASH_CF); } } } if (strncmp(sc->cfg_file, DEFAULT_CF, sizeof(sc->cfg_file)) == 0 && default_cfg == NULL) { device_printf(sc->dev, "default config file not available, will use the config " "file on the card's flash instead.\n"); snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", FLASH_CF); } if (strncmp(sc->cfg_file, FLASH_CF, sizeof(sc->cfg_file)) != 0) { u_int cflen; const uint32_t *cfdata; uint32_t param, val, addr; KASSERT(cfg != NULL || default_cfg != 
NULL, ("%s: no config to upload", __func__)); /* * Ask the firmware where it wants us to upload the config file. */ param = FW_PARAM_DEV(CF); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); if (rc != 0) { /* No support for config file? Shouldn't happen. */ device_printf(sc->dev, "failed to query config file location: %d.\n", rc); goto done; } mtype = G_FW_PARAMS_PARAM_Y(val); moff = G_FW_PARAMS_PARAM_Z(val) << 16; /* * XXX: sheer laziness. We deliberately added 4 bytes of * useless stuffing/comments at the end of the config file so * it's ok to simply throw away the last remaining bytes when * the config file is not an exact multiple of 4. This also * helps with the validate_mt_off_len check. */ if (cfg != NULL) { cflen = cfg->datasize & ~3; cfdata = cfg->data; } else { cflen = default_cfg->datasize & ~3; cfdata = default_cfg->data; } if (cflen > FLASH_CFG_MAX_SIZE) { device_printf(sc->dev, "config file too long (%d, max allowed is %d). " "Will try to use the config on the card, if any.\n", cflen, FLASH_CFG_MAX_SIZE); goto use_config_on_flash; } rc = validate_mt_off_len(sc, mtype, moff, cflen, &addr); if (rc != 0) { device_printf(sc->dev, "%s: addr (%d/0x%x) or len %d is not valid: %d. 
" "Will try to use the config on the card, if any.\n", __func__, mtype, moff, cflen, rc); goto use_config_on_flash; } write_via_memwin(sc, 2, addr, cfdata, cflen); } else { use_config_on_flash: mtype = FW_MEMTYPE_FLASH; moff = t4_flash_cfg_addr(sc); } bzero(&caps, sizeof(caps)); caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_READ); caps.cfvalid_to_len16 = htobe32(F_FW_CAPS_CONFIG_CMD_CFVALID | V_FW_CAPS_CONFIG_CMD_MEMTYPE_CF(mtype) | V_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(moff >> 16) | FW_LEN16(caps)); rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps); if (rc != 0) { device_printf(sc->dev, "failed to pre-process config file: %d " "(mtype %d, moff 0x%x).\n", rc, mtype, moff); goto done; } finicsum = be32toh(caps.finicsum); cfcsum = be32toh(caps.cfcsum); if (finicsum != cfcsum) { device_printf(sc->dev, "WARNING: config file checksum mismatch: %08x %08x\n", finicsum, cfcsum); } sc->cfcsum = cfcsum; #define LIMIT_CAPS(x) do { \ caps.x &= htobe16(t4_##x##_allowed); \ } while (0) /* * Let the firmware know what features will (not) be used so it can tune * things accordingly. */ LIMIT_CAPS(nbmcaps); LIMIT_CAPS(linkcaps); LIMIT_CAPS(switchcaps); LIMIT_CAPS(niccaps); LIMIT_CAPS(toecaps); LIMIT_CAPS(rdmacaps); LIMIT_CAPS(cryptocaps); LIMIT_CAPS(iscsicaps); LIMIT_CAPS(fcoecaps); #undef LIMIT_CAPS caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_WRITE); caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps)); rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), NULL); if (rc != 0) { device_printf(sc->dev, "failed to process config file: %d.\n", rc); } done: if (cfg != NULL) firmware_put(cfg, FIRMWARE_UNLOAD); return (rc); } /* * Retrieve parameters that are needed (or nice to have) very early. 
*/
/*
 * Formats the version strings kept in the softc, queries the port vector and
 * core clock from the firmware, and sets up the device log parameters.
 * Returns 0 on success or the positive error from the parameter query; a
 * failure to get the devlog parameters is reported but is not an error.
 */
static int
get_params__pre_init(struct adapter *sc)
{
	uint32_t ids[2], vals[2];
	int rc;

	t4_get_version_info(sc);

	/* Render a packed version word into one of the softc's strings. */
#define FMT_VERSION(dst, vers) \
	snprintf((dst), sizeof(dst), "%u.%u.%u.%u", \
	    G_FW_HDR_FW_VER_MAJOR(vers), G_FW_HDR_FW_VER_MINOR(vers), \
	    G_FW_HDR_FW_VER_MICRO(vers), G_FW_HDR_FW_VER_BUILD(vers))

	FMT_VERSION(sc->fw_version, sc->params.fw_vers);
	FMT_VERSION(sc->bs_version, sc->params.bs_vers);
	FMT_VERSION(sc->tp_version, sc->params.tp_vers);
	FMT_VERSION(sc->er_version, sc->params.er_vers);
#undef FMT_VERSION

	ids[0] = FW_PARAM_DEV(PORTVEC);
	ids[1] = FW_PARAM_DEV(CCLK);
	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, ids, vals);
	if (rc != 0) {
		device_printf(sc->dev,
		    "failed to query parameters (pre_init): %d.\n", rc);
		return (rc);
	}

	sc->params.portvec = vals[0];
	sc->params.nports = bitcount32(vals[0]);
	sc->params.vpd.cclk = vals[1];

	/* Read device log parameters. */
	rc = -t4_init_devlog_params(sc, 1);
	if (rc != 0) {
		/* devlog isn't critical for device operation */
		device_printf(sc->dev,
		    "failed to get devlog parameters: %d.\n", rc);
		return (0);
	}
	fixup_devlog_params(sc);

	return (0);
}

/*
 * Retrieve various parameters that are of interest to the driver.  The device
 * has been initialized by the firmware at this point.
*/ static int get_params__post_init(struct adapter *sc) { int rc; uint32_t param[7], val[7]; struct fw_caps_config_cmd caps; param[0] = FW_PARAM_PFVF(IQFLINT_START); param[1] = FW_PARAM_PFVF(EQ_START); param[2] = FW_PARAM_PFVF(FILTER_START); param[3] = FW_PARAM_PFVF(FILTER_END); param[4] = FW_PARAM_PFVF(L2T_START); param[5] = FW_PARAM_PFVF(L2T_END); param[6] = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) | V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_VDD); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 7, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query parameters (post_init): %d.\n", rc); return (rc); } sc->sge.iq_start = val[0]; sc->sge.eq_start = val[1]; sc->tids.ftid_base = val[2]; sc->tids.nftids = val[3] - val[2] + 1; sc->params.ftid_min = val[2]; sc->params.ftid_max = val[3]; sc->vres.l2t.start = val[4]; sc->vres.l2t.size = val[5] - val[4] + 1; KASSERT(sc->vres.l2t.size <= L2T_SIZE, ("%s: L2 table size (%u) larger than expected (%u)", __func__, sc->vres.l2t.size, L2T_SIZE)); sc->params.core_vdd = val[6]; /* * MPSBGMAP is queried separately because only recent firmwares support * it as a parameter and we don't want the compound query above to fail * on older firmwares. 
*/ param[0] = FW_PARAM_DEV(MPSBGMAP); val[0] = 0; rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val); if (rc == 0) sc->params.mps_bg_map = val[0]; else sc->params.mps_bg_map = 0; /* get capabilites */ bzero(&caps, sizeof(caps)); caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_READ); caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps)); rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps); if (rc != 0) { device_printf(sc->dev, "failed to get card capabilities: %d.\n", rc); return (rc); } #define READ_CAPS(x) do { \ sc->x = htobe16(caps.x); \ } while (0) READ_CAPS(nbmcaps); READ_CAPS(linkcaps); READ_CAPS(switchcaps); READ_CAPS(niccaps); READ_CAPS(toecaps); READ_CAPS(rdmacaps); READ_CAPS(cryptocaps); READ_CAPS(iscsicaps); READ_CAPS(fcoecaps); /* * The firmware attempts memfree TOE configuration for -SO cards and * will report toecaps=0 if it runs out of resources (this depends on * the config file). It may not report 0 for other capabilities * dependent on the TOE in this case. Set them to 0 here so that the * driver doesn't bother tracking resources that will never be used. 
*/ if (sc->toecaps == 0) { sc->iscsicaps = 0; sc->rdmacaps = 0; } if (sc->niccaps & FW_CAPS_CONFIG_NIC_ETHOFLD) { param[0] = FW_PARAM_PFVF(ETHOFLD_START); param[1] = FW_PARAM_PFVF(ETHOFLD_END); param[2] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 3, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query NIC parameters: %d.\n", rc); return (rc); } sc->tids.etid_base = val[0]; sc->params.etid_min = val[0]; sc->tids.netids = val[1] - val[0] + 1; sc->params.netids = sc->tids.netids; sc->params.eo_wr_cred = val[2]; sc->params.ethoffload = 1; } if (sc->toecaps) { /* query offload-related parameters */ param[0] = FW_PARAM_DEV(NTID); param[1] = FW_PARAM_PFVF(SERVER_START); param[2] = FW_PARAM_PFVF(SERVER_END); param[3] = FW_PARAM_PFVF(TDDP_START); param[4] = FW_PARAM_PFVF(TDDP_END); param[5] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query TOE parameters: %d.\n", rc); return (rc); } sc->tids.ntids = val[0]; sc->tids.natids = min(sc->tids.ntids / 2, MAX_ATIDS); sc->tids.stid_base = val[1]; sc->tids.nstids = val[2] - val[1] + 1; sc->vres.ddp.start = val[3]; sc->vres.ddp.size = val[4] - val[3] + 1; sc->params.ofldq_wr_cred = val[5]; sc->params.offload = 1; } if (sc->rdmacaps) { param[0] = FW_PARAM_PFVF(STAG_START); param[1] = FW_PARAM_PFVF(STAG_END); param[2] = FW_PARAM_PFVF(RQ_START); param[3] = FW_PARAM_PFVF(RQ_END); param[4] = FW_PARAM_PFVF(PBL_START); param[5] = FW_PARAM_PFVF(PBL_END); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query RDMA parameters(1): %d.\n", rc); return (rc); } sc->vres.stag.start = val[0]; sc->vres.stag.size = val[1] - val[0] + 1; sc->vres.rq.start = val[2]; sc->vres.rq.size = val[3] - val[2] + 1; sc->vres.pbl.start = val[4]; sc->vres.pbl.size = val[5] - val[4] + 1; param[0] = FW_PARAM_PFVF(SQRQ_START); param[1] = 
FW_PARAM_PFVF(SQRQ_END); param[2] = FW_PARAM_PFVF(CQ_START); param[3] = FW_PARAM_PFVF(CQ_END); param[4] = FW_PARAM_PFVF(OCQ_START); param[5] = FW_PARAM_PFVF(OCQ_END); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query RDMA parameters(2): %d.\n", rc); return (rc); } sc->vres.qp.start = val[0]; sc->vres.qp.size = val[1] - val[0] + 1; sc->vres.cq.start = val[2]; sc->vres.cq.size = val[3] - val[2] + 1; sc->vres.ocq.start = val[4]; sc->vres.ocq.size = val[5] - val[4] + 1; param[0] = FW_PARAM_PFVF(SRQ_START); param[1] = FW_PARAM_PFVF(SRQ_END); param[2] = FW_PARAM_DEV(MAXORDIRD_QP); param[3] = FW_PARAM_DEV(MAXIRD_ADAPTER); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 4, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query RDMA parameters(3): %d.\n", rc); return (rc); } sc->vres.srq.start = val[0]; sc->vres.srq.size = val[1] - val[0] + 1; sc->params.max_ordird_qp = val[2]; sc->params.max_ird_adapter = val[3]; } if (sc->iscsicaps) { param[0] = FW_PARAM_PFVF(ISCSI_START); param[1] = FW_PARAM_PFVF(ISCSI_END); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query iSCSI parameters: %d.\n", rc); return (rc); } sc->vres.iscsi.start = val[0]; sc->vres.iscsi.size = val[1] - val[0] + 1; } if (sc->cryptocaps & FW_CAPS_CONFIG_TLSKEYS) { param[0] = FW_PARAM_PFVF(TLS_START); param[1] = FW_PARAM_PFVF(TLS_END); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query TLS parameters: %d.\n", rc); return (rc); } sc->vres.key.start = val[0]; sc->vres.key.size = val[1] - val[0] + 1; } t4_init_sge_params(sc); /* * We've got the params we wanted to query via the firmware. Now grab * some others directly from the chip. 
*/ rc = t4_read_chip_settings(sc); return (rc); } static int set_params__post_init(struct adapter *sc) { uint32_t param, val; #ifdef TCP_OFFLOAD int i, v, shift; #endif /* ask for encapsulated CPLs */ param = FW_PARAM_PFVF(CPLFW4MSG_ENCAP); val = 1; (void)t4_set_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); #ifdef TCP_OFFLOAD /* * Override the TOE timers with user provided tunables. This is not the * recommended way to change the timers (the firmware config file is) so * these tunables are not documented. * * All the timer tunables are in microseconds. */ if (t4_toe_keepalive_idle != 0) { v = us_to_tcp_ticks(sc, t4_toe_keepalive_idle); v &= M_KEEPALIVEIDLE; t4_set_reg_field(sc, A_TP_KEEP_IDLE, V_KEEPALIVEIDLE(M_KEEPALIVEIDLE), V_KEEPALIVEIDLE(v)); } if (t4_toe_keepalive_interval != 0) { v = us_to_tcp_ticks(sc, t4_toe_keepalive_interval); v &= M_KEEPALIVEINTVL; t4_set_reg_field(sc, A_TP_KEEP_INTVL, V_KEEPALIVEINTVL(M_KEEPALIVEINTVL), V_KEEPALIVEINTVL(v)); } if (t4_toe_keepalive_count != 0) { v = t4_toe_keepalive_count & M_KEEPALIVEMAXR2; t4_set_reg_field(sc, A_TP_SHIFT_CNT, V_KEEPALIVEMAXR1(M_KEEPALIVEMAXR1) | V_KEEPALIVEMAXR2(M_KEEPALIVEMAXR2), V_KEEPALIVEMAXR1(1) | V_KEEPALIVEMAXR2(v)); } if (t4_toe_rexmt_min != 0) { v = us_to_tcp_ticks(sc, t4_toe_rexmt_min); v &= M_RXTMIN; t4_set_reg_field(sc, A_TP_RXT_MIN, V_RXTMIN(M_RXTMIN), V_RXTMIN(v)); } if (t4_toe_rexmt_max != 0) { v = us_to_tcp_ticks(sc, t4_toe_rexmt_max); v &= M_RXTMAX; t4_set_reg_field(sc, A_TP_RXT_MAX, V_RXTMAX(M_RXTMAX), V_RXTMAX(v)); } if (t4_toe_rexmt_count != 0) { v = t4_toe_rexmt_count & M_RXTSHIFTMAXR2; t4_set_reg_field(sc, A_TP_SHIFT_CNT, V_RXTSHIFTMAXR1(M_RXTSHIFTMAXR1) | V_RXTSHIFTMAXR2(M_RXTSHIFTMAXR2), V_RXTSHIFTMAXR1(1) | V_RXTSHIFTMAXR2(v)); } for (i = 0; i < nitems(t4_toe_rexmt_backoff); i++) { if (t4_toe_rexmt_backoff[i] != -1) { v = t4_toe_rexmt_backoff[i] & M_TIMERBACKOFFINDEX0; shift = (i & 3) << 3; t4_set_reg_field(sc, A_TP_TCP_BACKOFF_REG0 + (i & ~3), M_TIMERBACKOFFINDEX0 << shift, v 
<< shift); } } #endif return (0); } #undef FW_PARAM_PFVF #undef FW_PARAM_DEV static void t4_set_desc(struct adapter *sc) { char buf[128]; struct adapter_params *p = &sc->params; snprintf(buf, sizeof(buf), "Chelsio %s", p->vpd.id); device_set_desc_copy(sc->dev, buf); } static void build_medialist(struct port_info *pi, struct ifmedia *media) { int m; PORT_LOCK_ASSERT_OWNED(pi); ifmedia_removeall(media); /* * XXX: Would it be better to ifmedia_add all 4 combinations of pause * settings for every speed instead of just txpause|rxpause? ifconfig * media display looks much better if autoselect is the only case where * ifm_current is different from ifm_active. If the user picks anything * except txpause|rxpause the display is ugly. */ m = IFM_ETHER | IFM_FDX | IFM_ETH_TXPAUSE | IFM_ETH_RXPAUSE; switch(pi->port_type) { case FW_PORT_TYPE_BT_XFI: case FW_PORT_TYPE_BT_XAUI: ifmedia_add(media, m | IFM_10G_T, 0, NULL); /* fall through */ case FW_PORT_TYPE_BT_SGMII: ifmedia_add(media, m | IFM_1000_T, 0, NULL); ifmedia_add(media, m | IFM_100_TX, 0, NULL); ifmedia_add(media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(media, IFM_ETHER | IFM_AUTO); break; case FW_PORT_TYPE_CX4: ifmedia_add(media, m | IFM_10G_CX4, 0, NULL); ifmedia_set(media, m | IFM_10G_CX4); break; case FW_PORT_TYPE_QSFP_10G: case FW_PORT_TYPE_SFP: case FW_PORT_TYPE_FIBER_XFI: case FW_PORT_TYPE_FIBER_XAUI: switch (pi->mod_type) { case FW_PORT_MOD_TYPE_LR: ifmedia_add(media, m | IFM_10G_LR, 0, NULL); ifmedia_set(media, m | IFM_10G_LR); break; case FW_PORT_MOD_TYPE_SR: ifmedia_add(media, m | IFM_10G_SR, 0, NULL); ifmedia_set(media, m | IFM_10G_SR); break; case FW_PORT_MOD_TYPE_LRM: ifmedia_add(media, m | IFM_10G_LRM, 0, NULL); ifmedia_set(media, m | IFM_10G_LRM); break; case FW_PORT_MOD_TYPE_TWINAX_PASSIVE: case FW_PORT_MOD_TYPE_TWINAX_ACTIVE: ifmedia_add(media, m | IFM_10G_TWINAX, 0, NULL); ifmedia_set(media, m | IFM_10G_TWINAX); break; case FW_PORT_MOD_TYPE_NONE: m &= ~IFM_FDX; ifmedia_add(media, m | IFM_NONE, 0, 
NULL); ifmedia_set(media, m | IFM_NONE); break; case FW_PORT_MOD_TYPE_NA: case FW_PORT_MOD_TYPE_ER: default: device_printf(pi->dev, "unknown port_type (%d), mod_type (%d)\n", pi->port_type, pi->mod_type); ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL); ifmedia_set(media, m | IFM_UNKNOWN); break; } break; case FW_PORT_TYPE_CR_QSFP: case FW_PORT_TYPE_SFP28: case FW_PORT_TYPE_KR_SFP28: switch (pi->mod_type) { case FW_PORT_MOD_TYPE_SR: ifmedia_add(media, m | IFM_25G_SR, 0, NULL); ifmedia_set(media, m | IFM_25G_SR); break; case FW_PORT_MOD_TYPE_TWINAX_PASSIVE: case FW_PORT_MOD_TYPE_TWINAX_ACTIVE: ifmedia_add(media, m | IFM_25G_CR, 0, NULL); ifmedia_set(media, m | IFM_25G_CR); break; case FW_PORT_MOD_TYPE_NONE: m &= ~IFM_FDX; ifmedia_add(media, m | IFM_NONE, 0, NULL); ifmedia_set(media, m | IFM_NONE); break; default: device_printf(pi->dev, "unknown port_type (%d), mod_type (%d)\n", pi->port_type, pi->mod_type); ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL); ifmedia_set(media, m | IFM_UNKNOWN); break; } break; case FW_PORT_TYPE_QSFP: switch (pi->mod_type) { case FW_PORT_MOD_TYPE_LR: ifmedia_add(media, m | IFM_40G_LR4, 0, NULL); ifmedia_set(media, m | IFM_40G_LR4); break; case FW_PORT_MOD_TYPE_SR: ifmedia_add(media, m | IFM_40G_SR4, 0, NULL); ifmedia_set(media, m | IFM_40G_SR4); break; case FW_PORT_MOD_TYPE_TWINAX_PASSIVE: case FW_PORT_MOD_TYPE_TWINAX_ACTIVE: ifmedia_add(media, m | IFM_40G_CR4, 0, NULL); ifmedia_set(media, m | IFM_40G_CR4); break; case FW_PORT_MOD_TYPE_NONE: m &= ~IFM_FDX; ifmedia_add(media, m | IFM_NONE, 0, NULL); ifmedia_set(media, m | IFM_NONE); break; default: device_printf(pi->dev, "unknown port_type (%d), mod_type (%d)\n", pi->port_type, pi->mod_type); ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL); ifmedia_set(media, m | IFM_UNKNOWN); break; } break; case FW_PORT_TYPE_KR4_100G: case FW_PORT_TYPE_CR4_QSFP: switch (pi->mod_type) { case FW_PORT_MOD_TYPE_LR: ifmedia_add(media, m | IFM_100G_LR4, 0, NULL); ifmedia_set(media, m | IFM_100G_LR4); break; case 
FW_PORT_MOD_TYPE_SR: ifmedia_add(media, m | IFM_100G_SR4, 0, NULL); ifmedia_set(media, m | IFM_100G_SR4); break; case FW_PORT_MOD_TYPE_TWINAX_PASSIVE: case FW_PORT_MOD_TYPE_TWINAX_ACTIVE: ifmedia_add(media, m | IFM_100G_CR4, 0, NULL); ifmedia_set(media, m | IFM_100G_CR4); break; case FW_PORT_MOD_TYPE_NONE: m &= ~IFM_FDX; ifmedia_add(media, m | IFM_NONE, 0, NULL); ifmedia_set(media, m | IFM_NONE); break; default: device_printf(pi->dev, "unknown port_type (%d), mod_type (%d)\n", pi->port_type, pi->mod_type); ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL); ifmedia_set(media, m | IFM_UNKNOWN); break; } break; default: device_printf(pi->dev, "unknown port_type (%d), mod_type (%d)\n", pi->port_type, pi->mod_type); ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL); ifmedia_set(media, m | IFM_UNKNOWN); break; } } /* * Update all the requested_* fields in the link config and then send a mailbox * command to apply the settings. */ static void init_l1cfg(struct port_info *pi) { struct adapter *sc = pi->adapter; struct link_config *lc = &pi->link_cfg; int rc; ASSERT_SYNCHRONIZED_OP(sc); lc->requested_speed = port_top_speed(pi); /* in Gbps */ if (t4_autoneg != 0 && lc->supported & FW_PORT_CAP_ANEG) { lc->requested_aneg = AUTONEG_ENABLE; } else { lc->requested_aneg = AUTONEG_DISABLE; } lc->requested_fc = t4_pause_settings & (PAUSE_TX | PAUSE_RX); if (t4_fec != -1) { lc->requested_fec = t4_fec & (FEC_RS | FEC_BASER_RS | FEC_RESERVED); } else { /* Use the suggested value provided by the firmware in acaps */ if (lc->advertising & FW_PORT_CAP_FEC_RS) lc->requested_fec = FEC_RS; else if (lc->advertising & FW_PORT_CAP_FEC_BASER_RS) lc->requested_fec = FEC_BASER_RS; else if (lc->advertising & FW_PORT_CAP_FEC_RESERVED) lc->requested_fec = FEC_RESERVED; else lc->requested_fec = 0; } rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, lc); if (rc != 0) { device_printf(pi->dev, "l1cfg failed: %d\n", rc); } else { lc->fc = lc->requested_fc; lc->fec = lc->requested_fec; } } #define FW_MAC_EXACT_CHUNK 7 
/* * Program the port's XGMAC based on parameters in ifnet. The caller also * indicates which parameters should be programmed (the rest are left alone). */ int update_mac_settings(struct ifnet *ifp, int flags) { int rc = 0; struct vi_info *vi = ifp->if_softc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; int mtu = -1, promisc = -1, allmulti = -1, vlanex = -1; ASSERT_SYNCHRONIZED_OP(sc); KASSERT(flags, ("%s: not told what to update.", __func__)); if (flags & XGMAC_MTU) mtu = ifp->if_mtu; if (flags & XGMAC_PROMISC) promisc = ifp->if_flags & IFF_PROMISC ? 1 : 0; if (flags & XGMAC_ALLMULTI) allmulti = ifp->if_flags & IFF_ALLMULTI ? 1 : 0; if (flags & XGMAC_VLANEX) vlanex = ifp->if_capenable & IFCAP_VLAN_HWTAGGING ? 1 : 0; if (flags & (XGMAC_MTU|XGMAC_PROMISC|XGMAC_ALLMULTI|XGMAC_VLANEX)) { rc = -t4_set_rxmode(sc, sc->mbox, vi->viid, mtu, promisc, allmulti, 1, vlanex, false); if (rc) { if_printf(ifp, "set_rxmode (%x) failed: %d\n", flags, rc); return (rc); } } if (flags & XGMAC_UCADDR) { uint8_t ucaddr[ETHER_ADDR_LEN]; bcopy(IF_LLADDR(ifp), ucaddr, sizeof(ucaddr)); rc = t4_change_mac(sc, sc->mbox, vi->viid, vi->xact_addr_filt, ucaddr, true, true); if (rc < 0) { rc = -rc; if_printf(ifp, "change_mac failed: %d\n", rc); return (rc); } else { vi->xact_addr_filt = rc; rc = 0; } } if (flags & XGMAC_MCADDRS) { const uint8_t *mcaddr[FW_MAC_EXACT_CHUNK]; int del = 1; uint64_t hash = 0; struct ifmultiaddr *ifma; int i = 0, j; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; mcaddr[i] = LLADDR((struct sockaddr_dl *)ifma->ifma_addr); MPASS(ETHER_IS_MULTICAST(mcaddr[i])); i++; if (i == FW_MAC_EXACT_CHUNK) { rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid, del, i, mcaddr, NULL, &hash, 0); if (rc < 0) { rc = -rc; for (j = 0; j < i; j++) { if_printf(ifp, "failed to add mc address" " %02x:%02x:%02x:" "%02x:%02x:%02x rc=%d\n", mcaddr[j][0], mcaddr[j][1], mcaddr[j][2], mcaddr[j][3], 
mcaddr[j][4], mcaddr[j][5], rc); } goto mcfail; } del = 0; i = 0; } } if (i > 0) { rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid, del, i, mcaddr, NULL, &hash, 0); if (rc < 0) { rc = -rc; for (j = 0; j < i; j++) { if_printf(ifp, "failed to add mc address" " %02x:%02x:%02x:" "%02x:%02x:%02x rc=%d\n", mcaddr[j][0], mcaddr[j][1], mcaddr[j][2], mcaddr[j][3], mcaddr[j][4], mcaddr[j][5], rc); } goto mcfail; } } rc = -t4_set_addr_hash(sc, sc->mbox, vi->viid, 0, hash, 0); if (rc != 0) if_printf(ifp, "failed to set mc address hash: %d", rc); mcfail: if_maddr_runlock(ifp); } return (rc); } /* * {begin|end}_synchronized_op must be called from the same thread. */ int begin_synchronized_op(struct adapter *sc, struct vi_info *vi, int flags, char *wmesg) { int rc, pri; #ifdef WITNESS /* the caller thinks it's ok to sleep, but is it really? */ if (flags & SLEEP_OK) WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "begin_synchronized_op"); #endif if (INTR_OK) pri = PCATCH; else pri = 0; ADAPTER_LOCK(sc); for (;;) { if (vi && IS_DOOMED(vi)) { rc = ENXIO; goto done; } if (!IS_BUSY(sc)) { rc = 0; break; } if (!(flags & SLEEP_OK)) { rc = EBUSY; goto done; } if (mtx_sleep(&sc->flags, &sc->sc_lock, pri, wmesg, 0)) { rc = EINTR; goto done; } } KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__)); SET_BUSY(sc); #ifdef INVARIANTS sc->last_op = wmesg; sc->last_op_thr = curthread; sc->last_op_flags = flags; #endif done: if (!(flags & HOLD_LOCK) || rc) ADAPTER_UNLOCK(sc); return (rc); } /* * Tell if_ioctl and if_init that the VI is going away. This is * special variant of begin_synchronized_op and must be paired with a * call to end_synchronized_op. 
*/ void doom_vi(struct adapter *sc, struct vi_info *vi) { ADAPTER_LOCK(sc); SET_DOOMED(vi); wakeup(&sc->flags); while (IS_BUSY(sc)) mtx_sleep(&sc->flags, &sc->sc_lock, 0, "t4detach", 0); SET_BUSY(sc); #ifdef INVARIANTS sc->last_op = "t4detach"; sc->last_op_thr = curthread; sc->last_op_flags = 0; #endif ADAPTER_UNLOCK(sc); } /* * {begin|end}_synchronized_op must be called from the same thread. */ void end_synchronized_op(struct adapter *sc, int flags) { if (flags & LOCK_HELD) ADAPTER_LOCK_ASSERT_OWNED(sc); else ADAPTER_LOCK(sc); KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__)); CLR_BUSY(sc); wakeup(&sc->flags); ADAPTER_UNLOCK(sc); } static int cxgbe_init_synchronized(struct vi_info *vi) { struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct ifnet *ifp = vi->ifp; int rc = 0, i; struct sge_txq *txq; ASSERT_SYNCHRONIZED_OP(sc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) return (0); /* already running */ if (!(sc->flags & FULL_INIT_DONE) && ((rc = adapter_full_init(sc)) != 0)) return (rc); /* error message displayed already */ if (!(vi->flags & VI_INIT_DONE) && ((rc = vi_full_init(vi)) != 0)) return (rc); /* error message displayed already */ rc = update_mac_settings(ifp, XGMAC_ALL); if (rc) goto done; /* error message displayed already */ rc = -t4_enable_vi(sc, sc->mbox, vi->viid, true, true); if (rc != 0) { if_printf(ifp, "enable_vi failed: %d\n", rc); goto done; } /* * Can't fail from this point onwards. Review cxgbe_uninit_synchronized * if this changes. */ for_each_txq(vi, i, txq) { TXQ_LOCK(txq); txq->eq.flags |= EQ_ENABLED; TXQ_UNLOCK(txq); } /* * The first iq of the first port to come up is used for tracing. */ if (sc->traceq < 0 && IS_MAIN_VI(vi)) { sc->traceq = sc->sge.rxq[vi->first_rxq].iq.abs_id; t4_write_reg(sc, is_t4(sc) ? 
A_MPS_TRC_RSS_CONTROL : A_MPS_T5_TRC_RSS_CONTROL, V_RSSCONTROL(pi->tx_chan) | V_QUEUENUMBER(sc->traceq)); pi->flags |= HAS_TRACEQ; } /* all ok */ PORT_LOCK(pi); if (pi->up_vis++ == 0) { t4_update_port_info(pi); build_medialist(pi, &pi->media); init_l1cfg(pi); } ifp->if_drv_flags |= IFF_DRV_RUNNING; if (pi->nvi > 1 || sc->flags & IS_VF) callout_reset(&vi->tick, hz, vi_tick, vi); else callout_reset(&pi->tick, hz, cxgbe_tick, pi); PORT_UNLOCK(pi); done: if (rc != 0) cxgbe_uninit_synchronized(vi); return (rc); } /* * Idempotent. */ static int cxgbe_uninit_synchronized(struct vi_info *vi) { struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct ifnet *ifp = vi->ifp; int rc, i; struct sge_txq *txq; ASSERT_SYNCHRONIZED_OP(sc); if (!(vi->flags & VI_INIT_DONE)) { KASSERT(!(ifp->if_drv_flags & IFF_DRV_RUNNING), ("uninited VI is running")); return (0); } /* * Disable the VI so that all its data in either direction is discarded * by the MPS. Leave everything else (the queues, interrupts, and 1Hz * tick) intact as the TP can deliver negative advice or data that it's * holding in its RAM (for an offloaded connection) even after the VI is * disabled. */ rc = -t4_enable_vi(sc, sc->mbox, vi->viid, false, false); if (rc) { if_printf(ifp, "disable_vi failed: %d\n", rc); return (rc); } for_each_txq(vi, i, txq) { TXQ_LOCK(txq); txq->eq.flags &= ~EQ_ENABLED; TXQ_UNLOCK(txq); } PORT_LOCK(pi); if (pi->nvi > 1 || sc->flags & IS_VF) callout_stop(&vi->tick); else callout_stop(&pi->tick); if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { PORT_UNLOCK(pi); return (0); } ifp->if_drv_flags &= ~IFF_DRV_RUNNING; pi->up_vis--; if (pi->up_vis > 0) { PORT_UNLOCK(pi); return (0); } PORT_UNLOCK(pi); pi->link_cfg.link_ok = 0; pi->link_cfg.speed = 0; pi->link_cfg.link_down_rc = 255; t4_os_link_changed(pi); pi->old_link_cfg = pi->link_cfg; return (0); } /* * It is ok for this function to fail midway and return right away. 
t4_detach * will walk the entire sc->irq list and clean up whatever is valid. */ int t4_setup_intr_handlers(struct adapter *sc) { int rc, rid, p, q, v; char s[8]; struct irq *irq; struct port_info *pi; struct vi_info *vi; struct sge *sge = &sc->sge; struct sge_rxq *rxq; #ifdef TCP_OFFLOAD struct sge_ofld_rxq *ofld_rxq; #endif #ifdef DEV_NETMAP struct sge_nm_rxq *nm_rxq; #endif #ifdef RSS int nbuckets = rss_getnumbuckets(); #endif /* * Setup interrupts. */ irq = &sc->irq[0]; rid = sc->intr_type == INTR_INTX ? 0 : 1; if (forwarding_intr_to_fwq(sc)) return (t4_alloc_irq(sc, irq, rid, t4_intr_all, sc, "all")); /* Multiple interrupts. */ if (sc->flags & IS_VF) KASSERT(sc->intr_count >= T4VF_EXTRA_INTR + sc->params.nports, ("%s: too few intr.", __func__)); else KASSERT(sc->intr_count >= T4_EXTRA_INTR + sc->params.nports, ("%s: too few intr.", __func__)); /* The first one is always error intr on PFs */ if (!(sc->flags & IS_VF)) { rc = t4_alloc_irq(sc, irq, rid, t4_intr_err, sc, "err"); if (rc != 0) return (rc); irq++; rid++; } /* The second one is always the firmware event queue (first on VFs) */ rc = t4_alloc_irq(sc, irq, rid, t4_intr_evt, &sge->fwq, "evt"); if (rc != 0) return (rc); irq++; rid++; for_each_port(sc, p) { pi = sc->port[p]; for_each_vi(pi, v, vi) { vi->first_intr = rid - 1; if (vi->nnmrxq > 0) { int n = max(vi->nrxq, vi->nnmrxq); rxq = &sge->rxq[vi->first_rxq]; #ifdef DEV_NETMAP nm_rxq = &sge->nm_rxq[vi->first_nm_rxq]; #endif for (q = 0; q < n; q++) { snprintf(s, sizeof(s), "%x%c%x", p, 'a' + v, q); if (q < vi->nrxq) irq->rxq = rxq++; #ifdef DEV_NETMAP if (q < vi->nnmrxq) irq->nm_rxq = nm_rxq++; #endif rc = t4_alloc_irq(sc, irq, rid, t4_vi_intr, irq, s); if (rc != 0) return (rc); #ifdef RSS if (q < vi->nrxq) { bus_bind_intr(sc->dev, irq->res, rss_getcpu(q % nbuckets)); } #endif irq++; rid++; vi->nintr++; } } else { for_each_rxq(vi, q, rxq) { snprintf(s, sizeof(s), "%x%c%x", p, 'a' + v, q); rc = t4_alloc_irq(sc, irq, rid, t4_intr, rxq, s); if (rc != 0) 
return (rc); #ifdef RSS bus_bind_intr(sc->dev, irq->res, rss_getcpu(q % nbuckets)); #endif irq++; rid++; vi->nintr++; } } #ifdef TCP_OFFLOAD for_each_ofld_rxq(vi, q, ofld_rxq) { snprintf(s, sizeof(s), "%x%c%x", p, 'A' + v, q); rc = t4_alloc_irq(sc, irq, rid, t4_intr, ofld_rxq, s); if (rc != 0) return (rc); irq++; rid++; vi->nintr++; } #endif } } MPASS(irq == &sc->irq[sc->intr_count]); return (0); } int adapter_full_init(struct adapter *sc) { int rc, i; #ifdef RSS uint32_t raw_rss_key[RSS_KEYSIZE / sizeof(uint32_t)]; uint32_t rss_key[RSS_KEYSIZE / sizeof(uint32_t)]; #endif ASSERT_SYNCHRONIZED_OP(sc); ADAPTER_LOCK_ASSERT_NOTOWNED(sc); KASSERT((sc->flags & FULL_INIT_DONE) == 0, ("%s: FULL_INIT_DONE already", __func__)); /* * queues that belong to the adapter (not any particular port). */ rc = t4_setup_adapter_queues(sc); if (rc != 0) goto done; for (i = 0; i < nitems(sc->tq); i++) { sc->tq[i] = taskqueue_create("t4 taskq", M_NOWAIT, taskqueue_thread_enqueue, &sc->tq[i]); if (sc->tq[i] == NULL) { device_printf(sc->dev, "failed to allocate task queue %d\n", i); rc = ENOMEM; goto done; } taskqueue_start_threads(&sc->tq[i], 1, PI_NET, "%s tq%d", device_get_nameunit(sc->dev), i); } #ifdef RSS MPASS(RSS_KEYSIZE == 40); rss_getkey((void *)&raw_rss_key[0]); for (i = 0; i < nitems(rss_key); i++) { rss_key[i] = htobe32(raw_rss_key[nitems(rss_key) - 1 - i]); } t4_write_rss_key(sc, &rss_key[0], -1, 1); #endif if (!(sc->flags & IS_VF)) t4_intr_enable(sc); sc->flags |= FULL_INIT_DONE; done: if (rc != 0) adapter_full_uninit(sc); return (rc); } int adapter_full_uninit(struct adapter *sc) { int i; ADAPTER_LOCK_ASSERT_NOTOWNED(sc); t4_teardown_adapter_queues(sc); for (i = 0; i < nitems(sc->tq) && sc->tq[i]; i++) { taskqueue_free(sc->tq[i]); sc->tq[i] = NULL; } sc->flags &= ~FULL_INIT_DONE; return (0); } #ifdef RSS #define SUPPORTED_RSS_HASHTYPES (RSS_HASHTYPE_RSS_IPV4 | \ RSS_HASHTYPE_RSS_TCP_IPV4 | RSS_HASHTYPE_RSS_IPV6 | \ RSS_HASHTYPE_RSS_TCP_IPV6 | RSS_HASHTYPE_RSS_UDP_IPV4 | \ 
RSS_HASHTYPE_RSS_UDP_IPV6) /* Translates kernel hash types to hardware. */ static int hashconfig_to_hashen(int hashconfig) { int hashen = 0; if (hashconfig & RSS_HASHTYPE_RSS_IPV4) hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN; if (hashconfig & RSS_HASHTYPE_RSS_IPV6) hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN; if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV4) { hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN | F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN; } if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV6) { hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN | F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN; } if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV4) hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN; if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV6) hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN; return (hashen); } /* Translates hardware hash types to kernel. */ static int hashen_to_hashconfig(int hashen) { int hashconfig = 0; if (hashen & F_FW_RSS_VI_CONFIG_CMD_UDPEN) { /* * If UDP hashing was enabled it must have been enabled for * either IPv4 or IPv6 (inclusive or). Enabling UDP without * enabling any 4-tuple hash is nonsense configuration. 
*/ MPASS(hashen & (F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN | F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN)); if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN) hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV4; if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN) hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV6; } if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN) hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV4; if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN) hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV6; if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN) hashconfig |= RSS_HASHTYPE_RSS_IPV4; if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN) hashconfig |= RSS_HASHTYPE_RSS_IPV6; return (hashconfig); } #endif int vi_full_init(struct vi_info *vi) { struct adapter *sc = vi->pi->adapter; struct ifnet *ifp = vi->ifp; uint16_t *rss; struct sge_rxq *rxq; int rc, i, j, hashen; #ifdef RSS int nbuckets = rss_getnumbuckets(); int hashconfig = rss_gethashconfig(); int extra; #endif ASSERT_SYNCHRONIZED_OP(sc); KASSERT((vi->flags & VI_INIT_DONE) == 0, ("%s: VI_INIT_DONE already", __func__)); sysctl_ctx_init(&vi->ctx); vi->flags |= VI_SYSCTL_CTX; /* * Allocate tx/rx/fl queues for this VI. */ rc = t4_setup_vi_queues(vi); if (rc != 0) goto done; /* error message displayed already */ /* * Setup RSS for this VI. Save a copy of the RSS table for later use. 
*/ if (vi->nrxq > vi->rss_size) { if_printf(ifp, "nrxq (%d) > hw RSS table size (%d); " "some queues will never receive traffic.\n", vi->nrxq, vi->rss_size); } else if (vi->rss_size % vi->nrxq) { if_printf(ifp, "nrxq (%d), hw RSS table size (%d); " "expect uneven traffic distribution.\n", vi->nrxq, vi->rss_size); } #ifdef RSS if (vi->nrxq != nbuckets) { if_printf(ifp, "nrxq (%d) != kernel RSS buckets (%d);" "performance will be impacted.\n", vi->nrxq, nbuckets); } #endif rss = malloc(vi->rss_size * sizeof (*rss), M_CXGBE, M_ZERO | M_WAITOK); for (i = 0; i < vi->rss_size;) { #ifdef RSS j = rss_get_indirection_to_bucket(i); j %= vi->nrxq; rxq = &sc->sge.rxq[vi->first_rxq + j]; rss[i++] = rxq->iq.abs_id; #else for_each_rxq(vi, j, rxq) { rss[i++] = rxq->iq.abs_id; if (i == vi->rss_size) break; } #endif } rc = -t4_config_rss_range(sc, sc->mbox, vi->viid, 0, vi->rss_size, rss, vi->rss_size); if (rc != 0) { if_printf(ifp, "rss_config failed: %d\n", rc); goto done; } #ifdef RSS hashen = hashconfig_to_hashen(hashconfig); /* * We may have had to enable some hashes even though the global config * wants them disabled. This is a potential problem that must be * reported to the user. */ extra = hashen_to_hashconfig(hashen) ^ hashconfig; /* * If we consider only the supported hash types, then the enabled hashes * are a superset of the requested hashes. In other words, there cannot * be any supported hash that was requested but not enabled, but there * can be hashes that were not requested but had to be enabled. 
*/ extra &= SUPPORTED_RSS_HASHTYPES; MPASS((extra & hashconfig) == 0); if (extra) { if_printf(ifp, "global RSS config (0x%x) cannot be accommodated.\n", hashconfig); } if (extra & RSS_HASHTYPE_RSS_IPV4) if_printf(ifp, "IPv4 2-tuple hashing forced on.\n"); if (extra & RSS_HASHTYPE_RSS_TCP_IPV4) if_printf(ifp, "TCP/IPv4 4-tuple hashing forced on.\n"); if (extra & RSS_HASHTYPE_RSS_IPV6) if_printf(ifp, "IPv6 2-tuple hashing forced on.\n"); if (extra & RSS_HASHTYPE_RSS_TCP_IPV6) if_printf(ifp, "TCP/IPv6 4-tuple hashing forced on.\n"); if (extra & RSS_HASHTYPE_RSS_UDP_IPV4) if_printf(ifp, "UDP/IPv4 4-tuple hashing forced on.\n"); if (extra & RSS_HASHTYPE_RSS_UDP_IPV6) if_printf(ifp, "UDP/IPv6 4-tuple hashing forced on.\n"); #else hashen = F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN | F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN | F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN | F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN | F_FW_RSS_VI_CONFIG_CMD_UDPEN; #endif rc = -t4_config_vi_rss(sc, sc->mbox, vi->viid, hashen, rss[0], 0, 0); if (rc != 0) { if_printf(ifp, "rss hash/defaultq config failed: %d\n", rc); goto done; } vi->rss = rss; vi->flags |= VI_INIT_DONE; done: if (rc != 0) vi_full_uninit(vi); return (rc); } /* * Idempotent. */ int vi_full_uninit(struct vi_info *vi) { struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; int i; struct sge_rxq *rxq; struct sge_txq *txq; #ifdef TCP_OFFLOAD struct sge_ofld_rxq *ofld_rxq; struct sge_wrq *ofld_txq; #endif if (vi->flags & VI_INIT_DONE) { /* Need to quiesce queues. */ /* XXX: Only for the first VI? 
*/ if (IS_MAIN_VI(vi) && !(sc->flags & IS_VF)) quiesce_wrq(sc, &sc->sge.ctrlq[pi->port_id]); for_each_txq(vi, i, txq) { quiesce_txq(sc, txq); } #ifdef TCP_OFFLOAD for_each_ofld_txq(vi, i, ofld_txq) { quiesce_wrq(sc, ofld_txq); } #endif for_each_rxq(vi, i, rxq) { quiesce_iq(sc, &rxq->iq); quiesce_fl(sc, &rxq->fl); } #ifdef TCP_OFFLOAD for_each_ofld_rxq(vi, i, ofld_rxq) { quiesce_iq(sc, &ofld_rxq->iq); quiesce_fl(sc, &ofld_rxq->fl); } #endif free(vi->rss, M_CXGBE); free(vi->nm_rss, M_CXGBE); } t4_teardown_vi_queues(vi); vi->flags &= ~VI_INIT_DONE; return (0); } static void quiesce_txq(struct adapter *sc, struct sge_txq *txq) { struct sge_eq *eq = &txq->eq; struct sge_qstat *spg = (void *)&eq->desc[eq->sidx]; (void) sc; /* unused */ #ifdef INVARIANTS TXQ_LOCK(txq); MPASS((eq->flags & EQ_ENABLED) == 0); TXQ_UNLOCK(txq); #endif /* Wait for the mp_ring to empty. */ while (!mp_ring_is_idle(txq->r)) { mp_ring_check_drainage(txq->r, 0); pause("rquiesce", 1); } /* Then wait for the hardware to finish. */ while (spg->cidx != htobe16(eq->pidx)) pause("equiesce", 1); /* Finally, wait for the driver to reclaim all descriptors. 
*/ while (eq->cidx != eq->pidx) pause("dquiesce", 1); } static void quiesce_wrq(struct adapter *sc, struct sge_wrq *wrq) { /* XXXTX */ } static void quiesce_iq(struct adapter *sc, struct sge_iq *iq) { (void) sc; /* unused */ /* Synchronize with the interrupt handler */ while (!atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_DISABLED)) pause("iqfree", 1); } static void quiesce_fl(struct adapter *sc, struct sge_fl *fl) { mtx_lock(&sc->sfl_lock); FL_LOCK(fl); fl->flags |= FL_DOOMED; FL_UNLOCK(fl); callout_stop(&sc->sfl_callout); mtx_unlock(&sc->sfl_lock); KASSERT((fl->flags & FL_STARVING) == 0, ("%s: still starving", __func__)); } static int t4_alloc_irq(struct adapter *sc, struct irq *irq, int rid, driver_intr_t *handler, void *arg, char *name) { int rc; irq->rid = rid; irq->res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &irq->rid, RF_SHAREABLE | RF_ACTIVE); if (irq->res == NULL) { device_printf(sc->dev, "failed to allocate IRQ for rid %d, name %s.\n", rid, name); return (ENOMEM); } rc = bus_setup_intr(sc->dev, irq->res, INTR_MPSAFE | INTR_TYPE_NET, NULL, handler, arg, &irq->tag); if (rc != 0) { device_printf(sc->dev, "failed to setup interrupt for rid %d, name %s: %d\n", rid, name, rc); } else if (name) bus_describe_intr(sc->dev, irq->res, irq->tag, "%s", name); return (rc); } static int t4_free_irq(struct adapter *sc, struct irq *irq) { if (irq->tag) bus_teardown_intr(sc->dev, irq->res, irq->tag); if (irq->res) bus_release_resource(sc->dev, SYS_RES_IRQ, irq->rid, irq->res); bzero(irq, sizeof(*irq)); return (0); } static void get_regs(struct adapter *sc, struct t4_regdump *regs, uint8_t *buf) { regs->version = chip_id(sc) | chip_rev(sc) << 10; t4_get_regs(sc, buf, regs->len); } #define A_PL_INDIR_CMD 0x1f8 #define S_PL_AUTOINC 31 #define M_PL_AUTOINC 0x1U #define V_PL_AUTOINC(x) ((x) << S_PL_AUTOINC) #define G_PL_AUTOINC(x) (((x) >> S_PL_AUTOINC) & M_PL_AUTOINC) #define S_PL_VFID 20 #define M_PL_VFID 0xffU #define V_PL_VFID(x) ((x) << S_PL_VFID) #define G_PL_VFID(x) 
(((x) >> S_PL_VFID) & M_PL_VFID) #define S_PL_ADDR 0 #define M_PL_ADDR 0xfffffU #define V_PL_ADDR(x) ((x) << S_PL_ADDR) #define G_PL_ADDR(x) (((x) >> S_PL_ADDR) & M_PL_ADDR) #define A_PL_INDIR_DATA 0x1fc static uint64_t read_vf_stat(struct adapter *sc, unsigned int viid, int reg) { u32 stats[2]; mtx_assert(&sc->reg_lock, MA_OWNED); if (sc->flags & IS_VF) { stats[0] = t4_read_reg(sc, VF_MPS_REG(reg)); stats[1] = t4_read_reg(sc, VF_MPS_REG(reg + 4)); } else { t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) | V_PL_VFID(G_FW_VIID_VIN(viid)) | V_PL_ADDR(VF_MPS_REG(reg))); stats[0] = t4_read_reg(sc, A_PL_INDIR_DATA); stats[1] = t4_read_reg(sc, A_PL_INDIR_DATA); } return (((uint64_t)stats[1]) << 32 | stats[0]); } static void t4_get_vi_stats(struct adapter *sc, unsigned int viid, struct fw_vi_stats_vf *stats) { #define GET_STAT(name) \ read_vf_stat(sc, viid, A_MPS_VF_STAT_##name##_L) stats->tx_bcast_bytes = GET_STAT(TX_VF_BCAST_BYTES); stats->tx_bcast_frames = GET_STAT(TX_VF_BCAST_FRAMES); stats->tx_mcast_bytes = GET_STAT(TX_VF_MCAST_BYTES); stats->tx_mcast_frames = GET_STAT(TX_VF_MCAST_FRAMES); stats->tx_ucast_bytes = GET_STAT(TX_VF_UCAST_BYTES); stats->tx_ucast_frames = GET_STAT(TX_VF_UCAST_FRAMES); stats->tx_drop_frames = GET_STAT(TX_VF_DROP_FRAMES); stats->tx_offload_bytes = GET_STAT(TX_VF_OFFLOAD_BYTES); stats->tx_offload_frames = GET_STAT(TX_VF_OFFLOAD_FRAMES); stats->rx_bcast_bytes = GET_STAT(RX_VF_BCAST_BYTES); stats->rx_bcast_frames = GET_STAT(RX_VF_BCAST_FRAMES); stats->rx_mcast_bytes = GET_STAT(RX_VF_MCAST_BYTES); stats->rx_mcast_frames = GET_STAT(RX_VF_MCAST_FRAMES); stats->rx_ucast_bytes = GET_STAT(RX_VF_UCAST_BYTES); stats->rx_ucast_frames = GET_STAT(RX_VF_UCAST_FRAMES); stats->rx_err_frames = GET_STAT(RX_VF_ERR_FRAMES); #undef GET_STAT } static void t4_clr_vi_stats(struct adapter *sc, unsigned int viid) { int reg; t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) | V_PL_VFID(G_FW_VIID_VIN(viid)) | V_PL_ADDR(VF_MPS_REG(A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L))); 
for (reg = A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L; reg <= A_MPS_VF_STAT_RX_VF_ERR_FRAMES_H; reg += 4) t4_write_reg(sc, A_PL_INDIR_DATA, 0); } static void vi_refresh_stats(struct adapter *sc, struct vi_info *vi) { struct timeval tv; const struct timeval interval = {0, 250000}; /* 250ms */ if (!(vi->flags & VI_INIT_DONE)) return; getmicrotime(&tv); timevalsub(&tv, &interval); if (timevalcmp(&tv, &vi->last_refreshed, <)) return; mtx_lock(&sc->reg_lock); t4_get_vi_stats(sc, vi->viid, &vi->stats); getmicrotime(&vi->last_refreshed); mtx_unlock(&sc->reg_lock); } static void cxgbe_refresh_stats(struct adapter *sc, struct port_info *pi) { u_int i, v, tnl_cong_drops, bg_map; struct timeval tv; const struct timeval interval = {0, 250000}; /* 250ms */ getmicrotime(&tv); timevalsub(&tv, &interval); if (timevalcmp(&tv, &pi->last_refreshed, <)) return; tnl_cong_drops = 0; t4_get_port_stats(sc, pi->tx_chan, &pi->stats); bg_map = pi->mps_bg_map; while (bg_map) { i = ffs(bg_map) - 1; mtx_lock(&sc->reg_lock); t4_read_indirect(sc, A_TP_MIB_INDEX, A_TP_MIB_DATA, &v, 1, A_TP_MIB_TNL_CNG_DROP_0 + i); mtx_unlock(&sc->reg_lock); tnl_cong_drops += v; bg_map &= ~(1 << i); } pi->tnl_cong_drops = tnl_cong_drops; getmicrotime(&pi->last_refreshed); } static void cxgbe_tick(void *arg) { struct port_info *pi = arg; struct adapter *sc = pi->adapter; PORT_LOCK_ASSERT_OWNED(pi); cxgbe_refresh_stats(sc, pi); callout_schedule(&pi->tick, hz); } void vi_tick(void *arg) { struct vi_info *vi = arg; struct adapter *sc = vi->pi->adapter; vi_refresh_stats(sc, vi); callout_schedule(&vi->tick, hz); } static void cxgbe_vlan_config(void *arg, struct ifnet *ifp, uint16_t vid) { struct ifnet *vlan; if (arg != ifp || ifp->if_type != IFT_ETHER) return; vlan = VLAN_DEVAT(ifp, vid); VLAN_SETCOOKIE(vlan, ifp); } /* * Should match fw_caps_config_ enums in t4fw_interface.h */ static char *caps_decoder[] = { "\20\001IPMI\002NCSI", /* 0: NBM */ "\20\001PPP\002QFC\003DCBX", /* 1: link */ "\20\001INGRESS\002EGRESS", /* 2: switch 
*/ "\20\001NIC\002VM\003IDS\004UM\005UM_ISGL" /* 3: NIC */ "\006HASHFILTER\007ETHOFLD", "\20\001TOE", /* 4: TOE */ "\20\001RDDP\002RDMAC", /* 5: RDMA */ "\20\001INITIATOR_PDU\002TARGET_PDU" /* 6: iSCSI */ "\003INITIATOR_CNXOFLD\004TARGET_CNXOFLD" "\005INITIATOR_SSNOFLD\006TARGET_SSNOFLD" "\007T10DIF" "\010INITIATOR_CMDOFLD\011TARGET_CMDOFLD", "\20\001LOOKASIDE\002TLSKEYS", /* 7: Crypto */ "\20\001INITIATOR\002TARGET\003CTRL_OFLD" /* 8: FCoE */ "\004PO_INITIATOR\005PO_TARGET", }; void t4_sysctls(struct adapter *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid *oid; struct sysctl_oid_list *children, *c0; static char *doorbells = {"\20\1UDB\2WCWR\3UDBWC\4KDB"}; ctx = device_get_sysctl_ctx(sc->dev); /* * dev.t4nex.X. */ oid = device_get_sysctl_tree(sc->dev); c0 = children = SYSCTL_CHILDREN(oid); sc->sc_do_rxcopy = 1; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "do_rx_copy", CTLFLAG_RW, &sc->sc_do_rxcopy, 1, "Do RX copy of small frames"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nports", CTLFLAG_RD, NULL, sc->params.nports, "# of ports"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "doorbells", CTLTYPE_STRING | CTLFLAG_RD, doorbells, sc->doorbells, sysctl_bitfield, "A", "available doorbells"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "core_clock", CTLFLAG_RD, NULL, sc->params.vpd.cclk, "core clock frequency (in KHz)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_timers", CTLTYPE_STRING | CTLFLAG_RD, sc->params.sge.timer_val, sizeof(sc->params.sge.timer_val), sysctl_int_array, "A", "interrupt holdoff timer values (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pkt_counts", CTLTYPE_STRING | CTLFLAG_RD, sc->params.sge.counter_val, sizeof(sc->params.sge.counter_val), sysctl_int_array, "A", "interrupt holdoff packet counter values"); t4_sge_sysctls(sc, ctx, children); sc->lro_timeout = 100; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "lro_timeout", CTLFLAG_RW, &sc->lro_timeout, 0, "lro inactive-flush timeout (in us)"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, 
"dflags", CTLFLAG_RW, &sc->debug_flags, 0, "flags to enable runtime debugging"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "tp_version", CTLFLAG_RD, sc->tp_version, 0, "TP microcode version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version", CTLFLAG_RD, sc->fw_version, 0, "firmware version"); if (sc->flags & IS_VF) return; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "hw_revision", CTLFLAG_RD, NULL, chip_rev(sc), "chip hardware revision"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "sn", CTLFLAG_RD, sc->params.vpd.sn, 0, "serial number"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "pn", CTLFLAG_RD, sc->params.vpd.pn, 0, "part number"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "ec", CTLFLAG_RD, sc->params.vpd.ec, 0, "engineering change"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "md_version", CTLFLAG_RD, sc->params.vpd.md, 0, "manufacturing diags version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "na", CTLFLAG_RD, sc->params.vpd.na, 0, "network address"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "er_version", CTLFLAG_RD, sc->er_version, 0, "expansion ROM version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "bs_version", CTLFLAG_RD, sc->bs_version, 0, "bootstrap firmware version"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "scfg_version", CTLFLAG_RD, NULL, sc->params.scfg_vers, "serial config version"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "vpd_version", CTLFLAG_RD, NULL, sc->params.vpd_vers, "VPD version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "cf", CTLFLAG_RD, sc->cfg_file, 0, "configuration file"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cfcsum", CTLFLAG_RD, NULL, sc->cfcsum, "config file checksum"); #define SYSCTL_CAP(name, n, text) \ SYSCTL_ADD_PROC(ctx, children, OID_AUTO, #name, \ CTLTYPE_STRING | CTLFLAG_RD, caps_decoder[n], sc->name, \ sysctl_bitfield, "A", "available " text " capabilities") SYSCTL_CAP(nbmcaps, 0, "NBM"); SYSCTL_CAP(linkcaps, 1, "link"); SYSCTL_CAP(switchcaps, 2, "switch"); SYSCTL_CAP(niccaps, 3, "NIC"); 
SYSCTL_CAP(toecaps, 4, "TCP offload"); SYSCTL_CAP(rdmacaps, 5, "RDMA"); SYSCTL_CAP(iscsicaps, 6, "iSCSI"); SYSCTL_CAP(cryptocaps, 7, "crypto"); SYSCTL_CAP(fcoecaps, 8, "FCoE"); #undef SYSCTL_CAP SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nfilters", CTLFLAG_RD, NULL, sc->tids.nftids, "number of filters"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature", CTLTYPE_INT | CTLFLAG_RD, sc, 0, sysctl_temperature, "I", "chip temperature (in Celsius)"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "core_vdd", CTLFLAG_RD, &sc->params.core_vdd, 0, "core Vdd (in mV)"); #ifdef SBUF_DRAIN /* * dev.t4nex.X.misc. Marked CTLFLAG_SKIP to avoid information overload. */ oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "misc", CTLFLAG_RD | CTLFLAG_SKIP, NULL, "logs and miscellaneous information"); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cctrl", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cctrl, "A", "congestion control"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp0", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cim_ibq_obq, "A", "CIM IBQ 0 (TP0)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp1", CTLTYPE_STRING | CTLFLAG_RD, sc, 1, sysctl_cim_ibq_obq, "A", "CIM IBQ 1 (TP1)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ulp", CTLTYPE_STRING | CTLFLAG_RD, sc, 2, sysctl_cim_ibq_obq, "A", "CIM IBQ 2 (ULP)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge0", CTLTYPE_STRING | CTLFLAG_RD, sc, 3, sysctl_cim_ibq_obq, "A", "CIM IBQ 3 (SGE0)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge1", CTLTYPE_STRING | CTLFLAG_RD, sc, 4, sysctl_cim_ibq_obq, "A", "CIM IBQ 4 (SGE1)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ncsi", CTLTYPE_STRING | CTLFLAG_RD, sc, 5, sysctl_cim_ibq_obq, "A", "CIM IBQ 5 (NCSI)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, chip_id(sc) <= CHELSIO_T5 ? 
sysctl_cim_la : sysctl_cim_la_t6, "A", "CIM logic analyzer"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ma_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cim_ma_la, "A", "CIM MA logic analyzer"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp0", CTLTYPE_STRING | CTLFLAG_RD, sc, 0 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 0 (ULP0)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp1", CTLTYPE_STRING | CTLFLAG_RD, sc, 1 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 1 (ULP1)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp2", CTLTYPE_STRING | CTLFLAG_RD, sc, 2 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 2 (ULP2)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp3", CTLTYPE_STRING | CTLFLAG_RD, sc, 3 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 3 (ULP3)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge", CTLTYPE_STRING | CTLFLAG_RD, sc, 4 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 4 (SGE)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ncsi", CTLTYPE_STRING | CTLFLAG_RD, sc, 5 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 5 (NCSI)"); if (chip_id(sc) > CHELSIO_T4) { SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge0_rx", CTLTYPE_STRING | CTLFLAG_RD, sc, 6 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 6 (SGE0-RX)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge1_rx", CTLTYPE_STRING | CTLFLAG_RD, sc, 7 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 7 (SGE1-RX)"); } SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_pif_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cim_pif_la, "A", "CIM PIF logic analyzer"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_qcfg", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cim_qcfg, "A", "CIM queue configuration"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cpl_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cpl_stats, "A", "CPL statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ddp_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_ddp_stats, "A", 
"non-TCP DDP statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "devlog", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_devlog, "A", "firmware's device log"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fcoe_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_fcoe_stats, "A", "FCoE statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "hw_sched", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_hw_sched, "A", "hardware scheduler "); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "l2t", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_l2t, "A", "hardware L2 table"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "lb_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_lb_stats, "A", "loopback statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "meminfo", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_meminfo, "A", "memory regions"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "mps_tcam", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, chip_id(sc) <= CHELSIO_T5 ? sysctl_mps_tcam : sysctl_mps_tcam_t6, "A", "MPS TCAM entries"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "path_mtus", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_path_mtus, "A", "path MTUs"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pm_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_pm_stats, "A", "PM statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdma_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_rdma_stats, "A", "RDMA statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tcp_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tcp_stats, "A", "TCP statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tids", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tids, "A", "TID information"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_err_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tp_err_stats, "A", "TP error statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_la_mask", CTLTYPE_INT | CTLFLAG_RW, sc, 0, sysctl_tp_la_mask, "I", "TP logic analyzer event capture mask"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_la", 
CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tp_la, "A", "TP logic analyzer"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tx_rate", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tx_rate, "A", "Tx rate"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ulprx_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_ulprx_la, "A", "ULPRX logic analyzer"); if (chip_id(sc) >= CHELSIO_T5) { SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "wcwr_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_wcwr_stats, "A", "write combined work requests"); } #endif #ifdef TCP_OFFLOAD if (is_offload(sc)) { int i; char s[4]; /* * dev.t4nex.X.toe. */ oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "toe", CTLFLAG_RD, NULL, "TOE parameters"); children = SYSCTL_CHILDREN(oid); sc->tt.cong_algorithm = -1; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "cong_algorithm", CTLFLAG_RW, &sc->tt.cong_algorithm, 0, "congestion control " "(-1 = default, 0 = reno, 1 = tahoe, 2 = newreno, " "3 = highspeed)"); sc->tt.sndbuf = 256 * 1024; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "sndbuf", CTLFLAG_RW, &sc->tt.sndbuf, 0, "max hardware send buffer size"); sc->tt.ddp = 0; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ddp", CTLFLAG_RW, &sc->tt.ddp, 0, "DDP allowed"); sc->tt.rx_coalesce = 1; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_coalesce", CTLFLAG_RW, &sc->tt.rx_coalesce, 0, "receive coalescing"); sc->tt.tls = 0; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tls", CTLFLAG_RW, &sc->tt.tls, 0, "Inline TLS allowed"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tls_rx_ports", CTLTYPE_INT | CTLFLAG_RW, sc, 0, sysctl_tls_rx_ports, "I", "TCP ports that use inline TLS+TOE RX"); sc->tt.tx_align = 1; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_align", CTLFLAG_RW, &sc->tt.tx_align, 0, "chop and align payload"); sc->tt.tx_zcopy = 0; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_zcopy", CTLFLAG_RW, &sc->tt.tx_zcopy, 0, "Enable zero-copy aio_write(2)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "timer_tick", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tp_tick, "A", "TP timer 
tick (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "timestamp_tick", CTLTYPE_STRING | CTLFLAG_RD, sc, 1, sysctl_tp_tick, "A", "TCP timestamp tick (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dack_tick", CTLTYPE_STRING | CTLFLAG_RD, sc, 2, sysctl_tp_tick, "A", "DACK tick (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dack_timer", CTLTYPE_UINT | CTLFLAG_RD, sc, 0, sysctl_tp_dack_timer, "IU", "DACK timer (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_min", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_RXT_MIN, sysctl_tp_timer, "LU", "Minimum retransmit interval (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_max", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_RXT_MAX, sysctl_tp_timer, "LU", "Maximum retransmit interval (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "persist_min", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_PERS_MIN, sysctl_tp_timer, "LU", "Persist timer min (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "persist_max", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_PERS_MAX, sysctl_tp_timer, "LU", "Persist timer max (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_idle", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_KEEP_IDLE, sysctl_tp_timer, "LU", "Keepalive idle timer (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_interval", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_KEEP_INTVL, sysctl_tp_timer, "LU", "Keepalive interval timer (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "initial_srtt", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_INIT_SRTT, sysctl_tp_timer, "LU", "Initial SRTT (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "finwait2_timer", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_FINWAIT2_TIMER, sysctl_tp_timer, "LU", "FINWAIT2 timer (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "syn_rexmt_count", CTLTYPE_UINT | CTLFLAG_RD, sc, S_SYNSHIFTMAX, sysctl_tp_shift_cnt, "IU", "Number of SYN retransmissions before abort"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_count", CTLTYPE_UINT | CTLFLAG_RD, sc, S_RXTSHIFTMAXR2, sysctl_tp_shift_cnt, "IU", 
"Number of retransmissions before abort"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_count", CTLTYPE_UINT | CTLFLAG_RD, sc, S_KEEPALIVEMAXR2, sysctl_tp_shift_cnt, "IU", "Number of keepalive probes before abort"); oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "rexmt_backoff", CTLFLAG_RD, NULL, "TOE retransmit backoffs"); children = SYSCTL_CHILDREN(oid); for (i = 0; i < 16; i++) { snprintf(s, sizeof(s), "%u", i); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, s, CTLTYPE_UINT | CTLFLAG_RD, sc, i, sysctl_tp_backoff, "IU", "TOE retransmit backoff"); } } #endif } void vi_sysctls(struct vi_info *vi) { struct sysctl_ctx_list *ctx; struct sysctl_oid *oid; struct sysctl_oid_list *children; ctx = device_get_sysctl_ctx(vi->dev); /* * dev.v?(cxgbe|cxl).X. */ oid = device_get_sysctl_tree(vi->dev); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "viid", CTLFLAG_RD, NULL, vi->viid, "VI identifer"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nrxq", CTLFLAG_RD, &vi->nrxq, 0, "# of rx queues"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ntxq", CTLFLAG_RD, &vi->ntxq, 0, "# of tx queues"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_rxq", CTLFLAG_RD, &vi->first_rxq, 0, "index of first rx queue"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_txq", CTLFLAG_RD, &vi->first_txq, 0, "index of first tx queue"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rss_size", CTLFLAG_RD, NULL, vi->rss_size, "size of RSS indirection table"); if (IS_MAIN_VI(vi)) { SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rsrv_noflowq", CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_noflowq, "IU", "Reserve queue 0 for non-flowid packets"); } #ifdef TCP_OFFLOAD if (vi->nofldrxq != 0) { SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldrxq", CTLFLAG_RD, &vi->nofldrxq, 0, "# of rx queues for offloaded TCP connections"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldtxq", CTLFLAG_RD, &vi->nofldtxq, 0, "# of tx queues for offloaded TCP connections"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_rxq", 
CTLFLAG_RD, &vi->first_ofld_rxq, 0, "index of first TOE rx queue"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_txq", CTLFLAG_RD, &vi->first_ofld_txq, 0, "index of first TOE tx queue"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_tmr_idx_ofld", CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_tmr_idx_ofld, "I", "holdoff timer index for TOE queues"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pktc_idx_ofld", CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_pktc_idx_ofld, "I", "holdoff packet counter index for TOE queues"); } #endif #ifdef DEV_NETMAP if (vi->nnmrxq != 0) { SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmrxq", CTLFLAG_RD, &vi->nnmrxq, 0, "# of netmap rx queues"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmtxq", CTLFLAG_RD, &vi->nnmtxq, 0, "# of netmap tx queues"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_rxq", CTLFLAG_RD, &vi->first_nm_rxq, 0, "index of first netmap rx queue"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_txq", CTLFLAG_RD, &vi->first_nm_txq, 0, "index of first netmap tx queue"); } #endif SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_tmr_idx", CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_tmr_idx, "I", "holdoff timer index"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pktc_idx", CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_pktc_idx, "I", "holdoff packet counter index"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_rxq", CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_qsize_rxq, "I", "rx queue size"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_txq", CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_qsize_txq, "I", "tx queue size"); } static void cxgbe_sysctls(struct port_info *pi) { struct sysctl_ctx_list *ctx; struct sysctl_oid *oid; struct sysctl_oid_list *children, *children2; struct adapter *sc = pi->adapter; int i; char name[16]; ctx = device_get_sysctl_ctx(pi->dev); /* * dev.cxgbe.X. 
*/ oid = device_get_sysctl_tree(pi->dev); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "linkdnrc", CTLTYPE_STRING | CTLFLAG_RD, pi, 0, sysctl_linkdnrc, "A", "reason why link is down"); if (pi->port_type == FW_PORT_TYPE_BT_XAUI) { SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature", CTLTYPE_INT | CTLFLAG_RD, pi, 0, sysctl_btphy, "I", "PHY temperature (in Celsius)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fw_version", CTLTYPE_INT | CTLFLAG_RD, pi, 1, sysctl_btphy, "I", "PHY firmware version"); } SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pause_settings", CTLTYPE_STRING | CTLFLAG_RW, pi, 0, sysctl_pause_settings, "A", "PAUSE settings (bit 0 = rx_pause, bit 1 = tx_pause)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fec", CTLTYPE_STRING | CTLFLAG_RW, pi, 0, sysctl_fec, "A", "Forward Error Correction (bit 0 = RS, bit 1 = BASER_RS)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "autoneg", CTLTYPE_INT | CTLFLAG_RW, pi, 0, sysctl_autoneg, "I", "autonegotiation (-1 = not supported)"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "max_speed", CTLFLAG_RD, NULL, port_top_speed(pi), "max speed (in Gbps)"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "mps_bg_map", CTLFLAG_RD, NULL, pi->mps_bg_map, "MPS buffer group map"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_e_chan_map", CTLFLAG_RD, NULL, pi->rx_e_chan_map, "TP rx e-channel map"); if (sc->flags & IS_VF) return; /* * dev.(cxgbe|cxl).X.tc. 
*/ oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "tc", CTLFLAG_RD, NULL, "Tx scheduler traffic classes (cl_rl)"); for (i = 0; i < sc->chip_params->nsched_cls; i++) { struct tx_cl_rl_params *tc = &pi->sched_params->cl_rl[i]; snprintf(name, sizeof(name), "%d", i); children2 = SYSCTL_CHILDREN(SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, name, CTLFLAG_RD, NULL, "traffic class")); SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "flags", CTLFLAG_RD, &tc->flags, 0, "flags"); SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "refcount", CTLFLAG_RD, &tc->refcount, 0, "references to this class"); #ifdef SBUF_DRAIN SYSCTL_ADD_PROC(ctx, children2, OID_AUTO, "params", CTLTYPE_STRING | CTLFLAG_RD, sc, (pi->port_id << 16) | i, sysctl_tc_params, "A", "traffic class parameters"); #endif } /* * dev.cxgbe.X.stats. */ oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "stats", CTLFLAG_RD, NULL, "port statistics"); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "tx_parse_error", CTLFLAG_RD, &pi->tx_parse_error, 0, "# of tx packets with invalid length or # of segments"); #define SYSCTL_ADD_T4_REG64(pi, name, desc, reg) \ SYSCTL_ADD_OID(ctx, children, OID_AUTO, name, \ CTLTYPE_U64 | CTLFLAG_RD, sc, reg, \ sysctl_handle_t4_reg64, "QU", desc) SYSCTL_ADD_T4_REG64(pi, "tx_octets", "# of octets in good frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BYTES_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames", "total # of good frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_FRAMES_L)); SYSCTL_ADD_T4_REG64(pi, "tx_bcast_frames", "# of broadcast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BCAST_L)); SYSCTL_ADD_T4_REG64(pi, "tx_mcast_frames", "# of multicast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_MCAST_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ucast_frames", "# of unicast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_UCAST_L)); SYSCTL_ADD_T4_REG64(pi, "tx_error_frames", "# of error frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_ERROR_L)); 
SYSCTL_ADD_T4_REG64(pi, "tx_frames_64", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_64B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_65_127", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_65B_127B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_128_255", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_128B_255B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_256_511", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_256B_511B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_512_1023", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_512B_1023B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_1024_1518", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_1024B_1518B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_1519_max", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_1519B_MAX_L)); SYSCTL_ADD_T4_REG64(pi, "tx_drop", "# of dropped tx frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_DROP_L)); SYSCTL_ADD_T4_REG64(pi, "tx_pause", "# of pause frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PAUSE_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp0", "# of PPP prio 0 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP0_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp1", "# of PPP prio 1 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP1_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp2", "# of PPP prio 2 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP2_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp3", "# of PPP prio 3 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP3_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp4", "# of PPP prio 4 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP4_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp5", "# of PPP prio 5 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP5_L)); 
SYSCTL_ADD_T4_REG64(pi, "tx_ppp6", "# of PPP prio 6 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP6_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp7", "# of PPP prio 7 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP7_L)); SYSCTL_ADD_T4_REG64(pi, "rx_octets", "# of octets in good frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BYTES_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames", "total # of good frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_FRAMES_L)); SYSCTL_ADD_T4_REG64(pi, "rx_bcast_frames", "# of broadcast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BCAST_L)); SYSCTL_ADD_T4_REG64(pi, "rx_mcast_frames", "# of multicast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MCAST_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ucast_frames", "# of unicast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_UCAST_L)); SYSCTL_ADD_T4_REG64(pi, "rx_too_long", "# of frames exceeding MTU", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_jabber", "# of jabber frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_CRC_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_fcs_err", "# of frames received with bad FCS", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_CRC_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_len_err", "# of frames received with length error", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LEN_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_symbol_err", "symbol errors", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_SYM_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_runt", "# of short frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LESS_64B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_64", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_64B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_65_127", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_65B_127B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_128_255", "# of rx frames in this range", 
PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_128B_255B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_256_511", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_256B_511B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_512_1023", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_512B_1023B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_1024_1518", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1024B_1518B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_1519_max", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1519B_MAX_L)); SYSCTL_ADD_T4_REG64(pi, "rx_pause", "# of pause frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PAUSE_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp0", "# of PPP prio 0 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP0_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp1", "# of PPP prio 1 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP1_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp2", "# of PPP prio 2 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP2_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp3", "# of PPP prio 3 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP3_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp4", "# of PPP prio 4 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP4_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp5", "# of PPP prio 5 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP5_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp6", "# of PPP prio 6 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP6_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp7", "# of PPP prio 7 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP7_L)); #undef SYSCTL_ADD_T4_REG64 #define SYSCTL_ADD_T4_PORTSTAT(name, desc) \ SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, #name, CTLFLAG_RD, \ &pi->stats.name, desc) /* We get these from port_stats and they may be stale by up to 1s */ 
SYSCTL_ADD_T4_PORTSTAT(rx_ovflow0, "# drops due to buffer-group 0 overflows"); SYSCTL_ADD_T4_PORTSTAT(rx_ovflow1, "# drops due to buffer-group 1 overflows"); SYSCTL_ADD_T4_PORTSTAT(rx_ovflow2, "# drops due to buffer-group 2 overflows"); SYSCTL_ADD_T4_PORTSTAT(rx_ovflow3, "# drops due to buffer-group 3 overflows"); SYSCTL_ADD_T4_PORTSTAT(rx_trunc0, "# of buffer-group 0 truncated packets"); SYSCTL_ADD_T4_PORTSTAT(rx_trunc1, "# of buffer-group 1 truncated packets"); SYSCTL_ADD_T4_PORTSTAT(rx_trunc2, "# of buffer-group 2 truncated packets"); SYSCTL_ADD_T4_PORTSTAT(rx_trunc3, "# of buffer-group 3 truncated packets"); #undef SYSCTL_ADD_T4_PORTSTAT SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "tx_tls_records", CTLFLAG_RD, &pi->tx_tls_records, "# of TLS records transmitted"); SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "tx_tls_octets", CTLFLAG_RD, &pi->tx_tls_octets, "# of payload octets in transmitted TLS records"); SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "rx_tls_records", CTLFLAG_RD, &pi->rx_tls_records, "# of TLS records received"); SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "rx_tls_octets", CTLFLAG_RD, &pi->rx_tls_octets, "# of payload octets in received TLS records"); } static int sysctl_int_array(SYSCTL_HANDLER_ARGS) { int rc, *i, space = 0; struct sbuf sb; sbuf_new_for_sysctl(&sb, NULL, 64, req); for (i = arg1; arg2; arg2 -= sizeof(int), i++) { if (space) sbuf_printf(&sb, " "); sbuf_printf(&sb, "%d", *i); space = 1; } rc = sbuf_finish(&sb); sbuf_delete(&sb); return (rc); } static int sysctl_bitfield(SYSCTL_HANDLER_ARGS) { int rc; struct sbuf *sb; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return(rc); sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "%b", (int)arg2, (char *)arg1); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_btphy(SYSCTL_HANDLER_ARGS) { struct port_info *pi = arg1; int op = arg2; struct adapter *sc = pi->adapter; u_int v; int rc; rc = begin_synchronized_op(sc, &pi->vi[0], 
SLEEP_OK | INTR_OK, "t4btt"); if (rc) return (rc); /* XXX: magic numbers */ rc = -t4_mdio_rd(sc, sc->mbox, pi->mdio_addr, 0x1e, op ? 0x20 : 0xc820, &v); end_synchronized_op(sc, 0); if (rc) return (rc); if (op == 0) v /= 256; rc = sysctl_handle_int(oidp, &v, 0, req); return (rc); } static int sysctl_noflowq(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; int rc, val; val = vi->rsrv_noflowq; rc = sysctl_handle_int(oidp, &val, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if ((val >= 1) && (vi->ntxq > 1)) vi->rsrv_noflowq = 1; else vi->rsrv_noflowq = 0; return (rc); } static int sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct adapter *sc = vi->pi->adapter; int idx, rc, i; struct sge_rxq *rxq; uint8_t v; idx = vi->tmr_idx; rc = sysctl_handle_int(oidp, &idx, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (idx < 0 || idx >= SGE_NTIMERS) return (EINVAL); rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4tmr"); if (rc) return (rc); v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(vi->pktc_idx != -1); for_each_rxq(vi, i, rxq) { #ifdef atomic_store_rel_8 atomic_store_rel_8(&rxq->iq.intr_params, v); #else rxq->iq.intr_params = v; #endif } vi->tmr_idx = idx; end_synchronized_op(sc, LOCK_HELD); return (0); } static int sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct adapter *sc = vi->pi->adapter; int idx, rc; idx = vi->pktc_idx; rc = sysctl_handle_int(oidp, &idx, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (idx < -1 || idx >= SGE_NCOUNTERS) return (EINVAL); rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4pktc"); if (rc) return (rc); if (vi->flags & VI_INIT_DONE) rc = EBUSY; /* cannot be changed once the queues are created */ else vi->pktc_idx = idx; end_synchronized_op(sc, LOCK_HELD); return (rc); } static int sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct adapter *sc = vi->pi->adapter; int qsize, rc; qsize 
= vi->qsize_rxq; rc = sysctl_handle_int(oidp, &qsize, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (qsize < 128 || (qsize & 7)) return (EINVAL); rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4rxqs"); if (rc) return (rc); if (vi->flags & VI_INIT_DONE) rc = EBUSY; /* cannot be changed once the queues are created */ else vi->qsize_rxq = qsize; end_synchronized_op(sc, LOCK_HELD); return (rc); } static int sysctl_qsize_txq(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct adapter *sc = vi->pi->adapter; int qsize, rc; qsize = vi->qsize_txq; rc = sysctl_handle_int(oidp, &qsize, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (qsize < 128 || qsize > 65536) return (EINVAL); rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4txqs"); if (rc) return (rc); if (vi->flags & VI_INIT_DONE) rc = EBUSY; /* cannot be changed once the queues are created */ else vi->qsize_txq = qsize; end_synchronized_op(sc, LOCK_HELD); return (rc); } static int sysctl_pause_settings(SYSCTL_HANDLER_ARGS) { struct port_info *pi = arg1; struct adapter *sc = pi->adapter; struct link_config *lc = &pi->link_cfg; int rc; if (req->newptr == NULL) { struct sbuf *sb; static char *bits = "\20\1PAUSE_RX\2PAUSE_TX"; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return(rc); sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "%b", lc->fc & (PAUSE_TX | PAUSE_RX), bits); rc = sbuf_finish(sb); sbuf_delete(sb); } else { char s[2]; int n; s[0] = '0' + (lc->requested_fc & (PAUSE_TX | PAUSE_RX)); s[1] = 0; rc = sysctl_handle_string(oidp, s, sizeof(s), req); if (rc != 0) return(rc); if (s[1] != 0) return (EINVAL); if (s[0] < '0' || s[0] > '9') return (EINVAL); /* not a number */ n = s[0] - '0'; if (n & ~(PAUSE_TX | PAUSE_RX)) return (EINVAL); /* some other bit is set too */ rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK, "t4PAUSE"); if (rc) return (rc); if ((lc->requested_fc & 
(PAUSE_TX | PAUSE_RX)) != n) { lc->requested_fc &= ~(PAUSE_TX | PAUSE_RX); lc->requested_fc |= n; rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, lc); if (rc == 0) { lc->fc = lc->requested_fc; } } end_synchronized_op(sc, 0); } return (rc); } static int sysctl_fec(SYSCTL_HANDLER_ARGS) { struct port_info *pi = arg1; struct adapter *sc = pi->adapter; struct link_config *lc = &pi->link_cfg; int rc; if (req->newptr == NULL) { struct sbuf *sb; static char *bits = "\20\1RS\2BASER_RS\3RESERVED"; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return(rc); sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "%b", lc->fec & M_FW_PORT_CAP_FEC, bits); rc = sbuf_finish(sb); sbuf_delete(sb); } else { char s[2]; int n; s[0] = '0' + (lc->requested_fec & M_FW_PORT_CAP_FEC); s[1] = 0; rc = sysctl_handle_string(oidp, s, sizeof(s), req); if (rc != 0) return(rc); if (s[1] != 0) return (EINVAL); if (s[0] < '0' || s[0] > '9') return (EINVAL); /* not a number */ n = s[0] - '0'; if (n & ~M_FW_PORT_CAP_FEC) return (EINVAL); /* some other bit is set too */ rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK, "t4fec"); if (rc) return (rc); if ((lc->requested_fec & M_FW_PORT_CAP_FEC) != n) { lc->requested_fec = n & G_FW_PORT_CAP_FEC(lc->supported); rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, lc); if (rc == 0) { lc->fec = lc->requested_fec; } } end_synchronized_op(sc, 0); } return (rc); } static int sysctl_autoneg(SYSCTL_HANDLER_ARGS) { struct port_info *pi = arg1; struct adapter *sc = pi->adapter; struct link_config *lc = &pi->link_cfg; int rc, val, old; if (lc->supported & FW_PORT_CAP_ANEG) val = lc->requested_aneg == AUTONEG_ENABLE ? 
1 : 0; else val = -1; rc = sysctl_handle_int(oidp, &val, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if ((lc->supported & FW_PORT_CAP_ANEG) == 0) return (ENOTSUP); if (val == 0) val = AUTONEG_DISABLE; else if (val == 1) val = AUTONEG_ENABLE; else return (EINVAL); if (lc->requested_aneg == val) return (0); /* no change */ rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK, "t4aneg"); if (rc) return (rc); old = lc->requested_aneg; lc->requested_aneg = val; rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, lc); if (rc != 0) lc->requested_aneg = old; end_synchronized_op(sc, 0); return (rc); } static int sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; int reg = arg2; uint64_t val; val = t4_read_reg64(sc, reg); return (sysctl_handle_64(oidp, &val, 0, req)); } static int sysctl_temperature(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; int rc, t; uint32_t param, val; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4temp"); if (rc) return (rc); param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) | V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_TMP); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); end_synchronized_op(sc, 0); if (rc) return (rc); /* unknown is returned as 0 but we display -1 in that case */ t = val == 0 ? 
-1 : val; rc = sysctl_handle_int(oidp, &t, 0, req); return (rc); } #ifdef SBUF_DRAIN static int sysctl_cctrl(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; uint16_t incr[NMTUS][NCCTRL_WIN]; static const char *dec_fac[] = { "0.5", "0.5625", "0.625", "0.6875", "0.75", "0.8125", "0.875", "0.9375" }; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); t4_read_cong_tbl(sc, incr); for (i = 0; i < NCCTRL_WIN; ++i) { sbuf_printf(sb, "%2d: %4u %4u %4u %4u %4u %4u %4u %4u\n", i, incr[0][i], incr[1][i], incr[2][i], incr[3][i], incr[4][i], incr[5][i], incr[6][i], incr[7][i]); sbuf_printf(sb, "%8u %4u %4u %4u %4u %4u %4u %4u %5u %s\n", incr[8][i], incr[9][i], incr[10][i], incr[11][i], incr[12][i], incr[13][i], incr[14][i], incr[15][i], sc->params.a_wnd[i], dec_fac[sc->params.b_wnd[i]]); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static const char *qname[CIM_NUM_IBQ + CIM_NUM_OBQ_T5] = { "TP0", "TP1", "ULP", "SGE0", "SGE1", "NC-SI", /* ibq's */ "ULP0", "ULP1", "ULP2", "ULP3", "SGE", "NC-SI", /* obq's */ "SGE0-RX", "SGE1-RX" /* additional obq's (T5 onwards) */ }; static int sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i, n, qid = arg2; uint32_t *buf, *p; char *qtype; u_int cim_num_obq = sc->chip_params->cim_num_obq; KASSERT(qid >= 0 && qid < CIM_NUM_IBQ + cim_num_obq, ("%s: bad qid %d\n", __func__, qid)); if (qid < CIM_NUM_IBQ) { /* inbound queue */ qtype = "IBQ"; n = 4 * CIM_IBQ_SIZE; buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); rc = t4_read_cim_ibq(sc, qid, buf, n); } else { /* outbound queue */ qtype = "OBQ"; qid -= CIM_NUM_IBQ; n = 4 * cim_num_obq * CIM_OBQ_SIZE; buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); rc = t4_read_cim_obq(sc, qid, buf, n); } if (rc < 0) { rc = -rc; goto done; } n = rc * sizeof(uint32_t); /* rc has # of words actually read */ rc = 
sysctl_wire_old_buffer(req, 0); if (rc != 0) goto done; sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req); if (sb == NULL) { rc = ENOMEM; goto done; } sbuf_printf(sb, "%s%d %s", qtype , qid, qname[arg2]); for (i = 0, p = buf; i < n; i += 16, p += 4) sbuf_printf(sb, "\n%#06x: %08x %08x %08x %08x", i, p[0], p[1], p[2], p[3]); rc = sbuf_finish(sb); sbuf_delete(sb); done: free(buf, M_CXGBE); return (rc); } static int sysctl_cim_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; u_int cfg; struct sbuf *sb; uint32_t *buf, *p; int rc; MPASS(chip_id(sc) <= CHELSIO_T5); rc = -t4_cim_read(sc, A_UP_UP_DBG_LA_CFG, 1, &cfg); if (rc != 0) return (rc); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(sc->params.cim_la_size * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); rc = -t4_cim_read_la(sc, buf, NULL); if (rc != 0) goto done; sbuf_printf(sb, "Status Data PC%s", cfg & F_UPDBGLACAPTPCONLY ? 
"" : " LS0Stat LS0Addr LS0Data"); for (p = buf; p <= &buf[sc->params.cim_la_size - 8]; p += 8) { if (cfg & F_UPDBGLACAPTPCONLY) { sbuf_printf(sb, "\n %02x %08x %08x", p[5] & 0xff, p[6], p[7]); sbuf_printf(sb, "\n %02x %02x%06x %02x%06x", (p[3] >> 8) & 0xff, p[3] & 0xff, p[4] >> 8, p[4] & 0xff, p[5] >> 8); sbuf_printf(sb, "\n %02x %x%07x %x%07x", (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4, p[1] & 0xf, p[2] >> 4); } else { sbuf_printf(sb, "\n %02x %x%07x %x%07x %08x %08x " "%08x%08x%08x%08x", (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4, p[1] & 0xf, p[2] >> 4, p[2] & 0xf, p[3], p[4], p[5], p[6], p[7]); } } rc = sbuf_finish(sb); sbuf_delete(sb); done: free(buf, M_CXGBE); return (rc); } static int sysctl_cim_la_t6(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; u_int cfg; struct sbuf *sb; uint32_t *buf, *p; int rc; MPASS(chip_id(sc) > CHELSIO_T5); rc = -t4_cim_read(sc, A_UP_UP_DBG_LA_CFG, 1, &cfg); if (rc != 0) return (rc); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(sc->params.cim_la_size * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); rc = -t4_cim_read_la(sc, buf, NULL); if (rc != 0) goto done; sbuf_printf(sb, "Status Inst Data PC%s", cfg & F_UPDBGLACAPTPCONLY ? 
"" : " LS0Stat LS0Addr LS0Data LS1Stat LS1Addr LS1Data"); for (p = buf; p <= &buf[sc->params.cim_la_size - 10]; p += 10) { if (cfg & F_UPDBGLACAPTPCONLY) { sbuf_printf(sb, "\n %02x %08x %08x %08x", p[3] & 0xff, p[2], p[1], p[0]); sbuf_printf(sb, "\n %02x %02x%06x %02x%06x %02x%06x", (p[6] >> 8) & 0xff, p[6] & 0xff, p[5] >> 8, p[5] & 0xff, p[4] >> 8, p[4] & 0xff, p[3] >> 8); sbuf_printf(sb, "\n %02x %04x%04x %04x%04x %04x%04x", (p[9] >> 16) & 0xff, p[9] & 0xffff, p[8] >> 16, p[8] & 0xffff, p[7] >> 16, p[7] & 0xffff, p[6] >> 16); } else { sbuf_printf(sb, "\n %02x %04x%04x %04x%04x %04x%04x " "%08x %08x %08x %08x %08x %08x", (p[9] >> 16) & 0xff, p[9] & 0xffff, p[8] >> 16, p[8] & 0xffff, p[7] >> 16, p[7] & 0xffff, p[6] >> 16, p[2], p[1], p[0], p[5], p[4], p[3]); } } rc = sbuf_finish(sb); sbuf_delete(sb); done: free(buf, M_CXGBE); return (rc); } static int sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; u_int i; struct sbuf *sb; uint32_t *buf, *p; int rc; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(2 * CIM_MALA_SIZE * 5 * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); t4_cim_read_ma_la(sc, buf, buf + 5 * CIM_MALA_SIZE); p = buf; for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) { sbuf_printf(sb, "\n%02x%08x%08x%08x%08x", p[4], p[3], p[2], p[1], p[0]); } sbuf_printf(sb, "\n\nCnt ID Tag UE Data RDY VLD"); for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) { sbuf_printf(sb, "\n%3u %2u %x %u %08x%08x %u %u", (p[2] >> 10) & 0xff, (p[2] >> 7) & 7, (p[2] >> 3) & 0xf, (p[2] >> 2) & 1, (p[1] >> 2) | ((p[2] & 3) << 30), (p[0] >> 2) | ((p[1] & 3) << 30), (p[0] >> 1) & 1, p[0] & 1); } rc = sbuf_finish(sb); sbuf_delete(sb); free(buf, M_CXGBE); return (rc); } static int sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; u_int i; struct sbuf *sb; uint32_t *buf, *p; int rc; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = 
sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(2 * CIM_PIFLA_SIZE * 6 * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); t4_cim_read_pif_la(sc, buf, buf + 6 * CIM_PIFLA_SIZE, NULL, NULL); p = buf; sbuf_printf(sb, "Cntl ID DataBE Addr Data"); for (i = 0; i < CIM_PIFLA_SIZE; i++, p += 6) { sbuf_printf(sb, "\n %02x %02x %04x %08x %08x%08x%08x%08x", (p[5] >> 22) & 0xff, (p[5] >> 16) & 0x3f, p[5] & 0xffff, p[4], p[3], p[2], p[1], p[0]); } sbuf_printf(sb, "\n\nCntl ID Data"); for (i = 0; i < CIM_PIFLA_SIZE; i++, p += 6) { sbuf_printf(sb, "\n %02x %02x %08x%08x%08x%08x", (p[4] >> 6) & 0xff, p[4] & 0x3f, p[3], p[2], p[1], p[0]); } rc = sbuf_finish(sb); sbuf_delete(sb); free(buf, M_CXGBE); return (rc); } static int sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; uint16_t base[CIM_NUM_IBQ + CIM_NUM_OBQ_T5]; uint16_t size[CIM_NUM_IBQ + CIM_NUM_OBQ_T5]; uint16_t thres[CIM_NUM_IBQ]; uint32_t obq_wr[2 * CIM_NUM_OBQ_T5], *wr = obq_wr; uint32_t stat[4 * (CIM_NUM_IBQ + CIM_NUM_OBQ_T5)], *p = stat; u_int cim_num_obq, ibq_rdaddr, obq_rdaddr, nq; cim_num_obq = sc->chip_params->cim_num_obq; if (is_t4(sc)) { ibq_rdaddr = A_UP_IBQ_0_RDADDR; obq_rdaddr = A_UP_OBQ_0_REALADDR; } else { ibq_rdaddr = A_UP_IBQ_0_SHADOW_RDADDR; obq_rdaddr = A_UP_OBQ_0_SHADOW_REALADDR; } nq = CIM_NUM_IBQ + cim_num_obq; rc = -t4_cim_read(sc, ibq_rdaddr, 4 * nq, stat); if (rc == 0) rc = -t4_cim_read(sc, obq_rdaddr, 2 * cim_num_obq, obq_wr); if (rc != 0) return (rc); t4_read_cimq_cfg(sc, base, size, thres); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, " Queue Base Size Thres RdPtr WrPtr SOP EOP Avail"); for (i = 0; i < CIM_NUM_IBQ; i++, p += 4) sbuf_printf(sb, "\n%7s %5x %5u %5u %6x %4x %4u %4u %5u", qname[i], base[i], size[i], thres[i], G_IBQRDADDR(p[0]), G_IBQWRADDR(p[1]), G_QUESOPCNT(p[3]), 
G_QUEEOPCNT(p[3]), G_QUEREMFLITS(p[2]) * 16); for ( ; i < nq; i++, p += 4, wr += 2) sbuf_printf(sb, "\n%7s %5x %5u %12x %4x %4u %4u %5u", qname[i], base[i], size[i], G_QUERDADDR(p[0]) & 0x3fff, wr[0] - base[i], G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]), G_QUEREMFLITS(p[2]) * 16); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_cpl_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_cpl_stats stats; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); mtx_lock(&sc->reg_lock); t4_tp_get_cpl_stats(sc, &stats, 0); mtx_unlock(&sc->reg_lock); if (sc->chip_params->nchan > 2) { sbuf_printf(sb, " channel 0 channel 1" " channel 2 channel 3"); sbuf_printf(sb, "\nCPL requests: %10u %10u %10u %10u", stats.req[0], stats.req[1], stats.req[2], stats.req[3]); sbuf_printf(sb, "\nCPL responses: %10u %10u %10u %10u", stats.rsp[0], stats.rsp[1], stats.rsp[2], stats.rsp[3]); } else { sbuf_printf(sb, " channel 0 channel 1"); sbuf_printf(sb, "\nCPL requests: %10u %10u", stats.req[0], stats.req[1]); sbuf_printf(sb, "\nCPL responses: %10u %10u", stats.rsp[0], stats.rsp[1]); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_ddp_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_usm_stats stats; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return(rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_get_usm_stats(sc, &stats, 1); sbuf_printf(sb, "Frames: %u\n", stats.frames); sbuf_printf(sb, "Octets: %ju\n", stats.octets); sbuf_printf(sb, "Drops: %u", stats.drops); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static const char * const devlog_level_strings[] = { [FW_DEVLOG_LEVEL_EMERG] = "EMERG", [FW_DEVLOG_LEVEL_CRIT] = "CRIT", [FW_DEVLOG_LEVEL_ERR] = "ERR", [FW_DEVLOG_LEVEL_NOTICE] = "NOTICE", [FW_DEVLOG_LEVEL_INFO] = "INFO", 
[FW_DEVLOG_LEVEL_DEBUG] = "DEBUG" }; static const char * const devlog_facility_strings[] = { [FW_DEVLOG_FACILITY_CORE] = "CORE", [FW_DEVLOG_FACILITY_CF] = "CF", [FW_DEVLOG_FACILITY_SCHED] = "SCHED", [FW_DEVLOG_FACILITY_TIMER] = "TIMER", [FW_DEVLOG_FACILITY_RES] = "RES", [FW_DEVLOG_FACILITY_HW] = "HW", [FW_DEVLOG_FACILITY_FLR] = "FLR", [FW_DEVLOG_FACILITY_DMAQ] = "DMAQ", [FW_DEVLOG_FACILITY_PHY] = "PHY", [FW_DEVLOG_FACILITY_MAC] = "MAC", [FW_DEVLOG_FACILITY_PORT] = "PORT", [FW_DEVLOG_FACILITY_VI] = "VI", [FW_DEVLOG_FACILITY_FILTER] = "FILTER", [FW_DEVLOG_FACILITY_ACL] = "ACL", [FW_DEVLOG_FACILITY_TM] = "TM", [FW_DEVLOG_FACILITY_QFC] = "QFC", [FW_DEVLOG_FACILITY_DCB] = "DCB", [FW_DEVLOG_FACILITY_ETH] = "ETH", [FW_DEVLOG_FACILITY_OFLD] = "OFLD", [FW_DEVLOG_FACILITY_RI] = "RI", [FW_DEVLOG_FACILITY_ISCSI] = "ISCSI", [FW_DEVLOG_FACILITY_FCOE] = "FCOE", [FW_DEVLOG_FACILITY_FOISCSI] = "FOISCSI", [FW_DEVLOG_FACILITY_FOFCOE] = "FOFCOE", [FW_DEVLOG_FACILITY_CHNET] = "CHNET", }; static int sysctl_devlog(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct devlog_params *dparams = &sc->params.devlog; struct fw_devlog_e *buf, *e; int i, j, rc, nentries, first = 0; struct sbuf *sb; uint64_t ftstamp = UINT64_MAX; if (dparams->addr == 0) return (ENXIO); buf = malloc(dparams->size, M_CXGBE, M_NOWAIT); if (buf == NULL) return (ENOMEM); rc = read_via_memwin(sc, 1, dparams->addr, (void *)buf, dparams->size); if (rc != 0) goto done; nentries = dparams->size / sizeof(struct fw_devlog_e); for (i = 0; i < nentries; i++) { e = &buf[i]; if (e->timestamp == 0) break; /* end */ e->timestamp = be64toh(e->timestamp); e->seqno = be32toh(e->seqno); for (j = 0; j < 8; j++) e->params[j] = be32toh(e->params[j]); if (e->timestamp < ftstamp) { ftstamp = e->timestamp; first = i; } } if (buf[first].timestamp == 0) goto done; /* nothing in the log */ rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) goto done; sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) { rc = ENOMEM; goto done; } 
sbuf_printf(sb, "%10s %15s %8s %8s %s\n", "Seq#", "Tstamp", "Level", "Facility", "Message"); i = first; do { e = &buf[i]; if (e->timestamp == 0) break; /* end */ sbuf_printf(sb, "%10d %15ju %8s %8s ", e->seqno, e->timestamp, (e->level < nitems(devlog_level_strings) ? devlog_level_strings[e->level] : "UNKNOWN"), (e->facility < nitems(devlog_facility_strings) ? devlog_facility_strings[e->facility] : "UNKNOWN")); sbuf_printf(sb, e->fmt, e->params[0], e->params[1], e->params[2], e->params[3], e->params[4], e->params[5], e->params[6], e->params[7]); if (++i == nentries) i = 0; } while (i != first); rc = sbuf_finish(sb); sbuf_delete(sb); done: free(buf, M_CXGBE); return (rc); } static int sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_fcoe_stats stats[MAX_NCHAN]; int i, nchan = sc->chip_params->nchan; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); for (i = 0; i < nchan; i++) t4_get_fcoe_stats(sc, i, &stats[i], 1); if (nchan > 2) { sbuf_printf(sb, " channel 0 channel 1" " channel 2 channel 3"); sbuf_printf(sb, "\noctetsDDP: %16ju %16ju %16ju %16ju", stats[0].octets_ddp, stats[1].octets_ddp, stats[2].octets_ddp, stats[3].octets_ddp); sbuf_printf(sb, "\nframesDDP: %16u %16u %16u %16u", stats[0].frames_ddp, stats[1].frames_ddp, stats[2].frames_ddp, stats[3].frames_ddp); sbuf_printf(sb, "\nframesDrop: %16u %16u %16u %16u", stats[0].frames_drop, stats[1].frames_drop, stats[2].frames_drop, stats[3].frames_drop); } else { sbuf_printf(sb, " channel 0 channel 1"); sbuf_printf(sb, "\noctetsDDP: %16ju %16ju", stats[0].octets_ddp, stats[1].octets_ddp); sbuf_printf(sb, "\nframesDDP: %16u %16u", stats[0].frames_ddp, stats[1].frames_ddp); sbuf_printf(sb, "\nframesDrop: %16u %16u", stats[0].frames_drop, stats[1].frames_drop); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_hw_sched(SYSCTL_HANDLER_ARGS) { struct 
adapter *sc = arg1; struct sbuf *sb; int rc, i; unsigned int map, kbps, ipg, mode; unsigned int pace_tab[NTX_SCHED]; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); map = t4_read_reg(sc, A_TP_TX_MOD_QUEUE_REQ_MAP); mode = G_TIMERMODE(t4_read_reg(sc, A_TP_MOD_CONFIG)); t4_read_pace_tbl(sc, pace_tab); sbuf_printf(sb, "Scheduler Mode Channel Rate (Kbps) " "Class IPG (0.1 ns) Flow IPG (us)"); for (i = 0; i < NTX_SCHED; ++i, map >>= 2) { t4_get_tx_sched(sc, i, &kbps, &ipg, 1); sbuf_printf(sb, "\n %u %-5s %u ", i, (mode & (1 << i)) ? "flow" : "class", map & 3); if (kbps) sbuf_printf(sb, "%9u ", kbps); else sbuf_printf(sb, " disabled "); if (ipg) sbuf_printf(sb, "%13u ", ipg); else sbuf_printf(sb, " disabled "); if (pace_tab[i]) sbuf_printf(sb, "%10u", pace_tab[i]); else sbuf_printf(sb, " disabled"); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_lb_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i, j; uint64_t *p0, *p1; struct lb_port_stats s[2]; static const char *stat_name[] = { "OctetsOK:", "FramesOK:", "BcastFrames:", "McastFrames:", "UcastFrames:", "ErrorFrames:", "Frames64:", "Frames65To127:", "Frames128To255:", "Frames256To511:", "Frames512To1023:", "Frames1024To1518:", "Frames1519ToMax:", "FramesDropped:", "BG0FramesDropped:", "BG1FramesDropped:", "BG2FramesDropped:", "BG3FramesDropped:", "BG0FramesTrunc:", "BG1FramesTrunc:", "BG2FramesTrunc:", "BG3FramesTrunc:" }; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); memset(s, 0, sizeof(s)); for (i = 0; i < sc->chip_params->nchan; i += 2) { t4_get_lb_stats(sc, i, &s[0]); t4_get_lb_stats(sc, i + 1, &s[1]); p0 = &s[0].octets; p1 = &s[1].octets; sbuf_printf(sb, "%s Loopback %u" " Loopback %u", i == 0 ? 
/*
 * One adapter memory range or region: starting address, last address (0 when
 * it is to be derived from the next region), and an index into the name table
 * used when printing.
 */
struct mem_desc {
	unsigned int base;
	unsigned int limit;
	unsigned int idx;
};

/*
 * qsort(3) comparator that orders mem_desc entries by ascending base address.
 *
 * The bases are compared explicitly rather than subtracted: the difference of
 * two unsigned values converted to int has the wrong sign whenever the bases
 * are more than INT_MAX apart, which would make the ordering inconsistent.
 *
 * Returns a negative, zero, or positive value as a's base is lower than,
 * equal to, or higher than b's.
 */
static int
mem_desc_cmp(const void *a, const void *b)
{
	const unsigned int v1 = ((const struct mem_desc *)a)->base;
	const unsigned int v2 = ((const struct mem_desc *)b)->base;

	if (v1 < v2)
		return (-1);
	if (v1 > v2)
		return (1);
	return (0);
}
sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); for (i = 0; i < nitems(mem); i++) { mem[i].limit = 0; mem[i].idx = i; } /* Find and sort the populated memory ranges */ i = 0; lo = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE); if (lo & F_EDRAM0_ENABLE) { hi = t4_read_reg(sc, A_MA_EDRAM0_BAR); avail[i].base = G_EDRAM0_BASE(hi) << 20; avail[i].limit = avail[i].base + (G_EDRAM0_SIZE(hi) << 20); avail[i].idx = 0; i++; } if (lo & F_EDRAM1_ENABLE) { hi = t4_read_reg(sc, A_MA_EDRAM1_BAR); avail[i].base = G_EDRAM1_BASE(hi) << 20; avail[i].limit = avail[i].base + (G_EDRAM1_SIZE(hi) << 20); avail[i].idx = 1; i++; } if (lo & F_EXT_MEM_ENABLE) { hi = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR); avail[i].base = G_EXT_MEM_BASE(hi) << 20; avail[i].limit = avail[i].base + (G_EXT_MEM_SIZE(hi) << 20); avail[i].idx = is_t5(sc) ? 3 : 2; /* Call it MC0 for T5 */ i++; } if (is_t5(sc) && lo & F_EXT_MEM1_ENABLE) { hi = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR); avail[i].base = G_EXT_MEM1_BASE(hi) << 20; avail[i].limit = avail[i].base + (G_EXT_MEM1_SIZE(hi) << 20); avail[i].idx = 4; i++; } if (!i) /* no memory available */ return 0; qsort(avail, i, sizeof(struct mem_desc), mem_desc_cmp); (md++)->base = t4_read_reg(sc, A_SGE_DBQ_CTXT_BADDR); (md++)->base = t4_read_reg(sc, A_SGE_IMSG_CTXT_BADDR); (md++)->base = t4_read_reg(sc, A_SGE_FLM_CACHE_BADDR); (md++)->base = t4_read_reg(sc, A_TP_CMM_TCB_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_TIMER_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_RX_FLST_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_TX_FLST_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_PS_FLST_BASE); /* the next few have explicit upper bounds */ md->base = t4_read_reg(sc, A_TP_PMM_TX_BASE); md->limit = md->base - 1 + t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE) * G_PMTXMAXPAGE(t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE)); md++; md->base = t4_read_reg(sc, A_TP_PMM_RX_BASE); md->limit = md->base - 1 + t4_read_reg(sc, 
A_TP_PMM_RX_PAGE_SIZE) * G_PMRXMAXPAGE(t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE)); md++; if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) { if (chip_id(sc) <= CHELSIO_T5) md->base = t4_read_reg(sc, A_LE_DB_HASH_TID_BASE); else md->base = t4_read_reg(sc, A_LE_DB_HASH_TBL_BASE_ADDR); md->limit = 0; } else { md->base = 0; md->idx = nitems(region); /* hide it */ } md++; #define ulp_region(reg) \ md->base = t4_read_reg(sc, A_ULP_ ## reg ## _LLIMIT);\ (md++)->limit = t4_read_reg(sc, A_ULP_ ## reg ## _ULIMIT) ulp_region(RX_ISCSI); ulp_region(RX_TDDP); ulp_region(TX_TPT); ulp_region(RX_STAG); ulp_region(RX_RQ); ulp_region(RX_RQUDP); ulp_region(RX_PBL); ulp_region(TX_PBL); #undef ulp_region md->base = 0; md->idx = nitems(region); if (!is_t4(sc)) { uint32_t size = 0; uint32_t sge_ctrl = t4_read_reg(sc, A_SGE_CONTROL2); uint32_t fifo_size = t4_read_reg(sc, A_SGE_DBVFIFO_SIZE); if (is_t5(sc)) { if (sge_ctrl & F_VFIFO_ENABLE) size = G_DBVFIFO_SIZE(fifo_size); } else size = G_T6_DBVFIFO_SIZE(fifo_size); if (size) { md->base = G_BASEADDR(t4_read_reg(sc, A_SGE_DBVFIFO_BADDR)); md->limit = md->base + (size << 2) - 1; } } md++; md->base = t4_read_reg(sc, A_ULP_RX_CTX_BASE); md->limit = 0; md++; md->base = t4_read_reg(sc, A_ULP_TX_ERR_TABLE_BASE); md->limit = 0; md++; md->base = sc->vres.ocq.start; if (sc->vres.ocq.size) md->limit = md->base + sc->vres.ocq.size - 1; else md->idx = nitems(region); /* hide it */ md++; md->base = sc->vres.key.start; if (sc->vres.key.size) md->limit = md->base + sc->vres.key.size - 1; else md->idx = nitems(region); /* hide it */ md++; /* add any address-space holes, there can be up to 3 */ for (n = 0; n < i - 1; n++) if (avail[n].limit < avail[n + 1].base) (md++)->base = avail[n].limit; if (avail[n].limit) (md++)->base = avail[n].limit; n = md - mem; qsort(mem, n, sizeof(struct mem_desc), mem_desc_cmp); for (lo = 0; lo < i; lo++) mem_region_show(sb, memory[avail[lo].idx], avail[lo].base, avail[lo].limit - 1); sbuf_printf(sb, "\n"); for (i = 0; i < n; i++) { 
if (mem[i].idx >= nitems(region)) continue; /* skip holes */ if (!mem[i].limit) mem[i].limit = i < n - 1 ? mem[i + 1].base - 1 : ~0; mem_region_show(sb, region[mem[i].idx], mem[i].base, mem[i].limit); } sbuf_printf(sb, "\n"); lo = t4_read_reg(sc, A_CIM_SDRAM_BASE_ADDR); hi = t4_read_reg(sc, A_CIM_SDRAM_ADDR_SIZE) + lo - 1; mem_region_show(sb, "uP RAM:", lo, hi); lo = t4_read_reg(sc, A_CIM_EXTMEM2_BASE_ADDR); hi = t4_read_reg(sc, A_CIM_EXTMEM2_ADDR_SIZE) + lo - 1; mem_region_show(sb, "uP Extmem2:", lo, hi); lo = t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE); sbuf_printf(sb, "\n%u Rx pages of size %uKiB for %u channels\n", G_PMRXMAXPAGE(lo), t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) >> 10, (lo & F_PMRXNUMCHN) ? 2 : 1); lo = t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE); hi = t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE); sbuf_printf(sb, "%u Tx pages of size %u%ciB for %u channels\n", G_PMTXMAXPAGE(lo), hi >= (1 << 20) ? (hi >> 20) : (hi >> 10), hi >= (1 << 20) ? 'M' : 'K', 1 << G_PMTXNUMCHN(lo)); sbuf_printf(sb, "%u p-structs\n", t4_read_reg(sc, A_TP_CMM_MM_MAX_PSTRUCT)); for (i = 0; i < 4; i++) { if (chip_id(sc) > CHELSIO_T5) lo = t4_read_reg(sc, A_MPS_RX_MAC_BG_PG_CNT0 + i * 4); else lo = t4_read_reg(sc, A_MPS_RX_PG_RSV0 + i * 4); if (is_t5(sc)) { used = G_T5_USED(lo); alloc = G_T5_ALLOC(lo); } else { used = G_USED(lo); alloc = G_ALLOC(lo); } /* For T6 these are MAC buffer groups */ sbuf_printf(sb, "\nPort %d using %u pages out of %u allocated", i, used, alloc); } for (i = 0; i < sc->chip_params->nchan; i++) { if (chip_id(sc) > CHELSIO_T5) lo = t4_read_reg(sc, A_MPS_RX_LPBK_BG_PG_CNT0 + i * 4); else lo = t4_read_reg(sc, A_MPS_RX_PG_RSV4 + i * 4); if (is_t5(sc)) { used = G_T5_USED(lo); alloc = G_T5_ALLOC(lo); } else { used = G_USED(lo); alloc = G_ALLOC(lo); } /* For T6 these are MAC buffer groups */ sbuf_printf(sb, "\nLoopback %d using %u pages out of %u allocated", i, used, alloc); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static inline void tcamxy2valmask(uint64_t x, uint64_t 
y, uint8_t *addr, uint64_t *mask) { *mask = x | y; y = htobe64(y); memcpy(addr, (char *)&y + 2, ETHER_ADDR_LEN); } static int sysctl_mps_tcam(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; MPASS(chip_id(sc) <= CHELSIO_T5); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "Idx Ethernet address Mask Vld Ports PF" " VF Replication P0 P1 P2 P3 ML"); for (i = 0; i < sc->chip_params->mps_tcam_size; i++) { uint64_t tcamx, tcamy, mask; uint32_t cls_lo, cls_hi; uint8_t addr[ETHER_ADDR_LEN]; tcamy = t4_read_reg64(sc, MPS_CLS_TCAM_Y_L(i)); tcamx = t4_read_reg64(sc, MPS_CLS_TCAM_X_L(i)); if (tcamx & tcamy) continue; tcamxy2valmask(tcamx, tcamy, addr, &mask); cls_lo = t4_read_reg(sc, MPS_CLS_SRAM_L(i)); cls_hi = t4_read_reg(sc, MPS_CLS_SRAM_H(i)); sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x %012jx" " %c %#x%4u%4d", i, addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], (uintmax_t)mask, (cls_lo & F_SRAM_VLD) ? 'Y' : 'N', G_PORTMAP(cls_hi), G_PF(cls_lo), (cls_lo & F_VF_VALID) ? 
G_VF(cls_lo) : -1); if (cls_lo & F_REPLICATE) { struct fw_ldst_cmd ldst_cmd; memset(&ldst_cmd, 0, sizeof(ldst_cmd)); ldst_cmd.op_to_addrspace = htobe32(V_FW_CMD_OP(FW_LDST_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_READ | V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_MPS)); ldst_cmd.cycles_to_len16 = htobe32(FW_LEN16(ldst_cmd)); ldst_cmd.u.mps.rplc.fid_idx = htobe16(V_FW_LDST_CMD_FID(FW_LDST_MPS_RPLC) | V_FW_LDST_CMD_IDX(i)); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4mps"); if (rc) break; rc = -t4_wr_mbox(sc, sc->mbox, &ldst_cmd, sizeof(ldst_cmd), &ldst_cmd); end_synchronized_op(sc, 0); if (rc != 0) { sbuf_printf(sb, "%36d", rc); rc = 0; } else { sbuf_printf(sb, " %08x %08x %08x %08x", be32toh(ldst_cmd.u.mps.rplc.rplc127_96), be32toh(ldst_cmd.u.mps.rplc.rplc95_64), be32toh(ldst_cmd.u.mps.rplc.rplc63_32), be32toh(ldst_cmd.u.mps.rplc.rplc31_0)); } } else sbuf_printf(sb, "%36s", ""); sbuf_printf(sb, "%4u%3u%3u%3u %#3x", G_SRAM_PRIO0(cls_lo), G_SRAM_PRIO1(cls_lo), G_SRAM_PRIO2(cls_lo), G_SRAM_PRIO3(cls_lo), (cls_lo >> S_MULTILISTEN0) & 0xf); } if (rc) (void) sbuf_finish(sb); else rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_mps_tcam_t6(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; MPASS(chip_id(sc) > CHELSIO_T5); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "Idx Ethernet address Mask VNI Mask" " IVLAN Vld DIP_Hit Lookup Port Vld Ports PF VF" " Replication" " P0 P1 P2 P3 ML\n"); for (i = 0; i < sc->chip_params->mps_tcam_size; i++) { uint8_t dip_hit, vlan_vld, lookup_type, port_num; uint16_t ivlan; uint64_t tcamx, tcamy, val, mask; uint32_t cls_lo, cls_hi, ctl, data2, vnix, vniy; uint8_t addr[ETHER_ADDR_LEN]; ctl = V_CTLREQID(1) | V_CTLCMDTYPE(0) | V_CTLXYBITSEL(0); if (i < 256) ctl |= V_CTLTCAMINDEX(i) | V_CTLTCAMSEL(0); else ctl |= V_CTLTCAMINDEX(i - 256) | V_CTLTCAMSEL(1); t4_write_reg(sc, 
A_MPS_CLS_TCAM_DATA2_CTL, ctl); val = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA1_REQ_ID1); tcamy = G_DMACH(val) << 32; tcamy |= t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA0_REQ_ID1); data2 = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA2_REQ_ID1); lookup_type = G_DATALKPTYPE(data2); port_num = G_DATAPORTNUM(data2); if (lookup_type && lookup_type != M_DATALKPTYPE) { /* Inner header VNI */ vniy = ((data2 & F_DATAVIDH2) << 23) | (G_DATAVIDH1(data2) << 16) | G_VIDL(val); dip_hit = data2 & F_DATADIPHIT; vlan_vld = 0; } else { vniy = 0; dip_hit = 0; vlan_vld = data2 & F_DATAVIDH2; ivlan = G_VIDL(val); } ctl |= V_CTLXYBITSEL(1); t4_write_reg(sc, A_MPS_CLS_TCAM_DATA2_CTL, ctl); val = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA1_REQ_ID1); tcamx = G_DMACH(val) << 32; tcamx |= t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA0_REQ_ID1); data2 = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA2_REQ_ID1); if (lookup_type && lookup_type != M_DATALKPTYPE) { /* Inner header VNI mask */ vnix = ((data2 & F_DATAVIDH2) << 23) | (G_DATAVIDH1(data2) << 16) | G_VIDL(val); } else vnix = 0; if (tcamx & tcamy) continue; tcamxy2valmask(tcamx, tcamy, addr, &mask); cls_lo = t4_read_reg(sc, MPS_CLS_SRAM_L(i)); cls_hi = t4_read_reg(sc, MPS_CLS_SRAM_H(i)); if (lookup_type && lookup_type != M_DATALKPTYPE) { sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x " "%012jx %06x %06x - - %3c" " 'I' %4x %3c %#x%4u%4d", i, addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], (uintmax_t)mask, vniy, vnix, dip_hit ? 'Y' : 'N', port_num, cls_lo & F_T6_SRAM_VLD ? 'Y' : 'N', G_PORTMAP(cls_hi), G_T6_PF(cls_lo), cls_lo & F_T6_VF_VALID ? G_T6_VF(cls_lo) : -1); } else { sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x " "%012jx - - ", i, addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], (uintmax_t)mask); if (vlan_vld) sbuf_printf(sb, "%4u Y ", ivlan); else sbuf_printf(sb, " - N "); sbuf_printf(sb, "- %3c %4x %3c %#x%4u%4d", lookup_type ? 'I' : 'O', port_num, cls_lo & F_T6_SRAM_VLD ? 'Y' : 'N', G_PORTMAP(cls_hi), G_T6_PF(cls_lo), cls_lo & F_T6_VF_VALID ? 
G_T6_VF(cls_lo) : -1); } if (cls_lo & F_T6_REPLICATE) { struct fw_ldst_cmd ldst_cmd; memset(&ldst_cmd, 0, sizeof(ldst_cmd)); ldst_cmd.op_to_addrspace = htobe32(V_FW_CMD_OP(FW_LDST_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_READ | V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_MPS)); ldst_cmd.cycles_to_len16 = htobe32(FW_LEN16(ldst_cmd)); ldst_cmd.u.mps.rplc.fid_idx = htobe16(V_FW_LDST_CMD_FID(FW_LDST_MPS_RPLC) | V_FW_LDST_CMD_IDX(i)); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t6mps"); if (rc) break; rc = -t4_wr_mbox(sc, sc->mbox, &ldst_cmd, sizeof(ldst_cmd), &ldst_cmd); end_synchronized_op(sc, 0); if (rc != 0) { sbuf_printf(sb, "%72d", rc); rc = 0; } else { sbuf_printf(sb, " %08x %08x %08x %08x" " %08x %08x %08x %08x", be32toh(ldst_cmd.u.mps.rplc.rplc255_224), be32toh(ldst_cmd.u.mps.rplc.rplc223_192), be32toh(ldst_cmd.u.mps.rplc.rplc191_160), be32toh(ldst_cmd.u.mps.rplc.rplc159_128), be32toh(ldst_cmd.u.mps.rplc.rplc127_96), be32toh(ldst_cmd.u.mps.rplc.rplc95_64), be32toh(ldst_cmd.u.mps.rplc.rplc63_32), be32toh(ldst_cmd.u.mps.rplc.rplc31_0)); } } else sbuf_printf(sb, "%72s", ""); sbuf_printf(sb, "%4u%3u%3u%3u %#x", G_T6_SRAM_PRIO0(cls_lo), G_T6_SRAM_PRIO1(cls_lo), G_T6_SRAM_PRIO2(cls_lo), G_T6_SRAM_PRIO3(cls_lo), (cls_lo >> S_T6_MULTILISTEN0) & 0xf); } if (rc) (void) sbuf_finish(sb); else rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_path_mtus(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; uint16_t mtus[NMTUS]; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_read_mtu_tbl(sc, mtus, NULL); sbuf_printf(sb, "%u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u", mtus[0], mtus[1], mtus[2], mtus[3], mtus[4], mtus[5], mtus[6], mtus[7], mtus[8], mtus[9], mtus[10], mtus[11], mtus[12], mtus[13], mtus[14], mtus[15]); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_pm_stats(SYSCTL_HANDLER_ARGS) { struct 
adapter *sc = arg1; struct sbuf *sb; int rc, i; uint32_t tx_cnt[MAX_PM_NSTATS], rx_cnt[MAX_PM_NSTATS]; uint64_t tx_cyc[MAX_PM_NSTATS], rx_cyc[MAX_PM_NSTATS]; static const char *tx_stats[MAX_PM_NSTATS] = { "Read:", "Write bypass:", "Write mem:", "Bypass + mem:", "Tx FIFO wait", NULL, "Tx latency" }; static const char *rx_stats[MAX_PM_NSTATS] = { "Read:", "Write bypass:", "Write mem:", "Flush:", "Rx FIFO wait", NULL, "Rx latency" }; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_pmtx_get_stats(sc, tx_cnt, tx_cyc); t4_pmrx_get_stats(sc, rx_cnt, rx_cyc); sbuf_printf(sb, " Tx pcmds Tx bytes"); for (i = 0; i < 4; i++) { sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i], tx_cyc[i]); } sbuf_printf(sb, "\n Rx pcmds Rx bytes"); for (i = 0; i < 4; i++) { sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i], rx_cyc[i]); } if (chip_id(sc) > CHELSIO_T5) { sbuf_printf(sb, "\n Total wait Total occupancy"); sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i], tx_cyc[i]); sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i], rx_cyc[i]); i += 2; MPASS(i < nitems(tx_stats)); sbuf_printf(sb, "\n Reads Total wait"); sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i], tx_cyc[i]); sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i], rx_cyc[i]); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_rdma_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_rdma_stats stats; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); mtx_lock(&sc->reg_lock); t4_tp_get_rdma_stats(sc, &stats, 0); mtx_unlock(&sc->reg_lock); sbuf_printf(sb, "NoRQEModDefferals: %u\n", stats.rqe_dfr_mod); sbuf_printf(sb, "NoRQEPktDefferals: %u", stats.rqe_dfr_pkt); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } 
static int sysctl_tcp_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_tcp_stats v4, v6; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); mtx_lock(&sc->reg_lock); t4_tp_get_tcp_stats(sc, &v4, &v6, 0); mtx_unlock(&sc->reg_lock); sbuf_printf(sb, " IP IPv6\n"); sbuf_printf(sb, "OutRsts: %20u %20u\n", v4.tcp_out_rsts, v6.tcp_out_rsts); sbuf_printf(sb, "InSegs: %20ju %20ju\n", v4.tcp_in_segs, v6.tcp_in_segs); sbuf_printf(sb, "OutSegs: %20ju %20ju\n", v4.tcp_out_segs, v6.tcp_out_segs); sbuf_printf(sb, "RetransSegs: %20ju %20ju", v4.tcp_retrans_segs, v6.tcp_retrans_segs); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_tids(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tid_info *t = &sc->tids; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); if (t->natids) { sbuf_printf(sb, "ATID range: 0-%u, in use: %u\n", t->natids - 1, t->atids_in_use); } if (t->ntids) { sbuf_printf(sb, "TID range: "); if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) { uint32_t b, hb; if (chip_id(sc) <= CHELSIO_T5) { b = t4_read_reg(sc, A_LE_DB_SERVER_INDEX) / 4; hb = t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4; } else { b = t4_read_reg(sc, A_LE_DB_SRVR_START_INDEX); hb = t4_read_reg(sc, A_T6_LE_DB_HASH_TID_BASE); } if (b) sbuf_printf(sb, "0-%u, ", b - 1); sbuf_printf(sb, "%u-%u", hb, t->ntids - 1); } else sbuf_printf(sb, "0-%u", t->ntids - 1); sbuf_printf(sb, ", in use: %u\n", atomic_load_acq_int(&t->tids_in_use)); } if (t->nstids) { sbuf_printf(sb, "STID range: %u-%u, in use: %u\n", t->stid_base, t->stid_base + t->nstids - 1, t->stids_in_use); } if (t->nftids) { sbuf_printf(sb, "FTID range: %u-%u\n", t->ftid_base, t->ftid_base + t->nftids - 1); } if (t->netids) { sbuf_printf(sb, "ETID range: %u-%u\n", t->etid_base, 
t->etid_base + t->netids - 1); } sbuf_printf(sb, "HW TID usage: %u IP users, %u IPv6 users", t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV4), t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV6)); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_err_stats stats; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); mtx_lock(&sc->reg_lock); t4_tp_get_err_stats(sc, &stats, 0); mtx_unlock(&sc->reg_lock); if (sc->chip_params->nchan > 2) { sbuf_printf(sb, " channel 0 channel 1" " channel 2 channel 3\n"); sbuf_printf(sb, "macInErrs: %10u %10u %10u %10u\n", stats.mac_in_errs[0], stats.mac_in_errs[1], stats.mac_in_errs[2], stats.mac_in_errs[3]); sbuf_printf(sb, "hdrInErrs: %10u %10u %10u %10u\n", stats.hdr_in_errs[0], stats.hdr_in_errs[1], stats.hdr_in_errs[2], stats.hdr_in_errs[3]); sbuf_printf(sb, "tcpInErrs: %10u %10u %10u %10u\n", stats.tcp_in_errs[0], stats.tcp_in_errs[1], stats.tcp_in_errs[2], stats.tcp_in_errs[3]); sbuf_printf(sb, "tcp6InErrs: %10u %10u %10u %10u\n", stats.tcp6_in_errs[0], stats.tcp6_in_errs[1], stats.tcp6_in_errs[2], stats.tcp6_in_errs[3]); sbuf_printf(sb, "tnlCongDrops: %10u %10u %10u %10u\n", stats.tnl_cong_drops[0], stats.tnl_cong_drops[1], stats.tnl_cong_drops[2], stats.tnl_cong_drops[3]); sbuf_printf(sb, "tnlTxDrops: %10u %10u %10u %10u\n", stats.tnl_tx_drops[0], stats.tnl_tx_drops[1], stats.tnl_tx_drops[2], stats.tnl_tx_drops[3]); sbuf_printf(sb, "ofldVlanDrops: %10u %10u %10u %10u\n", stats.ofld_vlan_drops[0], stats.ofld_vlan_drops[1], stats.ofld_vlan_drops[2], stats.ofld_vlan_drops[3]); sbuf_printf(sb, "ofldChanDrops: %10u %10u %10u %10u\n\n", stats.ofld_chan_drops[0], stats.ofld_chan_drops[1], stats.ofld_chan_drops[2], stats.ofld_chan_drops[3]); } else { sbuf_printf(sb, " channel 0 channel 1\n"); sbuf_printf(sb, "macInErrs: %10u %10u\n", 
stats.mac_in_errs[0], stats.mac_in_errs[1]); sbuf_printf(sb, "hdrInErrs: %10u %10u\n", stats.hdr_in_errs[0], stats.hdr_in_errs[1]); sbuf_printf(sb, "tcpInErrs: %10u %10u\n", stats.tcp_in_errs[0], stats.tcp_in_errs[1]); sbuf_printf(sb, "tcp6InErrs: %10u %10u\n", stats.tcp6_in_errs[0], stats.tcp6_in_errs[1]); sbuf_printf(sb, "tnlCongDrops: %10u %10u\n", stats.tnl_cong_drops[0], stats.tnl_cong_drops[1]); sbuf_printf(sb, "tnlTxDrops: %10u %10u\n", stats.tnl_tx_drops[0], stats.tnl_tx_drops[1]); sbuf_printf(sb, "ofldVlanDrops: %10u %10u\n", stats.ofld_vlan_drops[0], stats.ofld_vlan_drops[1]); sbuf_printf(sb, "ofldChanDrops: %10u %10u\n\n", stats.ofld_chan_drops[0], stats.ofld_chan_drops[1]); } sbuf_printf(sb, "ofldNoNeigh: %u\nofldCongDefer: %u", stats.ofld_no_neigh, stats.ofld_cong_defer); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_tp_la_mask(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct tp_params *tpp = &sc->params.tp; u_int mask; int rc; mask = tpp->la_mask >> 16; rc = sysctl_handle_int(oidp, &mask, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (mask > 0xffff) return (EINVAL); tpp->la_mask = mask << 16; t4_set_reg_field(sc, A_TP_DBG_LA_CONFIG, 0xffff0000U, tpp->la_mask); return (0); } struct field_desc { const char *name; u_int start; u_int width; }; static void field_desc_show(struct sbuf *sb, uint64_t v, const struct field_desc *f) { char buf[32]; int line_size = 0; while (f->name) { uint64_t mask = (1ULL << f->width) - 1; int len = snprintf(buf, sizeof(buf), "%s: %ju", f->name, ((uintmax_t)v >> f->start) & mask); if (line_size + len >= 79) { line_size = 8; sbuf_printf(sb, "\n "); } sbuf_printf(sb, "%s ", buf); line_size += len + 1; f++; } sbuf_printf(sb, "\n"); } static const struct field_desc tp_la0[] = { { "RcfOpCodeOut", 60, 4 }, { "State", 56, 4 }, { "WcfState", 52, 4 }, { "RcfOpcSrcOut", 50, 2 }, { "CRxError", 49, 1 }, { "ERxError", 48, 1 }, { "SanityFailed", 47, 1 }, { "SpuriousMsg", 46, 1 }, { 
/*
 * Field table for field_desc_show(): each entry is { name, start bit, width }
 * and the list ends with a NULL name.  tp_la_show3() uses this table to decode
 * the second 64-bit word of an entry when bit 17 of the first word is clear
 * (tp_la2 is used when it is set).
 */
static const struct field_desc tp_la1[] = {
	{ "CplCmdIn", 56, 8 },
	{ "CplCmdOut", 48, 8 },
	{ "ESynOut", 47, 1 },
	{ "EAckOut", 46, 1 },
	{ "EFinOut", 45, 1 },
	{ "ERstOut", 44, 1 },
	{ "SynIn", 43, 1 },
	{ "AckIn", 42, 1 },
	{ "FinIn", 41, 1 },
	{ "RstIn", 40, 1 },
	{ "DataIn", 39, 1 },
	{ "DataInVld", 38, 1 },
	{ "PadIn", 37, 1 },
	{ "RxBufEmpty", 36, 1 },
	{ "RxDdp", 35, 1 },
	{ "RxFbCongestion", 34, 1 },
	{ "TxFbCongestion", 33, 1 },
	{ "TxPktSumSrdy", 32, 1 },
	{ "RcfUlpType", 28, 4 },
	{ "Eread", 27, 1 },
	{ "Ebypass", 26, 1 },
	{ "Esave", 25, 1 },
	{ "Static0", 24, 1 },
	{ "Cread", 23, 1 },
	{ "Cbypass", 22, 1 },
	{ "Csave", 21, 1 },
	{ "CPktOut", 20, 1 },
	{ "RxPagePoolFull", 18, 2 },
	{ "RxLpbkPkt", 17, 1 },
	{ "TxLpbkPkt", 16, 1 },
	{ "RxVfValid", 15, 1 },
	{ "SynLearned", 14, 1 },
	{ "SetDelEntry", 13, 1 },
	{ "SetInvEntry", 12, 1 },
	{ "CpcmdDvld", 11, 1 },
	{ "CpcmdSave", 10, 1 },
	{ "RxPstructsFull", 8, 2 },
	{ "EpcmdDvld", 7, 1 },
	{ "EpcmdFlush", 6, 1 },
	{ "EpcmdTrimPrefix", 5, 1 },
	{ "EpcmdTrimPostfix", 4, 1 },
	{ "ERssIp4Pkt", 3, 1 },
	{ "ERssIp6Pkt", 2, 1 },
	{ "ERssTcpUdpPkt", 1, 1 },
	{ "ERssFceFipPkt", 0, 1 },
	{ NULL }
};
37, 1 }, { "RxBufEmpty", 36, 1 }, { "RxDdp", 35, 1 }, { "RxFbCongestion", 34, 1 }, { "TxFbCongestion", 33, 1 }, { "TxPktSumSrdy", 32, 1 }, { "RcfUlpType", 28, 4 }, { "Eread", 27, 1 }, { "Ebypass", 26, 1 }, { "Esave", 25, 1 }, { "Static0", 24, 1 }, { "Cread", 23, 1 }, { "Cbypass", 22, 1 }, { "Csave", 21, 1 }, { "CPktOut", 20, 1 }, { "RxPagePoolFull", 18, 2 }, { "RxLpbkPkt", 17, 1 }, { "TxLpbkPkt", 16, 1 }, { "RxVfValid", 15, 1 }, { "SynLearned", 14, 1 }, { "SetDelEntry", 13, 1 }, { "SetInvEntry", 12, 1 }, { "CpcmdDvld", 11, 1 }, { "CpcmdSave", 10, 1 }, { "RxPstructsFull", 8, 2 }, { "EpcmdDvld", 7, 1 }, { "EpcmdFlush", 6, 1 }, { "EpcmdTrimPrefix", 5, 1 }, { "EpcmdTrimPostfix", 4, 1 }, { "ERssIp4Pkt", 3, 1 }, { "ERssIp6Pkt", 2, 1 }, { "ERssTcpUdpPkt", 1, 1 }, { "ERssFceFipPkt", 0, 1 }, { NULL } }; static void tp_la_show(struct sbuf *sb, uint64_t *p, int idx) { field_desc_show(sb, *p, tp_la0); } static void tp_la_show2(struct sbuf *sb, uint64_t *p, int idx) { if (idx) sbuf_printf(sb, "\n"); field_desc_show(sb, p[0], tp_la0); if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL) field_desc_show(sb, p[1], tp_la0); } static void tp_la_show3(struct sbuf *sb, uint64_t *p, int idx) { if (idx) sbuf_printf(sb, "\n"); field_desc_show(sb, p[0], tp_la0); if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL) field_desc_show(sb, p[1], (p[0] & (1 << 17)) ? 
tp_la2 : tp_la1); } static int sysctl_tp_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; uint64_t *buf, *p; int rc; u_int i, inc; void (*show_func)(struct sbuf *, uint64_t *, int); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(TPLA_SIZE * sizeof(uint64_t), M_CXGBE, M_ZERO | M_WAITOK); t4_tp_read_la(sc, buf, NULL); p = buf; switch (G_DBGLAMODE(t4_read_reg(sc, A_TP_DBG_LA_CONFIG))) { case 2: inc = 2; show_func = tp_la_show2; break; case 3: inc = 2; show_func = tp_la_show3; break; default: inc = 1; show_func = tp_la_show; } for (i = 0; i < TPLA_SIZE / inc; i++, p += inc) (*show_func)(sb, p, i); rc = sbuf_finish(sb); sbuf_delete(sb); free(buf, M_CXGBE); return (rc); } static int sysctl_tx_rate(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; u64 nrate[MAX_NCHAN], orate[MAX_NCHAN]; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_get_chan_txrate(sc, nrate, orate); if (sc->chip_params->nchan > 2) { sbuf_printf(sb, " channel 0 channel 1" " channel 2 channel 3\n"); sbuf_printf(sb, "NIC B/s: %10ju %10ju %10ju %10ju\n", nrate[0], nrate[1], nrate[2], nrate[3]); sbuf_printf(sb, "Offload B/s: %10ju %10ju %10ju %10ju", orate[0], orate[1], orate[2], orate[3]); } else { sbuf_printf(sb, " channel 0 channel 1\n"); sbuf_printf(sb, "NIC B/s: %10ju %10ju\n", nrate[0], nrate[1]); sbuf_printf(sb, "Offload B/s: %10ju %10ju", orate[0], orate[1]); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_ulprx_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; uint32_t *buf, *p; int rc, i; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(ULPRX_LA_SIZE * 8 * sizeof(uint32_t), M_CXGBE, M_ZERO | 
M_WAITOK); t4_ulprx_read_la(sc, buf); p = buf; sbuf_printf(sb, " Pcmd Type Message" " Data"); for (i = 0; i < ULPRX_LA_SIZE; i++, p += 8) { sbuf_printf(sb, "\n%08x%08x %4x %08x %08x%08x%08x%08x", p[1], p[0], p[2], p[3], p[7], p[6], p[5], p[4]); } rc = sbuf_finish(sb); sbuf_delete(sb); free(buf, M_CXGBE); return (rc); } static int sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, v; MPASS(chip_id(sc) >= CHELSIO_T5); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); v = t4_read_reg(sc, A_SGE_STAT_CFG); if (G_STATSOURCE_T5(v) == 7) { int mode; mode = is_t5(sc) ? G_STATMODE(v) : G_T6_STATMODE(v); if (mode == 0) { sbuf_printf(sb, "total %d, incomplete %d", t4_read_reg(sc, A_SGE_STAT_TOTAL), t4_read_reg(sc, A_SGE_STAT_MATCH)); } else if (mode == 1) { sbuf_printf(sb, "total %d, data overflow %d", t4_read_reg(sc, A_SGE_STAT_TOTAL), t4_read_reg(sc, A_SGE_STAT_MATCH)); } else { sbuf_printf(sb, "unknown mode %d", mode); } } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_tc_params(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct tx_cl_rl_params tc; struct sbuf *sb; int i, rc, port_id, mbps, gbps; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); port_id = arg2 >> 16; MPASS(port_id < sc->params.nports); MPASS(sc->port[port_id] != NULL); i = arg2 & 0xffff; MPASS(i < sc->chip_params->nsched_cls); mtx_lock(&sc->tc_lock); tc = sc->port[port_id]->sched_params->cl_rl[i]; mtx_unlock(&sc->tc_lock); if (tc.flags & TX_CLRL_ERROR) { sbuf_printf(sb, "error"); goto done; } if (tc.ratemode == SCHED_CLASS_RATEMODE_REL) { /* XXX: top speed or actual link speed? 
*/ gbps = port_top_speed(sc->port[port_id]); sbuf_printf(sb, " %u%% of %uGbps", tc.maxrate, gbps); } else if (tc.ratemode == SCHED_CLASS_RATEMODE_ABS) { switch (tc.rateunit) { case SCHED_CLASS_RATEUNIT_BITS: mbps = tc.maxrate / 1000; gbps = tc.maxrate / 1000000; if (tc.maxrate == gbps * 1000000) sbuf_printf(sb, " %uGbps", gbps); else if (tc.maxrate == mbps * 1000) sbuf_printf(sb, " %uMbps", mbps); else sbuf_printf(sb, " %uKbps", tc.maxrate); break; case SCHED_CLASS_RATEUNIT_PKTS: sbuf_printf(sb, " %upps", tc.maxrate); break; default: rc = ENXIO; goto done; } } switch (tc.mode) { case SCHED_CLASS_MODE_CLASS: sbuf_printf(sb, " aggregate"); break; case SCHED_CLASS_MODE_FLOW: sbuf_printf(sb, " per-flow"); break; default: rc = ENXIO; goto done; } done: if (rc == 0) rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } #endif #ifdef TCP_OFFLOAD static int sysctl_tls_rx_ports(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; int *old_ports, *new_ports; int i, new_count, rc; if (req->newptr == NULL && req->oldptr == NULL) return (SYSCTL_OUT(req, NULL, imax(sc->tt.num_tls_rx_ports, 1) * sizeof(sc->tt.tls_rx_ports[0]))); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4tlsrx"); if (rc) return (rc); if (sc->tt.num_tls_rx_ports == 0) { i = -1; rc = SYSCTL_OUT(req, &i, sizeof(i)); } else rc = SYSCTL_OUT(req, sc->tt.tls_rx_ports, sc->tt.num_tls_rx_ports * sizeof(sc->tt.tls_rx_ports[0])); if (rc == 0 && req->newptr != NULL) { new_count = req->newlen / sizeof(new_ports[0]); new_ports = malloc(new_count * sizeof(new_ports[0]), M_CXGBE, M_WAITOK); rc = SYSCTL_IN(req, new_ports, new_count * sizeof(new_ports[0])); if (rc) goto err; /* Allow setting to a single '-1' to clear the list. 
*/ if (new_count == 1 && new_ports[0] == -1) { ADAPTER_LOCK(sc); old_ports = sc->tt.tls_rx_ports; sc->tt.tls_rx_ports = NULL; sc->tt.num_tls_rx_ports = 0; ADAPTER_UNLOCK(sc); free(old_ports, M_CXGBE); } else { for (i = 0; i < new_count; i++) { if (new_ports[i] < 1 || new_ports[i] > IPPORT_MAX) { rc = EINVAL; goto err; } } ADAPTER_LOCK(sc); old_ports = sc->tt.tls_rx_ports; sc->tt.tls_rx_ports = new_ports; sc->tt.num_tls_rx_ports = new_count; ADAPTER_UNLOCK(sc); free(old_ports, M_CXGBE); new_ports = NULL; } err: free(new_ports, M_CXGBE); } end_synchronized_op(sc, 0); return (rc); } static void unit_conv(char *buf, size_t len, u_int val, u_int factor) { u_int rem = val % factor; if (rem == 0) snprintf(buf, len, "%u", val / factor); else { while (rem % 10 == 0) rem /= 10; snprintf(buf, len, "%u.%u", val / factor, rem); } } static int sysctl_tp_tick(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; char buf[16]; u_int res, re; u_int cclk_ps = 1000000000 / sc->params.vpd.cclk; res = t4_read_reg(sc, A_TP_TIMER_RESOLUTION); switch (arg2) { case 0: /* timer_tick */ re = G_TIMERRESOLUTION(res); break; case 1: /* TCP timestamp tick */ re = G_TIMESTAMPRESOLUTION(res); break; case 2: /* DACK tick */ re = G_DELAYEDACKRESOLUTION(res); break; default: return (EDOOFUS); } unit_conv(buf, sizeof(buf), (cclk_ps << re), 1000000); return (sysctl_handle_string(oidp, buf, sizeof(buf), req)); } static int sysctl_tp_dack_timer(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; u_int res, dack_re, v; u_int cclk_ps = 1000000000 / sc->params.vpd.cclk; res = t4_read_reg(sc, A_TP_TIMER_RESOLUTION); dack_re = G_DELAYEDACKRESOLUTION(res); v = ((cclk_ps << dack_re) / 1000000) * t4_read_reg(sc, A_TP_DACK_TIMER); return (sysctl_handle_int(oidp, &v, 0, req)); } static int sysctl_tp_timer(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; int reg = arg2; u_int tre; u_long tp_tick_us, v; u_int cclk_ps = 1000000000 / sc->params.vpd.cclk; MPASS(reg == A_TP_RXT_MIN || reg == A_TP_RXT_MAX || reg == 
A_TP_PERS_MIN || reg == A_TP_PERS_MAX || reg == A_TP_KEEP_IDLE || reg == A_TP_KEEP_INTVL || reg == A_TP_INIT_SRTT || reg == A_TP_FINWAIT2_TIMER); tre = G_TIMERRESOLUTION(t4_read_reg(sc, A_TP_TIMER_RESOLUTION)); tp_tick_us = (cclk_ps << tre) / 1000000; if (reg == A_TP_INIT_SRTT) v = tp_tick_us * G_INITSRTT(t4_read_reg(sc, reg)); else v = tp_tick_us * t4_read_reg(sc, reg); return (sysctl_handle_long(oidp, &v, 0, req)); } /* * All fields in TP_SHIFT_CNT are 4b and the starting location of the field is * passed to this function. */ static int sysctl_tp_shift_cnt(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; int idx = arg2; u_int v; MPASS(idx >= 0 && idx <= 24); v = (t4_read_reg(sc, A_TP_SHIFT_CNT) >> idx) & 0xf; return (sysctl_handle_int(oidp, &v, 0, req)); } static int sysctl_tp_backoff(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; int idx = arg2; u_int shift, v, r; MPASS(idx >= 0 && idx < 16); r = A_TP_TCP_BACKOFF_REG0 + (idx & ~3); shift = (idx & 3) << 3; v = (t4_read_reg(sc, r) >> shift) & M_TIMERBACKOFFINDEX0; return (sysctl_handle_int(oidp, &v, 0, req)); } static int sysctl_holdoff_tmr_idx_ofld(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct adapter *sc = vi->pi->adapter; int idx, rc, i; struct sge_ofld_rxq *ofld_rxq; uint8_t v; idx = vi->ofld_tmr_idx; rc = sysctl_handle_int(oidp, &idx, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (idx < 0 || idx >= SGE_NTIMERS) return (EINVAL); rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4otmr"); if (rc) return (rc); v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(vi->ofld_pktc_idx != -1); for_each_ofld_rxq(vi, i, ofld_rxq) { #ifdef atomic_store_rel_8 atomic_store_rel_8(&ofld_rxq->iq.intr_params, v); #else ofld_rxq->iq.intr_params = v; #endif } vi->ofld_tmr_idx = idx; end_synchronized_op(sc, LOCK_HELD); return (0); } static int sysctl_holdoff_pktc_idx_ofld(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct adapter *sc = vi->pi->adapter; int idx, rc; idx = 
vi->ofld_pktc_idx; rc = sysctl_handle_int(oidp, &idx, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (idx < -1 || idx >= SGE_NCOUNTERS) return (EINVAL); rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4opktc"); if (rc) return (rc); if (vi->flags & VI_INIT_DONE) rc = EBUSY; /* cannot be changed once the queues are created */ else vi->ofld_pktc_idx = idx; end_synchronized_op(sc, LOCK_HELD); return (rc); } #endif static uint32_t fconf_iconf_to_mode(uint32_t fconf, uint32_t iconf) { uint32_t mode; mode = T4_FILTER_IPv4 | T4_FILTER_IPv6 | T4_FILTER_IP_SADDR | T4_FILTER_IP_DADDR | T4_FILTER_IP_SPORT | T4_FILTER_IP_DPORT; if (fconf & F_FRAGMENTATION) mode |= T4_FILTER_IP_FRAGMENT; if (fconf & F_MPSHITTYPE) mode |= T4_FILTER_MPS_HIT_TYPE; if (fconf & F_MACMATCH) mode |= T4_FILTER_MAC_IDX; if (fconf & F_ETHERTYPE) mode |= T4_FILTER_ETH_TYPE; if (fconf & F_PROTOCOL) mode |= T4_FILTER_IP_PROTO; if (fconf & F_TOS) mode |= T4_FILTER_IP_TOS; if (fconf & F_VLAN) mode |= T4_FILTER_VLAN; if (fconf & F_VNIC_ID) { mode |= T4_FILTER_VNIC; if (iconf & F_VNIC) mode |= T4_FILTER_IC_VNIC; } if (fconf & F_PORT) mode |= T4_FILTER_PORT; if (fconf & F_FCOE) mode |= T4_FILTER_FCoE; return (mode); } static uint32_t mode_to_fconf(uint32_t mode) { uint32_t fconf = 0; if (mode & T4_FILTER_IP_FRAGMENT) fconf |= F_FRAGMENTATION; if (mode & T4_FILTER_MPS_HIT_TYPE) fconf |= F_MPSHITTYPE; if (mode & T4_FILTER_MAC_IDX) fconf |= F_MACMATCH; if (mode & T4_FILTER_ETH_TYPE) fconf |= F_ETHERTYPE; if (mode & T4_FILTER_IP_PROTO) fconf |= F_PROTOCOL; if (mode & T4_FILTER_IP_TOS) fconf |= F_TOS; if (mode & T4_FILTER_VLAN) fconf |= F_VLAN; if (mode & T4_FILTER_VNIC) fconf |= F_VNIC_ID; if (mode & T4_FILTER_PORT) fconf |= F_PORT; if (mode & T4_FILTER_FCoE) fconf |= F_FCOE; return (fconf); } static uint32_t mode_to_iconf(uint32_t mode) { if (mode & T4_FILTER_IC_VNIC) return (F_VNIC); return (0); } static int check_fspec_against_fconf_iconf(struct adapter *sc, struct 
t4_filter_specification *fs) { struct tp_params *tpp = &sc->params.tp; uint32_t fconf = 0; if (fs->val.frag || fs->mask.frag) fconf |= F_FRAGMENTATION; if (fs->val.matchtype || fs->mask.matchtype) fconf |= F_MPSHITTYPE; if (fs->val.macidx || fs->mask.macidx) fconf |= F_MACMATCH; if (fs->val.ethtype || fs->mask.ethtype) fconf |= F_ETHERTYPE; if (fs->val.proto || fs->mask.proto) fconf |= F_PROTOCOL; if (fs->val.tos || fs->mask.tos) fconf |= F_TOS; if (fs->val.vlan_vld || fs->mask.vlan_vld) fconf |= F_VLAN; if (fs->val.ovlan_vld || fs->mask.ovlan_vld) { fconf |= F_VNIC_ID; if (tpp->ingress_config & F_VNIC) return (EINVAL); } if (fs->val.pfvf_vld || fs->mask.pfvf_vld) { fconf |= F_VNIC_ID; if ((tpp->ingress_config & F_VNIC) == 0) return (EINVAL); } if (fs->val.iport || fs->mask.iport) fconf |= F_PORT; if (fs->val.fcoe || fs->mask.fcoe) fconf |= F_FCOE; if ((tpp->vlan_pri_map | fconf) != tpp->vlan_pri_map) return (E2BIG); return (0); } static int get_filter_mode(struct adapter *sc, uint32_t *mode) { struct tp_params *tpp = &sc->params.tp; /* * We trust the cached values of the relevant TP registers. This means * things work reliably only if writes to those registers are always via * t4_set_filter_mode. */ *mode = fconf_iconf_to_mode(tpp->vlan_pri_map, tpp->ingress_config); return (0); } static int set_filter_mode(struct adapter *sc, uint32_t mode) { struct tp_params *tpp = &sc->params.tp; uint32_t fconf, iconf; int rc; iconf = mode_to_iconf(mode); if ((iconf ^ tpp->ingress_config) & F_VNIC) { /* * For now we just complain if A_TP_INGRESS_CONFIG is not * already set to the correct value for the requested filter * mode. It's not clear if it's safe to write to this register * on the fly. (And we trust the cached value of the register). 
*/ return (EBUSY); } fconf = mode_to_fconf(mode); rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4setfm"); if (rc) return (rc); if (sc->tids.ftids_in_use > 0) { rc = EBUSY; goto done; } #ifdef TCP_OFFLOAD if (uld_active(sc, ULD_TOM)) { rc = EBUSY; goto done; } #endif rc = -t4_set_filter_mode(sc, fconf, true); done: end_synchronized_op(sc, LOCK_HELD); return (rc); } static inline uint64_t get_filter_hits(struct adapter *sc, uint32_t fid) { uint32_t tcb_addr; tcb_addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE) + (fid + sc->tids.ftid_base) * TCB_SIZE; if (is_t4(sc)) { uint64_t hits; read_via_memwin(sc, 0, tcb_addr + 16, (uint32_t *)&hits, 8); return (be64toh(hits)); } else { uint32_t hits; read_via_memwin(sc, 0, tcb_addr + 24, &hits, 4); return (be32toh(hits)); } } static int get_filter(struct adapter *sc, struct t4_filter *t) { int i, rc, nfilters = sc->tids.nftids; struct filter_entry *f; rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4getf"); if (rc) return (rc); if (sc->tids.ftids_in_use == 0 || sc->tids.ftid_tab == NULL || t->idx >= nfilters) { t->idx = 0xffffffff; goto done; } f = &sc->tids.ftid_tab[t->idx]; for (i = t->idx; i < nfilters; i++, f++) { if (f->valid) { t->idx = i; t->l2tidx = f->l2t ? 
f->l2t->idx : 0; t->smtidx = f->smtidx; if (f->fs.hitcnts) t->hits = get_filter_hits(sc, t->idx); else t->hits = UINT64_MAX; t->fs = f->fs; goto done; } } t->idx = 0xffffffff; done: end_synchronized_op(sc, LOCK_HELD); return (0); } static int set_filter(struct adapter *sc, struct t4_filter *t) { unsigned int nfilters, nports; struct filter_entry *f; int i, rc; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4setf"); if (rc) return (rc); nfilters = sc->tids.nftids; nports = sc->params.nports; if (nfilters == 0) { rc = ENOTSUP; goto done; } if (t->idx >= nfilters) { rc = EINVAL; goto done; } /* Validate against the global filter mode and ingress config */ rc = check_fspec_against_fconf_iconf(sc, &t->fs); if (rc != 0) goto done; if (t->fs.action == FILTER_SWITCH && t->fs.eport >= nports) { rc = EINVAL; goto done; } if (t->fs.val.iport >= nports) { rc = EINVAL; goto done; } /* Can't specify an iq if not steering to it */ if (!t->fs.dirsteer && t->fs.iq) { rc = EINVAL; goto done; } /* IPv6 filter idx must be 4 aligned */ if (t->fs.type == 1 && ((t->idx & 0x3) || t->idx + 4 >= nfilters)) { rc = EINVAL; goto done; } if (!(sc->flags & FULL_INIT_DONE) && ((rc = adapter_full_init(sc)) != 0)) goto done; if (sc->tids.ftid_tab == NULL) { KASSERT(sc->tids.ftids_in_use == 0, ("%s: no memory allocated but filters_in_use > 0", __func__)); sc->tids.ftid_tab = malloc(sizeof (struct filter_entry) * nfilters, M_CXGBE, M_NOWAIT | M_ZERO); if (sc->tids.ftid_tab == NULL) { rc = ENOMEM; goto done; } mtx_init(&sc->tids.ftid_lock, "T4 filters", 0, MTX_DEF); } for (i = 0; i < 4; i++) { f = &sc->tids.ftid_tab[t->idx + i]; if (f->pending || f->valid) { rc = EBUSY; goto done; } if (f->locked) { rc = EPERM; goto done; } if (t->fs.type == 0) break; } f = &sc->tids.ftid_tab[t->idx]; f->fs = t->fs; rc = set_filter_wr(sc, t->idx); done: end_synchronized_op(sc, 0); if (rc == 0) { mtx_lock(&sc->tids.ftid_lock); for (;;) { if (f->pending == 0) { rc = f->valid ? 
0 : EIO; break; } if (mtx_sleep(&sc->tids.ftid_tab, &sc->tids.ftid_lock, PCATCH, "t4setfw", 0)) { rc = EINPROGRESS; break; } } mtx_unlock(&sc->tids.ftid_lock); } return (rc); } static int del_filter(struct adapter *sc, struct t4_filter *t) { unsigned int nfilters; struct filter_entry *f; int rc; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4delf"); if (rc) return (rc); nfilters = sc->tids.nftids; if (nfilters == 0) { rc = ENOTSUP; goto done; } if (sc->tids.ftid_tab == NULL || sc->tids.ftids_in_use == 0 || t->idx >= nfilters) { rc = EINVAL; goto done; } if (!(sc->flags & FULL_INIT_DONE)) { rc = EAGAIN; goto done; } f = &sc->tids.ftid_tab[t->idx]; if (f->pending) { rc = EBUSY; goto done; } if (f->locked) { rc = EPERM; goto done; } if (f->valid) { t->fs = f->fs; /* extra info for the caller */ rc = del_filter_wr(sc, t->idx); } done: end_synchronized_op(sc, 0); if (rc == 0) { mtx_lock(&sc->tids.ftid_lock); for (;;) { if (f->pending == 0) { rc = f->valid ? EIO : 0; break; } if (mtx_sleep(&sc->tids.ftid_tab, &sc->tids.ftid_lock, PCATCH, "t4delfw", 0)) { rc = EINPROGRESS; break; } } mtx_unlock(&sc->tids.ftid_lock); } return (rc); } static void clear_filter(struct filter_entry *f) { if (f->l2t) t4_l2t_release(f->l2t); bzero(f, sizeof (*f)); } static int set_filter_wr(struct adapter *sc, int fidx) { struct filter_entry *f = &sc->tids.ftid_tab[fidx]; struct fw_filter_wr *fwr; unsigned int ftid, vnic_vld, vnic_vld_mask; struct wrq_cookie cookie; ASSERT_SYNCHRONIZED_OP(sc); if (f->fs.newdmac || f->fs.newvlan) { /* This filter needs an L2T entry; allocate one. 
*/ f->l2t = t4_l2t_alloc_switching(sc->l2t); if (f->l2t == NULL) return (EAGAIN); if (t4_l2t_set_switching(sc, f->l2t, f->fs.vlan, f->fs.eport, f->fs.dmac)) { t4_l2t_release(f->l2t); f->l2t = NULL; return (ENOMEM); } } /* Already validated against fconf, iconf */ MPASS((f->fs.val.pfvf_vld & f->fs.val.ovlan_vld) == 0); MPASS((f->fs.mask.pfvf_vld & f->fs.mask.ovlan_vld) == 0); if (f->fs.val.pfvf_vld || f->fs.val.ovlan_vld) vnic_vld = 1; else vnic_vld = 0; if (f->fs.mask.pfvf_vld || f->fs.mask.ovlan_vld) vnic_vld_mask = 1; else vnic_vld_mask = 0; ftid = sc->tids.ftid_base + fidx; fwr = start_wrq_wr(&sc->sge.mgmtq, howmany(sizeof(*fwr), 16), &cookie); if (fwr == NULL) return (ENOMEM); bzero(fwr, sizeof(*fwr)); fwr->op_pkd = htobe32(V_FW_WR_OP(FW_FILTER_WR)); fwr->len16_pkd = htobe32(FW_LEN16(*fwr)); fwr->tid_to_iq = htobe32(V_FW_FILTER_WR_TID(ftid) | V_FW_FILTER_WR_RQTYPE(f->fs.type) | V_FW_FILTER_WR_NOREPLY(0) | V_FW_FILTER_WR_IQ(f->fs.iq)); fwr->del_filter_to_l2tix = htobe32(V_FW_FILTER_WR_RPTTID(f->fs.rpttid) | V_FW_FILTER_WR_DROP(f->fs.action == FILTER_DROP) | V_FW_FILTER_WR_DIRSTEER(f->fs.dirsteer) | V_FW_FILTER_WR_MASKHASH(f->fs.maskhash) | V_FW_FILTER_WR_DIRSTEERHASH(f->fs.dirsteerhash) | V_FW_FILTER_WR_LPBK(f->fs.action == FILTER_SWITCH) | V_FW_FILTER_WR_DMAC(f->fs.newdmac) | V_FW_FILTER_WR_SMAC(f->fs.newsmac) | V_FW_FILTER_WR_INSVLAN(f->fs.newvlan == VLAN_INSERT || f->fs.newvlan == VLAN_REWRITE) | V_FW_FILTER_WR_RMVLAN(f->fs.newvlan == VLAN_REMOVE || f->fs.newvlan == VLAN_REWRITE) | V_FW_FILTER_WR_HITCNTS(f->fs.hitcnts) | V_FW_FILTER_WR_TXCHAN(f->fs.eport) | V_FW_FILTER_WR_PRIO(f->fs.prio) | V_FW_FILTER_WR_L2TIX(f->l2t ? 
f->l2t->idx : 0)); fwr->ethtype = htobe16(f->fs.val.ethtype); fwr->ethtypem = htobe16(f->fs.mask.ethtype); fwr->frag_to_ovlan_vldm = (V_FW_FILTER_WR_FRAG(f->fs.val.frag) | V_FW_FILTER_WR_FRAGM(f->fs.mask.frag) | V_FW_FILTER_WR_IVLAN_VLD(f->fs.val.vlan_vld) | V_FW_FILTER_WR_OVLAN_VLD(vnic_vld) | V_FW_FILTER_WR_IVLAN_VLDM(f->fs.mask.vlan_vld) | V_FW_FILTER_WR_OVLAN_VLDM(vnic_vld_mask)); fwr->smac_sel = 0; fwr->rx_chan_rx_rpl_iq = htobe16(V_FW_FILTER_WR_RX_CHAN(0) | V_FW_FILTER_WR_RX_RPL_IQ(sc->sge.fwq.abs_id)); fwr->maci_to_matchtypem = htobe32(V_FW_FILTER_WR_MACI(f->fs.val.macidx) | V_FW_FILTER_WR_MACIM(f->fs.mask.macidx) | V_FW_FILTER_WR_FCOE(f->fs.val.fcoe) | V_FW_FILTER_WR_FCOEM(f->fs.mask.fcoe) | V_FW_FILTER_WR_PORT(f->fs.val.iport) | V_FW_FILTER_WR_PORTM(f->fs.mask.iport) | V_FW_FILTER_WR_MATCHTYPE(f->fs.val.matchtype) | V_FW_FILTER_WR_MATCHTYPEM(f->fs.mask.matchtype)); fwr->ptcl = f->fs.val.proto; fwr->ptclm = f->fs.mask.proto; fwr->ttyp = f->fs.val.tos; fwr->ttypm = f->fs.mask.tos; fwr->ivlan = htobe16(f->fs.val.vlan); fwr->ivlanm = htobe16(f->fs.mask.vlan); fwr->ovlan = htobe16(f->fs.val.vnic); fwr->ovlanm = htobe16(f->fs.mask.vnic); bcopy(f->fs.val.dip, fwr->lip, sizeof (fwr->lip)); bcopy(f->fs.mask.dip, fwr->lipm, sizeof (fwr->lipm)); bcopy(f->fs.val.sip, fwr->fip, sizeof (fwr->fip)); bcopy(f->fs.mask.sip, fwr->fipm, sizeof (fwr->fipm)); fwr->lp = htobe16(f->fs.val.dport); fwr->lpm = htobe16(f->fs.mask.dport); fwr->fp = htobe16(f->fs.val.sport); fwr->fpm = htobe16(f->fs.mask.sport); if (f->fs.newsmac) bcopy(f->fs.smac, fwr->sma, sizeof (fwr->sma)); f->pending = 1; sc->tids.ftids_in_use++; commit_wrq_wr(&sc->sge.mgmtq, fwr, &cookie); return (0); } static int del_filter_wr(struct adapter *sc, int fidx) { struct filter_entry *f = &sc->tids.ftid_tab[fidx]; struct fw_filter_wr *fwr; unsigned int ftid; struct wrq_cookie cookie; ftid = sc->tids.ftid_base + fidx; fwr = start_wrq_wr(&sc->sge.mgmtq, howmany(sizeof(*fwr), 16), &cookie); if (fwr == NULL) return 
(ENOMEM); bzero(fwr, sizeof (*fwr)); t4_mk_filtdelwr(ftid, fwr, sc->sge.fwq.abs_id); f->pending = 1; commit_wrq_wr(&sc->sge.mgmtq, fwr, &cookie); return (0); } int t4_filter_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_set_tcb_rpl *rpl = (const void *)(rss + 1); unsigned int idx = GET_TID(rpl); unsigned int rc; struct filter_entry *f; KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__, rss->opcode)); MPASS(iq == &sc->sge.fwq); MPASS(is_ftid(sc, idx)); idx -= sc->tids.ftid_base; f = &sc->tids.ftid_tab[idx]; rc = G_COOKIE(rpl->cookie); mtx_lock(&sc->tids.ftid_lock); if (rc == FW_FILTER_WR_FLT_ADDED) { KASSERT(f->pending, ("%s: filter[%u] isn't pending.", __func__, idx)); f->smtidx = (be64toh(rpl->oldval) >> 24) & 0xff; f->pending = 0; /* asynchronous setup completed */ f->valid = 1; } else { if (rc != FW_FILTER_WR_FLT_DELETED) { /* Add or delete failed, display an error */ log(LOG_ERR, "filter %u setup failed with error %u\n", idx, rc); } clear_filter(f); sc->tids.ftids_in_use--; } wakeup(&sc->tids.ftid_tab); mtx_unlock(&sc->tids.ftid_lock); return (0); } static int set_tcb_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { MPASS(iq->set_tcb_rpl != NULL); return (iq->set_tcb_rpl(iq, rss, m)); } static int l2t_write_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { MPASS(iq->l2t_write_rpl != NULL); return (iq->l2t_write_rpl(iq, rss, m)); } static int get_sge_context(struct adapter *sc, struct t4_sge_context *cntxt) { int rc; if (cntxt->cid > M_CTXTQID) return (EINVAL); if (cntxt->mem_id != CTXT_EGRESS && cntxt->mem_id != CTXT_INGRESS && cntxt->mem_id != CTXT_FLM && cntxt->mem_id != CTXT_CNM) return (EINVAL); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ctxt"); if (rc) return (rc); if (sc->flags & FW_OK) { rc = -t4_sge_ctxt_rd(sc, sc->mbox, cntxt->cid, cntxt->mem_id, &cntxt->data[0]); if (rc == 0) goto done; } /* * Read via 
firmware failed or wasn't even attempted. Read directly via * the backdoor. */ rc = -t4_sge_ctxt_rd_bd(sc, cntxt->cid, cntxt->mem_id, &cntxt->data[0]); done: end_synchronized_op(sc, 0); return (rc); } static int load_fw(struct adapter *sc, struct t4_data *fw) { int rc; uint8_t *fw_data; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldfw"); if (rc) return (rc); /* * The firmware, with the sole exception of the memory parity error * handler, runs from memory and not flash. It is almost always safe to * install a new firmware on a running system. Just set bit 1 in * hw.cxgbe.dflags or dev...dflags first. */ if (sc->flags & FULL_INIT_DONE && (sc->debug_flags & DF_LOAD_FW_ANYTIME) == 0) { rc = EBUSY; goto done; } fw_data = malloc(fw->len, M_CXGBE, M_WAITOK); if (fw_data == NULL) { rc = ENOMEM; goto done; } rc = copyin(fw->data, fw_data, fw->len); if (rc == 0) rc = -t4_load_fw(sc, fw_data, fw->len); free(fw_data, M_CXGBE); done: end_synchronized_op(sc, 0); return (rc); } static int load_cfg(struct adapter *sc, struct t4_data *cfg) { int rc; uint8_t *cfg_data = NULL; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldcf"); if (rc) return (rc); if (cfg->len == 0) { /* clear */ rc = -t4_load_cfg(sc, NULL, 0); goto done; } cfg_data = malloc(cfg->len, M_CXGBE, M_WAITOK); if (cfg_data == NULL) { rc = ENOMEM; goto done; } rc = copyin(cfg->data, cfg_data, cfg->len); if (rc == 0) rc = -t4_load_cfg(sc, cfg_data, cfg->len); free(cfg_data, M_CXGBE); done: end_synchronized_op(sc, 0); return (rc); } static int load_boot(struct adapter *sc, struct t4_bootrom *br) { int rc; uint8_t *br_data = NULL; u_int offset; if (br->len > 1024 * 1024) return (EFBIG); if (br->pf_offset == 0) { /* pfidx */ if (br->pfidx_addr > 7) return (EINVAL); offset = G_OFFSET(t4_read_reg(sc, PF_REG(br->pfidx_addr, A_PCIE_PF_EXPROM_OFST))); } else if (br->pf_offset == 1) { /* offset */ offset = G_OFFSET(br->pfidx_addr); } else { return (EINVAL); } rc = begin_synchronized_op(sc, NULL, 
SLEEP_OK | INTR_OK, "t4ldbr"); if (rc) return (rc); if (br->len == 0) { /* clear */ rc = -t4_load_boot(sc, NULL, offset, 0); goto done; } br_data = malloc(br->len, M_CXGBE, M_WAITOK); if (br_data == NULL) { rc = ENOMEM; goto done; } rc = copyin(br->data, br_data, br->len); if (rc == 0) rc = -t4_load_boot(sc, br_data, offset, br->len); free(br_data, M_CXGBE); done: end_synchronized_op(sc, 0); return (rc); } static int load_bootcfg(struct adapter *sc, struct t4_data *bc) { int rc; uint8_t *bc_data = NULL; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldcf"); if (rc) return (rc); if (bc->len == 0) { /* clear */ rc = -t4_load_bootcfg(sc, NULL, 0); goto done; } bc_data = malloc(bc->len, M_CXGBE, M_WAITOK); if (bc_data == NULL) { rc = ENOMEM; goto done; } rc = copyin(bc->data, bc_data, bc->len); if (rc == 0) rc = -t4_load_bootcfg(sc, bc_data, bc->len); free(bc_data, M_CXGBE); done: end_synchronized_op(sc, 0); return (rc); } static int cudbg_dump(struct adapter *sc, struct t4_cudbg_dump *dump) { int rc; struct cudbg_init *cudbg; void *handle, *buf; /* buf is large, don't block if no memory is available */ buf = malloc(dump->len, M_CXGBE, M_NOWAIT | M_ZERO); if (buf == NULL) return (ENOMEM); handle = cudbg_alloc_handle(); if (handle == NULL) { rc = ENOMEM; goto done; } cudbg = cudbg_get_init(handle); cudbg->adap = sc; cudbg->print = (cudbg_print_cb)printf; #ifndef notyet device_printf(sc->dev, "%s: wr_flash %u, len %u, data %p.\n", __func__, dump->wr_flash, dump->len, dump->data); #endif if (dump->wr_flash) cudbg->use_flash = 1; MPASS(sizeof(cudbg->dbg_bitmap) == sizeof(dump->bitmap)); memcpy(cudbg->dbg_bitmap, dump->bitmap, sizeof(cudbg->dbg_bitmap)); rc = cudbg_collect(handle, buf, &dump->len); if (rc != 0) goto done; rc = copyout(buf, dump->data, dump->len); done: cudbg_free_handle(handle); free(buf, M_CXGBE); return (rc); } #define MAX_READ_BUF_SIZE (128 * 1024) static int read_card_mem(struct adapter *sc, int win, struct t4_mem_range *mr) { uint32_t 
addr, remaining, n; uint32_t *buf; int rc; uint8_t *dst; rc = validate_mem_range(sc, mr->addr, mr->len); if (rc != 0) return (rc); buf = malloc(min(mr->len, MAX_READ_BUF_SIZE), M_CXGBE, M_WAITOK); addr = mr->addr; remaining = mr->len; dst = (void *)mr->data; while (remaining) { n = min(remaining, MAX_READ_BUF_SIZE); read_via_memwin(sc, 2, addr, buf, n); rc = copyout(buf, dst, n); if (rc != 0) break; dst += n; remaining -= n; addr += n; } free(buf, M_CXGBE); return (rc); } #undef MAX_READ_BUF_SIZE static int read_i2c(struct adapter *sc, struct t4_i2c_data *i2cd) { int rc; if (i2cd->len == 0 || i2cd->port_id >= sc->params.nports) return (EINVAL); if (i2cd->len > sizeof(i2cd->data)) return (EFBIG); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4i2crd"); if (rc) return (rc); rc = -t4_i2c_rd(sc, sc->mbox, i2cd->port_id, i2cd->dev_addr, i2cd->offset, i2cd->len, &i2cd->data[0]); end_synchronized_op(sc, 0); return (rc); } int t4_os_find_pci_capability(struct adapter *sc, int cap) { int i; return (pci_find_cap(sc->dev, cap, &i) == 0 ? 
i : 0); } int t4_os_pci_save_state(struct adapter *sc) { device_t dev; struct pci_devinfo *dinfo; dev = sc->dev; dinfo = device_get_ivars(dev); pci_cfg_save(dev, dinfo, 0); return (0); } int t4_os_pci_restore_state(struct adapter *sc) { device_t dev; struct pci_devinfo *dinfo; dev = sc->dev; dinfo = device_get_ivars(dev); pci_cfg_restore(dev, dinfo); return (0); } void t4_os_portmod_changed(struct port_info *pi) { struct adapter *sc = pi->adapter; struct vi_info *vi; struct ifnet *ifp; static const char *mod_str[] = { NULL, "LR", "SR", "ER", "TWINAX", "active TWINAX", "LRM" }; PORT_LOCK(pi); build_medialist(pi, &pi->media); PORT_UNLOCK(pi); vi = &pi->vi[0]; if (begin_synchronized_op(sc, vi, HOLD_LOCK, "t4mod") == 0) { init_l1cfg(pi); end_synchronized_op(sc, LOCK_HELD); } ifp = vi->ifp; if (pi->mod_type == FW_PORT_MOD_TYPE_NONE) if_printf(ifp, "transceiver unplugged.\n"); else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN) if_printf(ifp, "unknown transceiver inserted.\n"); else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED) if_printf(ifp, "unsupported transceiver inserted.\n"); else if (pi->mod_type > 0 && pi->mod_type < nitems(mod_str)) { if_printf(ifp, "%dGbps %s transceiver inserted.\n", port_top_speed(pi), mod_str[pi->mod_type]); } else { if_printf(ifp, "transceiver (type %d) inserted.\n", pi->mod_type); } } void t4_os_link_changed(struct port_info *pi) { struct vi_info *vi; struct ifnet *ifp; struct link_config *lc; int v; for_each_vi(pi, v, vi) { ifp = vi->ifp; if (ifp == NULL) continue; lc = &pi->link_cfg; if (lc->link_ok) { ifp->if_baudrate = IF_Mbps(lc->speed); if_link_state_change(ifp, LINK_STATE_UP); } else { if_link_state_change(ifp, LINK_STATE_DOWN); } } } void t4_iterate(void (*func)(struct adapter *, void *), void *arg) { struct adapter *sc; sx_slock(&t4_list_lock); SLIST_FOREACH(sc, &t4_list, link) { /* * func should not make any assumptions about what state sc is * in - the only guarantee is that sc->sc_lock is a valid lock. 
*/ func(sc, arg); } sx_sunlock(&t4_list_lock); } static int t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag, struct thread *td) { int rc; struct adapter *sc = dev->si_drv1; rc = priv_check(td, PRIV_DRIVER); if (rc != 0) return (rc); switch (cmd) { case CHELSIO_T4_GETREG: { struct t4_reg *edata = (struct t4_reg *)data; if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len) return (EFAULT); if (edata->size == 4) edata->val = t4_read_reg(sc, edata->addr); else if (edata->size == 8) edata->val = t4_read_reg64(sc, edata->addr); else return (EINVAL); break; } case CHELSIO_T4_SETREG: { struct t4_reg *edata = (struct t4_reg *)data; if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len) return (EFAULT); if (edata->size == 4) { if (edata->val & 0xffffffff00000000) return (EINVAL); t4_write_reg(sc, edata->addr, (uint32_t) edata->val); } else if (edata->size == 8) t4_write_reg64(sc, edata->addr, edata->val); else return (EINVAL); break; } case CHELSIO_T4_REGDUMP: { struct t4_regdump *regs = (struct t4_regdump *)data; int reglen = t4_get_regs_len(sc); uint8_t *buf; if (regs->len < reglen) { regs->len = reglen; /* hint to the caller */ return (ENOBUFS); } regs->len = reglen; buf = malloc(reglen, M_CXGBE, M_WAITOK | M_ZERO); get_regs(sc, regs, buf); rc = copyout(buf, regs->data, reglen); free(buf, M_CXGBE); break; } case CHELSIO_T4_GET_FILTER_MODE: rc = get_filter_mode(sc, (uint32_t *)data); break; case CHELSIO_T4_SET_FILTER_MODE: rc = set_filter_mode(sc, *(uint32_t *)data); break; case CHELSIO_T4_GET_FILTER: rc = get_filter(sc, (struct t4_filter *)data); break; case CHELSIO_T4_SET_FILTER: rc = set_filter(sc, (struct t4_filter *)data); break; case CHELSIO_T4_DEL_FILTER: rc = del_filter(sc, (struct t4_filter *)data); break; case CHELSIO_T4_GET_SGE_CONTEXT: rc = get_sge_context(sc, (struct t4_sge_context *)data); break; case CHELSIO_T4_LOAD_FW: rc = load_fw(sc, (struct t4_data *)data); break; case CHELSIO_T4_GET_MEM: rc = read_card_mem(sc, 2, 
(struct t4_mem_range *)data); break; case CHELSIO_T4_GET_I2C: rc = read_i2c(sc, (struct t4_i2c_data *)data); break; case CHELSIO_T4_CLEAR_STATS: { int i, v, bg_map; u_int port_id = *(uint32_t *)data; struct port_info *pi; struct vi_info *vi; if (port_id >= sc->params.nports) return (EINVAL); pi = sc->port[port_id]; if (pi == NULL) return (EIO); /* MAC stats */ t4_clr_port_stats(sc, pi->tx_chan); pi->tx_parse_error = 0; pi->tnl_cong_drops = 0; mtx_lock(&sc->reg_lock); for_each_vi(pi, v, vi) { if (vi->flags & VI_INIT_DONE) t4_clr_vi_stats(sc, vi->viid); } bg_map = pi->mps_bg_map; v = 0; /* reuse */ while (bg_map) { i = ffs(bg_map) - 1; t4_write_indirect(sc, A_TP_MIB_INDEX, A_TP_MIB_DATA, &v, 1, A_TP_MIB_TNL_CNG_DROP_0 + i); bg_map &= ~(1 << i); } mtx_unlock(&sc->reg_lock); /* * Since this command accepts a port, clear stats for * all VIs on this port. */ for_each_vi(pi, v, vi) { if (vi->flags & VI_INIT_DONE) { struct sge_rxq *rxq; struct sge_txq *txq; struct sge_wrq *wrq; for_each_rxq(vi, i, rxq) { #if defined(INET) || defined(INET6) rxq->lro.lro_queued = 0; rxq->lro.lro_flushed = 0; #endif rxq->rxcsum = 0; rxq->vlan_extraction = 0; } for_each_txq(vi, i, txq) { txq->txcsum = 0; txq->tso_wrs = 0; txq->vlan_insertion = 0; txq->imm_wrs = 0; txq->sgl_wrs = 0; txq->txpkt_wrs = 0; txq->txpkts0_wrs = 0; txq->txpkts1_wrs = 0; txq->txpkts0_pkts = 0; txq->txpkts1_pkts = 0; mp_ring_reset_stats(txq->r); } #ifdef TCP_OFFLOAD /* nothing to clear for each ofld_rxq */ for_each_ofld_txq(vi, i, wrq) { wrq->tx_wrs_direct = 0; wrq->tx_wrs_copied = 0; } #endif if (IS_MAIN_VI(vi)) { wrq = &sc->sge.ctrlq[pi->port_id]; wrq->tx_wrs_direct = 0; wrq->tx_wrs_copied = 0; } } } break; } case CHELSIO_T4_SCHED_CLASS: rc = t4_set_sched_class(sc, (struct t4_sched_params *)data); break; case CHELSIO_T4_SCHED_QUEUE: rc = t4_set_sched_queue(sc, (struct t4_sched_queue *)data); break; case CHELSIO_T4_GET_TRACER: rc = t4_get_tracer(sc, (struct t4_tracer *)data); break; case CHELSIO_T4_SET_TRACER: rc = 
t4_set_tracer(sc, (struct t4_tracer *)data); break; case CHELSIO_T4_LOAD_CFG: rc = load_cfg(sc, (struct t4_data *)data); break; case CHELSIO_T4_LOAD_BOOT: rc = load_boot(sc, (struct t4_bootrom *)data); break; case CHELSIO_T4_LOAD_BOOTCFG: rc = load_bootcfg(sc, (struct t4_data *)data); break; case CHELSIO_T4_CUDBG_DUMP: rc = cudbg_dump(sc, (struct t4_cudbg_dump *)data); break; default: rc = ENOTTY; } return (rc); } void t4_db_full(struct adapter *sc) { CXGBE_UNIMPLEMENTED(__func__); } void t4_db_dropped(struct adapter *sc) { CXGBE_UNIMPLEMENTED(__func__); } #ifdef TCP_OFFLOAD static int toe_capability(struct vi_info *vi, int enable) { int rc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; ASSERT_SYNCHRONIZED_OP(sc); if (!is_offload(sc)) return (ENODEV); if (enable) { if ((vi->ifp->if_capenable & IFCAP_TOE) != 0) { /* TOE is already enabled. */ return (0); } /* * We need the port's queues around so that we're able to send * and receive CPLs to/from the TOE even if the ifnet for this * port has never been UP'd administratively. */ if (!(vi->flags & VI_INIT_DONE)) { rc = vi_full_init(vi); if (rc) return (rc); } if (!(pi->vi[0].flags & VI_INIT_DONE)) { rc = vi_full_init(&pi->vi[0]); if (rc) return (rc); } if (isset(&sc->offload_map, pi->port_id)) { /* TOE is enabled on another VI of this port. */ pi->uld_vis++; return (0); } if (!uld_active(sc, ULD_TOM)) { rc = t4_activate_uld(sc, ULD_TOM); if (rc == EAGAIN) { log(LOG_WARNING, "You must kldload t4_tom.ko before trying " "to enable TOE on a cxgbe interface.\n"); } if (rc != 0) return (rc); KASSERT(sc->tom_softc != NULL, ("%s: TOM activated but softc NULL", __func__)); KASSERT(uld_active(sc, ULD_TOM), ("%s: TOM activated but flag not set", __func__)); } /* Activate iWARP and iSCSI too, if the modules are loaded. 
*/
		if (!uld_active(sc, ULD_IWARP))
			(void) t4_activate_uld(sc, ULD_IWARP);
		if (!uld_active(sc, ULD_ISCSI))
			(void) t4_activate_uld(sc, ULD_ISCSI);

		pi->uld_vis++;
		setbit(&sc->offload_map, pi->port_id);
	} else {
		pi->uld_vis--;

		/*
		 * Keep the port marked in offload_map while uld_vis is still
		 * positive; also nothing to clear if the bit was never set.
		 */
		if (!isset(&sc->offload_map, pi->port_id) || pi->uld_vis > 0)
			return (0);

		KASSERT(uld_active(sc, ULD_TOM),
		    ("%s: TOM never initialized?", __func__));
		clrbit(&sc->offload_map, pi->port_id);
	}

	return (0);
}

/*
 * Add an upper layer driver to the global list.
 *
 * The list is searched and modified with t4_uld_list_lock held exclusively.
 * Returns EEXIST if an entry with the same uld_id is already on the list;
 * otherwise ui is inserted at the head, its refcount is reset to 0, and 0 is
 * returned.
 */
int
t4_register_uld(struct uld_info *ui)
{
	int rc = 0;
	struct uld_info *u;

	sx_xlock(&t4_uld_list_lock);
	SLIST_FOREACH(u, &t4_uld_list, link) {
		if (u->uld_id == ui->uld_id) {
			rc = EEXIST;
			goto done;
		}
	}

	SLIST_INSERT_HEAD(&t4_uld_list, ui, link);
	ui->refcount = 0;
done:
	sx_xunlock(&t4_uld_list_lock);
	return (rc);
}

/*
 * Remove an upper layer driver from the global list.
 *
 * Returns EINVAL if ui is not on the list, EBUSY if it is on the list but its
 * refcount is still positive (it is left in place), and 0 once it has been
 * removed.  The walk and removal happen with t4_uld_list_lock held
 * exclusively.
 */
int
t4_unregister_uld(struct uld_info *ui)
{
	int rc = EINVAL;
	struct uld_info *u;

	sx_xlock(&t4_uld_list_lock);

	SLIST_FOREACH(u, &t4_uld_list, link) {
		if (u == ui) {
			if (ui->refcount > 0) {
				rc = EBUSY;
				goto done;
			}

			SLIST_REMOVE(&t4_uld_list, ui, uld_info, link);
			rc = 0;
			goto done;
		}
	}
done:
	sx_xunlock(&t4_uld_list_lock);
	return (rc);
}

/*
 * Activate the registered upper layer driver with the given id on adapter sc.
 *
 * Returns EINVAL if id is outside [0, ULD_MAX] and EAGAIN if no registered
 * ULD has that id.  If the adapter does not have FULL_INIT_DONE set,
 * adapter_full_init() is run first and its error, if any, is returned.
 * Otherwise the result of the ULD's activate method is returned; on success
 * the id's bit is set in sc->active_ulds and the ULD's refcount is bumped.
 * The list is walked with t4_uld_list_lock held shared.
 */
int
t4_activate_uld(struct adapter *sc, int id)
{
	int rc;
	struct uld_info *ui;

	ASSERT_SYNCHRONIZED_OP(sc);

	if (id < 0 || id > ULD_MAX)
		return (EINVAL);
	rc = EAGAIN;	/* kldload the module with this ULD and try again. */

	sx_slock(&t4_uld_list_lock);

	SLIST_FOREACH(ui, &t4_uld_list, link) {
		if (ui->uld_id == id) {
			if (!(sc->flags & FULL_INIT_DONE)) {
				rc = adapter_full_init(sc);
				if (rc != 0)
					break;
			}

			rc = ui->activate(sc);
			if (rc == 0) {
				setbit(&sc->active_ulds, id);
				ui->refcount++;
			}
			break;
		}
	}

	sx_sunlock(&t4_uld_list_lock);

	return (rc);
}

/*
 * Deactivate the registered upper layer driver with the given id on adapter
 * sc.
 *
 * Returns EINVAL if id is outside [0, ULD_MAX] and ENXIO if no registered ULD
 * has that id.  Otherwise the result of the ULD's deactivate method is
 * returned; on success the id's bit is cleared in sc->active_ulds and the
 * ULD's refcount is dropped.  The list is walked with t4_uld_list_lock held
 * shared.
 */
int
t4_deactivate_uld(struct adapter *sc, int id)
{
	int rc;
	struct uld_info *ui;

	ASSERT_SYNCHRONIZED_OP(sc);

	if (id < 0 || id > ULD_MAX)
		return (EINVAL);
	rc = ENXIO;

	sx_slock(&t4_uld_list_lock);

	SLIST_FOREACH(ui, &t4_uld_list, link) {
		if (ui->uld_id == id) {
			rc = ui->deactivate(sc);
			if (rc == 0) {
				clrbit(&sc->active_ulds, id);
				ui->refcount--;
			}
			break;
		}
	}

	sx_sunlock(&t4_uld_list_lock);

	return (rc);
}

/*
 * Report whether uld_id's bit is set in sc->active_ulds.  uld_id must be in
 * [0, ULD_MAX]; this is only checked by MPASS.
 */
int
uld_active(struct adapter *sc, int uld_id)
{

	MPASS(uld_id >= 0 && uld_id <= ULD_MAX);

	return (isset(&sc->active_ulds, uld_id));
}
#endif

/*
 * t = ptr to tunable.
 * nc = number of CPUs.
 * c = compiled in default for that tunable.
 *
 * A positive *t is left untouched.  Otherwise the requested count is the
 * magnitude of *t when it is negative, or c when it is zero, and *t is set to
 * the smaller of that count and nc.
 */
static void
calculate_nqueues(int *t, int nc, const int c)
{
	int nq;

	if (*t > 0)
		return;
	nq = *t < 0 ? -*t : c;
	*t = min(nc, nq);
}

/*
 * Come up with reasonable defaults for some of the tunables, provided they're
 * not set by the user (in which case we'll use the values as is).
*/ static void tweak_tunables(void) { int nc = mp_ncpus; /* our snapshot of the number of CPUs */ if (t4_ntxq < 1) { #ifdef RSS t4_ntxq = rss_getnumbuckets(); #else calculate_nqueues(&t4_ntxq, nc, NTXQ); #endif } calculate_nqueues(&t4_ntxq_vi, nc, NTXQ_VI); if (t4_nrxq < 1) { #ifdef RSS t4_nrxq = rss_getnumbuckets(); #else calculate_nqueues(&t4_nrxq, nc, NRXQ); #endif } calculate_nqueues(&t4_nrxq_vi, nc, NRXQ_VI); #ifdef TCP_OFFLOAD calculate_nqueues(&t4_nofldtxq, nc, NOFLDTXQ); calculate_nqueues(&t4_nofldtxq_vi, nc, NOFLDTXQ_VI); calculate_nqueues(&t4_nofldrxq, nc, NOFLDRXQ); calculate_nqueues(&t4_nofldrxq_vi, nc, NOFLDRXQ_VI); if (t4_toecaps_allowed == -1) t4_toecaps_allowed = FW_CAPS_CONFIG_TOE; if (t4_rdmacaps_allowed == -1) { t4_rdmacaps_allowed = FW_CAPS_CONFIG_RDMA_RDDP | FW_CAPS_CONFIG_RDMA_RDMAC; } if (t4_iscsicaps_allowed == -1) { t4_iscsicaps_allowed = FW_CAPS_CONFIG_ISCSI_INITIATOR_PDU | FW_CAPS_CONFIG_ISCSI_TARGET_PDU | FW_CAPS_CONFIG_ISCSI_T10DIF; } if (t4_tmr_idx_ofld < 0 || t4_tmr_idx_ofld >= SGE_NTIMERS) t4_tmr_idx_ofld = TMR_IDX_OFLD; if (t4_pktc_idx_ofld < -1 || t4_pktc_idx_ofld >= SGE_NCOUNTERS) t4_pktc_idx_ofld = PKTC_IDX_OFLD; #else if (t4_toecaps_allowed == -1) t4_toecaps_allowed = 0; if (t4_rdmacaps_allowed == -1) t4_rdmacaps_allowed = 0; if (t4_iscsicaps_allowed == -1) t4_iscsicaps_allowed = 0; #endif #ifdef DEV_NETMAP calculate_nqueues(&t4_nnmtxq_vi, nc, NNMTXQ_VI); calculate_nqueues(&t4_nnmrxq_vi, nc, NNMRXQ_VI); #endif if (t4_tmr_idx < 0 || t4_tmr_idx >= SGE_NTIMERS) t4_tmr_idx = TMR_IDX; if (t4_pktc_idx < -1 || t4_pktc_idx >= SGE_NCOUNTERS) t4_pktc_idx = PKTC_IDX; if (t4_qsize_txq < 128) t4_qsize_txq = 128; if (t4_qsize_rxq < 128) t4_qsize_rxq = 128; while (t4_qsize_rxq & 7) t4_qsize_rxq++; t4_intr_types &= INTR_MSIX | INTR_MSI | INTR_INTX; /* * Number of VIs to create per-port. The first VI is the "main" regular * VI for the port. The rest are additional virtual interfaces on the * same physical port. 
Note that the main VI does not have native * netmap support but the extra VIs do. * * Limit the number of VIs per port to the number of available * MAC addresses per port. */ if (t4_num_vis < 1) t4_num_vis = 1; if (t4_num_vis > nitems(vi_mac_funcs)) { t4_num_vis = nitems(vi_mac_funcs); printf("cxgbe: number of VIs limited to %d\n", t4_num_vis); } if (pcie_relaxed_ordering < 0 || pcie_relaxed_ordering > 2) { pcie_relaxed_ordering = 1; #if defined(__i386__) || defined(__amd64__) if (cpu_vendor_id == CPU_VENDOR_INTEL) pcie_relaxed_ordering = 0; #endif } } #ifdef DDB static void t4_dump_tcb(struct adapter *sc, int tid) { uint32_t base, i, j, off, pf, reg, save, tcb_addr, win_pos; reg = PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, 2); save = t4_read_reg(sc, reg); base = sc->memwin[2].mw_base; /* Dump TCB for the tid */ tcb_addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE); tcb_addr += tid * TCB_SIZE; if (is_t4(sc)) { pf = 0; win_pos = tcb_addr & ~0xf; /* start must be 16B aligned */ } else { pf = V_PFNUM(sc->pf); win_pos = tcb_addr & ~0x7f; /* start must be 128B aligned */ } t4_write_reg(sc, reg, win_pos | pf); t4_read_reg(sc, reg); off = tcb_addr - win_pos; for (i = 0; i < 4; i++) { uint32_t buf[8]; for (j = 0; j < 8; j++, off += 4) buf[j] = htonl(t4_read_reg(sc, base + off)); db_printf("%08x %08x %08x %08x %08x %08x %08x %08x\n", buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7]); } t4_write_reg(sc, reg, save); t4_read_reg(sc, reg); } static void t4_dump_devlog(struct adapter *sc) { struct devlog_params *dparams = &sc->params.devlog; struct fw_devlog_e e; int i, first, j, m, nentries, rc; uint64_t ftstamp = UINT64_MAX; if (dparams->start == 0) { db_printf("devlog params not valid\n"); return; } nentries = dparams->size / sizeof(struct fw_devlog_e); m = fwmtype_to_hwmtype(dparams->memtype); /* Find the first entry. 
*/ first = -1; for (i = 0; i < nentries && !db_pager_quit; i++) { rc = -t4_mem_read(sc, m, dparams->start + i * sizeof(e), sizeof(e), (void *)&e); if (rc != 0) break; if (e.timestamp == 0) break; e.timestamp = be64toh(e.timestamp); if (e.timestamp < ftstamp) { ftstamp = e.timestamp; first = i; } } if (first == -1) return; i = first; do { rc = -t4_mem_read(sc, m, dparams->start + i * sizeof(e), sizeof(e), (void *)&e); if (rc != 0) return; if (e.timestamp == 0) return; e.timestamp = be64toh(e.timestamp); e.seqno = be32toh(e.seqno); for (j = 0; j < 8; j++) e.params[j] = be32toh(e.params[j]); db_printf("%10d %15ju %8s %8s ", e.seqno, e.timestamp, (e.level < nitems(devlog_level_strings) ? devlog_level_strings[e.level] : "UNKNOWN"), (e.facility < nitems(devlog_facility_strings) ? devlog_facility_strings[e.facility] : "UNKNOWN")); db_printf(e.fmt, e.params[0], e.params[1], e.params[2], e.params[3], e.params[4], e.params[5], e.params[6], e.params[7]); if (++i == nentries) i = 0; } while (i != first && !db_pager_quit); } static struct command_table db_t4_table = LIST_HEAD_INITIALIZER(db_t4_table); _DB_SET(_show, t4, NULL, db_show_table, 0, &db_t4_table); DB_FUNC(devlog, db_show_devlog, db_t4_table, CS_OWN, NULL) { device_t dev; int t; bool valid; valid = false; t = db_read_token(); if (t == tIDENT) { dev = device_lookup_by_name(db_tok_string); valid = true; } db_skip_to_eol(); if (!valid) { db_printf("usage: show t4 devlog \n"); return; } if (dev == NULL) { db_printf("device not found\n"); return; } t4_dump_devlog(device_get_softc(dev)); } DB_FUNC(tcb, db_show_t4tcb, db_t4_table, CS_OWN, NULL) { device_t dev; int radix, tid, t; bool valid; valid = false; radix = db_radix; db_radix = 10; t = db_read_token(); if (t == tIDENT) { dev = device_lookup_by_name(db_tok_string); t = db_read_token(); if (t == tNUMBER) { tid = db_tok_number; valid = true; } } db_radix = radix; db_skip_to_eol(); if (!valid) { db_printf("usage: show t4 tcb \n"); return; } if (dev == NULL) { 
db_printf("device not found\n"); return; } if (tid < 0) { db_printf("invalid tid\n"); return; } t4_dump_tcb(device_get_softc(dev), tid); } #endif /* * Borrowed from cesa_prep_aes_key(). * * NB: The crypto engine wants the words in the decryption key in reverse * order. */ void t4_aes_getdeckey(void *dec_key, const void *enc_key, unsigned int kbits) { uint32_t ek[4 * (RIJNDAEL_MAXNR + 1)]; uint32_t *dkey; int i; rijndaelKeySetupEnc(ek, enc_key, kbits); dkey = dec_key; dkey += (kbits / 8) / 4; switch (kbits) { case 128: for (i = 0; i < 4; i++) *--dkey = htobe32(ek[4 * 10 + i]); break; case 192: for (i = 0; i < 2; i++) *--dkey = htobe32(ek[4 * 11 + 2 + i]); for (i = 0; i < 4; i++) *--dkey = htobe32(ek[4 * 12 + i]); break; case 256: for (i = 0; i < 4; i++) *--dkey = htobe32(ek[4 * 13 + i]); for (i = 0; i < 4; i++) *--dkey = htobe32(ek[4 * 14 + i]); break; } MPASS(dkey == dec_key); } static struct sx mlu; /* mod load unload */ SX_SYSINIT(cxgbe_mlu, &mlu, "cxgbe mod load/unload"); static int mod_event(module_t mod, int cmd, void *arg) { int rc = 0; static int loaded = 0; switch (cmd) { case MOD_LOAD: sx_xlock(&mlu); if (loaded++ == 0) { t4_sge_modload(); t4_register_cpl_handler(CPL_SET_TCB_RPL, set_tcb_rpl); t4_register_cpl_handler(CPL_L2T_WRITE_RPL, l2t_write_rpl); t4_register_cpl_handler(CPL_TRACE_PKT, t4_trace_pkt); t4_register_cpl_handler(CPL_T5_TRACE_PKT, t5_trace_pkt); sx_init(&t4_list_lock, "T4/T5 adapters"); SLIST_INIT(&t4_list); #ifdef TCP_OFFLOAD sx_init(&t4_uld_list_lock, "T4/T5 ULDs"); SLIST_INIT(&t4_uld_list); #endif t4_tracer_modload(); tweak_tunables(); } sx_xunlock(&mlu); break; case MOD_UNLOAD: sx_xlock(&mlu); if (--loaded == 0) { int tries; sx_slock(&t4_list_lock); if (!SLIST_EMPTY(&t4_list)) { rc = EBUSY; sx_sunlock(&t4_list_lock); goto done_unload; } #ifdef TCP_OFFLOAD sx_slock(&t4_uld_list_lock); if (!SLIST_EMPTY(&t4_uld_list)) { rc = EBUSY; sx_sunlock(&t4_uld_list_lock); sx_sunlock(&t4_list_lock); goto done_unload; } #endif tries = 0; while (tries++ 
< 5 && t4_sge_extfree_refs() != 0) { uprintf("%ju clusters with custom free routine " "still is use.\n", t4_sge_extfree_refs()); pause("t4unload", 2 * hz); } #ifdef TCP_OFFLOAD sx_sunlock(&t4_uld_list_lock); #endif sx_sunlock(&t4_list_lock); if (t4_sge_extfree_refs() == 0) { t4_tracer_modunload(); #ifdef TCP_OFFLOAD sx_destroy(&t4_uld_list_lock); #endif sx_destroy(&t4_list_lock); t4_sge_modunload(); loaded = 0; } else { rc = EBUSY; loaded++; /* undo earlier decrement */ } } done_unload: sx_xunlock(&mlu); break; } return (rc); } static devclass_t t4_devclass, t5_devclass, t6_devclass; static devclass_t cxgbe_devclass, cxl_devclass, cc_devclass; static devclass_t vcxgbe_devclass, vcxl_devclass, vcc_devclass; DRIVER_MODULE(t4nex, pci, t4_driver, t4_devclass, mod_event, 0); MODULE_VERSION(t4nex, 1); MODULE_DEPEND(t4nex, firmware, 1, 1, 1); #ifdef DEV_NETMAP MODULE_DEPEND(t4nex, netmap, 1, 1, 1); #endif /* DEV_NETMAP */ DRIVER_MODULE(t5nex, pci, t5_driver, t5_devclass, mod_event, 0); MODULE_VERSION(t5nex, 1); MODULE_DEPEND(t5nex, firmware, 1, 1, 1); #ifdef DEV_NETMAP MODULE_DEPEND(t5nex, netmap, 1, 1, 1); #endif /* DEV_NETMAP */ DRIVER_MODULE(t6nex, pci, t6_driver, t6_devclass, mod_event, 0); MODULE_VERSION(t6nex, 1); MODULE_DEPEND(t6nex, firmware, 1, 1, 1); #ifdef DEV_NETMAP MODULE_DEPEND(t6nex, netmap, 1, 1, 1); #endif /* DEV_NETMAP */ DRIVER_MODULE(cxgbe, t4nex, cxgbe_driver, cxgbe_devclass, 0, 0); MODULE_VERSION(cxgbe, 1); DRIVER_MODULE(cxl, t5nex, cxl_driver, cxl_devclass, 0, 0); MODULE_VERSION(cxl, 1); DRIVER_MODULE(cc, t6nex, cc_driver, cc_devclass, 0, 0); MODULE_VERSION(cc, 1); DRIVER_MODULE(vcxgbe, cxgbe, vcxgbe_driver, vcxgbe_devclass, 0, 0); MODULE_VERSION(vcxgbe, 1); DRIVER_MODULE(vcxl, cxl, vcxl_driver, vcxl_devclass, 0, 0); MODULE_VERSION(vcxl, 1); DRIVER_MODULE(vcc, cc, vcc_driver, vcc_devclass, 0, 0); MODULE_VERSION(vcc, 1); Index: head/sys/dev/if_ndis/if_ndis.c =================================================================== --- 
head/sys/dev/if_ndis/if_ndis.c (revision 331796) +++ head/sys/dev/if_ndis/if_ndis.c (revision 331797) @@ -1,3426 +1,3429 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 2003 * Bill Paul . All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Bill Paul. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. * * WPA support originally contributed by Arvind Srinivasan * then hacked upon mercilessly by my. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define NDIS_DEBUG #ifdef NDIS_DEBUG #define DPRINTF(x) do { if (ndis_debug > 0) printf x; } while (0) int ndis_debug = 0; SYSCTL_INT(_debug, OID_AUTO, ndis, CTLFLAG_RW, &ndis_debug, 0, "if_ndis debug level"); #else #define DPRINTF(x) #endif SYSCTL_DECL(_hw_ndisusb); int ndisusb_halt = 1; SYSCTL_INT(_hw_ndisusb, OID_AUTO, halt, CTLFLAG_RW, &ndisusb_halt, 0, "Halt NDIS USB driver when it's attached"); /* 0 - 30 dBm to mW conversion table */ static const uint16_t dBm2mW[] = { 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 6, 6, 7, 8, 9, 10, 11, 13, 14, 16, 18, 20, 22, 25, 28, 32, 35, 40, 45, 50, 56, 63, 71, 79, 89, 100, 112, 126, 141, 158, 178, 200, 224, 251, 282, 316, 355, 398, 447, 501, 562, 631, 708, 794, 891, 1000 }; MODULE_DEPEND(ndis, ether, 1, 1, 1); MODULE_DEPEND(ndis, wlan, 1, 1, 1); MODULE_DEPEND(ndis, ndisapi, 1, 1, 1); MODULE_VERSION(ndis, 1); int ndis_attach (device_t); int ndis_detach (device_t); int ndis_suspend (device_t); int ndis_resume (device_t); void ndis_shutdown (device_t); int ndisdrv_modevent (module_t, int, void *); static void ndis_txeof (ndis_handle, ndis_packet *, ndis_status); static void ndis_rxeof (ndis_handle, ndis_packet **, uint32_t); static void ndis_rxeof_eth (ndis_handle, ndis_handle, char *, void *, uint32_t, void *, uint32_t, uint32_t); static void ndis_rxeof_done (ndis_handle); static void ndis_rxeof_xfr (kdpc *, ndis_handle, void *, void *); static void ndis_rxeof_xfr_done (ndis_handle, ndis_packet *, uint32_t, uint32_t); static void ndis_linksts (ndis_handle, ndis_status, void *, uint32_t); static void 
ndis_linksts_done (ndis_handle); /* We need to wrap these functions for amd64. */ static funcptr ndis_txeof_wrap; static funcptr ndis_rxeof_wrap; static funcptr ndis_rxeof_eth_wrap; static funcptr ndis_rxeof_done_wrap; static funcptr ndis_rxeof_xfr_wrap; static funcptr ndis_rxeof_xfr_done_wrap; static funcptr ndis_linksts_wrap; static funcptr ndis_linksts_done_wrap; static funcptr ndis_ticktask_wrap; static funcptr ndis_ifstarttask_wrap; static funcptr ndis_resettask_wrap; static funcptr ndis_inputtask_wrap; static struct ieee80211vap *ndis_vap_create(struct ieee80211com *, const char [IFNAMSIZ], int, enum ieee80211_opmode, int, const uint8_t [IEEE80211_ADDR_LEN], const uint8_t [IEEE80211_ADDR_LEN]); static void ndis_vap_delete (struct ieee80211vap *); static void ndis_tick (void *); static void ndis_ticktask (device_object *, void *); static int ndis_raw_xmit (struct ieee80211_node *, struct mbuf *, const struct ieee80211_bpf_params *); static void ndis_update_mcast (struct ieee80211com *); static void ndis_update_promisc (struct ieee80211com *); static void ndis_ifstart (struct ifnet *); static void ndis_ifstarttask (device_object *, void *); static void ndis_resettask (device_object *, void *); static void ndis_inputtask (device_object *, void *); static int ndis_ifioctl (struct ifnet *, u_long, caddr_t); static int ndis_newstate (struct ieee80211vap *, enum ieee80211_state, int); static int ndis_nettype_chan (uint32_t); static int ndis_nettype_mode (uint32_t); static void ndis_scan (void *); static void ndis_scan_results (struct ndis_softc *); static void ndis_scan_start (struct ieee80211com *); static void ndis_scan_end (struct ieee80211com *); static void ndis_set_channel (struct ieee80211com *); static void ndis_scan_curchan (struct ieee80211_scan_state *, unsigned long); static void ndis_scan_mindwell (struct ieee80211_scan_state *); static void ndis_init (void *); static void ndis_stop (struct ndis_softc *); static int ndis_ifmedia_upd (struct ifnet *); 
static void ndis_ifmedia_sts (struct ifnet *, struct ifmediareq *); static int ndis_get_bssid_list (struct ndis_softc *, ndis_80211_bssid_list_ex **); static int ndis_get_assoc (struct ndis_softc *, ndis_wlan_bssid_ex **); static int ndis_probe_offload (struct ndis_softc *); static int ndis_set_offload (struct ndis_softc *); static void ndis_getstate_80211 (struct ndis_softc *); static void ndis_setstate_80211 (struct ndis_softc *); static void ndis_auth_and_assoc (struct ndis_softc *, struct ieee80211vap *); static void ndis_media_status (struct ifnet *, struct ifmediareq *); static int ndis_set_cipher (struct ndis_softc *, int); static int ndis_set_wpa (struct ndis_softc *, void *, int); static int ndis_add_key (struct ieee80211vap *, const struct ieee80211_key *); static int ndis_del_key (struct ieee80211vap *, const struct ieee80211_key *); static void ndis_setmulti (struct ndis_softc *); static void ndis_map_sclist (void *, bus_dma_segment_t *, int, bus_size_t, int); static int ndis_ifattach(struct ndis_softc *); static int ndis_80211attach(struct ndis_softc *); static int ndis_80211ioctl(struct ieee80211com *, u_long , void *); static int ndis_80211transmit(struct ieee80211com *, struct mbuf *); static void ndis_80211parent(struct ieee80211com *); static int ndisdrv_loaded = 0; /* * This routine should call windrv_load() once for each driver * image. This will do the relocation and dynalinking for the * image, and create a Windows driver object which will be * saved in our driver database. 
*/ int ndisdrv_modevent(mod, cmd, arg) module_t mod; int cmd; void *arg; { int error = 0; switch (cmd) { case MOD_LOAD: ndisdrv_loaded++; if (ndisdrv_loaded > 1) break; windrv_wrap((funcptr)ndis_rxeof, &ndis_rxeof_wrap, 3, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_rxeof_eth, &ndis_rxeof_eth_wrap, 8, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_rxeof_done, &ndis_rxeof_done_wrap, 1, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_rxeof_xfr, &ndis_rxeof_xfr_wrap, 4, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_rxeof_xfr_done, &ndis_rxeof_xfr_done_wrap, 4, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_txeof, &ndis_txeof_wrap, 3, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_linksts, &ndis_linksts_wrap, 4, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_linksts_done, &ndis_linksts_done_wrap, 1, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_ticktask, &ndis_ticktask_wrap, 2, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_ifstarttask, &ndis_ifstarttask_wrap, 2, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_resettask, &ndis_resettask_wrap, 2, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_inputtask, &ndis_inputtask_wrap, 2, WINDRV_WRAP_STDCALL); break; case MOD_UNLOAD: ndisdrv_loaded--; if (ndisdrv_loaded > 0) break; /* fallthrough */ case MOD_SHUTDOWN: windrv_unwrap(ndis_rxeof_wrap); windrv_unwrap(ndis_rxeof_eth_wrap); windrv_unwrap(ndis_rxeof_done_wrap); windrv_unwrap(ndis_rxeof_xfr_wrap); windrv_unwrap(ndis_rxeof_xfr_done_wrap); windrv_unwrap(ndis_txeof_wrap); windrv_unwrap(ndis_linksts_wrap); windrv_unwrap(ndis_linksts_done_wrap); windrv_unwrap(ndis_ticktask_wrap); windrv_unwrap(ndis_ifstarttask_wrap); windrv_unwrap(ndis_resettask_wrap); windrv_unwrap(ndis_inputtask_wrap); break; default: error = EINVAL; break; } return (error); } /* * Program the 64-bit multicast hash filter. 
*/ static void ndis_setmulti(sc) struct ndis_softc *sc; { struct ifnet *ifp; struct ifmultiaddr *ifma; int len, mclistsz, error; uint8_t *mclist; if (!NDIS_INITIALIZED(sc)) return; if (sc->ndis_80211) return; ifp = sc->ifp; if (ifp->if_flags & IFF_ALLMULTI || ifp->if_flags & IFF_PROMISC) { sc->ndis_filter |= NDIS_PACKET_TYPE_ALL_MULTICAST; len = sizeof(sc->ndis_filter); error = ndis_set_info(sc, OID_GEN_CURRENT_PACKET_FILTER, &sc->ndis_filter, &len); if (error) device_printf(sc->ndis_dev, "set allmulti failed: %d\n", error); return; } if (TAILQ_EMPTY(&ifp->if_multiaddrs)) return; len = sizeof(mclistsz); ndis_get_info(sc, OID_802_3_MAXIMUM_LIST_SIZE, &mclistsz, &len); mclist = malloc(ETHER_ADDR_LEN * mclistsz, M_TEMP, M_NOWAIT|M_ZERO); if (mclist == NULL) { sc->ndis_filter |= NDIS_PACKET_TYPE_ALL_MULTICAST; goto out; } sc->ndis_filter |= NDIS_PACKET_TYPE_MULTICAST; len = 0; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), mclist + (ETHER_ADDR_LEN * len), ETHER_ADDR_LEN); len++; if (len > mclistsz) { if_maddr_runlock(ifp); sc->ndis_filter |= NDIS_PACKET_TYPE_ALL_MULTICAST; sc->ndis_filter &= ~NDIS_PACKET_TYPE_MULTICAST; goto out; } } if_maddr_runlock(ifp); len = len * ETHER_ADDR_LEN; error = ndis_set_info(sc, OID_802_3_MULTICAST_LIST, mclist, &len); if (error) { device_printf(sc->ndis_dev, "set mclist failed: %d\n", error); sc->ndis_filter |= NDIS_PACKET_TYPE_ALL_MULTICAST; sc->ndis_filter &= ~NDIS_PACKET_TYPE_MULTICAST; } out: free(mclist, M_TEMP); len = sizeof(sc->ndis_filter); error = ndis_set_info(sc, OID_GEN_CURRENT_PACKET_FILTER, &sc->ndis_filter, &len); if (error) device_printf(sc->ndis_dev, "set multi failed: %d\n", error); } static int ndis_set_offload(sc) struct ndis_softc *sc; { ndis_task_offload *nto; ndis_task_offload_hdr *ntoh; ndis_task_tcpip_csum *nttc; struct ifnet *ifp; int len, error; if (!NDIS_INITIALIZED(sc)) return 
(EINVAL); if (sc->ndis_80211) return (EINVAL); /* See if there's anything to set. */ ifp = sc->ifp; error = ndis_probe_offload(sc); if (error) return (error); if (sc->ndis_hwassist == 0 && ifp->if_capabilities == 0) return (0); len = sizeof(ndis_task_offload_hdr) + sizeof(ndis_task_offload) + sizeof(ndis_task_tcpip_csum); ntoh = malloc(len, M_TEMP, M_NOWAIT|M_ZERO); if (ntoh == NULL) return (ENOMEM); ntoh->ntoh_vers = NDIS_TASK_OFFLOAD_VERSION; ntoh->ntoh_len = sizeof(ndis_task_offload_hdr); ntoh->ntoh_offset_firsttask = sizeof(ndis_task_offload_hdr); ntoh->ntoh_encapfmt.nef_encaphdrlen = sizeof(struct ether_header); ntoh->ntoh_encapfmt.nef_encap = NDIS_ENCAP_IEEE802_3; ntoh->ntoh_encapfmt.nef_flags = NDIS_ENCAPFLAG_FIXEDHDRLEN; nto = (ndis_task_offload *)((char *)ntoh + ntoh->ntoh_offset_firsttask); nto->nto_vers = NDIS_TASK_OFFLOAD_VERSION; nto->nto_len = sizeof(ndis_task_offload); nto->nto_task = NDIS_TASK_TCPIP_CSUM; nto->nto_offset_nexttask = 0; nto->nto_taskbuflen = sizeof(ndis_task_tcpip_csum); nttc = (ndis_task_tcpip_csum *)nto->nto_taskbuf; if (ifp->if_capenable & IFCAP_TXCSUM) nttc->nttc_v4tx = sc->ndis_v4tx; if (ifp->if_capenable & IFCAP_RXCSUM) nttc->nttc_v4rx = sc->ndis_v4rx; error = ndis_set_info(sc, OID_TCP_TASK_OFFLOAD, ntoh, &len); free(ntoh, M_TEMP); return (error); } static int ndis_probe_offload(sc) struct ndis_softc *sc; { ndis_task_offload *nto; ndis_task_offload_hdr *ntoh; ndis_task_tcpip_csum *nttc = NULL; struct ifnet *ifp; int len, error, dummy; ifp = sc->ifp; len = sizeof(dummy); error = ndis_get_info(sc, OID_TCP_TASK_OFFLOAD, &dummy, &len); if (error != ENOSPC) return (error); ntoh = malloc(len, M_TEMP, M_NOWAIT|M_ZERO); if (ntoh == NULL) return (ENOMEM); ntoh->ntoh_vers = NDIS_TASK_OFFLOAD_VERSION; ntoh->ntoh_len = sizeof(ndis_task_offload_hdr); ntoh->ntoh_encapfmt.nef_encaphdrlen = sizeof(struct ether_header); ntoh->ntoh_encapfmt.nef_encap = NDIS_ENCAP_IEEE802_3; ntoh->ntoh_encapfmt.nef_flags = NDIS_ENCAPFLAG_FIXEDHDRLEN; error = 
ndis_get_info(sc, OID_TCP_TASK_OFFLOAD, ntoh, &len); if (error) { free(ntoh, M_TEMP); return (error); } if (ntoh->ntoh_vers != NDIS_TASK_OFFLOAD_VERSION) { free(ntoh, M_TEMP); return (EINVAL); } nto = (ndis_task_offload *)((char *)ntoh + ntoh->ntoh_offset_firsttask); while (1) { switch (nto->nto_task) { case NDIS_TASK_TCPIP_CSUM: nttc = (ndis_task_tcpip_csum *)nto->nto_taskbuf; break; /* Don't handle these yet. */ case NDIS_TASK_IPSEC: case NDIS_TASK_TCP_LARGESEND: default: break; } if (nto->nto_offset_nexttask == 0) break; nto = (ndis_task_offload *)((char *)nto + nto->nto_offset_nexttask); } if (nttc == NULL) { free(ntoh, M_TEMP); return (ENOENT); } sc->ndis_v4tx = nttc->nttc_v4tx; sc->ndis_v4rx = nttc->nttc_v4rx; if (nttc->nttc_v4tx & NDIS_TCPSUM_FLAGS_IP_CSUM) sc->ndis_hwassist |= CSUM_IP; if (nttc->nttc_v4tx & NDIS_TCPSUM_FLAGS_TCP_CSUM) sc->ndis_hwassist |= CSUM_TCP; if (nttc->nttc_v4tx & NDIS_TCPSUM_FLAGS_UDP_CSUM) sc->ndis_hwassist |= CSUM_UDP; if (sc->ndis_hwassist) ifp->if_capabilities |= IFCAP_TXCSUM; if (nttc->nttc_v4rx & NDIS_TCPSUM_FLAGS_IP_CSUM) ifp->if_capabilities |= IFCAP_RXCSUM; if (nttc->nttc_v4rx & NDIS_TCPSUM_FLAGS_TCP_CSUM) ifp->if_capabilities |= IFCAP_RXCSUM; if (nttc->nttc_v4rx & NDIS_TCPSUM_FLAGS_UDP_CSUM) ifp->if_capabilities |= IFCAP_RXCSUM; free(ntoh, M_TEMP); return (0); } static int ndis_nettype_chan(uint32_t type) { switch (type) { case NDIS_80211_NETTYPE_11FH: return (IEEE80211_CHAN_FHSS); case NDIS_80211_NETTYPE_11DS: return (IEEE80211_CHAN_B); case NDIS_80211_NETTYPE_11OFDM5: return (IEEE80211_CHAN_A); case NDIS_80211_NETTYPE_11OFDM24: return (IEEE80211_CHAN_G); } DPRINTF(("unknown channel nettype %d\n", type)); return (IEEE80211_CHAN_B); /* Default to 11B chan */ } static int ndis_nettype_mode(uint32_t type) { switch (type) { case NDIS_80211_NETTYPE_11FH: return (IEEE80211_MODE_FH); case NDIS_80211_NETTYPE_11DS: return (IEEE80211_MODE_11B); case NDIS_80211_NETTYPE_11OFDM5: return (IEEE80211_MODE_11A); case 
NDIS_80211_NETTYPE_11OFDM24: return (IEEE80211_MODE_11G); } DPRINTF(("unknown mode nettype %d\n", type)); return (IEEE80211_MODE_AUTO); } /* * Attach the interface. Allocate softc structures, do ifmedia * setup and ethernet/BPF attach. */ int ndis_attach(device_t dev) { struct ndis_softc *sc; driver_object *pdrv; device_object *pdo; int error = 0, len; int i; sc = device_get_softc(dev); mtx_init(&sc->ndis_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF); KeInitializeSpinLock(&sc->ndis_rxlock); KeInitializeSpinLock(&sc->ndisusb_tasklock); KeInitializeSpinLock(&sc->ndisusb_xferdonelock); InitializeListHead(&sc->ndis_shlist); InitializeListHead(&sc->ndisusb_tasklist); InitializeListHead(&sc->ndisusb_xferdonelist); callout_init(&sc->ndis_stat_callout, 1); mbufq_init(&sc->ndis_rxqueue, INT_MAX); /* XXXGL: sane maximum */ if (sc->ndis_iftype == PCMCIABus) { error = ndis_alloc_amem(sc); if (error) { device_printf(dev, "failed to allocate " "attribute memory\n"); goto fail; } } /* Create sysctl registry nodes */ ndis_create_sysctls(sc); /* Find the PDO for this device instance. */ if (sc->ndis_iftype == PCIBus) pdrv = windrv_lookup(0, "PCI Bus"); else if (sc->ndis_iftype == PCMCIABus) pdrv = windrv_lookup(0, "PCCARD Bus"); else pdrv = windrv_lookup(0, "USB Bus"); pdo = windrv_find_pdo(pdrv, dev); /* * Create a new functional device object for this * device. This is what creates the miniport block * for this device instance. */ if (NdisAddDevice(sc->ndis_dobj, pdo) != STATUS_SUCCESS) { device_printf(dev, "failed to create FDO!\n"); error = ENXIO; goto fail; } /* Tell the user what version of the API the driver is using. */ device_printf(dev, "NDIS API version: %d.%d\n", sc->ndis_chars->nmc_version_major, sc->ndis_chars->nmc_version_minor); /* Do resource conversion. */ if (sc->ndis_iftype == PCMCIABus || sc->ndis_iftype == PCIBus) ndis_convert_res(sc); else sc->ndis_block->nmb_rlist = NULL; /* Install our RX and TX interrupt handlers. 
*/ sc->ndis_block->nmb_senddone_func = ndis_txeof_wrap; sc->ndis_block->nmb_pktind_func = ndis_rxeof_wrap; sc->ndis_block->nmb_ethrxindicate_func = ndis_rxeof_eth_wrap; sc->ndis_block->nmb_ethrxdone_func = ndis_rxeof_done_wrap; sc->ndis_block->nmb_tdcond_func = ndis_rxeof_xfr_done_wrap; /* Override the status handler so we can detect link changes. */ sc->ndis_block->nmb_status_func = ndis_linksts_wrap; sc->ndis_block->nmb_statusdone_func = ndis_linksts_done_wrap; /* Set up work item handlers. */ sc->ndis_tickitem = IoAllocateWorkItem(sc->ndis_block->nmb_deviceobj); sc->ndis_startitem = IoAllocateWorkItem(sc->ndis_block->nmb_deviceobj); sc->ndis_resetitem = IoAllocateWorkItem(sc->ndis_block->nmb_deviceobj); sc->ndis_inputitem = IoAllocateWorkItem(sc->ndis_block->nmb_deviceobj); sc->ndisusb_xferdoneitem = IoAllocateWorkItem(sc->ndis_block->nmb_deviceobj); sc->ndisusb_taskitem = IoAllocateWorkItem(sc->ndis_block->nmb_deviceobj); KeInitializeDpc(&sc->ndis_rxdpc, ndis_rxeof_xfr_wrap, sc->ndis_block); /* Call driver's init routine. */ if (ndis_init_nic(sc)) { device_printf(dev, "init handler failed\n"); error = ENXIO; goto fail; } /* * Figure out how big to make the TX buffer pool. */ len = sizeof(sc->ndis_maxpkts); if (ndis_get_info(sc, OID_GEN_MAXIMUM_SEND_PACKETS, &sc->ndis_maxpkts, &len)) { device_printf(dev, "failed to get max TX packets\n"); error = ENXIO; goto fail; } /* * If this is a deserialized miniport, we don't have * to honor the OID_GEN_MAXIMUM_SEND_PACKETS result. */ if (!NDIS_SERIALIZED(sc->ndis_block)) sc->ndis_maxpkts = NDIS_TXPKTS; /* Enforce some sanity, just in case. */ if (sc->ndis_maxpkts == 0) sc->ndis_maxpkts = 10; sc->ndis_txarray = malloc(sizeof(ndis_packet *) * sc->ndis_maxpkts, M_DEVBUF, M_NOWAIT|M_ZERO); /* Allocate a pool of ndis_packets for TX encapsulation. 
*/ NdisAllocatePacketPool(&i, &sc->ndis_txpool, sc->ndis_maxpkts, PROTOCOL_RESERVED_SIZE_IN_PACKET); if (i != NDIS_STATUS_SUCCESS) { sc->ndis_txpool = NULL; device_printf(dev, "failed to allocate TX packet pool"); error = ENOMEM; goto fail; } sc->ndis_txpending = sc->ndis_maxpkts; sc->ndis_oidcnt = 0; /* Get supported oid list. */ ndis_get_supported_oids(sc, &sc->ndis_oids, &sc->ndis_oidcnt); /* If the NDIS module requested scatter/gather, init maps. */ if (sc->ndis_sc) ndis_init_dma(sc); /* * See if the OID_802_11_CONFIGURATION OID is * supported by this driver. If it is, then this an 802.11 * wireless driver, and we should set up media for wireless. */ for (i = 0; i < sc->ndis_oidcnt; i++) if (sc->ndis_oids[i] == OID_802_11_CONFIGURATION) { sc->ndis_80211 = 1; break; } if (sc->ndis_80211) error = ndis_80211attach(sc); else error = ndis_ifattach(sc); fail: if (error) { ndis_detach(dev); return (error); } if (sc->ndis_iftype == PNPBus && ndisusb_halt == 0) return (error); DPRINTF(("attach done.\n")); /* We're done talking to the NIC for now; halt it. 
*/ ndis_halt_nic(sc); DPRINTF(("halting done.\n")); return (error); } static int ndis_80211attach(struct ndis_softc *sc) { struct ieee80211com *ic = &sc->ndis_ic; ndis_80211_rates_ex rates; struct ndis_80211_nettype_list *ntl; uint32_t arg; int mode, i, r, len, nonettypes = 1; uint8_t bands[IEEE80211_MODE_BYTES] = { 0 }; callout_init(&sc->ndis_scan_callout, 1); ic->ic_softc = sc; ic->ic_ioctl = ndis_80211ioctl; ic->ic_name = device_get_nameunit(sc->ndis_dev); ic->ic_opmode = IEEE80211_M_STA; ic->ic_phytype = IEEE80211_T_DS; ic->ic_caps = IEEE80211_C_8023ENCAP | IEEE80211_C_STA | IEEE80211_C_IBSS; setbit(ic->ic_modecaps, IEEE80211_MODE_AUTO); len = 0; r = ndis_get_info(sc, OID_802_11_NETWORK_TYPES_SUPPORTED, NULL, &len); if (r != ENOSPC) goto nonettypes; ntl = malloc(len, M_DEVBUF, M_WAITOK | M_ZERO); r = ndis_get_info(sc, OID_802_11_NETWORK_TYPES_SUPPORTED, ntl, &len); if (r != 0) { free(ntl, M_DEVBUF); goto nonettypes; } for (i = 0; i < ntl->ntl_items; i++) { mode = ndis_nettype_mode(ntl->ntl_type[i]); if (mode) { nonettypes = 0; setbit(ic->ic_modecaps, mode); setbit(bands, mode); } else device_printf(sc->ndis_dev, "Unknown nettype %d\n", ntl->ntl_type[i]); } free(ntl, M_DEVBUF); nonettypes: /* Default to 11b channels if the card did not supply any */ if (nonettypes) { setbit(ic->ic_modecaps, IEEE80211_MODE_11B); setbit(bands, IEEE80211_MODE_11B); } len = sizeof(rates); bzero((char *)&rates, len); r = ndis_get_info(sc, OID_802_11_SUPPORTED_RATES, (void *)rates, &len); if (r != 0) device_printf(sc->ndis_dev, "get rates failed: 0x%x\n", r); /* * Since the supported rates only up to 8 can be supported, * if this is not 802.11b we're just going to be faking it * all up to heck. 
*/ #define TESTSETRATE(x, y) \ do { \ int i; \ for (i = 0; i < ic->ic_sup_rates[x].rs_nrates; i++) { \ if (ic->ic_sup_rates[x].rs_rates[i] == (y)) \ break; \ } \ if (i == ic->ic_sup_rates[x].rs_nrates) { \ ic->ic_sup_rates[x].rs_rates[i] = (y); \ ic->ic_sup_rates[x].rs_nrates++; \ } \ } while (0) #define SETRATE(x, y) \ ic->ic_sup_rates[x].rs_rates[ic->ic_sup_rates[x].rs_nrates] = (y) #define INCRATE(x) \ ic->ic_sup_rates[x].rs_nrates++ ic->ic_curmode = IEEE80211_MODE_AUTO; if (isset(ic->ic_modecaps, IEEE80211_MODE_11A)) ic->ic_sup_rates[IEEE80211_MODE_11A].rs_nrates = 0; if (isset(ic->ic_modecaps, IEEE80211_MODE_11B)) ic->ic_sup_rates[IEEE80211_MODE_11B].rs_nrates = 0; if (isset(ic->ic_modecaps, IEEE80211_MODE_11G)) ic->ic_sup_rates[IEEE80211_MODE_11G].rs_nrates = 0; for (i = 0; i < len; i++) { switch (rates[i] & IEEE80211_RATE_VAL) { case 2: case 4: case 11: case 10: case 22: if (isclr(ic->ic_modecaps, IEEE80211_MODE_11B)) { /* Lazy-init 802.11b. */ setbit(ic->ic_modecaps, IEEE80211_MODE_11B); ic->ic_sup_rates[IEEE80211_MODE_11B]. rs_nrates = 0; } SETRATE(IEEE80211_MODE_11B, rates[i]); INCRATE(IEEE80211_MODE_11B); break; default: if (isset(ic->ic_modecaps, IEEE80211_MODE_11A)) { SETRATE(IEEE80211_MODE_11A, rates[i]); INCRATE(IEEE80211_MODE_11A); } if (isset(ic->ic_modecaps, IEEE80211_MODE_11G)) { SETRATE(IEEE80211_MODE_11G, rates[i]); INCRATE(IEEE80211_MODE_11G); } break; } } /* * If the hardware supports 802.11g, it most * likely supports 802.11b and all of the * 802.11b and 802.11g speeds, so maybe we can * just cheat here. Just how in the heck do * we detect turbo modes, though? 
*/ if (isset(ic->ic_modecaps, IEEE80211_MODE_11B)) { TESTSETRATE(IEEE80211_MODE_11B, IEEE80211_RATE_BASIC|2); TESTSETRATE(IEEE80211_MODE_11B, IEEE80211_RATE_BASIC|4); TESTSETRATE(IEEE80211_MODE_11B, IEEE80211_RATE_BASIC|11); TESTSETRATE(IEEE80211_MODE_11B, IEEE80211_RATE_BASIC|22); } if (isset(ic->ic_modecaps, IEEE80211_MODE_11G)) { TESTSETRATE(IEEE80211_MODE_11G, 48); TESTSETRATE(IEEE80211_MODE_11G, 72); TESTSETRATE(IEEE80211_MODE_11G, 96); TESTSETRATE(IEEE80211_MODE_11G, 108); } if (isset(ic->ic_modecaps, IEEE80211_MODE_11A)) { TESTSETRATE(IEEE80211_MODE_11A, 48); TESTSETRATE(IEEE80211_MODE_11A, 72); TESTSETRATE(IEEE80211_MODE_11A, 96); TESTSETRATE(IEEE80211_MODE_11A, 108); } #undef SETRATE #undef INCRATE #undef TESTSETRATE ieee80211_init_channels(ic, NULL, bands); /* * To test for WPA support, we need to see if we can * set AUTHENTICATION_MODE to WPA and read it back * successfully. */ i = sizeof(arg); arg = NDIS_80211_AUTHMODE_WPA; r = ndis_set_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &i); if (r == 0) { r = ndis_get_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &i); if (r == 0 && arg == NDIS_80211_AUTHMODE_WPA) ic->ic_caps |= IEEE80211_C_WPA; } /* * To test for supported ciphers, we set each * available encryption type in descending order. * If ENC3 works, then we have WEP, TKIP and AES. * If only ENC2 works, then we have WEP and TKIP. * If only ENC1 works, then we have just WEP. 
*/ i = sizeof(arg); arg = NDIS_80211_WEPSTAT_ENC3ENABLED; r = ndis_set_info(sc, OID_802_11_ENCRYPTION_STATUS, &arg, &i); if (r == 0) { ic->ic_cryptocaps |= IEEE80211_CRYPTO_WEP | IEEE80211_CRYPTO_TKIP | IEEE80211_CRYPTO_AES_CCM; goto got_crypto; } arg = NDIS_80211_WEPSTAT_ENC2ENABLED; r = ndis_set_info(sc, OID_802_11_ENCRYPTION_STATUS, &arg, &i); if (r == 0) { ic->ic_cryptocaps |= IEEE80211_CRYPTO_WEP | IEEE80211_CRYPTO_TKIP; goto got_crypto; } arg = NDIS_80211_WEPSTAT_ENC1ENABLED; r = ndis_set_info(sc, OID_802_11_ENCRYPTION_STATUS, &arg, &i); if (r == 0) ic->ic_cryptocaps |= IEEE80211_CRYPTO_WEP; got_crypto: i = sizeof(arg); r = ndis_get_info(sc, OID_802_11_POWER_MODE, &arg, &i); if (r == 0) ic->ic_caps |= IEEE80211_C_PMGT; r = ndis_get_info(sc, OID_802_11_TX_POWER_LEVEL, &arg, &i); if (r == 0) ic->ic_caps |= IEEE80211_C_TXPMGT; /* * Get station address from the driver. */ len = sizeof(ic->ic_macaddr); ndis_get_info(sc, OID_802_3_CURRENT_ADDRESS, &ic->ic_macaddr, &len); ieee80211_ifattach(ic); ic->ic_raw_xmit = ndis_raw_xmit; ic->ic_scan_start = ndis_scan_start; ic->ic_scan_end = ndis_scan_end; ic->ic_set_channel = ndis_set_channel; ic->ic_scan_curchan = ndis_scan_curchan; ic->ic_scan_mindwell = ndis_scan_mindwell; ic->ic_bsschan = IEEE80211_CHAN_ANYC; ic->ic_vap_create = ndis_vap_create; ic->ic_vap_delete = ndis_vap_delete; ic->ic_update_mcast = ndis_update_mcast; ic->ic_update_promisc = ndis_update_promisc; ic->ic_transmit = ndis_80211transmit; ic->ic_parent = ndis_80211parent; if (bootverbose) ieee80211_announce(ic); return (0); } static int ndis_ifattach(struct ndis_softc *sc) { struct ifnet *ifp; u_char eaddr[ETHER_ADDR_LEN]; int len; ifp = if_alloc(IFT_ETHER); if (ifp == NULL) return (ENOSPC); sc->ifp = ifp; ifp->if_softc = sc; /* Check for task offload support. */ ndis_probe_offload(sc); /* * Get station address from the driver. 
*/ len = sizeof(eaddr); ndis_get_info(sc, OID_802_3_CURRENT_ADDRESS, eaddr, &len); if_initname(ifp, device_get_name(sc->ndis_dev), device_get_unit(sc->ndis_dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = ndis_ifioctl; ifp->if_start = ndis_ifstart; ifp->if_init = ndis_init; ifp->if_baudrate = 10000000; IFQ_SET_MAXLEN(&ifp->if_snd, 50); ifp->if_snd.ifq_drv_maxlen = 25; IFQ_SET_READY(&ifp->if_snd); ifp->if_capenable = ifp->if_capabilities; ifp->if_hwassist = sc->ndis_hwassist; ifmedia_init(&sc->ifmedia, IFM_IMASK, ndis_ifmedia_upd, ndis_ifmedia_sts); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_10_T, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_10_T|IFM_FDX, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_100_TX, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_100_TX|IFM_FDX, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_AUTO, 0, NULL); ifmedia_set(&sc->ifmedia, IFM_ETHER|IFM_AUTO); ether_ifattach(ifp, eaddr); return (0); } static struct ieee80211vap * ndis_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ], int unit, enum ieee80211_opmode opmode, int flags, const uint8_t bssid[IEEE80211_ADDR_LEN], const uint8_t mac[IEEE80211_ADDR_LEN]) { struct ndis_vap *nvp; struct ieee80211vap *vap; if (!TAILQ_EMPTY(&ic->ic_vaps)) /* only one at a time */ return NULL; nvp = malloc(sizeof(struct ndis_vap), M_80211_VAP, M_WAITOK | M_ZERO); vap = &nvp->vap; ieee80211_vap_setup(ic, vap, name, unit, opmode, flags, bssid); /* override with driver methods */ nvp->newstate = vap->iv_newstate; vap->iv_newstate = ndis_newstate; /* complete setup */ ieee80211_vap_attach(vap, ieee80211_media_change, ndis_media_status, mac); ic->ic_opmode = opmode; /* install key handing routines */ vap->iv_key_set = ndis_add_key; vap->iv_key_delete = ndis_del_key; return vap; } static void ndis_vap_delete(struct ieee80211vap *vap) { struct ndis_vap *nvp = NDIS_VAP(vap); struct ieee80211com *ic = vap->iv_ic; struct ndis_softc *sc = ic->ic_softc; 
ndis_stop(sc); callout_drain(&sc->ndis_scan_callout); ieee80211_vap_detach(vap); free(nvp, M_80211_VAP); } /* * Shutdown hardware and free up resources. This can be called any * time after the mutex has been initialized. It is called in both * the error case in attach and the normal detach case so it needs * to be careful about only freeing resources that have actually been * allocated. */ int ndis_detach(device_t dev) { struct ifnet *ifp; struct ndis_softc *sc; driver_object *drv; sc = device_get_softc(dev); NDIS_LOCK(sc); if (!sc->ndis_80211) ifp = sc->ifp; else ifp = NULL; if (ifp != NULL) ifp->if_flags &= ~IFF_UP; if (device_is_attached(dev)) { NDIS_UNLOCK(sc); ndis_stop(sc); if (sc->ndis_80211) ieee80211_ifdetach(&sc->ndis_ic); else if (ifp != NULL) ether_ifdetach(ifp); } else NDIS_UNLOCK(sc); if (sc->ndis_tickitem != NULL) IoFreeWorkItem(sc->ndis_tickitem); if (sc->ndis_startitem != NULL) IoFreeWorkItem(sc->ndis_startitem); if (sc->ndis_resetitem != NULL) IoFreeWorkItem(sc->ndis_resetitem); if (sc->ndis_inputitem != NULL) IoFreeWorkItem(sc->ndis_inputitem); if (sc->ndisusb_xferdoneitem != NULL) IoFreeWorkItem(sc->ndisusb_xferdoneitem); if (sc->ndisusb_taskitem != NULL) IoFreeWorkItem(sc->ndisusb_taskitem); bus_generic_detach(dev); ndis_unload_driver(sc); if (sc->ndis_irq) bus_release_resource(dev, SYS_RES_IRQ, 0, sc->ndis_irq); if (sc->ndis_res_io) bus_release_resource(dev, SYS_RES_IOPORT, sc->ndis_io_rid, sc->ndis_res_io); if (sc->ndis_res_mem) bus_release_resource(dev, SYS_RES_MEMORY, sc->ndis_mem_rid, sc->ndis_res_mem); if (sc->ndis_res_altmem) bus_release_resource(dev, SYS_RES_MEMORY, sc->ndis_altmem_rid, sc->ndis_res_altmem); if (ifp != NULL) if_free(ifp); if (sc->ndis_iftype == PCMCIABus) ndis_free_amem(sc); if (sc->ndis_sc) ndis_destroy_dma(sc); if (sc->ndis_txarray) free(sc->ndis_txarray, M_DEVBUF); if (!sc->ndis_80211) ifmedia_removeall(&sc->ifmedia); if (sc->ndis_txpool != NULL) NdisFreePacketPool(sc->ndis_txpool); /* Destroy the PDO for this 
device. */ if (sc->ndis_iftype == PCIBus) drv = windrv_lookup(0, "PCI Bus"); else if (sc->ndis_iftype == PCMCIABus) drv = windrv_lookup(0, "PCCARD Bus"); else drv = windrv_lookup(0, "USB Bus"); if (drv == NULL) panic("couldn't find driver object"); windrv_destroy_pdo(drv, dev); if (sc->ndis_iftype == PCIBus) bus_dma_tag_destroy(sc->ndis_parent_tag); return (0); } int ndis_suspend(dev) device_t dev; { struct ndis_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); ifp = sc->ifp; #ifdef notdef if (NDIS_INITIALIZED(sc)) ndis_stop(sc); #endif return (0); } int ndis_resume(dev) device_t dev; { struct ndis_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); ifp = sc->ifp; if (NDIS_INITIALIZED(sc)) ndis_init(sc); return (0); } /* * The following bunch of routines are here to support drivers that * use the NdisMEthIndicateReceive()/MiniportTransferData() mechanism. * The NdisMEthIndicateReceive() handler runs at DISPATCH_LEVEL for * serialized miniports, or IRQL <= DISPATCH_LEVEL for deserialized * miniports. */ static void ndis_rxeof_eth(adapter, ctx, addr, hdr, hdrlen, lookahead, lookaheadlen, pktlen) ndis_handle adapter; ndis_handle ctx; char *addr; void *hdr; uint32_t hdrlen; void *lookahead; uint32_t lookaheadlen; uint32_t pktlen; { ndis_miniport_block *block; uint8_t irql = 0; uint32_t status; ndis_buffer *b; ndis_packet *p; struct mbuf *m; ndis_ethpriv *priv; block = adapter; m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) return; /* Save the data provided to us so far. 
*/ m->m_len = lookaheadlen + hdrlen; m->m_pkthdr.len = pktlen + hdrlen; m->m_next = NULL; m_copyback(m, 0, hdrlen, hdr); m_copyback(m, hdrlen, lookaheadlen, lookahead); /* Now create a fake NDIS_PACKET to hold the data */ NdisAllocatePacket(&status, &p, block->nmb_rxpool); if (status != NDIS_STATUS_SUCCESS) { m_freem(m); return; } p->np_m0 = m; b = IoAllocateMdl(m->m_data, m->m_pkthdr.len, FALSE, FALSE, NULL); if (b == NULL) { NdisFreePacket(p); m_freem(m); return; } p->np_private.npp_head = p->np_private.npp_tail = b; p->np_private.npp_totlen = m->m_pkthdr.len; /* Save the packet RX context somewhere. */ priv = (ndis_ethpriv *)&p->np_protocolreserved; priv->nep_ctx = ctx; if (!NDIS_SERIALIZED(block)) KeAcquireSpinLock(&block->nmb_lock, &irql); InsertTailList((&block->nmb_packetlist), (&p->np_list)); if (!NDIS_SERIALIZED(block)) KeReleaseSpinLock(&block->nmb_lock, irql); } /* * NdisMEthIndicateReceiveComplete() handler, runs at DISPATCH_LEVEL * for serialized miniports, or IRQL <= DISPATCH_LEVEL for deserialized * miniports. */ static void ndis_rxeof_done(adapter) ndis_handle adapter; { struct ndis_softc *sc; ndis_miniport_block *block; block = adapter; /* Schedule transfer/RX of queued packets. */ sc = device_get_softc(block->nmb_physdeviceobj->do_devext); KeInsertQueueDpc(&sc->ndis_rxdpc, NULL, NULL); } /* * MiniportTransferData() handler, runs at DISPATCH_LEVEL. 
*/
/*
 * Drain the miniport block's packet list.  For each queued packet the
 * miniport's transfer-data routine is called (with the block lock
 * dropped) to copy the remainder of the frame; on success the mbuf is
 * put on the softc RX queue and the input work item is scheduled, on
 * failure the mbuf is freed.
 *
 * NOTE(review): np_m0 is cleared before the transfer call, so in the
 * NDIS_STATUS_PENDING case the mbuf pointer is no longer reachable
 * from the packet when ndis_rxeof_xfr_done() runs -- confirm how the
 * pending path is meant to recover it.
 */
static void
ndis_rxeof_xfr(kdpc *dpc, ndis_handle adapter, void *sysarg1, void *sysarg2)
{
	ndis_miniport_block	*block;
	struct ndis_softc	*sc;
	ndis_packet		*p;
	list_entry		*l;
	uint32_t		status;
	ndis_ethpriv		*priv;
	struct ifnet		*ifp;
	struct mbuf		*m;

	block = adapter;
	sc = device_get_softc(block->nmb_physdeviceobj->do_devext);
	ifp = sc->ifp;

	KeAcquireSpinLockAtDpcLevel(&block->nmb_lock);

	while (!IsListEmpty(&block->nmb_packetlist)) {
		l = RemoveHeadList((&block->nmb_packetlist));
		p = CONTAINING_RECORD(l, ndis_packet, np_list);
		InitializeListHead((&p->np_list));

		priv = (ndis_ethpriv *)&p->np_protocolreserved;
		m = p->np_m0;
		p->np_softc = sc;
		p->np_m0 = NULL;

		/* Do not hold the block lock across the miniport call. */
		KeReleaseSpinLockFromDpcLevel(&block->nmb_lock);

		status = MSCALL6(sc->ndis_chars->nmc_transferdata_func,
		    p, &p->np_private.npp_totlen, block, priv->nep_ctx,
		    m->m_len, m->m_pkthdr.len - m->m_len);

		KeAcquireSpinLockAtDpcLevel(&block->nmb_lock);

		/*
		 * If status is NDIS_STATUS_PENDING, do nothing and
		 * wait for a callback to the ndis_rxeof_xfr_done()
		 * handler.
		 */

		m->m_len = m->m_pkthdr.len;
		m->m_pkthdr.rcvif = ifp;

		if (status == NDIS_STATUS_SUCCESS) {
			IoFreeMdl(p->np_private.npp_head);
			NdisFreePacket(p);
			KeAcquireSpinLockAtDpcLevel(&sc->ndis_rxlock);
			mbufq_enqueue(&sc->ndis_rxqueue, m);
			KeReleaseSpinLockFromDpcLevel(&sc->ndis_rxlock);
			IoQueueWorkItem(sc->ndis_inputitem,
			    (io_workitem_func)ndis_inputtask_wrap,
			    WORKQUEUE_CRITICAL, sc);
		} else if (status == NDIS_STATUS_FAILURE)
			m_freem(m);
	}

	KeReleaseSpinLockFromDpcLevel(&block->nmb_lock);
}

/*
 * NdisMTransferDataComplete() handler, runs at DISPATCH_LEVEL.
*/ static void ndis_rxeof_xfr_done(adapter, packet, status, len) ndis_handle adapter; ndis_packet *packet; uint32_t status; uint32_t len; { ndis_miniport_block *block; struct ndis_softc *sc; struct ifnet *ifp; struct mbuf *m; block = adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); ifp = sc->ifp; m = packet->np_m0; IoFreeMdl(packet->np_private.npp_head); NdisFreePacket(packet); if (status != NDIS_STATUS_SUCCESS) { m_freem(m); return; } m->m_len = m->m_pkthdr.len; m->m_pkthdr.rcvif = ifp; KeAcquireSpinLockAtDpcLevel(&sc->ndis_rxlock); mbufq_enqueue(&sc->ndis_rxqueue, m); KeReleaseSpinLockFromDpcLevel(&sc->ndis_rxlock); IoQueueWorkItem(sc->ndis_inputitem, (io_workitem_func)ndis_inputtask_wrap, WORKQUEUE_CRITICAL, sc); } /* * A frame has been uploaded: pass the resulting mbuf chain up to * the higher level protocols. * * When handling received NDIS packets, the 'status' field in the * out-of-band portion of the ndis_packet has special meaning. In the * most common case, the underlying NDIS driver will set this field * to NDIS_STATUS_SUCCESS, which indicates that it's ok for us to * take possession of it. We then change the status field to * NDIS_STATUS_PENDING to tell the driver that we now own the packet, * and that we will return it at some point in the future via the * return packet handler. * * If the driver hands us a packet with a status of NDIS_STATUS_RESOURCES, * this means the driver is running out of packet/buffer resources and * wants to maintain ownership of the packet. In this case, we have to * copy the packet data into local storage and let the driver keep the * packet. 
*/ static void ndis_rxeof(adapter, packets, pktcnt) ndis_handle adapter; ndis_packet **packets; uint32_t pktcnt; { struct ndis_softc *sc; ndis_miniport_block *block; ndis_packet *p; uint32_t s; ndis_tcpip_csum *csum; struct ifnet *ifp; struct mbuf *m0, *m; int i; block = (ndis_miniport_block *)adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); ifp = sc->ifp; /* * There's a slim chance the driver may indicate some packets * before we're completely ready to handle them. If we detect this, * we need to return them to the miniport and ignore them. */ if (!sc->ndis_running) { for (i = 0; i < pktcnt; i++) { p = packets[i]; if (p->np_oob.npo_status == NDIS_STATUS_SUCCESS) { p->np_refcnt++; ndis_return_packet(p); } } return; } for (i = 0; i < pktcnt; i++) { p = packets[i]; /* Stash the softc here so ptom can use it. */ p->np_softc = sc; if (ndis_ptom(&m0, p)) { device_printf(sc->ndis_dev, "ptom failed\n"); if (p->np_oob.npo_status == NDIS_STATUS_SUCCESS) ndis_return_packet(p); } else { #ifdef notdef if (p->np_oob.npo_status == NDIS_STATUS_RESOURCES) { m = m_dup(m0, M_NOWAIT); /* * NOTE: we want to destroy the mbuf here, but * we don't actually want to return it to the * driver via the return packet handler. By * bumping np_refcnt, we can prevent the * ndis_return_packet() routine from actually * doing anything. */ p->np_refcnt++; m_freem(m0); if (m == NULL) if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); else m0 = m; } else p->np_oob.npo_status = NDIS_STATUS_PENDING; #endif m = m_dup(m0, M_NOWAIT); if (p->np_oob.npo_status == NDIS_STATUS_RESOURCES) p->np_refcnt++; else p->np_oob.npo_status = NDIS_STATUS_PENDING; m_freem(m0); if (m == NULL) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); continue; } m0 = m; m0->m_pkthdr.rcvif = ifp; /* Deal with checksum offload. 
*/ if (ifp->if_capenable & IFCAP_RXCSUM && p->np_ext.npe_info[ndis_tcpipcsum_info] != NULL) { s = (uintptr_t) p->np_ext.npe_info[ndis_tcpipcsum_info]; csum = (ndis_tcpip_csum *)&s; if (csum->u.ntc_rxflags & NDIS_RXCSUM_IP_PASSED) m0->m_pkthdr.csum_flags |= CSUM_IP_CHECKED|CSUM_IP_VALID; if (csum->u.ntc_rxflags & (NDIS_RXCSUM_TCP_PASSED | NDIS_RXCSUM_UDP_PASSED)) { m0->m_pkthdr.csum_flags |= CSUM_DATA_VALID|CSUM_PSEUDO_HDR; m0->m_pkthdr.csum_data = 0xFFFF; } } KeAcquireSpinLockAtDpcLevel(&sc->ndis_rxlock); mbufq_enqueue(&sc->ndis_rxqueue, m0); KeReleaseSpinLockFromDpcLevel(&sc->ndis_rxlock); IoQueueWorkItem(sc->ndis_inputitem, (io_workitem_func)ndis_inputtask_wrap, WORKQUEUE_CRITICAL, sc); } } } /* * This routine is run at PASSIVE_LEVEL. We use this routine to pass * packets into the stack in order to avoid calling (*ifp->if_input)() * with any locks held (at DISPATCH_LEVEL, we'll be holding the * 'dispatch level' per-cpu sleep lock). */ static void ndis_inputtask(device_object *dobj, void *arg) { ndis_miniport_block *block; struct ndis_softc *sc = arg; struct mbuf *m; uint8_t irql; block = dobj->do_devext; KeAcquireSpinLock(&sc->ndis_rxlock, &irql); while ((m = mbufq_dequeue(&sc->ndis_rxqueue)) != NULL) { KeReleaseSpinLock(&sc->ndis_rxlock, irql); if ((sc->ndis_80211 != 0)) { struct ieee80211com *ic = &sc->ndis_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); if (vap != NULL) vap->iv_deliver_data(vap, vap->iv_bss, m); } else { struct ifnet *ifp = sc->ifp; (*ifp->if_input)(ifp, m); } KeAcquireSpinLock(&sc->ndis_rxlock, &irql); } KeReleaseSpinLock(&sc->ndis_rxlock, irql); } /* * A frame was downloaded to the chip. It's safe for us to clean up * the list buffers. 
*/ static void ndis_txeof(adapter, packet, status) ndis_handle adapter; ndis_packet *packet; ndis_status status; { struct ndis_softc *sc; ndis_miniport_block *block; struct ifnet *ifp; int idx; struct mbuf *m; block = (ndis_miniport_block *)adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); ifp = sc->ifp; m = packet->np_m0; idx = packet->np_txidx; if (sc->ndis_sc) bus_dmamap_unload(sc->ndis_ttag, sc->ndis_tmaps[idx]); ndis_free_packet(packet); m_freem(m); NDIS_LOCK(sc); sc->ndis_txarray[idx] = NULL; sc->ndis_txpending++; if (!sc->ndis_80211) { struct ifnet *ifp = sc->ifp; if (status == NDIS_STATUS_SUCCESS) if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); else if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; } sc->ndis_tx_timer = 0; NDIS_UNLOCK(sc); if (!sc->ndis_80211) IoQueueWorkItem(sc->ndis_startitem, (io_workitem_func)ndis_ifstarttask_wrap, WORKQUEUE_CRITICAL, sc); DPRINTF(("%s: ndis_ifstarttask_wrap sc=%p\n", __func__, sc)); } static void ndis_linksts(adapter, status, sbuf, slen) ndis_handle adapter; ndis_status status; void *sbuf; uint32_t slen; { ndis_miniport_block *block; struct ndis_softc *sc; block = adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); sc->ndis_sts = status; /* Event list is all full up, drop this one. */ NDIS_LOCK(sc); if (sc->ndis_evt[sc->ndis_evtpidx].ne_sts) { NDIS_UNLOCK(sc); return; } /* Cache the event. 
*/ if (slen) { sc->ndis_evt[sc->ndis_evtpidx].ne_buf = malloc(slen, M_TEMP, M_NOWAIT); if (sc->ndis_evt[sc->ndis_evtpidx].ne_buf == NULL) { NDIS_UNLOCK(sc); return; } bcopy((char *)sbuf, sc->ndis_evt[sc->ndis_evtpidx].ne_buf, slen); } sc->ndis_evt[sc->ndis_evtpidx].ne_sts = status; sc->ndis_evt[sc->ndis_evtpidx].ne_len = slen; NDIS_EVTINC(sc->ndis_evtpidx); NDIS_UNLOCK(sc); } static void ndis_linksts_done(adapter) ndis_handle adapter; { ndis_miniport_block *block; struct ndis_softc *sc; struct ifnet *ifp; block = adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); ifp = sc->ifp; if (!NDIS_INITIALIZED(sc)) return; switch (sc->ndis_sts) { case NDIS_STATUS_MEDIA_CONNECT: IoQueueWorkItem(sc->ndis_tickitem, (io_workitem_func)ndis_ticktask_wrap, WORKQUEUE_CRITICAL, sc); if (!sc->ndis_80211) IoQueueWorkItem(sc->ndis_startitem, (io_workitem_func)ndis_ifstarttask_wrap, WORKQUEUE_CRITICAL, sc); break; case NDIS_STATUS_MEDIA_DISCONNECT: if (sc->ndis_link) IoQueueWorkItem(sc->ndis_tickitem, (io_workitem_func)ndis_ticktask_wrap, WORKQUEUE_CRITICAL, sc); break; default: break; } } static void ndis_tick(xsc) void *xsc; { struct ndis_softc *sc; sc = xsc; if (sc->ndis_hang_timer && --sc->ndis_hang_timer == 0) { IoQueueWorkItem(sc->ndis_tickitem, (io_workitem_func)ndis_ticktask_wrap, WORKQUEUE_CRITICAL, sc); sc->ndis_hang_timer = sc->ndis_block->nmb_checkforhangsecs; } if (sc->ndis_tx_timer && --sc->ndis_tx_timer == 0) { if_inc_counter(sc->ifp, IFCOUNTER_OERRORS, 1); device_printf(sc->ndis_dev, "watchdog timeout\n"); IoQueueWorkItem(sc->ndis_resetitem, (io_workitem_func)ndis_resettask_wrap, WORKQUEUE_CRITICAL, sc); if (!sc->ndis_80211) IoQueueWorkItem(sc->ndis_startitem, (io_workitem_func)ndis_ifstarttask_wrap, WORKQUEUE_CRITICAL, sc); } callout_reset(&sc->ndis_stat_callout, hz, ndis_tick, sc); } static void ndis_ticktask(device_object *d, void *xsc) { struct ndis_softc *sc = xsc; ndis_checkforhang_handler hangfunc; uint8_t rval; NDIS_LOCK(sc); if 
(!NDIS_INITIALIZED(sc)) { NDIS_UNLOCK(sc); return; } NDIS_UNLOCK(sc); hangfunc = sc->ndis_chars->nmc_checkhang_func; if (hangfunc != NULL) { rval = MSCALL1(hangfunc, sc->ndis_block->nmb_miniportadapterctx); if (rval == TRUE) { ndis_reset_nic(sc); return; } } NDIS_LOCK(sc); if (sc->ndis_link == 0 && sc->ndis_sts == NDIS_STATUS_MEDIA_CONNECT) { sc->ndis_link = 1; if (sc->ndis_80211 != 0) { struct ieee80211com *ic = &sc->ndis_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); if (vap != NULL) { NDIS_UNLOCK(sc); ndis_getstate_80211(sc); ieee80211_new_state(vap, IEEE80211_S_RUN, -1); NDIS_LOCK(sc); if_link_state_change(vap->iv_ifp, LINK_STATE_UP); } } else if_link_state_change(sc->ifp, LINK_STATE_UP); } if (sc->ndis_link == 1 && sc->ndis_sts == NDIS_STATUS_MEDIA_DISCONNECT) { sc->ndis_link = 0; if (sc->ndis_80211 != 0) { struct ieee80211com *ic = &sc->ndis_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); if (vap != NULL) { NDIS_UNLOCK(sc); ieee80211_new_state(vap, IEEE80211_S_SCAN, 0); NDIS_LOCK(sc); if_link_state_change(vap->iv_ifp, LINK_STATE_DOWN); } } else if_link_state_change(sc->ifp, LINK_STATE_DOWN); } NDIS_UNLOCK(sc); } static void ndis_map_sclist(arg, segs, nseg, mapsize, error) void *arg; bus_dma_segment_t *segs; int nseg; bus_size_t mapsize; int error; { struct ndis_sc_list *sclist; int i; if (error || arg == NULL) return; sclist = arg; sclist->nsl_frags = nseg; for (i = 0; i < nseg; i++) { sclist->nsl_elements[i].nse_addr.np_quad = segs[i].ds_addr; sclist->nsl_elements[i].nse_len = segs[i].ds_len; } } static int ndis_raw_xmit(struct ieee80211_node *ni, struct mbuf *m, const struct ieee80211_bpf_params *params) { /* no support; just discard */ m_freem(m); ieee80211_free_node(ni); return (0); } static void ndis_update_mcast(struct ieee80211com *ic) { struct ndis_softc *sc = ic->ic_softc; ndis_setmulti(sc); } static void ndis_update_promisc(struct ieee80211com *ic) { /* not supported */ } static void ndis_ifstarttask(device_object *d, void *arg) 
{ struct ndis_softc *sc = arg; DPRINTF(("%s: sc=%p, ifp=%p\n", __func__, sc, sc->ifp)); if (sc->ndis_80211) return; struct ifnet *ifp = sc->ifp; if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) ndis_ifstart(ifp); } /* * Main transmit routine. To make NDIS drivers happy, we need to * transform mbuf chains into NDIS packets and feed them to the * send packet routines. Most drivers allow you to send several * packets at once (up to the maxpkts limit). Unfortunately, rather * that accepting them in the form of a linked list, they expect * a contiguous array of pointers to packets. * * For those drivers which use the NDIS scatter/gather DMA mechanism, * we need to perform busdma work here. Those that use map registers * will do the mapping themselves on a buffer by buffer basis. */ static void ndis_ifstart(struct ifnet *ifp) { struct ndis_softc *sc; struct mbuf *m = NULL; ndis_packet **p0 = NULL, *p = NULL; ndis_tcpip_csum *csum; int pcnt = 0, status; sc = ifp->if_softc; NDIS_LOCK(sc); if (!sc->ndis_link || ifp->if_drv_flags & IFF_DRV_OACTIVE) { NDIS_UNLOCK(sc); return; } p0 = &sc->ndis_txarray[sc->ndis_txidx]; while(sc->ndis_txpending) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m); if (m == NULL) break; NdisAllocatePacket(&status, &sc->ndis_txarray[sc->ndis_txidx], sc->ndis_txpool); if (status != NDIS_STATUS_SUCCESS) break; if (ndis_mtop(m, &sc->ndis_txarray[sc->ndis_txidx])) { IFQ_DRV_PREPEND(&ifp->if_snd, m); NDIS_UNLOCK(sc); return; } /* * Save pointer to original mbuf * so we can free it later. */ p = sc->ndis_txarray[sc->ndis_txidx]; p->np_txidx = sc->ndis_txidx; p->np_m0 = m; p->np_oob.npo_status = NDIS_STATUS_PENDING; /* * Do scatter/gather processing, if driver requested it. */ if (sc->ndis_sc) { bus_dmamap_load_mbuf(sc->ndis_ttag, sc->ndis_tmaps[sc->ndis_txidx], m, ndis_map_sclist, &p->np_sclist, BUS_DMA_NOWAIT); bus_dmamap_sync(sc->ndis_ttag, sc->ndis_tmaps[sc->ndis_txidx], BUS_DMASYNC_PREREAD); p->np_ext.npe_info[ndis_sclist_info] = &p->np_sclist; } /* Handle checksum offload. 
*/ if (ifp->if_capenable & IFCAP_TXCSUM && m->m_pkthdr.csum_flags) { csum = (ndis_tcpip_csum *) &p->np_ext.npe_info[ndis_tcpipcsum_info]; csum->u.ntc_txflags = NDIS_TXCSUM_DO_IPV4; if (m->m_pkthdr.csum_flags & CSUM_IP) csum->u.ntc_txflags |= NDIS_TXCSUM_DO_IP; if (m->m_pkthdr.csum_flags & CSUM_TCP) csum->u.ntc_txflags |= NDIS_TXCSUM_DO_TCP; if (m->m_pkthdr.csum_flags & CSUM_UDP) csum->u.ntc_txflags |= NDIS_TXCSUM_DO_UDP; p->np_private.npp_flags = NDIS_PROTOCOL_ID_TCP_IP; } NDIS_INC(sc); sc->ndis_txpending--; pcnt++; /* * If there's a BPF listener, bounce a copy of this frame * to him. */ if (!sc->ndis_80211) /* XXX handle 80211 */ BPF_MTAP(ifp, m); /* * The array that p0 points to must appear contiguous, * so we must not wrap past the end of sc->ndis_txarray[]. * If it looks like we're about to wrap, break out here * so the this batch of packets can be transmitted, then * wait for txeof to ask us to send the rest. */ if (sc->ndis_txidx == 0) break; } if (pcnt == 0) { NDIS_UNLOCK(sc); return; } if (sc->ndis_txpending == 0) ifp->if_drv_flags |= IFF_DRV_OACTIVE; /* * Set a timeout in case the chip goes out to lunch. */ sc->ndis_tx_timer = 5; NDIS_UNLOCK(sc); /* * According to NDIS documentation, if a driver exports * a MiniportSendPackets() routine, we prefer that over * a MiniportSend() routine (which sends just a single * packet). 
*/ if (sc->ndis_chars->nmc_sendmulti_func != NULL) ndis_send_packets(sc, p0, pcnt); else ndis_send_packet(sc, p); return; } static int ndis_80211transmit(struct ieee80211com *ic, struct mbuf *m) { struct ndis_softc *sc = ic->ic_softc; ndis_packet **p0 = NULL, *p = NULL; int status; NDIS_LOCK(sc); if (!sc->ndis_link || !sc->ndis_running) { NDIS_UNLOCK(sc); return (ENXIO); } if (sc->ndis_txpending == 0) { NDIS_UNLOCK(sc); return (ENOBUFS); } p0 = &sc->ndis_txarray[sc->ndis_txidx]; NdisAllocatePacket(&status, &sc->ndis_txarray[sc->ndis_txidx], sc->ndis_txpool); if (status != NDIS_STATUS_SUCCESS) { NDIS_UNLOCK(sc); return (ENOBUFS); } if (ndis_mtop(m, &sc->ndis_txarray[sc->ndis_txidx])) { NDIS_UNLOCK(sc); return (ENOBUFS); } /* * Save pointer to original mbuf * so we can free it later. */ p = sc->ndis_txarray[sc->ndis_txidx]; p->np_txidx = sc->ndis_txidx; p->np_m0 = m; p->np_oob.npo_status = NDIS_STATUS_PENDING; /* * Do scatter/gather processing, if driver requested it. */ if (sc->ndis_sc) { bus_dmamap_load_mbuf(sc->ndis_ttag, sc->ndis_tmaps[sc->ndis_txidx], m, ndis_map_sclist, &p->np_sclist, BUS_DMA_NOWAIT); bus_dmamap_sync(sc->ndis_ttag, sc->ndis_tmaps[sc->ndis_txidx], BUS_DMASYNC_PREREAD); p->np_ext.npe_info[ndis_sclist_info] = &p->np_sclist; } NDIS_INC(sc); sc->ndis_txpending--; /* * Set a timeout in case the chip goes out to lunch. */ sc->ndis_tx_timer = 5; NDIS_UNLOCK(sc); /* * According to NDIS documentation, if a driver exports * a MiniportSendPackets() routine, we prefer that over * a MiniportSend() routine (which sends just a single * packet). 
*/ if (sc->ndis_chars->nmc_sendmulti_func != NULL) ndis_send_packets(sc, p0, 1); else ndis_send_packet(sc, p); return (0); } static void ndis_80211parent(struct ieee80211com *ic) { struct ndis_softc *sc = ic->ic_softc; /*NDIS_LOCK(sc);*/ if (ic->ic_nrunning > 0) { if (!sc->ndis_running) ndis_init(sc); } else if (sc->ndis_running) ndis_stop(sc); /*NDIS_UNLOCK(sc);*/ } static void ndis_init(void *xsc) { struct ndis_softc *sc = xsc; int i, len, error; /* * Avoid reintializing the link unnecessarily. * This should be dealt with in a better way by * fixing the upper layer modules so they don't * call ifp->if_init() quite as often. */ if (sc->ndis_link) return; /* * Cancel pending I/O and free all RX/TX buffers. */ ndis_stop(sc); if (!(sc->ndis_iftype == PNPBus && ndisusb_halt == 0)) { error = ndis_init_nic(sc); if (error != 0) { device_printf(sc->ndis_dev, "failed to initialize the device: %d\n", error); return; } } /* Program the packet filter */ sc->ndis_filter = NDIS_PACKET_TYPE_DIRECTED | NDIS_PACKET_TYPE_BROADCAST; if (sc->ndis_80211) { struct ieee80211com *ic = &sc->ndis_ic; if (ic->ic_promisc > 0) sc->ndis_filter |= NDIS_PACKET_TYPE_PROMISCUOUS; } else { struct ifnet *ifp = sc->ifp; if (ifp->if_flags & IFF_PROMISC) sc->ndis_filter |= NDIS_PACKET_TYPE_PROMISCUOUS; } len = sizeof(sc->ndis_filter); error = ndis_set_info(sc, OID_GEN_CURRENT_PACKET_FILTER, &sc->ndis_filter, &len); if (error) device_printf(sc->ndis_dev, "set filter failed: %d\n", error); /* * Set lookahead. */ if (sc->ndis_80211) i = ETHERMTU; else i = sc->ifp->if_mtu; len = sizeof(i); ndis_set_info(sc, OID_GEN_CURRENT_LOOKAHEAD, &i, &len); /* * Program the multicast filter, if necessary. */ ndis_setmulti(sc); /* Setup task offload. 
*/ ndis_set_offload(sc); NDIS_LOCK(sc); sc->ndis_txidx = 0; sc->ndis_txpending = sc->ndis_maxpkts; sc->ndis_link = 0; if (!sc->ndis_80211) { if_link_state_change(sc->ifp, LINK_STATE_UNKNOWN); sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; } sc->ndis_tx_timer = 0; /* * Some drivers don't set this value. The NDIS spec says * the default checkforhang timeout is "approximately 2 * seconds." We use 3 seconds, because it seems for some * drivers, exactly 2 seconds is too fast. */ if (sc->ndis_block->nmb_checkforhangsecs == 0) sc->ndis_block->nmb_checkforhangsecs = 3; sc->ndis_hang_timer = sc->ndis_block->nmb_checkforhangsecs; callout_reset(&sc->ndis_stat_callout, hz, ndis_tick, sc); sc->ndis_running = 1; NDIS_UNLOCK(sc); /* XXX force handling */ if (sc->ndis_80211) ieee80211_start_all(&sc->ndis_ic); /* start all vap's */ } /* * Set media options. */ static int ndis_ifmedia_upd(ifp) struct ifnet *ifp; { struct ndis_softc *sc; sc = ifp->if_softc; if (NDIS_INITIALIZED(sc)) ndis_init(sc); return (0); } /* * Report current media status. 
*/ static void ndis_ifmedia_sts(ifp, ifmr) struct ifnet *ifp; struct ifmediareq *ifmr; { struct ndis_softc *sc; uint32_t media_info; ndis_media_state linkstate; int len; ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; sc = ifp->if_softc; if (!NDIS_INITIALIZED(sc)) return; len = sizeof(linkstate); ndis_get_info(sc, OID_GEN_MEDIA_CONNECT_STATUS, (void *)&linkstate, &len); len = sizeof(media_info); ndis_get_info(sc, OID_GEN_LINK_SPEED, (void *)&media_info, &len); if (linkstate == nmc_connected) ifmr->ifm_status |= IFM_ACTIVE; switch (media_info) { case 100000: ifmr->ifm_active |= IFM_10_T; break; case 1000000: ifmr->ifm_active |= IFM_100_TX; break; case 10000000: ifmr->ifm_active |= IFM_1000_T; break; default: device_printf(sc->ndis_dev, "unknown speed: %d\n", media_info); break; } } static int ndis_set_cipher(struct ndis_softc *sc, int cipher) { struct ieee80211com *ic = &sc->ndis_ic; int rval = 0, len; uint32_t arg, save; len = sizeof(arg); if (cipher == WPA_CSE_WEP40 || cipher == WPA_CSE_WEP104) { if (!(ic->ic_cryptocaps & IEEE80211_CRYPTO_WEP)) return (ENOTSUP); arg = NDIS_80211_WEPSTAT_ENC1ENABLED; } if (cipher == WPA_CSE_TKIP) { if (!(ic->ic_cryptocaps & IEEE80211_CRYPTO_TKIP)) return (ENOTSUP); arg = NDIS_80211_WEPSTAT_ENC2ENABLED; } if (cipher == WPA_CSE_CCMP) { if (!(ic->ic_cryptocaps & IEEE80211_CRYPTO_AES_CCM)) return (ENOTSUP); arg = NDIS_80211_WEPSTAT_ENC3ENABLED; } DPRINTF(("Setting cipher to %d\n", arg)); save = arg; rval = ndis_set_info(sc, OID_802_11_ENCRYPTION_STATUS, &arg, &len); if (rval) return (rval); /* Check that the cipher was set correctly. */ len = sizeof(save); rval = ndis_get_info(sc, OID_802_11_ENCRYPTION_STATUS, &arg, &len); if (rval != 0 || arg != save) return (ENODEV); return (0); } /* * WPA is hairy to set up. Do the work in a separate routine * so we don't clutter the setstate function too much. * Important yet undocumented fact: first we have to set the * authentication mode, _then_ we enable the ciphers. 
If one * of the WPA authentication modes isn't enabled, the driver * might not permit the TKIP or AES ciphers to be selected. */ static int ndis_set_wpa(sc, ie, ielen) struct ndis_softc *sc; void *ie; int ielen; { struct ieee80211_ie_wpa *w; struct ndis_ie *n; char *pos; uint32_t arg; int i; /* * Apparently, the only way for us to know what ciphers * and key management/authentication mode to use is for * us to inspect the optional information element (IE) * stored in the 802.11 state machine. This IE should be * supplied by the WPA supplicant. */ w = (struct ieee80211_ie_wpa *)ie; /* Check for the right kind of IE. */ if (w->wpa_id != IEEE80211_ELEMID_VENDOR) { DPRINTF(("Incorrect IE type %d\n", w->wpa_id)); return (EINVAL); } /* Skip over the ucast cipher OIDs. */ pos = (char *)&w->wpa_uciphers[0]; pos += w->wpa_uciphercnt * sizeof(struct ndis_ie); /* Skip over the authmode count. */ pos += sizeof(u_int16_t); /* * Check for the authentication modes. I'm * pretty sure there's only supposed to be one. */ n = (struct ndis_ie *)pos; if (n->ni_val == WPA_ASE_NONE) arg = NDIS_80211_AUTHMODE_WPANONE; if (n->ni_val == WPA_ASE_8021X_UNSPEC) arg = NDIS_80211_AUTHMODE_WPA; if (n->ni_val == WPA_ASE_8021X_PSK) arg = NDIS_80211_AUTHMODE_WPAPSK; DPRINTF(("Setting WPA auth mode to %d\n", arg)); i = sizeof(arg); if (ndis_set_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &i)) return (ENOTSUP); i = sizeof(arg); ndis_get_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &i); /* Now configure the desired ciphers. */ /* First, set up the multicast group cipher. */ n = (struct ndis_ie *)&w->wpa_mcipher[0]; if (ndis_set_cipher(sc, n->ni_val)) return (ENOTSUP); /* Now start looking around for the unicast ciphers. 
*/ pos = (char *)&w->wpa_uciphers[0]; n = (struct ndis_ie *)pos; for (i = 0; i < w->wpa_uciphercnt; i++) { if (ndis_set_cipher(sc, n->ni_val)) return (ENOTSUP); n++; } return (0); } static void ndis_media_status(struct ifnet *ifp, struct ifmediareq *imr) { struct ieee80211vap *vap = ifp->if_softc; struct ndis_softc *sc = vap->iv_ic->ic_softc; uint32_t txrate; int len; if (!NDIS_INITIALIZED(sc)) return; len = sizeof(txrate); if (ndis_get_info(sc, OID_GEN_LINK_SPEED, &txrate, &len) == 0) vap->iv_bss->ni_txrate = txrate / 5000; ieee80211_media_status(ifp, imr); } static void ndis_setstate_80211(struct ndis_softc *sc) { struct ieee80211com *ic = &sc->ndis_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); ndis_80211_macaddr bssid; ndis_80211_config config; int rval = 0, len; uint32_t arg; if (!NDIS_INITIALIZED(sc)) { DPRINTF(("%s: NDIS not initialized\n", __func__)); return; } /* Disassociate and turn off radio. */ len = sizeof(arg); arg = 1; ndis_set_info(sc, OID_802_11_DISASSOCIATE, &arg, &len); /* Set network infrastructure mode. */ len = sizeof(arg); if (ic->ic_opmode == IEEE80211_M_IBSS) arg = NDIS_80211_NET_INFRA_IBSS; else arg = NDIS_80211_NET_INFRA_BSS; rval = ndis_set_info(sc, OID_802_11_INFRASTRUCTURE_MODE, &arg, &len); if (rval) device_printf (sc->ndis_dev, "set infra failed: %d\n", rval); /* Set power management */ len = sizeof(arg); if (vap->iv_flags & IEEE80211_F_PMGTON) arg = NDIS_80211_POWERMODE_FAST_PSP; else arg = NDIS_80211_POWERMODE_CAM; ndis_set_info(sc, OID_802_11_POWER_MODE, &arg, &len); /* Set TX power */ if ((ic->ic_caps & IEEE80211_C_TXPMGT) && ic->ic_txpowlimit < nitems(dBm2mW)) { arg = dBm2mW[ic->ic_txpowlimit]; len = sizeof(arg); ndis_set_info(sc, OID_802_11_TX_POWER_LEVEL, &arg, &len); } /* * Default encryption mode to off, authentication * to open and privacy to 'accept everything.' 
*/ len = sizeof(arg); arg = NDIS_80211_WEPSTAT_DISABLED; ndis_set_info(sc, OID_802_11_ENCRYPTION_STATUS, &arg, &len); len = sizeof(arg); arg = NDIS_80211_AUTHMODE_OPEN; ndis_set_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &len); /* * Note that OID_802_11_PRIVACY_FILTER is optional: * not all drivers implement it. */ len = sizeof(arg); arg = NDIS_80211_PRIVFILT_8021XWEP; ndis_set_info(sc, OID_802_11_PRIVACY_FILTER, &arg, &len); len = sizeof(config); bzero((char *)&config, len); config.nc_length = len; config.nc_fhconfig.ncf_length = sizeof(ndis_80211_config_fh); rval = ndis_get_info(sc, OID_802_11_CONFIGURATION, &config, &len); /* * Some drivers expect us to initialize these values, so * provide some defaults. */ if (config.nc_beaconperiod == 0) config.nc_beaconperiod = 100; if (config.nc_atimwin == 0) config.nc_atimwin = 100; if (config.nc_fhconfig.ncf_dwelltime == 0) config.nc_fhconfig.ncf_dwelltime = 200; if (rval == 0 && ic->ic_bsschan != IEEE80211_CHAN_ANYC) { int chan, chanflag; chan = ieee80211_chan2ieee(ic, ic->ic_bsschan); chanflag = config.nc_dsconfig > 2500000 ? 
IEEE80211_CHAN_2GHZ : IEEE80211_CHAN_5GHZ; if (chan != ieee80211_mhz2ieee(config.nc_dsconfig / 1000, 0)) { config.nc_dsconfig = ic->ic_bsschan->ic_freq * 1000; len = sizeof(config); config.nc_length = len; config.nc_fhconfig.ncf_length = sizeof(ndis_80211_config_fh); DPRINTF(("Setting channel to %ukHz\n", config.nc_dsconfig)); rval = ndis_set_info(sc, OID_802_11_CONFIGURATION, &config, &len); if (rval) device_printf(sc->ndis_dev, "couldn't change " "DS config to %ukHz: %d\n", config.nc_dsconfig, rval); } } else if (rval) device_printf(sc->ndis_dev, "couldn't retrieve " "channel info: %d\n", rval); /* Set the BSSID to our value so the driver doesn't associate */ len = IEEE80211_ADDR_LEN; bcopy(vap->iv_myaddr, bssid, len); DPRINTF(("Setting BSSID to %6D\n", (uint8_t *)&bssid, ":")); rval = ndis_set_info(sc, OID_802_11_BSSID, &bssid, &len); if (rval) device_printf(sc->ndis_dev, "setting BSSID failed: %d\n", rval); } static void ndis_auth_and_assoc(struct ndis_softc *sc, struct ieee80211vap *vap) { struct ieee80211_node *ni = vap->iv_bss; ndis_80211_ssid ssid; ndis_80211_macaddr bssid; ndis_80211_wep wep; int i, rval = 0, len, error; uint32_t arg; if (!NDIS_INITIALIZED(sc)) { DPRINTF(("%s: NDIS not initialized\n", __func__)); return; } /* Initial setup */ ndis_setstate_80211(sc); /* Set network infrastructure mode. 
*/ len = sizeof(arg); if (vap->iv_opmode == IEEE80211_M_IBSS) arg = NDIS_80211_NET_INFRA_IBSS; else arg = NDIS_80211_NET_INFRA_BSS; rval = ndis_set_info(sc, OID_802_11_INFRASTRUCTURE_MODE, &arg, &len); if (rval) device_printf (sc->ndis_dev, "set infra failed: %d\n", rval); /* Set RTS threshold */ len = sizeof(arg); arg = vap->iv_rtsthreshold; ndis_set_info(sc, OID_802_11_RTS_THRESHOLD, &arg, &len); /* Set fragmentation threshold */ len = sizeof(arg); arg = vap->iv_fragthreshold; ndis_set_info(sc, OID_802_11_FRAGMENTATION_THRESHOLD, &arg, &len); /* Set WEP */ if (vap->iv_flags & IEEE80211_F_PRIVACY && !(vap->iv_flags & IEEE80211_F_WPA)) { int keys_set = 0; if (ni->ni_authmode == IEEE80211_AUTH_SHARED) { len = sizeof(arg); arg = NDIS_80211_AUTHMODE_SHARED; DPRINTF(("Setting shared auth\n")); ndis_set_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &len); } for (i = 0; i < IEEE80211_WEP_NKID; i++) { if (vap->iv_nw_keys[i].wk_keylen) { if (vap->iv_nw_keys[i].wk_cipher->ic_cipher != IEEE80211_CIPHER_WEP) continue; bzero((char *)&wep, sizeof(wep)); wep.nw_keylen = vap->iv_nw_keys[i].wk_keylen; /* * 5, 13 and 16 are the only valid * key lengths. Anything in between * will be zero padded out to the * next highest boundary. 
*/ if (vap->iv_nw_keys[i].wk_keylen < 5) wep.nw_keylen = 5; else if (vap->iv_nw_keys[i].wk_keylen > 5 && vap->iv_nw_keys[i].wk_keylen < 13) wep.nw_keylen = 13; else if (vap->iv_nw_keys[i].wk_keylen > 13 && vap->iv_nw_keys[i].wk_keylen < 16) wep.nw_keylen = 16; wep.nw_keyidx = i; wep.nw_length = (sizeof(uint32_t) * 3) + wep.nw_keylen; if (i == vap->iv_def_txkey) wep.nw_keyidx |= NDIS_80211_WEPKEY_TX; bcopy(vap->iv_nw_keys[i].wk_key, wep.nw_keydata, wep.nw_length); len = sizeof(wep); DPRINTF(("Setting WEP key %d\n", i)); rval = ndis_set_info(sc, OID_802_11_ADD_WEP, &wep, &len); if (rval) device_printf(sc->ndis_dev, "set wepkey failed: %d\n", rval); keys_set++; } } if (keys_set) { DPRINTF(("Setting WEP on\n")); arg = NDIS_80211_WEPSTAT_ENABLED; len = sizeof(arg); rval = ndis_set_info(sc, OID_802_11_WEP_STATUS, &arg, &len); if (rval) device_printf(sc->ndis_dev, "enable WEP failed: %d\n", rval); if (vap->iv_flags & IEEE80211_F_DROPUNENC) arg = NDIS_80211_PRIVFILT_8021XWEP; else arg = NDIS_80211_PRIVFILT_ACCEPTALL; len = sizeof(arg); ndis_set_info(sc, OID_802_11_PRIVACY_FILTER, &arg, &len); } } /* Set up WPA. */ if ((vap->iv_flags & IEEE80211_F_WPA) && vap->iv_appie_assocreq != NULL) { struct ieee80211_appie *ie = vap->iv_appie_assocreq; error = ndis_set_wpa(sc, ie->ie_data, ie->ie_len); if (error != 0) device_printf(sc->ndis_dev, "WPA setup failed\n"); } #ifdef notyet /* Set network type. 
*/ arg = 0; switch (vap->iv_curmode) { case IEEE80211_MODE_11A: arg = NDIS_80211_NETTYPE_11OFDM5; break; case IEEE80211_MODE_11B: arg = NDIS_80211_NETTYPE_11DS; break; case IEEE80211_MODE_11G: arg = NDIS_80211_NETTYPE_11OFDM24; break; default: device_printf(sc->ndis_dev, "unknown mode: %d\n", vap->iv_curmode); } if (arg) { DPRINTF(("Setting network type to %d\n", arg)); len = sizeof(arg); rval = ndis_set_info(sc, OID_802_11_NETWORK_TYPE_IN_USE, &arg, &len); if (rval) device_printf(sc->ndis_dev, "set nettype failed: %d\n", rval); } #endif /* * If the user selected a specific BSSID, try * to use that one. This is useful in the case where * there are several APs in range with the same network * name. To delete the BSSID, we use the broadcast * address as the BSSID. * Note that some drivers seem to allow setting a BSSID * in ad-hoc mode, which has the effect of forcing the * NIC to create an ad-hoc cell with a specific BSSID, * instead of a randomly chosen one. However, the net80211 * code makes the assumtion that the BSSID setting is invalid * when you're in ad-hoc mode, so we don't allow that here. */ len = IEEE80211_ADDR_LEN; if (vap->iv_flags & IEEE80211_F_DESBSSID && vap->iv_opmode != IEEE80211_M_IBSS) bcopy(ni->ni_bssid, bssid, len); else bcopy(ieee80211broadcastaddr, bssid, len); DPRINTF(("Setting BSSID to %6D\n", (uint8_t *)&bssid, ":")); rval = ndis_set_info(sc, OID_802_11_BSSID, &bssid, &len); if (rval) device_printf(sc->ndis_dev, "setting BSSID failed: %d\n", rval); /* Set SSID -- always do this last. 
*/ #ifdef NDIS_DEBUG if (ndis_debug > 0) { printf("Setting ESSID to "); ieee80211_print_essid(ni->ni_essid, ni->ni_esslen); printf("\n"); } #endif len = sizeof(ssid); bzero((char *)&ssid, len); ssid.ns_ssidlen = ni->ni_esslen; if (ssid.ns_ssidlen == 0) { ssid.ns_ssidlen = 1; } else bcopy(ni->ni_essid, ssid.ns_ssid, ssid.ns_ssidlen); rval = ndis_set_info(sc, OID_802_11_SSID, &ssid, &len); if (rval) device_printf (sc->ndis_dev, "set ssid failed: %d\n", rval); return; } static int ndis_get_bssid_list(sc, bl) struct ndis_softc *sc; ndis_80211_bssid_list_ex **bl; { int len, error; len = sizeof(uint32_t) + (sizeof(ndis_wlan_bssid_ex) * 16); *bl = malloc(len, M_DEVBUF, M_NOWAIT | M_ZERO); if (*bl == NULL) return (ENOMEM); error = ndis_get_info(sc, OID_802_11_BSSID_LIST, *bl, &len); if (error == ENOSPC) { free(*bl, M_DEVBUF); *bl = malloc(len, M_DEVBUF, M_NOWAIT | M_ZERO); if (*bl == NULL) return (ENOMEM); error = ndis_get_info(sc, OID_802_11_BSSID_LIST, *bl, &len); } if (error) { DPRINTF(("%s: failed to read\n", __func__)); free(*bl, M_DEVBUF); return (error); } return (0); } static int ndis_get_assoc(struct ndis_softc *sc, ndis_wlan_bssid_ex **assoc) { struct ieee80211com *ic = &sc->ndis_ic; struct ieee80211vap *vap; struct ieee80211_node *ni; ndis_80211_bssid_list_ex *bl; ndis_wlan_bssid_ex *bs; ndis_80211_macaddr bssid; int i, len, error; if (!sc->ndis_link) return (ENOENT); len = sizeof(bssid); error = ndis_get_info(sc, OID_802_11_BSSID, &bssid, &len); if (error) { device_printf(sc->ndis_dev, "failed to get bssid\n"); return (ENOENT); } vap = TAILQ_FIRST(&ic->ic_vaps); ni = vap->iv_bss; error = ndis_get_bssid_list(sc, &bl); if (error) return (error); bs = (ndis_wlan_bssid_ex *)&bl->nblx_bssid[0]; for (i = 0; i < bl->nblx_items; i++) { if (bcmp(bs->nwbx_macaddr, bssid, sizeof(bssid)) == 0) { *assoc = malloc(bs->nwbx_len, M_TEMP, M_NOWAIT); if (*assoc == NULL) { free(bl, M_TEMP); return (ENOMEM); } bcopy((char *)bs, (char *)*assoc, bs->nwbx_len); free(bl, M_TEMP); if 
(ic->ic_opmode == IEEE80211_M_STA) ni->ni_associd = 1 | 0xc000; /* fake associd */ return (0); } bs = (ndis_wlan_bssid_ex *)((char *)bs + bs->nwbx_len); } free(bl, M_TEMP); return (ENOENT); } static void ndis_getstate_80211(struct ndis_softc *sc) { struct ieee80211com *ic = &sc->ndis_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); struct ieee80211_node *ni = vap->iv_bss; ndis_wlan_bssid_ex *bs; int rval, len, i = 0; int chanflag; uint32_t arg; if (!NDIS_INITIALIZED(sc)) return; if ((rval = ndis_get_assoc(sc, &bs)) != 0) return; /* We're associated, retrieve info on the current bssid. */ ic->ic_curmode = ndis_nettype_mode(bs->nwbx_nettype); chanflag = ndis_nettype_chan(bs->nwbx_nettype); IEEE80211_ADDR_COPY(ni->ni_bssid, bs->nwbx_macaddr); /* Get SSID from current association info. */ bcopy(bs->nwbx_ssid.ns_ssid, ni->ni_essid, bs->nwbx_ssid.ns_ssidlen); ni->ni_esslen = bs->nwbx_ssid.ns_ssidlen; if (ic->ic_caps & IEEE80211_C_PMGT) { len = sizeof(arg); rval = ndis_get_info(sc, OID_802_11_POWER_MODE, &arg, &len); if (rval) device_printf(sc->ndis_dev, "get power mode failed: %d\n", rval); if (arg == NDIS_80211_POWERMODE_CAM) vap->iv_flags &= ~IEEE80211_F_PMGTON; else vap->iv_flags |= IEEE80211_F_PMGTON; } /* Get TX power */ if (ic->ic_caps & IEEE80211_C_TXPMGT) { len = sizeof(arg); ndis_get_info(sc, OID_802_11_TX_POWER_LEVEL, &arg, &len); for (i = 0; i < nitems(dBm2mW); i++) if (dBm2mW[i] >= arg) break; ic->ic_txpowlimit = i; } /* * Use the current association information to reflect * what channel we're on. */ ic->ic_curchan = ieee80211_find_channel(ic, bs->nwbx_config.nc_dsconfig / 1000, chanflag); if (ic->ic_curchan == NULL) ic->ic_curchan = &ic->ic_channels[0]; ni->ni_chan = ic->ic_curchan; ic->ic_bsschan = ic->ic_curchan; free(bs, M_TEMP); /* * Determine current authentication mode. 
*/ len = sizeof(arg); rval = ndis_get_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &len); if (rval) device_printf(sc->ndis_dev, "get authmode status failed: %d\n", rval); else { vap->iv_flags &= ~IEEE80211_F_WPA; switch (arg) { case NDIS_80211_AUTHMODE_OPEN: ni->ni_authmode = IEEE80211_AUTH_OPEN; break; case NDIS_80211_AUTHMODE_SHARED: ni->ni_authmode = IEEE80211_AUTH_SHARED; break; case NDIS_80211_AUTHMODE_AUTO: ni->ni_authmode = IEEE80211_AUTH_AUTO; break; case NDIS_80211_AUTHMODE_WPA: case NDIS_80211_AUTHMODE_WPAPSK: case NDIS_80211_AUTHMODE_WPANONE: ni->ni_authmode = IEEE80211_AUTH_WPA; vap->iv_flags |= IEEE80211_F_WPA1; break; case NDIS_80211_AUTHMODE_WPA2: case NDIS_80211_AUTHMODE_WPA2PSK: ni->ni_authmode = IEEE80211_AUTH_WPA; vap->iv_flags |= IEEE80211_F_WPA2; break; default: ni->ni_authmode = IEEE80211_AUTH_NONE; break; } } len = sizeof(arg); rval = ndis_get_info(sc, OID_802_11_WEP_STATUS, &arg, &len); if (rval) device_printf(sc->ndis_dev, "get wep status failed: %d\n", rval); if (arg == NDIS_80211_WEPSTAT_ENABLED) vap->iv_flags |= IEEE80211_F_PRIVACY|IEEE80211_F_DROPUNENC; else vap->iv_flags &= ~(IEEE80211_F_PRIVACY|IEEE80211_F_DROPUNENC); } static int ndis_ifioctl(ifp, command, data) struct ifnet *ifp; u_long command; caddr_t data; { struct ndis_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *) data; int i, error = 0; /*NDIS_LOCK(sc);*/ switch (command) { case SIOCSIFFLAGS: if (ifp->if_flags & IFF_UP) { if (sc->ndis_running && ifp->if_flags & IFF_PROMISC && !(sc->ndis_if_flags & IFF_PROMISC)) { sc->ndis_filter |= NDIS_PACKET_TYPE_PROMISCUOUS; i = sizeof(sc->ndis_filter); error = ndis_set_info(sc, OID_GEN_CURRENT_PACKET_FILTER, &sc->ndis_filter, &i); } else if (sc->ndis_running && !(ifp->if_flags & IFF_PROMISC) && sc->ndis_if_flags & IFF_PROMISC) { sc->ndis_filter &= ~NDIS_PACKET_TYPE_PROMISCUOUS; i = sizeof(sc->ndis_filter); error = ndis_set_info(sc, OID_GEN_CURRENT_PACKET_FILTER, &sc->ndis_filter, &i); } else ndis_init(sc); } else { if 
(sc->ndis_running) ndis_stop(sc); } sc->ndis_if_flags = ifp->if_flags; error = 0; break; case SIOCADDMULTI: case SIOCDELMULTI: ndis_setmulti(sc); error = 0; break; case SIOCGIFMEDIA: case SIOCSIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->ifmedia, command); break; case SIOCSIFCAP: ifp->if_capenable = ifr->ifr_reqcap; if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist = sc->ndis_hwassist; else ifp->if_hwassist = 0; ndis_set_offload(sc); break; default: error = ether_ioctl(ifp, command, data); break; } /*NDIS_UNLOCK(sc);*/ return(error); } static int ndis_80211ioctl(struct ieee80211com *ic, u_long cmd, void *data) { struct ndis_softc *sc = ic->ic_softc; struct ifreq *ifr = data; struct ndis_oid_data oid; struct ndis_evt evt; void *oidbuf = NULL; int error = 0; if ((error = priv_check(curthread, PRIV_DRIVER)) != 0) return (error); switch (cmd) { case SIOCGDRVSPEC: case SIOCSDRVSPEC: - error = copyin(ifr->ifr_data, &oid, sizeof(oid)); + error = copyin(ifr_data_get_ptr(ifr), &oid, sizeof(oid)); if (error) break; oidbuf = malloc(oid.len, M_TEMP, M_WAITOK | M_ZERO); - error = copyin(ifr->ifr_data + sizeof(oid), oidbuf, oid.len); + error = copyin((caddr_t)ifr_data_get_ptr(ifr) + sizeof(oid), + oidbuf, oid.len); } if (error) { free(oidbuf, M_TEMP); return (error); } switch (cmd) { case SIOCGDRVSPEC: error = ndis_get_info(sc, oid.oid, oidbuf, &oid.len); break; case SIOCSDRVSPEC: error = ndis_set_info(sc, oid.oid, oidbuf, &oid.len); break; case SIOCGPRIVATE_0: NDIS_LOCK(sc); if (sc->ndis_evt[sc->ndis_evtcidx].ne_sts == 0) { error = ENOENT; NDIS_UNLOCK(sc); break; } - error = copyin(ifr->ifr_data, &evt, sizeof(evt)); + error = copyin(ifr_data_get_ptr(ifr), &evt, sizeof(evt)); if (error) { NDIS_UNLOCK(sc); break; } if (evt.ne_len < sc->ndis_evt[sc->ndis_evtcidx].ne_len) { error = ENOSPC; NDIS_UNLOCK(sc); break; } error = copyout(&sc->ndis_evt[sc->ndis_evtcidx], - ifr->ifr_data, sizeof(uint32_t) * 2); + ifr_data_get_ptr(ifr), sizeof(uint32_t) * 2); if (error) { 
NDIS_UNLOCK(sc); break; } if (sc->ndis_evt[sc->ndis_evtcidx].ne_len) { error = copyout(sc->ndis_evt[sc->ndis_evtcidx].ne_buf, - ifr->ifr_data + (sizeof(uint32_t) * 2), + (caddr_t)ifr_data_get_ptr(ifr) + + (sizeof(uint32_t) * 2), sc->ndis_evt[sc->ndis_evtcidx].ne_len); if (error) { NDIS_UNLOCK(sc); break; } free(sc->ndis_evt[sc->ndis_evtcidx].ne_buf, M_TEMP); sc->ndis_evt[sc->ndis_evtcidx].ne_buf = NULL; } sc->ndis_evt[sc->ndis_evtcidx].ne_len = 0; sc->ndis_evt[sc->ndis_evtcidx].ne_sts = 0; NDIS_EVTINC(sc->ndis_evtcidx); NDIS_UNLOCK(sc); break; default: error = ENOTTY; break; } switch (cmd) { case SIOCGDRVSPEC: case SIOCSDRVSPEC: - error = copyout(&oid, ifr->ifr_data, sizeof(oid)); + error = copyout(&oid, ifr_data_get_ptr(ifr), sizeof(oid)); if (error) break; - error = copyout(oidbuf, ifr->ifr_data + sizeof(oid), oid.len); + error = copyout(oidbuf, + (caddr_t)ifr_data_get_ptr(ifr) + sizeof(oid), oid.len); } free(oidbuf, M_TEMP); return (error); } int ndis_del_key(struct ieee80211vap *vap, const struct ieee80211_key *key) { struct ndis_softc *sc = vap->iv_ic->ic_softc; ndis_80211_key rkey; int len, error = 0; bzero((char *)&rkey, sizeof(rkey)); len = sizeof(rkey); rkey.nk_len = len; rkey.nk_keyidx = key->wk_keyix; bcopy(vap->iv_ifp->if_broadcastaddr, rkey.nk_bssid, IEEE80211_ADDR_LEN); error = ndis_set_info(sc, OID_802_11_REMOVE_KEY, &rkey, &len); if (error) return (0); return (1); } /* * In theory this could be called for any key, but we'll * only use it for WPA TKIP or AES keys. These need to be * set after initial authentication with the AP. 
*/ static int ndis_add_key(struct ieee80211vap *vap, const struct ieee80211_key *key) { struct ndis_softc *sc = vap->iv_ic->ic_softc; ndis_80211_key rkey; int len, error = 0; switch (key->wk_cipher->ic_cipher) { case IEEE80211_CIPHER_TKIP: len = sizeof(ndis_80211_key); bzero((char *)&rkey, sizeof(rkey)); rkey.nk_len = len; rkey.nk_keylen = key->wk_keylen; if (key->wk_flags & IEEE80211_KEY_SWMIC) rkey.nk_keylen += 16; /* key index - gets weird in NDIS */ if (key->wk_keyix != IEEE80211_KEYIX_NONE) rkey.nk_keyidx = key->wk_keyix; else rkey.nk_keyidx = 0; if (key->wk_flags & IEEE80211_KEY_XMIT) rkey.nk_keyidx |= 1 << 31; if (key->wk_flags & IEEE80211_KEY_GROUP) { bcopy(ieee80211broadcastaddr, rkey.nk_bssid, IEEE80211_ADDR_LEN); } else { bcopy(vap->iv_bss->ni_bssid, rkey.nk_bssid, IEEE80211_ADDR_LEN); /* pairwise key */ rkey.nk_keyidx |= 1 << 30; } /* need to set bit 29 based on keyrsc */ rkey.nk_keyrsc = key->wk_keyrsc[0]; /* XXX need tid */ if (rkey.nk_keyrsc) rkey.nk_keyidx |= 1 << 29; if (key->wk_flags & IEEE80211_KEY_SWMIC) { bcopy(key->wk_key, rkey.nk_keydata, 16); bcopy(key->wk_key + 24, rkey.nk_keydata + 16, 8); bcopy(key->wk_key + 16, rkey.nk_keydata + 24, 8); } else bcopy(key->wk_key, rkey.nk_keydata, key->wk_keylen); error = ndis_set_info(sc, OID_802_11_ADD_KEY, &rkey, &len); break; case IEEE80211_CIPHER_WEP: error = 0; break; /* * I don't know how to set up keys for the AES * cipher yet. Is it the same as TKIP? */ case IEEE80211_CIPHER_AES_CCM: default: error = ENOTTY; break; } /* We need to return 1 for success, 0 for failure. */ if (error) return (0); return (1); } static void ndis_resettask(d, arg) device_object *d; void *arg; { struct ndis_softc *sc; sc = arg; ndis_reset_nic(sc); } /* * Stop the adapter and free any mbufs allocated to the * RX and TX lists. 
*/ static void ndis_stop(struct ndis_softc *sc) { int i; callout_drain(&sc->ndis_stat_callout); NDIS_LOCK(sc); sc->ndis_tx_timer = 0; sc->ndis_link = 0; if (!sc->ndis_80211) sc->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); sc->ndis_running = 0; NDIS_UNLOCK(sc); if (sc->ndis_iftype != PNPBus || (sc->ndis_iftype == PNPBus && !(sc->ndisusb_status & NDISUSB_STATUS_DETACH) && ndisusb_halt != 0)) ndis_halt_nic(sc); NDIS_LOCK(sc); for (i = 0; i < NDIS_EVENTS; i++) { if (sc->ndis_evt[i].ne_sts && sc->ndis_evt[i].ne_buf != NULL) { free(sc->ndis_evt[i].ne_buf, M_TEMP); sc->ndis_evt[i].ne_buf = NULL; } sc->ndis_evt[i].ne_sts = 0; sc->ndis_evt[i].ne_len = 0; } sc->ndis_evtcidx = 0; sc->ndis_evtpidx = 0; NDIS_UNLOCK(sc); } /* * Stop all chip I/O so that the kernel's probe routines don't * get confused by errant DMAs when rebooting. */ void ndis_shutdown(dev) device_t dev; { struct ndis_softc *sc; sc = device_get_softc(dev); ndis_stop(sc); } static int ndis_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) { struct ndis_vap *nvp = NDIS_VAP(vap); struct ieee80211com *ic = vap->iv_ic; struct ndis_softc *sc = ic->ic_softc; enum ieee80211_state ostate; DPRINTF(("%s: %s -> %s\n", __func__, ieee80211_state_name[vap->iv_state], ieee80211_state_name[nstate])); ostate = vap->iv_state; vap->iv_state = nstate; switch (nstate) { /* pass on to net80211 */ case IEEE80211_S_INIT: case IEEE80211_S_SCAN: return nvp->newstate(vap, nstate, arg); case IEEE80211_S_ASSOC: if (ostate != IEEE80211_S_AUTH) { IEEE80211_UNLOCK(ic); ndis_auth_and_assoc(sc, vap); IEEE80211_LOCK(ic); } break; case IEEE80211_S_AUTH: IEEE80211_UNLOCK(ic); ndis_auth_and_assoc(sc, vap); if (vap->iv_state == IEEE80211_S_AUTH) /* XXX */ ieee80211_new_state(vap, IEEE80211_S_ASSOC, 0); IEEE80211_LOCK(ic); break; default: break; } return (0); } static void ndis_scan(void *arg) { struct ieee80211vap *vap = arg; ieee80211_scan_done(vap); } static void ndis_scan_results(struct ndis_softc *sc) { 
struct ieee80211com *ic = &sc->ndis_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); ndis_80211_bssid_list_ex *bl; ndis_wlan_bssid_ex *wb; struct ieee80211_scanparams sp; struct ieee80211_frame wh; struct ieee80211_channel *saved_chan; int i, j; int rssi, noise, freq, chanflag; uint8_t ssid[2+IEEE80211_NWID_LEN]; uint8_t rates[2+IEEE80211_RATE_MAXSIZE]; uint8_t *frm, *efrm; saved_chan = ic->ic_curchan; noise = -96; if (ndis_get_bssid_list(sc, &bl)) return; DPRINTF(("%s: %d results\n", __func__, bl->nblx_items)); wb = &bl->nblx_bssid[0]; for (i = 0; i < bl->nblx_items; i++) { memset(&sp, 0, sizeof(sp)); memcpy(wh.i_addr2, wb->nwbx_macaddr, sizeof(wh.i_addr2)); memcpy(wh.i_addr3, wb->nwbx_macaddr, sizeof(wh.i_addr3)); rssi = 100 * (wb->nwbx_rssi - noise) / (-32 - noise); rssi = max(0, min(rssi, 100)); /* limit 0 <= rssi <= 100 */ if (wb->nwbx_privacy) sp.capinfo |= IEEE80211_CAPINFO_PRIVACY; sp.bintval = wb->nwbx_config.nc_beaconperiod; switch (wb->nwbx_netinfra) { case NDIS_80211_NET_INFRA_IBSS: sp.capinfo |= IEEE80211_CAPINFO_IBSS; break; case NDIS_80211_NET_INFRA_BSS: sp.capinfo |= IEEE80211_CAPINFO_ESS; break; } sp.rates = &rates[0]; for (j = 0; j < IEEE80211_RATE_MAXSIZE; j++) { /* XXX - check units */ if (wb->nwbx_supportedrates[j] == 0) break; rates[2 + j] = wb->nwbx_supportedrates[j] & 0x7f; } rates[1] = j; sp.ssid = (uint8_t *)&ssid[0]; memcpy(sp.ssid + 2, &wb->nwbx_ssid.ns_ssid, wb->nwbx_ssid.ns_ssidlen); sp.ssid[1] = wb->nwbx_ssid.ns_ssidlen; chanflag = ndis_nettype_chan(wb->nwbx_nettype); freq = wb->nwbx_config.nc_dsconfig / 1000; sp.chan = sp.bchan = ieee80211_mhz2ieee(freq, chanflag); /* Hack ic->ic_curchan to be in sync with the scan result */ ic->ic_curchan = ieee80211_find_channel(ic, freq, chanflag); if (ic->ic_curchan == NULL) ic->ic_curchan = &ic->ic_channels[0]; /* Process extended info from AP */ if (wb->nwbx_len > sizeof(ndis_wlan_bssid)) { frm = (uint8_t *)&wb->nwbx_ies; efrm = frm + wb->nwbx_ielen; if (efrm - frm < 12) goto done; 
sp.tstamp = frm; frm += 8; sp.bintval = le16toh(*(uint16_t *)frm); frm += 2; sp.capinfo = le16toh(*(uint16_t *)frm); frm += 2; sp.ies = frm; sp.ies_len = efrm - frm; } done: DPRINTF(("scan: bssid %s chan %dMHz (%d/%d) rssi %d\n", ether_sprintf(wb->nwbx_macaddr), freq, sp.bchan, chanflag, rssi)); ieee80211_add_scan(vap, ic->ic_curchan, &sp, &wh, 0, rssi, noise); wb = (ndis_wlan_bssid_ex *)((char *)wb + wb->nwbx_len); } free(bl, M_DEVBUF); /* Restore the channel after messing with it */ ic->ic_curchan = saved_chan; } static void ndis_scan_start(struct ieee80211com *ic) { struct ndis_softc *sc = ic->ic_softc; struct ieee80211vap *vap; struct ieee80211_scan_state *ss; ndis_80211_ssid ssid; int error, len; ss = ic->ic_scan; vap = TAILQ_FIRST(&ic->ic_vaps); if (!NDIS_INITIALIZED(sc)) { DPRINTF(("%s: scan aborted\n", __func__)); ieee80211_cancel_scan(vap); return; } len = sizeof(ssid); bzero((char *)&ssid, len); if (ss->ss_nssid == 0) ssid.ns_ssidlen = 1; else { /* Perform a directed scan */ ssid.ns_ssidlen = ss->ss_ssid[0].len; bcopy(ss->ss_ssid[0].ssid, ssid.ns_ssid, ssid.ns_ssidlen); } error = ndis_set_info(sc, OID_802_11_SSID, &ssid, &len); if (error) DPRINTF(("%s: set ESSID failed\n", __func__)); len = 0; error = ndis_set_info(sc, OID_802_11_BSSID_LIST_SCAN, NULL, &len); if (error) { DPRINTF(("%s: scan command failed\n", __func__)); ieee80211_cancel_scan(vap); return; } /* Set a timer to collect the results */ callout_reset(&sc->ndis_scan_callout, hz * 3, ndis_scan, vap); } static void ndis_set_channel(struct ieee80211com *ic) { /* ignore */ } static void ndis_scan_curchan(struct ieee80211_scan_state *ss, unsigned long maxdwell) { /* ignore */ } static void ndis_scan_mindwell(struct ieee80211_scan_state *ss) { /* NB: don't try to abort scan; wait for firmware to finish */ } static void ndis_scan_end(struct ieee80211com *ic) { struct ndis_softc *sc = ic->ic_softc; ndis_scan_results(sc); } Index: head/sys/dev/iwi/if_iwi.c 
=================================================================== --- head/sys/dev/iwi/if_iwi.c (revision 331796) +++ head/sys/dev/iwi/if_iwi.c (revision 331797) @@ -1,3620 +1,3620 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2004, 2005 * Damien Bergamini . All rights reserved. * Copyright (c) 2005-2006 Sam Leffler, Errno Consulting * Copyright (c) 2007 Andrew Thompson * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); /*- * Intel(R) PRO/Wireless 2200BG/2225BG/2915ABG driver * http://www.intel.com/network/connectivity/products/wireless/prowireless_mobile.htm */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define IWI_DEBUG #ifdef IWI_DEBUG #define DPRINTF(x) do { if (iwi_debug > 0) printf x; } while (0) #define DPRINTFN(n, x) do { if (iwi_debug >= (n)) printf x; } while (0) int iwi_debug = 0; SYSCTL_INT(_debug, OID_AUTO, iwi, CTLFLAG_RW, &iwi_debug, 0, "iwi debug level"); static const char *iwi_fw_states[] = { "IDLE", /* IWI_FW_IDLE */ "LOADING", /* IWI_FW_LOADING */ "ASSOCIATING", /* IWI_FW_ASSOCIATING */ "DISASSOCIATING", /* IWI_FW_DISASSOCIATING */ "SCANNING", /* IWI_FW_SCANNING */ }; #else #define DPRINTF(x) #define DPRINTFN(n, x) #endif MODULE_DEPEND(iwi, pci, 1, 1, 1); MODULE_DEPEND(iwi, wlan, 1, 1, 1); MODULE_DEPEND(iwi, firmware, 1, 1, 1); enum { IWI_LED_TX, IWI_LED_RX, IWI_LED_POLL, }; struct iwi_ident { uint16_t vendor; uint16_t device; const char *name; }; static const struct iwi_ident iwi_ident_table[] = { { 0x8086, 0x4220, "Intel(R) PRO/Wireless 2200BG" }, { 0x8086, 0x4221, "Intel(R) PRO/Wireless 2225BG" }, { 0x8086, 0x4223, "Intel(R) PRO/Wireless 2915ABG" }, { 0x8086, 0x4224, "Intel(R) PRO/Wireless 2915ABG" }, { 0, 0, NULL } }; static const uint8_t def_chan_2ghz[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 }; static const uint8_t def_chan_5ghz_band1[] = { 36, 40, 44, 48, 52, 56, 60, 64 }; static const uint8_t def_chan_5ghz_band2[] = { 100, 104, 108, 112, 116, 120, 124, 128, 132, 136, 140 }; static const uint8_t def_chan_5ghz_band3[] = { 149, 153, 157, 161, 165 }; static struct 
ieee80211vap *iwi_vap_create(struct ieee80211com *, const char [IFNAMSIZ], int, enum ieee80211_opmode, int, const uint8_t [IEEE80211_ADDR_LEN], const uint8_t [IEEE80211_ADDR_LEN]); static void iwi_vap_delete(struct ieee80211vap *); static void iwi_dma_map_addr(void *, bus_dma_segment_t *, int, int); static int iwi_alloc_cmd_ring(struct iwi_softc *, struct iwi_cmd_ring *, int); static void iwi_reset_cmd_ring(struct iwi_softc *, struct iwi_cmd_ring *); static void iwi_free_cmd_ring(struct iwi_softc *, struct iwi_cmd_ring *); static int iwi_alloc_tx_ring(struct iwi_softc *, struct iwi_tx_ring *, int, bus_addr_t, bus_addr_t); static void iwi_reset_tx_ring(struct iwi_softc *, struct iwi_tx_ring *); static void iwi_free_tx_ring(struct iwi_softc *, struct iwi_tx_ring *); static int iwi_alloc_rx_ring(struct iwi_softc *, struct iwi_rx_ring *, int); static void iwi_reset_rx_ring(struct iwi_softc *, struct iwi_rx_ring *); static void iwi_free_rx_ring(struct iwi_softc *, struct iwi_rx_ring *); static struct ieee80211_node *iwi_node_alloc(struct ieee80211vap *, const uint8_t [IEEE80211_ADDR_LEN]); static void iwi_node_free(struct ieee80211_node *); static void iwi_media_status(struct ifnet *, struct ifmediareq *); static int iwi_newstate(struct ieee80211vap *, enum ieee80211_state, int); static void iwi_wme_init(struct iwi_softc *); static int iwi_wme_setparams(struct iwi_softc *); static int iwi_wme_update(struct ieee80211com *); static uint16_t iwi_read_prom_word(struct iwi_softc *, uint8_t); static void iwi_frame_intr(struct iwi_softc *, struct iwi_rx_data *, int, struct iwi_frame *); static void iwi_notification_intr(struct iwi_softc *, struct iwi_notif *); static void iwi_rx_intr(struct iwi_softc *); static void iwi_tx_intr(struct iwi_softc *, struct iwi_tx_ring *); static void iwi_intr(void *); static int iwi_cmd(struct iwi_softc *, uint8_t, void *, uint8_t); static void iwi_write_ibssnode(struct iwi_softc *, const u_int8_t [], int); static int iwi_tx_start(struct 
iwi_softc *, struct mbuf *, struct ieee80211_node *, int); static int iwi_raw_xmit(struct ieee80211_node *, struct mbuf *, const struct ieee80211_bpf_params *); static void iwi_start(struct iwi_softc *); static int iwi_transmit(struct ieee80211com *, struct mbuf *); static void iwi_watchdog(void *); static int iwi_ioctl(struct ieee80211com *, u_long, void *); static void iwi_parent(struct ieee80211com *); static void iwi_stop_master(struct iwi_softc *); static int iwi_reset(struct iwi_softc *); static int iwi_load_ucode(struct iwi_softc *, const struct iwi_fw *); static int iwi_load_firmware(struct iwi_softc *, const struct iwi_fw *); static void iwi_release_fw_dma(struct iwi_softc *sc); static int iwi_config(struct iwi_softc *); static int iwi_get_firmware(struct iwi_softc *, enum ieee80211_opmode); static void iwi_put_firmware(struct iwi_softc *); static void iwi_monitor_scan(void *, int); static int iwi_scanchan(struct iwi_softc *, unsigned long, int); static void iwi_scan_start(struct ieee80211com *); static void iwi_scan_end(struct ieee80211com *); static void iwi_set_channel(struct ieee80211com *); static void iwi_scan_curchan(struct ieee80211_scan_state *, unsigned long maxdwell); static void iwi_scan_mindwell(struct ieee80211_scan_state *); static int iwi_auth_and_assoc(struct iwi_softc *, struct ieee80211vap *); static void iwi_disassoc(void *, int); static int iwi_disassociate(struct iwi_softc *, int quiet); static void iwi_init_locked(struct iwi_softc *); static void iwi_init(void *); static int iwi_init_fw_dma(struct iwi_softc *, int); static void iwi_stop_locked(void *); static void iwi_stop(struct iwi_softc *); static void iwi_restart(void *, int); static int iwi_getrfkill(struct iwi_softc *); static void iwi_radio_on(void *, int); static void iwi_radio_off(void *, int); static void iwi_sysctlattach(struct iwi_softc *); static void iwi_led_event(struct iwi_softc *, int); static void iwi_ledattach(struct iwi_softc *); static void 
iwi_collect_bands(struct ieee80211com *, uint8_t [], size_t); static void iwi_getradiocaps(struct ieee80211com *, int, int *, struct ieee80211_channel []); static int iwi_probe(device_t); static int iwi_attach(device_t); static int iwi_detach(device_t); static int iwi_shutdown(device_t); static int iwi_suspend(device_t); static int iwi_resume(device_t); static device_method_t iwi_methods[] = { /* Device interface */ DEVMETHOD(device_probe, iwi_probe), DEVMETHOD(device_attach, iwi_attach), DEVMETHOD(device_detach, iwi_detach), DEVMETHOD(device_shutdown, iwi_shutdown), DEVMETHOD(device_suspend, iwi_suspend), DEVMETHOD(device_resume, iwi_resume), DEVMETHOD_END }; static driver_t iwi_driver = { "iwi", iwi_methods, sizeof (struct iwi_softc) }; static devclass_t iwi_devclass; DRIVER_MODULE(iwi, pci, iwi_driver, iwi_devclass, NULL, NULL); MODULE_VERSION(iwi, 1); static __inline uint8_t MEM_READ_1(struct iwi_softc *sc, uint32_t addr) { CSR_WRITE_4(sc, IWI_CSR_INDIRECT_ADDR, addr); return CSR_READ_1(sc, IWI_CSR_INDIRECT_DATA); } static __inline uint32_t MEM_READ_4(struct iwi_softc *sc, uint32_t addr) { CSR_WRITE_4(sc, IWI_CSR_INDIRECT_ADDR, addr); return CSR_READ_4(sc, IWI_CSR_INDIRECT_DATA); } static int iwi_probe(device_t dev) { const struct iwi_ident *ident; for (ident = iwi_ident_table; ident->name != NULL; ident++) { if (pci_get_vendor(dev) == ident->vendor && pci_get_device(dev) == ident->device) { device_set_desc(dev, ident->name); return (BUS_PROBE_DEFAULT); } } return ENXIO; } static int iwi_attach(device_t dev) { struct iwi_softc *sc = device_get_softc(dev); struct ieee80211com *ic = &sc->sc_ic; uint16_t val; int i, error; sc->sc_dev = dev; sc->sc_ledevent = ticks; IWI_LOCK_INIT(sc); mbufq_init(&sc->sc_snd, ifqmaxlen); sc->sc_unr = new_unrhdr(1, IWI_MAX_IBSSNODE-1, &sc->sc_mtx); TASK_INIT(&sc->sc_radiontask, 0, iwi_radio_on, sc); TASK_INIT(&sc->sc_radiofftask, 0, iwi_radio_off, sc); TASK_INIT(&sc->sc_restarttask, 0, iwi_restart, sc); 
TASK_INIT(&sc->sc_disassoctask, 0, iwi_disassoc, sc); TASK_INIT(&sc->sc_monitortask, 0, iwi_monitor_scan, sc); callout_init_mtx(&sc->sc_wdtimer, &sc->sc_mtx, 0); callout_init_mtx(&sc->sc_rftimer, &sc->sc_mtx, 0); pci_write_config(dev, 0x41, 0, 1); /* enable bus-mastering */ pci_enable_busmaster(dev); i = PCIR_BAR(0); sc->mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &i, RF_ACTIVE); if (sc->mem == NULL) { device_printf(dev, "could not allocate memory resource\n"); goto fail; } sc->sc_st = rman_get_bustag(sc->mem); sc->sc_sh = rman_get_bushandle(sc->mem); i = 0; sc->irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &i, RF_ACTIVE | RF_SHAREABLE); if (sc->irq == NULL) { device_printf(dev, "could not allocate interrupt resource\n"); goto fail; } if (iwi_reset(sc) != 0) { device_printf(dev, "could not reset adapter\n"); goto fail; } /* * Allocate rings. */ if (iwi_alloc_cmd_ring(sc, &sc->cmdq, IWI_CMD_RING_COUNT) != 0) { device_printf(dev, "could not allocate Cmd ring\n"); goto fail; } for (i = 0; i < 4; i++) { error = iwi_alloc_tx_ring(sc, &sc->txq[i], IWI_TX_RING_COUNT, IWI_CSR_TX1_RIDX + i * 4, IWI_CSR_TX1_WIDX + i * 4); if (error != 0) { device_printf(dev, "could not allocate Tx ring %d\n", i+i); goto fail; } } if (iwi_alloc_rx_ring(sc, &sc->rxq, IWI_RX_RING_COUNT) != 0) { device_printf(dev, "could not allocate Rx ring\n"); goto fail; } iwi_wme_init(sc); ic->ic_softc = sc; ic->ic_name = device_get_nameunit(dev); ic->ic_opmode = IEEE80211_M_STA; ic->ic_phytype = IEEE80211_T_OFDM; /* not only, but not used */ /* set device capabilities */ ic->ic_caps = IEEE80211_C_STA /* station mode supported */ | IEEE80211_C_IBSS /* IBSS mode supported */ | IEEE80211_C_MONITOR /* monitor mode supported */ | IEEE80211_C_PMGT /* power save supported */ | IEEE80211_C_SHPREAMBLE /* short preamble supported */ | IEEE80211_C_WPA /* 802.11i */ | IEEE80211_C_WME /* 802.11e */ #if 0 | IEEE80211_C_BGSCAN /* capable of bg scanning */ #endif ; /* read MAC address from EEPROM */ val = 
iwi_read_prom_word(sc, IWI_EEPROM_MAC + 0); ic->ic_macaddr[0] = val & 0xff; ic->ic_macaddr[1] = val >> 8; val = iwi_read_prom_word(sc, IWI_EEPROM_MAC + 1); ic->ic_macaddr[2] = val & 0xff; ic->ic_macaddr[3] = val >> 8; val = iwi_read_prom_word(sc, IWI_EEPROM_MAC + 2); ic->ic_macaddr[4] = val & 0xff; ic->ic_macaddr[5] = val >> 8; iwi_getradiocaps(ic, IEEE80211_CHAN_MAX, &ic->ic_nchans, ic->ic_channels); ieee80211_ifattach(ic); /* override default methods */ ic->ic_node_alloc = iwi_node_alloc; sc->sc_node_free = ic->ic_node_free; ic->ic_node_free = iwi_node_free; ic->ic_raw_xmit = iwi_raw_xmit; ic->ic_scan_start = iwi_scan_start; ic->ic_scan_end = iwi_scan_end; ic->ic_set_channel = iwi_set_channel; ic->ic_scan_curchan = iwi_scan_curchan; ic->ic_scan_mindwell = iwi_scan_mindwell; ic->ic_wme.wme_update = iwi_wme_update; ic->ic_vap_create = iwi_vap_create; ic->ic_vap_delete = iwi_vap_delete; ic->ic_ioctl = iwi_ioctl; ic->ic_transmit = iwi_transmit; ic->ic_parent = iwi_parent; ic->ic_getradiocaps = iwi_getradiocaps; ieee80211_radiotap_attach(ic, &sc->sc_txtap.wt_ihdr, sizeof(sc->sc_txtap), IWI_TX_RADIOTAP_PRESENT, &sc->sc_rxtap.wr_ihdr, sizeof(sc->sc_rxtap), IWI_RX_RADIOTAP_PRESENT); iwi_sysctlattach(sc); iwi_ledattach(sc); /* * Hook our interrupt after all initialization is complete. 
*/ error = bus_setup_intr(dev, sc->irq, INTR_TYPE_NET | INTR_MPSAFE, NULL, iwi_intr, sc, &sc->sc_ih); if (error != 0) { device_printf(dev, "could not set up interrupt\n"); goto fail; } if (bootverbose) ieee80211_announce(ic); return 0; fail: /* XXX fix */ iwi_detach(dev); return ENXIO; } static int iwi_detach(device_t dev) { struct iwi_softc *sc = device_get_softc(dev); struct ieee80211com *ic = &sc->sc_ic; bus_teardown_intr(dev, sc->irq, sc->sc_ih); /* NB: do early to drain any pending tasks */ ieee80211_draintask(ic, &sc->sc_radiontask); ieee80211_draintask(ic, &sc->sc_radiofftask); ieee80211_draintask(ic, &sc->sc_restarttask); ieee80211_draintask(ic, &sc->sc_disassoctask); ieee80211_draintask(ic, &sc->sc_monitortask); iwi_stop(sc); ieee80211_ifdetach(ic); iwi_put_firmware(sc); iwi_release_fw_dma(sc); iwi_free_cmd_ring(sc, &sc->cmdq); iwi_free_tx_ring(sc, &sc->txq[0]); iwi_free_tx_ring(sc, &sc->txq[1]); iwi_free_tx_ring(sc, &sc->txq[2]); iwi_free_tx_ring(sc, &sc->txq[3]); iwi_free_rx_ring(sc, &sc->rxq); bus_release_resource(dev, SYS_RES_IRQ, rman_get_rid(sc->irq), sc->irq); bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(sc->mem), sc->mem); delete_unrhdr(sc->sc_unr); mbufq_drain(&sc->sc_snd); IWI_LOCK_DESTROY(sc); return 0; } static struct ieee80211vap * iwi_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ], int unit, enum ieee80211_opmode opmode, int flags, const uint8_t bssid[IEEE80211_ADDR_LEN], const uint8_t mac[IEEE80211_ADDR_LEN]) { struct iwi_softc *sc = ic->ic_softc; struct iwi_vap *ivp; struct ieee80211vap *vap; int i; if (!TAILQ_EMPTY(&ic->ic_vaps)) /* only one at a time */ return NULL; /* * Get firmware image (and possibly dma memory) on mode change. */ if (iwi_get_firmware(sc, opmode)) return NULL; /* allocate DMA memory for mapping firmware image */ i = sc->fw_fw.size; if (sc->fw_boot.size > i) i = sc->fw_boot.size; /* XXX do we dma the ucode as well ? 
*/ if (sc->fw_uc.size > i) i = sc->fw_uc.size; if (iwi_init_fw_dma(sc, i)) return NULL; ivp = malloc(sizeof(struct iwi_vap), M_80211_VAP, M_WAITOK | M_ZERO); vap = &ivp->iwi_vap; ieee80211_vap_setup(ic, vap, name, unit, opmode, flags, bssid); /* override the default, the setting comes from the linux driver */ vap->iv_bmissthreshold = 24; /* override with driver methods */ ivp->iwi_newstate = vap->iv_newstate; vap->iv_newstate = iwi_newstate; /* complete setup */ ieee80211_vap_attach(vap, ieee80211_media_change, iwi_media_status, mac); ic->ic_opmode = opmode; return vap; } static void iwi_vap_delete(struct ieee80211vap *vap) { struct iwi_vap *ivp = IWI_VAP(vap); ieee80211_vap_detach(vap); free(ivp, M_80211_VAP); } static void iwi_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error) { if (error != 0) return; KASSERT(nseg == 1, ("too many DMA segments, %d should be 1", nseg)); *(bus_addr_t *)arg = segs[0].ds_addr; } static int iwi_alloc_cmd_ring(struct iwi_softc *sc, struct iwi_cmd_ring *ring, int count) { int error; ring->count = count; ring->queued = 0; ring->cur = ring->next = 0; error = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), 4, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, count * IWI_CMD_DESC_SIZE, 1, count * IWI_CMD_DESC_SIZE, 0, NULL, NULL, &ring->desc_dmat); if (error != 0) { device_printf(sc->sc_dev, "could not create desc DMA tag\n"); goto fail; } error = bus_dmamem_alloc(ring->desc_dmat, (void **)&ring->desc, BUS_DMA_NOWAIT | BUS_DMA_ZERO, &ring->desc_map); if (error != 0) { device_printf(sc->sc_dev, "could not allocate DMA memory\n"); goto fail; } error = bus_dmamap_load(ring->desc_dmat, ring->desc_map, ring->desc, count * IWI_CMD_DESC_SIZE, iwi_dma_map_addr, &ring->physaddr, 0); if (error != 0) { device_printf(sc->sc_dev, "could not load desc DMA map\n"); goto fail; } return 0; fail: iwi_free_cmd_ring(sc, ring); return error; } static void iwi_reset_cmd_ring(struct iwi_softc *sc, struct iwi_cmd_ring *ring) { 
ring->queued = 0; ring->cur = ring->next = 0; } static void iwi_free_cmd_ring(struct iwi_softc *sc, struct iwi_cmd_ring *ring) { if (ring->desc != NULL) { bus_dmamap_sync(ring->desc_dmat, ring->desc_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->desc_dmat, ring->desc_map); bus_dmamem_free(ring->desc_dmat, ring->desc, ring->desc_map); } if (ring->desc_dmat != NULL) bus_dma_tag_destroy(ring->desc_dmat); } static int iwi_alloc_tx_ring(struct iwi_softc *sc, struct iwi_tx_ring *ring, int count, bus_addr_t csr_ridx, bus_addr_t csr_widx) { int i, error; ring->count = count; ring->queued = 0; ring->cur = ring->next = 0; ring->csr_ridx = csr_ridx; ring->csr_widx = csr_widx; error = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), 4, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, count * IWI_TX_DESC_SIZE, 1, count * IWI_TX_DESC_SIZE, 0, NULL, NULL, &ring->desc_dmat); if (error != 0) { device_printf(sc->sc_dev, "could not create desc DMA tag\n"); goto fail; } error = bus_dmamem_alloc(ring->desc_dmat, (void **)&ring->desc, BUS_DMA_NOWAIT | BUS_DMA_ZERO, &ring->desc_map); if (error != 0) { device_printf(sc->sc_dev, "could not allocate DMA memory\n"); goto fail; } error = bus_dmamap_load(ring->desc_dmat, ring->desc_map, ring->desc, count * IWI_TX_DESC_SIZE, iwi_dma_map_addr, &ring->physaddr, 0); if (error != 0) { device_printf(sc->sc_dev, "could not load desc DMA map\n"); goto fail; } ring->data = malloc(count * sizeof (struct iwi_tx_data), M_DEVBUF, M_NOWAIT | M_ZERO); if (ring->data == NULL) { device_printf(sc->sc_dev, "could not allocate soft data\n"); error = ENOMEM; goto fail; } error = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), 1, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, IWI_MAX_NSEG, MCLBYTES, 0, NULL, NULL, &ring->data_dmat); if (error != 0) { device_printf(sc->sc_dev, "could not create data DMA tag\n"); goto fail; } for (i = 0; i < count; i++) { error = bus_dmamap_create(ring->data_dmat, 0, &ring->data[i].map); if (error 
!= 0) { device_printf(sc->sc_dev, "could not create DMA map\n"); goto fail; } } return 0; fail: iwi_free_tx_ring(sc, ring); return error; } static void iwi_reset_tx_ring(struct iwi_softc *sc, struct iwi_tx_ring *ring) { struct iwi_tx_data *data; int i; for (i = 0; i < ring->count; i++) { data = &ring->data[i]; if (data->m != NULL) { bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->data_dmat, data->map); m_freem(data->m); data->m = NULL; } if (data->ni != NULL) { ieee80211_free_node(data->ni); data->ni = NULL; } } ring->queued = 0; ring->cur = ring->next = 0; } static void iwi_free_tx_ring(struct iwi_softc *sc, struct iwi_tx_ring *ring) { struct iwi_tx_data *data; int i; if (ring->desc != NULL) { bus_dmamap_sync(ring->desc_dmat, ring->desc_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->desc_dmat, ring->desc_map); bus_dmamem_free(ring->desc_dmat, ring->desc, ring->desc_map); } if (ring->desc_dmat != NULL) bus_dma_tag_destroy(ring->desc_dmat); if (ring->data != NULL) { for (i = 0; i < ring->count; i++) { data = &ring->data[i]; if (data->m != NULL) { bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->data_dmat, data->map); m_freem(data->m); } if (data->ni != NULL) ieee80211_free_node(data->ni); if (data->map != NULL) bus_dmamap_destroy(ring->data_dmat, data->map); } free(ring->data, M_DEVBUF); } if (ring->data_dmat != NULL) bus_dma_tag_destroy(ring->data_dmat); } static int iwi_alloc_rx_ring(struct iwi_softc *sc, struct iwi_rx_ring *ring, int count) { struct iwi_rx_data *data; int i, error; ring->count = count; ring->cur = 0; ring->data = malloc(count * sizeof (struct iwi_rx_data), M_DEVBUF, M_NOWAIT | M_ZERO); if (ring->data == NULL) { device_printf(sc->sc_dev, "could not allocate soft data\n"); error = ENOMEM; goto fail; } error = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), 1, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, MCLBYTES, 0, NULL, NULL, 
&ring->data_dmat); if (error != 0) { device_printf(sc->sc_dev, "could not create data DMA tag\n"); goto fail; } for (i = 0; i < count; i++) { data = &ring->data[i]; error = bus_dmamap_create(ring->data_dmat, 0, &data->map); if (error != 0) { device_printf(sc->sc_dev, "could not create DMA map\n"); goto fail; } data->m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (data->m == NULL) { device_printf(sc->sc_dev, "could not allocate rx mbuf\n"); error = ENOMEM; goto fail; } error = bus_dmamap_load(ring->data_dmat, data->map, mtod(data->m, void *), MCLBYTES, iwi_dma_map_addr, &data->physaddr, 0); if (error != 0) { device_printf(sc->sc_dev, "could not load rx buf DMA map"); goto fail; } data->reg = IWI_CSR_RX_BASE + i * 4; } return 0; fail: iwi_free_rx_ring(sc, ring); return error; } static void iwi_reset_rx_ring(struct iwi_softc *sc, struct iwi_rx_ring *ring) { ring->cur = 0; } static void iwi_free_rx_ring(struct iwi_softc *sc, struct iwi_rx_ring *ring) { struct iwi_rx_data *data; int i; if (ring->data != NULL) { for (i = 0; i < ring->count; i++) { data = &ring->data[i]; if (data->m != NULL) { bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(ring->data_dmat, data->map); m_freem(data->m); } if (data->map != NULL) bus_dmamap_destroy(ring->data_dmat, data->map); } free(ring->data, M_DEVBUF); } if (ring->data_dmat != NULL) bus_dma_tag_destroy(ring->data_dmat); } static int iwi_shutdown(device_t dev) { struct iwi_softc *sc = device_get_softc(dev); iwi_stop(sc); iwi_put_firmware(sc); /* ??? 
XXX */ return 0; } static int iwi_suspend(device_t dev) { struct iwi_softc *sc = device_get_softc(dev); struct ieee80211com *ic = &sc->sc_ic; ieee80211_suspend_all(ic); return 0; } static int iwi_resume(device_t dev) { struct iwi_softc *sc = device_get_softc(dev); struct ieee80211com *ic = &sc->sc_ic; pci_write_config(dev, 0x41, 0, 1); ieee80211_resume_all(ic); return 0; } static struct ieee80211_node * iwi_node_alloc(struct ieee80211vap *vap, const uint8_t mac[IEEE80211_ADDR_LEN]) { struct iwi_node *in; in = malloc(sizeof (struct iwi_node), M_80211_NODE, M_NOWAIT | M_ZERO); if (in == NULL) return NULL; /* XXX assign sta table entry for adhoc */ in->in_station = -1; return &in->in_node; } static void iwi_node_free(struct ieee80211_node *ni) { struct ieee80211com *ic = ni->ni_ic; struct iwi_softc *sc = ic->ic_softc; struct iwi_node *in = (struct iwi_node *)ni; if (in->in_station != -1) { DPRINTF(("%s mac %6D station %u\n", __func__, ni->ni_macaddr, ":", in->in_station)); free_unr(sc->sc_unr, in->in_station); } sc->sc_node_free(ni); } /* * Convert h/w rate code to IEEE rate code. */ static int iwi_cvtrate(int iwirate) { switch (iwirate) { case IWI_RATE_DS1: return 2; case IWI_RATE_DS2: return 4; case IWI_RATE_DS5: return 11; case IWI_RATE_DS11: return 22; case IWI_RATE_OFDM6: return 12; case IWI_RATE_OFDM9: return 18; case IWI_RATE_OFDM12: return 24; case IWI_RATE_OFDM18: return 36; case IWI_RATE_OFDM24: return 48; case IWI_RATE_OFDM36: return 72; case IWI_RATE_OFDM48: return 96; case IWI_RATE_OFDM54: return 108; } return 0; } /* * The firmware automatically adapts the transmit speed. We report its current * value here. 
*/
static void
iwi_media_status(struct ifnet *ifp, struct ifmediareq *imr)
{
	struct ieee80211vap *vap = ifp->if_softc;
	struct ieee80211com *ic = vap->iv_ic;
	struct iwi_softc *sc = ic->ic_softc;
	struct ieee80211_node *ni;

	/* read current transmission rate from adapter */
	/* Hold a reference on the bss node while its rate is updated. */
	ni = ieee80211_ref_node(vap->iv_bss);
	ni->ni_txrate =
	    iwi_cvtrate(CSR_READ_4(sc, IWI_CSR_CURRENT_TX_RATE));
	ieee80211_free_node(ni);
	ieee80211_media_status(ifp, imr);
}

/*
 * net80211 state-change hook.
 *
 * The com lock is dropped and the driver lock taken while the
 * firmware is told about the transition; the locks are swapped back
 * before chaining to the net80211 handler saved in iwi_vap_create().
 * Returns whatever that saved handler returns.
 */
static int
iwi_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg)
{
	struct iwi_vap *ivp = IWI_VAP(vap);
	struct ieee80211com *ic = vap->iv_ic;
	struct iwi_softc *sc = ic->ic_softc;
	IWI_LOCK_DECL;

	DPRINTF(("%s: %s -> %s flags 0x%x\n", __func__,
		ieee80211_state_name[vap->iv_state],
		ieee80211_state_name[nstate], sc->flags));

	IEEE80211_UNLOCK(ic);
	IWI_LOCK(sc);
	switch (nstate) {
	case IEEE80211_S_INIT:
		/*
		 * NB: don't try to do this if iwi_stop_master has
		 *     shutdown the firmware and disabled interrupts.
		 */
		if (vap->iv_state == IEEE80211_S_RUN &&
		    (sc->flags & IWI_FLAG_FW_INITED))
			iwi_disassociate(sc, 0);
		break;
	case IEEE80211_S_AUTH:
		iwi_auth_and_assoc(sc, vap);
		break;
	case IEEE80211_S_RUN:
		if (vap->iv_opmode == IEEE80211_M_IBSS &&
		    vap->iv_state == IEEE80211_S_SCAN) {
			/*
			 * XXX when joining an ibss network we are called
			 * with a SCAN -> RUN transition on scan complete.
			 * Use that to call iwi_auth_and_assoc.  On completing
			 * the join we are then called again with an
			 * AUTH -> RUN transition and we want to do nothing.
			 * This is all totally bogus and needs to be redone.
			 */
			iwi_auth_and_assoc(sc, vap);
		} else if (vap->iv_opmode == IEEE80211_M_MONITOR)
			ieee80211_runtask(ic, &sc->sc_monitortask);
		break;
	case IEEE80211_S_ASSOC:
		/*
		 * If we are transitioning from AUTH then just wait
		 * for the ASSOC status to come back from the firmware.
		 * Otherwise we need to issue the association request.
		 */
		if (vap->iv_state == IEEE80211_S_AUTH)
			break;
		iwi_auth_and_assoc(sc, vap);
		break;
	default:
		break;
	}
	IWI_UNLOCK(sc);
	IEEE80211_LOCK(ic);
	return ivp->iwi_newstate(vap, nstate, arg);
}

/*
 * WME parameters coming from IEEE 802.11e specification.  These values are
 * already declared in ieee80211_proto.c, but they are static so they can't
 * be reused here.
 */
/*
 * The initialisers below are positional.
 * NOTE(review): iwi_wme_init() reads wmep_aifsn, wmep_logcwmin,
 * wmep_logcwmax, wmep_txopLimit and wmep_acm from these tables; the
 * mapping of columns to fields depends on the struct wmeParams
 * layout, which is not visible here -- confirm against the header.
 */
static const struct wmeParams iwi_wme_cck_params[WME_NUM_AC] = {
	{ 0, 3, 5,  7,   0 },	/* WME_AC_BE */
	{ 0, 3, 5, 10,   0 },	/* WME_AC_BK */
	{ 0, 2, 4,  5, 188 },	/* WME_AC_VI */
	{ 0, 2, 3,  4, 102 }	/* WME_AC_VO */
};

static const struct wmeParams iwi_wme_ofdm_params[WME_NUM_AC] = {
	{ 0, 3, 4,  6,   0 },	/* WME_AC_BE */
	{ 0, 3, 4, 10,   0 },	/* WME_AC_BK */
	{ 0, 2, 3,  4,  94 },	/* WME_AC_VI */
	{ 0, 2, 2,  3,  47 }	/* WME_AC_VO */
};
/* IWI_EXP2: turn a log2 contention window into (2^v - 1), little-endian. */
#define IWI_EXP2(v)	htole16((1 << (v)) - 1)
/* IWI_USEC: convert a TXOP limit via IEEE80211_TXOP_TO_US, little-endian. */
#define IWI_USEC(v)	htole16(IEEE80211_TXOP_TO_US(v))

/*
 * Fill sc->wme[1] from the CCK table and sc->wme[2] from the OFDM
 * table.  sc->wme[0] is only zeroed here; it is filled in by
 * iwi_wme_setparams().
 */
static void
iwi_wme_init(struct iwi_softc *sc)
{
	const struct wmeParams *wmep;
	int ac;

	memset(sc->wme, 0, sizeof sc->wme);
	for (ac = 0; ac < WME_NUM_AC; ac++) {
		/* set WME values for CCK modulation */
		wmep = &iwi_wme_cck_params[ac];
		sc->wme[1].aifsn[ac] = wmep->wmep_aifsn;
		sc->wme[1].cwmin[ac] = IWI_EXP2(wmep->wmep_logcwmin);
		sc->wme[1].cwmax[ac] = IWI_EXP2(wmep->wmep_logcwmax);
		sc->wme[1].burst[ac] = IWI_USEC(wmep->wmep_txopLimit);
		sc->wme[1].acm[ac]   = wmep->wmep_acm;

		/* set WME values for OFDM modulation */
		wmep = &iwi_wme_ofdm_params[ac];
		sc->wme[2].aifsn[ac] = wmep->wmep_aifsn;
		sc->wme[2].cwmin[ac] = IWI_EXP2(wmep->wmep_logcwmin);
		sc->wme[2].cwmax[ac] = IWI_EXP2(wmep->wmep_logcwmax);
		sc->wme[2].burst[ac] = IWI_USEC(wmep->wmep_txopLimit);
		sc->wme[2].acm[ac]   = wmep->wmep_acm;
	}
}

/*
 * Copy the current net80211 WME parameters into sc->wme[0] and send
 * the whole sc->wme array to the adapter.  Returns the result of
 * iwi_cmd().
 */
static int
iwi_wme_setparams(struct iwi_softc *sc)
{
	struct ieee80211com *ic = &sc->sc_ic;
	struct chanAccParams chp;
	const struct wmeParams *wmep;
	int ac;

	ieee80211_wme_ic_getparams(ic, &chp);

	for (ac = 0; ac < WME_NUM_AC; ac++) {
		/* set WME values for current operating mode */
		wmep = &chp.cap_wmeParams[ac];
		sc->wme[0].aifsn[ac] = wmep->wmep_aifsn;
		sc->wme[0].cwmin[ac] = IWI_EXP2(wmep->wmep_logcwmin);
		sc->wme[0].cwmax[ac] = IWI_EXP2(wmep->wmep_logcwmax);
		sc->wme[0].burst[ac] = IWI_USEC(wmep->wmep_txopLimit);
		sc->wme[0].acm[ac]   = wmep->wmep_acm;
	}

	DPRINTF(("Setting WME parameters\n"));
	return iwi_cmd(sc, IWI_CMD_SET_WME_PARAMS, sc->wme, sizeof sc->wme);
}
#undef IWI_USEC
#undef IWI_EXP2

/*
 * net80211 WME-update hook.  Always returns 0; the result of
 * iwi_wme_setparams() is not propagated.
 */
static int
iwi_wme_update(struct ieee80211com *ic)
{
	struct iwi_softc *sc = ic->ic_softc;
	struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps);
	IWI_LOCK_DECL;

	/*
	 * We may be called to update the WME parameters in
	 * the adapter at various places.  If we're already
	 * associated then initiate the request immediately;
	 * otherwise we assume the params will get sent down
	 * to the adapter as part of the work iwi_auth_and_assoc
	 * does.
	 */
	if (vap->iv_state == IEEE80211_S_RUN) {
		IWI_LOCK(sc);
		iwi_wme_setparams(sc);
		IWI_UNLOCK(sc);
	}
	return (0);
}

/*
 * Build a WME information element and send it to the adapter.
 * Returns the result of iwi_cmd().
 */
static int
iwi_wme_setie(struct iwi_softc *sc)
{
	struct ieee80211_wme_info wme;

	memset(&wme, 0, sizeof wme);
	wme.wme_id = IEEE80211_ELEMID_VENDOR;
	/* The length excludes two bytes of the structure. */
	wme.wme_len = sizeof (struct ieee80211_wme_info) - 2;
	wme.wme_oui[0] = 0x00;
	wme.wme_oui[1] = 0x50;
	wme.wme_oui[2] = 0xf2;
	wme.wme_type = WME_OUI_TYPE;
	wme.wme_subtype = WME_INFO_OUI_SUBTYPE;
	wme.wme_version = WME_VERSION;
	wme.wme_info = 0;

	DPRINTF(("Setting WME IE (len=%u)\n", wme.wme_len));
	return iwi_cmd(sc, IWI_CMD_SET_WMEIE, &wme, sizeof wme);
}

/*
 * Read 16 bits at address 'addr' from the serial EEPROM.
*/
/*
 * iwi_read_prom_word: the word is fetched by driving the EEPROM control
 * register through IWI_EEPROM_CTL (one idle clock, a start bit, the READ
 * opcode and eight address bits, most significant first) and then sampling
 * the IWI_EEPROM_Q bit of IWI_MEM_EEPROM_CTL after each of 16 clock pulses.
 * The sampled bits are assembled most significant first and returned.
 *
 * NB: the order of the control writes below is the protocol itself; do not
 * reorder them.
 */
static uint16_t
iwi_read_prom_word(struct iwi_softc *sc, uint8_t addr)
{
	uint32_t tmp;
	uint16_t val;
	int n;

	/* clock C once before the first command */
	IWI_EEPROM_CTL(sc, 0);
	IWI_EEPROM_CTL(sc, IWI_EEPROM_S);
	IWI_EEPROM_CTL(sc, IWI_EEPROM_S | IWI_EEPROM_C);
	IWI_EEPROM_CTL(sc, IWI_EEPROM_S);

	/* write start bit (1) */
	IWI_EEPROM_CTL(sc, IWI_EEPROM_S | IWI_EEPROM_D);
	IWI_EEPROM_CTL(sc, IWI_EEPROM_S | IWI_EEPROM_D | IWI_EEPROM_C);

	/* write READ opcode (10) */
	IWI_EEPROM_CTL(sc, IWI_EEPROM_S | IWI_EEPROM_D);
	IWI_EEPROM_CTL(sc, IWI_EEPROM_S | IWI_EEPROM_D | IWI_EEPROM_C);
	IWI_EEPROM_CTL(sc, IWI_EEPROM_S);
	IWI_EEPROM_CTL(sc, IWI_EEPROM_S | IWI_EEPROM_C);

	/* write address A7-A0 */
	for (n = 7; n >= 0; n--) {
		/* present the address bit, then repeat it with the clock raised */
		IWI_EEPROM_CTL(sc, IWI_EEPROM_S |
		    (((addr >> n) & 1) << IWI_EEPROM_SHIFT_D));
		IWI_EEPROM_CTL(sc, IWI_EEPROM_S |
		    (((addr >> n) & 1) << IWI_EEPROM_SHIFT_D) | IWI_EEPROM_C);
	}

	IWI_EEPROM_CTL(sc, IWI_EEPROM_S);

	/* read data Q15-Q0 */
	val = 0;
	for (n = 15; n >= 0; n--) {
		IWI_EEPROM_CTL(sc, IWI_EEPROM_S | IWI_EEPROM_C);
		IWI_EEPROM_CTL(sc, IWI_EEPROM_S);
		tmp = MEM_READ_4(sc, IWI_MEM_EEPROM_CTL);
		/* isolate the data-out bit and place it at position n */
		val |= ((tmp & IWI_EEPROM_Q) >> IWI_EEPROM_SHIFT_Q) << n;
	}

	IWI_EEPROM_CTL(sc, 0);

	/* clear Chip Select and clock C */
	IWI_EEPROM_CTL(sc, IWI_EEPROM_S);
	IWI_EEPROM_CTL(sc, 0);
	IWI_EEPROM_CTL(sc, IWI_EEPROM_C);

	return val;
}

/*
 * Record 'chan' as the current channel in the softc and tell net80211 that
 * the radiotap channel information changed.
 */
static void
iwi_setcurchan(struct iwi_softc *sc, int chan)
{
	struct ieee80211com *ic = &sc->sc_ic;

	sc->curchan = chan;
	ieee80211_radiotap_chan_change(ic);
}

/*
 * Handle one received 802.11 frame.
 *
 * sc    - driver state.
 * data  - rx ring element that holds the frame's mbuf and DMA map.
 * i     - ring index supplied by the caller; not referenced in this body.
 * frame - firmware frame descriptor (length, channel, rssi, rate, antenna).
 *
 * The ring element is given a freshly allocated cluster before the received
 * mbuf is passed up, so the ring never has an empty slot.  On allocation or
 * DMA load failure the frame is dropped and ic_ierrors is incremented.  The
 * driver lock is released around the call into net80211 and re-taken
 * afterwards.
 */
static void
iwi_frame_intr(struct iwi_softc *sc, struct iwi_rx_data *data, int i,
    struct iwi_frame *frame)
{
	struct ieee80211com *ic = &sc->sc_ic;
	struct mbuf *mnew, *m;
	struct ieee80211_node *ni;
	int type, error, framelen;
	int8_t rssi, nf;
	IWI_LOCK_DECL;

	framelen = le16toh(frame->len);
	/*
	 * NOTE(review): the mbuf length set further down is framelen plus
	 * sizeof(struct iwi_hdr) + sizeof(struct iwi_frame), so a framelen
	 * close to MCLBYTES would exceed the cluster size - confirm whether
	 * the upper bound should account for the two headers.
	 */
	if (framelen < IEEE80211_MIN_LEN || framelen > MCLBYTES) {
		/*
		 * XXX >MCLBYTES is bogus as it means the h/w dma'd
		 *     out of bounds; need to figure out how to limit
		 *     frame size in the firmware
		 */
		/* XXX stat */
		DPRINTFN(1,
		    ("drop rx frame len=%u chan=%u rssi=%u rssi_dbm=%u\n",
		    le16toh(frame->len), frame->chan, frame->rssi,
		    frame->rssi_dbm));
		return;
	}

	DPRINTFN(5, ("received frame len=%u chan=%u rssi=%u rssi_dbm=%u\n",
	    le16toh(frame->len), frame->chan, frame->rssi, frame->rssi_dbm));

	/* follow the channel reported by the firmware */
	if (frame->chan != sc->curchan)
		iwi_setcurchan(sc, frame->chan);

	/*
	 * Try to allocate a new mbuf for this ring element and load it before
	 * processing the current mbuf. If the ring element cannot be loaded,
	 * drop the received packet and reuse the old mbuf. In the unlikely
	 * case that the old mbuf can't be reloaded either, explicitly panic.
	 */
	mnew = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
	if (mnew == NULL) {
		counter_u64_add(ic->ic_ierrors, 1);
		return;
	}

	bus_dmamap_unload(sc->rxq.data_dmat, data->map);

	error = bus_dmamap_load(sc->rxq.data_dmat, data->map,
	    mtod(mnew, void *), MCLBYTES, iwi_dma_map_addr, &data->physaddr,
	    0);
	if (error != 0) {
		m_freem(mnew);

		/* try to reload the old mbuf */
		error = bus_dmamap_load(sc->rxq.data_dmat, data->map,
		    mtod(data->m, void *), MCLBYTES, iwi_dma_map_addr,
		    &data->physaddr, 0);
		if (error != 0) {
			/* very unlikely that it will fail... */
			panic("%s: could not load old rx mbuf",
			    device_get_name(sc->sc_dev));
		}
		counter_u64_add(ic->ic_ierrors, 1);
		return;
	}

	/*
	 * New mbuf successfully loaded, update Rx ring and continue
	 * processing.
	 */
	m = data->m;
	data->m = mnew;
	/* publish the new buffer's bus address to the ring register */
	CSR_WRITE_4(sc, data->reg, data->physaddr);

	/* finalize mbuf */
	m->m_pkthdr.len = m->m_len = sizeof (struct iwi_hdr) +
	    sizeof (struct iwi_frame) + framelen;

	/* strip the firmware headers so the mbuf starts at the 802.11 frame */
	m_adj(m, sizeof (struct iwi_hdr) + sizeof (struct iwi_frame));

	rssi = frame->rssi_dbm;
	nf = -95;	/* fixed noise floor value passed to net80211 */
	if (ieee80211_radiotap_active(ic)) {
		struct iwi_rx_radiotap_header *tap = &sc->sc_rxtap;

		tap->wr_flags = 0;
		tap->wr_antsignal = rssi;
		tap->wr_antnoise = nf;
		tap->wr_rate = iwi_cvtrate(frame->rate);
		tap->wr_antenna = frame->antenna;
	}
	/* the driver lock is dropped while net80211 processes the frame */
	IWI_UNLOCK(sc);

	ni = ieee80211_find_rxnode(ic, mtod(m, struct ieee80211_frame_min *));
	if (ni != NULL) {
		type = ieee80211_input(ni, m, rssi, nf);
		ieee80211_free_node(ni);
	} else
		type = ieee80211_input_all(ic, m, rssi, nf);

	IWI_LOCK(sc);
	if (sc->sc_softled) {
		/*
		 * Blink for any data frame.  Otherwise do a
		 * heartbeat-style blink when idle.  The latter
		 * is mainly for station mode where we depend on
		 * periodic beacon frames to trigger the poll event.
		 */
		if (type == IEEE80211_FC0_TYPE_DATA) {
			/*
			 * NOTE(review): 'frame' appears to point into the
			 * mbuf that was just handed to net80211 (the rx
			 * interrupt path passes hdr + 1 taken from data->m);
			 * confirm that reading frame->rate here cannot touch
			 * memory that has already been released.
			 */
			sc->sc_rxrate = frame->rate;
			iwi_led_event(sc, IWI_LED_RX);
		} else if (ticks - sc->sc_ledevent >= sc->sc_ledidle)
			iwi_led_event(sc, IWI_LED_POLL);
	}
}

/*
 * Check for an association response frame to see if QoS
 * has been negotiated.  We parse just enough to figure
 * out if we're supposed to use QoS.  The proper solution
 * is to pass the frame up so ieee80211_input can do the
 * work but that's made hard by how things currently are
 * done in the driver.
*/ static void iwi_checkforqos(struct ieee80211vap *vap, const struct ieee80211_frame *wh, int len) { #define SUBTYPE(wh) ((wh)->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK) const uint8_t *frm, *efrm, *wme; struct ieee80211_node *ni; uint16_t capinfo, status, associd; /* NB: +8 for capinfo, status, associd, and first ie */ if (!(sizeof(*wh)+8 < len && len < IEEE80211_MAX_LEN) || SUBTYPE(wh) != IEEE80211_FC0_SUBTYPE_ASSOC_RESP) return; /* * asresp frame format * [2] capability information * [2] status * [2] association ID * [tlv] supported rates * [tlv] extended supported rates * [tlv] WME */ frm = (const uint8_t *)&wh[1]; efrm = ((const uint8_t *) wh) + len; capinfo = le16toh(*(const uint16_t *)frm); frm += 2; status = le16toh(*(const uint16_t *)frm); frm += 2; associd = le16toh(*(const uint16_t *)frm); frm += 2; wme = NULL; while (efrm - frm > 1) { IEEE80211_VERIFY_LENGTH(efrm - frm, frm[1] + 2, return); switch (*frm) { case IEEE80211_ELEMID_VENDOR: if (iswmeoui(frm)) wme = frm; break; } frm += frm[1] + 2; } ni = ieee80211_ref_node(vap->iv_bss); ni->ni_capinfo = capinfo; ni->ni_associd = associd & 0x3fff; if (wme != NULL) ni->ni_flags |= IEEE80211_NODE_QOS; else ni->ni_flags &= ~IEEE80211_NODE_QOS; ieee80211_free_node(ni); #undef SUBTYPE } static void iwi_notif_link_quality(struct iwi_softc *sc, struct iwi_notif *notif) { struct iwi_notif_link_quality *lq; int len; len = le16toh(notif->len); DPRINTFN(5, ("Notification (%u) - len=%d, sizeof=%zu\n", notif->type, len, sizeof(struct iwi_notif_link_quality) )); /* enforce length */ if (len != sizeof(struct iwi_notif_link_quality)) { DPRINTFN(5, ("Notification: (%u) too short (%d)\n", notif->type, len)); return; } lq = (struct iwi_notif_link_quality *)(notif + 1); memcpy(&sc->sc_linkqual, lq, sizeof(sc->sc_linkqual)); sc->sc_linkqual_valid = 1; } /* * Task queue callbacks for iwi_notification_intr used to avoid LOR's. 
*/
/*
 * Dispatch one firmware notification.
 *
 * The payload follows the notification header (notif + 1) and is
 * interpreted according to notif->type: scan progress and completion,
 * authentication and association state changes, beacon state, and link
 * quality.  State changes are forwarded to net80211 through
 * ieee80211_new_state(); work that is deferred is queued with
 * ieee80211_runtask().
 *
 * NOTE(review): vap comes from TAILQ_FIRST(&ic->ic_vaps) and is used
 * without a NULL check here, whereas iwi_fatal_error_intr() checks it.
 */
static void
iwi_notification_intr(struct iwi_softc *sc, struct iwi_notif *notif)
{
	struct ieee80211com *ic = &sc->sc_ic;
	struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps);
	struct iwi_notif_scan_channel *chan;
	struct iwi_notif_scan_complete *scan;
	struct iwi_notif_authentication *auth;
	struct iwi_notif_association *assoc;
	struct iwi_notif_beacon_state *beacon;

	switch (notif->type) {
	case IWI_NOTIF_TYPE_SCAN_CHANNEL:
		chan = (struct iwi_notif_scan_channel *)(notif + 1);

		DPRINTFN(3, ("Scan of channel %u complete (%u)\n",
		    ieee80211_ieee2mhz(chan->nchan, 0), chan->nchan));

		/* Reset the timer, the scan is still going */
		sc->sc_state_timer = 3;
		break;

	case IWI_NOTIF_TYPE_SCAN_COMPLETE:
		scan = (struct iwi_notif_scan_complete *)(notif + 1);

		DPRINTFN(2, ("Scan completed (%u, %u)\n", scan->nchan,
		    scan->status));

		IWI_STATE_END(sc, IWI_FW_SCANNING);

		/*
		 * Monitor mode works by doing a passive scan to set
		 * the channel and enable rx.  Because we don't want
		 * to abort a scan lest the firmware crash we scan
		 * for a short period of time and automatically restart
		 * the scan when notified the sweep has completed.
		 */
		if (vap->iv_opmode == IEEE80211_M_MONITOR) {
			ieee80211_runtask(ic, &sc->sc_monitortask);
			break;
		}

		if (scan->status == IWI_SCAN_COMPLETED) {
			/* NB: don't need to defer, net80211 does it for us */
			ieee80211_scan_next(vap);
		}
		break;

	case IWI_NOTIF_TYPE_AUTHENTICATION:
		auth = (struct iwi_notif_authentication *)(notif + 1);
		switch (auth->state) {
		case IWI_AUTH_SUCCESS:
			DPRINTFN(2, ("Authentication succeeeded\n"));
			ieee80211_new_state(vap, IEEE80211_S_ASSOC, -1);
			break;
		case IWI_AUTH_FAIL:
			/*
			 * These are delivered as an unsolicited deauth
			 * (e.g. due to inactivity) or in response to an
			 * associate request.
			 */
			sc->flags &= ~IWI_FLAG_ASSOCIATED;
			if (vap->iv_state != IEEE80211_S_RUN) {
				DPRINTFN(2, ("Authentication failed\n"));
				vap->iv_stats.is_rx_auth_fail++;
				IWI_STATE_END(sc, IWI_FW_ASSOCIATING);
			} else {
				DPRINTFN(2, ("Deauthenticated\n"));
				vap->iv_stats.is_rx_deauth++;
			}
			ieee80211_new_state(vap, IEEE80211_S_SCAN, -1);
			break;
		case IWI_AUTH_SENT_1:
		case IWI_AUTH_RECV_2:
		case IWI_AUTH_SEQ1_PASS:
			break;
		case IWI_AUTH_SEQ1_FAIL:
			DPRINTFN(2, ("Initial authentication handshake failed; "
			    "you probably need shared key\n"));
			vap->iv_stats.is_rx_auth_fail++;
			IWI_STATE_END(sc, IWI_FW_ASSOCIATING);
			/* XXX retry shared key when in auto */
			break;
		default:
			device_printf(sc->sc_dev,
			    "unknown authentication state %u\n", auth->state);
			break;
		}
		break;

	case IWI_NOTIF_TYPE_ASSOCIATION:
		assoc = (struct iwi_notif_association *)(notif + 1);
		switch (assoc->state) {
		case IWI_AUTH_SUCCESS:
			/* re-association, do nothing */
			break;
		case IWI_ASSOC_SUCCESS:
			DPRINTFN(2, ("Association succeeded\n"));
			sc->flags |= IWI_FLAG_ASSOCIATED;
			IWI_STATE_END(sc, IWI_FW_ASSOCIATING);
			/*
			 * The frame handed to iwi_checkforqos() starts right
			 * after the association payload.
			 * NOTE(review): the length expression mixes an int
			 * with size_t and can wrap when notif->len is smaller
			 * than sizeof(*assoc) + 1.
			 */
			iwi_checkforqos(vap,
			    (const struct ieee80211_frame *)(assoc+1),
			    le16toh(notif->len) - sizeof(*assoc) - 1);
			ieee80211_new_state(vap, IEEE80211_S_RUN, -1);
			break;
		case IWI_ASSOC_INIT:
			sc->flags &= ~IWI_FLAG_ASSOCIATED;
			/* meaning depends on what the firmware was asked to do */
			switch (sc->fw_state) {
			case IWI_FW_ASSOCIATING:
				DPRINTFN(2, ("Association failed\n"));
				IWI_STATE_END(sc, IWI_FW_ASSOCIATING);
				ieee80211_new_state(vap, IEEE80211_S_SCAN, -1);
				break;

			case IWI_FW_DISASSOCIATING:
				DPRINTFN(2, ("Dissassociated\n"));
				IWI_STATE_END(sc, IWI_FW_DISASSOCIATING);
				vap->iv_stats.is_rx_disassoc++;
				ieee80211_new_state(vap, IEEE80211_S_SCAN, -1);
				break;
			}
			break;
		default:
			device_printf(sc->sc_dev,
			    "unknown association state %u\n", assoc->state);
			break;
		}
		break;

	case IWI_NOTIF_TYPE_BEACON:
		/* XXX check struct length */
		beacon = (struct iwi_notif_beacon_state *)(notif + 1);

		DPRINTFN(5, ("Beacon state (%u, %u)\n",
		    beacon->state, le32toh(beacon->number)));

		if (beacon->state == IWI_BEACON_MISS) {
			/*
			 * The firmware notifies us of every beacon miss
			 * so we need to track the count against the
			 * configured threshold before notifying the
			 * 802.11 layer.
			 * XXX try to roam, drop assoc only on much higher count
			 */
			if (le32toh(beacon->number) >= vap->iv_bmissthreshold) {
				DPRINTF(("Beacon miss: %u >= %u\n",
				    le32toh(beacon->number),
				    vap->iv_bmissthreshold));
				vap->iv_stats.is_beacon_miss++;
				/*
				 * It's pointless to notify the 802.11 layer
				 * as it'll try to send a probe request (which
				 * we'll discard) and then timeout and drop us
				 * into scan state.  Instead tell the firmware
				 * to disassociate and then on completion we'll
				 * kick the state machine to scan.
				 */
				ieee80211_runtask(ic, &sc->sc_disassoctask);
			}
		}
		break;

	case IWI_NOTIF_TYPE_CALIBRATION:
	case IWI_NOTIF_TYPE_NOISE:
		/* XXX handle? */
		DPRINTFN(5, ("Notification (%u)\n", notif->type));
		break;
	case IWI_NOTIF_TYPE_LINK_QUALITY:
		iwi_notif_link_quality(sc, notif);
		break;

	default:
		DPRINTF(("unknown notification type %u flags 0x%x len %u\n",
		    notif->type, notif->flags, le16toh(notif->len)));
		break;
	}
}

/*
 * Drain the rx ring: process every element from the driver's current index
 * up to (not including) the index read from IWI_CSR_RX_RIDX, dispatching
 * frames and notifications by header type, then report progress to the
 * firmware through IWI_CSR_RX_WIDX.
 */
static void
iwi_rx_intr(struct iwi_softc *sc)
{
	struct iwi_rx_data *data;
	struct iwi_hdr *hdr;
	uint32_t hw;

	hw = CSR_READ_4(sc, IWI_CSR_RX_RIDX);

	for (; sc->rxq.cur != hw;) {
		data = &sc->rxq.data[sc->rxq.cur];

		/* make the device's writes visible before reading the header */
		bus_dmamap_sync(sc->rxq.data_dmat, data->map,
		    BUS_DMASYNC_POSTREAD);

		hdr = mtod(data->m, struct iwi_hdr *);

		switch (hdr->type) {
		case IWI_HDR_TYPE_FRAME:
			iwi_frame_intr(sc, data, sc->rxq.cur,
			    (struct iwi_frame *)(hdr + 1));
			break;

		case IWI_HDR_TYPE_NOTIF:
			iwi_notification_intr(sc,
			    (struct iwi_notif *)(hdr + 1));
			break;

		default:
			device_printf(sc->sc_dev, "unknown hdr type %u\n",
			    hdr->type);
		}

		DPRINTFN(15, ("rx done idx=%u\n", sc->rxq.cur));

		sc->rxq.cur = (sc->rxq.cur + 1) % IWI_RX_RING_COUNT;
	}

	/* tell the firmware what we have processed */
	/* the value written is the index just before hw, with wrap-around */
	hw = (hw == 0) ? IWI_RX_RING_COUNT - 1 : hw - 1;
	CSR_WRITE_4(sc, IWI_CSR_RX_WIDX, hw);
}

/*
 * Reclaim completed transmissions on one tx ring: every element from
 * txq->next up to the index read from the ring's csr_ridx register is
 * unmapped and completed through ieee80211_tx_complete().  Afterwards the
 * tx watchdog is disarmed and iwi_start() is called to push queued frames.
 */
static void
iwi_tx_intr(struct iwi_softc *sc, struct iwi_tx_ring *txq)
{
	struct iwi_tx_data *data;
	uint32_t hw;

	hw = CSR_READ_4(sc, txq->csr_ridx);

	while (txq->next != hw) {
		data = &txq->data[txq->next];
		DPRINTFN(15, ("tx done idx=%u\n", txq->next));
		bus_dmamap_sync(txq->data_dmat, data->map,
		    BUS_DMASYNC_POSTWRITE);
		bus_dmamap_unload(txq->data_dmat, data->map);
		/* status 0 is passed for every completed frame */
		ieee80211_tx_complete(data->ni, data->m, 0);
		data->ni = NULL;
		data->m = NULL;
		txq->queued--;
		txq->next = (txq->next + 1) % IWI_TX_RING_COUNT;
	}
	sc->sc_tx_timer = 0;
	if (sc->sc_softled)
		iwi_led_event(sc, IWI_LED_TX);
	iwi_start(sc);
}

/*
 * Handle a fatal firmware error: log it, cancel any scan on the first vap,
 * queue the restart task, clear the command-busy state and wake up any
 * thread sleeping on the softc.
 */
static void
iwi_fatal_error_intr(struct iwi_softc *sc)
{
	struct ieee80211com *ic = &sc->sc_ic;
	struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps);

	device_printf(sc->sc_dev, "firmware error\n");
	if (vap != NULL)
		ieee80211_cancel_scan(vap);
	ieee80211_runtask(ic, &sc->sc_restarttask);

	sc->flags &= ~IWI_FLAG_BUSY;
	sc->sc_busy_timer = 0;
	wakeup(sc);
}

/*
 * Radio-off interrupt: queue the radio-off task.
 */
static void
iwi_radio_off_intr(struct iwi_softc *sc)
{

	ieee80211_runtask(&sc->sc_ic, &sc->sc_radiofftask);
}

/*
 * Interrupt handler.
 *
 * Reads and acknowledges the pending causes in IWI_CSR_INTR under the
 * driver lock and services each cause that is set.  A value of 0 or
 * 0xffffffff is ignored.  A fatal error is handled first and skips the
 * remaining causes.
 */
static void
iwi_intr(void *arg)
{
	struct iwi_softc *sc = arg;
	uint32_t r;
	IWI_LOCK_DECL;

	IWI_LOCK(sc);

	if ((r = CSR_READ_4(sc, IWI_CSR_INTR)) == 0 || r == 0xffffffff) {
		IWI_UNLOCK(sc);
		return;
	}

	/* acknowledge interrupts */
	CSR_WRITE_4(sc, IWI_CSR_INTR, r);

	if (r & IWI_INTR_FATAL_ERROR) {
		iwi_fatal_error_intr(sc);
		goto done;
	}

	if (r & IWI_INTR_FW_INITED) {
		/* wakes the thread sleeping on sc in iwi_load_firmware() */
		if (!(r & (IWI_INTR_FATAL_ERROR | IWI_INTR_PARITY_ERROR)))
			wakeup(sc);
	}

	if (r & IWI_INTR_RADIO_OFF)
		iwi_radio_off_intr(sc);

	if (r & IWI_INTR_CMD_DONE) {
		/* command finished: clear busy state and wake iwi_cmd() */
		sc->flags &= ~IWI_FLAG_BUSY;
		sc->sc_busy_timer = 0;
		wakeup(sc);
	}

	if (r & IWI_INTR_TX1_DONE)
		iwi_tx_intr(sc, &sc->txq[0]);

	if (r & IWI_INTR_TX2_DONE)
		iwi_tx_intr(sc, &sc->txq[1]);

	if (r & IWI_INTR_TX3_DONE)
		iwi_tx_intr(sc, &sc->txq[2]);

	if (r & IWI_INTR_TX4_DONE)
		iwi_tx_intr(sc, &sc->txq[3]);

	if (r & IWI_INTR_RX_DONE)
		iwi_rx_intr(sc);

	if (r & IWI_INTR_PARITY_ERROR) {
		/* XXX rate-limit */
		device_printf(sc->sc_dev, "parity error\n");
	}
done:
	IWI_UNLOCK(sc);
}

/*
 * Send one command to the firmware and wait for it to complete.
 *
 * sc   - driver state; the driver lock must be held.
 * type - command code stored in the descriptor.
 * data - command payload, copied into the descriptor.
 * len  - payload length in bytes (uint8_t, so at most 255).
 *
 * Returns EAGAIN without sending when another command is still
 * outstanding; otherwise returns the result of msleep(), which waits up
 * to hz ticks on the softc for the command-done wakeup.
 */
static int
iwi_cmd(struct iwi_softc *sc, uint8_t type, void *data, uint8_t len)
{
	struct iwi_cmd_desc *desc;

	IWI_LOCK_ASSERT(sc);

	if (sc->flags & IWI_FLAG_BUSY) {
		device_printf(sc->sc_dev, "%s: cmd %d not sent, busy\n",
			__func__, type);
		return EAGAIN;
	}
	sc->flags |= IWI_FLAG_BUSY;
	/* armed here, counted down by iwi_watchdog() */
	sc->sc_busy_timer = 2;

	desc = &sc->cmdq.desc[sc->cmdq.cur];

	desc->hdr.type = IWI_HDR_TYPE_COMMAND;
	desc->hdr.flags = IWI_HDR_FLAG_IRQ;
	desc->type = type;
	desc->len = len;
	/* NOTE(review): len is not checked against the size of desc->data */
	memcpy(desc->data, data, len);

	bus_dmamap_sync(sc->cmdq.desc_dmat, sc->cmdq.desc_map,
	    BUS_DMASYNC_PREWRITE);

	DPRINTFN(2, ("sending command idx=%u type=%u len=%u\n", sc->cmdq.cur,
	    type, len));

	/* advance the ring and publish the new write index to the device */
	sc->cmdq.cur = (sc->cmdq.cur + 1) % IWI_CMD_RING_COUNT;
	CSR_WRITE_4(sc, IWI_CSR_CMD_WIDX, sc->cmdq.cur);

	return msleep(sc, &sc->sc_mtx, 0, "iwicmd", hz);
}

/*
 * Write an IBSS node entry holding 'addr' into slot 'entry' of the node
 * table that starts at IWI_CSR_NODE_BASE.
 */
static void
iwi_write_ibssnode(struct iwi_softc *sc,
	const u_int8_t addr[IEEE80211_ADDR_LEN], int entry)
{
	struct iwi_ibssnode node;

	/* write node information into NIC memory */
	memset(&node, 0, sizeof node);
	IEEE80211_ADDR_COPY(node.bssid, addr);

	DPRINTF(("%s mac %6D station %u\n", __func__, node.bssid, ":", entry));

	CSR_WRITE_REGION_1(sc,
	    IWI_CSR_NODE_BASE + entry * sizeof node,
	    (uint8_t *)&node, sizeof node);
}

/*
 * Queue one data frame on the tx ring selected by 'ac'.
 *
 * sc - driver state; the driver lock must be held.
 * m0 - frame to send, starting with its 802.11 header.
 * ni - destination node.
 * ac - access category, used as the index into sc->txq[].
 *
 * Returns 0 when the frame was queued, and also when it was dropped
 * because the IBSS station table is full (in that case the mbuf and the
 * node reference are released here).  On every non-zero return the mbuf
 * has been freed and the node reference is left to the caller.
 */
static int
iwi_tx_start(struct iwi_softc *sc, struct mbuf *m0, struct ieee80211_node *ni,
    int ac)
{
	struct ieee80211vap *vap = ni->ni_vap;
	struct iwi_node *in = (struct iwi_node *)ni;
	const struct ieee80211_frame *wh;
	struct ieee80211_key *k;
	struct iwi_tx_ring *txq = &sc->txq[ac];
	struct iwi_tx_data *data;
	struct iwi_tx_desc *desc;
	struct mbuf *mnew;
	bus_dma_segment_t segs[IWI_MAX_NSEG];
	int error, nsegs, hdrlen, i;
	int ismcast, flags, xflags, staid;

	IWI_LOCK_ASSERT(sc);
	wh = mtod(m0, const struct ieee80211_frame *);
	/* NB: only data frames use this path */
	hdrlen = ieee80211_hdrsize(wh);
	ismcast = IEEE80211_IS_MULTICAST(wh->i_addr1);
	flags = xflags = 0;

	if (!ismcast)
		flags |= IWI_DATA_FLAG_NEED_ACK;
	if (vap->iv_flags & IEEE80211_F_SHPREAMBLE)
		flags |= IWI_DATA_FLAG_SHPREAMBLE;
	if (IEEE80211_QOS_HAS_SEQ(wh)) {
		xflags |= IWI_DATA_XFLAG_QOS;
		if (ieee80211_wme_vap_ac_is_noack(vap, ac))
			flags &= ~IWI_DATA_FLAG_NEED_ACK;
	}

	/*
	 * This is only used in IBSS mode where the firmware expect an index
	 * in a h/w table instead of a destination address.
	 */
	if (vap->iv_opmode == IEEE80211_M_IBSS) {
		if (!ismcast) {
			/* -1 means no table slot has been assigned yet */
			if (in->in_station == -1) {
				in->in_station = alloc_unr(sc->sc_unr);
				if (in->in_station == -1) {
					/* h/w table is full */
					if_inc_counter(ni->ni_vap->iv_ifp,
					    IFCOUNTER_OERRORS, 1);
					m_freem(m0);
					ieee80211_free_node(ni);
					return 0;
				}
				iwi_write_ibssnode(sc,
					ni->ni_macaddr, in->in_station);
			}
			staid = in->in_station;
		} else {
			/*
			 * Multicast addresses have no associated node
			 * so there will be no station entry.  We reserve
			 * entry 0 for one mcast address and use that.
			 * If there are many being used this will be
			 * expensive and we'll need to do a better job
			 * but for now this handles the broadcast case.
			 */
			if (!IEEE80211_ADDR_EQ(wh->i_addr1, sc->sc_mcast)) {
				IEEE80211_ADDR_COPY(sc->sc_mcast, wh->i_addr1);
				iwi_write_ibssnode(sc, sc->sc_mcast, 0);
			}
			staid = 0;
		}
	} else
		staid = 0;

	if (wh->i_fc[1] & IEEE80211_FC1_PROTECTED) {
		k = ieee80211_crypto_encap(ni, m0);
		if (k == NULL) {
			m_freem(m0);
			return ENOBUFS;
		}

		/* packet header may have moved, reset our local pointer */
		wh = mtod(m0, struct ieee80211_frame *);
	}

	if (ieee80211_radiotap_active_vap(vap)) {
		struct iwi_tx_radiotap_header *tap = &sc->sc_txtap;

		tap->wt_flags = 0;

		ieee80211_radiotap_tx(vap, m0);
	}

	data = &txq->data[txq->cur];
	desc = &txq->desc[txq->cur];

	/* save and trim IEEE802.11 header */
	m_copydata(m0, 0, hdrlen, (caddr_t)&desc->wh);
	m_adj(m0, hdrlen);

	error = bus_dmamap_load_mbuf_sg(txq->data_dmat, data->map, m0, segs,
	    &nsegs, 0);
	if (error != 0 && error != EFBIG) {
		device_printf(sc->sc_dev, "could not map mbuf (error %d)\n",
		    error);
		m_freem(m0);
		return error;
	}
	if (error != 0) {
		/* EFBIG: too many segments, defragment the chain and retry */
		mnew = m_defrag(m0, M_NOWAIT);
		if (mnew == NULL) {
			device_printf(sc->sc_dev,
			    "could not defragment mbuf\n");
			m_freem(m0);
			return ENOBUFS;
		}
		m0 = mnew;

		error = bus_dmamap_load_mbuf_sg(txq->data_dmat, data->map,
		    m0, segs, &nsegs, 0);
		if (error != 0) {
			device_printf(sc->sc_dev,
			    "could not map mbuf (error %d)\n", error);
			m_freem(m0);
			return error;
		}
	}

	/* remembered so iwi_tx_intr() can complete the frame later */
	data->m = m0;
	data->ni = ni;

	desc->hdr.type = IWI_HDR_TYPE_DATA;
	desc->hdr.flags = IWI_HDR_FLAG_IRQ;
	desc->station = staid;
	desc->cmd = IWI_DATA_CMD_TX;
	desc->len = htole16(m0->m_pkthdr.len);
	desc->flags = flags;
	desc->xflags = xflags;

#if 0
	if (vap->iv_flags & IEEE80211_F_PRIVACY)
		desc->wep_txkey = vap->iv_def_txkey;
	else
#endif
		desc->flags |= IWI_DATA_FLAG_NO_WEP;

	desc->nseg = htole32(nsegs);
	for (i = 0; i < nsegs; i++) {
		desc->seg_addr[i] = htole32(segs[i].ds_addr);
		desc->seg_len[i]  = htole16(segs[i].ds_len);
	}

	bus_dmamap_sync(txq->data_dmat, data->map, BUS_DMASYNC_PREWRITE);
	bus_dmamap_sync(txq->desc_dmat, txq->desc_map, BUS_DMASYNC_PREWRITE);

	DPRINTFN(5, ("sending data frame txq=%u idx=%u len=%u nseg=%u\n",
	    ac, txq->cur, le16toh(desc->len), nsegs));

	/* advance the ring and publish the new write index to the device */
	txq->queued++;
	txq->cur = (txq->cur + 1) % IWI_TX_RING_COUNT;
	CSR_WRITE_4(sc, txq->csr_widx, txq->cur);

	return 0;
}

/*
 * Raw transmit entry point: not supported, the frame and the node
 * reference are released and 0 is returned.
 */
static int
iwi_raw_xmit(struct ieee80211_node *ni, struct mbuf *m,
	const struct ieee80211_bpf_params *params)
{
	/* no support; just discard */
	m_freem(m);
	ieee80211_free_node(ni);
	return 0;
}

/*
 * Transmit entry point: enqueue 'm' on the driver's send queue and kick
 * iwi_start().  Returns ENXIO when the device is not running, the
 * mbufq_enqueue() error when the queue rejects the frame, 0 otherwise.
 */
static int
iwi_transmit(struct ieee80211com *ic, struct mbuf *m)
{
	struct iwi_softc *sc = ic->ic_softc;
	int error;
	IWI_LOCK_DECL;

	IWI_LOCK(sc);
	if (!sc->sc_running) {
		IWI_UNLOCK(sc);
		return (ENXIO);
	}
	error = mbufq_enqueue(&sc->sc_snd, m);
	if (error) {
		IWI_UNLOCK(sc);
		return (error);
	}
	iwi_start(sc);
	IWI_UNLOCK(sc);
	return (0);
}

/*
 * Move frames from the send queue onto the tx rings until the queue is
 * empty, the selected ring is nearly full, or iwi_tx_start() fails.  The
 * driver lock must be held.
 */
static void
iwi_start(struct iwi_softc *sc)
{
	struct mbuf *m;
	struct ieee80211_node *ni;
	int ac;

	IWI_LOCK_ASSERT(sc);

	while ((m = mbufq_dequeue(&sc->sc_snd)) != NULL) {
		ac = M_WME_GETAC(m);
		if (sc->txq[ac].queued > IWI_TX_RING_COUNT - 8) {
			/* there is no place left in this ring; tail drop */
			/* XXX tail drop */
			/* the frame is put back at the head of the queue */
			mbufq_prepend(&sc->sc_snd, m);
			break;
		}
		/* the node reference travels in the mbuf's rcvif field */
		ni = (struct ieee80211_node *) m->m_pkthdr.rcvif;
		if (iwi_tx_start(sc, m, ni, ac) != 0) {
			if_inc_counter(ni->ni_vap->iv_ifp,
			    IFCOUNTER_OERRORS, 1);
			ieee80211_free_node(ni);
			break;
		}
		/* arm the tx watchdog; counted down by iwi_watchdog() */
		sc->sc_tx_timer = 5;
	}
}

/*
 * Once-per-second watchdog (it re-arms itself with a timeout of hz).
 *
 * Counts down the tx, firmware-state and command-busy timers; when one of
 * them reaches zero the condition is logged and the restart task is
 * queued.  The driver lock must be held.
 */
static void
iwi_watchdog(void *arg)
{
	struct iwi_softc *sc = arg;
	struct ieee80211com *ic = &sc->sc_ic;

	IWI_LOCK_ASSERT(sc);

	if (sc->sc_tx_timer > 0) {
		if (--sc->sc_tx_timer == 0) {
			device_printf(sc->sc_dev, "device timeout\n");
			counter_u64_add(ic->ic_oerrors, 1);
			ieee80211_runtask(ic, &sc->sc_restarttask);
		}
	}
	if (sc->sc_state_timer > 0) {
		if (--sc->sc_state_timer == 0) {
			device_printf(sc->sc_dev,
			    "firmware stuck in state %d, resetting\n",
			    sc->fw_state);
			if (sc->fw_state == IWI_FW_SCANNING)
				ieee80211_cancel_scan(TAILQ_FIRST(&ic->ic_vaps));
			ieee80211_runtask(ic, &sc->sc_restarttask);
			sc->sc_state_timer = 3;
		}
	}
	if (sc->sc_busy_timer > 0) {
		if (--sc->sc_busy_timer == 0) {
			device_printf(sc->sc_dev,
			    "firmware command timeout, resetting\n");
			ieee80211_runtask(ic, &sc->sc_restarttask);
		}
	}
	callout_reset(&sc->sc_wdtimer, hz, iwi_watchdog, sc);
}

/*
 * Bring the device up or down to match ic->ic_nrunning: initialize it
 * when something is running and the device is not, stop it when nothing
 * is running.  ieee80211_start_all() is called after the lock is dropped
 * when the device was just initialized.
 */
static void
iwi_parent(struct ieee80211com *ic)
{
	struct iwi_softc *sc = ic->ic_softc;
	int startall = 0;
	IWI_LOCK_DECL;

	IWI_LOCK(sc);
	if (ic->ic_nrunning > 0) {
		if (!sc->sc_running) {
			iwi_init_locked(sc);
			startall = 1;
		}
	} else if (sc->sc_running)
		iwi_stop_locked(sc);
	IWI_UNLOCK(sc);
	if (startall)
		ieee80211_start_all(ic);
}

/*
 * Driver private ioctls: SIOCGIWISTATS copies the cached link quality
 * statistics out to the caller, SIOCZIWISTATS clears them.  Any other
 * command returns ENOTTY.
 *
 * NOTE(review): the lines prefixed with '-' and '+' below are unified-diff
 * markers carried in this text (the old copyout() line and its replacement
 * using ifr_data_get_ptr()); they are kept as found.
 * NOTE(review): copyout() is called with the driver lock held - confirm
 * that this is acceptable for the lock type behind IWI_LOCK.
 */
static int
iwi_ioctl(struct ieee80211com *ic, u_long cmd, void *data)
{
	struct ifreq *ifr = data;
	struct iwi_softc *sc = ic->ic_softc;
	int error;
	IWI_LOCK_DECL;

	IWI_LOCK(sc);
	switch (cmd) {
	case SIOCGIWISTATS:
		/* XXX validate permissions/memory/etc? */
-		error = copyout(&sc->sc_linkqual, ifr->ifr_data,
+		error = copyout(&sc->sc_linkqual, ifr_data_get_ptr(ifr),
		    sizeof(struct iwi_notif_link_quality));
		break;
	case SIOCZIWISTATS:
		memset(&sc->sc_linkqual, 0,
		    sizeof(struct iwi_notif_link_quality));
		error = 0;
		break;
	default:
		error = ENOTTY;
		break;
	}
	IWI_UNLOCK(sc);

	return (error);
}

/*
 * Stop the adapter's bus master: mask interrupts, request the stop and
 * poll up to 5 times (10us apart) for IWI_RST_MASTER_DISABLED, then set
 * IWI_RST_PRINCETON_RESET and clear the firmware-initialized flag.  A
 * timeout is only logged.
 */
static void
iwi_stop_master(struct iwi_softc *sc)
{
	uint32_t tmp;
	int ntries;

	/* disable interrupts */
	CSR_WRITE_4(sc, IWI_CSR_INTR_MASK, 0);

	CSR_WRITE_4(sc, IWI_CSR_RST, IWI_RST_STOP_MASTER);
	for (ntries = 0; ntries < 5; ntries++) {
		if (CSR_READ_4(sc, IWI_CSR_RST) & IWI_RST_MASTER_DISABLED)
			break;
		DELAY(10);
	}
	if (ntries == 5)
		device_printf(sc->sc_dev, "timeout waiting for master\n");

	tmp = CSR_READ_4(sc, IWI_CSR_RST);
	CSR_WRITE_4(sc, IWI_CSR_RST, tmp | IWI_RST_PRINCETON_RESET);

	sc->flags &= ~IWI_FLAG_FW_INITED;
}

/*
 * Reset the adapter: stop the bus master, wait for the clock to become
 * ready, issue a soft reset and zero the first 0xc000 words of NIC memory
 * through the auto-increment window.
 *
 * Returns 0 on success or EIO when the clock does not stabilize.
 */
static int
iwi_reset(struct iwi_softc *sc)
{
	uint32_t tmp;
	int i, ntries;

	iwi_stop_master(sc);

	tmp = CSR_READ_4(sc, IWI_CSR_CTL);
	CSR_WRITE_4(sc, IWI_CSR_CTL, tmp | IWI_CTL_INIT);

	CSR_WRITE_4(sc, IWI_CSR_READ_INT, IWI_READ_INT_INIT_HOST);

	/* wait for clock stabilization */
	for (ntries = 0; ntries < 1000; ntries++) {
		if (CSR_READ_4(sc, IWI_CSR_CTL) & IWI_CTL_CLOCK_READY)
			break;
		DELAY(200);
	}
	if (ntries == 1000) {
		device_printf(sc->sc_dev,
		    "timeout waiting for clock stabilization\n");
		return EIO;
	}

	tmp = CSR_READ_4(sc, IWI_CSR_RST);
	CSR_WRITE_4(sc, IWI_CSR_RST, tmp | IWI_RST_SOFT_RESET);

	DELAY(10);

	tmp = CSR_READ_4(sc, IWI_CSR_CTL);
	CSR_WRITE_4(sc, IWI_CSR_CTL, tmp | IWI_CTL_INIT);

	/* clear NIC memory */
	CSR_WRITE_4(sc, IWI_CSR_AUTOINC_ADDR, 0);
	for (i = 0; i < 0xc000; i++)
		CSR_WRITE_4(sc, IWI_CSR_AUTOINC_DATA, 0);

	return 0;
}

/*
 * Validate an old-format (separately packaged) firmware image and point
 * fw->data/size/name at the payload that follows its header.
 *
 * Returns the image header, or NULL when the image is smaller than the
 * header or its major/minor version differs from the required one.
 */
static const struct iwi_firmware_ohdr *
iwi_setup_ofw(struct iwi_softc *sc, struct iwi_fw *fw)
{
	const struct firmware *fp = fw->fp;
	const struct iwi_firmware_ohdr *hdr;

	if (fp->datasize < sizeof (struct iwi_firmware_ohdr)) {
		device_printf(sc->sc_dev, "image '%s' too small\n", fp->name);
		return NULL;
	}
	hdr = (const struct iwi_firmware_ohdr *)fp->data;
	if ((IWI_FW_GET_MAJOR(le32toh(hdr->version)) != IWI_FW_REQ_MAJOR) ||
	    (IWI_FW_GET_MINOR(le32toh(hdr->version)) != IWI_FW_REQ_MINOR)) {
		device_printf(sc->sc_dev, "version for '%s' %d.%d != %d.%d\n",
		    fp->name, IWI_FW_GET_MAJOR(le32toh(hdr->version)),
		    IWI_FW_GET_MINOR(le32toh(hdr->version)), IWI_FW_REQ_MAJOR,
		    IWI_FW_REQ_MINOR);
		return NULL;
	}
	fw->data = ((const char *) fp->data) + sizeof(struct iwi_firmware_ohdr);
	fw->size = fp->datasize - sizeof(struct iwi_firmware_ohdr);
	fw->name = fp->name;
	return hdr;
}

/*
 * Same as iwi_setup_ofw() but additionally requires the image header to
 * declare ucode mode; returns NULL otherwise.
 */
static const struct iwi_firmware_ohdr *
iwi_setup_oucode(struct iwi_softc *sc, struct iwi_fw *fw)
{
	const struct iwi_firmware_ohdr *hdr;

	hdr = iwi_setup_ofw(sc, fw);
	if (hdr != NULL && le32toh(hdr->mode) != IWI_FW_MODE_UCODE) {
		device_printf(sc->sc_dev, "%s is not a ucode image\n",
		    fw->name);
		hdr = NULL;
	}
	return hdr;
}

/*
 * Fetch the firmware image 'fwname' into fw if it is not already held,
 * and the ucode image 'ucname' into uc when the firmware image reports a
 * version below 300 and no ucode image is held yet.
 */
static void
iwi_getfw(struct iwi_fw *fw, const char *fwname,
	  struct iwi_fw *uc, const char *ucname)
{
	if (fw->fp == NULL)
		fw->fp = firmware_get(fwname);
	/* NB: pre-3.0 ucode is packaged separately */
	if (uc->fp == NULL && fw->fp != NULL && fw->fp->version < 300)
		uc->fp = firmware_get(ucname);
}

/*
 * Get the required firmware images if not already loaded.
* Note that we hold firmware images so long as the device * is marked up in case we need to reload them on device init. * This is necessary because we re-init the device sometimes * from a context where we cannot read from the filesystem * (e.g. from the taskqueue thread when rfkill is re-enabled). * XXX return 0 on success, 1 on error. * * NB: the order of get'ing and put'ing images here is * intentional to support handling firmware images bundled * by operating mode and/or all together in one file with * the boot firmware as "master". */ static int iwi_get_firmware(struct iwi_softc *sc, enum ieee80211_opmode opmode) { const struct iwi_firmware_hdr *hdr; const struct firmware *fp; /* invalidate cached firmware on mode change */ if (sc->fw_mode != opmode) iwi_put_firmware(sc); switch (opmode) { case IEEE80211_M_STA: iwi_getfw(&sc->fw_fw, "iwi_bss", &sc->fw_uc, "iwi_ucode_bss"); break; case IEEE80211_M_IBSS: iwi_getfw(&sc->fw_fw, "iwi_ibss", &sc->fw_uc, "iwi_ucode_ibss"); break; case IEEE80211_M_MONITOR: iwi_getfw(&sc->fw_fw, "iwi_monitor", &sc->fw_uc, "iwi_ucode_monitor"); break; default: device_printf(sc->sc_dev, "unknown opmode %d\n", opmode); return EINVAL; } fp = sc->fw_fw.fp; if (fp == NULL) { device_printf(sc->sc_dev, "could not load firmware\n"); goto bad; } if (fp->version < 300) { /* * Firmware prior to 3.0 was packaged as separate * boot, firmware, and ucode images. Verify the * ucode image was read in, retrieve the boot image * if needed, and check version stamps for consistency. * The version stamps in the data are also checked * above; this is a bit paranoid but is a cheap * safeguard against mis-packaging. 
*/ if (sc->fw_uc.fp == NULL) { device_printf(sc->sc_dev, "could not load ucode\n"); goto bad; } if (sc->fw_boot.fp == NULL) { sc->fw_boot.fp = firmware_get("iwi_boot"); if (sc->fw_boot.fp == NULL) { device_printf(sc->sc_dev, "could not load boot firmware\n"); goto bad; } } if (sc->fw_boot.fp->version != sc->fw_fw.fp->version || sc->fw_boot.fp->version != sc->fw_uc.fp->version) { device_printf(sc->sc_dev, "firmware version mismatch: " "'%s' is %d, '%s' is %d, '%s' is %d\n", sc->fw_boot.fp->name, sc->fw_boot.fp->version, sc->fw_uc.fp->name, sc->fw_uc.fp->version, sc->fw_fw.fp->name, sc->fw_fw.fp->version ); goto bad; } /* * Check and setup each image. */ if (iwi_setup_oucode(sc, &sc->fw_uc) == NULL || iwi_setup_ofw(sc, &sc->fw_boot) == NULL || iwi_setup_ofw(sc, &sc->fw_fw) == NULL) goto bad; } else { /* * Check and setup combined image. */ if (fp->datasize < sizeof(struct iwi_firmware_hdr)) { device_printf(sc->sc_dev, "image '%s' too small\n", fp->name); goto bad; } hdr = (const struct iwi_firmware_hdr *)fp->data; if (fp->datasize < sizeof(*hdr) + le32toh(hdr->bsize) + le32toh(hdr->usize) + le32toh(hdr->fsize)) { device_printf(sc->sc_dev, "image '%s' too small (2)\n", fp->name); goto bad; } sc->fw_boot.data = ((const char *) fp->data) + sizeof(*hdr); sc->fw_boot.size = le32toh(hdr->bsize); sc->fw_boot.name = fp->name; sc->fw_uc.data = sc->fw_boot.data + sc->fw_boot.size; sc->fw_uc.size = le32toh(hdr->usize); sc->fw_uc.name = fp->name; sc->fw_fw.data = sc->fw_uc.data + sc->fw_uc.size; sc->fw_fw.size = le32toh(hdr->fsize); sc->fw_fw.name = fp->name; } #if 0 device_printf(sc->sc_dev, "boot %d ucode %d fw %d bytes\n", sc->fw_boot.size, sc->fw_uc.size, sc->fw_fw.size); #endif sc->fw_mode = opmode; return 0; bad: iwi_put_firmware(sc); return 1; } static void iwi_put_fw(struct iwi_fw *fw) { if (fw->fp != NULL) { firmware_put(fw->fp, FIRMWARE_UNLOAD); fw->fp = NULL; } fw->data = NULL; fw->size = 0; fw->name = NULL; } /* * Release any cached firmware images. 
*/ static void iwi_put_firmware(struct iwi_softc *sc) { iwi_put_fw(&sc->fw_uc); iwi_put_fw(&sc->fw_fw); iwi_put_fw(&sc->fw_boot); } static int iwi_load_ucode(struct iwi_softc *sc, const struct iwi_fw *fw) { uint32_t tmp; const uint16_t *w; const char *uc = fw->data; size_t size = fw->size; int i, ntries, error; IWI_LOCK_ASSERT(sc); error = 0; CSR_WRITE_4(sc, IWI_CSR_RST, CSR_READ_4(sc, IWI_CSR_RST) | IWI_RST_STOP_MASTER); for (ntries = 0; ntries < 5; ntries++) { if (CSR_READ_4(sc, IWI_CSR_RST) & IWI_RST_MASTER_DISABLED) break; DELAY(10); } if (ntries == 5) { device_printf(sc->sc_dev, "timeout waiting for master\n"); error = EIO; goto fail; } MEM_WRITE_4(sc, 0x3000e0, 0x80000000); DELAY(5000); tmp = CSR_READ_4(sc, IWI_CSR_RST); tmp &= ~IWI_RST_PRINCETON_RESET; CSR_WRITE_4(sc, IWI_CSR_RST, tmp); DELAY(5000); MEM_WRITE_4(sc, 0x3000e0, 0); DELAY(1000); MEM_WRITE_4(sc, IWI_MEM_EEPROM_EVENT, 1); DELAY(1000); MEM_WRITE_4(sc, IWI_MEM_EEPROM_EVENT, 0); DELAY(1000); MEM_WRITE_1(sc, 0x200000, 0x00); MEM_WRITE_1(sc, 0x200000, 0x40); DELAY(1000); /* write microcode into adapter memory */ for (w = (const uint16_t *)uc; size > 0; w++, size -= 2) MEM_WRITE_2(sc, 0x200010, htole16(*w)); MEM_WRITE_1(sc, 0x200000, 0x00); MEM_WRITE_1(sc, 0x200000, 0x80); /* wait until we get an answer */ for (ntries = 0; ntries < 100; ntries++) { if (MEM_READ_1(sc, 0x200000) & 1) break; DELAY(100); } if (ntries == 100) { device_printf(sc->sc_dev, "timeout waiting for ucode to initialize\n"); error = EIO; goto fail; } /* read the answer or the firmware will not initialize properly */ for (i = 0; i < 7; i++) MEM_READ_4(sc, 0x200004); MEM_WRITE_1(sc, 0x200000, 0x00); fail: return error; } /* macro to handle unaligned little endian data in firmware image */ #define GETLE32(p) ((p)[0] | (p)[1] << 8 | (p)[2] << 16 | (p)[3] << 24) static int iwi_load_firmware(struct iwi_softc *sc, const struct iwi_fw *fw) { u_char *p, *end; uint32_t sentinel, ctl, src, dst, sum, len, mlen, tmp; int ntries, error; 
IWI_LOCK_ASSERT(sc); /* copy firmware image to DMA memory */ memcpy(sc->fw_virtaddr, fw->data, fw->size); /* make sure the adapter will get up-to-date values */ bus_dmamap_sync(sc->fw_dmat, sc->fw_map, BUS_DMASYNC_PREWRITE); /* tell the adapter where the command blocks are stored */ MEM_WRITE_4(sc, 0x3000a0, 0x27000); /* * Store command blocks into adapter's internal memory using register * indirections. The adapter will read the firmware image through DMA * using information stored in command blocks. */ src = sc->fw_physaddr; p = sc->fw_virtaddr; end = p + fw->size; CSR_WRITE_4(sc, IWI_CSR_AUTOINC_ADDR, 0x27000); while (p < end) { dst = GETLE32(p); p += 4; src += 4; len = GETLE32(p); p += 4; src += 4; p += len; while (len > 0) { mlen = min(len, IWI_CB_MAXDATALEN); ctl = IWI_CB_DEFAULT_CTL | mlen; sum = ctl ^ src ^ dst; /* write a command block */ CSR_WRITE_4(sc, IWI_CSR_AUTOINC_DATA, ctl); CSR_WRITE_4(sc, IWI_CSR_AUTOINC_DATA, src); CSR_WRITE_4(sc, IWI_CSR_AUTOINC_DATA, dst); CSR_WRITE_4(sc, IWI_CSR_AUTOINC_DATA, sum); src += mlen; dst += mlen; len -= mlen; } } /* write a fictive final command block (sentinel) */ sentinel = CSR_READ_4(sc, IWI_CSR_AUTOINC_ADDR); CSR_WRITE_4(sc, IWI_CSR_AUTOINC_DATA, 0); tmp = CSR_READ_4(sc, IWI_CSR_RST); tmp &= ~(IWI_RST_MASTER_DISABLED | IWI_RST_STOP_MASTER); CSR_WRITE_4(sc, IWI_CSR_RST, tmp); /* tell the adapter to start processing command blocks */ MEM_WRITE_4(sc, 0x3000a4, 0x540100); /* wait until the adapter reaches the sentinel */ for (ntries = 0; ntries < 400; ntries++) { if (MEM_READ_4(sc, 0x3000d0) >= sentinel) break; DELAY(100); } /* sync dma, just in case */ bus_dmamap_sync(sc->fw_dmat, sc->fw_map, BUS_DMASYNC_POSTWRITE); if (ntries == 400) { device_printf(sc->sc_dev, "timeout processing command blocks for %s firmware\n", fw->name); return EIO; } /* we're done with command blocks processing */ MEM_WRITE_4(sc, 0x3000a4, 0x540c00); /* allow interrupts so we know when the firmware is ready */ CSR_WRITE_4(sc, 
IWI_CSR_INTR_MASK, IWI_INTR_MASK);

	/* tell the adapter to initialize the firmware */
	CSR_WRITE_4(sc, IWI_CSR_RST, 0);

	tmp = CSR_READ_4(sc, IWI_CSR_CTL);
	CSR_WRITE_4(sc, IWI_CSR_CTL, tmp | IWI_CTL_ALLOW_STANDBY);

	/* wait at most one second for firmware initialization to complete */
	/* NB: any non-zero msleep() result is reported as a timeout */
	if ((error = msleep(sc, &sc->sc_mtx, 0, "iwiinit", hz)) != 0) {
		device_printf(sc->sc_dev, "timeout waiting for %s firmware "
		    "initialization to complete\n", fw->name);
	}

	return error;
}

/*
 * Select the firmware power mode for a vap: IWI_POWER_MODE_MAX when the vap
 * has IEEE80211_F_PMGTON set, IWI_POWER_MODE_CAM otherwise.
 * Returns the result of iwi_cmd().
 */
static int
iwi_setpowermode(struct iwi_softc *sc, struct ieee80211vap *vap)
{
	uint32_t data;

	if (vap->iv_flags & IEEE80211_F_PMGTON) {
		/* XXX set more fine-grained operation */
		data = htole32(IWI_POWER_MODE_MAX);
	} else
		data = htole32(IWI_POWER_MODE_CAM);

	DPRINTF(("Setting power mode to %u\n", le32toh(data)));
	return iwi_cmd(sc, IWI_CMD_SET_POWER_MODE, &data, sizeof data);
}

/*
 * Push every one of the vap's IEEE80211_WEP_NKID key slots to the firmware,
 * one IWI_CMD_SET_WEP_KEY command per slot.  Stops at, and returns, the
 * first command error; returns 0 when all slots were sent.
 */
static int
iwi_setwepkeys(struct iwi_softc *sc, struct ieee80211vap *vap)
{
	struct iwi_wep_key wepkey;
	struct ieee80211_key *wk;
	int error, i;

	for (i = 0; i < IEEE80211_WEP_NKID; i++) {
		wk = &vap->iv_nw_keys[i];

		wepkey.cmd = IWI_WEP_KEY_CMD_SETKEY;
		wepkey.idx = i;
		wepkey.len = wk->wk_keylen;
		/* zero-pad the key material beyond wk_keylen */
		memset(wepkey.key, 0, sizeof wepkey.key);
		memcpy(wepkey.key, wk->wk_key, wk->wk_keylen);
		DPRINTF(("Setting wep key index %u len %u\n", wepkey.idx,
		    wepkey.len));
		error = iwi_cmd(sc, IWI_CMD_SET_WEP_KEY, &wepkey,
		    sizeof wepkey);
		if (error != 0)
			return error;
	}
	return 0;
}

/*
 * Send a rate set to the firmware.
 *
 * net_rs is the net80211 rate set to copy from, mode is an IWI_MODE_*
 * value and type an IWI_RATESET_TYPE_* value.  The rate count is clamped to
 * the capacity of the firmware structure.  Returns the result of iwi_cmd().
 */
static int
iwi_set_rateset(struct iwi_softc *sc, const struct ieee80211_rateset *net_rs,
    int mode, int type)
{
	struct iwi_rateset rs;

	memset(&rs, 0, sizeof(rs));
	rs.mode = mode;
	rs.type = type;
	rs.nrates = net_rs->rs_nrates;
	if (rs.nrates > nitems(rs.rates)) {
		DPRINTF(("Truncating negotiated rate set from %u\n",
		    rs.nrates));
		rs.nrates = nitems(rs.rates);
	}
	memcpy(rs.rates, net_rs->rs_rates, rs.nrates);
	DPRINTF(("Setting .11%c%s %s rates (%u)\n",
	    mode == IWI_MODE_11A ? 'a' : 'b',
	    mode == IWI_MODE_11G ? "g" : "",
	    type == IWI_RATESET_TYPE_SUPPORTED ? "supported" : "negotiated",
	    rs.nrates));

	return (iwi_cmd(sc, IWI_CMD_SET_RATES, &rs, sizeof(rs)));
}

/*
 * Send the initial configuration to freshly loaded firmware, in order: MAC
 * address, general configuration, tx power tables (IBSS mode only),
 * supported 11g and 11a rate sets, a random initialization vector, and
 * finally the enable command.
 *
 * Must be called with the softc lock held.  Returns 0 on success or the
 * error of the first command that failed.
 */
static int
iwi_config(struct iwi_softc *sc)
{
	struct ieee80211com *ic = &sc->sc_ic;
	struct iwi_configuration config;
	struct iwi_txpower power;
	uint32_t data;
	int error, i;

	IWI_LOCK_ASSERT(sc);

	DPRINTF(("Setting MAC address to %6D\n", ic->ic_macaddr, ":"));
	error = iwi_cmd(sc, IWI_CMD_SET_MAC_ADDRESS, ic->ic_macaddr,
	    IEEE80211_ADDR_LEN);
	if (error != 0)
		return error;

	memset(&config, 0, sizeof config);
	config.bluetooth_coexistence = sc->bluetooth;
	config.silence_threshold = 0x1e;
	config.antenna = sc->antenna;
	config.multicast_enabled = 1;
	config.answer_pbreq = (ic->ic_opmode == IEEE80211_M_IBSS) ? 1 : 0;
	config.disable_unicast_decryption = 1;
	config.disable_multicast_decryption = 1;
	if (ic->ic_opmode == IEEE80211_M_MONITOR) {
		config.allow_invalid_frames = 1;
		config.allow_beacon_and_probe_resp = 1;
		config.allow_mgt = 1;
	}
	DPRINTF(("Configuring adapter\n"));
	error = iwi_cmd(sc, IWI_CMD_SET_CONFIG, &config, sizeof config);
	if (error != 0)
		return error;
	if (ic->ic_opmode == IEEE80211_M_IBSS) {
		/* channels 1..11 at maximum power, sent for 11b then 11g */
		power.mode = IWI_MODE_11B;
		power.nchan = 11;
		for (i = 0; i < 11; i++) {
			power.chan[i].chan = i + 1;
			power.chan[i].power = IWI_TXPOWER_MAX;
		}
		DPRINTF(("Setting .11b channels tx power\n"));
		error = iwi_cmd(sc, IWI_CMD_SET_TX_POWER, &power, sizeof power);
		if (error != 0)
			return error;

		power.mode = IWI_MODE_11G;
		DPRINTF(("Setting .11g channels tx power\n"));
		error = iwi_cmd(sc, IWI_CMD_SET_TX_POWER, &power, sizeof power);
		if (error != 0)
			return error;
	}

	error = iwi_set_rateset(sc, &ic->ic_sup_rates[IEEE80211_MODE_11G],
	    IWI_MODE_11G, IWI_RATESET_TYPE_SUPPORTED);
	if (error != 0)
		return error;

	error = iwi_set_rateset(sc, &ic->ic_sup_rates[IEEE80211_MODE_11A],
	    IWI_MODE_11A, IWI_RATESET_TYPE_SUPPORTED);
	if (error != 0)
		return error;

	data = htole32(arc4random());
	DPRINTF(("Setting initialization vector to %u\n", le32toh(data)));
	error = iwi_cmd(sc, IWI_CMD_SET_IV, &data, sizeof data);
	if
(error != 0)
		return error;

	/* enable adapter */
	DPRINTF(("Enabling adapter\n"));
	return iwi_cmd(sc, IWI_CMD_ENABLE, NULL, 0);
}

/*
 * Record the scan type for channel slot 'ix' in a scan request.  Two 4-bit
 * scan types are packed per byte: even slots use the high nibble and odd
 * slots the low nibble; the other nibble is preserved.
 */
static __inline void
set_scan_type(struct iwi_scan_ext *scan, int ix, int scan_type)
{
	uint8_t *st = &scan->scan_type[ix / 2];
	if (ix % 2)
		*st = (*st & 0xf0) | ((scan_type & 0xf) << 0);
	else
		*st = (*st & 0x0f) | ((scan_type & 0xf) << 4);
}

/*
 * Choose the firmware scan type for a channel: directed when the scan state
 * carries an SSID, broadcast for an active scan on a channel not marked
 * passive, passive otherwise.
 */
static int
scan_type(const struct ieee80211_scan_state *ss,
	const struct ieee80211_channel *chan)
{
	/* We can only set one essid for a directed scan */
	if (ss->ss_nssid != 0)
		return IWI_SCAN_TYPE_BDIRECTED;
	if ((ss->ss_flags & IEEE80211_SCAN_ACTIVE) &&
	    (chan->ic_flags & IEEE80211_CHAN_PASSIVE) == 0)
		return IWI_SCAN_TYPE_BROADCAST;
	return IWI_SCAN_TYPE_PASSIVE;
}

/* Map a net80211 channel to the firmware band flag (5GHz or 2GHz). */
static __inline int
scan_band(const struct ieee80211_channel *c)
{
	return IEEE80211_IS_CHAN_5GHZ(c) ?  IWI_CHAN_5GHZ : IWI_CHAN_2GHZ;
}

/*
 * Task handler: scan the current channel only, with a dwell time of 2000,
 * ignoring the result.  Takes and releases the softc lock.
 */
static void
iwi_monitor_scan(void *arg, int npending)
{
	struct iwi_softc *sc = arg;
	IWI_LOCK_DECL;

	IWI_LOCK(sc);
	(void) iwi_scanchan(sc, 2000, 0);
	IWI_UNLOCK(sc);
}

/*
 * Start a scan on the current channel or all channels.
 *
 * maxdwell is the dwell time written into the request and allchan selects
 * between the whole net80211 scan list (non-zero) and ic_curchan only (0).
 * Must be called with the softc lock held.  Returns EBUSY when a scan is
 * already in progress, otherwise the result of the firmware commands.
 */
static int
iwi_scanchan(struct iwi_softc *sc, unsigned long maxdwell, int allchan)
{
	struct ieee80211com *ic = &sc->sc_ic;
	struct ieee80211_channel *chan;
	struct ieee80211_scan_state *ss;
	struct iwi_scan_ext scan;
	int error = 0;

	IWI_LOCK_ASSERT(sc);
	if (sc->fw_state == IWI_FW_SCANNING) {
		/*
		 * This should not happen as we only trigger scan_next after
		 * completion
		 */
		DPRINTF(("%s: called too early - still scanning\n", __func__));
		return (EBUSY);
	}
	IWI_STATE_BEGIN(sc, IWI_FW_SCANNING);

	ss = ic->ic_scan;

	memset(&scan, 0, sizeof scan);
	/* each request carries a fresh, incremented scan generation */
	scan.full_scan_index = htole32(++sc->sc_scangen);
	scan.dwell_time[IWI_SCAN_TYPE_PASSIVE] = htole16(maxdwell);
	if (ic->ic_flags_ext & IEEE80211_FEXT_BGSCAN) {
		/*
		 * Use very short dwell times for when we send probe request
		 * frames.  Without this bg scans hang.
Ideally this should * be handled with early-termination as done by net80211 but * that's not feasible (aborting a scan is problematic). */ scan.dwell_time[IWI_SCAN_TYPE_BROADCAST] = htole16(30); scan.dwell_time[IWI_SCAN_TYPE_BDIRECTED] = htole16(30); } else { scan.dwell_time[IWI_SCAN_TYPE_BROADCAST] = htole16(maxdwell); scan.dwell_time[IWI_SCAN_TYPE_BDIRECTED] = htole16(maxdwell); } /* We can only set one essid for a directed scan */ if (ss->ss_nssid != 0) { error = iwi_cmd(sc, IWI_CMD_SET_ESSID, ss->ss_ssid[0].ssid, ss->ss_ssid[0].len); if (error) return (error); } if (allchan) { int i, next, band, b, bstart; /* * Convert scan list to run-length encoded channel list * the firmware requires (preserving the order setup by * net80211). The first entry in each run specifies the * band and the count of items in the run. */ next = 0; /* next open slot */ bstart = 0; /* NB: not needed, silence compiler */ band = -1; /* NB: impossible value */ KASSERT(ss->ss_last > 0, ("no channels")); for (i = 0; i < ss->ss_last; i++) { chan = ss->ss_chans[i]; b = scan_band(chan); if (b != band) { if (band != -1) scan.channels[bstart] = (next - bstart) | band; /* NB: this allocates a slot for the run-len */ band = b, bstart = next++; } if (next >= IWI_SCAN_CHANNELS) { DPRINTF(("truncating scan list\n")); break; } scan.channels[next] = ieee80211_chan2ieee(ic, chan); set_scan_type(&scan, next, scan_type(ss, chan)); next++; } scan.channels[bstart] = (next - bstart) | band; } else { /* Scan the current channel only */ chan = ic->ic_curchan; scan.channels[0] = 1 | scan_band(chan); scan.channels[1] = ieee80211_chan2ieee(ic, chan); set_scan_type(&scan, 1, scan_type(ss, chan)); } #ifdef IWI_DEBUG if (iwi_debug > 0) { static const char *scantype[8] = { "PSTOP", "PASV", "DIR", "BCAST", "BDIR", "5", "6", "7" }; int i; printf("Scan request: index %u dwell %d/%d/%d\n" , le32toh(scan.full_scan_index) , le16toh(scan.dwell_time[IWI_SCAN_TYPE_PASSIVE]) , le16toh(scan.dwell_time[IWI_SCAN_TYPE_BROADCAST]) 
, le16toh(scan.dwell_time[IWI_SCAN_TYPE_BDIRECTED]) ); i = 0; do { int run = scan.channels[i]; if (run == 0) break; printf("Scan %d %s channels:", run & 0x3f, run & IWI_CHAN_2GHZ ? "2.4GHz" : "5GHz"); for (run &= 0x3f, i++; run > 0; run--, i++) { uint8_t type = scan.scan_type[i/2]; printf(" %u/%s", scan.channels[i], scantype[(i & 1 ? type : type>>4) & 7]); } printf("\n"); } while (i < IWI_SCAN_CHANNELS); } #endif return (iwi_cmd(sc, IWI_CMD_SCAN_EXT, &scan, sizeof scan)); } static int iwi_set_sensitivity(struct iwi_softc *sc, int8_t rssi_dbm) { struct iwi_sensitivity sens; DPRINTF(("Setting sensitivity to %d\n", rssi_dbm)); memset(&sens, 0, sizeof sens); sens.rssi = htole16(rssi_dbm); return iwi_cmd(sc, IWI_CMD_SET_SENSITIVITY, &sens, sizeof sens); } static int iwi_auth_and_assoc(struct iwi_softc *sc, struct ieee80211vap *vap) { struct ieee80211com *ic = vap->iv_ic; struct ifnet *ifp = vap->iv_ifp; struct ieee80211_node *ni; struct iwi_configuration config; struct iwi_associate *assoc = &sc->assoc; uint16_t capinfo; uint32_t data; int error, mode; IWI_LOCK_ASSERT(sc); ni = ieee80211_ref_node(vap->iv_bss); if (sc->flags & IWI_FLAG_ASSOCIATED) { DPRINTF(("Already associated\n")); return (-1); } IWI_STATE_BEGIN(sc, IWI_FW_ASSOCIATING); error = 0; mode = 0; if (IEEE80211_IS_CHAN_A(ic->ic_curchan)) mode = IWI_MODE_11A; else if (IEEE80211_IS_CHAN_G(ic->ic_curchan)) mode = IWI_MODE_11G; if (IEEE80211_IS_CHAN_B(ic->ic_curchan)) mode = IWI_MODE_11B; if (IEEE80211_IS_CHAN_2GHZ(ic->ic_curchan)) { memset(&config, 0, sizeof config); config.bluetooth_coexistence = sc->bluetooth; config.antenna = sc->antenna; config.multicast_enabled = 1; if (mode == IWI_MODE_11G) config.use_protection = 1; config.answer_pbreq = (vap->iv_opmode == IEEE80211_M_IBSS) ? 
1 : 0;
		config.disable_unicast_decryption = 1;
		config.disable_multicast_decryption = 1;
		DPRINTF(("Configuring adapter\n"));
		error = iwi_cmd(sc, IWI_CMD_SET_CONFIG, &config, sizeof config);
		if (error != 0)
			goto done;
	}

#ifdef IWI_DEBUG
	if (iwi_debug > 0) {
		printf("Setting ESSID to ");
		ieee80211_print_essid(ni->ni_essid, ni->ni_esslen);
		printf("\n");
	}
#endif
	/* every failure from here on leaves through "done" */
	error = iwi_cmd(sc, IWI_CMD_SET_ESSID, ni->ni_essid, ni->ni_esslen);
	if (error != 0)
		goto done;

	error = iwi_setpowermode(sc, vap);
	if (error != 0)
		goto done;

	data = htole32(vap->iv_rtsthreshold);
	DPRINTF(("Setting RTS threshold to %u\n", le32toh(data)));
	error = iwi_cmd(sc, IWI_CMD_SET_RTS_THRESHOLD, &data, sizeof data);
	if (error != 0)
		goto done;

	data = htole32(vap->iv_fragthreshold);
	DPRINTF(("Setting fragmentation threshold to %u\n", le32toh(data)));
	error = iwi_cmd(sc, IWI_CMD_SET_FRAG_THRESHOLD, &data, sizeof data);
	if (error != 0)
		goto done;

	/* the rate set has already been "negotiated" */
	error = iwi_set_rateset(sc, &ni->ni_rates, mode,
	    IWI_RATESET_TYPE_NEGOTIATED);
	if (error != 0)
		goto done;

	/* build the associate request from scratch in sc->assoc */
	memset(assoc, 0, sizeof *assoc);

	if ((vap->iv_flags & IEEE80211_F_WME) && ni->ni_ies.wme_ie != NULL) {
		/* NB: don't treat WME setup as failure */
		if (iwi_wme_setparams(sc) == 0 && iwi_wme_setie(sc) == 0)
			assoc->policy |= htole16(IWI_POLICY_WME);
		/* XXX complain on failure? */
	}

	if (vap->iv_appie_wpa != NULL) {
		struct ieee80211_appie *ie = vap->iv_appie_wpa;

		DPRINTF(("Setting optional IE (len=%u)\n", ie->ie_len));
		error = iwi_cmd(sc, IWI_CMD_SET_OPTIE, ie->ie_data, ie->ie_len);
		if (error != 0)
			goto done;
	}

	error = iwi_set_sensitivity(sc, ic->ic_node_getrssi(ni));
	if (error != 0)
		goto done;

	assoc->mode = mode;
	assoc->chan = ic->ic_curchan->ic_ieee;
	/*
	 * NB: do not arrange for shared key auth w/o privacy
	 *     (i.e. a wep key); it causes a firmware error.
*/
	if ((vap->iv_flags & IEEE80211_F_PRIVACY) &&
	    ni->ni_authmode == IEEE80211_AUTH_SHARED) {
		assoc->auth = IWI_AUTH_SHARED;
		/*
		 * It's possible to have privacy marked but no default
		 * key setup.  This typically is due to a user app bug
		 * but if we blindly grab the key the firmware will
		 * barf so avoid it for now.
		 */
		/* the default tx key index goes in bits 4 and up of 'auth' */
		if (vap->iv_def_txkey != IEEE80211_KEYIX_NONE)
			assoc->auth |= vap->iv_def_txkey << 4;

		error = iwi_setwepkeys(sc, vap);
		if (error != 0)
			goto done;
	}
	if (vap->iv_flags & IEEE80211_F_WPA)
		assoc->policy |= htole16(IWI_POLICY_WPA);
	/* an IBSS node with a zero timestamp means we start the IBSS */
	if (vap->iv_opmode == IEEE80211_M_IBSS && ni->ni_tstamp.tsf == 0)
		assoc->type = IWI_HC_IBSS_START;
	else
		assoc->type = IWI_HC_ASSOC;
	memcpy(assoc->tstamp, ni->ni_tstamp.data, 8);

	/* assemble the capability information field */
	if (vap->iv_opmode == IEEE80211_M_IBSS)
		capinfo = IEEE80211_CAPINFO_IBSS;
	else
		capinfo = IEEE80211_CAPINFO_ESS;
	if (vap->iv_flags & IEEE80211_F_PRIVACY)
		capinfo |= IEEE80211_CAPINFO_PRIVACY;
	if ((ic->ic_flags & IEEE80211_F_SHPREAMBLE) &&
	    IEEE80211_IS_CHAN_2GHZ(ic->ic_curchan))
		capinfo |= IEEE80211_CAPINFO_SHORT_PREAMBLE;
	if (ni->ni_capinfo & IEEE80211_CAPINFO_SHORT_SLOTTIME)
		capinfo |= IEEE80211_CAPINFO_SHORT_SLOTTIME;
	assoc->capinfo = htole16(capinfo);

	assoc->lintval = htole16(ic->ic_lintval);
	assoc->intval = htole16(ni->ni_intval);
	IEEE80211_ADDR_COPY(assoc->bssid, ni->ni_bssid);
	/* destination is the broadcast address in IBSS, else the BSSID */
	if (vap->iv_opmode == IEEE80211_M_IBSS)
		IEEE80211_ADDR_COPY(assoc->dst, ifp->if_broadcastaddr);
	else
		IEEE80211_ADDR_COPY(assoc->dst, ni->ni_bssid);

	DPRINTF(("%s bssid %6D dst %6D channel %u policy 0x%x "
	    "auth %u capinfo 0x%x lintval %u bintval %u\n",
	    assoc->type == IWI_HC_IBSS_START ? "Start" : "Join",
	    assoc->bssid, ":", assoc->dst, ":",
	    assoc->chan, le16toh(assoc->policy), assoc->auth,
	    le16toh(assoc->capinfo), le16toh(assoc->lintval),
	    le16toh(assoc->intval)));
	error = iwi_cmd(sc, IWI_CMD_ASSOCIATE, assoc, sizeof *assoc);
done:
	/* drop the node reference; on error also leave ASSOCIATING state */
	ieee80211_free_node(ni);
	if (error)
		IWI_STATE_END(sc, IWI_FW_ASSOCIATING);

	return (error);
}

/*
 * Task handler: issue a regular (non-quiet) disassociate.  Takes and
 * releases the softc lock; the result of iwi_disassociate() is ignored.
 */
static void
iwi_disassoc(void *arg, int pending)
{
	struct iwi_softc *sc = arg;
	IWI_LOCK_DECL;

	IWI_LOCK(sc);
	iwi_disassociate(sc, 0);
	IWI_UNLOCK(sc);
}

/*
 * Ask the firmware to disassociate, reusing the associate request saved in
 * sc->assoc with only its type changed (quiet or regular).
 * Returns -1 when not associated, otherwise the result of iwi_cmd().
 */
static int
iwi_disassociate(struct iwi_softc *sc, int quiet)
{
	struct iwi_associate *assoc = &sc->assoc;

	if ((sc->flags & IWI_FLAG_ASSOCIATED) == 0) {
		DPRINTF(("Not associated\n"));
		return (-1);
	}

	IWI_STATE_BEGIN(sc, IWI_FW_DISASSOCIATING);

	if (quiet)
		assoc->type = IWI_HC_DISASSOC_QUIET;
	else
		assoc->type = IWI_HC_DISASSOC;

	DPRINTF(("Trying to disassociate from %6D channel %u\n",
	    assoc->bssid, ":", assoc->chan));
	return iwi_cmd(sc, IWI_CMD_ASSOCIATE, assoc, sizeof *assoc);
}

/*
 * release dma resources for the firmware
 *
 * Each stage recorded in fw_flags is undone in reverse order of setup
 * (unload map, free memory, destroy tag) and the bookkeeping fields are
 * then cleared, so this is usable on a partially set-up buffer.
 */
static void
iwi_release_fw_dma(struct iwi_softc *sc)
{
	if (sc->fw_flags & IWI_FW_HAVE_PHY)
		bus_dmamap_unload(sc->fw_dmat, sc->fw_map);
	if (sc->fw_flags & IWI_FW_HAVE_MAP)
		bus_dmamem_free(sc->fw_dmat, sc->fw_virtaddr, sc->fw_map);
	if (sc->fw_flags & IWI_FW_HAVE_DMAT)
		bus_dma_tag_destroy(sc->fw_dmat);

	sc->fw_flags = 0;
	sc->fw_dma_size = 0;
	sc->fw_dmat = NULL;
	sc->fw_map = NULL;
	sc->fw_physaddr = 0;
	sc->fw_virtaddr = NULL;
}

/*
 * allocate the dma descriptor for the firmware.
 * Return 0 on success, 1 on error.
 * Must be called unlocked, protected by IWI_FLAG_FW_LOADING.
*/
static int
iwi_init_fw_dma(struct iwi_softc *sc, int size)
{
	/* an existing buffer that is already large enough is reused */
	if (sc->fw_dma_size >= size)
		return 0;
	/*
	 * NOTE(review): when a smaller buffer already exists it is not
	 * released here before the new one is created -- confirm the caller
	 * handles that case.
	 */
	if (bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), 4, 0,
	    BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
	    size, 1, size, 0, NULL, NULL, &sc->fw_dmat) != 0) {
		device_printf(sc->sc_dev,
		    "could not create firmware DMA tag\n");
		goto error;
	}
	/* fw_flags tracks each completed stage for iwi_release_fw_dma() */
	sc->fw_flags |= IWI_FW_HAVE_DMAT;
	if (bus_dmamem_alloc(sc->fw_dmat, &sc->fw_virtaddr, 0,
	    &sc->fw_map) != 0) {
		device_printf(sc->sc_dev,
		    "could not allocate firmware DMA memory\n");
		goto error;
	}
	sc->fw_flags |= IWI_FW_HAVE_MAP;
	if (bus_dmamap_load(sc->fw_dmat, sc->fw_map, sc->fw_virtaddr,
	    size, iwi_dma_map_addr, &sc->fw_physaddr, 0) != 0) {
		device_printf(sc->sc_dev, "could not load firmware DMA map\n");
		goto error;
	}
	sc->fw_flags |= IWI_FW_HAVE_PHY;
	sc->fw_dma_size = size;
	return 0;

error:
	iwi_release_fw_dma(sc);
	return 1;
}

/*
 * Bring the adapter up: stop it, reset it, load boot firmware, microcode
 * and main firmware, program the ring registers, send the configuration
 * and start the watchdog.  Must be called with the softc lock held.
 * Success is reported through sc->sc_running being set to 1.
 */
static void
iwi_init_locked(struct iwi_softc *sc)
{
	struct iwi_rx_data *data;
	int i;

	IWI_LOCK_ASSERT(sc);

	if (sc->fw_state == IWI_FW_LOADING) {
		device_printf(sc->sc_dev, "%s: already loading\n", __func__);
		return;		/* XXX: condvar?
*/
	}

	/* start from a stopped adapter */
	iwi_stop_locked(sc);

	IWI_STATE_BEGIN(sc, IWI_FW_LOADING);

	if (iwi_reset(sc) != 0) {
		device_printf(sc->sc_dev, "could not reset adapter\n");
		goto fail;
	}
	/* load order: boot firmware, microcode, then (below) main firmware */
	if (iwi_load_firmware(sc, &sc->fw_boot) != 0) {
		device_printf(sc->sc_dev,
		    "could not load boot firmware %s\n", sc->fw_boot.name);
		goto fail;
	}
	if (iwi_load_ucode(sc, &sc->fw_uc) != 0) {
		device_printf(sc->sc_dev,
		    "could not load microcode %s\n", sc->fw_uc.name);
		goto fail;
	}

	iwi_stop_master(sc);

	/* tell the adapter the base, size and write index of each ring */
	CSR_WRITE_4(sc, IWI_CSR_CMD_BASE, sc->cmdq.physaddr);
	CSR_WRITE_4(sc, IWI_CSR_CMD_SIZE, sc->cmdq.count);
	CSR_WRITE_4(sc, IWI_CSR_CMD_WIDX, sc->cmdq.cur);

	CSR_WRITE_4(sc, IWI_CSR_TX1_BASE, sc->txq[0].physaddr);
	CSR_WRITE_4(sc, IWI_CSR_TX1_SIZE, sc->txq[0].count);
	CSR_WRITE_4(sc, IWI_CSR_TX1_WIDX, sc->txq[0].cur);

	CSR_WRITE_4(sc, IWI_CSR_TX2_BASE, sc->txq[1].physaddr);
	CSR_WRITE_4(sc, IWI_CSR_TX2_SIZE, sc->txq[1].count);
	CSR_WRITE_4(sc, IWI_CSR_TX2_WIDX, sc->txq[1].cur);

	CSR_WRITE_4(sc, IWI_CSR_TX3_BASE, sc->txq[2].physaddr);
	CSR_WRITE_4(sc, IWI_CSR_TX3_SIZE, sc->txq[2].count);
	CSR_WRITE_4(sc, IWI_CSR_TX3_WIDX, sc->txq[2].cur);

	CSR_WRITE_4(sc, IWI_CSR_TX4_BASE, sc->txq[3].physaddr);
	CSR_WRITE_4(sc, IWI_CSR_TX4_SIZE, sc->txq[3].count);
	CSR_WRITE_4(sc, IWI_CSR_TX4_WIDX, sc->txq[3].cur);

	/* each rx buffer has its own register holding its bus address */
	for (i = 0; i < sc->rxq.count; i++) {
		data = &sc->rxq.data[i];
		CSR_WRITE_4(sc, data->reg, data->physaddr);
	}

	CSR_WRITE_4(sc, IWI_CSR_RX_WIDX, sc->rxq.count - 1);

	if (iwi_load_firmware(sc, &sc->fw_fw) != 0) {
		device_printf(sc->sc_dev,
		    "could not load main firmware %s\n", sc->fw_fw.name);
		goto fail;
	}
	sc->flags |= IWI_FLAG_FW_INITED;

	IWI_STATE_END(sc, IWI_FW_LOADING);

	/* the LOADING state has ended, hence fail2 rather than fail */
	if (iwi_config(sc) != 0) {
		device_printf(sc->sc_dev, "unable to enable adapter\n");
		goto fail2;
	}

	callout_reset(&sc->sc_wdtimer, hz, iwi_watchdog, sc);
	sc->sc_running = 1;
	return;
fail:
	IWI_STATE_END(sc, IWI_FW_LOADING);
fail2:
	iwi_stop_locked(sc);
}

/*
 * Unlocked wrapper around iwi_init_locked(); once the lock is dropped it
 * calls ieee80211_start_all() if the adapter is running.
 */
static void
iwi_init(void *priv)
{
	struct iwi_softc *sc = priv;
	struct ieee80211com *ic = &sc->sc_ic;
	IWI_LOCK_DECL;

	IWI_LOCK(sc);
iwi_init_locked(sc);
	IWI_UNLOCK(sc);

	/* NB: sc_running is read here after the lock has been released */
	if (sc->sc_running)
		ieee80211_start_all(ic);
}

/*
 * Stop the adapter: cancel the timers, stop the bus master, soft-reset the
 * chip, reset every ring and return the driver state to idle.  Finishes
 * with wakeup(sc).  Must be called with the softc lock held.
 */
static void
iwi_stop_locked(void *priv)
{
	struct iwi_softc *sc = priv;

	IWI_LOCK_ASSERT(sc);

	sc->sc_running = 0;

	if (sc->sc_softled) {
		callout_stop(&sc->sc_ledtimer);
		sc->sc_blinking = 0;
	}
	callout_stop(&sc->sc_wdtimer);
	callout_stop(&sc->sc_rftimer);

	iwi_stop_master(sc);

	CSR_WRITE_4(sc, IWI_CSR_RST, IWI_RST_SOFT_RESET);

	/* reset rings */
	iwi_reset_cmd_ring(sc, &sc->cmdq);
	iwi_reset_tx_ring(sc, &sc->txq[0]);
	iwi_reset_tx_ring(sc, &sc->txq[1]);
	iwi_reset_tx_ring(sc, &sc->txq[2]);
	iwi_reset_tx_ring(sc, &sc->txq[3]);
	iwi_reset_rx_ring(sc, &sc->rxq);

	sc->sc_tx_timer = 0;
	sc->sc_state_timer = 0;
	sc->sc_busy_timer = 0;
	sc->flags &= ~(IWI_FLAG_BUSY | IWI_FLAG_ASSOCIATED);
	sc->fw_state = IWI_FW_IDLE;
	wakeup(sc);
}

/* Unlocked wrapper around iwi_stop_locked(). */
static void
iwi_stop(struct iwi_softc *sc)
{
	IWI_LOCK_DECL;

	IWI_LOCK(sc);
	iwi_stop_locked(sc);
	IWI_UNLOCK(sc);
}

/* Task handler: restart the adapter by running iwi_init(). */
static void
iwi_restart(void *arg, int npending)
{
	struct iwi_softc *sc = arg;

	iwi_init(sc);
}

/*
 * Return non-zero when the radio is disabled in hardware, i.e. when the
 * IWI_IO_RADIO_ENABLED bit of the IO register is clear, and 0 when the
 * radio is enabled.
 */
static int
iwi_getrfkill(struct iwi_softc *sc)
{
	return (CSR_READ_4(sc, IWI_CSR_IO) & IWI_IO_RADIO_ENABLED) == 0;
}

/*
 * Task handler run when the radio has been switched back on: log it,
 * re-initialize the adapter and notify net80211.
 */
static void
iwi_radio_on(void *arg, int pending)
{
	struct iwi_softc *sc = arg;
	struct ieee80211com *ic = &sc->sc_ic;

	device_printf(sc->sc_dev, "radio turned on\n");

	iwi_init(sc);
	ieee80211_notify_radio(ic, 1);
}

/*
 * Callout handler polling the rfkill state; asserts the softc lock.
 */
static void
iwi_rfkill_poll(void *arg)
{
	struct iwi_softc *sc = arg;

	IWI_LOCK_ASSERT(sc);

	/*
	 * Check for a change in rfkill state.  We get an
	 * interrupt when a radio is disabled but not when
	 * it is enabled so we must poll for the latter.
*/ if (!iwi_getrfkill(sc)) { ieee80211_runtask(&sc->sc_ic, &sc->sc_radiontask); return; } callout_reset(&sc->sc_rftimer, 2*hz, iwi_rfkill_poll, sc); } static void iwi_radio_off(void *arg, int pending) { struct iwi_softc *sc = arg; struct ieee80211com *ic = &sc->sc_ic; IWI_LOCK_DECL; device_printf(sc->sc_dev, "radio turned off\n"); ieee80211_notify_radio(ic, 0); IWI_LOCK(sc); iwi_stop_locked(sc); iwi_rfkill_poll(sc); IWI_UNLOCK(sc); } static int iwi_sysctl_stats(SYSCTL_HANDLER_ARGS) { struct iwi_softc *sc = arg1; uint32_t size, buf[128]; memset(buf, 0, sizeof buf); if (!(sc->flags & IWI_FLAG_FW_INITED)) return SYSCTL_OUT(req, buf, sizeof buf); size = min(CSR_READ_4(sc, IWI_CSR_TABLE0_SIZE), 128 - 1); CSR_READ_REGION_4(sc, IWI_CSR_TABLE0_BASE, &buf[1], size); return SYSCTL_OUT(req, buf, size); } static int iwi_sysctl_radio(SYSCTL_HANDLER_ARGS) { struct iwi_softc *sc = arg1; int val = !iwi_getrfkill(sc); return SYSCTL_OUT(req, &val, sizeof val); } /* * Add sysctl knobs. */ static void iwi_sysctlattach(struct iwi_softc *sc) { struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->sc_dev); struct sysctl_oid *tree = device_get_sysctl_tree(sc->sc_dev); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "radio", CTLTYPE_INT | CTLFLAG_RD, sc, 0, iwi_sysctl_radio, "I", "radio transmitter switch state (0=off, 1=on)"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "stats", CTLTYPE_OPAQUE | CTLFLAG_RD, sc, 0, iwi_sysctl_stats, "S", "statistics"); sc->bluetooth = 0; SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "bluetooth", CTLFLAG_RW, &sc->bluetooth, 0, "bluetooth coexistence"); sc->antenna = IWI_ANTENNA_AUTO; SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "antenna", CTLFLAG_RW, &sc->antenna, 0, "antenna (0=auto)"); } /* * LED support. * * Different cards have different capabilities. Some have three * led's while others have only one. The linux ipw driver defines * led's for link state (associated or not), band (11a, 11g, 11b), * and for link activity. 
We use one led and vary the blink rate
 * according to the tx/rx traffic a la the ath driver.
 */

/*
 * Clear the standby and DMA gate bits from a value about to be written to
 * the event register.
 */
static __inline uint32_t
iwi_toggle_event(uint32_t r)
{
	return r &~ (IWI_RST_STANDBY | IWI_RST_GATE_ODMA |
		     IWI_RST_GATE_IDMA | IWI_RST_GATE_ADMA);
}

/* Read the 32-bit event register (IWI_MEM_EEPROM_EVENT). */
static uint32_t
iwi_read_event(struct iwi_softc *sc)
{
	return MEM_READ_4(sc, IWI_MEM_EEPROM_EVENT);
}

/* Write the 32-bit event register (IWI_MEM_EEPROM_EVENT). */
static void
iwi_write_event(struct iwi_softc *sc, uint32_t v)
{
	MEM_WRITE_4(sc, IWI_MEM_EEPROM_EVENT, v);
}

/* Callout handler: the blink cycle is over, allow a new one to start. */
static void
iwi_led_done(void *arg)
{
	struct iwi_softc *sc = arg;

	sc->sc_blinking = 0;
}

/*
 * Turn the activity LED off: flip the pin and then set a timer so no
 * update will happen for the specified duration.
 */
static void
iwi_led_off(void *arg)
{
	struct iwi_softc *sc = arg;
	uint32_t v;

	v = iwi_read_event(sc);
	v &= ~sc->sc_ledpin;
	iwi_write_event(sc, iwi_toggle_event(v));
	callout_reset(&sc->sc_ledtimer, sc->sc_ledoff, iwi_led_done, sc);
}

/*
 * Blink the LED according to the specified on/off times.
 *
 * 'on' and 'off' are passed to callout_reset() as tick counts.  The LED
 * pin is set now, iwi_led_off() runs after 'on' ticks and iwi_led_done()
 * 'off' ticks after that.
 */
static void
iwi_led_blink(struct iwi_softc *sc, int on, int off)
{
	uint32_t v;

	v = iwi_read_event(sc);
	v |= sc->sc_ledpin;
	iwi_write_event(sc, iwi_toggle_event(v));
	sc->sc_blinking = 1;
	sc->sc_ledoff = off;
	callout_reset(&sc->sc_ledtimer, on, iwi_led_off, sc);
}

/*
 * Start an LED blink for a poll, tx or rx event (IWI_LED_*), with on/off
 * times chosen from the current tx or rx rate.  Nothing is started while a
 * blink is already in progress.
 */
static void
iwi_led_event(struct iwi_softc *sc, int event)
{
	/* NB: on/off times from the Atheros NDIS driver, w/ permission */
	static const struct {
		u_int		rate;		/* tx/rx iwi rate */
		u_int16_t	timeOn;		/* LED on time (ms) */
		u_int16_t	timeOff;	/* LED off time (ms) */
	} blinkrates[] = {
		{ IWI_RATE_OFDM54, 40,  10 },
		{ IWI_RATE_OFDM48, 44,  11 },
		{ IWI_RATE_OFDM36, 50,  13 },
		{ IWI_RATE_OFDM24, 57,  14 },
		{ IWI_RATE_OFDM18, 67,  16 },
		{ IWI_RATE_OFDM12, 80,  20 },
		{ IWI_RATE_DS11,  100,  25 },
		{ IWI_RATE_OFDM9, 133,  34 },
		{ IWI_RATE_OFDM6, 160,  40 },
		{ IWI_RATE_DS5,   200,  50 },
		{            6,   240,  58 },	/* XXX 3Mb/s if it existed */
		{ IWI_RATE_DS2,   267,  66 },
		{ IWI_RATE_DS1,   400, 100 },
		{            0,   500, 130 },	/* unknown rate/polling */
	};
	uint32_t txrate;
	int j = 0;			/* XXX silence compiler
*/

	sc->sc_ledevent = ticks;	/* time of last event */
	if (sc->sc_blinking)		/* don't interrupt active blink */
		return;
	switch (event) {
	case IWI_LED_POLL:
		/* last table entry: unknown rate/polling */
		j = nitems(blinkrates)-1;
		break;
	case IWI_LED_TX:
		/* read current transmission rate from adapter */
		txrate = CSR_READ_4(sc, IWI_CSR_CURRENT_TX_RATE);
		/*
		 * sc_txrix caches the table index of the last tx rate; search
		 * again only when the rate changed.  An unmatched rate leaves
		 * j at the last ("unknown") entry.
		 */
		if (blinkrates[sc->sc_txrix].rate != txrate) {
			for (j = 0; j < nitems(blinkrates)-1; j++)
				if (blinkrates[j].rate == txrate)
					break;
			sc->sc_txrix = j;
		} else
			j = sc->sc_txrix;
		break;
	case IWI_LED_RX:
		/* same lookup for rx, cached in sc_rxrix */
		if (blinkrates[sc->sc_rxrix].rate != sc->sc_rxrate) {
			for (j = 0; j < nitems(blinkrates)-1; j++)
				if (blinkrates[j].rate == sc->sc_rxrate)
					break;
			sc->sc_rxrix = j;
		} else
			j = sc->sc_rxrix;
		break;
	}
	/* XXX beware of overflow */
	/* table times are in milliseconds; convert to ticks */
	iwi_led_blink(sc, (blinkrates[j].timeOn * hz) / 1000,
		(blinkrates[j].timeOff * hz) / 1000);
}

/*
 * sysctl handler for the "softled" knob.  On a write the value is
 * normalized to 0/1; when that enables software LED control the LED pin
 * is cleared in the event register before the new setting is stored.
 */
static int
iwi_sysctl_softled(SYSCTL_HANDLER_ARGS)
{
	struct iwi_softc *sc = arg1;
	int softled = sc->sc_softled;
	int error;

	error = sysctl_handle_int(oidp, &softled, 0, req);
	/* done on error or when this was only a read */
	if (error || !req->newptr)
		return error;
	softled = (softled != 0);
	if (softled != sc->sc_softled) {
		if (softled) {
			uint32_t v = iwi_read_event(sc);
			v &= ~sc->sc_ledpin;
			iwi_write_event(sc, iwi_toggle_event(v));
		}
		sc->sc_softled = softled;
	}
	return 0;
}

/*
 * Set up LED support: initialize the blink state and LED callout, register
 * the LED sysctl knobs and pick the LED pin from the NIC type read from
 * the EEPROM.
 */
static void
iwi_ledattach(struct iwi_softc *sc)
{
	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->sc_dev);
	struct sysctl_oid *tree = device_get_sysctl_tree(sc->sc_dev);

	sc->sc_blinking = 0;
	sc->sc_ledstate = 1;
	sc->sc_ledidle = (2700*hz)/1000;	/* 2.7sec */
	callout_init_mtx(&sc->sc_ledtimer, &sc->sc_mtx, 0);

	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
		"softled", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
		iwi_sysctl_softled, "I", "enable/disable software LED support");
	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
		"ledpin", CTLFLAG_RW, &sc->sc_ledpin, 0,
		"pin setting to turn activity LED on");
	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
		"ledidle", CTLFLAG_RW, &sc->sc_ledidle, 0,
		"idle time for inactivity LED (ticks)");
	/* XXX for debugging */
	SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
		"nictype", CTLFLAG_RD, &sc->sc_nictype, 0,
		"NIC type from EEPROM");

	sc->sc_ledpin = IWI_RST_LED_ACTIVITY;
	sc->sc_softled = 1;

	/* the NIC type is the high byte of the IWI_EEPROM_NIC word */
	sc->sc_nictype = (iwi_read_prom_word(sc, IWI_EEPROM_NIC) >> 8) & 0xff;
	if (sc->sc_nictype == 1) {
		/*
		 * NB: led's are reversed.
		 */
		sc->sc_ledpin = IWI_RST_LED_ASSOCIATED;
	}
}

/* net80211 scan-start callback: nothing to do. */
static void
iwi_scan_start(struct ieee80211com *ic)
{
	/* ignore */
}

/*
 * net80211 set-channel callback: the channel is only changed while the
 * firmware state is idle.
 */
static void
iwi_set_channel(struct ieee80211com *ic)
{
	struct iwi_softc *sc = ic->ic_softc;

	if (sc->fw_state == IWI_FW_IDLE)
		iwi_setcurchan(sc, ic->ic_curchan->ic_ieee);
}

/*
 * net80211 callback: scan the current channel for up to maxdwell; the
 * scan is cancelled when the request cannot be started.
 */
static void
iwi_scan_curchan(struct ieee80211_scan_state *ss, unsigned long maxdwell)
{
	struct ieee80211vap *vap = ss->ss_vap;
	struct iwi_softc *sc = vap->iv_ic->ic_softc;
	IWI_LOCK_DECL;

	IWI_LOCK(sc);
	if (iwi_scanchan(sc, maxdwell, 0))
		ieee80211_cancel_scan(vap);
	IWI_UNLOCK(sc);
}

/* net80211 min-dwell callback: intentionally a no-op. */
static void
iwi_scan_mindwell(struct ieee80211_scan_state *ss)
{
	/* NB: don't try to abort scan; wait for firmware to finish */
}

/*
 * net80211 scan-end callback: clear the channel-scan flag and abort the
 * firmware scan if one is still in progress.
 */
static void
iwi_scan_end(struct ieee80211com *ic)
{
	struct iwi_softc *sc = ic->ic_softc;
	IWI_LOCK_DECL;

	IWI_LOCK(sc);
	sc->flags &= ~IWI_FLAG_CHANNEL_SCAN;
	/* NB: make sure we're still scanning */
	if (sc->fw_state == IWI_FW_SCANNING)
		iwi_cmd(sc, IWI_CMD_ABORT_SCAN, NULL, 0);
	IWI_UNLOCK(sc);
}

/*
 * Fill 'bands' (bands_sz bytes) with the supported modes: 11b and 11g
 * always, 11a only for PCI device IDs of 0x4223 and above.
 */
static void
iwi_collect_bands(struct ieee80211com *ic, uint8_t bands[], size_t bands_sz)
{
	struct iwi_softc *sc = ic->ic_softc;
	device_t dev = sc->sc_dev;

	memset(bands, 0, bands_sz);
	setbit(bands, IEEE80211_MODE_11B);
	setbit(bands, IEEE80211_MODE_11G);
	if (pci_get_device(dev) >= 0x4223)
		setbit(bands, IEEE80211_MODE_11A);
}

/*
 * net80211 callback building the channel list: the default 2GHz channels,
 * plus the three default 5GHz channel groups when 11a is supported.
 */
static void
iwi_getradiocaps(struct ieee80211com *ic,
    int maxchans, int *nchans, struct ieee80211_channel chans[])
{
	uint8_t bands[IEEE80211_MODE_BYTES];

	iwi_collect_bands(ic, bands, sizeof(bands));
	*nchans = 0;
	if (isset(bands, IEEE80211_MODE_11B) || isset(bands, IEEE80211_MODE_11G))
		ieee80211_add_channel_list_2ghz(chans, maxchans,
nchans, def_chan_2ghz, nitems(def_chan_2ghz), bands, 0); if (isset(bands, IEEE80211_MODE_11A)) { ieee80211_add_channel_list_5ghz(chans, maxchans, nchans, def_chan_5ghz_band1, nitems(def_chan_5ghz_band1), bands, 0); ieee80211_add_channel_list_5ghz(chans, maxchans, nchans, def_chan_5ghz_band2, nitems(def_chan_5ghz_band2), bands, 0); ieee80211_add_channel_list_5ghz(chans, maxchans, nchans, def_chan_5ghz_band3, nitems(def_chan_5ghz_band3), bands, 0); } } Index: head/sys/dev/ixl/ixl_pf_main.c =================================================================== --- head/sys/dev/ixl/ixl_pf_main.c (revision 331796) +++ head/sys/dev/ixl/ixl_pf_main.c (revision 331797) @@ -1,6090 +1,6090 @@ /****************************************************************************** Copyright (c) 2013-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #include "ixl_pf.h" #ifdef PCI_IOV #include "ixl_pf_iov.h" #endif #ifdef IXL_IW #include "ixl_iw.h" #include "ixl_iw_int.h" #endif #ifdef DEV_NETMAP #include #include #include #endif /* DEV_NETMAP */ static int ixl_setup_queue(struct ixl_queue *, struct ixl_pf *, int); static u64 ixl_max_aq_speed_to_value(u8); static u8 ixl_convert_sysctl_aq_link_speed(u8, bool); /* Sysctls */ static int ixl_set_flowcntl(SYSCTL_HANDLER_ARGS); static int ixl_set_advertise(SYSCTL_HANDLER_ARGS); static int ixl_current_speed(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_show_fw(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_unallocated_queues(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_pf_tx_itr(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_pf_rx_itr(SYSCTL_HANDLER_ARGS); /* Debug Sysctls */ static int ixl_sysctl_link_status(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_phy_abilities(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_sw_filter_list(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_hw_res_alloc(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_switch_config(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_hkey(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_hena(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_hlut(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_fw_link_management(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_read_i2c_byte(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_write_i2c_byte(SYSCTL_HANDLER_ARGS); 
static int ixl_sysctl_fec_fc_ability(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_fec_rs_ability(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_fec_fc_request(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_fec_rs_request(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_fec_auto_enable(SYSCTL_HANDLER_ARGS); #ifdef IXL_DEBUG static int ixl_sysctl_qtx_tail_handler(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_qrx_tail_handler(SYSCTL_HANDLER_ARGS); #endif #ifdef IXL_IW extern int ixl_enable_iwarp; #endif const uint8_t ixl_bcast_addr[ETHER_ADDR_LEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; const char * const ixl_fc_string[6] = { "None", "Rx", "Tx", "Full", "Priority", "Default" }; MALLOC_DEFINE(M_IXL, "ixl", "ixl driver allocations"); void ixl_debug_core(struct ixl_pf *pf, enum ixl_dbg_mask mask, char *fmt, ...) { va_list args; if (!(mask & pf->dbg_mask)) return; /* Re-implement device_printf() */ device_print_prettyname(pf->dev); va_start(args, fmt); vprintf(fmt, args); va_end(args); } /* ** Put the FW, API, NVM, EEtrackID, and OEM version information into a string */ void ixl_nvm_version_str(struct i40e_hw *hw, struct sbuf *buf) { u8 oem_ver = (u8)(hw->nvm.oem_ver >> 24); u16 oem_build = (u16)((hw->nvm.oem_ver >> 16) & 0xFFFF); u8 oem_patch = (u8)(hw->nvm.oem_ver & 0xFF); sbuf_printf(buf, "fw %d.%d.%05d api %d.%d nvm %x.%02x etid %08x oem %d.%d.%d", hw->aq.fw_maj_ver, hw->aq.fw_min_ver, hw->aq.fw_build, hw->aq.api_maj_ver, hw->aq.api_min_ver, (hw->nvm.version & IXL_NVM_VERSION_HI_MASK) >> IXL_NVM_VERSION_HI_SHIFT, (hw->nvm.version & IXL_NVM_VERSION_LO_MASK) >> IXL_NVM_VERSION_LO_SHIFT, hw->nvm.eetrack, oem_ver, oem_build, oem_patch); } void ixl_print_nvm_version(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct sbuf *sbuf; sbuf = sbuf_new_auto(); ixl_nvm_version_str(hw, sbuf); sbuf_finish(sbuf); device_printf(dev, "%s\n", sbuf_data(sbuf)); sbuf_delete(sbuf); } static void ixl_configure_tx_itr(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct 
ixl_vsi *vsi = &pf->vsi; struct ixl_queue *que = vsi->queues; vsi->tx_itr_setting = pf->tx_itr; for (int i = 0; i < vsi->num_queues; i++, que++) { struct tx_ring *txr = &que->txr; wr32(hw, I40E_PFINT_ITRN(IXL_TX_ITR, i), vsi->tx_itr_setting); txr->itr = vsi->tx_itr_setting; txr->latency = IXL_AVE_LATENCY; } } static void ixl_configure_rx_itr(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct ixl_queue *que = vsi->queues; vsi->rx_itr_setting = pf->rx_itr; for (int i = 0; i < vsi->num_queues; i++, que++) { struct rx_ring *rxr = &que->rxr; wr32(hw, I40E_PFINT_ITRN(IXL_RX_ITR, i), vsi->rx_itr_setting); rxr->itr = vsi->rx_itr_setting; rxr->latency = IXL_AVE_LATENCY; } } /* * Write PF ITR values to queue ITR registers. */ void ixl_configure_itr(struct ixl_pf *pf) { ixl_configure_tx_itr(pf); ixl_configure_rx_itr(pf); } /********************************************************************* * Init entry point * * This routine is used in two ways. It is used by the stack as * init entry point in network interface structure. It is also used * by the driver as a hw/sw initialization routine to get to a * consistent state. * * return 0 on success, positive on failure **********************************************************************/ void ixl_init_locked(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct ifnet *ifp = vsi->ifp; device_t dev = pf->dev; struct i40e_filter_control_settings filter; u8 tmpaddr[ETHER_ADDR_LEN]; int ret; INIT_DEBUGOUT("ixl_init_locked: begin"); IXL_PF_LOCK_ASSERT(pf); ixl_stop_locked(pf); /* * If the aq is dead here, it probably means something outside of the driver * did something to the adapter, like a PF reset. * So rebuild the driver's state here if that occurs. */ if (!i40e_check_asq_alive(&pf->hw)) { device_printf(dev, "Admin Queue is down; resetting...\n"); ixl_teardown_hw_structs(pf); ixl_reset(pf); } /* Get the latest mac address... 
User might use a LAA */ bcopy(IF_LLADDR(vsi->ifp), tmpaddr, I40E_ETH_LENGTH_OF_ADDRESS); if (!cmp_etheraddr(hw->mac.addr, tmpaddr) && (i40e_validate_mac_addr(tmpaddr) == I40E_SUCCESS)) { ixl_del_filter(vsi, hw->mac.addr, IXL_VLAN_ANY); bcopy(tmpaddr, hw->mac.addr, I40E_ETH_LENGTH_OF_ADDRESS); ret = i40e_aq_mac_address_write(hw, I40E_AQC_WRITE_TYPE_LAA_ONLY, hw->mac.addr, NULL); if (ret) { device_printf(dev, "LLA address" "change failed!!\n"); return; } } ixl_add_filter(vsi, hw->mac.addr, IXL_VLAN_ANY); /* Set the various hardware offload abilities */ ifp->if_hwassist = 0; if (ifp->if_capenable & IFCAP_TSO) ifp->if_hwassist |= CSUM_TSO; if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) ifp->if_hwassist |= (CSUM_TCP_IPV6 | CSUM_UDP_IPV6); /* Set up the device filtering */ bzero(&filter, sizeof(filter)); filter.enable_ethtype = TRUE; filter.enable_macvlan = TRUE; filter.enable_fdir = FALSE; filter.hash_lut_size = I40E_HASH_LUT_SIZE_512; if (i40e_set_filter_control(hw, &filter)) device_printf(dev, "i40e_set_filter_control() failed\n"); /* Prepare the VSI: rings, hmc contexts, etc... 
*/ if (ixl_initialize_vsi(vsi)) { device_printf(dev, "initialize vsi failed!!\n"); return; } /* Set up RSS */ ixl_config_rss(pf); /* Add protocol filters to list */ ixl_init_filters(vsi); /* Setup vlan's if needed */ ixl_setup_vlan_filters(vsi); /* Set up MSI/X routing and the ITR settings */ if (pf->msix > 1) { ixl_configure_queue_intr_msix(pf); ixl_configure_itr(pf); } else ixl_configure_legacy(pf); ixl_enable_rings(vsi); i40e_aq_set_default_vsi(hw, vsi->seid, NULL); ixl_reconfigure_filters(vsi); /* And now turn on interrupts */ ixl_enable_intr(vsi); /* Get link info */ hw->phy.get_link_info = TRUE; i40e_get_link_status(hw, &pf->link_up); ixl_update_link_status(pf); /* Start the local timer */ callout_reset(&pf->timer, hz, ixl_local_timer, pf); /* Now inform the stack we're ready */ ifp->if_drv_flags |= IFF_DRV_RUNNING; #ifdef IXL_IW if (ixl_enable_iwarp && pf->iw_enabled) { ret = ixl_iw_pf_init(pf); if (ret) device_printf(dev, "initialize iwarp failed, code %d\n", ret); } #endif } /********************************************************************* * * Get the hardware capabilities * **********************************************************************/ int ixl_get_hw_capabilities(struct ixl_pf *pf) { struct i40e_aqc_list_capabilities_element_resp *buf; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; int error, len; u16 needed; bool again = TRUE; len = 40 * sizeof(struct i40e_aqc_list_capabilities_element_resp); retry: if (!(buf = (struct i40e_aqc_list_capabilities_element_resp *) malloc(len, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate cap memory\n"); return (ENOMEM); } /* This populates the hw struct */ error = i40e_aq_discover_capabilities(hw, buf, len, &needed, i40e_aqc_opc_list_func_capabilities, NULL); free(buf, M_DEVBUF); if ((pf->hw.aq.asq_last_status == I40E_AQ_RC_ENOMEM) && (again == TRUE)) { /* retry once with a larger buffer */ again = FALSE; len = needed; goto retry; } else if (pf->hw.aq.asq_last_status != 
I40E_AQ_RC_OK) { device_printf(dev, "capability discovery failed: %d\n", pf->hw.aq.asq_last_status); return (ENODEV); } /* Capture this PF's starting queue pair */ pf->qbase = hw->func_caps.base_queue; #ifdef IXL_DEBUG device_printf(dev, "pf_id=%d, num_vfs=%d, msix_pf=%d, " "msix_vf=%d, fd_g=%d, fd_b=%d, tx_qp=%d rx_qp=%d qbase=%d\n", hw->pf_id, hw->func_caps.num_vfs, hw->func_caps.num_msix_vectors, hw->func_caps.num_msix_vectors_vf, hw->func_caps.fd_filters_guaranteed, hw->func_caps.fd_filters_best_effort, hw->func_caps.num_tx_qp, hw->func_caps.num_rx_qp, hw->func_caps.base_queue); #endif /* Print a subset of the capability information. */ device_printf(dev, "PF-ID[%d]: VFs %d, MSIX %d, VF MSIX %d, QPs %d, %s\n", hw->pf_id, hw->func_caps.num_vfs, hw->func_caps.num_msix_vectors, hw->func_caps.num_msix_vectors_vf, hw->func_caps.num_tx_qp, (hw->func_caps.mdio_port_mode == 2) ? "I2C" : (hw->func_caps.mdio_port_mode == 1) ? "MDIO dedicated" : "MDIO shared"); struct i40e_osdep *osdep = (struct i40e_osdep *)hw->back; osdep->i2c_intfc_num = ixl_find_i2c_interface(pf); if (osdep->i2c_intfc_num != -1) pf->has_i2c = true; return (error); } void ixl_cap_txcsum_tso(struct ixl_vsi *vsi, struct ifnet *ifp, int mask) { device_t dev = vsi->dev; /* Enable/disable TXCSUM/TSO4 */ if (!(ifp->if_capenable & IFCAP_TXCSUM) && !(ifp->if_capenable & IFCAP_TSO4)) { if (mask & IFCAP_TXCSUM) { ifp->if_capenable |= IFCAP_TXCSUM; /* enable TXCSUM, restore TSO if previously enabled */ if (vsi->flags & IXL_FLAGS_KEEP_TSO4) { vsi->flags &= ~IXL_FLAGS_KEEP_TSO4; ifp->if_capenable |= IFCAP_TSO4; } } else if (mask & IFCAP_TSO4) { ifp->if_capenable |= (IFCAP_TXCSUM | IFCAP_TSO4); vsi->flags &= ~IXL_FLAGS_KEEP_TSO4; device_printf(dev, "TSO4 requires txcsum, enabling both...\n"); } } else if((ifp->if_capenable & IFCAP_TXCSUM) && !(ifp->if_capenable & IFCAP_TSO4)) { if (mask & IFCAP_TXCSUM) ifp->if_capenable &= ~IFCAP_TXCSUM; else if (mask & IFCAP_TSO4) ifp->if_capenable |= IFCAP_TSO4; } else 
if((ifp->if_capenable & IFCAP_TXCSUM) && (ifp->if_capenable & IFCAP_TSO4)) { if (mask & IFCAP_TXCSUM) { vsi->flags |= IXL_FLAGS_KEEP_TSO4; ifp->if_capenable &= ~(IFCAP_TXCSUM | IFCAP_TSO4); device_printf(dev, "TSO4 requires txcsum, disabling both...\n"); } else if (mask & IFCAP_TSO4) ifp->if_capenable &= ~IFCAP_TSO4; } /* Enable/disable TXCSUM_IPV6/TSO6 */ if (!(ifp->if_capenable & IFCAP_TXCSUM_IPV6) && !(ifp->if_capenable & IFCAP_TSO6)) { if (mask & IFCAP_TXCSUM_IPV6) { ifp->if_capenable |= IFCAP_TXCSUM_IPV6; if (vsi->flags & IXL_FLAGS_KEEP_TSO6) { vsi->flags &= ~IXL_FLAGS_KEEP_TSO6; ifp->if_capenable |= IFCAP_TSO6; } } else if (mask & IFCAP_TSO6) { ifp->if_capenable |= (IFCAP_TXCSUM_IPV6 | IFCAP_TSO6); vsi->flags &= ~IXL_FLAGS_KEEP_TSO6; device_printf(dev, "TSO6 requires txcsum6, enabling both...\n"); } } else if((ifp->if_capenable & IFCAP_TXCSUM_IPV6) && !(ifp->if_capenable & IFCAP_TSO6)) { if (mask & IFCAP_TXCSUM_IPV6) ifp->if_capenable &= ~IFCAP_TXCSUM_IPV6; else if (mask & IFCAP_TSO6) ifp->if_capenable |= IFCAP_TSO6; } else if ((ifp->if_capenable & IFCAP_TXCSUM_IPV6) && (ifp->if_capenable & IFCAP_TSO6)) { if (mask & IFCAP_TXCSUM_IPV6) { vsi->flags |= IXL_FLAGS_KEEP_TSO6; ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 | IFCAP_TSO6); device_printf(dev, "TSO6 requires txcsum6, disabling both...\n"); } else if (mask & IFCAP_TSO6) ifp->if_capenable &= ~IFCAP_TSO6; } } /* For the set_advertise sysctl */ void ixl_get_initial_advertised_speeds(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; enum i40e_status_code status; struct i40e_aq_get_phy_abilities_resp abilities; /* Set initial sysctl values */ status = i40e_aq_get_phy_capabilities(hw, FALSE, false, &abilities, NULL); if (status) { /* Non-fatal error */ device_printf(dev, "%s: i40e_aq_get_phy_capabilities() error %d\n", __func__, status); return; } pf->advertised_speed = ixl_convert_sysctl_aq_link_speed(abilities.link_speed, false); } int ixl_teardown_hw_structs(struct ixl_pf *pf) { enum 
i40e_status_code status = 0; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; /* Shutdown LAN HMC */ if (hw->hmc.hmc_obj) { status = i40e_shutdown_lan_hmc(hw); if (status) { device_printf(dev, "init: LAN HMC shutdown failure; status %d\n", status); goto err_out; } } // XXX: This gets called when we know the adminq is inactive; // so we already know it's setup when we get here. /* Shutdown admin queue */ status = i40e_shutdown_adminq(hw); if (status) device_printf(dev, "init: Admin Queue shutdown failure; status %d\n", status); err_out: return (status); } int ixl_reset(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; u8 set_fc_err_mask; int error = 0; // XXX: clear_hw() actually writes to hw registers -- maybe this isn't necessary i40e_clear_hw(hw); error = i40e_pf_reset(hw); if (error) { device_printf(dev, "init: PF reset failure"); error = EIO; goto err_out; } error = i40e_init_adminq(hw); if (error) { device_printf(dev, "init: Admin queue init failure;" " status code %d", error); error = EIO; goto err_out; } i40e_clear_pxe_mode(hw); error = ixl_get_hw_capabilities(pf); if (error) { device_printf(dev, "init: Error retrieving HW capabilities;" " status code %d\n", error); goto err_out; } error = i40e_init_lan_hmc(hw, hw->func_caps.num_tx_qp, hw->func_caps.num_rx_qp, 0, 0); if (error) { device_printf(dev, "init: LAN HMC init failed; status code %d\n", error); error = EIO; goto err_out; } error = i40e_configure_lan_hmc(hw, I40E_HMC_MODEL_DIRECT_ONLY); if (error) { device_printf(dev, "init: LAN HMC config failed; status code %d\n", error); error = EIO; goto err_out; } // XXX: possible fix for panic, but our failure recovery is still broken error = ixl_switch_config(pf); if (error) { device_printf(dev, "init: ixl_switch_config() failed: %d\n", error); goto err_out; } error = i40e_aq_set_phy_int_mask(hw, IXL_DEFAULT_PHY_INT_MASK, NULL); if (error) { device_printf(dev, "init: i40e_aq_set_phy_mask() failed: err %d," " aq_err %d\n", error, 
hw->aq.asq_last_status); error = EIO; goto err_out; } error = i40e_set_fc(hw, &set_fc_err_mask, true); if (error) { device_printf(dev, "init: setting link flow control failed; retcode %d," " fc_err_mask 0x%02x\n", error, set_fc_err_mask); goto err_out; } // XXX: (Rebuild VSIs?) /* Firmware delay workaround */ if (((hw->aq.fw_maj_ver == 4) && (hw->aq.fw_min_ver < 33)) || (hw->aq.fw_maj_ver < 4)) { i40e_msec_delay(75); error = i40e_aq_set_link_restart_an(hw, TRUE, NULL); if (error) { device_printf(dev, "init: link restart failed, aq_err %d\n", hw->aq.asq_last_status); goto err_out; } } err_out: return (error); } /* ** MSIX Interrupt Handlers and Tasklets */ void ixl_handle_que(void *context, int pending) { struct ixl_queue *que = context; struct ixl_vsi *vsi = que->vsi; struct i40e_hw *hw = vsi->hw; struct tx_ring *txr = &que->txr; struct ifnet *ifp = vsi->ifp; bool more; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { more = ixl_rxeof(que, IXL_RX_LIMIT); IXL_TX_LOCK(txr); ixl_txeof(que); if (!drbr_empty(ifp, txr->br)) ixl_mq_start_locked(ifp, txr); IXL_TX_UNLOCK(txr); if (more) { taskqueue_enqueue(que->tq, &que->task); return; } } /* Reenable this interrupt - hmmm */ ixl_enable_queue(hw, que->me); return; } /********************************************************************* * * Legacy Interrupt Service routine * **********************************************************************/ void ixl_intr(void *arg) { struct ixl_pf *pf = arg; struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct ixl_queue *que = vsi->queues; struct ifnet *ifp = vsi->ifp; struct tx_ring *txr = &que->txr; u32 icr0; bool more_tx, more_rx; pf->admin_irq++; /* Protect against spurious interrupts */ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; icr0 = rd32(hw, I40E_PFINT_ICR0); #ifdef PCI_IOV if (icr0 & I40E_PFINT_ICR0_VFLR_MASK) taskqueue_enqueue(pf->tq, &pf->vflr_task); #endif if (icr0 & I40E_PFINT_ICR0_ADMINQ_MASK) { taskqueue_enqueue(pf->tq, &pf->adminq); } if (icr0 & 
I40E_PFINT_ICR0_QUEUE_0_MASK) { ++que->irqs; more_rx = ixl_rxeof(que, IXL_RX_LIMIT); IXL_TX_LOCK(txr); more_tx = ixl_txeof(que); if (!drbr_empty(vsi->ifp, txr->br)) more_tx = 1; IXL_TX_UNLOCK(txr); } ixl_enable_intr0(hw); } /********************************************************************* * * MSIX VSI Interrupt Service routine * **********************************************************************/ void ixl_msix_que(void *arg) { struct ixl_queue *que = arg; struct ixl_vsi *vsi = que->vsi; struct i40e_hw *hw = vsi->hw; struct tx_ring *txr = &que->txr; bool more_tx, more_rx; /* Protect against spurious interrupts */ if (!(vsi->ifp->if_drv_flags & IFF_DRV_RUNNING)) return; ++que->irqs; more_rx = ixl_rxeof(que, IXL_RX_LIMIT); IXL_TX_LOCK(txr); more_tx = ixl_txeof(que); /* ** Make certain that if the stack ** has anything queued the task gets ** scheduled to handle it. */ if (!drbr_empty(vsi->ifp, txr->br)) more_tx = 1; IXL_TX_UNLOCK(txr); ixl_set_queue_rx_itr(que); ixl_set_queue_tx_itr(que); if (more_tx || more_rx) taskqueue_enqueue(que->tq, &que->task); else ixl_enable_queue(hw, que->me); return; } /********************************************************************* * * MSIX Admin Queue Interrupt Service routine * **********************************************************************/ void ixl_msix_adminq(void *arg) { struct ixl_pf *pf = arg; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; u32 reg, mask, rstat_reg; bool do_task = FALSE; ++pf->admin_irq; reg = rd32(hw, I40E_PFINT_ICR0); mask = rd32(hw, I40E_PFINT_ICR0_ENA); /* Check on the cause */ if (reg & I40E_PFINT_ICR0_ADMINQ_MASK) { mask &= ~I40E_PFINT_ICR0_ADMINQ_MASK; do_task = TRUE; } if (reg & I40E_PFINT_ICR0_MAL_DETECT_MASK) { ixl_handle_mdd_event(pf); mask &= ~I40E_PFINT_ICR0_MAL_DETECT_MASK; } if (reg & I40E_PFINT_ICR0_GRST_MASK) { device_printf(dev, "Reset Requested!\n"); rstat_reg = rd32(hw, I40E_GLGEN_RSTAT); rstat_reg = (rstat_reg & I40E_GLGEN_RSTAT_RESET_TYPE_MASK) >> 
I40E_GLGEN_RSTAT_RESET_TYPE_SHIFT; device_printf(dev, "Reset type: "); switch (rstat_reg) { /* These others might be handled similarly to an EMPR reset */ case I40E_RESET_CORER: printf("CORER\n"); break; case I40E_RESET_GLOBR: printf("GLOBR\n"); break; case I40E_RESET_EMPR: printf("EMPR\n"); atomic_set_int(&pf->state, IXL_PF_STATE_EMPR_RESETTING); break; default: printf("POR\n"); break; } /* overload admin queue task to check reset progress */ do_task = TRUE; } if (reg & I40E_PFINT_ICR0_ECC_ERR_MASK) { device_printf(dev, "ECC Error detected!\n"); } if (reg & I40E_PFINT_ICR0_HMC_ERR_MASK) { reg = rd32(hw, I40E_PFHMC_ERRORINFO); if (reg & I40E_PFHMC_ERRORINFO_ERROR_DETECTED_MASK) { device_printf(dev, "HMC Error detected!\n"); device_printf(dev, "INFO 0x%08x\n", reg); reg = rd32(hw, I40E_PFHMC_ERRORDATA); device_printf(dev, "DATA 0x%08x\n", reg); wr32(hw, I40E_PFHMC_ERRORINFO, 0); } } if (reg & I40E_PFINT_ICR0_PCI_EXCEPTION_MASK) { device_printf(dev, "PCI Exception detected!\n"); } #ifdef PCI_IOV if (reg & I40E_PFINT_ICR0_VFLR_MASK) { mask &= ~I40E_PFINT_ICR0_ENA_VFLR_MASK; taskqueue_enqueue(pf->tq, &pf->vflr_task); } #endif if (do_task) taskqueue_enqueue(pf->tq, &pf->adminq); else ixl_enable_intr0(hw); } void ixl_set_promisc(struct ixl_vsi *vsi) { struct ifnet *ifp = vsi->ifp; struct i40e_hw *hw = vsi->hw; int err, mcnt = 0; bool uni = FALSE, multi = FALSE; if (ifp->if_flags & IFF_ALLMULTI) multi = TRUE; else { /* Need to count the multicast addresses */ struct ifmultiaddr *ifma; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (mcnt == MAX_MULTICAST_ADDR) break; mcnt++; } if_maddr_runlock(ifp); } if (mcnt >= MAX_MULTICAST_ADDR) multi = TRUE; if (ifp->if_flags & IFF_PROMISC) uni = TRUE; err = i40e_aq_set_vsi_unicast_promiscuous(hw, vsi->seid, uni, NULL, TRUE); err = i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid, multi, NULL); return; } 
/********************************************************************* * Filter Routines * * Routines for multicast and vlan filter management. * *********************************************************************/ void ixl_add_multi(struct ixl_vsi *vsi) { struct ifmultiaddr *ifma; struct ifnet *ifp = vsi->ifp; struct i40e_hw *hw = vsi->hw; int mcnt = 0, flags; IOCTL_DEBUGOUT("ixl_add_multi: begin"); if_maddr_rlock(ifp); /* ** First just get a count, to decide if we ** we simply use multicast promiscuous. */ TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; mcnt++; } if_maddr_runlock(ifp); if (__predict_false(mcnt >= MAX_MULTICAST_ADDR)) { /* delete existing MC filters */ ixl_del_hw_filters(vsi, mcnt); i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid, TRUE, NULL); return; } mcnt = 0; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; ixl_add_mc_filter(vsi, (u8*)LLADDR((struct sockaddr_dl *) ifma->ifma_addr)); mcnt++; } if_maddr_runlock(ifp); if (mcnt > 0) { flags = (IXL_FILTER_ADD | IXL_FILTER_USED | IXL_FILTER_MC); ixl_add_hw_filters(vsi, flags, mcnt); } IOCTL_DEBUGOUT("ixl_add_multi: end"); return; } void ixl_del_multi(struct ixl_vsi *vsi) { struct ifnet *ifp = vsi->ifp; struct ifmultiaddr *ifma; struct ixl_mac_filter *f; int mcnt = 0; bool match = FALSE; IOCTL_DEBUGOUT("ixl_del_multi: begin"); /* Search for removed multicast addresses */ if_maddr_rlock(ifp); SLIST_FOREACH(f, &vsi->ftl, next) { if ((f->flags & IXL_FILTER_USED) && (f->flags & IXL_FILTER_MC)) { match = FALSE; TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; u8 *mc_addr = (u8 *)LLADDR((struct sockaddr_dl *)ifma->ifma_addr); if (cmp_etheraddr(f->macaddr, mc_addr)) { match = TRUE; break; } } if (match == FALSE) { f->flags |= IXL_FILTER_DEL; mcnt++; } } } if_maddr_runlock(ifp); if (mcnt > 0) 
ixl_del_hw_filters(vsi, mcnt); } /********************************************************************* * Timer routine * * This routine checks for link status,updates statistics, * and runs the watchdog check. * * Only runs when the driver is configured UP and RUNNING. * **********************************************************************/ void ixl_local_timer(void *arg) { struct ixl_pf *pf = arg; struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct ixl_queue *que = vsi->queues; device_t dev = pf->dev; struct tx_ring *txr; int hung = 0; u32 mask; s32 timer, new_timer; IXL_PF_LOCK_ASSERT(pf); /* Fire off the adminq task */ taskqueue_enqueue(pf->tq, &pf->adminq); /* Update stats */ ixl_update_stats_counters(pf); /* Check status of the queues */ mask = (I40E_PFINT_DYN_CTLN_INTENA_MASK | I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK | I40E_PFINT_DYN_CTLN_ITR_INDX_MASK); for (int i = 0; i < vsi->num_queues; i++, que++) { txr = &que->txr; timer = atomic_load_acq_32(&txr->watchdog_timer); if (timer > 0) { new_timer = timer - hz; if (new_timer <= 0) { atomic_store_rel_32(&txr->watchdog_timer, -1); device_printf(dev, "WARNING: queue %d " "appears to be hung!\n", que->me); ++hung; } else { /* * If this fails, that means something in the TX path has updated * the watchdog, so it means the TX path is still working and * the watchdog doesn't need to countdown. 
*/ atomic_cmpset_rel_32(&txr->watchdog_timer, timer, new_timer); /* Any queues with outstanding work get a sw irq */ wr32(hw, I40E_PFINT_DYN_CTLN(que->me), mask); } } } /* Reset when a queue shows hung */ if (hung) goto hung; callout_reset(&pf->timer, hz, ixl_local_timer, pf); return; hung: device_printf(dev, "WARNING: Resetting!\n"); pf->watchdog_events++; ixl_init_locked(pf); } void ixl_link_up_msg(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ifnet *ifp = pf->vsi.ifp; log(LOG_NOTICE, "%s: Link is up, %s Full Duplex, FEC: %s, Autoneg: %s, Flow Control: %s\n", ifp->if_xname, ixl_aq_speed_to_str(hw->phy.link_info.link_speed), (hw->phy.link_info.fec_info & I40E_AQ_CONFIG_FEC_KR_ENA) ? "Clause 74 BASE-R FEC" : (hw->phy.link_info.fec_info & I40E_AQ_CONFIG_FEC_RS_ENA) ? "Clause 108 RS-FEC" : "None", (hw->phy.link_info.an_info & I40E_AQ_AN_COMPLETED) ? "True" : "False", (hw->phy.link_info.an_info & I40E_AQ_LINK_PAUSE_TX && hw->phy.link_info.an_info & I40E_AQ_LINK_PAUSE_RX) ? ixl_fc_string[3] : (hw->phy.link_info.an_info & I40E_AQ_LINK_PAUSE_TX) ? ixl_fc_string[2] : (hw->phy.link_info.an_info & I40E_AQ_LINK_PAUSE_RX) ? ixl_fc_string[1] : ixl_fc_string[0]); } /* ** Note: this routine updates the OS on the link state ** the real check of the hardware only happens with ** a link interrupt. 
*/ void ixl_update_link_status(struct ixl_pf *pf) { struct ixl_vsi *vsi = &pf->vsi; struct ifnet *ifp = vsi->ifp; device_t dev = pf->dev; if (pf->link_up) { if (vsi->link_active == FALSE) { vsi->link_active = TRUE; ifp->if_baudrate = ixl_max_aq_speed_to_value(pf->link_speed); if_link_state_change(ifp, LINK_STATE_UP); ixl_link_up_msg(pf); } } else { /* Link down */ if (vsi->link_active == TRUE) { if (bootverbose) device_printf(dev, "Link is Down\n"); if_link_state_change(ifp, LINK_STATE_DOWN); vsi->link_active = FALSE; } } return; } /********************************************************************* * * This routine disables all traffic on the adapter by issuing a * global reset on the MAC and deallocates TX/RX buffers. * **********************************************************************/ void ixl_stop_locked(struct ixl_pf *pf) { struct ixl_vsi *vsi = &pf->vsi; struct ifnet *ifp = vsi->ifp; INIT_DEBUGOUT("ixl_stop: begin\n"); IXL_PF_LOCK_ASSERT(pf); #ifdef IXL_IW /* Stop iWARP device */ if (ixl_enable_iwarp && pf->iw_enabled) ixl_iw_pf_stop(pf); #endif /* Stop the local timer */ callout_stop(&pf->timer); ixl_disable_rings_intr(vsi); ixl_disable_rings(vsi); /* Tell the stack that the interface is no longer active */ ifp->if_drv_flags &= ~(IFF_DRV_RUNNING); } void ixl_stop(struct ixl_pf *pf) { IXL_PF_LOCK(pf); ixl_stop_locked(pf); IXL_PF_UNLOCK(pf); } /********************************************************************* * * Setup MSIX Interrupt resources and handlers for the VSI * **********************************************************************/ int ixl_setup_legacy(struct ixl_pf *pf) { device_t dev = pf->dev; int error, rid = 0; if (pf->msix == 1) rid = 1; pf->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (pf->res == NULL) { device_printf(dev, "bus_alloc_resource_any() for" " legacy/msi interrupt\n"); return (ENXIO); } /* Set the handler function */ error = bus_setup_intr(dev, pf->res, INTR_TYPE_NET | INTR_MPSAFE, 
NULL, ixl_intr, pf, &pf->tag); if (error) { pf->res = NULL; device_printf(dev, "bus_setup_intr() for legacy/msi" " interrupt handler failed, error %d\n", error); return (ENXIO); } error = bus_describe_intr(dev, pf->res, pf->tag, "irq"); if (error) { /* non-fatal */ device_printf(dev, "bus_describe_intr() for Admin Queue" " interrupt name failed, error %d\n", error); } return (0); } int ixl_setup_adminq_tq(struct ixl_pf *pf) { device_t dev = pf->dev; int error = 0; /* Tasklet for Admin Queue interrupts */ TASK_INIT(&pf->adminq, 0, ixl_do_adminq, pf); #ifdef PCI_IOV /* VFLR Tasklet */ TASK_INIT(&pf->vflr_task, 0, ixl_handle_vflr, pf); #endif /* Create and start Admin Queue taskqueue */ pf->tq = taskqueue_create_fast("ixl_aq", M_NOWAIT, taskqueue_thread_enqueue, &pf->tq); if (!pf->tq) { device_printf(dev, "taskqueue_create_fast (for AQ) returned NULL!\n"); return (ENOMEM); } error = taskqueue_start_threads(&pf->tq, 1, PI_NET, "%s aq", device_get_nameunit(dev)); if (error) { device_printf(dev, "taskqueue_start_threads (for AQ) error: %d\n", error); taskqueue_free(pf->tq); return (error); } return (0); } int ixl_setup_queue_tqs(struct ixl_vsi *vsi) { struct ixl_queue *que = vsi->queues; device_t dev = vsi->dev; #ifdef RSS int cpu_id = 0; cpuset_t cpu_mask; #endif /* Create queue tasks and start queue taskqueues */ for (int i = 0; i < vsi->num_queues; i++, que++) { TASK_INIT(&que->tx_task, 0, ixl_deferred_mq_start, que); TASK_INIT(&que->task, 0, ixl_handle_que, que); que->tq = taskqueue_create_fast("ixl_que", M_NOWAIT, taskqueue_thread_enqueue, &que->tq); #ifdef RSS CPU_SETOF(cpu_id, &cpu_mask); taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET, &cpu_mask, "%s (bucket %d)", device_get_nameunit(dev), cpu_id); #else taskqueue_start_threads(&que->tq, 1, PI_NET, "%s (que %d)", device_get_nameunit(dev), que->me); #endif } return (0); } void ixl_free_adminq_tq(struct ixl_pf *pf) { if (pf->tq) { taskqueue_free(pf->tq); pf->tq = NULL; } } void ixl_free_queue_tqs(struct ixl_vsi 
*vsi) { struct ixl_queue *que = vsi->queues; for (int i = 0; i < vsi->num_queues; i++, que++) { if (que->tq) { taskqueue_free(que->tq); que->tq = NULL; } } } int ixl_setup_adminq_msix(struct ixl_pf *pf) { device_t dev = pf->dev; int rid, error = 0; /* Admin IRQ rid is 1, vector is 0 */ rid = 1; /* Get interrupt resource from bus */ pf->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (!pf->res) { device_printf(dev, "bus_alloc_resource_any() for Admin Queue" " interrupt failed [rid=%d]\n", rid); return (ENXIO); } /* Then associate interrupt with handler */ error = bus_setup_intr(dev, pf->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, ixl_msix_adminq, pf, &pf->tag); if (error) { pf->res = NULL; device_printf(dev, "bus_setup_intr() for Admin Queue" " interrupt handler failed, error %d\n", error); return (ENXIO); } error = bus_describe_intr(dev, pf->res, pf->tag, "aq"); if (error) { /* non-fatal */ device_printf(dev, "bus_describe_intr() for Admin Queue" " interrupt name failed, error %d\n", error); } pf->admvec = 0; return (0); } /* * Allocate interrupt resources from bus and associate an interrupt handler * to those for the VSI's queues. 
*/ int ixl_setup_queue_msix(struct ixl_vsi *vsi) { device_t dev = vsi->dev; struct ixl_queue *que = vsi->queues; struct tx_ring *txr; int error, rid, vector = 1; /* Queue interrupt vector numbers start at 1 (adminq intr is 0) */ for (int i = 0; i < vsi->num_queues; i++, vector++, que++) { int cpu_id = i; rid = vector + 1; txr = &que->txr; que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (!que->res) { device_printf(dev, "bus_alloc_resource_any() for" " Queue %d interrupt failed [rid=%d]\n", que->me, rid); return (ENXIO); } /* Set the handler function */ error = bus_setup_intr(dev, que->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, ixl_msix_que, que, &que->tag); if (error) { device_printf(dev, "bus_setup_intr() for Queue %d" " interrupt handler failed, error %d\n", que->me, error); bus_release_resource(dev, SYS_RES_IRQ, rid, que->res); return (error); } error = bus_describe_intr(dev, que->res, que->tag, "q%d", i); if (error) { device_printf(dev, "bus_describe_intr() for Queue %d" " interrupt name failed, error %d\n", que->me, error); } /* Bind the vector to a CPU */ #ifdef RSS cpu_id = rss_getcpu(i % rss_getnumbuckets()); #endif error = bus_bind_intr(dev, que->res, cpu_id); if (error) { device_printf(dev, "bus_bind_intr() for Queue %d" " to CPU %d failed, error %d\n", que->me, cpu_id, error); } que->msix = vector; } return (0); } /* * When used in a virtualized environment PCI BUSMASTER capability may not be set * so explicity set it here and rewrite the ENABLE in the MSIX control register * at this point to cause the host to successfully initialize us. */ void ixl_set_busmaster(device_t dev) { u16 pci_cmd_word; pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); pci_cmd_word |= PCIM_CMD_BUSMASTEREN; pci_write_config(dev, PCIR_COMMAND, pci_cmd_word, 2); } /* * rewrite the ENABLE in the MSIX control register * to cause the host to successfully initialize us. 
*/ void ixl_set_msix_enable(device_t dev) { int msix_ctrl, rid; pci_find_cap(dev, PCIY_MSIX, &rid); rid += PCIR_MSIX_CTRL; msix_ctrl = pci_read_config(dev, rid, 2); msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE; pci_write_config(dev, rid, msix_ctrl, 2); } /* * Allocate MSI/X vectors from the OS. * Returns 0 for legacy, 1 for MSI, >1 for MSIX. */ int ixl_init_msix(struct ixl_pf *pf) { device_t dev = pf->dev; struct i40e_hw *hw = &pf->hw; int auto_max_queues; int rid, want, vectors, queues, available; #ifdef IXL_IW int iw_want, iw_vectors; pf->iw_msix = 0; #endif /* Override by tuneable */ if (!pf->enable_msix) goto no_msix; /* Ensure proper operation in virtualized environment */ ixl_set_busmaster(dev); /* First try MSI/X */ rid = PCIR_BAR(IXL_MSIX_BAR); pf->msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!pf->msix_mem) { /* May not be enabled */ device_printf(pf->dev, "Unable to map MSIX table\n"); goto no_msix; } available = pci_msix_count(dev); if (available < 2) { /* system has msix disabled (0), or only one vector (1) */ bus_release_resource(dev, SYS_RES_MEMORY, rid, pf->msix_mem); pf->msix_mem = NULL; goto no_msix; } /* Clamp max number of queues based on: * - # of MSI-X vectors available * - # of cpus available * - # of queues that can be assigned to the LAN VSI */ auto_max_queues = min(mp_ncpus, available - 1); if (hw->mac.type == I40E_MAC_X722) auto_max_queues = min(auto_max_queues, 128); else auto_max_queues = min(auto_max_queues, 64); /* Override with tunable value if tunable is less than autoconfig count */ if ((pf->max_queues != 0) && (pf->max_queues <= auto_max_queues)) queues = pf->max_queues; /* Use autoconfig amount if that's lower */ else if ((pf->max_queues != 0) && (pf->max_queues > auto_max_queues)) { device_printf(dev, "ixl_max_queues (%d) is too large, using " "autoconfig amount (%d)...\n", pf->max_queues, auto_max_queues); queues = auto_max_queues; } /* Limit maximum auto-configured queues to 8 if no user value is set */ 
else queues = min(auto_max_queues, 8); #ifdef RSS /* If we're doing RSS, clamp at the number of RSS buckets */ if (queues > rss_getnumbuckets()) queues = rss_getnumbuckets(); #endif /* ** Want one vector (RX/TX pair) per queue ** plus an additional for the admin queue. */ want = queues + 1; if (want <= available) /* Have enough */ vectors = want; else { device_printf(pf->dev, "MSIX Configuration Problem, " "%d vectors available but %d wanted!\n", available, want); pf->msix_mem = NULL; goto no_msix; /* Will go to Legacy setup */ } #ifdef IXL_IW if (ixl_enable_iwarp) { /* iWARP wants additional vector for CQP */ iw_want = mp_ncpus + 1; available -= vectors; if (available > 0) { iw_vectors = (available >= iw_want) ? iw_want : available; vectors += iw_vectors; } else iw_vectors = 0; } #endif ixl_set_msix_enable(dev); if (pci_alloc_msix(dev, &vectors) == 0) { device_printf(pf->dev, "Using MSIX interrupts with %d vectors\n", vectors); pf->msix = vectors; #ifdef IXL_IW if (ixl_enable_iwarp) pf->iw_msix = iw_vectors; #endif pf->vsi.num_queues = queues; #ifdef RSS /* * If we're doing RSS, the number of queues needs to * match the number of RSS buckets that are configured. * * + If there's more queues than RSS buckets, we'll end * up with queues that get no traffic. * * + If there's more RSS buckets than queues, we'll end * up having multiple RSS buckets map to the same queue, * so there'll be some contention. */ if (queues != rss_getnumbuckets()) { device_printf(dev, "%s: queues (%d) != RSS buckets (%d)" "; performance will be impacted.\n", __func__, queues, rss_getnumbuckets()); } #endif return (vectors); } no_msix: vectors = pci_msi_count(dev); pf->vsi.num_queues = 1; pf->max_queues = 1; if (vectors == 1 && pci_alloc_msi(dev, &vectors) == 0) device_printf(pf->dev, "Using an MSI interrupt\n"); else { vectors = 0; device_printf(pf->dev, "Using a Legacy interrupt\n"); } return (vectors); } /* * Configure admin queue/misc interrupt cause registers in hardware. 
*/ void ixl_configure_intr0_msix(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; u32 reg; /* First set up the adminq - vector 0 */ wr32(hw, I40E_PFINT_ICR0_ENA, 0); /* disable all */ rd32(hw, I40E_PFINT_ICR0); /* read to clear */ reg = I40E_PFINT_ICR0_ENA_ECC_ERR_MASK | I40E_PFINT_ICR0_ENA_GRST_MASK | I40E_PFINT_ICR0_ENA_HMC_ERR_MASK | I40E_PFINT_ICR0_ENA_ADMINQ_MASK | I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK | I40E_PFINT_ICR0_ENA_VFLR_MASK | I40E_PFINT_ICR0_ENA_PCI_EXCEPTION_MASK; wr32(hw, I40E_PFINT_ICR0_ENA, reg); /* * 0x7FF is the end of the queue list. * This means we won't use MSI-X vector 0 for a queue interrupt * in MSIX mode. */ wr32(hw, I40E_PFINT_LNKLST0, 0x7FF); /* Value is in 2 usec units, so 0x3E is 62*2 = 124 usecs. */ wr32(hw, I40E_PFINT_ITR0(IXL_RX_ITR), 0x3E); wr32(hw, I40E_PFINT_DYN_CTL0, I40E_PFINT_DYN_CTL0_SW_ITR_INDX_MASK | I40E_PFINT_DYN_CTL0_INTENA_MSK_MASK); wr32(hw, I40E_PFINT_STAT_CTL0, 0); } /* * Configure queue interrupt cause registers in hardware. */ void ixl_configure_queue_intr_msix(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; u32 reg; u16 vector = 1; for (int i = 0; i < vsi->num_queues; i++, vector++) { wr32(hw, I40E_PFINT_DYN_CTLN(i), 0); /* First queue type is RX / 0 */ wr32(hw, I40E_PFINT_LNKLSTN(i), i); reg = I40E_QINT_RQCTL_CAUSE_ENA_MASK | (IXL_RX_ITR << I40E_QINT_RQCTL_ITR_INDX_SHIFT) | (vector << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) | (i << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) | (I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT); wr32(hw, I40E_QINT_RQCTL(i), reg); reg = I40E_QINT_TQCTL_CAUSE_ENA_MASK | (IXL_TX_ITR << I40E_QINT_TQCTL_ITR_INDX_SHIFT) | (vector << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) | (IXL_QUEUE_EOL << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) | (I40E_QUEUE_TYPE_RX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT); wr32(hw, I40E_QINT_TQCTL(i), reg); } } /* * Configure for MSI single vector operation */ void ixl_configure_legacy(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = 
&pf->vsi; struct ixl_queue *que = vsi->queues; struct rx_ring *rxr = &que->rxr; struct tx_ring *txr = &que->txr; u32 reg; /* Configure ITR */ vsi->tx_itr_setting = pf->tx_itr; wr32(hw, I40E_PFINT_ITR0(IXL_TX_ITR), vsi->tx_itr_setting); txr->itr = vsi->tx_itr_setting; vsi->rx_itr_setting = pf->rx_itr; wr32(hw, I40E_PFINT_ITR0(IXL_RX_ITR), vsi->rx_itr_setting); rxr->itr = vsi->rx_itr_setting; /* Setup "other" causes */ reg = I40E_PFINT_ICR0_ENA_ECC_ERR_MASK | I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK | I40E_PFINT_ICR0_ENA_GRST_MASK | I40E_PFINT_ICR0_ENA_PCI_EXCEPTION_MASK | I40E_PFINT_ICR0_ENA_GPIO_MASK | I40E_PFINT_ICR0_ENA_LINK_STAT_CHANGE_MASK | I40E_PFINT_ICR0_ENA_HMC_ERR_MASK | I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK | I40E_PFINT_ICR0_ENA_VFLR_MASK | I40E_PFINT_ICR0_ENA_ADMINQ_MASK ; wr32(hw, I40E_PFINT_ICR0_ENA, reg); /* No ITR for non-queue interrupts */ wr32(hw, I40E_PFINT_STAT_CTL0, IXL_ITR_NONE << I40E_PFINT_STAT_CTL0_OTHER_ITR_INDX_SHIFT); /* FIRSTQ_INDX = 0, FIRSTQ_TYPE = 0 (rx) */ wr32(hw, I40E_PFINT_LNKLST0, 0); /* Associate the queue pair to the vector and enable the q int */ reg = I40E_QINT_RQCTL_CAUSE_ENA_MASK | (IXL_RX_ITR << I40E_QINT_RQCTL_ITR_INDX_SHIFT) | (I40E_QUEUE_TYPE_TX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT); wr32(hw, I40E_QINT_RQCTL(0), reg); reg = I40E_QINT_TQCTL_CAUSE_ENA_MASK | (IXL_TX_ITR << I40E_QINT_TQCTL_ITR_INDX_SHIFT) | (IXL_QUEUE_EOL << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT); wr32(hw, I40E_QINT_TQCTL(0), reg); } int ixl_allocate_pci_resources(struct ixl_pf *pf) { int rid; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; /* Map BAR0 */ rid = PCIR_BAR(0); pf->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!(pf->pci_mem)) { device_printf(dev, "Unable to allocate bus resource: PCI memory\n"); return (ENXIO); } /* Save off the PCI information */ hw->vendor_id = pci_get_vendor(dev); hw->device_id = pci_get_device(dev); hw->revision_id = pci_read_config(dev, PCIR_REVID, 1); hw->subsystem_vendor_id = pci_read_config(dev, 
PCIR_SUBVEND_0, 2); hw->subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2); hw->bus.device = pci_get_slot(dev); hw->bus.func = pci_get_function(dev); /* Save off register access information */ pf->osdep.mem_bus_space_tag = rman_get_bustag(pf->pci_mem); pf->osdep.mem_bus_space_handle = rman_get_bushandle(pf->pci_mem); pf->osdep.mem_bus_space_size = rman_get_size(pf->pci_mem); pf->osdep.flush_reg = I40E_GLGEN_STAT; pf->hw.hw_addr = (u8 *) &pf->osdep.mem_bus_space_handle; pf->hw.back = &pf->osdep; return (0); } /* * Teardown and release the admin queue/misc vector * interrupt. */ int ixl_teardown_adminq_msix(struct ixl_pf *pf) { device_t dev = pf->dev; int rid, error = 0; if (pf->admvec) /* we are doing MSIX */ rid = pf->admvec + 1; else (pf->msix != 0) ? (rid = 1):(rid = 0); if (pf->tag != NULL) { bus_teardown_intr(dev, pf->res, pf->tag); if (error) { device_printf(dev, "bus_teardown_intr() for" " interrupt 0 failed\n"); // return (ENXIO); } pf->tag = NULL; } if (pf->res != NULL) { bus_release_resource(dev, SYS_RES_IRQ, rid, pf->res); if (error) { device_printf(dev, "bus_release_resource() for" " interrupt 0 failed [rid=%d]\n", rid); // return (ENXIO); } pf->res = NULL; } return (0); } int ixl_teardown_queue_msix(struct ixl_vsi *vsi) { struct ixl_pf *pf = (struct ixl_pf *)vsi->back; struct ixl_queue *que = vsi->queues; device_t dev = vsi->dev; int rid, error = 0; /* We may get here before stations are setup */ if ((pf->msix < 2) || (que == NULL)) return (0); /* Release all MSIX queue resources */ for (int i = 0; i < vsi->num_queues; i++, que++) { rid = que->msix + 1; if (que->tag != NULL) { error = bus_teardown_intr(dev, que->res, que->tag); if (error) { device_printf(dev, "bus_teardown_intr() for" " Queue %d interrupt failed\n", que->me); // return (ENXIO); } que->tag = NULL; } if (que->res != NULL) { error = bus_release_resource(dev, SYS_RES_IRQ, rid, que->res); if (error) { device_printf(dev, "bus_release_resource() for" " Queue %d interrupt failed 
[rid=%d]\n", que->me, rid); // return (ENXIO); } que->res = NULL; } } return (0); } void ixl_free_pci_resources(struct ixl_pf *pf) { device_t dev = pf->dev; int memrid; ixl_teardown_queue_msix(&pf->vsi); ixl_teardown_adminq_msix(pf); if (pf->msix > 0) pci_release_msi(dev); memrid = PCIR_BAR(IXL_MSIX_BAR); if (pf->msix_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, memrid, pf->msix_mem); if (pf->pci_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0), pf->pci_mem); return; } void ixl_add_ifmedia(struct ixl_vsi *vsi, u64 phy_types) { /* Display supported media types */ if (phy_types & (I40E_CAP_PHY_TYPE_100BASE_TX)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_100_TX, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_1000BASE_T)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_1000_T, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_1000BASE_SX)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_1000_SX, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_1000BASE_LX)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_1000_LX, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_XAUI) || phy_types & (I40E_CAP_PHY_TYPE_XFI) || phy_types & (I40E_CAP_PHY_TYPE_10GBASE_SFPP_CU)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_TWINAX, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_10GBASE_SR)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_SR, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_10GBASE_LR)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_LR, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_10GBASE_T)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_T, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_40GBASE_CR4) || phy_types & (I40E_CAP_PHY_TYPE_40GBASE_CR4_CU) || phy_types & (I40E_CAP_PHY_TYPE_40GBASE_AOC) || phy_types & (I40E_CAP_PHY_TYPE_XLAUI) || phy_types & (I40E_CAP_PHY_TYPE_40GBASE_KR4)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_40G_CR4, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_40GBASE_SR4)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_40G_SR4, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_40GBASE_LR4)) 
ifmedia_add(&vsi->media, IFM_ETHER | IFM_40G_LR4, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_1000BASE_KX)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_1000_KX, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_10GBASE_CR1_CU) || phy_types & (I40E_CAP_PHY_TYPE_10GBASE_CR1)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_CR1, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_10GBASE_AOC)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_TWINAX_LONG, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_SFI)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_SFI, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_10GBASE_KX4)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_KX4, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_10GBASE_KR)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_KR, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_20GBASE_KR2)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_20G_KR2, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_40GBASE_KR4)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_40G_KR4, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_XLPPI)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_40G_XLPPI, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_25GBASE_KR)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_25G_KR, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_25GBASE_CR)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_25G_CR, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_25GBASE_SR)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_25G_SR, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_25GBASE_LR)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_UNKNOWN, 0, NULL); } /********************************************************************* * * Setup networking device structure and register an interface. 
* **********************************************************************/ int ixl_setup_interface(device_t dev, struct ixl_vsi *vsi) { struct ixl_pf *pf = (struct ixl_pf *)vsi->back; struct ifnet *ifp; struct i40e_hw *hw = vsi->hw; struct ixl_queue *que = vsi->queues; struct i40e_aq_get_phy_abilities_resp abilities; enum i40e_status_code aq_error = 0; INIT_DEBUGOUT("ixl_setup_interface: begin"); ifp = vsi->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not allocate ifnet structure\n"); return (-1); } if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_mtu = ETHERMTU; ifp->if_init = ixl_init; ifp->if_softc = vsi; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = ixl_ioctl; #if __FreeBSD_version >= 1100036 if_setgetcounterfn(ifp, ixl_get_counter); #endif ifp->if_transmit = ixl_mq_start; ifp->if_qflush = ixl_qflush; ifp->if_snd.ifq_maxlen = que->num_desc - 2; vsi->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN; /* Set TSO limits */ ifp->if_hw_tsomax = IP_MAXPACKET - (ETHER_HDR_LEN + ETHER_CRC_LEN); ifp->if_hw_tsomaxsegcount = IXL_MAX_TSO_SEGS; ifp->if_hw_tsomaxsegsize = PAGE_SIZE; /* * Tell the upper layer(s) we support long frames. */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_HWCSUM; ifp->if_capabilities |= IFCAP_HWCSUM_IPV6; ifp->if_capabilities |= IFCAP_TSO; ifp->if_capabilities |= IFCAP_JUMBO_MTU; ifp->if_capabilities |= IFCAP_LRO; /* VLAN capabilties */ ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | IFCAP_VLAN_MTU | IFCAP_VLAN_HWCSUM; ifp->if_capenable = ifp->if_capabilities; /* ** Don't turn this on by default, if vlans are ** created on another pseudo device (eg. lagg) ** then vlan events are not passed thru, breaking ** operation, but with HW FILTER off it works. If ** using vlans directly on the ixl driver you can ** enable this and get full hardware tag filtering. 
*/ ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; /* * Specify the media types supported by this adapter and register * callbacks to update media and link information */ ifmedia_init(&vsi->media, IFM_IMASK, ixl_media_change, ixl_media_status); aq_error = i40e_aq_get_phy_capabilities(hw, FALSE, TRUE, &abilities, NULL); /* May need delay to detect fiber correctly */ if (aq_error == I40E_ERR_UNKNOWN_PHY) { i40e_msec_delay(200); aq_error = i40e_aq_get_phy_capabilities(hw, FALSE, TRUE, &abilities, NULL); } if (aq_error) { if (aq_error == I40E_ERR_UNKNOWN_PHY) device_printf(dev, "Unknown PHY type detected!\n"); else device_printf(dev, "Error getting supported media types, err %d," " AQ error %d\n", aq_error, hw->aq.asq_last_status); return (0); } pf->supported_speeds = abilities.link_speed; ifp->if_baudrate = ixl_max_aq_speed_to_value(pf->supported_speeds); ixl_add_ifmedia(vsi, hw->phy.phy_types); /* Use autoselect media by default */ ifmedia_add(&vsi->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&vsi->media, IFM_ETHER | IFM_AUTO); ether_ifattach(ifp, hw->mac.addr); return (0); } /* ** Run when the Admin Queue gets a link state change interrupt. 
*/
void
ixl_link_event(struct ixl_pf *pf, struct i40e_arq_event_info *e)
{
	struct i40e_hw *hw = &pf->hw;
	device_t dev = pf->dev;
	/* The event descriptor's raw params are viewed as a link-status reply. */
	struct i40e_aqc_get_link_status *status =
	    (struct i40e_aqc_get_link_status *)&e->desc.params.raw;

	/* Request link status from adapter */
	hw->phy.get_link_info = TRUE;
	i40e_get_link_status(hw, &pf->link_up);

	/* Print out message if an unqualified module is found */
	if ((status->link_info & I40E_AQ_MEDIA_AVAILABLE) &&
	    (!(status->an_info & I40E_AQ_QUALIFIED_MODULE)) &&
	    (!(status->link_info & I40E_AQ_LINK_UP)))
		device_printf(dev, "Link failed because "
		    "an unqualified module was detected!\n");

	/* Update OS link info */
	ixl_update_link_status(pf);
}

/*********************************************************************
 *
 *  Get Firmware Switch configuration
 *	- this will need to be more robust when more complex
 *	  switch configurations are enabled.
 *
 *  On success the seid/uplink/downlink of element 0 are copied into
 *  pf->vsi.  Returns the status of i40e_aq_get_switch_config().
 *
 **********************************************************************/
int
ixl_switch_config(struct ixl_pf *pf)
{
	struct i40e_hw	*hw = &pf->hw;
	struct ixl_vsi	*vsi = &pf->vsi;
	device_t	dev = vsi->dev;
	struct i40e_aqc_get_switch_config_resp *sw_config;
	u8	aq_buf[I40E_AQ_LARGE_BUF];
	int	ret;
	u16	next = 0;

	memset(&aq_buf, 0, sizeof(aq_buf));
	sw_config = (struct i40e_aqc_get_switch_config_resp *)aq_buf;
	ret = i40e_aq_get_switch_config(hw, sw_config,
	    sizeof(aq_buf), &next, NULL);
	if (ret) {
		device_printf(dev, "aq_get_switch_config() failed, error %d,"
		    " aq_error %d\n", ret, pf->hw.aq.asq_last_status);
		return (ret);
	}
	/* Optional dump of every reported switch element. */
	if (pf->dbg_mask & IXL_DBG_SWITCH_INFO) {
		device_printf(dev,
		    "Switch config: header reported: %d in structure, %d total\n",
		    sw_config->header.num_reported, sw_config->header.num_total);
		for (int i = 0; i < sw_config->header.num_reported; i++) {
			device_printf(dev,
			    "-> %d: type=%d seid=%d uplink=%d downlink=%d\n", i,
			    sw_config->element[i].element_type,
			    sw_config->element[i].seid,
			    sw_config->element[i].uplink_seid,
			    sw_config->element[i].downlink_seid);
		}
	}
	/* Simplified due to a single VSI */
	vsi->uplink_seid = sw_config->element[0].uplink_seid;
	vsi->downlink_seid = sw_config->element[0].downlink_seid;
	vsi->seid = sw_config->element[0].seid;
	return (ret);
}

/*********************************************************************
 *
 *  Initialize the VSI:  this handles contexts, which means things
 *  			 like the number of descriptors, buffer size,
 *			 plus we init the rings thru this function.
 *
 *  Returns 0 on success, otherwise the first error reported by an
 *  admin-queue call, an HMC context call or ixl_init_rx_ring().
 *
 **********************************************************************/
int
ixl_initialize_vsi(struct ixl_vsi *vsi)
{
	struct ixl_pf		*pf = vsi->back;
	struct ixl_queue	*que = vsi->queues;
	device_t		dev = vsi->dev;
	struct i40e_hw		*hw = vsi->hw;
	struct i40e_vsi_context	ctxt;
	int			tc_queues;
	int			err = 0;

	/* Fetch the current VSI parameters from firmware. */
	memset(&ctxt, 0, sizeof(ctxt));
	ctxt.seid = vsi->seid;
	if (pf->veb_seid != 0)
		ctxt.uplink_seid = pf->veb_seid;
	ctxt.pf_num = hw->pf_id;
	err = i40e_aq_get_vsi_params(hw, &ctxt, NULL);
	if (err) {
		device_printf(dev, "i40e_aq_get_vsi_params() failed, error %d"
		    " aq_error %d\n", err, hw->aq.asq_last_status);
		return (err);
	}
	ixl_dbg(pf, IXL_DBG_SWITCH_INFO,
	    "get_vsi_params: seid: %d, uplinkseid: %d, vsi_number: %d, "
	    "vsis_allocated: %d, vsis_unallocated: %d, flags: 0x%x, "
	    "pfnum: %d, vfnum: %d, stat idx: %d, enabled: %d\n", ctxt.seid,
	    ctxt.uplink_seid, ctxt.vsi_number,
	    ctxt.vsis_allocated, ctxt.vsis_unallocated,
	    ctxt.flags, ctxt.pf_num, ctxt.vf_num,
	    ctxt.info.stat_counter_idx, ctxt.info.up_enable_bits);
	/*
	** Set the queue and traffic class bits
	**  - when multiple traffic classes are supported
	**    this will need to be more robust.
	*/
	ctxt.info.valid_sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID;
	ctxt.info.mapping_flags |= I40E_AQ_VSI_QUE_MAP_CONTIG;
	/* In contig mode, que_mapping[0] is first queue index used by this VSI */
	ctxt.info.queue_mapping[0] = 0;
	/*
	 * This VSI will only use traffic class 0; start traffic class 0's
	 * queue allocation at queue 0, and assign it 2^tc_queues queues (though
	 * the driver may not use all of them).
	 */
	tc_queues = bsrl(pf->qtag.num_allocated);
	ctxt.info.tc_mapping[0] = ((0 << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT)
	    & I40E_AQ_VSI_TC_QUE_OFFSET_MASK) |
	    ((tc_queues << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT)
	    & I40E_AQ_VSI_TC_QUE_NUMBER_MASK);

	/* Set VLAN receive stripping mode */
	ctxt.info.valid_sections |= I40E_AQ_VSI_PROP_VLAN_VALID;
	ctxt.info.port_vlan_flags = I40E_AQ_VSI_PVLAN_MODE_ALL;
	if (vsi->ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
		ctxt.info.port_vlan_flags |= I40E_AQ_VSI_PVLAN_EMOD_STR_BOTH;
	else
		ctxt.info.port_vlan_flags |= I40E_AQ_VSI_PVLAN_EMOD_NOTHING;

#ifdef IXL_IW
	/* Set TCP Enable for iWARP capable VSI */
	if (ixl_enable_iwarp && pf->iw_enabled) {
		ctxt.info.valid_sections |=
		    htole16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID);
		ctxt.info.queueing_opt_flags |= I40E_AQ_VSI_QUE_OPT_TCP_ENA;
	}
#endif
	/* Save VSI number and info for use later */
	vsi->vsi_num = ctxt.vsi_number;
	bcopy(&ctxt.info, &vsi->info, sizeof(vsi->info));

	/* Reset VSI statistics */
	ixl_vsi_reset_stats(vsi);
	vsi->hw_filters_add = 0;
	vsi->hw_filters_del = 0;

	ctxt.flags = htole16(I40E_AQ_VSI_TYPE_PF);

	/* Push the modified context back to firmware. */
	err = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
	if (err) {
		device_printf(dev, "i40e_aq_update_vsi_params() failed, error %d,"
		    " aq_error %d\n", err, hw->aq.asq_last_status);
		return (err);
	}

	/* Program the TX and RX HMC contexts and (re)init both rings, per queue. */
	for (int i = 0; i < vsi->num_queues; i++, que++) {
		struct tx_ring		*txr = &que->txr;
		struct rx_ring 		*rxr = &que->rxr;
		struct i40e_hmc_obj_txq tctx;
		struct i40e_hmc_obj_rxq rctx;
		u32			txctl;
		u16			size;

		/* Setup the HMC TX Context  */
		size = que->num_desc * sizeof(struct i40e_tx_desc);
		memset(&tctx, 0, sizeof(struct i40e_hmc_obj_txq));
		tctx.new_context = 1;
		tctx.base = (txr->dma.pa/IXL_TX_CTX_BASE_UNITS);
		tctx.qlen = que->num_desc;
		tctx.fc_ena = 0;
		tctx.rdylist = vsi->info.qs_handle[0]; /* index is TC */
		/* Enable HEAD writeback */
		tctx.head_wb_ena = 1;
		/* The writeback word sits directly after the last descriptor. */
		tctx.head_wb_addr = txr->dma.pa +
		    (que->num_desc * sizeof(struct i40e_tx_desc));
		tctx.rdylist_act = 0;
		err = i40e_clear_lan_tx_queue_context(hw, i);
		if (err) {
			device_printf(dev, "Unable to clear TX context\n");
			break;
		}
		err = i40e_set_lan_tx_queue_context(hw, i, &tctx);
		if (err) {
			device_printf(dev, "Unable to set TX context\n");
			break;
		}
		/* Associate the ring with this PF */
		txctl = I40E_QTX_CTL_PF_QUEUE;
		txctl |= ((hw->pf_id << I40E_QTX_CTL_PF_INDX_SHIFT) &
		    I40E_QTX_CTL_PF_INDX_MASK);
		wr32(hw, I40E_QTX_CTL(i), txctl);
		ixl_flush(hw);

		/* Do ring (re)init */
		ixl_init_tx_ring(que);

		/* Next setup the HMC RX Context  */
		if (vsi->max_frame_size <= MCLBYTES)
			rxr->mbuf_sz = MCLBYTES;
		else
			rxr->mbuf_sz = MJUMPAGESIZE;

		/* Largest frame that one chain of receive buffers can hold. */
		u16 max_rxmax = rxr->mbuf_sz * hw->func_caps.rx_buf_chain_len;

		/* Set up an RX context for the HMC */
		memset(&rctx, 0, sizeof(struct i40e_hmc_obj_rxq));
		rctx.dbuff = rxr->mbuf_sz >> I40E_RXQ_CTX_DBUFF_SHIFT;
		/* ignore header split for now */
		rctx.hbuff = 0 >> I40E_RXQ_CTX_HBUFF_SHIFT;
		rctx.rxmax = (vsi->max_frame_size < max_rxmax) ?
		    vsi->max_frame_size : max_rxmax;
		rctx.dtype = 0;
		rctx.dsize = 1;	/* do 32byte descriptors */
		rctx.hsplit_0 = 0;  /* no HDR split initially */
		rctx.base = (rxr->dma.pa/IXL_RX_CTX_BASE_UNITS);
		rctx.qlen = que->num_desc;
		rctx.tphrdesc_ena = 1;
		rctx.tphwdesc_ena = 1;
		rctx.tphdata_ena = 0;
		rctx.tphhead_ena = 0;
		rctx.lrxqthresh = 2;
		rctx.crcstrip = 1;
		rctx.l2tsel = 1;
		rctx.showiv = 1;
		rctx.fc_ena = 0;
		rctx.prefena = 1;

		err = i40e_clear_lan_rx_queue_context(hw, i);
		if (err) {
			device_printf(dev,
			    "Unable to clear RX context %d\n", i);
			break;
		}
		err = i40e_set_lan_rx_queue_context(hw, i, &rctx);
		if (err) {
			device_printf(dev, "Unable to set RX context %d\n", i);
			break;
		}
		err = ixl_init_rx_ring(que);
		if (err) {
			device_printf(dev, "Fail in init_rx_ring %d\n", i);
			break;
		}
#ifdef DEV_NETMAP
		/* preserve queue */
		if (vsi->ifp->if_capenable & IFCAP_NETMAP) {
			struct netmap_adapter *na = NA(vsi->ifp);
			struct netmap_kring *kring = &na->rx_rings[i];
			int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring);
			wr32(vsi->hw, I40E_QRX_TAIL(que->me), t);
		} else
#endif /* DEV_NETMAP */
		wr32(vsi->hw, I40E_QRX_TAIL(que->me), que->num_desc - 1);
	}
	return (err);
}


/*********************************************************************
 *
 *  Free all VSI structs.
 *
 *  Releases per-queue TX/RX state and descriptor DMA memory, the
 *  queue array itself, and the VSI's MAC filter list.
 *
 **********************************************************************/
void
ixl_free_vsi(struct ixl_vsi *vsi)
{
	struct ixl_pf		*pf = (struct ixl_pf *)vsi->back;
	struct ixl_queue	*que = vsi->queues;

	/* Free station queues */
	if (!vsi->queues)
		goto free_filters;

	for (int i = 0; i < vsi->num_queues; i++, que++) {
		struct tx_ring *txr = &que->txr;
		struct rx_ring *rxr = &que->rxr;

		/* A queue whose TX lock was never set up has nothing to free. */
		if (!mtx_initialized(&txr->mtx)) /* uninitialized */
			continue;
		IXL_TX_LOCK(txr);
		ixl_free_que_tx(que);
		if (txr->base)
			i40e_free_dma_mem(&pf->hw, &txr->dma);
		IXL_TX_UNLOCK(txr);
		IXL_TX_LOCK_DESTROY(txr);

		if (!mtx_initialized(&rxr->mtx)) /* uninitialized */
			continue;
		IXL_RX_LOCK(rxr);
		ixl_free_que_rx(que);
		if (rxr->base)
			i40e_free_dma_mem(&pf->hw, &rxr->dma);
		IXL_RX_UNLOCK(rxr);
		IXL_RX_LOCK_DESTROY(rxr);
	}
	free(vsi->queues, M_DEVBUF);

free_filters:
	/* Free VSI filter list */
	ixl_free_mac_filters(vsi);
}

/*
 * Remove and free every entry on the VSI's MAC filter list (vsi->ftl).
 */
void
ixl_free_mac_filters(struct ixl_vsi *vsi)
{
	struct ixl_mac_filter *f;

	while (!SLIST_EMPTY(&vsi->ftl)) {
		f = SLIST_FIRST(&vsi->ftl);
		SLIST_REMOVE_HEAD(&vsi->ftl, next);
		free(f, M_DEVBUF);
	}
}

/*
 * Fill out fields in queue struct and setup tx/rx memory and structs
 *
 * Returns 0 on success or ENOMEM if any allocation fails; on failure
 * the descriptor memory, buf ring and mutexes set up here are released.
 */
static int
ixl_setup_queue(struct ixl_queue *que, struct ixl_pf *pf, int index)
{
	device_t dev = pf->dev;
	struct i40e_hw *hw = &pf->hw;
	struct ixl_vsi *vsi = &pf->vsi;
	struct tx_ring *txr = &que->txr;
	struct rx_ring *rxr = &que->rxr;
	int error = 0;
	int rsize, tsize;

	que->num_desc = pf->ringsz;
	que->me = index;
	que->vsi = vsi;

	txr->que = que;
	txr->tail = I40E_QTX_TAIL(que->me);

	/* Initialize the TX lock */
	snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
	    device_get_nameunit(dev), que->me);
	mtx_init(&txr->mtx, txr->mtx_name, NULL, MTX_DEF);
	/*
	 * Create the TX descriptor ring.  The extra sizeof(u32) leaves room
	 * after the last descriptor, which is where ixl_initialize_vsi()
	 * points head_wb_addr.
	 */
	tsize = roundup2((que->num_desc *
	    sizeof(struct i40e_tx_desc)) +
	    sizeof(u32), DBA_ALIGN);
	if (i40e_allocate_dma_mem(hw,
	    &txr->dma, i40e_mem_reserved, tsize, DBA_ALIGN)) {
		device_printf(dev,
		    "Unable to allocate TX Descriptor memory\n");
		error = ENOMEM;
		goto fail;
	}
	txr->base = (struct i40e_tx_desc *)txr->dma.va;
	bzero((void *)txr->base, tsize);
	/* Now allocate transmit soft structs for the ring */
	if (ixl_allocate_tx_data(que)) {
		device_printf(dev,
		    "Critical Failure setting up TX structures\n");
		error = ENOMEM;
		goto fail;
	}
	/* Allocate a buf ring */
	txr->br = buf_ring_alloc(DEFAULT_TXBRSZ, M_DEVBUF,
	    M_NOWAIT, &txr->mtx);
	if (txr->br == NULL) {
		device_printf(dev,
		    "Critical Failure setting up TX buf ring\n");
		error = ENOMEM;
		goto fail;
	}

	rsize = roundup2(que->num_desc *
	    sizeof(union i40e_rx_desc), DBA_ALIGN);
	rxr->que = que;
	rxr->tail = I40E_QRX_TAIL(que->me);

	/* Initialize the RX side lock */
	snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
	    device_get_nameunit(dev), que->me);
	mtx_init(&rxr->mtx, rxr->mtx_name, NULL, MTX_DEF);

	if (i40e_allocate_dma_mem(hw,
	    &rxr->dma, i40e_mem_reserved, rsize, 4096)) {
		device_printf(dev,
		    "Unable to allocate RX Descriptor memory\n");
		error = ENOMEM;
		goto fail;
	}
	rxr->base = (union i40e_rx_desc *)rxr->dma.va;
	bzero((void *)rxr->base, rsize);
	/* Allocate receive soft structs for the ring*/
	if (ixl_allocate_rx_data(que)) {
		device_printf(dev,
		    "Critical Failure setting up receive structs\n");
		error = ENOMEM;
		goto fail;
	}

	return (0);
fail:
	/* Unwind in reverse order; each step is guarded by a pointer or mtx_initialized() check. */
	if (rxr->base)
		i40e_free_dma_mem(&pf->hw, &rxr->dma);
	if (mtx_initialized(&rxr->mtx))
		mtx_destroy(&rxr->mtx);
	if (txr->br) {
		buf_ring_free(txr->br, M_DEVBUF);
		txr->br = NULL;
	}
	if (txr->base)
		i40e_free_dma_mem(&pf->hw, &txr->dma);
	if (mtx_initialized(&txr->mtx))
		mtx_destroy(&txr->mtx);

	return (error);
}

/*********************************************************************
 *
 *  Allocate memory for the VSI (virtual station interface) and their
 *  associated queues, rings and the descriptors associated with each,
 *  called only once at attach.
* **********************************************************************/ int ixl_setup_stations(struct ixl_pf *pf) { device_t dev = pf->dev; struct ixl_vsi *vsi; struct ixl_queue *que; int error = 0; vsi = &pf->vsi; vsi->back = (void *)pf; vsi->hw = &pf->hw; vsi->id = 0; vsi->num_vlans = 0; vsi->back = pf; /* Get memory for the station queues */ if (!(vsi->queues = (struct ixl_queue *) malloc(sizeof(struct ixl_queue) * vsi->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate queue memory\n"); error = ENOMEM; return (error); } /* Then setup each queue */ for (int i = 0; i < vsi->num_queues; i++) { que = &vsi->queues[i]; error = ixl_setup_queue(que, pf, i); if (error) return (error); } return (0); } /* ** Provide a update to the queue RX ** interrupt moderation value. */ void ixl_set_queue_rx_itr(struct ixl_queue *que) { struct ixl_vsi *vsi = que->vsi; struct ixl_pf *pf = (struct ixl_pf *)vsi->back; struct i40e_hw *hw = vsi->hw; struct rx_ring *rxr = &que->rxr; u16 rx_itr; u16 rx_latency = 0; int rx_bytes; /* Idle, do nothing */ if (rxr->bytes == 0) return; if (pf->dynamic_rx_itr) { rx_bytes = rxr->bytes/rxr->itr; rx_itr = rxr->itr; /* Adjust latency range */ switch (rxr->latency) { case IXL_LOW_LATENCY: if (rx_bytes > 10) { rx_latency = IXL_AVE_LATENCY; rx_itr = IXL_ITR_20K; } break; case IXL_AVE_LATENCY: if (rx_bytes > 20) { rx_latency = IXL_BULK_LATENCY; rx_itr = IXL_ITR_8K; } else if (rx_bytes <= 10) { rx_latency = IXL_LOW_LATENCY; rx_itr = IXL_ITR_100K; } break; case IXL_BULK_LATENCY: if (rx_bytes <= 20) { rx_latency = IXL_AVE_LATENCY; rx_itr = IXL_ITR_20K; } break; } rxr->latency = rx_latency; if (rx_itr != rxr->itr) { /* do an exponential smoothing */ rx_itr = (10 * rx_itr * rxr->itr) / ((9 * rx_itr) + rxr->itr); rxr->itr = min(rx_itr, IXL_MAX_ITR); wr32(hw, I40E_PFINT_ITRN(IXL_RX_ITR, que->me), rxr->itr); } } else { /* We may have have toggled to non-dynamic */ if (vsi->rx_itr_setting & IXL_ITR_DYNAMIC) vsi->rx_itr_setting = 
pf->rx_itr; /* Update the hardware if needed */ if (rxr->itr != vsi->rx_itr_setting) { rxr->itr = vsi->rx_itr_setting; wr32(hw, I40E_PFINT_ITRN(IXL_RX_ITR, que->me), rxr->itr); } } rxr->bytes = 0; rxr->packets = 0; return; } /* ** Provide a update to the queue TX ** interrupt moderation value. */ void ixl_set_queue_tx_itr(struct ixl_queue *que) { struct ixl_vsi *vsi = que->vsi; struct ixl_pf *pf = (struct ixl_pf *)vsi->back; struct i40e_hw *hw = vsi->hw; struct tx_ring *txr = &que->txr; u16 tx_itr; u16 tx_latency = 0; int tx_bytes; /* Idle, do nothing */ if (txr->bytes == 0) return; if (pf->dynamic_tx_itr) { tx_bytes = txr->bytes/txr->itr; tx_itr = txr->itr; switch (txr->latency) { case IXL_LOW_LATENCY: if (tx_bytes > 10) { tx_latency = IXL_AVE_LATENCY; tx_itr = IXL_ITR_20K; } break; case IXL_AVE_LATENCY: if (tx_bytes > 20) { tx_latency = IXL_BULK_LATENCY; tx_itr = IXL_ITR_8K; } else if (tx_bytes <= 10) { tx_latency = IXL_LOW_LATENCY; tx_itr = IXL_ITR_100K; } break; case IXL_BULK_LATENCY: if (tx_bytes <= 20) { tx_latency = IXL_AVE_LATENCY; tx_itr = IXL_ITR_20K; } break; } txr->latency = tx_latency; if (tx_itr != txr->itr) { /* do an exponential smoothing */ tx_itr = (10 * tx_itr * txr->itr) / ((9 * tx_itr) + txr->itr); txr->itr = min(tx_itr, IXL_MAX_ITR); wr32(hw, I40E_PFINT_ITRN(IXL_TX_ITR, que->me), txr->itr); } } else { /* We may have have toggled to non-dynamic */ if (vsi->tx_itr_setting & IXL_ITR_DYNAMIC) vsi->tx_itr_setting = pf->tx_itr; /* Update the hardware if needed */ if (txr->itr != vsi->tx_itr_setting) { txr->itr = vsi->tx_itr_setting; wr32(hw, I40E_PFINT_ITRN(IXL_TX_ITR, que->me), txr->itr); } } txr->bytes = 0; txr->packets = 0; return; } void ixl_add_vsi_sysctls(struct ixl_pf *pf, struct ixl_vsi *vsi, struct sysctl_ctx_list *ctx, const char *sysctl_name) { struct sysctl_oid *tree; struct sysctl_oid_list *child; struct sysctl_oid_list *vsi_list; tree = device_get_sysctl_tree(pf->dev); child = SYSCTL_CHILDREN(tree); vsi->vsi_node = SYSCTL_ADD_NODE(ctx, 
child, OID_AUTO, sysctl_name, CTLFLAG_RD, NULL, "VSI Number"); vsi_list = SYSCTL_CHILDREN(vsi->vsi_node); ixl_add_sysctls_eth_stats(ctx, vsi_list, &vsi->eth_stats); } #ifdef IXL_DEBUG /** * ixl_sysctl_qtx_tail_handler * Retrieves I40E_QTX_TAIL value from hardware * for a sysctl. */ static int ixl_sysctl_qtx_tail_handler(SYSCTL_HANDLER_ARGS) { struct ixl_queue *que; int error; u32 val; que = ((struct ixl_queue *)oidp->oid_arg1); if (!que) return 0; val = rd32(que->vsi->hw, que->txr.tail); error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr) return error; return (0); } /** * ixl_sysctl_qrx_tail_handler * Retrieves I40E_QRX_TAIL value from hardware * for a sysctl. */ static int ixl_sysctl_qrx_tail_handler(SYSCTL_HANDLER_ARGS) { struct ixl_queue *que; int error; u32 val; que = ((struct ixl_queue *)oidp->oid_arg1); if (!que) return 0; val = rd32(que->vsi->hw, que->rxr.tail); error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr) return error; return (0); } #endif /* * Used to set the Tx ITR value for all of the PF LAN VSI's queues. * Writes to the ITR registers immediately. */ static int ixl_sysctl_pf_tx_itr(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; device_t dev = pf->dev; int error = 0; int requested_tx_itr; requested_tx_itr = pf->tx_itr; error = sysctl_handle_int(oidp, &requested_tx_itr, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (pf->dynamic_tx_itr) { device_printf(dev, "Cannot set TX itr value while dynamic TX itr is enabled\n"); return (EINVAL); } if (requested_tx_itr < 0 || requested_tx_itr > IXL_MAX_ITR) { device_printf(dev, "Invalid TX itr value; value must be between 0 and %d\n", IXL_MAX_ITR); return (EINVAL); } pf->tx_itr = requested_tx_itr; ixl_configure_tx_itr(pf); return (error); } /* * Used to set the Rx ITR value for all of the PF LAN VSI's queues. * Writes to the ITR registers immediately. 
*/ static int ixl_sysctl_pf_rx_itr(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; device_t dev = pf->dev; int error = 0; int requested_rx_itr; requested_rx_itr = pf->rx_itr; error = sysctl_handle_int(oidp, &requested_rx_itr, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (pf->dynamic_rx_itr) { device_printf(dev, "Cannot set RX itr value while dynamic RX itr is enabled\n"); return (EINVAL); } if (requested_rx_itr < 0 || requested_rx_itr > IXL_MAX_ITR) { device_printf(dev, "Invalid RX itr value; value must be between 0 and %d\n", IXL_MAX_ITR); return (EINVAL); } pf->rx_itr = requested_rx_itr; ixl_configure_rx_itr(pf); return (error); } void ixl_add_hw_stats(struct ixl_pf *pf) { device_t dev = pf->dev; struct ixl_vsi *vsi = &pf->vsi; struct ixl_queue *queues = vsi->queues; struct i40e_hw_port_stats *pf_stats = &pf->stats; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); struct sysctl_oid_list *vsi_list; struct sysctl_oid *queue_node; struct sysctl_oid_list *queue_list; struct tx_ring *txr; struct rx_ring *rxr; char queue_namebuf[QUEUE_NAME_LEN]; /* Driver statistics */ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_events", CTLFLAG_RD, &pf->watchdog_events, "Watchdog timeouts"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "admin_irq", CTLFLAG_RD, &pf->admin_irq, "Admin Queue IRQ Handled"); ixl_add_vsi_sysctls(pf, &pf->vsi, ctx, "pf"); vsi_list = SYSCTL_CHILDREN(pf->vsi.vsi_node); /* Queue statistics */ for (int q = 0; q < vsi->num_queues; q++) { snprintf(queue_namebuf, QUEUE_NAME_LEN, "que%d", q); queue_node = SYSCTL_ADD_NODE(ctx, vsi_list, OID_AUTO, queue_namebuf, CTLFLAG_RD, NULL, "Queue #"); queue_list = SYSCTL_CHILDREN(queue_node); txr = &(queues[q].txr); rxr = &(queues[q].rxr); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "mbuf_defrag_failed", CTLFLAG_RD, &(queues[q].mbuf_defrag_failed), "m_defrag() failed"); 
SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs", CTLFLAG_RD, &(queues[q].irqs), "irqs on this queue"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tso_tx", CTLFLAG_RD, &(queues[q].tso), "TSO"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_dmamap_failed", CTLFLAG_RD, &(queues[q].tx_dmamap_failed), "Driver tx dma failure in xmit"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "mss_too_small", CTLFLAG_RD, &(queues[q].mss_too_small), "TSO sends with an MSS less than 64"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "no_desc_avail", CTLFLAG_RD, &(txr->no_desc), "Queue No Descriptor Available"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets", CTLFLAG_RD, &(txr->total_packets), "Queue Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_bytes", CTLFLAG_RD, &(txr->tx_bytes), "Queue Bytes Transmitted"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_packets", CTLFLAG_RD, &(rxr->rx_packets), "Queue Packets Received"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes", CTLFLAG_RD, &(rxr->rx_bytes), "Queue Bytes Received"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_desc_err", CTLFLAG_RD, &(rxr->desc_errs), "Queue Rx Descriptor Errors"); SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "rx_itr", CTLFLAG_RD, &(rxr->itr), 0, "Queue Rx ITR Interval"); SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "tx_itr", CTLFLAG_RD, &(txr->itr), 0, "Queue Tx ITR Interval"); #ifdef IXL_DEBUG SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_not_done", CTLFLAG_RD, &(rxr->not_done), "Queue Rx Descriptors not Done"); SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "rx_next_refresh", CTLFLAG_RD, &(rxr->next_refresh), 0, "Queue Rx Descriptors not Done"); SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "rx_next_check", CTLFLAG_RD, &(rxr->next_check), 0, "Queue Rx Descriptors not Done"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "qtx_tail", CTLTYPE_UINT | CTLFLAG_RD, &queues[q], sizeof(struct ixl_queue), ixl_sysctl_qtx_tail_handler, "IU", "Queue Transmit Descriptor Tail"); 
SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "qrx_tail", CTLTYPE_UINT | CTLFLAG_RD, &queues[q], sizeof(struct ixl_queue), ixl_sysctl_qrx_tail_handler, "IU", "Queue Receive Descriptor Tail"); #endif } /* MAC stats */ ixl_add_sysctls_mac_stats(ctx, child, pf_stats); } void ixl_add_sysctls_eth_stats(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child, struct i40e_eth_stats *eth_stats) { struct ixl_sysctl_info ctls[] = { {ð_stats->rx_bytes, "good_octets_rcvd", "Good Octets Received"}, {ð_stats->rx_unicast, "ucast_pkts_rcvd", "Unicast Packets Received"}, {ð_stats->rx_multicast, "mcast_pkts_rcvd", "Multicast Packets Received"}, {ð_stats->rx_broadcast, "bcast_pkts_rcvd", "Broadcast Packets Received"}, {ð_stats->rx_discards, "rx_discards", "Discarded RX packets"}, {ð_stats->tx_bytes, "good_octets_txd", "Good Octets Transmitted"}, {ð_stats->tx_unicast, "ucast_pkts_txd", "Unicast Packets Transmitted"}, {ð_stats->tx_multicast, "mcast_pkts_txd", "Multicast Packets Transmitted"}, {ð_stats->tx_broadcast, "bcast_pkts_txd", "Broadcast Packets Transmitted"}, // end {0,0,0} }; struct ixl_sysctl_info *entry = ctls; while (entry->stat != 0) { SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, entry->name, CTLFLAG_RD, entry->stat, entry->description); entry++; } } void ixl_add_sysctls_mac_stats(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child, struct i40e_hw_port_stats *stats) { struct sysctl_oid *stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac", CTLFLAG_RD, NULL, "Mac Statistics"); struct sysctl_oid_list *stat_list = SYSCTL_CHILDREN(stat_node); struct i40e_eth_stats *eth_stats = &stats->eth; ixl_add_sysctls_eth_stats(ctx, stat_list, eth_stats); struct ixl_sysctl_info ctls[] = { {&stats->crc_errors, "crc_errors", "CRC Errors"}, {&stats->illegal_bytes, "illegal_bytes", "Illegal Byte Errors"}, {&stats->mac_local_faults, "local_faults", "MAC Local Faults"}, {&stats->mac_remote_faults, "remote_faults", "MAC Remote Faults"}, {&stats->rx_length_errors, "rx_length_errors", "Receive 
Length Errors"}, /* Packet Reception Stats */ {&stats->rx_size_64, "rx_frames_64", "64 byte frames received"}, {&stats->rx_size_127, "rx_frames_65_127", "65-127 byte frames received"}, {&stats->rx_size_255, "rx_frames_128_255", "128-255 byte frames received"}, {&stats->rx_size_511, "rx_frames_256_511", "256-511 byte frames received"}, {&stats->rx_size_1023, "rx_frames_512_1023", "512-1023 byte frames received"}, {&stats->rx_size_1522, "rx_frames_1024_1522", "1024-1522 byte frames received"}, {&stats->rx_size_big, "rx_frames_big", "1523-9522 byte frames received"}, {&stats->rx_undersize, "rx_undersize", "Undersized packets received"}, {&stats->rx_fragments, "rx_fragmented", "Fragmented packets received"}, {&stats->rx_oversize, "rx_oversized", "Oversized packets received"}, {&stats->rx_jabber, "rx_jabber", "Received Jabber"}, {&stats->checksum_error, "checksum_errors", "Checksum Errors"}, /* Packet Transmission Stats */ {&stats->tx_size_64, "tx_frames_64", "64 byte frames transmitted"}, {&stats->tx_size_127, "tx_frames_65_127", "65-127 byte frames transmitted"}, {&stats->tx_size_255, "tx_frames_128_255", "128-255 byte frames transmitted"}, {&stats->tx_size_511, "tx_frames_256_511", "256-511 byte frames transmitted"}, {&stats->tx_size_1023, "tx_frames_512_1023", "512-1023 byte frames transmitted"}, {&stats->tx_size_1522, "tx_frames_1024_1522", "1024-1522 byte frames transmitted"}, {&stats->tx_size_big, "tx_frames_big", "1523-9522 byte frames transmitted"}, /* Flow control */ {&stats->link_xon_tx, "xon_txd", "Link XON transmitted"}, {&stats->link_xon_rx, "xon_recvd", "Link XON received"}, {&stats->link_xoff_tx, "xoff_txd", "Link XOFF transmitted"}, {&stats->link_xoff_rx, "xoff_recvd", "Link XOFF received"}, /* End */ {0,0,0} }; struct ixl_sysctl_info *entry = ctls; while (entry->stat != 0) { SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, entry->name, CTLFLAG_RD, entry->stat, entry->description); entry++; } } void ixl_set_rss_key(struct ixl_pf *pf) { struct i40e_hw *hw = 
&pf->hw; struct ixl_vsi *vsi = &pf->vsi; device_t dev = pf->dev; enum i40e_status_code status; #ifdef RSS u32 rss_seed[IXL_RSS_KEY_SIZE_REG]; #else u32 rss_seed[IXL_RSS_KEY_SIZE_REG] = {0x41b01687, 0x183cfd8c, 0xce880440, 0x580cbc3c, 0x35897377, 0x328b25e1, 0x4fa98922, 0xb7d90c14, 0xd5bad70d, 0xcd15a2c1, 0x0, 0x0, 0x0}; #endif #ifdef RSS /* Fetch the configured RSS key */ rss_getkey((uint8_t *) &rss_seed); #endif /* Fill out hash function seed */ if (hw->mac.type == I40E_MAC_X722) { struct i40e_aqc_get_set_rss_key_data key_data; bcopy(rss_seed, key_data.standard_rss_key, 40); status = i40e_aq_set_rss_key(hw, vsi->vsi_num, &key_data); if (status) device_printf(dev, "i40e_aq_set_rss_key status %s, error %s\n", i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); } else { for (int i = 0; i < IXL_RSS_KEY_SIZE_REG; i++) i40e_write_rx_ctl(hw, I40E_PFQF_HKEY(i), rss_seed[i]); } } /* * Configure enabled PCTYPES for RSS. */ void ixl_set_rss_pctypes(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; u64 set_hena = 0, hena; #ifdef RSS u32 rss_hash_config; rss_hash_config = rss_gethashconfig(); if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_OTHER); if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_TCP); if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_UDP); if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_OTHER); if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6_EX) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV6); if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_TCP); if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_UDP); #else if (hw->mac.type == I40E_MAC_X722) set_hena = IXL_DEFAULT_RSS_HENA_X722; else set_hena = IXL_DEFAULT_RSS_HENA_XL710; #endif hena = 
(u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0)) | ((u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1)) << 32); hena |= set_hena; i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), (u32)hena); i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), (u32)(hena >> 32)); } void ixl_set_rss_hlut(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct ixl_vsi *vsi = &pf->vsi; int i, que_id; int lut_entry_width; u32 lut = 0; enum i40e_status_code status; if (hw->mac.type == I40E_MAC_X722) lut_entry_width = 7; else lut_entry_width = pf->hw.func_caps.rss_table_entry_width; /* Populate the LUT with max no. of queues in round robin fashion */ u8 hlut_buf[512]; for (i = 0; i < pf->hw.func_caps.rss_table_size; i++) { #ifdef RSS /* * Fetch the RSS bucket id for the given indirection entry. * Cap it at the number of configured buckets (which is * num_queues.) */ que_id = rss_get_indirection_to_bucket(i); que_id = que_id % vsi->num_queues; #else que_id = i % vsi->num_queues; #endif lut = (que_id & ((0x1 << lut_entry_width) - 1)); hlut_buf[i] = lut; } if (hw->mac.type == I40E_MAC_X722) { status = i40e_aq_set_rss_lut(hw, vsi->vsi_num, TRUE, hlut_buf, sizeof(hlut_buf)); if (status) device_printf(dev, "i40e_aq_set_rss_lut status %s, error %s\n", i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); } else { for (i = 0; i < pf->hw.func_caps.rss_table_size >> 2; i++) wr32(hw, I40E_PFQF_HLUT(i), ((u32 *)hlut_buf)[i]); ixl_flush(hw); } } /* ** Setup the PF's RSS parameters. */ void ixl_config_rss(struct ixl_pf *pf) { ixl_set_rss_key(pf); ixl_set_rss_pctypes(pf); ixl_set_rss_hlut(pf); } /* ** This routine is run via an vlan config EVENT, ** it enables us to use the HW Filter table since ** we can get the vlan id. This just creates the ** entry in the soft version of the VFTA, init will ** repopulate the real table. 
*/ void ixl_register_vlan(void *arg, struct ifnet *ifp, u16 vtag) { struct ixl_vsi *vsi = ifp->if_softc; struct i40e_hw *hw = vsi->hw; struct ixl_pf *pf = (struct ixl_pf *)vsi->back; if (ifp->if_softc != arg) /* Not our event */ return; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; IXL_PF_LOCK(pf); ++vsi->num_vlans; ixl_add_filter(vsi, hw->mac.addr, vtag); IXL_PF_UNLOCK(pf); } /* ** This routine is run via an vlan ** unconfig EVENT, remove our entry ** in the soft vfta. */ void ixl_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag) { struct ixl_vsi *vsi = ifp->if_softc; struct i40e_hw *hw = vsi->hw; struct ixl_pf *pf = (struct ixl_pf *)vsi->back; if (ifp->if_softc != arg) return; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; IXL_PF_LOCK(pf); --vsi->num_vlans; ixl_del_filter(vsi, hw->mac.addr, vtag); IXL_PF_UNLOCK(pf); } /* ** This routine updates vlan filters, called by init ** it scans the filter table and then updates the hw ** after a soft reset. */ void ixl_setup_vlan_filters(struct ixl_vsi *vsi) { struct ixl_mac_filter *f; int cnt = 0, flags; if (vsi->num_vlans == 0) return; /* ** Scan the filter list for vlan entries, ** mark them for addition and then call ** for the AQ update. */ SLIST_FOREACH(f, &vsi->ftl, next) { if (f->flags & IXL_FILTER_VLAN) { f->flags |= (IXL_FILTER_ADD | IXL_FILTER_USED); cnt++; } } if (cnt == 0) { printf("setup vlan: no filters found!\n"); return; } flags = IXL_FILTER_VLAN; flags |= (IXL_FILTER_ADD | IXL_FILTER_USED); ixl_add_hw_filters(vsi, flags, cnt); return; } /* ** Initialize filter list and add filters that the hardware ** needs to know about. ** ** Requires VSI's filter list & seid to be set before calling. */ void ixl_init_filters(struct ixl_vsi *vsi) { struct ixl_pf *pf = (struct ixl_pf *)vsi->back; /* Add broadcast address */ ixl_add_filter(vsi, ixl_bcast_addr, IXL_VLAN_ANY); /* * Prevent Tx flow control frames from being sent out by * non-firmware transmitters. * This affects every VSI in the PF. 
*/ if (pf->enable_tx_fc_filter) i40e_add_filter_to_drop_tx_flow_control_frames(vsi->hw, vsi->seid); } /* ** This routine adds mulicast filters */ void ixl_add_mc_filter(struct ixl_vsi *vsi, u8 *macaddr) { struct ixl_mac_filter *f; /* Does one already exist */ f = ixl_find_filter(vsi, macaddr, IXL_VLAN_ANY); if (f != NULL) return; f = ixl_get_filter(vsi); if (f == NULL) { printf("WARNING: no filter available!!\n"); return; } bcopy(macaddr, f->macaddr, ETHER_ADDR_LEN); f->vlan = IXL_VLAN_ANY; f->flags |= (IXL_FILTER_ADD | IXL_FILTER_USED | IXL_FILTER_MC); return; } void ixl_reconfigure_filters(struct ixl_vsi *vsi) { ixl_add_hw_filters(vsi, IXL_FILTER_USED, vsi->num_macs); } /* ** This routine adds macvlan filters */ void ixl_add_filter(struct ixl_vsi *vsi, const u8 *macaddr, s16 vlan) { struct ixl_mac_filter *f, *tmp; struct ixl_pf *pf; device_t dev; DEBUGOUT("ixl_add_filter: begin"); pf = vsi->back; dev = pf->dev; /* Does one already exist */ f = ixl_find_filter(vsi, macaddr, vlan); if (f != NULL) return; /* ** Is this the first vlan being registered, if so we ** need to remove the ANY filter that indicates we are ** not in a vlan, and replace that with a 0 filter. 
*/ if ((vlan != IXL_VLAN_ANY) && (vsi->num_vlans == 1)) { tmp = ixl_find_filter(vsi, macaddr, IXL_VLAN_ANY); if (tmp != NULL) { ixl_del_filter(vsi, macaddr, IXL_VLAN_ANY); ixl_add_filter(vsi, macaddr, 0); } } f = ixl_get_filter(vsi); if (f == NULL) { device_printf(dev, "WARNING: no filter available!!\n"); return; } bcopy(macaddr, f->macaddr, ETHER_ADDR_LEN); f->vlan = vlan; f->flags |= (IXL_FILTER_ADD | IXL_FILTER_USED); if (f->vlan != IXL_VLAN_ANY) f->flags |= IXL_FILTER_VLAN; else vsi->num_macs++; ixl_add_hw_filters(vsi, f->flags, 1); return; } void ixl_del_filter(struct ixl_vsi *vsi, const u8 *macaddr, s16 vlan) { struct ixl_mac_filter *f; f = ixl_find_filter(vsi, macaddr, vlan); if (f == NULL) return; f->flags |= IXL_FILTER_DEL; ixl_del_hw_filters(vsi, 1); vsi->num_macs--; /* Check if this is the last vlan removal */ if (vlan != IXL_VLAN_ANY && vsi->num_vlans == 0) { /* Switch back to a non-vlan filter */ ixl_del_filter(vsi, macaddr, 0); ixl_add_filter(vsi, macaddr, IXL_VLAN_ANY); } return; } /* ** Find the filter with both matching mac addr and vlan id */ struct ixl_mac_filter * ixl_find_filter(struct ixl_vsi *vsi, const u8 *macaddr, s16 vlan) { struct ixl_mac_filter *f; bool match = FALSE; SLIST_FOREACH(f, &vsi->ftl, next) { if (!cmp_etheraddr(f->macaddr, macaddr)) continue; if (f->vlan == vlan) { match = TRUE; break; } } if (!match) f = NULL; return (f); } /* ** This routine takes additions to the vsi filter ** table and creates an Admin Queue call to create ** the filters in the hardware. 
*/ void ixl_add_hw_filters(struct ixl_vsi *vsi, int flags, int cnt) { struct i40e_aqc_add_macvlan_element_data *a, *b; struct ixl_mac_filter *f; struct ixl_pf *pf; struct i40e_hw *hw; device_t dev; int err, j = 0; pf = vsi->back; dev = pf->dev; hw = &pf->hw; IXL_PF_LOCK_ASSERT(pf); a = malloc(sizeof(struct i40e_aqc_add_macvlan_element_data) * cnt, M_DEVBUF, M_NOWAIT | M_ZERO); if (a == NULL) { device_printf(dev, "add_hw_filters failed to get memory\n"); return; } /* ** Scan the filter list, each time we find one ** we add it to the admin queue array and turn off ** the add bit. */ SLIST_FOREACH(f, &vsi->ftl, next) { if (f->flags == flags) { b = &a[j]; // a pox on fvl long names :) bcopy(f->macaddr, b->mac_addr, ETHER_ADDR_LEN); if (f->vlan == IXL_VLAN_ANY) { b->vlan_tag = 0; b->flags = I40E_AQC_MACVLAN_ADD_IGNORE_VLAN; } else { b->vlan_tag = f->vlan; b->flags = 0; } b->flags |= I40E_AQC_MACVLAN_ADD_PERFECT_MATCH; f->flags &= ~IXL_FILTER_ADD; j++; } if (j == cnt) break; } if (j > 0) { err = i40e_aq_add_macvlan(hw, vsi->seid, a, j, NULL); if (err) device_printf(dev, "aq_add_macvlan err %d, " "aq_error %d\n", err, hw->aq.asq_last_status); else vsi->hw_filters_add += j; } free(a, M_DEVBUF); return; } /* ** This routine takes removals in the vsi filter ** table and creates an Admin Queue call to delete ** the filters in the hardware. 
*/ void ixl_del_hw_filters(struct ixl_vsi *vsi, int cnt) { struct i40e_aqc_remove_macvlan_element_data *d, *e; struct ixl_pf *pf; struct i40e_hw *hw; device_t dev; struct ixl_mac_filter *f, *f_temp; int err, j = 0; DEBUGOUT("ixl_del_hw_filters: begin\n"); pf = vsi->back; hw = &pf->hw; dev = pf->dev; d = malloc(sizeof(struct i40e_aqc_remove_macvlan_element_data) * cnt, M_DEVBUF, M_NOWAIT | M_ZERO); if (d == NULL) { printf("del hw filter failed to get memory\n"); return; } SLIST_FOREACH_SAFE(f, &vsi->ftl, next, f_temp) { if (f->flags & IXL_FILTER_DEL) { e = &d[j]; // a pox on fvl long names :) bcopy(f->macaddr, e->mac_addr, ETHER_ADDR_LEN); e->vlan_tag = (f->vlan == IXL_VLAN_ANY ? 0 : f->vlan); e->flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH; /* delete entry from vsi list */ SLIST_REMOVE(&vsi->ftl, f, ixl_mac_filter, next); free(f, M_DEVBUF); j++; } if (j == cnt) break; } if (j > 0) { err = i40e_aq_remove_macvlan(hw, vsi->seid, d, j, NULL); if (err && hw->aq.asq_last_status != I40E_AQ_RC_ENOENT) { int sc = 0; for (int i = 0; i < j; i++) sc += (!d[i].error_code); vsi->hw_filters_del += sc; device_printf(dev, "Failed to remove %d/%d filters, aq error %d\n", j - sc, j, hw->aq.asq_last_status); } else vsi->hw_filters_del += j; } free(d, M_DEVBUF); DEBUGOUT("ixl_del_hw_filters: end\n"); return; } int ixl_enable_tx_ring(struct ixl_pf *pf, struct ixl_pf_qtag *qtag, u16 vsi_qidx) { struct i40e_hw *hw = &pf->hw; int error = 0; u32 reg; u16 pf_qidx; pf_qidx = ixl_pf_qidx_from_vsi_qidx(qtag, vsi_qidx); ixl_dbg(pf, IXL_DBG_EN_DIS, "Enabling PF TX ring %4d / VSI TX ring %4d...\n", pf_qidx, vsi_qidx); i40e_pre_tx_queue_cfg(hw, pf_qidx, TRUE); reg = rd32(hw, I40E_QTX_ENA(pf_qidx)); reg |= I40E_QTX_ENA_QENA_REQ_MASK | I40E_QTX_ENA_QENA_STAT_MASK; wr32(hw, I40E_QTX_ENA(pf_qidx), reg); /* Verify the enable took */ for (int j = 0; j < 10; j++) { reg = rd32(hw, I40E_QTX_ENA(pf_qidx)); if (reg & I40E_QTX_ENA_QENA_STAT_MASK) break; i40e_msec_delay(10); } if ((reg & 
I40E_QTX_ENA_QENA_STAT_MASK) == 0) { device_printf(pf->dev, "TX queue %d still disabled!\n", pf_qidx); error = ETIMEDOUT; } return (error); } int ixl_enable_rx_ring(struct ixl_pf *pf, struct ixl_pf_qtag *qtag, u16 vsi_qidx) { struct i40e_hw *hw = &pf->hw; int error = 0; u32 reg; u16 pf_qidx; pf_qidx = ixl_pf_qidx_from_vsi_qidx(qtag, vsi_qidx); ixl_dbg(pf, IXL_DBG_EN_DIS, "Enabling PF RX ring %4d / VSI RX ring %4d...\n", pf_qidx, vsi_qidx); reg = rd32(hw, I40E_QRX_ENA(pf_qidx)); reg |= I40E_QRX_ENA_QENA_REQ_MASK | I40E_QRX_ENA_QENA_STAT_MASK; wr32(hw, I40E_QRX_ENA(pf_qidx), reg); /* Verify the enable took */ for (int j = 0; j < 10; j++) { reg = rd32(hw, I40E_QRX_ENA(pf_qidx)); if (reg & I40E_QRX_ENA_QENA_STAT_MASK) break; i40e_msec_delay(10); } if ((reg & I40E_QRX_ENA_QENA_STAT_MASK) == 0) { device_printf(pf->dev, "RX queue %d still disabled!\n", pf_qidx); error = ETIMEDOUT; } return (error); } int ixl_enable_ring(struct ixl_pf *pf, struct ixl_pf_qtag *qtag, u16 vsi_qidx) { int error = 0; error = ixl_enable_tx_ring(pf, qtag, vsi_qidx); /* Called function already prints error message */ if (error) return (error); error = ixl_enable_rx_ring(pf, qtag, vsi_qidx); return (error); } /* For PF VSI only */ int ixl_enable_rings(struct ixl_vsi *vsi) { struct ixl_pf *pf = vsi->back; int error = 0; for (int i = 0; i < vsi->num_queues; i++) { error = ixl_enable_ring(pf, &pf->qtag, i); if (error) return (error); } return (error); } int ixl_disable_tx_ring(struct ixl_pf *pf, struct ixl_pf_qtag *qtag, u16 vsi_qidx) { struct i40e_hw *hw = &pf->hw; int error = 0; u32 reg; u16 pf_qidx; pf_qidx = ixl_pf_qidx_from_vsi_qidx(qtag, vsi_qidx); i40e_pre_tx_queue_cfg(hw, pf_qidx, FALSE); i40e_usec_delay(500); reg = rd32(hw, I40E_QTX_ENA(pf_qidx)); reg &= ~I40E_QTX_ENA_QENA_REQ_MASK; wr32(hw, I40E_QTX_ENA(pf_qidx), reg); /* Verify the disable took */ for (int j = 0; j < 10; j++) { reg = rd32(hw, I40E_QTX_ENA(pf_qidx)); if (!(reg & I40E_QTX_ENA_QENA_STAT_MASK)) break; i40e_msec_delay(10); } if 
(reg & I40E_QTX_ENA_QENA_STAT_MASK) { device_printf(pf->dev, "TX queue %d still enabled!\n", pf_qidx); error = ETIMEDOUT; } return (error); } int ixl_disable_rx_ring(struct ixl_pf *pf, struct ixl_pf_qtag *qtag, u16 vsi_qidx) { struct i40e_hw *hw = &pf->hw; int error = 0; u32 reg; u16 pf_qidx; pf_qidx = ixl_pf_qidx_from_vsi_qidx(qtag, vsi_qidx); reg = rd32(hw, I40E_QRX_ENA(pf_qidx)); reg &= ~I40E_QRX_ENA_QENA_REQ_MASK; wr32(hw, I40E_QRX_ENA(pf_qidx), reg); /* Verify the disable took */ for (int j = 0; j < 10; j++) { reg = rd32(hw, I40E_QRX_ENA(pf_qidx)); if (!(reg & I40E_QRX_ENA_QENA_STAT_MASK)) break; i40e_msec_delay(10); } if (reg & I40E_QRX_ENA_QENA_STAT_MASK) { device_printf(pf->dev, "RX queue %d still enabled!\n", pf_qidx); error = ETIMEDOUT; } return (error); } int ixl_disable_ring(struct ixl_pf *pf, struct ixl_pf_qtag *qtag, u16 vsi_qidx) { int error = 0; error = ixl_disable_tx_ring(pf, qtag, vsi_qidx); /* Called function already prints error message */ if (error) return (error); error = ixl_disable_rx_ring(pf, qtag, vsi_qidx); return (error); } /* For PF VSI only */ int ixl_disable_rings(struct ixl_vsi *vsi) { struct ixl_pf *pf = vsi->back; int error = 0; for (int i = 0; i < vsi->num_queues; i++) { error = ixl_disable_ring(pf, &pf->qtag, i); if (error) return (error); } return (error); } /** * ixl_handle_mdd_event * * Called from interrupt handler to identify possibly malicious vfs * (But also detects events from the PF, as well) **/ void ixl_handle_mdd_event(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; bool mdd_detected = false; bool pf_mdd_detected = false; u32 reg; /* find what triggered the MDD event */ reg = rd32(hw, I40E_GL_MDET_TX); if (reg & I40E_GL_MDET_TX_VALID_MASK) { u8 pf_num = (reg & I40E_GL_MDET_TX_PF_NUM_MASK) >> I40E_GL_MDET_TX_PF_NUM_SHIFT; u8 event = (reg & I40E_GL_MDET_TX_EVENT_MASK) >> I40E_GL_MDET_TX_EVENT_SHIFT; u16 queue = (reg & I40E_GL_MDET_TX_QUEUE_MASK) >> I40E_GL_MDET_TX_QUEUE_SHIFT; 
device_printf(dev, "Malicious Driver Detection event %d" " on TX queue %d, pf number %d\n", event, queue, pf_num); wr32(hw, I40E_GL_MDET_TX, 0xffffffff); mdd_detected = true; } reg = rd32(hw, I40E_GL_MDET_RX); if (reg & I40E_GL_MDET_RX_VALID_MASK) { u8 pf_num = (reg & I40E_GL_MDET_RX_FUNCTION_MASK) >> I40E_GL_MDET_RX_FUNCTION_SHIFT; u8 event = (reg & I40E_GL_MDET_RX_EVENT_MASK) >> I40E_GL_MDET_RX_EVENT_SHIFT; u16 queue = (reg & I40E_GL_MDET_RX_QUEUE_MASK) >> I40E_GL_MDET_RX_QUEUE_SHIFT; device_printf(dev, "Malicious Driver Detection event %d" " on RX queue %d, pf number %d\n", event, queue, pf_num); wr32(hw, I40E_GL_MDET_RX, 0xffffffff); mdd_detected = true; } if (mdd_detected) { reg = rd32(hw, I40E_PF_MDET_TX); if (reg & I40E_PF_MDET_TX_VALID_MASK) { wr32(hw, I40E_PF_MDET_TX, 0xFFFF); device_printf(dev, "MDD TX event is for this function!"); pf_mdd_detected = true; } reg = rd32(hw, I40E_PF_MDET_RX); if (reg & I40E_PF_MDET_RX_VALID_MASK) { wr32(hw, I40E_PF_MDET_RX, 0xFFFF); device_printf(dev, "MDD RX event is for this function!"); pf_mdd_detected = true; } } /* re-enable mdd interrupt cause */ reg = rd32(hw, I40E_PFINT_ICR0_ENA); reg |= I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK; wr32(hw, I40E_PFINT_ICR0_ENA, reg); ixl_flush(hw); } void ixl_enable_intr(struct ixl_vsi *vsi) { struct ixl_pf *pf = (struct ixl_pf *)vsi->back; struct i40e_hw *hw = vsi->hw; struct ixl_queue *que = vsi->queues; if (pf->msix > 1) { for (int i = 0; i < vsi->num_queues; i++, que++) ixl_enable_queue(hw, que->me); } else ixl_enable_intr0(hw); } void ixl_disable_rings_intr(struct ixl_vsi *vsi) { struct i40e_hw *hw = vsi->hw; struct ixl_queue *que = vsi->queues; for (int i = 0; i < vsi->num_queues; i++, que++) ixl_disable_queue(hw, que->me); } void ixl_enable_intr0(struct i40e_hw *hw) { u32 reg; /* Use IXL_ITR_NONE so ITR isn't updated here */ reg = I40E_PFINT_DYN_CTL0_INTENA_MASK | I40E_PFINT_DYN_CTL0_CLEARPBA_MASK | (IXL_ITR_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT); wr32(hw, I40E_PFINT_DYN_CTL0, 
reg); } void ixl_disable_intr0(struct i40e_hw *hw) { u32 reg; reg = IXL_ITR_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT; wr32(hw, I40E_PFINT_DYN_CTL0, reg); ixl_flush(hw); } void ixl_enable_queue(struct i40e_hw *hw, int id) { u32 reg; reg = I40E_PFINT_DYN_CTLN_INTENA_MASK | I40E_PFINT_DYN_CTLN_CLEARPBA_MASK | (IXL_ITR_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT); wr32(hw, I40E_PFINT_DYN_CTLN(id), reg); } void ixl_disable_queue(struct i40e_hw *hw, int id) { u32 reg; reg = IXL_ITR_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT; wr32(hw, I40E_PFINT_DYN_CTLN(id), reg); } void ixl_update_stats_counters(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct ixl_vf *vf; struct i40e_hw_port_stats *nsd = &pf->stats; struct i40e_hw_port_stats *osd = &pf->stats_offsets; /* Update hw stats */ ixl_stat_update32(hw, I40E_GLPRT_CRCERRS(hw->port), pf->stat_offsets_loaded, &osd->crc_errors, &nsd->crc_errors); ixl_stat_update32(hw, I40E_GLPRT_ILLERRC(hw->port), pf->stat_offsets_loaded, &osd->illegal_bytes, &nsd->illegal_bytes); ixl_stat_update48(hw, I40E_GLPRT_GORCH(hw->port), I40E_GLPRT_GORCL(hw->port), pf->stat_offsets_loaded, &osd->eth.rx_bytes, &nsd->eth.rx_bytes); ixl_stat_update48(hw, I40E_GLPRT_GOTCH(hw->port), I40E_GLPRT_GOTCL(hw->port), pf->stat_offsets_loaded, &osd->eth.tx_bytes, &nsd->eth.tx_bytes); ixl_stat_update32(hw, I40E_GLPRT_RDPC(hw->port), pf->stat_offsets_loaded, &osd->eth.rx_discards, &nsd->eth.rx_discards); ixl_stat_update48(hw, I40E_GLPRT_UPRCH(hw->port), I40E_GLPRT_UPRCL(hw->port), pf->stat_offsets_loaded, &osd->eth.rx_unicast, &nsd->eth.rx_unicast); ixl_stat_update48(hw, I40E_GLPRT_UPTCH(hw->port), I40E_GLPRT_UPTCL(hw->port), pf->stat_offsets_loaded, &osd->eth.tx_unicast, &nsd->eth.tx_unicast); ixl_stat_update48(hw, I40E_GLPRT_MPRCH(hw->port), I40E_GLPRT_MPRCL(hw->port), pf->stat_offsets_loaded, &osd->eth.rx_multicast, &nsd->eth.rx_multicast); ixl_stat_update48(hw, I40E_GLPRT_MPTCH(hw->port), I40E_GLPRT_MPTCL(hw->port), 
pf->stat_offsets_loaded, &osd->eth.tx_multicast, &nsd->eth.tx_multicast); ixl_stat_update48(hw, I40E_GLPRT_BPRCH(hw->port), I40E_GLPRT_BPRCL(hw->port), pf->stat_offsets_loaded, &osd->eth.rx_broadcast, &nsd->eth.rx_broadcast); ixl_stat_update48(hw, I40E_GLPRT_BPTCH(hw->port), I40E_GLPRT_BPTCL(hw->port), pf->stat_offsets_loaded, &osd->eth.tx_broadcast, &nsd->eth.tx_broadcast); ixl_stat_update32(hw, I40E_GLPRT_TDOLD(hw->port), pf->stat_offsets_loaded, &osd->tx_dropped_link_down, &nsd->tx_dropped_link_down); ixl_stat_update32(hw, I40E_GLPRT_MLFC(hw->port), pf->stat_offsets_loaded, &osd->mac_local_faults, &nsd->mac_local_faults); ixl_stat_update32(hw, I40E_GLPRT_MRFC(hw->port), pf->stat_offsets_loaded, &osd->mac_remote_faults, &nsd->mac_remote_faults); ixl_stat_update32(hw, I40E_GLPRT_RLEC(hw->port), pf->stat_offsets_loaded, &osd->rx_length_errors, &nsd->rx_length_errors); /* Flow control (LFC) stats */ ixl_stat_update32(hw, I40E_GLPRT_LXONRXC(hw->port), pf->stat_offsets_loaded, &osd->link_xon_rx, &nsd->link_xon_rx); ixl_stat_update32(hw, I40E_GLPRT_LXONTXC(hw->port), pf->stat_offsets_loaded, &osd->link_xon_tx, &nsd->link_xon_tx); ixl_stat_update32(hw, I40E_GLPRT_LXOFFRXC(hw->port), pf->stat_offsets_loaded, &osd->link_xoff_rx, &nsd->link_xoff_rx); ixl_stat_update32(hw, I40E_GLPRT_LXOFFTXC(hw->port), pf->stat_offsets_loaded, &osd->link_xoff_tx, &nsd->link_xoff_tx); /* Packet size stats rx */ ixl_stat_update48(hw, I40E_GLPRT_PRC64H(hw->port), I40E_GLPRT_PRC64L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_64, &nsd->rx_size_64); ixl_stat_update48(hw, I40E_GLPRT_PRC127H(hw->port), I40E_GLPRT_PRC127L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_127, &nsd->rx_size_127); ixl_stat_update48(hw, I40E_GLPRT_PRC255H(hw->port), I40E_GLPRT_PRC255L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_255, &nsd->rx_size_255); ixl_stat_update48(hw, I40E_GLPRT_PRC511H(hw->port), I40E_GLPRT_PRC511L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_511, &nsd->rx_size_511); 
ixl_stat_update48(hw, I40E_GLPRT_PRC1023H(hw->port), I40E_GLPRT_PRC1023L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_1023, &nsd->rx_size_1023); ixl_stat_update48(hw, I40E_GLPRT_PRC1522H(hw->port), I40E_GLPRT_PRC1522L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_1522, &nsd->rx_size_1522); ixl_stat_update48(hw, I40E_GLPRT_PRC9522H(hw->port), I40E_GLPRT_PRC9522L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_big, &nsd->rx_size_big); /* Packet size stats tx */ ixl_stat_update48(hw, I40E_GLPRT_PTC64H(hw->port), I40E_GLPRT_PTC64L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_64, &nsd->tx_size_64); ixl_stat_update48(hw, I40E_GLPRT_PTC127H(hw->port), I40E_GLPRT_PTC127L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_127, &nsd->tx_size_127); ixl_stat_update48(hw, I40E_GLPRT_PTC255H(hw->port), I40E_GLPRT_PTC255L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_255, &nsd->tx_size_255); ixl_stat_update48(hw, I40E_GLPRT_PTC511H(hw->port), I40E_GLPRT_PTC511L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_511, &nsd->tx_size_511); ixl_stat_update48(hw, I40E_GLPRT_PTC1023H(hw->port), I40E_GLPRT_PTC1023L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_1023, &nsd->tx_size_1023); ixl_stat_update48(hw, I40E_GLPRT_PTC1522H(hw->port), I40E_GLPRT_PTC1522L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_1522, &nsd->tx_size_1522); ixl_stat_update48(hw, I40E_GLPRT_PTC9522H(hw->port), I40E_GLPRT_PTC9522L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_big, &nsd->tx_size_big); ixl_stat_update32(hw, I40E_GLPRT_RUC(hw->port), pf->stat_offsets_loaded, &osd->rx_undersize, &nsd->rx_undersize); ixl_stat_update32(hw, I40E_GLPRT_RFC(hw->port), pf->stat_offsets_loaded, &osd->rx_fragments, &nsd->rx_fragments); ixl_stat_update32(hw, I40E_GLPRT_ROC(hw->port), pf->stat_offsets_loaded, &osd->rx_oversize, &nsd->rx_oversize); ixl_stat_update32(hw, I40E_GLPRT_RJC(hw->port), pf->stat_offsets_loaded, &osd->rx_jabber, &nsd->rx_jabber); pf->stat_offsets_loaded = true; /* End hw stats 
*/ /* Update vsi stats */ ixl_update_vsi_stats(vsi); for (int i = 0; i < pf->num_vfs; i++) { vf = &pf->vfs[i]; if (vf->vf_flags & VF_FLAG_ENABLED) ixl_update_eth_stats(&pf->vfs[i].vsi); } } int ixl_rebuild_hw_structs_after_reset(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; device_t dev = pf->dev; bool is_up = false; int error = 0; is_up = !!(vsi->ifp->if_drv_flags & IFF_DRV_RUNNING); /* Teardown */ if (is_up) ixl_stop(pf); error = i40e_shutdown_lan_hmc(hw); if (error) device_printf(dev, "Shutdown LAN HMC failed with code %d\n", error); ixl_disable_intr0(hw); ixl_teardown_adminq_msix(pf); error = i40e_shutdown_adminq(hw); if (error) device_printf(dev, "Shutdown Admin queue failed with code %d\n", error); /* Setup */ error = i40e_init_adminq(hw); if (error != 0 && error != I40E_ERR_FIRMWARE_API_VERSION) { device_printf(dev, "Unable to initialize Admin Queue, error %d\n", error); } error = ixl_setup_adminq_msix(pf); if (error) { device_printf(dev, "ixl_setup_adminq_msix error: %d\n", error); } ixl_configure_intr0_msix(pf); ixl_enable_intr0(hw); error = i40e_init_lan_hmc(hw, hw->func_caps.num_tx_qp, hw->func_caps.num_rx_qp, 0, 0); if (error) { device_printf(dev, "init_lan_hmc failed: %d\n", error); } error = i40e_configure_lan_hmc(hw, I40E_HMC_MODEL_DIRECT_ONLY); if (error) { device_printf(dev, "configure_lan_hmc failed: %d\n", error); } if (is_up) ixl_init(pf); return (0); } void ixl_handle_empr_reset(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; int count = 0; u32 reg; /* Typically finishes within 3-4 seconds */ while (count++ < 100) { reg = rd32(hw, I40E_GLGEN_RSTAT) & I40E_GLGEN_RSTAT_DEVSTATE_MASK; if (reg) i40e_msec_delay(100); else break; } ixl_dbg(pf, IXL_DBG_INFO, "EMPR reset wait count: %d\n", count); device_printf(dev, "Rebuilding driver state...\n"); ixl_rebuild_hw_structs_after_reset(pf); device_printf(dev, "Rebuilding driver state done.\n"); atomic_clear_int(&pf->state, 
IXL_PF_STATE_EMPR_RESETTING); } /* ** Tasklet handler for MSIX Adminq interrupts ** - do outside interrupt since it might sleep */ void ixl_do_adminq(void *context, int pending) { struct ixl_pf *pf = context; struct i40e_hw *hw = &pf->hw; struct i40e_arq_event_info event; i40e_status ret; device_t dev = pf->dev; u32 loop = 0; u16 opcode, result; if (pf->state & IXL_PF_STATE_EMPR_RESETTING) { /* Flag cleared at end of this function */ ixl_handle_empr_reset(pf); return; } /* Admin Queue handling */ event.buf_len = IXL_AQ_BUF_SZ; event.msg_buf = malloc(event.buf_len, M_DEVBUF, M_NOWAIT | M_ZERO); if (!event.msg_buf) { device_printf(dev, "%s: Unable to allocate memory for Admin" " Queue event!\n", __func__); return; } IXL_PF_LOCK(pf); /* clean and process any events */ do { ret = i40e_clean_arq_element(hw, &event, &result); if (ret) break; opcode = LE16_TO_CPU(event.desc.opcode); ixl_dbg(pf, IXL_DBG_AQ, "Admin Queue event: %#06x\n", opcode); switch (opcode) { case i40e_aqc_opc_get_link_status: ixl_link_event(pf, &event); break; case i40e_aqc_opc_send_msg_to_pf: #ifdef PCI_IOV ixl_handle_vf_msg(pf, &event); #endif break; case i40e_aqc_opc_event_lan_overflow: default: break; } } while (result && (loop++ < IXL_ADM_LIMIT)); free(event.msg_buf, M_DEVBUF); /* * If there are still messages to process, reschedule ourselves. * Otherwise, re-enable our interrupt. */ if (result > 0) taskqueue_enqueue(pf->tq, &pf->adminq); else ixl_enable_intr0(hw); IXL_PF_UNLOCK(pf); } /** * Update VSI-specific ethernet statistics counters. 
**/ void ixl_update_eth_stats(struct ixl_vsi *vsi) { struct ixl_pf *pf = (struct ixl_pf *)vsi->back; struct i40e_hw *hw = &pf->hw; struct i40e_eth_stats *es; struct i40e_eth_stats *oes; struct i40e_hw_port_stats *nsd; u16 stat_idx = vsi->info.stat_counter_idx; es = &vsi->eth_stats; oes = &vsi->eth_stats_offsets; nsd = &pf->stats; /* Gather up the stats that the hw collects */ ixl_stat_update32(hw, I40E_GLV_TEPC(stat_idx), vsi->stat_offsets_loaded, &oes->tx_errors, &es->tx_errors); ixl_stat_update32(hw, I40E_GLV_RDPC(stat_idx), vsi->stat_offsets_loaded, &oes->rx_discards, &es->rx_discards); ixl_stat_update48(hw, I40E_GLV_GORCH(stat_idx), I40E_GLV_GORCL(stat_idx), vsi->stat_offsets_loaded, &oes->rx_bytes, &es->rx_bytes); ixl_stat_update48(hw, I40E_GLV_UPRCH(stat_idx), I40E_GLV_UPRCL(stat_idx), vsi->stat_offsets_loaded, &oes->rx_unicast, &es->rx_unicast); ixl_stat_update48(hw, I40E_GLV_MPRCH(stat_idx), I40E_GLV_MPRCL(stat_idx), vsi->stat_offsets_loaded, &oes->rx_multicast, &es->rx_multicast); ixl_stat_update48(hw, I40E_GLV_BPRCH(stat_idx), I40E_GLV_BPRCL(stat_idx), vsi->stat_offsets_loaded, &oes->rx_broadcast, &es->rx_broadcast); ixl_stat_update48(hw, I40E_GLV_GOTCH(stat_idx), I40E_GLV_GOTCL(stat_idx), vsi->stat_offsets_loaded, &oes->tx_bytes, &es->tx_bytes); ixl_stat_update48(hw, I40E_GLV_UPTCH(stat_idx), I40E_GLV_UPTCL(stat_idx), vsi->stat_offsets_loaded, &oes->tx_unicast, &es->tx_unicast); ixl_stat_update48(hw, I40E_GLV_MPTCH(stat_idx), I40E_GLV_MPTCL(stat_idx), vsi->stat_offsets_loaded, &oes->tx_multicast, &es->tx_multicast); ixl_stat_update48(hw, I40E_GLV_BPTCH(stat_idx), I40E_GLV_BPTCL(stat_idx), vsi->stat_offsets_loaded, &oes->tx_broadcast, &es->tx_broadcast); vsi->stat_offsets_loaded = true; } void ixl_update_vsi_stats(struct ixl_vsi *vsi) { struct ixl_pf *pf; struct ifnet *ifp; struct i40e_eth_stats *es; u64 tx_discards; struct i40e_hw_port_stats *nsd; pf = vsi->back; ifp = vsi->ifp; es = &vsi->eth_stats; nsd = &pf->stats; ixl_update_eth_stats(vsi); 
tx_discards = es->tx_discards + nsd->tx_dropped_link_down; for (int i = 0; i < vsi->num_queues; i++) tx_discards += vsi->queues[i].txr.br->br_drops; /* Update ifnet stats */ IXL_SET_IPACKETS(vsi, es->rx_unicast + es->rx_multicast + es->rx_broadcast); IXL_SET_OPACKETS(vsi, es->tx_unicast + es->tx_multicast + es->tx_broadcast); IXL_SET_IBYTES(vsi, es->rx_bytes); IXL_SET_OBYTES(vsi, es->tx_bytes); IXL_SET_IMCASTS(vsi, es->rx_multicast); IXL_SET_OMCASTS(vsi, es->tx_multicast); IXL_SET_IERRORS(vsi, nsd->crc_errors + nsd->illegal_bytes + nsd->rx_undersize + nsd->rx_oversize + nsd->rx_fragments + nsd->rx_jabber); IXL_SET_OERRORS(vsi, es->tx_errors); IXL_SET_IQDROPS(vsi, es->rx_discards + nsd->eth.rx_discards); IXL_SET_OQDROPS(vsi, tx_discards); IXL_SET_NOPROTO(vsi, es->rx_unknown_protocol); IXL_SET_COLLISIONS(vsi, 0); } /** * Reset all of the stats for the given pf **/ void ixl_pf_reset_stats(struct ixl_pf *pf) { bzero(&pf->stats, sizeof(struct i40e_hw_port_stats)); bzero(&pf->stats_offsets, sizeof(struct i40e_hw_port_stats)); pf->stat_offsets_loaded = false; } /** * Resets all stats of the given vsi **/ void ixl_vsi_reset_stats(struct ixl_vsi *vsi) { bzero(&vsi->eth_stats, sizeof(struct i40e_eth_stats)); bzero(&vsi->eth_stats_offsets, sizeof(struct i40e_eth_stats)); vsi->stat_offsets_loaded = false; } /** * Read and update a 48 bit stat from the hw * * Since the device stats are not reset at PFReset, they likely will not * be zeroed when the driver starts. We'll save the first values read * and use them as offsets to be subtracted from the raw values in order * to report stats that count from zero. **/ void ixl_stat_update48(struct i40e_hw *hw, u32 hireg, u32 loreg, bool offset_loaded, u64 *offset, u64 *stat) { u64 new_data; #if defined(__FreeBSD__) && (__FreeBSD_version >= 1000000) && defined(__amd64__) new_data = rd64(hw, loreg); #else /* * Use two rd32's instead of one rd64; FreeBSD versions before * 10 don't support 64-bit bus reads/writes. 
*/ new_data = rd32(hw, loreg); new_data |= ((u64)(rd32(hw, hireg) & 0xFFFF)) << 32; #endif if (!offset_loaded) *offset = new_data; if (new_data >= *offset) *stat = new_data - *offset; else *stat = (new_data + ((u64)1 << 48)) - *offset; *stat &= 0xFFFFFFFFFFFFULL; } /** * Read and update a 32 bit stat from the hw **/ void ixl_stat_update32(struct i40e_hw *hw, u32 reg, bool offset_loaded, u64 *offset, u64 *stat) { u32 new_data; new_data = rd32(hw, reg); if (!offset_loaded) *offset = new_data; if (new_data >= *offset) *stat = (u32)(new_data - *offset); else *stat = (u32)((new_data + ((u64)1 << 32)) - *offset); } void ixl_add_device_sysctls(struct ixl_pf *pf) { device_t dev = pf->dev; struct i40e_hw *hw = &pf->hw; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid_list *ctx_list = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); struct sysctl_oid *debug_node; struct sysctl_oid_list *debug_list; struct sysctl_oid *fec_node; struct sysctl_oid_list *fec_list; /* Set up sysctls */ SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "fc", CTLTYPE_INT | CTLFLAG_RW, pf, 0, ixl_set_flowcntl, "I", IXL_SYSCTL_HELP_FC); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "advertise_speed", CTLTYPE_INT | CTLFLAG_RW, pf, 0, ixl_set_advertise, "I", IXL_SYSCTL_HELP_SET_ADVERTISE); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "current_speed", CTLTYPE_STRING | CTLFLAG_RD, pf, 0, ixl_current_speed, "A", "Current Port Speed"); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD, pf, 0, ixl_sysctl_show_fw, "A", "Firmware version"); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "unallocated_queues", CTLTYPE_INT | CTLFLAG_RD, pf, 0, ixl_sysctl_unallocated_queues, "I", "Queues not allocated to a PF or VF"); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "tx_itr", CTLTYPE_INT | CTLFLAG_RW, pf, 0, ixl_sysctl_pf_tx_itr, "I", "Immediately set TX ITR value for all queues"); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "rx_itr", CTLTYPE_INT | CTLFLAG_RW, pf, 0, ixl_sysctl_pf_rx_itr, 
"I", "Immediately set RX ITR value for all queues"); SYSCTL_ADD_INT(ctx, ctx_list, OID_AUTO, "dynamic_rx_itr", CTLFLAG_RW, &pf->dynamic_rx_itr, 0, "Enable dynamic RX ITR"); SYSCTL_ADD_INT(ctx, ctx_list, OID_AUTO, "dynamic_tx_itr", CTLFLAG_RW, &pf->dynamic_tx_itr, 0, "Enable dynamic TX ITR"); /* Add FEC sysctls for 25G adapters */ /* * XXX: These settings can be changed, but that isn't supported, * so these are read-only for now. */ if (hw->device_id == I40E_DEV_ID_25G_B || hw->device_id == I40E_DEV_ID_25G_SFP28) { fec_node = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "fec", CTLFLAG_RD, NULL, "FEC Sysctls"); fec_list = SYSCTL_CHILDREN(fec_node); SYSCTL_ADD_PROC(ctx, fec_list, OID_AUTO, "fc_ability", CTLTYPE_INT | CTLFLAG_RD, pf, 0, ixl_sysctl_fec_fc_ability, "I", "FC FEC ability enabled"); SYSCTL_ADD_PROC(ctx, fec_list, OID_AUTO, "rs_ability", CTLTYPE_INT | CTLFLAG_RD, pf, 0, ixl_sysctl_fec_rs_ability, "I", "RS FEC ability enabled"); SYSCTL_ADD_PROC(ctx, fec_list, OID_AUTO, "fc_requested", CTLTYPE_INT | CTLFLAG_RD, pf, 0, ixl_sysctl_fec_fc_request, "I", "FC FEC mode requested on link"); SYSCTL_ADD_PROC(ctx, fec_list, OID_AUTO, "rs_requested", CTLTYPE_INT | CTLFLAG_RD, pf, 0, ixl_sysctl_fec_rs_request, "I", "RS FEC mode requested on link"); SYSCTL_ADD_PROC(ctx, fec_list, OID_AUTO, "auto_fec_enabled", CTLTYPE_INT | CTLFLAG_RD, pf, 0, ixl_sysctl_fec_auto_enable, "I", "Let FW decide FEC ability/request modes"); } /* Add sysctls meant to print debug information, but don't list them * in "sysctl -a" output. 
*/ debug_node = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "debug", CTLFLAG_RD | CTLFLAG_SKIP, NULL, "Debug Sysctls"); debug_list = SYSCTL_CHILDREN(debug_node); SYSCTL_ADD_UINT(ctx, debug_list, OID_AUTO, "shared_debug_mask", CTLFLAG_RW, &pf->hw.debug_mask, 0, "Shared code debug message level"); SYSCTL_ADD_UINT(ctx, debug_list, OID_AUTO, "core_debug_mask", CTLFLAG_RW, &pf->dbg_mask, 0, "Non-hared code debug message level"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "link_status", CTLTYPE_STRING | CTLFLAG_RD, pf, 0, ixl_sysctl_link_status, "A", IXL_SYSCTL_HELP_LINK_STATUS); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_abilities", CTLTYPE_STRING | CTLFLAG_RD, pf, 0, ixl_sysctl_phy_abilities, "A", "PHY Abilities"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "filter_list", CTLTYPE_STRING | CTLFLAG_RD, pf, 0, ixl_sysctl_sw_filter_list, "A", "SW Filter List"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "hw_res_alloc", CTLTYPE_STRING | CTLFLAG_RD, pf, 0, ixl_sysctl_hw_res_alloc, "A", "HW Resource Allocation"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "switch_config", CTLTYPE_STRING | CTLFLAG_RD, pf, 0, ixl_sysctl_switch_config, "A", "HW Switch Configuration"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "rss_key", CTLTYPE_STRING | CTLFLAG_RD, pf, 0, ixl_sysctl_hkey, "A", "View RSS key"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "rss_lut", CTLTYPE_STRING | CTLFLAG_RD, pf, 0, ixl_sysctl_hlut, "A", "View RSS lookup table"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "rss_hena", CTLTYPE_ULONG | CTLFLAG_RD, pf, 0, ixl_sysctl_hena, "LU", "View enabled packet types for RSS"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "disable_fw_link_management", CTLTYPE_INT | CTLFLAG_WR, pf, 0, ixl_sysctl_fw_link_management, "I", "Disable FW Link Management"); if (pf->has_i2c) { SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "read_i2c_byte", CTLTYPE_INT | CTLFLAG_RW, pf, 0, ixl_sysctl_read_i2c_byte, "I", "Read byte from I2C bus"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "write_i2c_byte", 
CTLTYPE_INT | CTLFLAG_RW, pf, 0, ixl_sysctl_write_i2c_byte, "I", "Write byte to I2C bus"); } #ifdef PCI_IOV SYSCTL_ADD_UINT(ctx, debug_list, OID_AUTO, "vc_debug_level", CTLFLAG_RW, &pf->vc_debug_lvl, 0, "PF/VF Virtual Channel debug level"); #endif } /* * Primarily for finding out how many queues can be assigned to VFs, * at runtime. */ static int ixl_sysctl_unallocated_queues(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; int queues; IXL_PF_LOCK(pf); queues = (int)ixl_pf_qmgr_get_num_free(&pf->qmgr); IXL_PF_UNLOCK(pf); return sysctl_handle_int(oidp, NULL, queues, req); } /* ** Set flow control using sysctl: ** 0 - off ** 1 - rx pause ** 2 - tx pause ** 3 - full */ int ixl_set_flowcntl(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; int requested_fc, error = 0; enum i40e_status_code aq_error = 0; u8 fc_aq_err = 0; /* Get request */ requested_fc = pf->fc; error = sysctl_handle_int(oidp, &requested_fc, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (requested_fc < 0 || requested_fc > 3) { device_printf(dev, "Invalid fc mode; valid modes are 0 through 3\n"); return (EINVAL); } /* Set fc ability for port */ hw->fc.requested_mode = requested_fc; aq_error = i40e_set_fc(hw, &fc_aq_err, TRUE); if (aq_error) { device_printf(dev, "%s: Error setting new fc mode %d; fc_err %#x\n", __func__, aq_error, fc_aq_err); return (EIO); } pf->fc = requested_fc; /* Get new link state */ i40e_msec_delay(250); hw->phy.get_link_info = TRUE; i40e_get_link_status(hw, &pf->link_up); return (0); } char * ixl_aq_speed_to_str(enum i40e_aq_link_speed link_speed) { int index; char *speeds[] = { "Unknown", "100 Mbps", "1 Gbps", "10 Gbps", "40 Gbps", "20 Gbps", "25 Gbps", }; switch (link_speed) { case I40E_LINK_SPEED_100MB: index = 1; break; case I40E_LINK_SPEED_1GB: index = 2; break; case I40E_LINK_SPEED_10GB: index = 3; break; case I40E_LINK_SPEED_40GB: index = 4; break; case 
I40E_LINK_SPEED_20GB: index = 5; break; case I40E_LINK_SPEED_25GB: index = 6; break; case I40E_LINK_SPEED_UNKNOWN: default: index = 0; break; } return speeds[index]; } int ixl_current_speed(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; int error = 0; ixl_update_link_status(pf); error = sysctl_handle_string(oidp, ixl_aq_speed_to_str(hw->phy.link_info.link_speed), 8, req); return (error); } static u8 ixl_convert_sysctl_aq_link_speed(u8 speeds, bool to_aq) { static u16 speedmap[6] = { (I40E_LINK_SPEED_100MB | (0x1 << 8)), (I40E_LINK_SPEED_1GB | (0x2 << 8)), (I40E_LINK_SPEED_10GB | (0x4 << 8)), (I40E_LINK_SPEED_20GB | (0x8 << 8)), (I40E_LINK_SPEED_25GB | (0x10 << 8)), (I40E_LINK_SPEED_40GB | (0x20 << 8)) }; u8 retval = 0; for (int i = 0; i < 6; i++) { if (to_aq) retval |= (speeds & (speedmap[i] >> 8)) ? (speedmap[i] & 0xff) : 0; else retval |= (speeds & speedmap[i]) ? (speedmap[i] >> 8) : 0; } return (retval); } int ixl_set_advertised_speeds(struct ixl_pf *pf, int speeds) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct i40e_aq_get_phy_abilities_resp abilities; struct i40e_aq_set_phy_config config; enum i40e_status_code aq_error = 0; /* Get current capability information */ aq_error = i40e_aq_get_phy_capabilities(hw, FALSE, FALSE, &abilities, NULL); if (aq_error) { device_printf(dev, "%s: Error getting phy capabilities %d," " aq error: %d\n", __func__, aq_error, hw->aq.asq_last_status); return (EIO); } /* Prepare new config */ bzero(&config, sizeof(config)); config.link_speed = ixl_convert_sysctl_aq_link_speed(speeds, true); config.phy_type = abilities.phy_type; config.phy_type_ext = abilities.phy_type_ext; config.abilities = abilities.abilities | I40E_AQ_PHY_ENABLE_ATOMIC_LINK; config.eee_capability = abilities.eee_capability; config.eeer = abilities.eeer_val; config.low_power_ctrl = abilities.d3_lpan; /* Do aq command & restart link */ aq_error = i40e_aq_set_phy_config(hw, &config, NULL); if (aq_error) { 
device_printf(dev, "%s: Error setting new phy config %d," " aq error: %d\n", __func__, aq_error, hw->aq.asq_last_status); return (EIO); } return (0); } /* ** Control link advertise speed: ** Flags: ** 0x1 - advertise 100 Mb ** 0x2 - advertise 1G ** 0x4 - advertise 10G ** 0x8 - advertise 20G ** 0x10 - advertise 25G ** 0x20 - advertise 40G ** ** Set to 0 to disable link */ int ixl_set_advertise(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; u8 converted_speeds; int requested_ls = 0; int error = 0; /* Read in new mode */ requested_ls = pf->advertised_speed; error = sysctl_handle_int(oidp, &requested_ls, 0, req); if ((error) || (req->newptr == NULL)) return (error); /* Check if changing speeds is supported */ switch (hw->device_id) { case I40E_DEV_ID_25G_B: case I40E_DEV_ID_25G_SFP28: device_printf(dev, "Changing advertised speeds not supported" " on this device.\n"); return (EINVAL); } if (requested_ls < 0 || requested_ls > 0xff) { } /* Check for valid value */ converted_speeds = ixl_convert_sysctl_aq_link_speed((u8)requested_ls, true); if ((converted_speeds | pf->supported_speeds) != pf->supported_speeds) { device_printf(dev, "Invalid advertised speed; " "valid flags are: 0x%02x\n", ixl_convert_sysctl_aq_link_speed(pf->supported_speeds, false)); return (EINVAL); } error = ixl_set_advertised_speeds(pf, requested_ls); if (error) return (error); pf->advertised_speed = requested_ls; ixl_update_link_status(pf); return (0); } /* * Input: bitmap of enum i40e_aq_link_speed */ static u64 ixl_max_aq_speed_to_value(u8 link_speeds) { if (link_speeds & I40E_LINK_SPEED_40GB) return IF_Gbps(40); if (link_speeds & I40E_LINK_SPEED_25GB) return IF_Gbps(25); if (link_speeds & I40E_LINK_SPEED_20GB) return IF_Gbps(20); if (link_speeds & I40E_LINK_SPEED_10GB) return IF_Gbps(10); if (link_speeds & I40E_LINK_SPEED_1GB) return IF_Gbps(1); if (link_speeds & I40E_LINK_SPEED_100MB) return IF_Mbps(100); else /* Minimum 
supported link speed */ return IF_Mbps(100); } /* ** Get the width and transaction speed of ** the bus this adapter is plugged into. */ void ixl_get_bus_info(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; u16 link; u32 offset, num_ports; u64 max_speed; /* Some devices don't use PCIE */ if (hw->mac.type == I40E_MAC_X722) return; /* Read PCI Express Capabilities Link Status Register */ pci_find_cap(dev, PCIY_EXPRESS, &offset); link = pci_read_config(dev, offset + PCIER_LINK_STA, 2); /* Fill out hw struct with PCIE info */ i40e_set_pci_config_data(hw, link); /* Use info to print out bandwidth messages */ device_printf(dev,"PCI Express Bus: Speed %s %s\n", ((hw->bus.speed == i40e_bus_speed_8000) ? "8.0GT/s": (hw->bus.speed == i40e_bus_speed_5000) ? "5.0GT/s": (hw->bus.speed == i40e_bus_speed_2500) ? "2.5GT/s":"Unknown"), (hw->bus.width == i40e_bus_width_pcie_x8) ? "Width x8" : (hw->bus.width == i40e_bus_width_pcie_x4) ? "Width x4" : (hw->bus.width == i40e_bus_width_pcie_x2) ? "Width x2" : (hw->bus.width == i40e_bus_width_pcie_x1) ? "Width x1" : ("Unknown")); /* * If adapter is in slot with maximum supported speed, * no warning message needs to be printed out. 
*/ if (hw->bus.speed >= i40e_bus_speed_8000 && hw->bus.width >= i40e_bus_width_pcie_x8) return; num_ports = bitcount32(hw->func_caps.valid_functions); max_speed = ixl_max_aq_speed_to_value(pf->supported_speeds) / 1000000; if ((num_ports * max_speed) > hw->bus.speed * hw->bus.width) { device_printf(dev, "PCI-Express bandwidth available" " for this device may be insufficient for" " optimal performance.\n"); device_printf(dev, "Please move the device to a different" " PCI-e link with more lanes and/or higher" " transfer rate.\n"); } } static int ixl_sysctl_show_fw(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; struct sbuf *sbuf; sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); ixl_nvm_version_str(hw, sbuf); sbuf_finish(sbuf); sbuf_delete(sbuf); return 0; } void ixl_print_nvm_cmd(device_t dev, struct i40e_nvm_access *nvma) { if ((nvma->command == I40E_NVM_READ) && ((nvma->config & 0xFF) == 0xF) && (((nvma->config & 0xF00) >> 8) == 0xF) && (nvma->offset == 0) && (nvma->data_size == 1)) { // device_printf(dev, "- Get Driver Status Command\n"); } else if (nvma->command == I40E_NVM_READ) { } else { switch (nvma->command) { case 0xB: device_printf(dev, "- command: I40E_NVM_READ\n"); break; case 0xC: device_printf(dev, "- command: I40E_NVM_WRITE\n"); break; default: device_printf(dev, "- command: unknown 0x%08x\n", nvma->command); break; } device_printf(dev, "- config (ptr) : 0x%02x\n", nvma->config & 0xFF); device_printf(dev, "- config (flags): 0x%01x\n", (nvma->config & 0xF00) >> 8); device_printf(dev, "- offset : 0x%08x\n", nvma->offset); device_printf(dev, "- data_s : 0x%08x\n", nvma->data_size); } } int ixl_handle_nvmupd_cmd(struct ixl_pf *pf, struct ifdrv *ifd) { struct i40e_hw *hw = &pf->hw; struct i40e_nvm_access *nvma; device_t dev = pf->dev; enum i40e_status_code status = 0; int perrno; DEBUGFUNC("ixl_handle_nvmupd_cmd"); /* Sanity checks */ if (ifd->ifd_len < sizeof(struct i40e_nvm_access) || ifd->ifd_data == NULL) 
{ device_printf(dev, "%s: incorrect ifdrv length or data pointer\n", __func__); device_printf(dev, "%s: ifdrv length: %lu, sizeof(struct i40e_nvm_access): %lu\n", __func__, ifd->ifd_len, sizeof(struct i40e_nvm_access)); device_printf(dev, "%s: data pointer: %p\n", __func__, ifd->ifd_data); return (EINVAL); } nvma = (struct i40e_nvm_access *)ifd->ifd_data; if (pf->dbg_mask & IXL_DBG_NVMUPD) ixl_print_nvm_cmd(dev, nvma); if (pf->state & IXL_PF_STATE_EMPR_RESETTING) { int count = 0; while (count++ < 100) { i40e_msec_delay(100); if (!(pf->state & IXL_PF_STATE_EMPR_RESETTING)) break; } } if (!(pf->state & IXL_PF_STATE_EMPR_RESETTING)) { IXL_PF_LOCK(pf); status = i40e_nvmupd_command(hw, nvma, nvma->data, &perrno); IXL_PF_UNLOCK(pf); } else { perrno = -EBUSY; } if (status) device_printf(dev, "i40e_nvmupd_command status %s, perrno %d\n", i40e_stat_str(hw, status), perrno); /* * -EPERM is actually ERESTART, which the kernel interprets as it needing * to run this ioctl again. So use -EACCES for -EPERM instead. */ if (perrno == -EPERM) return (-EACCES); else return (perrno); } /********************************************************************* * * Media Ioctl callback * * This routine is called whenever the user queries the status of * the interface using ifconfig. 
* **********************************************************************/ void ixl_media_status(struct ifnet * ifp, struct ifmediareq * ifmr) { struct ixl_vsi *vsi = ifp->if_softc; struct ixl_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; INIT_DEBUGOUT("ixl_media_status: begin"); IXL_PF_LOCK(pf); hw->phy.get_link_info = TRUE; i40e_get_link_status(hw, &pf->link_up); ixl_update_link_status(pf); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!pf->link_up) { IXL_PF_UNLOCK(pf); return; } ifmr->ifm_status |= IFM_ACTIVE; /* Hardware always does full-duplex */ ifmr->ifm_active |= IFM_FDX; switch (hw->phy.link_info.phy_type) { /* 100 M */ case I40E_PHY_TYPE_100BASE_TX: ifmr->ifm_active |= IFM_100_TX; break; /* 1 G */ case I40E_PHY_TYPE_1000BASE_T: ifmr->ifm_active |= IFM_1000_T; break; case I40E_PHY_TYPE_1000BASE_SX: ifmr->ifm_active |= IFM_1000_SX; break; case I40E_PHY_TYPE_1000BASE_LX: ifmr->ifm_active |= IFM_1000_LX; break; case I40E_PHY_TYPE_1000BASE_T_OPTICAL: ifmr->ifm_active |= IFM_OTHER; break; /* 10 G */ case I40E_PHY_TYPE_10GBASE_SFPP_CU: ifmr->ifm_active |= IFM_10G_TWINAX; break; case I40E_PHY_TYPE_10GBASE_SR: ifmr->ifm_active |= IFM_10G_SR; break; case I40E_PHY_TYPE_10GBASE_LR: ifmr->ifm_active |= IFM_10G_LR; break; case I40E_PHY_TYPE_10GBASE_T: ifmr->ifm_active |= IFM_10G_T; break; case I40E_PHY_TYPE_XAUI: case I40E_PHY_TYPE_XFI: case I40E_PHY_TYPE_10GBASE_AOC: ifmr->ifm_active |= IFM_OTHER; break; /* 25 G */ case I40E_PHY_TYPE_25GBASE_KR: ifmr->ifm_active |= IFM_25G_KR; break; case I40E_PHY_TYPE_25GBASE_CR: ifmr->ifm_active |= IFM_25G_CR; break; case I40E_PHY_TYPE_25GBASE_SR: ifmr->ifm_active |= IFM_25G_SR; break; case I40E_PHY_TYPE_25GBASE_LR: ifmr->ifm_active |= IFM_UNKNOWN; break; /* 40 G */ case I40E_PHY_TYPE_40GBASE_CR4: case I40E_PHY_TYPE_40GBASE_CR4_CU: ifmr->ifm_active |= IFM_40G_CR4; break; case I40E_PHY_TYPE_40GBASE_SR4: ifmr->ifm_active |= IFM_40G_SR4; break; case I40E_PHY_TYPE_40GBASE_LR4: ifmr->ifm_active |= IFM_40G_LR4; 
break; case I40E_PHY_TYPE_XLAUI: ifmr->ifm_active |= IFM_OTHER; break; case I40E_PHY_TYPE_1000BASE_KX: ifmr->ifm_active |= IFM_1000_KX; break; case I40E_PHY_TYPE_SGMII: ifmr->ifm_active |= IFM_1000_SGMII; break; /* ERJ: What's the difference between these? */ case I40E_PHY_TYPE_10GBASE_CR1_CU: case I40E_PHY_TYPE_10GBASE_CR1: ifmr->ifm_active |= IFM_10G_CR1; break; case I40E_PHY_TYPE_10GBASE_KX4: ifmr->ifm_active |= IFM_10G_KX4; break; case I40E_PHY_TYPE_10GBASE_KR: ifmr->ifm_active |= IFM_10G_KR; break; case I40E_PHY_TYPE_SFI: ifmr->ifm_active |= IFM_10G_SFI; break; /* Our single 20G media type */ case I40E_PHY_TYPE_20GBASE_KR2: ifmr->ifm_active |= IFM_20G_KR2; break; case I40E_PHY_TYPE_40GBASE_KR4: ifmr->ifm_active |= IFM_40G_KR4; break; case I40E_PHY_TYPE_XLPPI: case I40E_PHY_TYPE_40GBASE_AOC: ifmr->ifm_active |= IFM_40G_XLPPI; break; /* Unknown to driver */ default: ifmr->ifm_active |= IFM_UNKNOWN; break; } /* Report flow control status as well */ if (hw->phy.link_info.an_info & I40E_AQ_LINK_PAUSE_TX) ifmr->ifm_active |= IFM_ETH_TXPAUSE; if (hw->phy.link_info.an_info & I40E_AQ_LINK_PAUSE_RX) ifmr->ifm_active |= IFM_ETH_RXPAUSE; IXL_PF_UNLOCK(pf); } void ixl_init(void *arg) { struct ixl_pf *pf = arg; IXL_PF_LOCK(pf); ixl_init_locked(pf); IXL_PF_UNLOCK(pf); } /* * NOTE: Fortville does not support forcing media speeds. Instead, * use the set_advertise sysctl to set the speeds Fortville * will advertise or be allowed to operate at. */ int ixl_media_change(struct ifnet * ifp) { struct ixl_vsi *vsi = ifp->if_softc; struct ifmedia *ifm = &vsi->media; INIT_DEBUGOUT("ixl_media_change: begin"); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); if_printf(ifp, "Use 'advertise_speed' sysctl to change advertised speeds\n"); return (ENODEV); } /********************************************************************* * Ioctl entry point * * ixl_ioctl is called when the user wants to configure the * interface. 
* * return 0 on success, positive on failure **********************************************************************/ int ixl_ioctl(struct ifnet * ifp, u_long command, caddr_t data) { struct ixl_vsi *vsi = ifp->if_softc; struct ixl_pf *pf = vsi->back; struct ifreq *ifr = (struct ifreq *)data; struct ifdrv *ifd = (struct ifdrv *)data; #if defined(INET) || defined(INET6) struct ifaddr *ifa = (struct ifaddr *)data; bool avoid_reset = FALSE; #endif int error = 0; switch (command) { case SIOCSIFADDR: IOCTL_DEBUGOUT("ioctl: SIOCSIFADDR (Set Interface Address)"); #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) avoid_reset = TRUE; #endif #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) avoid_reset = TRUE; #endif #if defined(INET) || defined(INET6) /* ** Calling init results in link renegotiation, ** so we avoid doing it when possible. */ if (avoid_reset) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) ixl_init(pf); #ifdef INET if (!(ifp->if_flags & IFF_NOARP)) arp_ifinit(ifp, ifa); #endif } else error = ether_ioctl(ifp, command, data); break; #endif case SIOCSIFMTU: IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)"); if (ifr->ifr_mtu > IXL_MAX_FRAME - ETHER_HDR_LEN - ETHER_CRC_LEN - ETHER_VLAN_ENCAP_LEN) { error = EINVAL; } else { IXL_PF_LOCK(pf); ifp->if_mtu = ifr->ifr_mtu; vsi->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN; if (ifp->if_drv_flags & IFF_DRV_RUNNING) ixl_init_locked(pf); IXL_PF_UNLOCK(pf); } break; case SIOCSIFFLAGS: IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)"); IXL_PF_LOCK(pf); if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { if ((ifp->if_flags ^ pf->if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) { ixl_set_promisc(vsi); } } else { IXL_PF_UNLOCK(pf); ixl_init(pf); IXL_PF_LOCK(pf); } } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { ixl_stop_locked(pf); } } pf->if_flags = ifp->if_flags; IXL_PF_UNLOCK(pf); break; case SIOCSDRVSPEC: case 
SIOCGDRVSPEC: IOCTL_DEBUGOUT("ioctl: SIOCxDRVSPEC (Get/Set Driver-specific " "Info)\n"); /* NVM update command */ if (ifd->ifd_cmd == I40E_NVM_ACCESS) error = ixl_handle_nvmupd_cmd(pf, ifd); else error = EINVAL; break; case SIOCADDMULTI: IOCTL_DEBUGOUT("ioctl: SIOCADDMULTI"); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { IXL_PF_LOCK(pf); ixl_disable_rings_intr(vsi); ixl_add_multi(vsi); ixl_enable_intr(vsi); IXL_PF_UNLOCK(pf); } break; case SIOCDELMULTI: IOCTL_DEBUGOUT("ioctl: SIOCDELMULTI"); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { IXL_PF_LOCK(pf); ixl_disable_rings_intr(vsi); ixl_del_multi(vsi); ixl_enable_intr(vsi); IXL_PF_UNLOCK(pf); } break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: case SIOCGIFXMEDIA: IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)"); error = ifmedia_ioctl(ifp, ifr, &vsi->media, command); break; case SIOCSIFCAP: { int mask = ifr->ifr_reqcap ^ ifp->if_capenable; IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)"); ixl_cap_txcsum_tso(vsi, ifp, mask); if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_RXCSUM_IPV6) ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_VLAN_HWFILTER) ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { IXL_PF_LOCK(pf); ixl_init_locked(pf); IXL_PF_UNLOCK(pf); } VLAN_CAPABILITIES(ifp); break; } #if __FreeBSD_version >= 1003000 case SIOCGI2C: { struct ifi2creq i2c; int i; IOCTL_DEBUGOUT("ioctl: SIOCGI2C (Get I2C Data)"); if (!pf->has_i2c) return (ENOTTY); - error = copyin(ifr->ifr_data, &i2c, sizeof(i2c)); + error = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c)); if (error != 0) break; if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) { error = EINVAL; break; } if (i2c.len > sizeof(i2c.data)) { error = EINVAL; break; } for (i = 0; i < i2c.len; i++) 
if (ixl_read_i2c_byte(pf, i2c.offset + i, i2c.dev_addr, &i2c.data[i])) return (EIO); - error = copyout(&i2c, ifr->ifr_data, sizeof(i2c)); + error = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c)); break; } #endif default: IOCTL_DEBUGOUT("ioctl: UNKNOWN (0x%X)\n", (int)command); error = ether_ioctl(ifp, command, data); break; } return (error); } int ixl_find_i2c_interface(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; bool i2c_en, port_matched; u32 reg; for (int i = 0; i < 4; i++) { reg = rd32(hw, I40E_GLGEN_MDIO_I2C_SEL(i)); i2c_en = (reg & I40E_GLGEN_MDIO_I2C_SEL_MDIO_I2C_SEL_MASK); port_matched = ((reg & I40E_GLGEN_MDIO_I2C_SEL_PHY_PORT_NUM_MASK) >> I40E_GLGEN_MDIO_I2C_SEL_PHY_PORT_NUM_SHIFT) & BIT(hw->port); if (i2c_en && port_matched) return (i); } return (-1); } static char * ixl_phy_type_string(u32 bit_pos, bool ext) { static char * phy_types_str[32] = { "SGMII", "1000BASE-KX", "10GBASE-KX4", "10GBASE-KR", "40GBASE-KR4", "XAUI", "XFI", "SFI", "XLAUI", "XLPPI", "40GBASE-CR4", "10GBASE-CR1", "Reserved (12)", "Reserved (13)", "Reserved (14)", "Reserved (15)", "Reserved (16)", "100BASE-TX", "1000BASE-T", "10GBASE-T", "10GBASE-SR", "10GBASE-LR", "10GBASE-SFP+Cu", "10GBASE-CR1", "40GBASE-CR4", "40GBASE-SR4", "40GBASE-LR4", "1000BASE-SX", "1000BASE-LX", "1000BASE-T Optical", "20GBASE-KR2", "Reserved (31)" }; static char * ext_phy_types_str[4] = { "25GBASE-KR", "25GBASE-CR", "25GBASE-SR", "25GBASE-LR" }; if (ext && bit_pos > 3) return "Invalid_Ext"; if (bit_pos > 31) return "Invalid"; return (ext) ? 
ext_phy_types_str[bit_pos] : phy_types_str[bit_pos]; } int ixl_aq_get_link_status(struct ixl_pf *pf, struct i40e_aqc_get_link_status *link_status) { device_t dev = pf->dev; struct i40e_hw *hw = &pf->hw; struct i40e_aq_desc desc; enum i40e_status_code status; struct i40e_aqc_get_link_status *aq_link_status = (struct i40e_aqc_get_link_status *)&desc.params.raw; i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_get_link_status); link_status->command_flags = CPU_TO_LE16(I40E_AQ_LSE_ENABLE); status = i40e_asq_send_command(hw, &desc, NULL, 0, NULL); if (status) { device_printf(dev, "%s: i40e_aqc_opc_get_link_status status %s, aq error %s\n", __func__, i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); return (EIO); } bcopy(aq_link_status, link_status, sizeof(struct i40e_aqc_get_link_status)); return (0); } static char * ixl_phy_type_string_ls(u8 val) { if (val >= 0x1F) return ixl_phy_type_string(val - 0x1F, true); else return ixl_phy_type_string(val, false); } static int ixl_sysctl_link_status(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; device_t dev = pf->dev; struct sbuf *buf; int error = 0; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for sysctl output.\n"); return (ENOMEM); } struct i40e_aqc_get_link_status link_status; error = ixl_aq_get_link_status(pf, &link_status); if (error) { sbuf_delete(buf); return (error); } /* TODO: Add 25G types */ sbuf_printf(buf, "\n" "PHY Type : 0x%02x<%s>\n" "Speed : 0x%02x\n" "Link info: 0x%02x\n" "AN info : 0x%02x\n" "Ext info : 0x%02x\n" "Loopback : 0x%02x\n" "Max Frame: %d\n" "Config : 0x%02x\n" "Power : 0x%02x", link_status.phy_type, ixl_phy_type_string_ls(link_status.phy_type), link_status.link_speed, link_status.link_info, link_status.an_info, link_status.ext_info, link_status.loopback, link_status.max_frame_size, link_status.config, link_status.power_desc); error = sbuf_finish(buf); if (error) device_printf(dev, "Error 
finishing sbuf: %d\n", error); sbuf_delete(buf); return (error); } static int ixl_sysctl_phy_abilities(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; enum i40e_status_code status; struct i40e_aq_get_phy_abilities_resp abilities; struct sbuf *buf; int error = 0; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for sysctl output.\n"); return (ENOMEM); } status = i40e_aq_get_phy_capabilities(hw, FALSE, FALSE, &abilities, NULL); if (status) { device_printf(dev, "%s: i40e_aq_get_phy_capabilities() status %s, aq error %s\n", __func__, i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); sbuf_delete(buf); return (EIO); } sbuf_printf(buf, "\n" "PHY Type : %08x", abilities.phy_type); if (abilities.phy_type != 0) { sbuf_printf(buf, "<"); for (int i = 0; i < 32; i++) if ((1 << i) & abilities.phy_type) sbuf_printf(buf, "%s,", ixl_phy_type_string(i, false)); sbuf_printf(buf, ">\n"); } sbuf_printf(buf, "PHY Ext : %02x", abilities.phy_type_ext); if (abilities.phy_type_ext != 0) { sbuf_printf(buf, "<"); for (int i = 0; i < 4; i++) if ((1 << i) & abilities.phy_type_ext) sbuf_printf(buf, "%s,", ixl_phy_type_string(i, true)); sbuf_printf(buf, ">"); } sbuf_printf(buf, "\n"); sbuf_printf(buf, "Speed : %02x\n" "Abilities: %02x\n" "EEE cap : %04x\n" "EEER reg : %08x\n" "D3 Lpan : %02x\n" "ID : %02x %02x %02x %02x\n" "ModType : %02x %02x %02x\n" "ModType E: %01x\n" "FEC Cfg : %02x\n" "Ext CC : %02x", abilities.link_speed, abilities.abilities, abilities.eee_capability, abilities.eeer_val, abilities.d3_lpan, abilities.phy_id[0], abilities.phy_id[1], abilities.phy_id[2], abilities.phy_id[3], abilities.module_type[0], abilities.module_type[1], abilities.module_type[2], abilities.phy_type_ext >> 5, abilities.phy_type_ext & 0x1F, abilities.ext_comp_code); error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); 
sbuf_delete(buf); return (error); } static int ixl_sysctl_sw_filter_list(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct ixl_vsi *vsi = &pf->vsi; struct ixl_mac_filter *f; char *buf, *buf_i; int error = 0; int ftl_len = 0; int ftl_counter = 0; int buf_len = 0; int entry_len = 42; SLIST_FOREACH(f, &vsi->ftl, next) { ftl_len++; } if (ftl_len < 1) { sysctl_handle_string(oidp, "(none)", 6, req); return (0); } buf_len = sizeof(char) * (entry_len + 1) * ftl_len + 2; buf = buf_i = malloc(buf_len, M_DEVBUF, M_NOWAIT); sprintf(buf_i++, "\n"); SLIST_FOREACH(f, &vsi->ftl, next) { sprintf(buf_i, MAC_FORMAT ", vlan %4d, flags %#06x", MAC_FORMAT_ARGS(f->macaddr), f->vlan, f->flags); buf_i += entry_len; /* don't print '\n' for last entry */ if (++ftl_counter != ftl_len) { sprintf(buf_i, "\n"); buf_i++; } } error = sysctl_handle_string(oidp, buf, strlen(buf), req); if (error) printf("sysctl error: %d\n", error); free(buf, M_DEVBUF); return error; } #define IXL_SW_RES_SIZE 0x14 int ixl_res_alloc_cmp(const void *a, const void *b) { const struct i40e_aqc_switch_resource_alloc_element_resp *one, *two; one = (const struct i40e_aqc_switch_resource_alloc_element_resp *)a; two = (const struct i40e_aqc_switch_resource_alloc_element_resp *)b; return ((int)one->resource_type - (int)two->resource_type); } /* * Longest string length: 25 */ char * ixl_switch_res_type_string(u8 type) { static char * ixl_switch_res_type_strings[0x14] = { "VEB", "VSI", "Perfect Match MAC address", "S-tag", "(Reserved)", "Multicast hash entry", "Unicast hash entry", "VLAN", "VSI List entry", "(Reserved)", "VLAN Statistic Pool", "Mirror Rule", "Queue Set", "Inner VLAN Forward filter", "(Reserved)", "Inner MAC", "IP", "GRE/VN1 Key", "VN2 Key", "Tunneling Port" }; if (type < 0x14) return ixl_switch_res_type_strings[type]; else return "(Reserved)"; } static int ixl_sysctl_hw_res_alloc(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = 
pf->dev; struct sbuf *buf; enum i40e_status_code status; int error = 0; u8 num_entries; struct i40e_aqc_switch_resource_alloc_element_resp resp[IXL_SW_RES_SIZE]; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for output.\n"); return (ENOMEM); } bzero(resp, sizeof(resp)); status = i40e_aq_get_switch_resource_alloc(hw, &num_entries, resp, IXL_SW_RES_SIZE, NULL); if (status) { device_printf(dev, "%s: get_switch_resource_alloc() error %s, aq error %s\n", __func__, i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); sbuf_delete(buf); return (error); } /* Sort entries by type for display */ qsort(resp, num_entries, sizeof(struct i40e_aqc_switch_resource_alloc_element_resp), &ixl_res_alloc_cmp); sbuf_cat(buf, "\n"); sbuf_printf(buf, "# of entries: %d\n", num_entries); sbuf_printf(buf, " Type | Guaranteed | Total | Used | Un-allocated\n" " | (this) | (all) | (this) | (all) \n"); for (int i = 0; i < num_entries; i++) { sbuf_printf(buf, "%25s | %10d %5d %6d %12d", ixl_switch_res_type_string(resp[i].resource_type), resp[i].guaranteed, resp[i].total, resp[i].used, resp[i].total_unalloced); if (i < num_entries - 1) sbuf_cat(buf, "\n"); } error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (error); } /* ** Caller must init and delete sbuf; this function will clear and ** finish it for caller. ** ** XXX: Cannot use the SEID for this, since there is no longer a ** fixed mapping between SEID and element type. 
*/ char * ixl_switch_element_string(struct sbuf *s, struct i40e_aqc_switch_config_element_resp *element) { sbuf_clear(s); switch (element->element_type) { case I40E_AQ_SW_ELEM_TYPE_MAC: sbuf_printf(s, "MAC %3d", element->element_info); break; case I40E_AQ_SW_ELEM_TYPE_PF: sbuf_printf(s, "PF %3d", element->element_info); break; case I40E_AQ_SW_ELEM_TYPE_VF: sbuf_printf(s, "VF %3d", element->element_info); break; case I40E_AQ_SW_ELEM_TYPE_EMP: sbuf_cat(s, "EMP"); break; case I40E_AQ_SW_ELEM_TYPE_BMC: sbuf_cat(s, "BMC"); break; case I40E_AQ_SW_ELEM_TYPE_PV: sbuf_cat(s, "PV"); break; case I40E_AQ_SW_ELEM_TYPE_VEB: sbuf_cat(s, "VEB"); break; case I40E_AQ_SW_ELEM_TYPE_PA: sbuf_cat(s, "PA"); break; case I40E_AQ_SW_ELEM_TYPE_VSI: sbuf_printf(s, "VSI %3d", element->element_info); break; default: sbuf_cat(s, "?"); break; } sbuf_finish(s); return sbuf_data(s); } static int ixl_sysctl_switch_config(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct sbuf *buf; struct sbuf *nmbuf; enum i40e_status_code status; int error = 0; u16 next = 0; u8 aq_buf[I40E_AQ_LARGE_BUF]; struct i40e_aqc_get_switch_config_resp *sw_config; sw_config = (struct i40e_aqc_get_switch_config_resp *)aq_buf; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for sysctl output.\n"); return (ENOMEM); } status = i40e_aq_get_switch_config(hw, sw_config, sizeof(aq_buf), &next, NULL); if (status) { device_printf(dev, "%s: aq_get_switch_config() error %s, aq error %s\n", __func__, i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); sbuf_delete(buf); return error; } if (next) device_printf(dev, "%s: TODO: get more config with SEID %d\n", __func__, next); nmbuf = sbuf_new_auto(); if (!nmbuf) { device_printf(dev, "Could not allocate sbuf for name output.\n"); sbuf_delete(buf); return (ENOMEM); } sbuf_cat(buf, "\n"); /* Assuming <= 255 elements in switch */ sbuf_printf(buf, 
"# of reported elements: %d\n", sw_config->header.num_reported); sbuf_printf(buf, "total # of elements: %d\n", sw_config->header.num_total); /* Exclude: ** Revision -- all elements are revision 1 for now */ sbuf_printf(buf, "SEID ( Name ) | Uplink | Downlink | Conn Type\n" " | | | (uplink)\n"); for (int i = 0; i < sw_config->header.num_reported; i++) { // "%4d (%8s) | %8s %8s %#8x", sbuf_printf(buf, "%4d", sw_config->element[i].seid); sbuf_cat(buf, " "); sbuf_printf(buf, "(%8s)", ixl_switch_element_string(nmbuf, &sw_config->element[i])); sbuf_cat(buf, " | "); sbuf_printf(buf, "%8d", sw_config->element[i].uplink_seid); sbuf_cat(buf, " "); sbuf_printf(buf, "%8d", sw_config->element[i].downlink_seid); sbuf_cat(buf, " "); sbuf_printf(buf, "%#8x", sw_config->element[i].connection_type); if (i < sw_config->header.num_reported - 1) sbuf_cat(buf, "\n"); } sbuf_delete(nmbuf); error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (error); } static int ixl_sysctl_hkey(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct sbuf *buf; int error = 0; enum i40e_status_code status; u32 reg; struct i40e_aqc_get_set_rss_key_data key_data; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for output.\n"); return (ENOMEM); } sbuf_cat(buf, "\n"); if (hw->mac.type == I40E_MAC_X722) { bzero(key_data.standard_rss_key, sizeof(key_data.standard_rss_key)); status = i40e_aq_get_rss_key(hw, pf->vsi.vsi_num, &key_data); if (status) device_printf(dev, "i40e_aq_get_rss_key status %s, error %s\n", i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); sbuf_printf(buf, "%40D", (u_char *)key_data.standard_rss_key, ""); } else { for (int i = 0; i < IXL_RSS_KEY_SIZE_REG; i++) { reg = i40e_read_rx_ctl(hw, I40E_PFQF_HKEY(i)); sbuf_printf(buf, "%4D", (u_char *)®, ""); } } error = sbuf_finish(buf); if 
(error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (error); } static int ixl_sysctl_hlut(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct sbuf *buf; int error = 0; enum i40e_status_code status; u8 hlut[512]; u32 reg; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for output.\n"); return (ENOMEM); } sbuf_cat(buf, "\n"); if (hw->mac.type == I40E_MAC_X722) { bzero(hlut, sizeof(hlut)); status = i40e_aq_get_rss_lut(hw, pf->vsi.vsi_num, TRUE, hlut, sizeof(hlut)); if (status) device_printf(dev, "i40e_aq_get_rss_lut status %s, error %s\n", i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); sbuf_printf(buf, "%512D", (u_char *)hlut, ""); } else { for (int i = 0; i < hw->func_caps.rss_table_size >> 2; i++) { reg = rd32(hw, I40E_PFQF_HLUT(i)); sbuf_printf(buf, "%4D", (u_char *)®, ""); } } error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (error); } static int ixl_sysctl_hena(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; u64 hena; hena = (u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0)) | ((u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1)) << 32); return sysctl_handle_long(oidp, NULL, hena, req); } /* * Sysctl to disable firmware's link management * * 1 - Disable link management on this port * 0 - Re-enable link management * * On normal NVMs, firmware manages link by default. 
*/ static int ixl_sysctl_fw_link_management(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; int requested_mode = -1; enum i40e_status_code status = 0; int error = 0; /* Read in new mode */ error = sysctl_handle_int(oidp, &requested_mode, 0, req); if ((error) || (req->newptr == NULL)) return (error); /* Check for sane value */ if (requested_mode < 0 || requested_mode > 1) { device_printf(dev, "Valid modes are 0 or 1\n"); return (EINVAL); } /* Set new mode */ status = i40e_aq_set_phy_debug(hw, !!(requested_mode) << 4, NULL); if (status) { device_printf(dev, "%s: Error setting new phy debug mode %s," " aq error: %s\n", __func__, i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); return (EIO); } return (0); } /* * Sysctl to read a byte from I2C bus. * * Input: 32-bit value: * bits 0-7: device address (0xA0 or 0xA2) * bits 8-15: offset (0-255) * bits 16-31: unused * Output: 8-bit value read */ static int ixl_sysctl_read_i2c_byte(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; device_t dev = pf->dev; int input = -1, error = 0; device_printf(dev, "%s: start\n", __func__); u8 dev_addr, offset, output; /* Read in I2C read parameters */ error = sysctl_handle_int(oidp, &input, 0, req); if ((error) || (req->newptr == NULL)) return (error); /* Validate device address */ dev_addr = input & 0xFF; if (dev_addr != 0xA0 && dev_addr != 0xA2) { return (EINVAL); } offset = (input >> 8) & 0xFF; error = ixl_read_i2c_byte(pf, offset, dev_addr, &output); if (error) return (error); device_printf(dev, "%02X\n", output); return (0); } /* * Sysctl to write a byte to the I2C bus. 
* * Input: 32-bit value: * bits 0-7: device address (0xA0 or 0xA2) * bits 8-15: offset (0-255) * bits 16-23: value to write * bits 24-31: unused * Output: 8-bit value written */ static int ixl_sysctl_write_i2c_byte(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; device_t dev = pf->dev; int input = -1, error = 0; u8 dev_addr, offset, value; /* Read in I2C write parameters */ error = sysctl_handle_int(oidp, &input, 0, req); if ((error) || (req->newptr == NULL)) return (error); /* Validate device address */ dev_addr = input & 0xFF; if (dev_addr != 0xA0 && dev_addr != 0xA2) { return (EINVAL); } offset = (input >> 8) & 0xFF; value = (input >> 16) & 0xFF; error = ixl_write_i2c_byte(pf, offset, dev_addr, value); if (error) return (error); device_printf(dev, "%02X written\n", value); return (0); } static int ixl_get_fec_config(struct ixl_pf *pf, struct i40e_aq_get_phy_abilities_resp *abilities, u8 bit_pos, int *is_set) { device_t dev = pf->dev; struct i40e_hw *hw = &pf->hw; enum i40e_status_code status; status = i40e_aq_get_phy_capabilities(hw, FALSE, FALSE, abilities, NULL); if (status) { device_printf(dev, "%s: i40e_aq_get_phy_capabilities() status %s, aq error %s\n", __func__, i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); return (EIO); } *is_set = !!(abilities->phy_type_ext & bit_pos); return (0); } static int ixl_set_fec_config(struct ixl_pf *pf, struct i40e_aq_get_phy_abilities_resp *abilities, u8 bit_pos, int set) { device_t dev = pf->dev; struct i40e_hw *hw = &pf->hw; struct i40e_aq_set_phy_config config; enum i40e_status_code status; /* Set new PHY config */ memset(&config, 0, sizeof(config)); config.fec_config = abilities->phy_type_ext & ~(bit_pos); if (set) config.fec_config |= bit_pos; if (config.fec_config != abilities->phy_type_ext) { config.abilities |= I40E_AQ_PHY_ENABLE_ATOMIC_LINK; config.phy_type = abilities->phy_type; config.phy_type_ext = abilities->phy_type_ext; config.link_speed = abilities->link_speed; 
config.eee_capability = abilities->eee_capability; config.eeer = abilities->eeer_val; config.low_power_ctrl = abilities->d3_lpan; status = i40e_aq_set_phy_config(hw, &config, NULL); if (status) { device_printf(dev, "%s: i40e_aq_set_phy_config() status %s, aq error %s\n", __func__, i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); return (EIO); } } return (0); } static int ixl_sysctl_fec_fc_ability(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; int mode, error = 0; struct i40e_aq_get_phy_abilities_resp abilities; error = ixl_get_fec_config(pf, &abilities, I40E_AQ_SET_FEC_ABILITY_KR, &mode); if (error) return (error); /* Read in new mode */ error = sysctl_handle_int(oidp, &mode, 0, req); if ((error) || (req->newptr == NULL)) return (error); return ixl_set_fec_config(pf, &abilities, I40E_AQ_SET_FEC_ABILITY_KR, !!(mode)); } static int ixl_sysctl_fec_rs_ability(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; int mode, error = 0; struct i40e_aq_get_phy_abilities_resp abilities; error = ixl_get_fec_config(pf, &abilities, I40E_AQ_SET_FEC_ABILITY_RS, &mode); if (error) return (error); /* Read in new mode */ error = sysctl_handle_int(oidp, &mode, 0, req); if ((error) || (req->newptr == NULL)) return (error); return ixl_set_fec_config(pf, &abilities, I40E_AQ_SET_FEC_ABILITY_RS, !!(mode)); } static int ixl_sysctl_fec_fc_request(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; int mode, error = 0; struct i40e_aq_get_phy_abilities_resp abilities; error = ixl_get_fec_config(pf, &abilities, I40E_AQ_SET_FEC_REQUEST_KR, &mode); if (error) return (error); /* Read in new mode */ error = sysctl_handle_int(oidp, &mode, 0, req); if ((error) || (req->newptr == NULL)) return (error); return ixl_set_fec_config(pf, &abilities, I40E_AQ_SET_FEC_REQUEST_KR, !!(mode)); } static int ixl_sysctl_fec_rs_request(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; int mode, error = 0; struct 
i40e_aq_get_phy_abilities_resp abilities; error = ixl_get_fec_config(pf, &abilities, I40E_AQ_SET_FEC_REQUEST_RS, &mode); if (error) return (error); /* Read in new mode */ error = sysctl_handle_int(oidp, &mode, 0, req); if ((error) || (req->newptr == NULL)) return (error); return ixl_set_fec_config(pf, &abilities, I40E_AQ_SET_FEC_REQUEST_RS, !!(mode)); } static int ixl_sysctl_fec_auto_enable(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; int mode, error = 0; struct i40e_aq_get_phy_abilities_resp abilities; error = ixl_get_fec_config(pf, &abilities, I40E_AQ_SET_FEC_AUTO, &mode); if (error) return (error); /* Read in new mode */ error = sysctl_handle_int(oidp, &mode, 0, req); if ((error) || (req->newptr == NULL)) return (error); return ixl_set_fec_config(pf, &abilities, I40E_AQ_SET_FEC_AUTO, !!(mode)); } Index: head/sys/dev/mlx4/mlx4_en/mlx4_en_netdev.c =================================================================== --- head/sys/dev/mlx4/mlx4_en/mlx4_en_netdev.c (revision 331796) +++ head/sys/dev/mlx4/mlx4_en/mlx4_en_netdev.c (revision 331797) @@ -1,2881 +1,2881 @@ /* * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #include #include #include #include #ifdef CONFIG_NET_RX_BUSY_POLL #include #endif #include #include #include #include #include #include #include #include #include "en.h" #include "en_port.h" static void mlx4_en_sysctl_stat(struct mlx4_en_priv *priv); static void mlx4_en_sysctl_conf(struct mlx4_en_priv *priv); #ifdef CONFIG_NET_RX_BUSY_POLL /* must be called with local_bh_disable()d */ static int mlx4_en_low_latency_recv(struct napi_struct *napi) { struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi); struct net_device *dev = cq->dev; struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_rx_ring *rx_ring = priv->rx_ring[cq->ring]; int done; if (!priv->port_up) return LL_FLUSH_FAILED; if (!mlx4_en_cq_lock_poll(cq)) return LL_FLUSH_BUSY; done = mlx4_en_process_rx_cq(dev, cq, 4); #ifdef LL_EXTENDED_STATS if (likely(done)) rx_ring->cleaned += done; else rx_ring->misses++; #endif mlx4_en_cq_unlock_poll(cq); return done; } #endif /* CONFIG_NET_RX_BUSY_POLL */ #ifdef CONFIG_RFS_ACCEL struct mlx4_en_filter { struct list_head next; struct work_struct work; u8 ip_proto; __be32 src_ip; __be32 dst_ip; __be16 src_port; __be16 dst_port; int rxq_index; struct mlx4_en_priv *priv; u32 flow_id; /* RFS infrastructure id */ int id; /* mlx4_en driver id */ u64 reg_id; /* Flow steering API id */ u8 activated; /* Used to prevent expiry before filter * is attached */ struct hlist_node filter_chain; }; static void mlx4_en_filter_rfs_expire(struct mlx4_en_priv *priv); static enum 
mlx4_net_trans_rule_id mlx4_ip_proto_to_trans_rule_id(u8 ip_proto) { switch (ip_proto) { case IPPROTO_UDP: return MLX4_NET_TRANS_RULE_ID_UDP; case IPPROTO_TCP: return MLX4_NET_TRANS_RULE_ID_TCP; default: return MLX4_NET_TRANS_RULE_NUM; } }; static void mlx4_en_filter_work(struct work_struct *work) { struct mlx4_en_filter *filter = container_of(work, struct mlx4_en_filter, work); struct mlx4_en_priv *priv = filter->priv; struct mlx4_spec_list spec_tcp_udp = { .id = mlx4_ip_proto_to_trans_rule_id(filter->ip_proto), { .tcp_udp = { .dst_port = filter->dst_port, .dst_port_msk = (__force __be16)-1, .src_port = filter->src_port, .src_port_msk = (__force __be16)-1, }, }, }; struct mlx4_spec_list spec_ip = { .id = MLX4_NET_TRANS_RULE_ID_IPV4, { .ipv4 = { .dst_ip = filter->dst_ip, .dst_ip_msk = (__force __be32)-1, .src_ip = filter->src_ip, .src_ip_msk = (__force __be32)-1, }, }, }; struct mlx4_spec_list spec_eth = { .id = MLX4_NET_TRANS_RULE_ID_ETH, }; struct mlx4_net_trans_rule rule = { .list = LIST_HEAD_INIT(rule.list), .queue_mode = MLX4_NET_TRANS_Q_LIFO, .exclusive = 1, .allow_loopback = 1, .promisc_mode = MLX4_FS_REGULAR, .port = priv->port, .priority = MLX4_DOMAIN_RFS, }; int rc; __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16); if (spec_tcp_udp.id >= MLX4_NET_TRANS_RULE_NUM) { en_warn(priv, "RFS: ignoring unsupported ip protocol (%d)\n", filter->ip_proto); goto ignore; } list_add_tail(&spec_eth.list, &rule.list); list_add_tail(&spec_ip.list, &rule.list); list_add_tail(&spec_tcp_udp.list, &rule.list); rule.qpn = priv->rss_map.qps[filter->rxq_index].qpn; memcpy(spec_eth.eth.dst_mac, priv->dev->dev_addr, ETH_ALEN); memcpy(spec_eth.eth.dst_mac_msk, &mac_mask, ETH_ALEN); filter->activated = 0; if (filter->reg_id) { rc = mlx4_flow_detach(priv->mdev->dev, filter->reg_id); if (rc && rc != -ENOENT) en_err(priv, "Error detaching flow. rc = %d\n", rc); } rc = mlx4_flow_attach(priv->mdev->dev, &rule, &filter->reg_id); if (rc) en_err(priv, "Error attaching flow. 
err = %d\n", rc); ignore: mlx4_en_filter_rfs_expire(priv); filter->activated = 1; } static inline struct hlist_head * filter_hash_bucket(struct mlx4_en_priv *priv, __be32 src_ip, __be32 dst_ip, __be16 src_port, __be16 dst_port) { unsigned long l; int bucket_idx; l = (__force unsigned long)src_port | ((__force unsigned long)dst_port << 2); l ^= (__force unsigned long)(src_ip ^ dst_ip); bucket_idx = hash_long(l, MLX4_EN_FILTER_HASH_SHIFT); return &priv->filter_hash[bucket_idx]; } static struct mlx4_en_filter * mlx4_en_filter_alloc(struct mlx4_en_priv *priv, int rxq_index, __be32 src_ip, __be32 dst_ip, u8 ip_proto, __be16 src_port, __be16 dst_port, u32 flow_id) { struct mlx4_en_filter *filter = NULL; filter = kzalloc(sizeof(struct mlx4_en_filter), GFP_ATOMIC); if (!filter) return NULL; filter->priv = priv; filter->rxq_index = rxq_index; INIT_WORK(&filter->work, mlx4_en_filter_work); filter->src_ip = src_ip; filter->dst_ip = dst_ip; filter->ip_proto = ip_proto; filter->src_port = src_port; filter->dst_port = dst_port; filter->flow_id = flow_id; filter->id = priv->last_filter_id++ % RPS_NO_FILTER; list_add_tail(&filter->next, &priv->filters); hlist_add_head(&filter->filter_chain, filter_hash_bucket(priv, src_ip, dst_ip, src_port, dst_port)); return filter; } static void mlx4_en_filter_free(struct mlx4_en_filter *filter) { struct mlx4_en_priv *priv = filter->priv; int rc; list_del(&filter->next); rc = mlx4_flow_detach(priv->mdev->dev, filter->reg_id); if (rc && rc != -ENOENT) en_err(priv, "Error detaching flow. 
rc = %d\n", rc); kfree(filter); } static inline struct mlx4_en_filter * mlx4_en_filter_find(struct mlx4_en_priv *priv, __be32 src_ip, __be32 dst_ip, u8 ip_proto, __be16 src_port, __be16 dst_port) { struct mlx4_en_filter *filter; struct mlx4_en_filter *ret = NULL; hlist_for_each_entry(filter, filter_hash_bucket(priv, src_ip, dst_ip, src_port, dst_port), filter_chain) { if (filter->src_ip == src_ip && filter->dst_ip == dst_ip && filter->ip_proto == ip_proto && filter->src_port == src_port && filter->dst_port == dst_port) { ret = filter; break; } } return ret; } static int mlx4_en_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, u16 rxq_index, u32 flow_id) { struct mlx4_en_priv *priv = netdev_priv(net_dev); struct mlx4_en_filter *filter; const struct iphdr *ip; const __be16 *ports; u8 ip_proto; __be32 src_ip; __be32 dst_ip; __be16 src_port; __be16 dst_port; int nhoff = skb_network_offset(skb); int ret = 0; if (skb->protocol != htons(ETH_P_IP)) return -EPROTONOSUPPORT; ip = (const struct iphdr *)(skb->data + nhoff); if (ip_is_fragment(ip)) return -EPROTONOSUPPORT; if ((ip->protocol != IPPROTO_TCP) && (ip->protocol != IPPROTO_UDP)) return -EPROTONOSUPPORT; ports = (const __be16 *)(skb->data + nhoff + 4 * ip->ihl); ip_proto = ip->protocol; src_ip = ip->saddr; dst_ip = ip->daddr; src_port = ports[0]; dst_port = ports[1]; spin_lock_bh(&priv->filters_lock); filter = mlx4_en_filter_find(priv, src_ip, dst_ip, ip_proto, src_port, dst_port); if (filter) { if (filter->rxq_index == rxq_index) goto out; filter->rxq_index = rxq_index; } else { filter = mlx4_en_filter_alloc(priv, rxq_index, src_ip, dst_ip, ip_proto, src_port, dst_port, flow_id); if (!filter) { ret = -ENOMEM; goto err; } } queue_work(priv->mdev->workqueue, &filter->work); out: ret = filter->id; err: spin_unlock_bh(&priv->filters_lock); return ret; } void mlx4_en_cleanup_filters(struct mlx4_en_priv *priv) { struct mlx4_en_filter *filter, *tmp; LIST_HEAD(del_list); spin_lock_bh(&priv->filters_lock); 
list_for_each_entry_safe(filter, tmp, &priv->filters, next) { list_move(&filter->next, &del_list); hlist_del(&filter->filter_chain); } spin_unlock_bh(&priv->filters_lock); list_for_each_entry_safe(filter, tmp, &del_list, next) { cancel_work_sync(&filter->work); mlx4_en_filter_free(filter); } } static void mlx4_en_filter_rfs_expire(struct mlx4_en_priv *priv) { struct mlx4_en_filter *filter = NULL, *tmp, *last_filter = NULL; LIST_HEAD(del_list); int i = 0; spin_lock_bh(&priv->filters_lock); list_for_each_entry_safe(filter, tmp, &priv->filters, next) { if (i > MLX4_EN_FILTER_EXPIRY_QUOTA) break; if (filter->activated && !work_pending(&filter->work) && rps_may_expire_flow(priv->dev, filter->rxq_index, filter->flow_id, filter->id)) { list_move(&filter->next, &del_list); hlist_del(&filter->filter_chain); } else last_filter = filter; i++; } if (last_filter && (&last_filter->next != priv->filters.next)) list_move(&priv->filters, &last_filter->next); spin_unlock_bh(&priv->filters_lock); list_for_each_entry_safe(filter, tmp, &del_list, next) mlx4_en_filter_free(filter); } #endif static void mlx4_en_vlan_rx_add_vid(void *arg, struct net_device *dev, u16 vid) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int err; int idx; if (arg != priv) return; en_dbg(HW, priv, "adding VLAN:%d\n", vid); set_bit(vid, priv->active_vlans); /* Add VID to port VLAN filter */ mutex_lock(&mdev->state_lock); if (mdev->device_up && priv->port_up) { err = mlx4_SET_VLAN_FLTR(mdev->dev, priv); if (err) en_err(priv, "Failed configuring VLAN filter\n"); } if (mlx4_register_vlan(mdev->dev, priv->port, vid, &idx)) en_dbg(HW, priv, "failed adding vlan %d\n", vid); mutex_unlock(&mdev->state_lock); } static void mlx4_en_vlan_rx_kill_vid(void *arg, struct net_device *dev, u16 vid) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int err; if (arg != priv) return; en_dbg(HW, priv, "Killing VID:%d\n", vid); clear_bit(vid, 
priv->active_vlans); /* Remove VID from port VLAN filter */ mutex_lock(&mdev->state_lock); mlx4_unregister_vlan(mdev->dev, priv->port, vid); if (mdev->device_up && priv->port_up) { err = mlx4_SET_VLAN_FLTR(mdev->dev, priv); if (err) en_err(priv, "Failed configuring VLAN filter\n"); } mutex_unlock(&mdev->state_lock); } static int mlx4_en_tunnel_steer_add(struct mlx4_en_priv *priv, unsigned char *addr, int qpn, u64 *reg_id) { int err; if (priv->mdev->dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN || priv->mdev->dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC) return 0; /* do nothing */ err = mlx4_tunnel_steer_add(priv->mdev->dev, addr, priv->port, qpn, MLX4_DOMAIN_NIC, reg_id); if (err) { en_err(priv, "failed to add vxlan steering rule, err %d\n", err); return err; } en_dbg(DRV, priv, "added vxlan steering rule, mac %pM reg_id %llx\n", addr, (long long)*reg_id); return 0; } static int mlx4_en_uc_steer_add(struct mlx4_en_priv *priv, unsigned char *mac, int *qpn, u64 *reg_id) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_dev *dev = mdev->dev; int err; switch (dev->caps.steering_mode) { case MLX4_STEERING_MODE_B0: { struct mlx4_qp qp; u8 gid[16] = {0}; qp.qpn = *qpn; memcpy(&gid[10], mac, ETH_ALEN); gid[5] = priv->port; err = mlx4_unicast_attach(dev, &qp, gid, 0, MLX4_PROT_ETH); break; } case MLX4_STEERING_MODE_DEVICE_MANAGED: { struct mlx4_spec_list spec_eth = { {NULL} }; __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16); struct mlx4_net_trans_rule rule = { .queue_mode = MLX4_NET_TRANS_Q_FIFO, .exclusive = 0, .allow_loopback = 1, .promisc_mode = MLX4_FS_REGULAR, .priority = MLX4_DOMAIN_NIC, }; rule.port = priv->port; rule.qpn = *qpn; INIT_LIST_HEAD(&rule.list); spec_eth.id = MLX4_NET_TRANS_RULE_ID_ETH; memcpy(spec_eth.eth.dst_mac, mac, ETH_ALEN); memcpy(spec_eth.eth.dst_mac_msk, &mac_mask, ETH_ALEN); list_add_tail(&spec_eth.list, &rule.list); err = mlx4_flow_attach(dev, &rule, reg_id); break; } default: return -EINVAL; } if (err) 
en_warn(priv, "Failed Attaching Unicast\n"); return err; } static void mlx4_en_uc_steer_release(struct mlx4_en_priv *priv, unsigned char *mac, int qpn, u64 reg_id) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_dev *dev = mdev->dev; switch (dev->caps.steering_mode) { case MLX4_STEERING_MODE_B0: { struct mlx4_qp qp; u8 gid[16] = {0}; qp.qpn = qpn; memcpy(&gid[10], mac, ETH_ALEN); gid[5] = priv->port; mlx4_unicast_detach(dev, &qp, gid, MLX4_PROT_ETH); break; } case MLX4_STEERING_MODE_DEVICE_MANAGED: { mlx4_flow_detach(dev, reg_id); break; } default: en_err(priv, "Invalid steering mode.\n"); } } static int mlx4_en_get_qp(struct mlx4_en_priv *priv) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_dev *dev = mdev->dev; int index = 0; int err = 0; int *qpn = &priv->base_qpn; u64 mac = mlx4_mac_to_u64(IF_LLADDR(priv->dev)); en_dbg(DRV, priv, "Registering MAC: %pM for adding\n", IF_LLADDR(priv->dev)); index = mlx4_register_mac(dev, priv->port, mac); if (index < 0) { err = index; en_err(priv, "Failed adding MAC: %pM\n", IF_LLADDR(priv->dev)); return err; } if (dev->caps.steering_mode == MLX4_STEERING_MODE_A0) { int base_qpn = mlx4_get_base_qpn(dev, priv->port); *qpn = base_qpn + index; return 0; } err = mlx4_qp_reserve_range(dev, 1, 1, qpn, MLX4_RESERVE_A0_QP); en_dbg(DRV, priv, "Reserved qp %d\n", *qpn); if (err) { en_err(priv, "Failed to reserve qp for mac registration\n"); mlx4_unregister_mac(dev, priv->port, mac); return err; } return 0; } static void mlx4_en_put_qp(struct mlx4_en_priv *priv) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_dev *dev = mdev->dev; int qpn = priv->base_qpn; if (dev->caps.steering_mode == MLX4_STEERING_MODE_A0) { u64 mac = mlx4_mac_to_u64(IF_LLADDR(priv->dev)); en_dbg(DRV, priv, "Registering MAC: %pM for deleting\n", IF_LLADDR(priv->dev)); mlx4_unregister_mac(dev, priv->port, mac); } else { en_dbg(DRV, priv, "Releasing qp: port %d, qpn %d\n", priv->port, qpn); mlx4_qp_release_range(dev, qpn, 1); priv->flags &= 
~MLX4_EN_FLAG_FORCE_PROMISC; } } static void mlx4_en_clear_uclist(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_addr_list *tmp, *uc_to_del; list_for_each_entry_safe(uc_to_del, tmp, &priv->uc_list, list) { list_del(&uc_to_del->list); kfree(uc_to_del); } } static void mlx4_en_cache_uclist(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_addr_list *tmp; struct ifaddr *ifa; mlx4_en_clear_uclist(dev); if_addr_rlock(dev); TAILQ_FOREACH(ifa, &dev->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_LINK) continue; if (((struct sockaddr_dl *)ifa->ifa_addr)->sdl_alen != ETHER_ADDR_LEN) continue; tmp = kzalloc(sizeof(struct mlx4_en_addr_list), GFP_ATOMIC); if (tmp == NULL) { en_err(priv, "Failed to allocate address list\n"); break; } memcpy(tmp->addr, LLADDR((struct sockaddr_dl *)ifa->ifa_addr), ETH_ALEN); list_add_tail(&tmp->list, &priv->uc_list); } if_addr_runlock(dev); } static void mlx4_en_clear_mclist(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_addr_list *tmp, *mc_to_del; list_for_each_entry_safe(mc_to_del, tmp, &priv->mc_list, list) { list_del(&mc_to_del->list); kfree(mc_to_del); } } static void mlx4_en_cache_mclist(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_addr_list *tmp; struct ifmultiaddr *ifma; mlx4_en_clear_mclist(dev); if_maddr_rlock(dev); TAILQ_FOREACH(ifma, &dev->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (((struct sockaddr_dl *)ifma->ifma_addr)->sdl_alen != ETHER_ADDR_LEN) continue; tmp = kzalloc(sizeof(struct mlx4_en_addr_list), GFP_ATOMIC); if (tmp == NULL) { en_err(priv, "Failed to allocate address list\n"); break; } memcpy(tmp->addr, LLADDR((struct sockaddr_dl *)ifma->ifma_addr), ETH_ALEN); list_add_tail(&tmp->list, &priv->mc_list); } if_maddr_runlock(dev); } static void update_addr_list_flags(struct mlx4_en_priv *priv, struct list_head *dst, 
struct list_head *src)
{
	struct mlx4_en_addr_list *dst_tmp, *src_tmp, *new_mc;
	bool found;

	/* Find all the entries that should be removed from dst,
	 * These are the entries that are not found in src
	 */
	list_for_each_entry(dst_tmp, dst, list) {
		found = false;
		list_for_each_entry(src_tmp, src, list) {
			if (!memcmp(dst_tmp->addr, src_tmp->addr, ETH_ALEN)) {
				found = true;
				break;
			}
		}
		if (!found)
			dst_tmp->action = MLX4_ADDR_LIST_REM;
	}

	/* Add entries that exist in src but not in dst
	 * mark them as need to add
	 */
	list_for_each_entry(src_tmp, src, list) {
		found = false;
		list_for_each_entry(dst_tmp, dst, list) {
			if (!memcmp(dst_tmp->addr, src_tmp->addr, ETH_ALEN)) {
				dst_tmp->action = MLX4_ADDR_LIST_NONE;
				found = true;
				break;
			}
		}
		if (!found) {
			new_mc = kmalloc(sizeof(struct mlx4_en_addr_list),
					 GFP_KERNEL);
			if (!new_mc) {
				/* Give up; entries already tagged stay tagged. */
				en_err(priv, "Failed to allocate current multicast list\n");
				return;
			}
			/*
			 * The whole entry is copied, list linkage included;
			 * list_add_tail() below rewrites the linkage.
			 */
			memcpy(new_mc, src_tmp,
			       sizeof(struct mlx4_en_addr_list));
			new_mc->action = MLX4_ADDR_LIST_ADD;
			list_add_tail(&new_mc->list, dst);
		}
	}
}

/*
 * Request an RX filter update.  Nothing is done while the port is down;
 * otherwise the rx_mode_task work item is queued on the device workqueue.
 */
static void mlx4_en_set_rx_mode(struct net_device *dev)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);

	if (!priv->port_up)
		return;

	queue_work(priv->mdev->workqueue, &priv->rx_mode_task);
}

/*
 * Put the port into promiscuous mode if it is not already flagged as such.
 *
 * The steering-mode specific rules are installed first, then the port
 * multicast filter is disabled.  Failures are logged but do not stop the
 * sequence, and the software flags are set regardless.
 */
static void mlx4_en_set_promisc_mode(struct mlx4_en_priv *priv,
				     struct mlx4_en_dev *mdev)
{
	int err = 0;

	if (!(priv->flags & MLX4_EN_FLAG_PROMISC)) {
		priv->flags |= MLX4_EN_FLAG_PROMISC;

		/* Enable promiscuous mode */
		switch (mdev->dev->caps.steering_mode) {
		case MLX4_STEERING_MODE_DEVICE_MANAGED:
			err = mlx4_flow_steer_promisc_add(mdev->dev,
							  priv->port,
							  priv->base_qpn,
							  MLX4_FS_ALL_DEFAULT);
			if (err)
				en_err(priv, "Failed enabling promiscuous mode\n");
			priv->flags |= MLX4_EN_FLAG_MC_PROMISC;
			break;

		case MLX4_STEERING_MODE_B0:
			err = mlx4_unicast_promisc_add(mdev->dev,
						       priv->base_qpn,
						       priv->port);
			if (err)
				en_err(priv, "Failed enabling unicast promiscuous mode\n");

			/* Add the default qp number as multicast
			 * promisc
			 */
			if (!(priv->flags & MLX4_EN_FLAG_MC_PROMISC)) {
				err =
mlx4_multicast_promisc_add(mdev->dev,
								 priv->base_qpn,
								 priv->port);
				if (err)
					en_err(priv, "Failed enabling multicast promiscuous mode\n");
				priv->flags |= MLX4_EN_FLAG_MC_PROMISC;
			}
			break;

		case MLX4_STEERING_MODE_A0:
			err = mlx4_SET_PORT_qpn_calc(mdev->dev,
						     priv->port,
						     priv->base_qpn,
						     1);
			if (err)
				en_err(priv, "Failed enabling promiscuous mode\n");
			break;
		}

		/* Disable port multicast filter (unconditionally) */
		err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0,
					  0, MLX4_MCAST_DISABLE);
		if (err)
			en_err(priv, "Failed disabling multicast filter\n");
	}
}

/*
 * Take the port out of promiscuous mode.
 *
 * The promiscuous flag is cleared first, then the steering-mode specific
 * rules are removed.  Errors are logged only; the multicast-promiscuous
 * flag is cleared even when the removal call fails.
 */
static void mlx4_en_clear_promisc_mode(struct mlx4_en_priv *priv,
				       struct mlx4_en_dev *mdev)
{
	int err = 0;

	priv->flags &= ~MLX4_EN_FLAG_PROMISC;

	/* Disable promiscuous mode */
	switch (mdev->dev->caps.steering_mode) {
	case MLX4_STEERING_MODE_DEVICE_MANAGED:
		err = mlx4_flow_steer_promisc_remove(mdev->dev,
						     priv->port,
						     MLX4_FS_ALL_DEFAULT);
		if (err)
			en_err(priv, "Failed disabling promiscuous mode\n");
		priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC;
		break;

	case MLX4_STEERING_MODE_B0:
		err = mlx4_unicast_promisc_remove(mdev->dev,
						  priv->base_qpn,
						  priv->port);
		if (err)
			en_err(priv, "Failed disabling unicast promiscuous mode\n");
		/* Disable Multicast promisc */
		if (priv->flags & MLX4_EN_FLAG_MC_PROMISC) {
			err = mlx4_multicast_promisc_remove(mdev->dev,
							    priv->base_qpn,
							    priv->port);
			if (err)
				en_err(priv, "Failed disabling multicast promiscuous mode\n");
			priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC;
		}
		break;

	case MLX4_STEERING_MODE_A0:
		err = mlx4_SET_PORT_qpn_calc(mdev->dev,
					     priv->port,
					     priv->base_qpn, 0);
		if (err)
			en_err(priv, "Failed disabling promiscuous mode\n");
		break;
	}
}

/*
 * Program unicast and multicast filtering for the non-promiscuous case.
 *
 * With IFF_ALLMULTI set, the multicast filter is disabled and the default
 * QP is made multicast-promiscuous.  Otherwise multicast-promiscuous mode
 * is removed, the unicast steering rules are brought in line with the
 * interface addresses, the hardware multicast filter is rebuilt, and the
 * per-address multicast attachments are updated.
 */
static void mlx4_en_do_multicast(struct mlx4_en_priv *priv,
				 struct net_device *dev,
				 struct mlx4_en_dev *mdev)
{
	struct mlx4_en_addr_list *addr_list, *tmp;
	u8 mc_list[16] = {0};
	int err = 0;
	u64 mcast_addr = 0;

	/* Enable/disable the multicast filter according to IFF_ALLMULTI */
	if (dev->if_flags & IFF_ALLMULTI) {
		err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0,
					  0,
MLX4_MCAST_DISABLE);
		if (err)
			en_err(priv, "Failed disabling multicast filter\n");

		/* Add the default qp number as multicast promisc */
		if (!(priv->flags & MLX4_EN_FLAG_MC_PROMISC)) {
			switch (mdev->dev->caps.steering_mode) {
			case MLX4_STEERING_MODE_DEVICE_MANAGED:
				err = mlx4_flow_steer_promisc_add(mdev->dev,
								  priv->port,
								  priv->base_qpn,
								  MLX4_FS_MC_DEFAULT);
				break;

			case MLX4_STEERING_MODE_B0:
				err = mlx4_multicast_promisc_add(mdev->dev,
								 priv->base_qpn,
								 priv->port);
				break;

			case MLX4_STEERING_MODE_A0:
				/* Nothing to install in A0 mode. */
				break;
			}
			if (err)
				en_err(priv, "Failed entering multicast promisc mode\n");
			/* The flag is set even if the hardware call failed. */
			priv->flags |= MLX4_EN_FLAG_MC_PROMISC;
		}
	} else {
		/* Disable Multicast promisc */
		if (priv->flags & MLX4_EN_FLAG_MC_PROMISC) {
			switch (mdev->dev->caps.steering_mode) {
			case MLX4_STEERING_MODE_DEVICE_MANAGED:
				err = mlx4_flow_steer_promisc_remove(mdev->dev,
								     priv->port,
								     MLX4_FS_MC_DEFAULT);
				break;

			case MLX4_STEERING_MODE_B0:
				err = mlx4_multicast_promisc_remove(mdev->dev,
								    priv->base_qpn,
								    priv->port);
				break;

			case MLX4_STEERING_MODE_A0:
				break;
			}
			if (err)
				en_err(priv, "Failed disabling multicast promiscuous mode\n");
			priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC;
		}

		/* Update unicast list */
		mlx4_en_cache_uclist(dev);

		update_addr_list_flags(priv, &priv->curr_uc_list, &priv->uc_list);

		/* Apply the REM/ADD actions tagged by update_addr_list_flags(). */
		list_for_each_entry_safe(addr_list, tmp, &priv->curr_uc_list, list) {
			if (addr_list->action == MLX4_ADDR_LIST_REM) {
				mlx4_en_uc_steer_release(priv, addr_list->addr,
							 priv->rss_map.indir_qp.qpn,
							 addr_list->reg_id);
				/* remove from list */
				list_del(&addr_list->list);
				kfree(addr_list);
			} else if (addr_list->action == MLX4_ADDR_LIST_ADD) {
				err = mlx4_en_uc_steer_add(priv, addr_list->addr,
							   &priv->rss_map.indir_qp.qpn,
							   &addr_list->reg_id);
				if (err)
					en_err(priv, "Fail to add unicast address\n");
			}
		}

		err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0,
					  0, MLX4_MCAST_DISABLE);
		if (err)
			en_err(priv, "Failed disabling multicast filter\n");

		/* Flush mcast filter and init it with broadcast address */
		mlx4_SET_MCAST_FLTR(mdev->dev, priv->port,
ETH_BCAST,
				    1, MLX4_MCAST_CONFIG);

		/* Update multicast list - we cache all addresses so they won't
		 * change while HW is updated holding the command semaphore
		 */
		mlx4_en_cache_mclist(dev);
		list_for_each_entry(addr_list, &priv->mc_list, list) {
			mcast_addr = mlx4_mac_to_u64(addr_list->addr);
			mlx4_SET_MCAST_FLTR(mdev->dev, priv->port,
					mcast_addr, 0, MLX4_MCAST_CONFIG);
		}
		err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0,
					  0, MLX4_MCAST_ENABLE);
		if (err)
			en_err(priv, "Failed enabling multicast filter\n");

		update_addr_list_flags(priv, &priv->curr_mc_list, &priv->mc_list);

		/* Apply the REM/ADD actions to the multicast attachments. */
		list_for_each_entry_safe(addr_list, tmp, &priv->curr_mc_list, list) {
			if (addr_list->action == MLX4_ADDR_LIST_REM) {
				/* detach this address and delete from list */
				memcpy(&mc_list[10], addr_list->addr, ETH_ALEN);
				mc_list[5] = priv->port;
				err = mlx4_multicast_detach(mdev->dev,
							    &priv->rss_map.indir_qp,
							    mc_list,
							    MLX4_PROT_ETH,
							    addr_list->reg_id);
				if (err)
					en_err(priv, "Fail to detach multicast address\n");

				if (addr_list->tunnel_reg_id) {
					err = mlx4_flow_detach(priv->mdev->dev, addr_list->tunnel_reg_id);
					if (err)
						en_err(priv, "Failed to detach multicast address\n");
				}

				/* remove from list */
				list_del(&addr_list->list);
				kfree(addr_list);
			} else if (addr_list->action == MLX4_ADDR_LIST_ADD) {
				/* attach the address */
				memcpy(&mc_list[10], addr_list->addr, ETH_ALEN);
				/* needed for B0 steering support */
				mc_list[5] = priv->port;
				err = mlx4_multicast_attach(mdev->dev,
							    &priv->rss_map.indir_qp,
							    mc_list,
							    priv->port, 0,
							    MLX4_PROT_ETH,
							    &addr_list->reg_id);
				if (err)
					en_err(priv, "Fail to attach multicast address\n");

				/* Attempted even if the attach above failed. */
				err = mlx4_en_tunnel_steer_add(priv, &mc_list[10], priv->base_qpn,
							       &addr_list->tunnel_reg_id);
				if (err)
					en_err(priv, "Failed to attach multicast address\n");
			}
		}
	}
}

/*
 * Work handler for rx_mode_task: re-evaluates link state and the RX
 * filtering mode (promiscuous or filtered) under the device state lock.
 */
static void mlx4_en_do_set_rx_mode(struct work_struct *work)
{
	struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv,
						 rx_mode_task);
	struct mlx4_en_dev *mdev = priv->mdev;
	struct net_device *dev = priv->dev;
mutex_lock(&mdev->state_lock);
	if (!mdev->device_up) {
		en_dbg(HW, priv, "Card is not up, ignoring rx mode change.\n");
		goto out;
	}
	if (!priv->port_up) {
		en_dbg(HW, priv, "Port is down, ignoring rx mode change.\n");
		goto out;
	}
	/* A zero return from the port query is treated as success here. */
	if (!mlx4_en_QUERY_PORT(mdev, priv->port)) {
		if (priv->port_state.link_state) {
			priv->last_link_state = MLX4_DEV_EVENT_PORT_UP;
			/* update netif baudrate */
			priv->dev->if_baudrate =
			    IF_Mbps(priv->port_state.link_speed);
			/* Important note: the following call for if_link_state_change
			 * is needed for interface up scenario (start port, link state
			 * change)
			 */
			if_link_state_change(priv->dev, LINK_STATE_UP);
			en_dbg(HW, priv, "Link Up\n");
		}
	}

	/* Promiscuous mode: disable all filters */
	if ((dev->if_flags & IFF_PROMISC) ||
	    (priv->flags & MLX4_EN_FLAG_FORCE_PROMISC)) {
		mlx4_en_set_promisc_mode(priv, mdev);
		goto out;
	}

	/* Not in promiscuous mode */
	if (priv->flags & MLX4_EN_FLAG_PROMISC)
		mlx4_en_clear_promisc_mode(priv, mdev);

	mlx4_en_do_multicast(priv, dev, mdev);
out:
	mutex_unlock(&mdev->state_lock);
}

/*
 * Callout handler for the TX watchdog: queues watchdog_task on the device
 * workqueue and, while the port is up, re-arms itself for another
 * MLX4_EN_WATCHDOG_TIMEOUT.
 */
static void mlx4_en_watchdog_timeout(void *arg)
{
	struct mlx4_en_priv *priv = arg;
	struct mlx4_en_dev *mdev = priv->mdev;

	en_dbg(DRV, priv, "Scheduling watchdog\n");
	queue_work(mdev->workqueue, &priv->watchdog_task);
	if (priv->port_up)
		callout_reset(&priv->watchdog_timer, MLX4_EN_WATCHDOG_TIMEOUT,
				mlx4_en_watchdog_timeout, priv);
}

/*
 * Load the default interrupt-moderation settings into 'priv' and into every
 * RX and TX completion queue, and reset the adaptive-moderation bookkeeping.
 * Only software state is written here; no hardware command is issued.
 */
static void mlx4_en_set_default_moderation(struct mlx4_en_priv *priv)
{
	struct mlx4_en_cq *cq;
	int i;

	/* If we haven't received a specific coalescing setting
	 * (module param), we set the moderation parameters as follows:
	 * - moder_cnt is set to the number of mtu sized packets to
	 *   satisfy our coalescing target.
	 * - moder_time is set to a fixed value.
*/ priv->rx_frames = MLX4_EN_RX_COAL_TARGET; priv->rx_usecs = MLX4_EN_RX_COAL_TIME; priv->tx_frames = MLX4_EN_TX_COAL_PKTS; priv->tx_usecs = MLX4_EN_TX_COAL_TIME; en_dbg(INTR, priv, "Default coalesing params for mtu: %u - " "rx_frames:%d rx_usecs:%d\n", (unsigned)priv->dev->if_mtu, priv->rx_frames, priv->rx_usecs); /* Setup cq moderation params */ for (i = 0; i < priv->rx_ring_num; i++) { cq = priv->rx_cq[i]; cq->moder_cnt = priv->rx_frames; cq->moder_time = priv->rx_usecs; priv->last_moder_time[i] = MLX4_EN_AUTO_CONF; priv->last_moder_packets[i] = 0; priv->last_moder_bytes[i] = 0; } for (i = 0; i < priv->tx_ring_num; i++) { cq = priv->tx_cq[i]; cq->moder_cnt = priv->tx_frames; cq->moder_time = priv->tx_usecs; } /* Reset auto-moderation params */ priv->pkt_rate_low = MLX4_EN_RX_RATE_LOW; priv->rx_usecs_low = MLX4_EN_RX_COAL_TIME_LOW; priv->pkt_rate_high = MLX4_EN_RX_RATE_HIGH; priv->rx_usecs_high = MLX4_EN_RX_COAL_TIME_HIGH; priv->sample_interval = MLX4_EN_SAMPLE_INTERVAL; priv->adaptive_rx_coal = 1; priv->last_moder_jiffies = 0; priv->last_moder_tx_packets = 0; } static void mlx4_en_auto_moderation(struct mlx4_en_priv *priv) { unsigned long period = (unsigned long) (jiffies - priv->last_moder_jiffies); struct mlx4_en_cq *cq; unsigned long packets; unsigned long rate; unsigned long avg_pkt_size; unsigned long rx_packets; unsigned long rx_bytes; unsigned long rx_pkt_diff; int moder_time; int ring, err; if (!priv->adaptive_rx_coal || period < priv->sample_interval * HZ) return; for (ring = 0; ring < priv->rx_ring_num; ring++) { spin_lock(&priv->stats_lock); rx_packets = priv->rx_ring[ring]->packets; rx_bytes = priv->rx_ring[ring]->bytes; spin_unlock(&priv->stats_lock); rx_pkt_diff = ((unsigned long) (rx_packets - priv->last_moder_packets[ring])); packets = rx_pkt_diff; rate = packets * HZ / period; avg_pkt_size = packets ? 
((unsigned long) (rx_bytes -
				priv->last_moder_bytes[ring])) / packets : 0;

		/* Apply auto-moderation only when packet rate
		 * exceeds a rate that it matters
		 */
		if (rate > (MLX4_EN_RX_RATE_THRESH / priv->rx_ring_num) &&
		    avg_pkt_size > MLX4_EN_AVG_PKT_SMALL) {
			if (rate < priv->pkt_rate_low)
				moder_time = priv->rx_usecs_low;
			else if (rate > priv->pkt_rate_high)
				moder_time = priv->rx_usecs_high;
			else
				/* Linear interpolation between the low and high settings. */
				moder_time = (rate - priv->pkt_rate_low) *
					(priv->rx_usecs_high - priv->rx_usecs_low) /
					(priv->pkt_rate_high - priv->pkt_rate_low) +
					priv->rx_usecs_low;
		} else {
			moder_time = priv->rx_usecs_low;
		}

		if (moder_time != priv->last_moder_time[ring]) {
			priv->last_moder_time[ring] = moder_time;
			cq = priv->rx_cq[ring];
			cq->moder_time = moder_time;
			cq->moder_cnt = priv->rx_frames;
			err = mlx4_en_set_cq_moder(priv, cq);
			if (err)
				en_err(priv, "Failed modifying moderation for cq:%d\n",
				       ring);
		}
		priv->last_moder_packets[ring] = rx_packets;
		priv->last_moder_bytes[ring] = rx_bytes;
	}

	priv->last_moder_jiffies = jiffies;
}

/*
 * Delayed-work handler for stats_task.
 *
 * Under the device state lock: when the device and port are up, the port
 * statistics are refreshed (vport stats on a slave function, an ETH stats
 * dump otherwise) and adaptive moderation is run.  While the device is up
 * the work re-queues itself after STATS_DELAY.
 */
static void mlx4_en_do_get_stats(struct work_struct *work)
{
	struct delayed_work *delay = to_delayed_work(work);
	struct mlx4_en_priv *priv = container_of(delay, struct mlx4_en_priv,
						 stats_task);
	struct mlx4_en_dev *mdev = priv->mdev;
	int err;

	mutex_lock(&mdev->state_lock);
	if (mdev->device_up) {
		if (priv->port_up) {
			if (mlx4_is_slave(mdev->dev))
				err = mlx4_en_get_vport_stats(mdev, priv->port);
			else
				err = mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 0);
			if (err)
				en_dbg(HW, priv, "Could not update stats\n");

			mlx4_en_auto_moderation(priv);
		}

		queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY);
	}
	mutex_unlock(&mdev->state_lock);
}

/* mlx4_en_service_task - Run service task for tasks that needed to be done
 * periodically
 */
static void mlx4_en_service_task(struct work_struct *work)
{
	struct delayed_work *delay = to_delayed_work(work);
	struct mlx4_en_priv *priv = container_of(delay, struct mlx4_en_priv,
						 service_task);
	struct mlx4_en_dev *mdev = priv->mdev;
mutex_lock(&mdev->state_lock);
	if (mdev->device_up) {
		/* The only periodic action visible here is re-queuing itself. */
		queue_delayed_work(mdev->workqueue, &priv->service_task,
				   SERVICE_TASK_DELAY);
	}
	mutex_unlock(&mdev->state_lock);
}

/*
 * Work handler for linkstate_task: propagates a change of priv->link_state
 * to the network stack and records it in priv->last_link_state.
 */
static void mlx4_en_linkstate(struct work_struct *work)
{
	struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv,
						 linkstate_task);
	struct mlx4_en_dev *mdev = priv->mdev;
	/* Sampled before the state lock is taken. */
	int linkstate = priv->link_state;

	mutex_lock(&mdev->state_lock);
	/* If observable port state changed set carrier state and
	 * report to system log
	 */
	if (priv->last_link_state != linkstate) {
		if (linkstate == MLX4_DEV_EVENT_PORT_DOWN) {
			en_info(priv, "Link Down\n");
			if_link_state_change(priv->dev, LINK_STATE_DOWN);
			/* update netif baudrate */
			priv->dev->if_baudrate = 0;

		/* make sure the port is up before notifying the OS.
		 * This is tricky since we get here on INIT_PORT and
		 * in such case we can't tell the OS the port is up.
		 * To solve this there is a call to if_link_state_change
		 * in set_rx_mode.
		 */
		} else if (priv->port_up && (linkstate == MLX4_DEV_EVENT_PORT_UP)){
			/* A failed query is logged; link-up is still reported. */
			if (mlx4_en_QUERY_PORT(priv->mdev, priv->port))
				en_info(priv, "Query port failed\n");
			priv->dev->if_baudrate =
			    IF_Mbps(priv->port_state.link_speed);
			en_info(priv, "Link Up\n");
			if_link_state_change(priv->dev, LINK_STATE_UP);
		}
	}
	priv->last_link_state = linkstate;
	mutex_unlock(&mdev->state_lock);
}

/*
 * Bring the port up: activate the RX rings and CQs, obtain the base QP,
 * configure RSS steering and the drop QP, activate the TX CQs and rings,
 * configure and initialise the port, attach the broadcast address and
 * schedule the RX-mode task.
 *
 * Returns 0 on success (also when the port is already up) or the error of
 * the step that failed, after unwinding the steps completed so far.
 */
int mlx4_en_start_port(struct net_device *dev)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	struct mlx4_en_dev *mdev = priv->mdev;
	struct mlx4_en_cq *cq;
	struct mlx4_en_tx_ring *tx_ring;
	int rx_index = 0;
	int tx_index = 0;
	int err = 0;
	int i;
	int j;
	u8 mc_list[16] = {0};

	if (priv->port_up) {
		en_dbg(DRV, priv, "start port called while port already up\n");
		return 0;
	}

	INIT_LIST_HEAD(&priv->mc_list);
	INIT_LIST_HEAD(&priv->uc_list);
	INIT_LIST_HEAD(&priv->curr_mc_list);
	INIT_LIST_HEAD(&priv->curr_uc_list);
	INIT_LIST_HEAD(&priv->ethtool_list);

	/* Calculate Rx buf size */
	dev->if_mtu = min(dev->if_mtu, priv->max_mtu);
	mlx4_en_calc_rx_buf(dev);
	en_dbg(DRV, priv, "Rx buf
size:%d\n", priv->rx_mb_size); /* Configure rx cq's and rings */ err = mlx4_en_activate_rx_rings(priv); if (err) { en_err(priv, "Failed to activate RX rings\n"); return err; } for (i = 0; i < priv->rx_ring_num; i++) { cq = priv->rx_cq[i]; mlx4_en_cq_init_lock(cq); err = mlx4_en_activate_cq(priv, cq, i); if (err) { en_err(priv, "Failed activating Rx CQ\n"); goto cq_err; } for (j = 0; j < cq->size; j++) cq->buf[j].owner_sr_opcode = MLX4_CQE_OWNER_MASK; err = mlx4_en_set_cq_moder(priv, cq); if (err) { en_err(priv, "Failed setting cq moderation parameters"); mlx4_en_deactivate_cq(priv, cq); goto cq_err; } mlx4_en_arm_cq(priv, cq); priv->rx_ring[i]->cqn = cq->mcq.cqn; ++rx_index; } /* Set qp number */ en_dbg(DRV, priv, "Getting qp number for port %d\n", priv->port); err = mlx4_en_get_qp(priv); if (err) { en_err(priv, "Failed getting eth qp\n"); goto cq_err; } mdev->mac_removed[priv->port] = 0; priv->counter_index = mlx4_get_default_counter_index(mdev->dev, priv->port); err = mlx4_en_config_rss_steer(priv); if (err) { en_err(priv, "Failed configuring rss steering\n"); goto mac_err; } err = mlx4_en_create_drop_qp(priv); if (err) goto rss_err; /* Configure tx cq's and rings */ for (i = 0; i < priv->tx_ring_num; i++) { /* Configure cq */ cq = priv->tx_cq[i]; err = mlx4_en_activate_cq(priv, cq, i); if (err) { en_err(priv, "Failed activating Tx CQ\n"); goto tx_err; } err = mlx4_en_set_cq_moder(priv, cq); if (err) { en_err(priv, "Failed setting cq moderation parameters"); mlx4_en_deactivate_cq(priv, cq); goto tx_err; } en_dbg(DRV, priv, "Resetting index of collapsed CQ:%d to -1\n", i); cq->buf->wqe_index = cpu_to_be16(0xffff); /* Configure ring */ tx_ring = priv->tx_ring[i]; err = mlx4_en_activate_tx_ring(priv, tx_ring, cq->mcq.cqn, i / priv->num_tx_rings_p_up); if (err) { en_err(priv, "Failed activating Tx ring %d\n", i); mlx4_en_deactivate_cq(priv, cq); goto tx_err; } /* Arm CQ for TX completions */ mlx4_en_arm_cq(priv, cq); /* Set initial ownership of all Tx TXBBs to SW (1) 
*/
		for (j = 0; j < tx_ring->buf_size; j += STAMP_STRIDE)
			*((u32 *) (tx_ring->buf + j)) = INIT_OWNER_BIT;
		/* tx_index counts the ring/CQ pairs the tx_err path must undo. */
		++tx_index;
	}

	/* Configure port */
	err = mlx4_SET_PORT_general(mdev->dev, priv->port,
				    priv->rx_mb_size,
				    priv->prof->tx_pause,
				    priv->prof->tx_ppp,
				    priv->prof->rx_pause,
				    priv->prof->rx_ppp);
	if (err) {
		en_err(priv, "Failed setting port general configurations for port %d, with error %d\n",
		       priv->port, err);
		goto tx_err;
	}
	/* Set default qp number */
	err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port, priv->base_qpn, 0);
	if (err) {
		en_err(priv, "Failed setting default qp numbers\n");
		goto tx_err;
	}

	/* Init port */
	en_dbg(HW, priv, "Initializing port\n");
	err = mlx4_INIT_PORT(mdev->dev, priv->port);
	if (err) {
		en_err(priv, "Failed Initializing port\n");
		goto tx_err;
	}

	/* Attach rx QP to broadcast address */
	memset(&mc_list[10], 0xff, ETH_ALEN);
	mc_list[5] = priv->port; /* needed for B0 steering support */
	/* A failed broadcast attach is only warned about; start-up continues. */
	if (mlx4_multicast_attach(mdev->dev, &priv->rss_map.indir_qp, mc_list,
				  priv->port, 0, MLX4_PROT_ETH,
				  &priv->broadcast_id))
		mlx4_warn(mdev, "Failed Attaching Broadcast\n");

	/* Must redo promiscuous mode setup. */
	priv->flags &= ~(MLX4_EN_FLAG_PROMISC | MLX4_EN_FLAG_MC_PROMISC);

	/* Schedule multicast task to populate multicast list */
	queue_work(mdev->workqueue, &priv->rx_mode_task);

	priv->port_up = true;

	/* Enable the queues.
*/ dev->if_drv_flags &= ~IFF_DRV_OACTIVE; dev->if_drv_flags |= IFF_DRV_RUNNING; #ifdef CONFIG_DEBUG_FS mlx4_en_create_debug_files(priv); #endif callout_reset(&priv->watchdog_timer, MLX4_EN_WATCHDOG_TIMEOUT, mlx4_en_watchdog_timeout, priv); return 0; tx_err: while (tx_index--) { mlx4_en_deactivate_tx_ring(priv, priv->tx_ring[tx_index]); mlx4_en_deactivate_cq(priv, priv->tx_cq[tx_index]); } mlx4_en_destroy_drop_qp(priv); rss_err: mlx4_en_release_rss_steer(priv); mac_err: mlx4_en_put_qp(priv); cq_err: while (rx_index--) mlx4_en_deactivate_cq(priv, priv->rx_cq[rx_index]); for (i = 0; i < priv->rx_ring_num; i++) mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]); return err; /* need to close devices */ } void mlx4_en_stop_port(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_addr_list *addr_list, *tmp; int i; u8 mc_list[16] = {0}; if (!priv->port_up) { en_dbg(DRV, priv, "stop port called while port already down\n"); return; } #ifdef CONFIG_DEBUG_FS mlx4_en_delete_debug_files(priv); #endif /* close port*/ mlx4_CLOSE_PORT(mdev->dev, priv->port); /* Set port as not active */ priv->port_up = false; priv->counter_index = MLX4_SINK_COUNTER_INDEX(mdev->dev); /* Promsicuous mode */ if (mdev->dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { priv->flags &= ~(MLX4_EN_FLAG_PROMISC | MLX4_EN_FLAG_MC_PROMISC); mlx4_flow_steer_promisc_remove(mdev->dev, priv->port, MLX4_FS_ALL_DEFAULT); mlx4_flow_steer_promisc_remove(mdev->dev, priv->port, MLX4_FS_MC_DEFAULT); } else if (priv->flags & MLX4_EN_FLAG_PROMISC) { priv->flags &= ~MLX4_EN_FLAG_PROMISC; /* Disable promiscouos mode */ mlx4_unicast_promisc_remove(mdev->dev, priv->base_qpn, priv->port); /* Disable Multicast promisc */ if (priv->flags & MLX4_EN_FLAG_MC_PROMISC) { mlx4_multicast_promisc_remove(mdev->dev, priv->base_qpn, priv->port); priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC; } } /* Detach All unicasts */ list_for_each_entry(addr_list, 
&priv->curr_uc_list, list) {
		mlx4_en_uc_steer_release(priv, addr_list->addr,
					 priv->rss_map.indir_qp.qpn,
					 addr_list->reg_id);
	}
	/* Drop both the cached list and the programmed list. */
	mlx4_en_clear_uclist(dev);
	list_for_each_entry_safe(addr_list, tmp, &priv->curr_uc_list, list) {
		list_del(&addr_list->list);
		kfree(addr_list);
	}

	/* Detach All multicasts */
	memset(&mc_list[10], 0xff, ETH_ALEN);
	mc_list[5] = priv->port; /* needed for B0 steering support */
	mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp, mc_list,
			      MLX4_PROT_ETH, priv->broadcast_id);
	list_for_each_entry(addr_list, &priv->curr_mc_list, list) {
		memcpy(&mc_list[10], addr_list->addr, ETH_ALEN);
		mc_list[5] = priv->port;
		mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp,
				      mc_list, MLX4_PROT_ETH, addr_list->reg_id);
	}
	mlx4_en_clear_mclist(dev);
	list_for_each_entry_safe(addr_list, tmp, &priv->curr_mc_list, list) {
		list_del(&addr_list->list);
		kfree(addr_list);
	}

	/* Flush multicast filter */
	mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 1, MLX4_MCAST_CONFIG);
	mlx4_en_destroy_drop_qp(priv);

	/* Free TX Rings */
	for (i = 0; i < priv->tx_ring_num; i++) {
		mlx4_en_deactivate_tx_ring(priv, priv->tx_ring[i]);
		mlx4_en_deactivate_cq(priv, priv->tx_cq[i]);
	}
	/* NOTE(review): presumably lets in-flight TX work settle - confirm. */
	msleep(10);

	for (i = 0; i < priv->tx_ring_num; i++)
		mlx4_en_free_tx_buf(dev, priv->tx_ring[i]);

	/* Free RSS qps */
	mlx4_en_release_rss_steer(priv);

	/* Unregister Mac address for the port */
	mlx4_en_put_qp(priv);
	mdev->mac_removed[priv->port] = 1;

	/* Free RX Rings */
	for (i = 0; i < priv->rx_ring_num; i++) {
		struct mlx4_en_cq *cq = priv->rx_cq[i];

		mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]);
		mlx4_en_deactivate_cq(priv, cq);
	}

	callout_stop(&priv->watchdog_timer);

	dev->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
}

/*
 * Work handler for watchdog_task: restarts the port when some TX ring has
 * been blocked for longer than MLX4_EN_WATCHDOG_TIMEOUT.
 */
static void mlx4_en_restart(struct work_struct *work)
{
	struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv,
						 watchdog_task);
	struct mlx4_en_dev *mdev = priv->mdev;
	struct net_device *dev = priv->dev;
	struct mlx4_en_tx_ring *ring;
	int i;

	if (priv->blocked == 0 ||
priv->port_up == 0) return; for (i = 0; i < priv->tx_ring_num; i++) { ring = priv->tx_ring[i]; if (ring->blocked && ring->watchdog_time + MLX4_EN_WATCHDOG_TIMEOUT < ticks) goto reset; } return; reset: priv->port_stats.tx_timeout++; en_dbg(DRV, priv, "Watchdog task called for port %d\n", priv->port); mutex_lock(&mdev->state_lock); if (priv->port_up) { mlx4_en_stop_port(dev); //for (i = 0; i < priv->tx_ring_num; i++) // netdev_tx_reset_queue(priv->tx_ring[i]->tx_queue); if (mlx4_en_start_port(dev)) en_err(priv, "Failed restarting port %d\n", priv->port); } mutex_unlock(&mdev->state_lock); } static void mlx4_en_clear_stats(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int i; if (!mlx4_is_slave(mdev->dev)) if (mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 1)) en_dbg(HW, priv, "Failed dumping statistics\n"); memset(&priv->pstats, 0, sizeof(priv->pstats)); memset(&priv->pkstats, 0, sizeof(priv->pkstats)); memset(&priv->port_stats, 0, sizeof(priv->port_stats)); memset(&priv->vport_stats, 0, sizeof(priv->vport_stats)); for (i = 0; i < priv->tx_ring_num; i++) { priv->tx_ring[i]->bytes = 0; priv->tx_ring[i]->packets = 0; priv->tx_ring[i]->tx_csum = 0; priv->tx_ring[i]->oversized_packets = 0; } for (i = 0; i < priv->rx_ring_num; i++) { priv->rx_ring[i]->bytes = 0; priv->rx_ring[i]->packets = 0; priv->rx_ring[i]->csum_ok = 0; priv->rx_ring[i]->csum_none = 0; } } static void mlx4_en_open(void* arg) { struct mlx4_en_priv *priv; struct mlx4_en_dev *mdev; struct net_device *dev; int err = 0; priv = arg; mdev = priv->mdev; dev = priv->dev; mutex_lock(&mdev->state_lock); if (!mdev->device_up) { en_err(priv, "Cannot open - device down/disabled\n"); goto out; } /* Reset HW statistics and SW counters */ mlx4_en_clear_stats(dev); err = mlx4_en_start_port(dev); if (err) en_err(priv, "Failed starting port:%d\n", priv->port); out: mutex_unlock(&mdev->state_lock); return; } void mlx4_en_free_resources(struct mlx4_en_priv *priv) { 
int i; #ifdef CONFIG_RFS_ACCEL if (priv->dev->rx_cpu_rmap) { free_irq_cpu_rmap(priv->dev->rx_cpu_rmap); priv->dev->rx_cpu_rmap = NULL; } #endif for (i = 0; i < priv->tx_ring_num; i++) { if (priv->tx_ring && priv->tx_ring[i]) mlx4_en_destroy_tx_ring(priv, &priv->tx_ring[i]); if (priv->tx_cq && priv->tx_cq[i]) mlx4_en_destroy_cq(priv, &priv->tx_cq[i]); } for (i = 0; i < priv->rx_ring_num; i++) { if (priv->rx_ring[i]) mlx4_en_destroy_rx_ring(priv, &priv->rx_ring[i], priv->prof->rx_ring_size, priv->stride); if (priv->rx_cq[i]) mlx4_en_destroy_cq(priv, &priv->rx_cq[i]); } if (priv->stat_sysctl != NULL) sysctl_ctx_free(&priv->stat_ctx); } int mlx4_en_alloc_resources(struct mlx4_en_priv *priv) { struct mlx4_en_port_profile *prof = priv->prof; int i; int node = 0; /* Create rx Rings */ for (i = 0; i < priv->rx_ring_num; i++) { if (mlx4_en_create_cq(priv, &priv->rx_cq[i], prof->rx_ring_size, i, RX, node)) goto err; if (mlx4_en_create_rx_ring(priv, &priv->rx_ring[i], prof->rx_ring_size, node)) goto err; } /* Create tx Rings */ for (i = 0; i < priv->tx_ring_num; i++) { if (mlx4_en_create_cq(priv, &priv->tx_cq[i], prof->tx_ring_size, i, TX, node)) goto err; if (mlx4_en_create_tx_ring(priv, &priv->tx_ring[i], prof->tx_ring_size, TXBB_SIZE, node, i)) goto err; } #ifdef CONFIG_RFS_ACCEL priv->dev->rx_cpu_rmap = alloc_irq_cpu_rmap(priv->rx_ring_num); if (!priv->dev->rx_cpu_rmap) goto err; #endif /* Re-create stat sysctls in case the number of rings changed. 
*/ mlx4_en_sysctl_stat(priv); return 0; err: en_err(priv, "Failed to allocate NIC resources\n"); for (i = 0; i < priv->rx_ring_num; i++) { if (priv->rx_ring[i]) mlx4_en_destroy_rx_ring(priv, &priv->rx_ring[i], prof->rx_ring_size, priv->stride); if (priv->rx_cq[i]) mlx4_en_destroy_cq(priv, &priv->rx_cq[i]); } for (i = 0; i < priv->tx_ring_num; i++) { if (priv->tx_ring[i]) mlx4_en_destroy_tx_ring(priv, &priv->tx_ring[i]); if (priv->tx_cq[i]) mlx4_en_destroy_cq(priv, &priv->tx_cq[i]); } priv->port_up = false; return -ENOMEM; } struct en_port_attribute { struct attribute attr; ssize_t (*show)(struct en_port *, struct en_port_attribute *, char *buf); ssize_t (*store)(struct en_port *, struct en_port_attribute *, char *buf, size_t count); }; #define PORT_ATTR_RO(_name) \ struct en_port_attribute en_port_attr_##_name = __ATTR_RO(_name) #define EN_PORT_ATTR(_name, _mode, _show, _store) \ struct en_port_attribute en_port_attr_##_name = __ATTR(_name, _mode, _show, _store) void mlx4_en_destroy_netdev(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; en_dbg(DRV, priv, "Destroying netdev on port:%d\n", priv->port); /* don't allow more IOCTLs */ priv->gone = 1; /* XXX wait a bit to allow IOCTL handlers to complete */ pause("W", hz); if (priv->vlan_attach != NULL) EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach); if (priv->vlan_detach != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, priv->vlan_detach); /* Unregister device - this will close the port if it was up */ if (priv->registered) { mutex_lock(&mdev->state_lock); ether_ifdetach(dev); mutex_unlock(&mdev->state_lock); } mutex_lock(&mdev->state_lock); mlx4_en_stop_port(dev); mutex_unlock(&mdev->state_lock); if (priv->allocated) mlx4_free_hwq_res(mdev->dev, &priv->res, MLX4_EN_PAGE_SIZE); cancel_delayed_work(&priv->stats_task); cancel_delayed_work(&priv->service_task); /* flush any pending task for this netdev */ flush_workqueue(mdev->workqueue); 
callout_drain(&priv->watchdog_timer);

	/* Detach the netdev so tasks would not attempt to access it */
	mutex_lock(&mdev->state_lock);
	mdev->pndev[priv->port] = NULL;
	mutex_unlock(&mdev->state_lock);

	mlx4_en_free_resources(priv);

	/* freeing the sysctl conf cannot be called from within mlx4_en_free_resources */
	if (priv->conf_sysctl != NULL)
		sysctl_ctx_free(&priv->conf_ctx);

	kfree(priv->tx_ring);
	kfree(priv->tx_cq);

	kfree(priv);
	if_free(dev);
}

/*
 * Change the interface MTU.
 *
 * Returns -EPERM when 'new_mtu' is below MLX4_EN_MIN_MTU or above
 * priv->max_mtu, otherwise 0.  When the interface is running and the
 * device is up, the port is restarted so the new size takes effect.  A
 * failed restart is logged and handed to the watchdog task; it is not
 * reported to the caller.
 */
static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu)
{
	struct mlx4_en_priv *priv = netdev_priv(dev);
	struct mlx4_en_dev *mdev = priv->mdev;
	int err = 0;

	en_dbg(DRV, priv, "Change MTU called - current:%u new:%u\n",
	       (unsigned)dev->if_mtu, (unsigned)new_mtu);

	if ((new_mtu < MLX4_EN_MIN_MTU) || (new_mtu > priv->max_mtu)) {
		en_err(priv, "Bad MTU size:%d, max %u.\n", new_mtu,
		    priv->max_mtu);
		return -EPERM;
	}
	mutex_lock(&mdev->state_lock);
	dev->if_mtu = new_mtu;
	if (dev->if_drv_flags & IFF_DRV_RUNNING) {
		if (!mdev->device_up) {
			/* NIC is probably restarting - let watchdog task reset
			 * the port
			 */
			en_dbg(DRV, priv, "Change MTU called with card down!?\n");
		} else {
			mlx4_en_stop_port(dev);
			err = mlx4_en_start_port(dev);
			if (err) {
				en_err(priv, "Failed restarting port:%d\n",
				       priv->port);
				queue_work(mdev->workqueue, &priv->watchdog_task);
			}
		}
	}
	mutex_unlock(&mdev->state_lock);
	return 0;
}

/*
 * Build the ifmedia word describing the current link: IFM_ETHER alone when
 * the last recorded link state is down, otherwise full duplex plus a
 * subtype chosen from the link speed (and, at 10G, the transceiver type)
 * and the configured pause options.
 */
static int mlx4_en_calc_media(struct mlx4_en_priv *priv)
{
	int trans_type;
	int active;

	active = IFM_ETHER;
	if (priv->last_link_state == MLX4_DEV_EVENT_PORT_DOWN)
		return (active);
	active |= IFM_FDX;
	trans_type = priv->port_state.transceiver;
	/* XXX I don't know all of the transceiver values.
*/
	switch (priv->port_state.link_speed) {
	case 100:
		active |= IFM_100_T;
		break;
	case 1000:
		active |= IFM_1000_T;
		break;
	case 10000:
		/* No subtype is set for transceiver values outside these ranges. */
		if (trans_type > 0 && trans_type <= 0xC)
			active |= IFM_10G_SR;
		else if (trans_type == 0x80 || trans_type == 0)
			active |= IFM_10G_CX4;
		break;
	case 40000:
		active |= IFM_40G_CR4;
		break;
	}
	if (priv->prof->tx_pause)
		active |= IFM_ETH_TXPAUSE;
	if (priv->prof->rx_pause)
		active |= IFM_ETH_RXPAUSE;

	return (active);
}

/*
 * ifmedia status callback: the status is always marked valid, marked
 * active unless the last recorded link state is down, and the active media
 * word comes from mlx4_en_calc_media().
 */
static void mlx4_en_media_status(struct ifnet *dev, struct ifmediareq *ifmr)
{
	struct mlx4_en_priv *priv;

	priv = dev->if_softc;
	ifmr->ifm_status = IFM_AVALID;
	if (priv->last_link_state != MLX4_DEV_EVENT_PORT_DOWN)
		ifmr->ifm_status |= IFM_ACTIVE;
	ifmr->ifm_active = mlx4_en_calc_media(priv);

	return;
}

/*
 * ifmedia change callback.
 *
 * Accepts IFM_AUTO, or a fixed subtype only when it equals the currently
 * computed subtype and full duplex is requested; anything else yields
 * EINVAL.  The RX/TX pause options are then applied, and the port is
 * reconfigured when they differ from the current profile.  Returns 0 or a
 * positive errno.
 */
static int mlx4_en_media_change(struct ifnet *dev)
{
	struct mlx4_en_priv *priv;
	struct ifmedia *ifm;
	int rxpause;
	int txpause;
	int error;

	priv = dev->if_softc;
	ifm = &priv->media;
	rxpause = txpause = 0;
	error = 0;

	if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
		return (EINVAL);
	switch (IFM_SUBTYPE(ifm->ifm_media)) {
	case IFM_AUTO:
		break;
	case IFM_10G_SR:
	case IFM_10G_CX4:
	case IFM_1000_T:
	case IFM_40G_CR4:
		if ((IFM_SUBTYPE(ifm->ifm_media)
			== IFM_SUBTYPE(mlx4_en_calc_media(priv)))
			&& (ifm->ifm_media & IFM_FDX))
			break;
		/* Fallthrough */
	default:
		printf("%s: Only auto media type\n", if_name(dev));
		return (EINVAL);
	}
	/* Allow user to set/clear pause */
	if (IFM_OPTIONS(ifm->ifm_media) & IFM_ETH_RXPAUSE)
		rxpause = 1;
	if (IFM_OPTIONS(ifm->ifm_media) & IFM_ETH_TXPAUSE)
		txpause = 1;
	if (priv->prof->tx_pause != txpause || priv->prof->rx_pause != rxpause) {
		priv->prof->tx_pause = txpause;
		priv->prof->rx_pause = rxpause;
		/*
		 * NOTE(review): this passes rx_mb_size + ETHER_CRC_LEN while
		 * mlx4_en_start_port() passes rx_mb_size alone - confirm
		 * which size is intended.
		 */
		error = -mlx4_SET_PORT_general(priv->mdev->dev, priv->port,
		     priv->rx_mb_size + ETHER_CRC_LEN, priv->prof->tx_pause,
		     priv->prof->tx_ppp, priv->prof->rx_pause,
		     priv->prof->rx_ppp);
	}
	return (error);
}

/*
 * Interface ioctl handler.  Returns ENXIO while the interface is being
 * destroyed; commands not handled here are passed to ether_ioctl().
 */
static int mlx4_en_ioctl(struct ifnet *dev, u_long command, caddr_t data)
{
	struct mlx4_en_priv *priv;
	struct mlx4_en_dev *mdev;
	struct
ifreq *ifr;
	int error;
	int mask;
	struct ifrsskey *ifrk;
	const u32 *key;
	struct ifrsshash *ifrh;
	u8 rss_mask;

	error = 0;
	mask = 0;
	priv = dev->if_softc;

	/* check if detaching */
	if (priv == NULL || priv->gone != 0)
		return (ENXIO);

	mdev = priv->mdev;
	ifr = (struct ifreq *) data;

	switch (command) {
	case SIOCSIFMTU:
		/* mlx4_en_change_mtu() returns a negative errno; flip the sign. */
		error = -mlx4_en_change_mtu(dev, ifr->ifr_mtu);
		break;
	case SIOCSIFFLAGS:
		if (dev->if_flags & IFF_UP) {
			if ((dev->if_drv_flags & IFF_DRV_RUNNING) == 0) {
				mutex_lock(&mdev->state_lock);
				mlx4_en_start_port(dev);
				mutex_unlock(&mdev->state_lock);
			} else {
				/* Already running: just refresh the RX filters. */
				mlx4_en_set_rx_mode(dev);
			}
		} else {
			mutex_lock(&mdev->state_lock);
			if (dev->if_drv_flags & IFF_DRV_RUNNING) {
				mlx4_en_stop_port(dev);
				if_link_state_change(dev, LINK_STATE_DOWN);
			}
			mutex_unlock(&mdev->state_lock);
		}
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		mlx4_en_set_rx_mode(dev);
		break;
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		error = ifmedia_ioctl(dev, ifr, &priv->media, command);
		break;
	case SIOCSIFCAP:
		mutex_lock(&mdev->state_lock);
		/* 'mask' holds the capability bits the request wants toggled. */
		mask = ifr->ifr_reqcap ^ dev->if_capenable;
		if (mask & IFCAP_TXCSUM) {
			dev->if_capenable ^= IFCAP_TXCSUM;
			dev->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);

			/* TSO4 is switched off together with TX checksumming. */
			if (IFCAP_TSO4 & dev->if_capenable &&
			    !(IFCAP_TXCSUM & dev->if_capenable)) {
				dev->if_capenable &= ~IFCAP_TSO4;
				dev->if_hwassist &= ~CSUM_IP_TSO;
				if_printf(dev,
				    "tso4 disabled due to -txcsum.\n");
			}
		}
		if (mask & IFCAP_TXCSUM_IPV6) {
			dev->if_capenable ^= IFCAP_TXCSUM_IPV6;
			dev->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);

			if (IFCAP_TSO6 & dev->if_capenable &&
			    !(IFCAP_TXCSUM_IPV6 & dev->if_capenable)) {
				dev->if_capenable &= ~IFCAP_TSO6;
				dev->if_hwassist &= ~CSUM_IP6_TSO;
				if_printf(dev,
				    "tso6 disabled due to -txcsum6.\n");
			}
		}
		if (mask & IFCAP_RXCSUM)
			dev->if_capenable ^= IFCAP_RXCSUM;
		if (mask & IFCAP_RXCSUM_IPV6)
			dev->if_capenable ^= IFCAP_RXCSUM_IPV6;

		if (mask & IFCAP_TSO4) {
			/* Enabling TSO4 requires TX checksumming to be on. */
			if (!(IFCAP_TSO4 & dev->if_capenable) &&
			    !(IFCAP_TXCSUM & dev->if_capenable)) {
				if_printf(dev, "enable txcsum first.\n");
				error = EAGAIN;
				goto
out; } dev->if_capenable ^= IFCAP_TSO4; dev->if_hwassist ^= CSUM_IP_TSO; } if (mask & IFCAP_TSO6) { if (!(IFCAP_TSO6 & dev->if_capenable) && !(IFCAP_TXCSUM_IPV6 & dev->if_capenable)) { if_printf(dev, "enable txcsum6 first.\n"); error = EAGAIN; goto out; } dev->if_capenable ^= IFCAP_TSO6; dev->if_hwassist ^= CSUM_IP6_TSO; } if (mask & IFCAP_LRO) dev->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_VLAN_HWTAGGING) dev->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_VLAN_HWFILTER) dev->if_capenable ^= IFCAP_VLAN_HWFILTER; if (mask & IFCAP_WOL_MAGIC) dev->if_capenable ^= IFCAP_WOL_MAGIC; if (dev->if_drv_flags & IFF_DRV_RUNNING) mlx4_en_start_port(dev); out: mutex_unlock(&mdev->state_lock); VLAN_CAPABILITIES(dev); break; #if __FreeBSD_version >= 1100036 case SIOCGI2C: { struct ifi2creq i2c; - error = copyin(ifr->ifr_data, &i2c, sizeof(i2c)); + error = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c)); if (error) break; if (i2c.len > sizeof(i2c.data)) { error = EINVAL; break; } /* * Note that we ignore i2c.addr here. The driver hardcodes * the address to 0x50, while standard expects it to be 0xA0. 
*/ error = mlx4_get_module_info(mdev->dev, priv->port, i2c.offset, i2c.len, i2c.data); if (error < 0) { error = -error; break; } - error = copyout(&i2c, ifr->ifr_data, sizeof(i2c)); + error = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c)); break; } #endif case SIOCGIFRSSKEY: ifrk = (struct ifrsskey *)data; ifrk->ifrk_func = RSS_FUNC_TOEPLITZ; mutex_lock(&mdev->state_lock); key = mlx4_en_get_rss_key(priv, &ifrk->ifrk_keylen); if (ifrk->ifrk_keylen > RSS_KEYLEN) error = EINVAL; else memcpy(ifrk->ifrk_key, key, ifrk->ifrk_keylen); mutex_unlock(&mdev->state_lock); break; case SIOCGIFRSSHASH: mutex_lock(&mdev->state_lock); rss_mask = mlx4_en_get_rss_mask(priv); mutex_unlock(&mdev->state_lock); ifrh = (struct ifrsshash *)data; ifrh->ifrh_func = RSS_FUNC_TOEPLITZ; ifrh->ifrh_types = 0; if (rss_mask & MLX4_RSS_IPV4) ifrh->ifrh_types |= RSS_TYPE_IPV4; if (rss_mask & MLX4_RSS_TCP_IPV4) ifrh->ifrh_types |= RSS_TYPE_TCP_IPV4; if (rss_mask & MLX4_RSS_IPV6) ifrh->ifrh_types |= RSS_TYPE_IPV6; if (rss_mask & MLX4_RSS_TCP_IPV6) ifrh->ifrh_types |= RSS_TYPE_TCP_IPV6; if (rss_mask & MLX4_RSS_UDP_IPV4) ifrh->ifrh_types |= RSS_TYPE_UDP_IPV4; if (rss_mask & MLX4_RSS_UDP_IPV6) ifrh->ifrh_types |= RSS_TYPE_UDP_IPV6; break; default: error = ether_ioctl(dev, command, data); break; } return (error); } int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, struct mlx4_en_port_profile *prof) { struct net_device *dev; struct mlx4_en_priv *priv; uint8_t dev_addr[ETHER_ADDR_LEN]; int err; int i; priv = kzalloc(sizeof(*priv), GFP_KERNEL); dev = priv->dev = if_alloc(IFT_ETHER); if (dev == NULL) { en_err(priv, "Net device allocation failed\n"); kfree(priv); return -ENOMEM; } dev->if_softc = priv; if_initname(dev, "mlxen", (device_get_unit( mdev->pdev->dev.bsddev) * MLX4_MAX_PORTS) + port - 1); dev->if_mtu = ETHERMTU; dev->if_init = mlx4_en_open; dev->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; dev->if_ioctl = mlx4_en_ioctl; dev->if_transmit = mlx4_en_transmit; dev->if_qflush = 
mlx4_en_qflush; dev->if_snd.ifq_maxlen = prof->tx_ring_size; /* * Initialize driver private data */ priv->counter_index = 0xff; spin_lock_init(&priv->stats_lock); INIT_WORK(&priv->rx_mode_task, mlx4_en_do_set_rx_mode); INIT_WORK(&priv->watchdog_task, mlx4_en_restart); INIT_WORK(&priv->linkstate_task, mlx4_en_linkstate); INIT_DELAYED_WORK(&priv->stats_task, mlx4_en_do_get_stats); INIT_DELAYED_WORK(&priv->service_task, mlx4_en_service_task); callout_init(&priv->watchdog_timer, 1); #ifdef CONFIG_RFS_ACCEL INIT_LIST_HEAD(&priv->filters); spin_lock_init(&priv->filters_lock); #endif priv->msg_enable = MLX4_EN_MSG_LEVEL; priv->dev = dev; priv->mdev = mdev; priv->ddev = &mdev->pdev->dev; priv->prof = prof; priv->port = port; priv->port_up = false; priv->flags = prof->flags; priv->num_tx_rings_p_up = mdev->profile.num_tx_rings_p_up; priv->tx_ring_num = prof->tx_ring_num; priv->tx_ring = kcalloc(MAX_TX_RINGS, sizeof(struct mlx4_en_tx_ring *), GFP_KERNEL); if (!priv->tx_ring) { err = -ENOMEM; goto out; } priv->tx_cq = kcalloc(sizeof(struct mlx4_en_cq *), MAX_TX_RINGS, GFP_KERNEL); if (!priv->tx_cq) { err = -ENOMEM; goto out; } priv->rx_ring_num = prof->rx_ring_num; priv->cqe_factor = (mdev->dev->caps.cqe_size == 64) ? 
1 : 0; priv->mac_index = -1; priv->last_ifq_jiffies = 0; priv->if_counters_rx_errors = 0; priv->if_counters_rx_no_buffer = 0; #ifdef CONFIG_MLX4_EN_DCB if (!mlx4_is_slave(priv->mdev->dev)) { priv->dcbx_cap = DCB_CAP_DCBX_HOST; priv->flags |= MLX4_EN_FLAG_DCB_ENABLED; if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETS_CFG) { dev->dcbnl_ops = &mlx4_en_dcbnl_ops; } else { en_info(priv, "QoS disabled - no HW support\n"); dev->dcbnl_ops = &mlx4_en_dcbnl_pfc_ops; } } #endif /* Query for default mac and max mtu */ priv->max_mtu = mdev->dev->caps.eth_mtu_cap[priv->port]; priv->mac = mdev->dev->caps.def_mac[priv->port]; if (ILLEGAL_MAC(priv->mac)) { #if BITS_PER_LONG == 64 en_err(priv, "Port: %d, invalid mac burned: 0x%lx, quiting\n", priv->port, priv->mac); #elif BITS_PER_LONG == 32 en_err(priv, "Port: %d, invalid mac burned: 0x%llx, quiting\n", priv->port, priv->mac); #endif err = -EINVAL; goto out; } priv->stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) + DS_SIZE); mlx4_en_sysctl_conf(priv); err = mlx4_en_alloc_resources(priv); if (err) goto out; /* Allocate page for receive rings */ err = mlx4_alloc_hwq_res(mdev->dev, &priv->res, MLX4_EN_PAGE_SIZE, MLX4_EN_PAGE_SIZE); if (err) { en_err(priv, "Failed to allocate page for rx qps\n"); goto out; } priv->allocated = 1; /* * Set driver features */ dev->if_capabilities |= IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6; dev->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING; dev->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER; dev->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU; dev->if_capabilities |= IFCAP_LRO; dev->if_capabilities |= IFCAP_HWSTATS; if (mdev->LSO_support) dev->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6 | IFCAP_VLAN_HWTSO; #if __FreeBSD_version >= 1100000 /* set TSO limits so that we don't have to drop TX packets */ dev->if_hw_tsomax = MLX4_EN_TX_MAX_PAYLOAD_SIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN) /* hdr */; dev->if_hw_tsomaxsegcount = MLX4_EN_TX_MAX_MBUF_FRAGS - 1 /* hdr */; 
dev->if_hw_tsomaxsegsize = MLX4_EN_TX_MAX_MBUF_SIZE; #endif dev->if_capenable = dev->if_capabilities; dev->if_hwassist = 0; if (dev->if_capenable & (IFCAP_TSO4 | IFCAP_TSO6)) dev->if_hwassist |= CSUM_TSO; if (dev->if_capenable & IFCAP_TXCSUM) dev->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP); if (dev->if_capenable & IFCAP_TXCSUM_IPV6) dev->if_hwassist |= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6); /* Register for VLAN events */ priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, mlx4_en_vlan_rx_add_vid, priv, EVENTHANDLER_PRI_FIRST); priv->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, mlx4_en_vlan_rx_kill_vid, priv, EVENTHANDLER_PRI_FIRST); mdev->pndev[priv->port] = dev; priv->last_link_state = MLX4_DEV_EVENT_PORT_DOWN; mlx4_en_set_default_moderation(priv); /* Set default MAC */ for (i = 0; i < ETHER_ADDR_LEN; i++) dev_addr[ETHER_ADDR_LEN - 1 - i] = (u8) (priv->mac >> (8 * i)); ether_ifattach(dev, dev_addr); if_link_state_change(dev, LINK_STATE_DOWN); ifmedia_init(&priv->media, IFM_IMASK | IFM_ETH_FMASK, mlx4_en_media_change, mlx4_en_media_status); ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_1000_T, 0, NULL); ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_10G_SR, 0, NULL); ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_10G_CX4, 0, NULL); ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_40G_CR4, 0, NULL); ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO); en_warn(priv, "Using %d TX rings\n", prof->tx_ring_num); en_warn(priv, "Using %d RX rings\n", prof->rx_ring_num); priv->registered = 1; en_warn(priv, "Using %d TX rings\n", prof->tx_ring_num); en_warn(priv, "Using %d RX rings\n", prof->rx_ring_num); priv->rx_mb_size = dev->if_mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN; err = mlx4_SET_PORT_general(mdev->dev, priv->port, priv->rx_mb_size, prof->tx_pause, prof->tx_ppp, prof->rx_pause, prof->rx_ppp); if (err) { en_err(priv, "Failed setting port general configurations " "for port %d, with error 
%d\n", priv->port, err); goto out; } /* Init port */ en_warn(priv, "Initializing port\n"); err = mlx4_INIT_PORT(mdev->dev, priv->port); if (err) { en_err(priv, "Failed Initializing port\n"); goto out; } queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY); if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) queue_delayed_work(mdev->workqueue, &priv->service_task, SERVICE_TASK_DELAY); return 0; out: mlx4_en_destroy_netdev(dev); return err; } static int mlx4_en_set_ring_size(struct net_device *dev, int rx_size, int tx_size) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int port_up = 0; int err = 0; rx_size = roundup_pow_of_two(rx_size); rx_size = max_t(u32, rx_size, MLX4_EN_MIN_RX_SIZE); rx_size = min_t(u32, rx_size, MLX4_EN_MAX_RX_SIZE); tx_size = roundup_pow_of_two(tx_size); tx_size = max_t(u32, tx_size, MLX4_EN_MIN_TX_SIZE); tx_size = min_t(u32, tx_size, MLX4_EN_MAX_TX_SIZE); if (rx_size == (priv->port_up ? priv->rx_ring[0]->actual_size : priv->rx_ring[0]->size) && tx_size == priv->tx_ring[0]->size) return 0; mutex_lock(&mdev->state_lock); if (priv->port_up) { port_up = 1; mlx4_en_stop_port(dev); } mlx4_en_free_resources(priv); priv->prof->tx_ring_size = tx_size; priv->prof->rx_ring_size = rx_size; err = mlx4_en_alloc_resources(priv); if (err) { en_err(priv, "Failed reallocating port resources\n"); goto out; } if (port_up) { err = mlx4_en_start_port(dev); if (err) en_err(priv, "Failed starting port\n"); } out: mutex_unlock(&mdev->state_lock); return err; } static int mlx4_en_set_rx_ring_size(SYSCTL_HANDLER_ARGS) { struct mlx4_en_priv *priv; int size; int error; priv = arg1; size = priv->prof->rx_ring_size; error = sysctl_handle_int(oidp, &size, 0, req); if (error || !req->newptr) return (error); error = -mlx4_en_set_ring_size(priv->dev, size, priv->prof->tx_ring_size); return (error); } static int mlx4_en_set_tx_ring_size(SYSCTL_HANDLER_ARGS) { struct mlx4_en_priv *priv; int size; int error; priv = arg1; size 
= priv->prof->tx_ring_size; error = sysctl_handle_int(oidp, &size, 0, req); if (error || !req->newptr) return (error); error = -mlx4_en_set_ring_size(priv->dev, priv->prof->rx_ring_size, size); return (error); } static int mlx4_en_get_module_info(struct net_device *dev, struct ethtool_modinfo *modinfo) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int ret; u8 data[4]; /* Read first 2 bytes to get Module & REV ID */ ret = mlx4_get_module_info(mdev->dev, priv->port, 0/*offset*/, 2/*size*/, data); if (ret < 2) { en_err(priv, "Failed to read eeprom module first two bytes, error: 0x%x\n", -ret); return -EIO; } switch (data[0] /* identifier */) { case MLX4_MODULE_ID_QSFP: modinfo->type = ETH_MODULE_SFF_8436; modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; break; case MLX4_MODULE_ID_QSFP_PLUS: if (data[1] >= 0x3) { /* revision id */ modinfo->type = ETH_MODULE_SFF_8636; modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN; } else { modinfo->type = ETH_MODULE_SFF_8436; modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; } break; case MLX4_MODULE_ID_QSFP28: modinfo->type = ETH_MODULE_SFF_8636; modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN; break; case MLX4_MODULE_ID_SFP: modinfo->type = ETH_MODULE_SFF_8472; modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; break; default: en_err(priv, "mlx4_en_get_module_info : Not recognized cable type\n"); return -EINVAL; } return 0; } static int mlx4_en_get_module_eeprom(struct net_device *dev, struct ethtool_eeprom *ee, u8 *data) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int offset = ee->offset; int i = 0, ret; if (ee->len == 0) return -EINVAL; memset(data, 0, ee->len); while (i < ee->len) { en_dbg(DRV, priv, "mlx4_get_module_info i(%d) offset(%d) len(%d)\n", i, offset, ee->len - i); ret = mlx4_get_module_info(mdev->dev, priv->port, offset, ee->len - i, data + i); if (!ret) /* Done reading */ return 0; if (ret < 0) { en_err(priv, "mlx4_get_module_info i(%d) offset(%d) 
bytes_to_read(%d) - FAILED (0x%x)\n", i, offset, ee->len - i, ret); return -1; } i += ret; offset += ret; } return 0; } static void mlx4_en_print_eeprom(u8 *data, __u32 len) { int i; int j = 0; int row = 0; const int NUM_OF_BYTES = 16; printf("\nOffset\t\tValues\n"); printf("------\t\t------\n"); while(row < len){ printf("0x%04x\t\t",row); for(i=0; i < NUM_OF_BYTES; i++){ printf("%02x ", data[j]); row++; j++; } printf("\n"); } } /* Read cable EEPROM module information by first inspecting the first * two bytes to get the length and then read the rest of the information. * The information is printed to dmesg. */ static int mlx4_en_read_eeprom(SYSCTL_HANDLER_ARGS) { u8* data; int error; int result = 0; struct mlx4_en_priv *priv; struct net_device *dev; struct ethtool_modinfo modinfo; struct ethtool_eeprom ee; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) return (error); if (result == 1) { priv = arg1; dev = priv->dev; data = kmalloc(PAGE_SIZE, GFP_KERNEL); error = mlx4_en_get_module_info(dev, &modinfo); if (error) { en_err(priv, "mlx4_en_get_module_info returned with error - FAILED (0x%x)\n", -error); goto out; } ee.len = modinfo.eeprom_len; ee.offset = 0; error = mlx4_en_get_module_eeprom(dev, &ee, data); if (error) { en_err(priv, "mlx4_en_get_module_eeprom returned with error - FAILED (0x%x)\n", -error); /* Continue printing partial information in case of an error */ } /* EEPROM information will be printed in dmesg */ mlx4_en_print_eeprom(data, ee.len); out: kfree(data); } /* Return zero to prevent sysctl failure. 
*/ return (0); } static int mlx4_en_set_tx_ppp(SYSCTL_HANDLER_ARGS) { struct mlx4_en_priv *priv; int ppp; int error; priv = arg1; ppp = priv->prof->tx_ppp; error = sysctl_handle_int(oidp, &ppp, 0, req); if (error || !req->newptr) return (error); if (ppp > 0xff || ppp < 0) return (-EINVAL); priv->prof->tx_ppp = ppp; error = -mlx4_SET_PORT_general(priv->mdev->dev, priv->port, priv->rx_mb_size + ETHER_CRC_LEN, priv->prof->tx_pause, priv->prof->tx_ppp, priv->prof->rx_pause, priv->prof->rx_ppp); return (error); } static int mlx4_en_set_rx_ppp(SYSCTL_HANDLER_ARGS) { struct mlx4_en_priv *priv; struct mlx4_en_dev *mdev; int ppp; int error; int port_up; port_up = 0; priv = arg1; mdev = priv->mdev; ppp = priv->prof->rx_ppp; error = sysctl_handle_int(oidp, &ppp, 0, req); if (error || !req->newptr) return (error); if (ppp > 0xff || ppp < 0) return (-EINVAL); /* See if we have to change the number of tx queues. */ if (!ppp != !priv->prof->rx_ppp) { mutex_lock(&mdev->state_lock); if (priv->port_up) { port_up = 1; mlx4_en_stop_port(priv->dev); } mlx4_en_free_resources(priv); priv->prof->rx_ppp = ppp; error = -mlx4_en_alloc_resources(priv); if (error) en_err(priv, "Failed reallocating port resources\n"); if (error == 0 && port_up) { error = -mlx4_en_start_port(priv->dev); if (error) en_err(priv, "Failed starting port\n"); } mutex_unlock(&mdev->state_lock); return (error); } priv->prof->rx_ppp = ppp; error = -mlx4_SET_PORT_general(priv->mdev->dev, priv->port, priv->rx_mb_size + ETHER_CRC_LEN, priv->prof->tx_pause, priv->prof->tx_ppp, priv->prof->rx_pause, priv->prof->rx_ppp); return (error); } static void mlx4_en_sysctl_conf(struct mlx4_en_priv *priv) { struct net_device *dev; struct sysctl_ctx_list *ctx; struct sysctl_oid *node; struct sysctl_oid_list *node_list; struct sysctl_oid *coal; struct sysctl_oid_list *coal_list; const char *pnameunit; dev = priv->dev; ctx = &priv->conf_ctx; pnameunit = device_get_nameunit(priv->mdev->pdev->dev.bsddev); sysctl_ctx_init(ctx); 
priv->conf_sysctl = SYSCTL_ADD_NODE(ctx, SYSCTL_STATIC_CHILDREN(_hw), OID_AUTO, dev->if_xname, CTLFLAG_RD, 0, "mlx4 10gig ethernet"); node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(priv->conf_sysctl), OID_AUTO, "conf", CTLFLAG_RD, NULL, "Configuration"); node_list = SYSCTL_CHILDREN(node); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "msg_enable", CTLFLAG_RW, &priv->msg_enable, 0, "Driver message enable bitfield"); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "rx_rings", CTLFLAG_RD, &priv->rx_ring_num, 0, "Number of receive rings"); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "tx_rings", CTLFLAG_RD, &priv->tx_ring_num, 0, "Number of transmit rings"); SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "rx_size", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, mlx4_en_set_rx_ring_size, "I", "Receive ring size"); SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "tx_size", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, mlx4_en_set_tx_ring_size, "I", "Transmit ring size"); SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "tx_ppp", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, mlx4_en_set_tx_ppp, "I", "TX Per-priority pause"); SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "rx_ppp", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, mlx4_en_set_rx_ppp, "I", "RX Per-priority pause"); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "port_num", CTLFLAG_RD, &priv->port, 0, "Port Number"); SYSCTL_ADD_STRING(ctx, node_list, OID_AUTO, "device_name", CTLFLAG_RD, __DECONST(void *, pnameunit), 0, "PCI device name"); /* Add coalescer configuration. 
*/ coal = SYSCTL_ADD_NODE(ctx, node_list, OID_AUTO, "coalesce", CTLFLAG_RD, NULL, "Interrupt coalesce configuration"); coal_list = SYSCTL_CHILDREN(coal); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "pkt_rate_low", CTLFLAG_RW, &priv->pkt_rate_low, 0, "Packets per-second for minimum delay"); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "rx_usecs_low", CTLFLAG_RW, &priv->rx_usecs_low, 0, "Minimum RX delay in micro-seconds"); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "pkt_rate_high", CTLFLAG_RW, &priv->pkt_rate_high, 0, "Packets per-second for maximum delay"); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "rx_usecs_high", CTLFLAG_RW, &priv->rx_usecs_high, 0, "Maximum RX delay in micro-seconds"); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "sample_interval", CTLFLAG_RW, &priv->sample_interval, 0, "adaptive frequency in units of HZ ticks"); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "adaptive_rx_coal", CTLFLAG_RW, &priv->adaptive_rx_coal, 0, "Enable adaptive rx coalescing"); /* EEPROM support */ SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "eeprom_info", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, mlx4_en_read_eeprom, "I", "EEPROM information"); } static void mlx4_en_sysctl_stat(struct mlx4_en_priv *priv) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *node_list; struct sysctl_oid *ring_node; struct sysctl_oid_list *ring_list; struct mlx4_en_tx_ring *tx_ring; struct mlx4_en_rx_ring *rx_ring; char namebuf[128]; int i; ctx = &priv->stat_ctx; sysctl_ctx_init(ctx); priv->stat_sysctl = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(priv->conf_sysctl), OID_AUTO, "stat", CTLFLAG_RD, NULL, "Statistics"); node_list = SYSCTL_CHILDREN(priv->stat_sysctl); #ifdef MLX4_EN_PERF_STAT SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "tx_poll", CTLFLAG_RD, &priv->pstats.tx_poll, "TX Poll calls"); SYSCTL_ADD_QUAD(ctx, node_list, OID_AUTO, "tx_pktsz_avg", CTLFLAG_RD, &priv->pstats.tx_pktsz_avg, "TX average packet size"); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "inflight_avg", CTLFLAG_RD, 
&priv->pstats.inflight_avg, "TX average packets in-flight"); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "tx_coal_avg", CTLFLAG_RD, &priv->pstats.tx_coal_avg, "TX average coalesced completions"); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "rx_coal_avg", CTLFLAG_RD, &priv->pstats.rx_coal_avg, "RX average coalesced completions"); #endif SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tso_packets", CTLFLAG_RD, &priv->port_stats.tso_packets, 0, "TSO packets sent"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "queue_stopped", CTLFLAG_RD, &priv->port_stats.queue_stopped, 0, "Queue full"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "wake_queue", CTLFLAG_RD, &priv->port_stats.wake_queue, 0, "Queue resumed after full"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_timeout", CTLFLAG_RD, &priv->port_stats.tx_timeout, 0, "Transmit timeouts"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_oversized_packets", CTLFLAG_RD, &priv->port_stats.oversized_packets, 0, "TX oversized packets, m_defrag failed"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_alloc_failed", CTLFLAG_RD, &priv->port_stats.rx_alloc_failed, 0, "RX failed to allocate mbuf"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_chksum_good", CTLFLAG_RD, &priv->port_stats.rx_chksum_good, 0, "RX checksum offload success"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_chksum_none", CTLFLAG_RD, &priv->port_stats.rx_chksum_none, 0, "RX without checksum offload"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_chksum_offload", CTLFLAG_RD, &priv->port_stats.tx_chksum_offload, 0, "TX checksum offloads"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "defrag_attempts", CTLFLAG_RD, &priv->port_stats.defrag_attempts, 0, "Oversized chains defragged"); /* Could strdup the names and add in a loop. This is simpler. 
*/ SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_bytes", CTLFLAG_RD, &priv->pkstats.rx_bytes, 0, "RX Bytes"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_packets", CTLFLAG_RD, &priv->pkstats.rx_packets, 0, "RX packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_multicast_packets", CTLFLAG_RD, &priv->pkstats.rx_multicast_packets, 0, "RX Multicast Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_broadcast_packets", CTLFLAG_RD, &priv->pkstats.rx_broadcast_packets, 0, "RX Broadcast Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_errors", CTLFLAG_RD, &priv->pkstats.rx_errors, 0, "RX Errors"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_dropped", CTLFLAG_RD, &priv->pkstats.rx_dropped, 0, "RX Dropped"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_length_errors", CTLFLAG_RD, &priv->pkstats.rx_length_errors, 0, "RX Length Errors"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_over_errors", CTLFLAG_RD, &priv->pkstats.rx_over_errors, 0, "RX Over Errors"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_crc_errors", CTLFLAG_RD, &priv->pkstats.rx_crc_errors, 0, "RX CRC Errors"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_jabbers", CTLFLAG_RD, &priv->pkstats.rx_jabbers, 0, "RX Jabbers"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_in_range_length_error", CTLFLAG_RD, &priv->pkstats.rx_in_range_length_error, 0, "RX IN_Range Length Error"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_out_range_length_error", CTLFLAG_RD, &priv->pkstats.rx_out_range_length_error, 0, "RX Out Range Length Error"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_lt_64_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_lt_64_bytes_packets, 0, "RX Lt 64 Bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_127_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_127_bytes_packets, 0, "RX 127 bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_255_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_255_bytes_packets, 0, "RX 255 bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, 
"rx_511_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_511_bytes_packets, 0, "RX 511 bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_1023_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_1023_bytes_packets, 0, "RX 1023 bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_1518_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_1518_bytes_packets, 0, "RX 1518 bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_1522_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_1522_bytes_packets, 0, "RX 1522 bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_1548_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_1548_bytes_packets, 0, "RX 1548 bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_gt_1548_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_gt_1548_bytes_packets, 0, "RX Greater Then 1548 bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_packets", CTLFLAG_RD, &priv->pkstats.tx_packets, 0, "TX packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_bytes", CTLFLAG_RD, &priv->pkstats.tx_bytes, 0, "TX Bytes"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_multicast_packets", CTLFLAG_RD, &priv->pkstats.tx_multicast_packets, 0, "TX Multicast Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_broadcast_packets", CTLFLAG_RD, &priv->pkstats.tx_broadcast_packets, 0, "TX Broadcast Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_errors", CTLFLAG_RD, &priv->pkstats.tx_errors, 0, "TX Errors"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_dropped", CTLFLAG_RD, &priv->pkstats.tx_dropped, 0, "TX Dropped"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_lt_64_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_lt_64_bytes_packets, 0, "TX Less Then 64 Bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_127_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_127_bytes_packets, 0, "TX 127 Bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_255_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_255_bytes_packets, 0, "TX 255 Bytes 
Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_511_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_511_bytes_packets, 0, "TX 511 Bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_1023_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_1023_bytes_packets, 0, "TX 1023 Bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_1518_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_1518_bytes_packets, 0, "TX 1518 Bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_1522_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_1522_bytes_packets, 0, "TX 1522 Bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_1548_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_1548_bytes_packets, 0, "TX 1548 Bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_gt_1548_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_gt_1548_bytes_packets, 0, "TX Greater Then 1548 Bytes Packets"); for (i = 0; i < priv->tx_ring_num; i++) { tx_ring = priv->tx_ring[i]; snprintf(namebuf, sizeof(namebuf), "tx_ring%d", i); ring_node = SYSCTL_ADD_NODE(ctx, node_list, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "TX Ring"); ring_list = SYSCTL_CHILDREN(ring_node); SYSCTL_ADD_U64(ctx, ring_list, OID_AUTO, "packets", CTLFLAG_RD, &tx_ring->packets, 0, "TX packets"); SYSCTL_ADD_U64(ctx, ring_list, OID_AUTO, "bytes", CTLFLAG_RD, &tx_ring->bytes, 0, "TX bytes"); SYSCTL_ADD_U64(ctx, ring_list, OID_AUTO, "tso_packets", CTLFLAG_RD, &tx_ring->tso_packets, 0, "TSO packets"); SYSCTL_ADD_U64(ctx, ring_list, OID_AUTO, "defrag_attempts", CTLFLAG_RD, &tx_ring->defrag_attempts, 0, "Oversized chains defragged"); } for (i = 0; i < priv->rx_ring_num; i++) { rx_ring = priv->rx_ring[i]; snprintf(namebuf, sizeof(namebuf), "rx_ring%d", i); ring_node = SYSCTL_ADD_NODE(ctx, node_list, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "RX Ring"); ring_list = SYSCTL_CHILDREN(ring_node); SYSCTL_ADD_U64(ctx, ring_list, OID_AUTO, "packets", CTLFLAG_RD, &rx_ring->packets, 0, "RX packets"); SYSCTL_ADD_U64(ctx, ring_list, OID_AUTO, "bytes", 
CTLFLAG_RD, &rx_ring->bytes, 0, "RX bytes"); SYSCTL_ADD_U64(ctx, ring_list, OID_AUTO, "error", CTLFLAG_RD, &rx_ring->errors, 0, "RX soft errors"); } } Index: head/sys/dev/mlx5/mlx5_en/mlx5_en_main.c =================================================================== --- head/sys/dev/mlx5/mlx5_en/mlx5_en_main.c (revision 331796) +++ head/sys/dev/mlx5/mlx5_en/mlx5_en_main.c (revision 331797) @@ -1,3788 +1,3788 @@ /*- * Copyright (c) 2015 Mellanox Technologies. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #include "en.h" #include #include #define ETH_DRIVER_VERSION "3.1.0-dev" char mlx5e_version[] = "Mellanox Ethernet driver" " (" ETH_DRIVER_VERSION ")"; struct mlx5e_channel_param { struct mlx5e_rq_param rq; struct mlx5e_sq_param sq; struct mlx5e_cq_param rx_cq; struct mlx5e_cq_param tx_cq; }; static const struct { u32 subtype; u64 baudrate; } mlx5e_mode_table[MLX5E_LINK_MODES_NUMBER] = { [MLX5E_1000BASE_CX_SGMII] = { .subtype = IFM_1000_CX_SGMII, .baudrate = IF_Mbps(1000ULL), }, [MLX5E_1000BASE_KX] = { .subtype = IFM_1000_KX, .baudrate = IF_Mbps(1000ULL), }, [MLX5E_10GBASE_CX4] = { .subtype = IFM_10G_CX4, .baudrate = IF_Gbps(10ULL), }, [MLX5E_10GBASE_KX4] = { .subtype = IFM_10G_KX4, .baudrate = IF_Gbps(10ULL), }, [MLX5E_10GBASE_KR] = { .subtype = IFM_10G_KR, .baudrate = IF_Gbps(10ULL), }, [MLX5E_20GBASE_KR2] = { .subtype = IFM_20G_KR2, .baudrate = IF_Gbps(20ULL), }, [MLX5E_40GBASE_CR4] = { .subtype = IFM_40G_CR4, .baudrate = IF_Gbps(40ULL), }, [MLX5E_40GBASE_KR4] = { .subtype = IFM_40G_KR4, .baudrate = IF_Gbps(40ULL), }, [MLX5E_56GBASE_R4] = { .subtype = IFM_56G_R4, .baudrate = IF_Gbps(56ULL), }, [MLX5E_10GBASE_CR] = { .subtype = IFM_10G_CR1, .baudrate = IF_Gbps(10ULL), }, [MLX5E_10GBASE_SR] = { .subtype = IFM_10G_SR, .baudrate = IF_Gbps(10ULL), }, [MLX5E_10GBASE_ER] = { .subtype = IFM_10G_ER, .baudrate = IF_Gbps(10ULL), }, [MLX5E_40GBASE_SR4] = { .subtype = IFM_40G_SR4, .baudrate = IF_Gbps(40ULL), }, [MLX5E_40GBASE_LR4] = { .subtype = IFM_40G_LR4, .baudrate = IF_Gbps(40ULL), }, [MLX5E_100GBASE_CR4] = { .subtype = IFM_100G_CR4, .baudrate = IF_Gbps(100ULL), }, [MLX5E_100GBASE_SR4] = { .subtype = IFM_100G_SR4, .baudrate = IF_Gbps(100ULL), }, [MLX5E_100GBASE_KR4] = { .subtype = IFM_100G_KR4, .baudrate = IF_Gbps(100ULL), }, [MLX5E_100GBASE_LR4] = { .subtype = IFM_100G_LR4, .baudrate = IF_Gbps(100ULL), }, [MLX5E_100BASE_TX] = { .subtype = IFM_100_TX, .baudrate = IF_Mbps(100ULL), }, [MLX5E_1000BASE_T] = { .subtype = IFM_1000_T, .baudrate = 
IF_Mbps(1000ULL), }, [MLX5E_10GBASE_T] = { .subtype = IFM_10G_T, .baudrate = IF_Gbps(10ULL), }, [MLX5E_25GBASE_CR] = { .subtype = IFM_25G_CR, .baudrate = IF_Gbps(25ULL), }, [MLX5E_25GBASE_KR] = { .subtype = IFM_25G_KR, .baudrate = IF_Gbps(25ULL), }, [MLX5E_25GBASE_SR] = { .subtype = IFM_25G_SR, .baudrate = IF_Gbps(25ULL), }, [MLX5E_50GBASE_CR2] = { .subtype = IFM_50G_CR2, .baudrate = IF_Gbps(50ULL), }, [MLX5E_50GBASE_KR2] = { .subtype = IFM_50G_KR2, .baudrate = IF_Gbps(50ULL), }, }; MALLOC_DEFINE(M_MLX5EN, "MLX5EN", "MLX5 Ethernet"); static SYSCTL_NODE(_hw, OID_AUTO, mlx5, CTLFLAG_RW, 0, "MLX5 driver parameters"); static void mlx5e_update_carrier(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; u32 out[MLX5_ST_SZ_DW(ptys_reg)]; u32 eth_proto_oper; int error; u8 port_state; u8 i; port_state = mlx5_query_vport_state(mdev, MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0); if (port_state == VPORT_STATE_UP) { priv->media_status_last |= IFM_ACTIVE; } else { priv->media_status_last &= ~IFM_ACTIVE; priv->media_active_last = IFM_ETHER; if_link_state_change(priv->ifp, LINK_STATE_DOWN); return; } error = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1); if (error) { priv->media_active_last = IFM_ETHER; priv->ifp->if_baudrate = 1; if_printf(priv->ifp, "%s: query port ptys failed: 0x%x\n", __func__, error); return; } eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); for (i = 0; i != MLX5E_LINK_MODES_NUMBER; i++) { if (mlx5e_mode_table[i].baudrate == 0) continue; if (MLX5E_PROT_MASK(i) & eth_proto_oper) { priv->ifp->if_baudrate = mlx5e_mode_table[i].baudrate; priv->media_active_last = mlx5e_mode_table[i].subtype | IFM_ETHER | IFM_FDX; } } if_link_state_change(priv->ifp, LINK_STATE_UP); } static void mlx5e_media_status(struct ifnet *dev, struct ifmediareq *ifmr) { struct mlx5e_priv *priv = dev->if_softc; ifmr->ifm_status = priv->media_status_last; ifmr->ifm_active = priv->media_active_last | (priv->params.rx_pauseframe_control ? 
IFM_ETH_RXPAUSE : 0) | (priv->params.tx_pauseframe_control ? IFM_ETH_TXPAUSE : 0); } static u32 mlx5e_find_link_mode(u32 subtype) { u32 i; u32 link_mode = 0; for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { if (mlx5e_mode_table[i].baudrate == 0) continue; if (mlx5e_mode_table[i].subtype == subtype) link_mode |= MLX5E_PROT_MASK(i); } return (link_mode); } static int mlx5e_set_port_pause_and_pfc(struct mlx5e_priv *priv) { return (mlx5_set_port_pause_and_pfc(priv->mdev, 1, priv->params.rx_pauseframe_control, priv->params.tx_pauseframe_control, priv->params.rx_priority_flow_control, priv->params.tx_priority_flow_control)); } static int mlx5e_set_port_pfc(struct mlx5e_priv *priv) { int error; if (priv->params.rx_pauseframe_control || priv->params.tx_pauseframe_control) { if_printf(priv->ifp, "Global pauseframes must be disabled before enabling PFC.\n"); error = -EINVAL; } else { error = mlx5e_set_port_pause_and_pfc(priv); } return (error); } static int mlx5e_media_change(struct ifnet *dev) { struct mlx5e_priv *priv = dev->if_softc; struct mlx5_core_dev *mdev = priv->mdev; u32 eth_proto_cap; u32 link_mode; int was_opened; int locked; int error; locked = PRIV_LOCKED(priv); if (!locked) PRIV_LOCK(priv); if (IFM_TYPE(priv->media.ifm_media) != IFM_ETHER) { error = EINVAL; goto done; } link_mode = mlx5e_find_link_mode(IFM_SUBTYPE(priv->media.ifm_media)); /* query supported capabilities */ error = mlx5_query_port_proto_cap(mdev, ð_proto_cap, MLX5_PTYS_EN); if (error != 0) { if_printf(dev, "Query port media capability failed\n"); goto done; } /* check for autoselect */ if (IFM_SUBTYPE(priv->media.ifm_media) == IFM_AUTO) { link_mode = eth_proto_cap; if (link_mode == 0) { if_printf(dev, "Port media capability is zero\n"); error = EINVAL; goto done; } } else { link_mode = link_mode & eth_proto_cap; if (link_mode == 0) { if_printf(dev, "Not supported link mode requested\n"); error = EINVAL; goto done; } } if (priv->media.ifm_media & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE)) { /* check if 
PFC is enabled */ if (priv->params.rx_priority_flow_control || priv->params.tx_priority_flow_control) { if_printf(dev, "PFC must be disabled before enabling global pauseframes.\n"); error = EINVAL; goto done; } } /* update pauseframe control bits */ priv->params.rx_pauseframe_control = (priv->media.ifm_media & IFM_ETH_RXPAUSE) ? 1 : 0; priv->params.tx_pauseframe_control = (priv->media.ifm_media & IFM_ETH_TXPAUSE) ? 1 : 0; /* check if device is opened */ was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); /* reconfigure the hardware */ mlx5_set_port_status(mdev, MLX5_PORT_DOWN); mlx5_set_port_proto(mdev, link_mode, MLX5_PTYS_EN); error = -mlx5e_set_port_pause_and_pfc(priv); if (was_opened) mlx5_set_port_status(mdev, MLX5_PORT_UP); done: if (!locked) PRIV_UNLOCK(priv); return (error); } static void mlx5e_update_carrier_work(struct work_struct *work) { struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, update_carrier_work); PRIV_LOCK(priv); if (test_bit(MLX5E_STATE_OPENED, &priv->state)) mlx5e_update_carrier(priv); PRIV_UNLOCK(priv); } /* * This function reads the physical port counters from the firmware * using a pre-defined layout defined by various MLX5E_PPORT_XXX() * macros. The output is converted from big-endian 64-bit values into * host endian ones and stored in the "priv->stats.pport" structure. 
*/ static void mlx5e_update_pport_counters(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_pport_stats *s = &priv->stats.pport; struct mlx5e_port_stats_debug *s_debug = &priv->stats.port_stats_debug; u32 *in; u32 *out; const u64 *ptr; unsigned sz = MLX5_ST_SZ_BYTES(ppcnt_reg); unsigned x; unsigned y; unsigned z; /* allocate firmware request structures */ in = mlx5_vzalloc(sz); out = mlx5_vzalloc(sz); if (in == NULL || out == NULL) goto free_out; /* * Get pointer to the 64-bit counter set which is located at a * fixed offset in the output firmware request structure: */ ptr = (const uint64_t *)MLX5_ADDR_OF(ppcnt_reg, out, counter_set); MLX5_SET(ppcnt_reg, in, local_port, 1); /* read IEEE802_3 counter group using predefined counter layout */ MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); for (x = 0, y = MLX5E_PPORT_PER_PRIO_STATS_NUM; x != MLX5E_PPORT_IEEE802_3_STATS_NUM; x++, y++) s->arg[y] = be64toh(ptr[x]); /* read RFC2819 counter group using predefined counter layout */ MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); for (x = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM; x++, y++) s->arg[y] = be64toh(ptr[x]); for (y = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM + MLX5E_PPORT_RFC2819_STATS_DEBUG_NUM; x++, y++) s_debug->arg[y] = be64toh(ptr[x]); /* read RFC2863 counter group using predefined counter layout */ MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); for (x = 0; x != MLX5E_PPORT_RFC2863_STATS_DEBUG_NUM; x++, y++) s_debug->arg[y] = be64toh(ptr[x]); /* read physical layer stats counter group using predefined counter layout */ MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); for (x = 0; x != 
MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG_NUM; x++, y++) s_debug->arg[y] = be64toh(ptr[x]); /* read per-priority counters */ MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP); /* iterate all the priorities */ for (y = z = 0; z != MLX5E_PPORT_PER_PRIO_STATS_NUM_PRIO; z++) { MLX5_SET(ppcnt_reg, in, prio_tc, z); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); /* read per priority stats counter group using predefined counter layout */ for (x = 0; x != (MLX5E_PPORT_PER_PRIO_STATS_NUM / MLX5E_PPORT_PER_PRIO_STATS_NUM_PRIO); x++, y++) s->arg[y] = be64toh(ptr[x]); } free_out: /* free firmware request structures */ kvfree(in); kvfree(out); } /* * This function is called regularly to collect all statistics * counters from the firmware. The values can be viewed through the * sysctl interface. Execution is serialized using the priv's global * configuration lock. */ static void mlx5e_update_stats_work(struct work_struct *work) { struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, update_stats_work); struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_vport_stats *s = &priv->stats.vport; struct mlx5e_rq_stats *rq_stats; struct mlx5e_sq_stats *sq_stats; struct buf_ring *sq_br; #if (__FreeBSD_version < 1100000) struct ifnet *ifp = priv->ifp; #endif u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)]; u32 *out; int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out); u64 tso_packets = 0; u64 tso_bytes = 0; u64 tx_queue_dropped = 0; u64 tx_defragged = 0; u64 tx_offload_none = 0; u64 lro_packets = 0; u64 lro_bytes = 0; u64 sw_lro_queued = 0; u64 sw_lro_flushed = 0; u64 rx_csum_none = 0; u64 rx_wqe_err = 0; u32 rx_out_of_buffer = 0; int i; int j; PRIV_LOCK(priv); out = mlx5_vzalloc(outlen); if (out == NULL) goto free_out; if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0) goto free_out; /* Collect firts the SW counters and then HW for consistency */ for (i = 0; i < priv->params.num_channels; i++) { struct mlx5e_rq *rq = &priv->channel[i]->rq; 
rq_stats = &priv->channel[i]->rq.stats; /* collect stats from LRO */ rq_stats->sw_lro_queued = rq->lro.lro_queued; rq_stats->sw_lro_flushed = rq->lro.lro_flushed; sw_lro_queued += rq_stats->sw_lro_queued; sw_lro_flushed += rq_stats->sw_lro_flushed; lro_packets += rq_stats->lro_packets; lro_bytes += rq_stats->lro_bytes; rx_csum_none += rq_stats->csum_none; rx_wqe_err += rq_stats->wqe_err; for (j = 0; j < priv->num_tc; j++) { sq_stats = &priv->channel[i]->sq[j].stats; sq_br = priv->channel[i]->sq[j].br; tso_packets += sq_stats->tso_packets; tso_bytes += sq_stats->tso_bytes; tx_queue_dropped += sq_stats->dropped; if (sq_br != NULL) tx_queue_dropped += sq_br->br_drops; tx_defragged += sq_stats->defragged; tx_offload_none += sq_stats->csum_offload_none; } } /* update counters */ s->tso_packets = tso_packets; s->tso_bytes = tso_bytes; s->tx_queue_dropped = tx_queue_dropped; s->tx_defragged = tx_defragged; s->lro_packets = lro_packets; s->lro_bytes = lro_bytes; s->sw_lro_queued = sw_lro_queued; s->sw_lro_flushed = sw_lro_flushed; s->rx_csum_none = rx_csum_none; s->rx_wqe_err = rx_wqe_err; /* HW counters */ memset(in, 0, sizeof(in)); MLX5_SET(query_vport_counter_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_COUNTER); MLX5_SET(query_vport_counter_in, in, op_mod, 0); MLX5_SET(query_vport_counter_in, in, other_vport, 0); memset(out, 0, outlen); /* get number of out-of-buffer drops first */ if (mlx5_vport_query_out_of_rx_buffer(mdev, priv->counter_set_id, &rx_out_of_buffer)) goto free_out; /* accumulate difference into a 64-bit counter */ s->rx_out_of_buffer += (u64)(u32)(rx_out_of_buffer - s->rx_out_of_buffer_prev); s->rx_out_of_buffer_prev = rx_out_of_buffer; /* get port statistics */ if (mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen)) goto free_out; #define MLX5_GET_CTR(out, x) \ MLX5_GET64(query_vport_counter_out, out, x) s->rx_error_packets = MLX5_GET_CTR(out, received_errors.packets); s->rx_error_bytes = MLX5_GET_CTR(out, received_errors.octets); s->tx_error_packets = 
MLX5_GET_CTR(out, transmit_errors.packets); s->tx_error_bytes = MLX5_GET_CTR(out, transmit_errors.octets); s->rx_unicast_packets = MLX5_GET_CTR(out, received_eth_unicast.packets); s->rx_unicast_bytes = MLX5_GET_CTR(out, received_eth_unicast.octets); s->tx_unicast_packets = MLX5_GET_CTR(out, transmitted_eth_unicast.packets); s->tx_unicast_bytes = MLX5_GET_CTR(out, transmitted_eth_unicast.octets); s->rx_multicast_packets = MLX5_GET_CTR(out, received_eth_multicast.packets); s->rx_multicast_bytes = MLX5_GET_CTR(out, received_eth_multicast.octets); s->tx_multicast_packets = MLX5_GET_CTR(out, transmitted_eth_multicast.packets); s->tx_multicast_bytes = MLX5_GET_CTR(out, transmitted_eth_multicast.octets); s->rx_broadcast_packets = MLX5_GET_CTR(out, received_eth_broadcast.packets); s->rx_broadcast_bytes = MLX5_GET_CTR(out, received_eth_broadcast.octets); s->tx_broadcast_packets = MLX5_GET_CTR(out, transmitted_eth_broadcast.packets); s->tx_broadcast_bytes = MLX5_GET_CTR(out, transmitted_eth_broadcast.octets); s->rx_packets = s->rx_unicast_packets + s->rx_multicast_packets + s->rx_broadcast_packets - s->rx_out_of_buffer; s->rx_bytes = s->rx_unicast_bytes + s->rx_multicast_bytes + s->rx_broadcast_bytes; s->tx_packets = s->tx_unicast_packets + s->tx_multicast_packets + s->tx_broadcast_packets; s->tx_bytes = s->tx_unicast_bytes + s->tx_multicast_bytes + s->tx_broadcast_bytes; /* Update calculated offload counters */ s->tx_csum_offload = s->tx_packets - tx_offload_none; s->rx_csum_good = s->rx_packets - s->rx_csum_none; /* Get physical port counters */ mlx5e_update_pport_counters(priv); #if (__FreeBSD_version < 1100000) /* no get_counters interface in fbsd 10 */ ifp->if_ipackets = s->rx_packets; ifp->if_ierrors = s->rx_error_packets + priv->stats.pport.alignment_err + priv->stats.pport.check_seq_err + priv->stats.pport.crc_align_errors + priv->stats.pport.in_range_len_errors + priv->stats.pport.jabbers + priv->stats.pport.out_of_range_len + priv->stats.pport.oversize_pkts + 
priv->stats.pport.symbol_err + priv->stats.pport.too_long_errors + priv->stats.pport.undersize_pkts + priv->stats.pport.unsupported_op_rx; ifp->if_iqdrops = s->rx_out_of_buffer + priv->stats.pport.drop_events; ifp->if_opackets = s->tx_packets; ifp->if_oerrors = s->tx_error_packets; ifp->if_snd.ifq_drops = s->tx_queue_dropped; ifp->if_ibytes = s->rx_bytes; ifp->if_obytes = s->tx_bytes; ifp->if_collisions = priv->stats.pport.collisions; #endif free_out: kvfree(out); /* Update diagnostics, if any */ if (priv->params_ethtool.diag_pci_enable || priv->params_ethtool.diag_general_enable) { int error = mlx5_core_get_diagnostics_full(mdev, priv->params_ethtool.diag_pci_enable ? &priv->params_pci : NULL, priv->params_ethtool.diag_general_enable ? &priv->params_general : NULL); if (error != 0) if_printf(priv->ifp, "Failed reading diagnostics: %d\n", error); } PRIV_UNLOCK(priv); } static void mlx5e_update_stats(void *arg) { struct mlx5e_priv *priv = arg; queue_work(priv->wq, &priv->update_stats_work); callout_reset(&priv->watchdog, hz, &mlx5e_update_stats, priv); } static void mlx5e_async_event_sub(struct mlx5e_priv *priv, enum mlx5_dev_event event) { switch (event) { case MLX5_DEV_EVENT_PORT_UP: case MLX5_DEV_EVENT_PORT_DOWN: queue_work(priv->wq, &priv->update_carrier_work); break; default: break; } } static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv, enum mlx5_dev_event event, unsigned long param) { struct mlx5e_priv *priv = vpriv; mtx_lock(&priv->async_events_mtx); if (test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state)) mlx5e_async_event_sub(priv, event); mtx_unlock(&priv->async_events_mtx); } static void mlx5e_enable_async_events(struct mlx5e_priv *priv) { set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state); } static void mlx5e_disable_async_events(struct mlx5e_priv *priv) { mtx_lock(&priv->async_events_mtx); clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state); mtx_unlock(&priv->async_events_mtx); } static void 
mlx5e_calibration_callout(void *arg);

/*
 * Timestamp calibration tunables, exported below under the
 * "hw.mlx5.calibr" sysctl node:
 *
 *  duration - compared against the per-device "clbr_done" counter to
 *             decide whether the initial (fast) phase is still active
 *  fast     - recalibration interval used during the initial phase
 *  normal   - recalibration interval used afterwards
 *
 * Both intervals are multiplied by "hz" when the callout is armed.
 */
static int mlx5e_calibration_duration = 20;
static int mlx5e_fast_calibration = 1;
static int mlx5e_normal_calibration = 30;

static SYSCTL_NODE(_hw_mlx5, OID_AUTO, calibr, CTLFLAG_RW, 0,
    "MLX5 timestamp calibration parameteres");

SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, duration, CTLFLAG_RWTUN,
    &mlx5e_calibration_duration, 0,
    "Duration of initial calibration");
SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, fast, CTLFLAG_RWTUN,
    &mlx5e_fast_calibration, 0,
    "Recalibration interval during initial calibration");
SYSCTL_INT(_hw_mlx5_calibr, OID_AUTO, normal, CTLFLAG_RWTUN,
    &mlx5e_normal_calibration, 0,
    "Recalibration interval during normal operations");

/*
 * Starts or re-arms the calibration process.
 *
 * When no calibration has completed yet ("clbr_done" is zero) the
 * calibration routine is invoked directly from the calling context.
 * Otherwise the "tstmp_clbr" callout is armed using the fast interval
 * while "clbr_done" is below the configured duration, and the normal
 * interval after that.
 */
static void
mlx5e_reset_calibration_callout(struct mlx5e_priv *priv)
{

	if (priv->clbr_done == 0)
		mlx5e_calibration_callout(priv);
	else
		callout_reset_curcpu(&priv->tstmp_clbr, (priv->clbr_done <
		    mlx5e_calibration_duration ? mlx5e_fast_calibration :
		    mlx5e_normal_calibration) * hz, mlx5e_calibration_callout,
		    priv);
}

/*
 * Flatten a timespec into a single 64-bit value.
 *
 * NOTE(review): despite the "usec" in the name, seconds are scaled by
 * 10^9 and tv_nsec is added unscaled, so the result is in nanoseconds.
 */
static uint64_t
mlx5e_timespec2usec(const struct timespec *ts)
{

	return ((uint64_t)ts->tv_sec * 1000000000 + ts->tv_nsec);
}

/*
 * Read the 64-bit internal timer of the adapter from its
 * initialization segment.
 *
 * The timer is exposed as two 32-bit big-endian registers. The high
 * word is sampled before and after the low word and the read is
 * repeated until both high samples agree, so that a carry from the low
 * into the high word in between cannot yield a torn value.
 */
static uint64_t
mlx5e_hw_clock(struct mlx5e_priv *priv)
{
	struct mlx5_init_seg *iseg;
	uint32_t hw_h, hw_h1, hw_l;

	iseg = priv->mdev->iseg;
	do {
		hw_h = ioread32be(&iseg->internal_timer_h);
		hw_l = ioread32be(&iseg->internal_timer_l);
		hw_h1 = ioread32be(&iseg->internal_timer_h);
	} while (hw_h1 != hw_h);
	return (((uint64_t)hw_h << 32) | hw_l);
}

/*
 * The calibration callout. It runs either in the context of the
 * thread which enables calibration, or as a callout. It takes a
 * snapshot of the system and adapter clocks, then advances the index
 * of the current calibration point so that the RX path can read
 * consistent data without taking a lock.
*/ static void mlx5e_calibration_callout(void *arg) { struct mlx5e_priv *priv; struct mlx5e_clbr_point *next, *curr; struct timespec ts; int clbr_curr_next; priv = arg; curr = &priv->clbr_points[priv->clbr_curr]; clbr_curr_next = priv->clbr_curr + 1; if (clbr_curr_next >= nitems(priv->clbr_points)) clbr_curr_next = 0; next = &priv->clbr_points[clbr_curr_next]; next->base_prev = curr->base_curr; next->clbr_hw_prev = curr->clbr_hw_curr; next->clbr_hw_curr = mlx5e_hw_clock(priv); if (((next->clbr_hw_curr - curr->clbr_hw_prev) >> MLX5E_TSTMP_PREC) == 0) { if_printf(priv->ifp, "HW failed tstmp frozen %#jx %#jx," "disabling\n", next->clbr_hw_curr, curr->clbr_hw_prev); priv->clbr_done = 0; return; } nanouptime(&ts); next->base_curr = mlx5e_timespec2usec(&ts); curr->clbr_gen = 0; atomic_thread_fence_rel(); priv->clbr_curr = clbr_curr_next; atomic_store_rel_int(&next->clbr_gen, ++(priv->clbr_gen)); if (priv->clbr_done < mlx5e_calibration_duration) priv->clbr_done++; mlx5e_reset_calibration_callout(priv); } static const char *mlx5e_rq_stats_desc[] = { MLX5E_RQ_STATS(MLX5E_STATS_DESC) }; static int mlx5e_create_rq(struct mlx5e_channel *c, struct mlx5e_rq_param *param, struct mlx5e_rq *rq) { struct mlx5e_priv *priv = c->priv; struct mlx5_core_dev *mdev = priv->mdev; char buffer[16]; void *rqc = param->rqc; void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); int wq_sz; int err; int i; /* Create DMA descriptor TAG */ if ((err = -bus_dma_tag_create( bus_get_dma_tag(mdev->pdev->dev.bsddev), 1, /* any alignment */ 0, /* no boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MJUM16BYTES, /* maxsize */ 1, /* nsegments */ MJUM16BYTES, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockfuncarg */ &rq->dma_tag))) goto done; err = mlx5_wq_ll_create(mdev, ¶m->wq, rqc_wq, &rq->wq, &rq->wq_ctrl); if (err) goto err_free_dma_tag; rq->wq.db = &rq->wq.db[MLX5_RCV_DBR]; if (priv->params.hw_lro_en) { rq->wqe_sz = 
priv->params.lro_wqe_sz; } else { rq->wqe_sz = MLX5E_SW2MB_MTU(priv->ifp->if_mtu); } if (rq->wqe_sz > MJUM16BYTES) { err = -ENOMEM; goto err_rq_wq_destroy; } else if (rq->wqe_sz > MJUM9BYTES) { rq->wqe_sz = MJUM16BYTES; } else if (rq->wqe_sz > MJUMPAGESIZE) { rq->wqe_sz = MJUM9BYTES; } else if (rq->wqe_sz > MCLBYTES) { rq->wqe_sz = MJUMPAGESIZE; } else { rq->wqe_sz = MCLBYTES; } wq_sz = mlx5_wq_ll_get_size(&rq->wq); err = -tcp_lro_init_args(&rq->lro, c->ifp, TCP_LRO_ENTRIES, wq_sz); if (err) goto err_rq_wq_destroy; rq->mbuf = malloc(wq_sz * sizeof(rq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO); for (i = 0; i != wq_sz; i++) { struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i); uint32_t byte_count = rq->wqe_sz - MLX5E_NET_IP_ALIGN; err = -bus_dmamap_create(rq->dma_tag, 0, &rq->mbuf[i].dma_map); if (err != 0) { while (i--) bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map); goto err_rq_mbuf_free; } wqe->data.lkey = c->mkey_be; wqe->data.byte_count = cpu_to_be32(byte_count | MLX5_HW_START_PADDING); } rq->ifp = c->ifp; rq->channel = c; rq->ix = c->ix; snprintf(buffer, sizeof(buffer), "rxstat%d", c->ix); mlx5e_create_stats(&rq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), buffer, mlx5e_rq_stats_desc, MLX5E_RQ_STATS_NUM, rq->stats.arg); return (0); err_rq_mbuf_free: free(rq->mbuf, M_MLX5EN); tcp_lro_free(&rq->lro); err_rq_wq_destroy: mlx5_wq_destroy(&rq->wq_ctrl); err_free_dma_tag: bus_dma_tag_destroy(rq->dma_tag); done: return (err); } static void mlx5e_destroy_rq(struct mlx5e_rq *rq) { int wq_sz; int i; /* destroy all sysctl nodes */ sysctl_ctx_free(&rq->stats.ctx); /* free leftover LRO packets, if any */ tcp_lro_free(&rq->lro); wq_sz = mlx5_wq_ll_get_size(&rq->wq); for (i = 0; i != wq_sz; i++) { if (rq->mbuf[i].mbuf != NULL) { bus_dmamap_unload(rq->dma_tag, rq->mbuf[i].dma_map); m_freem(rq->mbuf[i].mbuf); } bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map); } free(rq->mbuf, M_MLX5EN); mlx5_wq_destroy(&rq->wq_ctrl); } static int mlx5e_enable_rq(struct 
mlx5e_rq *rq, struct mlx5e_rq_param *param) { struct mlx5e_channel *c = rq->channel; struct mlx5e_priv *priv = c->priv; struct mlx5_core_dev *mdev = priv->mdev; void *in; void *rqc; void *wq; int inlen; int err; inlen = MLX5_ST_SZ_BYTES(create_rq_in) + sizeof(u64) * rq->wq_ctrl.buf.npages; in = mlx5_vzalloc(inlen); if (in == NULL) return (-ENOMEM); rqc = MLX5_ADDR_OF(create_rq_in, in, ctx); wq = MLX5_ADDR_OF(rqc, rqc, wq); memcpy(rqc, param->rqc, sizeof(param->rqc)); MLX5_SET(rqc, rqc, cqn, c->rq.cq.mcq.cqn); MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); MLX5_SET(rqc, rqc, flush_in_error_en, 1); if (priv->counter_set_id >= 0) MLX5_SET(rqc, rqc, counter_set_id, priv->counter_set_id); MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift - PAGE_SHIFT); MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma); mlx5_fill_page_array(&rq->wq_ctrl.buf, (__be64 *) MLX5_ADDR_OF(wq, wq, pas)); err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn); kvfree(in); return (err); } static int mlx5e_modify_rq(struct mlx5e_rq *rq, int curr_state, int next_state) { struct mlx5e_channel *c = rq->channel; struct mlx5e_priv *priv = c->priv; struct mlx5_core_dev *mdev = priv->mdev; void *in; void *rqc; int inlen; int err; inlen = MLX5_ST_SZ_BYTES(modify_rq_in); in = mlx5_vzalloc(inlen); if (in == NULL) return (-ENOMEM); rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); MLX5_SET(modify_rq_in, in, rqn, rq->rqn); MLX5_SET(modify_rq_in, in, rq_state, curr_state); MLX5_SET(rqc, rqc, state, next_state); err = mlx5_core_modify_rq(mdev, in, inlen); kvfree(in); return (err); } static void mlx5e_disable_rq(struct mlx5e_rq *rq) { struct mlx5e_channel *c = rq->channel; struct mlx5e_priv *priv = c->priv; struct mlx5_core_dev *mdev = priv->mdev; mlx5_core_destroy_rq(mdev, rq->rqn); } static int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq) { struct mlx5e_channel *c = rq->channel; struct mlx5e_priv *priv = c->priv; struct mlx5_wq_ll *wq = &rq->wq; int i; for (i = 0; i < 1000; i++) { if (wq->cur_sz >= 
priv->params.min_rx_wqes) return (0); msleep(4); } return (-ETIMEDOUT); } static int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_rq_param *param, struct mlx5e_rq *rq) { int err; err = mlx5e_create_rq(c, param, rq); if (err) return (err); err = mlx5e_enable_rq(rq, param); if (err) goto err_destroy_rq; err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); if (err) goto err_disable_rq; c->rq.enabled = 1; return (0); err_disable_rq: mlx5e_disable_rq(rq); err_destroy_rq: mlx5e_destroy_rq(rq); return (err); } static void mlx5e_close_rq(struct mlx5e_rq *rq) { mtx_lock(&rq->mtx); rq->enabled = 0; callout_stop(&rq->watchdog); mtx_unlock(&rq->mtx); callout_drain(&rq->watchdog); mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR); } static void mlx5e_close_rq_wait(struct mlx5e_rq *rq) { struct mlx5_core_dev *mdev = rq->channel->priv->mdev; /* wait till RQ is empty */ while (!mlx5_wq_ll_is_empty(&rq->wq) && (mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)) { msleep(4); rq->cq.mcq.comp(&rq->cq.mcq); } mlx5e_disable_rq(rq); mlx5e_destroy_rq(rq); } void mlx5e_free_sq_db(struct mlx5e_sq *sq) { int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); int x; for (x = 0; x != wq_sz; x++) bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map); free(sq->mbuf, M_MLX5EN); } int mlx5e_alloc_sq_db(struct mlx5e_sq *sq) { int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); int err; int x; sq->mbuf = malloc(wq_sz * sizeof(sq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO); /* Create DMA descriptor MAPs */ for (x = 0; x != wq_sz; x++) { err = -bus_dmamap_create(sq->dma_tag, 0, &sq->mbuf[x].dma_map); if (err != 0) { while (x--) bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map); free(sq->mbuf, M_MLX5EN); return (err); } } return (0); } static const char *mlx5e_sq_stats_desc[] = { MLX5E_SQ_STATS(MLX5E_STATS_DESC) }; static int mlx5e_create_sq(struct mlx5e_channel *c, int tc, struct mlx5e_sq_param *param, struct mlx5e_sq *sq) { struct mlx5e_priv *priv = c->priv; struct mlx5_core_dev *mdev = 
priv->mdev; char buffer[16]; void *sqc = param->sqc; void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq); #ifdef RSS cpuset_t cpu_mask; int cpu_id; #endif int err; /* Create DMA descriptor TAG */ if ((err = -bus_dma_tag_create( bus_get_dma_tag(mdev->pdev->dev.bsddev), 1, /* any alignment */ 0, /* no boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MLX5E_MAX_TX_PAYLOAD_SIZE, /* maxsize */ MLX5E_MAX_TX_MBUF_FRAGS, /* nsegments */ MLX5E_MAX_TX_MBUF_SIZE, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockfuncarg */ &sq->dma_tag))) goto done; err = mlx5_alloc_map_uar(mdev, &sq->uar); if (err) goto err_free_dma_tag; err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); if (err) goto err_unmap_free_uar; sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2; err = mlx5e_alloc_sq_db(sq); if (err) goto err_sq_wq_destroy; sq->mkey_be = c->mkey_be; sq->ifp = priv->ifp; sq->priv = priv; sq->tc = tc; /* check if we should allocate a second packet buffer */ if (priv->params_ethtool.tx_bufring_disable == 0) { sq->br = buf_ring_alloc(MLX5E_SQ_TX_QUEUE_SIZE, M_MLX5EN, M_WAITOK, &sq->lock); if (sq->br == NULL) { if_printf(c->ifp, "%s: Failed allocating sq drbr buffer\n", __func__); err = -ENOMEM; goto err_free_sq_db; } sq->sq_tq = taskqueue_create_fast("mlx5e_que", M_WAITOK, taskqueue_thread_enqueue, &sq->sq_tq); if (sq->sq_tq == NULL) { if_printf(c->ifp, "%s: Failed allocating taskqueue\n", __func__); err = -ENOMEM; goto err_free_drbr; } TASK_INIT(&sq->sq_task, 0, mlx5e_tx_que, sq); #ifdef RSS cpu_id = rss_getcpu(c->ix % rss_getnumbuckets()); CPU_SETOF(cpu_id, &cpu_mask); taskqueue_start_threads_cpuset(&sq->sq_tq, 1, PI_NET, &cpu_mask, "%s TX SQ%d.%d CPU%d", c->ifp->if_xname, c->ix, tc, cpu_id); #else taskqueue_start_threads(&sq->sq_tq, 1, PI_NET, "%s TX SQ%d.%d", c->ifp->if_xname, c->ix, tc); #endif } snprintf(buffer, sizeof(buffer), "txstat%dtc%d", 
c->ix, tc); mlx5e_create_stats(&sq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), buffer, mlx5e_sq_stats_desc, MLX5E_SQ_STATS_NUM, sq->stats.arg); return (0); err_free_drbr: buf_ring_free(sq->br, M_MLX5EN); err_free_sq_db: mlx5e_free_sq_db(sq); err_sq_wq_destroy: mlx5_wq_destroy(&sq->wq_ctrl); err_unmap_free_uar: mlx5_unmap_free_uar(mdev, &sq->uar); err_free_dma_tag: bus_dma_tag_destroy(sq->dma_tag); done: return (err); } static void mlx5e_destroy_sq(struct mlx5e_sq *sq) { /* destroy all sysctl nodes */ sysctl_ctx_free(&sq->stats.ctx); mlx5e_free_sq_db(sq); mlx5_wq_destroy(&sq->wq_ctrl); mlx5_unmap_free_uar(sq->priv->mdev, &sq->uar); if (sq->sq_tq != NULL) { taskqueue_drain(sq->sq_tq, &sq->sq_task); taskqueue_free(sq->sq_tq); } if (sq->br != NULL) buf_ring_free(sq->br, M_MLX5EN); } int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param, int tis_num) { void *in; void *sqc; void *wq; int inlen; int err; inlen = MLX5_ST_SZ_BYTES(create_sq_in) + sizeof(u64) * sq->wq_ctrl.buf.npages; in = mlx5_vzalloc(inlen); if (in == NULL) return (-ENOMEM); sqc = MLX5_ADDR_OF(create_sq_in, in, ctx); wq = MLX5_ADDR_OF(sqc, sqc, wq); memcpy(sqc, param->sqc, sizeof(param->sqc)); MLX5_SET(sqc, sqc, tis_num_0, tis_num); MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn); MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); MLX5_SET(sqc, sqc, tis_lst_sz, 1); MLX5_SET(sqc, sqc, flush_in_error_en, 1); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); MLX5_SET(wq, wq, uar_page, sq->uar.index); MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift - PAGE_SHIFT); MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma); mlx5_fill_page_array(&sq->wq_ctrl.buf, (__be64 *) MLX5_ADDR_OF(wq, wq, pas)); err = mlx5_core_create_sq(sq->priv->mdev, in, inlen, &sq->sqn); kvfree(in); return (err); } int mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, int next_state) { void *in; void *sqc; int inlen; int err; inlen = MLX5_ST_SZ_BYTES(modify_sq_in); in = mlx5_vzalloc(inlen); if (in == NULL) return (-ENOMEM); 
sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); MLX5_SET(modify_sq_in, in, sqn, sq->sqn); MLX5_SET(modify_sq_in, in, sq_state, curr_state); MLX5_SET(sqc, sqc, state, next_state); err = mlx5_core_modify_sq(sq->priv->mdev, in, inlen); kvfree(in); return (err); } void mlx5e_disable_sq(struct mlx5e_sq *sq) { mlx5_core_destroy_sq(sq->priv->mdev, sq->sqn); } static int mlx5e_open_sq(struct mlx5e_channel *c, int tc, struct mlx5e_sq_param *param, struct mlx5e_sq *sq) { int err; err = mlx5e_create_sq(c, tc, param, sq); if (err) return (err); err = mlx5e_enable_sq(sq, param, c->priv->tisn[tc]); if (err) goto err_destroy_sq; err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY); if (err) goto err_disable_sq; atomic_store_rel_int(&sq->queue_state, MLX5E_SQ_READY); return (0); err_disable_sq: mlx5e_disable_sq(sq); err_destroy_sq: mlx5e_destroy_sq(sq); return (err); } static void mlx5e_sq_send_nops_locked(struct mlx5e_sq *sq, int can_sleep) { /* fill up remainder with NOPs */ while (sq->cev_counter != 0) { while (!mlx5e_sq_has_room_for(sq, 1)) { if (can_sleep != 0) { mtx_unlock(&sq->lock); msleep(4); mtx_lock(&sq->lock); } else { goto done; } } /* send a single NOP */ mlx5e_send_nop(sq, 1); atomic_thread_fence_rel(); } done: /* Check if we need to write the doorbell */ if (likely(sq->doorbell.d64 != 0)) { mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0); sq->doorbell.d64 = 0; } } void mlx5e_sq_cev_timeout(void *arg) { struct mlx5e_sq *sq = arg; mtx_assert(&sq->lock, MA_OWNED); /* check next state */ switch (sq->cev_next_state) { case MLX5E_CEV_STATE_SEND_NOPS: /* fill TX ring with NOPs, if any */ mlx5e_sq_send_nops_locked(sq, 0); /* check if completed */ if (sq->cev_counter == 0) { sq->cev_next_state = MLX5E_CEV_STATE_INITIAL; return; } break; default: /* send NOPs on next timeout */ sq->cev_next_state = MLX5E_CEV_STATE_SEND_NOPS; break; } /* restart timer */ callout_reset_curcpu(&sq->cev_callout, hz, mlx5e_sq_cev_timeout, sq); } void mlx5e_drain_sq(struct mlx5e_sq *sq) { int 
error; struct mlx5_core_dev *mdev= sq->priv->mdev; /* * Check if already stopped. * * NOTE: The "stopped" variable is only written when both the * priv's configuration lock and the SQ's lock is locked. It * can therefore safely be read when only one of the two locks * is locked. This function is always called when the priv's * configuration lock is locked. */ if (sq->stopped != 0) return; mtx_lock(&sq->lock); /* don't put more packets into the SQ */ sq->stopped = 1; /* teardown event factor timer, if any */ sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS; callout_stop(&sq->cev_callout); /* send dummy NOPs in order to flush the transmit ring */ mlx5e_sq_send_nops_locked(sq, 1); mtx_unlock(&sq->lock); /* make sure it is safe to free the callout */ callout_drain(&sq->cev_callout); /* wait till SQ is empty or link is down */ mtx_lock(&sq->lock); while (sq->cc != sq->pc && (sq->priv->media_status_last & IFM_ACTIVE) != 0 && mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) { mtx_unlock(&sq->lock); msleep(1); sq->cq.mcq.comp(&sq->cq.mcq); mtx_lock(&sq->lock); } mtx_unlock(&sq->lock); /* error out remaining requests */ error = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR); if (error != 0) { if_printf(sq->ifp, "mlx5e_modify_sq() from RDY to ERR failed: %d\n", error); } /* wait till SQ is empty */ mtx_lock(&sq->lock); while (sq->cc != sq->pc && mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) { mtx_unlock(&sq->lock); msleep(1); sq->cq.mcq.comp(&sq->cq.mcq); mtx_lock(&sq->lock); } mtx_unlock(&sq->lock); } static void mlx5e_close_sq_wait(struct mlx5e_sq *sq) { mlx5e_drain_sq(sq); mlx5e_disable_sq(sq); mlx5e_destroy_sq(sq); } static int mlx5e_create_cq(struct mlx5e_priv *priv, struct mlx5e_cq_param *param, struct mlx5e_cq *cq, mlx5e_cq_comp_t *comp, int eq_ix) { struct mlx5_core_dev *mdev = priv->mdev; struct mlx5_core_cq *mcq = &cq->mcq; int eqn_not_used; int irqn; int err; u32 i; param->wq.buf_numa_node = 0; param->wq.db_numa_node = 0; err = mlx5_cqwq_create(mdev, 
¶m->wq, param->cqc, &cq->wq, &cq->wq_ctrl); if (err) return (err); mlx5_vector2eqn(mdev, eq_ix, &eqn_not_used, &irqn); mcq->cqe_sz = 64; mcq->set_ci_db = cq->wq_ctrl.db.db; mcq->arm_db = cq->wq_ctrl.db.db + 1; *mcq->set_ci_db = 0; *mcq->arm_db = 0; mcq->vector = eq_ix; mcq->comp = comp; mcq->event = mlx5e_cq_error_event; mcq->irqn = irqn; mcq->uar = &priv->cq_uar; for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) { struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i); cqe->op_own = 0xf1; } cq->priv = priv; return (0); } static void mlx5e_destroy_cq(struct mlx5e_cq *cq) { mlx5_wq_destroy(&cq->wq_ctrl); } static int mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param, int eq_ix) { struct mlx5_core_cq *mcq = &cq->mcq; void *in; void *cqc; int inlen; int irqn_not_used; int eqn; int err; inlen = MLX5_ST_SZ_BYTES(create_cq_in) + sizeof(u64) * cq->wq_ctrl.buf.npages; in = mlx5_vzalloc(inlen); if (in == NULL) return (-ENOMEM); cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); memcpy(cqc, param->cqc, sizeof(param->cqc)); mlx5_fill_page_array(&cq->wq_ctrl.buf, (__be64 *) MLX5_ADDR_OF(create_cq_in, in, pas)); mlx5_vector2eqn(cq->priv->mdev, eq_ix, &eqn, &irqn_not_used); MLX5_SET(cqc, cqc, c_eqn, eqn); MLX5_SET(cqc, cqc, uar_page, mcq->uar->index); MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - PAGE_SHIFT); MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma); err = mlx5_core_create_cq(cq->priv->mdev, mcq, in, inlen); kvfree(in); if (err) return (err); mlx5e_cq_arm(cq, MLX5_GET_DOORBELL_LOCK(&cq->priv->doorbell_lock)); return (0); } static void mlx5e_disable_cq(struct mlx5e_cq *cq) { mlx5_core_destroy_cq(cq->priv->mdev, &cq->mcq); } int mlx5e_open_cq(struct mlx5e_priv *priv, struct mlx5e_cq_param *param, struct mlx5e_cq *cq, mlx5e_cq_comp_t *comp, int eq_ix) { int err; err = mlx5e_create_cq(priv, param, cq, comp, eq_ix); if (err) return (err); err = mlx5e_enable_cq(cq, param, eq_ix); if (err) goto err_destroy_cq; return (0); err_destroy_cq: 
mlx5e_destroy_cq(cq);

	return (err);
}

/*
 * Close a completion queue: disable it, then destroy it.
 */
void
mlx5e_close_cq(struct mlx5e_cq *cq)
{
	mlx5e_disable_cq(cq);
	mlx5e_destroy_cq(cq);
}

/*
 * Open one transmit completion queue per traffic class of channel
 * "c". On failure the CQs opened so far are closed in reverse order
 * and the error is returned.
 */
static int
mlx5e_open_tx_cqs(struct mlx5e_channel *c,
    struct mlx5e_channel_param *cparam)
{
	int err;
	int tc;

	for (tc = 0; tc < c->num_tc; tc++) {
		/* open completion queue */
		err = mlx5e_open_cq(c->priv, &cparam->tx_cq, &c->sq[tc].cq,
		    &mlx5e_tx_cq_comp, c->ix);
		if (err)
			goto err_close_tx_cqs;
	}
	return (0);

err_close_tx_cqs:
	/* "tc" is the index that failed; unwind the ones before it */
	for (tc--; tc >= 0; tc--)
		mlx5e_close_cq(&c->sq[tc].cq);

	return (err);
}

/*
 * Close the transmit completion queue of every traffic class.
 */
static void
mlx5e_close_tx_cqs(struct mlx5e_channel *c)
{
	int tc;

	for (tc = 0; tc < c->num_tc; tc++)
		mlx5e_close_cq(&c->sq[tc].cq);
}

/*
 * Open one send queue per traffic class of channel "c". On failure
 * the SQs opened so far are closed in reverse order and the error is
 * returned.
 */
static int
mlx5e_open_sqs(struct mlx5e_channel *c,
    struct mlx5e_channel_param *cparam)
{
	int err;
	int tc;

	for (tc = 0; tc < c->num_tc; tc++) {
		err = mlx5e_open_sq(c, tc, &cparam->sq, &c->sq[tc]);
		if (err)
			goto err_close_sqs;
	}

	return (0);

err_close_sqs:
	/* "tc" is the index that failed; unwind the ones before it */
	for (tc--; tc >= 0; tc--)
		mlx5e_close_sq_wait(&c->sq[tc]);

	return (err);
}

/*
 * Close the send queue of every traffic class.
 */
static void
mlx5e_close_sqs_wait(struct mlx5e_channel *c)
{
	int tc;

	for (tc = 0; tc < c->num_tc; tc++)
		mlx5e_close_sq_wait(&c->sq[tc]);
}

/*
 * Initialize the mutexes and callouts of a channel: one mutex and
 * watchdog callout for the RQ, and a lock, a completion lock and a
 * completion event callout for each SQ. Also seeds each SQ's
 * completion event factor from the ethtool parameters.
 */
static void
mlx5e_chan_mtx_init(struct mlx5e_channel *c)
{
	int tc;

	mtx_init(&c->rq.mtx, "mlx5rx", MTX_NETWORK_LOCK, MTX_DEF);

	callout_init_mtx(&c->rq.watchdog, &c->rq.mtx, 0);

	for (tc = 0; tc < c->num_tc; tc++) {
		struct mlx5e_sq *sq = c->sq + tc;

		mtx_init(&sq->lock, "mlx5tx",
		    MTX_NETWORK_LOCK " TX", MTX_DEF);
		mtx_init(&sq->comp_lock, "mlx5comp",
		    MTX_NETWORK_LOCK " TX", MTX_DEF);

		callout_init_mtx(&sq->cev_callout, &sq->lock, 0);

		sq->cev_factor = c->priv->params_ethtool.tx_completion_fact;

		/* ensure the TX completion event factor is not zero */
		if (sq->cev_factor == 0)
			sq->cev_factor = 1;
	}
}

/*
 * Destroy the mutexes set up by mlx5e_chan_mtx_init().
 */
static void
mlx5e_chan_mtx_destroy(struct mlx5e_channel *c)
{
	int tc;

	mtx_destroy(&c->rq.mtx);

	for (tc = 0; tc < c->num_tc; tc++) {
		mtx_destroy(&c->sq[tc].lock);
		mtx_destroy(&c->sq[tc].comp_lock);
	}
}

/*
 * Allocate and open channel number "ix": TX CQs, RX CQ, SQs and RQ,
 * in that order. On success the new channel is stored through "cp".
 * On failure everything opened so far is closed in reverse order,
 * the channel is freed and the error is returned.
 */
static int
mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
    struct
mlx5e_channel_param *cparam,
    struct mlx5e_channel *volatile *cp)
{
	struct mlx5e_channel *c;
	int err;

	c = malloc(sizeof(*c), M_MLX5EN, M_WAITOK | M_ZERO);
	c->priv = priv;
	c->ix = ix;
	c->cpu = 0;
	c->ifp = priv->ifp;
	c->mkey_be = cpu_to_be32(priv->mr.key);
	c->num_tc = priv->num_tc;

	/* init mutexes */
	mlx5e_chan_mtx_init(c);

	/* open transmit completion queue */
	err = mlx5e_open_tx_cqs(c, cparam);
	if (err)
		goto err_free;

	/* open receive completion queue */
	err = mlx5e_open_cq(c->priv, &cparam->rx_cq, &c->rq.cq,
	    &mlx5e_rx_cq_comp, c->ix);
	if (err)
		goto err_close_tx_cqs;

	err = mlx5e_open_sqs(c, cparam);
	if (err)
		goto err_close_rx_cq;

	err = mlx5e_open_rq(c, &cparam->rq, &c->rq);
	if (err)
		goto err_close_sqs;

	/* store channel pointer */
	*cp = c;

	/* poll receive queue initially */
	c->rq.cq.mcq.comp(&c->rq.cq.mcq);

	return (0);

	/* error unwinding: each label undoes the step before the failure */
err_close_sqs:
	mlx5e_close_sqs_wait(c);

err_close_rx_cq:
	mlx5e_close_cq(&c->rq.cq);

err_close_tx_cqs:
	mlx5e_close_tx_cqs(c);

err_free:
	/* destroy mutexes */
	mlx5e_chan_mtx_destroy(c);
	free(c, M_MLX5EN);
	return (err);
}

/*
 * First stage of closing a channel: close its receive queue. Does
 * nothing if the channel pointer is already NULL.
 */
static void
mlx5e_close_channel(struct mlx5e_channel *volatile *pp)
{
	struct mlx5e_channel *c = *pp;

	/* check if channel is already closed */
	if (c == NULL)
		return;
	mlx5e_close_rq(&c->rq);
}

/*
 * Second stage of closing a channel: clear the channel pointer, then
 * close the RQ, SQs and CQs, destroy the mutexes and free the
 * channel. Does nothing if the channel pointer is already NULL.
 */
static void
mlx5e_close_channel_wait(struct mlx5e_channel *volatile *pp)
{
	struct mlx5e_channel *c = *pp;

	/* check if channel is already closed */
	if (c == NULL)
		return;
	/* ensure channel pointer is no longer used */
	*pp = NULL;

	mlx5e_close_rq_wait(&c->rq);
	mlx5e_close_sqs_wait(c);
	mlx5e_close_cq(&c->rq.cq);
	mlx5e_close_tx_cqs(c);
	/* destroy mutexes */
	mlx5e_chan_mtx_destroy(c);
	free(c, M_MLX5EN);
}

/*
 * Fill in the receive queue creation parameters: a linked-list work
 * queue whose size comes from priv->params.log_rq_size.
 */
static void
mlx5e_build_rq_param(struct mlx5e_priv *priv,
    struct mlx5e_rq_param *param)
{
	void *rqc = param->rqc;
	void *wq = MLX5_ADDR_OF(rqc, rqc, wq);

	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST);
	MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
	MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe)));
	MLX5_SET(wq, wq,
log_wq_sz, priv->params.log_rq_size);
	MLX5_SET(wq, wq, pd, priv->pdn);

	param->wq.buf_numa_node = 0;
	param->wq.db_numa_node = 0;
	param->wq.linear = 1;
}

/*
 * Fill in the send queue creation parameters: work queue size from
 * priv->params.log_sq_size and a stride of one MLX5_SEND_WQE_BB.
 */
static void
mlx5e_build_sq_param(struct mlx5e_priv *priv,
    struct mlx5e_sq_param *param)
{
	void *sqc = param->sqc;
	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);

	MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size);
	MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
	MLX5_SET(wq, wq, pd, priv->pdn);

	param->wq.buf_numa_node = 0;
	param->wq.db_numa_node = 0;
	param->wq.linear = 1;
}

/*
 * Set the CQ context fields shared by the RX and TX completion
 * queues; currently only the UAR page.
 */
static void
mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
    struct mlx5e_cq_param *param)
{
	void *cqc = param->cqc;

	MLX5_SET(cqc, cqc, uar_page, priv->cq_uar.index);
}

/*
 * Fill in the receive completion queue parameters: optional CQE
 * compression, size, moderation period/count and the moderation
 * period mode.
 */
static void
mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
    struct mlx5e_cq_param *param)
{
	void *cqc = param->cqc;


	/*
	 * TODO The sysctl to control on/off is a bool value for now, which means
	 * we only support CSUM, once HASH is implemented we'll need to address that.
	 */
	if (priv->params.cqe_zipping_en) {
		MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM);
		MLX5_SET(cqc, cqc, cqe_compression_en, 1);
	}

	MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_rq_size);
	MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec);
	MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts);

	/*
	 * Mode zero starts the period from the EQE. Any other mode
	 * starts it from the CQE when the device reports that
	 * capability, and falls back to the EQE otherwise.
	 */
	switch (priv->params.rx_cq_moderation_mode) {
	case 0:
		MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
		break;
	default:
		if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
		else
			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
		break;
	}

	mlx5e_build_common_cq_param(priv, param);
}

/*
 * Fill in the transmit completion queue parameters: size, moderation
 * period/count and the moderation period mode.
 */
static void
mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
    struct mlx5e_cq_param *param)
{
	void *cqc = param->cqc;

	MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size);
	MLX5_SET(cqc, cqc, cq_period, priv->params.tx_cq_moderation_usec);
	MLX5_SET(cqc, cqc, cq_max_count,
priv->params.tx_cq_moderation_pkts);

	/* same mode selection as for the receive completion queue */
	switch (priv->params.tx_cq_moderation_mode) {
	case 0:
		MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
		break;
	default:
		if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe))
			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
		else
			MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
		break;
	}

	mlx5e_build_common_cq_param(priv, param);
}

/*
 * Zero "cparam" and build the RQ, SQ, RX CQ and TX CQ parameters
 * from the current settings in "priv".
 */
static void
mlx5e_build_channel_param(struct mlx5e_priv *priv,
    struct mlx5e_channel_param *cparam)
{
	memset(cparam, 0, sizeof(*cparam));

	mlx5e_build_rq_param(priv, &cparam->rq);
	mlx5e_build_sq_param(priv, &cparam->sq);
	mlx5e_build_rx_cq_param(priv, &cparam->rx_cq);
	mlx5e_build_tx_cq_param(priv, &cparam->tx_cq);
}

/*
 * Allocate the channel pointer array and open all channels, then
 * wait for each receive queue via mlx5e_wait_for_min_rx_wqes().
 * On failure the channels opened so far are closed, the array is
 * freed and the error is returned.
 */
static int
mlx5e_open_channels(struct mlx5e_priv *priv)
{
	struct mlx5e_channel_param cparam;
	void *ptr;
	int err;
	int i;
	int j;

	priv->channel = malloc(priv->params.num_channels *
	    sizeof(struct mlx5e_channel *), M_MLX5EN, M_WAITOK | M_ZERO);

	mlx5e_build_channel_param(priv, &cparam);
	for (i = 0; i < priv->params.num_channels; i++) {
		err = mlx5e_open_channel(priv, i, &cparam, &priv->channel[i]);
		if (err)
			goto err_close_channels;
	}

	for (j = 0; j < priv->params.num_channels; j++) {
		err = mlx5e_wait_for_min_rx_wqes(&priv->channel[j]->rq);
		if (err)
			goto err_close_channels;
	}

	return (0);

err_close_channels:
	/*
	 * "i" is the number of channels opened so far: the failing
	 * index from the first loop, or num_channels when the second
	 * loop failed.
	 */
	for (i--; i >= 0; i--) {
		mlx5e_close_channel(&priv->channel[i]);
		mlx5e_close_channel_wait(&priv->channel[i]);
	}

	/* remove "volatile" attribute from "channel" pointer */
	ptr = __DECONST(void *, priv->channel);
	priv->channel = NULL;

	free(ptr, M_MLX5EN);

	return (err);
}

/*
 * Close all channels in two passes (mlx5e_close_channel() on every
 * channel first, then mlx5e_close_channel_wait()) and free the
 * channel pointer array. Does nothing if no array is allocated.
 */
static void
mlx5e_close_channels(struct mlx5e_priv *priv)
{
	void *ptr;
	int i;

	if (priv->channel == NULL)
		return;

	for (i = 0; i < priv->params.num_channels; i++)
		mlx5e_close_channel(&priv->channel[i]);
	for (i = 0; i < priv->params.num_channels; i++)
		mlx5e_close_channel_wait(&priv->channel[i]);

	/* remove "volatile" attribute from "channel" pointer */
	ptr = __DECONST(void
*, priv->channel); priv->channel = NULL; free(ptr, M_MLX5EN); } static int mlx5e_refresh_sq_params(struct mlx5e_priv *priv, struct mlx5e_sq *sq) { if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) { uint8_t cq_mode; switch (priv->params.tx_cq_moderation_mode) { case 0: cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE; break; default: cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE; break; } return (mlx5_core_modify_cq_moderation_mode(priv->mdev, &sq->cq.mcq, priv->params.tx_cq_moderation_usec, priv->params.tx_cq_moderation_pkts, cq_mode)); } return (mlx5_core_modify_cq_moderation(priv->mdev, &sq->cq.mcq, priv->params.tx_cq_moderation_usec, priv->params.tx_cq_moderation_pkts)); } static int mlx5e_refresh_rq_params(struct mlx5e_priv *priv, struct mlx5e_rq *rq) { if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) { uint8_t cq_mode; int retval; switch (priv->params.rx_cq_moderation_mode) { case 0: cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE; break; default: cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE; break; } retval = mlx5_core_modify_cq_moderation_mode(priv->mdev, &rq->cq.mcq, priv->params.rx_cq_moderation_usec, priv->params.rx_cq_moderation_pkts, cq_mode); return (retval); } return (mlx5_core_modify_cq_moderation(priv->mdev, &rq->cq.mcq, priv->params.rx_cq_moderation_usec, priv->params.rx_cq_moderation_pkts)); } static int mlx5e_refresh_channel_params_sub(struct mlx5e_priv *priv, struct mlx5e_channel *c) { int err; int i; if (c == NULL) return (EINVAL); err = mlx5e_refresh_rq_params(priv, &c->rq); if (err) goto done; for (i = 0; i != c->num_tc; i++) { err = mlx5e_refresh_sq_params(priv, &c->sq[i]); if (err) goto done; } done: return (err); } int mlx5e_refresh_channel_params(struct mlx5e_priv *priv) { int i; if (priv->channel == NULL) return (EINVAL); for (i = 0; i < priv->params.num_channels; i++) { int err; err = mlx5e_refresh_channel_params_sub(priv, priv->channel[i]); if (err) return (err); } return (0); } static int mlx5e_open_tis(struct mlx5e_priv *priv, int 
tc)
{
	struct mlx5_core_dev *mdev = priv->mdev;
	u32 in[MLX5_ST_SZ_DW(create_tis_in)];
	void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);

	memset(in, 0, sizeof(in));

	/* the traffic class number is used as the TIS priority */
	MLX5_SET(tisc, tisc, prio, tc);
	MLX5_SET(tisc, tisc, transport_domain, priv->tdn);

	return (mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]));
}

/*
 * Destroy the TIS of traffic class "tc".
 */
static void
mlx5e_close_tis(struct mlx5e_priv *priv, int tc)
{
	mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc]);
}

/*
 * Open one TIS per traffic class. On failure the ones opened so far
 * are closed in reverse order and the error is returned.
 */
static int
mlx5e_open_tises(struct mlx5e_priv *priv)
{
	int num_tc = priv->num_tc;
	int err;
	int tc;

	for (tc = 0; tc < num_tc; tc++) {
		err = mlx5e_open_tis(priv, tc);
		if (err)
			goto err_close_tises;
	}

	return (0);

err_close_tises:
	for (tc--; tc >= 0; tc--)
		mlx5e_close_tis(priv, tc);

	return (err);
}

/*
 * Close the TIS of every traffic class.
 */
static void
mlx5e_close_tises(struct mlx5e_priv *priv)
{
	int num_tc = priv->num_tc;
	int tc;

	for (tc = 0; tc < num_tc; tc++)
		mlx5e_close_tis(priv, tc);
}

/*
 * Create the RQ table with 2^rx_hash_log_tbl_sz entries, each
 * pointing at the RQ of one of the open channels, and store the
 * resulting table number in priv->rqtn.
 *
 * Returns zero on success, -ENOMEM if the command buffer cannot be
 * allocated, or the error from mlx5_cmd_exec().
 */
static int
mlx5e_open_rqt(struct mlx5e_priv *priv)
{
	struct mlx5_core_dev *mdev = priv->mdev;
	u32 *in;
	u32 out[MLX5_ST_SZ_DW(create_rqt_out)] = {0};
	void *rqtc;
	int inlen;
	int err;
	int sz;
	int i;

	sz = 1 << priv->params.rx_hash_log_tbl_sz;

	/* fixed command layout plus one 32-bit RQ number per entry */
	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz;
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);
	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);

	MLX5_SET(rqtc, rqtc, rqt_actual_size, sz);
	MLX5_SET(rqtc, rqtc, rqt_max_size, sz);

	for (i = 0; i < sz; i++) {
		int ix;
#ifdef RSS
		ix = rss_get_indirection_to_bucket(i);
#else
		ix = i;
#endif
		/* ensure we don't overflow */
		ix %= priv->params.num_channels;
		MLX5_SET(rqtc, rqtc, rq_num[i], priv->channel[ix]->rq.rqn);
	}

	MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT);

	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
	if (!err)
		priv->rqtn = MLX5_GET(create_rqt_out, out, rqtn);

	kvfree(in);

	return (err);
}

/*
 * Destroy the RQ table identified by priv->rqtn. The result of the
 * command is not checked.
 */
static void
mlx5e_close_rqt(struct mlx5e_priv *priv)
{
	u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {0};
	u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)] = {0};

	MLX5_SET(destroy_rqt_in, in, opcode,
MLX5_CMD_OP_DESTROY_RQT);
	MLX5_SET(destroy_rqt_in, in, rqtn, priv->rqtn);

	mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out));
}

/*
 * Fill in the TIR context "tirc" for traffic type "tt": transport
 * domain, optional hardware LRO settings, the dispatch type (direct
 * to the RQ of channel zero for MLX5E_TT_ANY, indirect through the
 * RQ table with Toeplitz hashing otherwise) and the hash field
 * selection of the traffic type.
 */
static void
mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 * tirc, int tt)
{
	void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
	__be32 *hkey;

	MLX5_SET(tirc, tirc, transport_domain, priv->tdn);

#define	ROUGH_MAX_L2_L3_HDR_SZ 256

#define	MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\
			  MLX5_HASH_FIELD_SEL_DST_IP)

#define	MLX5_HASH_ALL (MLX5_HASH_FIELD_SEL_SRC_IP |\
			  MLX5_HASH_FIELD_SEL_DST_IP |\
			  MLX5_HASH_FIELD_SEL_L4_SPORT |\
			  MLX5_HASH_FIELD_SEL_L4_DPORT)

#define	MLX5_HASH_IP_IPSEC_SPI (MLX5_HASH_FIELD_SEL_SRC_IP |\
				 MLX5_HASH_FIELD_SEL_DST_IP |\
				 MLX5_HASH_FIELD_SEL_IPSEC_SPI)

	if (priv->params.hw_lro_en) {
		MLX5_SET(tirc, tirc, lro_enable_mask,
		    MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO |
		    MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO);
		/* the maximum message size is programmed in 256 byte units */
		MLX5_SET(tirc, tirc, lro_max_msg_sz,
		    (priv->params.lro_wqe_sz -
		    ROUGH_MAX_L2_L3_HDR_SZ) >> 8);
		/* TODO: add the option to choose timer value dynamically */
		MLX5_SET(tirc, tirc, lro_timeout_period_usecs,
		    MLX5_CAP_ETH(priv->mdev,
		    lro_timer_supported_periods[2]));
	}

	/* setup parameters for hashing TIR type, if any */
	switch (tt) {
	case MLX5E_TT_ANY:
		MLX5_SET(tirc, tirc, disp_type,
		    MLX5_TIRC_DISP_TYPE_DIRECT);
		MLX5_SET(tirc, tirc, inline_rqn,
		    priv->channel[0]->rq.rqn);
		break;
	default:
		MLX5_SET(tirc, tirc, disp_type,
		    MLX5_TIRC_DISP_TYPE_INDIRECT);
		MLX5_SET(tirc, tirc, indirect_table,
		    priv->rqtn);
		MLX5_SET(tirc, tirc, rx_hash_fn,
		    MLX5_TIRC_RX_HASH_FN_HASH_TOEPLITZ);
		hkey = (__be32 *) MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
#ifdef RSS
		/*
		 * The FreeBSD RSS implementation does currently not
		 * support symmetric Toeplitz hashes:
		 */
		MLX5_SET(tirc, tirc, rx_hash_symmetric, 0);
		rss_getkey((uint8_t *)hkey);
#else
		/* without RSS a fixed 40-byte key is programmed */
		MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
		hkey[0] = cpu_to_be32(0xD181C62C);
		hkey[1] = cpu_to_be32(0xF7F4DB5B);
		hkey[2] = cpu_to_be32(0x1983A2FC);
		hkey[3] = cpu_to_be32(0x943E1ADB);
		hkey[4] = cpu_to_be32(0xD9389E6B);
		hkey[5] = cpu_to_be32(0xD1039C2C);
		hkey[6] = cpu_to_be32(0xA74499AD);
		hkey[7] = cpu_to_be32(0x593D56D9);
		hkey[8] = cpu_to_be32(0xF3253C06);
		hkey[9] = cpu_to_be32(0x2ADC1FFC);
#endif
		break;
	}

	/*
	 * Select the header fields that feed the hash. With RSS
	 * compiled in, the L4 ports are only included when the RSS
	 * hash configuration enables the matching hash type.
	 */
	switch (tt) {
	case MLX5E_TT_IPV4_TCP:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV4);
		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
		    MLX5_L4_PROT_TYPE_TCP);
#ifdef RSS
		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV4)) {
			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
			    MLX5_HASH_IP);
		} else
#endif
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_ALL);
		break;

	case MLX5E_TT_IPV6_TCP:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV6);
		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
		    MLX5_L4_PROT_TYPE_TCP);
#ifdef RSS
		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV6)) {
			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
			    MLX5_HASH_IP);
		} else
#endif
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_ALL);
		break;

	case MLX5E_TT_IPV4_UDP:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV4);
		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
		    MLX5_L4_PROT_TYPE_UDP);
#ifdef RSS
		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV4)) {
			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
			    MLX5_HASH_IP);
		} else
#endif
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_ALL);
		break;

	case MLX5E_TT_IPV6_UDP:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV6);
		MLX5_SET(rx_hash_field_select, hfso, l4_prot_type,
		    MLX5_L4_PROT_TYPE_UDP);
#ifdef RSS
		if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV6)) {
			MLX5_SET(rx_hash_field_select, hfso, selected_fields,
			    MLX5_HASH_IP);
		} else
#endif
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_ALL);
		break;

	case MLX5E_TT_IPV4_IPSEC_AH:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV4);
		MLX5_SET(rx_hash_field_select, hfso,
selected_fields,
		    MLX5_HASH_IP_IPSEC_SPI);
		break;

	case MLX5E_TT_IPV6_IPSEC_AH:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV6);
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_IP_IPSEC_SPI);
		break;

	case MLX5E_TT_IPV4_IPSEC_ESP:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV4);
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_IP_IPSEC_SPI);
		break;

	case MLX5E_TT_IPV6_IPSEC_ESP:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV6);
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_IP_IPSEC_SPI);
		break;

	case MLX5E_TT_IPV4:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV4);
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_IP);
		break;

	case MLX5E_TT_IPV6:
		MLX5_SET(rx_hash_field_select, hfso, l3_prot_type,
		    MLX5_L3_PROT_TYPE_IPV6);
		MLX5_SET(rx_hash_field_select, hfso, selected_fields,
		    MLX5_HASH_IP);
		break;

	default:
		/* no hash field selection for other traffic types */
		break;
	}
}

/*
 * Create the TIR for traffic type "tt" and store its number in
 * priv->tirn[tt].
 *
 * Returns zero on success, -ENOMEM if the command buffer cannot be
 * allocated, or the error from mlx5_core_create_tir().
 */
static int
mlx5e_open_tir(struct mlx5e_priv *priv, int tt)
{
	struct mlx5_core_dev *mdev = priv->mdev;
	u32 *in;
	void *tirc;
	int inlen;
	int err;

	inlen = MLX5_ST_SZ_BYTES(create_tir_in);
	in = mlx5_vzalloc(inlen);
	if (in == NULL)
		return (-ENOMEM);
	tirc = MLX5_ADDR_OF(create_tir_in, in, tir_context);

	mlx5e_build_tir_ctx(priv, tirc, tt);

	err = mlx5_core_create_tir(mdev, in, inlen, &priv->tirn[tt]);

	kvfree(in);

	return (err);
}

/*
 * Destroy the TIR of traffic type "tt".
 */
static void
mlx5e_close_tir(struct mlx5e_priv *priv, int tt)
{
	mlx5_core_destroy_tir(priv->mdev, priv->tirn[tt]);
}

/*
 * Open one TIR per traffic type. On failure the ones opened so far
 * are closed in reverse order and the error is returned.
 */
static int
mlx5e_open_tirs(struct mlx5e_priv *priv)
{
	int err;
	int i;

	for (i = 0; i < MLX5E_NUM_TT; i++) {
		err = mlx5e_open_tir(priv, i);
		if (err)
			goto err_close_tirs;
	}

	return (0);

err_close_tirs:
	for (i--; i >= 0; i--)
		mlx5e_close_tir(priv, i);

	return (err);
}

/*
 * Close the TIR of every traffic type.
 */
static void
mlx5e_close_tirs(struct mlx5e_priv *priv)
{
	int i;

	for (i = 0; i < MLX5E_NUM_TT; i++)
		mlx5e_close_tir(priv, i);
}

/*
 * SW MTU does not include headers,
 * HW MTU includes all headers and checksums.
*/ static int mlx5e_set_dev_port_mtu(struct ifnet *ifp, int sw_mtu) { struct mlx5e_priv *priv = ifp->if_softc; struct mlx5_core_dev *mdev = priv->mdev; int hw_mtu; int err; hw_mtu = MLX5E_SW2HW_MTU(sw_mtu); err = mlx5_set_port_mtu(mdev, hw_mtu); if (err) { if_printf(ifp, "%s: mlx5_set_port_mtu failed setting %d, err=%d\n", __func__, sw_mtu, err); return (err); } /* Update vport context MTU */ err = mlx5_set_vport_mtu(mdev, hw_mtu); if (err) { if_printf(ifp, "%s: Failed updating vport context with MTU size, err=%d\n", __func__, err); } ifp->if_mtu = sw_mtu; err = mlx5_query_vport_mtu(mdev, &hw_mtu); if (err || !hw_mtu) { /* fallback to port oper mtu */ err = mlx5_query_port_oper_mtu(mdev, &hw_mtu); } if (err) { if_printf(ifp, "Query port MTU, after setting new " "MTU value, failed\n"); return (err); } else if (MLX5E_HW2SW_MTU(hw_mtu) < sw_mtu) { err = -E2BIG, if_printf(ifp, "Port MTU %d is smaller than " "ifp mtu %d\n", hw_mtu, sw_mtu); } else if (MLX5E_HW2SW_MTU(hw_mtu) > sw_mtu) { err = -EINVAL; if_printf(ifp, "Port MTU %d is bigger than " "ifp mtu %d\n", hw_mtu, sw_mtu); } priv->params_ethtool.hw_mtu = hw_mtu; return (err); } int mlx5e_open_locked(struct ifnet *ifp) { struct mlx5e_priv *priv = ifp->if_softc; int err; u16 set_id; /* check if already opened */ if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0) return (0); #ifdef RSS if (rss_getnumbuckets() > priv->params.num_channels) { if_printf(ifp, "NOTE: There are more RSS buckets(%u) than " "channels(%u) available\n", rss_getnumbuckets(), priv->params.num_channels); } #endif err = mlx5e_open_tises(priv); if (err) { if_printf(ifp, "%s: mlx5e_open_tises failed, %d\n", __func__, err); return (err); } err = mlx5_vport_alloc_q_counter(priv->mdev, MLX5_INTERFACE_PROTOCOL_ETH, &set_id); if (err) { if_printf(priv->ifp, "%s: mlx5_vport_alloc_q_counter failed: %d\n", __func__, err); goto err_close_tises; } /* store counter set ID */ priv->counter_set_id = set_id; err = mlx5e_open_channels(priv); if (err) { 
if_printf(ifp, "%s: mlx5e_open_channels failed, %d\n", __func__, err); goto err_dalloc_q_counter; } err = mlx5e_open_rqt(priv); if (err) { if_printf(ifp, "%s: mlx5e_open_rqt failed, %d\n", __func__, err); goto err_close_channels; } err = mlx5e_open_tirs(priv); if (err) { if_printf(ifp, "%s: mlx5e_open_tir failed, %d\n", __func__, err); goto err_close_rqls; } err = mlx5e_open_flow_table(priv); if (err) { if_printf(ifp, "%s: mlx5e_open_flow_table failed, %d\n", __func__, err); goto err_close_tirs; } err = mlx5e_add_all_vlan_rules(priv); if (err) { if_printf(ifp, "%s: mlx5e_add_all_vlan_rules failed, %d\n", __func__, err); goto err_close_flow_table; } set_bit(MLX5E_STATE_OPENED, &priv->state); mlx5e_update_carrier(priv); mlx5e_set_rx_mode_core(priv); return (0); err_close_flow_table: mlx5e_close_flow_table(priv); err_close_tirs: mlx5e_close_tirs(priv); err_close_rqls: mlx5e_close_rqt(priv); err_close_channels: mlx5e_close_channels(priv); err_dalloc_q_counter: mlx5_vport_dealloc_q_counter(priv->mdev, MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id); err_close_tises: mlx5e_close_tises(priv); return (err); } static void mlx5e_open(void *arg) { struct mlx5e_priv *priv = arg; PRIV_LOCK(priv); if (mlx5_set_port_status(priv->mdev, MLX5_PORT_UP)) if_printf(priv->ifp, "%s: Setting port status to up failed\n", __func__); mlx5e_open_locked(priv->ifp); priv->ifp->if_drv_flags |= IFF_DRV_RUNNING; PRIV_UNLOCK(priv); } int mlx5e_close_locked(struct ifnet *ifp) { struct mlx5e_priv *priv = ifp->if_softc; /* check if already closed */ if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0) return (0); clear_bit(MLX5E_STATE_OPENED, &priv->state); mlx5e_set_rx_mode_core(priv); mlx5e_del_all_vlan_rules(priv); if_link_state_change(priv->ifp, LINK_STATE_DOWN); mlx5e_close_flow_table(priv); mlx5e_close_tirs(priv); mlx5e_close_rqt(priv); mlx5e_close_channels(priv); mlx5_vport_dealloc_q_counter(priv->mdev, MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id); mlx5e_close_tises(priv); return (0); 
}

#if (__FreeBSD_version >= 1100000)
/*
 * Return interface counter "cnt" from the cached vport and pport
 * statistics in "priv". Counters not handled here are passed to
 * if_get_counter_default(). The priv lock is not taken.
 */
static uint64_t
mlx5e_get_counter(struct ifnet *ifp, ift_counter cnt)
{
	struct mlx5e_priv *priv = ifp->if_softc;
	u64 retval;

	/* PRIV_LOCK(priv); XXX not allowed */
	switch (cnt) {
	case IFCOUNTER_IPACKETS:
		retval = priv->stats.vport.rx_packets;
		break;
	case IFCOUNTER_IERRORS:
		/* sum of the vport error count and the pport RX errors */
		retval = priv->stats.vport.rx_error_packets +
		    priv->stats.pport.alignment_err +
		    priv->stats.pport.check_seq_err +
		    priv->stats.pport.crc_align_errors +
		    priv->stats.pport.in_range_len_errors +
		    priv->stats.pport.jabbers +
		    priv->stats.pport.out_of_range_len +
		    priv->stats.pport.oversize_pkts +
		    priv->stats.pport.symbol_err +
		    priv->stats.pport.too_long_errors +
		    priv->stats.pport.undersize_pkts +
		    priv->stats.pport.unsupported_op_rx;
		break;
	case IFCOUNTER_IQDROPS:
		retval = priv->stats.vport.rx_out_of_buffer +
		    priv->stats.pport.drop_events;
		break;
	case IFCOUNTER_OPACKETS:
		retval = priv->stats.vport.tx_packets;
		break;
	case IFCOUNTER_OERRORS:
		retval = priv->stats.vport.tx_error_packets;
		break;
	case IFCOUNTER_IBYTES:
		retval = priv->stats.vport.rx_bytes;
		break;
	case IFCOUNTER_OBYTES:
		retval = priv->stats.vport.tx_bytes;
		break;
	case IFCOUNTER_IMCASTS:
		retval = priv->stats.vport.rx_multicast_packets;
		break;
	case IFCOUNTER_OMCASTS:
		retval = priv->stats.vport.tx_multicast_packets;
		break;
	case IFCOUNTER_OQDROPS:
		retval = priv->stats.vport.tx_queue_dropped;
		break;
	case IFCOUNTER_COLLISIONS:
		retval = priv->stats.pport.collisions;
		break;
	default:
		retval = if_get_counter_default(ifp, cnt);
		break;
	}
	/* PRIV_UNLOCK(priv); XXX not allowed */
	return (retval);
}
#endif

/*
 * Queue the set_rx_mode work item on the driver's work queue.
 */
static void
mlx5e_set_rx_mode(struct ifnet *ifp)
{
	struct mlx5e_priv *priv = ifp->if_softc;

	queue_work(priv->wq, &priv->set_rx_mode_work);
}

/*
 * Interface ioctl handler. Handles MTU, flags, multicast, media,
 * capability and SIOCGI2C requests; everything else is passed to
 * ether_ioctl(). Returns ENXIO when the softc is gone.
 */
static int
mlx5e_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct mlx5e_priv *priv;
	struct ifreq *ifr;
	struct ifi2creq i2c;
	int error = 0;
	int mask = 0;
	int size_read = 0;
	int module_status;
	int module_num;
	int max_mtu;
	uint8_t read_addr;

	priv = ifp->if_softc;

	/* check
if detaching */
	if (priv == NULL || priv->gone != 0)
		return (ENXIO);

	switch (command) {
	case SIOCSIFMTU:
		ifr = (struct ifreq *)data;

		PRIV_LOCK(priv);
		/*
		 * NOTE(review): the result of the max MTU query is not
		 * checked before "max_mtu" is used - confirm it cannot fail
		 * here.
		 */
		mlx5_query_port_max_mtu(priv->mdev, &max_mtu);

		if (ifr->ifr_mtu >= MLX5E_MTU_MIN &&
		    ifr->ifr_mtu <= MIN(MLX5E_MTU_MAX, max_mtu)) {
			int was_opened;

			/* the interface is closed around the MTU change */
			was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
			if (was_opened)
				mlx5e_close_locked(ifp);

			/* set new MTU */
			mlx5e_set_dev_port_mtu(ifp, ifr->ifr_mtu);

			if (was_opened)
				mlx5e_open_locked(ifp);
		} else {
			error = EINVAL;
			if_printf(ifp, "Invalid MTU value. Min val: %d, Max val: %d\n",
			    MLX5E_MTU_MIN, MIN(MLX5E_MTU_MAX, max_mtu));
		}
		PRIV_UNLOCK(priv);
		break;
	case SIOCSIFFLAGS:
		/* already up and running: only refresh the RX mode */
		if ((ifp->if_flags & IFF_UP) &&
		    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
			mlx5e_set_rx_mode(ifp);
			break;
		}
		PRIV_LOCK(priv);
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
				if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0)
					mlx5e_open_locked(ifp);
				ifp->if_drv_flags |= IFF_DRV_RUNNING;
				mlx5_set_port_status(priv->mdev, MLX5_PORT_UP);
			}
		} else {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
				mlx5_set_port_status(priv->mdev,
				    MLX5_PORT_DOWN);
				if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0)
					mlx5e_close_locked(ifp);
				mlx5e_update_carrier(priv);
				ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
			}
		}
		PRIV_UNLOCK(priv);
		break;
	case SIOCADDMULTI:
	case SIOCDELMULTI:
		mlx5e_set_rx_mode(ifp);
		break;
	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
	case SIOCGIFXMEDIA:
		ifr = (struct ifreq *)data;
		error = ifmedia_ioctl(ifp, ifr, &priv->media, command);
		break;
	case SIOCSIFCAP:
		ifr = (struct ifreq *)data;
		PRIV_LOCK(priv);
		/* "mask" holds the capability bits that are being toggled */
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;

		if (mask & IFCAP_TXCSUM) {
			ifp->if_capenable ^= IFCAP_TXCSUM;
			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);

			/* TSO4 cannot stay enabled without TX checksumming */
			if (IFCAP_TSO4 & ifp->if_capenable &&
			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
				ifp->if_capenable &= ~IFCAP_TSO4;
				ifp->if_hwassist &= ~CSUM_IP_TSO;
				if_printf(ifp,
				    "tso4 disabled due to -txcsum.\n");
			}
		}
		if (mask & IFCAP_TXCSUM_IPV6) {
ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
			ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);

			/* TSO6 cannot stay enabled without IPv6 TX checksumming */
			if (IFCAP_TSO6 & ifp->if_capenable &&
			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
				ifp->if_capenable &= ~IFCAP_TSO6;
				ifp->if_hwassist &= ~CSUM_IP6_TSO;
				if_printf(ifp,
				    "tso6 disabled due to -txcsum6.\n");
			}
		}
		if (mask & IFCAP_RXCSUM)
			ifp->if_capenable ^= IFCAP_RXCSUM;
		if (mask & IFCAP_RXCSUM_IPV6)
			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
		if (mask & IFCAP_TSO4) {
			/* enabling TSO4 requires TX checksumming to be on */
			if (!(IFCAP_TSO4 & ifp->if_capenable) &&
			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
				if_printf(ifp, "enable txcsum first.\n");
				error = EAGAIN;
				goto out;
			}
			ifp->if_capenable ^= IFCAP_TSO4;
			ifp->if_hwassist ^= CSUM_IP_TSO;
		}
		if (mask & IFCAP_TSO6) {
			/* enabling TSO6 requires IPv6 TX checksumming to be on */
			if (!(IFCAP_TSO6 & ifp->if_capenable) &&
			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
				if_printf(ifp, "enable txcsum6 first.\n");
				error = EAGAIN;
				goto out;
			}
			ifp->if_capenable ^= IFCAP_TSO6;
			ifp->if_hwassist ^= CSUM_IP6_TSO;
		}
		if (mask & IFCAP_VLAN_HWFILTER) {
			if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
				mlx5e_disable_vlan_filter(priv);
			else
				mlx5e_enable_vlan_filter(priv);

			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
		}
		if (mask & IFCAP_VLAN_HWTAGGING)
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
		if (mask & IFCAP_WOL_MAGIC)
			ifp->if_capenable ^= IFCAP_WOL_MAGIC;

		VLAN_CAPABILITIES(ifp);
		/* turn off LRO means also turn of HW LRO - if it's on */
		if (mask & IFCAP_LRO) {
			int was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
			bool need_restart = false;

			ifp->if_capenable ^= IFCAP_LRO;
			if (!(ifp->if_capenable & IFCAP_LRO)) {
				if (priv->params.hw_lro_en) {
					priv->params.hw_lro_en = false;
					need_restart = true;
					/* Not sure this is the correct way */
					priv->params_ethtool.hw_lro = priv->params.hw_lro_en;
				}
			}
			/* a restart is only needed when HW LRO was switched off */
			if (was_opened && need_restart) {
				mlx5e_close_locked(ifp);
				mlx5e_open_locked(ifp);
			}
		}
		if (mask & IFCAP_HWRXTSTMP) {
			ifp->if_capenable ^= IFCAP_HWRXTSTMP;
			if (ifp->if_capenable & IFCAP_HWRXTSTMP) {
				if (priv->clbr_done == 0)
					mlx5e_reset_calibration_callout(priv);
			} else {
callout_drain(&priv->tstmp_clbr); priv->clbr_done = 0; } } out: PRIV_UNLOCK(priv); break; case SIOCGI2C: ifr = (struct ifreq *)data; /* * Copy from the user-space address ifr_data to the * kernel-space address i2c */ - error = copyin(ifr->ifr_data, &i2c, sizeof(i2c)); + error = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c)); if (error) break; if (i2c.len > sizeof(i2c.data)) { error = EINVAL; break; } PRIV_LOCK(priv); /* Get module_num which is required for the query_eeprom */ error = mlx5_query_module_num(priv->mdev, &module_num); if (error) { if_printf(ifp, "Query module num failed, eeprom " "reading is not supported\n"); error = EINVAL; goto err_i2c; } /* Check if module is present before doing an access */ module_status = mlx5_query_module_status(priv->mdev, module_num); if (module_status != MLX5_MODULE_STATUS_PLUGGED_ENABLED && module_status != MLX5_MODULE_STATUS_PLUGGED_DISABLED) { error = EINVAL; goto err_i2c; } /* * Currently 0XA0 and 0xA2 are the only addresses permitted. * The internal conversion is as follows: */ if (i2c.dev_addr == 0xA0) read_addr = MLX5E_I2C_ADDR_LOW; else if (i2c.dev_addr == 0xA2) read_addr = MLX5E_I2C_ADDR_HIGH; else { if_printf(ifp, "Query eeprom failed, " "Invalid Address: %X\n", i2c.dev_addr); error = EINVAL; goto err_i2c; } error = mlx5_query_eeprom(priv->mdev, read_addr, MLX5E_EEPROM_LOW_PAGE, (uint32_t)i2c.offset, (uint32_t)i2c.len, module_num, (uint32_t *)i2c.data, &size_read); if (error) { if_printf(ifp, "Query eeprom failed, eeprom " "reading is not supported\n"); error = EINVAL; goto err_i2c; } if (i2c.len > MLX5_EEPROM_MAX_BYTES) { error = mlx5_query_eeprom(priv->mdev, read_addr, MLX5E_EEPROM_LOW_PAGE, (uint32_t)(i2c.offset + size_read), (uint32_t)(i2c.len - size_read), module_num, (uint32_t *)(i2c.data + size_read), &size_read); } if (error) { if_printf(ifp, "Query eeprom failed, eeprom " "reading is not supported\n"); error = EINVAL; goto err_i2c; } - error = copyout(&i2c, ifr->ifr_data, sizeof(i2c)); + error = 
copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c));
err_i2c:
		PRIV_UNLOCK(priv);
		break;

	default:
		/* everything else is handled by the generic Ethernet code */
		error = ether_ioctl(ifp, command, data);
		break;
	}
	return (error);
}

/*
 * Check that the device has the capabilities this driver requires.
 * Currently only the port type is checked: returns -ENODEV unless
 * it is Ethernet, zero otherwise.
 */
static int
mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
{
	/*
	 * TODO: uncomment once FW really sets all these bits if
	 * (!mdev->caps.eth.rss_ind_tbl_cap || !mdev->caps.eth.csum_cap ||
	 * !mdev->caps.eth.max_lso_cap || !mdev->caps.eth.vlan_cap ||
	 * !(mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_SCQE_BRK_MOD)) return
	 * -ENOTSUPP;
	 */

	/* TODO: add more must-to-have features */

	if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
		return (-ENODEV);

	return (0);
}

/*
 * Initialize the default parameters in "priv" for device "mdev"
 * with "num_comp_vectors" channels, and set up the work items.
 */
static void
mlx5e_build_ifp_priv(struct mlx5_core_dev *mdev,
    struct mlx5e_priv *priv,
    int num_comp_vectors)
{
	/*
	 * TODO: Consider link speed for setting "log_sq_size",
	 * "log_rq_size" and "cq_moderation_xxx":
	 */
	priv->params.log_sq_size =
	    MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
	priv->params.log_rq_size =
	    MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
	/* the RX moderation defaults depend on the start-from-CQE capability */
	priv->params.rx_cq_moderation_usec =
	    MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ?
	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE :
	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
	priv->params.rx_cq_moderation_mode =
	    MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? 1 : 0;
	priv->params.rx_cq_moderation_pkts =
	    MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
	priv->params.tx_cq_moderation_usec =
	    MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
	priv->params.tx_cq_moderation_pkts =
	    MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
	priv->params.min_rx_wqes =
	    MLX5E_PARAMS_DEFAULT_MIN_RX_WQES;
	/* use the larger of the default and order_base_2(num_comp_vectors) */
	priv->params.rx_hash_log_tbl_sz =
	    (order_base_2(num_comp_vectors) >
	    MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ) ?
	    order_base_2(num_comp_vectors) :
	    MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ;
	priv->params.num_tc = 1;
	priv->params.default_vlan_prio = 0;
	priv->counter_set_id = -1;

	/*
	 * hw lro is currently defaulted to off.
when it won't anymore we
	 * will consider the HW capability: "!!MLX5_CAP_ETH(mdev, lro_cap)"
	 */
	priv->params.hw_lro_en = false;
	priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;

	priv->params.cqe_zipping_en = !!MLX5_CAP_GEN(mdev, cqe_compression);

	priv->mdev = mdev;
	priv->params.num_channels = num_comp_vectors;
	priv->order_base_2_num_channels = order_base_2(num_comp_vectors);
	priv->queue_mapping_channel_mask =
	    roundup_pow_of_two(num_comp_vectors) - 1;
	priv->num_tc = priv->params.num_tc;
	priv->default_vlan_prio = priv->params.default_vlan_prio;

	INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work);
	INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work);
	INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work);
}

/*
 * Create a memory key in protection domain "pdn" using access mode
 * MLX5_ACCESS_MODE_PA with local read and write enabled, and store
 * it in "mkey".
 *
 * Returns zero on success, -ENOMEM if the command buffer cannot be
 * allocated, or the error from mlx5_core_create_mkey().
 */
static int
mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn,
		  struct mlx5_core_mr *mkey)
{
	struct ifnet *ifp = priv->ifp;
	struct mlx5_core_dev *mdev = priv->mdev;
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	void *mkc;
	u32 *in;
	int err;

	in = mlx5_vzalloc(inlen);
	if (in == NULL) {
		if_printf(ifp, "%s: failed to allocate inbox\n", __func__);
		return (-ENOMEM);
	}

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_PA);
	MLX5_SET(mkc, mkc, lw, 1);
	MLX5_SET(mkc, mkc, lr, 1);

	MLX5_SET(mkc, mkc, pd, pdn);
	MLX5_SET(mkc, mkc, length64, 1);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);

	err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
	if (err)
		if_printf(ifp, "%s: mlx5_core_create_mkey failed, %d\n",
		    __func__, err);

	kvfree(in);
	return (err);
}

/* description strings generated from the vport statistics list */
static const char *mlx5e_vport_stats_desc[] = {
	MLX5E_VPORT_STATS(MLX5E_STATS_DESC)
};

/* description strings generated from the pport statistics list */
static const char *mlx5e_pport_stats_desc[] = {
	MLX5E_PPORT_STATS(MLX5E_STATS_DESC)
};

/*
 * Initialize the locks of "priv": the async events mutex, the state
 * sx lock, the watchdog callout (tied to the async events mutex)
 * and the doorbell lock.
 */
static void
mlx5e_priv_mtx_init(struct mlx5e_priv *priv)
{
	mtx_init(&priv->async_events_mtx, "mlx5async", MTX_NETWORK_LOCK, MTX_DEF);
	sx_init(&priv->state_lock, "mlx5state");
	callout_init_mtx(&priv->watchdog, &priv->async_events_mtx, 0);
	MLX5_INIT_DOORBELL_LOCK(&priv->doorbell_lock);
} static void mlx5e_priv_mtx_destroy(struct mlx5e_priv *priv) { mtx_destroy(&priv->async_events_mtx); sx_destroy(&priv->state_lock); } static int sysctl_firmware(SYSCTL_HANDLER_ARGS) { /* * %d.%d%.d the string format. * fw_rev_{maj,min,sub} return u16, 2^16 = 65536. * We need at most 5 chars to store that. * It also has: two "." and NULL at the end, which means we need 18 * (5*3 + 3) chars at most. */ char fw[18]; struct mlx5e_priv *priv = arg1; int error; snprintf(fw, sizeof(fw), "%d.%d.%d", fw_rev_maj(priv->mdev), fw_rev_min(priv->mdev), fw_rev_sub(priv->mdev)); error = sysctl_handle_string(oidp, fw, sizeof(fw), req); return (error); } static void mlx5e_disable_tx_dma(struct mlx5e_channel *ch) { int i; for (i = 0; i < ch->num_tc; i++) mlx5e_drain_sq(&ch->sq[i]); } static void mlx5e_reset_sq_doorbell_record(struct mlx5e_sq *sq) { sq->doorbell.d32[0] = cpu_to_be32(MLX5_OPCODE_NOP); sq->doorbell.d32[1] = cpu_to_be32(sq->sqn << 8); mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0); sq->doorbell.d64 = 0; } void mlx5e_resume_sq(struct mlx5e_sq *sq) { int err; /* check if already enabled */ if (sq->stopped == 0) return; err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_ERR, MLX5_SQC_STATE_RST); if (err != 0) { if_printf(sq->ifp, "mlx5e_modify_sq() from ERR to RST failed: %d\n", err); } sq->cc = 0; sq->pc = 0; /* reset doorbell prior to moving from RST to RDY */ mlx5e_reset_sq_doorbell_record(sq); err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY); if (err != 0) { if_printf(sq->ifp, "mlx5e_modify_sq() from RST to RDY failed: %d\n", err); } mtx_lock(&sq->lock); sq->cev_next_state = MLX5E_CEV_STATE_INITIAL; sq->stopped = 0; mtx_unlock(&sq->lock); } static void mlx5e_enable_tx_dma(struct mlx5e_channel *ch) { int i; for (i = 0; i < ch->num_tc; i++) mlx5e_resume_sq(&ch->sq[i]); } static void mlx5e_disable_rx_dma(struct mlx5e_channel *ch) { struct mlx5e_rq *rq = &ch->rq; int err; mtx_lock(&rq->mtx); rq->enabled = 0; callout_stop(&rq->watchdog); mtx_unlock(&rq->mtx); 
callout_drain(&rq->watchdog); err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR); if (err != 0) { if_printf(rq->ifp, "mlx5e_modify_rq() from RDY to RST failed: %d\n", err); } while (!mlx5_wq_ll_is_empty(&rq->wq)) { msleep(1); rq->cq.mcq.comp(&rq->cq.mcq); } /* * Transitioning into RST state will allow the FW to track less ERR state queues, * thus reducing the recv queue flushing time */ err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_ERR, MLX5_RQC_STATE_RST); if (err != 0) { if_printf(rq->ifp, "mlx5e_modify_rq() from ERR to RST failed: %d\n", err); } } static void mlx5e_enable_rx_dma(struct mlx5e_channel *ch) { struct mlx5e_rq *rq = &ch->rq; int err; rq->wq.wqe_ctr = 0; mlx5_wq_ll_update_db_record(&rq->wq); err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); if (err != 0) { if_printf(rq->ifp, "mlx5e_modify_rq() from RST to RDY failed: %d\n", err); } rq->enabled = 1; rq->cq.mcq.comp(&rq->cq.mcq); } void mlx5e_modify_tx_dma(struct mlx5e_priv *priv, uint8_t value) { int i; if (priv->channel == NULL) return; for (i = 0; i < priv->params.num_channels; i++) { if (!priv->channel[i]) continue; if (value) mlx5e_disable_tx_dma(priv->channel[i]); else mlx5e_enable_tx_dma(priv->channel[i]); } } void mlx5e_modify_rx_dma(struct mlx5e_priv *priv, uint8_t value) { int i; if (priv->channel == NULL) return; for (i = 0; i < priv->params.num_channels; i++) { if (!priv->channel[i]) continue; if (value) mlx5e_disable_rx_dma(priv->channel[i]); else mlx5e_enable_rx_dma(priv->channel[i]); } } static void mlx5e_add_hw_stats(struct mlx5e_priv *priv) { SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw), OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD, priv, 0, sysctl_firmware, "A", "HCA firmware version"); SYSCTL_ADD_STRING(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw), OID_AUTO, "board_id", CTLFLAG_RD, priv->mdev->board_id, 0, "Board ID"); } static int mlx5e_sysctl_tx_priority_flow_control(SYSCTL_HANDLER_ARGS) { struct mlx5e_priv *priv = 
arg1; uint32_t tx_pfc; uint32_t value; int error; PRIV_LOCK(priv); tx_pfc = priv->params.tx_priority_flow_control; /* get current value */ value = (tx_pfc >> arg2) & 1; error = sysctl_handle_32(oidp, &value, 0, req); /* range check value */ if (value != 0) priv->params.tx_priority_flow_control |= (1 << arg2); else priv->params.tx_priority_flow_control &= ~(1 << arg2); /* check if update is required */ if (error == 0 && priv->gone == 0 && tx_pfc != priv->params.tx_priority_flow_control) { error = -mlx5e_set_port_pfc(priv); /* restore previous value */ if (error != 0) priv->params.tx_priority_flow_control= tx_pfc; } PRIV_UNLOCK(priv); return (error); } static int mlx5e_sysctl_rx_priority_flow_control(SYSCTL_HANDLER_ARGS) { struct mlx5e_priv *priv = arg1; uint32_t rx_pfc; uint32_t value; int error; PRIV_LOCK(priv); rx_pfc = priv->params.rx_priority_flow_control; /* get current value */ value = (rx_pfc >> arg2) & 1; error = sysctl_handle_32(oidp, &value, 0, req); /* range check value */ if (value != 0) priv->params.rx_priority_flow_control |= (1 << arg2); else priv->params.rx_priority_flow_control &= ~(1 << arg2); /* check if update is required */ if (error == 0 && priv->gone == 0 && rx_pfc != priv->params.rx_priority_flow_control) { error = -mlx5e_set_port_pfc(priv); /* restore previous value */ if (error != 0) priv->params.rx_priority_flow_control= rx_pfc; } PRIV_UNLOCK(priv); return (error); } static void mlx5e_setup_pauseframes(struct mlx5e_priv *priv) { unsigned int x; char path[96]; int error; /* Only receiving pauseframes is enabled by default */ priv->params.tx_pauseframe_control = 0; priv->params.rx_pauseframe_control = 1; /* disable ports flow control, PFC, by default */ priv->params.tx_priority_flow_control = 0; priv->params.rx_priority_flow_control = 0; #if (__FreeBSD_version < 1100000) /* compute path for sysctl */ snprintf(path, sizeof(path), "dev.mce.%d.tx_pauseframe_control", device_get_unit(priv->mdev->pdev->dev.bsddev)); /* try to fetch tunable, if 
any */ TUNABLE_INT_FETCH(path, &priv->params.tx_pauseframe_control); /* compute path for sysctl */ snprintf(path, sizeof(path), "dev.mce.%d.rx_pauseframe_control", device_get_unit(priv->mdev->pdev->dev.bsddev)); /* try to fetch tunable, if any */ TUNABLE_INT_FETCH(path, &priv->params.rx_pauseframe_control); for (x = 0; x != 8; x++) { /* compute path for sysctl */ snprintf(path, sizeof(path), "dev.mce.%d.tx_priority_flow_control_%u", device_get_unit(priv->mdev->pdev->dev.bsddev), x); /* try to fetch tunable, if any */ if (TUNABLE_INT_FETCH(path, &value) == 0 && value != 0) priv->params.tx_priority_flow_control |= 1 << x; /* compute path for sysctl */ snprintf(path, sizeof(path), "dev.mce.%d.rx_priority_flow_control_%u", device_get_unit(priv->mdev->pdev->dev.bsddev), x); /* try to fetch tunable, if any */ if (TUNABLE_INT_FETCH(path, &value) == 0 && value != 0) priv->params.rx_priority_flow_control |= 1 << x; } #endif /* register pauseframe SYSCTLs */ SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO, "tx_pauseframe_control", CTLFLAG_RDTUN, &priv->params.tx_pauseframe_control, 0, "Set to enable TX pause frames. Clear to disable."); SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO, "rx_pauseframe_control", CTLFLAG_RDTUN, &priv->params.rx_pauseframe_control, 0, "Set to enable RX pause frames. Clear to disable."); /* register priority_flow control, PFC, SYSCTLs */ for (x = 0; x != 8; x++) { snprintf(path, sizeof(path), "tx_priority_flow_control_%u", x); SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO, path, CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, priv, x, &mlx5e_sysctl_tx_priority_flow_control, "IU", "Set to enable TX ports flow control frames for given priority. 
Clear to disable."); snprintf(path, sizeof(path), "rx_priority_flow_control_%u", x); SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO, path, CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, priv, x, &mlx5e_sysctl_rx_priority_flow_control, "IU", "Set to enable RX ports flow control frames for given priority. Clear to disable."); } PRIV_LOCK(priv); /* range check */ priv->params.tx_pauseframe_control = priv->params.tx_pauseframe_control ? 1 : 0; priv->params.rx_pauseframe_control = priv->params.rx_pauseframe_control ? 1 : 0; /* update firmware */ error = mlx5e_set_port_pause_and_pfc(priv); if (error == -EINVAL) { if_printf(priv->ifp, "Global pauseframes must be disabled before enabling PFC.\n"); priv->params.rx_priority_flow_control = 0; priv->params.tx_priority_flow_control = 0; /* update firmware */ (void) mlx5e_set_port_pause_and_pfc(priv); } PRIV_UNLOCK(priv); } static void * mlx5e_create_ifp(struct mlx5_core_dev *mdev) { struct ifnet *ifp; struct mlx5e_priv *priv; u8 dev_addr[ETHER_ADDR_LEN] __aligned(4); struct sysctl_oid_list *child; int ncv = mdev->priv.eq_table.num_comp_vectors; char unit[16]; int err; int i; u32 eth_proto_cap; if (mlx5e_check_required_hca_cap(mdev)) { mlx5_core_dbg(mdev, "mlx5e_check_required_hca_cap() failed\n"); return (NULL); } priv = malloc(sizeof(*priv), M_MLX5EN, M_WAITOK | M_ZERO); mlx5e_priv_mtx_init(priv); ifp = priv->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { mlx5_core_err(mdev, "if_alloc() failed\n"); goto err_free_priv; } ifp->if_softc = priv; if_initname(ifp, "mce", device_get_unit(mdev->pdev->dev.bsddev)); ifp->if_mtu = ETHERMTU; ifp->if_init = mlx5e_open; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = mlx5e_ioctl; ifp->if_transmit = mlx5e_xmit; ifp->if_qflush = if_qflush; #if (__FreeBSD_version >= 1100000) ifp->if_get_counter = mlx5e_get_counter; #endif ifp->if_snd.ifq_maxlen = ifqmaxlen; /* * Set driver features */ ifp->if_capabilities |= IFCAP_HWCSUM | 
IFCAP_HWCSUM_IPV6; ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING; ifp->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER; ifp->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU; ifp->if_capabilities |= IFCAP_LRO; ifp->if_capabilities |= IFCAP_TSO | IFCAP_VLAN_HWTSO; ifp->if_capabilities |= IFCAP_HWSTATS | IFCAP_HWRXTSTMP; /* set TSO limits so that we don't have to drop TX packets */ ifp->if_hw_tsomax = MLX5E_MAX_TX_PAYLOAD_SIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); ifp->if_hw_tsomaxsegcount = MLX5E_MAX_TX_MBUF_FRAGS - 1 /* hdr */; ifp->if_hw_tsomaxsegsize = MLX5E_MAX_TX_MBUF_SIZE; ifp->if_capenable = ifp->if_capabilities; ifp->if_hwassist = 0; if (ifp->if_capenable & IFCAP_TSO) ifp->if_hwassist |= CSUM_TSO; if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP); if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) ifp->if_hwassist |= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6); /* ifnet sysctl tree */ sysctl_ctx_init(&priv->sysctl_ctx); priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dev), OID_AUTO, ifp->if_dname, CTLFLAG_RD, 0, "MLX5 ethernet - interface name"); if (priv->sysctl_ifnet == NULL) { mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n"); goto err_free_sysctl; } snprintf(unit, sizeof(unit), "%d", ifp->if_dunit); priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO, unit, CTLFLAG_RD, 0, "MLX5 ethernet - interface unit"); if (priv->sysctl_ifnet == NULL) { mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n"); goto err_free_sysctl; } /* HW sysctl tree */ child = SYSCTL_CHILDREN(device_get_sysctl_tree(mdev->pdev->dev.bsddev)); priv->sysctl_hw = SYSCTL_ADD_NODE(&priv->sysctl_ctx, child, OID_AUTO, "hw", CTLFLAG_RD, 0, "MLX5 ethernet dev hw"); if (priv->sysctl_hw == NULL) { mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n"); goto err_free_sysctl; } mlx5e_build_ifp_priv(mdev, priv, ncv); snprintf(unit, sizeof(unit), "mce%u_wq", 
device_get_unit(mdev->pdev->dev.bsddev)); priv->wq = alloc_workqueue(unit, 0, 1); if (priv->wq == NULL) { if_printf(ifp, "%s: alloc_workqueue failed\n", __func__); goto err_free_sysctl; } err = mlx5_alloc_map_uar(mdev, &priv->cq_uar); if (err) { if_printf(ifp, "%s: mlx5_alloc_map_uar failed, %d\n", __func__, err); goto err_free_wq; } err = mlx5_core_alloc_pd(mdev, &priv->pdn); if (err) { if_printf(ifp, "%s: mlx5_core_alloc_pd failed, %d\n", __func__, err); goto err_unmap_free_uar; } err = mlx5_alloc_transport_domain(mdev, &priv->tdn); if (err) { if_printf(ifp, "%s: mlx5_alloc_transport_domain failed, %d\n", __func__, err); goto err_dealloc_pd; } err = mlx5e_create_mkey(priv, priv->pdn, &priv->mr); if (err) { if_printf(ifp, "%s: mlx5e_create_mkey failed, %d\n", __func__, err); goto err_dealloc_transport_domain; } mlx5_query_nic_vport_mac_address(priv->mdev, 0, dev_addr); /* check if we should generate a random MAC address */ if (MLX5_CAP_GEN(priv->mdev, vport_group_manager) == 0 && is_zero_ether_addr(dev_addr)) { random_ether_addr(dev_addr); if_printf(ifp, "Assigned random MAC address\n"); } /* set default MTU */ mlx5e_set_dev_port_mtu(ifp, ifp->if_mtu); /* Set desc */ device_set_desc(mdev->pdev->dev.bsddev, mlx5e_version); /* Set default media status */ priv->media_status_last = IFM_AVALID; priv->media_active_last = IFM_ETHER | IFM_AUTO | IFM_ETH_RXPAUSE | IFM_FDX; /* setup default pauseframes configuration */ mlx5e_setup_pauseframes(priv); err = mlx5_query_port_proto_cap(mdev, ð_proto_cap, MLX5_PTYS_EN); if (err) { eth_proto_cap = 0; if_printf(ifp, "%s: Query port media capability failed, %d\n", __func__, err); } /* Setup supported medias */ ifmedia_init(&priv->media, IFM_IMASK | IFM_ETH_FMASK, mlx5e_media_change, mlx5e_media_status); for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { if (mlx5e_mode_table[i].baudrate == 0) continue; if (MLX5E_PROT_MASK(i) & eth_proto_cap) { ifmedia_add(&priv->media, mlx5e_mode_table[i].subtype | IFM_ETHER, 0, NULL); 
ifmedia_add(&priv->media, mlx5e_mode_table[i].subtype | IFM_ETHER | IFM_FDX | IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL); } } ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX | IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL); /* Set autoselect by default */ ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX | IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE); ether_ifattach(ifp, dev_addr); /* Register for VLAN events */ priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, mlx5e_vlan_rx_add_vid, priv, EVENTHANDLER_PRI_FIRST); priv->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, mlx5e_vlan_rx_kill_vid, priv, EVENTHANDLER_PRI_FIRST); /* Link is down by default */ if_link_state_change(ifp, LINK_STATE_DOWN); mlx5e_enable_async_events(priv); mlx5e_add_hw_stats(priv); mlx5e_create_stats(&priv->stats.vport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), "vstats", mlx5e_vport_stats_desc, MLX5E_VPORT_STATS_NUM, priv->stats.vport.arg); mlx5e_create_stats(&priv->stats.pport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), "pstats", mlx5e_pport_stats_desc, MLX5E_PPORT_STATS_NUM, priv->stats.pport.arg); mlx5e_create_ethtool(priv); mtx_lock(&priv->async_events_mtx); mlx5e_update_stats(priv); mtx_unlock(&priv->async_events_mtx); SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO, "rx_clbr_done", CTLFLAG_RD, &priv->clbr_done, 0, "RX timestamps calibration state"); callout_init(&priv->tstmp_clbr, CALLOUT_DIRECT); mlx5e_reset_calibration_callout(priv); return (priv); err_dealloc_transport_domain: mlx5_dealloc_transport_domain(mdev, priv->tdn); err_dealloc_pd: mlx5_core_dealloc_pd(mdev, priv->pdn); err_unmap_free_uar: mlx5_unmap_free_uar(mdev, &priv->cq_uar); err_free_wq: destroy_workqueue(priv->wq); err_free_sysctl: sysctl_ctx_free(&priv->sysctl_ctx); if_free(ifp); err_free_priv: mlx5e_priv_mtx_destroy(priv); free(priv, M_MLX5EN); return (NULL); } static void mlx5e_destroy_ifp(struct mlx5_core_dev *mdev, void 
*vpriv) { struct mlx5e_priv *priv = vpriv; struct ifnet *ifp = priv->ifp; /* don't allow more IOCTLs */ priv->gone = 1; /* * Clear the device description to avoid use after free, * because the bsddev is not destroyed when this module is * unloaded: */ device_set_desc(mdev->pdev->dev.bsddev, NULL); /* XXX wait a bit to allow IOCTL handlers to complete */ pause("W", hz); /* stop watchdog timer */ callout_drain(&priv->watchdog); callout_drain(&priv->tstmp_clbr); if (priv->vlan_attach != NULL) EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach); if (priv->vlan_detach != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, priv->vlan_detach); /* make sure device gets closed */ PRIV_LOCK(priv); mlx5e_close_locked(ifp); PRIV_UNLOCK(priv); /* unregister device */ ifmedia_removeall(&priv->media); ether_ifdetach(ifp); if_free(ifp); /* destroy all remaining sysctl nodes */ if (priv->sysctl_debug) sysctl_ctx_free(&priv->stats.port_stats_debug.ctx); sysctl_ctx_free(&priv->stats.vport.ctx); sysctl_ctx_free(&priv->stats.pport.ctx); sysctl_ctx_free(&priv->sysctl_ctx); mlx5_core_destroy_mkey(priv->mdev, &priv->mr); mlx5_dealloc_transport_domain(priv->mdev, priv->tdn); mlx5_core_dealloc_pd(priv->mdev, priv->pdn); mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar); mlx5e_disable_async_events(priv); destroy_workqueue(priv->wq); mlx5e_priv_mtx_destroy(priv); free(priv, M_MLX5EN); } static void * mlx5e_get_ifp(void *vpriv) { struct mlx5e_priv *priv = vpriv; return (priv->ifp); } static struct mlx5_interface mlx5e_interface = { .add = mlx5e_create_ifp, .remove = mlx5e_destroy_ifp, .event = mlx5e_async_event, .protocol = MLX5_INTERFACE_PROTOCOL_ETH, .get_dev = mlx5e_get_ifp, }; void mlx5e_init(void) { mlx5_register_interface(&mlx5e_interface); } void mlx5e_cleanup(void) { mlx5_unregister_interface(&mlx5e_interface); } module_init_order(mlx5e_init, SI_ORDER_THIRD); module_exit_order(mlx5e_cleanup, SI_ORDER_THIRD); #if (__FreeBSD_version >= 1100000) MODULE_DEPEND(mlx5en, linuxkpi, 1, 1, 1); #endif 
MODULE_DEPEND(mlx5en, mlx5, 1, 1, 1); MODULE_VERSION(mlx5en, 1); Index: head/sys/dev/mwl/if_mwl.c =================================================================== --- head/sys/dev/mwl/if_mwl.c (revision 331796) +++ head/sys/dev/mwl/if_mwl.c (revision 331797) @@ -1,4840 +1,4840 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2007-2009 Sam Leffler, Errno Consulting * Copyright (c) 2007-2008 Marvell Semiconductor, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any * redistribution must be conditioned upon including a substantially * similar Disclaimer requirement for further binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGES. */ #include __FBSDID("$FreeBSD$"); /* * Driver for the Marvell 88W8363 Wireless LAN controller. 
*/ #include "opt_inet.h" #include "opt_mwl.h" #include "opt_wlan.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #endif /* INET */ #include #include /* idiomatic shorthands: MS = mask+shift, SM = shift+mask */ #define MS(v,x) (((v) & x) >> x##_S) #define SM(v,x) (((v) << x##_S) & x) static struct ieee80211vap *mwl_vap_create(struct ieee80211com *, const char [IFNAMSIZ], int, enum ieee80211_opmode, int, const uint8_t [IEEE80211_ADDR_LEN], const uint8_t [IEEE80211_ADDR_LEN]); static void mwl_vap_delete(struct ieee80211vap *); static int mwl_setupdma(struct mwl_softc *); static int mwl_hal_reset(struct mwl_softc *sc); static int mwl_init(struct mwl_softc *); static void mwl_parent(struct ieee80211com *); static int mwl_reset(struct ieee80211vap *, u_long); static void mwl_stop(struct mwl_softc *); static void mwl_start(struct mwl_softc *); static int mwl_transmit(struct ieee80211com *, struct mbuf *); static int mwl_raw_xmit(struct ieee80211_node *, struct mbuf *, const struct ieee80211_bpf_params *); static int mwl_media_change(struct ifnet *); static void mwl_watchdog(void *); static int mwl_ioctl(struct ieee80211com *, u_long, void *); static void mwl_radar_proc(void *, int); static void mwl_chanswitch_proc(void *, int); static void mwl_bawatchdog_proc(void *, int); static int mwl_key_alloc(struct ieee80211vap *, struct ieee80211_key *, ieee80211_keyix *, ieee80211_keyix *); static int mwl_key_delete(struct ieee80211vap *, const struct ieee80211_key *); static int mwl_key_set(struct ieee80211vap *, const struct ieee80211_key *); static int _mwl_key_set(struct ieee80211vap *, const struct ieee80211_key *, const uint8_t mac[IEEE80211_ADDR_LEN]); static int mwl_mode_init(struct mwl_softc *); static void mwl_update_mcast(struct 
ieee80211com *); static void mwl_update_promisc(struct ieee80211com *); static void mwl_updateslot(struct ieee80211com *); static int mwl_beacon_setup(struct ieee80211vap *); static void mwl_beacon_update(struct ieee80211vap *, int); #ifdef MWL_HOST_PS_SUPPORT static void mwl_update_ps(struct ieee80211vap *, int); static int mwl_set_tim(struct ieee80211_node *, int); #endif static int mwl_dma_setup(struct mwl_softc *); static void mwl_dma_cleanup(struct mwl_softc *); static struct ieee80211_node *mwl_node_alloc(struct ieee80211vap *, const uint8_t [IEEE80211_ADDR_LEN]); static void mwl_node_cleanup(struct ieee80211_node *); static void mwl_node_drain(struct ieee80211_node *); static void mwl_node_getsignal(const struct ieee80211_node *, int8_t *, int8_t *); static void mwl_node_getmimoinfo(const struct ieee80211_node *, struct ieee80211_mimo_info *); static int mwl_rxbuf_init(struct mwl_softc *, struct mwl_rxbuf *); static void mwl_rx_proc(void *, int); static void mwl_txq_init(struct mwl_softc *sc, struct mwl_txq *, int); static int mwl_tx_setup(struct mwl_softc *, int, int); static int mwl_wme_update(struct ieee80211com *); static void mwl_tx_cleanupq(struct mwl_softc *, struct mwl_txq *); static void mwl_tx_cleanup(struct mwl_softc *); static uint16_t mwl_calcformat(uint8_t rate, const struct ieee80211_node *); static int mwl_tx_start(struct mwl_softc *, struct ieee80211_node *, struct mwl_txbuf *, struct mbuf *); static void mwl_tx_proc(void *, int); static int mwl_chan_set(struct mwl_softc *, struct ieee80211_channel *); static void mwl_draintxq(struct mwl_softc *); static void mwl_cleartxq(struct mwl_softc *, struct ieee80211vap *); static int mwl_recv_action(struct ieee80211_node *, const struct ieee80211_frame *, const uint8_t *, const uint8_t *); static int mwl_addba_request(struct ieee80211_node *, struct ieee80211_tx_ampdu *, int dialogtoken, int baparamset, int batimeout); static int mwl_addba_response(struct ieee80211_node *, struct ieee80211_tx_ampdu 
*, int status, int baparamset, int batimeout); static void mwl_addba_stop(struct ieee80211_node *, struct ieee80211_tx_ampdu *); static int mwl_startrecv(struct mwl_softc *); static MWL_HAL_APMODE mwl_getapmode(const struct ieee80211vap *, struct ieee80211_channel *); static int mwl_setapmode(struct ieee80211vap *, struct ieee80211_channel*); static void mwl_scan_start(struct ieee80211com *); static void mwl_scan_end(struct ieee80211com *); static void mwl_set_channel(struct ieee80211com *); static int mwl_peerstadb(struct ieee80211_node *, int aid, int staid, MWL_HAL_PEERINFO *pi); static int mwl_localstadb(struct ieee80211vap *); static int mwl_newstate(struct ieee80211vap *, enum ieee80211_state, int); static int allocstaid(struct mwl_softc *sc, int aid); static void delstaid(struct mwl_softc *sc, int staid); static void mwl_newassoc(struct ieee80211_node *, int); static void mwl_agestations(void *); static int mwl_setregdomain(struct ieee80211com *, struct ieee80211_regdomain *, int, struct ieee80211_channel []); static void mwl_getradiocaps(struct ieee80211com *, int, int *, struct ieee80211_channel []); static int mwl_getchannels(struct mwl_softc *); static void mwl_sysctlattach(struct mwl_softc *); static void mwl_announce(struct mwl_softc *); SYSCTL_NODE(_hw, OID_AUTO, mwl, CTLFLAG_RD, 0, "Marvell driver parameters"); static int mwl_rxdesc = MWL_RXDESC; /* # rx desc's to allocate */ SYSCTL_INT(_hw_mwl, OID_AUTO, rxdesc, CTLFLAG_RW, &mwl_rxdesc, 0, "rx descriptors allocated"); static int mwl_rxbuf = MWL_RXBUF; /* # rx buffers to allocate */ SYSCTL_INT(_hw_mwl, OID_AUTO, rxbuf, CTLFLAG_RWTUN, &mwl_rxbuf, 0, "rx buffers allocated"); static int mwl_txbuf = MWL_TXBUF; /* # tx buffers to allocate */ SYSCTL_INT(_hw_mwl, OID_AUTO, txbuf, CTLFLAG_RWTUN, &mwl_txbuf, 0, "tx buffers allocated"); static int mwl_txcoalesce = 8; /* # tx packets to q before poking f/w*/ SYSCTL_INT(_hw_mwl, OID_AUTO, txcoalesce, CTLFLAG_RWTUN, &mwl_txcoalesce, 0, "tx buffers to send at 
once"); static int mwl_rxquota = MWL_RXBUF; /* # max buffers to process */ SYSCTL_INT(_hw_mwl, OID_AUTO, rxquota, CTLFLAG_RWTUN, &mwl_rxquota, 0, "max rx buffers to process per interrupt"); static int mwl_rxdmalow = 3; /* # min buffers for wakeup */ SYSCTL_INT(_hw_mwl, OID_AUTO, rxdmalow, CTLFLAG_RWTUN, &mwl_rxdmalow, 0, "min free rx buffers before restarting traffic"); #ifdef MWL_DEBUG static int mwl_debug = 0; SYSCTL_INT(_hw_mwl, OID_AUTO, debug, CTLFLAG_RWTUN, &mwl_debug, 0, "control debugging printfs"); enum { MWL_DEBUG_XMIT = 0x00000001, /* basic xmit operation */ MWL_DEBUG_XMIT_DESC = 0x00000002, /* xmit descriptors */ MWL_DEBUG_RECV = 0x00000004, /* basic recv operation */ MWL_DEBUG_RECV_DESC = 0x00000008, /* recv descriptors */ MWL_DEBUG_RESET = 0x00000010, /* reset processing */ MWL_DEBUG_BEACON = 0x00000020, /* beacon handling */ MWL_DEBUG_INTR = 0x00000040, /* ISR */ MWL_DEBUG_TX_PROC = 0x00000080, /* tx ISR proc */ MWL_DEBUG_RX_PROC = 0x00000100, /* rx ISR proc */ MWL_DEBUG_KEYCACHE = 0x00000200, /* key cache management */ MWL_DEBUG_STATE = 0x00000400, /* 802.11 state transitions */ MWL_DEBUG_NODE = 0x00000800, /* node management */ MWL_DEBUG_RECV_ALL = 0x00001000, /* trace all frames (beacons) */ MWL_DEBUG_TSO = 0x00002000, /* TSO processing */ MWL_DEBUG_AMPDU = 0x00004000, /* BA stream handling */ MWL_DEBUG_ANY = 0xffffffff }; #define IS_BEACON(wh) \ ((wh->i_fc[0] & (IEEE80211_FC0_TYPE_MASK|IEEE80211_FC0_SUBTYPE_MASK)) == \ (IEEE80211_FC0_TYPE_MGT|IEEE80211_FC0_SUBTYPE_BEACON)) #define IFF_DUMPPKTS_RECV(sc, wh) \ ((sc->sc_debug & MWL_DEBUG_RECV) && \ ((sc->sc_debug & MWL_DEBUG_RECV_ALL) || !IS_BEACON(wh))) #define IFF_DUMPPKTS_XMIT(sc) \ (sc->sc_debug & MWL_DEBUG_XMIT) #define DPRINTF(sc, m, fmt, ...) 
do { \ if (sc->sc_debug & (m)) \ printf(fmt, __VA_ARGS__); \ } while (0) #define KEYPRINTF(sc, hk, mac) do { \ if (sc->sc_debug & MWL_DEBUG_KEYCACHE) \ mwl_keyprint(sc, __func__, hk, mac); \ } while (0) static void mwl_printrxbuf(const struct mwl_rxbuf *bf, u_int ix); static void mwl_printtxbuf(const struct mwl_txbuf *bf, u_int qnum, u_int ix); #else #define IFF_DUMPPKTS_RECV(sc, wh) 0 #define IFF_DUMPPKTS_XMIT(sc) 0 #define DPRINTF(sc, m, fmt, ...) do { (void )sc; } while (0) #define KEYPRINTF(sc, k, mac) do { (void )sc; } while (0) #endif static MALLOC_DEFINE(M_MWLDEV, "mwldev", "mwl driver dma buffers"); /* * Each packet has fixed front matter: a 2-byte length * of the payload, followed by a 4-address 802.11 header * (regardless of the actual header and always w/o any * QoS header). The payload then follows. */ struct mwltxrec { uint16_t fwlen; struct ieee80211_frame_addr4 wh; } __packed; /* * Read/Write shorthands for accesses to BAR 0. Note * that all BAR 1 operations are done in the "hal" and * there should be no reference to them here. */ #ifdef MWL_DEBUG static __inline uint32_t RD4(struct mwl_softc *sc, bus_size_t off) { return bus_space_read_4(sc->sc_io0t, sc->sc_io0h, off); } #endif static __inline void WR4(struct mwl_softc *sc, bus_size_t off, uint32_t val) { bus_space_write_4(sc->sc_io0t, sc->sc_io0h, off, val); } int mwl_attach(uint16_t devid, struct mwl_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct mwl_hal *mh; int error = 0; DPRINTF(sc, MWL_DEBUG_ANY, "%s: devid 0x%x\n", __func__, devid); /* * Setup the RX free list lock early, so it can be consistently * removed. */ MWL_RXFREE_INIT(sc); mh = mwl_hal_attach(sc->sc_dev, devid, sc->sc_io1h, sc->sc_io1t, sc->sc_dmat); if (mh == NULL) { device_printf(sc->sc_dev, "unable to attach HAL\n"); error = EIO; goto bad; } sc->sc_mh = mh; /* * Load firmware so we can get setup. We arbitrarily * pick station firmware; we'll re-load firmware as * needed so setting up the wrong mode isn't a big deal. 
*/ if (mwl_hal_fwload(mh, NULL) != 0) { device_printf(sc->sc_dev, "unable to setup builtin firmware\n"); error = EIO; goto bad1; } if (mwl_hal_gethwspecs(mh, &sc->sc_hwspecs) != 0) { device_printf(sc->sc_dev, "unable to fetch h/w specs\n"); error = EIO; goto bad1; } error = mwl_getchannels(sc); if (error != 0) goto bad1; sc->sc_txantenna = 0; /* h/w default */ sc->sc_rxantenna = 0; /* h/w default */ sc->sc_invalid = 0; /* ready to go, enable int handling */ sc->sc_ageinterval = MWL_AGEINTERVAL; /* * Allocate tx+rx descriptors and populate the lists. * We immediately push the information to the firmware * as otherwise it gets upset. */ error = mwl_dma_setup(sc); if (error != 0) { device_printf(sc->sc_dev, "failed to setup descriptors: %d\n", error); goto bad1; } error = mwl_setupdma(sc); /* push to firmware */ if (error != 0) /* NB: mwl_setupdma prints msg */ goto bad1; callout_init(&sc->sc_timer, 1); callout_init_mtx(&sc->sc_watchdog, &sc->sc_mtx, 0); mbufq_init(&sc->sc_snd, ifqmaxlen); sc->sc_tq = taskqueue_create("mwl_taskq", M_NOWAIT, taskqueue_thread_enqueue, &sc->sc_tq); taskqueue_start_threads(&sc->sc_tq, 1, PI_NET, "%s taskq", device_get_nameunit(sc->sc_dev)); TASK_INIT(&sc->sc_rxtask, 0, mwl_rx_proc, sc); TASK_INIT(&sc->sc_radartask, 0, mwl_radar_proc, sc); TASK_INIT(&sc->sc_chanswitchtask, 0, mwl_chanswitch_proc, sc); TASK_INIT(&sc->sc_bawatchdogtask, 0, mwl_bawatchdog_proc, sc); /* NB: insure BK queue is the lowest priority h/w queue */ if (!mwl_tx_setup(sc, WME_AC_BK, MWL_WME_AC_BK)) { device_printf(sc->sc_dev, "unable to setup xmit queue for %s traffic!\n", ieee80211_wme_acnames[WME_AC_BK]); error = EIO; goto bad2; } if (!mwl_tx_setup(sc, WME_AC_BE, MWL_WME_AC_BE) || !mwl_tx_setup(sc, WME_AC_VI, MWL_WME_AC_VI) || !mwl_tx_setup(sc, WME_AC_VO, MWL_WME_AC_VO)) { /* * Not enough hardware tx queues to properly do WME; * just punt and assign them all to the same h/w queue. 
* We could do a better job of this if, for example, * we allocate queues when we switch from station to * AP mode. */ if (sc->sc_ac2q[WME_AC_VI] != NULL) mwl_tx_cleanupq(sc, sc->sc_ac2q[WME_AC_VI]); if (sc->sc_ac2q[WME_AC_BE] != NULL) mwl_tx_cleanupq(sc, sc->sc_ac2q[WME_AC_BE]); sc->sc_ac2q[WME_AC_BE] = sc->sc_ac2q[WME_AC_BK]; sc->sc_ac2q[WME_AC_VI] = sc->sc_ac2q[WME_AC_BK]; sc->sc_ac2q[WME_AC_VO] = sc->sc_ac2q[WME_AC_BK]; } TASK_INIT(&sc->sc_txtask, 0, mwl_tx_proc, sc); ic->ic_softc = sc; ic->ic_name = device_get_nameunit(sc->sc_dev); /* XXX not right but it's not used anywhere important */ ic->ic_phytype = IEEE80211_T_OFDM; ic->ic_opmode = IEEE80211_M_STA; ic->ic_caps = IEEE80211_C_STA /* station mode supported */ | IEEE80211_C_HOSTAP /* hostap mode */ | IEEE80211_C_MONITOR /* monitor mode */ #if 0 | IEEE80211_C_IBSS /* ibss, nee adhoc, mode */ | IEEE80211_C_AHDEMO /* adhoc demo mode */ #endif | IEEE80211_C_MBSS /* mesh point link mode */ | IEEE80211_C_WDS /* WDS supported */ | IEEE80211_C_SHPREAMBLE /* short preamble supported */ | IEEE80211_C_SHSLOT /* short slot time supported */ | IEEE80211_C_WME /* WME/WMM supported */ | IEEE80211_C_BURST /* xmit bursting supported */ | IEEE80211_C_WPA /* capable of WPA1+WPA2 */ | IEEE80211_C_BGSCAN /* capable of bg scanning */ | IEEE80211_C_TXFRAG /* handle tx frags */ | IEEE80211_C_TXPMGT /* capable of txpow mgt */ | IEEE80211_C_DFS /* DFS supported */ ; ic->ic_htcaps = IEEE80211_HTCAP_SMPS_ENA /* SM PS mode enabled */ | IEEE80211_HTCAP_CHWIDTH40 /* 40MHz channel width */ | IEEE80211_HTCAP_SHORTGI20 /* short GI in 20MHz */ | IEEE80211_HTCAP_SHORTGI40 /* short GI in 40MHz */ | IEEE80211_HTCAP_RXSTBC_2STREAM/* 1-2 spatial streams */ #if MWL_AGGR_SIZE == 7935 | IEEE80211_HTCAP_MAXAMSDU_7935 /* max A-MSDU length */ #else | IEEE80211_HTCAP_MAXAMSDU_3839 /* max A-MSDU length */ #endif #if 0 | IEEE80211_HTCAP_PSMP /* PSMP supported */ | IEEE80211_HTCAP_40INTOLERANT /* 40MHz intolerant */ #endif /* s/w capabilities */ | 
IEEE80211_HTC_HT /* HT operation */ | IEEE80211_HTC_AMPDU /* tx A-MPDU */ | IEEE80211_HTC_AMSDU /* tx A-MSDU */ | IEEE80211_HTC_SMPS /* SMPS available */ ; /* * Mark h/w crypto support. * XXX no way to query h/w support. */ ic->ic_cryptocaps |= IEEE80211_CRYPTO_WEP | IEEE80211_CRYPTO_AES_CCM | IEEE80211_CRYPTO_TKIP | IEEE80211_CRYPTO_TKIPMIC ; /* * Transmit requires space in the packet for a special * format transmit record and optional padding between * this record and the payload. Ask the net80211 layer * to arrange this when encapsulating packets so we can * add it efficiently. */ ic->ic_headroom = sizeof(struct mwltxrec) - sizeof(struct ieee80211_frame); IEEE80211_ADDR_COPY(ic->ic_macaddr, sc->sc_hwspecs.macAddr); /* call MI attach routine. */ ieee80211_ifattach(ic); ic->ic_setregdomain = mwl_setregdomain; ic->ic_getradiocaps = mwl_getradiocaps; /* override default methods */ ic->ic_raw_xmit = mwl_raw_xmit; ic->ic_newassoc = mwl_newassoc; ic->ic_updateslot = mwl_updateslot; ic->ic_update_mcast = mwl_update_mcast; ic->ic_update_promisc = mwl_update_promisc; ic->ic_wme.wme_update = mwl_wme_update; ic->ic_transmit = mwl_transmit; ic->ic_ioctl = mwl_ioctl; ic->ic_parent = mwl_parent; ic->ic_node_alloc = mwl_node_alloc; sc->sc_node_cleanup = ic->ic_node_cleanup; ic->ic_node_cleanup = mwl_node_cleanup; sc->sc_node_drain = ic->ic_node_drain; ic->ic_node_drain = mwl_node_drain; ic->ic_node_getsignal = mwl_node_getsignal; ic->ic_node_getmimoinfo = mwl_node_getmimoinfo; ic->ic_scan_start = mwl_scan_start; ic->ic_scan_end = mwl_scan_end; ic->ic_set_channel = mwl_set_channel; sc->sc_recv_action = ic->ic_recv_action; ic->ic_recv_action = mwl_recv_action; sc->sc_addba_request = ic->ic_addba_request; ic->ic_addba_request = mwl_addba_request; sc->sc_addba_response = ic->ic_addba_response; ic->ic_addba_response = mwl_addba_response; sc->sc_addba_stop = ic->ic_addba_stop; ic->ic_addba_stop = mwl_addba_stop; ic->ic_vap_create = mwl_vap_create; ic->ic_vap_delete = mwl_vap_delete; 
ieee80211_radiotap_attach(ic, &sc->sc_tx_th.wt_ihdr, sizeof(sc->sc_tx_th), MWL_TX_RADIOTAP_PRESENT, &sc->sc_rx_th.wr_ihdr, sizeof(sc->sc_rx_th), MWL_RX_RADIOTAP_PRESENT); /* * Setup dynamic sysctl's now that country code and * regdomain are available from the hal. */ mwl_sysctlattach(sc); if (bootverbose) ieee80211_announce(ic); mwl_announce(sc); return 0; bad2: mwl_dma_cleanup(sc); bad1: mwl_hal_detach(mh); bad: MWL_RXFREE_DESTROY(sc); sc->sc_invalid = 1; return error; } int mwl_detach(struct mwl_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; MWL_LOCK(sc); mwl_stop(sc); MWL_UNLOCK(sc); /* * NB: the order of these is important: * o call the 802.11 layer before detaching the hal to * insure callbacks into the driver to delete global * key cache entries can be handled * o reclaim the tx queue data structures after calling * the 802.11 layer as we'll get called back to reclaim * node state and potentially want to use them * o to cleanup the tx queues the hal is called, so detach * it last * Other than that, it's straightforward... */ ieee80211_ifdetach(ic); callout_drain(&sc->sc_watchdog); mwl_dma_cleanup(sc); MWL_RXFREE_DESTROY(sc); mwl_tx_cleanup(sc); mwl_hal_detach(sc->sc_mh); mbufq_drain(&sc->sc_snd); return 0; } /* * MAC address handling for multiple BSS on the same radio. * The first vap uses the MAC address from the EEPROM. For * subsequent vap's we set the U/L bit (bit 1) in the MAC * address and use the next six bits as an index. 
*/
/*
 * NOTE(review): the text of this function is damaged in this copy;
 * characters are missing after "1<" in the loop below, so the rest of
 * the body cannot be read here.  The code is left exactly as found.
 * What is visible: when clone is set and the hal reports multiple-bssid
 * support, indices 0..31 are scanned against sc_bssidmask.
 */
static void
assign_address(struct mwl_softc *sc, uint8_t mac[IEEE80211_ADDR_LEN], int clone)
{
	int i;

	if (clone && mwl_hal_ismbsscapable(sc->sc_mh)) {
		/* NB: we only do this if h/w supports multiple bssid */
		for (i = 0; i < 32; i++)
			if ((sc->sc_bssidmask & (1<sc_bssidmask |= 1<sc_nbssid0++;
}

/*
 * Release the address slot encoded in the upper six bits of mac[0];
 * slot 0 is reference counted through sc_nbssid0.
 *
 * NOTE(review): the text is damaged after "~(1<".  The end of this
 * function and the header of the following vap-create routine
 * (presumably mwl_vap_create, which mwl_attach installs as
 * ic_vap_create) are missing, so the statements below run on from here.
 * The code is left exactly as found.
 */
static void
reclaim_address(struct mwl_softc *sc, const uint8_t mac[IEEE80211_ADDR_LEN])
{
	int i = mac[0] >> 2;
	if (i != 0 || --sc->sc_nbssid0 == 0)
		sc->sc_bssidmask &= ~(1<ic_softc;
	struct mwl_hal *mh = sc->sc_mh;
	struct ieee80211vap *vap, *apvap;
	struct mwl_hal_vap *hvap;
	struct mwl_vap *mvp;
	uint8_t mac[IEEE80211_ADDR_LEN];

	/* work on a private copy; assign_address may modify it */
	IEEE80211_ADDR_COPY(mac, mac0);
	switch (opmode) {
	case IEEE80211_M_HOSTAP:
	case IEEE80211_M_MBSS:
		if ((flags & IEEE80211_CLONE_MACADDR) == 0)
			assign_address(sc, mac, flags & IEEE80211_CLONE_BSSID);
		hvap = mwl_hal_newvap(mh, MWL_HAL_AP, mac);
		if (hvap == NULL) {
			/* undo the address assignment made above */
			if ((flags & IEEE80211_CLONE_MACADDR) == 0)
				reclaim_address(sc, mac);
			return NULL;
		}
		break;
	case IEEE80211_M_STA:
		if ((flags & IEEE80211_CLONE_MACADDR) == 0)
			assign_address(sc, mac, flags & IEEE80211_CLONE_BSSID);
		hvap = mwl_hal_newvap(mh, MWL_HAL_STA, mac);
		if (hvap == NULL) {
			/* undo the address assignment made above */
			if ((flags & IEEE80211_CLONE_MACADDR) == 0)
				reclaim_address(sc, mac);
			return NULL;
		}
		/* no h/w beacon miss support; always use s/w */
		flags |= IEEE80211_CLONE_NOBEACONS;
		break;
	case IEEE80211_M_WDS:
		hvap = NULL;		/* NB: we use associated AP vap */
		if (sc->sc_napvaps == 0)
			return NULL;	/* no existing AP vap */
		break;
	case IEEE80211_M_MONITOR:
		hvap = NULL;
		break;
	case IEEE80211_M_IBSS:
	case IEEE80211_M_AHDEMO:
	default:
		/* modes not handled by this routine */
		return NULL;
	}

	mvp = malloc(sizeof(struct mwl_vap), M_80211_VAP, M_WAITOK | M_ZERO);
	mvp->mv_hvap = hvap;
	if (opmode == IEEE80211_M_WDS) {
		/*
		 * WDS vaps must have an associated AP vap; find one.
		 * XXX not right.
		 */
		TAILQ_FOREACH(apvap, &ic->ic_vaps, iv_next)
			if (apvap->iv_opmode == IEEE80211_M_HOSTAP) {
				mvp->mv_ap_hvap = MWL_VAP(apvap)->mv_hvap;
				break;
			}
		KASSERT(mvp->mv_ap_hvap != NULL, ("no ap vap"));
	}
	vap = &mvp->mv_vap;
	ieee80211_vap_setup(ic, vap, name, unit, opmode, flags, bssid);
	/* override with driver methods */
	mvp->mv_newstate = vap->iv_newstate;	/* keep the previous handler */
	vap->iv_newstate = mwl_newstate;
	vap->iv_max_keyix = 0;	/* XXX */
	vap->iv_key_alloc = mwl_key_alloc;
	vap->iv_key_delete = mwl_key_delete;
	vap->iv_key_set = mwl_key_set;
#ifdef MWL_HOST_PS_SUPPORT
	if (opmode == IEEE80211_M_HOSTAP || opmode == IEEE80211_M_MBSS) {
		vap->iv_update_ps = mwl_update_ps;
		mvp->mv_set_tim = vap->iv_set_tim;
		vap->iv_set_tim = mwl_set_tim;
	}
#endif
	vap->iv_reset = mwl_reset;
	vap->iv_update_beacon = mwl_beacon_update;

	/* override max aid so sta's cannot assoc when we're out of sta id's */
	vap->iv_max_aid = MWL_MAXSTAID;
	/* override default A-MPDU rx parameters */
	vap->iv_ampdu_rxmax = IEEE80211_HTCAP_MAXRXAMPDU_64K;
	vap->iv_ampdu_density = IEEE80211_HTCAP_MPDUDENSITY_4;

	/* complete setup */
	ieee80211_vap_attach(vap, mwl_media_change, ieee80211_media_status,
	    mac);

	/* account for the new vap by operating mode */
	switch (vap->iv_opmode) {
	case IEEE80211_M_HOSTAP:
	case IEEE80211_M_MBSS:
	case IEEE80211_M_STA:
		/*
		 * Setup sta db entry for local address.
		 */
		mwl_localstadb(vap);
		if (vap->iv_opmode == IEEE80211_M_HOSTAP ||
		    vap->iv_opmode == IEEE80211_M_MBSS)
			sc->sc_napvaps++;
		else
			sc->sc_nstavaps++;
		break;
	case IEEE80211_M_WDS:
		sc->sc_nwdsvaps++;
		break;
	default:
		break;
	}
	/*
	 * Setup overall operating mode.
	 */
	if (sc->sc_napvaps)
		ic->ic_opmode = IEEE80211_M_HOSTAP;
	else if (sc->sc_nstavaps)
		ic->ic_opmode = IEEE80211_M_STA;
	else
		ic->ic_opmode = opmode;

	return vap;
}

/*
 * Tear down a vap: detach it from net80211, release the hal vap and
 * station entry for AP/MBSS/STA vaps, adjust the per-mode vap counts,
 * clear the vap's frames from the tx queues and free the driver state.
 * While the device is running, interrupts are masked for the duration
 * and restored from sc_imask at the end.
 */
static void
mwl_vap_delete(struct ieee80211vap *vap)
{
	struct mwl_vap *mvp = MWL_VAP(vap);
	struct mwl_softc *sc = vap->iv_ic->ic_softc;
	struct mwl_hal *mh = sc->sc_mh;
	struct mwl_hal_vap *hvap = mvp->mv_hvap;
	/* sampled here because it is used after ieee80211_vap_detach */
	enum ieee80211_opmode opmode = vap->iv_opmode;

	/* XXX disallow ap vap delete if WDS still present */
	if (sc->sc_running) {
		/* quiesce h/w while we remove the vap */
		mwl_hal_intrset(mh, 0);		/* disable interrupts */
	}
	ieee80211_vap_detach(vap);
	switch (opmode) {
	case IEEE80211_M_HOSTAP:
	case IEEE80211_M_MBSS:
	case IEEE80211_M_STA:
		KASSERT(hvap != NULL, ("no hal vap handle"));
		(void) mwl_hal_delstation(hvap, vap->iv_myaddr);
		mwl_hal_delvap(hvap);
		if (opmode == IEEE80211_M_HOSTAP || opmode == IEEE80211_M_MBSS)
			sc->sc_napvaps--;
		else
			sc->sc_nstavaps--;
		/* XXX don't do it for IEEE80211_CLONE_MACADDR */
		reclaim_address(sc, vap->iv_myaddr);
		break;
	case IEEE80211_M_WDS:
		sc->sc_nwdsvaps--;
		break;
	default:
		break;
	}
	mwl_cleartxq(sc, vap);
	free(mvp, M_80211_VAP);
	if (sc->sc_running)
		mwl_hal_intrset(mh, sc->sc_imask);
}

/*
 * Suspend: stop the device while holding the driver lock.
 */
void
mwl_suspend(struct mwl_softc *sc)
{

	MWL_LOCK(sc);
	mwl_stop(sc);
	MWL_UNLOCK(sc);
}

/*
 * Resume: re-initialize the device when ic_nrunning is non-zero and,
 * if that succeeds, restart all vaps.  error starts out as EDOOFUS so
 * that nothing is restarted when mwl_init was not called.
 */
void
mwl_resume(struct mwl_softc *sc)
{
	int error = EDOOFUS;

	MWL_LOCK(sc);
	if (sc->sc_ic.ic_nrunning > 0)
		error = mwl_init(sc);
	MWL_UNLOCK(sc);

	if (error == 0)
		ieee80211_start_all(&sc->sc_ic);	/* start all vap's */
}

/*
 * Shutdown: stop the device while holding the driver lock.
 * arg is the driver softc.
 */
void
mwl_shutdown(void *arg)
{
	struct mwl_softc *sc = arg;

	MWL_LOCK(sc);
	mwl_stop(sc);
	MWL_UNLOCK(sc);
}

/*
 * Interrupt handler.  Most of the actual processing is deferred.
 * arg is the driver softc.  Rx, tx-done, BA watchdog, radar and
 * channel-switch causes are handed to tasks on sc_tq; command
 * completion is passed to the hal directly.
 */
void
mwl_intr(void *arg)
{
	struct mwl_softc *sc = arg;
	struct mwl_hal *mh = sc->sc_mh;
	uint32_t status;

	if (sc->sc_invalid) {
		/*
		 * The hardware is not ready/present, don't touch anything.
		 * Note this can happen early on if the IRQ is shared.
		 */
		DPRINTF(sc, MWL_DEBUG_ANY, "%s: invalid; ignored\n", __func__);
		return;
	}
	/*
	 * Figure out the reason(s) for the interrupt.
	 */
	mwl_hal_getisr(mh, &status);		/* NB: clears ISR too */
	if (status == 0)			/* must be a shared irq */
		return;

	DPRINTF(sc, MWL_DEBUG_INTR, "%s: status 0x%x imask 0x%x\n",
	    __func__, status, sc->sc_imask);
	if (status & MACREG_A2HRIC_BIT_RX_RDY)
		taskqueue_enqueue(sc->sc_tq, &sc->sc_rxtask);
	if (status & MACREG_A2HRIC_BIT_TX_DONE)
		taskqueue_enqueue(sc->sc_tq, &sc->sc_txtask);
	if (status & MACREG_A2HRIC_BIT_BA_WATCHDOG)
		taskqueue_enqueue(sc->sc_tq, &sc->sc_bawatchdogtask);
	if (status & MACREG_A2HRIC_BIT_OPC_DONE)
		mwl_hal_cmddone(mh);
	/* the causes with empty bodies below are recognized but not acted on */
	if (status & MACREG_A2HRIC_BIT_MAC_EVENT) {
		;
	}
	if (status & MACREG_A2HRIC_BIT_ICV_ERROR) {
		/* TKIP ICV error */
		sc->sc_stats.mst_rx_badtkipicv++;
	}
	if (status & MACREG_A2HRIC_BIT_QUEUE_EMPTY) {
		/* 11n aggregation queue is empty, re-fill */
		;
	}
	if (status & MACREG_A2HRIC_BIT_QUEUE_FULL) {
		;
	}
	if (status & MACREG_A2HRIC_BIT_RADAR_DETECT) {
		/* radar detected, process event */
		taskqueue_enqueue(sc->sc_tq, &sc->sc_radartask);
	}
	if (status & MACREG_A2HRIC_BIT_CHAN_SWITCH) {
		/* DFS channel switch */
		taskqueue_enqueue(sc->sc_tq, &sc->sc_chanswitchtask);
	}
}

/*
 * Task handler queued by mwl_intr on a radar-detect interrupt:
 * count the event and notify net80211 of radar on the current channel.
 */
static void
mwl_radar_proc(void *arg, int pending)
{
	struct mwl_softc *sc = arg;
	struct ieee80211com *ic = &sc->sc_ic;

	DPRINTF(sc, MWL_DEBUG_ANY, "%s: radar detected, pending %u\n",
	    __func__, pending);

	sc->sc_stats.mst_radardetect++;
	/* XXX stop h/w BA streams? */

	IEEE80211_LOCK(ic);
	ieee80211_dfs_notify_radar(ic, ic->ic_curchan);
	IEEE80211_UNLOCK(ic);
}

/*
 * Task handler queued by mwl_intr on a channel-switch interrupt:
 * clear sc_csapending and complete the CSA switch under the com lock.
 */
static void
mwl_chanswitch_proc(void *arg, int pending)
{
	struct mwl_softc *sc = arg;
	struct ieee80211com *ic = &sc->sc_ic;

	DPRINTF(sc, MWL_DEBUG_ANY, "%s: channel switch notice, pending %u\n",
	    __func__, pending);

	IEEE80211_LOCK(ic);
	sc->sc_csapending = 0;
	ieee80211_csa_completeswitch(ic);
	IEEE80211_UNLOCK(ic);
}

/*
 * Stop the A-MPDU session tied to a BA stream.  data[0] of the stream
 * is used as the node and data[1] is handed to ieee80211_ampdu_stop
 * as its second argument.
 */
static void
mwl_bawatchdog(const MWL_HAL_BASTREAM *sp)
{
	struct ieee80211_node *ni = sp->data[0];

	/* send DELBA and drop the stream */
	ieee80211_ampdu_stop(ni, sp->data[1], IEEE80211_REASON_UNSPECIFIED);
}

/*
 * Task handler queued by mwl_intr on a BA watchdog interrupt.
 * The value fetched from the hal is interpreted as follows:
 *   0xff		stop every stream found at indices 0..7
 *   0xaa		nothing is done
 *   anything else	used as the index of a single stream to stop
 */
static void
mwl_bawatchdog_proc(void *arg, int pending)
{
	struct mwl_softc *sc = arg;
	struct mwl_hal *mh = sc->sc_mh;
	const MWL_HAL_BASTREAM *sp;
	uint8_t bitmap, n;

	sc->sc_stats.mst_bawatchdog++;

	if (mwl_hal_getwatchdogbitmap(mh, &bitmap) != 0) {
		DPRINTF(sc, MWL_DEBUG_AMPDU,
		    "%s: could not get bitmap\n", __func__);
		sc->sc_stats.mst_bawatchdog_failed++;
		return;
	}
	DPRINTF(sc, MWL_DEBUG_AMPDU, "%s: bitmap 0x%x\n", __func__, bitmap);
	if (bitmap == 0xff) {
		n = 0;
		/* disable all ba streams */
		/* NB: bitmap is reused as the loop index from here on */
		for (bitmap = 0; bitmap < 8; bitmap++) {
			sp = mwl_hal_bastream_lookup(mh, bitmap);
			if (sp != NULL) {
				mwl_bawatchdog(sp);
				n++;
			}
		}
		if (n == 0) {
			DPRINTF(sc, MWL_DEBUG_AMPDU,
			    "%s: no BA streams found\n", __func__);
			sc->sc_stats.mst_bawatchdog_empty++;
		}
	} else if (bitmap != 0xaa) {
		/* disable a single ba stream */
		sp = mwl_hal_bastream_lookup(mh, bitmap);
		if (sp != NULL) {
			mwl_bawatchdog(sp);
		} else {
			DPRINTF(sc, MWL_DEBUG_AMPDU,
			    "%s: no BA stream %d\n", __func__, bitmap);
			sc->sc_stats.mst_bawatchdog_notfound++;
		}
	}
}

/*
 * Convert net80211 channel to a HAL channel.
*/ static void mwl_mapchan(MWL_HAL_CHANNEL *hc, const struct ieee80211_channel *chan) { hc->channel = chan->ic_ieee; *(uint32_t *)&hc->channelFlags = 0; if (IEEE80211_IS_CHAN_2GHZ(chan)) hc->channelFlags.FreqBand = MWL_FREQ_BAND_2DOT4GHZ; else if (IEEE80211_IS_CHAN_5GHZ(chan)) hc->channelFlags.FreqBand = MWL_FREQ_BAND_5GHZ; if (IEEE80211_IS_CHAN_HT40(chan)) { hc->channelFlags.ChnlWidth = MWL_CH_40_MHz_WIDTH; if (IEEE80211_IS_CHAN_HT40U(chan)) hc->channelFlags.ExtChnlOffset = MWL_EXT_CH_ABOVE_CTRL_CH; else hc->channelFlags.ExtChnlOffset = MWL_EXT_CH_BELOW_CTRL_CH; } else hc->channelFlags.ChnlWidth = MWL_CH_20_MHz_WIDTH; /* XXX 10MHz channels */ } /* * Inform firmware of our tx/rx dma setup. The BAR 0 * writes below are for compatibility with older firmware. * For current firmware we send this information with a * cmd block via mwl_hal_sethwdma. */ static int mwl_setupdma(struct mwl_softc *sc) { int error, i; sc->sc_hwdma.rxDescRead = sc->sc_rxdma.dd_desc_paddr; WR4(sc, sc->sc_hwspecs.rxDescRead, sc->sc_hwdma.rxDescRead); WR4(sc, sc->sc_hwspecs.rxDescWrite, sc->sc_hwdma.rxDescRead); for (i = 0; i < MWL_NUM_TX_QUEUES-MWL_NUM_ACK_QUEUES; i++) { struct mwl_txq *txq = &sc->sc_txq[i]; sc->sc_hwdma.wcbBase[i] = txq->dma.dd_desc_paddr; WR4(sc, sc->sc_hwspecs.wcbBase[i], sc->sc_hwdma.wcbBase[i]); } sc->sc_hwdma.maxNumTxWcb = mwl_txbuf; sc->sc_hwdma.maxNumWCB = MWL_NUM_TX_QUEUES-MWL_NUM_ACK_QUEUES; error = mwl_hal_sethwdma(sc->sc_mh, &sc->sc_hwdma); if (error != 0) { device_printf(sc->sc_dev, "unable to setup tx/rx dma; hal status %u\n", error); /* XXX */ } return error; } /* * Inform firmware of tx rate parameters. * Called after a channel change. 
*/
static int
mwl_setcurchanrates(struct mwl_softc *sc)
{
	struct ieee80211com *ic = &sc->sc_ic;
	const struct ieee80211_rateset *rs;
	MWL_HAL_TXRATE rates;

	memset(&rates, 0, sizeof(rates));
	rs = ieee80211_get_suprates(ic, ic->ic_curchan);
	/* rate used to send management frames */
	rates.MgtRate = rs->rs_rates[0] & IEEE80211_RATE_VAL;
	/* rate used to send multicast frames */
	rates.McastRate = rates.MgtRate;

	return mwl_hal_settxrate_auto(sc->sc_mh, &rates);
}

/*
 * Inform firmware of tx rate parameters.  Called whenever
 * user-settable params change and after a channel change.
 * The vap must be in RUN state.  Also refreshes the EAPOL
 * rate cookie kept in the driver vap.  Returns the status
 * of mwl_hal_settxrate.
 */
static int
mwl_setrates(struct ieee80211vap *vap)
{
	struct mwl_vap *mvp = MWL_VAP(vap);
	struct ieee80211_node *ni = vap->iv_bss;
	const struct ieee80211_txparam *tp = ni->ni_txparms;
	MWL_HAL_TXRATE rates;

	KASSERT(vap->iv_state == IEEE80211_S_RUN, ("state %d", vap->iv_state));

	/*
	 * Update the h/w rate map.
	 * NB: 0x80 for MCS is passed through unchanged
	 */
	memset(&rates, 0, sizeof(rates));
	/* rate used to send management frames */
	rates.MgtRate = tp->mgmtrate;
	/* rate used to send multicast frames */
	rates.McastRate = tp->mcastrate;

	/* while here calculate EAPOL fixed rate cookie */
	mvp->mv_eapolformat = htole16(mwl_calcformat(rates.MgtRate, ni));

	/* fixed-rate mode is selected only when a unicast rate is configured */
	return mwl_hal_settxrate(mvp->mv_hvap,
	    tp->ucastrate != IEEE80211_FIXED_RATE_NONE ?
		RATE_FIXED : RATE_AUTO, &rates);
}

/*
 * Setup a fixed xmit rate cookie for EAPOL frames.
 * The vap must be in RUN state.
 */
static void
mwl_seteapolformat(struct ieee80211vap *vap)
{
	struct mwl_vap *mvp = MWL_VAP(vap);
	struct ieee80211_node *ni = vap->iv_bss;
	enum ieee80211_phymode mode;
	uint8_t rate;

	KASSERT(vap->iv_state == IEEE80211_S_RUN, ("state %d", vap->iv_state));

	mode = ieee80211_chan2mode(ni->ni_chan);
	/*
	 * Use legacy rates when operating a mixed HT+non-HT bss.
	 * NB: this may violate POLA for sta and wds vap's.
	 */
	if (mode == IEEE80211_MODE_11NA &&
	    (vap->iv_flags_ht & IEEE80211_FHT_PUREN) == 0)
		rate = vap->iv_txparms[IEEE80211_MODE_11A].mgmtrate;
	else if (mode == IEEE80211_MODE_11NG &&
	    (vap->iv_flags_ht & IEEE80211_FHT_PUREN) == 0)
		rate = vap->iv_txparms[IEEE80211_MODE_11G].mgmtrate;
	else
		rate = vap->iv_txparms[mode].mgmtrate;

	mvp->mv_eapolformat = htole16(mwl_calcformat(rate, ni));
}

/*
 * Map SKU+country code to region code for radar bin'ing.
 * Regdomains not listed in the switch map to DOMAIN_CODE_FCC.
 */
static int
mwl_map2regioncode(const struct ieee80211_regdomain *rd)
{
	switch (rd->regdomain) {
	case SKU_FCC:
	case SKU_FCC3:
		return DOMAIN_CODE_FCC;
	case SKU_CA:
		return DOMAIN_CODE_IC;
	case SKU_ETSI:
	case SKU_ETSI2:
	case SKU_ETSI3:
		/* Spain and France have their own codes within ETSI */
		if (rd->country == CTRY_SPAIN)
			return DOMAIN_CODE_SPAIN;
		if (rd->country == CTRY_FRANCE || rd->country == CTRY_FRANCE2)
			return DOMAIN_CODE_FRANCE;
		/* XXX force 1.3.1 radar type */
		return DOMAIN_CODE_ETSI_131;
	case SKU_JAPAN:
		return DOMAIN_CODE_MKK;
	case SKU_ROW:
		return DOMAIN_CODE_DGT;	/* Taiwan */
	case SKU_APAC:
	case SKU_APAC2:
	case SKU_APAC3:
		return DOMAIN_CODE_AUS;	/* Australia */
	}
	/* XXX KOREA? */
	return DOMAIN_CODE_FCC;			/* XXX? */
}

/*
 * Push vap-independent settings to the hal: antennas, radio, WMM,
 * current channel, rate adaptation, optimization level, region code,
 * A-MPDU rate mode and CF-End.  The results of the individual hal
 * calls are not checked; this routine always returns 1.
 */
static int
mwl_hal_reset(struct mwl_softc *sc)
{
	struct ieee80211com *ic = &sc->sc_ic;
	struct mwl_hal *mh = sc->sc_mh;

	mwl_hal_setantenna(mh, WL_ANTENNATYPE_RX, sc->sc_rxantenna);
	mwl_hal_setantenna(mh, WL_ANTENNATYPE_TX, sc->sc_txantenna);
	mwl_hal_setradio(mh, 1, WL_AUTO_PREAMBLE);
	mwl_hal_setwmm(sc->sc_mh, (ic->ic_flags & IEEE80211_F_WME) != 0);
	mwl_chan_set(sc, ic->ic_curchan);
	/* NB: RF/RA performance tuned for indoor mode */
	mwl_hal_setrateadaptmode(mh, 0);
	mwl_hal_setoptimizationlevel(mh,
	    (ic->ic_flags & IEEE80211_F_BURST) != 0);

	mwl_hal_setregioncode(mh, mwl_map2regioncode(&ic->ic_regdomain));

	mwl_hal_setaggampduratemode(mh, 1, 80);		/* XXX */
	mwl_hal_setcfend(mh, 0);			/* XXX */

	return 1;
}

/*
 * (Re)initialize the device: stop it, push vap-independent state,
 * start receive, then enable interrupts and arm the watchdog callout.
 * The driver lock must be held by the caller.  Returns 0 on success,
 * EIO if the hal reset fails, or the error from mwl_startrecv.
 */
static int
mwl_init(struct mwl_softc *sc)
{
	struct mwl_hal *mh = sc->sc_mh;
	int error = 0;

	MWL_LOCK_ASSERT(sc);

	/*
	 * Stop anything previously setup.  This is safe
	 * whether this is the first time through or not.
	 */
	mwl_stop(sc);

	/*
	 * Push vap-independent state to the firmware.
	 */
	if (!mwl_hal_reset(sc)) {
		device_printf(sc->sc_dev, "unable to reset hardware\n");
		return EIO;
	}

	/*
	 * Setup recv (once); transmit is already good to go.
	 */
	error = mwl_startrecv(sc);
	if (error != 0) {
		device_printf(sc->sc_dev, "unable to start recv logic\n");
		return error;
	}

	/*
	 * Enable interrupts.
	 */
	sc->sc_imask = MACREG_A2HRIC_BIT_RX_RDY
		     | MACREG_A2HRIC_BIT_TX_DONE
		     | MACREG_A2HRIC_BIT_OPC_DONE
#if 0
		     | MACREG_A2HRIC_BIT_MAC_EVENT
#endif
		     | MACREG_A2HRIC_BIT_ICV_ERROR
		     | MACREG_A2HRIC_BIT_RADAR_DETECT
		     | MACREG_A2HRIC_BIT_CHAN_SWITCH
#if 0
		     | MACREG_A2HRIC_BIT_QUEUE_EMPTY
#endif
		     | MACREG_A2HRIC_BIT_BA_WATCHDOG
		     | MACREQ_A2HRIC_BIT_TX_ACK
		     ;

	/* mark running before unmasking so mwl_start and friends proceed */
	sc->sc_running = 1;
	mwl_hal_intrset(mh, sc->sc_imask);
	callout_reset(&sc->sc_watchdog, hz, mwl_watchdog, sc);

	return 0;
}

/*
 * Stop the device if it is running: clear the running flag, stop the
 * watchdog callout, clear the tx timer and drain the tx queues.
 * The driver lock must be held by the caller.
 */
static void
mwl_stop(struct mwl_softc *sc)
{

	MWL_LOCK_ASSERT(sc);
	if (sc->sc_running) {
		/*
		 * Shutdown the hardware and driver.
		 */
		sc->sc_running = 0;
		callout_stop(&sc->sc_watchdog);
		sc->sc_tx_timer = 0;
		mwl_draintxq(sc);
	}
}

/*
 * Push per-vap settings (rates when in RUN state, RTS threshold,
 * short GI, HT protection) to the hal.  For a hostap, mbss or ibss
 * vap in RUN state the AP mode and beacon are set up again as well,
 * and the result of mwl_beacon_setup is returned; otherwise 0.
 */
static int
mwl_reset_vap(struct ieee80211vap *vap, int state)
{
	struct mwl_hal_vap *hvap = MWL_VAP(vap)->mv_hvap;
	struct ieee80211com *ic = vap->iv_ic;

	if (state == IEEE80211_S_RUN)
		mwl_setrates(vap);
	/* XXX off by 1? */
	mwl_hal_setrtsthreshold(hvap, vap->iv_rtsthreshold);
	/* XXX auto? 20/40 split? */
	mwl_hal_sethtgi(hvap, (vap->iv_flags_ht &
	    (IEEE80211_FHT_SHORTGI20|IEEE80211_FHT_SHORTGI40)) ? 1 : 0);
	mwl_hal_setnprot(hvap, ic->ic_htprotmode == IEEE80211_PROT_NONE ?
	    HTPROTECT_NONE : HTPROTECT_AUTO);
	/* XXX txpower cap */

	/* re-setup beacons */
	if (state == IEEE80211_S_RUN &&
	    (vap->iv_opmode == IEEE80211_M_HOSTAP ||
	     vap->iv_opmode == IEEE80211_M_MBSS ||
	     vap->iv_opmode == IEEE80211_M_IBSS)) {
		mwl_setapmode(vap, vap->iv_bss->ni_chan);
		mwl_hal_setnprotmode(hvap,
		    MS(ic->ic_curhtprotmode, IEEE80211_HTINFO_OPMODE));
		return mwl_beacon_setup(vap);
	}
	return 0;
}

/*
 * Reset the hardware w/o losing operational state.
 * Used to reset or reload hardware state for a vap.
 * Nothing is done (and 0 is returned) for a vap without a
 * hal vap.  The cmd argument is not used.
 */
static int
mwl_reset(struct ieee80211vap *vap, u_long cmd)
{
	struct mwl_hal_vap *hvap = MWL_VAP(vap)->mv_hvap;
	int error = 0;

	if (hvap != NULL) {			/* WDS, MONITOR, etc. */
		struct ieee80211com *ic = vap->iv_ic;
		struct mwl_softc *sc = ic->ic_softc;
		struct mwl_hal *mh = sc->sc_mh;

		/* XXX handle DWDS sta vap change */
		/* XXX do we need to disable interrupts? */
		mwl_hal_intrset(mh, 0);		/* disable interrupts */
		error = mwl_reset_vap(vap, vap->iv_state);
		mwl_hal_intrset(mh, sc->sc_imask);
	}
	return error;
}

/*
 * Allocate a tx buffer for sending a frame.  The
 * packet is assumed to have the WME AC stored so
 * we can use it to select the appropriate h/w queue.
 * Returns NULL when the queue's free list is empty.
 */
static struct mwl_txbuf *
mwl_gettxbuf(struct mwl_softc *sc, struct mwl_txq *txq)
{
	struct mwl_txbuf *bf;

	/*
	 * Grab a TX buffer and associated resources.
	 */
	MWL_TXQ_LOCK(txq);
	bf = STAILQ_FIRST(&txq->free);
	if (bf != NULL) {
		STAILQ_REMOVE_HEAD(&txq->free, bf_list);
		txq->nfree--;
	}
	MWL_TXQ_UNLOCK(txq);
	if (bf == NULL)
		DPRINTF(sc, MWL_DEBUG_XMIT,
		    "%s: out of xmit buffers on q %d\n", __func__, txq->qnum);
	return bf;
}

/*
 * Return a tx buffer to the queue it came from.  Note there
 * are two cases because we must preserve the order of buffers
 * as it reflects the fixed order of descriptors in memory
 * (the firmware pre-fetches descriptors so we cannot reorder).
*/
static void
mwl_puttxbuf_head(struct mwl_txq *txq, struct mwl_txbuf *bf)
{
	/* drop the mbuf and node references held by the buffer */
	bf->bf_m = NULL;
	bf->bf_node = NULL;
	MWL_TXQ_LOCK(txq);
	STAILQ_INSERT_HEAD(&txq->free, bf, bf_list);
	txq->nfree++;
	MWL_TXQ_UNLOCK(txq);
}

/*
 * Same as mwl_puttxbuf_head, but the buffer goes to the tail of the
 * queue's free list.
 */
static void
mwl_puttxbuf_tail(struct mwl_txq *txq, struct mwl_txbuf *bf)
{
	bf->bf_m = NULL;
	bf->bf_node = NULL;
	MWL_TXQ_LOCK(txq);
	STAILQ_INSERT_TAIL(&txq->free, bf, bf_list);
	txq->nfree++;
	MWL_TXQ_UNLOCK(txq);
}

/*
 * Queue a frame on the software send queue and run mwl_start, all
 * under the driver lock.  Returns ENXIO when the device is not
 * running, the mbufq_enqueue error when the frame cannot be queued,
 * or 0.
 */
static int
mwl_transmit(struct ieee80211com *ic, struct mbuf *m)
{
	struct mwl_softc *sc = ic->ic_softc;
	int error;

	MWL_LOCK(sc);
	if (!sc->sc_running) {
		MWL_UNLOCK(sc);
		return (ENXIO);
	}
	error = mbufq_enqueue(&sc->sc_snd, m);
	if (error) {
		MWL_UNLOCK(sc);
		return (error);
	}
	mwl_start(sc);
	MWL_UNLOCK(sc);
	return (0);
}

/*
 * Drain the software send queue: for each frame take a tx buffer from
 * the h/w queue selected by the frame's WME AC and hand the frame to
 * mwl_tx_start.  The firmware is poked once per mwl_txcoalesce frames
 * and once more at the end if frames are still unannounced.
 * The driver lock must be held by the caller.
 */
static void
mwl_start(struct mwl_softc *sc)
{
	struct ieee80211_node *ni;
	struct mwl_txbuf *bf;
	struct mbuf *m;
	struct mwl_txq *txq = NULL;	/* XXX silence gcc */
	int nqueued;

	MWL_LOCK_ASSERT(sc);
	if (!sc->sc_running || sc->sc_invalid)
		return;
	nqueued = 0;
	while ((m = mbufq_dequeue(&sc->sc_snd)) != NULL) {
		/*
		 * Grab the node for the destination.
		 */
		ni = (struct ieee80211_node *) m->m_pkthdr.rcvif;
		KASSERT(ni != NULL, ("no node"));
		m->m_pkthdr.rcvif = NULL;	/* committed, clear ref */
		/*
		 * Grab a TX buffer and associated resources.
		 * We honor the classification by the 802.11 layer.
		 */
		txq = sc->sc_ac2q[M_WME_GETAC(m)];
		bf = mwl_gettxbuf(sc, txq);
		if (bf == NULL) {
			/* no buffer: the frame and its node reference are dropped */
			m_freem(m);
			ieee80211_free_node(ni);
#ifdef MWL_TX_NODROP
			sc->sc_stats.mst_tx_qstop++;
			break;
#else
			DPRINTF(sc, MWL_DEBUG_XMIT,
			    "%s: tail drop on q %d\n", __func__, txq->qnum);
			sc->sc_stats.mst_tx_qdrop++;
			continue;
#endif /* MWL_TX_NODROP */
		}

		/*
		 * Pass the frame to the h/w for transmission.
		 */
		if (mwl_tx_start(sc, ni, bf, m)) {
			/*
			 * NOTE(review): m is not freed here; presumably
			 * mwl_tx_start disposes of it on failure - confirm.
			 */
			if_inc_counter(ni->ni_vap->iv_ifp,
			    IFCOUNTER_OERRORS, 1);
			mwl_puttxbuf_head(txq, bf);
			ieee80211_free_node(ni);
			continue;
		}
		nqueued++;
		if (nqueued >= mwl_txcoalesce) {
			/*
			 * Poke the firmware to process queued frames;
			 * see below about (lack of) locking.
			 */
			nqueued = 0;
			mwl_hal_txstart(sc->sc_mh, 0/*XXX*/);
		}
	}
	if (nqueued) {
		/*
		 * NB: We don't need to lock against tx done because
		 * this just prods the firmware to check the transmit
		 * descriptors.  The firmware will also start fetching
		 * descriptors by itself if it notices new ones are
		 * present when it goes to deliver a tx done interrupt
		 * to the host. So if we race with tx done processing
		 * it's ok.  Delivering the kick here rather than in
		 * mwl_tx_start is an optimization to avoid poking the
		 * firmware for each packet.
		 *
		 * NB: the queue id isn't used so 0 is ok.
		 */
		mwl_hal_txstart(sc->sc_mh, 0/*XXX*/);
	}
}

/*
 * Transmit a single raw frame, bypassing the software send queue.
 * Returns ENETDOWN when the device is not running or is invalid,
 * ENOBUFS when no tx buffer is available (the mbuf is freed in both
 * cases), EIO when mwl_tx_start fails, or 0.  The params argument is
 * not used.
 */
static int
mwl_raw_xmit(struct ieee80211_node *ni, struct mbuf *m,
	const struct ieee80211_bpf_params *params)
{
	struct ieee80211com *ic = ni->ni_ic;
	struct mwl_softc *sc = ic->ic_softc;
	struct mwl_txbuf *bf;
	struct mwl_txq *txq;

	if (!sc->sc_running || sc->sc_invalid) {
		m_freem(m);
		return ENETDOWN;
	}
	/*
	 * Grab a TX buffer and associated resources.
	 * Note that we depend on the classification
	 * by the 802.11 layer to get to the right h/w
	 * queue.  Management frames must ALWAYS go on
	 * queue 1 but we cannot just force that here
	 * because we may receive non-mgt frames.
	 */
	txq = sc->sc_ac2q[M_WME_GETAC(m)];
	bf = mwl_gettxbuf(sc, txq);
	if (bf == NULL) {
		sc->sc_stats.mst_tx_qstop++;
		m_freem(m);
		return ENOBUFS;
	}
	/*
	 * Pass the frame to the h/w for transmission.
	 */
	if (mwl_tx_start(sc, ni, bf, m)) {
		mwl_puttxbuf_head(txq, bf);

		return EIO;		/* XXX */
	}
	/*
	 * NB: We don't need to lock against tx done because
	 * this just prods the firmware to check the transmit
	 * descriptors.  The firmware will also start fetching
	 * descriptors by itself if it notices new ones are
	 * present when it goes to deliver a tx done interrupt
	 * to the host. So if we race with tx done processing
	 * it's ok.  Delivering the kick here rather than in
	 * mwl_tx_start is an optimization to avoid poking the
	 * firmware for each packet.
	 *
	 * NB: the queue id isn't used so 0 is ok.
	 */
	mwl_hal_txstart(sc->sc_mh, 0/*XXX*/);
	return 0;
}

/*
 * Media change handler passed to ieee80211_vap_attach.  An ENETRESET
 * result from net80211 is handled by pushing the rates to the
 * firmware and is reported to the caller as success.
 */
static int
mwl_media_change(struct ifnet *ifp)
{
	struct ieee80211vap *vap = ifp->if_softc;
	int error;

	error = ieee80211_media_change(ifp);
	/* NB: only the fixed rate can change and that doesn't need a reset */
	if (error == ENETRESET) {
		mwl_setrates(vap);
		error = 0;
	}
	return error;
}

#ifdef MWL_DEBUG
/*
 * Debug helper: print a hal key record (index, cipher name, key
 * bytes, mac address, TKIP MIC keys when present, flags).
 *
 * NOTE(review): ciphers[] is indexed by hk->keyTypeId without a range
 * check; callers are presumably limited to the three key types named
 * in the table - confirm.
 */
static void
mwl_keyprint(struct mwl_softc *sc, const char *tag,
	const MWL_HAL_KEYVAL *hk, const uint8_t mac[IEEE80211_ADDR_LEN])
{
	static const char *ciphers[] = {
		"WEP",
		"TKIP",
		"AES-CCM",
	};
	int i, n;

	printf("%s: [%u] %-7s", tag, hk->keyIndex, ciphers[hk->keyTypeId]);
	for (i = 0, n = hk->keyLen; i < n; i++)
		printf(" %02x", hk->key.aes[i]);
	printf(" mac %s", ether_sprintf(mac));
	if (hk->keyTypeId == KEY_TYPE_ID_TKIP) {
		printf(" %s", "rxmic");
		for (i = 0; i < sizeof(hk->key.tkip.rxMic); i++)
			printf(" %02x", hk->key.tkip.rxMic[i]);
		printf(" txmic");
		for (i = 0; i < sizeof(hk->key.tkip.txMic); i++)
			printf(" %02x", hk->key.tkip.txMic[i]);
	}
	printf(" flags 0x%x\n", hk->keyFlags);
}
#endif

/*
 * Allocate a key cache slot for a unicast key.  The
 * firmware handles key allocation and every station is
 * guaranteed key space so we are always successful.
 *
 * A key that already has an index, or that is flagged as a group
 * key, must lie within the vap's iv_nw_keys array; its WEP index is
 * returned in both *keyix and *rxkeyix.  Returns 1 on success and 0
 * when such a key lies outside that array.
 */
static int
mwl_key_alloc(struct ieee80211vap *vap, struct ieee80211_key *k,
	ieee80211_keyix *keyix, ieee80211_keyix *rxkeyix)
{
	struct mwl_softc *sc = vap->iv_ic->ic_softc;

	if (k->wk_keyix != IEEE80211_KEYIX_NONE ||
	    (k->wk_flags & IEEE80211_KEY_GROUP)) {
		if (!(&vap->iv_nw_keys[0] <= k &&
		      k < &vap->iv_nw_keys[IEEE80211_WEP_NKID])) {
			/* should not happen */
			DPRINTF(sc, MWL_DEBUG_KEYCACHE,
				"%s: bogus group key\n", __func__);
			return 0;
		}
		/* give the caller what they requested */
		*keyix = *rxkeyix = ieee80211_crypto_get_key_wepidx(vap, k);
	} else {
		/*
		 * Firmware handles key allocation.
		 */
		*keyix = *rxkeyix = 0;
	}
	return 1;
}

/*
 * Delete a key entry allocated by mwl_key_alloc.
*/
static int
mwl_key_delete(struct ieee80211vap *vap, const struct ieee80211_key *k)
{
	const uint8_t bcast[IEEE80211_ADDR_LEN] =
	    { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
	struct mwl_softc *sc = vap->iv_ic->ic_softc;
	struct mwl_vap *mvp = MWL_VAP(vap);
	struct mwl_hal_vap *hvap = mvp->mv_hvap;
	MWL_HAL_KEYVAL keyval;

	/*
	 * A vap without its own hal vap can only be serviced when it
	 * is a WDS vap, in which case the AP's hal vap is used.
	 */
	if (hvap == NULL) {
		if (vap->iv_opmode != IEEE80211_M_WDS) {
			/* XXX monitor mode? */
			DPRINTF(sc, MWL_DEBUG_KEYCACHE,
			    "%s: no hvap for opmode %d\n", __func__,
			    vap->iv_opmode);
			return 0;
		}
		hvap = mvp->mv_ap_hvap;
	}

	DPRINTF(sc, MWL_DEBUG_KEYCACHE, "%s: delete key %u\n",
	    __func__, k->wk_keyix);

	/* build the hal key record: index plus cipher type */
	memset(&keyval, 0, sizeof(keyval));
	keyval.keyIndex = k->wk_keyix;
	switch (k->wk_cipher->ic_cipher) {
	case IEEE80211_CIPHER_WEP:
		keyval.keyTypeId = KEY_TYPE_ID_WEP;
		break;
	case IEEE80211_CIPHER_TKIP:
		keyval.keyTypeId = KEY_TYPE_ID_TKIP;
		break;
	case IEEE80211_CIPHER_AES_CCM:
		keyval.keyTypeId = KEY_TYPE_ID_AES;
		break;
	default:
		/* XXX should not happen */
		DPRINTF(sc, MWL_DEBUG_KEYCACHE, "%s: unknown cipher %d\n",
		    __func__, k->wk_cipher->ic_cipher);
		return 0;
	}

	/* report 1 only when the hal accepted the reset */
	if (mwl_hal_keyreset(hvap, &keyval, bcast) != 0)	/*XXX*/
		return 0;
	return 1;
}

/*
 * For a group key, translate the net80211 xmit/recv key flags into
 * the hal's tx/rx group-key flags.  Returns 1 when k is a group key
 * and 0 (leaving hk untouched) when it is not.
 */
static __inline int
addgroupflags(MWL_HAL_KEYVAL *hk, const struct ieee80211_key *k)
{
	if ((k->wk_flags & IEEE80211_KEY_GROUP) == 0)
		return 0;

	if (k->wk_flags & IEEE80211_KEY_XMIT)
		hk->keyFlags |= KEY_FLAG_TXGROUPKEY;
	if (k->wk_flags & IEEE80211_KEY_RECV)
		hk->keyFlags |= KEY_FLAG_RXGROUPKEY;
	return 1;
}

/*
 * Set the key cache contents for the specified key.  Key cache
 * slot(s) must already have been allocated by mwl_key_alloc.
*/
static int
mwl_key_set(struct ieee80211vap *vap, const struct ieee80211_key *k)
{
	/* the key's own mac address is the default sta db entry */
	return (_mwl_key_set(vap, k, k->wk_macaddr));
}

/*
 * Worker for mwl_key_set: build a hal key record from k and write it
 * to the sta db entry chosen below; mac is the address used when no
 * special case applies.  Returns 1 when the hal accepts the key and 0
 * otherwise (including an unknown cipher, or a vap that has no hal
 * vap and is not a WDS vap).
 */
static int
_mwl_key_set(struct ieee80211vap *vap, const struct ieee80211_key *k,
	const uint8_t mac[IEEE80211_ADDR_LEN])
{
#define	GRPXMIT	(IEEE80211_KEY_XMIT | IEEE80211_KEY_GROUP)
/* NB: static wep keys are marked GROUP+tx/rx; GTK will be tx or rx */
#define	IEEE80211_IS_STATICKEY(k) \
	(((k)->wk_flags & (GRPXMIT|IEEE80211_KEY_RECV)) == \
	 (GRPXMIT|IEEE80211_KEY_RECV))
	struct mwl_softc *sc = vap->iv_ic->ic_softc;
	struct mwl_hal_vap *hvap = MWL_VAP(vap)->mv_hvap;
	const struct ieee80211_cipher *cip = k->wk_cipher;
	const uint8_t *macaddr;
	MWL_HAL_KEYVAL hk;

	KASSERT((k->wk_flags & IEEE80211_KEY_SWCRYPT) == 0,
		("s/w crypto set?"));

	if (hvap == NULL) {
		if (vap->iv_opmode != IEEE80211_M_WDS) {
			/* XXX monitor mode? */
			DPRINTF(sc, MWL_DEBUG_KEYCACHE,
			    "%s: no hvap for opmode %d\n", __func__,
			    vap->iv_opmode);
			return 0;
		}
		/* a WDS vap uses the hal vap of its AP vap */
		hvap = MWL_VAP(vap)->mv_ap_hvap;
	}
	memset(&hk, 0, sizeof(hk));
	hk.keyIndex = k->wk_keyix;
	switch (cip->ic_cipher) {
	case IEEE80211_CIPHER_WEP:
		hk.keyTypeId = KEY_TYPE_ID_WEP;
		hk.keyLen = k->wk_keylen;
		if (k->wk_keyix == vap->iv_def_txkey)
			hk.keyFlags = KEY_FLAG_WEP_TXKEY;
		if (!IEEE80211_IS_STATICKEY(k)) {
			/* NB: WEP is never used for the PTK */
			(void) addgroupflags(&hk, k);
		}
		break;
	case IEEE80211_CIPHER_TKIP:
		hk.keyTypeId = KEY_TYPE_ID_TKIP;
		/* split the tsc: low 16 bits and the bits above them */
		hk.key.tkip.tsc.high = (uint32_t)(k->wk_keytsc >> 16);
		hk.key.tkip.tsc.low = (uint16_t)k->wk_keytsc;
		hk.keyFlags = KEY_FLAG_TSC_VALID | KEY_FLAG_MICKEY_VALID;
		/* length covers the key plus the MIC keys that follow it */
		hk.keyLen = k->wk_keylen + IEEE80211_MICBUF_SIZE;
		if (!addgroupflags(&hk, k))
			hk.keyFlags |= KEY_FLAG_PAIRWISE;
		break;
	case IEEE80211_CIPHER_AES_CCM:
		hk.keyTypeId = KEY_TYPE_ID_AES;
		hk.keyLen = k->wk_keylen;
		if (!addgroupflags(&hk, k))
			hk.keyFlags |= KEY_FLAG_PAIRWISE;
		break;
	default:
		/* XXX should not happen */
		DPRINTF(sc, MWL_DEBUG_KEYCACHE, "%s: unknown cipher %d\n",
		    __func__, k->wk_cipher->ic_cipher);
		return 0;
	}
	/*
	 * NB: tkip mic keys get copied here too; the layout
	 *     just happens to match that in ieee80211_key.
	 */
	memcpy(hk.key.aes, k->wk_key, hk.keyLen);

	/*
	 * Locate address of sta db entry for writing key;
	 * the convention unfortunately is somewhat different
	 * than how net80211, hostapd, and wpa_supplicant think.
	 */
	if (vap->iv_opmode == IEEE80211_M_STA) {
		/*
		 * NB: keys plumbed before the sta reaches AUTH state
		 * will be discarded or written to the wrong sta db
		 * entry because iv_bss is meaningless.  This is ok
		 * (right now) because we handle deferred plumbing of
		 * WEP keys when the sta reaches AUTH state.
		 */
		macaddr = vap->iv_bss->ni_bssid;
		if ((k->wk_flags & IEEE80211_KEY_GROUP) == 0) {
			/* XXX plumb to local sta db too for static key wep */
			mwl_hal_keyset(hvap, &hk, vap->iv_myaddr);
		}
	} else if (vap->iv_opmode == IEEE80211_M_WDS &&
	    vap->iv_state != IEEE80211_S_RUN) {
		/*
		 * Prior to RUN state a WDS vap will not have its BSS
		 * node set up, so we would plumb the key to the wrong
		 * mac address (it'll be our local address).  Work around
		 * this for the moment by grabbing the correct address.
		 */
		macaddr = vap->iv_des_bssid;
	} else if ((k->wk_flags & GRPXMIT) == GRPXMIT)
		macaddr = vap->iv_myaddr;
	else
		macaddr = mac;
	KEYPRINTF(sc, &hk, macaddr);
	return (mwl_hal_keyset(hvap, &hk, macaddr) == 0);
#undef IEEE80211_IS_STATICKEY
#undef GRPXMIT
}

/*
 * Set the multicast filter contents into the hardware.
 * XXX f/w has no support; just defer to the os.
 * The whole body is compiled out, so this is currently a no-op.
 */
static void
mwl_setmcastfilter(struct mwl_softc *sc)
{
#if 0
	struct ether_multi *enm;
	struct ether_multistep estep;
	uint8_t macs[IEEE80211_ADDR_LEN*MWL_HAL_MCAST_MAX];/* XXX stack use */
	uint8_t *mp;
	int nmc;

	mp = macs;
	nmc = 0;
	ETHER_FIRST_MULTI(estep, &sc->sc_ec, enm);
	while (enm != NULL) {
		/* XXX Punt on ranges. */
		if (nmc == MWL_HAL_MCAST_MAX ||
		    !IEEE80211_ADDR_EQ(enm->enm_addrlo, enm->enm_addrhi)) {
			ifp->if_flags |= IFF_ALLMULTI;
			return;
		}
		IEEE80211_ADDR_COPY(mp, enm->enm_addrlo);
		mp += IEEE80211_ADDR_LEN, nmc++;
		ETHER_NEXT_MULTI(estep, enm);
	}
	ifp->if_flags &= ~IFF_ALLMULTI;
	mwl_hal_setmcast(sc->sc_mh, nmc, macs);
#endif
}

/*
 * Push the receive mode to the hardware: promiscuous state from
 * ic_promisc, then the multicast filter.  Always returns 0.
 */
static int
mwl_mode_init(struct mwl_softc *sc)
{
	struct ieee80211com *ic = &sc->sc_ic;
	struct mwl_hal *mh = sc->sc_mh;

	mwl_hal_setpromisc(mh, ic->ic_promisc > 0);
	mwl_setmcastfilter(sc);

	return 0;
}

/*
 * Callback from the 802.11 layer after a multicast state change.
 */
static void
mwl_update_mcast(struct ieee80211com *ic)
{
	struct mwl_softc *sc = ic->ic_softc;

	mwl_setmcastfilter(sc);
}

/*
 * Callback from the 802.11 layer after a promiscuous mode change.
 * Note this interface does not check the operating mode as this
 * is an internal callback and we are expected to honor the current
 * state (e.g. this is used for setting the interface in promiscuous
 * mode when operating in hostap mode to do ACS).
 */
static void
mwl_update_promisc(struct ieee80211com *ic)
{
	struct mwl_softc *sc = ic->ic_softc;

	mwl_hal_setpromisc(sc->sc_mh, ic->ic_promisc > 0);
}

/*
 * Callback from the 802.11 layer to update the slot time
 * based on the current setting.  We use it to notify the
 * firmware of ERP changes and the f/w takes care of things
 * like slot time and preamble.
 */
static void
mwl_updateslot(struct ieee80211com *ic)
{
	struct mwl_softc *sc = ic->ic_softc;
	struct mwl_hal *mh = sc->sc_mh;
	int prot;

	/* NB: can be called early; suppress needless cmds */
	if (!sc->sc_running)
		return;

	/*
	 * Calculate the ERP flags.  The firmware will use
	 * this to carry out the appropriate measures.
	 */
	prot = 0;
	if (IEEE80211_IS_CHAN_ANYG(ic->ic_curchan)) {
		if ((ic->ic_flags & IEEE80211_F_SHSLOT) == 0)
			prot |= IEEE80211_ERP_NON_ERP_PRESENT;
		if (ic->ic_flags & IEEE80211_F_USEPROT)
			prot |= IEEE80211_ERP_USE_PROTECTION;
		if (ic->ic_flags & IEEE80211_F_USEBARKER)
			prot |= IEEE80211_ERP_LONG_PREAMBLE;
	}

	DPRINTF(sc, MWL_DEBUG_RESET,
	    "%s: chan %u MHz/flags 0x%x %s slot, (prot 0x%x ic_flags 0x%x)\n",
	    __func__, ic->ic_curchan->ic_freq, ic->ic_curchan->ic_flags,
	    ic->ic_flags & IEEE80211_F_SHSLOT ? "short" : "long", prot,
	    ic->ic_flags);

	mwl_hal_setgprot(mh, prot);
}

/*
 * Setup the beacon frame.
 * A beacon is allocated from net80211, handed to the hal and then
 * released.  Returns ENOBUFS when no beacon can be allocated, else 0.
 */
static int
mwl_beacon_setup(struct ieee80211vap *vap)
{
	struct mwl_hal_vap *hvap = MWL_VAP(vap)->mv_hvap;
	struct ieee80211_node *ni = vap->iv_bss;
	struct mbuf *m;

	m = ieee80211_beacon_alloc(ni);
	if (m == NULL)
		return ENOBUFS;
	/*
	 * NOTE(review): only the first mbuf (m_len bytes) is passed on
	 * and freed; presumably the beacon is always a single mbuf -
	 * confirm.
	 */
	mwl_hal_setbeacon(hvap, mtod(m, const void *), m->m_len);
	m_free(m);

	return 0;
}

/*
 * Update the beacon frame in response to a change.
 * Every item except IEEE80211_BEACON_TIM ends with the beacon being
 * rebuilt through mwl_beacon_setup.
 */
static void
mwl_beacon_update(struct ieee80211vap *vap, int item)
{
	struct mwl_hal_vap *hvap = MWL_VAP(vap)->mv_hvap;
	struct ieee80211com *ic = vap->iv_ic;

	KASSERT(hvap != NULL, ("no beacon"));
	switch (item) {
	case IEEE80211_BEACON_ERP:
		mwl_updateslot(ic);
		break;
	case IEEE80211_BEACON_HTINFO:
		mwl_hal_setnprotmode(hvap,
		    MS(ic->ic_curhtprotmode, IEEE80211_HTINFO_OPMODE));
		break;
	case IEEE80211_BEACON_CAPS:
	case IEEE80211_BEACON_WME:
	case IEEE80211_BEACON_APPIE:
	case IEEE80211_BEACON_CSA:
		break;
	case IEEE80211_BEACON_TIM:
		/* NB: firmware always forms TIM */
		return;
	}
	/* XXX retain beacon frame and update */
	mwl_beacon_setup(vap);
}

/*
 * DMA load callback: store the bus address of the first segment
 * through arg, which is used as a bus_addr_t pointer.  A load error
 * is only caught by the KASSERT.
 */
static void
mwl_load_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
{
	bus_addr_t *paddr = (bus_addr_t*) arg;
	KASSERT(error == 0, ("error %u on bus_dma callback", error));
	*paddr = segs->ds_addr;
}

#ifdef MWL_HOST_PS_SUPPORT
/*
 * Handle power save station occupancy changes.
*/ static void mwl_update_ps(struct ieee80211vap *vap, int nsta) { struct mwl_vap *mvp = MWL_VAP(vap); if (nsta == 0 || mvp->mv_last_ps_sta == 0) mwl_hal_setpowersave_bss(mvp->mv_hvap, nsta); mvp->mv_last_ps_sta = nsta; } /* * Handle associated station power save state changes. */ static int mwl_set_tim(struct ieee80211_node *ni, int set) { struct ieee80211vap *vap = ni->ni_vap; struct mwl_vap *mvp = MWL_VAP(vap); if (mvp->mv_set_tim(ni, set)) { /* NB: state change */ mwl_hal_setpowersave_sta(mvp->mv_hvap, IEEE80211_AID(ni->ni_associd), set); return 1; } else return 0; } #endif /* MWL_HOST_PS_SUPPORT */ static int mwl_desc_setup(struct mwl_softc *sc, const char *name, struct mwl_descdma *dd, int nbuf, size_t bufsize, int ndesc, size_t descsize) { uint8_t *ds; int error; DPRINTF(sc, MWL_DEBUG_RESET, "%s: %s DMA: %u bufs (%ju) %u desc/buf (%ju)\n", __func__, name, nbuf, (uintmax_t) bufsize, ndesc, (uintmax_t) descsize); dd->dd_name = name; dd->dd_desc_len = nbuf * ndesc * descsize; /* * Setup DMA descriptor area. 
*/ error = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), /* parent */ PAGE_SIZE, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ dd->dd_desc_len, /* maxsize */ 1, /* nsegments */ dd->dd_desc_len, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &dd->dd_dmat); if (error != 0) { device_printf(sc->sc_dev, "cannot allocate %s DMA tag\n", dd->dd_name); return error; } /* allocate descriptors */ error = bus_dmamem_alloc(dd->dd_dmat, (void**) &dd->dd_desc, BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dd->dd_dmamap); if (error != 0) { device_printf(sc->sc_dev, "unable to alloc memory for %u %s descriptors, " "error %u\n", nbuf * ndesc, dd->dd_name, error); goto fail1; } error = bus_dmamap_load(dd->dd_dmat, dd->dd_dmamap, dd->dd_desc, dd->dd_desc_len, mwl_load_cb, &dd->dd_desc_paddr, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc->sc_dev, "unable to map %s descriptors, error %u\n", dd->dd_name, error); goto fail2; } ds = dd->dd_desc; memset(ds, 0, dd->dd_desc_len); DPRINTF(sc, MWL_DEBUG_RESET, "%s: %s DMA map: %p (%lu) -> 0x%jx (%lu)\n", __func__, dd->dd_name, ds, (u_long) dd->dd_desc_len, (uintmax_t) dd->dd_desc_paddr, /*XXX*/ (u_long) dd->dd_desc_len); return 0; fail2: bus_dmamem_free(dd->dd_dmat, dd->dd_desc, dd->dd_dmamap); fail1: bus_dma_tag_destroy(dd->dd_dmat); memset(dd, 0, sizeof(*dd)); return error; #undef DS2PHYS } static void mwl_desc_cleanup(struct mwl_softc *sc, struct mwl_descdma *dd) { bus_dmamap_unload(dd->dd_dmat, dd->dd_dmamap); bus_dmamem_free(dd->dd_dmat, dd->dd_desc, dd->dd_dmamap); bus_dma_tag_destroy(dd->dd_dmat); memset(dd, 0, sizeof(*dd)); } /* * Construct a tx q's free list. The order of entries on * the list must reflect the physical layout of tx descriptors * because the firmware pre-fetches descriptors. * * XXX might be better to use indices into the buffer array. 
*/ static void mwl_txq_reset(struct mwl_softc *sc, struct mwl_txq *txq) { struct mwl_txbuf *bf; int i; bf = txq->dma.dd_bufptr; STAILQ_INIT(&txq->free); for (i = 0; i < mwl_txbuf; i++, bf++) STAILQ_INSERT_TAIL(&txq->free, bf, bf_list); txq->nfree = i; } #define DS2PHYS(_dd, _ds) \ ((_dd)->dd_desc_paddr + ((caddr_t)(_ds) - (caddr_t)(_dd)->dd_desc)) static int mwl_txdma_setup(struct mwl_softc *sc, struct mwl_txq *txq) { int error, bsize, i; struct mwl_txbuf *bf; struct mwl_txdesc *ds; error = mwl_desc_setup(sc, "tx", &txq->dma, mwl_txbuf, sizeof(struct mwl_txbuf), MWL_TXDESC, sizeof(struct mwl_txdesc)); if (error != 0) return error; /* allocate and setup tx buffers */ bsize = mwl_txbuf * sizeof(struct mwl_txbuf); bf = malloc(bsize, M_MWLDEV, M_NOWAIT | M_ZERO); if (bf == NULL) { device_printf(sc->sc_dev, "malloc of %u tx buffers failed\n", mwl_txbuf); return ENOMEM; } txq->dma.dd_bufptr = bf; ds = txq->dma.dd_desc; for (i = 0; i < mwl_txbuf; i++, bf++, ds += MWL_TXDESC) { bf->bf_desc = ds; bf->bf_daddr = DS2PHYS(&txq->dma, ds); error = bus_dmamap_create(sc->sc_dmat, BUS_DMA_NOWAIT, &bf->bf_dmamap); if (error != 0) { device_printf(sc->sc_dev, "unable to create dmamap for tx " "buffer %u, error %u\n", i, error); return error; } } mwl_txq_reset(sc, txq); return 0; } static void mwl_txdma_cleanup(struct mwl_softc *sc, struct mwl_txq *txq) { struct mwl_txbuf *bf; int i; bf = txq->dma.dd_bufptr; for (i = 0; i < mwl_txbuf; i++, bf++) { KASSERT(bf->bf_m == NULL, ("mbuf on free list")); KASSERT(bf->bf_node == NULL, ("node on free list")); if (bf->bf_dmamap != NULL) bus_dmamap_destroy(sc->sc_dmat, bf->bf_dmamap); } STAILQ_INIT(&txq->free); txq->nfree = 0; if (txq->dma.dd_bufptr != NULL) { free(txq->dma.dd_bufptr, M_MWLDEV); txq->dma.dd_bufptr = NULL; } if (txq->dma.dd_desc_len != 0) mwl_desc_cleanup(sc, &txq->dma); } static int mwl_rxdma_setup(struct mwl_softc *sc) { int error, jumbosize, bsize, i; struct mwl_rxbuf *bf; struct mwl_jumbo *rbuf; struct mwl_rxdesc *ds; caddr_t 
data; error = mwl_desc_setup(sc, "rx", &sc->sc_rxdma, mwl_rxdesc, sizeof(struct mwl_rxbuf), 1, sizeof(struct mwl_rxdesc)); if (error != 0) return error; /* * Receive is done to a private pool of jumbo buffers. * This allows us to attach to mbuf's and avoid re-mapping * memory on each rx we post. We allocate a large chunk * of memory and manage it in the driver. The mbuf free * callback method is used to reclaim frames after sending * them up the stack. By default we allocate 2x the number of * rx descriptors configured so we have some slop to hold * us while frames are processed. */ if (mwl_rxbuf < 2*mwl_rxdesc) { device_printf(sc->sc_dev, "too few rx dma buffers (%d); increasing to %d\n", mwl_rxbuf, 2*mwl_rxdesc); mwl_rxbuf = 2*mwl_rxdesc; } jumbosize = roundup(MWL_AGGR_SIZE, PAGE_SIZE); sc->sc_rxmemsize = mwl_rxbuf*jumbosize; error = bus_dma_tag_create(sc->sc_dmat, /* parent */ PAGE_SIZE, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ sc->sc_rxmemsize, /* maxsize */ 1, /* nsegments */ sc->sc_rxmemsize, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &sc->sc_rxdmat); if (error != 0) { device_printf(sc->sc_dev, "could not create rx DMA tag\n"); return error; } error = bus_dmamem_alloc(sc->sc_rxdmat, (void**) &sc->sc_rxmem, BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &sc->sc_rxmap); if (error != 0) { device_printf(sc->sc_dev, "could not alloc %ju bytes of rx DMA memory\n", (uintmax_t) sc->sc_rxmemsize); return error; } error = bus_dmamap_load(sc->sc_rxdmat, sc->sc_rxmap, sc->sc_rxmem, sc->sc_rxmemsize, mwl_load_cb, &sc->sc_rxmem_paddr, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc->sc_dev, "could not load rx DMA map\n"); return error; } /* * Allocate rx buffers and set them up. 
*/ bsize = mwl_rxdesc * sizeof(struct mwl_rxbuf); bf = malloc(bsize, M_MWLDEV, M_NOWAIT | M_ZERO); if (bf == NULL) { device_printf(sc->sc_dev, "malloc of %u rx buffers failed\n", bsize); return error; } sc->sc_rxdma.dd_bufptr = bf; STAILQ_INIT(&sc->sc_rxbuf); ds = sc->sc_rxdma.dd_desc; for (i = 0; i < mwl_rxdesc; i++, bf++, ds++) { bf->bf_desc = ds; bf->bf_daddr = DS2PHYS(&sc->sc_rxdma, ds); /* pre-assign dma buffer */ bf->bf_data = ((uint8_t *)sc->sc_rxmem) + (i*jumbosize); /* NB: tail is intentional to preserve descriptor order */ STAILQ_INSERT_TAIL(&sc->sc_rxbuf, bf, bf_list); } /* * Place remainder of dma memory buffers on the free list. */ SLIST_INIT(&sc->sc_rxfree); for (; i < mwl_rxbuf; i++) { data = ((uint8_t *)sc->sc_rxmem) + (i*jumbosize); rbuf = MWL_JUMBO_DATA2BUF(data); SLIST_INSERT_HEAD(&sc->sc_rxfree, rbuf, next); sc->sc_nrxfree++; } return 0; } #undef DS2PHYS static void mwl_rxdma_cleanup(struct mwl_softc *sc) { if (sc->sc_rxmem_paddr != 0) { bus_dmamap_unload(sc->sc_rxdmat, sc->sc_rxmap); sc->sc_rxmem_paddr = 0; } if (sc->sc_rxmem != NULL) { bus_dmamem_free(sc->sc_rxdmat, sc->sc_rxmem, sc->sc_rxmap); sc->sc_rxmem = NULL; } if (sc->sc_rxdma.dd_bufptr != NULL) { free(sc->sc_rxdma.dd_bufptr, M_MWLDEV); sc->sc_rxdma.dd_bufptr = NULL; } if (sc->sc_rxdma.dd_desc_len != 0) mwl_desc_cleanup(sc, &sc->sc_rxdma); } static int mwl_dma_setup(struct mwl_softc *sc) { int error, i; error = mwl_rxdma_setup(sc); if (error != 0) { mwl_rxdma_cleanup(sc); return error; } for (i = 0; i < MWL_NUM_TX_QUEUES; i++) { error = mwl_txdma_setup(sc, &sc->sc_txq[i]); if (error != 0) { mwl_dma_cleanup(sc); return error; } } return 0; } static void mwl_dma_cleanup(struct mwl_softc *sc) { int i; for (i = 0; i < MWL_NUM_TX_QUEUES; i++) mwl_txdma_cleanup(sc, &sc->sc_txq[i]); mwl_rxdma_cleanup(sc); } static struct ieee80211_node * mwl_node_alloc(struct ieee80211vap *vap, const uint8_t mac[IEEE80211_ADDR_LEN]) { struct ieee80211com *ic = vap->iv_ic; struct mwl_softc *sc = ic->ic_softc; 
const size_t space = sizeof(struct mwl_node); struct mwl_node *mn; mn = malloc(space, M_80211_NODE, M_NOWAIT|M_ZERO); if (mn == NULL) { /* XXX stat+msg */ return NULL; } DPRINTF(sc, MWL_DEBUG_NODE, "%s: mn %p\n", __func__, mn); return &mn->mn_node; } static void mwl_node_cleanup(struct ieee80211_node *ni) { struct ieee80211com *ic = ni->ni_ic; struct mwl_softc *sc = ic->ic_softc; struct mwl_node *mn = MWL_NODE(ni); DPRINTF(sc, MWL_DEBUG_NODE, "%s: ni %p ic %p staid %d\n", __func__, ni, ni->ni_ic, mn->mn_staid); if (mn->mn_staid != 0) { struct ieee80211vap *vap = ni->ni_vap; if (mn->mn_hvap != NULL) { if (vap->iv_opmode == IEEE80211_M_STA) mwl_hal_delstation(mn->mn_hvap, vap->iv_myaddr); else mwl_hal_delstation(mn->mn_hvap, ni->ni_macaddr); } /* * NB: legacy WDS peer sta db entry is installed using * the associate ap's hvap; use it again to delete it. * XXX can vap be NULL? */ else if (vap->iv_opmode == IEEE80211_M_WDS && MWL_VAP(vap)->mv_ap_hvap != NULL) mwl_hal_delstation(MWL_VAP(vap)->mv_ap_hvap, ni->ni_macaddr); delstaid(sc, mn->mn_staid); mn->mn_staid = 0; } sc->sc_node_cleanup(ni); } /* * Reclaim rx dma buffers from packets sitting on the ampdu * reorder queue for a station. We replace buffers with a * system cluster (if available). */ static void mwl_ampdu_rxdma_reclaim(struct ieee80211_rx_ampdu *rap) { #if 0 int i, n, off; struct mbuf *m; void *cl; n = rap->rxa_qframes; for (i = 0; i < rap->rxa_wnd && n > 0; i++) { m = rap->rxa_m[i]; if (m == NULL) continue; n--; /* our dma buffers have a well-known free routine */ if ((m->m_flags & M_EXT) == 0 || m->m_ext.ext_free != mwl_ext_free) continue; /* * Try to allocate a cluster and move the data. */ off = m->m_data - m->m_ext.ext_buf; if (off + m->m_pkthdr.len > MCLBYTES) { /* XXX no AMSDU for now */ continue; } cl = pool_cache_get_paddr(&mclpool_cache, 0, &m->m_ext.ext_paddr); if (cl != NULL) { /* * Copy the existing data to the cluster, remove * the rx dma buffer, and attach the cluster in * its place. 
Note we preserve the offset to the * data so frames being bridged can still prepend * their headers without adding another mbuf. */ memcpy((caddr_t) cl + off, m->m_data, m->m_pkthdr.len); MEXTREMOVE(m); MEXTADD(m, cl, MCLBYTES, 0, NULL, &mclpool_cache); /* setup mbuf like _MCLGET does */ m->m_flags |= M_CLUSTER | M_EXT_RW; _MOWNERREF(m, M_EXT | M_CLUSTER); /* NB: m_data is clobbered by MEXTADDR, adjust */ m->m_data += off; } } #endif } /* * Callback to reclaim resources. We first let the * net80211 layer do it's thing, then if we are still * blocked by a lack of rx dma buffers we walk the ampdu * reorder q's to reclaim buffers by copying to a system * cluster. */ static void mwl_node_drain(struct ieee80211_node *ni) { struct ieee80211com *ic = ni->ni_ic; struct mwl_softc *sc = ic->ic_softc; struct mwl_node *mn = MWL_NODE(ni); DPRINTF(sc, MWL_DEBUG_NODE, "%s: ni %p vap %p staid %d\n", __func__, ni, ni->ni_vap, mn->mn_staid); /* NB: call up first to age out ampdu q's */ sc->sc_node_drain(ni); /* XXX better to not check low water mark? */ if (sc->sc_rxblocked && mn->mn_staid != 0 && (ni->ni_flags & IEEE80211_NODE_HT)) { uint8_t tid; /* * Walk the reorder q and reclaim rx dma buffers by copying * the packet contents into clusters. 
*/ for (tid = 0; tid < WME_NUM_TID; tid++) { struct ieee80211_rx_ampdu *rap; rap = &ni->ni_rx_ampdu[tid]; if ((rap->rxa_flags & IEEE80211_AGGR_XCHGPEND) == 0) continue; if (rap->rxa_qframes) mwl_ampdu_rxdma_reclaim(rap); } } } static void mwl_node_getsignal(const struct ieee80211_node *ni, int8_t *rssi, int8_t *noise) { *rssi = ni->ni_ic->ic_node_getrssi(ni); #ifdef MWL_ANT_INFO_SUPPORT #if 0 /* XXX need to smooth data */ *noise = -MWL_NODE_CONST(ni)->mn_ai.nf; #else *noise = -95; /* XXX */ #endif #else *noise = -95; /* XXX */ #endif } /* * Convert Hardware per-antenna rssi info to common format: * Let a1, a2, a3 represent the amplitudes per chain * Let amax represent max[a1, a2, a3] * Rssi1_dBm = RSSI_dBm + 20*log10(a1/amax) * Rssi1_dBm = RSSI_dBm + 20*log10(a1) - 20*log10(amax) * We store a table that is 4*20*log10(idx) - the extra 4 is to store or * maintain some extra precision. * * Values are stored in .5 db format capped at 127. */ static void mwl_node_getmimoinfo(const struct ieee80211_node *ni, struct ieee80211_mimo_info *mi) { #define CVT(_dst, _src) do { \ (_dst) = rssi + ((logdbtbl[_src] - logdbtbl[rssi_max]) >> 2); \ (_dst) = (_dst) > 64 ? 
127 : ((_dst) << 1); \ } while (0) static const int8_t logdbtbl[32] = { 0, 0, 24, 38, 48, 56, 62, 68, 72, 76, 80, 83, 86, 89, 92, 94, 96, 98, 100, 102, 104, 106, 107, 109, 110, 112, 113, 115, 116, 117, 118, 119 }; const struct mwl_node *mn = MWL_NODE_CONST(ni); uint8_t rssi = mn->mn_ai.rsvd1/2; /* XXX */ uint32_t rssi_max; rssi_max = mn->mn_ai.rssi_a; if (mn->mn_ai.rssi_b > rssi_max) rssi_max = mn->mn_ai.rssi_b; if (mn->mn_ai.rssi_c > rssi_max) rssi_max = mn->mn_ai.rssi_c; CVT(mi->ch[0].rssi[0], mn->mn_ai.rssi_a); CVT(mi->ch[1].rssi[0], mn->mn_ai.rssi_b); CVT(mi->ch[2].rssi[0], mn->mn_ai.rssi_c); mi->ch[0].noise[0] = mn->mn_ai.nf_a; mi->ch[1].noise[0] = mn->mn_ai.nf_b; mi->ch[2].noise[0] = mn->mn_ai.nf_c; #undef CVT } static __inline void * mwl_getrxdma(struct mwl_softc *sc) { struct mwl_jumbo *buf; void *data; /* * Allocate from jumbo pool. */ MWL_RXFREE_LOCK(sc); buf = SLIST_FIRST(&sc->sc_rxfree); if (buf == NULL) { DPRINTF(sc, MWL_DEBUG_ANY, "%s: out of rx dma buffers\n", __func__); sc->sc_stats.mst_rx_nodmabuf++; data = NULL; } else { SLIST_REMOVE_HEAD(&sc->sc_rxfree, next); sc->sc_nrxfree--; data = MWL_JUMBO_BUF2DATA(buf); } MWL_RXFREE_UNLOCK(sc); return data; } static __inline void mwl_putrxdma(struct mwl_softc *sc, void *data) { struct mwl_jumbo *buf; /* XXX bounds check data */ MWL_RXFREE_LOCK(sc); buf = MWL_JUMBO_DATA2BUF(data); SLIST_INSERT_HEAD(&sc->sc_rxfree, buf, next); sc->sc_nrxfree++; MWL_RXFREE_UNLOCK(sc); } static int mwl_rxbuf_init(struct mwl_softc *sc, struct mwl_rxbuf *bf) { struct mwl_rxdesc *ds; ds = bf->bf_desc; if (bf->bf_data == NULL) { bf->bf_data = mwl_getrxdma(sc); if (bf->bf_data == NULL) { /* mark descriptor to be skipped */ ds->RxControl = EAGLE_RXD_CTRL_OS_OWN; /* NB: don't need PREREAD */ MWL_RXDESC_SYNC(sc, ds, BUS_DMASYNC_PREWRITE); sc->sc_stats.mst_rxbuf_failed++; return ENOMEM; } } /* * NB: DMA buffer contents is known to be unmodified * so there's no need to flush the data cache. */ /* * Setup descriptor. 
*/ ds->QosCtrl = 0; ds->RSSI = 0; ds->Status = EAGLE_RXD_STATUS_IDLE; ds->Channel = 0; ds->PktLen = htole16(MWL_AGGR_SIZE); ds->SQ2 = 0; ds->pPhysBuffData = htole32(MWL_JUMBO_DMA_ADDR(sc, bf->bf_data)); /* NB: don't touch pPhysNext, set once */ ds->RxControl = EAGLE_RXD_CTRL_DRIVER_OWN; MWL_RXDESC_SYNC(sc, ds, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return 0; } static void mwl_ext_free(struct mbuf *m) { struct mwl_softc *sc = m->m_ext.ext_arg1; /* XXX bounds check data */ mwl_putrxdma(sc, m->m_ext.ext_buf); /* * If we were previously blocked by a lack of rx dma buffers * check if we now have enough to restart rx interrupt handling. * NB: we know we are called at splvm which is above splnet. */ if (sc->sc_rxblocked && sc->sc_nrxfree > mwl_rxdmalow) { sc->sc_rxblocked = 0; mwl_hal_intrset(sc->sc_mh, sc->sc_imask); } } struct mwl_frame_bar { u_int8_t i_fc[2]; u_int8_t i_dur[2]; u_int8_t i_ra[IEEE80211_ADDR_LEN]; u_int8_t i_ta[IEEE80211_ADDR_LEN]; /* ctl, seq, FCS */ } __packed; /* * Like ieee80211_anyhdrsize, but handles BAR frames * specially so the logic below to piece the 802.11 * header together works. 
*/
static __inline int
mwl_anyhdrsize(const void *data)
{
	const struct ieee80211_frame *wh = data;

	if ((wh->i_fc[0]&IEEE80211_FC0_TYPE_MASK) == IEEE80211_FC0_TYPE_CTL) {
		switch (wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK) {
		case IEEE80211_FC0_SUBTYPE_CTS:
		case IEEE80211_FC0_SUBTYPE_ACK:
			return sizeof(struct ieee80211_frame_ack);
		case IEEE80211_FC0_SUBTYPE_BAR:
			return sizeof(struct mwl_frame_bar);
		}
		/* any other control frame: minimal header */
		return sizeof(struct ieee80211_frame_min);
	} else
		return ieee80211_hdrsize(data);
}

/*
 * Report a TKIP MIC failure to net80211.  data points at the
 * start of the rx buffer; the 802.11 header follows a 2-byte
 * length field.  Nothing is reported if no node matches.
 */
static void
mwl_handlemicerror(struct ieee80211com *ic, const uint8_t *data)
{
	const struct ieee80211_frame *wh;
	struct ieee80211_node *ni;

	wh = (const struct ieee80211_frame *)(data + sizeof(uint16_t));
	ni = ieee80211_find_rxnode(ic, (const struct ieee80211_frame_min *) wh);
	if (ni != NULL) {
		ieee80211_notify_michael_failure(ni->ni_vap, wh, 0);
		/* drop the reference ieee80211_find_rxnode returned */
		ieee80211_free_node(ni);
	}
}

/*
 * Convert hardware signal strength to rssi.  The value
 * provided by the device has the noise floor added in;
 * we need to compensate for this but we don't have that
 * so we use a fixed value.
 *
 * The offset of 8 is good for both 2.4 and 5GHz.  The LNA
 * offset is already set as part of the initial gain.  This
 * will give at least +/- 3dB for 2.4GHz and +/- 5dB for 5GHz.
 *
 * The result is clamped to the range 0..127.
 */
static __inline int
cvtrssi(uint8_t ssi)
{
	int rssi = (int) ssi + 8;
	/* XXX hack guess until we have a real noise floor */
	rssi = 2*(87 - rssi);	/* NB: .5 dBm units */
	return (rssi < 0 ? 0 : rssi > 127 ? 127 : rssi);
}

/*
 * Receive processing: starting at sc_rxnext, handle up to
 * mwl_rxquota descriptors the device has handed back.  Each good
 * frame has its DMA buffer attached to an mbuf, its 802.11 header
 * rebuilt in place, and is passed to net80211; the descriptor is
 * then re-armed with a replacement buffer.  The position reached
 * is saved in sc_rxnext for the next call.
 */
static void
mwl_rx_proc(void *arg, int npending)
{
	struct mwl_softc *sc = arg;
	struct ieee80211com *ic = &sc->sc_ic;
	struct mwl_rxbuf *bf;
	struct mwl_rxdesc *ds;
	struct mbuf *m;
	struct ieee80211_qosframe *wh;
	struct ieee80211_qosframe_addr4 *wh4;
	struct ieee80211_node *ni;
	struct mwl_node *mn;
	int off, len, hdrlen, pktlen, rssi, ntodo;
	uint8_t *data, status;
	void *newdata;
	int16_t nf;

	DPRINTF(sc, MWL_DEBUG_RX_PROC, "%s: pending %u rdptr 0x%x wrptr 0x%x\n",
	    __func__, npending, RD4(sc, sc->sc_hwspecs.rxDescRead),
	    RD4(sc, sc->sc_hwspecs.rxDescWrite));
	nf = -96;			/* XXX */
	bf = sc->sc_rxnext;
	for (ntodo = mwl_rxquota; ntodo > 0; ntodo--) {
		/* wrap around to the head of the descriptor list */
		if (bf == NULL)
			bf = STAILQ_FIRST(&sc->sc_rxbuf);
		ds = bf->bf_desc;
		data = bf->bf_data;
		if (data == NULL) {
			/*
			 * If data allocation failed previously there
			 * will be no buffer; try again to re-populate it.
			 * Note the firmware will not advance to the next
			 * descriptor with a dma buffer so we must mimic
			 * this or we'll get out of sync.
			 */
			DPRINTF(sc, MWL_DEBUG_ANY,
			    "%s: rx buf w/o dma memory\n", __func__);
			(void) mwl_rxbuf_init(sc, bf);
			sc->sc_stats.mst_rx_dmabufmissing++;
			break;
		}
		MWL_RXDESC_SYNC(sc, ds,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
		/* stop at the first descriptor not yet handed back to us */
		if (ds->RxControl != EAGLE_RXD_CTRL_DMA_OWN)
			break;
#ifdef MWL_DEBUG
		if (sc->sc_debug & MWL_DEBUG_RECV_DESC)
			mwl_printrxbuf(bf, 0);
#endif
		status = ds->Status;
		if (status & EAGLE_RXD_STATUS_DECRYPT_ERR_MASK) {
			counter_u64_add(ic->ic_ierrors, 1);
			sc->sc_stats.mst_rx_crypto++;
			/*
			 * NB: Check EAGLE_RXD_STATUS_GENERAL_DECRYPT_ERR
			 *     for backwards compatibility.
			 */
			if (status != EAGLE_RXD_STATUS_GENERAL_DECRYPT_ERR &&
			    (status & EAGLE_RXD_STATUS_TKIP_MIC_DECRYPT_ERR)) {
				/*
				 * MIC error, notify upper layers.
				 */
				bus_dmamap_sync(sc->sc_rxdmat, sc->sc_rxmap,
				    BUS_DMASYNC_POSTREAD);
				mwl_handlemicerror(ic, data);
				sc->sc_stats.mst_rx_tkipmic++;
			}
			/* XXX too painful to tap packets */
			goto rx_next;
		}
		/*
		 * Sync the data buffer.
		 */
		len = le16toh(ds->PktLen);
		bus_dmamap_sync(sc->sc_rxdmat, sc->sc_rxmap, BUS_DMASYNC_POSTREAD);
		/*
		 * The 802.11 header is provided all or in part at the front;
		 * use it to calculate the true size of the header that we'll
		 * construct below.  We use this to figure out where to copy
		 * payload prior to constructing the header.
		 */
		hdrlen = mwl_anyhdrsize(data + sizeof(uint16_t));
		off = sizeof(uint16_t) + sizeof(struct ieee80211_frame_addr4);

		/* calculate rssi early so we can re-use for each aggregate */
		rssi = cvtrssi(ds->RSSI);

		pktlen = hdrlen + (len - off);
		/*
		 * NB: we know our frame is at least as large as
		 * IEEE80211_MIN_LEN because there is a 4-address
		 * frame at the front.  Hence there's no need to
		 * vet the packet length.  If the frame in fact
		 * is too small it should be discarded at the
		 * net80211 layer.
		 */

		/*
		 * Attach dma buffer to an mbuf.  We tried
		 * doing this based on the packet size (i.e.
		 * copying small packets) but it turns out to
		 * be a net loss.  The tradeoff might be system
		 * dependent (cache architecture is important).
		 */
		MGETHDR(m, M_NOWAIT, MT_DATA);
		if (m == NULL) {
			DPRINTF(sc, MWL_DEBUG_ANY,
			    "%s: no rx mbuf\n", __func__);
			sc->sc_stats.mst_rx_nombuf++;
			goto rx_next;
		}
		/*
		 * Acquire the replacement dma buffer before
		 * processing the frame.  If we're out of dma
		 * buffers we disable rx interrupts and wait
		 * for the free pool to reach mwl_rxdmalow buffers
		 * before starting to do work again.  If the firmware
		 * runs out of descriptors then it will toss frames
		 * which is better than our doing it as that can
		 * starve our processing.  It is also important that
		 * we always process rx'd frames in case they are
		 * A-MPDU as otherwise the host's view of the BA
		 * window may get out of sync with the firmware.
		 */
		newdata = mwl_getrxdma(sc);
		if (newdata == NULL) {
			/* NB: stat+msg in mwl_getrxdma */
			m_free(m);
			/* disable RX interrupt and mark state */
			mwl_hal_intrset(sc->sc_mh,
			    sc->sc_imask &~ MACREG_A2HRIC_BIT_RX_RDY);
			sc->sc_rxblocked = 1;
			ieee80211_drain(ic);
			/* XXX check rxblocked and immediately start again? */
			goto rx_stop;
		}
		bf->bf_data = newdata;
		/*
		 * Attach the dma buffer to the mbuf;
		 * mwl_rxbuf_init will re-setup the rx
		 * descriptor using the replacement dma
		 * buffer we just installed above.
		 */
		m_extadd(m, data, MWL_AGGR_SIZE, mwl_ext_free, sc, NULL, 0,
		    EXT_NET_DRV);
		m->m_data += off - hdrlen;
		m->m_pkthdr.len = m->m_len = pktlen;
		/* NB: dma buffer assumed read-only */

		/*
		 * Piece 802.11 header together.
		 */
		wh = mtod(m, struct ieee80211_qosframe *);
		/* NB: don't need to do this sometimes but ... */
		/* XXX special case so we can memcpy after m_devget? */
		ovbcopy(data + sizeof(uint16_t), wh, hdrlen);
		if (IEEE80211_QOS_HAS_SEQ(wh)) {
			/* QoS control comes from the descriptor, not the buffer */
			if (IEEE80211_IS_DSTODS(wh)) {
				wh4 = mtod(m,
				    struct ieee80211_qosframe_addr4*);
				*(uint16_t *)wh4->i_qos = ds->QosCtrl;
			} else {
				*(uint16_t *)wh->i_qos = ds->QosCtrl;
			}
		}
		/*
		 * The f/w strips WEP header but doesn't clear
		 * the WEP bit; mark the packet with M_WEP so
		 * net80211 will treat the data as decrypted.
		 * While here also clear the PWR_MGT bit since
		 * power save is handled by the firmware and
		 * passing this up will potentially cause the
		 * upper layer to put a station in power save
		 * (except when configured with MWL_HOST_PS_SUPPORT).
		 */
		if (wh->i_fc[1] & IEEE80211_FC1_PROTECTED)
			m->m_flags |= M_WEP;
#ifdef MWL_HOST_PS_SUPPORT
		wh->i_fc[1] &= ~IEEE80211_FC1_PROTECTED;
#else
		wh->i_fc[1] &= ~(IEEE80211_FC1_PROTECTED |
		    IEEE80211_FC1_PWR_MGT);
#endif

		if (ieee80211_radiotap_active(ic)) {
			struct mwl_rx_radiotap_header *tap = &sc->sc_rx_th;

			tap->wr_flags = 0;
			tap->wr_rate = ds->Rate;
			tap->wr_antsignal = rssi + nf;
			tap->wr_antnoise = nf;
		}
		if (IFF_DUMPPKTS_RECV(sc, wh)) {
			ieee80211_dump_pkt(ic, mtod(m, caddr_t),
			    len, ds->Rate, rssi);
		}
		/* dispatch */
		ni = ieee80211_find_rxnode(ic,
		    (const struct ieee80211_frame_min *) wh);
		if (ni != NULL) {
			mn = MWL_NODE(ni);
#ifdef MWL_ANT_INFO_SUPPORT
			mn->mn_ai.rssi_a = ds->ai.rssi_a;
			mn->mn_ai.rssi_b = ds->ai.rssi_b;
			mn->mn_ai.rssi_c = ds->ai.rssi_c;
			mn->mn_ai.rsvd1 = rssi;
#endif
			/* tag AMPDU aggregates for reorder processing */
			if (ni->ni_flags & IEEE80211_NODE_HT)
				m->m_flags |= M_AMPDU;
			(void) ieee80211_input(ni, m, rssi, nf);
			ieee80211_free_node(ni);
		} else
			(void) ieee80211_input_all(ic, m, rssi, nf);
rx_next:
		/* NB: ignore ENOMEM so we process more descriptors */
		(void) mwl_rxbuf_init(sc, bf);
		bf = STAILQ_NEXT(bf, bf_list);
	}
rx_stop:
	/* remember where to resume on the next call */
	sc->sc_rxnext = bf;

	if (mbufq_first(&sc->sc_snd) != NULL) {
		/* NB: kick fw; the tx thread may have been preempted */
		mwl_hal_txstart(sc->sc_mh, 0);
		mwl_start(sc);
	}
}

/*
 * Initialize a tx queue: set up its lock, record the queue
 * number, and link the descriptors of the buffers on the free
 * list into a ring (the last descriptor points back at the
 * first).  The free list itself is built by mwl_txdma_setup.
 */
static void
mwl_txq_init(struct mwl_softc *sc, struct mwl_txq *txq, int qnum)
{
	struct mwl_txbuf *bf, *bn;
	struct mwl_txdesc *ds;

	MWL_TXQ_LOCK_INIT(sc, txq);
	txq->qnum = qnum;
	txq->txpri = 0;	/* XXX */
#if 0
	/* NB: q setup by mwl_txdma_setup XXX */
	STAILQ_INIT(&txq->free);
#endif
	STAILQ_FOREACH(bf, &txq->free, bf_list) {
		bf->bf_txq = txq;

		ds = bf->bf_desc;
		bn = STAILQ_NEXT(bf, bf_list);
		/* close the ring at the end of the list */
		if (bn == NULL)
			bn = STAILQ_FIRST(&txq->free);
		ds->pPhysNext = htole32(bn->bf_daddr);
	}
	STAILQ_INIT(&txq->active);
}

/*
 * Setup a hardware data transmit queue for the specified
 * access control.  We record the mapping from ac's
 * to h/w queues for use by mwl_tx_start.
*/ static int mwl_tx_setup(struct mwl_softc *sc, int ac, int mvtype) { struct mwl_txq *txq; if (ac >= nitems(sc->sc_ac2q)) { device_printf(sc->sc_dev, "AC %u out of range, max %zu!\n", ac, nitems(sc->sc_ac2q)); return 0; } if (mvtype >= MWL_NUM_TX_QUEUES) { device_printf(sc->sc_dev, "mvtype %u out of range, max %u!\n", mvtype, MWL_NUM_TX_QUEUES); return 0; } txq = &sc->sc_txq[mvtype]; mwl_txq_init(sc, txq, mvtype); sc->sc_ac2q[ac] = txq; return 1; } /* * Update WME parameters for a transmit queue. */ static int mwl_txq_update(struct mwl_softc *sc, int ac) { #define MWL_EXPONENT_TO_VALUE(v) ((1<sc_ic; struct chanAccParams chp; struct mwl_txq *txq = sc->sc_ac2q[ac]; struct wmeParams *wmep; struct mwl_hal *mh = sc->sc_mh; int aifs, cwmin, cwmax, txoplim; ieee80211_wme_ic_getparams(ic, &chp); wmep = &chp.cap_wmeParams[ac]; aifs = wmep->wmep_aifsn; /* XXX in sta mode need to pass log values for cwmin/max */ cwmin = MWL_EXPONENT_TO_VALUE(wmep->wmep_logcwmin); cwmax = MWL_EXPONENT_TO_VALUE(wmep->wmep_logcwmax); txoplim = wmep->wmep_txopLimit; /* NB: units of 32us */ if (mwl_hal_setedcaparams(mh, txq->qnum, cwmin, cwmax, aifs, txoplim)) { device_printf(sc->sc_dev, "unable to update hardware queue " "parameters for %s traffic!\n", ieee80211_wme_acnames[ac]); return 0; } return 1; #undef MWL_EXPONENT_TO_VALUE } /* * Callback from the 802.11 layer to update WME parameters. */ static int mwl_wme_update(struct ieee80211com *ic) { struct mwl_softc *sc = ic->ic_softc; return !mwl_txq_update(sc, WME_AC_BE) || !mwl_txq_update(sc, WME_AC_BK) || !mwl_txq_update(sc, WME_AC_VI) || !mwl_txq_update(sc, WME_AC_VO) ? EIO : 0; } /* * Reclaim resources for a setup queue. */ static void mwl_tx_cleanupq(struct mwl_softc *sc, struct mwl_txq *txq) { /* XXX hal work? */ MWL_TXQ_LOCK_DESTROY(txq); } /* * Reclaim all tx queue resources. 
*/ static void mwl_tx_cleanup(struct mwl_softc *sc) { int i; for (i = 0; i < MWL_NUM_TX_QUEUES; i++) mwl_tx_cleanupq(sc, &sc->sc_txq[i]); } static int mwl_tx_dmasetup(struct mwl_softc *sc, struct mwl_txbuf *bf, struct mbuf *m0) { struct mbuf *m; int error; /* * Load the DMA map so any coalescing is done. This * also calculates the number of descriptors we need. */ error = bus_dmamap_load_mbuf_sg(sc->sc_dmat, bf->bf_dmamap, m0, bf->bf_segs, &bf->bf_nseg, BUS_DMA_NOWAIT); if (error == EFBIG) { /* XXX packet requires too many descriptors */ bf->bf_nseg = MWL_TXDESC+1; } else if (error != 0) { sc->sc_stats.mst_tx_busdma++; m_freem(m0); return error; } /* * Discard null packets and check for packets that * require too many TX descriptors. We try to convert * the latter to a cluster. */ if (error == EFBIG) { /* too many desc's, linearize */ sc->sc_stats.mst_tx_linear++; #if MWL_TXDESC > 1 m = m_collapse(m0, M_NOWAIT, MWL_TXDESC); #else m = m_defrag(m0, M_NOWAIT); #endif if (m == NULL) { m_freem(m0); sc->sc_stats.mst_tx_nombuf++; return ENOMEM; } m0 = m; error = bus_dmamap_load_mbuf_sg(sc->sc_dmat, bf->bf_dmamap, m0, bf->bf_segs, &bf->bf_nseg, BUS_DMA_NOWAIT); if (error != 0) { sc->sc_stats.mst_tx_busdma++; m_freem(m0); return error; } KASSERT(bf->bf_nseg <= MWL_TXDESC, ("too many segments after defrag; nseg %u", bf->bf_nseg)); } else if (bf->bf_nseg == 0) { /* null packet, discard */ sc->sc_stats.mst_tx_nodata++; m_freem(m0); return EIO; } DPRINTF(sc, MWL_DEBUG_XMIT, "%s: m %p len %u\n", __func__, m0, m0->m_pkthdr.len); bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap, BUS_DMASYNC_PREWRITE); bf->bf_m = m0; return 0; } static __inline int mwl_cvtlegacyrate(int rate) { switch (rate) { case 2: return 0; case 4: return 1; case 11: return 2; case 22: return 3; case 44: return 4; case 12: return 5; case 18: return 6; case 24: return 7; case 36: return 8; case 48: return 9; case 72: return 10; case 96: return 11; case 108:return 12; } return 0; } /* * Calculate fixed tx rate 
information per client state;
 * this value is suitable for writing to the Format field
 * of a tx descriptor.
 */
/*
 * rate: rate code; when IEEE80211_RATE_MCS is set it is treated as an
 *       HT MCS, otherwise it is converted with mwl_cvtlegacyrate.
 * ni:   node whose channel, HT capabilities and capinfo select the
 *       channel width, guard interval and preamble bits.
 * Returns the assembled format word in host byte order (callers in
 * this file wrap it in htole16 before storing it in a descriptor).
 */
static uint16_t
mwl_calcformat(uint8_t rate, const struct ieee80211_node *ni)
{
	uint16_t fmt;

	/* bits common to HT and legacy: antenna field and ext channel side */
	fmt = SM(3, EAGLE_TXD_ANTENNA)
	    | (IEEE80211_IS_CHAN_HT40D(ni->ni_chan) ?
		EAGLE_TXD_EXTCHAN_LO : EAGLE_TXD_EXTCHAN_HI);
	if (rate & IEEE80211_RATE_MCS) {	/* HT MCS */
		fmt |= EAGLE_TXD_FORMAT_HT
		    /* NB: 0x80 implicitly stripped from ucastrate */
		    | SM(rate, EAGLE_TXD_RATE);
		/* XXX short/long GI may be wrong; re-check */
		if (IEEE80211_IS_CHAN_HT40(ni->ni_chan)) {
			fmt |= EAGLE_TXD_CHW_40
			    | (ni->ni_htcap & IEEE80211_HTCAP_SHORTGI40 ?
				EAGLE_TXD_GI_SHORT : EAGLE_TXD_GI_LONG);
		} else {
			fmt |= EAGLE_TXD_CHW_20
			    | (ni->ni_htcap & IEEE80211_HTCAP_SHORTGI20 ?
				EAGLE_TXD_GI_SHORT : EAGLE_TXD_GI_LONG);
		}
	} else {			/* legacy rate */
		fmt |= EAGLE_TXD_FORMAT_LEGACY
		    | SM(mwl_cvtlegacyrate(rate), EAGLE_TXD_RATE)
		    | EAGLE_TXD_CHW_20
		    /* XXX iv_flags & IEEE80211_F_SHPREAMBLE? */
		    | (ni->ni_capinfo & IEEE80211_CAPINFO_SHORT_PREAMBLE ?
			EAGLE_TXD_PREAMBLE_SHORT : EAGLE_TXD_PREAMBLE_LONG);
	}
	return fmt;
}

/*
 * Prepare one frame for transmission and hand it to the firmware.
 *
 * Steps, in order: crypto-encapsulate protected frames, feed the
 * radiotap tap, rewrite the front of the mbuf into the mwltxrec
 * layout, load the DMA map, fill in the tx descriptor attached to bf
 * and append bf to the active list of its tx queue.
 *
 * Returns 0 on success.  Returns EIO after freeing m0 when crypto
 * encapsulation fails, when the mbuf lacks headroom, or when the frame
 * type is not MGT/CTL/DATA.  A failure from mwl_tx_dmasetup is returned
 * as-is; this function does not free m0 on that path.
 */
static int
mwl_tx_start(struct mwl_softc *sc, struct ieee80211_node *ni, struct mwl_txbuf *bf,
	struct mbuf *m0)
{
	struct ieee80211com *ic = &sc->sc_ic;
	struct ieee80211vap *vap = ni->ni_vap;
	int error, iswep, ismcast;
	int hdrlen, copyhdrlen, pktlen;
	struct mwl_txdesc *ds;
	struct mwl_txq *txq;
	struct ieee80211_frame *wh;
	struct mwltxrec *tr;
	struct mwl_node *mn;
	uint16_t qos;
#if MWL_TXDESC > 1
	int i;
#endif

	wh = mtod(m0, struct ieee80211_frame *);
	iswep = wh->i_fc[1] & IEEE80211_FC1_PROTECTED;
	ismcast = IEEE80211_IS_MULTICAST(wh->i_addr1);
	hdrlen = ieee80211_anyhdrsize(wh);
	copyhdrlen = hdrlen;
	pktlen = m0->m_pkthdr.len;
	/* capture the QoS control field (raw, as found in the frame) */
	if (IEEE80211_QOS_HAS_SEQ(wh)) {
		if (IEEE80211_IS_DSTODS(wh)) {
			qos = *(uint16_t *)
			    (((struct ieee80211_qosframe_addr4 *) wh)->i_qos);
			copyhdrlen -= sizeof(qos);
		} else
			qos = *(uint16_t *)
			    (((struct ieee80211_qosframe *) wh)->i_qos);
	} else
		qos = 0;

	if (iswep) {
		const struct ieee80211_cipher *cip;
		struct ieee80211_key *k;

		/*
		 * Construct the 802.11 header+trailer for an encrypted
		 * frame. The only reason this can fail is because of an
		 * unknown or unsupported cipher/key type.
		 *
		 * NB: we do this even though the firmware will ignore
		 *     what we've done for WEP and TKIP as we need the
		 *     ExtIV filled in for CCMP and this also adjusts
		 *     the headers which simplifies our work below.
		 */
		k = ieee80211_crypto_encap(ni, m0);
		if (k == NULL) {
			/*
			 * This can happen when the key is yanked after the
			 * frame was queued.  Just discard the frame; the
			 * 802.11 layer counts failures and provides
			 * debugging/diagnostics.
			 */
			m_freem(m0);
			return EIO;
		}
		/*
		 * Adjust the packet length for the crypto additions
		 * done during encap and any other bits that the f/w
		 * will add later on.
		 */
		cip = k->wk_cipher;
		pktlen += cip->ic_header + cip->ic_miclen + cip->ic_trailer;

		/* packet header may have moved, reset our local pointer */
		wh = mtod(m0, struct ieee80211_frame *);
	}

	if (ieee80211_radiotap_active_vap(vap)) {
		sc->sc_tx_th.wt_flags = 0;	/* XXX */
		if (iswep)
			sc->sc_tx_th.wt_flags |= IEEE80211_RADIOTAP_F_WEP;
#if 0
		sc->sc_tx_th.wt_rate = ds->DataRate;
#endif
		sc->sc_tx_th.wt_txpower = ni->ni_txpower;
		sc->sc_tx_th.wt_antenna = sc->sc_txantenna;

		ieee80211_radiotap_tx(vap, m0);
	}
	/*
	 * Copy up/down the 802.11 header; the firmware requires
	 * we present a 2-byte payload length followed by a
	 * 4-address header (w/o QoS), followed (optionally) by
	 * any WEP/ExtIV header (but only filled in for CCMP).
	 * We are assured the mbuf has sufficient headroom to
	 * prepend in-place by the setup of ic_headroom in
	 * mwl_attach.
	 */
	if (hdrlen < sizeof(struct mwltxrec)) {
		const int space = sizeof(struct mwltxrec) - hdrlen;
		if (M_LEADINGSPACE(m0) < space) {
			/* NB: should never happen */
			device_printf(sc->sc_dev,
			    "not enough headroom, need %d found %zd, "
			    "m_flags 0x%x m_len %d\n",
			    space, M_LEADINGSPACE(m0), m0->m_flags, m0->m_len);
			ieee80211_dump_pkt(ic,
			    mtod(m0, const uint8_t *), m0->m_len, 0, -1);
			m_freem(m0);
			sc->sc_stats.mst_tx_noheadroom++;
			return EIO;
		}
		M_PREPEND(m0, space, M_NOWAIT);
	}
	tr = mtod(m0, struct mwltxrec *);
	/* slide the header into place unless it already sits there */
	if (wh != (struct ieee80211_frame *) &tr->wh)
		ovbcopy(wh, &tr->wh, hdrlen);
	/*
	 * Note: the "firmware length" is actually the length
	 * of the fully formed "802.11 payload".  That is, it's
	 * everything except for the 802.11 header.  In particular
	 * this includes all crypto material including the MIC!
	 */
	tr->fwlen = htole16(pktlen - hdrlen);

	/*
	 * Load the DMA map so any coalescing is done.  This
	 * also calculates the number of descriptors we need.
	 */
	error = mwl_tx_dmasetup(sc, bf, m0);
	if (error != 0) {
		/* NB: stat collected in mwl_tx_dmasetup */
		DPRINTF(sc, MWL_DEBUG_XMIT,
		    "%s: unable to setup dma\n", __func__);
		return error;
	}
	bf->bf_node = ni;			/* NB: held reference */
	m0 = bf->bf_m;				/* NB: may have changed */
	tr = mtod(m0, struct mwltxrec *);
	wh = (struct ieee80211_frame *)&tr->wh;

	/*
	 * Formulate tx descriptor.
	 */
	ds = bf->bf_desc;
	txq = bf->bf_txq;

	ds->QosCtrl = qos;			/* NB: already little-endian */
#if MWL_TXDESC == 1
	/*
	 * NB: multiframes should be zero because the descriptors
	 *     are initialized to zero.  This should handle the case
	 *     where the driver is built with MWL_TXDESC=1 but we are
	 *     using firmware with multi-segment support.
	 */
	ds->PktPtr = htole32(bf->bf_segs[0].ds_addr);
	ds->PktLen = htole16(bf->bf_segs[0].ds_len);
#else
	ds->multiframes = htole32(bf->bf_nseg);
	ds->PktLen = htole16(m0->m_pkthdr.len);
	for (i = 0; i < bf->bf_nseg; i++) {
		ds->PktPtrArray[i] = htole32(bf->bf_segs[i].ds_addr);
		ds->PktLenArray[i] = htole16(bf->bf_segs[i].ds_len);
	}
#endif
	/* NB: pPhysNext, DataRate, and SapPktInfo setup once, don't touch */
	ds->Format = 0;
	ds->pad = 0;
	ds->ack_wcb_addr = 0;

	mn = MWL_NODE(ni);
	/*
	 * Select transmit rate.
	 */
	switch (wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK) {
	case IEEE80211_FC0_TYPE_MGT:
		sc->sc_stats.mst_tx_mgmt++;
		/* fall thru... */
	case IEEE80211_FC0_TYPE_CTL:
		/* NB: assign to BE q to avoid bursting */
		ds->TxPriority = MWL_WME_AC_BE;
		break;
	case IEEE80211_FC0_TYPE_DATA:
		if (!ismcast) {
			const struct ieee80211_txparam *tp = ni->ni_txparms;
			/*
			 * EAPOL frames get forced to a fixed rate and w/o
			 * aggregation; otherwise check for any fixed rate
			 * for the client (may depend on association state).
			 */
			if (m0->m_flags & M_EAPOL) {
				const struct mwl_vap *mvp = MWL_VAP_CONST(vap);
				ds->Format = mvp->mv_eapolformat;
				ds->pad = htole16(
				    EAGLE_TXD_FIXED_RATE | EAGLE_TXD_DONT_AGGR);
			} else if (tp->ucastrate != IEEE80211_FIXED_RATE_NONE) {
				/* XXX pre-calculate per node */
				ds->Format = htole16(
				    mwl_calcformat(tp->ucastrate, ni));
				ds->pad = htole16(EAGLE_TXD_FIXED_RATE);
			}
			/* NB: EAPOL frames will never have qos set */
			/*
			 * Frames whose QoS field matches one of the node's
			 * BA streams go to that stream's queue; everything
			 * else uses the queue the buffer came from.
			 */
			if (qos == 0)
				ds->TxPriority = txq->qnum;
#if MWL_MAXBA > 3
			else if (mwl_bastream_match(&mn->mn_ba[3], qos))
				ds->TxPriority = mn->mn_ba[3].txq;
#endif
#if MWL_MAXBA > 2
			else if (mwl_bastream_match(&mn->mn_ba[2], qos))
				ds->TxPriority = mn->mn_ba[2].txq;
#endif
#if MWL_MAXBA > 1
			else if (mwl_bastream_match(&mn->mn_ba[1], qos))
				ds->TxPriority = mn->mn_ba[1].txq;
#endif
#if MWL_MAXBA > 0
			else if (mwl_bastream_match(&mn->mn_ba[0], qos))
				ds->TxPriority = mn->mn_ba[0].txq;
#endif
			else
				ds->TxPriority = txq->qnum;
		} else
			ds->TxPriority = txq->qnum;
		break;
	default:
		device_printf(sc->sc_dev, "bogus frame type 0x%x (%s)\n",
			wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK, __func__);
		sc->sc_stats.mst_tx_badframetype++;
		m_freem(m0);
		return EIO;
	}

	/* debug dump skips the leading 2-byte firmware length */
	if (IFF_DUMPPKTS_XMIT(sc))
		ieee80211_dump_pkt(ic,
		    mtod(m0, const uint8_t *)+sizeof(uint16_t),
		    m0->m_len - sizeof(uint16_t), ds->DataRate, -1);

	/* mark the descriptor firmware-owned and queue it under the txq lock */
	MWL_TXQ_LOCK(txq);
	ds->Status = htole32(EAGLE_TXD_STATUS_FW_OWNED);
	STAILQ_INSERT_TAIL(&txq->active, bf, bf_list);
	MWL_TXDESC_SYNC(txq, ds, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	sc->sc_tx_timer = 5;
	MWL_TXQ_UNLOCK(txq);

	return 0;
}

/*
 * Translate a legacy rate index into the matching entry of the
 * ieeerates[] table below; an index at or beyond the end of the
 * table yields 0.
 */
static __inline int
mwl_cvtlegacyrix(int rix)
{
	static const int ieeerates[] =
	    { 2, 4, 11, 22, 44, 12, 18, 24, 36, 48, 72, 96, 108 };
	return (rix < nitems(ieeerates) ? ieeerates[rix] : 0);
}

/*
 * Process completed xmit descriptors from the specified queue.
*/
/*
 * Walks the queue's active list from the head and stops at the first
 * descriptor the firmware still owns.  For each reaped buffer the
 * completion status is folded into the statistics, the mbuf is passed
 * to ieee80211_tx_complete (or freed when no node is attached), the
 * DMA map is unloaded and the buffer is returned with
 * mwl_puttxbuf_tail.  Returns the number of buffers reaped.
 */
static int
mwl_tx_processq(struct mwl_softc *sc, struct mwl_txq *txq)
{
#define	EAGLE_TXD_STATUS_MCAST \
	(EAGLE_TXD_STATUS_MULTICAST_TX | EAGLE_TXD_STATUS_BROADCAST_TX)
	struct ieee80211com *ic = &sc->sc_ic;
	struct mwl_txbuf *bf;
	struct mwl_txdesc *ds;
	struct ieee80211_node *ni;
	struct mwl_node *an;
	int nreaped;
	uint32_t status;

	DPRINTF(sc, MWL_DEBUG_TX_PROC, "%s: tx queue %u\n", __func__, txq->qnum);
	for (nreaped = 0;; nreaped++) {
		/* the lock covers only the peek/dequeue of the head buffer */
		MWL_TXQ_LOCK(txq);
		bf = STAILQ_FIRST(&txq->active);
		if (bf == NULL) {
			MWL_TXQ_UNLOCK(txq);
			break;
		}
		ds = bf->bf_desc;
		MWL_TXDESC_SYNC(txq, ds,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
		if (ds->Status & htole32(EAGLE_TXD_STATUS_FW_OWNED)) {
			MWL_TXQ_UNLOCK(txq);
			break;
		}
		STAILQ_REMOVE_HEAD(&txq->active, bf_list);
		MWL_TXQ_UNLOCK(txq);

#ifdef MWL_DEBUG
		if (sc->sc_debug & MWL_DEBUG_XMIT_DESC)
			mwl_printtxbuf(bf, txq->qnum, nreaped);
#endif
		ni = bf->bf_node;
		if (ni != NULL) {
			an = MWL_NODE(ni);
			status = le32toh(ds->Status);
			if (status & EAGLE_TXD_STATUS_OK) {
				uint16_t Format = le16toh(ds->Format);
				uint8_t txant = MS(Format, EAGLE_TXD_ANTENNA);

				sc->sc_stats.mst_ant_tx[txant]++;
				if (status & EAGLE_TXD_STATUS_OK_RETRY)
					sc->sc_stats.mst_tx_retries++;
				if (status & EAGLE_TXD_STATUS_OK_MORE_RETRY)
					sc->sc_stats.mst_tx_mretries++;
				if (txq->qnum >= MWL_WME_AC_VO)
					ic->ic_wme.wme_hipri_traffic++;
				/*
				 * Record the rate reported in the descriptor:
				 * legacy rates are converted via the rate
				 * index table, HT rates get the MCS flag.
				 */
				ni->ni_txrate = MS(Format, EAGLE_TXD_RATE);
				if ((Format & EAGLE_TXD_FORMAT_HT) == 0) {
					ni->ni_txrate = mwl_cvtlegacyrix(
					    ni->ni_txrate);
				} else
					ni->ni_txrate |= IEEE80211_RATE_MCS;
				sc->sc_stats.mst_tx_rate = ni->ni_txrate;
			} else {
				if (status & EAGLE_TXD_STATUS_FAILED_LINK_ERROR)
					sc->sc_stats.mst_tx_linkerror++;
				if (status & EAGLE_TXD_STATUS_FAILED_XRETRY)
					sc->sc_stats.mst_tx_xretries++;
				if (status & EAGLE_TXD_STATUS_FAILED_AGING)
					sc->sc_stats.mst_tx_aging++;
				if (bf->bf_m->m_flags & M_FF)
					sc->sc_stats.mst_ff_txerr++;
			}
			if (bf->bf_m->m_flags & M_TXCB)
				/* XXX strip fw len in case header inspected */
				m_adj(bf->bf_m, sizeof(uint16_t));
			/* third argument is non-zero when the OK bit is clear */
			ieee80211_tx_complete(ni, bf->bf_m,
			    (status & EAGLE_TXD_STATUS_OK) == 0);
		} else
			m_freem(bf->bf_m);
		ds->Status = htole32(EAGLE_TXD_STATUS_IDLE);

		bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap,
		    BUS_DMASYNC_POSTWRITE);
		bus_dmamap_unload(sc->sc_dmat, bf->bf_dmamap);

		mwl_puttxbuf_tail(txq, bf);
	}
	return nreaped;
#undef EAGLE_TXD_STATUS_MCAST
}

/*
 * Deferred processing of transmit interrupt; special-cased
 * for four hardware queues, 0-3.
 */
static void
mwl_tx_proc(void *arg, int npending)
{
	struct mwl_softc *sc = arg;
	int nreaped;

	/*
	 * Process each active queue.
	 */
	nreaped = 0;
	if (!STAILQ_EMPTY(&sc->sc_txq[0].active))
		nreaped += mwl_tx_processq(sc, &sc->sc_txq[0]);
	if (!STAILQ_EMPTY(&sc->sc_txq[1].active))
		nreaped += mwl_tx_processq(sc, &sc->sc_txq[1]);
	if (!STAILQ_EMPTY(&sc->sc_txq[2].active))
		nreaped += mwl_tx_processq(sc, &sc->sc_txq[2]);
	if (!STAILQ_EMPTY(&sc->sc_txq[3].active))
		nreaped += mwl_tx_processq(sc, &sc->sc_txq[3]);

	if (nreaped != 0) {
		/* something completed: clear the tx timer and restart output */
		sc->sc_tx_timer = 0;
		if (mbufq_first(&sc->sc_snd) != NULL) {
			/* NB: kick fw; the tx thread may have been preempted */
			mwl_hal_txstart(sc->sc_mh, 0);
			mwl_start(sc);
		}
	}
}

/*
 * Empty one tx queue: every buffer on the active list has its DMA map
 * unloaded, its node reference (if any) released and its mbuf freed,
 * and is then returned with mwl_puttxbuf_tail.
 */
static void
mwl_tx_draintxq(struct mwl_softc *sc, struct mwl_txq *txq)
{
	struct ieee80211_node *ni;
	struct mwl_txbuf *bf;
	u_int ix;

	/*
	 * NB: this assumes output has been stopped and
	 *     we do not need to block mwl_tx_tasklet
	 */
	for (ix = 0;; ix++) {
		MWL_TXQ_LOCK(txq);
		bf = STAILQ_FIRST(&txq->active);
		if (bf == NULL) {
			MWL_TXQ_UNLOCK(txq);
			break;
		}
		STAILQ_REMOVE_HEAD(&txq->active, bf_list);
		MWL_TXQ_UNLOCK(txq);
#ifdef MWL_DEBUG
		if (sc->sc_debug & MWL_DEBUG_RESET) {
			struct ieee80211com *ic = &sc->sc_ic;
			const struct mwltxrec *tr =
			    mtod(bf->bf_m, const struct mwltxrec *);
			mwl_printtxbuf(bf, txq->qnum, ix);
			ieee80211_dump_pkt(ic, (const uint8_t *)&tr->wh,
				bf->bf_m->m_len - sizeof(tr->fwlen), 0, -1);
		}
#endif /* MWL_DEBUG */
		bus_dmamap_unload(sc->sc_dmat, bf->bf_dmamap);
		ni = bf->bf_node;
		if (ni != NULL) {
			/*
			 * Reclaim node reference.
			 */
			ieee80211_free_node(ni);
		}
		m_freem(bf->bf_m);

		mwl_puttxbuf_tail(txq, bf);
	}
}

/*
 * Drain the transmit queues and reclaim resources.
 */
static void
mwl_draintxq(struct mwl_softc *sc)
{
	int i;

	for (i = 0; i < MWL_NUM_TX_QUEUES; i++)
		mwl_tx_draintxq(sc, &sc->sc_txq[i]);
	sc->sc_tx_timer = 0;
}

#ifdef MWL_DIAGAPI
/*
 * Reset the transmit queues to a pristine state after a fw download.
 */
static void
mwl_resettxq(struct mwl_softc *sc)
{
	int i;

	for (i = 0; i < MWL_NUM_TX_QUEUES; i++)
		mwl_txq_reset(sc, &sc->sc_txq[i]);
}
#endif /* MWL_DIAGAPI */

/*
 * Clear the transmit queues of any frames submitted for the
 * specified vap.  This is done when the vap is deleted so we
 * don't potentially reference the vap after it is gone.
 * Note we cannot remove the frames; we only reclaim the node
 * reference.
 */
static void
mwl_cleartxq(struct mwl_softc *sc, struct ieee80211vap *vap)
{
	struct mwl_txq *txq;
	struct mwl_txbuf *bf;
	int i;

	for (i = 0; i < MWL_NUM_TX_QUEUES; i++) {
		txq = &sc->sc_txq[i];
		MWL_TXQ_LOCK(txq);
		STAILQ_FOREACH(bf, &txq->active, bf_list) {
			struct ieee80211_node *ni = bf->bf_node;
			if (ni != NULL && ni->ni_vap == vap) {
				/* detach first so completion sees no node */
				bf->bf_node = NULL;
				ieee80211_free_node(ni);
			}
		}
		MWL_TXQ_UNLOCK(txq);
	}
}

/*
 * Action frame hook.  HT MIMO power save frames are consumed here and
 * their control field is pushed to the hal for the sending station;
 * every other action frame is passed to the saved sc_recv_action
 * handler and its result returned.
 */
static int
mwl_recv_action(struct ieee80211_node *ni, const struct ieee80211_frame *wh,
	const uint8_t *frm, const uint8_t *efrm)
{
	struct mwl_softc *sc = ni->ni_ic->ic_softc;
	const struct ieee80211_action *ia;

	ia = (const struct ieee80211_action *) frm;
	if (ia->ia_category == IEEE80211_ACTION_CAT_HT &&
	    ia->ia_action == IEEE80211_ACTION_HT_MIMOPWRSAVE) {
		const struct ieee80211_action_ht_mimopowersave *mps =
		    (const struct ieee80211_action_ht_mimopowersave *) ia;

		mwl_hal_setmimops(sc->sc_mh, ni->ni_macaddr,
		    mps->am_control & IEEE80211_A_HT_MIMOPWRSAVE_ENA,
		    MS(mps->am_control, IEEE80211_A_HT_MIMOPWRSAVE_MODE));
		return 0;
	} else
		return sc->sc_recv_action(ni, wh, frm, efrm);
}

/*
 * ADDBA request hook.  Ensures a BA stream is attached to tap
 * (claiming a free per-node slot and allocating a stream from the hal
 * when tap->txa_private is unset), fetches the starting sequence
 * number from the firmware, then chains to the saved
 * sc_addba_request handler.  Returns 0 without chaining when the node
 * has no free slot or the hal has no stream to give.
 */
static int
mwl_addba_request(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap,
	int dialogtoken, int baparamset, int batimeout)
{
	struct mwl_softc *sc = ni->ni_ic->ic_softc;
	struct ieee80211vap *vap = ni->ni_vap;
	struct mwl_node *mn = MWL_NODE(ni);
	struct mwl_bastate *bas;

	bas = tap->txa_private;
	if (bas == NULL) {
		const MWL_HAL_BASTREAM *sp;
		/*
		 * Check for a free BA stream slot.
		 */
#if MWL_MAXBA > 3
		if (mn->mn_ba[3].bastream == NULL)
			bas = &mn->mn_ba[3];
		else
#endif
#if MWL_MAXBA > 2
		if (mn->mn_ba[2].bastream == NULL)
			bas = &mn->mn_ba[2];
		else
#endif
#if MWL_MAXBA > 1
		if (mn->mn_ba[1].bastream == NULL)
			bas = &mn->mn_ba[1];
		else
#endif
#if MWL_MAXBA > 0
		if (mn->mn_ba[0].bastream == NULL)
			bas = &mn->mn_ba[0];
		else
#endif
		{
			/* sta already has max BA streams */
			/* XXX assign BA stream to highest priority tid */
			DPRINTF(sc, MWL_DEBUG_AMPDU,
			    "%s: already has max bastreams\n", __func__);
			sc->sc_stats.mst_ampdu_reject++;
			return 0;
		}
		/* NB: no held reference to ni */
		sp = mwl_hal_bastream_alloc(MWL_VAP(vap)->mv_hvap,
		    (baparamset & IEEE80211_BAPS_POLICY_IMMEDIATE) != 0,
		    ni->ni_macaddr, tap->txa_tid, ni->ni_htparam,
		    ni, tap);
		if (sp == NULL) {
			/*
			 * No available stream, return 0 so no
			 * a-mpdu aggregation will be done.
			 */
			DPRINTF(sc, MWL_DEBUG_AMPDU,
			    "%s: no bastream available\n", __func__);
			sc->sc_stats.mst_ampdu_nostream++;
			return 0;
		}
		DPRINTF(sc, MWL_DEBUG_AMPDU, "%s: alloc bastream %p\n",
		    __func__, sp);
		/* NB: qos is left zero so we won't match in mwl_tx_start */
		bas->bastream = sp;
		tap->txa_private = bas;
	}
	/* fetch current seq# from the firmware; if available */
	if (mwl_hal_bastream_get_seqno(sc->sc_mh, bas->bastream,
	    vap->iv_opmode == IEEE80211_M_STA ? vap->iv_myaddr : ni->ni_macaddr,
	    &tap->txa_start) != 0)
		tap->txa_start = 0;
	return sc->sc_addba_request(ni, tap, dialogtoken, baparamset, batimeout);
}

/*
 * ADDBA response hook.  On IEEE80211_STATUS_SUCCESS the pre-allocated
 * BA stream is created in the firmware and its tx queue recorded; on
 * any other code the stream is destroyed and its slot released.
 * Chains to the saved sc_addba_response handler, except that 0 is
 * returned directly when no stream is attached to tap or when stream
 * creation fails.
 */
static int
mwl_addba_response(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap,
	int code, int baparamset, int batimeout)
{
	struct mwl_softc *sc = ni->ni_ic->ic_softc;
	struct mwl_bastate *bas;

	bas = tap->txa_private;
	if (bas == NULL) {
		/* XXX should not happen */
		DPRINTF(sc, MWL_DEBUG_AMPDU,
		    "%s: no BA stream allocated, TID %d\n",
		    __func__, tap->txa_tid);
		sc->sc_stats.mst_addba_nostream++;
		return 0;
	}
	if (code == IEEE80211_STATUS_SUCCESS) {
		struct ieee80211vap *vap = ni->ni_vap;
		int bufsiz, error;

		/*
		 * Tell the firmware to setup the BA stream;
		 * we know resources are available because we
		 * pre-allocated one before forming the request.
		 */
		bufsiz = MS(baparamset, IEEE80211_BAPS_BUFSIZ);
		if (bufsiz == 0)
			bufsiz = IEEE80211_AGGR_BAWMAX;
		error = mwl_hal_bastream_create(MWL_VAP(vap)->mv_hvap,
		    bas->bastream, bufsiz, bufsiz, tap->txa_start);
		if (error != 0) {
			/*
			 * Setup failed, return immediately so no a-mpdu
			 * aggregation will be done.
			 */
			mwl_hal_bastream_destroy(sc->sc_mh, bas->bastream);
			mwl_bastream_free(bas);
			tap->txa_private = NULL;

			DPRINTF(sc, MWL_DEBUG_AMPDU,
			    "%s: create failed, error %d, bufsiz %d TID %d "
			    "htparam 0x%x\n", __func__, error, bufsiz,
			    tap->txa_tid, ni->ni_htparam);
			sc->sc_stats.mst_bacreate_failed++;
			return 0;
		}
		/* NB: cache txq to avoid ptr indirect */
		mwl_bastream_setup(bas, tap->txa_tid, bas->bastream->txq);
		DPRINTF(sc, MWL_DEBUG_AMPDU,
		    "%s: bastream %p assigned to txq %d TID %d bufsiz %d "
		    "htparam 0x%x\n", __func__, bas->bastream,
		    bas->txq, tap->txa_tid, bufsiz, ni->ni_htparam);
	} else {
		/*
		 * Other side NAK'd us; return the resources.
		 */
		DPRINTF(sc, MWL_DEBUG_AMPDU,
		    "%s: request failed with code %d, destroy bastream %p\n",
		    __func__, code, bas->bastream);
		mwl_hal_bastream_destroy(sc->sc_mh, bas->bastream);
		mwl_bastream_free(bas);
		tap->txa_private = NULL;
	}
	/* NB: firmware sends BAR so we don't need to */
	return sc->sc_addba_response(ni, tap, code, baparamset, batimeout);
}

/*
 * ADDBA stop hook.  Destroys and releases any BA stream attached to
 * tap, then chains to the saved sc_addba_stop handler.
 */
static void
mwl_addba_stop(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap)
{
	struct mwl_softc *sc = ni->ni_ic->ic_softc;
	struct mwl_bastate *bas;

	bas = tap->txa_private;
	if (bas != NULL) {
		DPRINTF(sc, MWL_DEBUG_AMPDU, "%s: destroy bastream %p\n",
		    __func__, bas->bastream);
		mwl_hal_bastream_destroy(sc->sc_mh, bas->bastream);
		mwl_bastream_free(bas);
		tap->txa_private = NULL;
	}
	sc->sc_addba_stop(ni, tap);
}

/*
 * Setup the rx data structures.  This should only be
 * done once or we may get out of sync with the firmware.
 */
static int
mwl_startrecv(struct mwl_softc *sc)
{
	if (!sc->sc_recvsetup) {
		struct mwl_rxbuf *bf, *prev;
		struct mwl_rxdesc *ds;

		/*
		 * Initialize every rx buffer and point each descriptor's
		 * pPhysNext at the following buffer; the last descriptor
		 * is pointed back at the first one.
		 */
		prev = NULL;
		STAILQ_FOREACH(bf, &sc->sc_rxbuf, bf_list) {
			int error = mwl_rxbuf_init(sc, bf);
			if (error != 0) {
				DPRINTF(sc, MWL_DEBUG_RECV,
					"%s: mwl_rxbuf_init failed %d\n",
					__func__, error);
				return error;
			}
			if (prev != NULL) {
				ds = prev->bf_desc;
				ds->pPhysNext = htole32(bf->bf_daddr);
			}
			prev = bf;
		}
		if (prev != NULL) {
			ds = prev->bf_desc;
			ds->pPhysNext =
			    htole32(STAILQ_FIRST(&sc->sc_rxbuf)->bf_daddr);
		}
		sc->sc_recvsetup = 1;
	}
	mwl_mode_init(sc);		/* set filters, etc. */
	return 0;
}

/*
 * Pick the hal AP mode for a channel from the channel type and the
 * vap's pure-N / pure-G flags.
 */
static MWL_HAL_APMODE
mwl_getapmode(const struct ieee80211vap *vap, struct ieee80211_channel *chan)
{
	MWL_HAL_APMODE mode;

	if (IEEE80211_IS_CHAN_HT(chan)) {
		if (vap->iv_flags_ht & IEEE80211_FHT_PUREN)
			mode = AP_MODE_N_ONLY;
		else if (IEEE80211_IS_CHAN_5GHZ(chan))
			mode = AP_MODE_AandN;
		else if (vap->iv_flags & IEEE80211_F_PUREG)
			mode = AP_MODE_GandN;
		else
			mode = AP_MODE_BandGandN;
	} else if (IEEE80211_IS_CHAN_ANYG(chan)) {
		if (vap->iv_flags & IEEE80211_F_PUREG)
			mode = AP_MODE_G_ONLY;
		else
			mode = AP_MODE_MIXED;
	} else if (IEEE80211_IS_CHAN_B(chan))
		mode = AP_MODE_B_ONLY;
	else if (IEEE80211_IS_CHAN_A(chan))
		mode = AP_MODE_A_ONLY;
	else
		mode = AP_MODE_MIXED;		/* XXX should not happen? */
	return mode;
}

/*
 * Push the AP mode chosen by mwl_getapmode to the vap's hal handle
 * and return the hal's result.
 */
static int
mwl_setapmode(struct ieee80211vap *vap, struct ieee80211_channel *chan)
{
	struct mwl_hal_vap *hvap = MWL_VAP(vap)->mv_hvap;
	return mwl_hal_setapmode(hvap, mwl_getapmode(vap, chan));
}

/*
 * Set/change channels.
 */
static int
mwl_chan_set(struct mwl_softc *sc, struct ieee80211_channel *chan)
{
	struct mwl_hal *mh = sc->sc_mh;
	struct ieee80211com *ic = &sc->sc_ic;
	MWL_HAL_CHANNEL hchan;
	int maxtxpow;

	DPRINTF(sc, MWL_DEBUG_RESET, "%s: chan %u MHz/flags 0x%x\n",
	    __func__, chan->ic_freq, chan->ic_flags);

	/*
	 * Convert to a HAL channel description with
	 * the flags constrained to reflect the current
	 * operating mode.
	 */
	mwl_mapchan(&hchan, chan);
	mwl_hal_intrset(mh, 0);		/* disable interrupts */
#if 0
	mwl_draintxq(sc);		/* clear pending tx frames */
#endif
	mwl_hal_setchannel(mh, &hchan);
	/*
	 * Tx power is cap'd by the regulatory setting and
	 * possibly a user-set limit.  We pass the min of
	 * these to the hal to apply them to the cal data
	 * for this channel.
	 * XXX min bound?
	 */
	/* ic_maxregpower is doubled before comparing; the min is halved again */
	maxtxpow = 2*chan->ic_maxregpower;
	if (maxtxpow > ic->ic_txpowlimit)
		maxtxpow = ic->ic_txpowlimit;
	mwl_hal_settxpower(mh, &hchan, maxtxpow / 2);
	/* NB: potentially change mcast/mgt rates */
	mwl_setcurchanrates(sc);

	/*
	 * Update internal state.
	 */
	sc->sc_tx_th.wt_chan_freq = htole16(chan->ic_freq);
	sc->sc_rx_th.wr_chan_freq = htole16(chan->ic_freq);
	if (IEEE80211_IS_CHAN_A(chan)) {
		sc->sc_tx_th.wt_chan_flags = htole16(IEEE80211_CHAN_A);
		sc->sc_rx_th.wr_chan_flags = htole16(IEEE80211_CHAN_A);
	} else if (IEEE80211_IS_CHAN_ANYG(chan)) {
		sc->sc_tx_th.wt_chan_flags = htole16(IEEE80211_CHAN_G);
		sc->sc_rx_th.wr_chan_flags = htole16(IEEE80211_CHAN_G);
	} else {
		sc->sc_tx_th.wt_chan_flags = htole16(IEEE80211_CHAN_B);
		sc->sc_rx_th.wr_chan_flags = htole16(IEEE80211_CHAN_B);
	}
	sc->sc_curchan = hchan;
	/* restore the interrupt mask disabled above */
	mwl_hal_intrset(mh, sc->sc_imask);

	return 0;
}

/*
 * Scan start notification; only emits a debug message.
 */
static void
mwl_scan_start(struct ieee80211com *ic)
{
	struct mwl_softc *sc = ic->ic_softc;

	DPRINTF(sc, MWL_DEBUG_STATE, "%s\n", __func__);
}

/*
 * Scan end notification; only emits a debug message.
 */
static void
mwl_scan_end(struct ieee80211com *ic)
{
	struct mwl_softc *sc = ic->ic_softc;

	DPRINTF(sc, MWL_DEBUG_STATE, "%s\n", __func__);
}

/*
 * Switch the hardware to ic->ic_curchan; the result of mwl_chan_set
 * is deliberately discarded.
 */
static void
mwl_set_channel(struct ieee80211com *ic)
{
	struct mwl_softc *sc = ic->ic_softc;

	(void) mwl_chan_set(sc, ic->ic_curchan);
}

/*
 * Handle a channel switch request.  We inform the firmware
 * and mark the global state to suppress various actions.
 * NB: we issue only one request to the fw; we may be called
 * multiple times if there are multiple vap's.
 */
static void
mwl_startcsa(struct ieee80211vap *vap)
{
	struct ieee80211com *ic = vap->iv_ic;
	struct mwl_softc *sc = ic->ic_softc;
	MWL_HAL_CHANNEL hchan;

	/* a request is already outstanding; nothing more to do */
	if (sc->sc_csapending)
		return;

	mwl_mapchan(&hchan, ic->ic_csa_newchan);
	/* 1 =>'s quiet channel */
	mwl_hal_setchannelswitchie(sc->sc_mh, &hchan, 1, ic->ic_csa_count);
	sc->sc_csapending = 1;
}

/*
 * Plumb any static WEP key for the station.  This is
 * necessary as we must propagate the key from the
 * global key table of the vap to each sta db entry.
*/ static void mwl_setanywepkey(struct ieee80211vap *vap, const uint8_t mac[IEEE80211_ADDR_LEN]) { if ((vap->iv_flags & (IEEE80211_F_PRIVACY|IEEE80211_F_WPA)) == IEEE80211_F_PRIVACY && vap->iv_def_txkey != IEEE80211_KEYIX_NONE && vap->iv_nw_keys[vap->iv_def_txkey].wk_keyix != IEEE80211_KEYIX_NONE) (void) _mwl_key_set(vap, &vap->iv_nw_keys[vap->iv_def_txkey], mac); } static int mwl_peerstadb(struct ieee80211_node *ni, int aid, int staid, MWL_HAL_PEERINFO *pi) { #define WME(ie) ((const struct ieee80211_wme_info *) ie) struct ieee80211vap *vap = ni->ni_vap; struct mwl_hal_vap *hvap; int error; if (vap->iv_opmode == IEEE80211_M_WDS) { /* * WDS vap's do not have a f/w vap; instead they piggyback * on an AP vap and we must install the sta db entry and * crypto state using that AP's handle (the WDS vap has none). */ hvap = MWL_VAP(vap)->mv_ap_hvap; } else hvap = MWL_VAP(vap)->mv_hvap; error = mwl_hal_newstation(hvap, ni->ni_macaddr, aid, staid, pi, ni->ni_flags & (IEEE80211_NODE_QOS | IEEE80211_NODE_HT), ni->ni_ies.wme_ie != NULL ? WME(ni->ni_ies.wme_ie)->wme_info : 0); if (error == 0) { /* * Setup security for this station. For sta mode this is * needed even though do the same thing on transition to * AUTH state because the call to mwl_hal_newstation * clobbers the crypto state we setup. */ mwl_setanywepkey(vap, ni->ni_macaddr); } return error; #undef WME } static void mwl_setglobalkeys(struct ieee80211vap *vap) { struct ieee80211_key *wk; wk = &vap->iv_nw_keys[0]; for (; wk < &vap->iv_nw_keys[IEEE80211_WEP_NKID]; wk++) if (wk->wk_keyix != IEEE80211_KEYIX_NONE) (void) _mwl_key_set(vap, wk, vap->iv_myaddr); } /* * Convert a legacy rate set to a firmware bitmask. 
*/ static uint32_t get_rate_bitmap(const struct ieee80211_rateset *rs) { uint32_t rates; int i; rates = 0; for (i = 0; i < rs->rs_nrates; i++) switch (rs->rs_rates[i] & IEEE80211_RATE_VAL) { case 2: rates |= 0x001; break; case 4: rates |= 0x002; break; case 11: rates |= 0x004; break; case 22: rates |= 0x008; break; case 44: rates |= 0x010; break; case 12: rates |= 0x020; break; case 18: rates |= 0x040; break; case 24: rates |= 0x080; break; case 36: rates |= 0x100; break; case 48: rates |= 0x200; break; case 72: rates |= 0x400; break; case 96: rates |= 0x800; break; case 108: rates |= 0x1000; break; } return rates; } /* * Construct an HT firmware bitmask from an HT rate set. */ static uint32_t get_htrate_bitmap(const struct ieee80211_htrateset *rs) { uint32_t rates; int i; rates = 0; for (i = 0; i < rs->rs_nrates; i++) { if (rs->rs_rates[i] < 16) rates |= 1<rs_rates[i]; } return rates; } /* * Craft station database entry for station. * NB: use host byte order here, the hal handles byte swapping. 
*/
/*
 * pi is zeroed and then filled from the node's legacy rates and
 * capinfo; HT fields are filled only when the node has
 * IEEE80211_NODE_HT set.  Returns pi so the call can be used directly
 * as an argument.
 */
static MWL_HAL_PEERINFO *
mkpeerinfo(MWL_HAL_PEERINFO *pi, const struct ieee80211_node *ni)
{
	const struct ieee80211vap *vap = ni->ni_vap;

	memset(pi, 0, sizeof(*pi));
	pi->LegacyRateBitMap = get_rate_bitmap(&ni->ni_rates);
	pi->CapInfo = ni->ni_capinfo;
	if (ni->ni_flags & IEEE80211_NODE_HT) {
		/* HT capabilities, etc */
		pi->HTCapabilitiesInfo = ni->ni_htcap;
		/* XXX pi.HTCapabilitiesInfo */
		pi->MacHTParamInfo = ni->ni_htparam;
		pi->HTRateBitMap = get_htrate_bitmap(&ni->ni_htrates);
		pi->AddHtInfo.ControlChan = ni->ni_htctlchan;
		pi->AddHtInfo.AddChan = ni->ni_ht2ndchan;
		pi->AddHtInfo.OpMode = ni->ni_htopmode;
		pi->AddHtInfo.stbc = ni->ni_htstbc;

		/* constrain according to local configuration */
		if ((vap->iv_flags_ht & IEEE80211_FHT_SHORTGI40) == 0)
			pi->HTCapabilitiesInfo &= ~IEEE80211_HTCAP_SHORTGI40;
		if ((vap->iv_flags_ht & IEEE80211_FHT_SHORTGI20) == 0)
			pi->HTCapabilitiesInfo &= ~IEEE80211_HTCAP_SHORTGI20;
		if (ni->ni_chw != 40)
			pi->HTCapabilitiesInfo &= ~IEEE80211_HTCAP_CHWIDTH40;
	}
	return pi;
}

/*
 * Re-create the local sta db entry for a vap to ensure
 * up to date WME state is pushed to the firmware.  Because
 * this resets crypto state this must be followed by a
 * reload of any keys in the global key table.
 */
static int
mwl_localstadb(struct ieee80211vap *vap)
{
#define	WME(ie) ((const struct ieee80211_wme_info *) ie)
	struct mwl_hal_vap *hvap = MWL_VAP(vap)->mv_hvap;
	struct ieee80211_node *bss;
	MWL_HAL_PEERINFO pi;
	int error;

	switch (vap->iv_opmode) {
	case IEEE80211_M_STA:
		bss = vap->iv_bss;
		/* peer info is supplied only once the vap is in RUN state */
		error = mwl_hal_newstation(hvap, vap->iv_myaddr, 0, 0,
		    vap->iv_state == IEEE80211_S_RUN ?
			mkpeerinfo(&pi, bss) : NULL,
		    (bss->ni_flags & (IEEE80211_NODE_QOS | IEEE80211_NODE_HT)),
		    bss->ni_ies.wme_ie != NULL ?
			WME(bss->ni_ies.wme_ie)->wme_info : 0);
		if (error == 0)
			mwl_setglobalkeys(vap);
		break;
	case IEEE80211_M_HOSTAP:
	case IEEE80211_M_MBSS:
		error = mwl_hal_newstation(hvap, vap->iv_myaddr,
		    0, 0, NULL, vap->iv_flags & IEEE80211_F_WME, 0);
		if (error == 0)
			mwl_setglobalkeys(vap);
		break;
	default:
		/* other operating modes have nothing to re-create here */
		error = 0;
		break;
	}
	return error;
#undef WME
}

/*
 * State change handler for a vap.  Driver work is done in three
 * phases: radar/timer teardown and per-state firmware actions before
 * the saved mv_newstate method runs, the mv_newstate call itself, and
 * (only when entering RUN without error) firmware setup that needs
 * the up to date net80211 state.  Returns the error from mv_newstate,
 * or from mwl_reset_vap when beacon setup fails.
 */
static int
mwl_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg)
{
	struct mwl_vap *mvp = MWL_VAP(vap);
	struct mwl_hal_vap *hvap = mvp->mv_hvap;
	struct ieee80211com *ic = vap->iv_ic;
	struct ieee80211_node *ni = NULL;
	struct mwl_softc *sc = ic->ic_softc;
	struct mwl_hal *mh = sc->sc_mh;
	enum ieee80211_state ostate = vap->iv_state;
	int error;

	DPRINTF(sc, MWL_DEBUG_STATE, "%s: %s: %s -> %s\n",
	    vap->iv_ifp->if_xname, __func__,
	    ieee80211_state_name[ostate], ieee80211_state_name[nstate]);

	/* the aging timer is restarted below if we end up in RUN */
	callout_stop(&sc->sc_timer);
	/*
	 * Clear current radar detection state.
	 */
	if (ostate == IEEE80211_S_CAC) {
		/* stop quiet mode radar detection */
		mwl_hal_setradardetection(mh, DR_CHK_CHANNEL_AVAILABLE_STOP);
	} else if (sc->sc_radarena) {
		/* stop in-service radar detection */
		mwl_hal_setradardetection(mh, DR_DFS_DISABLE);
		sc->sc_radarena = 0;
	}
	/*
	 * Carry out per-state actions before doing net80211 work.
	 */
	if (nstate == IEEE80211_S_INIT) {
		/* NB: only ap+sta vap's have a fw entity */
		if (hvap != NULL)
			mwl_hal_stop(hvap);
	} else if (nstate == IEEE80211_S_SCAN) {
		mwl_hal_start(hvap);
		/* NB: this disables beacon frames */
		mwl_hal_setinframode(hvap);
	} else if (nstate == IEEE80211_S_AUTH) {
		/*
		 * Must create a sta db entry in case a WEP key needs to
		 * be plumbed.  This entry will be overwritten if we
		 * associate; otherwise it will be reclaimed on node free.
		 */
		ni = vap->iv_bss;
		MWL_NODE(ni)->mn_hvap = hvap;
		(void) mwl_peerstadb(ni, 0, 0, NULL);
	} else if (nstate == IEEE80211_S_CSA) {
		/* XXX move to below? */
		if (vap->iv_opmode == IEEE80211_M_HOSTAP ||
		    vap->iv_opmode == IEEE80211_M_MBSS)
			mwl_startcsa(vap);
	} else if (nstate == IEEE80211_S_CAC) {
		/* XXX move to below? */
		/* stop ap xmit and enable quiet mode radar detection */
		mwl_hal_setradardetection(mh, DR_CHK_CHANNEL_AVAILABLE_START);
	}

	/*
	 * Invoke the parent method to do net80211 work.
	 */
	error = mvp->mv_newstate(vap, nstate, arg);

	/*
	 * Carry out work that must be done after net80211 runs;
	 * this work requires up to date state (e.g. iv_bss).
	 */
	if (error == 0 && nstate == IEEE80211_S_RUN) {
		/* NB: collect bss node again, it may have changed */
		ni = vap->iv_bss;

		DPRINTF(sc, MWL_DEBUG_STATE,
		    "%s: %s(RUN): iv_flags 0x%08x bintvl %d bssid %s "
		    "capinfo 0x%04x chan %d\n",
		    vap->iv_ifp->if_xname, __func__, vap->iv_flags,
		    ni->ni_intval, ether_sprintf(ni->ni_bssid), ni->ni_capinfo,
		    ieee80211_chan2ieee(ic, ic->ic_curchan));

		/*
		 * Recreate local sta db entry to update WME/HT state.
		 */
		/* NB: the return value of mwl_localstadb is not checked */
		mwl_localstadb(vap);
		switch (vap->iv_opmode) {
		case IEEE80211_M_HOSTAP:
		case IEEE80211_M_MBSS:
			if (ostate == IEEE80211_S_CAC) {
				/* enable in-service radar detection */
				mwl_hal_setradardetection(mh,
				    DR_IN_SERVICE_MONITOR_START);
				sc->sc_radarena = 1;
			}
			/*
			 * Allocate and setup the beacon frame
			 * (and related state).
			 */
			error = mwl_reset_vap(vap, IEEE80211_S_RUN);
			if (error != 0) {
				DPRINTF(sc, MWL_DEBUG_STATE,
				    "%s: beacon setup failed, error %d\n",
				    __func__, error);
				goto bad;
			}
			/* NB: must be after setting up beacon */
			mwl_hal_start(hvap);
			break;
		case IEEE80211_M_STA:
			DPRINTF(sc, MWL_DEBUG_STATE, "%s: %s: aid 0x%x\n",
			    vap->iv_ifp->if_xname, __func__, ni->ni_associd);
			/*
			 * Set state now that we're associated.
			 */
			mwl_hal_setassocid(hvap, ni->ni_bssid, ni->ni_associd);
			mwl_setrates(vap);
			mwl_hal_setrtsthreshold(hvap, vap->iv_rtsthreshold);
			/* first DWDS vap to reach RUN turns DWDS on in the hal */
			if ((vap->iv_flags & IEEE80211_F_DWDS) &&
			    sc->sc_ndwdsvaps++ == 0)
				mwl_hal_setdwds(mh, 1);
			break;
		case IEEE80211_M_WDS:
			DPRINTF(sc, MWL_DEBUG_STATE, "%s: %s: bssid %s\n",
			    vap->iv_ifp->if_xname, __func__,
			    ether_sprintf(ni->ni_bssid));
			mwl_seteapolformat(vap);
			break;
		default:
			break;
		}
		/*
		 * Set CS mode according to operating channel;
		 * this mostly an optimization for 5GHz.
		 *
		 * NB: must follow mwl_hal_start which resets csmode
		 */
		if (IEEE80211_IS_CHAN_5GHZ(ic->ic_bsschan))
			mwl_hal_setcsmode(mh, CSMODE_AGGRESSIVE);
		else
			mwl_hal_setcsmode(mh, CSMODE_AUTO_ENA);
		/*
		 * Start timer to prod firmware.
		 */
		if (sc->sc_ageinterval != 0)
			callout_reset(&sc->sc_timer, sc->sc_ageinterval*hz,
			    mwl_agestations, sc);
	} else if (nstate == IEEE80211_S_SLEEP) {
		/* XXX set chip in power save */
	} else if ((vap->iv_flags & IEEE80211_F_DWDS) &&
	    --sc->sc_ndwdsvaps == 0)
		/*
		 * NOTE(review): this branch is reached for any other state
		 * change (and for a failed RUN transition) on a DWDS vap,
		 * decrementing the count each time - confirm that is intended.
		 */
		mwl_hal_setdwds(mh, 0);
bad:
	return error;
}

/*
 * Manage station id's; these are separate from AID's
 * as AID's may have values out of the range of possible
 * station id's acceptable to the firmware.
 */
/*
 * Uses aid itself as the station id when it lies strictly between 0
 * and MWL_MAXSTAID and is not already marked in sc_staid; otherwise
 * takes the lowest clear id starting from 1.  The chosen id is marked
 * in use and returned.
 *
 * NOTE(review): when the search finds no clear id, staid leaves the
 * loop equal to MWL_MAXSTAID and is still marked and returned -
 * confirm sc_staid is sized for that or that exhaustion cannot occur.
 */
static int
allocstaid(struct mwl_softc *sc, int aid)
{
	int staid;

	if (!(0 < aid && aid < MWL_MAXSTAID) || isset(sc->sc_staid, aid)) {
		/* NB: don't use 0 */
		for (staid = 1; staid < MWL_MAXSTAID; staid++)
			if (isclr(sc->sc_staid, staid))
				break;
	} else
		staid = aid;
	setbit(sc->sc_staid, staid);
	return staid;
}

/*
 * Mark a station id as free again.
 */
static void
delstaid(struct mwl_softc *sc, int staid)
{
	clrbit(sc->sc_staid, staid);
}

/*
 * Setup driver-specific state for a newly associated node.
 * Note that we're called also on a re-associate, the isnew
 * param tells us if this is the first time or not.
*/
static void
mwl_newassoc(struct ieee80211_node *ni, int isnew)
{
	struct ieee80211vap *vap = ni->ni_vap;
        struct mwl_softc *sc = vap->iv_ic->ic_softc;
	struct mwl_node *mn = MWL_NODE(ni);
	MWL_HAL_PEERINFO pi;
	uint16_t aid;
	int error;

	aid = IEEE80211_AID(ni->ni_associd);
	if (isnew) {
		/* first association: pick a station id and bind the hal vap */
		mn->mn_staid = allocstaid(sc, aid);
		mn->mn_hvap = MWL_VAP(vap)->mv_hvap;
	} else {
		/* re-association: the existing station id is kept */
		mn = MWL_NODE(ni);
		/* XXX reset BA stream? */
	}
	DPRINTF(sc, MWL_DEBUG_NODE, "%s: mac %s isnew %d aid %d staid %d\n",
	    __func__, ether_sprintf(ni->ni_macaddr), isnew, aid, mn->mn_staid);
	error = mwl_peerstadb(ni, aid, mn->mn_staid, mkpeerinfo(&pi, ni));
	if (error != 0) {
		DPRINTF(sc, MWL_DEBUG_NODE,
		    "%s: error %d creating sta db entry\n",
		    __func__, error);
		/* XXX how to deal with error? */
	}
}

/*
 * Periodically poke the firmware to age out station state
 * (power save queues, pending tx aggregates).
 */
static void
mwl_agestations(void *arg)
{
	struct mwl_softc *sc = arg;

	mwl_hal_setkeepalive(sc->sc_mh);
	if (sc->sc_ageinterval != 0)		/* NB: catch dynamic changes */
		callout_schedule(&sc->sc_timer, sc->sc_ageinterval*hz);
}

/*
 * Linear search of a hal channel table for the entry whose ieee
 * channel number equals ieee; returns NULL when there is none.
 */
static const struct mwl_hal_channel *
findhalchannel(const MWL_HAL_CHANNELINFO *ci, int ieee)
{
	int i;

	for (i = 0; i < ci->nchannels; i++) {
		const struct mwl_hal_channel *hc = &ci->channels[i];
		if (hc->ieee == ieee)
			return hc;
	}
	return NULL;
}

/*
 * Validate a proposed channel list against the hal's channel tables.
 * Every channel must be 2.4GHz or 5GHz and must have an entry in the
 * matching hal table (for HT40 channels the extension channel is
 * tried as well); ic_maxpower of each channel is capped at twice the
 * table's maxTxPow.  Returns 0 when all channels pass, EINVAL at the
 * first one that does not.
 */
static int
mwl_setregdomain(struct ieee80211com *ic, struct ieee80211_regdomain *rd,
	int nchan, struct ieee80211_channel chans[])
{
	struct mwl_softc *sc = ic->ic_softc;
	struct mwl_hal *mh = sc->sc_mh;
	const MWL_HAL_CHANNELINFO *ci;
	int i;

	for (i = 0; i < nchan; i++) {
		struct ieee80211_channel *c = &chans[i];
		const struct mwl_hal_channel *hc;

		/*
		 * NOTE(review): the result of mwl_hal_getchannelinfo is not
		 * checked here, so ci is used as returned - confirm the call
		 * cannot fail for these band/width combinations.
		 */
		if (IEEE80211_IS_CHAN_2GHZ(c)) {
			mwl_hal_getchannelinfo(mh, MWL_FREQ_BAND_2DOT4GHZ,
			    IEEE80211_IS_CHAN_HT40(c) ?
				MWL_CH_40_MHz_WIDTH : MWL_CH_20_MHz_WIDTH, &ci);
		} else if (IEEE80211_IS_CHAN_5GHZ(c)) {
			mwl_hal_getchannelinfo(mh, MWL_FREQ_BAND_5GHZ,
			    IEEE80211_IS_CHAN_HT40(c) ?
				MWL_CH_40_MHz_WIDTH : MWL_CH_20_MHz_WIDTH, &ci);
		} else {
			device_printf(sc->sc_dev,
			    "%s: channel %u freq %u/0x%x not 2.4/5GHz\n",
			    __func__, c->ic_ieee, c->ic_freq, c->ic_flags);
			return EINVAL;
		}
		/*
		 * Verify channel has cal data and cap tx power.
		 */
		hc = findhalchannel(ci, c->ic_ieee);
		if (hc != NULL) {
			if (c->ic_maxpower > 2*hc->maxTxPow)
				c->ic_maxpower = 2*hc->maxTxPow;
			goto next;
		}
		if (IEEE80211_IS_CHAN_HT40(c)) {
			/*
			 * Look for the extension channel since the
			 * hal table only has the primary channel.
			 */
			hc = findhalchannel(ci, c->ic_extieee);
			if (hc != NULL) {
				if (c->ic_maxpower > 2*hc->maxTxPow)
					c->ic_maxpower = 2*hc->maxTxPow;
				goto next;
			}
		}
		device_printf(sc->sc_dev,
		    "%s: no cal data for channel %u ext %u freq %u/0x%x\n",
		    __func__, c->ic_ieee, c->ic_extieee,
		    c->ic_freq, c->ic_flags);
		return EINVAL;
	next:
		;
	}
	return 0;
}

#define	IEEE80211_CHAN_HTG	(IEEE80211_CHAN_HT|IEEE80211_CHAN_G)
#define	IEEE80211_CHAN_HTA	(IEEE80211_CHAN_HT|IEEE80211_CHAN_A)

/*
 * Add an HT40 channel for each entry of a hal channel table.  An
 * ENOENT result for an entry is skipped over; any other non-zero
 * result ends the walk.
 */
static void
addht40channels(struct ieee80211_channel chans[], int maxchans, int *nchans,
	const MWL_HAL_CHANNELINFO *ci, int flags)
{
	int i, error;

	for (i = 0; i < ci->nchannels; i++) {
		const struct mwl_hal_channel *hc = &ci->channels[i];

		error = ieee80211_add_channel_ht40(chans, maxchans, nchans,
		    hc->ieee, hc->maxTxPow, flags);
		if (error != 0 && error != ENOENT)
			break;
	}
}

/*
 * Add a channel for each entry of a hal channel table, for the modes
 * selected in bands; the walk ends at the first non-zero result from
 * ieee80211_add_channel.
 */
static void
addchannels(struct ieee80211_channel chans[], int maxchans, int *nchans,
	const MWL_HAL_CHANNELINFO *ci, const uint8_t bands[])
{
	int i, error;

	error = 0;
	for (i = 0; i < ci->nchannels && error == 0; i++) {
		const struct mwl_hal_channel *hc = &ci->channels[i];

		error = ieee80211_add_channel(chans, maxchans, nchans,
		    hc->ieee, hc->freq, hc->maxTxPow, 0, bands);
	}
}

/*
 * Build the channel list from the four hal tables (2.4GHz and 5GHz,
 * 20MHz and 40MHz wide).  A table the hal cannot provide is skipped.
 */
static void
getchannels(struct mwl_softc *sc, int maxchans, int *nchans,
	struct ieee80211_channel chans[])
{
	const MWL_HAL_CHANNELINFO *ci;
	uint8_t bands[IEEE80211_MODE_BYTES];

	/*
	 * Use the channel info from the hal to craft the
	 * channel list.  Note that we pass back an unsorted
	 * list; the caller is required to sort it for us
	 * (if desired).
	 */
	*nchans = 0;
	if (mwl_hal_getchannelinfo(sc->sc_mh,
	    MWL_FREQ_BAND_2DOT4GHZ, MWL_CH_20_MHz_WIDTH, &ci) == 0) {
		memset(bands, 0, sizeof(bands));
		setbit(bands, IEEE80211_MODE_11B);
		setbit(bands, IEEE80211_MODE_11G);
		setbit(bands, IEEE80211_MODE_11NG);
		addchannels(chans, maxchans, nchans, ci, bands);
	}
	if (mwl_hal_getchannelinfo(sc->sc_mh,
	    MWL_FREQ_BAND_5GHZ, MWL_CH_20_MHz_WIDTH, &ci) == 0) {
		memset(bands, 0, sizeof(bands));
		setbit(bands, IEEE80211_MODE_11A);
		setbit(bands, IEEE80211_MODE_11NA);
		addchannels(chans, maxchans, nchans, ci, bands);
	}
	if (mwl_hal_getchannelinfo(sc->sc_mh,
	    MWL_FREQ_BAND_2DOT4GHZ, MWL_CH_40_MHz_WIDTH, &ci) == 0)
		addht40channels(chans, maxchans, nchans, ci, IEEE80211_CHAN_HTG);
	if (mwl_hal_getchannelinfo(sc->sc_mh,
	    MWL_FREQ_BAND_5GHZ, MWL_CH_40_MHz_WIDTH, &ci) == 0)
		addht40channels(chans, maxchans, nchans, ci, IEEE80211_CHAN_HTA);
}

/*
 * Report the radio's channels by delegating to getchannels.
 */
static void
mwl_getradiocaps(struct ieee80211com *ic,
	int maxchans, int *nchans, struct ieee80211_channel chans[])
{
	struct mwl_softc *sc = ic->ic_softc;

	getchannels(sc, maxchans, nchans, chans);
}

/*
 * Populate ic_channels/ic_nchans from the hal and set the default
 * regulatory domain fields.  Returns EIO when no channel was found,
 * 0 otherwise.
 */
static int
mwl_getchannels(struct mwl_softc *sc)
{
	struct ieee80211com *ic = &sc->sc_ic;

	/*
	 * Use the channel info from the hal to craft the
	 * channel list for net80211.  Note that we pass up
	 * an unsorted list; net80211 will sort it for us.
	 */
	memset(ic->ic_channels, 0, sizeof(ic->ic_channels));
	ic->ic_nchans = 0;
	getchannels(sc, IEEE80211_CHAN_MAX, &ic->ic_nchans, ic->ic_channels);

	ic->ic_regdomain.regdomain = SKU_DEBUG;
	ic->ic_regdomain.country = CTRY_DEFAULT;
	ic->ic_regdomain.location = 'I';
	ic->ic_regdomain.isocc[0] = ' ';	/* XXX? */
	ic->ic_regdomain.isocc[1] = ' ';
	return (ic->ic_nchans == 0 ? EIO : 0);
}
#undef IEEE80211_CHAN_HTA
#undef IEEE80211_CHAN_HTG

#ifdef MWL_DEBUG
/*
 * Print one rx descriptor.  The marker after RC is empty unless
 * RxControl equals EAGLE_RXD_CTRL_DRIVER_OWN, in which case it is
 * " *" when the status has EAGLE_RXD_STATUS_OK set and " !" otherwise.
 */
static void
mwl_printrxbuf(const struct mwl_rxbuf *bf, u_int ix)
{
	const struct mwl_rxdesc *ds = bf->bf_desc;
	uint32_t status = le32toh(ds->Status);

	printf("R[%2u] (DS.V:%p DS.P:0x%jx) NEXT:%08x DATA:%08x RC:%02x%s\n"
	       " STAT:%02x LEN:%04x RSSI:%02x CHAN:%02x RATE:%02x QOS:%04x HT:%04x\n",
	    ix, ds, (uintmax_t)bf->bf_daddr, le32toh(ds->pPhysNext),
	    le32toh(ds->pPhysBuffData), ds->RxControl,
	    ds->RxControl != EAGLE_RXD_CTRL_DRIVER_OWN ?
	        "" : (status & EAGLE_RXD_STATUS_OK) ? " *" : " !",
	    ds->Status, le16toh(ds->PktLen), ds->RSSI, ds->Channel,
	    ds->Rate, le16toh(ds->QosCtrl), le16toh(ds->HtSig2));
}

static void
mwl_printtxbuf(const struct mwl_txbuf *bf, u_int qnum, u_int ix)
{
	const struct mwl_txdesc *ds = bf->bf_desc;
	uint32_t status = le32toh(ds->Status);

	printf("Q%u[%3u]", qnum, ix);
	printf(" (DS.V:%p DS.P:0x%jx)\n", ds, (uintmax_t)bf->bf_daddr);
	printf(" NEXT:%08x DATA:%08x LEN:%04x STAT:%08x%s\n",
	    le32toh(ds->pPhysNext),
	    le32toh(ds->PktPtr), le16toh(ds->PktLen), status,
	    status & EAGLE_TXD_STATUS_USED ?
		"" : (status & 3) != 0 ?
" *" : " !"); printf(" RATE:%02x PRI:%x QOS:%04x SAP:%08x FORMAT:%04x\n", ds->DataRate, ds->TxPriority, le16toh(ds->QosCtrl), le32toh(ds->SapPktInfo), le16toh(ds->Format)); #if MWL_TXDESC > 1 printf(" MULTIFRAMES:%u LEN:%04x %04x %04x %04x %04x %04x\n" , le32toh(ds->multiframes) , le16toh(ds->PktLenArray[0]), le16toh(ds->PktLenArray[1]) , le16toh(ds->PktLenArray[2]), le16toh(ds->PktLenArray[3]) , le16toh(ds->PktLenArray[4]), le16toh(ds->PktLenArray[5]) ); printf(" DATA:%08x %08x %08x %08x %08x %08x\n" , le32toh(ds->PktPtrArray[0]), le32toh(ds->PktPtrArray[1]) , le32toh(ds->PktPtrArray[2]), le32toh(ds->PktPtrArray[3]) , le32toh(ds->PktPtrArray[4]), le32toh(ds->PktPtrArray[5]) ); #endif #if 0 { const uint8_t *cp = (const uint8_t *) ds; int i; for (i = 0; i < sizeof(struct mwl_txdesc); i++) { printf("%02x ", cp[i]); if (((i+1) % 16) == 0) printf("\n"); } printf("\n"); } #endif } #endif /* MWL_DEBUG */ #if 0 static void mwl_txq_dump(struct mwl_txq *txq) { struct mwl_txbuf *bf; int i = 0; MWL_TXQ_LOCK(txq); STAILQ_FOREACH(bf, &txq->active, bf_list) { struct mwl_txdesc *ds = bf->bf_desc; MWL_TXDESC_SYNC(txq, ds, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); #ifdef MWL_DEBUG mwl_printtxbuf(bf, txq->qnum, i); #endif i++; } MWL_TXQ_UNLOCK(txq); } #endif static void mwl_watchdog(void *arg) { struct mwl_softc *sc = arg; callout_reset(&sc->sc_watchdog, hz, mwl_watchdog, sc); if (sc->sc_tx_timer == 0 || --sc->sc_tx_timer > 0) return; if (sc->sc_running && !sc->sc_invalid) { if (mwl_hal_setkeepalive(sc->sc_mh)) device_printf(sc->sc_dev, "transmit timeout (firmware hung?)\n"); else device_printf(sc->sc_dev, "transmit timeout\n"); #if 0 mwl_reset(sc); mwl_txq_dump(&sc->sc_txq[0]);/*XXX*/ #endif counter_u64_add(sc->sc_ic.ic_oerrors, 1); sc->sc_stats.mst_watchdog++; } } #ifdef MWL_DIAGAPI /* * Diagnostic interface to the HAL. This is used by various * tools to do things like retrieve register contents for * debugging. 
The mechanism is intentionally opaque so that
 * it can change frequently w/o concern for compatibility.
 */
static int
mwl_ioctl_diag(struct mwl_softc *sc, struct mwl_diag *md)
{
	/*
	 * Flow: optionally copy in md_in_size bytes (MWL_DIAG_IN),
	 * optionally preallocate md_out_size bytes (MWL_DIAG_DYN), call
	 * mwl_hal_getdiagstate(), then copy out at most md_out_size bytes.
	 * Returns 0, ENOMEM, EINVAL, or the copyin/copyout error.
	 * NOTE(review): md_in_size/md_out_size are not bounded in this
	 * function; confirm the caller validates them.
	 */
	struct mwl_hal *mh = sc->sc_mh;
	u_int id = md->md_id & MWL_DIAG_ID;
	void *indata = NULL;
	void *outdata = NULL;
	u_int32_t insize = md->md_in_size;
	u_int32_t outsize = md->md_out_size;
	int error = 0;

	if (md->md_id & MWL_DIAG_IN) {
		/*
		 * Copy in data.
		 */
		indata = malloc(insize, M_TEMP, M_NOWAIT);
		if (indata == NULL) {
			error = ENOMEM;
			goto bad;
		}
		error = copyin(md->md_in_data, indata, insize);
		if (error)
			goto bad;
	}
	if (md->md_id & MWL_DIAG_DYN) {
		/*
		 * Allocate a buffer for the results (otherwise the HAL
		 * returns a pointer to a buffer where we can read the
		 * results).  Note that we depend on the HAL leaving this
		 * pointer for us to use below in reclaiming the buffer;
		 * may want to be more defensive.
		 */
		outdata = malloc(outsize, M_TEMP, M_NOWAIT);
		if (outdata == NULL) {
			error = ENOMEM;
			goto bad;
		}
	}
	/* A non-zero return from the HAL is treated as success here. */
	if (mwl_hal_getdiagstate(mh, id, indata, insize, &outdata, &outsize)) {
		/* Never copy out more than the HAL produced. */
		if (outsize < md->md_out_size)
			md->md_out_size = outsize;
		if (outdata != NULL)
			error = copyout(outdata, md->md_out_data,
					md->md_out_size);
	} else {
		error = EINVAL;
	}
bad:
	/* Only buffers allocated above are freed; a HAL-owned outdata is left alone. */
	if ((md->md_id & MWL_DIAG_IN) && indata != NULL)
		free(indata, M_TEMP);
	if ((md->md_id & MWL_DIAG_DYN) && outdata != NULL)
		free(outdata, M_TEMP);
	return error;
}

/*
 * Diagnostic reset.  Asserts the softc lock is held.  When md_id is 0
 * the firmware is reloaded first; then the h/w specs are re-read, DMA
 * is set up again and the tx/rx bookkeeping is reset.  Returns 0, EIO,
 * or the error from mwl_setupdma().
 */
static int
mwl_ioctl_reset(struct mwl_softc *sc, struct mwl_diag *md)
{
	struct mwl_hal *mh = sc->sc_mh;
	int error;

	MWL_LOCK_ASSERT(sc);

	if (md->md_id == 0 && mwl_hal_fwload(mh, NULL) != 0) {
		device_printf(sc->sc_dev, "unable to load firmware\n");
		return EIO;
	}
	if (mwl_hal_gethwspecs(mh, &sc->sc_hwspecs) != 0) {
		device_printf(sc->sc_dev, "unable to fetch h/w specs\n");
		return EIO;
	}
	error = mwl_setupdma(sc);
	if (error != 0) {
		/* NB: mwl_setupdma prints a msg */
		return error;
	}
	/*
	 * Reset tx/rx data structures; after reload we must
	 * re-start the driver's notion of the next xmit/recv.
	 */
	mwl_draintxq(sc);		/* clear pending frames */
	mwl_resettxq(sc);		/* rebuild tx q lists */
	sc->sc_rxnext = NULL;		/* force rx to start at the list head */
	return 0;
}
#endif /* MWL_DIAGAPI */

/*
 * Parent state callback.  Under the softc lock: with running vaps,
 * either refresh the mode settings (already running) or initialize the
 * device (not running and not invalid); with none, stop the device.
 * ieee80211_start_all() is called after the lock is released, and only
 * when the device was initialized here.
 */
static void
mwl_parent(struct ieee80211com *ic)
{
	struct mwl_softc *sc = ic->ic_softc;
	int startall = 0;

	MWL_LOCK(sc);
	if (ic->ic_nrunning > 0) {
		if (sc->sc_running) {
			/*
			 * To avoid rescanning another access point,
			 * do not call mwl_init() here.  Instead,
			 * only reflect promisc mode settings.
			 */
			mwl_mode_init(sc);
		} else {
			/*
			 * Beware of being called during attach/detach
			 * to reset promiscuous mode.  In that case we
			 * will still be marked UP but not RUNNING.
			 * However trying to re-init the interface
			 * is the wrong thing to do as we've already
			 * torn down much of our state.  There's
			 * probably a better way to deal with this.
			 */
			if (!sc->sc_invalid) {
				mwl_init(sc);	/* XXX lose error */
				startall = 1;
			}
		}
	} else
		mwl_stop(sc);
	MWL_UNLOCK(sc);
	if (startall)
		ieee80211_start_all(ic);
}

/*
 * Driver-private ioctls.  SIOCGMVSTATS refreshes the hardware stats
 * and copies sc_stats out to the user buffer named by the ifreq; with
 * MWL_DIAGAPI, SIOCGMVDIAG and SIOCGMVRESET dispatch to the diagnostic
 * helpers.  Any other command yields ENOTTY.
 */
static int
mwl_ioctl(struct ieee80211com *ic, u_long cmd, void *data)
{
	struct mwl_softc *sc = ic->ic_softc;
	struct ifreq *ifr = data;
	int error = 0;

	switch (cmd) {
	case SIOCGMVSTATS:
		mwl_hal_gethwstats(sc->sc_mh, &sc->sc_stats.hw_stats);
#if 0
		/* NB: embed these numbers to get a consistent view */
		sc->sc_stats.mst_tx_packets =
		    ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS);
		sc->sc_stats.mst_rx_packets =
		    ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS);
#endif
		/*
		 * NB: Drop the softc lock in case of a page fault;
		 * we'll accept any potential inconsistency in the
		 * statistics.  The alternative is to copy the data
		 * to a local structure.
		 */
-		return (copyout(&sc->sc_stats,
-				ifr->ifr_data, sizeof (sc->sc_stats)));
+		return (copyout(&sc->sc_stats, ifr_data_get_ptr(ifr),
+		    sizeof (sc->sc_stats)));
#ifdef MWL_DIAGAPI
	case SIOCGMVDIAG:
		/* XXX check privs */
		return mwl_ioctl_diag(sc, (struct mwl_diag *) ifr);
	case SIOCGMVRESET:
		/* XXX check privs */
		/* mwl_ioctl_reset() asserts the softc lock, so take it here. */
		MWL_LOCK(sc);
		error = mwl_ioctl_reset(sc,(struct mwl_diag *) ifr);
		MWL_UNLOCK(sc);
		break;
#endif /* MWL_DIAGAPI */
	default:
		error = ENOTTY;
		break;
	}
	return (error);
}

#ifdef MWL_DEBUG
/*
 * sysctl handler for the "debug" node.  The exported value is the
 * driver's sc_debug OR'd with the HAL debug setting shifted left by
 * 24; a written value is split back the same way (top byte to the HAL,
 * low 24 bits to sc_debug).
 */
static int
mwl_sysctl_debug(SYSCTL_HANDLER_ARGS)
{
	struct mwl_softc *sc = arg1;
	int debug, error;

	debug = sc->sc_debug | (mwl_hal_getdebug(sc->sc_mh) << 24);
	error = sysctl_handle_int(oidp, &debug, 0, req);
	/* Nothing more to do on error or when no new value was supplied. */
	if (error || !req->newptr)
		return error;
	mwl_hal_setdebug(sc->sc_mh, debug >> 24);
	sc->sc_debug = debug & 0x00ffffff;
	return 0;
}
#endif /* MWL_DEBUG */

/*
 * Attach the per-device sysctl nodes.  Only does work when MWL_DEBUG
 * is defined: seeds sc_debug from mwl_debug and registers "debug".
 */
static void
mwl_sysctlattach(struct mwl_softc *sc)
{
#ifdef	MWL_DEBUG
	struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->sc_dev);
	struct sysctl_oid *tree = device_get_sysctl_tree(sc->sc_dev);

	sc->sc_debug = mwl_debug;
	SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
		"debug", CTLTYPE_INT | CTLFLAG_RW, sc, 0,
		mwl_sysctl_debug, "I", "control debugging printfs");
#endif
}

/*
 * Announce various information on device/driver attach.
*/ static void mwl_announce(struct mwl_softc *sc) { device_printf(sc->sc_dev, "Rev A%d hardware, v%d.%d.%d.%d firmware (regioncode %d)\n", sc->sc_hwspecs.hwVersion, (sc->sc_hwspecs.fwReleaseNumber>>24) & 0xff, (sc->sc_hwspecs.fwReleaseNumber>>16) & 0xff, (sc->sc_hwspecs.fwReleaseNumber>>8) & 0xff, (sc->sc_hwspecs.fwReleaseNumber>>0) & 0xff, sc->sc_hwspecs.regionCode); sc->sc_fwrelease = sc->sc_hwspecs.fwReleaseNumber; if (bootverbose) { int i; for (i = 0; i <= WME_AC_VO; i++) { struct mwl_txq *txq = sc->sc_ac2q[i]; device_printf(sc->sc_dev, "Use hw queue %u for %s traffic\n", txq->qnum, ieee80211_wme_acnames[i]); } } if (bootverbose || mwl_rxdesc != MWL_RXDESC) device_printf(sc->sc_dev, "using %u rx descriptors\n", mwl_rxdesc); if (bootverbose || mwl_rxbuf != MWL_RXBUF) device_printf(sc->sc_dev, "using %u rx buffers\n", mwl_rxbuf); if (bootverbose || mwl_txbuf != MWL_TXBUF) device_printf(sc->sc_dev, "using %u tx buffers\n", mwl_txbuf); if (bootverbose && mwl_hal_ismbsscapable(sc->sc_mh)) device_printf(sc->sc_dev, "multi-bss support\n"); #ifdef MWL_TX_NODROP if (bootverbose) device_printf(sc->sc_dev, "no tx drop\n"); #endif } Index: head/sys/dev/nxge/if_nxge.c =================================================================== --- head/sys/dev/nxge/if_nxge.c (revision 331796) +++ head/sys/dev/nxge/if_nxge.c (revision 331797) @@ -1,3535 +1,3537 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2002-2007 Neterion, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include int copyright_print = 0; int hal_driver_init_count = 0; size_t size = sizeof(int); static void inline xge_flush_txds(xge_hal_channel_h); /** * xge_probe * Probes for Xframe devices * * @dev Device handle * * Returns * BUS_PROBE_DEFAULT if device is supported * ENXIO if device is not supported */ int xge_probe(device_t dev) { int devid = pci_get_device(dev); int vendorid = pci_get_vendor(dev); int retValue = ENXIO; if(vendorid == XGE_PCI_VENDOR_ID) { if((devid == XGE_PCI_DEVICE_ID_XENA_2) || (devid == XGE_PCI_DEVICE_ID_HERC_2)) { if(!copyright_print) { xge_os_printf(XGE_COPYRIGHT); copyright_print = 1; } device_set_desc_copy(dev, "Neterion Xframe 10 Gigabit Ethernet Adapter"); retValue = BUS_PROBE_DEFAULT; } } return retValue; } /** * xge_init_params * Sets HAL parameter values (from kenv). 
*
 * @dconfig Device Configuration
 * @dev Device Handle
 */
void
xge_init_params(xge_hal_device_config_t *dconfig, device_t dev)
{
    int qindex, tindex, revision;
    device_t checkdev;
    xge_lldev_t *lldev = (xge_lldev_t *)device_get_softc(dev);

    /* Fixed settings that are not read through the XGE_GET_PARAM* macros. */
    dconfig->mtu                   = XGE_DEFAULT_INITIAL_MTU;
    dconfig->pci_freq_mherz        = XGE_DEFAULT_USER_HARDCODED;
    dconfig->device_poll_millis    = XGE_HAL_DEFAULT_DEVICE_POLL_MILLIS;
    dconfig->link_stability_period = XGE_HAL_DEFAULT_LINK_STABILITY_PERIOD;
    dconfig->mac.rmac_bcast_en     = XGE_DEFAULT_MAC_RMAC_BCAST_EN;
    dconfig->fifo.alignment_size   = XGE_DEFAULT_FIFO_ALIGNMENT_SIZE;

    /*
     * Driver-level (lldev) and device-level (dconfig) tunables.
     * NOTE(review): the XGE_GET_PARAM* macros are defined elsewhere;
     * presumably each fetches the named tunable and falls back to the
     * given default - confirm against the header.
     */
    XGE_GET_PARAM("hw.xge.enable_tso", (*lldev), enabled_tso,
        XGE_DEFAULT_ENABLED_TSO);
    XGE_GET_PARAM("hw.xge.enable_lro", (*lldev), enabled_lro,
        XGE_DEFAULT_ENABLED_LRO);
    XGE_GET_PARAM("hw.xge.enable_msi", (*lldev), enabled_msi,
        XGE_DEFAULT_ENABLED_MSI);

    XGE_GET_PARAM("hw.xge.latency_timer", (*dconfig), latency_timer,
        XGE_DEFAULT_LATENCY_TIMER);
    XGE_GET_PARAM("hw.xge.max_splits_trans", (*dconfig), max_splits_trans,
        XGE_DEFAULT_MAX_SPLITS_TRANS);
    XGE_GET_PARAM("hw.xge.mmrb_count", (*dconfig), mmrb_count,
        XGE_DEFAULT_MMRB_COUNT);
    XGE_GET_PARAM("hw.xge.shared_splits", (*dconfig), shared_splits,
        XGE_DEFAULT_SHARED_SPLITS);
    XGE_GET_PARAM("hw.xge.isr_polling_cnt", (*dconfig), isr_polling_cnt,
        XGE_DEFAULT_ISR_POLLING_CNT);
    XGE_GET_PARAM("hw.xge.stats_refresh_time_sec", (*dconfig),
        stats_refresh_time_sec, XGE_DEFAULT_STATS_REFRESH_TIME_SEC);

    /* MAC tunables */
    XGE_GET_PARAM_MAC("hw.xge.mac_tmac_util_period", tmac_util_period,
        XGE_DEFAULT_MAC_TMAC_UTIL_PERIOD);
    XGE_GET_PARAM_MAC("hw.xge.mac_rmac_util_period", rmac_util_period,
        XGE_DEFAULT_MAC_RMAC_UTIL_PERIOD);
    XGE_GET_PARAM_MAC("hw.xge.mac_rmac_pause_gen_en", rmac_pause_gen_en,
        XGE_DEFAULT_MAC_RMAC_PAUSE_GEN_EN);
    XGE_GET_PARAM_MAC("hw.xge.mac_rmac_pause_rcv_en", rmac_pause_rcv_en,
        XGE_DEFAULT_MAC_RMAC_PAUSE_RCV_EN);
    XGE_GET_PARAM_MAC("hw.xge.mac_rmac_pause_time", rmac_pause_time,
        XGE_DEFAULT_MAC_RMAC_PAUSE_TIME);
    XGE_GET_PARAM_MAC("hw.xge.mac_mc_pause_threshold_q0q3",
        mc_pause_threshold_q0q3, XGE_DEFAULT_MAC_MC_PAUSE_THRESHOLD_Q0Q3);
    XGE_GET_PARAM_MAC("hw.xge.mac_mc_pause_threshold_q4q7",
        mc_pause_threshold_q4q7, XGE_DEFAULT_MAC_MC_PAUSE_THRESHOLD_Q4Q7);

    /* Tx FIFO tunables */
    XGE_GET_PARAM_FIFO("hw.xge.fifo_memblock_size", memblock_size,
        XGE_DEFAULT_FIFO_MEMBLOCK_SIZE);
    XGE_GET_PARAM_FIFO("hw.xge.fifo_reserve_threshold", reserve_threshold,
        XGE_DEFAULT_FIFO_RESERVE_THRESHOLD);
    XGE_GET_PARAM_FIFO("hw.xge.fifo_max_frags", max_frags,
        XGE_DEFAULT_FIFO_MAX_FRAGS);

    /* Per-FIFO-queue settings, then per-TTI settings within each queue. */
    for(qindex = 0; qindex < XGE_FIFO_COUNT; qindex++) {
        XGE_GET_PARAM_FIFO_QUEUE("hw.xge.fifo_queue_intr", intr, qindex,
            XGE_DEFAULT_FIFO_QUEUE_INTR);
        XGE_GET_PARAM_FIFO_QUEUE("hw.xge.fifo_queue_max", max, qindex,
            XGE_DEFAULT_FIFO_QUEUE_MAX);
        XGE_GET_PARAM_FIFO_QUEUE("hw.xge.fifo_queue_initial", initial,
            qindex, XGE_DEFAULT_FIFO_QUEUE_INITIAL);

        for (tindex = 0; tindex < XGE_HAL_MAX_FIFO_TTI_NUM; tindex++) {
            /* Every TTI entry is enabled and its queue marked configured. */
            dconfig->fifo.queue[qindex].tti[tindex].enabled  = 1;
            dconfig->fifo.queue[qindex].configured = 1;

            XGE_GET_PARAM_FIFO_QUEUE_TTI("hw.xge.fifo_queue_tti_urange_a",
                urange_a, qindex, tindex,
                XGE_DEFAULT_FIFO_QUEUE_TTI_URANGE_A);
            XGE_GET_PARAM_FIFO_QUEUE_TTI("hw.xge.fifo_queue_tti_urange_b",
                urange_b, qindex, tindex,
                XGE_DEFAULT_FIFO_QUEUE_TTI_URANGE_B);
            XGE_GET_PARAM_FIFO_QUEUE_TTI("hw.xge.fifo_queue_tti_urange_c",
                urange_c, qindex, tindex,
                XGE_DEFAULT_FIFO_QUEUE_TTI_URANGE_C);
            XGE_GET_PARAM_FIFO_QUEUE_TTI("hw.xge.fifo_queue_tti_ufc_a",
                ufc_a, qindex, tindex, XGE_DEFAULT_FIFO_QUEUE_TTI_UFC_A);
            XGE_GET_PARAM_FIFO_QUEUE_TTI("hw.xge.fifo_queue_tti_ufc_b",
                ufc_b, qindex, tindex, XGE_DEFAULT_FIFO_QUEUE_TTI_UFC_B);
            XGE_GET_PARAM_FIFO_QUEUE_TTI("hw.xge.fifo_queue_tti_ufc_c",
                ufc_c, qindex, tindex, XGE_DEFAULT_FIFO_QUEUE_TTI_UFC_C);
            XGE_GET_PARAM_FIFO_QUEUE_TTI("hw.xge.fifo_queue_tti_ufc_d",
                ufc_d, qindex, tindex, XGE_DEFAULT_FIFO_QUEUE_TTI_UFC_D);
            XGE_GET_PARAM_FIFO_QUEUE_TTI(
                "hw.xge.fifo_queue_tti_timer_ci_en", timer_ci_en, qindex,
                tindex, XGE_DEFAULT_FIFO_QUEUE_TTI_TIMER_CI_EN);
            XGE_GET_PARAM_FIFO_QUEUE_TTI(
                "hw.xge.fifo_queue_tti_timer_ac_en", timer_ac_en, qindex,
                tindex, XGE_DEFAULT_FIFO_QUEUE_TTI_TIMER_AC_EN);
            XGE_GET_PARAM_FIFO_QUEUE_TTI(
                "hw.xge.fifo_queue_tti_timer_val_us", timer_val_us, qindex,
                tindex, XGE_DEFAULT_FIFO_QUEUE_TTI_TIMER_VAL_US);
        }
    }

    /* Rx ring tunables */
    XGE_GET_PARAM_RING("hw.xge.ring_memblock_size", memblock_size,
        XGE_DEFAULT_RING_MEMBLOCK_SIZE);

    XGE_GET_PARAM_RING("hw.xge.ring_strip_vlan_tag", strip_vlan_tag,
        XGE_DEFAULT_RING_STRIP_VLAN_TAG);

    XGE_GET_PARAM("hw.xge.buffer_mode", (*lldev), buffer_mode,
        XGE_DEFAULT_BUFFER_MODE);
    /* Only buffer modes 1 and 2 are accepted; anything else falls back to mode 1. */
    if((lldev->buffer_mode < XGE_HAL_RING_QUEUE_BUFFER_MODE_1) ||
        (lldev->buffer_mode > XGE_HAL_RING_QUEUE_BUFFER_MODE_2)) {
        xge_trace(XGE_ERR, "Supported buffer modes are 1 and 2");
        lldev->buffer_mode = XGE_HAL_RING_QUEUE_BUFFER_MODE_1;
    }

    for (qindex = 0; qindex < XGE_RING_COUNT; qindex++) {
        dconfig->ring.queue[qindex].max_frm_len  = XGE_HAL_RING_USE_MTU;
        dconfig->ring.queue[qindex].priority     = 0;
        dconfig->ring.queue[qindex].configured   = 1;
        /* Driver buffer mode 2 is programmed into the ring as HAL mode 3. */
        dconfig->ring.queue[qindex].buffer_mode  =
            (lldev->buffer_mode == XGE_HAL_RING_QUEUE_BUFFER_MODE_2) ?
            XGE_HAL_RING_QUEUE_BUFFER_MODE_3 : lldev->buffer_mode;

        XGE_GET_PARAM_RING_QUEUE("hw.xge.ring_queue_max", max, qindex,
            XGE_DEFAULT_RING_QUEUE_MAX);
        XGE_GET_PARAM_RING_QUEUE("hw.xge.ring_queue_initial", initial,
            qindex, XGE_DEFAULT_RING_QUEUE_INITIAL);
        XGE_GET_PARAM_RING_QUEUE("hw.xge.ring_queue_dram_size_mb",
            dram_size_mb, qindex, XGE_DEFAULT_RING_QUEUE_DRAM_SIZE_MB);
        XGE_GET_PARAM_RING_QUEUE("hw.xge.ring_queue_indicate_max_pkts",
            indicate_max_pkts, qindex,
            XGE_DEFAULT_RING_QUEUE_INDICATE_MAX_PKTS);
        XGE_GET_PARAM_RING_QUEUE("hw.xge.ring_queue_backoff_interval_us",
            backoff_interval_us, qindex,
            XGE_DEFAULT_RING_QUEUE_BACKOFF_INTERVAL_US);

        XGE_GET_PARAM_RING_QUEUE_RTI("hw.xge.ring_queue_rti_ufc_a", ufc_a,
            qindex, XGE_DEFAULT_RING_QUEUE_RTI_UFC_A);
        XGE_GET_PARAM_RING_QUEUE_RTI("hw.xge.ring_queue_rti_ufc_b", ufc_b,
            qindex, XGE_DEFAULT_RING_QUEUE_RTI_UFC_B);
        XGE_GET_PARAM_RING_QUEUE_RTI("hw.xge.ring_queue_rti_ufc_c", ufc_c,
            qindex, XGE_DEFAULT_RING_QUEUE_RTI_UFC_C);
        XGE_GET_PARAM_RING_QUEUE_RTI("hw.xge.ring_queue_rti_ufc_d", ufc_d,
            qindex, XGE_DEFAULT_RING_QUEUE_RTI_UFC_D);
        XGE_GET_PARAM_RING_QUEUE_RTI("hw.xge.ring_queue_rti_timer_ac_en",
            timer_ac_en, qindex, XGE_DEFAULT_RING_QUEUE_RTI_TIMER_AC_EN);
        XGE_GET_PARAM_RING_QUEUE_RTI("hw.xge.ring_queue_rti_timer_val_us",
            timer_val_us, qindex, XGE_DEFAULT_RING_QUEUE_RTI_TIMER_VAL_US);
        XGE_GET_PARAM_RING_QUEUE_RTI("hw.xge.ring_queue_rti_urange_a",
            urange_a, qindex, XGE_DEFAULT_RING_QUEUE_RTI_URANGE_A);
        XGE_GET_PARAM_RING_QUEUE_RTI("hw.xge.ring_queue_rti_urange_b",
            urange_b, qindex, XGE_DEFAULT_RING_QUEUE_RTI_URANGE_B);
        XGE_GET_PARAM_RING_QUEUE_RTI("hw.xge.ring_queue_rti_urange_c",
            urange_c, qindex, XGE_DEFAULT_RING_QUEUE_RTI_URANGE_C);
    }

    /* Clamp fifo_max_frags to PAGE_SIZE / 32, reporting the adjustment. */
    if(dconfig->fifo.max_frags > (PAGE_SIZE/32)) {
        xge_os_printf("fifo_max_frags = %d", dconfig->fifo.max_frags)
        xge_os_printf("fifo_max_frags should be <= (PAGE_SIZE / 32) = %d",
            (int)(PAGE_SIZE / 32))
        xge_os_printf("Using fifo_max_frags = %d", (int)(PAGE_SIZE / 32))
        dconfig->fifo.max_frags = (PAGE_SIZE / 32);
    }

    /* When an AMD 8131 PCI bridge of revision <= 0x12 is present, override two settings. */
    checkdev = pci_find_device(VENDOR_ID_AMD, DEVICE_ID_8131_PCI_BRIDGE);
    if(checkdev != NULL) {
        /* Check Revision for 0x12 */
        revision = pci_read_config(checkdev,
            xge_offsetof(xge_hal_pci_config_t, revision), 1);
        if(revision <= 0x12) {
            /* Set mmrb_count to 1k and max splits = 2 */
            dconfig->mmrb_count       = 1;
            dconfig->max_splits_trans = XGE_HAL_THREE_SPLIT_TRANSACTION;
        }
    }
}

/**
 * xge_rx_buffer_sizes_set
 * Set buffer sizes based on Rx buffer mode
 *
 * @lldev Per-adapter Data
 * @buffer_mode Rx Buffer Mode
 * @mtu Interface MTU used to size the buffers
 */
void
xge_rx_buffer_sizes_set(xge_lldev_t *lldev, int buffer_mode, int mtu)
{
    int index = 0;
    int frame_header = XGE_HAL_MAC_HEADER_MAX_SIZE;
    int buffer_size = mtu + frame_header;

    /* Start from an all-zero length table. */
    xge_os_memzero(lldev->rxd_mbuf_len, sizeof(lldev->rxd_mbuf_len));

    /* Outside mode 5 the last buffer of the mode holds mtu bytes. */
    if(buffer_mode != XGE_HAL_RING_QUEUE_BUFFER_MODE_5)
        lldev->rxd_mbuf_len[buffer_mode - 1] = mtu;

    /* Buffer 0 holds the whole frame in mode 1, otherwise only the MAC header. */
    lldev->rxd_mbuf_len[0] = (buffer_mode == 1) ? buffer_size:frame_header;

    if(buffer_mode == XGE_HAL_RING_QUEUE_BUFFER_MODE_5)
        lldev->rxd_mbuf_len[1] = XGE_HAL_TCPIP_HEADER_MAX_SIZE;

    if(buffer_mode == XGE_HAL_RING_QUEUE_BUFFER_MODE_5) {
        /*
         * Mode 5: after the two header buffers, fill MJUMPAGESIZE
         * buffers until the remainder fits in one, then store the
         * remainder after XGE_ALIGN_TO(.., 128).
         */
        index = 2;
        buffer_size -= XGE_HAL_TCPIP_HEADER_MAX_SIZE;
        while(buffer_size > MJUMPAGESIZE) {
            lldev->rxd_mbuf_len[index++] = MJUMPAGESIZE;
            buffer_size -= MJUMPAGESIZE;
        }
        XGE_ALIGN_TO(buffer_size, 128);
        lldev->rxd_mbuf_len[index] = buffer_size;
        lldev->rxd_mbuf_cnt = index + 1;
    }

    /* Trace the first buffer_mode entries of the table. */
    for(index = 0; index < buffer_mode; index++)
        xge_trace(XGE_TRACE, "Buffer[%d] %d\n", index,
            lldev->rxd_mbuf_len[index]);
}

/**
 * xge_buffer_mode_init
 * Init Rx buffer mode
 *
 * @lldev Per-adapter Data
 * @mtu Interface MTU
 */
void
xge_buffer_mode_init(xge_lldev_t *lldev, int mtu)
{
    /*
     * NOTE(review): index and buffer_size are not referenced by any
     * statement visible here; XGE_SET_BUFFER_MODE_IN_RINGS may use
     * them - confirm before removing.
     */
    int index = 0, buffer_size = 0;
    xge_hal_ring_config_t *ring_config = &((lldev->devh)->config.ring);

    buffer_size = mtu + XGE_HAL_MAC_HEADER_MAX_SIZE;

    /* Mirror the LRO tunable into the interface's enabled capabilities. */
    if(lldev->enabled_lro)
        (lldev->ifnetp)->if_capenable |= IFCAP_LRO;
    else
        (lldev->ifnetp)->if_capenable &= ~IFCAP_LRO;

    lldev->rxd_mbuf_cnt = lldev->buffer_mode;
    /* Driver mode 2 uses HAL mode 3 with scatter mode B; otherwise scatter mode A. */
    if(lldev->buffer_mode == XGE_HAL_RING_QUEUE_BUFFER_MODE_2) {
        XGE_SET_BUFFER_MODE_IN_RINGS(XGE_HAL_RING_QUEUE_BUFFER_MODE_3);
        ring_config->scatter_mode = XGE_HAL_RING_QUEUE_SCATTER_MODE_B;
    }
    else {
        XGE_SET_BUFFER_MODE_IN_RINGS(lldev->buffer_mode);
        ring_config->scatter_mode = XGE_HAL_RING_QUEUE_SCATTER_MODE_A;
    }
    xge_rx_buffer_sizes_set(lldev, lldev->buffer_mode, mtu);

    /* Report the resulting configuration. */
    xge_os_printf("%s: TSO %s", device_get_nameunit(lldev->device),
        ((lldev->enabled_tso) ? "Enabled":"Disabled"));
    xge_os_printf("%s: LRO %s", device_get_nameunit(lldev->device),
        ((lldev->ifnetp)->if_capenable & IFCAP_LRO) ? "Enabled":"Disabled");
    xge_os_printf("%s: Rx %d Buffer Mode Enabled",
        device_get_nameunit(lldev->device), lldev->buffer_mode);
}

/**
 * xge_driver_initialize
 * Initializes HAL driver (common for all devices)
 *
 * Returns
 * XGE_HAL_OK if success
 * XGE_HAL_ERR_BAD_DRIVER_CONFIG if driver configuration parameters are invalid
 */
int
xge_driver_initialize(void)
{
    xge_hal_uld_cbs_t       uld_callbacks;
    xge_hal_driver_config_t driver_config;
    xge_hal_status_e        status = XGE_HAL_OK;

    /* Initialize HAL driver */
    /* The HAL itself is initialized only while the shared count is zero. */
    if(!hal_driver_init_count) {
        xge_os_memzero(&uld_callbacks, sizeof(xge_hal_uld_cbs_t));
        xge_os_memzero(&driver_config, sizeof(xge_hal_driver_config_t));

        /*
         * Initial and maximum size of the queue used to store the events
         * like Link up/down (xge_hal_event_e)
         */
        driver_config.queue_size_initial = XGE_HAL_MIN_QUEUE_SIZE_INITIAL;
        driver_config.queue_size_max     = XGE_HAL_MAX_QUEUE_SIZE_MAX;

        uld_callbacks.link_up   = xge_callback_link_up;
        uld_callbacks.link_down = xge_callback_link_down;
        uld_callbacks.crit_err  = xge_callback_crit_err;
        uld_callbacks.event     = xge_callback_event;

        status = xge_hal_driver_initialize(&driver_config, &uld_callbacks);
        if(status != XGE_HAL_OK) {
            /* NOTE(review): presumably logs and jumps to xdi_out with status - confirm macro. */
            XGE_EXIT_ON_ERR("xgeX: Initialization of HAL driver failed",
                xdi_out, status);
        }
    }
    hal_driver_init_count = hal_driver_init_count + 1;

    xge_hal_driver_debug_module_mask_set(0xffffffff);
    xge_hal_driver_debug_level_set(XGE_TRACE);

xdi_out:
    return status;
}

/**
 *
xge_media_init * Initializes, adds and sets media * * @devc Device Handle */ void xge_media_init(device_t devc) { xge_lldev_t *lldev = (xge_lldev_t *)device_get_softc(devc); /* Initialize Media */ ifmedia_init(&lldev->media, IFM_IMASK, xge_ifmedia_change, xge_ifmedia_status); /* Add supported media */ ifmedia_add(&lldev->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 0, NULL); ifmedia_add(&lldev->media, IFM_ETHER | IFM_1000_SX, 0, NULL); ifmedia_add(&lldev->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_add(&lldev->media, IFM_ETHER | IFM_10G_SR, 0, NULL); ifmedia_add(&lldev->media, IFM_ETHER | IFM_10G_LR, 0, NULL); /* Set media */ ifmedia_set(&lldev->media, IFM_ETHER | IFM_AUTO); } /** * xge_pci_space_save * Save PCI configuration space * * @dev Device Handle */ void xge_pci_space_save(device_t dev) { struct pci_devinfo *dinfo = NULL; dinfo = device_get_ivars(dev); xge_trace(XGE_TRACE, "Saving PCI configuration space"); pci_cfg_save(dev, dinfo, 0); } /** * xge_pci_space_restore * Restore saved PCI configuration space * * @dev Device Handle */ void xge_pci_space_restore(device_t dev) { struct pci_devinfo *dinfo = NULL; dinfo = device_get_ivars(dev); xge_trace(XGE_TRACE, "Restoring PCI configuration space"); pci_cfg_restore(dev, dinfo); } /** * xge_msi_info_save * Save MSI info * * @lldev Per-adapter Data */ void xge_msi_info_save(xge_lldev_t * lldev) { xge_os_pci_read16(lldev->pdev, NULL, xge_offsetof(xge_hal_pci_config_le_t, msi_control), &lldev->msi_info.msi_control); xge_os_pci_read32(lldev->pdev, NULL, xge_offsetof(xge_hal_pci_config_le_t, msi_lower_address), &lldev->msi_info.msi_lower_address); xge_os_pci_read32(lldev->pdev, NULL, xge_offsetof(xge_hal_pci_config_le_t, msi_higher_address), &lldev->msi_info.msi_higher_address); xge_os_pci_read16(lldev->pdev, NULL, xge_offsetof(xge_hal_pci_config_le_t, msi_data), &lldev->msi_info.msi_data); } /** * xge_msi_info_restore * Restore saved MSI info * * @dev Device Handle */ void xge_msi_info_restore(xge_lldev_t *lldev) { /* 
* If interface is made down and up, traffic fails. It was observed that * MSI information were getting reset on down. Restoring them. */ xge_os_pci_write16(lldev->pdev, NULL, xge_offsetof(xge_hal_pci_config_le_t, msi_control), lldev->msi_info.msi_control); xge_os_pci_write32(lldev->pdev, NULL, xge_offsetof(xge_hal_pci_config_le_t, msi_lower_address), lldev->msi_info.msi_lower_address); xge_os_pci_write32(lldev->pdev, NULL, xge_offsetof(xge_hal_pci_config_le_t, msi_higher_address), lldev->msi_info.msi_higher_address); xge_os_pci_write16(lldev->pdev, NULL, xge_offsetof(xge_hal_pci_config_le_t, msi_data), lldev->msi_info.msi_data); } /** * xge_init_mutex * Initializes mutexes used in driver * * @lldev Per-adapter Data */ void xge_mutex_init(xge_lldev_t *lldev) { int qindex; sprintf(lldev->mtx_name_drv, "%s_drv", device_get_nameunit(lldev->device)); mtx_init(&lldev->mtx_drv, lldev->mtx_name_drv, MTX_NETWORK_LOCK, MTX_DEF); for(qindex = 0; qindex < XGE_FIFO_COUNT; qindex++) { sprintf(lldev->mtx_name_tx[qindex], "%s_tx_%d", device_get_nameunit(lldev->device), qindex); mtx_init(&lldev->mtx_tx[qindex], lldev->mtx_name_tx[qindex], NULL, MTX_DEF); } } /** * xge_mutex_destroy * Destroys mutexes used in driver * * @lldev Per-adapter Data */ void xge_mutex_destroy(xge_lldev_t *lldev) { int qindex; for(qindex = 0; qindex < XGE_FIFO_COUNT; qindex++) mtx_destroy(&lldev->mtx_tx[qindex]); mtx_destroy(&lldev->mtx_drv); } /** * xge_print_info * Print device and driver information * * @lldev Per-adapter Data */ void xge_print_info(xge_lldev_t *lldev) { device_t dev = lldev->device; xge_hal_device_t *hldev = lldev->devh; xge_hal_status_e status = XGE_HAL_OK; u64 val64 = 0; const char *xge_pci_bus_speeds[17] = { "PCI 33MHz Bus", "PCI 66MHz Bus", "PCIX(M1) 66MHz Bus", "PCIX(M1) 100MHz Bus", "PCIX(M1) 133MHz Bus", "PCIX(M2) 133MHz Bus", "PCIX(M2) 200MHz Bus", "PCIX(M2) 266MHz Bus", "PCIX(M1) Reserved", "PCIX(M1) 66MHz Bus (Not Supported)", "PCIX(M1) 100MHz Bus (Not Supported)", "PCIX(M1) 
133MHz Bus (Not Supported)", "PCIX(M2) Reserved", "PCIX 533 Reserved", "PCI Basic Mode", "PCIX Basic Mode", "PCI Invalid Mode" }; xge_os_printf("%s: Xframe%s %s Revision %d Driver v%s", device_get_nameunit(dev), ((hldev->device_id == XGE_PCI_DEVICE_ID_XENA_2) ? "I" : "II"), hldev->vpd_data.product_name, hldev->revision, XGE_DRIVER_VERSION); xge_os_printf("%s: Serial Number %s", device_get_nameunit(dev), hldev->vpd_data.serial_num); if(pci_get_device(dev) == XGE_PCI_DEVICE_ID_HERC_2) { status = xge_hal_mgmt_reg_read(hldev, 0, xge_offsetof(xge_hal_pci_bar0_t, pci_info), &val64); if(status != XGE_HAL_OK) xge_trace(XGE_ERR, "Error for getting bus speed"); xge_os_printf("%s: Adapter is on %s bit %s", device_get_nameunit(dev), ((val64 & BIT(8)) ? "32":"64"), (xge_pci_bus_speeds[((val64 & XGE_HAL_PCI_INFO) >> 60)])); } xge_os_printf("%s: Using %s Interrupts", device_get_nameunit(dev), (lldev->enabled_msi == XGE_HAL_INTR_MODE_MSI) ? "MSI":"Line"); } /** * xge_create_dma_tags * Creates DMA tags for both Tx and Rx * * @dev Device Handle * * Returns XGE_HAL_OK or XGE_HAL_FAIL (if errors) */ xge_hal_status_e xge_create_dma_tags(device_t dev) { xge_lldev_t *lldev = (xge_lldev_t *)device_get_softc(dev); xge_hal_status_e status = XGE_HAL_FAIL; int mtu = (lldev->ifnetp)->if_mtu, maxsize; /* DMA tag for Tx */ status = bus_dma_tag_create( bus_get_dma_tag(dev), /* Parent */ PAGE_SIZE, /* Alignment */ 0, /* Bounds */ BUS_SPACE_MAXADDR, /* Low Address */ BUS_SPACE_MAXADDR, /* High Address */ NULL, /* Filter Function */ NULL, /* Filter Function Arguments */ MCLBYTES * XGE_MAX_SEGS, /* Maximum Size */ XGE_MAX_SEGS, /* Number of Segments */ MCLBYTES, /* Maximum Segment Size */ BUS_DMA_ALLOCNOW, /* Flags */ NULL, /* Lock Function */ NULL, /* Lock Function Arguments */ (&lldev->dma_tag_tx)); /* DMA Tag */ if(status != 0) goto _exit; maxsize = mtu + XGE_HAL_MAC_HEADER_MAX_SIZE; if(maxsize <= MCLBYTES) { maxsize = MCLBYTES; } else { if(lldev->buffer_mode == XGE_HAL_RING_QUEUE_BUFFER_MODE_5) 
maxsize = MJUMPAGESIZE; else maxsize = (maxsize <= MJUMPAGESIZE) ? MJUMPAGESIZE : MJUM9BYTES; } /* DMA tag for Rx */ status = bus_dma_tag_create( bus_get_dma_tag(dev), /* Parent */ PAGE_SIZE, /* Alignment */ 0, /* Bounds */ BUS_SPACE_MAXADDR, /* Low Address */ BUS_SPACE_MAXADDR, /* High Address */ NULL, /* Filter Function */ NULL, /* Filter Function Arguments */ maxsize, /* Maximum Size */ 1, /* Number of Segments */ maxsize, /* Maximum Segment Size */ BUS_DMA_ALLOCNOW, /* Flags */ NULL, /* Lock Function */ NULL, /* Lock Function Arguments */ (&lldev->dma_tag_rx)); /* DMA Tag */ if(status != 0) goto _exit1; status = bus_dmamap_create(lldev->dma_tag_rx, BUS_DMA_NOWAIT, &lldev->extra_dma_map); if(status != 0) goto _exit2; status = XGE_HAL_OK; goto _exit; _exit2: status = bus_dma_tag_destroy(lldev->dma_tag_rx); if(status != 0) xge_trace(XGE_ERR, "Rx DMA tag destroy failed"); _exit1: status = bus_dma_tag_destroy(lldev->dma_tag_tx); if(status != 0) xge_trace(XGE_ERR, "Tx DMA tag destroy failed"); status = XGE_HAL_FAIL; _exit: return status; } /** * xge_confirm_changes * Disables and Enables interface to apply requested change * * @lldev Per-adapter Data * @mtu_set Is it called for changing MTU? 
(Yes: 1, No: 0)
 *
 * Returns 0 or Error Number
 */
void
xge_confirm_changes(xge_lldev_t *lldev, xge_option_e option)
{
    /*
     * NB: the function returns void and takes an xge_option_e; this
     * code compares option only against XGE_SET_MTU.
     */
    if(lldev->initialized == 0) goto _exit1;

    /*
     * Initialized device: under the driver mutex, take the interface
     * down, stop the device, apply the change, re-init and bring the
     * interface back up.
     */
    mtx_lock(&lldev->mtx_drv);
    if_down(lldev->ifnetp);
    xge_device_stop(lldev, XGE_HAL_CHANNEL_OC_NORMAL);

    if(option == XGE_SET_MTU)
        (lldev->ifnetp)->if_mtu = lldev->mtu;
    else
        xge_buffer_mode_init(lldev, lldev->mtu);

    xge_device_init(lldev, XGE_HAL_CHANNEL_OC_NORMAL);
    if_up(lldev->ifnetp);
    mtx_unlock(&lldev->mtx_drv);
    goto _exit;

_exit1:
    /* Request was to change MTU and device not initialized */
    if(option == XGE_SET_MTU) {
        (lldev->ifnetp)->if_mtu = lldev->mtu;
        xge_buffer_mode_init(lldev, lldev->mtu);
    }
_exit:
    return;
}

/**
 * xge_change_lro_status
 * Enable/Disable LRO feature
 *
 * @SYSCTL_HANDLER_ARGS sysctl_oid structure with arguments
 *
 * Returns 0 or error number.
 */
static int
xge_change_lro_status(SYSCTL_HANDLER_ARGS)
{
    xge_lldev_t *lldev = (xge_lldev_t *)arg1;
    int request = lldev->enabled_lro, status = XGE_HAL_OK;

    status = sysctl_handle_int(oidp, &request, arg2, req);
    /* Stop on a handler error or when no new value was supplied. */
    if((status != XGE_HAL_OK) || (!req->newptr))
        goto _exit;

    /* Only 0 (disable) and 1 (enable) are accepted. */
    if((request < 0) || (request > 1)) {
        status = EINVAL;
        goto _exit;
    }

    /* Return if current and requested states are same */
    if(request == lldev->enabled_lro){
        xge_trace(XGE_ERR, "LRO is already %s",
            ((request) ? "enabled" : "disabled"));
        goto _exit;
    }

    lldev->enabled_lro = request;
    xge_confirm_changes(lldev, XGE_CHANGE_LRO);
    /*
     * NOTE(review): arg2 is not read again in this function; if it is
     * a by-value handler argument this store has no effect - confirm.
     */
    arg2 = lldev->enabled_lro;

_exit:
    return status;
}

/**
 * xge_add_sysctl_handlers
 * Registers sysctl parameter value update handlers
 *
 * @lldev Per-adapter data
 */
void
xge_add_sysctl_handlers(xge_lldev_t *lldev)
{
    struct sysctl_ctx_list *context_list =
        device_get_sysctl_ctx(lldev->device);
    struct sysctl_oid *oid = device_get_sysctl_tree(lldev->device);

    /* Read-write integer node "enable_lro" handled by xge_change_lro_status. */
    SYSCTL_ADD_PROC(context_list, SYSCTL_CHILDREN(oid), OID_AUTO,
        "enable_lro", CTLTYPE_INT | CTLFLAG_RW, lldev, 0,
        xge_change_lro_status, "I", "Enable or disable LRO feature");
}

/**
 * xge_attach
 * Connects driver to the system if probe was success
 *
 * @dev Device Handle
 */
int
xge_attach(device_t dev)
{
    xge_hal_device_config_t *device_config;
    xge_hal_device_attr_t   attr;
    xge_lldev_t             *lldev;
    xge_hal_device_t        *hldev;
    xge_pci_info_t          *pci_info;
    struct ifnet            *ifnetp;
    int                     rid, rid0, rid1, error;
    int                     msi_count = 0, status = XGE_HAL_OK;
    int                     enable_msi = XGE_HAL_INTR_MODE_IRQLINE;

    device_config = xge_os_malloc(NULL, sizeof(xge_hal_device_config_t));
    if(!device_config) {
        XGE_EXIT_ON_ERR("Memory allocation for device configuration failed",
            attach_out_config, ENOMEM);
    }

    lldev = (xge_lldev_t *) device_get_softc(dev);
    if(!lldev) {
        XGE_EXIT_ON_ERR("Adapter softc is NULL", attach_out, ENOMEM);
    }
    lldev->device = dev;

    xge_mutex_init(lldev);

    error = xge_driver_initialize();
    if(error != XGE_HAL_OK) {
        xge_resources_free(dev, xge_free_mutex);
        XGE_EXIT_ON_ERR("Initializing driver failed", attach_out, ENXIO);
    }

    /* HAL device */
    hldev =
        (xge_hal_device_t *)xge_os_malloc(NULL, sizeof(xge_hal_device_t));
    if(!hldev) {
        xge_resources_free(dev, xge_free_terminate_hal_driver);
        XGE_EXIT_ON_ERR("Memory allocation for HAL device failed",
            attach_out, ENOMEM);
    }
    lldev->devh = hldev;

    /* Our private structure */
    pci_info =
        (xge_pci_info_t*) xge_os_malloc(NULL, sizeof(xge_pci_info_t));
    if(!pci_info) {
        xge_resources_free(dev, xge_free_hal_device);
XGE_EXIT_ON_ERR("Memory allocation for PCI info. failed", attach_out, ENOMEM); } lldev->pdev = pci_info; pci_info->device = dev; /* Set bus master */ pci_enable_busmaster(dev); /* Get virtual address for BAR0 */ rid0 = PCIR_BAR(0); pci_info->regmap0 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid0, RF_ACTIVE); if(pci_info->regmap0 == NULL) { xge_resources_free(dev, xge_free_pci_info); XGE_EXIT_ON_ERR("Bus resource allocation for BAR0 failed", attach_out, ENOMEM); } attr.bar0 = (char *)pci_info->regmap0; pci_info->bar0resource = (xge_bus_resource_t*) xge_os_malloc(NULL, sizeof(xge_bus_resource_t)); if(pci_info->bar0resource == NULL) { xge_resources_free(dev, xge_free_bar0); XGE_EXIT_ON_ERR("Memory allocation for BAR0 Resources failed", attach_out, ENOMEM); } ((xge_bus_resource_t *)(pci_info->bar0resource))->bus_tag = rman_get_bustag(pci_info->regmap0); ((xge_bus_resource_t *)(pci_info->bar0resource))->bus_handle = rman_get_bushandle(pci_info->regmap0); ((xge_bus_resource_t *)(pci_info->bar0resource))->bar_start_addr = pci_info->regmap0; /* Get virtual address for BAR1 */ rid1 = PCIR_BAR(2); pci_info->regmap1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid1, RF_ACTIVE); if(pci_info->regmap1 == NULL) { xge_resources_free(dev, xge_free_bar0_resource); XGE_EXIT_ON_ERR("Bus resource allocation for BAR1 failed", attach_out, ENOMEM); } attr.bar1 = (char *)pci_info->regmap1; pci_info->bar1resource = (xge_bus_resource_t*) xge_os_malloc(NULL, sizeof(xge_bus_resource_t)); if(pci_info->bar1resource == NULL) { xge_resources_free(dev, xge_free_bar1); XGE_EXIT_ON_ERR("Memory allocation for BAR1 Resources failed", attach_out, ENOMEM); } ((xge_bus_resource_t *)(pci_info->bar1resource))->bus_tag = rman_get_bustag(pci_info->regmap1); ((xge_bus_resource_t *)(pci_info->bar1resource))->bus_handle = rman_get_bushandle(pci_info->regmap1); ((xge_bus_resource_t *)(pci_info->bar1resource))->bar_start_addr = pci_info->regmap1; /* Save PCI config space */ xge_pci_space_save(dev); 
attr.regh0 = (xge_bus_resource_t *) pci_info->bar0resource; attr.regh1 = (xge_bus_resource_t *) pci_info->bar1resource; attr.irqh = lldev->irqhandle; attr.cfgh = pci_info; attr.pdev = pci_info; /* Initialize device configuration parameters */ xge_init_params(device_config, dev); rid = 0; if(lldev->enabled_msi) { /* Number of MSI messages supported by device */ msi_count = pci_msi_count(dev); if(msi_count > 1) { /* Device supports MSI */ if(bootverbose) { xge_trace(XGE_ERR, "MSI count: %d", msi_count); xge_trace(XGE_ERR, "Now, driver supporting 1 message"); } msi_count = 1; error = pci_alloc_msi(dev, &msi_count); if(error == 0) { if(bootverbose) xge_trace(XGE_ERR, "Allocated messages: %d", msi_count); enable_msi = XGE_HAL_INTR_MODE_MSI; rid = 1; } else { if(bootverbose) xge_trace(XGE_ERR, "pci_alloc_msi failed, %d", error); } } } lldev->enabled_msi = enable_msi; /* Allocate resource for irq */ lldev->irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, (RF_SHAREABLE | RF_ACTIVE)); if(lldev->irq == NULL) { xge_trace(XGE_ERR, "Allocating irq resource for %s failed", ((rid == 0) ? 
"line interrupt" : "MSI")); if(rid == 1) { error = pci_release_msi(dev); if(error != 0) { xge_trace(XGE_ERR, "Releasing MSI resources failed %d", error); xge_trace(XGE_ERR, "Requires reboot to use MSI again"); } xge_trace(XGE_ERR, "Trying line interrupts"); rid = 0; lldev->enabled_msi = XGE_HAL_INTR_MODE_IRQLINE; lldev->irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, (RF_SHAREABLE | RF_ACTIVE)); } if(lldev->irq == NULL) { xge_trace(XGE_ERR, "Allocating irq resource failed"); xge_resources_free(dev, xge_free_bar1_resource); status = ENOMEM; goto attach_out; } } device_config->intr_mode = lldev->enabled_msi; if(bootverbose) { xge_trace(XGE_TRACE, "rid: %d, Mode: %d, MSI count: %d", rid, lldev->enabled_msi, msi_count); } /* Initialize HAL device */ error = xge_hal_device_initialize(hldev, &attr, device_config); if(error != XGE_HAL_OK) { xge_resources_free(dev, xge_free_irq_resource); XGE_EXIT_ON_ERR("Initializing HAL device failed", attach_out, ENXIO); } xge_hal_device_private_set(hldev, lldev); error = xge_interface_setup(dev); if(error != 0) { status = error; goto attach_out; } ifnetp = lldev->ifnetp; ifnetp->if_mtu = device_config->mtu; xge_media_init(dev); /* Associate interrupt handler with the device */ if(lldev->enabled_msi == XGE_HAL_INTR_MODE_MSI) { error = bus_setup_intr(dev, lldev->irq, (INTR_TYPE_NET | INTR_MPSAFE), #if __FreeBSD_version > 700030 NULL, #endif xge_isr_msi, lldev, &lldev->irqhandle); xge_msi_info_save(lldev); } else { error = bus_setup_intr(dev, lldev->irq, (INTR_TYPE_NET | INTR_MPSAFE), #if __FreeBSD_version > 700030 xge_isr_filter, #endif xge_isr_line, lldev, &lldev->irqhandle); } if(error != 0) { xge_resources_free(dev, xge_free_media_interface); XGE_EXIT_ON_ERR("Associating interrupt handler with device failed", attach_out, ENXIO); } xge_print_info(lldev); xge_add_sysctl_handlers(lldev); xge_buffer_mode_init(lldev, device_config->mtu); attach_out: xge_os_free(NULL, device_config, sizeof(xge_hal_device_config_t)); attach_out_config: 
return status; } /** * xge_resources_free * Undo what-all we did during load/attach * * @dev Device Handle * @error Identifies what-all to undo */ void xge_resources_free(device_t dev, xge_lables_e error) { xge_lldev_t *lldev; xge_pci_info_t *pci_info; xge_hal_device_t *hldev; int rid, status; /* LL Device */ lldev = (xge_lldev_t *) device_get_softc(dev); pci_info = lldev->pdev; /* HAL Device */ hldev = lldev->devh; switch(error) { case xge_free_all: /* Teardown interrupt handler - device association */ bus_teardown_intr(dev, lldev->irq, lldev->irqhandle); case xge_free_media_interface: /* Media */ ifmedia_removeall(&lldev->media); /* Detach Ether */ ether_ifdetach(lldev->ifnetp); if_free(lldev->ifnetp); xge_hal_device_private_set(hldev, NULL); xge_hal_device_disable(hldev); case xge_free_terminate_hal_device: /* HAL Device */ xge_hal_device_terminate(hldev); case xge_free_irq_resource: /* Release IRQ resource */ bus_release_resource(dev, SYS_RES_IRQ, ((lldev->enabled_msi == XGE_HAL_INTR_MODE_IRQLINE) ? 
0:1), lldev->irq); if(lldev->enabled_msi == XGE_HAL_INTR_MODE_MSI) { status = pci_release_msi(dev); if(status != 0) { if(bootverbose) { xge_trace(XGE_ERR, "pci_release_msi returned %d", status); } } } case xge_free_bar1_resource: /* Restore PCI configuration space */ xge_pci_space_restore(dev); /* Free bar1resource */ xge_os_free(NULL, pci_info->bar1resource, sizeof(xge_bus_resource_t)); case xge_free_bar1: /* Release BAR1 */ rid = PCIR_BAR(2); bus_release_resource(dev, SYS_RES_MEMORY, rid, pci_info->regmap1); case xge_free_bar0_resource: /* Free bar0resource */ xge_os_free(NULL, pci_info->bar0resource, sizeof(xge_bus_resource_t)); case xge_free_bar0: /* Release BAR0 */ rid = PCIR_BAR(0); bus_release_resource(dev, SYS_RES_MEMORY, rid, pci_info->regmap0); case xge_free_pci_info: /* Disable Bus Master */ pci_disable_busmaster(dev); /* Free pci_info_t */ lldev->pdev = NULL; xge_os_free(NULL, pci_info, sizeof(xge_pci_info_t)); case xge_free_hal_device: /* Free device configuration struct and HAL device */ xge_os_free(NULL, hldev, sizeof(xge_hal_device_t)); case xge_free_terminate_hal_driver: /* Terminate HAL driver */ hal_driver_init_count = hal_driver_init_count - 1; if(!hal_driver_init_count) { xge_hal_driver_terminate(); } case xge_free_mutex: xge_mutex_destroy(lldev); } } /** * xge_detach * Detaches driver from the Kernel subsystem * * @dev Device Handle */ int xge_detach(device_t dev) { xge_lldev_t *lldev = (xge_lldev_t *)device_get_softc(dev); if(lldev->in_detach == 0) { lldev->in_detach = 1; xge_stop(lldev); xge_resources_free(dev, xge_free_all); } return 0; } /** * xge_shutdown * To shutdown device before system shutdown * * @dev Device Handle */ int xge_shutdown(device_t dev) { xge_lldev_t *lldev = (xge_lldev_t *) device_get_softc(dev); xge_stop(lldev); return 0; } /** * xge_interface_setup * Setup interface * * @dev Device Handle * * Returns 0 on success, ENXIO/ENOMEM on failure */ int xge_interface_setup(device_t dev) { u8 mcaddr[ETHER_ADDR_LEN]; 
xge_hal_status_e status; xge_lldev_t *lldev = (xge_lldev_t *)device_get_softc(dev); struct ifnet *ifnetp; xge_hal_device_t *hldev = lldev->devh; /* Get the MAC address of the device */ status = xge_hal_device_macaddr_get(hldev, 0, &mcaddr); if(status != XGE_HAL_OK) { xge_resources_free(dev, xge_free_terminate_hal_device); XGE_EXIT_ON_ERR("Getting MAC address failed", ifsetup_out, ENXIO); } /* Get interface ifnet structure for this Ether device */ ifnetp = lldev->ifnetp = if_alloc(IFT_ETHER); if(ifnetp == NULL) { xge_resources_free(dev, xge_free_terminate_hal_device); XGE_EXIT_ON_ERR("Allocation ifnet failed", ifsetup_out, ENOMEM); } /* Initialize interface ifnet structure */ if_initname(ifnetp, device_get_name(dev), device_get_unit(dev)); ifnetp->if_mtu = XGE_HAL_DEFAULT_MTU; ifnetp->if_baudrate = XGE_BAUDRATE; ifnetp->if_init = xge_init; ifnetp->if_softc = lldev; ifnetp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifnetp->if_ioctl = xge_ioctl; ifnetp->if_start = xge_send; /* TODO: Check and assign optimal value */ ifnetp->if_snd.ifq_maxlen = ifqmaxlen; ifnetp->if_capabilities = IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM; if(lldev->enabled_tso) ifnetp->if_capabilities |= IFCAP_TSO4; if(lldev->enabled_lro) ifnetp->if_capabilities |= IFCAP_LRO; ifnetp->if_capenable = ifnetp->if_capabilities; /* Attach the interface */ ether_ifattach(ifnetp, mcaddr); ifsetup_out: return status; } /** * xge_callback_link_up * Callback for Link-up indication from HAL * * @userdata Per-adapter data */ void xge_callback_link_up(void *userdata) { xge_lldev_t *lldev = (xge_lldev_t *)userdata; struct ifnet *ifnetp = lldev->ifnetp; ifnetp->if_flags &= ~IFF_DRV_OACTIVE; if_link_state_change(ifnetp, LINK_STATE_UP); } /** * xge_callback_link_down * Callback for Link-down indication from HAL * * @userdata Per-adapter data */ void xge_callback_link_down(void *userdata) { xge_lldev_t *lldev = (xge_lldev_t *)userdata; struct ifnet *ifnetp = lldev->ifnetp; ifnetp->if_flags |= 
IFF_DRV_OACTIVE; if_link_state_change(ifnetp, LINK_STATE_DOWN); } /** * xge_callback_crit_err * Callback for Critical error indication from HAL * * @userdata Per-adapter data * @type Event type (Enumerated hardware error) * @serr_data Hardware status */ void xge_callback_crit_err(void *userdata, xge_hal_event_e type, u64 serr_data) { xge_trace(XGE_ERR, "Critical Error"); xge_reset(userdata); } /** * xge_callback_event * Callback from HAL indicating that some event has been queued * * @item Queued event item */ void xge_callback_event(xge_queue_item_t *item) { xge_lldev_t *lldev = NULL; xge_hal_device_t *hldev = NULL; struct ifnet *ifnetp = NULL; hldev = item->context; lldev = xge_hal_device_private(hldev); ifnetp = lldev->ifnetp; switch((int)item->event_type) { case XGE_LL_EVENT_TRY_XMIT_AGAIN: if(lldev->initialized) { if(xge_hal_channel_dtr_count(lldev->fifo_channel[0]) > 0) { ifnetp->if_flags &= ~IFF_DRV_OACTIVE; } else { xge_queue_produce_context( xge_hal_device_queue(lldev->devh), XGE_LL_EVENT_TRY_XMIT_AGAIN, lldev->devh); } } break; case XGE_LL_EVENT_DEVICE_RESETTING: xge_reset(item->context); break; default: break; } } /** * xge_ifmedia_change * Media change driver callback * * @ifnetp Interface Handle * * Returns 0 if media is Ether else EINVAL */ int xge_ifmedia_change(struct ifnet *ifnetp) { xge_lldev_t *lldev = ifnetp->if_softc; struct ifmedia *ifmediap = &lldev->media; return (IFM_TYPE(ifmediap->ifm_media) != IFM_ETHER) ? 
EINVAL:0; } /** * xge_ifmedia_status * Media status driver callback * * @ifnetp Interface Handle * @ifmr Interface Media Settings */ void xge_ifmedia_status(struct ifnet *ifnetp, struct ifmediareq *ifmr) { xge_hal_status_e status; u64 regvalue; xge_lldev_t *lldev = ifnetp->if_softc; xge_hal_device_t *hldev = lldev->devh; ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; status = xge_hal_mgmt_reg_read(hldev, 0, xge_offsetof(xge_hal_pci_bar0_t, adapter_status), ®value); if(status != XGE_HAL_OK) { xge_trace(XGE_TRACE, "Getting adapter status failed"); goto _exit; } if((regvalue & (XGE_HAL_ADAPTER_STATUS_RMAC_REMOTE_FAULT | XGE_HAL_ADAPTER_STATUS_RMAC_LOCAL_FAULT)) == 0) { ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= IFM_10G_SR | IFM_FDX; if_link_state_change(ifnetp, LINK_STATE_UP); } else { if_link_state_change(ifnetp, LINK_STATE_DOWN); } _exit: return; } /** * xge_ioctl_stats * IOCTL to get statistics * * @lldev Per-adapter data * @ifreqp Interface request */ int xge_ioctl_stats(xge_lldev_t *lldev, struct ifreq *ifreqp) { xge_hal_status_e status = XGE_HAL_OK; char cmd, mode; void *info = NULL; int retValue = EINVAL; - cmd = fubyte(ifreqp->ifr_data); + cmd = fubyte(ifr_data_get_ptr(ifreqp)); if (cmd == -1) return (EFAULT); switch(cmd) { case XGE_QUERY_STATS: mtx_lock(&lldev->mtx_drv); status = xge_hal_stats_hw(lldev->devh, (xge_hal_stats_hw_info_t **)&info); mtx_unlock(&lldev->mtx_drv); if(status == XGE_HAL_OK) { - if(copyout(info, ifreqp->ifr_data, + if(copyout(info, ifr_data_get_ptr(ifreqp), sizeof(xge_hal_stats_hw_info_t)) == 0) retValue = 0; } else { xge_trace(XGE_ERR, "Getting statistics failed (Status: %d)", status); } break; case XGE_QUERY_PCICONF: info = xge_os_malloc(NULL, sizeof(xge_hal_pci_config_t)); if(info != NULL) { mtx_lock(&lldev->mtx_drv); status = xge_hal_mgmt_pci_config(lldev->devh, info, sizeof(xge_hal_pci_config_t)); mtx_unlock(&lldev->mtx_drv); if(status == XGE_HAL_OK) { - if(copyout(info, ifreqp->ifr_data, + if(copyout(info, 
ifr_data_get_ptr(ifreqp), sizeof(xge_hal_pci_config_t)) == 0) retValue = 0; } else { xge_trace(XGE_ERR, "Getting PCI configuration failed (%d)", status); } xge_os_free(NULL, info, sizeof(xge_hal_pci_config_t)); } break; case XGE_QUERY_DEVSTATS: info = xge_os_malloc(NULL, sizeof(xge_hal_stats_device_info_t)); if(info != NULL) { mtx_lock(&lldev->mtx_drv); status =xge_hal_mgmt_device_stats(lldev->devh, info, sizeof(xge_hal_stats_device_info_t)); mtx_unlock(&lldev->mtx_drv); if(status == XGE_HAL_OK) { - if(copyout(info, ifreqp->ifr_data, + if(copyout(info, ifr_data_get_ptr(ifreqp), sizeof(xge_hal_stats_device_info_t)) == 0) retValue = 0; } else { xge_trace(XGE_ERR, "Getting device info failed (%d)", status); } xge_os_free(NULL, info, sizeof(xge_hal_stats_device_info_t)); } break; case XGE_QUERY_SWSTATS: info = xge_os_malloc(NULL, sizeof(xge_hal_stats_sw_err_t)); if(info != NULL) { mtx_lock(&lldev->mtx_drv); status =xge_hal_mgmt_sw_stats(lldev->devh, info, sizeof(xge_hal_stats_sw_err_t)); mtx_unlock(&lldev->mtx_drv); if(status == XGE_HAL_OK) { - if(copyout(info, ifreqp->ifr_data, + if(copyout(info, ifr_data_get_ptr(ifreqp), sizeof(xge_hal_stats_sw_err_t)) == 0) retValue = 0; } else { xge_trace(XGE_ERR, "Getting tcode statistics failed (%d)", status); } xge_os_free(NULL, info, sizeof(xge_hal_stats_sw_err_t)); } break; case XGE_QUERY_DRIVERSTATS: - if(copyout(&lldev->driver_stats, ifreqp->ifr_data, + if(copyout(&lldev->driver_stats, ifr_data_get_ptr(ifreqp), sizeof(xge_driver_stats_t)) == 0) { retValue = 0; } else { xge_trace(XGE_ERR, "Copyout of driver statistics failed (%d)", status); } break; case XGE_READ_VERSION: info = xge_os_malloc(NULL, XGE_BUFFER_SIZE); if(info != NULL) { strcpy(info, XGE_DRIVER_VERSION); - if(copyout(info, ifreqp->ifr_data, XGE_BUFFER_SIZE) == 0) + if(copyout(info, ifr_data_get_ptr(ifreqp), + XGE_BUFFER_SIZE) == 0) retValue = 0; xge_os_free(NULL, info, XGE_BUFFER_SIZE); } break; case XGE_QUERY_DEVCONF: info = xge_os_malloc(NULL, 
sizeof(xge_hal_device_config_t)); if(info != NULL) { mtx_lock(&lldev->mtx_drv); status = xge_hal_mgmt_device_config(lldev->devh, info, sizeof(xge_hal_device_config_t)); mtx_unlock(&lldev->mtx_drv); if(status == XGE_HAL_OK) { - if(copyout(info, ifreqp->ifr_data, + if(copyout(info, ifr_data_get_ptr(ifreqp), sizeof(xge_hal_device_config_t)) == 0) retValue = 0; } else { xge_trace(XGE_ERR, "Getting devconfig failed (%d)", status); } xge_os_free(NULL, info, sizeof(xge_hal_device_config_t)); } break; case XGE_QUERY_BUFFER_MODE: - if(copyout(&lldev->buffer_mode, ifreqp->ifr_data, + if(copyout(&lldev->buffer_mode, ifr_data_get_ptr(ifreqp), sizeof(int)) == 0) retValue = 0; break; case XGE_SET_BUFFER_MODE_1: case XGE_SET_BUFFER_MODE_2: case XGE_SET_BUFFER_MODE_5: mode = (cmd == XGE_SET_BUFFER_MODE_1) ? 'Y':'N'; - if(copyout(&mode, ifreqp->ifr_data, sizeof(mode)) == 0) + if(copyout(&mode, ifr_data_get_ptr(ifreqp), sizeof(mode)) == 0) retValue = 0; break; default: xge_trace(XGE_TRACE, "Nothing is matching"); retValue = ENOTTY; break; } return retValue; } /** * xge_ioctl_registers * IOCTL to get registers * * @lldev Per-adapter data * @ifreqp Interface request */ int xge_ioctl_registers(xge_lldev_t *lldev, struct ifreq *ifreqp) { xge_register_t tmpdata; xge_register_t *data; xge_hal_status_e status = XGE_HAL_OK; int retValue = EINVAL, offset = 0, index = 0; int error; u64 val64 = 0; - error = copyin(ifreqp->ifr_data, &tmpdata, sizeof(tmpdata)); + error = copyin(ifr_data_get_ptr(ifreqp), &tmpdata, sizeof(tmpdata)); if (error != 0) return (error); data = &tmpdata; /* Reading a register */ if(strcmp(data->option, "-r") == 0) { data->value = 0x0000; mtx_lock(&lldev->mtx_drv); status = xge_hal_mgmt_reg_read(lldev->devh, 0, data->offset, &data->value); mtx_unlock(&lldev->mtx_drv); if(status == XGE_HAL_OK) { - if(copyout(data, ifreqp->ifr_data, sizeof(xge_register_t)) == 0) + if(copyout(data, ifr_data_get_ptr(ifreqp), + sizeof(xge_register_t)) == 0) retValue = 0; } } /* Writing to a 
register */ else if(strcmp(data->option, "-w") == 0) { mtx_lock(&lldev->mtx_drv); status = xge_hal_mgmt_reg_write(lldev->devh, 0, data->offset, data->value); if(status == XGE_HAL_OK) { val64 = 0x0000; status = xge_hal_mgmt_reg_read(lldev->devh, 0, data->offset, &val64); if(status != XGE_HAL_OK) { xge_trace(XGE_ERR, "Reading back updated register failed"); } else { if(val64 != data->value) { xge_trace(XGE_ERR, "Read and written register values mismatched"); } else retValue = 0; } } else { xge_trace(XGE_ERR, "Getting register value failed"); } mtx_unlock(&lldev->mtx_drv); } else { mtx_lock(&lldev->mtx_drv); for(index = 0, offset = 0; offset <= XGE_OFFSET_OF_LAST_REG; index++, offset += 0x0008) { val64 = 0; status = xge_hal_mgmt_reg_read(lldev->devh, 0, offset, &val64); if(status != XGE_HAL_OK) { xge_trace(XGE_ERR, "Getting register value failed"); break; } *((u64 *)((u64 *)data + index)) = val64; retValue = 0; } mtx_unlock(&lldev->mtx_drv); if(retValue == 0) { - if(copyout(data, ifreqp->ifr_data, + if(copyout(data, ifr_data_get_ptr(ifreqp), sizeof(xge_hal_pci_bar0_t)) != 0) { xge_trace(XGE_ERR, "Copyout of register values failed"); retValue = EINVAL; } } else { xge_trace(XGE_ERR, "Getting register values failed"); } } return retValue; } /** * xge_ioctl * Callback to control the device - Interface configuration * * @ifnetp Interface Handle * @command Device control command * @data Parameters associated with command (if any) */ int xge_ioctl(struct ifnet *ifnetp, unsigned long command, caddr_t data) { struct ifreq *ifreqp = (struct ifreq *)data; xge_lldev_t *lldev = ifnetp->if_softc; struct ifmedia *ifmediap = &lldev->media; int retValue = 0, mask = 0; if(lldev->in_detach) { return retValue; } switch(command) { /* Set/Get ifnet address */ case SIOCSIFADDR: case SIOCGIFADDR: ether_ioctl(ifnetp, command, data); break; /* Set ifnet MTU */ case SIOCSIFMTU: retValue = xge_change_mtu(lldev, ifreqp->ifr_mtu); break; /* Set ifnet flags */ case SIOCSIFFLAGS: if(ifnetp->if_flags 
& IFF_UP) { /* Link status is UP */ if(!(ifnetp->if_drv_flags & IFF_DRV_RUNNING)) { xge_init(lldev); } xge_disable_promisc(lldev); xge_enable_promisc(lldev); } else { /* Link status is DOWN */ /* If device is in running, make it down */ if(ifnetp->if_drv_flags & IFF_DRV_RUNNING) { xge_stop(lldev); } } break; /* Add/delete multicast address */ case SIOCADDMULTI: case SIOCDELMULTI: if(ifnetp->if_drv_flags & IFF_DRV_RUNNING) { xge_setmulti(lldev); } break; /* Set/Get net media */ case SIOCSIFMEDIA: case SIOCGIFMEDIA: retValue = ifmedia_ioctl(ifnetp, ifreqp, ifmediap, command); break; /* Set capabilities */ case SIOCSIFCAP: mtx_lock(&lldev->mtx_drv); mask = ifreqp->ifr_reqcap ^ ifnetp->if_capenable; if(mask & IFCAP_TXCSUM) { if(ifnetp->if_capenable & IFCAP_TXCSUM) { ifnetp->if_capenable &= ~(IFCAP_TSO4 | IFCAP_TXCSUM); ifnetp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP | CSUM_TSO); } else { ifnetp->if_capenable |= IFCAP_TXCSUM; ifnetp->if_hwassist |= (CSUM_TCP | CSUM_UDP); } } if(mask & IFCAP_TSO4) { if(ifnetp->if_capenable & IFCAP_TSO4) { ifnetp->if_capenable &= ~IFCAP_TSO4; ifnetp->if_hwassist &= ~CSUM_TSO; xge_os_printf("%s: TSO Disabled", device_get_nameunit(lldev->device)); } else if(ifnetp->if_capenable & IFCAP_TXCSUM) { ifnetp->if_capenable |= IFCAP_TSO4; ifnetp->if_hwassist |= CSUM_TSO; xge_os_printf("%s: TSO Enabled", device_get_nameunit(lldev->device)); } } mtx_unlock(&lldev->mtx_drv); break; /* Custom IOCTL 0 */ case SIOCGPRIVATE_0: retValue = xge_ioctl_stats(lldev, ifreqp); break; /* Custom IOCTL 1 */ case SIOCGPRIVATE_1: retValue = xge_ioctl_registers(lldev, ifreqp); break; default: retValue = EINVAL; break; } return retValue; } /** * xge_init * Initialize the interface * * @plldev Per-adapter Data */ void xge_init(void *plldev) { xge_lldev_t *lldev = (xge_lldev_t *)plldev; mtx_lock(&lldev->mtx_drv); xge_os_memzero(&lldev->driver_stats, sizeof(xge_driver_stats_t)); xge_device_init(lldev, XGE_HAL_CHANNEL_OC_NORMAL); mtx_unlock(&lldev->mtx_drv); } /** * 
xge_device_init * Initialize the interface (called by holding lock) * * @pdevin Per-adapter Data */ void xge_device_init(xge_lldev_t *lldev, xge_hal_channel_reopen_e option) { struct ifnet *ifnetp = lldev->ifnetp; xge_hal_device_t *hldev = lldev->devh; struct ifaddr *ifaddrp; unsigned char *macaddr; struct sockaddr_dl *sockaddrp; int status = XGE_HAL_OK; mtx_assert((&lldev->mtx_drv), MA_OWNED); /* If device is in running state, initializing is not required */ if(ifnetp->if_drv_flags & IFF_DRV_RUNNING) return; /* Initializing timer */ callout_init(&lldev->timer, 1); xge_trace(XGE_TRACE, "Set MTU size"); status = xge_hal_device_mtu_set(hldev, ifnetp->if_mtu); if(status != XGE_HAL_OK) { xge_trace(XGE_ERR, "Setting MTU in HAL device failed"); goto _exit; } /* Enable HAL device */ xge_hal_device_enable(hldev); /* Get MAC address and update in HAL */ ifaddrp = ifnetp->if_addr; sockaddrp = (struct sockaddr_dl *)ifaddrp->ifa_addr; sockaddrp->sdl_type = IFT_ETHER; sockaddrp->sdl_alen = ifnetp->if_addrlen; macaddr = LLADDR(sockaddrp); xge_trace(XGE_TRACE, "Setting MAC address: %02x:%02x:%02x:%02x:%02x:%02x\n", *macaddr, *(macaddr + 1), *(macaddr + 2), *(macaddr + 3), *(macaddr + 4), *(macaddr + 5)); status = xge_hal_device_macaddr_set(hldev, 0, macaddr); if(status != XGE_HAL_OK) xge_trace(XGE_ERR, "Setting MAC address failed (%d)", status); /* Opening channels */ mtx_unlock(&lldev->mtx_drv); status = xge_channel_open(lldev, option); mtx_lock(&lldev->mtx_drv); if(status != XGE_HAL_OK) goto _exit; /* Set appropriate flags */ ifnetp->if_drv_flags |= IFF_DRV_RUNNING; ifnetp->if_flags &= ~IFF_DRV_OACTIVE; /* Checksum capability */ ifnetp->if_hwassist = (ifnetp->if_capenable & IFCAP_TXCSUM) ? 
(CSUM_TCP | CSUM_UDP) : 0; if((lldev->enabled_tso) && (ifnetp->if_capenable & IFCAP_TSO4)) ifnetp->if_hwassist |= CSUM_TSO; /* Enable interrupts */ xge_hal_device_intr_enable(hldev); callout_reset(&lldev->timer, 10*hz, xge_timer, lldev); /* Disable promiscuous mode */ xge_trace(XGE_TRACE, "If opted, enable promiscuous mode"); xge_enable_promisc(lldev); /* Device is initialized */ lldev->initialized = 1; xge_os_mdelay(1000); _exit: return; } /** * xge_timer * Timer timeout function to handle link status * * @devp Per-adapter Data */ void xge_timer(void *devp) { xge_lldev_t *lldev = (xge_lldev_t *)devp; xge_hal_device_t *hldev = lldev->devh; /* Poll for changes */ xge_hal_device_poll(hldev); /* Reset timer */ callout_reset(&lldev->timer, hz, xge_timer, lldev); return; } /** * xge_stop * De-activate the interface * * @lldev Per-adater Data */ void xge_stop(xge_lldev_t *lldev) { mtx_lock(&lldev->mtx_drv); xge_device_stop(lldev, XGE_HAL_CHANNEL_OC_NORMAL); mtx_unlock(&lldev->mtx_drv); } /** * xge_isr_filter * ISR filter function - to filter interrupts from other devices (shared) * * @handle Per-adapter Data * * Returns * FILTER_STRAY if interrupt is from other device * FILTER_SCHEDULE_THREAD if interrupt is from Xframe device */ int xge_isr_filter(void *handle) { xge_lldev_t *lldev = (xge_lldev_t *)handle; xge_hal_pci_bar0_t *bar0 = (xge_hal_pci_bar0_t *)((lldev->devh)->bar0); u16 retValue = FILTER_STRAY; u64 val64 = 0; XGE_DRV_STATS(isr_filter); val64 = xge_os_pio_mem_read64(lldev->pdev, (lldev->devh)->regh0, &bar0->general_int_status); retValue = (!val64) ? 
FILTER_STRAY : FILTER_SCHEDULE_THREAD; return retValue; } /** * xge_isr_line * Interrupt service routine for Line interrupts * * @plldev Per-adapter Data */ void xge_isr_line(void *plldev) { xge_hal_status_e status; xge_lldev_t *lldev = (xge_lldev_t *)plldev; xge_hal_device_t *hldev = (xge_hal_device_t *)lldev->devh; struct ifnet *ifnetp = lldev->ifnetp; XGE_DRV_STATS(isr_line); if(ifnetp->if_drv_flags & IFF_DRV_RUNNING) { status = xge_hal_device_handle_irq(hldev); if(!(IFQ_DRV_IS_EMPTY(&ifnetp->if_snd))) xge_send(ifnetp); } } /* * xge_isr_msi * ISR for Message signaled interrupts */ void xge_isr_msi(void *plldev) { xge_lldev_t *lldev = (xge_lldev_t *)plldev; XGE_DRV_STATS(isr_msi); xge_hal_device_continue_irq(lldev->devh); } /** * xge_rx_open * Initiate and open all Rx channels * * @qid Ring Index * @lldev Per-adapter Data * @rflag Channel open/close/reopen flag * * Returns 0 or Error Number */ int xge_rx_open(int qid, xge_lldev_t *lldev, xge_hal_channel_reopen_e rflag) { u64 adapter_status = 0x0; xge_hal_status_e status = XGE_HAL_FAIL; xge_hal_channel_attr_t attr = { .post_qid = qid, .compl_qid = 0, .callback = xge_rx_compl, .per_dtr_space = sizeof(xge_rx_priv_t), .flags = 0, .type = XGE_HAL_CHANNEL_TYPE_RING, .userdata = lldev, .dtr_init = xge_rx_initial_replenish, .dtr_term = xge_rx_term }; /* If device is not ready, return */ status = xge_hal_device_status(lldev->devh, &adapter_status); if(status != XGE_HAL_OK) { xge_os_printf("Adapter Status: 0x%llx", (long long) adapter_status); XGE_EXIT_ON_ERR("Device is not ready", _exit, XGE_HAL_FAIL); } else { status = xge_hal_channel_open(lldev->devh, &attr, &lldev->ring_channel[qid], rflag); } _exit: return status; } /** * xge_tx_open * Initialize and open all Tx channels * * @lldev Per-adapter Data * @tflag Channel open/close/reopen flag * * Returns 0 or Error Number */ int xge_tx_open(xge_lldev_t *lldev, xge_hal_channel_reopen_e tflag) { xge_hal_status_e status = XGE_HAL_FAIL; u64 adapter_status = 0x0; int qindex, 
index; xge_hal_channel_attr_t attr = { .compl_qid = 0, .callback = xge_tx_compl, .per_dtr_space = sizeof(xge_tx_priv_t), .flags = 0, .type = XGE_HAL_CHANNEL_TYPE_FIFO, .userdata = lldev, .dtr_init = xge_tx_initial_replenish, .dtr_term = xge_tx_term }; /* If device is not ready, return */ status = xge_hal_device_status(lldev->devh, &adapter_status); if(status != XGE_HAL_OK) { xge_os_printf("Adapter Status: 0x%llx", (long long) adapter_status); XGE_EXIT_ON_ERR("Device is not ready", _exit, XGE_HAL_FAIL); } for(qindex = 0; qindex < XGE_FIFO_COUNT; qindex++) { attr.post_qid = qindex, status = xge_hal_channel_open(lldev->devh, &attr, &lldev->fifo_channel[qindex], tflag); if(status != XGE_HAL_OK) { for(index = 0; index < qindex; index++) xge_hal_channel_close(lldev->fifo_channel[index], tflag); } } _exit: return status; } /** * xge_enable_msi * Enables MSI * * @lldev Per-adapter Data */ void xge_enable_msi(xge_lldev_t *lldev) { xge_list_t *item = NULL; xge_hal_device_t *hldev = lldev->devh; xge_hal_channel_t *channel = NULL; u16 offset = 0, val16 = 0; xge_os_pci_read16(lldev->pdev, NULL, xge_offsetof(xge_hal_pci_config_le_t, msi_control), &val16); /* Update msi_data */ offset = (val16 & 0x80) ? 
0x4c : 0x48; xge_os_pci_read16(lldev->pdev, NULL, offset, &val16); if(val16 & 0x1) val16 &= 0xfffe; else val16 |= 0x1; xge_os_pci_write16(lldev->pdev, NULL, offset, val16); /* Update msi_control */ xge_os_pci_read16(lldev->pdev, NULL, xge_offsetof(xge_hal_pci_config_le_t, msi_control), &val16); val16 |= 0x10; xge_os_pci_write16(lldev->pdev, NULL, xge_offsetof(xge_hal_pci_config_le_t, msi_control), val16); /* Set TxMAT and RxMAT registers with MSI */ xge_list_for_each(item, &hldev->free_channels) { channel = xge_container_of(item, xge_hal_channel_t, item); xge_hal_channel_msi_set(channel, 1, (u32)val16); } } /** * xge_channel_open * Open both Tx and Rx channels * * @lldev Per-adapter Data * @option Channel reopen option */ int xge_channel_open(xge_lldev_t *lldev, xge_hal_channel_reopen_e option) { xge_lro_entry_t *lro_session = NULL; xge_hal_status_e status = XGE_HAL_OK; int index = 0, index2 = 0; if(lldev->enabled_msi == XGE_HAL_INTR_MODE_MSI) { xge_msi_info_restore(lldev); xge_enable_msi(lldev); } _exit2: status = xge_create_dma_tags(lldev->device); if(status != XGE_HAL_OK) XGE_EXIT_ON_ERR("DMA tag creation failed", _exit, status); /* Open ring (Rx) channel */ for(index = 0; index < XGE_RING_COUNT; index++) { status = xge_rx_open(index, lldev, option); if(status != XGE_HAL_OK) { /* * DMA mapping fails in the unpatched Kernel which can't * allocate contiguous memory for Jumbo frames. * Try using 5 buffer mode. 
*/ if((lldev->buffer_mode == XGE_HAL_RING_QUEUE_BUFFER_MODE_1) && (((lldev->ifnetp)->if_mtu + XGE_HAL_MAC_HEADER_MAX_SIZE) > MJUMPAGESIZE)) { /* Close so far opened channels */ for(index2 = 0; index2 < index; index2++) { xge_hal_channel_close(lldev->ring_channel[index2], option); } /* Destroy DMA tags intended to use for 1 buffer mode */ if(bus_dmamap_destroy(lldev->dma_tag_rx, lldev->extra_dma_map)) { xge_trace(XGE_ERR, "Rx extra DMA map destroy failed"); } if(bus_dma_tag_destroy(lldev->dma_tag_rx)) xge_trace(XGE_ERR, "Rx DMA tag destroy failed"); if(bus_dma_tag_destroy(lldev->dma_tag_tx)) xge_trace(XGE_ERR, "Tx DMA tag destroy failed"); /* Switch to 5 buffer mode */ lldev->buffer_mode = XGE_HAL_RING_QUEUE_BUFFER_MODE_5; xge_buffer_mode_init(lldev, (lldev->ifnetp)->if_mtu); /* Restart init */ goto _exit2; } else { XGE_EXIT_ON_ERR("Opening Rx channel failed", _exit1, status); } } } if(lldev->enabled_lro) { SLIST_INIT(&lldev->lro_free); SLIST_INIT(&lldev->lro_active); lldev->lro_num = XGE_LRO_DEFAULT_ENTRIES; for(index = 0; index < lldev->lro_num; index++) { lro_session = (xge_lro_entry_t *) xge_os_malloc(NULL, sizeof(xge_lro_entry_t)); if(lro_session == NULL) { lldev->lro_num = index; break; } SLIST_INSERT_HEAD(&lldev->lro_free, lro_session, next); } } /* Open FIFO (Tx) channel */ status = xge_tx_open(lldev, option); if(status != XGE_HAL_OK) XGE_EXIT_ON_ERR("Opening Tx channel failed", _exit1, status); goto _exit; _exit1: /* * Opening Rx channel(s) failed (index is ) or * Initialization of LRO failed (index is XGE_RING_COUNT) * Opening Tx channel failed (index is XGE_RING_COUNT) */ for(index2 = 0; index2 < index; index2++) xge_hal_channel_close(lldev->ring_channel[index2], option); _exit: return status; } /** * xge_channel_close * Close both Tx and Rx channels * * @lldev Per-adapter Data * @option Channel reopen option * */ void xge_channel_close(xge_lldev_t *lldev, xge_hal_channel_reopen_e option) { int qindex = 0; DELAY(1000 * 1000); /* Close FIFO (Tx) channel */ 
for(qindex = 0; qindex < XGE_FIFO_COUNT; qindex++) xge_hal_channel_close(lldev->fifo_channel[qindex], option); /* Close Ring (Rx) channels */ for(qindex = 0; qindex < XGE_RING_COUNT; qindex++) xge_hal_channel_close(lldev->ring_channel[qindex], option); if(bus_dmamap_destroy(lldev->dma_tag_rx, lldev->extra_dma_map)) xge_trace(XGE_ERR, "Rx extra map destroy failed"); if(bus_dma_tag_destroy(lldev->dma_tag_rx)) xge_trace(XGE_ERR, "Rx DMA tag destroy failed"); if(bus_dma_tag_destroy(lldev->dma_tag_tx)) xge_trace(XGE_ERR, "Tx DMA tag destroy failed"); } /** * dmamap_cb * DMA map callback * * @arg Parameter passed from dmamap * @segs Segments * @nseg Number of segments * @error Error */ void dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { if(!error) { *(bus_addr_t *) arg = segs->ds_addr; } } /** * xge_reset * Device Reset * * @lldev Per-adapter Data */ void xge_reset(xge_lldev_t *lldev) { xge_trace(XGE_TRACE, "Reseting the chip"); /* If the device is not initialized, return */ if(lldev->initialized) { mtx_lock(&lldev->mtx_drv); xge_device_stop(lldev, XGE_HAL_CHANNEL_OC_NORMAL); xge_device_init(lldev, XGE_HAL_CHANNEL_OC_NORMAL); mtx_unlock(&lldev->mtx_drv); } return; } /** * xge_setmulti * Set an address as a multicast address * * @lldev Per-adapter Data */ void xge_setmulti(xge_lldev_t *lldev) { struct ifmultiaddr *ifma; u8 *lladdr; xge_hal_device_t *hldev = (xge_hal_device_t *)lldev->devh; struct ifnet *ifnetp = lldev->ifnetp; int index = 0; int offset = 1; int table_size = 47; xge_hal_status_e status = XGE_HAL_OK; u8 initial_addr[]= {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; if((ifnetp->if_flags & IFF_MULTICAST) && (!lldev->all_multicast)) { status = xge_hal_device_mcast_enable(hldev); lldev->all_multicast = 1; } else if((ifnetp->if_flags & IFF_MULTICAST) && (lldev->all_multicast)) { status = xge_hal_device_mcast_disable(hldev); lldev->all_multicast = 0; } if(status != XGE_HAL_OK) { xge_trace(XGE_ERR, "Enabling/disabling multicast failed"); goto _exit; } 
/* Updating address list */ if_maddr_rlock(ifnetp); index = 0; TAILQ_FOREACH(ifma, &ifnetp->if_multiaddrs, ifma_link) { if(ifma->ifma_addr->sa_family != AF_LINK) { continue; } lladdr = LLADDR((struct sockaddr_dl *)ifma->ifma_addr); index += 1; } if_maddr_runlock(ifnetp); if((!lldev->all_multicast) && (index)) { lldev->macaddr_count = (index + 1); if(lldev->macaddr_count > table_size) { goto _exit; } /* Clear old addresses */ for(index = 0; index < 48; index++) { xge_hal_device_macaddr_set(hldev, (offset + index), initial_addr); } } /* Add new addresses */ if_maddr_rlock(ifnetp); index = 0; TAILQ_FOREACH(ifma, &ifnetp->if_multiaddrs, ifma_link) { if(ifma->ifma_addr->sa_family != AF_LINK) { continue; } lladdr = LLADDR((struct sockaddr_dl *)ifma->ifma_addr); xge_hal_device_macaddr_set(hldev, (offset + index), lladdr); index += 1; } if_maddr_runlock(ifnetp); _exit: return; } /** * xge_enable_promisc * Enable Promiscuous Mode * * @lldev Per-adapter Data */ void xge_enable_promisc(xge_lldev_t *lldev) { struct ifnet *ifnetp = lldev->ifnetp; xge_hal_device_t *hldev = lldev->devh; xge_hal_pci_bar0_t *bar0 = NULL; u64 val64 = 0; bar0 = (xge_hal_pci_bar0_t *) hldev->bar0; if(ifnetp->if_flags & IFF_PROMISC) { xge_hal_device_promisc_enable(lldev->devh); /* * When operating in promiscuous mode, don't strip the VLAN tag */ val64 = xge_os_pio_mem_read64(lldev->pdev, hldev->regh0, &bar0->rx_pa_cfg); val64 &= ~XGE_HAL_RX_PA_CFG_STRIP_VLAN_TAG_MODE(1); val64 |= XGE_HAL_RX_PA_CFG_STRIP_VLAN_TAG_MODE(0); xge_os_pio_mem_write64(lldev->pdev, hldev->regh0, val64, &bar0->rx_pa_cfg); xge_trace(XGE_TRACE, "Promiscuous mode ON"); } } /** * xge_disable_promisc * Disable Promiscuous Mode * * @lldev Per-adapter Data */ void xge_disable_promisc(xge_lldev_t *lldev) { xge_hal_device_t *hldev = lldev->devh; xge_hal_pci_bar0_t *bar0 = NULL; u64 val64 = 0; bar0 = (xge_hal_pci_bar0_t *) hldev->bar0; xge_hal_device_promisc_disable(lldev->devh); /* * Strip VLAN tag when operating in non-promiscuous mode 
*/ val64 = xge_os_pio_mem_read64(lldev->pdev, hldev->regh0, &bar0->rx_pa_cfg); val64 &= ~XGE_HAL_RX_PA_CFG_STRIP_VLAN_TAG_MODE(1); val64 |= XGE_HAL_RX_PA_CFG_STRIP_VLAN_TAG_MODE(1); xge_os_pio_mem_write64(lldev->pdev, hldev->regh0, val64, &bar0->rx_pa_cfg); xge_trace(XGE_TRACE, "Promiscuous mode OFF"); } /** * xge_change_mtu * Change interface MTU to a requested valid size * * @lldev Per-adapter Data * @NewMtu Requested MTU * * Returns 0 or Error Number */ int xge_change_mtu(xge_lldev_t *lldev, int new_mtu) { int status = XGE_HAL_OK; /* Check requested MTU size for boundary */ if(xge_hal_device_mtu_check(lldev->devh, new_mtu) != XGE_HAL_OK) { XGE_EXIT_ON_ERR("Invalid MTU", _exit, EINVAL); } lldev->mtu = new_mtu; xge_confirm_changes(lldev, XGE_SET_MTU); _exit: return status; } /** * xge_device_stop * * Common code for both stop and part of reset. Disables device, interrupts and * closes channels * * @dev Device Handle * @option Channel normal/reset option */ void xge_device_stop(xge_lldev_t *lldev, xge_hal_channel_reopen_e option) { xge_hal_device_t *hldev = lldev->devh; struct ifnet *ifnetp = lldev->ifnetp; u64 val64 = 0; mtx_assert((&lldev->mtx_drv), MA_OWNED); /* If device is not in "Running" state, return */ if (!(ifnetp->if_drv_flags & IFF_DRV_RUNNING)) goto _exit; /* Set appropriate flags */ ifnetp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); /* Stop timer */ callout_stop(&lldev->timer); /* Disable interrupts */ xge_hal_device_intr_disable(hldev); mtx_unlock(&lldev->mtx_drv); xge_queue_flush(xge_hal_device_queue(lldev->devh)); mtx_lock(&lldev->mtx_drv); /* Disable HAL device */ if(xge_hal_device_disable(hldev) != XGE_HAL_OK) { xge_trace(XGE_ERR, "Disabling HAL device failed"); xge_hal_device_status(hldev, &val64); xge_trace(XGE_ERR, "Adapter Status: 0x%llx", (long long)val64); } /* Close Tx and Rx channels */ xge_channel_close(lldev, option); /* Reset HAL device */ xge_hal_device_reset(hldev); xge_os_mdelay(1000); lldev->initialized = 0; 
if_link_state_change(ifnetp, LINK_STATE_DOWN); _exit: return; } /** * xge_set_mbuf_cflags * set checksum flag for the mbuf * * @pkt Packet */ void xge_set_mbuf_cflags(mbuf_t pkt) { pkt->m_pkthdr.csum_flags = CSUM_IP_CHECKED; pkt->m_pkthdr.csum_flags |= CSUM_IP_VALID; pkt->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); pkt->m_pkthdr.csum_data = htons(0xffff); } /** * xge_lro_flush_sessions * Flush LRO session and send accumulated LRO packet to upper layer * * @lldev Per-adapter Data */ void xge_lro_flush_sessions(xge_lldev_t *lldev) { xge_lro_entry_t *lro_session = NULL; while(!SLIST_EMPTY(&lldev->lro_active)) { lro_session = SLIST_FIRST(&lldev->lro_active); SLIST_REMOVE_HEAD(&lldev->lro_active, next); xge_lro_flush(lldev, lro_session); } } /** * xge_lro_flush * Flush LRO session. Send accumulated LRO packet to upper layer * * @lldev Per-adapter Data * @lro LRO session to be flushed */ static void xge_lro_flush(xge_lldev_t *lldev, xge_lro_entry_t *lro_session) { struct ip *header_ip; struct tcphdr *header_tcp; u32 *ptr; if(lro_session->append_cnt) { header_ip = lro_session->lro_header_ip; header_ip->ip_len = htons(lro_session->len - ETHER_HDR_LEN); lro_session->m_head->m_pkthdr.len = lro_session->len; header_tcp = (struct tcphdr *)(header_ip + 1); header_tcp->th_ack = lro_session->ack_seq; header_tcp->th_win = lro_session->window; if(lro_session->timestamp) { ptr = (u32 *)(header_tcp + 1); ptr[1] = htonl(lro_session->tsval); ptr[2] = lro_session->tsecr; } } (*lldev->ifnetp->if_input)(lldev->ifnetp, lro_session->m_head); lro_session->m_head = NULL; lro_session->timestamp = 0; lro_session->append_cnt = 0; SLIST_INSERT_HEAD(&lldev->lro_free, lro_session, next); } /** * xge_lro_accumulate * Accumulate packets to form a large LRO packet based on various conditions * * @lldev Per-adapter Data * @m_head Current Packet * * Returns XGE_HAL_OK or XGE_HAL_FAIL (failure) */ static int xge_lro_accumulate(xge_lldev_t *lldev, struct mbuf *m_head) { struct ether_header 
*header_ethernet; struct ip *header_ip; struct tcphdr *header_tcp; u32 seq, *ptr; struct mbuf *buffer_next, *buffer_tail; xge_lro_entry_t *lro_session; xge_hal_status_e status = XGE_HAL_FAIL; int hlen, ip_len, tcp_hdr_len, tcp_data_len, tot_len, tcp_options; int trim; /* Get Ethernet header */ header_ethernet = mtod(m_head, struct ether_header *); /* Return if it is not IP packet */ if(header_ethernet->ether_type != htons(ETHERTYPE_IP)) goto _exit; /* Get IP header */ header_ip = lldev->buffer_mode == XGE_HAL_RING_QUEUE_BUFFER_MODE_1 ? (struct ip *)(header_ethernet + 1) : mtod(m_head->m_next, struct ip *); /* Return if it is not TCP packet */ if(header_ip->ip_p != IPPROTO_TCP) goto _exit; /* Return if packet has options */ if((header_ip->ip_hl << 2) != sizeof(*header_ip)) goto _exit; /* Return if packet is fragmented */ if(header_ip->ip_off & htons(IP_MF | IP_OFFMASK)) goto _exit; /* Get TCP header */ header_tcp = (struct tcphdr *)(header_ip + 1); /* Return if not ACK or PUSH */ if((header_tcp->th_flags & ~(TH_ACK | TH_PUSH)) != 0) goto _exit; /* Only timestamp option is handled */ tcp_options = (header_tcp->th_off << 2) - sizeof(*header_tcp); tcp_hdr_len = sizeof(*header_tcp) + tcp_options; ptr = (u32 *)(header_tcp + 1); if(tcp_options != 0) { if(__predict_false(tcp_options != TCPOLEN_TSTAMP_APPA) || (*ptr != ntohl(TCPOPT_NOP << 24 | TCPOPT_NOP << 16 | TCPOPT_TIMESTAMP << 8 | TCPOLEN_TIMESTAMP))) { goto _exit; } } /* Total length of packet (IP) */ ip_len = ntohs(header_ip->ip_len); /* TCP data size */ tcp_data_len = ip_len - (header_tcp->th_off << 2) - sizeof(*header_ip); /* If the frame is padded, trim it */ tot_len = m_head->m_pkthdr.len; trim = tot_len - (ip_len + ETHER_HDR_LEN); if(trim != 0) { if(trim < 0) goto _exit; m_adj(m_head, -trim); tot_len = m_head->m_pkthdr.len; } buffer_next = m_head; buffer_tail = NULL; while(buffer_next != NULL) { buffer_tail = buffer_next; buffer_next = buffer_tail->m_next; } /* Total size of only headers */ hlen = ip_len + 
ETHER_HDR_LEN - tcp_data_len; /* Get sequence number */ seq = ntohl(header_tcp->th_seq); SLIST_FOREACH(lro_session, &lldev->lro_active, next) { if(lro_session->source_port == header_tcp->th_sport && lro_session->dest_port == header_tcp->th_dport && lro_session->source_ip == header_ip->ip_src.s_addr && lro_session->dest_ip == header_ip->ip_dst.s_addr) { /* Unmatched sequence number, flush LRO session */ if(__predict_false(seq != lro_session->next_seq)) { SLIST_REMOVE(&lldev->lro_active, lro_session, xge_lro_entry_t, next); xge_lro_flush(lldev, lro_session); goto _exit; } /* Handle timestamp option */ if(tcp_options) { u32 tsval = ntohl(*(ptr + 1)); if(__predict_false(lro_session->tsval > tsval || *(ptr + 2) == 0)) { goto _exit; } lro_session->tsval = tsval; lro_session->tsecr = *(ptr + 2); } lro_session->next_seq += tcp_data_len; lro_session->ack_seq = header_tcp->th_ack; lro_session->window = header_tcp->th_win; /* If TCP data/payload is of 0 size, free mbuf */ if(tcp_data_len == 0) { m_freem(m_head); status = XGE_HAL_OK; goto _exit; } lro_session->append_cnt++; lro_session->len += tcp_data_len; /* Adjust mbuf so that m_data points to payload than headers */ m_adj(m_head, hlen); /* Append this packet to LRO accumulated packet */ lro_session->m_tail->m_next = m_head; lro_session->m_tail = buffer_tail; /* Flush if LRO packet is exceeding maximum size */ if(lro_session->len > (XGE_HAL_LRO_DEFAULT_FRM_LEN - lldev->ifnetp->if_mtu)) { SLIST_REMOVE(&lldev->lro_active, lro_session, xge_lro_entry_t, next); xge_lro_flush(lldev, lro_session); } status = XGE_HAL_OK; goto _exit; } } if(SLIST_EMPTY(&lldev->lro_free)) goto _exit; /* Start a new LRO session */ lro_session = SLIST_FIRST(&lldev->lro_free); SLIST_REMOVE_HEAD(&lldev->lro_free, next); SLIST_INSERT_HEAD(&lldev->lro_active, lro_session, next); lro_session->source_port = header_tcp->th_sport; lro_session->dest_port = header_tcp->th_dport; lro_session->source_ip = header_ip->ip_src.s_addr; lro_session->dest_ip = 
header_ip->ip_dst.s_addr; lro_session->next_seq = seq + tcp_data_len; lro_session->mss = tcp_data_len; lro_session->ack_seq = header_tcp->th_ack; lro_session->window = header_tcp->th_win; lro_session->lro_header_ip = header_ip; /* Handle timestamp option */ if(tcp_options) { lro_session->timestamp = 1; lro_session->tsval = ntohl(*(ptr + 1)); lro_session->tsecr = *(ptr + 2); } lro_session->len = tot_len; lro_session->m_head = m_head; lro_session->m_tail = buffer_tail; status = XGE_HAL_OK; _exit: return status; } /** * xge_accumulate_large_rx * Accumulate packets to form a large LRO packet based on various conditions * * @lldev Per-adapter Data * @pkt Current packet * @pkt_length Packet Length * @rxd_priv Rx Descriptor Private Data */ void xge_accumulate_large_rx(xge_lldev_t *lldev, struct mbuf *pkt, int pkt_length, xge_rx_priv_t *rxd_priv) { if(xge_lro_accumulate(lldev, pkt) != XGE_HAL_OK) { bus_dmamap_sync(lldev->dma_tag_rx, rxd_priv->dmainfo[0].dma_map, BUS_DMASYNC_POSTREAD); (*lldev->ifnetp->if_input)(lldev->ifnetp, pkt); } } /** * xge_rx_compl * If the interrupt is due to received frame (Rx completion), send it up * * @channelh Ring Channel Handle * @dtr Current Descriptor * @t_code Transfer Code indicating success or error * @userdata Per-adapter Data * * Returns XGE_HAL_OK or HAL error enums */ xge_hal_status_e xge_rx_compl(xge_hal_channel_h channelh, xge_hal_dtr_h dtr, u8 t_code, void *userdata) { struct ifnet *ifnetp; xge_rx_priv_t *rxd_priv = NULL; mbuf_t mbuf_up = NULL; xge_hal_status_e status = XGE_HAL_OK; xge_hal_dtr_info_t ext_info; int index; u16 vlan_tag; /*get the user data portion*/ xge_lldev_t *lldev = xge_hal_channel_userdata(channelh); if(!lldev) { XGE_EXIT_ON_ERR("Failed to get user data", _exit, XGE_HAL_FAIL); } XGE_DRV_STATS(rx_completions); /* get the interface pointer */ ifnetp = lldev->ifnetp; do { XGE_DRV_STATS(rx_desc_compl); if(!(ifnetp->if_drv_flags & IFF_DRV_RUNNING)) { status = XGE_HAL_FAIL; goto _exit; } if(t_code) { 
xge_trace(XGE_TRACE, "Packet dropped because of %d", t_code); XGE_DRV_STATS(rx_tcode); xge_hal_device_handle_tcode(channelh, dtr, t_code); xge_hal_ring_dtr_post(channelh,dtr); continue; } /* Get the private data for this descriptor*/ rxd_priv = (xge_rx_priv_t *) xge_hal_ring_dtr_private(channelh, dtr); if(!rxd_priv) { XGE_EXIT_ON_ERR("Failed to get descriptor private data", _exit, XGE_HAL_FAIL); } /* * Prepare one buffer to send it to upper layer -- since the upper * layer frees the buffer do not use rxd_priv->buffer. Meanwhile * prepare a new buffer, do mapping, use it in the current * descriptor and post descriptor back to ring channel */ mbuf_up = rxd_priv->bufferArray[0]; /* Gets details of mbuf i.e., packet length */ xge_ring_dtr_get(mbuf_up, channelh, dtr, lldev, rxd_priv); status = (lldev->buffer_mode == XGE_HAL_RING_QUEUE_BUFFER_MODE_1) ? xge_get_buf(dtr, rxd_priv, lldev, 0) : xge_get_buf_3b_5b(dtr, rxd_priv, lldev); if(status != XGE_HAL_OK) { xge_trace(XGE_ERR, "No memory"); XGE_DRV_STATS(rx_no_buf); /* * Unable to allocate buffer. Instead of discarding, post * descriptor back to channel for future processing of same * packet. 
*/ xge_hal_ring_dtr_post(channelh, dtr); continue; } /* Get the extended information */ xge_hal_ring_dtr_info_get(channelh, dtr, &ext_info); /* * As we have allocated a new mbuf for this descriptor, post this * descriptor with new mbuf back to ring channel */ vlan_tag = ext_info.vlan; xge_hal_ring_dtr_post(channelh, dtr); if ((!(ext_info.proto & XGE_HAL_FRAME_PROTO_IP_FRAGMENTED) && (ext_info.proto & XGE_HAL_FRAME_PROTO_TCP_OR_UDP) && (ext_info.l3_cksum == XGE_HAL_L3_CKSUM_OK) && (ext_info.l4_cksum == XGE_HAL_L4_CKSUM_OK))) { /* set Checksum Flag */ xge_set_mbuf_cflags(mbuf_up); if(lldev->enabled_lro) { xge_accumulate_large_rx(lldev, mbuf_up, mbuf_up->m_len, rxd_priv); } else { /* Post-Read sync for buffers*/ for(index = 0; index < lldev->rxd_mbuf_cnt; index++) { bus_dmamap_sync(lldev->dma_tag_rx, rxd_priv->dmainfo[0].dma_map, BUS_DMASYNC_POSTREAD); } (*ifnetp->if_input)(ifnetp, mbuf_up); } } else { /* * Packet with erroneous checksum , let the upper layer deal * with it */ /* Post-Read sync for buffers*/ for(index = 0; index < lldev->rxd_mbuf_cnt; index++) { bus_dmamap_sync(lldev->dma_tag_rx, rxd_priv->dmainfo[0].dma_map, BUS_DMASYNC_POSTREAD); } if(vlan_tag) { mbuf_up->m_pkthdr.ether_vtag = vlan_tag; mbuf_up->m_flags |= M_VLANTAG; } if(lldev->enabled_lro) xge_lro_flush_sessions(lldev); (*ifnetp->if_input)(ifnetp, mbuf_up); } } while(xge_hal_ring_dtr_next_completed(channelh, &dtr, &t_code) == XGE_HAL_OK); if(lldev->enabled_lro) xge_lro_flush_sessions(lldev); _exit: return status; } /** * xge_ring_dtr_get * Get descriptors * * @mbuf_up Packet to send up * @channelh Ring Channel Handle * @dtr Descriptor * @lldev Per-adapter Data * @rxd_priv Rx Descriptor Private Data * * Returns XGE_HAL_OK or HAL error enums */ int xge_ring_dtr_get(mbuf_t mbuf_up, xge_hal_channel_h channelh, xge_hal_dtr_h dtr, xge_lldev_t *lldev, xge_rx_priv_t *rxd_priv) { mbuf_t m; int pkt_length[5]={0,0}, pkt_len=0; dma_addr_t dma_data[5]; int index; m = mbuf_up; pkt_len = 0; if(lldev->buffer_mode 
!= XGE_HAL_RING_QUEUE_BUFFER_MODE_1) { xge_os_memzero(pkt_length, sizeof(pkt_length)); /* * Retrieve data of interest from the completed descriptor -- This * returns the packet length */ if(lldev->buffer_mode == XGE_HAL_RING_QUEUE_BUFFER_MODE_5) { xge_hal_ring_dtr_5b_get(channelh, dtr, dma_data, pkt_length); } else { xge_hal_ring_dtr_3b_get(channelh, dtr, dma_data, pkt_length); } for(index = 0; index < lldev->rxd_mbuf_cnt; index++) { m->m_len = pkt_length[index]; if(index < (lldev->rxd_mbuf_cnt-1)) { m->m_next = rxd_priv->bufferArray[index + 1]; m = m->m_next; } else { m->m_next = NULL; } pkt_len+=pkt_length[index]; } /* * Since 2 buffer mode is an exceptional case where data is in 3rd * buffer but not in 2nd buffer */ if(lldev->buffer_mode == XGE_HAL_RING_QUEUE_BUFFER_MODE_2) { m->m_len = pkt_length[2]; pkt_len+=pkt_length[2]; } /* * Update length of newly created buffer to be sent up with packet * length */ mbuf_up->m_pkthdr.len = pkt_len; } else { /* * Retrieve data of interest from the completed descriptor -- This * returns the packet length */ xge_hal_ring_dtr_1b_get(channelh, dtr,&dma_data[0], &pkt_length[0]); /* * Update length of newly created buffer to be sent up with packet * length */ mbuf_up->m_len = mbuf_up->m_pkthdr.len = pkt_length[0]; } return XGE_HAL_OK; } /** * xge_flush_txds * Flush Tx descriptors * * @channelh Channel handle */ static void inline xge_flush_txds(xge_hal_channel_h channelh) { xge_lldev_t *lldev = xge_hal_channel_userdata(channelh); xge_hal_dtr_h tx_dtr; xge_tx_priv_t *tx_priv; u8 t_code; while(xge_hal_fifo_dtr_next_completed(channelh, &tx_dtr, &t_code) == XGE_HAL_OK) { XGE_DRV_STATS(tx_desc_compl); if(t_code) { xge_trace(XGE_TRACE, "Tx descriptor with t_code %d", t_code); XGE_DRV_STATS(tx_tcode); xge_hal_device_handle_tcode(channelh, tx_dtr, t_code); } tx_priv = xge_hal_fifo_dtr_private(tx_dtr); bus_dmamap_unload(lldev->dma_tag_tx, tx_priv->dma_map); m_freem(tx_priv->buffer); tx_priv->buffer = NULL; xge_hal_fifo_dtr_free(channelh, 
tx_dtr); } } /** * xge_send * Transmit function * * @ifnetp Interface Handle */ void xge_send(struct ifnet *ifnetp) { int qindex = 0; xge_lldev_t *lldev = ifnetp->if_softc; for(qindex = 0; qindex < XGE_FIFO_COUNT; qindex++) { if(mtx_trylock(&lldev->mtx_tx[qindex]) == 0) { XGE_DRV_STATS(tx_lock_fail); break; } xge_send_locked(ifnetp, qindex); mtx_unlock(&lldev->mtx_tx[qindex]); } } static void inline xge_send_locked(struct ifnet *ifnetp, int qindex) { xge_hal_dtr_h dtr; static bus_dma_segment_t segs[XGE_MAX_SEGS]; xge_hal_status_e status; unsigned int max_fragments; xge_lldev_t *lldev = ifnetp->if_softc; xge_hal_channel_h channelh = lldev->fifo_channel[qindex]; mbuf_t m_head = NULL; mbuf_t m_buf = NULL; xge_tx_priv_t *ll_tx_priv = NULL; register unsigned int count = 0; unsigned int nsegs = 0; u16 vlan_tag; max_fragments = ((xge_hal_fifo_t *)channelh)->config->max_frags; /* If device is not initialized, return */ if((!lldev->initialized) || (!(ifnetp->if_drv_flags & IFF_DRV_RUNNING))) return; XGE_DRV_STATS(tx_calls); /* * This loop will be executed for each packet in the kernel maintained * queue -- each packet can be with fragments as an mbuf chain */ for(;;) { IF_DEQUEUE(&ifnetp->if_snd, m_head); if (m_head == NULL) { ifnetp->if_drv_flags &= ~(IFF_DRV_OACTIVE); return; } for(m_buf = m_head; m_buf != NULL; m_buf = m_buf->m_next) { if(m_buf->m_len) count += 1; } if(count >= max_fragments) { m_buf = m_defrag(m_head, M_NOWAIT); if(m_buf != NULL) m_head = m_buf; XGE_DRV_STATS(tx_defrag); } /* Reserve descriptors */ status = xge_hal_fifo_dtr_reserve(channelh, &dtr); if(status != XGE_HAL_OK) { XGE_DRV_STATS(tx_no_txd); xge_flush_txds(channelh); break; } vlan_tag = (m_head->m_flags & M_VLANTAG) ? 
m_head->m_pkthdr.ether_vtag : 0; xge_hal_fifo_dtr_vlan_set(dtr, vlan_tag); /* Update Tx private structure for this descriptor */ ll_tx_priv = xge_hal_fifo_dtr_private(dtr); ll_tx_priv->buffer = m_head; /* * Do mapping -- Required DMA tag has been created in xge_init * function and DMA maps have already been created in the * xgell_tx_replenish function. * Returns number of segments through nsegs */ if(bus_dmamap_load_mbuf_sg(lldev->dma_tag_tx, ll_tx_priv->dma_map, m_head, segs, &nsegs, BUS_DMA_NOWAIT)) { xge_trace(XGE_TRACE, "DMA map load failed"); XGE_DRV_STATS(tx_map_fail); break; } if(lldev->driver_stats.tx_max_frags < nsegs) lldev->driver_stats.tx_max_frags = nsegs; /* Set descriptor buffer for header and each fragment/segment */ count = 0; do { xge_hal_fifo_dtr_buffer_set(channelh, dtr, count, (dma_addr_t)htole64(segs[count].ds_addr), segs[count].ds_len); count++; } while(count < nsegs); /* Pre-write Sync of mapping */ bus_dmamap_sync(lldev->dma_tag_tx, ll_tx_priv->dma_map, BUS_DMASYNC_PREWRITE); if((lldev->enabled_tso) && (m_head->m_pkthdr.csum_flags & CSUM_TSO)) { XGE_DRV_STATS(tx_tso); xge_hal_fifo_dtr_mss_set(dtr, m_head->m_pkthdr.tso_segsz); } /* Checksum */ if(ifnetp->if_hwassist > 0) { xge_hal_fifo_dtr_cksum_set_bits(dtr, XGE_HAL_TXD_TX_CKO_IPV4_EN | XGE_HAL_TXD_TX_CKO_TCP_EN | XGE_HAL_TXD_TX_CKO_UDP_EN); } /* Post descriptor to FIFO channel */ xge_hal_fifo_dtr_post(channelh, dtr); XGE_DRV_STATS(tx_posted); /* Send the same copy of mbuf packet to BPF (Berkely Packet Filter) * listener so that we can use tools like tcpdump */ ETHER_BPF_MTAP(ifnetp, m_head); } /* Prepend the packet back to queue */ IF_PREPEND(&ifnetp->if_snd, m_head); ifnetp->if_drv_flags |= IFF_DRV_OACTIVE; xge_queue_produce_context(xge_hal_device_queue(lldev->devh), XGE_LL_EVENT_TRY_XMIT_AGAIN, lldev->devh); XGE_DRV_STATS(tx_again); } /** * xge_get_buf * Allocates new mbufs to be placed into descriptors * * @dtrh Descriptor Handle * @rxd_priv Rx Descriptor Private Data * @lldev 
Per-adapter Data * @index Buffer Index (if multi-buffer mode) * * Returns XGE_HAL_OK or HAL error enums */ int xge_get_buf(xge_hal_dtr_h dtrh, xge_rx_priv_t *rxd_priv, xge_lldev_t *lldev, int index) { register mbuf_t mp = NULL; struct ifnet *ifnetp = lldev->ifnetp; int status = XGE_HAL_OK; int buffer_size = 0, cluster_size = 0, count; bus_dmamap_t map = rxd_priv->dmainfo[index].dma_map; bus_dma_segment_t segs[3]; buffer_size = (lldev->buffer_mode == XGE_HAL_RING_QUEUE_BUFFER_MODE_1) ? ifnetp->if_mtu + XGE_HAL_MAC_HEADER_MAX_SIZE : lldev->rxd_mbuf_len[index]; if(buffer_size <= MCLBYTES) { cluster_size = MCLBYTES; mp = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); } else { cluster_size = MJUMPAGESIZE; if((lldev->buffer_mode != XGE_HAL_RING_QUEUE_BUFFER_MODE_5) && (buffer_size > MJUMPAGESIZE)) { cluster_size = MJUM9BYTES; } mp = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, cluster_size); } if(!mp) { xge_trace(XGE_ERR, "Out of memory to allocate mbuf"); status = XGE_HAL_FAIL; goto getbuf_out; } /* Update mbuf's length, packet length and receive interface */ mp->m_len = mp->m_pkthdr.len = buffer_size; mp->m_pkthdr.rcvif = ifnetp; /* Load DMA map */ if(bus_dmamap_load_mbuf_sg(lldev->dma_tag_rx, lldev->extra_dma_map, mp, segs, &count, BUS_DMA_NOWAIT)) { XGE_DRV_STATS(rx_map_fail); m_freem(mp); XGE_EXIT_ON_ERR("DMA map load failed", getbuf_out, XGE_HAL_FAIL); } /* Update descriptor private data */ rxd_priv->bufferArray[index] = mp; rxd_priv->dmainfo[index].dma_phyaddr = htole64(segs->ds_addr); rxd_priv->dmainfo[index].dma_map = lldev->extra_dma_map; lldev->extra_dma_map = map; /* Pre-Read/Write sync */ bus_dmamap_sync(lldev->dma_tag_rx, map, BUS_DMASYNC_POSTREAD); /* Unload DMA map of mbuf in current descriptor */ bus_dmamap_unload(lldev->dma_tag_rx, map); /* Set descriptor buffer */ if(lldev->buffer_mode == XGE_HAL_RING_QUEUE_BUFFER_MODE_1) { xge_hal_ring_dtr_1b_set(dtrh, rxd_priv->dmainfo[0].dma_phyaddr, cluster_size); } getbuf_out: return status; } /** * xge_get_buf_3b_5b * Allocates 
new mbufs to be placed into descriptors (in multi-buffer modes) * * @dtrh Descriptor Handle * @rxd_priv Rx Descriptor Private Data * @lldev Per-adapter Data * * Returns XGE_HAL_OK or HAL error enums */ int xge_get_buf_3b_5b(xge_hal_dtr_h dtrh, xge_rx_priv_t *rxd_priv, xge_lldev_t *lldev) { bus_addr_t dma_pointers[5]; int dma_sizes[5]; int status = XGE_HAL_OK, index; int newindex = 0; for(index = 0; index < lldev->rxd_mbuf_cnt; index++) { status = xge_get_buf(dtrh, rxd_priv, lldev, index); if(status != XGE_HAL_OK) { for(newindex = 0; newindex < index; newindex++) { m_freem(rxd_priv->bufferArray[newindex]); } XGE_EXIT_ON_ERR("mbuf allocation failed", _exit, status); } } for(index = 0; index < lldev->buffer_mode; index++) { if(lldev->rxd_mbuf_len[index] != 0) { dma_pointers[index] = rxd_priv->dmainfo[index].dma_phyaddr; dma_sizes[index] = lldev->rxd_mbuf_len[index]; } else { dma_pointers[index] = rxd_priv->dmainfo[index-1].dma_phyaddr; dma_sizes[index] = 1; } } /* Assigning second buffer to third pointer in 2 buffer mode */ if(lldev->buffer_mode == XGE_HAL_RING_QUEUE_BUFFER_MODE_2) { dma_pointers[2] = dma_pointers[1]; dma_sizes[2] = dma_sizes[1]; dma_sizes[1] = 1; } if(lldev->buffer_mode == XGE_HAL_RING_QUEUE_BUFFER_MODE_5) { xge_hal_ring_dtr_5b_set(dtrh, dma_pointers, dma_sizes); } else { xge_hal_ring_dtr_3b_set(dtrh, dma_pointers, dma_sizes); } _exit: return status; } /** * xge_tx_compl * If the interrupt is due to Tx completion, free the sent buffer * * @channelh Channel Handle * @dtr Descriptor * @t_code Transfer Code indicating success or error * @userdata Per-adapter Data * * Returns XGE_HAL_OK or HAL error enum */ xge_hal_status_e xge_tx_compl(xge_hal_channel_h channelh, xge_hal_dtr_h dtr, u8 t_code, void *userdata) { xge_tx_priv_t *ll_tx_priv = NULL; xge_lldev_t *lldev = (xge_lldev_t *)userdata; struct ifnet *ifnetp = lldev->ifnetp; mbuf_t m_buffer = NULL; int qindex = xge_hal_channel_id(channelh); mtx_lock(&lldev->mtx_tx[qindex]); 
XGE_DRV_STATS(tx_completions); /* * For each completed descriptor: Get private structure, free buffer, * do unmapping, and free descriptor */ do { XGE_DRV_STATS(tx_desc_compl); if(t_code) { XGE_DRV_STATS(tx_tcode); xge_trace(XGE_TRACE, "t_code %d", t_code); xge_hal_device_handle_tcode(channelh, dtr, t_code); } ll_tx_priv = xge_hal_fifo_dtr_private(dtr); m_buffer = ll_tx_priv->buffer; bus_dmamap_unload(lldev->dma_tag_tx, ll_tx_priv->dma_map); m_freem(m_buffer); ll_tx_priv->buffer = NULL; xge_hal_fifo_dtr_free(channelh, dtr); } while(xge_hal_fifo_dtr_next_completed(channelh, &dtr, &t_code) == XGE_HAL_OK); xge_send_locked(ifnetp, qindex); ifnetp->if_drv_flags &= ~IFF_DRV_OACTIVE; mtx_unlock(&lldev->mtx_tx[qindex]); return XGE_HAL_OK; } /** * xge_tx_initial_replenish * Initially allocate buffers and set them into descriptors for later use * * @channelh Tx Channel Handle * @dtrh Descriptor Handle * @index * @userdata Per-adapter Data * @reopen Channel open/reopen option * * Returns XGE_HAL_OK or HAL error enums */ xge_hal_status_e xge_tx_initial_replenish(xge_hal_channel_h channelh, xge_hal_dtr_h dtrh, int index, void *userdata, xge_hal_channel_reopen_e reopen) { xge_tx_priv_t *txd_priv = NULL; int status = XGE_HAL_OK; /* Get the user data portion from channel handle */ xge_lldev_t *lldev = xge_hal_channel_userdata(channelh); if(lldev == NULL) { XGE_EXIT_ON_ERR("Failed to get user data from channel", txinit_out, XGE_HAL_FAIL); } /* Get the private data */ txd_priv = (xge_tx_priv_t *) xge_hal_fifo_dtr_private(dtrh); if(txd_priv == NULL) { XGE_EXIT_ON_ERR("Failed to get descriptor private data", txinit_out, XGE_HAL_FAIL); } /* Create DMA map for this descriptor */ if(bus_dmamap_create(lldev->dma_tag_tx, BUS_DMA_NOWAIT, &txd_priv->dma_map)) { XGE_EXIT_ON_ERR("DMA map creation for Tx descriptor failed", txinit_out, XGE_HAL_FAIL); } txinit_out: return status; } /** * xge_rx_initial_replenish * Initially allocate buffers and set them into descriptors for later use * * 
@channelh Tx Channel Handle * @dtrh Descriptor Handle * @index Ring Index * @userdata Per-adapter Data * @reopen Channel open/reopen option * * Returns XGE_HAL_OK or HAL error enums */ xge_hal_status_e xge_rx_initial_replenish(xge_hal_channel_h channelh, xge_hal_dtr_h dtrh, int index, void *userdata, xge_hal_channel_reopen_e reopen) { xge_rx_priv_t *rxd_priv = NULL; int status = XGE_HAL_OK; int index1 = 0, index2 = 0; /* Get the user data portion from channel handle */ xge_lldev_t *lldev = xge_hal_channel_userdata(channelh); if(lldev == NULL) { XGE_EXIT_ON_ERR("Failed to get user data from channel", rxinit_out, XGE_HAL_FAIL); } /* Get the private data */ rxd_priv = (xge_rx_priv_t *) xge_hal_ring_dtr_private(channelh, dtrh); if(rxd_priv == NULL) { XGE_EXIT_ON_ERR("Failed to get descriptor private data", rxinit_out, XGE_HAL_FAIL); } rxd_priv->bufferArray = xge_os_malloc(NULL, (sizeof(rxd_priv->bufferArray) * lldev->rxd_mbuf_cnt)); if(rxd_priv->bufferArray == NULL) { XGE_EXIT_ON_ERR("Failed to allocate Rxd private", rxinit_out, XGE_HAL_FAIL); } if(lldev->buffer_mode == XGE_HAL_RING_QUEUE_BUFFER_MODE_1) { /* Create DMA map for these descriptors*/ if(bus_dmamap_create(lldev->dma_tag_rx , BUS_DMA_NOWAIT, &rxd_priv->dmainfo[0].dma_map)) { XGE_EXIT_ON_ERR("DMA map creation for Rx descriptor failed", rxinit_err_out, XGE_HAL_FAIL); } /* Get a buffer, attach it to this descriptor */ status = xge_get_buf(dtrh, rxd_priv, lldev, 0); } else { for(index1 = 0; index1 < lldev->rxd_mbuf_cnt; index1++) { /* Create DMA map for this descriptor */ if(bus_dmamap_create(lldev->dma_tag_rx , BUS_DMA_NOWAIT , &rxd_priv->dmainfo[index1].dma_map)) { for(index2 = index1 - 1; index2 >= 0; index2--) { bus_dmamap_destroy(lldev->dma_tag_rx, rxd_priv->dmainfo[index2].dma_map); } XGE_EXIT_ON_ERR( "Jumbo DMA map creation for Rx descriptor failed", rxinit_err_out, XGE_HAL_FAIL); } } status = xge_get_buf_3b_5b(dtrh, rxd_priv, lldev); } if(status != XGE_HAL_OK) { for(index1 = 0; index1 < 
lldev->rxd_mbuf_cnt; index1++) { bus_dmamap_destroy(lldev->dma_tag_rx, rxd_priv->dmainfo[index1].dma_map); } goto rxinit_err_out; } else { goto rxinit_out; } rxinit_err_out: xge_os_free(NULL, rxd_priv->bufferArray, (sizeof(rxd_priv->bufferArray) * lldev->rxd_mbuf_cnt)); rxinit_out: return status; } /** * xge_rx_term * During unload terminate and free all descriptors * * @channelh Rx Channel Handle * @dtrh Rx Descriptor Handle * @state Descriptor State * @userdata Per-adapter Data * @reopen Channel open/reopen option */ void xge_rx_term(xge_hal_channel_h channelh, xge_hal_dtr_h dtrh, xge_hal_dtr_state_e state, void *userdata, xge_hal_channel_reopen_e reopen) { xge_rx_priv_t *rxd_priv = NULL; xge_lldev_t *lldev = NULL; int index = 0; /* Descriptor state is not "Posted" */ if(state != XGE_HAL_DTR_STATE_POSTED) goto rxterm_out; /* Get the user data portion */ lldev = xge_hal_channel_userdata(channelh); /* Get the private data */ rxd_priv = (xge_rx_priv_t *) xge_hal_ring_dtr_private(channelh, dtrh); for(index = 0; index < lldev->rxd_mbuf_cnt; index++) { if(rxd_priv->dmainfo[index].dma_map != NULL) { bus_dmamap_sync(lldev->dma_tag_rx, rxd_priv->dmainfo[index].dma_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(lldev->dma_tag_rx, rxd_priv->dmainfo[index].dma_map); if(rxd_priv->bufferArray[index] != NULL) m_free(rxd_priv->bufferArray[index]); bus_dmamap_destroy(lldev->dma_tag_rx, rxd_priv->dmainfo[index].dma_map); } } xge_os_free(NULL, rxd_priv->bufferArray, (sizeof(rxd_priv->bufferArray) * lldev->rxd_mbuf_cnt)); /* Free the descriptor */ xge_hal_ring_dtr_free(channelh, dtrh); rxterm_out: return; } /** * xge_tx_term * During unload terminate and free all descriptors * * @channelh Rx Channel Handle * @dtrh Rx Descriptor Handle * @state Descriptor State * @userdata Per-adapter Data * @reopen Channel open/reopen option */ void xge_tx_term(xge_hal_channel_h channelh, xge_hal_dtr_h dtr, xge_hal_dtr_state_e state, void *userdata, xge_hal_channel_reopen_e reopen) { xge_tx_priv_t 
*ll_tx_priv = xge_hal_fifo_dtr_private(dtr);
	xge_lldev_t *lldev = (xge_lldev_t *)userdata;

	/* Destroy the DMA map kept in this descriptor's private area */
	bus_dmamap_destroy(lldev->dma_tag_tx, ll_tx_priv->dma_map);
}

/**
 * xge_methods
 *
 * FreeBSD device interface entry points
 */
static device_method_t xge_methods[] = {
	DEVMETHOD(device_probe, xge_probe),
	DEVMETHOD(device_attach, xge_attach),
	DEVMETHOD(device_detach, xge_detach),
	DEVMETHOD(device_shutdown, xge_shutdown),

	DEVMETHOD_END
};

/* Driver description: name, method table and per-device softc size */
static driver_t xge_driver = {
	"nxge",
	xge_methods,
	sizeof(xge_lldev_t),
};
static devclass_t xge_devclass;
/* Registers the driver on the pci bus */
DRIVER_MODULE(nxge, pci, xge_driver, xge_devclass, 0, 0);
Index: head/sys/dev/oce/oce_if.c
===================================================================
--- head/sys/dev/oce/oce_if.c (revision 331796)
+++ head/sys/dev/oce/oce_if.c (revision 331797)
@@ -1,2995 +1,2995 @@
/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (C) 2013 Emulex
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the Emulex Corporation nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * Contact Information: * freebsd-drivers@emulex.com * * Emulex * 3333 Susan Street * Costa Mesa, CA 92626 */ /* $FreeBSD$ */ #include "opt_inet6.h" #include "opt_inet.h" #include "oce_if.h" #include "oce_user.h" #define is_tso_pkt(m) (m->m_pkthdr.csum_flags & CSUM_TSO) /* UE Status Low CSR */ static char *ue_status_low_desc[] = { "CEV", "CTX", "DBUF", "ERX", "Host", "MPU", "NDMA", "PTC ", "RDMA ", "RXF ", "RXIPS ", "RXULP0 ", "RXULP1 ", "RXULP2 ", "TIM ", "TPOST ", "TPRE ", "TXIPS ", "TXULP0 ", "TXULP1 ", "UC ", "WDMA ", "TXULP2 ", "HOST1 ", "P0_OB_LINK ", "P1_OB_LINK ", "HOST_GPIO ", "MBOX ", "AXGMAC0", "AXGMAC1", "JTAG", "MPU_INTPEND" }; /* UE Status High CSR */ static char *ue_status_hi_desc[] = { "LPCMEMHOST", "MGMT_MAC", "PCS0ONLINE", "MPU_IRAM", "PCS1ONLINE", "PCTL0", "PCTL1", "PMEM", "RR", "TXPB", "RXPP", "XAUI", "TXP", "ARM", "IPC", "HOST2", "HOST3", "HOST4", "HOST5", "HOST6", "HOST7", "HOST8", "HOST9", "NETC", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown" }; struct oce_common_cqe_info{ uint8_t vtp:1; uint8_t l4_cksum_pass:1; uint8_t ip_cksum_pass:1; uint8_t ipv6_frame:1; uint8_t qnq:1; uint8_t rsvd:3; uint8_t num_frags; uint16_t pkt_size; uint16_t vtag; }; /* Driver entry points prototypes */ static int oce_probe(device_t dev); static int oce_attach(device_t dev); static int oce_detach(device_t dev); static int oce_shutdown(device_t dev); static int oce_ioctl(struct ifnet *ifp, u_long command, 
caddr_t data); static void oce_init(void *xsc); static int oce_multiq_start(struct ifnet *ifp, struct mbuf *m); static void oce_multiq_flush(struct ifnet *ifp); /* Driver interrupt routines protypes */ static void oce_intr(void *arg, int pending); static int oce_setup_intr(POCE_SOFTC sc); static int oce_fast_isr(void *arg); static int oce_alloc_intr(POCE_SOFTC sc, int vector, void (*isr) (void *arg, int pending)); /* Media callbacks prototypes */ static void oce_media_status(struct ifnet *ifp, struct ifmediareq *req); static int oce_media_change(struct ifnet *ifp); /* Transmit routines prototypes */ static int oce_tx(POCE_SOFTC sc, struct mbuf **mpp, int wq_index); static void oce_tx_restart(POCE_SOFTC sc, struct oce_wq *wq); static void oce_process_tx_completion(struct oce_wq *wq); static int oce_multiq_transmit(struct ifnet *ifp, struct mbuf *m, struct oce_wq *wq); /* Receive routines prototypes */ static int oce_cqe_vtp_valid(POCE_SOFTC sc, struct oce_nic_rx_cqe *cqe); static int oce_cqe_portid_valid(POCE_SOFTC sc, struct oce_nic_rx_cqe *cqe); static void oce_rx(struct oce_rq *rq, struct oce_nic_rx_cqe *cqe); static void oce_check_rx_bufs(POCE_SOFTC sc, uint32_t num_cqes, struct oce_rq *rq); static uint16_t oce_rq_handler_lro(void *arg); static void oce_correct_header(struct mbuf *m, struct nic_hwlro_cqe_part1 *cqe1, struct nic_hwlro_cqe_part2 *cqe2); static void oce_rx_lro(struct oce_rq *rq, struct nic_hwlro_singleton_cqe *cqe, struct nic_hwlro_cqe_part2 *cqe2); static void oce_rx_mbuf_chain(struct oce_rq *rq, struct oce_common_cqe_info *cqe_info, struct mbuf **m); /* Helper function prototypes in this file */ static int oce_attach_ifp(POCE_SOFTC sc); static void oce_add_vlan(void *arg, struct ifnet *ifp, uint16_t vtag); static void oce_del_vlan(void *arg, struct ifnet *ifp, uint16_t vtag); static int oce_vid_config(POCE_SOFTC sc); static void oce_mac_addr_set(POCE_SOFTC sc); static int oce_handle_passthrough(struct ifnet *ifp, caddr_t data); static void 
oce_local_timer(void *arg); static void oce_if_deactivate(POCE_SOFTC sc); static void oce_if_activate(POCE_SOFTC sc); static void setup_max_queues_want(POCE_SOFTC sc); static void update_queues_got(POCE_SOFTC sc); static void process_link_state(POCE_SOFTC sc, struct oce_async_cqe_link_state *acqe); static int oce_tx_asic_stall_verify(POCE_SOFTC sc, struct mbuf *m); static void oce_get_config(POCE_SOFTC sc); static struct mbuf *oce_insert_vlan_tag(POCE_SOFTC sc, struct mbuf *m, boolean_t *complete); static void oce_read_env_variables(POCE_SOFTC sc); /* IP specific */ #if defined(INET6) || defined(INET) static int oce_init_lro(POCE_SOFTC sc); static struct mbuf * oce_tso_setup(POCE_SOFTC sc, struct mbuf **mpp); #endif static device_method_t oce_dispatch[] = { DEVMETHOD(device_probe, oce_probe), DEVMETHOD(device_attach, oce_attach), DEVMETHOD(device_detach, oce_detach), DEVMETHOD(device_shutdown, oce_shutdown), DEVMETHOD_END }; static driver_t oce_driver = { "oce", oce_dispatch, sizeof(OCE_SOFTC) }; static devclass_t oce_devclass; DRIVER_MODULE(oce, pci, oce_driver, oce_devclass, 0, 0); MODULE_DEPEND(oce, pci, 1, 1, 1); MODULE_DEPEND(oce, ether, 1, 1, 1); MODULE_VERSION(oce, 1); /* global vars */ const char component_revision[32] = {"///" COMPONENT_REVISION "///"}; /* Module capabilites and parameters */ uint32_t oce_max_rsp_handled = OCE_MAX_RSP_HANDLED; uint32_t oce_enable_rss = OCE_MODCAP_RSS; uint32_t oce_rq_buf_size = 2048; TUNABLE_INT("hw.oce.max_rsp_handled", &oce_max_rsp_handled); TUNABLE_INT("hw.oce.enable_rss", &oce_enable_rss); /* Supported devices table */ static uint32_t supportedDevices[] = { (PCI_VENDOR_SERVERENGINES << 16) | PCI_PRODUCT_BE2, (PCI_VENDOR_SERVERENGINES << 16) | PCI_PRODUCT_BE3, (PCI_VENDOR_EMULEX << 16) | PCI_PRODUCT_BE3, (PCI_VENDOR_EMULEX << 16) | PCI_PRODUCT_XE201, (PCI_VENDOR_EMULEX << 16) | PCI_PRODUCT_XE201_VF, (PCI_VENDOR_EMULEX << 16) | PCI_PRODUCT_SH }; POCE_SOFTC softc_head = NULL; POCE_SOFTC softc_tail = NULL; struct 
oce_rdma_if *oce_rdma_if = NULL;

/*****************************************************************************
 *			Driver entry points functions                        *
 *****************************************************************************/

/*
 * Device probe method.
 *
 * Clears the softc, then looks the PCI vendor/device pair up in
 * supportedDevices[].  On a match the device description is set and the
 * chip-family flag is recorded in sc->flags.
 *
 * Returns BUS_PROBE_DEFAULT for a supported device, ENXIO otherwise.
 */
static int
oce_probe(device_t dev)
{
	uint16_t vendor = 0;
	uint16_t device = 0;
	int i = 0;
	char str[256] = {0};
	POCE_SOFTC sc;

	sc = device_get_softc(dev);
	bzero(sc, sizeof(OCE_SOFTC));
	sc->dev = dev;

	vendor = pci_get_vendor(dev);
	device = pci_get_device(dev);

	for (i = 0; i < (sizeof(supportedDevices) / sizeof(uint32_t)); i++) {
		/* Each table entry is (vendor << 16) | device */
		if (vendor != ((supportedDevices[i] >> 16) & 0xffff))
			continue;
		if (device != (supportedDevices[i] & 0xffff))
			continue;

		/*
		 * Bounded formatting: never write past str[], and never read
		 * past component_revision[] should its initializer fill the
		 * whole array without leaving room for a terminating NUL.
		 */
		snprintf(str, sizeof(str), "%s:%.*s",
		    "Emulex CNA NIC function",
		    (int)sizeof(component_revision), component_revision);
		device_set_desc_copy(dev, str);

		switch (device) {
		case PCI_PRODUCT_BE2:
			sc->flags |= OCE_FLAGS_BE2;
			break;
		case PCI_PRODUCT_BE3:
			sc->flags |= OCE_FLAGS_BE3;
			break;
		case PCI_PRODUCT_XE201:
		case PCI_PRODUCT_XE201_VF:
			sc->flags |= OCE_FLAGS_XE201;
			break;
		case PCI_PRODUCT_SH:
			sc->flags |= OCE_FLAGS_SH;
			break;
		default:
			return ENXIO;
		}
		return BUS_PROBE_DEFAULT;
	}

	return ENXIO;
}


static int
oce_attach(device_t dev)
{
	POCE_SOFTC sc;
	int rc = 0;

	sc = device_get_softc(dev);

	rc = oce_hw_pci_alloc(sc);
	if (rc)
		return rc;

	sc->tx_ring_size = OCE_TX_RING_SIZE;
	sc->rx_ring_size = OCE_RX_RING_SIZE;
	/* receive fragment size should be multiple of 2K */
	sc->rq_frag_size = ((oce_rq_buf_size / 2048) * 2048);
	sc->flow_control = OCE_DEFAULT_FLOW_CONTROL;
	sc->promisc	 = OCE_DEFAULT_PROMISCUOUS;

	LOCK_CREATE(&sc->bmbx_lock, "Mailbox_lock");
	LOCK_CREATE(&sc->dev_lock,  "Device_lock");

	/* initialise the hardware */
	rc = oce_hw_init(sc);
	if (rc)
		goto pci_res_free;

	oce_read_env_variables(sc);

	oce_get_config(sc);

	setup_max_queues_want(sc);

	rc = oce_setup_intr(sc);
	if (rc)
		goto mbox_free;

	rc = oce_queue_init_all(sc);
	if (rc)
		goto intr_free;

	rc = oce_attach_ifp(sc);
	if (rc)
		goto queues_free;

#if defined(INET6) || defined(INET)
	rc = oce_init_lro(sc);
	if (rc)
		goto ifp_free;
#endif

	rc
= oce_hw_start(sc); if (rc) goto lro_free; sc->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, oce_add_vlan, sc, EVENTHANDLER_PRI_FIRST); sc->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, oce_del_vlan, sc, EVENTHANDLER_PRI_FIRST); rc = oce_stats_init(sc); if (rc) goto vlan_free; oce_add_sysctls(sc); callout_init(&sc->timer, CALLOUT_MPSAFE); rc = callout_reset(&sc->timer, 2 * hz, oce_local_timer, sc); if (rc) goto stats_free; sc->next =NULL; if (softc_tail != NULL) { softc_tail->next = sc; } else { softc_head = sc; } softc_tail = sc; return 0; stats_free: callout_drain(&sc->timer); oce_stats_free(sc); vlan_free: if (sc->vlan_attach) EVENTHANDLER_DEREGISTER(vlan_config, sc->vlan_attach); if (sc->vlan_detach) EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vlan_detach); oce_hw_intr_disable(sc); lro_free: #if defined(INET6) || defined(INET) oce_free_lro(sc); ifp_free: #endif ether_ifdetach(sc->ifp); if_free(sc->ifp); queues_free: oce_queue_release_all(sc); intr_free: oce_intr_free(sc); mbox_free: oce_dma_free(sc, &sc->bsmbx); pci_res_free: oce_hw_pci_free(sc); LOCK_DESTROY(&sc->dev_lock); LOCK_DESTROY(&sc->bmbx_lock); return rc; } static int oce_detach(device_t dev) { POCE_SOFTC sc = device_get_softc(dev); POCE_SOFTC poce_sc_tmp, *ppoce_sc_tmp1, poce_sc_tmp2 = NULL; poce_sc_tmp = softc_head; ppoce_sc_tmp1 = &softc_head; while (poce_sc_tmp != NULL) { if (poce_sc_tmp == sc) { *ppoce_sc_tmp1 = sc->next; if (sc->next == NULL) { softc_tail = poce_sc_tmp2; } break; } poce_sc_tmp2 = poce_sc_tmp; ppoce_sc_tmp1 = &poce_sc_tmp->next; poce_sc_tmp = poce_sc_tmp->next; } LOCK(&sc->dev_lock); oce_if_deactivate(sc); UNLOCK(&sc->dev_lock); callout_drain(&sc->timer); if (sc->vlan_attach != NULL) EVENTHANDLER_DEREGISTER(vlan_config, sc->vlan_attach); if (sc->vlan_detach != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vlan_detach); ether_ifdetach(sc->ifp); if_free(sc->ifp); oce_hw_shutdown(sc); bus_generic_detach(dev); return 0; } static int oce_shutdown(device_t dev) { int rc; rc = 
oce_detach(dev); return rc; } static int oce_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct ifreq *ifr = (struct ifreq *)data; POCE_SOFTC sc = ifp->if_softc; int rc = 0; uint32_t u; switch (command) { case SIOCGIFMEDIA: rc = ifmedia_ioctl(ifp, ifr, &sc->media, command); break; case SIOCSIFMTU: if (ifr->ifr_mtu > OCE_MAX_MTU) rc = EINVAL; else ifp->if_mtu = ifr->ifr_mtu; break; case SIOCSIFFLAGS: if (ifp->if_flags & IFF_UP) { if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; oce_init(sc); } device_printf(sc->dev, "Interface Up\n"); } else { LOCK(&sc->dev_lock); sc->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); oce_if_deactivate(sc); UNLOCK(&sc->dev_lock); device_printf(sc->dev, "Interface Down\n"); } if ((ifp->if_flags & IFF_PROMISC) && !sc->promisc) { if (!oce_rxf_set_promiscuous(sc, (1 | (1 << 1)))) sc->promisc = TRUE; } else if (!(ifp->if_flags & IFF_PROMISC) && sc->promisc) { if (!oce_rxf_set_promiscuous(sc, 0)) sc->promisc = FALSE; } break; case SIOCADDMULTI: case SIOCDELMULTI: rc = oce_hw_update_multicast(sc); if (rc) device_printf(sc->dev, "Update multicast address failed\n"); break; case SIOCSIFCAP: u = ifr->ifr_reqcap ^ ifp->if_capenable; if (u & IFCAP_TXCSUM) { ifp->if_capenable ^= IFCAP_TXCSUM; ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP); if (IFCAP_TSO & ifp->if_capenable && !(IFCAP_TXCSUM & ifp->if_capenable)) { ifp->if_capenable &= ~IFCAP_TSO; ifp->if_hwassist &= ~CSUM_TSO; if_printf(ifp, "TSO disabled due to -txcsum.\n"); } } if (u & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (u & IFCAP_TSO4) { ifp->if_capenable ^= IFCAP_TSO4; if (IFCAP_TSO & ifp->if_capenable) { if (IFCAP_TXCSUM & ifp->if_capenable) ifp->if_hwassist |= CSUM_TSO; else { ifp->if_capenable &= ~IFCAP_TSO; ifp->if_hwassist &= ~CSUM_TSO; if_printf(ifp, "Enable txcsum first.\n"); rc = EAGAIN; } } else ifp->if_hwassist &= ~CSUM_TSO; } if (u & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if 
(u & IFCAP_VLAN_HWFILTER) { ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; oce_vid_config(sc); } #if defined(INET6) || defined(INET) if (u & IFCAP_LRO) { ifp->if_capenable ^= IFCAP_LRO; if(sc->enable_hwlro) { if(ifp->if_capenable & IFCAP_LRO) { rc = oce_mbox_nic_set_iface_lro_config(sc, 1); }else { rc = oce_mbox_nic_set_iface_lro_config(sc, 0); } } } #endif break; case SIOCGPRIVATE_0: rc = oce_handle_passthrough(ifp, data); break; default: rc = ether_ioctl(ifp, command, data); break; } return rc; } static void oce_init(void *arg) { POCE_SOFTC sc = arg; LOCK(&sc->dev_lock); if (sc->ifp->if_flags & IFF_UP) { oce_if_deactivate(sc); oce_if_activate(sc); } UNLOCK(&sc->dev_lock); } static int oce_multiq_start(struct ifnet *ifp, struct mbuf *m) { POCE_SOFTC sc = ifp->if_softc; struct oce_wq *wq = NULL; int queue_index = 0; int status = 0; if (!sc->link_status) return ENXIO; if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) queue_index = m->m_pkthdr.flowid % sc->nwqs; wq = sc->wq[queue_index]; LOCK(&wq->tx_lock); status = oce_multiq_transmit(ifp, m, wq); UNLOCK(&wq->tx_lock); return status; } static void oce_multiq_flush(struct ifnet *ifp) { POCE_SOFTC sc = ifp->if_softc; struct mbuf *m; int i = 0; for (i = 0; i < sc->nwqs; i++) { while ((m = buf_ring_dequeue_sc(sc->wq[i]->br)) != NULL) m_freem(m); } if_qflush(ifp); } /***************************************************************************** * Driver interrupt routines functions * *****************************************************************************/ static void oce_intr(void *arg, int pending) { POCE_INTR_INFO ii = (POCE_INTR_INFO) arg; POCE_SOFTC sc = ii->sc; struct oce_eq *eq = ii->eq; struct oce_eqe *eqe; struct oce_cq *cq = NULL; int i, num_eqes = 0; bus_dmamap_sync(eq->ring->dma.tag, eq->ring->dma.map, BUS_DMASYNC_POSTWRITE); do { eqe = RING_GET_CONSUMER_ITEM_VA(eq->ring, struct oce_eqe); if (eqe->evnt == 0) break; eqe->evnt = 0; bus_dmamap_sync(eq->ring->dma.tag, eq->ring->dma.map, BUS_DMASYNC_POSTWRITE); 
RING_GET(eq->ring, 1);
		num_eqes++;

	} while (TRUE);

	if (!num_eqes)
		goto eq_arm; /* Spurious */

 	/* Clear EQ entries, but don't arm */
	oce_arm_eq(sc, eq->eq_id, num_eqes, FALSE, FALSE);

	/* Process TX, RX and MCC. But don't arm CQ */
	for (i = 0; i < eq->cq_valid; i++) {
		cq = eq->cq[i];
		(*cq->cq_handler)(cq->cb_arg);
	}

	/* Arm all cqs connected to this EQ */
	for (i = 0; i < eq->cq_valid; i++) {
		cq = eq->cq[i];
		oce_arm_cq(sc, cq->cq_id, 0, TRUE);
	}

eq_arm:
	/* Re-arm the event queue on both the normal and the spurious path */
	oce_arm_eq(sc, eq->eq_id, 0, TRUE, FALSE);

	return;
}


/*
 * Decides how many interrupt vectors to use and allocates them.
 *
 * With RSS enabled the NIC asks for MAX(nrqs - 1, nwqs) vectors, otherwise
 * for one.  When RDMA is supported and more than one NIC vector is wanted,
 * OCE_RDMA_VECTORS extra vectors are requested.  MSI-X is tried when the
 * device is flagged MSI-X capable; if that fails (or the flag is not set)
 * a single legacy interrupt is used.
 *
 * Returns 0 on success; on failure the interrupts set up so far are
 * released through oce_intr_free() and the error code is returned.
 */
static int
oce_setup_intr(POCE_SOFTC sc)
{
	int rc = 0, use_intx = 0;
	int vector = 0, req_vectors = 0;
	int tot_req_vectors, tot_vectors;

	if (is_rss_enabled(sc))
		req_vectors = MAX((sc->nrqs - 1), sc->nwqs);
	else
		req_vectors = 1;

	tot_req_vectors = req_vectors;
	if (sc->rdma_flags & OCE_RDMA_FLAG_SUPPORTED) {
	  if (req_vectors > 1) {
	    tot_req_vectors += OCE_RDMA_VECTORS;
	    sc->roce_intr_count = OCE_RDMA_VECTORS;
	  }
	}

        if (sc->flags & OCE_FLAGS_MSIX_CAPABLE) {
		sc->intr_count = req_vectors;
                tot_vectors = tot_req_vectors;
		/* tot_vectors is in/out: on return it holds the count granted */
		rc = pci_alloc_msix(sc->dev, &tot_vectors);
		if (rc != 0) {
			use_intx = 1;
			pci_release_msi(sc->dev);
		} else {
		  if (sc->rdma_flags & OCE_RDMA_FLAG_SUPPORTED) {
		    /* Fewer vectors than requested: split them NIC vs. RoCE */
		    if (tot_vectors < tot_req_vectors) {
		      if (sc->intr_count < (2 * OCE_RDMA_VECTORS)) {
			sc->roce_intr_count = (tot_vectors / 2);
		      }
		      sc->intr_count = tot_vectors - sc->roce_intr_count;
		    }
		  } else {
		    sc->intr_count = tot_vectors;
		  }
    		  sc->flags |= OCE_FLAGS_USING_MSIX;
		}
	} else
		use_intx = 1;

	if (use_intx)
		sc->intr_count = 1;

	/* Scale number of queues based on intr we got */
	update_queues_got(sc);

	if (use_intx) {
		device_printf(sc->dev, "Using legacy interrupt\n");
		rc = oce_alloc_intr(sc, vector, oce_intr);
		if (rc)
			goto error;
	} else {
		for (; vector < sc->intr_count; vector++) {
			rc = oce_alloc_intr(sc, vector, oce_intr);
			if (rc)
				goto error;
		}
	}

	return 0;
error:
	oce_intr_free(sc);
	return rc;
}


/*
 * Interrupt filter routine; arg is the POCE_INTR_INFO of the vector.
 * Returns FILTER_STRAY when no event queue is connected to the vector.
 */
static int
oce_fast_isr(void *arg)
{
	POCE_INTR_INFO ii = (POCE_INTR_INFO) arg;
	POCE_SOFTC sc = ii->sc;

	if (ii->eq == NULL)
		return
FILTER_STRAY; oce_arm_eq(sc, ii->eq->eq_id, 0, FALSE, TRUE); taskqueue_enqueue(ii->tq, &ii->task); ii->eq->intr++; return FILTER_HANDLED; } static int oce_alloc_intr(POCE_SOFTC sc, int vector, void (*isr) (void *arg, int pending)) { POCE_INTR_INFO ii = &sc->intrs[vector]; int rc = 0, rr; if (vector >= OCE_MAX_EQ) return (EINVAL); /* Set the resource id for the interrupt. * MSIx is vector + 1 for the resource id, * INTx is 0 for the resource id. */ if (sc->flags & OCE_FLAGS_USING_MSIX) rr = vector + 1; else rr = 0; ii->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rr, RF_ACTIVE|RF_SHAREABLE); ii->irq_rr = rr; if (ii->intr_res == NULL) { device_printf(sc->dev, "Could not allocate interrupt\n"); rc = ENXIO; return rc; } TASK_INIT(&ii->task, 0, isr, ii); ii->vector = vector; sprintf(ii->task_name, "oce_task[%d]", ii->vector); ii->tq = taskqueue_create_fast(ii->task_name, M_NOWAIT, taskqueue_thread_enqueue, &ii->tq); taskqueue_start_threads(&ii->tq, 1, PI_NET, "%s taskq", device_get_nameunit(sc->dev)); ii->sc = sc; rc = bus_setup_intr(sc->dev, ii->intr_res, INTR_TYPE_NET, oce_fast_isr, NULL, ii, &ii->tag); return rc; } void oce_intr_free(POCE_SOFTC sc) { int i = 0; for (i = 0; i < sc->intr_count; i++) { if (sc->intrs[i].tag != NULL) bus_teardown_intr(sc->dev, sc->intrs[i].intr_res, sc->intrs[i].tag); if (sc->intrs[i].tq != NULL) taskqueue_free(sc->intrs[i].tq); if (sc->intrs[i].intr_res != NULL) bus_release_resource(sc->dev, SYS_RES_IRQ, sc->intrs[i].irq_rr, sc->intrs[i].intr_res); sc->intrs[i].tag = NULL; sc->intrs[i].intr_res = NULL; } if (sc->flags & OCE_FLAGS_USING_MSIX) pci_release_msi(sc->dev); } /****************************************************************************** * Media callbacks functions * ******************************************************************************/ static void oce_media_status(struct ifnet *ifp, struct ifmediareq *req) { POCE_SOFTC sc = (POCE_SOFTC) ifp->if_softc; req->ifm_status = IFM_AVALID; req->ifm_active = 
IFM_ETHER;

	/* Without link there is no active media to report */
	if (sc->link_status == 1)
		req->ifm_status |= IFM_ACTIVE;
	else
		return;

	/* Translate the link speed code into media bits and sc->speed (Mbps) */
	switch (sc->link_speed) {
	case 1: /* 10 Mbps */
		req->ifm_active |= IFM_10_T | IFM_FDX;
		sc->speed = 10;
		break;
	case 2: /* 100 Mbps */
		req->ifm_active |= IFM_100_TX | IFM_FDX;
		sc->speed = 100;
		break;
	case 3: /* 1 Gbps */
		req->ifm_active |= IFM_1000_T | IFM_FDX;
		sc->speed = 1000;
		break;
	case 4: /* 10 Gbps */
		req->ifm_active |= IFM_10G_SR | IFM_FDX;
		sc->speed = 10000;
		break;
	/* NOTE(review): 20G and 25G are reported with the 10G_SR media word */
	case 5: /* 20 Gbps */
		req->ifm_active |= IFM_10G_SR | IFM_FDX;
		sc->speed = 20000;
		break;
	case 6: /* 25 Gbps */
		req->ifm_active |= IFM_10G_SR | IFM_FDX;
		sc->speed = 25000;
		break;
	case 7: /* 40 Gbps */
		req->ifm_active |= IFM_40G_SR4 | IFM_FDX;
		sc->speed = 40000;
		break;
	default:
		sc->speed = 0;
		break;
	}

	return;
}


/* ifmedia change callback: nothing is configurable, always returns 0 */
int
oce_media_change(struct ifnet *ifp)
{
	return 0;
}


/*
 * Decides whether an outgoing packet must also be sent to the BMC.
 *
 * sc:     adapter softc
 * m:      packet being transmitted
 * os2bmc: in/out flag.  When it is already TRUE on entry, or os2bmc is not
 *         enabled, it is reset to FALSE and nothing is duplicated.
 *         Otherwise it is set according to the filters below.
 * m_new:  receives a duplicate of m (passed through
 *         oce_insert_vlan_tag()) when *os2bmc ends up TRUE.
 *
 * NOTE(review): the IP, IPv6 and UDP checks below read the headers with
 * mtod() from the start of the mbuf data, the same address used for the
 * Ethernet header, and uh_dport is compared without byte swapping;
 * confirm this is intended.
 */
static void oce_is_pkt_dest_bmc(POCE_SOFTC sc,
				struct mbuf *m, boolean_t *os2bmc,
				struct mbuf **m_new)
{
	struct ether_header *eh = NULL;

	eh = mtod(m, struct ether_header *);

	if (!is_os2bmc_enabled(sc) || *os2bmc) {
		*os2bmc = FALSE;
		goto done;
	}
	/* Only multicast/broadcast destinations are candidates */
	if (!ETHER_IS_MULTICAST(eh->ether_dhost))
		goto done;

	if (is_mc_allowed_on_bmc(sc, eh) ||
	    is_bc_allowed_on_bmc(sc, eh) ||
	    is_arp_allowed_on_bmc(sc, ntohs(eh->ether_type))) {
		*os2bmc = TRUE;
		goto done;
	}

	if (mtod(m, struct ip *)->ip_p == IPPROTO_IPV6) {
		struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
		uint8_t nexthdr = ip6->ip6_nxt;
		if (nexthdr == IPPROTO_ICMPV6) {
			struct icmp6_hdr *icmp6 = (struct icmp6_hdr *)(ip6 + 1);
			switch (icmp6->icmp6_type) {
			case ND_ROUTER_ADVERT:
				*os2bmc = is_ipv6_ra_filt_enabled(sc);
				goto done;
			case ND_NEIGHBOR_ADVERT:
				*os2bmc = is_ipv6_na_filt_enabled(sc);
				goto done;
			default:
				break;
			}
		}
	}

	if (mtod(m, struct ip *)->ip_p == IPPROTO_UDP) {
		struct ip *ip = mtod(m, struct ip *);
		int iphlen = ip->ip_hl << 2;
		struct udphdr *uh = (struct udphdr *)((caddr_t)ip + iphlen);
		switch (uh->uh_dport) {
		case DHCP_CLIENT_PORT:
			*os2bmc = is_dhcp_client_filt_enabled(sc);
			goto done;
		case DHCP_SERVER_PORT:
			*os2bmc =
is_dhcp_srvr_filt_enabled(sc);
			goto done;
		case NET_BIOS_PORT1:
		case NET_BIOS_PORT2:
			*os2bmc = is_nbios_filt_enabled(sc);
			goto done;
		case DHCPV6_RAS_PORT:
			*os2bmc = is_ipv6_ras_filt_enabled(sc);
			goto done;
		default:
			break;
		}
	}
done:
	/* Duplicate the packet for the BMC; give up quietly if that fails */
	if (*os2bmc) {
		*m_new = m_dup(m, M_NOWAIT);
		if (!*m_new) {
			*os2bmc = FALSE;
			return;
		}
		*m_new = oce_insert_vlan_tag(sc, *m_new, NULL);
	}
}



/*****************************************************************************
 *			  Transmit routines functions			     *
 *****************************************************************************/

/*
 * Queues one packet on a work queue and rings the doorbell.
 *
 * sc:       adapter softc
 * mpp:      in/out packet pointer; set to NULL when the packet is freed
 *           through the free_ret path
 * wq_index: index of the work queue in sc->wq[]
 *
 * Returns 0 when the packet was posted, EINVAL when *mpp is NULL, EBUSY
 * when the ring has too few free entries, ENOMEM when the DMA load reports
 * it, and otherwise the rc in effect when the packet was dropped.
 *
 * NOTE(review): an oversized non-TSO packet is freed with rc still 0, so
 * the drop is reported as success; confirm this is intended.
 */
static int
oce_tx(POCE_SOFTC sc, struct mbuf **mpp, int wq_index)
{
	int rc = 0, i, retry_cnt = 0;
	bus_dma_segment_t segs[OCE_MAX_TX_ELEMENTS];
	struct mbuf *m, *m_temp, *m_new = NULL;
	struct oce_wq *wq = sc->wq[wq_index];
	struct oce_packet_desc *pd;
	struct oce_nic_hdr_wqe *nichdr;
	struct oce_nic_frag_wqe *nicfrag;
	struct ether_header *eh = NULL;
	int num_wqes;
	uint32_t reg_value;
	boolean_t complete = TRUE;
	boolean_t os2bmc = FALSE;

	m = *mpp;
	if (!m)
		return EINVAL;

	if (!(m->m_flags & M_PKTHDR)) {
		rc = ENXIO;
		goto free_ret;
	}

	/* Don't allow non-TSO packets longer than MTU */
	if (!is_tso_pkt(m)) {
		eh = mtod(m, struct ether_header *);
		if(m->m_pkthdr.len > ETHER_MAX_FRAME(sc->ifp, eh->ether_type, FALSE))
			 goto free_ret;
	}

	if(oce_tx_asic_stall_verify(sc, m)) {
		m = oce_insert_vlan_tag(sc, m, &complete);
		if(!m) {
			device_printf(sc->dev, "Insertion unsuccessful\n");
			return 0;
		}
	}

	/* Lancer, SH ASIC has a bug wherein Packets that are 32 bytes or less
	 * may cause a transmit stall on that port. So the work-around is to
	 * pad short packets (<= 32 bytes) to a 36-byte length.
	 */
	if(IS_SH(sc) || IS_XE201(sc) ) {
		if(m->m_pkthdr.len <= 32) {
			char buf[36];
			bzero((void *)buf, 36);
			m_append(m, (36 - m->m_pkthdr.len), buf);
		}
	}

tx_start:
	/* Re-entered from the bottom with the BMC copy when os2bmc is set */
	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
		/* consolidate packet buffers for TSO/LSO segment offload */
#if defined(INET6) || defined(INET)
		m = oce_tso_setup(sc, mpp);
#else
		m = NULL;
#endif
		if (m == NULL) {
			rc = ENXIO;
			goto free_ret;
		}
	}


	pd = &wq->pckts[wq->pkt_desc_head];

retry:
	rc = bus_dmamap_load_mbuf_sg(wq->tag,
				     pd->map,
				     m, segs, &pd->nsegs, BUS_DMA_NOWAIT);
	if (rc == 0) {
		/* One header WQE plus one fragment WQE per DMA segment */
		num_wqes = pd->nsegs + 1;
		if (IS_BE(sc) || IS_SH(sc)) {
			/* Dummy required only for BE3. */
			if (num_wqes & 1)
				num_wqes++;
		}
		if (num_wqes >= RING_NUM_FREE(wq->ring)) {
			bus_dmamap_unload(wq->tag, pd->map);
			return EBUSY;
		}
		atomic_store_rel_int(&wq->pkt_desc_head,
				     (wq->pkt_desc_head + 1) % \
				      OCE_WQ_PACKET_ARRAY_SIZE);
		bus_dmamap_sync(wq->tag, pd->map, BUS_DMASYNC_PREWRITE);
		pd->mbuf = m;

		/* Header WQE: clear it, then set the offload and control bits */
		nichdr =
		    RING_GET_PRODUCER_ITEM_VA(wq->ring, struct oce_nic_hdr_wqe);
		nichdr->u0.dw[0] = 0;
		nichdr->u0.dw[1] = 0;
		nichdr->u0.dw[2] = 0;
		nichdr->u0.dw[3] = 0;

		nichdr->u0.s.complete = complete;
		nichdr->u0.s.mgmt = os2bmc;
		nichdr->u0.s.event = 1;
		nichdr->u0.s.crc = 1;
		nichdr->u0.s.forward = 0;
		nichdr->u0.s.ipcs = (m->m_pkthdr.csum_flags & CSUM_IP) ? 1 : 0;
		nichdr->u0.s.udpcs =
			(m->m_pkthdr.csum_flags & CSUM_UDP) ? 1 : 0;
		nichdr->u0.s.tcpcs =
			(m->m_pkthdr.csum_flags & CSUM_TCP) ? 1 : 0;
		nichdr->u0.s.num_wqe = num_wqes;
		nichdr->u0.s.total_length = m->m_pkthdr.len;

		if (m->m_flags & M_VLANTAG) {
			nichdr->u0.s.vlan = 1; /* Vlan present */
			nichdr->u0.s.vlan_tag = m->m_pkthdr.ether_vtag;
		}

		if (m->m_pkthdr.csum_flags & CSUM_TSO) {
			if (m->m_pkthdr.tso_segsz) {
				nichdr->u0.s.lso = 1;
				nichdr->u0.s.lso_mss  = m->m_pkthdr.tso_segsz;
			}
			/*
			 * NOTE(review): this condition is false only when
			 * IS_BE() and IS_SH() are both true; confirm that
			 * "||" rather than "&&" is intended.
			 */
			if (!IS_BE(sc) || !IS_SH(sc))
				nichdr->u0.s.ipcs = 1;
		}

		RING_PUT(wq->ring, 1);
		atomic_add_int(&wq->ring->num_used, 1);

		/* One fragment WQE per DMA segment */
		for (i = 0; i < pd->nsegs; i++) {
			nicfrag =
			    RING_GET_PRODUCER_ITEM_VA(wq->ring,
						      struct oce_nic_frag_wqe);
			nicfrag->u0.s.rsvd0 = 0;
			nicfrag->u0.s.frag_pa_hi = ADDR_HI(segs[i].ds_addr);
			nicfrag->u0.s.frag_pa_lo = ADDR_LO(segs[i].ds_addr);
			nicfrag->u0.s.frag_len = segs[i].ds_len;
			pd->wqe_idx = wq->ring->pidx;
			RING_PUT(wq->ring, 1);
			atomic_add_int(&wq->ring->num_used, 1);
		}
		/* Zeroed dummy WQE when num_wqes was rounded up above */
		if (num_wqes > (pd->nsegs + 1)) {
			nicfrag =
			    RING_GET_PRODUCER_ITEM_VA(wq->ring,
						      struct oce_nic_frag_wqe);
			nicfrag->u0.dw[0] = 0;
			nicfrag->u0.dw[1] = 0;
			nicfrag->u0.dw[2] = 0;
			nicfrag->u0.dw[3] = 0;
			pd->wqe_idx = wq->ring->pidx;
			RING_PUT(wq->ring, 1);
			atomic_add_int(&wq->ring->num_used, 1);
			pd->nsegs++;
		}

		if_inc_counter(sc->ifp, IFCOUNTER_OPACKETS, 1);
		wq->tx_stats.tx_reqs++;
		wq->tx_stats.tx_wrbs += num_wqes;
		wq->tx_stats.tx_bytes += m->m_pkthdr.len;
		wq->tx_stats.tx_pkts++;

		bus_dmamap_sync(wq->ring->dma.tag, wq->ring->dma.map,
				BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
		/* Doorbell value: WQE count in the upper half, queue id below */
		reg_value = (num_wqes << 16) | wq->wq_id;

		/* if os2bmc is not enabled or if the pkt is already tagged as
		   bmc, do nothing
		 */
		oce_is_pkt_dest_bmc(sc, m, &os2bmc, &m_new);

		OCE_WRITE_REG32(sc, db, wq->db_offset, reg_value);

	} else if (rc == EFBIG)	{
		/* Too many segments: defragment once and try the load again */
		if (retry_cnt == 0) {
			m_temp = m_defrag(m, M_NOWAIT);
			if (m_temp == NULL)
				goto free_ret;
			m = m_temp;
			*mpp = m_temp;
			retry_cnt = retry_cnt + 1;
			goto retry;
		} else
			goto free_ret;
	} else if (rc == ENOMEM)
		return rc;
	else
		goto free_ret;

	/* Send the duplicate made by oce_is_pkt_dest_bmc() as well */
	if (os2bmc) {
		m = m_new;
		goto tx_start;
	}

	return 0;

free_ret:
	m_freem(*mpp);
	*mpp = NULL;
	return rc;
}
static void oce_process_tx_completion(struct oce_wq *wq) { struct oce_packet_desc *pd; POCE_SOFTC sc = (POCE_SOFTC) wq->parent; struct mbuf *m; pd = &wq->pckts[wq->pkt_desc_tail]; atomic_store_rel_int(&wq->pkt_desc_tail, (wq->pkt_desc_tail + 1) % OCE_WQ_PACKET_ARRAY_SIZE); atomic_subtract_int(&wq->ring->num_used, pd->nsegs + 1); bus_dmamap_sync(wq->tag, pd->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(wq->tag, pd->map); m = pd->mbuf; m_freem(m); pd->mbuf = NULL; if (sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) { if (wq->ring->num_used < (wq->ring->num_items / 2)) { sc->ifp->if_drv_flags &= ~(IFF_DRV_OACTIVE); oce_tx_restart(sc, wq); } } } static void oce_tx_restart(POCE_SOFTC sc, struct oce_wq *wq) { if ((sc->ifp->if_drv_flags & IFF_DRV_RUNNING) != IFF_DRV_RUNNING) return; #if __FreeBSD_version >= 800000 if (!drbr_empty(sc->ifp, wq->br)) #else if (!IFQ_DRV_IS_EMPTY(&sc->ifp->if_snd)) #endif taskqueue_enqueue(taskqueue_swi, &wq->txtask); } #if defined(INET6) || defined(INET) static struct mbuf * oce_tso_setup(POCE_SOFTC sc, struct mbuf **mpp) { struct mbuf *m; #ifdef INET struct ip *ip; #endif #ifdef INET6 struct ip6_hdr *ip6; #endif struct ether_vlan_header *eh; struct tcphdr *th; uint16_t etype; int total_len = 0, ehdrlen = 0; m = *mpp; if (M_WRITABLE(m) == 0) { m = m_dup(*mpp, M_NOWAIT); if (!m) return NULL; m_freem(*mpp); *mpp = m; } eh = mtod(m, struct ether_vlan_header *); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { etype = ntohs(eh->evl_proto); ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { etype = ntohs(eh->evl_encap_proto); ehdrlen = ETHER_HDR_LEN; } switch (etype) { #ifdef INET case ETHERTYPE_IP: ip = (struct ip *)(m->m_data + ehdrlen); if (ip->ip_p != IPPROTO_TCP) return NULL; th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); total_len = ehdrlen + (ip->ip_hl << 2) + (th->th_off << 2); break; #endif #ifdef INET6 case ETHERTYPE_IPV6: ip6 = (struct ip6_hdr *)(m->m_data + ehdrlen); if (ip6->ip6_nxt != IPPROTO_TCP) return NULL; th = 
(struct tcphdr *)((caddr_t)ip6 + sizeof(struct ip6_hdr)); total_len = ehdrlen + sizeof(struct ip6_hdr) + (th->th_off << 2); break; #endif default: return NULL; } m = m_pullup(m, total_len); if (!m) return NULL; *mpp = m; return m; } #endif /* INET6 || INET */ void oce_tx_task(void *arg, int npending) { struct oce_wq *wq = arg; POCE_SOFTC sc = wq->parent; struct ifnet *ifp = sc->ifp; int rc = 0; #if __FreeBSD_version >= 800000 LOCK(&wq->tx_lock); rc = oce_multiq_transmit(ifp, NULL, wq); if (rc) { device_printf(sc->dev, "TX[%d] restart failed\n", wq->queue_index); } UNLOCK(&wq->tx_lock); #else oce_start(ifp); #endif } void oce_start(struct ifnet *ifp) { POCE_SOFTC sc = ifp->if_softc; struct mbuf *m; int rc = 0; int def_q = 0; /* Defualt tx queue is 0*/ if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return; if (!sc->link_status) return; do { IF_DEQUEUE(&sc->ifp->if_snd, m); if (m == NULL) break; LOCK(&sc->wq[def_q]->tx_lock); rc = oce_tx(sc, &m, def_q); UNLOCK(&sc->wq[def_q]->tx_lock); if (rc) { if (m != NULL) { sc->wq[def_q]->tx_stats.tx_stops ++; ifp->if_drv_flags |= IFF_DRV_OACTIVE; IFQ_DRV_PREPEND(&ifp->if_snd, m); m = NULL; } break; } if (m != NULL) ETHER_BPF_MTAP(ifp, m); } while (TRUE); return; } /* Handle the Completion Queue for transmit */ uint16_t oce_wq_handler(void *arg) { struct oce_wq *wq = (struct oce_wq *)arg; POCE_SOFTC sc = wq->parent; struct oce_cq *cq = wq->cq; struct oce_nic_tx_cqe *cqe; int num_cqes = 0; LOCK(&wq->tx_compl_lock); bus_dmamap_sync(cq->ring->dma.tag, cq->ring->dma.map, BUS_DMASYNC_POSTWRITE); cqe = RING_GET_CONSUMER_ITEM_VA(cq->ring, struct oce_nic_tx_cqe); while (cqe->u0.dw[3]) { DW_SWAP((uint32_t *) cqe, sizeof(oce_wq_cqe)); wq->ring->cidx = cqe->u0.s.wqe_index + 1; if (wq->ring->cidx >= wq->ring->num_items) wq->ring->cidx -= wq->ring->num_items; oce_process_tx_completion(wq); wq->tx_stats.tx_compl++; cqe->u0.dw[3] = 0; RING_GET(cq->ring, 1); bus_dmamap_sync(cq->ring->dma.tag, cq->ring->dma.map, 
BUS_DMASYNC_POSTWRITE); cqe = RING_GET_CONSUMER_ITEM_VA(cq->ring, struct oce_nic_tx_cqe); num_cqes++; } if (num_cqes) oce_arm_cq(sc, cq->cq_id, num_cqes, FALSE); UNLOCK(&wq->tx_compl_lock); return num_cqes; } static int oce_multiq_transmit(struct ifnet *ifp, struct mbuf *m, struct oce_wq *wq) { POCE_SOFTC sc = ifp->if_softc; int status = 0, queue_index = 0; struct mbuf *next = NULL; struct buf_ring *br = NULL; br = wq->br; queue_index = wq->queue_index; if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) { if (m != NULL) status = drbr_enqueue(ifp, br, m); return status; } if (m != NULL) { if ((status = drbr_enqueue(ifp, br, m)) != 0) return status; } while ((next = drbr_peek(ifp, br)) != NULL) { if (oce_tx(sc, &next, queue_index)) { if (next == NULL) { drbr_advance(ifp, br); } else { drbr_putback(ifp, br, next); wq->tx_stats.tx_stops ++; ifp->if_drv_flags |= IFF_DRV_OACTIVE; } break; } drbr_advance(ifp, br); if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len); if (next->m_flags & M_MCAST) if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); ETHER_BPF_MTAP(ifp, next); } return 0; } /***************************************************************************** * Receive routines functions * *****************************************************************************/ static void oce_correct_header(struct mbuf *m, struct nic_hwlro_cqe_part1 *cqe1, struct nic_hwlro_cqe_part2 *cqe2) { uint32_t *p; struct ether_header *eh = NULL; struct tcphdr *tcp_hdr = NULL; struct ip *ip4_hdr = NULL; struct ip6_hdr *ip6 = NULL; uint32_t payload_len = 0; eh = mtod(m, struct ether_header *); /* correct IP header */ if(!cqe2->ipv6_frame) { ip4_hdr = (struct ip *)((char*)eh + sizeof(struct ether_header)); ip4_hdr->ip_ttl = cqe2->frame_lifespan; ip4_hdr->ip_len = htons(cqe2->coalesced_size - sizeof(struct ether_header)); tcp_hdr = (struct tcphdr *)((char*)ip4_hdr + sizeof(struct ip)); }else { ip6 = (struct ip6_hdr *)((char*)eh + sizeof(struct ether_header)); 
ip6->ip6_ctlun.ip6_un1.ip6_un1_hlim = cqe2->frame_lifespan; payload_len = cqe2->coalesced_size - sizeof(struct ether_header) - sizeof(struct ip6_hdr); ip6->ip6_ctlun.ip6_un1.ip6_un1_plen = htons(payload_len); tcp_hdr = (struct tcphdr *)((char*)ip6 + sizeof(struct ip6_hdr)); } /* correct tcp header */ tcp_hdr->th_ack = htonl(cqe2->tcp_ack_num); if(cqe2->push) { tcp_hdr->th_flags |= TH_PUSH; } tcp_hdr->th_win = htons(cqe2->tcp_window); tcp_hdr->th_sum = 0xffff; if(cqe2->ts_opt) { p = (uint32_t *)((char*)tcp_hdr + sizeof(struct tcphdr) + 2); *p = cqe1->tcp_timestamp_val; *(p+1) = cqe1->tcp_timestamp_ecr; } return; } static void oce_rx_mbuf_chain(struct oce_rq *rq, struct oce_common_cqe_info *cqe_info, struct mbuf **m) { POCE_SOFTC sc = (POCE_SOFTC) rq->parent; uint32_t i = 0, frag_len = 0; uint32_t len = cqe_info->pkt_size; struct oce_packet_desc *pd; struct mbuf *tail = NULL; for (i = 0; i < cqe_info->num_frags; i++) { if (rq->ring->cidx == rq->ring->pidx) { device_printf(sc->dev, "oce_rx_mbuf_chain: Invalid RX completion - Queue is empty\n"); return; } pd = &rq->pckts[rq->ring->cidx]; bus_dmamap_sync(rq->tag, pd->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(rq->tag, pd->map); RING_GET(rq->ring, 1); rq->pending--; frag_len = (len > rq->cfg.frag_size) ? 
rq->cfg.frag_size : len; pd->mbuf->m_len = frag_len; if (tail != NULL) { /* additional fragments */ pd->mbuf->m_flags &= ~M_PKTHDR; tail->m_next = pd->mbuf; if(rq->islro) tail->m_nextpkt = NULL; tail = pd->mbuf; } else { /* first fragment, fill out much of the packet header */ pd->mbuf->m_pkthdr.len = len; if(rq->islro) pd->mbuf->m_nextpkt = NULL; pd->mbuf->m_pkthdr.csum_flags = 0; if (IF_CSUM_ENABLED(sc)) { if (cqe_info->l4_cksum_pass) { if(!cqe_info->ipv6_frame) { /* IPV4 */ pd->mbuf->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); }else { /* IPV6 frame */ if(rq->islro) { pd->mbuf->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); } } pd->mbuf->m_pkthdr.csum_data = 0xffff; } if (cqe_info->ip_cksum_pass) { pd->mbuf->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED|CSUM_IP_VALID); } } *m = tail = pd->mbuf; } pd->mbuf = NULL; len -= frag_len; } return; } static void oce_rx_lro(struct oce_rq *rq, struct nic_hwlro_singleton_cqe *cqe, struct nic_hwlro_cqe_part2 *cqe2) { POCE_SOFTC sc = (POCE_SOFTC) rq->parent; struct nic_hwlro_cqe_part1 *cqe1 = NULL; struct mbuf *m = NULL; struct oce_common_cqe_info cq_info; /* parse cqe */ if(cqe2 == NULL) { cq_info.pkt_size = cqe->pkt_size; cq_info.vtag = cqe->vlan_tag; cq_info.l4_cksum_pass = cqe->l4_cksum_pass; cq_info.ip_cksum_pass = cqe->ip_cksum_pass; cq_info.ipv6_frame = cqe->ipv6_frame; cq_info.vtp = cqe->vtp; cq_info.qnq = cqe->qnq; }else { cqe1 = (struct nic_hwlro_cqe_part1 *)cqe; cq_info.pkt_size = cqe2->coalesced_size; cq_info.vtag = cqe2->vlan_tag; cq_info.l4_cksum_pass = cqe2->l4_cksum_pass; cq_info.ip_cksum_pass = cqe2->ip_cksum_pass; cq_info.ipv6_frame = cqe2->ipv6_frame; cq_info.vtp = cqe2->vtp; cq_info.qnq = cqe1->qnq; } cq_info.vtag = BSWAP_16(cq_info.vtag); cq_info.num_frags = cq_info.pkt_size / rq->cfg.frag_size; if(cq_info.pkt_size % rq->cfg.frag_size) cq_info.num_frags++; oce_rx_mbuf_chain(rq, &cq_info, &m); if (m) { if(cqe2) { //assert(cqe2->valid != 0); //assert(cqe2->cqe_type != 2); 
oce_correct_header(m, cqe1, cqe2); } m->m_pkthdr.rcvif = sc->ifp; #if __FreeBSD_version >= 800000 if (rq->queue_index) m->m_pkthdr.flowid = (rq->queue_index - 1); else m->m_pkthdr.flowid = rq->queue_index; M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); #endif /* This deternies if vlan tag is Valid */ if (cq_info.vtp) { if (sc->function_mode & FNM_FLEX10_MODE) { /* FLEX10. If QnQ is not set, neglect VLAN */ if (cq_info.qnq) { m->m_pkthdr.ether_vtag = cq_info.vtag; m->m_flags |= M_VLANTAG; } } else if (sc->pvid != (cq_info.vtag & VLAN_VID_MASK)) { /* In UMC mode generally pvid will be striped by hw. But in some cases we have seen it comes with pvid. So if pvid == vlan, neglect vlan. */ m->m_pkthdr.ether_vtag = cq_info.vtag; m->m_flags |= M_VLANTAG; } } if_inc_counter(sc->ifp, IFCOUNTER_IPACKETS, 1); (*sc->ifp->if_input) (sc->ifp, m); /* Update rx stats per queue */ rq->rx_stats.rx_pkts++; rq->rx_stats.rx_bytes += cq_info.pkt_size; rq->rx_stats.rx_frags += cq_info.num_frags; rq->rx_stats.rx_ucast_pkts++; } return; } static void oce_rx(struct oce_rq *rq, struct oce_nic_rx_cqe *cqe) { POCE_SOFTC sc = (POCE_SOFTC) rq->parent; int len; struct mbuf *m = NULL; struct oce_common_cqe_info cq_info; uint16_t vtag = 0; /* Is it a flush compl that has no data */ if(!cqe->u0.s.num_fragments) goto exit; len = cqe->u0.s.pkt_size; if (!len) { /*partial DMA workaround for Lancer*/ oce_discard_rx_comp(rq, cqe->u0.s.num_fragments); goto exit; } if (!oce_cqe_portid_valid(sc, cqe)) { oce_discard_rx_comp(rq, cqe->u0.s.num_fragments); goto exit; } /* Get vlan_tag value */ if(IS_BE(sc) || IS_SH(sc)) vtag = BSWAP_16(cqe->u0.s.vlan_tag); else vtag = cqe->u0.s.vlan_tag; cq_info.l4_cksum_pass = cqe->u0.s.l4_cksum_pass; cq_info.ip_cksum_pass = cqe->u0.s.ip_cksum_pass; cq_info.ipv6_frame = cqe->u0.s.ip_ver; cq_info.num_frags = cqe->u0.s.num_fragments; cq_info.pkt_size = cqe->u0.s.pkt_size; oce_rx_mbuf_chain(rq, &cq_info, &m); if (m) { m->m_pkthdr.rcvif = sc->ifp; #if __FreeBSD_version >= 800000 if 
(rq->queue_index) m->m_pkthdr.flowid = (rq->queue_index - 1); else m->m_pkthdr.flowid = rq->queue_index; M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); #endif /* This deternies if vlan tag is Valid */ if (oce_cqe_vtp_valid(sc, cqe)) { if (sc->function_mode & FNM_FLEX10_MODE) { /* FLEX10. If QnQ is not set, neglect VLAN */ if (cqe->u0.s.qnq) { m->m_pkthdr.ether_vtag = vtag; m->m_flags |= M_VLANTAG; } } else if (sc->pvid != (vtag & VLAN_VID_MASK)) { /* In UMC mode generally pvid will be striped by hw. But in some cases we have seen it comes with pvid. So if pvid == vlan, neglect vlan. */ m->m_pkthdr.ether_vtag = vtag; m->m_flags |= M_VLANTAG; } } if_inc_counter(sc->ifp, IFCOUNTER_IPACKETS, 1); #if defined(INET6) || defined(INET) /* Try to queue to LRO */ if (IF_LRO_ENABLED(sc) && (cqe->u0.s.ip_cksum_pass) && (cqe->u0.s.l4_cksum_pass) && (!cqe->u0.s.ip_ver) && (rq->lro.lro_cnt != 0)) { if (tcp_lro_rx(&rq->lro, m, 0) == 0) { rq->lro_pkts_queued ++; goto post_done; } /* If LRO posting fails then try to post to STACK */ } #endif (*sc->ifp->if_input) (sc->ifp, m); #if defined(INET6) || defined(INET) post_done: #endif /* Update rx stats per queue */ rq->rx_stats.rx_pkts++; rq->rx_stats.rx_bytes += cqe->u0.s.pkt_size; rq->rx_stats.rx_frags += cqe->u0.s.num_fragments; if (cqe->u0.s.pkt_type == OCE_MULTICAST_PACKET) rq->rx_stats.rx_mcast_pkts++; if (cqe->u0.s.pkt_type == OCE_UNICAST_PACKET) rq->rx_stats.rx_ucast_pkts++; } exit: return; } void oce_discard_rx_comp(struct oce_rq *rq, int num_frags) { uint32_t i = 0; struct oce_packet_desc *pd; POCE_SOFTC sc = (POCE_SOFTC) rq->parent; for (i = 0; i < num_frags; i++) { if (rq->ring->cidx == rq->ring->pidx) { device_printf(sc->dev, "oce_discard_rx_comp: Invalid RX completion - Queue is empty\n"); return; } pd = &rq->pckts[rq->ring->cidx]; bus_dmamap_sync(rq->tag, pd->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(rq->tag, pd->map); if (pd->mbuf != NULL) { m_freem(pd->mbuf); pd->mbuf = NULL; } RING_GET(rq->ring, 1); rq->pending--; } } static 
int oce_cqe_vtp_valid(POCE_SOFTC sc, struct oce_nic_rx_cqe *cqe) { struct oce_nic_rx_cqe_v1 *cqe_v1; int vtp = 0; if (sc->be3_native) { cqe_v1 = (struct oce_nic_rx_cqe_v1 *)cqe; vtp = cqe_v1->u0.s.vlan_tag_present; } else vtp = cqe->u0.s.vlan_tag_present; return vtp; } static int oce_cqe_portid_valid(POCE_SOFTC sc, struct oce_nic_rx_cqe *cqe) { struct oce_nic_rx_cqe_v1 *cqe_v1; int port_id = 0; if (sc->be3_native && (IS_BE(sc) || IS_SH(sc))) { cqe_v1 = (struct oce_nic_rx_cqe_v1 *)cqe; port_id = cqe_v1->u0.s.port; if (sc->port_id != port_id) return 0; } else ;/* For BE3 legacy and Lancer this is dummy */ return 1; } #if defined(INET6) || defined(INET) void oce_rx_flush_lro(struct oce_rq *rq) { struct lro_ctrl *lro = &rq->lro; POCE_SOFTC sc = (POCE_SOFTC) rq->parent; if (!IF_LRO_ENABLED(sc)) return; tcp_lro_flush_all(lro); rq->lro_pkts_queued = 0; return; } static int oce_init_lro(POCE_SOFTC sc) { struct lro_ctrl *lro = NULL; int i = 0, rc = 0; for (i = 0; i < sc->nrqs; i++) { lro = &sc->rq[i]->lro; rc = tcp_lro_init(lro); if (rc != 0) { device_printf(sc->dev, "LRO init failed\n"); return rc; } lro->ifp = sc->ifp; } return rc; } void oce_free_lro(POCE_SOFTC sc) { struct lro_ctrl *lro = NULL; int i = 0; for (i = 0; i < sc->nrqs; i++) { lro = &sc->rq[i]->lro; if (lro) tcp_lro_free(lro); } } #endif int oce_alloc_rx_bufs(struct oce_rq *rq, int count) { POCE_SOFTC sc = (POCE_SOFTC) rq->parent; int i, in, rc; struct oce_packet_desc *pd; bus_dma_segment_t segs[6]; int nsegs, added = 0; struct oce_nic_rqe *rqe; pd_rxulp_db_t rxdb_reg; uint32_t val = 0; uint32_t oce_max_rq_posts = 64; bzero(&rxdb_reg, sizeof(pd_rxulp_db_t)); for (i = 0; i < count; i++) { in = (rq->ring->pidx + 1) % OCE_RQ_PACKET_ARRAY_SIZE; pd = &rq->pckts[rq->ring->pidx]; pd->mbuf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, oce_rq_buf_size); if (pd->mbuf == NULL) { device_printf(sc->dev, "mbuf allocation failed, size = %d\n",oce_rq_buf_size); break; } pd->mbuf->m_nextpkt = NULL; pd->mbuf->m_len = 
pd->mbuf->m_pkthdr.len = rq->cfg.frag_size; rc = bus_dmamap_load_mbuf_sg(rq->tag, pd->map, pd->mbuf, segs, &nsegs, BUS_DMA_NOWAIT); if (rc) { m_free(pd->mbuf); device_printf(sc->dev, "bus_dmamap_load_mbuf_sg failed rc = %d\n", rc); break; } if (nsegs != 1) { i--; continue; } bus_dmamap_sync(rq->tag, pd->map, BUS_DMASYNC_PREREAD); rqe = RING_GET_PRODUCER_ITEM_VA(rq->ring, struct oce_nic_rqe); rqe->u0.s.frag_pa_hi = ADDR_HI(segs[0].ds_addr); rqe->u0.s.frag_pa_lo = ADDR_LO(segs[0].ds_addr); DW_SWAP(u32ptr(rqe), sizeof(struct oce_nic_rqe)); RING_PUT(rq->ring, 1); added++; rq->pending++; } oce_max_rq_posts = sc->enable_hwlro ? OCE_HWLRO_MAX_RQ_POSTS : OCE_MAX_RQ_POSTS; if (added != 0) { for (i = added / oce_max_rq_posts; i > 0; i--) { rxdb_reg.bits.num_posted = oce_max_rq_posts; rxdb_reg.bits.qid = rq->rq_id; if(rq->islro) { val |= rq->rq_id & DB_LRO_RQ_ID_MASK; val |= oce_max_rq_posts << 16; OCE_WRITE_REG32(sc, db, DB_OFFSET, val); }else { OCE_WRITE_REG32(sc, db, PD_RXULP_DB, rxdb_reg.dw0); } added -= oce_max_rq_posts; } if (added > 0) { rxdb_reg.bits.qid = rq->rq_id; rxdb_reg.bits.num_posted = added; if(rq->islro) { val |= rq->rq_id & DB_LRO_RQ_ID_MASK; val |= added << 16; OCE_WRITE_REG32(sc, db, DB_OFFSET, val); }else { OCE_WRITE_REG32(sc, db, PD_RXULP_DB, rxdb_reg.dw0); } } } return 0; } static void oce_check_rx_bufs(POCE_SOFTC sc, uint32_t num_cqes, struct oce_rq *rq) { if (num_cqes) { oce_arm_cq(sc, rq->cq->cq_id, num_cqes, FALSE); if(!sc->enable_hwlro) { if((OCE_RQ_PACKET_ARRAY_SIZE - rq->pending) > 1) oce_alloc_rx_bufs(rq, ((OCE_RQ_PACKET_ARRAY_SIZE - rq->pending) - 1)); }else { if ((OCE_RQ_PACKET_ARRAY_SIZE -1 - rq->pending) > 64) oce_alloc_rx_bufs(rq, 64); } } return; } uint16_t oce_rq_handler_lro(void *arg) { struct oce_rq *rq = (struct oce_rq *)arg; struct oce_cq *cq = rq->cq; POCE_SOFTC sc = rq->parent; struct nic_hwlro_singleton_cqe *cqe; struct nic_hwlro_cqe_part2 *cqe2; int num_cqes = 0; LOCK(&rq->rx_lock); 
bus_dmamap_sync(cq->ring->dma.tag,cq->ring->dma.map, BUS_DMASYNC_POSTWRITE); cqe = RING_GET_CONSUMER_ITEM_VA(cq->ring, struct nic_hwlro_singleton_cqe); while (cqe->valid) { if(cqe->cqe_type == 0) { /* singleton cqe */ /* we should not get singleton cqe after cqe1 on same rq */ if(rq->cqe_firstpart != NULL) { device_printf(sc->dev, "Got singleton cqe after cqe1 \n"); goto exit_rq_handler_lro; } if(cqe->error != 0) { rq->rx_stats.rxcp_err++; if_inc_counter(sc->ifp, IFCOUNTER_IERRORS, 1); } oce_rx_lro(rq, cqe, NULL); rq->rx_stats.rx_compl++; cqe->valid = 0; RING_GET(cq->ring, 1); num_cqes++; if (num_cqes >= (IS_XE201(sc) ? 8 : oce_max_rsp_handled)) break; }else if(cqe->cqe_type == 0x1) { /* first part */ /* we should not get cqe1 after cqe1 on same rq */ if(rq->cqe_firstpart != NULL) { device_printf(sc->dev, "Got cqe1 after cqe1 \n"); goto exit_rq_handler_lro; } rq->cqe_firstpart = (struct nic_hwlro_cqe_part1 *)cqe; RING_GET(cq->ring, 1); }else if(cqe->cqe_type == 0x2) { /* second part */ cqe2 = (struct nic_hwlro_cqe_part2 *)cqe; if(cqe2->error != 0) { rq->rx_stats.rxcp_err++; if_inc_counter(sc->ifp, IFCOUNTER_IERRORS, 1); } /* We should not get cqe2 without cqe1 */ if(rq->cqe_firstpart == NULL) { device_printf(sc->dev, "Got cqe2 without cqe1 \n"); goto exit_rq_handler_lro; } oce_rx_lro(rq, (struct nic_hwlro_singleton_cqe *)rq->cqe_firstpart, cqe2); rq->rx_stats.rx_compl++; rq->cqe_firstpart->valid = 0; cqe2->valid = 0; rq->cqe_firstpart = NULL; RING_GET(cq->ring, 1); num_cqes += 2; if (num_cqes >= (IS_XE201(sc) ? 
8 : oce_max_rsp_handled)) break; } bus_dmamap_sync(cq->ring->dma.tag,cq->ring->dma.map, BUS_DMASYNC_POSTWRITE); cqe = RING_GET_CONSUMER_ITEM_VA(cq->ring, struct nic_hwlro_singleton_cqe); } oce_check_rx_bufs(sc, num_cqes, rq); exit_rq_handler_lro: UNLOCK(&rq->rx_lock); return 0; } /* Handle the Completion Queue for receive */ uint16_t oce_rq_handler(void *arg) { struct oce_rq *rq = (struct oce_rq *)arg; struct oce_cq *cq = rq->cq; POCE_SOFTC sc = rq->parent; struct oce_nic_rx_cqe *cqe; int num_cqes = 0; if(rq->islro) { oce_rq_handler_lro(arg); return 0; } LOCK(&rq->rx_lock); bus_dmamap_sync(cq->ring->dma.tag, cq->ring->dma.map, BUS_DMASYNC_POSTWRITE); cqe = RING_GET_CONSUMER_ITEM_VA(cq->ring, struct oce_nic_rx_cqe); while (cqe->u0.dw[2]) { DW_SWAP((uint32_t *) cqe, sizeof(oce_rq_cqe)); if (cqe->u0.s.error == 0) { oce_rx(rq, cqe); } else { rq->rx_stats.rxcp_err++; if_inc_counter(sc->ifp, IFCOUNTER_IERRORS, 1); /* Post L3/L4 errors to stack.*/ oce_rx(rq, cqe); } rq->rx_stats.rx_compl++; cqe->u0.dw[2] = 0; #if defined(INET6) || defined(INET) if (IF_LRO_ENABLED(sc) && rq->lro_pkts_queued >= 16) { oce_rx_flush_lro(rq); } #endif RING_GET(cq->ring, 1); bus_dmamap_sync(cq->ring->dma.tag, cq->ring->dma.map, BUS_DMASYNC_POSTWRITE); cqe = RING_GET_CONSUMER_ITEM_VA(cq->ring, struct oce_nic_rx_cqe); num_cqes++; if (num_cqes >= (IS_XE201(sc) ? 
8 : oce_max_rsp_handled)) break; } #if defined(INET6) || defined(INET) if (IF_LRO_ENABLED(sc)) oce_rx_flush_lro(rq); #endif oce_check_rx_bufs(sc, num_cqes, rq); UNLOCK(&rq->rx_lock); return 0; } /***************************************************************************** * Helper function prototypes in this file * *****************************************************************************/ static int oce_attach_ifp(POCE_SOFTC sc) { sc->ifp = if_alloc(IFT_ETHER); if (!sc->ifp) return ENOMEM; ifmedia_init(&sc->media, IFM_IMASK, oce_media_change, oce_media_status); ifmedia_add(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&sc->media, IFM_ETHER | IFM_AUTO); sc->ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST; sc->ifp->if_ioctl = oce_ioctl; sc->ifp->if_start = oce_start; sc->ifp->if_init = oce_init; sc->ifp->if_mtu = ETHERMTU; sc->ifp->if_softc = sc; #if __FreeBSD_version >= 800000 sc->ifp->if_transmit = oce_multiq_start; sc->ifp->if_qflush = oce_multiq_flush; #endif if_initname(sc->ifp, device_get_name(sc->dev), device_get_unit(sc->dev)); sc->ifp->if_snd.ifq_drv_maxlen = OCE_MAX_TX_DESC - 1; IFQ_SET_MAXLEN(&sc->ifp->if_snd, sc->ifp->if_snd.ifq_drv_maxlen); IFQ_SET_READY(&sc->ifp->if_snd); sc->ifp->if_hwassist = OCE_IF_HWASSIST; sc->ifp->if_hwassist |= CSUM_TSO; sc->ifp->if_hwassist |= (CSUM_IP | CSUM_TCP | CSUM_UDP); sc->ifp->if_capabilities = OCE_IF_CAPABILITIES; sc->ifp->if_capabilities |= IFCAP_HWCSUM; sc->ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; #if defined(INET6) || defined(INET) sc->ifp->if_capabilities |= IFCAP_TSO; sc->ifp->if_capabilities |= IFCAP_LRO; sc->ifp->if_capabilities |= IFCAP_VLAN_HWTSO; #endif sc->ifp->if_capenable = sc->ifp->if_capabilities; sc->ifp->if_baudrate = IF_Gbps(10); #if __FreeBSD_version >= 1000000 sc->ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); sc->ifp->if_hw_tsomaxsegcount = OCE_MAX_TX_ELEMENTS; sc->ifp->if_hw_tsomaxsegsize = 4096; #endif ether_ifattach(sc->ifp, sc->macaddr.mac_addr); return 0; } 
static void oce_add_vlan(void *arg, struct ifnet *ifp, uint16_t vtag) { POCE_SOFTC sc = ifp->if_softc; if (ifp->if_softc != arg) return; if ((vtag == 0) || (vtag > 4095)) return; sc->vlan_tag[vtag] = 1; sc->vlans_added++; if (sc->vlans_added <= (sc->max_vlans + 1)) oce_vid_config(sc); } static void oce_del_vlan(void *arg, struct ifnet *ifp, uint16_t vtag) { POCE_SOFTC sc = ifp->if_softc; if (ifp->if_softc != arg) return; if ((vtag == 0) || (vtag > 4095)) return; sc->vlan_tag[vtag] = 0; sc->vlans_added--; oce_vid_config(sc); } /* * A max of 64 vlans can be configured in BE. If the user configures * more, place the card in vlan promiscuous mode. */ static int oce_vid_config(POCE_SOFTC sc) { struct normal_vlan vtags[MAX_VLANFILTER_SIZE]; uint16_t ntags = 0, i; int status = 0; if ((sc->vlans_added <= MAX_VLANFILTER_SIZE) && (sc->ifp->if_capenable & IFCAP_VLAN_HWFILTER)) { for (i = 0; i < MAX_VLANS; i++) { if (sc->vlan_tag[i]) { vtags[ntags].vtag = i; ntags++; } } if (ntags) status = oce_config_vlan(sc, (uint8_t) sc->if_id, vtags, ntags, 1, 0); } else status = oce_config_vlan(sc, (uint8_t) sc->if_id, NULL, 0, 1, 1); return status; } static void oce_mac_addr_set(POCE_SOFTC sc) { uint32_t old_pmac_id = sc->pmac_id; int status = 0; status = bcmp((IF_LLADDR(sc->ifp)), sc->macaddr.mac_addr, sc->macaddr.size_of_struct); if (!status) return; status = oce_mbox_macaddr_add(sc, (uint8_t *)(IF_LLADDR(sc->ifp)), sc->if_id, &sc->pmac_id); if (!status) { status = oce_mbox_macaddr_del(sc, sc->if_id, old_pmac_id); bcopy((IF_LLADDR(sc->ifp)), sc->macaddr.mac_addr, sc->macaddr.size_of_struct); } if (status) device_printf(sc->dev, "Failed update macaddress\n"); } static int oce_handle_passthrough(struct ifnet *ifp, caddr_t data) { POCE_SOFTC sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; int rc = ENXIO; char cookie[32] = {0}; - void *priv_data = (void *)ifr->ifr_data; + void *priv_data = ifr_data_get_ptr(ifr); void *ioctl_ptr; uint32_t req_size; struct mbx_hdr req; 
OCE_DMA_MEM dma_mem; struct mbx_common_get_cntl_attr *fw_cmd; if (copyin(priv_data, cookie, strlen(IOCTL_COOKIE))) return EFAULT; if (memcmp(cookie, IOCTL_COOKIE, strlen(IOCTL_COOKIE))) return EINVAL; ioctl_ptr = (char *)priv_data + strlen(IOCTL_COOKIE); if (copyin(ioctl_ptr, &req, sizeof(struct mbx_hdr))) return EFAULT; req_size = le32toh(req.u0.req.request_length); if (req_size > 65536) return EINVAL; req_size += sizeof(struct mbx_hdr); rc = oce_dma_alloc(sc, req_size, &dma_mem, 0); if (rc) return ENOMEM; if (copyin(ioctl_ptr, OCE_DMAPTR(&dma_mem,char), req_size)) { rc = EFAULT; goto dma_free; } rc = oce_pass_through_mbox(sc, &dma_mem, req_size); if (rc) { rc = EIO; goto dma_free; } if (copyout(OCE_DMAPTR(&dma_mem,char), ioctl_ptr, req_size)) rc = EFAULT; /* firmware is filling all the attributes for this ioctl except the driver version..so fill it */ if(req.u0.rsp.opcode == OPCODE_COMMON_GET_CNTL_ATTRIBUTES) { fw_cmd = (struct mbx_common_get_cntl_attr *) ioctl_ptr; strncpy(fw_cmd->params.rsp.cntl_attr_info.hba_attr.drv_ver_str, COMPONENT_REVISION, strlen(COMPONENT_REVISION)); } dma_free: oce_dma_free(sc, &dma_mem); return rc; } static void oce_eqd_set_periodic(POCE_SOFTC sc) { struct oce_set_eqd set_eqd[OCE_MAX_EQ]; struct oce_aic_obj *aic; struct oce_eq *eqo; uint64_t now = 0, delta; int eqd, i, num = 0; uint32_t tx_reqs = 0, rxpkts = 0, pps; struct oce_wq *wq; struct oce_rq *rq; #define ticks_to_msecs(t) (1000 * (t) / hz) for (i = 0 ; i < sc->neqs; i++) { eqo = sc->eq[i]; aic = &sc->aic_obj[i]; /* When setting the static eq delay from the user space */ if (!aic->enable) { if (aic->ticks) aic->ticks = 0; eqd = aic->et_eqd; goto modify_eqd; } rq = sc->rq[i]; rxpkts = rq->rx_stats.rx_pkts; wq = sc->wq[i]; tx_reqs = wq->tx_stats.tx_reqs; now = ticks; if (!aic->ticks || now < aic->ticks || rxpkts < aic->prev_rxpkts || tx_reqs < aic->prev_txreqs) { aic->prev_rxpkts = rxpkts; aic->prev_txreqs = tx_reqs; aic->ticks = now; continue; } delta = ticks_to_msecs(now - 
aic->ticks); pps = (((uint32_t)(rxpkts - aic->prev_rxpkts) * 1000) / delta) + (((uint32_t)(tx_reqs - aic->prev_txreqs) * 1000) / delta); eqd = (pps / 15000) << 2; if (eqd < 8) eqd = 0; /* Make sure that the eq delay is in the known range */ eqd = min(eqd, aic->max_eqd); eqd = max(eqd, aic->min_eqd); aic->prev_rxpkts = rxpkts; aic->prev_txreqs = tx_reqs; aic->ticks = now; modify_eqd: if (eqd != aic->cur_eqd) { set_eqd[num].delay_multiplier = (eqd * 65)/100; set_eqd[num].eq_id = eqo->eq_id; aic->cur_eqd = eqd; num++; } } /* Is there atleast one eq that needs to be modified? */ for(i = 0; i < num; i += 8) { if((num - i) >=8 ) oce_mbox_eqd_modify_periodic(sc, &set_eqd[i], 8); else oce_mbox_eqd_modify_periodic(sc, &set_eqd[i], (num - i)); } } static void oce_detect_hw_error(POCE_SOFTC sc) { uint32_t ue_low = 0, ue_high = 0, ue_low_mask = 0, ue_high_mask = 0; uint32_t sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0; uint32_t i; if (sc->hw_error) return; if (IS_XE201(sc)) { sliport_status = OCE_READ_REG32(sc, db, SLIPORT_STATUS_OFFSET); if (sliport_status & SLIPORT_STATUS_ERR_MASK) { sliport_err1 = OCE_READ_REG32(sc, db, SLIPORT_ERROR1_OFFSET); sliport_err2 = OCE_READ_REG32(sc, db, SLIPORT_ERROR2_OFFSET); } } else { ue_low = OCE_READ_REG32(sc, devcfg, PCICFG_UE_STATUS_LOW); ue_high = OCE_READ_REG32(sc, devcfg, PCICFG_UE_STATUS_HIGH); ue_low_mask = OCE_READ_REG32(sc, devcfg, PCICFG_UE_STATUS_LOW_MASK); ue_high_mask = OCE_READ_REG32(sc, devcfg, PCICFG_UE_STATUS_HI_MASK); ue_low = (ue_low & ~ue_low_mask); ue_high = (ue_high & ~ue_high_mask); } /* On certain platforms BE hardware can indicate spurious UEs. * Allow the h/w to stop working completely in case of a real UE. * Hence not setting the hw_error for UE detection. 
*/ if (sliport_status & SLIPORT_STATUS_ERR_MASK) { sc->hw_error = TRUE; device_printf(sc->dev, "Error detected in the card\n"); } if (sliport_status & SLIPORT_STATUS_ERR_MASK) { device_printf(sc->dev, "ERR: sliport status 0x%x\n", sliport_status); device_printf(sc->dev, "ERR: sliport error1 0x%x\n", sliport_err1); device_printf(sc->dev, "ERR: sliport error2 0x%x\n", sliport_err2); } if (ue_low) { for (i = 0; ue_low; ue_low >>= 1, i++) { if (ue_low & 1) device_printf(sc->dev, "UE: %s bit set\n", ue_status_low_desc[i]); } } if (ue_high) { for (i = 0; ue_high; ue_high >>= 1, i++) { if (ue_high & 1) device_printf(sc->dev, "UE: %s bit set\n", ue_status_hi_desc[i]); } } } static void oce_local_timer(void *arg) { POCE_SOFTC sc = arg; int i = 0; oce_detect_hw_error(sc); oce_refresh_nic_stats(sc); oce_refresh_queue_stats(sc); oce_mac_addr_set(sc); /* TX Watch Dog*/ for (i = 0; i < sc->nwqs; i++) oce_tx_restart(sc, sc->wq[i]); /* calculate and set the eq delay for optimal interrupt rate */ if (IS_BE(sc) || IS_SH(sc)) oce_eqd_set_periodic(sc); callout_reset(&sc->timer, hz, oce_local_timer, sc); } static void oce_tx_compl_clean(POCE_SOFTC sc) { struct oce_wq *wq; int i = 0, timeo = 0, num_wqes = 0; int pending_txqs = sc->nwqs; /* Stop polling for compls when HW has been silent for 10ms or * hw_error or no outstanding completions expected */ do { pending_txqs = sc->nwqs; for_all_wq_queues(sc, wq, i) { num_wqes = oce_wq_handler(wq); if(num_wqes) timeo = 0; if(!wq->ring->num_used) pending_txqs--; } if (pending_txqs == 0 || ++timeo > 10 || sc->hw_error) break; DELAY(1000); } while (TRUE); for_all_wq_queues(sc, wq, i) { while(wq->ring->num_used) { LOCK(&wq->tx_compl_lock); oce_process_tx_completion(wq); UNLOCK(&wq->tx_compl_lock); } } } /* NOTE : This should only be called holding * DEVICE_LOCK. 
*/ static void oce_if_deactivate(POCE_SOFTC sc) { int i; struct oce_rq *rq; struct oce_wq *wq; struct oce_eq *eq; sc->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); oce_tx_compl_clean(sc); /* Stop intrs and finish any bottom halves pending */ oce_hw_intr_disable(sc); /* Since taskqueue_drain takes a Gaint Lock, We should not acquire any other lock. So unlock device lock and require after completing taskqueue_drain. */ UNLOCK(&sc->dev_lock); for (i = 0; i < sc->intr_count; i++) { if (sc->intrs[i].tq != NULL) { taskqueue_drain(sc->intrs[i].tq, &sc->intrs[i].task); } } LOCK(&sc->dev_lock); /* Delete RX queue in card with flush param */ oce_stop_rx(sc); /* Invalidate any pending cq and eq entries*/ for_all_evnt_queues(sc, eq, i) oce_drain_eq(eq); for_all_rq_queues(sc, rq, i) oce_drain_rq_cq(rq); for_all_wq_queues(sc, wq, i) oce_drain_wq_cq(wq); /* But still we need to get MCC aync events. So enable intrs and also arm first EQ */ oce_hw_intr_enable(sc); oce_arm_eq(sc, sc->eq[0]->eq_id, 0, TRUE, FALSE); DELAY(10); } static void oce_if_activate(POCE_SOFTC sc) { struct oce_eq *eq; struct oce_rq *rq; struct oce_wq *wq; int i, rc = 0; sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; oce_hw_intr_disable(sc); oce_start_rx(sc); for_all_rq_queues(sc, rq, i) { rc = oce_start_rq(rq); if (rc) device_printf(sc->dev, "Unable to start RX\n"); } for_all_wq_queues(sc, wq, i) { rc = oce_start_wq(wq); if (rc) device_printf(sc->dev, "Unable to start TX\n"); } for_all_evnt_queues(sc, eq, i) oce_arm_eq(sc, eq->eq_id, 0, TRUE, FALSE); oce_hw_intr_enable(sc); } static void process_link_state(POCE_SOFTC sc, struct oce_async_cqe_link_state *acqe) { /* Update Link status */ if ((acqe->u0.s.link_status & ~ASYNC_EVENT_LOGICAL) == ASYNC_EVENT_LINK_UP) { sc->link_status = ASYNC_EVENT_LINK_UP; if_link_state_change(sc->ifp, LINK_STATE_UP); } else { sc->link_status = ASYNC_EVENT_LINK_DOWN; if_link_state_change(sc->ifp, LINK_STATE_DOWN); } } static void oce_async_grp5_osbmc_process(POCE_SOFTC sc, 
/*
 * Dispatch a GRP5 asynchronous completion entry by its async type.
 *
 * A PVID-state event updates sc->pvid (masked tag when enabled, zero
 * otherwise); an OS2BMC event is handed to oce_async_grp5_osbmc_process().
 * Every other async type is ignored.
 */
static void
oce_process_grp5_events(POCE_SOFTC sc, struct oce_mq_cqe *cqe)
{
	int async_type = cqe->u0.s.async_type;

	if (async_type == ASYNC_EVENT_PVID_STATE) {
		struct oce_async_event_grp5_pvid_state *pvid_evt =
		    (struct oce_async_event_grp5_pvid_state *)cqe;

		sc->pvid = pvid_evt->enabled ?
		    (pvid_evt->tag & VLAN_VID_MASK) : 0;
	} else if (async_type == ASYNC_EVENT_OS2BMC) {
		oce_async_grp5_osbmc_process(sc,
		    (struct oce_async_evt_grp5_os2bmc *)cqe);
	}
}
oce_process_grp5_events(sc, cqe); } else if (evt_type == ASYNC_EVENT_CODE_DEBUG && optype == ASYNC_EVENT_DEBUG_QNQ) { dbgcqe = (struct oce_async_event_qnq *)cqe; if(dbgcqe->valid) sc->qnqid = dbgcqe->vlan_tag; sc->qnq_debug_event = TRUE; } } cqe->u0.dw[3] = 0; RING_GET(cq->ring, 1); bus_dmamap_sync(cq->ring->dma.tag, cq->ring->dma.map, BUS_DMASYNC_POSTWRITE); cqe = RING_GET_CONSUMER_ITEM_VA(cq->ring, struct oce_mq_cqe); num_cqes++; } if (num_cqes) oce_arm_cq(sc, cq->cq_id, num_cqes, FALSE); return 0; } static void setup_max_queues_want(POCE_SOFTC sc) { /* Check if it is FLEX machine. Is so dont use RSS */ if ((sc->function_mode & FNM_FLEX10_MODE) || (sc->function_mode & FNM_UMC_MODE) || (sc->function_mode & FNM_VNIC_MODE) || (!is_rss_enabled(sc)) || IS_BE2(sc)) { sc->nrqs = 1; sc->nwqs = 1; } else { sc->nrqs = MIN(OCE_NCPUS, sc->nrssqs) + 1; sc->nwqs = MIN(OCE_NCPUS, sc->nrssqs); } if (IS_BE2(sc) && is_rss_enabled(sc)) sc->nrqs = MIN(OCE_NCPUS, sc->nrssqs) + 1; } static void update_queues_got(POCE_SOFTC sc) { if (is_rss_enabled(sc)) { sc->nrqs = sc->intr_count + 1; sc->nwqs = sc->intr_count; } else { sc->nrqs = 1; sc->nwqs = 1; } if (IS_BE2(sc)) sc->nwqs = 1; } static int oce_check_ipv6_ext_hdr(struct mbuf *m) { struct ether_header *eh = mtod(m, struct ether_header *); caddr_t m_datatemp = m->m_data; if (eh->ether_type == htons(ETHERTYPE_IPV6)) { m->m_data += sizeof(struct ether_header); struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); if((ip6->ip6_nxt != IPPROTO_TCP) && \ (ip6->ip6_nxt != IPPROTO_UDP)){ struct ip6_ext *ip6e = NULL; m->m_data += sizeof(struct ip6_hdr); ip6e = (struct ip6_ext *) mtod(m, struct ip6_ext *); if(ip6e->ip6e_len == 0xff) { m->m_data = m_datatemp; return TRUE; } } m->m_data = m_datatemp; } return FALSE; } static int is_be3_a1(POCE_SOFTC sc) { if((sc->flags & OCE_FLAGS_BE3) && ((sc->asic_revision & 0xFF) < 2)) { return TRUE; } return FALSE; } static struct mbuf * oce_insert_vlan_tag(POCE_SOFTC sc, struct mbuf *m, boolean_t *complete) { 
uint16_t vlan_tag = 0; if(!M_WRITABLE(m)) return NULL; /* Embed vlan tag in the packet if it is not part of it */ if(m->m_flags & M_VLANTAG) { vlan_tag = EVL_VLANOFTAG(m->m_pkthdr.ether_vtag); m->m_flags &= ~M_VLANTAG; } /* if UMC, ignore vlan tag insertion and instead insert pvid */ if(sc->pvid) { if(!vlan_tag) vlan_tag = sc->pvid; if (complete) *complete = FALSE; } if(vlan_tag) { m = ether_vlanencap(m, vlan_tag); } if(sc->qnqid) { m = ether_vlanencap(m, sc->qnqid); if (complete) *complete = FALSE; } return m; } static int oce_tx_asic_stall_verify(POCE_SOFTC sc, struct mbuf *m) { if(is_be3_a1(sc) && IS_QNQ_OR_UMC(sc) && \ oce_check_ipv6_ext_hdr(m)) { return TRUE; } return FALSE; } static void oce_get_config(POCE_SOFTC sc) { int rc = 0; uint32_t max_rss = 0; if ((IS_BE(sc) || IS_SH(sc)) && (!sc->be3_native)) max_rss = OCE_LEGACY_MODE_RSS; else max_rss = OCE_MAX_RSS; if (!IS_BE(sc)) { rc = oce_get_profile_config(sc, max_rss); if (rc) { sc->nwqs = OCE_MAX_WQ; sc->nrssqs = max_rss; sc->nrqs = sc->nrssqs + 1; } } else { /* For BE3 don't rely on fw for determining the resources */ sc->nrssqs = max_rss; sc->nrqs = sc->nrssqs + 1; sc->nwqs = OCE_MAX_WQ; sc->max_vlans = MAX_VLANFILTER_SIZE; } } static void oce_rdma_close(void) { if (oce_rdma_if != NULL) { oce_rdma_if = NULL; } } static void oce_get_mac_addr(POCE_SOFTC sc, uint8_t *macaddr) { memcpy(macaddr, sc->macaddr.mac_addr, 6); } int oce_register_rdma(POCE_RDMA_INFO rdma_info, POCE_RDMA_IF rdma_if) { POCE_SOFTC sc; struct oce_dev_info di; int i; if ((rdma_info == NULL) || (rdma_if == NULL)) { return -EINVAL; } if ((rdma_info->size != OCE_RDMA_INFO_SIZE) || (rdma_if->size != OCE_RDMA_IF_SIZE)) { return -ENXIO; } rdma_info->close = oce_rdma_close; rdma_info->mbox_post = oce_mbox_post; rdma_info->common_req_hdr_init = mbx_common_req_hdr_init; rdma_info->get_mac_addr = oce_get_mac_addr; oce_rdma_if = rdma_if; sc = softc_head; while (sc != NULL) { if (oce_rdma_if->announce != NULL) { memset(&di, 0, sizeof(di)); di.dev = 
sc->dev; di.softc = sc; di.ifp = sc->ifp; di.db_bhandle = sc->db_bhandle; di.db_btag = sc->db_btag; di.db_page_size = 4096; if (sc->flags & OCE_FLAGS_USING_MSIX) { di.intr_mode = OCE_INTERRUPT_MODE_MSIX; } else if (sc->flags & OCE_FLAGS_USING_MSI) { di.intr_mode = OCE_INTERRUPT_MODE_MSI; } else { di.intr_mode = OCE_INTERRUPT_MODE_INTX; } di.dev_family = OCE_GEN2_FAMILY; // fixme: must detect skyhawk if (di.intr_mode != OCE_INTERRUPT_MODE_INTX) { di.msix.num_vectors = sc->intr_count + sc->roce_intr_count; di.msix.start_vector = sc->intr_count; for (i=0; iintrs[i].vector; } } else { } memcpy(di.mac_addr, sc->macaddr.mac_addr, 6); di.vendor_id = pci_get_vendor(sc->dev); di.dev_id = pci_get_device(sc->dev); if (sc->rdma_flags & OCE_RDMA_FLAG_SUPPORTED) { di.flags |= OCE_RDMA_INFO_RDMA_SUPPORTED; } rdma_if->announce(&di); sc = sc->next; } } return 0; } static void oce_read_env_variables( POCE_SOFTC sc ) { char *value = NULL; int rc = 0; /* read if user wants to enable hwlro or swlro */ //value = getenv("oce_enable_hwlro"); if(value && IS_SH(sc)) { sc->enable_hwlro = strtol(value, NULL, 10); if(sc->enable_hwlro) { rc = oce_mbox_nic_query_lro_capabilities(sc, NULL, NULL); if(rc) { device_printf(sc->dev, "no hardware lro support\n"); device_printf(sc->dev, "software lro enabled\n"); sc->enable_hwlro = 0; }else { device_printf(sc->dev, "hardware lro enabled\n"); oce_max_rsp_handled = 32; } }else { device_printf(sc->dev, "software lro enabled\n"); } }else { sc->enable_hwlro = 0; } /* read mbuf size */ //value = getenv("oce_rq_buf_size"); if(value && IS_SH(sc)) { oce_rq_buf_size = strtol(value, NULL, 10); switch(oce_rq_buf_size) { case 2048: case 4096: case 9216: case 16384: break; default: device_printf(sc->dev, " Supported oce_rq_buf_size values are 2K, 4K, 9K, 16K \n"); oce_rq_buf_size = 2048; } } return; } Index: head/sys/dev/qlnx/qlnxe/qlnx_os.c =================================================================== --- head/sys/dev/qlnx/qlnxe/qlnx_os.c (revision 331796) +++ 
head/sys/dev/qlnx/qlnxe/qlnx_os.c (revision 331797) @@ -1,7443 +1,7443 @@ /* * Copyright (c) 2017-2018 Cavium, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * File: qlnx_os.c * Author : David C Somayajulu, Cavium, Inc., San Jose, CA 95131. 
*/ #include __FBSDID("$FreeBSD$"); #include "qlnx_os.h" #include "bcm_osal.h" #include "reg_addr.h" #include "ecore_gtt_reg_addr.h" #include "ecore.h" #include "ecore_chain.h" #include "ecore_status.h" #include "ecore_hw.h" #include "ecore_rt_defs.h" #include "ecore_init_ops.h" #include "ecore_int.h" #include "ecore_cxt.h" #include "ecore_spq.h" #include "ecore_init_fw_funcs.h" #include "ecore_sp_commands.h" #include "ecore_dev_api.h" #include "ecore_l2_api.h" #include "ecore_mcp.h" #include "ecore_hw_defs.h" #include "mcp_public.h" #include "ecore_iro.h" #include "nvm_cfg.h" #include "ecore_dev_api.h" #include "ecore_dbg_fw_funcs.h" #include "qlnx_ioctl.h" #include "qlnx_def.h" #include "qlnx_ver.h" #include /* * static functions */ /* * ioctl related functions */ static void qlnx_add_sysctls(qlnx_host_t *ha); /* * main driver */ static void qlnx_release(qlnx_host_t *ha); static void qlnx_fp_isr(void *arg); static void qlnx_init_ifnet(device_t dev, qlnx_host_t *ha); static void qlnx_init(void *arg); static void qlnx_init_locked(qlnx_host_t *ha); static int qlnx_set_multi(qlnx_host_t *ha, uint32_t add_multi); static int qlnx_set_promisc(qlnx_host_t *ha); static int qlnx_set_allmulti(qlnx_host_t *ha); static int qlnx_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); static int qlnx_media_change(struct ifnet *ifp); static void qlnx_media_status(struct ifnet *ifp, struct ifmediareq *ifmr); static void qlnx_stop(qlnx_host_t *ha); static int qlnx_send(qlnx_host_t *ha, struct qlnx_fastpath *fp, struct mbuf **m_headp); static int qlnx_get_ifq_snd_maxlen(qlnx_host_t *ha); static uint32_t qlnx_get_optics(qlnx_host_t *ha, struct qlnx_link_output *if_link); static int qlnx_transmit(struct ifnet *ifp, struct mbuf *mp); static int qlnx_transmit_locked(struct ifnet *ifp, struct qlnx_fastpath *fp, struct mbuf *mp); static void qlnx_qflush(struct ifnet *ifp); static int qlnx_alloc_parent_dma_tag(qlnx_host_t *ha); static void qlnx_free_parent_dma_tag(qlnx_host_t *ha); static int 
qlnx_alloc_tx_dma_tag(qlnx_host_t *ha); static void qlnx_free_tx_dma_tag(qlnx_host_t *ha); static int qlnx_alloc_rx_dma_tag(qlnx_host_t *ha); static void qlnx_free_rx_dma_tag(qlnx_host_t *ha); static int qlnx_get_mfw_version(qlnx_host_t *ha, uint32_t *mfw_ver); static int qlnx_get_flash_size(qlnx_host_t *ha, uint32_t *flash_size); static int qlnx_nic_setup(struct ecore_dev *cdev, struct ecore_pf_params *func_params); static int qlnx_nic_start(struct ecore_dev *cdev); static int qlnx_slowpath_start(qlnx_host_t *ha); static int qlnx_slowpath_stop(qlnx_host_t *ha); static int qlnx_init_hw(qlnx_host_t *ha); static void qlnx_set_id(struct ecore_dev *cdev, char name[NAME_SIZE], char ver_str[VER_SIZE]); static void qlnx_unload(qlnx_host_t *ha); static int qlnx_load(qlnx_host_t *ha); static void qlnx_hw_set_multi(qlnx_host_t *ha, uint8_t *mta, uint32_t mcnt, uint32_t add_mac); static void qlnx_dump_buf8(qlnx_host_t *ha, const char *msg, void *dbuf, uint32_t len); static int qlnx_alloc_rx_buffer(qlnx_host_t *ha, struct qlnx_rx_queue *rxq); static void qlnx_reuse_rx_data(struct qlnx_rx_queue *rxq); static void qlnx_update_rx_prod(struct ecore_hwfn *p_hwfn, struct qlnx_rx_queue *rxq); static int qlnx_set_rx_accept_filter(qlnx_host_t *ha, uint8_t filter); static int qlnx_grc_dumpsize(qlnx_host_t *ha, uint32_t *num_dwords, int hwfn_index); static int qlnx_idle_chk_size(qlnx_host_t *ha, uint32_t *num_dwords, int hwfn_index); static void qlnx_timer(void *arg); static int qlnx_alloc_tx_br(qlnx_host_t *ha, struct qlnx_fastpath *fp); static void qlnx_free_tx_br(qlnx_host_t *ha, struct qlnx_fastpath *fp); static void qlnx_trigger_dump(qlnx_host_t *ha); static uint16_t qlnx_num_tx_compl(qlnx_host_t *ha, struct qlnx_fastpath *fp, struct qlnx_tx_queue *txq); static void qlnx_tx_int(qlnx_host_t *ha, struct qlnx_fastpath *fp, struct qlnx_tx_queue *txq); static int qlnx_rx_int(qlnx_host_t *ha, struct qlnx_fastpath *fp, int budget, int lro_enable); static void qlnx_fp_taskqueue(void 
*context, int pending); static void qlnx_sample_storm_stats(qlnx_host_t *ha); static int qlnx_alloc_tpa_mbuf(qlnx_host_t *ha, uint16_t rx_buf_size, struct qlnx_agg_info *tpa); static void qlnx_free_tpa_mbuf(qlnx_host_t *ha, struct qlnx_agg_info *tpa); #if __FreeBSD_version >= 1100000 static uint64_t qlnx_get_counter(if_t ifp, ift_counter cnt); #endif /* * Hooks to the Operating Systems */ static int qlnx_pci_probe (device_t); static int qlnx_pci_attach (device_t); static int qlnx_pci_detach (device_t); static device_method_t qlnx_pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, qlnx_pci_probe), DEVMETHOD(device_attach, qlnx_pci_attach), DEVMETHOD(device_detach, qlnx_pci_detach), { 0, 0 } }; static driver_t qlnx_pci_driver = { "ql", qlnx_pci_methods, sizeof (qlnx_host_t), }; static devclass_t qlnx_devclass; MODULE_VERSION(if_qlnxe,1); DRIVER_MODULE(if_qlnxe, pci, qlnx_pci_driver, qlnx_devclass, 0, 0); MODULE_DEPEND(if_qlnxe, pci, 1, 1, 1); MODULE_DEPEND(if_qlnxe, ether, 1, 1, 1); MALLOC_DEFINE(M_QLNXBUF, "qlnxbuf", "Buffers for qlnx driver"); char qlnx_dev_str[64]; char qlnx_ver_str[VER_SIZE]; char qlnx_name_str[NAME_SIZE]; /* * Some PCI Configuration Space Related Defines */ #ifndef PCI_VENDOR_QLOGIC #define PCI_VENDOR_QLOGIC 0x1077 #endif /* 40G Adapter QLE45xxx*/ #ifndef QLOGIC_PCI_DEVICE_ID_1634 #define QLOGIC_PCI_DEVICE_ID_1634 0x1634 #endif /* 100G Adapter QLE45xxx*/ #ifndef QLOGIC_PCI_DEVICE_ID_1644 #define QLOGIC_PCI_DEVICE_ID_1644 0x1644 #endif /* 25G Adapter QLE45xxx*/ #ifndef QLOGIC_PCI_DEVICE_ID_1656 #define QLOGIC_PCI_DEVICE_ID_1656 0x1656 #endif /* 50G Adapter QLE45xxx*/ #ifndef QLOGIC_PCI_DEVICE_ID_1654 #define QLOGIC_PCI_DEVICE_ID_1654 0x1654 #endif /* 10G/25G/40G Adapter QLE41xxx*/ #ifndef QLOGIC_PCI_DEVICE_ID_8070 #define QLOGIC_PCI_DEVICE_ID_8070 0x8070 #endif SYSCTL_NODE(_hw, OID_AUTO, qlnxe, CTLFLAG_RD, 0, "qlnxe driver parameters"); /* Number of Queues: 0 (Auto) or 1 to 32 (fixed queue number) */ static int qlnxe_queue_count = 
/*
 * Check the PCI device id against the adapters handled by this driver.
 *
 * Returns 0 for a supported device id and -1 for anything else.
 */
static int
qlnx_valid_device(device_t dev)
{
	switch (pci_get_device(dev)) {
	case QLOGIC_PCI_DEVICE_ID_1634:
	case QLOGIC_PCI_DEVICE_ID_1644:
	case QLOGIC_PCI_DEVICE_ID_1656:
	case QLOGIC_PCI_DEVICE_ID_1654:
	case QLOGIC_PCI_DEVICE_ID_8070:
		return 0;
	default:
		return -1;
	}
}
10GbE/25GbE/40GbE PCI CNA (AH) " "Adapter-Ethernet Function", QLNX_VERSION_MAJOR, QLNX_VERSION_MINOR, QLNX_VERSION_BUILD); device_set_desc_copy(dev, qlnx_dev_str); break; default: return (ENXIO); } return (BUS_PROBE_DEFAULT); } static uint16_t qlnx_num_tx_compl(qlnx_host_t *ha, struct qlnx_fastpath *fp, struct qlnx_tx_queue *txq) { u16 hw_bd_cons; u16 ecore_cons_idx; uint16_t diff; hw_bd_cons = le16toh(*txq->hw_cons_ptr); ecore_cons_idx = ecore_chain_get_cons_idx(&txq->tx_pbl); if (hw_bd_cons < ecore_cons_idx) { diff = (1 << 16) - (ecore_cons_idx - hw_bd_cons); } else { diff = hw_bd_cons - ecore_cons_idx; } return diff; } static void qlnx_sp_intr(void *arg) { struct ecore_hwfn *p_hwfn; qlnx_host_t *ha; int i; p_hwfn = arg; if (p_hwfn == NULL) { printf("%s: spurious slowpath intr\n", __func__); return; } ha = (qlnx_host_t *)p_hwfn->p_dev; QL_DPRINT2(ha, "enter\n"); for (i = 0; i < ha->cdev.num_hwfns; i++) { if (&ha->cdev.hwfns[i] == p_hwfn) { taskqueue_enqueue(ha->sp_taskqueue[i], &ha->sp_task[i]); break; } } QL_DPRINT2(ha, "exit\n"); return; } static void qlnx_sp_taskqueue(void *context, int pending) { struct ecore_hwfn *p_hwfn; p_hwfn = context; if (p_hwfn != NULL) { qlnx_sp_isr(p_hwfn); } return; } static int qlnx_create_sp_taskqueues(qlnx_host_t *ha) { int i; uint8_t tq_name[32]; for (i = 0; i < ha->cdev.num_hwfns; i++) { struct ecore_hwfn *p_hwfn = &ha->cdev.hwfns[i]; bzero(tq_name, sizeof (tq_name)); snprintf(tq_name, sizeof (tq_name), "ql_sp_tq_%d", i); TASK_INIT(&ha->sp_task[i], 0, qlnx_sp_taskqueue, p_hwfn); ha->sp_taskqueue[i] = taskqueue_create_fast(tq_name, M_NOWAIT, taskqueue_thread_enqueue, &ha->sp_taskqueue[i]); if (ha->sp_taskqueue[i] == NULL) return (-1); taskqueue_start_threads(&ha->sp_taskqueue[i], 1, PI_NET, "%s", tq_name); QL_DPRINT1(ha, "%p\n", ha->sp_taskqueue[i]); } return (0); } static void qlnx_destroy_sp_taskqueues(qlnx_host_t *ha) { int i; for (i = 0; i < ha->cdev.num_hwfns; i++) { if (ha->sp_taskqueue[i] != NULL) { 
taskqueue_drain(ha->sp_taskqueue[i], &ha->sp_task[i]); taskqueue_free(ha->sp_taskqueue[i]); } } return; } static void qlnx_fp_taskqueue(void *context, int pending) { struct qlnx_fastpath *fp; qlnx_host_t *ha; struct ifnet *ifp; #ifdef QLNX_RCV_IN_TASKQ int lro_enable; int rx_int = 0, total_rx_count = 0; struct thread *cthread; #endif /* #ifdef QLNX_RCV_IN_TASKQ */ fp = context; if (fp == NULL) return; ha = (qlnx_host_t *)fp->edev; ifp = ha->ifp; #ifdef QLNX_RCV_IN_TASKQ cthread = curthread; thread_lock(cthread); if (!sched_is_bound(cthread)) sched_bind(cthread, fp->rss_id); thread_unlock(cthread); lro_enable = ifp->if_capenable & IFCAP_LRO; rx_int = qlnx_rx_int(ha, fp, ha->rx_pkt_threshold, lro_enable); if (rx_int) { fp->rx_pkts += rx_int; total_rx_count += rx_int; } #ifdef QLNX_SOFT_LRO { struct lro_ctrl *lro; lro = &fp->rxq->lro; if (lro_enable && total_rx_count) { #if (__FreeBSD_version >= 1100101) || (defined QLNX_QSORT_LRO) if (ha->dbg_trace_lro_cnt) { if (lro->lro_mbuf_count & ~1023) fp->lro_cnt_1024++; else if (lro->lro_mbuf_count & ~511) fp->lro_cnt_512++; else if (lro->lro_mbuf_count & ~255) fp->lro_cnt_256++; else if (lro->lro_mbuf_count & ~127) fp->lro_cnt_128++; else if (lro->lro_mbuf_count & ~63) fp->lro_cnt_64++; } tcp_lro_flush_all(lro); #else struct lro_entry *queued; while ((!SLIST_EMPTY(&lro->lro_active))) { queued = SLIST_FIRST(&lro->lro_active); SLIST_REMOVE_HEAD(&lro->lro_active, next); tcp_lro_flush(lro, queued); } #endif /* #if (__FreeBSD_version >= 1100101) || (defined QLNX_QSORT_LRO) */ } } #endif /* #ifdef QLNX_SOFT_LRO */ ecore_sb_update_sb_idx(fp->sb_info); rmb(); #endif /* #ifdef QLNX_RCV_IN_TASKQ */ if(ifp->if_drv_flags & IFF_DRV_RUNNING) { if (!drbr_empty(ifp, fp->tx_br)) { if(mtx_trylock(&fp->tx_mtx)) { #ifdef QLNX_TRACE_PERF_DATA tx_pkts = fp->tx_pkts_transmitted; tx_compl = fp->tx_pkts_completed; #endif qlnx_transmit_locked(ifp, fp, NULL); #ifdef QLNX_TRACE_PERF_DATA fp->tx_pkts_trans_fp += (fp->tx_pkts_transmitted - tx_pkts); 
fp->tx_pkts_compl_fp += (fp->tx_pkts_completed - tx_compl); #endif mtx_unlock(&fp->tx_mtx); } } } #ifdef QLNX_RCV_IN_TASKQ if (rx_int) { if (fp->fp_taskqueue != NULL) taskqueue_enqueue(fp->fp_taskqueue, &fp->fp_task); } else { if (fp->tx_ring_full) { qlnx_mdelay(__func__, 100); } ecore_sb_ack(fp->sb_info, IGU_INT_ENABLE, 1); } #endif /* #ifdef QLNX_RCV_IN_TASKQ */ QL_DPRINT2(ha, "exit \n"); return; } static int qlnx_create_fp_taskqueues(qlnx_host_t *ha) { int i; uint8_t tq_name[32]; struct qlnx_fastpath *fp; for (i = 0; i < ha->num_rss; i++) { fp = &ha->fp_array[i]; bzero(tq_name, sizeof (tq_name)); snprintf(tq_name, sizeof (tq_name), "ql_fp_tq_%d", i); TASK_INIT(&fp->fp_task, 0, qlnx_fp_taskqueue, fp); fp->fp_taskqueue = taskqueue_create_fast(tq_name, M_NOWAIT, taskqueue_thread_enqueue, &fp->fp_taskqueue); if (fp->fp_taskqueue == NULL) return (-1); taskqueue_start_threads(&fp->fp_taskqueue, 1, PI_NET, "%s", tq_name); QL_DPRINT1(ha, "%p\n",fp->fp_taskqueue); } return (0); } static void qlnx_destroy_fp_taskqueues(qlnx_host_t *ha) { int i; struct qlnx_fastpath *fp; for (i = 0; i < ha->num_rss; i++) { fp = &ha->fp_array[i]; if (fp->fp_taskqueue != NULL) { taskqueue_drain(fp->fp_taskqueue, &fp->fp_task); taskqueue_free(fp->fp_taskqueue); fp->fp_taskqueue = NULL; } } return; } static void qlnx_drain_fp_taskqueues(qlnx_host_t *ha) { int i; struct qlnx_fastpath *fp; for (i = 0; i < ha->num_rss; i++) { fp = &ha->fp_array[i]; if (fp->fp_taskqueue != NULL) { QLNX_UNLOCK(ha); taskqueue_drain(fp->fp_taskqueue, &fp->fp_task); QLNX_LOCK(ha); } } return; } static void qlnx_get_params(qlnx_host_t *ha) { if ((qlnxe_queue_count < 0) || (qlnxe_queue_count > QLNX_MAX_RSS)) { device_printf(ha->pci_dev, "invalid queue_count value (%d)\n", qlnxe_queue_count); qlnxe_queue_count = 0; } return; } /* * Name: qlnx_pci_attach * Function: attaches the device to the operating system */ static int qlnx_pci_attach(device_t dev) { qlnx_host_t *ha = NULL; uint32_t rsrc_len_reg = 0; uint32_t 
rsrc_len_dbells = 0; uint32_t rsrc_len_msix = 0; int i; uint32_t mfw_ver; if ((ha = device_get_softc(dev)) == NULL) { device_printf(dev, "cannot get softc\n"); return (ENOMEM); } memset(ha, 0, sizeof (qlnx_host_t)); if (qlnx_valid_device(dev) != 0) { device_printf(dev, "device is not valid device\n"); return (ENXIO); } ha->pci_func = pci_get_function(dev); ha->pci_dev = dev; mtx_init(&ha->hw_lock, "qlnx_hw_lock", MTX_NETWORK_LOCK, MTX_DEF); ha->flags.lock_init = 1; pci_enable_busmaster(dev); /* * map the PCI BARs */ ha->reg_rid = PCIR_BAR(0); ha->pci_reg = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &ha->reg_rid, RF_ACTIVE); if (ha->pci_reg == NULL) { device_printf(dev, "unable to map BAR0\n"); goto qlnx_pci_attach_err; } rsrc_len_reg = (uint32_t) bus_get_resource_count(dev, SYS_RES_MEMORY, ha->reg_rid); ha->dbells_rid = PCIR_BAR(2); ha->pci_dbells = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &ha->dbells_rid, RF_ACTIVE); if (ha->pci_dbells == NULL) { device_printf(dev, "unable to map BAR1\n"); goto qlnx_pci_attach_err; } rsrc_len_dbells = (uint32_t) bus_get_resource_count(dev, SYS_RES_MEMORY, ha->dbells_rid); ha->dbells_phys_addr = (uint64_t) bus_get_resource_start(dev, SYS_RES_MEMORY, ha->dbells_rid);; ha->dbells_size = rsrc_len_dbells; ha->msix_rid = PCIR_BAR(4); ha->msix_bar = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &ha->msix_rid, RF_ACTIVE); if (ha->msix_bar == NULL) { device_printf(dev, "unable to map BAR2\n"); goto qlnx_pci_attach_err; } rsrc_len_msix = (uint32_t) bus_get_resource_count(dev, SYS_RES_MEMORY, ha->msix_rid); /* * allocate dma tags */ if (qlnx_alloc_parent_dma_tag(ha)) goto qlnx_pci_attach_err; if (qlnx_alloc_tx_dma_tag(ha)) goto qlnx_pci_attach_err; if (qlnx_alloc_rx_dma_tag(ha)) goto qlnx_pci_attach_err; if (qlnx_init_hw(ha) != 0) goto qlnx_pci_attach_err; qlnx_get_params(ha); if((pci_get_device(dev) == QLOGIC_PCI_DEVICE_ID_1644) && (qlnxe_queue_count == QLNX_DEFAULT_RSS)) { qlnxe_queue_count = QLNX_MAX_RSS; } /* * Allocate MSI-x vectors 
*/ if(qlnxe_queue_count == 0) ha->num_rss = QLNX_DEFAULT_RSS; else ha->num_rss = qlnxe_queue_count; ha->num_tc = QLNX_MAX_TC; ha->msix_count = pci_msix_count(dev); if (ha->msix_count > (mp_ncpus + ha->cdev.num_hwfns)) ha->msix_count = mp_ncpus + ha->cdev.num_hwfns; if (!ha->msix_count || (ha->msix_count < (ha->cdev.num_hwfns + 1 ))) { device_printf(dev, "%s: msix_count[%d] not enough\n", __func__, ha->msix_count); goto qlnx_pci_attach_err; } if (ha->msix_count > (ha->num_rss + ha->cdev.num_hwfns )) ha->msix_count = ha->num_rss + ha->cdev.num_hwfns; else ha->num_rss = ha->msix_count - ha->cdev.num_hwfns; QL_DPRINT1(ha, "\n\t\t\tpci_reg [%p, 0x%08x 0x%08x]" "\n\t\t\tdbells [%p, 0x%08x 0x%08x]" "\n\t\t\tmsix [%p, 0x%08x 0x%08x 0x%x 0x%x]" "\n\t\t\t[ncpus = %d][num_rss = 0x%x] [num_tc = 0x%x]\n", ha->pci_reg, rsrc_len_reg, ha->reg_rid, ha->pci_dbells, rsrc_len_dbells, ha->dbells_rid, ha->msix_bar, rsrc_len_msix, ha->msix_rid, pci_msix_count(dev), ha->msix_count, mp_ncpus, ha->num_rss, ha->num_tc); if (pci_alloc_msix(dev, &ha->msix_count)) { device_printf(dev, "%s: pci_alloc_msix[%d] failed\n", __func__, ha->msix_count); ha->msix_count = 0; goto qlnx_pci_attach_err; } /* * Initialize slow path interrupt and task queue */ if (qlnx_create_sp_taskqueues(ha) != 0) goto qlnx_pci_attach_err; for (i = 0; i < ha->cdev.num_hwfns; i++) { struct ecore_hwfn *p_hwfn = &ha->cdev.hwfns[i]; ha->sp_irq_rid[i] = i + 1; ha->sp_irq[i] = bus_alloc_resource_any(dev, SYS_RES_IRQ, &ha->sp_irq_rid[i], (RF_ACTIVE | RF_SHAREABLE)); if (ha->sp_irq[i] == NULL) { device_printf(dev, "could not allocate mbx interrupt\n"); goto qlnx_pci_attach_err; } if (bus_setup_intr(dev, ha->sp_irq[i], (INTR_TYPE_NET | INTR_MPSAFE), NULL, qlnx_sp_intr, p_hwfn, &ha->sp_handle[i])) { device_printf(dev, "could not setup slow path interrupt\n"); goto qlnx_pci_attach_err; } QL_DPRINT1(ha, "p_hwfn [%p] sp_irq_rid %d" " sp_irq %p sp_handle %p\n", p_hwfn, ha->sp_irq_rid[i], ha->sp_irq[i], ha->sp_handle[i]); } /* * 
initialize fast path interrupt */ if (qlnx_create_fp_taskqueues(ha) != 0) goto qlnx_pci_attach_err; for (i = 0; i < ha->num_rss; i++) { ha->irq_vec[i].rss_idx = i; ha->irq_vec[i].ha = ha; ha->irq_vec[i].irq_rid = (1 + ha->cdev.num_hwfns) + i; ha->irq_vec[i].irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &ha->irq_vec[i].irq_rid, (RF_ACTIVE | RF_SHAREABLE)); if (ha->irq_vec[i].irq == NULL) { device_printf(dev, "could not allocate interrupt[%d]\n", i); goto qlnx_pci_attach_err; } if (qlnx_alloc_tx_br(ha, &ha->fp_array[i])) { device_printf(dev, "could not allocate tx_br[%d]\n", i); goto qlnx_pci_attach_err; } } callout_init(&ha->qlnx_callout, 1); ha->flags.callout_init = 1; for (i = 0; i < ha->cdev.num_hwfns; i++) { if (qlnx_grc_dumpsize(ha, &ha->grcdump_size[i], i) != 0) goto qlnx_pci_attach_err; if (ha->grcdump_size[i] == 0) goto qlnx_pci_attach_err; ha->grcdump_size[i] = ha->grcdump_size[i] << 2; QL_DPRINT1(ha, "grcdump_size[%d] = 0x%08x\n", i, ha->grcdump_size[i]); ha->grcdump[i] = qlnx_zalloc(ha->grcdump_size[i]); if (ha->grcdump[i] == NULL) { device_printf(dev, "grcdump alloc[%d] failed\n", i); goto qlnx_pci_attach_err; } if (qlnx_idle_chk_size(ha, &ha->idle_chk_size[i], i) != 0) goto qlnx_pci_attach_err; if (ha->idle_chk_size[i] == 0) goto qlnx_pci_attach_err; ha->idle_chk_size[i] = ha->idle_chk_size[i] << 2; QL_DPRINT1(ha, "idle_chk_size[%d] = 0x%08x\n", i, ha->idle_chk_size[i]); ha->idle_chk[i] = qlnx_zalloc(ha->idle_chk_size[i]); if (ha->idle_chk[i] == NULL) { device_printf(dev, "idle_chk alloc failed\n"); goto qlnx_pci_attach_err; } } if (qlnx_slowpath_start(ha) != 0) { qlnx_mdelay(__func__, 1000); qlnx_trigger_dump(ha); goto qlnx_pci_attach_err0; } else ha->flags.slowpath_start = 1; if (qlnx_get_flash_size(ha, &ha->flash_size) != 0) { qlnx_mdelay(__func__, 1000); qlnx_trigger_dump(ha); goto qlnx_pci_attach_err0; } if (qlnx_get_mfw_version(ha, &mfw_ver) != 0) { qlnx_mdelay(__func__, 1000); qlnx_trigger_dump(ha); goto qlnx_pci_attach_err0; } 
snprintf(ha->mfw_ver, sizeof(ha->mfw_ver), "%d.%d.%d.%d", ((mfw_ver >> 24) & 0xFF), ((mfw_ver >> 16) & 0xFF), ((mfw_ver >> 8) & 0xFF), (mfw_ver & 0xFF)); snprintf(ha->stormfw_ver, sizeof(ha->stormfw_ver), "%d.%d.%d.%d", FW_MAJOR_VERSION, FW_MINOR_VERSION, FW_REVISION_VERSION, FW_ENGINEERING_VERSION); QL_DPRINT1(ha, "STORM_FW version %s MFW version %s\n", ha->stormfw_ver, ha->mfw_ver); qlnx_init_ifnet(dev, ha); /* * add sysctls */ qlnx_add_sysctls(ha); qlnx_pci_attach_err0: /* * create ioctl device interface */ if (qlnx_make_cdev(ha)) { device_printf(dev, "%s: ql_make_cdev failed\n", __func__); goto qlnx_pci_attach_err; } QL_DPRINT2(ha, "success\n"); return (0); qlnx_pci_attach_err: qlnx_release(ha); return (ENXIO); } /* * Name: qlnx_pci_detach * Function: Unhooks the device from the operating system */ static int qlnx_pci_detach(device_t dev) { qlnx_host_t *ha = NULL; if ((ha = device_get_softc(dev)) == NULL) { device_printf(dev, "cannot get softc\n"); return (ENOMEM); } QLNX_LOCK(ha); qlnx_stop(ha); QLNX_UNLOCK(ha); qlnx_release(ha); return (0); } static int qlnx_init_hw(qlnx_host_t *ha) { int rval = 0; struct ecore_hw_prepare_params params; ecore_init_struct(&ha->cdev); /* ha->dp_module = ECORE_MSG_PROBE | ECORE_MSG_INTR | ECORE_MSG_SP | ECORE_MSG_LINK | ECORE_MSG_SPQ | ECORE_MSG_RDMA; ha->dp_level = ECORE_LEVEL_VERBOSE;*/ ha->dp_level = ECORE_LEVEL_NOTICE; ecore_init_dp(&ha->cdev, ha->dp_module, ha->dp_level, ha->pci_dev); ha->cdev.regview = ha->pci_reg; ha->cdev.doorbells = ha->pci_dbells; ha->cdev.db_phys_addr = ha->dbells_phys_addr; ha->cdev.db_size = ha->dbells_size; bzero(¶ms, sizeof (struct ecore_hw_prepare_params)); ha->personality = ECORE_PCI_DEFAULT; params.personality = ha->personality; params.drv_resc_alloc = false; params.chk_reg_fifo = false; params.initiate_pf_flr = true; params.epoch = 0; ecore_hw_prepare(&ha->cdev, ¶ms); qlnx_set_id(&ha->cdev, qlnx_name_str, qlnx_ver_str); return (rval); } static void qlnx_release(qlnx_host_t *ha) { device_t dev; 
int i; dev = ha->pci_dev; QL_DPRINT2(ha, "enter\n"); for (i = 0; i < QLNX_MAX_HW_FUNCS; i++) { if (ha->idle_chk[i] != NULL) { free(ha->idle_chk[i], M_QLNXBUF); ha->idle_chk[i] = NULL; } if (ha->grcdump[i] != NULL) { free(ha->grcdump[i], M_QLNXBUF); ha->grcdump[i] = NULL; } } if (ha->flags.callout_init) callout_drain(&ha->qlnx_callout); if (ha->flags.slowpath_start) { qlnx_slowpath_stop(ha); } ecore_hw_remove(&ha->cdev); qlnx_del_cdev(ha); if (ha->ifp != NULL) ether_ifdetach(ha->ifp); qlnx_free_tx_dma_tag(ha); qlnx_free_rx_dma_tag(ha); qlnx_free_parent_dma_tag(ha); for (i = 0; i < ha->num_rss; i++) { struct qlnx_fastpath *fp = &ha->fp_array[i]; if (ha->irq_vec[i].handle) { (void)bus_teardown_intr(dev, ha->irq_vec[i].irq, ha->irq_vec[i].handle); } if (ha->irq_vec[i].irq) { (void)bus_release_resource(dev, SYS_RES_IRQ, ha->irq_vec[i].irq_rid, ha->irq_vec[i].irq); } qlnx_free_tx_br(ha, fp); } qlnx_destroy_fp_taskqueues(ha); for (i = 0; i < ha->cdev.num_hwfns; i++) { if (ha->sp_handle[i]) (void)bus_teardown_intr(dev, ha->sp_irq[i], ha->sp_handle[i]); if (ha->sp_irq[i]) (void) bus_release_resource(dev, SYS_RES_IRQ, ha->sp_irq_rid[i], ha->sp_irq[i]); } qlnx_destroy_sp_taskqueues(ha); if (ha->msix_count) pci_release_msi(dev); if (ha->flags.lock_init) { mtx_destroy(&ha->hw_lock); } if (ha->pci_reg) (void) bus_release_resource(dev, SYS_RES_MEMORY, ha->reg_rid, ha->pci_reg); if (ha->pci_dbells) (void) bus_release_resource(dev, SYS_RES_MEMORY, ha->dbells_rid, ha->pci_dbells); if (ha->msix_bar) (void) bus_release_resource(dev, SYS_RES_MEMORY, ha->msix_rid, ha->msix_bar); QL_DPRINT2(ha, "exit\n"); return; } static void qlnx_trigger_dump(qlnx_host_t *ha) { int i; if (ha->ifp != NULL) ha->ifp->if_drv_flags &= ~(IFF_DRV_OACTIVE | IFF_DRV_RUNNING); QL_DPRINT2(ha, "enter\n"); for (i = 0; i < ha->cdev.num_hwfns; i++) { qlnx_grc_dump(ha, &ha->grcdump_dwords[i], i); qlnx_idle_chk(ha, &ha->idle_chk_dwords[i], i); } QL_DPRINT2(ha, "exit\n"); return; } static int 
qlnx_trigger_dump_sysctl(SYSCTL_HANDLER_ARGS) { int err, ret = 0; qlnx_host_t *ha; err = sysctl_handle_int(oidp, &ret, 0, req); if (err || !req->newptr) return (err); if (ret == 1) { ha = (qlnx_host_t *)arg1; qlnx_trigger_dump(ha); } return (err); } static int qlnx_set_tx_coalesce(SYSCTL_HANDLER_ARGS) { int err, i, ret = 0, usecs = 0; qlnx_host_t *ha; struct ecore_hwfn *p_hwfn; struct qlnx_fastpath *fp; err = sysctl_handle_int(oidp, &usecs, 0, req); if (err || !req->newptr || !usecs || (usecs > 255)) return (err); ha = (qlnx_host_t *)arg1; for (i = 0; i < ha->num_rss; i++) { p_hwfn = &ha->cdev.hwfns[(i % ha->cdev.num_hwfns)]; fp = &ha->fp_array[i]; if (fp->txq[0]->handle != NULL) { ret = ecore_set_queue_coalesce(p_hwfn, 0, (uint16_t)usecs, fp->txq[0]->handle); } } if (!ret) ha->tx_coalesce_usecs = (uint8_t)usecs; return (err); } static int qlnx_set_rx_coalesce(SYSCTL_HANDLER_ARGS) { int err, i, ret = 0, usecs = 0; qlnx_host_t *ha; struct ecore_hwfn *p_hwfn; struct qlnx_fastpath *fp; err = sysctl_handle_int(oidp, &usecs, 0, req); if (err || !req->newptr || !usecs || (usecs > 255)) return (err); ha = (qlnx_host_t *)arg1; for (i = 0; i < ha->num_rss; i++) { p_hwfn = &ha->cdev.hwfns[(i % ha->cdev.num_hwfns)]; fp = &ha->fp_array[i]; if (fp->rxq->handle != NULL) { ret = ecore_set_queue_coalesce(p_hwfn, (uint16_t)usecs, 0, fp->rxq->handle); } } if (!ret) ha->rx_coalesce_usecs = (uint8_t)usecs; return (err); } static void qlnx_add_sp_stats_sysctls(qlnx_host_t *ha) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *children; struct sysctl_oid *ctx_oid; ctx = device_get_sysctl_ctx(ha->pci_dev); children = SYSCTL_CHILDREN(device_get_sysctl_tree(ha->pci_dev)); ctx_oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "spstat", CTLFLAG_RD, NULL, "spstat"); children = SYSCTL_CHILDREN(ctx_oid); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "sp_interrupts", CTLFLAG_RD, &ha->sp_interrupts, "No. 
of slowpath interrupts"); return; } static void qlnx_add_fp_stats_sysctls(qlnx_host_t *ha) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *children; struct sysctl_oid_list *node_children; struct sysctl_oid *ctx_oid; int i, j; uint8_t name_str[16]; ctx = device_get_sysctl_ctx(ha->pci_dev); children = SYSCTL_CHILDREN(device_get_sysctl_tree(ha->pci_dev)); ctx_oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "fpstat", CTLFLAG_RD, NULL, "fpstat"); children = SYSCTL_CHILDREN(ctx_oid); for (i = 0; i < ha->num_rss; i++) { bzero(name_str, (sizeof(uint8_t) * sizeof(name_str))); snprintf(name_str, sizeof(name_str), "%d", i); ctx_oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, name_str, CTLFLAG_RD, NULL, name_str); node_children = SYSCTL_CHILDREN(ctx_oid); /* Tx Related */ SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "tx_pkts_processed", CTLFLAG_RD, &ha->fp_array[i].tx_pkts_processed, "No. of packets processed for transmission"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "tx_pkts_freed", CTLFLAG_RD, &ha->fp_array[i].tx_pkts_freed, "No. of freed packets"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "tx_pkts_transmitted", CTLFLAG_RD, &ha->fp_array[i].tx_pkts_transmitted, "No. of transmitted packets"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "tx_pkts_completed", CTLFLAG_RD, &ha->fp_array[i].tx_pkts_completed, "No. of transmit completions"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "tx_non_tso_pkts", CTLFLAG_RD, &ha->fp_array[i].tx_non_tso_pkts, "No. of non LSO transmited packets"); #ifdef QLNX_TRACE_PERF_DATA SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "tx_pkts_trans_ctx", CTLFLAG_RD, &ha->fp_array[i].tx_pkts_trans_ctx, "No. of transmitted packets in transmit context"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "tx_pkts_compl_ctx", CTLFLAG_RD, &ha->fp_array[i].tx_pkts_compl_ctx, "No. of transmit completions in transmit context"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "tx_pkts_trans_fp", CTLFLAG_RD, &ha->fp_array[i].tx_pkts_trans_fp, "No. 
of transmitted packets in taskqueue"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "tx_pkts_compl_fp", CTLFLAG_RD, &ha->fp_array[i].tx_pkts_compl_fp, "No. of transmit completions in taskqueue"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "tx_pkts_compl_intr", CTLFLAG_RD, &ha->fp_array[i].tx_pkts_compl_intr, "No. of transmit completions in interrupt ctx"); #endif SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "tx_tso_pkts", CTLFLAG_RD, &ha->fp_array[i].tx_tso_pkts, "No. of LSO transmited packets"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "tx_lso_wnd_min_len", CTLFLAG_RD, &ha->fp_array[i].tx_lso_wnd_min_len, "tx_lso_wnd_min_len"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "tx_defrag", CTLFLAG_RD, &ha->fp_array[i].tx_defrag, "tx_defrag"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "tx_nsegs_gt_elem_left", CTLFLAG_RD, &ha->fp_array[i].tx_nsegs_gt_elem_left, "tx_nsegs_gt_elem_left"); SYSCTL_ADD_UINT(ctx, node_children, OID_AUTO, "tx_tso_max_nsegs", CTLFLAG_RD, &ha->fp_array[i].tx_tso_max_nsegs, ha->fp_array[i].tx_tso_max_nsegs, "tx_tso_max_nsegs"); SYSCTL_ADD_UINT(ctx, node_children, OID_AUTO, "tx_tso_min_nsegs", CTLFLAG_RD, &ha->fp_array[i].tx_tso_min_nsegs, ha->fp_array[i].tx_tso_min_nsegs, "tx_tso_min_nsegs"); SYSCTL_ADD_UINT(ctx, node_children, OID_AUTO, "tx_tso_max_pkt_len", CTLFLAG_RD, &ha->fp_array[i].tx_tso_max_pkt_len, ha->fp_array[i].tx_tso_max_pkt_len, "tx_tso_max_pkt_len"); SYSCTL_ADD_UINT(ctx, node_children, OID_AUTO, "tx_tso_min_pkt_len", CTLFLAG_RD, &ha->fp_array[i].tx_tso_min_pkt_len, ha->fp_array[i].tx_tso_min_pkt_len, "tx_tso_min_pkt_len"); for (j = 0; j < QLNX_FP_MAX_SEGS; j++) { bzero(name_str, (sizeof(uint8_t) * sizeof(name_str))); snprintf(name_str, sizeof(name_str), "tx_pkts_nseg_%02d", (j+1)); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, name_str, CTLFLAG_RD, &ha->fp_array[i].tx_pkts[j], name_str); } #ifdef QLNX_TRACE_PERF_DATA for (j = 0; j < 18; j++) { bzero(name_str, (sizeof(uint8_t) * sizeof(name_str))); snprintf(name_str, 
sizeof(name_str), "tx_pkts_hist_%02d", (j+1)); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, name_str, CTLFLAG_RD, &ha->fp_array[i].tx_pkts_hist[j], name_str); } for (j = 0; j < 5; j++) { bzero(name_str, (sizeof(uint8_t) * sizeof(name_str))); snprintf(name_str, sizeof(name_str), "tx_comInt_%02d", (j+1)); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, name_str, CTLFLAG_RD, &ha->fp_array[i].tx_comInt[j], name_str); } for (j = 0; j < 18; j++) { bzero(name_str, (sizeof(uint8_t) * sizeof(name_str))); snprintf(name_str, sizeof(name_str), "tx_pkts_q_%02d", (j+1)); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, name_str, CTLFLAG_RD, &ha->fp_array[i].tx_pkts_q[j], name_str); } #endif SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "err_tx_nsegs_gt_elem_left", CTLFLAG_RD, &ha->fp_array[i].err_tx_nsegs_gt_elem_left, "err_tx_nsegs_gt_elem_left"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "err_tx_dmamap_create", CTLFLAG_RD, &ha->fp_array[i].err_tx_dmamap_create, "err_tx_dmamap_create"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "err_tx_defrag_dmamap_load", CTLFLAG_RD, &ha->fp_array[i].err_tx_defrag_dmamap_load, "err_tx_defrag_dmamap_load"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "err_tx_non_tso_max_seg", CTLFLAG_RD, &ha->fp_array[i].err_tx_non_tso_max_seg, "err_tx_non_tso_max_seg"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "err_tx_dmamap_load", CTLFLAG_RD, &ha->fp_array[i].err_tx_dmamap_load, "err_tx_dmamap_load"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "err_tx_defrag", CTLFLAG_RD, &ha->fp_array[i].err_tx_defrag, "err_tx_defrag"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "err_tx_free_pkt_null", CTLFLAG_RD, &ha->fp_array[i].err_tx_free_pkt_null, "err_tx_free_pkt_null"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "err_tx_cons_idx_conflict", CTLFLAG_RD, &ha->fp_array[i].err_tx_cons_idx_conflict, "err_tx_cons_idx_conflict"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "lro_cnt_64", CTLFLAG_RD, &ha->fp_array[i].lro_cnt_64, "lro_cnt_64"); 
SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "lro_cnt_128", CTLFLAG_RD, &ha->fp_array[i].lro_cnt_128, "lro_cnt_128"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "lro_cnt_256", CTLFLAG_RD, &ha->fp_array[i].lro_cnt_256, "lro_cnt_256"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "lro_cnt_512", CTLFLAG_RD, &ha->fp_array[i].lro_cnt_512, "lro_cnt_512"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "lro_cnt_1024", CTLFLAG_RD, &ha->fp_array[i].lro_cnt_1024, "lro_cnt_1024"); /* Rx Related */ SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "rx_pkts", CTLFLAG_RD, &ha->fp_array[i].rx_pkts, "No. of received packets"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "tpa_start", CTLFLAG_RD, &ha->fp_array[i].tpa_start, "No. of tpa_start packets"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "tpa_cont", CTLFLAG_RD, &ha->fp_array[i].tpa_cont, "No. of tpa_cont packets"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "tpa_end", CTLFLAG_RD, &ha->fp_array[i].tpa_end, "No. of tpa_end packets"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "err_m_getcl", CTLFLAG_RD, &ha->fp_array[i].err_m_getcl, "err_m_getcl"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "err_m_getjcl", CTLFLAG_RD, &ha->fp_array[i].err_m_getjcl, "err_m_getjcl"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "err_rx_hw_errors", CTLFLAG_RD, &ha->fp_array[i].err_rx_hw_errors, "err_rx_hw_errors"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "err_rx_alloc_errors", CTLFLAG_RD, &ha->fp_array[i].err_rx_alloc_errors, "err_rx_alloc_errors"); } return; } static void qlnx_add_hw_stats_sysctls(qlnx_host_t *ha) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *children; struct sysctl_oid *ctx_oid; ctx = device_get_sysctl_ctx(ha->pci_dev); children = SYSCTL_CHILDREN(device_get_sysctl_tree(ha->pci_dev)); ctx_oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "hwstat", CTLFLAG_RD, NULL, "hwstat"); children = SYSCTL_CHILDREN(ctx_oid); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "no_buff_discards", CTLFLAG_RD, 
&ha->hw_stats.common.no_buff_discards, "No. of packets discarded due to lack of buffer"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "packet_too_big_discard", CTLFLAG_RD, &ha->hw_stats.common.packet_too_big_discard, "No. of packets discarded because packet was too big"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "ttl0_discard", CTLFLAG_RD, &ha->hw_stats.common.ttl0_discard, "ttl0_discard"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_ucast_bytes", CTLFLAG_RD, &ha->hw_stats.common.rx_ucast_bytes, "rx_ucast_bytes"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_mcast_bytes", CTLFLAG_RD, &ha->hw_stats.common.rx_mcast_bytes, "rx_mcast_bytes"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_bcast_bytes", CTLFLAG_RD, &ha->hw_stats.common.rx_bcast_bytes, "rx_bcast_bytes"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_ucast_pkts", CTLFLAG_RD, &ha->hw_stats.common.rx_ucast_pkts, "rx_ucast_pkts"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_mcast_pkts", CTLFLAG_RD, &ha->hw_stats.common.rx_mcast_pkts, "rx_mcast_pkts"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_bcast_pkts", CTLFLAG_RD, &ha->hw_stats.common.rx_bcast_pkts, "rx_bcast_pkts"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "mftag_filter_discards", CTLFLAG_RD, &ha->hw_stats.common.mftag_filter_discards, "mftag_filter_discards"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "mac_filter_discards", CTLFLAG_RD, &ha->hw_stats.common.mac_filter_discards, "mac_filter_discards"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_ucast_bytes", CTLFLAG_RD, &ha->hw_stats.common.tx_ucast_bytes, "tx_ucast_bytes"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_mcast_bytes", CTLFLAG_RD, &ha->hw_stats.common.tx_mcast_bytes, "tx_mcast_bytes"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_bcast_bytes", CTLFLAG_RD, &ha->hw_stats.common.tx_bcast_bytes, "tx_bcast_bytes"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_ucast_pkts", CTLFLAG_RD, &ha->hw_stats.common.tx_ucast_pkts, "tx_ucast_pkts"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_mcast_pkts", 
CTLFLAG_RD, &ha->hw_stats.common.tx_mcast_pkts, "tx_mcast_pkts"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_bcast_pkts", CTLFLAG_RD, &ha->hw_stats.common.tx_bcast_pkts, "tx_bcast_pkts"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_err_drop_pkts", CTLFLAG_RD, &ha->hw_stats.common.tx_err_drop_pkts, "tx_err_drop_pkts"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tpa_coalesced_pkts", CTLFLAG_RD, &ha->hw_stats.common.tpa_coalesced_pkts, "tpa_coalesced_pkts"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tpa_coalesced_events", CTLFLAG_RD, &ha->hw_stats.common.tpa_coalesced_events, "tpa_coalesced_events"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tpa_aborts_num", CTLFLAG_RD, &ha->hw_stats.common.tpa_aborts_num, "tpa_aborts_num"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tpa_not_coalesced_pkts", CTLFLAG_RD, &ha->hw_stats.common.tpa_not_coalesced_pkts, "tpa_not_coalesced_pkts"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tpa_coalesced_bytes", CTLFLAG_RD, &ha->hw_stats.common.tpa_coalesced_bytes, "tpa_coalesced_bytes"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_64_byte_packets", CTLFLAG_RD, &ha->hw_stats.common.rx_64_byte_packets, "rx_64_byte_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_65_to_127_byte_packets", CTLFLAG_RD, &ha->hw_stats.common.rx_65_to_127_byte_packets, "rx_65_to_127_byte_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_128_to_255_byte_packets", CTLFLAG_RD, &ha->hw_stats.common.rx_128_to_255_byte_packets, "rx_128_to_255_byte_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_256_to_511_byte_packets", CTLFLAG_RD, &ha->hw_stats.common.rx_256_to_511_byte_packets, "rx_256_to_511_byte_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_512_to_1023_byte_packets", CTLFLAG_RD, &ha->hw_stats.common.rx_512_to_1023_byte_packets, "rx_512_to_1023_byte_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_1024_to_1518_byte_packets", CTLFLAG_RD, &ha->hw_stats.common.rx_1024_to_1518_byte_packets, "rx_1024_to_1518_byte_packets"); 
SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_1519_to_1522_byte_packets", CTLFLAG_RD, &ha->hw_stats.bb.rx_1519_to_1522_byte_packets, "rx_1519_to_1522_byte_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_1523_to_2047_byte_packets", CTLFLAG_RD, &ha->hw_stats.bb.rx_1519_to_2047_byte_packets, "rx_1523_to_2047_byte_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_2048_to_4095_byte_packets", CTLFLAG_RD, &ha->hw_stats.bb.rx_2048_to_4095_byte_packets, "rx_2048_to_4095_byte_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_4096_to_9216_byte_packets", CTLFLAG_RD, &ha->hw_stats.bb.rx_4096_to_9216_byte_packets, "rx_4096_to_9216_byte_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_9217_to_16383_byte_packets", CTLFLAG_RD, &ha->hw_stats.bb.rx_9217_to_16383_byte_packets, "rx_9217_to_16383_byte_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_crc_errors", CTLFLAG_RD, &ha->hw_stats.common.rx_crc_errors, "rx_crc_errors"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_mac_crtl_frames", CTLFLAG_RD, &ha->hw_stats.common.rx_mac_crtl_frames, "rx_mac_crtl_frames"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_pause_frames", CTLFLAG_RD, &ha->hw_stats.common.rx_pause_frames, "rx_pause_frames"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_pfc_frames", CTLFLAG_RD, &ha->hw_stats.common.rx_pfc_frames, "rx_pfc_frames"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_align_errors", CTLFLAG_RD, &ha->hw_stats.common.rx_align_errors, "rx_align_errors"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_carrier_errors", CTLFLAG_RD, &ha->hw_stats.common.rx_carrier_errors, "rx_carrier_errors"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_oversize_packets", CTLFLAG_RD, &ha->hw_stats.common.rx_oversize_packets, "rx_oversize_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_jabbers", CTLFLAG_RD, &ha->hw_stats.common.rx_jabbers, "rx_jabbers"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_undersize_packets", CTLFLAG_RD, &ha->hw_stats.common.rx_undersize_packets, 
"rx_undersize_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_fragments", CTLFLAG_RD, &ha->hw_stats.common.rx_fragments, "rx_fragments"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_64_byte_packets", CTLFLAG_RD, &ha->hw_stats.common.tx_64_byte_packets, "tx_64_byte_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_65_to_127_byte_packets", CTLFLAG_RD, &ha->hw_stats.common.tx_65_to_127_byte_packets, "tx_65_to_127_byte_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_128_to_255_byte_packets", CTLFLAG_RD, &ha->hw_stats.common.tx_128_to_255_byte_packets, "tx_128_to_255_byte_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_256_to_511_byte_packets", CTLFLAG_RD, &ha->hw_stats.common.tx_256_to_511_byte_packets, "tx_256_to_511_byte_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_512_to_1023_byte_packets", CTLFLAG_RD, &ha->hw_stats.common.tx_512_to_1023_byte_packets, "tx_512_to_1023_byte_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_1024_to_1518_byte_packets", CTLFLAG_RD, &ha->hw_stats.common.tx_1024_to_1518_byte_packets, "tx_1024_to_1518_byte_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_1519_to_2047_byte_packets", CTLFLAG_RD, &ha->hw_stats.bb.tx_1519_to_2047_byte_packets, "tx_1519_to_2047_byte_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_2048_to_4095_byte_packets", CTLFLAG_RD, &ha->hw_stats.bb.tx_2048_to_4095_byte_packets, "tx_2048_to_4095_byte_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_4096_to_9216_byte_packets", CTLFLAG_RD, &ha->hw_stats.bb.tx_4096_to_9216_byte_packets, "tx_4096_to_9216_byte_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_9217_to_16383_byte_packets", CTLFLAG_RD, &ha->hw_stats.bb.tx_9217_to_16383_byte_packets, "tx_9217_to_16383_byte_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_pause_frames", CTLFLAG_RD, &ha->hw_stats.common.tx_pause_frames, "tx_pause_frames"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_pfc_frames", CTLFLAG_RD, 
&ha->hw_stats.common.tx_pfc_frames, "tx_pfc_frames"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_lpi_entry_count", CTLFLAG_RD, &ha->hw_stats.bb.tx_lpi_entry_count, "tx_lpi_entry_count"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_total_collisions", CTLFLAG_RD, &ha->hw_stats.bb.tx_total_collisions, "tx_total_collisions"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "brb_truncates", CTLFLAG_RD, &ha->hw_stats.common.brb_truncates, "brb_truncates"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "brb_discards", CTLFLAG_RD, &ha->hw_stats.common.brb_discards, "brb_discards"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_mac_bytes", CTLFLAG_RD, &ha->hw_stats.common.rx_mac_bytes, "rx_mac_bytes"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_mac_uc_packets", CTLFLAG_RD, &ha->hw_stats.common.rx_mac_uc_packets, "rx_mac_uc_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_mac_mc_packets", CTLFLAG_RD, &ha->hw_stats.common.rx_mac_mc_packets, "rx_mac_mc_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_mac_bc_packets", CTLFLAG_RD, &ha->hw_stats.common.rx_mac_bc_packets, "rx_mac_bc_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_mac_frames_ok", CTLFLAG_RD, &ha->hw_stats.common.rx_mac_frames_ok, "rx_mac_frames_ok"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_mac_bytes", CTLFLAG_RD, &ha->hw_stats.common.tx_mac_bytes, "tx_mac_bytes"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_mac_uc_packets", CTLFLAG_RD, &ha->hw_stats.common.tx_mac_uc_packets, "tx_mac_uc_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_mac_mc_packets", CTLFLAG_RD, &ha->hw_stats.common.tx_mac_mc_packets, "tx_mac_mc_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_mac_bc_packets", CTLFLAG_RD, &ha->hw_stats.common.tx_mac_bc_packets, "tx_mac_bc_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_mac_ctrl_frames", CTLFLAG_RD, &ha->hw_stats.common.tx_mac_ctrl_frames, "tx_mac_ctrl_frames"); return; } static void qlnx_add_sysctls(qlnx_host_t *ha) { device_t dev = ha->pci_dev; struct 
sysctl_ctx_list *ctx; struct sysctl_oid_list *children; ctx = device_get_sysctl_ctx(dev); children = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); qlnx_add_fp_stats_sysctls(ha); qlnx_add_sp_stats_sysctls(ha); qlnx_add_hw_stats_sysctls(ha); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Driver_Version", CTLFLAG_RD, qlnx_ver_str, 0, "Driver Version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "STORMFW_Version", CTLFLAG_RD, ha->stormfw_ver, 0, "STORM Firmware Version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "MFW_Version", CTLFLAG_RD, ha->mfw_ver, 0, "Management Firmware Version"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "personality", CTLFLAG_RD, &ha->personality, ha->personality, "\tpersonality = 0 => Ethernet Only\n" "\tpersonality = 3 => Ethernet and RoCE\n" "\tpersonality = 4 => Ethernet and iWARP\n" "\tpersonality = 6 => Default in Shared Memory\n"); ha->dbg_level = 0; SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "debug", CTLFLAG_RW, &ha->dbg_level, ha->dbg_level, "Debug Level"); ha->dp_level = 0x01; SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "dp_level", CTLFLAG_RW, &ha->dp_level, ha->dp_level, "DP Level"); ha->dbg_trace_lro_cnt = 0; SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "dbg_trace_lro_cnt", CTLFLAG_RW, &ha->dbg_trace_lro_cnt, ha->dbg_trace_lro_cnt, "Trace LRO Counts"); ha->dbg_trace_tso_pkt_len = 0; SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "dbg_trace_tso_pkt_len", CTLFLAG_RW, &ha->dbg_trace_tso_pkt_len, ha->dbg_trace_tso_pkt_len, "Trace TSO packet lengths"); ha->dp_module = 0; SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "dp_module", CTLFLAG_RW, &ha->dp_module, ha->dp_module, "DP Module"); ha->err_inject = 0; SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "err_inject", CTLFLAG_RW, &ha->err_inject, ha->err_inject, "Error Inject"); ha->storm_stats_enable = 0; SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "storm_stats_enable", CTLFLAG_RW, &ha->storm_stats_enable, ha->storm_stats_enable, "Enable Storm Statistics Gathering"); ha->storm_stats_index = 0; SYSCTL_ADD_UINT(ctx, 
children, OID_AUTO, "storm_stats_index", CTLFLAG_RD, &ha->storm_stats_index, ha->storm_stats_index, "Enable Storm Statistics Gathering Current Index"); ha->grcdump_taken = 0; SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "grcdump_taken", CTLFLAG_RD, &ha->grcdump_taken, ha->grcdump_taken, "grcdump_taken"); ha->idle_chk_taken = 0; SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "idle_chk_taken", CTLFLAG_RD, &ha->idle_chk_taken, ha->idle_chk_taken, "idle_chk_taken"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rx_coalesce_usecs", CTLFLAG_RD, &ha->rx_coalesce_usecs, ha->rx_coalesce_usecs, "rx_coalesce_usecs"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "tx_coalesce_usecs", CTLFLAG_RD, &ha->tx_coalesce_usecs, ha->tx_coalesce_usecs, "tx_coalesce_usecs"); ha->rx_pkt_threshold = 128; SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rx_pkt_threshold", CTLFLAG_RW, &ha->rx_pkt_threshold, ha->rx_pkt_threshold, "No. of Rx Pkts to process at a time"); ha->rx_jumbo_buf_eq_mtu = 0; SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rx_jumbo_buf_eq_mtu", CTLFLAG_RW, &ha->rx_jumbo_buf_eq_mtu, ha->rx_jumbo_buf_eq_mtu, "== 0 => Rx Jumbo buffers are capped to 4Kbytes\n" "otherwise Rx Jumbo buffers are set to >= MTU size\n"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "trigger_dump", CTLTYPE_INT | CTLFLAG_RW, (void *)ha, 0, qlnx_trigger_dump_sysctl, "I", "trigger_dump"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "set_rx_coalesce_usecs", CTLTYPE_INT | CTLFLAG_RW, (void *)ha, 0, qlnx_set_rx_coalesce, "I", "rx interrupt coalesce period microseconds"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "set_tx_coalesce_usecs", CTLTYPE_INT | CTLFLAG_RW, (void *)ha, 0, qlnx_set_tx_coalesce, "I", "tx interrupt coalesce period microseconds"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "err_illegal_intr", CTLFLAG_RD, &ha->err_illegal_intr, "err_illegal_intr"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "err_fp_null", CTLFLAG_RD, &ha->err_fp_null, "err_fp_null"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "err_get_proto_invalid_type", CTLFLAG_RD, 
&ha->err_get_proto_invalid_type, "err_get_proto_invalid_type"); return; } /***************************************************************************** * Operating System Network Interface Functions *****************************************************************************/ static void qlnx_init_ifnet(device_t dev, qlnx_host_t *ha) { uint16_t device_id; struct ifnet *ifp; ifp = ha->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) panic("%s: cannot if_alloc()\n", device_get_nameunit(dev)); if_initname(ifp, device_get_name(dev), device_get_unit(dev)); device_id = pci_get_device(ha->pci_dev); #if __FreeBSD_version >= 1000000 if (device_id == QLOGIC_PCI_DEVICE_ID_1634) ifp->if_baudrate = IF_Gbps(40); else if ((device_id == QLOGIC_PCI_DEVICE_ID_1656) || (device_id == QLOGIC_PCI_DEVICE_ID_8070)) ifp->if_baudrate = IF_Gbps(25); else if (device_id == QLOGIC_PCI_DEVICE_ID_1654) ifp->if_baudrate = IF_Gbps(50); else if (device_id == QLOGIC_PCI_DEVICE_ID_1644) ifp->if_baudrate = IF_Gbps(100); ifp->if_capabilities = IFCAP_LINKSTATE; #else ifp->if_mtu = ETHERMTU; ifp->if_baudrate = (1 * 1000 * 1000 *1000); #endif /* #if __FreeBSD_version >= 1000000 */ ifp->if_init = qlnx_init; ifp->if_softc = ha; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = qlnx_ioctl; ifp->if_transmit = qlnx_transmit; ifp->if_qflush = qlnx_qflush; IFQ_SET_MAXLEN(&ifp->if_snd, qlnx_get_ifq_snd_maxlen(ha)); ifp->if_snd.ifq_drv_maxlen = qlnx_get_ifq_snd_maxlen(ha); IFQ_SET_READY(&ifp->if_snd); #if __FreeBSD_version >= 1100036 if_setgetcounterfn(ifp, qlnx_get_counter); #endif ha->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; memcpy(ha->primary_mac, qlnx_get_mac_addr(ha), ETH_ALEN); ether_ifattach(ifp, ha->primary_mac); bcopy(IF_LLADDR(ha->ifp), ha->primary_mac, ETHER_ADDR_LEN); ifp->if_capabilities = IFCAP_HWCSUM; ifp->if_capabilities |= IFCAP_JUMBO_MTU; ifp->if_capabilities |= IFCAP_VLAN_MTU; ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING; ifp->if_capabilities |= 
IFCAP_VLAN_HWFILTER; ifp->if_capabilities |= IFCAP_VLAN_HWCSUM; ifp->if_capabilities |= IFCAP_VLAN_HWTSO; ifp->if_capabilities |= IFCAP_TSO4; ifp->if_capabilities |= IFCAP_TSO6; ifp->if_capabilities |= IFCAP_LRO; ifp->if_hw_tsomax = QLNX_MAX_TSO_FRAME_SIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); ifp->if_hw_tsomaxsegcount = QLNX_MAX_SEGMENTS - 1 /* hdr */; ifp->if_hw_tsomaxsegsize = QLNX_MAX_TX_MBUF_SIZE; ifp->if_capenable = ifp->if_capabilities; ifp->if_hwassist = CSUM_IP; ifp->if_hwassist |= CSUM_TCP | CSUM_UDP; ifp->if_hwassist |= CSUM_TCP_IPV6 | CSUM_UDP_IPV6; ifp->if_hwassist |= CSUM_TSO; ifp->if_hdrlen = sizeof(struct ether_vlan_header); ifmedia_init(&ha->media, IFM_IMASK, qlnx_media_change,\ qlnx_media_status); if (device_id == QLOGIC_PCI_DEVICE_ID_1634) { ifmedia_add(&ha->media, (IFM_ETHER | IFM_40G_LR4), 0, NULL); ifmedia_add(&ha->media, (IFM_ETHER | IFM_40G_SR4), 0, NULL); ifmedia_add(&ha->media, (IFM_ETHER | IFM_40G_CR4), 0, NULL); } else if ((device_id == QLOGIC_PCI_DEVICE_ID_1656) || (device_id == QLOGIC_PCI_DEVICE_ID_8070)) { ifmedia_add(&ha->media, (IFM_ETHER | QLNX_IFM_25G_SR), 0, NULL); ifmedia_add(&ha->media, (IFM_ETHER | QLNX_IFM_25G_CR), 0, NULL); } else if (device_id == QLOGIC_PCI_DEVICE_ID_1654) { ifmedia_add(&ha->media, (IFM_ETHER | IFM_50G_KR2), 0, NULL); ifmedia_add(&ha->media, (IFM_ETHER | IFM_50G_CR2), 0, NULL); } else if (device_id == QLOGIC_PCI_DEVICE_ID_1644) { ifmedia_add(&ha->media, (IFM_ETHER | QLNX_IFM_100G_LR4), 0, NULL); ifmedia_add(&ha->media, (IFM_ETHER | QLNX_IFM_100G_SR4), 0, NULL); ifmedia_add(&ha->media, (IFM_ETHER | QLNX_IFM_100G_CR4), 0, NULL); } ifmedia_add(&ha->media, (IFM_ETHER | IFM_FDX), 0, NULL); ifmedia_add(&ha->media, (IFM_ETHER | IFM_AUTO), 0, NULL); ifmedia_set(&ha->media, (IFM_ETHER | IFM_AUTO)); QL_DPRINT2(ha, "exit\n"); return; } static void qlnx_init_locked(qlnx_host_t *ha) { struct ifnet *ifp = ha->ifp; QL_DPRINT1(ha, "Driver Initialization start \n"); qlnx_stop(ha); if (qlnx_load(ha) == 0) { 
ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; } return; } static void qlnx_init(void *arg) { qlnx_host_t *ha; ha = (qlnx_host_t *)arg; QL_DPRINT2(ha, "enter\n"); QLNX_LOCK(ha); qlnx_init_locked(ha); QLNX_UNLOCK(ha); QL_DPRINT2(ha, "exit\n"); return; } static int qlnx_config_mcast_mac_addr(qlnx_host_t *ha, uint8_t *mac_addr, uint32_t add_mac) { struct ecore_filter_mcast *mcast; struct ecore_dev *cdev; int rc; cdev = &ha->cdev; mcast = &ha->ecore_mcast; bzero(mcast, sizeof(struct ecore_filter_mcast)); if (add_mac) mcast->opcode = ECORE_FILTER_ADD; else mcast->opcode = ECORE_FILTER_REMOVE; mcast->num_mc_addrs = 1; memcpy(mcast->mac, mac_addr, ETH_ALEN); rc = ecore_filter_mcast_cmd(cdev, mcast, ECORE_SPQ_MODE_CB, NULL); return (rc); } static int qlnx_hw_add_mcast(qlnx_host_t *ha, uint8_t *mta) { int i; for (i = 0; i < QLNX_MAX_NUM_MULTICAST_ADDRS; i++) { if (QL_MAC_CMP(ha->mcast[i].addr, mta) == 0) return 0; /* its been already added */ } for (i = 0; i < QLNX_MAX_NUM_MULTICAST_ADDRS; i++) { if ((ha->mcast[i].addr[0] == 0) && (ha->mcast[i].addr[1] == 0) && (ha->mcast[i].addr[2] == 0) && (ha->mcast[i].addr[3] == 0) && (ha->mcast[i].addr[4] == 0) && (ha->mcast[i].addr[5] == 0)) { if (qlnx_config_mcast_mac_addr(ha, mta, 1)) return (-1); bcopy(mta, ha->mcast[i].addr, ETH_ALEN); ha->nmcast++; return 0; } } return 0; } static int qlnx_hw_del_mcast(qlnx_host_t *ha, uint8_t *mta) { int i; for (i = 0; i < QLNX_MAX_NUM_MULTICAST_ADDRS; i++) { if (QL_MAC_CMP(ha->mcast[i].addr, mta) == 0) { if (qlnx_config_mcast_mac_addr(ha, mta, 0)) return (-1); ha->mcast[i].addr[0] = 0; ha->mcast[i].addr[1] = 0; ha->mcast[i].addr[2] = 0; ha->mcast[i].addr[3] = 0; ha->mcast[i].addr[4] = 0; ha->mcast[i].addr[5] = 0; ha->nmcast--; return 0; } } return 0; } /* * Name: qls_hw_set_multi * Function: Sets the Multicast Addresses provided the host O.S into the * hardware (for the given interface) */ static void qlnx_hw_set_multi(qlnx_host_t *ha, uint8_t *mta, uint32_t mcnt, 
uint32_t add_mac) { int i; for (i = 0; i < mcnt; i++) { if (add_mac) { if (qlnx_hw_add_mcast(ha, mta)) break; } else { if (qlnx_hw_del_mcast(ha, mta)) break; } mta += ETHER_HDR_LEN; } return; } #define QLNX_MCAST_ADDRS_SIZE (QLNX_MAX_NUM_MULTICAST_ADDRS * ETHER_HDR_LEN) static int qlnx_set_multi(qlnx_host_t *ha, uint32_t add_multi) { uint8_t mta[QLNX_MCAST_ADDRS_SIZE]; struct ifmultiaddr *ifma; int mcnt = 0; struct ifnet *ifp = ha->ifp; int ret = 0; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (mcnt == QLNX_MAX_NUM_MULTICAST_ADDRS) break; bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr), &mta[mcnt * ETHER_HDR_LEN], ETHER_HDR_LEN); mcnt++; } if_maddr_runlock(ifp); QLNX_LOCK(ha); qlnx_hw_set_multi(ha, mta, mcnt, add_multi); QLNX_UNLOCK(ha); return (ret); } static int qlnx_set_promisc(qlnx_host_t *ha) { int rc = 0; uint8_t filter; filter = ha->filter; filter |= ECORE_ACCEPT_MCAST_UNMATCHED; filter |= ECORE_ACCEPT_UCAST_UNMATCHED; rc = qlnx_set_rx_accept_filter(ha, filter); return (rc); } static int qlnx_set_allmulti(qlnx_host_t *ha) { int rc = 0; uint8_t filter; filter = ha->filter; filter |= ECORE_ACCEPT_MCAST_UNMATCHED; rc = qlnx_set_rx_accept_filter(ha, filter); return (rc); } static int qlnx_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { int ret = 0, mask; struct ifreq *ifr = (struct ifreq *)data; struct ifaddr *ifa = (struct ifaddr *)data; qlnx_host_t *ha; ha = (qlnx_host_t *)ifp->if_softc; switch (cmd) { case SIOCSIFADDR: QL_DPRINT4(ha, "SIOCSIFADDR (0x%lx)\n", cmd); if (ifa->ifa_addr->sa_family == AF_INET) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { QLNX_LOCK(ha); qlnx_init_locked(ha); QLNX_UNLOCK(ha); } QL_DPRINT4(ha, "SIOCSIFADDR (0x%lx) ipv4 [0x%08x]\n", cmd, ntohl(IA_SIN(ifa)->sin_addr.s_addr)); arp_ifinit(ifp, ifa); } else { ether_ioctl(ifp, cmd, data); } break; case SIOCSIFMTU: QL_DPRINT4(ha, "SIOCSIFMTU (0x%lx)\n", cmd); if 
(ifr->ifr_mtu > QLNX_MAX_MTU) { ret = EINVAL; } else { QLNX_LOCK(ha); ifp->if_mtu = ifr->ifr_mtu; ha->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { qlnx_init_locked(ha); } QLNX_UNLOCK(ha); } break; case SIOCSIFFLAGS: QL_DPRINT4(ha, "SIOCSIFFLAGS (0x%lx)\n", cmd); QLNX_LOCK(ha); if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { if ((ifp->if_flags ^ ha->if_flags) & IFF_PROMISC) { ret = qlnx_set_promisc(ha); } else if ((ifp->if_flags ^ ha->if_flags) & IFF_ALLMULTI) { ret = qlnx_set_allmulti(ha); } } else { ha->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; qlnx_init_locked(ha); } } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) qlnx_stop(ha); ha->if_flags = ifp->if_flags; } QLNX_UNLOCK(ha); break; case SIOCADDMULTI: QL_DPRINT4(ha, "%s (0x%lx)\n", "SIOCADDMULTI", cmd); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { if (qlnx_set_multi(ha, 1)) ret = EINVAL; } break; case SIOCDELMULTI: QL_DPRINT4(ha, "%s (0x%lx)\n", "SIOCDELMULTI", cmd); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { if (qlnx_set_multi(ha, 0)) ret = EINVAL; } break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: QL_DPRINT4(ha, "SIOCSIFMEDIA/SIOCGIFMEDIA (0x%lx)\n", cmd); ret = ifmedia_ioctl(ifp, ifr, &ha->media, cmd); break; case SIOCSIFCAP: mask = ifr->ifr_reqcap ^ ifp->if_capenable; QL_DPRINT4(ha, "SIOCSIFCAP (0x%lx)\n", cmd); if (mask & IFCAP_HWCSUM) ifp->if_capenable ^= IFCAP_HWCSUM; if (mask & IFCAP_TSO4) ifp->if_capenable ^= IFCAP_TSO4; if (mask & IFCAP_TSO6) ifp->if_capenable ^= IFCAP_TSO6; if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) qlnx_init(ha); VLAN_CAPABILITIES(ifp); break; #if (__FreeBSD_version >= 1100101) case SIOCGI2C: { struct ifi2creq i2c; struct ecore_hwfn *p_hwfn = &ha->cdev.hwfns[0]; struct ecore_ptt *p_ptt; - 
ret = copyin(ifr->ifr_data, &i2c, sizeof(i2c)); + ret = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c)); if (ret) break; if ((i2c.len > sizeof (i2c.data)) || (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2)) { ret = EINVAL; break; } p_ptt = ecore_ptt_acquire(p_hwfn); if (!p_ptt) { QL_DPRINT1(ha, "ecore_ptt_acquire failed\n"); ret = -1; break; } ret = ecore_mcp_phy_sfp_read(p_hwfn, p_ptt, (ha->pci_func & 0x1), i2c.dev_addr, i2c.offset, i2c.len, &i2c.data[0]); ecore_ptt_release(p_hwfn, p_ptt); if (ret) { ret = -1; break; } - ret = copyout(&i2c, ifr->ifr_data, sizeof(i2c)); + ret = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c)); QL_DPRINT8(ha, "SIOCGI2C copyout ret = %d \ len = %d addr = 0x%02x offset = 0x%04x \ data[0..7]=0x%02x 0x%02x 0x%02x 0x%02x 0x%02x \ 0x%02x 0x%02x 0x%02x\n", ret, i2c.len, i2c.dev_addr, i2c.offset, i2c.data[0], i2c.data[1], i2c.data[2], i2c.data[3], i2c.data[4], i2c.data[5], i2c.data[6], i2c.data[7]); break; } #endif /* #if (__FreeBSD_version >= 1100101) */ default: QL_DPRINT4(ha, "default (0x%lx)\n", cmd); ret = ether_ioctl(ifp, cmd, data); break; } return (ret); } static int qlnx_media_change(struct ifnet *ifp) { qlnx_host_t *ha; struct ifmedia *ifm; int ret = 0; ha = (qlnx_host_t *)ifp->if_softc; QL_DPRINT2(ha, "enter\n"); ifm = &ha->media; if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) ret = EINVAL; QL_DPRINT2(ha, "exit\n"); return (ret); } static void qlnx_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) { qlnx_host_t *ha; ha = (qlnx_host_t *)ifp->if_softc; QL_DPRINT2(ha, "enter\n"); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (ha->link_up) { ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= (IFM_FDX | qlnx_get_optics(ha, &ha->if_link)); if (ha->if_link.link_partner_caps & (QLNX_LINK_CAP_Pause | QLNX_LINK_CAP_Asym_Pause)) ifmr->ifm_active |= (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE); } QL_DPRINT2(ha, "exit (%s)\n", (ha->link_up ? 
"link_up" : "link_down")); return; } static void qlnx_free_tx_pkt(qlnx_host_t *ha, struct qlnx_fastpath *fp, struct qlnx_tx_queue *txq) { u16 idx; struct mbuf *mp; bus_dmamap_t map; int i; struct eth_tx_bd *tx_data_bd; struct eth_tx_1st_bd *first_bd; int nbds = 0; idx = txq->sw_tx_cons; mp = txq->sw_tx_ring[idx].mp; map = txq->sw_tx_ring[idx].map; if ((mp == NULL) || QL_ERR_INJECT(ha, QL_ERR_INJCT_TX_INT_MBUF_NULL)){ QL_RESET_ERR_INJECT(ha, QL_ERR_INJCT_TX_INT_MBUF_NULL); QL_DPRINT1(ha, "(mp == NULL) " " tx_idx = 0x%x" " ecore_prod_idx = 0x%x" " ecore_cons_idx = 0x%x" " hw_bd_cons = 0x%x" " txq_db_last = 0x%x" " elem_left = 0x%x\n", fp->rss_id, ecore_chain_get_prod_idx(&txq->tx_pbl), ecore_chain_get_cons_idx(&txq->tx_pbl), le16toh(*txq->hw_cons_ptr), txq->tx_db.raw, ecore_chain_get_elem_left(&txq->tx_pbl)); fp->err_tx_free_pkt_null++; //DEBUG qlnx_trigger_dump(ha); return; } else { QLNX_INC_OPACKETS((ha->ifp)); QLNX_INC_OBYTES((ha->ifp), (mp->m_pkthdr.len)); bus_dmamap_sync(ha->tx_tag, map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ha->tx_tag, map); fp->tx_pkts_freed++; fp->tx_pkts_completed++; m_freem(mp); } first_bd = (struct eth_tx_1st_bd *)ecore_chain_consume(&txq->tx_pbl); nbds = first_bd->data.nbds; // BD_SET_UNMAP_ADDR_LEN(first_bd, 0, 0); for (i = 1; i < nbds; i++) { tx_data_bd = ecore_chain_consume(&txq->tx_pbl); // BD_SET_UNMAP_ADDR_LEN(tx_data_bd, 0, 0); } txq->sw_tx_ring[idx].flags = 0; txq->sw_tx_ring[idx].mp = NULL; txq->sw_tx_ring[idx].map = (bus_dmamap_t)0; return; } static void qlnx_tx_int(qlnx_host_t *ha, struct qlnx_fastpath *fp, struct qlnx_tx_queue *txq) { u16 hw_bd_cons; u16 ecore_cons_idx; uint16_t diff; uint16_t idx, idx2; hw_bd_cons = le16toh(*txq->hw_cons_ptr); while (hw_bd_cons != (ecore_cons_idx = ecore_chain_get_cons_idx(&txq->tx_pbl))) { if (hw_bd_cons < ecore_cons_idx) { diff = (1 << 16) - (ecore_cons_idx - hw_bd_cons); } else { diff = hw_bd_cons - ecore_cons_idx; } if ((diff > TX_RING_SIZE) || QL_ERR_INJECT(ha, 
QL_ERR_INJCT_TX_INT_DIFF)){ QL_RESET_ERR_INJECT(ha, QL_ERR_INJCT_TX_INT_DIFF); QL_DPRINT1(ha, "(diff = 0x%x) " " tx_idx = 0x%x" " ecore_prod_idx = 0x%x" " ecore_cons_idx = 0x%x" " hw_bd_cons = 0x%x" " txq_db_last = 0x%x" " elem_left = 0x%x\n", diff, fp->rss_id, ecore_chain_get_prod_idx(&txq->tx_pbl), ecore_chain_get_cons_idx(&txq->tx_pbl), le16toh(*txq->hw_cons_ptr), txq->tx_db.raw, ecore_chain_get_elem_left(&txq->tx_pbl)); fp->err_tx_cons_idx_conflict++; //DEBUG qlnx_trigger_dump(ha); } idx = (txq->sw_tx_cons + 1) & (TX_RING_SIZE - 1); idx2 = (txq->sw_tx_cons + 2) & (TX_RING_SIZE - 1); prefetch(txq->sw_tx_ring[idx].mp); prefetch(txq->sw_tx_ring[idx2].mp); qlnx_free_tx_pkt(ha, fp, txq); txq->sw_tx_cons = (txq->sw_tx_cons + 1) & (TX_RING_SIZE - 1); } return; } static int qlnx_transmit_locked(struct ifnet *ifp,struct qlnx_fastpath *fp, struct mbuf *mp) { int ret = 0; struct qlnx_tx_queue *txq; qlnx_host_t * ha; uint16_t elem_left; txq = fp->txq[0]; ha = (qlnx_host_t *)fp->edev; if ((!(ifp->if_drv_flags & IFF_DRV_RUNNING)) || (!ha->link_up)) { if(mp != NULL) ret = drbr_enqueue(ifp, fp->tx_br, mp); return (ret); } if(mp != NULL) ret = drbr_enqueue(ifp, fp->tx_br, mp); mp = drbr_peek(ifp, fp->tx_br); while (mp != NULL) { if (qlnx_send(ha, fp, &mp)) { if (mp != NULL) { drbr_putback(ifp, fp->tx_br, mp); } else { fp->tx_pkts_processed++; drbr_advance(ifp, fp->tx_br); } goto qlnx_transmit_locked_exit; } else { drbr_advance(ifp, fp->tx_br); fp->tx_pkts_transmitted++; fp->tx_pkts_processed++; } mp = drbr_peek(ifp, fp->tx_br); } qlnx_transmit_locked_exit: if((qlnx_num_tx_compl(ha,fp, fp->txq[0]) > QLNX_TX_COMPL_THRESH) || ((int)(elem_left = ecore_chain_get_elem_left(&txq->tx_pbl)) < QLNX_TX_ELEM_MAX_THRESH)) (void)qlnx_tx_int(ha, fp, fp->txq[0]); QL_DPRINT2(ha, "%s: exit ret = %d\n", __func__, ret); return ret; } static int qlnx_transmit(struct ifnet *ifp, struct mbuf *mp) { qlnx_host_t *ha = (qlnx_host_t *)ifp->if_softc; struct qlnx_fastpath *fp; int rss_id = 0, ret = 0; 
#ifdef QLNX_TRACEPERF_DATA uint64_t tx_pkts = 0, tx_compl = 0; #endif QL_DPRINT2(ha, "enter\n"); #if __FreeBSD_version >= 1100000 if (M_HASHTYPE_GET(mp) != M_HASHTYPE_NONE) #else if (mp->m_flags & M_FLOWID) #endif rss_id = (mp->m_pkthdr.flowid % ECORE_RSS_IND_TABLE_SIZE) % ha->num_rss; fp = &ha->fp_array[rss_id]; if (fp->tx_br == NULL) { ret = EINVAL; goto qlnx_transmit_exit; } if (mtx_trylock(&fp->tx_mtx)) { #ifdef QLNX_TRACEPERF_DATA tx_pkts = fp->tx_pkts_transmitted; tx_compl = fp->tx_pkts_completed; #endif ret = qlnx_transmit_locked(ifp, fp, mp); #ifdef QLNX_TRACEPERF_DATA fp->tx_pkts_trans_ctx += (fp->tx_pkts_transmitted - tx_pkts); fp->tx_pkts_compl_ctx += (fp->tx_pkts_completed - tx_compl); #endif mtx_unlock(&fp->tx_mtx); } else { if (mp != NULL && (fp->fp_taskqueue != NULL)) { ret = drbr_enqueue(ifp, fp->tx_br, mp); taskqueue_enqueue(fp->fp_taskqueue, &fp->fp_task); } } qlnx_transmit_exit: QL_DPRINT2(ha, "exit ret = %d\n", ret); return ret; } static void qlnx_qflush(struct ifnet *ifp) { int rss_id; struct qlnx_fastpath *fp; struct mbuf *mp; qlnx_host_t *ha; ha = (qlnx_host_t *)ifp->if_softc; QL_DPRINT2(ha, "enter\n"); for (rss_id = 0; rss_id < ha->num_rss; rss_id++) { fp = &ha->fp_array[rss_id]; if (fp == NULL) continue; if (fp->tx_br) { mtx_lock(&fp->tx_mtx); while ((mp = drbr_dequeue(ifp, fp->tx_br)) != NULL) { fp->tx_pkts_freed++; m_freem(mp); } mtx_unlock(&fp->tx_mtx); } } QL_DPRINT2(ha, "exit\n"); return; } static void qlnx_txq_doorbell_wr32(qlnx_host_t *ha, void *reg_addr, uint32_t value) { struct ecore_dev *cdev; uint32_t offset; cdev = &ha->cdev; offset = (uint32_t)((uint8_t *)reg_addr - (uint8_t *)cdev->doorbells); bus_write_4(ha->pci_dbells, offset, value); bus_barrier(ha->pci_reg, 0, 0, BUS_SPACE_BARRIER_READ); bus_barrier(ha->pci_dbells, 0, 0, BUS_SPACE_BARRIER_READ); return; } static uint32_t qlnx_tcp_offset(qlnx_host_t *ha, struct mbuf *mp) { struct ether_vlan_header *eh = NULL; struct ip *ip = NULL; struct ip6_hdr *ip6 = NULL; struct tcphdr 
*th = NULL; uint32_t ehdrlen = 0, ip_hlen = 0, offset = 0; uint16_t etype = 0; device_t dev; uint8_t buf[sizeof(struct ip6_hdr)]; dev = ha->pci_dev; eh = mtod(mp, struct ether_vlan_header *); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; etype = ntohs(eh->evl_proto); } else { ehdrlen = ETHER_HDR_LEN; etype = ntohs(eh->evl_encap_proto); } switch (etype) { case ETHERTYPE_IP: ip = (struct ip *)(mp->m_data + ehdrlen); ip_hlen = sizeof (struct ip); if (mp->m_len < (ehdrlen + ip_hlen)) { m_copydata(mp, ehdrlen, sizeof(struct ip), buf); ip = (struct ip *)buf; } th = (struct tcphdr *)(ip + 1); offset = ip_hlen + ehdrlen + (th->th_off << 2); break; case ETHERTYPE_IPV6: ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); ip_hlen = sizeof(struct ip6_hdr); if (mp->m_len < (ehdrlen + ip_hlen)) { m_copydata(mp, ehdrlen, sizeof (struct ip6_hdr), buf); ip6 = (struct ip6_hdr *)buf; } th = (struct tcphdr *)(ip6 + 1); offset = ip_hlen + ehdrlen + (th->th_off << 2); break; default: break; } return (offset); } static __inline int qlnx_tso_check(struct qlnx_fastpath *fp, bus_dma_segment_t *segs, int nsegs, uint32_t offset) { int i; uint32_t sum, nbds_in_hdr = 1; uint32_t window; bus_dma_segment_t *s_seg; /* If the header spans mulitple segments, skip those segments */ if (nsegs < ETH_TX_LSO_WINDOW_BDS_NUM) return (0); i = 0; while ((i < nsegs) && (offset >= segs->ds_len)) { offset = offset - segs->ds_len; segs++; i++; nbds_in_hdr++; } window = ETH_TX_LSO_WINDOW_BDS_NUM - nbds_in_hdr; nsegs = nsegs - i; while (nsegs >= window) { sum = 0; s_seg = segs; for (i = 0; i < window; i++){ sum += s_seg->ds_len; s_seg++; } if (sum < ETH_TX_LSO_WINDOW_MIN_LEN) { fp->tx_lso_wnd_min_len++; return (-1); } nsegs = nsegs - 1; segs++; } return (0); } static int qlnx_send(qlnx_host_t *ha, struct qlnx_fastpath *fp, struct mbuf **m_headp) { bus_dma_segment_t *segs; bus_dmamap_t map = 0; uint32_t nsegs = 0; int ret = -1; struct mbuf *m_head = *m_headp; 
uint16_t idx = 0; uint16_t elem_left; uint8_t nbd = 0; struct qlnx_tx_queue *txq; struct eth_tx_1st_bd *first_bd; struct eth_tx_2nd_bd *second_bd; struct eth_tx_3rd_bd *third_bd; struct eth_tx_bd *tx_data_bd; int seg_idx = 0; uint32_t nbds_in_hdr = 0; uint32_t offset = 0; #ifdef QLNX_TRACE_PERF_DATA uint16_t bd_used; #endif QL_DPRINT8(ha, "enter\n"); if (!ha->link_up) return (-1); first_bd = NULL; second_bd = NULL; third_bd = NULL; tx_data_bd = NULL; txq = fp->txq[0]; if ((int)(elem_left = ecore_chain_get_elem_left(&txq->tx_pbl)) < QLNX_TX_ELEM_MIN_THRESH) { fp->tx_nsegs_gt_elem_left++; fp->err_tx_nsegs_gt_elem_left++; return (ENOBUFS); } idx = txq->sw_tx_prod; map = txq->sw_tx_ring[idx].map; segs = txq->segs; ret = bus_dmamap_load_mbuf_sg(ha->tx_tag, map, m_head, segs, &nsegs, BUS_DMA_NOWAIT); if (ha->dbg_trace_tso_pkt_len) { if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { if (!fp->tx_tso_min_pkt_len) { fp->tx_tso_min_pkt_len = m_head->m_pkthdr.len; fp->tx_tso_min_pkt_len = m_head->m_pkthdr.len; } else { if (fp->tx_tso_min_pkt_len > m_head->m_pkthdr.len) fp->tx_tso_min_pkt_len = m_head->m_pkthdr.len; if (fp->tx_tso_max_pkt_len < m_head->m_pkthdr.len) fp->tx_tso_max_pkt_len = m_head->m_pkthdr.len; } } } if (m_head->m_pkthdr.csum_flags & CSUM_TSO) offset = qlnx_tcp_offset(ha, m_head); if ((ret == EFBIG) || ((nsegs > QLNX_MAX_SEGMENTS_NON_TSO) && ( (!(m_head->m_pkthdr.csum_flags & CSUM_TSO)) || ((m_head->m_pkthdr.csum_flags & CSUM_TSO) && qlnx_tso_check(fp, segs, nsegs, offset))))) { struct mbuf *m; QL_DPRINT8(ha, "EFBIG [%d]\n", m_head->m_pkthdr.len); fp->tx_defrag++; m = m_defrag(m_head, M_NOWAIT); if (m == NULL) { fp->err_tx_defrag++; fp->tx_pkts_freed++; m_freem(m_head); *m_headp = NULL; QL_DPRINT1(ha, "m_defrag() = NULL [%d]\n", ret); return (ENOBUFS); } m_head = m; *m_headp = m_head; if ((ret = bus_dmamap_load_mbuf_sg(ha->tx_tag, map, m_head, segs, &nsegs, BUS_DMA_NOWAIT))) { fp->err_tx_defrag_dmamap_load++; QL_DPRINT1(ha, "bus_dmamap_load_mbuf_sg failed0 [%d, 
%d]\n", ret, m_head->m_pkthdr.len); fp->tx_pkts_freed++; m_freem(m_head); *m_headp = NULL; return (ret); } if ((nsegs > QLNX_MAX_SEGMENTS_NON_TSO) && !(m_head->m_pkthdr.csum_flags & CSUM_TSO)) { fp->err_tx_non_tso_max_seg++; QL_DPRINT1(ha, "(%d) nsegs too many for non-TSO [%d, %d]\n", ret, nsegs, m_head->m_pkthdr.len); fp->tx_pkts_freed++; m_freem(m_head); *m_headp = NULL; return (ret); } if (m_head->m_pkthdr.csum_flags & CSUM_TSO) offset = qlnx_tcp_offset(ha, m_head); } else if (ret) { fp->err_tx_dmamap_load++; QL_DPRINT1(ha, "bus_dmamap_load_mbuf_sg failed1 [%d, %d]\n", ret, m_head->m_pkthdr.len); fp->tx_pkts_freed++; m_freem(m_head); *m_headp = NULL; return (ret); } QL_ASSERT(ha, (nsegs != 0), ("qlnx_send: empty packet")); if (ha->dbg_trace_tso_pkt_len) { if (nsegs < QLNX_FP_MAX_SEGS) fp->tx_pkts[(nsegs - 1)]++; else fp->tx_pkts[(QLNX_FP_MAX_SEGS - 1)]++; } #ifdef QLNX_TRACE_PERF_DATA if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { if(m_head->m_pkthdr.len <= 2048) fp->tx_pkts_hist[0]++; else if((m_head->m_pkthdr.len > 2048) && (m_head->m_pkthdr.len <= 4096)) fp->tx_pkts_hist[1]++; else if((m_head->m_pkthdr.len > 4096) && (m_head->m_pkthdr.len <= 8192)) fp->tx_pkts_hist[2]++; else if((m_head->m_pkthdr.len > 8192) && (m_head->m_pkthdr.len <= 12288 )) fp->tx_pkts_hist[3]++; else if((m_head->m_pkthdr.len > 11288) && (m_head->m_pkthdr.len <= 16394)) fp->tx_pkts_hist[4]++; else if((m_head->m_pkthdr.len > 16384) && (m_head->m_pkthdr.len <= 20480)) fp->tx_pkts_hist[5]++; else if((m_head->m_pkthdr.len > 20480) && (m_head->m_pkthdr.len <= 24576)) fp->tx_pkts_hist[6]++; else if((m_head->m_pkthdr.len > 24576) && (m_head->m_pkthdr.len <= 28672)) fp->tx_pkts_hist[7]++; else if((m_head->m_pkthdr.len > 28762) && (m_head->m_pkthdr.len <= 32768)) fp->tx_pkts_hist[8]++; else if((m_head->m_pkthdr.len > 32768) && (m_head->m_pkthdr.len <= 36864)) fp->tx_pkts_hist[9]++; else if((m_head->m_pkthdr.len > 36864) && (m_head->m_pkthdr.len <= 40960)) fp->tx_pkts_hist[10]++; else 
if((m_head->m_pkthdr.len > 40960) && (m_head->m_pkthdr.len <= 45056)) fp->tx_pkts_hist[11]++; else if((m_head->m_pkthdr.len > 45056) && (m_head->m_pkthdr.len <= 49152)) fp->tx_pkts_hist[12]++; else if((m_head->m_pkthdr.len > 49512) && m_head->m_pkthdr.len <= 53248)) fp->tx_pkts_hist[13]++; else if((m_head->m_pkthdr.len > 53248) && (m_head->m_pkthdr.len <= 57344)) fp->tx_pkts_hist[14]++; else if((m_head->m_pkthdr.len > 53248) && (m_head->m_pkthdr.len <= 57344)) fp->tx_pkts_hist[15]++; else if((m_head->m_pkthdr.len > 57344) && (m_head->m_pkthdr.len <= 61440)) fp->tx_pkts_hist[16]++; else fp->tx_pkts_hist[17]++; } if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { elem_left = ecore_chain_get_elem_left(&txq->tx_pbl); bd_used = TX_RING_SIZE - elem_left; if(bd_used <= 100) fp->tx_pkts_q[0]++; else if((bd_used > 100) && (bd_used <= 500)) fp->tx_pkts_q[1]++; else if((bd_used > 500) && (bd_used <= 1000)) fp->tx_pkts_q[2]++; else if((bd_used > 1000) && (bd_used <= 2000)) fp->tx_pkts_q[3]++; else if((bd_used > 3000) && (bd_used <= 4000)) fp->tx_pkts_q[4]++; else if((bd_used > 4000) && (bd_used <= 5000)) fp->tx_pkts_q[5]++; else if((bd_used > 6000) && (bd_used <= 7000)) fp->tx_pkts_q[6]++; else if((bd_used > 7000) && (bd_used <= 8000)) fp->tx_pkts_q[7]++; else if((bd_used > 8000) && (bd_used <= 9000)) fp->tx_pkts_q[8]++; else if((bd_used > 9000) && (bd_used <= 10000)) fp->tx_pkts_q[9]++; else if((bd_used > 10000) && (bd_used <= 11000)) fp->tx_pkts_q[10]++; else if((bd_used > 11000) && (bd_used <= 12000)) fp->tx_pkts_q[11]++; else if((bd_used > 12000) && (bd_used <= 13000)) fp->tx_pkts_q[12]++; else if((bd_used > 13000) && (bd_used <= 14000)) fp->tx_pkts_q[13]++; else if((bd_used > 14000) && (bd_used <= 15000)) fp->tx_pkts_q[14]++; else if((bd_used > 15000) && (bd_used <= 16000)) fp->tx_pkts_q[15]++; else fp->tx_pkts_q[16]++; } #endif /* end of QLNX_TRACE_PERF_DATA */ if ((nsegs + QLNX_TX_ELEM_RESERVE) > (int)(elem_left = ecore_chain_get_elem_left(&txq->tx_pbl))) { QL_DPRINT1(ha, 
"(%d, 0x%x) insuffient BDs" " in chain[%d] trying to free packets\n", nsegs, elem_left, fp->rss_id); fp->tx_nsegs_gt_elem_left++; (void)qlnx_tx_int(ha, fp, txq); if ((nsegs + QLNX_TX_ELEM_RESERVE) > (int)(elem_left = ecore_chain_get_elem_left(&txq->tx_pbl))) { QL_DPRINT1(ha, "(%d, 0x%x) insuffient BDs in chain[%d]\n", nsegs, elem_left, fp->rss_id); fp->err_tx_nsegs_gt_elem_left++; fp->tx_ring_full = 1; if (ha->storm_stats_enable) ha->storm_stats_gather = 1; return (ENOBUFS); } } bus_dmamap_sync(ha->tx_tag, map, BUS_DMASYNC_PREWRITE); txq->sw_tx_ring[idx].mp = m_head; first_bd = (struct eth_tx_1st_bd *)ecore_chain_produce(&txq->tx_pbl); memset(first_bd, 0, sizeof(*first_bd)); first_bd->data.bd_flags.bitfields = 1 << ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT; BD_SET_UNMAP_ADDR_LEN(first_bd, segs->ds_addr, segs->ds_len); nbd++; if (m_head->m_pkthdr.csum_flags & CSUM_IP) { first_bd->data.bd_flags.bitfields |= (1 << ETH_TX_1ST_BD_FLAGS_IP_CSUM_SHIFT); } if (m_head->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_TCP | CSUM_TCP_IPV6 | CSUM_UDP_IPV6)) { first_bd->data.bd_flags.bitfields |= (1 << ETH_TX_1ST_BD_FLAGS_L4_CSUM_SHIFT); } if (m_head->m_flags & M_VLANTAG) { first_bd->data.vlan = m_head->m_pkthdr.ether_vtag; first_bd->data.bd_flags.bitfields |= (1 << ETH_TX_1ST_BD_FLAGS_VLAN_INSERTION_SHIFT); } if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { first_bd->data.bd_flags.bitfields |= (1 << ETH_TX_1ST_BD_FLAGS_LSO_SHIFT); first_bd->data.bd_flags.bitfields |= (1 << ETH_TX_1ST_BD_FLAGS_IP_CSUM_SHIFT); nbds_in_hdr = 1; if (offset == segs->ds_len) { BD_SET_UNMAP_ADDR_LEN(first_bd, segs->ds_addr, offset); segs++; seg_idx++; second_bd = (struct eth_tx_2nd_bd *) ecore_chain_produce(&txq->tx_pbl); memset(second_bd, 0, sizeof(*second_bd)); nbd++; if (seg_idx < nsegs) { BD_SET_UNMAP_ADDR_LEN(second_bd, \ (segs->ds_addr), (segs->ds_len)); segs++; seg_idx++; } third_bd = (struct eth_tx_3rd_bd *) ecore_chain_produce(&txq->tx_pbl); memset(third_bd, 0, sizeof(*third_bd)); third_bd->data.lso_mss = 
m_head->m_pkthdr.tso_segsz; third_bd->data.bitfields |= (nbds_in_hdr<ds_addr), (segs->ds_len)); segs++; seg_idx++; } for (; seg_idx < nsegs; seg_idx++) { tx_data_bd = (struct eth_tx_bd *) ecore_chain_produce(&txq->tx_pbl); memset(tx_data_bd, 0, sizeof(*tx_data_bd)); BD_SET_UNMAP_ADDR_LEN(tx_data_bd, \ segs->ds_addr,\ segs->ds_len); segs++; nbd++; } } else if (offset < segs->ds_len) { BD_SET_UNMAP_ADDR_LEN(first_bd, segs->ds_addr, offset); second_bd = (struct eth_tx_2nd_bd *) ecore_chain_produce(&txq->tx_pbl); memset(second_bd, 0, sizeof(*second_bd)); BD_SET_UNMAP_ADDR_LEN(second_bd, \ (segs->ds_addr + offset),\ (segs->ds_len - offset)); nbd++; segs++; third_bd = (struct eth_tx_3rd_bd *) ecore_chain_produce(&txq->tx_pbl); memset(third_bd, 0, sizeof(*third_bd)); BD_SET_UNMAP_ADDR_LEN(third_bd, \ segs->ds_addr,\ segs->ds_len); third_bd->data.lso_mss = m_head->m_pkthdr.tso_segsz; third_bd->data.bitfields |= (nbds_in_hdr<tx_pbl); memset(tx_data_bd, 0, sizeof(*tx_data_bd)); BD_SET_UNMAP_ADDR_LEN(tx_data_bd, \ segs->ds_addr,\ segs->ds_len); segs++; nbd++; } } else { offset = offset - segs->ds_len; segs++; for (seg_idx = 1; seg_idx < nsegs; seg_idx++) { if (offset) nbds_in_hdr++; tx_data_bd = (struct eth_tx_bd *) ecore_chain_produce(&txq->tx_pbl); memset(tx_data_bd, 0, sizeof(*tx_data_bd)); if (second_bd == NULL) { second_bd = (struct eth_tx_2nd_bd *) tx_data_bd; } else if (third_bd == NULL) { third_bd = (struct eth_tx_3rd_bd *) tx_data_bd; } if (offset && (offset < segs->ds_len)) { BD_SET_UNMAP_ADDR_LEN(tx_data_bd,\ segs->ds_addr, offset); tx_data_bd = (struct eth_tx_bd *) ecore_chain_produce(&txq->tx_pbl); memset(tx_data_bd, 0, sizeof(*tx_data_bd)); if (second_bd == NULL) { second_bd = (struct eth_tx_2nd_bd *)tx_data_bd; } else if (third_bd == NULL) { third_bd = (struct eth_tx_3rd_bd *)tx_data_bd; } BD_SET_UNMAP_ADDR_LEN(tx_data_bd,\ (segs->ds_addr + offset), \ (segs->ds_len - offset)); nbd++; offset = 0; } else { if (offset) offset = offset - segs->ds_len; 
BD_SET_UNMAP_ADDR_LEN(tx_data_bd,\ segs->ds_addr, segs->ds_len); } segs++; nbd++; } if (third_bd == NULL) { third_bd = (struct eth_tx_3rd_bd *) ecore_chain_produce(&txq->tx_pbl); memset(third_bd, 0, sizeof(*third_bd)); } third_bd->data.lso_mss = m_head->m_pkthdr.tso_segsz; third_bd->data.bitfields |= (nbds_in_hdr<tx_tso_pkts++; } else { segs++; for (seg_idx = 1; seg_idx < nsegs; seg_idx++) { tx_data_bd = (struct eth_tx_bd *) ecore_chain_produce(&txq->tx_pbl); memset(tx_data_bd, 0, sizeof(*tx_data_bd)); BD_SET_UNMAP_ADDR_LEN(tx_data_bd, segs->ds_addr,\ segs->ds_len); segs++; nbd++; } first_bd->data.bitfields = (m_head->m_pkthdr.len & ETH_TX_DATA_1ST_BD_PKT_LEN_MASK) << ETH_TX_DATA_1ST_BD_PKT_LEN_SHIFT; first_bd->data.bitfields = htole16(first_bd->data.bitfields); fp->tx_non_tso_pkts++; } first_bd->data.nbds = nbd; if (ha->dbg_trace_tso_pkt_len) { if (fp->tx_tso_max_nsegs < nsegs) fp->tx_tso_max_nsegs = nsegs; if ((nsegs < fp->tx_tso_min_nsegs) || (!fp->tx_tso_min_nsegs)) fp->tx_tso_min_nsegs = nsegs; } txq->sw_tx_ring[idx].nsegs = nsegs; txq->sw_tx_prod = (txq->sw_tx_prod + 1) & (TX_RING_SIZE - 1); txq->tx_db.data.bd_prod = htole16(ecore_chain_get_prod_idx(&txq->tx_pbl)); qlnx_txq_doorbell_wr32(ha, txq->doorbell_addr, txq->tx_db.raw); QL_DPRINT8(ha, "exit\n"); return (0); } static void qlnx_stop(qlnx_host_t *ha) { struct ifnet *ifp = ha->ifp; device_t dev; int i; dev = ha->pci_dev; ifp->if_drv_flags &= ~(IFF_DRV_OACTIVE | IFF_DRV_RUNNING); /* * We simply lock and unlock each fp->tx_mtx to * propagate the if_drv_flags * state to each tx thread */ QL_DPRINT1(ha, "QLNX STATE = %d\n",ha->state); if (ha->state == QLNX_STATE_OPEN) { for (i = 0; i < ha->num_rss; i++) { struct qlnx_fastpath *fp = &ha->fp_array[i]; mtx_lock(&fp->tx_mtx); mtx_unlock(&fp->tx_mtx); if (fp->fp_taskqueue != NULL) taskqueue_enqueue(fp->fp_taskqueue, &fp->fp_task); } } qlnx_unload(ha); return; } static int qlnx_get_ifq_snd_maxlen(qlnx_host_t *ha) { return(TX_RING_SIZE - 1); } uint8_t * 
qlnx_get_mac_addr(qlnx_host_t *ha) { struct ecore_hwfn *p_hwfn; p_hwfn = &ha->cdev.hwfns[0]; return (p_hwfn->hw_info.hw_mac_addr); } static uint32_t qlnx_get_optics(qlnx_host_t *ha, struct qlnx_link_output *if_link) { uint32_t ifm_type = 0; switch (if_link->media_type) { case MEDIA_MODULE_FIBER: case MEDIA_UNSPECIFIED: if (if_link->speed == (100 * 1000)) ifm_type = QLNX_IFM_100G_SR4; else if (if_link->speed == (40 * 1000)) ifm_type = IFM_40G_SR4; else if (if_link->speed == (25 * 1000)) ifm_type = QLNX_IFM_25G_SR; else if (if_link->speed == (10 * 1000)) ifm_type = (IFM_10G_LR | IFM_10G_SR); else if (if_link->speed == (1 * 1000)) ifm_type = (IFM_1000_SX | IFM_1000_LX); break; case MEDIA_DA_TWINAX: if (if_link->speed == (100 * 1000)) ifm_type = QLNX_IFM_100G_CR4; else if (if_link->speed == (40 * 1000)) ifm_type = IFM_40G_CR4; else if (if_link->speed == (25 * 1000)) ifm_type = QLNX_IFM_25G_CR; else if (if_link->speed == (10 * 1000)) ifm_type = IFM_10G_TWINAX; break; default : ifm_type = IFM_UNKNOWN; break; } return (ifm_type); } /***************************************************************************** * Interrupt Service Functions *****************************************************************************/ static int qlnx_rx_jumbo_chain(qlnx_host_t *ha, struct qlnx_fastpath *fp, struct mbuf *mp_head, uint16_t len) { struct mbuf *mp, *mpf, *mpl; struct sw_rx_data *sw_rx_data; struct qlnx_rx_queue *rxq; uint16_t len_in_buffer; rxq = fp->rxq; mpf = mpl = mp = NULL; while (len) { rxq->sw_rx_cons = (rxq->sw_rx_cons + 1) & (RX_RING_SIZE - 1); sw_rx_data = &rxq->sw_rx_ring[rxq->sw_rx_cons]; mp = sw_rx_data->data; if (mp == NULL) { QL_DPRINT1(ha, "mp = NULL\n"); fp->err_rx_mp_null++; rxq->sw_rx_cons = (rxq->sw_rx_cons + 1) & (RX_RING_SIZE - 1); if (mpf != NULL) m_freem(mpf); return (-1); } bus_dmamap_sync(ha->rx_tag, sw_rx_data->map, BUS_DMASYNC_POSTREAD); if (qlnx_alloc_rx_buffer(ha, rxq) != 0) { QL_DPRINT1(ha, "New buffer allocation failed, dropping" " incoming packet 
and reusing its buffer\n"); qlnx_reuse_rx_data(rxq); fp->err_rx_alloc_errors++; if (mpf != NULL) m_freem(mpf); return (-1); } ecore_chain_consume(&rxq->rx_bd_ring); if (len > rxq->rx_buf_size) len_in_buffer = rxq->rx_buf_size; else len_in_buffer = len; len = len - len_in_buffer; mp->m_flags &= ~M_PKTHDR; mp->m_next = NULL; mp->m_len = len_in_buffer; if (mpf == NULL) mpf = mpl = mp; else { mpl->m_next = mp; mpl = mp; } } if (mpf != NULL) mp_head->m_next = mpf; return (0); } static void qlnx_tpa_start(qlnx_host_t *ha, struct qlnx_fastpath *fp, struct qlnx_rx_queue *rxq, struct eth_fast_path_rx_tpa_start_cqe *cqe) { uint32_t agg_index; struct ifnet *ifp = ha->ifp; struct mbuf *mp; struct mbuf *mpf = NULL, *mpl = NULL, *mpc = NULL; struct sw_rx_data *sw_rx_data; dma_addr_t addr; bus_dmamap_t map; struct eth_rx_bd *rx_bd; int i; device_t dev; #if __FreeBSD_version >= 1100000 uint8_t hash_type; #endif /* #if __FreeBSD_version >= 1100000 */ dev = ha->pci_dev; agg_index = cqe->tpa_agg_index; QL_DPRINT7(ha, "[rss_id = %d]: enter\n \ \t type = 0x%x\n \ \t bitfields = 0x%x\n \ \t seg_len = 0x%x\n \ \t pars_flags = 0x%x\n \ \t vlan_tag = 0x%x\n \ \t rss_hash = 0x%x\n \ \t len_on_first_bd = 0x%x\n \ \t placement_offset = 0x%x\n \ \t tpa_agg_index = 0x%x\n \ \t header_len = 0x%x\n \ \t ext_bd_len_list[0] = 0x%x\n \ \t ext_bd_len_list[1] = 0x%x\n \ \t ext_bd_len_list[2] = 0x%x\n \ \t ext_bd_len_list[3] = 0x%x\n \ \t ext_bd_len_list[4] = 0x%x\n", fp->rss_id, cqe->type, cqe->bitfields, cqe->seg_len, cqe->pars_flags.flags, cqe->vlan_tag, cqe->rss_hash, cqe->len_on_first_bd, cqe->placement_offset, cqe->tpa_agg_index, cqe->header_len, cqe->ext_bd_len_list[0], cqe->ext_bd_len_list[1], cqe->ext_bd_len_list[2], cqe->ext_bd_len_list[3], cqe->ext_bd_len_list[4]); if (agg_index >= ETH_TPA_MAX_AGGS_NUM) { fp->err_rx_tpa_invalid_agg_num++; return; } sw_rx_data = &rxq->sw_rx_ring[rxq->sw_rx_cons]; bus_dmamap_sync(ha->rx_tag, sw_rx_data->map, BUS_DMASYNC_POSTREAD); mp = sw_rx_data->data; 
QL_DPRINT7(ha, "[rss_id = %d]: mp = %p \n ", fp->rss_id, mp); if (mp == NULL) { QL_DPRINT7(ha, "[%d]: mp = NULL\n", fp->rss_id); fp->err_rx_mp_null++; rxq->sw_rx_cons = (rxq->sw_rx_cons + 1) & (RX_RING_SIZE - 1); return; } if ((le16toh(cqe->pars_flags.flags)) & CQE_FLAGS_ERR) { QL_DPRINT7(ha, "[%d]: CQE in CONS = %u has error," " flags = %x, dropping incoming packet\n", fp->rss_id, rxq->sw_rx_cons, le16toh(cqe->pars_flags.flags)); fp->err_rx_hw_errors++; qlnx_reuse_rx_data(rxq); QLNX_INC_IERRORS(ifp); return; } if (qlnx_alloc_rx_buffer(ha, rxq) != 0) { QL_DPRINT7(ha, "[%d]: New buffer allocation failed," " dropping incoming packet and reusing its buffer\n", fp->rss_id); fp->err_rx_alloc_errors++; QLNX_INC_IQDROPS(ifp); /* * Load the tpa mbuf into the rx ring and save the * posted mbuf */ map = sw_rx_data->map; addr = sw_rx_data->dma_addr; sw_rx_data = &rxq->sw_rx_ring[rxq->sw_rx_prod]; sw_rx_data->data = rxq->tpa_info[agg_index].rx_buf.data; sw_rx_data->dma_addr = rxq->tpa_info[agg_index].rx_buf.dma_addr; sw_rx_data->map = rxq->tpa_info[agg_index].rx_buf.map; rxq->tpa_info[agg_index].rx_buf.data = mp; rxq->tpa_info[agg_index].rx_buf.dma_addr = addr; rxq->tpa_info[agg_index].rx_buf.map = map; rx_bd = (struct eth_rx_bd *) ecore_chain_produce(&rxq->rx_bd_ring); rx_bd->addr.hi = htole32(U64_HI(sw_rx_data->dma_addr)); rx_bd->addr.lo = htole32(U64_LO(sw_rx_data->dma_addr)); bus_dmamap_sync(ha->rx_tag, sw_rx_data->map, BUS_DMASYNC_PREREAD); rxq->sw_rx_prod = (rxq->sw_rx_prod + 1) & (RX_RING_SIZE - 1); rxq->sw_rx_cons = (rxq->sw_rx_cons + 1) & (RX_RING_SIZE - 1); ecore_chain_consume(&rxq->rx_bd_ring); /* Now reuse any buffers posted in ext_bd_len_list */ for (i = 0; i < ETH_TPA_CQE_START_LEN_LIST_SIZE; i++) { if (cqe->ext_bd_len_list[i] == 0) break; qlnx_reuse_rx_data(rxq); } rxq->tpa_info[agg_index].agg_state = QLNX_AGG_STATE_ERROR; return; } if (rxq->tpa_info[agg_index].agg_state != QLNX_AGG_STATE_NONE) { QL_DPRINT7(ha, "[%d]: invalid aggregation state," " dropping 
incoming packet and reusing its buffer\n", fp->rss_id); QLNX_INC_IQDROPS(ifp); /* if we already have mbuf head in aggregation free it */ if (rxq->tpa_info[agg_index].mpf) { m_freem(rxq->tpa_info[agg_index].mpf); rxq->tpa_info[agg_index].mpl = NULL; } rxq->tpa_info[agg_index].mpf = mp; rxq->tpa_info[agg_index].mpl = NULL; rxq->sw_rx_cons = (rxq->sw_rx_cons + 1) & (RX_RING_SIZE - 1); ecore_chain_consume(&rxq->rx_bd_ring); /* Now reuse any buffers posted in ext_bd_len_list */ for (i = 0; i < ETH_TPA_CQE_START_LEN_LIST_SIZE; i++) { if (cqe->ext_bd_len_list[i] == 0) break; qlnx_reuse_rx_data(rxq); } rxq->tpa_info[agg_index].agg_state = QLNX_AGG_STATE_ERROR; return; } /* * first process the ext_bd_len_list * if this fails then we simply drop the packet */ ecore_chain_consume(&rxq->rx_bd_ring); rxq->sw_rx_cons = (rxq->sw_rx_cons + 1) & (RX_RING_SIZE - 1); for (i = 0; i < ETH_TPA_CQE_START_LEN_LIST_SIZE; i++) { QL_DPRINT7(ha, "[%d]: 4\n ", fp->rss_id); if (cqe->ext_bd_len_list[i] == 0) break; sw_rx_data = &rxq->sw_rx_ring[rxq->sw_rx_cons]; bus_dmamap_sync(ha->rx_tag, sw_rx_data->map, BUS_DMASYNC_POSTREAD); mpc = sw_rx_data->data; if (mpc == NULL) { QL_DPRINT7(ha, "[%d]: mpc = NULL\n", fp->rss_id); fp->err_rx_mp_null++; if (mpf != NULL) m_freem(mpf); mpf = mpl = NULL; rxq->tpa_info[agg_index].agg_state = QLNX_AGG_STATE_ERROR; ecore_chain_consume(&rxq->rx_bd_ring); rxq->sw_rx_cons = (rxq->sw_rx_cons + 1) & (RX_RING_SIZE - 1); continue; } if (qlnx_alloc_rx_buffer(ha, rxq) != 0) { QL_DPRINT7(ha, "[%d]: New buffer allocation failed," " dropping incoming packet and reusing its" " buffer\n", fp->rss_id); qlnx_reuse_rx_data(rxq); if (mpf != NULL) m_freem(mpf); mpf = mpl = NULL; rxq->tpa_info[agg_index].agg_state = QLNX_AGG_STATE_ERROR; ecore_chain_consume(&rxq->rx_bd_ring); rxq->sw_rx_cons = (rxq->sw_rx_cons + 1) & (RX_RING_SIZE - 1); continue; } mpc->m_flags &= ~M_PKTHDR; mpc->m_next = NULL; mpc->m_len = cqe->ext_bd_len_list[i]; if (mpf == NULL) { mpf = mpl = mpc; } else { 
mpl->m_len = ha->rx_buf_size; mpl->m_next = mpc; mpl = mpc; } ecore_chain_consume(&rxq->rx_bd_ring); rxq->sw_rx_cons = (rxq->sw_rx_cons + 1) & (RX_RING_SIZE - 1); } if (rxq->tpa_info[agg_index].agg_state != QLNX_AGG_STATE_NONE) { QL_DPRINT7(ha, "[%d]: invalid aggregation state, dropping" " incoming packet and reusing its buffer\n", fp->rss_id); QLNX_INC_IQDROPS(ifp); rxq->tpa_info[agg_index].mpf = mp; rxq->tpa_info[agg_index].mpl = NULL; return; } rxq->tpa_info[agg_index].placement_offset = cqe->placement_offset; if (mpf != NULL) { mp->m_len = ha->rx_buf_size; mp->m_next = mpf; rxq->tpa_info[agg_index].mpf = mp; rxq->tpa_info[agg_index].mpl = mpl; } else { mp->m_len = cqe->len_on_first_bd + cqe->placement_offset; rxq->tpa_info[agg_index].mpf = mp; rxq->tpa_info[agg_index].mpl = mp; mp->m_next = NULL; } mp->m_flags |= M_PKTHDR; /* assign packet to this interface interface */ mp->m_pkthdr.rcvif = ifp; /* assume no hardware checksum has complated */ mp->m_pkthdr.csum_flags = 0; //mp->m_pkthdr.flowid = fp->rss_id; mp->m_pkthdr.flowid = cqe->rss_hash; #if __FreeBSD_version >= 1100000 hash_type = cqe->bitfields & (ETH_FAST_PATH_RX_REG_CQE_RSS_HASH_TYPE_MASK << ETH_FAST_PATH_RX_REG_CQE_RSS_HASH_TYPE_SHIFT); switch (hash_type) { case RSS_HASH_TYPE_IPV4: M_HASHTYPE_SET(mp, M_HASHTYPE_RSS_IPV4); break; case RSS_HASH_TYPE_TCP_IPV4: M_HASHTYPE_SET(mp, M_HASHTYPE_RSS_TCP_IPV4); break; case RSS_HASH_TYPE_IPV6: M_HASHTYPE_SET(mp, M_HASHTYPE_RSS_IPV6); break; case RSS_HASH_TYPE_TCP_IPV6: M_HASHTYPE_SET(mp, M_HASHTYPE_RSS_TCP_IPV6); break; default: M_HASHTYPE_SET(mp, M_HASHTYPE_OPAQUE); break; } #else mp->m_flags |= M_FLOWID; #endif mp->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); mp->m_pkthdr.csum_data = 0xFFFF; if (CQE_HAS_VLAN(cqe->pars_flags.flags)) { mp->m_pkthdr.ether_vtag = le16toh(cqe->vlan_tag); mp->m_flags |= M_VLANTAG; } rxq->tpa_info[agg_index].agg_state = QLNX_AGG_STATE_START; QL_DPRINT7(ha, "[%d]: 5\n\tagg_state = 
/*
 * qlnx_tpa_cont
 *	Handles a TPA "continue" completion for one aggregation.
 *
 *	Walks cqe->len_list[] until the first zero entry (or the end of the
 *	list).  For each entry it takes the mbuf at rxq->sw_rx_cons, gives it
 *	the listed length and appends it to a local chain (mpf = first,
 *	mpl = last).  When the loop is done, a non-empty local chain is hooked
 *	onto the tail recorded in rxq->tpa_info[agg_index].mpl and that tail
 *	pointer is advanced to the new last mbuf.
 *
 *	ha	- adapter state (rx_tag, rx_buf_size are read)
 *	fp	- fastpath; only its rss_id and error counters are used
 *	rxq	- receive queue whose sw ring / bd ring are consumed
 *	cqe	- the TPA continue completion entry
 *
 *	Nothing is returned; failures are recorded by moving the aggregation
 *	to QLNX_AGG_STATE_ERROR and bumping counters in fp.
 */
static void
qlnx_tpa_cont(qlnx_host_t *ha, struct qlnx_fastpath *fp,
	struct qlnx_rx_queue *rxq,
	struct eth_fast_path_rx_tpa_cont_cqe *cqe)
{
	struct sw_rx_data	*sw_rx_data;
	int			i;
	struct mbuf		*mpf = NULL, *mpl = NULL, *mpc = NULL;
	struct mbuf		*mp;
	uint32_t		agg_index;
	device_t		dev;

	/* NOTE(review): dev is assigned but not referenced again below. */
	dev = ha->pci_dev;

	/*
	 * NOTE(review): the "\ " sequences in this format string look like
	 * backslash-newline continuations that were flattened; confirm
	 * against the pristine file before relying on the exact output.
	 */
	QL_DPRINT7(ha, "[%d]: enter\n \ \t type = 0x%x\n \ \t tpa_agg_index = 0x%x\n \ \t len_list[0] = 0x%x\n \ \t len_list[1] = 0x%x\n \ \t len_list[2] = 0x%x\n \ \t len_list[3] = 0x%x\n \ \t len_list[4] = 0x%x\n \ \t len_list[5] = 0x%x\n",
		fp->rss_id, cqe->type, cqe->tpa_agg_index,
		cqe->len_list[0], cqe->len_list[1], cqe->len_list[2],
		cqe->len_list[3], cqe->len_list[4], cqe->len_list[5]);

	agg_index = cqe->tpa_agg_index;

	/* Reject indices that would fall outside rxq->tpa_info[]. */
	if (agg_index >= ETH_TPA_MAX_AGGS_NUM) {
		QL_DPRINT7(ha, "[%d]: 0\n ", fp->rss_id);
		fp->err_rx_tpa_invalid_agg_num++;
		return;
	}


	for (i = 0; i < ETH_TPA_CQE_CONT_LEN_LIST_SIZE; i++) {

		QL_DPRINT7(ha, "[%d]: 1\n ", fp->rss_id);

		/* A zero length terminates the list. */
		if (cqe->len_list[i] == 0)
			break;

		/*
		 * The aggregation is not in the START state (for instance an
		 * earlier entry of this loop moved it to ERROR): do not build
		 * a chain, just hand the buffer to qlnx_reuse_rx_data().
		 */
		if (rxq->tpa_info[agg_index].agg_state !=
			QLNX_AGG_STATE_START) {
			qlnx_reuse_rx_data(rxq);
			continue;
		}

		sw_rx_data = &rxq->sw_rx_ring[rxq->sw_rx_cons];
		bus_dmamap_sync(ha->rx_tag, sw_rx_data->map,
			BUS_DMASYNC_POSTREAD);

		mpc = sw_rx_data->data;

		/* No mbuf in this ring slot: drop what was chained so far. */
		if (mpc == NULL) {

			QL_DPRINT7(ha, "[%d]: mpc = NULL\n", fp->rss_id);

			fp->err_rx_mp_null++;
			if (mpf != NULL)
				m_freem(mpf);
			mpf = mpl = NULL;
			rxq->tpa_info[agg_index].agg_state =
						QLNX_AGG_STATE_ERROR;
			ecore_chain_consume(&rxq->rx_bd_ring);
			rxq->sw_rx_cons =
				(rxq->sw_rx_cons + 1) & (RX_RING_SIZE - 1);
			continue;
		}

		/*
		 * A replacement buffer could not be posted: drop the local
		 * chain and mark the aggregation as failed.
		 * NOTE(review): qlnx_reuse_rx_data() is followed here by an
		 * explicit consume + sw_rx_cons advance, whereas other callers
		 * in this file call it alone -- confirm this is intended.
		 */
		if (qlnx_alloc_rx_buffer(ha, rxq) != 0) {

			QL_DPRINT7(ha, "[%d]: New buffer allocation failed,"
				" dropping incoming packet and reusing its"
				" buffer\n", fp->rss_id);

			qlnx_reuse_rx_data(rxq);

			if (mpf != NULL)
				m_freem(mpf);
			mpf = mpl = NULL;

			rxq->tpa_info[agg_index].agg_state =
						QLNX_AGG_STATE_ERROR;

			ecore_chain_consume(&rxq->rx_bd_ring);
			rxq->sw_rx_cons =
				(rxq->sw_rx_cons + 1) & (RX_RING_SIZE - 1);

			continue;
		}

		/* Continuation buffers are plain data mbufs, not headers. */
		mpc->m_flags &= ~M_PKTHDR;
		mpc->m_next = NULL;
		mpc->m_len = cqe->len_list[i];


		if (mpf == NULL) {
			mpf = mpl = mpc;
		} else {
			/*
			 * The previous tail is no longer the last buffer, so
			 * its length is set to the full rx buffer size.
			 */
			mpl->m_len = ha->rx_buf_size;
			mpl->m_next = mpc;
			mpl = mpc;
		}

		ecore_chain_consume(&rxq->rx_bd_ring);
		rxq->sw_rx_cons =
			(rxq->sw_rx_cons + 1) & (RX_RING_SIZE - 1);
	}

	QL_DPRINT7(ha, "[%d]: 2\n" "\tmpf = %p mpl = %p\n",
		fp->rss_id, mpf, mpl);

	/* Splice the newly gathered buffers behind the stored tail. */
	if (mpf != NULL) {
		mp = rxq->tpa_info[agg_index].mpl;
		mp->m_len = ha->rx_buf_size;
		mp->m_next = mpf;
		rxq->tpa_info[agg_index].mpl = mpl;
	}

	return;
}
bus_dmamap_sync(ha->rx_tag, sw_rx_data->map, BUS_DMASYNC_POSTREAD); mpc = sw_rx_data->data; if (mpc == NULL) { QL_DPRINT7(ha, "[%d]: mpc = NULL\n", fp->rss_id); fp->err_rx_mp_null++; if (mpf != NULL) m_freem(mpf); mpf = mpl = NULL; rxq->tpa_info[agg_index].agg_state = QLNX_AGG_STATE_ERROR; ecore_chain_consume(&rxq->rx_bd_ring); rxq->sw_rx_cons = (rxq->sw_rx_cons + 1) & (RX_RING_SIZE - 1); continue; } if (qlnx_alloc_rx_buffer(ha, rxq) != 0) { QL_DPRINT7(ha, "[%d]: New buffer allocation failed," " dropping incoming packet and reusing its" " buffer\n", fp->rss_id); qlnx_reuse_rx_data(rxq); if (mpf != NULL) m_freem(mpf); mpf = mpl = NULL; rxq->tpa_info[agg_index].agg_state = QLNX_AGG_STATE_ERROR; ecore_chain_consume(&rxq->rx_bd_ring); rxq->sw_rx_cons = (rxq->sw_rx_cons + 1) & (RX_RING_SIZE - 1); continue; } mpc->m_flags &= ~M_PKTHDR; mpc->m_next = NULL; mpc->m_len = cqe->len_list[i]; if (mpf == NULL) { mpf = mpl = mpc; } else { mpl->m_len = ha->rx_buf_size; mpl->m_next = mpc; mpl = mpc; } ecore_chain_consume(&rxq->rx_bd_ring); rxq->sw_rx_cons = (rxq->sw_rx_cons + 1) & (RX_RING_SIZE - 1); } QL_DPRINT7(ha, "[%d]: 5\n ", fp->rss_id); if (mpf != NULL) { QL_DPRINT7(ha, "[%d]: 6\n ", fp->rss_id); mp = rxq->tpa_info[agg_index].mpl; mp->m_len = ha->rx_buf_size; mp->m_next = mpf; } if (rxq->tpa_info[agg_index].agg_state != QLNX_AGG_STATE_START) { QL_DPRINT7(ha, "[%d]: 7\n ", fp->rss_id); if (rxq->tpa_info[agg_index].mpf != NULL) m_freem(rxq->tpa_info[agg_index].mpf); rxq->tpa_info[agg_index].mpf = NULL; rxq->tpa_info[agg_index].mpl = NULL; rxq->tpa_info[agg_index].agg_state = QLNX_AGG_STATE_NONE; return (0); } mp = rxq->tpa_info[agg_index].mpf; m_adj(mp, rxq->tpa_info[agg_index].placement_offset); mp->m_pkthdr.len = cqe->total_packet_len; if (mp->m_next == NULL) mp->m_len = mp->m_pkthdr.len; else { /* compute the total packet length */ mpf = mp; while (mpf != NULL) { len += mpf->m_len; mpf = mpf->m_next; } if (cqe->total_packet_len > len) { mpl = rxq->tpa_info[agg_index].mpl; 
mpl->m_len += (cqe->total_packet_len - len); } } QLNX_INC_IPACKETS(ifp); QLNX_INC_IBYTES(ifp, (cqe->total_packet_len)); QL_DPRINT7(ha, "[%d]: 8 csum_data = 0x%x csum_flags = 0x%lx\n \ m_len = 0x%x m_pkthdr_len = 0x%x\n", fp->rss_id, mp->m_pkthdr.csum_data, mp->m_pkthdr.csum_flags, mp->m_len, mp->m_pkthdr.len); (*ifp->if_input)(ifp, mp); rxq->tpa_info[agg_index].mpf = NULL; rxq->tpa_info[agg_index].mpl = NULL; rxq->tpa_info[agg_index].agg_state = QLNX_AGG_STATE_NONE; return (cqe->num_of_coalesced_segs); } static int qlnx_rx_int(qlnx_host_t *ha, struct qlnx_fastpath *fp, int budget, int lro_enable) { uint16_t hw_comp_cons, sw_comp_cons; int rx_pkt = 0; struct qlnx_rx_queue *rxq = fp->rxq; struct ifnet *ifp = ha->ifp; struct ecore_dev *cdev = &ha->cdev; struct ecore_hwfn *p_hwfn; #ifdef QLNX_SOFT_LRO struct lro_ctrl *lro; lro = &rxq->lro; #endif /* #ifdef QLNX_SOFT_LRO */ hw_comp_cons = le16toh(*rxq->hw_cons_ptr); sw_comp_cons = ecore_chain_get_cons_idx(&rxq->rx_comp_ring); p_hwfn = &ha->cdev.hwfns[(fp->rss_id % cdev->num_hwfns)]; /* Memory barrier to prevent the CPU from doing speculative reads of CQE * / BD in the while-loop before reading hw_comp_cons. If the CQE is * read before it is written by FW, then FW writes CQE and SB, and then * the CPU reads the hw_comp_cons, it will use an old CQE. 
*/ /* Loop to complete all indicated BDs */ while (sw_comp_cons != hw_comp_cons) { union eth_rx_cqe *cqe; struct eth_fast_path_rx_reg_cqe *fp_cqe; struct sw_rx_data *sw_rx_data; register struct mbuf *mp; enum eth_rx_cqe_type cqe_type; uint16_t len, pad, len_on_first_bd; uint8_t *data; #if __FreeBSD_version >= 1100000 uint8_t hash_type; #endif /* #if __FreeBSD_version >= 1100000 */ /* Get the CQE from the completion ring */ cqe = (union eth_rx_cqe *) ecore_chain_consume(&rxq->rx_comp_ring); cqe_type = cqe->fast_path_regular.type; if (cqe_type == ETH_RX_CQE_TYPE_SLOW_PATH) { QL_DPRINT3(ha, "Got a slowath CQE\n"); ecore_eth_cqe_completion(p_hwfn, (struct eth_slow_path_rx_cqe *)cqe); goto next_cqe; } if (cqe_type != ETH_RX_CQE_TYPE_REGULAR) { switch (cqe_type) { case ETH_RX_CQE_TYPE_TPA_START: qlnx_tpa_start(ha, fp, rxq, &cqe->fast_path_tpa_start); fp->tpa_start++; break; case ETH_RX_CQE_TYPE_TPA_CONT: qlnx_tpa_cont(ha, fp, rxq, &cqe->fast_path_tpa_cont); fp->tpa_cont++; break; case ETH_RX_CQE_TYPE_TPA_END: rx_pkt += qlnx_tpa_end(ha, fp, rxq, &cqe->fast_path_tpa_end); fp->tpa_end++; break; default: break; } goto next_cqe; } /* Get the data from the SW ring */ sw_rx_data = &rxq->sw_rx_ring[rxq->sw_rx_cons]; mp = sw_rx_data->data; if (mp == NULL) { QL_DPRINT1(ha, "mp = NULL\n"); fp->err_rx_mp_null++; rxq->sw_rx_cons = (rxq->sw_rx_cons + 1) & (RX_RING_SIZE - 1); goto next_cqe; } bus_dmamap_sync(ha->rx_tag, sw_rx_data->map, BUS_DMASYNC_POSTREAD); /* non GRO */ fp_cqe = &cqe->fast_path_regular;/* MK CR TPA check assembly */ len = le16toh(fp_cqe->pkt_len); pad = fp_cqe->placement_offset; QL_DPRINT3(ha, "CQE type = %x, flags = %x, vlan = %x," " len %u, parsing flags = %d pad = %d\n", cqe_type, fp_cqe->bitfields, le16toh(fp_cqe->vlan_tag), len, le16toh(fp_cqe->pars_flags.flags), pad); data = mtod(mp, uint8_t *); data = data + pad; if (0) qlnx_dump_buf8(ha, __func__, data, len); /* For every Rx BD consumed, we allocate a new BD so the BD ring * is always with a fixed size. 
If allocation fails, we take the * consumed BD and return it to the ring in the PROD position. * The packet that was received on that BD will be dropped (and * not passed to the upper stack). */ /* If this is an error packet then drop it */ if ((le16toh(cqe->fast_path_regular.pars_flags.flags)) & CQE_FLAGS_ERR) { QL_DPRINT1(ha, "CQE in CONS = %u has error, flags = %x," " dropping incoming packet\n", sw_comp_cons, le16toh(cqe->fast_path_regular.pars_flags.flags)); fp->err_rx_hw_errors++; qlnx_reuse_rx_data(rxq); QLNX_INC_IERRORS(ifp); goto next_cqe; } if (qlnx_alloc_rx_buffer(ha, rxq) != 0) { QL_DPRINT1(ha, "New buffer allocation failed, dropping" " incoming packet and reusing its buffer\n"); qlnx_reuse_rx_data(rxq); fp->err_rx_alloc_errors++; QLNX_INC_IQDROPS(ifp); goto next_cqe; } ecore_chain_consume(&rxq->rx_bd_ring); len_on_first_bd = fp_cqe->len_on_first_bd; m_adj(mp, pad); mp->m_pkthdr.len = len; QL_DPRINT1(ha, "len = %d len_on_first_bd = %d\n", len, len_on_first_bd); if ((len > 60 ) && (len > len_on_first_bd)) { mp->m_len = len_on_first_bd; if (qlnx_rx_jumbo_chain(ha, fp, mp, (len - len_on_first_bd)) != 0) { m_freem(mp); QLNX_INC_IQDROPS(ifp); goto next_cqe; } } else if (len_on_first_bd < len) { fp->err_rx_jumbo_chain_pkts++; } else { mp->m_len = len; } mp->m_flags |= M_PKTHDR; /* assign packet to this interface interface */ mp->m_pkthdr.rcvif = ifp; /* assume no hardware checksum has complated */ mp->m_pkthdr.csum_flags = 0; mp->m_pkthdr.flowid = fp_cqe->rss_hash; #if __FreeBSD_version >= 1100000 hash_type = fp_cqe->bitfields & (ETH_FAST_PATH_RX_REG_CQE_RSS_HASH_TYPE_MASK << ETH_FAST_PATH_RX_REG_CQE_RSS_HASH_TYPE_SHIFT); switch (hash_type) { case RSS_HASH_TYPE_IPV4: M_HASHTYPE_SET(mp, M_HASHTYPE_RSS_IPV4); break; case RSS_HASH_TYPE_TCP_IPV4: M_HASHTYPE_SET(mp, M_HASHTYPE_RSS_TCP_IPV4); break; case RSS_HASH_TYPE_IPV6: M_HASHTYPE_SET(mp, M_HASHTYPE_RSS_IPV6); break; case RSS_HASH_TYPE_TCP_IPV6: M_HASHTYPE_SET(mp, M_HASHTYPE_RSS_TCP_IPV6); break; default: 
M_HASHTYPE_SET(mp, M_HASHTYPE_OPAQUE); break; } #else mp->m_flags |= M_FLOWID; #endif if (CQE_L3_PACKET(fp_cqe->pars_flags.flags)) { mp->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; } if (!(CQE_IP_HDR_ERR(fp_cqe->pars_flags.flags))) { mp->m_pkthdr.csum_flags |= CSUM_IP_VALID; } if (CQE_L4_HAS_CSUM(fp_cqe->pars_flags.flags)) { mp->m_pkthdr.csum_data = 0xFFFF; mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); } if (CQE_HAS_VLAN(fp_cqe->pars_flags.flags)) { mp->m_pkthdr.ether_vtag = le16toh(fp_cqe->vlan_tag); mp->m_flags |= M_VLANTAG; } QLNX_INC_IPACKETS(ifp); QLNX_INC_IBYTES(ifp, len); #ifdef QLNX_SOFT_LRO if (lro_enable) { #if (__FreeBSD_version >= 1100101) || (defined QLNX_QSORT_LRO) tcp_lro_queue_mbuf(lro, mp); #else if (tcp_lro_rx(lro, mp, 0)) (*ifp->if_input)(ifp, mp); #endif /* #if (__FreeBSD_version >= 1100101) || (defined QLNX_QSORT_LRO) */ } else { (*ifp->if_input)(ifp, mp); } #else (*ifp->if_input)(ifp, mp); #endif /* #ifdef QLNX_SOFT_LRO */ rx_pkt++; rxq->sw_rx_cons = (rxq->sw_rx_cons + 1) & (RX_RING_SIZE - 1); next_cqe: /* don't consume bd rx buffer */ ecore_chain_recycle_consumed(&rxq->rx_comp_ring); sw_comp_cons = ecore_chain_get_cons_idx(&rxq->rx_comp_ring); /* CR TPA - revisit how to handle budget in TPA perhaps increase on "end" */ if (rx_pkt == budget) break; } /* repeat while sw_comp_cons != hw_comp_cons... 
*/ /* Update producers */ qlnx_update_rx_prod(p_hwfn, rxq); return rx_pkt; } /* * fast path interrupt */ static void qlnx_fp_isr(void *arg) { qlnx_ivec_t *ivec = arg; qlnx_host_t *ha; struct qlnx_fastpath *fp = NULL; int idx; ha = ivec->ha; if (ha->state != QLNX_STATE_OPEN) { return; } idx = ivec->rss_idx; if ((idx = ivec->rss_idx) >= ha->num_rss) { QL_DPRINT1(ha, "illegal interrupt[%d]\n", idx); ha->err_illegal_intr++; return; } fp = &ha->fp_array[idx]; if (fp == NULL) { ha->err_fp_null++; } else { #ifdef QLNX_RCV_IN_TASKQ ecore_sb_ack(fp->sb_info, IGU_INT_DISABLE, 0); if (fp->fp_taskqueue != NULL) taskqueue_enqueue(fp->fp_taskqueue, &fp->fp_task); #else int rx_int = 0, total_rx_count = 0; int lro_enable, tc; struct qlnx_tx_queue *txq; uint16_t elem_left; lro_enable = ha->ifp->if_capenable & IFCAP_LRO; ecore_sb_ack(fp->sb_info, IGU_INT_DISABLE, 0); do { for (tc = 0; tc < ha->num_tc; tc++) { txq = fp->txq[tc]; if((int)(elem_left = ecore_chain_get_elem_left(&txq->tx_pbl)) < QLNX_TX_ELEM_THRESH) { if (mtx_trylock(&fp->tx_mtx)) { #ifdef QLNX_TRACE_PERF_DATA tx_compl = fp->tx_pkts_completed; #endif qlnx_tx_int(ha, fp, fp->txq[tc]); #ifdef QLNX_TRACE_PERF_DATA fp->tx_pkts_compl_intr += (fp->tx_pkts_completed - tx_compl); if ((fp->tx_pkts_completed - tx_compl) <= 32) fp->tx_comInt[0]++; else if (((fp->tx_pkts_completed - tx_compl) > 32) && ((fp->tx_pkts_completed - tx_compl) <= 64)) fp->tx_comInt[1]++; else if(((fp->tx_pkts_completed - tx_compl) > 64) && ((fp->tx_pkts_completed - tx_compl) <= 128)) fp->tx_comInt[2]++; else if(((fp->tx_pkts_completed - tx_compl) > 128)) fp->tx_comInt[3]++; #endif mtx_unlock(&fp->tx_mtx); } } } rx_int = qlnx_rx_int(ha, fp, ha->rx_pkt_threshold, lro_enable); if (rx_int) { fp->rx_pkts += rx_int; total_rx_count += rx_int; } } while (rx_int); #ifdef QLNX_SOFT_LRO { struct lro_ctrl *lro; lro = &fp->rxq->lro; if (lro_enable && total_rx_count) { #if (__FreeBSD_version >= 1100101) || (defined QLNX_QSORT_LRO) #ifdef QLNX_TRACE_LRO_CNT if 
(lro->lro_mbuf_count & ~1023) fp->lro_cnt_1024++; else if (lro->lro_mbuf_count & ~511) fp->lro_cnt_512++; else if (lro->lro_mbuf_count & ~255) fp->lro_cnt_256++; else if (lro->lro_mbuf_count & ~127) fp->lro_cnt_128++; else if (lro->lro_mbuf_count & ~63) fp->lro_cnt_64++; #endif /* #ifdef QLNX_TRACE_LRO_CNT */ tcp_lro_flush_all(lro); #else struct lro_entry *queued; while ((!SLIST_EMPTY(&lro->lro_active))) { queued = SLIST_FIRST(&lro->lro_active); SLIST_REMOVE_HEAD(&lro->lro_active, \ next); tcp_lro_flush(lro, queued); } #endif /* #if (__FreeBSD_version >= 1100101) || (defined QLNX_QSORT_LRO) */ } } #endif /* #ifdef QLNX_SOFT_LRO */ ecore_sb_update_sb_idx(fp->sb_info); rmb(); ecore_sb_ack(fp->sb_info, IGU_INT_ENABLE, 1); #endif /* #ifdef QLNX_RCV_IN_TASKQ */ } return; } /* * slow path interrupt processing function * can be invoked in polled mode or in interrupt mode via taskqueue. */ void qlnx_sp_isr(void *arg) { struct ecore_hwfn *p_hwfn; qlnx_host_t *ha; p_hwfn = arg; ha = (qlnx_host_t *)p_hwfn->p_dev; ha->sp_interrupts++; QL_DPRINT2(ha, "enter\n"); ecore_int_sp_dpc(p_hwfn); QL_DPRINT2(ha, "exit\n"); return; } /***************************************************************************** * Support Functions for DMA'able Memory *****************************************************************************/ static void qlnx_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { *((bus_addr_t *)arg) = 0; if (error) { printf("%s: bus_dmamap_load failed (%d)\n", __func__, error); return; } *((bus_addr_t *)arg) = segs[0].ds_addr; return; } static int qlnx_alloc_dmabuf(qlnx_host_t *ha, qlnx_dma_t *dma_buf) { int ret = 0; device_t dev; bus_addr_t b_addr; dev = ha->pci_dev; ret = bus_dma_tag_create( ha->parent_tag,/* parent */ dma_buf->alignment, ((bus_size_t)(1ULL << 32)),/* boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ dma_buf->size, /* maxsize */ 1, /* nsegments */ dma_buf->size, /* 
maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &dma_buf->dma_tag); if (ret) { QL_DPRINT1(ha, "could not create dma tag\n"); goto qlnx_alloc_dmabuf_exit; } ret = bus_dmamem_alloc(dma_buf->dma_tag, (void **)&dma_buf->dma_b, (BUS_DMA_ZERO | BUS_DMA_COHERENT | BUS_DMA_NOWAIT), &dma_buf->dma_map); if (ret) { bus_dma_tag_destroy(dma_buf->dma_tag); QL_DPRINT1(ha, "bus_dmamem_alloc failed\n"); goto qlnx_alloc_dmabuf_exit; } ret = bus_dmamap_load(dma_buf->dma_tag, dma_buf->dma_map, dma_buf->dma_b, dma_buf->size, qlnx_dmamap_callback, &b_addr, BUS_DMA_NOWAIT); if (ret || !b_addr) { bus_dma_tag_destroy(dma_buf->dma_tag); bus_dmamem_free(dma_buf->dma_tag, dma_buf->dma_b, dma_buf->dma_map); ret = -1; goto qlnx_alloc_dmabuf_exit; } dma_buf->dma_addr = b_addr; qlnx_alloc_dmabuf_exit: return ret; } static void qlnx_free_dmabuf(qlnx_host_t *ha, qlnx_dma_t *dma_buf) { bus_dmamap_unload(dma_buf->dma_tag, dma_buf->dma_map); bus_dmamem_free(dma_buf->dma_tag, dma_buf->dma_b, dma_buf->dma_map); bus_dma_tag_destroy(dma_buf->dma_tag); return; } void * qlnx_dma_alloc_coherent(void *ecore_dev, bus_addr_t *phys, uint32_t size) { qlnx_dma_t dma_buf; qlnx_dma_t *dma_p; qlnx_host_t *ha; device_t dev; ha = (qlnx_host_t *)ecore_dev; dev = ha->pci_dev; size = (size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1); memset(&dma_buf, 0, sizeof (qlnx_dma_t)); dma_buf.size = size + PAGE_SIZE; dma_buf.alignment = 8; if (qlnx_alloc_dmabuf((qlnx_host_t *)ecore_dev, &dma_buf) != 0) return (NULL); bzero((uint8_t *)dma_buf.dma_b, dma_buf.size); *phys = dma_buf.dma_addr; dma_p = (qlnx_dma_t *)((uint8_t *)dma_buf.dma_b + size); memcpy(dma_p, &dma_buf, sizeof(qlnx_dma_t)); /* QL_DPRINT5(ha, "[%p %p %p %p 0x%08x ]\n", (void *)dma_buf.dma_map, (void *)dma_buf.dma_tag, dma_buf.dma_b, (void *)dma_buf.dma_addr, size); */ return (dma_buf.dma_b); } void qlnx_dma_free_coherent(void *ecore_dev, void *v_addr, bus_addr_t phys, uint32_t size) { qlnx_dma_t dma_buf, *dma_p; qlnx_host_t *ha; device_t dev; ha = 
(qlnx_host_t *)ecore_dev; dev = ha->pci_dev; if (v_addr == NULL) return; size = (size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1); dma_p = (qlnx_dma_t *)((uint8_t *)v_addr + size); /* QL_DPRINT5(ha, "[%p %p %p %p 0x%08x ]\n", (void *)dma_p->dma_map, (void *)dma_p->dma_tag, dma_p->dma_b, (void *)dma_p->dma_addr, size); */ dma_buf = *dma_p; qlnx_free_dmabuf((qlnx_host_t *)ecore_dev, &dma_buf); return; } static int qlnx_alloc_parent_dma_tag(qlnx_host_t *ha) { int ret; device_t dev; dev = ha->pci_dev; /* * Allocate parent DMA Tag */ ret = bus_dma_tag_create( bus_get_dma_tag(dev), /* parent */ 1,((bus_size_t)(1ULL << 32)),/* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ 0, /* nsegments */ BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &ha->parent_tag); if (ret) { QL_DPRINT1(ha, "could not create parent dma tag\n"); return (-1); } ha->flags.parent_tag = 1; return (0); } static void qlnx_free_parent_dma_tag(qlnx_host_t *ha) { if (ha->parent_tag != NULL) { bus_dma_tag_destroy(ha->parent_tag); ha->parent_tag = NULL; } return; } static int qlnx_alloc_tx_dma_tag(qlnx_host_t *ha) { if (bus_dma_tag_create(NULL, /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ QLNX_MAX_TSO_FRAME_SIZE, /* maxsize */ QLNX_MAX_SEGMENTS, /* nsegments */ QLNX_MAX_TX_MBUF_SIZE, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &ha->tx_tag)) { QL_DPRINT1(ha, "tx_tag alloc failed\n"); return (-1); } return (0); } static void qlnx_free_tx_dma_tag(qlnx_host_t *ha) { if (ha->tx_tag != NULL) { bus_dma_tag_destroy(ha->tx_tag); ha->tx_tag = NULL; } return; } static int qlnx_alloc_rx_dma_tag(qlnx_host_t *ha) { if (bus_dma_tag_create(NULL, /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* 
highaddr */ NULL, NULL, /* filter, filterarg */ MJUM9BYTES, /* maxsize */ 1, /* nsegments */ MJUM9BYTES, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &ha->rx_tag)) { QL_DPRINT1(ha, " rx_tag alloc failed\n"); return (-1); } return (0); } static void qlnx_free_rx_dma_tag(qlnx_host_t *ha) { if (ha->rx_tag != NULL) { bus_dma_tag_destroy(ha->rx_tag); ha->rx_tag = NULL; } return; } /********************************* * Exported functions *********************************/ uint32_t qlnx_pci_bus_get_bar_size(void *ecore_dev, uint8_t bar_id) { uint32_t bar_size; bar_id = bar_id * 2; bar_size = bus_get_resource_count(((qlnx_host_t *)ecore_dev)->pci_dev, SYS_RES_MEMORY, PCIR_BAR(bar_id)); return (bar_size); } uint32_t qlnx_pci_read_config_byte(void *ecore_dev, uint32_t pci_reg, uint8_t *reg_value) { *reg_value = pci_read_config(((qlnx_host_t *)ecore_dev)->pci_dev, pci_reg, 1); return 0; } uint32_t qlnx_pci_read_config_word(void *ecore_dev, uint32_t pci_reg, uint16_t *reg_value) { *reg_value = pci_read_config(((qlnx_host_t *)ecore_dev)->pci_dev, pci_reg, 2); return 0; } uint32_t qlnx_pci_read_config_dword(void *ecore_dev, uint32_t pci_reg, uint32_t *reg_value) { *reg_value = pci_read_config(((qlnx_host_t *)ecore_dev)->pci_dev, pci_reg, 4); return 0; } void qlnx_pci_write_config_byte(void *ecore_dev, uint32_t pci_reg, uint8_t reg_value) { pci_write_config(((qlnx_host_t *)ecore_dev)->pci_dev, pci_reg, reg_value, 1); return; } void qlnx_pci_write_config_word(void *ecore_dev, uint32_t pci_reg, uint16_t reg_value) { pci_write_config(((qlnx_host_t *)ecore_dev)->pci_dev, pci_reg, reg_value, 2); return; } void qlnx_pci_write_config_dword(void *ecore_dev, uint32_t pci_reg, uint32_t reg_value) { pci_write_config(((qlnx_host_t *)ecore_dev)->pci_dev, pci_reg, reg_value, 4); return; } int qlnx_pci_find_capability(void *ecore_dev, int cap) { int reg; qlnx_host_t *ha; ha = ecore_dev; if (pci_find_cap(ha->pci_dev, PCIY_EXPRESS, ®) == 0) return reg; else { 
QL_DPRINT1(ha, "failed\n"); return 0; } } uint32_t qlnx_reg_rd32(void *hwfn, uint32_t reg_addr) { uint32_t data32; struct ecore_dev *cdev; struct ecore_hwfn *p_hwfn; p_hwfn = hwfn; cdev = p_hwfn->p_dev; reg_addr = (uint32_t)((uint8_t *)(p_hwfn->regview) - (uint8_t *)(cdev->regview)) + reg_addr; data32 = bus_read_4(((qlnx_host_t *)cdev)->pci_reg, reg_addr); return (data32); } void qlnx_reg_wr32(void *hwfn, uint32_t reg_addr, uint32_t value) { struct ecore_dev *cdev; struct ecore_hwfn *p_hwfn; p_hwfn = hwfn; cdev = p_hwfn->p_dev; reg_addr = (uint32_t)((uint8_t *)(p_hwfn->regview) - (uint8_t *)(cdev->regview)) + reg_addr; bus_write_4(((qlnx_host_t *)cdev)->pci_reg, reg_addr, value); return; } void qlnx_reg_wr16(void *hwfn, uint32_t reg_addr, uint16_t value) { struct ecore_dev *cdev; struct ecore_hwfn *p_hwfn; p_hwfn = hwfn; cdev = p_hwfn->p_dev; reg_addr = (uint32_t)((uint8_t *)(p_hwfn->regview) - (uint8_t *)(cdev->regview)) + reg_addr; bus_write_2(((qlnx_host_t *)cdev)->pci_reg, reg_addr, value); return; } void qlnx_dbell_wr32(void *hwfn, uint32_t reg_addr, uint32_t value) { struct ecore_dev *cdev; struct ecore_hwfn *p_hwfn; p_hwfn = hwfn; cdev = p_hwfn->p_dev; reg_addr = (uint32_t)((uint8_t *)(p_hwfn->doorbells) - (uint8_t *)(cdev->doorbells)) + reg_addr; bus_write_4(((qlnx_host_t *)cdev)->pci_dbells, reg_addr, value); return; } uint32_t qlnx_direct_reg_rd32(void *p_hwfn, uint32_t *reg_addr) { uint32_t data32; uint32_t offset; struct ecore_dev *cdev; cdev = ((struct ecore_hwfn *)p_hwfn)->p_dev; offset = (uint32_t)((uint8_t *)reg_addr - (uint8_t *)(cdev->regview)); data32 = bus_read_4(((qlnx_host_t *)cdev)->pci_reg, offset); return (data32); } void qlnx_direct_reg_wr32(void *p_hwfn, void *reg_addr, uint32_t value) { uint32_t offset; struct ecore_dev *cdev; cdev = ((struct ecore_hwfn *)p_hwfn)->p_dev; offset = (uint32_t)((uint8_t *)reg_addr - (uint8_t *)(cdev->regview)); bus_write_4(((qlnx_host_t *)cdev)->pci_reg, offset, value); return; } void 
qlnx_direct_reg_wr64(void *p_hwfn, void *reg_addr, uint64_t value) { uint32_t offset; struct ecore_dev *cdev; cdev = ((struct ecore_hwfn *)p_hwfn)->p_dev; offset = (uint32_t)((uint8_t *)reg_addr - (uint8_t *)(cdev->regview)); bus_write_8(((qlnx_host_t *)cdev)->pci_reg, offset, value); return; } void * qlnx_zalloc(uint32_t size) { caddr_t va; va = malloc((unsigned long)size, M_QLNXBUF, M_NOWAIT); bzero(va, size); return ((void *)va); } void qlnx_barrier(void *p_hwfn) { qlnx_host_t *ha; ha = (qlnx_host_t *)((struct ecore_hwfn *)p_hwfn)->p_dev; bus_barrier(ha->pci_reg, 0, 0, BUS_SPACE_BARRIER_WRITE); } void qlnx_link_update(void *p_hwfn) { qlnx_host_t *ha; int prev_link_state; ha = (qlnx_host_t *)((struct ecore_hwfn *)p_hwfn)->p_dev; qlnx_fill_link(p_hwfn, &ha->if_link); prev_link_state = ha->link_up; ha->link_up = ha->if_link.link_up; if (prev_link_state != ha->link_up) { if (ha->link_up) { if_link_state_change(ha->ifp, LINK_STATE_UP); } else { if_link_state_change(ha->ifp, LINK_STATE_DOWN); } } return; } void qlnx_fill_link(struct ecore_hwfn *hwfn, struct qlnx_link_output *if_link) { struct ecore_mcp_link_params link_params; struct ecore_mcp_link_state link_state; memset(if_link, 0, sizeof(*if_link)); memset(&link_params, 0, sizeof(struct ecore_mcp_link_params)); memset(&link_state, 0, sizeof(struct ecore_mcp_link_state)); /* Prepare source inputs */ /* we only deal with physical functions */ memcpy(&link_params, ecore_mcp_get_link_params(hwfn), sizeof(link_params)); memcpy(&link_state, ecore_mcp_get_link_state(hwfn), sizeof(link_state)); ecore_mcp_get_media_type(hwfn->p_dev, &if_link->media_type); /* Set the link parameters to pass to protocol driver */ if (link_state.link_up) { if_link->link_up = true; if_link->speed = link_state.speed; } if_link->supported_caps = QLNX_LINK_CAP_FIBRE; if (link_params.speed.autoneg) if_link->supported_caps |= QLNX_LINK_CAP_Autoneg; if (link_params.pause.autoneg || (link_params.pause.forced_rx && link_params.pause.forced_tx)) 
if_link->supported_caps |= QLNX_LINK_CAP_Asym_Pause; if (link_params.pause.autoneg || link_params.pause.forced_rx || link_params.pause.forced_tx) if_link->supported_caps |= QLNX_LINK_CAP_Pause; if (link_params.speed.advertised_speeds & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G) if_link->supported_caps |= QLNX_LINK_CAP_1000baseT_Half | QLNX_LINK_CAP_1000baseT_Full; if (link_params.speed.advertised_speeds & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G) if_link->supported_caps |= QLNX_LINK_CAP_10000baseKR_Full; if (link_params.speed.advertised_speeds & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G) if_link->supported_caps |= QLNX_LINK_CAP_25000baseKR_Full; if (link_params.speed.advertised_speeds & NVM_CFG1_PORT_DRV_LINK_SPEED_40G) if_link->supported_caps |= QLNX_LINK_CAP_40000baseLR4_Full; if (link_params.speed.advertised_speeds & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G) if_link->supported_caps |= QLNX_LINK_CAP_50000baseKR2_Full; if (link_params.speed.advertised_speeds & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G) if_link->supported_caps |= QLNX_LINK_CAP_100000baseKR4_Full; if_link->advertised_caps = if_link->supported_caps; if_link->autoneg = link_params.speed.autoneg; if_link->duplex = QLNX_LINK_DUPLEX; /* Link partner capabilities */ if (link_state.partner_adv_speed & ECORE_LINK_PARTNER_SPEED_1G_HD) if_link->link_partner_caps |= QLNX_LINK_CAP_1000baseT_Half; if (link_state.partner_adv_speed & ECORE_LINK_PARTNER_SPEED_1G_FD) if_link->link_partner_caps |= QLNX_LINK_CAP_1000baseT_Full; if (link_state.partner_adv_speed & ECORE_LINK_PARTNER_SPEED_10G) if_link->link_partner_caps |= QLNX_LINK_CAP_10000baseKR_Full; if (link_state.partner_adv_speed & ECORE_LINK_PARTNER_SPEED_25G) if_link->link_partner_caps |= QLNX_LINK_CAP_25000baseKR_Full; if (link_state.partner_adv_speed & ECORE_LINK_PARTNER_SPEED_40G) if_link->link_partner_caps |= QLNX_LINK_CAP_40000baseLR4_Full; if (link_state.partner_adv_speed & ECORE_LINK_PARTNER_SPEED_50G) if_link->link_partner_caps |= 
QLNX_LINK_CAP_50000baseKR2_Full; if (link_state.partner_adv_speed & ECORE_LINK_PARTNER_SPEED_100G) if_link->link_partner_caps |= QLNX_LINK_CAP_100000baseKR4_Full; if (link_state.an_complete) if_link->link_partner_caps |= QLNX_LINK_CAP_Autoneg; if (link_state.partner_adv_pause) if_link->link_partner_caps |= QLNX_LINK_CAP_Pause; if ((link_state.partner_adv_pause == ECORE_LINK_PARTNER_ASYMMETRIC_PAUSE) || (link_state.partner_adv_pause == ECORE_LINK_PARTNER_BOTH_PAUSE)) if_link->link_partner_caps |= QLNX_LINK_CAP_Asym_Pause; return; } static int qlnx_nic_setup(struct ecore_dev *cdev, struct ecore_pf_params *func_params) { int rc, i; for (i = 0; i < cdev->num_hwfns; i++) { struct ecore_hwfn *p_hwfn = &cdev->hwfns[i]; p_hwfn->pf_params = *func_params; } rc = ecore_resc_alloc(cdev); if (rc) goto qlnx_nic_setup_exit; ecore_resc_setup(cdev); qlnx_nic_setup_exit: return rc; } static int qlnx_nic_start(struct ecore_dev *cdev) { int rc; struct ecore_hw_init_params params; bzero(¶ms, sizeof (struct ecore_hw_init_params)); params.p_tunn = NULL; params.b_hw_start = true; params.int_mode = cdev->int_mode; params.allow_npar_tx_switch = true; params.bin_fw_data = NULL; rc = ecore_hw_init(cdev, ¶ms); if (rc) { ecore_resc_free(cdev); return rc; } return 0; } static int qlnx_slowpath_start(qlnx_host_t *ha) { struct ecore_dev *cdev; struct ecore_pf_params pf_params; int rc; memset(&pf_params, 0, sizeof(struct ecore_pf_params)); pf_params.eth_pf_params.num_cons = (ha->num_rss) * (ha->num_tc + 1); cdev = &ha->cdev; rc = qlnx_nic_setup(cdev, &pf_params); if (rc) goto qlnx_slowpath_start_exit; cdev->int_mode = ECORE_INT_MODE_MSIX; cdev->int_coalescing_mode = ECORE_COAL_MODE_ENABLE; #ifdef QLNX_MAX_COALESCE cdev->rx_coalesce_usecs = 255; cdev->tx_coalesce_usecs = 255; #endif rc = qlnx_nic_start(cdev); ha->rx_coalesce_usecs = cdev->rx_coalesce_usecs; ha->tx_coalesce_usecs = cdev->tx_coalesce_usecs; qlnx_slowpath_start_exit: return (rc); } static int qlnx_slowpath_stop(qlnx_host_t *ha) { 
struct ecore_dev *cdev; device_t dev = ha->pci_dev; int i; cdev = &ha->cdev; ecore_hw_stop(cdev); for (i = 0; i < ha->cdev.num_hwfns; i++) { if (ha->sp_handle[i]) (void)bus_teardown_intr(dev, ha->sp_irq[i], ha->sp_handle[i]); ha->sp_handle[i] = NULL; if (ha->sp_irq[i]) (void) bus_release_resource(dev, SYS_RES_IRQ, ha->sp_irq_rid[i], ha->sp_irq[i]); ha->sp_irq[i] = NULL; } ecore_resc_free(cdev); return 0; } static void qlnx_set_id(struct ecore_dev *cdev, char name[NAME_SIZE], char ver_str[VER_SIZE]) { int i; memcpy(cdev->name, name, NAME_SIZE); for_each_hwfn(cdev, i) { snprintf(cdev->hwfns[i].name, NAME_SIZE, "%s-%d", name, i); } cdev->drv_type = DRV_ID_DRV_TYPE_FREEBSD; return ; } void qlnx_get_protocol_stats(void *cdev, int proto_type, void *proto_stats) { enum ecore_mcp_protocol_type type; union ecore_mcp_protocol_stats *stats; struct ecore_eth_stats eth_stats; qlnx_host_t *ha; ha = cdev; stats = proto_stats; type = proto_type; switch (type) { case ECORE_MCP_LAN_STATS: ecore_get_vport_stats((struct ecore_dev *)cdev, ð_stats); stats->lan_stats.ucast_rx_pkts = eth_stats.common.rx_ucast_pkts; stats->lan_stats.ucast_tx_pkts = eth_stats.common.tx_ucast_pkts; stats->lan_stats.fcs_err = -1; break; default: ha->err_get_proto_invalid_type++; QL_DPRINT1(ha, "invalid protocol type 0x%x\n", type); break; } return; } static int qlnx_get_mfw_version(qlnx_host_t *ha, uint32_t *mfw_ver) { struct ecore_hwfn *p_hwfn; struct ecore_ptt *p_ptt; p_hwfn = &ha->cdev.hwfns[0]; p_ptt = ecore_ptt_acquire(p_hwfn); if (p_ptt == NULL) { QL_DPRINT1(ha, "ecore_ptt_acquire failed\n"); return (-1); } ecore_mcp_get_mfw_ver(p_hwfn, p_ptt, mfw_ver, NULL); ecore_ptt_release(p_hwfn, p_ptt); return (0); } static int qlnx_get_flash_size(qlnx_host_t *ha, uint32_t *flash_size) { struct ecore_hwfn *p_hwfn; struct ecore_ptt *p_ptt; p_hwfn = &ha->cdev.hwfns[0]; p_ptt = ecore_ptt_acquire(p_hwfn); if (p_ptt == NULL) { QL_DPRINT1(ha,"ecore_ptt_acquire failed\n"); return (-1); } ecore_mcp_get_flash_size(p_hwfn, 
p_ptt, flash_size); ecore_ptt_release(p_hwfn, p_ptt); return (0); } static int qlnx_alloc_mem_arrays(qlnx_host_t *ha) { struct ecore_dev *cdev; cdev = &ha->cdev; bzero(&ha->txq_array[0], (sizeof(struct qlnx_tx_queue) * QLNX_MAX_RSS)); bzero(&ha->rxq_array[0], (sizeof(struct qlnx_rx_queue) * QLNX_MAX_RSS)); bzero(&ha->sb_array[0], (sizeof(struct ecore_sb_info) * QLNX_MAX_RSS)); return 0; } static void qlnx_init_fp(qlnx_host_t *ha) { int rss_id, txq_array_index, tc; for (rss_id = 0; rss_id < ha->num_rss; rss_id++) { struct qlnx_fastpath *fp = &ha->fp_array[rss_id]; fp->rss_id = rss_id; fp->edev = ha; fp->sb_info = &ha->sb_array[rss_id]; fp->rxq = &ha->rxq_array[rss_id]; fp->rxq->rxq_id = rss_id; for (tc = 0; tc < ha->num_tc; tc++) { txq_array_index = tc * ha->num_rss + rss_id; fp->txq[tc] = &ha->txq_array[txq_array_index]; fp->txq[tc]->index = txq_array_index; } snprintf(fp->name, sizeof(fp->name), "%s-fp-%d", qlnx_name_str, rss_id); fp->tx_ring_full = 0; /* reset all the statistics counters */ fp->tx_pkts_processed = 0; fp->tx_pkts_freed = 0; fp->tx_pkts_transmitted = 0; fp->tx_pkts_completed = 0; #ifdef QLNX_TRACE_PERF_DATA fp->tx_pkts_trans_ctx = 0; fp->tx_pkts_compl_ctx = 0; fp->tx_pkts_trans_fp = 0; fp->tx_pkts_compl_fp = 0; fp->tx_pkts_compl_intr = 0; #endif fp->tx_lso_wnd_min_len = 0; fp->tx_defrag = 0; fp->tx_nsegs_gt_elem_left = 0; fp->tx_tso_max_nsegs = 0; fp->tx_tso_min_nsegs = 0; fp->err_tx_nsegs_gt_elem_left = 0; fp->err_tx_dmamap_create = 0; fp->err_tx_defrag_dmamap_load = 0; fp->err_tx_non_tso_max_seg = 0; fp->err_tx_dmamap_load = 0; fp->err_tx_defrag = 0; fp->err_tx_free_pkt_null = 0; fp->err_tx_cons_idx_conflict = 0; fp->rx_pkts = 0; fp->err_m_getcl = 0; fp->err_m_getjcl = 0; } return; } static void qlnx_free_mem_sb(qlnx_host_t *ha, struct ecore_sb_info *sb_info) { struct ecore_dev *cdev; cdev = &ha->cdev; if (sb_info->sb_virt) { OSAL_DMA_FREE_COHERENT(cdev, ((void *)sb_info->sb_virt), (sb_info->sb_phys), (sizeof(*sb_info->sb_virt))); 
sb_info->sb_virt = NULL; } } static int qlnx_sb_init(struct ecore_dev *cdev, struct ecore_sb_info *sb_info, void *sb_virt_addr, bus_addr_t sb_phy_addr, u16 sb_id) { struct ecore_hwfn *p_hwfn; int hwfn_index, rc; u16 rel_sb_id; hwfn_index = sb_id % cdev->num_hwfns; p_hwfn = &cdev->hwfns[hwfn_index]; rel_sb_id = sb_id / cdev->num_hwfns; QL_DPRINT2(((qlnx_host_t *)cdev), "hwfn_index = %d p_hwfn = %p sb_id = 0x%x rel_sb_id = 0x%x \ sb_info = %p sb_virt_addr = %p sb_phy_addr = %p\n", hwfn_index, p_hwfn, sb_id, rel_sb_id, sb_info, sb_virt_addr, (void *)sb_phy_addr); rc = ecore_int_sb_init(p_hwfn, p_hwfn->p_main_ptt, sb_info, sb_virt_addr, sb_phy_addr, rel_sb_id); return rc; } /* This function allocates fast-path status block memory */ static int qlnx_alloc_mem_sb(qlnx_host_t *ha, struct ecore_sb_info *sb_info, u16 sb_id) { struct status_block_e4 *sb_virt; bus_addr_t sb_phys; int rc; uint32_t size; struct ecore_dev *cdev; cdev = &ha->cdev; size = sizeof(*sb_virt); sb_virt = OSAL_DMA_ALLOC_COHERENT(cdev, (&sb_phys), size); if (!sb_virt) { QL_DPRINT1(ha, "Status block allocation failed\n"); return -ENOMEM; } rc = qlnx_sb_init(cdev, sb_info, sb_virt, sb_phys, sb_id); if (rc) { OSAL_DMA_FREE_COHERENT(cdev, sb_virt, sb_phys, size); } return rc; } static void qlnx_free_rx_buffers(qlnx_host_t *ha, struct qlnx_rx_queue *rxq) { int i; struct sw_rx_data *rx_buf; for (i = 0; i < rxq->num_rx_buffers; i++) { rx_buf = &rxq->sw_rx_ring[i]; if (rx_buf->data != NULL) { if (rx_buf->map != NULL) { bus_dmamap_unload(ha->rx_tag, rx_buf->map); bus_dmamap_destroy(ha->rx_tag, rx_buf->map); rx_buf->map = NULL; } m_freem(rx_buf->data); rx_buf->data = NULL; } } return; } static void qlnx_free_mem_rxq(qlnx_host_t *ha, struct qlnx_rx_queue *rxq) { struct ecore_dev *cdev; int i; cdev = &ha->cdev; qlnx_free_rx_buffers(ha, rxq); for (i = 0; i < ETH_TPA_MAX_AGGS_NUM; i++) { qlnx_free_tpa_mbuf(ha, &rxq->tpa_info[i]); if (rxq->tpa_info[i].mpf != NULL) m_freem(rxq->tpa_info[i].mpf); } bzero((void 
*)&rxq->sw_rx_ring[0], (sizeof (struct sw_rx_data) * RX_RING_SIZE)); /* Free the real RQ ring used by FW */ if (rxq->rx_bd_ring.p_virt_addr) { ecore_chain_free(cdev, &rxq->rx_bd_ring); rxq->rx_bd_ring.p_virt_addr = NULL; } /* Free the real completion ring used by FW */ if (rxq->rx_comp_ring.p_virt_addr && rxq->rx_comp_ring.pbl_sp.p_virt_table) { ecore_chain_free(cdev, &rxq->rx_comp_ring); rxq->rx_comp_ring.p_virt_addr = NULL; rxq->rx_comp_ring.pbl_sp.p_virt_table = NULL; } #ifdef QLNX_SOFT_LRO { struct lro_ctrl *lro; lro = &rxq->lro; tcp_lro_free(lro); } #endif /* #ifdef QLNX_SOFT_LRO */ return; } static int qlnx_alloc_rx_buffer(qlnx_host_t *ha, struct qlnx_rx_queue *rxq) { register struct mbuf *mp; uint16_t rx_buf_size; struct sw_rx_data *sw_rx_data; struct eth_rx_bd *rx_bd; dma_addr_t dma_addr; bus_dmamap_t map; bus_dma_segment_t segs[1]; int nsegs; int ret; struct ecore_dev *cdev; cdev = &ha->cdev; rx_buf_size = rxq->rx_buf_size; mp = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rx_buf_size); if (mp == NULL) { QL_DPRINT1(ha, "Failed to allocate Rx data\n"); return -ENOMEM; } mp->m_len = mp->m_pkthdr.len = rx_buf_size; map = (bus_dmamap_t)0; ret = bus_dmamap_load_mbuf_sg(ha->rx_tag, map, mp, segs, &nsegs, BUS_DMA_NOWAIT); dma_addr = segs[0].ds_addr; if (ret || !dma_addr || (nsegs != 1)) { m_freem(mp); QL_DPRINT1(ha, "bus_dmamap_load failed[%d, 0x%016llx, %d]\n", ret, (long long unsigned int)dma_addr, nsegs); return -ENOMEM; } sw_rx_data = &rxq->sw_rx_ring[rxq->sw_rx_prod]; sw_rx_data->data = mp; sw_rx_data->dma_addr = dma_addr; sw_rx_data->map = map; /* Advance PROD and get BD pointer */ rx_bd = (struct eth_rx_bd *)ecore_chain_produce(&rxq->rx_bd_ring); rx_bd->addr.hi = htole32(U64_HI(dma_addr)); rx_bd->addr.lo = htole32(U64_LO(dma_addr)); bus_dmamap_sync(ha->rx_tag, map, BUS_DMASYNC_PREREAD); rxq->sw_rx_prod = (rxq->sw_rx_prod + 1) & (RX_RING_SIZE - 1); return 0; } static int qlnx_alloc_tpa_mbuf(qlnx_host_t *ha, uint16_t rx_buf_size, struct qlnx_agg_info *tpa) { struct 
mbuf *mp; dma_addr_t dma_addr; bus_dmamap_t map; bus_dma_segment_t segs[1]; int nsegs; int ret; struct sw_rx_data *rx_buf; mp = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rx_buf_size); if (mp == NULL) { QL_DPRINT1(ha, "Failed to allocate Rx data\n"); return -ENOMEM; } mp->m_len = mp->m_pkthdr.len = rx_buf_size; map = (bus_dmamap_t)0; ret = bus_dmamap_load_mbuf_sg(ha->rx_tag, map, mp, segs, &nsegs, BUS_DMA_NOWAIT); dma_addr = segs[0].ds_addr; if (ret || !dma_addr || (nsegs != 1)) { m_freem(mp); QL_DPRINT1(ha, "bus_dmamap_load failed[%d, 0x%016llx, %d]\n", ret, (long long unsigned int)dma_addr, nsegs); return -ENOMEM; } rx_buf = &tpa->rx_buf; memset(rx_buf, 0, sizeof (struct sw_rx_data)); rx_buf->data = mp; rx_buf->dma_addr = dma_addr; rx_buf->map = map; bus_dmamap_sync(ha->rx_tag, map, BUS_DMASYNC_PREREAD); return (0); } static void qlnx_free_tpa_mbuf(qlnx_host_t *ha, struct qlnx_agg_info *tpa) { struct sw_rx_data *rx_buf; rx_buf = &tpa->rx_buf; if (rx_buf->data != NULL) { if (rx_buf->map != NULL) { bus_dmamap_unload(ha->rx_tag, rx_buf->map); bus_dmamap_destroy(ha->rx_tag, rx_buf->map); rx_buf->map = NULL; } m_freem(rx_buf->data); rx_buf->data = NULL; } return; } /* This function allocates all memory needed per Rx queue */ static int qlnx_alloc_mem_rxq(qlnx_host_t *ha, struct qlnx_rx_queue *rxq) { int i, rc, num_allocated; struct ifnet *ifp; struct ecore_dev *cdev; cdev = &ha->cdev; ifp = ha->ifp; rxq->num_rx_buffers = RX_RING_SIZE; rxq->rx_buf_size = ha->rx_buf_size; /* Allocate the parallel driver ring for Rx buffers */ bzero((void *)&rxq->sw_rx_ring[0], (sizeof (struct sw_rx_data) * RX_RING_SIZE)); /* Allocate FW Rx ring */ rc = ecore_chain_alloc(cdev, ECORE_CHAIN_USE_TO_CONSUME_PRODUCE, ECORE_CHAIN_MODE_NEXT_PTR, ECORE_CHAIN_CNT_TYPE_U16, RX_RING_SIZE, sizeof(struct eth_rx_bd), &rxq->rx_bd_ring, NULL); if (rc) goto err; /* Allocate FW completion ring */ rc = ecore_chain_alloc(cdev, ECORE_CHAIN_USE_TO_CONSUME, ECORE_CHAIN_MODE_PBL, ECORE_CHAIN_CNT_TYPE_U16, 
RX_RING_SIZE,
			sizeof(union eth_rx_cqe),
			&rxq->rx_comp_ring, NULL);

	if (rc)
		goto err;

	/*
	 * Allocate buffers for the Rx ring: first one mbuf per TPA
	 * aggregation slot, then the regular receive buffers.
	 */

	for (i = 0; i < ETH_TPA_MAX_AGGS_NUM; i++) {
		rc = qlnx_alloc_tpa_mbuf(ha, rxq->rx_buf_size,
			&rxq->tpa_info[i]);
		/*
		 * NOTE(review): a failure here only ends this loop; rc is
		 * overwritten by the loop below, so the error is not
		 * propagated - confirm this is intended.
		 */
		if (rc)
			break;
	}

	for (i = 0; i < rxq->num_rx_buffers; i++) {
		rc = qlnx_alloc_rx_buffer(ha, rxq);
		if (rc)
			break;
	}
	/* i is the number of receive buffers that were posted. */
	num_allocated = i;
	if (!num_allocated) {
		QL_DPRINT1(ha, "Rx buffers allocation failed\n");
		goto err;
	} else if (num_allocated < rxq->num_rx_buffers) {
		/* A partially filled ring is logged but is not an error. */
		QL_DPRINT1(ha, "Allocated less buffers than"
			" desired (%d allocated)\n", num_allocated);
	}

#ifdef QLNX_SOFT_LRO

	{
		struct lro_ctrl *lro;

		lro = &rxq->lro;

#if (__FreeBSD_version >= 1100101) || (defined QLNX_QSORT_LRO)
		if (tcp_lro_init_args(lro, ifp, 0, rxq->num_rx_buffers)) {
			QL_DPRINT1(ha, "tcp_lro_init[%d] failed\n",
				rxq->rxq_id);
			goto err;
		}
#else
		if (tcp_lro_init(lro)) {
			QL_DPRINT1(ha, "tcp_lro_init[%d] failed\n",
				rxq->rxq_id);
			goto err;
		}
#endif /* #if (__FreeBSD_version >= 1100101) || (defined QLNX_QSORT_LRO) */

		lro->ifp = ha->ifp;
	}
#endif /* #ifdef QLNX_SOFT_LRO */
	return 0;

err:
	/* Every failure is reported as -ENOMEM after releasing the queue. */
	qlnx_free_mem_rxq(ha, rxq);
	return -ENOMEM;
}

/*
 * Clear the software Tx ring of a Tx queue and free its firmware
 * descriptor chain if one was allocated.  fp is not referenced.
 */
static void
qlnx_free_mem_txq(qlnx_host_t *ha, struct qlnx_fastpath *fp,
	struct qlnx_tx_queue *txq)
{
	struct ecore_dev	*cdev;

	cdev = &ha->cdev;

	bzero((void *)&txq->sw_tx_ring[0],
		(sizeof (struct sw_tx_bd) * TX_RING_SIZE));

	/* Free the real Tx ring used by FW */
	if (txq->tx_pbl.p_virt_addr) {
		ecore_chain_free(cdev, &txq->tx_pbl);
		txq->tx_pbl.p_virt_addr = NULL;
	}
	return;
}

/* This function allocates all memory needed per Tx queue */
static int
qlnx_alloc_mem_txq(qlnx_host_t *ha, struct qlnx_fastpath *fp,
	struct qlnx_tx_queue *txq)
{
	int			ret = ECORE_SUCCESS;
	union eth_tx_bd_types	*p_virt;
	struct ecore_dev	*cdev;

	cdev = &ha->cdev;

	bzero((void *)&txq->sw_tx_ring[0],
		(sizeof (struct sw_tx_bd) * TX_RING_SIZE));

	/* Allocate the real Tx ring to be used by FW */
	ret = ecore_chain_alloc(cdev,
			ECORE_CHAIN_USE_TO_CONSUME_PRODUCE,
			ECORE_CHAIN_MODE_PBL,
ECORE_CHAIN_CNT_TYPE_U16, TX_RING_SIZE, sizeof(*p_virt), &txq->tx_pbl, NULL); if (ret != ECORE_SUCCESS) { goto err; } txq->num_tx_buffers = TX_RING_SIZE; return 0; err: qlnx_free_mem_txq(ha, fp, txq); return -ENOMEM; } static void qlnx_free_tx_br(qlnx_host_t *ha, struct qlnx_fastpath *fp) { struct mbuf *mp; struct ifnet *ifp = ha->ifp; if (mtx_initialized(&fp->tx_mtx)) { if (fp->tx_br != NULL) { mtx_lock(&fp->tx_mtx); while ((mp = drbr_dequeue(ifp, fp->tx_br)) != NULL) { fp->tx_pkts_freed++; m_freem(mp); } mtx_unlock(&fp->tx_mtx); buf_ring_free(fp->tx_br, M_DEVBUF); fp->tx_br = NULL; } mtx_destroy(&fp->tx_mtx); } return; } static void qlnx_free_mem_fp(qlnx_host_t *ha, struct qlnx_fastpath *fp) { int tc; qlnx_free_mem_sb(ha, fp->sb_info); qlnx_free_mem_rxq(ha, fp->rxq); for (tc = 0; tc < ha->num_tc; tc++) qlnx_free_mem_txq(ha, fp, fp->txq[tc]); return; } static int qlnx_alloc_tx_br(qlnx_host_t *ha, struct qlnx_fastpath *fp) { snprintf(fp->tx_mtx_name, sizeof(fp->tx_mtx_name), "qlnx%d_fp%d_tx_mq_lock", ha->dev_unit, fp->rss_id); mtx_init(&fp->tx_mtx, fp->tx_mtx_name, NULL, MTX_DEF); fp->tx_br = buf_ring_alloc(TX_RING_SIZE, M_DEVBUF, M_NOWAIT, &fp->tx_mtx); if (fp->tx_br == NULL) { QL_DPRINT1(ha, "buf_ring_alloc failed for fp[%d, %d]\n", ha->dev_unit, fp->rss_id); return -ENOMEM; } return 0; } static int qlnx_alloc_mem_fp(qlnx_host_t *ha, struct qlnx_fastpath *fp) { int rc, tc; rc = qlnx_alloc_mem_sb(ha, fp->sb_info, fp->rss_id); if (rc) goto err; if (ha->rx_jumbo_buf_eq_mtu) { if (ha->max_frame_size <= MCLBYTES) ha->rx_buf_size = MCLBYTES; else if (ha->max_frame_size <= MJUMPAGESIZE) ha->rx_buf_size = MJUMPAGESIZE; else if (ha->max_frame_size <= MJUM9BYTES) ha->rx_buf_size = MJUM9BYTES; else if (ha->max_frame_size <= MJUM16BYTES) ha->rx_buf_size = MJUM16BYTES; } else { if (ha->max_frame_size <= MCLBYTES) ha->rx_buf_size = MCLBYTES; else ha->rx_buf_size = MJUMPAGESIZE; } rc = qlnx_alloc_mem_rxq(ha, fp->rxq); if (rc) goto err; for (tc = 0; tc < ha->num_tc; tc++) { rc = 
qlnx_alloc_mem_txq(ha, fp, fp->txq[tc]); if (rc) goto err; } return 0; err: qlnx_free_mem_fp(ha, fp); return -ENOMEM; } static void qlnx_free_mem_load(qlnx_host_t *ha) { int i; struct ecore_dev *cdev; cdev = &ha->cdev; for (i = 0; i < ha->num_rss; i++) { struct qlnx_fastpath *fp = &ha->fp_array[i]; qlnx_free_mem_fp(ha, fp); } return; } static int qlnx_alloc_mem_load(qlnx_host_t *ha) { int rc = 0, rss_id; for (rss_id = 0; rss_id < ha->num_rss; rss_id++) { struct qlnx_fastpath *fp = &ha->fp_array[rss_id]; rc = qlnx_alloc_mem_fp(ha, fp); if (rc) break; } return (rc); } static int qlnx_start_vport(struct ecore_dev *cdev, u8 vport_id, u16 mtu, u8 drop_ttl0_flg, u8 inner_vlan_removal_en_flg, u8 tx_switching, u8 hw_lro_enable) { int rc, i; struct ecore_sp_vport_start_params vport_start_params = { 0 }; qlnx_host_t *ha; ha = (qlnx_host_t *)cdev; vport_start_params.remove_inner_vlan = inner_vlan_removal_en_flg; vport_start_params.tx_switching = 0; vport_start_params.handle_ptp_pkts = 0; vport_start_params.only_untagged = 0; vport_start_params.drop_ttl0 = drop_ttl0_flg; vport_start_params.tpa_mode = (hw_lro_enable ? 
ECORE_TPA_MODE_RSC : ECORE_TPA_MODE_NONE); vport_start_params.max_buffers_per_cqe = QLNX_TPA_MAX_AGG_BUFFERS; vport_start_params.vport_id = vport_id; vport_start_params.mtu = mtu; QL_DPRINT2(ha, "Setting mtu to %d and VPORT ID = %d\n", mtu, vport_id); for_each_hwfn(cdev, i) { struct ecore_hwfn *p_hwfn = &cdev->hwfns[i]; vport_start_params.concrete_fid = p_hwfn->hw_info.concrete_fid; vport_start_params.opaque_fid = p_hwfn->hw_info.opaque_fid; rc = ecore_sp_vport_start(p_hwfn, &vport_start_params); if (rc) { QL_DPRINT1(ha, "Failed to start VPORT V-PORT %d" " with MTU %d\n" , vport_id, mtu); return -ENOMEM; } ecore_hw_start_fastpath(p_hwfn); QL_DPRINT2(ha, "Started V-PORT %d with MTU %d\n", vport_id, mtu); } return 0; } static int qlnx_update_vport(struct ecore_dev *cdev, struct qlnx_update_vport_params *params) { struct ecore_sp_vport_update_params sp_params; int rc, i, j, fp_index; struct ecore_hwfn *p_hwfn; struct ecore_rss_params *rss; qlnx_host_t *ha = (qlnx_host_t *)cdev; struct qlnx_fastpath *fp; memset(&sp_params, 0, sizeof(sp_params)); /* Translate protocol params into sp params */ sp_params.vport_id = params->vport_id; sp_params.update_vport_active_rx_flg = params->update_vport_active_rx_flg; sp_params.vport_active_rx_flg = params->vport_active_rx_flg; sp_params.update_vport_active_tx_flg = params->update_vport_active_tx_flg; sp_params.vport_active_tx_flg = params->vport_active_tx_flg; sp_params.update_inner_vlan_removal_flg = params->update_inner_vlan_removal_flg; sp_params.inner_vlan_removal_flg = params->inner_vlan_removal_flg; sp_params.sge_tpa_params = params->sge_tpa_params; /* RSS - is a bit tricky, since upper-layer isn't familiar with hwfns. * We need to re-fix the rss values per engine for CMT. 
*/ if (params->rss_params->update_rss_config) sp_params.rss_params = params->rss_params; else sp_params.rss_params = NULL; for_each_hwfn(cdev, i) { p_hwfn = &cdev->hwfns[i]; if ((cdev->num_hwfns > 1) && params->rss_params->update_rss_config && params->rss_params->rss_enable) { rss = params->rss_params; for (j = 0; j < ECORE_RSS_IND_TABLE_SIZE; j++) { fp_index = ((cdev->num_hwfns * j) + i) % ha->num_rss; fp = &ha->fp_array[fp_index]; rss->rss_ind_table[j] = fp->rxq->handle; } for (j = 0; j < ECORE_RSS_IND_TABLE_SIZE;) { QL_DPRINT3(ha, "%p %p %p %p %p %p %p %p \n", rss->rss_ind_table[j], rss->rss_ind_table[j+1], rss->rss_ind_table[j+2], rss->rss_ind_table[j+3], rss->rss_ind_table[j+4], rss->rss_ind_table[j+5], rss->rss_ind_table[j+6], rss->rss_ind_table[j+7]); j += 8; } } sp_params.opaque_fid = p_hwfn->hw_info.opaque_fid; QL_DPRINT1(ha, "Update sp vport ID=%d\n", params->vport_id); rc = ecore_sp_vport_update(p_hwfn, &sp_params, ECORE_SPQ_MODE_EBLOCK, NULL); if (rc) { QL_DPRINT1(ha, "Failed to update VPORT\n"); return rc; } QL_DPRINT2(ha, "Updated V-PORT %d: tx_active_flag %d, \ rx_active_flag %d [tx_update %d], [rx_update %d]\n", params->vport_id, params->vport_active_tx_flg, params->vport_active_rx_flg, params->update_vport_active_tx_flg, params->update_vport_active_rx_flg); } return 0; } static void qlnx_reuse_rx_data(struct qlnx_rx_queue *rxq) { struct eth_rx_bd *rx_bd_cons = ecore_chain_consume(&rxq->rx_bd_ring); struct eth_rx_bd *rx_bd_prod = ecore_chain_produce(&rxq->rx_bd_ring); struct sw_rx_data *sw_rx_data_cons = &rxq->sw_rx_ring[rxq->sw_rx_cons]; struct sw_rx_data *sw_rx_data_prod = &rxq->sw_rx_ring[rxq->sw_rx_prod]; sw_rx_data_prod->data = sw_rx_data_cons->data; memcpy(rx_bd_prod, rx_bd_cons, sizeof(struct eth_rx_bd)); rxq->sw_rx_cons = (rxq->sw_rx_cons + 1) & (RX_RING_SIZE - 1); rxq->sw_rx_prod = (rxq->sw_rx_prod + 1) & (RX_RING_SIZE - 1); return; } static void qlnx_update_rx_prod(struct ecore_hwfn *p_hwfn, struct qlnx_rx_queue *rxq) { uint16_t bd_prod; 
uint16_t cqe_prod; union { struct eth_rx_prod_data rx_prod_data; uint32_t data32; } rx_prods; bd_prod = ecore_chain_get_prod_idx(&rxq->rx_bd_ring); cqe_prod = ecore_chain_get_prod_idx(&rxq->rx_comp_ring); /* Update producers */ rx_prods.rx_prod_data.bd_prod = htole16(bd_prod); rx_prods.rx_prod_data.cqe_prod = htole16(cqe_prod); /* Make sure that the BD and SGE data is updated before updating the * producers since FW might read the BD/SGE right after the producer * is updated. */ wmb(); internal_ram_wr(p_hwfn, rxq->hw_rxq_prod_addr, sizeof(rx_prods), &rx_prods.data32); /* mmiowb is needed to synchronize doorbell writes from more than one * processor. It guarantees that the write arrives to the device before * the napi lock is released and another qlnx_poll is called (possibly * on another CPU). Without this barrier, the next doorbell can bypass * this doorbell. This is applicable to IA64/Altix systems. */ wmb(); return; } static uint32_t qlnx_hash_key[] = { ((0x6d << 24)|(0x5a << 16)|(0x56 << 8)|0xda), ((0x25 << 24)|(0x5b << 16)|(0x0e << 8)|0xc2), ((0x41 << 24)|(0x67 << 16)|(0x25 << 8)|0x3d), ((0x43 << 24)|(0xa3 << 16)|(0x8f << 8)|0xb0), ((0xd0 << 24)|(0xca << 16)|(0x2b << 8)|0xcb), ((0xae << 24)|(0x7b << 16)|(0x30 << 8)|0xb4), ((0x77 << 24)|(0xcb << 16)|(0x2d << 8)|0xa3), ((0x80 << 24)|(0x30 << 16)|(0xf2 << 8)|0x0c), ((0x6a << 24)|(0x42 << 16)|(0xb7 << 8)|0x3b), ((0xbe << 24)|(0xac << 16)|(0x01 << 8)|0xfa)}; static int qlnx_start_queues(qlnx_host_t *ha) { int rc, tc, i, vport_id = 0, drop_ttl0_flg = 1, vlan_removal_en = 1, tx_switching = 0, hw_lro_enable = 0; struct ecore_dev *cdev = &ha->cdev; struct ecore_rss_params *rss_params = &ha->rss_params; struct qlnx_update_vport_params vport_update_params; struct ifnet *ifp; struct ecore_hwfn *p_hwfn; struct ecore_sge_tpa_params tpa_params; struct ecore_queue_start_common_params qparams; struct qlnx_fastpath *fp; ifp = ha->ifp; QL_DPRINT1(ha, "Num RSS = %d\n", ha->num_rss); if (!ha->num_rss) { QL_DPRINT1(ha, "Cannot 
update V-VPORT as active as there" " are no Rx queues\n"); return -EINVAL; } #ifndef QLNX_SOFT_LRO hw_lro_enable = ifp->if_capenable & IFCAP_LRO; #endif /* #ifndef QLNX_SOFT_LRO */ rc = qlnx_start_vport(cdev, vport_id, ifp->if_mtu, drop_ttl0_flg, vlan_removal_en, tx_switching, hw_lro_enable); if (rc) { QL_DPRINT1(ha, "Start V-PORT failed %d\n", rc); return rc; } QL_DPRINT2(ha, "Start vport ramrod passed, " "vport_id = %d, MTU = %d, vlan_removal_en = %d\n", vport_id, (int)(ifp->if_mtu + 0xe), vlan_removal_en); for_each_rss(i) { struct ecore_rxq_start_ret_params rx_ret_params; struct ecore_txq_start_ret_params tx_ret_params; fp = &ha->fp_array[i]; p_hwfn = &cdev->hwfns[(fp->rss_id % cdev->num_hwfns)]; bzero(&qparams, sizeof(struct ecore_queue_start_common_params)); bzero(&rx_ret_params, sizeof (struct ecore_rxq_start_ret_params)); qparams.queue_id = i ; qparams.vport_id = vport_id; qparams.stats_id = vport_id; qparams.p_sb = fp->sb_info; qparams.sb_idx = RX_PI; rc = ecore_eth_rx_queue_start(p_hwfn, p_hwfn->hw_info.opaque_fid, &qparams, fp->rxq->rx_buf_size, /* bd_max_bytes */ /* bd_chain_phys_addr */ fp->rxq->rx_bd_ring.p_phys_addr, /* cqe_pbl_addr */ ecore_chain_get_pbl_phys(&fp->rxq->rx_comp_ring), /* cqe_pbl_size */ ecore_chain_get_page_cnt(&fp->rxq->rx_comp_ring), &rx_ret_params); if (rc) { QL_DPRINT1(ha, "Start RXQ #%d failed %d\n", i, rc); return rc; } fp->rxq->hw_rxq_prod_addr = rx_ret_params.p_prod; fp->rxq->handle = rx_ret_params.p_handle; fp->rxq->hw_cons_ptr = &fp->sb_info->sb_virt->pi_array[RX_PI]; qlnx_update_rx_prod(p_hwfn, fp->rxq); for (tc = 0; tc < ha->num_tc; tc++) { struct qlnx_tx_queue *txq = fp->txq[tc]; bzero(&qparams, sizeof(struct ecore_queue_start_common_params)); bzero(&tx_ret_params, sizeof (struct ecore_txq_start_ret_params)); qparams.queue_id = txq->index / cdev->num_hwfns ; qparams.vport_id = vport_id; qparams.stats_id = vport_id; qparams.p_sb = fp->sb_info; qparams.sb_idx = TX_PI(tc); rc = ecore_eth_tx_queue_start(p_hwfn, 
p_hwfn->hw_info.opaque_fid,
				&qparams, tc,
				/* bd_chain_phys_addr */
				ecore_chain_get_pbl_phys(&txq->tx_pbl),
				ecore_chain_get_page_cnt(&txq->tx_pbl),
				&tx_ret_params);

			if (rc) {
				QL_DPRINT1(ha, "Start TXQ #%d failed %d\n",
					txq->index, rc);
				return rc;
			}

			/* Keep the doorbell, handle and consumer index location. */
			txq->doorbell_addr = tx_ret_params.p_doorbell;
			txq->handle = tx_ret_params.p_handle;

			txq->hw_cons_ptr =
				&fp->sb_info->sb_virt->pi_array[TX_PI(tc)];
			/* Pre-build the constant fields of the Tx doorbell. */
			SET_FIELD(txq->tx_db.data.params,
				ETH_DB_DATA_DEST, DB_DEST_XCM);
			SET_FIELD(txq->tx_db.data.params, ETH_DB_DATA_AGG_CMD,
				DB_AGG_CMD_SET);
			SET_FIELD(txq->tx_db.data.params,
				ETH_DB_DATA_AGG_VAL_SEL,
				DQ_XCM_ETH_TX_BD_PROD_CMD);

			txq->tx_db.data.agg_flags = DQ_XCM_ETH_DQ_CF_CMD;
		}
	}

	/* Fill struct with RSS params */
	if (ha->num_rss > 1) {

		rss_params->update_rss_config = 1;
		rss_params->rss_enable = 1;
		rss_params->update_rss_capabilities = 1;
		rss_params->update_rss_ind_table = 1;
		rss_params->update_rss_key = 1;
		rss_params->rss_caps = ECORE_RSS_IPV4 | ECORE_RSS_IPV6 |
					ECORE_RSS_IPV4_TCP | ECORE_RSS_IPV6_TCP;
		rss_params->rss_table_size_log = 7; /* 2^7 = 128 */

		/* Assign the Rx queue handles to the table round-robin. */
		for (i = 0; i < ECORE_RSS_IND_TABLE_SIZE; i++) {
			fp = &ha->fp_array[(i % ha->num_rss)];
			rss_params->rss_ind_table[i] = fp->rxq->handle;
		}

		for (i = 0; i < ECORE_RSS_KEY_SIZE; i++)
			rss_params->rss_key[i] = (__le32)qlnx_hash_key[i];

	} else {
		/* A single queue needs no RSS configuration. */
		memset(rss_params, 0, sizeof(*rss_params));
	}

	/* Prepare and send the vport enable */
	memset(&vport_update_params, 0, sizeof(vport_update_params));
	vport_update_params.vport_id = vport_id;
	vport_update_params.update_vport_active_tx_flg = 1;
	vport_update_params.vport_active_tx_flg = 1;
	vport_update_params.update_vport_active_rx_flg = 1;
	vport_update_params.vport_active_rx_flg = 1;
	vport_update_params.rss_params = rss_params;
	vport_update_params.update_inner_vlan_removal_flg = 1;
	vport_update_params.inner_vlan_removal_flg = 1;

	/* TPA parameters are only supplied when hardware LRO is enabled. */
	if (hw_lro_enable) {
		memset(&tpa_params, 0, sizeof (struct ecore_sge_tpa_params));

		tpa_params.max_buffers_per_cqe = QLNX_TPA_MAX_AGG_BUFFERS;

		tpa_params.update_tpa_en_flg =
1;
		tpa_params.tpa_ipv4_en_flg = 1;
		tpa_params.tpa_ipv6_en_flg = 1;

		tpa_params.update_tpa_param_flg = 1;
		tpa_params.tpa_pkt_split_flg = 0;
		tpa_params.tpa_hdr_data_split_flg = 0;
		tpa_params.tpa_gro_consistent_flg = 0;
		tpa_params.tpa_max_aggs_num = ETH_TPA_MAX_AGGS_NUM;
		/* (uint16_t)(-1) selects the largest 16-bit value. */
		tpa_params.tpa_max_size = (uint16_t)(-1);
		tpa_params.tpa_min_size_to_start = ifp->if_mtu/2;
		tpa_params.tpa_min_size_to_cont = ifp->if_mtu/2;

		vport_update_params.sge_tpa_params = &tpa_params;
	}

	rc = qlnx_update_vport(cdev, &vport_update_params);
	if (rc) {
		QL_DPRINT1(ha, "Update V-PORT failed %d\n", rc);
		return rc;
	}

	return 0;
}

/*
 * Wait for a Tx queue to drain: while the hardware consumer index read
 * through txq->hw_cons_ptr differs from the chain's consumer index, run
 * qlnx_tx_int() under the fastpath Tx mutex, call
 * qlnx_mdelay(__func__, 2) and re-read the hardware index.
 *
 * The loop has no iteration limit.  Always returns 0.
 */
static int
qlnx_drain_txq(qlnx_host_t *ha, struct qlnx_fastpath *fp,
	struct qlnx_tx_queue *txq)
{
	uint16_t	hw_bd_cons;
	uint16_t	ecore_cons_idx;

	QL_DPRINT2(ha, "enter\n");

	hw_bd_cons = le16toh(*txq->hw_cons_ptr);

	while (hw_bd_cons !=
		(ecore_cons_idx = ecore_chain_get_cons_idx(&txq->tx_pbl))) {

		mtx_lock(&fp->tx_mtx);

		(void)qlnx_tx_int(ha, fp, txq);

		mtx_unlock(&fp->tx_mtx);

		qlnx_mdelay(__func__, 2);

		hw_bd_cons = le16toh(*txq->hw_cons_ptr);
	}

	QL_DPRINT2(ha, "[%d, %d]: done\n", fp->rss_id, txq->index);

	return 0;
}

/*
 * Deactivate the vport, drain every Tx queue, stop the Tx and Rx queues
 * in reverse order and finally stop the vport on every hardware function.
 * Returns 0 on success or the first failing status.
 */
static int
qlnx_stop_queues(qlnx_host_t *ha)
{
	struct qlnx_update_vport_params	vport_update_params;
	struct ecore_dev		*cdev;
	struct qlnx_fastpath		*fp;
	int				rc, tc, i;

	cdev = &ha->cdev;

	/* Disable the vport */

	memset(&vport_update_params, 0, sizeof(vport_update_params));

	vport_update_params.vport_id = 0;
	vport_update_params.update_vport_active_tx_flg = 1;
	vport_update_params.vport_active_tx_flg = 0;
	vport_update_params.update_vport_active_rx_flg = 1;
	vport_update_params.vport_active_rx_flg = 0;
	/* The softc's RSS parameters are modified in place here. */
	vport_update_params.rss_params = &ha->rss_params;
	vport_update_params.rss_params->update_rss_config = 0;
	vport_update_params.rss_params->rss_enable = 0;
	vport_update_params.update_inner_vlan_removal_flg = 0;
	vport_update_params.inner_vlan_removal_flg = 0;

	QL_DPRINT1(ha, "Update vport ID= %d\n", vport_update_params.vport_id);

	rc = qlnx_update_vport(cdev, &vport_update_params);
	if (rc) {
QL_DPRINT1(ha, "Failed to update vport\n"); return rc; } /* Flush Tx queues. If needed, request drain from MCP */ for_each_rss(i) { fp = &ha->fp_array[i]; for (tc = 0; tc < ha->num_tc; tc++) { struct qlnx_tx_queue *txq = fp->txq[tc]; rc = qlnx_drain_txq(ha, fp, txq); if (rc) return rc; } } /* Stop all Queues in reverse order*/ for (i = ha->num_rss - 1; i >= 0; i--) { struct ecore_hwfn *p_hwfn = &cdev->hwfns[(i % cdev->num_hwfns)]; fp = &ha->fp_array[i]; /* Stop the Tx Queue(s)*/ for (tc = 0; tc < ha->num_tc; tc++) { int tx_queue_id; tx_queue_id = tc * ha->num_rss + i; rc = ecore_eth_tx_queue_stop(p_hwfn, fp->txq[tc]->handle); if (rc) { QL_DPRINT1(ha, "Failed to stop TXQ #%d\n", tx_queue_id); return rc; } } /* Stop the Rx Queue*/ rc = ecore_eth_rx_queue_stop(p_hwfn, fp->rxq->handle, false, false); if (rc) { QL_DPRINT1(ha, "Failed to stop RXQ #%d\n", i); return rc; } } /* Stop the vport */ for_each_hwfn(cdev, i) { struct ecore_hwfn *p_hwfn = &cdev->hwfns[i]; rc = ecore_sp_vport_stop(p_hwfn, p_hwfn->hw_info.opaque_fid, 0); if (rc) { QL_DPRINT1(ha, "Failed to stop VPORT\n"); return rc; } } return rc; } static int qlnx_set_ucast_rx_mac(qlnx_host_t *ha, enum ecore_filter_opcode opcode, unsigned char mac[ETH_ALEN]) { struct ecore_filter_ucast ucast; struct ecore_dev *cdev; int rc; cdev = &ha->cdev; bzero(&ucast, sizeof(struct ecore_filter_ucast)); ucast.opcode = opcode; ucast.type = ECORE_FILTER_MAC; ucast.is_rx_filter = 1; ucast.vport_to_add_to = 0; memcpy(&ucast.mac[0], mac, ETH_ALEN); rc = ecore_filter_ucast_cmd(cdev, &ucast, ECORE_SPQ_MODE_CB, NULL); return (rc); } static int qlnx_remove_all_ucast_mac(qlnx_host_t *ha) { struct ecore_filter_ucast ucast; struct ecore_dev *cdev; int rc; bzero(&ucast, sizeof(struct ecore_filter_ucast)); ucast.opcode = ECORE_FILTER_REPLACE; ucast.type = ECORE_FILTER_MAC; ucast.is_rx_filter = 1; cdev = &ha->cdev; rc = ecore_filter_ucast_cmd(cdev, &ucast, ECORE_SPQ_MODE_CB, NULL); return (rc); } static int 
qlnx_remove_all_mcast_mac(qlnx_host_t *ha) { struct ecore_filter_mcast *mcast; struct ecore_dev *cdev; int rc, i; cdev = &ha->cdev; mcast = &ha->ecore_mcast; bzero(mcast, sizeof(struct ecore_filter_mcast)); mcast->opcode = ECORE_FILTER_REMOVE; for (i = 0; i < QLNX_MAX_NUM_MULTICAST_ADDRS; i++) { if (ha->mcast[i].addr[0] || ha->mcast[i].addr[1] || ha->mcast[i].addr[2] || ha->mcast[i].addr[3] || ha->mcast[i].addr[4] || ha->mcast[i].addr[5]) { memcpy(&mcast->mac[i], &ha->mcast[i].addr[0], ETH_ALEN); mcast->num_mc_addrs++; } } mcast = &ha->ecore_mcast; rc = ecore_filter_mcast_cmd(cdev, mcast, ECORE_SPQ_MODE_CB, NULL); bzero(ha->mcast, (sizeof(qlnx_mcast_t) * QLNX_MAX_NUM_MULTICAST_ADDRS)); ha->nmcast = 0; return (rc); } static int qlnx_clean_filters(qlnx_host_t *ha) { int rc = 0; /* Remove all unicast macs */ rc = qlnx_remove_all_ucast_mac(ha); if (rc) return rc; /* Remove all multicast macs */ rc = qlnx_remove_all_mcast_mac(ha); if (rc) return rc; rc = qlnx_set_ucast_rx_mac(ha, ECORE_FILTER_FLUSH, ha->primary_mac); return (rc); } static int qlnx_set_rx_accept_filter(qlnx_host_t *ha, uint8_t filter) { struct ecore_filter_accept_flags accept; int rc = 0; struct ecore_dev *cdev; cdev = &ha->cdev; bzero(&accept, sizeof(struct ecore_filter_accept_flags)); accept.update_rx_mode_config = 1; accept.rx_accept_filter = filter; accept.update_tx_mode_config = 1; accept.tx_accept_filter = ECORE_ACCEPT_UCAST_MATCHED | ECORE_ACCEPT_MCAST_MATCHED | ECORE_ACCEPT_BCAST; rc = ecore_filter_accept_cmd(cdev, 0, accept, false, false, ECORE_SPQ_MODE_CB, NULL); return (rc); } static int qlnx_set_rx_mode(qlnx_host_t *ha) { int rc = 0; uint8_t filter; rc = qlnx_set_ucast_rx_mac(ha, ECORE_FILTER_REPLACE, ha->primary_mac); if (rc) return rc; rc = qlnx_remove_all_mcast_mac(ha); if (rc) return rc; filter = ECORE_ACCEPT_UCAST_MATCHED | ECORE_ACCEPT_MCAST_MATCHED | ECORE_ACCEPT_BCAST; ha->filter = filter; rc = qlnx_set_rx_accept_filter(ha, filter); return (rc); } static int qlnx_set_link(qlnx_host_t 
*ha, bool link_up) { int i, rc = 0; struct ecore_dev *cdev; struct ecore_hwfn *hwfn; struct ecore_ptt *ptt; cdev = &ha->cdev; for_each_hwfn(cdev, i) { hwfn = &cdev->hwfns[i]; ptt = ecore_ptt_acquire(hwfn); if (!ptt) return -EBUSY; rc = ecore_mcp_set_link(hwfn, ptt, link_up); ecore_ptt_release(hwfn, ptt); if (rc) return rc; } return (rc); } #if __FreeBSD_version >= 1100000 static uint64_t qlnx_get_counter(if_t ifp, ift_counter cnt) { qlnx_host_t *ha; uint64_t count; ha = (qlnx_host_t *)if_getsoftc(ifp); switch (cnt) { case IFCOUNTER_IPACKETS: count = ha->hw_stats.common.rx_ucast_pkts + ha->hw_stats.common.rx_mcast_pkts + ha->hw_stats.common.rx_bcast_pkts; break; case IFCOUNTER_IERRORS: count = ha->hw_stats.common.rx_crc_errors + ha->hw_stats.common.rx_align_errors + ha->hw_stats.common.rx_oversize_packets + ha->hw_stats.common.rx_undersize_packets; break; case IFCOUNTER_OPACKETS: count = ha->hw_stats.common.tx_ucast_pkts + ha->hw_stats.common.tx_mcast_pkts + ha->hw_stats.common.tx_bcast_pkts; break; case IFCOUNTER_OERRORS: count = ha->hw_stats.common.tx_err_drop_pkts; break; case IFCOUNTER_COLLISIONS: return (0); case IFCOUNTER_IBYTES: count = ha->hw_stats.common.rx_ucast_bytes + ha->hw_stats.common.rx_mcast_bytes + ha->hw_stats.common.rx_bcast_bytes; break; case IFCOUNTER_OBYTES: count = ha->hw_stats.common.tx_ucast_bytes + ha->hw_stats.common.tx_mcast_bytes + ha->hw_stats.common.tx_bcast_bytes; break; case IFCOUNTER_IMCASTS: count = ha->hw_stats.common.rx_mcast_bytes; break; case IFCOUNTER_OMCASTS: count = ha->hw_stats.common.tx_mcast_bytes; break; case IFCOUNTER_IQDROPS: case IFCOUNTER_OQDROPS: case IFCOUNTER_NOPROTO: default: return (if_get_counter_default(ifp, cnt)); } return (count); } #endif static void qlnx_timer(void *arg) { qlnx_host_t *ha; ha = (qlnx_host_t *)arg; ecore_get_vport_stats(&ha->cdev, &ha->hw_stats); if (ha->storm_stats_gather) qlnx_sample_storm_stats(ha); callout_reset(&ha->qlnx_callout, hz, qlnx_timer, ha); return; } static int 
qlnx_load(qlnx_host_t *ha) { int i; int rc = 0; struct ecore_dev *cdev; device_t dev; cdev = &ha->cdev; dev = ha->pci_dev; QL_DPRINT2(ha, "enter\n"); rc = qlnx_alloc_mem_arrays(ha); if (rc) goto qlnx_load_exit0; qlnx_init_fp(ha); rc = qlnx_alloc_mem_load(ha); if (rc) goto qlnx_load_exit1; QL_DPRINT2(ha, "Allocated %d RSS queues on %d TC/s\n", ha->num_rss, ha->num_tc); for (i = 0; i < ha->num_rss; i++) { if ((rc = bus_setup_intr(dev, ha->irq_vec[i].irq, (INTR_TYPE_NET | INTR_MPSAFE), NULL, qlnx_fp_isr, &ha->irq_vec[i], &ha->irq_vec[i].handle))) { QL_DPRINT1(ha, "could not setup interrupt\n"); goto qlnx_load_exit2; } QL_DPRINT2(ha, "rss_id = %d irq_rid %d \ irq %p handle %p\n", i, ha->irq_vec[i].irq_rid, ha->irq_vec[i].irq, ha->irq_vec[i].handle); bus_bind_intr(dev, ha->irq_vec[i].irq, (i % mp_ncpus)); } rc = qlnx_start_queues(ha); if (rc) goto qlnx_load_exit2; QL_DPRINT2(ha, "Start VPORT, RXQ and TXQ succeeded\n"); /* Add primary mac and set Rx filters */ rc = qlnx_set_rx_mode(ha); if (rc) goto qlnx_load_exit2; /* Ask for link-up using current configuration */ qlnx_set_link(ha, true); ha->state = QLNX_STATE_OPEN; bzero(&ha->hw_stats, sizeof(struct ecore_eth_stats)); if (ha->flags.callout_init) callout_reset(&ha->qlnx_callout, hz, qlnx_timer, ha); goto qlnx_load_exit0; qlnx_load_exit2: qlnx_free_mem_load(ha); qlnx_load_exit1: ha->num_rss = 0; qlnx_load_exit0: QL_DPRINT2(ha, "exit [%d]\n", rc); return rc; } static void qlnx_drain_soft_lro(qlnx_host_t *ha) { #ifdef QLNX_SOFT_LRO struct ifnet *ifp; int i; ifp = ha->ifp; if (ifp->if_capenable & IFCAP_LRO) { for (i = 0; i < ha->num_rss; i++) { struct qlnx_fastpath *fp = &ha->fp_array[i]; struct lro_ctrl *lro; lro = &fp->rxq->lro; #if (__FreeBSD_version >= 1100101) || (defined QLNX_QSORT_LRO) tcp_lro_flush_all(lro); #else struct lro_entry *queued; while ((!SLIST_EMPTY(&lro->lro_active))){ queued = SLIST_FIRST(&lro->lro_active); SLIST_REMOVE_HEAD(&lro->lro_active, next); tcp_lro_flush(lro, queued); } #endif /* #if 
(__FreeBSD_version >= 1100101) || (defined QLNX_QSORT_LRO) */ } } #endif /* #ifdef QLNX_SOFT_LRO */ return; } static void qlnx_unload(qlnx_host_t *ha) { struct ecore_dev *cdev; device_t dev; int i; cdev = &ha->cdev; dev = ha->pci_dev; QL_DPRINT2(ha, "enter\n"); QL_DPRINT1(ha, " QLNX STATE = %d\n",ha->state); if (ha->state == QLNX_STATE_OPEN) { qlnx_set_link(ha, false); qlnx_clean_filters(ha); qlnx_stop_queues(ha); ecore_hw_stop_fastpath(cdev); for (i = 0; i < ha->num_rss; i++) { if (ha->irq_vec[i].handle) { (void)bus_teardown_intr(dev, ha->irq_vec[i].irq, ha->irq_vec[i].handle); ha->irq_vec[i].handle = NULL; } } qlnx_drain_fp_taskqueues(ha); qlnx_drain_soft_lro(ha); qlnx_free_mem_load(ha); } if (ha->flags.callout_init) callout_drain(&ha->qlnx_callout); qlnx_mdelay(__func__, 1000); ha->state = QLNX_STATE_CLOSED; QL_DPRINT2(ha, "exit\n"); return; } static int qlnx_grc_dumpsize(qlnx_host_t *ha, uint32_t *num_dwords, int hwfn_index) { int rval = -1; struct ecore_hwfn *p_hwfn; struct ecore_ptt *p_ptt; ecore_dbg_set_app_ver(ecore_dbg_get_fw_func_ver()); p_hwfn = &ha->cdev.hwfns[hwfn_index]; p_ptt = ecore_ptt_acquire(p_hwfn); if (!p_ptt) { QL_DPRINT1(ha, "ecore_ptt_acquire failed\n"); return (rval); } rval = ecore_dbg_grc_get_dump_buf_size(p_hwfn, p_ptt, num_dwords); if (rval == DBG_STATUS_OK) rval = 0; else { QL_DPRINT1(ha, "ecore_dbg_grc_get_dump_buf_size failed" "[0x%x]\n", rval); } ecore_ptt_release(p_hwfn, p_ptt); return (rval); } static int qlnx_idle_chk_size(qlnx_host_t *ha, uint32_t *num_dwords, int hwfn_index) { int rval = -1; struct ecore_hwfn *p_hwfn; struct ecore_ptt *p_ptt; ecore_dbg_set_app_ver(ecore_dbg_get_fw_func_ver()); p_hwfn = &ha->cdev.hwfns[hwfn_index]; p_ptt = ecore_ptt_acquire(p_hwfn); if (!p_ptt) { QL_DPRINT1(ha, "ecore_ptt_acquire failed\n"); return (rval); } rval = ecore_dbg_idle_chk_get_dump_buf_size(p_hwfn, p_ptt, num_dwords); if (rval == DBG_STATUS_OK) rval = 0; else { QL_DPRINT1(ha, "ecore_dbg_idle_chk_get_dump_buf_size failed" " [0x%x]\n", 
rval); } ecore_ptt_release(p_hwfn, p_ptt); return (rval); } static void qlnx_sample_storm_stats(qlnx_host_t *ha) { int i, index; struct ecore_dev *cdev; qlnx_storm_stats_t *s_stats; uint32_t reg; struct ecore_ptt *p_ptt; struct ecore_hwfn *hwfn; if (ha->storm_stats_index >= QLNX_STORM_STATS_SAMPLES_PER_HWFN) { ha->storm_stats_gather = 0; return; } cdev = &ha->cdev; for_each_hwfn(cdev, i) { hwfn = &cdev->hwfns[i]; p_ptt = ecore_ptt_acquire(hwfn); if (!p_ptt) return; index = ha->storm_stats_index + (i * QLNX_STORM_STATS_SAMPLES_PER_HWFN); s_stats = &ha->storm_stats[index]; /* XSTORM */ reg = XSEM_REG_FAST_MEMORY + SEM_FAST_REG_STORM_ACTIVE_CYCLES_BB_K2; s_stats->xstorm_active_cycles = ecore_rd(hwfn, p_ptt, reg); reg = XSEM_REG_FAST_MEMORY + SEM_FAST_REG_STORM_STALL_CYCLES_BB_K2; s_stats->xstorm_stall_cycles = ecore_rd(hwfn, p_ptt, reg); reg = XSEM_REG_FAST_MEMORY + SEM_FAST_REG_IDLE_SLEEPING_CYCLES_BB_K2; s_stats->xstorm_sleeping_cycles = ecore_rd(hwfn, p_ptt, reg); reg = XSEM_REG_FAST_MEMORY + SEM_FAST_REG_IDLE_INACTIVE_CYCLES_BB_K2; s_stats->xstorm_inactive_cycles = ecore_rd(hwfn, p_ptt, reg); /* YSTORM */ reg = YSEM_REG_FAST_MEMORY + SEM_FAST_REG_STORM_ACTIVE_CYCLES_BB_K2; s_stats->ystorm_active_cycles = ecore_rd(hwfn, p_ptt, reg); reg = YSEM_REG_FAST_MEMORY + SEM_FAST_REG_STORM_STALL_CYCLES_BB_K2; s_stats->ystorm_stall_cycles = ecore_rd(hwfn, p_ptt, reg); reg = YSEM_REG_FAST_MEMORY + SEM_FAST_REG_IDLE_SLEEPING_CYCLES_BB_K2; s_stats->ystorm_sleeping_cycles = ecore_rd(hwfn, p_ptt, reg); reg = YSEM_REG_FAST_MEMORY + SEM_FAST_REG_IDLE_INACTIVE_CYCLES_BB_K2; s_stats->ystorm_inactive_cycles = ecore_rd(hwfn, p_ptt, reg); /* PSTORM */ reg = PSEM_REG_FAST_MEMORY + SEM_FAST_REG_STORM_ACTIVE_CYCLES_BB_K2; s_stats->pstorm_active_cycles = ecore_rd(hwfn, p_ptt, reg); reg = PSEM_REG_FAST_MEMORY + SEM_FAST_REG_STORM_STALL_CYCLES_BB_K2; s_stats->pstorm_stall_cycles = ecore_rd(hwfn, p_ptt, reg); reg = PSEM_REG_FAST_MEMORY + SEM_FAST_REG_IDLE_SLEEPING_CYCLES_BB_K2; 
s_stats->pstorm_sleeping_cycles = ecore_rd(hwfn, p_ptt, reg); reg = PSEM_REG_FAST_MEMORY + SEM_FAST_REG_IDLE_INACTIVE_CYCLES_BB_K2; s_stats->pstorm_inactive_cycles = ecore_rd(hwfn, p_ptt, reg); /* TSTORM */ reg = TSEM_REG_FAST_MEMORY + SEM_FAST_REG_STORM_ACTIVE_CYCLES_BB_K2; s_stats->tstorm_active_cycles = ecore_rd(hwfn, p_ptt, reg); reg = TSEM_REG_FAST_MEMORY + SEM_FAST_REG_STORM_STALL_CYCLES_BB_K2; s_stats->tstorm_stall_cycles = ecore_rd(hwfn, p_ptt, reg); reg = TSEM_REG_FAST_MEMORY + SEM_FAST_REG_IDLE_SLEEPING_CYCLES_BB_K2; s_stats->tstorm_sleeping_cycles = ecore_rd(hwfn, p_ptt, reg); reg = TSEM_REG_FAST_MEMORY + SEM_FAST_REG_IDLE_INACTIVE_CYCLES_BB_K2; s_stats->tstorm_inactive_cycles = ecore_rd(hwfn, p_ptt, reg); /* MSTORM */ reg = MSEM_REG_FAST_MEMORY + SEM_FAST_REG_STORM_ACTIVE_CYCLES_BB_K2; s_stats->mstorm_active_cycles = ecore_rd(hwfn, p_ptt, reg); reg = MSEM_REG_FAST_MEMORY + SEM_FAST_REG_STORM_STALL_CYCLES_BB_K2; s_stats->mstorm_stall_cycles = ecore_rd(hwfn, p_ptt, reg); reg = MSEM_REG_FAST_MEMORY + SEM_FAST_REG_IDLE_SLEEPING_CYCLES_BB_K2; s_stats->mstorm_sleeping_cycles = ecore_rd(hwfn, p_ptt, reg); reg = MSEM_REG_FAST_MEMORY + SEM_FAST_REG_IDLE_INACTIVE_CYCLES_BB_K2; s_stats->mstorm_inactive_cycles = ecore_rd(hwfn, p_ptt, reg); /* USTORM */ reg = USEM_REG_FAST_MEMORY + SEM_FAST_REG_STORM_ACTIVE_CYCLES_BB_K2; s_stats->ustorm_active_cycles = ecore_rd(hwfn, p_ptt, reg); reg = USEM_REG_FAST_MEMORY + SEM_FAST_REG_STORM_STALL_CYCLES_BB_K2; s_stats->ustorm_stall_cycles = ecore_rd(hwfn, p_ptt, reg); reg = USEM_REG_FAST_MEMORY + SEM_FAST_REG_IDLE_SLEEPING_CYCLES_BB_K2; s_stats->ustorm_sleeping_cycles = ecore_rd(hwfn, p_ptt, reg); reg = USEM_REG_FAST_MEMORY + SEM_FAST_REG_IDLE_INACTIVE_CYCLES_BB_K2; s_stats->ustorm_inactive_cycles = ecore_rd(hwfn, p_ptt, reg); ecore_ptt_release(hwfn, p_ptt); } ha->storm_stats_index++; return; } /* * Name: qlnx_dump_buf8 * Function: dumps a buffer as bytes */ static void qlnx_dump_buf8(qlnx_host_t *ha, const char *msg, void *dbuf, 
uint32_t len) { device_t dev; uint32_t i = 0; uint8_t *buf; dev = ha->pci_dev; buf = dbuf; device_printf(dev, "%s: %s 0x%x dump start\n", __func__, msg, len); while (len >= 16) { device_printf(dev,"0x%08x:" " %02x %02x %02x %02x %02x %02x %02x %02x" " %02x %02x %02x %02x %02x %02x %02x %02x\n", i, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7], buf[8], buf[9], buf[10], buf[11], buf[12], buf[13], buf[14], buf[15]); i += 16; len -= 16; buf += 16; } switch (len) { case 1: device_printf(dev,"0x%08x: %02x\n", i, buf[0]); break; case 2: device_printf(dev,"0x%08x: %02x %02x\n", i, buf[0], buf[1]); break; case 3: device_printf(dev,"0x%08x: %02x %02x %02x\n", i, buf[0], buf[1], buf[2]); break; case 4: device_printf(dev,"0x%08x: %02x %02x %02x %02x\n", i, buf[0], buf[1], buf[2], buf[3]); break; case 5: device_printf(dev,"0x%08x:" " %02x %02x %02x %02x %02x\n", i, buf[0], buf[1], buf[2], buf[3], buf[4]); break; case 6: device_printf(dev,"0x%08x:" " %02x %02x %02x %02x %02x %02x\n", i, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]); break; case 7: device_printf(dev,"0x%08x:" " %02x %02x %02x %02x %02x %02x %02x\n", i, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6]); break; case 8: device_printf(dev,"0x%08x:" " %02x %02x %02x %02x %02x %02x %02x %02x\n", i, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7]); break; case 9: device_printf(dev,"0x%08x:" " %02x %02x %02x %02x %02x %02x %02x %02x" " %02x\n", i, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7], buf[8]); break; case 10: device_printf(dev,"0x%08x:" " %02x %02x %02x %02x %02x %02x %02x %02x" " %02x %02x\n", i, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7], buf[8], buf[9]); break; case 11: device_printf(dev,"0x%08x:" " %02x %02x %02x %02x %02x %02x %02x %02x" " %02x %02x %02x\n", i, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7], buf[8], buf[9], buf[10]); break; case 12: device_printf(dev,"0x%08x:" " %02x %02x %02x %02x %02x %02x %02x 
%02x" " %02x %02x %02x %02x\n", i, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7], buf[8], buf[9], buf[10], buf[11]); break; case 13: device_printf(dev,"0x%08x:" " %02x %02x %02x %02x %02x %02x %02x %02x" " %02x %02x %02x %02x %02x\n", i, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7], buf[8], buf[9], buf[10], buf[11], buf[12]); break; case 14: device_printf(dev,"0x%08x:" " %02x %02x %02x %02x %02x %02x %02x %02x" " %02x %02x %02x %02x %02x %02x\n", i, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7], buf[8], buf[9], buf[10], buf[11], buf[12], buf[13]); break; case 15: device_printf(dev,"0x%08x:" " %02x %02x %02x %02x %02x %02x %02x %02x" " %02x %02x %02x %02x %02x %02x %02x\n", i, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7], buf[8], buf[9], buf[10], buf[11], buf[12], buf[13], buf[14]); break; default: break; } device_printf(dev, "%s: %s dump end\n", __func__, msg); return; } Index: head/sys/dev/sbni/if_sbni.c =================================================================== --- head/sys/dev/sbni/if_sbni.c (revision 331796) +++ head/sys/dev/sbni/if_sbni.c (revision 331797) @@ -1,1277 +1,1277 @@ /*- * Copyright (c) 1997-2001 Granch, Ltd. All rights reserved. * Author: Denis I.Timofeev * * Redistributon and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * Device driver for Granch SBNI12 leased line adapters * * Revision 2.0.0 1997/08/06 * Initial revision by Alexey Zverev * * Revision 2.0.1 1997/08/11 * Additional internal statistics support (tx statistics) * * Revision 2.0.2 1997/11/05 * if_bpf bug has been fixed * * Revision 2.0.3 1998/12/20 * Memory leakage has been eliminated in * the sbni_st and sbni_timeout routines. * * Revision 3.0 2000/08/10 by Yaroslav Polyakov * Support for PCI cards. 4.1 modification. * * Revision 3.1 2000/09/12 * Removed extra #defines around bpf functions * * Revision 4.0 2000/11/23 by Denis Timofeev * Completely redesigned the buffer management * * Revision 4.1 2001/01/21 * Support for PCI Dual cards and new SBNI12D-10, -11 Dual/ISA cards * * Written with reference to NE2000 driver developed by David Greenman. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void sbni_init(void *); static void sbni_init_locked(struct sbni_softc *); static void sbni_start(struct ifnet *); static void sbni_start_locked(struct ifnet *); static int sbni_ioctl(struct ifnet *, u_long, caddr_t); static void sbni_stop(struct sbni_softc *); static void handle_channel(struct sbni_softc *); static void card_start(struct sbni_softc *); static int recv_frame(struct sbni_softc *); static void send_frame(struct sbni_softc *); static int upload_data(struct sbni_softc *, u_int, u_int, u_int, u_int32_t); static int skip_tail(struct sbni_softc *, u_int, u_int32_t); static void interpret_ack(struct sbni_softc *, u_int); static void download_data(struct sbni_softc *, u_int32_t *); static void prepare_to_send(struct sbni_softc *); static void drop_xmit_queue(struct sbni_softc *); static int get_rx_buf(struct sbni_softc *); static void indicate_pkt(struct sbni_softc *); static void change_level(struct sbni_softc *); static int check_fhdr(struct sbni_softc *, u_int *, u_int *, u_int *, u_int *, u_int32_t *); static int append_frame_to_pkt(struct sbni_softc *, u_int, u_int32_t); static void timeout_change_level(struct sbni_softc *); static void send_frame_header(struct sbni_softc *, u_int32_t *); static void set_initial_values(struct sbni_softc *, struct sbni_flags); static u_int32_t calc_crc32(u_int32_t, caddr_t, u_int); static timeout_t sbni_timeout; static __inline u_char sbni_inb(struct sbni_softc *, enum sbni_reg); static __inline void sbni_outb(struct sbni_softc *, enum sbni_reg, u_char); static __inline void sbni_insb(struct sbni_softc *, u_char *, u_int); static __inline void sbni_outsb(struct sbni_softc *, u_char *, u_int); static u_int32_t crc32tab[]; #ifdef SBNI_DUAL_COMPOUND static struct mtx headlist_lock; MTX_SYSINIT(headlist_lock, 
&headlist_lock, "sbni headlist", MTX_DEF); static struct sbni_softc *sbni_headlist; #endif /* -------------------------------------------------------------------------- */ static __inline u_char sbni_inb(struct sbni_softc *sc, enum sbni_reg reg) { return bus_space_read_1( rman_get_bustag(sc->io_res), rman_get_bushandle(sc->io_res), sc->io_off + reg); } static __inline void sbni_outb(struct sbni_softc *sc, enum sbni_reg reg, u_char value) { bus_space_write_1( rman_get_bustag(sc->io_res), rman_get_bushandle(sc->io_res), sc->io_off + reg, value); } static __inline void sbni_insb(struct sbni_softc *sc, u_char *to, u_int len) { bus_space_read_multi_1( rman_get_bustag(sc->io_res), rman_get_bushandle(sc->io_res), sc->io_off + DAT, to, len); } static __inline void sbni_outsb(struct sbni_softc *sc, u_char *from, u_int len) { bus_space_write_multi_1( rman_get_bustag(sc->io_res), rman_get_bushandle(sc->io_res), sc->io_off + DAT, from, len); } /* Valid combinations in CSR0 (for probing): VALID_DECODER 0000,0011,1011,1010 ; 0 ; - TR_REQ ; 1 ; + TR_RDY ; 2 ; - TR_RDY TR_REQ ; 3 ; + BU_EMP ; 4 ; + BU_EMP TR_REQ ; 5 ; + BU_EMP TR_RDY ; 6 ; - BU_EMP TR_RDY TR_REQ ; 7 ; + RC_RDY ; 8 ; + RC_RDY TR_REQ ; 9 ; + RC_RDY TR_RDY ; 10 ; - RC_RDY TR_RDY TR_REQ ; 11 ; - RC_RDY BU_EMP ; 12 ; - RC_RDY BU_EMP TR_REQ ; 13 ; - RC_RDY BU_EMP TR_RDY ; 14 ; - RC_RDY BU_EMP TR_RDY TR_REQ ; 15 ; - */ #define VALID_DECODER (2 + 8 + 0x10 + 0x20 + 0x80 + 0x100 + 0x200) int sbni_probe(struct sbni_softc *sc) { u_char csr0; csr0 = sbni_inb(sc, CSR0); if (csr0 != 0xff && csr0 != 0x00) { csr0 &= ~EN_INT; if (csr0 & BU_EMP) csr0 |= EN_INT; if (VALID_DECODER & (1 << (csr0 >> 4))) return (0); } return (ENXIO); } /* * Install interface into kernel networking data structures */ int sbni_attach(struct sbni_softc *sc, int unit, struct sbni_flags flags) { struct ifnet *ifp; u_char csr0; ifp = sc->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) return (ENOMEM); sbni_outb(sc, CSR0, 0); set_initial_values(sc, flags); /* 
Initialize ifnet structure */ ifp->if_softc = sc; if_initname(ifp, "sbni", unit); ifp->if_init = sbni_init; ifp->if_start = sbni_start; ifp->if_ioctl = sbni_ioctl; IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); /* report real baud rate */ csr0 = sbni_inb(sc, CSR0); ifp->if_baudrate = (csr0 & 0x01 ? 500000 : 2000000) / (1 << flags.rate); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; mtx_init(&sc->lock, ifp->if_xname, MTX_NETWORK_LOCK, MTX_DEF); callout_init_mtx(&sc->wch, &sc->lock, 0); ether_ifattach(ifp, sc->enaddr); /* device attach does transition from UNCONFIGURED to IDLE state */ if_printf(ifp, "speed %ju, rxl ", (uintmax_t)ifp->if_baudrate); if (sc->delta_rxl) printf("auto\n"); else printf("%d (fixed)\n", sc->cur_rxl_index); return (0); } void sbni_detach(struct sbni_softc *sc) { SBNI_LOCK(sc); sbni_stop(sc); SBNI_UNLOCK(sc); callout_drain(&sc->wch); ether_ifdetach(sc->ifp); if (sc->irq_handle) bus_teardown_intr(sc->dev, sc->irq_res, sc->irq_handle); mtx_destroy(&sc->lock); if_free(sc->ifp); } void sbni_release_resources(struct sbni_softc *sc) { if (sc->irq_res) bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid, sc->irq_res); if (sc->io_res && sc->io_off == 0) bus_release_resource(sc->dev, SYS_RES_IOPORT, sc->io_rid, sc->io_res); } /* -------------------------------------------------------------------------- */ static void sbni_init(void *xsc) { struct sbni_softc *sc; sc = (struct sbni_softc *)xsc; SBNI_LOCK(sc); sbni_init_locked(sc); SBNI_UNLOCK(sc); } static void sbni_init_locked(struct sbni_softc *sc) { struct ifnet *ifp; ifp = sc->ifp; /* * kludge to avoid multiple initialization when more than once * protocols configured */ if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; card_start(sc); callout_reset(&sc->wch, hz/SBNI_HZ, sbni_timeout, sc); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; /* attempt to start output */ sbni_start_locked(ifp); } static void sbni_start(struct ifnet *ifp) { struct sbni_softc *sc = 
ifp->if_softc;

	/* Take the softc lock around the locked worker. */
	SBNI_LOCK(sc);
	sbni_start_locked(ifp);
	SBNI_UNLOCK(sc);
}

/*
 * Locked variant of sbni_start(): if no frames are pending for transmission
 * (tx_frameno == 0), dequeue and set up the next packet.
 */
static void
sbni_start_locked(struct ifnet *ifp)
{
	struct sbni_softc *sc = ifp->if_softc;

	if (sc->tx_frameno == 0)
		prepare_to_send(sc);
}

/*
 * Stop the interface: write 0 to CSR0, drop everything queued for transmit,
 * free the receive buffer if one is held, stop the timeout callout and clear
 * the RUNNING and OACTIVE driver flags.
 */
static void
sbni_stop(struct sbni_softc *sc)
{
	sbni_outb(sc, CSR0, 0);
	drop_xmit_queue(sc);

	if (sc->rx_buf_p) {
		m_freem(sc->rx_buf_p);
		sc->rx_buf_p = NULL;
	}

	callout_stop(&sc->wch);
	sc->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
}

/* -------------------------------------------------------------------------- */

/* interrupt handler */

/*
 * 	SBNI12D-10, -11/ISA boards in "common interrupt" mode cannot be
 * treated as two independent single-channel devices. Each channel appears
 * as an Ethernet interface, but the interrupt handler must be shared. In
 * practice only the first channel's ("master") driver registers the handler.
 * Its struct softc holds a pointer to the "slave" channel's struct softc,
 * and it handles that channel's interrupts too.
 *	The softc of each successfully attached ISA SBNI board is linked into
 * a list. When the next board's driver is initialized, it scans this list.
 * If it finds a softc with the same irq and an ioaddr that differs by 4,
 * it assumes that board to be the "master".
*/

/*
 * Interrupt handler.  arg is the softc the handler was registered with.
 * Polls CSR0 of this channel and, when sc->slave_sc is set, of the second
 * channel; each channel that shows RC_RDY or TR_RDY is serviced under its
 * own lock.  The loop repeats until a full pass finds neither channel ready.
 */
void
sbni_intr(void *arg)
{
	struct sbni_softc *sc;
	int repeat;

	sc = (struct sbni_softc *)arg;

	do {
		repeat = 0;
		SBNI_LOCK(sc);
		if (sbni_inb(sc, CSR0) & (RC_RDY | TR_RDY)) {
			handle_channel(sc);
			repeat = 1;
		}
		SBNI_UNLOCK(sc);
		if (sc->slave_sc) {
			/* second channel present */
			SBNI_LOCK(sc->slave_sc);
			if (sbni_inb(sc->slave_sc, CSR0) & (RC_RDY | TR_RDY)) {
				handle_channel(sc->slave_sc);
				repeat = 1;
			}
			SBNI_UNLOCK(sc->slave_sc);
		}
	} while (repeat);
}


/*
 * Service one channel.  EN_INT is cleared and TR_REQ set in CSR0 for the
 * duration of the loop; while RC_RDY or TR_RDY is set a pending frame is
 * received and then either a frame is sent or TR_REQ is cleared.  EN_INT is
 * set again before returning.
 */
static void
handle_channel(struct sbni_softc *sc)
{
	int req_ans;
	u_char csr0;

	sbni_outb(sc, CSR0, (sbni_inb(sc, CSR0) & ~EN_INT) | TR_REQ);

	/* activity seen: restart the countdown used by sbni_timeout() */
	sc->timer_ticks = CHANGE_LEVEL_START_TICKS;
	for (;;) {
		csr0 = sbni_inb(sc, CSR0);
		if ((csr0 & (RC_RDY | TR_RDY)) == 0)
			break;

		/* by default answer only if the previous frame was bad */
		req_ans = !(sc->state & FL_PREV_OK);

		if (csr0 & RC_RDY)
			req_ans = recv_frame(sc);

		/*
		 * TR_RDY always equals 1 here because we own the marker,
		 * and we set TR_REQ when we disabled interrupts
		 */
		csr0 = sbni_inb(sc, CSR0);
		if ((csr0 & TR_RDY) == 0 || (csr0 & RC_RDY) != 0)
			if_printf(sc->ifp, "internal error!\n");

		/* if state & FL_NEED_RESEND != 0 then tx_frameno != 0 */
		if (req_ans || sc->tx_frameno != 0)
			send_frame(sc);
		else {
			/* send the marker without any data */
			sbni_outb(sc, CSR0, sbni_inb(sc, CSR0) & ~TR_REQ);
		}
	}

	sbni_outb(sc, CSR0, sbni_inb(sc, CSR0) | EN_INT);
}


/*
 * Receive one frame.
 * Returns 1 if the received frame needs to be acknowledged.
 * An empty frame received without errors is not acknowledged.
 */

static int
recv_frame(struct sbni_softc *sc)
{
	u_int32_t crc;
	u_int framelen, frameno, ack;
	u_int is_first, frame_ok;

	crc = CRC32_INITIAL;
	if (check_fhdr(sc, &framelen, &frameno, &ack, &is_first, &crc)) {
		/* frames longer than 4 bytes carry data; others are skipped */
		frame_ok = framelen > 4 ?
		    upload_data(sc, framelen, frameno, is_first, crc) :
		    skip_tail(sc, framelen, crc);
		if (frame_ok)
			interpret_ack(sc, ack);
	} else {
		framelen = 0;
		frame_ok = 0;
	}

	sbni_outb(sc, CSR0, sbni_inb(sc, CSR0) ^ CT_ZER);
	if (frame_ok) {
		sc->state |= FL_PREV_OK;
		if (framelen > 4)
			sc->in_stats.all_rx_number++;
	} else {
		sc->state &= ~FL_PREV_OK;
		change_level(sc);
		sc->in_stats.all_rx_number++;
		sc->in_stats.bad_rx_number++;
	}

	return (!frame_ok || framelen > 4);
}


/*
 * Transmit the current frame (header, optional payload, CRC), or give up on
 * the whole transmit queue once the retry budget is exhausted.  Afterwards
 * TR_REQ is cleared, and set again if further frames remain.
 */
static void
send_frame(struct sbni_softc *sc)
{
	u_int32_t crc;
	u_char csr0;

	crc = CRC32_INITIAL;
	if (sc->state & FL_NEED_RESEND) {

		/* if the frame was sent but not ACK'ed - resend it */
		if (sc->trans_errors) {
			sc->trans_errors--;
			if (sc->framelen != 0)
				sc->in_stats.resend_tx_number++;
		} else {
			/* could not transmit after many attempts */
			drop_xmit_queue(sc);
			goto do_send;
		}
	} else
		sc->trans_errors = TR_ERROR_COUNT;

	send_frame_header(sc, &crc);
	sc->state |= FL_NEED_RESEND;
	/*
	 * FL_NEED_RESEND will be cleared after the ACK; if an empty
	 * frame was sent, prepare_to_send clears it for the next frame
	 */


	if (sc->framelen) {
		download_data(sc, &crc);
		sc->in_stats.all_tx_number++;
		sc->state |= FL_WAIT_ACK;
	}

	sbni_outsb(sc, (u_char *)&crc, sizeof crc);

do_send:
	csr0 = sbni_inb(sc, CSR0);
	sbni_outb(sc, CSR0, csr0 & ~TR_REQ);

	if (sc->tx_frameno) {
		/* next frame exists - request to send */
		sbni_outb(sc, CSR0, csr0 | TR_REQ);
	}
}


/*
 * Write sc->framelen payload bytes to the adapter, taken from the mbuf chain
 * sc->tx_buf_p starting at packet offset sc->outpos, and fold them into
 * *crc_p.  If the chain runs out first, the rest is padded with zero bytes.
 */
static void
download_data(struct sbni_softc *sc, u_int32_t *crc_p)
{
	struct mbuf *m;
	caddr_t	data_p;
	u_int data_len, pos, slice;

	data_p = NULL;		/* initialized to avoid warn */
	pos = 0;

	/* find the mbuf that contains offset sc->outpos */
	for (m = sc->tx_buf_p;  m != NULL && pos < sc->pktlen;  m = m->m_next) {
		if (pos + m->m_len > sc->outpos) {
			data_len = m->m_len - (sc->outpos - pos);
			data_p = mtod(m, caddr_t) + (sc->outpos - pos);

			goto do_copy;
		} else
			pos += m->m_len;
	}

	data_len = 0;

do_copy:
	pos = 0;	/* from here on: bytes of this frame already written */
	do {
		if (data_len) {
			slice = min(data_len, sc->framelen - pos);
			sbni_outsb(sc, data_p, slice);
			*crc_p = calc_crc32(*crc_p, data_p, slice);

			pos += slice;
			if (data_len -= slice)
				data_p += slice;
			else {
				/* advance to the next non-empty mbuf, if any */
				do {
					m = m->m_next;
				} while (m != NULL && m->m_len == 0);

				if (m) {
					data_len = m->m_len;
					data_p = mtod(m, caddr_t);
				}
			}
		} else {
			/* frame too short - zero padding */

			pos = sc->framelen - pos;
			while (pos--) {
				sbni_outb(sc, DAT, 0);
				*crc_p = CRC32(0, *crc_p);
			}
			return;
		}
	} while (pos < sc->framelen);
}


/*
 * Read the data part of a received frame.  The frame is appended to the
 * packet being assembled when its number matches sc->wait_frameno and it
 * fits; otherwise its bytes are consumed only to check the CRC.
 * Returns non-zero if the frame was read with a correct CRC.
 */
static int
upload_data(struct sbni_softc *sc, u_int framelen, u_int frameno,
	    u_int is_first, u_int32_t crc)
{
	int frame_ok;

	if (is_first) {
		sc->wait_frameno = frameno;
		sc->inppos = 0;
	}

	if (sc->wait_frameno == frameno) {

		if (sc->inppos + framelen <= ETHER_MAX_LEN) {
			frame_ok = append_frame_to_pkt(sc, framelen, crc);

		/*
		 * if the CRC is right but framelen is incorrect then a
		 * transmitter error has occurred... drop the entire packet
		 */
		} else if ((frame_ok = skip_tail(sc, framelen, crc)) != 0) {
			sc->wait_frameno = 0;
			sc->inppos = 0;
			if_inc_counter(sc->ifp, IFCOUNTER_IERRORS, 1);
			/* now skip all frames until is_first != 0 */
		}
	} else
		frame_ok = skip_tail(sc, framelen, crc);

	if (is_first && !frame_ok) {
		/*
		 * The frame is damaged, but is_first has already been
		 * recorded... Drop the entire packet.
		 */
		sc->wait_frameno = 0;
		if_inc_counter(sc->ifp, IFCOUNTER_IERRORS, 1);
	}

	return (frame_ok);
}


static __inline void	send_complete(struct sbni_softc *);

/* Free the transmitted packet and count it as an output packet. */
static __inline void
send_complete(struct sbni_softc *sc)
{
	m_freem(sc->tx_buf_p);
	sc->tx_buf_p = NULL;
	if_inc_counter(sc->ifp, IFCOUNTER_OPACKETS, 1);
}


/*
 * Process the acknowledgement field of a received frame.  On FRAME_SENT_OK
 * the resend flag is cleared and, if an ACK was awaited, transmission moves
 * on to the next frame or, after the last one, to the next packet.
 * FL_WAIT_ACK is cleared in every case.
 */
static void
interpret_ack(struct sbni_softc *sc, u_int ack)
{
	if (ack == FRAME_SENT_OK) {
		sc->state &= ~FL_NEED_RESEND;

		if (sc->state & FL_WAIT_ACK) {
			sc->outpos += sc->framelen;

			if (--sc->tx_frameno) {
				/* more frames left: size the next one */
				sc->framelen = min(
				    sc->maxframe, sc->pktlen - sc->outpos);
			} else {
				send_complete(sc);
				prepare_to_send(sc);
			}
		}
	}

	sc->state &= ~FL_WAIT_ACK;
}


/*
 * Glue the received frame onto the previous fragments of the packet.
 * Indicate the packet once the last frame has been accepted.
*/ static int append_frame_to_pkt(struct sbni_softc *sc, u_int framelen, u_int32_t crc) { caddr_t p; if (sc->inppos + framelen > ETHER_MAX_LEN) return (0); if (!sc->rx_buf_p && !get_rx_buf(sc)) return (0); p = sc->rx_buf_p->m_data + sc->inppos; sbni_insb(sc, p, framelen); if (calc_crc32(crc, p, framelen) != CRC32_REMAINDER) return (0); sc->inppos += framelen - 4; if (--sc->wait_frameno == 0) { /* last frame received */ indicate_pkt(sc); if_inc_counter(sc->ifp, IFCOUNTER_IPACKETS, 1); } return (1); } /* * Prepare to start output on adapter. Current priority must be set to splimp * before this routine is called. * Transmitter will be actually activated when marker has been accepted. */ static void prepare_to_send(struct sbni_softc *sc) { struct mbuf *m; u_int len; /* sc->tx_buf_p == NULL here! */ if (sc->tx_buf_p) printf("sbni: memory leak!\n"); sc->outpos = 0; sc->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND); for (;;) { IF_DEQUEUE(&sc->ifp->if_snd, sc->tx_buf_p); if (!sc->tx_buf_p) { /* nothing to transmit... 
*/ sc->pktlen = 0; sc->tx_frameno = 0; sc->framelen = 0; sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; return; } for (len = 0, m = sc->tx_buf_p; m; m = m->m_next) len += m->m_len; if (len != 0) break; m_freem(sc->tx_buf_p); } if (len < SBNI_MIN_LEN) len = SBNI_MIN_LEN; sc->pktlen = len; sc->tx_frameno = howmany(len, sc->maxframe); sc->framelen = min(len, sc->maxframe); sbni_outb(sc, CSR0, sbni_inb(sc, CSR0) | TR_REQ); sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; BPF_MTAP(sc->ifp, sc->tx_buf_p); } static void drop_xmit_queue(struct sbni_softc *sc) { struct mbuf *m; if (sc->tx_buf_p) { m_freem(sc->tx_buf_p); sc->tx_buf_p = NULL; if_inc_counter(sc->ifp, IFCOUNTER_OERRORS, 1); } for (;;) { IF_DEQUEUE(&sc->ifp->if_snd, m); if (m == NULL) break; m_freem(m); if_inc_counter(sc->ifp, IFCOUNTER_OERRORS, 1); } sc->tx_frameno = 0; sc->framelen = 0; sc->outpos = 0; sc->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND); sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; } static void send_frame_header(struct sbni_softc *sc, u_int32_t *crc_p) { u_int32_t crc; u_int len_field; u_char value; crc = *crc_p; len_field = sc->framelen + 6; /* CRC + frameno + reserved */ if (sc->state & FL_NEED_RESEND) len_field |= FRAME_RETRY; /* non-first attempt... */ if (sc->outpos == 0) len_field |= FRAME_FIRST; len_field |= (sc->state & FL_PREV_OK) ? 
FRAME_SENT_OK : FRAME_SENT_BAD; sbni_outb(sc, DAT, SBNI_SIG); value = (u_char)len_field; sbni_outb(sc, DAT, value); crc = CRC32(value, crc); value = (u_char)(len_field >> 8); sbni_outb(sc, DAT, value); crc = CRC32(value, crc); sbni_outb(sc, DAT, sc->tx_frameno); crc = CRC32(sc->tx_frameno, crc); sbni_outb(sc, DAT, 0); crc = CRC32(0, crc); *crc_p = crc; } /* * if frame tail not needed (incorrect number or received twice), * it won't store, but CRC will be calculated */ static int skip_tail(struct sbni_softc *sc, u_int tail_len, u_int32_t crc) { while (tail_len--) crc = CRC32(sbni_inb(sc, DAT), crc); return (crc == CRC32_REMAINDER); } static int check_fhdr(struct sbni_softc *sc, u_int *framelen, u_int *frameno, u_int *ack, u_int *is_first, u_int32_t *crc_p) { u_int32_t crc; u_char value; crc = *crc_p; if (sbni_inb(sc, DAT) != SBNI_SIG) return (0); value = sbni_inb(sc, DAT); *framelen = (u_int)value; crc = CRC32(value, crc); value = sbni_inb(sc, DAT); *framelen |= ((u_int)value) << 8; crc = CRC32(value, crc); *ack = *framelen & FRAME_ACK_MASK; *is_first = (*framelen & FRAME_FIRST) != 0; if ((*framelen &= FRAME_LEN_MASK) < 6 || *framelen > SBNI_MAX_FRAME - 3) return (0); value = sbni_inb(sc, DAT); *frameno = (u_int)value; crc = CRC32(value, crc); crc = CRC32(sbni_inb(sc, DAT), crc); /* reserved byte */ *framelen -= 2; *crc_p = crc; return (1); } static int get_rx_buf(struct sbni_softc *sc) { struct mbuf *m; MGETHDR(m, M_NOWAIT, MT_DATA); if (m == NULL) { if_printf(sc->ifp, "cannot allocate header mbuf\n"); return (0); } /* * We always put the received packet in a single buffer - * either with just an mbuf header or in a cluster attached * to the header. The +2 is to compensate for the alignment * fixup below. */ if (ETHER_MAX_LEN + 2 > MHLEN) { /* Attach an mbuf cluster */ if (!(MCLGET(m, M_NOWAIT))) { m_freem(m); return (0); } } m->m_pkthdr.len = m->m_len = ETHER_MAX_LEN + 2; /* * The +2 is to longword align the start of the real packet. 
* (sizeof ether_header == 14) * This is important for NFS. */ m_adj(m, 2); sc->rx_buf_p = m; return (1); } static void indicate_pkt(struct sbni_softc *sc) { struct ifnet *ifp = sc->ifp; struct mbuf *m; m = sc->rx_buf_p; m->m_pkthdr.rcvif = ifp; m->m_pkthdr.len = m->m_len = sc->inppos; sc->rx_buf_p = NULL; SBNI_UNLOCK(sc); (*ifp->if_input)(ifp, m); SBNI_LOCK(sc); } /* -------------------------------------------------------------------------- */ /* * Routine checks periodically wire activity and regenerates marker if * connect was inactive for a long time. */ static void sbni_timeout(void *xsc) { struct sbni_softc *sc; u_char csr0; sc = (struct sbni_softc *)xsc; SBNI_ASSERT_LOCKED(sc); csr0 = sbni_inb(sc, CSR0); if (csr0 & RC_CHK) { if (sc->timer_ticks) { if (csr0 & (RC_RDY | BU_EMP)) /* receiving not active */ sc->timer_ticks--; } else { sc->in_stats.timeout_number++; if (sc->delta_rxl) timeout_change_level(sc); sbni_outb(sc, CSR1, *(u_char *)&sc->csr1 | PR_RES); csr0 = sbni_inb(sc, CSR0); } } sbni_outb(sc, CSR0, csr0 | RC_CHK); callout_reset(&sc->wch, hz/SBNI_HZ, sbni_timeout, sc); } /* -------------------------------------------------------------------------- */ static void card_start(struct sbni_softc *sc) { sc->timer_ticks = CHANGE_LEVEL_START_TICKS; sc->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND); sc->state |= FL_PREV_OK; sc->inppos = 0; sc->wait_frameno = 0; sbni_outb(sc, CSR1, *(u_char *)&sc->csr1 | PR_RES); sbni_outb(sc, CSR0, EN_INT); } /* -------------------------------------------------------------------------- */ static u_char rxl_tab[] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x08, 0x0a, 0x0c, 0x0f, 0x16, 0x18, 0x1a, 0x1c, 0x1f }; #define SIZE_OF_TIMEOUT_RXL_TAB 4 static u_char timeout_rxl_tab[] = { 0x03, 0x05, 0x08, 0x0b }; static void set_initial_values(struct sbni_softc *sc, struct sbni_flags flags) { if (flags.fixed_rxl) { sc->delta_rxl = 0; /* disable receive level autodetection */ sc->cur_rxl_index = flags.rxl; } else { sc->delta_rxl = 
DEF_RXL_DELTA; sc->cur_rxl_index = DEF_RXL; } sc->csr1.rate = flags.fixed_rate ? flags.rate : DEFAULT_RATE; sc->csr1.rxl = rxl_tab[sc->cur_rxl_index]; sc->maxframe = DEFAULT_FRAME_LEN; /* * generate Ethernet address (0x00ff01xxxxxx) */ *(u_int16_t *) sc->enaddr = htons(0x00ff); if (flags.mac_addr) { *(u_int32_t *) (sc->enaddr + 2) = htonl(flags.mac_addr | 0x01000000); } else { *(u_char *) (sc->enaddr + 2) = 0x01; read_random(sc->enaddr + 3, 3); } } #ifdef SBNI_DUAL_COMPOUND void sbni_add(struct sbni_softc *sc) { mtx_lock(&headlist_lock); sc->link = sbni_headlist; sbni_headlist = sc; mtx_unlock(&headlist_lock); } struct sbni_softc * connect_to_master(struct sbni_softc *sc) { struct sbni_softc *p, *p_prev; mtx_lock(&headlist_lock); for (p = sbni_headlist, p_prev = NULL; p; p_prev = p, p = p->link) { if (rman_get_start(p->io_res) == rman_get_start(sc->io_res) + 4 || rman_get_start(p->io_res) == rman_get_start(sc->io_res) - 4) { p->slave_sc = sc; if (p_prev) p_prev->link = p->link; else sbni_headlist = p->link; mtx_unlock(&headlist_lock); return p; } } mtx_unlock(&headlist_lock); return (NULL); } #endif /* SBNI_DUAL_COMPOUND */ /* Receive level auto-selection */ static void change_level(struct sbni_softc *sc) { if (sc->delta_rxl == 0) /* do not auto-negotiate RxL */ return; if (sc->cur_rxl_index == 0) sc->delta_rxl = 1; else if (sc->cur_rxl_index == 15) sc->delta_rxl = -1; else if (sc->cur_rxl_rcvd < sc->prev_rxl_rcvd) sc->delta_rxl = -sc->delta_rxl; sc->csr1.rxl = rxl_tab[sc->cur_rxl_index += sc->delta_rxl]; sbni_inb(sc, CSR0); /* it needed for PCI cards */ sbni_outb(sc, CSR1, *(u_char *)&sc->csr1); sc->prev_rxl_rcvd = sc->cur_rxl_rcvd; sc->cur_rxl_rcvd = 0; } static void timeout_change_level(struct sbni_softc *sc) { sc->cur_rxl_index = timeout_rxl_tab[sc->timeout_rxl]; if (++sc->timeout_rxl >= 4) sc->timeout_rxl = 0; sc->csr1.rxl = rxl_tab[sc->cur_rxl_index]; sbni_inb(sc, CSR0); sbni_outb(sc, CSR1, *(u_char *)&sc->csr1); sc->prev_rxl_rcvd = sc->cur_rxl_rcvd; 
sc->cur_rxl_rcvd = 0; } /* -------------------------------------------------------------------------- */ /* * Process an ioctl request. This code needs some work - it looks * pretty ugly. */ static int sbni_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct sbni_softc *sc; struct ifreq *ifr; struct thread *td; struct sbni_in_stats *in_stats; struct sbni_flags flags; int error; sc = ifp->if_softc; ifr = (struct ifreq *)data; td = curthread; error = 0; switch (command) { case SIOCSIFFLAGS: /* * If the interface is marked up and stopped, then start it. * If it is marked down and running, then stop it. */ SBNI_LOCK(sc); if (ifp->if_flags & IFF_UP) { if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) sbni_init_locked(sc); } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { sbni_stop(sc); } } SBNI_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: /* * Multicast list has changed; set the hardware filter * accordingly. */ error = 0; /* if (ifr == NULL) error = EAFNOSUPPORT; */ break; /* * SBNI specific ioctl */ case SIOCGHWFLAGS: /* get flags */ SBNI_LOCK(sc); bcopy((caddr_t)IF_LLADDR(sc->ifp)+3, (caddr_t) &flags, 3); flags.rxl = sc->cur_rxl_index; flags.rate = sc->csr1.rate; flags.fixed_rxl = (sc->delta_rxl == 0); flags.fixed_rate = 1; SBNI_UNLOCK(sc); bcopy(&flags, &ifr->ifr_ifru, sizeof(flags)); break; case SIOCGINSTATS: in_stats = malloc(sizeof(struct sbni_in_stats), M_DEVBUF, M_WAITOK); SBNI_LOCK(sc); bcopy(&sc->in_stats, in_stats, sizeof(struct sbni_in_stats)); SBNI_UNLOCK(sc); - error = copyout(ifr->ifr_data, in_stats, + error = copyout(ifr_data_get_ptr(ifr), in_stats, sizeof(struct sbni_in_stats)); free(in_stats, M_DEVBUF); break; case SIOCSHWFLAGS: /* set flags */ /* root only */ error = priv_check(td, PRIV_DRIVER); if (error) break; bcopy(&ifr->ifr_ifru, &flags, sizeof(flags)); SBNI_LOCK(sc); if (flags.fixed_rxl) { sc->delta_rxl = 0; sc->cur_rxl_index = flags.rxl; } else { sc->delta_rxl = DEF_RXL_DELTA; sc->cur_rxl_index = DEF_RXL; } sc->csr1.rxl = 
rxl_tab[sc->cur_rxl_index]; sc->csr1.rate = flags.fixed_rate ? flags.rate : DEFAULT_RATE; if (flags.mac_addr) bcopy((caddr_t) &flags, (caddr_t) IF_LLADDR(sc->ifp)+3, 3); /* Don't be afraid... */ sbni_outb(sc, CSR1, *(char*)(&sc->csr1) | PR_RES); SBNI_UNLOCK(sc); break; case SIOCRINSTATS: SBNI_LOCK(sc); if (!(error = priv_check(td, PRIV_DRIVER))) /* root only */ bzero(&sc->in_stats, sizeof(struct sbni_in_stats)); SBNI_UNLOCK(sc); break; default: error = ether_ioctl(ifp, command, data); break; } return (error); } /* -------------------------------------------------------------------------- */ static u_int32_t calc_crc32(u_int32_t crc, caddr_t p, u_int len) { while (len--) crc = CRC32(*p++, crc); return (crc); } static u_int32_t crc32tab[] __aligned(8) = { 0xD202EF8D, 0xA505DF1B, 0x3C0C8EA1, 0x4B0BBE37, 0xD56F2B94, 0xA2681B02, 0x3B614AB8, 0x4C667A2E, 0xDCD967BF, 0xABDE5729, 0x32D70693, 0x45D03605, 0xDBB4A3A6, 0xACB39330, 0x35BAC28A, 0x42BDF21C, 0xCFB5FFE9, 0xB8B2CF7F, 0x21BB9EC5, 0x56BCAE53, 0xC8D83BF0, 0xBFDF0B66, 0x26D65ADC, 0x51D16A4A, 0xC16E77DB, 0xB669474D, 0x2F6016F7, 0x58672661, 0xC603B3C2, 0xB1048354, 0x280DD2EE, 0x5F0AE278, 0xE96CCF45, 0x9E6BFFD3, 0x0762AE69, 0x70659EFF, 0xEE010B5C, 0x99063BCA, 0x000F6A70, 0x77085AE6, 0xE7B74777, 0x90B077E1, 0x09B9265B, 0x7EBE16CD, 0xE0DA836E, 0x97DDB3F8, 0x0ED4E242, 0x79D3D2D4, 0xF4DBDF21, 0x83DCEFB7, 0x1AD5BE0D, 0x6DD28E9B, 0xF3B61B38, 0x84B12BAE, 0x1DB87A14, 0x6ABF4A82, 0xFA005713, 0x8D076785, 0x140E363F, 0x630906A9, 0xFD6D930A, 0x8A6AA39C, 0x1363F226, 0x6464C2B0, 0xA4DEAE1D, 0xD3D99E8B, 0x4AD0CF31, 0x3DD7FFA7, 0xA3B36A04, 0xD4B45A92, 0x4DBD0B28, 0x3ABA3BBE, 0xAA05262F, 0xDD0216B9, 0x440B4703, 0x330C7795, 0xAD68E236, 0xDA6FD2A0, 0x4366831A, 0x3461B38C, 0xB969BE79, 0xCE6E8EEF, 0x5767DF55, 0x2060EFC3, 0xBE047A60, 0xC9034AF6, 0x500A1B4C, 0x270D2BDA, 0xB7B2364B, 0xC0B506DD, 0x59BC5767, 0x2EBB67F1, 0xB0DFF252, 0xC7D8C2C4, 0x5ED1937E, 0x29D6A3E8, 0x9FB08ED5, 0xE8B7BE43, 0x71BEEFF9, 0x06B9DF6F, 0x98DD4ACC, 0xEFDA7A5A, 0x76D32BE0, 
0x01D41B76, 0x916B06E7, 0xE66C3671, 0x7F6567CB, 0x0862575D, 0x9606C2FE, 0xE101F268, 0x7808A3D2, 0x0F0F9344, 0x82079EB1, 0xF500AE27, 0x6C09FF9D, 0x1B0ECF0B, 0x856A5AA8, 0xF26D6A3E, 0x6B643B84, 0x1C630B12, 0x8CDC1683, 0xFBDB2615, 0x62D277AF, 0x15D54739, 0x8BB1D29A, 0xFCB6E20C, 0x65BFB3B6, 0x12B88320, 0x3FBA6CAD, 0x48BD5C3B, 0xD1B40D81, 0xA6B33D17, 0x38D7A8B4, 0x4FD09822, 0xD6D9C998, 0xA1DEF90E, 0x3161E49F, 0x4666D409, 0xDF6F85B3, 0xA868B525, 0x360C2086, 0x410B1010, 0xD80241AA, 0xAF05713C, 0x220D7CC9, 0x550A4C5F, 0xCC031DE5, 0xBB042D73, 0x2560B8D0, 0x52678846, 0xCB6ED9FC, 0xBC69E96A, 0x2CD6F4FB, 0x5BD1C46D, 0xC2D895D7, 0xB5DFA541, 0x2BBB30E2, 0x5CBC0074, 0xC5B551CE, 0xB2B26158, 0x04D44C65, 0x73D37CF3, 0xEADA2D49, 0x9DDD1DDF, 0x03B9887C, 0x74BEB8EA, 0xEDB7E950, 0x9AB0D9C6, 0x0A0FC457, 0x7D08F4C1, 0xE401A57B, 0x930695ED, 0x0D62004E, 0x7A6530D8, 0xE36C6162, 0x946B51F4, 0x19635C01, 0x6E646C97, 0xF76D3D2D, 0x806A0DBB, 0x1E0E9818, 0x6909A88E, 0xF000F934, 0x8707C9A2, 0x17B8D433, 0x60BFE4A5, 0xF9B6B51F, 0x8EB18589, 0x10D5102A, 0x67D220BC, 0xFEDB7106, 0x89DC4190, 0x49662D3D, 0x3E611DAB, 0xA7684C11, 0xD06F7C87, 0x4E0BE924, 0x390CD9B2, 0xA0058808, 0xD702B89E, 0x47BDA50F, 0x30BA9599, 0xA9B3C423, 0xDEB4F4B5, 0x40D06116, 0x37D75180, 0xAEDE003A, 0xD9D930AC, 0x54D13D59, 0x23D60DCF, 0xBADF5C75, 0xCDD86CE3, 0x53BCF940, 0x24BBC9D6, 0xBDB2986C, 0xCAB5A8FA, 0x5A0AB56B, 0x2D0D85FD, 0xB404D447, 0xC303E4D1, 0x5D677172, 0x2A6041E4, 0xB369105E, 0xC46E20C8, 0x72080DF5, 0x050F3D63, 0x9C066CD9, 0xEB015C4F, 0x7565C9EC, 0x0262F97A, 0x9B6BA8C0, 0xEC6C9856, 0x7CD385C7, 0x0BD4B551, 0x92DDE4EB, 0xE5DAD47D, 0x7BBE41DE, 0x0CB97148, 0x95B020F2, 0xE2B71064, 0x6FBF1D91, 0x18B82D07, 0x81B17CBD, 0xF6B64C2B, 0x68D2D988, 0x1FD5E91E, 0x86DCB8A4, 0xF1DB8832, 0x616495A3, 0x1663A535, 0x8F6AF48F, 0xF86DC419, 0x660951BA, 0x110E612C, 0x88073096, 0xFF000000 }; Index: head/sys/dev/sfxge/sfxge.c =================================================================== --- head/sys/dev/sfxge/sfxge.c (revision 331796) +++ 
head/sys/dev/sfxge/sfxge.c (revision 331797) @@ -1,1202 +1,1204 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2010-2016 Solarflare Communications Inc. * All rights reserved. * * This software was developed in part by Philip Paeps under contract for * Solarflare Communications, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * The views and conclusions contained in the software and documentation are * those of the authors and should not be interpreted as representing official * policies, either expressed or implied, of the FreeBSD Project. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RSS #include #endif #include "common/efx.h" #include "sfxge.h" #include "sfxge_rx.h" #include "sfxge_ioc.h" #include "sfxge_version.h" #define SFXGE_CAP (IFCAP_VLAN_MTU | IFCAP_VLAN_HWCSUM | \ IFCAP_RXCSUM | IFCAP_TXCSUM | \ IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6 | \ IFCAP_TSO4 | IFCAP_TSO6 | \ IFCAP_JUMBO_MTU | \ IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWSTATS) #define SFXGE_CAP_ENABLE SFXGE_CAP #define SFXGE_CAP_FIXED (IFCAP_VLAN_MTU | \ IFCAP_JUMBO_MTU | IFCAP_LINKSTATE | IFCAP_HWSTATS) MALLOC_DEFINE(M_SFXGE, "sfxge", "Solarflare 10GigE driver"); SYSCTL_NODE(_hw, OID_AUTO, sfxge, CTLFLAG_RD, 0, "SFXGE driver parameters"); #define SFXGE_PARAM_RX_RING SFXGE_PARAM(rx_ring) static int sfxge_rx_ring_entries = SFXGE_NDESCS; TUNABLE_INT(SFXGE_PARAM_RX_RING, &sfxge_rx_ring_entries); SYSCTL_INT(_hw_sfxge, OID_AUTO, rx_ring, CTLFLAG_RDTUN, &sfxge_rx_ring_entries, 0, "Maximum number of descriptors in a receive ring"); #define SFXGE_PARAM_TX_RING SFXGE_PARAM(tx_ring) static int sfxge_tx_ring_entries = SFXGE_NDESCS; TUNABLE_INT(SFXGE_PARAM_TX_RING, &sfxge_tx_ring_entries); SYSCTL_INT(_hw_sfxge, OID_AUTO, tx_ring, CTLFLAG_RDTUN, &sfxge_tx_ring_entries, 0, "Maximum number of descriptors in a transmit ring"); #define SFXGE_PARAM_RESTART_ATTEMPTS SFXGE_PARAM(restart_attempts) static int sfxge_restart_attempts = 3; TUNABLE_INT(SFXGE_PARAM_RESTART_ATTEMPTS, &sfxge_restart_attempts); SYSCTL_INT(_hw_sfxge, OID_AUTO, restart_attempts, CTLFLAG_RDTUN, &sfxge_restart_attempts, 0, "Maximum number of attempts to bring interface up after reset"); #if EFSYS_OPT_MCDI_LOGGING #define SFXGE_PARAM_MCDI_LOGGING SFXGE_PARAM(mcdi_logging) static int sfxge_mcdi_logging = 0; TUNABLE_INT(SFXGE_PARAM_MCDI_LOGGING, &sfxge_mcdi_logging); #endif static 
void sfxge_reset(void *arg, int npending); static int sfxge_estimate_rsrc_limits(struct sfxge_softc *sc) { efx_drv_limits_t limits; int rc; unsigned int evq_max; uint32_t evq_allocated; uint32_t rxq_allocated; uint32_t txq_allocated; /* * Limit the number of event queues to: * - number of CPUs * - hardwire maximum RSS channels * - administratively specified maximum RSS channels */ #ifdef RSS /* * Avoid extra limitations so that the number of queues * may be configured at administrator's will */ evq_max = MIN(MAX(rss_getnumbuckets(), 1), EFX_MAXRSS); #else evq_max = MIN(mp_ncpus, EFX_MAXRSS); #endif if (sc->max_rss_channels > 0) evq_max = MIN(evq_max, sc->max_rss_channels); memset(&limits, 0, sizeof(limits)); limits.edl_min_evq_count = 1; limits.edl_max_evq_count = evq_max; limits.edl_min_txq_count = SFXGE_TXQ_NTYPES; limits.edl_max_txq_count = evq_max + SFXGE_TXQ_NTYPES - 1; limits.edl_min_rxq_count = 1; limits.edl_max_rxq_count = evq_max; efx_nic_set_drv_limits(sc->enp, &limits); if ((rc = efx_nic_init(sc->enp)) != 0) return (rc); rc = efx_nic_get_vi_pool(sc->enp, &evq_allocated, &rxq_allocated, &txq_allocated); if (rc != 0) { efx_nic_fini(sc->enp); return (rc); } KASSERT(txq_allocated >= SFXGE_TXQ_NTYPES, ("txq_allocated < SFXGE_TXQ_NTYPES")); sc->evq_max = MIN(evq_allocated, evq_max); sc->evq_max = MIN(rxq_allocated, sc->evq_max); sc->evq_max = MIN(txq_allocated - (SFXGE_TXQ_NTYPES - 1), sc->evq_max); KASSERT(sc->evq_max <= evq_max, ("allocated more than maximum requested")); #ifdef RSS if (sc->evq_max < rss_getnumbuckets()) device_printf(sc->dev, "The number of allocated queues (%u) " "is less than the number of RSS buckets (%u); " "performance degradation might be observed", sc->evq_max, rss_getnumbuckets()); #endif /* * NIC is kept initialized in the case of success to be able to * initialize port to find out media types. 
*/ return (0); } static int sfxge_set_drv_limits(struct sfxge_softc *sc) { efx_drv_limits_t limits; memset(&limits, 0, sizeof(limits)); /* Limits are strict since take into account initial estimation */ limits.edl_min_evq_count = limits.edl_max_evq_count = sc->intr.n_alloc; limits.edl_min_txq_count = limits.edl_max_txq_count = sc->intr.n_alloc + SFXGE_TXQ_NTYPES - 1; limits.edl_min_rxq_count = limits.edl_max_rxq_count = sc->intr.n_alloc; return (efx_nic_set_drv_limits(sc->enp, &limits)); } static int sfxge_start(struct sfxge_softc *sc) { int rc; SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc); if (sc->init_state == SFXGE_STARTED) return (0); if (sc->init_state != SFXGE_REGISTERED) { rc = EINVAL; goto fail; } /* Set required resource limits */ if ((rc = sfxge_set_drv_limits(sc)) != 0) goto fail; if ((rc = efx_nic_init(sc->enp)) != 0) goto fail; /* Start processing interrupts. */ if ((rc = sfxge_intr_start(sc)) != 0) goto fail2; /* Start processing events. */ if ((rc = sfxge_ev_start(sc)) != 0) goto fail3; /* Fire up the port. */ if ((rc = sfxge_port_start(sc)) != 0) goto fail4; /* Start the receiver side. */ if ((rc = sfxge_rx_start(sc)) != 0) goto fail5; /* Start the transmitter side. */ if ((rc = sfxge_tx_start(sc)) != 0) goto fail6; sc->init_state = SFXGE_STARTED; /* Tell the stack we're running. */ sc->ifnet->if_drv_flags |= IFF_DRV_RUNNING; sc->ifnet->if_drv_flags &= ~IFF_DRV_OACTIVE; return (0); fail6: sfxge_rx_stop(sc); fail5: sfxge_port_stop(sc); fail4: sfxge_ev_stop(sc); fail3: sfxge_intr_stop(sc); fail2: efx_nic_fini(sc->enp); fail: device_printf(sc->dev, "sfxge_start: %d\n", rc); return (rc); } static void sfxge_if_init(void *arg) { struct sfxge_softc *sc; sc = (struct sfxge_softc *)arg; SFXGE_ADAPTER_LOCK(sc); (void)sfxge_start(sc); SFXGE_ADAPTER_UNLOCK(sc); } static void sfxge_stop(struct sfxge_softc *sc) { SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc); if (sc->init_state != SFXGE_STARTED) return; sc->init_state = SFXGE_REGISTERED; /* Stop the transmitter. 
*/ sfxge_tx_stop(sc); /* Stop the receiver. */ sfxge_rx_stop(sc); /* Stop the port. */ sfxge_port_stop(sc); /* Stop processing events. */ sfxge_ev_stop(sc); /* Stop processing interrupts. */ sfxge_intr_stop(sc); efx_nic_fini(sc->enp); sc->ifnet->if_drv_flags &= ~IFF_DRV_RUNNING; } static int sfxge_vpd_ioctl(struct sfxge_softc *sc, sfxge_ioc_t *ioc) { efx_vpd_value_t value; int rc = 0; switch (ioc->u.vpd.op) { case SFXGE_VPD_OP_GET_KEYWORD: value.evv_tag = ioc->u.vpd.tag; value.evv_keyword = ioc->u.vpd.keyword; rc = efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value); if (rc != 0) break; ioc->u.vpd.len = MIN(ioc->u.vpd.len, value.evv_length); if (ioc->u.vpd.payload != 0) { rc = copyout(value.evv_value, ioc->u.vpd.payload, ioc->u.vpd.len); } break; case SFXGE_VPD_OP_SET_KEYWORD: if (ioc->u.vpd.len > sizeof(value.evv_value)) return (EINVAL); value.evv_tag = ioc->u.vpd.tag; value.evv_keyword = ioc->u.vpd.keyword; value.evv_length = ioc->u.vpd.len; rc = copyin(ioc->u.vpd.payload, value.evv_value, value.evv_length); if (rc != 0) break; rc = efx_vpd_set(sc->enp, sc->vpd_data, sc->vpd_size, &value); if (rc != 0) break; rc = efx_vpd_verify(sc->enp, sc->vpd_data, sc->vpd_size); if (rc != 0) break; rc = efx_vpd_write(sc->enp, sc->vpd_data, sc->vpd_size); break; default: rc = EOPNOTSUPP; break; } return (rc); } static int sfxge_private_ioctl(struct sfxge_softc *sc, sfxge_ioc_t *ioc) { switch (ioc->op) { case SFXGE_MCDI_IOC: return (sfxge_mcdi_ioctl(sc, ioc)); case SFXGE_NVRAM_IOC: return (sfxge_nvram_ioctl(sc, ioc)); case SFXGE_VPD_IOC: return (sfxge_vpd_ioctl(sc, ioc)); default: return (EOPNOTSUPP); } } static int sfxge_if_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data) { struct sfxge_softc *sc; struct ifreq *ifr; sfxge_ioc_t ioc; int error; ifr = (struct ifreq *)data; sc = ifp->if_softc; error = 0; switch (command) { case SIOCSIFFLAGS: SFXGE_ADAPTER_LOCK(sc); if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { if ((ifp->if_flags ^ 
sc->if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) { sfxge_mac_filter_set(sc); } } else sfxge_start(sc); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) sfxge_stop(sc); sc->if_flags = ifp->if_flags; SFXGE_ADAPTER_UNLOCK(sc); break; case SIOCSIFMTU: if (ifr->ifr_mtu == ifp->if_mtu) { /* Nothing to do */ error = 0; } else if (ifr->ifr_mtu > SFXGE_MAX_MTU) { error = EINVAL; } else if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { ifp->if_mtu = ifr->ifr_mtu; error = 0; } else { /* Restart required */ SFXGE_ADAPTER_LOCK(sc); sfxge_stop(sc); ifp->if_mtu = ifr->ifr_mtu; error = sfxge_start(sc); SFXGE_ADAPTER_UNLOCK(sc); if (error != 0) { ifp->if_flags &= ~IFF_UP; ifp->if_drv_flags &= ~IFF_DRV_RUNNING; if_down(ifp); } } break; case SIOCADDMULTI: case SIOCDELMULTI: if (ifp->if_drv_flags & IFF_DRV_RUNNING) sfxge_mac_filter_set(sc); break; case SIOCSIFCAP: { int reqcap = ifr->ifr_reqcap; int capchg_mask; SFXGE_ADAPTER_LOCK(sc); /* Capabilities to be changed in accordance with request */ capchg_mask = ifp->if_capenable ^ reqcap; /* * The networking core already rejects attempts to * enable capabilities we don't have. We still have * to reject attempts to disable capabilities that we * can't (yet) disable. 
*/ KASSERT((reqcap & ~ifp->if_capabilities) == 0, ("Unsupported capabilities 0x%x requested 0x%x vs " "supported 0x%x", reqcap & ~ifp->if_capabilities, reqcap , ifp->if_capabilities)); if (capchg_mask & SFXGE_CAP_FIXED) { error = EINVAL; SFXGE_ADAPTER_UNLOCK(sc); break; } /* Check request before any changes */ if ((capchg_mask & IFCAP_TSO4) && (reqcap & (IFCAP_TSO4 | IFCAP_TXCSUM)) == IFCAP_TSO4) { error = EAGAIN; SFXGE_ADAPTER_UNLOCK(sc); if_printf(ifp, "enable txcsum before tso4\n"); break; } if ((capchg_mask & IFCAP_TSO6) && (reqcap & (IFCAP_TSO6 | IFCAP_TXCSUM_IPV6)) == IFCAP_TSO6) { error = EAGAIN; SFXGE_ADAPTER_UNLOCK(sc); if_printf(ifp, "enable txcsum6 before tso6\n"); break; } if (reqcap & IFCAP_TXCSUM) { ifp->if_hwassist |= (CSUM_IP | CSUM_TCP | CSUM_UDP); } else { ifp->if_hwassist &= ~(CSUM_IP | CSUM_TCP | CSUM_UDP); if (reqcap & IFCAP_TSO4) { reqcap &= ~IFCAP_TSO4; if_printf(ifp, "tso4 disabled due to -txcsum\n"); } } if (reqcap & IFCAP_TXCSUM_IPV6) { ifp->if_hwassist |= (CSUM_TCP_IPV6 | CSUM_UDP_IPV6); } else { ifp->if_hwassist &= ~(CSUM_TCP_IPV6 | CSUM_UDP_IPV6); if (reqcap & IFCAP_TSO6) { reqcap &= ~IFCAP_TSO6; if_printf(ifp, "tso6 disabled due to -txcsum6\n"); } } /* * The kernel takes both IFCAP_TSOx and CSUM_TSO into * account before using TSO. So, we do not touch * checksum flags when IFCAP_TSOx is modified. * Note that CSUM_TSO is (CSUM_IP_TSO|CSUM_IP6_TSO), * but both bits are set in IPv4 and IPv6 mbufs. 
*/ ifp->if_capenable = reqcap; SFXGE_ADAPTER_UNLOCK(sc); break; } case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->media, command); break; #ifdef SIOCGI2C case SIOCGI2C: { struct ifi2creq i2c; - error = copyin(ifr->ifr_data, &i2c, sizeof(i2c)); + error = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c)); if (error != 0) break; if (i2c.len > sizeof(i2c.data)) { error = EINVAL; break; } SFXGE_ADAPTER_LOCK(sc); error = efx_phy_module_get_info(sc->enp, i2c.dev_addr, i2c.offset, i2c.len, &i2c.data[0]); SFXGE_ADAPTER_UNLOCK(sc); if (error == 0) - error = copyout(&i2c, ifr->ifr_data, sizeof(i2c)); + error = copyout(&i2c, ifr_data_get_ptr(ifr), + sizeof(i2c)); break; } #endif case SIOCGPRIVATE_0: error = priv_check(curthread, PRIV_DRIVER); if (error != 0) break; - error = copyin(ifr->ifr_data, &ioc, sizeof(ioc)); + error = copyin(ifr_data_get_ptr(ifr), &ioc, sizeof(ioc)); if (error != 0) return (error); error = sfxge_private_ioctl(sc, &ioc); if (error == 0) { - error = copyout(&ioc, ifr->ifr_data, sizeof(ioc)); + error = copyout(&ioc, ifr_data_get_ptr(ifr), + sizeof(ioc)); } break; default: error = ether_ioctl(ifp, command, data); } return (error); } static void sfxge_ifnet_fini(struct ifnet *ifp) { struct sfxge_softc *sc = ifp->if_softc; SFXGE_ADAPTER_LOCK(sc); sfxge_stop(sc); SFXGE_ADAPTER_UNLOCK(sc); ifmedia_removeall(&sc->media); ether_ifdetach(ifp); if_free(ifp); } static int sfxge_ifnet_init(struct ifnet *ifp, struct sfxge_softc *sc) { const efx_nic_cfg_t *encp = efx_nic_cfg_get(sc->enp); device_t dev; int rc; dev = sc->dev; sc->ifnet = ifp; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_init = sfxge_if_init; ifp->if_softc = sc; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = sfxge_if_ioctl; ifp->if_capabilities = SFXGE_CAP; ifp->if_capenable = SFXGE_CAP_ENABLE; ifp->if_hw_tsomax = SFXGE_TSO_MAX_SIZE; ifp->if_hw_tsomaxsegcount = SFXGE_TX_MAPPING_MAX_SEG; ifp->if_hw_tsomaxsegsize = PAGE_SIZE; 
#ifdef SFXGE_LRO ifp->if_capabilities |= IFCAP_LRO; ifp->if_capenable |= IFCAP_LRO; #endif if (encp->enc_hw_tx_insert_vlan_enabled) { ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING; ifp->if_capenable |= IFCAP_VLAN_HWTAGGING; } ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO | CSUM_TCP_IPV6 | CSUM_UDP_IPV6; ether_ifattach(ifp, encp->enc_mac_addr); ifp->if_transmit = sfxge_if_transmit; ifp->if_qflush = sfxge_if_qflush; ifp->if_get_counter = sfxge_get_counter; DBGPRINT(sc->dev, "ifmedia_init"); if ((rc = sfxge_port_ifmedia_init(sc)) != 0) goto fail; return (0); fail: ether_ifdetach(sc->ifnet); return (rc); } void sfxge_sram_buf_tbl_alloc(struct sfxge_softc *sc, size_t n, uint32_t *idp) { KASSERT(sc->buffer_table_next + n <= efx_nic_cfg_get(sc->enp)->enc_buftbl_limit, ("buffer table full")); *idp = sc->buffer_table_next; sc->buffer_table_next += n; } static int sfxge_bar_init(struct sfxge_softc *sc) { efsys_bar_t *esbp = &sc->bar; esbp->esb_rid = PCIR_BAR(EFX_MEM_BAR); if ((esbp->esb_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, &esbp->esb_rid, RF_ACTIVE)) == NULL) { device_printf(sc->dev, "Cannot allocate BAR region %d\n", EFX_MEM_BAR); return (ENXIO); } esbp->esb_tag = rman_get_bustag(esbp->esb_res); esbp->esb_handle = rman_get_bushandle(esbp->esb_res); SFXGE_BAR_LOCK_INIT(esbp, device_get_nameunit(sc->dev)); return (0); } static void sfxge_bar_fini(struct sfxge_softc *sc) { efsys_bar_t *esbp = &sc->bar; bus_release_resource(sc->dev, SYS_RES_MEMORY, esbp->esb_rid, esbp->esb_res); SFXGE_BAR_LOCK_DESTROY(esbp); } static int sfxge_create(struct sfxge_softc *sc) { device_t dev; efx_nic_t *enp; int error; char rss_param_name[sizeof(SFXGE_PARAM(%d.max_rss_channels))]; #if EFSYS_OPT_MCDI_LOGGING char mcdi_log_param_name[sizeof(SFXGE_PARAM(%d.mcdi_logging))]; #endif dev = sc->dev; SFXGE_ADAPTER_LOCK_INIT(sc, device_get_nameunit(sc->dev)); sc->max_rss_channels = 0; snprintf(rss_param_name, sizeof(rss_param_name), SFXGE_PARAM(%d.max_rss_channels), 
(int)device_get_unit(dev)); TUNABLE_INT_FETCH(rss_param_name, &sc->max_rss_channels); #if EFSYS_OPT_MCDI_LOGGING sc->mcdi_logging = sfxge_mcdi_logging; snprintf(mcdi_log_param_name, sizeof(mcdi_log_param_name), SFXGE_PARAM(%d.mcdi_logging), (int)device_get_unit(dev)); TUNABLE_INT_FETCH(mcdi_log_param_name, &sc->mcdi_logging); #endif sc->stats_node = SYSCTL_ADD_NODE( device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "stats", CTLFLAG_RD, NULL, "Statistics"); if (sc->stats_node == NULL) { error = ENOMEM; goto fail; } TASK_INIT(&sc->task_reset, 0, sfxge_reset, sc); (void) pci_enable_busmaster(dev); /* Initialize DMA mappings. */ DBGPRINT(sc->dev, "dma_init..."); if ((error = sfxge_dma_init(sc)) != 0) goto fail; /* Map the device registers. */ DBGPRINT(sc->dev, "bar_init..."); if ((error = sfxge_bar_init(sc)) != 0) goto fail; error = efx_family(pci_get_vendor(dev), pci_get_device(dev), &sc->family); KASSERT(error == 0, ("Family should be filtered by sfxge_probe()")); DBGPRINT(sc->dev, "nic_create..."); /* Create the common code nic object. */ SFXGE_EFSYS_LOCK_INIT(&sc->enp_lock, device_get_nameunit(sc->dev), "nic"); if ((error = efx_nic_create(sc->family, (efsys_identifier_t *)sc, &sc->bar, &sc->enp_lock, &enp)) != 0) goto fail3; sc->enp = enp; /* Initialize MCDI to talk to the microcontroller. */ DBGPRINT(sc->dev, "mcdi_init..."); if ((error = sfxge_mcdi_init(sc)) != 0) goto fail4; /* Probe the NIC and build the configuration data area. 
*/ DBGPRINT(sc->dev, "nic_probe..."); if ((error = efx_nic_probe(enp)) != 0) goto fail5; if (!ISP2(sfxge_rx_ring_entries) || (sfxge_rx_ring_entries < EFX_RXQ_MINNDESCS) || (sfxge_rx_ring_entries > EFX_RXQ_MAXNDESCS)) { log(LOG_ERR, "%s=%d must be power of 2 from %u to %u", SFXGE_PARAM_RX_RING, sfxge_rx_ring_entries, EFX_RXQ_MINNDESCS, EFX_RXQ_MAXNDESCS); error = EINVAL; goto fail_rx_ring_entries; } sc->rxq_entries = sfxge_rx_ring_entries; if (!ISP2(sfxge_tx_ring_entries) || (sfxge_tx_ring_entries < EFX_TXQ_MINNDESCS) || (sfxge_tx_ring_entries > EFX_TXQ_MAXNDESCS(efx_nic_cfg_get(enp)))) { log(LOG_ERR, "%s=%d must be power of 2 from %u to %u", SFXGE_PARAM_TX_RING, sfxge_tx_ring_entries, EFX_TXQ_MINNDESCS, EFX_TXQ_MAXNDESCS(efx_nic_cfg_get(enp))); error = EINVAL; goto fail_tx_ring_entries; } sc->txq_entries = sfxge_tx_ring_entries; SYSCTL_ADD_STRING(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "version", CTLFLAG_RD, SFXGE_VERSION_STRING, 0, "Driver version"); SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "phy_type", CTLFLAG_RD, NULL, efx_nic_cfg_get(enp)->enc_phy_type, "PHY type"); /* Initialize the NVRAM. */ DBGPRINT(sc->dev, "nvram_init..."); if ((error = efx_nvram_init(enp)) != 0) goto fail6; /* Initialize the VPD. */ DBGPRINT(sc->dev, "vpd_init..."); if ((error = efx_vpd_init(enp)) != 0) goto fail7; efx_mcdi_new_epoch(enp); /* Reset the NIC. */ DBGPRINT(sc->dev, "nic_reset..."); if ((error = efx_nic_reset(enp)) != 0) goto fail8; /* Initialize buffer table allocation. */ sc->buffer_table_next = 0; /* * Guarantee minimum and estimate maximum number of event queues * to take it into account when MSI-X interrupts are allocated. * It initializes NIC and keeps it initialized on success. */ if ((error = sfxge_estimate_rsrc_limits(sc)) != 0) goto fail8; /* Set up interrupts. 
*/ DBGPRINT(sc->dev, "intr_init..."); if ((error = sfxge_intr_init(sc)) != 0) goto fail9; /* Initialize event processing state. */ DBGPRINT(sc->dev, "ev_init..."); if ((error = sfxge_ev_init(sc)) != 0) goto fail11; /* Initialize port state. */ DBGPRINT(sc->dev, "port_init..."); if ((error = sfxge_port_init(sc)) != 0) goto fail12; /* Initialize receive state. */ DBGPRINT(sc->dev, "rx_init..."); if ((error = sfxge_rx_init(sc)) != 0) goto fail13; /* Initialize transmit state. */ DBGPRINT(sc->dev, "tx_init..."); if ((error = sfxge_tx_init(sc)) != 0) goto fail14; sc->init_state = SFXGE_INITIALIZED; DBGPRINT(sc->dev, "success"); return (0); fail14: sfxge_rx_fini(sc); fail13: sfxge_port_fini(sc); fail12: sfxge_ev_fini(sc); fail11: sfxge_intr_fini(sc); fail9: efx_nic_fini(sc->enp); fail8: efx_vpd_fini(enp); fail7: efx_nvram_fini(enp); fail6: fail_tx_ring_entries: fail_rx_ring_entries: efx_nic_unprobe(enp); fail5: sfxge_mcdi_fini(sc); fail4: sc->enp = NULL; efx_nic_destroy(enp); SFXGE_EFSYS_LOCK_DESTROY(&sc->enp_lock); fail3: sfxge_bar_fini(sc); (void) pci_disable_busmaster(sc->dev); fail: DBGPRINT(sc->dev, "failed %d", error); sc->dev = NULL; SFXGE_ADAPTER_LOCK_DESTROY(sc); return (error); } static void sfxge_destroy(struct sfxge_softc *sc) { efx_nic_t *enp; /* Clean up transmit state. */ sfxge_tx_fini(sc); /* Clean up receive state. */ sfxge_rx_fini(sc); /* Clean up port state. */ sfxge_port_fini(sc); /* Clean up event processing state. */ sfxge_ev_fini(sc); /* Clean up interrupts. */ sfxge_intr_fini(sc); /* Tear down common code subsystems. */ efx_nic_reset(sc->enp); efx_vpd_fini(sc->enp); efx_nvram_fini(sc->enp); efx_nic_unprobe(sc->enp); /* Tear down MCDI. */ sfxge_mcdi_fini(sc); /* Destroy common code context. */ enp = sc->enp; sc->enp = NULL; efx_nic_destroy(enp); /* Free DMA memory. */ sfxge_dma_fini(sc); /* Free mapped BARs. */ sfxge_bar_fini(sc); (void) pci_disable_busmaster(sc->dev); taskqueue_drain(taskqueue_thread, &sc->task_reset); /* Destroy the softc lock. 
*/ SFXGE_ADAPTER_LOCK_DESTROY(sc); } static int sfxge_vpd_handler(SYSCTL_HANDLER_ARGS) { struct sfxge_softc *sc = arg1; efx_vpd_value_t value; int rc; value.evv_tag = arg2 >> 16; value.evv_keyword = arg2 & 0xffff; if ((rc = efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value)) != 0) return (rc); return (SYSCTL_OUT(req, value.evv_value, value.evv_length)); } static void sfxge_vpd_try_add(struct sfxge_softc *sc, struct sysctl_oid_list *list, efx_vpd_tag_t tag, const char *keyword) { struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev); efx_vpd_value_t value; /* Check whether VPD tag/keyword is present */ value.evv_tag = tag; value.evv_keyword = EFX_VPD_KEYWORD(keyword[0], keyword[1]); if (efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value) != 0) return; SYSCTL_ADD_PROC( ctx, list, OID_AUTO, keyword, CTLTYPE_STRING|CTLFLAG_RD, sc, tag << 16 | EFX_VPD_KEYWORD(keyword[0], keyword[1]), sfxge_vpd_handler, "A", ""); } static int sfxge_vpd_init(struct sfxge_softc *sc) { struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev); struct sysctl_oid *vpd_node; struct sysctl_oid_list *vpd_list; char keyword[3]; efx_vpd_value_t value; int rc; if ((rc = efx_vpd_size(sc->enp, &sc->vpd_size)) != 0) { /* * Unpriviledged functions deny VPD access. * Simply skip VPD in this case. */ if (rc == EACCES) goto done; goto fail; } sc->vpd_data = malloc(sc->vpd_size, M_SFXGE, M_WAITOK); if ((rc = efx_vpd_read(sc->enp, sc->vpd_data, sc->vpd_size)) != 0) goto fail2; /* Copy ID (product name) into device description, and log it. 
*/ value.evv_tag = EFX_VPD_ID; if (efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value) == 0) { value.evv_value[value.evv_length] = 0; device_set_desc_copy(sc->dev, value.evv_value); device_printf(sc->dev, "%s\n", value.evv_value); } vpd_node = SYSCTL_ADD_NODE( ctx, SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)), OID_AUTO, "vpd", CTLFLAG_RD, NULL, "Vital Product Data"); vpd_list = SYSCTL_CHILDREN(vpd_node); /* Add sysctls for all expected and any vendor-defined keywords. */ sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, "PN"); sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, "EC"); sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, "SN"); keyword[0] = 'V'; keyword[2] = 0; for (keyword[1] = '0'; keyword[1] <= '9'; keyword[1]++) sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, keyword); for (keyword[1] = 'A'; keyword[1] <= 'Z'; keyword[1]++) sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, keyword); done: return (0); fail2: free(sc->vpd_data, M_SFXGE); fail: return (rc); } static void sfxge_vpd_fini(struct sfxge_softc *sc) { free(sc->vpd_data, M_SFXGE); } static void sfxge_reset(void *arg, int npending) { struct sfxge_softc *sc; int rc; unsigned attempt; (void)npending; sc = (struct sfxge_softc *)arg; SFXGE_ADAPTER_LOCK(sc); if (sc->init_state != SFXGE_STARTED) goto done; sfxge_stop(sc); efx_nic_reset(sc->enp); for (attempt = 0; attempt < sfxge_restart_attempts; ++attempt) { if ((rc = sfxge_start(sc)) == 0) goto done; device_printf(sc->dev, "start on reset failed (%d)\n", rc); DELAY(100000); } device_printf(sc->dev, "reset failed; interface is now stopped\n"); done: SFXGE_ADAPTER_UNLOCK(sc); } void sfxge_schedule_reset(struct sfxge_softc *sc) { taskqueue_enqueue(taskqueue_thread, &sc->task_reset); } static int sfxge_attach(device_t dev) { struct sfxge_softc *sc; struct ifnet *ifp; int error; sc = device_get_softc(dev); sc->dev = dev; /* Allocate ifnet. 
*/ ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "Couldn't allocate ifnet\n"); error = ENOMEM; goto fail; } sc->ifnet = ifp; /* Initialize hardware. */ DBGPRINT(sc->dev, "create nic"); if ((error = sfxge_create(sc)) != 0) goto fail2; /* Create the ifnet for the port. */ DBGPRINT(sc->dev, "init ifnet"); if ((error = sfxge_ifnet_init(ifp, sc)) != 0) goto fail3; DBGPRINT(sc->dev, "init vpd"); if ((error = sfxge_vpd_init(sc)) != 0) goto fail4; /* * NIC is initialized inside sfxge_create() and kept inialized * to be able to initialize port to discover media types in * sfxge_ifnet_init(). */ efx_nic_fini(sc->enp); sc->init_state = SFXGE_REGISTERED; DBGPRINT(sc->dev, "success"); return (0); fail4: sfxge_ifnet_fini(ifp); fail3: efx_nic_fini(sc->enp); sfxge_destroy(sc); fail2: if_free(sc->ifnet); fail: DBGPRINT(sc->dev, "failed %d", error); return (error); } static int sfxge_detach(device_t dev) { struct sfxge_softc *sc; sc = device_get_softc(dev); sfxge_vpd_fini(sc); /* Destroy the ifnet. */ sfxge_ifnet_fini(sc->ifnet); /* Tear down hardware. 
*/ sfxge_destroy(sc); return (0); } static int sfxge_probe(device_t dev) { uint16_t pci_vendor_id; uint16_t pci_device_id; efx_family_t family; int rc; pci_vendor_id = pci_get_vendor(dev); pci_device_id = pci_get_device(dev); DBGPRINT(dev, "PCI ID %04x:%04x", pci_vendor_id, pci_device_id); rc = efx_family(pci_vendor_id, pci_device_id, &family); if (rc != 0) { DBGPRINT(dev, "efx_family fail %d", rc); return (ENXIO); } if (family == EFX_FAMILY_SIENA) { device_set_desc(dev, "Solarflare SFC9000 family"); return (0); } if (family == EFX_FAMILY_HUNTINGTON) { device_set_desc(dev, "Solarflare SFC9100 family"); return (0); } if (family == EFX_FAMILY_MEDFORD) { device_set_desc(dev, "Solarflare SFC9200 family"); return (0); } DBGPRINT(dev, "impossible controller family %d", family); return (ENXIO); } static device_method_t sfxge_methods[] = { DEVMETHOD(device_probe, sfxge_probe), DEVMETHOD(device_attach, sfxge_attach), DEVMETHOD(device_detach, sfxge_detach), DEVMETHOD_END }; static devclass_t sfxge_devclass; static driver_t sfxge_driver = { "sfxge", sfxge_methods, sizeof(struct sfxge_softc) }; DRIVER_MODULE(sfxge, pci, sfxge_driver, sfxge_devclass, 0, 0); Index: head/sys/dev/vxge/vxge.c =================================================================== --- head/sys/dev/vxge/vxge.c (revision 331796) +++ head/sys/dev/vxge/vxge.c (revision 331797) @@ -1,4198 +1,4203 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright(c) 2002-2011 Exar Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification are permitted provided the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. 
Neither the name of the Exar Corporation nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*$FreeBSD$*/ #include static int vxge_pci_bd_no = -1; static u32 vxge_drv_copyright = 0; static u32 vxge_dev_ref_count = 0; static u32 vxge_dev_req_reboot = 0; static int vpath_selector[VXGE_HAL_MAX_VIRTUAL_PATHS] = \ {0, 1, 3, 3, 7, 7, 7, 7, 15, 15, 15, 15, 15, 15, 15, 15, 31}; /* * vxge_probe * Probes for x3100 devices */ int vxge_probe(device_t ndev) { int err = ENXIO; u16 pci_bd_no = 0; u16 pci_vendor_id = 0; u16 pci_device_id = 0; char adapter_name[64]; pci_vendor_id = pci_get_vendor(ndev); if (pci_vendor_id != VXGE_PCI_VENDOR_ID) goto _exit0; pci_device_id = pci_get_device(ndev); if (pci_device_id == VXGE_PCI_DEVICE_ID_TITAN_1) { pci_bd_no = (pci_get_bus(ndev) | pci_get_slot(ndev)); snprintf(adapter_name, sizeof(adapter_name), VXGE_ADAPTER_NAME, pci_get_revid(ndev)); device_set_desc_copy(ndev, adapter_name); if (!vxge_drv_copyright) { device_printf(ndev, VXGE_COPYRIGHT); vxge_drv_copyright = 1; } if (vxge_dev_req_reboot == 0) { vxge_pci_bd_no = pci_bd_no; err = BUS_PROBE_DEFAULT; } else { if (pci_bd_no 
!= vxge_pci_bd_no) { vxge_pci_bd_no = pci_bd_no; err = BUS_PROBE_DEFAULT; } } } _exit0: return (err); } /* * vxge_attach * Connects driver to the system if probe was success @ndev handle */ int vxge_attach(device_t ndev) { int err = 0; vxge_dev_t *vdev; vxge_hal_device_t *hldev = NULL; vxge_hal_device_attr_t device_attr; vxge_free_resources_e error_level = VXGE_FREE_NONE; vxge_hal_status_e status = VXGE_HAL_OK; /* Get per-ndev buffer */ vdev = (vxge_dev_t *) device_get_softc(ndev); if (!vdev) goto _exit0; bzero(vdev, sizeof(vxge_dev_t)); vdev->ndev = ndev; strlcpy(vdev->ndev_name, "vxge", sizeof(vdev->ndev_name)); err = vxge_driver_config(vdev); if (err != 0) goto _exit0; /* Initialize HAL driver */ status = vxge_driver_init(vdev); if (status != VXGE_HAL_OK) { device_printf(vdev->ndev, "Failed to initialize driver\n"); goto _exit0; } /* Enable PCI bus-master */ pci_enable_busmaster(ndev); /* Allocate resources */ err = vxge_alloc_resources(vdev); if (err != 0) { device_printf(vdev->ndev, "resource allocation failed\n"); goto _exit0; } err = vxge_device_hw_info_get(vdev); if (err != 0) { error_level = VXGE_FREE_BAR2; goto _exit0; } /* Get firmware default values for Device Configuration */ vxge_hal_device_config_default_get(vdev->device_config); /* Customize Device Configuration based on User request */ vxge_vpath_config(vdev); /* Allocate ISR resources */ err = vxge_alloc_isr_resources(vdev); if (err != 0) { error_level = VXGE_FREE_ISR_RESOURCE; device_printf(vdev->ndev, "isr resource allocation failed\n"); goto _exit0; } /* HAL attributes */ device_attr.bar0 = (u8 *) vdev->pdev->bar_info[0]; device_attr.bar1 = (u8 *) vdev->pdev->bar_info[1]; device_attr.bar2 = (u8 *) vdev->pdev->bar_info[2]; device_attr.regh0 = (vxge_bus_res_t *) vdev->pdev->reg_map[0]; device_attr.regh1 = (vxge_bus_res_t *) vdev->pdev->reg_map[1]; device_attr.regh2 = (vxge_bus_res_t *) vdev->pdev->reg_map[2]; device_attr.irqh = (pci_irq_h) vdev->config.isr_info[0].irq_handle; device_attr.cfgh = 
vdev->pdev; device_attr.pdev = vdev->pdev; /* Initialize HAL Device */ status = vxge_hal_device_initialize((vxge_hal_device_h *) &hldev, &device_attr, vdev->device_config); if (status != VXGE_HAL_OK) { error_level = VXGE_FREE_ISR_RESOURCE; device_printf(vdev->ndev, "hal device initialization failed\n"); goto _exit0; } vdev->devh = hldev; vxge_hal_device_private_set(hldev, vdev); if (vdev->is_privilaged) { err = vxge_firmware_verify(vdev); if (err != 0) { vxge_dev_req_reboot = 1; error_level = VXGE_FREE_TERMINATE_DEVICE; goto _exit0; } } /* Allocate memory for vpath */ vdev->vpaths = (vxge_vpath_t *) vxge_mem_alloc(vdev->no_of_vpath * sizeof(vxge_vpath_t)); if (vdev->vpaths == NULL) { error_level = VXGE_FREE_TERMINATE_DEVICE; device_printf(vdev->ndev, "vpath memory allocation failed\n"); goto _exit0; } vdev->no_of_func = 1; if (vdev->is_privilaged) { vxge_hal_func_mode_count(vdev->devh, vdev->config.hw_info.function_mode, &vdev->no_of_func); vxge_bw_priority_config(vdev); } /* Initialize mutexes */ vxge_mutex_init(vdev); /* Initialize Media */ vxge_media_init(vdev); err = vxge_ifp_setup(ndev); if (err != 0) { error_level = VXGE_FREE_MEDIA; device_printf(vdev->ndev, "setting up interface failed\n"); goto _exit0; } err = vxge_isr_setup(vdev); if (err != 0) { error_level = VXGE_FREE_INTERFACE; device_printf(vdev->ndev, "failed to associate interrupt handler with device\n"); goto _exit0; } vxge_device_hw_info_print(vdev); vdev->is_active = TRUE; _exit0: if (error_level) { vxge_free_resources(ndev, error_level); err = ENXIO; } return (err); } /* * vxge_detach * Detaches driver from the Kernel subsystem */ int vxge_detach(device_t ndev) { vxge_dev_t *vdev; vdev = (vxge_dev_t *) device_get_softc(ndev); if (vdev->is_active) { vdev->is_active = FALSE; vxge_stop(vdev); vxge_free_resources(ndev, VXGE_FREE_ALL); } return (0); } /* * vxge_shutdown * To shutdown device before system shutdown */ int vxge_shutdown(device_t ndev) { vxge_dev_t *vdev = (vxge_dev_t *) 
device_get_softc(ndev); vxge_stop(vdev); return (0); } /* * vxge_init * Initialize the interface */ void vxge_init(void *vdev_ptr) { vxge_dev_t *vdev = (vxge_dev_t *) vdev_ptr; VXGE_DRV_LOCK(vdev); vxge_init_locked(vdev); VXGE_DRV_UNLOCK(vdev); } /* * vxge_init_locked * Initialize the interface */ void vxge_init_locked(vxge_dev_t *vdev) { int i, err = EINVAL; vxge_hal_device_t *hldev = vdev->devh; vxge_hal_status_e status = VXGE_HAL_OK; vxge_hal_vpath_h vpath_handle; ifnet_t ifp = vdev->ifp; /* If device is in running state, initializing is not required */ if (ifp->if_drv_flags & IFF_DRV_RUNNING) goto _exit0; VXGE_DRV_LOCK_ASSERT(vdev); /* Opening vpaths */ err = vxge_vpath_open(vdev); if (err != 0) goto _exit1; if (vdev->config.rth_enable) { status = vxge_rth_config(vdev); if (status != VXGE_HAL_OK) goto _exit1; } for (i = 0; i < vdev->no_of_vpath; i++) { vpath_handle = vxge_vpath_handle_get(vdev, i); if (!vpath_handle) continue; /* check initial mtu before enabling the device */ status = vxge_hal_device_mtu_check(vpath_handle, ifp->if_mtu); if (status != VXGE_HAL_OK) { device_printf(vdev->ndev, "invalid mtu size %u specified\n", ifp->if_mtu); goto _exit1; } status = vxge_hal_vpath_mtu_set(vpath_handle, ifp->if_mtu); if (status != VXGE_HAL_OK) { device_printf(vdev->ndev, "setting mtu in device failed\n"); goto _exit1; } } /* Enable HAL device */ status = vxge_hal_device_enable(hldev); if (status != VXGE_HAL_OK) { device_printf(vdev->ndev, "failed to enable device\n"); goto _exit1; } if (vdev->config.intr_mode == VXGE_HAL_INTR_MODE_MSIX) vxge_msix_enable(vdev); /* Checksum capability */ ifp->if_hwassist = 0; if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); if (ifp->if_capenable & IFCAP_TSO4) ifp->if_hwassist |= CSUM_TSO; for (i = 0; i < vdev->no_of_vpath; i++) { vpath_handle = vxge_vpath_handle_get(vdev, i); if (!vpath_handle) continue; /* Enabling mcast for all vpath */ vxge_hal_vpath_mcast_enable(vpath_handle); /* Enabling bcast 
for all vpath */ status = vxge_hal_vpath_bcast_enable(vpath_handle); if (status != VXGE_HAL_OK) device_printf(vdev->ndev, "can't enable bcast on vpath (%d)\n", i); } /* Enable interrupts */ vxge_hal_device_intr_enable(vdev->devh); for (i = 0; i < vdev->no_of_vpath; i++) { vpath_handle = vxge_vpath_handle_get(vdev, i); if (!vpath_handle) continue; bzero(&(vdev->vpaths[i].driver_stats), sizeof(vxge_drv_stats_t)); status = vxge_hal_vpath_enable(vpath_handle); if (status != VXGE_HAL_OK) goto _exit2; } vxge_os_mdelay(1000); /* Device is initialized */ vdev->is_initialized = TRUE; /* Now inform the stack we're ready */ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; ifp->if_drv_flags |= IFF_DRV_RUNNING; goto _exit0; _exit2: vxge_hal_device_intr_disable(vdev->devh); vxge_hal_device_disable(hldev); _exit1: vxge_vpath_close(vdev); _exit0: return; } /* * vxge_driver_init * Initializes HAL driver */ vxge_hal_status_e vxge_driver_init(vxge_dev_t *vdev) { vxge_hal_uld_cbs_t uld_callbacks; vxge_hal_driver_config_t driver_config; vxge_hal_status_e status = VXGE_HAL_OK; /* Initialize HAL driver */ if (!vxge_dev_ref_count) { bzero(&uld_callbacks, sizeof(vxge_hal_uld_cbs_t)); bzero(&driver_config, sizeof(vxge_hal_driver_config_t)); uld_callbacks.link_up = vxge_link_up; uld_callbacks.link_down = vxge_link_down; uld_callbacks.crit_err = vxge_crit_error; uld_callbacks.sched_timer = NULL; uld_callbacks.xpak_alarm_log = NULL; status = vxge_hal_driver_initialize(&driver_config, &uld_callbacks); if (status != VXGE_HAL_OK) { device_printf(vdev->ndev, "failed to initialize driver\n"); goto _exit0; } } vxge_hal_driver_debug_set(VXGE_TRACE); vxge_dev_ref_count++; _exit0: return (status); } /* * vxge_driver_config */ int vxge_driver_config(vxge_dev_t *vdev) { int i, err = 0; char temp_buffer[30]; vxge_bw_info_t bw_info; VXGE_GET_PARAM("hint.vxge.0.no_of_vpath", vdev->config, no_of_vpath, VXGE_DEFAULT_USER_HARDCODED); if (vdev->config.no_of_vpath == VXGE_DEFAULT_USER_HARDCODED) vdev->config.no_of_vpath = 
mp_ncpus; if (vdev->config.no_of_vpath <= 0) { err = EINVAL; device_printf(vdev->ndev, "Failed to load driver, \ invalid config : \'no_of_vpath\'\n"); goto _exit0; } VXGE_GET_PARAM("hint.vxge.0.intr_coalesce", vdev->config, intr_coalesce, VXGE_DEFAULT_CONFIG_DISABLE); VXGE_GET_PARAM("hint.vxge.0.rth_enable", vdev->config, rth_enable, VXGE_DEFAULT_CONFIG_ENABLE); VXGE_GET_PARAM("hint.vxge.0.rth_bkt_sz", vdev->config, rth_bkt_sz, VXGE_DEFAULT_RTH_BUCKET_SIZE); VXGE_GET_PARAM("hint.vxge.0.lro_enable", vdev->config, lro_enable, VXGE_DEFAULT_CONFIG_ENABLE); VXGE_GET_PARAM("hint.vxge.0.tso_enable", vdev->config, tso_enable, VXGE_DEFAULT_CONFIG_ENABLE); VXGE_GET_PARAM("hint.vxge.0.tx_steering", vdev->config, tx_steering, VXGE_DEFAULT_CONFIG_DISABLE); VXGE_GET_PARAM("hint.vxge.0.msix_enable", vdev->config, intr_mode, VXGE_HAL_INTR_MODE_MSIX); VXGE_GET_PARAM("hint.vxge.0.ifqmaxlen", vdev->config, ifq_maxlen, VXGE_DEFAULT_CONFIG_IFQ_MAXLEN); VXGE_GET_PARAM("hint.vxge.0.port_mode", vdev->config, port_mode, VXGE_DEFAULT_CONFIG_VALUE); if (vdev->config.port_mode == VXGE_DEFAULT_USER_HARDCODED) vdev->config.port_mode = VXGE_DEFAULT_CONFIG_VALUE; VXGE_GET_PARAM("hint.vxge.0.l2_switch", vdev->config, l2_switch, VXGE_DEFAULT_CONFIG_VALUE); if (vdev->config.l2_switch == VXGE_DEFAULT_USER_HARDCODED) vdev->config.l2_switch = VXGE_DEFAULT_CONFIG_VALUE; VXGE_GET_PARAM("hint.vxge.0.fw_upgrade", vdev->config, fw_option, VXGE_FW_UPGRADE_ALL); VXGE_GET_PARAM("hint.vxge.0.low_latency", vdev->config, low_latency, VXGE_DEFAULT_CONFIG_DISABLE); VXGE_GET_PARAM("hint.vxge.0.func_mode", vdev->config, function_mode, VXGE_DEFAULT_CONFIG_VALUE); if (vdev->config.function_mode == VXGE_DEFAULT_USER_HARDCODED) vdev->config.function_mode = VXGE_DEFAULT_CONFIG_VALUE; if (!(is_multi_func(vdev->config.function_mode) || is_single_func(vdev->config.function_mode))) vdev->config.function_mode = VXGE_DEFAULT_CONFIG_VALUE; for (i = 0; i < VXGE_HAL_MAX_FUNCTIONS; i++) { bw_info.func_id = i; sprintf(temp_buffer, 
"hint.vxge.0.bandwidth_%d", i); VXGE_GET_PARAM(temp_buffer, bw_info, bandwidth, VXGE_DEFAULT_USER_HARDCODED); if (bw_info.bandwidth == VXGE_DEFAULT_USER_HARDCODED) bw_info.bandwidth = VXGE_HAL_VPATH_BW_LIMIT_DEFAULT; sprintf(temp_buffer, "hint.vxge.0.priority_%d", i); VXGE_GET_PARAM(temp_buffer, bw_info, priority, VXGE_DEFAULT_USER_HARDCODED); if (bw_info.priority == VXGE_DEFAULT_USER_HARDCODED) bw_info.priority = VXGE_HAL_VPATH_PRIORITY_DEFAULT; vxge_os_memcpy(&vdev->config.bw_info[i], &bw_info, sizeof(vxge_bw_info_t)); } _exit0: return (err); } /* * vxge_stop */ void vxge_stop(vxge_dev_t *vdev) { VXGE_DRV_LOCK(vdev); vxge_stop_locked(vdev); VXGE_DRV_UNLOCK(vdev); } /* * vxge_stop_locked * Common code for both stop and part of reset. * disables device, interrupts and closes vpaths handle */ void vxge_stop_locked(vxge_dev_t *vdev) { u64 adapter_status = 0; vxge_hal_status_e status; vxge_hal_device_t *hldev = vdev->devh; ifnet_t ifp = vdev->ifp; VXGE_DRV_LOCK_ASSERT(vdev); /* If device is not in "Running" state, return */ if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) return; /* Set appropriate flags */ vdev->is_initialized = FALSE; hldev->link_state = VXGE_HAL_LINK_NONE; ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); if_link_state_change(ifp, LINK_STATE_DOWN); /* Disable interrupts */ vxge_hal_device_intr_disable(hldev); /* Disable HAL device */ status = vxge_hal_device_disable(hldev); if (status != VXGE_HAL_OK) { vxge_hal_device_status(hldev, &adapter_status); device_printf(vdev->ndev, "adapter status: 0x%llx\n", adapter_status); } /* reset vpaths */ vxge_vpath_reset(vdev); vxge_os_mdelay(1000); /* Close Vpaths */ vxge_vpath_close(vdev); } void vxge_send(ifnet_t ifp) { vxge_vpath_t *vpath; vxge_dev_t *vdev = (vxge_dev_t *) ifp->if_softc; vpath = &(vdev->vpaths[0]); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { if (VXGE_TX_TRYLOCK(vpath)) { vxge_send_locked(ifp, vpath); VXGE_TX_UNLOCK(vpath); } } } static inline void vxge_send_locked(ifnet_t ifp, vxge_vpath_t 
*vpath) { mbuf_t m_head = NULL; vxge_dev_t *vdev = vpath->vdev; VXGE_TX_LOCK_ASSERT(vpath); if ((!vdev->is_initialized) || ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING)) return; while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; if (vxge_xmit(ifp, vpath, &m_head)) { if (m_head == NULL) break; ifp->if_drv_flags |= IFF_DRV_OACTIVE; IFQ_DRV_PREPEND(&ifp->if_snd, m_head); VXGE_DRV_STATS(vpath, tx_again); break; } /* Send a copy of the frame to the BPF listener */ ETHER_BPF_MTAP(ifp, m_head); } } #if __FreeBSD_version >= 800000 int vxge_mq_send(ifnet_t ifp, mbuf_t m_head) { int i = 0, err = 0; vxge_vpath_t *vpath; vxge_dev_t *vdev = (vxge_dev_t *) ifp->if_softc; if (vdev->config.tx_steering) { i = vxge_vpath_get(vdev, m_head); } else if (M_HASHTYPE_GET(m_head) != M_HASHTYPE_NONE) { i = m_head->m_pkthdr.flowid % vdev->no_of_vpath; } vpath = &(vdev->vpaths[i]); if (VXGE_TX_TRYLOCK(vpath)) { err = vxge_mq_send_locked(ifp, vpath, m_head); VXGE_TX_UNLOCK(vpath); } else err = drbr_enqueue(ifp, vpath->br, m_head); return (err); } static inline int vxge_mq_send_locked(ifnet_t ifp, vxge_vpath_t *vpath, mbuf_t m_head) { int err = 0; mbuf_t next = NULL; vxge_dev_t *vdev = vpath->vdev; VXGE_TX_LOCK_ASSERT(vpath); if ((!vdev->is_initialized) || ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING)) { err = drbr_enqueue(ifp, vpath->br, m_head); goto _exit0; } if (m_head == NULL) { next = drbr_dequeue(ifp, vpath->br); } else if (drbr_needs_enqueue(ifp, vpath->br)) { if ((err = drbr_enqueue(ifp, vpath->br, m_head)) != 0) goto _exit0; next = drbr_dequeue(ifp, vpath->br); } else next = m_head; /* Process the queue */ while (next != NULL) { if ((err = vxge_xmit(ifp, vpath, &next)) != 0) { if (next == NULL) break; ifp->if_drv_flags |= IFF_DRV_OACTIVE; err = drbr_enqueue(ifp, vpath->br, next); VXGE_DRV_STATS(vpath, tx_again); break; } if_inc_counter(ifp, IFCOUNTER_OBYTES, 
next->m_pkthdr.len); if (next->m_flags & M_MCAST) if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); /* Send a copy of the frame to the BPF listener */ ETHER_BPF_MTAP(ifp, next); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) break; next = drbr_dequeue(ifp, vpath->br); } _exit0: return (err); } void vxge_mq_qflush(ifnet_t ifp) { int i; mbuf_t m_head; vxge_vpath_t *vpath; vxge_dev_t *vdev = (vxge_dev_t *) ifp->if_softc; for (i = 0; i < vdev->no_of_vpath; i++) { vpath = &(vdev->vpaths[i]); if (!vpath->handle) continue; VXGE_TX_LOCK(vpath); while ((m_head = buf_ring_dequeue_sc(vpath->br)) != NULL) vxge_free_packet(m_head); VXGE_TX_UNLOCK(vpath); } if_qflush(ifp); } #endif static inline int vxge_xmit(ifnet_t ifp, vxge_vpath_t *vpath, mbuf_t *m_headp) { int err, num_segs = 0; u32 txdl_avail, dma_index, tagged = 0; dma_addr_t dma_addr; bus_size_t dma_sizes; void *dtr_priv; vxge_txdl_priv_t *txdl_priv; vxge_hal_txdl_h txdlh; vxge_hal_status_e status; vxge_dev_t *vdev = vpath->vdev; VXGE_DRV_STATS(vpath, tx_xmit); txdl_avail = vxge_hal_fifo_free_txdl_count_get(vpath->handle); if (txdl_avail < VXGE_TX_LOW_THRESHOLD) { VXGE_DRV_STATS(vpath, tx_low_dtr_cnt); err = ENOBUFS; goto _exit0; } /* Reserve descriptors */ status = vxge_hal_fifo_txdl_reserve(vpath->handle, &txdlh, &dtr_priv); if (status != VXGE_HAL_OK) { VXGE_DRV_STATS(vpath, tx_reserve_failed); err = ENOBUFS; goto _exit0; } /* Update Tx private structure for this descriptor */ txdl_priv = (vxge_txdl_priv_t *) dtr_priv; /* * Map the packet for DMA. * Returns number of segments through num_segs. 
*/ err = vxge_dma_mbuf_coalesce(vpath->dma_tag_tx, txdl_priv->dma_map, m_headp, txdl_priv->dma_buffers, &num_segs); if (vpath->driver_stats.tx_max_frags < num_segs) vpath->driver_stats.tx_max_frags = num_segs; if (err == ENOMEM) { VXGE_DRV_STATS(vpath, tx_no_dma_setup); vxge_hal_fifo_txdl_free(vpath->handle, txdlh); goto _exit0; } else if (err != 0) { vxge_free_packet(*m_headp); VXGE_DRV_STATS(vpath, tx_no_dma_setup); vxge_hal_fifo_txdl_free(vpath->handle, txdlh); goto _exit0; } txdl_priv->mbuf_pkt = *m_headp; /* Set VLAN tag in descriptor only if this packet has it */ if ((*m_headp)->m_flags & M_VLANTAG) vxge_hal_fifo_txdl_vlan_set(txdlh, (*m_headp)->m_pkthdr.ether_vtag); /* Set descriptor buffer for header and each fragment/segment */ for (dma_index = 0; dma_index < num_segs; dma_index++) { dma_sizes = txdl_priv->dma_buffers[dma_index].ds_len; dma_addr = htole64(txdl_priv->dma_buffers[dma_index].ds_addr); vxge_hal_fifo_txdl_buffer_set(vpath->handle, txdlh, dma_index, dma_addr, dma_sizes); } /* Pre-write Sync of mapping */ bus_dmamap_sync(vpath->dma_tag_tx, txdl_priv->dma_map, BUS_DMASYNC_PREWRITE); if ((*m_headp)->m_pkthdr.csum_flags & CSUM_TSO) { if ((*m_headp)->m_pkthdr.tso_segsz) { VXGE_DRV_STATS(vpath, tx_tso); vxge_hal_fifo_txdl_lso_set(txdlh, VXGE_HAL_FIFO_LSO_FRM_ENCAP_AUTO, (*m_headp)->m_pkthdr.tso_segsz); } } /* Checksum */ if (ifp->if_hwassist > 0) { vxge_hal_fifo_txdl_cksum_set_bits(txdlh, VXGE_HAL_FIFO_TXD_TX_CKO_IPV4_EN | VXGE_HAL_FIFO_TXD_TX_CKO_TCP_EN | VXGE_HAL_FIFO_TXD_TX_CKO_UDP_EN); } if ((vxge_hal_device_check_id(vdev->devh) == VXGE_HAL_CARD_TITAN_1A) && (vdev->hw_fw_version >= VXGE_FW_VERSION(1, 8, 0))) tagged = 1; vxge_hal_fifo_txdl_post(vpath->handle, txdlh, tagged); VXGE_DRV_STATS(vpath, tx_posted); _exit0: return (err); } /* * vxge_tx_replenish * Allocate buffers and set them into descriptors for later use */ /* ARGSUSED */ vxge_hal_status_e vxge_tx_replenish(vxge_hal_vpath_h vpath_handle, vxge_hal_txdl_h txdlh, void *dtr_priv, u32 
dtr_index, void *userdata, vxge_hal_reopen_e reopen) { int err = 0; vxge_vpath_t *vpath = (vxge_vpath_t *) userdata; vxge_txdl_priv_t *txdl_priv = (vxge_txdl_priv_t *) dtr_priv; err = bus_dmamap_create(vpath->dma_tag_tx, BUS_DMA_NOWAIT, &txdl_priv->dma_map); return ((err == 0) ? VXGE_HAL_OK : VXGE_HAL_FAIL); } /* * vxge_tx_compl * If the interrupt is due to Tx completion, free the sent buffer */ vxge_hal_status_e vxge_tx_compl(vxge_hal_vpath_h vpath_handle, vxge_hal_txdl_h txdlh, void *dtr_priv, vxge_hal_fifo_tcode_e t_code, void *userdata) { vxge_hal_status_e status = VXGE_HAL_OK; vxge_txdl_priv_t *txdl_priv; vxge_vpath_t *vpath = (vxge_vpath_t *) userdata; vxge_dev_t *vdev = vpath->vdev; ifnet_t ifp = vdev->ifp; VXGE_TX_LOCK(vpath); /* * For each completed descriptor * Get private structure, free buffer, do unmapping, and free descriptor */ do { VXGE_DRV_STATS(vpath, tx_compl); if (t_code != VXGE_HAL_FIFO_T_CODE_OK) { device_printf(vdev->ndev, "tx transfer code %d\n", t_code); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); VXGE_DRV_STATS(vpath, tx_tcode); vxge_hal_fifo_handle_tcode(vpath_handle, txdlh, t_code); } if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); txdl_priv = (vxge_txdl_priv_t *) dtr_priv; bus_dmamap_unload(vpath->dma_tag_tx, txdl_priv->dma_map); vxge_free_packet(txdl_priv->mbuf_pkt); vxge_hal_fifo_txdl_free(vpath->handle, txdlh); } while (vxge_hal_fifo_txdl_next_completed(vpath_handle, &txdlh, &dtr_priv, &t_code) == VXGE_HAL_OK); ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; VXGE_TX_UNLOCK(vpath); return (status); } /* ARGSUSED */ void vxge_tx_term(vxge_hal_vpath_h vpath_handle, vxge_hal_txdl_h txdlh, void *dtr_priv, vxge_hal_txdl_state_e state, void *userdata, vxge_hal_reopen_e reopen) { vxge_vpath_t *vpath = (vxge_vpath_t *) userdata; vxge_txdl_priv_t *txdl_priv = (vxge_txdl_priv_t *) dtr_priv; if (state != VXGE_HAL_TXDL_STATE_POSTED) return; if (txdl_priv != NULL) { bus_dmamap_sync(vpath->dma_tag_tx, txdl_priv->dma_map, BUS_DMASYNC_POSTWRITE); 
bus_dmamap_unload(vpath->dma_tag_tx, txdl_priv->dma_map); bus_dmamap_destroy(vpath->dma_tag_tx, txdl_priv->dma_map); vxge_free_packet(txdl_priv->mbuf_pkt); } /* Free the descriptor */ vxge_hal_fifo_txdl_free(vpath->handle, txdlh); } /* * vxge_rx_replenish * Allocate buffers and set them into descriptors for later use */ /* ARGSUSED */ vxge_hal_status_e vxge_rx_replenish(vxge_hal_vpath_h vpath_handle, vxge_hal_rxd_h rxdh, void *dtr_priv, u32 dtr_index, void *userdata, vxge_hal_reopen_e reopen) { int err = 0; vxge_hal_status_e status = VXGE_HAL_OK; vxge_vpath_t *vpath = (vxge_vpath_t *) userdata; vxge_rxd_priv_t *rxd_priv = (vxge_rxd_priv_t *) dtr_priv; /* Create DMA map for these descriptors */ err = bus_dmamap_create(vpath->dma_tag_rx, BUS_DMA_NOWAIT, &rxd_priv->dma_map); if (err == 0) { if (vxge_rx_rxd_1b_set(vpath, rxdh, dtr_priv)) { bus_dmamap_destroy(vpath->dma_tag_rx, rxd_priv->dma_map); status = VXGE_HAL_FAIL; } } return (status); } /* * vxge_rx_compl */ vxge_hal_status_e vxge_rx_compl(vxge_hal_vpath_h vpath_handle, vxge_hal_rxd_h rxdh, void *dtr_priv, u8 t_code, void *userdata) { mbuf_t mbuf_up; vxge_rxd_priv_t *rxd_priv; vxge_hal_ring_rxd_info_t ext_info; vxge_hal_status_e status = VXGE_HAL_OK; vxge_vpath_t *vpath = (vxge_vpath_t *) userdata; vxge_dev_t *vdev = vpath->vdev; struct lro_ctrl *lro = &vpath->lro; /* get the interface pointer */ ifnet_t ifp = vdev->ifp; do { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { vxge_hal_ring_rxd_post(vpath_handle, rxdh); status = VXGE_HAL_FAIL; break; } VXGE_DRV_STATS(vpath, rx_compl); rxd_priv = (vxge_rxd_priv_t *) dtr_priv; /* Gets details of mbuf i.e., packet length */ vxge_rx_rxd_1b_get(vpath, rxdh, dtr_priv); /* * Prepare one buffer to send it to upper layer Since upper * layer frees the buffer do not use rxd_priv->mbuf_pkt. 
* Meanwhile prepare a new buffer, do mapping, use with the * current descriptor and post descriptor back to ring vpath */ mbuf_up = rxd_priv->mbuf_pkt; if (t_code != VXGE_HAL_RING_RXD_T_CODE_OK) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); VXGE_DRV_STATS(vpath, rx_tcode); status = vxge_hal_ring_handle_tcode(vpath_handle, rxdh, t_code); /* * If transfer code is not for unknown protocols and * vxge_hal_device_handle_tcode is NOT returned * VXGE_HAL_OK * drop this packet and increment rx_tcode stats */ if ((status != VXGE_HAL_OK) && (t_code != VXGE_HAL_RING_T_CODE_L3_PKT_ERR)) { vxge_free_packet(mbuf_up); vxge_hal_ring_rxd_post(vpath_handle, rxdh); continue; } } if (vxge_rx_rxd_1b_set(vpath, rxdh, dtr_priv)) { /* * If unable to allocate buffer, post descriptor back * to vpath for future processing of same packet. */ vxge_hal_ring_rxd_post(vpath_handle, rxdh); continue; } /* Get the extended information */ vxge_hal_ring_rxd_1b_info_get(vpath_handle, rxdh, &ext_info); /* post descriptor with newly allocated mbuf back to vpath */ vxge_hal_ring_rxd_post(vpath_handle, rxdh); vpath->rxd_posted++; if (vpath->rxd_posted % VXGE_RXD_REPLENISH_COUNT == 0) vxge_hal_ring_rxd_post_post_db(vpath_handle); /* * Set successfully computed checksums in the mbuf. * Leave the rest to the stack to be reverified. 
*/ vxge_rx_checksum(ext_info, mbuf_up); #if __FreeBSD_version >= 800000 M_HASHTYPE_SET(mbuf_up, M_HASHTYPE_OPAQUE); mbuf_up->m_pkthdr.flowid = vpath->vp_index; #endif /* Post-Read sync for buffers */ bus_dmamap_sync(vpath->dma_tag_rx, rxd_priv->dma_map, BUS_DMASYNC_POSTREAD); vxge_rx_input(ifp, mbuf_up, vpath); } while (vxge_hal_ring_rxd_next_completed(vpath_handle, &rxdh, &dtr_priv, &t_code) == VXGE_HAL_OK); /* Flush any outstanding LRO work */ if (vpath->lro_enable && vpath->lro.lro_cnt) tcp_lro_flush_all(lro); return (status); } static inline void vxge_rx_input(ifnet_t ifp, mbuf_t mbuf_up, vxge_vpath_t *vpath) { if (vpath->lro_enable && vpath->lro.lro_cnt) { if (tcp_lro_rx(&vpath->lro, mbuf_up, 0) == 0) return; } (*ifp->if_input) (ifp, mbuf_up); } static inline void vxge_rx_checksum(vxge_hal_ring_rxd_info_t ext_info, mbuf_t mbuf_up) { if (!(ext_info.proto & VXGE_HAL_FRAME_PROTO_IP_FRAG) && (ext_info.proto & VXGE_HAL_FRAME_PROTO_TCP_OR_UDP) && ext_info.l3_cksum_valid && ext_info.l4_cksum_valid) { mbuf_up->m_pkthdr.csum_data = htons(0xffff); mbuf_up->m_pkthdr.csum_flags = CSUM_IP_CHECKED; mbuf_up->m_pkthdr.csum_flags |= CSUM_IP_VALID; mbuf_up->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); } else { if (ext_info.vlan) { mbuf_up->m_pkthdr.ether_vtag = ext_info.vlan; mbuf_up->m_flags |= M_VLANTAG; } } } /* * vxge_rx_term During unload terminate and free all descriptors * @vpath_handle Rx vpath Handle @rxdh Rx Descriptor Handle @state Descriptor * State @userdata Per-adapter Data @reopen vpath open/reopen option */ /* ARGSUSED */ void vxge_rx_term(vxge_hal_vpath_h vpath_handle, vxge_hal_rxd_h rxdh, void *dtr_priv, vxge_hal_rxd_state_e state, void *userdata, vxge_hal_reopen_e reopen) { vxge_vpath_t *vpath = (vxge_vpath_t *) userdata; vxge_rxd_priv_t *rxd_priv = (vxge_rxd_priv_t *) dtr_priv; if (state != VXGE_HAL_RXD_STATE_POSTED) return; if (rxd_priv != NULL) { bus_dmamap_sync(vpath->dma_tag_rx, rxd_priv->dma_map, BUS_DMASYNC_POSTREAD); 
bus_dmamap_unload(vpath->dma_tag_rx, rxd_priv->dma_map); bus_dmamap_destroy(vpath->dma_tag_rx, rxd_priv->dma_map); vxge_free_packet(rxd_priv->mbuf_pkt); } /* Free the descriptor */ vxge_hal_ring_rxd_free(vpath_handle, rxdh); } /* * vxge_rx_rxd_1b_get * Get descriptors of packet to send up */ void vxge_rx_rxd_1b_get(vxge_vpath_t *vpath, vxge_hal_rxd_h rxdh, void *dtr_priv) { vxge_rxd_priv_t *rxd_priv = (vxge_rxd_priv_t *) dtr_priv; mbuf_t mbuf_up = rxd_priv->mbuf_pkt; /* Retrieve data from completed descriptor */ vxge_hal_ring_rxd_1b_get(vpath->handle, rxdh, &rxd_priv->dma_addr[0], (u32 *) &rxd_priv->dma_sizes[0]); /* Update newly created buffer to be sent up with packet length */ mbuf_up->m_len = rxd_priv->dma_sizes[0]; mbuf_up->m_pkthdr.len = rxd_priv->dma_sizes[0]; mbuf_up->m_next = NULL; } /* * vxge_rx_rxd_1b_set * Allocates new mbufs to be placed into descriptors */ int vxge_rx_rxd_1b_set(vxge_vpath_t *vpath, vxge_hal_rxd_h rxdh, void *dtr_priv) { int num_segs, err = 0; mbuf_t mbuf_pkt; bus_dmamap_t dma_map; bus_dma_segment_t dma_buffers[1]; vxge_rxd_priv_t *rxd_priv = (vxge_rxd_priv_t *) dtr_priv; vxge_dev_t *vdev = vpath->vdev; mbuf_pkt = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, vdev->rx_mbuf_sz); if (!mbuf_pkt) { err = ENOBUFS; VXGE_DRV_STATS(vpath, rx_no_buf); device_printf(vdev->ndev, "out of memory to allocate mbuf\n"); goto _exit0; } /* Update mbuf's length, packet length and receive interface */ mbuf_pkt->m_len = vdev->rx_mbuf_sz; mbuf_pkt->m_pkthdr.len = vdev->rx_mbuf_sz; mbuf_pkt->m_pkthdr.rcvif = vdev->ifp; /* Load DMA map */ err = vxge_dma_mbuf_coalesce(vpath->dma_tag_rx, vpath->extra_dma_map, &mbuf_pkt, dma_buffers, &num_segs); if (err != 0) { VXGE_DRV_STATS(vpath, rx_map_fail); vxge_free_packet(mbuf_pkt); goto _exit0; } /* Unload DMA map of mbuf in current descriptor */ bus_dmamap_sync(vpath->dma_tag_rx, rxd_priv->dma_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(vpath->dma_tag_rx, rxd_priv->dma_map); /* Update descriptor private data */ dma_map = 
rxd_priv->dma_map; rxd_priv->mbuf_pkt = mbuf_pkt; rxd_priv->dma_addr[0] = htole64(dma_buffers->ds_addr); rxd_priv->dma_map = vpath->extra_dma_map; vpath->extra_dma_map = dma_map; /* Pre-Read/Write sync */ bus_dmamap_sync(vpath->dma_tag_rx, rxd_priv->dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* Set descriptor buffer */ vxge_hal_ring_rxd_1b_set(rxdh, rxd_priv->dma_addr[0], vdev->rx_mbuf_sz); _exit0: return (err); } /* * vxge_link_up * Callback for Link-up indication from HAL */ /* ARGSUSED */ void vxge_link_up(vxge_hal_device_h devh, void *userdata) { int i; vxge_vpath_t *vpath; vxge_hal_device_hw_info_t *hw_info; vxge_dev_t *vdev = (vxge_dev_t *) userdata; hw_info = &vdev->config.hw_info; ifnet_t ifp = vdev->ifp; if (vdev->config.intr_mode == VXGE_HAL_INTR_MODE_MSIX) { for (i = 0; i < vdev->no_of_vpath; i++) { vpath = &(vdev->vpaths[i]); vxge_hal_vpath_tti_ci_set(vpath->handle); vxge_hal_vpath_rti_ci_set(vpath->handle); } } if (vdev->is_privilaged && (hw_info->ports > 1)) { vxge_active_port_update(vdev); device_printf(vdev->ndev, "Active Port : %lld\n", vdev->active_port); } ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; if_link_state_change(ifp, LINK_STATE_UP); } /* * vxge_link_down * Callback for Link-down indication from HAL */ /* ARGSUSED */ void vxge_link_down(vxge_hal_device_h devh, void *userdata) { int i; vxge_vpath_t *vpath; vxge_dev_t *vdev = (vxge_dev_t *) userdata; ifnet_t ifp = vdev->ifp; if (vdev->config.intr_mode == VXGE_HAL_INTR_MODE_MSIX) { for (i = 0; i < vdev->no_of_vpath; i++) { vpath = &(vdev->vpaths[i]); vxge_hal_vpath_tti_ci_reset(vpath->handle); vxge_hal_vpath_rti_ci_reset(vpath->handle); } } ifp->if_drv_flags |= IFF_DRV_OACTIVE; if_link_state_change(ifp, LINK_STATE_DOWN); } /* * vxge_reset */ void vxge_reset(vxge_dev_t *vdev) { if (!vdev->is_initialized) return; VXGE_DRV_LOCK(vdev); vxge_stop_locked(vdev); vxge_init_locked(vdev); VXGE_DRV_UNLOCK(vdev); } /* * vxge_crit_error * Callback for Critical error indication from HAL */ /* 
ARGSUSED */ void vxge_crit_error(vxge_hal_device_h devh, void *userdata, vxge_hal_event_e type, u64 serr_data) { vxge_dev_t *vdev = (vxge_dev_t *) userdata; ifnet_t ifp = vdev->ifp; switch (type) { case VXGE_HAL_EVENT_SERR: case VXGE_HAL_EVENT_KDFCCTL: case VXGE_HAL_EVENT_CRITICAL: vxge_hal_device_intr_disable(vdev->devh); ifp->if_drv_flags |= IFF_DRV_OACTIVE; if_link_state_change(ifp, LINK_STATE_DOWN); break; default: break; } } /* * vxge_ifp_setup */ int vxge_ifp_setup(device_t ndev) { ifnet_t ifp; int i, j, err = 0; vxge_dev_t *vdev = (vxge_dev_t *) device_get_softc(ndev); for (i = 0, j = 0; i < VXGE_HAL_MAX_VIRTUAL_PATHS; i++) { if (!bVAL1(vdev->config.hw_info.vpath_mask, i)) continue; if (j >= vdev->no_of_vpath) break; vdev->vpaths[j].vp_id = i; vdev->vpaths[j].vp_index = j; vdev->vpaths[j].vdev = vdev; vdev->vpaths[j].is_configured = TRUE; vxge_os_memcpy((u8 *) vdev->vpaths[j].mac_addr, (u8 *) (vdev->config.hw_info.mac_addrs[i]), (size_t) ETHER_ADDR_LEN); j++; } /* Get interface ifnet structure for this Ether device */ ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(vdev->ndev, "memory allocation for ifnet failed\n"); err = ENXIO; goto _exit0; } vdev->ifp = ifp; /* Initialize interface ifnet structure */ if_initname(ifp, device_get_name(ndev), device_get_unit(ndev)); ifp->if_baudrate = VXGE_BAUDRATE; ifp->if_init = vxge_init; ifp->if_softc = vdev; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = vxge_ioctl; ifp->if_start = vxge_send; #if __FreeBSD_version >= 800000 ifp->if_transmit = vxge_mq_send; ifp->if_qflush = vxge_mq_qflush; #endif ifp->if_snd.ifq_drv_maxlen = max(vdev->config.ifq_maxlen, ifqmaxlen); IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen); /* IFQ_SET_READY(&ifp->if_snd); */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM; ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; ifp->if_capabilities |= IFCAP_JUMBO_MTU; if 
(vdev->config.tso_enable) vxge_tso_config(vdev); if (vdev->config.lro_enable) ifp->if_capabilities |= IFCAP_LRO; ifp->if_capenable = ifp->if_capabilities; strlcpy(vdev->ndev_name, device_get_nameunit(ndev), sizeof(vdev->ndev_name)); /* Attach the interface */ ether_ifattach(ifp, vdev->vpaths[0].mac_addr); _exit0: return (err); } /* * vxge_isr_setup * Register isr functions */ int vxge_isr_setup(vxge_dev_t *vdev) { int i, irq_rid, err = 0; vxge_vpath_t *vpath; void *isr_func_arg; void (*isr_func_ptr) (void *); switch (vdev->config.intr_mode) { case VXGE_HAL_INTR_MODE_IRQLINE: err = bus_setup_intr(vdev->ndev, vdev->config.isr_info[0].irq_res, (INTR_TYPE_NET | INTR_MPSAFE), vxge_isr_filter, vxge_isr_line, vdev, &vdev->config.isr_info[0].irq_handle); break; case VXGE_HAL_INTR_MODE_MSIX: for (i = 0; i < vdev->intr_count; i++) { irq_rid = vdev->config.isr_info[i].irq_rid; vpath = &vdev->vpaths[irq_rid / 4]; if ((irq_rid % 4) == 2) { isr_func_ptr = vxge_isr_msix; isr_func_arg = (void *) vpath; } else if ((irq_rid % 4) == 3) { isr_func_ptr = vxge_isr_msix_alarm; isr_func_arg = (void *) vpath; } else break; err = bus_setup_intr(vdev->ndev, vdev->config.isr_info[i].irq_res, (INTR_TYPE_NET | INTR_MPSAFE), NULL, (void *) isr_func_ptr, (void *) isr_func_arg, &vdev->config.isr_info[i].irq_handle); if (err != 0) break; } if (err != 0) { /* Teardown interrupt handler */ while (--i > 0) bus_teardown_intr(vdev->ndev, vdev->config.isr_info[i].irq_res, vdev->config.isr_info[i].irq_handle); } break; } return (err); } /* * vxge_isr_filter * ISR filter function - filter interrupts from other shared devices */ int vxge_isr_filter(void *handle) { u64 val64 = 0; vxge_dev_t *vdev = (vxge_dev_t *) handle; __hal_device_t *hldev = (__hal_device_t *) vdev->devh; vxge_hal_common_reg_t *common_reg = (vxge_hal_common_reg_t *) (hldev->common_reg); val64 = vxge_os_pio_mem_read64(vdev->pdev, (vdev->devh)->regh0, &common_reg->titan_general_int_status); return ((val64) ? 
FILTER_SCHEDULE_THREAD : FILTER_STRAY); } /* * vxge_isr_line * Interrupt service routine for Line interrupts */ void vxge_isr_line(void *vdev_ptr) { vxge_dev_t *vdev = (vxge_dev_t *) vdev_ptr; vxge_hal_device_handle_irq(vdev->devh, 0); } void vxge_isr_msix(void *vpath_ptr) { u32 got_rx = 0; u32 got_tx = 0; __hal_virtualpath_t *hal_vpath; vxge_vpath_t *vpath = (vxge_vpath_t *) vpath_ptr; vxge_dev_t *vdev = vpath->vdev; hal_vpath = ((__hal_vpath_handle_t *) vpath->handle)->vpath; VXGE_DRV_STATS(vpath, isr_msix); VXGE_HAL_DEVICE_STATS_SW_INFO_TRAFFIC_INTR(vdev->devh); vxge_hal_vpath_mf_msix_mask(vpath->handle, vpath->msix_vec); /* processing rx */ vxge_hal_vpath_poll_rx(vpath->handle, &got_rx); /* processing tx */ if (hal_vpath->vp_config->fifo.enable) { vxge_intr_coalesce_tx(vpath); vxge_hal_vpath_poll_tx(vpath->handle, &got_tx); } vxge_hal_vpath_mf_msix_unmask(vpath->handle, vpath->msix_vec); } void vxge_isr_msix_alarm(void *vpath_ptr) { int i; vxge_hal_status_e status = VXGE_HAL_OK; vxge_vpath_t *vpath = (vxge_vpath_t *) vpath_ptr; vxge_dev_t *vdev = vpath->vdev; VXGE_HAL_DEVICE_STATS_SW_INFO_NOT_TRAFFIC_INTR(vdev->devh); /* Process alarms in each vpath */ for (i = 0; i < vdev->no_of_vpath; i++) { vpath = &(vdev->vpaths[i]); vxge_hal_vpath_mf_msix_mask(vpath->handle, vpath->msix_vec_alarm); status = vxge_hal_vpath_alarm_process(vpath->handle, 0); if ((status == VXGE_HAL_ERR_EVENT_SLOT_FREEZE) || (status == VXGE_HAL_ERR_EVENT_SERR)) { device_printf(vdev->ndev, "processing alarms urecoverable error %x\n", status); /* Stop the driver */ vdev->is_initialized = FALSE; break; } vxge_hal_vpath_mf_msix_unmask(vpath->handle, vpath->msix_vec_alarm); } } /* * vxge_msix_enable */ vxge_hal_status_e vxge_msix_enable(vxge_dev_t *vdev) { int i, first_vp_id, msix_id; vxge_vpath_t *vpath; vxge_hal_status_e status = VXGE_HAL_OK; /* * Unmasking and Setting MSIX vectors before enabling interrupts * tim[] : 0 - Tx ## 1 - Rx ## 2 - UMQ-DMQ ## 0 - BITMAP */ int tim[4] = {0, 1, 0, 0}; for 
(i = 0; i < vdev->no_of_vpath; i++) { vpath = vdev->vpaths + i; first_vp_id = vdev->vpaths[0].vp_id; msix_id = vpath->vp_id * VXGE_HAL_VPATH_MSIX_ACTIVE; tim[1] = vpath->msix_vec = msix_id + 1; vpath->msix_vec_alarm = first_vp_id * VXGE_HAL_VPATH_MSIX_ACTIVE + VXGE_HAL_VPATH_MSIX_ALARM_ID; status = vxge_hal_vpath_mf_msix_set(vpath->handle, tim, VXGE_HAL_VPATH_MSIX_ALARM_ID); if (status != VXGE_HAL_OK) { device_printf(vdev->ndev, "failed to set msix vectors to vpath\n"); break; } vxge_hal_vpath_mf_msix_unmask(vpath->handle, vpath->msix_vec); vxge_hal_vpath_mf_msix_unmask(vpath->handle, vpath->msix_vec_alarm); } return (status); } /* * vxge_media_init * Initializes, adds and sets media */ void vxge_media_init(vxge_dev_t *vdev) { ifmedia_init(&vdev->media, IFM_IMASK, vxge_media_change, vxge_media_status); /* Add supported media */ ifmedia_add(&vdev->media, IFM_ETHER | vdev->ifm_optics | IFM_FDX, 0, NULL); /* Set media */ ifmedia_add(&vdev->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&vdev->media, IFM_ETHER | IFM_AUTO); } /* * vxge_media_status * Callback for interface media settings */ void vxge_media_status(ifnet_t ifp, struct ifmediareq *ifmr) { vxge_dev_t *vdev = (vxge_dev_t *) ifp->if_softc; vxge_hal_device_t *hldev = vdev->devh; ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; /* set link state */ if (vxge_hal_device_link_state_get(hldev) == VXGE_HAL_LINK_UP) { ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= vdev->ifm_optics | IFM_FDX; if_link_state_change(ifp, LINK_STATE_UP); } } /* * vxge_media_change * Media change driver callback */ int vxge_media_change(ifnet_t ifp) { vxge_dev_t *vdev = (vxge_dev_t *) ifp->if_softc; struct ifmedia *ifmediap = &vdev->media; return (IFM_TYPE(ifmediap->ifm_media) != IFM_ETHER ? 
EINVAL : 0);
}

/*
 * vxge_alloc_resources
 * Allocates the HAL device configuration buffer, the PCI info structure
 * and BARs 0, 1 and 2.
 *
 * Returns 0 on success.  On failure returns ENOMEM or the error from
 * vxge_alloc_bar_resources(), after calling vxge_free_resources() with the
 * level matching the step that failed.
 */
int
vxge_alloc_resources(vxge_dev_t *vdev)
{
	/* Cleanup level to request when allocation of BAR n fails */
	static const vxge_free_resources_e bar_undo[] = {
		VXGE_FREE_BAR0, VXGE_FREE_BAR1, VXGE_FREE_BAR2
	};
	int bar, err = 0;
	vxge_pci_info_t *pci_info = NULL;
	vxge_free_resources_e error_level = VXGE_FREE_NONE;
	device_t ndev = vdev->ndev;

	/* Buffer for the HAL device configuration */
	vdev->device_config = (vxge_hal_device_config_t *)
	    vxge_mem_alloc(sizeof(vxge_hal_device_config_t));
	if (!vdev->device_config) {
		device_printf(vdev->ndev,
		    "failed to allocate memory for device config\n");
		err = ENOMEM;
		error_level = VXGE_DISABLE_PCI_BUSMASTER;
		goto done;
	}

	pci_info = (vxge_pci_info_t *) vxge_mem_alloc(sizeof(vxge_pci_info_t));
	if (!pci_info) {
		device_printf(vdev->ndev,
		    "failed to allocate memory for pci info\n");
		err = ENOMEM;
		error_level = VXGE_FREE_DEVICE_CONFIG;
		goto done;
	}
	pci_info->ndev = ndev;
	vdev->pdev = pci_info;

	/* BARs 0..2 in order; stop at the first one that fails */
	for (bar = 0; bar < 3; bar++) {
		err = vxge_alloc_bar_resources(vdev, bar);
		if (err != 0) {
			error_level = bar_undo[bar];
			break;
		}
	}

done:
	if (error_level)
		vxge_free_resources(ndev, error_level);

	return (err);
}

/*
 * vxge_alloc_bar_resources
 * Allocates the bus resource and register-map bookkeeping for BAR index i.
 */
int
vxge_alloc_bar_resources(vxge_dev_t *vdev, int i)
{
	int err = 0;
	int res_id = 0;
	vxge_pci_info_t *pci_info = vdev->pdev;

	res_id = PCIR_BAR((i == 0) ?
0 : (i * 2)); pci_info->bar_info[i] = bus_alloc_resource_any(vdev->ndev, SYS_RES_MEMORY, &res_id, RF_ACTIVE); if (pci_info->bar_info[i] == NULL) { device_printf(vdev->ndev, "failed to allocate memory for bus resources\n"); err = ENOMEM; goto _exit0; } pci_info->reg_map[i] = (vxge_bus_res_t *) vxge_mem_alloc(sizeof(vxge_bus_res_t)); if (pci_info->reg_map[i] == NULL) { device_printf(vdev->ndev, "failed to allocate memory bar resources\n"); err = ENOMEM; goto _exit0; } ((vxge_bus_res_t *) (pci_info->reg_map[i]))->bus_space_tag = rman_get_bustag(pci_info->bar_info[i]); ((vxge_bus_res_t *) (pci_info->reg_map[i]))->bus_space_handle = rman_get_bushandle(pci_info->bar_info[i]); ((vxge_bus_res_t *) (pci_info->reg_map[i]))->bar_start_addr = pci_info->bar_info[i]; ((vxge_bus_res_t *) (pci_info->reg_map[i]))->bus_res_len = rman_get_size(pci_info->bar_info[i]); _exit0: return (err); } /* * vxge_alloc_isr_resources */ int vxge_alloc_isr_resources(vxge_dev_t *vdev) { int i, err = 0, irq_rid; int msix_vec_reqd, intr_count, msix_count; int intr_mode = VXGE_HAL_INTR_MODE_IRQLINE; if (vdev->config.intr_mode == VXGE_HAL_INTR_MODE_MSIX) { /* MSI-X messages supported by device */ intr_count = pci_msix_count(vdev->ndev); if (intr_count) { msix_vec_reqd = 4 * vdev->no_of_vpath; if (intr_count >= msix_vec_reqd) { intr_count = msix_vec_reqd; err = pci_alloc_msix(vdev->ndev, &intr_count); if (err == 0) intr_mode = VXGE_HAL_INTR_MODE_MSIX; } if ((err != 0) || (intr_count < msix_vec_reqd)) { device_printf(vdev->ndev, "Unable to allocate " "msi/x vectors switching to INTA mode\n"); } } } err = 0; vdev->intr_count = 0; vdev->config.intr_mode = intr_mode; switch (vdev->config.intr_mode) { case VXGE_HAL_INTR_MODE_IRQLINE: vdev->config.isr_info[0].irq_rid = 0; vdev->config.isr_info[0].irq_res = bus_alloc_resource_any(vdev->ndev, SYS_RES_IRQ, &vdev->config.isr_info[0].irq_rid, (RF_SHAREABLE | RF_ACTIVE)); if (vdev->config.isr_info[0].irq_res == NULL) { device_printf(vdev->ndev, "failed to allocate 
line interrupt resource\n"); err = ENOMEM; goto _exit0; } vdev->intr_count++; break; case VXGE_HAL_INTR_MODE_MSIX: msix_count = 0; for (i = 0; i < vdev->no_of_vpath; i++) { irq_rid = i * 4; vdev->config.isr_info[msix_count].irq_rid = irq_rid + 2; vdev->config.isr_info[msix_count].irq_res = bus_alloc_resource_any(vdev->ndev, SYS_RES_IRQ, &vdev->config.isr_info[msix_count].irq_rid, (RF_SHAREABLE | RF_ACTIVE)); if (vdev->config.isr_info[msix_count].irq_res == NULL) { device_printf(vdev->ndev, "allocating bus resource (rid %d) failed\n", vdev->config.isr_info[msix_count].irq_rid); err = ENOMEM; goto _exit0; } vdev->intr_count++; err = bus_bind_intr(vdev->ndev, vdev->config.isr_info[msix_count].irq_res, (i % mp_ncpus)); if (err != 0) break; msix_count++; } vdev->config.isr_info[msix_count].irq_rid = 3; vdev->config.isr_info[msix_count].irq_res = bus_alloc_resource_any(vdev->ndev, SYS_RES_IRQ, &vdev->config.isr_info[msix_count].irq_rid, (RF_SHAREABLE | RF_ACTIVE)); if (vdev->config.isr_info[msix_count].irq_res == NULL) { device_printf(vdev->ndev, "allocating bus resource (rid %d) failed\n", vdev->config.isr_info[msix_count].irq_rid); err = ENOMEM; goto _exit0; } vdev->intr_count++; err = bus_bind_intr(vdev->ndev, vdev->config.isr_info[msix_count].irq_res, (i % mp_ncpus)); break; } vdev->device_config->intr_mode = vdev->config.intr_mode; _exit0: return (err); } /* * vxge_free_resources * Undo what-all we did during load/attach */ void vxge_free_resources(device_t ndev, vxge_free_resources_e vxge_free_resource) { int i; vxge_dev_t *vdev; vdev = (vxge_dev_t *) device_get_softc(ndev); switch (vxge_free_resource) { case VXGE_FREE_ALL: for (i = 0; i < vdev->intr_count; i++) { bus_teardown_intr(ndev, vdev->config.isr_info[i].irq_res, vdev->config.isr_info[i].irq_handle); } /* FALLTHROUGH */ case VXGE_FREE_INTERFACE: ether_ifdetach(vdev->ifp); bus_generic_detach(ndev); if_free(vdev->ifp); /* FALLTHROUGH */ case VXGE_FREE_MEDIA: ifmedia_removeall(&vdev->media); /* FALLTHROUGH */ 
case VXGE_FREE_MUTEX: vxge_mutex_destroy(vdev); /* FALLTHROUGH */ case VXGE_FREE_VPATH: vxge_mem_free(vdev->vpaths, vdev->no_of_vpath * sizeof(vxge_vpath_t)); /* FALLTHROUGH */ case VXGE_FREE_TERMINATE_DEVICE: if (vdev->devh != NULL) { vxge_hal_device_private_set(vdev->devh, 0); vxge_hal_device_terminate(vdev->devh); } /* FALLTHROUGH */ case VXGE_FREE_ISR_RESOURCE: vxge_free_isr_resources(vdev); /* FALLTHROUGH */ case VXGE_FREE_BAR2: vxge_free_bar_resources(vdev, 2); /* FALLTHROUGH */ case VXGE_FREE_BAR1: vxge_free_bar_resources(vdev, 1); /* FALLTHROUGH */ case VXGE_FREE_BAR0: vxge_free_bar_resources(vdev, 0); /* FALLTHROUGH */ case VXGE_FREE_PCI_INFO: vxge_mem_free(vdev->pdev, sizeof(vxge_pci_info_t)); /* FALLTHROUGH */ case VXGE_FREE_DEVICE_CONFIG: vxge_mem_free(vdev->device_config, sizeof(vxge_hal_device_config_t)); /* FALLTHROUGH */ case VXGE_DISABLE_PCI_BUSMASTER: pci_disable_busmaster(ndev); /* FALLTHROUGH */ case VXGE_FREE_TERMINATE_DRIVER: if (vxge_dev_ref_count) { --vxge_dev_ref_count; if (0 == vxge_dev_ref_count) vxge_hal_driver_terminate(); } /* FALLTHROUGH */ default: case VXGE_FREE_NONE: break; /* NOTREACHED */ } } void vxge_free_isr_resources(vxge_dev_t *vdev) { int i; switch (vdev->config.intr_mode) { case VXGE_HAL_INTR_MODE_IRQLINE: if (vdev->config.isr_info[0].irq_res) { bus_release_resource(vdev->ndev, SYS_RES_IRQ, vdev->config.isr_info[0].irq_rid, vdev->config.isr_info[0].irq_res); vdev->config.isr_info[0].irq_res = NULL; } break; case VXGE_HAL_INTR_MODE_MSIX: for (i = 0; i < vdev->intr_count; i++) { if (vdev->config.isr_info[i].irq_res) { bus_release_resource(vdev->ndev, SYS_RES_IRQ, vdev->config.isr_info[i].irq_rid, vdev->config.isr_info[i].irq_res); vdev->config.isr_info[i].irq_res = NULL; } } if (vdev->intr_count) pci_release_msi(vdev->ndev); break; } } void vxge_free_bar_resources(vxge_dev_t *vdev, int i) { int res_id = 0; vxge_pci_info_t *pci_info = vdev->pdev; res_id = PCIR_BAR((i == 0) ? 
0 : (i * 2)); if (pci_info->bar_info[i]) bus_release_resource(vdev->ndev, SYS_RES_MEMORY, res_id, pci_info->bar_info[i]); vxge_mem_free(pci_info->reg_map[i], sizeof(vxge_bus_res_t)); } /* * vxge_init_mutex * Initializes mutexes used in driver */ void vxge_mutex_init(vxge_dev_t *vdev) { int i; snprintf(vdev->mtx_drv_name, sizeof(vdev->mtx_drv_name), "%s_drv", vdev->ndev_name); mtx_init(&vdev->mtx_drv, vdev->mtx_drv_name, MTX_NETWORK_LOCK, MTX_DEF); for (i = 0; i < vdev->no_of_vpath; i++) { snprintf(vdev->vpaths[i].mtx_tx_name, sizeof(vdev->vpaths[i].mtx_tx_name), "%s_tx_%d", vdev->ndev_name, i); mtx_init(&vdev->vpaths[i].mtx_tx, vdev->vpaths[i].mtx_tx_name, NULL, MTX_DEF); } } /* * vxge_mutex_destroy * Destroys mutexes used in driver */ void vxge_mutex_destroy(vxge_dev_t *vdev) { int i; for (i = 0; i < vdev->no_of_vpath; i++) VXGE_TX_LOCK_DESTROY(&(vdev->vpaths[i])); VXGE_DRV_LOCK_DESTROY(vdev); } /* * vxge_rth_config */ vxge_hal_status_e vxge_rth_config(vxge_dev_t *vdev) { int i; vxge_hal_vpath_h vpath_handle; vxge_hal_rth_hash_types_t hash_types; vxge_hal_status_e status = VXGE_HAL_OK; u8 mtable[256] = {0}; /* Filling matable with bucket-to-vpath mapping */ vdev->config.rth_bkt_sz = VXGE_DEFAULT_RTH_BUCKET_SIZE; for (i = 0; i < (1 << vdev->config.rth_bkt_sz); i++) mtable[i] = i % vdev->no_of_vpath; /* Fill RTH hash types */ hash_types.hash_type_tcpipv4_en = VXGE_HAL_RING_HASH_TYPE_TCP_IPV4; hash_types.hash_type_tcpipv6_en = VXGE_HAL_RING_HASH_TYPE_TCP_IPV6; hash_types.hash_type_tcpipv6ex_en = VXGE_HAL_RING_HASH_TYPE_TCP_IPV6_EX; hash_types.hash_type_ipv4_en = VXGE_HAL_RING_HASH_TYPE_IPV4; hash_types.hash_type_ipv6_en = VXGE_HAL_RING_HASH_TYPE_IPV6; hash_types.hash_type_ipv6ex_en = VXGE_HAL_RING_HASH_TYPE_IPV6_EX; /* set indirection table, bucket-to-vpath mapping */ status = vxge_hal_vpath_rts_rth_itable_set(vdev->vpath_handles, vdev->no_of_vpath, mtable, ((u32) (1 << vdev->config.rth_bkt_sz))); if (status != VXGE_HAL_OK) { device_printf(vdev->ndev, "rth 
configuration failed\n"); goto _exit0; } for (i = 0; i < vdev->no_of_vpath; i++) { vpath_handle = vxge_vpath_handle_get(vdev, i); if (!vpath_handle) continue; status = vxge_hal_vpath_rts_rth_set(vpath_handle, RTH_ALG_JENKINS, &hash_types, vdev->config.rth_bkt_sz, TRUE); if (status != VXGE_HAL_OK) { device_printf(vdev->ndev, "rth configuration failed for vpath (%d)\n", vdev->vpaths[i].vp_id); break; } } _exit0: return (status); } /* * vxge_vpath_config * Sets HAL parameter values from kenv */ void vxge_vpath_config(vxge_dev_t *vdev) { int i; u32 no_of_vpath = 0; vxge_hal_vp_config_t *vp_config; vxge_hal_device_config_t *device_config = vdev->device_config; device_config->debug_level = VXGE_TRACE; device_config->debug_mask = VXGE_COMPONENT_ALL; device_config->device_poll_millis = VXGE_DEFAULT_DEVICE_POLL_MILLIS; vdev->config.no_of_vpath = min(vdev->config.no_of_vpath, vdev->max_supported_vpath); for (i = 0; i < VXGE_HAL_MAX_VIRTUAL_PATHS; i++) { vp_config = &(device_config->vp_config[i]); vp_config->fifo.enable = VXGE_HAL_FIFO_DISABLE; vp_config->ring.enable = VXGE_HAL_RING_DISABLE; } for (i = 0; i < VXGE_HAL_MAX_VIRTUAL_PATHS; i++) { if (no_of_vpath >= vdev->config.no_of_vpath) break; if (!bVAL1(vdev->config.hw_info.vpath_mask, i)) continue; no_of_vpath++; vp_config = &(device_config->vp_config[i]); vp_config->mtu = VXGE_HAL_DEFAULT_MTU; vp_config->ring.enable = VXGE_HAL_RING_ENABLE; vp_config->ring.post_mode = VXGE_HAL_RING_POST_MODE_DOORBELL; vp_config->ring.buffer_mode = VXGE_HAL_RING_RXD_BUFFER_MODE_1; vp_config->ring.ring_length = vxge_ring_length_get(VXGE_HAL_RING_RXD_BUFFER_MODE_1); vp_config->ring.scatter_mode = VXGE_HAL_RING_SCATTER_MODE_A; vp_config->rpa_all_vid_en = VXGE_DEFAULT_ALL_VID_ENABLE; vp_config->rpa_strip_vlan_tag = VXGE_DEFAULT_STRIP_VLAN_TAG; vp_config->rpa_ucast_all_addr_en = VXGE_HAL_VPATH_RPA_UCAST_ALL_ADDR_DISABLE; vp_config->rti.intr_enable = VXGE_HAL_TIM_INTR_ENABLE; vp_config->rti.txfrm_cnt_en = VXGE_HAL_TXFRM_CNT_EN_ENABLE; 
vp_config->rti.util_sel = VXGE_HAL_TIM_UTIL_SEL_LEGACY_RX_NET_UTIL; vp_config->rti.uec_a = VXGE_DEFAULT_RTI_RX_UFC_A; vp_config->rti.uec_b = VXGE_DEFAULT_RTI_RX_UFC_B; vp_config->rti.uec_c = VXGE_DEFAULT_RTI_RX_UFC_C; vp_config->rti.uec_d = VXGE_DEFAULT_RTI_RX_UFC_D; vp_config->rti.urange_a = VXGE_DEFAULT_RTI_RX_URANGE_A; vp_config->rti.urange_b = VXGE_DEFAULT_RTI_RX_URANGE_B; vp_config->rti.urange_c = VXGE_DEFAULT_RTI_RX_URANGE_C; vp_config->rti.timer_ac_en = VXGE_HAL_TIM_TIMER_AC_ENABLE; vp_config->rti.timer_ci_en = VXGE_HAL_TIM_TIMER_CI_ENABLE; vp_config->rti.btimer_val = (VXGE_DEFAULT_RTI_BTIMER_VAL * 1000) / 272; vp_config->rti.rtimer_val = (VXGE_DEFAULT_RTI_RTIMER_VAL * 1000) / 272; vp_config->rti.ltimer_val = (VXGE_DEFAULT_RTI_LTIMER_VAL * 1000) / 272; if ((no_of_vpath > 1) && (VXGE_DEFAULT_CONFIG_MQ_ENABLE == 0)) continue; vp_config->fifo.enable = VXGE_HAL_FIFO_ENABLE; vp_config->fifo.max_aligned_frags = VXGE_DEFAULT_FIFO_ALIGNED_FRAGS; vp_config->tti.intr_enable = VXGE_HAL_TIM_INTR_ENABLE; vp_config->tti.txfrm_cnt_en = VXGE_HAL_TXFRM_CNT_EN_ENABLE; vp_config->tti.util_sel = VXGE_HAL_TIM_UTIL_SEL_LEGACY_TX_NET_UTIL; vp_config->tti.uec_a = VXGE_DEFAULT_TTI_TX_UFC_A; vp_config->tti.uec_b = VXGE_DEFAULT_TTI_TX_UFC_B; vp_config->tti.uec_c = VXGE_DEFAULT_TTI_TX_UFC_C; vp_config->tti.uec_d = VXGE_DEFAULT_TTI_TX_UFC_D; vp_config->tti.urange_a = VXGE_DEFAULT_TTI_TX_URANGE_A; vp_config->tti.urange_b = VXGE_DEFAULT_TTI_TX_URANGE_B; vp_config->tti.urange_c = VXGE_DEFAULT_TTI_TX_URANGE_C; vp_config->tti.timer_ac_en = VXGE_HAL_TIM_TIMER_AC_ENABLE; vp_config->tti.timer_ci_en = VXGE_HAL_TIM_TIMER_CI_ENABLE; vp_config->tti.btimer_val = (VXGE_DEFAULT_TTI_BTIMER_VAL * 1000) / 272; vp_config->tti.rtimer_val = (VXGE_DEFAULT_TTI_RTIMER_VAL * 1000) / 272; vp_config->tti.ltimer_val = (VXGE_DEFAULT_TTI_LTIMER_VAL * 1000) / 272; } vdev->no_of_vpath = no_of_vpath; if (vdev->no_of_vpath == 1) vdev->config.tx_steering = 0; if (vdev->config.rth_enable && (vdev->no_of_vpath > 1)) { 
device_config->rth_en = VXGE_HAL_RTH_ENABLE; device_config->rth_it_type = VXGE_HAL_RTH_IT_TYPE_MULTI_IT; } vdev->config.rth_enable = device_config->rth_en; } /* * vxge_vpath_cb_fn * Virtual path Callback function */ /* ARGSUSED */ static vxge_hal_status_e vxge_vpath_cb_fn(vxge_hal_client_h client_handle, vxge_hal_up_msg_h msgh, vxge_hal_message_type_e msg_type, vxge_hal_obj_id_t obj_id, vxge_hal_result_e result, vxge_hal_opaque_handle_t *opaque_handle) { return (VXGE_HAL_OK); } /* * vxge_vpath_open */ int vxge_vpath_open(vxge_dev_t *vdev) { int i, err = EINVAL; u64 func_id; vxge_vpath_t *vpath; vxge_hal_vpath_attr_t vpath_attr; vxge_hal_status_e status = VXGE_HAL_OK; struct lro_ctrl *lro = NULL; bzero(&vpath_attr, sizeof(vxge_hal_vpath_attr_t)); for (i = 0; i < vdev->no_of_vpath; i++) { vpath = &(vdev->vpaths[i]); lro = &vpath->lro; /* Vpath vpath_attr: FIFO */ vpath_attr.vp_id = vpath->vp_id; vpath_attr.fifo_attr.callback = vxge_tx_compl; vpath_attr.fifo_attr.txdl_init = vxge_tx_replenish; vpath_attr.fifo_attr.txdl_term = vxge_tx_term; vpath_attr.fifo_attr.userdata = vpath; vpath_attr.fifo_attr.per_txdl_space = sizeof(vxge_txdl_priv_t); /* Vpath vpath_attr: Ring */ vpath_attr.ring_attr.callback = vxge_rx_compl; vpath_attr.ring_attr.rxd_init = vxge_rx_replenish; vpath_attr.ring_attr.rxd_term = vxge_rx_term; vpath_attr.ring_attr.userdata = vpath; vpath_attr.ring_attr.per_rxd_space = sizeof(vxge_rxd_priv_t); err = vxge_dma_tags_create(vpath); if (err != 0) { device_printf(vdev->ndev, "failed to create dma tags\n"); break; } #if __FreeBSD_version >= 800000 vpath->br = buf_ring_alloc(VXGE_DEFAULT_BR_SIZE, M_DEVBUF, M_WAITOK, &vpath->mtx_tx); if (vpath->br == NULL) { err = ENOMEM; break; } #endif status = vxge_hal_vpath_open(vdev->devh, &vpath_attr, (vxge_hal_vpath_callback_f) vxge_vpath_cb_fn, NULL, &vpath->handle); if (status != VXGE_HAL_OK) { device_printf(vdev->ndev, "failed to open vpath (%d)\n", vpath->vp_id); err = EPERM; break; } vpath->is_open = TRUE; 
vdev->vpath_handles[i] = vpath->handle; vpath->tx_ticks = ticks; vpath->rx_ticks = ticks; vpath->tti_rtimer_val = VXGE_DEFAULT_TTI_RTIMER_VAL; vpath->rti_rtimer_val = VXGE_DEFAULT_RTI_RTIMER_VAL; vpath->tx_intr_coalesce = vdev->config.intr_coalesce; vpath->rx_intr_coalesce = vdev->config.intr_coalesce; func_id = vdev->config.hw_info.func_id; if (vdev->config.low_latency && (vdev->config.bw_info[func_id].priority == VXGE_DEFAULT_VPATH_PRIORITY_HIGH)) { vpath->tx_intr_coalesce = 0; } if (vdev->ifp->if_capenable & IFCAP_LRO) { err = tcp_lro_init(lro); if (err != 0) { device_printf(vdev->ndev, "LRO Initialization failed!\n"); break; } vpath->lro_enable = TRUE; lro->ifp = vdev->ifp; } } return (err); } void vxge_tso_config(vxge_dev_t *vdev) { u32 func_id, priority; vxge_hal_status_e status = VXGE_HAL_OK; vdev->ifp->if_capabilities |= IFCAP_TSO4; status = vxge_bw_priority_get(vdev, NULL); if (status == VXGE_HAL_OK) { func_id = vdev->config.hw_info.func_id; priority = vdev->config.bw_info[func_id].priority; if (priority != VXGE_DEFAULT_VPATH_PRIORITY_HIGH) vdev->ifp->if_capabilities &= ~IFCAP_TSO4; } #if __FreeBSD_version >= 800000 if (vdev->ifp->if_capabilities & IFCAP_TSO4) vdev->ifp->if_capabilities |= IFCAP_VLAN_HWTSO; #endif } vxge_hal_status_e vxge_bw_priority_get(vxge_dev_t *vdev, vxge_bw_info_t *bw_info) { u32 priority, bandwidth; u32 vpath_count; u64 func_id, func_mode, vpath_list[VXGE_HAL_MAX_VIRTUAL_PATHS]; vxge_hal_status_e status = VXGE_HAL_OK; func_id = vdev->config.hw_info.func_id; if (bw_info) { func_id = bw_info->func_id; func_mode = vdev->config.hw_info.function_mode; if ((is_single_func(func_mode)) && (func_id > 0)) return (VXGE_HAL_FAIL); } if (vdev->hw_fw_version >= VXGE_FW_VERSION(1, 8, 0)) { status = vxge_hal_vf_rx_bw_get(vdev->devh, func_id, &bandwidth, &priority); } else { status = vxge_hal_get_vpath_list(vdev->devh, func_id, vpath_list, &vpath_count); if (status == VXGE_HAL_OK) { status = vxge_hal_bw_priority_get(vdev->devh, vpath_list[0], 
&bandwidth, &priority); } } if (status == VXGE_HAL_OK) { if (bw_info) { bw_info->priority = priority; bw_info->bandwidth = bandwidth; } else { vdev->config.bw_info[func_id].priority = priority; vdev->config.bw_info[func_id].bandwidth = bandwidth; } } return (status); } /* * close vpaths */ void vxge_vpath_close(vxge_dev_t *vdev) { int i; vxge_vpath_t *vpath; for (i = 0; i < vdev->no_of_vpath; i++) { vpath = &(vdev->vpaths[i]); if (vpath->handle) vxge_hal_vpath_close(vpath->handle); #if __FreeBSD_version >= 800000 if (vpath->br != NULL) buf_ring_free(vpath->br, M_DEVBUF); #endif /* Free LRO memory */ if (vpath->lro_enable) tcp_lro_free(&vpath->lro); if (vpath->dma_tag_rx) { bus_dmamap_destroy(vpath->dma_tag_rx, vpath->extra_dma_map); bus_dma_tag_destroy(vpath->dma_tag_rx); } if (vpath->dma_tag_tx) bus_dma_tag_destroy(vpath->dma_tag_tx); vpath->handle = NULL; vpath->is_open = FALSE; } } /* * reset vpaths */ void vxge_vpath_reset(vxge_dev_t *vdev) { int i; vxge_hal_vpath_h vpath_handle; vxge_hal_status_e status = VXGE_HAL_OK; for (i = 0; i < vdev->no_of_vpath; i++) { vpath_handle = vxge_vpath_handle_get(vdev, i); if (!vpath_handle) continue; status = vxge_hal_vpath_reset(vpath_handle); if (status != VXGE_HAL_OK) device_printf(vdev->ndev, "failed to reset vpath :%d\n", i); } } static inline int vxge_vpath_get(vxge_dev_t *vdev, mbuf_t mhead) { struct tcphdr *th = NULL; struct udphdr *uh = NULL; struct ip *ip = NULL; struct ip6_hdr *ip6 = NULL; struct ether_vlan_header *eth = NULL; void *ulp = NULL; int ehdrlen, iphlen = 0; u8 ipproto = 0; u16 etype, src_port, dst_port; u16 queue_len, counter = 0; src_port = dst_port = 0; queue_len = vdev->no_of_vpath; eth = mtod(mhead, struct ether_vlan_header *); if (eth->evl_encap_proto == htons(ETHERTYPE_VLAN)) { etype = ntohs(eth->evl_proto); ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { etype = ntohs(eth->evl_encap_proto); ehdrlen = ETHER_HDR_LEN; } switch (etype) { case ETHERTYPE_IP: ip = (struct ip *) (mhead->m_data + 
ehdrlen); iphlen = ip->ip_hl << 2; ipproto = ip->ip_p; th = (struct tcphdr *) ((caddr_t)ip + iphlen); uh = (struct udphdr *) ((caddr_t)ip + iphlen); break; case ETHERTYPE_IPV6: ip6 = (struct ip6_hdr *) (mhead->m_data + ehdrlen); iphlen = sizeof(struct ip6_hdr); ipproto = ip6->ip6_nxt; ulp = mtod(mhead, char *) + iphlen; th = ((struct tcphdr *) (ulp)); uh = ((struct udphdr *) (ulp)); break; default: break; } switch (ipproto) { case IPPROTO_TCP: src_port = th->th_sport; dst_port = th->th_dport; break; case IPPROTO_UDP: src_port = uh->uh_sport; dst_port = uh->uh_dport; break; default: break; } counter = (ntohs(src_port) + ntohs(dst_port)) & vpath_selector[queue_len - 1]; if (counter >= queue_len) counter = queue_len - 1; return (counter); } static inline vxge_hal_vpath_h vxge_vpath_handle_get(vxge_dev_t *vdev, int i) { return (vdev->vpaths[i].is_open ? vdev->vpaths[i].handle : NULL); } int vxge_firmware_verify(vxge_dev_t *vdev) { int err = 0; u64 active_config; vxge_hal_status_e status = VXGE_HAL_FAIL; if (vdev->fw_upgrade) { status = vxge_firmware_upgrade(vdev); if (status == VXGE_HAL_OK) { err = ENXIO; goto _exit0; } } if ((vdev->config.function_mode != VXGE_DEFAULT_CONFIG_VALUE) && (vdev->config.hw_info.function_mode != (u64) vdev->config.function_mode)) { status = vxge_func_mode_set(vdev); if (status == VXGE_HAL_OK) err = ENXIO; } /* l2_switch configuration */ active_config = VXGE_DEFAULT_CONFIG_VALUE; status = vxge_hal_get_active_config(vdev->devh, VXGE_HAL_XMAC_NWIF_ActConfig_L2SwitchEnabled, &active_config); if (status == VXGE_HAL_OK) { vdev->l2_switch = active_config; if (vdev->config.l2_switch != VXGE_DEFAULT_CONFIG_VALUE) { if (vdev->config.l2_switch != active_config) { status = vxge_l2switch_mode_set(vdev); if (status == VXGE_HAL_OK) err = ENXIO; } } } if (vdev->config.hw_info.ports == VXGE_DUAL_PORT_MODE) { if (vxge_port_mode_update(vdev) == ENXIO) err = ENXIO; } _exit0: if (err == ENXIO) device_printf(vdev->ndev, "PLEASE POWER CYCLE THE SYSTEM\n"); return 
(err); } vxge_hal_status_e vxge_firmware_upgrade(vxge_dev_t *vdev) { u8 *fw_buffer; u32 fw_size; vxge_hal_device_hw_info_t *hw_info; vxge_hal_status_e status = VXGE_HAL_OK; hw_info = &vdev->config.hw_info; fw_size = sizeof(VXGE_FW_ARRAY_NAME); fw_buffer = (u8 *) VXGE_FW_ARRAY_NAME; device_printf(vdev->ndev, "Current firmware version : %s (%s)\n", hw_info->fw_version.version, hw_info->fw_date.date); device_printf(vdev->ndev, "Upgrading firmware to %d.%d.%d\n", VXGE_MIN_FW_MAJOR_VERSION, VXGE_MIN_FW_MINOR_VERSION, VXGE_MIN_FW_BUILD_NUMBER); /* Call HAL API to upgrade firmware */ status = vxge_hal_mrpcim_fw_upgrade(vdev->pdev, (pci_reg_h) vdev->pdev->reg_map[0], (u8 *) vdev->pdev->bar_info[0], fw_buffer, fw_size); device_printf(vdev->ndev, "firmware upgrade %s\n", (status == VXGE_HAL_OK) ? "successful" : "failed"); return (status); } vxge_hal_status_e vxge_func_mode_set(vxge_dev_t *vdev) { u64 active_config; vxge_hal_status_e status = VXGE_HAL_FAIL; status = vxge_hal_mrpcim_pcie_func_mode_set(vdev->devh, vdev->config.function_mode); device_printf(vdev->ndev, "function mode change %s\n", (status == VXGE_HAL_OK) ? "successful" : "failed"); if (status == VXGE_HAL_OK) { vxge_hal_set_fw_api(vdev->devh, 0ULL, VXGE_HAL_API_FUNC_MODE_COMMIT, 0, 0ULL, 0ULL); vxge_hal_get_active_config(vdev->devh, VXGE_HAL_XMAC_NWIF_ActConfig_NWPortMode, &active_config); /* * If in MF + DP mode * if user changes to SF, change port_mode to single port mode */ if (((is_multi_func(vdev->config.hw_info.function_mode)) && is_single_func(vdev->config.function_mode)) && (active_config == VXGE_HAL_DP_NP_MODE_DUAL_PORT)) { vdev->config.port_mode = VXGE_HAL_DP_NP_MODE_SINGLE_PORT; status = vxge_port_mode_set(vdev); } } return (status); } vxge_hal_status_e vxge_port_mode_set(vxge_dev_t *vdev) { vxge_hal_status_e status = VXGE_HAL_FAIL; status = vxge_hal_set_port_mode(vdev->devh, vdev->config.port_mode); device_printf(vdev->ndev, "port mode change %s\n", (status == VXGE_HAL_OK) ? 
"successful" : "failed"); if (status == VXGE_HAL_OK) { vxge_hal_set_fw_api(vdev->devh, 0ULL, VXGE_HAL_API_FUNC_MODE_COMMIT, 0, 0ULL, 0ULL); /* Configure vpath_mapping for active-active mode only */ if (vdev->config.port_mode == VXGE_HAL_DP_NP_MODE_DUAL_PORT) { status = vxge_hal_config_vpath_map(vdev->devh, VXGE_DUAL_PORT_MAP); device_printf(vdev->ndev, "dual port map change %s\n", (status == VXGE_HAL_OK) ? "successful" : "failed"); } } return (status); } int vxge_port_mode_update(vxge_dev_t *vdev) { int err = 0; u64 active_config; vxge_hal_status_e status = VXGE_HAL_FAIL; if ((vdev->config.port_mode == VXGE_HAL_DP_NP_MODE_DUAL_PORT) && is_single_func(vdev->config.hw_info.function_mode)) { device_printf(vdev->ndev, "Adapter in SF mode, dual port mode is not allowed\n"); err = EPERM; goto _exit0; } active_config = VXGE_DEFAULT_CONFIG_VALUE; status = vxge_hal_get_active_config(vdev->devh, VXGE_HAL_XMAC_NWIF_ActConfig_NWPortMode, &active_config); if (status != VXGE_HAL_OK) { err = EINVAL; goto _exit0; } vdev->port_mode = active_config; if (vdev->config.port_mode != VXGE_DEFAULT_CONFIG_VALUE) { if (vdev->config.port_mode != vdev->port_mode) { status = vxge_port_mode_set(vdev); if (status != VXGE_HAL_OK) { err = EINVAL; goto _exit0; } err = ENXIO; vdev->port_mode = vdev->config.port_mode; } } active_config = VXGE_DEFAULT_CONFIG_VALUE; status = vxge_hal_get_active_config(vdev->devh, VXGE_HAL_XMAC_NWIF_ActConfig_BehaviourOnFail, &active_config); if (status != VXGE_HAL_OK) { err = EINVAL; goto _exit0; } vdev->port_failure = active_config; /* * active/active mode : set to NoMove * active/passive mode: set to Failover-Failback */ if (vdev->port_mode == VXGE_HAL_DP_NP_MODE_DUAL_PORT) vdev->config.port_failure = VXGE_HAL_XMAC_NWIF_OnFailure_NoMove; else if (vdev->port_mode == VXGE_HAL_DP_NP_MODE_ACTIVE_PASSIVE) vdev->config.port_failure = VXGE_HAL_XMAC_NWIF_OnFailure_OtherPortBackOnRestore; if ((vdev->port_mode != VXGE_HAL_DP_NP_MODE_SINGLE_PORT) && (vdev->config.port_failure 
!= vdev->port_failure)) { status = vxge_port_behavior_on_failure_set(vdev); if (status == VXGE_HAL_OK) err = ENXIO; } _exit0: return (err); } vxge_hal_status_e vxge_port_mode_get(vxge_dev_t *vdev, vxge_port_info_t *port_info) { int err = 0; u64 active_config; vxge_hal_status_e status = VXGE_HAL_FAIL; active_config = VXGE_DEFAULT_CONFIG_VALUE; status = vxge_hal_get_active_config(vdev->devh, VXGE_HAL_XMAC_NWIF_ActConfig_NWPortMode, &active_config); if (status != VXGE_HAL_OK) { err = ENXIO; goto _exit0; } port_info->port_mode = active_config; active_config = VXGE_DEFAULT_CONFIG_VALUE; status = vxge_hal_get_active_config(vdev->devh, VXGE_HAL_XMAC_NWIF_ActConfig_BehaviourOnFail, &active_config); if (status != VXGE_HAL_OK) { err = ENXIO; goto _exit0; } port_info->port_failure = active_config; _exit0: return (err); } vxge_hal_status_e vxge_port_behavior_on_failure_set(vxge_dev_t *vdev) { vxge_hal_status_e status = VXGE_HAL_FAIL; status = vxge_hal_set_behavior_on_failure(vdev->devh, vdev->config.port_failure); device_printf(vdev->ndev, "port behaviour on failure change %s\n", (status == VXGE_HAL_OK) ? "successful" : "failed"); if (status == VXGE_HAL_OK) vxge_hal_set_fw_api(vdev->devh, 0ULL, VXGE_HAL_API_FUNC_MODE_COMMIT, 0, 0ULL, 0ULL); return (status); } void vxge_active_port_update(vxge_dev_t *vdev) { u64 active_config; vxge_hal_status_e status = VXGE_HAL_FAIL; active_config = VXGE_DEFAULT_CONFIG_VALUE; status = vxge_hal_get_active_config(vdev->devh, VXGE_HAL_XMAC_NWIF_ActConfig_ActivePort, &active_config); if (status == VXGE_HAL_OK) vdev->active_port = active_config; } vxge_hal_status_e vxge_l2switch_mode_set(vxge_dev_t *vdev) { vxge_hal_status_e status = VXGE_HAL_FAIL; status = vxge_hal_set_l2switch_mode(vdev->devh, vdev->config.l2_switch); device_printf(vdev->ndev, "L2 switch %s\n", (status == VXGE_HAL_OK) ? (vdev->config.l2_switch) ? 
"enable" : "disable" : "change failed"); if (status == VXGE_HAL_OK) vxge_hal_set_fw_api(vdev->devh, 0ULL, VXGE_HAL_API_FUNC_MODE_COMMIT, 0, 0ULL, 0ULL); return (status); } /* * vxge_promisc_set * Enable Promiscuous Mode */ void vxge_promisc_set(vxge_dev_t *vdev) { int i; ifnet_t ifp; vxge_hal_vpath_h vpath_handle; if (!vdev->is_initialized) return; ifp = vdev->ifp; for (i = 0; i < vdev->no_of_vpath; i++) { vpath_handle = vxge_vpath_handle_get(vdev, i); if (!vpath_handle) continue; if (ifp->if_flags & IFF_PROMISC) vxge_hal_vpath_promisc_enable(vpath_handle); else vxge_hal_vpath_promisc_disable(vpath_handle); } } /* * vxge_change_mtu * Change interface MTU to a requested valid size */ int vxge_change_mtu(vxge_dev_t *vdev, unsigned long new_mtu) { int err = EINVAL; if ((new_mtu < VXGE_HAL_MIN_MTU) || (new_mtu > VXGE_HAL_MAX_MTU)) goto _exit0; (vdev->ifp)->if_mtu = new_mtu; device_printf(vdev->ndev, "MTU changed to %u\n", (vdev->ifp)->if_mtu); if (vdev->is_initialized) { if_down(vdev->ifp); vxge_reset(vdev); if_up(vdev->ifp); } err = 0; _exit0: return (err); } /* * Creates DMA tags for both Tx and Rx */ int vxge_dma_tags_create(vxge_vpath_t *vpath) { int err = 0; bus_size_t max_size, boundary; vxge_dev_t *vdev = vpath->vdev; ifnet_t ifp = vdev->ifp; max_size = ifp->if_mtu + VXGE_HAL_MAC_HEADER_MAX_SIZE + VXGE_HAL_HEADER_ETHERNET_II_802_3_ALIGN; VXGE_BUFFER_ALIGN(max_size, 128) if (max_size <= MCLBYTES) vdev->rx_mbuf_sz = MCLBYTES; else vdev->rx_mbuf_sz = (max_size > MJUMPAGESIZE) ? MJUM9BYTES : MJUMPAGESIZE; boundary = (max_size > PAGE_SIZE) ? 
0 : PAGE_SIZE; /* DMA tag for Tx */ err = bus_dma_tag_create( bus_get_dma_tag(vdev->ndev), 1, PAGE_SIZE, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, VXGE_TSO_SIZE, VXGE_MAX_SEGS, PAGE_SIZE, BUS_DMA_ALLOCNOW, NULL, NULL, &(vpath->dma_tag_tx)); if (err != 0) goto _exit0; /* DMA tag for Rx */ err = bus_dma_tag_create( bus_get_dma_tag(vdev->ndev), 1, boundary, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, vdev->rx_mbuf_sz, 1, vdev->rx_mbuf_sz, BUS_DMA_ALLOCNOW, NULL, NULL, &(vpath->dma_tag_rx)); if (err != 0) goto _exit1; /* Create DMA map for this descriptor */ err = bus_dmamap_create(vpath->dma_tag_rx, BUS_DMA_NOWAIT, &vpath->extra_dma_map); if (err == 0) goto _exit0; bus_dma_tag_destroy(vpath->dma_tag_rx); _exit1: bus_dma_tag_destroy(vpath->dma_tag_tx); _exit0: return (err); } static inline int vxge_dma_mbuf_coalesce(bus_dma_tag_t dma_tag_tx, bus_dmamap_t dma_map, mbuf_t * m_headp, bus_dma_segment_t * dma_buffers, int *num_segs) { int err = 0; mbuf_t mbuf_pkt = NULL; retry: err = bus_dmamap_load_mbuf_sg(dma_tag_tx, dma_map, *m_headp, dma_buffers, num_segs, BUS_DMA_NOWAIT); if (err == EFBIG) { /* try to defrag, too many segments */ mbuf_pkt = m_defrag(*m_headp, M_NOWAIT); if (mbuf_pkt == NULL) { err = ENOBUFS; goto _exit0; } *m_headp = mbuf_pkt; goto retry; } _exit0: return (err); } int vxge_device_hw_info_get(vxge_dev_t *vdev) { int i, err = ENXIO; u64 vpath_mask = 0; u32 max_supported_vpath = 0; u32 fw_ver_maj_min; vxge_firmware_upgrade_e fw_option; vxge_hal_status_e status = VXGE_HAL_OK; vxge_hal_device_hw_info_t *hw_info; status = vxge_hal_device_hw_info_get(vdev->pdev, (pci_reg_h) vdev->pdev->reg_map[0], (u8 *) vdev->pdev->bar_info[0], &vdev->config.hw_info); if (status != VXGE_HAL_OK) goto _exit0; hw_info = &vdev->config.hw_info; vpath_mask = hw_info->vpath_mask; if (vpath_mask == 0) { device_printf(vdev->ndev, "No vpaths available in device\n"); goto _exit0; } fw_option = vdev->config.fw_option; /* Check how many vpaths are available */ for (i = 0; i 
< VXGE_HAL_MAX_VIRTUAL_PATHS; i++) { if (!((vpath_mask) & mBIT(i))) continue; max_supported_vpath++; } vdev->max_supported_vpath = max_supported_vpath; status = vxge_hal_device_is_privileged(hw_info->host_type, hw_info->func_id); vdev->is_privilaged = (status == VXGE_HAL_OK) ? TRUE : FALSE; vdev->hw_fw_version = VXGE_FW_VERSION( hw_info->fw_version.major, hw_info->fw_version.minor, hw_info->fw_version.build); fw_ver_maj_min = VXGE_FW_MAJ_MIN_VERSION(hw_info->fw_version.major, hw_info->fw_version.minor); if ((fw_option >= VXGE_FW_UPGRADE_FORCE) || (vdev->hw_fw_version != VXGE_DRV_FW_VERSION)) { /* For fw_ver 1.8.1 and above ignore build number. */ if ((fw_option == VXGE_FW_UPGRADE_ALL) && ((vdev->hw_fw_version >= VXGE_FW_VERSION(1, 8, 1)) && (fw_ver_maj_min == VXGE_DRV_FW_MAJ_MIN_VERSION))) { goto _exit1; } if (vdev->hw_fw_version < VXGE_BASE_FW_VERSION) { device_printf(vdev->ndev, "Upgrade driver through vxge_update, " "Unable to load the driver.\n"); goto _exit0; } vdev->fw_upgrade = TRUE; } _exit1: err = 0; _exit0: return (err); } /* * vxge_device_hw_info_print * Print device and driver information */ void vxge_device_hw_info_print(vxge_dev_t *vdev) { u32 i; device_t ndev; struct sysctl_ctx_list *ctx; struct sysctl_oid_list *children; char pmd_type[2][VXGE_PMD_INFO_LEN]; vxge_hal_device_t *hldev; vxge_hal_device_hw_info_t *hw_info; vxge_hal_device_pmd_info_t *pmd_port; hldev = vdev->devh; ndev = vdev->ndev; ctx = device_get_sysctl_ctx(ndev); children = SYSCTL_CHILDREN(device_get_sysctl_tree(ndev)); hw_info = &(vdev->config.hw_info); snprintf(vdev->config.nic_attr[VXGE_PRINT_DRV_VERSION], sizeof(vdev->config.nic_attr[VXGE_PRINT_DRV_VERSION]), "%d.%d.%d.%d", XGELL_VERSION_MAJOR, XGELL_VERSION_MINOR, XGELL_VERSION_FIX, XGELL_VERSION_BUILD); /* Print PCI-e bus type/speed/width info */ snprintf(vdev->config.nic_attr[VXGE_PRINT_PCIE_INFO], sizeof(vdev->config.nic_attr[VXGE_PRINT_PCIE_INFO]), "x%d", hldev->link_width); if (hldev->link_width <= 
VXGE_HAL_PCI_E_LINK_WIDTH_X4) device_printf(ndev, "For optimal performance a x8 " "PCI-Express slot is required.\n"); vxge_null_terminate((char *) hw_info->serial_number, sizeof(hw_info->serial_number)); vxge_null_terminate((char *) hw_info->part_number, sizeof(hw_info->part_number)); snprintf(vdev->config.nic_attr[VXGE_PRINT_SERIAL_NO], sizeof(vdev->config.nic_attr[VXGE_PRINT_SERIAL_NO]), "%s", hw_info->serial_number); snprintf(vdev->config.nic_attr[VXGE_PRINT_PART_NO], sizeof(vdev->config.nic_attr[VXGE_PRINT_PART_NO]), "%s", hw_info->part_number); snprintf(vdev->config.nic_attr[VXGE_PRINT_FW_VERSION], sizeof(vdev->config.nic_attr[VXGE_PRINT_FW_VERSION]), "%s", hw_info->fw_version.version); snprintf(vdev->config.nic_attr[VXGE_PRINT_FW_DATE], sizeof(vdev->config.nic_attr[VXGE_PRINT_FW_DATE]), "%s", hw_info->fw_date.date); pmd_port = &(hw_info->pmd_port0); for (i = 0; i < hw_info->ports; i++) { vxge_pmd_port_type_get(vdev, pmd_port->type, pmd_type[i], sizeof(pmd_type[i])); strncpy(vdev->config.nic_attr[VXGE_PRINT_PMD_PORTS_0 + i], "vendor=??, sn=??, pn=??, type=??", sizeof(vdev->config.nic_attr[VXGE_PRINT_PMD_PORTS_0 + i])); vxge_null_terminate(pmd_port->vendor, sizeof(pmd_port->vendor)); if (strlen(pmd_port->vendor) == 0) { pmd_port = &(hw_info->pmd_port1); continue; } vxge_null_terminate(pmd_port->ser_num, sizeof(pmd_port->ser_num)); vxge_null_terminate(pmd_port->part_num, sizeof(pmd_port->part_num)); snprintf(vdev->config.nic_attr[VXGE_PRINT_PMD_PORTS_0 + i], sizeof(vdev->config.nic_attr[VXGE_PRINT_PMD_PORTS_0 + i]), "vendor=%s, sn=%s, pn=%s, type=%s", pmd_port->vendor, pmd_port->ser_num, pmd_port->part_num, pmd_type[i]); pmd_port = &(hw_info->pmd_port1); } switch (hw_info->function_mode) { case VXGE_HAL_PCIE_FUNC_MODE_SF1_VP17: snprintf(vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE]), "%s %d %s", "Single Function - 1 function(s)", vdev->max_supported_vpath, "VPath(s)/function"); break; case 
VXGE_HAL_PCIE_FUNC_MODE_MF2_VP8: snprintf(vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE]), "%s %d %s", "Multi Function - 2 function(s)", vdev->max_supported_vpath, "VPath(s)/function"); break; case VXGE_HAL_PCIE_FUNC_MODE_MF4_VP4: snprintf(vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE]), "%s %d %s", "Multi Function - 4 function(s)", vdev->max_supported_vpath, "VPath(s)/function"); break; case VXGE_HAL_PCIE_FUNC_MODE_MF8_VP2: snprintf(vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE]), "%s %d %s", "Multi Function - 8 function(s)", vdev->max_supported_vpath, "VPath(s)/function"); break; case VXGE_HAL_PCIE_FUNC_MODE_MF8P_VP2: snprintf(vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE]), "%s %d %s", "Multi Function (DirectIO) - 8 function(s)", vdev->max_supported_vpath, "VPath(s)/function"); break; } snprintf(vdev->config.nic_attr[VXGE_PRINT_INTR_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_INTR_MODE]), "%s", ((vdev->config.intr_mode == VXGE_HAL_INTR_MODE_MSIX) ? "MSI-X" : "INTA")); snprintf(vdev->config.nic_attr[VXGE_PRINT_VPATH_COUNT], sizeof(vdev->config.nic_attr[VXGE_PRINT_VPATH_COUNT]), "%d", vdev->no_of_vpath); snprintf(vdev->config.nic_attr[VXGE_PRINT_MTU_SIZE], sizeof(vdev->config.nic_attr[VXGE_PRINT_MTU_SIZE]), "%u", vdev->ifp->if_mtu); snprintf(vdev->config.nic_attr[VXGE_PRINT_LRO_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_LRO_MODE]), "%s", ((vdev->config.lro_enable) ? "Enabled" : "Disabled")); snprintf(vdev->config.nic_attr[VXGE_PRINT_RTH_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_RTH_MODE]), "%s", ((vdev->config.rth_enable) ? "Enabled" : "Disabled")); snprintf(vdev->config.nic_attr[VXGE_PRINT_TSO_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_TSO_MODE]), "%s", ((vdev->ifp->if_capenable & IFCAP_TSO4) ? 
"Enabled" : "Disabled")); snprintf(vdev->config.nic_attr[VXGE_PRINT_ADAPTER_TYPE], sizeof(vdev->config.nic_attr[VXGE_PRINT_ADAPTER_TYPE]), "%s", ((hw_info->ports == 1) ? "Single Port" : "Dual Port")); if (vdev->is_privilaged) { if (hw_info->ports > 1) { snprintf(vdev->config.nic_attr[VXGE_PRINT_PORT_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_PORT_MODE]), "%s", vxge_port_mode[vdev->port_mode]); if (vdev->port_mode != VXGE_HAL_DP_NP_MODE_SINGLE_PORT) snprintf(vdev->config.nic_attr[VXGE_PRINT_PORT_FAILURE], sizeof(vdev->config.nic_attr[VXGE_PRINT_PORT_FAILURE]), "%s", vxge_port_failure[vdev->port_failure]); vxge_active_port_update(vdev); snprintf(vdev->config.nic_attr[VXGE_PRINT_ACTIVE_PORT], sizeof(vdev->config.nic_attr[VXGE_PRINT_ACTIVE_PORT]), "%lld", vdev->active_port); } if (!is_single_func(hw_info->function_mode)) { snprintf(vdev->config.nic_attr[VXGE_PRINT_L2SWITCH_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_L2SWITCH_MODE]), "%s", ((vdev->l2_switch) ? "Enabled" : "Disabled")); } } device_printf(ndev, "Driver version\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_DRV_VERSION]); device_printf(ndev, "Serial number\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_SERIAL_NO]); device_printf(ndev, "Part number\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_PART_NO]); device_printf(ndev, "Firmware version\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_FW_VERSION]); device_printf(ndev, "Firmware date\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_FW_DATE]); device_printf(ndev, "Link width\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_PCIE_INFO]); if (vdev->is_privilaged) { device_printf(ndev, "Function mode\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE]); } device_printf(ndev, "Interrupt type\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_INTR_MODE]); device_printf(ndev, "VPath(s) opened\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_VPATH_COUNT]); device_printf(ndev, "Adapter Type\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_ADAPTER_TYPE]); device_printf(ndev, "PMD Port 0\t: %s\n", 
vdev->config.nic_attr[VXGE_PRINT_PMD_PORTS_0]); if (hw_info->ports > 1) { device_printf(ndev, "PMD Port 1\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_PMD_PORTS_1]); if (vdev->is_privilaged) { device_printf(ndev, "Port Mode\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_PORT_MODE]); if (vdev->port_mode != VXGE_HAL_DP_NP_MODE_SINGLE_PORT) device_printf(ndev, "Port Failure\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_PORT_FAILURE]); device_printf(vdev->ndev, "Active Port\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_ACTIVE_PORT]); } } if (vdev->is_privilaged && !is_single_func(hw_info->function_mode)) { device_printf(vdev->ndev, "L2 Switch\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_L2SWITCH_MODE]); } device_printf(ndev, "MTU is %s\n", vdev->config.nic_attr[VXGE_PRINT_MTU_SIZE]); device_printf(ndev, "LRO %s\n", vdev->config.nic_attr[VXGE_PRINT_LRO_MODE]); device_printf(ndev, "RTH %s\n", vdev->config.nic_attr[VXGE_PRINT_RTH_MODE]); device_printf(ndev, "TSO %s\n", vdev->config.nic_attr[VXGE_PRINT_TSO_MODE]); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Driver version", CTLFLAG_RD, vdev->config.nic_attr[VXGE_PRINT_DRV_VERSION], 0, "Driver version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Serial number", CTLFLAG_RD, vdev->config.nic_attr[VXGE_PRINT_SERIAL_NO], 0, "Serial number"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Part number", CTLFLAG_RD, vdev->config.nic_attr[VXGE_PRINT_PART_NO], 0, "Part number"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Firmware version", CTLFLAG_RD, vdev->config.nic_attr[VXGE_PRINT_FW_VERSION], 0, "Firmware version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Firmware date", CTLFLAG_RD, vdev->config.nic_attr[VXGE_PRINT_FW_DATE], 0, "Firmware date"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Link width", CTLFLAG_RD, vdev->config.nic_attr[VXGE_PRINT_PCIE_INFO], 0, "Link width"); if (vdev->is_privilaged) { SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Function mode", CTLFLAG_RD, vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE], 0, "Function mode"); } 
SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Interrupt type", CTLFLAG_RD, vdev->config.nic_attr[VXGE_PRINT_INTR_MODE], 0, "Interrupt type"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "VPath(s) opened", CTLFLAG_RD, vdev->config.nic_attr[VXGE_PRINT_VPATH_COUNT], 0, "VPath(s) opened"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Adapter Type", CTLFLAG_RD, vdev->config.nic_attr[VXGE_PRINT_ADAPTER_TYPE], 0, "Adapter Type"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "pmd port 0", CTLFLAG_RD, vdev->config.nic_attr[VXGE_PRINT_PMD_PORTS_0], 0, "pmd port"); if (hw_info->ports > 1) { SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "pmd port 1", CTLFLAG_RD, vdev->config.nic_attr[VXGE_PRINT_PMD_PORTS_1], 0, "pmd port"); if (vdev->is_privilaged) { SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Port Mode", CTLFLAG_RD, vdev->config.nic_attr[VXGE_PRINT_PORT_MODE], 0, "Port Mode"); if (vdev->port_mode != VXGE_HAL_DP_NP_MODE_SINGLE_PORT) SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Port Failure", CTLFLAG_RD, vdev->config.nic_attr[VXGE_PRINT_PORT_FAILURE], 0, "Port Failure"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "L2 Switch", CTLFLAG_RD, vdev->config.nic_attr[VXGE_PRINT_L2SWITCH_MODE], 0, "L2 Switch"); } } SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "LRO mode", CTLFLAG_RD, vdev->config.nic_attr[VXGE_PRINT_LRO_MODE], 0, "LRO mode"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "RTH mode", CTLFLAG_RD, vdev->config.nic_attr[VXGE_PRINT_RTH_MODE], 0, "RTH mode"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "TSO mode", CTLFLAG_RD, vdev->config.nic_attr[VXGE_PRINT_TSO_MODE], 0, "TSO mode"); } void vxge_pmd_port_type_get(vxge_dev_t *vdev, u32 port_type, char *ifm_name, u8 ifm_len) { vdev->ifm_optics = IFM_UNKNOWN; switch (port_type) { case VXGE_HAL_DEVICE_PMD_TYPE_10G_SR: vdev->ifm_optics = IFM_10G_SR; strlcpy(ifm_name, "10GbE SR", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_10G_LR: vdev->ifm_optics = IFM_10G_LR; strlcpy(ifm_name, "10GbE LR", ifm_len); break; case 
VXGE_HAL_DEVICE_PMD_TYPE_10G_LRM: vdev->ifm_optics = IFM_10G_LRM; strlcpy(ifm_name, "10GbE LRM", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_10G_DIRECT: vdev->ifm_optics = IFM_10G_TWINAX; strlcpy(ifm_name, "10GbE DA (Direct Attached)", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_10G_CX4: vdev->ifm_optics = IFM_10G_CX4; strlcpy(ifm_name, "10GbE CX4", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_10G_BASE_T: #if __FreeBSD_version >= 800000 vdev->ifm_optics = IFM_10G_T; #endif strlcpy(ifm_name, "10GbE baseT", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_10G_OTHER: strlcpy(ifm_name, "10GbE Other", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_1G_SX: vdev->ifm_optics = IFM_1000_SX; strlcpy(ifm_name, "1GbE SX", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_1G_LX: vdev->ifm_optics = IFM_1000_LX; strlcpy(ifm_name, "1GbE LX", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_1G_CX: vdev->ifm_optics = IFM_1000_CX; strlcpy(ifm_name, "1GbE CX", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_1G_BASE_T: vdev->ifm_optics = IFM_1000_T; strlcpy(ifm_name, "1GbE baseT", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_1G_DIRECT: strlcpy(ifm_name, "1GbE DA (Direct Attached)", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_1G_CX4: strlcpy(ifm_name, "1GbE CX4", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_1G_OTHER: strlcpy(ifm_name, "1GbE Other", ifm_len); break; default: case VXGE_HAL_DEVICE_PMD_TYPE_UNKNOWN: strlcpy(ifm_name, "UNSUP", ifm_len); break; } } u32 vxge_ring_length_get(u32 buffer_mode) { return (VXGE_DEFAULT_RING_BLOCK * vxge_hal_ring_rxds_per_block_get(buffer_mode)); }
/*
 * vxge_null_terminate
 * Cuts a fixed-size string field at its first space and guarantees that
 * the field is NUL terminated within its own buffer.
 *
 * str - buffer to terminate in place
 * len - size of the buffer in bytes; nothing is touched when len is 0
 *
 * The scan stops at the first NUL, the first space, or the last byte of
 * the buffer, whichever comes first. A non-NUL byte at that position is
 * overwritten with NUL, so a value with an embedded space is shortened
 * there, and a completely full field loses its last byte.
 */
static inline void
vxge_null_terminate(char *str, size_t len)
{
	/* Without this guard the len-- below would wrap around for len == 0. */
	if (str == NULL || len == 0)
		return;

	/* Keep the final byte in reserve so the terminator always fits. */
	len--;
	while (*str && (*str != ' ') && (len != 0)) {
		++str;
		/* Count down with the pointer so the scan stays inside the buffer. */
		--len;
	}

	if (*str)
		*str = '\0';
}
/* * vxge_ioctl * Callback to control the device */ int vxge_ioctl(ifnet_t ifp, u_long command, caddr_t data) { int mask, err = 0; vxge_dev_t *vdev = (vxge_dev_t *) ifp->if_softc; 
struct ifreq *ifr = (struct ifreq *) data; if (!vdev->is_active) return (EBUSY); switch (command) { /* Set/Get ifnet address */ case SIOCSIFADDR: case SIOCGIFADDR: ether_ioctl(ifp, command, data); break; /* Set Interface MTU */ case SIOCSIFMTU: err = vxge_change_mtu(vdev, (unsigned long)ifr->ifr_mtu); break; /* Set Interface Flags */ case SIOCSIFFLAGS: VXGE_DRV_LOCK(vdev); if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { if ((ifp->if_flags ^ vdev->if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) vxge_promisc_set(vdev); } else { vxge_init_locked(vdev); } } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) vxge_stop_locked(vdev); } vdev->if_flags = ifp->if_flags; VXGE_DRV_UNLOCK(vdev); break; /* Add/delete multicast address */ case SIOCADDMULTI: case SIOCDELMULTI: break; /* Get/Set Interface Media */ case SIOCSIFMEDIA: case SIOCGIFMEDIA: err = ifmedia_ioctl(ifp, ifr, &vdev->media, command); break; /* Set Capabilities */ case SIOCSIFCAP: VXGE_DRV_LOCK(vdev); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { ifp->if_capenable ^= IFCAP_TXCSUM; ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP); if ((ifp->if_capenable & IFCAP_TSO) && !(ifp->if_capenable & IFCAP_TXCSUM)) { ifp->if_capenable &= ~IFCAP_TSO; ifp->if_hwassist &= ~CSUM_TSO; if_printf(ifp, "TSO Disabled\n"); } } if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_TSO4) { ifp->if_capenable ^= IFCAP_TSO4; if (ifp->if_capenable & IFCAP_TSO) { if (ifp->if_capenable & IFCAP_TXCSUM) { ifp->if_hwassist |= CSUM_TSO; if_printf(ifp, "TSO Enabled\n"); } else { ifp->if_capenable &= ~IFCAP_TSO; ifp->if_hwassist &= ~CSUM_TSO; if_printf(ifp, "Enable tx checksum offload \ first.\n"); err = EAGAIN; } } else { ifp->if_hwassist &= ~CSUM_TSO; if_printf(ifp, "TSO Disabled\n"); } } if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_VLAN_MTU) ifp->if_capenable ^= 
IFCAP_VLAN_MTU; if (mask & IFCAP_VLAN_HWCSUM) ifp->if_capenable ^= IFCAP_VLAN_HWCSUM; #if __FreeBSD_version >= 800000 if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; #endif #if defined(VLAN_CAPABILITIES) VLAN_CAPABILITIES(ifp); #endif VXGE_DRV_UNLOCK(vdev); break; case SIOCGPRIVATE_0: VXGE_DRV_LOCK(vdev); err = vxge_ioctl_stats(vdev, ifr); VXGE_DRV_UNLOCK(vdev); break; case SIOCGPRIVATE_1: VXGE_DRV_LOCK(vdev); err = vxge_ioctl_regs(vdev, ifr); VXGE_DRV_UNLOCK(vdev); break; default: err = ether_ioctl(ifp, command, data); break; } return (err); } /* * vxge_ioctl_regs * IOCTL to get registers */ int vxge_ioctl_regs(vxge_dev_t *vdev, struct ifreq *ifr) { u64 value = 0x0; u32 vp_id = 0; u32 offset, reqd_size = 0; int i, err = EINVAL; - char *command = (char *) ifr->ifr_data; - void *reg_info = (void *) ifr->ifr_data; + char *command = ifr_data_get_ptr(ifr); + void *reg_info = ifr_data_get_ptr(ifr); vxge_vpath_t *vpath; vxge_hal_status_e status = VXGE_HAL_OK; vxge_hal_mgmt_reg_type_e regs_type; switch (*command) { case vxge_hal_mgmt_reg_type_pcicfgmgmt: if (vdev->is_privilaged) { reqd_size = sizeof(vxge_hal_pcicfgmgmt_reg_t); regs_type = vxge_hal_mgmt_reg_type_pcicfgmgmt; } break; case vxge_hal_mgmt_reg_type_mrpcim: if (vdev->is_privilaged) { reqd_size = sizeof(vxge_hal_mrpcim_reg_t); regs_type = vxge_hal_mgmt_reg_type_mrpcim; } break; case vxge_hal_mgmt_reg_type_srpcim: if (vdev->is_privilaged) { reqd_size = sizeof(vxge_hal_srpcim_reg_t); regs_type = vxge_hal_mgmt_reg_type_srpcim; } break; case vxge_hal_mgmt_reg_type_memrepair: if (vdev->is_privilaged) { /* reqd_size = sizeof(vxge_hal_memrepair_reg_t); */ regs_type = vxge_hal_mgmt_reg_type_memrepair; } break; case vxge_hal_mgmt_reg_type_legacy: reqd_size = sizeof(vxge_hal_legacy_reg_t); regs_type = vxge_hal_mgmt_reg_type_legacy; break; case vxge_hal_mgmt_reg_type_toc: reqd_size = sizeof(vxge_hal_toc_reg_t); regs_type = vxge_hal_mgmt_reg_type_toc; break; case vxge_hal_mgmt_reg_type_common: reqd_size = 
sizeof(vxge_hal_common_reg_t); regs_type = vxge_hal_mgmt_reg_type_common; break; case vxge_hal_mgmt_reg_type_vpmgmt: reqd_size = sizeof(vxge_hal_vpmgmt_reg_t); regs_type = vxge_hal_mgmt_reg_type_vpmgmt; vpath = &(vdev->vpaths[*((u32 *) reg_info + 1)]); vp_id = vpath->vp_id; break; case vxge_hal_mgmt_reg_type_vpath: reqd_size = sizeof(vxge_hal_vpath_reg_t); regs_type = vxge_hal_mgmt_reg_type_vpath; vpath = &(vdev->vpaths[*((u32 *) reg_info + 1)]); vp_id = vpath->vp_id; break; case VXGE_GET_VPATH_COUNT: *((u32 *) reg_info) = vdev->no_of_vpath; err = 0; break; default: reqd_size = 0; break; } if (reqd_size) { for (i = 0, offset = 0; offset < reqd_size; i++, offset += 0x0008) { value = 0x0; status = vxge_hal_mgmt_reg_read(vdev->devh, regs_type, vp_id, offset, &value); err = (status != VXGE_HAL_OK) ? EINVAL : 0; if (err == EINVAL) break; *((u64 *) ((u64 *) reg_info + i)) = value; } } return (err); } /* * vxge_ioctl_stats * IOCTL to get statistics */ int vxge_ioctl_stats(vxge_dev_t *vdev, struct ifreq *ifr) { int i, retsize, err = EINVAL; u32 bufsize; vxge_vpath_t *vpath; vxge_bw_info_t *bw_info; vxge_port_info_t *port_info; vxge_drv_stats_t *drv_stat; char *buffer = NULL; - char *command = (char *) ifr->ifr_data; + char *command = ifr_data_get_ptr(ifr); vxge_hal_status_e status = VXGE_HAL_OK; switch (*command) { case VXGE_GET_PCI_CONF: bufsize = VXGE_STATS_BUFFER_SIZE; buffer = (char *) vxge_mem_alloc(bufsize); if (buffer != NULL) { status = vxge_hal_aux_pci_config_read(vdev->devh, bufsize, buffer, &retsize); if (status == VXGE_HAL_OK) - err = copyout(buffer, ifr->ifr_data, retsize); + err = copyout(buffer, ifr_data_get_ptr(ifr), + retsize); else device_printf(vdev->ndev, "failed pciconfig statistics query\n"); vxge_mem_free(buffer, bufsize); } break; case VXGE_GET_MRPCIM_STATS: if (!vdev->is_privilaged) break; bufsize = VXGE_STATS_BUFFER_SIZE; buffer = (char *) vxge_mem_alloc(bufsize); if (buffer != NULL) { status = vxge_hal_aux_stats_mrpcim_read(vdev->devh, bufsize, 
buffer, &retsize); if (status == VXGE_HAL_OK) - err = copyout(buffer, ifr->ifr_data, retsize); + err = copyout(buffer, ifr_data_get_ptr(ifr), + retsize); else device_printf(vdev->ndev, "failed mrpcim statistics query\n"); vxge_mem_free(buffer, bufsize); } break; case VXGE_GET_DEVICE_STATS: bufsize = VXGE_STATS_BUFFER_SIZE; buffer = (char *) vxge_mem_alloc(bufsize); if (buffer != NULL) { status = vxge_hal_aux_stats_device_read(vdev->devh, bufsize, buffer, &retsize); if (status == VXGE_HAL_OK) - err = copyout(buffer, ifr->ifr_data, retsize); + err = copyout(buffer, ifr_data_get_ptr(ifr), + retsize); else device_printf(vdev->ndev, "failed device statistics query\n"); vxge_mem_free(buffer, bufsize); } break; case VXGE_GET_DEVICE_HWINFO: bufsize = sizeof(vxge_device_hw_info_t); buffer = (char *) vxge_mem_alloc(bufsize); if (buffer != NULL) { vxge_os_memcpy( &(((vxge_device_hw_info_t *) buffer)->hw_info), &vdev->config.hw_info, sizeof(vxge_hal_device_hw_info_t)); ((vxge_device_hw_info_t *) buffer)->port_mode = vdev->port_mode; ((vxge_device_hw_info_t *) buffer)->port_failure = vdev->port_failure; - err = copyout(buffer, ifr->ifr_data, bufsize); + err = copyout(buffer, ifr_data_get_ptr(ifr), bufsize); if (err != 0) device_printf(vdev->ndev, "failed device hardware info query\n"); vxge_mem_free(buffer, bufsize); } break; case VXGE_GET_DRIVER_STATS: bufsize = sizeof(vxge_drv_stats_t) * vdev->no_of_vpath; drv_stat = (vxge_drv_stats_t *) vxge_mem_alloc(bufsize); if (drv_stat != NULL) { for (i = 0; i < vdev->no_of_vpath; i++) { vpath = &(vdev->vpaths[i]); vpath->driver_stats.rx_lro_queued += vpath->lro.lro_queued; vpath->driver_stats.rx_lro_flushed += vpath->lro.lro_flushed; vxge_os_memcpy(&drv_stat[i], &(vpath->driver_stats), sizeof(vxge_drv_stats_t)); } - err = copyout(drv_stat, ifr->ifr_data, bufsize); + err = copyout(drv_stat, ifr_data_get_ptr(ifr), bufsize); if (err != 0) device_printf(vdev->ndev, "failed driver statistics query\n"); vxge_mem_free(drv_stat, bufsize); } 
break; case VXGE_GET_BANDWIDTH: - bw_info = (vxge_bw_info_t *) ifr->ifr_data; + bw_info = ifr_data_get_ptr(ifr); if ((vdev->config.hw_info.func_id != 0) && (vdev->hw_fw_version < VXGE_FW_VERSION(1, 8, 0))) break; if (vdev->config.hw_info.func_id != 0) bw_info->func_id = vdev->config.hw_info.func_id; status = vxge_bw_priority_get(vdev, bw_info); if (status != VXGE_HAL_OK) break; - err = copyout(bw_info, ifr->ifr_data, sizeof(vxge_bw_info_t)); + err = copyout(bw_info, ifr_data_get_ptr(ifr), + sizeof(vxge_bw_info_t)); break; case VXGE_SET_BANDWIDTH: if (vdev->is_privilaged) err = vxge_bw_priority_set(vdev, ifr); break; case VXGE_SET_PORT_MODE: if (vdev->is_privilaged) { if (vdev->config.hw_info.ports == VXGE_DUAL_PORT_MODE) { - port_info = (vxge_port_info_t *) ifr->ifr_data; + port_info = ifr_data_get_ptr(ifr); vdev->config.port_mode = port_info->port_mode; err = vxge_port_mode_update(vdev); if (err != ENXIO) err = VXGE_HAL_FAIL; else { err = VXGE_HAL_OK; device_printf(vdev->ndev, "PLEASE POWER CYCLE THE SYSTEM\n"); } } } break; case VXGE_GET_PORT_MODE: if (vdev->is_privilaged) { if (vdev->config.hw_info.ports == VXGE_DUAL_PORT_MODE) { - port_info = (vxge_port_info_t *) ifr->ifr_data; + port_info = ifr_data_get_ptr(ifr); err = vxge_port_mode_get(vdev, port_info); if (err == VXGE_HAL_OK) { - err = copyout(port_info, ifr->ifr_data, + err = copyout(port_info, + ifr_data_get_ptr(ifr), sizeof(vxge_port_info_t)); } } } break; default: break; } return (err); } int vxge_bw_priority_config(vxge_dev_t *vdev) { u32 i; int err = EINVAL; for (i = 0; i < vdev->no_of_func; i++) { err = vxge_bw_priority_update(vdev, i, TRUE); if (err != 0) break; } return (err); } int vxge_bw_priority_set(vxge_dev_t *vdev, struct ifreq *ifr) { int err; u32 func_id; vxge_bw_info_t *bw_info; - bw_info = (vxge_bw_info_t *) ifr->ifr_data; + bw_info = ifr_data_get_ptr(ifr); func_id = bw_info->func_id; vdev->config.bw_info[func_id].priority = bw_info->priority; vdev->config.bw_info[func_id].bandwidth = 
bw_info->bandwidth; err = vxge_bw_priority_update(vdev, func_id, FALSE); return (err); } int vxge_bw_priority_update(vxge_dev_t *vdev, u32 func_id, bool binit) { u32 i, set = 0; u32 bandwidth, priority, vpath_count; u64 vpath_list[VXGE_HAL_MAX_VIRTUAL_PATHS]; vxge_hal_device_t *hldev; vxge_hal_vp_config_t *vp_config; vxge_hal_status_e status = VXGE_HAL_OK; hldev = vdev->devh; status = vxge_hal_get_vpath_list(vdev->devh, func_id, vpath_list, &vpath_count); if (status != VXGE_HAL_OK) return (status); for (i = 0; i < vpath_count; i++) { vp_config = &(hldev->config.vp_config[vpath_list[i]]); /* Configure Bandwidth */ if (vdev->config.bw_info[func_id].bandwidth != VXGE_HAL_VPATH_BW_LIMIT_DEFAULT) { set = 1; bandwidth = vdev->config.bw_info[func_id].bandwidth; if (bandwidth < VXGE_HAL_VPATH_BW_LIMIT_MIN || bandwidth > VXGE_HAL_VPATH_BW_LIMIT_MAX) { bandwidth = VXGE_HAL_VPATH_BW_LIMIT_DEFAULT; } vp_config->bandwidth = bandwidth; } /* * If b/w limiting is enabled on any of the * VFs, then for remaining VFs set the priority to 3 * and b/w limiting to max i.e 10 Gb) */ if (vp_config->bandwidth == VXGE_HAL_VPATH_BW_LIMIT_DEFAULT) vp_config->bandwidth = VXGE_HAL_VPATH_BW_LIMIT_MAX; if (binit && vdev->config.low_latency) { if (func_id == 0) vdev->config.bw_info[func_id].priority = VXGE_DEFAULT_VPATH_PRIORITY_HIGH; } /* Configure Priority */ if (vdev->config.bw_info[func_id].priority != VXGE_HAL_VPATH_PRIORITY_DEFAULT) { set = 1; priority = vdev->config.bw_info[func_id].priority; if (priority < VXGE_HAL_VPATH_PRIORITY_MIN || priority > VXGE_HAL_VPATH_PRIORITY_MAX) { priority = VXGE_HAL_VPATH_PRIORITY_DEFAULT; } vp_config->priority = priority; } else if (vdev->config.low_latency) { set = 1; vp_config->priority = VXGE_DEFAULT_VPATH_PRIORITY_LOW; } if (set == 1) { status = vxge_hal_rx_bw_priority_set(vdev->devh, vpath_list[i]); if (status != VXGE_HAL_OK) break; if (vpath_list[i] < VXGE_HAL_TX_BW_VPATH_LIMIT) { status = vxge_hal_tx_bw_priority_set( vdev->devh, vpath_list[i]); if 
(status != VXGE_HAL_OK) break; } } } return ((status == VXGE_HAL_OK) ? 0 : EINVAL); } /* * vxge_intr_coalesce_tx * Changes interrupt coalescing if the interrupts are not within a range * Return Value: Nothing */ void vxge_intr_coalesce_tx(vxge_vpath_t *vpath) { u32 timer; if (!vpath->tx_intr_coalesce) return; vpath->tx_interrupts++; if (ticks > vpath->tx_ticks + hz/100) { vpath->tx_ticks = ticks; timer = vpath->tti_rtimer_val; if (vpath->tx_interrupts > VXGE_MAX_TX_INTERRUPT_COUNT) { if (timer != VXGE_TTI_RTIMER_ADAPT_VAL) { vpath->tti_rtimer_val = VXGE_TTI_RTIMER_ADAPT_VAL; vxge_hal_vpath_dynamic_tti_rtimer_set( vpath->handle, vpath->tti_rtimer_val); } } else { if (timer != 0) { vpath->tti_rtimer_val = 0; vxge_hal_vpath_dynamic_tti_rtimer_set( vpath->handle, vpath->tti_rtimer_val); } } vpath->tx_interrupts = 0; } } /* * vxge_intr_coalesce_rx * Changes interrupt coalescing if the interrupts are not within a range * Return Value: Nothing */ void vxge_intr_coalesce_rx(vxge_vpath_t *vpath) { u32 timer; if (!vpath->rx_intr_coalesce) return; vpath->rx_interrupts++; if (ticks > vpath->rx_ticks + hz/100) { vpath->rx_ticks = ticks; timer = vpath->rti_rtimer_val; if (vpath->rx_interrupts > VXGE_MAX_RX_INTERRUPT_COUNT) { if (timer != VXGE_RTI_RTIMER_ADAPT_VAL) { vpath->rti_rtimer_val = VXGE_RTI_RTIMER_ADAPT_VAL; vxge_hal_vpath_dynamic_rti_rtimer_set( vpath->handle, vpath->rti_rtimer_val); } } else { if (timer != 0) { vpath->rti_rtimer_val = 0; vxge_hal_vpath_dynamic_rti_rtimer_set( vpath->handle, vpath->rti_rtimer_val); } } vpath->rx_interrupts = 0; } } /* * vxge_methods FreeBSD device interface entry points */ static device_method_t vxge_methods[] = { DEVMETHOD(device_probe, vxge_probe), DEVMETHOD(device_attach, vxge_attach), DEVMETHOD(device_detach, vxge_detach), DEVMETHOD(device_shutdown, vxge_shutdown), DEVMETHOD_END }; static driver_t vxge_driver = { "vxge", vxge_methods, sizeof(vxge_dev_t), }; static devclass_t vxge_devclass; DRIVER_MODULE(vxge, pci, vxge_driver, 
vxge_devclass, 0, 0); Index: head/sys/net/if.c =================================================================== --- head/sys/net/if.c (revision 331796) +++ head/sys/net/if.c (revision 331797) @@ -1,4335 +1,4350 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)if.c 8.5 (Berkeley) 1/9/95 * $FreeBSD$ */ #include "opt_compat.h" #include "opt_inet6.h" #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #include #include #include #include #ifdef INET #include #endif /* INET */ #ifdef INET6 #include #include #endif /* INET6 */ #endif /* INET || INET6 */ #include #ifdef COMPAT_FREEBSD32 #include #include struct ifreq_buffer32 { uint32_t length; /* (size_t) */ uint32_t buffer; /* (void *) */ }; /* * Interface request structure used for socket * ioctl's. All interface ioctl's must have parameter * definitions which begin with ifr_name. The * remainder may be interface specific. */ struct ifreq32 { char ifr_name[IFNAMSIZ]; /* if name, e.g. 
"en0" */ union { struct sockaddr ifru_addr; struct sockaddr ifru_dstaddr; struct sockaddr ifru_broadaddr; struct ifreq_buffer32 ifru_buffer; short ifru_flags[2]; short ifru_index; int ifru_jid; int ifru_metric; int ifru_mtu; int ifru_phys; int ifru_media; uint32_t ifru_data; int ifru_cap[2]; u_int ifru_fib; u_char ifru_vlan_pcp; } ifr_ifru; }; CTASSERT(sizeof(struct ifreq) == sizeof(struct ifreq32)); CTASSERT(__offsetof(struct ifreq, ifr_ifru) == __offsetof(struct ifreq32, ifr_ifru)); #endif union ifreq_union { struct ifreq ifr; #ifdef COMPAT_FREEBSD32 struct ifreq32 ifr32; #endif }; SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers"); SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management"); SYSCTL_INT(_net_link, OID_AUTO, ifqmaxlen, CTLFLAG_RDTUN, &ifqmaxlen, 0, "max send queue size"); /* Log link state change events */ static int log_link_state_change = 1; SYSCTL_INT(_net_link, OID_AUTO, log_link_state_change, CTLFLAG_RW, &log_link_state_change, 0, "log interface link state change events"); /* Log promiscuous mode change events */ static int log_promisc_mode_change = 1; SYSCTL_INT(_net_link, OID_AUTO, log_promisc_mode_change, CTLFLAG_RDTUN, &log_promisc_mode_change, 1, "log promiscuous mode change events"); /* Interface description */ static unsigned int ifdescr_maxlen = 1024; SYSCTL_UINT(_net, OID_AUTO, ifdescr_maxlen, CTLFLAG_RW, &ifdescr_maxlen, 0, "administrative maximum length for interface description"); static MALLOC_DEFINE(M_IFDESCR, "ifdescr", "ifnet descriptions"); /* global sx for non-critical path ifdescr */ static struct sx ifdescr_sx; SX_SYSINIT(ifdescr_sx, &ifdescr_sx, "ifnet descr"); void (*bridge_linkstate_p)(struct ifnet *ifp); void (*ng_ether_link_state_p)(struct ifnet *ifp, int state); void (*lagg_linkstate_p)(struct ifnet *ifp, int state); /* These are external hooks for CARP. 
*/
void	(*carp_linkstate_p)(struct ifnet *ifp);
void	(*carp_demote_adj_p)(int, char *);
int	(*carp_master_p)(struct ifaddr *);
#if defined(INET) || defined(INET6)
int	(*carp_forus_p)(struct ifnet *ifp, u_char *dhost);
int	(*carp_output_p)(struct ifnet *ifp, struct mbuf *m,
    const struct sockaddr *sa);
int	(*carp_ioctl_p)(struct ifreq *, u_long, struct thread *);
int	(*carp_attach_p)(struct ifaddr *, int);
void	(*carp_detach_p)(struct ifaddr *, bool);
#endif
#ifdef INET
int	(*carp_iamatch_p)(struct ifaddr *, uint8_t **);
#endif
#ifdef INET6
struct ifaddr *(*carp_iamatch6_p)(struct ifnet *ifp, struct in6_addr *taddr6);
caddr_t	(*carp_macmatch6_p)(struct ifnet *ifp, struct mbuf *m,
    const struct in6_addr *taddr);
#endif

struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL;

/*
 * XXX: Style; these should be sorted alphabetically, and unprototyped
 * static functions should be prototyped. Currently they are sorted by
 * declaration order.
 */
static void	if_attachdomain(void *);
static void	if_attachdomain1(struct ifnet *);
static int	ifconf(u_long, caddr_t);
static void	if_freemulti(struct ifmultiaddr *);
static void	if_grow(void);
static void	if_input_default(struct ifnet *, struct mbuf *);
static int	if_requestencap_default(struct ifnet *, struct if_encap_req *);
static void	if_route(struct ifnet *, int flag, int fam);
static int	if_setflag(struct ifnet *, int, int, int *, int);
static int	if_transmit(struct ifnet *ifp, struct mbuf *m);
static void	if_unroute(struct ifnet *, int flag, int fam);
static void	link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
static int	ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *);
static int	if_delmulti_locked(struct ifnet *, struct ifmultiaddr *, int);
static void	do_link_state_change(void *, int);
static int	if_getgroup(struct ifgroupreq *, struct ifnet *);
static int	if_getgroupmembers(struct ifgroupreq *);
static void	if_delgroups(struct ifnet *);
static void	if_attach_internal(struct ifnet *, int, struct if_clone *);
static int	if_detach_internal(struct ifnet *, int, struct if_clone **);
#ifdef VIMAGE
static void	if_vmove(struct ifnet *, struct vnet *);
#endif

#ifdef INET6
/*
 * XXX: declared here to avoid including many inet6 related files;
 * should this be more generalized?
 */
extern void	nd6_setmtu(struct ifnet *);
#endif

/* ipsec helper hooks */
VNET_DEFINE(struct hhook_head *, ipsec_hhh_in[HHOOK_IPSEC_COUNT]);
VNET_DEFINE(struct hhook_head *, ipsec_hhh_out[HHOOK_IPSEC_COUNT]);

VNET_DEFINE(int, if_index);
int	ifqmaxlen = IFQ_MAXLEN;
VNET_DEFINE(struct ifnethead, ifnet);	/* depend on static init XXX */
VNET_DEFINE(struct ifgrouphead, ifg_head);

/* Current capacity of the index table; if_grow() doubles it. */
static VNET_DEFINE(int, if_indexlim) = 8;

/* Table of ifnet by index. */
VNET_DEFINE(struct ifnet **, ifindex_table);

#define	V_if_indexlim		VNET(if_indexlim)
#define	V_ifindex_table		VNET(ifindex_table)

/*
 * The global network interface list (V_ifnet) and related state (such as
 * if_index, if_indexlim, and ifindex_table) are protected by an sxlock and
 * an rwlock.  Either may be acquired shared to stabilize the list, but both
 * must be acquired writable to modify the list.  This model allows us to
 * both stabilize the interface list during interrupt thread processing, but
 * also to stabilize it over long-running ioctls, without introducing priority
 * inversions and deadlocks.
 */
struct rwlock ifnet_rwlock;
RW_SYSINIT_FLAGS(ifnet_rw, &ifnet_rwlock, "ifnet_rw", RW_RECURSE);
struct sx ifnet_sxlock;
SX_SYSINIT_FLAGS(ifnet_sx, &ifnet_sxlock, "ifnet_sx", SX_RECURSE);

/*
 * The allocation of network interfaces is a rather non-atomic affair; we
 * need to select an index before we are ready to expose the interface for
 * use, so will use this pointer value to indicate reservation.
*/ #define IFNET_HOLD (void *)(uintptr_t)(-1) static if_com_alloc_t *if_com_alloc[256]; static if_com_free_t *if_com_free[256]; static MALLOC_DEFINE(M_IFNET, "ifnet", "interface internals"); MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address"); MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address"); struct ifnet * ifnet_byindex_locked(u_short idx) { if (idx > V_if_index) return (NULL); if (V_ifindex_table[idx] == IFNET_HOLD) return (NULL); return (V_ifindex_table[idx]); } struct ifnet * ifnet_byindex(u_short idx) { struct ifnet *ifp; IFNET_RLOCK_NOSLEEP(); ifp = ifnet_byindex_locked(idx); IFNET_RUNLOCK_NOSLEEP(); return (ifp); } struct ifnet * ifnet_byindex_ref(u_short idx) { struct ifnet *ifp; IFNET_RLOCK_NOSLEEP(); ifp = ifnet_byindex_locked(idx); if (ifp == NULL || (ifp->if_flags & IFF_DYING)) { IFNET_RUNLOCK_NOSLEEP(); return (NULL); } if_ref(ifp); IFNET_RUNLOCK_NOSLEEP(); return (ifp); } /* * Allocate an ifindex array entry; return 0 on success or an error on * failure. */ static u_short ifindex_alloc(void) { u_short idx; IFNET_WLOCK_ASSERT(); retry: /* * Try to find an empty slot below V_if_index. If we fail, take the * next slot. */ for (idx = 1; idx <= V_if_index; idx++) { if (V_ifindex_table[idx] == NULL) break; } /* Catch if_index overflow. 
*/ if (idx >= V_if_indexlim) { if_grow(); goto retry; } if (idx > V_if_index) V_if_index = idx; return (idx); } static void ifindex_free_locked(u_short idx) { IFNET_WLOCK_ASSERT(); V_ifindex_table[idx] = NULL; while (V_if_index > 0 && V_ifindex_table[V_if_index] == NULL) V_if_index--; } static void ifindex_free(u_short idx) { IFNET_WLOCK(); ifindex_free_locked(idx); IFNET_WUNLOCK(); } static void ifnet_setbyindex_locked(u_short idx, struct ifnet *ifp) { IFNET_WLOCK_ASSERT(); V_ifindex_table[idx] = ifp; } static void ifnet_setbyindex(u_short idx, struct ifnet *ifp) { IFNET_WLOCK(); ifnet_setbyindex_locked(idx, ifp); IFNET_WUNLOCK(); } struct ifaddr * ifaddr_byindex(u_short idx) { struct ifnet *ifp; struct ifaddr *ifa = NULL; IFNET_RLOCK_NOSLEEP(); ifp = ifnet_byindex_locked(idx); if (ifp != NULL && (ifa = ifp->if_addr) != NULL) ifa_ref(ifa); IFNET_RUNLOCK_NOSLEEP(); return (ifa); } /* * Network interface utility routines. * * Routines with ifa_ifwith* names take sockaddr *'s as * parameters. */ static void vnet_if_init(const void *unused __unused) { TAILQ_INIT(&V_ifnet); TAILQ_INIT(&V_ifg_head); IFNET_WLOCK(); if_grow(); /* create initial table */ IFNET_WUNLOCK(); vnet_if_clone_init(); } VNET_SYSINIT(vnet_if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, vnet_if_init, NULL); #ifdef VIMAGE static void vnet_if_uninit(const void *unused __unused) { VNET_ASSERT(TAILQ_EMPTY(&V_ifnet), ("%s:%d tailq &V_ifnet=%p " "not empty", __func__, __LINE__, &V_ifnet)); VNET_ASSERT(TAILQ_EMPTY(&V_ifg_head), ("%s:%d tailq &V_ifg_head=%p " "not empty", __func__, __LINE__, &V_ifg_head)); free((caddr_t)V_ifindex_table, M_IFNET); } VNET_SYSUNINIT(vnet_if_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST, vnet_if_uninit, NULL); static void vnet_if_return(const void *unused __unused) { struct ifnet *ifp, *nifp; /* Return all inherited interfaces to their parent vnets. 
*/
	TAILQ_FOREACH_SAFE(ifp, &V_ifnet, if_link, nifp) {
		if (ifp->if_home_vnet != ifp->if_vnet)
			if_vmove(ifp, ifp->if_home_vnet);
	}
}
VNET_SYSUNINIT(vnet_if_return, SI_SUB_VNET_DONE, SI_ORDER_ANY,
    vnet_if_return, NULL);
#endif

/*
 * Double the size of the index table.  Called with the ifnet write lock
 * held and returns with it held, but the lock is dropped around the
 * M_WAITOK allocation.  If V_if_indexlim changed while the lock was
 * released, the new buffer is discarded and nothing else is done; callers
 * such as ifindex_alloc() simply re-check and call again if needed.
 */
static void
if_grow(void)
{
	int oldlim;
	u_int n;
	struct ifnet **e;

	IFNET_WLOCK_ASSERT();
	oldlim = V_if_indexlim;
	IFNET_WUNLOCK();
	/* n is the byte size of the new, twice as large, table. */
	n = (oldlim << 1) * sizeof(*e);
	e = malloc(n, M_IFNET, M_WAITOK | M_ZERO);
	IFNET_WLOCK();
	/* The limit moved while we were unlocked: drop our buffer. */
	if (V_if_indexlim != oldlim) {
		free(e, M_IFNET);
		return;
	}
	if (V_ifindex_table != NULL) {
		/* n/2 is the byte size of the old table. */
		memcpy((caddr_t)e, (caddr_t)V_ifindex_table, n/2);
		free((caddr_t)V_ifindex_table, M_IFNET);
	}
	V_if_indexlim <<= 1;
	V_ifindex_table = e;
}

/*
 * Allocate a struct ifnet and an index for an interface.  A layer 2
 * common structure will also be allocated if an allocation routine is
 * registered for the passed type.
 *
 * Returns the new ifnet, or NULL if the registered layer 2 allocation
 * routine fails (the ifnet and its index are released again in that case).
 */
struct ifnet *
if_alloc(u_char type)
{
	struct ifnet *ifp;
	u_short idx;

	ifp = malloc(sizeof(struct ifnet), M_IFNET, M_WAITOK|M_ZERO);
	/*
	 * Reserve the index with the IFNET_HOLD marker; lookups by index
	 * treat such a slot as empty until the real pointer is stored at
	 * the end of this function.
	 */
	IFNET_WLOCK();
	idx = ifindex_alloc();
	ifnet_setbyindex_locked(idx, IFNET_HOLD);
	IFNET_WUNLOCK();
	ifp->if_index = idx;
	ifp->if_type = type;
	ifp->if_alloctype = type;
#ifdef VIMAGE
	ifp->if_vnet = curvnet;
#endif
	if (if_com_alloc[type] != NULL) {
		ifp->if_l2com = if_com_alloc[type](type, ifp);
		if (ifp->if_l2com == NULL) {
			free(ifp, M_IFNET);
			ifindex_free(idx);
			return (NULL);
		}
	}

	IF_ADDR_LOCK_INIT(ifp);
	TASK_INIT(&ifp->if_linktask, 0, do_link_state_change, ifp);
	ifp->if_afdata_initialized = 0;
	IF_AFDATA_LOCK_INIT(ifp);
	TAILQ_INIT(&ifp->if_addrhead);
	TAILQ_INIT(&ifp->if_multiaddrs);
	TAILQ_INIT(&ifp->if_groups);
#ifdef MAC
	mac_ifnet_init(ifp);
#endif
	ifq_init(&ifp->if_snd, ifp);

	refcount_init(&ifp->if_refcount, 1);	/* Index reference. */
	for (int i = 0; i < IFCOUNTERS; i++)
		ifp->if_counters[i] = counter_u64_alloc(M_WAITOK);
	ifp->if_get_counter = if_get_counter_default;
	ifp->if_pcp = IFNET_PCP_NONE;
	/* Replace the IFNET_HOLD reservation with the real pointer. */
	ifnet_setbyindex(ifp->if_index, ifp);
	return (ifp);
}

/*
 * Do the actual work of freeing a struct ifnet, and layer 2 common
 * structure.  This call is made when the last reference to an
 * interface is released.
 */
static void
if_free_internal(struct ifnet *ifp)
{

	KASSERT((ifp->if_flags & IFF_DYING),
	    ("if_free_internal: interface not dying"));

	/* Uses if_alloctype, the type recorded by if_alloc(). */
	if (if_com_free[ifp->if_alloctype] != NULL)
		if_com_free[ifp->if_alloctype](ifp->if_l2com,
		    ifp->if_alloctype);

#ifdef MAC
	mac_ifnet_destroy(ifp);
#endif /* MAC */
	if (ifp->if_description != NULL)
		free(ifp->if_description, M_IFDESCR);
	IF_AFDATA_DESTROY(ifp);
	IF_ADDR_LOCK_DESTROY(ifp);
	ifq_delete(&ifp->if_snd);

	for (int i = 0; i < IFCOUNTERS; i++)
		counter_u64_free(ifp->if_counters[i]);

	free(ifp, M_IFNET);
}

/*
 * Deregister an interface and free the associated storage.
 *
 * The interface is flagged IFF_DYING and its index table slot is cleared
 * right away; the structure itself is only freed here if this drops the
 * last reference, otherwise that happens in if_rele().
 */
void
if_free(struct ifnet *ifp)
{

	ifp->if_flags |= IFF_DYING;			/* XXX: Locking */

	CURVNET_SET_QUIET(ifp->if_vnet);
	IFNET_WLOCK();
	KASSERT(ifp == ifnet_byindex_locked(ifp->if_index),
	    ("%s: freeing unallocated ifnet", ifp->if_xname));

	ifindex_free_locked(ifp->if_index);
	IFNET_WUNLOCK();

	if (refcount_release(&ifp->if_refcount))
		if_free_internal(ifp);
	CURVNET_RESTORE();
}

/*
 * Interfaces to keep an ifnet type-stable despite the possibility of the
 * driver calling if_free().  If there are additional references, we defer
 * freeing the underlying data structure.
 */
void
if_ref(struct ifnet *ifp)
{

	/* We don't assert the ifnet list lock here, but arguably should.
*/ refcount_acquire(&ifp->if_refcount); } void if_rele(struct ifnet *ifp) { if (!refcount_release(&ifp->if_refcount)) return; if_free_internal(ifp); } void ifq_init(struct ifaltq *ifq, struct ifnet *ifp) { mtx_init(&ifq->ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF); if (ifq->ifq_maxlen == 0) ifq->ifq_maxlen = ifqmaxlen; ifq->altq_type = 0; ifq->altq_disc = NULL; ifq->altq_flags &= ALTQF_CANTCHANGE; ifq->altq_tbr = NULL; ifq->altq_ifp = ifp; } void ifq_delete(struct ifaltq *ifq) { mtx_destroy(&ifq->ifq_mtx); } /* * Perform generic interface initialization tasks and attach the interface * to the list of "active" interfaces. If vmove flag is set on entry * to if_attach_internal(), perform only a limited subset of initialization * tasks, given that we are moving from one vnet to another an ifnet which * has already been fully initialized. * * Note that if_detach_internal() removes group membership unconditionally * even when vmove flag is set, and if_attach_internal() adds only IFG_ALL. * Thus, when if_vmove() is applied to a cloned interface, group membership * is lost while a cloned one always joins a group whose name is * ifc->ifc_name. To recover this after if_detach_internal() and * if_attach_internal(), the cloner should be specified to * if_attach_internal() via ifc. If it is non-NULL, if_attach_internal() * attempts to join a group whose name is ifc->ifc_name. * * XXX: * - The decision to return void and thus require this function to * succeed is questionable. * - We should probably do more sanity checking. For instance we don't * do anything to insure if_xname is unique or non-empty. */ void if_attach(struct ifnet *ifp) { if_attach_internal(ifp, 0, NULL); } /* * Compute the least common TSO limit. */ void if_hw_tsomax_common(if_t ifp, struct ifnet_hw_tsomax *pmax) { /* * 1) If there is no limit currently, take the limit from * the network adapter. * * 2) If the network adapter has a limit below the current * limit, apply it. 
*/ if (pmax->tsomaxbytes == 0 || (ifp->if_hw_tsomax != 0 && ifp->if_hw_tsomax < pmax->tsomaxbytes)) { pmax->tsomaxbytes = ifp->if_hw_tsomax; } if (pmax->tsomaxsegcount == 0 || (ifp->if_hw_tsomaxsegcount != 0 && ifp->if_hw_tsomaxsegcount < pmax->tsomaxsegcount)) { pmax->tsomaxsegcount = ifp->if_hw_tsomaxsegcount; } if (pmax->tsomaxsegsize == 0 || (ifp->if_hw_tsomaxsegsize != 0 && ifp->if_hw_tsomaxsegsize < pmax->tsomaxsegsize)) { pmax->tsomaxsegsize = ifp->if_hw_tsomaxsegsize; } } /* * Update TSO limit of a network adapter. * * Returns zero if no change. Else non-zero. */ int if_hw_tsomax_update(if_t ifp, struct ifnet_hw_tsomax *pmax) { int retval = 0; if (ifp->if_hw_tsomax != pmax->tsomaxbytes) { ifp->if_hw_tsomax = pmax->tsomaxbytes; retval++; } if (ifp->if_hw_tsomaxsegsize != pmax->tsomaxsegsize) { ifp->if_hw_tsomaxsegsize = pmax->tsomaxsegsize; retval++; } if (ifp->if_hw_tsomaxsegcount != pmax->tsomaxsegcount) { ifp->if_hw_tsomaxsegcount = pmax->tsomaxsegcount; retval++; } return (retval); } static void if_attach_internal(struct ifnet *ifp, int vmove, struct if_clone *ifc) { unsigned socksize, ifasize; int namelen, masklen; struct sockaddr_dl *sdl; struct ifaddr *ifa; if (ifp->if_index == 0 || ifp != ifnet_byindex(ifp->if_index)) panic ("%s: BUG: if_attach called without if_alloc'd input()\n", ifp->if_xname); #ifdef VIMAGE ifp->if_vnet = curvnet; if (ifp->if_home_vnet == NULL) ifp->if_home_vnet = curvnet; #endif if_addgroup(ifp, IFG_ALL); /* Restore group membership for cloned interfaces. 
*/
	if (vmove && ifc != NULL)
		if_clone_addgroup(ifp, ifc);

	getmicrotime(&ifp->if_lastchange);
	ifp->if_epoch = time_uptime;

	KASSERT((ifp->if_transmit == NULL && ifp->if_qflush == NULL) ||
	    (ifp->if_transmit != NULL && ifp->if_qflush != NULL),
	    ("transmit and qflush must both either be set or both be NULL"));
	/* Fill in default handlers for anything the driver left unset. */
	if (ifp->if_transmit == NULL) {
		ifp->if_transmit = if_transmit;
		ifp->if_qflush = if_qflush;
	}
	if (ifp->if_input == NULL)
		ifp->if_input = if_input_default;

	if (ifp->if_requestencap == NULL)
		ifp->if_requestencap = if_requestencap_default;

	if (!vmove) {
#ifdef MAC
		mac_ifnet_create(ifp);
#endif

		/*
		 * Create a Link Level name for this device.
		 */
		namelen = strlen(ifp->if_xname);
		/*
		 * Always save enough space for any possible name so we
		 * can do a rename in place later.
		 */
		masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + IFNAMSIZ;
		socksize = masklen + ifp->if_addrlen;
		if (socksize < sizeof(*sdl))
			socksize = sizeof(*sdl);
		socksize = roundup2(socksize, sizeof(long));
		/*
		 * One allocation holds the ifaddr followed by two
		 * sockaddr_dl of socksize bytes each: the link-level
		 * address first, then its netmask.
		 */
		ifasize = sizeof(*ifa) + 2 * socksize;
		ifa = ifa_alloc(ifasize, M_WAITOK);
		sdl = (struct sockaddr_dl *)(ifa + 1);
		sdl->sdl_len = socksize;
		sdl->sdl_family = AF_LINK;
		bcopy(ifp->if_xname, sdl->sdl_data, namelen);
		sdl->sdl_nlen = namelen;
		sdl->sdl_index = ifp->if_index;
		sdl->sdl_type = ifp->if_type;
		ifp->if_addr = ifa;
		ifa->ifa_ifp = ifp;
		ifa->ifa_rtrequest = link_rtrequest;
		ifa->ifa_addr = (struct sockaddr *)sdl;
		/* Step to the second sockaddr_dl: the netmask. */
		sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
		ifa->ifa_netmask = (struct sockaddr *)sdl;
		sdl->sdl_len = masklen;
		/* The mask covers exactly the bytes of the interface name. */
		while (namelen != 0)
			sdl->sdl_data[--namelen] = 0xff;
		TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
		/* Reliably crash if used uninitialized. */
		ifp->if_broadcastaddr = NULL;

		if (ifp->if_type == IFT_ETHER) {
			ifp->if_hw_addr = malloc(ifp->if_addrlen, M_IFADDR,
			    M_WAITOK | M_ZERO);
		}

#if defined(INET) || defined(INET6)
		/* Use defaults for TSO, if nothing is set */
		if (ifp->if_hw_tsomax == 0 &&
		    ifp->if_hw_tsomaxsegcount == 0 &&
		    ifp->if_hw_tsomaxsegsize == 0) {
			/*
			 * The TSO defaults needs to be such that an
			 * NFS mbuf list of 35 mbufs totalling just
			 * below 64K works and that a chain of mbufs
			 * can be defragged into at most 32 segments:
			 */
			ifp->if_hw_tsomax = min(IP_MAXPACKET, (32 * MCLBYTES) -
			    (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
			ifp->if_hw_tsomaxsegcount = 35;
			ifp->if_hw_tsomaxsegsize = 2048;	/* 2K */

			/* XXX some drivers set IFCAP_TSO after ethernet attach */
			if (ifp->if_capabilities & IFCAP_TSO) {
				if_printf(ifp,
				    "Using defaults for TSO: %u/%u/%u\n",
				    ifp->if_hw_tsomax,
				    ifp->if_hw_tsomaxsegcount,
				    ifp->if_hw_tsomaxsegsize);
			}
		}
#endif
	}
#ifdef VIMAGE
	else {
		/*
		 * Update the interface index in the link layer address
		 * of the interface.
		 */
		for (ifa = ifp->if_addr; ifa != NULL;
		    ifa = TAILQ_NEXT(ifa, ifa_link)) {
			if (ifa->ifa_addr->sa_family == AF_LINK) {
				sdl = (struct sockaddr_dl *)ifa->ifa_addr;
				sdl->sdl_index = ifp->if_index;
			}
		}
	}
#endif

	/* Publish the interface on the per-vnet interface list. */
	IFNET_WLOCK();
	TAILQ_INSERT_TAIL(&V_ifnet, ifp, if_link);
#ifdef VIMAGE
	curvnet->vnet_ifcnt++;
#endif
	IFNET_WUNLOCK();

	if (domain_init_status >= 2)
		if_attachdomain1(ifp);

	EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
	if (IS_DEFAULT_VNET(curvnet))
		devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);

	/* Announce the interface. */
	rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
}

/*
 * SYSINIT hook: run if_attachdomain1() on every interface currently on
 * the V_ifnet list.
 */
static void
if_attachdomain(void *dummy)
{
	struct ifnet *ifp;

	TAILQ_FOREACH(ifp, &V_ifnet, if_link)
		if_attachdomain1(ifp);
}
SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_SECOND,
    if_attachdomain, NULL);

/*
 * Set up the per-address-family data of one interface by calling the
 * dom_ifattach hook of every domain that provides one.  A repeated call
 * for the same domain_init_status only logs a warning.
 */
static void
if_attachdomain1(struct ifnet *ifp)
{
	struct domain *dp;

	/*
	 * Since dp->dom_ifattach calls malloc() with M_WAITOK, we
	 * cannot lock ifp->if_afdata initialization, entirely.
	 */
	IF_AFDATA_LOCK(ifp);
	if (ifp->if_afdata_initialized >= domain_init_status) {
		IF_AFDATA_UNLOCK(ifp);
		log(LOG_WARNING, "%s called more than once on %s\n",
		    __func__, ifp->if_xname);
		return;
	}
	ifp->if_afdata_initialized = domain_init_status;
	IF_AFDATA_UNLOCK(ifp);

	/* address family dependent data region */
	bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
	for (dp = domains; dp; dp = dp->dom_next) {
		if (dp->dom_ifattach)
			ifp->if_afdata[dp->dom_family] =
			    (*dp->dom_ifattach)(ifp);
	}
}

/*
 * Remove any unicast or broadcast network addresses from an interface.
 * AF_LINK addresses are left alone.
 */
void
if_purgeaddrs(struct ifnet *ifp)
{
	struct ifaddr *ifa, *next;

	/* XXX cannot hold IF_ADDR_WLOCK over called functions. */
	TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) {
		if (ifa->ifa_addr->sa_family == AF_LINK)
			continue;
#ifdef INET
		/* XXX: Ugly!! ad hoc just for INET */
		if (ifa->ifa_addr->sa_family == AF_INET) {
			struct ifaliasreq ifr;

			bzero(&ifr, sizeof(ifr));
			ifr.ifra_addr = *ifa->ifa_addr;
			if (ifa->ifa_dstaddr)
				ifr.ifra_broadaddr = *ifa->ifa_dstaddr;
			/*
			 * On success the address is done; on failure fall
			 * through to the generic removal below.
			 */
			if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp,
			    NULL) == 0)
				continue;
		}
#endif /* INET */
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6) {
			in6_purgeaddr(ifa);
			/* ifp_addrhead is already updated */
			continue;
		}
#endif /* INET6 */
		IF_ADDR_WLOCK(ifp);
		TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
		IF_ADDR_WUNLOCK(ifp);
		ifa_free(ifa);
	}
}

/*
 * Remove any multicast network addresses from an interface when an ifnet
 * is going away.
 */
static void
if_purgemaddrs(struct ifnet *ifp)
{
	struct ifmultiaddr *ifma;
	struct ifmultiaddr *next;

	IF_ADDR_WLOCK(ifp);
	TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next)
		if_delmulti_locked(ifp, ifma, 1);
	IF_ADDR_WUNLOCK(ifp);
}

/*
 * Detach an interface, removing it from the list of "active" interfaces.
* If vmove flag is set on entry to if_detach_internal(), perform only a
 * limited subset of cleanup tasks, given that we are moving an ifnet from
 * one vnet to another, where it must be fully operational.
 *
 * XXXRW: There are some significant questions about event ordering, and
 * how to prevent things from starting to use the interface during detach.
 */
void
if_detach(struct ifnet *ifp)
{

	CURVNET_SET_QUIET(ifp->if_vnet);
	/* The return value is deliberately ignored here. */
	if_detach_internal(ifp, 0, NULL);
	CURVNET_RESTORE();
}

/*
 * The vmove flag, if set, indicates that we are called from a callpath
 * that is moving an interface to a different vnet instance.
 *
 * The local "shutdown" flag (VIMAGE only) is not a parameter; it is
 * derived below from the vnet_state of ifp->if_vnet and is set while
 * that vnet instance is in the process of shutting down.  Note: we can
 * be called on a vnet instance shutdown without this flag being set,
 * e.g., when the cloned interfaces are destroyed as first thing of
 * teardown.
 *
 * If vmove is set and ifcp is non-NULL, *ifcp receives the result of
 * if_clone_findifc(ifp).
 *
 * Returns 0 on success, or ENOENT if ifp was not found on V_ifnet.
 */
static int
if_detach_internal(struct ifnet *ifp, int vmove, struct if_clone **ifcp)
{
	struct ifaddr *ifa;
	int i;
	struct domain *dp;
 	struct ifnet *iter;
 	int found = 0;
#ifdef VIMAGE
	int shutdown;

	shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET &&
		 ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0;
#endif
	/* Unlink ifp from the interface list, if it is still there. */
	IFNET_WLOCK();
	TAILQ_FOREACH(iter, &V_ifnet, if_link)
		if (iter == ifp) {
			TAILQ_REMOVE(&V_ifnet, ifp, if_link);
			found = 1;
			break;
		}
	IFNET_WUNLOCK();
	if (!found) {
		/*
		 * While we would want to panic here, we cannot
		 * guarantee that the interface is indeed still on
		 * the list given we don't hold locks all the way.
		 */
		return (ENOENT);
#if 0
		if (vmove)
			panic("%s: ifp=%p not on the ifnet tailq %p",
			    __func__, ifp, &V_ifnet);
		else
			return; /* XXX this should panic as well? */
#endif
	}

	/*
	 * At this point we know the interface still was on the ifnet list
	 * and we removed it so we are in a stable state.
	 */
#ifdef VIMAGE
	curvnet->vnet_ifcnt--;
#endif

	/*
	 * In any case (destroy or vmove) detach us from the groups
	 * and remove/wait for pending events on the taskq.
	 * XXX-BZ in theory an interface could still enqueue a taskq change?
	 */
	if_delgroups(ifp);

	taskqueue_drain(taskqueue_swi, &ifp->if_linktask);

	/*
	 * Check if this is a cloned interface or not. Must do even if
	 * shutting down as a if_vmove_reclaim() would move the ifp and
	 * the if_clone_addgroup() will have a corrupted string otherwise
	 * from a gibberish pointer.
	 */
	if (vmove && ifcp != NULL)
		*ifcp = if_clone_findifc(ifp);

	if_down(ifp);

#ifdef VIMAGE
	/*
	 * On VNET shutdown abort here as the stack teardown will do all
	 * the work top-down for us.
	 */
	if (shutdown) {
		/*
		 * In case of a vmove we are done here without error.
		 * If we would signal an error it would lead to the same
		 * abort as if we did not find the ifnet anymore.
		 * if_detach() calls us in void context and does not care
		 * about an early abort notification, so life is splendid :)
		 */
		goto finish_vnet_shutdown;
	}
#endif

	/*
	 * At this point we are not tearing down a VNET and are either
	 * going to destroy or vmove the interface and have to cleanup
	 * accordingly.
	 */

	/*
	 * Remove routes and flush queues.
	 */
#ifdef ALTQ
	if (ALTQ_IS_ENABLED(&ifp->if_snd))
		altq_disable(&ifp->if_snd);
	if (ALTQ_IS_ATTACHED(&ifp->if_snd))
		altq_detach(&ifp->if_snd);
#endif

	if_purgeaddrs(ifp);

#ifdef INET
	in_ifdetach(ifp);
#endif

#ifdef INET6
	/*
	 * Remove all IPv6 kernel structs related to ifp.  This should be done
	 * before removing routing entries below, since IPv6 interface direct
	 * routes are expected to be removed by the IPv6-specific kernel API.
	 * Otherwise, the kernel will detect some inconsistency and bark it.
	 */
	in6_ifdetach(ifp);
#endif
	if_purgemaddrs(ifp);

	/* Announce that the interface is gone. */
	rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
	EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
	if (IS_DEFAULT_VNET(curvnet))
		devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);

	if (!vmove) {
		/*
		 * Prevent further calls into the device driver via ifnet.
		 */
		if_dead(ifp);

		/*
		 * Remove link ifaddr pointer and maybe decrement if_index.
		 * Clean up all addresses.
		 */
		free(ifp->if_hw_addr, M_IFADDR);
		ifp->if_hw_addr = NULL;
		ifp->if_addr = NULL;

		/* We can now free link ifaddr. */
		IF_ADDR_WLOCK(ifp);
		if (!TAILQ_EMPTY(&ifp->if_addrhead)) {
			ifa = TAILQ_FIRST(&ifp->if_addrhead);
			TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
			/* Drop the lock before releasing the address. */
			IF_ADDR_WUNLOCK(ifp);
			ifa_free(ifa);
		} else
			IF_ADDR_WUNLOCK(ifp);
	}

	rt_flushifroutes(ifp);

#ifdef VIMAGE
finish_vnet_shutdown:
#endif
	/*
	 * We cannot hold the lock over dom_ifdetach calls as they might
	 * sleep, for example trying to drain a callout, thus open up the
	 * theoretical race with re-attaching.
	 */
	IF_AFDATA_LOCK(ifp);
	i = ifp->if_afdata_initialized;
	ifp->if_afdata_initialized = 0;
	IF_AFDATA_UNLOCK(ifp);
	/* i > 0 means if_attachdomain1() had set up the af data. */
	for (dp = domains; i > 0 && dp; dp = dp->dom_next) {
		if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family]) {
			(*dp->dom_ifdetach)(ifp,
			    ifp->if_afdata[dp->dom_family]);
			ifp->if_afdata[dp->dom_family] = NULL;
		}
	}

	return (0);
}

#ifdef VIMAGE
/*
 * if_vmove() performs a limited version of if_detach() in current
 * vnet and if_attach()es the ifnet to the vnet specified as 2nd arg.
 * An attempt is made to shrink if_index in current vnet, find an
 * unused if_index in target vnet and calls if_grow() if necessary,
 * and finally find an unused if_xname for the target vnet.
 */
static void
if_vmove(struct ifnet *ifp, struct vnet *new_vnet)
{
	struct if_clone *ifc;
	u_int bif_dlt, bif_hdrlen;
	int rc;

 	/*
	 * if_detach_internal() will call the eventhandler to notify
	 * interface departure.  That will detach if_bpf.  We need to
	 * safe the dlt and hdrlen so we can re-attach it later.
*/
	bpf_get_bp_params(ifp->if_bpf, &bif_dlt, &bif_hdrlen);

	/*
	 * Detach from current vnet, but preserve LLADDR info, do not
	 * mark as dead etc. so that the ifnet can be reattached later.
	 * If we cannot find it, we lost the race to someone else.
	 */
	rc = if_detach_internal(ifp, 1, &ifc);
	if (rc != 0)
		return;

	/*
	 * Unlink the ifnet from ifindex_table[] in current vnet, and shrink
	 * the if_index for that vnet if possible.
	 *
	 * NOTE: IFNET_WLOCK/IFNET_WUNLOCK() are assumed to be unvirtualized,
	 * or we'd lock on one vnet and unlock on another.
	 */
	IFNET_WLOCK();
	ifindex_free_locked(ifp->if_index);
	IFNET_WUNLOCK();

	/*
	 * Perform interface-specific reassignment tasks, if provided by
	 * the driver.
	 */
	if (ifp->if_reassign != NULL)
		ifp->if_reassign(ifp, new_vnet, NULL);

	/*
	 * Switch to the context of the target vnet.
	 */
	CURVNET_SET_QUIET(new_vnet);

	/* Allocate and publish an index in the target vnet's table. */
	IFNET_WLOCK();
	ifp->if_index = ifindex_alloc();
	ifnet_setbyindex_locked(ifp->if_index, ifp);
	IFNET_WUNLOCK();

	if_attach_internal(ifp, 1, ifc);

	/* Re-attach bpf with the parameters saved before the detach. */
	if (ifp->if_bpf == NULL)
		bpfattach(ifp, bif_dlt, bif_hdrlen);

	CURVNET_RESTORE();
}

/*
 * Move an ifnet to or from another child prison/vnet, specified by the jail id.
 *
 * Returns 0 on success (ifname is then overwritten with ifp->if_xname),
 * ENXIO if no child prison with that jid is found, EEXIST if the prison
 * already uses ifp's vnet or already has an interface called ifname, and
 * EBUSY if ifp's current vnet is shutting down.
 */
static int
if_vmove_loan(struct thread *td, struct ifnet *ifp, char *ifname, int jid)
{
	struct prison *pr;
	struct ifnet *difp;
	int shutdown;

	/* Try to find the prison within our visibility. */
	sx_slock(&allprison_lock);
	pr = prison_find_child(td->td_ucred->cr_prison, jid);
	sx_sunlock(&allprison_lock);
	if (pr == NULL)
		return (ENXIO);
	/* Keep a hold on the prison; every exit path below drops it. */
	prison_hold_locked(pr);
	mtx_unlock(&pr->pr_mtx);

	/* Do not try to move the iface from and to the same prison. */
	if (pr->pr_vnet == ifp->if_vnet) {
		prison_free(pr);
		return (EEXIST);
	}

	/* Make sure the named iface does not exist in the dst. prison/vnet. */
	/* XXX Lock interfaces to avoid races. */
	CURVNET_SET_QUIET(pr->pr_vnet);
	difp = ifunit(ifname);
	if (difp != NULL) {
		CURVNET_RESTORE();
		prison_free(pr);
		return (EEXIST);
	}

	/* Make sure the VNET is stable. */
	shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET &&
		 ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0;
	if (shutdown) {
		CURVNET_RESTORE();
		prison_free(pr);
		return (EBUSY);
	}
	CURVNET_RESTORE();

	/* Move the interface into the child jail/vnet. */
	if_vmove(ifp, pr->pr_vnet);

	/* Report the new if_xname back to the userland. */
	sprintf(ifname, "%s", ifp->if_xname);

	prison_free(pr);
	return (0);
}

static int
if_vmove_reclaim(struct thread *td, char *ifname, int jid)
{
	struct prison *pr;
	struct vnet *vnet_dst;
	struct ifnet *ifp;
 	int shutdown;

	/* Try to find the prison within our visibility. */
	sx_slock(&allprison_lock);
	pr = prison_find_child(td->td_ucred->cr_prison, jid);
	sx_sunlock(&allprison_lock);
	if (pr == NULL)
		return (ENXIO);
	/* Keep a hold on the prison; every exit path below drops it. */
	prison_hold_locked(pr);
	mtx_unlock(&pr->pr_mtx);

	/* Make sure the named iface exists in the source prison/vnet. */
	CURVNET_SET(pr->pr_vnet);
	ifp = ifunit(ifname);		/* XXX Lock to avoid races. */
	if (ifp == NULL) {
		CURVNET_RESTORE();
		prison_free(pr);
		return (ENXIO);
	}

	/* Do not try to move the iface from and to the same prison. */
	vnet_dst = TD_TO_VNET(td);
	if (vnet_dst == ifp->if_vnet) {
		CURVNET_RESTORE();
		prison_free(pr);
		return (EEXIST);
	}

	/* Make sure the VNET is stable. */
	shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET &&
		 ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0;
	if (shutdown) {
		CURVNET_RESTORE();
		prison_free(pr);
		return (EBUSY);
	}

	/* Get interface back from child jail/vnet. */
	if_vmove(ifp, vnet_dst);
	CURVNET_RESTORE();

	/* Report the new if_xname back to the userland.
*/ sprintf(ifname, "%s", ifp->if_xname); prison_free(pr); return (0); } #endif /* VIMAGE */ /* * Add a group to an interface */ int if_addgroup(struct ifnet *ifp, const char *groupname) { struct ifg_list *ifgl; struct ifg_group *ifg = NULL; struct ifg_member *ifgm; int new = 0; if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' && groupname[strlen(groupname) - 1] <= '9') return (EINVAL); IFNET_WLOCK(); TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) if (!strcmp(ifgl->ifgl_group->ifg_group, groupname)) { IFNET_WUNLOCK(); return (EEXIST); } if ((ifgl = (struct ifg_list *)malloc(sizeof(struct ifg_list), M_TEMP, M_NOWAIT)) == NULL) { IFNET_WUNLOCK(); return (ENOMEM); } if ((ifgm = (struct ifg_member *)malloc(sizeof(struct ifg_member), M_TEMP, M_NOWAIT)) == NULL) { free(ifgl, M_TEMP); IFNET_WUNLOCK(); return (ENOMEM); } TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next) if (!strcmp(ifg->ifg_group, groupname)) break; if (ifg == NULL) { if ((ifg = (struct ifg_group *)malloc(sizeof(struct ifg_group), M_TEMP, M_NOWAIT)) == NULL) { free(ifgl, M_TEMP); free(ifgm, M_TEMP); IFNET_WUNLOCK(); return (ENOMEM); } strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group)); ifg->ifg_refcnt = 0; TAILQ_INIT(&ifg->ifg_members); TAILQ_INSERT_TAIL(&V_ifg_head, ifg, ifg_next); new = 1; } ifg->ifg_refcnt++; ifgl->ifgl_group = ifg; ifgm->ifgm_ifp = ifp; IF_ADDR_WLOCK(ifp); TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next); TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next); IF_ADDR_WUNLOCK(ifp); IFNET_WUNLOCK(); if (new) EVENTHANDLER_INVOKE(group_attach_event, ifg); EVENTHANDLER_INVOKE(group_change_event, groupname); return (0); } /* * Remove a group from an interface */ int if_delgroup(struct ifnet *ifp, const char *groupname) { struct ifg_list *ifgl; struct ifg_member *ifgm; IFNET_WLOCK(); TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) if (!strcmp(ifgl->ifgl_group->ifg_group, groupname)) break; if (ifgl == NULL) { IFNET_WUNLOCK(); return (ENOENT); } IF_ADDR_WLOCK(ifp); 
TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next); IF_ADDR_WUNLOCK(ifp); TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next) if (ifgm->ifgm_ifp == ifp) break; if (ifgm != NULL) { TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next); free(ifgm, M_TEMP); } if (--ifgl->ifgl_group->ifg_refcnt == 0) { TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next); IFNET_WUNLOCK(); EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group); free(ifgl->ifgl_group, M_TEMP); } else IFNET_WUNLOCK(); free(ifgl, M_TEMP); EVENTHANDLER_INVOKE(group_change_event, groupname); return (0); } /* * Remove an interface from all groups */ static void if_delgroups(struct ifnet *ifp) { struct ifg_list *ifgl; struct ifg_member *ifgm; char groupname[IFNAMSIZ]; IFNET_WLOCK(); while (!TAILQ_EMPTY(&ifp->if_groups)) { ifgl = TAILQ_FIRST(&ifp->if_groups); strlcpy(groupname, ifgl->ifgl_group->ifg_group, IFNAMSIZ); IF_ADDR_WLOCK(ifp); TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next); IF_ADDR_WUNLOCK(ifp); TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next) if (ifgm->ifgm_ifp == ifp) break; if (ifgm != NULL) { TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next); free(ifgm, M_TEMP); } if (--ifgl->ifgl_group->ifg_refcnt == 0) { TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next); IFNET_WUNLOCK(); EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group); free(ifgl->ifgl_group, M_TEMP); } else IFNET_WUNLOCK(); free(ifgl, M_TEMP); EVENTHANDLER_INVOKE(group_change_event, groupname); IFNET_WLOCK(); } IFNET_WUNLOCK(); } /* * Stores all groups from an interface in memory pointed * to by data */ static int if_getgroup(struct ifgroupreq *data, struct ifnet *ifp) { int len, error; struct ifg_list *ifgl; struct ifg_req ifgrq, *ifgp; struct ifgroupreq *ifgr = data; if (ifgr->ifgr_len == 0) { IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) ifgr->ifgr_len += sizeof(struct ifg_req); IF_ADDR_RUNLOCK(ifp); return (0); } len = ifgr->ifgr_len; ifgp = ifgr->ifgr_groups; 
/* XXX: wire */ IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) { if (len < sizeof(ifgrq)) { IF_ADDR_RUNLOCK(ifp); return (EINVAL); } bzero(&ifgrq, sizeof ifgrq); strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group, sizeof(ifgrq.ifgrq_group)); if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) { IF_ADDR_RUNLOCK(ifp); return (error); } len -= sizeof(ifgrq); ifgp++; } IF_ADDR_RUNLOCK(ifp); return (0); } /* * Stores all members of a group in memory pointed to by data */ static int if_getgroupmembers(struct ifgroupreq *data) { struct ifgroupreq *ifgr = data; struct ifg_group *ifg; struct ifg_member *ifgm; struct ifg_req ifgrq, *ifgp; int len, error; IFNET_RLOCK(); TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next) if (!strcmp(ifg->ifg_group, ifgr->ifgr_name)) break; if (ifg == NULL) { IFNET_RUNLOCK(); return (ENOENT); } if (ifgr->ifgr_len == 0) { TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) ifgr->ifgr_len += sizeof(ifgrq); IFNET_RUNLOCK(); return (0); } len = ifgr->ifgr_len; ifgp = ifgr->ifgr_groups; TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) { if (len < sizeof(ifgrq)) { IFNET_RUNLOCK(); return (EINVAL); } bzero(&ifgrq, sizeof ifgrq); strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname, sizeof(ifgrq.ifgrq_member)); if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) { IFNET_RUNLOCK(); return (error); } len -= sizeof(ifgrq); ifgp++; } IFNET_RUNLOCK(); return (0); } /* * Return counter values from counter(9)s stored in ifnet. */ uint64_t if_get_counter_default(struct ifnet *ifp, ift_counter cnt) { KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt)); return (counter_u64_fetch(ifp->if_counters[cnt])); } /* * Increase an ifnet counter. Usually used for counters shared * between the stack and a driver, but function supports them all. 
*/ void if_inc_counter(struct ifnet *ifp, ift_counter cnt, int64_t inc) { KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt)); counter_u64_add(ifp->if_counters[cnt], inc); } /* * Copy data from ifnet to userland API structure if_data. */ void if_data_copy(struct ifnet *ifp, struct if_data *ifd) { ifd->ifi_type = ifp->if_type; ifd->ifi_physical = 0; ifd->ifi_addrlen = ifp->if_addrlen; ifd->ifi_hdrlen = ifp->if_hdrlen; ifd->ifi_link_state = ifp->if_link_state; ifd->ifi_vhid = 0; ifd->ifi_datalen = sizeof(struct if_data); ifd->ifi_mtu = ifp->if_mtu; ifd->ifi_metric = ifp->if_metric; ifd->ifi_baudrate = ifp->if_baudrate; ifd->ifi_hwassist = ifp->if_hwassist; ifd->ifi_epoch = ifp->if_epoch; ifd->ifi_lastchange = ifp->if_lastchange; ifd->ifi_ipackets = ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS); ifd->ifi_ierrors = ifp->if_get_counter(ifp, IFCOUNTER_IERRORS); ifd->ifi_opackets = ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS); ifd->ifi_oerrors = ifp->if_get_counter(ifp, IFCOUNTER_OERRORS); ifd->ifi_collisions = ifp->if_get_counter(ifp, IFCOUNTER_COLLISIONS); ifd->ifi_ibytes = ifp->if_get_counter(ifp, IFCOUNTER_IBYTES); ifd->ifi_obytes = ifp->if_get_counter(ifp, IFCOUNTER_OBYTES); ifd->ifi_imcasts = ifp->if_get_counter(ifp, IFCOUNTER_IMCASTS); ifd->ifi_omcasts = ifp->if_get_counter(ifp, IFCOUNTER_OMCASTS); ifd->ifi_iqdrops = ifp->if_get_counter(ifp, IFCOUNTER_IQDROPS); ifd->ifi_oqdrops = ifp->if_get_counter(ifp, IFCOUNTER_OQDROPS); ifd->ifi_noproto = ifp->if_get_counter(ifp, IFCOUNTER_NOPROTO); } /* * Wrapper functions for struct ifnet address list locking macros. These are * used by kernel modules to avoid encoding programming interface or binary * interface assumptions that may be violated when kernel-internal locking * approaches change. 
*/
/* Take the interface address list read lock. */
void
if_addr_rlock(struct ifnet *ifp)
{

	IF_ADDR_RLOCK(ifp);
}

/* Drop the interface address list read lock. */
void
if_addr_runlock(struct ifnet *ifp)
{

	IF_ADDR_RUNLOCK(ifp);
}

/* Same as if_addr_rlock(), but taking the opaque if_t handle. */
void
if_maddr_rlock(if_t ifp)
{

	IF_ADDR_RLOCK((struct ifnet *)ifp);
}

/* Same as if_addr_runlock(), but taking the opaque if_t handle. */
void
if_maddr_runlock(if_t ifp)
{

	IF_ADDR_RUNLOCK((struct ifnet *)ifp);
}

/*
 * Initialization, destruction and refcounting functions for ifaddrs.
 */

/*
 * Allocate a zeroed ifaddr of at least sizeof(struct ifaddr) bytes
 * together with its four packet/byte counters.  The malloc/counter flags
 * are taken from 'flags'.  Returns the new ifaddr holding one reference,
 * or NULL when any allocation fails.
 */
struct ifaddr *
ifa_alloc(size_t size, int flags)
{
	struct ifaddr *ifa;

	KASSERT(size >= sizeof(struct ifaddr),
	    ("%s: invalid size %zu", __func__, size));

	ifa = malloc(size, M_IFADDR, M_ZERO | flags);
	if (ifa == NULL)
		return (NULL);

	if ((ifa->ifa_opackets = counter_u64_alloc(flags)) == NULL)
		goto fail;
	if ((ifa->ifa_ipackets = counter_u64_alloc(flags)) == NULL)
		goto fail;
	if ((ifa->ifa_obytes = counter_u64_alloc(flags)) == NULL)
		goto fail;
	if ((ifa->ifa_ibytes = counter_u64_alloc(flags)) == NULL)
		goto fail;

	refcount_init(&ifa->ifa_refcnt, 1);

	return (ifa);

fail:
	/* free(NULL) is okay */
	counter_u64_free(ifa->ifa_opackets);
	counter_u64_free(ifa->ifa_ipackets);
	counter_u64_free(ifa->ifa_obytes);
	counter_u64_free(ifa->ifa_ibytes);
	free(ifa, M_IFADDR);

	return (NULL);
}

/* Acquire an additional reference on an ifaddr. */
void
ifa_ref(struct ifaddr *ifa)
{

	refcount_acquire(&ifa->ifa_refcnt);
}

/*
 * Release a reference on an ifaddr; when the last one goes away the
 * counters and the ifaddr itself are freed.
 */
void
ifa_free(struct ifaddr *ifa)
{

	if (refcount_release(&ifa->ifa_refcnt)) {
		counter_u64_free(ifa->ifa_opackets);
		counter_u64_free(ifa->ifa_ipackets);
		counter_u64_free(ifa->ifa_obytes);
		counter_u64_free(ifa->ifa_ibytes);
		free(ifa, M_IFADDR);
	}
}

/*
 * Issue the routing request 'cmd' for the host route to address 'ia'
 * in the interface's FIB.  The gateway is a link-level sockaddr
 * initialised from the interface; for anything but RTM_DELETE the route
 * interface is set to V_loif.  'otype' is only used in the failure log
 * message.  Returns the rtrequest1_fib() result.
 */
static int
ifa_maintain_loopback_route(int cmd, const char *otype, struct ifaddr *ifa,
    struct sockaddr *ia)
{
	int error;
	struct rt_addrinfo info;
	struct sockaddr_dl null_sdl;
	struct ifnet *ifp;

	ifp = ifa->ifa_ifp;

	bzero(&info, sizeof(info));
	if (cmd != RTM_DELETE)
		info.rti_ifp = V_loif;
	info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC | RTF_PINNED;
	info.rti_info[RTAX_DST] = ia;
	info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl;
	link_init_sdl(ifp, (struct sockaddr *)&null_sdl, ifp->if_type);

	error = rtrequest1_fib(cmd, &info, NULL, ifp->if_fib);

	if (error != 0)
		log(LOG_DEBUG, "%s: %s failed for interface %s: %u\n",
		    __func__, otype, if_name(ifp), error);

	return (error);
}

/* Install the loopback host route for 'ia' (RTM_ADD). */
int
ifa_add_loopback_route(struct ifaddr *ifa, struct sockaddr *ia)
{

	return (ifa_maintain_loopback_route(RTM_ADD, "insertion", ifa, ia));
}

/* Remove the loopback host route for 'ia' (RTM_DELETE). */
int
ifa_del_loopback_route(struct ifaddr *ifa, struct sockaddr *ia)
{

	return (ifa_maintain_loopback_route(RTM_DELETE, "deletion", ifa, ia));
}

/* Change the loopback host route for 'ia' (RTM_CHANGE). */
int
ifa_switch_loopback_route(struct ifaddr *ifa, struct sockaddr *ia)
{

	return (ifa_maintain_loopback_route(RTM_CHANGE, "switch", ifa, ia));
}

/*
 * XXX: Because sockaddr_dl has deeper structure than the sockaddr
 * structs used to represent other address families, it is necessary
 * to perform a different comparison.
 */

#define	sa_dl_equal(a1, a2)	\
	((((const struct sockaddr_dl *)(a1))->sdl_len ==		\
	 ((const struct sockaddr_dl *)(a2))->sdl_len) &&		\
	 (bcmp(CLLADDR((const struct sockaddr_dl *)(a1)),		\
	       CLLADDR((const struct sockaddr_dl *)(a2)),		\
	       ((const struct sockaddr_dl *)(a1))->sdl_alen) == 0))

/*
 * Locate an interface based on a complete address.
*/ /*ARGSUSED*/ static struct ifaddr * ifa_ifwithaddr_internal(const struct sockaddr *addr, int getref) { struct ifnet *ifp; struct ifaddr *ifa; IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != addr->sa_family) continue; if (sa_equal(addr, ifa->ifa_addr)) { if (getref) ifa_ref(ifa); IF_ADDR_RUNLOCK(ifp); goto done; } /* IP6 doesn't have broadcast */ if ((ifp->if_flags & IFF_BROADCAST) && ifa->ifa_broadaddr && ifa->ifa_broadaddr->sa_len != 0 && sa_equal(ifa->ifa_broadaddr, addr)) { if (getref) ifa_ref(ifa); IF_ADDR_RUNLOCK(ifp); goto done; } } IF_ADDR_RUNLOCK(ifp); } ifa = NULL; done: IFNET_RUNLOCK_NOSLEEP(); return (ifa); } struct ifaddr * ifa_ifwithaddr(const struct sockaddr *addr) { return (ifa_ifwithaddr_internal(addr, 1)); } int ifa_ifwithaddr_check(const struct sockaddr *addr) { return (ifa_ifwithaddr_internal(addr, 0) != NULL); } /* * Locate an interface based on the broadcast address. */ /* ARGSUSED */ struct ifaddr * ifa_ifwithbroadaddr(const struct sockaddr *addr, int fibnum) { struct ifnet *ifp; struct ifaddr *ifa; IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum)) continue; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != addr->sa_family) continue; if ((ifp->if_flags & IFF_BROADCAST) && ifa->ifa_broadaddr && ifa->ifa_broadaddr->sa_len != 0 && sa_equal(ifa->ifa_broadaddr, addr)) { ifa_ref(ifa); IF_ADDR_RUNLOCK(ifp); goto done; } } IF_ADDR_RUNLOCK(ifp); } ifa = NULL; done: IFNET_RUNLOCK_NOSLEEP(); return (ifa); } /* * Locate the point to point interface with a given destination address. 
*/
/*
 * Only IFF_POINTOPOINT interfaces in FIB 'fibnum' (or any FIB when it is
 * RT_ALL_FIBS) are scanned.  A match is returned with a reference held;
 * NULL is returned when nothing matches.
 */
/*ARGSUSED*/
struct ifaddr *
ifa_ifwithdstaddr(const struct sockaddr *addr, int fibnum)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	IFNET_RLOCK_NOSLEEP();
	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
			continue;
		if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
			continue;
		IF_ADDR_RLOCK(ifp);
		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
			if (ifa->ifa_addr->sa_family != addr->sa_family)
				continue;
			if (ifa->ifa_dstaddr != NULL &&
			    sa_equal(addr, ifa->ifa_dstaddr)) {
				ifa_ref(ifa);
				IF_ADDR_RUNLOCK(ifp);
				goto done;
			}
		}
		IF_ADDR_RUNLOCK(ifp);
	}
	ifa = NULL;
done:
	IFNET_RUNLOCK_NOSLEEP();
	return (ifa);
}

/*
 * Find an interface on a specific network.  If many, choice
 * is most specific found.
 *
 * When 'ignore_ptp' is zero, AF_INET lookups on point-to-point
 * interfaces match on the destination address instead of the netmask.
 * The result is returned with a reference held, except for the AF_LINK
 * shortcut, which returns whatever ifaddr_byindex() returns.
 */
struct ifaddr *
ifa_ifwithnet(const struct sockaddr *addr, int ignore_ptp, int fibnum)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct ifaddr *ifa_maybe = NULL;
	u_int af = addr->sa_family;
	const char *addr_data = addr->sa_data, *cplim;

	/*
	 * AF_LINK addresses can be looked up directly by their index number,
	 * so do that if we can.
	 */
	if (af == AF_LINK) {
	    const struct sockaddr_dl *sdl = (const struct sockaddr_dl *)addr;
	    if (sdl->sdl_index && sdl->sdl_index <= V_if_index)
		return (ifaddr_byindex(sdl->sdl_index));
	}

	/*
	 * Scan though each interface, looking for ones that have addresses
	 * in this address family and the requested fib.  Maintain a reference
	 * on ifa_maybe once we find one, as we release the IF_ADDR_RLOCK() that
	 * kept it stable when we move onto the next interface.
	 */
	IFNET_RLOCK_NOSLEEP();
	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
		if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum))
			continue;
		IF_ADDR_RLOCK(ifp);
		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
			const char *cp, *cp2, *cp3;

			/*
			 * The "next" label lets the netmask comparison
			 * below jump to this continue statement.
			 */
			if (ifa->ifa_addr->sa_family != af)
next:				continue;
			if (af == AF_INET &&
			    ifp->if_flags & IFF_POINTOPOINT && !ignore_ptp) {
				/*
				 * This is a bit broken as it doesn't
				 * take into account that the remote end may
				 * be a single node in the network we are
				 * looking for.
				 * The trouble is that we don't know the
				 * netmask for the remote end.
				 */
				if (ifa->ifa_dstaddr != NULL &&
				    sa_equal(addr, ifa->ifa_dstaddr)) {
					ifa_ref(ifa);
					IF_ADDR_RUNLOCK(ifp);
					goto done;
				}
			} else {
				/*
				 * Scan all the bits in the ifa's address.
				 * If a bit disagrees with what we are
				 * looking for, mask it with the netmask
				 * to see if it really matters.
				 * (A byte at a time)
				 */
				if (ifa->ifa_netmask == 0)
					continue;
				cp = addr_data;
				cp2 = ifa->ifa_addr->sa_data;
				cp3 = ifa->ifa_netmask->sa_data;
				cplim = ifa->ifa_netmask->sa_len
					+ (char *)ifa->ifa_netmask;
				while (cp3 < cplim)
					if ((*cp++ ^ *cp2++) & *cp3++)
						goto next; /* next address! */
				/*
				 * If the netmask of what we just found
				 * is more specific than what we had before
				 * (if we had one), or if the virtual status
				 * of new prefix is better than of the old one,
				 * then remember the new one before continuing
				 * to search for an even better one.
				 */
				if (ifa_maybe == NULL ||
				    ifa_preferred(ifa_maybe, ifa) ||
				    rn_refines((caddr_t)ifa->ifa_netmask,
				    (caddr_t)ifa_maybe->ifa_netmask)) {
					if (ifa_maybe != NULL)
						ifa_free(ifa_maybe);
					ifa_maybe = ifa;
					ifa_ref(ifa_maybe);
				}
			}
		}
		IF_ADDR_RUNLOCK(ifp);
	}
	/* No exact match: hand the best candidate's reference to the caller. */
	ifa = ifa_maybe;
	ifa_maybe = NULL;
done:
	IFNET_RUNLOCK_NOSLEEP();
	/* An exact match superseded the candidate: drop its reference. */
	if (ifa_maybe != NULL)
		ifa_free(ifa_maybe);
	return (ifa);
}

/*
 * Find an interface address specific to an interface best matching
 * a given address.
*/ struct ifaddr * ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp) { struct ifaddr *ifa; const char *cp, *cp2, *cp3; char *cplim; struct ifaddr *ifa_maybe = NULL; u_int af = addr->sa_family; if (af >= AF_MAX) return (NULL); IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != af) continue; if (ifa_maybe == NULL) ifa_maybe = ifa; if (ifa->ifa_netmask == 0) { if (sa_equal(addr, ifa->ifa_addr) || (ifa->ifa_dstaddr && sa_equal(addr, ifa->ifa_dstaddr))) goto done; continue; } if (ifp->if_flags & IFF_POINTOPOINT) { if (sa_equal(addr, ifa->ifa_dstaddr)) goto done; } else { cp = addr->sa_data; cp2 = ifa->ifa_addr->sa_data; cp3 = ifa->ifa_netmask->sa_data; cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask; for (; cp3 < cplim; cp3++) if ((*cp++ ^ *cp2++) & *cp3) break; if (cp3 == cplim) goto done; } } ifa = ifa_maybe; done: if (ifa != NULL) ifa_ref(ifa); IF_ADDR_RUNLOCK(ifp); return (ifa); } /* * See whether new ifa is better than current one: * 1) A non-virtual one is preferred over virtual. * 2) A virtual in master state preferred over any other state. * * Used in several address selecting functions. */ int ifa_preferred(struct ifaddr *cur, struct ifaddr *next) { return (cur->ifa_carp && (!next->ifa_carp || ((*carp_master_p)(next) && !(*carp_master_p)(cur)))); } #include /* * Default action when installing a route with a Link Level gateway. * Lookup an appropriate real ifa to point to. * This should be moved to /sys/net/link.c eventually. 
*/ static void link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info) { struct ifaddr *ifa, *oifa; struct sockaddr *dst; struct ifnet *ifp; if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == NULL) || ((ifp = ifa->ifa_ifp) == NULL) || ((dst = rt_key(rt)) == NULL)) return; ifa = ifaof_ifpforaddr(dst, ifp); if (ifa) { oifa = rt->rt_ifa; rt->rt_ifa = ifa; ifa_free(oifa); if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest) ifa->ifa_rtrequest(cmd, rt, info); } } struct sockaddr_dl * link_alloc_sdl(size_t size, int flags) { return (malloc(size, M_TEMP, flags)); } void link_free_sdl(struct sockaddr *sa) { free(sa, M_TEMP); } /* * Fills in given sdl with interface basic info. * Returns pointer to filled sdl. */ struct sockaddr_dl * link_init_sdl(struct ifnet *ifp, struct sockaddr *paddr, u_char iftype) { struct sockaddr_dl *sdl; sdl = (struct sockaddr_dl *)paddr; memset(sdl, 0, sizeof(struct sockaddr_dl)); sdl->sdl_len = sizeof(struct sockaddr_dl); sdl->sdl_family = AF_LINK; sdl->sdl_index = ifp->if_index; sdl->sdl_type = iftype; return (sdl); } /* * Mark an interface down and notify protocols of * the transition. */ static void if_unroute(struct ifnet *ifp, int flag, int fam) { struct ifaddr *ifa; KASSERT(flag == IFF_UP, ("if_unroute: flag != IFF_UP")); ifp->if_flags &= ~flag; getmicrotime(&ifp->if_lastchange); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family)) pfctlinput(PRC_IFDOWN, ifa->ifa_addr); ifp->if_qflush(ifp); if (ifp->if_carp) (*carp_linkstate_p)(ifp); rt_ifmsg(ifp); } /* * Mark an interface up and notify protocols of * the transition. 
*/
static void
if_route(struct ifnet *ifp, int flag, int fam)
{
	struct ifaddr *ifa;

	KASSERT(flag == IFF_UP, ("if_route: flag != IFF_UP"));

	ifp->if_flags |= flag;
	getmicrotime(&ifp->if_lastchange);
	/* Notify every address of the requested family (or all of them). */
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
		if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
			pfctlinput(PRC_IFUP, ifa->ifa_addr);
	if (ifp->if_carp)
		(*carp_linkstate_p)(ifp);
	rt_ifmsg(ifp);
#ifdef INET6
	in6_if_up(ifp);
#endif
}

void	(*vlan_link_state_p)(struct ifnet *);	/* XXX: private from if_vlan */
void	(*vlan_trunk_cap_p)(struct ifnet *);		/* XXX: private from if_vlan */
struct ifnet *(*vlan_trunkdev_p)(struct ifnet *);
struct	ifnet *(*vlan_devat_p)(struct ifnet *, uint16_t);
int	(*vlan_tag_p)(struct ifnet *, uint16_t *);
int	(*vlan_setcookie_p)(struct ifnet *, void *);
void	*(*vlan_cookie_p)(struct ifnet *);

/*
 * Handle a change in the interface link state. To avoid LORs
 * between driver lock and upper layer locks, as well as possible
 * recursions, we post event to taskqueue, and all job
 * is done in static do_link_state_change().
 */
void
if_link_state_change(struct ifnet *ifp, int link_state)
{
	/* Return if state hasn't changed. */
	if (ifp->if_link_state == link_state)
		return;

	ifp->if_link_state = link_state;

	taskqueue_enqueue(taskqueue_swi, &ifp->if_linktask);
}

/*
 * Task handler queued by if_link_state_change().  'arg' is the ifnet;
 * 'pending' greater than 1 is reported as coalesced link state changes.
 * Runs in the interface's vnet and notifies the routing socket, the
 * vlan, netgraph, carp, bridge and lagg hooks when the corresponding
 * ifnet field is set, devctl (default vnet only), the optional log
 * message and the ifnet_link_event handlers.
 */
static void
do_link_state_change(void *arg, int pending)
{
	struct ifnet *ifp = (struct ifnet *)arg;
	int link_state = ifp->if_link_state;
	CURVNET_SET(ifp->if_vnet);

	/* Notify that the link state has changed. */
	rt_ifmsg(ifp);
	if (ifp->if_vlantrunk != NULL)
		(*vlan_link_state_p)(ifp);

	if ((ifp->if_type == IFT_ETHER || ifp->if_type == IFT_L2VLAN) &&
	    ifp->if_l2com != NULL)
		(*ng_ether_link_state_p)(ifp, link_state);
	if (ifp->if_carp)
		(*carp_linkstate_p)(ifp);
	if (ifp->if_bridge)
		(*bridge_linkstate_p)(ifp);
	if (ifp->if_lagg)
		(*lagg_linkstate_p)(ifp, link_state);

	if (IS_DEFAULT_VNET(curvnet))
		devctl_notify("IFNET", ifp->if_xname,
		    (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN",
		    NULL);
	if (pending > 1)
		if_printf(ifp, "%d link states coalesced\n", pending);
	if (log_link_state_change)
		log(LOG_NOTICE, "%s: link state changed to %s\n", ifp->if_xname,
		    (link_state == LINK_STATE_UP) ? "UP" : "DOWN" );
	EVENTHANDLER_INVOKE(ifnet_link_event, ifp, link_state);
	CURVNET_RESTORE();
}

/*
 * Mark an interface down and notify protocols of
 * the transition.
 */
void
if_down(struct ifnet *ifp)
{

	/* The DOWN event is delivered before the flag is cleared. */
	EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_DOWN);
	if_unroute(ifp, IFF_UP, AF_UNSPEC);
}

/*
 * Mark an interface up and notify protocols of
 * the transition.
 */
void
if_up(struct ifnet *ifp)
{

	/* The UP event is delivered after the flag has been set. */
	if_route(ifp, IFF_UP, AF_UNSPEC);
	EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_UP);
}

/*
 * Flush an interface queue.
 *
 * Frees every mbuf chain linked through m_nextpkt on ifp->if_snd and
 * resets the queue's head, tail and length, all under IFQ_LOCK.
 */
void
if_qflush(struct ifnet *ifp)
{
	struct mbuf *m, *n;
	struct ifaltq *ifq;

	ifq = &ifp->if_snd;
	IFQ_LOCK(ifq);
#ifdef ALTQ
	if (ALTQ_IS_ENABLED(ifq))
		ALTQ_PURGE(ifq);
#endif
	n = ifq->ifq_head;
	while ((m = n) != NULL) {
		/* Fetch the successor before the packet is freed. */
		n = m->m_nextpkt;
		m_freem(m);
	}
	ifq->ifq_head = 0;
	ifq->ifq_tail = 0;
	ifq->ifq_len = 0;
	IFQ_UNLOCK(ifq);
}

/*
 * Map interface name to interface structure pointer, with or without
 * returning a reference.
*/
/*
 * Referenced variant: interfaces flagged IFF_DYING are skipped, and a
 * match is returned with a reference taken via if_ref().
 */
struct ifnet *
ifunit_ref(const char *name)
{
	struct ifnet *ifp;

	IFNET_RLOCK_NOSLEEP();
	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
		if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0 &&
		    !(ifp->if_flags & IFF_DYING))
			break;
	}
	if (ifp != NULL)
		if_ref(ifp);
	IFNET_RUNLOCK_NOSLEEP();
	return (ifp);
}

/*
 * Unreferenced variant: returns the first interface whose if_xname
 * matches 'name' (IFF_DYING is not checked), or NULL.
 */
struct ifnet *
ifunit(const char *name)
{
	struct ifnet *ifp;

	IFNET_RLOCK_NOSLEEP();
	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
		if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0)
			break;
	}
	IFNET_RUNLOCK_NOSLEEP();
	return (ifp);
}

/*
 * Return the ifru_buffer.buffer pointer of an ifreq, reading the 32-bit
 * layout when the calling process has SV_ILP32 set.
 */
static void *
ifr_buffer_get_buffer(struct thread *td, void *data)
{
	union ifreq_union *ifrup;

	ifrup = data;
#ifdef COMPAT_FREEBSD32
	if (SV_PROC_FLAG(td->td_proc, SV_ILP32))
		return ((void *)(uintptr_t)
		    ifrup->ifr32.ifr_ifru.ifru_buffer.buffer);
#endif
	return (ifrup->ifr.ifr_ifru.ifru_buffer.buffer);
}

/* Clear the ifru_buffer.buffer pointer in the layout the caller uses. */
static void
ifr_buffer_set_buffer_null(struct thread *td, void *data)
{
	union ifreq_union *ifrup;

	ifrup = data;
#ifdef COMPAT_FREEBSD32
	if (SV_PROC_FLAG(td->td_proc, SV_ILP32))
		ifrup->ifr32.ifr_ifru.ifru_buffer.buffer = 0;
	else
#endif
		ifrup->ifr.ifr_ifru.ifru_buffer.buffer = NULL;
}

/* Return ifru_buffer.length from the layout the caller uses. */
static size_t
ifr_buffer_get_length(struct thread *td, void *data)
{
	union ifreq_union *ifrup;

	ifrup = data;
#ifdef COMPAT_FREEBSD32
	if (SV_PROC_FLAG(td->td_proc, SV_ILP32))
		return (ifrup->ifr32.ifr_ifru.ifru_buffer.length);
#endif
	return (ifrup->ifr.ifr_ifru.ifru_buffer.length);
}

/* Store 'len' into ifru_buffer.length in the layout the caller uses. */
static void
ifr_buffer_set_length(struct thread *td, void *data, size_t len)
{
	union ifreq_union *ifrup;

	ifrup = data;
#ifdef COMPAT_FREEBSD32
	if (SV_PROC_FLAG(td->td_proc, SV_ILP32))
		ifrup->ifr32.ifr_ifru.ifru_buffer.length = len;
	else
#endif
		ifrup->ifr.ifr_ifru.ifru_buffer.length = len;
}

/*
 * Return the ifru_data pointer of an ifreq, reading the 32-bit layout
 * when the current process has SV_ILP32 set.  (Function added by this
 * change; the diff markers are kept as-is.)
 */
+void *
+ifr_data_get_ptr(void *ifrp)
+{
+	union ifreq_union *ifrup;
+
+	ifrup = ifrp;
+#ifdef COMPAT_FREEBSD32
+	if (SV_CURPROC_FLAG(SV_ILP32))
+		return ((void *)(uintptr_t)
+		    ifrup->ifr32.ifr_ifru.ifru_data);
+#endif
+	return (ifrup->ifr.ifr_ifru.ifru_data);
+}
+
/*
 * Hardware specific interface ioctls.
*/
/*
 * Handles the interface ioctls listed in the switch below for 'ifp'.
 * 'data' is interpreted as a struct ifreq (or struct ifgroupreq for the
 * group commands).  Commands that modify state first pass priv_check().
 * Returns ENOIOCTL for any command not handled here.
 */
static int
ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
{
	struct ifreq *ifr;
	int error = 0, do_ifup = 0;
	int new_flags, temp_flags;
	size_t namelen, onamelen;
	size_t descrlen;
	char *descrbuf, *odescrbuf;
	char new_name[IFNAMSIZ];
	struct ifaddr *ifa;
	struct sockaddr_dl *sdl;

	ifr = (struct ifreq *)data;
	switch (cmd) {
	case SIOCGIFINDEX:
		ifr->ifr_index = ifp->if_index;
		break;

	case SIOCGIFFLAGS:
		/* Low 16 bits go to ifr_flags, the rest to ifr_flagshigh. */
		temp_flags = ifp->if_flags | ifp->if_drv_flags;
		ifr->ifr_flags = temp_flags & 0xffff;
		ifr->ifr_flagshigh = temp_flags >> 16;
		break;

	case SIOCGIFCAP:
		ifr->ifr_reqcap = ifp->if_capabilities;
		ifr->ifr_curcap = ifp->if_capenable;
		break;

#ifdef MAC
	case SIOCGIFMAC:
		error = mac_ifnet_ioctl_get(td->td_ucred, ifr, ifp);
		break;
#endif

	case SIOCGIFMETRIC:
		ifr->ifr_metric = ifp->if_metric;
		break;

	case SIOCGIFMTU:
		ifr->ifr_mtu = ifp->if_mtu;
		break;

	case SIOCGIFPHYS:
		/* XXXGL: did this ever worked? */
		ifr->ifr_phys = 0;
		break;

	case SIOCGIFDESCR:
		error = 0;
		sx_slock(&ifdescr_sx);
		if (ifp->if_description == NULL)
			error = ENOMSG;
		else {
			/* space for terminating nul */
			descrlen = strlen(ifp->if_description) + 1;
			/*
			 * A too-small user buffer gets its pointer cleared;
			 * the required length is reported either way.
			 */
			if (ifr_buffer_get_length(td, ifr) < descrlen)
				ifr_buffer_set_buffer_null(td, ifr);
			else
				error = copyout(ifp->if_description,
				    ifr_buffer_get_buffer(td, ifr), descrlen);
			ifr_buffer_set_length(td, ifr, descrlen);
		}
		sx_sunlock(&ifdescr_sx);
		break;

	case SIOCSIFDESCR:
		error = priv_check(td, PRIV_NET_SETIFDESCR);
		if (error)
			return (error);

		/*
		 * Copy only (length-1) bytes to make sure that
		 * if_description is always nul terminated.  The
		 * length parameter is supposed to count the
		 * terminating nul in.
		 */
		if (ifr_buffer_get_length(td, ifr) > ifdescr_maxlen)
			return (ENAMETOOLONG);
		else if (ifr_buffer_get_length(td, ifr) == 0)
			descrbuf = NULL;
		else {
			descrbuf = malloc(ifr_buffer_get_length(td, ifr),
			    M_IFDESCR, M_WAITOK | M_ZERO);
			error = copyin(ifr_buffer_get_buffer(td, ifr), descrbuf,
			    ifr_buffer_get_length(td, ifr) - 1);
			if (error) {
				free(descrbuf, M_IFDESCR);
				break;
			}
		}

		/* Swap in the new description; free the old one unlocked. */
		sx_xlock(&ifdescr_sx);
		odescrbuf = ifp->if_description;
		ifp->if_description = descrbuf;
		sx_xunlock(&ifdescr_sx);

		getmicrotime(&ifp->if_lastchange);
		free(odescrbuf, M_IFDESCR);
		break;

	case SIOCGIFFIB:
		ifr->ifr_fib = ifp->if_fib;
		break;

	case SIOCSIFFIB:
		error = priv_check(td, PRIV_NET_SETIFFIB);
		if (error)
			return (error);
		if (ifr->ifr_fib >= rt_numfibs)
			return (EINVAL);

		ifp->if_fib = ifr->ifr_fib;
		break;

	case SIOCSIFFLAGS:
		error = priv_check(td, PRIV_NET_SETIFFLAGS);
		if (error)
			return (error);
		/*
		 * Currently, no driver owned flags pass the IFF_CANTCHANGE
		 * check, so we don't need special handling here yet.
		 */
		new_flags = (ifr->ifr_flags & 0xffff) |
		    (ifr->ifr_flagshigh << 16);
		/* Going down happens now; coming up waits for the driver. */
		if (ifp->if_flags & IFF_UP &&
		    (new_flags & IFF_UP) == 0) {
			if_down(ifp);
		} else if (new_flags & IFF_UP &&
		    (ifp->if_flags & IFF_UP) == 0) {
			do_ifup = 1;
		}
		/* See if permanently promiscuous mode bit is about to flip */
		if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) {
			if (new_flags & IFF_PPROMISC)
				ifp->if_flags |= IFF_PROMISC;
			else if (ifp->if_pcount == 0)
				ifp->if_flags &= ~IFF_PROMISC;
			if (log_promisc_mode_change)
				log(LOG_INFO, "%s: permanently promiscuous mode %s\n",
				    ifp->if_xname,
				    ((new_flags & IFF_PPROMISC) ?
				     "enabled" : "disabled"));
		}
		ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
			(new_flags &~ IFF_CANTCHANGE);
		if (ifp->if_ioctl) {
			(void) (*ifp->if_ioctl)(ifp, cmd, data);
		}
		if (do_ifup)
			if_up(ifp);
		getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCSIFCAP:
		error = priv_check(td, PRIV_NET_SETIFCAP);
		if (error)
			return (error);
		if (ifp->if_ioctl == NULL)
			return (EOPNOTSUPP);
		/* Refuse capabilities the interface does not advertise. */
		if (ifr->ifr_reqcap & ~ifp->if_capabilities)
			return (EINVAL);
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

#ifdef MAC
	case SIOCSIFMAC:
		error = mac_ifnet_ioctl_set(td->td_ucred, ifr, ifp);
		break;
#endif

	case SIOCSIFNAME:
		error = priv_check(td, PRIV_NET_SETIFNAME);
		if (error)
			return (error);
-		error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL);
+		error = copyinstr(ifr_data_get_ptr(ifr), new_name, IFNAMSIZ,
+		    NULL);
		if (error != 0)
			return (error);
		if (new_name[0] == '\0')
			return (EINVAL);
		if (new_name[IFNAMSIZ-1] != '\0') {
			new_name[IFNAMSIZ-1] = '\0';
			if (strlen(new_name) == IFNAMSIZ-1)
				return (EINVAL);
		}
		if (ifunit(new_name) != NULL)
			return (EEXIST);

		/*
		 * XXX: Locking.  Nothing else seems to lock if_flags,
		 * and there are numerous other races with the
		 * ifunit() checks not being atomic with namespace
		 * changes (renames, vmoves, if_attach, etc).
		 */
		ifp->if_flags |= IFF_RENAMING;

		/* Announce the departure of the interface. */
		rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
		EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);

		log(LOG_INFO, "%s: changing name to '%s'\n",
		    ifp->if_xname, new_name);

		IF_ADDR_WLOCK(ifp);
		strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
		ifa = ifp->if_addr;
		sdl = (struct sockaddr_dl *)ifa->ifa_addr;
		namelen = strlen(new_name);
		onamelen = sdl->sdl_nlen;
		/*
		 * Move the address if needed.  This is safe because we
		 * allocate space for a name of length IFNAMSIZ when we
		 * create this in if_attach().
		 */
		if (namelen != onamelen) {
			bcopy(sdl->sdl_data + onamelen,
			    sdl->sdl_data + namelen, sdl->sdl_alen);
		}
		bcopy(new_name, sdl->sdl_data, namelen);
		sdl->sdl_nlen = namelen;
		/* Rebuild the netmask: 0xff over the new name's bytes. */
		sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
		bzero(sdl->sdl_data, onamelen);
		while (namelen != 0)
			sdl->sdl_data[--namelen] = 0xff;
		IF_ADDR_WUNLOCK(ifp);

		EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
		/* Announce the return of the interface. */
		rt_ifannouncemsg(ifp, IFAN_ARRIVAL);

		ifp->if_flags &= ~IFF_RENAMING;
		break;

#ifdef VIMAGE
	case SIOCSIFVNET:
		error = priv_check(td, PRIV_NET_SETIFVNET);
		if (error)
			return (error);
		error = if_vmove_loan(td, ifp, ifr->ifr_name, ifr->ifr_jid);
		break;
#endif

	case SIOCSIFMETRIC:
		error = priv_check(td, PRIV_NET_SETIFMETRIC);
		if (error)
			return (error);
		ifp->if_metric = ifr->ifr_metric;
		getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCSIFPHYS:
		error = priv_check(td, PRIV_NET_SETIFPHYS);
		if (error)
			return (error);
		if (ifp->if_ioctl == NULL)
			return (EOPNOTSUPP);
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

	case SIOCSIFMTU:
	{
		u_long oldmtu = ifp->if_mtu;

		error = priv_check(td, PRIV_NET_SETIFMTU);
		if (error)
			return (error);
		if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU)
			return (EINVAL);
		if (ifp->if_ioctl == NULL)
			return (EOPNOTSUPP);
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		if (error == 0) {
			getmicrotime(&ifp->if_lastchange);
			rt_ifmsg(ifp);
		}
		/*
		 * If the link MTU changed, do network layer specific procedure.
		 */
		if (ifp->if_mtu != oldmtu) {
#ifdef INET6
			nd6_setmtu(ifp);
#endif
			rt_updatemtu(ifp);
		}
		break;
	}

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		if (cmd == SIOCADDMULTI)
			error = priv_check(td, PRIV_NET_ADDMULTI);
		else
			error = priv_check(td, PRIV_NET_DELMULTI);
		if (error)
			return (error);

		/* Don't allow group membership on non-multicast interfaces. */
		if ((ifp->if_flags & IFF_MULTICAST) == 0)
			return (EOPNOTSUPP);

		/* Don't let users screw up protocols' entries. */
		if (ifr->ifr_addr.sa_family != AF_LINK)
			return (EINVAL);

		if (cmd == SIOCADDMULTI) {
			struct ifmultiaddr *ifma;

			/*
			 * Userland is only permitted to join groups once
			 * via the if_addmulti() KPI, because it cannot hold
			 * struct ifmultiaddr * between calls. It may also
			 * lose a race while we check if the membership
			 * already exists.
			 */
			IF_ADDR_RLOCK(ifp);
			ifma = if_findmulti(ifp, &ifr->ifr_addr);
			IF_ADDR_RUNLOCK(ifp);
			if (ifma != NULL)
				error = EADDRINUSE;
			else
				error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
		} else {
			error = if_delmulti(ifp, &ifr->ifr_addr);
		}
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

	/* Privileged requests that are simply forwarded to the driver. */
	case SIOCSIFPHYADDR:
	case SIOCDIFPHYADDR:
#ifdef INET6
	case SIOCSIFPHYADDR_IN6:
#endif
	case SIOCSIFMEDIA:
	case SIOCSIFGENERIC:
		error = priv_check(td, PRIV_NET_HWIOCTL);
		if (error)
			return (error);
		if (ifp->if_ioctl == NULL)
			return (EOPNOTSUPP);
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		if (error == 0)
			getmicrotime(&ifp->if_lastchange);
		break;

	/* Requests forwarded to the driver without a privilege check. */
	case SIOCGIFSTATUS:
	case SIOCGIFPSRCADDR:
	case SIOCGIFPDSTADDR:
	case SIOCGIFMEDIA:
	case SIOCGIFXMEDIA:
	case SIOCGIFGENERIC:
	case SIOCGIFRSSKEY:
	case SIOCGIFRSSHASH:
		if (ifp->if_ioctl == NULL)
			return (EOPNOTSUPP);
		error = (*ifp->if_ioctl)(ifp, cmd, data);
		break;

	case SIOCSIFLLADDR:
		error = priv_check(td, PRIV_NET_SETLLADDR);
		if (error)
			return (error);
		error = if_setlladdr(ifp,
		    ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
		break;

	case SIOCGHWADDR:
		error = if_gethwaddr(ifp, ifr);
		break;

	case SIOCAIFGROUP:
	{
		struct ifgroupreq *ifgr = (struct ifgroupreq *)ifr;

		error = priv_check(td, PRIV_NET_ADDIFGROUP);
		if (error)
			return (error);
		if ((error = if_addgroup(ifp, ifgr->ifgr_group)))
			return (error);
		break;
	}

	case SIOCGIFGROUP:
		if ((error = if_getgroup((struct ifgroupreq *)ifr, ifp)))
			return (error);
		break;

	case SIOCDIFGROUP:
	{
		struct ifgroupreq *ifgr = (struct ifgroupreq *)ifr;

		error = priv_check(td, PRIV_NET_DELIFGROUP);
		if (error)
			return (error);
		if ((error = if_delgroup(ifp, ifgr->ifgr_group)))
			return (error);
		break;
	}

	default:
		/* Not handled here; the caller tries other layers. */
		error = ENOIOCTL;
		break;
	}
	return (error);
}

#ifdef COMPAT_FREEBSD32
/* Layout of struct ifconf with 32-bit pointer-sized members. */
struct ifconf32 {
	int32_t	ifc_len;
	union {
		uint32_t	ifcu_buf;
		uint32_t	ifcu_req;
	} ifc_ifcu;
};
#define	SIOCGIFCONF32	_IOWR('i', 36, struct ifconf32)
#endif

/*
 * Interface ioctls.
 *
 * Runs in the socket's vnet.  Commands that do not name an existing
 * interface are handled directly; everything else looks the interface
 * up by ifr_name (ENXIO when not found), is offered to ifhwioctl() and,
 * when that returns ENOIOCTL, is passed to the socket's protocol control
 * method and finally to the driver.
 */
int
ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
{
	struct ifnet *ifp;
	struct ifreq *ifr;
	int error;
	int oif_flags;
#ifdef VIMAGE
	int shutdown;
#endif

	CURVNET_SET(so->so_vnet);
#ifdef VIMAGE
	/* Make sure the VNET is stable. */
	shutdown = (so->so_vnet->vnet_state > SI_SUB_VNET &&
	    so->so_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0;
	if (shutdown) {
		CURVNET_RESTORE();
		return (EBUSY);
	}
#endif


	switch (cmd) {
	case SIOCGIFCONF:
		error = ifconf(cmd, data);
		CURVNET_RESTORE();
		return (error);

#ifdef COMPAT_FREEBSD32
	case SIOCGIFCONF32:
		{
			struct ifconf32 *ifc32;
			struct ifconf ifc;

			/* Convert to the native layout, call, copy length back. */
			ifc32 = (struct ifconf32 *)data;
			ifc.ifc_len = ifc32->ifc_len;
			ifc.ifc_buf = PTRIN(ifc32->ifc_buf);

			error = ifconf(SIOCGIFCONF, (void *)&ifc);
			CURVNET_RESTORE();
			if (error == 0)
				ifc32->ifc_len = ifc.ifc_len;
			return (error);
		}
#endif
	}
	ifr = (struct ifreq *)data;

	switch (cmd) {
#ifdef VIMAGE
	case SIOCSIFRVNET:
		error = priv_check(td, PRIV_NET_SETIFVNET);
		if (error == 0)
			error = if_vmove_reclaim(td, ifr->ifr_name,
			    ifr->ifr_jid);
		CURVNET_RESTORE();
		return (error);
#endif
	case SIOCIFCREATE:
	case SIOCIFCREATE2:
		error = priv_check(td, PRIV_NET_IFCREATE);
		if (error == 0)
			error = if_clone_create(ifr->ifr_name,
-			    sizeof(ifr->ifr_name),
-			    cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL);
+			    sizeof(ifr->ifr_name), cmd == SIOCIFCREATE2 ?
+			    ifr_data_get_ptr(ifr) : NULL);
		CURVNET_RESTORE();
		return (error);
	case SIOCIFDESTROY:
		error = priv_check(td, PRIV_NET_IFDESTROY);
		if (error == 0)
			error = if_clone_destroy(ifr->ifr_name);
		CURVNET_RESTORE();
		return (error);

	case SIOCIFGCLONERS:
		error = if_clone_list((struct if_clonereq *)data);
		CURVNET_RESTORE();
		return (error);
	case SIOCGIFGMEMB:
		error = if_getgroupmembers((struct ifgroupreq *)data);
		CURVNET_RESTORE();
		return (error);
#if defined(INET) || defined(INET6)
	case SIOCSVH:
	case SIOCGVH:
		if (carp_ioctl_p == NULL)
			error = EPROTONOSUPPORT;
		else
			error = (*carp_ioctl_p)(ifr, cmd, td);
		CURVNET_RESTORE();
		return (error);
#endif
	}

	/* From here on a referenced ifnet is held until if_rele(). */
	ifp = ifunit_ref(ifr->ifr_name);
	if (ifp == NULL) {
		CURVNET_RESTORE();
		return (ENXIO);
	}

	error = ifhwioctl(cmd, ifp, data, td);
	if (error != ENOIOCTL) {
		if_rele(ifp);
		CURVNET_RESTORE();
		return (error);
	}

	/* Remember the flags to detect an IFF_UP transition below. */
	oif_flags = ifp->if_flags;
	if (so->so_proto == NULL) {
		if_rele(ifp);
		CURVNET_RESTORE();
		return (EOPNOTSUPP);
	}

	/*
	 * Pass the request on to the socket control method, and if the
	 * latter returns EOPNOTSUPP, directly to the interface.
	 *
	 * Make an exception for the legacy SIOCSIF* requests.  Drivers
	 * trust SIOCSIFADDR et al to come from an already privileged
	 * layer, and do not perform any credentials checks or input
	 * validation.
	 */
	error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd, data,
	    ifp, td));
	if (error == EOPNOTSUPP && ifp != NULL && ifp->if_ioctl != NULL &&
	    cmd != SIOCSIFADDR && cmd != SIOCSIFBRDADDR &&
	    cmd != SIOCSIFDSTADDR && cmd != SIOCSIFNETMASK)
		error = (*ifp->if_ioctl)(ifp, cmd, data);

	if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
#ifdef INET6
		if (ifp->if_flags & IFF_UP)
			in6_if_up(ifp);
#endif
	}
	if_rele(ifp);
	CURVNET_RESTORE();
	return (error);
}

/*
 * The code common to handling reference counted flags,
 * e.g., in ifpromisc() and if_allmulti().
 * The "pflag" argument can specify a permanent mode flag to check,
 * such as IFF_PPROMISC for promiscuous mode; should be 0 if none.
* * Only to be used on stack-owned flags, not driver-owned flags. */ static int if_setflag(struct ifnet *ifp, int flag, int pflag, int *refcount, int onswitch) { struct ifreq ifr; int error; int oldflags, oldcount; /* Sanity checks to catch programming errors */ KASSERT((flag & (IFF_DRV_OACTIVE|IFF_DRV_RUNNING)) == 0, ("%s: setting driver-owned flag %d", __func__, flag)); if (onswitch) KASSERT(*refcount >= 0, ("%s: increment negative refcount %d for flag %d", __func__, *refcount, flag)); else KASSERT(*refcount > 0, ("%s: decrement non-positive refcount %d for flag %d", __func__, *refcount, flag)); /* In case this mode is permanent, just touch refcount */ if (ifp->if_flags & pflag) { *refcount += onswitch ? 1 : -1; return (0); } /* Save ifnet parameters for if_ioctl() may fail */ oldcount = *refcount; oldflags = ifp->if_flags; /* * See if we aren't the only and touching refcount is enough. * Actually toggle interface flag if we are the first or last. */ if (onswitch) { if ((*refcount)++) return (0); ifp->if_flags |= flag; } else { if (--(*refcount)) return (0); ifp->if_flags &= ~flag; } /* Call down the driver since we've changed interface flags */ if (ifp->if_ioctl == NULL) { error = EOPNOTSUPP; goto recover; } ifr.ifr_flags = ifp->if_flags & 0xffff; ifr.ifr_flagshigh = ifp->if_flags >> 16; error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr); if (error) goto recover; /* Notify userland that interface flags have changed */ rt_ifmsg(ifp); return (0); recover: /* Recover after driver error */ *refcount = oldcount; ifp->if_flags = oldflags; return (error); } /* * Set/clear promiscuous mode on interface ifp based on the truth value * of pswitch. The calls are reference counted so that only the first * "on" request actually has an effect, as does the final "off" request. * Results are undefined if the "off" and "on" requests are not matched. 
*/ int ifpromisc(struct ifnet *ifp, int pswitch) { int error; int oldflags = ifp->if_flags; error = if_setflag(ifp, IFF_PROMISC, IFF_PPROMISC, &ifp->if_pcount, pswitch); /* If promiscuous mode status has changed, log a message */ if (error == 0 && ((ifp->if_flags ^ oldflags) & IFF_PROMISC) && log_promisc_mode_change) log(LOG_INFO, "%s: promiscuous mode %s\n", ifp->if_xname, (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled"); return (error); } /* * Return interface configuration * of system. List may be used * in later ioctl's (above) to get * other information. */ /*ARGSUSED*/ static int ifconf(u_long cmd, caddr_t data) { struct ifconf *ifc = (struct ifconf *)data; struct ifnet *ifp; struct ifaddr *ifa; struct ifreq ifr; struct sbuf *sb; int error, full = 0, valid_len, max_len; /* Limit initial buffer size to MAXPHYS to avoid DoS from userspace. */ max_len = MAXPHYS - 1; /* Prevent hostile input from being able to crash the system */ if (ifc->ifc_len <= 0) return (EINVAL); again: if (ifc->ifc_len <= max_len) { max_len = ifc->ifc_len; full = 1; } sb = sbuf_new(NULL, NULL, max_len + 1, SBUF_FIXEDLEN); max_len = 0; valid_len = 0; IFNET_RLOCK(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { int addrs; /* * Zero the ifr_name buffer to make sure we don't * disclose the contents of the stack. 
*/ memset(ifr.ifr_name, 0, sizeof(ifr.ifr_name)); if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name)) >= sizeof(ifr.ifr_name)) { sbuf_delete(sb); IFNET_RUNLOCK(); return (ENAMETOOLONG); } addrs = 0; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { struct sockaddr *sa = ifa->ifa_addr; if (prison_if(curthread->td_ucred, sa) != 0) continue; addrs++; if (sa->sa_len <= sizeof(*sa)) { ifr.ifr_addr = *sa; sbuf_bcat(sb, &ifr, sizeof(ifr)); max_len += sizeof(ifr); } else { sbuf_bcat(sb, &ifr, offsetof(struct ifreq, ifr_addr)); max_len += offsetof(struct ifreq, ifr_addr); sbuf_bcat(sb, sa, sa->sa_len); max_len += sa->sa_len; } if (sbuf_error(sb) == 0) valid_len = sbuf_len(sb); } IF_ADDR_RUNLOCK(ifp); if (addrs == 0) { bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr)); sbuf_bcat(sb, &ifr, sizeof(ifr)); max_len += sizeof(ifr); if (sbuf_error(sb) == 0) valid_len = sbuf_len(sb); } } IFNET_RUNLOCK(); /* * If we didn't allocate enough space (uncommon), try again. If * we have already allocated as much space as we are allowed, * return what we've got. */ if (valid_len != max_len && !full) { sbuf_delete(sb); goto again; } ifc->ifc_len = valid_len; sbuf_finish(sb); error = copyout(sbuf_data(sb), ifc->ifc_req, ifc->ifc_len); sbuf_delete(sb); return (error); } /* * Just like ifpromisc(), but for all-multicast-reception mode. */ int if_allmulti(struct ifnet *ifp, int onswitch) { return (if_setflag(ifp, IFF_ALLMULTI, 0, &ifp->if_amcount, onswitch)); } struct ifmultiaddr * if_findmulti(struct ifnet *ifp, const struct sockaddr *sa) { struct ifmultiaddr *ifma; IF_ADDR_LOCK_ASSERT(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (sa->sa_family == AF_LINK) { if (sa_dl_equal(ifma->ifma_addr, sa)) break; } else { if (sa_equal(ifma->ifma_addr, sa)) break; } } return ifma; } /* * Allocate a new ifmultiaddr and initialize based on passed arguments. We * make copies of passed sockaddrs. 
The ifmultiaddr will not be added to * the ifnet multicast address list here, so the caller must do that and * other setup work (such as notifying the device driver). The reference * count is initialized to 1. */ static struct ifmultiaddr * if_allocmulti(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr *llsa, int mflags) { struct ifmultiaddr *ifma; struct sockaddr *dupsa; ifma = malloc(sizeof *ifma, M_IFMADDR, mflags | M_ZERO); if (ifma == NULL) return (NULL); dupsa = malloc(sa->sa_len, M_IFMADDR, mflags); if (dupsa == NULL) { free(ifma, M_IFMADDR); return (NULL); } bcopy(sa, dupsa, sa->sa_len); ifma->ifma_addr = dupsa; ifma->ifma_ifp = ifp; ifma->ifma_refcount = 1; ifma->ifma_protospec = NULL; if (llsa == NULL) { ifma->ifma_lladdr = NULL; return (ifma); } dupsa = malloc(llsa->sa_len, M_IFMADDR, mflags); if (dupsa == NULL) { free(ifma->ifma_addr, M_IFMADDR); free(ifma, M_IFMADDR); return (NULL); } bcopy(llsa, dupsa, llsa->sa_len); ifma->ifma_lladdr = dupsa; return (ifma); } /* * if_freemulti: free ifmultiaddr structure and possibly attached related * addresses. The caller is responsible for implementing reference * counting, notifying the driver, handling routing messages, and releasing * any dependent link layer state. */ static void if_freemulti(struct ifmultiaddr *ifma) { KASSERT(ifma->ifma_refcount == 0, ("if_freemulti: refcount %d", ifma->ifma_refcount)); if (ifma->ifma_lladdr != NULL) free(ifma->ifma_lladdr, M_IFMADDR); free(ifma->ifma_addr, M_IFMADDR); free(ifma, M_IFMADDR); } /* * Register an additional multicast address with a network interface. * * - If the address is already present, bump the reference count on the * address and return. * - If the address is not link-layer, look up a link layer address. * - Allocate address structures for one or both addresses, and attach to the * multicast address list on the interface. 
 *   If automatically adding a link
 *   layer address, the protocol address will own a reference to the link
 *   layer address, to be freed when it is freed.
 * - Notify the network device driver of an addition to the multicast address
 *   list.
 *
 * 'sa' points to caller-owned memory with the desired multicast address.
 *
 * 'retifma' will be used to return a pointer to the resulting multicast
 * address reference, if desired.
 *
 * Returns 0 on success, ENOMEM if an address structure could not be
 * allocated, or the error reported by the interface's if_resolvemulti
 * callback.
 */
int
if_addmulti(struct ifnet *ifp, struct sockaddr *sa,
    struct ifmultiaddr **retifma)
{
	struct ifmultiaddr *ifma, *ll_ifma;
	struct sockaddr *llsa;
	struct sockaddr_dl sdl;
	int error;

	/*
	 * If the address is already present, return a new reference to it;
	 * otherwise, allocate storage and set up a new address.
	 */
	IF_ADDR_WLOCK(ifp);
	ifma = if_findmulti(ifp, sa);
	if (ifma != NULL) {
		ifma->ifma_refcount++;
		if (retifma != NULL)
			*retifma = ifma;
		IF_ADDR_WUNLOCK(ifp);
		return (0);
	}

	/*
	 * The address isn't already present; resolve the protocol address
	 * into a link layer address, and then look that up, bump its
	 * refcount or allocate an ifma for that also.
	 * Most link layer resolving functions returns address data which
	 * fits inside default sockaddr_dl structure. However callback
	 * can allocate another sockaddr structure, in that case we need to
	 * free it later.
	 */
	llsa = NULL;
	ll_ifma = NULL;
	if (ifp->if_resolvemulti != NULL) {
		/* Provide called function with buffer size information */
		sdl.sdl_len = sizeof(sdl);
		llsa = (struct sockaddr *)&sdl;
		error = ifp->if_resolvemulti(ifp, &llsa, sa);
		if (error)
			goto unlock_out;
	}

	/*
	 * Allocate the new address.  Don't hook it up yet, as we may also
	 * need to allocate a link layer multicast address.
	 */
	ifma = if_allocmulti(ifp, sa, llsa, M_NOWAIT);
	if (ifma == NULL) {
		error = ENOMEM;
		goto free_llsa_out;
	}

	/*
	 * If a link layer address is found, we'll need to see if it's
	 * already present in the address list, or allocate it as well.
	 * When this block finishes, the link layer address will be on the
	 * list.
	 */
	if (llsa != NULL) {
		ll_ifma = if_findmulti(ifp, llsa);
		if (ll_ifma == NULL) {
			ll_ifma = if_allocmulti(ifp, llsa, NULL, M_NOWAIT);
			if (ll_ifma == NULL) {
				/*
				 * Drop the initial reference first:
				 * if_freemulti() asserts a zero refcount.
				 */
				--ifma->ifma_refcount;
				if_freemulti(ifma);
				error = ENOMEM;
				goto free_llsa_out;
			}
			TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ll_ifma,
			    ifma_link);
		} else
			ll_ifma->ifma_refcount++;
		ifma->ifma_llifma = ll_ifma;
	}

	/*
	 * We now have a new multicast address, ifma, and possibly a new or
	 * referenced link layer address.  Add the primary address to the
	 * ifnet address list.
	 */
	TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);

	if (retifma != NULL)
		*retifma = ifma;

	/*
	 * Must generate the message while holding the lock so that 'ifma'
	 * pointer is still valid.
	 */
	rt_newmaddrmsg(RTM_NEWMADDR, ifma);
	IF_ADDR_WUNLOCK(ifp);

	/*
	 * We are certain we have added something, so call down to the
	 * interface to let them know about it.
	 */
	if (ifp->if_ioctl != NULL) {
		(void) (*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0);
	}

	/* Free llsa only if the resolver replaced our on-stack sdl buffer. */
	if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl))
		link_free_sdl(llsa);

	return (0);

free_llsa_out:
	/* Same ownership rule as above: never free the on-stack sdl. */
	if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl))
		link_free_sdl(llsa);

unlock_out:
	IF_ADDR_WUNLOCK(ifp);
	return (error);
}

/*
 * Delete a multicast group membership by network-layer group address.
 *
 * Returns ENOENT if the entry could not be found.  If ifp no longer
 * exists, results are undefined.  This entry point should only be used
 * from subsystems which do appropriate locking to hold ifp for the
 * duration of the call.
 * Network-layer protocol domains must use if_delmulti_ifma().
 */
int
if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
{
	struct ifmultiaddr *ifma;
	int lastref;
#ifdef INVARIANTS
	struct ifnet *oifp;

	/* Debug kernels verify that ifp is still on the global ifnet list. */
	IFNET_RLOCK_NOSLEEP();
	TAILQ_FOREACH(oifp, &V_ifnet, if_link)
		if (ifp == oifp)
			break;
	if (ifp != oifp)
		ifp = NULL;
	IFNET_RUNLOCK_NOSLEEP();

	KASSERT(ifp != NULL, ("%s: ifnet went away", __func__));
#endif
	if (ifp == NULL)
		return (ENOENT);

	IF_ADDR_WLOCK(ifp);
	lastref = 0;
	ifma = if_findmulti(ifp, sa);
	if (ifma != NULL)
		lastref = if_delmulti_locked(ifp, ifma, 0);
	IF_ADDR_WUNLOCK(ifp);

	/* ifma is only tested against NULL here, never dereferenced. */
	if (ifma == NULL)
		return (ENOENT);

	/* Notify the driver only when the last reference went away. */
	if (lastref && ifp->if_ioctl != NULL) {
		(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
	}

	return (0);
}

/*
 * Delete all multicast group membership for an interface.
 * Should be used to quickly flush all multicast filters.
 *
 * Drops one reference from every entry on the list; the driver's
 * if_ioctl is not called from here.
 */
void
if_delallmulti(struct ifnet *ifp)
{
	struct ifmultiaddr *ifma;
	struct ifmultiaddr *next;

	IF_ADDR_WLOCK(ifp);
	/* _SAFE variant: if_delmulti_locked() may unlink and free ifma. */
	TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next)
		if_delmulti_locked(ifp, ifma, 0);
	IF_ADDR_WUNLOCK(ifp);
}

/*
 * Delete a multicast group membership by group membership pointer.
 * Network-layer protocol domains must use this routine.
 *
 * It is safe to call this routine if the ifp disappeared.
 */
void
if_delmulti_ifma(struct ifmultiaddr *ifma)
{
	struct ifnet *ifp;
	int lastref;

	ifp = ifma->ifma_ifp;
#ifdef DIAGNOSTIC
	if (ifp == NULL) {
		printf("%s: ifma_ifp seems to be detached\n", __func__);
	} else {
		struct ifnet *oifp;

		/* Treat an ifp missing from the global list as detached. */
		IFNET_RLOCK_NOSLEEP();
		TAILQ_FOREACH(oifp, &V_ifnet, if_link)
			if (ifp == oifp)
				break;
		if (ifp != oifp) {
			printf("%s: ifnet %p disappeared\n", __func__, ifp);
			ifp = NULL;
		}
		IFNET_RUNLOCK_NOSLEEP();
	}
#endif
	/*
	 * If and only if the ifnet instance exists: Acquire the address lock.
	 */
	if (ifp != NULL)
		IF_ADDR_WLOCK(ifp);

	lastref = if_delmulti_locked(ifp, ifma, 0);

	if (ifp != NULL) {
		/*
		 * If and only if the ifnet instance exists:
		 *  Release the address lock.
		 *  If the group was left: update the hardware hash filter.
		 */
		IF_ADDR_WUNLOCK(ifp);
		if (lastref && ifp->if_ioctl != NULL) {
			(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
		}
	}
}

/*
 * Perform deletion of network-layer and/or link-layer multicast address.
 *
 * Return 0 if the reference count was decremented.
 * Return 1 if the final reference was released, indicating that the
 * hardware hash filter should be reprogrammed.
 *
 * 'ifp' may be NULL; the interface actually used for list manipulation is
 * re-read from ifma->ifma_ifp.  A non-zero 'detaching' additionally clears
 * the ifnet back-pointers of the affected entries.
 */
static int
if_delmulti_locked(struct ifnet *ifp, struct ifmultiaddr *ifma, int detaching)
{
	struct ifmultiaddr *ll_ifma;

	if (ifp != NULL && ifma->ifma_ifp != NULL) {
		KASSERT(ifma->ifma_ifp == ifp,
		    ("%s: inconsistent ifp %p", __func__, ifp));
		IF_ADDR_WLOCK_ASSERT(ifp);
	}

	/* From here on trust the entry's own back-pointer (may be NULL). */
	ifp = ifma->ifma_ifp;

	/*
	 * If the ifnet is detaching, null out references to ifnet,
	 * so that upper protocol layers will notice, and not attempt
	 * to obtain locks for an ifnet which no longer exists. The
	 * routing socket announcement must happen before the ifnet
	 * instance is detached from the system.
	 */
	if (detaching) {
#ifdef DIAGNOSTIC
		printf("%s: detaching ifnet instance %p\n", __func__, ifp);
#endif
		/*
		 * ifp may already be nulled out if we are being reentered
		 * to delete the ll_ifma.
		 */
		if (ifp != NULL) {
			rt_newmaddrmsg(RTM_DELMADDR, ifma);
			ifma->ifma_ifp = NULL;
		}
	}

	/* Other references remain: nothing to unlink or free yet. */
	if (--ifma->ifma_refcount > 0)
		return 0;

	/*
	 * If this ifma is a network-layer ifma, a link-layer ifma may
	 * have been associated with it. Release it first if so.
	 */
	ll_ifma = ifma->ifma_llifma;
	if (ll_ifma != NULL) {
		KASSERT(ifma->ifma_lladdr != NULL,
		    ("%s: llifma w/o lladdr", __func__));
		if (detaching)
			ll_ifma->ifma_ifp = NULL;	/* XXX */
		if (--ll_ifma->ifma_refcount == 0) {
			if (ifp != NULL) {
				TAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma,
				    ifma_link);
			}
			if_freemulti(ll_ifma);
		}
	}

	if (ifp != NULL)
		TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);

	if_freemulti(ifma);

	/*
	 * The last reference to this instance of struct ifmultiaddr
	 * was released; the hardware should be notified of this change.
	 */
	return 1;
}

/*
 * Set the link layer address on an interface.
* * At this time we only support certain types of interfaces, * and we don't allow the length of the address to change. * * Set noinline to be dtrace-friendly */ __noinline int if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len) { struct sockaddr_dl *sdl; struct ifaddr *ifa; struct ifreq ifr; IF_ADDR_RLOCK(ifp); ifa = ifp->if_addr; if (ifa == NULL) { IF_ADDR_RUNLOCK(ifp); return (EINVAL); } ifa_ref(ifa); IF_ADDR_RUNLOCK(ifp); sdl = (struct sockaddr_dl *)ifa->ifa_addr; if (sdl == NULL) { ifa_free(ifa); return (EINVAL); } if (len != sdl->sdl_alen) { /* don't allow length to change */ ifa_free(ifa); return (EINVAL); } switch (ifp->if_type) { case IFT_ETHER: case IFT_FDDI: case IFT_XETHER: case IFT_L2VLAN: case IFT_BRIDGE: case IFT_ARCNET: case IFT_IEEE8023ADLAG: bcopy(lladdr, LLADDR(sdl), len); ifa_free(ifa); break; default: ifa_free(ifa); return (ENODEV); } /* * If the interface is already up, we need * to re-init it in order to reprogram its * address filter. */ if ((ifp->if_flags & IFF_UP) != 0) { if (ifp->if_ioctl) { ifp->if_flags &= ~IFF_UP; ifr.ifr_flags = ifp->if_flags & 0xffff; ifr.ifr_flagshigh = ifp->if_flags >> 16; (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr); ifp->if_flags |= IFF_UP; ifr.ifr_flags = ifp->if_flags & 0xffff; ifr.ifr_flagshigh = ifp->if_flags >> 16; (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr); } } EVENTHANDLER_INVOKE(iflladdr_event, ifp); return (0); } /* * Compat function for handling basic encapsulation requests. * Not converted stacks (FDDI, IB, ..) supports traditional * output model: ARP (and other similar L2 protocols) are handled * inside output routine, arpresolve/nd6_resolve() returns MAC * address instead of full prepend. * * This function creates calculated header==MAC for IPv4/IPv6 and * returns EAFNOSUPPORT (which is then handled in ARP code) for other * address families. 
*/ static int if_requestencap_default(struct ifnet *ifp, struct if_encap_req *req) { if (req->rtype != IFENCAP_LL) return (EOPNOTSUPP); if (req->bufsize < req->lladdr_len) return (ENOMEM); switch (req->family) { case AF_INET: case AF_INET6: break; default: return (EAFNOSUPPORT); } /* Copy lladdr to storage as is */ memmove(req->buf, req->lladdr, req->lladdr_len); req->bufsize = req->lladdr_len; req->lladdr_off = 0; return (0); } /* * Get the link layer address that was read from the hardware at attach. * * This is only set by Ethernet NICs (IFT_ETHER), but laggX interfaces re-type * their component interfaces as IFT_IEEE8023ADLAG. */ int if_gethwaddr(struct ifnet *ifp, struct ifreq *ifr) { if (ifp->if_hw_addr == NULL) return (ENODEV); switch (ifp->if_type) { case IFT_ETHER: case IFT_IEEE8023ADLAG: bcopy(ifp->if_hw_addr, ifr->ifr_addr.sa_data, ifp->if_addrlen); return (0); default: return (ENODEV); } } /* * The name argument must be a pointer to storage which will last as * long as the interface does. For physical devices, the result of * device_get_name(dev) is a good choice and for pseudo-devices a * static string works well. */ void if_initname(struct ifnet *ifp, const char *name, int unit) { ifp->if_dname = name; ifp->if_dunit = unit; if (unit != IF_DUNIT_NONE) snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit); else strlcpy(ifp->if_xname, name, IFNAMSIZ); } int if_printf(struct ifnet *ifp, const char * fmt, ...) 
{ va_list ap; int retval; retval = printf("%s: ", ifp->if_xname); va_start(ap, fmt); retval += vprintf(fmt, ap); va_end(ap); return (retval); } void if_start(struct ifnet *ifp) { (*(ifp)->if_start)(ifp); } /* * Backwards compatibility interface for drivers * that have not implemented it */ static int if_transmit(struct ifnet *ifp, struct mbuf *m) { int error; IFQ_HANDOFF(ifp, m, error); return (error); } static void if_input_default(struct ifnet *ifp __unused, struct mbuf *m) { m_freem(m); } int if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, int adjust) { int active = 0; IF_LOCK(ifq); if (_IF_QFULL(ifq)) { IF_UNLOCK(ifq); if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); m_freem(m); return (0); } if (ifp != NULL) { if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len + adjust); if (m->m_flags & (M_BCAST|M_MCAST)) if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); active = ifp->if_drv_flags & IFF_DRV_OACTIVE; } _IF_ENQUEUE(ifq, m); IF_UNLOCK(ifq); if (ifp != NULL && !active) (*(ifp)->if_start)(ifp); return (1); } void if_register_com_alloc(u_char type, if_com_alloc_t *a, if_com_free_t *f) { KASSERT(if_com_alloc[type] == NULL, ("if_register_com_alloc: %d already registered", type)); KASSERT(if_com_free[type] == NULL, ("if_register_com_alloc: %d free already registered", type)); if_com_alloc[type] = a; if_com_free[type] = f; } void if_deregister_com_alloc(u_char type) { KASSERT(if_com_alloc[type] != NULL, ("if_deregister_com_alloc: %d not registered", type)); KASSERT(if_com_free[type] != NULL, ("if_deregister_com_alloc: %d free not registered", type)); if_com_alloc[type] = NULL; if_com_free[type] = NULL; } /* API for driver access to network stack owned ifnet.*/ uint64_t if_setbaudrate(struct ifnet *ifp, uint64_t baudrate) { uint64_t oldbrate; oldbrate = ifp->if_baudrate; ifp->if_baudrate = baudrate; return (oldbrate); } uint64_t if_getbaudrate(if_t ifp) { return (((struct ifnet *)ifp)->if_baudrate); } int if_setcapabilities(if_t ifp, int capabilities) { 
((struct ifnet *)ifp)->if_capabilities = capabilities; return (0); } int if_setcapabilitiesbit(if_t ifp, int setbit, int clearbit) { ((struct ifnet *)ifp)->if_capabilities |= setbit; ((struct ifnet *)ifp)->if_capabilities &= ~clearbit; return (0); } int if_getcapabilities(if_t ifp) { return ((struct ifnet *)ifp)->if_capabilities; } int if_setcapenable(if_t ifp, int capabilities) { ((struct ifnet *)ifp)->if_capenable = capabilities; return (0); } int if_setcapenablebit(if_t ifp, int setcap, int clearcap) { if(setcap) ((struct ifnet *)ifp)->if_capenable |= setcap; if(clearcap) ((struct ifnet *)ifp)->if_capenable &= ~clearcap; return (0); } const char * if_getdname(if_t ifp) { return ((struct ifnet *)ifp)->if_dname; } int if_togglecapenable(if_t ifp, int togglecap) { ((struct ifnet *)ifp)->if_capenable ^= togglecap; return (0); } int if_getcapenable(if_t ifp) { return ((struct ifnet *)ifp)->if_capenable; } /* * This is largely undesirable because it ties ifnet to a device, but does * provide flexiblity for an embedded product vendor. Should be used with * the understanding that it violates the interface boundaries, and should be * a last resort only. 
 */
int
if_setdev(if_t ifp, void *dev)
{
	/* Currently a no-op: neither argument is used. */
	return (0);
}

/* Set 'set_flags', then clear 'clear_flags', in if_drv_flags; returns 0. */
int
if_setdrvflagbits(if_t ifp, int set_flags, int clear_flags)
{
	((struct ifnet *)ifp)->if_drv_flags |= set_flags;
	((struct ifnet *)ifp)->if_drv_flags &= ~clear_flags;

	return (0);
}

/* Return if_drv_flags. */
int
if_getdrvflags(if_t ifp)
{
	return ((struct ifnet *)ifp)->if_drv_flags;
}

/* Overwrite if_drv_flags; always returns 0. */
int
if_setdrvflags(if_t ifp, int flags)
{
	((struct ifnet *)ifp)->if_drv_flags = flags;
	return (0);
}

/* Overwrite if_flags; always returns 0. */
int
if_setflags(if_t ifp, int flags)
{
	((struct ifnet *)ifp)->if_flags = flags;
	return (0);
}

/* Set 'set', then clear 'clear', in if_flags; always returns 0. */
int
if_setflagbits(if_t ifp, int set, int clear)
{
	((struct ifnet *)ifp)->if_flags |= set;
	((struct ifnet *)ifp)->if_flags &= ~clear;

	return (0);
}

/* Return if_flags. */
int
if_getflags(if_t ifp)
{
	return ((struct ifnet *)ifp)->if_flags;
}

/* Reset if_hwassist to 0; always returns 0. */
int
if_clearhwassist(if_t ifp)
{
	((struct ifnet *)ifp)->if_hwassist = 0;
	return (0);
}

/* Set 'toset', then clear 'toclear', in if_hwassist; always returns 0. */
int
if_sethwassistbits(if_t ifp, int toset, int toclear)
{
	((struct ifnet *)ifp)->if_hwassist |= toset;
	((struct ifnet *)ifp)->if_hwassist &= ~toclear;

	return (0);
}

/* Overwrite if_hwassist; always returns 0. */
int
if_sethwassist(if_t ifp, int hwassist_bit)
{
	((struct ifnet *)ifp)->if_hwassist = hwassist_bit;
	return (0);
}

/* Return if_hwassist. */
int
if_gethwassist(if_t ifp)
{
	return ((struct ifnet *)ifp)->if_hwassist;
}

/* Overwrite if_mtu; always returns 0. */
int
if_setmtu(if_t ifp, int mtu)
{
	((struct ifnet *)ifp)->if_mtu = mtu;
	return (0);
}

/* Return if_mtu. */
int
if_getmtu(if_t ifp)
{
	return ((struct ifnet *)ifp)->if_mtu;
}

/*
 * Return the MTU reported by the dom_ifmtu hook of the first domain whose
 * dom_family equals 'family' and that provides such a hook; otherwise
 * fall back to if_mtu.
 */
int
if_getmtu_family(if_t ifp, int family)
{
	struct domain *dp;

	for (dp = domains; dp; dp = dp->dom_next) {
		if (dp->dom_family == family && dp->dom_ifmtu != NULL)
			return (dp->dom_ifmtu((struct ifnet *)ifp));
	}

	return (((struct ifnet *)ifp)->if_mtu);
}

/* Overwrite if_softc; always returns 0. */
int
if_setsoftc(if_t ifp, void *softc)
{
	((struct ifnet *)ifp)->if_softc = softc;
	return (0);
}

/* Return if_softc. */
void *
if_getsoftc(if_t ifp)
{
	return ((struct ifnet *)ifp)->if_softc;
}

/* Record 'ifp' as the mbuf packet header's rcvif. */
void
if_setrcvif(struct mbuf *m, if_t ifp)
{
	m->m_pkthdr.rcvif = (struct ifnet *)ifp;
}

/* Store 'tag' in the mbuf packet header's ether_vtag. */
void
if_setvtag(struct mbuf *m, uint16_t tag)
{
	m->m_pkthdr.ether_vtag = tag;
}

/* Return the mbuf packet header's ether_vtag. */
uint16_t
if_getvtag(struct mbuf *m)
{

	return (m->m_pkthdr.ether_vtag);
}

/* Return the result of IFQ_DRV_IS_EMPTY() on the if_snd queue. */
int
if_sendq_empty(if_t ifp)
{
	return IFQ_DRV_IS_EMPTY(&((struct ifnet *)ifp)->if_snd);
}

/* Return if_addr. */
struct ifaddr *
if_getifaddr(if_t ifp)
{
	return ((struct ifnet *)ifp)->if_addr;
}

/* Return if_amcount. */
int
if_getamcount(if_t ifp)
{
	return ((struct ifnet *)ifp)->if_amcount;
}


/* Apply IFQ_SET_READY() to the if_snd queue; always returns 0. */
int
if_setsendqready(if_t ifp)
{
	IFQ_SET_READY(&((struct ifnet *)ifp)->if_snd);
	return (0);
}

/*
 * Set both the if_snd queue's maximum length and its ifq_drv_maxlen to
 * 'tx_desc_count'; always returns 0.
 */
int
if_setsendqlen(if_t ifp, int tx_desc_count)
{
	IFQ_SET_MAXLEN(&((struct ifnet *)ifp)->if_snd, tx_desc_count);
	((struct ifnet *)ifp)->if_snd.ifq_drv_maxlen = tx_desc_count;

	return (0);
}

/* Return 1 if if_vlantrunk is non-NULL, 0 otherwise. */
int
if_vlantrunkinuse(if_t ifp)
{
	return ((struct ifnet *)ifp)->if_vlantrunk != NULL?1:0;
}

/* Pass 'sendmp' to the interface's if_input method; always returns 0. */
int
if_input(if_t ifp, struct mbuf* sendmp)
{
	(*((struct ifnet *)ifp)->if_input)((struct ifnet *)ifp, sendmp);
	return (0);

}

/* XXX */
#ifndef ETH_ADDR_LEN
#define ETH_ADDR_LEN 6
#endif

/*
 * Copy up to 'max' AF_LINK multicast addresses, ETH_ADDR_LEN bytes each,
 * into the byte array 'mta' and store the number copied in '*cnt'.
 * Always returns 0.  No lock is taken here; if_multiaddr_array() below
 * wraps this call in if_maddr_rlock()/if_maddr_runlock().
 */
int
if_setupmultiaddr(if_t ifp, void *mta, int *cnt, int max)
{
	struct ifmultiaddr *ifma;
	uint8_t *lmta = (uint8_t *)mta;
	int mcnt = 0;

	TAILQ_FOREACH(ifma, &((struct ifnet *)ifp)->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;

		if (mcnt == max)
			break;

		bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
		    &lmta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN);
		mcnt++;
	}
	*cnt = mcnt;

	return (0);
}

/* if_setupmultiaddr() performed under the multicast address read lock. */
int
if_multiaddr_array(if_t ifp, void *mta, int *cnt, int max)
{
	int error;

	if_maddr_rlock(ifp);
	error = if_setupmultiaddr(ifp, mta, cnt, max);
	if_maddr_runlock(ifp);
	return (error);
}

/*
 * Count the AF_LINK entries on the multicast address list, stopping as
 * soon as the count equals 'max'.
 */
int
if_multiaddr_count(if_t ifp, int max)
{
	struct ifmultiaddr *ifma;
	int count;

	count = 0;
	if_maddr_rlock(ifp);
	TAILQ_FOREACH(ifma, &((struct ifnet *)ifp)->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_LINK)
			continue;
		count++;
		if (count == max)
			break;
	}
	if_maddr_runlock(ifp);
	return (count);
}

/*
 * Call 'filter(arg, ifma, running_total)' for every multicast address
 * under the read lock and return the sum of the filter's return values.
 */
int
if_multi_apply(struct ifnet *ifp, int (*filter)(void *, struct ifmultiaddr *, int), void *arg)
{
	struct ifmultiaddr *ifma;
	int cnt = 0;

	if_maddr_rlock(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link)
		cnt += filter(arg, ifma, cnt);
	if_maddr_runlock(ifp);
	return (cnt);
}

/* Dequeue one mbuf from if_snd with IFQ_DRV_DEQUEUE() and return it. */
struct mbuf *
if_dequeue(if_t ifp)
{
	struct mbuf *m;
	IFQ_DRV_DEQUEUE(&((struct ifnet *)ifp)->if_snd, m);

	return (m);
}

/* Put 'm' back at the head of if_snd; always returns 0. */
int
if_sendq_prepend(if_t ifp, struct mbuf *m)
{
	IFQ_DRV_PREPEND(&((struct ifnet *)ifp)->if_snd, m);
	return (0);
}

/* Overwrite if_hdrlen; always returns 0. */
int
if_setifheaderlen(if_t ifp, int len)
{
	((struct ifnet *)ifp)->if_hdrlen = len;
	return (0);
}

/* Return IF_LLADDR() of the interface. */
caddr_t
if_getlladdr(if_t ifp)
{
	return (IF_LLADDR((struct ifnet *)ifp));
}

/* Return the result of if_alloc(type). */
void *
if_gethandle(u_char type)
{
	return (if_alloc(type));
}

/* Function wrapper around the BPF_MTAP() macro. */
void
if_bpfmtap(if_t ifh, struct mbuf *m)
{
	struct ifnet *ifp = (struct ifnet *)ifh;

	BPF_MTAP(ifp, m);
}

/* Function wrapper around the ETHER_BPF_MTAP() macro. */
void
if_etherbpfmtap(if_t ifh, struct mbuf *m)
{
	struct ifnet *ifp = (struct ifnet *)ifh;

	ETHER_BPF_MTAP(ifp, m);
}

/* Function wrapper around the VLAN_CAPABILITIES() macro. */
void
if_vlancap(if_t ifh)
{
	struct ifnet *ifp = (struct ifnet *)ifh;
	VLAN_CAPABILITIES(ifp);
}

/* Overwrite if_hw_tsomax; always returns 0. */
int
if_sethwtsomax(if_t ifp, u_int if_hw_tsomax)
{

	((struct ifnet *)ifp)->if_hw_tsomax = if_hw_tsomax;
	return (0);
}

/* Overwrite if_hw_tsomaxsegcount; always returns 0. */
int
if_sethwtsomaxsegcount(if_t ifp, u_int if_hw_tsomaxsegcount)
{

	((struct ifnet *)ifp)->if_hw_tsomaxsegcount = if_hw_tsomaxsegcount;
	return (0);
}

/* Overwrite if_hw_tsomaxsegsize; always returns 0. */
int
if_sethwtsomaxsegsize(if_t ifp, u_int if_hw_tsomaxsegsize)
{

	((struct ifnet *)ifp)->if_hw_tsomaxsegsize = if_hw_tsomaxsegsize;
	return (0);
}

/* Return if_hw_tsomax. */
u_int
if_gethwtsomax(if_t ifp)
{

	return (((struct ifnet *)ifp)->if_hw_tsomax);
}

/* Return if_hw_tsomaxsegcount. */
u_int
if_gethwtsomaxsegcount(if_t ifp)
{

	return (((struct ifnet *)ifp)->if_hw_tsomaxsegcount);
}

/* Return if_hw_tsomaxsegsize. */
u_int
if_gethwtsomaxsegsize(if_t ifp)
{

	return (((struct ifnet *)ifp)->if_hw_tsomaxsegsize);
}

/* Install 'init_fn' as the interface's if_init method. */
void
if_setinitfn(if_t ifp, void (*init_fn)(void *))
{
	((struct ifnet *)ifp)->if_init = init_fn;
}

/* Install 'ioctl_fn' as the interface's if_ioctl method. */
void
if_setioctlfn(if_t ifp, int (*ioctl_fn)(if_t, u_long, caddr_t))
{
	((struct ifnet *)ifp)->if_ioctl = (void *)ioctl_fn;
}

/* Install 'start_fn' as the interface's if_start method. */
void
if_setstartfn(if_t ifp, void (*start_fn)(if_t))
{
	((struct ifnet *)ifp)->if_start = (void *)start_fn;
}

/* Install 'start_fn' as the interface's if_transmit method. */
void
if_settransmitfn(if_t ifp, if_transmit_fn_t start_fn)
{
	((struct ifnet *)ifp)->if_transmit = start_fn;
}

/* Install 'flush_fn' as the interface's if_qflush method. */
void if_setqflushfn(if_t ifp, if_qflush_fn_t flush_fn)
{
	((struct ifnet *)ifp)->if_qflush = flush_fn;

}

/* Install 'fn' as the interface's if_get_counter method. */
void
if_setgetcounterfn(if_t ifp, if_get_counter_t fn)
{

	ifp->if_get_counter = fn;
}

/* Revisit these - These are inline functions originally. */

/* Out-of-line wrapper for drbr_inuse(). */
int
drbr_inuse_drv(if_t ifh, struct buf_ring *br)
{
	return drbr_inuse(ifh, br);
}

/* Out-of-line wrapper for drbr_dequeue(). */
struct mbuf*
drbr_dequeue_drv(if_t ifh, struct buf_ring *br)
{
	return drbr_dequeue(ifh, br);
}

/* Out-of-line wrapper for drbr_needs_enqueue(). */
int
drbr_needs_enqueue_drv(if_t ifh, struct buf_ring *br)
{
	return drbr_needs_enqueue(ifh, br);
}

/* Out-of-line wrapper for drbr_enqueue(). */
int
drbr_enqueue_drv(if_t ifh, struct buf_ring *br, struct mbuf *m)
{
	return drbr_enqueue(ifh, br, m);

}
Index: head/sys/net/if.h
===================================================================
--- head/sys/net/if.h (revision 331796)
+++ head/sys/net/if.h (revision 331797)
@@ -1,595 +1,597 @@
/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1982, 1986, 1989, 1993
 * The Regents of the University of California. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)if.h 8.1 (Berkeley) 6/10/93 * $FreeBSD$ */ #ifndef _NET_IF_H_ #define _NET_IF_H_ #include #if __BSD_VISIBLE /* * <net/if.h> does not depend on <sys/time.h> on most other systems. This * helps userland compatibility. (struct timeval ifi_lastchange) * The same holds for <sys/socket.h>. (struct sockaddr ifru_addr) */ #ifndef _KERNEL #include #include #endif #endif /* * Length of interface external name, including terminating '\0'. * Note: this is the same size as a generic device's external name. */ #define IF_NAMESIZE 16 #if __BSD_VISIBLE #define IFNAMSIZ IF_NAMESIZE #define IF_MAXUNIT 0x7fff /* historical value */ #endif #if __BSD_VISIBLE /* * Structure used to query names of interface cloners. */ struct if_clonereq { int ifcr_total; /* total cloners (out) */ int ifcr_count; /* room for this many in user buffer */ char *ifcr_buffer; /* buffer for cloner names */ }; /* * Structure describing information about an interface * which may be of interest to management entities.
*/ struct if_data { /* generic interface information */ uint8_t ifi_type; /* ethernet, tokenring, etc */ uint8_t ifi_physical; /* e.g., AUI, Thinnet, 10base-T, etc */ uint8_t ifi_addrlen; /* media address length */ uint8_t ifi_hdrlen; /* media header length */ uint8_t ifi_link_state; /* current link state */ uint8_t ifi_vhid; /* carp vhid */ uint16_t ifi_datalen; /* length of this data struct */ uint32_t ifi_mtu; /* maximum transmission unit */ uint32_t ifi_metric; /* routing metric (external only) */ uint64_t ifi_baudrate; /* linespeed */ /* volatile statistics */ uint64_t ifi_ipackets; /* packets received on interface */ uint64_t ifi_ierrors; /* input errors on interface */ uint64_t ifi_opackets; /* packets sent on interface */ uint64_t ifi_oerrors; /* output errors on interface */ uint64_t ifi_collisions; /* collisions on csma interfaces */ uint64_t ifi_ibytes; /* total number of octets received */ uint64_t ifi_obytes; /* total number of octets sent */ uint64_t ifi_imcasts; /* packets received via multicast */ uint64_t ifi_omcasts; /* packets sent via multicast */ uint64_t ifi_iqdrops; /* dropped on input */ uint64_t ifi_oqdrops; /* dropped on output */ uint64_t ifi_noproto; /* destined for unsupported protocol */ uint64_t ifi_hwassist; /* HW offload capabilities, see IFCAP */ /* Unions are here to make sizes MI. */ union { /* uptime at attach or stat reset */ time_t tt; uint64_t ph; } __ifi_epoch; #define ifi_epoch __ifi_epoch.tt union { /* time of last administrative change */ struct timeval tv; struct { uint64_t ph1; uint64_t ph2; } ph; } __ifi_lastchange; #define ifi_lastchange __ifi_lastchange.tv }; /*- * Interface flags are of two types: network stack owned flags, and driver * owned flags. 
Historically, these values were stored in the same ifnet * flags field, but with the advent of fine-grained locking, they have been * broken out such that the network stack is responsible for synchronizing * the stack-owned fields, and the device driver the device-owned fields. * Both halves can perform lockless reads of the other half's field, subject * to accepting the involved races. * * Both sets of flags come from the same number space, and should not be * permitted to conflict, as they are exposed to user space via a single * field. * * The following symbols identify read and write requirements for fields: * * (i) if_flags field set by device driver before attach, read-only there * after. * (n) if_flags field written only by the network stack, read by either the * stack or driver. * (d) if_drv_flags field written only by the device driver, read by either * the stack or driver. */ #define IFF_UP 0x1 /* (n) interface is up */ #define IFF_BROADCAST 0x2 /* (i) broadcast address valid */ #define IFF_DEBUG 0x4 /* (n) turn on debugging */ #define IFF_LOOPBACK 0x8 /* (i) is a loopback net */ #define IFF_POINTOPOINT 0x10 /* (i) is a point-to-point link */ /* 0x20 was IFF_SMART */ #define IFF_DRV_RUNNING 0x40 /* (d) resources allocated */ #define IFF_NOARP 0x80 /* (n) no address resolution protocol */ #define IFF_PROMISC 0x100 /* (n) receive all packets */ #define IFF_ALLMULTI 0x200 /* (n) receive all multicast packets */ #define IFF_DRV_OACTIVE 0x400 /* (d) tx hardware queue is full */ #define IFF_SIMPLEX 0x800 /* (i) can't hear own transmissions */ #define IFF_LINK0 0x1000 /* per link layer defined bit */ #define IFF_LINK1 0x2000 /* per link layer defined bit */ #define IFF_LINK2 0x4000 /* per link layer defined bit */ #define IFF_ALTPHYS IFF_LINK2 /* use alternate physical connection */ #define IFF_MULTICAST 0x8000 /* (i) supports multicast */ #define IFF_CANTCONFIG 0x10000 /* (i) unconfigurable using ioctl(2) */ #define IFF_PPROMISC 0x20000 /* (n) user-requested 
promisc mode */ #define IFF_MONITOR 0x40000 /* (n) user-requested monitor mode */ #define IFF_STATICARP 0x80000 /* (n) static ARP */ #define IFF_DYING 0x200000 /* (n) interface is winding down */ #define IFF_RENAMING 0x400000 /* (n) interface is being renamed */ /* * Old names for driver flags so that user space tools can continue to use * the old (portable) names. */ #ifndef _KERNEL #define IFF_RUNNING IFF_DRV_RUNNING #define IFF_OACTIVE IFF_DRV_OACTIVE #endif /* flags set internally only: */ #define IFF_CANTCHANGE \ (IFF_BROADCAST|IFF_POINTOPOINT|IFF_DRV_RUNNING|IFF_DRV_OACTIVE|\ IFF_SIMPLEX|IFF_MULTICAST|IFF_ALLMULTI|IFF_PROMISC|\ IFF_DYING|IFF_CANTCONFIG) /* * Values for if_link_state. */ #define LINK_STATE_UNKNOWN 0 /* link invalid/unknown */ #define LINK_STATE_DOWN 1 /* link is down */ #define LINK_STATE_UP 2 /* link is up */ /* * Some convenience macros used for setting ifi_baudrate. * XXX 1000 vs. 1024? --thorpej@netbsd.org */ #define IF_Kbps(x) ((uintmax_t)(x) * 1000) /* kilobits/sec. */ #define IF_Mbps(x) (IF_Kbps((x) * 1000)) /* megabits/sec. */ #define IF_Gbps(x) (IF_Mbps((x) * 1000)) /* gigabits/sec. */ /* * Capabilities that interfaces can advertise. * * struct ifnet.if_capabilities * contains the optional features & capabilities a particular interface * supports (not only the driver but also the detected hw revision). * Capabilities are defined by IFCAP_* below. * struct ifnet.if_capenable * contains the enabled (either by default or through ifconfig) optional * features & capabilities on this interface. * Capabilities are defined by IFCAP_* below. * struct if_data.ifi_hwassist in mbuf CSUM_ flag form, controlled by above * contains the enabled optional features & capabilities that can be used * individually per packet and are specified in the mbuf pkthdr.csum_flags * field. IFCAP_* and CSUM_* do not match one to one and CSUM_* may be * more detailed or differentiated than IFCAP_*.
* Hwassist features are defined CSUM_* in sys/mbuf.h * * Capabilities that cannot be arbitrarily changed with ifconfig/ioctl * are listed in IFCAP_CANTCHANGE, similar to IFF_CANTCHANGE. * This is not strictly necessary because the common code never * changes capabilities, and it is left to the individual driver * to do the right thing. However, having the filter here * avoids replication of the same code in all individual drivers. */ #define IFCAP_RXCSUM 0x00001 /* can offload checksum on RX */ #define IFCAP_TXCSUM 0x00002 /* can offload checksum on TX */ #define IFCAP_NETCONS 0x00004 /* can be a network console */ #define IFCAP_VLAN_MTU 0x00008 /* VLAN-compatible MTU */ #define IFCAP_VLAN_HWTAGGING 0x00010 /* hardware VLAN tag support */ #define IFCAP_JUMBO_MTU 0x00020 /* 9000 byte MTU supported */ #define IFCAP_POLLING 0x00040 /* driver supports polling */ #define IFCAP_VLAN_HWCSUM 0x00080 /* can do IFCAP_HWCSUM on VLANs */ #define IFCAP_TSO4 0x00100 /* can do TCP Segmentation Offload */ #define IFCAP_TSO6 0x00200 /* can do TCP6 Segmentation Offload */ #define IFCAP_LRO 0x00400 /* can do Large Receive Offload */ #define IFCAP_WOL_UCAST 0x00800 /* wake on any unicast frame */ #define IFCAP_WOL_MCAST 0x01000 /* wake on any multicast frame */ #define IFCAP_WOL_MAGIC 0x02000 /* wake on any Magic Packet */ #define IFCAP_TOE4 0x04000 /* interface can offload TCP */ #define IFCAP_TOE6 0x08000 /* interface can offload TCP6 */ #define IFCAP_VLAN_HWFILTER 0x10000 /* interface hw can filter vlan tag */ /* available 0x20000 */ #define IFCAP_VLAN_HWTSO 0x40000 /* can do IFCAP_TSO on VLANs */ #define IFCAP_LINKSTATE 0x80000 /* the runtime link state is dynamic */ #define IFCAP_NETMAP 0x100000 /* netmap mode supported/enabled */ #define IFCAP_RXCSUM_IPV6 0x200000 /* can offload checksum on IPv6 RX */ #define IFCAP_TXCSUM_IPV6 0x400000 /* can offload checksum on IPv6 TX */ #define IFCAP_HWSTATS 0x800000 /* manages counters internally */ #define IFCAP_TXRTLMT 0x1000000 /* 
hardware supports TX rate limiting */ #define IFCAP_HWRXTSTMP 0x2000000 /* hardware rx timestamping */ #define IFCAP_HWCSUM_IPV6 (IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6) #define IFCAP_HWCSUM (IFCAP_RXCSUM | IFCAP_TXCSUM) #define IFCAP_TSO (IFCAP_TSO4 | IFCAP_TSO6) #define IFCAP_WOL (IFCAP_WOL_UCAST | IFCAP_WOL_MCAST | IFCAP_WOL_MAGIC) #define IFCAP_TOE (IFCAP_TOE4 | IFCAP_TOE6) #define IFCAP_CANTCHANGE (IFCAP_NETMAP) #define IFQ_MAXLEN 50 #define IFNET_SLOWHZ 1 /* granularity is 1 second */ /* * Message format for use in obtaining information about interfaces * from getkerninfo and the routing socket * For the new, extensible interface see struct if_msghdrl below. */ struct if_msghdr { u_short ifm_msglen; /* to skip over non-understood messages */ u_char ifm_version; /* future binary compatibility */ u_char ifm_type; /* message type */ int ifm_addrs; /* like rtm_addrs */ int ifm_flags; /* value of if_flags */ u_short ifm_index; /* index for associated ifp */ struct if_data ifm_data;/* statistics and other data about if */ }; /* * The 'l' version shall be used by new interfaces, like NET_RT_IFLISTL. It is * extensible after ifm_data_off or within ifm_data. Both the if_msghdr and * if_data now have a member field detailing the struct length in addition to * the routing message length. Macros are provided to find the start of * ifm_data and the start of the socket address structures immediately following * struct if_msghdrl given a pointer to struct if_msghdrl.
*/ #define IF_MSGHDRL_IFM_DATA(_l) \ (struct if_data *)((char *)(_l) + (_l)->ifm_data_off) #define IF_MSGHDRL_RTA(_l) \ (void *)((uintptr_t)(_l) + (_l)->ifm_len) struct if_msghdrl { u_short ifm_msglen; /* to skip over non-understood messages */ u_char ifm_version; /* future binary compatibility */ u_char ifm_type; /* message type */ int ifm_addrs; /* like rtm_addrs */ int ifm_flags; /* value of if_flags */ u_short ifm_index; /* index for associated ifp */ u_short _ifm_spare1; /* spare space to grow if_index, see if_var.h */ u_short ifm_len; /* length of if_msghdrl incl. if_data */ u_short ifm_data_off; /* offset of if_data from beginning */ struct if_data ifm_data;/* statistics and other data about if */ }; /* * Message format for use in obtaining information about interface addresses * from getkerninfo and the routing socket * For the new, extensible interface see struct ifa_msghdrl below. */ struct ifa_msghdr { u_short ifam_msglen; /* to skip over non-understood messages */ u_char ifam_version; /* future binary compatibility */ u_char ifam_type; /* message type */ int ifam_addrs; /* like rtm_addrs */ int ifam_flags; /* value of ifa_flags */ u_short ifam_index; /* index for associated ifp */ int ifam_metric; /* value of ifa_ifp->if_metric */ }; /* * The 'l' version shall be used by new interfaces, like NET_RT_IFLISTL. It is * extensible after ifam_metric or within ifam_data. Both the ifa_msghdrl and * if_data now have a member field detailing the struct length in addition to * the routing message length. Macros are provided to find the start of * ifam_data and the start of the socket address structures immediately following * struct ifa_msghdrl given a pointer to struct ifa_msghdrl.
*/ #define IFA_MSGHDRL_IFAM_DATA(_l) \ (struct if_data *)((char *)(_l) + (_l)->ifam_data_off) #define IFA_MSGHDRL_RTA(_l) \ (void *)((uintptr_t)(_l) + (_l)->ifam_len) struct ifa_msghdrl { u_short ifam_msglen; /* to skip over non-understood messages */ u_char ifam_version; /* future binary compatibility */ u_char ifam_type; /* message type */ int ifam_addrs; /* like rtm_addrs */ int ifam_flags; /* value of ifa_flags */ u_short ifam_index; /* index for associated ifp */ u_short _ifam_spare1; /* spare space to grow if_index, see if_var.h */ u_short ifam_len; /* length of ifa_msghdrl incl. if_data */ u_short ifam_data_off; /* offset of if_data from beginning */ int ifam_metric; /* value of ifa_ifp->if_metric */ struct if_data ifam_data;/* statistics and other data about if or * address */ }; /* * Message format for use in obtaining information about multicast addresses * from the routing socket */ struct ifma_msghdr { u_short ifmam_msglen; /* to skip over non-understood messages */ u_char ifmam_version; /* future binary compatibility */ u_char ifmam_type; /* message type */ int ifmam_addrs; /* like rtm_addrs */ int ifmam_flags; /* value of ifa_flags */ u_short ifmam_index; /* index for associated ifp */ }; /* * Message format announcing the arrival or departure of a network interface. */ struct if_announcemsghdr { u_short ifan_msglen; /* to skip over non-understood messages */ u_char ifan_version; /* future binary compatibility */ u_char ifan_type; /* message type */ u_short ifan_index; /* index for associated ifp */ char ifan_name[IFNAMSIZ]; /* if name, e.g. "en0" */ u_short ifan_what; /* what type of announcement */ }; #define IFAN_ARRIVAL 0 /* interface arrival */ #define IFAN_DEPARTURE 1 /* interface departure */ /* * Buffer with length to be used in SIOCGIFDESCR/SIOCSIFDESCR requests */ struct ifreq_buffer { size_t length; void *buffer; }; /* * Interface request structure used for socket * ioctl's. 
All interface ioctl's must have parameter * definitions which begin with ifr_name. The * remainder may be interface specific. */ struct ifreq { char ifr_name[IFNAMSIZ]; /* if name, e.g. "en0" */ union { struct sockaddr ifru_addr; struct sockaddr ifru_dstaddr; struct sockaddr ifru_broadaddr; struct ifreq_buffer ifru_buffer; short ifru_flags[2]; short ifru_index; int ifru_jid; int ifru_metric; int ifru_mtu; int ifru_phys; int ifru_media; caddr_t ifru_data; int ifru_cap[2]; u_int ifru_fib; u_char ifru_vlan_pcp; } ifr_ifru; #define ifr_addr ifr_ifru.ifru_addr /* address */ #define ifr_dstaddr ifr_ifru.ifru_dstaddr /* other end of p-to-p link */ #define ifr_broadaddr ifr_ifru.ifru_broadaddr /* broadcast address */ #ifndef _KERNEL #define ifr_buffer ifr_ifru.ifru_buffer /* user supplied buffer with its length */ #endif #define ifr_flags ifr_ifru.ifru_flags[0] /* flags (low 16 bits) */ #define ifr_flagshigh ifr_ifru.ifru_flags[1] /* flags (high 16 bits) */ #define ifr_jid ifr_ifru.ifru_jid /* jail/vnet */ #define ifr_metric ifr_ifru.ifru_metric /* metric */ #define ifr_mtu ifr_ifru.ifru_mtu /* mtu */ #define ifr_phys ifr_ifru.ifru_phys /* physical wire */ #define ifr_media ifr_ifru.ifru_media /* physical media */ +#ifndef _KERNEL #define ifr_data ifr_ifru.ifru_data /* for use by interface */ +#endif #define ifr_reqcap ifr_ifru.ifru_cap[0] /* requested capabilities */ #define ifr_curcap ifr_ifru.ifru_cap[1] /* current capabilities */ #define ifr_index ifr_ifru.ifru_index /* interface index */ #define ifr_fib ifr_ifru.ifru_fib /* interface fib */ #define ifr_vlan_pcp ifr_ifru.ifru_vlan_pcp /* VLAN priority */ #define ifr_lan_pcp ifr_ifru.ifru_vlan_pcp /* VLAN priority */ }; #define _SIZEOF_ADDR_IFREQ(ifr) \ ((ifr).ifr_addr.sa_len > sizeof(struct sockaddr) ? \ (sizeof(struct ifreq) - sizeof(struct sockaddr) + \ (ifr).ifr_addr.sa_len) : sizeof(struct ifreq)) struct ifaliasreq { char ifra_name[IFNAMSIZ]; /* if name, e.g. 
"en0" */ struct sockaddr ifra_addr; struct sockaddr ifra_broadaddr; struct sockaddr ifra_mask; int ifra_vhid; }; /* 9.x compat */ struct oifaliasreq { char ifra_name[IFNAMSIZ]; struct sockaddr ifra_addr; struct sockaddr ifra_broadaddr; struct sockaddr ifra_mask; }; struct ifmediareq { char ifm_name[IFNAMSIZ]; /* if name, e.g. "en0" */ int ifm_current; /* current media options */ int ifm_mask; /* don't care mask */ int ifm_status; /* media status */ int ifm_active; /* active options */ int ifm_count; /* # entries in ifm_ulist array */ int *ifm_ulist; /* media words */ }; struct ifdrv { char ifd_name[IFNAMSIZ]; /* if name, e.g. "en0" */ unsigned long ifd_cmd; size_t ifd_len; void *ifd_data; }; /* * Structure used to retrieve aux status data from interfaces. * Kernel suppliers to this interface should respect the formatting * needed by ifconfig(8): each line starts with a TAB and ends with * a newline. The canonical example to copy and paste is in if_tun.c. */ #define IFSTATMAX 800 /* 10 lines of text */ struct ifstat { char ifs_name[IFNAMSIZ]; /* if name, e.g. "en0" */ char ascii[IFSTATMAX + 1]; }; /* * Structure used in SIOCGIFCONF request. * Used to retrieve interface configuration * for machine (useful for programs which * must know all networks accessible). */ struct ifconf { int ifc_len; /* size of associated buffer */ union { caddr_t ifcu_buf; struct ifreq *ifcu_req; } ifc_ifcu; #define ifc_buf ifc_ifcu.ifcu_buf /* buffer address */ #define ifc_req ifc_ifcu.ifcu_req /* array of structures returned */ }; /* * interface groups */ #define IFG_ALL "all" /* group contains all interfaces */ /* XXX: will we implement this? 
*/ #define IFG_EGRESS "egress" /* if(s) default route(s) point to */ struct ifg_req { union { char ifgrqu_group[IFNAMSIZ]; char ifgrqu_member[IFNAMSIZ]; } ifgrq_ifgrqu; #define ifgrq_group ifgrq_ifgrqu.ifgrqu_group #define ifgrq_member ifgrq_ifgrqu.ifgrqu_member }; /* * Used to lookup groups for an interface */ struct ifgroupreq { char ifgr_name[IFNAMSIZ]; u_int ifgr_len; union { char ifgru_group[IFNAMSIZ]; struct ifg_req *ifgru_groups; } ifgr_ifgru; #define ifgr_group ifgr_ifgru.ifgru_group #define ifgr_groups ifgr_ifgru.ifgru_groups }; /* * Structure used to request i2c data * from interface transceivers. */ struct ifi2creq { uint8_t dev_addr; /* i2c address (0xA0, 0xA2) */ uint8_t offset; /* read offset */ uint8_t len; /* read length */ uint8_t spare0; uint32_t spare1; uint8_t data[8]; /* read buffer */ }; /* * RSS hash. */ #define RSS_FUNC_NONE 0 /* RSS disabled */ #define RSS_FUNC_PRIVATE 1 /* non-standard */ #define RSS_FUNC_TOEPLITZ 2 #define RSS_TYPE_IPV4 0x00000001 #define RSS_TYPE_TCP_IPV4 0x00000002 #define RSS_TYPE_IPV6 0x00000004 #define RSS_TYPE_IPV6_EX 0x00000008 #define RSS_TYPE_TCP_IPV6 0x00000010 #define RSS_TYPE_TCP_IPV6_EX 0x00000020 #define RSS_TYPE_UDP_IPV4 0x00000040 #define RSS_TYPE_UDP_IPV6 0x00000080 #define RSS_TYPE_UDP_IPV6_EX 0x00000100 #define RSS_KEYLEN 128 struct ifrsskey { char ifrk_name[IFNAMSIZ]; /* if name, e.g. "en0" */ uint8_t ifrk_func; /* RSS_FUNC_ */ uint8_t ifrk_spare0; uint16_t ifrk_keylen; uint8_t ifrk_key[RSS_KEYLEN]; }; struct ifrsshash { char ifrh_name[IFNAMSIZ]; /* if name, e.g. "en0" */ uint8_t ifrh_func; /* RSS_FUNC_ */ uint8_t ifrh_spare0; uint16_t ifrh_spare1; uint32_t ifrh_types; /* RSS_TYPE_ */ }; #define IFNET_PCP_NONE 0xff /* PCP disabled */ #endif /* __BSD_VISIBLE */ #ifdef _KERNEL #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_IFADDR); MALLOC_DECLARE(M_IFMADDR); #endif #endif #ifndef _KERNEL struct if_nameindex { unsigned int if_index; /* 1, 2, ... */ char *if_name; /* null terminated name: "le0", ... 
*/ }; __BEGIN_DECLS void if_freenameindex(struct if_nameindex *); char *if_indextoname(unsigned int, char *); struct if_nameindex *if_nameindex(void); unsigned int if_nametoindex(const char *); __END_DECLS #endif #endif /* !_NET_IF_H_ */ Index: head/sys/net/if_gif.c =================================================================== --- head/sys/net/if_gif.c (revision 331796) +++ head/sys/net/if_gif.c (revision 331797) @@ -1,1064 +1,1066 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: if_gif.c,v 1.87 2001/10/19 08:50:27 itojun Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #endif /* INET */ #ifdef INET6 #ifndef INET #include #endif #include #include #include #include #include #include #endif /* INET6 */ #include #include #include #include #include static const char gifname[] = "gif"; /* * gif_mtx protects a per-vnet gif_softc_list. 
*/ static VNET_DEFINE(struct mtx, gif_mtx); #define V_gif_mtx VNET(gif_mtx) static MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface"); static VNET_DEFINE(LIST_HEAD(, gif_softc), gif_softc_list); #define V_gif_softc_list VNET(gif_softc_list) static struct sx gif_ioctl_sx; SX_SYSINIT(gif_ioctl_sx, &gif_ioctl_sx, "gif_ioctl"); #define GIF_LIST_LOCK_INIT(x) mtx_init(&V_gif_mtx, "gif_mtx", \ NULL, MTX_DEF) #define GIF_LIST_LOCK_DESTROY(x) mtx_destroy(&V_gif_mtx) #define GIF_LIST_LOCK(x) mtx_lock(&V_gif_mtx) #define GIF_LIST_UNLOCK(x) mtx_unlock(&V_gif_mtx) void (*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp, int af); void (*ng_gif_input_orphan_p)(struct ifnet *ifp, struct mbuf *m, int af); void (*ng_gif_attach_p)(struct ifnet *ifp); void (*ng_gif_detach_p)(struct ifnet *ifp); static int gif_check_nesting(struct ifnet *, struct mbuf *); static int gif_set_tunnel(struct ifnet *, struct sockaddr *, struct sockaddr *); static void gif_delete_tunnel(struct ifnet *); static int gif_ioctl(struct ifnet *, u_long, caddr_t); static int gif_transmit(struct ifnet *, struct mbuf *); static void gif_qflush(struct ifnet *); static int gif_clone_create(struct if_clone *, int, caddr_t); static void gif_clone_destroy(struct ifnet *); static VNET_DEFINE(struct if_clone *, gif_cloner); #define V_gif_cloner VNET(gif_cloner) static int gifmodevent(module_t, int, void *); SYSCTL_DECL(_net_link); static SYSCTL_NODE(_net_link, IFT_GIF, gif, CTLFLAG_RW, 0, "Generic Tunnel Interface"); #ifndef MAX_GIF_NEST /* * This macro controls the default upper limitation on nesting of gif tunnels. * Since, setting a large value to this macro with a careless configuration * may introduce system crash, we don't allow any nestings by default. * If you need to configure nested gif tunnels, you can define this macro * in your kernel configuration file. However, if you do so, please be * careful to configure the tunnels so that it won't make a loop. 
*/ #define MAX_GIF_NEST 1 #endif static VNET_DEFINE(int, max_gif_nesting) = MAX_GIF_NEST; #define V_max_gif_nesting VNET(max_gif_nesting) SYSCTL_INT(_net_link_gif, OID_AUTO, max_nesting, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(max_gif_nesting), 0, "Max nested tunnels"); /* * By default, we disallow creation of multiple tunnels between the same * pair of addresses. Some applications require this functionality so * we allow control over this check here. */ #ifdef XBONEHACK static VNET_DEFINE(int, parallel_tunnels) = 1; #else static VNET_DEFINE(int, parallel_tunnels) = 0; #endif #define V_parallel_tunnels VNET(parallel_tunnels) SYSCTL_INT(_net_link_gif, OID_AUTO, parallel_tunnels, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(parallel_tunnels), 0, "Allow parallel tunnels?"); static int gif_clone_create(struct if_clone *ifc, int unit, caddr_t params) { struct gif_softc *sc; sc = malloc(sizeof(struct gif_softc), M_GIF, M_WAITOK | M_ZERO); sc->gif_fibnum = curthread->td_proc->p_fibnum; GIF2IFP(sc) = if_alloc(IFT_GIF); GIF_LOCK_INIT(sc); GIF2IFP(sc)->if_softc = sc; if_initname(GIF2IFP(sc), gifname, unit); GIF2IFP(sc)->if_addrlen = 0; GIF2IFP(sc)->if_mtu = GIF_MTU; GIF2IFP(sc)->if_flags = IFF_POINTOPOINT | IFF_MULTICAST; #if 0 /* turn off ingress filter */ GIF2IFP(sc)->if_flags |= IFF_LINK2; #endif GIF2IFP(sc)->if_ioctl = gif_ioctl; GIF2IFP(sc)->if_transmit = gif_transmit; GIF2IFP(sc)->if_qflush = gif_qflush; GIF2IFP(sc)->if_output = gif_output; GIF2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE; GIF2IFP(sc)->if_capenable |= IFCAP_LINKSTATE; if_attach(GIF2IFP(sc)); bpfattach(GIF2IFP(sc), DLT_NULL, sizeof(u_int32_t)); if (ng_gif_attach_p != NULL) (*ng_gif_attach_p)(GIF2IFP(sc)); GIF_LIST_LOCK(); LIST_INSERT_HEAD(&V_gif_softc_list, sc, gif_list); GIF_LIST_UNLOCK(); return (0); } static void gif_clone_destroy(struct ifnet *ifp) { struct gif_softc *sc; sx_xlock(&gif_ioctl_sx); sc = ifp->if_softc; gif_delete_tunnel(ifp); GIF_LIST_LOCK(); LIST_REMOVE(sc, gif_list); GIF_LIST_UNLOCK(); if 
(ng_gif_detach_p != NULL) (*ng_gif_detach_p)(ifp); bpfdetach(ifp); if_detach(ifp); ifp->if_softc = NULL; sx_xunlock(&gif_ioctl_sx); if_free(ifp); GIF_LOCK_DESTROY(sc); free(sc, M_GIF); } static void vnet_gif_init(const void *unused __unused) { LIST_INIT(&V_gif_softc_list); GIF_LIST_LOCK_INIT(); V_gif_cloner = if_clone_simple(gifname, gif_clone_create, gif_clone_destroy, 0); } VNET_SYSINIT(vnet_gif_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_gif_init, NULL); static void vnet_gif_uninit(const void *unused __unused) { if_clone_detach(V_gif_cloner); GIF_LIST_LOCK_DESTROY(); } VNET_SYSUNINIT(vnet_gif_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_gif_uninit, NULL); static int gifmodevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: case MOD_UNLOAD: break; default: return (EOPNOTSUPP); } return (0); } static moduledata_t gif_mod = { "if_gif", gifmodevent, 0 }; DECLARE_MODULE(if_gif, gif_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(if_gif, 1); int gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg) { GIF_RLOCK_TRACKER; const struct ip *ip; struct gif_softc *sc; int ret; sc = (struct gif_softc *)arg; if (sc == NULL || (GIF2IFP(sc)->if_flags & IFF_UP) == 0) return (0); ret = 0; GIF_RLOCK(sc); /* no physical address */ if (sc->gif_family == 0) goto done; switch (proto) { #ifdef INET case IPPROTO_IPV4: #endif #ifdef INET6 case IPPROTO_IPV6: #endif case IPPROTO_ETHERIP: break; default: goto done; } /* Bail on short packets */ M_ASSERTPKTHDR(m); if (m->m_pkthdr.len < sizeof(struct ip)) goto done; ip = mtod(m, const struct ip *); switch (ip->ip_v) { #ifdef INET case 4: if (sc->gif_family != AF_INET) goto done; ret = in_gif_encapcheck(m, off, proto, arg); break; #endif #ifdef INET6 case 6: if (m->m_pkthdr.len < sizeof(struct ip6_hdr)) goto done; if (sc->gif_family != AF_INET6) goto done; ret = in6_gif_encapcheck(m, off, proto, arg); break; #endif } done: GIF_RUNLOCK(sc); return (ret); } static int gif_transmit(struct 
ifnet *ifp, struct mbuf *m) { struct gif_softc *sc; struct etherip_header *eth; #ifdef INET struct ip *ip; #endif #ifdef INET6 struct ip6_hdr *ip6; uint32_t t; #endif uint32_t af; uint8_t proto, ecn; int error; #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) { m_freem(m); goto err; } #endif error = ENETDOWN; sc = ifp->if_softc; if ((ifp->if_flags & IFF_MONITOR) != 0 || (ifp->if_flags & IFF_UP) == 0 || sc->gif_family == 0 || (error = gif_check_nesting(ifp, m)) != 0) { m_freem(m); goto err; } /* Now pull back the af that we stashed in the csum_data. */ if (ifp->if_bridge) af = AF_LINK; else af = m->m_pkthdr.csum_data; m->m_flags &= ~(M_BCAST|M_MCAST); M_SETFIB(m, sc->gif_fibnum); BPF_MTAP2(ifp, &af, sizeof(af), m); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); /* inner AF-specific encapsulation */ ecn = 0; switch (af) { #ifdef INET case AF_INET: proto = IPPROTO_IPV4; if (m->m_len < sizeof(struct ip)) m = m_pullup(m, sizeof(struct ip)); if (m == NULL) { error = ENOBUFS; goto err; } ip = mtod(m, struct ip *); ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED: ECN_NOCARE, &ecn, &ip->ip_tos); break; #endif #ifdef INET6 case AF_INET6: proto = IPPROTO_IPV6; if (m->m_len < sizeof(struct ip6_hdr)) m = m_pullup(m, sizeof(struct ip6_hdr)); if (m == NULL) { error = ENOBUFS; goto err; } t = 0; ip6 = mtod(m, struct ip6_hdr *); ip6_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED: ECN_NOCARE, &t, &ip6->ip6_flow); ecn = (ntohl(t) >> 20) & 0xff; break; #endif case AF_LINK: proto = IPPROTO_ETHERIP; M_PREPEND(m, sizeof(struct etherip_header), M_NOWAIT); if (m == NULL) { error = ENOBUFS; goto err; } eth = mtod(m, struct etherip_header *); eth->eip_resvh = 0; eth->eip_ver = ETHERIP_VERSION; eth->eip_resvl = 0; break; default: error = EAFNOSUPPORT; m_freem(m); goto err; } /* XXX should we check if our outer source is legal? 
*/ /* dispatch to output logic based on outer AF */ switch (sc->gif_family) { #ifdef INET case AF_INET: error = in_gif_output(ifp, m, proto, ecn); break; #endif #ifdef INET6 case AF_INET6: error = in6_gif_output(ifp, m, proto, ecn); break; #endif default: m_freem(m); } err: if (error) if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (error); } static void gif_qflush(struct ifnet *ifp __unused) { } #define MTAG_GIF 1080679712 static int gif_check_nesting(struct ifnet *ifp, struct mbuf *m) { struct m_tag *mtag; int count; /* * gif may cause infinite recursion calls when misconfigured. * We'll prevent this by detecting loops. * * High nesting level may cause stack exhaustion. * We'll prevent this by introducing upper limit. */ count = 1; mtag = NULL; while ((mtag = m_tag_locate(m, MTAG_GIF, 0, mtag)) != NULL) { if (*(struct ifnet **)(mtag + 1) == ifp) { log(LOG_NOTICE, "%s: loop detected\n", if_name(ifp)); return (EIO); } count++; } if (count > V_max_gif_nesting) { log(LOG_NOTICE, "%s: if_output recursively called too many times(%d)\n", if_name(ifp), count); return (EIO); } mtag = m_tag_alloc(MTAG_GIF, 0, sizeof(struct ifnet *), M_NOWAIT); if (mtag == NULL) return (ENOMEM); *(struct ifnet **)(mtag + 1) = ifp; m_tag_prepend(m, mtag); return (0); } int gif_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { uint32_t af; if (dst->sa_family == AF_UNSPEC) bcopy(dst->sa_data, &af, sizeof(af)); else af = dst->sa_family; /* * Now save the af in the inbound pkt csum data, this is a cheat since * we are using the inbound csum_data field to carry the af over to * the gif_transmit() routine, avoiding using yet another mtag. 
*/ m->m_pkthdr.csum_data = af; return (ifp->if_transmit(ifp, m)); } /* * Input path for a packet received on tunnel interface ifp. * proto selects the inner address family (IPv4, IPv6 or EtherIP); for * IPv4/IPv6 the outer ecn value is folded into the inner header and the * packet is dropped when the ECN egress check returns 0. The packet is then * tapped to BPF and either counted and freed (IFF_MONITOR), handed to the * bridge (EtherIP), passed to the orphan hook, or queued via netisr. * Failures increment IFCOUNTER_IERRORS through the drop label. */ void gif_input(struct mbuf *m, struct ifnet *ifp, int proto, uint8_t ecn) { struct etherip_header *eip; #ifdef INET struct ip *ip; #endif #ifdef INET6 struct ip6_hdr *ip6; uint32_t t; #endif struct ether_header *eh; struct ifnet *oldifp; int isr, n, af; if (ifp == NULL) { /* just in case */ m_freem(m); return; } m->m_pkthdr.rcvif = ifp; m_clrprotoflags(m); /* map the outer protocol number to the inner address family */ switch (proto) { #ifdef INET case IPPROTO_IPV4: af = AF_INET; if (m->m_len < sizeof(struct ip)) m = m_pullup(m, sizeof(struct ip)); if (m == NULL) goto drop; ip = mtod(m, struct ip *); if (ip_ecn_egress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED: ECN_NOCARE, &ecn, &ip->ip_tos) == 0) { m_freem(m); goto drop; } break; #endif #ifdef INET6 case IPPROTO_IPV6: af = AF_INET6; if (m->m_len < sizeof(struct ip6_hdr)) m = m_pullup(m, sizeof(struct ip6_hdr)); if (m == NULL) goto drop; t = htonl((uint32_t)ecn << 20); ip6 = mtod(m, struct ip6_hdr *); if (ip6_ecn_egress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED: ECN_NOCARE, &t, &ip6->ip6_flow) == 0) { m_freem(m); goto drop; } break; #endif case IPPROTO_ETHERIP: af = AF_LINK; break; default: m_freem(m); goto drop; } #ifdef MAC mac_ifnet_create_mbuf(ifp, m); #endif if (bpf_peers_present(ifp->if_bpf)) { uint32_t af1 = af; bpf_mtap2(ifp->if_bpf, &af1, sizeof(af1), m); } /* monitor mode: count the packet, then free it without delivering it */ if ((ifp->if_flags & IFF_MONITOR) != 0) { if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); m_freem(m); return; } /* optional hook; it receives &m and may leave m NULL, which is treated as a drop */ if (ng_gif_input_p != NULL) { (*ng_gif_input_p)(ifp, &m, af); if (m == NULL) goto drop; } /* * Put the packet to the network layer input queue according to the * specified address family. * Note: older versions of gif_input directly called network layer * input functions, e.g. ip6_input, here. We changed the policy to * prevent too many recursive calls of such input functions, which * might cause kernel panic. 
But the change may introduce another * problem; if the input queue is full, packets are discarded. * The kernel stack overflow really happened, and we believed * queue-full rarely occurs, so we changed the policy. */ switch (af) { #ifdef INET case AF_INET: isr = NETISR_IP; break; #endif #ifdef INET6 case AF_INET6: isr = NETISR_IPV6; break; #endif case AF_LINK: /* need the EtherIP header and the Ethernet header contiguous in the first mbuf */ n = sizeof(struct etherip_header) + sizeof(struct ether_header); if (n > m->m_len) m = m_pullup(m, n); if (m == NULL) goto drop; eip = mtod(m, struct etherip_header *); if (eip->eip_ver != ETHERIP_VERSION) { /* discard unknown versions */ m_freem(m); goto drop; } /* strip the EtherIP header */ m_adj(m, sizeof(struct etherip_header)); m->m_flags &= ~(M_BCAST|M_MCAST); m->m_pkthdr.rcvif = ifp; if (ifp->if_bridge) { oldifp = ifp; eh = mtod(m, struct ether_header *); if (ETHER_IS_MULTICAST(eh->ether_dhost)) { if (ETHER_IS_BROADCAST(eh->ether_dhost)) m->m_flags |= M_BCAST; else m->m_flags |= M_MCAST; if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1); } BRIDGE_INPUT(ifp, m); if (m != NULL && ifp != oldifp) { /* * The bridge gave us back itself or one of the * members for which the frame is addressed. */ ether_demux(ifp, m); return; } } /* frame was not delivered above: free whatever is left of it */ if (m != NULL) m_freem(m); return; default: if (ng_gif_input_orphan_p != NULL) (*ng_gif_input_orphan_p)(ifp, m, af); else m_freem(m); return; } if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); M_SETFIB(m, ifp->if_fib); netisr_dispatch(isr, m); return; /* every "goto drop" above is taken with m already freed or NULL */ drop: if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); } /* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? 
*/ /* * ioctl handler for the tunnel interface. * A first group of requests (address, multicast, flags, MTU) is answered * immediately; all remaining requests are serialized by gif_ioctl_sx and * leave through the "bad" label, which releases that lock. * Returns 0 on success or an errno value (ENXIO when ifp has no softc). */ int gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { GIF_RLOCK_TRACKER; struct ifreq *ifr = (struct ifreq*)data; struct sockaddr *dst, *src; struct gif_softc *sc; #ifdef INET struct sockaddr_in *sin = NULL; #endif #ifdef INET6 struct sockaddr_in6 *sin6 = NULL; #endif u_int options; int error; /* requests answered before gif_ioctl_sx is taken */ switch (cmd) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP; /* FALLTHROUGH */ case SIOCADDMULTI: case SIOCDELMULTI: case SIOCGIFMTU: case SIOCSIFFLAGS: return (0); case SIOCSIFMTU: /* only MTU values within [GIF_MTU_MIN, GIF_MTU_MAX] are accepted */ if (ifr->ifr_mtu < GIF_MTU_MIN || ifr->ifr_mtu > GIF_MTU_MAX) return (EINVAL); else ifp->if_mtu = ifr->ifr_mtu; return (0); } /* the remaining requests run with gif_ioctl_sx held exclusively */ sx_xlock(&gif_ioctl_sx); sc = ifp->if_softc; if (sc == NULL) { error = ENXIO; goto bad; } error = 0; switch (cmd) { case SIOCSIFPHYADDR: #ifdef INET6 case SIOCSIFPHYADDR_IN6: #endif /* EINVAL is the result of the plain "goto bad" validation failures below */ error = EINVAL; switch (cmd) { #ifdef INET case SIOCSIFPHYADDR: src = (struct sockaddr *) &(((struct in_aliasreq *)data)->ifra_addr); dst = (struct sockaddr *) &(((struct in_aliasreq *)data)->ifra_dstaddr); break; #endif #ifdef INET6 case SIOCSIFPHYADDR_IN6: src = (struct sockaddr *) &(((struct in6_aliasreq *)data)->ifra_addr); dst = (struct sockaddr *) &(((struct in6_aliasreq *)data)->ifra_dstaddr); break; #endif default: goto bad; } /* sa_family must be equal */ if (src->sa_family != dst->sa_family || src->sa_len != dst->sa_len) goto bad; /* validate sa_len */ /* check sa_family looks sane for the cmd */ switch (src->sa_family) { #ifdef INET case AF_INET: if (src->sa_len != sizeof(struct sockaddr_in)) goto bad; if (cmd != SIOCSIFPHYADDR) { error = EAFNOSUPPORT; goto bad; } if (satosin(src)->sin_addr.s_addr == INADDR_ANY || satosin(dst)->sin_addr.s_addr == INADDR_ANY) { error = EADDRNOTAVAIL; goto bad; } break; #endif #ifdef INET6 case AF_INET6: if (src->sa_len != sizeof(struct sockaddr_in6)) goto bad; if (cmd != SIOCSIFPHYADDR_IN6) { error = EAFNOSUPPORT; goto bad; } error = EADDRNOTAVAIL; if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr) || IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr)) goto 
bad; /* * Check validity of the scope zone ID of the * addresses, and convert it into the kernel * internal form if necessary. */ error = sa6_embedscope(satosin6(src), 0); if (error != 0) goto bad; error = sa6_embedscope(satosin6(dst), 0); if (error != 0) goto bad; break; #endif default: error = EAFNOSUPPORT; goto bad; } error = gif_set_tunnel(ifp, src, dst); break; case SIOCDIFPHYADDR: gif_delete_tunnel(ifp); break; case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: #ifdef INET6 case SIOCGIFPSRCADDR_IN6: case SIOCGIFPDSTADDR_IN6: #endif if (sc->gif_family == 0) { error = EADDRNOTAVAIL; break; } GIF_RLOCK(sc); switch (cmd) { #ifdef INET case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: if (sc->gif_family != AF_INET) { error = EADDRNOTAVAIL; break; } sin = (struct sockaddr_in *)&ifr->ifr_addr; memset(sin, 0, sizeof(*sin)); sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); break; #endif #ifdef INET6 case SIOCGIFPSRCADDR_IN6: case SIOCGIFPDSTADDR_IN6: if (sc->gif_family != AF_INET6) { error = EADDRNOTAVAIL; break; } sin6 = (struct sockaddr_in6 *) &(((struct in6_ifreq *)data)->ifr_addr); memset(sin6, 0, sizeof(*sin6)); sin6->sin6_family = AF_INET6; sin6->sin6_len = sizeof(*sin6); break; #endif default: error = EAFNOSUPPORT; } if (error == 0) { switch (cmd) { #ifdef INET case SIOCGIFPSRCADDR: sin->sin_addr = sc->gif_iphdr->ip_src; break; case SIOCGIFPDSTADDR: sin->sin_addr = sc->gif_iphdr->ip_dst; break; #endif #ifdef INET6 case SIOCGIFPSRCADDR_IN6: sin6->sin6_addr = sc->gif_ip6hdr->ip6_src; break; case SIOCGIFPDSTADDR_IN6: sin6->sin6_addr = sc->gif_ip6hdr->ip6_dst; break; #endif } } GIF_RUNLOCK(sc); if (error != 0) break; switch (cmd) { #ifdef INET case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: error = prison_if(curthread->td_ucred, (struct sockaddr *)sin); if (error != 0) memset(sin, 0, sizeof(*sin)); break; #endif #ifdef INET6 case SIOCGIFPSRCADDR_IN6: case SIOCGIFPDSTADDR_IN6: error = prison_if(curthread->td_ucred, (struct sockaddr *)sin6); if (error == 0) error = 
sa6_recoverscope(sin6); if (error != 0) memset(sin6, 0, sizeof(*sin6)); #endif } break; case SIOCGTUNFIB: ifr->ifr_fib = sc->gif_fibnum; break; case SIOCSTUNFIB: if ((error = priv_check(curthread, PRIV_NET_GIF)) != 0) break; if (ifr->ifr_fib >= rt_numfibs) error = EINVAL; else sc->gif_fibnum = ifr->ifr_fib; break; case GIFGOPTS: options = sc->gif_options; - error = copyout(&options, ifr->ifr_data, sizeof(options)); + error = copyout(&options, ifr_data_get_ptr(ifr), + sizeof(options)); break; case GIFSOPTS: if ((error = priv_check(curthread, PRIV_NET_GIF)) != 0) break; - error = copyin(ifr->ifr_data, &options, sizeof(options)); + error = copyin(ifr_data_get_ptr(ifr), &options, + sizeof(options)); if (error) break; if (options & ~GIF_OPTMASK) error = EINVAL; else sc->gif_options = options; break; default: error = EINVAL; break; } /* common exit for every request that took gif_ioctl_sx */ bad: sx_xunlock(&gif_ioctl_sx); return (error); } /* * Drop the encap registration of sc, if there is one, and clear the cookie. * gif_ioctl_sx must be held exclusively (asserted). */ static void gif_detach(struct gif_softc *sc) { sx_assert(&gif_ioctl_sx, SA_XLOCKED); if (sc->gif_ecookie != NULL) encap_detach(sc->gif_ecookie); sc->gif_ecookie = NULL; } /* * Call the attach routine matching outer address family af and return its * result; any other family yields EAFNOSUPPORT. * gif_ioctl_sx must be held exclusively (asserted). */ static int gif_attach(struct gif_softc *sc, int af) { sx_assert(&gif_ioctl_sx, SA_XLOCKED); switch (af) { #ifdef INET case AF_INET: return (in_gif_attach(sc)); #endif #ifdef INET6 case AF_INET6: return (in6_gif_attach(sc)); #endif } return (EAFNOSUPPORT); } /* * Configure src/dst as the outer endpoints of the tunnel on ifp. * Returns ENXIO when ifp has no softc, EADDRNOTAVAIL when another tunnel * already uses the same endpoint pair and parallel tunnels are disallowed, * EAFNOSUPPORT for an unsupported family, otherwise the attach result. */ static int gif_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst) { struct gif_softc *sc = ifp->if_softc; struct gif_softc *tsc; #ifdef INET struct ip *ip; #endif #ifdef INET6 struct ip6_hdr *ip6; #endif void *hdr; int error = 0; if (sc == NULL) return (ENXIO); /* Disallow parallel tunnels unless instructed otherwise. 
*/ if (V_parallel_tunnels == 0) { GIF_LIST_LOCK(); LIST_FOREACH(tsc, &V_gif_softc_list, gif_list) { /* skip ourselves and tunnels of a different outer family */ if (tsc == sc || tsc->gif_family != src->sa_family) continue; #ifdef INET if (tsc->gif_family == AF_INET && tsc->gif_iphdr->ip_src.s_addr == satosin(src)->sin_addr.s_addr && tsc->gif_iphdr->ip_dst.s_addr == satosin(dst)->sin_addr.s_addr) { error = EADDRNOTAVAIL; GIF_LIST_UNLOCK(); goto bad; } #endif #ifdef INET6 if (tsc->gif_family == AF_INET6 && IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_src, &satosin6(src)->sin6_addr) && IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_dst, &satosin6(dst)->sin6_addr)) { error = EADDRNOTAVAIL; GIF_LIST_UNLOCK(); goto bad; } #endif } GIF_LIST_UNLOCK(); } /* build a zeroed outer header template that carries the two endpoints */ switch (src->sa_family) { #ifdef INET case AF_INET: hdr = ip = malloc(sizeof(struct ip), M_GIF, M_WAITOK | M_ZERO); ip->ip_src.s_addr = satosin(src)->sin_addr.s_addr; ip->ip_dst.s_addr = satosin(dst)->sin_addr.s_addr; break; #endif #ifdef INET6 case AF_INET6: hdr = ip6 = malloc(sizeof(struct ip6_hdr), M_GIF, M_WAITOK | M_ZERO); ip6->ip6_src = satosin6(src)->sin6_addr; ip6->ip6_dst = satosin6(dst)->sin6_addr; ip6->ip6_vfc = IPV6_VERSION; break; #endif default: return (EAFNOSUPPORT); } /* the encap registration is replaced only when the outer family changes or none was set */ if (sc->gif_family != src->sa_family) gif_detach(sc); if (sc->gif_family == 0 || sc->gif_family != src->sa_family) error = gif_attach(sc, src->sa_family); /* NOTE(review): the new family and header are installed below even when gif_attach() returned an error -- confirm intended. */ GIF_WLOCK(sc); if (sc->gif_family != 0) free(sc->gif_hdr, M_GIF); sc->gif_family = src->sa_family; sc->gif_hdr = hdr; GIF_WUNLOCK(sc); #if defined(INET) || defined(INET6) bad: #endif /* NOTE(review): a request rejected above with EADDRNOTAVAIL also takes the else branch and marks the interface down, although the previous configuration is left in place -- confirm intended. */ if (error == 0 && sc->gif_family != 0) { ifp->if_drv_flags |= IFF_DRV_RUNNING; if_link_state_change(ifp, LINK_STATE_UP); } else { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; if_link_state_change(ifp, LINK_STATE_DOWN); } return (error); } /* * Remove the tunnel configuration from ifp: clear the outer family under * the write lock, detach and free the header template if one was set, then * clear IFF_DRV_RUNNING and report link down. Does nothing when ifp has no * softc. */ static void gif_delete_tunnel(struct ifnet *ifp) { struct gif_softc *sc = ifp->if_softc; int family; if (sc == NULL) return; GIF_WLOCK(sc); family = sc->gif_family; sc->gif_family = 0; GIF_WUNLOCK(sc); if (family != 0) { gif_detach(sc); free(sc->gif_hdr, 
M_GIF); } ifp->if_drv_flags &= ~IFF_DRV_RUNNING; if_link_state_change(ifp, LINK_STATE_DOWN); } Index: head/sys/net/if_gre.c =================================================================== --- head/sys/net/if_gre.c (revision 331796) +++ head/sys/net/if_gre.c (revision 331797) @@ -1,997 +1,999 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1998 The NetBSD Foundation, Inc. * Copyright (c) 2014 Andrey V. Elsukov * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Heiko W.Rupp * * IPv6-over-GRE contributed by Gert Doering * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* * $NetBSD: if_gre.c,v 1.49 2003/12/11 00:22:29 itojun Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #include #include #endif #ifdef INET6 #include #include #include #include #endif #include #include #include #include #include #define GREMTU 1476 static const char grename[] = "gre"; static MALLOC_DEFINE(M_GRE, grename, "Generic Routing Encapsulation"); static VNET_DEFINE(struct mtx, gre_mtx); #define V_gre_mtx VNET(gre_mtx) #define GRE_LIST_LOCK_INIT(x) mtx_init(&V_gre_mtx, "gre_mtx", NULL, \ MTX_DEF) #define GRE_LIST_LOCK_DESTROY(x) mtx_destroy(&V_gre_mtx) #define GRE_LIST_LOCK(x) mtx_lock(&V_gre_mtx) #define GRE_LIST_UNLOCK(x) mtx_unlock(&V_gre_mtx) static VNET_DEFINE(LIST_HEAD(, gre_softc), gre_softc_list); #define V_gre_softc_list VNET(gre_softc_list) static struct sx gre_ioctl_sx; SX_SYSINIT(gre_ioctl_sx, &gre_ioctl_sx, "gre_ioctl"); static int gre_clone_create(struct if_clone *, int, caddr_t); static void gre_clone_destroy(struct ifnet *); static VNET_DEFINE(struct if_clone *, gre_cloner); #define V_gre_cloner VNET(gre_cloner) static void gre_qflush(struct ifnet *); static int gre_transmit(struct ifnet *, struct mbuf *); static int gre_ioctl(struct ifnet *, u_long, caddr_t); static int gre_output(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); static void gre_updatehdr(struct gre_softc *); static int gre_set_tunnel(struct ifnet *, struct sockaddr *, struct sockaddr *); static void gre_delete_tunnel(struct ifnet *); SYSCTL_DECL(_net_link); static SYSCTL_NODE(_net_link, IFT_TUNNEL, gre, CTLFLAG_RW, 0, "Generic Routing Encapsulation"); #ifndef MAX_GRE_NEST /* * This macro controls the default upper limitation on nesting of gre 
tunnels. * Since, setting a large value to this macro with a careless configuration * may introduce system crash, we don't allow any nestings by default. * If you need to configure nested gre tunnels, you can define this macro * in your kernel configuration file. However, if you do so, please be * careful to configure the tunnels so that it won't make a loop. */ #define MAX_GRE_NEST 1 #endif /* Nesting limit, initialised to MAX_GRE_NEST and exported read-write as the max_nesting sysctl under _net_link_gre. */ static VNET_DEFINE(int, max_gre_nesting) = MAX_GRE_NEST; #define V_max_gre_nesting VNET(max_gre_nesting) SYSCTL_INT(_net_link_gre, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(max_gre_nesting), 0, "Max nested tunnels"); /* Initialise the softc list and its lock, then register the interface cloner named by grename. */ static void vnet_gre_init(const void *unused __unused) { LIST_INIT(&V_gre_softc_list); GRE_LIST_LOCK_INIT(); V_gre_cloner = if_clone_simple(grename, gre_clone_create, gre_clone_destroy, 0); } VNET_SYSINIT(vnet_gre_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_gre_init, NULL); /* Undo vnet_gre_init(): detach the cloner and destroy the list lock. */ static void vnet_gre_uninit(const void *unused __unused) { if_clone_detach(V_gre_cloner); GRE_LIST_LOCK_DESTROY(); } VNET_SYSUNINIT(vnet_gre_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_gre_uninit, NULL); /* * Cloner create callback: allocate a zeroed softc and an IFT_TUNNEL ifnet, * set the MTU to GREMTU, the point-to-point and multicast flags, the * output/ioctl/transmit/qflush handlers and the link-state capability, * attach the interface with a DLT_NULL bpf tap and link the softc into * V_gre_softc_list. Always returns 0. */ static int gre_clone_create(struct if_clone *ifc, int unit, caddr_t params) { struct gre_softc *sc; sc = malloc(sizeof(struct gre_softc), M_GRE, M_WAITOK | M_ZERO); /* the new tunnel starts with the FIB number of the creating process */ sc->gre_fibnum = curthread->td_proc->p_fibnum; /* NOTE(review): the if_alloc() result is used without a NULL check -- confirm it cannot fail here. */ GRE2IFP(sc) = if_alloc(IFT_TUNNEL); GRE_LOCK_INIT(sc); GRE2IFP(sc)->if_softc = sc; if_initname(GRE2IFP(sc), grename, unit); GRE2IFP(sc)->if_mtu = GREMTU; GRE2IFP(sc)->if_flags = IFF_POINTOPOINT|IFF_MULTICAST; GRE2IFP(sc)->if_output = gre_output; GRE2IFP(sc)->if_ioctl = gre_ioctl; GRE2IFP(sc)->if_transmit = gre_transmit; GRE2IFP(sc)->if_qflush = gre_qflush; GRE2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE; GRE2IFP(sc)->if_capenable |= IFCAP_LINKSTATE; if_attach(GRE2IFP(sc)); bpfattach(GRE2IFP(sc), DLT_NULL, sizeof(u_int32_t)); GRE_LIST_LOCK(); LIST_INSERT_HEAD(&V_gre_softc_list, sc, gre_list); GRE_LIST_UNLOCK(); return (0); } static void 
gre_clone_destroy(struct ifnet *ifp) { /* * Cloner destroy callback. With gre_ioctl_sx held: delete the tunnel, * unlink the softc from the list, detach bpf and the interface and clear * if_softc. The ifnet, the softc lock and the softc are released after * the sx lock is dropped. */ struct gre_softc *sc; sx_xlock(&gre_ioctl_sx); sc = ifp->if_softc; gre_delete_tunnel(ifp); GRE_LIST_LOCK(); LIST_REMOVE(sc, gre_list); GRE_LIST_UNLOCK(); bpfdetach(ifp); if_detach(ifp); /* gre_ioctl() and gre_transmit() both check if_softc for NULL and fail the request */ ifp->if_softc = NULL; sx_xunlock(&gre_ioctl_sx); if_free(ifp); GRE_LOCK_DESTROY(sc); free(sc, M_GRE); } /* * ioctl handler for the tunnel interface. * MTU, address, flags and multicast requests, as well as the GRE[SG]ADDR* * and GRE[SG]PROTO requests (EOPNOTSUPP), are answered immediately; all * other requests are serialized by gre_ioctl_sx and leave through the "end" * label. Returns 0 or an errno value (ENXIO when ifp has no softc). */ static int gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { GRE_RLOCK_TRACKER; struct ifreq *ifr = (struct ifreq *)data; struct sockaddr *src, *dst; struct gre_softc *sc; #ifdef INET struct sockaddr_in *sin = NULL; #endif #ifdef INET6 struct sockaddr_in6 *sin6 = NULL; #endif uint32_t opt; int error; /* requests answered before gre_ioctl_sx is taken */ switch (cmd) { case SIOCSIFMTU: /* XXX: */ /* MTU values below 576 are rejected; no upper bound is checked here */ if (ifr->ifr_mtu < 576) return (EINVAL); ifp->if_mtu = ifr->ifr_mtu; return (0); case SIOCSIFADDR: ifp->if_flags |= IFF_UP; /* FALLTHROUGH */ case SIOCSIFFLAGS: case SIOCADDMULTI: case SIOCDELMULTI: return (0); case GRESADDRS: case GRESADDRD: case GREGADDRS: case GREGADDRD: case GRESPROTO: case GREGPROTO: return (EOPNOTSUPP); } src = dst = NULL; /* the remaining requests run with gre_ioctl_sx held exclusively */ sx_xlock(&gre_ioctl_sx); sc = ifp->if_softc; if (sc == NULL) { error = ENXIO; goto end; } error = 0; switch (cmd) { case SIOCSIFPHYADDR: #ifdef INET6 case SIOCSIFPHYADDR_IN6: #endif /* EINVAL is the result of the plain "goto end" validation failures below */ error = EINVAL; switch (cmd) { #ifdef INET case SIOCSIFPHYADDR: src = (struct sockaddr *) &(((struct in_aliasreq *)data)->ifra_addr); dst = (struct sockaddr *) &(((struct in_aliasreq *)data)->ifra_dstaddr); break; #endif #ifdef INET6 case SIOCSIFPHYADDR_IN6: src = (struct sockaddr *) &(((struct in6_aliasreq *)data)->ifra_addr); dst = (struct sockaddr *) &(((struct in6_aliasreq *)data)->ifra_dstaddr); break; #endif default: error = EAFNOSUPPORT; goto end; } /* sa_family must be equal */ if (src->sa_family != dst->sa_family || src->sa_len != dst->sa_len) goto end; /* validate sa_len */ switch (src->sa_family) { #ifdef INET case AF_INET: if (src->sa_len != sizeof(struct sockaddr_in)) goto end; break; #endif #ifdef INET6 case AF_INET6: if (src->sa_len != sizeof(struct sockaddr_in6)) goto end; break; 
#endif default: error = EAFNOSUPPORT; goto end; } /* check sa_family looks sane for the cmd */ error = EAFNOSUPPORT; switch (cmd) { #ifdef INET case SIOCSIFPHYADDR: if (src->sa_family == AF_INET) break; goto end; #endif #ifdef INET6 case SIOCSIFPHYADDR_IN6: if (src->sa_family == AF_INET6) break; goto end; #endif } error = EADDRNOTAVAIL; switch (src->sa_family) { #ifdef INET case AF_INET: if (satosin(src)->sin_addr.s_addr == INADDR_ANY || satosin(dst)->sin_addr.s_addr == INADDR_ANY) goto end; break; #endif #ifdef INET6 case AF_INET6: if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr) || IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr)) goto end; /* * Check validity of the scope zone ID of the * addresses, and convert it into the kernel * internal form if necessary. */ error = sa6_embedscope(satosin6(src), 0); if (error != 0) goto end; error = sa6_embedscope(satosin6(dst), 0); if (error != 0) goto end; #endif } error = gre_set_tunnel(ifp, src, dst); break; case SIOCDIFPHYADDR: gre_delete_tunnel(ifp); break; case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: #ifdef INET6 case SIOCGIFPSRCADDR_IN6: case SIOCGIFPDSTADDR_IN6: #endif if (sc->gre_family == 0) { error = EADDRNOTAVAIL; break; } GRE_RLOCK(sc); switch (cmd) { #ifdef INET case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: if (sc->gre_family != AF_INET) { error = EADDRNOTAVAIL; break; } sin = (struct sockaddr_in *)&ifr->ifr_addr; memset(sin, 0, sizeof(*sin)); sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); break; #endif #ifdef INET6 case SIOCGIFPSRCADDR_IN6: case SIOCGIFPDSTADDR_IN6: if (sc->gre_family != AF_INET6) { error = EADDRNOTAVAIL; break; } sin6 = (struct sockaddr_in6 *) &(((struct in6_ifreq *)data)->ifr_addr); memset(sin6, 0, sizeof(*sin6)); sin6->sin6_family = AF_INET6; sin6->sin6_len = sizeof(*sin6); break; #endif } if (error == 0) { switch (cmd) { #ifdef INET case SIOCGIFPSRCADDR: sin->sin_addr = sc->gre_oip.ip_src; break; case SIOCGIFPDSTADDR: sin->sin_addr = sc->gre_oip.ip_dst; break; #endif #ifdef 
INET6 case SIOCGIFPSRCADDR_IN6: sin6->sin6_addr = sc->gre_oip6.ip6_src; break; case SIOCGIFPDSTADDR_IN6: sin6->sin6_addr = sc->gre_oip6.ip6_dst; break; #endif } } GRE_RUNLOCK(sc); if (error != 0) break; switch (cmd) { #ifdef INET case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: error = prison_if(curthread->td_ucred, (struct sockaddr *)sin); if (error != 0) memset(sin, 0, sizeof(*sin)); break; #endif #ifdef INET6 case SIOCGIFPSRCADDR_IN6: case SIOCGIFPDSTADDR_IN6: error = prison_if(curthread->td_ucred, (struct sockaddr *)sin6); if (error == 0) error = sa6_recoverscope(sin6); if (error != 0) memset(sin6, 0, sizeof(*sin6)); #endif } break; case SIOCGTUNFIB: ifr->ifr_fib = sc->gre_fibnum; break; case SIOCSTUNFIB: if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0) break; if (ifr->ifr_fib >= rt_numfibs) error = EINVAL; else sc->gre_fibnum = ifr->ifr_fib; break; case GRESKEY: if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0) break; - if ((error = copyin(ifr->ifr_data, &opt, sizeof(opt))) != 0) + if ((error = copyin(ifr_data_get_ptr(ifr), &opt, + sizeof(opt))) != 0) break; if (sc->gre_key != opt) { GRE_WLOCK(sc); sc->gre_key = opt; gre_updatehdr(sc); GRE_WUNLOCK(sc); } break; case GREGKEY: - error = copyout(&sc->gre_key, ifr->ifr_data, + error = copyout(&sc->gre_key, ifr_data_get_ptr(ifr), sizeof(sc->gre_key)); break; case GRESOPTS: if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0) break; - if ((error = copyin(ifr->ifr_data, &opt, sizeof(opt))) != 0) + if ((error = copyin(ifr_data_get_ptr(ifr), &opt, + sizeof(opt))) != 0) break; if (opt & ~GRE_OPTMASK) error = EINVAL; else { if (sc->gre_options != opt) { GRE_WLOCK(sc); sc->gre_options = opt; gre_updatehdr(sc); GRE_WUNLOCK(sc); } } break; case GREGOPTS: - error = copyout(&sc->gre_options, ifr->ifr_data, + error = copyout(&sc->gre_options, ifr_data_get_ptr(ifr), sizeof(sc->gre_options)); break; default: error = EINVAL; break; } end: sx_xunlock(&gre_ioctl_sx); return (error); } static void gre_updatehdr(struct 
gre_softc *sc) { /* * Rebuild the header template of sc from gre_family, gre_options and * gre_key: fill in the fixed outer IP fields, recompute gre_hlen and * lay out the option words in the order checksum, key, sequence. * The softc write lock must be held (asserted). Does nothing when no * supported outer family is set. */ struct grehdr *gh = NULL; uint32_t *opts; uint16_t flags; GRE_WLOCK_ASSERT(sc); switch (sc->gre_family) { #ifdef INET case AF_INET: sc->gre_hlen = sizeof(struct greip); /* NOTE(review): a protocol-number constant (IPPROTO_IPV4) is stored in the IP version field; IPVERSION would be the conventional name -- confirm the two are numerically equal. */ sc->gre_oip.ip_v = IPPROTO_IPV4; sc->gre_oip.ip_hl = sizeof(struct ip) >> 2; sc->gre_oip.ip_p = IPPROTO_GRE; gh = &sc->gre_gihdr->gi_gre; break; #endif #ifdef INET6 case AF_INET6: sc->gre_hlen = sizeof(struct greip6); sc->gre_oip6.ip6_vfc = IPV6_VERSION; sc->gre_oip6.ip6_nxt = IPPROTO_GRE; gh = &sc->gre_gi6hdr->gi6_gre; break; #endif default: return; } flags = 0; opts = gh->gre_opts; /* each enabled option sets its flag bit, grows gre_hlen and consumes one 32-bit word */ if (sc->gre_options & GRE_ENABLE_CSUM) { flags |= GRE_FLAGS_CP; sc->gre_hlen += 2 * sizeof(uint16_t); *opts++ = 0; } if (sc->gre_key != 0) { flags |= GRE_FLAGS_KP; sc->gre_hlen += sizeof(uint32_t); *opts++ = htonl(sc->gre_key); } if (sc->gre_options & GRE_ENABLE_SEQ) { flags |= GRE_FLAGS_SP; sc->gre_hlen += sizeof(uint32_t); *opts++ = 0; } else sc->gre_oseq = 0; gh->gre_flags = htons(flags); } /* * Drop the encap registration of sc, if there is one, and clear the cookie. * gre_ioctl_sx must be held exclusively (asserted). */ static void gre_detach(struct gre_softc *sc) { sx_assert(&gre_ioctl_sx, SA_XLOCKED); if (sc->gre_ecookie != NULL) encap_detach(sc->gre_ecookie); sc->gre_ecookie = NULL; } /* * Configure src/dst as the outer endpoints of the tunnel on ifp. * Returns EADDRNOTAVAIL when another tunnel already uses the same endpoint * pair, EAFNOSUPPORT for an unsupported family, otherwise the result of the * per-family attach routine. gre_ioctl_sx must be held exclusively * (asserted). */ static int gre_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst) { struct gre_softc *sc, *tsc; #ifdef INET6 struct ip6_hdr *ip6; #endif #ifdef INET struct ip *ip; #endif void *hdr; int error; sx_assert(&gre_ioctl_sx, SA_XLOCKED); GRE_LIST_LOCK(); sc = ifp->if_softc; LIST_FOREACH(tsc, &V_gre_softc_list, gre_list) { /* skip ourselves and tunnels of a different outer family */ if (tsc == sc || tsc->gre_family != src->sa_family) continue; #ifdef INET if (tsc->gre_family == AF_INET && tsc->gre_oip.ip_src.s_addr == satosin(src)->sin_addr.s_addr && tsc->gre_oip.ip_dst.s_addr == satosin(dst)->sin_addr.s_addr) { GRE_LIST_UNLOCK(); return (EADDRNOTAVAIL); } #endif #ifdef INET6 if (tsc->gre_family == AF_INET6 && IN6_ARE_ADDR_EQUAL(&tsc->gre_oip6.ip6_src, &satosin6(src)->sin6_addr) && IN6_ARE_ADDR_EQUAL(&tsc->gre_oip6.ip6_dst, &satosin6(dst)->sin6_addr)) { GRE_LIST_UNLOCK(); return 
(EADDRNOTAVAIL); } #endif } GRE_LIST_UNLOCK(); /* allocate a zeroed header template with room for three 32-bit GRE option words */ switch (src->sa_family) { #ifdef INET case AF_INET: hdr = ip = malloc(sizeof(struct greip) + 3 * sizeof(uint32_t), M_GRE, M_WAITOK | M_ZERO); ip->ip_src = satosin(src)->sin_addr; ip->ip_dst = satosin(dst)->sin_addr; break; #endif #ifdef INET6 case AF_INET6: hdr = ip6 = malloc(sizeof(struct greip6) + 3 * sizeof(uint32_t), M_GRE, M_WAITOK | M_ZERO); ip6->ip6_src = satosin6(src)->sin6_addr; ip6->ip6_dst = satosin6(dst)->sin6_addr; break; #endif default: return (EAFNOSUPPORT); } if (sc->gre_family != 0) gre_detach(sc); /* swap in the new template under the write lock and reset both sequence counters */ GRE_WLOCK(sc); if (sc->gre_family != 0) free(sc->gre_hdr, M_GRE); sc->gre_family = src->sa_family; sc->gre_hdr = hdr; sc->gre_oseq = 0; sc->gre_iseq = UINT32_MAX; gre_updatehdr(sc); GRE_WUNLOCK(sc); error = 0; switch (src->sa_family) { #ifdef INET case AF_INET: error = in_gre_attach(sc); break; #endif #ifdef INET6 case AF_INET6: error = in6_gre_attach(sc); break; #endif } /* NOTE(review): when the attach fails the new family and header stay installed and the running flag is left as it was -- confirm intended. */ if (error == 0) { ifp->if_drv_flags |= IFF_DRV_RUNNING; if_link_state_change(ifp, LINK_STATE_UP); } return (error); } /* * Remove the tunnel configuration from ifp: clear the outer family under * the write lock, detach and free the header template if one was set, then * clear IFF_DRV_RUNNING and report link down. * NOTE(review): ifp->if_softc is dereferenced without a NULL check -- * confirm that every caller guarantees it is set. */ static void gre_delete_tunnel(struct ifnet *ifp) { struct gre_softc *sc = ifp->if_softc; int family; GRE_WLOCK(sc); family = sc->gre_family; sc->gre_family = 0; GRE_WUNLOCK(sc); if (family != 0) { gre_detach(sc); free(sc->gre_hdr, M_GRE); } ifp->if_drv_flags &= ~IFF_DRV_RUNNING; if_link_state_change(ifp, LINK_STATE_DOWN); } /* * Input handler for an encapsulated packet. *mp is the packet, *offp the * offset of the GRE header inside it; the softc comes from encap_getarg(). * The GRE header is validated, the outer and GRE headers are stripped and * the inner IPv4/IPv6 packet is dispatched via netisr (or freed when the * interface is in monitor mode). Always returns IPPROTO_DONE; rejected * packets are freed and counted in IFCOUNTER_IERRORS. */ int gre_input(struct mbuf **mp, int *offp, int proto) { struct gre_softc *sc; struct grehdr *gh; struct ifnet *ifp; struct mbuf *m; uint32_t *opts; #ifdef notyet uint32_t key; #endif uint16_t flags; int hlen, isr, af; m = *mp; sc = encap_getarg(m); KASSERT(sc != NULL, ("encap_getarg returned NULL")); ifp = GRE2IFP(sc); /* NOTE(review): packets shorter than *offp + the base GRE header + four 32-bit words are dropped regardless of which options are actually present -- confirm intended. */ hlen = *offp + sizeof(struct grehdr) + 4 * sizeof(uint32_t); if (m->m_pkthdr.len < hlen) goto drop; if (m->m_len < hlen) { m = m_pullup(m, hlen); if (m == NULL) goto drop; } gh = (struct grehdr *)mtodo(m, *offp); flags = ntohs(gh->gre_flags); /* reject headers carrying flag bits outside GRE_FLAGS_MASK */ if (flags & ~GRE_FLAGS_MASK) goto drop; opts = 
gh->gre_opts; /* from here on hlen counts GRE header bytes only: two 16-bit fields plus one 32-bit word per option present */ hlen = 2 * sizeof(uint16_t); if (flags & GRE_FLAGS_CP) { /* reserved1 field must be zero */ if (((uint16_t *)opts)[1] != 0) goto drop; if (in_cksum_skip(m, m->m_pkthdr.len, *offp) != 0) goto drop; hlen += 2 * sizeof(uint16_t); opts++; } if (flags & GRE_FLAGS_KP) { #ifdef notyet /* * XXX: The current implementation uses the key only for outgoing * packets. But we can check the key value here, or even in the * encapcheck function. */ key = ntohl(*opts); #endif hlen += sizeof(uint32_t); opts++; } #ifdef notyet } else key = 0; if (sc->gre_key != 0 && (key != sc->gre_key || key != 0)) goto drop; #endif if (flags & GRE_FLAGS_SP) { #ifdef notyet seq = ntohl(*opts); #endif hlen += sizeof(uint32_t); } switch (ntohs(gh->gre_proto)) { case ETHERTYPE_WCCP: /* * For WCCP, skip an additional 4 bytes when the GRE header is * not directly followed by an IP header (first nibble != 4). */ if (flags == 0 && (*(uint8_t *)gh->gre_opts & 0xF0) != 0x40) hlen += sizeof(uint32_t); /* FALLTHROUGH */ case ETHERTYPE_IP: isr = NETISR_IP; af = AF_INET; break; case ETHERTYPE_IPV6: isr = NETISR_IPV6; af = AF_INET6; break; default: goto drop; } /* strip the outer header (*offp bytes) together with the GRE header */ m_adj(m, *offp + hlen); m_clrprotoflags(m); m->m_pkthdr.rcvif = ifp; M_SETFIB(m, ifp->if_fib); #ifdef MAC mac_ifnet_create_mbuf(ifp, m); #endif BPF_MTAP2(ifp, &af, sizeof(af), m); if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); /* monitor mode: the packet has been tapped and counted, so just free it */ if ((ifp->if_flags & IFF_MONITOR) != 0) m_freem(m); else netisr_dispatch(isr, m); return (IPPROTO_DONE); /* NOTE(review): m is NULL here when the earlier m_pullup() failed, so this relies on m_freem(NULL) being harmless -- confirm. */ drop: if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); m_freem(m); return (IPPROTO_DONE); } /* m_tag cookie that gre_check_nesting() passes to m_tag_locate() and m_tag_alloc(). */ #define MTAG_GRE 1307983903 /* * Loop and nesting guard for the output path. * Returns EIO when a MTAG_GRE tag on m already records ifp (loop) or when * the nesting count exceeds V_max_gre_nesting, ENOMEM when a new tag cannot * be allocated, and 0 after a tag recording ifp has been prepended to m. */ static int gre_check_nesting(struct ifnet *ifp, struct mbuf *m) { struct m_tag *mtag; int count; /* count ends up as 1 + the number of MTAG_GRE tags already on m */ count = 1; mtag = NULL; while ((mtag = m_tag_locate(m, MTAG_GRE, 0, mtag)) != NULL) { /* the tag payload, stored right after the m_tag header, is the ifnet that added the tag */ if (*(struct ifnet **)(mtag + 1) == ifp) { log(LOG_NOTICE, "%s: loop detected\n", ifp->if_xname); return (EIO); } count++; } if (count > V_max_gre_nesting) { log(LOG_NOTICE, "%s: if_output recursively called 
too many times(%d)\n", ifp->if_xname, count); return (EIO); } mtag = m_tag_alloc(MTAG_GRE, 0, sizeof(struct ifnet *), M_NOWAIT); if (mtag == NULL) return (ENOMEM); *(struct ifnet **)(mtag + 1) = ifp; m_tag_prepend(m, mtag); return (0); } static int gre_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { uint32_t af; int error; #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error != 0) goto drop; #endif if ((ifp->if_flags & IFF_MONITOR) != 0 || (ifp->if_flags & IFF_UP) == 0) { error = ENETDOWN; goto drop; } error = gre_check_nesting(ifp, m); if (error != 0) goto drop; m->m_flags &= ~(M_BCAST|M_MCAST); if (dst->sa_family == AF_UNSPEC) bcopy(dst->sa_data, &af, sizeof(af)); else af = dst->sa_family; BPF_MTAP2(ifp, &af, sizeof(af), m); m->m_pkthdr.csum_data = af; /* save af for if_transmit */ return (ifp->if_transmit(ifp, m)); drop: m_freem(m); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (error); } static void gre_setseqn(struct grehdr *gh, uint32_t seq) { uint32_t *opts; uint16_t flags; opts = gh->gre_opts; flags = ntohs(gh->gre_flags); KASSERT((flags & GRE_FLAGS_SP) != 0, ("gre_setseqn called, but GRE_FLAGS_SP isn't set ")); if (flags & GRE_FLAGS_CP) opts++; if (flags & GRE_FLAGS_KP) opts++; *opts = htonl(seq); } static int gre_transmit(struct ifnet *ifp, struct mbuf *m) { GRE_RLOCK_TRACKER; struct gre_softc *sc; struct grehdr *gh; uint32_t iaf, oaf, oseq; int error, hlen, olen, plen; int want_seq, want_csum; plen = 0; sc = ifp->if_softc; if (sc == NULL) { error = ENETDOWN; m_freem(m); goto drop; } GRE_RLOCK(sc); if (sc->gre_family == 0) { GRE_RUNLOCK(sc); error = ENETDOWN; m_freem(m); goto drop; } iaf = m->m_pkthdr.csum_data; oaf = sc->gre_family; hlen = sc->gre_hlen; want_seq = (sc->gre_options & GRE_ENABLE_SEQ) != 0; if (want_seq) oseq = sc->gre_oseq++; /* XXX */ else oseq = 0; /* Make compiler happy. 
*/ want_csum = (sc->gre_options & GRE_ENABLE_CSUM) != 0; M_SETFIB(m, sc->gre_fibnum); M_PREPEND(m, hlen, M_NOWAIT); if (m == NULL) { GRE_RUNLOCK(sc); error = ENOBUFS; goto drop; } bcopy(sc->gre_hdr, mtod(m, void *), hlen); GRE_RUNLOCK(sc); switch (oaf) { #ifdef INET case AF_INET: olen = sizeof(struct ip); break; #endif #ifdef INET6 case AF_INET6: olen = sizeof(struct ip6_hdr); break; #endif default: error = ENETDOWN; goto drop; } gh = (struct grehdr *)mtodo(m, olen); switch (iaf) { #ifdef INET case AF_INET: gh->gre_proto = htons(ETHERTYPE_IP); break; #endif #ifdef INET6 case AF_INET6: gh->gre_proto = htons(ETHERTYPE_IPV6); break; #endif default: error = ENETDOWN; goto drop; } if (want_seq) gre_setseqn(gh, oseq); if (want_csum) { *(uint16_t *)gh->gre_opts = in_cksum_skip(m, m->m_pkthdr.len, olen); } plen = m->m_pkthdr.len - hlen; switch (oaf) { #ifdef INET case AF_INET: error = in_gre_output(m, iaf, hlen); break; #endif #ifdef INET6 case AF_INET6: error = in6_gre_output(m, iaf, hlen); break; #endif default: m_freem(m); error = ENETDOWN; } drop: if (error) if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); else { if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_OBYTES, plen); } return (error); } static void gre_qflush(struct ifnet *ifp __unused) { } static int gremodevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: case MOD_UNLOAD: break; default: return (EOPNOTSUPP); } return (0); } static moduledata_t gre_mod = { "if_gre", gremodevent, 0 }; DECLARE_MODULE(if_gre, gre_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(if_gre, 1); Index: head/sys/net/if_ipsec.c =================================================================== --- head/sys/net/if_ipsec.c (revision 331796) +++ head/sys/net/if_ipsec.c (revision 331797) @@ -1,1002 +1,1002 @@ /*- * Copyright (c) 2016 Yandex LLC * Copyright (c) 2016 Andrey V. Elsukov * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif #include #include #include static MALLOC_DEFINE(M_IPSEC, "ipsec", "IPsec Virtual Tunnel Interface"); static const char ipsecname[] = "ipsec"; #if defined(INET) && defined(INET6) #define IPSEC_SPCOUNT 4 #else #define IPSEC_SPCOUNT 2 #endif struct ipsec_softc { struct ifnet *ifp; struct rmlock lock; struct secpolicy *sp[IPSEC_SPCOUNT]; uint32_t reqid; u_int family; u_int fibnum; LIST_ENTRY(ipsec_softc) chain; LIST_ENTRY(ipsec_softc) hash; }; #define IPSEC_LOCK_INIT(sc) rm_init(&(sc)->lock, "if_ipsec softc") #define IPSEC_LOCK_DESTROY(sc) rm_destroy(&(sc)->lock) #define IPSEC_RLOCK_TRACKER struct rm_priotracker ipsec_tracker #define IPSEC_RLOCK(sc) rm_rlock(&(sc)->lock, &ipsec_tracker) #define IPSEC_RUNLOCK(sc) rm_runlock(&(sc)->lock, &ipsec_tracker) #define IPSEC_RLOCK_ASSERT(sc) rm_assert(&(sc)->lock, RA_RLOCKED) #define IPSEC_WLOCK(sc) rm_wlock(&(sc)->lock) #define IPSEC_WUNLOCK(sc) rm_wunlock(&(sc)->lock) #define IPSEC_WLOCK_ASSERT(sc) rm_assert(&(sc)->lock, RA_WLOCKED) static struct rmlock ipsec_sc_lock; RM_SYSINIT(ipsec_sc_lock, &ipsec_sc_lock, "if_ipsec softc list"); #define IPSEC_SC_RLOCK_TRACKER struct rm_priotracker ipsec_sc_tracker #define IPSEC_SC_RLOCK() rm_rlock(&ipsec_sc_lock, &ipsec_sc_tracker) #define IPSEC_SC_RUNLOCK() rm_runlock(&ipsec_sc_lock, &ipsec_sc_tracker) #define IPSEC_SC_RLOCK_ASSERT() rm_assert(&ipsec_sc_lock, RA_RLOCKED) #define IPSEC_SC_WLOCK() rm_wlock(&ipsec_sc_lock) #define IPSEC_SC_WUNLOCK() rm_wunlock(&ipsec_sc_lock) #define IPSEC_SC_WLOCK_ASSERT() rm_assert(&ipsec_sc_lock, RA_WLOCKED) LIST_HEAD(ipsec_iflist, ipsec_softc); static 
VNET_DEFINE(struct ipsec_iflist, ipsec_sc_list); static VNET_DEFINE(struct ipsec_iflist *, ipsec_sc_htbl); static VNET_DEFINE(u_long, ipsec_sc_hmask); #define V_ipsec_sc_list VNET(ipsec_sc_list) #define V_ipsec_sc_htbl VNET(ipsec_sc_htbl) #define V_ipsec_sc_hmask VNET(ipsec_sc_hmask) static uint32_t ipsec_hash(uint32_t id) { return (fnv_32_buf(&id, sizeof(id), FNV1_32_INIT)); } #define SCHASH_NHASH_LOG2 5 #define SCHASH_NHASH (1 << SCHASH_NHASH_LOG2) #define SCHASH_HASHVAL(id) (ipsec_hash((id)) & V_ipsec_sc_hmask) #define SCHASH_HASH(id) &V_ipsec_sc_htbl[SCHASH_HASHVAL(id)] /* * ipsec_ioctl_sx protects from concurrent ioctls. */ static struct sx ipsec_ioctl_sx; SX_SYSINIT(ipsec_ioctl_sx, &ipsec_ioctl_sx, "ipsec_ioctl"); static int ipsec_init_reqid(struct ipsec_softc *); static int ipsec_set_tunnel(struct ipsec_softc *, struct sockaddr *, struct sockaddr *, uint32_t); static void ipsec_delete_tunnel(struct ifnet *, int); static int ipsec_set_addresses(struct ifnet *, struct sockaddr *, struct sockaddr *); static int ipsec_set_reqid(struct ifnet *, uint32_t); static int ipsec_ioctl(struct ifnet *, u_long, caddr_t); static int ipsec_transmit(struct ifnet *, struct mbuf *); static int ipsec_output(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); static void ipsec_qflush(struct ifnet *); static int ipsec_clone_create(struct if_clone *, int, caddr_t); static void ipsec_clone_destroy(struct ifnet *); static VNET_DEFINE(struct if_clone *, ipsec_cloner); #define V_ipsec_cloner VNET(ipsec_cloner) static int ipsec_clone_create(struct if_clone *ifc, int unit, caddr_t params) { struct ipsec_softc *sc; struct ifnet *ifp; sc = malloc(sizeof(*sc), M_IPSEC, M_WAITOK | M_ZERO); sc->fibnum = curthread->td_proc->p_fibnum; sc->ifp = ifp = if_alloc(IFT_TUNNEL); IPSEC_LOCK_INIT(sc); ifp->if_softc = sc; if_initname(ifp, ipsecname, unit); ifp->if_addrlen = 0; ifp->if_mtu = IPSEC_MTU; ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST; ifp->if_ioctl = ipsec_ioctl; 
ifp->if_transmit = ipsec_transmit; ifp->if_qflush = ipsec_qflush; ifp->if_output = ipsec_output; if_attach(ifp); bpfattach(ifp, DLT_NULL, sizeof(uint32_t)); IPSEC_SC_WLOCK(); LIST_INSERT_HEAD(&V_ipsec_sc_list, sc, chain); IPSEC_SC_WUNLOCK(); return (0); } static void ipsec_clone_destroy(struct ifnet *ifp) { struct ipsec_softc *sc; sx_xlock(&ipsec_ioctl_sx); sc = ifp->if_softc; IPSEC_SC_WLOCK(); ipsec_delete_tunnel(ifp, 1); LIST_REMOVE(sc, chain); IPSEC_SC_WUNLOCK(); bpfdetach(ifp); if_detach(ifp); ifp->if_softc = NULL; sx_xunlock(&ipsec_ioctl_sx); if_free(ifp); IPSEC_LOCK_DESTROY(sc); free(sc, M_IPSEC); } static void vnet_ipsec_init(const void *unused __unused) { LIST_INIT(&V_ipsec_sc_list); V_ipsec_sc_htbl = hashinit(SCHASH_NHASH, M_IPSEC, &V_ipsec_sc_hmask); V_ipsec_cloner = if_clone_simple(ipsecname, ipsec_clone_create, ipsec_clone_destroy, 0); } VNET_SYSINIT(vnet_ipsec_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_ipsec_init, NULL); static void vnet_ipsec_uninit(const void *unused __unused) { if_clone_detach(V_ipsec_cloner); hashdestroy(V_ipsec_sc_htbl, M_IPSEC, V_ipsec_sc_hmask); } VNET_SYSUNINIT(vnet_ipsec_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_ipsec_uninit, NULL); static struct secpolicy * ipsec_getpolicy(struct ipsec_softc *sc, int dir, sa_family_t af) { switch (af) { #ifdef INET case AF_INET: return (sc->sp[(dir == IPSEC_DIR_INBOUND ? 0: 1)]); #endif #ifdef INET6 case AF_INET6: return (sc->sp[(dir == IPSEC_DIR_INBOUND ? 
0: 1) #ifdef INET + 2 #endif ]); #endif } return (NULL); } static struct secasindex * ipsec_getsaidx(struct ipsec_softc *sc, int dir, sa_family_t af) { struct secpolicy *sp; sp = ipsec_getpolicy(sc, dir, af); if (sp == NULL) return (NULL); return (&sp->req[0]->saidx); } static int ipsec_transmit(struct ifnet *ifp, struct mbuf *m) { IPSEC_RLOCK_TRACKER; struct ipsec_softc *sc; struct secpolicy *sp; struct ip *ip; uint32_t af; int error; #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) { m_freem(m); goto err; } #endif error = ENETDOWN; sc = ifp->if_softc; if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || (ifp->if_flags & IFF_MONITOR) != 0 || (ifp->if_flags & IFF_UP) == 0) { m_freem(m); goto err; } /* Determine address family to correctly handle packet in BPF */ ip = mtod(m, struct ip *); switch (ip->ip_v) { #ifdef INET case IPVERSION: af = AF_INET; break; #endif #ifdef INET6 case (IPV6_VERSION >> 4): af = AF_INET6; break; #endif default: error = EAFNOSUPPORT; m_freem(m); goto err; } /* * Loop prevention. * XXX: for now just check presence of IPSEC_OUT_DONE mbuf tag. * We can read full chain and compare destination address, * proto and mode from xform_history with values from softc. 
*/ if (m_tag_find(m, PACKET_TAG_IPSEC_OUT_DONE, NULL) != NULL) { m_freem(m); goto err; } IPSEC_RLOCK(sc); if (sc->family == 0) { IPSEC_RUNLOCK(sc); m_freem(m); goto err; } sp = ipsec_getpolicy(sc, IPSEC_DIR_OUTBOUND, af); key_addref(sp); M_SETFIB(m, sc->fibnum); IPSEC_RUNLOCK(sc); BPF_MTAP2(ifp, &af, sizeof(af), m); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); switch (af) { #ifdef INET case AF_INET: error = ipsec4_process_packet(m, sp, NULL); break; #endif #ifdef INET6 case AF_INET6: error = ipsec6_process_packet(m, sp, NULL); break; #endif default: panic("%s: unknown address family\n", __func__); } err: if (error != 0) if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (error); } static void ipsec_qflush(struct ifnet *ifp __unused) { } static int ipsec_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { return (ifp->if_transmit(ifp, m)); } int ipsec_if_input(struct mbuf *m, struct secasvar *sav, uint32_t af) { IPSEC_SC_RLOCK_TRACKER; struct secasindex *saidx; struct ipsec_softc *sc; struct ifnet *ifp; if (sav->state != SADB_SASTATE_MATURE && sav->state != SADB_SASTATE_DYING) { m_freem(m); return (ENETDOWN); } if (sav->sah->saidx.mode != IPSEC_MODE_TUNNEL || sav->sah->saidx.proto != IPPROTO_ESP) return (0); IPSEC_SC_RLOCK(); /* * We only acquire SC_RLOCK() while we are doing search in * ipsec_sc_htbl. It is safe, because removing softc or changing * of reqid/addresses requires removing from hash table. */ LIST_FOREACH(sc, SCHASH_HASH(sav->sah->saidx.reqid), hash) { saidx = ipsec_getsaidx(sc, IPSEC_DIR_INBOUND, sav->sah->saidx.src.sa.sa_family); /* SA's reqid should match reqid in SP */ if (saidx == NULL || sav->sah->saidx.reqid != saidx->reqid) continue; /* SAH's addresses should match tunnel endpoints. 
*/ if (key_sockaddrcmp(&sav->sah->saidx.dst.sa, &saidx->dst.sa, 0) != 0) continue; if (key_sockaddrcmp(&sav->sah->saidx.src.sa, &saidx->src.sa, 0) == 0) break; } if (sc == NULL) { IPSEC_SC_RUNLOCK(); /* Tunnel was not found. Nothing to do. */ return (0); } ifp = sc->ifp; if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || (ifp->if_flags & IFF_UP) == 0) { IPSEC_SC_RUNLOCK(); m_freem(m); return (ENETDOWN); } /* * We found matching and working tunnel. * Set its ifnet as receiving interface. */ m->m_pkthdr.rcvif = ifp; IPSEC_SC_RUNLOCK(); /* m_clrprotoflags(m); */ M_SETFIB(m, ifp->if_fib); BPF_MTAP2(ifp, &af, sizeof(af), m); if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); if ((ifp->if_flags & IFF_MONITOR) != 0) { m_freem(m); return (ENETDOWN); } return (0); } /* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */ int ipsec_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { IPSEC_RLOCK_TRACKER; struct ifreq *ifr = (struct ifreq*)data; struct sockaddr *dst, *src; struct ipsec_softc *sc; struct secasindex *saidx; #ifdef INET struct sockaddr_in *sin = NULL; #endif #ifdef INET6 struct sockaddr_in6 *sin6 = NULL; #endif uint32_t reqid; int error; switch (cmd) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP; case SIOCADDMULTI: case SIOCDELMULTI: case SIOCGIFMTU: case SIOCSIFFLAGS: return (0); case SIOCSIFMTU: if (ifr->ifr_mtu < IPSEC_MTU_MIN || ifr->ifr_mtu > IPSEC_MTU_MAX) return (EINVAL); else ifp->if_mtu = ifr->ifr_mtu; return (0); } sx_xlock(&ipsec_ioctl_sx); sc = ifp->if_softc; /* Check that softc is still here */ if (sc == NULL) { error = ENXIO; goto bad; } error = 0; switch (cmd) { case SIOCSIFPHYADDR: #ifdef INET6 case SIOCSIFPHYADDR_IN6: #endif error = EINVAL; switch (cmd) { #ifdef INET case SIOCSIFPHYADDR: src = (struct sockaddr *) &(((struct in_aliasreq *)data)->ifra_addr); dst = (struct sockaddr *) &(((struct in_aliasreq *)data)->ifra_dstaddr); break; #endif #ifdef INET6 case SIOCSIFPHYADDR_IN6: src = (struct 
sockaddr *) &(((struct in6_aliasreq *)data)->ifra_addr); dst = (struct sockaddr *) &(((struct in6_aliasreq *)data)->ifra_dstaddr); break; #endif default: goto bad; } /* sa_family must be equal */ if (src->sa_family != dst->sa_family || src->sa_len != dst->sa_len) goto bad; /* validate sa_len */ switch (src->sa_family) { #ifdef INET case AF_INET: if (src->sa_len != sizeof(struct sockaddr_in)) goto bad; break; #endif #ifdef INET6 case AF_INET6: if (src->sa_len != sizeof(struct sockaddr_in6)) goto bad; break; #endif default: error = EAFNOSUPPORT; goto bad; } /* check sa_family looks sane for the cmd */ error = EAFNOSUPPORT; switch (cmd) { #ifdef INET case SIOCSIFPHYADDR: if (src->sa_family == AF_INET) break; goto bad; #endif #ifdef INET6 case SIOCSIFPHYADDR_IN6: if (src->sa_family == AF_INET6) break; goto bad; #endif } error = EADDRNOTAVAIL; switch (src->sa_family) { #ifdef INET case AF_INET: if (satosin(src)->sin_addr.s_addr == INADDR_ANY || satosin(dst)->sin_addr.s_addr == INADDR_ANY) goto bad; break; #endif #ifdef INET6 case AF_INET6: if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr) || IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr)) goto bad; /* * Check validity of the scope zone ID of the * addresses, and convert it into the kernel * internal form if necessary. 
*/ error = sa6_embedscope(satosin6(src), 0); if (error != 0) goto bad; error = sa6_embedscope(satosin6(dst), 0); if (error != 0) goto bad; #endif }; error = ipsec_set_addresses(ifp, src, dst); break; case SIOCDIFPHYADDR: ipsec_delete_tunnel(ifp, 0); break; case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: #ifdef INET6 case SIOCGIFPSRCADDR_IN6: case SIOCGIFPDSTADDR_IN6: #endif IPSEC_RLOCK(sc); if (sc->family == 0) { IPSEC_RUNLOCK(sc); error = EADDRNOTAVAIL; break; } saidx = ipsec_getsaidx(sc, IPSEC_DIR_OUTBOUND, sc->family); switch (cmd) { #ifdef INET case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: if (saidx->src.sa.sa_family != AF_INET) { error = EADDRNOTAVAIL; break; } sin = (struct sockaddr_in *)&ifr->ifr_addr; memset(sin, 0, sizeof(*sin)); sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); break; #endif #ifdef INET6 case SIOCGIFPSRCADDR_IN6: case SIOCGIFPDSTADDR_IN6: if (saidx->src.sa.sa_family != AF_INET6) { error = EADDRNOTAVAIL; break; } sin6 = (struct sockaddr_in6 *) &(((struct in6_ifreq *)data)->ifr_addr); memset(sin6, 0, sizeof(*sin6)); sin6->sin6_family = AF_INET6; sin6->sin6_len = sizeof(*sin6); break; #endif default: error = EAFNOSUPPORT; } if (error == 0) { switch (cmd) { #ifdef INET case SIOCGIFPSRCADDR: sin->sin_addr = saidx->src.sin.sin_addr; break; case SIOCGIFPDSTADDR: sin->sin_addr = saidx->dst.sin.sin_addr; break; #endif #ifdef INET6 case SIOCGIFPSRCADDR_IN6: sin6->sin6_addr = saidx->src.sin6.sin6_addr; break; case SIOCGIFPDSTADDR_IN6: sin6->sin6_addr = saidx->dst.sin6.sin6_addr; break; #endif } } IPSEC_RUNLOCK(sc); if (error != 0) break; switch (cmd) { #ifdef INET case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: error = prison_if(curthread->td_ucred, (struct sockaddr *)sin); if (error != 0) memset(sin, 0, sizeof(*sin)); break; #endif #ifdef INET6 case SIOCGIFPSRCADDR_IN6: case SIOCGIFPDSTADDR_IN6: error = prison_if(curthread->td_ucred, (struct sockaddr *)sin6); if (error == 0) error = sa6_recoverscope(sin6); if (error != 0) memset(sin6, 0, 
sizeof(*sin6)); #endif } break; case SIOCGTUNFIB: ifr->ifr_fib = sc->fibnum; break; case SIOCSTUNFIB: if ((error = priv_check(curthread, PRIV_NET_SETIFFIB)) != 0) break; if (ifr->ifr_fib >= rt_numfibs) error = EINVAL; else sc->fibnum = ifr->ifr_fib; break; case IPSECGREQID: reqid = sc->reqid; - error = copyout(&reqid, ifr->ifr_data, sizeof(reqid)); + error = copyout(&reqid, ifr_data_get_ptr(ifr), sizeof(reqid)); break; case IPSECSREQID: if ((error = priv_check(curthread, PRIV_NET_SETIFCAP)) != 0) break; - error = copyin(ifr->ifr_data, &reqid, sizeof(reqid)); + error = copyin(ifr_data_get_ptr(ifr), &reqid, sizeof(reqid)); if (error != 0) break; error = ipsec_set_reqid(ifp, reqid); break; default: error = EINVAL; break; } bad: sx_xunlock(&ipsec_ioctl_sx); return (error); } /* * Allocate new private security policies for tunneling interface. * Each tunneling interface has following security policies for * both AF: * 0.0.0.0/0[any] 0.0.0.0/0[any] -P in \ * ipsec esp/tunnel/RemoteIP-LocalIP/unique:reqid * 0.0.0.0/0[any] 0.0.0.0/0[any] -P out \ * ipsec esp/tunnel/LocalIP-RemoteIP/unique:reqid */ static int ipsec_newpolicies(struct ipsec_softc *sc, struct secpolicy *sp[IPSEC_SPCOUNT], const struct sockaddr *src, const struct sockaddr *dst, uint32_t reqid) { struct ipsecrequest *isr; int i; memset(sp, 0, sizeof(struct secpolicy *) * IPSEC_SPCOUNT); for (i = 0; i < IPSEC_SPCOUNT; i++) { if ((sp[i] = key_newsp()) == NULL) goto fail; if ((isr = ipsec_newisr()) == NULL) goto fail; sp[i]->policy = IPSEC_POLICY_IPSEC; sp[i]->state = IPSEC_SPSTATE_DEAD; sp[i]->req[sp[i]->tcount++] = isr; sp[i]->created = time_second; /* Use priority field to store if_index */ sp[i]->priority = sc->ifp->if_index; isr->level = IPSEC_LEVEL_UNIQUE; isr->saidx.proto = IPPROTO_ESP; isr->saidx.mode = IPSEC_MODE_TUNNEL; isr->saidx.reqid = reqid; if (i % 2 == 0) { sp[i]->spidx.dir = IPSEC_DIR_INBOUND; bcopy(src, &isr->saidx.dst, src->sa_len); bcopy(dst, &isr->saidx.src, dst->sa_len); } else { 
sp[i]->spidx.dir = IPSEC_DIR_OUTBOUND; bcopy(src, &isr->saidx.src, src->sa_len); bcopy(dst, &isr->saidx.dst, dst->sa_len); } sp[i]->spidx.ul_proto = IPSEC_ULPROTO_ANY; #ifdef INET if (i < 2) { sp[i]->spidx.src.sa.sa_family = sp[i]->spidx.dst.sa.sa_family = AF_INET; sp[i]->spidx.src.sa.sa_len = sp[i]->spidx.dst.sa.sa_len = sizeof(struct sockaddr_in); continue; } #endif #ifdef INET6 sp[i]->spidx.src.sa.sa_family = sp[i]->spidx.dst.sa.sa_family = AF_INET6; sp[i]->spidx.src.sa.sa_len = sp[i]->spidx.dst.sa.sa_len = sizeof(struct sockaddr_in6); #endif } return (0); fail: for (i = 0; i < IPSEC_SPCOUNT; i++) key_freesp(&sp[i]); return (ENOMEM); } static int ipsec_check_reqid(uint32_t reqid) { struct ipsec_softc *sc; IPSEC_SC_RLOCK_ASSERT(); LIST_FOREACH(sc, &V_ipsec_sc_list, chain) { if (sc->reqid == reqid) return (EEXIST); } return (0); } /* * We use key_newreqid() to automatically obtain unique reqid. * Then we check that given id is unique, i.e. it is not used by * another if_ipsec(4) interface. This macro limits the number of * tries to get unique id. */ #define IPSEC_REQID_TRYCNT 64 static int ipsec_init_reqid(struct ipsec_softc *sc) { uint32_t reqid; int trycount; IPSEC_SC_RLOCK_ASSERT(); if (sc->reqid != 0) /* already initialized */ return (0); trycount = IPSEC_REQID_TRYCNT; while (--trycount > 0) { reqid = key_newreqid(); if (ipsec_check_reqid(reqid) == 0) break; } if (trycount == 0) return (EEXIST); sc->reqid = reqid; return (0); } /* * Set or update reqid for given tunneling interface. * When specified reqid is zero, generate new one. * We are protected by ioctl_sx lock from concurrent id generation. * Also softc would not disappear while we hold ioctl_sx lock. 
*/
/*
 * Returns 0 when nothing had to change or when the tunnel is not yet
 * configured (sc->family == 0), EEXIST when the requested id is in use
 * or no id could be generated, otherwise the result of
 * ipsec_set_tunnel().
 */
static int
ipsec_set_reqid(struct ifnet *ifp, uint32_t reqid)
{
	IPSEC_SC_RLOCK_TRACKER;
	struct ipsec_softc *sc;
	struct secasindex *saidx;

	sx_assert(&ipsec_ioctl_sx, SA_XLOCKED);

	sc = ifp->if_softc;
	/* Same non-zero id as before: nothing to do. */
	if (sc->reqid == reqid && reqid != 0)
		return (0);

	IPSEC_SC_RLOCK();
	if (reqid != 0) {
		/* Check that specified reqid doesn't exist */
		if (ipsec_check_reqid(reqid) != 0) {
			IPSEC_SC_RUNLOCK();
			return (EEXIST);
		}
		sc->reqid = reqid;
	} else {
		/* Generate new reqid */
		if (ipsec_init_reqid(sc) != 0) {
			IPSEC_SC_RUNLOCK();
			return (EEXIST);
		}
	}
	IPSEC_SC_RUNLOCK();

	/* Tunnel isn't fully configured, just return. */
	if (sc->family == 0)
		return (0);

	/* Rebuild the policies with the current endpoints and the new id. */
	saidx = ipsec_getsaidx(sc, IPSEC_DIR_OUTBOUND, sc->family);
	KASSERT(saidx != NULL,
	    ("saidx is NULL, but family is %d", sc->family));
	return (ipsec_set_tunnel(sc, &saidx->src.sa, &saidx->dst.sa,
	    sc->reqid));
}

/*
 * Set tunnel endpoints addresses.
 *
 * The caller must hold ipsec_ioctl_sx exclusively (asserted).
 * Returns 0 when the endpoints are unchanged, EADDRNOTAVAIL when src is
 * not a local address or another interface already uses the same
 * src/dst pair, EEXIST when no reqid could be generated, otherwise the
 * result of ipsec_set_tunnel().
 */
static int
ipsec_set_addresses(struct ifnet *ifp, struct sockaddr *src,
    struct sockaddr *dst)
{
	IPSEC_SC_RLOCK_TRACKER;
	struct ipsec_softc *sc, *tsc;
	struct secasindex *saidx;

	sx_assert(&ipsec_ioctl_sx, SA_XLOCKED);

	sc = ifp->if_softc;
	if (sc->family != 0) {
		saidx = ipsec_getsaidx(sc, IPSEC_DIR_OUTBOUND,
		    src->sa_family);
		if (saidx != NULL && saidx->reqid == sc->reqid &&
		    key_sockaddrcmp(&saidx->src.sa, src, 0) == 0 &&
		    key_sockaddrcmp(&saidx->dst.sa, dst, 0) == 0)
			return (0); /* Nothing has been changed. */

	}
	/*
	 * We cannot service IPsec tunnel when source address is
	 * not our own.
	 */
#ifdef INET
	if (src->sa_family == AF_INET &&
	    in_localip(satosin(src)->sin_addr) == 0)
		return (EADDRNOTAVAIL);
#endif
#ifdef INET6
	/*
	 * NOTE: IPv6 addresses are in kernel internal form with
	 * embedded scope zone id.
	 */
	if (src->sa_family == AF_INET6 &&
	    in6_localip(&satosin6(src)->sin6_addr) == 0)
		return (EADDRNOTAVAIL);
#endif
	/* Check that given addresses aren't already configured */
	IPSEC_SC_RLOCK();
	LIST_FOREACH(tsc, &V_ipsec_sc_list, chain) {
		/* Skip ourselves and tunnels of a different family. */
		if (tsc == sc || tsc->family != src->sa_family)
			continue;
		saidx = ipsec_getsaidx(tsc, IPSEC_DIR_OUTBOUND, tsc->family);
		if (key_sockaddrcmp(&saidx->src.sa, src, 0) == 0 &&
		    key_sockaddrcmp(&saidx->dst.sa, dst, 0) == 0) {
			/* We already have tunnel with such addresses */
			IPSEC_SC_RUNLOCK();
			return (EADDRNOTAVAIL);
		}
	}
	/* If reqid is not set, generate new one. */
	if (ipsec_init_reqid(sc) != 0) {
		IPSEC_SC_RUNLOCK();
		return (EEXIST);
	}
	IPSEC_SC_RUNLOCK();
	return (ipsec_set_tunnel(sc, src, dst, sc->reqid));
}

/*
 * Install a fresh set of security policies for the given endpoints and
 * reqid, replacing whatever the softc held before.
 *
 * The caller must hold ipsec_ioctl_sx exclusively (asserted).
 * Returns 0 on success, ENOMEM when the policies cannot be allocated
 * (IFF_DRV_RUNNING is cleared in that case) or EAGAIN when
 * key_register_ifnet() rejects them.
 */
static int
ipsec_set_tunnel(struct ipsec_softc *sc, struct sockaddr *src,
    struct sockaddr *dst, uint32_t reqid)
{
	struct secpolicy *sp[IPSEC_SPCOUNT];
	struct secpolicy *oldsp[IPSEC_SPCOUNT];
	int i, f;

	sx_assert(&ipsec_ioctl_sx, SA_XLOCKED);
	/* Allocate SP with new addresses. */
	if (ipsec_newpolicies(sc, sp, src, dst, reqid) == 0) {
		/* Add new policies to SPDB */
		if (key_register_ifnet(sp, IPSEC_SPCOUNT) != 0) {
			for (i = 0; i < IPSEC_SPCOUNT; i++)
				key_freesp(&sp[i]);
			return (EAGAIN);
		}
		IPSEC_SC_WLOCK();
		/*
		 * f remembers whether a tunnel was configured before the
		 * swap; only then is the softc on a hash chain and only
		 * then does oldsp[] hold policies to release below.
		 */
		if ((f = sc->family) != 0)
			LIST_REMOVE(sc, hash);
		IPSEC_WLOCK(sc);
		for (i = 0; i < IPSEC_SPCOUNT; i++) {
			oldsp[i] = sc->sp[i];
			sc->sp[i] = sp[i];
		}
		sc->family = src->sa_family;
		IPSEC_WUNLOCK(sc);
		/* The hash bucket is selected by the softc's reqid. */
		LIST_INSERT_HEAD(SCHASH_HASH(sc->reqid), sc, hash);
		IPSEC_SC_WUNLOCK();
	} else {
		sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
		return (ENOMEM);
	}

	sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
	/* Release the replaced policies after both locks were dropped. */
	if (f != 0) {
		key_unregister_ifnet(oldsp, IPSEC_SPCOUNT);
		for (i = 0; i < IPSEC_SPCOUNT; i++)
			key_freesp(&oldsp[i]);
	}
	return (0);
}

/*
 * Tear down the tunnel configuration of an interface: clear
 * IFF_DRV_RUNNING and, if a tunnel was configured, unhash the softc,
 * detach its policies, reset sc->family to 0 and release the policies.
 *
 * The caller must hold ipsec_ioctl_sx exclusively (asserted).  A
 * non-zero "locked" means the caller has already taken
 * IPSEC_SC_WLOCK(), so it is neither acquired nor released here.
 */
static void
ipsec_delete_tunnel(struct ifnet *ifp, int locked)
{
	struct ipsec_softc *sc = ifp->if_softc;
	struct secpolicy *oldsp[IPSEC_SPCOUNT];
	int i;

	sx_assert(&ipsec_ioctl_sx, SA_XLOCKED);

	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	if (sc->family != 0) {
		if (!locked)
			IPSEC_SC_WLOCK();
		/* Remove from hash table */
		LIST_REMOVE(sc, hash);
		IPSEC_WLOCK(sc);
		for (i = 0; i < IPSEC_SPCOUNT; i++) {
			oldsp[i] = sc->sp[i];
			sc->sp[i] = NULL;
		}
		sc->family = 0;
		IPSEC_WUNLOCK(sc);
		if (!locked)
			IPSEC_SC_WUNLOCK();
		key_unregister_ifnet(oldsp, IPSEC_SPCOUNT);
		for (i = 0; i < IPSEC_SPCOUNT; i++)
			key_freesp(&oldsp[i]);
	}
}
Index: head/sys/net/if_spppsubr.c =================================================================== --- head/sys/net/if_spppsubr.c (revision 331796) +++ head/sys/net/if_spppsubr.c (revision 331797) @@ -1,5423 +1,5423 @@ /* * Synchronous PPP/Cisco/Frame Relay link level subroutines. * Keepalive protocol implemented in both Cisco and PPP modes. */ /*- * Copyright (C) 1994-2000 Cronyx Engineering. * Author: Serge Vakulenko, * * Heavily revamped to conform to RFC 1661. * Copyright (C) 1997, 2001 Joerg Wunsch.
* * This software is distributed with NO WARRANTIES, not even the implied * warranties for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * Authors grant any other persons or organisations permission to use * or modify this software as long as this message is kept with the software, * all derivative works or modified versions. * * From: Version 2.4, Thu Apr 30 17:17:21 MSD 1997 * * $FreeBSD$ */ #include #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #endif #ifdef INET6 #include #endif #include #include #define IOCTL_CMD_T u_long #define MAXALIVECNT 3 /* max. alive packets */ /* * Interface flags that can be set in an ifconfig command. * * Setting link0 will make the link passive, i.e. it will be marked * as being administrative openable, but won't be opened to begin * with. Incoming calls will be answered, or subsequent calls with * -link1 will cause the administrative open of the LCP layer. * * Setting link1 will cause the link to auto-dial only as packets * arrive to be sent. * * Setting IFF_DEBUG will syslog the option negotiation and state * transitions at level kern.debug. Note: all logs consistently look * like * * <if-name><unit>: <proto-name> <additional info...> * * with <if-name><unit> being something like "bppp0", and <proto-name> * being one of "lcp", "ipcp", "cisco", "chap", "pap", etc.
*/ #define IFF_PASSIVE IFF_LINK0 /* wait passively for connection */ #define IFF_AUTO IFF_LINK1 /* auto-dial on output */ #define IFF_CISCO IFF_LINK2 /* presumably selects Cisco mode; old text duplicated IFF_AUTO's -- TODO confirm */ #define PPP_ALLSTATIONS 0xff /* All-Stations broadcast address */ #define PPP_UI 0x03 /* Unnumbered Information */ #define PPP_IP 0x0021 /* Internet Protocol */ #define PPP_ISO 0x0023 /* ISO OSI Protocol */ #define PPP_XNS 0x0025 /* Xerox NS Protocol */ #define PPP_IPX 0x002b /* Novell IPX Protocol */ #define PPP_VJ_COMP 0x002d /* VJ compressed TCP/IP */ #define PPP_VJ_UCOMP 0x002f /* VJ uncompressed TCP/IP */ #define PPP_IPV6 0x0057 /* Internet Protocol Version 6 */ #define PPP_LCP 0xc021 /* Link Control Protocol */ #define PPP_PAP 0xc023 /* Password Authentication Protocol */ #define PPP_CHAP 0xc223 /* Challenge-Handshake Auth Protocol */ #define PPP_IPCP 0x8021 /* Internet Protocol Control Protocol */ #define PPP_IPV6CP 0x8057 /* IPv6 Control Protocol */ #define CONF_REQ 1 /* PPP configure request */ #define CONF_ACK 2 /* PPP configure acknowledge */ #define CONF_NAK 3 /* PPP configure negative ack */ #define CONF_REJ 4 /* PPP configure reject */ #define TERM_REQ 5 /* PPP terminate request */ #define TERM_ACK 6 /* PPP terminate acknowledge */ #define CODE_REJ 7 /* PPP code reject */ #define PROTO_REJ 8 /* PPP protocol reject */ #define ECHO_REQ 9 /* PPP echo request */ #define ECHO_REPLY 10 /* PPP echo reply */ #define DISC_REQ 11 /* PPP discard request */ #define LCP_OPT_MRU 1 /* maximum receive unit */ #define LCP_OPT_ASYNC_MAP 2 /* async control character map */ #define LCP_OPT_AUTH_PROTO 3 /* authentication protocol */ #define LCP_OPT_QUAL_PROTO 4 /* quality protocol */ #define LCP_OPT_MAGIC 5 /* magic number */ #define LCP_OPT_RESERVED 6 /* reserved */ #define LCP_OPT_PROTO_COMP 7 /* protocol field compression */ #define LCP_OPT_ADDR_COMP 8 /* address/control field compression */ #define IPCP_OPT_ADDRESSES 1 /* both IP addresses; deprecated */ #define IPCP_OPT_COMPRESSION 2 /* IP
compression protocol (VJ) */ #define IPCP_OPT_ADDRESS 3 /* local IP address */ #define IPV6CP_OPT_IFID 1 /* interface identifier */ #define IPV6CP_OPT_COMPRESSION 2 /* IPv6 compression protocol */ #define IPCP_COMP_VJ 0x2d /* Code for VJ compression */ #define PAP_REQ 1 /* PAP name/password request */ #define PAP_ACK 2 /* PAP acknowledge */ #define PAP_NAK 3 /* PAP fail */ #define CHAP_CHALLENGE 1 /* CHAP challenge request */ #define CHAP_RESPONSE 2 /* CHAP challenge response */ #define CHAP_SUCCESS 3 /* CHAP response ok */ #define CHAP_FAILURE 4 /* CHAP response failed */ #define CHAP_MD5 5 /* hash algorithm - MD5 */ #define CISCO_MULTICAST 0x8f /* Cisco multicast address */ #define CISCO_UNICAST 0x0f /* Cisco unicast address */ #define CISCO_KEEPALIVE 0x8035 /* Cisco keepalive protocol */ #define CISCO_ADDR_REQ 0 /* Cisco address request */ #define CISCO_ADDR_REPLY 1 /* Cisco address reply */ #define CISCO_KEEPALIVE_REQ 2 /* Cisco keepalive request */ /* states are named and numbered according to RFC 1661 */ #define STATE_INITIAL 0 #define STATE_STARTING 1 #define STATE_CLOSED 2 #define STATE_STOPPED 3 #define STATE_CLOSING 4 #define STATE_STOPPING 5 #define STATE_REQ_SENT 6 #define STATE_ACK_RCVD 7 #define STATE_ACK_SENT 8 #define STATE_OPENED 9 static MALLOC_DEFINE(M_SPPP, "sppp", "synchronous PPP interface internals"); struct ppp_header { u_char address; u_char control; u_short protocol; } __packed; #define PPP_HEADER_LEN sizeof (struct ppp_header) struct lcp_header { u_char type; u_char ident; u_short len; } __packed; #define LCP_HEADER_LEN sizeof (struct lcp_header) struct cisco_packet { u_long type; u_long par1; u_long par2; u_short rel; u_short time0; u_short time1; } __packed; #define CISCO_PACKET_LEN sizeof (struct cisco_packet) /* * We follow the spelling and capitalization of RFC 1661 here, to make * it easier comparing with the standard. 
Please refer to this RFC in * case you can't make sense out of these abbreviation; it will also * explain the semantics related to the various events and actions. */ struct cp { u_short proto; /* PPP control protocol number */ u_char protoidx; /* index into state table in struct sppp */ u_char flags; #define CP_LCP 0x01 /* this is the LCP */ #define CP_AUTH 0x02 /* this is an authentication protocol */ #define CP_NCP 0x04 /* this is a NCP */ #define CP_QUAL 0x08 /* this is a quality reporting protocol */ const char *name; /* name of this control protocol */ /* event handlers */ void (*Up)(struct sppp *sp); void (*Down)(struct sppp *sp); void (*Open)(struct sppp *sp); void (*Close)(struct sppp *sp); void (*TO)(void *sp); int (*RCR)(struct sppp *sp, struct lcp_header *h, int len); void (*RCN_rej)(struct sppp *sp, struct lcp_header *h, int len); void (*RCN_nak)(struct sppp *sp, struct lcp_header *h, int len); /* actions */ void (*tlu)(struct sppp *sp); void (*tld)(struct sppp *sp); void (*tls)(struct sppp *sp); void (*tlf)(struct sppp *sp); void (*scr)(struct sppp *sp); }; #define SPP_FMT "%s: " #define SPP_ARGS(ifp) (ifp)->if_xname #define SPPP_LOCK(sp) mtx_lock (&(sp)->mtx) #define SPPP_UNLOCK(sp) mtx_unlock (&(sp)->mtx) #define SPPP_LOCK_ASSERT(sp) mtx_assert (&(sp)->mtx, MA_OWNED) #define SPPP_LOCK_OWNED(sp) mtx_owned (&(sp)->mtx) #ifdef INET /* * The following disgusting hack gets around the problem that IP TOS * can't be set yet. We want to put "interactive" traffic on a high * priority queue. To decide if traffic is interactive, we check that * a) it is TCP and b) one of its ports is telnet, rlogin or ftp control. * * XXX is this really still necessary? 
- joerg - */ static const u_short interactive_ports[8] = { 0, 513, 0, 0, 0, 21, 0, 23, }; #define INTERACTIVE(p) (interactive_ports[(p) & 7] == (p)) #endif /* almost every function needs these */ #define STDDCL \ struct ifnet *ifp = SP2IFP(sp); \ int debug = ifp->if_flags & IFF_DEBUG static int sppp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro); static void sppp_cisco_send(struct sppp *sp, int type, long par1, long par2); static void sppp_cisco_input(struct sppp *sp, struct mbuf *m); static void sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m); static void sppp_cp_send(struct sppp *sp, u_short proto, u_char type, u_char ident, u_short len, void *data); /* static void sppp_cp_timeout(void *arg); */ static void sppp_cp_change_state(const struct cp *cp, struct sppp *sp, int newstate); static void sppp_auth_send(const struct cp *cp, struct sppp *sp, unsigned int type, unsigned int id, ...); static void sppp_up_event(const struct cp *cp, struct sppp *sp); static void sppp_down_event(const struct cp *cp, struct sppp *sp); static void sppp_open_event(const struct cp *cp, struct sppp *sp); static void sppp_close_event(const struct cp *cp, struct sppp *sp); static void sppp_to_event(const struct cp *cp, struct sppp *sp); static void sppp_null(struct sppp *sp); static void sppp_pp_up(struct sppp *sp); static void sppp_pp_down(struct sppp *sp); static void sppp_lcp_init(struct sppp *sp); static void sppp_lcp_up(struct sppp *sp); static void sppp_lcp_down(struct sppp *sp); static void sppp_lcp_open(struct sppp *sp); static void sppp_lcp_close(struct sppp *sp); static void sppp_lcp_TO(void *sp); static int sppp_lcp_RCR(struct sppp *sp, struct lcp_header *h, int len); static void sppp_lcp_RCN_rej(struct sppp *sp, struct lcp_header *h, int len); static void sppp_lcp_RCN_nak(struct sppp *sp, struct lcp_header *h, int len); static void sppp_lcp_tlu(struct sppp *sp); static void sppp_lcp_tld(struct sppp *sp); static void 
sppp_lcp_tls(struct sppp *sp); static void sppp_lcp_tlf(struct sppp *sp); static void sppp_lcp_scr(struct sppp *sp); static void sppp_lcp_check_and_close(struct sppp *sp); static int sppp_ncp_check(struct sppp *sp); static void sppp_ipcp_init(struct sppp *sp); static void sppp_ipcp_up(struct sppp *sp); static void sppp_ipcp_down(struct sppp *sp); static void sppp_ipcp_open(struct sppp *sp); static void sppp_ipcp_close(struct sppp *sp); static void sppp_ipcp_TO(void *sp); static int sppp_ipcp_RCR(struct sppp *sp, struct lcp_header *h, int len); static void sppp_ipcp_RCN_rej(struct sppp *sp, struct lcp_header *h, int len); static void sppp_ipcp_RCN_nak(struct sppp *sp, struct lcp_header *h, int len); static void sppp_ipcp_tlu(struct sppp *sp); static void sppp_ipcp_tld(struct sppp *sp); static void sppp_ipcp_tls(struct sppp *sp); static void sppp_ipcp_tlf(struct sppp *sp); static void sppp_ipcp_scr(struct sppp *sp); static void sppp_ipv6cp_init(struct sppp *sp); static void sppp_ipv6cp_up(struct sppp *sp); static void sppp_ipv6cp_down(struct sppp *sp); static void sppp_ipv6cp_open(struct sppp *sp); static void sppp_ipv6cp_close(struct sppp *sp); static void sppp_ipv6cp_TO(void *sp); static int sppp_ipv6cp_RCR(struct sppp *sp, struct lcp_header *h, int len); static void sppp_ipv6cp_RCN_rej(struct sppp *sp, struct lcp_header *h, int len); static void sppp_ipv6cp_RCN_nak(struct sppp *sp, struct lcp_header *h, int len); static void sppp_ipv6cp_tlu(struct sppp *sp); static void sppp_ipv6cp_tld(struct sppp *sp); static void sppp_ipv6cp_tls(struct sppp *sp); static void sppp_ipv6cp_tlf(struct sppp *sp); static void sppp_ipv6cp_scr(struct sppp *sp); static void sppp_pap_input(struct sppp *sp, struct mbuf *m); static void sppp_pap_init(struct sppp *sp); static void sppp_pap_open(struct sppp *sp); static void sppp_pap_close(struct sppp *sp); static void sppp_pap_TO(void *sp); static void sppp_pap_my_TO(void *sp); static void sppp_pap_tlu(struct sppp *sp); static void 
sppp_pap_tld(struct sppp *sp); static void sppp_pap_scr(struct sppp *sp); static void sppp_chap_input(struct sppp *sp, struct mbuf *m); static void sppp_chap_init(struct sppp *sp); static void sppp_chap_open(struct sppp *sp); static void sppp_chap_close(struct sppp *sp); static void sppp_chap_TO(void *sp); static void sppp_chap_tlu(struct sppp *sp); static void sppp_chap_tld(struct sppp *sp); static void sppp_chap_scr(struct sppp *sp); static const char *sppp_auth_type_name(u_short proto, u_char type); static const char *sppp_cp_type_name(u_char type); #ifdef INET static const char *sppp_dotted_quad(u_long addr); static const char *sppp_ipcp_opt_name(u_char opt); #endif #ifdef INET6 static const char *sppp_ipv6cp_opt_name(u_char opt); #endif static const char *sppp_lcp_opt_name(u_char opt); static const char *sppp_phase_name(enum ppp_phase phase); static const char *sppp_proto_name(u_short proto); static const char *sppp_state_name(int state); static int sppp_params(struct sppp *sp, u_long cmd, void *data); static int sppp_strnlen(u_char *p, int max); static void sppp_keepalive(void *dummy); static void sppp_phase_network(struct sppp *sp); static void sppp_print_bytes(const u_char *p, u_short len); static void sppp_print_string(const char *p, u_short len); static void sppp_qflush(struct ifqueue *ifq); #ifdef INET static void sppp_set_ip_addr(struct sppp *sp, u_long src); #endif #ifdef INET6 static void sppp_get_ip6_addrs(struct sppp *sp, struct in6_addr *src, struct in6_addr *dst, struct in6_addr *srcmask); #ifdef IPV6CP_MYIFID_DYN static void sppp_set_ip6_addr(struct sppp *sp, const struct in6_addr *src); static void sppp_gen_ip6_addr(struct sppp *sp, const struct in6_addr *src); #endif static void sppp_suggest_ip6_addr(struct sppp *sp, struct in6_addr *src); #endif /* if_start () wrapper */ static void sppp_ifstart (struct ifnet *ifp); /* our control protocol descriptors */ static const struct cp lcp = { PPP_LCP, IDX_LCP, CP_LCP, "lcp", sppp_lcp_up, 
sppp_lcp_down, sppp_lcp_open, sppp_lcp_close, sppp_lcp_TO, sppp_lcp_RCR, sppp_lcp_RCN_rej, sppp_lcp_RCN_nak, sppp_lcp_tlu, sppp_lcp_tld, sppp_lcp_tls, sppp_lcp_tlf, sppp_lcp_scr }; static const struct cp ipcp = { PPP_IPCP, IDX_IPCP, #ifdef INET /* don't run IPCP if there's no IPv4 support */ CP_NCP, #else 0, #endif "ipcp", sppp_ipcp_up, sppp_ipcp_down, sppp_ipcp_open, sppp_ipcp_close, sppp_ipcp_TO, sppp_ipcp_RCR, sppp_ipcp_RCN_rej, sppp_ipcp_RCN_nak, sppp_ipcp_tlu, sppp_ipcp_tld, sppp_ipcp_tls, sppp_ipcp_tlf, sppp_ipcp_scr }; static const struct cp ipv6cp = { PPP_IPV6CP, IDX_IPV6CP, #ifdef INET6 /*don't run IPv6CP if there's no IPv6 support*/ CP_NCP, #else 0, #endif "ipv6cp", sppp_ipv6cp_up, sppp_ipv6cp_down, sppp_ipv6cp_open, sppp_ipv6cp_close, sppp_ipv6cp_TO, sppp_ipv6cp_RCR, sppp_ipv6cp_RCN_rej, sppp_ipv6cp_RCN_nak, sppp_ipv6cp_tlu, sppp_ipv6cp_tld, sppp_ipv6cp_tls, sppp_ipv6cp_tlf, sppp_ipv6cp_scr }; static const struct cp pap = { PPP_PAP, IDX_PAP, CP_AUTH, "pap", sppp_null, sppp_null, sppp_pap_open, sppp_pap_close, sppp_pap_TO, 0, 0, 0, sppp_pap_tlu, sppp_pap_tld, sppp_null, sppp_null, sppp_pap_scr }; static const struct cp chap = { PPP_CHAP, IDX_CHAP, CP_AUTH, "chap", sppp_null, sppp_null, sppp_chap_open, sppp_chap_close, sppp_chap_TO, 0, 0, 0, sppp_chap_tlu, sppp_chap_tld, sppp_null, sppp_null, sppp_chap_scr }; static const struct cp *cps[IDX_COUNT] = { &lcp, /* IDX_LCP */ &ipcp, /* IDX_IPCP */ &ipv6cp, /* IDX_IPV6CP */ &pap, /* IDX_PAP */ &chap, /* IDX_CHAP */ }; static void* sppp_alloc(u_char type, struct ifnet *ifp) { struct sppp *sp; sp = malloc(sizeof(struct sppp), M_SPPP, M_WAITOK | M_ZERO); sp->pp_ifp = ifp; return (sp); } static void sppp_free(void *com, u_char type) { free(com, M_SPPP); } static int sppp_modevent(module_t mod, int type, void *unused) { switch (type) { case MOD_LOAD: /* * XXX: should probably be IFT_SPPP, but it's fairly * harmless to allocate struct sppp's for non-sppp * interfaces. 
*/ if_register_com_alloc(IFT_PPP, sppp_alloc, sppp_free); break; case MOD_UNLOAD: /* if_deregister_com_alloc(IFT_PPP); */ return EACCES; default: return EOPNOTSUPP; } return 0; } static moduledata_t spppmod = { "sppp", sppp_modevent, 0 }; MODULE_VERSION(sppp, 1); DECLARE_MODULE(sppp, spppmod, SI_SUB_DRIVERS, SI_ORDER_ANY); /* * Exported functions, comprising our interface to the lower layer. */ /* * Process the received packet. */ void sppp_input(struct ifnet *ifp, struct mbuf *m) { struct ppp_header *h; int isr = -1; struct sppp *sp = IFP2SP(ifp); int debug, do_account = 0; #ifdef INET int hlen, vjlen; u_char *iphdr; #endif SPPP_LOCK(sp); debug = ifp->if_flags & IFF_DEBUG; if (ifp->if_flags & IFF_UP) /* Count received bytes, add FCS and one flag */ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len + 3); if (m->m_pkthdr.len <= PPP_HEADER_LEN) { /* Too small packet, drop it. */ if (debug) log(LOG_DEBUG, SPP_FMT "input packet is too small, %d bytes\n", SPP_ARGS(ifp), m->m_pkthdr.len); drop: m_freem (m); SPPP_UNLOCK(sp); drop2: if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); return; } if (sp->pp_mode == PP_FR) { sppp_fr_input (sp, m); SPPP_UNLOCK(sp); return; } /* Get PPP header. 
*/ h = mtod (m, struct ppp_header*); m_adj (m, PPP_HEADER_LEN); switch (h->address) { case PPP_ALLSTATIONS: if (h->control != PPP_UI) goto invalid; if (sp->pp_mode == IFF_CISCO) { if (debug) log(LOG_DEBUG, SPP_FMT "PPP packet in Cisco mode " "\n", SPP_ARGS(ifp), h->address, h->control, ntohs(h->protocol)); goto drop; } switch (ntohs (h->protocol)) { default: if (debug) log(LOG_DEBUG, SPP_FMT "rejecting protocol " "\n", SPP_ARGS(ifp), h->address, h->control, ntohs(h->protocol)); if (sp->state[IDX_LCP] == STATE_OPENED) sppp_cp_send (sp, PPP_LCP, PROTO_REJ, ++sp->pp_seq[IDX_LCP], m->m_pkthdr.len + 2, &h->protocol); if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto drop; case PPP_LCP: sppp_cp_input(&lcp, sp, m); m_freem (m); SPPP_UNLOCK(sp); return; case PPP_PAP: if (sp->pp_phase >= PHASE_AUTHENTICATE) sppp_pap_input(sp, m); m_freem (m); SPPP_UNLOCK(sp); return; case PPP_CHAP: if (sp->pp_phase >= PHASE_AUTHENTICATE) sppp_chap_input(sp, m); m_freem (m); SPPP_UNLOCK(sp); return; #ifdef INET case PPP_IPCP: if (sp->pp_phase == PHASE_NETWORK) sppp_cp_input(&ipcp, sp, m); m_freem (m); SPPP_UNLOCK(sp); return; case PPP_IP: if (sp->state[IDX_IPCP] == STATE_OPENED) { isr = NETISR_IP; } do_account++; break; case PPP_VJ_COMP: if (sp->state[IDX_IPCP] == STATE_OPENED) { if ((vjlen = sl_uncompress_tcp_core(mtod(m, u_char *), m->m_len, m->m_len, TYPE_COMPRESSED_TCP, sp->pp_comp, &iphdr, &hlen)) <= 0) { if (debug) log(LOG_INFO, SPP_FMT "VJ uncompress failed on compressed packet\n", SPP_ARGS(ifp)); goto drop; } /* * Trim the VJ header off the packet, and prepend * the uncompressed IP header (which will usually * end up in two chained mbufs since there's not * enough leading space in the existing mbuf). 
*/ m_adj(m, vjlen); M_PREPEND(m, hlen, M_NOWAIT); if (m == NULL) { SPPP_UNLOCK(sp); goto drop2; } bcopy(iphdr, mtod(m, u_char *), hlen); isr = NETISR_IP; } do_account++; break; case PPP_VJ_UCOMP: if (sp->state[IDX_IPCP] == STATE_OPENED) { if (sl_uncompress_tcp_core(mtod(m, u_char *), m->m_len, m->m_len, TYPE_UNCOMPRESSED_TCP, sp->pp_comp, &iphdr, &hlen) != 0) { if (debug) log(LOG_INFO, SPP_FMT "VJ uncompress failed on uncompressed packet\n", SPP_ARGS(ifp)); goto drop; } isr = NETISR_IP; } do_account++; break; #endif #ifdef INET6 case PPP_IPV6CP: if (sp->pp_phase == PHASE_NETWORK) sppp_cp_input(&ipv6cp, sp, m); m_freem (m); SPPP_UNLOCK(sp); return; case PPP_IPV6: if (sp->state[IDX_IPV6CP] == STATE_OPENED) isr = NETISR_IPV6; do_account++; break; #endif } break; case CISCO_MULTICAST: case CISCO_UNICAST: /* Don't check the control field here (RFC 1547). */ if (sp->pp_mode != IFF_CISCO) { if (debug) log(LOG_DEBUG, SPP_FMT "Cisco packet in PPP mode " "\n", SPP_ARGS(ifp), h->address, h->control, ntohs(h->protocol)); goto drop; } switch (ntohs (h->protocol)) { default: if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto invalid; case CISCO_KEEPALIVE: sppp_cisco_input (sp, m); m_freem (m); SPPP_UNLOCK(sp); return; #ifdef INET case ETHERTYPE_IP: isr = NETISR_IP; do_account++; break; #endif #ifdef INET6 case ETHERTYPE_IPV6: isr = NETISR_IPV6; do_account++; break; #endif } break; default: /* Invalid PPP packet. */ invalid: if (debug) log(LOG_DEBUG, SPP_FMT "invalid input packet " "\n", SPP_ARGS(ifp), h->address, h->control, ntohs(h->protocol)); goto drop; } if (! (ifp->if_flags & IFF_UP) || isr == -1) goto drop; SPPP_UNLOCK(sp); M_SETFIB(m, ifp->if_fib); /* Check queue. */ if (netisr_queue(isr, m)) { /* (0) on success. */ if (debug) log(LOG_DEBUG, SPP_FMT "protocol queue overflow\n", SPP_ARGS(ifp)); goto drop2; } if (do_account) /* * Do only account for network packets, not for control * packets. This is used by some subsystems to detect * idle lines. 
*/ sp->pp_last_recv = time_uptime; } static void sppp_ifstart_sched(void *dummy) { struct sppp *sp = dummy; sp->if_start(SP2IFP(sp)); } /* if_start () wrapper function. We use it to schedule real if_start () for * execution. We can't call it directly */ static void sppp_ifstart(struct ifnet *ifp) { struct sppp *sp = IFP2SP(ifp); if (SPPP_LOCK_OWNED(sp)) { if (callout_pending(&sp->ifstart_callout)) return; callout_reset(&sp->ifstart_callout, 1, sppp_ifstart_sched, (void *)sp); } else { sp->if_start(ifp); } } /* * Enqueue transmit packet. */ static int sppp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { struct sppp *sp = IFP2SP(ifp); struct ppp_header *h; struct ifqueue *ifq = NULL; int error, rv = 0; #ifdef INET int ipproto = PPP_IP; #endif int debug = ifp->if_flags & IFF_DEBUG; SPPP_LOCK(sp); if (!(ifp->if_flags & IFF_UP) || (!(ifp->if_flags & IFF_AUTO) && !(ifp->if_drv_flags & IFF_DRV_RUNNING))) { #ifdef INET6 drop: #endif m_freem (m); SPPP_UNLOCK(sp); return (ENETDOWN); } if ((ifp->if_flags & IFF_AUTO) && !(ifp->if_drv_flags & IFF_DRV_RUNNING)) { #ifdef INET6 /* * XXX * * Hack to prevent the initialization-time generated * IPv6 multicast packet to erroneously cause a * dialout event in case IPv6 has been * administratively disabled on that interface. */ if (dst->sa_family == AF_INET6 && !(sp->confflags & CONF_ENABLE_IPV6)) goto drop; #endif /* * Interface is not yet running, but auto-dial. Need * to start LCP for it. */ ifp->if_drv_flags |= IFF_DRV_RUNNING; lcp.Open(sp); } #ifdef INET if (dst->sa_family == AF_INET) { /* XXX Check mbuf length here? 
*/ struct ip *ip = mtod (m, struct ip*); struct tcphdr *tcp = (struct tcphdr*) ((long*)ip + ip->ip_hl); /* * When using dynamic local IP address assignment by using * 0.0.0.0 as a local address, the first TCP session will * not connect because the local TCP checksum is computed * using 0.0.0.0 which will later become our real IP address * so the TCP checksum computed at the remote end will * become invalid. So we * - don't let packets with src ip addr 0 thru * - we flag TCP packets with src ip 0 as an error */ if(ip->ip_src.s_addr == INADDR_ANY) /* -hm */ { m_freem(m); SPPP_UNLOCK(sp); if(ip->ip_p == IPPROTO_TCP) return(EADDRNOTAVAIL); else return(0); } /* * Put low delay, telnet, rlogin and ftp control packets * in front of the queue or let ALTQ take care. */ if (ALTQ_IS_ENABLED(&ifp->if_snd)) ; else if (_IF_QFULL(&sp->pp_fastq)) ; else if (ip->ip_tos & IPTOS_LOWDELAY) ifq = &sp->pp_fastq; else if (m->m_len < sizeof *ip + sizeof *tcp) ; else if (ip->ip_p != IPPROTO_TCP) ; else if (INTERACTIVE (ntohs (tcp->th_sport))) ifq = &sp->pp_fastq; else if (INTERACTIVE (ntohs (tcp->th_dport))) ifq = &sp->pp_fastq; /* * Do IP Header compression */ if (sp->pp_mode != IFF_CISCO && sp->pp_mode != PP_FR && (sp->ipcp.flags & IPCP_VJ) && ip->ip_p == IPPROTO_TCP) switch (sl_compress_tcp(m, ip, sp->pp_comp, sp->ipcp.compress_cid)) { case TYPE_COMPRESSED_TCP: ipproto = PPP_VJ_COMP; break; case TYPE_UNCOMPRESSED_TCP: ipproto = PPP_VJ_UCOMP; break; case TYPE_IP: ipproto = PPP_IP; break; default: m_freem(m); SPPP_UNLOCK(sp); return (EINVAL); } } #endif #ifdef INET6 if (dst->sa_family == AF_INET6) { /* XXX do something tricky here? */ } #endif if (sp->pp_mode == PP_FR) { /* Add frame relay header. */ m = sppp_fr_header (sp, m, dst->sa_family); if (! m) goto nobufs; goto out; } /* * Prepend general data packet PPP header. For now, IP only. */ M_PREPEND (m, PPP_HEADER_LEN, M_NOWAIT); if (! 
m) { nobufs: if (debug) log(LOG_DEBUG, SPP_FMT "no memory for transmit header\n", SPP_ARGS(ifp)); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); SPPP_UNLOCK(sp); return (ENOBUFS); } /* * May want to check size of packet * (albeit due to the implementation it's always enough) */ h = mtod (m, struct ppp_header*); if (sp->pp_mode == IFF_CISCO) { h->address = CISCO_UNICAST; /* unicast address */ h->control = 0; } else { h->address = PPP_ALLSTATIONS; /* broadcast address */ h->control = PPP_UI; /* Unnumbered Info */ } switch (dst->sa_family) { #ifdef INET case AF_INET: /* Internet Protocol */ if (sp->pp_mode == IFF_CISCO) h->protocol = htons (ETHERTYPE_IP); else { /* * Don't choke with an ENETDOWN early. It's * possible that we just started dialing out, * so don't drop the packet immediately. If * we notice that we run out of buffer space * below, we will however remember that we are * not ready to carry IP packets, and return * ENETDOWN, as opposed to ENOBUFS. */ h->protocol = htons(ipproto); if (sp->state[IDX_IPCP] != STATE_OPENED) rv = ENETDOWN; } break; #endif #ifdef INET6 case AF_INET6: /* Internet Protocol */ if (sp->pp_mode == IFF_CISCO) h->protocol = htons (ETHERTYPE_IPV6); else { /* * Don't choke with an ENETDOWN early. It's * possible that we just started dialing out, * so don't drop the packet immediately. If * we notice that we run out of buffer space * below, we will however remember that we are * not ready to carry IP packets, and return * ENETDOWN, as opposed to ENOBUFS. */ h->protocol = htons(PPP_IPV6); if (sp->state[IDX_IPV6CP] != STATE_OPENED) rv = ENETDOWN; } break; #endif default: m_freem (m); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); SPPP_UNLOCK(sp); return (EAFNOSUPPORT); } /* * Queue message on interface, and start output if interface * not yet active. */ out: if (ifq != NULL) error = !(IF_HANDOFF_ADJ(ifq, m, ifp, 3)); else IFQ_HANDOFF_ADJ(ifp, m, 3, error); if (error) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); SPPP_UNLOCK(sp); return (rv? 
rv: ENOBUFS); } SPPP_UNLOCK(sp); /* * Unlike in sppp_input(), we can always bump the timestamp * here since sppp_output() is only called on behalf of * network-layer traffic; control-layer traffic is handled * by sppp_cp_send(). */ sp->pp_last_sent = time_uptime; return (0); } void sppp_attach(struct ifnet *ifp) { struct sppp *sp = IFP2SP(ifp); /* Initialize mtx lock */ mtx_init(&sp->mtx, "sppp", MTX_NETWORK_LOCK, MTX_DEF | MTX_RECURSE); /* Initialize keepalive handler. */ callout_init(&sp->keepalive_callout, 1); callout_reset(&sp->keepalive_callout, hz * 10, sppp_keepalive, (void *)sp); ifp->if_mtu = PP_MTU; ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST; ifp->if_output = sppp_output; #if 0 sp->pp_flags = PP_KEEPALIVE; #endif ifp->if_snd.ifq_maxlen = 32; sp->pp_fastq.ifq_maxlen = 32; sp->pp_cpq.ifq_maxlen = 20; sp->pp_loopcnt = 0; sp->pp_alivecnt = 0; bzero(&sp->pp_seq[0], sizeof(sp->pp_seq)); bzero(&sp->pp_rseq[0], sizeof(sp->pp_rseq)); sp->pp_phase = PHASE_DEAD; sp->pp_up = sppp_pp_up; sp->pp_down = sppp_pp_down; if(!mtx_initialized(&sp->pp_cpq.ifq_mtx)) mtx_init(&sp->pp_cpq.ifq_mtx, "sppp_cpq", NULL, MTX_DEF); if(!mtx_initialized(&sp->pp_fastq.ifq_mtx)) mtx_init(&sp->pp_fastq.ifq_mtx, "sppp_fastq", NULL, MTX_DEF); sp->pp_last_recv = sp->pp_last_sent = time_uptime; sp->confflags = 0; #ifdef INET sp->confflags |= CONF_ENABLE_VJ; #endif #ifdef INET6 sp->confflags |= CONF_ENABLE_IPV6; #endif callout_init(&sp->ifstart_callout, 1); sp->if_start = ifp->if_start; ifp->if_start = sppp_ifstart; sp->pp_comp = malloc(sizeof(struct slcompress), M_TEMP, M_WAITOK); sl_compress_init(sp->pp_comp, -1); sppp_lcp_init(sp); sppp_ipcp_init(sp); sppp_ipv6cp_init(sp); sppp_pap_init(sp); sppp_chap_init(sp); } void sppp_detach(struct ifnet *ifp) { struct sppp *sp = IFP2SP(ifp); int i; KASSERT(mtx_initialized(&sp->mtx), ("sppp mutex is not initialized")); /* Stop keepalive handler. 
*/ if (!callout_drain(&sp->keepalive_callout)) callout_stop(&sp->keepalive_callout); for (i = 0; i < IDX_COUNT; i++) { if (!callout_drain(&sp->ch[i])) callout_stop(&sp->ch[i]); } if (!callout_drain(&sp->pap_my_to_ch)) callout_stop(&sp->pap_my_to_ch); mtx_destroy(&sp->pp_cpq.ifq_mtx); mtx_destroy(&sp->pp_fastq.ifq_mtx); mtx_destroy(&sp->mtx); } /* * Flush the interface output queue. */ static void sppp_flush_unlocked(struct ifnet *ifp) { struct sppp *sp = IFP2SP(ifp); sppp_qflush ((struct ifqueue *)&SP2IFP(sp)->if_snd); sppp_qflush (&sp->pp_fastq); sppp_qflush (&sp->pp_cpq); } void sppp_flush(struct ifnet *ifp) { struct sppp *sp = IFP2SP(ifp); SPPP_LOCK(sp); sppp_flush_unlocked (ifp); SPPP_UNLOCK(sp); } /* * Check if the output queue is empty. */ int sppp_isempty(struct ifnet *ifp) { struct sppp *sp = IFP2SP(ifp); int empty; SPPP_LOCK(sp); empty = !sp->pp_fastq.ifq_head && !sp->pp_cpq.ifq_head && !SP2IFP(sp)->if_snd.ifq_head; SPPP_UNLOCK(sp); return (empty); } /* * Get next packet to send. */ struct mbuf * sppp_dequeue(struct ifnet *ifp) { struct sppp *sp = IFP2SP(ifp); struct mbuf *m; SPPP_LOCK(sp); /* * Process only the control protocol queue until we have at * least one NCP open. * * Do always serve all three queues in Cisco mode. */ IF_DEQUEUE(&sp->pp_cpq, m); if (m == NULL && (sppp_ncp_check(sp) || sp->pp_mode == IFF_CISCO || sp->pp_mode == PP_FR)) { IF_DEQUEUE(&sp->pp_fastq, m); if (m == NULL) IF_DEQUEUE (&SP2IFP(sp)->if_snd, m); } SPPP_UNLOCK(sp); return m; } /* * Pick the next packet, do not remove it from the queue. */ struct mbuf * sppp_pick(struct ifnet *ifp) { struct sppp *sp = IFP2SP(ifp); struct mbuf *m; SPPP_LOCK(sp); m = sp->pp_cpq.ifq_head; if (m == NULL && (sp->pp_phase == PHASE_NETWORK || sp->pp_mode == IFF_CISCO || sp->pp_mode == PP_FR)) if ((m = sp->pp_fastq.ifq_head) == NULL) m = SP2IFP(sp)->if_snd.ifq_head; SPPP_UNLOCK(sp); return (m); } /* * Process an ioctl request. Called on low priority level. 
*/ int sppp_ioctl(struct ifnet *ifp, IOCTL_CMD_T cmd, void *data) { struct ifreq *ifr = (struct ifreq*) data; struct sppp *sp = IFP2SP(ifp); int rv, going_up, going_down, newmode; SPPP_LOCK(sp); rv = 0; switch (cmd) { case SIOCAIFADDR: break; case SIOCSIFADDR: /* set the interface "up" when assigning an IP address */ ifp->if_flags |= IFF_UP; /* FALLTHROUGH */ case SIOCSIFFLAGS: going_up = ifp->if_flags & IFF_UP && (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0; going_down = (ifp->if_flags & IFF_UP) == 0 && ifp->if_drv_flags & IFF_DRV_RUNNING; newmode = ifp->if_flags & IFF_PASSIVE; if (!newmode) newmode = ifp->if_flags & IFF_AUTO; if (!newmode) newmode = ifp->if_flags & IFF_CISCO; ifp->if_flags &= ~(IFF_PASSIVE | IFF_AUTO | IFF_CISCO); ifp->if_flags |= newmode; if (!newmode) newmode = sp->pp_flags & PP_FR; if (newmode != sp->pp_mode) { going_down = 1; if (!going_up) going_up = ifp->if_drv_flags & IFF_DRV_RUNNING; } if (going_down) { if (sp->pp_mode != IFF_CISCO && sp->pp_mode != PP_FR) lcp.Close(sp); else if (sp->pp_tlf) (sp->pp_tlf)(sp); sppp_flush_unlocked(ifp); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; sp->pp_mode = newmode; } if (going_up) { if (sp->pp_mode != IFF_CISCO && sp->pp_mode != PP_FR) lcp.Close(sp); sp->pp_mode = newmode; if (sp->pp_mode == 0) { ifp->if_drv_flags |= IFF_DRV_RUNNING; lcp.Open(sp); } if ((sp->pp_mode == IFF_CISCO) || (sp->pp_mode == PP_FR)) { if (sp->pp_tls) (sp->pp_tls)(sp); ifp->if_drv_flags |= IFF_DRV_RUNNING; } } break; #ifdef SIOCSIFMTU #ifndef ifr_mtu #define ifr_mtu ifr_metric #endif case SIOCSIFMTU: if (ifr->ifr_mtu < 128 || ifr->ifr_mtu > sp->lcp.their_mru) return (EINVAL); ifp->if_mtu = ifr->ifr_mtu; break; #endif #ifdef SLIOCSETMTU case SLIOCSETMTU: if (*(short*)data < 128 || *(short*)data > sp->lcp.their_mru) return (EINVAL); ifp->if_mtu = *(short*)data; break; #endif #ifdef SIOCGIFMTU case SIOCGIFMTU: ifr->ifr_mtu = ifp->if_mtu; break; #endif #ifdef SLIOCGETMTU case SLIOCGETMTU: *(short*)data = ifp->if_mtu; break; #endif case 
SIOCADDMULTI: case SIOCDELMULTI: break; case SIOCGIFGENERIC: case SIOCSIFGENERIC: rv = sppp_params(sp, cmd, data); break; default: rv = ENOTTY; } SPPP_UNLOCK(sp); return rv; } /* * Cisco framing implementation. */ /* * Handle incoming Cisco keepalive protocol packets. */ static void sppp_cisco_input(struct sppp *sp, struct mbuf *m) { STDDCL; struct cisco_packet *h; u_long me, mymask; if (m->m_pkthdr.len < CISCO_PACKET_LEN) { if (debug) log(LOG_DEBUG, SPP_FMT "cisco invalid packet length: %d bytes\n", SPP_ARGS(ifp), m->m_pkthdr.len); return; } h = mtod (m, struct cisco_packet*); if (debug) log(LOG_DEBUG, SPP_FMT "cisco input: %d bytes " "<0x%lx 0x%lx 0x%lx 0x%x 0x%x-0x%x>\n", SPP_ARGS(ifp), m->m_pkthdr.len, (u_long)ntohl (h->type), (u_long)h->par1, (u_long)h->par2, (u_int)h->rel, (u_int)h->time0, (u_int)h->time1); switch (ntohl (h->type)) { default: if (debug) log(-1, SPP_FMT "cisco unknown packet type: 0x%lx\n", SPP_ARGS(ifp), (u_long)ntohl (h->type)); break; case CISCO_ADDR_REPLY: /* Reply on address request, ignore */ break; case CISCO_KEEPALIVE_REQ: sp->pp_alivecnt = 0; sp->pp_rseq[IDX_LCP] = ntohl (h->par1); if (sp->pp_seq[IDX_LCP] == sp->pp_rseq[IDX_LCP]) { /* Local and remote sequence numbers are equal. * Probably, the line is in loopback mode. */ if (sp->pp_loopcnt >= MAXALIVECNT) { printf (SPP_FMT "loopback\n", SPP_ARGS(ifp)); sp->pp_loopcnt = 0; if (ifp->if_flags & IFF_UP) { if_down (ifp); sppp_qflush (&sp->pp_cpq); } } ++sp->pp_loopcnt; /* Generate new local sequence number */ sp->pp_seq[IDX_LCP] = random(); break; } sp->pp_loopcnt = 0; if (! (ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING)) { if_up(ifp); printf (SPP_FMT "up\n", SPP_ARGS(ifp)); } break; case CISCO_ADDR_REQ: sppp_get_ip_addrs(sp, &me, 0, &mymask); if (me != 0L) sppp_cisco_send(sp, CISCO_ADDR_REPLY, me, mymask); break; } } /* * Send Cisco keepalive packet. 
*/ static void sppp_cisco_send(struct sppp *sp, int type, long par1, long par2) { STDDCL; struct ppp_header *h; struct cisco_packet *ch; struct mbuf *m; struct timeval tv; getmicrouptime(&tv); MGETHDR (m, M_NOWAIT, MT_DATA); if (! m) return; m->m_pkthdr.len = m->m_len = PPP_HEADER_LEN + CISCO_PACKET_LEN; m->m_pkthdr.rcvif = 0; h = mtod (m, struct ppp_header*); h->address = CISCO_MULTICAST; h->control = 0; h->protocol = htons (CISCO_KEEPALIVE); ch = (struct cisco_packet*) (h + 1); ch->type = htonl (type); ch->par1 = htonl (par1); ch->par2 = htonl (par2); ch->rel = -1; ch->time0 = htons ((u_short) (tv.tv_sec >> 16)); ch->time1 = htons ((u_short) tv.tv_sec); if (debug) log(LOG_DEBUG, SPP_FMT "cisco output: <0x%lx 0x%lx 0x%lx 0x%x 0x%x-0x%x>\n", SPP_ARGS(ifp), (u_long)ntohl (ch->type), (u_long)ch->par1, (u_long)ch->par2, (u_int)ch->rel, (u_int)ch->time0, (u_int)ch->time1); if (! IF_HANDOFF_ADJ(&sp->pp_cpq, m, ifp, 3)) if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } /* * PPP protocol implementation. */ /* * Send PPP control protocol packet. */ static void sppp_cp_send(struct sppp *sp, u_short proto, u_char type, u_char ident, u_short len, void *data) { STDDCL; struct ppp_header *h; struct lcp_header *lh; struct mbuf *m; if (len > MHLEN - PPP_HEADER_LEN - LCP_HEADER_LEN) len = MHLEN - PPP_HEADER_LEN - LCP_HEADER_LEN; MGETHDR (m, M_NOWAIT, MT_DATA); if (! 
m) return; m->m_pkthdr.len = m->m_len = PPP_HEADER_LEN + LCP_HEADER_LEN + len; m->m_pkthdr.rcvif = 0; h = mtod (m, struct ppp_header*); h->address = PPP_ALLSTATIONS; /* broadcast address */ h->control = PPP_UI; /* Unnumbered Info */ h->protocol = htons (proto); /* Link Control Protocol */ lh = (struct lcp_header*) (h + 1); lh->type = type; lh->ident = ident; lh->len = htons (LCP_HEADER_LEN + len); if (len) bcopy (data, lh+1, len); if (debug) { log(LOG_DEBUG, SPP_FMT "%s output <%s id=0x%x len=%d", SPP_ARGS(ifp), sppp_proto_name(proto), sppp_cp_type_name (lh->type), lh->ident, ntohs (lh->len)); sppp_print_bytes ((u_char*) (lh+1), len); log(-1, ">\n"); } if (! IF_HANDOFF_ADJ(&sp->pp_cpq, m, ifp, 3)) if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } /* * Handle incoming PPP control protocol packets. */ static void sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m) { STDDCL; struct lcp_header *h; int len = m->m_pkthdr.len; int rv; u_char *p; if (len < 4) { if (debug) log(LOG_DEBUG, SPP_FMT "%s invalid packet length: %d bytes\n", SPP_ARGS(ifp), cp->name, len); return; } h = mtod (m, struct lcp_header*); if (debug) { log(LOG_DEBUG, SPP_FMT "%s input(%s): <%s id=0x%x len=%d", SPP_ARGS(ifp), cp->name, sppp_state_name(sp->state[cp->protoidx]), sppp_cp_type_name (h->type), h->ident, ntohs (h->len)); sppp_print_bytes ((u_char*) (h+1), len-4); log(-1, ">\n"); } if (len > ntohs (h->len)) len = ntohs (h->len); p = (u_char *)(h + 1); switch (h->type) { case CONF_REQ: if (len < 4) { if (debug) log(-1, SPP_FMT "%s invalid conf-req length %d\n", SPP_ARGS(ifp), cp->name, len); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); break; } /* handle states where RCR doesn't get a SCA/SCN */ switch (sp->state[cp->protoidx]) { case STATE_CLOSING: case STATE_STOPPING: return; case STATE_CLOSED: sppp_cp_send(sp, cp->proto, TERM_ACK, h->ident, 0, 0); return; } rv = (cp->RCR)(sp, h, len); switch (sp->state[cp->protoidx]) { case STATE_OPENED: (cp->tld)(sp); (cp->scr)(sp); /* FALLTHROUGH */ 
case STATE_ACK_SENT: case STATE_REQ_SENT: /* * sppp_cp_change_state() have the side effect of * restarting the timeouts. We want to avoid that * if the state don't change, otherwise we won't * ever timeout and resend a configuration request * that got lost. */ if (sp->state[cp->protoidx] == (rv ? STATE_ACK_SENT: STATE_REQ_SENT)) break; sppp_cp_change_state(cp, sp, rv? STATE_ACK_SENT: STATE_REQ_SENT); break; case STATE_STOPPED: sp->rst_counter[cp->protoidx] = sp->lcp.max_configure; (cp->scr)(sp); sppp_cp_change_state(cp, sp, rv? STATE_ACK_SENT: STATE_REQ_SENT); break; case STATE_ACK_RCVD: if (rv) { sppp_cp_change_state(cp, sp, STATE_OPENED); if (debug) log(LOG_DEBUG, SPP_FMT "%s tlu\n", SPP_ARGS(ifp), cp->name); (cp->tlu)(sp); } else sppp_cp_change_state(cp, sp, STATE_ACK_RCVD); break; default: printf(SPP_FMT "%s illegal %s in state %s\n", SPP_ARGS(ifp), cp->name, sppp_cp_type_name(h->type), sppp_state_name(sp->state[cp->protoidx])); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); } break; case CONF_ACK: if (h->ident != sp->confid[cp->protoidx]) { if (debug) log(-1, SPP_FMT "%s id mismatch 0x%x != 0x%x\n", SPP_ARGS(ifp), cp->name, h->ident, sp->confid[cp->protoidx]); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); break; } switch (sp->state[cp->protoidx]) { case STATE_CLOSED: case STATE_STOPPED: sppp_cp_send(sp, cp->proto, TERM_ACK, h->ident, 0, 0); break; case STATE_CLOSING: case STATE_STOPPING: break; case STATE_REQ_SENT: sp->rst_counter[cp->protoidx] = sp->lcp.max_configure; sppp_cp_change_state(cp, sp, STATE_ACK_RCVD); break; case STATE_OPENED: (cp->tld)(sp); /* FALLTHROUGH */ case STATE_ACK_RCVD: (cp->scr)(sp); sppp_cp_change_state(cp, sp, STATE_REQ_SENT); break; case STATE_ACK_SENT: sp->rst_counter[cp->protoidx] = sp->lcp.max_configure; sppp_cp_change_state(cp, sp, STATE_OPENED); if (debug) log(LOG_DEBUG, SPP_FMT "%s tlu\n", SPP_ARGS(ifp), cp->name); (cp->tlu)(sp); break; default: printf(SPP_FMT "%s illegal %s in state %s\n", SPP_ARGS(ifp), cp->name, 
sppp_cp_type_name(h->type), sppp_state_name(sp->state[cp->protoidx])); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); } break; case CONF_NAK: case CONF_REJ: if (h->ident != sp->confid[cp->protoidx]) { if (debug) log(-1, SPP_FMT "%s id mismatch 0x%x != 0x%x\n", SPP_ARGS(ifp), cp->name, h->ident, sp->confid[cp->protoidx]); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); break; } if (h->type == CONF_NAK) (cp->RCN_nak)(sp, h, len); else /* CONF_REJ */ (cp->RCN_rej)(sp, h, len); switch (sp->state[cp->protoidx]) { case STATE_CLOSED: case STATE_STOPPED: sppp_cp_send(sp, cp->proto, TERM_ACK, h->ident, 0, 0); break; case STATE_REQ_SENT: case STATE_ACK_SENT: sp->rst_counter[cp->protoidx] = sp->lcp.max_configure; /* * Slow things down a bit if we think we might be * in loopback. Depend on the timeout to send the * next configuration request. */ if (sp->pp_loopcnt) break; (cp->scr)(sp); break; case STATE_OPENED: (cp->tld)(sp); /* FALLTHROUGH */ case STATE_ACK_RCVD: sppp_cp_change_state(cp, sp, STATE_REQ_SENT); (cp->scr)(sp); break; case STATE_CLOSING: case STATE_STOPPING: break; default: printf(SPP_FMT "%s illegal %s in state %s\n", SPP_ARGS(ifp), cp->name, sppp_cp_type_name(h->type), sppp_state_name(sp->state[cp->protoidx])); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); } break; case TERM_REQ: switch (sp->state[cp->protoidx]) { case STATE_ACK_RCVD: case STATE_ACK_SENT: sppp_cp_change_state(cp, sp, STATE_REQ_SENT); /* FALLTHROUGH */ case STATE_CLOSED: case STATE_STOPPED: case STATE_CLOSING: case STATE_STOPPING: case STATE_REQ_SENT: sta: /* Send Terminate-Ack packet. 
*/ if (debug) log(LOG_DEBUG, SPP_FMT "%s send terminate-ack\n", SPP_ARGS(ifp), cp->name); sppp_cp_send(sp, cp->proto, TERM_ACK, h->ident, 0, 0); break; case STATE_OPENED: (cp->tld)(sp); sp->rst_counter[cp->protoidx] = 0; sppp_cp_change_state(cp, sp, STATE_STOPPING); goto sta; break; default: printf(SPP_FMT "%s illegal %s in state %s\n", SPP_ARGS(ifp), cp->name, sppp_cp_type_name(h->type), sppp_state_name(sp->state[cp->protoidx])); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); } break; case TERM_ACK: switch (sp->state[cp->protoidx]) { case STATE_CLOSED: case STATE_STOPPED: case STATE_REQ_SENT: case STATE_ACK_SENT: break; case STATE_CLOSING: sppp_cp_change_state(cp, sp, STATE_CLOSED); (cp->tlf)(sp); break; case STATE_STOPPING: sppp_cp_change_state(cp, sp, STATE_STOPPED); (cp->tlf)(sp); break; case STATE_ACK_RCVD: sppp_cp_change_state(cp, sp, STATE_REQ_SENT); break; case STATE_OPENED: (cp->tld)(sp); (cp->scr)(sp); sppp_cp_change_state(cp, sp, STATE_ACK_RCVD); break; default: printf(SPP_FMT "%s illegal %s in state %s\n", SPP_ARGS(ifp), cp->name, sppp_cp_type_name(h->type), sppp_state_name(sp->state[cp->protoidx])); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); } break; case CODE_REJ: /* XXX catastrophic rejects (RXJ-) aren't handled yet. 
*/ log(LOG_INFO, SPP_FMT "%s: ignoring RXJ (%s) for proto 0x%x, " "danger will robinson\n", SPP_ARGS(ifp), cp->name, sppp_cp_type_name(h->type), ntohs(*((u_short *)p))); switch (sp->state[cp->protoidx]) { case STATE_CLOSED: case STATE_STOPPED: case STATE_REQ_SENT: case STATE_ACK_SENT: case STATE_CLOSING: case STATE_STOPPING: case STATE_OPENED: break; case STATE_ACK_RCVD: sppp_cp_change_state(cp, sp, STATE_REQ_SENT); break; default: printf(SPP_FMT "%s illegal %s in state %s\n", SPP_ARGS(ifp), cp->name, sppp_cp_type_name(h->type), sppp_state_name(sp->state[cp->protoidx])); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); } break; case PROTO_REJ: { int catastrophic; const struct cp *upper; int i; u_int16_t proto; catastrophic = 0; upper = NULL; proto = ntohs(*((u_int16_t *)p)); for (i = 0; i < IDX_COUNT; i++) { if (cps[i]->proto == proto) { upper = cps[i]; break; } } if (upper == NULL) catastrophic++; if (catastrophic || debug) log(catastrophic? LOG_INFO: LOG_DEBUG, SPP_FMT "%s: RXJ%c (%s) for proto 0x%x (%s/%s)\n", SPP_ARGS(ifp), cp->name, catastrophic ? '-' : '+', sppp_cp_type_name(h->type), proto, upper ? upper->name : "unknown", upper ? sppp_state_name(sp->state[upper->protoidx]) : "?"); /* * if we got RXJ+ against conf-req, the peer does not implement * this particular protocol type. terminate the protocol. */ if (upper && !catastrophic) { if (sp->state[upper->protoidx] == STATE_REQ_SENT) { upper->Close(sp); break; } } /* XXX catastrophic rejects (RXJ-) aren't handled yet. 
*/ switch (sp->state[cp->protoidx]) { case STATE_CLOSED: case STATE_STOPPED: case STATE_REQ_SENT: case STATE_ACK_SENT: case STATE_CLOSING: case STATE_STOPPING: case STATE_OPENED: break; case STATE_ACK_RCVD: sppp_cp_change_state(cp, sp, STATE_REQ_SENT); break; default: printf(SPP_FMT "%s illegal %s in state %s\n", SPP_ARGS(ifp), cp->name, sppp_cp_type_name(h->type), sppp_state_name(sp->state[cp->protoidx])); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); } break; } case DISC_REQ: if (cp->proto != PPP_LCP) goto illegal; /* Discard the packet. */ break; case ECHO_REQ: if (cp->proto != PPP_LCP) goto illegal; if (sp->state[cp->protoidx] != STATE_OPENED) { if (debug) log(-1, SPP_FMT "lcp echo req but lcp closed\n", SPP_ARGS(ifp)); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); break; } if (len < 8) { if (debug) log(-1, SPP_FMT "invalid lcp echo request " "packet length: %d bytes\n", SPP_ARGS(ifp), len); break; } if ((sp->lcp.opts & (1 << LCP_OPT_MAGIC)) && ntohl (*(long*)(h+1)) == sp->lcp.magic) { /* Line loopback mode detected. */ printf(SPP_FMT "loopback\n", SPP_ARGS(ifp)); sp->pp_loopcnt = MAXALIVECNT * 5; if_down (ifp); sppp_qflush (&sp->pp_cpq); /* Shut down the PPP link. */ /* XXX */ lcp.Down(sp); lcp.Up(sp); break; } *(long*)(h+1) = htonl (sp->lcp.magic); if (debug) log(-1, SPP_FMT "got lcp echo req, sending echo rep\n", SPP_ARGS(ifp)); sppp_cp_send (sp, PPP_LCP, ECHO_REPLY, h->ident, len-4, h+1); break; case ECHO_REPLY: if (cp->proto != PPP_LCP) goto illegal; if (h->ident != sp->lcp.echoid) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); break; } if (len < 8) { if (debug) log(-1, SPP_FMT "lcp invalid echo reply " "packet length: %d bytes\n", SPP_ARGS(ifp), len); break; } if (debug) log(-1, SPP_FMT "lcp got echo rep\n", SPP_ARGS(ifp)); if (!(sp->lcp.opts & (1 << LCP_OPT_MAGIC)) || ntohl (*(long*)(h+1)) != sp->lcp.magic) sp->pp_alivecnt = 0; break; default: /* Unknown packet type -- send Code-Reject packet. 
*/ illegal: if (debug) log(-1, SPP_FMT "%s send code-rej for 0x%x\n", SPP_ARGS(ifp), cp->name, h->type); sppp_cp_send(sp, cp->proto, CODE_REJ, ++sp->pp_seq[cp->protoidx], m->m_pkthdr.len, h); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); } } /* * The generic part of all Up/Down/Open/Close/TO event handlers. * Basically, the state transition handling in the automaton. */ static void sppp_up_event(const struct cp *cp, struct sppp *sp) { STDDCL; if (debug) log(LOG_DEBUG, SPP_FMT "%s up(%s)\n", SPP_ARGS(ifp), cp->name, sppp_state_name(sp->state[cp->protoidx])); switch (sp->state[cp->protoidx]) { case STATE_INITIAL: sppp_cp_change_state(cp, sp, STATE_CLOSED); break; case STATE_STARTING: sp->rst_counter[cp->protoidx] = sp->lcp.max_configure; (cp->scr)(sp); sppp_cp_change_state(cp, sp, STATE_REQ_SENT); break; default: printf(SPP_FMT "%s illegal up in state %s\n", SPP_ARGS(ifp), cp->name, sppp_state_name(sp->state[cp->protoidx])); } } static void sppp_down_event(const struct cp *cp, struct sppp *sp) { STDDCL; if (debug) log(LOG_DEBUG, SPP_FMT "%s down(%s)\n", SPP_ARGS(ifp), cp->name, sppp_state_name(sp->state[cp->protoidx])); switch (sp->state[cp->protoidx]) { case STATE_CLOSED: case STATE_CLOSING: sppp_cp_change_state(cp, sp, STATE_INITIAL); break; case STATE_STOPPED: sppp_cp_change_state(cp, sp, STATE_STARTING); (cp->tls)(sp); break; case STATE_STOPPING: case STATE_REQ_SENT: case STATE_ACK_RCVD: case STATE_ACK_SENT: sppp_cp_change_state(cp, sp, STATE_STARTING); break; case STATE_OPENED: (cp->tld)(sp); sppp_cp_change_state(cp, sp, STATE_STARTING); break; default: printf(SPP_FMT "%s illegal down in state %s\n", SPP_ARGS(ifp), cp->name, sppp_state_name(sp->state[cp->protoidx])); } } static void sppp_open_event(const struct cp *cp, struct sppp *sp) { STDDCL; if (debug) log(LOG_DEBUG, SPP_FMT "%s open(%s)\n", SPP_ARGS(ifp), cp->name, sppp_state_name(sp->state[cp->protoidx])); switch (sp->state[cp->protoidx]) { case STATE_INITIAL: sppp_cp_change_state(cp, sp, STATE_STARTING); 
(cp->tls)(sp); break; case STATE_STARTING: break; case STATE_CLOSED: sp->rst_counter[cp->protoidx] = sp->lcp.max_configure; (cp->scr)(sp); sppp_cp_change_state(cp, sp, STATE_REQ_SENT); break; case STATE_STOPPED: /* * Try escaping stopped state. This seems to bite * people occasionally, in particular for IPCP, * presumably following previous IPCP negotiation * aborts. Somehow, we must have missed a Down event * which would have caused a transition into starting * state, so as a bandaid we force the Down event now. * This effectively implements (something like the) * `restart' option mentioned in the state transition * table of RFC 1661. */ sppp_cp_change_state(cp, sp, STATE_STARTING); (cp->tls)(sp); break; case STATE_STOPPING: case STATE_REQ_SENT: case STATE_ACK_RCVD: case STATE_ACK_SENT: case STATE_OPENED: break; case STATE_CLOSING: sppp_cp_change_state(cp, sp, STATE_STOPPING); break; } } static void sppp_close_event(const struct cp *cp, struct sppp *sp) { STDDCL; if (debug) log(LOG_DEBUG, SPP_FMT "%s close(%s)\n", SPP_ARGS(ifp), cp->name, sppp_state_name(sp->state[cp->protoidx])); switch (sp->state[cp->protoidx]) { case STATE_INITIAL: case STATE_CLOSED: case STATE_CLOSING: break; case STATE_STARTING: sppp_cp_change_state(cp, sp, STATE_INITIAL); (cp->tlf)(sp); break; case STATE_STOPPED: sppp_cp_change_state(cp, sp, STATE_CLOSED); break; case STATE_STOPPING: sppp_cp_change_state(cp, sp, STATE_CLOSING); break; case STATE_OPENED: (cp->tld)(sp); /* FALLTHROUGH */ case STATE_REQ_SENT: case STATE_ACK_RCVD: case STATE_ACK_SENT: sp->rst_counter[cp->protoidx] = sp->lcp.max_terminate; sppp_cp_send(sp, cp->proto, TERM_REQ, ++sp->pp_seq[cp->protoidx], 0, 0); sppp_cp_change_state(cp, sp, STATE_CLOSING); break; } } static void sppp_to_event(const struct cp *cp, struct sppp *sp) { STDDCL; SPPP_LOCK(sp); if (debug) log(LOG_DEBUG, SPP_FMT "%s TO(%s) rst_counter = %d\n", SPP_ARGS(ifp), cp->name, sppp_state_name(sp->state[cp->protoidx]), sp->rst_counter[cp->protoidx]); if 
(--sp->rst_counter[cp->protoidx] < 0) /* TO- event */ switch (sp->state[cp->protoidx]) { case STATE_CLOSING: sppp_cp_change_state(cp, sp, STATE_CLOSED); (cp->tlf)(sp); break; case STATE_STOPPING: sppp_cp_change_state(cp, sp, STATE_STOPPED); (cp->tlf)(sp); break; case STATE_REQ_SENT: case STATE_ACK_RCVD: case STATE_ACK_SENT: sppp_cp_change_state(cp, sp, STATE_STOPPED); (cp->tlf)(sp); break; } else /* TO+ event */ switch (sp->state[cp->protoidx]) { case STATE_CLOSING: case STATE_STOPPING: sppp_cp_send(sp, cp->proto, TERM_REQ, ++sp->pp_seq[cp->protoidx], 0, 0); callout_reset(&sp->ch[cp->protoidx], sp->lcp.timeout, cp->TO, (void *)sp); break; case STATE_REQ_SENT: case STATE_ACK_RCVD: (cp->scr)(sp); /* sppp_cp_change_state() will restart the timer */ sppp_cp_change_state(cp, sp, STATE_REQ_SENT); break; case STATE_ACK_SENT: (cp->scr)(sp); callout_reset(&sp->ch[cp->protoidx], sp->lcp.timeout, cp->TO, (void *)sp); break; } SPPP_UNLOCK(sp); } /* * Change the state of a control protocol in the state automaton. * Takes care of starting/stopping the restart timer. */ static void sppp_cp_change_state(const struct cp *cp, struct sppp *sp, int newstate) { sp->state[cp->protoidx] = newstate; callout_stop (&sp->ch[cp->protoidx]); switch (newstate) { case STATE_INITIAL: case STATE_STARTING: case STATE_CLOSED: case STATE_STOPPED: case STATE_OPENED: break; case STATE_CLOSING: case STATE_STOPPING: case STATE_REQ_SENT: case STATE_ACK_RCVD: case STATE_ACK_SENT: callout_reset(&sp->ch[cp->protoidx], sp->lcp.timeout, cp->TO, (void *)sp); break; } } /* *--------------------------------------------------------------------------* * * * The LCP implementation. 
* * * *--------------------------------------------------------------------------* */ static void sppp_pp_up(struct sppp *sp) { SPPP_LOCK(sp); lcp.Up(sp); SPPP_UNLOCK(sp); } static void sppp_pp_down(struct sppp *sp) { SPPP_LOCK(sp); lcp.Down(sp); SPPP_UNLOCK(sp); } static void sppp_lcp_init(struct sppp *sp) { sp->lcp.opts = (1 << LCP_OPT_MAGIC); sp->lcp.magic = 0; sp->state[IDX_LCP] = STATE_INITIAL; sp->fail_counter[IDX_LCP] = 0; sp->pp_seq[IDX_LCP] = 0; sp->pp_rseq[IDX_LCP] = 0; sp->lcp.protos = 0; sp->lcp.mru = sp->lcp.their_mru = PP_MTU; /* Note that these values are relevant for all control protocols */ sp->lcp.timeout = 3 * hz; sp->lcp.max_terminate = 2; sp->lcp.max_configure = 10; sp->lcp.max_failure = 10; callout_init(&sp->ch[IDX_LCP], 1); } static void sppp_lcp_up(struct sppp *sp) { STDDCL; sp->pp_alivecnt = 0; sp->lcp.opts = (1 << LCP_OPT_MAGIC); sp->lcp.magic = 0; sp->lcp.protos = 0; sp->lcp.mru = sp->lcp.their_mru = PP_MTU; /* * If we are authenticator, negotiate LCP_AUTH */ if (sp->hisauth.proto != 0) sp->lcp.opts |= (1 << LCP_OPT_AUTH_PROTO); else sp->lcp.opts &= ~(1 << LCP_OPT_AUTH_PROTO); sp->pp_flags &= ~PP_NEEDAUTH; /* * If this interface is passive or dial-on-demand, and we are * still in Initial state, it means we've got an incoming * call. Activate the interface. 
*/ if ((ifp->if_flags & (IFF_AUTO | IFF_PASSIVE)) != 0) { if (debug) log(LOG_DEBUG, SPP_FMT "Up event", SPP_ARGS(ifp)); ifp->if_drv_flags |= IFF_DRV_RUNNING; if (sp->state[IDX_LCP] == STATE_INITIAL) { if (debug) log(-1, "(incoming call)\n"); sp->pp_flags |= PP_CALLIN; lcp.Open(sp); } else if (debug) log(-1, "\n"); } else if ((ifp->if_flags & (IFF_AUTO | IFF_PASSIVE)) == 0 && (sp->state[IDX_LCP] == STATE_INITIAL)) { ifp->if_drv_flags |= IFF_DRV_RUNNING; lcp.Open(sp); } sppp_up_event(&lcp, sp); } static void sppp_lcp_down(struct sppp *sp) { STDDCL; sppp_down_event(&lcp, sp); /* * If this is neither a dial-on-demand nor a passive * interface, simulate an ``ifconfig down'' action, so the * administrator can force a redial by another ``ifconfig * up''. XXX For leased line operation, should we immediately * try to reopen the connection here? */ if ((ifp->if_flags & (IFF_AUTO | IFF_PASSIVE)) == 0) { log(LOG_INFO, SPP_FMT "Down event, taking interface down.\n", SPP_ARGS(ifp)); if_down(ifp); } else { if (debug) log(LOG_DEBUG, SPP_FMT "Down event (carrier loss)\n", SPP_ARGS(ifp)); sp->pp_flags &= ~PP_CALLIN; if (sp->state[IDX_LCP] != STATE_INITIAL) lcp.Close(sp); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; } } static void sppp_lcp_open(struct sppp *sp) { sppp_open_event(&lcp, sp); } static void sppp_lcp_close(struct sppp *sp) { sppp_close_event(&lcp, sp); } static void sppp_lcp_TO(void *cookie) { sppp_to_event(&lcp, (struct sppp *)cookie); } /* * Analyze a configure request. Return true if it was agreeable, and * caused action sca, false if it has been rejected or nak'ed, and * caused action scn. (The return value is used to make the state * transition decision in the state automaton.) */ static int sppp_lcp_RCR(struct sppp *sp, struct lcp_header *h, int len) { STDDCL; u_char *buf, *r, *p; int origlen, rlen; u_long nmagic; u_short authproto; len -= 4; origlen = len; buf = r = malloc (len, M_TEMP, M_NOWAIT); if (! 
buf) return (0); if (debug) log(LOG_DEBUG, SPP_FMT "lcp parse opts: ", SPP_ARGS(ifp)); /* pass 1: check for things that need to be rejected */ p = (void*) (h+1); for (rlen=0; len >= 2 && p[1] >= 2 && len >= p[1]; len-=p[1], p+=p[1]) { if (debug) log(-1, " %s ", sppp_lcp_opt_name(*p)); switch (*p) { case LCP_OPT_MAGIC: /* Magic number. */ if (len >= 6 && p[1] == 6) continue; if (debug) log(-1, "[invalid] "); break; case LCP_OPT_ASYNC_MAP: /* Async control character map. */ if (len >= 6 && p[1] == 6) continue; if (debug) log(-1, "[invalid] "); break; case LCP_OPT_MRU: /* Maximum receive unit. */ if (len >= 4 && p[1] == 4) continue; if (debug) log(-1, "[invalid] "); break; case LCP_OPT_AUTH_PROTO: if (len < 4) { if (debug) log(-1, "[invalid] "); break; } authproto = (p[2] << 8) + p[3]; if (authproto == PPP_CHAP && p[1] != 5) { if (debug) log(-1, "[invalid chap len] "); break; } if (sp->myauth.proto == 0) { /* we are not configured to do auth */ if (debug) log(-1, "[not configured] "); break; } /* * Remote want us to authenticate, remember this, * so we stay in PHASE_AUTHENTICATE after LCP got * up. */ sp->pp_flags |= PP_NEEDAUTH; continue; default: /* Others not supported. */ if (debug) log(-1, "[rej] "); break; } /* Add the option to rejected list. */ bcopy (p, r, p[1]); r += p[1]; rlen += p[1]; } if (rlen) { if (debug) log(-1, " send conf-rej\n"); sppp_cp_send (sp, PPP_LCP, CONF_REJ, h->ident, rlen, buf); return 0; } else if (debug) log(-1, "\n"); /* * pass 2: check for option values that are unacceptable and * thus require to be nak'ed. */ if (debug) log(LOG_DEBUG, SPP_FMT "lcp parse opt values: ", SPP_ARGS(ifp)); p = (void*) (h+1); len = origlen; for (rlen=0; len >= 2 && p[1] >= 2 && len >= p[1]; len-=p[1], p+=p[1]) { if (debug) log(-1, " %s ", sppp_lcp_opt_name(*p)); switch (*p) { case LCP_OPT_MAGIC: /* Magic number -- extract. 
*/ nmagic = (u_long)p[2] << 24 | (u_long)p[3] << 16 | p[4] << 8 | p[5]; if (nmagic != sp->lcp.magic) { sp->pp_loopcnt = 0; if (debug) log(-1, "0x%lx ", nmagic); continue; } if (debug && sp->pp_loopcnt < MAXALIVECNT*5) log(-1, "[glitch] "); ++sp->pp_loopcnt; /* * We negate our magic here, and NAK it. If * we see it later in an NAK packet, we * suggest a new one. */ nmagic = ~sp->lcp.magic; /* Gonna NAK it. */ p[2] = nmagic >> 24; p[3] = nmagic >> 16; p[4] = nmagic >> 8; p[5] = nmagic; break; case LCP_OPT_ASYNC_MAP: /* * Async control character map -- just ignore it. * * Quote from RFC 1662, chapter 6: * To enable this functionality, synchronous PPP * implementations MUST always respond to the * Async-Control-Character-Map Configuration * Option with the LCP Configure-Ack. However, * acceptance of the Configuration Option does * not imply that the synchronous implementation * will do any ACCM mapping. Instead, all such * octet mapping will be performed by the * asynchronous-to-synchronous converter. */ continue; case LCP_OPT_MRU: /* * Maximum receive unit. Always agreeable, * but ignored by now. */ sp->lcp.their_mru = p[2] * 256 + p[3]; if (debug) log(-1, "%lu ", sp->lcp.their_mru); continue; case LCP_OPT_AUTH_PROTO: authproto = (p[2] << 8) + p[3]; if (sp->myauth.proto != authproto) { /* not agreed, nak */ if (debug) log(-1, "[mine %s != his %s] ", sppp_proto_name(sp->hisauth.proto), sppp_proto_name(authproto)); p[2] = sp->myauth.proto >> 8; p[3] = sp->myauth.proto; break; } if (authproto == PPP_CHAP && p[4] != CHAP_MD5) { if (debug) log(-1, "[chap not MD5] "); p[4] = CHAP_MD5; break; } continue; } /* Add the option to nak'ed list. */ bcopy (p, r, p[1]); r += p[1]; rlen += p[1]; } if (rlen) { /* * Local and remote magics equal -- loopback? */ if (sp->pp_loopcnt >= MAXALIVECNT*5) { if (sp->pp_loopcnt == MAXALIVECNT*5) printf (SPP_FMT "loopback\n", SPP_ARGS(ifp)); if (ifp->if_flags & IFF_UP) { if_down(ifp); sppp_qflush(&sp->pp_cpq); /* XXX ? 
*/ lcp.Down(sp); lcp.Up(sp); } } else if (!sp->pp_loopcnt && ++sp->fail_counter[IDX_LCP] >= sp->lcp.max_failure) { if (debug) log(-1, " max_failure (%d) exceeded, " "send conf-rej\n", sp->lcp.max_failure); sppp_cp_send(sp, PPP_LCP, CONF_REJ, h->ident, rlen, buf); } else { if (debug) log(-1, " send conf-nak\n"); sppp_cp_send (sp, PPP_LCP, CONF_NAK, h->ident, rlen, buf); } } else { if (debug) log(-1, " send conf-ack\n"); sp->fail_counter[IDX_LCP] = 0; sp->pp_loopcnt = 0; sppp_cp_send (sp, PPP_LCP, CONF_ACK, h->ident, origlen, h+1); } free (buf, M_TEMP); return (rlen == 0); } /* * Analyze the LCP Configure-Reject option list, and adjust our * negotiation. */ static void sppp_lcp_RCN_rej(struct sppp *sp, struct lcp_header *h, int len) { STDDCL; u_char *buf, *p; len -= 4; buf = malloc (len, M_TEMP, M_NOWAIT); if (!buf) return; if (debug) log(LOG_DEBUG, SPP_FMT "lcp rej opts: ", SPP_ARGS(ifp)); p = (void*) (h+1); for (; len >= 2 && p[1] >= 2 && len >= p[1]; len -= p[1], p += p[1]) { if (debug) log(-1, " %s ", sppp_lcp_opt_name(*p)); switch (*p) { case LCP_OPT_MAGIC: /* Magic number -- can't use it, use 0 */ sp->lcp.opts &= ~(1 << LCP_OPT_MAGIC); sp->lcp.magic = 0; break; case LCP_OPT_MRU: /* * Should not be rejected anyway, since we only * negotiate a MRU if explicitly requested by * peer. */ sp->lcp.opts &= ~(1 << LCP_OPT_MRU); break; case LCP_OPT_AUTH_PROTO: /* * Peer doesn't want to authenticate himself, * deny unless this is a dialout call, and * AUTHFLAG_NOCALLOUT is set. */ if ((sp->pp_flags & PP_CALLIN) == 0 && (sp->hisauth.flags & AUTHFLAG_NOCALLOUT) != 0) { if (debug) log(-1, "[don't insist on auth " "for callout]"); sp->lcp.opts &= ~(1 << LCP_OPT_AUTH_PROTO); break; } if (debug) log(-1, "[access denied]\n"); lcp.Close(sp); break; } } if (debug) log(-1, "\n"); free (buf, M_TEMP); return; } /* * Analyze the LCP Configure-NAK option list, and adjust our * negotiation. 
*/ static void sppp_lcp_RCN_nak(struct sppp *sp, struct lcp_header *h, int len) { STDDCL; u_char *buf, *p; u_long magic; len -= 4; buf = malloc (len, M_TEMP, M_NOWAIT); if (!buf) return; if (debug) log(LOG_DEBUG, SPP_FMT "lcp nak opts: ", SPP_ARGS(ifp)); p = (void*) (h+1); for (; len >= 2 && p[1] >= 2 && len >= p[1]; len -= p[1], p += p[1]) { if (debug) log(-1, " %s ", sppp_lcp_opt_name(*p)); switch (*p) { case LCP_OPT_MAGIC: /* Magic number -- renegotiate */ if ((sp->lcp.opts & (1 << LCP_OPT_MAGIC)) && len >= 6 && p[1] == 6) { magic = (u_long)p[2] << 24 | (u_long)p[3] << 16 | p[4] << 8 | p[5]; /* * If the remote magic is our negated one, * this looks like a loopback problem. * Suggest a new magic to make sure. */ if (magic == ~sp->lcp.magic) { if (debug) log(-1, "magic glitch "); sp->lcp.magic = random(); } else { sp->lcp.magic = magic; if (debug) log(-1, "%lu ", magic); } } break; case LCP_OPT_MRU: /* * Peer wants to advise us to negotiate an MRU. * Agree on it if it's reasonable, or use * default otherwise. */ if (len >= 4 && p[1] == 4) { u_int mru = p[2] * 256 + p[3]; if (debug) log(-1, "%d ", mru); if (mru < PP_MTU || mru > PP_MAX_MRU) mru = PP_MTU; sp->lcp.mru = mru; sp->lcp.opts |= (1 << LCP_OPT_MRU); } break; case LCP_OPT_AUTH_PROTO: /* * Peer doesn't like our authentication method, * deny. */ if (debug) log(-1, "[access denied]\n"); lcp.Close(sp); break; } } if (debug) log(-1, "\n"); free (buf, M_TEMP); return; } static void sppp_lcp_tlu(struct sppp *sp) { STDDCL; int i; u_long mask; /* XXX ? */ if (! (ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING)) { /* Coming out of loopback mode. 
*/ if_up(ifp); printf (SPP_FMT "up\n", SPP_ARGS(ifp)); } for (i = 0; i < IDX_COUNT; i++) if ((cps[i])->flags & CP_QUAL) (cps[i])->Open(sp); if ((sp->lcp.opts & (1 << LCP_OPT_AUTH_PROTO)) != 0 || (sp->pp_flags & PP_NEEDAUTH) != 0) sp->pp_phase = PHASE_AUTHENTICATE; else sp->pp_phase = PHASE_NETWORK; if (debug) log(LOG_DEBUG, SPP_FMT "phase %s\n", SPP_ARGS(ifp), sppp_phase_name(sp->pp_phase)); /* * Open all authentication protocols. This is even required * if we already proceeded to network phase, since it might be * that remote wants us to authenticate, so we might have to * send a PAP request. Undesired authentication protocols * don't do anything when they get an Open event. */ for (i = 0; i < IDX_COUNT; i++) if ((cps[i])->flags & CP_AUTH) (cps[i])->Open(sp); if (sp->pp_phase == PHASE_NETWORK) { /* Notify all NCPs. */ for (i = 0; i < IDX_COUNT; i++) if (((cps[i])->flags & CP_NCP) && /* * XXX * Hack to administratively disable IPv6 if * not desired. Perhaps we should have another * flag for this, but right now, we can make * all struct cp's read/only. */ (cps[i] != &ipv6cp || (sp->confflags & CONF_ENABLE_IPV6))) (cps[i])->Open(sp); } /* Send Up events to all started protos. */ for (i = 0, mask = 1; i < IDX_COUNT; i++, mask <<= 1) if ((sp->lcp.protos & mask) && ((cps[i])->flags & CP_LCP) == 0) (cps[i])->Up(sp); /* notify low-level driver of state change */ if (sp->pp_chg) sp->pp_chg(sp, (int)sp->pp_phase); if (sp->pp_phase == PHASE_NETWORK) /* if no NCP is starting, close down */ sppp_lcp_check_and_close(sp); } static void sppp_lcp_tld(struct sppp *sp) { STDDCL; int i; u_long mask; sp->pp_phase = PHASE_TERMINATE; if (debug) log(LOG_DEBUG, SPP_FMT "phase %s\n", SPP_ARGS(ifp), sppp_phase_name(sp->pp_phase)); /* * Take upper layers down. We send the Down event first and * the Close second to prevent the upper layers from sending * ``a flurry of terminate-request packets'', as the RFC * describes it. 
*/ for (i = 0, mask = 1; i < IDX_COUNT; i++, mask <<= 1) if ((sp->lcp.protos & mask) && ((cps[i])->flags & CP_LCP) == 0) { (cps[i])->Down(sp); (cps[i])->Close(sp); } } static void sppp_lcp_tls(struct sppp *sp) { STDDCL; sp->pp_phase = PHASE_ESTABLISH; if (debug) log(LOG_DEBUG, SPP_FMT "phase %s\n", SPP_ARGS(ifp), sppp_phase_name(sp->pp_phase)); /* Notify lower layer if desired. */ if (sp->pp_tls) (sp->pp_tls)(sp); else (sp->pp_up)(sp); } static void sppp_lcp_tlf(struct sppp *sp) { STDDCL; sp->pp_phase = PHASE_DEAD; if (debug) log(LOG_DEBUG, SPP_FMT "phase %s\n", SPP_ARGS(ifp), sppp_phase_name(sp->pp_phase)); /* Notify lower layer if desired. */ if (sp->pp_tlf) (sp->pp_tlf)(sp); else (sp->pp_down)(sp); } static void sppp_lcp_scr(struct sppp *sp) { char opt[6 /* magicnum */ + 4 /* mru */ + 5 /* chap */]; int i = 0; u_short authproto; if (sp->lcp.opts & (1 << LCP_OPT_MAGIC)) { if (! sp->lcp.magic) sp->lcp.magic = random(); opt[i++] = LCP_OPT_MAGIC; opt[i++] = 6; opt[i++] = sp->lcp.magic >> 24; opt[i++] = sp->lcp.magic >> 16; opt[i++] = sp->lcp.magic >> 8; opt[i++] = sp->lcp.magic; } if (sp->lcp.opts & (1 << LCP_OPT_MRU)) { opt[i++] = LCP_OPT_MRU; opt[i++] = 4; opt[i++] = sp->lcp.mru >> 8; opt[i++] = sp->lcp.mru; } if (sp->lcp.opts & (1 << LCP_OPT_AUTH_PROTO)) { authproto = sp->hisauth.proto; opt[i++] = LCP_OPT_AUTH_PROTO; opt[i++] = authproto == PPP_CHAP? 5: 4; opt[i++] = authproto >> 8; opt[i++] = authproto; if (authproto == PPP_CHAP) opt[i++] = CHAP_MD5; } sp->confid[IDX_LCP] = ++sp->pp_seq[IDX_LCP]; sppp_cp_send (sp, PPP_LCP, CONF_REQ, sp->confid[IDX_LCP], i, &opt); } /* * Check the open NCPs, return true if at least one NCP is open. */ static int sppp_ncp_check(struct sppp *sp) { int i, mask; for (i = 0, mask = 1; i < IDX_COUNT; i++, mask <<= 1) if ((sp->lcp.protos & mask) && (cps[i])->flags & CP_NCP) return 1; return 0; } /* * Re-check the open NCPs and see if we should terminate the link. * Called by the NCPs during their tlf action handling. 
*/ static void sppp_lcp_check_and_close(struct sppp *sp) { if (sp->pp_phase < PHASE_NETWORK) /* don't bother, we are already going down */ return; if (sppp_ncp_check(sp)) return; lcp.Close(sp); } /* *--------------------------------------------------------------------------* * * * The IPCP implementation. * * * *--------------------------------------------------------------------------* */ #ifdef INET static void sppp_ipcp_init(struct sppp *sp) { sp->ipcp.opts = 0; sp->ipcp.flags = 0; sp->state[IDX_IPCP] = STATE_INITIAL; sp->fail_counter[IDX_IPCP] = 0; sp->pp_seq[IDX_IPCP] = 0; sp->pp_rseq[IDX_IPCP] = 0; callout_init(&sp->ch[IDX_IPCP], 1); } static void sppp_ipcp_up(struct sppp *sp) { sppp_up_event(&ipcp, sp); } static void sppp_ipcp_down(struct sppp *sp) { sppp_down_event(&ipcp, sp); } static void sppp_ipcp_open(struct sppp *sp) { STDDCL; u_long myaddr, hisaddr; sp->ipcp.flags &= ~(IPCP_HISADDR_SEEN | IPCP_MYADDR_SEEN | IPCP_MYADDR_DYN | IPCP_VJ); sp->ipcp.opts = 0; sppp_get_ip_addrs(sp, &myaddr, &hisaddr, 0); /* * If we don't have his address, this probably means our * interface doesn't want to talk IP at all. (This could * be the case if somebody wants to speak only IPX, for * example.) Don't open IPCP in this case. */ if (hisaddr == 0L) { /* XXX this message should go away */ if (debug) log(LOG_DEBUG, SPP_FMT "ipcp_open(): no IP interface\n", SPP_ARGS(ifp)); return; } if (myaddr == 0L) { /* * I don't have an assigned address, so i need to * negotiate my address. */ sp->ipcp.flags |= IPCP_MYADDR_DYN; sp->ipcp.opts |= (1 << IPCP_OPT_ADDRESS); } else sp->ipcp.flags |= IPCP_MYADDR_SEEN; if (sp->confflags & CONF_ENABLE_VJ) { sp->ipcp.opts |= (1 << IPCP_OPT_COMPRESSION); sp->ipcp.max_state = MAX_STATES - 1; sp->ipcp.compress_cid = 1; } sppp_open_event(&ipcp, sp); } static void sppp_ipcp_close(struct sppp *sp) { sppp_close_event(&ipcp, sp); if (sp->ipcp.flags & IPCP_MYADDR_DYN) /* * My address was dynamic, clear it again. 
*/ sppp_set_ip_addr(sp, 0L); } static void sppp_ipcp_TO(void *cookie) { sppp_to_event(&ipcp, (struct sppp *)cookie); } /* * Analyze a configure request. Return true if it was agreeable, and * caused action sca, false if it has been rejected or nak'ed, and * caused action scn. (The return value is used to make the state * transition decision in the state automaton.) */ static int sppp_ipcp_RCR(struct sppp *sp, struct lcp_header *h, int len) { u_char *buf, *r, *p; struct ifnet *ifp = SP2IFP(sp); int rlen, origlen, debug = ifp->if_flags & IFF_DEBUG; u_long hisaddr, desiredaddr; int gotmyaddr = 0; int desiredcomp; len -= 4; origlen = len; /* * Make sure to allocate a buf that can at least hold a * conf-nak with an `address' option. We might need it below. */ buf = r = malloc ((len < 6? 6: len), M_TEMP, M_NOWAIT); if (! buf) return (0); /* pass 1: see if we can recognize them */ if (debug) log(LOG_DEBUG, SPP_FMT "ipcp parse opts: ", SPP_ARGS(ifp)); p = (void*) (h+1); for (rlen=0; len >= 2 && p[1] >= 2 && len >= p[1]; len-=p[1], p+=p[1]) { if (debug) log(-1, " %s ", sppp_ipcp_opt_name(*p)); switch (*p) { case IPCP_OPT_COMPRESSION: if (!(sp->confflags & CONF_ENABLE_VJ)) { /* VJ compression administratively disabled */ if (debug) log(-1, "[locally disabled] "); break; } /* * In theory, we should only conf-rej an * option that is shorter than RFC 1618 * requires (i.e. < 4), and should conf-nak * anything else that is not VJ. However, * since our algorithm always uses the * original option to NAK it with new values, * things would become more complicated. In * practice, the only commonly implemented IP * compression option is VJ anyway, so the * difference is negligible. 
*/ if (len >= 6 && p[1] == 6) { /* * correctly formed compression option * that could be VJ compression */ continue; } if (debug) log(-1, "optlen %d [invalid/unsupported] ", p[1]); break; case IPCP_OPT_ADDRESS: if (len >= 6 && p[1] == 6) { /* correctly formed address option */ continue; } if (debug) log(-1, "[invalid] "); break; default: /* Others not supported. */ if (debug) log(-1, "[rej] "); break; } /* Add the option to rejected list. */ bcopy (p, r, p[1]); r += p[1]; rlen += p[1]; } if (rlen) { if (debug) log(-1, " send conf-rej\n"); sppp_cp_send (sp, PPP_IPCP, CONF_REJ, h->ident, rlen, buf); return 0; } else if (debug) log(-1, "\n"); /* pass 2: parse option values */ sppp_get_ip_addrs(sp, 0, &hisaddr, 0); if (debug) log(LOG_DEBUG, SPP_FMT "ipcp parse opt values: ", SPP_ARGS(ifp)); p = (void*) (h+1); len = origlen; for (rlen=0; len >= 2 && p[1] >= 2 && len >= p[1]; len-=p[1], p+=p[1]) { if (debug) log(-1, " %s ", sppp_ipcp_opt_name(*p)); switch (*p) { case IPCP_OPT_COMPRESSION: desiredcomp = p[2] << 8 | p[3]; /* We only support VJ */ if (desiredcomp == IPCP_COMP_VJ) { if (debug) log(-1, "VJ [ack] "); sp->ipcp.flags |= IPCP_VJ; sl_compress_init(sp->pp_comp, p[4]); sp->ipcp.max_state = p[4]; sp->ipcp.compress_cid = p[5]; continue; } if (debug) log(-1, "compproto %#04x [not supported] ", desiredcomp); p[2] = IPCP_COMP_VJ >> 8; p[3] = IPCP_COMP_VJ; p[4] = sp->ipcp.max_state; p[5] = sp->ipcp.compress_cid; break; case IPCP_OPT_ADDRESS: /* This is the address he wants in his end */ desiredaddr = p[2] << 24 | p[3] << 16 | p[4] << 8 | p[5]; if (desiredaddr == hisaddr || (hisaddr >= 1 && hisaddr <= 254 && desiredaddr != 0)) { /* * Peer's address is same as our value, * or we have set it to 0.0.0.* to * indicate that we do not really care, * this is agreeable. Gonna conf-ack * it. */ if (debug) log(-1, "%s [ack] ", sppp_dotted_quad(hisaddr)); /* record that we've seen it already */ sp->ipcp.flags |= IPCP_HISADDR_SEEN; continue; } /* * The address wasn't agreeable. 
This is either * he sent us 0.0.0.0, asking to assign him an * address, or he send us another address not * matching our value. Either case, we gonna * conf-nak it with our value. * XXX: we should "rej" if hisaddr == 0 */ if (debug) { if (desiredaddr == 0) log(-1, "[addr requested] "); else log(-1, "%s [not agreed] ", sppp_dotted_quad(desiredaddr)); } p[2] = hisaddr >> 24; p[3] = hisaddr >> 16; p[4] = hisaddr >> 8; p[5] = hisaddr; break; } /* Add the option to nak'ed list. */ bcopy (p, r, p[1]); r += p[1]; rlen += p[1]; } /* * If we are about to conf-ack the request, but haven't seen * his address so far, gonna conf-nak it instead, with the * `address' option present and our idea of his address being * filled in there, to request negotiation of both addresses. * * XXX This can result in an endless req - nak loop if peer * doesn't want to send us his address. Q: What should we do * about it? XXX A: implement the max-failure counter. */ if (rlen == 0 && !(sp->ipcp.flags & IPCP_HISADDR_SEEN) && !gotmyaddr) { buf[0] = IPCP_OPT_ADDRESS; buf[1] = 6; buf[2] = hisaddr >> 24; buf[3] = hisaddr >> 16; buf[4] = hisaddr >> 8; buf[5] = hisaddr; rlen = 6; if (debug) log(-1, "still need hisaddr "); } if (rlen) { if (debug) log(-1, " send conf-nak\n"); sppp_cp_send (sp, PPP_IPCP, CONF_NAK, h->ident, rlen, buf); } else { if (debug) log(-1, " send conf-ack\n"); sppp_cp_send (sp, PPP_IPCP, CONF_ACK, h->ident, origlen, h+1); } free (buf, M_TEMP); return (rlen == 0); } /* * Analyze the IPCP Configure-Reject option list, and adjust our * negotiation. 
*/ static void sppp_ipcp_RCN_rej(struct sppp *sp, struct lcp_header *h, int len) { u_char *buf, *p; struct ifnet *ifp = SP2IFP(sp); int debug = ifp->if_flags & IFF_DEBUG; len -= 4; buf = malloc (len, M_TEMP, M_NOWAIT); if (!buf) return; if (debug) log(LOG_DEBUG, SPP_FMT "ipcp rej opts: ", SPP_ARGS(ifp)); p = (void*) (h+1); for (; len >= 2 && p[1] >= 2 && len >= p[1]; len -= p[1], p += p[1]) { if (debug) log(-1, " %s ", sppp_ipcp_opt_name(*p)); switch (*p) { case IPCP_OPT_COMPRESSION: sp->ipcp.opts &= ~(1 << IPCP_OPT_COMPRESSION); break; case IPCP_OPT_ADDRESS: /* * Peer doesn't grok address option. This is * bad. XXX Should we better give up here? * XXX We could try old "addresses" option... */ sp->ipcp.opts &= ~(1 << IPCP_OPT_ADDRESS); break; } } if (debug) log(-1, "\n"); free (buf, M_TEMP); return; } /* * Analyze the IPCP Configure-NAK option list, and adjust our * negotiation. */ static void sppp_ipcp_RCN_nak(struct sppp *sp, struct lcp_header *h, int len) { u_char *buf, *p; struct ifnet *ifp = SP2IFP(sp); int debug = ifp->if_flags & IFF_DEBUG; int desiredcomp; u_long wantaddr; len -= 4; buf = malloc (len, M_TEMP, M_NOWAIT); if (!buf) return; if (debug) log(LOG_DEBUG, SPP_FMT "ipcp nak opts: ", SPP_ARGS(ifp)); p = (void*) (h+1); for (; len >= 2 && p[1] >= 2 && len >= p[1]; len -= p[1], p += p[1]) { if (debug) log(-1, " %s ", sppp_ipcp_opt_name(*p)); switch (*p) { case IPCP_OPT_COMPRESSION: if (len >= 6 && p[1] == 6) { desiredcomp = p[2] << 8 | p[3]; if (debug) log(-1, "[wantcomp %#04x] ", desiredcomp); if (desiredcomp == IPCP_COMP_VJ) { sl_compress_init(sp->pp_comp, p[4]); sp->ipcp.max_state = p[4]; sp->ipcp.compress_cid = p[5]; if (debug) log(-1, "[agree] "); } else sp->ipcp.opts &= ~(1 << IPCP_OPT_COMPRESSION); } break; case IPCP_OPT_ADDRESS: /* * Peer doesn't like our local IP address. See * if we can do something for him. We'll drop * him our address then. 
*/ if (len >= 6 && p[1] == 6) { wantaddr = p[2] << 24 | p[3] << 16 | p[4] << 8 | p[5]; sp->ipcp.opts |= (1 << IPCP_OPT_ADDRESS); if (debug) log(-1, "[wantaddr %s] ", sppp_dotted_quad(wantaddr)); /* * When doing dynamic address assignment, * we accept his offer. Otherwise, we * ignore it and thus continue to negotiate * our already existing value. * XXX: Bogus, if he said no once, he'll * just say no again, might as well die. */ if (sp->ipcp.flags & IPCP_MYADDR_DYN) { sppp_set_ip_addr(sp, wantaddr); if (debug) log(-1, "[agree] "); sp->ipcp.flags |= IPCP_MYADDR_SEEN; } } break; } } if (debug) log(-1, "\n"); free (buf, M_TEMP); return; } static void sppp_ipcp_tlu(struct sppp *sp) { /* we are up - notify isdn daemon */ if (sp->pp_con) sp->pp_con(sp); } static void sppp_ipcp_tld(struct sppp *sp) { } static void sppp_ipcp_tls(struct sppp *sp) { /* indicate to LCP that it must stay alive */ sp->lcp.protos |= (1 << IDX_IPCP); } static void sppp_ipcp_tlf(struct sppp *sp) { /* we no longer need LCP */ sp->lcp.protos &= ~(1 << IDX_IPCP); sppp_lcp_check_and_close(sp); } static void sppp_ipcp_scr(struct sppp *sp) { char opt[6 /* compression */ + 6 /* address */]; u_long ouraddr; int i = 0; if (sp->ipcp.opts & (1 << IPCP_OPT_COMPRESSION)) { opt[i++] = IPCP_OPT_COMPRESSION; opt[i++] = 6; opt[i++] = IPCP_COMP_VJ >> 8; opt[i++] = IPCP_COMP_VJ; opt[i++] = sp->ipcp.max_state; opt[i++] = sp->ipcp.compress_cid; } if (sp->ipcp.opts & (1 << IPCP_OPT_ADDRESS)) { sppp_get_ip_addrs(sp, &ouraddr, 0, 0); opt[i++] = IPCP_OPT_ADDRESS; opt[i++] = 6; opt[i++] = ouraddr >> 24; opt[i++] = ouraddr >> 16; opt[i++] = ouraddr >> 8; opt[i++] = ouraddr; } sp->confid[IDX_IPCP] = ++sp->pp_seq[IDX_IPCP]; sppp_cp_send(sp, PPP_IPCP, CONF_REQ, sp->confid[IDX_IPCP], i, &opt); } #else /* !INET */ static void sppp_ipcp_init(struct sppp *sp) { } static void sppp_ipcp_up(struct sppp *sp) { } static void sppp_ipcp_down(struct sppp *sp) { } static void sppp_ipcp_open(struct sppp *sp) { } static void 
sppp_ipcp_close(struct sppp *sp) { } static void sppp_ipcp_TO(void *cookie) { } static int sppp_ipcp_RCR(struct sppp *sp, struct lcp_header *h, int len) { return (0); } static void sppp_ipcp_RCN_rej(struct sppp *sp, struct lcp_header *h, int len) { } static void sppp_ipcp_RCN_nak(struct sppp *sp, struct lcp_header *h, int len) { } static void sppp_ipcp_tlu(struct sppp *sp) { } static void sppp_ipcp_tld(struct sppp *sp) { } static void sppp_ipcp_tls(struct sppp *sp) { } static void sppp_ipcp_tlf(struct sppp *sp) { } static void sppp_ipcp_scr(struct sppp *sp) { } #endif /* *--------------------------------------------------------------------------* * * * The IPv6CP implementation. * * * *--------------------------------------------------------------------------* */ #ifdef INET6 static void sppp_ipv6cp_init(struct sppp *sp) { sp->ipv6cp.opts = 0; sp->ipv6cp.flags = 0; sp->state[IDX_IPV6CP] = STATE_INITIAL; sp->fail_counter[IDX_IPV6CP] = 0; sp->pp_seq[IDX_IPV6CP] = 0; sp->pp_rseq[IDX_IPV6CP] = 0; callout_init(&sp->ch[IDX_IPV6CP], 1); } static void sppp_ipv6cp_up(struct sppp *sp) { sppp_up_event(&ipv6cp, sp); } static void sppp_ipv6cp_down(struct sppp *sp) { sppp_down_event(&ipv6cp, sp); } static void sppp_ipv6cp_open(struct sppp *sp) { STDDCL; struct in6_addr myaddr, hisaddr; #ifdef IPV6CP_MYIFID_DYN sp->ipv6cp.flags &= ~(IPV6CP_MYIFID_SEEN|IPV6CP_MYIFID_DYN); #else sp->ipv6cp.flags &= ~IPV6CP_MYIFID_SEEN; #endif sppp_get_ip6_addrs(sp, &myaddr, &hisaddr, 0); /* * If we don't have our address, this probably means our * interface doesn't want to talk IPv6 at all. (This could * be the case if somebody wants to speak only IPX, for * example.) Don't open IPv6CP in this case. 
*/ if (IN6_IS_ADDR_UNSPECIFIED(&myaddr)) { /* XXX this message should go away */ if (debug) log(LOG_DEBUG, SPP_FMT "ipv6cp_open(): no IPv6 interface\n", SPP_ARGS(ifp)); return; } sp->ipv6cp.flags |= IPV6CP_MYIFID_SEEN; sp->ipv6cp.opts |= (1 << IPV6CP_OPT_IFID); sppp_open_event(&ipv6cp, sp); } static void sppp_ipv6cp_close(struct sppp *sp) { sppp_close_event(&ipv6cp, sp); } static void sppp_ipv6cp_TO(void *cookie) { sppp_to_event(&ipv6cp, (struct sppp *)cookie); } /* * Analyze a configure request. Return true if it was agreeable, and * caused action sca, false if it has been rejected or nak'ed, and * caused action scn. (The return value is used to make the state * transition decision in the state automaton.) */ static int sppp_ipv6cp_RCR(struct sppp *sp, struct lcp_header *h, int len) { u_char *buf, *r, *p; struct ifnet *ifp = SP2IFP(sp); int rlen, origlen, debug = ifp->if_flags & IFF_DEBUG; struct in6_addr myaddr, desiredaddr, suggestaddr; int ifidcount; int type; int collision, nohisaddr; char ip6buf[INET6_ADDRSTRLEN]; len -= 4; origlen = len; /* * Make sure to allocate a buf that can at least hold a * conf-nak with an `address' option. We might need it below. */ buf = r = malloc ((len < 6? 6: len), M_TEMP, M_NOWAIT); if (! buf) return (0); /* pass 1: see if we can recognize them */ if (debug) log(LOG_DEBUG, SPP_FMT "ipv6cp parse opts:", SPP_ARGS(ifp)); p = (void*) (h+1); ifidcount = 0; for (rlen=0; len >= 2 && p[1] >= 2 && len >= p[1]; len-=p[1], p+=p[1]) { if (debug) log(-1, " %s", sppp_ipv6cp_opt_name(*p)); switch (*p) { case IPV6CP_OPT_IFID: if (len >= 10 && p[1] == 10 && ifidcount == 0) { /* correctly formed address option */ ifidcount++; continue; } if (debug) log(-1, " [invalid]"); break; #ifdef notyet case IPV6CP_OPT_COMPRESSION: if (len >= 4 && p[1] >= 4) { /* correctly formed compress option */ continue; } if (debug) log(-1, " [invalid]"); break; #endif default: /* Others not supported. 
*/ if (debug) log(-1, " [rej]"); break; } /* Add the option to rejected list. */ bcopy (p, r, p[1]); r += p[1]; rlen += p[1]; } if (rlen) { if (debug) log(-1, " send conf-rej\n"); sppp_cp_send (sp, PPP_IPV6CP, CONF_REJ, h->ident, rlen, buf); goto end; } else if (debug) log(-1, "\n"); /* pass 2: parse option values */ sppp_get_ip6_addrs(sp, &myaddr, 0, 0); if (debug) log(LOG_DEBUG, SPP_FMT "ipv6cp parse opt values: ", SPP_ARGS(ifp)); p = (void*) (h+1); len = origlen; type = CONF_ACK; for (rlen=0; len >= 2 && p[1] >= 2 && len >= p[1]; len-=p[1], p+=p[1]) { if (debug) log(-1, " %s", sppp_ipv6cp_opt_name(*p)); switch (*p) { #ifdef notyet case IPV6CP_OPT_COMPRESSION: continue; #endif case IPV6CP_OPT_IFID: bzero(&desiredaddr, sizeof(desiredaddr)); bcopy(&p[2], &desiredaddr.s6_addr[8], 8); collision = (bcmp(&desiredaddr.s6_addr[8], &myaddr.s6_addr[8], 8) == 0); nohisaddr = IN6_IS_ADDR_UNSPECIFIED(&desiredaddr); desiredaddr.s6_addr16[0] = htons(0xfe80); (void)in6_setscope(&desiredaddr, SP2IFP(sp), NULL); if (!collision && !nohisaddr) { /* no collision, hisaddr known - Conf-Ack */ type = CONF_ACK; if (debug) { log(-1, " %s [%s]", ip6_sprintf(ip6buf, &desiredaddr), sppp_cp_type_name(type)); } continue; } bzero(&suggestaddr, sizeof(suggestaddr)); if (collision && nohisaddr) { /* collision, hisaddr unknown - Conf-Rej */ type = CONF_REJ; bzero(&p[2], 8); } else { /* * - no collision, hisaddr unknown, or * - collision, hisaddr known * Conf-Nak, suggest hisaddr */ type = CONF_NAK; sppp_suggest_ip6_addr(sp, &suggestaddr); bcopy(&suggestaddr.s6_addr[8], &p[2], 8); } if (debug) log(-1, " %s [%s]", ip6_sprintf(ip6buf, &desiredaddr), sppp_cp_type_name(type)); break; } /* Add the option to nak'ed list. 
*/ bcopy (p, r, p[1]); r += p[1]; rlen += p[1]; } if (rlen == 0 && type == CONF_ACK) { if (debug) log(-1, " send %s\n", sppp_cp_type_name(type)); sppp_cp_send (sp, PPP_IPV6CP, type, h->ident, origlen, h+1); } else { #ifdef DIAGNOSTIC if (type == CONF_ACK) panic("IPv6CP RCR: CONF_ACK with non-zero rlen"); #endif if (debug) { log(-1, " send %s suggest %s\n", sppp_cp_type_name(type), ip6_sprintf(ip6buf, &suggestaddr)); } sppp_cp_send (sp, PPP_IPV6CP, type, h->ident, rlen, buf); } end: free (buf, M_TEMP); return (rlen == 0); } /* * Analyze the IPv6CP Configure-Reject option list, and adjust our * negotiation. */ static void sppp_ipv6cp_RCN_rej(struct sppp *sp, struct lcp_header *h, int len) { u_char *buf, *p; struct ifnet *ifp = SP2IFP(sp); int debug = ifp->if_flags & IFF_DEBUG; len -= 4; buf = malloc (len, M_TEMP, M_NOWAIT); if (!buf) return; if (debug) log(LOG_DEBUG, SPP_FMT "ipv6cp rej opts:", SPP_ARGS(ifp)); p = (void*) (h+1); for (; len >= 2 && p[1] >= 2 && len >= p[1]; len -= p[1], p += p[1]) { if (debug) log(-1, " %s", sppp_ipv6cp_opt_name(*p)); switch (*p) { case IPV6CP_OPT_IFID: /* * Peer doesn't grok address option. This is * bad. XXX Should we better give up here? */ sp->ipv6cp.opts &= ~(1 << IPV6CP_OPT_IFID); break; #ifdef notyet case IPV6CP_OPT_COMPRESS: sp->ipv6cp.opts &= ~(1 << IPV6CP_OPT_COMPRESS); break; #endif } } if (debug) log(-1, "\n"); free (buf, M_TEMP); return; } /* * Analyze the IPv6CP Configure-NAK option list, and adjust our * negotiation. 
*/ static void sppp_ipv6cp_RCN_nak(struct sppp *sp, struct lcp_header *h, int len) { u_char *buf, *p; struct ifnet *ifp = SP2IFP(sp); int debug = ifp->if_flags & IFF_DEBUG; struct in6_addr suggestaddr; char ip6buf[INET6_ADDRSTRLEN]; len -= 4; buf = malloc (len, M_TEMP, M_NOWAIT); if (!buf) return; if (debug) log(LOG_DEBUG, SPP_FMT "ipv6cp nak opts:", SPP_ARGS(ifp)); p = (void*) (h+1); for (; len >= 2 && p[1] >= 2 && len >= p[1]; len -= p[1], p += p[1]) { if (debug) log(-1, " %s", sppp_ipv6cp_opt_name(*p)); switch (*p) { case IPV6CP_OPT_IFID: /* * Peer doesn't like our local ifid. See * if we can do something for him. We'll drop * him our address then. */ if (len < 10 || p[1] != 10) break; bzero(&suggestaddr, sizeof(suggestaddr)); suggestaddr.s6_addr16[0] = htons(0xfe80); (void)in6_setscope(&suggestaddr, SP2IFP(sp), NULL); bcopy(&p[2], &suggestaddr.s6_addr[8], 8); sp->ipv6cp.opts |= (1 << IPV6CP_OPT_IFID); if (debug) log(-1, " [suggestaddr %s]", ip6_sprintf(ip6buf, &suggestaddr)); #ifdef IPV6CP_MYIFID_DYN /* * When doing dynamic address assignment, * we accept his offer. */ if (sp->ipv6cp.flags & IPV6CP_MYIFID_DYN) { struct in6_addr lastsuggest; /* * If equals to * , * we have a collision. generate new random * ifid. */ sppp_suggest_ip6_addr(&lastsuggest); if (IN6_ARE_ADDR_EQUAL(&suggestaddr, lastsuggest)) { if (debug) log(-1, " [random]"); sppp_gen_ip6_addr(sp, &suggestaddr); } sppp_set_ip6_addr(sp, &suggestaddr, 0); if (debug) log(-1, " [agree]"); sp->ipv6cp.flags |= IPV6CP_MYIFID_SEEN; } #else /* * Since we do not do dynamic address assignment, * we ignore it and thus continue to negotiate * our already existing value. This can possibly * go into infinite request-reject loop. * * This is not likely because we normally use * ifid based on MAC-address. * If you have no ethernet card on the node, too bad. * XXX should we use fail_counter? */ #endif break; #ifdef notyet case IPV6CP_OPT_COMPRESS: /* * Peer wants different compression parameters. 
*/ break; #endif } } if (debug) log(-1, "\n"); free (buf, M_TEMP); return; } static void sppp_ipv6cp_tlu(struct sppp *sp) { /* we are up - notify isdn daemon */ if (sp->pp_con) sp->pp_con(sp); } static void sppp_ipv6cp_tld(struct sppp *sp) { } static void sppp_ipv6cp_tls(struct sppp *sp) { /* indicate to LCP that it must stay alive */ sp->lcp.protos |= (1 << IDX_IPV6CP); } static void sppp_ipv6cp_tlf(struct sppp *sp) { #if 0 /* need #if 0 to close IPv6CP properly */ /* we no longer need LCP */ sp->lcp.protos &= ~(1 << IDX_IPV6CP); sppp_lcp_check_and_close(sp); #endif } static void sppp_ipv6cp_scr(struct sppp *sp) { char opt[10 /* ifid */ + 4 /* compression, minimum */]; struct in6_addr ouraddr; int i = 0; if (sp->ipv6cp.opts & (1 << IPV6CP_OPT_IFID)) { sppp_get_ip6_addrs(sp, &ouraddr, 0, 0); opt[i++] = IPV6CP_OPT_IFID; opt[i++] = 10; bcopy(&ouraddr.s6_addr[8], &opt[i], 8); i += 8; } #ifdef notyet if (sp->ipv6cp.opts & (1 << IPV6CP_OPT_COMPRESSION)) { opt[i++] = IPV6CP_OPT_COMPRESSION; opt[i++] = 4; opt[i++] = 0; /* TBD */ opt[i++] = 0; /* TBD */ /* variable length data may follow */ } #endif sp->confid[IDX_IPV6CP] = ++sp->pp_seq[IDX_IPV6CP]; sppp_cp_send(sp, PPP_IPV6CP, CONF_REQ, sp->confid[IDX_IPV6CP], i, &opt); } #else /*INET6*/ static void sppp_ipv6cp_init(struct sppp *sp) { } static void sppp_ipv6cp_up(struct sppp *sp) { } static void sppp_ipv6cp_down(struct sppp *sp) { } static void sppp_ipv6cp_open(struct sppp *sp) { } static void sppp_ipv6cp_close(struct sppp *sp) { } static void sppp_ipv6cp_TO(void *sp) { } static int sppp_ipv6cp_RCR(struct sppp *sp, struct lcp_header *h, int len) { return 0; } static void sppp_ipv6cp_RCN_rej(struct sppp *sp, struct lcp_header *h, int len) { } static void sppp_ipv6cp_RCN_nak(struct sppp *sp, struct lcp_header *h, int len) { } static void sppp_ipv6cp_tlu(struct sppp *sp) { } static void sppp_ipv6cp_tld(struct sppp *sp) { } static void sppp_ipv6cp_tls(struct sppp *sp) { } static void sppp_ipv6cp_tlf(struct sppp *sp) { } 
/* Stub used when INET6 is not configured: no Configure-Request is sent. */
static void
sppp_ipv6cp_scr(struct sppp *sp)
{
}
#endif /*INET6*/

/*
 *--------------------------------------------------------------------------*
 *                                                                          *
 *                        The CHAP implementation.                          *
 *                                                                          *
 *--------------------------------------------------------------------------*
 */

/*
 * The authentication protocols don't employ a full-fledged state machine as
 * the control protocols do, since they do have Open and Close events, but
 * not Up and Down, nor are they explicitly terminated.  Also, use of the
 * authentication protocols may be different in both directions (this makes
 * sense, think of a machine that never accepts incoming calls but only
 * calls out, it doesn't require the called party to authenticate itself).
 *
 * Our state machine for the local authentication protocol (we are requesting
 * the peer to authenticate) looks like:
 *
 *                                    RCA-
 *            +--------------------------------------------+
 *            V                                     scn,tld|
 *        +--------+                          Close    +---------+ RCA+
 *        |        |<----------------------------------|         |------+
 *   +--->| Closed |                           TO*     | Opened  | sca  |
 *   |    |        |-----+                     +-------|         |<-----+
 *   |    +--------+ irc |                     |       +---------+
 *   |      ^            |                     |           ^
 *   |      |            |                     |           |
 *   |      |            |                     |           |
 *   |   TO-|            |                     |           |
 *   |      |tld  TO+    V                     |           |
 *   |      |   +------->+                     |           |
 *   |      |   |        |                     |           |
 *   |    +--------+     V                     |           |
 *   |    |        |<----+<--------------------+           |
 *   |    | Req-   | scr                                   |
 *   |    | Sent   |                                       |
 *   |    |        |                                       |
 *   |    +--------+                                       |
 *   | RCA- |   | RCA+                                     |
 *   +------+   +------------------------------------------+
 *   scn,tld      sca,irc,ict,tlu
 *
 *
 *   with:
 *
 *	Open:	LCP reached authentication phase
 *	Close:	LCP reached terminate phase
 *
 *	RCA+:	received reply (pap-req, chap-response), acceptable
 *	RCA-:	received reply (pap-req, chap-response), not acceptable
 *	TO+:	timeout with restart counter >= 0
 *	TO-:	timeout with restart counter < 0
 *	TO*:	reschedule timeout for CHAP
 *
 *	scr:	send request packet (none for PAP, chap-challenge)
 *	sca:	send ack packet (pap-ack, chap-success)
 *	scn:	send nak packet (pap-nak, chap-failure)
 *	irc:	initialize restart counter
 *	ict:	initialize re-challenge timer (CHAP only)
 *
 *	tlu:	this-layer-up, LCP reaches network phase
 *	tld:	this-layer-down, LCP enters terminate phase
 *
 * Note that in CHAP mode, after sending a new challenge, while the state
 * automaton falls back into Req-Sent state, it doesn't signal a tld
 * event to LCP, so LCP remains in network phase.  Only after not getting
 * any response (or after getting an unacceptable response), CHAP closes,
 * causing LCP to enter terminate phase.
 *
 * With PAP, there is no initial request that can be sent.  The peer is
 * expected to send one based on the successful negotiation of PAP as
 * the authentication protocol during the LCP option negotiation.
 *
 * Incoming authentication protocol requests (remote requests
 * authentication, we are peer) don't employ a state machine at all,
 * they are simply answered.  Some peers [Ascend P50 firmware rev
 * 4.50] react allergically when sending IPCP requests while they are
 * still in authentication phase (thereby violating the standard that
 * demands that these NCP packets are to be discarded), so we keep
 * track of the peer demanding us to authenticate, and only proceed to
 * phase network once we've seen a positive acknowledge for the
 * authentication.
 */

/*
 * Handle incoming CHAP packets.
*/ static void sppp_chap_input(struct sppp *sp, struct mbuf *m) { STDDCL; struct lcp_header *h; int len; u_char *value, *name, digest[AUTHKEYLEN], dsize; int value_len, name_len; MD5_CTX ctx; len = m->m_pkthdr.len; if (len < 4) { if (debug) log(LOG_DEBUG, SPP_FMT "chap invalid packet length: %d bytes\n", SPP_ARGS(ifp), len); return; } h = mtod (m, struct lcp_header*); if (len > ntohs (h->len)) len = ntohs (h->len); switch (h->type) { /* challenge, failure and success are his authproto */ case CHAP_CHALLENGE: value = 1 + (u_char*)(h+1); value_len = value[-1]; name = value + value_len; name_len = len - value_len - 5; if (name_len < 0) { if (debug) { log(LOG_DEBUG, SPP_FMT "chap corrupted challenge " "<%s id=0x%x len=%d", SPP_ARGS(ifp), sppp_auth_type_name(PPP_CHAP, h->type), h->ident, ntohs(h->len)); sppp_print_bytes((u_char*) (h+1), len-4); log(-1, ">\n"); } break; } if (debug) { log(LOG_DEBUG, SPP_FMT "chap input <%s id=0x%x len=%d name=", SPP_ARGS(ifp), sppp_auth_type_name(PPP_CHAP, h->type), h->ident, ntohs(h->len)); sppp_print_string((char*) name, name_len); log(-1, " value-size=%d value=", value_len); sppp_print_bytes(value, value_len); log(-1, ">\n"); } /* Compute reply value. 
*/ MD5Init(&ctx); MD5Update(&ctx, &h->ident, 1); MD5Update(&ctx, sp->myauth.secret, sppp_strnlen(sp->myauth.secret, AUTHKEYLEN)); MD5Update(&ctx, value, value_len); MD5Final(digest, &ctx); dsize = sizeof digest; sppp_auth_send(&chap, sp, CHAP_RESPONSE, h->ident, sizeof dsize, (const char *)&dsize, sizeof digest, digest, (size_t)sppp_strnlen(sp->myauth.name, AUTHNAMELEN), sp->myauth.name, 0); break; case CHAP_SUCCESS: if (debug) { log(LOG_DEBUG, SPP_FMT "chap success", SPP_ARGS(ifp)); if (len > 4) { log(-1, ": "); sppp_print_string((char*)(h + 1), len - 4); } log(-1, "\n"); } SPPP_LOCK(sp); sp->pp_flags &= ~PP_NEEDAUTH; if (sp->myauth.proto == PPP_CHAP && (sp->lcp.opts & (1 << LCP_OPT_AUTH_PROTO)) && (sp->lcp.protos & (1 << IDX_CHAP)) == 0) { /* * We are authenticator for CHAP but didn't * complete yet. Leave it to tlu to proceed * to network phase. */ SPPP_UNLOCK(sp); break; } SPPP_UNLOCK(sp); sppp_phase_network(sp); break; case CHAP_FAILURE: if (debug) { log(LOG_INFO, SPP_FMT "chap failure", SPP_ARGS(ifp)); if (len > 4) { log(-1, ": "); sppp_print_string((char*)(h + 1), len - 4); } log(-1, "\n"); } else log(LOG_INFO, SPP_FMT "chap failure\n", SPP_ARGS(ifp)); /* await LCP shutdown by authenticator */ break; /* response is my authproto */ case CHAP_RESPONSE: value = 1 + (u_char*)(h+1); value_len = value[-1]; name = value + value_len; name_len = len - value_len - 5; if (name_len < 0) { if (debug) { log(LOG_DEBUG, SPP_FMT "chap corrupted response " "<%s id=0x%x len=%d", SPP_ARGS(ifp), sppp_auth_type_name(PPP_CHAP, h->type), h->ident, ntohs(h->len)); sppp_print_bytes((u_char*)(h+1), len-4); log(-1, ">\n"); } break; } if (h->ident != sp->confid[IDX_CHAP]) { if (debug) log(LOG_DEBUG, SPP_FMT "chap dropping response for old ID " "(got %d, expected %d)\n", SPP_ARGS(ifp), h->ident, sp->confid[IDX_CHAP]); break; } if (name_len != sppp_strnlen(sp->hisauth.name, AUTHNAMELEN) || bcmp(name, sp->hisauth.name, name_len) != 0) { log(LOG_INFO, SPP_FMT "chap response, his name ", 
SPP_ARGS(ifp)); sppp_print_string(name, name_len); log(-1, " != expected "); sppp_print_string(sp->hisauth.name, sppp_strnlen(sp->hisauth.name, AUTHNAMELEN)); log(-1, "\n"); } if (debug) { log(LOG_DEBUG, SPP_FMT "chap input(%s) " "<%s id=0x%x len=%d name=", SPP_ARGS(ifp), sppp_state_name(sp->state[IDX_CHAP]), sppp_auth_type_name(PPP_CHAP, h->type), h->ident, ntohs (h->len)); sppp_print_string((char*)name, name_len); log(-1, " value-size=%d value=", value_len); sppp_print_bytes(value, value_len); log(-1, ">\n"); } if (value_len != AUTHKEYLEN) { if (debug) log(LOG_DEBUG, SPP_FMT "chap bad hash value length: " "%d bytes, should be %d\n", SPP_ARGS(ifp), value_len, AUTHKEYLEN); break; } MD5Init(&ctx); MD5Update(&ctx, &h->ident, 1); MD5Update(&ctx, sp->hisauth.secret, sppp_strnlen(sp->hisauth.secret, AUTHKEYLEN)); MD5Update(&ctx, sp->myauth.challenge, AUTHKEYLEN); MD5Final(digest, &ctx); #define FAILMSG "Failed..." #define SUCCMSG "Welcome!" if (value_len != sizeof digest || bcmp(digest, value, value_len) != 0) { /* action scn, tld */ sppp_auth_send(&chap, sp, CHAP_FAILURE, h->ident, sizeof(FAILMSG) - 1, (u_char *)FAILMSG, 0); chap.tld(sp); break; } /* action sca, perhaps tlu */ if (sp->state[IDX_CHAP] == STATE_REQ_SENT || sp->state[IDX_CHAP] == STATE_OPENED) sppp_auth_send(&chap, sp, CHAP_SUCCESS, h->ident, sizeof(SUCCMSG) - 1, (u_char *)SUCCMSG, 0); if (sp->state[IDX_CHAP] == STATE_REQ_SENT) { sppp_cp_change_state(&chap, sp, STATE_OPENED); chap.tlu(sp); } break; default: /* Unknown CHAP packet type -- ignore. */ if (debug) { log(LOG_DEBUG, SPP_FMT "chap unknown input(%s) " "<0x%x id=0x%xh len=%d", SPP_ARGS(ifp), sppp_state_name(sp->state[IDX_CHAP]), h->type, h->ident, ntohs(h->len)); sppp_print_bytes((u_char*)(h+1), len-4); log(-1, ">\n"); } break; } } static void sppp_chap_init(struct sppp *sp) { /* Chap doesn't have STATE_INITIAL at all. 
*/ sp->state[IDX_CHAP] = STATE_CLOSED; sp->fail_counter[IDX_CHAP] = 0; sp->pp_seq[IDX_CHAP] = 0; sp->pp_rseq[IDX_CHAP] = 0; callout_init(&sp->ch[IDX_CHAP], 1); } static void sppp_chap_open(struct sppp *sp) { if (sp->myauth.proto == PPP_CHAP && (sp->lcp.opts & (1 << LCP_OPT_AUTH_PROTO)) != 0) { /* we are authenticator for CHAP, start it */ chap.scr(sp); sp->rst_counter[IDX_CHAP] = sp->lcp.max_configure; sppp_cp_change_state(&chap, sp, STATE_REQ_SENT); } /* nothing to be done if we are peer, await a challenge */ } static void sppp_chap_close(struct sppp *sp) { if (sp->state[IDX_CHAP] != STATE_CLOSED) sppp_cp_change_state(&chap, sp, STATE_CLOSED); } static void sppp_chap_TO(void *cookie) { struct sppp *sp = (struct sppp *)cookie; STDDCL; SPPP_LOCK(sp); if (debug) log(LOG_DEBUG, SPP_FMT "chap TO(%s) rst_counter = %d\n", SPP_ARGS(ifp), sppp_state_name(sp->state[IDX_CHAP]), sp->rst_counter[IDX_CHAP]); if (--sp->rst_counter[IDX_CHAP] < 0) /* TO- event */ switch (sp->state[IDX_CHAP]) { case STATE_REQ_SENT: chap.tld(sp); sppp_cp_change_state(&chap, sp, STATE_CLOSED); break; } else /* TO+ (or TO*) event */ switch (sp->state[IDX_CHAP]) { case STATE_OPENED: /* TO* event */ sp->rst_counter[IDX_CHAP] = sp->lcp.max_configure; /* FALLTHROUGH */ case STATE_REQ_SENT: chap.scr(sp); /* sppp_cp_change_state() will restart the timer */ sppp_cp_change_state(&chap, sp, STATE_REQ_SENT); break; } SPPP_UNLOCK(sp); } static void sppp_chap_tlu(struct sppp *sp) { STDDCL; int i; i = 0; sp->rst_counter[IDX_CHAP] = sp->lcp.max_configure; /* * Some broken CHAP implementations (Conware CoNet, firmware * 4.0.?) don't want to re-authenticate their CHAP once the * initial challenge-response exchange has taken place. * Provide for an option to avoid rechallenges. */ if ((sp->hisauth.flags & AUTHFLAG_NORECHALLENGE) == 0) { /* * Compute the re-challenge timeout. This will yield * a number between 300 and 810 seconds. 
*/ i = 300 + ((unsigned)(random() & 0xff00) >> 7); callout_reset(&sp->ch[IDX_CHAP], i * hz, chap.TO, (void *)sp); } if (debug) { log(LOG_DEBUG, SPP_FMT "chap %s, ", SPP_ARGS(ifp), sp->pp_phase == PHASE_NETWORK? "reconfirmed": "tlu"); if ((sp->hisauth.flags & AUTHFLAG_NORECHALLENGE) == 0) log(-1, "next re-challenge in %d seconds\n", i); else log(-1, "re-challenging suppressed\n"); } SPPP_LOCK(sp); /* indicate to LCP that we need to be closed down */ sp->lcp.protos |= (1 << IDX_CHAP); if (sp->pp_flags & PP_NEEDAUTH) { /* * Remote is authenticator, but his auth proto didn't * complete yet. Defer the transition to network * phase. */ SPPP_UNLOCK(sp); return; } SPPP_UNLOCK(sp); /* * If we are already in phase network, we are done here. This * is the case if this is a dummy tlu event after a re-challenge. */ if (sp->pp_phase != PHASE_NETWORK) sppp_phase_network(sp); } static void sppp_chap_tld(struct sppp *sp) { STDDCL; if (debug) log(LOG_DEBUG, SPP_FMT "chap tld\n", SPP_ARGS(ifp)); callout_stop(&sp->ch[IDX_CHAP]); sp->lcp.protos &= ~(1 << IDX_CHAP); lcp.Close(sp); } static void sppp_chap_scr(struct sppp *sp) { u_long *ch, seed; u_char clen; /* Compute random challenge. */ ch = (u_long *)sp->myauth.challenge; read_random(&seed, sizeof seed); ch[0] = seed ^ random(); ch[1] = seed ^ random(); ch[2] = seed ^ random(); ch[3] = seed ^ random(); clen = AUTHKEYLEN; sp->confid[IDX_CHAP] = ++sp->pp_seq[IDX_CHAP]; sppp_auth_send(&chap, sp, CHAP_CHALLENGE, sp->confid[IDX_CHAP], sizeof clen, (const char *)&clen, (size_t)AUTHKEYLEN, sp->myauth.challenge, (size_t)sppp_strnlen(sp->myauth.name, AUTHNAMELEN), sp->myauth.name, 0); } /* *--------------------------------------------------------------------------* * * * The PAP implementation. * * * *--------------------------------------------------------------------------* */ /* * For PAP, we need to keep a little state also if we are the peer, not the * authenticator. 
This is since we don't get a request to authenticate, but * have to repeatedly authenticate ourself until we got a response (or the * retry counter is expired). */ /* * Handle incoming PAP packets. */ static void sppp_pap_input(struct sppp *sp, struct mbuf *m) { STDDCL; struct lcp_header *h; int len; u_char *name, *passwd, mlen; int name_len, passwd_len; len = m->m_pkthdr.len; if (len < 5) { if (debug) log(LOG_DEBUG, SPP_FMT "pap invalid packet length: %d bytes\n", SPP_ARGS(ifp), len); return; } h = mtod (m, struct lcp_header*); if (len > ntohs (h->len)) len = ntohs (h->len); switch (h->type) { /* PAP request is my authproto */ case PAP_REQ: name = 1 + (u_char*)(h+1); name_len = name[-1]; passwd = name + name_len + 1; if (name_len > len - 6 || (passwd_len = passwd[-1]) > len - 6 - name_len) { if (debug) { log(LOG_DEBUG, SPP_FMT "pap corrupted input " "<%s id=0x%x len=%d", SPP_ARGS(ifp), sppp_auth_type_name(PPP_PAP, h->type), h->ident, ntohs(h->len)); sppp_print_bytes((u_char*)(h+1), len-4); log(-1, ">\n"); } break; } if (debug) { log(LOG_DEBUG, SPP_FMT "pap input(%s) " "<%s id=0x%x len=%d name=", SPP_ARGS(ifp), sppp_state_name(sp->state[IDX_PAP]), sppp_auth_type_name(PPP_PAP, h->type), h->ident, ntohs(h->len)); sppp_print_string((char*)name, name_len); log(-1, " passwd="); sppp_print_string((char*)passwd, passwd_len); log(-1, ">\n"); } if (name_len != sppp_strnlen(sp->hisauth.name, AUTHNAMELEN) || passwd_len != sppp_strnlen(sp->hisauth.secret, AUTHKEYLEN) || bcmp(name, sp->hisauth.name, name_len) != 0 || bcmp(passwd, sp->hisauth.secret, passwd_len) != 0) { /* action scn, tld */ mlen = sizeof(FAILMSG) - 1; sppp_auth_send(&pap, sp, PAP_NAK, h->ident, sizeof mlen, (const char *)&mlen, sizeof(FAILMSG) - 1, (u_char *)FAILMSG, 0); pap.tld(sp); break; } /* action sca, perhaps tlu */ if (sp->state[IDX_PAP] == STATE_REQ_SENT || sp->state[IDX_PAP] == STATE_OPENED) { mlen = sizeof(SUCCMSG) - 1; sppp_auth_send(&pap, sp, PAP_ACK, h->ident, sizeof mlen, (const char *)&mlen, 
sizeof(SUCCMSG) - 1, (u_char *)SUCCMSG, 0); } if (sp->state[IDX_PAP] == STATE_REQ_SENT) { sppp_cp_change_state(&pap, sp, STATE_OPENED); pap.tlu(sp); } break; /* ack and nak are his authproto */ case PAP_ACK: callout_stop(&sp->pap_my_to_ch); if (debug) { log(LOG_DEBUG, SPP_FMT "pap success", SPP_ARGS(ifp)); name_len = *((char *)h); if (len > 5 && name_len) { log(-1, ": "); sppp_print_string((char*)(h+1), name_len); } log(-1, "\n"); } SPPP_LOCK(sp); sp->pp_flags &= ~PP_NEEDAUTH; if (sp->myauth.proto == PPP_PAP && (sp->lcp.opts & (1 << LCP_OPT_AUTH_PROTO)) && (sp->lcp.protos & (1 << IDX_PAP)) == 0) { /* * We are authenticator for PAP but didn't * complete yet. Leave it to tlu to proceed * to network phase. */ SPPP_UNLOCK(sp); break; } SPPP_UNLOCK(sp); sppp_phase_network(sp); break; case PAP_NAK: callout_stop (&sp->pap_my_to_ch); if (debug) { log(LOG_INFO, SPP_FMT "pap failure", SPP_ARGS(ifp)); name_len = *((char *)h); if (len > 5 && name_len) { log(-1, ": "); sppp_print_string((char*)(h+1), name_len); } log(-1, "\n"); } else log(LOG_INFO, SPP_FMT "pap failure\n", SPP_ARGS(ifp)); /* await LCP shutdown by authenticator */ break; default: /* Unknown PAP packet type -- ignore. */ if (debug) { log(LOG_DEBUG, SPP_FMT "pap corrupted input " "<0x%x id=0x%x len=%d", SPP_ARGS(ifp), h->type, h->ident, ntohs(h->len)); sppp_print_bytes((u_char*)(h+1), len-4); log(-1, ">\n"); } break; } } static void sppp_pap_init(struct sppp *sp) { /* PAP doesn't have STATE_INITIAL at all. 
*/ sp->state[IDX_PAP] = STATE_CLOSED; sp->fail_counter[IDX_PAP] = 0; sp->pp_seq[IDX_PAP] = 0; sp->pp_rseq[IDX_PAP] = 0; callout_init(&sp->ch[IDX_PAP], 1); callout_init(&sp->pap_my_to_ch, 1); } static void sppp_pap_open(struct sppp *sp) { if (sp->hisauth.proto == PPP_PAP && (sp->lcp.opts & (1 << LCP_OPT_AUTH_PROTO)) != 0) { /* we are authenticator for PAP, start our timer */ sp->rst_counter[IDX_PAP] = sp->lcp.max_configure; sppp_cp_change_state(&pap, sp, STATE_REQ_SENT); } if (sp->myauth.proto == PPP_PAP) { /* we are peer, send a request, and start a timer */ pap.scr(sp); callout_reset(&sp->pap_my_to_ch, sp->lcp.timeout, sppp_pap_my_TO, (void *)sp); } } static void sppp_pap_close(struct sppp *sp) { if (sp->state[IDX_PAP] != STATE_CLOSED) sppp_cp_change_state(&pap, sp, STATE_CLOSED); } /* * That's the timeout routine if we are authenticator. Since the * authenticator is basically passive in PAP, we can't do much here. */ static void sppp_pap_TO(void *cookie) { struct sppp *sp = (struct sppp *)cookie; STDDCL; SPPP_LOCK(sp); if (debug) log(LOG_DEBUG, SPP_FMT "pap TO(%s) rst_counter = %d\n", SPP_ARGS(ifp), sppp_state_name(sp->state[IDX_PAP]), sp->rst_counter[IDX_PAP]); if (--sp->rst_counter[IDX_PAP] < 0) /* TO- event */ switch (sp->state[IDX_PAP]) { case STATE_REQ_SENT: pap.tld(sp); sppp_cp_change_state(&pap, sp, STATE_CLOSED); break; } else /* TO+ event, not very much we could do */ switch (sp->state[IDX_PAP]) { case STATE_REQ_SENT: /* sppp_cp_change_state() will restart the timer */ sppp_cp_change_state(&pap, sp, STATE_REQ_SENT); break; } SPPP_UNLOCK(sp); } /* * That's the timeout handler if we are peer. Since the peer is active, * we need to retransmit our PAP request since it is apparently lost. * XXX We should impose a max counter. 
*/ static void sppp_pap_my_TO(void *cookie) { struct sppp *sp = (struct sppp *)cookie; STDDCL; if (debug) log(LOG_DEBUG, SPP_FMT "pap peer TO\n", SPP_ARGS(ifp)); SPPP_LOCK(sp); pap.scr(sp); SPPP_UNLOCK(sp); } static void sppp_pap_tlu(struct sppp *sp) { STDDCL; sp->rst_counter[IDX_PAP] = sp->lcp.max_configure; if (debug) log(LOG_DEBUG, SPP_FMT "%s tlu\n", SPP_ARGS(ifp), pap.name); SPPP_LOCK(sp); /* indicate to LCP that we need to be closed down */ sp->lcp.protos |= (1 << IDX_PAP); if (sp->pp_flags & PP_NEEDAUTH) { /* * Remote is authenticator, but his auth proto didn't * complete yet. Defer the transition to network * phase. */ SPPP_UNLOCK(sp); return; } SPPP_UNLOCK(sp); sppp_phase_network(sp); } static void sppp_pap_tld(struct sppp *sp) { STDDCL; if (debug) log(LOG_DEBUG, SPP_FMT "pap tld\n", SPP_ARGS(ifp)); callout_stop (&sp->ch[IDX_PAP]); callout_stop (&sp->pap_my_to_ch); sp->lcp.protos &= ~(1 << IDX_PAP); lcp.Close(sp); } static void sppp_pap_scr(struct sppp *sp) { u_char idlen, pwdlen; sp->confid[IDX_PAP] = ++sp->pp_seq[IDX_PAP]; pwdlen = sppp_strnlen(sp->myauth.secret, AUTHKEYLEN); idlen = sppp_strnlen(sp->myauth.name, AUTHNAMELEN); sppp_auth_send(&pap, sp, PAP_REQ, sp->confid[IDX_PAP], sizeof idlen, (const char *)&idlen, (size_t)idlen, sp->myauth.name, sizeof pwdlen, (const char *)&pwdlen, (size_t)pwdlen, sp->myauth.secret, 0); } /* * Random miscellaneous functions. */ /* * Send a PAP or CHAP proto packet. * * Varadic function, each of the elements for the ellipsis is of type * ``size_t mlen, const u_char *msg''. Processing will stop iff * mlen == 0. * NOTE: never declare variadic functions with types subject to type * promotion (i.e. u_char). This is asking for big trouble depending * on the architecture you are on... */ static void sppp_auth_send(const struct cp *cp, struct sppp *sp, unsigned int type, unsigned int id, ...) 
{ STDDCL; struct ppp_header *h; struct lcp_header *lh; struct mbuf *m; u_char *p; int len; unsigned int mlen; const char *msg; va_list ap; MGETHDR (m, M_NOWAIT, MT_DATA); if (! m) return; m->m_pkthdr.rcvif = 0; h = mtod (m, struct ppp_header*); h->address = PPP_ALLSTATIONS; /* broadcast address */ h->control = PPP_UI; /* Unnumbered Info */ h->protocol = htons(cp->proto); lh = (struct lcp_header*)(h + 1); lh->type = type; lh->ident = id; p = (u_char*) (lh+1); va_start(ap, id); len = 0; while ((mlen = (unsigned int)va_arg(ap, size_t)) != 0) { msg = va_arg(ap, const char *); len += mlen; if (len > MHLEN - PPP_HEADER_LEN - LCP_HEADER_LEN) { va_end(ap); m_freem(m); return; } bcopy(msg, p, mlen); p += mlen; } va_end(ap); m->m_pkthdr.len = m->m_len = PPP_HEADER_LEN + LCP_HEADER_LEN + len; lh->len = htons (LCP_HEADER_LEN + len); if (debug) { log(LOG_DEBUG, SPP_FMT "%s output <%s id=0x%x len=%d", SPP_ARGS(ifp), cp->name, sppp_auth_type_name(cp->proto, lh->type), lh->ident, ntohs(lh->len)); sppp_print_bytes((u_char*) (lh+1), len); log(-1, ">\n"); } if (! IF_HANDOFF_ADJ(&sp->pp_cpq, m, ifp, 3)) if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } /* * Flush interface queue. */ static void sppp_qflush(struct ifqueue *ifq) { struct mbuf *m, *n; n = ifq->ifq_head; while ((m = n)) { n = m->m_nextpkt; m_freem (m); } ifq->ifq_head = 0; ifq->ifq_tail = 0; ifq->ifq_len = 0; } /* * Send keepalive packets, every 10 seconds. */ static void sppp_keepalive(void *dummy) { struct sppp *sp = (struct sppp*)dummy; struct ifnet *ifp = SP2IFP(sp); SPPP_LOCK(sp); /* Keepalive mode disabled or channel down? */ if (! (sp->pp_flags & PP_KEEPALIVE) || ! (ifp->if_drv_flags & IFF_DRV_RUNNING)) goto out; if (sp->pp_mode == PP_FR) { sppp_fr_keepalive (sp); goto out; } /* No keepalive in PPP mode if LCP not opened yet. */ if (sp->pp_mode != IFF_CISCO && sp->pp_phase < PHASE_AUTHENTICATE) goto out; if (sp->pp_alivecnt == MAXALIVECNT) { /* No keepalive packets got. Stop the interface. 
*/ printf (SPP_FMT "down\n", SPP_ARGS(ifp)); if_down (ifp); sppp_qflush (&sp->pp_cpq); if (sp->pp_mode != IFF_CISCO) { /* XXX */ /* Shut down the PPP link. */ lcp.Down(sp); /* Initiate negotiation. XXX */ lcp.Up(sp); } } if (sp->pp_alivecnt <= MAXALIVECNT) ++sp->pp_alivecnt; if (sp->pp_mode == IFF_CISCO) sppp_cisco_send (sp, CISCO_KEEPALIVE_REQ, ++sp->pp_seq[IDX_LCP], sp->pp_rseq[IDX_LCP]); else if (sp->pp_phase >= PHASE_AUTHENTICATE) { long nmagic = htonl (sp->lcp.magic); sp->lcp.echoid = ++sp->pp_seq[IDX_LCP]; sppp_cp_send (sp, PPP_LCP, ECHO_REQ, sp->lcp.echoid, 4, &nmagic); } out: SPPP_UNLOCK(sp); callout_reset(&sp->keepalive_callout, hz * 10, sppp_keepalive, (void *)sp); } /* * Get both IP addresses. */ void sppp_get_ip_addrs(struct sppp *sp, u_long *src, u_long *dst, u_long *srcmask) { struct ifnet *ifp = SP2IFP(sp); struct ifaddr *ifa; struct sockaddr_in *si, *sm; u_long ssrc, ddst; sm = NULL; ssrc = ddst = 0L; /* * Pick the first AF_INET address from the list, * aliases don't make any sense on a p2p link anyway. */ si = NULL; if_addr_rlock(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) if (ifa->ifa_addr->sa_family == AF_INET) { si = (struct sockaddr_in *)ifa->ifa_addr; sm = (struct sockaddr_in *)ifa->ifa_netmask; if (si) break; } if (ifa) { if (si && si->sin_addr.s_addr) { ssrc = si->sin_addr.s_addr; if (srcmask) *srcmask = ntohl(sm->sin_addr.s_addr); } si = (struct sockaddr_in *)ifa->ifa_dstaddr; if (si && si->sin_addr.s_addr) ddst = si->sin_addr.s_addr; } if_addr_runlock(ifp); if (dst) *dst = ntohl(ddst); if (src) *src = ntohl(ssrc); } #ifdef INET /* * Set my IP address. */ static void sppp_set_ip_addr(struct sppp *sp, u_long src) { STDDCL; struct ifaddr *ifa; struct sockaddr_in *si; struct in_ifaddr *ia; /* * Pick the first AF_INET address from the list, * aliases don't make any sense on a p2p link anyway. 
*/ si = NULL; if_addr_rlock(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family == AF_INET) { si = (struct sockaddr_in *)ifa->ifa_addr; if (si != NULL) { ifa_ref(ifa); break; } } } if_addr_runlock(ifp); if (ifa != NULL) { int error; /* delete old route */ error = rtinit(ifa, (int)RTM_DELETE, RTF_HOST); if (debug && error) { log(LOG_DEBUG, SPP_FMT "sppp_set_ip_addr: rtinit DEL failed, error=%d\n", SPP_ARGS(ifp), error); } /* set new address */ si->sin_addr.s_addr = htonl(src); ia = ifatoia(ifa); IN_IFADDR_WLOCK(); LIST_REMOVE(ia, ia_hash); LIST_INSERT_HEAD(INADDR_HASH(si->sin_addr.s_addr), ia, ia_hash); IN_IFADDR_WUNLOCK(); /* add new route */ error = rtinit(ifa, (int)RTM_ADD, RTF_HOST); if (debug && error) { log(LOG_DEBUG, SPP_FMT "sppp_set_ip_addr: rtinit ADD failed, error=%d", SPP_ARGS(ifp), error); } ifa_free(ifa); } } #endif #ifdef INET6 /* * Get both IPv6 addresses. */ static void sppp_get_ip6_addrs(struct sppp *sp, struct in6_addr *src, struct in6_addr *dst, struct in6_addr *srcmask) { struct ifnet *ifp = SP2IFP(sp); struct ifaddr *ifa; struct sockaddr_in6 *si, *sm; struct in6_addr ssrc, ddst; sm = NULL; bzero(&ssrc, sizeof(ssrc)); bzero(&ddst, sizeof(ddst)); /* * Pick the first link-local AF_INET6 address from the list, * aliases don't make any sense on a p2p link anyway. 
*/ si = NULL; if_addr_rlock(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) if (ifa->ifa_addr->sa_family == AF_INET6) { si = (struct sockaddr_in6 *)ifa->ifa_addr; sm = (struct sockaddr_in6 *)ifa->ifa_netmask; if (si && IN6_IS_ADDR_LINKLOCAL(&si->sin6_addr)) break; } if (ifa) { if (si && !IN6_IS_ADDR_UNSPECIFIED(&si->sin6_addr)) { bcopy(&si->sin6_addr, &ssrc, sizeof(ssrc)); if (srcmask) { bcopy(&sm->sin6_addr, srcmask, sizeof(*srcmask)); } } si = (struct sockaddr_in6 *)ifa->ifa_dstaddr; if (si && !IN6_IS_ADDR_UNSPECIFIED(&si->sin6_addr)) bcopy(&si->sin6_addr, &ddst, sizeof(ddst)); } if (dst) bcopy(&ddst, dst, sizeof(*dst)); if (src) bcopy(&ssrc, src, sizeof(*src)); if_addr_runlock(ifp); } #ifdef IPV6CP_MYIFID_DYN /* * Generate random ifid. */ static void sppp_gen_ip6_addr(struct sppp *sp, struct in6_addr *addr) { /* TBD */ } /* * Set my IPv6 address. */ static void sppp_set_ip6_addr(struct sppp *sp, const struct in6_addr *src) { STDDCL; struct ifaddr *ifa; struct sockaddr_in6 *sin6; /* * Pick the first link-local AF_INET6 address from the list, * aliases don't make any sense on a p2p link anyway. */ sin6 = NULL; if_addr_rlock(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family == AF_INET6) { sin6 = (struct sockaddr_in6 *)ifa->ifa_addr; if (sin6 && IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { ifa_ref(ifa); break; } } } if_addr_runlock(ifp); if (ifa != NULL) { int error; struct sockaddr_in6 new_sin6 = *sin6; bcopy(src, &new_sin6.sin6_addr, sizeof(new_sin6.sin6_addr)); error = in6_ifinit(ifp, ifatoia6(ifa), &new_sin6, 1); if (debug && error) { log(LOG_DEBUG, SPP_FMT "sppp_set_ip6_addr: in6_ifinit " " failed, error=%d\n", SPP_ARGS(ifp), error); } ifa_free(ifa); } } #endif /* * Suggest a candidate address to be used by peer. 
*/ static void sppp_suggest_ip6_addr(struct sppp *sp, struct in6_addr *suggest) { struct in6_addr myaddr; struct timeval tv; sppp_get_ip6_addrs(sp, &myaddr, 0, 0); myaddr.s6_addr[8] &= ~0x02; /* u bit to "local" */ microtime(&tv); if ((tv.tv_usec & 0xff) == 0 && (tv.tv_sec & 0xff) == 0) { myaddr.s6_addr[14] ^= 0xff; myaddr.s6_addr[15] ^= 0xff; } else { myaddr.s6_addr[14] ^= (tv.tv_usec & 0xff); myaddr.s6_addr[15] ^= (tv.tv_sec & 0xff); } if (suggest) bcopy(&myaddr, suggest, sizeof(myaddr)); } #endif /*INET6*/ static int sppp_params(struct sppp *sp, u_long cmd, void *data) { u_long subcmd; struct ifreq *ifr = (struct ifreq *)data; struct spppreq *spr; int rv = 0; if ((spr = malloc(sizeof(struct spppreq), M_TEMP, M_NOWAIT)) == NULL) return (EAGAIN); /* - * ifr->ifr_data is supposed to point to a struct spppreq. + * ifr_data_get_ptr(ifr) is supposed to point to a struct spppreq. * Check the cmd word first before attempting to fetch all the * data. */ - rv = fueword(ifr->ifr_data, &subcmd); + rv = fueword(ifr_data_get_ptr(ifr), &subcmd); if (rv == -1) { rv = EFAULT; goto quit; } - if (copyin((caddr_t)ifr->ifr_data, spr, sizeof(struct spppreq)) != 0) { + if (copyin(ifr_data_get_ptr(ifr), spr, sizeof(struct spppreq)) != 0) { rv = EFAULT; goto quit; } switch (subcmd) { case (u_long)SPPPIOGDEFS: if (cmd != SIOCGIFGENERIC) { rv = EINVAL; break; } /* * We copy over the entire current state, but clean * out some of the stuff we don't wanna pass up. * Remember, SIOCGIFGENERIC is unprotected, and can be * called by any user. No need to ever get PAP or * CHAP secrets back to userland anyway. 
*/ spr->defs.pp_phase = sp->pp_phase; spr->defs.enable_vj = (sp->confflags & CONF_ENABLE_VJ) != 0; spr->defs.enable_ipv6 = (sp->confflags & CONF_ENABLE_IPV6) != 0; spr->defs.lcp = sp->lcp; spr->defs.ipcp = sp->ipcp; spr->defs.ipv6cp = sp->ipv6cp; spr->defs.myauth = sp->myauth; spr->defs.hisauth = sp->hisauth; bzero(spr->defs.myauth.secret, AUTHKEYLEN); bzero(spr->defs.myauth.challenge, AUTHKEYLEN); bzero(spr->defs.hisauth.secret, AUTHKEYLEN); bzero(spr->defs.hisauth.challenge, AUTHKEYLEN); /* * Fixup the LCP timeout value to milliseconds so * spppcontrol doesn't need to bother about the value * of "hz". We do the reverse calculation below when * setting it. */ spr->defs.lcp.timeout = sp->lcp.timeout * 1000 / hz; - rv = copyout(spr, (caddr_t)ifr->ifr_data, - sizeof(struct spppreq)); + rv = copyout(spr, ifr_data_get_ptr(ifr), + sizeof(struct spppreq)); break; case (u_long)SPPPIOSDEFS: if (cmd != SIOCSIFGENERIC) { rv = EINVAL; break; } /* * We have a very specific idea of which fields we * allow being passed back from userland, so to not * clobber our current state. For one, we only allow * setting anything if LCP is in dead or establish * phase. Once the authentication negotiations * started, the authentication settings must not be * changed again. (The administrator can force an * ifconfig down in order to get LCP back into dead * phase.) * * Also, we only allow for authentication parameters to be * specified. * * XXX Should allow to set or clear pp_flags. * * Finally, if the respective authentication protocol to * be used is set differently than 0, but the secret is * passed as all zeros, we don't trash the existing secret. * This allows an administrator to change the system name * only without clobbering the secret (which he didn't get * back in a previous SPPPIOGDEFS call). However, the * secrets are cleared if the authentication protocol is * reset to 0. 
*/ if (sp->pp_phase != PHASE_DEAD && sp->pp_phase != PHASE_ESTABLISH) { rv = EBUSY; break; } if ((spr->defs.myauth.proto != 0 && spr->defs.myauth.proto != PPP_PAP && spr->defs.myauth.proto != PPP_CHAP) || (spr->defs.hisauth.proto != 0 && spr->defs.hisauth.proto != PPP_PAP && spr->defs.hisauth.proto != PPP_CHAP)) { rv = EINVAL; break; } if (spr->defs.myauth.proto == 0) /* resetting myauth */ bzero(&sp->myauth, sizeof sp->myauth); else { /* setting/changing myauth */ sp->myauth.proto = spr->defs.myauth.proto; bcopy(spr->defs.myauth.name, sp->myauth.name, AUTHNAMELEN); if (spr->defs.myauth.secret[0] != '\0') bcopy(spr->defs.myauth.secret, sp->myauth.secret, AUTHKEYLEN); } if (spr->defs.hisauth.proto == 0) /* resetting hisauth */ bzero(&sp->hisauth, sizeof sp->hisauth); else { /* setting/changing hisauth */ sp->hisauth.proto = spr->defs.hisauth.proto; sp->hisauth.flags = spr->defs.hisauth.flags; bcopy(spr->defs.hisauth.name, sp->hisauth.name, AUTHNAMELEN); if (spr->defs.hisauth.secret[0] != '\0') bcopy(spr->defs.hisauth.secret, sp->hisauth.secret, AUTHKEYLEN); } /* set LCP restart timer timeout */ if (spr->defs.lcp.timeout != 0) sp->lcp.timeout = spr->defs.lcp.timeout * hz / 1000; /* set VJ enable and IPv6 disable flags */ #ifdef INET if (spr->defs.enable_vj) sp->confflags |= CONF_ENABLE_VJ; else sp->confflags &= ~CONF_ENABLE_VJ; #endif #ifdef INET6 if (spr->defs.enable_ipv6) sp->confflags |= CONF_ENABLE_IPV6; else sp->confflags &= ~CONF_ENABLE_IPV6; #endif break; default: rv = EINVAL; } quit: free(spr, M_TEMP); return (rv); } static void sppp_phase_network(struct sppp *sp) { STDDCL; int i; u_long mask; sp->pp_phase = PHASE_NETWORK; if (debug) log(LOG_DEBUG, SPP_FMT "phase %s\n", SPP_ARGS(ifp), sppp_phase_name(sp->pp_phase)); /* Notify NCPs now. */ for (i = 0; i < IDX_COUNT; i++) if ((cps[i])->flags & CP_NCP) (cps[i])->Open(sp); /* Send Up events to all NCPs. 
*/ for (i = 0, mask = 1; i < IDX_COUNT; i++, mask <<= 1) if ((sp->lcp.protos & mask) && ((cps[i])->flags & CP_NCP)) (cps[i])->Up(sp); /* if no NCP is starting, all this was in vain, close down */ sppp_lcp_check_and_close(sp); } static const char * sppp_cp_type_name(u_char type) { static char buf[12]; switch (type) { case CONF_REQ: return "conf-req"; case CONF_ACK: return "conf-ack"; case CONF_NAK: return "conf-nak"; case CONF_REJ: return "conf-rej"; case TERM_REQ: return "term-req"; case TERM_ACK: return "term-ack"; case CODE_REJ: return "code-rej"; case PROTO_REJ: return "proto-rej"; case ECHO_REQ: return "echo-req"; case ECHO_REPLY: return "echo-reply"; case DISC_REQ: return "discard-req"; } snprintf (buf, sizeof(buf), "cp/0x%x", type); return buf; } static const char * sppp_auth_type_name(u_short proto, u_char type) { static char buf[12]; switch (proto) { case PPP_CHAP: switch (type) { case CHAP_CHALLENGE: return "challenge"; case CHAP_RESPONSE: return "response"; case CHAP_SUCCESS: return "success"; case CHAP_FAILURE: return "failure"; } case PPP_PAP: switch (type) { case PAP_REQ: return "req"; case PAP_ACK: return "ack"; case PAP_NAK: return "nak"; } } snprintf (buf, sizeof(buf), "auth/0x%x", type); return buf; } static const char * sppp_lcp_opt_name(u_char opt) { static char buf[12]; switch (opt) { case LCP_OPT_MRU: return "mru"; case LCP_OPT_ASYNC_MAP: return "async-map"; case LCP_OPT_AUTH_PROTO: return "auth-proto"; case LCP_OPT_QUAL_PROTO: return "qual-proto"; case LCP_OPT_MAGIC: return "magic"; case LCP_OPT_PROTO_COMP: return "proto-comp"; case LCP_OPT_ADDR_COMP: return "addr-comp"; } snprintf (buf, sizeof(buf), "lcp/0x%x", opt); return buf; } #ifdef INET static const char * sppp_ipcp_opt_name(u_char opt) { static char buf[12]; switch (opt) { case IPCP_OPT_ADDRESSES: return "addresses"; case IPCP_OPT_COMPRESSION: return "compression"; case IPCP_OPT_ADDRESS: return "address"; } snprintf (buf, sizeof(buf), "ipcp/0x%x", opt); return buf; } #endif #ifdef 
INET6 static const char * sppp_ipv6cp_opt_name(u_char opt) { static char buf[12]; switch (opt) { case IPV6CP_OPT_IFID: return "ifid"; case IPV6CP_OPT_COMPRESSION: return "compression"; } sprintf (buf, "0x%x", opt); return buf; } #endif static const char * sppp_state_name(int state) { switch (state) { case STATE_INITIAL: return "initial"; case STATE_STARTING: return "starting"; case STATE_CLOSED: return "closed"; case STATE_STOPPED: return "stopped"; case STATE_CLOSING: return "closing"; case STATE_STOPPING: return "stopping"; case STATE_REQ_SENT: return "req-sent"; case STATE_ACK_RCVD: return "ack-rcvd"; case STATE_ACK_SENT: return "ack-sent"; case STATE_OPENED: return "opened"; } return "illegal"; } static const char * sppp_phase_name(enum ppp_phase phase) { switch (phase) { case PHASE_DEAD: return "dead"; case PHASE_ESTABLISH: return "establish"; case PHASE_TERMINATE: return "terminate"; case PHASE_AUTHENTICATE: return "authenticate"; case PHASE_NETWORK: return "network"; } return "illegal"; } static const char * sppp_proto_name(u_short proto) { static char buf[12]; switch (proto) { case PPP_LCP: return "lcp"; case PPP_IPCP: return "ipcp"; case PPP_PAP: return "pap"; case PPP_CHAP: return "chap"; case PPP_IPV6CP: return "ipv6cp"; } snprintf(buf, sizeof(buf), "proto/0x%x", (unsigned)proto); return buf; } static void sppp_print_bytes(const u_char *p, u_short len) { if (len) log(-1, " %*D", len, p, "-"); } static void sppp_print_string(const char *p, u_short len) { u_char c; while (len-- > 0) { c = *p++; /* * Print only ASCII chars directly. RFC 1994 recommends * using only them, but we don't rely on it. 
*/ if (c < ' ' || c > '~') log(-1, "\\x%x", c); else log(-1, "%c", c); } } #ifdef INET static const char * sppp_dotted_quad(u_long addr) { static char s[16]; sprintf(s, "%d.%d.%d.%d", (int)((addr >> 24) & 0xff), (int)((addr >> 16) & 0xff), (int)((addr >> 8) & 0xff), (int)(addr & 0xff)); return s; } #endif static int sppp_strnlen(u_char *p, int max) { int len; for (len = 0; len < max && *p; ++p) ++len; return len; } /* a dummy, used to drop uninteresting events */ static void sppp_null(struct sppp *unused) { /* do just nothing */ } Index: head/sys/net/if_var.h =================================================================== --- head/sys/net/if_var.h (revision 331796) +++ head/sys/net/if_var.h (revision 331797) @@ -1,734 +1,737 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * From: @(#)if.h 8.1 (Berkeley) 6/10/93 * $FreeBSD$ */ #ifndef _NET_IF_VAR_H_ #define _NET_IF_VAR_H_ /* * Structures defining a network interface, providing a packet * transport mechanism (ala level 0 of the PUP protocols). * * Each interface accepts output datagrams of a specified maximum * length, and provides higher level routines with input datagrams * received from its medium. * * Output occurs when the routine if_output is called, with three parameters: * (*ifp->if_output)(ifp, m, dst, rt) * Here m is the mbuf chain to be sent and dst is the destination address. * The output routine encapsulates the supplied datagram if necessary, * and then transmits it on its medium. * * On input, each interface unwraps the data received by it, and either * places it on the input queue of an internetwork datagram routine * and posts the associated software interrupt, or passes the datagram to a raw * packet input routine. * * Routines exist for locating interfaces by their addresses * or for locating an interface on a certain network, as well as more general * routing and gateway routines maintaining information used to locate * interfaces. These routines live in the files if.c and route.c */ struct rtentry; /* ifa_rtrequest */ struct rt_addrinfo; /* ifa_rtrequest */ struct socket; struct carp_if; struct carp_softc; struct ifvlantrunk; struct route; /* if_output */ struct vnet; struct ifmedia; struct netmap_adapter; #ifdef _KERNEL #include /* ifqueue only? 
*/
/*
 * NOTE(review): every #include directive below has lost its header name
 * in this copy of the file; restore the names from the upstream source
 * before building.
 */
#include
#include
#endif /* _KERNEL */
#include
#include /* XXX */
#include /* struct ifqueue */
#include /* XXX */
#include /* XXX */
#include /* if_link_task */

#define	IF_DUNIT_NONE	-1

#include

TAILQ_HEAD(ifnethead, ifnet);	/* we use TAILQs so that the order of */
TAILQ_HEAD(ifaddrhead, ifaddr);	/* instantiation is preserved in the list */
TAILQ_HEAD(ifmultihead, ifmultiaddr);
TAILQ_HEAD(ifgrouphead, ifg_group);

#ifdef _KERNEL
VNET_DECLARE(struct pfil_head, link_pfil_hook);	/* packet filter hooks */
#define	V_link_pfil_hook	VNET(link_pfil_hook)

/* Indices into the ipsec_hhh_in / ipsec_hhh_out arrays declared below. */
#define	HHOOK_IPSEC_INET	0
#define	HHOOK_IPSEC_INET6	1
#define	HHOOK_IPSEC_COUNT	2
VNET_DECLARE(struct hhook_head *, ipsec_hhh_in[HHOOK_IPSEC_COUNT]);
VNET_DECLARE(struct hhook_head *, ipsec_hhh_out[HHOOK_IPSEC_COUNT]);
#define	V_ipsec_hhh_in	VNET(ipsec_hhh_in)
#define	V_ipsec_hhh_out	VNET(ipsec_hhh_out)
#endif /* _KERNEL */

/*
 * Interface counter identifiers.  The last enumerator, IFCOUNTERS, is
 * not a counter: it is the number of counters and sizes the
 * if_counters[] array in struct ifnet.
 */
typedef enum {
	IFCOUNTER_IPACKETS = 0,
	IFCOUNTER_IERRORS,
	IFCOUNTER_OPACKETS,
	IFCOUNTER_OERRORS,
	IFCOUNTER_COLLISIONS,
	IFCOUNTER_IBYTES,
	IFCOUNTER_OBYTES,
	IFCOUNTER_IMCASTS,
	IFCOUNTER_OMCASTS,
	IFCOUNTER_IQDROPS,
	IFCOUNTER_OQDROPS,
	IFCOUNTER_NOPROTO,
	IFCOUNTERS /* Array size. */
} ift_counter;

/* Function pointer types used for the method slots of struct ifnet. */
typedef struct ifnet * if_t;

typedef	void (*if_start_fn_t)(if_t);
typedef	int (*if_ioctl_fn_t)(if_t, u_long, caddr_t);
typedef	void (*if_init_fn_t)(void *);
typedef void (*if_qflush_fn_t)(if_t);
typedef int (*if_transmit_fn_t)(if_t, struct mbuf *);
typedef	uint64_t (*if_get_counter_t)(if_t, ift_counter);

struct ifnet_hw_tsomax {
	u_int	tsomaxbytes;	/* TSO total burst length limit in bytes */
	u_int	tsomaxsegcount;	/* TSO maximum segment count */
	u_int	tsomaxsegsize;	/* TSO maximum segment size in bytes */
};

/* Interface encap request types */
typedef enum {
	IFENCAP_LL = 1			/* pre-calculate link-layer header */
} ife_type;

/*
 * The structure below allows to request various pre-calculated L2/L3 headers
 * for different media. Requests varies by type (rtype field).
*
 * IFENCAP_LL type: pre-calculates link header based on address family
 *   and destination lladdr.
 *
 * Input data fields:
 *   buf: pointer to destination buffer
 *   bufsize: buffer size
 *   flags: IFENCAP_FLAG_BROADCAST if destination is broadcast
 *   family: address family defined by AF_ constant.
 *   lladdr: pointer to link-layer address
 *   lladdr_len: length of link-layer address
 *   hdata: pointer to L3 header (optional, used for ARP requests).
 * Output data fields:
 *   buf: encap data is stored here
 *   bufsize: resulting encap length is stored here
 *   lladdr_off: offset of link-layer address from encap hdr start
 *   hdata: L3 header may be altered if necessary
 */

struct if_encap_req {
	u_char		*buf;		/* Destination buffer (w) */
	size_t		bufsize;	/* size of provided buffer (r) */
	ife_type	rtype;		/* request type (r) */
	uint32_t	flags;		/* Request flags (r) */
	int		family;		/* Address family AF_* (r) */
	int		lladdr_off;	/* offset from header start (w) */
	int		lladdr_len;	/* lladdr length (r) */
	char		*lladdr;	/* link-level address pointer (r) */
	char		*hdata;		/* Upper layer header data (rw) */
};

#define	IFENCAP_FLAG_BROADCAST	0x02	/* Destination is broadcast */

/*
 * Network interface send tag support. The storage of "struct
 * m_snd_tag" comes from the network driver and it is free to allocate
 * as much additional space as it wants for its own use.
 */
struct m_snd_tag;

/* Values for the "type" member of struct if_snd_tag_alloc_header. */
#define	IF_SND_TAG_TYPE_RATE_LIMIT 0
#define	IF_SND_TAG_TYPE_UNLIMITED 1
#define	IF_SND_TAG_TYPE_MAX 2

/* Common leading part of every send tag allocation request. */
struct if_snd_tag_alloc_header {
	uint32_t type;		/* send tag type, see IF_SND_TAG_XXX */
	uint32_t flowid;	/* mbuf hash value */
	uint32_t flowtype;	/* mbuf hash type */
};

struct if_snd_tag_alloc_rate_limit {
	struct if_snd_tag_alloc_header hdr;
	uint64_t max_rate;	/* in bytes/s */
};

struct if_snd_tag_rate_limit_params {
	uint64_t max_rate;	/* in bytes/s */
	uint32_t queue_level;	/* 0 (empty) .. 65535 (full) */
#define	IF_SND_QUEUE_LEVEL_MIN 0
#define	IF_SND_QUEUE_LEVEL_MAX 65535
	uint32_t reserved;	/* padding */
};

/*
 * Parameter unions for the send tag methods.  The "unlimited" members
 * reuse the rate limit structures.
 */
union if_snd_tag_alloc_params {
	struct if_snd_tag_alloc_header hdr;
	struct if_snd_tag_alloc_rate_limit rate_limit;
	struct if_snd_tag_alloc_rate_limit unlimited;
};

union if_snd_tag_modify_params {
	struct if_snd_tag_rate_limit_params rate_limit;
	struct if_snd_tag_rate_limit_params unlimited;
};

union if_snd_tag_query_params {
	struct if_snd_tag_rate_limit_params rate_limit;
	struct if_snd_tag_rate_limit_params unlimited;
};

/*
 * Function types of the if_snd_tag_alloc, if_snd_tag_modify,
 * if_snd_tag_query and if_snd_tag_free members of struct ifnet.
 */
typedef int (if_snd_tag_alloc_t)(struct ifnet *, union if_snd_tag_alloc_params *,
    struct m_snd_tag **);
typedef int (if_snd_tag_modify_t)(struct m_snd_tag *, union if_snd_tag_modify_params *);
typedef int (if_snd_tag_query_t)(struct m_snd_tag *, union if_snd_tag_query_params *);
typedef void (if_snd_tag_free_t)(struct m_snd_tag *);

/*
 * Structure defining a network interface.
 */
struct ifnet {
	/* General book keeping of interface lists. */
	TAILQ_ENTRY(ifnet) if_link; 	/* all struct ifnets are chained */
	LIST_ENTRY(ifnet) if_clones;	/* interfaces of a cloner */
	TAILQ_HEAD(, ifg_list) if_groups; /* linked list of groups per if */
					/* protected by if_addr_lock */
	u_char	if_alloctype;		/* if_type at time of allocation */

	/* Driver and protocol specific information that remains stable. */
	void	*if_softc;		/* pointer to driver state */
	void	*if_llsoftc;		/* link layer softc */
	void	*if_l2com;		/* pointer to protocol bits */
	const char *if_dname;		/* driver name */
	int	if_dunit;		/* unit or IF_DUNIT_NONE */
	u_short	if_index;		/* numeric abbreviation for this if  */
	short	if_index_reserved;	/* spare space to grow if_index */
	char	if_xname[IFNAMSIZ];	/* external name (name + unit) */
	char	*if_description;	/* interface description */

	/* Variable fields that are touched by the stack and drivers. */
	int	if_flags;		/* up/down, broadcast, etc. */
	int	if_drv_flags;		/* driver-managed status flags */
	int	if_capabilities;	/* interface features & capabilities */
	int	if_capenable;		/* enabled features & capabilities */
	void	*if_linkmib;		/* link-type-specific MIB data */
	size_t	if_linkmiblen;		/* length of above data */
	u_int	if_refcount;		/* reference count */

	/* These fields are shared with struct if_data. */
	uint8_t		if_type;	/* ethernet, tokenring, etc */
	uint8_t		if_addrlen;	/* media address length */
	uint8_t		if_hdrlen;	/* media header length */
	uint8_t		if_link_state;	/* current link state */
	uint32_t	if_mtu;		/* maximum transmission unit */
	uint32_t	if_metric;	/* routing metric (external only) */
	uint64_t	if_baudrate;	/* linespeed */
	uint64_t	if_hwassist;	/* HW offload capabilities, see IFCAP */
	time_t		if_epoch;	/* uptime at attach or stat reset */
	struct timeval	if_lastchange;	/* time of last administrative change */

	struct  ifaltq if_snd;		/* output queue (includes altq) */
	struct	task if_linktask;	/* task for link change events */

	/* Addresses of different protocol families assigned to this if. */
	struct	rwlock if_addr_lock;	/* lock to protect address lists */
		/*
		 * if_addrhead is the list of all addresses associated to
		 * an interface.
		 * Some code in the kernel assumes that first element
		 * of the list has type AF_LINK, and contains sockaddr_dl
		 * addresses which store the link-level address and the name
		 * of the interface.
		 * However, access to the AF_LINK address through this
		 * field is deprecated. Use if_addr or ifaddr_byindex() instead.
		 */
	struct	ifaddrhead if_addrhead;	/* linked list of addresses per if */
	struct	ifmultihead if_multiaddrs; /* multicast addresses configured */
	int	if_amcount;		/* number of all-multicast requests */
	struct	ifaddr	*if_addr;	/* pointer to link-level address */
	void	*if_hw_addr;		/* hardware link-level address */
	const u_int8_t *if_broadcastaddr; /* linklevel broadcast bytestring */
	struct	rwlock if_afdata_lock;
	void	*if_afdata[AF_MAX];
	int	if_afdata_initialized;

	/* Additional features hung off the interface. */
	u_int	if_fib;			/* interface FIB */
	struct	vnet *if_vnet;		/* pointer to network stack instance */
	struct	vnet *if_home_vnet;	/* where this ifnet originates from */
	struct  ifvlantrunk *if_vlantrunk; /* pointer to 802.1q data */
	struct	bpf_if *if_bpf;		/* packet filter structure */
	int	if_pcount;		/* number of promiscuous listeners */
	void	*if_bridge;		/* bridge glue */
	void	*if_lagg;		/* lagg glue */
	void	*if_pf_kif;		/* pf glue */
	struct	carp_if *if_carp;	/* carp interface structure */
	struct	label *if_label;	/* interface MAC label */
	struct	netmap_adapter *if_netmap; /* netmap(4) softc */

	/* Various procedures of the layer2 encapsulation and drivers. */
	int	(*if_output)		/* output routine (enqueue) */
		(struct ifnet *, struct mbuf *, const struct sockaddr *,
		     struct route *);
	void	(*if_input)		/* input routine (from h/w driver) */
		(struct ifnet *, struct mbuf *);
	if_start_fn_t	if_start;	/* initiate output routine */
	if_ioctl_fn_t	if_ioctl;	/* ioctl routine */
	if_init_fn_t	if_init;	/* Init routine */
	int	(*if_resolvemulti)	/* validate/resolve multicast */
		(struct ifnet *, struct sockaddr **, struct sockaddr *);
	if_qflush_fn_t	if_qflush;	/* flush any queue */
	if_transmit_fn_t if_transmit;   /* initiate output routine */

	void	(*if_reassign)		/* reassign to vnet routine */
		(struct ifnet *, struct vnet *, char *);
	if_get_counter_t if_get_counter; /* get counter values */
	int	(*if_requestencap)	/* make link header from request */
		(struct ifnet *, struct if_encap_req *);

	/* Statistics. */
	counter_u64_t	if_counters[IFCOUNTERS];

	/* Stuff that's only temporary and doesn't belong here. */

	/*
	 * Network adapter TSO limits:
	 * ===========================
	 *
	 * If the "if_hw_tsomax" field is zero the maximum segment
	 * length limit does not apply. If the "if_hw_tsomaxsegcount"
	 * or the "if_hw_tsomaxsegsize" field is zero the TSO segment
	 * count limit does not apply. If all three fields are zero,
	 * there is no TSO limit.
	 *
	 * NOTE: The TSO limits should reflect the values used in the
	 * BUSDMA tag a network adapter is using to load a mbuf chain
	 * for transmission. The TCP/IP network stack will subtract
	 * space for all linklevel and protocol level headers and
	 * ensure that the full mbuf chain passed to the network
	 * adapter fits within the given limits.
	 */
	u_int	if_hw_tsomax;		/* TSO maximum size in bytes */
	u_int	if_hw_tsomaxsegcount;	/* TSO maximum segment count */
	u_int	if_hw_tsomaxsegsize;	/* TSO maximum segment size in bytes */

	/*
	 * Network adapter send tag support:
	 */
	if_snd_tag_alloc_t *if_snd_tag_alloc;
	if_snd_tag_modify_t *if_snd_tag_modify;
	if_snd_tag_query_t *if_snd_tag_query;
	if_snd_tag_free_t *if_snd_tag_free;

	/* Ethernet PCP */
	uint8_t if_pcp;

	/*
	 * Spare fields to be added before branching a stable branch, so
	 * that structure can be enhanced without changing the kernel
	 * binary interface.
	 */
	int	if_ispare[4];		/* general use */
};

/* for compatibility with other BSDs */
#define	if_name(ifp)	((ifp)->if_xname)

/*
 * Locks for address lists on the network interface.
 * All of these operate on the if_addr_lock rwlock of the interface.
 */
#define	IF_ADDR_LOCK_INIT(if)	rw_init(&(if)->if_addr_lock, "if_addr_lock")
#define	IF_ADDR_LOCK_DESTROY(if)	rw_destroy(&(if)->if_addr_lock)
#define	IF_ADDR_WLOCK(if)	rw_wlock(&(if)->if_addr_lock)
#define	IF_ADDR_WUNLOCK(if)	rw_wunlock(&(if)->if_addr_lock)
#define	IF_ADDR_RLOCK(if)	rw_rlock(&(if)->if_addr_lock)
#define	IF_ADDR_RUNLOCK(if)	rw_runlock(&(if)->if_addr_lock)
#define	IF_ADDR_LOCK_ASSERT(if)	rw_assert(&(if)->if_addr_lock, RA_LOCKED)
#define	IF_ADDR_WLOCK_ASSERT(if) rw_assert(&(if)->if_addr_lock, RA_WLOCKED)

/*
 * Function variations on locking macros intended to be used by loadable
 * kernel modules in order to divorce them from the internals of address list
 * locking.
*/
void	if_addr_rlock(struct ifnet *ifp);	/* if_addrhead */
void	if_addr_runlock(struct ifnet *ifp);	/* if_addrhead */
void	if_maddr_rlock(if_t ifp);	/* if_multiaddrs */
void	if_maddr_runlock(if_t ifp);	/* if_multiaddrs */

#ifdef _KERNEL
/* The event declarations are only visible if the eventhandler header was included first. */
#ifdef _SYS_EVENTHANDLER_H_
/* interface link layer address change event */
typedef void (*iflladdr_event_handler_t)(void *, struct ifnet *);
EVENTHANDLER_DECLARE(iflladdr_event, iflladdr_event_handler_t);
/* interface address change event */
typedef void (*ifaddr_event_handler_t)(void *, struct ifnet *);
EVENTHANDLER_DECLARE(ifaddr_event, ifaddr_event_handler_t);
/* new interface arrival event */
typedef void (*ifnet_arrival_event_handler_t)(void *, struct ifnet *);
EVENTHANDLER_DECLARE(ifnet_arrival_event, ifnet_arrival_event_handler_t);
/* interface departure event */
typedef void (*ifnet_departure_event_handler_t)(void *, struct ifnet *);
EVENTHANDLER_DECLARE(ifnet_departure_event, ifnet_departure_event_handler_t);
/* Interface link state change event */
typedef void (*ifnet_link_event_handler_t)(void *, struct ifnet *, int);
EVENTHANDLER_DECLARE(ifnet_link_event, ifnet_link_event_handler_t);
/* Interface up/down event */
#define IFNET_EVENT_UP		0
#define IFNET_EVENT_DOWN	1
typedef void (*ifnet_event_fn)(void *, struct ifnet *ifp, int event);
EVENTHANDLER_DECLARE(ifnet_event, ifnet_event_fn);
#endif /* _SYS_EVENTHANDLER_H_ */

/*
 * interface groups
 */
struct ifg_group {
	char				 ifg_group[IFNAMSIZ];
	u_int				 ifg_refcnt;
	void				*ifg_pf_kif;
	TAILQ_HEAD(, ifg_member)	 ifg_members;
	TAILQ_ENTRY(ifg_group)		 ifg_next;
};

/* Element of an ifg_group's ifg_members list; points at one interface. */
struct ifg_member {
	TAILQ_ENTRY(ifg_member)	 ifgm_next;
	struct ifnet		*ifgm_ifp;
};

/* Element of an interface's if_groups list; points at one group. */
struct ifg_list {
	struct ifg_group	*ifgl_group;
	TAILQ_ENTRY(ifg_list)	 ifgl_next;
};

#ifdef _SYS_EVENTHANDLER_H_
/* group attach event */
typedef void (*group_attach_event_handler_t)(void *, struct ifg_group *);
EVENTHANDLER_DECLARE(group_attach_event, group_attach_event_handler_t);
/* group detach event */
typedef void (*group_detach_event_handler_t)(void *, struct ifg_group *);
EVENTHANDLER_DECLARE(group_detach_event, group_detach_event_handler_t);
/* group change event */
typedef void (*group_change_event_handler_t)(void *, const char *);
EVENTHANDLER_DECLARE(group_change_event, group_change_event_handler_t);
#endif /* _SYS_EVENTHANDLER_H_ */

/*
 * Locking of the per-interface if_afdata_lock rwlock.  The plain
 * LOCK/UNLOCK/TRYLOCK forms are the write-lock variants.
 */
#define	IF_AFDATA_LOCK_INIT(ifp)	\
	rw_init(&(ifp)->if_afdata_lock, "if_afdata")

#define	IF_AFDATA_WLOCK(ifp)	rw_wlock(&(ifp)->if_afdata_lock)
#define	IF_AFDATA_RLOCK(ifp)	rw_rlock(&(ifp)->if_afdata_lock)
#define	IF_AFDATA_WUNLOCK(ifp)	rw_wunlock(&(ifp)->if_afdata_lock)
#define	IF_AFDATA_RUNLOCK(ifp)	rw_runlock(&(ifp)->if_afdata_lock)
#define	IF_AFDATA_LOCK(ifp)	IF_AFDATA_WLOCK(ifp)
#define	IF_AFDATA_UNLOCK(ifp)	IF_AFDATA_WUNLOCK(ifp)
#define	IF_AFDATA_TRYLOCK(ifp)	rw_try_wlock(&(ifp)->if_afdata_lock)
#define	IF_AFDATA_DESTROY(ifp)	rw_destroy(&(ifp)->if_afdata_lock)

#define	IF_AFDATA_LOCK_ASSERT(ifp)	rw_assert(&(ifp)->if_afdata_lock, RA_LOCKED)
#define	IF_AFDATA_RLOCK_ASSERT(ifp)	rw_assert(&(ifp)->if_afdata_lock, RA_RLOCKED)
#define	IF_AFDATA_WLOCK_ASSERT(ifp)	rw_assert(&(ifp)->if_afdata_lock, RA_WLOCKED)
#define	IF_AFDATA_UNLOCK_ASSERT(ifp)	rw_assert(&(ifp)->if_afdata_lock, RA_UNLOCKED)

/*
 * 72 was chosen below because it is the size of a TCP/IP
 * header (40) + the minimum mss (32).
 */
#define	IF_MINMTU	72
#define	IF_MAXMTU	65535

/* The link layer softc pointer doubles as the TOE device handle. */
#define	TOEDEV(ifp)	((ifp)->if_llsoftc)

/*
 * The ifaddr structure contains information about one address
 * of an interface.  They are maintained by the different address families,
 * are allocated and attached when an address is set, and are linked
 * together so all addresses for an interface can be located.
 *
 * NOTE: a 'struct ifaddr' is always at the beginning of a larger
 * chunk of malloc'ed memory, where we store the three addresses
 * (ifa_addr, ifa_dstaddr and ifa_netmask) referenced here.
*/ struct ifaddr { struct sockaddr *ifa_addr; /* address of interface */ struct sockaddr *ifa_dstaddr; /* other end of p-to-p link */ #define ifa_broadaddr ifa_dstaddr /* broadcast address interface */ struct sockaddr *ifa_netmask; /* used to determine subnet */ struct ifnet *ifa_ifp; /* back-pointer to interface */ struct carp_softc *ifa_carp; /* pointer to CARP data */ TAILQ_ENTRY(ifaddr) ifa_link; /* queue macro glue */ void (*ifa_rtrequest) /* check or clean routes (+ or -)'d */ (int, struct rtentry *, struct rt_addrinfo *); u_short ifa_flags; /* mostly rt_flags for cloning */ #define IFA_ROUTE RTF_UP /* route installed */ #define IFA_RTSELF RTF_HOST /* loopback route to self installed */ u_int ifa_refcnt; /* references to this structure */ counter_u64_t ifa_ipackets; counter_u64_t ifa_opackets; counter_u64_t ifa_ibytes; counter_u64_t ifa_obytes; }; struct ifaddr * ifa_alloc(size_t size, int flags); void ifa_free(struct ifaddr *ifa); void ifa_ref(struct ifaddr *ifa); /* * Multicast address structure. This is analogous to the ifaddr * structure except that it keeps track of multicast addresses. */ struct ifmultiaddr { TAILQ_ENTRY(ifmultiaddr) ifma_link; /* queue macro glue */ struct sockaddr *ifma_addr; /* address this membership is for */ struct sockaddr *ifma_lladdr; /* link-layer translation, if any */ struct ifnet *ifma_ifp; /* back-pointer to interface */ u_int ifma_refcount; /* reference count */ void *ifma_protospec; /* protocol-specific state, if any */ struct ifmultiaddr *ifma_llifma; /* pointer to ifma for ifma_lladdr */ }; extern struct rwlock ifnet_rwlock; extern struct sx ifnet_sxlock; #define IFNET_WLOCK() do { \ sx_xlock(&ifnet_sxlock); \ rw_wlock(&ifnet_rwlock); \ } while (0) #define IFNET_WUNLOCK() do { \ rw_wunlock(&ifnet_rwlock); \ sx_xunlock(&ifnet_sxlock); \ } while (0) /* * To assert the ifnet lock, you must know not only whether it's for read or * write, but also whether it was acquired with sleep support or not. 
*/ #define IFNET_RLOCK_ASSERT() sx_assert(&ifnet_sxlock, SA_SLOCKED) #define IFNET_RLOCK_NOSLEEP_ASSERT() rw_assert(&ifnet_rwlock, RA_RLOCKED) #define IFNET_WLOCK_ASSERT() do { \ sx_assert(&ifnet_sxlock, SA_XLOCKED); \ rw_assert(&ifnet_rwlock, RA_WLOCKED); \ } while (0) #define IFNET_RLOCK() sx_slock(&ifnet_sxlock) #define IFNET_RLOCK_NOSLEEP() rw_rlock(&ifnet_rwlock) #define IFNET_RUNLOCK() sx_sunlock(&ifnet_sxlock) #define IFNET_RUNLOCK_NOSLEEP() rw_runlock(&ifnet_rwlock) /* * Look up an ifnet given its index; the _ref variant also acquires a * reference that must be freed using if_rele(). It is almost always a bug * to call ifnet_byindex() instead of ifnet_byindex_ref(). */ struct ifnet *ifnet_byindex(u_short idx); struct ifnet *ifnet_byindex_locked(u_short idx); struct ifnet *ifnet_byindex_ref(u_short idx); /* * Given the index, ifaddr_byindex() returns the one and only * link-level ifaddr for the interface. You are not supposed to use * it to traverse the list of addresses associated to the interface. 
*/ struct ifaddr *ifaddr_byindex(u_short idx); VNET_DECLARE(struct ifnethead, ifnet); VNET_DECLARE(struct ifgrouphead, ifg_head); VNET_DECLARE(int, if_index); VNET_DECLARE(struct ifnet *, loif); /* first loopback interface */ #define V_ifnet VNET(ifnet) #define V_ifg_head VNET(ifg_head) #define V_if_index VNET(if_index) #define V_loif VNET(loif) int if_addgroup(struct ifnet *, const char *); int if_delgroup(struct ifnet *, const char *); int if_addmulti(struct ifnet *, struct sockaddr *, struct ifmultiaddr **); int if_allmulti(struct ifnet *, int); struct ifnet* if_alloc(u_char); void if_attach(struct ifnet *); void if_dead(struct ifnet *); int if_delmulti(struct ifnet *, struct sockaddr *); void if_delmulti_ifma(struct ifmultiaddr *); void if_detach(struct ifnet *); void if_purgeaddrs(struct ifnet *); void if_delallmulti(struct ifnet *); void if_down(struct ifnet *); struct ifmultiaddr * if_findmulti(struct ifnet *, const struct sockaddr *); void if_free(struct ifnet *); void if_initname(struct ifnet *, const char *, int); void if_link_state_change(struct ifnet *, int); int if_printf(struct ifnet *, const char *, ...) 
__printflike(2, 3); void if_ref(struct ifnet *); void if_rele(struct ifnet *); int if_setlladdr(struct ifnet *, const u_char *, int); void if_up(struct ifnet *); int ifioctl(struct socket *, u_long, caddr_t, struct thread *); int ifpromisc(struct ifnet *, int); struct ifnet *ifunit(const char *); struct ifnet *ifunit_ref(const char *); int ifa_add_loopback_route(struct ifaddr *, struct sockaddr *); int ifa_del_loopback_route(struct ifaddr *, struct sockaddr *); int ifa_switch_loopback_route(struct ifaddr *, struct sockaddr *); struct ifaddr *ifa_ifwithaddr(const struct sockaddr *); int ifa_ifwithaddr_check(const struct sockaddr *); struct ifaddr *ifa_ifwithbroadaddr(const struct sockaddr *, int); struct ifaddr *ifa_ifwithdstaddr(const struct sockaddr *, int); struct ifaddr *ifa_ifwithnet(const struct sockaddr *, int, int); struct ifaddr *ifa_ifwithroute(int, const struct sockaddr *, struct sockaddr *, u_int); struct ifaddr *ifaof_ifpforaddr(const struct sockaddr *, struct ifnet *); int ifa_preferred(struct ifaddr *, struct ifaddr *); int if_simloop(struct ifnet *ifp, struct mbuf *m, int af, int hlen); typedef void *if_com_alloc_t(u_char type, struct ifnet *ifp); typedef void if_com_free_t(void *com, u_char type); void if_register_com_alloc(u_char type, if_com_alloc_t *a, if_com_free_t *f); void if_deregister_com_alloc(u_char type); void if_data_copy(struct ifnet *, struct if_data *); uint64_t if_get_counter_default(struct ifnet *, ift_counter); void if_inc_counter(struct ifnet *, ift_counter, int64_t); #define IF_LLADDR(ifp) \ LLADDR((struct sockaddr_dl *)((ifp)->if_addr->ifa_addr)) uint64_t if_setbaudrate(if_t ifp, uint64_t baudrate); uint64_t if_getbaudrate(if_t ifp); int if_setcapabilities(if_t ifp, int capabilities); int if_setcapabilitiesbit(if_t ifp, int setbit, int clearbit); int if_getcapabilities(if_t ifp); int if_togglecapenable(if_t ifp, int togglecap); int if_setcapenable(if_t ifp, int capenable); int if_setcapenablebit(if_t ifp, int setcap, int 
clearcap); int if_getcapenable(if_t ifp); const char *if_getdname(if_t ifp); int if_setdev(if_t ifp, void *dev); int if_setdrvflagbits(if_t ifp, int if_setflags, int clear_flags); int if_getdrvflags(if_t ifp); int if_setdrvflags(if_t ifp, int flags); int if_clearhwassist(if_t ifp); int if_sethwassistbits(if_t ifp, int toset, int toclear); int if_sethwassist(if_t ifp, int hwassist_bit); int if_gethwassist(if_t ifp); int if_setsoftc(if_t ifp, void *softc); void *if_getsoftc(if_t ifp); int if_setflags(if_t ifp, int flags); int if_gethwaddr(if_t ifp, struct ifreq *); int if_setmtu(if_t ifp, int mtu); int if_getmtu(if_t ifp); int if_getmtu_family(if_t ifp, int family); int if_setflagbits(if_t ifp, int set, int clear); int if_getflags(if_t ifp); int if_sendq_empty(if_t ifp); int if_setsendqready(if_t ifp); int if_setsendqlen(if_t ifp, int tx_desc_count); int if_sethwtsomax(if_t ifp, u_int if_hw_tsomax); int if_sethwtsomaxsegcount(if_t ifp, u_int if_hw_tsomaxsegcount); int if_sethwtsomaxsegsize(if_t ifp, u_int if_hw_tsomaxsegsize); u_int if_gethwtsomax(if_t ifp); u_int if_gethwtsomaxsegcount(if_t ifp); u_int if_gethwtsomaxsegsize(if_t ifp); int if_input(if_t ifp, struct mbuf* sendmp); int if_sendq_prepend(if_t ifp, struct mbuf *m); struct mbuf *if_dequeue(if_t ifp); int if_setifheaderlen(if_t ifp, int len); void if_setrcvif(struct mbuf *m, if_t ifp); void if_setvtag(struct mbuf *m, u_int16_t tag); u_int16_t if_getvtag(struct mbuf *m); int if_vlantrunkinuse(if_t ifp); caddr_t if_getlladdr(if_t ifp); void *if_gethandle(u_char); void if_bpfmtap(if_t ifp, struct mbuf *m); void if_etherbpfmtap(if_t ifp, struct mbuf *m); void if_vlancap(if_t ifp); int if_setupmultiaddr(if_t ifp, void *mta, int *cnt, int max); int if_multiaddr_array(if_t ifp, void *mta, int *cnt, int max); int if_multiaddr_count(if_t ifp, int max); int if_multi_apply(struct ifnet *ifp, int (*filter)(void *, struct ifmultiaddr *, int), void *arg); int if_getamcount(if_t ifp); struct ifaddr * if_getifaddr(if_t 
ifp); /* Functions */ void if_setinitfn(if_t ifp, void (*)(void *)); void if_setioctlfn(if_t ifp, int (*)(if_t, u_long, caddr_t)); void if_setstartfn(if_t ifp, void (*)(if_t)); void if_settransmitfn(if_t ifp, if_transmit_fn_t); void if_setqflushfn(if_t ifp, if_qflush_fn_t); void if_setgetcounterfn(if_t ifp, if_get_counter_t); /* Revisit the below. These are inline functions originally */ int drbr_inuse_drv(if_t ifp, struct buf_ring *br); struct mbuf* drbr_dequeue_drv(if_t ifp, struct buf_ring *br); int drbr_needs_enqueue_drv(if_t ifp, struct buf_ring *br); int drbr_enqueue_drv(if_t ifp, struct buf_ring *br, struct mbuf *m); /* TSO */ void if_hw_tsomax_common(if_t ifp, struct ifnet_hw_tsomax *); int if_hw_tsomax_update(if_t ifp, struct ifnet_hw_tsomax *); +/* accessors for struct ifreq */ +void *ifr_data_get_ptr(void *ifrp); + #ifdef DEVICE_POLLING enum poll_cmd { POLL_ONLY, POLL_AND_CHECK_STATUS }; typedef int poll_handler_t(if_t ifp, enum poll_cmd cmd, int count); int ether_poll_register(poll_handler_t *h, if_t ifp); int ether_poll_deregister(if_t ifp); #endif /* DEVICE_POLLING */ #endif /* _KERNEL */ #include /* XXXAO: temporary unconditional include */ #endif /* !_NET_IF_VAR_H_ */ Index: head/sys/net/if_vlan.c =================================================================== --- head/sys/net/if_vlan.c (revision 331796) +++ head/sys/net/if_vlan.c (revision 331797) @@ -1,1986 +1,1986 @@ /*- * Copyright 1998 Massachusetts Institute of Technology * Copyright 2012 ADARA Networks, Inc. * Copyright 2017 Dell EMC Isilon * * Portions of this software were developed by Robert N. M. Watson under * contract to ADARA Networks, Inc. 
* * Permission to use, copy, modify, and distribute this software and * its documentation for any purpose and without fee is hereby * granted, provided that both the above copyright notice and this * permission notice appear in all copies, that both the above * copyright notice and this permission notice appear in all * supporting documentation, and that the name of M.I.T. not be used * in advertising or publicity pertaining to distribution of the * software without specific, written prior permission. M.I.T. makes * no representations about the suitability of this software for any * purpose. It is provided "as is" without express or implied * warranty. * * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * if_vlan.c - pseudo-device driver for IEEE 802.1Q virtual LANs. * This is sort of sneaky in the implementation, since * we need to pretend to be enough of an Ethernet implementation * to make arp work. The way we do this is by telling everyone * that we are an Ethernet, and then catch the packets that * ether_output() sends to us via if_transmit(), rewrite them for * use by the real outgoing interface, and ask it to send them. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_vlan.h" #include "opt_ratelimit.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #endif #define VLAN_DEF_HWIDTH 4 #define VLAN_IFFLAGS (IFF_BROADCAST | IFF_MULTICAST) #define UP_AND_RUNNING(ifp) \ ((ifp)->if_flags & IFF_UP && (ifp)->if_drv_flags & IFF_DRV_RUNNING) LIST_HEAD(ifvlanhead, ifvlan); struct ifvlantrunk { struct ifnet *parent; /* parent interface of this trunk */ struct rmlock lock; #ifdef VLAN_ARRAY #define VLAN_ARRAY_SIZE (EVL_VLID_MASK + 1) struct ifvlan *vlans[VLAN_ARRAY_SIZE]; /* static table */ #else struct ifvlanhead *hash; /* dynamic hash-list table */ uint16_t hmask; uint16_t hwidth; #endif int refcnt; }; /* * This macro provides a facility to iterate over every vlan on a trunk with * the assumption that none will be added/removed during iteration. */ #ifdef VLAN_ARRAY #define VLAN_FOREACH(_ifv, _trunk) \ size_t _i; \ for (_i = 0; _i < VLAN_ARRAY_SIZE; _i++) \ if (((_ifv) = (_trunk)->vlans[_i]) != NULL) #else /* VLAN_ARRAY */ #define VLAN_FOREACH(_ifv, _trunk) \ struct ifvlan *_next; \ size_t _i; \ for (_i = 0; _i < (1 << (_trunk)->hwidth); _i++) \ LIST_FOREACH_SAFE((_ifv), &(_trunk)->hash[_i], ifv_list, _next) #endif /* VLAN_ARRAY */ /* * This macro provides a facility to iterate over every vlan on a trunk while * also modifying the number of vlans on the trunk. The iteration continues * until some condition is met or there are no more vlans on the trunk. */ #ifdef VLAN_ARRAY /* The VLAN_ARRAY case is simple -- just a for loop using the condition. 
*/ #define VLAN_FOREACH_UNTIL_SAFE(_ifv, _trunk, _cond) \ size_t _i; \ for (_i = 0; !(_cond) && _i < VLAN_ARRAY_SIZE; _i++) \ if (((_ifv) = (_trunk)->vlans[_i])) #else /* VLAN_ARRAY */ /* * The hash table case is more complicated. We allow for the hash table to be * modified (i.e. vlans removed) while we are iterating over it. To allow for * this we must restart the iteration every time we "touch" something during * the iteration, since removal will resize the hash table and invalidate our * current position. If acting on the touched element causes the trunk to be * emptied, then iteration also stops. */ #define VLAN_FOREACH_UNTIL_SAFE(_ifv, _trunk, _cond) \ size_t _i; \ bool _touch = false; \ for (_i = 0; \ !(_cond) && _i < (1 << (_trunk)->hwidth); \ _i = (_touch && ((_trunk) != NULL) ? 0 : _i + 1), _touch = false) \ if (((_ifv) = LIST_FIRST(&(_trunk)->hash[_i])) != NULL && \ (_touch = true)) #endif /* VLAN_ARRAY */ struct vlan_mc_entry { struct sockaddr_dl mc_addr; SLIST_ENTRY(vlan_mc_entry) mc_entries; }; struct ifvlan { struct ifvlantrunk *ifv_trunk; struct ifnet *ifv_ifp; #define TRUNK(ifv) ((ifv)->ifv_trunk) #define PARENT(ifv) ((ifv)->ifv_trunk->parent) void *ifv_cookie; int ifv_pflags; /* special flags we have set on parent */ int ifv_capenable; struct ifv_linkmib { int ifvm_encaplen; /* encapsulation length */ int ifvm_mtufudge; /* MTU fudged by this much */ int ifvm_mintu; /* min transmission unit */ uint16_t ifvm_proto; /* encapsulation ethertype */ uint16_t ifvm_tag; /* tag to apply on packets leaving if */ uint16_t ifvm_vid; /* VLAN ID */ uint8_t ifvm_pcp; /* Priority Code Point (PCP). 
*/ } ifv_mib; struct task lladdr_task; SLIST_HEAD(, vlan_mc_entry) vlan_mc_listhead; #ifndef VLAN_ARRAY LIST_ENTRY(ifvlan) ifv_list; #endif }; #define ifv_proto ifv_mib.ifvm_proto #define ifv_tag ifv_mib.ifvm_tag #define ifv_vid ifv_mib.ifvm_vid #define ifv_pcp ifv_mib.ifvm_pcp #define ifv_encaplen ifv_mib.ifvm_encaplen #define ifv_mtufudge ifv_mib.ifvm_mtufudge #define ifv_mintu ifv_mib.ifvm_mintu /* Special flags we should propagate to parent. */ static struct { int flag; int (*func)(struct ifnet *, int); } vlan_pflags[] = { {IFF_PROMISC, ifpromisc}, {IFF_ALLMULTI, if_allmulti}, {0, NULL} }; extern int vlan_mtag_pcp; static const char vlanname[] = "vlan"; static MALLOC_DEFINE(M_VLAN, vlanname, "802.1Q Virtual LAN Interface"); static eventhandler_tag ifdetach_tag; static eventhandler_tag iflladdr_tag; /* * if_vlan uses two module-level locks to allow concurrent modification of vlan * interfaces and (mostly) allow for vlans to be destroyed while they are being * used for tx/rx. To accomplish this in a way that has acceptable performance * and cooperation with other parts of the network stack there is a * non-sleepable rmlock(9) and an sx(9). Both locks are exclusively acquired * when destroying a vlan interface, i.e. when the if_vlantrunk field of struct * ifnet is de-allocated and NULL'd. Thus a reader holding either lock has a * guarantee that the struct ifvlantrunk references a valid vlan trunk. * * The performance-sensitive paths that warrant using the rmlock(9) are * vlan_transmit and vlan_input. Both have to check for the vlan interface's * existence using if_vlantrunk, and being in the network tx/rx paths the use * of an rmlock(9) gives a measureable improvement in performance. * * The reason for having an sx(9) is mostly because there are still areas that * must be sleepable and also have safe concurrent access to a vlan interface. 
* Since the sx(9) exists, it is used by default in most paths unless sleeping * is not permitted, or if it is not clear whether sleeping is permitted. * * Note that despite these protections, there is still an inherent race in the * destruction of vlans since there's no guarantee that the ifnet hasn't been * freed/reused when the tx/rx functions are called by the stack. This can only * be fixed by addressing ifnet's lifetime issues. */ #define _VLAN_RM_ID ifv_rm_lock #define _VLAN_SX_ID ifv_sx static struct rmlock _VLAN_RM_ID; static struct sx _VLAN_SX_ID; #define VLAN_LOCKING_INIT() \ rm_init(&_VLAN_RM_ID, "vlan_rm"); \ sx_init(&_VLAN_SX_ID, "vlan_sx") #define VLAN_LOCKING_DESTROY() \ rm_destroy(&_VLAN_RM_ID); \ sx_destroy(&_VLAN_SX_ID) #define _VLAN_RM_TRACKER _vlan_rm_tracker #define VLAN_RLOCK() rm_rlock(&_VLAN_RM_ID, \ &_VLAN_RM_TRACKER) #define VLAN_RUNLOCK() rm_runlock(&_VLAN_RM_ID, \ &_VLAN_RM_TRACKER) #define VLAN_WLOCK() rm_wlock(&_VLAN_RM_ID) #define VLAN_WUNLOCK() rm_wunlock(&_VLAN_RM_ID) #define VLAN_RLOCK_ASSERT() rm_assert(&_VLAN_RM_ID, RA_RLOCKED) #define VLAN_WLOCK_ASSERT() rm_assert(&_VLAN_RM_ID, RA_WLOCKED) #define VLAN_RWLOCK_ASSERT() rm_assert(&_VLAN_RM_ID, RA_LOCKED) #define VLAN_LOCK_READER struct rm_priotracker _VLAN_RM_TRACKER #define VLAN_SLOCK() sx_slock(&_VLAN_SX_ID) #define VLAN_SUNLOCK() sx_sunlock(&_VLAN_SX_ID) #define VLAN_XLOCK() sx_xlock(&_VLAN_SX_ID) #define VLAN_XUNLOCK() sx_xunlock(&_VLAN_SX_ID) #define VLAN_SLOCK_ASSERT() sx_assert(&_VLAN_SX_ID, SA_SLOCKED) #define VLAN_XLOCK_ASSERT() sx_assert(&_VLAN_SX_ID, SA_XLOCKED) #define VLAN_SXLOCK_ASSERT() sx_assert(&_VLAN_SX_ID, SA_LOCKED) /* * We also have a per-trunk rmlock(9), that is locked shared on packet * processing and exclusive when configuration is changed. Note: This should * only be acquired while there is a shared lock on either of the global locks * via VLAN_SLOCK or VLAN_RLOCK. 
Thus, an exclusive lock on the global locks * makes a call to TRUNK_RLOCK/TRUNK_WLOCK technically superfluous. */ #define _TRUNK_RM_TRACKER _trunk_rm_tracker #define TRUNK_LOCK_INIT(trunk) rm_init(&(trunk)->lock, vlanname) #define TRUNK_LOCK_DESTROY(trunk) rm_destroy(&(trunk)->lock) #define TRUNK_RLOCK(trunk) rm_rlock(&(trunk)->lock, \ &_TRUNK_RM_TRACKER) #define TRUNK_WLOCK(trunk) rm_wlock(&(trunk)->lock) #define TRUNK_RUNLOCK(trunk) rm_runlock(&(trunk)->lock, \ &_TRUNK_RM_TRACKER) #define TRUNK_WUNLOCK(trunk) rm_wunlock(&(trunk)->lock) #define TRUNK_RLOCK_ASSERT(trunk) rm_assert(&(trunk)->lock, RA_RLOCKED) #define TRUNK_LOCK_ASSERT(trunk) rm_assert(&(trunk)->lock, RA_LOCKED) #define TRUNK_WLOCK_ASSERT(trunk) rm_assert(&(trunk)->lock, RA_WLOCKED) #define TRUNK_LOCK_READER struct rm_priotracker _TRUNK_RM_TRACKER /* * The VLAN_ARRAY substitutes the dynamic hash with a static array * with 4096 entries. In theory this can give a boost in processing, * however in practice it does not. Probably this is because the array * is too big to fit into CPU cache. 
*/ #ifndef VLAN_ARRAY static void vlan_inithash(struct ifvlantrunk *trunk); static void vlan_freehash(struct ifvlantrunk *trunk); static int vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv); static int vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv); static void vlan_growhash(struct ifvlantrunk *trunk, int howmuch); static __inline struct ifvlan * vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid); #endif static void trunk_destroy(struct ifvlantrunk *trunk); static void vlan_init(void *foo); static void vlan_input(struct ifnet *ifp, struct mbuf *m); static int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr); #ifdef RATELIMIT static int vlan_snd_tag_alloc(struct ifnet *, union if_snd_tag_alloc_params *, struct m_snd_tag **); #endif static void vlan_qflush(struct ifnet *ifp); static int vlan_setflag(struct ifnet *ifp, int flag, int status, int (*func)(struct ifnet *, int)); static int vlan_setflags(struct ifnet *ifp, int status); static int vlan_setmulti(struct ifnet *ifp); static int vlan_transmit(struct ifnet *ifp, struct mbuf *m); static void vlan_unconfig(struct ifnet *ifp); static void vlan_unconfig_locked(struct ifnet *ifp, int departing); static int vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag); static void vlan_link_state(struct ifnet *ifp); static void vlan_capabilities(struct ifvlan *ifv); static void vlan_trunk_capabilities(struct ifnet *ifp); static struct ifnet *vlan_clone_match_ethervid(const char *, int *); static int vlan_clone_match(struct if_clone *, const char *); static int vlan_clone_create(struct if_clone *, char *, size_t, caddr_t); static int vlan_clone_destroy(struct if_clone *, struct ifnet *); static void vlan_ifdetach(void *arg, struct ifnet *ifp); static void vlan_iflladdr(void *arg, struct ifnet *ifp); static void vlan_lladdr_fn(void *arg, int pending); static struct if_clone *vlan_cloner; #ifdef VIMAGE static VNET_DEFINE(struct if_clone *, vlan_cloner); #define V_vlan_cloner 
VNET(vlan_cloner) #endif #ifndef VLAN_ARRAY #define HASH(n, m) ((((n) >> 8) ^ ((n) >> 4) ^ (n)) & (m)) static void vlan_inithash(struct ifvlantrunk *trunk) { int i, n; /* * The trunk must not be locked here since we call malloc(M_WAITOK). * It is OK in case this function is called before the trunk struct * gets hooked up and becomes visible from other threads. */ KASSERT(trunk->hwidth == 0 && trunk->hash == NULL, ("%s: hash already initialized", __func__)); trunk->hwidth = VLAN_DEF_HWIDTH; n = 1 << trunk->hwidth; trunk->hmask = n - 1; trunk->hash = malloc(sizeof(struct ifvlanhead) * n, M_VLAN, M_WAITOK); for (i = 0; i < n; i++) LIST_INIT(&trunk->hash[i]); } static void vlan_freehash(struct ifvlantrunk *trunk) { #ifdef INVARIANTS int i; KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); for (i = 0; i < (1 << trunk->hwidth); i++) KASSERT(LIST_EMPTY(&trunk->hash[i]), ("%s: hash table not empty", __func__)); #endif free(trunk->hash, M_VLAN); trunk->hash = NULL; trunk->hwidth = trunk->hmask = 0; } static int vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv) { int i, b; struct ifvlan *ifv2; TRUNK_WLOCK_ASSERT(trunk); KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); b = 1 << trunk->hwidth; i = HASH(ifv->ifv_vid, trunk->hmask); LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list) if (ifv->ifv_vid == ifv2->ifv_vid) return (EEXIST); /* * Grow the hash when the number of vlans exceeds half of the number of * hash buckets squared. This will make the average linked-list length * buckets/2. 
*/ if (trunk->refcnt > (b * b) / 2) { vlan_growhash(trunk, 1); i = HASH(ifv->ifv_vid, trunk->hmask); } LIST_INSERT_HEAD(&trunk->hash[i], ifv, ifv_list); trunk->refcnt++; return (0); } static int vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv) { int i, b; struct ifvlan *ifv2; TRUNK_WLOCK_ASSERT(trunk); KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); b = 1 << trunk->hwidth; i = HASH(ifv->ifv_vid, trunk->hmask); LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list) if (ifv2 == ifv) { trunk->refcnt--; LIST_REMOVE(ifv2, ifv_list); if (trunk->refcnt < (b * b) / 2) vlan_growhash(trunk, -1); return (0); } panic("%s: vlan not found\n", __func__); return (ENOENT); /*NOTREACHED*/ } /* * Grow the hash larger or smaller if memory permits. */ static void vlan_growhash(struct ifvlantrunk *trunk, int howmuch) { struct ifvlan *ifv; struct ifvlanhead *hash2; int hwidth2, i, j, n, n2; TRUNK_WLOCK_ASSERT(trunk); KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); if (howmuch == 0) { /* Harmless yet obvious coding error */ printf("%s: howmuch is 0\n", __func__); return; } hwidth2 = trunk->hwidth + howmuch; n = 1 << trunk->hwidth; n2 = 1 << hwidth2; /* Do not shrink the table below the default */ if (hwidth2 < VLAN_DEF_HWIDTH) return; /* M_NOWAIT because we're called with trunk mutex held */ hash2 = malloc(sizeof(struct ifvlanhead) * n2, M_VLAN, M_NOWAIT); if (hash2 == NULL) { printf("%s: out of memory -- hash size not changed\n", __func__); return; /* We can live with the old hash table */ } for (j = 0; j < n2; j++) LIST_INIT(&hash2[j]); for (i = 0; i < n; i++) while ((ifv = LIST_FIRST(&trunk->hash[i])) != NULL) { LIST_REMOVE(ifv, ifv_list); j = HASH(ifv->ifv_vid, n2 - 1); LIST_INSERT_HEAD(&hash2[j], ifv, ifv_list); } free(trunk->hash, M_VLAN); trunk->hash = hash2; trunk->hwidth = hwidth2; trunk->hmask = n2 - 1; if (bootverbose) if_printf(trunk->parent, "VLAN hash table resized from %d to %d buckets\n", n, n2); } static __inline struct ifvlan * 
vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid) { struct ifvlan *ifv; TRUNK_RLOCK_ASSERT(trunk); LIST_FOREACH(ifv, &trunk->hash[HASH(vid, trunk->hmask)], ifv_list) if (ifv->ifv_vid == vid) return (ifv); return (NULL); } #if 0 /* Debugging code to view the hashtables. */ static void vlan_dumphash(struct ifvlantrunk *trunk) { int i; struct ifvlan *ifv; for (i = 0; i < (1 << trunk->hwidth); i++) { printf("%d: ", i); LIST_FOREACH(ifv, &trunk->hash[i], ifv_list) printf("%s ", ifv->ifv_ifp->if_xname); printf("\n"); } } #endif /* 0 */ #else static __inline struct ifvlan * vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid) { return trunk->vlans[vid]; } static __inline int vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv) { if (trunk->vlans[ifv->ifv_vid] != NULL) return EEXIST; trunk->vlans[ifv->ifv_vid] = ifv; trunk->refcnt++; return (0); } static __inline int vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv) { trunk->vlans[ifv->ifv_vid] = NULL; trunk->refcnt--; return (0); } static __inline void vlan_freehash(struct ifvlantrunk *trunk) { } static __inline void vlan_inithash(struct ifvlantrunk *trunk) { } #endif /* !VLAN_ARRAY */ static void trunk_destroy(struct ifvlantrunk *trunk) { VLAN_XLOCK_ASSERT(); VLAN_WLOCK_ASSERT(); vlan_freehash(trunk); trunk->parent->if_vlantrunk = NULL; TRUNK_LOCK_DESTROY(trunk); if_rele(trunk->parent); free(trunk, M_VLAN); } /* * Program our multicast filter. What we're actually doing is * programming the multicast filter of the parent. This has the * side effect of causing the parent interface to receive multicast * traffic that it doesn't really want, which ends up being discarded * later by the upper protocol layers. Unfortunately, there's no way * to avoid this: there really is only one physical interface. 
*/ static int vlan_setmulti(struct ifnet *ifp) { struct ifnet *ifp_p; struct ifmultiaddr *ifma; struct ifvlan *sc; struct vlan_mc_entry *mc; int error; /* * XXX This stupidly needs the rmlock to avoid sleeping while holding * the in6_multi_mtx (see in6_mc_join_locked). */ VLAN_RWLOCK_ASSERT(); /* Find the parent. */ sc = ifp->if_softc; TRUNK_WLOCK_ASSERT(TRUNK(sc)); ifp_p = PARENT(sc); CURVNET_SET_QUIET(ifp_p->if_vnet); /* First, remove any existing filter entries. */ while ((mc = SLIST_FIRST(&sc->vlan_mc_listhead)) != NULL) { SLIST_REMOVE_HEAD(&sc->vlan_mc_listhead, mc_entries); (void)if_delmulti(ifp_p, (struct sockaddr *)&mc->mc_addr); free(mc, M_VLAN); } /* Now program new ones. */ IF_ADDR_WLOCK(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; mc = malloc(sizeof(struct vlan_mc_entry), M_VLAN, M_NOWAIT); if (mc == NULL) { IF_ADDR_WUNLOCK(ifp); return (ENOMEM); } bcopy(ifma->ifma_addr, &mc->mc_addr, ifma->ifma_addr->sa_len); mc->mc_addr.sdl_index = ifp_p->if_index; SLIST_INSERT_HEAD(&sc->vlan_mc_listhead, mc, mc_entries); } IF_ADDR_WUNLOCK(ifp); SLIST_FOREACH (mc, &sc->vlan_mc_listhead, mc_entries) { error = if_addmulti(ifp_p, (struct sockaddr *)&mc->mc_addr, NULL); if (error) return (error); } CURVNET_RESTORE(); return (0); } /* * A handler for parent interface link layer address changes. * If the parent interface link layer address is changed we * should also change it on all children vlans. */ static void vlan_iflladdr(void *arg __unused, struct ifnet *ifp) { struct ifvlan *ifv; struct ifnet *ifv_ifp; struct ifvlantrunk *trunk; struct sockaddr_dl *sdl; VLAN_LOCK_READER; /* Need the rmlock since this is run on taskqueue_swi. */ VLAN_RLOCK(); trunk = ifp->if_vlantrunk; if (trunk == NULL) { VLAN_RUNLOCK(); return; } /* * OK, it's a trunk. Loop over and change all vlan's lladdrs on it. 
* We need an exclusive lock here to prevent concurrent SIOCSIFLLADDR * ioctl calls on the parent garbling the lladdr of the child vlan. */ TRUNK_WLOCK(trunk); VLAN_FOREACH(ifv, trunk) { /* * Copy new new lladdr into the ifv_ifp, enqueue a task * to actually call if_setlladdr. if_setlladdr needs to * be deferred to a taskqueue because it will call into * the if_vlan ioctl path and try to acquire the global * lock. */ ifv_ifp = ifv->ifv_ifp; bcopy(IF_LLADDR(ifp), IF_LLADDR(ifv_ifp), ifp->if_addrlen); sdl = (struct sockaddr_dl *)ifv_ifp->if_addr->ifa_addr; sdl->sdl_alen = ifp->if_addrlen; taskqueue_enqueue(taskqueue_thread, &ifv->lladdr_task); } TRUNK_WUNLOCK(trunk); VLAN_RUNLOCK(); } /* * A handler for network interface departure events. * Track departure of trunks here so that we don't access invalid * pointers or whatever if a trunk is ripped from under us, e.g., * by ejecting its hot-plug card. However, if an ifnet is simply * being renamed, then there's no need to tear down the state. */ static void vlan_ifdetach(void *arg __unused, struct ifnet *ifp) { struct ifvlan *ifv; struct ifvlantrunk *trunk; /* If the ifnet is just being renamed, don't do anything. */ if (ifp->if_flags & IFF_RENAMING) return; VLAN_XLOCK(); trunk = ifp->if_vlantrunk; if (trunk == NULL) { VLAN_XUNLOCK(); return; } /* * OK, it's a trunk. Loop over and detach all vlan's on it. * Check trunk pointer after each vlan_unconfig() as it will * free it and set to NULL after the last vlan was detached. */ VLAN_FOREACH_UNTIL_SAFE(ifv, ifp->if_vlantrunk, ifp->if_vlantrunk == NULL) vlan_unconfig_locked(ifv->ifv_ifp, 1); /* Trunk should have been destroyed in vlan_unconfig(). */ KASSERT(ifp->if_vlantrunk == NULL, ("%s: purge failed", __func__)); VLAN_XUNLOCK(); } /* * Return the trunk device for a virtual interface. 
*/ static struct ifnet * vlan_trunkdev(struct ifnet *ifp) { struct ifvlan *ifv; VLAN_LOCK_READER; if (ifp->if_type != IFT_L2VLAN) return (NULL); /* Not clear if callers are sleepable, so acquire the rmlock. */ VLAN_RLOCK(); ifv = ifp->if_softc; ifp = NULL; if (ifv->ifv_trunk) ifp = PARENT(ifv); VLAN_RUNLOCK(); return (ifp); } /* * Return the 12-bit VLAN VID for this interface, for use by external * components such as Infiniband. * * XXXRW: Note that the function name here is historical; it should be named * vlan_vid(). */ static int vlan_tag(struct ifnet *ifp, uint16_t *vidp) { struct ifvlan *ifv; if (ifp->if_type != IFT_L2VLAN) return (EINVAL); ifv = ifp->if_softc; *vidp = ifv->ifv_vid; return (0); } /* * Return a driver specific cookie for this interface. Synchronization * with setcookie must be provided by the driver. */ static void * vlan_cookie(struct ifnet *ifp) { struct ifvlan *ifv; if (ifp->if_type != IFT_L2VLAN) return (NULL); ifv = ifp->if_softc; return (ifv->ifv_cookie); } /* * Store a cookie in our softc that drivers can use to store driver * private per-instance data in. */ static int vlan_setcookie(struct ifnet *ifp, void *cookie) { struct ifvlan *ifv; if (ifp->if_type != IFT_L2VLAN) return (EINVAL); ifv = ifp->if_softc; ifv->ifv_cookie = cookie; return (0); } /* * Return the vlan device present at the specific VID. */ static struct ifnet * vlan_devat(struct ifnet *ifp, uint16_t vid) { struct ifvlantrunk *trunk; struct ifvlan *ifv; VLAN_LOCK_READER; TRUNK_LOCK_READER; /* Not clear if callers are sleepable, so acquire the rmlock. */ VLAN_RLOCK(); trunk = ifp->if_vlantrunk; if (trunk == NULL) { VLAN_RUNLOCK(); return (NULL); } ifp = NULL; TRUNK_RLOCK(trunk); ifv = vlan_gethash(trunk, vid); if (ifv) ifp = ifv->ifv_ifp; TRUNK_RUNLOCK(trunk); VLAN_RUNLOCK(); return (ifp); } /* * Recalculate the cached VLAN tag exposed via the MIB. 
*/
static void
vlan_tag_recalculate(struct ifvlan *ifv)
{

	/* Combine the VID and PCP fields into the cached tag value. */
	ifv->ifv_tag = EVL_MAKETAG(ifv->ifv_vid, ifv->ifv_pcp, 0);
}

/*
 * VLAN support can be loaded as a module.  The only place in the
 * system that's intimately aware of this is ether_input.  We hook
 * into this code through vlan_input_p which is defined there and
 * set here.  No one else in the system should be aware of this so
 * we use an explicit reference here.
 */
extern	void (*vlan_input_p)(struct ifnet *, struct mbuf *);

/* For if_link_state_change() eyes only... */
extern	void (*vlan_link_state_p)(struct ifnet *);

/*
 * Module event handler.
 *
 * MOD_LOAD registers the interface departure and link-layer address
 * change event handlers, initializes the module locks, publishes the
 * vlan_*_p hook pointers and, when VIMAGE is not defined, creates the
 * interface cloner.  MOD_UNLOAD undoes each of those steps.
 *
 * Returns 0 on success, ENOMEM if an event handler could not be
 * registered, or EOPNOTSUPP for any other event type.
 */
static int
vlan_modevent(module_t mod, int type, void *data)
{

	switch (type) {
	case MOD_LOAD:
		ifdetach_tag = EVENTHANDLER_REGISTER(ifnet_departure_event,
		    vlan_ifdetach, NULL, EVENTHANDLER_PRI_ANY);
		if (ifdetach_tag == NULL)
			return (ENOMEM);
		iflladdr_tag = EVENTHANDLER_REGISTER(iflladdr_event,
		    vlan_iflladdr, NULL, EVENTHANDLER_PRI_ANY);
		/*
		 * NOTE(review): this return leaves ifdetach_tag registered
		 * and the locks uninitialized; whether that is cleaned up
		 * depends on how the module framework handles a failed
		 * MOD_LOAD -- confirm.
		 */
		if (iflladdr_tag == NULL)
			return (ENOMEM);
		VLAN_LOCKING_INIT();
		/* Publish the hooks used by the rest of the network stack. */
		vlan_input_p = vlan_input;
		vlan_link_state_p = vlan_link_state;
		vlan_trunk_cap_p = vlan_trunk_capabilities;
		vlan_trunkdev_p = vlan_trunkdev;
		vlan_cookie_p = vlan_cookie;
		vlan_setcookie_p = vlan_setcookie;
		vlan_tag_p = vlan_tag;
		vlan_devat_p = vlan_devat;
#ifndef VIMAGE
		vlan_cloner = if_clone_advanced(vlanname, 0, vlan_clone_match,
		    vlan_clone_create, vlan_clone_destroy);
#endif
		if (bootverbose)
			printf("vlan: initialized, using "
#ifdef VLAN_ARRAY
			       "full-size arrays"
#else
			       "hash tables with chaining"
#endif

			       "\n");
		break;
	case MOD_UNLOAD:
#ifndef VIMAGE
		if_clone_detach(vlan_cloner);
#endif
		EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifdetach_tag);
		EVENTHANDLER_DEREGISTER(iflladdr_event, iflladdr_tag);
		/* Withdraw the hooks before the locks go away. */
		vlan_input_p = NULL;
		vlan_link_state_p = NULL;
		vlan_trunk_cap_p = NULL;
		vlan_trunkdev_p = NULL;
		vlan_tag_p = NULL;
		vlan_cookie_p = NULL;
		vlan_setcookie_p = NULL;
		vlan_devat_p = NULL;
		VLAN_LOCKING_DESTROY();
		if (bootverbose)
			printf("vlan: unloaded\n");
		break;
	default:
		return (EOPNOTSUPP);
	}
	return (0);
}
static moduledata_t vlan_mod = { "if_vlan", vlan_modevent, 0 }; DECLARE_MODULE(if_vlan, vlan_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(if_vlan, 3); #ifdef VIMAGE static void vnet_vlan_init(const void *unused __unused) { vlan_cloner = if_clone_advanced(vlanname, 0, vlan_clone_match, vlan_clone_create, vlan_clone_destroy); V_vlan_cloner = vlan_cloner; } VNET_SYSINIT(vnet_vlan_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_vlan_init, NULL); static void vnet_vlan_uninit(const void *unused __unused) { if_clone_detach(V_vlan_cloner); } VNET_SYSUNINIT(vnet_vlan_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST, vnet_vlan_uninit, NULL); #endif /* * Check for . style interface names. */ static struct ifnet * vlan_clone_match_ethervid(const char *name, int *vidp) { char ifname[IFNAMSIZ]; char *cp; struct ifnet *ifp; int vid; strlcpy(ifname, name, IFNAMSIZ); if ((cp = strchr(ifname, '.')) == NULL) return (NULL); *cp = '\0'; if ((ifp = ifunit_ref(ifname)) == NULL) return (NULL); /* Parse VID. */ if (*++cp == '\0') { if_rele(ifp); return (NULL); } vid = 0; for(; *cp >= '0' && *cp <= '9'; cp++) vid = (vid * 10) + (*cp - '0'); if (*cp != '\0') { if_rele(ifp); return (NULL); } if (vidp != NULL) *vidp = vid; return (ifp); } static int vlan_clone_match(struct if_clone *ifc, const char *name) { const char *cp; if (vlan_clone_match_ethervid(name, NULL) != NULL) return (1); if (strncmp(vlanname, name, strlen(vlanname)) != 0) return (0); for (cp = name + 4; *cp != '\0'; cp++) { if (*cp < '0' || *cp > '9') return (0); } return (1); } static int vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) { char *dp; int wildcard; int unit; int error; int vid; struct ifvlan *ifv; struct ifnet *ifp; struct ifnet *p; struct ifaddr *ifa; struct sockaddr_dl *sdl; struct vlanreq vlr; static const u_char eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ /* * There are 3 (ugh) ways to specify the cloned device: * o pass a parameter block with the clone request. 
* o specify parameters in the text of the clone device name * o specify no parameters and get an unattached device that * must be configured separately. * The first technique is preferred; the latter two are * supported for backwards compatibility. * * XXXRW: Note historic use of the word "tag" here. New ioctls may be * called for. */ if (params) { error = copyin(params, &vlr, sizeof(vlr)); if (error) return error; p = ifunit_ref(vlr.vlr_parent); if (p == NULL) return (ENXIO); error = ifc_name2unit(name, &unit); if (error != 0) { if_rele(p); return (error); } vid = vlr.vlr_tag; wildcard = (unit < 0); } else if ((p = vlan_clone_match_ethervid(name, &vid)) != NULL) { unit = -1; wildcard = 0; } else { p = NULL; error = ifc_name2unit(name, &unit); if (error != 0) return (error); wildcard = (unit < 0); } error = ifc_alloc_unit(ifc, &unit); if (error != 0) { if (p != NULL) if_rele(p); return (error); } /* In the wildcard case, we need to update the name. */ if (wildcard) { for (dp = name; *dp != '\0'; dp++); if (snprintf(dp, len - (dp-name), "%d", unit) > len - (dp-name) - 1) { panic("%s: interface name too long", __func__); } } ifv = malloc(sizeof(struct ifvlan), M_VLAN, M_WAITOK | M_ZERO); ifp = ifv->ifv_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { ifc_free_unit(ifc, unit); free(ifv, M_VLAN); if (p != NULL) if_rele(p); return (ENOSPC); } SLIST_INIT(&ifv->vlan_mc_listhead); ifp->if_softc = ifv; /* * Set the name manually rather than using if_initname because * we don't conform to the default naming convention for interfaces. 
*/ strlcpy(ifp->if_xname, name, IFNAMSIZ); ifp->if_dname = vlanname; ifp->if_dunit = unit; /* NB: flags are not set here */ ifp->if_linkmib = &ifv->ifv_mib; ifp->if_linkmiblen = sizeof(ifv->ifv_mib); /* NB: mtu is not set here */ ifp->if_init = vlan_init; ifp->if_transmit = vlan_transmit; ifp->if_qflush = vlan_qflush; ifp->if_ioctl = vlan_ioctl; #ifdef RATELIMIT ifp->if_snd_tag_alloc = vlan_snd_tag_alloc; #endif ifp->if_flags = VLAN_IFFLAGS; ether_ifattach(ifp, eaddr); /* Now undo some of the damage... */ ifp->if_baudrate = 0; ifp->if_type = IFT_L2VLAN; ifp->if_hdrlen = ETHER_VLAN_ENCAP_LEN; ifa = ifp->if_addr; sdl = (struct sockaddr_dl *)ifa->ifa_addr; sdl->sdl_type = IFT_L2VLAN; if (p != NULL) { error = vlan_config(ifv, p, vid); if_rele(p); if (error != 0) { /* * Since we've partially failed, we need to back * out all the way, otherwise userland could get * confused. Thus, we destroy the interface. */ ether_ifdetach(ifp); vlan_unconfig(ifp); if_free(ifp); ifc_free_unit(ifc, unit); free(ifv, M_VLAN); return (error); } } return (0); } static int vlan_clone_destroy(struct if_clone *ifc, struct ifnet *ifp) { struct ifvlan *ifv = ifp->if_softc; int unit = ifp->if_dunit; ether_ifdetach(ifp); /* first, remove it from system-wide lists */ vlan_unconfig(ifp); /* now it can be unconfigured and freed */ /* * We should have the only reference to the ifv now, so we can now * drain any remaining lladdr task before freeing the ifnet and the * ifvlan. */ taskqueue_drain(taskqueue_thread, &ifv->lladdr_task); if_free(ifp); free(ifv, M_VLAN); ifc_free_unit(ifc, unit); return (0); } /* * The ifp->if_init entry point for vlan(4) is a no-op. */ static void vlan_init(void *foo __unused) { } /* * The if_transmit method for vlan(4) interface. 
*/ static int vlan_transmit(struct ifnet *ifp, struct mbuf *m) { struct ifvlan *ifv; struct ifnet *p; int error, len, mcast; VLAN_LOCK_READER; VLAN_RLOCK(); ifv = ifp->if_softc; if (TRUNK(ifv) == NULL) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); VLAN_RUNLOCK(); m_freem(m); return (ENETDOWN); } p = PARENT(ifv); len = m->m_pkthdr.len; mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0; BPF_MTAP(ifp, m); /* * Do not run parent's if_transmit() if the parent is not up, * or parent's driver will cause a system crash. */ if (!UP_AND_RUNNING(p)) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); VLAN_RUNLOCK(); m_freem(m); return (ENETDOWN); } if (!ether_8021q_frame(&m, ifp, p, ifv->ifv_vid, ifv->ifv_pcp)) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); VLAN_RUNLOCK(); return (0); } /* * Send it, precisely as ether_output() would have. */ error = (p->if_transmit)(p, m); if (error == 0) { if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_OBYTES, len); if_inc_counter(ifp, IFCOUNTER_OMCASTS, mcast); } else if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); VLAN_RUNLOCK(); return (error); } /* * The ifp->if_qflush entry point for vlan(4) is a no-op. */ static void vlan_qflush(struct ifnet *ifp __unused) { } static void vlan_input(struct ifnet *ifp, struct mbuf *m) { struct ifvlantrunk *trunk; struct ifvlan *ifv; VLAN_LOCK_READER; TRUNK_LOCK_READER; struct m_tag *mtag; uint16_t vid, tag; VLAN_RLOCK(); trunk = ifp->if_vlantrunk; if (trunk == NULL) { VLAN_RUNLOCK(); m_freem(m); return; } if (m->m_flags & M_VLANTAG) { /* * Packet is tagged, but m contains a normal * Ethernet frame; the tag is stored out-of-band. */ tag = m->m_pkthdr.ether_vtag; m->m_flags &= ~M_VLANTAG; } else { struct ether_vlan_header *evl; /* * Packet is tagged in-band as specified by 802.1q. 
*/ switch (ifp->if_type) { case IFT_ETHER: if (m->m_len < sizeof(*evl) && (m = m_pullup(m, sizeof(*evl))) == NULL) { if_printf(ifp, "cannot pullup VLAN header\n"); VLAN_RUNLOCK(); return; } evl = mtod(m, struct ether_vlan_header *); tag = ntohs(evl->evl_tag); /* * Remove the 802.1q header by copying the Ethernet * addresses over it and adjusting the beginning of * the data in the mbuf. The encapsulated Ethernet * type field is already in place. */ bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, ETHER_HDR_LEN - ETHER_TYPE_LEN); m_adj(m, ETHER_VLAN_ENCAP_LEN); break; default: #ifdef INVARIANTS panic("%s: %s has unsupported if_type %u", __func__, ifp->if_xname, ifp->if_type); #endif if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); VLAN_RUNLOCK(); m_freem(m); return; } } vid = EVL_VLANOFTAG(tag); TRUNK_RLOCK(trunk); ifv = vlan_gethash(trunk, vid); if (ifv == NULL || !UP_AND_RUNNING(ifv->ifv_ifp)) { TRUNK_RUNLOCK(trunk); if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); VLAN_RUNLOCK(); m_freem(m); return; } TRUNK_RUNLOCK(trunk); if (vlan_mtag_pcp) { /* * While uncommon, it is possible that we will find a 802.1q * packet encapsulated inside another packet that also had an * 802.1q header. For example, ethernet tunneled over IPSEC * arriving over ethernet. In that case, we replace the * existing 802.1q PCP m_tag value. */ mtag = m_tag_locate(m, MTAG_8021Q, MTAG_8021Q_PCP_IN, NULL); if (mtag == NULL) { mtag = m_tag_alloc(MTAG_8021Q, MTAG_8021Q_PCP_IN, sizeof(uint8_t), M_NOWAIT); if (mtag == NULL) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); VLAN_RUNLOCK(); m_freem(m); return; } m_tag_prepend(m, mtag); } *(uint8_t *)(mtag + 1) = EVL_PRIOFTAG(tag); } m->m_pkthdr.rcvif = ifv->ifv_ifp; if_inc_counter(ifv->ifv_ifp, IFCOUNTER_IPACKETS, 1); VLAN_RUNLOCK(); /* Pass it back through the parent's input routine. 
*/ (*ifv->ifv_ifp->if_input)(ifv->ifv_ifp, m); } static void vlan_lladdr_fn(void *arg, int pending __unused) { struct ifvlan *ifv; struct ifnet *ifp; ifv = (struct ifvlan *)arg; ifp = ifv->ifv_ifp; /* The ifv_ifp already has the lladdr copied in. */ if_setlladdr(ifp, IF_LLADDR(ifp), ifp->if_addrlen); } static int vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid) { struct ifvlantrunk *trunk; struct ifnet *ifp; int error = 0; /* * We can handle non-ethernet hardware types as long as * they handle the tagging and headers themselves. */ if (p->if_type != IFT_ETHER && (p->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) return (EPROTONOSUPPORT); if ((p->if_flags & VLAN_IFFLAGS) != VLAN_IFFLAGS) return (EPROTONOSUPPORT); /* * Don't let the caller set up a VLAN VID with * anything except VLID bits. * VID numbers 0x0 and 0xFFF are reserved. */ if (vid == 0 || vid == 0xFFF || (vid & ~EVL_VLID_MASK)) return (EINVAL); if (ifv->ifv_trunk) return (EBUSY); /* Acquire rmlock after the branch so we can M_WAITOK. */ VLAN_XLOCK(); if (p->if_vlantrunk == NULL) { trunk = malloc(sizeof(struct ifvlantrunk), M_VLAN, M_WAITOK | M_ZERO); vlan_inithash(trunk); TRUNK_LOCK_INIT(trunk); VLAN_WLOCK(); TRUNK_WLOCK(trunk); p->if_vlantrunk = trunk; trunk->parent = p; if_ref(trunk->parent); } else { VLAN_WLOCK(); trunk = p->if_vlantrunk; TRUNK_WLOCK(trunk); } ifv->ifv_vid = vid; /* must set this before vlan_inshash() */ ifv->ifv_pcp = 0; /* Default: best effort delivery. */ vlan_tag_recalculate(ifv); error = vlan_inshash(trunk, ifv); if (error) goto done; ifv->ifv_proto = ETHERTYPE_VLAN; ifv->ifv_encaplen = ETHER_VLAN_ENCAP_LEN; ifv->ifv_mintu = ETHERMIN; ifv->ifv_pflags = 0; ifv->ifv_capenable = -1; /* * If the parent supports the VLAN_MTU capability, * i.e. can Tx/Rx larger than ETHER_MAX_LEN frames, * use it. */ if (p->if_capenable & IFCAP_VLAN_MTU) { /* * No need to fudge the MTU since the parent can * handle extended frames. 
*/ ifv->ifv_mtufudge = 0; } else { /* * Fudge the MTU by the encapsulation size. This * makes us incompatible with strictly compliant * 802.1Q implementations, but allows us to use * the feature with other NetBSD implementations, * which might still be useful. */ ifv->ifv_mtufudge = ifv->ifv_encaplen; } ifv->ifv_trunk = trunk; ifp = ifv->ifv_ifp; /* * Initialize fields from our parent. This duplicates some * work with ether_ifattach() but allows for non-ethernet * interfaces to also work. */ ifp->if_mtu = p->if_mtu - ifv->ifv_mtufudge; ifp->if_baudrate = p->if_baudrate; ifp->if_output = p->if_output; ifp->if_input = p->if_input; ifp->if_resolvemulti = p->if_resolvemulti; ifp->if_addrlen = p->if_addrlen; ifp->if_broadcastaddr = p->if_broadcastaddr; /* * Copy only a selected subset of flags from the parent. * Other flags are none of our business. */ #define VLAN_COPY_FLAGS (IFF_SIMPLEX) ifp->if_flags &= ~VLAN_COPY_FLAGS; ifp->if_flags |= p->if_flags & VLAN_COPY_FLAGS; #undef VLAN_COPY_FLAGS ifp->if_link_state = p->if_link_state; vlan_capabilities(ifv); /* * Set up our interface address to reflect the underlying * physical interface's. */ bcopy(IF_LLADDR(p), IF_LLADDR(ifp), p->if_addrlen); ((struct sockaddr_dl *)ifp->if_addr->ifa_addr)->sdl_alen = p->if_addrlen; /* * Configure multicast addresses that may already be * joined on the vlan device. */ (void)vlan_setmulti(ifp); TASK_INIT(&ifv->lladdr_task, 0, vlan_lladdr_fn, ifv); /* We are ready for operation now. */ ifp->if_drv_flags |= IFF_DRV_RUNNING; /* Update flags on the parent, if necessary. */ vlan_setflags(ifp, 1); done: /* * We need to drop the non-sleepable rmlock so that the underlying * devices can sleep in their vlan_config hooks. 
*/ TRUNK_WUNLOCK(trunk); VLAN_WUNLOCK(); if (error == 0) EVENTHANDLER_INVOKE(vlan_config, p, ifv->ifv_vid); VLAN_XUNLOCK(); return (error); } static void vlan_unconfig(struct ifnet *ifp) { VLAN_XLOCK(); vlan_unconfig_locked(ifp, 0); VLAN_XUNLOCK(); } static void vlan_unconfig_locked(struct ifnet *ifp, int departing) { struct ifvlantrunk *trunk; struct vlan_mc_entry *mc; struct ifvlan *ifv; struct ifnet *parent; int error; VLAN_XLOCK_ASSERT(); ifv = ifp->if_softc; trunk = ifv->ifv_trunk; parent = NULL; if (trunk != NULL) { /* * Both vlan_transmit and vlan_input rely on the trunk fields * being NULL to determine whether to bail, so we need to get * an exclusive lock here to prevent them from using bad * ifvlans. */ VLAN_WLOCK(); parent = trunk->parent; /* * Since the interface is being unconfigured, we need to * empty the list of multicast groups that we may have joined * while we were alive from the parent's list. */ while ((mc = SLIST_FIRST(&ifv->vlan_mc_listhead)) != NULL) { /* * If the parent interface is being detached, * all its multicast addresses have already * been removed. Warn about errors if * if_delmulti() does fail, but don't abort as * all callers expect vlan destruction to * succeed. */ if (!departing) { error = if_delmulti(parent, (struct sockaddr *)&mc->mc_addr); if (error) if_printf(ifp, "Failed to delete multicast address from parent: %d\n", error); } SLIST_REMOVE_HEAD(&ifv->vlan_mc_listhead, mc_entries); free(mc, M_VLAN); } vlan_setflags(ifp, 0); /* clear special flags on parent */ /* * The trunk lock isn't actually required here, but * vlan_remhash expects it. */ TRUNK_WLOCK(trunk); vlan_remhash(trunk, ifv); TRUNK_WUNLOCK(trunk); ifv->ifv_trunk = NULL; /* * Check if we were the last. */ if (trunk->refcnt == 0) { parent->if_vlantrunk = NULL; trunk_destroy(trunk); } VLAN_WUNLOCK(); } /* Disconnect from parent. 
*/ if (ifv->ifv_pflags) if_printf(ifp, "%s: ifv_pflags unclean\n", __func__); ifp->if_mtu = ETHERMTU; ifp->if_link_state = LINK_STATE_UNKNOWN; ifp->if_drv_flags &= ~IFF_DRV_RUNNING; /* * Only dispatch an event if vlan was * attached, otherwise there is nothing * to cleanup anyway. */ if (parent != NULL) EVENTHANDLER_INVOKE(vlan_unconfig, parent, ifv->ifv_vid); } /* Handle a reference counted flag that should be set on the parent as well */ static int vlan_setflag(struct ifnet *ifp, int flag, int status, int (*func)(struct ifnet *, int)) { struct ifvlan *ifv; int error; VLAN_SXLOCK_ASSERT(); ifv = ifp->if_softc; status = status ? (ifp->if_flags & flag) : 0; /* Now "status" contains the flag value or 0 */ /* * See if recorded parent's status is different from what * we want it to be. If it is, flip it. We record parent's * status in ifv_pflags so that we won't clear parent's flag * we haven't set. In fact, we don't clear or set parent's * flags directly, but get or release references to them. * That's why we can be sure that recorded flags still are * in accord with actual parent's flags. */ if (status != (ifv->ifv_pflags & flag)) { error = (*func)(PARENT(ifv), status); if (error) return (error); ifv->ifv_pflags &= ~flag; ifv->ifv_pflags |= status; } return (0); } /* * Handle IFF_* flags that require certain changes on the parent: * if "status" is true, update parent's flags respective to our if_flags; * if "status" is false, forcedly clear the flags set on parent. */ static int vlan_setflags(struct ifnet *ifp, int status) { int error, i; for (i = 0; vlan_pflags[i].flag; i++) { error = vlan_setflag(ifp, vlan_pflags[i].flag, status, vlan_pflags[i].func); if (error) return (error); } return (0); } /* Inform all vlans that their parent has changed link state */ static void vlan_link_state(struct ifnet *ifp) { struct ifvlantrunk *trunk; struct ifvlan *ifv; VLAN_LOCK_READER; /* Called from a taskqueue_swi task, so we cannot sleep. 
*/ VLAN_RLOCK(); trunk = ifp->if_vlantrunk; if (trunk == NULL) { VLAN_RUNLOCK(); return; } TRUNK_WLOCK(trunk); VLAN_FOREACH(ifv, trunk) { ifv->ifv_ifp->if_baudrate = trunk->parent->if_baudrate; if_link_state_change(ifv->ifv_ifp, trunk->parent->if_link_state); } TRUNK_WUNLOCK(trunk); VLAN_RUNLOCK(); } static void vlan_capabilities(struct ifvlan *ifv) { struct ifnet *p; struct ifnet *ifp; struct ifnet_hw_tsomax hw_tsomax; int cap = 0, ena = 0, mena; u_long hwa = 0; VLAN_SXLOCK_ASSERT(); TRUNK_WLOCK_ASSERT(TRUNK(ifv)); p = PARENT(ifv); ifp = ifv->ifv_ifp; /* Mask parent interface enabled capabilities disabled by user. */ mena = p->if_capenable & ifv->ifv_capenable; /* * If the parent interface can do checksum offloading * on VLANs, then propagate its hardware-assisted * checksumming flags. Also assert that checksum * offloading requires hardware VLAN tagging. */ if (p->if_capabilities & IFCAP_VLAN_HWCSUM) cap |= p->if_capabilities & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6); if (p->if_capenable & IFCAP_VLAN_HWCSUM && p->if_capenable & IFCAP_VLAN_HWTAGGING) { ena |= mena & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6); if (ena & IFCAP_TXCSUM) hwa |= p->if_hwassist & (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP); if (ena & IFCAP_TXCSUM_IPV6) hwa |= p->if_hwassist & (CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_SCTP_IPV6); } /* * If the parent interface can do TSO on VLANs then * propagate the hardware-assisted flag. TSO on VLANs * does not necessarily require hardware VLAN tagging. */ memset(&hw_tsomax, 0, sizeof(hw_tsomax)); if_hw_tsomax_common(p, &hw_tsomax); if_hw_tsomax_update(ifp, &hw_tsomax); if (p->if_capabilities & IFCAP_VLAN_HWTSO) cap |= p->if_capabilities & IFCAP_TSO; if (p->if_capenable & IFCAP_VLAN_HWTSO) { ena |= mena & IFCAP_TSO; if (ena & IFCAP_TSO) hwa |= p->if_hwassist & CSUM_TSO; } /* * If the parent interface can do LRO and checksum offloading on * VLANs, then guess it may do LRO on VLANs. False positive here * cost nothing, while false negative may lead to some confusions. 
*/ if (p->if_capabilities & IFCAP_VLAN_HWCSUM) cap |= p->if_capabilities & IFCAP_LRO; if (p->if_capenable & IFCAP_VLAN_HWCSUM) ena |= p->if_capenable & IFCAP_LRO; /* * If the parent interface can offload TCP connections over VLANs then * propagate its TOE capability to the VLAN interface. * * All TOE drivers in the tree today can deal with VLANs. If this * changes then IFCAP_VLAN_TOE should be promoted to a full capability * with its own bit. */ #define IFCAP_VLAN_TOE IFCAP_TOE if (p->if_capabilities & IFCAP_VLAN_TOE) cap |= p->if_capabilities & IFCAP_TOE; if (p->if_capenable & IFCAP_VLAN_TOE) { TOEDEV(ifp) = TOEDEV(p); ena |= mena & IFCAP_TOE; } /* * If the parent interface supports dynamic link state, so does the * VLAN interface. */ cap |= (p->if_capabilities & IFCAP_LINKSTATE); ena |= (mena & IFCAP_LINKSTATE); #ifdef RATELIMIT /* * If the parent interface supports ratelimiting, so does the * VLAN interface. */ cap |= (p->if_capabilities & IFCAP_TXRTLMT); ena |= (mena & IFCAP_TXRTLMT); #endif ifp->if_capabilities = cap; ifp->if_capenable = ena; ifp->if_hwassist = hwa; } static void vlan_trunk_capabilities(struct ifnet *ifp) { struct ifvlantrunk *trunk; struct ifvlan *ifv; VLAN_SLOCK(); trunk = ifp->if_vlantrunk; if (trunk == NULL) { VLAN_SUNLOCK(); return; } TRUNK_WLOCK(trunk); VLAN_FOREACH(ifv, trunk) { vlan_capabilities(ifv); } TRUNK_WUNLOCK(trunk); VLAN_SUNLOCK(); } static int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct ifnet *p; struct ifreq *ifr; struct ifaddr *ifa; struct ifvlan *ifv; struct ifvlantrunk *trunk; struct vlanreq vlr; int error = 0; VLAN_LOCK_READER; ifr = (struct ifreq *)data; ifa = (struct ifaddr *) data; ifv = ifp->if_softc; switch (cmd) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP; #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) arp_ifinit(ifp, ifa); #endif break; case SIOCGIFADDR: bcopy(IF_LLADDR(ifp), &ifr->ifr_addr.sa_data[0], ifp->if_addrlen); break; case SIOCGIFMEDIA: VLAN_SLOCK(); if (TRUNK(ifv) != NULL) { p 
= PARENT(ifv); if_ref(p); error = (*p->if_ioctl)(p, SIOCGIFMEDIA, data); if_rele(p); /* Limit the result to the parent's current config. */ if (error == 0) { struct ifmediareq *ifmr; ifmr = (struct ifmediareq *)data; if (ifmr->ifm_count >= 1 && ifmr->ifm_ulist) { ifmr->ifm_count = 1; error = copyout(&ifmr->ifm_current, ifmr->ifm_ulist, sizeof(int)); } } } else { error = EINVAL; } VLAN_SUNLOCK(); break; case SIOCSIFMEDIA: error = EINVAL; break; case SIOCSIFMTU: /* * Set the interface MTU. */ VLAN_SLOCK(); trunk = TRUNK(ifv); if (trunk != NULL) { TRUNK_WLOCK(trunk); if (ifr->ifr_mtu > (PARENT(ifv)->if_mtu - ifv->ifv_mtufudge) || ifr->ifr_mtu < (ifv->ifv_mintu - ifv->ifv_mtufudge)) error = EINVAL; else ifp->if_mtu = ifr->ifr_mtu; TRUNK_WUNLOCK(trunk); } else error = EINVAL; VLAN_SUNLOCK(); break; case SIOCSETVLAN: #ifdef VIMAGE /* * XXXRW/XXXBZ: The goal in these checks is to allow a VLAN * interface to be delegated to a jail without allowing the * jail to change what underlying interface/VID it is * associated with. We are not entirely convinced that this * is the right way to accomplish that policy goal. 
*/ if (ifp->if_vnet != ifp->if_home_vnet) { error = EPERM; break; } #endif - error = copyin(ifr->ifr_data, &vlr, sizeof(vlr)); + error = copyin(ifr_data_get_ptr(ifr), &vlr, sizeof(vlr)); if (error) break; if (vlr.vlr_parent[0] == '\0') { vlan_unconfig(ifp); break; } p = ifunit_ref(vlr.vlr_parent); if (p == NULL) { error = ENOENT; break; } error = vlan_config(ifv, p, vlr.vlr_tag); if_rele(p); break; case SIOCGETVLAN: #ifdef VIMAGE if (ifp->if_vnet != ifp->if_home_vnet) { error = EPERM; break; } #endif bzero(&vlr, sizeof(vlr)); VLAN_SLOCK(); if (TRUNK(ifv) != NULL) { strlcpy(vlr.vlr_parent, PARENT(ifv)->if_xname, sizeof(vlr.vlr_parent)); vlr.vlr_tag = ifv->ifv_vid; } VLAN_SUNLOCK(); - error = copyout(&vlr, ifr->ifr_data, sizeof(vlr)); + error = copyout(&vlr, ifr_data_get_ptr(ifr), sizeof(vlr)); break; case SIOCSIFFLAGS: /* * We should propagate selected flags to the parent, * e.g., promiscuous mode. */ VLAN_XLOCK(); if (TRUNK(ifv) != NULL) error = vlan_setflags(ifp, 1); VLAN_XUNLOCK(); break; case SIOCADDMULTI: case SIOCDELMULTI: /* * If we don't have a parent, just remember the membership for * when we do. * * XXX We need the rmlock here to avoid sleeping while * holding in6_multi_mtx. 
*/ VLAN_RLOCK(); trunk = TRUNK(ifv); if (trunk != NULL) { TRUNK_WLOCK(trunk); error = vlan_setmulti(ifp); TRUNK_WUNLOCK(trunk); } VLAN_RUNLOCK(); break; case SIOCGVLANPCP: #ifdef VIMAGE if (ifp->if_vnet != ifp->if_home_vnet) { error = EPERM; break; } #endif ifr->ifr_vlan_pcp = ifv->ifv_pcp; break; case SIOCSVLANPCP: #ifdef VIMAGE if (ifp->if_vnet != ifp->if_home_vnet) { error = EPERM; break; } #endif error = priv_check(curthread, PRIV_NET_SETVLANPCP); if (error) break; if (ifr->ifr_vlan_pcp > 7) { error = EINVAL; break; } ifv->ifv_pcp = ifr->ifr_vlan_pcp; vlan_tag_recalculate(ifv); break; case SIOCSIFCAP: VLAN_SLOCK(); ifv->ifv_capenable = ifr->ifr_reqcap; trunk = TRUNK(ifv); if (trunk != NULL) { TRUNK_WLOCK(trunk); vlan_capabilities(ifv); TRUNK_WUNLOCK(trunk); } VLAN_SUNLOCK(); break; default: error = EINVAL; break; } return (error); } #ifdef RATELIMIT static int vlan_snd_tag_alloc(struct ifnet *ifp, union if_snd_tag_alloc_params *params, struct m_snd_tag **ppmt) { /* get trunk device */ ifp = vlan_trunkdev(ifp); if (ifp == NULL || (ifp->if_capenable & IFCAP_TXRTLMT) == 0) return (EOPNOTSUPP); /* forward allocation request */ return (ifp->if_snd_tag_alloc(ifp, params, ppmt)); } #endif Index: head/sys/net/iflib.c =================================================================== --- head/sys/net/iflib.c (revision 331796) +++ head/sys/net/iflib.c (revision 331797) @@ -1,5968 +1,5969 @@ /*- * Copyright (c) 2014-2017, Matthew Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Neither the name of Matthew Macy nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_acpi.h" #include "opt_sched.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ifdi_if.h" #if defined(__i386__) || defined(__amd64__) #include #include #include #include #include #include #endif #include /* * enable accounting of every mbuf as it comes in to and goes out of * iflib's software descriptor references */ #define MEMORY_LOGGING 0 /* * Enable mbuf vectors for compressing long mbuf chains */ /* * NB: * - Prefetching in tx cleaning should perhaps be a tunable. The distance ahead * we prefetch needs to be determined by the time spent in m_free vis a vis * the cost of a prefetch. 
This will of course vary based on the workload: * - NFLX's m_free path is dominated by vm-based M_EXT manipulation which * is quite expensive, thus suggesting very little prefetch. * - small packet forwarding which is just returning a single mbuf to * UMA will typically be very fast vis a vis the cost of a memory * access. */ /* * File organization: * - private structures * - iflib private utility functions * - ifnet functions * - vlan registry and other exported functions * - iflib public core functions * * */ static MALLOC_DEFINE(M_IFLIB, "iflib", "ifnet library"); struct iflib_txq; typedef struct iflib_txq *iflib_txq_t; struct iflib_rxq; typedef struct iflib_rxq *iflib_rxq_t; struct iflib_fl; typedef struct iflib_fl *iflib_fl_t; struct iflib_ctx; static void iru_init(if_rxd_update_t iru, iflib_rxq_t rxq, uint8_t flid); typedef struct iflib_filter_info { driver_filter_t *ifi_filter; void *ifi_filter_arg; struct grouptask *ifi_task; void *ifi_ctx; } *iflib_filter_info_t; struct iflib_ctx { KOBJ_FIELDS; /* * Pointer to hardware driver's softc */ void *ifc_softc; device_t ifc_dev; if_t ifc_ifp; cpuset_t ifc_cpus; if_shared_ctx_t ifc_sctx; struct if_softc_ctx ifc_softc_ctx; struct mtx ifc_mtx; uint16_t ifc_nhwtxqs; uint16_t ifc_nhwrxqs; iflib_txq_t ifc_txqs; iflib_rxq_t ifc_rxqs; uint32_t ifc_if_flags; uint32_t ifc_flags; uint32_t ifc_max_fl_buf_size; int ifc_in_detach; int ifc_link_state; int ifc_link_irq; int ifc_watchdog_events; struct cdev *ifc_led_dev; struct resource *ifc_msix_mem; struct if_irq ifc_legacy_irq; struct grouptask ifc_admin_task; struct grouptask ifc_vflr_task; struct iflib_filter_info ifc_filter_info; struct ifmedia ifc_media; struct sysctl_oid *ifc_sysctl_node; uint16_t ifc_sysctl_ntxqs; uint16_t ifc_sysctl_nrxqs; uint16_t ifc_sysctl_qs_eq_override; uint16_t ifc_sysctl_rx_budget; qidx_t ifc_sysctl_ntxds[8]; qidx_t ifc_sysctl_nrxds[8]; struct if_txrx ifc_txrx; #define isc_txd_encap ifc_txrx.ift_txd_encap #define isc_txd_flush 
ifc_txrx.ift_txd_flush #define isc_txd_credits_update ifc_txrx.ift_txd_credits_update #define isc_rxd_available ifc_txrx.ift_rxd_available #define isc_rxd_pkt_get ifc_txrx.ift_rxd_pkt_get #define isc_rxd_refill ifc_txrx.ift_rxd_refill #define isc_rxd_flush ifc_txrx.ift_rxd_flush #define isc_rxd_refill ifc_txrx.ift_rxd_refill #define isc_rxd_refill ifc_txrx.ift_rxd_refill #define isc_legacy_intr ifc_txrx.ift_legacy_intr eventhandler_tag ifc_vlan_attach_event; eventhandler_tag ifc_vlan_detach_event; uint8_t ifc_mac[ETHER_ADDR_LEN]; char ifc_mtx_name[16]; }; void * iflib_get_softc(if_ctx_t ctx) { return (ctx->ifc_softc); } device_t iflib_get_dev(if_ctx_t ctx) { return (ctx->ifc_dev); } if_t iflib_get_ifp(if_ctx_t ctx) { return (ctx->ifc_ifp); } struct ifmedia * iflib_get_media(if_ctx_t ctx) { return (&ctx->ifc_media); } void iflib_set_mac(if_ctx_t ctx, uint8_t mac[ETHER_ADDR_LEN]) { bcopy(mac, ctx->ifc_mac, ETHER_ADDR_LEN); } if_softc_ctx_t iflib_get_softc_ctx(if_ctx_t ctx) { return (&ctx->ifc_softc_ctx); } if_shared_ctx_t iflib_get_sctx(if_ctx_t ctx) { return (ctx->ifc_sctx); } #define IP_ALIGNED(m) ((((uintptr_t)(m)->m_data) & 0x3) == 0x2) #define CACHE_PTR_INCREMENT (CACHE_LINE_SIZE/sizeof(void*)) #define CACHE_PTR_NEXT(ptr) ((void *)(((uintptr_t)(ptr)+CACHE_LINE_SIZE-1) & (CACHE_LINE_SIZE-1))) #define LINK_ACTIVE(ctx) ((ctx)->ifc_link_state == LINK_STATE_UP) #define CTX_IS_VF(ctx) ((ctx)->ifc_sctx->isc_flags & IFLIB_IS_VF) #define RX_SW_DESC_MAP_CREATED (1 << 0) #define TX_SW_DESC_MAP_CREATED (1 << 1) #define RX_SW_DESC_INUSE (1 << 3) #define TX_SW_DESC_MAPPED (1 << 4) #define M_TOOBIG M_PROTO1 typedef struct iflib_sw_rx_desc_array { bus_dmamap_t *ifsd_map; /* bus_dma maps for packet */ struct mbuf **ifsd_m; /* pkthdr mbufs */ caddr_t *ifsd_cl; /* direct cluster pointer for rx */ uint8_t *ifsd_flags; } iflib_rxsd_array_t; typedef struct iflib_sw_tx_desc_array { bus_dmamap_t *ifsd_map; /* bus_dma maps for packet */ struct mbuf **ifsd_m; /* pkthdr mbufs */ uint8_t 
*ifsd_flags;
} if_txsd_vec_t;


/* magic number that should be high enough for any hardware */
#define IFLIB_MAX_TX_SEGS		128
/* bnxt supports 64 with hardware LRO enabled */
#define IFLIB_MAX_RX_SEGS		64
#define IFLIB_RX_COPY_THRESH		128
#define IFLIB_MAX_RX_REFRESH		32
/* The minimum descriptors per second before we start coalescing */
#define IFLIB_MIN_DESC_SEC		16384
#define IFLIB_DEFAULT_TX_UPDATE_FREQ	16
#define IFLIB_QUEUE_IDLE		0
#define IFLIB_QUEUE_HUNG		1
#define IFLIB_QUEUE_WORKING		2
/* maximum number of txqs that can share an rx interrupt */
#define IFLIB_MAX_TX_SHARED_INTR	4

/* this should really scale with ring size - this is a fairly arbitrary value */
#define TX_BATCH_SIZE			32

#define IFLIB_RESTART_BUDGET		8

/* Bits tested against if_ctx ifc_flags. */
#define	IFC_LEGACY		0x001
#define	IFC_QFLUSH		0x002
#define	IFC_MULTISEG		0x004
#define	IFC_DMAR		0x008
#define	IFC_SC_ALLOCATED	0x010
#define	IFC_INIT_DONE		0x020
#define	IFC_PREFETCH		0x040
#define	IFC_DO_RESET		0x080
#define	IFC_CHECK_HUNG		0x100

#define CSUM_OFFLOAD		(CSUM_IP_TSO|CSUM_IP6_TSO|CSUM_IP| \
				 CSUM_IP_UDP|CSUM_IP_TCP|CSUM_IP_SCTP| \
				 CSUM_IP6_UDP|CSUM_IP6_TCP|CSUM_IP6_SCTP)

/* Per transmit-queue state. */
struct iflib_txq {
	qidx_t		ift_in_use;
	qidx_t		ift_cidx;
	qidx_t		ift_cidx_processed;
	qidx_t		ift_pidx;
	uint8_t		ift_gen;
	uint8_t		ift_br_offset;
	uint16_t	ift_npending;
	uint16_t	ift_db_pending;
	uint16_t	ift_rs_pending;
	/* implicit pad */
	uint8_t		ift_txd_size[8];
	uint64_t	ift_processed;
	uint64_t	ift_cleaned;
	uint64_t	ift_cleaned_prev;
#if MEMORY_LOGGING
	uint64_t	ift_enqueued;
	uint64_t	ift_dequeued;
#endif
	/* event counters */
	uint64_t	ift_no_tx_dma_setup;
	uint64_t	ift_no_desc_avail;
	uint64_t	ift_mbuf_defrag_failed;
	uint64_t	ift_mbuf_defrag;
	uint64_t	ift_map_failed;
	uint64_t	ift_txd_encap_efbig;
	uint64_t	ift_pullups;

	struct mtx	ift_mtx;
	struct mtx	ift_db_mtx;

	/* constant values */
	if_ctx_t	ift_ctx;
	struct ifmp_ring        *ift_br;
	struct grouptask	ift_task;
	qidx_t		ift_size;
	uint16_t	ift_id;
	struct callout	ift_timer;

	if_txsd_vec_t	ift_sds;
	uint8_t		ift_qstatus;
	uint8_t		ift_closed;
	uint8_t		ift_update_freq;
	struct iflib_filter_info ift_filter_info;
	bus_dma_tag_t		ift_desc_tag;
	bus_dma_tag_t		ift_tso_desc_tag;
	iflib_dma_info_t	ift_ifdi;
#define MTX_NAME_LEN 16
	char                    ift_mtx_name[MTX_NAME_LEN];
	char                    ift_db_mtx_name[MTX_NAME_LEN];
	bus_dma_segment_t	ift_segs[IFLIB_MAX_TX_SEGS]  __aligned(CACHE_LINE_SIZE);
#ifdef IFLIB_DIAGNOSTICS
	uint64_t ift_cpu_exec_count[256];
#endif
} __aligned(CACHE_LINE_SIZE);

/* Per receive free-list state. */
struct iflib_fl {
	qidx_t		ifl_cidx;
	qidx_t		ifl_pidx;
	qidx_t		ifl_credits;
	uint8_t		ifl_gen;
	uint8_t		ifl_rxd_size;
#if MEMORY_LOGGING
	uint64_t	ifl_m_enqueued;
	uint64_t	ifl_m_dequeued;
	uint64_t	ifl_cl_enqueued;
	uint64_t	ifl_cl_dequeued;
#endif
	/* implicit pad */

	bitstr_t 	*ifl_rx_bitmap;
	qidx_t		ifl_fragidx;
	/* constant */
	qidx_t		ifl_size;
	uint16_t	ifl_buf_size;
	uint16_t	ifl_cltype;
	uma_zone_t	ifl_zone;
	iflib_rxsd_array_t	ifl_sds;
	iflib_rxq_t	ifl_rxq;
	uint8_t		ifl_id;
	bus_dma_tag_t           ifl_desc_tag;
	iflib_dma_info_t	ifl_ifdi;
	/* staging arrays handed to the driver refill method via iru_init() */
	uint64_t	ifl_bus_addrs[IFLIB_MAX_RX_REFRESH] __aligned(CACHE_LINE_SIZE);
	caddr_t		ifl_vm_addrs[IFLIB_MAX_RX_REFRESH];
	qidx_t	ifl_rxd_idxs[IFLIB_MAX_RX_REFRESH];
}  __aligned(CACHE_LINE_SIZE);

/*
 * Return the number of ring entries in use for a ring of `size' entries
 * with consumer index cidx and producer index pidx.  When the indices are
 * equal, gen disambiguates: 0 means empty, 1 means full.  Any other gen
 * value with equal indices panics.
 */
static inline qidx_t
get_inuse(int size, qidx_t cidx, qidx_t pidx, uint8_t gen)
{
	qidx_t used;

	if (pidx > cidx)
		used = pidx - cidx;
	else if (pidx < cidx)
		used = size - cidx + pidx;	/* producer has wrapped */
	else if (gen == 0 && pidx == cidx)
		used = 0;
	else if (gen == 1 && pidx == cidx)
		used = size;
	else
		panic("bad state");

	return (used);
}

/* Free entries in txq: ring size minus entries in use. */
#define TXQ_AVAIL(txq) (txq->ift_size - get_inuse(txq->ift_size, txq->ift_cidx, txq->ift_pidx, txq->ift_gen))

/* Distance from tail to head on a ring that wraps at `wrap'. */
#define IDXDIFF(head, tail, wrap) \
	((head) >= (tail) ? (head) - (tail) : (wrap) - (tail) + (head))

/* Per receive-queue state. */
struct iflib_rxq {
	/* If there is a separate completion queue -
	 * these are the cq cidx and pidx. Otherwise
	 * these are unused.
	 */
	qidx_t		ifr_size;
	qidx_t		ifr_cq_cidx;
	qidx_t		ifr_cq_pidx;
	uint8_t		ifr_cq_gen;
	uint8_t		ifr_fl_offset;

	if_ctx_t	ifr_ctx;
	iflib_fl_t	ifr_fl;
	uint64_t	ifr_rx_irq;
	uint16_t	ifr_id;
	uint8_t		ifr_lro_enabled;
	uint8_t		ifr_nfl;
	uint8_t		ifr_ntxqirq;
	uint8_t		ifr_txqid[IFLIB_MAX_TX_SHARED_INTR];
	struct lro_ctrl			ifr_lc;
	struct grouptask        ifr_task;
	struct iflib_filter_info ifr_filter_info;
	iflib_dma_info_t		ifr_ifdi;

	/* dynamically allocate if any drivers need a value substantially larger than this */
	struct if_rxd_frag	ifr_frags[IFLIB_MAX_RX_SEGS] __aligned(CACHE_LINE_SIZE);
#ifdef IFLIB_DIAGNOSTICS
	uint64_t ifr_cpu_exec_count[256];
#endif
}  __aligned(CACHE_LINE_SIZE);

typedef struct if_rxsd {
	caddr_t *ifsd_cl;
	struct mbuf **ifsd_m;
	iflib_fl_t ifsd_fl;
	qidx_t ifsd_cidx;
} *if_rxsd_t;

/* multiple of word size */
#ifdef __LP64__
#define PKT_INFO_SIZE	6
#define RXD_INFO_SIZE	5
#define PKT_TYPE uint64_t
#else
#define PKT_INFO_SIZE	11
#define RXD_INFO_SIZE	8
#define PKT_TYPE uint32_t
#endif
#define PKT_LOOP_BOUND  ((PKT_INFO_SIZE/3)*3)
#define RXD_LOOP_BOUND  ((RXD_INFO_SIZE/4)*4)

/*
 * Word-array overlays used to zero if_pkt_info / if_rxd_info one word at
 * a time; the CTASSERTs below check that the sizes match.
 */
typedef struct if_pkt_info_pad {
	PKT_TYPE pkt_val[PKT_INFO_SIZE];
} *if_pkt_info_pad_t;
typedef struct if_rxd_info_pad {
	PKT_TYPE rxd_val[RXD_INFO_SIZE];
} *if_rxd_info_pad_t;

CTASSERT(sizeof(struct if_pkt_info_pad) == sizeof(struct if_pkt_info));
CTASSERT(sizeof(struct if_rxd_info_pad) == sizeof(struct if_rxd_info));


/* Zero *pi through the pad overlay with unrolled word stores. */
static inline void
pkt_info_zero(if_pkt_info_t pi)
{
	if_pkt_info_pad_t pi_pad;

	pi_pad = (if_pkt_info_pad_t)pi;
	pi_pad->pkt_val[0] = 0; pi_pad->pkt_val[1] = 0; pi_pad->pkt_val[2] = 0;
	pi_pad->pkt_val[3] = 0; pi_pad->pkt_val[4] = 0; pi_pad->pkt_val[5] = 0;
#ifndef __LP64__
	/* the 32-bit layout has PKT_INFO_SIZE == 11 words */
	pi_pad->pkt_val[6] = 0; pi_pad->pkt_val[7] = 0; pi_pad->pkt_val[8] = 0;
	pi_pad->pkt_val[9] = 0; pi_pad->pkt_val[10] = 0;
#endif
}

/* Zero *ri through the pad overlay, four words per loop iteration. */
static inline void
rxd_info_zero(if_rxd_info_t ri)
{
	if_rxd_info_pad_t ri_pad;
	int i;

	ri_pad = (if_rxd_info_pad_t)ri;
	for (i = 0; i < RXD_LOOP_BOUND; i += 4) {
		ri_pad->rxd_val[i] = 0;
		ri_pad->rxd_val[i+1] = 0;
		ri_pad->rxd_val[i+2] = 0;
		ri_pad->rxd_val[i+3] = 0;
	}
#ifdef __LP64__
	/* RXD_INFO_SIZE is 5 here, so one word remains after the loop */
	ri_pad->rxd_val[RXD_INFO_SIZE-1] = 0;
#endif
}

/*
 * Only allow a single packet to take up most 1/nth of the tx ring
 */
#define MAX_SINGLE_PACKET_FRACTION 12
#define IF_BAD_DMA (bus_addr_t)-1

#define CTX_ACTIVE(ctx) ((if_getdrvflags((ctx)->ifc_ifp) & IFF_DRV_RUNNING))

/* Context mutex helpers. */
#define CTX_LOCK_INIT(_sc, _name)  mtx_init(&(_sc)->ifc_mtx, _name, "iflib ctx lock", MTX_DEF)

#define CTX_LOCK(ctx) mtx_lock(&(ctx)->ifc_mtx)
#define CTX_UNLOCK(ctx) mtx_unlock(&(ctx)->ifc_mtx)
#define CTX_LOCK_DESTROY(ctx) mtx_destroy(&(ctx)->ifc_mtx)


/* These take and release the txq mutex (ift_mtx). */
#define CALLOUT_LOCK(txq)	mtx_lock(&txq->ift_mtx)
#define CALLOUT_UNLOCK(txq) 	mtx_unlock(&txq->ift_mtx)


/* Our boot-time initialization hook */
static int	iflib_module_event_handler(module_t, int, void *);

static moduledata_t iflib_moduledata = {
	"iflib",
	iflib_module_event_handler,
	NULL
};

DECLARE_MODULE(iflib, iflib_moduledata, SI_SUB_INIT_IF, SI_ORDER_ANY);
MODULE_VERSION(iflib, 1);

MODULE_DEPEND(iflib, pci, 1, 1, 1);
MODULE_DEPEND(iflib, ether, 1, 1, 1);

TASKQGROUP_DEFINE(if_io_tqg, mp_ncpus, 1);
TASKQGROUP_DEFINE(if_config_tqg, 1, 1);

/* Debug counters default to on under INVARIANTS unless overridden. */
#ifndef IFLIB_DEBUG_COUNTERS
#ifdef INVARIANTS
#define IFLIB_DEBUG_COUNTERS 1
#else
#define IFLIB_DEBUG_COUNTERS 0
#endif /* !INVARIANTS */
#endif

static SYSCTL_NODE(_net, OID_AUTO, iflib, CTLFLAG_RD, 0,
                   "iflib driver parameters");

/*
 * XXX need to ensure that this can't accidentally cause the head to be moved backwards 
 */
static int iflib_min_tx_latency = 0;
SYSCTL_INT(_net_iflib, OID_AUTO, min_tx_latency, CTLFLAG_RW,
		   &iflib_min_tx_latency, 0, "minimize transmit latency at the possible expense of throughput");
/* NOTE(review): description string is identical to min_tx_latency's - confirm intended */
static int iflib_no_tx_batch = 0;
SYSCTL_INT(_net_iflib, OID_AUTO, no_tx_batch, CTLFLAG_RW,
		   &iflib_no_tx_batch, 0, "minimize transmit latency at the possible expense of throughput");


#if IFLIB_DEBUG_COUNTERS

static int iflib_tx_seen;
static int iflib_tx_sent;
static int iflib_tx_encap;
static int iflib_rx_allocs;
static int iflib_fl_refills;
static int iflib_fl_refills_large;
static int iflib_tx_frees;

/* Read-only sysctls under net.iflib exposing the debug counters. */
SYSCTL_INT(_net_iflib, OID_AUTO, tx_seen, CTLFLAG_RD,
		   &iflib_tx_seen, 0, "# tx mbufs seen");
SYSCTL_INT(_net_iflib, OID_AUTO, tx_sent, CTLFLAG_RD,
		   &iflib_tx_sent, 0, "# tx mbufs sent");
SYSCTL_INT(_net_iflib, OID_AUTO, tx_encap, CTLFLAG_RD,
		   &iflib_tx_encap, 0, "# tx mbufs encapped");
SYSCTL_INT(_net_iflib, OID_AUTO, tx_frees, CTLFLAG_RD,
		   &iflib_tx_frees, 0, "# tx frees");
SYSCTL_INT(_net_iflib, OID_AUTO, rx_allocs, CTLFLAG_RD,
		   &iflib_rx_allocs, 0, "# rx allocations");
SYSCTL_INT(_net_iflib, OID_AUTO, fl_refills, CTLFLAG_RD,
		   &iflib_fl_refills, 0, "# refills");
SYSCTL_INT(_net_iflib, OID_AUTO, fl_refills_large, CTLFLAG_RD,
		   &iflib_fl_refills_large, 0, "# large refills");


static int iflib_txq_drain_flushing;
static int iflib_txq_drain_oactive;
static int iflib_txq_drain_notready;
static int iflib_txq_drain_encapfail;

SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_flushing, CTLFLAG_RD,
		   &iflib_txq_drain_flushing, 0, "# drain flushes");
SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_oactive, CTLFLAG_RD,
		   &iflib_txq_drain_oactive, 0, "# drain oactives");
SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_notready, CTLFLAG_RD,
		   &iflib_txq_drain_notready, 0, "# drain notready");
SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_encapfail, CTLFLAG_RD,
		   &iflib_txq_drain_encapfail, 0, "# drain encap fails");


static int iflib_encap_load_mbuf_fail;
static int iflib_encap_pad_mbuf_fail;
static int iflib_encap_txq_avail_fail;
static int iflib_encap_txd_encap_fail;

SYSCTL_INT(_net_iflib, OID_AUTO, encap_load_mbuf_fail, CTLFLAG_RD,
		   &iflib_encap_load_mbuf_fail, 0, "# busdma load failures");
SYSCTL_INT(_net_iflib, OID_AUTO, encap_pad_mbuf_fail, CTLFLAG_RD,
		   &iflib_encap_pad_mbuf_fail, 0, "# runt frame pad failures");
SYSCTL_INT(_net_iflib, OID_AUTO, encap_txq_avail_fail, CTLFLAG_RD,
		   &iflib_encap_txq_avail_fail, 0, "# txq avail failures");
SYSCTL_INT(_net_iflib, OID_AUTO, encap_txd_encap_fail, CTLFLAG_RD,
		   &iflib_encap_txd_encap_fail, 0, "# driver encap failures");

static int iflib_task_fn_rxs;
static int iflib_rx_intr_enables;
static int iflib_fast_intrs;
static int iflib_intr_link;
static int iflib_intr_msix; 
static int iflib_rx_unavail;
static int iflib_rx_ctx_inactive;
static int iflib_rx_zero_len;
static int iflib_rx_if_input;
static int iflib_rx_mbuf_null;
static int iflib_rxd_flush;

static int iflib_verbose_debug;

SYSCTL_INT(_net_iflib, OID_AUTO, intr_link, CTLFLAG_RD,
		   &iflib_intr_link, 0, "# intr link calls");
SYSCTL_INT(_net_iflib, OID_AUTO, intr_msix, CTLFLAG_RD,
		   &iflib_intr_msix, 0, "# intr msix calls");
SYSCTL_INT(_net_iflib, OID_AUTO, task_fn_rx, CTLFLAG_RD,
		   &iflib_task_fn_rxs, 0, "# task_fn_rx calls");
SYSCTL_INT(_net_iflib, OID_AUTO, rx_intr_enables, CTLFLAG_RD,
		   &iflib_rx_intr_enables, 0, "# rx intr enables");
SYSCTL_INT(_net_iflib, OID_AUTO, fast_intrs, CTLFLAG_RD,
		   &iflib_fast_intrs, 0, "# fast_intr calls");
SYSCTL_INT(_net_iflib, OID_AUTO, rx_unavail, CTLFLAG_RD,
		   &iflib_rx_unavail, 0, "# times rxeof called with no available data");
SYSCTL_INT(_net_iflib, OID_AUTO, rx_ctx_inactive, CTLFLAG_RD,
		   &iflib_rx_ctx_inactive, 0, "# times rxeof called with inactive context");
SYSCTL_INT(_net_iflib, OID_AUTO, rx_zero_len, CTLFLAG_RD,
		   &iflib_rx_zero_len, 0, "# times rxeof saw zero len mbuf");
SYSCTL_INT(_net_iflib, OID_AUTO, rx_if_input, CTLFLAG_RD,
		   &iflib_rx_if_input, 0, "# times rxeof called if_input");
SYSCTL_INT(_net_iflib, OID_AUTO, rx_mbuf_null, CTLFLAG_RD,
		   &iflib_rx_mbuf_null, 0, "# times rxeof got null mbuf");
SYSCTL_INT(_net_iflib, OID_AUTO, rxd_flush, CTLFLAG_RD,
	         &iflib_rxd_flush, 0, "# times rxd_flush called");
SYSCTL_INT(_net_iflib, OID_AUTO, verbose_debug, CTLFLAG_RW,
		   &iflib_verbose_debug, 0, "enable verbose debugging");

/* Atomically increment the iflib_<name> counter declared above. */
#define DBG_COUNTER_INC(name) atomic_add_int(&(iflib_ ## name), 1)
/*
 * Zero every debug counter declared above.  iflib_verbose_debug is a
 * setting rather than a counter and is not part of the chain.
 */
static void
iflib_debug_reset(void)
{
	iflib_tx_seen = iflib_tx_sent = iflib_tx_encap = iflib_rx_allocs =
		iflib_fl_refills = iflib_fl_refills_large = iflib_tx_frees =
		iflib_txq_drain_flushing = iflib_txq_drain_oactive =
		iflib_txq_drain_notready = iflib_txq_drain_encapfail =
		iflib_encap_load_mbuf_fail = iflib_encap_pad_mbuf_fail =
		iflib_encap_txq_avail_fail = iflib_encap_txd_encap_fail =
		iflib_task_fn_rxs = iflib_rx_intr_enables = iflib_fast_intrs =
		iflib_intr_link = iflib_intr_msix = iflib_rx_unavail =
		iflib_rx_ctx_inactive = iflib_rx_zero_len = iflib_rx_if_input =
		iflib_rx_mbuf_null = iflib_rxd_flush = 0;
}

#else
/* Counters compiled out: both hooks become no-ops. */
#define DBG_COUNTER_INC(name)
static void iflib_debug_reset(void) {}
#endif



#define IFLIB_DEBUG 0

/* Forward declarations for file-local functions. */
static void iflib_tx_structures_free(if_ctx_t ctx);
static void iflib_rx_structures_free(if_ctx_t ctx);
static int iflib_queues_alloc(if_ctx_t ctx);
static int iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq);
static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, qidx_t cidx, qidx_t budget);
static int iflib_qset_structures_setup(if_ctx_t ctx);
static int iflib_msix_init(if_ctx_t ctx);
static int iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filterarg, int *rid, char *str);
static void iflib_txq_check_drain(iflib_txq_t txq, int budget);
static uint32_t iflib_txq_can_drain(struct ifmp_ring *);
static int iflib_register(if_ctx_t);
static void iflib_init_locked(if_ctx_t ctx);
static void iflib_add_device_sysctl_pre(if_ctx_t ctx);
static void iflib_add_device_sysctl_post(if_ctx_t ctx);
static void iflib_ifmp_purge(iflib_txq_t txq);
static void _iflib_pre_assert(if_softc_ctx_t scctx);
static void iflib_stop(if_ctx_t ctx);
static void iflib_if_init_locked(if_ctx_t ctx);
#ifndef __NO_STRICT_ALIGNMENT
static struct mbuf * iflib_fixup_rx(struct mbuf *m);
#endif

#ifdef DEV_NETMAP
/* NOTE(review): the three include targets below are missing in this copy of the file - restore from upstream */
#include
#include
#include

MODULE_DEPEND(iflib, netmap, 1, 1, 1);

static int netmap_fl_refill(iflib_rxq_t rxq, struct netmap_kring *kring, uint32_t nm_i, bool init);

/*
 * device-specific sysctl variables:
 *
 * iflib_crcstrip: 0: keep CRC in rx frames (default), 1: strip it.
* During regular operations the CRC is stripped, but on some
 *	hardware reception of frames not multiple of 64 is slower,
 *	so using crcstrip=0 helps in benchmarks.
 *
 * iflib_rx_miss, iflib_rx_miss_bufs:
 *	count packets that might be missed due to lost interrupts.
 */
SYSCTL_DECL(_dev_netmap);
/*
 * The xl driver by default strips CRCs and we do not override it.
 */

int iflib_crcstrip = 1;
SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_crcstrip,
    CTLFLAG_RW, &iflib_crcstrip, 1, "strip CRC on rx frames");

int iflib_rx_miss, iflib_rx_miss_bufs;
SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_rx_miss,
    CTLFLAG_RW, &iflib_rx_miss, 0, "potentially missed rx intr");
SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_rx_miss_bufs,
    CTLFLAG_RW, &iflib_rx_miss_bufs, 0, "potentially missed rx intr bufs");

/*
 * Register/unregister. We are already under netmap lock.
 * Only called on the first register or the last unregister.
 *
 * Under the context lock: disables interrupts, clears the RUNNING and
 * OACTIVE driver flags, sets or clears the netmap native flags according
 * to onoff, then stops and re-initializes the interface.  Returns 0 if
 * the interface is RUNNING afterwards and 1 otherwise; on 1 the native
 * flags are cleared again.
 */
static int
iflib_netmap_register(struct netmap_adapter *na, int onoff)
{
	struct ifnet *ifp = na->ifp;
	if_ctx_t ctx = ifp->if_softc;
	int status;

	CTX_LOCK(ctx);
	IFDI_INTR_DISABLE(ctx);

	/* Tell the stack that the interface is no longer active */
	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);

	if (!CTX_IS_VF(ctx))
		IFDI_CRCSTRIP_SET(ctx, onoff, iflib_crcstrip);

	/* enable or disable flags and callbacks in na and ifp */
	if (onoff) {
		nm_set_native_flags(na);
	} else {
		nm_clear_native_flags(na);
	}
	iflib_stop(ctx);
	iflib_init_locked(ctx);
	/* NOTE(review): unlike the call above, this one is not guarded by !CTX_IS_VF */
	IFDI_CRCSTRIP_SET(ctx, onoff, iflib_crcstrip); /* XXX why twice ? */
	status = ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1;
	if (status)
		nm_clear_native_flags(na);
	CTX_UNLOCK(ctx);
	return (status);
}

/*
 * Hand netmap buffers from nm_i up to (but excluding) the slot before
 * kring->rhead to the driver's rxd_refill method, in batches of at most
 * IFLIB_MAX_RX_REFRESH, using free list 0 of rxq.  With init set, every
 * buffer's DMA map is loaded; otherwise a map is reloaded only when the
 * slot has NS_BUF_CHANGED.  Returns 0 early when there is nothing to do
 * and init is false, or the result of netmap_ring_reinit() on a bad buffer.
 */
static int
netmap_fl_refill(iflib_rxq_t rxq, struct netmap_kring *kring, uint32_t nm_i, bool init)
{
	struct netmap_adapter *na = kring->na;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int head = kring->rhead;
	struct netmap_ring *ring = kring->ring;
	bus_dmamap_t *map;
	struct if_rxd_update iru;
	if_ctx_t ctx = rxq->ifr_ctx;
	iflib_fl_t fl = &rxq->ifr_fl[0];
	/* NOTE(review): nic_i is only assigned inside the loop below but is read after it */
	uint32_t refill_pidx, nic_i;

	if (nm_i == head && __predict_true(!init))
		return 0;
	iru_init(&iru, rxq, 0 /* flid */);
	map = fl->ifl_sds.ifsd_map;
	refill_pidx = netmap_idx_k2n(kring, nm_i);
	/*
	 * IMPORTANT: we must leave one free slot in the ring,
	 * so move head back by one unit
	 */
	head = nm_prev(head, lim);
	while (nm_i != head) {
		/* build one batch in the free list's staging arrays */
		for (int tmp_pidx = 0; tmp_pidx < IFLIB_MAX_RX_REFRESH && nm_i != head; tmp_pidx++) {
			struct netmap_slot *slot = &ring->slot[nm_i];
			void *addr = PNMB(na, slot, &fl->ifl_bus_addrs[tmp_pidx]);
			uint32_t nic_i_dma = refill_pidx;
			nic_i = netmap_idx_k2n(kring, nm_i);

			MPASS(tmp_pidx < IFLIB_MAX_RX_REFRESH);

			if (addr == NETMAP_BUF_BASE(na)) /* bad buf */
			        return netmap_ring_reinit(kring);

			fl->ifl_vm_addrs[tmp_pidx] = addr;
			if (__predict_false(init) && map) {
				netmap_load_map(na, fl->ifl_ifdi->idi_tag, map[nic_i], addr);
			} else if (map && (slot->flags & NS_BUF_CHANGED)) {
				/* buffer has changed, reload map */
				netmap_reload_map(na, fl->ifl_ifdi->idi_tag, map[nic_i], addr);
			}
			slot->flags &= ~NS_BUF_CHANGED;

			nm_i = nm_next(nm_i, lim);
			fl->ifl_rxd_idxs[tmp_pidx] = nic_i = nm_next(nic_i, lim);
			/* keep filling until the batch is full or head is reached */
			if (nm_i != head && tmp_pidx < IFLIB_MAX_RX_REFRESH-1)
				continue;

			/* submit the batch to the driver */
			iru.iru_pidx = refill_pidx;
			iru.iru_count = tmp_pidx+1;
			ctx->isc_rxd_refill(ctx->ifc_softc, &iru);

			refill_pidx = nic_i;
			if (map == NULL)
				continue;

			/* sync each buffer map of the batch just submitted */
			for (int n = 0; n < iru.iru_count; n++) {
				bus_dmamap_sync(fl->ifl_ifdi->idi_tag, map[nic_i_dma],
						BUS_DMASYNC_PREREAD);
				/* XXX - change this to not use the netmap func*/
				nic_i_dma = nm_next(nic_i_dma, lim);
			}
		}
	}
	kring->nr_hwcur =
head;

	/* sync the free list's descriptor ring map before the flush call */
	if (map)
		bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map,
				BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	/* NOTE(review): nic_i is assigned only inside the refill loop of this function - confirm it is always set here */
	ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, nic_i);
	return (0);
}

/*
 * Reconcile kernel and user view of the transmit ring.
 *
 * All information is in the kring.
 * Userspace wants to send packets up to the one before kring->rhead,
 * kernel knows kring->nr_hwcur is the first unsent packet.
 *
 * Here we push packets out (as many as possible), and possibly
 * reclaim buffers from previously completed transmission.
 *
 * The caller (netmap) guarantees that there is only one instance
 * running at any time. Any interference with other driver
 * methods should be handled by the individual drivers.
 */
static int
iflib_netmap_txsync(struct netmap_kring *kring, int flags)
{
	struct netmap_adapter *na = kring->na;
	struct ifnet *ifp = na->ifp;
	struct netmap_ring *ring = kring->ring;
	u_int nm_i;	/* index into the netmap ring */
	u_int nic_i;	/* index into the NIC ring */
	u_int n;
	u_int const lim = kring->nkr_num_slots - 1;
	u_int const head = kring->rhead;
	struct if_pkt_info pi;

	/*
	 * interrupts on every tx packet are expensive so request
	 * them every half ring, or where NS_REPORT is set
	 */
	u_int report_frequency = kring->nkr_num_slots >> 1;
	/* device-specific */
	if_ctx_t ctx = ifp->if_softc;
	iflib_txq_t txq = &ctx->ifc_txqs[kring->ring_id];

	if (txq->ift_sds.ifsd_map)
		bus_dmamap_sync(txq->ift_desc_tag, txq->ift_ifdi->idi_map,
				BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);


	/*
	 * First part: process new packets to send.
	 * nm_i is the current index in the netmap ring,
	 * nic_i is the corresponding index in the NIC ring.
	 *
	 * If we have packets to send (nm_i != head)
	 * iterate over the netmap ring, fetch length and update
	 * the corresponding slot in the NIC ring. Some drivers also
	 * need to update the buffer's physical address in the NIC slot
	 * even NS_BUF_CHANGED is not set (PNMB computes the addresses).
	 *
	 * The netmap_reload_map() calls is especially expensive,
	 * even when (as in this case) the tag is 0, so do only
	 * when the buffer has actually changed.
	 *
	 * If possible do not set the report/intr bit on all slots,
	 * but only a few times per ring or when NS_REPORT is set.
	 *
	 * Finally, on 10G and faster drivers, it might be useful
	 * to prefetch the next slot and txr entry.
	 */

	nm_i = netmap_idx_n2k(kring, kring->nr_hwcur);
	pkt_info_zero(&pi);
	pi.ipi_segs = txq->ift_segs;
	pi.ipi_qsidx = kring->ring_id;
	if (nm_i != head) {	/* we have new packets to send */
		nic_i = netmap_idx_k2n(kring, nm_i);

		__builtin_prefetch(&ring->slot[nm_i]);
		__builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i]);
		if (txq->ift_sds.ifsd_map)
			__builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i]);

		for (n = 0; nm_i != head; n++) {
			struct netmap_slot *slot = &ring->slot[nm_i];
			u_int len = slot->len;
			uint64_t paddr;
			void *addr = PNMB(na, slot, &paddr);
			/* NOTE(review): this local shadows the function's `flags' parameter */
			int flags = (slot->flags & NS_REPORT ||
				nic_i == 0 || nic_i == report_frequency) ?
				IPI_TX_INTR : 0;

			/* device-specific */
			/* each netmap slot is sent as a single-segment packet */
			pi.ipi_len = len;
			pi.ipi_segs[0].ds_addr = paddr;
			pi.ipi_segs[0].ds_len = len;
			pi.ipi_nsegs = 1;
			pi.ipi_ndescs = 0;
			pi.ipi_pidx = nic_i;
			pi.ipi_flags = flags;

			/* Fill the slot in the NIC ring. */
			ctx->isc_txd_encap(ctx->ifc_softc, &pi);

			/* prefetch for next round */
			__builtin_prefetch(&ring->slot[nm_i + 1]);
			__builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i + 1]);
			if (txq->ift_sds.ifsd_map) {
				__builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i + 1]);

				NM_CHECK_ADDR_LEN(na, addr, len);

				if (slot->flags & NS_BUF_CHANGED) {
					/* buffer has changed, reload map */
					netmap_reload_map(na, txq->ift_desc_tag, txq->ift_sds.ifsd_map[nic_i], addr);
				}
				/* make sure changes to the buffer are synced */
				/* NOTE(review): syncs with idi_tag while the reload above uses ift_desc_tag - confirm */
				bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_sds.ifsd_map[nic_i],
						BUS_DMASYNC_PREWRITE);
			}
			slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
			nm_i = nm_next(nm_i, lim);
			nic_i = nm_next(nic_i, lim);
		}
		kring->nr_hwcur = head;

		/* synchronize the NIC ring */
		if (txq->ift_sds.ifsd_map)
			bus_dmamap_sync(txq->ift_desc_tag, txq->ift_ifdi->idi_map,
						BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* (re)start the tx unit up to slot nic_i (excluded) */
		ctx->isc_txd_flush(ctx->ifc_softc, txq->ift_id, nic_i);
	}

	/*
	 * Second part: reclaim buffers for completed transmissions.
	 */
	if (iflib_tx_credits_update(ctx, txq)) {
		/* some tx completed, increment avail */
		nic_i = txq->ift_cidx_processed;
		kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
	}
	return (0);
}

/*
 * Reconcile kernel and user view of the receive ring.
 * Same as for the txsync, this routine must be efficient.
 * The caller guarantees a single invocations, but races against
 * the rest of the driver should be handled here.
 *
 * On call, kring->rhead is the first packet that userspace wants
 * to keep, and kring->rcur is the wakeup point.
 * The kernel has previously reported packets up to kring->rtail.
 *
 * If (flags & NAF_FORCE_READ) also check for incoming packets irrespective
 * of whether or not we received an interrupt.
*/ static int iflib_netmap_rxsync(struct netmap_kring *kring, int flags) { struct netmap_adapter *na = kring->na; struct netmap_ring *ring = kring->ring; uint32_t nm_i; /* index into the netmap ring */ uint32_t nic_i; /* index into the NIC ring */ u_int i, n; u_int const lim = kring->nkr_num_slots - 1; u_int const head = netmap_idx_n2k(kring, kring->rhead); int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; struct if_rxd_info ri; struct ifnet *ifp = na->ifp; if_ctx_t ctx = ifp->if_softc; iflib_rxq_t rxq = &ctx->ifc_rxqs[kring->ring_id]; iflib_fl_t fl = rxq->ifr_fl; if (head > lim) return netmap_ring_reinit(kring); /* XXX check sync modes */ for (i = 0, fl = rxq->ifr_fl; i < rxq->ifr_nfl; i++, fl++) { if (fl->ifl_sds.ifsd_map == NULL) continue; bus_dmamap_sync(rxq->ifr_fl[i].ifl_desc_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); } /* * First part: import newly received packets. * * nm_i is the index of the next free slot in the netmap ring, * nic_i is the index of the next received packet in the NIC ring, * and they may differ in case if_init() has been called while * in netmap mode. For the receive ring we have * * nic_i = rxr->next_check; * nm_i = kring->nr_hwtail (previous) * and * nm_i == (nic_i + kring->nkr_hwofs) % ring_size * * rxr->next_check is set to 0 on a ring reinit */ if (netmap_no_pendintr || force_update) { int crclen = iflib_crcstrip ? 0 : 4; int error, avail; for (i = 0; i < rxq->ifr_nfl; i++) { fl = &rxq->ifr_fl[i]; nic_i = fl->ifl_cidx; nm_i = netmap_idx_n2k(kring, nic_i); avail = iflib_rxd_avail(ctx, rxq, nic_i, USHRT_MAX); for (n = 0; avail > 0; n++, avail--) { rxd_info_zero(&ri); ri.iri_frags = rxq->ifr_frags; ri.iri_qsidx = kring->ring_id; ri.iri_ifp = ctx->ifc_ifp; ri.iri_cidx = nic_i; error = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri); ring->slot[nm_i].len = error ? 
0 : ri.iri_len - crclen; ring->slot[nm_i].flags = 0; if (fl->ifl_sds.ifsd_map) bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_sds.ifsd_map[nic_i], BUS_DMASYNC_POSTREAD); nm_i = nm_next(nm_i, lim); nic_i = nm_next(nic_i, lim); } if (n) { /* update the state variables */ if (netmap_no_pendintr && !force_update) { /* diagnostics */ iflib_rx_miss ++; iflib_rx_miss_bufs += n; } fl->ifl_cidx = nic_i; kring->nr_hwtail = netmap_idx_k2n(kring, nm_i); } kring->nr_kflags &= ~NKR_PENDINTR; } } /* * Second part: skip past packets that userspace has released. * (kring->nr_hwcur to head excluded), * and make the buffers available for reception. * As usual nm_i is the index in the netmap ring, * nic_i is the index in the NIC ring, and * nm_i == (nic_i + kring->nkr_hwofs) % ring_size */ /* XXX not sure how this will work with multiple free lists */ nm_i = netmap_idx_n2k(kring, kring->nr_hwcur); return (netmap_fl_refill(rxq, kring, nm_i, false)); } static void iflib_netmap_intr(struct netmap_adapter *na, int onoff) { struct ifnet *ifp = na->ifp; if_ctx_t ctx = ifp->if_softc; CTX_LOCK(ctx); if (onoff) { IFDI_INTR_ENABLE(ctx); } else { IFDI_INTR_DISABLE(ctx); } CTX_UNLOCK(ctx); } static int iflib_netmap_attach(if_ctx_t ctx) { struct netmap_adapter na; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; bzero(&na, sizeof(na)); na.ifp = ctx->ifc_ifp; na.na_flags = NAF_BDG_MAYSLEEP; MPASS(ctx->ifc_softc_ctx.isc_ntxqsets); MPASS(ctx->ifc_softc_ctx.isc_nrxqsets); na.num_tx_desc = scctx->isc_ntxd[0]; na.num_rx_desc = scctx->isc_nrxd[0]; na.nm_txsync = iflib_netmap_txsync; na.nm_rxsync = iflib_netmap_rxsync; na.nm_register = iflib_netmap_register; na.nm_intr = iflib_netmap_intr; na.num_tx_rings = ctx->ifc_softc_ctx.isc_ntxqsets; na.num_rx_rings = ctx->ifc_softc_ctx.isc_nrxqsets; return (netmap_attach(&na)); } static void iflib_netmap_txq_init(if_ctx_t ctx, iflib_txq_t txq) { struct netmap_adapter *na = NA(ctx->ifc_ifp); struct netmap_slot *slot; slot = netmap_reset(na, NR_TX, txq->ift_id, 0); if 
(slot == NULL)
		return;
	/* nothing to load when no per-descriptor maps were allocated */
	if (txq->ift_sds.ifsd_map == NULL)
		return;

	for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxd[0]; i++) {

		/*
		 * In netmap mode, set the map for the packet buffer.
		 * NOTE: Some drivers (not this one) also need to set
		 * the physical buffer address in the NIC ring.
		 * netmap_idx_n2k() maps a nic index, i, into the corresponding
		 * netmap slot index, si
		 */
		int si = netmap_idx_n2k(&na->tx_rings[txq->ift_id], i);
		netmap_load_map(na, txq->ift_desc_tag, txq->ift_sds.ifsd_map[i], NMB(na, slot + si));
	}
}

/*
 * Reset the netmap RX ring for rxq and run an initial refill
 * (init == true) starting from the netmap index that maps to NIC index 0.
 */
static void
iflib_netmap_rxq_init(if_ctx_t ctx, iflib_rxq_t rxq)
{
	struct netmap_adapter *na = NA(ctx->ifc_ifp);
	struct netmap_kring *kring = &na->rx_rings[rxq->ifr_id];
	struct netmap_slot *slot;
	uint32_t nm_i;

	slot = netmap_reset(na, NR_RX, rxq->ifr_id, 0);
	if (slot == NULL)
		return;
	nm_i = netmap_idx_n2k(kring, 0);
	netmap_fl_refill(rxq, kring, nm_i, true);
}

#define iflib_netmap_detach(ifp) netmap_detach(ifp)

#else
/* Without DEV_NETMAP the netmap hooks compile to nothing. */
#define iflib_netmap_txq_init(ctx, txq)
#define iflib_netmap_rxq_init(ctx, rxq)
#define iflib_netmap_detach(ifp)

#define iflib_netmap_attach(ctx) (0)
#define netmap_rx_irq(ifp, qid, budget) (0)
#define netmap_tx_irq(ifp, qid) do {} while (0)

#endif

#if defined(__i386__) || defined(__amd64__)
/* Issue a prefetcht0 for the memory at x. */
static __inline void
prefetch(void *x)
{
	__asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x));
}
/*
 * Issue a prefetcht0 for x and, when CACHE_LINE_SIZE is below 128, a
 * second one for the address CACHE_LINE_SIZE bytes after x.
 */
static __inline void
prefetch2cachelines(void *x)
{
	__asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x));
#if (CACHE_LINE_SIZE < 128)
	__asm volatile("prefetcht0 %0" :: "m" (*(((unsigned long *)x)+CACHE_LINE_SIZE/(sizeof(unsigned long)))));
#endif
}
#else
/* On other architectures the prefetch helpers expand to nothing. */
#define prefetch(x)
#define prefetch2cachelines(x)
#endif

/*
 * Point the refill-update descriptor iru at the staging arrays of free
 * list flid of rxq and copy in the queue id, buffer size and free-list id.
 * iru_pidx and iru_count are left for the caller to set.
 */
static void
iru_init(if_rxd_update_t iru, iflib_rxq_t rxq, uint8_t flid)
{
	iflib_fl_t fl;

	fl = &rxq->ifr_fl[flid];
	iru->iru_paddrs = fl->ifl_bus_addrs;
	iru->iru_vaddrs = &fl->ifl_vm_addrs[0];
	iru->iru_idxs = fl->ifl_rxd_idxs;
	iru->iru_qsidx = rxq->ifr_id;
	iru->iru_buf_size = fl->ifl_buf_size;
	iru->iru_flidx = fl->ifl_id;
}

static void
_iflib_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int err) { if (err) return; *(bus_addr_t *) arg = segs[0].ds_addr; } int iflib_dma_alloc(if_ctx_t ctx, int size, iflib_dma_info_t dma, int mapflags) { int err; if_shared_ctx_t sctx = ctx->ifc_sctx; device_t dev = ctx->ifc_dev; KASSERT(sctx->isc_q_align != 0, ("alignment value not initialized")); err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ sctx->isc_q_align, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ size, /* maxsize */ 1, /* nsegments */ size, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &dma->idi_tag); if (err) { device_printf(dev, "%s: bus_dma_tag_create failed: %d\n", __func__, err); goto fail_0; } err = bus_dmamem_alloc(dma->idi_tag, (void**) &dma->idi_vaddr, BUS_DMA_NOWAIT | BUS_DMA_COHERENT | BUS_DMA_ZERO, &dma->idi_map); if (err) { device_printf(dev, "%s: bus_dmamem_alloc(%ju) failed: %d\n", __func__, (uintmax_t)size, err); goto fail_1; } dma->idi_paddr = IF_BAD_DMA; err = bus_dmamap_load(dma->idi_tag, dma->idi_map, dma->idi_vaddr, size, _iflib_dmamap_cb, &dma->idi_paddr, mapflags | BUS_DMA_NOWAIT); if (err || dma->idi_paddr == IF_BAD_DMA) { device_printf(dev, "%s: bus_dmamap_load failed: %d\n", __func__, err); goto fail_2; } dma->idi_size = size; return (0); fail_2: bus_dmamem_free(dma->idi_tag, dma->idi_vaddr, dma->idi_map); fail_1: bus_dma_tag_destroy(dma->idi_tag); fail_0: dma->idi_tag = NULL; return (err); } int iflib_dma_alloc_multi(if_ctx_t ctx, int *sizes, iflib_dma_info_t *dmalist, int mapflags, int count) { int i, err; iflib_dma_info_t *dmaiter; dmaiter = dmalist; for (i = 0; i < count; i++, dmaiter++) { if ((err = iflib_dma_alloc(ctx, sizes[i], *dmaiter, mapflags)) != 0) break; } if (err) iflib_dma_free_multi(dmalist, i); return (err); } void iflib_dma_free(iflib_dma_info_t dma) { if (dma->idi_tag == NULL) return; if (dma->idi_paddr != 
IF_BAD_DMA) { bus_dmamap_sync(dma->idi_tag, dma->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(dma->idi_tag, dma->idi_map); dma->idi_paddr = IF_BAD_DMA; } if (dma->idi_vaddr != NULL) { bus_dmamem_free(dma->idi_tag, dma->idi_vaddr, dma->idi_map); dma->idi_vaddr = NULL; } bus_dma_tag_destroy(dma->idi_tag); dma->idi_tag = NULL; } void iflib_dma_free_multi(iflib_dma_info_t *dmalist, int count) { int i; iflib_dma_info_t *dmaiter = dmalist; for (i = 0; i < count; i++, dmaiter++) iflib_dma_free(*dmaiter); } #ifdef EARLY_AP_STARTUP static const int iflib_started = 1; #else /* * We used to abuse the smp_started flag to decide if the queues have been * fully initialized (by late taskqgroup_adjust() calls in a SYSINIT()). * That gave bad races, since the SYSINIT() runs strictly after smp_started * is set. Run a SYSINIT() strictly after that to just set a usable * completion flag. */ static int iflib_started; static void iflib_record_started(void *arg) { iflib_started = 1; } SYSINIT(iflib_record_started, SI_SUB_SMP + 1, SI_ORDER_FIRST, iflib_record_started, NULL); #endif static int iflib_fast_intr(void *arg) { iflib_filter_info_t info = arg; struct grouptask *gtask = info->ifi_task; if (!iflib_started) return (FILTER_HANDLED); DBG_COUNTER_INC(fast_intrs); if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED) return (FILTER_HANDLED); GROUPTASK_ENQUEUE(gtask); return (FILTER_HANDLED); } static int iflib_fast_intr_rxtx(void *arg) { iflib_filter_info_t info = arg; struct grouptask *gtask = info->ifi_task; iflib_rxq_t rxq = (iflib_rxq_t)info->ifi_ctx; if_ctx_t ctx; int i, cidx; if (!iflib_started) return (FILTER_HANDLED); DBG_COUNTER_INC(fast_intrs); if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED) return (FILTER_HANDLED); for (i = 0; i < rxq->ifr_ntxqirq; i++) { qidx_t txqid = rxq->ifr_txqid[i]; ctx = rxq->ifr_ctx; if (!ctx->isc_txd_credits_update(ctx->ifc_softc, txqid, false)) 
{ IFDI_TX_QUEUE_INTR_ENABLE(ctx, txqid); continue; } GROUPTASK_ENQUEUE(&ctx->ifc_txqs[txqid].ift_task); } if (ctx->ifc_sctx->isc_flags & IFLIB_HAS_RXCQ) cidx = rxq->ifr_cq_cidx; else cidx = rxq->ifr_fl[0].ifl_cidx; if (iflib_rxd_avail(ctx, rxq, cidx, 1)) GROUPTASK_ENQUEUE(gtask); else IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id); return (FILTER_HANDLED); } static int iflib_fast_intr_ctx(void *arg) { iflib_filter_info_t info = arg; struct grouptask *gtask = info->ifi_task; if (!iflib_started) return (FILTER_HANDLED); DBG_COUNTER_INC(fast_intrs); if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED) return (FILTER_HANDLED); GROUPTASK_ENQUEUE(gtask); return (FILTER_HANDLED); } static int _iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid, driver_filter_t filter, driver_intr_t handler, void *arg, char *name) { int rc, flags; struct resource *res; void *tag = NULL; device_t dev = ctx->ifc_dev; flags = RF_ACTIVE; if (ctx->ifc_flags & IFC_LEGACY) flags |= RF_SHAREABLE; MPASS(rid < 512); irq->ii_rid = rid; res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &irq->ii_rid, flags); if (res == NULL) { device_printf(dev, "failed to allocate IRQ for rid %d, name %s.\n", rid, name); return (ENOMEM); } irq->ii_res = res; KASSERT(filter == NULL || handler == NULL, ("filter and handler can't both be non-NULL")); rc = bus_setup_intr(dev, res, INTR_MPSAFE | INTR_TYPE_NET, filter, handler, arg, &tag); if (rc != 0) { device_printf(dev, "failed to setup interrupt for rid %d, name %s: %d\n", rid, name ? name : "unknown", rc); return (rc); } else if (name) bus_describe_intr(dev, res, tag, "%s", name); irq->ii_tag = tag; return (0); } /********************************************************************* * * Allocate memory for tx_buffer structures. The tx_buffer stores all * the information needed to transmit a packet on the wire. This is * called only once at attach, setup is done every reset. 
* **********************************************************************/

/*
 * Creates the regular and TSO DMA tags for txq, allocates the per-slot
 * flags and mbuf-pointer arrays and, on configurations that need them,
 * the per-slot DMA maps.  Returns 0 or an errno; on failure all TX
 * structures of the context are freed.
 */
static int
iflib_txsd_alloc(iflib_txq_t txq)
{
	if_ctx_t ctx = txq->ift_ctx;
	if_shared_ctx_t sctx = ctx->ifc_sctx;
	if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
	device_t dev = ctx->ifc_dev;
	int err, nsegments, ntsosegments;

	nsegments = scctx->isc_tx_nsegments;
	ntsosegments = scctx->isc_tx_tso_segments_max;
	MPASS(scctx->isc_ntxd[0] > 0);
	MPASS(scctx->isc_ntxd[txq->ift_br_offset] > 0);
	MPASS(nsegments > 0);
	MPASS(ntsosegments > 0);
	/*
	 * Setup DMA descriptor areas.
	 */
	if ((err = bus_dma_tag_create(bus_get_dma_tag(dev),
			       1, 0,			/* alignment, bounds */
			       BUS_SPACE_MAXADDR,	/* lowaddr */
			       BUS_SPACE_MAXADDR,	/* highaddr */
			       NULL, NULL,		/* filter, filterarg */
			       sctx->isc_tx_maxsize,		/* maxsize */
			       nsegments,	/* nsegments */
			       sctx->isc_tx_maxsegsize,	/* maxsegsize */
			       0,			/* flags */
			       NULL,			/* lockfunc */
			       NULL,			/* lockfuncarg */
			       &txq->ift_desc_tag))) {
		device_printf(dev,"Unable to allocate TX DMA tag: %d\n", err);
		device_printf(dev,"maxsize: %ju nsegments: %d maxsegsize: %ju\n",
		    (uintmax_t)sctx->isc_tx_maxsize, nsegments, (uintmax_t)sctx->isc_tx_maxsegsize);
		goto fail;
	}
	/* second tag, sized from the TSO limits */
	if ((err = bus_dma_tag_create(bus_get_dma_tag(dev),
			       1, 0,			/* alignment, bounds */
			       BUS_SPACE_MAXADDR,	/* lowaddr */
			       BUS_SPACE_MAXADDR,	/* highaddr */
			       NULL, NULL,		/* filter, filterarg */
			       scctx->isc_tx_tso_size_max,		/* maxsize */
			       ntsosegments,	/* nsegments */
			       scctx->isc_tx_tso_segsize_max,	/* maxsegsize */
			       0,			/* flags */
			       NULL,			/* lockfunc */
			       NULL,			/* lockfuncarg */
			       &txq->ift_tso_desc_tag))) {
		device_printf(dev,"Unable to allocate TX TSO DMA tag: %d\n", err);

		goto fail;
	}
	if (!(txq->ift_sds.ifsd_flags =
	    (uint8_t *) malloc(sizeof(uint8_t) *
	    scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate tx_buffer memory\n");
		err = ENOMEM;
		goto fail;
	}
	if (!(txq->ift_sds.ifsd_m =
	    (struct mbuf **) malloc(sizeof(struct mbuf *) *
	    scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate tx_buffer memory\n");
		err = ENOMEM;
		goto fail;
	}

        /* Create the descriptor buffer dma maps */
#if defined(ACPI_DMAR) || (! (defined(__i386__) || defined(__amd64__)))
	/* maps are only created when the context has IFC_DMAR set */
	if ((ctx->ifc_flags & IFC_DMAR) == 0)
		return (0);

	if (!(txq->ift_sds.ifsd_map =
	    (bus_dmamap_t *) malloc(sizeof(bus_dmamap_t) * scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate tx_buffer map memory\n");
		err = ENOMEM;
		goto fail;
	}

	for (int i = 0; i < scctx->isc_ntxd[txq->ift_br_offset]; i++) {
		err = bus_dmamap_create(txq->ift_desc_tag, 0, &txq->ift_sds.ifsd_map[i]);
		if (err != 0) {
			device_printf(dev, "Unable to create TX DMA map\n");
			goto fail;
		}
	}
#endif
	return (0);
fail:
	/* We free all, it handles case where we are in the middle */
	iflib_tx_structures_free(ctx);
	return (err);
}

/*
 * Unload and destroy the DMA map of TX slot i, if a map array exists and
 * the slot's map is non-NULL; the slot's map entry is then set to NULL.
 */
static void
iflib_txsd_destroy(if_ctx_t ctx, iflib_txq_t txq, int i)
{
	bus_dmamap_t map;

	map = NULL;
	if (txq->ift_sds.ifsd_map != NULL)
		map = txq->ift_sds.ifsd_map[i];
	if (map != NULL) {
		bus_dmamap_unload(txq->ift_desc_tag, map);
		bus_dmamap_destroy(txq->ift_desc_tag, map);
		txq->ift_sds.ifsd_map[i] = NULL;
	}
}

/*
 * Destroy every per-slot map of txq, free the map, mbuf-pointer and flags
 * arrays, and destroy both DMA tags.  Each pointer is NULLed after release.
 */
static void
iflib_txq_destroy(iflib_txq_t txq)
{
	if_ctx_t ctx = txq->ift_ctx;

	for (int i = 0; i < txq->ift_size; i++)
		iflib_txsd_destroy(ctx, txq, i);
	if (txq->ift_sds.ifsd_map != NULL) {
		free(txq->ift_sds.ifsd_map, M_IFLIB);
		txq->ift_sds.ifsd_map = NULL;
	}
	if (txq->ift_sds.ifsd_m != NULL) {
		free(txq->ift_sds.ifsd_m, M_IFLIB);
		txq->ift_sds.ifsd_m = NULL;
	}
	if (txq->ift_sds.ifsd_flags != NULL) {
		free(txq->ift_sds.ifsd_flags, M_IFLIB);
		txq->ift_sds.ifsd_flags = NULL;
	}
	if (txq->ift_desc_tag != NULL) {
		bus_dma_tag_destroy(txq->ift_desc_tag);
		txq->ift_desc_tag = NULL;
	}
	if (txq->ift_tso_desc_tag != NULL) {
		bus_dma_tag_destroy(txq->ift_tso_desc_tag);
		txq->ift_tso_desc_tag = NULL;
	}
}

/*
 * Free the mbuf held in TX slot i, if any: sync and unload the slot's DMA
 * map when a map array exists, then m_free() the mbuf and clear the slot.
 */
static void
iflib_txsd_free(if_ctx_t ctx, iflib_txq_t txq, int i)
{
	struct mbuf **mp;

	mp = &txq->ift_sds.ifsd_m[i];
	if (*mp == NULL)
		return;

	if (txq->ift_sds.ifsd_map != NULL) {
		bus_dmamap_sync(txq->ift_desc_tag,
				txq->ift_sds.ifsd_map[i],
				BUS_DMASYNC_POSTWRITE);
		bus_dmamap_unload(txq->ift_desc_tag,
				  txq->ift_sds.ifsd_map[i]);
	}
	/* NOTE(review): m_free() releases a single mbuf, not a chain - confirm each slot holds one mbuf */
	m_free(*mp);
	DBG_COUNTER_INC(tx_frees);
	*mp = NULL;
}

/*
 * Reset txq's software state (status, update frequency, ring indices and
 * size), zero its hardware descriptor memory, call the driver's
 * IFDI_TXQ_SETUP, and sync the descriptor maps.  Always returns 0.
 */
static int
iflib_txq_setup(iflib_txq_t txq)
{
	if_ctx_t ctx = txq->ift_ctx;
	if_softc_ctx_t scctx = &ctx->ifc_softc_ctx;
	iflib_dma_info_t di;
	int i;

	/* Set number of descriptors available */
	txq->ift_qstatus = IFLIB_QUEUE_IDLE;
	/* XXX make configurable */
	txq->ift_update_freq = IFLIB_DEFAULT_TX_UPDATE_FREQ;

	/* Reset indices */
	txq->ift_cidx_processed = 0;
	txq->ift_pidx = txq->ift_cidx = txq->ift_npending = 0;
	txq->ift_size = scctx->isc_ntxd[txq->ift_br_offset];

	/* ift_ifdi is walked as an array of ifc_nhwtxqs DMA areas */
	for (i = 0, di = txq->ift_ifdi; i < ctx->ifc_nhwtxqs; i++, di++)
		bzero((void *)di->idi_vaddr, di->idi_size);

	IFDI_TXQ_SETUP(ctx, txq->ift_id);
	for (i = 0, di = txq->ift_ifdi; i < ctx->ifc_nhwtxqs; i++, di++)
		bus_dmamap_sync(di->idi_tag, di->idi_map,
						BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	return (0);
}

/*********************************************************************
 *
 *  Allocate memory for rx_buffer structures. Since we use one
 *  rx_buffer per received packet, the maximum number of rx_buffer's
 *  that we'll need is equal to the number of receive descriptors
 *  that we've allocated.
* **********************************************************************/ static int iflib_rxsd_alloc(iflib_rxq_t rxq) { if_ctx_t ctx = rxq->ifr_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = ctx->ifc_dev; iflib_fl_t fl; int err; MPASS(scctx->isc_nrxd[0] > 0); MPASS(scctx->isc_nrxd[rxq->ifr_fl_offset] > 0); fl = rxq->ifr_fl; for (int i = 0; i < rxq->ifr_nfl; i++, fl++) { fl->ifl_size = scctx->isc_nrxd[rxq->ifr_fl_offset]; /* this isn't necessarily the same */ err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ sctx->isc_rx_maxsize, /* maxsize */ sctx->isc_rx_nsegments, /* nsegments */ sctx->isc_rx_maxsegsize, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &fl->ifl_desc_tag); if (err) { device_printf(dev, "%s: bus_dma_tag_create failed %d\n", __func__, err); goto fail; } if (!(fl->ifl_sds.ifsd_flags = (uint8_t *) malloc(sizeof(uint8_t) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); err = ENOMEM; goto fail; } if (!(fl->ifl_sds.ifsd_m = (struct mbuf **) malloc(sizeof(struct mbuf *) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); err = ENOMEM; goto fail; } if (!(fl->ifl_sds.ifsd_cl = (caddr_t *) malloc(sizeof(caddr_t) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); err = ENOMEM; goto fail; } /* Create the descriptor buffer dma maps */ #if defined(ACPI_DMAR) || (! 
(defined(__i386__) || defined(__amd64__))) if ((ctx->ifc_flags & IFC_DMAR) == 0) continue; if (!(fl->ifl_sds.ifsd_map = (bus_dmamap_t *) malloc(sizeof(bus_dmamap_t) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer map memory\n"); err = ENOMEM; goto fail; } for (int i = 0; i < scctx->isc_nrxd[rxq->ifr_fl_offset]; i++) { err = bus_dmamap_create(fl->ifl_desc_tag, 0, &fl->ifl_sds.ifsd_map[i]); if (err != 0) { device_printf(dev, "Unable to create RX buffer DMA map\n"); goto fail; } } #endif } return (0); fail: iflib_rx_structures_free(ctx); return (err); } /* * Internal service routines */ struct rxq_refill_cb_arg { int error; bus_dma_segment_t seg; int nseg; }; static void _rxq_refill_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { struct rxq_refill_cb_arg *cb_arg = arg; cb_arg->error = error; cb_arg->seg = segs[0]; cb_arg->nseg = nseg; } #ifdef ACPI_DMAR #define IS_DMAR(ctx) (ctx->ifc_flags & IFC_DMAR) #else #define IS_DMAR(ctx) (0) #endif /** * rxq_refill - refill an rxq free-buffer list * @ctx: the iflib context * @rxq: the free-list to refill * @n: the number of new buffers to allocate * * (Re)populate an rxq free-buffer list with up to @n new packet buffers. * The caller must assure that @n does not exceed the queue's capacity. 
*/ static void _iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count) { struct mbuf *m; int idx, frag_idx = fl->ifl_fragidx; int pidx = fl->ifl_pidx; caddr_t cl, *sd_cl; struct mbuf **sd_m; uint8_t *sd_flags; struct if_rxd_update iru; bus_dmamap_t *sd_map; int n, i = 0; uint64_t bus_addr; int err; qidx_t credits; sd_m = fl->ifl_sds.ifsd_m; sd_map = fl->ifl_sds.ifsd_map; sd_cl = fl->ifl_sds.ifsd_cl; sd_flags = fl->ifl_sds.ifsd_flags; idx = pidx; credits = fl->ifl_credits; n = count; MPASS(n > 0); MPASS(credits + n <= fl->ifl_size); if (pidx < fl->ifl_cidx) MPASS(pidx + n <= fl->ifl_cidx); if (pidx == fl->ifl_cidx && (credits < fl->ifl_size)) MPASS(fl->ifl_gen == 0); if (pidx > fl->ifl_cidx) MPASS(n <= fl->ifl_size - pidx + fl->ifl_cidx); DBG_COUNTER_INC(fl_refills); if (n > 8) DBG_COUNTER_INC(fl_refills_large); iru_init(&iru, fl->ifl_rxq, fl->ifl_id); while (n--) { /* * We allocate an uninitialized mbuf + cluster, mbuf is * initialized after rx. * * If the cluster is still set then we know a minimum sized packet was received */ bit_ffc_at(fl->ifl_rx_bitmap, frag_idx, fl->ifl_size, &frag_idx); if ((frag_idx < 0) || (frag_idx >= fl->ifl_size)) bit_ffc(fl->ifl_rx_bitmap, fl->ifl_size, &frag_idx); if ((cl = sd_cl[frag_idx]) == NULL) { if ((cl = sd_cl[frag_idx] = m_cljget(NULL, M_NOWAIT, fl->ifl_buf_size)) == NULL) break; #if MEMORY_LOGGING fl->ifl_cl_enqueued++; #endif } if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) { break; } #if MEMORY_LOGGING fl->ifl_m_enqueued++; #endif DBG_COUNTER_INC(rx_allocs); #if defined(__i386__) || defined(__amd64__) if (!IS_DMAR(ctx)) { bus_addr = pmap_kextract((vm_offset_t)cl); } else #endif { struct rxq_refill_cb_arg cb_arg; iflib_rxq_t q; cb_arg.error = 0; q = fl->ifl_rxq; MPASS(sd_map != NULL); MPASS(sd_map[frag_idx] != NULL); err = bus_dmamap_load(fl->ifl_desc_tag, sd_map[frag_idx], cl, fl->ifl_buf_size, _rxq_refill_cb, &cb_arg, 0); bus_dmamap_sync(fl->ifl_desc_tag, sd_map[frag_idx], BUS_DMASYNC_PREREAD); if (err != 0 || 
cb_arg.error) { /* * !zone_pack ? */ if (fl->ifl_zone == zone_pack) uma_zfree(fl->ifl_zone, cl); m_free(m); n = 0; goto done; } bus_addr = cb_arg.seg.ds_addr; } bit_set(fl->ifl_rx_bitmap, frag_idx); sd_flags[frag_idx] |= RX_SW_DESC_INUSE; MPASS(sd_m[frag_idx] == NULL); sd_cl[frag_idx] = cl; sd_m[frag_idx] = m; fl->ifl_rxd_idxs[i] = frag_idx; fl->ifl_bus_addrs[i] = bus_addr; fl->ifl_vm_addrs[i] = cl; credits++; i++; MPASS(credits <= fl->ifl_size); if (++idx == fl->ifl_size) { fl->ifl_gen = 1; idx = 0; } if (n == 0 || i == IFLIB_MAX_RX_REFRESH) { iru.iru_pidx = pidx; iru.iru_count = i; ctx->isc_rxd_refill(ctx->ifc_softc, &iru); i = 0; pidx = idx; fl->ifl_pidx = idx; fl->ifl_credits = credits; } } done: if (i) { iru.iru_pidx = pidx; iru.iru_count = i; ctx->isc_rxd_refill(ctx->ifc_softc, &iru); fl->ifl_pidx = idx; fl->ifl_credits = credits; } DBG_COUNTER_INC(rxd_flush); if (fl->ifl_pidx == 0) pidx = fl->ifl_size - 1; else pidx = fl->ifl_pidx - 1; if (sd_map) bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); ctx->isc_rxd_flush(ctx->ifc_softc, fl->ifl_rxq->ifr_id, fl->ifl_id, pidx); fl->ifl_fragidx = frag_idx; } static __inline void __iflib_fl_refill_lt(if_ctx_t ctx, iflib_fl_t fl, int max) { /* we avoid allowing pidx to catch up with cidx as it confuses ixl */ int32_t reclaimable = fl->ifl_size - fl->ifl_credits - 1; #ifdef INVARIANTS int32_t delta = fl->ifl_size - get_inuse(fl->ifl_size, fl->ifl_cidx, fl->ifl_pidx, fl->ifl_gen) - 1; #endif MPASS(fl->ifl_credits <= fl->ifl_size); MPASS(reclaimable == delta); if (reclaimable > 0) _iflib_fl_refill(ctx, fl, min(max, reclaimable)); } static void iflib_fl_bufs_free(iflib_fl_t fl) { iflib_dma_info_t idi = fl->ifl_ifdi; uint32_t i; for (i = 0; i < fl->ifl_size; i++) { struct mbuf **sd_m = &fl->ifl_sds.ifsd_m[i]; uint8_t *sd_flags = &fl->ifl_sds.ifsd_flags[i]; caddr_t *sd_cl = &fl->ifl_sds.ifsd_cl[i]; if (*sd_flags & RX_SW_DESC_INUSE) { if (fl->ifl_sds.ifsd_map != NULL) { 
bus_dmamap_t sd_map = fl->ifl_sds.ifsd_map[i]; bus_dmamap_unload(fl->ifl_desc_tag, sd_map); if (fl->ifl_rxq->ifr_ctx->ifc_in_detach) bus_dmamap_destroy(fl->ifl_desc_tag, sd_map); } if (*sd_m != NULL) { m_init(*sd_m, M_NOWAIT, MT_DATA, 0); uma_zfree(zone_mbuf, *sd_m); } if (*sd_cl != NULL) uma_zfree(fl->ifl_zone, *sd_cl); *sd_flags = 0; } else { MPASS(*sd_cl == NULL); MPASS(*sd_m == NULL); } #if MEMORY_LOGGING fl->ifl_m_dequeued++; fl->ifl_cl_dequeued++; #endif *sd_cl = NULL; *sd_m = NULL; } #ifdef INVARIANTS for (i = 0; i < fl->ifl_size; i++) { MPASS(fl->ifl_sds.ifsd_flags[i] == 0); MPASS(fl->ifl_sds.ifsd_cl[i] == NULL); MPASS(fl->ifl_sds.ifsd_m[i] == NULL); } #endif /* * Reset free list values */ fl->ifl_credits = fl->ifl_cidx = fl->ifl_pidx = fl->ifl_gen = fl->ifl_fragidx = 0; bzero(idi->idi_vaddr, idi->idi_size); } /********************************************************************* * * Initialize a receive ring and its buffers. * **********************************************************************/ static int iflib_fl_setup(iflib_fl_t fl) { iflib_rxq_t rxq = fl->ifl_rxq; if_ctx_t ctx = rxq->ifr_ctx; if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; bit_nclear(fl->ifl_rx_bitmap, 0, fl->ifl_size - 1); /* ** Free current RX buffer structs and their mbufs */ iflib_fl_bufs_free(fl); /* Now replenish the mbufs */ MPASS(fl->ifl_credits == 0); /* * XXX don't set the max_frame_size to larger * than the hardware can handle */ if (sctx->isc_max_frame_size <= 2048) fl->ifl_buf_size = MCLBYTES; #ifndef CONTIGMALLOC_WORKS else fl->ifl_buf_size = MJUMPAGESIZE; #else else if (sctx->isc_max_frame_size <= 4096) fl->ifl_buf_size = MJUMPAGESIZE; else if (sctx->isc_max_frame_size <= 9216) fl->ifl_buf_size = MJUM9BYTES; else fl->ifl_buf_size = MJUM16BYTES; #endif if (fl->ifl_buf_size > ctx->ifc_max_fl_buf_size) ctx->ifc_max_fl_buf_size = fl->ifl_buf_size; fl->ifl_cltype = m_gettype(fl->ifl_buf_size); fl->ifl_zone = m_getzone(fl->ifl_buf_size); /* avoid pre-allocating zillions of 
clusters to an idle card * potentially speeding up attach */ _iflib_fl_refill(ctx, fl, min(128, fl->ifl_size)); MPASS(min(128, fl->ifl_size) == fl->ifl_credits); if (min(128, fl->ifl_size) != fl->ifl_credits) return (ENOBUFS); /* * handle failure */ MPASS(rxq != NULL); MPASS(fl->ifl_ifdi != NULL); bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return (0); } /********************************************************************* * * Free receive ring data structures * **********************************************************************/ static void iflib_rx_sds_free(iflib_rxq_t rxq) { iflib_fl_t fl; int i; if (rxq->ifr_fl != NULL) { for (i = 0; i < rxq->ifr_nfl; i++) { fl = &rxq->ifr_fl[i]; if (fl->ifl_desc_tag != NULL) { bus_dma_tag_destroy(fl->ifl_desc_tag); fl->ifl_desc_tag = NULL; } free(fl->ifl_sds.ifsd_m, M_IFLIB); free(fl->ifl_sds.ifsd_cl, M_IFLIB); /* XXX destroy maps first */ free(fl->ifl_sds.ifsd_map, M_IFLIB); fl->ifl_sds.ifsd_m = NULL; fl->ifl_sds.ifsd_cl = NULL; fl->ifl_sds.ifsd_map = NULL; } free(rxq->ifr_fl, M_IFLIB); rxq->ifr_fl = NULL; rxq->ifr_cq_gen = rxq->ifr_cq_cidx = rxq->ifr_cq_pidx = 0; } } /* * MI independent logic * */ static void iflib_timer(void *arg) { iflib_txq_t txq = arg; if_ctx_t ctx = txq->ift_ctx; if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) return; /* ** Check on the state of the TX queue(s), this ** can be done without the lock because its RO ** and the HUNG state will be static if set. 
*/ IFDI_TIMER(ctx, txq->ift_id); if ((txq->ift_qstatus == IFLIB_QUEUE_HUNG) && ((txq->ift_cleaned_prev == txq->ift_cleaned) || (sctx->isc_pause_frames == 0))) goto hung; if (ifmp_ring_is_stalled(txq->ift_br)) txq->ift_qstatus = IFLIB_QUEUE_HUNG; txq->ift_cleaned_prev = txq->ift_cleaned; /* handle any laggards */ if (txq->ift_db_pending) GROUPTASK_ENQUEUE(&txq->ift_task); sctx->isc_pause_frames = 0; if (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING) callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, txq->ift_timer.c_cpu); return; hung: CTX_LOCK(ctx); if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); device_printf(ctx->ifc_dev, "TX(%d) desc avail = %d, pidx = %d\n", txq->ift_id, TXQ_AVAIL(txq), txq->ift_pidx); IFDI_WATCHDOG_RESET(ctx); ctx->ifc_watchdog_events++; ctx->ifc_flags |= IFC_DO_RESET; iflib_admin_intr_deferred(ctx); CTX_UNLOCK(ctx); } static void iflib_init_locked(if_ctx_t ctx) { if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; if_t ifp = ctx->ifc_ifp; iflib_fl_t fl; iflib_txq_t txq; iflib_rxq_t rxq; int i, j, tx_ip_csum_flags, tx_ip6_csum_flags; if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); IFDI_INTR_DISABLE(ctx); tx_ip_csum_flags = scctx->isc_tx_csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP); tx_ip6_csum_flags = scctx->isc_tx_csum_flags & (CSUM_IP6_TCP | CSUM_IP6_UDP | CSUM_IP6_SCTP); /* Set hardware offload abilities */ if_clearhwassist(ifp); if (if_getcapenable(ifp) & IFCAP_TXCSUM) if_sethwassistbits(ifp, tx_ip_csum_flags, 0); if (if_getcapenable(ifp) & IFCAP_TXCSUM_IPV6) if_sethwassistbits(ifp, tx_ip6_csum_flags, 0); if (if_getcapenable(ifp) & IFCAP_TSO4) if_sethwassistbits(ifp, CSUM_IP_TSO, 0); if (if_getcapenable(ifp) & IFCAP_TSO6) if_sethwassistbits(ifp, CSUM_IP6_TSO, 0); for (i = 0, txq = ctx->ifc_txqs; i < sctx->isc_ntxqsets; i++, txq++) { CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); iflib_netmap_txq_init(ctx, txq); } #ifdef INVARIANTS i 
= if_getdrvflags(ifp); #endif IFDI_INIT(ctx); MPASS(if_getdrvflags(ifp) == i); for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) { /* XXX this should really be done on a per-queue basis */ if (if_getcapenable(ifp) & IFCAP_NETMAP) { MPASS(rxq->ifr_id == i); iflib_netmap_rxq_init(ctx, rxq); continue; } for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) { if (iflib_fl_setup(fl)) { device_printf(ctx->ifc_dev, "freelist setup failed - check cluster settings\n"); goto done; } } } done: if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE); IFDI_INTR_ENABLE(ctx); txq = ctx->ifc_txqs; for (i = 0; i < sctx->isc_ntxqsets; i++, txq++) callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, txq->ift_timer.c_cpu); } static int iflib_media_change(if_t ifp) { if_ctx_t ctx = if_getsoftc(ifp); int err; CTX_LOCK(ctx); if ((err = IFDI_MEDIA_CHANGE(ctx)) == 0) iflib_init_locked(ctx); CTX_UNLOCK(ctx); return (err); } static void iflib_media_status(if_t ifp, struct ifmediareq *ifmr) { if_ctx_t ctx = if_getsoftc(ifp); CTX_LOCK(ctx); IFDI_UPDATE_ADMIN_STATUS(ctx); IFDI_MEDIA_STATUS(ctx, ifmr); CTX_UNLOCK(ctx); } static void iflib_stop(if_ctx_t ctx) { iflib_txq_t txq = ctx->ifc_txqs; iflib_rxq_t rxq = ctx->ifc_rxqs; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; iflib_dma_info_t di; iflib_fl_t fl; int i, j; /* Tell the stack that the interface is no longer active */ if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); IFDI_INTR_DISABLE(ctx); DELAY(1000); IFDI_STOP(ctx); DELAY(1000); iflib_debug_reset(); /* Wait for current tx queue users to exit to disarm watchdog timer. */ for (i = 0; i < scctx->isc_ntxqsets; i++, txq++) { /* make sure all transmitters have completed before proceeding XXX */ CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); /* clean any enqueued buffers */ iflib_ifmp_purge(txq); /* Free any existing tx buffers. 
*/ for (j = 0; j < txq->ift_size; j++) { iflib_txsd_free(ctx, txq, j); } txq->ift_processed = txq->ift_cleaned = txq->ift_cidx_processed = 0; txq->ift_in_use = txq->ift_gen = txq->ift_cidx = txq->ift_pidx = txq->ift_no_desc_avail = 0; txq->ift_closed = txq->ift_mbuf_defrag = txq->ift_mbuf_defrag_failed = 0; txq->ift_no_tx_dma_setup = txq->ift_txd_encap_efbig = txq->ift_map_failed = 0; txq->ift_pullups = 0; ifmp_ring_reset_stats(txq->ift_br); for (j = 0, di = txq->ift_ifdi; j < ctx->ifc_nhwtxqs; j++, di++) bzero((void *)di->idi_vaddr, di->idi_size); } for (i = 0; i < scctx->isc_nrxqsets; i++, rxq++) { /* make sure all transmitters have completed before proceeding XXX */ for (j = 0, di = txq->ift_ifdi; j < ctx->ifc_nhwrxqs; j++, di++) bzero((void *)di->idi_vaddr, di->idi_size); /* also resets the free lists pidx/cidx */ for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) iflib_fl_bufs_free(fl); } } static inline caddr_t calc_next_rxd(iflib_fl_t fl, int cidx) { qidx_t size; int nrxd; caddr_t start, end, cur, next; nrxd = fl->ifl_size; size = fl->ifl_rxd_size; start = fl->ifl_ifdi->idi_vaddr; if (__predict_false(size == 0)) return (start); cur = start + size*cidx; end = start + size*nrxd; next = CACHE_PTR_NEXT(cur); return (next < end ? 
next : start); } static inline void prefetch_pkts(iflib_fl_t fl, int cidx) { int nextptr; int nrxd = fl->ifl_size; caddr_t next_rxd; nextptr = (cidx + CACHE_PTR_INCREMENT) & (nrxd-1); prefetch(&fl->ifl_sds.ifsd_m[nextptr]); prefetch(&fl->ifl_sds.ifsd_cl[nextptr]); next_rxd = calc_next_rxd(fl, cidx); prefetch(next_rxd); prefetch(fl->ifl_sds.ifsd_m[(cidx + 1) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_m[(cidx + 2) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_m[(cidx + 3) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_m[(cidx + 4) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_cl[(cidx + 1) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_cl[(cidx + 2) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_cl[(cidx + 3) & (nrxd-1)]); prefetch(fl->ifl_sds.ifsd_cl[(cidx + 4) & (nrxd-1)]); } static void rxd_frag_to_sd(iflib_rxq_t rxq, if_rxd_frag_t irf, int unload, if_rxsd_t sd) { int flid, cidx; bus_dmamap_t map; iflib_fl_t fl; iflib_dma_info_t di; int next; map = NULL; flid = irf->irf_flid; cidx = irf->irf_idx; fl = &rxq->ifr_fl[flid]; sd->ifsd_fl = fl; sd->ifsd_cidx = cidx; sd->ifsd_m = &fl->ifl_sds.ifsd_m[cidx]; sd->ifsd_cl = &fl->ifl_sds.ifsd_cl[cidx]; fl->ifl_credits--; #if MEMORY_LOGGING fl->ifl_m_dequeued++; #endif if (rxq->ifr_ctx->ifc_flags & IFC_PREFETCH) prefetch_pkts(fl, cidx); if (fl->ifl_sds.ifsd_map != NULL) { next = (cidx + CACHE_PTR_INCREMENT) & (fl->ifl_size-1); prefetch(&fl->ifl_sds.ifsd_map[next]); map = fl->ifl_sds.ifsd_map[cidx]; di = fl->ifl_ifdi; next = (cidx + CACHE_LINE_SIZE) & (fl->ifl_size-1); prefetch(&fl->ifl_sds.ifsd_flags[next]); bus_dmamap_sync(di->idi_tag, di->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); /* not valid assert if bxe really does SGE from non-contiguous elements */ MPASS(fl->ifl_cidx == cidx); if (unload) bus_dmamap_unload(fl->ifl_desc_tag, map); } fl->ifl_cidx = (fl->ifl_cidx + 1) & (fl->ifl_size-1); if (__predict_false(fl->ifl_cidx == 0)) fl->ifl_gen = 0; if (map != NULL) bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_PREREAD | 
BUS_DMASYNC_PREWRITE); bit_clear(fl->ifl_rx_bitmap, cidx); } static struct mbuf * assemble_segments(iflib_rxq_t rxq, if_rxd_info_t ri, if_rxsd_t sd) { int i, padlen , flags; struct mbuf *m, *mh, *mt; caddr_t cl; i = 0; mh = NULL; do { rxd_frag_to_sd(rxq, &ri->iri_frags[i], TRUE, sd); MPASS(*sd->ifsd_cl != NULL); MPASS(*sd->ifsd_m != NULL); /* Don't include zero-length frags */ if (ri->iri_frags[i].irf_len == 0) { /* XXX we can save the cluster here, but not the mbuf */ m_init(*sd->ifsd_m, M_NOWAIT, MT_DATA, 0); m_free(*sd->ifsd_m); *sd->ifsd_m = NULL; continue; } m = *sd->ifsd_m; *sd->ifsd_m = NULL; if (mh == NULL) { flags = M_PKTHDR|M_EXT; mh = mt = m; padlen = ri->iri_pad; } else { flags = M_EXT; mt->m_next = m; mt = m; /* assuming padding is only on the first fragment */ padlen = 0; } cl = *sd->ifsd_cl; *sd->ifsd_cl = NULL; /* Can these two be made one ? */ m_init(m, M_NOWAIT, MT_DATA, flags); m_cljset(m, cl, sd->ifsd_fl->ifl_cltype); /* * These must follow m_init and m_cljset */ m->m_data += padlen; ri->iri_len -= padlen; m->m_len = ri->iri_frags[i].irf_len; } while (++i < ri->iri_nfrags); return (mh); } /* * Process one software descriptor */ static struct mbuf * iflib_rxd_pkt_get(iflib_rxq_t rxq, if_rxd_info_t ri) { struct if_rxsd sd; struct mbuf *m; /* should I merge this back in now that the two paths are basically duplicated? 
*/ if (ri->iri_nfrags == 1 && ri->iri_frags[0].irf_len <= MIN(IFLIB_RX_COPY_THRESH, MHLEN)) { rxd_frag_to_sd(rxq, &ri->iri_frags[0], FALSE, &sd); m = *sd.ifsd_m; *sd.ifsd_m = NULL; m_init(m, M_NOWAIT, MT_DATA, M_PKTHDR); #ifndef __NO_STRICT_ALIGNMENT if (!IP_ALIGNED(m)) m->m_data += 2; #endif memcpy(m->m_data, *sd.ifsd_cl, ri->iri_len); m->m_len = ri->iri_frags[0].irf_len; } else { m = assemble_segments(rxq, ri, &sd); } m->m_pkthdr.len = ri->iri_len; m->m_pkthdr.rcvif = ri->iri_ifp; m->m_flags |= ri->iri_flags; m->m_pkthdr.ether_vtag = ri->iri_vtag; m->m_pkthdr.flowid = ri->iri_flowid; M_HASHTYPE_SET(m, ri->iri_rsstype); m->m_pkthdr.csum_flags = ri->iri_csum_flags; m->m_pkthdr.csum_data = ri->iri_csum_data; return (m); } #if defined(INET6) || defined(INET) static void iflib_get_ip_forwarding(struct lro_ctrl *lc, bool *v4, bool *v6) { CURVNET_SET(lc->ifp->if_vnet); #if defined(INET6) *v6 = VNET(ip6_forwarding); #endif #if defined(INET) *v4 = VNET(ipforwarding); #endif CURVNET_RESTORE(); } /* * Returns true if it's possible this packet could be LROed. * if it returns false, it is guaranteed that tcp_lro_rx() * would not return zero. 
*/ static bool iflib_check_lro_possible(struct mbuf *m, bool v4_forwarding, bool v6_forwarding) { struct ether_header *eh; uint16_t eh_type; eh = mtod(m, struct ether_header *); eh_type = ntohs(eh->ether_type); switch (eh_type) { #if defined(INET6) case ETHERTYPE_IPV6: return !v6_forwarding; #endif #if defined (INET) case ETHERTYPE_IP: return !v4_forwarding; #endif } return false; } #else static void iflib_get_ip_forwarding(struct lro_ctrl *lc __unused, bool *v4 __unused, bool *v6 __unused) { } #endif static bool iflib_rxeof(iflib_rxq_t rxq, qidx_t budget) { if_ctx_t ctx = rxq->ifr_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; int avail, i; qidx_t *cidxp; struct if_rxd_info ri; int err, budget_left, rx_bytes, rx_pkts; iflib_fl_t fl; struct ifnet *ifp; int lro_enabled; bool lro_possible = false; bool v4_forwarding, v6_forwarding; /* * XXX early demux data packets so that if_input processing only handles * acks in interrupt context */ struct mbuf *m, *mh, *mt, *mf; ifp = ctx->ifc_ifp; mh = mt = NULL; MPASS(budget > 0); rx_pkts = rx_bytes = 0; if (sctx->isc_flags & IFLIB_HAS_RXCQ) cidxp = &rxq->ifr_cq_cidx; else cidxp = &rxq->ifr_fl[0].ifl_cidx; if ((avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget)) == 0) { for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++) __iflib_fl_refill_lt(ctx, fl, budget + 8); DBG_COUNTER_INC(rx_unavail); return (false); } for (budget_left = budget; (budget_left > 0) && (avail > 0); budget_left--, avail--) { if (__predict_false(!CTX_ACTIVE(ctx))) { DBG_COUNTER_INC(rx_ctx_inactive); break; } /* * Reset client set fields to their default values */ rxd_info_zero(&ri); ri.iri_qsidx = rxq->ifr_id; ri.iri_cidx = *cidxp; ri.iri_ifp = ifp; ri.iri_frags = rxq->ifr_frags; err = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri); if (err) goto err; if (sctx->isc_flags & IFLIB_HAS_RXCQ) { *cidxp = ri.iri_cidx; /* Update our consumer index */ /* XXX NB: shurd - check if this is still safe */ while 
(rxq->ifr_cq_cidx >= scctx->isc_nrxd[0]) { rxq->ifr_cq_cidx -= scctx->isc_nrxd[0]; rxq->ifr_cq_gen = 0; } /* was this only a completion queue message? */ if (__predict_false(ri.iri_nfrags == 0)) continue; } MPASS(ri.iri_nfrags != 0); MPASS(ri.iri_len != 0); /* will advance the cidx on the corresponding free lists */ m = iflib_rxd_pkt_get(rxq, &ri); if (avail == 0 && budget_left) avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget_left); if (__predict_false(m == NULL)) { DBG_COUNTER_INC(rx_mbuf_null); continue; } /* imm_pkt: -- cxgb */ if (mh == NULL) mh = mt = m; else { mt->m_nextpkt = m; mt = m; } } /* make sure that we can refill faster than drain */ for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++) __iflib_fl_refill_lt(ctx, fl, budget + 8); lro_enabled = (if_getcapenable(ifp) & IFCAP_LRO); if (lro_enabled) iflib_get_ip_forwarding(&rxq->ifr_lc, &v4_forwarding, &v6_forwarding); mt = mf = NULL; while (mh != NULL) { m = mh; mh = mh->m_nextpkt; m->m_nextpkt = NULL; #ifndef __NO_STRICT_ALIGNMENT if (!IP_ALIGNED(m) && (m = iflib_fixup_rx(m)) == NULL) continue; #endif rx_bytes += m->m_pkthdr.len; rx_pkts++; #if defined(INET6) || defined(INET) if (lro_enabled) { if (!lro_possible) { lro_possible = iflib_check_lro_possible(m, v4_forwarding, v6_forwarding); if (lro_possible && mf != NULL) { ifp->if_input(ifp, mf); DBG_COUNTER_INC(rx_if_input); mt = mf = NULL; } } if ((m->m_pkthdr.csum_flags & (CSUM_L4_CALC|CSUM_L4_VALID)) == (CSUM_L4_CALC|CSUM_L4_VALID)) { if (lro_possible && tcp_lro_rx(&rxq->ifr_lc, m, 0) == 0) continue; } } #endif if (lro_possible) { ifp->if_input(ifp, m); DBG_COUNTER_INC(rx_if_input); continue; } if (mf == NULL) mf = m; if (mt != NULL) mt->m_nextpkt = m; mt = m; } if (mf != NULL) { ifp->if_input(ifp, mf); DBG_COUNTER_INC(rx_if_input); } if_inc_counter(ifp, IFCOUNTER_IBYTES, rx_bytes); if_inc_counter(ifp, IFCOUNTER_IPACKETS, rx_pkts); /* * Flush any outstanding LRO work */ #if defined(INET6) || defined(INET) tcp_lro_flush_all(&rxq->ifr_lc); 
#endif if (avail) return true; return (iflib_rxd_avail(ctx, rxq, *cidxp, 1)); err: CTX_LOCK(ctx); ctx->ifc_flags |= IFC_DO_RESET; iflib_admin_intr_deferred(ctx); CTX_UNLOCK(ctx); return (false); } #define TXD_NOTIFY_COUNT(txq) (((txq)->ift_size / (txq)->ift_update_freq)-1) static inline qidx_t txq_max_db_deferred(iflib_txq_t txq, qidx_t in_use) { qidx_t notify_count = TXD_NOTIFY_COUNT(txq); qidx_t minthresh = txq->ift_size / 8; if (in_use > 4*minthresh) return (notify_count); if (in_use > 2*minthresh) return (notify_count >> 1); if (in_use > minthresh) return (notify_count >> 3); return (0); } static inline qidx_t txq_max_rs_deferred(iflib_txq_t txq) { qidx_t notify_count = TXD_NOTIFY_COUNT(txq); qidx_t minthresh = txq->ift_size / 8; if (txq->ift_in_use > 4*minthresh) return (notify_count); if (txq->ift_in_use > 2*minthresh) return (notify_count >> 1); if (txq->ift_in_use > minthresh) return (notify_count >> 2); return (2); } #define M_CSUM_FLAGS(m) ((m)->m_pkthdr.csum_flags) #define M_HAS_VLANTAG(m) (m->m_flags & M_VLANTAG) #define TXQ_MAX_DB_DEFERRED(txq, in_use) txq_max_db_deferred((txq), (in_use)) #define TXQ_MAX_RS_DEFERRED(txq) txq_max_rs_deferred(txq) #define TXQ_MAX_DB_CONSUMED(size) (size >> 4) /* forward compatibility for cxgb */ #define FIRST_QSET(ctx) 0 #define NTXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_ntxqsets) #define NRXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_nrxqsets) #define QIDX(ctx, m) ((((m)->m_pkthdr.flowid & ctx->ifc_softc_ctx.isc_rss_table_mask) % NTXQSETS(ctx)) + FIRST_QSET(ctx)) #define DESC_RECLAIMABLE(q) ((int)((q)->ift_processed - (q)->ift_cleaned - (q)->ift_ctx->ifc_softc_ctx.isc_tx_nsegments)) /* XXX we should be setting this to something other than zero */ #define RECLAIM_THRESH(ctx) ((ctx)->ifc_sctx->isc_tx_reclaim_thresh) #define MAX_TX_DESC(ctx) ((ctx)->ifc_softc_ctx.isc_tx_tso_segments_max) static inline bool iflib_txd_db_check(if_ctx_t ctx, iflib_txq_t txq, int ring, qidx_t in_use) { qidx_t dbval, max; bool rang; rang = false; max = 
TXQ_MAX_DB_DEFERRED(txq, in_use); if (ring || txq->ift_db_pending >= max) { dbval = txq->ift_npending ? txq->ift_npending : txq->ift_pidx; ctx->isc_txd_flush(ctx->ifc_softc, txq->ift_id, dbval); txq->ift_db_pending = txq->ift_npending = 0; rang = true; } return (rang); } #ifdef PKT_DEBUG static void print_pkt(if_pkt_info_t pi) { printf("pi len: %d qsidx: %d nsegs: %d ndescs: %d flags: %x pidx: %d\n", pi->ipi_len, pi->ipi_qsidx, pi->ipi_nsegs, pi->ipi_ndescs, pi->ipi_flags, pi->ipi_pidx); printf("pi new_pidx: %d csum_flags: %lx tso_segsz: %d mflags: %x vtag: %d\n", pi->ipi_new_pidx, pi->ipi_csum_flags, pi->ipi_tso_segsz, pi->ipi_mflags, pi->ipi_vtag); printf("pi etype: %d ehdrlen: %d ip_hlen: %d ipproto: %d\n", pi->ipi_etype, pi->ipi_ehdrlen, pi->ipi_ip_hlen, pi->ipi_ipproto); } #endif #define IS_TSO4(pi) ((pi)->ipi_csum_flags & CSUM_IP_TSO) #define IS_TSO6(pi) ((pi)->ipi_csum_flags & CSUM_IP6_TSO) static int iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp) { if_shared_ctx_t sctx = txq->ift_ctx->ifc_sctx; struct ether_vlan_header *eh; struct mbuf *m, *n; n = m = *mp; if ((sctx->isc_flags & IFLIB_NEED_SCRATCH) && M_WRITABLE(m) == 0) { if ((m = m_dup(m, M_NOWAIT)) == NULL) { return (ENOMEM); } else { m_freem(*mp); n = *mp = m; } } /* * Determine where frame payload starts. * Jump over vlan headers if already present, * helpful for QinQ too. 
*/ if (__predict_false(m->m_len < sizeof(*eh))) { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, sizeof(*eh))) == NULL)) return (ENOMEM); } eh = mtod(m, struct ether_vlan_header *); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { pi->ipi_etype = ntohs(eh->evl_proto); pi->ipi_ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { pi->ipi_etype = ntohs(eh->evl_encap_proto); pi->ipi_ehdrlen = ETHER_HDR_LEN; } switch (pi->ipi_etype) { #ifdef INET case ETHERTYPE_IP: { struct ip *ip = NULL; struct tcphdr *th = NULL; int minthlen; minthlen = min(m->m_pkthdr.len, pi->ipi_ehdrlen + sizeof(*ip) + sizeof(*th)); if (__predict_false(m->m_len < minthlen)) { /* * if this code bloat is causing too much of a hit * move it to a separate function and mark it noinline */ if (m->m_len == pi->ipi_ehdrlen) { n = m->m_next; MPASS(n); if (n->m_len >= sizeof(*ip)) { ip = (struct ip *)n->m_data; if (n->m_len >= (ip->ip_hl << 2) + sizeof(*th)) th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); } else { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, minthlen)) == NULL)) return (ENOMEM); ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); } } else { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, minthlen)) == NULL)) return (ENOMEM); ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th)) th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); } } else { ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th)) th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); } pi->ipi_ip_hlen = ip->ip_hl << 2; pi->ipi_ipproto = ip->ip_p; pi->ipi_flags |= IPI_TX_IPV4; if ((sctx->isc_flags & IFLIB_NEED_ZERO_CSUM) && (pi->ipi_csum_flags & CSUM_IP)) ip->ip_sum = 0; if (IS_TSO4(pi)) { if (pi->ipi_ipproto == IPPROTO_TCP) { if (__predict_false(th == NULL)) { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, (ip->ip_hl << 2) + sizeof(*th))) == NULL)) return (ENOMEM); th = (struct tcphdr 
*)((caddr_t)ip + pi->ipi_ip_hlen); } pi->ipi_tcp_hflags = th->th_flags; pi->ipi_tcp_hlen = th->th_off << 2; pi->ipi_tcp_seq = th->th_seq; } if (__predict_false(ip->ip_p != IPPROTO_TCP)) return (ENXIO); th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz; if (sctx->isc_flags & IFLIB_TSO_INIT_IP) { ip->ip_sum = 0; ip->ip_len = htons(pi->ipi_ip_hlen + pi->ipi_tcp_hlen + pi->ipi_tso_segsz); } } break; } #endif #ifdef INET6 case ETHERTYPE_IPV6: { struct ip6_hdr *ip6 = (struct ip6_hdr *)(m->m_data + pi->ipi_ehdrlen); struct tcphdr *th; pi->ipi_ip_hlen = sizeof(struct ip6_hdr); if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) { if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) == NULL)) return (ENOMEM); } th = (struct tcphdr *)((caddr_t)ip6 + pi->ipi_ip_hlen); /* XXX-BZ this will go badly in case of ext hdrs. */ pi->ipi_ipproto = ip6->ip6_nxt; pi->ipi_flags |= IPI_TX_IPV6; if (IS_TSO6(pi)) { if (pi->ipi_ipproto == IPPROTO_TCP) { if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) { if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) == NULL)) return (ENOMEM); } pi->ipi_tcp_hflags = th->th_flags; pi->ipi_tcp_hlen = th->th_off << 2; } if (__predict_false(ip6->ip6_nxt != IPPROTO_TCP)) return (ENXIO); /* * The corresponding flag is set by the stack in the IPv4 * TSO case, but not in IPv6 (at least in FreeBSD 10.2). * So, set it here because the rest of the flow requires it. 
*/ pi->ipi_csum_flags |= CSUM_TCP_IPV6; th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz; } break; } #endif default: pi->ipi_csum_flags &= ~CSUM_OFFLOAD; pi->ipi_ip_hlen = 0; break; } *mp = m; return (0); } static __noinline struct mbuf * collapse_pkthdr(struct mbuf *m0) { struct mbuf *m, *m_next, *tmp; m = m0; m_next = m->m_next; while (m_next != NULL && m_next->m_len == 0) { m = m_next; m->m_next = NULL; m_free(m); m_next = m_next->m_next; } m = m0; m->m_next = m_next; if ((m_next->m_flags & M_EXT) == 0) { m = m_defrag(m, M_NOWAIT); } else { tmp = m_next->m_next; memcpy(m_next, m, MPKTHSIZE); m = m_next; m->m_next = tmp; } return (m); } /* * If dodgy hardware rejects the scatter gather chain we've handed it * we'll need to remove the mbuf chain from ifsg_m[] before we can add the * m_defrag'd mbufs */ static __noinline struct mbuf * iflib_remove_mbuf(iflib_txq_t txq) { int ntxd, i, pidx; struct mbuf *m, *mh, **ifsd_m; pidx = txq->ift_pidx; ifsd_m = txq->ift_sds.ifsd_m; ntxd = txq->ift_size; mh = m = ifsd_m[pidx]; ifsd_m[pidx] = NULL; #if MEMORY_LOGGING txq->ift_dequeued++; #endif i = 1; while (m) { ifsd_m[(pidx + i) & (ntxd -1)] = NULL; #if MEMORY_LOGGING txq->ift_dequeued++; #endif m = m->m_next; i++; } return (mh); } static int iflib_busdma_load_mbuf_sg(iflib_txq_t txq, bus_dma_tag_t tag, bus_dmamap_t map, struct mbuf **m0, bus_dma_segment_t *segs, int *nsegs, int max_segs, int flags) { if_ctx_t ctx; if_shared_ctx_t sctx; if_softc_ctx_t scctx; int i, next, pidx, err, ntxd, count; struct mbuf *m, *tmp, **ifsd_m; m = *m0; /* * Please don't ever do this */ if (__predict_false(m->m_len == 0)) *m0 = m = collapse_pkthdr(m); ctx = txq->ift_ctx; sctx = ctx->ifc_sctx; scctx = &ctx->ifc_softc_ctx; ifsd_m = txq->ift_sds.ifsd_m; ntxd = txq->ift_size; pidx = txq->ift_pidx; if (map != NULL) { uint8_t *ifsd_flags = txq->ift_sds.ifsd_flags; err = bus_dmamap_load_mbuf_sg(tag, map, *m0, segs, nsegs, BUS_DMA_NOWAIT); if (err) return 
(err); ifsd_flags[pidx] |= TX_SW_DESC_MAPPED; count = 0; m = *m0; do { if (__predict_false(m->m_len <= 0)) { tmp = m; m = m->m_next; tmp->m_next = NULL; m_free(tmp); continue; } m = m->m_next; count++; } while (m != NULL); if (count > *nsegs) { ifsd_m[pidx] = *m0; ifsd_m[pidx]->m_flags |= M_TOOBIG; return (0); } m = *m0; count = 0; do { next = (pidx + count) & (ntxd-1); MPASS(ifsd_m[next] == NULL); ifsd_m[next] = m; count++; tmp = m; m = m->m_next; } while (m != NULL); } else { int buflen, sgsize, maxsegsz, max_sgsize; vm_offset_t vaddr; vm_paddr_t curaddr; count = i = 0; m = *m0; if (m->m_pkthdr.csum_flags & CSUM_TSO) maxsegsz = scctx->isc_tx_tso_segsize_max; else maxsegsz = sctx->isc_tx_maxsegsize; do { if (__predict_false(m->m_len <= 0)) { tmp = m; m = m->m_next; tmp->m_next = NULL; m_free(tmp); continue; } buflen = m->m_len; vaddr = (vm_offset_t)m->m_data; /* * see if we can't be smarter about physically * contiguous mappings */ next = (pidx + count) & (ntxd-1); MPASS(ifsd_m[next] == NULL); #if MEMORY_LOGGING txq->ift_enqueued++; #endif ifsd_m[next] = m; while (buflen > 0) { if (i >= max_segs) goto err; max_sgsize = MIN(buflen, maxsegsz); curaddr = pmap_kextract(vaddr); sgsize = PAGE_SIZE - (curaddr & PAGE_MASK); sgsize = MIN(sgsize, max_sgsize); segs[i].ds_addr = curaddr; segs[i].ds_len = sgsize; vaddr += sgsize; buflen -= sgsize; i++; } count++; tmp = m; m = m->m_next; } while (m != NULL); *nsegs = i; } return (0); err: *m0 = iflib_remove_mbuf(txq); return (EFBIG); } static inline caddr_t calc_next_txd(iflib_txq_t txq, int cidx, uint8_t qid) { qidx_t size; int ntxd; caddr_t start, end, cur, next; ntxd = txq->ift_size; size = txq->ift_txd_size[qid]; start = txq->ift_ifdi[qid].idi_vaddr; if (__predict_false(size == 0)) return (start); cur = start + size*cidx; end = start + size*ntxd; next = CACHE_PTR_NEXT(cur); return (next < end ? next : start); } /* * Pad an mbuf to ensure a minimum ethernet frame size. 
* min_frame_size is the frame size (less CRC) to pad the mbuf to */ static __noinline int iflib_ether_pad(device_t dev, struct mbuf **m_head, uint16_t min_frame_size) { /* * 18 is enough bytes to pad an ARP packet to 46 bytes, and * and ARP message is the smallest common payload I can think of */ static char pad[18]; /* just zeros */ int n; struct mbuf *new_head; if (!M_WRITABLE(*m_head)) { new_head = m_dup(*m_head, M_NOWAIT); if (new_head == NULL) { m_freem(*m_head); device_printf(dev, "cannot pad short frame, m_dup() failed"); DBG_COUNTER_INC(encap_pad_mbuf_fail); return ENOMEM; } m_freem(*m_head); *m_head = new_head; } for (n = min_frame_size - (*m_head)->m_pkthdr.len; n > 0; n -= sizeof(pad)) if (!m_append(*m_head, min(n, sizeof(pad)), pad)) break; if (n > 0) { m_freem(*m_head); device_printf(dev, "cannot pad short frame\n"); DBG_COUNTER_INC(encap_pad_mbuf_fail); return (ENOBUFS); } return 0; } static int iflib_encap(iflib_txq_t txq, struct mbuf **m_headp) { if_ctx_t ctx; if_shared_ctx_t sctx; if_softc_ctx_t scctx; bus_dma_segment_t *segs; struct mbuf *m_head; void *next_txd; bus_dmamap_t map; struct if_pkt_info pi; int remap = 0; int err, nsegs, ndesc, max_segs, pidx, cidx, next, ntxd; bus_dma_tag_t desc_tag; segs = txq->ift_segs; ctx = txq->ift_ctx; sctx = ctx->ifc_sctx; scctx = &ctx->ifc_softc_ctx; segs = txq->ift_segs; ntxd = txq->ift_size; m_head = *m_headp; map = NULL; /* * If we're doing TSO the next descriptor to clean may be quite far ahead */ cidx = txq->ift_cidx; pidx = txq->ift_pidx; if (ctx->ifc_flags & IFC_PREFETCH) { next = (cidx + CACHE_PTR_INCREMENT) & (ntxd-1); if (!(ctx->ifc_flags & IFLIB_HAS_TXCQ)) { next_txd = calc_next_txd(txq, cidx, 0); prefetch(next_txd); } /* prefetch the next cache line of mbuf pointers and flags */ prefetch(&txq->ift_sds.ifsd_m[next]); if (txq->ift_sds.ifsd_map != NULL) { prefetch(&txq->ift_sds.ifsd_map[next]); next = (cidx + CACHE_LINE_SIZE) & (ntxd-1); prefetch(&txq->ift_sds.ifsd_flags[next]); } } else if 
(txq->ift_sds.ifsd_map != NULL) map = txq->ift_sds.ifsd_map[pidx]; if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { desc_tag = txq->ift_tso_desc_tag; max_segs = scctx->isc_tx_tso_segments_max; } else { desc_tag = txq->ift_desc_tag; max_segs = scctx->isc_tx_nsegments; } if ((sctx->isc_flags & IFLIB_NEED_ETHER_PAD) && __predict_false(m_head->m_pkthdr.len < scctx->isc_min_frame_size)) { err = iflib_ether_pad(ctx->ifc_dev, m_headp, scctx->isc_min_frame_size); if (err) return err; } m_head = *m_headp; pkt_info_zero(&pi); pi.ipi_mflags = (m_head->m_flags & (M_VLANTAG|M_BCAST|M_MCAST)); pi.ipi_pidx = pidx; pi.ipi_qsidx = txq->ift_id; pi.ipi_len = m_head->m_pkthdr.len; pi.ipi_csum_flags = m_head->m_pkthdr.csum_flags; pi.ipi_vtag = (m_head->m_flags & M_VLANTAG) ? m_head->m_pkthdr.ether_vtag : 0; /* deliberate bitwise OR to make one condition */ if (__predict_true((pi.ipi_csum_flags | pi.ipi_vtag))) { if (__predict_false((err = iflib_parse_header(txq, &pi, m_headp)) != 0)) return (err); m_head = *m_headp; } retry: err = iflib_busdma_load_mbuf_sg(txq, desc_tag, map, m_headp, segs, &nsegs, max_segs, BUS_DMA_NOWAIT); defrag: if (__predict_false(err)) { switch (err) { case EFBIG: /* try collapse once and defrag once */ if (remap == 0) m_head = m_collapse(*m_headp, M_NOWAIT, max_segs); if (remap == 1) m_head = m_defrag(*m_headp, M_NOWAIT); remap++; if (__predict_false(m_head == NULL)) goto defrag_failed; txq->ift_mbuf_defrag++; *m_headp = m_head; goto retry; break; case ENOMEM: txq->ift_no_tx_dma_setup++; break; default: txq->ift_no_tx_dma_setup++; m_freem(*m_headp); DBG_COUNTER_INC(tx_frees); *m_headp = NULL; break; } txq->ift_map_failed++; DBG_COUNTER_INC(encap_load_mbuf_fail); return (err); } /* * XXX assumes a 1 to 1 relationship between segments and * descriptors - this does not hold true on all drivers, e.g. 
* cxgb */ if (__predict_false(nsegs + 2 > TXQ_AVAIL(txq))) { txq->ift_no_desc_avail++; if (map != NULL) bus_dmamap_unload(desc_tag, map); DBG_COUNTER_INC(encap_txq_avail_fail); if ((txq->ift_task.gt_task.ta_flags & TASK_ENQUEUED) == 0) GROUPTASK_ENQUEUE(&txq->ift_task); return (ENOBUFS); } /* * On Intel cards we can greatly reduce the number of TX interrupts * we see by only setting report status on every Nth descriptor. * However, this also means that the driver will need to keep track * of the descriptors that RS was set on to check them for the DD bit. */ txq->ift_rs_pending += nsegs + 1; if (txq->ift_rs_pending > TXQ_MAX_RS_DEFERRED(txq) || iflib_no_tx_batch || (TXQ_AVAIL(txq) - nsegs - 1) <= MAX_TX_DESC(ctx)) { pi.ipi_flags |= IPI_TX_INTR; txq->ift_rs_pending = 0; } pi.ipi_segs = segs; pi.ipi_nsegs = nsegs; MPASS(pidx >= 0 && pidx < txq->ift_size); #ifdef PKT_DEBUG print_pkt(&pi); #endif if (map != NULL) bus_dmamap_sync(desc_tag, map, BUS_DMASYNC_PREWRITE); if ((err = ctx->isc_txd_encap(ctx->ifc_softc, &pi)) == 0) { if (map != NULL) bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); DBG_COUNTER_INC(tx_encap); MPASS(pi.ipi_new_pidx < txq->ift_size); ndesc = pi.ipi_new_pidx - pi.ipi_pidx; if (pi.ipi_new_pidx < pi.ipi_pidx) { ndesc += txq->ift_size; txq->ift_gen = 1; } /* * drivers can need as many as * two sentinels */ MPASS(ndesc <= pi.ipi_nsegs + 2); MPASS(pi.ipi_new_pidx != pidx); MPASS(ndesc > 0); txq->ift_in_use += ndesc; /* * We update the last software descriptor again here because there may * be a sentinel and/or there may be more mbufs than segments */ txq->ift_pidx = pi.ipi_new_pidx; txq->ift_npending += pi.ipi_ndescs; } else if (__predict_false(err == EFBIG && remap < 2)) { *m_headp = m_head = iflib_remove_mbuf(txq); remap = 1; txq->ift_txd_encap_efbig++; goto defrag; } else DBG_COUNTER_INC(encap_txd_encap_fail); return (err); defrag_failed: txq->ift_mbuf_defrag_failed++; txq->ift_map_failed++; 
m_freem(*m_headp); DBG_COUNTER_INC(tx_frees); *m_headp = NULL; return (ENOMEM); } static void iflib_tx_desc_free(iflib_txq_t txq, int n) { int hasmap; uint32_t qsize, cidx, mask, gen; struct mbuf *m, **ifsd_m; uint8_t *ifsd_flags; bus_dmamap_t *ifsd_map; bool do_prefetch; cidx = txq->ift_cidx; gen = txq->ift_gen; qsize = txq->ift_size; mask = qsize-1; hasmap = txq->ift_sds.ifsd_map != NULL; ifsd_flags = txq->ift_sds.ifsd_flags; ifsd_m = txq->ift_sds.ifsd_m; ifsd_map = txq->ift_sds.ifsd_map; do_prefetch = (txq->ift_ctx->ifc_flags & IFC_PREFETCH); while (n--) { if (do_prefetch) { prefetch(ifsd_m[(cidx + 3) & mask]); prefetch(ifsd_m[(cidx + 4) & mask]); } if (ifsd_m[cidx] != NULL) { prefetch(&ifsd_m[(cidx + CACHE_PTR_INCREMENT) & mask]); prefetch(&ifsd_flags[(cidx + CACHE_PTR_INCREMENT) & mask]); if (hasmap && (ifsd_flags[cidx] & TX_SW_DESC_MAPPED)) { /* * does it matter if it's not the TSO tag? If so we'll * have to add the type to flags */ bus_dmamap_unload(txq->ift_desc_tag, ifsd_map[cidx]); ifsd_flags[cidx] &= ~TX_SW_DESC_MAPPED; } if ((m = ifsd_m[cidx]) != NULL) { /* XXX we don't support any drivers that batch packets yet */ MPASS(m->m_nextpkt == NULL); /* if the number of clusters exceeds the number of segments * there won't be space on the ring to save a pointer to each * cluster so we simply free the list here */ if (m->m_flags & M_TOOBIG) { m_freem(m); } else { m_free(m); } ifsd_m[cidx] = NULL; #if MEMORY_LOGGING txq->ift_dequeued++; #endif DBG_COUNTER_INC(tx_frees); } } if (__predict_false(++cidx == qsize)) { cidx = 0; gen = 0; } } txq->ift_cidx = cidx; txq->ift_gen = gen; } static __inline int iflib_completed_tx_reclaim(iflib_txq_t txq, int thresh) { int reclaim; if_ctx_t ctx = txq->ift_ctx; KASSERT(thresh >= 0, ("invalid threshold to reclaim")); MPASS(thresh /*+ MAX_TX_DESC(txq->ift_ctx) */ < txq->ift_size); /* * Need a rate-limiting check so that this isn't called every time */ iflib_tx_credits_update(ctx, txq); reclaim = DESC_RECLAIMABLE(txq); if 
(reclaim <= thresh /* + MAX_TX_DESC(txq->ift_ctx) */) { #ifdef INVARIANTS if (iflib_verbose_debug) { printf("%s processed=%ju cleaned=%ju tx_nsegments=%d reclaim=%d thresh=%d\n", __FUNCTION__, txq->ift_processed, txq->ift_cleaned, txq->ift_ctx->ifc_softc_ctx.isc_tx_nsegments, reclaim, thresh); } #endif return (0); } iflib_tx_desc_free(txq, reclaim); txq->ift_cleaned += reclaim; txq->ift_in_use -= reclaim; return (reclaim); } static struct mbuf ** _ring_peek_one(struct ifmp_ring *r, int cidx, int offset, int remaining) { int next, size; struct mbuf **items; size = r->size; next = (cidx + CACHE_PTR_INCREMENT) & (size-1); items = __DEVOLATILE(struct mbuf **, &r->items[0]); prefetch(items[(cidx + offset) & (size-1)]); if (remaining > 1) { prefetch2cachelines(&items[next]); prefetch2cachelines(items[(cidx + offset + 1) & (size-1)]); prefetch2cachelines(items[(cidx + offset + 2) & (size-1)]); prefetch2cachelines(items[(cidx + offset + 3) & (size-1)]); } return (__DEVOLATILE(struct mbuf **, &r->items[(cidx + offset) & (size-1)])); } static void iflib_txq_check_drain(iflib_txq_t txq, int budget) { ifmp_ring_check_drainage(txq->ift_br, budget); } static uint32_t iflib_txq_can_drain(struct ifmp_ring *r) { iflib_txq_t txq = r->cookie; if_ctx_t ctx = txq->ift_ctx; return ((TXQ_AVAIL(txq) > MAX_TX_DESC(ctx) + 2) || ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, false)); } static uint32_t iflib_txq_drain(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx) { iflib_txq_t txq = r->cookie; if_ctx_t ctx = txq->ift_ctx; struct ifnet *ifp = ctx->ifc_ifp; struct mbuf **mp, *m; int i, count, consumed, pkt_sent, bytes_sent, mcast_sent, avail; int reclaimed, err, in_use_prev, desc_used; bool do_prefetch, ring, rang; if (__predict_false(!(if_getdrvflags(ifp) & IFF_DRV_RUNNING) || !LINK_ACTIVE(ctx))) { DBG_COUNTER_INC(txq_drain_notready); return (0); } reclaimed = iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx)); rang = iflib_txd_db_check(ctx, txq, reclaimed, txq->ift_in_use); 
avail = IDXDIFF(pidx, cidx, r->size); if (__predict_false(ctx->ifc_flags & IFC_QFLUSH)) { DBG_COUNTER_INC(txq_drain_flushing); for (i = 0; i < avail; i++) { m_free(r->items[(cidx + i) & (r->size-1)]); r->items[(cidx + i) & (r->size-1)] = NULL; } return (avail); } if (__predict_false(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_OACTIVE)) { txq->ift_qstatus = IFLIB_QUEUE_IDLE; CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); DBG_COUNTER_INC(txq_drain_oactive); return (0); } if (reclaimed) txq->ift_qstatus = IFLIB_QUEUE_IDLE; consumed = mcast_sent = bytes_sent = pkt_sent = 0; count = MIN(avail, TX_BATCH_SIZE); #ifdef INVARIANTS if (iflib_verbose_debug) printf("%s avail=%d ifc_flags=%x txq_avail=%d ", __FUNCTION__, avail, ctx->ifc_flags, TXQ_AVAIL(txq)); #endif do_prefetch = (ctx->ifc_flags & IFC_PREFETCH); avail = TXQ_AVAIL(txq); for (desc_used = i = 0; i < count && avail > MAX_TX_DESC(ctx) + 2; i++) { int pidx_prev, rem = do_prefetch ? count - i : 0; mp = _ring_peek_one(r, cidx, i, rem); MPASS(mp != NULL && *mp != NULL); if (__predict_false(*mp == (struct mbuf *)txq)) { consumed++; reclaimed++; continue; } in_use_prev = txq->ift_in_use; pidx_prev = txq->ift_pidx; err = iflib_encap(txq, mp); if (__predict_false(err)) { DBG_COUNTER_INC(txq_drain_encapfail); /* no room - bail out */ if (err == ENOBUFS) break; consumed++; DBG_COUNTER_INC(txq_drain_encapfail); /* we can't send this packet - skip it */ continue; } consumed++; pkt_sent++; m = *mp; DBG_COUNTER_INC(tx_sent); bytes_sent += m->m_pkthdr.len; mcast_sent += !!(m->m_flags & M_MCAST); avail = TXQ_AVAIL(txq); txq->ift_db_pending += (txq->ift_in_use - in_use_prev); desc_used += (txq->ift_in_use - in_use_prev); ETHER_BPF_MTAP(ifp, m); if (__predict_false(!(ifp->if_drv_flags & IFF_DRV_RUNNING))) break; rang = iflib_txd_db_check(ctx, txq, false, in_use_prev); } /* deliberate use of bitwise or to avoid gratuitous short-circuit */ ring = rang ? 
false : (iflib_min_tx_latency | err) || (TXQ_AVAIL(txq) < MAX_TX_DESC(ctx)); iflib_txd_db_check(ctx, txq, ring, txq->ift_in_use); if_inc_counter(ifp, IFCOUNTER_OBYTES, bytes_sent); if_inc_counter(ifp, IFCOUNTER_OPACKETS, pkt_sent); if (mcast_sent) if_inc_counter(ifp, IFCOUNTER_OMCASTS, mcast_sent); #ifdef INVARIANTS if (iflib_verbose_debug) printf("consumed=%d\n", consumed); #endif return (consumed); } static uint32_t iflib_txq_drain_always(struct ifmp_ring *r) { return (1); } static uint32_t iflib_txq_drain_free(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx) { int i, avail; struct mbuf **mp; iflib_txq_t txq; txq = r->cookie; txq->ift_qstatus = IFLIB_QUEUE_IDLE; CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); avail = IDXDIFF(pidx, cidx, r->size); for (i = 0; i < avail; i++) { mp = _ring_peek_one(r, cidx, i, avail - i); if (__predict_false(*mp == (struct mbuf *)txq)) continue; m_freem(*mp); } MPASS(ifmp_ring_is_stalled(r) == 0); return (avail); } static void iflib_ifmp_purge(iflib_txq_t txq) { struct ifmp_ring *r; r = txq->ift_br; r->drain = iflib_txq_drain_free; r->can_drain = iflib_txq_drain_always; ifmp_ring_check_drainage(r, r->size); r->drain = iflib_txq_drain; r->can_drain = iflib_txq_can_drain; } static void _task_fn_tx(void *context) { iflib_txq_t txq = context; if_ctx_t ctx = txq->ift_ctx; struct ifnet *ifp = ctx->ifc_ifp; int rc; #ifdef IFLIB_DIAGNOSTICS txq->ift_cpu_exec_count[curcpu]++; #endif if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) return; if (if_getcapenable(ifp) & IFCAP_NETMAP) { if (ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, false)) netmap_tx_irq(ifp, txq->ift_id); IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id); return; } if (txq->ift_db_pending) ifmp_ring_enqueue(txq->ift_br, (void **)&txq, 1, TX_BATCH_SIZE); ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); if (ctx->ifc_flags & IFC_LEGACY) IFDI_INTR_ENABLE(ctx); else { rc = IFDI_TX_QUEUE_INTR_ENABLE(ctx, txq->ift_id); KASSERT(rc != ENOTSUP, 
("MSI-X support requires queue_intr_enable, but not implemented in driver")); } } static void _task_fn_rx(void *context) { iflib_rxq_t rxq = context; if_ctx_t ctx = rxq->ifr_ctx; bool more; int rc; uint16_t budget; #ifdef IFLIB_DIAGNOSTICS rxq->ifr_cpu_exec_count[curcpu]++; #endif DBG_COUNTER_INC(task_fn_rxs); if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) return; more = true; #ifdef DEV_NETMAP if (if_getcapenable(ctx->ifc_ifp) & IFCAP_NETMAP) { u_int work = 0; if (netmap_rx_irq(ctx->ifc_ifp, rxq->ifr_id, &work)) { more = false; } } #endif budget = ctx->ifc_sysctl_rx_budget; if (budget == 0) budget = 16; /* XXX */ if (more == false || (more = iflib_rxeof(rxq, budget)) == false) { if (ctx->ifc_flags & IFC_LEGACY) IFDI_INTR_ENABLE(ctx); else { DBG_COUNTER_INC(rx_intr_enables); rc = IFDI_RX_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id); KASSERT(rc != ENOTSUP, ("MSI-X support requires queue_intr_enable, but not implemented in driver")); } } if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) return; if (more) GROUPTASK_ENQUEUE(&rxq->ifr_task); } static void _task_fn_admin(void *context) { if_ctx_t ctx = context; if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; iflib_txq_t txq; int i; if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) { if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_OACTIVE)) { return; } } CTX_LOCK(ctx); for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) { CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); } IFDI_UPDATE_ADMIN_STATUS(ctx); for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, txq->ift_timer.c_cpu); IFDI_LINK_INTR_ENABLE(ctx); if (ctx->ifc_flags & IFC_DO_RESET) { ctx->ifc_flags &= ~IFC_DO_RESET; iflib_if_init_locked(ctx); } CTX_UNLOCK(ctx); if (LINK_ACTIVE(ctx) == 0) return; for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) iflib_txq_check_drain(txq, IFLIB_RESTART_BUDGET); } 
static void _task_fn_iov(void *context) { if_ctx_t ctx = context; if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) return; CTX_LOCK(ctx); IFDI_VFLR_HANDLE(ctx); CTX_UNLOCK(ctx); } static int iflib_sysctl_int_delay(SYSCTL_HANDLER_ARGS) { int err; if_int_delay_info_t info; if_ctx_t ctx; info = (if_int_delay_info_t)arg1; ctx = info->iidi_ctx; info->iidi_req = req; info->iidi_oidp = oidp; CTX_LOCK(ctx); err = IFDI_SYSCTL_INT_DELAY(ctx, info); CTX_UNLOCK(ctx); return (err); } /********************************************************************* * * IFNET FUNCTIONS * **********************************************************************/ static void iflib_if_init_locked(if_ctx_t ctx) { iflib_stop(ctx); iflib_init_locked(ctx); } static void iflib_if_init(void *arg) { if_ctx_t ctx = arg; CTX_LOCK(ctx); iflib_if_init_locked(ctx); CTX_UNLOCK(ctx); } static int iflib_if_transmit(if_t ifp, struct mbuf *m) { if_ctx_t ctx = if_getsoftc(ifp); iflib_txq_t txq; int err, qidx; if (__predict_false((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || !LINK_ACTIVE(ctx))) { DBG_COUNTER_INC(tx_frees); m_freem(m); return (ENOBUFS); } MPASS(m->m_nextpkt == NULL); qidx = 0; if ((NTXQSETS(ctx) > 1) && M_HASHTYPE_GET(m)) qidx = QIDX(ctx, m); /* * XXX calculate buf_ring based on flowid (divvy up bits?) 
*/ txq = &ctx->ifc_txqs[qidx]; #ifdef DRIVER_BACKPRESSURE if (txq->ift_closed) { while (m != NULL) { next = m->m_nextpkt; m->m_nextpkt = NULL; m_freem(m); m = next; } return (ENOBUFS); } #endif #ifdef notyet qidx = count = 0; mp = marr; next = m; do { count++; next = next->m_nextpkt; } while (next != NULL); if (count > nitems(marr)) if ((mp = malloc(count*sizeof(struct mbuf *), M_IFLIB, M_NOWAIT)) == NULL) { /* XXX check nextpkt */ m_freem(m); /* XXX simplify for now */ DBG_COUNTER_INC(tx_frees); return (ENOBUFS); } for (next = m, i = 0; next != NULL; i++) { mp[i] = next; next = next->m_nextpkt; mp[i]->m_nextpkt = NULL; } #endif DBG_COUNTER_INC(tx_seen); err = ifmp_ring_enqueue(txq->ift_br, (void **)&m, 1, TX_BATCH_SIZE); GROUPTASK_ENQUEUE(&txq->ift_task); if (err) { /* support forthcoming later */ #ifdef DRIVER_BACKPRESSURE txq->ift_closed = TRUE; #endif ifmp_ring_check_drainage(txq->ift_br, TX_BATCH_SIZE); m_freem(m); } return (err); } static void iflib_if_qflush(if_t ifp) { if_ctx_t ctx = if_getsoftc(ifp); iflib_txq_t txq = ctx->ifc_txqs; int i; CTX_LOCK(ctx); ctx->ifc_flags |= IFC_QFLUSH; CTX_UNLOCK(ctx); for (i = 0; i < NTXQSETS(ctx); i++, txq++) while (!(ifmp_ring_is_idle(txq->ift_br) || ifmp_ring_is_stalled(txq->ift_br))) iflib_txq_check_drain(txq, 0); CTX_LOCK(ctx); ctx->ifc_flags &= ~IFC_QFLUSH; CTX_UNLOCK(ctx); if_qflush(ifp); } #define IFCAP_FLAGS (IFCAP_TXCSUM_IPV6 | IFCAP_RXCSUM_IPV6 | IFCAP_HWCSUM | IFCAP_LRO | \ IFCAP_TSO4 | IFCAP_TSO6 | IFCAP_VLAN_HWTAGGING | IFCAP_HWSTATS | \ IFCAP_VLAN_MTU | IFCAP_VLAN_HWFILTER | IFCAP_VLAN_HWTSO) static int iflib_if_ioctl(if_t ifp, u_long command, caddr_t data) { if_ctx_t ctx = if_getsoftc(ifp); struct ifreq *ifr = (struct ifreq *)data; #if defined(INET) || defined(INET6) struct ifaddr *ifa = (struct ifaddr *)data; #endif bool avoid_reset = FALSE; int err = 0, reinit = 0, bits; switch (command) { case SIOCSIFADDR: #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) avoid_reset = TRUE; #endif #ifdef INET6 if 
(ifa->ifa_addr->sa_family == AF_INET6) avoid_reset = TRUE; #endif /* ** Calling init results in link renegotiation, ** so we avoid doing it when possible. */ if (avoid_reset) { if_setflagbits(ifp, IFF_UP,0); if (!(if_getdrvflags(ifp)& IFF_DRV_RUNNING)) reinit = 1; #ifdef INET if (!(if_getflags(ifp) & IFF_NOARP)) arp_ifinit(ifp, ifa); #endif } else err = ether_ioctl(ifp, command, data); break; case SIOCSIFMTU: CTX_LOCK(ctx); if (ifr->ifr_mtu == if_getmtu(ifp)) { CTX_UNLOCK(ctx); break; } bits = if_getdrvflags(ifp); /* stop the driver and free any clusters before proceeding */ iflib_stop(ctx); if ((err = IFDI_MTU_SET(ctx, ifr->ifr_mtu)) == 0) { if (ifr->ifr_mtu > ctx->ifc_max_fl_buf_size) ctx->ifc_flags |= IFC_MULTISEG; else ctx->ifc_flags &= ~IFC_MULTISEG; err = if_setmtu(ifp, ifr->ifr_mtu); } iflib_init_locked(ctx); if_setdrvflags(ifp, bits); CTX_UNLOCK(ctx); break; case SIOCSIFFLAGS: CTX_LOCK(ctx); if (if_getflags(ifp) & IFF_UP) { if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { if ((if_getflags(ifp) ^ ctx->ifc_if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) { err = IFDI_PROMISC_SET(ctx, if_getflags(ifp)); } } else reinit = 1; } else if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { iflib_stop(ctx); } ctx->ifc_if_flags = if_getflags(ifp); CTX_UNLOCK(ctx); break; case SIOCADDMULTI: case SIOCDELMULTI: if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { CTX_LOCK(ctx); IFDI_INTR_DISABLE(ctx); IFDI_MULTI_SET(ctx); IFDI_INTR_ENABLE(ctx); CTX_UNLOCK(ctx); } break; case SIOCSIFMEDIA: CTX_LOCK(ctx); IFDI_MEDIA_SET(ctx); CTX_UNLOCK(ctx); /* falls thru */ case SIOCGIFMEDIA: case SIOCGIFXMEDIA: err = ifmedia_ioctl(ifp, ifr, &ctx->ifc_media, command); break; case SIOCGI2C: { struct ifi2creq i2c; - err = copyin(ifr->ifr_data, &i2c, sizeof(i2c)); + err = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c)); if (err != 0) break; if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) { err = EINVAL; break; } if (i2c.len > sizeof(i2c.data)) { err = EINVAL; break; } if ((err = IFDI_I2C_REQ(ctx, &i2c)) == 0) - err 
= copyout(&i2c, ifr->ifr_data, sizeof(i2c)); + err = copyout(&i2c, ifr_data_get_ptr(ifr), + sizeof(i2c)); break; } case SIOCSIFCAP: { int mask, setmask; mask = ifr->ifr_reqcap ^ if_getcapenable(ifp); setmask = 0; #ifdef TCP_OFFLOAD setmask |= mask & (IFCAP_TOE4|IFCAP_TOE6); #endif setmask |= (mask & IFCAP_FLAGS); if (setmask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) setmask |= (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6); if ((mask & IFCAP_WOL) && (if_getcapabilities(ifp) & IFCAP_WOL) != 0) setmask |= (mask & (IFCAP_WOL_MCAST|IFCAP_WOL_MAGIC)); if_vlancap(ifp); /* * want to ensure that traffic has stopped before we change any of the flags */ if (setmask) { CTX_LOCK(ctx); bits = if_getdrvflags(ifp); if (bits & IFF_DRV_RUNNING) iflib_stop(ctx); if_togglecapenable(ifp, setmask); if (bits & IFF_DRV_RUNNING) iflib_init_locked(ctx); if_setdrvflags(ifp, bits); CTX_UNLOCK(ctx); } break; } case SIOCGPRIVATE_0: case SIOCSDRVSPEC: case SIOCGDRVSPEC: CTX_LOCK(ctx); err = IFDI_PRIV_IOCTL(ctx, command, data); CTX_UNLOCK(ctx); break; default: err = ether_ioctl(ifp, command, data); break; } if (reinit) iflib_if_init(ctx); return (err); } static uint64_t iflib_if_get_counter(if_t ifp, ift_counter cnt) { if_ctx_t ctx = if_getsoftc(ifp); return (IFDI_GET_COUNTER(ctx, cnt)); } /********************************************************************* * * OTHER FUNCTIONS EXPORTED TO THE STACK * **********************************************************************/ static void iflib_vlan_register(void *arg, if_t ifp, uint16_t vtag) { if_ctx_t ctx = if_getsoftc(ifp); if ((void *)ctx != arg) return; if ((vtag == 0) || (vtag > 4095)) return; CTX_LOCK(ctx); IFDI_VLAN_REGISTER(ctx, vtag); /* Re-init to load the changes */ if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) iflib_if_init_locked(ctx); CTX_UNLOCK(ctx); } static void iflib_vlan_unregister(void *arg, if_t ifp, uint16_t vtag) { if_ctx_t ctx = if_getsoftc(ifp); if ((void *)ctx != arg) return; if ((vtag == 0) || (vtag > 4095)) return; CTX_LOCK(ctx); 
IFDI_VLAN_UNREGISTER(ctx, vtag); /* Re-init to load the changes */ if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) iflib_if_init_locked(ctx); CTX_UNLOCK(ctx); } static void iflib_led_func(void *arg, int onoff) { if_ctx_t ctx = arg; CTX_LOCK(ctx); IFDI_LED_FUNC(ctx, onoff); CTX_UNLOCK(ctx); } /********************************************************************* * * BUS FUNCTION DEFINITIONS * **********************************************************************/ int iflib_device_probe(device_t dev) { pci_vendor_info_t *ent; uint16_t pci_vendor_id, pci_device_id; uint16_t pci_subvendor_id, pci_subdevice_id; uint16_t pci_rev_id; if_shared_ctx_t sctx; if ((sctx = DEVICE_REGISTER(dev)) == NULL || sctx->isc_magic != IFLIB_MAGIC) return (ENOTSUP); pci_vendor_id = pci_get_vendor(dev); pci_device_id = pci_get_device(dev); pci_subvendor_id = pci_get_subvendor(dev); pci_subdevice_id = pci_get_subdevice(dev); pci_rev_id = pci_get_revid(dev); if (sctx->isc_parse_devinfo != NULL) sctx->isc_parse_devinfo(&pci_device_id, &pci_subvendor_id, &pci_subdevice_id, &pci_rev_id); ent = sctx->isc_vendor_info; while (ent->pvi_vendor_id != 0) { if (pci_vendor_id != ent->pvi_vendor_id) { ent++; continue; } if ((pci_device_id == ent->pvi_device_id) && ((pci_subvendor_id == ent->pvi_subvendor_id) || (ent->pvi_subvendor_id == 0)) && ((pci_subdevice_id == ent->pvi_subdevice_id) || (ent->pvi_subdevice_id == 0)) && ((pci_rev_id == ent->pvi_rev_id) || (ent->pvi_rev_id == 0))) { device_set_desc_copy(dev, ent->pvi_name); /* this needs to be changed to zero if the bus probing code * ever stops re-probing on best match because the sctx * may have its values over written by register calls * in subsequent probes */ return (BUS_PROBE_DEFAULT); } ent++; } return (ENXIO); } int iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ctxp) { int err, rid, msix, msix_bar; if_ctx_t ctx; if_t ifp; if_softc_ctx_t scctx; int i; uint16_t main_txq; uint16_t main_rxq; ctx = malloc(sizeof(* 
ctx), M_IFLIB, M_WAITOK|M_ZERO); if (sc == NULL) { sc = malloc(sctx->isc_driver->size, M_IFLIB, M_WAITOK|M_ZERO); device_set_softc(dev, ctx); ctx->ifc_flags |= IFC_SC_ALLOCATED; } ctx->ifc_sctx = sctx; ctx->ifc_dev = dev; ctx->ifc_softc = sc; if ((err = iflib_register(ctx)) != 0) { device_printf(dev, "iflib_register failed %d\n", err); return (err); } iflib_add_device_sysctl_pre(ctx); scctx = &ctx->ifc_softc_ctx; ifp = ctx->ifc_ifp; /* * XXX sanity check that ntxd & nrxd are a power of 2 */ if (ctx->ifc_sysctl_ntxqs != 0) scctx->isc_ntxqsets = ctx->ifc_sysctl_ntxqs; if (ctx->ifc_sysctl_nrxqs != 0) scctx->isc_nrxqsets = ctx->ifc_sysctl_nrxqs; for (i = 0; i < sctx->isc_ntxqs; i++) { if (ctx->ifc_sysctl_ntxds[i] != 0) scctx->isc_ntxd[i] = ctx->ifc_sysctl_ntxds[i]; else scctx->isc_ntxd[i] = sctx->isc_ntxd_default[i]; } for (i = 0; i < sctx->isc_nrxqs; i++) { if (ctx->ifc_sysctl_nrxds[i] != 0) scctx->isc_nrxd[i] = ctx->ifc_sysctl_nrxds[i]; else scctx->isc_nrxd[i] = sctx->isc_nrxd_default[i]; } for (i = 0; i < sctx->isc_nrxqs; i++) { if (scctx->isc_nrxd[i] < sctx->isc_nrxd_min[i]) { device_printf(dev, "nrxd%d: %d less than nrxd_min %d - resetting to min\n", i, scctx->isc_nrxd[i], sctx->isc_nrxd_min[i]); scctx->isc_nrxd[i] = sctx->isc_nrxd_min[i]; } if (scctx->isc_nrxd[i] > sctx->isc_nrxd_max[i]) { device_printf(dev, "nrxd%d: %d greater than nrxd_max %d - resetting to max\n", i, scctx->isc_nrxd[i], sctx->isc_nrxd_max[i]); scctx->isc_nrxd[i] = sctx->isc_nrxd_max[i]; } } for (i = 0; i < sctx->isc_ntxqs; i++) { if (scctx->isc_ntxd[i] < sctx->isc_ntxd_min[i]) { device_printf(dev, "ntxd%d: %d less than ntxd_min %d - resetting to min\n", i, scctx->isc_ntxd[i], sctx->isc_ntxd_min[i]); scctx->isc_ntxd[i] = sctx->isc_ntxd_min[i]; } if (scctx->isc_ntxd[i] > sctx->isc_ntxd_max[i]) { device_printf(dev, "ntxd%d: %d greater than ntxd_max %d - resetting to max\n", i, scctx->isc_ntxd[i], sctx->isc_ntxd_max[i]); scctx->isc_ntxd[i] = sctx->isc_ntxd_max[i]; } } if ((err = 
IFDI_ATTACH_PRE(ctx)) != 0) { device_printf(dev, "IFDI_ATTACH_PRE failed %d\n", err); return (err); } _iflib_pre_assert(scctx); ctx->ifc_txrx = *scctx->isc_txrx; #ifdef INVARIANTS MPASS(scctx->isc_capenable); if (scctx->isc_capenable & IFCAP_TXCSUM) MPASS(scctx->isc_tx_csum_flags); #endif if_setcapabilities(ifp, scctx->isc_capenable | IFCAP_HWSTATS); if_setcapenable(ifp, scctx->isc_capenable | IFCAP_HWSTATS); if (scctx->isc_ntxqsets == 0 || (scctx->isc_ntxqsets_max && scctx->isc_ntxqsets_max < scctx->isc_ntxqsets)) scctx->isc_ntxqsets = scctx->isc_ntxqsets_max; if (scctx->isc_nrxqsets == 0 || (scctx->isc_nrxqsets_max && scctx->isc_nrxqsets_max < scctx->isc_nrxqsets)) scctx->isc_nrxqsets = scctx->isc_nrxqsets_max; #ifdef ACPI_DMAR if (dmar_get_dma_tag(device_get_parent(dev), dev) != NULL) ctx->ifc_flags |= IFC_DMAR; #elif !(defined(__i386__) || defined(__amd64__)) /* set unconditionally for !x86 */ ctx->ifc_flags |= IFC_DMAR; #endif msix_bar = scctx->isc_msix_bar; main_txq = (sctx->isc_flags & IFLIB_HAS_TXCQ) ? 1 : 0; main_rxq = (sctx->isc_flags & IFLIB_HAS_RXCQ) ? 1 : 0; /* XXX change for per-queue sizes */ device_printf(dev, "using %d tx descriptors and %d rx descriptors\n", scctx->isc_ntxd[main_txq], scctx->isc_nrxd[main_rxq]); for (i = 0; i < sctx->isc_nrxqs; i++) { if (!powerof2(scctx->isc_nrxd[i])) { /* round down instead? 
*/ device_printf(dev, "# rx descriptors must be a power of 2\n"); err = EINVAL; goto fail; } } for (i = 0; i < sctx->isc_ntxqs; i++) { if (!powerof2(scctx->isc_ntxd[i])) { device_printf(dev, "# tx descriptors must be a power of 2"); err = EINVAL; goto fail; } } if (scctx->isc_tx_nsegments > scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION) scctx->isc_tx_nsegments = max(1, scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION); if (scctx->isc_tx_tso_segments_max > scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION) scctx->isc_tx_tso_segments_max = max(1, scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION); /* * Protect the stack against modern hardware */ if (scctx->isc_tx_tso_size_max > FREEBSD_TSO_SIZE_MAX) scctx->isc_tx_tso_size_max = FREEBSD_TSO_SIZE_MAX; /* TSO parameters - dig these out of the data sheet - simply correspond to tag setup */ ifp->if_hw_tsomaxsegcount = scctx->isc_tx_tso_segments_max; ifp->if_hw_tsomax = scctx->isc_tx_tso_size_max; ifp->if_hw_tsomaxsegsize = scctx->isc_tx_tso_segsize_max; if (scctx->isc_rss_table_size == 0) scctx->isc_rss_table_size = 64; scctx->isc_rss_table_mask = scctx->isc_rss_table_size-1; GROUPTASK_INIT(&ctx->ifc_admin_task, 0, _task_fn_admin, ctx); /* XXX format name */ taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx, -1, "admin"); /* Set up cpu set. If it fails, use the set of all CPUs. */ if (bus_get_cpus(dev, INTR_CPUS, sizeof(ctx->ifc_cpus), &ctx->ifc_cpus) != 0) { device_printf(dev, "Unable to fetch CPU list\n"); CPU_COPY(&all_cpus, &ctx->ifc_cpus); } MPASS(CPU_COUNT(&ctx->ifc_cpus) > 0); /* ** Now setup MSI or MSI/X, should ** return us the number of supported ** vectors. (Will be 1 for MSI) */ if (sctx->isc_flags & IFLIB_SKIP_MSIX) { msix = scctx->isc_vectors; } else if (scctx->isc_msix_bar != 0) /* * The simple fact that isc_msix_bar is not 0 does not mean we * we have a good value there that is known to work. 
*/ msix = iflib_msix_init(ctx); else { scctx->isc_vectors = 1; scctx->isc_ntxqsets = 1; scctx->isc_nrxqsets = 1; scctx->isc_intr = IFLIB_INTR_LEGACY; msix = 0; } /* Get memory for the station queues */ if ((err = iflib_queues_alloc(ctx))) { device_printf(dev, "Unable to allocate queue memory\n"); goto fail; } if ((err = iflib_qset_structures_setup(ctx))) { device_printf(dev, "qset structure setup failed %d\n", err); goto fail_queues; } /* * Group taskqueues aren't properly set up until SMP is started, * so we disable interrupts until we can handle them post * SI_SUB_SMP. * * XXX: disabling interrupts doesn't actually work, at least for * the non-MSI case. When they occur before SI_SUB_SMP completes, * we do null handling and depend on this not causing too large an * interrupt storm. */ IFDI_INTR_DISABLE(ctx); if (msix > 1 && (err = IFDI_MSIX_INTR_ASSIGN(ctx, msix)) != 0) { device_printf(dev, "IFDI_MSIX_INTR_ASSIGN failed %d\n", err); goto fail_intr_free; } if (msix <= 1) { rid = 0; if (scctx->isc_intr == IFLIB_INTR_MSI) { MPASS(msix == 1); rid = 1; } if ((err = iflib_legacy_setup(ctx, ctx->isc_legacy_intr, ctx->ifc_softc, &rid, "irq0")) != 0) { device_printf(dev, "iflib_legacy_setup failed %d\n", err); goto fail_intr_free; } } ether_ifattach(ctx->ifc_ifp, ctx->ifc_mac); if ((err = IFDI_ATTACH_POST(ctx)) != 0) { device_printf(dev, "IFDI_ATTACH_POST failed %d\n", err); goto fail_detach; } if ((err = iflib_netmap_attach(ctx))) { device_printf(ctx->ifc_dev, "netmap attach failed: %d\n", err); goto fail_detach; } *ctxp = ctx; if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter); iflib_add_device_sysctl_post(ctx); ctx->ifc_flags |= IFC_INIT_DONE; return (0); fail_detach: ether_ifdetach(ctx->ifc_ifp); fail_intr_free: if (scctx->isc_intr == IFLIB_INTR_MSIX || scctx->isc_intr == IFLIB_INTR_MSI) pci_release_msi(ctx->ifc_dev); fail_queues: /* XXX free queues */ fail: IFDI_DETACH(ctx); return (err); } int iflib_device_attach(device_t dev) { if_ctx_t ctx; if_shared_ctx_t 
sctx; if ((sctx = DEVICE_REGISTER(dev)) == NULL || sctx->isc_magic != IFLIB_MAGIC) return (ENOTSUP); pci_enable_busmaster(dev); return (iflib_device_register(dev, NULL, sctx, &ctx)); } int iflib_device_deregister(if_ctx_t ctx) { if_t ifp = ctx->ifc_ifp; iflib_txq_t txq; iflib_rxq_t rxq; device_t dev = ctx->ifc_dev; int i, j; struct taskqgroup *tqg; iflib_fl_t fl; /* Make sure VLANS are not using driver */ if (if_vlantrunkinuse(ifp)) { device_printf(dev,"Vlan in use, detach first\n"); return (EBUSY); } CTX_LOCK(ctx); ctx->ifc_in_detach = 1; iflib_stop(ctx); CTX_UNLOCK(ctx); /* Unregister VLAN events */ if (ctx->ifc_vlan_attach_event != NULL) EVENTHANDLER_DEREGISTER(vlan_config, ctx->ifc_vlan_attach_event); if (ctx->ifc_vlan_detach_event != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, ctx->ifc_vlan_detach_event); iflib_netmap_detach(ifp); ether_ifdetach(ifp); /* ether_ifdetach calls if_qflush - lock must be destroy afterwards*/ CTX_LOCK_DESTROY(ctx); if (ctx->ifc_led_dev != NULL) led_destroy(ctx->ifc_led_dev); /* XXX drain any dependent tasks */ tqg = qgroup_if_io_tqg; for (txq = ctx->ifc_txqs, i = 0; i < NTXQSETS(ctx); i++, txq++) { callout_drain(&txq->ift_timer); if (txq->ift_task.gt_uniq != NULL) taskqgroup_detach(tqg, &txq->ift_task); } for (i = 0, rxq = ctx->ifc_rxqs; i < NRXQSETS(ctx); i++, rxq++) { if (rxq->ifr_task.gt_uniq != NULL) taskqgroup_detach(tqg, &rxq->ifr_task); for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) free(fl->ifl_rx_bitmap, M_IFLIB); } tqg = qgroup_if_config_tqg; if (ctx->ifc_admin_task.gt_uniq != NULL) taskqgroup_detach(tqg, &ctx->ifc_admin_task); if (ctx->ifc_vflr_task.gt_uniq != NULL) taskqgroup_detach(tqg, &ctx->ifc_vflr_task); IFDI_DETACH(ctx); device_set_softc(ctx->ifc_dev, NULL); if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_LEGACY) { pci_release_msi(dev); } if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_MSIX) { iflib_irq_free(ctx, &ctx->ifc_legacy_irq); } if (ctx->ifc_msix_mem != NULL) { bus_release_resource(ctx->ifc_dev, 
SYS_RES_MEMORY,
			ctx->ifc_softc_ctx.isc_msix_bar, ctx->ifc_msix_mem);
		ctx->ifc_msix_mem = NULL;
	}

	bus_generic_detach(dev);
	if_free(ifp);

	/*
	 * Release the queue structures, then the driver softc (only when the
	 * IFC_SC_ALLOCATED flag says iflib allocated it), then the context.
	 */
	iflib_tx_structures_free(ctx);
	iflib_rx_structures_free(ctx);
	if (ctx->ifc_flags & IFC_SC_ALLOCATED)
		free(ctx->ifc_softc, M_IFLIB);
	free(ctx, M_IFLIB);
	return (0);
}

/*
 * Detach entry point: fetch the iflib context stored as the device softc
 * and hand it to iflib_device_deregister(), returning that result.
 */
int
iflib_device_detach(device_t dev)
{
	if_ctx_t ctx = device_get_softc(dev);

	return (iflib_device_deregister(ctx));
}

/*
 * Suspend entry point: run the driver's IFDI_SUSPEND method with the
 * context lock held, then return the result of bus_generic_suspend().
 * The return value of IFDI_SUSPEND is not examined.
 */
int
iflib_device_suspend(device_t dev)
{
	if_ctx_t ctx = device_get_softc(dev);

	CTX_LOCK(ctx);
	IFDI_SUSPEND(ctx);
	CTX_UNLOCK(ctx);

	return bus_generic_suspend(dev);
}

/*
 * Shutdown entry point: run the driver's IFDI_SHUTDOWN method with the
 * context lock held.
 *
 * NOTE(review): this returns bus_generic_suspend(), the same helper the
 * suspend path uses, rather than a shutdown-specific one - confirm that
 * this is intentional.
 */
int
iflib_device_shutdown(device_t dev)
{
	if_ctx_t ctx = device_get_softc(dev);

	CTX_LOCK(ctx);
	IFDI_SHUTDOWN(ctx);
	CTX_UNLOCK(ctx);

	return bus_generic_suspend(dev);
}

/*
 * Resume entry point: with the context lock held, run the driver's
 * IFDI_RESUME method and re-initialize via iflib_init_locked().  After
 * dropping the lock, call iflib_txq_check_drain() on every TX queue set
 * with IFLIB_RESTART_BUDGET, then return bus_generic_resume().
 */
int
iflib_device_resume(device_t dev)
{
	if_ctx_t ctx = device_get_softc(dev);
	iflib_txq_t txq = ctx->ifc_txqs;

	CTX_LOCK(ctx);
	IFDI_RESUME(ctx);
	iflib_init_locked(ctx);
	CTX_UNLOCK(ctx);
	/* txq walks the ifc_txqs array in step with i. */
	for (int i = 0; i < NTXQSETS(ctx); i++, txq++)
		iflib_txq_check_drain(txq, IFLIB_RESTART_BUDGET);

	return (bus_generic_resume(dev));
}

/*
 * SR-IOV init wrapper: forward num_vfs and params to the driver's
 * IFDI_IOV_INIT method under the context lock and return its result.
 */
int
iflib_device_iov_init(device_t dev, uint16_t num_vfs, const nvlist_t *params)
{
	int error;
	if_ctx_t ctx = device_get_softc(dev);

	CTX_LOCK(ctx);
	error = IFDI_IOV_INIT(ctx, num_vfs, params);
	CTX_UNLOCK(ctx);

	return (error);
}

/*
 * SR-IOV uninit wrapper: call the driver's IFDI_IOV_UNINIT method under
 * the context lock.  Nothing is returned.
 */
void
iflib_device_iov_uninit(device_t dev)
{
	if_ctx_t ctx = device_get_softc(dev);

	CTX_LOCK(ctx);
	IFDI_IOV_UNINIT(ctx);
	CTX_UNLOCK(ctx);
}

/*
 * SR-IOV add-VF wrapper: forward vfnum and params to the driver's
 * IFDI_IOV_VF_ADD method under the context lock and return its result.
 */
int
iflib_device_iov_add_vf(device_t dev, uint16_t vfnum, const nvlist_t *params)
{
	int error;
	if_ctx_t ctx = device_get_softc(dev);

	CTX_LOCK(ctx);
	error = IFDI_IOV_VF_ADD(ctx, vfnum, params);
	CTX_UNLOCK(ctx);

	return (error);
}

/*********************************************************************
 *
 *  MODULE FUNCTION DEFINITIONS
 *
 **********************************************************************/

/*
 * - Start a fast taskqueue thread for each core
 * - Start a taskqueue for control operations
 *
 * NOTE(review): the body below does neither of these; it only returns 0.
 * The list above appears to describe intended or historical behaviour.
 */
static int
iflib_module_init(void)
{
	return (0);
}

static
int
iflib_module_event_handler(module_t mod, int what, void *arg)
{
	/*
	 * Module event dispatcher.  The mod and arg parameters are not used.
	 *   MOD_LOAD   - run iflib_module_init(); a non-zero result is returned
	 *                as-is, otherwise fall out of the switch and return 0.
	 *   MOD_UNLOAD - always refused with EBUSY.
	 *   other      - EOPNOTSUPP.
	 */
	int err;

	switch (what) {
	case MOD_LOAD:
		if ((err = iflib_module_init()) != 0)
			return (err);
		break;
	case MOD_UNLOAD:
		return (EBUSY);
	default:
		return (EOPNOTSUPP);
	}

	return (0);
}

/*********************************************************************
 *
 *  PUBLIC FUNCTION DEFINITIONS
 *     ordered as in iflib.h
 *
 **********************************************************************/

/*
 * Assert (via MPASS) that the shared context carries non-zero TX/RX size
 * limits and non-zero min/max/default descriptor counts.  Only element 0
 * of each descriptor-count array is checked.
 */
static void
_iflib_assert(if_shared_ctx_t sctx)
{
	MPASS(sctx->isc_tx_maxsize);
	MPASS(sctx->isc_tx_maxsegsize);

	MPASS(sctx->isc_rx_maxsize);
	MPASS(sctx->isc_rx_nsegments);
	MPASS(sctx->isc_rx_maxsegsize);

	MPASS(sctx->isc_nrxd_min[0]);
	MPASS(sctx->isc_nrxd_max[0]);
	MPASS(sctx->isc_nrxd_default[0]);
	MPASS(sctx->isc_ntxd_min[0]);
	MPASS(sctx->isc_ntxd_max[0]);
	MPASS(sctx->isc_ntxd_default[0]);
}

/*
 * Assert (via MPASS) that every entry of the softc context's isc_txrx
 * method table checked below is non-NULL.
 */
static void
_iflib_pre_assert(if_softc_ctx_t scctx)
{

	MPASS(scctx->isc_txrx->ift_txd_encap);
	MPASS(scctx->isc_txrx->ift_txd_flush);
	MPASS(scctx->isc_txrx->ift_txd_credits_update);
	MPASS(scctx->isc_txrx->ift_rxd_available);
	MPASS(scctx->isc_txrx->ift_rxd_pkt_get);
	MPASS(scctx->isc_txrx->ift_rxd_refill);
	MPASS(scctx->isc_txrx->ift_rxd_flush);
}

/*
 * Create and wire up the ifnet for this context: validate the shared
 * context, initialize the context lock, obtain an IFT_ETHER handle, bind
 * the driver's kobj class to the context, and install the iflib init,
 * ioctl, transmit and qflush callbacks.
 *
 * Returns ENOMEM when no ifnet handle can be obtained.
 * NOTE(review): that early return happens after CTX_LOCK_INIT() and does
 * not undo it - confirm whether the caller is expected to clean up.
 */
static int
iflib_register(if_ctx_t ctx)
{
	if_shared_ctx_t sctx = ctx->ifc_sctx;
	driver_t *driver = sctx->isc_driver;
	device_t dev = ctx->ifc_dev;
	if_t ifp;

	_iflib_assert(sctx);

	CTX_LOCK_INIT(ctx, device_get_nameunit(ctx->ifc_dev));

	ifp = ctx->ifc_ifp = if_gethandle(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "can not allocate ifnet structure\n");
		return (ENOMEM);
	}

	/*
	 * Initialize our context's device specific methods
	 */
	kobj_init((kobj_t) ctx, (kobj_class_t) driver);
	kobj_class_compile((kobj_class_t) driver);
	driver->refs++;

	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	if_setsoftc(ifp, ctx);
	if_setdev(ifp, dev);
	if_setinitfn(ifp, iflib_if_init);
	if_setioctlfn(ifp, iflib_if_ioctl);
	if_settransmitfn(ifp, iflib_if_transmit);
	if_setqflushfn(ifp, iflib_if_qflush);
	if_setflags(ifp,
IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); ctx->ifc_vlan_attach_event = EVENTHANDLER_REGISTER(vlan_config, iflib_vlan_register, ctx, EVENTHANDLER_PRI_FIRST); ctx->ifc_vlan_detach_event = EVENTHANDLER_REGISTER(vlan_unconfig, iflib_vlan_unregister, ctx, EVENTHANDLER_PRI_FIRST); ifmedia_init(&ctx->ifc_media, IFM_IMASK, iflib_media_change, iflib_media_status); return (0); } static int iflib_queues_alloc(if_ctx_t ctx) { if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = ctx->ifc_dev; int nrxqsets = scctx->isc_nrxqsets; int ntxqsets = scctx->isc_ntxqsets; iflib_txq_t txq; iflib_rxq_t rxq; iflib_fl_t fl = NULL; int i, j, cpu, err, txconf, rxconf; iflib_dma_info_t ifdip; uint32_t *rxqsizes = scctx->isc_rxqsizes; uint32_t *txqsizes = scctx->isc_txqsizes; uint8_t nrxqs = sctx->isc_nrxqs; uint8_t ntxqs = sctx->isc_ntxqs; int nfree_lists = sctx->isc_nfl ? sctx->isc_nfl : 1; caddr_t *vaddrs; uint64_t *paddrs; struct ifmp_ring **brscp; KASSERT(ntxqs > 0, ("number of queues per qset must be at least 1")); KASSERT(nrxqs > 0, ("number of queues per qset must be at least 1")); brscp = NULL; txq = NULL; rxq = NULL; /* Allocate the TX ring struct memory */ if (!(txq = (iflib_txq_t) malloc(sizeof(struct iflib_txq) * ntxqsets, M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate TX ring memory\n"); err = ENOMEM; goto fail; } /* Now allocate the RX */ if (!(rxq = (iflib_rxq_t) malloc(sizeof(struct iflib_rxq) * nrxqsets, M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate RX ring memory\n"); err = ENOMEM; goto rx_fail; } ctx->ifc_txqs = txq; ctx->ifc_rxqs = rxq; /* * XXX handle allocation failure */ for (txconf = i = 0, cpu = CPU_FIRST(); i < ntxqsets; i++, txconf++, txq++, cpu = CPU_NEXT(cpu)) { /* Set up some basics */ if ((ifdip = malloc(sizeof(struct iflib_dma_info) * ntxqs, M_IFLIB, M_WAITOK|M_ZERO)) == NULL) { device_printf(dev, "failed to allocate iflib_dma_info\n"); err = ENOMEM; goto 
err_tx_desc; } txq->ift_ifdi = ifdip; for (j = 0; j < ntxqs; j++, ifdip++) { if (iflib_dma_alloc(ctx, txqsizes[j], ifdip, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate Descriptor memory\n"); err = ENOMEM; goto err_tx_desc; } txq->ift_txd_size[j] = scctx->isc_txd_size[j]; bzero((void *)ifdip->idi_vaddr, txqsizes[j]); } txq->ift_ctx = ctx; txq->ift_id = i; if (sctx->isc_flags & IFLIB_HAS_TXCQ) { txq->ift_br_offset = 1; } else { txq->ift_br_offset = 0; } /* XXX fix this */ txq->ift_timer.c_cpu = cpu; if (iflib_txsd_alloc(txq)) { device_printf(dev, "Critical Failure setting up TX buffers\n"); err = ENOMEM; goto err_tx_desc; } /* Initialize the TX lock */ snprintf(txq->ift_mtx_name, MTX_NAME_LEN, "%s:tx(%d):callout", device_get_nameunit(dev), txq->ift_id); mtx_init(&txq->ift_mtx, txq->ift_mtx_name, NULL, MTX_DEF); callout_init_mtx(&txq->ift_timer, &txq->ift_mtx, 0); snprintf(txq->ift_db_mtx_name, MTX_NAME_LEN, "%s:tx(%d):db", device_get_nameunit(dev), txq->ift_id); err = ifmp_ring_alloc(&txq->ift_br, 2048, txq, iflib_txq_drain, iflib_txq_can_drain, M_IFLIB, M_WAITOK); if (err) { /* XXX free any allocated rings */ device_printf(dev, "Unable to allocate buf_ring\n"); goto err_tx_desc; } } for (rxconf = i = 0; i < nrxqsets; i++, rxconf++, rxq++) { /* Set up some basics */ if ((ifdip = malloc(sizeof(struct iflib_dma_info) * nrxqs, M_IFLIB, M_WAITOK|M_ZERO)) == NULL) { device_printf(dev, "failed to allocate iflib_dma_info\n"); err = ENOMEM; goto err_tx_desc; } rxq->ifr_ifdi = ifdip; /* XXX this needs to be changed if #rx queues != #tx queues */ rxq->ifr_ntxqirq = 1; rxq->ifr_txqid[0] = i; for (j = 0; j < nrxqs; j++, ifdip++) { if (iflib_dma_alloc(ctx, rxqsizes[j], ifdip, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate Descriptor memory\n"); err = ENOMEM; goto err_tx_desc; } bzero((void *)ifdip->idi_vaddr, rxqsizes[j]); } rxq->ifr_ctx = ctx; rxq->ifr_id = i; if (sctx->isc_flags & IFLIB_HAS_RXCQ) { rxq->ifr_fl_offset = 1; } else { rxq->ifr_fl_offset = 0; 
} rxq->ifr_nfl = nfree_lists; if (!(fl = (iflib_fl_t) malloc(sizeof(struct iflib_fl) * nfree_lists, M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate free list memory\n"); err = ENOMEM; goto err_tx_desc; } rxq->ifr_fl = fl; for (j = 0; j < nfree_lists; j++) { fl[j].ifl_rxq = rxq; fl[j].ifl_id = j; fl[j].ifl_ifdi = &rxq->ifr_ifdi[j + rxq->ifr_fl_offset]; fl[j].ifl_rxd_size = scctx->isc_rxd_size[j]; } /* Allocate receive buffers for the ring*/ if (iflib_rxsd_alloc(rxq)) { device_printf(dev, "Critical Failure setting up receive buffers\n"); err = ENOMEM; goto err_rx_desc; } for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) fl->ifl_rx_bitmap = bit_alloc(fl->ifl_size, M_IFLIB, M_WAITOK|M_ZERO); } /* TXQs */ vaddrs = malloc(sizeof(caddr_t)*ntxqsets*ntxqs, M_IFLIB, M_WAITOK); paddrs = malloc(sizeof(uint64_t)*ntxqsets*ntxqs, M_IFLIB, M_WAITOK); for (i = 0; i < ntxqsets; i++) { iflib_dma_info_t di = ctx->ifc_txqs[i].ift_ifdi; for (j = 0; j < ntxqs; j++, di++) { vaddrs[i*ntxqs + j] = di->idi_vaddr; paddrs[i*ntxqs + j] = di->idi_paddr; } } if ((err = IFDI_TX_QUEUES_ALLOC(ctx, vaddrs, paddrs, ntxqs, ntxqsets)) != 0) { device_printf(ctx->ifc_dev, "device queue allocation failed\n"); iflib_tx_structures_free(ctx); free(vaddrs, M_IFLIB); free(paddrs, M_IFLIB); goto err_rx_desc; } free(vaddrs, M_IFLIB); free(paddrs, M_IFLIB); /* RXQs */ vaddrs = malloc(sizeof(caddr_t)*nrxqsets*nrxqs, M_IFLIB, M_WAITOK); paddrs = malloc(sizeof(uint64_t)*nrxqsets*nrxqs, M_IFLIB, M_WAITOK); for (i = 0; i < nrxqsets; i++) { iflib_dma_info_t di = ctx->ifc_rxqs[i].ifr_ifdi; for (j = 0; j < nrxqs; j++, di++) { vaddrs[i*nrxqs + j] = di->idi_vaddr; paddrs[i*nrxqs + j] = di->idi_paddr; } } if ((err = IFDI_RX_QUEUES_ALLOC(ctx, vaddrs, paddrs, nrxqs, nrxqsets)) != 0) { device_printf(ctx->ifc_dev, "device queue allocation failed\n"); iflib_tx_structures_free(ctx); free(vaddrs, M_IFLIB); free(paddrs, M_IFLIB); goto err_rx_desc; } free(vaddrs, M_IFLIB); free(paddrs, M_IFLIB); return 
(0); /* XXX handle allocation failure changes */ err_rx_desc: err_tx_desc: if (ctx->ifc_rxqs != NULL) free(ctx->ifc_rxqs, M_IFLIB); ctx->ifc_rxqs = NULL; if (ctx->ifc_txqs != NULL) free(ctx->ifc_txqs, M_IFLIB); ctx->ifc_txqs = NULL; rx_fail: if (brscp != NULL) free(brscp, M_IFLIB); if (rxq != NULL) free(rxq, M_IFLIB); if (txq != NULL) free(txq, M_IFLIB); fail: return (err); } static int iflib_tx_structures_setup(if_ctx_t ctx) { iflib_txq_t txq = ctx->ifc_txqs; int i; for (i = 0; i < NTXQSETS(ctx); i++, txq++) iflib_txq_setup(txq); return (0); } static void iflib_tx_structures_free(if_ctx_t ctx) { iflib_txq_t txq = ctx->ifc_txqs; int i, j; for (i = 0; i < NTXQSETS(ctx); i++, txq++) { iflib_txq_destroy(txq); for (j = 0; j < ctx->ifc_nhwtxqs; j++) iflib_dma_free(&txq->ift_ifdi[j]); } free(ctx->ifc_txqs, M_IFLIB); ctx->ifc_txqs = NULL; IFDI_QUEUES_FREE(ctx); } /********************************************************************* * * Initialize all receive rings. * **********************************************************************/ static int iflib_rx_structures_setup(if_ctx_t ctx) { iflib_rxq_t rxq = ctx->ifc_rxqs; int q; #if defined(INET6) || defined(INET) int i, err; #endif for (q = 0; q < ctx->ifc_softc_ctx.isc_nrxqsets; q++, rxq++) { #if defined(INET6) || defined(INET) tcp_lro_free(&rxq->ifr_lc); if ((err = tcp_lro_init_args(&rxq->ifr_lc, ctx->ifc_ifp, TCP_LRO_ENTRIES, min(1024, ctx->ifc_softc_ctx.isc_nrxd[rxq->ifr_fl_offset]))) != 0) { device_printf(ctx->ifc_dev, "LRO Initialization failed!\n"); goto fail; } rxq->ifr_lro_enabled = TRUE; #endif IFDI_RXQ_SETUP(ctx, rxq->ifr_id); } return (0); #if defined(INET6) || defined(INET) fail: /* * Free RX software descriptors allocated so far, we will only handle * the rings that completed, the failing case will have * cleaned up for itself. 'q' failed, so its the terminus. 
*/
	rxq = ctx->ifc_rxqs;
	for (i = 0; i < q; ++i, rxq++) {
		iflib_rx_sds_free(rxq);
		rxq->ifr_cq_gen = rxq->ifr_cq_cidx = rxq->ifr_cq_pidx = 0;
	}
	return (err);
#endif
}

/*********************************************************************
 *
 *  Free all receive rings.
 *
 **********************************************************************/
static void
iflib_rx_structures_free(if_ctx_t ctx)
{
	iflib_rxq_t rxq = ctx->ifc_rxqs;

	/* Only the software descriptors are released; the rxq array itself is not freed here. */
	for (int i = 0; i < ctx->ifc_softc_ctx.isc_nrxqsets; i++, rxq++) {
		iflib_rx_sds_free(rxq);
	}
}

/*
 * Set up the TX structures and then the RX structures.
 *
 * A TX setup failure is returned immediately.  When RX setup fails, the
 * failure is logged and both the TX and RX structures are freed before
 * the error is returned.  Returns 0 on success.
 */
static int
iflib_qset_structures_setup(if_ctx_t ctx)
{
	int err;

	if ((err = iflib_tx_structures_setup(ctx)) != 0)
		return (err);

	if ((err = iflib_rx_structures_setup(ctx)) != 0) {
		device_printf(ctx->ifc_dev, "iflib_rx_structures_setup failed: %d\n", err);
		iflib_tx_structures_free(ctx);
		iflib_rx_structures_free(ctx);
	}
	return (err);
}

/*
 * Thin wrapper around _iflib_irq_alloc().
 *
 * NOTE(review): filter_arg is accepted but not forwarded; the handler's
 * arg is what gets passed on - confirm this is intended.
 */
int
iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid,
				driver_filter_t filter, void *filter_arg, driver_intr_t handler, void *arg, char *name)
{

	return (_iflib_irq_alloc(ctx, irq, rid, filter, handler, arg, name));
}

#ifdef SMP
/*
 * Return the id of the (qid % count)'th CPU in ctx->ifc_cpus, where count
 * is the number of CPUs in that set.  Works on a private copy of the set.
 */
static int
find_nth(if_ctx_t ctx, int qid)
{
	cpuset_t cpus;
	int i, cpuid, eqid, count;

	CPU_COPY(&ctx->ifc_cpus, &cpus);
	count = CPU_COUNT(&cpus);
	eqid = qid % count;
	/* clear up to the qid'th bit */
	for (i = 0; i < eqid; i++) {
		/* The CPU_FFS() result is treated as 1-based; 0 is asserted not to happen. */
		cpuid = CPU_FFS(&cpus);
		MPASS(cpuid != 0);
		CPU_CLR(cpuid-1, &cpus);
	}
	cpuid = CPU_FFS(&cpus);
	MPASS(cpuid != 0);
	return (cpuid-1);
}

#ifdef SCHED_ULE
extern struct cpu_group *cpu_top;              /* CPU topology */

/*
 * Return the index of the first child of grp whose cg_mask contains cpu,
 * or -1 when grp has no children or no child contains it.
 */
static int
find_child_with_core(int cpu, struct cpu_group *grp)
{
	int i;

	if (grp->cg_children == 0)
		return -1;

	MPASS(grp->cg_child);
	for (i = 0; i < grp->cg_children; i++) {
		if (CPU_ISSET(cpu, &grp->cg_child[i].cg_mask))
			return i;
	}

	return -1;
}

/*
 * Find the nth thread on the specified core
 *
 * Starting at cpu_top, descend through the child groups that contain cpu
 * until there is no such child or the child holds at most one CPU.  The
 * (thread_num % cg_count)'th CPU of the group reached is returned, unless
 * the topology is unavailable or the group's sharing level fails the L2
 * check below, in which case cpu itself is returned.
 */
static int
find_thread(int cpu, int thread_num)
{
	struct cpu_group *grp;
	int i;
	cpuset_t cs;

	grp = cpu_top;
	if (grp == NULL)
		return cpu;
	i = 0;
	while ((i = find_child_with_core(cpu, grp)) != -1) {
		/* If the child only has one cpu, don't descend */
		if (grp->cg_child[i].cg_count <= 1)
			break;
		grp = &grp->cg_child[i];
	}

	/* If they don't share at least an L2 cache, use the same CPU */
	if (grp->cg_level > CG_SHARE_L2 || grp->cg_level == CG_SHARE_NONE)
		return cpu;

	/* Now pick one */
	CPU_COPY(&grp->cg_mask, &cs);
	/* Drop the lowest set CPU (thread_num % cg_count) times, then take the next one. */
	for (i = thread_num % grp->cg_count; i > 0; i--) {
		MPASS(CPU_FFS(&cs));
		CPU_CLR(CPU_FFS(&cs) - 1, &cs);
	}
	MPASS(CPU_FFS(&cs));
	return CPU_FFS(&cs) - 1;
}
#else
/* Without SCHED_ULE the requested CPU is returned unchanged. */
static int
find_thread(int cpu, int thread_num __unused)
{
	return cpu;
}
#endif

/*
 * Map an interrupt type and queue id to a thread number:
 * qid / CPU_COUNT(ifc_cpus) for RX and RXTX, that value plus one for TX,
 * and -1 for any other type.
 */
static int
get_thread_num(if_ctx_t ctx, iflib_intr_type_t type, int qid)
{
	switch (type) {
	case IFLIB_INTR_TX:
		/* TX queues get threads on the same core as the corresponding RX queue */
		/* XXX handle multiple RX threads per core and more than two threads per core */
		return qid / CPU_COUNT(&ctx->ifc_cpus) + 1;
	case IFLIB_INTR_RX:
	case IFLIB_INTR_RXTX:
		/* RX queues get the first thread on their core */
		return qid / CPU_COUNT(&ctx->ifc_cpus);
	default:
		return -1;
	}
}
#else
/* Without SMP all three helpers collapse to CPU_FIRST(). */
#define get_thread_num(ctx, type, qid)	CPU_FIRST()
#define find_thread(cpuid, tid)		CPU_FIRST()
#define find_nth(ctx, gid)		CPU_FIRST()
#endif

/* Just to avoid copy/paste */
/*
 * Choose a CPU for queue qid (find_nth, then get_thread_num, then
 * find_thread) and attach gtask to tqg on that CPU with
 * taskqgroup_attach_cpu().  Returns 0 on success; on failure the error is
 * logged and returned.
 */
static inline int
iflib_irq_set_affinity(if_ctx_t ctx, int irq, iflib_intr_type_t type, int qid,
    struct grouptask *gtask, struct taskqgroup *tqg, void *uniq, char *name)
{
	int cpuid;
	int err, tid;

	cpuid = find_nth(ctx, qid);
	tid = get_thread_num(ctx, type, qid);
	/* get_thread_num() yields -1 for types it does not handle. */
	MPASS(tid >= 0);
	cpuid = find_thread(cpuid, tid);
	err = taskqgroup_attach_cpu(tqg, gtask, uniq, cpuid, irq, name);
	if (err) {
		device_printf(ctx->ifc_dev, "taskqgroup_attach_cpu failed %d\n", err);
		return (err);
	}
#ifdef notyet
	if (cpuid > ctx->ifc_cpuid_highest)
		ctx->ifc_cpuid_highest = cpuid;
#endif
	return 0;
}

int
iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid,
						iflib_intr_type_t type, driver_filter_t *filter,
						void *filter_arg, int qid, char *name)
{
	struct grouptask *gtask;
	struct taskqgroup *tqg;
iflib_filter_info_t info; gtask_fn_t *fn; int tqrid, err; driver_filter_t *intr_fast; void *q; info = &ctx->ifc_filter_info; tqrid = rid; switch (type) { /* XXX merge tx/rx for netmap? */ case IFLIB_INTR_TX: q = &ctx->ifc_txqs[qid]; info = &ctx->ifc_txqs[qid].ift_filter_info; gtask = &ctx->ifc_txqs[qid].ift_task; tqg = qgroup_if_io_tqg; fn = _task_fn_tx; intr_fast = iflib_fast_intr; GROUPTASK_INIT(gtask, 0, fn, q); break; case IFLIB_INTR_RX: q = &ctx->ifc_rxqs[qid]; info = &ctx->ifc_rxqs[qid].ifr_filter_info; gtask = &ctx->ifc_rxqs[qid].ifr_task; tqg = qgroup_if_io_tqg; fn = _task_fn_rx; intr_fast = iflib_fast_intr; GROUPTASK_INIT(gtask, 0, fn, q); break; case IFLIB_INTR_RXTX: q = &ctx->ifc_rxqs[qid]; info = &ctx->ifc_rxqs[qid].ifr_filter_info; gtask = &ctx->ifc_rxqs[qid].ifr_task; tqg = qgroup_if_io_tqg; fn = _task_fn_rx; intr_fast = iflib_fast_intr_rxtx; GROUPTASK_INIT(gtask, 0, fn, q); break; case IFLIB_INTR_ADMIN: q = ctx; tqrid = -1; info = &ctx->ifc_filter_info; gtask = &ctx->ifc_admin_task; tqg = qgroup_if_config_tqg; fn = _task_fn_admin; intr_fast = iflib_fast_intr_ctx; break; default: panic("unknown net intr type"); } info->ifi_filter = filter; info->ifi_filter_arg = filter_arg; info->ifi_task = gtask; info->ifi_ctx = q; err = _iflib_irq_alloc(ctx, irq, rid, intr_fast, NULL, info, name); if (err != 0) { device_printf(ctx->ifc_dev, "_iflib_irq_alloc failed %d\n", err); return (err); } if (type == IFLIB_INTR_ADMIN) return (0); if (tqrid != -1) { err = iflib_irq_set_affinity(ctx, rman_get_start(irq->ii_res), type, qid, gtask, tqg, q, name); if (err) return (err); } else { taskqgroup_attach(tqg, gtask, q, rman_get_start(irq->ii_res), name); } return (0); } void iflib_softirq_alloc_generic(if_ctx_t ctx, if_irq_t irq, iflib_intr_type_t type, void *arg, int qid, char *name) { struct grouptask *gtask; struct taskqgroup *tqg; gtask_fn_t *fn; void *q; int irq_num = -1; int err; switch (type) { case IFLIB_INTR_TX: q = &ctx->ifc_txqs[qid]; gtask = 
&ctx->ifc_txqs[qid].ift_task; tqg = qgroup_if_io_tqg; fn = _task_fn_tx; if (irq != NULL) irq_num = rman_get_start(irq->ii_res); break; case IFLIB_INTR_RX: q = &ctx->ifc_rxqs[qid]; gtask = &ctx->ifc_rxqs[qid].ifr_task; tqg = qgroup_if_io_tqg; fn = _task_fn_rx; if (irq != NULL) irq_num = rman_get_start(irq->ii_res); break; case IFLIB_INTR_IOV: q = ctx; gtask = &ctx->ifc_vflr_task; tqg = qgroup_if_config_tqg; fn = _task_fn_iov; break; default: panic("unknown net intr type"); } GROUPTASK_INIT(gtask, 0, fn, q); if (irq_num != -1) { err = iflib_irq_set_affinity(ctx, irq_num, type, qid, gtask, tqg, q, name); if (err) taskqgroup_attach(tqg, gtask, q, irq_num, name); } else { taskqgroup_attach(tqg, gtask, q, irq_num, name); } } void iflib_irq_free(if_ctx_t ctx, if_irq_t irq) { if (irq->ii_tag) bus_teardown_intr(ctx->ifc_dev, irq->ii_res, irq->ii_tag); if (irq->ii_res) bus_release_resource(ctx->ifc_dev, SYS_RES_IRQ, irq->ii_rid, irq->ii_res); } static int iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filter_arg, int *rid, char *name) { iflib_txq_t txq = ctx->ifc_txqs; iflib_rxq_t rxq = ctx->ifc_rxqs; if_irq_t irq = &ctx->ifc_legacy_irq; iflib_filter_info_t info; struct grouptask *gtask; struct taskqgroup *tqg; gtask_fn_t *fn; int tqrid; void *q; int err; q = &ctx->ifc_rxqs[0]; info = &rxq[0].ifr_filter_info; gtask = &rxq[0].ifr_task; tqg = qgroup_if_io_tqg; tqrid = irq->ii_rid = *rid; fn = _task_fn_rx; ctx->ifc_flags |= IFC_LEGACY; info->ifi_filter = filter; info->ifi_filter_arg = filter_arg; info->ifi_task = gtask; info->ifi_ctx = ctx; /* We allocate a single interrupt resource */ if ((err = _iflib_irq_alloc(ctx, irq, tqrid, iflib_fast_intr_ctx, NULL, info, name)) != 0) return (err); GROUPTASK_INIT(gtask, 0, fn, q); taskqgroup_attach(tqg, gtask, q, rman_get_start(irq->ii_res), name); GROUPTASK_INIT(&txq->ift_task, 0, _task_fn_tx, txq); taskqgroup_attach(qgroup_if_io_tqg, &txq->ift_task, txq, rman_get_start(irq->ii_res), "tx"); return (0); } void 
iflib_led_create(if_ctx_t ctx) { ctx->ifc_led_dev = led_create(iflib_led_func, ctx, device_get_nameunit(ctx->ifc_dev)); } void iflib_tx_intr_deferred(if_ctx_t ctx, int txqid) { GROUPTASK_ENQUEUE(&ctx->ifc_txqs[txqid].ift_task); } void iflib_rx_intr_deferred(if_ctx_t ctx, int rxqid) { GROUPTASK_ENQUEUE(&ctx->ifc_rxqs[rxqid].ifr_task); } void iflib_admin_intr_deferred(if_ctx_t ctx) { #ifdef INVARIANTS struct grouptask *gtask; gtask = &ctx->ifc_admin_task; MPASS(gtask != NULL && gtask->gt_taskqueue != NULL); #endif GROUPTASK_ENQUEUE(&ctx->ifc_admin_task); } void iflib_iov_intr_deferred(if_ctx_t ctx) { GROUPTASK_ENQUEUE(&ctx->ifc_vflr_task); } void iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, char *name) { taskqgroup_attach_cpu(qgroup_if_io_tqg, gt, uniq, cpu, -1, name); } void iflib_config_gtask_init(if_ctx_t ctx, struct grouptask *gtask, gtask_fn_t *fn, char *name) { GROUPTASK_INIT(gtask, 0, fn, ctx); taskqgroup_attach(qgroup_if_config_tqg, gtask, gtask, -1, name); } void iflib_config_gtask_deinit(struct grouptask *gtask) { taskqgroup_detach(qgroup_if_config_tqg, gtask); } void iflib_link_state_change(if_ctx_t ctx, int link_state, uint64_t baudrate) { if_t ifp = ctx->ifc_ifp; iflib_txq_t txq = ctx->ifc_txqs; if_setbaudrate(ifp, baudrate); if (baudrate >= IF_Gbps(10)) ctx->ifc_flags |= IFC_PREFETCH; /* If link down, disable watchdog */ if ((ctx->ifc_link_state == LINK_STATE_UP) && (link_state == LINK_STATE_DOWN)) { for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxqsets; i++, txq++) txq->ift_qstatus = IFLIB_QUEUE_IDLE; } ctx->ifc_link_state = link_state; if_link_state_change(ifp, link_state); } static int iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq) { int credits; #ifdef INVARIANTS int credits_pre = txq->ift_cidx_processed; #endif if (ctx->isc_txd_credits_update == NULL) return (0); if ((credits = ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, true)) == 0) return (0); txq->ift_processed += credits; txq->ift_cidx_processed += credits; 
MPASS(credits_pre + credits == txq->ift_cidx_processed); if (txq->ift_cidx_processed >= txq->ift_size) txq->ift_cidx_processed -= txq->ift_size; return (credits); } static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, qidx_t cidx, qidx_t budget) { return (ctx->isc_rxd_available(ctx->ifc_softc, rxq->ifr_id, cidx, budget)); } void iflib_add_int_delay_sysctl(if_ctx_t ctx, const char *name, const char *description, if_int_delay_info_t info, int offset, int value) { info->iidi_ctx = ctx; info->iidi_offset = offset; info->iidi_value = value; SYSCTL_ADD_PROC(device_get_sysctl_ctx(ctx->ifc_dev), SYSCTL_CHILDREN(device_get_sysctl_tree(ctx->ifc_dev)), OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, info, 0, iflib_sysctl_int_delay, "I", description); } struct mtx * iflib_ctx_lock_get(if_ctx_t ctx) { return (&ctx->ifc_mtx); } static int iflib_msix_init(if_ctx_t ctx) { device_t dev = ctx->ifc_dev; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; int vectors, queues, rx_queues, tx_queues, queuemsgs, msgs; int iflib_num_tx_queues, iflib_num_rx_queues; int err, admincnt, bar; iflib_num_tx_queues = ctx->ifc_sysctl_ntxqs; iflib_num_rx_queues = ctx->ifc_sysctl_nrxqs; device_printf(dev, "msix_init qsets capped at %d\n", imax(scctx->isc_ntxqsets, scctx->isc_nrxqsets)); bar = ctx->ifc_softc_ctx.isc_msix_bar; admincnt = sctx->isc_admin_intrcnt; /* Override by global tuneable */ { int i; size_t len = sizeof(i); err = kernel_sysctlbyname(curthread, "hw.pci.enable_msix", &i, &len, NULL, 0, NULL, 0); if (err == 0) { if (i == 0) goto msi; } else { device_printf(dev, "unable to read hw.pci.enable_msix."); } } /* Override by tuneable */ if (scctx->isc_disable_msix) goto msi; /* ** When used in a virtualized environment ** PCI BUSMASTER capability may not be set ** so explicity set it here and rewrite ** the ENABLE in the MSIX control register ** at this point to cause the host to ** successfully initialize us. 
*/ { int msix_ctrl, rid; pci_enable_busmaster(dev); rid = 0; if (pci_find_cap(dev, PCIY_MSIX, &rid) == 0 && rid != 0) { rid += PCIR_MSIX_CTRL; msix_ctrl = pci_read_config(dev, rid, 2); msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE; pci_write_config(dev, rid, msix_ctrl, 2); } else { device_printf(dev, "PCIY_MSIX capability not found; " "or rid %d == 0.\n", rid); goto msi; } } /* * bar == -1 => "trust me I know what I'm doing" * Some drivers are for hardware that is so shoddily * documented that no one knows which bars are which * so the developer has to map all bars. This hack * allows shoddy garbage to use msix in this framework. */ if (bar != -1) { ctx->ifc_msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar, RF_ACTIVE); if (ctx->ifc_msix_mem == NULL) { /* May not be enabled */ device_printf(dev, "Unable to map MSIX table \n"); goto msi; } } /* First try MSI/X */ if ((msgs = pci_msix_count(dev)) == 0) { /* system has msix disabled */ device_printf(dev, "System has MSIX disabled \n"); bus_release_resource(dev, SYS_RES_MEMORY, bar, ctx->ifc_msix_mem); ctx->ifc_msix_mem = NULL; goto msi; } #if IFLIB_DEBUG /* use only 1 qset in debug mode */ queuemsgs = min(msgs - admincnt, 1); #else queuemsgs = msgs - admincnt; #endif #ifdef RSS queues = imin(queuemsgs, rss_getnumbuckets()); #else queues = queuemsgs; #endif queues = imin(CPU_COUNT(&ctx->ifc_cpus), queues); device_printf(dev, "pxm cpus: %d queue msgs: %d admincnt: %d\n", CPU_COUNT(&ctx->ifc_cpus), queuemsgs, admincnt); #ifdef RSS /* If we're doing RSS, clamp at the number of RSS buckets */ if (queues > rss_getnumbuckets()) queues = rss_getnumbuckets(); #endif if (iflib_num_rx_queues > 0 && iflib_num_rx_queues < queuemsgs - admincnt) rx_queues = iflib_num_rx_queues; else rx_queues = queues; if (rx_queues > scctx->isc_nrxqsets) rx_queues = scctx->isc_nrxqsets; /* * We want this to be all logical CPUs by default */ if (iflib_num_tx_queues > 0 && iflib_num_tx_queues < queues) tx_queues = iflib_num_tx_queues; else 
tx_queues = mp_ncpus; if (tx_queues > scctx->isc_ntxqsets) tx_queues = scctx->isc_ntxqsets; if (ctx->ifc_sysctl_qs_eq_override == 0) { #ifdef INVARIANTS if (tx_queues != rx_queues) device_printf(dev, "queue equality override not set, capping rx_queues at %d and tx_queues at %d\n", min(rx_queues, tx_queues), min(rx_queues, tx_queues)); #endif tx_queues = min(rx_queues, tx_queues); rx_queues = min(rx_queues, tx_queues); } device_printf(dev, "using %d rx queues %d tx queues \n", rx_queues, tx_queues); vectors = rx_queues + admincnt; if ((err = pci_alloc_msix(dev, &vectors)) == 0) { device_printf(dev, "Using MSIX interrupts with %d vectors\n", vectors); scctx->isc_vectors = vectors; scctx->isc_nrxqsets = rx_queues; scctx->isc_ntxqsets = tx_queues; scctx->isc_intr = IFLIB_INTR_MSIX; return (vectors); } else { device_printf(dev, "failed to allocate %d msix vectors, err: %d - using MSI\n", vectors, err); } msi: vectors = pci_msi_count(dev); scctx->isc_nrxqsets = 1; scctx->isc_ntxqsets = 1; scctx->isc_vectors = vectors; if (vectors == 1 && pci_alloc_msi(dev, &vectors) == 0) { device_printf(dev,"Using an MSI interrupt\n"); scctx->isc_intr = IFLIB_INTR_MSI; } else { device_printf(dev,"Using a Legacy interrupt\n"); scctx->isc_intr = IFLIB_INTR_LEGACY; } return (vectors); } char * ring_states[] = { "IDLE", "BUSY", "STALLED", "ABDICATED" }; static int mp_ring_state_handler(SYSCTL_HANDLER_ARGS) { int rc; uint16_t *state = ((uint16_t *)oidp->oid_arg1); struct sbuf *sb; char *ring_state = "UNKNOWN"; /* XXX needed ? 
*/ rc = sysctl_wire_old_buffer(req, 0); MPASS(rc == 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 80, req); MPASS(sb != NULL); if (sb == NULL) return (ENOMEM); if (state[3] <= 3) ring_state = ring_states[state[3]]; sbuf_printf(sb, "pidx_head: %04hd pidx_tail: %04hd cidx: %04hd state: %s", state[0], state[1], state[2], ring_state); rc = sbuf_finish(sb); sbuf_delete(sb); return(rc); } enum iflib_ndesc_handler { IFLIB_NTXD_HANDLER, IFLIB_NRXD_HANDLER, }; static int mp_ndesc_handler(SYSCTL_HANDLER_ARGS) { if_ctx_t ctx = (void *)arg1; enum iflib_ndesc_handler type = arg2; char buf[256] = {0}; qidx_t *ndesc; char *p, *next; int nqs, rc, i; MPASS(type == IFLIB_NTXD_HANDLER || type == IFLIB_NRXD_HANDLER); nqs = 8; switch(type) { case IFLIB_NTXD_HANDLER: ndesc = ctx->ifc_sysctl_ntxds; if (ctx->ifc_sctx) nqs = ctx->ifc_sctx->isc_ntxqs; break; case IFLIB_NRXD_HANDLER: ndesc = ctx->ifc_sysctl_nrxds; if (ctx->ifc_sctx) nqs = ctx->ifc_sctx->isc_nrxqs; break; } if (nqs == 0) nqs = 8; for (i=0; i<8; i++) { if (i >= nqs) break; if (i) strcat(buf, ","); sprintf(strchr(buf, 0), "%d", ndesc[i]); } rc = sysctl_handle_string(oidp, buf, sizeof(buf), req); if (rc || req->newptr == NULL) return rc; for (i = 0, next = buf, p = strsep(&next, " ,"); i < 8 && p; i++, p = strsep(&next, " ,")) { ndesc[i] = strtoul(p, NULL, 10); } return(rc); } #define NAME_BUFLEN 32 static void iflib_add_device_sysctl_pre(if_ctx_t ctx) { device_t dev = iflib_get_dev(ctx); struct sysctl_oid_list *child, *oid_list; struct sysctl_ctx_list *ctx_list; struct sysctl_oid *node; ctx_list = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); ctx->ifc_sysctl_node = node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, "iflib", CTLFLAG_RD, NULL, "IFLIB fields"); oid_list = SYSCTL_CHILDREN(node); SYSCTL_ADD_STRING(ctx_list, oid_list, OID_AUTO, "driver_version", CTLFLAG_RD, ctx->ifc_sctx->isc_driver_version, 0, "driver version"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, 
"override_ntxqs", CTLFLAG_RWTUN, &ctx->ifc_sysctl_ntxqs, 0, "# of txqs to use, 0 => use default #"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_nrxqs", CTLFLAG_RWTUN, &ctx->ifc_sysctl_nrxqs, 0, "# of rxqs to use, 0 => use default #"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_qs_enable", CTLFLAG_RWTUN, &ctx->ifc_sysctl_qs_eq_override, 0, "permit #txq != #rxq"); SYSCTL_ADD_INT(ctx_list, oid_list, OID_AUTO, "disable_msix", CTLFLAG_RWTUN, &ctx->ifc_softc_ctx.isc_disable_msix, 0, "disable MSIX (default 0)"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "rx_budget", CTLFLAG_RWTUN, &ctx->ifc_sysctl_rx_budget, 0, "set the rx budget"); /* XXX change for per-queue sizes */ SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_ntxds", CTLTYPE_STRING|CTLFLAG_RWTUN, ctx, IFLIB_NTXD_HANDLER, mp_ndesc_handler, "A", "list of # of tx descriptors to use, 0 = use default #"); SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_nrxds", CTLTYPE_STRING|CTLFLAG_RWTUN, ctx, IFLIB_NRXD_HANDLER, mp_ndesc_handler, "A", "list of # of rx descriptors to use, 0 = use default #"); } static void iflib_add_device_sysctl_post(if_ctx_t ctx) { if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = iflib_get_dev(ctx); struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx_list; iflib_fl_t fl; iflib_txq_t txq; iflib_rxq_t rxq; int i, j; char namebuf[NAME_BUFLEN]; char *qfmt; struct sysctl_oid *queue_node, *fl_node, *node; struct sysctl_oid_list *queue_list, *fl_list; ctx_list = device_get_sysctl_ctx(dev); node = ctx->ifc_sysctl_node; child = SYSCTL_CHILDREN(node); if (scctx->isc_ntxqsets > 100) qfmt = "txq%03d"; else if (scctx->isc_ntxqsets > 10) qfmt = "txq%02d"; else qfmt = "txq%d"; for (i = 0, txq = ctx->ifc_txqs; i < scctx->isc_ntxqsets; i++, txq++) { snprintf(namebuf, NAME_BUFLEN, qfmt, i); queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = 
SYSCTL_CHILDREN(queue_node); #if MEMORY_LOGGING SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_dequeued", CTLFLAG_RD, &txq->ift_dequeued, "total mbufs freed"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_enqueued", CTLFLAG_RD, &txq->ift_enqueued, "total mbufs enqueued"); #endif SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "mbuf_defrag", CTLFLAG_RD, &txq->ift_mbuf_defrag, "# of times m_defrag was called"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "m_pullups", CTLFLAG_RD, &txq->ift_pullups, "# of times m_pullup was called"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "mbuf_defrag_failed", CTLFLAG_RD, &txq->ift_mbuf_defrag_failed, "# of times m_defrag failed"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "no_desc_avail", CTLFLAG_RD, &txq->ift_no_desc_avail, "# of times no descriptors were available"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "tx_map_failed", CTLFLAG_RD, &txq->ift_map_failed, "# of times dma map failed"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txd_encap_efbig", CTLFLAG_RD, &txq->ift_txd_encap_efbig, "# of times txd_encap returned EFBIG"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "no_tx_dma_setup", CTLFLAG_RD, &txq->ift_no_tx_dma_setup, "# of times map failed for other than EFBIG"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_pidx", CTLFLAG_RD, &txq->ift_pidx, 1, "Producer Index"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_cidx", CTLFLAG_RD, &txq->ift_cidx, 1, "Consumer Index"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_cidx_processed", CTLFLAG_RD, &txq->ift_cidx_processed, 1, "Consumer Index seen by credit update"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_in_use", CTLFLAG_RD, &txq->ift_in_use, 1, "descriptors in use"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_processed", CTLFLAG_RD, &txq->ift_processed, "descriptors procesed for clean"); SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_cleaned", CTLFLAG_RD, &txq->ift_cleaned, "total 
cleaned"); SYSCTL_ADD_PROC(ctx_list, queue_list, OID_AUTO, "ring_state", CTLTYPE_STRING | CTLFLAG_RD, __DEVOLATILE(uint64_t *, &txq->ift_br->state), 0, mp_ring_state_handler, "A", "soft ring state"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_enqueues", CTLFLAG_RD, &txq->ift_br->enqueues, "# of enqueues to the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_drops", CTLFLAG_RD, &txq->ift_br->drops, "# of drops in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_starts", CTLFLAG_RD, &txq->ift_br->starts, "# of normal consumer starts in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_stalls", CTLFLAG_RD, &txq->ift_br->stalls, "# of consumer stalls in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_restarts", CTLFLAG_RD, &txq->ift_br->restarts, "# of consumer restarts in the mp_ring for this queue"); SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_abdications", CTLFLAG_RD, &txq->ift_br->abdications, "# of consumer abdications in the mp_ring for this queue"); } if (scctx->isc_nrxqsets > 100) qfmt = "rxq%03d"; else if (scctx->isc_nrxqsets > 10) qfmt = "rxq%02d"; else qfmt = "rxq%d"; for (i = 0, rxq = ctx->ifc_rxqs; i < scctx->isc_nrxqsets; i++, rxq++) { snprintf(namebuf, NAME_BUFLEN, qfmt, i); queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); if (sctx->isc_flags & IFLIB_HAS_RXCQ) { SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "rxq_cq_pidx", CTLFLAG_RD, &rxq->ifr_cq_pidx, 1, "Producer Index"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "rxq_cq_cidx", CTLFLAG_RD, &rxq->ifr_cq_cidx, 1, "Consumer Index"); } for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) { snprintf(namebuf, NAME_BUFLEN, "rxq_fl%d", j); fl_node = SYSCTL_ADD_NODE(ctx_list, queue_list, OID_AUTO, namebuf, CTLFLAG_RD, NULL, 
"freelist Name"); fl_list = SYSCTL_CHILDREN(fl_node); SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "pidx", CTLFLAG_RD, &fl->ifl_pidx, 1, "Producer Index"); SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "cidx", CTLFLAG_RD, &fl->ifl_cidx, 1, "Consumer Index"); SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "credits", CTLFLAG_RD, &fl->ifl_credits, 1, "credits available"); #if MEMORY_LOGGING SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_m_enqueued", CTLFLAG_RD, &fl->ifl_m_enqueued, "mbufs allocated"); SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_m_dequeued", CTLFLAG_RD, &fl->ifl_m_dequeued, "mbufs freed"); SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_cl_enqueued", CTLFLAG_RD, &fl->ifl_cl_enqueued, "clusters allocated"); SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_cl_dequeued", CTLFLAG_RD, &fl->ifl_cl_dequeued, "clusters freed"); #endif } } } #ifndef __NO_STRICT_ALIGNMENT static struct mbuf * iflib_fixup_rx(struct mbuf *m) { struct mbuf *n; if (m->m_len <= (MCLBYTES - ETHER_HDR_LEN)) { bcopy(m->m_data, m->m_data + ETHER_HDR_LEN, m->m_len); m->m_data += ETHER_HDR_LEN; n = m; } else { MGETHDR(n, M_NOWAIT, MT_DATA); if (n == NULL) { m_freem(m); return (NULL); } bcopy(m->m_data, n->m_data, ETHER_HDR_LEN); m->m_data += ETHER_HDR_LEN; m->m_len -= ETHER_HDR_LEN; n->m_len = ETHER_HDR_LEN; M_MOVE_PKTHDR(n, m); n->m_next = m; } return (n); } #endif Index: head/sys/net80211/ieee80211_ioctl.c =================================================================== --- head/sys/net80211/ieee80211_ioctl.c (revision 331796) +++ head/sys/net80211/ieee80211_ioctl.c (revision 331797) @@ -1,3619 +1,3620 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2001 Atsushi Onoe * Copyright (c) 2002-2009 Sam Leffler, Errno Consulting * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); /* * IEEE 802.11 ioctl support (FreeBSD-specific) */ #include "opt_inet.h" #include "opt_wlan.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #endif #include #include #include #include #define IS_UP_AUTO(_vap) \ (IFNET_IS_UP_RUNNING((_vap)->iv_ifp) && \ (_vap)->iv_roaming == IEEE80211_ROAMING_AUTO) static const uint8_t zerobssid[IEEE80211_ADDR_LEN]; static struct ieee80211_channel *findchannel(struct ieee80211com *, int ieee, int mode); static int ieee80211_scanreq(struct ieee80211vap *, struct ieee80211_scan_req *); static int ieee80211_ioctl_getkey(struct ieee80211vap *vap, struct ieee80211req *ireq) { struct ieee80211com *ic = vap->iv_ic; struct ieee80211_node *ni; struct ieee80211req_key ik; struct ieee80211_key *wk; const struct ieee80211_cipher *cip; u_int kid; int error; if (ireq->i_len != sizeof(ik)) return EINVAL; error = copyin(ireq->i_data, &ik, sizeof(ik)); if (error) return error; kid = ik.ik_keyix; if (kid == IEEE80211_KEYIX_NONE) { ni = ieee80211_find_vap_node(&ic->ic_sta, vap, ik.ik_macaddr); if (ni == NULL) return ENOENT; wk = &ni->ni_ucastkey; } else { if (kid >= IEEE80211_WEP_NKID) return EINVAL; wk = &vap->iv_nw_keys[kid]; IEEE80211_ADDR_COPY(&ik.ik_macaddr, vap->iv_bss->ni_macaddr); ni = NULL; } cip = wk->wk_cipher; ik.ik_type = cip->ic_cipher; ik.ik_keylen = wk->wk_keylen; ik.ik_flags = wk->wk_flags & (IEEE80211_KEY_XMIT | IEEE80211_KEY_RECV); if (wk->wk_keyix == vap->iv_def_txkey) ik.ik_flags |= IEEE80211_KEY_DEFAULT; if (priv_check(curthread, PRIV_NET80211_GETKEY) == 0) { /* NB: only root can read key data */ ik.ik_keyrsc = wk->wk_keyrsc[IEEE80211_NONQOS_TID]; ik.ik_keytsc = wk->wk_keytsc; memcpy(ik.ik_keydata, wk->wk_key, wk->wk_keylen); if (cip->ic_cipher == IEEE80211_CIPHER_TKIP) { memcpy(ik.ik_keydata+wk->wk_keylen, wk->wk_key + IEEE80211_KEYBUF_SIZE, IEEE80211_MICBUF_SIZE); ik.ik_keylen += 
IEEE80211_MICBUF_SIZE; } } else { ik.ik_keyrsc = 0; ik.ik_keytsc = 0; memset(ik.ik_keydata, 0, sizeof(ik.ik_keydata)); } if (ni != NULL) ieee80211_free_node(ni); return copyout(&ik, ireq->i_data, sizeof(ik)); } static int ieee80211_ioctl_getchanlist(struct ieee80211vap *vap, struct ieee80211req *ireq) { struct ieee80211com *ic = vap->iv_ic; if (sizeof(ic->ic_chan_active) < ireq->i_len) ireq->i_len = sizeof(ic->ic_chan_active); return copyout(&ic->ic_chan_active, ireq->i_data, ireq->i_len); } static int ieee80211_ioctl_getchaninfo(struct ieee80211vap *vap, struct ieee80211req *ireq) { struct ieee80211com *ic = vap->iv_ic; uint32_t space; space = __offsetof(struct ieee80211req_chaninfo, ic_chans[ic->ic_nchans]); if (space > ireq->i_len) space = ireq->i_len; /* XXX assumes compatible layout */ return copyout(&ic->ic_nchans, ireq->i_data, space); } static int ieee80211_ioctl_getwpaie(struct ieee80211vap *vap, struct ieee80211req *ireq, int req) { struct ieee80211_node *ni; struct ieee80211req_wpaie2 *wpaie; int error; if (ireq->i_len < IEEE80211_ADDR_LEN) return EINVAL; wpaie = IEEE80211_MALLOC(sizeof(*wpaie), M_TEMP, IEEE80211_M_NOWAIT | IEEE80211_M_ZERO); if (wpaie == NULL) return ENOMEM; error = copyin(ireq->i_data, wpaie->wpa_macaddr, IEEE80211_ADDR_LEN); if (error != 0) goto bad; ni = ieee80211_find_vap_node(&vap->iv_ic->ic_sta, vap, wpaie->wpa_macaddr); if (ni == NULL) { error = ENOENT; goto bad; } if (ni->ni_ies.wpa_ie != NULL) { int ielen = ni->ni_ies.wpa_ie[1] + 2; if (ielen > sizeof(wpaie->wpa_ie)) ielen = sizeof(wpaie->wpa_ie); memcpy(wpaie->wpa_ie, ni->ni_ies.wpa_ie, ielen); } if (req == IEEE80211_IOC_WPAIE2) { if (ni->ni_ies.rsn_ie != NULL) { int ielen = ni->ni_ies.rsn_ie[1] + 2; if (ielen > sizeof(wpaie->rsn_ie)) ielen = sizeof(wpaie->rsn_ie); memcpy(wpaie->rsn_ie, ni->ni_ies.rsn_ie, ielen); } if (ireq->i_len > sizeof(struct ieee80211req_wpaie2)) ireq->i_len = sizeof(struct ieee80211req_wpaie2); } else { /* compatibility op, may overwrite wpa ie */ /* XXX 
check ic_flags? */ if (ni->ni_ies.rsn_ie != NULL) { int ielen = ni->ni_ies.rsn_ie[1] + 2; if (ielen > sizeof(wpaie->wpa_ie)) ielen = sizeof(wpaie->wpa_ie); memcpy(wpaie->wpa_ie, ni->ni_ies.rsn_ie, ielen); } if (ireq->i_len > sizeof(struct ieee80211req_wpaie)) ireq->i_len = sizeof(struct ieee80211req_wpaie); } ieee80211_free_node(ni); error = copyout(wpaie, ireq->i_data, ireq->i_len); bad: IEEE80211_FREE(wpaie, M_TEMP); return error; } static int ieee80211_ioctl_getstastats(struct ieee80211vap *vap, struct ieee80211req *ireq) { struct ieee80211_node *ni; uint8_t macaddr[IEEE80211_ADDR_LEN]; const size_t off = __offsetof(struct ieee80211req_sta_stats, is_stats); int error; if (ireq->i_len < off) return EINVAL; error = copyin(ireq->i_data, macaddr, IEEE80211_ADDR_LEN); if (error != 0) return error; ni = ieee80211_find_vap_node(&vap->iv_ic->ic_sta, vap, macaddr); if (ni == NULL) return ENOENT; if (ireq->i_len > sizeof(struct ieee80211req_sta_stats)) ireq->i_len = sizeof(struct ieee80211req_sta_stats); /* NB: copy out only the statistics */ error = copyout(&ni->ni_stats, (uint8_t *) ireq->i_data + off, ireq->i_len - off); ieee80211_free_node(ni); return error; } struct scanreq { struct ieee80211req_scan_result *sr; size_t space; }; static size_t scan_space(const struct ieee80211_scan_entry *se, int *ielen) { size_t len; *ielen = se->se_ies.len; /* * NB: ie's can be no more than 255 bytes and the max 802.11 * packet is <3Kbytes so we are sure this doesn't overflow * 16-bits; if this is a concern we can drop the ie's. 
*/ len = sizeof(struct ieee80211req_scan_result) + se->se_ssid[1] + se->se_meshid[1] + *ielen; return roundup(len, sizeof(uint32_t)); } static void get_scan_space(void *arg, const struct ieee80211_scan_entry *se) { struct scanreq *req = arg; int ielen; req->space += scan_space(se, &ielen); } static void get_scan_result(void *arg, const struct ieee80211_scan_entry *se) { struct scanreq *req = arg; struct ieee80211req_scan_result *sr; int ielen, len, nr, nxr; uint8_t *cp; len = scan_space(se, &ielen); if (len > req->space) return; sr = req->sr; KASSERT(len <= 65535 && ielen <= 65535, ("len %u ssid %u ie %u", len, se->se_ssid[1], ielen)); sr->isr_len = len; sr->isr_ie_off = sizeof(struct ieee80211req_scan_result); sr->isr_ie_len = ielen; sr->isr_freq = se->se_chan->ic_freq; sr->isr_flags = se->se_chan->ic_flags; sr->isr_rssi = se->se_rssi; sr->isr_noise = se->se_noise; sr->isr_intval = se->se_intval; sr->isr_capinfo = se->se_capinfo; sr->isr_erp = se->se_erp; IEEE80211_ADDR_COPY(sr->isr_bssid, se->se_bssid); nr = min(se->se_rates[1], IEEE80211_RATE_MAXSIZE); memcpy(sr->isr_rates, se->se_rates+2, nr); nxr = min(se->se_xrates[1], IEEE80211_RATE_MAXSIZE - nr); memcpy(sr->isr_rates+nr, se->se_xrates+2, nxr); sr->isr_nrates = nr + nxr; /* copy SSID */ sr->isr_ssid_len = se->se_ssid[1]; cp = ((uint8_t *)sr) + sr->isr_ie_off; memcpy(cp, se->se_ssid+2, sr->isr_ssid_len); /* copy mesh id */ cp += sr->isr_ssid_len; sr->isr_meshid_len = se->se_meshid[1]; memcpy(cp, se->se_meshid+2, sr->isr_meshid_len); cp += sr->isr_meshid_len; if (ielen) memcpy(cp, se->se_ies.data, ielen); req->space -= len; req->sr = (struct ieee80211req_scan_result *)(((uint8_t *)sr) + len); } static int ieee80211_ioctl_getscanresults(struct ieee80211vap *vap, struct ieee80211req *ireq) { struct scanreq req; int error; if (ireq->i_len < sizeof(struct scanreq)) return EFAULT; error = 0; req.space = 0; ieee80211_scan_iterate(vap, get_scan_space, &req); if (req.space > ireq->i_len) req.space = ireq->i_len; if 
(req.space > 0) { uint32_t space; void *p; space = req.space; /* XXX M_WAITOK after driver lock released */ p = IEEE80211_MALLOC(space, M_TEMP, IEEE80211_M_NOWAIT | IEEE80211_M_ZERO); if (p == NULL) return ENOMEM; req.sr = p; ieee80211_scan_iterate(vap, get_scan_result, &req); ireq->i_len = space - req.space; error = copyout(p, ireq->i_data, ireq->i_len); IEEE80211_FREE(p, M_TEMP); } else ireq->i_len = 0; return error; } struct stainforeq { struct ieee80211req_sta_info *si; size_t space; }; static size_t sta_space(const struct ieee80211_node *ni, size_t *ielen) { *ielen = ni->ni_ies.len; return roundup(sizeof(struct ieee80211req_sta_info) + *ielen, sizeof(uint32_t)); } static void get_sta_space(void *arg, struct ieee80211_node *ni) { struct stainforeq *req = arg; size_t ielen; if (ni->ni_vap->iv_opmode == IEEE80211_M_HOSTAP && ni->ni_associd == 0) /* only associated stations */ return; req->space += sta_space(ni, &ielen); } static void get_sta_info(void *arg, struct ieee80211_node *ni) { struct stainforeq *req = arg; struct ieee80211vap *vap = ni->ni_vap; struct ieee80211req_sta_info *si; size_t ielen, len; uint8_t *cp; if (vap->iv_opmode == IEEE80211_M_HOSTAP && ni->ni_associd == 0) /* only associated stations */ return; if (ni->ni_chan == IEEE80211_CHAN_ANYC) /* XXX bogus entry */ return; len = sta_space(ni, &ielen); if (len > req->space) return; si = req->si; si->isi_len = len; si->isi_ie_off = sizeof(struct ieee80211req_sta_info); si->isi_ie_len = ielen; si->isi_freq = ni->ni_chan->ic_freq; si->isi_flags = ni->ni_chan->ic_flags; si->isi_state = ni->ni_flags; si->isi_authmode = ni->ni_authmode; vap->iv_ic->ic_node_getsignal(ni, &si->isi_rssi, &si->isi_noise); vap->iv_ic->ic_node_getmimoinfo(ni, &si->isi_mimo); si->isi_capinfo = ni->ni_capinfo; si->isi_erp = ni->ni_erp; IEEE80211_ADDR_COPY(si->isi_macaddr, ni->ni_macaddr); si->isi_nrates = ni->ni_rates.rs_nrates; if (si->isi_nrates > 15) si->isi_nrates = 15; memcpy(si->isi_rates, ni->ni_rates.rs_rates, 
si->isi_nrates); si->isi_txrate = ni->ni_txrate; if (si->isi_txrate & IEEE80211_RATE_MCS) { const struct ieee80211_mcs_rates *mcs = &ieee80211_htrates[ni->ni_txrate &~ IEEE80211_RATE_MCS]; if (IEEE80211_IS_CHAN_HT40(ni->ni_chan)) { if (ni->ni_flags & IEEE80211_NODE_SGI40) si->isi_txmbps = mcs->ht40_rate_800ns; else si->isi_txmbps = mcs->ht40_rate_400ns; } else { if (ni->ni_flags & IEEE80211_NODE_SGI20) si->isi_txmbps = mcs->ht20_rate_800ns; else si->isi_txmbps = mcs->ht20_rate_400ns; } } else si->isi_txmbps = si->isi_txrate; si->isi_associd = ni->ni_associd; si->isi_txpower = ni->ni_txpower; si->isi_vlan = ni->ni_vlan; if (ni->ni_flags & IEEE80211_NODE_QOS) { memcpy(si->isi_txseqs, ni->ni_txseqs, sizeof(ni->ni_txseqs)); memcpy(si->isi_rxseqs, ni->ni_rxseqs, sizeof(ni->ni_rxseqs)); } else { si->isi_txseqs[0] = ni->ni_txseqs[IEEE80211_NONQOS_TID]; si->isi_rxseqs[0] = ni->ni_rxseqs[IEEE80211_NONQOS_TID]; } /* NB: leave all cases in case we relax ni_associd == 0 check */ if (ieee80211_node_is_authorized(ni)) si->isi_inact = vap->iv_inact_run; else if (ni->ni_associd != 0 || (vap->iv_opmode == IEEE80211_M_WDS && (vap->iv_flags_ext & IEEE80211_FEXT_WDSLEGACY))) si->isi_inact = vap->iv_inact_auth; else si->isi_inact = vap->iv_inact_init; si->isi_inact = (si->isi_inact - ni->ni_inact) * IEEE80211_INACT_WAIT; si->isi_localid = ni->ni_mllid; si->isi_peerid = ni->ni_mlpid; si->isi_peerstate = ni->ni_mlstate; if (ielen) { cp = ((uint8_t *)si) + si->isi_ie_off; memcpy(cp, ni->ni_ies.data, ielen); } req->si = (struct ieee80211req_sta_info *)(((uint8_t *)si) + len); req->space -= len; } static int getstainfo_common(struct ieee80211vap *vap, struct ieee80211req *ireq, struct ieee80211_node *ni, size_t off) { struct ieee80211com *ic = vap->iv_ic; struct stainforeq req; size_t space; void *p; int error; error = 0; req.space = 0; if (ni == NULL) { ieee80211_iterate_nodes_vap(&ic->ic_sta, vap, get_sta_space, &req); } else get_sta_space(&req, ni); if (req.space > ireq->i_len) req.space 
= ireq->i_len; if (req.space > 0) { space = req.space; /* XXX M_WAITOK after driver lock released */ p = IEEE80211_MALLOC(space, M_TEMP, IEEE80211_M_NOWAIT | IEEE80211_M_ZERO); if (p == NULL) { error = ENOMEM; goto bad; } req.si = p; if (ni == NULL) { ieee80211_iterate_nodes_vap(&ic->ic_sta, vap, get_sta_info, &req); } else get_sta_info(&req, ni); ireq->i_len = space - req.space; error = copyout(p, (uint8_t *) ireq->i_data+off, ireq->i_len); IEEE80211_FREE(p, M_TEMP); } else ireq->i_len = 0; bad: if (ni != NULL) ieee80211_free_node(ni); return error; } static int ieee80211_ioctl_getstainfo(struct ieee80211vap *vap, struct ieee80211req *ireq) { uint8_t macaddr[IEEE80211_ADDR_LEN]; const size_t off = __offsetof(struct ieee80211req_sta_req, info); struct ieee80211_node *ni; int error; if (ireq->i_len < sizeof(struct ieee80211req_sta_req)) return EFAULT; error = copyin(ireq->i_data, macaddr, IEEE80211_ADDR_LEN); if (error != 0) return error; if (IEEE80211_ADDR_EQ(macaddr, vap->iv_ifp->if_broadcastaddr)) { ni = NULL; } else { ni = ieee80211_find_vap_node(&vap->iv_ic->ic_sta, vap, macaddr); if (ni == NULL) return ENOENT; } return getstainfo_common(vap, ireq, ni, off); } static int ieee80211_ioctl_getstatxpow(struct ieee80211vap *vap, struct ieee80211req *ireq) { struct ieee80211_node *ni; struct ieee80211req_sta_txpow txpow; int error; if (ireq->i_len != sizeof(txpow)) return EINVAL; error = copyin(ireq->i_data, &txpow, sizeof(txpow)); if (error != 0) return error; ni = ieee80211_find_vap_node(&vap->iv_ic->ic_sta, vap, txpow.it_macaddr); if (ni == NULL) return ENOENT; txpow.it_txpow = ni->ni_txpower; error = copyout(&txpow, ireq->i_data, sizeof(txpow)); ieee80211_free_node(ni); return error; } static int ieee80211_ioctl_getwmeparam(struct ieee80211vap *vap, struct ieee80211req *ireq) { struct ieee80211com *ic = vap->iv_ic; struct ieee80211_wme_state *wme = &ic->ic_wme; struct wmeParams *wmep; int ac; if ((ic->ic_caps & IEEE80211_C_WME) == 0) return EINVAL; ac = 
(ireq->i_len & IEEE80211_WMEPARAM_VAL); if (ac >= WME_NUM_AC) ac = WME_AC_BE; if (ireq->i_len & IEEE80211_WMEPARAM_BSS) wmep = &wme->wme_wmeBssChanParams.cap_wmeParams[ac]; else wmep = &wme->wme_wmeChanParams.cap_wmeParams[ac]; switch (ireq->i_type) { case IEEE80211_IOC_WME_CWMIN: /* WME: CWmin */ ireq->i_val = wmep->wmep_logcwmin; break; case IEEE80211_IOC_WME_CWMAX: /* WME: CWmax */ ireq->i_val = wmep->wmep_logcwmax; break; case IEEE80211_IOC_WME_AIFS: /* WME: AIFS */ ireq->i_val = wmep->wmep_aifsn; break; case IEEE80211_IOC_WME_TXOPLIMIT: /* WME: txops limit */ ireq->i_val = wmep->wmep_txopLimit; break; case IEEE80211_IOC_WME_ACM: /* WME: ACM (bss only) */ wmep = &wme->wme_wmeBssChanParams.cap_wmeParams[ac]; ireq->i_val = wmep->wmep_acm; break; case IEEE80211_IOC_WME_ACKPOLICY: /* WME: ACK policy (!bss only)*/ wmep = &wme->wme_wmeChanParams.cap_wmeParams[ac]; ireq->i_val = !wmep->wmep_noackPolicy; break; } return 0; } static int ieee80211_ioctl_getmaccmd(struct ieee80211vap *vap, struct ieee80211req *ireq) { const struct ieee80211_aclator *acl = vap->iv_acl; return (acl == NULL ? EINVAL : acl->iac_getioctl(vap, ireq)); } static int ieee80211_ioctl_getcurchan(struct ieee80211vap *vap, struct ieee80211req *ireq) { struct ieee80211com *ic = vap->iv_ic; struct ieee80211_channel *c; if (ireq->i_len != sizeof(struct ieee80211_channel)) return EINVAL; /* * vap's may have different operating channels when HT is * in use. When in RUN state report the vap-specific channel. * Otherwise return curchan. 
*/ if (vap->iv_state == IEEE80211_S_RUN || vap->iv_state == IEEE80211_S_SLEEP) c = vap->iv_bss->ni_chan; else c = ic->ic_curchan; return copyout(c, ireq->i_data, sizeof(*c)); } static int getappie(const struct ieee80211_appie *aie, struct ieee80211req *ireq) { if (aie == NULL) return EINVAL; /* NB: truncate, caller can check length */ if (ireq->i_len > aie->ie_len) ireq->i_len = aie->ie_len; return copyout(aie->ie_data, ireq->i_data, ireq->i_len); } static int ieee80211_ioctl_getappie(struct ieee80211vap *vap, struct ieee80211req *ireq) { uint8_t fc0; fc0 = ireq->i_val & 0xff; if ((fc0 & IEEE80211_FC0_TYPE_MASK) != IEEE80211_FC0_TYPE_MGT) return EINVAL; /* NB: could check iv_opmode and reject but hardly worth the effort */ switch (fc0 & IEEE80211_FC0_SUBTYPE_MASK) { case IEEE80211_FC0_SUBTYPE_BEACON: return getappie(vap->iv_appie_beacon, ireq); case IEEE80211_FC0_SUBTYPE_PROBE_RESP: return getappie(vap->iv_appie_proberesp, ireq); case IEEE80211_FC0_SUBTYPE_ASSOC_RESP: return getappie(vap->iv_appie_assocresp, ireq); case IEEE80211_FC0_SUBTYPE_PROBE_REQ: return getappie(vap->iv_appie_probereq, ireq); case IEEE80211_FC0_SUBTYPE_ASSOC_REQ: return getappie(vap->iv_appie_assocreq, ireq); case IEEE80211_FC0_SUBTYPE_BEACON|IEEE80211_FC0_SUBTYPE_PROBE_RESP: return getappie(vap->iv_appie_wpa, ireq); } return EINVAL; } static int ieee80211_ioctl_getregdomain(struct ieee80211vap *vap, const struct ieee80211req *ireq) { struct ieee80211com *ic = vap->iv_ic; if (ireq->i_len != sizeof(ic->ic_regdomain)) return EINVAL; return copyout(&ic->ic_regdomain, ireq->i_data, sizeof(ic->ic_regdomain)); } static int ieee80211_ioctl_getroam(struct ieee80211vap *vap, const struct ieee80211req *ireq) { size_t len = ireq->i_len; /* NB: accept short requests for backwards compat */ if (len > sizeof(vap->iv_roamparms)) len = sizeof(vap->iv_roamparms); return copyout(vap->iv_roamparms, ireq->i_data, len); } static int ieee80211_ioctl_gettxparams(struct ieee80211vap *vap, const struct ieee80211req 
*ireq) { size_t len = ireq->i_len; /* NB: accept short requests for backwards compat */ if (len > sizeof(vap->iv_txparms)) len = sizeof(vap->iv_txparms); return copyout(vap->iv_txparms, ireq->i_data, len); } static int ieee80211_ioctl_getdevcaps(struct ieee80211com *ic, const struct ieee80211req *ireq) { struct ieee80211_devcaps_req *dc; struct ieee80211req_chaninfo *ci; int maxchans, error; maxchans = 1 + ((ireq->i_len - sizeof(struct ieee80211_devcaps_req)) / sizeof(struct ieee80211_channel)); /* NB: require 1 so we know ic_nchans is accessible */ if (maxchans < 1) return EINVAL; /* constrain max request size, 2K channels is ~24Kbytes */ if (maxchans > 2048) maxchans = 2048; dc = (struct ieee80211_devcaps_req *) IEEE80211_MALLOC(IEEE80211_DEVCAPS_SIZE(maxchans), M_TEMP, IEEE80211_M_NOWAIT | IEEE80211_M_ZERO); if (dc == NULL) return ENOMEM; dc->dc_drivercaps = ic->ic_caps; dc->dc_cryptocaps = ic->ic_cryptocaps; dc->dc_htcaps = ic->ic_htcaps; dc->dc_vhtcaps = ic->ic_vhtcaps; ci = &dc->dc_chaninfo; ic->ic_getradiocaps(ic, maxchans, &ci->ic_nchans, ci->ic_chans); KASSERT(ci->ic_nchans <= maxchans, ("nchans %d maxchans %d", ci->ic_nchans, maxchans)); ieee80211_sort_channels(ci->ic_chans, ci->ic_nchans); error = copyout(dc, ireq->i_data, IEEE80211_DEVCAPS_SPACE(dc)); IEEE80211_FREE(dc, M_TEMP); return error; } static int ieee80211_ioctl_getstavlan(struct ieee80211vap *vap, struct ieee80211req *ireq) { struct ieee80211_node *ni; struct ieee80211req_sta_vlan vlan; int error; if (ireq->i_len != sizeof(vlan)) return EINVAL; error = copyin(ireq->i_data, &vlan, sizeof(vlan)); if (error != 0) return error; if (!IEEE80211_ADDR_EQ(vlan.sv_macaddr, zerobssid)) { ni = ieee80211_find_vap_node(&vap->iv_ic->ic_sta, vap, vlan.sv_macaddr); if (ni == NULL) return ENOENT; } else ni = ieee80211_ref_node(vap->iv_bss); vlan.sv_vlan = ni->ni_vlan; error = copyout(&vlan, ireq->i_data, sizeof(vlan)); ieee80211_free_node(ni); return error; } /* * Dummy ioctl get handler so the linker set is 
defined. */ static int dummy_ioctl_get(struct ieee80211vap *vap, struct ieee80211req *ireq) { return ENOSYS; } IEEE80211_IOCTL_GET(dummy, dummy_ioctl_get); static int ieee80211_ioctl_getdefault(struct ieee80211vap *vap, struct ieee80211req *ireq) { ieee80211_ioctl_getfunc * const *get; int error; SET_FOREACH(get, ieee80211_ioctl_getset) { error = (*get)(vap, ireq); if (error != ENOSYS) return error; } return EINVAL; } static int ieee80211_ioctl_get80211(struct ieee80211vap *vap, u_long cmd, struct ieee80211req *ireq) { #define MS(_v, _f) (((_v) & _f) >> _f##_S) struct ieee80211com *ic = vap->iv_ic; u_int kid, len; uint8_t tmpkey[IEEE80211_KEYBUF_SIZE]; char tmpssid[IEEE80211_NWID_LEN]; int error = 0; switch (ireq->i_type) { case IEEE80211_IOC_SSID: switch (vap->iv_state) { case IEEE80211_S_INIT: case IEEE80211_S_SCAN: ireq->i_len = vap->iv_des_ssid[0].len; memcpy(tmpssid, vap->iv_des_ssid[0].ssid, ireq->i_len); break; default: ireq->i_len = vap->iv_bss->ni_esslen; memcpy(tmpssid, vap->iv_bss->ni_essid, ireq->i_len); break; } error = copyout(tmpssid, ireq->i_data, ireq->i_len); break; case IEEE80211_IOC_NUMSSIDS: ireq->i_val = 1; break; case IEEE80211_IOC_WEP: if ((vap->iv_flags & IEEE80211_F_PRIVACY) == 0) ireq->i_val = IEEE80211_WEP_OFF; else if (vap->iv_flags & IEEE80211_F_DROPUNENC) ireq->i_val = IEEE80211_WEP_ON; else ireq->i_val = IEEE80211_WEP_MIXED; break; case IEEE80211_IOC_WEPKEY: kid = (u_int) ireq->i_val; if (kid >= IEEE80211_WEP_NKID) return EINVAL; len = (u_int) vap->iv_nw_keys[kid].wk_keylen; /* NB: only root can read WEP keys */ if (priv_check(curthread, PRIV_NET80211_GETKEY) == 0) { bcopy(vap->iv_nw_keys[kid].wk_key, tmpkey, len); } else { bzero(tmpkey, len); } ireq->i_len = len; error = copyout(tmpkey, ireq->i_data, len); break; case IEEE80211_IOC_NUMWEPKEYS: ireq->i_val = IEEE80211_WEP_NKID; break; case IEEE80211_IOC_WEPTXKEY: ireq->i_val = vap->iv_def_txkey; break; case IEEE80211_IOC_AUTHMODE: if (vap->iv_flags & IEEE80211_F_WPA) ireq->i_val = 
IEEE80211_AUTH_WPA; else ireq->i_val = vap->iv_bss->ni_authmode; break; case IEEE80211_IOC_CHANNEL: ireq->i_val = ieee80211_chan2ieee(ic, ic->ic_curchan); break; case IEEE80211_IOC_POWERSAVE: if (vap->iv_flags & IEEE80211_F_PMGTON) ireq->i_val = IEEE80211_POWERSAVE_ON; else ireq->i_val = IEEE80211_POWERSAVE_OFF; break; case IEEE80211_IOC_POWERSAVESLEEP: ireq->i_val = ic->ic_lintval; break; case IEEE80211_IOC_RTSTHRESHOLD: ireq->i_val = vap->iv_rtsthreshold; break; case IEEE80211_IOC_PROTMODE: ireq->i_val = ic->ic_protmode; break; case IEEE80211_IOC_TXPOWER: /* * Tx power limit is the min of max regulatory * power, any user-set limit, and the max the * radio can do. * * TODO: methodize this */ ireq->i_val = 2*ic->ic_curchan->ic_maxregpower; if (ireq->i_val > ic->ic_txpowlimit) ireq->i_val = ic->ic_txpowlimit; if (ireq->i_val > ic->ic_curchan->ic_maxpower) ireq->i_val = ic->ic_curchan->ic_maxpower; break; case IEEE80211_IOC_WPA: switch (vap->iv_flags & IEEE80211_F_WPA) { case IEEE80211_F_WPA1: ireq->i_val = 1; break; case IEEE80211_F_WPA2: ireq->i_val = 2; break; case IEEE80211_F_WPA1 | IEEE80211_F_WPA2: ireq->i_val = 3; break; default: ireq->i_val = 0; break; } break; case IEEE80211_IOC_CHANLIST: error = ieee80211_ioctl_getchanlist(vap, ireq); break; case IEEE80211_IOC_ROAMING: ireq->i_val = vap->iv_roaming; break; case IEEE80211_IOC_PRIVACY: ireq->i_val = (vap->iv_flags & IEEE80211_F_PRIVACY) != 0; break; case IEEE80211_IOC_DROPUNENCRYPTED: ireq->i_val = (vap->iv_flags & IEEE80211_F_DROPUNENC) != 0; break; case IEEE80211_IOC_COUNTERMEASURES: ireq->i_val = (vap->iv_flags & IEEE80211_F_COUNTERM) != 0; break; case IEEE80211_IOC_WME: ireq->i_val = (vap->iv_flags & IEEE80211_F_WME) != 0; break; case IEEE80211_IOC_HIDESSID: ireq->i_val = (vap->iv_flags & IEEE80211_F_HIDESSID) != 0; break; case IEEE80211_IOC_APBRIDGE: ireq->i_val = (vap->iv_flags & IEEE80211_F_NOBRIDGE) == 0; break; case IEEE80211_IOC_WPAKEY: error = ieee80211_ioctl_getkey(vap, ireq); break; case 
IEEE80211_IOC_CHANINFO: error = ieee80211_ioctl_getchaninfo(vap, ireq); break; case IEEE80211_IOC_BSSID: if (ireq->i_len != IEEE80211_ADDR_LEN) return EINVAL; if (vap->iv_state == IEEE80211_S_RUN || vap->iv_state == IEEE80211_S_SLEEP) { error = copyout(vap->iv_opmode == IEEE80211_M_WDS ? vap->iv_bss->ni_macaddr : vap->iv_bss->ni_bssid, ireq->i_data, ireq->i_len); } else error = copyout(vap->iv_des_bssid, ireq->i_data, ireq->i_len); break; case IEEE80211_IOC_WPAIE: case IEEE80211_IOC_WPAIE2: error = ieee80211_ioctl_getwpaie(vap, ireq, ireq->i_type); break; case IEEE80211_IOC_SCAN_RESULTS: error = ieee80211_ioctl_getscanresults(vap, ireq); break; case IEEE80211_IOC_STA_STATS: error = ieee80211_ioctl_getstastats(vap, ireq); break; case IEEE80211_IOC_TXPOWMAX: ireq->i_val = vap->iv_bss->ni_txpower; break; case IEEE80211_IOC_STA_TXPOW: error = ieee80211_ioctl_getstatxpow(vap, ireq); break; case IEEE80211_IOC_STA_INFO: error = ieee80211_ioctl_getstainfo(vap, ireq); break; case IEEE80211_IOC_WME_CWMIN: /* WME: CWmin */ case IEEE80211_IOC_WME_CWMAX: /* WME: CWmax */ case IEEE80211_IOC_WME_AIFS: /* WME: AIFS */ case IEEE80211_IOC_WME_TXOPLIMIT: /* WME: txops limit */ case IEEE80211_IOC_WME_ACM: /* WME: ACM (bss only) */ case IEEE80211_IOC_WME_ACKPOLICY: /* WME: ACK policy (!bss only) */ error = ieee80211_ioctl_getwmeparam(vap, ireq); break; case IEEE80211_IOC_DTIM_PERIOD: ireq->i_val = vap->iv_dtim_period; break; case IEEE80211_IOC_BEACON_INTERVAL: /* NB: get from ic_bss for station mode */ ireq->i_val = vap->iv_bss->ni_intval; break; case IEEE80211_IOC_PUREG: ireq->i_val = (vap->iv_flags & IEEE80211_F_PUREG) != 0; break; case IEEE80211_IOC_QUIET: ireq->i_val = vap->iv_quiet; break; case IEEE80211_IOC_QUIET_COUNT: ireq->i_val = vap->iv_quiet_count; break; case IEEE80211_IOC_QUIET_PERIOD: ireq->i_val = vap->iv_quiet_period; break; case IEEE80211_IOC_QUIET_DUR: ireq->i_val = vap->iv_quiet_duration; break; case IEEE80211_IOC_QUIET_OFFSET: ireq->i_val = vap->iv_quiet_offset; 
break; case IEEE80211_IOC_BGSCAN: ireq->i_val = (vap->iv_flags & IEEE80211_F_BGSCAN) != 0; break; case IEEE80211_IOC_BGSCAN_IDLE: ireq->i_val = vap->iv_bgscanidle*hz/1000; /* ms */ break; case IEEE80211_IOC_BGSCAN_INTERVAL: ireq->i_val = vap->iv_bgscanintvl/hz; /* seconds */ break; case IEEE80211_IOC_SCANVALID: ireq->i_val = vap->iv_scanvalid/hz; /* seconds */ break; case IEEE80211_IOC_FRAGTHRESHOLD: ireq->i_val = vap->iv_fragthreshold; break; case IEEE80211_IOC_MACCMD: error = ieee80211_ioctl_getmaccmd(vap, ireq); break; case IEEE80211_IOC_BURST: ireq->i_val = (vap->iv_flags & IEEE80211_F_BURST) != 0; break; case IEEE80211_IOC_BMISSTHRESHOLD: ireq->i_val = vap->iv_bmissthreshold; break; case IEEE80211_IOC_CURCHAN: error = ieee80211_ioctl_getcurchan(vap, ireq); break; case IEEE80211_IOC_SHORTGI: ireq->i_val = 0; if (vap->iv_flags_ht & IEEE80211_FHT_SHORTGI20) ireq->i_val |= IEEE80211_HTCAP_SHORTGI20; if (vap->iv_flags_ht & IEEE80211_FHT_SHORTGI40) ireq->i_val |= IEEE80211_HTCAP_SHORTGI40; break; case IEEE80211_IOC_AMPDU: ireq->i_val = 0; if (vap->iv_flags_ht & IEEE80211_FHT_AMPDU_TX) ireq->i_val |= 1; if (vap->iv_flags_ht & IEEE80211_FHT_AMPDU_RX) ireq->i_val |= 2; break; case IEEE80211_IOC_AMPDU_LIMIT: /* XXX TODO: make this a per-node thing; and leave this as global */ if (vap->iv_opmode == IEEE80211_M_HOSTAP) ireq->i_val = vap->iv_ampdu_rxmax; else if (vap->iv_state == IEEE80211_S_RUN || vap->iv_state == IEEE80211_S_SLEEP) /* * XXX TODO: this isn't completely correct, as we've * negotiated the higher of the two. */ ireq->i_val = MS(vap->iv_bss->ni_htparam, IEEE80211_HTCAP_MAXRXAMPDU); else ireq->i_val = vap->iv_ampdu_limit; break; case IEEE80211_IOC_AMPDU_DENSITY: /* XXX TODO: make this a per-node thing; and leave this as global */ if (vap->iv_opmode == IEEE80211_M_STA && (vap->iv_state == IEEE80211_S_RUN || vap->iv_state == IEEE80211_S_SLEEP)) /* * XXX TODO: this isn't completely correct, as we've * negotiated the higher of the two. 
*/ ireq->i_val = MS(vap->iv_bss->ni_htparam, IEEE80211_HTCAP_MPDUDENSITY); else ireq->i_val = vap->iv_ampdu_density; break; case IEEE80211_IOC_AMSDU: ireq->i_val = 0; if (vap->iv_flags_ht & IEEE80211_FHT_AMSDU_TX) ireq->i_val |= 1; if (vap->iv_flags_ht & IEEE80211_FHT_AMSDU_RX) ireq->i_val |= 2; break; case IEEE80211_IOC_AMSDU_LIMIT: ireq->i_val = vap->iv_amsdu_limit; /* XXX truncation? */ break; case IEEE80211_IOC_PUREN: ireq->i_val = (vap->iv_flags_ht & IEEE80211_FHT_PUREN) != 0; break; case IEEE80211_IOC_DOTH: ireq->i_val = (vap->iv_flags & IEEE80211_F_DOTH) != 0; break; case IEEE80211_IOC_REGDOMAIN: error = ieee80211_ioctl_getregdomain(vap, ireq); break; case IEEE80211_IOC_ROAM: error = ieee80211_ioctl_getroam(vap, ireq); break; case IEEE80211_IOC_TXPARAMS: error = ieee80211_ioctl_gettxparams(vap, ireq); break; case IEEE80211_IOC_HTCOMPAT: ireq->i_val = (vap->iv_flags_ht & IEEE80211_FHT_HTCOMPAT) != 0; break; case IEEE80211_IOC_DWDS: ireq->i_val = (vap->iv_flags & IEEE80211_F_DWDS) != 0; break; case IEEE80211_IOC_INACTIVITY: ireq->i_val = (vap->iv_flags_ext & IEEE80211_FEXT_INACT) != 0; break; case IEEE80211_IOC_APPIE: error = ieee80211_ioctl_getappie(vap, ireq); break; case IEEE80211_IOC_WPS: ireq->i_val = (vap->iv_flags_ext & IEEE80211_FEXT_WPS) != 0; break; case IEEE80211_IOC_TSN: ireq->i_val = (vap->iv_flags_ext & IEEE80211_FEXT_TSN) != 0; break; case IEEE80211_IOC_DFS: ireq->i_val = (vap->iv_flags_ext & IEEE80211_FEXT_DFS) != 0; break; case IEEE80211_IOC_DOTD: ireq->i_val = (vap->iv_flags_ext & IEEE80211_FEXT_DOTD) != 0; break; case IEEE80211_IOC_DEVCAPS: error = ieee80211_ioctl_getdevcaps(ic, ireq); break; case IEEE80211_IOC_HTPROTMODE: ireq->i_val = ic->ic_htprotmode; break; case IEEE80211_IOC_HTCONF: if (vap->iv_flags_ht & IEEE80211_FHT_HT) { ireq->i_val = 1; if (vap->iv_flags_ht & IEEE80211_FHT_USEHT40) ireq->i_val |= 2; } else ireq->i_val = 0; break; case IEEE80211_IOC_STA_VLAN: error = ieee80211_ioctl_getstavlan(vap, ireq); break; case 
/*
 * Install a key described by a struct ieee80211req_key copied in from
 * user space.
 *
 * A key index of IEEE80211_KEYIX_NONE selects a node's unicast key:
 * the bss node in station mode (the supplied address must equal its
 * bssid), otherwise the station found by ik_macaddr.  Any other index
 * must be below IEEE80211_WEP_NKID and selects vap->iv_nw_keys[].
 *
 * Returns 0 on success, or:
 *   EINVAL         wrong request length, bad flags for a unicast key,
 *                  or key index out of range
 *   E2BIG          key length larger than ik_keydata
 *   EADDRNOTAVAIL  station mode and address does not match the bssid
 *   ENOENT         no station with the given address
 *   ENXIO          ieee80211_crypto_newkey() failed
 *   EIO            ieee80211_crypto_setkey() failed
 *   or the error returned by copyin().
 */
static int
ieee80211_ioctl_setkey(struct ieee80211vap *vap, struct ieee80211req *ireq)
{
	struct ieee80211req_key ik;
	struct ieee80211_node *ni;
	struct ieee80211_key *wk;
	uint16_t kid;
	int error, i;

	if (ireq->i_len != sizeof(ik))
		return EINVAL;
	error = copyin(ireq->i_data, &ik, sizeof(ik));
	if (error)
		return error;
	/* NB: cipher support is verified by ieee80211_crypto_newkey */
	/* NB: this also checks ik->ik_keylen > sizeof(wk->wk_key) */
	if (ik.ik_keylen > sizeof(ik.ik_keydata))
		return E2BIG;
	kid = ik.ik_keyix;
	if (kid == IEEE80211_KEYIX_NONE) {
		/* XXX unicast keys currently must be tx/rx */
		if (ik.ik_flags != (IEEE80211_KEY_XMIT | IEEE80211_KEY_RECV))
			return EINVAL;
		if (vap->iv_opmode == IEEE80211_M_STA) {
			/* Take a reference on the bss node; dropped on every exit. */
			ni = ieee80211_ref_node(vap->iv_bss);
			if (!IEEE80211_ADDR_EQ(ik.ik_macaddr, ni->ni_bssid)) {
				ieee80211_free_node(ni);
				return EADDRNOTAVAIL;
			}
		} else {
			ni = ieee80211_find_vap_node(&vap->iv_ic->ic_sta, vap,
				ik.ik_macaddr);
			if (ni == NULL)
				return ENOENT;
		}
		wk = &ni->ni_ucastkey;
	} else {
		if (kid >= IEEE80211_WEP_NKID)
			return EINVAL;
		wk = &vap->iv_nw_keys[kid];
		/*
		 * Global slots start off w/o any assigned key index.
		 * Force one here for consistency with IEEE80211_IOC_WEPKEY.
		 */
		if (wk->wk_keyix == IEEE80211_KEYIX_NONE)
			wk->wk_keyix = kid;
		/* No node reference is held for a global key. */
		ni = NULL;
	}
	error = 0;
	/* All key state changes happen between update_begin and update_end. */
	ieee80211_key_update_begin(vap);
	if (ieee80211_crypto_newkey(vap, ik.ik_type, ik.ik_flags, wk)) {
		wk->wk_keylen = ik.ik_keylen;
		/* NB: MIC presence is implied by cipher type */
		/*
		 * The recorded length is clamped to IEEE80211_KEYBUF_SIZE;
		 * the full ik_keylen bytes are still copied below.
		 */
		if (wk->wk_keylen > IEEE80211_KEYBUF_SIZE)
			wk->wk_keylen = IEEE80211_KEYBUF_SIZE;
		/* Seed every per-TID receive counter with the supplied value. */
		for (i = 0; i < IEEE80211_TID_SIZE; i++)
			wk->wk_keyrsc[i] = ik.ik_keyrsc;
		wk->wk_keytsc = 0;			/* new key, reset */
		memset(wk->wk_key, 0, sizeof(wk->wk_key));
		memcpy(wk->wk_key, ik.ik_keydata, ik.ik_keylen);
		/* Use the node's address when there is one, else the caller's. */
		IEEE80211_ADDR_COPY(wk->wk_macaddr,
		    ni != NULL ?  ni->ni_macaddr : ik.ik_macaddr);
		if (!ieee80211_crypto_setkey(vap, wk))
			error = EIO;
		else if ((ik.ik_flags & IEEE80211_KEY_DEFAULT))
			/*
			 * Inform the driver that this is the default
			 * transmit key.  Now, ideally we'd just set
			 * a flag in the key update that would
			 * say "yes, we're the default key", but
			 * that currently isn't the way the ioctl ->
			 * key interface works.
			 */
			ieee80211_crypto_set_deftxkey(vap, kid);
	} else
		error = ENXIO;
	ieee80211_key_update_end(vap);
	/* Drop the node reference taken in the unicast-key path. */
	if (ni != NULL)
		ieee80211_free_node(ni);
	return error;
}
"ACCEPT" : "REJECT", reason, ieee80211_reason_to_string(reason)); } else if (!(IEEE80211_MLME_ASSOC <= op && op <= IEEE80211_MLME_AUTH)) { IEEE80211_NOTE_MAC(vap, IEEE80211_MSG_ANY, mac, "unknown MLME request %d (reason: %d (%s))", op, reason, ieee80211_reason_to_string(reason)); } else if (reason == IEEE80211_STATUS_SUCCESS) { IEEE80211_NOTE_MAC(vap, ops[op].mask, mac, "station %s via MLME", ops[op].opstr); } else { IEEE80211_NOTE_MAC(vap, ops[op].mask, mac, "station %s via MLME (reason: %d (%s))", ops[op].opstr, reason, ieee80211_reason_to_string(reason)); } #endif /* IEEE80211_DEBUG */ } static void domlme(void *arg, struct ieee80211_node *ni) { struct mlmeop *mop = arg; struct ieee80211vap *vap = ni->ni_vap; if (vap != mop->vap) return; /* * NB: if ni_associd is zero then the node is already cleaned * up and we don't need to do this (we're safely holding a * reference but should otherwise not modify it's state). */ if (ni->ni_associd == 0) return; mlmedebug(vap, ni->ni_macaddr, mop->op, mop->reason); if (mop->op == IEEE80211_MLME_DEAUTH) { IEEE80211_SEND_MGMT(ni, IEEE80211_FC0_SUBTYPE_DEAUTH, mop->reason); } else { IEEE80211_SEND_MGMT(ni, IEEE80211_FC0_SUBTYPE_DISASSOC, mop->reason); } ieee80211_node_leave(ni); } static int setmlme_dropsta(struct ieee80211vap *vap, const uint8_t mac[IEEE80211_ADDR_LEN], struct mlmeop *mlmeop) { struct ieee80211_node_table *nt = &vap->iv_ic->ic_sta; struct ieee80211_node *ni; int error = 0; /* NB: the broadcast address means do 'em all */ if (!IEEE80211_ADDR_EQ(mac, vap->iv_ifp->if_broadcastaddr)) { IEEE80211_NODE_LOCK(nt); ni = ieee80211_find_node_locked(nt, mac); IEEE80211_NODE_UNLOCK(nt); /* * Don't do the node update inside the node * table lock. This unfortunately causes LORs * with drivers and their TX paths. 
/*
 * Carry out an MLME operation (op) against the station identified by
 * mac, using reason as the reason/status code.
 *
 * Which operations are accepted depends on the vap's operating mode:
 *   DISASSOC/DEAUTH       STA, HOSTAP, WDS (DEAUTH only) and MBSS
 *   AUTHORIZE/UNAUTHORIZE HOSTAP and WDS
 *   AUTH                  HOSTAP only
 *
 * Returns 0 on success, EINVAL for an unsupported op/mode combination,
 * ENOENT when the station cannot be found, or, for the HOSTAP
 * disassoc/deauth case, the result of setmlme_dropsta().
 */
static int
setmlme_common(struct ieee80211vap *vap, int op,
	const uint8_t mac[IEEE80211_ADDR_LEN], int reason)
{
	struct ieee80211com *ic = vap->iv_ic;
	struct ieee80211_node_table *nt = &ic->ic_sta;
	struct ieee80211_node *ni;
	struct mlmeop mlmeop;
	int error;

	error = 0;
	switch (op) {
	case IEEE80211_MLME_DISASSOC:
	case IEEE80211_MLME_DEAUTH:
		switch (vap->iv_opmode) {
		case IEEE80211_M_STA:
			mlmedebug(vap, vap->iv_bss->ni_macaddr, op, reason);
			/* XXX not quite right */
			ieee80211_new_state(vap, IEEE80211_S_INIT, reason);
			break;
		case IEEE80211_M_HOSTAP:
			/* Package the request for setmlme_dropsta(). */
			mlmeop.vap = vap;
			mlmeop.op = op;
			mlmeop.reason = reason;
			error = setmlme_dropsta(vap, mac, &mlmeop);
			break;
		case IEEE80211_M_WDS:
			/* XXX user app should send raw frame? */
			if (op != IEEE80211_MLME_DEAUTH) {
				error = EINVAL;
				break;
			}
#if 0
			/* XXX accept any address, simplifies user code */
			if (!IEEE80211_ADDR_EQ(mac, vap->iv_bss->ni_macaddr)) {
				error = EINVAL;
				break;
			}
#endif
			mlmedebug(vap, vap->iv_bss->ni_macaddr, op, reason);
			/* Hold a reference on the bss node while sending. */
			ni = ieee80211_ref_node(vap->iv_bss);
			IEEE80211_SEND_MGMT(ni,
			    IEEE80211_FC0_SUBTYPE_DEAUTH, reason);
			ieee80211_free_node(ni);
			break;
		case IEEE80211_M_MBSS:
			IEEE80211_NODE_LOCK(nt);
			ni = ieee80211_find_node_locked(nt, mac);
			/*
			 * Don't do the node update inside the node
			 * table lock.  This unfortunately causes LORs
			 * with drivers and their TX paths.
			 */
			IEEE80211_NODE_UNLOCK(nt);
			if (ni != NULL) {
				ieee80211_node_leave(ni);
				ieee80211_free_node(ni);
			} else {
				error = ENOENT;
			}
			break;
		default:
			error = EINVAL;
			break;
		}
		break;
	case IEEE80211_MLME_AUTHORIZE:
	case IEEE80211_MLME_UNAUTHORIZE:
		if (vap->iv_opmode != IEEE80211_M_HOSTAP &&
		    vap->iv_opmode != IEEE80211_M_WDS) {
			error = EINVAL;
			break;
		}
		IEEE80211_NODE_LOCK(nt);
		ni = ieee80211_find_vap_node_locked(nt, vap, mac);
		/*
		 * Don't do the node update inside the node
		 * table lock.  This unfortunately causes LORs
		 * with drivers and their TX paths.
		 */
		IEEE80211_NODE_UNLOCK(nt);
		if (ni != NULL) {
			mlmedebug(vap, mac, op, reason);
			if (op == IEEE80211_MLME_AUTHORIZE)
				ieee80211_node_authorize(ni);
			else
				ieee80211_node_unauthorize(ni);
			ieee80211_free_node(ni);
		} else
			error = ENOENT;
		break;
	case IEEE80211_MLME_AUTH:
		if (vap->iv_opmode != IEEE80211_M_HOSTAP) {
			error = EINVAL;
			break;
		}
		IEEE80211_NODE_LOCK(nt);
		ni = ieee80211_find_vap_node_locked(nt, vap, mac);
		/*
		 * Don't do the node update inside the node
		 * table lock.  This unfortunately causes LORs
		 * with drivers and their TX paths.
		 */
		IEEE80211_NODE_UNLOCK(nt);
		if (ni != NULL) {
			mlmedebug(vap, mac, op, reason);
			/* reason doubles as the status: success accepts the station. */
			if (reason == IEEE80211_STATUS_SUCCESS) {
				IEEE80211_SEND_MGMT(ni,
				    IEEE80211_FC0_SUBTYPE_AUTH, 2);
				/*
				 * For shared key auth, just continue the
				 * exchange.  Otherwise when 802.1x is not in
				 * use mark the port authorized at this point
				 * so traffic can flow.
				 */
				if (ni->ni_authmode != IEEE80211_AUTH_8021X &&
				    ni->ni_challenge == NULL)
				      ieee80211_node_authorize(ni);
			} else {
				/* Rejected: count it, send the error and drop the station. */
				vap->iv_stats.is_rx_acl++;
				ieee80211_send_error(ni, ni->ni_macaddr,
				    IEEE80211_FC0_SUBTYPE_AUTH, 2|(reason<<16));
				ieee80211_node_leave(ni);
			}
			ieee80211_free_node(ni);
		} else
			error = ENOENT;
		break;
	default:
		error = EINVAL;
		break;
	}
	return error;
}
*/ static void mlmelookup(void *arg, const struct ieee80211_scan_entry *se) { struct scanlookup *look = arg; if (!IEEE80211_ADDR_EQ(look->mac, se->se_macaddr)) return; if (look->esslen != 0) { if (se->se_ssid[1] != look->esslen) return; if (memcmp(look->essid, se->se_ssid+2, look->esslen)) return; } look->se = se; } static int setmlme_assoc_sta(struct ieee80211vap *vap, const uint8_t mac[IEEE80211_ADDR_LEN], int ssid_len, const uint8_t ssid[IEEE80211_NWID_LEN]) { struct scanlookup lookup; KASSERT(vap->iv_opmode == IEEE80211_M_STA, ("expected opmode STA not %s", ieee80211_opmode_name[vap->iv_opmode])); /* NB: this is racey if roaming is !manual */ lookup.se = NULL; lookup.mac = mac; lookup.esslen = ssid_len; lookup.essid = ssid; ieee80211_scan_iterate(vap, mlmelookup, &lookup); if (lookup.se == NULL) return ENOENT; mlmedebug(vap, mac, IEEE80211_MLME_ASSOC, 0); if (!ieee80211_sta_join(vap, lookup.se->se_chan, lookup.se)) return EIO; /* XXX unique but could be better */ return 0; } static int setmlme_assoc_adhoc(struct ieee80211vap *vap, const uint8_t mac[IEEE80211_ADDR_LEN], int ssid_len, const uint8_t ssid[IEEE80211_NWID_LEN]) { struct ieee80211_scan_req *sr; int error; KASSERT(vap->iv_opmode == IEEE80211_M_IBSS || vap->iv_opmode == IEEE80211_M_AHDEMO, ("expected opmode IBSS or AHDEMO not %s", ieee80211_opmode_name[vap->iv_opmode])); if (ssid_len == 0) return EINVAL; sr = IEEE80211_MALLOC(sizeof(*sr), M_TEMP, IEEE80211_M_NOWAIT | IEEE80211_M_ZERO); if (sr == NULL) return ENOMEM; /* NB: IEEE80211_IOC_SSID call missing for ap_scan=2. 
*/ memset(vap->iv_des_ssid[0].ssid, 0, IEEE80211_NWID_LEN); vap->iv_des_ssid[0].len = ssid_len; memcpy(vap->iv_des_ssid[0].ssid, ssid, ssid_len); vap->iv_des_nssid = 1; sr->sr_flags = IEEE80211_IOC_SCAN_ACTIVE | IEEE80211_IOC_SCAN_ONCE; sr->sr_duration = IEEE80211_IOC_SCAN_FOREVER; memcpy(sr->sr_ssid[0].ssid, ssid, ssid_len); sr->sr_ssid[0].len = ssid_len; sr->sr_nssid = 1; error = ieee80211_scanreq(vap, sr); IEEE80211_FREE(sr, M_TEMP); return error; } static int ieee80211_ioctl_setmlme(struct ieee80211vap *vap, struct ieee80211req *ireq) { struct ieee80211req_mlme mlme; int error; if (ireq->i_len != sizeof(mlme)) return EINVAL; error = copyin(ireq->i_data, &mlme, sizeof(mlme)); if (error) return error; if (vap->iv_opmode == IEEE80211_M_STA && mlme.im_op == IEEE80211_MLME_ASSOC) return setmlme_assoc_sta(vap, mlme.im_macaddr, vap->iv_des_ssid[0].len, vap->iv_des_ssid[0].ssid); else if ((vap->iv_opmode == IEEE80211_M_IBSS || vap->iv_opmode == IEEE80211_M_AHDEMO) && mlme.im_op == IEEE80211_MLME_ASSOC) return setmlme_assoc_adhoc(vap, mlme.im_macaddr, mlme.im_ssid_len, mlme.im_ssid); else return setmlme_common(vap, mlme.im_op, mlme.im_macaddr, mlme.im_reason); } static int ieee80211_ioctl_macmac(struct ieee80211vap *vap, struct ieee80211req *ireq) { uint8_t mac[IEEE80211_ADDR_LEN]; const struct ieee80211_aclator *acl = vap->iv_acl; int error; if (ireq->i_len != sizeof(mac)) return EINVAL; error = copyin(ireq->i_data, mac, ireq->i_len); if (error) return error; if (acl == NULL) { acl = ieee80211_aclator_get("mac"); if (acl == NULL || !acl->iac_attach(vap)) return EINVAL; vap->iv_acl = acl; } if (ireq->i_type == IEEE80211_IOC_ADDMAC) acl->iac_add(vap, mac); else acl->iac_remove(vap, mac); return 0; } static int ieee80211_ioctl_setmaccmd(struct ieee80211vap *vap, struct ieee80211req *ireq) { const struct ieee80211_aclator *acl = vap->iv_acl; switch (ireq->i_val) { case IEEE80211_MACCMD_POLICY_OPEN: case IEEE80211_MACCMD_POLICY_ALLOW: case IEEE80211_MACCMD_POLICY_DENY: case 
IEEE80211_MACCMD_POLICY_RADIUS: if (acl == NULL) { acl = ieee80211_aclator_get("mac"); if (acl == NULL || !acl->iac_attach(vap)) return EINVAL; vap->iv_acl = acl; } acl->iac_setpolicy(vap, ireq->i_val); break; case IEEE80211_MACCMD_FLUSH: if (acl != NULL) acl->iac_flush(vap); /* NB: silently ignore when not in use */ break; case IEEE80211_MACCMD_DETACH: if (acl != NULL) { vap->iv_acl = NULL; acl->iac_detach(vap); } break; default: if (acl == NULL) return EINVAL; else return acl->iac_setioctl(vap, ireq); } return 0; } static int ieee80211_ioctl_setchanlist(struct ieee80211vap *vap, struct ieee80211req *ireq) { struct ieee80211com *ic = vap->iv_ic; uint8_t *chanlist, *list; int i, nchan, maxchan, error; if (ireq->i_len > sizeof(ic->ic_chan_active)) ireq->i_len = sizeof(ic->ic_chan_active); list = IEEE80211_MALLOC(ireq->i_len + IEEE80211_CHAN_BYTES, M_TEMP, IEEE80211_M_NOWAIT | IEEE80211_M_ZERO); if (list == NULL) return ENOMEM; error = copyin(ireq->i_data, list, ireq->i_len); if (error) { IEEE80211_FREE(list, M_TEMP); return error; } nchan = 0; chanlist = list + ireq->i_len; /* NB: zero'd already */ maxchan = ireq->i_len * NBBY; for (i = 0; i < ic->ic_nchans; i++) { const struct ieee80211_channel *c = &ic->ic_channels[i]; /* * Calculate the intersection of the user list and the * available channels so users can do things like specify * 1-255 to get all available channels. 
*/ if (c->ic_ieee < maxchan && isset(list, c->ic_ieee)) { setbit(chanlist, c->ic_ieee); nchan++; } } if (nchan == 0) { IEEE80211_FREE(list, M_TEMP); return EINVAL; } if (ic->ic_bsschan != IEEE80211_CHAN_ANYC && /* XXX */ isclr(chanlist, ic->ic_bsschan->ic_ieee)) ic->ic_bsschan = IEEE80211_CHAN_ANYC; memcpy(ic->ic_chan_active, chanlist, IEEE80211_CHAN_BYTES); ieee80211_scan_flush(vap); IEEE80211_FREE(list, M_TEMP); return ENETRESET; } static int ieee80211_ioctl_setstastats(struct ieee80211vap *vap, struct ieee80211req *ireq) { struct ieee80211_node *ni; uint8_t macaddr[IEEE80211_ADDR_LEN]; int error; /* * NB: we could copyin ieee80211req_sta_stats so apps * could make selective changes but that's overkill; * just clear all stats for now. */ if (ireq->i_len < IEEE80211_ADDR_LEN) return EINVAL; error = copyin(ireq->i_data, macaddr, IEEE80211_ADDR_LEN); if (error != 0) return error; ni = ieee80211_find_vap_node(&vap->iv_ic->ic_sta, vap, macaddr); if (ni == NULL) return ENOENT; /* XXX require ni_vap == vap? 
*/
	memset(&ni->ni_stats, 0, sizeof(ni->ni_stats));
	ieee80211_free_node(ni);
	return 0;
}

/*
 * Handler for IEEE80211_IOC_STA_TXPOW: record a tx power value for
 * one station.
 *
 * The request payload must be exactly one struct ieee80211req_sta_txpow.
 * The station is looked up by MAC address in the com's station table
 * (restricted to this vap) and its ni_txpower is overwritten.
 *
 * Returns EINVAL on a size mismatch, the copyin() error if the copy
 * fails, ENOENT if no matching node exists, otherwise 0.
 */
static int
ieee80211_ioctl_setstatxpow(struct ieee80211vap *vap, struct ieee80211req *ireq)
{
	struct ieee80211_node *ni;
	struct ieee80211req_sta_txpow txpow;
	int error;

	if (ireq->i_len != sizeof(txpow))
		return EINVAL;
	error = copyin(ireq->i_data, &txpow, sizeof(txpow));
	if (error != 0)
		return error;
	ni = ieee80211_find_vap_node(&vap->iv_ic->ic_sta, vap, txpow.it_macaddr);
	if (ni == NULL)
		return ENOENT;
	ni->ni_txpower = txpow.it_txpow;
	/* release the node returned by the lookup */
	ieee80211_free_node(ni);
	return error;		/* NB: error is 0 here */
}

/*
 * Handler for the IEEE80211_IOC_WME_* parameter requests.
 *
 * ireq->i_len is not a length here: it carries the access category
 * (masked with IEEE80211_WMEPARAM_VAL) and a "bss" selector bit
 * (IEEE80211_WMEPARAM_BSS).  An out-of-range access category falls
 * back to WME_AC_BE.
 *
 * Two parameter sets are touched: wmep (the wme_wme[Bss]ChanParams
 * entry) always receives the new value; chanp (the wme_[bss]ChanParams
 * entry) receives it too unless the request is for the bss set while
 * WME_F_AGGRMODE is set.  For ACM the chanp copy depends on aggrmode
 * only, and for the ACK policy both copies are always written.
 *
 * An i_type that matches no case writes nothing, but
 * ieee80211_wme_updateparams() is still called.
 *
 * Returns EOPNOTSUPP when the device lacks IEEE80211_C_WME, else 0.
 */
static int
ieee80211_ioctl_setwmeparam(struct ieee80211vap *vap, struct ieee80211req *ireq)
{
	struct ieee80211com *ic = vap->iv_ic;
	struct ieee80211_wme_state *wme = &ic->ic_wme;
	struct wmeParams *wmep, *chanp;
	int isbss, ac, aggrmode;

	if ((ic->ic_caps & IEEE80211_C_WME) == 0)
		return EOPNOTSUPP;

	isbss = (ireq->i_len & IEEE80211_WMEPARAM_BSS);
	ac = (ireq->i_len & IEEE80211_WMEPARAM_VAL);
	aggrmode = (wme->wme_flags & WME_F_AGGRMODE);
	if (ac >= WME_NUM_AC)
		ac = WME_AC_BE;
	if (isbss) {
		chanp = &wme->wme_bssChanParams.cap_wmeParams[ac];
		wmep = &wme->wme_wmeBssChanParams.cap_wmeParams[ac];
	} else {
		chanp = &wme->wme_chanParams.cap_wmeParams[ac];
		wmep = &wme->wme_wmeChanParams.cap_wmeParams[ac];
	}
	switch (ireq->i_type) {
	case IEEE80211_IOC_WME_CWMIN:		/* WME: CWmin */
		wmep->wmep_logcwmin = ireq->i_val;
		if (!isbss || !aggrmode)
			chanp->wmep_logcwmin = ireq->i_val;
		break;
	case IEEE80211_IOC_WME_CWMAX:		/* WME: CWmax */
		wmep->wmep_logcwmax = ireq->i_val;
		if (!isbss || !aggrmode)
			chanp->wmep_logcwmax = ireq->i_val;
		break;
	case IEEE80211_IOC_WME_AIFS:		/* WME: AIFS */
		wmep->wmep_aifsn = ireq->i_val;
		if (!isbss || !aggrmode)
			chanp->wmep_aifsn = ireq->i_val;
		break;
	case IEEE80211_IOC_WME_TXOPLIMIT:	/* WME: txops limit */
		wmep->wmep_txopLimit = ireq->i_val;
		if (!isbss || !aggrmode)
			chanp->wmep_txopLimit = ireq->i_val;
		break;
	case IEEE80211_IOC_WME_ACM:		/* WME: ACM (bss only) */
		wmep->wmep_acm = ireq->i_val;
		if (!aggrmode)
			chanp->wmep_acm = ireq->i_val;
		break;
	case IEEE80211_IOC_WME_ACKPOLICY:	/* WME: ACK policy (!bss only)*/
		/* NB: stored sense is inverted: a zero i_val sets noackPolicy */
		wmep->wmep_noackPolicy = chanp->wmep_noackPolicy =
			(ireq->i_val) == 0;
		break;
	}
	ieee80211_wme_updateparams(vap);
	return 0;
}

/*
 * Report whether the channel table holds an "any 11g" entry
 * (IEEE80211_IS_CHAN_ANYG) at frequency freq other than the entry at
 * index start.  Entries after start are examined first, then the ones
 * before it; the entry at start itself is never looked at.
 *
 * Returns 1 if such an entry exists, 0 otherwise.
 */
static int
find11gchannel(struct ieee80211com *ic, int start, int freq)
{
	const struct ieee80211_channel *c;
	int i;

	for (i = start+1; i < ic->ic_nchans; i++) {
		c = &ic->ic_channels[i];
		if (c->ic_freq == freq && IEEE80211_IS_CHAN_ANYG(c))
			return 1;
	}
	/* NB: should not be needed but in case things are mis-sorted */
	for (i = 0; i < start; i++) {
		c = &ic->ic_channels[i];
		if (c->ic_freq == freq && IEEE80211_IS_CHAN_ANYG(c))
			return 1;
	}
	return 0;
}

/*
 * Locate the first entry in ic->ic_channels whose IEEE channel number
 * is ieee and which is acceptable for the given phy mode.
 *
 * mode indexes the chanflags table directly and is not range-checked
 * here; callers must pass a value below IEEE80211_MODE_MAX.
 *
 * For IEEE80211_MODE_AUTO turbo channels are skipped, and an 11b entry
 * is skipped when find11gchannel() finds an 11g entry at the same
 * frequency.  For every other mode the entry must carry all flags
 * listed for that mode; in addition the VHT modes require a VHT
 * channel and the 11n modes require an HT channel that is not VHT.
 *
 * Returns the matching entry or NULL if there is none.
 */
static struct ieee80211_channel *
findchannel(struct ieee80211com *ic, int ieee, int mode)
{
	static const u_int chanflags[IEEE80211_MODE_MAX] = {
	    [IEEE80211_MODE_AUTO]	= 0,
	    [IEEE80211_MODE_11A]	= IEEE80211_CHAN_A,
	    [IEEE80211_MODE_11B]	= IEEE80211_CHAN_B,
	    [IEEE80211_MODE_11G]	= IEEE80211_CHAN_G,
	    [IEEE80211_MODE_FH]		= IEEE80211_CHAN_FHSS,
	    [IEEE80211_MODE_TURBO_A]	= IEEE80211_CHAN_108A,
	    [IEEE80211_MODE_TURBO_G]	= IEEE80211_CHAN_108G,
	    [IEEE80211_MODE_STURBO_A]	= IEEE80211_CHAN_STURBO,
	    [IEEE80211_MODE_HALF]	= IEEE80211_CHAN_HALF,
	    [IEEE80211_MODE_QUARTER]	= IEEE80211_CHAN_QUARTER,
	    /* NB: handled specially below */
	    [IEEE80211_MODE_11NA]	= IEEE80211_CHAN_A,
	    [IEEE80211_MODE_11NG]	= IEEE80211_CHAN_G,
	    [IEEE80211_MODE_VHT_5GHZ]	= IEEE80211_CHAN_A,
	    [IEEE80211_MODE_VHT_2GHZ]	= IEEE80211_CHAN_G,
	};
	u_int modeflags;
	int i;

	modeflags = chanflags[mode];
	for (i = 0; i < ic->ic_nchans; i++) {
		struct ieee80211_channel *c = &ic->ic_channels[i];

		if (c->ic_ieee != ieee)
			continue;
		if (mode == IEEE80211_MODE_AUTO) {
			/* ignore turbo channels for autoselect */
			if (IEEE80211_IS_CHAN_TURBO(c))
				continue;
			/*
			 * XXX special-case 11b/g channels so we
			 *     always select the g channel if both
			 *     are present.
			 * XXX prefer HT to non-HT?
			 */
			if (!IEEE80211_IS_CHAN_B(c) ||
			    !find11gchannel(ic, i, c->ic_freq))
				return c;
		} else {
			/* must check VHT specifically */
			if ((mode == IEEE80211_MODE_VHT_5GHZ ||
			    mode == IEEE80211_MODE_VHT_2GHZ) &&
			    !IEEE80211_IS_CHAN_VHT(c))
				continue;

			/*
			 * Must check HT specially - only match on HT,
			 * not HT+VHT channels
			 */
			if ((mode == IEEE80211_MODE_11NA ||
			    mode == IEEE80211_MODE_11NG) &&
			    !IEEE80211_IS_CHAN_HT(c))
				continue;

			if ((mode == IEEE80211_MODE_11NA ||
			    mode == IEEE80211_MODE_11NG) &&
			    IEEE80211_IS_CHAN_VHT(c))
				continue;

			/* Check that the modeflags above match */
			if ((c->ic_flags & modeflags) == modeflags)
				return c;
		}
	}
	return NULL;
}

/*
 * Check the specified channel against any desired mode (aka netband).
 * This is only used (presently) when operating in hostap mode
 * to enforce consistency.
 *
 * Returns non-zero when the channel is acceptable for the mode.
 * Modes without an explicit case below are always accepted.  The
 * 11g and 11a cases additionally reject HT channels.
 */
static int
check_mode_consistency(const struct ieee80211_channel *c, int mode)
{
	KASSERT(c != IEEE80211_CHAN_ANYC, ("oops, no channel"));

	switch (mode) {
	case IEEE80211_MODE_11B:
		return (IEEE80211_IS_CHAN_B(c));
	case IEEE80211_MODE_11G:
		return (IEEE80211_IS_CHAN_ANYG(c) && !IEEE80211_IS_CHAN_HT(c));
	case IEEE80211_MODE_11A:
		return (IEEE80211_IS_CHAN_A(c) && !IEEE80211_IS_CHAN_HT(c));
	case IEEE80211_MODE_STURBO_A:
		return (IEEE80211_IS_CHAN_STURBO(c));
	case IEEE80211_MODE_11NA:
		return (IEEE80211_IS_CHAN_HTA(c));
	case IEEE80211_MODE_11NG:
		return (IEEE80211_IS_CHAN_HTG(c));
	}
	return 1;

}

/*
 * Common code to set the current channel.  If the device
 * is up and running this may result in an immediate channel
 * change or a kick of the state machine.
 *
 * c may be IEEE80211_CHAN_ANYC, in which case the validation below
 * is skipped.  For a real channel:
 *   - EBUSY is returned if the channel is marked for radar,
 *   - EINVAL is returned in hostap mode for a NOHOSTAP channel or one
 *     that fails check_mode_consistency() against iv_des_mode,
 *   - EINVAL is returned in IBSS mode for a NOADHOC channel,
 *   - 0 is returned without any change when the vap is in RUN or
 *     SLEEP state and the bss node is already on that channel.
 * Otherwise iv_des_chan is updated and the function returns 0.
 */
static int
setcurchan(struct ieee80211vap *vap, struct ieee80211_channel *c)
{
	struct ieee80211com *ic = vap->iv_ic;
	int error;

	if (c != IEEE80211_CHAN_ANYC) {
		if (IEEE80211_IS_CHAN_RADAR(c))
			return EBUSY;	/* XXX better code? */
		if (vap->iv_opmode == IEEE80211_M_HOSTAP) {
			if (IEEE80211_IS_CHAN_NOHOSTAP(c))
				return EINVAL;
			if (!check_mode_consistency(c, vap->iv_des_mode))
				return EINVAL;
		} else if (vap->iv_opmode == IEEE80211_M_IBSS) {
			if (IEEE80211_IS_CHAN_NOADHOC(c))
				return EINVAL;
		}
		if ((vap->iv_state == IEEE80211_S_RUN || vap->iv_state == IEEE80211_S_SLEEP) &&
		    vap->iv_bss->ni_chan == c)
			return 0;	/* NB: nothing to do */
	}
	vap->iv_des_chan = c;

	error = 0;
	if (vap->iv_opmode == IEEE80211_M_MONITOR &&
	    vap->iv_des_chan != IEEE80211_CHAN_ANYC) {
		/*
		 * Monitor mode can switch directly.
		 */
		if (IFNET_IS_UP_RUNNING(vap->iv_ifp)) {
			/* XXX need state machine for other vap's to follow */
			ieee80211_setcurchan(ic, vap->iv_des_chan);
			vap->iv_bss->ni_chan = ic->ic_curchan;
		} else {
			/* interface not running: only record the choice */
			ic->ic_curchan = vap->iv_des_chan;
			ic->ic_rt = ieee80211_get_ratetable(ic->ic_curchan);
		}
	} else {
		/*
		 * Need to go through the state machine in case we
		 * need to reassociate or the like.  The state machine
		 * will pickup the desired channel and avoid scanning.
		 */
		if (IS_UP_AUTO(vap))
			ieee80211_new_state(vap, IEEE80211_S_SCAN, 0);
		else if (vap->iv_des_chan != IEEE80211_CHAN_ANYC) {
			/*
			 * When not up+running and a real channel has
			 * been specified fix the current channel so
			 * there is immediate feedback; e.g. via ifconfig.
			 */
			ic->ic_curchan = vap->iv_des_chan;
			ic->ic_rt = ieee80211_get_ratetable(ic->ic_curchan);
		}
	}
	return error;
}

/*
 * Old api for setting the current channel; this is
 * deprecated because channel numbers are ambiguous.
*/
/*
 * Translate the IEEE channel number in ireq->i_val into a channel
 * table entry and hand it to setcurchan().
 *
 * A value of 0 or (int16_t)IEEE80211_CHAN_ANY selects "any channel".
 * Otherwise the channel is looked up for the vap's desired mode and,
 * failing that, for IEEE80211_MODE_AUTO; EINVAL is returned when both
 * lookups fail.  The result is then refined for some desired modes:
 *   11b / 11ng:               an "any g" channel is swapped for the
 *                             11b / 11ng entry with the same number,
 *   turbo A / 11na / 5GHz VHT: an 11a channel is swapped for the
 *                             entry of the desired mode,
 *   2GHz VHT:                 not implemented, a "TBD" line is printed.
 * When the refining lookup finds nothing the first result is kept.
 *
 * Returns whatever setcurchan() returns, or EINVAL as described.
 */
static int
ieee80211_ioctl_setchannel(struct ieee80211vap *vap,
	const struct ieee80211req *ireq)
{
	struct ieee80211com *ic = vap->iv_ic;
	struct ieee80211_channel *c, *better;
	int des_mode;
	int refine;	/* mode to repeat the lookup with, -1 for none */

	/* XXX 0xffff overflows 16-bit signed */
	if (ireq->i_val == 0 ||
	    ireq->i_val == (int16_t) IEEE80211_CHAN_ANY)
		return setcurchan(vap, IEEE80211_CHAN_ANYC);

	c = findchannel(ic, ireq->i_val, vap->iv_des_mode);
	if (c == NULL)
		c = findchannel(ic, ireq->i_val, IEEE80211_MODE_AUTO);
	if (c == NULL)
		return EINVAL;

	/*
	 * Decide whether the entry found so far should be replaced
	 * by the variant that belongs to the desired mode.
	 */
	refine = -1;
	des_mode = vap->iv_des_mode;
	switch (des_mode) {
	case IEEE80211_MODE_11B:	/* NB: 11g w/o 11b should not happen */
	case IEEE80211_MODE_11NG:
		if (IEEE80211_IS_CHAN_ANYG(c))
			refine = des_mode;
		break;
	case IEEE80211_MODE_TURBO_A:
	case IEEE80211_MODE_11NA:
	case IEEE80211_MODE_VHT_5GHZ:
		if (IEEE80211_IS_CHAN_A(c))
			refine = des_mode;
		break;
	case IEEE80211_MODE_VHT_2GHZ:
		printf("%s: TBD\n", __func__);
		break;
	default:		/* NB: no static turboG */
		break;
	}
	if (refine != -1) {
		better = findchannel(ic, ireq->i_val, refine);
		if (better != NULL)
			c = better;
	}
	return setcurchan(vap, c);
}

/*
 * New/current api for setting the current channel; a complete
 * channel description is provided so there is no ambiguity in
 * identifying the channel.
*/ static int ieee80211_ioctl_setcurchan(struct ieee80211vap *vap, const struct ieee80211req *ireq) { struct ieee80211com *ic = vap->iv_ic; struct ieee80211_channel chan, *c; int error; if (ireq->i_len != sizeof(chan)) return EINVAL; error = copyin(ireq->i_data, &chan, sizeof(chan)); if (error != 0) return error; /* XXX 0xffff overflows 16-bit signed */ if (chan.ic_freq == 0 || chan.ic_freq == IEEE80211_CHAN_ANY) { c = IEEE80211_CHAN_ANYC; } else { c = ieee80211_find_channel(ic, chan.ic_freq, chan.ic_flags); if (c == NULL) return EINVAL; } return setcurchan(vap, c); } static int ieee80211_ioctl_setregdomain(struct ieee80211vap *vap, const struct ieee80211req *ireq) { struct ieee80211_regdomain_req *reg; int nchans, error; nchans = 1 + ((ireq->i_len - sizeof(struct ieee80211_regdomain_req)) / sizeof(struct ieee80211_channel)); if (!(1 <= nchans && nchans <= IEEE80211_CHAN_MAX)) { IEEE80211_DPRINTF(vap, IEEE80211_MSG_IOCTL, "%s: bad # chans, i_len %d nchans %d\n", __func__, ireq->i_len, nchans); return EINVAL; } reg = (struct ieee80211_regdomain_req *) IEEE80211_MALLOC(IEEE80211_REGDOMAIN_SIZE(nchans), M_TEMP, IEEE80211_M_NOWAIT | IEEE80211_M_ZERO); if (reg == NULL) { IEEE80211_DPRINTF(vap, IEEE80211_MSG_IOCTL, "%s: no memory, nchans %d\n", __func__, nchans); return ENOMEM; } error = copyin(ireq->i_data, reg, IEEE80211_REGDOMAIN_SIZE(nchans)); if (error == 0) { /* NB: validate inline channel count against storage size */ if (reg->chaninfo.ic_nchans != nchans) { IEEE80211_DPRINTF(vap, IEEE80211_MSG_IOCTL, "%s: chan cnt mismatch, %d != %d\n", __func__, reg->chaninfo.ic_nchans, nchans); error = EINVAL; } else error = ieee80211_setregdomain(vap, reg); } IEEE80211_FREE(reg, M_TEMP); return (error == 0 ? ENETRESET : error); } static int ieee80211_ioctl_setroam(struct ieee80211vap *vap, const struct ieee80211req *ireq) { if (ireq->i_len != sizeof(vap->iv_roamparms)) return EINVAL; /* XXX validate params */ /* XXX? ENETRESET to push to device? 
*/ return copyin(ireq->i_data, vap->iv_roamparms, sizeof(vap->iv_roamparms)); } static int checkrate(const struct ieee80211_rateset *rs, int rate) { int i; if (rate == IEEE80211_FIXED_RATE_NONE) return 1; for (i = 0; i < rs->rs_nrates; i++) if ((rs->rs_rates[i] & IEEE80211_RATE_VAL) == rate) return 1; return 0; } static int checkmcs(const struct ieee80211_htrateset *rs, int mcs) { int rate_val = IEEE80211_RV(mcs); int i; if (mcs == IEEE80211_FIXED_RATE_NONE) return 1; if ((mcs & IEEE80211_RATE_MCS) == 0) /* MCS always have 0x80 set */ return 0; for (i = 0; i < rs->rs_nrates; i++) if (IEEE80211_RV(rs->rs_rates[i]) == rate_val) return 1; return 0; } static int ieee80211_ioctl_settxparams(struct ieee80211vap *vap, const struct ieee80211req *ireq) { struct ieee80211com *ic = vap->iv_ic; struct ieee80211_txparams_req parms; /* XXX stack use? */ struct ieee80211_txparam *src, *dst; const struct ieee80211_htrateset *rs_ht; const struct ieee80211_rateset *rs; int error, mode, changed, is11n, nmodes; /* NB: accept short requests for backwards compat */ if (ireq->i_len > sizeof(parms)) return EINVAL; error = copyin(ireq->i_data, &parms, ireq->i_len); if (error != 0) return error; nmodes = ireq->i_len / sizeof(struct ieee80211_txparam); changed = 0; /* validate parameters and check if anything changed */ for (mode = IEEE80211_MODE_11A; mode < nmodes; mode++) { if (isclr(ic->ic_modecaps, mode)) continue; src = &parms.params[mode]; dst = &vap->iv_txparms[mode]; rs = &ic->ic_sup_rates[mode]; /* NB: 11n maps to legacy */ rs_ht = &ic->ic_sup_htrates; is11n = (mode == IEEE80211_MODE_11NA || mode == IEEE80211_MODE_11NG); if (src->ucastrate != dst->ucastrate) { if (!checkrate(rs, src->ucastrate) && (!is11n || !checkmcs(rs_ht, src->ucastrate))) return EINVAL; changed++; } if (src->mcastrate != dst->mcastrate) { if (!checkrate(rs, src->mcastrate) && (!is11n || !checkmcs(rs_ht, src->mcastrate))) return EINVAL; changed++; } if (src->mgmtrate != dst->mgmtrate) { if (!checkrate(rs, 
src->mgmtrate) && (!is11n || !checkmcs(rs_ht, src->mgmtrate))) return EINVAL; changed++; } if (src->maxretry != dst->maxretry) /* NB: no bounds */ changed++; } if (changed) { /* * Copy new parameters in place and notify the * driver so it can push state to the device. */ for (mode = IEEE80211_MODE_11A; mode < nmodes; mode++) { if (isset(ic->ic_modecaps, mode)) vap->iv_txparms[mode] = parms.params[mode]; } /* XXX could be more intelligent, e.g. don't reset if setting not being used */ return ENETRESET; } return 0; } /* * Application Information Element support. */ static int setappie(struct ieee80211_appie **aie, const struct ieee80211req *ireq) { struct ieee80211_appie *app = *aie; struct ieee80211_appie *napp; int error; if (ireq->i_len == 0) { /* delete any existing ie */ if (app != NULL) { *aie = NULL; /* XXX racey */ IEEE80211_FREE(app, M_80211_NODE_IE); } return 0; } if (!(2 <= ireq->i_len && ireq->i_len <= IEEE80211_MAX_APPIE)) return EINVAL; /* * Allocate a new appie structure and copy in the user data. * When done swap in the new structure. Note that we do not * guard against users holding a ref to the old structure; * this must be handled outside this code. 
* * XXX bad bad bad */ napp = (struct ieee80211_appie *) IEEE80211_MALLOC( sizeof(struct ieee80211_appie) + ireq->i_len, M_80211_NODE_IE, IEEE80211_M_NOWAIT); if (napp == NULL) return ENOMEM; /* XXX holding ic lock */ error = copyin(ireq->i_data, napp->ie_data, ireq->i_len); if (error) { IEEE80211_FREE(napp, M_80211_NODE_IE); return error; } napp->ie_len = ireq->i_len; *aie = napp; if (app != NULL) IEEE80211_FREE(app, M_80211_NODE_IE); return 0; } static void setwparsnie(struct ieee80211vap *vap, uint8_t *ie, int space) { /* validate data is present as best we can */ if (space == 0 || 2+ie[1] > space) return; if (ie[0] == IEEE80211_ELEMID_VENDOR) vap->iv_wpa_ie = ie; else if (ie[0] == IEEE80211_ELEMID_RSN) vap->iv_rsn_ie = ie; } static int ieee80211_ioctl_setappie_locked(struct ieee80211vap *vap, const struct ieee80211req *ireq, int fc0) { int error; IEEE80211_LOCK_ASSERT(vap->iv_ic); switch (fc0 & IEEE80211_FC0_SUBTYPE_MASK) { case IEEE80211_FC0_SUBTYPE_BEACON: if (vap->iv_opmode != IEEE80211_M_HOSTAP && vap->iv_opmode != IEEE80211_M_IBSS) { error = EINVAL; break; } error = setappie(&vap->iv_appie_beacon, ireq); if (error == 0) ieee80211_beacon_notify(vap, IEEE80211_BEACON_APPIE); break; case IEEE80211_FC0_SUBTYPE_PROBE_RESP: error = setappie(&vap->iv_appie_proberesp, ireq); break; case IEEE80211_FC0_SUBTYPE_ASSOC_RESP: if (vap->iv_opmode == IEEE80211_M_HOSTAP) error = setappie(&vap->iv_appie_assocresp, ireq); else error = EINVAL; break; case IEEE80211_FC0_SUBTYPE_PROBE_REQ: error = setappie(&vap->iv_appie_probereq, ireq); break; case IEEE80211_FC0_SUBTYPE_ASSOC_REQ: if (vap->iv_opmode == IEEE80211_M_STA) error = setappie(&vap->iv_appie_assocreq, ireq); else error = EINVAL; break; case (IEEE80211_APPIE_WPA & IEEE80211_FC0_SUBTYPE_MASK): error = setappie(&vap->iv_appie_wpa, ireq); if (error == 0) { /* * Must split single blob of data into separate * WPA and RSN ie's because they go in different * locations in the mgt frames. 
* XXX use IEEE80211_IOC_WPA2 so user code does split */ vap->iv_wpa_ie = NULL; vap->iv_rsn_ie = NULL; if (vap->iv_appie_wpa != NULL) { struct ieee80211_appie *appie = vap->iv_appie_wpa; uint8_t *data = appie->ie_data; /* XXX ie length validate is painful, cheat */ setwparsnie(vap, data, appie->ie_len); setwparsnie(vap, data + 2 + data[1], appie->ie_len - (2 + data[1])); } if (vap->iv_opmode == IEEE80211_M_HOSTAP || vap->iv_opmode == IEEE80211_M_IBSS) { /* * Must rebuild beacon frame as the update * mechanism doesn't handle WPA/RSN ie's. * Could extend it but it doesn't normally * change; this is just to deal with hostapd * plumbing the ie after the interface is up. */ error = ENETRESET; } } break; default: error = EINVAL; break; } return error; } static int ieee80211_ioctl_setappie(struct ieee80211vap *vap, const struct ieee80211req *ireq) { struct ieee80211com *ic = vap->iv_ic; int error; uint8_t fc0; fc0 = ireq->i_val & 0xff; if ((fc0 & IEEE80211_FC0_TYPE_MASK) != IEEE80211_FC0_TYPE_MGT) return EINVAL; /* NB: could check iv_opmode and reject but hardly worth the effort */ IEEE80211_LOCK(ic); error = ieee80211_ioctl_setappie_locked(vap, ireq, fc0); IEEE80211_UNLOCK(ic); return error; } static int ieee80211_ioctl_chanswitch(struct ieee80211vap *vap, struct ieee80211req *ireq) { struct ieee80211com *ic = vap->iv_ic; struct ieee80211_chanswitch_req csr; struct ieee80211_channel *c; int error; if (ireq->i_len != sizeof(csr)) return EINVAL; error = copyin(ireq->i_data, &csr, sizeof(csr)); if (error != 0) return error; /* XXX adhoc mode not supported */ if (vap->iv_opmode != IEEE80211_M_HOSTAP || (vap->iv_flags & IEEE80211_F_DOTH) == 0) return EOPNOTSUPP; c = ieee80211_find_channel(ic, csr.csa_chan.ic_freq, csr.csa_chan.ic_flags); if (c == NULL) return ENOENT; IEEE80211_LOCK(ic); if ((ic->ic_flags & IEEE80211_F_CSAPENDING) == 0) ieee80211_csa_startswitch(ic, c, csr.csa_mode, csr.csa_count); else if (csr.csa_count == 0) ieee80211_csa_cancelswitch(ic); else error = EBUSY; 
IEEE80211_UNLOCK(ic); return error; } static int ieee80211_scanreq(struct ieee80211vap *vap, struct ieee80211_scan_req *sr) { #define IEEE80211_IOC_SCAN_FLAGS \ (IEEE80211_IOC_SCAN_NOPICK | IEEE80211_IOC_SCAN_ACTIVE | \ IEEE80211_IOC_SCAN_PICK1ST | IEEE80211_IOC_SCAN_BGSCAN | \ IEEE80211_IOC_SCAN_ONCE | IEEE80211_IOC_SCAN_NOBCAST | \ IEEE80211_IOC_SCAN_NOJOIN | IEEE80211_IOC_SCAN_FLUSH | \ IEEE80211_IOC_SCAN_CHECK) struct ieee80211com *ic = vap->iv_ic; int error, i; /* convert duration */ if (sr->sr_duration == IEEE80211_IOC_SCAN_FOREVER) sr->sr_duration = IEEE80211_SCAN_FOREVER; else { if (sr->sr_duration < IEEE80211_IOC_SCAN_DURATION_MIN || sr->sr_duration > IEEE80211_IOC_SCAN_DURATION_MAX) return EINVAL; sr->sr_duration = msecs_to_ticks(sr->sr_duration); if (sr->sr_duration < 1) sr->sr_duration = 1; } /* convert min/max channel dwell */ if (sr->sr_mindwell != 0) { sr->sr_mindwell = msecs_to_ticks(sr->sr_mindwell); if (sr->sr_mindwell < 1) sr->sr_mindwell = 1; } if (sr->sr_maxdwell != 0) { sr->sr_maxdwell = msecs_to_ticks(sr->sr_maxdwell); if (sr->sr_maxdwell < 1) sr->sr_maxdwell = 1; } /* NB: silently reduce ssid count to what is supported */ if (sr->sr_nssid > IEEE80211_SCAN_MAX_SSID) sr->sr_nssid = IEEE80211_SCAN_MAX_SSID; for (i = 0; i < sr->sr_nssid; i++) if (sr->sr_ssid[i].len > IEEE80211_NWID_LEN) return EINVAL; /* cleanse flags just in case, could reject if invalid flags */ sr->sr_flags &= IEEE80211_IOC_SCAN_FLAGS; /* * Add an implicit NOPICK if the vap is not marked UP. This * allows applications to scan without joining a bss (or picking * a channel and setting up a bss) and without forcing manual * roaming mode--you just need to mark the parent device UP. */ if ((vap->iv_ifp->if_flags & IFF_UP) == 0) sr->sr_flags |= IEEE80211_IOC_SCAN_NOPICK; IEEE80211_DPRINTF(vap, IEEE80211_MSG_SCAN, "%s: flags 0x%x%s duration 0x%x mindwell %u maxdwell %u nssid %d\n", __func__, sr->sr_flags, (vap->iv_ifp->if_flags & IFF_UP) == 0 ? 
" (!IFF_UP)" : "", sr->sr_duration, sr->sr_mindwell, sr->sr_maxdwell, sr->sr_nssid); /* * If we are in INIT state then the driver has never had a chance * to setup hardware state to do a scan; we must use the state * machine to get us up to the SCAN state but once we reach SCAN * state we then want to use the supplied params. Stash the * parameters in the vap and mark IEEE80211_FEXT_SCANREQ; the * state machines will recognize this and use the stashed params * to issue the scan request. * * Otherwise just invoke the scan machinery directly. */ IEEE80211_LOCK(ic); if (ic->ic_nrunning == 0) { IEEE80211_UNLOCK(ic); return ENXIO; } if (vap->iv_state == IEEE80211_S_INIT) { /* NB: clobbers previous settings */ vap->iv_scanreq_flags = sr->sr_flags; vap->iv_scanreq_duration = sr->sr_duration; vap->iv_scanreq_nssid = sr->sr_nssid; for (i = 0; i < sr->sr_nssid; i++) { vap->iv_scanreq_ssid[i].len = sr->sr_ssid[i].len; memcpy(vap->iv_scanreq_ssid[i].ssid, sr->sr_ssid[i].ssid, sr->sr_ssid[i].len); } vap->iv_flags_ext |= IEEE80211_FEXT_SCANREQ; IEEE80211_UNLOCK(ic); ieee80211_new_state(vap, IEEE80211_S_SCAN, 0); } else { vap->iv_flags_ext &= ~IEEE80211_FEXT_SCANREQ; IEEE80211_UNLOCK(ic); if (sr->sr_flags & IEEE80211_IOC_SCAN_CHECK) { error = ieee80211_check_scan(vap, sr->sr_flags, sr->sr_duration, sr->sr_mindwell, sr->sr_maxdwell, sr->sr_nssid, /* NB: cheat, we assume structures are compatible */ (const struct ieee80211_scan_ssid *) &sr->sr_ssid[0]); } else { error = ieee80211_start_scan(vap, sr->sr_flags, sr->sr_duration, sr->sr_mindwell, sr->sr_maxdwell, sr->sr_nssid, /* NB: cheat, we assume structures are compatible */ (const struct ieee80211_scan_ssid *) &sr->sr_ssid[0]); } if (error == 0) return EINPROGRESS; } return 0; #undef IEEE80211_IOC_SCAN_FLAGS } static int ieee80211_ioctl_scanreq(struct ieee80211vap *vap, struct ieee80211req *ireq) { struct ieee80211_scan_req *sr; int error; if (ireq->i_len != sizeof(*sr)) return EINVAL; sr = IEEE80211_MALLOC(sizeof(*sr), M_TEMP, 
IEEE80211_M_NOWAIT | IEEE80211_M_ZERO); if (sr == NULL) return ENOMEM; error = copyin(ireq->i_data, sr, sizeof(*sr)); if (error != 0) goto bad; error = ieee80211_scanreq(vap, sr); bad: IEEE80211_FREE(sr, M_TEMP); return error; } static int ieee80211_ioctl_setstavlan(struct ieee80211vap *vap, struct ieee80211req *ireq) { struct ieee80211_node *ni; struct ieee80211req_sta_vlan vlan; int error; if (ireq->i_len != sizeof(vlan)) return EINVAL; error = copyin(ireq->i_data, &vlan, sizeof(vlan)); if (error != 0) return error; if (!IEEE80211_ADDR_EQ(vlan.sv_macaddr, zerobssid)) { ni = ieee80211_find_vap_node(&vap->iv_ic->ic_sta, vap, vlan.sv_macaddr); if (ni == NULL) return ENOENT; } else ni = ieee80211_ref_node(vap->iv_bss); ni->ni_vlan = vlan.sv_vlan; ieee80211_free_node(ni); return error; } static int isvap11g(const struct ieee80211vap *vap) { const struct ieee80211_node *bss = vap->iv_bss; return bss->ni_chan != IEEE80211_CHAN_ANYC && IEEE80211_IS_CHAN_ANYG(bss->ni_chan); } static int isvapht(const struct ieee80211vap *vap) { const struct ieee80211_node *bss = vap->iv_bss; return bss->ni_chan != IEEE80211_CHAN_ANYC && IEEE80211_IS_CHAN_HT(bss->ni_chan); } /* * Dummy ioctl set handler so the linker set is defined. 
*/
/*
 * Always returns ENOSYS, the value ieee80211_ioctl_setdefault() treats
 * as "request not handled here".
 */
static int
dummy_ioctl_set(struct ieee80211vap *vap, struct ieee80211req *ireq)
{
	return ENOSYS;
}
IEEE80211_IOCTL_SET(dummy, dummy_ioctl_set);

/*
 * Offer a set request to every handler in the ieee80211_ioctl_setset
 * linker set, in iteration order.  The first result other than ENOSYS
 * is returned to the caller; if every handler answers ENOSYS the
 * request is rejected with EINVAL.
 */
static int
ieee80211_ioctl_setdefault(struct ieee80211vap *vap, struct ieee80211req *ireq)
{
	ieee80211_ioctl_setfunc * const *set;
	int error;

	SET_FOREACH(set, ieee80211_ioctl_setset) {
		error = (*set)(vap, ireq);
		if (error != ENOSYS)
			return error;
	}
	return EINVAL;
}

static int
ieee80211_ioctl_set80211(struct ieee80211vap *vap, u_long cmd, struct ieee80211req *ireq)
{
	struct ieee80211com *ic = vap->iv_ic;
	int error;
	const struct ieee80211_authenticator *auth;
	uint8_t tmpkey[IEEE80211_KEYBUF_SIZE];
	char tmpssid[IEEE80211_NWID_LEN];
	uint8_t tmpbssid[IEEE80211_ADDR_LEN];
	struct ieee80211_key *k;
	u_int kid;
	uint32_t flags;

	error = 0;
	switch (ireq->i_type) {
	case IEEE80211_IOC_SSID:
		if (ireq->i_val != 0 ||
		    ireq->i_len > IEEE80211_NWID_LEN)
			return EINVAL;
		error = copyin(ireq->i_data, tmpssid, ireq->i_len);
		if (error)
			break;
		memset(vap->iv_des_ssid[0].ssid, 0, IEEE80211_NWID_LEN);
		vap->iv_des_ssid[0].len = ireq->i_len;
		memcpy(vap->iv_des_ssid[0].ssid, tmpssid, ireq->i_len);
		vap->iv_des_nssid = (ireq->i_len > 0);
		error = ENETRESET;
		break;
	case IEEE80211_IOC_WEP:
		switch (ireq->i_val) {
		case IEEE80211_WEP_OFF:
			vap->iv_flags &= ~IEEE80211_F_PRIVACY;
			vap->iv_flags &= ~IEEE80211_F_DROPUNENC;
			break;
		case IEEE80211_WEP_ON:
			vap->iv_flags |= IEEE80211_F_PRIVACY;
			vap->iv_flags |= IEEE80211_F_DROPUNENC;
			break;
		case IEEE80211_WEP_MIXED:
			vap->iv_flags |= IEEE80211_F_PRIVACY;
			vap->iv_flags &= ~IEEE80211_F_DROPUNENC;
			break;
		}
		error = ENETRESET;
		break;
	case IEEE80211_IOC_WEPKEY:
		kid = (u_int) ireq->i_val;
		if (kid >= IEEE80211_WEP_NKID)
			return EINVAL;
		k = &vap->iv_nw_keys[kid];
		if (ireq->i_len == 0) {
			/* zero-len =>'s delete any existing key */
			(void) ieee80211_crypto_delkey(vap, k);
			break;
		}
		if (ireq->i_len > sizeof(tmpkey))
			return EINVAL;
		memset(tmpkey, 0, sizeof(tmpkey));
		error = copyin(ireq->i_data, tmpkey, ireq->i_len);
		if (error)
			break;
ieee80211_key_update_begin(vap); k->wk_keyix = kid; /* NB: force fixed key id */ if (ieee80211_crypto_newkey(vap, IEEE80211_CIPHER_WEP, IEEE80211_KEY_XMIT | IEEE80211_KEY_RECV, k)) { k->wk_keylen = ireq->i_len; memcpy(k->wk_key, tmpkey, sizeof(tmpkey)); IEEE80211_ADDR_COPY(k->wk_macaddr, vap->iv_myaddr); if (!ieee80211_crypto_setkey(vap, k)) error = EINVAL; } else error = EINVAL; ieee80211_key_update_end(vap); break; case IEEE80211_IOC_WEPTXKEY: kid = (u_int) ireq->i_val; if (kid >= IEEE80211_WEP_NKID && (uint16_t) kid != IEEE80211_KEYIX_NONE) return EINVAL; /* * Firmware devices may need to be told about an explicit * key index here, versus just inferring it from the * key set / change. Since we may also need to pause * things like transmit before the key is updated, * give the driver a chance to flush things by tying * into key update begin/end. */ ieee80211_key_update_begin(vap); ieee80211_crypto_set_deftxkey(vap, kid); ieee80211_key_update_end(vap); break; case IEEE80211_IOC_AUTHMODE: switch (ireq->i_val) { case IEEE80211_AUTH_WPA: case IEEE80211_AUTH_8021X: /* 802.1x */ case IEEE80211_AUTH_OPEN: /* open */ case IEEE80211_AUTH_SHARED: /* shared-key */ case IEEE80211_AUTH_AUTO: /* auto */ auth = ieee80211_authenticator_get(ireq->i_val); if (auth == NULL) return EINVAL; break; default: return EINVAL; } switch (ireq->i_val) { case IEEE80211_AUTH_WPA: /* WPA w/ 802.1x */ vap->iv_flags |= IEEE80211_F_PRIVACY; ireq->i_val = IEEE80211_AUTH_8021X; break; case IEEE80211_AUTH_OPEN: /* open */ vap->iv_flags &= ~(IEEE80211_F_WPA|IEEE80211_F_PRIVACY); break; case IEEE80211_AUTH_SHARED: /* shared-key */ case IEEE80211_AUTH_8021X: /* 802.1x */ vap->iv_flags &= ~IEEE80211_F_WPA; /* both require a key so mark the PRIVACY capability */ vap->iv_flags |= IEEE80211_F_PRIVACY; break; case IEEE80211_AUTH_AUTO: /* auto */ vap->iv_flags &= ~IEEE80211_F_WPA; /* XXX PRIVACY handling? */ /* XXX what's the right way to do this? 
*/ break; } /* NB: authenticator attach/detach happens on state change */ vap->iv_bss->ni_authmode = ireq->i_val; /* XXX mixed/mode/usage? */ vap->iv_auth = auth; error = ENETRESET; break; case IEEE80211_IOC_CHANNEL: error = ieee80211_ioctl_setchannel(vap, ireq); break; case IEEE80211_IOC_POWERSAVE: switch (ireq->i_val) { case IEEE80211_POWERSAVE_OFF: if (vap->iv_flags & IEEE80211_F_PMGTON) { ieee80211_syncflag(vap, -IEEE80211_F_PMGTON); error = ERESTART; } break; case IEEE80211_POWERSAVE_ON: if ((vap->iv_caps & IEEE80211_C_PMGT) == 0) error = EOPNOTSUPP; else if ((vap->iv_flags & IEEE80211_F_PMGTON) == 0) { ieee80211_syncflag(vap, IEEE80211_F_PMGTON); error = ERESTART; } break; default: error = EINVAL; break; } break; case IEEE80211_IOC_POWERSAVESLEEP: if (ireq->i_val < 0) return EINVAL; ic->ic_lintval = ireq->i_val; error = ERESTART; break; case IEEE80211_IOC_RTSTHRESHOLD: if (!(IEEE80211_RTS_MIN <= ireq->i_val && ireq->i_val <= IEEE80211_RTS_MAX)) return EINVAL; vap->iv_rtsthreshold = ireq->i_val; error = ERESTART; break; case IEEE80211_IOC_PROTMODE: if (ireq->i_val > IEEE80211_PROT_RTSCTS) return EINVAL; ic->ic_protmode = (enum ieee80211_protmode)ireq->i_val; /* NB: if not operating in 11g this can wait */ if (ic->ic_bsschan != IEEE80211_CHAN_ANYC && IEEE80211_IS_CHAN_ANYG(ic->ic_bsschan)) error = ERESTART; break; case IEEE80211_IOC_TXPOWER: if ((ic->ic_caps & IEEE80211_C_TXPMGT) == 0) return EOPNOTSUPP; if (!(IEEE80211_TXPOWER_MIN <= ireq->i_val && ireq->i_val <= IEEE80211_TXPOWER_MAX)) return EINVAL; ic->ic_txpowlimit = ireq->i_val; error = ERESTART; break; case IEEE80211_IOC_ROAMING: if (!(IEEE80211_ROAMING_DEVICE <= ireq->i_val && ireq->i_val <= IEEE80211_ROAMING_MANUAL)) return EINVAL; vap->iv_roaming = (enum ieee80211_roamingmode)ireq->i_val; /* XXXX reset? */ break; case IEEE80211_IOC_PRIVACY: if (ireq->i_val) { /* XXX check for key state? */ vap->iv_flags |= IEEE80211_F_PRIVACY; } else vap->iv_flags &= ~IEEE80211_F_PRIVACY; /* XXX ERESTART? 
*/ break; case IEEE80211_IOC_DROPUNENCRYPTED: if (ireq->i_val) vap->iv_flags |= IEEE80211_F_DROPUNENC; else vap->iv_flags &= ~IEEE80211_F_DROPUNENC; /* XXX ERESTART? */ break; case IEEE80211_IOC_WPAKEY: error = ieee80211_ioctl_setkey(vap, ireq); break; case IEEE80211_IOC_DELKEY: error = ieee80211_ioctl_delkey(vap, ireq); break; case IEEE80211_IOC_MLME: error = ieee80211_ioctl_setmlme(vap, ireq); break; case IEEE80211_IOC_COUNTERMEASURES: if (ireq->i_val) { if ((vap->iv_flags & IEEE80211_F_WPA) == 0) return EOPNOTSUPP; vap->iv_flags |= IEEE80211_F_COUNTERM; } else vap->iv_flags &= ~IEEE80211_F_COUNTERM; /* XXX ERESTART? */ break; case IEEE80211_IOC_WPA: if (ireq->i_val > 3) return EINVAL; /* XXX verify ciphers available */ flags = vap->iv_flags & ~IEEE80211_F_WPA; switch (ireq->i_val) { case 0: /* wpa_supplicant calls this to clear the WPA config */ break; case 1: if (!(vap->iv_caps & IEEE80211_C_WPA1)) return EOPNOTSUPP; flags |= IEEE80211_F_WPA1; break; case 2: if (!(vap->iv_caps & IEEE80211_C_WPA2)) return EOPNOTSUPP; flags |= IEEE80211_F_WPA2; break; case 3: if ((vap->iv_caps & IEEE80211_C_WPA) != IEEE80211_C_WPA) return EOPNOTSUPP; flags |= IEEE80211_F_WPA1 | IEEE80211_F_WPA2; break; default: /* Can't set any -> error */ return EOPNOTSUPP; } vap->iv_flags = flags; error = ERESTART; /* NB: can change beacon frame */ break; case IEEE80211_IOC_WME: if (ireq->i_val) { if ((vap->iv_caps & IEEE80211_C_WME) == 0) return EOPNOTSUPP; ieee80211_syncflag(vap, IEEE80211_F_WME); } else ieee80211_syncflag(vap, -IEEE80211_F_WME); error = ERESTART; /* NB: can change beacon frame */ break; case IEEE80211_IOC_HIDESSID: if (ireq->i_val) vap->iv_flags |= IEEE80211_F_HIDESSID; else vap->iv_flags &= ~IEEE80211_F_HIDESSID; error = ERESTART; /* XXX ENETRESET? 
*/ break; case IEEE80211_IOC_APBRIDGE: if (ireq->i_val == 0) vap->iv_flags |= IEEE80211_F_NOBRIDGE; else vap->iv_flags &= ~IEEE80211_F_NOBRIDGE; break; case IEEE80211_IOC_BSSID: if (ireq->i_len != sizeof(tmpbssid)) return EINVAL; error = copyin(ireq->i_data, tmpbssid, ireq->i_len); if (error) break; IEEE80211_ADDR_COPY(vap->iv_des_bssid, tmpbssid); if (IEEE80211_ADDR_EQ(vap->iv_des_bssid, zerobssid)) vap->iv_flags &= ~IEEE80211_F_DESBSSID; else vap->iv_flags |= IEEE80211_F_DESBSSID; error = ENETRESET; break; case IEEE80211_IOC_CHANLIST: error = ieee80211_ioctl_setchanlist(vap, ireq); break; #define OLD_IEEE80211_IOC_SCAN_REQ 23 #ifdef OLD_IEEE80211_IOC_SCAN_REQ case OLD_IEEE80211_IOC_SCAN_REQ: IEEE80211_DPRINTF(vap, IEEE80211_MSG_SCAN, "%s: active scan request\n", __func__); /* * If we are in INIT state then the driver has never * had a chance to setup hardware state to do a scan; * use the state machine to get us up the SCAN state. * Otherwise just invoke the scan machinery to start * a one-time scan. 
*/ if (vap->iv_state == IEEE80211_S_INIT) ieee80211_new_state(vap, IEEE80211_S_SCAN, 0); else (void) ieee80211_start_scan(vap, IEEE80211_SCAN_ACTIVE | IEEE80211_SCAN_NOPICK | IEEE80211_SCAN_ONCE, IEEE80211_SCAN_FOREVER, 0, 0, /* XXX use ioctl params */ vap->iv_des_nssid, vap->iv_des_ssid); break; #endif /* OLD_IEEE80211_IOC_SCAN_REQ */ case IEEE80211_IOC_SCAN_REQ: error = ieee80211_ioctl_scanreq(vap, ireq); break; case IEEE80211_IOC_SCAN_CANCEL: IEEE80211_DPRINTF(vap, IEEE80211_MSG_SCAN, "%s: cancel scan\n", __func__); ieee80211_cancel_scan(vap); break; case IEEE80211_IOC_HTCONF: if (ireq->i_val & 1) ieee80211_syncflag_ht(vap, IEEE80211_FHT_HT); else ieee80211_syncflag_ht(vap, -IEEE80211_FHT_HT); if (ireq->i_val & 2) ieee80211_syncflag_ht(vap, IEEE80211_FHT_USEHT40); else ieee80211_syncflag_ht(vap, -IEEE80211_FHT_USEHT40); error = ENETRESET; break; case IEEE80211_IOC_ADDMAC: case IEEE80211_IOC_DELMAC: error = ieee80211_ioctl_macmac(vap, ireq); break; case IEEE80211_IOC_MACCMD: error = ieee80211_ioctl_setmaccmd(vap, ireq); break; case IEEE80211_IOC_STA_STATS: error = ieee80211_ioctl_setstastats(vap, ireq); break; case IEEE80211_IOC_STA_TXPOW: error = ieee80211_ioctl_setstatxpow(vap, ireq); break; case IEEE80211_IOC_WME_CWMIN: /* WME: CWmin */ case IEEE80211_IOC_WME_CWMAX: /* WME: CWmax */ case IEEE80211_IOC_WME_AIFS: /* WME: AIFS */ case IEEE80211_IOC_WME_TXOPLIMIT: /* WME: txops limit */ case IEEE80211_IOC_WME_ACM: /* WME: ACM (bss only) */ case IEEE80211_IOC_WME_ACKPOLICY: /* WME: ACK policy (!bss only) */ error = ieee80211_ioctl_setwmeparam(vap, ireq); break; case IEEE80211_IOC_DTIM_PERIOD: if (vap->iv_opmode != IEEE80211_M_HOSTAP && vap->iv_opmode != IEEE80211_M_MBSS && vap->iv_opmode != IEEE80211_M_IBSS) return EINVAL; if (IEEE80211_DTIM_MIN <= ireq->i_val && ireq->i_val <= IEEE80211_DTIM_MAX) { vap->iv_dtim_period = ireq->i_val; error = ENETRESET; /* requires restart */ } else error = EINVAL; break; case IEEE80211_IOC_BEACON_INTERVAL: if (vap->iv_opmode != 
IEEE80211_M_HOSTAP && vap->iv_opmode != IEEE80211_M_MBSS && vap->iv_opmode != IEEE80211_M_IBSS) return EINVAL; if (IEEE80211_BINTVAL_MIN <= ireq->i_val && ireq->i_val <= IEEE80211_BINTVAL_MAX) { ic->ic_bintval = ireq->i_val; error = ENETRESET; /* requires restart */ } else error = EINVAL; break; case IEEE80211_IOC_PUREG: if (ireq->i_val) vap->iv_flags |= IEEE80211_F_PUREG; else vap->iv_flags &= ~IEEE80211_F_PUREG; /* NB: reset only if we're operating on an 11g channel */ if (isvap11g(vap)) error = ENETRESET; break; case IEEE80211_IOC_QUIET: vap->iv_quiet= ireq->i_val; break; case IEEE80211_IOC_QUIET_COUNT: vap->iv_quiet_count=ireq->i_val; break; case IEEE80211_IOC_QUIET_PERIOD: vap->iv_quiet_period=ireq->i_val; break; case IEEE80211_IOC_QUIET_OFFSET: vap->iv_quiet_offset=ireq->i_val; break; case IEEE80211_IOC_QUIET_DUR: if(ireq->i_val < vap->iv_bss->ni_intval) vap->iv_quiet_duration = ireq->i_val; else error = EINVAL; break; case IEEE80211_IOC_BGSCAN: if (ireq->i_val) { if ((vap->iv_caps & IEEE80211_C_BGSCAN) == 0) return EOPNOTSUPP; vap->iv_flags |= IEEE80211_F_BGSCAN; } else vap->iv_flags &= ~IEEE80211_F_BGSCAN; break; case IEEE80211_IOC_BGSCAN_IDLE: if (ireq->i_val >= IEEE80211_BGSCAN_IDLE_MIN) vap->iv_bgscanidle = ireq->i_val*hz/1000; else error = EINVAL; break; case IEEE80211_IOC_BGSCAN_INTERVAL: if (ireq->i_val >= IEEE80211_BGSCAN_INTVAL_MIN) vap->iv_bgscanintvl = ireq->i_val*hz; else error = EINVAL; break; case IEEE80211_IOC_SCANVALID: if (ireq->i_val >= IEEE80211_SCAN_VALID_MIN) vap->iv_scanvalid = ireq->i_val*hz; else error = EINVAL; break; case IEEE80211_IOC_FRAGTHRESHOLD: if ((vap->iv_caps & IEEE80211_C_TXFRAG) == 0 && ireq->i_val != IEEE80211_FRAG_MAX) return EOPNOTSUPP; if (!(IEEE80211_FRAG_MIN <= ireq->i_val && ireq->i_val <= IEEE80211_FRAG_MAX)) return EINVAL; vap->iv_fragthreshold = ireq->i_val; error = ERESTART; break; case IEEE80211_IOC_BURST: if (ireq->i_val) { if ((vap->iv_caps & IEEE80211_C_BURST) == 0) return EOPNOTSUPP; 
ieee80211_syncflag(vap, IEEE80211_F_BURST); } else ieee80211_syncflag(vap, -IEEE80211_F_BURST); error = ERESTART; break; case IEEE80211_IOC_BMISSTHRESHOLD: if (!(IEEE80211_HWBMISS_MIN <= ireq->i_val && ireq->i_val <= IEEE80211_HWBMISS_MAX)) return EINVAL; vap->iv_bmissthreshold = ireq->i_val; error = ERESTART; break; case IEEE80211_IOC_CURCHAN: error = ieee80211_ioctl_setcurchan(vap, ireq); break; case IEEE80211_IOC_SHORTGI: if (ireq->i_val) { #define IEEE80211_HTCAP_SHORTGI \ (IEEE80211_HTCAP_SHORTGI20 | IEEE80211_HTCAP_SHORTGI40) if (((ireq->i_val ^ vap->iv_htcaps) & IEEE80211_HTCAP_SHORTGI) != 0) return EINVAL; if (ireq->i_val & IEEE80211_HTCAP_SHORTGI20) vap->iv_flags_ht |= IEEE80211_FHT_SHORTGI20; if (ireq->i_val & IEEE80211_HTCAP_SHORTGI40) vap->iv_flags_ht |= IEEE80211_FHT_SHORTGI40; #undef IEEE80211_HTCAP_SHORTGI } else vap->iv_flags_ht &= ~(IEEE80211_FHT_SHORTGI20 | IEEE80211_FHT_SHORTGI40); error = ERESTART; break; case IEEE80211_IOC_AMPDU: if (ireq->i_val && (vap->iv_htcaps & IEEE80211_HTC_AMPDU) == 0) return EINVAL; if (ireq->i_val & 1) vap->iv_flags_ht |= IEEE80211_FHT_AMPDU_TX; else vap->iv_flags_ht &= ~IEEE80211_FHT_AMPDU_TX; if (ireq->i_val & 2) vap->iv_flags_ht |= IEEE80211_FHT_AMPDU_RX; else vap->iv_flags_ht &= ~IEEE80211_FHT_AMPDU_RX; /* NB: reset only if we're operating on an 11n channel */ if (isvapht(vap)) error = ERESTART; break; case IEEE80211_IOC_AMPDU_LIMIT: /* XXX TODO: figure out ampdu_limit versus ampdu_rxmax */ if (!(IEEE80211_HTCAP_MAXRXAMPDU_8K <= ireq->i_val && ireq->i_val <= IEEE80211_HTCAP_MAXRXAMPDU_64K)) return EINVAL; if (vap->iv_opmode == IEEE80211_M_HOSTAP) vap->iv_ampdu_rxmax = ireq->i_val; else vap->iv_ampdu_limit = ireq->i_val; error = ERESTART; break; case IEEE80211_IOC_AMPDU_DENSITY: if (!(IEEE80211_HTCAP_MPDUDENSITY_NA <= ireq->i_val && ireq->i_val <= IEEE80211_HTCAP_MPDUDENSITY_16)) return EINVAL; vap->iv_ampdu_density = ireq->i_val; error = ERESTART; break; case IEEE80211_IOC_AMSDU: if (ireq->i_val && (vap->iv_htcaps 
& IEEE80211_HTC_AMSDU) == 0) return EINVAL; if (ireq->i_val & 1) vap->iv_flags_ht |= IEEE80211_FHT_AMSDU_TX; else vap->iv_flags_ht &= ~IEEE80211_FHT_AMSDU_TX; if (ireq->i_val & 2) vap->iv_flags_ht |= IEEE80211_FHT_AMSDU_RX; else vap->iv_flags_ht &= ~IEEE80211_FHT_AMSDU_RX; /* NB: reset only if we're operating on an 11n channel */ if (isvapht(vap)) error = ERESTART; break; case IEEE80211_IOC_AMSDU_LIMIT: /* XXX validate */ vap->iv_amsdu_limit = ireq->i_val; /* XXX truncation? */ break; case IEEE80211_IOC_PUREN: if (ireq->i_val) { if ((vap->iv_flags_ht & IEEE80211_FHT_HT) == 0) return EINVAL; vap->iv_flags_ht |= IEEE80211_FHT_PUREN; } else vap->iv_flags_ht &= ~IEEE80211_FHT_PUREN; /* NB: reset only if we're operating on an 11n channel */ if (isvapht(vap)) error = ERESTART; break; case IEEE80211_IOC_DOTH: if (ireq->i_val) { #if 0 /* XXX no capability */ if ((vap->iv_caps & IEEE80211_C_DOTH) == 0) return EOPNOTSUPP; #endif vap->iv_flags |= IEEE80211_F_DOTH; } else vap->iv_flags &= ~IEEE80211_F_DOTH; error = ENETRESET; break; case IEEE80211_IOC_REGDOMAIN: error = ieee80211_ioctl_setregdomain(vap, ireq); break; case IEEE80211_IOC_ROAM: error = ieee80211_ioctl_setroam(vap, ireq); break; case IEEE80211_IOC_TXPARAMS: error = ieee80211_ioctl_settxparams(vap, ireq); break; case IEEE80211_IOC_HTCOMPAT: if (ireq->i_val) { if ((vap->iv_flags_ht & IEEE80211_FHT_HT) == 0) return EOPNOTSUPP; vap->iv_flags_ht |= IEEE80211_FHT_HTCOMPAT; } else vap->iv_flags_ht &= ~IEEE80211_FHT_HTCOMPAT; /* NB: reset only if we're operating on an 11n channel */ if (isvapht(vap)) error = ERESTART; break; case IEEE80211_IOC_DWDS: if (ireq->i_val) { /* NB: DWDS only makes sense for WDS-capable devices */ if ((ic->ic_caps & IEEE80211_C_WDS) == 0) return EOPNOTSUPP; /* NB: DWDS is used only with ap+sta vaps */ if (vap->iv_opmode != IEEE80211_M_HOSTAP && vap->iv_opmode != IEEE80211_M_STA) return EINVAL; vap->iv_flags |= IEEE80211_F_DWDS; if (vap->iv_opmode == IEEE80211_M_STA) vap->iv_flags_ext |= 
IEEE80211_FEXT_4ADDR; } else { vap->iv_flags &= ~IEEE80211_F_DWDS; if (vap->iv_opmode == IEEE80211_M_STA) vap->iv_flags_ext &= ~IEEE80211_FEXT_4ADDR; } break; case IEEE80211_IOC_INACTIVITY: if (ireq->i_val) vap->iv_flags_ext |= IEEE80211_FEXT_INACT; else vap->iv_flags_ext &= ~IEEE80211_FEXT_INACT; break; case IEEE80211_IOC_APPIE: error = ieee80211_ioctl_setappie(vap, ireq); break; case IEEE80211_IOC_WPS: if (ireq->i_val) { if ((vap->iv_caps & IEEE80211_C_WPA) == 0) return EOPNOTSUPP; vap->iv_flags_ext |= IEEE80211_FEXT_WPS; } else vap->iv_flags_ext &= ~IEEE80211_FEXT_WPS; break; case IEEE80211_IOC_TSN: if (ireq->i_val) { if ((vap->iv_caps & IEEE80211_C_WPA) == 0) return EOPNOTSUPP; vap->iv_flags_ext |= IEEE80211_FEXT_TSN; } else vap->iv_flags_ext &= ~IEEE80211_FEXT_TSN; break; case IEEE80211_IOC_CHANSWITCH: error = ieee80211_ioctl_chanswitch(vap, ireq); break; case IEEE80211_IOC_DFS: if (ireq->i_val) { if ((vap->iv_caps & IEEE80211_C_DFS) == 0) return EOPNOTSUPP; /* NB: DFS requires 11h support */ if ((vap->iv_flags & IEEE80211_F_DOTH) == 0) return EINVAL; vap->iv_flags_ext |= IEEE80211_FEXT_DFS; } else vap->iv_flags_ext &= ~IEEE80211_FEXT_DFS; break; case IEEE80211_IOC_DOTD: if (ireq->i_val) vap->iv_flags_ext |= IEEE80211_FEXT_DOTD; else vap->iv_flags_ext &= ~IEEE80211_FEXT_DOTD; if (vap->iv_opmode == IEEE80211_M_STA) error = ENETRESET; break; case IEEE80211_IOC_HTPROTMODE: if (ireq->i_val > IEEE80211_PROT_RTSCTS) return EINVAL; ic->ic_htprotmode = ireq->i_val ? 
IEEE80211_PROT_RTSCTS : IEEE80211_PROT_NONE; /* NB: if not operating in 11n this can wait */ if (isvapht(vap)) error = ERESTART; break; case IEEE80211_IOC_STA_VLAN: error = ieee80211_ioctl_setstavlan(vap, ireq); break; case IEEE80211_IOC_SMPS: if ((ireq->i_val &~ IEEE80211_HTCAP_SMPS) != 0 || ireq->i_val == 0x0008) /* value of 2 is reserved */ return EINVAL; if (ireq->i_val != IEEE80211_HTCAP_SMPS_OFF && (vap->iv_htcaps & IEEE80211_HTC_SMPS) == 0) return EOPNOTSUPP; vap->iv_htcaps = (vap->iv_htcaps &~ IEEE80211_HTCAP_SMPS) | ireq->i_val; /* NB: if not operating in 11n this can wait */ if (isvapht(vap)) error = ERESTART; break; case IEEE80211_IOC_RIFS: if (ireq->i_val != 0) { if ((vap->iv_htcaps & IEEE80211_HTC_RIFS) == 0) return EOPNOTSUPP; vap->iv_flags_ht |= IEEE80211_FHT_RIFS; } else vap->iv_flags_ht &= ~IEEE80211_FHT_RIFS; /* NB: if not operating in 11n this can wait */ if (isvapht(vap)) error = ERESTART; break; case IEEE80211_IOC_STBC: /* Check if we can do STBC TX/RX before changing the setting */ if ((ireq->i_val & 1) && ((vap->iv_htcaps & IEEE80211_HTCAP_TXSTBC) == 0)) return EOPNOTSUPP; if ((ireq->i_val & 2) && ((vap->iv_htcaps & IEEE80211_HTCAP_RXSTBC) == 0)) return EOPNOTSUPP; /* TX */ if (ireq->i_val & 1) vap->iv_flags_ht |= IEEE80211_FHT_STBC_TX; else vap->iv_flags_ht &= ~IEEE80211_FHT_STBC_TX; /* RX */ if (ireq->i_val & 2) vap->iv_flags_ht |= IEEE80211_FHT_STBC_RX; else vap->iv_flags_ht &= ~IEEE80211_FHT_STBC_RX; /* NB: reset only if we're operating on an 11n channel */ if (isvapht(vap)) error = ERESTART; break; case IEEE80211_IOC_LDPC: /* Check if we can do LDPC TX/RX before changing the setting */ if ((ireq->i_val & 1) && (vap->iv_htcaps & IEEE80211_HTC_TXLDPC) == 0) return EOPNOTSUPP; if ((ireq->i_val & 2) && (vap->iv_htcaps & IEEE80211_HTCAP_LDPC) == 0) return EOPNOTSUPP; /* TX */ if (ireq->i_val & 1) vap->iv_flags_ht |= IEEE80211_FHT_LDPC_TX; else vap->iv_flags_ht &= ~IEEE80211_FHT_LDPC_TX; /* RX */ if (ireq->i_val & 2) vap->iv_flags_ht |= 
IEEE80211_FHT_LDPC_RX; else vap->iv_flags_ht &= ~IEEE80211_FHT_LDPC_RX; /* NB: reset only if we're operating on an 11n channel */ if (isvapht(vap)) error = ERESTART; break; /* VHT */ case IEEE80211_IOC_VHTCONF: if (ireq->i_val & 1) ieee80211_syncflag_vht(vap, IEEE80211_FVHT_VHT); else ieee80211_syncflag_vht(vap, -IEEE80211_FVHT_VHT); if (ireq->i_val & 2) ieee80211_syncflag_vht(vap, IEEE80211_FVHT_USEVHT40); else ieee80211_syncflag_vht(vap, -IEEE80211_FVHT_USEVHT40); if (ireq->i_val & 4) ieee80211_syncflag_vht(vap, IEEE80211_FVHT_USEVHT80); else ieee80211_syncflag_vht(vap, -IEEE80211_FVHT_USEVHT80); if (ireq->i_val & 8) ieee80211_syncflag_vht(vap, IEEE80211_FVHT_USEVHT80P80); else ieee80211_syncflag_vht(vap, -IEEE80211_FVHT_USEVHT80P80); if (ireq->i_val & 16) ieee80211_syncflag_vht(vap, IEEE80211_FVHT_USEVHT160); else ieee80211_syncflag_vht(vap, -IEEE80211_FVHT_USEVHT160); error = ENETRESET; break; default: error = ieee80211_ioctl_setdefault(vap, ireq); break; } /* * The convention is that ENETRESET means an operation * requires a complete re-initialization of the device (e.g. * changing something that affects the association state). * ERESTART means the request may be handled with only a * reload of the hardware state. We hand ERESTART requests * to the iv_reset callback so the driver can decide. If * a device does not fillin iv_reset then it defaults to one * that returns ENETRESET. Otherwise a driver may return * ENETRESET (in which case a full reset will be done) or * 0 to mean there's no need to do anything (e.g. when the * change has no effect on the driver/device). */ if (error == ERESTART) error = IFNET_IS_UP_RUNNING(vap->iv_ifp) ? 
vap->iv_reset(vap, ireq->i_type) : 0; if (error == ENETRESET) { /* XXX need to re-think AUTO handling */ if (IS_UP_AUTO(vap)) ieee80211_init(vap); error = 0; } return error; } int ieee80211_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct ieee80211vap *vap = ifp->if_softc; struct ieee80211com *ic = vap->iv_ic; int error = 0, wait = 0; struct ifreq *ifr; struct ifaddr *ifa; /* XXX */ switch (cmd) { case SIOCSIFFLAGS: IEEE80211_LOCK(ic); if ((ifp->if_flags ^ vap->iv_ifflags) & IFF_PROMISC) { /* * Enable promiscuous mode when: * 1. Interface is not a member of bridge, or * 2. Requested by user, or * 3. In monitor (or adhoc-demo) mode. */ if (ifp->if_bridge == NULL || (ifp->if_flags & IFF_PPROMISC) != 0 || vap->iv_opmode == IEEE80211_M_MONITOR || (vap->iv_opmode == IEEE80211_M_AHDEMO && (vap->iv_caps & IEEE80211_C_TDMA) == 0)) { ieee80211_promisc(vap, ifp->if_flags & IFF_PROMISC); vap->iv_ifflags ^= IFF_PROMISC; } } if ((ifp->if_flags ^ vap->iv_ifflags) & IFF_ALLMULTI) { ieee80211_allmulti(vap, ifp->if_flags & IFF_ALLMULTI); vap->iv_ifflags ^= IFF_ALLMULTI; } if (ifp->if_flags & IFF_UP) { /* * Bring ourself up unless we're already operational. * If we're the first vap and the parent is not up * then it will automatically be brought up as a * side-effect of bringing ourself up. */ if (vap->iv_state == IEEE80211_S_INIT) { if (ic->ic_nrunning == 0) wait = 1; ieee80211_start_locked(vap); } } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) { /* * Stop ourself. If we are the last vap to be * marked down the parent will also be taken down. */ if (ic->ic_nrunning == 1) wait = 1; ieee80211_stop_locked(vap); } IEEE80211_UNLOCK(ic); /* Wait for parent ioctl handler if it was queued */ if (wait) { ieee80211_waitfor_parent(ic); /* * Check if the MAC address was changed * via SIOCSIFLLADDR ioctl. 
*/ if_addr_rlock(ifp); if ((ifp->if_flags & IFF_UP) == 0 && !IEEE80211_ADDR_EQ(vap->iv_myaddr, IF_LLADDR(ifp))) IEEE80211_ADDR_COPY(vap->iv_myaddr, IF_LLADDR(ifp)); if_addr_runlock(ifp); } break; case SIOCADDMULTI: case SIOCDELMULTI: ieee80211_runtask(ic, &ic->ic_mcast_task); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: ifr = (struct ifreq *)data; error = ifmedia_ioctl(ifp, ifr, &vap->iv_media, cmd); break; case SIOCG80211: error = ieee80211_ioctl_get80211(vap, cmd, (struct ieee80211req *) data); break; case SIOCS80211: error = priv_check(curthread, PRIV_NET80211_MANAGE); if (error == 0) error = ieee80211_ioctl_set80211(vap, cmd, (struct ieee80211req *) data); break; case SIOCG80211STATS: ifr = (struct ifreq *)data; - copyout(&vap->iv_stats, ifr->ifr_data, sizeof (vap->iv_stats)); + copyout(&vap->iv_stats, ifr_data_get_ptr(ifr), + sizeof (vap->iv_stats)); break; case SIOCSIFMTU: ifr = (struct ifreq *)data; if (!(IEEE80211_MTU_MIN <= ifr->ifr_mtu && ifr->ifr_mtu <= IEEE80211_MTU_MAX)) error = EINVAL; else ifp->if_mtu = ifr->ifr_mtu; break; case SIOCSIFADDR: /* * XXX Handle this directly so we can suppress if_init calls. * XXX This should be done in ether_ioctl but for the moment * XXX there are too many other parts of the system that * XXX set IFF_UP and so suppress if_init being called when * XXX it should be. */ ifa = (struct ifaddr *) data; switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: if ((ifp->if_flags & IFF_UP) == 0) { ifp->if_flags |= IFF_UP; ifp->if_init(ifp->if_softc); } arp_ifinit(ifp, ifa); break; #endif default: if ((ifp->if_flags & IFF_UP) == 0) { ifp->if_flags |= IFF_UP; ifp->if_init(ifp->if_softc); } break; } break; default: /* * Pass unknown ioctls first to the driver, and if it * returns ENOTTY, then to the generic Ethernet handler. 
*/ if (ic->ic_ioctl != NULL && (error = ic->ic_ioctl(ic, cmd, data)) != ENOTTY) break; error = ether_ioctl(ifp, cmd, data); break; } return (error); } Index: head/sys/netinet/ip_carp.c =================================================================== --- head/sys/netinet/ip_carp.c (revision 331796) +++ head/sys/netinet/ip_carp.c (revision 331797) @@ -1,2249 +1,2251 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2002 Michael Shalayeff. * Copyright (c) 2003 Ryan McBride. * Copyright (c) 2011 Gleb Smirnoff * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_bpf.h" #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #include #include #include #include #endif #ifdef INET #include #include #endif #ifdef INET6 #include #include #include #include #include #include #endif #include static MALLOC_DEFINE(M_CARP, "CARP", "CARP addresses"); struct carp_softc { struct ifnet *sc_carpdev; /* Pointer to parent ifnet. */ struct ifaddr **sc_ifas; /* Our ifaddrs. */ struct sockaddr_dl sc_addr; /* Our link level address. */ struct callout sc_ad_tmo; /* Advertising timeout. */ #ifdef INET struct callout sc_md_tmo; /* Master down timeout. */ #endif #ifdef INET6 struct callout sc_md6_tmo; /* XXX: Master down timeout. */ #endif struct mtx sc_mtx; int sc_vhid; int sc_advskew; int sc_advbase; int sc_naddrs; int sc_naddrs6; int sc_ifasiz; enum { INIT = 0, BACKUP, MASTER } sc_state; int sc_suppress; int sc_sendad_errors; #define CARP_SENDAD_MAX_ERRORS 3 int sc_sendad_success; #define CARP_SENDAD_MIN_SUCCESS 3 int sc_init_counter; uint64_t sc_counter; /* authentication */ #define CARP_HMAC_PAD 64 unsigned char sc_key[CARP_KEY_LEN]; unsigned char sc_pad[CARP_HMAC_PAD]; SHA1_CTX sc_sha1; TAILQ_ENTRY(carp_softc) sc_list; /* On the carp_if list. */ LIST_ENTRY(carp_softc) sc_next; /* On the global list. */ }; struct carp_if { #ifdef INET int cif_naddrs; #endif #ifdef INET6 int cif_naddrs6; #endif TAILQ_HEAD(, carp_softc) cif_vrs; #ifdef INET struct ip_moptions cif_imo; #endif #ifdef INET6 struct ip6_moptions cif_im6o; #endif struct ifnet *cif_ifp; struct mtx cif_mtx; uint32_t cif_flags; #define CIF_PROMISC 0x00000001 }; #define CARP_INET 0 #define CARP_INET6 1 static int proto_reg[] = {-1, -1}; /* * Brief design of carp(4). 
* * Any carp-capable ifnet may have a list of carp softcs hanging off * its ifp->if_carp pointer. Each softc represents one unique virtual * host id, or vhid. The softc has a back pointer to the ifnet. All * softcs are joined in a global list, which has quite limited use. * * Any interface address that takes part in CARP negotiation has a * pointer to the softc of its vhid, ifa->ifa_carp. That could be either * AF_INET or AF_INET6 address. * * Although, one can get the softc's backpointer to ifnet and traverse * through its ifp->if_addrhead queue to find all interface addresses * involved in CARP, we keep a growable array of ifaddr pointers. This * allows us to avoid grabbing the IF_ADDR_LOCK() in many traversals that * do calls into the network stack, thus avoiding LORs. * * Locking: * * Each softc has a lock sc_mtx. It is used to synchronise carp_input_c(), * callout-driven events and ioctl()s. * * To traverse the list of softcs on an ifnet we use CIF_LOCK() or carp_sx. * To traverse the global list we use the mutex carp_mtx. * * Known issues with locking: * * - Sending ad, we put the pointer to the softc in an mtag, and no reference * counting is done on the softc. * - On module unload we may race (?) with packet processing thread * dereferencing our function pointers. */ /* Accept incoming CARP packets. */ static VNET_DEFINE(int, carp_allow) = 1; #define V_carp_allow VNET(carp_allow) /* Preempt slower nodes. */ static VNET_DEFINE(int, carp_preempt) = 0; #define V_carp_preempt VNET(carp_preempt) /* Log level. */ static VNET_DEFINE(int, carp_log) = 1; #define V_carp_log VNET(carp_log) /* Global advskew demotion. */ static VNET_DEFINE(int, carp_demotion) = 0; #define V_carp_demotion VNET(carp_demotion) /* Send error demotion factor. */ static VNET_DEFINE(int, carp_senderr_adj) = CARP_MAXSKEW; #define V_carp_senderr_adj VNET(carp_senderr_adj) /* Iface down demotion factor. 
*/ static VNET_DEFINE(int, carp_ifdown_adj) = CARP_MAXSKEW; #define V_carp_ifdown_adj VNET(carp_ifdown_adj) static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS); SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW, 0, "CARP"); SYSCTL_INT(_net_inet_carp, OID_AUTO, allow, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(carp_allow), 0, "Accept incoming CARP packets"); SYSCTL_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(carp_preempt), 0, "High-priority backup preemption mode"); SYSCTL_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(carp_log), 0, "CARP log level"); SYSCTL_PROC(_net_inet_carp, OID_AUTO, demotion, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, 0, 0, carp_demote_adj_sysctl, "I", "Adjust demotion factor (skew of advskew)"); SYSCTL_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(carp_senderr_adj), 0, "Send error demotion factor adjustment"); SYSCTL_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(carp_ifdown_adj), 0, "Interface down demotion factor adjustment"); VNET_PCPUSTAT_DEFINE(struct carpstats, carpstats); VNET_PCPUSTAT_SYSINIT(carpstats); VNET_PCPUSTAT_SYSUNINIT(carpstats); #define CARPSTATS_ADD(name, val) \ counter_u64_add(VNET(carpstats)[offsetof(struct carpstats, name) / \ sizeof(uint64_t)], (val)) #define CARPSTATS_INC(name) CARPSTATS_ADD(name, 1) SYSCTL_VNET_PCPUSTAT(_net_inet_carp, OID_AUTO, stats, struct carpstats, carpstats, "CARP statistics (struct carpstats, netinet/ip_carp.h)"); #define CARP_LOCK_INIT(sc) mtx_init(&(sc)->sc_mtx, "carp_softc", \ NULL, MTX_DEF) #define CARP_LOCK_DESTROY(sc) mtx_destroy(&(sc)->sc_mtx) #define CARP_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) #define CARP_LOCK(sc) mtx_lock(&(sc)->sc_mtx) #define CARP_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) #define CIF_LOCK_INIT(cif) mtx_init(&(cif)->cif_mtx, "carp_if", \ NULL, MTX_DEF) #define CIF_LOCK_DESTROY(cif) mtx_destroy(&(cif)->cif_mtx) 
#define CIF_LOCK_ASSERT(cif) mtx_assert(&(cif)->cif_mtx, MA_OWNED) #define CIF_LOCK(cif) mtx_lock(&(cif)->cif_mtx) #define CIF_UNLOCK(cif) mtx_unlock(&(cif)->cif_mtx) #define CIF_FREE(cif) do { \ CIF_LOCK(cif); \ if (TAILQ_EMPTY(&(cif)->cif_vrs)) \ carp_free_if(cif); \ else \ CIF_UNLOCK(cif); \ } while (0) #define CARP_LOG(...) do { \ if (V_carp_log > 0) \ log(LOG_INFO, "carp: " __VA_ARGS__); \ } while (0) #define CARP_DEBUG(...) do { \ if (V_carp_log > 1) \ log(LOG_DEBUG, __VA_ARGS__); \ } while (0) #define IFNET_FOREACH_IFA(ifp, ifa) \ IF_ADDR_LOCK_ASSERT(ifp); \ TAILQ_FOREACH((ifa), &(ifp)->if_addrhead, ifa_link) \ if ((ifa)->ifa_carp != NULL) #define CARP_FOREACH_IFA(sc, ifa) \ CARP_LOCK_ASSERT(sc); \ for (int _i = 0; \ _i < (sc)->sc_naddrs + (sc)->sc_naddrs6 && \ ((ifa) = sc->sc_ifas[_i]) != NULL; \ ++_i) #define IFNET_FOREACH_CARP(ifp, sc) \ KASSERT(mtx_owned(&ifp->if_carp->cif_mtx) || \ sx_xlocked(&carp_sx), ("cif_vrs not locked")); \ TAILQ_FOREACH((sc), &(ifp)->if_carp->cif_vrs, sc_list) #define DEMOTE_ADVSKEW(sc) \ (((sc)->sc_advskew + V_carp_demotion > CARP_MAXSKEW) ? 
\ CARP_MAXSKEW : ((sc)->sc_advskew + V_carp_demotion)) static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t); static struct carp_softc *carp_alloc(struct ifnet *); static void carp_destroy(struct carp_softc *); static struct carp_if *carp_alloc_if(struct ifnet *); static void carp_free_if(struct carp_if *); static void carp_set_state(struct carp_softc *, int, const char* reason); static void carp_sc_state(struct carp_softc *); static void carp_setrun(struct carp_softc *, sa_family_t); static void carp_master_down(void *); static void carp_master_down_locked(struct carp_softc *, const char* reason); static void carp_send_ad(void *); static void carp_send_ad_locked(struct carp_softc *); static void carp_addroute(struct carp_softc *); static void carp_ifa_addroute(struct ifaddr *); static void carp_delroute(struct carp_softc *); static void carp_ifa_delroute(struct ifaddr *); static void carp_send_ad_all(void *, int); static void carp_demote_adj(int, char *); static LIST_HEAD(, carp_softc) carp_list; static struct mtx carp_mtx; static struct sx carp_sx; static struct task carp_sendall_task = TASK_INITIALIZER(0, carp_send_ad_all, NULL); static void carp_hmac_prepare(struct carp_softc *sc) { uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; uint8_t vhid = sc->sc_vhid & 0xff; struct ifaddr *ifa; int i, found; #ifdef INET struct in_addr last, cur, in; #endif #ifdef INET6 struct in6_addr last6, cur6, in6; #endif CARP_LOCK_ASSERT(sc); /* Compute ipad from key. */ bzero(sc->sc_pad, sizeof(sc->sc_pad)); bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key)); for (i = 0; i < sizeof(sc->sc_pad); i++) sc->sc_pad[i] ^= 0x36; /* Precompute first part of inner hash. 
*/ SHA1Init(&sc->sc_sha1); SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad)); SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version)); SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); #ifdef INET cur.s_addr = 0; do { found = 0; last = cur; cur.s_addr = 0xffffffff; CARP_FOREACH_IFA(sc, ifa) { in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; if (ifa->ifa_addr->sa_family == AF_INET && ntohl(in.s_addr) > ntohl(last.s_addr) && ntohl(in.s_addr) < ntohl(cur.s_addr)) { cur.s_addr = in.s_addr; found++; } } if (found) SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur)); } while (found); #endif /* INET */ #ifdef INET6 memset(&cur6, 0, sizeof(cur6)); do { found = 0; last6 = cur6; memset(&cur6, 0xff, sizeof(cur6)); CARP_FOREACH_IFA(sc, ifa) { in6 = ifatoia6(ifa)->ia_addr.sin6_addr; if (IN6_IS_SCOPE_EMBED(&in6)) in6.s6_addr16[1] = 0; if (ifa->ifa_addr->sa_family == AF_INET6 && memcmp(&in6, &last6, sizeof(in6)) > 0 && memcmp(&in6, &cur6, sizeof(in6)) < 0) { cur6 = in6; found++; } } if (found) SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6)); } while (found); #endif /* INET6 */ /* convert ipad to opad */ for (i = 0; i < sizeof(sc->sc_pad); i++) sc->sc_pad[i] ^= 0x36 ^ 0x5c; } static void carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2], unsigned char md[20]) { SHA1_CTX sha1ctx; CARP_LOCK_ASSERT(sc); /* fetch first half of inner hash */ bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx)); SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter)); SHA1Final(md, &sha1ctx); /* outer hash */ SHA1Init(&sha1ctx); SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); SHA1Update(&sha1ctx, md, 20); SHA1Final(md, &sha1ctx); } static int carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2], unsigned char md[20]) { unsigned char md2[20]; CARP_LOCK_ASSERT(sc); carp_hmac_generate(sc, counter, md2); return (bcmp(md, md2, sizeof(md2))); } /* * process input packet. 
* we have rearranged checks order compared to the rfc, * but it seems more efficient this way or not possible otherwise. */ #ifdef INET int carp_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m = *mp; struct ip *ip = mtod(m, struct ip *); struct carp_header *ch; int iplen, len; iplen = *offp; *mp = NULL; CARPSTATS_INC(carps_ipackets); if (!V_carp_allow) { m_freem(m); return (IPPROTO_DONE); } /* verify that the IP TTL is 255. */ if (ip->ip_ttl != CARP_DFLTTL) { CARPSTATS_INC(carps_badttl); CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__, ip->ip_ttl, m->m_pkthdr.rcvif->if_xname); m_freem(m); return (IPPROTO_DONE); } iplen = ip->ip_hl << 2; if (m->m_pkthdr.len < iplen + sizeof(*ch)) { CARPSTATS_INC(carps_badlen); CARP_DEBUG("%s: received len %zd < sizeof(struct carp_header) " "on %s\n", __func__, m->m_len - sizeof(struct ip), m->m_pkthdr.rcvif->if_xname); m_freem(m); return (IPPROTO_DONE); } if (iplen + sizeof(*ch) < m->m_len) { if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) { CARPSTATS_INC(carps_hdrops); CARP_DEBUG("%s: pullup failed\n", __func__); return (IPPROTO_DONE); } ip = mtod(m, struct ip *); } ch = (struct carp_header *)((char *)ip + iplen); /* * verify that the received packet length is * equal to the CARP header */ len = iplen + sizeof(*ch); if (len > m->m_pkthdr.len) { CARPSTATS_INC(carps_badlen); CARP_DEBUG("%s: packet too short %d on %s\n", __func__, m->m_pkthdr.len, m->m_pkthdr.rcvif->if_xname); m_freem(m); return (IPPROTO_DONE); } if ((m = m_pullup(m, len)) == NULL) { CARPSTATS_INC(carps_hdrops); return (IPPROTO_DONE); } ip = mtod(m, struct ip *); ch = (struct carp_header *)((char *)ip + iplen); /* verify the CARP checksum */ m->m_data += iplen; if (in_cksum(m, len - iplen)) { CARPSTATS_INC(carps_badsum); CARP_DEBUG("%s: checksum failed on %s\n", __func__, m->m_pkthdr.rcvif->if_xname); m_freem(m); return (IPPROTO_DONE); } m->m_data -= iplen; carp_input_c(m, ch, AF_INET); return (IPPROTO_DONE); } #endif #ifdef INET6 int 
carp6_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m = *mp; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct carp_header *ch; u_int len; CARPSTATS_INC(carps_ipackets6); if (!V_carp_allow) { m_freem(m); return (IPPROTO_DONE); } /* check if received on a valid carp interface */ if (m->m_pkthdr.rcvif->if_carp == NULL) { CARPSTATS_INC(carps_badif); CARP_DEBUG("%s: packet received on non-carp interface: %s\n", __func__, m->m_pkthdr.rcvif->if_xname); m_freem(m); return (IPPROTO_DONE); } /* verify that the IP TTL is 255 */ if (ip6->ip6_hlim != CARP_DFLTTL) { CARPSTATS_INC(carps_badttl); CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__, ip6->ip6_hlim, m->m_pkthdr.rcvif->if_xname); m_freem(m); return (IPPROTO_DONE); } /* verify that we have a complete carp packet */ len = m->m_len; IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch)); if (ch == NULL) { CARPSTATS_INC(carps_badlen); CARP_DEBUG("%s: packet size %u too small\n", __func__, len); return (IPPROTO_DONE); } /* verify the CARP checksum */ m->m_data += *offp; if (in_cksum(m, sizeof(*ch))) { CARPSTATS_INC(carps_badsum); CARP_DEBUG("%s: checksum failed, on %s\n", __func__, m->m_pkthdr.rcvif->if_xname); m_freem(m); return (IPPROTO_DONE); } m->m_data -= *offp; carp_input_c(m, ch, AF_INET6); return (IPPROTO_DONE); } #endif /* INET6 */ /* * This routine should not be necessary at all, but some switches * (VMWare ESX vswitches) can echo our own packets back at us, * and we must ignore them or they will cause us to drop out of * MASTER mode. * * We cannot catch all cases of network loops. Instead, what we * do here is catch any packet that arrives with a carp header * with a VHID of 0, that comes from an address that is our own. * These packets are by definition "from us" (even if they are from * a misconfigured host that is pretending to be us). * * The VHID test is outside this mini-function. 
*/
/*
 * Compare the source address of the IPv4/IPv6 header at the front of mbuf
 * 'm' with the address stored in 'ifa'.  Returns non-zero on a match and 0
 * otherwise (including for any family that is not compiled in).
 *
 * NOTE(review): 'ifa' is converted with ifatoia()/ifatoia6() according to
 * 'af' without checking ifa->ifa_addr->sa_family here -- confirm this is
 * safe for every address the caller iterates over.
 */
static int
carp_source_is_self(struct mbuf *m, struct ifaddr *ifa, sa_family_t af)
{
#ifdef INET
	struct ip *ip4;
	struct in_addr in4;
#endif
#ifdef INET6
	struct ip6_hdr *ip6;
	struct in6_addr in6;
#endif

	switch (af) {
#ifdef INET
	case AF_INET:
		ip4 = mtod(m, struct ip *);
		in4 = ifatoia(ifa)->ia_addr.sin_addr;
		return (in4.s_addr == ip4->ip_src.s_addr);
#endif
#ifdef INET6
	case AF_INET6:
		ip6 = mtod(m, struct ip6_hdr *);
		in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
		return (memcmp(&in6, &ip6->ip6_src, sizeof(in6)) == 0);
#endif
	default:
		break;
	}
	return (0);
}

/*
 * Handle one received CARP header 'ch' that arrived in mbuf 'm' for address
 * family 'af'.  The function looks up the matching vhid on the receiving
 * interface, checks version and HMAC, stores the advertised counter in the
 * softc and then acts on the softc's current state.  'm' is freed on every
 * path.
 */
static void
carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af)
{
	struct ifnet *ifp = m->m_pkthdr.rcvif;
	struct ifaddr *ifa, *match;
	struct carp_softc *sc;
	uint64_t tmp_counter;
	struct timeval sc_tv, ch_tv;
	int error;

	/*
	 * Verify that the VHID is valid on the receiving interface.
	 *
	 * There should be just one match.  If there are none
	 * the VHID is not valid and we drop the packet.  If
	 * there are multiple VHID matches, take just the first
	 * one, for compatibility with previous code.  While we're
	 * scanning, check for obvious loops in the network topology
	 * (these should never happen, and as noted above, we may
	 * miss real loops; this is just a double-check).
	 */
	IF_ADDR_RLOCK(ifp);
	error = 0;
	match = NULL;
	IFNET_FOREACH_IFA(ifp, ifa) {
		if (match == NULL && ifa->ifa_carp != NULL &&
		    ifa->ifa_addr->sa_family == af &&
		    ifa->ifa_carp->sc_vhid == ch->carp_vhid)
			match = ifa;
		if (ch->carp_vhid == 0 && carp_source_is_self(m, ifa, af))
			error = ELOOP;
	}
	ifa = error ? NULL : match;
	/* The reference is taken while the address list is still locked. */
	if (ifa != NULL)
		ifa_ref(ifa);
	IF_ADDR_RUNLOCK(ifp);

	if (ifa == NULL) {
		if (error == ELOOP) {
			CARP_DEBUG("dropping looped packet on interface %s\n",
			    ifp->if_xname);
			CARPSTATS_INC(carps_badif);	/* ??? */
		} else {
			CARPSTATS_INC(carps_badvhid);
		}
		m_freem(m);
		return;
	}

	/* verify the CARP version. */
	if (ch->carp_version != CARP_VERSION) {
		CARPSTATS_INC(carps_badver);
		CARP_DEBUG("%s: invalid version %d\n", ifp->if_xname,
		    ch->carp_version);
		ifa_free(ifa);
		m_freem(m);
		return;
	}

	/* From here on the softc lock is held; the ifa reference is dropped. */
	sc = ifa->ifa_carp;
	CARP_LOCK(sc);
	ifa_free(ifa);

	if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
		CARPSTATS_INC(carps_badauth);
		CARP_DEBUG("%s: incorrect hash for VHID %u@%s\n", __func__,
		    sc->sc_vhid, ifp->if_xname);
		goto out;
	}

	/* Reassemble the 64-bit counter from its two network-order halves. */
	tmp_counter = ntohl(ch->carp_counter[0]);
	tmp_counter = tmp_counter<<32;
	tmp_counter += ntohl(ch->carp_counter[1]);

	/* XXX Replay protection goes here */

	sc->sc_init_counter = 0;
	sc->sc_counter = tmp_counter;

	/* Our own advertisement interval versus the one just advertised. */
	sc_tv.tv_sec = sc->sc_advbase;
	sc_tv.tv_usec = DEMOTE_ADVSKEW(sc) * 1000000 / 256;
	ch_tv.tv_sec = ch->carp_advbase;
	ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;

	switch (sc->sc_state) {
	case INIT:
		break;
	case MASTER:
		/*
		 * If we receive an advertisement from a master who's going to
		 * be more frequent than us, go into BACKUP state.
		 */
		if (timevalcmp(&sc_tv, &ch_tv, >) ||
		    timevalcmp(&sc_tv, &ch_tv, ==)) {
			callout_stop(&sc->sc_ad_tmo);
			carp_set_state(sc, BACKUP,
			    "more frequent advertisement received");
			carp_setrun(sc, 0);
			carp_delroute(sc);
		}
		break;
	case BACKUP:
		/*
		 * If we're pre-empting masters who advertise slower than us,
		 * and this one claims to be slower, treat him as down.
		 */
		if (V_carp_preempt && timevalcmp(&sc_tv, &ch_tv, <)) {
			carp_master_down_locked(sc,
			    "preempting a slower master");
			break;
		}

		/*
		 * If the master is going to advertise at such a low frequency
		 * that he's guaranteed to time out, we might as well just
		 * treat him as timed out now.
		 */
		sc_tv.tv_sec = sc->sc_advbase * 3;
		if (timevalcmp(&sc_tv, &ch_tv, <)) {
			carp_master_down_locked(sc, "master will time out");
			break;
		}

		/*
		 * Otherwise, we reset the counter and wait for the next
		 * advertisement.
		 */
		carp_setrun(sc, af);
		break;
	}

out:
	CARP_UNLOCK(sc);
	m_freem(m);
}

/*
 * Fill in the counter and HMAC fields of the outgoing header 'ch' and tag
 * mbuf 'm' with a PACKET_TAG_CARP tag holding the softc pointer.
 *
 * The softc counter is seeded from arc4random() while sc_init_counter is
 * set and incremented otherwise.  Returns 0 on success; if the tag cannot
 * be allocated, 'm' is freed and ENOMEM is returned.
 */
static int
carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch)
{
	struct m_tag *mtag;

	if (sc->sc_init_counter) {
		/* this could also be seconds since unix epoch */
		sc->sc_counter = arc4random();
		sc->sc_counter = sc->sc_counter << 32;
		sc->sc_counter += arc4random();
	} else
		sc->sc_counter++;

	ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff);
	ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff);

	carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);

	/* Tag packet for carp_output */
	if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *),
	    M_NOWAIT)) == NULL) {
		m_freem(m);
		CARPSTATS_INC(carps_onomem);
		return (ENOMEM);
	}
	/* The tag payload is the softc pointer itself, not a copy of the softc. */
	bcopy(&sc, mtag + 1, sizeof(sc));
	m_tag_prepend(m, mtag);

	return (0);
}

/*
 * To avoid LORs and possible recursions this function shouldn't
 * be called directly, but scheduled via taskqueue.
 *
 * Walks carp_list under carp_mtx and sends an advertisement for every
 * softc that is currently in MASTER state.
 */
static void
carp_send_ad_all(void *ctx __unused, int pending __unused)
{
	struct carp_softc *sc;

	mtx_lock(&carp_mtx);
	LIST_FOREACH(sc, &carp_list, sc_next)
		if (sc->sc_state == MASTER) {
			CARP_LOCK(sc);
			CURVNET_SET(sc->sc_carpdev->if_vnet);
			carp_send_ad_locked(sc);
			CURVNET_RESTORE();
			CARP_UNLOCK(sc);
		}
	mtx_unlock(&carp_mtx);
}

/* Send a periodic advertisement, executed in callout context.
*/ static void carp_send_ad(void *v) { struct carp_softc *sc = v; CARP_LOCK_ASSERT(sc); CURVNET_SET(sc->sc_carpdev->if_vnet); carp_send_ad_locked(sc); CURVNET_RESTORE(); CARP_UNLOCK(sc); } static void carp_send_ad_error(struct carp_softc *sc, int error) { if (error) { if (sc->sc_sendad_errors < INT_MAX) sc->sc_sendad_errors++; if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { static const char fmt[] = "send error %d on %s"; char msg[sizeof(fmt) + IFNAMSIZ]; sprintf(msg, fmt, error, sc->sc_carpdev->if_xname); carp_demote_adj(V_carp_senderr_adj, msg); } sc->sc_sendad_success = 0; } else { if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS && ++sc->sc_sendad_success >= CARP_SENDAD_MIN_SUCCESS) { static const char fmt[] = "send ok on %s"; char msg[sizeof(fmt) + IFNAMSIZ]; sprintf(msg, fmt, sc->sc_carpdev->if_xname); carp_demote_adj(-V_carp_senderr_adj, msg); sc->sc_sendad_errors = 0; } else sc->sc_sendad_errors = 0; } } /* * Pick the best ifaddr on the given ifp for sending CARP * advertisements. * * "Best" here is defined by ifa_preferred(). This function is much * much like ifaof_ifpforaddr() except that we just use ifa_preferred(). * * (This could be simplified to return the actual address, except that * it has a different format in AF_INET and AF_INET6.) 
*/ static struct ifaddr * carp_best_ifa(int af, struct ifnet *ifp) { struct ifaddr *ifa, *best; if (af >= AF_MAX) return (NULL); best = NULL; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family == af && (best == NULL || ifa_preferred(best, ifa))) best = ifa; } IF_ADDR_RUNLOCK(ifp); if (best != NULL) ifa_ref(best); return (best); } static void carp_send_ad_locked(struct carp_softc *sc) { struct carp_header ch; struct timeval tv; struct ifaddr *ifa; struct carp_header *ch_ptr; struct mbuf *m; int len, advskew; CARP_LOCK_ASSERT(sc); advskew = DEMOTE_ADVSKEW(sc); tv.tv_sec = sc->sc_advbase; tv.tv_usec = advskew * 1000000 / 256; ch.carp_version = CARP_VERSION; ch.carp_type = CARP_ADVERTISEMENT; ch.carp_vhid = sc->sc_vhid; ch.carp_advbase = sc->sc_advbase; ch.carp_advskew = advskew; ch.carp_authlen = 7; /* XXX DEFINE */ ch.carp_pad1 = 0; /* must be zero */ ch.carp_cksum = 0; /* XXXGL: OpenBSD picks first ifaddr with needed family. */ #ifdef INET if (sc->sc_naddrs) { struct ip *ip; m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { CARPSTATS_INC(carps_onomem); goto resched; } len = sizeof(*ip) + sizeof(ch); m->m_pkthdr.len = len; m->m_pkthdr.rcvif = NULL; m->m_len = len; M_ALIGN(m, m->m_len); m->m_flags |= M_MCAST; ip = mtod(m, struct ip *); ip->ip_v = IPVERSION; ip->ip_hl = sizeof(*ip) >> 2; ip->ip_tos = IPTOS_LOWDELAY; ip->ip_len = htons(len); ip->ip_off = htons(IP_DF); ip->ip_ttl = CARP_DFLTTL; ip->ip_p = IPPROTO_CARP; ip->ip_sum = 0; ip_fillid(ip); ifa = carp_best_ifa(AF_INET, sc->sc_carpdev); if (ifa != NULL) { ip->ip_src.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; ifa_free(ifa); } else ip->ip_src.s_addr = 0; ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP); ch_ptr = (struct carp_header *)(&ip[1]); bcopy(&ch, ch_ptr, sizeof(ch)); if (carp_prepare_ad(m, sc, ch_ptr)) goto resched; m->m_data += sizeof(*ip); ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip)); m->m_data -= sizeof(*ip); CARPSTATS_INC(carps_opackets); 
carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_carpdev->if_carp->cif_imo, NULL)); } #endif /* INET */ #ifdef INET6 if (sc->sc_naddrs6) { struct ip6_hdr *ip6; m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { CARPSTATS_INC(carps_onomem); goto resched; } len = sizeof(*ip6) + sizeof(ch); m->m_pkthdr.len = len; m->m_pkthdr.rcvif = NULL; m->m_len = len; M_ALIGN(m, m->m_len); m->m_flags |= M_MCAST; ip6 = mtod(m, struct ip6_hdr *); bzero(ip6, sizeof(*ip6)); ip6->ip6_vfc |= IPV6_VERSION; ip6->ip6_hlim = CARP_DFLTTL; ip6->ip6_nxt = IPPROTO_CARP; /* set the source address */ ifa = carp_best_ifa(AF_INET6, sc->sc_carpdev); if (ifa != NULL) { bcopy(IFA_IN6(ifa), &ip6->ip6_src, sizeof(struct in6_addr)); ifa_free(ifa); } else /* This should never happen with IPv6. */ bzero(&ip6->ip6_src, sizeof(struct in6_addr)); /* Set the multicast destination. */ ip6->ip6_dst.s6_addr16[0] = htons(0xff02); ip6->ip6_dst.s6_addr8[15] = 0x12; if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { m_freem(m); CARP_DEBUG("%s: in6_setscope failed\n", __func__); goto resched; } ch_ptr = (struct carp_header *)(&ip6[1]); bcopy(&ch, ch_ptr, sizeof(ch)); if (carp_prepare_ad(m, sc, ch_ptr)) goto resched; m->m_data += sizeof(*ip6); ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip6)); m->m_data -= sizeof(*ip6); CARPSTATS_INC(carps_opackets6); carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0, &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL)); } #endif /* INET6 */ resched: callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_send_ad, sc); } static void carp_addroute(struct carp_softc *sc) { struct ifaddr *ifa; CARP_FOREACH_IFA(sc, ifa) carp_ifa_addroute(ifa); } static void carp_ifa_addroute(struct ifaddr *ifa) { switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: in_addprefix(ifatoia(ifa), RTF_UP); ifa_add_loopback_route(ifa, (struct sockaddr *)&ifatoia(ifa)->ia_addr); break; #endif #ifdef INET6 case AF_INET6: ifa_add_loopback_route(ifa, (struct sockaddr 
*)&ifatoia6(ifa)->ia_addr); nd6_add_ifa_lle(ifatoia6(ifa)); break; #endif } } static void carp_delroute(struct carp_softc *sc) { struct ifaddr *ifa; CARP_FOREACH_IFA(sc, ifa) carp_ifa_delroute(ifa); } static void carp_ifa_delroute(struct ifaddr *ifa) { switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: ifa_del_loopback_route(ifa, (struct sockaddr *)&ifatoia(ifa)->ia_addr); in_scrubprefix(ifatoia(ifa), LLE_STATIC); break; #endif #ifdef INET6 case AF_INET6: ifa_del_loopback_route(ifa, (struct sockaddr *)&ifatoia6(ifa)->ia_addr); nd6_rem_ifa_lle(ifatoia6(ifa), 1); break; #endif } } int carp_master(struct ifaddr *ifa) { struct carp_softc *sc = ifa->ifa_carp; return (sc->sc_state == MASTER); } #ifdef INET /* * Broadcast a gratuitous ARP request containing * the virtual router MAC address for each IP address * associated with the virtual router. */ static void carp_send_arp(struct carp_softc *sc) { struct ifaddr *ifa; struct in_addr addr; CARP_FOREACH_IFA(sc, ifa) { if (ifa->ifa_addr->sa_family != AF_INET) continue; addr = ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr; arp_announce_ifaddr(sc->sc_carpdev, addr, LLADDR(&sc->sc_addr)); } } int carp_iamatch(struct ifaddr *ifa, uint8_t **enaddr) { struct carp_softc *sc = ifa->ifa_carp; if (sc->sc_state == MASTER) { *enaddr = LLADDR(&sc->sc_addr); return (1); } return (0); } #endif #ifdef INET6 static void carp_send_na(struct carp_softc *sc) { static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; struct ifaddr *ifa; struct in6_addr *in6; CARP_FOREACH_IFA(sc, ifa) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; in6 = IFA_IN6(ifa); nd6_na_output(sc->sc_carpdev, &mcast, in6, ND_NA_FLAG_OVERRIDE, 1, NULL); DELAY(1000); /* XXX */ } } /* * Returns ifa in case it's a carp address and it is MASTER, or if the address * matches and is not a carp address. Returns NULL otherwise. 
*/ struct ifaddr * carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr) { struct ifaddr *ifa; ifa = NULL; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; if (!IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) continue; if (ifa->ifa_carp && ifa->ifa_carp->sc_state != MASTER) ifa = NULL; else ifa_ref(ifa); break; } IF_ADDR_RUNLOCK(ifp); return (ifa); } caddr_t carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr) { struct ifaddr *ifa; IF_ADDR_RLOCK(ifp); IFNET_FOREACH_IFA(ifp, ifa) if (ifa->ifa_addr->sa_family == AF_INET6 && IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) { struct carp_softc *sc = ifa->ifa_carp; struct m_tag *mtag; IF_ADDR_RUNLOCK(ifp); mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *), M_NOWAIT); if (mtag == NULL) /* Better a bit than nothing. */ return (LLADDR(&sc->sc_addr)); bcopy(&sc, mtag + 1, sizeof(sc)); m_tag_prepend(m, mtag); return (LLADDR(&sc->sc_addr)); } IF_ADDR_RUNLOCK(ifp); return (NULL); } #endif /* INET6 */ int carp_forus(struct ifnet *ifp, u_char *dhost) { struct carp_softc *sc; uint8_t *ena = dhost; if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) return (0); CIF_LOCK(ifp->if_carp); IFNET_FOREACH_CARP(ifp, sc) { CARP_LOCK(sc); if (sc->sc_state == MASTER && !bcmp(dhost, LLADDR(&sc->sc_addr), ETHER_ADDR_LEN)) { CARP_UNLOCK(sc); CIF_UNLOCK(ifp->if_carp); return (1); } CARP_UNLOCK(sc); } CIF_UNLOCK(ifp->if_carp); return (0); } /* Master down timeout event, executed in callout context. 
*/ static void carp_master_down(void *v) { struct carp_softc *sc = v; CARP_LOCK_ASSERT(sc); CURVNET_SET(sc->sc_carpdev->if_vnet); if (sc->sc_state == BACKUP) { carp_master_down_locked(sc, "master timed out"); } CURVNET_RESTORE(); CARP_UNLOCK(sc); } static void carp_master_down_locked(struct carp_softc *sc, const char *reason) { CARP_LOCK_ASSERT(sc); switch (sc->sc_state) { case BACKUP: carp_set_state(sc, MASTER, reason); carp_send_ad_locked(sc); #ifdef INET carp_send_arp(sc); #endif #ifdef INET6 carp_send_na(sc); #endif carp_setrun(sc, 0); carp_addroute(sc); break; case INIT: case MASTER: #ifdef INVARIANTS panic("carp: VHID %u@%s: master_down event in %s state\n", sc->sc_vhid, sc->sc_carpdev->if_xname, sc->sc_state ? "MASTER" : "INIT"); #endif break; } } /* * When in backup state, af indicates whether to reset the master down timer * for v4 or v6. If it's set to zero, reset the ones which are already pending. */ static void carp_setrun(struct carp_softc *sc, sa_family_t af) { struct timeval tv; CARP_LOCK_ASSERT(sc); if ((sc->sc_carpdev->if_flags & IFF_UP) == 0 || sc->sc_carpdev->if_link_state != LINK_STATE_UP || (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0)) return; switch (sc->sc_state) { case INIT: carp_set_state(sc, BACKUP, "initialization complete"); carp_setrun(sc, 0); break; case BACKUP: callout_stop(&sc->sc_ad_tmo); tv.tv_sec = 3 * sc->sc_advbase; tv.tv_usec = sc->sc_advskew * 1000000 / 256; switch (af) { #ifdef INET case AF_INET: callout_reset(&sc->sc_md_tmo, tvtohz(&tv), carp_master_down, sc); break; #endif #ifdef INET6 case AF_INET6: callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), carp_master_down, sc); break; #endif default: #ifdef INET if (sc->sc_naddrs) callout_reset(&sc->sc_md_tmo, tvtohz(&tv), carp_master_down, sc); #endif #ifdef INET6 if (sc->sc_naddrs6) callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), carp_master_down, sc); #endif break; } break; case MASTER: tv.tv_sec = sc->sc_advbase; tv.tv_usec = sc->sc_advskew * 1000000 / 256; 
callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_send_ad, sc); break; } } /* * Setup multicast structures. */ static int carp_multicast_setup(struct carp_if *cif, sa_family_t sa) { struct ifnet *ifp = cif->cif_ifp; int error = 0; switch (sa) { #ifdef INET case AF_INET: { struct ip_moptions *imo = &cif->cif_imo; struct in_addr addr; if (imo->imo_membership) return (0); imo->imo_membership = (struct in_multi **)malloc( (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_CARP, M_WAITOK); imo->imo_mfilters = NULL; imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; imo->imo_multicast_vif = -1; addr.s_addr = htonl(INADDR_CARP_GROUP); if ((error = in_joingroup(ifp, &addr, NULL, &imo->imo_membership[0])) != 0) { free(imo->imo_membership, M_CARP); break; } imo->imo_num_memberships++; imo->imo_multicast_ifp = ifp; imo->imo_multicast_ttl = CARP_DFLTTL; imo->imo_multicast_loop = 0; break; } #endif #ifdef INET6 case AF_INET6: { struct ip6_moptions *im6o = &cif->cif_im6o; struct in6_addr in6; struct in6_multi *in6m; if (im6o->im6o_membership) return (0); im6o->im6o_membership = (struct in6_multi **)malloc( (sizeof(struct in6_multi *) * IPV6_MIN_MEMBERSHIPS), M_CARP, M_ZERO | M_WAITOK); im6o->im6o_mfilters = NULL; im6o->im6o_max_memberships = IPV6_MIN_MEMBERSHIPS; im6o->im6o_multicast_hlim = CARP_DFLTTL; im6o->im6o_multicast_ifp = ifp; /* Join IPv6 CARP multicast group. */ bzero(&in6, sizeof(in6)); in6.s6_addr16[0] = htons(0xff02); in6.s6_addr8[15] = 0x12; if ((error = in6_setscope(&in6, ifp, NULL)) != 0) { free(im6o->im6o_membership, M_CARP); break; } in6m = NULL; if ((error = in6_mc_join(ifp, &in6, NULL, &in6m, 0)) != 0) { free(im6o->im6o_membership, M_CARP); break; } im6o->im6o_membership[0] = in6m; im6o->im6o_num_memberships++; /* Join solicited multicast address. 
*/ bzero(&in6, sizeof(in6)); in6.s6_addr16[0] = htons(0xff02); in6.s6_addr32[1] = 0; in6.s6_addr32[2] = htonl(1); in6.s6_addr32[3] = 0; in6.s6_addr8[12] = 0xff; if ((error = in6_setscope(&in6, ifp, NULL)) != 0) { in6_mc_leave(im6o->im6o_membership[0], NULL); free(im6o->im6o_membership, M_CARP); break; } in6m = NULL; if ((error = in6_mc_join(ifp, &in6, NULL, &in6m, 0)) != 0) { in6_mc_leave(im6o->im6o_membership[0], NULL); free(im6o->im6o_membership, M_CARP); break; } im6o->im6o_membership[1] = in6m; im6o->im6o_num_memberships++; break; } #endif } return (error); } /* * Free multicast structures. */ static void carp_multicast_cleanup(struct carp_if *cif, sa_family_t sa) { sx_assert(&carp_sx, SA_XLOCKED); switch (sa) { #ifdef INET case AF_INET: if (cif->cif_naddrs == 0) { struct ip_moptions *imo = &cif->cif_imo; in_leavegroup(imo->imo_membership[0], NULL); KASSERT(imo->imo_mfilters == NULL, ("%s: imo_mfilters != NULL", __func__)); free(imo->imo_membership, M_CARP); imo->imo_membership = NULL; } break; #endif #ifdef INET6 case AF_INET6: if (cif->cif_naddrs6 == 0) { struct ip6_moptions *im6o = &cif->cif_im6o; in6_mc_leave(im6o->im6o_membership[0], NULL); in6_mc_leave(im6o->im6o_membership[1], NULL); KASSERT(im6o->im6o_mfilters == NULL, ("%s: im6o_mfilters != NULL", __func__)); free(im6o->im6o_membership, M_CARP); im6o->im6o_membership = NULL; } break; #endif } } int carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa) { struct m_tag *mtag; struct carp_softc *sc; if (!sa) return (0); switch (sa->sa_family) { #ifdef INET case AF_INET: break; #endif #ifdef INET6 case AF_INET6: break; #endif default: return (0); } mtag = m_tag_find(m, PACKET_TAG_CARP, NULL); if (mtag == NULL) return (0); bcopy(mtag + 1, &sc, sizeof(sc)); /* Set the source MAC address to the Virtual Router MAC Address. 
*/ switch (ifp->if_type) { case IFT_ETHER: case IFT_BRIDGE: case IFT_L2VLAN: { struct ether_header *eh; eh = mtod(m, struct ether_header *); eh->ether_shost[0] = 0; eh->ether_shost[1] = 0; eh->ether_shost[2] = 0x5e; eh->ether_shost[3] = 0; eh->ether_shost[4] = 1; eh->ether_shost[5] = sc->sc_vhid; } break; case IFT_FDDI: { struct fddi_header *fh; fh = mtod(m, struct fddi_header *); fh->fddi_shost[0] = 0; fh->fddi_shost[1] = 0; fh->fddi_shost[2] = 0x5e; fh->fddi_shost[3] = 0; fh->fddi_shost[4] = 1; fh->fddi_shost[5] = sc->sc_vhid; } break; default: printf("%s: carp is not supported for the %d interface type\n", ifp->if_xname, ifp->if_type); return (EOPNOTSUPP); } return (0); } static struct carp_softc* carp_alloc(struct ifnet *ifp) { struct carp_softc *sc; struct carp_if *cif; sx_assert(&carp_sx, SA_XLOCKED); if ((cif = ifp->if_carp) == NULL) cif = carp_alloc_if(ifp); sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO); sc->sc_advbase = CARP_DFLTINTV; sc->sc_vhid = -1; /* required setting */ sc->sc_init_counter = 1; sc->sc_state = INIT; sc->sc_ifasiz = sizeof(struct ifaddr *); sc->sc_ifas = malloc(sc->sc_ifasiz, M_CARP, M_WAITOK|M_ZERO); sc->sc_carpdev = ifp; CARP_LOCK_INIT(sc); #ifdef INET callout_init_mtx(&sc->sc_md_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); #endif #ifdef INET6 callout_init_mtx(&sc->sc_md6_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); #endif callout_init_mtx(&sc->sc_ad_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); CIF_LOCK(cif); TAILQ_INSERT_TAIL(&cif->cif_vrs, sc, sc_list); CIF_UNLOCK(cif); mtx_lock(&carp_mtx); LIST_INSERT_HEAD(&carp_list, sc, sc_next); mtx_unlock(&carp_mtx); return (sc); } static void carp_grow_ifas(struct carp_softc *sc) { struct ifaddr **new; new = malloc(sc->sc_ifasiz * 2, M_CARP, M_WAITOK | M_ZERO); CARP_LOCK(sc); bcopy(sc->sc_ifas, new, sc->sc_ifasiz); free(sc->sc_ifas, M_CARP); sc->sc_ifas = new; sc->sc_ifasiz *= 2; CARP_UNLOCK(sc); } static void carp_destroy(struct carp_softc *sc) { struct ifnet *ifp = sc->sc_carpdev; struct 
carp_if *cif = ifp->if_carp; sx_assert(&carp_sx, SA_XLOCKED); if (sc->sc_suppress) carp_demote_adj(-V_carp_ifdown_adj, "vhid removed"); CARP_UNLOCK(sc); CIF_LOCK(cif); TAILQ_REMOVE(&cif->cif_vrs, sc, sc_list); CIF_UNLOCK(cif); mtx_lock(&carp_mtx); LIST_REMOVE(sc, sc_next); mtx_unlock(&carp_mtx); callout_drain(&sc->sc_ad_tmo); #ifdef INET callout_drain(&sc->sc_md_tmo); #endif #ifdef INET6 callout_drain(&sc->sc_md6_tmo); #endif CARP_LOCK_DESTROY(sc); free(sc->sc_ifas, M_CARP); free(sc, M_CARP); } static struct carp_if* carp_alloc_if(struct ifnet *ifp) { struct carp_if *cif; int error; cif = malloc(sizeof(*cif), M_CARP, M_WAITOK|M_ZERO); if ((error = ifpromisc(ifp, 1)) != 0) printf("%s: ifpromisc(%s) failed: %d\n", __func__, ifp->if_xname, error); else cif->cif_flags |= CIF_PROMISC; CIF_LOCK_INIT(cif); cif->cif_ifp = ifp; TAILQ_INIT(&cif->cif_vrs); IF_ADDR_WLOCK(ifp); ifp->if_carp = cif; if_ref(ifp); IF_ADDR_WUNLOCK(ifp); return (cif); } static void carp_free_if(struct carp_if *cif) { struct ifnet *ifp = cif->cif_ifp; CIF_LOCK_ASSERT(cif); KASSERT(TAILQ_EMPTY(&cif->cif_vrs), ("%s: softc list not empty", __func__)); IF_ADDR_WLOCK(ifp); ifp->if_carp = NULL; IF_ADDR_WUNLOCK(ifp); CIF_LOCK_DESTROY(cif); if (cif->cif_flags & CIF_PROMISC) ifpromisc(ifp, 0); if_rele(ifp); free(cif, M_CARP); } static void carp_carprcp(struct carpreq *carpr, struct carp_softc *sc, int priv) { CARP_LOCK(sc); carpr->carpr_state = sc->sc_state; carpr->carpr_vhid = sc->sc_vhid; carpr->carpr_advbase = sc->sc_advbase; carpr->carpr_advskew = sc->sc_advskew; if (priv) bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key)); else bzero(carpr->carpr_key, sizeof(carpr->carpr_key)); CARP_UNLOCK(sc); } int carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td) { struct carpreq carpr; struct ifnet *ifp; struct carp_softc *sc = NULL; int error = 0, locked = 0; - if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr))) + if ((error = copyin(ifr_data_get_ptr(ifr), &carpr, sizeof carpr))) return 
(error); ifp = ifunit_ref(ifr->ifr_name); if (ifp == NULL) return (ENXIO); switch (ifp->if_type) { case IFT_ETHER: case IFT_L2VLAN: case IFT_BRIDGE: case IFT_FDDI: break; default: error = EOPNOTSUPP; goto out; } if ((ifp->if_flags & IFF_MULTICAST) == 0) { error = EADDRNOTAVAIL; goto out; } sx_xlock(&carp_sx); switch (cmd) { case SIOCSVH: if ((error = priv_check(td, PRIV_NETINET_CARP))) break; if (carpr.carpr_vhid <= 0 || carpr.carpr_vhid > CARP_MAXVHID || carpr.carpr_advbase < 0 || carpr.carpr_advskew < 0) { error = EINVAL; break; } if (ifp->if_carp) { IFNET_FOREACH_CARP(ifp, sc) if (sc->sc_vhid == carpr.carpr_vhid) break; } if (sc == NULL) { sc = carp_alloc(ifp); CARP_LOCK(sc); sc->sc_vhid = carpr.carpr_vhid; LLADDR(&sc->sc_addr)[0] = 0; LLADDR(&sc->sc_addr)[1] = 0; LLADDR(&sc->sc_addr)[2] = 0x5e; LLADDR(&sc->sc_addr)[3] = 0; LLADDR(&sc->sc_addr)[4] = 1; LLADDR(&sc->sc_addr)[5] = sc->sc_vhid; } else CARP_LOCK(sc); locked = 1; if (carpr.carpr_advbase > 0) { if (carpr.carpr_advbase > 255 || carpr.carpr_advbase < CARP_DFLTINTV) { error = EINVAL; break; } sc->sc_advbase = carpr.carpr_advbase; } if (carpr.carpr_advskew >= 255) { error = EINVAL; break; } sc->sc_advskew = carpr.carpr_advskew; if (carpr.carpr_key[0] != '\0') { bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key)); carp_hmac_prepare(sc); } if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) { switch (carpr.carpr_state) { case BACKUP: callout_stop(&sc->sc_ad_tmo); carp_set_state(sc, BACKUP, "user requested via ifconfig"); carp_setrun(sc, 0); carp_delroute(sc); break; case MASTER: carp_master_down_locked(sc, "user requested via ifconfig"); break; default: break; } } break; case SIOCGVH: { int priveleged; if (carpr.carpr_vhid < 0 || carpr.carpr_vhid > CARP_MAXVHID) { error = EINVAL; break; } if (carpr.carpr_count < 1) { error = EMSGSIZE; break; } if (ifp->if_carp == NULL) { error = ENOENT; break; } priveleged = (priv_check(td, PRIV_NETINET_CARP) == 0); if (carpr.carpr_vhid != 0) { 
IFNET_FOREACH_CARP(ifp, sc) if (sc->sc_vhid == carpr.carpr_vhid) break; if (sc == NULL) { error = ENOENT; break; } carp_carprcp(&carpr, sc, priveleged); - error = copyout(&carpr, ifr->ifr_data, sizeof(carpr)); + error = copyout(&carpr, ifr_data_get_ptr(ifr), + sizeof(carpr)); } else { int i, count; count = 0; IFNET_FOREACH_CARP(ifp, sc) count++; if (count > carpr.carpr_count) { CIF_UNLOCK(ifp->if_carp); error = EMSGSIZE; break; } i = 0; IFNET_FOREACH_CARP(ifp, sc) { carp_carprcp(&carpr, sc, priveleged); carpr.carpr_count = count; - error = copyout(&carpr, ifr->ifr_data + + error = copyout(&carpr, + (caddr_t)ifr_data_get_ptr(ifr) + (i * sizeof(carpr)), sizeof(carpr)); if (error) { CIF_UNLOCK(ifp->if_carp); break; } i++; } } break; } default: error = EINVAL; } sx_xunlock(&carp_sx); out: if (locked) CARP_UNLOCK(sc); if_rele(ifp); return (error); } static int carp_get_vhid(struct ifaddr *ifa) { if (ifa == NULL || ifa->ifa_carp == NULL) return (0); return (ifa->ifa_carp->sc_vhid); } int carp_attach(struct ifaddr *ifa, int vhid) { struct ifnet *ifp = ifa->ifa_ifp; struct carp_if *cif = ifp->if_carp; struct carp_softc *sc; int index, error; KASSERT(ifa->ifa_carp == NULL, ("%s: ifa %p attached", __func__, ifa)); switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: #endif #ifdef INET6 case AF_INET6: #endif break; default: return (EPROTOTYPE); } sx_xlock(&carp_sx); if (ifp->if_carp == NULL) { sx_xunlock(&carp_sx); return (ENOPROTOOPT); } IFNET_FOREACH_CARP(ifp, sc) if (sc->sc_vhid == vhid) break; if (sc == NULL) { sx_xunlock(&carp_sx); return (ENOENT); } error = carp_multicast_setup(cif, ifa->ifa_addr->sa_family); if (error) { CIF_FREE(cif); sx_xunlock(&carp_sx); return (error); } index = sc->sc_naddrs + sc->sc_naddrs6 + 1; if (index > sc->sc_ifasiz / sizeof(struct ifaddr *)) carp_grow_ifas(sc); switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: cif->cif_naddrs++; sc->sc_naddrs++; break; #endif #ifdef INET6 case AF_INET6: cif->cif_naddrs6++; 
sc->sc_naddrs6++; break; #endif } ifa_ref(ifa); CARP_LOCK(sc); sc->sc_ifas[index - 1] = ifa; ifa->ifa_carp = sc; carp_hmac_prepare(sc); carp_sc_state(sc); CARP_UNLOCK(sc); sx_xunlock(&carp_sx); return (0); } void carp_detach(struct ifaddr *ifa, bool keep_cif) { struct ifnet *ifp = ifa->ifa_ifp; struct carp_if *cif = ifp->if_carp; struct carp_softc *sc = ifa->ifa_carp; int i, index; KASSERT(sc != NULL, ("%s: %p not attached", __func__, ifa)); sx_xlock(&carp_sx); CARP_LOCK(sc); /* Shift array. */ index = sc->sc_naddrs + sc->sc_naddrs6; for (i = 0; i < index; i++) if (sc->sc_ifas[i] == ifa) break; KASSERT(i < index, ("%s: %p no backref", __func__, ifa)); for (; i < index - 1; i++) sc->sc_ifas[i] = sc->sc_ifas[i+1]; sc->sc_ifas[index - 1] = NULL; switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: cif->cif_naddrs--; sc->sc_naddrs--; break; #endif #ifdef INET6 case AF_INET6: cif->cif_naddrs6--; sc->sc_naddrs6--; break; #endif } carp_ifa_delroute(ifa); carp_multicast_cleanup(cif, ifa->ifa_addr->sa_family); ifa->ifa_carp = NULL; ifa_free(ifa); carp_hmac_prepare(sc); carp_sc_state(sc); if (!keep_cif && sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) carp_destroy(sc); else CARP_UNLOCK(sc); if (!keep_cif) CIF_FREE(cif); sx_xunlock(&carp_sx); } static void carp_set_state(struct carp_softc *sc, int state, const char *reason) { CARP_LOCK_ASSERT(sc); if (sc->sc_state != state) { const char *carp_states[] = { CARP_STATES }; char subsys[IFNAMSIZ+5]; snprintf(subsys, IFNAMSIZ+5, "%u@%s", sc->sc_vhid, sc->sc_carpdev->if_xname); CARP_LOG("%s: %s -> %s (%s)\n", subsys, carp_states[sc->sc_state], carp_states[state], reason); sc->sc_state = state; devctl_notify("CARP", subsys, carp_states[state], NULL); } } static void carp_linkstate(struct ifnet *ifp) { struct carp_softc *sc; CIF_LOCK(ifp->if_carp); IFNET_FOREACH_CARP(ifp, sc) { CARP_LOCK(sc); carp_sc_state(sc); CARP_UNLOCK(sc); } CIF_UNLOCK(ifp->if_carp); } static void carp_sc_state(struct carp_softc *sc) { CARP_LOCK_ASSERT(sc); 
if (sc->sc_carpdev->if_link_state != LINK_STATE_UP || !(sc->sc_carpdev->if_flags & IFF_UP)) { callout_stop(&sc->sc_ad_tmo); #ifdef INET callout_stop(&sc->sc_md_tmo); #endif #ifdef INET6 callout_stop(&sc->sc_md6_tmo); #endif carp_set_state(sc, INIT, "hardware interface down"); carp_setrun(sc, 0); if (!sc->sc_suppress) carp_demote_adj(V_carp_ifdown_adj, "interface down"); sc->sc_suppress = 1; } else { carp_set_state(sc, INIT, "hardware interface up"); carp_setrun(sc, 0); if (sc->sc_suppress) carp_demote_adj(-V_carp_ifdown_adj, "interface up"); sc->sc_suppress = 0; } } static void carp_demote_adj(int adj, char *reason) { atomic_add_int(&V_carp_demotion, adj); CARP_LOG("demoted by %d to %d (%s)\n", adj, V_carp_demotion, reason); taskqueue_enqueue(taskqueue_swi, &carp_sendall_task); } static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS) { int new, error; new = V_carp_demotion; error = sysctl_handle_int(oidp, &new, 0, req); if (error || !req->newptr) return (error); carp_demote_adj(new, "sysctl"); return (0); } #ifdef INET extern struct domain inetdomain; static struct protosw in_carp_protosw = { .pr_type = SOCK_RAW, .pr_domain = &inetdomain, .pr_protocol = IPPROTO_CARP, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_input = carp_input, .pr_output = rip_output, .pr_ctloutput = rip_ctloutput, .pr_usrreqs = &rip_usrreqs }; #endif #ifdef INET6 extern struct domain inet6domain; static struct protosw in6_carp_protosw = { .pr_type = SOCK_RAW, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_CARP, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_input = carp6_input, .pr_output = rip6_output, .pr_ctloutput = rip6_ctloutput, .pr_usrreqs = &rip6_usrreqs }; #endif static void carp_mod_cleanup(void) { #ifdef INET if (proto_reg[CARP_INET] == 0) { (void)ipproto_unregister(IPPROTO_CARP); pf_proto_unregister(PF_INET, IPPROTO_CARP, SOCK_RAW); proto_reg[CARP_INET] = -1; } carp_iamatch_p = NULL; #endif #ifdef INET6 if (proto_reg[CARP_INET6] == 0) { (void)ip6proto_unregister(IPPROTO_CARP); 
pf_proto_unregister(PF_INET6, IPPROTO_CARP, SOCK_RAW); proto_reg[CARP_INET6] = -1; } carp_iamatch6_p = NULL; carp_macmatch6_p = NULL; #endif carp_ioctl_p = NULL; carp_attach_p = NULL; carp_detach_p = NULL; carp_get_vhid_p = NULL; carp_linkstate_p = NULL; carp_forus_p = NULL; carp_output_p = NULL; carp_demote_adj_p = NULL; carp_master_p = NULL; mtx_unlock(&carp_mtx); taskqueue_drain(taskqueue_swi, &carp_sendall_task); mtx_destroy(&carp_mtx); sx_destroy(&carp_sx); } static int carp_mod_load(void) { int err; mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF); sx_init(&carp_sx, "carp_sx"); LIST_INIT(&carp_list); carp_get_vhid_p = carp_get_vhid; carp_forus_p = carp_forus; carp_output_p = carp_output; carp_linkstate_p = carp_linkstate; carp_ioctl_p = carp_ioctl; carp_attach_p = carp_attach; carp_detach_p = carp_detach; carp_demote_adj_p = carp_demote_adj; carp_master_p = carp_master; #ifdef INET6 carp_iamatch6_p = carp_iamatch6; carp_macmatch6_p = carp_macmatch6; proto_reg[CARP_INET6] = pf_proto_register(PF_INET6, (struct protosw *)&in6_carp_protosw); if (proto_reg[CARP_INET6]) { printf("carp: error %d attaching to PF_INET6\n", proto_reg[CARP_INET6]); carp_mod_cleanup(); return (proto_reg[CARP_INET6]); } err = ip6proto_register(IPPROTO_CARP); if (err) { printf("carp: error %d registering with INET6\n", err); carp_mod_cleanup(); return (err); } #endif #ifdef INET carp_iamatch_p = carp_iamatch; proto_reg[CARP_INET] = pf_proto_register(PF_INET, &in_carp_protosw); if (proto_reg[CARP_INET]) { printf("carp: error %d attaching to PF_INET\n", proto_reg[CARP_INET]); carp_mod_cleanup(); return (proto_reg[CARP_INET]); } err = ipproto_register(IPPROTO_CARP); if (err) { printf("carp: error %d registering with INET\n", err); carp_mod_cleanup(); return (err); } #endif return (0); } static int carp_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: return carp_mod_load(); /* NOTREACHED */ case MOD_UNLOAD: mtx_lock(&carp_mtx); if (LIST_EMPTY(&carp_list)) 
carp_mod_cleanup(); else { mtx_unlock(&carp_mtx); return (EBUSY); } break; default: return (EINVAL); } return (0); } static moduledata_t carp_mod = { "carp", carp_modevent, 0 }; DECLARE_MODULE(carp, carp_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); Index: head/sys/netpfil/pf/if_pfsync.c =================================================================== --- head/sys/netpfil/pf/if_pfsync.c (revision 331796) +++ head/sys/netpfil/pf/if_pfsync.c (revision 331797) @@ -1,2433 +1,2435 @@ /*- * SPDX-License-Identifier: (BSD-2-Clause-FreeBSD AND ISC) * * Copyright (c) 2002 Michael Shalayeff * Copyright (c) 2012 Gleb Smirnoff * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
*/ /*- * Copyright (c) 2009 David Gwynne * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ /* * $OpenBSD: if_pfsync.c,v 1.110 2009/02/24 05:39:19 dlg Exp $ * * Revisions picked from OpenBSD after revision 1.110 import: * 1.119 - don't m_copydata() beyond the len of mbuf in pfsync_input() * 1.118, 1.124, 1.148, 1.149, 1.151, 1.171 - fixes to bulk updates * 1.120, 1.175 - use monotonic time_uptime * 1.122 - reduce number of updates for non-TCP sessions * 1.125, 1.127 - rewrite merge or stale processing * 1.128 - cleanups * 1.146 - bzero() mbuf before sparsely filling it with data * 1.170 - SIOCSIFMTU checks * 1.126, 1.142 - deferred packets processing * 1.173 - correct expire time processing */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_pf.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define PFSYNC_MINPKT ( \ sizeof(struct ip) + \ sizeof(struct pfsync_header) + \ sizeof(struct pfsync_subheader) ) struct pfsync_pkt { struct ip *ip; struct in_addr src; u_int8_t flags; }; static int pfsync_upd_tcp(struct pf_state *, struct 
pfsync_state_peer *, struct pfsync_state_peer *); static int pfsync_in_clr(struct pfsync_pkt *, struct mbuf *, int, int); static int pfsync_in_ins(struct pfsync_pkt *, struct mbuf *, int, int); static int pfsync_in_iack(struct pfsync_pkt *, struct mbuf *, int, int); static int pfsync_in_upd(struct pfsync_pkt *, struct mbuf *, int, int); static int pfsync_in_upd_c(struct pfsync_pkt *, struct mbuf *, int, int); static int pfsync_in_ureq(struct pfsync_pkt *, struct mbuf *, int, int); static int pfsync_in_del(struct pfsync_pkt *, struct mbuf *, int, int); static int pfsync_in_del_c(struct pfsync_pkt *, struct mbuf *, int, int); static int pfsync_in_bus(struct pfsync_pkt *, struct mbuf *, int, int); static int pfsync_in_tdb(struct pfsync_pkt *, struct mbuf *, int, int); static int pfsync_in_eof(struct pfsync_pkt *, struct mbuf *, int, int); static int pfsync_in_error(struct pfsync_pkt *, struct mbuf *, int, int); static int (*pfsync_acts[])(struct pfsync_pkt *, struct mbuf *, int, int) = { pfsync_in_clr, /* PFSYNC_ACT_CLR */ pfsync_in_ins, /* PFSYNC_ACT_INS */ pfsync_in_iack, /* PFSYNC_ACT_INS_ACK */ pfsync_in_upd, /* PFSYNC_ACT_UPD */ pfsync_in_upd_c, /* PFSYNC_ACT_UPD_C */ pfsync_in_ureq, /* PFSYNC_ACT_UPD_REQ */ pfsync_in_del, /* PFSYNC_ACT_DEL */ pfsync_in_del_c, /* PFSYNC_ACT_DEL_C */ pfsync_in_error, /* PFSYNC_ACT_INS_F */ pfsync_in_error, /* PFSYNC_ACT_DEL_F */ pfsync_in_bus, /* PFSYNC_ACT_BUS */ pfsync_in_tdb, /* PFSYNC_ACT_TDB */ pfsync_in_eof /* PFSYNC_ACT_EOF */ }; struct pfsync_q { void (*write)(struct pf_state *, void *); size_t len; u_int8_t action; }; /* we have one of these for every PFSYNC_S_ */ static void pfsync_out_state(struct pf_state *, void *); static void pfsync_out_iack(struct pf_state *, void *); static void pfsync_out_upd_c(struct pf_state *, void *); static void pfsync_out_del(struct pf_state *, void *); static struct pfsync_q pfsync_qs[] = { { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_INS }, { pfsync_out_iack, sizeof(struct 
pfsync_ins_ack), PFSYNC_ACT_INS_ACK }, { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_UPD }, { pfsync_out_upd_c, sizeof(struct pfsync_upd_c), PFSYNC_ACT_UPD_C }, { pfsync_out_del, sizeof(struct pfsync_del_c), PFSYNC_ACT_DEL_C } }; static void pfsync_q_ins(struct pf_state *, int, bool); static void pfsync_q_del(struct pf_state *, bool); static void pfsync_update_state(struct pf_state *); struct pfsync_upd_req_item { TAILQ_ENTRY(pfsync_upd_req_item) ur_entry; struct pfsync_upd_req ur_msg; }; struct pfsync_deferral { struct pfsync_softc *pd_sc; TAILQ_ENTRY(pfsync_deferral) pd_entry; u_int pd_refs; struct callout pd_tmo; struct pf_state *pd_st; struct mbuf *pd_m; }; struct pfsync_softc { /* Configuration */ struct ifnet *sc_ifp; struct ifnet *sc_sync_if; struct ip_moptions sc_imo; struct in_addr sc_sync_peer; uint32_t sc_flags; #define PFSYNCF_OK 0x00000001 #define PFSYNCF_DEFER 0x00000002 #define PFSYNCF_PUSH 0x00000004 uint8_t sc_maxupdates; struct ip sc_template; struct callout sc_tmo; struct mtx sc_mtx; /* Queued data */ size_t sc_len; TAILQ_HEAD(, pf_state) sc_qs[PFSYNC_S_COUNT]; TAILQ_HEAD(, pfsync_upd_req_item) sc_upd_req_list; TAILQ_HEAD(, pfsync_deferral) sc_deferrals; u_int sc_deferred; void *sc_plus; size_t sc_pluslen; /* Bulk update info */ struct mtx sc_bulk_mtx; uint32_t sc_ureq_sent; int sc_bulk_tries; uint32_t sc_ureq_received; int sc_bulk_hashid; uint64_t sc_bulk_stateid; uint32_t sc_bulk_creatorid; struct callout sc_bulk_tmo; struct callout sc_bulkfail_tmo; }; #define PFSYNC_LOCK(sc) mtx_lock(&(sc)->sc_mtx) #define PFSYNC_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) #define PFSYNC_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) #define PFSYNC_BLOCK(sc) mtx_lock(&(sc)->sc_bulk_mtx) #define PFSYNC_BUNLOCK(sc) mtx_unlock(&(sc)->sc_bulk_mtx) #define PFSYNC_BLOCK_ASSERT(sc) mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED) static const char pfsyncname[] = "pfsync"; static MALLOC_DEFINE(M_PFSYNC, pfsyncname, "pfsync(4) data"); static VNET_DEFINE(struct 
pfsync_softc *, pfsyncif) = NULL; #define V_pfsyncif VNET(pfsyncif) static VNET_DEFINE(void *, pfsync_swi_cookie) = NULL; #define V_pfsync_swi_cookie VNET(pfsync_swi_cookie) static VNET_DEFINE(struct pfsyncstats, pfsyncstats); #define V_pfsyncstats VNET(pfsyncstats) static VNET_DEFINE(int, pfsync_carp_adj) = CARP_MAXSKEW; #define V_pfsync_carp_adj VNET(pfsync_carp_adj) static void pfsync_timeout(void *); static void pfsync_push(struct pfsync_softc *); static void pfsyncintr(void *); static int pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *, void *); static void pfsync_multicast_cleanup(struct pfsync_softc *); static void pfsync_pointers_init(void); static void pfsync_pointers_uninit(void); static int pfsync_init(void); static void pfsync_uninit(void); SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW, 0, "PFSYNC"); SYSCTL_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(pfsyncstats), pfsyncstats, "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)"); SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_RW, &VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment"); static int pfsync_clone_create(struct if_clone *, int, caddr_t); static void pfsync_clone_destroy(struct ifnet *); static int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, struct pf_state_peer *); static int pfsyncoutput(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); static int pfsyncioctl(struct ifnet *, u_long, caddr_t); static int pfsync_defer(struct pf_state *, struct mbuf *); static void pfsync_undefer(struct pfsync_deferral *, int); static void pfsync_undefer_state(struct pf_state *, int); static void pfsync_defer_tmo(void *); static void pfsync_request_update(u_int32_t, u_int64_t); static void pfsync_update_state_req(struct pf_state *); static void pfsync_drop(struct pfsync_softc *); static void pfsync_sendout(int); static void pfsync_send_plus(void *, size_t); static void 
pfsync_bulk_start(void); static void pfsync_bulk_status(u_int8_t); static void pfsync_bulk_update(void *); static void pfsync_bulk_fail(void *); #ifdef IPSEC static void pfsync_update_net_tdb(struct pfsync_tdb *); #endif #define PFSYNC_MAX_BULKTRIES 12 VNET_DEFINE(struct if_clone *, pfsync_cloner); #define V_pfsync_cloner VNET(pfsync_cloner) static int pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param) { struct pfsync_softc *sc; struct ifnet *ifp; int q; if (unit != 0) return (EINVAL); sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO); sc->sc_flags |= PFSYNCF_OK; for (q = 0; q < PFSYNC_S_COUNT; q++) TAILQ_INIT(&sc->sc_qs[q]); TAILQ_INIT(&sc->sc_upd_req_list); TAILQ_INIT(&sc->sc_deferrals); sc->sc_len = PFSYNC_MINPKT; sc->sc_maxupdates = 128; ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC); if (ifp == NULL) { free(sc, M_PFSYNC); return (ENOSPC); } if_initname(ifp, pfsyncname, unit); ifp->if_softc = sc; ifp->if_ioctl = pfsyncioctl; ifp->if_output = pfsyncoutput; ifp->if_type = IFT_PFSYNC; ifp->if_snd.ifq_maxlen = ifqmaxlen; ifp->if_hdrlen = sizeof(struct pfsync_header); ifp->if_mtu = ETHERMTU; mtx_init(&sc->sc_mtx, pfsyncname, NULL, MTX_DEF); mtx_init(&sc->sc_bulk_mtx, "pfsync bulk", NULL, MTX_DEF); callout_init(&sc->sc_tmo, 1); callout_init_mtx(&sc->sc_bulk_tmo, &sc->sc_bulk_mtx, 0); callout_init_mtx(&sc->sc_bulkfail_tmo, &sc->sc_bulk_mtx, 0); if_attach(ifp); bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN); V_pfsyncif = sc; return (0); } static void pfsync_clone_destroy(struct ifnet *ifp) { struct pfsync_softc *sc = ifp->if_softc; /* * At this stage, everything should have already been * cleared by pfsync_uninit(), and we have only to * drain callouts. 
*/ while (sc->sc_deferred > 0) { struct pfsync_deferral *pd = TAILQ_FIRST(&sc->sc_deferrals); TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); sc->sc_deferred--; if (callout_stop(&pd->pd_tmo) > 0) { pf_release_state(pd->pd_st); m_freem(pd->pd_m); free(pd, M_PFSYNC); } else { pd->pd_refs++; callout_drain(&pd->pd_tmo); free(pd, M_PFSYNC); } } callout_drain(&sc->sc_tmo); callout_drain(&sc->sc_bulkfail_tmo); callout_drain(&sc->sc_bulk_tmo); if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy"); bpfdetach(ifp); if_detach(ifp); pfsync_drop(sc); if_free(ifp); if (sc->sc_imo.imo_membership) pfsync_multicast_cleanup(sc); mtx_destroy(&sc->sc_mtx); mtx_destroy(&sc->sc_bulk_mtx); free(sc, M_PFSYNC); V_pfsyncif = NULL; } static int pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, struct pf_state_peer *d) { if (s->scrub.scrub_flag && d->scrub == NULL) { d->scrub = uma_zalloc(V_pf_state_scrub_z, M_NOWAIT | M_ZERO); if (d->scrub == NULL) return (ENOMEM); } return (0); } static int pfsync_state_import(struct pfsync_state *sp, u_int8_t flags) { struct pfsync_softc *sc = V_pfsyncif; #ifndef __NO_STRICT_ALIGNMENT struct pfsync_state_key key[2]; #endif struct pfsync_state_key *kw, *ks; struct pf_state *st = NULL; struct pf_state_key *skw = NULL, *sks = NULL; struct pf_rule *r = NULL; struct pfi_kif *kif; int error; PF_RULES_RASSERT(); if (sp->creatorid == 0) { if (V_pf_status.debug >= PF_DEBUG_MISC) printf("%s: invalid creator id: %08x\n", __func__, ntohl(sp->creatorid)); return (EINVAL); } if ((kif = pfi_kif_find(sp->ifname)) == NULL) { if (V_pf_status.debug >= PF_DEBUG_MISC) printf("%s: unknown interface: %s\n", __func__, sp->ifname); if (flags & PFSYNC_SI_IOCTL) return (EINVAL); return (0); /* skip this state */ } /* * If the ruleset checksums match or the state is coming from the ioctl, * it's safe to associate the state with the rule of that number. 
*/ if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) < pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount) r = pf_main_ruleset.rules[ PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)]; else r = &V_pf_default_rule; if ((r->max_states && counter_u64_fetch(r->states_cur) >= r->max_states)) goto cleanup; /* * XXXGL: consider M_WAITOK in ioctl path after. */ if ((st = uma_zalloc(V_pf_state_z, M_NOWAIT | M_ZERO)) == NULL) goto cleanup; if ((skw = uma_zalloc(V_pf_state_key_z, M_NOWAIT)) == NULL) goto cleanup; #ifndef __NO_STRICT_ALIGNMENT bcopy(&sp->key, key, sizeof(struct pfsync_state_key) * 2); kw = &key[PF_SK_WIRE]; ks = &key[PF_SK_STACK]; #else kw = &sp->key[PF_SK_WIRE]; ks = &sp->key[PF_SK_STACK]; #endif if (PF_ANEQ(&kw->addr[0], &ks->addr[0], sp->af) || PF_ANEQ(&kw->addr[1], &ks->addr[1], sp->af) || kw->port[0] != ks->port[0] || kw->port[1] != ks->port[1]) { sks = uma_zalloc(V_pf_state_key_z, M_NOWAIT); if (sks == NULL) goto cleanup; } else sks = skw; /* allocate memory for scrub info */ if (pfsync_alloc_scrub_memory(&sp->src, &st->src) || pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) goto cleanup; /* Copy to state key(s). */ skw->addr[0] = kw->addr[0]; skw->addr[1] = kw->addr[1]; skw->port[0] = kw->port[0]; skw->port[1] = kw->port[1]; skw->proto = sp->proto; skw->af = sp->af; if (sks != skw) { sks->addr[0] = ks->addr[0]; sks->addr[1] = ks->addr[1]; sks->port[0] = ks->port[0]; sks->port[1] = ks->port[1]; sks->proto = sp->proto; sks->af = sp->af; } /* copy to state */ bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr)); st->creation = time_uptime - ntohl(sp->creation); st->expire = time_uptime; if (sp->expire) { uint32_t timeout; timeout = r->timeout[sp->timeout]; if (!timeout) timeout = V_pf_default_rule.timeout[sp->timeout]; /* sp->expire may have been adaptively scaled by export. 
*/ st->expire -= timeout - ntohl(sp->expire); } st->direction = sp->direction; st->log = sp->log; st->timeout = sp->timeout; st->state_flags = sp->state_flags; st->id = sp->id; st->creatorid = sp->creatorid; pf_state_peer_ntoh(&sp->src, &st->src); pf_state_peer_ntoh(&sp->dst, &st->dst); st->rule.ptr = r; st->nat_rule.ptr = NULL; st->anchor.ptr = NULL; st->rt_kif = NULL; st->pfsync_time = time_uptime; st->sync_state = PFSYNC_S_NONE; if (!(flags & PFSYNC_SI_IOCTL)) st->state_flags |= PFSTATE_NOSYNC; if ((error = pf_state_insert(kif, skw, sks, st)) != 0) goto cleanup_state; /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ counter_u64_add(r->states_cur, 1); counter_u64_add(r->states_tot, 1); if (!(flags & PFSYNC_SI_IOCTL)) { st->state_flags &= ~PFSTATE_NOSYNC; if (st->state_flags & PFSTATE_ACK) { pfsync_q_ins(st, PFSYNC_S_IACK, true); pfsync_push(sc); } } st->state_flags &= ~PFSTATE_ACK; PF_STATE_UNLOCK(st); return (0); cleanup: error = ENOMEM; if (skw == sks) sks = NULL; if (skw != NULL) uma_zfree(V_pf_state_key_z, skw); if (sks != NULL) uma_zfree(V_pf_state_key_z, sks); cleanup_state: /* pf_state_insert() frees the state keys. */ if (st) { if (st->dst.scrub) uma_zfree(V_pf_state_scrub_z, st->dst.scrub); if (st->src.scrub) uma_zfree(V_pf_state_scrub_z, st->src.scrub); uma_zfree(V_pf_state_z, st); } return (error); } static int pfsync_input(struct mbuf **mp, int *offp __unused, int proto __unused) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_pkt pkt; struct mbuf *m = *mp; struct ip *ip = mtod(m, struct ip *); struct pfsync_header *ph; struct pfsync_subheader subh; int offset, len; int rv; uint16_t count; *mp = NULL; V_pfsyncstats.pfsyncs_ipackets++; /* Verify that we have a sync interface configured. 
*/ if (!sc || !sc->sc_sync_if || !V_pf_status.running || (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) goto done; /* verify that the packet came in on the right interface */ if (sc->sc_sync_if != m->m_pkthdr.rcvif) { V_pfsyncstats.pfsyncs_badif++; goto done; } if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); /* verify that the IP TTL is 255. */ if (ip->ip_ttl != PFSYNC_DFLTTL) { V_pfsyncstats.pfsyncs_badttl++; goto done; } offset = ip->ip_hl << 2; if (m->m_pkthdr.len < offset + sizeof(*ph)) { V_pfsyncstats.pfsyncs_hdrops++; goto done; } if (offset + sizeof(*ph) > m->m_len) { if (m_pullup(m, offset + sizeof(*ph)) == NULL) { V_pfsyncstats.pfsyncs_hdrops++; return (IPPROTO_DONE); } ip = mtod(m, struct ip *); } ph = (struct pfsync_header *)((char *)ip + offset); /* verify the version */ if (ph->version != PFSYNC_VERSION) { V_pfsyncstats.pfsyncs_badver++; goto done; } len = ntohs(ph->len) + offset; if (m->m_pkthdr.len < len) { V_pfsyncstats.pfsyncs_badlen++; goto done; } /* Cheaper to grab this now than having to mess with mbufs later */ pkt.ip = ip; pkt.src = ip->ip_src; pkt.flags = 0; /* * Trusting pf_chksum during packet processing, as well as seeking * in interface name tree, require holding PF_RULES_RLOCK(). 
*/ PF_RULES_RLOCK(); if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) pkt.flags |= PFSYNC_SI_CKSUM; offset += sizeof(*ph); while (offset <= len - sizeof(subh)) { m_copydata(m, offset, sizeof(subh), (caddr_t)&subh); offset += sizeof(subh); if (subh.action >= PFSYNC_ACT_MAX) { V_pfsyncstats.pfsyncs_badact++; PF_RULES_RUNLOCK(); goto done; } count = ntohs(subh.count); V_pfsyncstats.pfsyncs_iacts[subh.action] += count; rv = (*pfsync_acts[subh.action])(&pkt, m, offset, count); if (rv == -1) { PF_RULES_RUNLOCK(); return (IPPROTO_DONE); } offset += rv; } PF_RULES_RUNLOCK(); done: m_freem(m); return (IPPROTO_DONE); } static int pfsync_in_clr(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { struct pfsync_clr *clr; struct mbuf *mp; int len = sizeof(*clr) * count; int i, offp; u_int32_t creatorid; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } clr = (struct pfsync_clr *)(mp->m_data + offp); for (i = 0; i < count; i++) { creatorid = clr[i].creatorid; if (clr[i].ifname[0] != '\0' && pfi_kif_find(clr[i].ifname) == NULL) continue; for (int i = 0; i <= pf_hashmask; i++) { struct pf_idhash *ih = &V_pf_idhash[i]; struct pf_state *s; relock: PF_HASHROW_LOCK(ih); LIST_FOREACH(s, &ih->states, entry) { if (s->creatorid == creatorid) { s->state_flags |= PFSTATE_NOSYNC; pf_unlink_state(s, PF_ENTER_LOCKED); goto relock; } } PF_HASHROW_UNLOCK(ih); } } return (len); } static int pfsync_in_ins(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { struct mbuf *mp; struct pfsync_state *sa, *sp; int len = sizeof(*sp) * count; int i, offp; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } sa = (struct pfsync_state *)(mp->m_data + offp); for (i = 0; i < count; i++) { sp = &sa[i]; /* Check for invalid values. 
*/ if (sp->timeout >= PFTM_MAX || sp->src.state > PF_TCPS_PROXY_DST || sp->dst.state > PF_TCPS_PROXY_DST || sp->direction > PF_OUT || (sp->af != AF_INET && sp->af != AF_INET6)) { if (V_pf_status.debug >= PF_DEBUG_MISC) printf("%s: invalid value\n", __func__); V_pfsyncstats.pfsyncs_badval++; continue; } if (pfsync_state_import(sp, pkt->flags) == ENOMEM) /* Drop out, but process the rest of the actions. */ break; } return (len); } static int pfsync_in_iack(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { struct pfsync_ins_ack *ia, *iaa; struct pf_state *st; struct mbuf *mp; int len = count * sizeof(*ia); int offp, i; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } iaa = (struct pfsync_ins_ack *)(mp->m_data + offp); for (i = 0; i < count; i++) { ia = &iaa[i]; st = pf_find_state_byid(ia->id, ia->creatorid); if (st == NULL) continue; if (st->state_flags & PFSTATE_ACK) { PFSYNC_LOCK(V_pfsyncif); pfsync_undefer_state(st, 0); PFSYNC_UNLOCK(V_pfsyncif); } PF_STATE_UNLOCK(st); } /* * XXX this is not yet implemented, but we know the size of the * message so we can skip it. */ return (count * sizeof(struct pfsync_ins_ack)); } static int pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src, struct pfsync_state_peer *dst) { int sync = 0; PF_STATE_LOCK_ASSERT(st); /* * The state should never go backwards except * for syn-proxy states. Neither should the * sequence window slide backwards. 
*/ if ((st->src.state > src->state && (st->src.state < PF_TCPS_PROXY_SRC || src->state >= PF_TCPS_PROXY_SRC)) || (st->src.state == src->state && SEQ_GT(st->src.seqlo, ntohl(src->seqlo)))) sync++; else pf_state_peer_ntoh(src, &st->src); if ((st->dst.state > dst->state) || (st->dst.state >= TCPS_SYN_SENT && SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo)))) sync++; else pf_state_peer_ntoh(dst, &st->dst); return (sync); } static int pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_state *sa, *sp; struct pf_state *st; int sync; struct mbuf *mp; int len = count * sizeof(*sp); int offp, i; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } sa = (struct pfsync_state *)(mp->m_data + offp); for (i = 0; i < count; i++) { sp = &sa[i]; /* check for invalid values */ if (sp->timeout >= PFTM_MAX || sp->src.state > PF_TCPS_PROXY_DST || sp->dst.state > PF_TCPS_PROXY_DST) { if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("pfsync_input: PFSYNC_ACT_UPD: " "invalid value\n"); } V_pfsyncstats.pfsyncs_badval++; continue; } st = pf_find_state_byid(sp->id, sp->creatorid); if (st == NULL) { /* insert the update */ if (pfsync_state_import(sp, 0)) V_pfsyncstats.pfsyncs_badstate++; continue; } if (st->state_flags & PFSTATE_ACK) { PFSYNC_LOCK(sc); pfsync_undefer_state(st, 1); PFSYNC_UNLOCK(sc); } if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) sync = pfsync_upd_tcp(st, &sp->src, &sp->dst); else { sync = 0; /* * Non-TCP protocol state machine always go * forwards */ if (st->src.state > sp->src.state) sync++; else pf_state_peer_ntoh(&sp->src, &st->src); if (st->dst.state > sp->dst.state) sync++; else pf_state_peer_ntoh(&sp->dst, &st->dst); } if (sync < 2) { pfsync_alloc_scrub_memory(&sp->dst, &st->dst); pf_state_peer_ntoh(&sp->dst, &st->dst); st->expire = time_uptime; st->timeout = sp->timeout; } st->pfsync_time = time_uptime; if (sync) { V_pfsyncstats.pfsyncs_stale++; 
pfsync_update_state(st); PF_STATE_UNLOCK(st); PFSYNC_LOCK(sc); pfsync_push(sc); PFSYNC_UNLOCK(sc); continue; } PF_STATE_UNLOCK(st); } return (len); } static int pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_upd_c *ua, *up; struct pf_state *st; int len = count * sizeof(*up); int sync; struct mbuf *mp; int offp, i; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } ua = (struct pfsync_upd_c *)(mp->m_data + offp); for (i = 0; i < count; i++) { up = &ua[i]; /* check for invalid values */ if (up->timeout >= PFTM_MAX || up->src.state > PF_TCPS_PROXY_DST || up->dst.state > PF_TCPS_PROXY_DST) { if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("pfsync_input: " "PFSYNC_ACT_UPD_C: " "invalid value\n"); } V_pfsyncstats.pfsyncs_badval++; continue; } st = pf_find_state_byid(up->id, up->creatorid); if (st == NULL) { /* We don't have this state. Ask for it. */ PFSYNC_LOCK(sc); pfsync_request_update(up->creatorid, up->id); PFSYNC_UNLOCK(sc); continue; } if (st->state_flags & PFSTATE_ACK) { PFSYNC_LOCK(sc); pfsync_undefer_state(st, 1); PFSYNC_UNLOCK(sc); } if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) sync = pfsync_upd_tcp(st, &up->src, &up->dst); else { sync = 0; /* * Non-TCP protocol state machine always go * forwards */ if (st->src.state > up->src.state) sync++; else pf_state_peer_ntoh(&up->src, &st->src); if (st->dst.state > up->dst.state) sync++; else pf_state_peer_ntoh(&up->dst, &st->dst); } if (sync < 2) { pfsync_alloc_scrub_memory(&up->dst, &st->dst); pf_state_peer_ntoh(&up->dst, &st->dst); st->expire = time_uptime; st->timeout = up->timeout; } st->pfsync_time = time_uptime; if (sync) { V_pfsyncstats.pfsyncs_stale++; pfsync_update_state(st); PF_STATE_UNLOCK(st); PFSYNC_LOCK(sc); pfsync_push(sc); PFSYNC_UNLOCK(sc); continue; } PF_STATE_UNLOCK(st); } return (len); } static int pfsync_in_ureq(struct pfsync_pkt *pkt, struct mbuf *m, int 
offset, int count) { struct pfsync_upd_req *ur, *ura; struct mbuf *mp; int len = count * sizeof(*ur); int i, offp; struct pf_state *st; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } ura = (struct pfsync_upd_req *)(mp->m_data + offp); for (i = 0; i < count; i++) { ur = &ura[i]; if (ur->id == 0 && ur->creatorid == 0) pfsync_bulk_start(); else { st = pf_find_state_byid(ur->id, ur->creatorid); if (st == NULL) { V_pfsyncstats.pfsyncs_badstate++; continue; } if (st->state_flags & PFSTATE_NOSYNC) { PF_STATE_UNLOCK(st); continue; } pfsync_update_state_req(st); PF_STATE_UNLOCK(st); } } return (len); } static int pfsync_in_del(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { struct mbuf *mp; struct pfsync_state *sa, *sp; struct pf_state *st; int len = count * sizeof(*sp); int offp, i; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } sa = (struct pfsync_state *)(mp->m_data + offp); for (i = 0; i < count; i++) { sp = &sa[i]; st = pf_find_state_byid(sp->id, sp->creatorid); if (st == NULL) { V_pfsyncstats.pfsyncs_badstate++; continue; } st->state_flags |= PFSTATE_NOSYNC; pf_unlink_state(st, PF_ENTER_LOCKED); } return (len); } static int pfsync_in_del_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { struct mbuf *mp; struct pfsync_del_c *sa, *sp; struct pf_state *st; int len = count * sizeof(*sp); int offp, i; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } sa = (struct pfsync_del_c *)(mp->m_data + offp); for (i = 0; i < count; i++) { sp = &sa[i]; st = pf_find_state_byid(sp->id, sp->creatorid); if (st == NULL) { V_pfsyncstats.pfsyncs_badstate++; continue; } st->state_flags |= PFSTATE_NOSYNC; pf_unlink_state(st, PF_ENTER_LOCKED); } return (len); } static int pfsync_in_bus(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { struct pfsync_softc *sc = V_pfsyncif; struct 
pfsync_bus *bus; struct mbuf *mp; int len = count * sizeof(*bus); int offp; PFSYNC_BLOCK(sc); /* If we're not waiting for a bulk update, who cares. */ if (sc->sc_ureq_sent == 0) { PFSYNC_BUNLOCK(sc); return (len); } mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { PFSYNC_BUNLOCK(sc); V_pfsyncstats.pfsyncs_badlen++; return (-1); } bus = (struct pfsync_bus *)(mp->m_data + offp); switch (bus->status) { case PFSYNC_BUS_START: callout_reset(&sc->sc_bulkfail_tmo, 4 * hz + V_pf_limits[PF_LIMIT_STATES].limit / ((sc->sc_ifp->if_mtu - PFSYNC_MINPKT) / sizeof(struct pfsync_state)), pfsync_bulk_fail, sc); if (V_pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: received bulk update start\n"); break; case PFSYNC_BUS_END: if (time_uptime - ntohl(bus->endtime) >= sc->sc_ureq_sent) { /* that's it, we're happy */ sc->sc_ureq_sent = 0; sc->sc_bulk_tries = 0; callout_stop(&sc->sc_bulkfail_tmo); if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync bulk done"); sc->sc_flags |= PFSYNCF_OK; if (V_pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: received valid " "bulk update end\n"); } else { if (V_pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: received invalid " "bulk update end: bad timestamp\n"); } break; } PFSYNC_BUNLOCK(sc); return (len); } static int pfsync_in_tdb(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { int len = count * sizeof(struct pfsync_tdb); #if defined(IPSEC) struct pfsync_tdb *tp; struct mbuf *mp; int offp; int i; int s; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } tp = (struct pfsync_tdb *)(mp->m_data + offp); for (i = 0; i < count; i++) pfsync_update_net_tdb(&tp[i]); #endif return (len); } #if defined(IPSEC) /* Update an in-kernel tdb. Silently fail if no tdb is found. 
*/ static void pfsync_update_net_tdb(struct pfsync_tdb *pt) { struct tdb *tdb; int s; /* check for invalid values */ if (ntohl(pt->spi) <= SPI_RESERVED_MAX || (pt->dst.sa.sa_family != AF_INET && pt->dst.sa.sa_family != AF_INET6)) goto bad; tdb = gettdb(pt->spi, &pt->dst, pt->sproto); if (tdb) { pt->rpl = ntohl(pt->rpl); pt->cur_bytes = (unsigned long long)be64toh(pt->cur_bytes); /* Neither replay nor byte counter should ever decrease. */ if (pt->rpl < tdb->tdb_rpl || pt->cur_bytes < tdb->tdb_cur_bytes) { goto bad; } tdb->tdb_rpl = pt->rpl; tdb->tdb_cur_bytes = pt->cur_bytes; } return; bad: if (V_pf_status.debug >= PF_DEBUG_MISC) printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: " "invalid value\n"); V_pfsyncstats.pfsyncs_badstate++; return; } #endif static int pfsync_in_eof(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { /* check if we are at the right place in the packet */ if (offset != m->m_pkthdr.len) V_pfsyncstats.pfsyncs_badlen++; /* we're done. free and let the caller return */ m_freem(m); return (-1); } static int pfsync_in_error(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { V_pfsyncstats.pfsyncs_badact++; m_freem(m); return (-1); } static int pfsyncoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *rt) { m_freem(m); return (0); } /* ARGSUSED */ static int pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct pfsync_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; struct pfsyncreq pfsyncr; int error; switch (cmd) { case SIOCSIFFLAGS: PFSYNC_LOCK(sc); if (ifp->if_flags & IFF_UP) { ifp->if_drv_flags |= IFF_DRV_RUNNING; PFSYNC_UNLOCK(sc); pfsync_pointers_init(); } else { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; PFSYNC_UNLOCK(sc); pfsync_pointers_uninit(); } break; case SIOCSIFMTU: if (!sc->sc_sync_if || ifr->ifr_mtu <= PFSYNC_MINPKT || ifr->ifr_mtu > sc->sc_sync_if->if_mtu) return (EINVAL); if (ifr->ifr_mtu < ifp->if_mtu) { PFSYNC_LOCK(sc); if (sc->sc_len > 
PFSYNC_MINPKT) pfsync_sendout(1); PFSYNC_UNLOCK(sc); } ifp->if_mtu = ifr->ifr_mtu; break; case SIOCGETPFSYNC: bzero(&pfsyncr, sizeof(pfsyncr)); PFSYNC_LOCK(sc); if (sc->sc_sync_if) { strlcpy(pfsyncr.pfsyncr_syncdev, sc->sc_sync_if->if_xname, IFNAMSIZ); } pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer; pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; pfsyncr.pfsyncr_defer = (PFSYNCF_DEFER == (sc->sc_flags & PFSYNCF_DEFER)); PFSYNC_UNLOCK(sc); - return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr))); + return (copyout(&pfsyncr, ifr_data_get_ptr(ifr), + sizeof(pfsyncr))); case SIOCSETPFSYNC: { struct ip_moptions *imo = &sc->sc_imo; struct ifnet *sifp; struct ip *ip; void *mship = NULL; if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0) return (error); - if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr)))) + if ((error = copyin(ifr_data_get_ptr(ifr), &pfsyncr, + sizeof(pfsyncr)))) return (error); if (pfsyncr.pfsyncr_maxupdates > 255) return (EINVAL); if (pfsyncr.pfsyncr_syncdev[0] == 0) sifp = NULL; else if ((sifp = ifunit_ref(pfsyncr.pfsyncr_syncdev)) == NULL) return (EINVAL); if (sifp != NULL && ( pfsyncr.pfsyncr_syncpeer.s_addr == 0 || pfsyncr.pfsyncr_syncpeer.s_addr == htonl(INADDR_PFSYNC_GROUP))) mship = malloc((sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_PFSYNC, M_WAITOK | M_ZERO); PFSYNC_LOCK(sc); if (pfsyncr.pfsyncr_syncpeer.s_addr == 0) sc->sc_sync_peer.s_addr = htonl(INADDR_PFSYNC_GROUP); else sc->sc_sync_peer.s_addr = pfsyncr.pfsyncr_syncpeer.s_addr; sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates; if (pfsyncr.pfsyncr_defer) { sc->sc_flags |= PFSYNCF_DEFER; pfsync_defer_ptr = pfsync_defer; } else { sc->sc_flags &= ~PFSYNCF_DEFER; pfsync_defer_ptr = NULL; } if (sifp == NULL) { if (sc->sc_sync_if) if_rele(sc->sc_sync_if); sc->sc_sync_if = NULL; if (imo->imo_membership) pfsync_multicast_cleanup(sc); PFSYNC_UNLOCK(sc); break; } if (sc->sc_len > PFSYNC_MINPKT && (sifp->if_mtu < sc->sc_ifp->if_mtu || (sc->sc_sync_if != NULL && sifp->if_mtu 
< sc->sc_sync_if->if_mtu) || sifp->if_mtu < MCLBYTES - sizeof(struct ip))) pfsync_sendout(1); if (imo->imo_membership) pfsync_multicast_cleanup(sc); if (sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) { error = pfsync_multicast_setup(sc, sifp, mship); if (error) { if_rele(sifp); free(mship, M_PFSYNC); return (error); } } if (sc->sc_sync_if) if_rele(sc->sc_sync_if); sc->sc_sync_if = sifp; ip = &sc->sc_template; bzero(ip, sizeof(*ip)); ip->ip_v = IPVERSION; ip->ip_hl = sizeof(sc->sc_template) >> 2; ip->ip_tos = IPTOS_LOWDELAY; /* len and id are set later. */ ip->ip_off = htons(IP_DF); ip->ip_ttl = PFSYNC_DFLTTL; ip->ip_p = IPPROTO_PFSYNC; ip->ip_src.s_addr = INADDR_ANY; ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr; /* Request a full state table update. */ if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) (*carp_demote_adj_p)(V_pfsync_carp_adj, "pfsync bulk start"); sc->sc_flags &= ~PFSYNCF_OK; if (V_pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: requesting bulk update\n"); pfsync_request_update(0, 0); PFSYNC_UNLOCK(sc); PFSYNC_BLOCK(sc); sc->sc_ureq_sent = time_uptime; callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail, sc); PFSYNC_BUNLOCK(sc); break; } default: return (ENOTTY); } return (0); } static void pfsync_out_state(struct pf_state *st, void *buf) { struct pfsync_state *sp = buf; pfsync_state_export(sp, st); } static void pfsync_out_iack(struct pf_state *st, void *buf) { struct pfsync_ins_ack *iack = buf; iack->id = st->id; iack->creatorid = st->creatorid; } static void pfsync_out_upd_c(struct pf_state *st, void *buf) { struct pfsync_upd_c *up = buf; bzero(up, sizeof(*up)); up->id = st->id; pf_state_peer_hton(&st->src, &up->src); pf_state_peer_hton(&st->dst, &up->dst); up->creatorid = st->creatorid; up->timeout = st->timeout; } static void pfsync_out_del(struct pf_state *st, void *buf) { struct pfsync_del_c *dp = buf; dp->id = st->id; dp->creatorid = st->creatorid; st->state_flags |= PFSTATE_NOSYNC; } static void pfsync_drop(struct 
pfsync_softc *sc) { struct pf_state *st, *next; struct pfsync_upd_req_item *ur; int q; for (q = 0; q < PFSYNC_S_COUNT; q++) { if (TAILQ_EMPTY(&sc->sc_qs[q])) continue; TAILQ_FOREACH_SAFE(st, &sc->sc_qs[q], sync_list, next) { KASSERT(st->sync_state == q, ("%s: st->sync_state == q", __func__)); st->sync_state = PFSYNC_S_NONE; pf_release_state(st); } TAILQ_INIT(&sc->sc_qs[q]); } while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) { TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry); free(ur, M_PFSYNC); } sc->sc_plus = NULL; sc->sc_len = PFSYNC_MINPKT; } static void pfsync_sendout(int schedswi) { struct pfsync_softc *sc = V_pfsyncif; struct ifnet *ifp = sc->sc_ifp; struct mbuf *m; struct ip *ip; struct pfsync_header *ph; struct pfsync_subheader *subh; struct pf_state *st, *st_next; struct pfsync_upd_req_item *ur; int offset; int q, count = 0; KASSERT(sc != NULL, ("%s: null sc", __func__)); KASSERT(sc->sc_len > PFSYNC_MINPKT, ("%s: sc_len %zu", __func__, sc->sc_len)); PFSYNC_LOCK_ASSERT(sc); if (ifp->if_bpf == NULL && sc->sc_sync_if == NULL) { pfsync_drop(sc); return; } m = m_get2(max_linkhdr + sc->sc_len, M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) { if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); V_pfsyncstats.pfsyncs_onomem++; return; } m->m_data += max_linkhdr; m->m_len = m->m_pkthdr.len = sc->sc_len; /* build the ip header */ ip = (struct ip *)m->m_data; bcopy(&sc->sc_template, ip, sizeof(*ip)); offset = sizeof(*ip); ip->ip_len = htons(m->m_pkthdr.len); ip_fillid(ip); /* build the pfsync header */ ph = (struct pfsync_header *)(m->m_data + offset); bzero(ph, sizeof(*ph)); offset += sizeof(*ph); ph->version = PFSYNC_VERSION; ph->len = htons(sc->sc_len - sizeof(*ip)); bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH); /* walk the queues */ for (q = 0; q < PFSYNC_S_COUNT; q++) { if (TAILQ_EMPTY(&sc->sc_qs[q])) continue; subh = (struct pfsync_subheader *)(m->m_data + offset); offset += sizeof(*subh); count = 0; TAILQ_FOREACH_SAFE(st, &sc->sc_qs[q], 
sync_list, st_next) { KASSERT(st->sync_state == q, ("%s: st->sync_state == q", __func__)); /* * XXXGL: some of write methods do unlocked reads * of state data :( */ pfsync_qs[q].write(st, m->m_data + offset); offset += pfsync_qs[q].len; st->sync_state = PFSYNC_S_NONE; pf_release_state(st); count++; } TAILQ_INIT(&sc->sc_qs[q]); bzero(subh, sizeof(*subh)); subh->action = pfsync_qs[q].action; subh->count = htons(count); V_pfsyncstats.pfsyncs_oacts[pfsync_qs[q].action] += count; } if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) { subh = (struct pfsync_subheader *)(m->m_data + offset); offset += sizeof(*subh); count = 0; while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) { TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry); bcopy(&ur->ur_msg, m->m_data + offset, sizeof(ur->ur_msg)); offset += sizeof(ur->ur_msg); free(ur, M_PFSYNC); count++; } bzero(subh, sizeof(*subh)); subh->action = PFSYNC_ACT_UPD_REQ; subh->count = htons(count); V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_UPD_REQ] += count; } /* has someone built a custom region for us to add? 
*/ if (sc->sc_plus != NULL) { bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen); offset += sc->sc_pluslen; sc->sc_plus = NULL; } subh = (struct pfsync_subheader *)(m->m_data + offset); offset += sizeof(*subh); bzero(subh, sizeof(*subh)); subh->action = PFSYNC_ACT_EOF; subh->count = htons(1); V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_EOF]++; /* we're done, let's put it on the wire */ if (ifp->if_bpf) { m->m_data += sizeof(*ip); m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip); BPF_MTAP(ifp, m); m->m_data -= sizeof(*ip); m->m_len = m->m_pkthdr.len = sc->sc_len; } if (sc->sc_sync_if == NULL) { sc->sc_len = PFSYNC_MINPKT; m_freem(m); return; } if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); sc->sc_len = PFSYNC_MINPKT; if (!_IF_QFULL(&sc->sc_ifp->if_snd)) _IF_ENQUEUE(&sc->sc_ifp->if_snd, m); else { m_freem(m); if_inc_counter(sc->sc_ifp, IFCOUNTER_OQDROPS, 1); } if (schedswi) swi_sched(V_pfsync_swi_cookie, 0); } static void pfsync_insert_state(struct pf_state *st) { struct pfsync_softc *sc = V_pfsyncif; if (st->state_flags & PFSTATE_NOSYNC) return; if ((st->rule.ptr->rule_flag & PFRULE_NOSYNC) || st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) { st->state_flags |= PFSTATE_NOSYNC; return; } KASSERT(st->sync_state == PFSYNC_S_NONE, ("%s: st->sync_state %u", __func__, st->sync_state)); PFSYNC_LOCK(sc); if (sc->sc_len == PFSYNC_MINPKT) callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif); pfsync_q_ins(st, PFSYNC_S_INS, true); PFSYNC_UNLOCK(sc); st->sync_updates = 0; } static int pfsync_defer(struct pf_state *st, struct mbuf *m) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_deferral *pd; if (m->m_flags & (M_BCAST|M_MCAST)) return (0); PFSYNC_LOCK(sc); if (sc == NULL || !(sc->sc_ifp->if_flags & IFF_DRV_RUNNING) || !(sc->sc_flags & PFSYNCF_DEFER)) { PFSYNC_UNLOCK(sc); return (0); } if (sc->sc_deferred >= 128) pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0); pd = malloc(sizeof(*pd), 
M_PFSYNC, M_NOWAIT); if (pd == NULL) return (0); sc->sc_deferred++; m->m_flags |= M_SKIP_FIREWALL; st->state_flags |= PFSTATE_ACK; pd->pd_sc = sc; pd->pd_refs = 0; pd->pd_st = st; pf_ref_state(st); pd->pd_m = m; TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry); callout_init_mtx(&pd->pd_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); callout_reset(&pd->pd_tmo, 10, pfsync_defer_tmo, pd); pfsync_push(sc); return (1); } static void pfsync_undefer(struct pfsync_deferral *pd, int drop) { struct pfsync_softc *sc = pd->pd_sc; struct mbuf *m = pd->pd_m; struct pf_state *st = pd->pd_st; PFSYNC_LOCK_ASSERT(sc); TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); sc->sc_deferred--; pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ free(pd, M_PFSYNC); pf_release_state(st); if (drop) m_freem(m); else { _IF_ENQUEUE(&sc->sc_ifp->if_snd, m); pfsync_push(sc); } } static void pfsync_defer_tmo(void *arg) { struct pfsync_deferral *pd = arg; struct pfsync_softc *sc = pd->pd_sc; struct mbuf *m = pd->pd_m; struct pf_state *st = pd->pd_st; PFSYNC_LOCK_ASSERT(sc); CURVNET_SET(m->m_pkthdr.rcvif->if_vnet); TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); sc->sc_deferred--; pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! 
*/ if (pd->pd_refs == 0) free(pd, M_PFSYNC); PFSYNC_UNLOCK(sc); ip_output(m, NULL, NULL, 0, NULL, NULL); pf_release_state(st); CURVNET_RESTORE(); } static void pfsync_undefer_state(struct pf_state *st, int drop) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_deferral *pd; PFSYNC_LOCK_ASSERT(sc); TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) { if (pd->pd_st == st) { if (callout_stop(&pd->pd_tmo) > 0) pfsync_undefer(pd, drop); return; } } panic("%s: unable to find deferred state", __func__); } static void pfsync_update_state(struct pf_state *st) { struct pfsync_softc *sc = V_pfsyncif; bool sync = false, ref = true; PF_STATE_LOCK_ASSERT(st); PFSYNC_LOCK(sc); if (st->state_flags & PFSTATE_ACK) pfsync_undefer_state(st, 0); if (st->state_flags & PFSTATE_NOSYNC) { if (st->sync_state != PFSYNC_S_NONE) pfsync_q_del(st, true); PFSYNC_UNLOCK(sc); return; } if (sc->sc_len == PFSYNC_MINPKT) callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif); switch (st->sync_state) { case PFSYNC_S_UPD_C: case PFSYNC_S_UPD: case PFSYNC_S_INS: /* we're already handling it */ if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) { st->sync_updates++; if (st->sync_updates >= sc->sc_maxupdates) sync = true; } break; case PFSYNC_S_IACK: pfsync_q_del(st, false); ref = false; /* FALLTHROUGH */ case PFSYNC_S_NONE: pfsync_q_ins(st, PFSYNC_S_UPD_C, ref); st->sync_updates = 0; break; default: panic("%s: unexpected sync state %d", __func__, st->sync_state); } if (sync || (time_uptime - st->pfsync_time) < 2) pfsync_push(sc); PFSYNC_UNLOCK(sc); } static void pfsync_request_update(u_int32_t creatorid, u_int64_t id) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_upd_req_item *item; size_t nlen = sizeof(struct pfsync_upd_req); PFSYNC_LOCK_ASSERT(sc); /* * This code does a bit to prevent multiple update requests for the * same state being generated. It searches current subheader queue, * but it doesn't lookup into queue of already packed datagrams. 
*/ TAILQ_FOREACH(item, &sc->sc_upd_req_list, ur_entry) if (item->ur_msg.id == id && item->ur_msg.creatorid == creatorid) return; item = malloc(sizeof(*item), M_PFSYNC, M_NOWAIT); if (item == NULL) return; /* XXX stats */ item->ur_msg.id = id; item->ur_msg.creatorid = creatorid; if (TAILQ_EMPTY(&sc->sc_upd_req_list)) nlen += sizeof(struct pfsync_subheader); if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) { pfsync_sendout(1); nlen = sizeof(struct pfsync_subheader) + sizeof(struct pfsync_upd_req); } TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry); sc->sc_len += nlen; } static void pfsync_update_state_req(struct pf_state *st) { struct pfsync_softc *sc = V_pfsyncif; bool ref = true; PF_STATE_LOCK_ASSERT(st); PFSYNC_LOCK(sc); if (st->state_flags & PFSTATE_NOSYNC) { if (st->sync_state != PFSYNC_S_NONE) pfsync_q_del(st, true); PFSYNC_UNLOCK(sc); return; } switch (st->sync_state) { case PFSYNC_S_UPD_C: case PFSYNC_S_IACK: pfsync_q_del(st, false); ref = false; /* FALLTHROUGH */ case PFSYNC_S_NONE: pfsync_q_ins(st, PFSYNC_S_UPD, ref); pfsync_push(sc); break; case PFSYNC_S_INS: case PFSYNC_S_UPD: case PFSYNC_S_DEL: /* we're already handling it */ break; default: panic("%s: unexpected sync state %d", __func__, st->sync_state); } PFSYNC_UNLOCK(sc); } static void pfsync_delete_state(struct pf_state *st) { struct pfsync_softc *sc = V_pfsyncif; bool ref = true; PFSYNC_LOCK(sc); if (st->state_flags & PFSTATE_ACK) pfsync_undefer_state(st, 1); if (st->state_flags & PFSTATE_NOSYNC) { if (st->sync_state != PFSYNC_S_NONE) pfsync_q_del(st, true); PFSYNC_UNLOCK(sc); return; } if (sc->sc_len == PFSYNC_MINPKT) callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif); switch (st->sync_state) { case PFSYNC_S_INS: /* We never got to tell the world so just forget about it. 
*/ pfsync_q_del(st, true); break; case PFSYNC_S_UPD_C: case PFSYNC_S_UPD: case PFSYNC_S_IACK: pfsync_q_del(st, false); ref = false; /* FALLTHROUGH */ case PFSYNC_S_NONE: pfsync_q_ins(st, PFSYNC_S_DEL, ref); break; default: panic("%s: unexpected sync state %d", __func__, st->sync_state); } PFSYNC_UNLOCK(sc); } static void pfsync_clear_states(u_int32_t creatorid, const char *ifname) { struct pfsync_softc *sc = V_pfsyncif; struct { struct pfsync_subheader subh; struct pfsync_clr clr; } __packed r; bzero(&r, sizeof(r)); r.subh.action = PFSYNC_ACT_CLR; r.subh.count = htons(1); V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_CLR]++; strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname)); r.clr.creatorid = creatorid; PFSYNC_LOCK(sc); pfsync_send_plus(&r, sizeof(r)); PFSYNC_UNLOCK(sc); } static void pfsync_q_ins(struct pf_state *st, int q, bool ref) { struct pfsync_softc *sc = V_pfsyncif; size_t nlen = pfsync_qs[q].len; PFSYNC_LOCK_ASSERT(sc); KASSERT(st->sync_state == PFSYNC_S_NONE, ("%s: st->sync_state %u", __func__, st->sync_state)); KASSERT(sc->sc_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu", sc->sc_len)); if (TAILQ_EMPTY(&sc->sc_qs[q])) nlen += sizeof(struct pfsync_subheader); if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) { pfsync_sendout(1); nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len; } sc->sc_len += nlen; TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list); st->sync_state = q; if (ref) pf_ref_state(st); } static void pfsync_q_del(struct pf_state *st, bool unref) { struct pfsync_softc *sc = V_pfsyncif; int q = st->sync_state; PFSYNC_LOCK_ASSERT(sc); KASSERT(st->sync_state != PFSYNC_S_NONE, ("%s: st->sync_state != PFSYNC_S_NONE", __func__)); sc->sc_len -= pfsync_qs[q].len; TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list); st->sync_state = PFSYNC_S_NONE; if (unref) pf_release_state(st); if (TAILQ_EMPTY(&sc->sc_qs[q])) sc->sc_len -= sizeof(struct pfsync_subheader); } static void pfsync_bulk_start(void) { struct pfsync_softc *sc = V_pfsyncif; if (V_pf_status.debug >= 
PF_DEBUG_MISC) printf("pfsync: received bulk update request\n"); PFSYNC_BLOCK(sc); sc->sc_ureq_received = time_uptime; sc->sc_bulk_hashid = 0; sc->sc_bulk_stateid = 0; pfsync_bulk_status(PFSYNC_BUS_START); callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc); PFSYNC_BUNLOCK(sc); } static void pfsync_bulk_update(void *arg) { struct pfsync_softc *sc = arg; struct pf_state *s; int i, sent = 0; PFSYNC_BLOCK_ASSERT(sc); CURVNET_SET(sc->sc_ifp->if_vnet); /* * Start with last state from previous invocation. * It may had gone, in this case start from the * hash slot. */ s = pf_find_state_byid(sc->sc_bulk_stateid, sc->sc_bulk_creatorid); if (s != NULL) i = PF_IDHASH(s); else i = sc->sc_bulk_hashid; for (; i <= pf_hashmask; i++) { struct pf_idhash *ih = &V_pf_idhash[i]; if (s != NULL) PF_HASHROW_ASSERT(ih); else { PF_HASHROW_LOCK(ih); s = LIST_FIRST(&ih->states); } for (; s; s = LIST_NEXT(s, entry)) { if (sent > 1 && (sc->sc_ifp->if_mtu - sc->sc_len) < sizeof(struct pfsync_state)) { /* We've filled a packet. */ sc->sc_bulk_hashid = i; sc->sc_bulk_stateid = s->id; sc->sc_bulk_creatorid = s->creatorid; PF_HASHROW_UNLOCK(ih); callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc); goto full; } if (s->sync_state == PFSYNC_S_NONE && s->timeout < PFTM_MAX && s->pfsync_time <= sc->sc_ureq_received) { pfsync_update_state_req(s); sent++; } } PF_HASHROW_UNLOCK(ih); } /* We're done. 
*/ pfsync_bulk_status(PFSYNC_BUS_END); full: CURVNET_RESTORE(); } static void pfsync_bulk_status(u_int8_t status) { struct { struct pfsync_subheader subh; struct pfsync_bus bus; } __packed r; struct pfsync_softc *sc = V_pfsyncif; bzero(&r, sizeof(r)); r.subh.action = PFSYNC_ACT_BUS; r.subh.count = htons(1); V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_BUS]++; r.bus.creatorid = V_pf_status.hostid; r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received); r.bus.status = status; PFSYNC_LOCK(sc); pfsync_send_plus(&r, sizeof(r)); PFSYNC_UNLOCK(sc); } static void pfsync_bulk_fail(void *arg) { struct pfsync_softc *sc = arg; CURVNET_SET(sc->sc_ifp->if_vnet); PFSYNC_BLOCK_ASSERT(sc); if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) { /* Try again */ callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail, V_pfsyncif); PFSYNC_LOCK(sc); pfsync_request_update(0, 0); PFSYNC_UNLOCK(sc); } else { /* Pretend like the transfer was ok. */ sc->sc_ureq_sent = 0; sc->sc_bulk_tries = 0; PFSYNC_LOCK(sc); if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync bulk fail"); sc->sc_flags |= PFSYNCF_OK; PFSYNC_UNLOCK(sc); if (V_pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: failed to receive bulk update\n"); } CURVNET_RESTORE(); } static void pfsync_send_plus(void *plus, size_t pluslen) { struct pfsync_softc *sc = V_pfsyncif; PFSYNC_LOCK_ASSERT(sc); if (sc->sc_len + pluslen > sc->sc_ifp->if_mtu) pfsync_sendout(1); sc->sc_plus = plus; sc->sc_len += (sc->sc_pluslen = pluslen); pfsync_sendout(1); } static void pfsync_timeout(void *arg) { struct pfsync_softc *sc = arg; CURVNET_SET(sc->sc_ifp->if_vnet); PFSYNC_LOCK(sc); pfsync_push(sc); PFSYNC_UNLOCK(sc); CURVNET_RESTORE(); } static void pfsync_push(struct pfsync_softc *sc) { PFSYNC_LOCK_ASSERT(sc); sc->sc_flags |= PFSYNCF_PUSH; swi_sched(V_pfsync_swi_cookie, 0); } static void pfsyncintr(void *arg) { struct pfsync_softc *sc = arg; struct mbuf *m, *n; CURVNET_SET(sc->sc_ifp->if_vnet); 
PFSYNC_LOCK(sc); if ((sc->sc_flags & PFSYNCF_PUSH) && sc->sc_len > PFSYNC_MINPKT) { pfsync_sendout(0); sc->sc_flags &= ~PFSYNCF_PUSH; } _IF_DEQUEUE_ALL(&sc->sc_ifp->if_snd, m); PFSYNC_UNLOCK(sc); for (; m != NULL; m = n) { n = m->m_nextpkt; m->m_nextpkt = NULL; /* * We distinguish between a deferral packet and our * own pfsync packet based on M_SKIP_FIREWALL * flag. This is XXX. */ if (m->m_flags & M_SKIP_FIREWALL) ip_output(m, NULL, NULL, 0, NULL, NULL); else if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL) == 0) V_pfsyncstats.pfsyncs_opackets++; else V_pfsyncstats.pfsyncs_oerrors++; } CURVNET_RESTORE(); } static int pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp, void *mship) { struct ip_moptions *imo = &sc->sc_imo; int error; if (!(ifp->if_flags & IFF_MULTICAST)) return (EADDRNOTAVAIL); imo->imo_membership = (struct in_multi **)mship; imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; imo->imo_multicast_vif = -1; if ((error = in_joingroup(ifp, &sc->sc_sync_peer, NULL, &imo->imo_membership[0])) != 0) { imo->imo_membership = NULL; return (error); } imo->imo_num_memberships++; imo->imo_multicast_ifp = ifp; imo->imo_multicast_ttl = PFSYNC_DFLTTL; imo->imo_multicast_loop = 0; return (0); } static void pfsync_multicast_cleanup(struct pfsync_softc *sc) { struct ip_moptions *imo = &sc->sc_imo; in_leavegroup(imo->imo_membership[0], NULL); free(imo->imo_membership, M_PFSYNC); imo->imo_membership = NULL; imo->imo_multicast_ifp = NULL; } #ifdef INET extern struct domain inetdomain; static struct protosw in_pfsync_protosw = { .pr_type = SOCK_RAW, .pr_domain = &inetdomain, .pr_protocol = IPPROTO_PFSYNC, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_input = pfsync_input, .pr_output = rip_output, .pr_ctloutput = rip_ctloutput, .pr_usrreqs = &rip_usrreqs }; #endif static void pfsync_pointers_init() { PF_RULES_WLOCK(); pfsync_state_import_ptr = pfsync_state_import; pfsync_insert_state_ptr = pfsync_insert_state; pfsync_update_state_ptr = pfsync_update_state; 
pfsync_delete_state_ptr = pfsync_delete_state; pfsync_clear_states_ptr = pfsync_clear_states; pfsync_defer_ptr = pfsync_defer; PF_RULES_WUNLOCK(); } static void pfsync_pointers_uninit() { PF_RULES_WLOCK(); pfsync_state_import_ptr = NULL; pfsync_insert_state_ptr = NULL; pfsync_update_state_ptr = NULL; pfsync_delete_state_ptr = NULL; pfsync_clear_states_ptr = NULL; pfsync_defer_ptr = NULL; PF_RULES_WUNLOCK(); } static void vnet_pfsync_init(const void *unused __unused) { int error; V_pfsync_cloner = if_clone_simple(pfsyncname, pfsync_clone_create, pfsync_clone_destroy, 1); error = swi_add(NULL, pfsyncname, pfsyncintr, V_pfsyncif, SWI_NET, INTR_MPSAFE, &V_pfsync_swi_cookie); if (error) { if_clone_detach(V_pfsync_cloner); log(LOG_INFO, "swi_add() failed in %s\n", __func__); } } VNET_SYSINIT(vnet_pfsync_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY, vnet_pfsync_init, NULL); static void vnet_pfsync_uninit(const void *unused __unused) { if_clone_detach(V_pfsync_cloner); swi_remove(V_pfsync_swi_cookie); } /* * Detach after pf is gone; otherwise we might touch pfsync memory * from within pf after freeing pfsync. */ VNET_SYSUNINIT(vnet_pfsync_uninit, SI_SUB_INIT_IF, SI_ORDER_SECOND, vnet_pfsync_uninit, NULL); static int pfsync_init() { #ifdef INET int error; error = pf_proto_register(PF_INET, &in_pfsync_protosw); if (error) return (error); error = ipproto_register(IPPROTO_PFSYNC); if (error) { pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW); return (error); } #endif pfsync_pointers_init(); return (0); } static void pfsync_uninit() { pfsync_pointers_uninit(); #ifdef INET ipproto_unregister(IPPROTO_PFSYNC); pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW); #endif } static int pfsync_modevent(module_t mod, int type, void *data) { int error = 0; switch (type) { case MOD_LOAD: error = pfsync_init(); break; case MOD_QUIESCE: /* * Module should not be unloaded due to race conditions. 
*/ error = EBUSY; break; case MOD_UNLOAD: pfsync_uninit(); break; default: error = EINVAL; break; } return (error); } static moduledata_t pfsync_mod = { pfsyncname, pfsync_modevent, 0 }; #define PFSYNC_MODVER 1 /* Stay on FIREWALL as we depend on pf being initialized and on inetdomain. */ DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY); MODULE_VERSION(pfsync, PFSYNC_MODVER); MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER); Index: head/sys/security/mac/mac_net.c =================================================================== --- head/sys/security/mac/mac_net.c (revision 331796) +++ head/sys/security/mac/mac_net.c (revision 331797) @@ -1,501 +1,501 @@ /*- * Copyright (c) 1999-2002, 2009 Robert N. M. Watson * Copyright (c) 2001 Ilmar S. Habibulin * Copyright (c) 2001-2004 Networks Associates Technology, Inc. * Copyright (c) 2006 SPARTA, Inc. * Copyright (c) 2008 Apple Inc. * All rights reserved. * * This software was developed by Robert Watson and Ilmar Habibulin for the * TrustedBSD Project. * * This software was enhanced by SPARTA ISSO under SPAWAR contract * N66001-04-C-6019 ("SEFOS"). * * This software was developed for the FreeBSD Project in part by Network * Associates Laboratories, the Security Research Division of Network * Associates, Inc. under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), * as part of the DARPA CHATS research program. * * This software was developed at the University of Cambridge Computer * Laboratory with support from a grant from Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_mac.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * XXXRW: struct ifnet locking is incomplete in the network code, so we use * our own global mutex for struct ifnet. Non-ideal, but should help in the * SMP environment. */ struct mtx mac_ifnet_mtx; MTX_SYSINIT(mac_ifnet_mtx, &mac_ifnet_mtx, "mac_ifnet", MTX_DEF); /* * Retrieve the label associated with an mbuf by searching for the tag. * Depending on the value of mac_labelmbufs, it's possible that a label will * not be present, in which case NULL is returned. Policies must handle the * possibility of an mbuf not having label storage if they do not enforce * early loading. 
*/ struct label * mac_mbuf_to_label(struct mbuf *m) { struct m_tag *tag; struct label *label; if (m == NULL) return (NULL); tag = m_tag_find(m, PACKET_TAG_MACLABEL, NULL); if (tag == NULL) return (NULL); label = (struct label *)(tag+1); return (label); } static struct label * mac_bpfdesc_label_alloc(void) { struct label *label; label = mac_labelzone_alloc(M_WAITOK); MAC_POLICY_PERFORM(bpfdesc_init_label, label); return (label); } void mac_bpfdesc_init(struct bpf_d *d) { if (mac_labeled & MPC_OBJECT_BPFDESC) d->bd_label = mac_bpfdesc_label_alloc(); else d->bd_label = NULL; } static struct label * mac_ifnet_label_alloc(void) { struct label *label; label = mac_labelzone_alloc(M_WAITOK); MAC_POLICY_PERFORM(ifnet_init_label, label); return (label); } void mac_ifnet_init(struct ifnet *ifp) { if (mac_labeled & MPC_OBJECT_IFNET) ifp->if_label = mac_ifnet_label_alloc(); else ifp->if_label = NULL; } int mac_mbuf_tag_init(struct m_tag *tag, int flag) { struct label *label; int error; label = (struct label *) (tag + 1); mac_init_label(label); if (flag & M_WAITOK) MAC_POLICY_CHECK(mbuf_init_label, label, flag); else MAC_POLICY_CHECK_NOSLEEP(mbuf_init_label, label, flag); if (error) { MAC_POLICY_PERFORM_NOSLEEP(mbuf_destroy_label, label); mac_destroy_label(label); } return (error); } int mac_mbuf_init(struct mbuf *m, int flag) { struct m_tag *tag; int error; M_ASSERTPKTHDR(m); if (mac_labeled & MPC_OBJECT_MBUF) { tag = m_tag_get(PACKET_TAG_MACLABEL, sizeof(struct label), flag); if (tag == NULL) return (ENOMEM); error = mac_mbuf_tag_init(tag, flag); if (error) { m_tag_free(tag); return (error); } m_tag_prepend(m, tag); } return (0); } static void mac_bpfdesc_label_free(struct label *label) { MAC_POLICY_PERFORM_NOSLEEP(bpfdesc_destroy_label, label); mac_labelzone_free(label); } void mac_bpfdesc_destroy(struct bpf_d *d) { if (d->bd_label != NULL) { mac_bpfdesc_label_free(d->bd_label); d->bd_label = NULL; } } static void mac_ifnet_label_free(struct label *label) { 
MAC_POLICY_PERFORM_NOSLEEP(ifnet_destroy_label, label); mac_labelzone_free(label); } void mac_ifnet_destroy(struct ifnet *ifp) { if (ifp->if_label != NULL) { mac_ifnet_label_free(ifp->if_label); ifp->if_label = NULL; } } void mac_mbuf_tag_destroy(struct m_tag *tag) { struct label *label; label = (struct label *)(tag+1); MAC_POLICY_PERFORM_NOSLEEP(mbuf_destroy_label, label); mac_destroy_label(label); } /* * mac_mbuf_tag_copy is called when an mbuf header is duplicated, in which * case the labels must also be duplicated. */ void mac_mbuf_tag_copy(struct m_tag *src, struct m_tag *dest) { struct label *src_label, *dest_label; src_label = (struct label *)(src+1); dest_label = (struct label *)(dest+1); /* * mac_mbuf_tag_init() is called on the target tag in m_tag_copy(), * so we don't need to call it here. */ MAC_POLICY_PERFORM_NOSLEEP(mbuf_copy_label, src_label, dest_label); } void mac_mbuf_copy(struct mbuf *m_from, struct mbuf *m_to) { struct label *src_label, *dest_label; if (mac_policy_count == 0) return; src_label = mac_mbuf_to_label(m_from); dest_label = mac_mbuf_to_label(m_to); MAC_POLICY_PERFORM_NOSLEEP(mbuf_copy_label, src_label, dest_label); } static void mac_ifnet_copy_label(struct label *src, struct label *dest) { MAC_POLICY_PERFORM_NOSLEEP(ifnet_copy_label, src, dest); } static int mac_ifnet_externalize_label(struct label *label, char *elements, char *outbuf, size_t outbuflen) { int error; MAC_POLICY_EXTERNALIZE(ifnet, label, elements, outbuf, outbuflen); return (error); } static int mac_ifnet_internalize_label(struct label *label, char *string) { int error; MAC_POLICY_INTERNALIZE(ifnet, label, string); return (error); } void mac_ifnet_create(struct ifnet *ifp) { if (mac_policy_count == 0) return; MAC_IFNET_LOCK(ifp); MAC_POLICY_PERFORM_NOSLEEP(ifnet_create, ifp, ifp->if_label); MAC_IFNET_UNLOCK(ifp); } void mac_bpfdesc_create(struct ucred *cred, struct bpf_d *d) { MAC_POLICY_PERFORM_NOSLEEP(bpfdesc_create, cred, d, d->bd_label); } void 
mac_bpfdesc_create_mbuf(struct bpf_d *d, struct mbuf *m) { struct label *label; /* Assume reader lock is enough. */ BPFD_LOCK_ASSERT(d); if (mac_policy_count == 0) return; label = mac_mbuf_to_label(m); MAC_POLICY_PERFORM_NOSLEEP(bpfdesc_create_mbuf, d, d->bd_label, m, label); } void mac_ifnet_create_mbuf(struct ifnet *ifp, struct mbuf *m) { struct label *label; if (mac_policy_count == 0) return; label = mac_mbuf_to_label(m); MAC_IFNET_LOCK(ifp); MAC_POLICY_PERFORM_NOSLEEP(ifnet_create_mbuf, ifp, ifp->if_label, m, label); MAC_IFNET_UNLOCK(ifp); } MAC_CHECK_PROBE_DEFINE2(bpfdesc_check_receive, "struct bpf_d *", "struct ifnet *"); int mac_bpfdesc_check_receive(struct bpf_d *d, struct ifnet *ifp) { int error; /* Assume reader lock is enough. */ BPFD_LOCK_ASSERT(d); if (mac_policy_count == 0) return (0); MAC_IFNET_LOCK(ifp); MAC_POLICY_CHECK_NOSLEEP(bpfdesc_check_receive, d, d->bd_label, ifp, ifp->if_label); MAC_CHECK_PROBE2(bpfdesc_check_receive, error, d, ifp); MAC_IFNET_UNLOCK(ifp); return (error); } MAC_CHECK_PROBE_DEFINE2(ifnet_check_transmit, "struct ifnet *", "struct mbuf *"); int mac_ifnet_check_transmit(struct ifnet *ifp, struct mbuf *m) { struct label *label; int error; M_ASSERTPKTHDR(m); if (mac_policy_count == 0) return (0); label = mac_mbuf_to_label(m); MAC_IFNET_LOCK(ifp); MAC_POLICY_CHECK_NOSLEEP(ifnet_check_transmit, ifp, ifp->if_label, m, label); MAC_CHECK_PROBE2(ifnet_check_transmit, error, ifp, m); MAC_IFNET_UNLOCK(ifp); return (error); } int mac_ifnet_ioctl_get(struct ucred *cred, struct ifreq *ifr, struct ifnet *ifp) { char *elements, *buffer; struct label *intlabel; struct mac mac; int error; if (!(mac_labeled & MPC_OBJECT_IFNET)) return (EINVAL); - error = copyin(ifr->ifr_ifru.ifru_data, &mac, sizeof(mac)); + error = copyin(ifr_data_get_ptr(ifr), &mac, sizeof(mac)); if (error) return (error); error = mac_check_structmac_consistent(&mac); if (error) return (error); elements = malloc(mac.m_buflen, M_MACTEMP, M_WAITOK); error = 
copyinstr(mac.m_string, elements, mac.m_buflen, NULL); if (error) { free(elements, M_MACTEMP); return (error); } buffer = malloc(mac.m_buflen, M_MACTEMP, M_WAITOK | M_ZERO); intlabel = mac_ifnet_label_alloc(); MAC_IFNET_LOCK(ifp); mac_ifnet_copy_label(ifp->if_label, intlabel); MAC_IFNET_UNLOCK(ifp); error = mac_ifnet_externalize_label(intlabel, elements, buffer, mac.m_buflen); mac_ifnet_label_free(intlabel); if (error == 0) error = copyout(buffer, mac.m_string, strlen(buffer)+1); free(buffer, M_MACTEMP); free(elements, M_MACTEMP); return (error); } int mac_ifnet_ioctl_set(struct ucred *cred, struct ifreq *ifr, struct ifnet *ifp) { struct label *intlabel; struct mac mac; char *buffer; int error; if (!(mac_labeled & MPC_OBJECT_IFNET)) return (EINVAL); - error = copyin(ifr->ifr_ifru.ifru_data, &mac, sizeof(mac)); + error = copyin(ifr_data_get_ptr(ifr), &mac, sizeof(mac)); if (error) return (error); error = mac_check_structmac_consistent(&mac); if (error) return (error); buffer = malloc(mac.m_buflen, M_MACTEMP, M_WAITOK); error = copyinstr(mac.m_string, buffer, mac.m_buflen, NULL); if (error) { free(buffer, M_MACTEMP); return (error); } intlabel = mac_ifnet_label_alloc(); error = mac_ifnet_internalize_label(intlabel, buffer); free(buffer, M_MACTEMP); if (error) { mac_ifnet_label_free(intlabel); return (error); } /* * XXX: Note that this is a redundant privilege check, since policies * impose this check themselves if required by the policy * Eventually, this should go away. */ error = priv_check_cred(cred, PRIV_NET_SETIFMAC, 0); if (error) { mac_ifnet_label_free(intlabel); return (error); } MAC_IFNET_LOCK(ifp); MAC_POLICY_CHECK_NOSLEEP(ifnet_check_relabel, cred, ifp, ifp->if_label, intlabel); if (error) { MAC_IFNET_UNLOCK(ifp); mac_ifnet_label_free(intlabel); return (error); } MAC_POLICY_PERFORM_NOSLEEP(ifnet_relabel, cred, ifp, ifp->if_label, intlabel); MAC_IFNET_UNLOCK(ifp); mac_ifnet_label_free(intlabel); return (0); }