Index: stable/11/sys/dev/an/if_an.c
===================================================================
--- stable/11/sys/dev/an/if_an.c	(revision 332287)
+++ stable/11/sys/dev/an/if_an.c	(revision 332288)
@@ -1,3810 +1,3816 @@
/*-
 * Copyright (c) 1997, 1998, 1999
 *	Bill Paul.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Bill Paul.
 * 4. Neither the name of the author nor the names of any co-contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Aironet 4500/4800 802.11 PCMCIA/ISA/PCI driver for FreeBSD.
 *
 * Written by Bill Paul
 * Electrical Engineering Department
 * Columbia University, New York City
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

/*
 * The Aironet 4500/4800 series cards come in PCMCIA, ISA and PCI form.
 * This driver supports all three device types (PCI devices are supported
 * through an extra PCI shim: /sys/dev/an/if_an_pci.c).  ISA devices can be
 * supported either using hard-coded IO port/IRQ settings or via Plug
 * and Play.  The 4500 series devices support 1Mbps and 2Mbps data rates.
 * The 4800 devices support 1, 2, 5.5 and 11Mbps rates.
 *
 * Like the WaveLAN/IEEE cards, the Aironet NICs are all essentially
 * PCMCIA devices.  The ISA and PCI cards are a combination of a PCMCIA
 * device and a PCMCIA to ISA or PCMCIA to PCI adapter card.  There are
 * a couple of important differences though:
 *
 * - Lucent ISA card looks to the host like a PCMCIA controller with
 *   a PCMCIA WaveLAN card inserted.  This means that even desktop
 *   machines need to be configured with PCMCIA support in order to
 *   use WaveLAN/IEEE ISA cards.  The Aironet cards on the other hand
 *   actually look like normal ISA and PCI devices to the host, so
 *   no PCMCIA controller support is needed
 *
 * The latter point results in a small gotcha.  The Aironet PCMCIA
 * cards can be configured for one of two operating modes depending
 * on how the Vpp1 and Vpp2 programming voltages are set when the
 * card is activated.
In order to put the card in proper PCMCIA * operation (where the CIS table is visible and the interface is * programmed for PCMCIA operation), both Vpp1 and Vpp2 have to be * set to 5 volts. FreeBSD by default doesn't set the Vpp voltages, * which leaves the card in ISA/PCI mode, which prevents it from * being activated as an PCMCIA device. * * Note that some PCMCIA controller software packages for Windows NT * fail to set the voltages as well. * * The Aironet devices can operate in both station mode and access point * mode. Typically, when programmed for station mode, the card can be set * to automatically perform encapsulation/decapsulation of Ethernet II * and 802.3 frames within 802.11 frames so that the host doesn't have * to do it itself. This driver doesn't program the card that way: the * driver handles all of the encapsulation/decapsulation itself. */ #include "opt_inet.h" #ifdef INET #define ANCACHE /* enable signal strength cache */ #endif #include #include #include #include #include #include #include #include #include #ifdef ANCACHE #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #include #include #endif #include #include #include #include /* These are global because we need them in sys/pci/if_an_p.c. */ static void an_reset(struct an_softc *); static int an_init_mpi350_desc(struct an_softc *); static int an_ioctl(struct ifnet *, u_long, caddr_t); static void an_init(void *); static void an_init_locked(struct an_softc *); static int an_init_tx_ring(struct an_softc *); static void an_start(struct ifnet *); static void an_start_locked(struct ifnet *); static void an_watchdog(struct an_softc *); static void an_rxeof(struct an_softc *); static void an_txeof(struct an_softc *, int); static void an_promisc(struct an_softc *, int); static int an_cmd(struct an_softc *, int, int); static int an_cmd_struct(struct an_softc *, struct an_command *, struct an_reply *); static int an_read_record(struct an_softc *, struct an_ltv_gen *); static int an_write_record(struct an_softc *, struct an_ltv_gen *); static int an_read_data(struct an_softc *, int, int, caddr_t, int); static int an_write_data(struct an_softc *, int, int, caddr_t, int); static int an_seek(struct an_softc *, int, int, int); static int an_alloc_nicmem(struct an_softc *, int, int *); static int an_dma_malloc(struct an_softc *, bus_size_t, struct an_dma_alloc *, int); static void an_dma_free(struct an_softc *, struct an_dma_alloc *); static void an_dma_malloc_cb(void *, bus_dma_segment_t *, int, int); static void an_stats_update(void *); static void an_setdef(struct an_softc *, struct an_req *); #ifdef ANCACHE static void an_cache_store(struct an_softc *, struct ether_header *, struct mbuf *, u_int8_t, u_int8_t); #endif /* function definitions for use with the Cisco's Linux configuration utilities */ static int readrids(struct ifnet*, struct aironet_ioctl*); static int writerids(struct ifnet*, struct aironet_ioctl*); static int flashcard(struct ifnet*, struct aironet_ioctl*); static int cmdreset(struct ifnet *); static int setflashmode(struct ifnet *); static int flashgchar(struct ifnet *,int,int); static int flashpchar(struct ifnet *,int,int); static int flashputbuf(struct ifnet *); static int flashrestart(struct ifnet *); static int WaitBusy(struct ifnet *, int); static int unstickbusy(struct ifnet *); static void an_dump_record (struct an_softc *,struct an_ltv_gen *, 
char *); static int an_media_change (struct ifnet *); static void an_media_status (struct ifnet *, struct ifmediareq *); static int an_dump = 0; static int an_cache_mode = 0; #define DBM 0 #define PERCENT 1 #define RAW 2 static char an_conf[256]; static char an_conf_cache[256]; /* sysctl vars */ static SYSCTL_NODE(_hw, OID_AUTO, an, CTLFLAG_RD, 0, "Wireless driver parameters"); /* XXX violate ethernet/netgraph callback hooks */ extern void (*ng_ether_attach_p)(struct ifnet *ifp); extern void (*ng_ether_detach_p)(struct ifnet *ifp); static int sysctl_an_dump(SYSCTL_HANDLER_ARGS) { int error, r, last; char *s = an_conf; last = an_dump; switch (an_dump) { case 0: strcpy(an_conf, "off"); break; case 1: strcpy(an_conf, "type"); break; case 2: strcpy(an_conf, "dump"); break; default: snprintf(an_conf, 5, "%x", an_dump); break; } error = sysctl_handle_string(oidp, an_conf, sizeof(an_conf), req); if (strncmp(an_conf,"off", 3) == 0) { an_dump = 0; } if (strncmp(an_conf,"dump", 4) == 0) { an_dump = 1; } if (strncmp(an_conf,"type", 4) == 0) { an_dump = 2; } if (*s == 'f') { r = 0; for (;;s++) { if ((*s >= '0') && (*s <= '9')) { r = r * 16 + (*s - '0'); } else if ((*s >= 'a') && (*s <= 'f')) { r = r * 16 + (*s - 'a' + 10); } else { break; } } an_dump = r; } if (an_dump != last) printf("Sysctl changed for Aironet driver\n"); return error; } SYSCTL_PROC(_hw_an, OID_AUTO, an_dump, CTLTYPE_STRING | CTLFLAG_RW, 0, sizeof(an_conf), sysctl_an_dump, "A", ""); static int sysctl_an_cache_mode(SYSCTL_HANDLER_ARGS) { int error; switch (an_cache_mode) { case 1: strcpy(an_conf_cache, "per"); break; case 2: strcpy(an_conf_cache, "raw"); break; default: strcpy(an_conf_cache, "dbm"); break; } error = sysctl_handle_string(oidp, an_conf_cache, sizeof(an_conf_cache), req); if (strncmp(an_conf_cache,"dbm", 3) == 0) { an_cache_mode = 0; } if (strncmp(an_conf_cache,"per", 3) == 0) { an_cache_mode = 1; } if (strncmp(an_conf_cache,"raw", 3) == 0) { an_cache_mode = 2; } return error; } SYSCTL_PROC(_hw_an, OID_AUTO, an_cache_mode, CTLTYPE_STRING | CTLFLAG_RW, 0, sizeof(an_conf_cache), sysctl_an_cache_mode, "A", ""); /* * We probe for an Aironet 4500/4800 card by attempting to * read the default SSID list. On reset, the first entry in * the SSID list will contain the name "tsunami." If we don't * find this, then there's no card present. */ int an_probe(device_t dev) { struct an_softc *sc = device_get_softc(dev); struct an_ltv_ssidlist_new ssid; int error; bzero((char *)&ssid, sizeof(ssid)); error = an_alloc_port(dev, 0, AN_IOSIZ); if (error != 0) return (0); /* can't do autoprobing */ if (rman_get_start(sc->port_res) == -1) return(0); /* * We need to fake up a softc structure long enough * to be able to issue commands and call some of the * other routines. */ ssid.an_len = sizeof(ssid); ssid.an_type = AN_RID_SSIDLIST; /* Make sure interrupts are disabled. */ sc->mpi350 = 0; CSR_WRITE_2(sc, AN_INT_EN(sc->mpi350), 0); CSR_WRITE_2(sc, AN_EVENT_ACK(sc->mpi350), 0xFFFF); sc->an_dev = dev; mtx_init(&sc->an_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF); AN_LOCK(sc); an_reset(sc); if (an_cmd(sc, AN_CMD_READCFG, 0)) { AN_UNLOCK(sc); goto fail; } if (an_read_record(sc, (struct an_ltv_gen *)&ssid)) { AN_UNLOCK(sc); goto fail; } /* See if the ssid matches what we expect ... but doesn't have to */ if (strcmp(ssid.an_entry[0].an_ssid, AN_DEF_SSID)) { AN_UNLOCK(sc); goto fail; } AN_UNLOCK(sc); return(AN_IOSIZ); fail: mtx_destroy(&sc->an_mtx); return(0); } /* * Allocate a port resource with the given resource id. 
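 * (For example, an_probe() above calls an_alloc_port(dev, 0, AN_IOSIZ) before
 * issuing any commands; the bus-specific front ends presumably allocate their
 * port ranges the same way.)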
*/ int an_alloc_port(device_t dev, int rid, int size) { struct an_softc *sc = device_get_softc(dev); struct resource *res; res = bus_alloc_resource_anywhere(dev, SYS_RES_IOPORT, &rid, size, RF_ACTIVE); if (res) { sc->port_rid = rid; sc->port_res = res; return (0); } else { return (ENOENT); } } /* * Allocate a memory resource with the given resource id. */ int an_alloc_memory(device_t dev, int rid, int size) { struct an_softc *sc = device_get_softc(dev); struct resource *res; res = bus_alloc_resource_anywhere(dev, SYS_RES_MEMORY, &rid, size, RF_ACTIVE); if (res) { sc->mem_rid = rid; sc->mem_res = res; sc->mem_used = size; return (0); } else { return (ENOENT); } } /* * Allocate a auxiliary memory resource with the given resource id. */ int an_alloc_aux_memory(device_t dev, int rid, int size) { struct an_softc *sc = device_get_softc(dev); struct resource *res; res = bus_alloc_resource_anywhere(dev, SYS_RES_MEMORY, &rid, size, RF_ACTIVE); if (res) { sc->mem_aux_rid = rid; sc->mem_aux_res = res; sc->mem_aux_used = size; return (0); } else { return (ENOENT); } } /* * Allocate an irq resource with the given resource id. */ int an_alloc_irq(device_t dev, int rid, int flags) { struct an_softc *sc = device_get_softc(dev); struct resource *res; res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, (RF_ACTIVE | flags)); if (res) { sc->irq_rid = rid; sc->irq_res = res; return (0); } else { return (ENOENT); } } static void an_dma_malloc_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { bus_addr_t *paddr = (bus_addr_t*) arg; *paddr = segs->ds_addr; } /* * Alloc DMA memory and set the pointer to it */ static int an_dma_malloc(struct an_softc *sc, bus_size_t size, struct an_dma_alloc *dma, int mapflags) { int r; r = bus_dmamem_alloc(sc->an_dtag, (void**) &dma->an_dma_vaddr, BUS_DMA_NOWAIT, &dma->an_dma_map); if (r != 0) goto fail_1; r = bus_dmamap_load(sc->an_dtag, dma->an_dma_map, dma->an_dma_vaddr, size, an_dma_malloc_cb, &dma->an_dma_paddr, mapflags | BUS_DMA_NOWAIT); if (r != 0) goto fail_2; dma->an_dma_size = size; return (0); fail_2: bus_dmamap_unload(sc->an_dtag, dma->an_dma_map); fail_1: bus_dmamem_free(sc->an_dtag, dma->an_dma_vaddr, dma->an_dma_map); return (r); } static void an_dma_free(struct an_softc *sc, struct an_dma_alloc *dma) { bus_dmamap_unload(sc->an_dtag, dma->an_dma_map); bus_dmamem_free(sc->an_dtag, dma->an_dma_vaddr, dma->an_dma_map); dma->an_dma_vaddr = 0; } /* * Release all resources */ void an_release_resources(device_t dev) { struct an_softc *sc = device_get_softc(dev); int i; if (sc->port_res) { bus_release_resource(dev, SYS_RES_IOPORT, sc->port_rid, sc->port_res); sc->port_res = 0; } if (sc->mem_res) { bus_release_resource(dev, SYS_RES_MEMORY, sc->mem_rid, sc->mem_res); sc->mem_res = 0; } if (sc->mem_aux_res) { bus_release_resource(dev, SYS_RES_MEMORY, sc->mem_aux_rid, sc->mem_aux_res); sc->mem_aux_res = 0; } if (sc->irq_res) { bus_release_resource(dev, SYS_RES_IRQ, sc->irq_rid, sc->irq_res); sc->irq_res = 0; } if (sc->an_rid_buffer.an_dma_paddr) { an_dma_free(sc, &sc->an_rid_buffer); } for (i = 0; i < AN_MAX_RX_DESC; i++) if (sc->an_rx_buffer[i].an_dma_paddr) { an_dma_free(sc, &sc->an_rx_buffer[i]); } for (i = 0; i < AN_MAX_TX_DESC; i++) if (sc->an_tx_buffer[i].an_dma_paddr) { an_dma_free(sc, &sc->an_tx_buffer[i]); } if (sc->an_dtag) { bus_dma_tag_destroy(sc->an_dtag); } } int an_init_mpi350_desc(struct an_softc *sc) { struct an_command cmd_struct; struct an_reply reply; struct an_card_rid_desc an_rid_desc; struct an_card_rx_desc an_rx_desc; struct an_card_tx_desc 
an_tx_desc; int i, desc; AN_LOCK_ASSERT(sc); if(!sc->an_rid_buffer.an_dma_paddr) an_dma_malloc(sc, AN_RID_BUFFER_SIZE, &sc->an_rid_buffer, 0); for (i = 0; i < AN_MAX_RX_DESC; i++) if(!sc->an_rx_buffer[i].an_dma_paddr) an_dma_malloc(sc, AN_RX_BUFFER_SIZE, &sc->an_rx_buffer[i], 0); for (i = 0; i < AN_MAX_TX_DESC; i++) if(!sc->an_tx_buffer[i].an_dma_paddr) an_dma_malloc(sc, AN_TX_BUFFER_SIZE, &sc->an_tx_buffer[i], 0); /* * Allocate RX descriptor */ bzero(&reply,sizeof(reply)); cmd_struct.an_cmd = AN_CMD_ALLOC_DESC; cmd_struct.an_parm0 = AN_DESCRIPTOR_RX; cmd_struct.an_parm1 = AN_RX_DESC_OFFSET; cmd_struct.an_parm2 = AN_MAX_RX_DESC; if (an_cmd_struct(sc, &cmd_struct, &reply)) { if_printf(sc->an_ifp, "failed to allocate RX descriptor\n"); return(EIO); } for (desc = 0; desc < AN_MAX_RX_DESC; desc++) { bzero(&an_rx_desc, sizeof(an_rx_desc)); an_rx_desc.an_valid = 1; an_rx_desc.an_len = AN_RX_BUFFER_SIZE; an_rx_desc.an_done = 0; an_rx_desc.an_phys = sc->an_rx_buffer[desc].an_dma_paddr; for (i = 0; i < sizeof(an_rx_desc) / 4; i++) CSR_MEM_AUX_WRITE_4(sc, AN_RX_DESC_OFFSET + (desc * sizeof(an_rx_desc)) + (i * 4), ((u_int32_t *)(void *)&an_rx_desc)[i]); } /* * Allocate TX descriptor */ bzero(&reply,sizeof(reply)); cmd_struct.an_cmd = AN_CMD_ALLOC_DESC; cmd_struct.an_parm0 = AN_DESCRIPTOR_TX; cmd_struct.an_parm1 = AN_TX_DESC_OFFSET; cmd_struct.an_parm2 = AN_MAX_TX_DESC; if (an_cmd_struct(sc, &cmd_struct, &reply)) { if_printf(sc->an_ifp, "failed to allocate TX descriptor\n"); return(EIO); } for (desc = 0; desc < AN_MAX_TX_DESC; desc++) { bzero(&an_tx_desc, sizeof(an_tx_desc)); an_tx_desc.an_offset = 0; an_tx_desc.an_eoc = 0; an_tx_desc.an_valid = 0; an_tx_desc.an_len = 0; an_tx_desc.an_phys = sc->an_tx_buffer[desc].an_dma_paddr; for (i = 0; i < sizeof(an_tx_desc) / 4; i++) CSR_MEM_AUX_WRITE_4(sc, AN_TX_DESC_OFFSET + (desc * sizeof(an_tx_desc)) + (i * 4), ((u_int32_t *)(void *)&an_tx_desc)[i]); } /* * Allocate RID descriptor */ bzero(&reply,sizeof(reply)); cmd_struct.an_cmd = AN_CMD_ALLOC_DESC; cmd_struct.an_parm0 = AN_DESCRIPTOR_HOSTRW; cmd_struct.an_parm1 = AN_HOST_DESC_OFFSET; cmd_struct.an_parm2 = 1; if (an_cmd_struct(sc, &cmd_struct, &reply)) { if_printf(sc->an_ifp, "failed to allocate host descriptor\n"); return(EIO); } bzero(&an_rid_desc, sizeof(an_rid_desc)); an_rid_desc.an_valid = 1; an_rid_desc.an_len = AN_RID_BUFFER_SIZE; an_rid_desc.an_rid = 0; an_rid_desc.an_phys = sc->an_rid_buffer.an_dma_paddr; for (i = 0; i < sizeof(an_rid_desc) / 4; i++) CSR_MEM_AUX_WRITE_4(sc, AN_HOST_DESC_OFFSET + i * 4, ((u_int32_t *)(void *)&an_rid_desc)[i]); return(0); } int an_attach(struct an_softc *sc, int flags) { struct ifnet *ifp; int error = EIO; int i, nrate, mword; u_int8_t r; ifp = sc->an_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(sc->an_dev, "can not if_alloc()\n"); goto fail; } ifp->if_softc = sc; if_initname(ifp, device_get_name(sc->an_dev), device_get_unit(sc->an_dev)); sc->an_gone = 0; sc->an_associated = 0; sc->an_monitor = 0; sc->an_was_monitor = 0; sc->an_flash_buffer = NULL; /* Reset the NIC. 
*/ AN_LOCK(sc); an_reset(sc); if (sc->mpi350) { error = an_init_mpi350_desc(sc); if (error) goto fail; } /* Load factory config */ if (an_cmd(sc, AN_CMD_READCFG, 0)) { device_printf(sc->an_dev, "failed to load config data\n"); goto fail; } /* Read the current configuration */ sc->an_config.an_type = AN_RID_GENCONFIG; sc->an_config.an_len = sizeof(struct an_ltv_genconfig); if (an_read_record(sc, (struct an_ltv_gen *)&sc->an_config)) { device_printf(sc->an_dev, "read record failed\n"); goto fail; } /* Read the card capabilities */ sc->an_caps.an_type = AN_RID_CAPABILITIES; sc->an_caps.an_len = sizeof(struct an_ltv_caps); if (an_read_record(sc, (struct an_ltv_gen *)&sc->an_caps)) { device_printf(sc->an_dev, "read record failed\n"); goto fail; } /* Read ssid list */ sc->an_ssidlist.an_type = AN_RID_SSIDLIST; sc->an_ssidlist.an_len = sizeof(struct an_ltv_ssidlist_new); if (an_read_record(sc, (struct an_ltv_gen *)&sc->an_ssidlist)) { device_printf(sc->an_dev, "read record failed\n"); goto fail; } /* Read AP list */ sc->an_aplist.an_type = AN_RID_APLIST; sc->an_aplist.an_len = sizeof(struct an_ltv_aplist); if (an_read_record(sc, (struct an_ltv_gen *)&sc->an_aplist)) { device_printf(sc->an_dev, "read record failed\n"); goto fail; } #ifdef ANCACHE /* Read the RSSI <-> dBm map */ sc->an_have_rssimap = 0; if (sc->an_caps.an_softcaps & 8) { sc->an_rssimap.an_type = AN_RID_RSSI_MAP; sc->an_rssimap.an_len = sizeof(struct an_ltv_rssi_map); if (an_read_record(sc, (struct an_ltv_gen *)&sc->an_rssimap)) { device_printf(sc->an_dev, "unable to get RSSI <-> dBM map\n"); } else { device_printf(sc->an_dev, "got RSSI <-> dBM map\n"); sc->an_have_rssimap = 1; } } else { device_printf(sc->an_dev, "no RSSI <-> dBM map\n"); } #endif AN_UNLOCK(sc); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = an_ioctl; ifp->if_start = an_start; ifp->if_init = an_init; ifp->if_baudrate = 10000000; IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); ifp->if_snd.ifq_drv_maxlen = ifqmaxlen; IFQ_SET_READY(&ifp->if_snd); bzero(sc->an_config.an_nodename, sizeof(sc->an_config.an_nodename)); bcopy(AN_DEFAULT_NODENAME, sc->an_config.an_nodename, sizeof(AN_DEFAULT_NODENAME) - 1); bzero(sc->an_ssidlist.an_entry[0].an_ssid, sizeof(sc->an_ssidlist.an_entry[0].an_ssid)); bcopy(AN_DEFAULT_NETNAME, sc->an_ssidlist.an_entry[0].an_ssid, sizeof(AN_DEFAULT_NETNAME) - 1); sc->an_ssidlist.an_entry[0].an_len = strlen(AN_DEFAULT_NETNAME); sc->an_config.an_opmode = AN_OPMODE_INFRASTRUCTURE_STATION; sc->an_tx_rate = 0; bzero((char *)&sc->an_stats, sizeof(sc->an_stats)); nrate = 8; ifmedia_init(&sc->an_ifmedia, 0, an_media_change, an_media_status); if_printf(ifp, "supported rates: "); #define ADD(s, o) ifmedia_add(&sc->an_ifmedia, \ IFM_MAKEWORD(IFM_IEEE80211, (s), (o), 0), 0, NULL) ADD(IFM_AUTO, 0); ADD(IFM_AUTO, IFM_IEEE80211_ADHOC); for (i = 0; i < nrate; i++) { r = sc->an_caps.an_rates[i]; mword = ieee80211_rate2media(NULL, r, IEEE80211_MODE_AUTO); if (mword == 0) continue; printf("%s%d%sMbps", (i != 0 ? " " : ""), (r & IEEE80211_RATE_VAL) / 2, ((r & 0x1) != 0 ? ".5" : "")); ADD(mword, 0); ADD(mword, IFM_IEEE80211_ADHOC); } printf("\n"); ifmedia_set(&sc->an_ifmedia, IFM_MAKEWORD(IFM_IEEE80211, IFM_AUTO, 0, 0)); #undef ADD /* * Call MI attach routine. 
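 * ether_ifattach() hooks the interface into the network stack using the
 * factory MAC address reported in the capabilities RID (an_caps.an_oemaddr).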
*/ ether_ifattach(ifp, sc->an_caps.an_oemaddr); callout_init_mtx(&sc->an_stat_ch, &sc->an_mtx, 0); return(0); fail: AN_UNLOCK(sc); mtx_destroy(&sc->an_mtx); if (ifp != NULL) if_free(ifp); return(error); } int an_detach(device_t dev) { struct an_softc *sc = device_get_softc(dev); struct ifnet *ifp = sc->an_ifp; if (sc->an_gone) { device_printf(dev,"already unloaded\n"); return(0); } AN_LOCK(sc); an_stop(sc); sc->an_gone = 1; ifmedia_removeall(&sc->an_ifmedia); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; AN_UNLOCK(sc); ether_ifdetach(ifp); bus_teardown_intr(dev, sc->irq_res, sc->irq_handle); callout_drain(&sc->an_stat_ch); if_free(ifp); an_release_resources(dev); mtx_destroy(&sc->an_mtx); return (0); } static void an_rxeof(struct an_softc *sc) { struct ifnet *ifp; struct ether_header *eh; struct ieee80211_frame *ih; struct an_rxframe rx_frame; struct an_rxframe_802_3 rx_frame_802_3; struct mbuf *m; int len, id, error = 0, i, count = 0; int ieee80211_header_len; u_char *bpf_buf; u_short fc1; struct an_card_rx_desc an_rx_desc; u_int8_t *buf; AN_LOCK_ASSERT(sc); ifp = sc->an_ifp; if (!sc->mpi350) { id = CSR_READ_2(sc, AN_RX_FID); if (sc->an_monitor && (ifp->if_flags & IFF_PROMISC)) { /* read raw 802.11 packet */ bpf_buf = sc->buf_802_11; /* read header */ if (an_read_data(sc, id, 0x0, (caddr_t)&rx_frame, sizeof(rx_frame))) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return; } /* * skip beacon by default since this increases the * system load a lot */ if (!(sc->an_monitor & AN_MONITOR_INCLUDE_BEACON) && (rx_frame.an_frame_ctl & IEEE80211_FC0_SUBTYPE_BEACON)) { return; } if (sc->an_monitor & AN_MONITOR_AIRONET_HEADER) { len = rx_frame.an_rx_payload_len + sizeof(rx_frame); /* Check for insane frame length */ if (len > sizeof(sc->buf_802_11)) { if_printf(ifp, "oversized packet " "received (%d, %d)\n", len, MCLBYTES); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return; } bcopy((char *)&rx_frame, bpf_buf, sizeof(rx_frame)); error = an_read_data(sc, id, sizeof(rx_frame), (caddr_t)bpf_buf+sizeof(rx_frame), rx_frame.an_rx_payload_len); } else { fc1=rx_frame.an_frame_ctl >> 8; ieee80211_header_len = sizeof(struct ieee80211_frame); if ((fc1 & IEEE80211_FC1_DIR_TODS) && (fc1 & IEEE80211_FC1_DIR_FROMDS)) { ieee80211_header_len += ETHER_ADDR_LEN; } len = rx_frame.an_rx_payload_len + ieee80211_header_len; /* Check for insane frame length */ if (len > sizeof(sc->buf_802_11)) { if_printf(ifp, "oversized packet " "received (%d, %d)\n", len, MCLBYTES); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return; } ih = (struct ieee80211_frame *)bpf_buf; bcopy((char *)&rx_frame.an_frame_ctl, (char *)ih, ieee80211_header_len); error = an_read_data(sc, id, sizeof(rx_frame) + rx_frame.an_gaplen, (caddr_t)ih +ieee80211_header_len, rx_frame.an_rx_payload_len); } /* dump raw 802.11 packet to bpf and skip ip stack */ BPF_TAP(ifp, bpf_buf, len); } else { MGETHDR(m, M_NOWAIT, MT_DATA); if (m == NULL) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return; } if (!(MCLGET(m, M_NOWAIT))) { m_freem(m); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return; } m->m_pkthdr.rcvif = ifp; /* Read Ethernet encapsulated packet */ #ifdef ANCACHE /* Read NIC frame header */ if (an_read_data(sc, id, 0, (caddr_t)&rx_frame, sizeof(rx_frame))) { m_freem(m); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return; } #endif /* Read in the 802_3 frame header */ if (an_read_data(sc, id, 0x34, (caddr_t)&rx_frame_802_3, sizeof(rx_frame_802_3))) { m_freem(m); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return; } if (rx_frame_802_3.an_rx_802_3_status != 0) { m_freem(m); 
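/*
 * The firmware flagged this frame as bad (non-zero 802.3 receive
 * status), so it is dropped and counted as an input error.
 */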
if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return; } /* Check for insane frame length */ len = rx_frame_802_3.an_rx_802_3_payload_len; if (len > sizeof(sc->buf_802_11)) { m_freem(m); if_printf(ifp, "oversized packet " "received (%d, %d)\n", len, MCLBYTES); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return; } m->m_pkthdr.len = m->m_len = rx_frame_802_3.an_rx_802_3_payload_len + 12; eh = mtod(m, struct ether_header *); bcopy((char *)&rx_frame_802_3.an_rx_dst_addr, (char *)&eh->ether_dhost, ETHER_ADDR_LEN); bcopy((char *)&rx_frame_802_3.an_rx_src_addr, (char *)&eh->ether_shost, ETHER_ADDR_LEN); /* in mbuf header type is just before payload */ error = an_read_data(sc, id, 0x44, (caddr_t)&(eh->ether_type), rx_frame_802_3.an_rx_802_3_payload_len); if (error) { m_freem(m); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return; } if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); /* Receive packet. */ #ifdef ANCACHE an_cache_store(sc, eh, m, rx_frame.an_rx_signal_strength, rx_frame.an_rsvd0); #endif AN_UNLOCK(sc); (*ifp->if_input)(ifp, m); AN_LOCK(sc); } } else { /* MPI-350 */ for (count = 0; count < AN_MAX_RX_DESC; count++){ for (i = 0; i < sizeof(an_rx_desc) / 4; i++) ((u_int32_t *)(void *)&an_rx_desc)[i] = CSR_MEM_AUX_READ_4(sc, AN_RX_DESC_OFFSET + (count * sizeof(an_rx_desc)) + (i * 4)); if (an_rx_desc.an_done && !an_rx_desc.an_valid) { buf = sc->an_rx_buffer[count].an_dma_vaddr; MGETHDR(m, M_NOWAIT, MT_DATA); if (m == NULL) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return; } if (!(MCLGET(m, M_NOWAIT))) { m_freem(m); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return; } m->m_pkthdr.rcvif = ifp; /* Read Ethernet encapsulated packet */ /* * No ANCACHE support since we just get back * an Ethernet packet no 802.11 info */ #if 0 #ifdef ANCACHE /* Read NIC frame header */ bcopy(buf, (caddr_t)&rx_frame, sizeof(rx_frame)); #endif #endif /* Check for insane frame length */ len = an_rx_desc.an_len + 12; if (len > MCLBYTES) { m_freem(m); if_printf(ifp, "oversized packet " "received (%d, %d)\n", len, MCLBYTES); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return; } m->m_pkthdr.len = m->m_len = an_rx_desc.an_len + 12; eh = mtod(m, struct ether_header *); bcopy(buf, (char *)eh, m->m_pkthdr.len); if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); /* Receive packet. 
*/ #if 0 #ifdef ANCACHE an_cache_store(sc, eh, m, rx_frame.an_rx_signal_strength, rx_frame.an_rsvd0); #endif #endif AN_UNLOCK(sc); (*ifp->if_input)(ifp, m); AN_LOCK(sc); an_rx_desc.an_valid = 1; an_rx_desc.an_len = AN_RX_BUFFER_SIZE; an_rx_desc.an_done = 0; an_rx_desc.an_phys = sc->an_rx_buffer[count].an_dma_paddr; for (i = 0; i < sizeof(an_rx_desc) / 4; i++) CSR_MEM_AUX_WRITE_4(sc, AN_RX_DESC_OFFSET + (count * sizeof(an_rx_desc)) + (i * 4), ((u_int32_t *)(void *)&an_rx_desc)[i]); } else { if_printf(ifp, "Didn't get valid RX packet " "%x %x %d\n", an_rx_desc.an_done, an_rx_desc.an_valid, an_rx_desc.an_len); } } } } static void an_txeof(struct an_softc *sc, int status) { struct ifnet *ifp; int id, i; AN_LOCK_ASSERT(sc); ifp = sc->an_ifp; sc->an_timer = 0; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; if (!sc->mpi350) { id = CSR_READ_2(sc, AN_TX_CMP_FID(sc->mpi350)); if (status & AN_EV_TX_EXC) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } else if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); for (i = 0; i < AN_TX_RING_CNT; i++) { if (id == sc->an_rdata.an_tx_ring[i]) { sc->an_rdata.an_tx_ring[i] = 0; break; } } AN_INC(sc->an_rdata.an_tx_cons, AN_TX_RING_CNT); } else { /* MPI 350 */ id = CSR_READ_2(sc, AN_TX_CMP_FID(sc->mpi350)); if (!sc->an_rdata.an_tx_empty){ if (status & AN_EV_TX_EXC) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } else if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); AN_INC(sc->an_rdata.an_tx_cons, AN_MAX_TX_DESC); if (sc->an_rdata.an_tx_prod == sc->an_rdata.an_tx_cons) sc->an_rdata.an_tx_empty = 1; } } return; } /* * We abuse the stats updater to check the current NIC status. This * is important because we don't want to allow transmissions until * the NIC has synchronized to the current cell (either as the master * in an ad-hoc group, or as a station connected to an access point). * * Note that this function will be called via callout(9) with a lock held. */ static void an_stats_update(void *xsc) { struct an_softc *sc; struct ifnet *ifp; sc = xsc; AN_LOCK_ASSERT(sc); ifp = sc->an_ifp; if (sc->an_timer > 0 && --sc->an_timer == 0) an_watchdog(sc); sc->an_status.an_type = AN_RID_STATUS; sc->an_status.an_len = sizeof(struct an_ltv_status); if (an_read_record(sc, (struct an_ltv_gen *)&sc->an_status)) return; if (sc->an_status.an_opmode & AN_STATUS_OPMODE_IN_SYNC) sc->an_associated = 1; else sc->an_associated = 0; /* Don't do this while we're transmitting */ if (ifp->if_drv_flags & IFF_DRV_OACTIVE) { callout_reset(&sc->an_stat_ch, hz, an_stats_update, sc); return; } sc->an_stats.an_len = sizeof(struct an_ltv_stats); sc->an_stats.an_type = AN_RID_32BITS_CUM; if (an_read_record(sc, (struct an_ltv_gen *)&sc->an_stats.an_len)) return; callout_reset(&sc->an_stat_ch, hz, an_stats_update, sc); return; } void an_intr(void *xsc) { struct an_softc *sc; struct ifnet *ifp; u_int16_t status; sc = (struct an_softc*)xsc; AN_LOCK(sc); if (sc->an_gone) { AN_UNLOCK(sc); return; } ifp = sc->an_ifp; /* Disable interrupts. 
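 * They are re-enabled near the bottom of the handler, once the pending
 * events read from AN_EVENT_STAT have been serviced and acknowledged.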
*/ CSR_WRITE_2(sc, AN_INT_EN(sc->mpi350), 0); status = CSR_READ_2(sc, AN_EVENT_STAT(sc->mpi350)); CSR_WRITE_2(sc, AN_EVENT_ACK(sc->mpi350), ~AN_INTRS(sc->mpi350)); if (status & AN_EV_MIC) { CSR_WRITE_2(sc, AN_EVENT_ACK(sc->mpi350), AN_EV_MIC); } if (status & AN_EV_LINKSTAT) { if (CSR_READ_2(sc, AN_LINKSTAT(sc->mpi350)) == AN_LINKSTAT_ASSOCIATED) sc->an_associated = 1; else sc->an_associated = 0; CSR_WRITE_2(sc, AN_EVENT_ACK(sc->mpi350), AN_EV_LINKSTAT); } if (status & AN_EV_RX) { an_rxeof(sc); CSR_WRITE_2(sc, AN_EVENT_ACK(sc->mpi350), AN_EV_RX); } if (sc->mpi350 && status & AN_EV_TX_CPY) { an_txeof(sc, status); CSR_WRITE_2(sc, AN_EVENT_ACK(sc->mpi350), AN_EV_TX_CPY); } if (status & AN_EV_TX) { an_txeof(sc, status); CSR_WRITE_2(sc, AN_EVENT_ACK(sc->mpi350), AN_EV_TX); } if (status & AN_EV_TX_EXC) { an_txeof(sc, status); CSR_WRITE_2(sc, AN_EVENT_ACK(sc->mpi350), AN_EV_TX_EXC); } if (status & AN_EV_ALLOC) CSR_WRITE_2(sc, AN_EVENT_ACK(sc->mpi350), AN_EV_ALLOC); /* Re-enable interrupts. */ CSR_WRITE_2(sc, AN_INT_EN(sc->mpi350), AN_INTRS(sc->mpi350)); if ((ifp->if_flags & IFF_UP) && !IFQ_DRV_IS_EMPTY(&ifp->if_snd)) an_start_locked(ifp); AN_UNLOCK(sc); return; } static int an_cmd_struct(struct an_softc *sc, struct an_command *cmd, struct an_reply *reply) { int i; AN_LOCK_ASSERT(sc); for (i = 0; i != AN_TIMEOUT; i++) { if (CSR_READ_2(sc, AN_COMMAND(sc->mpi350)) & AN_CMD_BUSY) { DELAY(1000); } else break; } if( i == AN_TIMEOUT) { printf("BUSY\n"); return(ETIMEDOUT); } CSR_WRITE_2(sc, AN_PARAM0(sc->mpi350), cmd->an_parm0); CSR_WRITE_2(sc, AN_PARAM1(sc->mpi350), cmd->an_parm1); CSR_WRITE_2(sc, AN_PARAM2(sc->mpi350), cmd->an_parm2); CSR_WRITE_2(sc, AN_COMMAND(sc->mpi350), cmd->an_cmd); for (i = 0; i < AN_TIMEOUT; i++) { if (CSR_READ_2(sc, AN_EVENT_STAT(sc->mpi350)) & AN_EV_CMD) break; DELAY(1000); } reply->an_resp0 = CSR_READ_2(sc, AN_RESP0(sc->mpi350)); reply->an_resp1 = CSR_READ_2(sc, AN_RESP1(sc->mpi350)); reply->an_resp2 = CSR_READ_2(sc, AN_RESP2(sc->mpi350)); reply->an_status = CSR_READ_2(sc, AN_STATUS(sc->mpi350)); if (CSR_READ_2(sc, AN_COMMAND(sc->mpi350)) & AN_CMD_BUSY) CSR_WRITE_2(sc, AN_EVENT_ACK(sc->mpi350), AN_EV_CLR_STUCK_BUSY); /* Ack the command */ CSR_WRITE_2(sc, AN_EVENT_ACK(sc->mpi350), AN_EV_CMD); if (i == AN_TIMEOUT) return(ETIMEDOUT); return(0); } static int an_cmd(struct an_softc *sc, int cmd, int val) { int i, s = 0; AN_LOCK_ASSERT(sc); CSR_WRITE_2(sc, AN_PARAM0(sc->mpi350), val); CSR_WRITE_2(sc, AN_PARAM1(sc->mpi350), 0); CSR_WRITE_2(sc, AN_PARAM2(sc->mpi350), 0); CSR_WRITE_2(sc, AN_COMMAND(sc->mpi350), cmd); for (i = 0; i < AN_TIMEOUT; i++) { if (CSR_READ_2(sc, AN_EVENT_STAT(sc->mpi350)) & AN_EV_CMD) break; else { if (CSR_READ_2(sc, AN_COMMAND(sc->mpi350)) == cmd) CSR_WRITE_2(sc, AN_COMMAND(sc->mpi350), cmd); } } for (i = 0; i < AN_TIMEOUT; i++) { CSR_READ_2(sc, AN_RESP0(sc->mpi350)); CSR_READ_2(sc, AN_RESP1(sc->mpi350)); CSR_READ_2(sc, AN_RESP2(sc->mpi350)); s = CSR_READ_2(sc, AN_STATUS(sc->mpi350)); if ((s & AN_STAT_CMD_CODE) == (cmd & AN_STAT_CMD_CODE)) break; } /* Ack the command */ CSR_WRITE_2(sc, AN_EVENT_ACK(sc->mpi350), AN_EV_CMD); if (CSR_READ_2(sc, AN_COMMAND(sc->mpi350)) & AN_CMD_BUSY) CSR_WRITE_2(sc, AN_EVENT_ACK(sc->mpi350), AN_EV_CLR_STUCK_BUSY); if (i == AN_TIMEOUT) return(ETIMEDOUT); return(0); } /* * This reset sequence may look a little strange, but this is the * most reliable method I've found to really kick the NIC in the * head and force it to reboot correctly. 
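 * The sequence below is ENABLE, FW_RESTART, NOOP2, then FORCE_SYNCLOSS
 * (whose timeout is what gets reported as a failed reset), followed by DISABLE.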
*/ static void an_reset(struct an_softc *sc) { if (sc->an_gone) return; AN_LOCK_ASSERT(sc); an_cmd(sc, AN_CMD_ENABLE, 0); an_cmd(sc, AN_CMD_FW_RESTART, 0); an_cmd(sc, AN_CMD_NOOP2, 0); if (an_cmd(sc, AN_CMD_FORCE_SYNCLOSS, 0) == ETIMEDOUT) device_printf(sc->an_dev, "reset failed\n"); an_cmd(sc, AN_CMD_DISABLE, 0); return; } /* * Read an LTV record from the NIC. */ static int an_read_record(struct an_softc *sc, struct an_ltv_gen *ltv) { struct an_ltv_gen *an_ltv; struct an_card_rid_desc an_rid_desc; struct an_command cmd; struct an_reply reply; struct ifnet *ifp; u_int16_t *ptr; u_int8_t *ptr2; int i, len; AN_LOCK_ASSERT(sc); if (ltv->an_len < 4 || ltv->an_type == 0) return(EINVAL); ifp = sc->an_ifp; if (!sc->mpi350){ /* Tell the NIC to enter record read mode. */ if (an_cmd(sc, AN_CMD_ACCESS|AN_ACCESS_READ, ltv->an_type)) { if_printf(ifp, "RID access failed\n"); return(EIO); } /* Seek to the record. */ if (an_seek(sc, ltv->an_type, 0, AN_BAP1)) { if_printf(ifp, "seek to record failed\n"); return(EIO); } /* * Read the length and record type and make sure they * match what we expect (this verifies that we have enough * room to hold all of the returned data). * Length includes type but not length. */ len = CSR_READ_2(sc, AN_DATA1); if (len > (ltv->an_len - 2)) { if_printf(ifp, "record length mismatch -- expected %d, " "got %d for Rid %x\n", ltv->an_len - 2, len, ltv->an_type); len = ltv->an_len - 2; } else { ltv->an_len = len + 2; } /* Now read the data. */ len -= 2; /* skip the type */ ptr = <v->an_val; for (i = len; i > 1; i -= 2) *ptr++ = CSR_READ_2(sc, AN_DATA1); if (i) { ptr2 = (u_int8_t *)ptr; *ptr2 = CSR_READ_1(sc, AN_DATA1); } } else { /* MPI-350 */ if (!sc->an_rid_buffer.an_dma_vaddr) return(EIO); an_rid_desc.an_valid = 1; an_rid_desc.an_len = AN_RID_BUFFER_SIZE; an_rid_desc.an_rid = 0; an_rid_desc.an_phys = sc->an_rid_buffer.an_dma_paddr; bzero(sc->an_rid_buffer.an_dma_vaddr, AN_RID_BUFFER_SIZE); bzero(&cmd, sizeof(cmd)); bzero(&reply, sizeof(reply)); cmd.an_cmd = AN_CMD_ACCESS|AN_ACCESS_READ; cmd.an_parm0 = ltv->an_type; for (i = 0; i < sizeof(an_rid_desc) / 4; i++) CSR_MEM_AUX_WRITE_4(sc, AN_HOST_DESC_OFFSET + i * 4, ((u_int32_t *)(void *)&an_rid_desc)[i]); if (an_cmd_struct(sc, &cmd, &reply) || reply.an_status & AN_CMD_QUAL_MASK) { if_printf(ifp, "failed to read RID %x %x %x %x %x, %d\n", ltv->an_type, reply.an_status, reply.an_resp0, reply.an_resp1, reply.an_resp2, i); return(EIO); } an_ltv = (struct an_ltv_gen *)sc->an_rid_buffer.an_dma_vaddr; if (an_ltv->an_len + 2 < an_rid_desc.an_len) { an_rid_desc.an_len = an_ltv->an_len; } len = an_rid_desc.an_len; if (len > (ltv->an_len - 2)) { if_printf(ifp, "record length mismatch -- expected %d, " "got %d for Rid %x\n", ltv->an_len - 2, len, ltv->an_type); len = ltv->an_len - 2; } else { ltv->an_len = len + 2; } bcopy(&an_ltv->an_type, <v->an_val, len); } if (an_dump) an_dump_record(sc, ltv, "Read"); return(0); } /* * Same as read, except we inject data instead of reading it. */ static int an_write_record(struct an_softc *sc, struct an_ltv_gen *ltv) { struct an_card_rid_desc an_rid_desc; struct an_command cmd; struct an_reply reply; u_int16_t *ptr; u_int8_t *ptr2; int i, len; AN_LOCK_ASSERT(sc); if (an_dump) an_dump_record(sc, ltv, "Write"); if (!sc->mpi350){ if (an_cmd(sc, AN_CMD_ACCESS|AN_ACCESS_READ, ltv->an_type)) return(EIO); if (an_seek(sc, ltv->an_type, 0, AN_BAP1)) return(EIO); /* * Length includes type but not length. 
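 * (So for a record with an_len == 16, 14 is written out as the length word,
 * and the 12 bytes of value following the type word are then copied to the card.)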
*/ len = ltv->an_len - 2; CSR_WRITE_2(sc, AN_DATA1, len); len -= 2; /* skip the type */ ptr = <v->an_val; for (i = len; i > 1; i -= 2) CSR_WRITE_2(sc, AN_DATA1, *ptr++); if (i) { ptr2 = (u_int8_t *)ptr; CSR_WRITE_1(sc, AN_DATA0, *ptr2); } if (an_cmd(sc, AN_CMD_ACCESS|AN_ACCESS_WRITE, ltv->an_type)) return(EIO); } else { /* MPI-350 */ for (i = 0; i != AN_TIMEOUT; i++) { if (CSR_READ_2(sc, AN_COMMAND(sc->mpi350)) & AN_CMD_BUSY) { DELAY(10); } else break; } if (i == AN_TIMEOUT) { printf("BUSY\n"); } an_rid_desc.an_valid = 1; an_rid_desc.an_len = ltv->an_len - 2; an_rid_desc.an_rid = ltv->an_type; an_rid_desc.an_phys = sc->an_rid_buffer.an_dma_paddr; bcopy(<v->an_type, sc->an_rid_buffer.an_dma_vaddr, an_rid_desc.an_len); bzero(&cmd,sizeof(cmd)); bzero(&reply,sizeof(reply)); cmd.an_cmd = AN_CMD_ACCESS|AN_ACCESS_WRITE; cmd.an_parm0 = ltv->an_type; for (i = 0; i < sizeof(an_rid_desc) / 4; i++) CSR_MEM_AUX_WRITE_4(sc, AN_HOST_DESC_OFFSET + i * 4, ((u_int32_t *)(void *)&an_rid_desc)[i]); DELAY(100000); if ((i = an_cmd_struct(sc, &cmd, &reply))) { if_printf(sc->an_ifp, "failed to write RID 1 %x %x %x %x %x, %d\n", ltv->an_type, reply.an_status, reply.an_resp0, reply.an_resp1, reply.an_resp2, i); return(EIO); } if (reply.an_status & AN_CMD_QUAL_MASK) { if_printf(sc->an_ifp, "failed to write RID 2 %x %x %x %x %x, %d\n", ltv->an_type, reply.an_status, reply.an_resp0, reply.an_resp1, reply.an_resp2, i); return(EIO); } DELAY(100000); } return(0); } static void an_dump_record(struct an_softc *sc, struct an_ltv_gen *ltv, char *string) { u_int8_t *ptr2; int len; int i; int count = 0; char buf[17], temp; len = ltv->an_len - 4; if_printf(sc->an_ifp, "RID %4x, Length %4d, Mode %s\n", ltv->an_type, ltv->an_len - 4, string); if (an_dump == 1 || (an_dump == ltv->an_type)) { if_printf(sc->an_ifp, "\t"); bzero(buf,sizeof(buf)); ptr2 = (u_int8_t *)<v->an_val; for (i = len; i > 0; i--) { printf("%02x ", *ptr2); temp = *ptr2++; if (isprint(temp)) buf[count] = temp; else buf[count] = '.'; if (++count == 16) { count = 0; printf("%s\n",buf); if_printf(sc->an_ifp, "\t"); bzero(buf,sizeof(buf)); } } for (; count != 16; count++) { printf(" "); } printf(" %s\n",buf); } } static int an_seek(struct an_softc *sc, int id, int off, int chan) { int i; int selreg, offreg; switch (chan) { case AN_BAP0: selreg = AN_SEL0; offreg = AN_OFF0; break; case AN_BAP1: selreg = AN_SEL1; offreg = AN_OFF1; break; default: if_printf(sc->an_ifp, "invalid data path: %x\n", chan); return(EIO); } CSR_WRITE_2(sc, selreg, id); CSR_WRITE_2(sc, offreg, off); for (i = 0; i < AN_TIMEOUT; i++) { if (!(CSR_READ_2(sc, offreg) & (AN_OFF_BUSY|AN_OFF_ERR))) break; } if (i == AN_TIMEOUT) return(ETIMEDOUT); return(0); } static int an_read_data(struct an_softc *sc, int id, int off, caddr_t buf, int len) { int i; u_int16_t *ptr; u_int8_t *ptr2; if (off != -1) { if (an_seek(sc, id, off, AN_BAP1)) return(EIO); } ptr = (u_int16_t *)buf; for (i = len; i > 1; i -= 2) *ptr++ = CSR_READ_2(sc, AN_DATA1); if (i) { ptr2 = (u_int8_t *)ptr; *ptr2 = CSR_READ_1(sc, AN_DATA1); } return(0); } static int an_write_data(struct an_softc *sc, int id, int off, caddr_t buf, int len) { int i; u_int16_t *ptr; u_int8_t *ptr2; if (off != -1) { if (an_seek(sc, id, off, AN_BAP0)) return(EIO); } ptr = (u_int16_t *)buf; for (i = len; i > 1; i -= 2) CSR_WRITE_2(sc, AN_DATA0, *ptr++); if (i) { ptr2 = (u_int8_t *)ptr; CSR_WRITE_1(sc, AN_DATA0, *ptr2); } return(0); } /* * Allocate a region of memory inside the NIC and zero * it out. 
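 * This is done by issuing AN_CMD_ALLOC_MEM, waiting for the AN_EV_ALLOC event,
 * reading the new FID from AN_ALLOC_FID and then writing zeros through BAP0.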
*/ static int an_alloc_nicmem(struct an_softc *sc, int len, int *id) { int i; if (an_cmd(sc, AN_CMD_ALLOC_MEM, len)) { if_printf(sc->an_ifp, "failed to allocate %d bytes on NIC\n", len); return(ENOMEM); } for (i = 0; i < AN_TIMEOUT; i++) { if (CSR_READ_2(sc, AN_EVENT_STAT(sc->mpi350)) & AN_EV_ALLOC) break; } if (i == AN_TIMEOUT) return(ETIMEDOUT); CSR_WRITE_2(sc, AN_EVENT_ACK(sc->mpi350), AN_EV_ALLOC); *id = CSR_READ_2(sc, AN_ALLOC_FID); if (an_seek(sc, *id, 0, AN_BAP0)) return(EIO); for (i = 0; i < len / 2; i++) CSR_WRITE_2(sc, AN_DATA0, 0); return(0); } static void an_setdef(struct an_softc *sc, struct an_req *areq) { struct ifnet *ifp; struct an_ltv_genconfig *cfg; struct an_ltv_ssidlist_new *ssid; struct an_ltv_aplist *ap; struct an_ltv_gen *sp; ifp = sc->an_ifp; AN_LOCK_ASSERT(sc); switch (areq->an_type) { case AN_RID_GENCONFIG: cfg = (struct an_ltv_genconfig *)areq; bcopy((char *)&cfg->an_macaddr, IF_LLADDR(sc->an_ifp), ETHER_ADDR_LEN); bcopy((char *)cfg, (char *)&sc->an_config, sizeof(struct an_ltv_genconfig)); break; case AN_RID_SSIDLIST: ssid = (struct an_ltv_ssidlist_new *)areq; bcopy((char *)ssid, (char *)&sc->an_ssidlist, sizeof(struct an_ltv_ssidlist_new)); break; case AN_RID_APLIST: ap = (struct an_ltv_aplist *)areq; bcopy((char *)ap, (char *)&sc->an_aplist, sizeof(struct an_ltv_aplist)); break; case AN_RID_TX_SPEED: sp = (struct an_ltv_gen *)areq; sc->an_tx_rate = sp->an_val; /* Read the current configuration */ sc->an_config.an_type = AN_RID_GENCONFIG; sc->an_config.an_len = sizeof(struct an_ltv_genconfig); an_read_record(sc, (struct an_ltv_gen *)&sc->an_config); cfg = &sc->an_config; /* clear other rates and set the only one we want */ bzero(cfg->an_rates, sizeof(cfg->an_rates)); cfg->an_rates[0] = sc->an_tx_rate; /* Save the new rate */ sc->an_config.an_type = AN_RID_GENCONFIG; sc->an_config.an_len = sizeof(struct an_ltv_genconfig); break; case AN_RID_WEP_TEMP: /* Cache the temp keys */ bcopy(areq, &sc->an_temp_keys[((struct an_ltv_key *)areq)->kindex], sizeof(struct an_ltv_key)); case AN_RID_WEP_PERM: case AN_RID_LEAPUSERNAME: case AN_RID_LEAPPASSWORD: an_init_locked(sc); /* Disable the MAC. */ an_cmd(sc, AN_CMD_DISABLE, 0); /* Write the key */ an_write_record(sc, (struct an_ltv_gen *)areq); /* Turn the MAC back on. */ an_cmd(sc, AN_CMD_ENABLE, 0); break; case AN_RID_MONITOR_MODE: cfg = (struct an_ltv_genconfig *)areq; bpfdetach(ifp); if (ng_ether_detach_p != NULL) (*ng_ether_detach_p) (ifp); sc->an_monitor = cfg->an_len; if (sc->an_monitor & AN_MONITOR) { if (sc->an_monitor & AN_MONITOR_AIRONET_HEADER) { bpfattach(ifp, DLT_AIRONET_HEADER, sizeof(struct ether_header)); } else { bpfattach(ifp, DLT_IEEE802_11, sizeof(struct ether_header)); } } else { bpfattach(ifp, DLT_EN10MB, sizeof(struct ether_header)); if (ng_ether_attach_p != NULL) (*ng_ether_attach_p) (ifp); } break; default: if_printf(ifp, "unknown RID: %x\n", areq->an_type); return; } /* Reinitialize the card. */ if (ifp->if_flags) an_init_locked(sc); return; } /* * Derived from Linux driver to enable promiscious mode. */ static void an_promisc(struct an_softc *sc, int promisc) { AN_LOCK_ASSERT(sc); if (sc->an_was_monitor) { an_reset(sc); if (sc->mpi350) an_init_mpi350_desc(sc); } if (sc->an_monitor || sc->an_was_monitor) an_init_locked(sc); sc->an_was_monitor = sc->an_monitor; an_cmd(sc, AN_CMD_SET_MODE, promisc ? 
0xffff : 0); return; } static int an_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { int error = 0; int len; int i, max; struct an_softc *sc; struct ifreq *ifr; struct thread *td = curthread; struct ieee80211req *ireq; struct ieee80211_channel ch; u_int8_t tmpstr[IEEE80211_NWID_LEN*2]; u_int8_t *tmpptr; struct an_ltv_genconfig *config; struct an_ltv_key *key; struct an_ltv_status *status; struct an_ltv_ssidlist_new *ssids; int mode; struct aironet_ioctl l_ioctl; sc = ifp->if_softc; ifr = (struct ifreq *)data; ireq = (struct ieee80211req *)data; config = (struct an_ltv_genconfig *)&sc->areq; key = (struct an_ltv_key *)&sc->areq; status = (struct an_ltv_status *)&sc->areq; ssids = (struct an_ltv_ssidlist_new *)&sc->areq; if (sc->an_gone) { error = ENODEV; goto out; } switch (command) { case SIOCSIFFLAGS: AN_LOCK(sc); if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING && ifp->if_flags & IFF_PROMISC && !(sc->an_if_flags & IFF_PROMISC)) { an_promisc(sc, 1); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING && !(ifp->if_flags & IFF_PROMISC) && sc->an_if_flags & IFF_PROMISC) { an_promisc(sc, 0); } else an_init_locked(sc); } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) an_stop(sc); } sc->an_if_flags = ifp->if_flags; AN_UNLOCK(sc); error = 0; break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->an_ifmedia, command); break; case SIOCADDMULTI: case SIOCDELMULTI: /* The Aironet has no multicast filter. */ error = 0; break; case SIOCGAIRONET: - error = copyin(ifr->ifr_data, &sc->areq, sizeof(sc->areq)); + error = copyin(ifr_data_get_ptr(ifr), &sc->areq, + sizeof(sc->areq)); if (error != 0) break; AN_LOCK(sc); #ifdef ANCACHE if (sc->areq.an_type == AN_RID_ZERO_CACHE) { error = priv_check(td, PRIV_DRIVER); if (error) break; sc->an_sigitems = sc->an_nextitem = 0; break; } else if (sc->areq.an_type == AN_RID_READ_CACHE) { char *pt = (char *)&sc->areq.an_val; bcopy((char *)&sc->an_sigitems, (char *)pt, sizeof(int)); pt += sizeof(int); sc->areq.an_len = sizeof(int) / 2; bcopy((char *)&sc->an_sigcache, (char *)pt, sizeof(struct an_sigcache) * sc->an_sigitems); sc->areq.an_len += ((sizeof(struct an_sigcache) * sc->an_sigitems) / 2) + 1; } else #endif if (an_read_record(sc, (struct an_ltv_gen *)&sc->areq)) { AN_UNLOCK(sc); error = EINVAL; break; } AN_UNLOCK(sc); - error = copyout(&sc->areq, ifr->ifr_data, sizeof(sc->areq)); + error = copyout(&sc->areq, ifr_data_get_ptr(ifr), + sizeof(sc->areq)); break; case SIOCSAIRONET: if ((error = priv_check(td, PRIV_DRIVER))) goto out; AN_LOCK(sc); - error = copyin(ifr->ifr_data, &sc->areq, sizeof(sc->areq)); + error = copyin(ifr_data_get_ptr(ifr), &sc->areq, + sizeof(sc->areq)); if (error != 0) break; an_setdef(sc, &sc->areq); AN_UNLOCK(sc); break; case SIOCGPRIVATE_0: /* used by Cisco client utility */ if ((error = priv_check(td, PRIV_DRIVER))) goto out; - error = copyin(ifr->ifr_data, &l_ioctl, sizeof(l_ioctl)); + error = copyin(ifr_data_get_ptr(ifr), &l_ioctl, + sizeof(l_ioctl)); if (error) goto out; mode = l_ioctl.command; AN_LOCK(sc); if (mode >= AIROGCAP && mode <= AIROGSTATSD32) { error = readrids(ifp, &l_ioctl); } else if (mode >= AIROPCAP && mode <= AIROPLEAPUSR) { error = writerids(ifp, &l_ioctl); } else if (mode >= AIROFLSHRST && mode <= AIRORESTART) { error = flashcard(ifp, &l_ioctl); } else { error =-1; } AN_UNLOCK(sc); if (!error) { /* copy out the updated command info */ - error = copyout(&l_ioctl, ifr->ifr_data, sizeof(l_ioctl)); + error = copyout(&l_ioctl, ifr_data_get_ptr(ifr), + sizeof(l_ioctl)); } 
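/*
 * Note on the copyin()/copyout() calls above and below: this revision
 * switches them from dereferencing ifr->ifr_data directly to the
 * ifr_data_get_ptr(ifr) accessor, which is assumed here to be the
 * preferred way to fetch the user data pointer (covering 32-bit compat
 * ioctl layouts as well); behaviour for native requests is unchanged.
 */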
break; case SIOCGPRIVATE_1: /* used by Cisco client utility */ if ((error = priv_check(td, PRIV_DRIVER))) goto out; - error = copyin(ifr->ifr_data, &l_ioctl, sizeof(l_ioctl)); + error = copyin(ifr_data_get_ptr(ifr), &l_ioctl, + sizeof(l_ioctl)); if (error) goto out; l_ioctl.command = 0; error = AIROMAGIC; (void) copyout(&error, l_ioctl.data, sizeof(error)); error = 0; break; case SIOCG80211: sc->areq.an_len = sizeof(sc->areq); /* was that a good idea DJA we are doing a short-cut */ switch (ireq->i_type) { case IEEE80211_IOC_SSID: AN_LOCK(sc); if (ireq->i_val == -1) { sc->areq.an_type = AN_RID_STATUS; if (an_read_record(sc, (struct an_ltv_gen *)&sc->areq)) { error = EINVAL; AN_UNLOCK(sc); break; } len = status->an_ssidlen; tmpptr = status->an_ssid; } else if (ireq->i_val >= 0) { sc->areq.an_type = AN_RID_SSIDLIST; if (an_read_record(sc, (struct an_ltv_gen *)&sc->areq)) { error = EINVAL; AN_UNLOCK(sc); break; } max = (sc->areq.an_len - 4) / sizeof(struct an_ltv_ssid_entry); if ( max > MAX_SSIDS ) { printf("To many SSIDs only using " "%d of %d\n", MAX_SSIDS, max); max = MAX_SSIDS; } if (ireq->i_val > max) { error = EINVAL; AN_UNLOCK(sc); break; } else { len = ssids->an_entry[ireq->i_val].an_len; tmpptr = ssids->an_entry[ireq->i_val].an_ssid; } } else { error = EINVAL; AN_UNLOCK(sc); break; } if (len > IEEE80211_NWID_LEN) { error = EINVAL; AN_UNLOCK(sc); break; } AN_UNLOCK(sc); ireq->i_len = len; bzero(tmpstr, IEEE80211_NWID_LEN); bcopy(tmpptr, tmpstr, len); error = copyout(tmpstr, ireq->i_data, IEEE80211_NWID_LEN); break; case IEEE80211_IOC_NUMSSIDS: AN_LOCK(sc); sc->areq.an_len = sizeof(sc->areq); sc->areq.an_type = AN_RID_SSIDLIST; if (an_read_record(sc, (struct an_ltv_gen *)&sc->areq)) { AN_UNLOCK(sc); error = EINVAL; break; } max = (sc->areq.an_len - 4) / sizeof(struct an_ltv_ssid_entry); AN_UNLOCK(sc); if ( max > MAX_SSIDS ) { printf("To many SSIDs only using " "%d of %d\n", MAX_SSIDS, max); max = MAX_SSIDS; } ireq->i_val = max; break; case IEEE80211_IOC_WEP: AN_LOCK(sc); sc->areq.an_type = AN_RID_ACTUALCFG; if (an_read_record(sc, (struct an_ltv_gen *)&sc->areq)) { error = EINVAL; AN_UNLOCK(sc); break; } AN_UNLOCK(sc); if (config->an_authtype & AN_AUTHTYPE_PRIVACY_IN_USE) { if (config->an_authtype & AN_AUTHTYPE_ALLOW_UNENCRYPTED) ireq->i_val = IEEE80211_WEP_MIXED; else ireq->i_val = IEEE80211_WEP_ON; } else { ireq->i_val = IEEE80211_WEP_OFF; } break; case IEEE80211_IOC_WEPKEY: /* * XXX: I'm not entierly convinced this is * correct, but it's what is implemented in * ancontrol so it will have to do until we get * access to actual Cisco code. */ if (ireq->i_val < 0 || ireq->i_val > 8) { error = EINVAL; break; } len = 0; if (ireq->i_val < 5) { AN_LOCK(sc); sc->areq.an_type = AN_RID_WEP_TEMP; for (i = 0; i < 5; i++) { if (an_read_record(sc, (struct an_ltv_gen *)&sc->areq)) { error = EINVAL; break; } if (key->kindex == 0xffff) break; if (key->kindex == ireq->i_val) len = key->klen; /* Required to get next entry */ sc->areq.an_type = AN_RID_WEP_PERM; } AN_UNLOCK(sc); if (error != 0) { break; } } /* We aren't allowed to read the value of the * key from the card so we just output zeros * like we would if we could read the card, but * denied the user access. */ bzero(tmpstr, len); ireq->i_len = len; error = copyout(tmpstr, ireq->i_data, len); break; case IEEE80211_IOC_NUMWEPKEYS: ireq->i_val = 9; /* include home key */ break; case IEEE80211_IOC_WEPTXKEY: /* * For some strange reason, you have to read all * keys before you can read the txkey. 
*/ AN_LOCK(sc); sc->areq.an_type = AN_RID_WEP_TEMP; for (i = 0; i < 5; i++) { if (an_read_record(sc, (struct an_ltv_gen *) &sc->areq)) { error = EINVAL; break; } if (key->kindex == 0xffff) { break; } /* Required to get next entry */ sc->areq.an_type = AN_RID_WEP_PERM; } if (error != 0) { AN_UNLOCK(sc); break; } sc->areq.an_type = AN_RID_WEP_PERM; key->kindex = 0xffff; if (an_read_record(sc, (struct an_ltv_gen *)&sc->areq)) { error = EINVAL; AN_UNLOCK(sc); break; } ireq->i_val = key->mac[0]; /* * Check for home mode. Map home mode into * 5th key since that is how it is stored on * the card */ sc->areq.an_len = sizeof(struct an_ltv_genconfig); sc->areq.an_type = AN_RID_GENCONFIG; if (an_read_record(sc, (struct an_ltv_gen *)&sc->areq)) { error = EINVAL; AN_UNLOCK(sc); break; } if (config->an_home_product & AN_HOME_NETWORK) ireq->i_val = 4; AN_UNLOCK(sc); break; case IEEE80211_IOC_AUTHMODE: AN_LOCK(sc); sc->areq.an_type = AN_RID_ACTUALCFG; if (an_read_record(sc, (struct an_ltv_gen *)&sc->areq)) { error = EINVAL; AN_UNLOCK(sc); break; } AN_UNLOCK(sc); if ((config->an_authtype & AN_AUTHTYPE_MASK) == AN_AUTHTYPE_NONE) { ireq->i_val = IEEE80211_AUTH_NONE; } else if ((config->an_authtype & AN_AUTHTYPE_MASK) == AN_AUTHTYPE_OPEN) { ireq->i_val = IEEE80211_AUTH_OPEN; } else if ((config->an_authtype & AN_AUTHTYPE_MASK) == AN_AUTHTYPE_SHAREDKEY) { ireq->i_val = IEEE80211_AUTH_SHARED; } else error = EINVAL; break; case IEEE80211_IOC_STATIONNAME: AN_LOCK(sc); sc->areq.an_type = AN_RID_ACTUALCFG; if (an_read_record(sc, (struct an_ltv_gen *)&sc->areq)) { error = EINVAL; AN_UNLOCK(sc); break; } AN_UNLOCK(sc); ireq->i_len = sizeof(config->an_nodename); tmpptr = config->an_nodename; bzero(tmpstr, IEEE80211_NWID_LEN); bcopy(tmpptr, tmpstr, ireq->i_len); error = copyout(tmpstr, ireq->i_data, IEEE80211_NWID_LEN); break; case IEEE80211_IOC_CHANNEL: AN_LOCK(sc); sc->areq.an_type = AN_RID_STATUS; if (an_read_record(sc, (struct an_ltv_gen *)&sc->areq)) { error = EINVAL; AN_UNLOCK(sc); break; } AN_UNLOCK(sc); ireq->i_val = status->an_cur_channel; break; case IEEE80211_IOC_CURCHAN: AN_LOCK(sc); sc->areq.an_type = AN_RID_STATUS; if (an_read_record(sc, (struct an_ltv_gen *)&sc->areq)) { error = EINVAL; AN_UNLOCK(sc); break; } AN_UNLOCK(sc); bzero(&ch, sizeof(ch)); ch.ic_freq = ieee80211_ieee2mhz(status->an_cur_channel, IEEE80211_CHAN_B); ch.ic_flags = IEEE80211_CHAN_B; ch.ic_ieee = status->an_cur_channel; error = copyout(&ch, ireq->i_data, sizeof(ch)); break; case IEEE80211_IOC_POWERSAVE: AN_LOCK(sc); sc->areq.an_type = AN_RID_ACTUALCFG; if (an_read_record(sc, (struct an_ltv_gen *)&sc->areq)) { error = EINVAL; AN_UNLOCK(sc); break; } AN_UNLOCK(sc); if (config->an_psave_mode == AN_PSAVE_NONE) { ireq->i_val = IEEE80211_POWERSAVE_OFF; } else if (config->an_psave_mode == AN_PSAVE_CAM) { ireq->i_val = IEEE80211_POWERSAVE_CAM; } else if (config->an_psave_mode == AN_PSAVE_PSP) { ireq->i_val = IEEE80211_POWERSAVE_PSP; } else if (config->an_psave_mode == AN_PSAVE_PSP_CAM) { ireq->i_val = IEEE80211_POWERSAVE_PSP_CAM; } else error = EINVAL; break; case IEEE80211_IOC_POWERSAVESLEEP: AN_LOCK(sc); sc->areq.an_type = AN_RID_ACTUALCFG; if (an_read_record(sc, (struct an_ltv_gen *)&sc->areq)) { error = EINVAL; AN_UNLOCK(sc); break; } AN_UNLOCK(sc); ireq->i_val = config->an_listen_interval; break; } break; case SIOCS80211: if ((error = priv_check(td, PRIV_NET80211_MANAGE))) goto out; AN_LOCK(sc); sc->areq.an_len = sizeof(sc->areq); /* * We need a config structure for everything but the WEP * key management and SSIDs so we get it now so 
avoid * duplicating this code every time. */ if (ireq->i_type != IEEE80211_IOC_SSID && ireq->i_type != IEEE80211_IOC_WEPKEY && ireq->i_type != IEEE80211_IOC_WEPTXKEY) { sc->areq.an_type = AN_RID_GENCONFIG; if (an_read_record(sc, (struct an_ltv_gen *)&sc->areq)) { error = EINVAL; AN_UNLOCK(sc); break; } } switch (ireq->i_type) { case IEEE80211_IOC_SSID: sc->areq.an_len = sizeof(sc->areq); sc->areq.an_type = AN_RID_SSIDLIST; if (an_read_record(sc, (struct an_ltv_gen *)&sc->areq)) { error = EINVAL; AN_UNLOCK(sc); break; } if (ireq->i_len > IEEE80211_NWID_LEN) { error = EINVAL; AN_UNLOCK(sc); break; } max = (sc->areq.an_len - 4) / sizeof(struct an_ltv_ssid_entry); if ( max > MAX_SSIDS ) { printf("To many SSIDs only using " "%d of %d\n", MAX_SSIDS, max); max = MAX_SSIDS; } if (ireq->i_val > max) { error = EINVAL; AN_UNLOCK(sc); break; } else { error = copyin(ireq->i_data, ssids->an_entry[ireq->i_val].an_ssid, ireq->i_len); ssids->an_entry[ireq->i_val].an_len = ireq->i_len; sc->areq.an_len = sizeof(sc->areq); sc->areq.an_type = AN_RID_SSIDLIST; an_setdef(sc, &sc->areq); AN_UNLOCK(sc); break; } break; case IEEE80211_IOC_WEP: switch (ireq->i_val) { case IEEE80211_WEP_OFF: config->an_authtype &= ~(AN_AUTHTYPE_PRIVACY_IN_USE | AN_AUTHTYPE_ALLOW_UNENCRYPTED); break; case IEEE80211_WEP_ON: config->an_authtype |= AN_AUTHTYPE_PRIVACY_IN_USE; config->an_authtype &= ~AN_AUTHTYPE_ALLOW_UNENCRYPTED; break; case IEEE80211_WEP_MIXED: config->an_authtype |= AN_AUTHTYPE_PRIVACY_IN_USE | AN_AUTHTYPE_ALLOW_UNENCRYPTED; break; default: error = EINVAL; break; } if (error != EINVAL) an_setdef(sc, &sc->areq); AN_UNLOCK(sc); break; case IEEE80211_IOC_WEPKEY: if (ireq->i_val < 0 || ireq->i_val > 8 || ireq->i_len > 13) { error = EINVAL; AN_UNLOCK(sc); break; } error = copyin(ireq->i_data, tmpstr, 13); if (error != 0) { AN_UNLOCK(sc); break; } /* * Map the 9th key into the home mode * since that is how it is stored on * the card */ bzero(&sc->areq, sizeof(struct an_ltv_key)); sc->areq.an_len = sizeof(struct an_ltv_key); key->mac[0] = 1; /* The others are 0. 
*/ if (ireq->i_val < 4) { sc->areq.an_type = AN_RID_WEP_TEMP; key->kindex = ireq->i_val; } else { sc->areq.an_type = AN_RID_WEP_PERM; key->kindex = ireq->i_val - 4; } key->klen = ireq->i_len; bcopy(tmpstr, key->key, key->klen); an_setdef(sc, &sc->areq); AN_UNLOCK(sc); break; case IEEE80211_IOC_WEPTXKEY: if (ireq->i_val < 0 || ireq->i_val > 4) { error = EINVAL; AN_UNLOCK(sc); break; } /* * Map the 5th key into the home mode * since that is how it is stored on * the card */ sc->areq.an_len = sizeof(struct an_ltv_genconfig); sc->areq.an_type = AN_RID_ACTUALCFG; if (an_read_record(sc, (struct an_ltv_gen *)&sc->areq)) { error = EINVAL; AN_UNLOCK(sc); break; } if (ireq->i_val == 4) { config->an_home_product |= AN_HOME_NETWORK; ireq->i_val = 0; } else { config->an_home_product &= ~AN_HOME_NETWORK; } sc->an_config.an_home_product = config->an_home_product; /* update configuration */ an_init_locked(sc); bzero(&sc->areq, sizeof(struct an_ltv_key)); sc->areq.an_len = sizeof(struct an_ltv_key); sc->areq.an_type = AN_RID_WEP_PERM; key->kindex = 0xffff; key->mac[0] = ireq->i_val; an_setdef(sc, &sc->areq); AN_UNLOCK(sc); break; case IEEE80211_IOC_AUTHMODE: switch (ireq->i_val) { case IEEE80211_AUTH_NONE: config->an_authtype = AN_AUTHTYPE_NONE | (config->an_authtype & ~AN_AUTHTYPE_MASK); break; case IEEE80211_AUTH_OPEN: config->an_authtype = AN_AUTHTYPE_OPEN | (config->an_authtype & ~AN_AUTHTYPE_MASK); break; case IEEE80211_AUTH_SHARED: config->an_authtype = AN_AUTHTYPE_SHAREDKEY | (config->an_authtype & ~AN_AUTHTYPE_MASK); break; default: error = EINVAL; } if (error != EINVAL) { an_setdef(sc, &sc->areq); } AN_UNLOCK(sc); break; case IEEE80211_IOC_STATIONNAME: if (ireq->i_len > 16) { error = EINVAL; AN_UNLOCK(sc); break; } bzero(config->an_nodename, 16); error = copyin(ireq->i_data, config->an_nodename, ireq->i_len); an_setdef(sc, &sc->areq); AN_UNLOCK(sc); break; case IEEE80211_IOC_CHANNEL: /* * The actual range is 1-14, but if you set it * to 0 you get the default so we let that work * too. 
*/ if (ireq->i_val < 0 || ireq->i_val >14) { error = EINVAL; AN_UNLOCK(sc); break; } config->an_ds_channel = ireq->i_val; an_setdef(sc, &sc->areq); AN_UNLOCK(sc); break; case IEEE80211_IOC_POWERSAVE: switch (ireq->i_val) { case IEEE80211_POWERSAVE_OFF: config->an_psave_mode = AN_PSAVE_NONE; break; case IEEE80211_POWERSAVE_CAM: config->an_psave_mode = AN_PSAVE_CAM; break; case IEEE80211_POWERSAVE_PSP: config->an_psave_mode = AN_PSAVE_PSP; break; case IEEE80211_POWERSAVE_PSP_CAM: config->an_psave_mode = AN_PSAVE_PSP_CAM; break; default: error = EINVAL; break; } an_setdef(sc, &sc->areq); AN_UNLOCK(sc); break; case IEEE80211_IOC_POWERSAVESLEEP: config->an_listen_interval = ireq->i_val; an_setdef(sc, &sc->areq); AN_UNLOCK(sc); break; default: AN_UNLOCK(sc); break; } /* if (!error) { AN_LOCK(sc); an_setdef(sc, &sc->areq); AN_UNLOCK(sc); } */ break; default: error = ether_ioctl(ifp, command, data); break; } out: return(error != 0); } static int an_init_tx_ring(struct an_softc *sc) { int i; int id; if (sc->an_gone) return (0); if (!sc->mpi350) { for (i = 0; i < AN_TX_RING_CNT; i++) { if (an_alloc_nicmem(sc, 1518 + 0x44, &id)) return(ENOMEM); sc->an_rdata.an_tx_fids[i] = id; sc->an_rdata.an_tx_ring[i] = 0; } } sc->an_rdata.an_tx_prod = 0; sc->an_rdata.an_tx_cons = 0; sc->an_rdata.an_tx_empty = 1; return(0); } static void an_init(void *xsc) { struct an_softc *sc = xsc; AN_LOCK(sc); an_init_locked(sc); AN_UNLOCK(sc); } static void an_init_locked(struct an_softc *sc) { struct ifnet *ifp; AN_LOCK_ASSERT(sc); ifp = sc->an_ifp; if (sc->an_gone) return; if (ifp->if_drv_flags & IFF_DRV_RUNNING) an_stop(sc); sc->an_associated = 0; /* Allocate the TX buffers */ if (an_init_tx_ring(sc)) { an_reset(sc); if (sc->mpi350) an_init_mpi350_desc(sc); if (an_init_tx_ring(sc)) { if_printf(ifp, "tx buffer allocation failed\n"); return; } } /* Set our MAC address. 
*/ bcopy((char *)IF_LLADDR(sc->an_ifp), (char *)&sc->an_config.an_macaddr, ETHER_ADDR_LEN); if (ifp->if_flags & IFF_BROADCAST) sc->an_config.an_rxmode = AN_RXMODE_BC_ADDR; else sc->an_config.an_rxmode = AN_RXMODE_ADDR; if (ifp->if_flags & IFF_MULTICAST) sc->an_config.an_rxmode = AN_RXMODE_BC_MC_ADDR; if (ifp->if_flags & IFF_PROMISC) { if (sc->an_monitor & AN_MONITOR) { if (sc->an_monitor & AN_MONITOR_ANY_BSS) { sc->an_config.an_rxmode |= AN_RXMODE_80211_MONITOR_ANYBSS | AN_RXMODE_NO_8023_HEADER; } else { sc->an_config.an_rxmode |= AN_RXMODE_80211_MONITOR_CURBSS | AN_RXMODE_NO_8023_HEADER; } } } #ifdef ANCACHE if (sc->an_have_rssimap) sc->an_config.an_rxmode |= AN_RXMODE_NORMALIZED_RSSI; #endif /* Set the ssid list */ sc->an_ssidlist.an_type = AN_RID_SSIDLIST; sc->an_ssidlist.an_len = sizeof(struct an_ltv_ssidlist_new); if (an_write_record(sc, (struct an_ltv_gen *)&sc->an_ssidlist)) { if_printf(ifp, "failed to set ssid list\n"); return; } /* Set the AP list */ sc->an_aplist.an_type = AN_RID_APLIST; sc->an_aplist.an_len = sizeof(struct an_ltv_aplist); if (an_write_record(sc, (struct an_ltv_gen *)&sc->an_aplist)) { if_printf(ifp, "failed to set AP list\n"); return; } /* Set the configuration in the NIC */ sc->an_config.an_len = sizeof(struct an_ltv_genconfig); sc->an_config.an_type = AN_RID_GENCONFIG; if (an_write_record(sc, (struct an_ltv_gen *)&sc->an_config)) { if_printf(ifp, "failed to set configuration\n"); return; } /* Enable the MAC */ if (an_cmd(sc, AN_CMD_ENABLE, 0)) { if_printf(ifp, "failed to enable MAC\n"); return; } if (ifp->if_flags & IFF_PROMISC) an_cmd(sc, AN_CMD_SET_MODE, 0xffff); /* enable interrupts */ CSR_WRITE_2(sc, AN_INT_EN(sc->mpi350), AN_INTRS(sc->mpi350)); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; callout_reset(&sc->an_stat_ch, hz, an_stats_update, sc); return; } static void an_start(struct ifnet *ifp) { struct an_softc *sc; sc = ifp->if_softc; AN_LOCK(sc); an_start_locked(ifp); AN_UNLOCK(sc); } static void an_start_locked(struct ifnet *ifp) { struct an_softc *sc; struct mbuf *m0 = NULL; struct an_txframe_802_3 tx_frame_802_3; struct ether_header *eh; int id, idx, i; unsigned char txcontrol; struct an_card_tx_desc an_tx_desc; u_int8_t *buf; sc = ifp->if_softc; AN_LOCK_ASSERT(sc); if (sc->an_gone) return; if (ifp->if_drv_flags & IFF_DRV_OACTIVE) return; if (!sc->an_associated) return; /* We can't send in monitor mode so toss any attempts. 
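A sketch of the receive-mode decision made in an_init_locked() above, using toy names in place of the driver's AN_RXMODE_* and AN_MONITOR* constants: multicast wins over broadcast-only, and promiscuous mode only changes the receive mode when the monitor flags are also set.

enum rxmode_base { RX_ADDR, RX_BC_ADDR, RX_BC_MC_ADDR };

struct rxmode_choice {
	enum rxmode_base base;
	int monitor_any_bss;	/* AN_RXMODE_80211_MONITOR_ANYBSS */
	int monitor_cur_bss;	/* AN_RXMODE_80211_MONITOR_CURBSS */
	int no_8023_header;	/* AN_RXMODE_NO_8023_HEADER: raw 802.11 frames */
};

static struct rxmode_choice
pick_rxmode(int bcast, int mcast, int promisc, int monitor, int monitor_any)
{
	struct rxmode_choice r = { bcast ? RX_BC_ADDR : RX_ADDR, 0, 0, 0 };

	if (mcast)
		r.base = RX_BC_MC_ADDR;
	if (promisc && monitor) {	/* plain promisc is handled later via AN_CMD_SET_MODE */
		if (monitor_any)
			r.monitor_any_bss = 1;
		else
			r.monitor_cur_bss = 1;
		r.no_8023_header = 1;
	}
	return (r);
}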
*/ if (sc->an_monitor && (ifp->if_flags & IFF_PROMISC)) { for (;;) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m0); if (m0 == NULL) break; m_freem(m0); } return; } idx = sc->an_rdata.an_tx_prod; if (!sc->mpi350) { bzero((char *)&tx_frame_802_3, sizeof(tx_frame_802_3)); while (sc->an_rdata.an_tx_ring[idx] == 0) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m0); if (m0 == NULL) break; id = sc->an_rdata.an_tx_fids[idx]; eh = mtod(m0, struct ether_header *); bcopy((char *)&eh->ether_dhost, (char *)&tx_frame_802_3.an_tx_dst_addr, ETHER_ADDR_LEN); bcopy((char *)&eh->ether_shost, (char *)&tx_frame_802_3.an_tx_src_addr, ETHER_ADDR_LEN); /* minus src/dest mac & type */ tx_frame_802_3.an_tx_802_3_payload_len = m0->m_pkthdr.len - 12; m_copydata(m0, sizeof(struct ether_header) - 2 , tx_frame_802_3.an_tx_802_3_payload_len, (caddr_t)&sc->an_txbuf); txcontrol = AN_TXCTL_8023 | AN_TXCTL_HW(sc->mpi350); /* write the txcontrol only */ an_write_data(sc, id, 0x08, (caddr_t)&txcontrol, sizeof(txcontrol)); /* 802_3 header */ an_write_data(sc, id, 0x34, (caddr_t)&tx_frame_802_3, sizeof(struct an_txframe_802_3)); /* in mbuf header type is just before payload */ an_write_data(sc, id, 0x44, (caddr_t)&sc->an_txbuf, tx_frame_802_3.an_tx_802_3_payload_len); /* * If there's a BPF listner, bounce a copy of * this frame to him. */ BPF_MTAP(ifp, m0); m_freem(m0); m0 = NULL; sc->an_rdata.an_tx_ring[idx] = id; if (an_cmd(sc, AN_CMD_TX, id)) if_printf(ifp, "xmit failed\n"); AN_INC(idx, AN_TX_RING_CNT); /* * Set a timeout in case the chip goes out to lunch. */ sc->an_timer = 5; } } else { /* MPI-350 */ /* Disable interrupts. */ CSR_WRITE_2(sc, AN_INT_EN(sc->mpi350), 0); while (sc->an_rdata.an_tx_empty || idx != sc->an_rdata.an_tx_cons) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m0); if (m0 == NULL) { break; } buf = sc->an_tx_buffer[idx].an_dma_vaddr; eh = mtod(m0, struct ether_header *); /* DJA optimize this to limit bcopy */ bcopy((char *)&eh->ether_dhost, (char *)&tx_frame_802_3.an_tx_dst_addr, ETHER_ADDR_LEN); bcopy((char *)&eh->ether_shost, (char *)&tx_frame_802_3.an_tx_src_addr, ETHER_ADDR_LEN); /* minus src/dest mac & type */ tx_frame_802_3.an_tx_802_3_payload_len = m0->m_pkthdr.len - 12; m_copydata(m0, sizeof(struct ether_header) - 2 , tx_frame_802_3.an_tx_802_3_payload_len, (caddr_t)&sc->an_txbuf); txcontrol = AN_TXCTL_8023 | AN_TXCTL_HW(sc->mpi350); /* write the txcontrol only */ bcopy((caddr_t)&txcontrol, &buf[0x08], sizeof(txcontrol)); /* 802_3 header */ bcopy((caddr_t)&tx_frame_802_3, &buf[0x34], sizeof(struct an_txframe_802_3)); /* in mbuf header type is just before payload */ bcopy((caddr_t)&sc->an_txbuf, &buf[0x44], tx_frame_802_3.an_tx_802_3_payload_len); bzero(&an_tx_desc, sizeof(an_tx_desc)); an_tx_desc.an_offset = 0; an_tx_desc.an_eoc = 1; an_tx_desc.an_valid = 1; an_tx_desc.an_len = 0x44 + tx_frame_802_3.an_tx_802_3_payload_len; an_tx_desc.an_phys = sc->an_tx_buffer[idx].an_dma_paddr; for (i = sizeof(an_tx_desc) / 4 - 1; i >= 0; i--) { CSR_MEM_AUX_WRITE_4(sc, AN_TX_DESC_OFFSET /* zero for now */ + (0 * sizeof(an_tx_desc)) + (i * 4), ((u_int32_t *)(void *)&an_tx_desc)[i]); } /* * If there's a BPF listner, bounce a copy of * this frame to him. */ BPF_MTAP(ifp, m0); m_freem(m0); m0 = NULL; AN_INC(idx, AN_MAX_TX_DESC); sc->an_rdata.an_tx_empty = 0; CSR_WRITE_2(sc, AN_EVENT_ACK(sc->mpi350), AN_EV_ALLOC); /* * Set a timeout in case the chip goes out to lunch. */ sc->an_timer = 5; } /* Re-enable interrupts. 
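The transmit path above splits each Ethernet frame at fixed offsets in the NIC buffer (0x08: control word, 0x34: dst/src/length pseudo-header, 0x44: type field plus payload); a small sketch of the length arithmetic it uses, assuming the caller provides a complete Ethernet frame as the mbuf path above does:

#include <sys/types.h>
#include <net/ethernet.h>

static size_t
an_txbuf_bytes_used(size_t frame_len)	/* m0->m_pkthdr.len above */
{
	size_t payload_len;

	/*
	 * The length excludes the two MAC addresses; the 2-byte type/len
	 * field stays with the payload because the copy above starts at
	 * sizeof(struct ether_header) - 2.
	 */
	payload_len = frame_len - 2 * ETHER_ADDR_LEN;
	return (0x44 + payload_len);	/* matches an_tx_desc.an_len above */
}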
*/ CSR_WRITE_2(sc, AN_INT_EN(sc->mpi350), AN_INTRS(sc->mpi350)); } if (m0 != NULL) ifp->if_drv_flags |= IFF_DRV_OACTIVE; sc->an_rdata.an_tx_prod = idx; return; } void an_stop(struct an_softc *sc) { struct ifnet *ifp; int i; AN_LOCK_ASSERT(sc); if (sc->an_gone) return; ifp = sc->an_ifp; an_cmd(sc, AN_CMD_FORCE_SYNCLOSS, 0); CSR_WRITE_2(sc, AN_INT_EN(sc->mpi350), 0); an_cmd(sc, AN_CMD_DISABLE, 0); for (i = 0; i < AN_TX_RING_CNT; i++) an_cmd(sc, AN_CMD_DEALLOC_MEM, sc->an_rdata.an_tx_fids[i]); callout_stop(&sc->an_stat_ch); ifp->if_drv_flags &= ~(IFF_DRV_RUNNING|IFF_DRV_OACTIVE); if (sc->an_flash_buffer) { free(sc->an_flash_buffer, M_DEVBUF); sc->an_flash_buffer = NULL; } } static void an_watchdog(struct an_softc *sc) { struct ifnet *ifp; AN_LOCK_ASSERT(sc); if (sc->an_gone) return; ifp = sc->an_ifp; if_printf(ifp, "device timeout\n"); an_reset(sc); if (sc->mpi350) an_init_mpi350_desc(sc); an_init_locked(sc); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } int an_shutdown(device_t dev) { struct an_softc *sc; sc = device_get_softc(dev); AN_LOCK(sc); an_stop(sc); sc->an_gone = 1; AN_UNLOCK(sc); return (0); } void an_resume(device_t dev) { struct an_softc *sc; struct ifnet *ifp; int i; sc = device_get_softc(dev); AN_LOCK(sc); ifp = sc->an_ifp; sc->an_gone = 0; an_reset(sc); if (sc->mpi350) an_init_mpi350_desc(sc); an_init_locked(sc); /* Recovery temporary keys */ for (i = 0; i < 4; i++) { sc->areq.an_type = AN_RID_WEP_TEMP; sc->areq.an_len = sizeof(struct an_ltv_key); bcopy(&sc->an_temp_keys[i], &sc->areq, sizeof(struct an_ltv_key)); an_setdef(sc, &sc->areq); } if (ifp->if_flags & IFF_UP) an_start_locked(ifp); AN_UNLOCK(sc); return; } #ifdef ANCACHE /* Aironet signal strength cache code. * store signal/noise/quality on per MAC src basis in * a small fixed cache. The cache wraps if > MAX slots * used. The cache may be zeroed out to start over. * Two simple filters exist to reduce computation: * 1. ip only (literally 0x800, ETHERTYPE_IP) which may be used * to ignore some packets. It defaults to ip only. * it could be used to focus on broadcast, non-IP 802.11 beacons. * 2. multicast/broadcast only. This may be used to * ignore unicast packets and only cache signal strength * for multicast/broadcast packets (beacons); e.g., Mobile-IP * beacons and not unicast traffic. * * The cache stores (MAC src(index), IP src (major clue), signal, * quality, noise) * * No apologies for storing IP src here. It's easy and saves much * trouble elsewhere. The cache is assumed to be INET dependent, * although it need not be. * * Note: the Aironet only has a single byte of signal strength value * in the rx frame header, and it's not scaled to anything sensible. * This is kind of lame, but it's all we've got. */ #ifdef documentation int an_sigitems; /* number of cached entries */ struct an_sigcache an_sigcache[MAXANCACHE]; /* array of cache entries */ int an_nextitem; /* index/# of entries */ #endif /* control variables for cache filtering. Basic idea is * to reduce cost (e.g., to only Mobile-IP agent beacons * which are broadcast or multicast). Still you might * want to measure signal strength anth unicast ping packets * on a pt. to pt. ant. setup. */ /* set true if you want to limit cache items to broadcast/mcast * only packets (not unicast). Useful for mobile-ip beacons which * are broadcast/multicast at network layer. Default is all packets * so ping/unicast anll work say anth pt. to pt. antennae setup. 
*/ static int an_cache_mcastonly = 0; SYSCTL_INT(_hw_an, OID_AUTO, an_cache_mcastonly, CTLFLAG_RW, &an_cache_mcastonly, 0, ""); /* set true if you want to limit cache items to IP packets only */ static int an_cache_iponly = 1; SYSCTL_INT(_hw_an, OID_AUTO, an_cache_iponly, CTLFLAG_RW, &an_cache_iponly, 0, ""); /* * an_cache_store, per rx packet store signal * strength in MAC (src) indexed cache. */ static void an_cache_store(struct an_softc *sc, struct ether_header *eh, struct mbuf *m, u_int8_t rx_rssi, u_int8_t rx_quality) { struct ip *ip = NULL; int i; static int cache_slot = 0; /* use this cache entry */ static int wrapindex = 0; /* next "free" cache entry */ int type_ipv4 = 0; /* filters: * 1. ip only * 2. configurable filter to throw out unicast packets, * keep multicast only. */ if ((ntohs(eh->ether_type) == ETHERTYPE_IP)) { type_ipv4 = 1; } /* filter for ip packets only */ if ( an_cache_iponly && !type_ipv4) { return; } /* filter for broadcast/multicast only */ if (an_cache_mcastonly && ((eh->ether_dhost[0] & 1) == 0)) { return; } #ifdef SIGDEBUG if_printf(sc->an_ifp, "q value %x (MSB=0x%x, LSB=0x%x) \n", rx_rssi & 0xffff, rx_rssi >> 8, rx_rssi & 0xff); #endif /* find the ip header. we want to store the ip_src * address. */ if (type_ipv4) { ip = mtod(m, struct ip *); } /* do a linear search for a matching MAC address * in the cache table * . MAC address is 6 bytes, * . var w_nextitem holds total number of entries already cached */ for (i = 0; i < sc->an_nextitem; i++) { if (! bcmp(eh->ether_shost , sc->an_sigcache[i].macsrc, 6 )) { /* Match!, * so we already have this entry, * update the data */ break; } } /* did we find a matching mac address? * if yes, then overwrite a previously existing cache entry */ if (i < sc->an_nextitem ) { cache_slot = i; } /* else, have a new address entry,so * add this new entry, * if table full, then we need to replace LRU entry */ else { /* check for space in cache table * note: an_nextitem also holds number of entries * added in the cache table */ if ( sc->an_nextitem < MAXANCACHE ) { cache_slot = sc->an_nextitem; sc->an_nextitem++; sc->an_sigitems = sc->an_nextitem; } /* no space found, so simply wrap anth wrap index * and "zap" the next entry */ else { if (wrapindex == MAXANCACHE) { wrapindex = 0; } cache_slot = wrapindex++; } } /* invariant: cache_slot now points at some slot * in cache. */ if (cache_slot < 0 || cache_slot >= MAXANCACHE) { log(LOG_ERR, "an_cache_store, bad index: %d of " "[0..%d], gross cache error\n", cache_slot, MAXANCACHE); return; } /* store items in cache * .ip source address * .mac src * .signal, etc. 
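A standalone sketch of the slot-selection policy used by an_cache_store() above: reuse the slot of a known source MAC, append while there is room, and recycle slots round-robin once the table is full. The constants and entry layout are simplified stand-ins for MAXANCACHE and struct an_sigcache.

#include <string.h>

#define CACHE_SLOTS 16			/* stands in for MAXANCACHE */

struct sig_entry { unsigned char mac[6]; int signal; };

static int
cache_pick_slot(struct sig_entry *tab, int *nitems, int *wrap,
    const unsigned char mac[6])
{
	int i;

	for (i = 0; i < *nitems; i++)		/* linear search by source MAC */
		if (memcmp(tab[i].mac, mac, 6) == 0)
			return (i);		/* known station: overwrite in place */
	if (*nitems < CACHE_SLOTS)
		return ((*nitems)++);		/* room left: append a new entry */
	if (*wrap == CACHE_SLOTS)		/* table full: wrap and recycle */
		*wrap = 0;
	return ((*wrap)++);
}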
*/ if (type_ipv4) { sc->an_sigcache[cache_slot].ipsrc = ip->ip_src.s_addr; } bcopy( eh->ether_shost, sc->an_sigcache[cache_slot].macsrc, 6); switch (an_cache_mode) { case DBM: if (sc->an_have_rssimap) { sc->an_sigcache[cache_slot].signal = - sc->an_rssimap.an_entries[rx_rssi].an_rss_dbm; sc->an_sigcache[cache_slot].quality = - sc->an_rssimap.an_entries[rx_quality].an_rss_dbm; } else { sc->an_sigcache[cache_slot].signal = rx_rssi - 100; sc->an_sigcache[cache_slot].quality = rx_quality - 100; } break; case PERCENT: if (sc->an_have_rssimap) { sc->an_sigcache[cache_slot].signal = sc->an_rssimap.an_entries[rx_rssi].an_rss_pct; sc->an_sigcache[cache_slot].quality = sc->an_rssimap.an_entries[rx_quality].an_rss_pct; } else { if (rx_rssi > 100) rx_rssi = 100; if (rx_quality > 100) rx_quality = 100; sc->an_sigcache[cache_slot].signal = rx_rssi; sc->an_sigcache[cache_slot].quality = rx_quality; } break; case RAW: sc->an_sigcache[cache_slot].signal = rx_rssi; sc->an_sigcache[cache_slot].quality = rx_quality; break; } sc->an_sigcache[cache_slot].noise = 0; return; } #endif static int an_media_change(struct ifnet *ifp) { struct an_softc *sc = ifp->if_softc; struct an_ltv_genconfig *cfg; int otype = sc->an_config.an_opmode; int orate = sc->an_tx_rate; AN_LOCK(sc); sc->an_tx_rate = ieee80211_media2rate( IFM_SUBTYPE(sc->an_ifmedia.ifm_cur->ifm_media)); if (sc->an_tx_rate < 0) sc->an_tx_rate = 0; if (orate != sc->an_tx_rate) { /* Read the current configuration */ sc->an_config.an_type = AN_RID_GENCONFIG; sc->an_config.an_len = sizeof(struct an_ltv_genconfig); an_read_record(sc, (struct an_ltv_gen *)&sc->an_config); cfg = &sc->an_config; /* clear other rates and set the only one we want */ bzero(cfg->an_rates, sizeof(cfg->an_rates)); cfg->an_rates[0] = sc->an_tx_rate; /* Save the new rate */ sc->an_config.an_type = AN_RID_GENCONFIG; sc->an_config.an_len = sizeof(struct an_ltv_genconfig); } if ((sc->an_ifmedia.ifm_cur->ifm_media & IFM_IEEE80211_ADHOC) != 0) sc->an_config.an_opmode &= ~AN_OPMODE_INFRASTRUCTURE_STATION; else sc->an_config.an_opmode |= AN_OPMODE_INFRASTRUCTURE_STATION; if (otype != sc->an_config.an_opmode || orate != sc->an_tx_rate) an_init_locked(sc); AN_UNLOCK(sc); return(0); } static void an_media_status(struct ifnet *ifp, struct ifmediareq *imr) { struct an_ltv_status status; struct an_softc *sc = ifp->if_softc; imr->ifm_active = IFM_IEEE80211; AN_LOCK(sc); status.an_len = sizeof(status); status.an_type = AN_RID_STATUS; if (an_read_record(sc, (struct an_ltv_gen *)&status)) { /* If the status read fails, just lie. 
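The fallback scaling above (used when the card does not provide an RSSI map) is simple enough to restate directly; when a map is present the raw value is looked up in it instead:

static int
an_rssi_to_dbm(unsigned int raw)
{
	return ((int)raw - 100);	/* DBM mode without a map */
}

static unsigned int
an_rssi_to_percent(unsigned int raw)
{
	return (raw > 100 ? 100 : raw);	/* PERCENT mode without a map */
}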
*/ imr->ifm_active = sc->an_ifmedia.ifm_cur->ifm_media; imr->ifm_status = IFM_AVALID|IFM_ACTIVE; } if (sc->an_tx_rate == 0) { imr->ifm_active = IFM_IEEE80211|IFM_AUTO; } if (sc->an_config.an_opmode == AN_OPMODE_IBSS_ADHOC) imr->ifm_active |= IFM_IEEE80211_ADHOC; imr->ifm_active |= ieee80211_rate2media(NULL, status.an_current_tx_rate, IEEE80211_MODE_AUTO); imr->ifm_status = IFM_AVALID; if (status.an_opmode & AN_STATUS_OPMODE_ASSOCIATED) imr->ifm_status |= IFM_ACTIVE; AN_UNLOCK(sc); } /********************** Cisco utility support routines *************/ /* * ReadRids & WriteRids derived from Cisco driver additions to Ben Reed's * Linux driver */ static int readrids(struct ifnet *ifp, struct aironet_ioctl *l_ioctl) { unsigned short rid; struct an_softc *sc; int error; switch (l_ioctl->command) { case AIROGCAP: rid = AN_RID_CAPABILITIES; break; case AIROGCFG: rid = AN_RID_GENCONFIG; break; case AIROGSLIST: rid = AN_RID_SSIDLIST; break; case AIROGVLIST: rid = AN_RID_APLIST; break; case AIROGDRVNAM: rid = AN_RID_DRVNAME; break; case AIROGEHTENC: rid = AN_RID_ENCAPPROTO; break; case AIROGWEPKTMP: rid = AN_RID_WEP_TEMP; break; case AIROGWEPKNV: rid = AN_RID_WEP_PERM; break; case AIROGSTAT: rid = AN_RID_STATUS; break; case AIROGSTATSD32: rid = AN_RID_32BITS_DELTA; break; case AIROGSTATSC32: rid = AN_RID_32BITS_CUM; break; default: rid = 999; break; } if (rid == 999) /* Is bad command */ return -EINVAL; sc = ifp->if_softc; sc->areq.an_len = AN_MAX_DATALEN; sc->areq.an_type = rid; an_read_record(sc, (struct an_ltv_gen *)&sc->areq); l_ioctl->len = sc->areq.an_len - 4; /* just data */ AN_UNLOCK(sc); /* the data contains the length at first */ if (copyout(&(sc->areq.an_len), l_ioctl->data, sizeof(sc->areq.an_len))) { error = -EFAULT; goto lock_exit; } /* Just copy the data back */ if (copyout(&(sc->areq.an_val), l_ioctl->data + 2, l_ioctl->len)) { error = -EFAULT; goto lock_exit; } error = 0; lock_exit: AN_LOCK(sc); return (error); } static int writerids(struct ifnet *ifp, struct aironet_ioctl *l_ioctl) { struct an_softc *sc; int rid, command, error; sc = ifp->if_softc; AN_LOCK_ASSERT(sc); rid = 0; command = l_ioctl->command; switch (command) { case AIROPSIDS: rid = AN_RID_SSIDLIST; break; case AIROPCAP: rid = AN_RID_CAPABILITIES; break; case AIROPAPLIST: rid = AN_RID_APLIST; break; case AIROPCFG: rid = AN_RID_GENCONFIG; break; case AIROPMACON: an_cmd(sc, AN_CMD_ENABLE, 0); return 0; break; case AIROPMACOFF: an_cmd(sc, AN_CMD_DISABLE, 0); return 0; break; case AIROPSTCLR: /* * This command merely clears the counts does not actually * store any data only reads rid. But as it changes the cards * state, I put it in the writerid routines. 
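A sketch of how a userland consumer of these Cisco-style RID requests would interpret the buffer filled in above: a two-byte length word first (inclusive of the RID's 4-byte type/length header), then the RID payload at offset 2. The struct and function names here are made up.

#include <stdint.h>
#include <stddef.h>
#include <string.h>

struct rid_reply {
	uint16_t an_len;	/* length word reported by the card */
	const uint8_t *payload;	/* an_len - 4 bytes of RID data */
	size_t payload_len;
};

static int
parse_rid_reply(const uint8_t *buf, size_t buflen, struct rid_reply *out)
{
	if (buflen < sizeof(out->an_len))
		return (-1);
	memcpy(&out->an_len, buf, sizeof(out->an_len));	/* two-byte length word */
	if (out->an_len < 4)
		return (-1);
	out->payload_len = (size_t)out->an_len - 4;	/* mirrors l_ioctl->len above */
	if (buflen < out->payload_len + 2)
		return (-1);
	out->payload = buf + 2;
	return (0);
}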
*/ rid = AN_RID_32BITS_DELTACLR; sc = ifp->if_softc; sc->areq.an_len = AN_MAX_DATALEN; sc->areq.an_type = rid; an_read_record(sc, (struct an_ltv_gen *)&sc->areq); l_ioctl->len = sc->areq.an_len - 4; /* just data */ AN_UNLOCK(sc); /* the data contains the length at first */ error = copyout(&(sc->areq.an_len), l_ioctl->data, sizeof(sc->areq.an_len)); if (error) { AN_LOCK(sc); return -EFAULT; } /* Just copy the data */ error = copyout(&(sc->areq.an_val), l_ioctl->data + 2, l_ioctl->len); AN_LOCK(sc); if (error) return -EFAULT; return 0; break; case AIROPWEPKEY: rid = AN_RID_WEP_TEMP; break; case AIROPWEPKEYNV: rid = AN_RID_WEP_PERM; break; case AIROPLEAPUSR: rid = AN_RID_LEAPUSERNAME; break; case AIROPLEAPPWD: rid = AN_RID_LEAPPASSWORD; break; default: return -EOPNOTSUPP; } if (rid) { if (l_ioctl->len > sizeof(sc->areq.an_val) + 4) return -EINVAL; sc->areq.an_len = l_ioctl->len + 4; /* add type & length */ sc->areq.an_type = rid; /* Just copy the data back */ AN_UNLOCK(sc); error = copyin((l_ioctl->data) + 2, &sc->areq.an_val, l_ioctl->len); AN_LOCK(sc); if (error) return -EFAULT; an_cmd(sc, AN_CMD_DISABLE, 0); an_write_record(sc, (struct an_ltv_gen *)&sc->areq); an_cmd(sc, AN_CMD_ENABLE, 0); return 0; } return -EOPNOTSUPP; } /* * General Flash utilities derived from Cisco driver additions to Ben Reed's * Linux driver */ #define FLASH_DELAY(_sc, x) msleep(ifp, &(_sc)->an_mtx, PZERO, \ "flash", ((x) / hz) + 1); #define FLASH_COMMAND 0x7e7e #define FLASH_SIZE 32 * 1024 static int unstickbusy(struct ifnet *ifp) { struct an_softc *sc = ifp->if_softc; if (CSR_READ_2(sc, AN_COMMAND(sc->mpi350)) & AN_CMD_BUSY) { CSR_WRITE_2(sc, AN_EVENT_ACK(sc->mpi350), AN_EV_CLR_STUCK_BUSY); return 1; } return 0; } /* * Wait for busy completion from card wait for delay uSec's Return true for * success meaning command reg is clear */ static int WaitBusy(struct ifnet *ifp, int uSec) { int statword = 0xffff; int delay = 0; struct an_softc *sc = ifp->if_softc; while ((statword & AN_CMD_BUSY) && delay <= (1000 * 100)) { FLASH_DELAY(sc, 10); delay += 10; statword = CSR_READ_2(sc, AN_COMMAND(sc->mpi350)); if ((AN_CMD_BUSY & statword) && (delay % 200)) { unstickbusy(ifp); } } return 0 == (AN_CMD_BUSY & statword); } /* * STEP 1) Disable MAC and do soft reset on card. 
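WaitBusy() above is a bounded poll on the command register's busy bit; a generic sketch of the same pattern, with usleep() standing in for the driver's msleep()-based FLASH_DELAY() and the two callbacks standing in for the CSR read and the stuck-busy acknowledgement:

#include <unistd.h>

#define BUSY_BIT	0x8000		/* stands in for AN_CMD_BUSY */
#define POLL_STEP	10
#define POLL_BUDGET	(1000 * 100)

static int
wait_not_busy(unsigned int (*read_status)(void *), void (*unstick)(void *), void *arg)
{
	unsigned int status = 0xffff;
	int spent = 0;

	while ((status & BUSY_BIT) && spent <= POLL_BUDGET) {
		usleep(POLL_STEP);		/* driver: FLASH_DELAY(sc, 10) */
		spent += POLL_STEP;
		status = read_status(arg);
		if ((status & BUSY_BIT) && (spent % 200))
			unstick(arg);		/* driver: ack the stuck-busy event */
	}
	return ((status & BUSY_BIT) == 0);	/* non-zero on success */
}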
*/ static int cmdreset(struct ifnet *ifp) { int status; struct an_softc *sc = ifp->if_softc; AN_LOCK(sc); an_stop(sc); an_cmd(sc, AN_CMD_DISABLE, 0); if (!(status = WaitBusy(ifp, AN_TIMEOUT))) { if_printf(ifp, "Waitbusy hang b4 RESET =%d\n", status); AN_UNLOCK(sc); return -EBUSY; } CSR_WRITE_2(sc, AN_COMMAND(sc->mpi350), AN_CMD_FW_RESTART); FLASH_DELAY(sc, 1000); /* WAS 600 12/7/00 */ if (!(status = WaitBusy(ifp, 100))) { if_printf(ifp, "Waitbusy hang AFTER RESET =%d\n", status); AN_UNLOCK(sc); return -EBUSY; } AN_UNLOCK(sc); return 0; } /* * STEP 2) Put the card in legendary flash mode */ static int setflashmode(struct ifnet *ifp) { int status; struct an_softc *sc = ifp->if_softc; CSR_WRITE_2(sc, AN_SW0(sc->mpi350), FLASH_COMMAND); CSR_WRITE_2(sc, AN_SW1(sc->mpi350), FLASH_COMMAND); CSR_WRITE_2(sc, AN_SW0(sc->mpi350), FLASH_COMMAND); CSR_WRITE_2(sc, AN_COMMAND(sc->mpi350), FLASH_COMMAND); /* * mdelay(500); // 500ms delay */ FLASH_DELAY(sc, 500); if (!(status = WaitBusy(ifp, AN_TIMEOUT))) { printf("Waitbusy hang after setflash mode\n"); return -EIO; } return 0; } /* * Get a character from the card matching matchbyte Step 3) */ static int flashgchar(struct ifnet *ifp, int matchbyte, int dwelltime) { int rchar; unsigned char rbyte = 0; int success = -1; struct an_softc *sc = ifp->if_softc; do { rchar = CSR_READ_2(sc, AN_SW1(sc->mpi350)); if (dwelltime && !(0x8000 & rchar)) { dwelltime -= 10; FLASH_DELAY(sc, 10); continue; } rbyte = 0xff & rchar; if ((rbyte == matchbyte) && (0x8000 & rchar)) { CSR_WRITE_2(sc, AN_SW1(sc->mpi350), 0); success = 1; break; } if (rbyte == 0x81 || rbyte == 0x82 || rbyte == 0x83 || rbyte == 0x1a || 0xffff == rchar) break; CSR_WRITE_2(sc, AN_SW1(sc->mpi350), 0); } while (dwelltime > 0); return success; } /* * Put character to SWS0 wait for dwelltime x 50us for echo . */ static int flashpchar(struct ifnet *ifp, int byte, int dwelltime) { int echo; int pollbusy, waittime; struct an_softc *sc = ifp->if_softc; byte |= 0x8000; if (dwelltime == 0) dwelltime = 200; waittime = dwelltime; /* * Wait for busy bit d15 to go false indicating buffer empty */ do { pollbusy = CSR_READ_2(sc, AN_SW0(sc->mpi350)); if (pollbusy & 0x8000) { FLASH_DELAY(sc, 50); waittime -= 50; continue; } else break; } while (waittime >= 0); /* timeout for busy clear wait */ if (waittime <= 0) { if_printf(ifp, "flash putchar busywait timeout!\n"); return -1; } /* * Port is clear now write byte and wait for it to echo back */ do { CSR_WRITE_2(sc, AN_SW0(sc->mpi350), byte); FLASH_DELAY(sc, 50); dwelltime -= 50; echo = CSR_READ_2(sc, AN_SW1(sc->mpi350)); } while (dwelltime >= 0 && echo != byte); CSR_WRITE_2(sc, AN_SW1(sc->mpi350), 0); return echo == byte; } /* * Transfer 32k of firmware data from user buffer to our buffer and send to * the card */ static int flashputbuf(struct ifnet *ifp) { unsigned short *bufp; int nwords; struct an_softc *sc = ifp->if_softc; /* Write stuff */ bufp = sc->an_flash_buffer; if (!sc->mpi350) { CSR_WRITE_2(sc, AN_AUX_PAGE, 0x100); CSR_WRITE_2(sc, AN_AUX_OFFSET, 0); for (nwords = 0; nwords != FLASH_SIZE / 2; nwords++) { CSR_WRITE_2(sc, AN_AUX_DATA, bufp[nwords] & 0xffff); } } else { for (nwords = 0; nwords != FLASH_SIZE / 4; nwords++) { CSR_MEM_AUX_WRITE_4(sc, 0x8000, ((u_int32_t *)bufp)[nwords] & 0xffff); } } CSR_WRITE_2(sc, AN_SW0(sc->mpi350), 0x8000); return 0; } /* * After flashing restart the card. 
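flashpchar() above implements a small mailbox handshake: the byte goes out through software register 0 with bit 15 set as a valid flag, and software register 1 is polled until the card echoes the same value back. A sketch of that handshake with hypothetical register accessors in place of the CSR_*() macros and FLASH_DELAY():

static int
flash_put_byte(unsigned int (*reg_read)(int), void (*reg_write)(int, unsigned int),
    void (*delay_units)(int), int byte)
{
	unsigned int word = (unsigned int)byte | 0x8000;	/* bit 15 marks the word valid */
	unsigned int echo = 0;
	int budget = 200;

	while (reg_read(0) & 0x8000) {		/* wait for the outgoing register to drain */
		if (budget <= 0)
			return (-1);
		delay_units(50);
		budget -= 50;
	}
	do {					/* write, then wait for the card to echo it */
		reg_write(0, word);
		delay_units(50);
		budget -= 50;
		echo = reg_read(1);
	} while (echo != word && budget >= 0);
	reg_write(1, 0);			/* clear the echo register */
	return (echo == word ? 0 : -1);
}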
*/ static int flashrestart(struct ifnet *ifp) { int status = 0; struct an_softc *sc = ifp->if_softc; FLASH_DELAY(sc, 1024); /* Added 12/7/00 */ an_init_locked(sc); FLASH_DELAY(sc, 1024); /* Added 12/7/00 */ return status; } /* * Entry point for flash ioclt. */ static int flashcard(struct ifnet *ifp, struct aironet_ioctl *l_ioctl) { int z = 0, status; struct an_softc *sc; sc = ifp->if_softc; if (sc->mpi350) { if_printf(ifp, "flashing not supported on MPI 350 yet\n"); return(-1); } status = l_ioctl->command; switch (l_ioctl->command) { case AIROFLSHRST: return cmdreset(ifp); break; case AIROFLSHSTFL: if (sc->an_flash_buffer) { free(sc->an_flash_buffer, M_DEVBUF); sc->an_flash_buffer = NULL; } sc->an_flash_buffer = malloc(FLASH_SIZE, M_DEVBUF, M_WAITOK); if (sc->an_flash_buffer) return setflashmode(ifp); else return ENOBUFS; break; case AIROFLSHGCHR: /* Get char from aux */ if (l_ioctl->len > sizeof(sc->areq)) { return -EINVAL; } AN_UNLOCK(sc); status = copyin(l_ioctl->data, &sc->areq, l_ioctl->len); AN_LOCK(sc); if (status) return status; z = *(int *)&sc->areq; if ((status = flashgchar(ifp, z, 8000)) == 1) return 0; else return -1; case AIROFLSHPCHR: /* Send char to card. */ if (l_ioctl->len > sizeof(sc->areq)) { return -EINVAL; } AN_UNLOCK(sc); status = copyin(l_ioctl->data, &sc->areq, l_ioctl->len); AN_LOCK(sc); if (status) return status; z = *(int *)&sc->areq; if ((status = flashpchar(ifp, z, 8000)) == -1) return -EIO; else return 0; break; case AIROFLPUTBUF: /* Send 32k to card */ if (l_ioctl->len > FLASH_SIZE) { if_printf(ifp, "Buffer to big, %x %x\n", l_ioctl->len, FLASH_SIZE); return -EINVAL; } AN_UNLOCK(sc); status = copyin(l_ioctl->data, sc->an_flash_buffer, l_ioctl->len); AN_LOCK(sc); if (status) return status; if ((status = flashputbuf(ifp)) != 0) return -EIO; else return 0; break; case AIRORESTART: if ((status = flashrestart(ifp)) != 0) { if_printf(ifp, "FLASHRESTART returned %d\n", status); return -EIO; } else return 0; break; default: return -EINVAL; } return -EINVAL; } Index: stable/11/sys/dev/ath/if_ath_ioctl.c =================================================================== --- stable/11/sys/dev/ath/if_ath_ioctl.c (revision 332287) +++ stable/11/sys/dev/ath/if_ath_ioctl.c (revision 332288) @@ -1,307 +1,307 @@ /*- * Copyright (c) 2002-2009 Sam Leffler, Errno Consulting * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any * redistribution must be conditioned upon including a substantially * similar Disclaimer requirement for further binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGES. */ #include __FBSDID("$FreeBSD$"); /* * Driver for the Atheros Wireless LAN controller. * * This software is derived from work of Atsushi Onoe; his contribution * is greatly appreciated. */ #include "opt_inet.h" #include "opt_ath.h" /* * This is needed for register operations which are performed * by the driver - eg, calls to ath_hal_gettsf32(). * * It's also required for any AH_DEBUG checks in here, eg the * module dependencies. */ #include "opt_ah.h" #include "opt_wlan.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* for mp_ncpus */ #include #include #include #include #include #include #include #include #include #include #include #ifdef IEEE80211_SUPPORT_SUPERG #include #endif #ifdef IEEE80211_SUPPORT_TDMA #include #endif #include #ifdef INET #include #include #endif #include #include /* XXX for softled */ #include #include #include #include #include #include #include #ifdef IEEE80211_SUPPORT_TDMA #include #endif #include /* * ioctl() related pieces. * * Some subsystems (eg spectral, dfs) have their own ioctl method which * we call. */ /* * Fetch the rate control statistics for the given node. */ static int ath_ioctl_ratestats(struct ath_softc *sc, struct ath_rateioctl *rs) { struct ath_node *an; struct ieee80211com *ic = &sc->sc_ic; struct ieee80211_node *ni; int error = 0; /* Perform a lookup on the given node */ ni = ieee80211_find_node(&ic->ic_sta, rs->is_u.macaddr); if (ni == NULL) { error = EINVAL; goto bad; } /* Lock the ath_node */ an = ATH_NODE(ni); ATH_NODE_LOCK(an); /* Fetch the rate control stats for this node */ error = ath_rate_fetch_node_stats(sc, an, rs); /* No matter what happens here, just drop through */ /* Unlock the ath_node */ ATH_NODE_UNLOCK(an); /* Unref the node */ ieee80211_node_decref(ni); bad: return (error); } #ifdef ATH_DIAGAPI /* * Diagnostic interface to the HAL. This is used by various * tools to do things like retrieve register contents for * debugging. The mechanism is intentionally opaque so that * it can change frequently w/o concern for compatibility. */ static int ath_ioctl_diag(struct ath_softc *sc, struct ath_diag *ad) { struct ath_hal *ah = sc->sc_ah; u_int id = ad->ad_id & ATH_DIAG_ID; void *indata = NULL; void *outdata = NULL; u_int32_t insize = ad->ad_in_size; u_int32_t outsize = ad->ad_out_size; int error = 0; if (ad->ad_id & ATH_DIAG_IN) { /* * Copy in data. */ indata = malloc(insize, M_TEMP, M_NOWAIT); if (indata == NULL) { error = ENOMEM; goto bad; } error = copyin(ad->ad_in_data, indata, insize); if (error) goto bad; } if (ad->ad_id & ATH_DIAG_DYN) { /* * Allocate a buffer for the results (otherwise the HAL * returns a pointer to a buffer where we can read the * results). Note that we depend on the HAL leaving this * pointer for us to use below in reclaiming the buffer; * may want to be more defensive. 
*/ outdata = malloc(outsize, M_TEMP, M_NOWAIT); if (outdata == NULL) { error = ENOMEM; goto bad; } } ATH_LOCK(sc); if (id != HAL_DIAG_REGS) ath_power_set_power_state(sc, HAL_PM_AWAKE); ATH_UNLOCK(sc); if (ath_hal_getdiagstate(ah, id, indata, insize, &outdata, &outsize)) { if (outsize < ad->ad_out_size) ad->ad_out_size = outsize; if (outdata != NULL) error = copyout(outdata, ad->ad_out_data, ad->ad_out_size); } else { error = EINVAL; } ATH_LOCK(sc); if (id != HAL_DIAG_REGS) ath_power_restore_power_state(sc); ATH_UNLOCK(sc); bad: if ((ad->ad_id & ATH_DIAG_IN) && indata != NULL) free(indata, M_TEMP); if ((ad->ad_id & ATH_DIAG_DYN) && outdata != NULL) free(outdata, M_TEMP); return error; } #endif /* ATH_DIAGAPI */ int ath_ioctl(struct ieee80211com *ic, u_long cmd, void *data) { struct ifreq *ifr = data; struct ath_softc *sc = ic->ic_softc; switch (cmd) { case SIOCGATHSTATS: { struct ieee80211vap *vap; struct ifnet *ifp; const HAL_RATE_TABLE *rt; /* NB: embed these numbers to get a consistent view */ sc->sc_stats.ast_tx_packets = 0; sc->sc_stats.ast_rx_packets = 0; TAILQ_FOREACH(vap, &ic->ic_vaps, iv_next) { ifp = vap->iv_ifp; sc->sc_stats.ast_tx_packets += ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS); sc->sc_stats.ast_rx_packets += ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS); } sc->sc_stats.ast_tx_rssi = ATH_RSSI(sc->sc_halstats.ns_avgtxrssi); sc->sc_stats.ast_rx_rssi = ATH_RSSI(sc->sc_halstats.ns_avgrssi); #ifdef IEEE80211_SUPPORT_TDMA sc->sc_stats.ast_tdma_tsfadjp = TDMA_AVG(sc->sc_avgtsfdeltap); sc->sc_stats.ast_tdma_tsfadjm = TDMA_AVG(sc->sc_avgtsfdeltam); #endif rt = sc->sc_currates; sc->sc_stats.ast_tx_rate = rt->info[sc->sc_txrix].dot11Rate &~ IEEE80211_RATE_BASIC; if (rt->info[sc->sc_txrix].phy & IEEE80211_T_HT) sc->sc_stats.ast_tx_rate |= IEEE80211_RATE_MCS; - return copyout(&sc->sc_stats, - ifr->ifr_data, sizeof (sc->sc_stats)); + return copyout(&sc->sc_stats, ifr_data_get_ptr(ifr), + sizeof (sc->sc_stats)); } case SIOCGATHAGSTATS: - return copyout(&sc->sc_aggr_stats, - ifr->ifr_data, sizeof (sc->sc_aggr_stats)); + return copyout(&sc->sc_aggr_stats, ifr_data_get_ptr(ifr), + sizeof (sc->sc_aggr_stats)); case SIOCZATHSTATS: { int error; error = priv_check(curthread, PRIV_DRIVER); if (error == 0) { memset(&sc->sc_stats, 0, sizeof(sc->sc_stats)); memset(&sc->sc_aggr_stats, 0, sizeof(sc->sc_aggr_stats)); memset(&sc->sc_intr_stats, 0, sizeof(sc->sc_intr_stats)); } return (error); } #ifdef ATH_DIAGAPI case SIOCGATHDIAG: return (ath_ioctl_diag(sc, data)); case SIOCGATHPHYERR: return (ath_ioctl_phyerr(sc, data)); #endif case SIOCGATHSPECTRAL: return (ath_ioctl_spectral(sc, data)); case SIOCGATHNODERATESTATS: return (ath_ioctl_ratestats(sc, data)); default: /* * This signals the net80211 layer that we didn't handle this * ioctl. */ return (ENOTTY); } } Index: stable/11/sys/dev/cxgbe/t4_main.c =================================================================== --- stable/11/sys/dev/cxgbe/t4_main.c (revision 332287) +++ stable/11/sys/dev/cxgbe/t4_main.c (revision 332288) @@ -1,10299 +1,10299 @@ /*- * Copyright (c) 2011 Chelsio Communications, Inc. * All rights reserved. * Written by: Navdeep Parhar * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
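The two modified copyout() calls above are the substance of this hunk: the raw ifr->ifr_data dereference is replaced with ifr_data_get_ptr(), the accessor that extracts the user data pointer from a struct ifreq (which, among other things, lets requests from 32-bit compat processes resolve to the right user address). A kernel-context sketch of the resulting pattern, with a made-up stats structure:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <net/if.h>

struct example_stats { uint64_t rx_pkts, tx_pkts; };	/* toy stats block */

static int
example_copyout_stats(struct ifreq *ifr, const struct example_stats *st)
{
	/* Hand the stats back at the user address carried in the ifreq. */
	return (copyout(st, ifr_data_get_ptr(ifr), sizeof(*st)));
}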
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_inet.h" #include "opt_inet6.h" #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RSS #include #endif #if defined(__i386__) || defined(__amd64__) #include #include #include #include #endif #ifdef DDB #include #include #endif #include "common/common.h" #include "common/t4_msg.h" #include "common/t4_regs.h" #include "common/t4_regs_values.h" #include "cudbg/cudbg.h" #include "t4_ioctl.h" #include "t4_l2t.h" #include "t4_mp_ring.h" #include "t4_if.h" /* T4 bus driver interface */ static int t4_probe(device_t); static int t4_attach(device_t); static int t4_detach(device_t); static int t4_ready(device_t); static int t4_read_port_device(device_t, int, device_t *); static device_method_t t4_methods[] = { DEVMETHOD(device_probe, t4_probe), DEVMETHOD(device_attach, t4_attach), DEVMETHOD(device_detach, t4_detach), DEVMETHOD(t4_is_main_ready, t4_ready), DEVMETHOD(t4_read_port_device, t4_read_port_device), DEVMETHOD_END }; static driver_t t4_driver = { "t4nex", t4_methods, sizeof(struct adapter) }; /* T4 port (cxgbe) interface */ static int cxgbe_probe(device_t); static int cxgbe_attach(device_t); static int cxgbe_detach(device_t); device_method_t cxgbe_methods[] = { DEVMETHOD(device_probe, cxgbe_probe), DEVMETHOD(device_attach, cxgbe_attach), DEVMETHOD(device_detach, cxgbe_detach), { 0, 0 } }; static driver_t cxgbe_driver = { "cxgbe", cxgbe_methods, sizeof(struct port_info) }; /* T4 VI (vcxgbe) interface */ static int vcxgbe_probe(device_t); static int vcxgbe_attach(device_t); static int vcxgbe_detach(device_t); static device_method_t vcxgbe_methods[] = { DEVMETHOD(device_probe, vcxgbe_probe), DEVMETHOD(device_attach, vcxgbe_attach), DEVMETHOD(device_detach, vcxgbe_detach), { 0, 0 } }; static driver_t vcxgbe_driver = { "vcxgbe", vcxgbe_methods, sizeof(struct vi_info) }; static d_ioctl_t t4_ioctl; static struct cdevsw t4_cdevsw = { .d_version = D_VERSION, .d_ioctl = t4_ioctl, .d_name = "t4nex", }; /* T5 bus driver interface */ static int t5_probe(device_t); static device_method_t t5_methods[] = { DEVMETHOD(device_probe, t5_probe), DEVMETHOD(device_attach, t4_attach), DEVMETHOD(device_detach, t4_detach), DEVMETHOD(t4_is_main_ready, t4_ready), DEVMETHOD(t4_read_port_device, t4_read_port_device), DEVMETHOD_END }; static driver_t t5_driver = { "t5nex", t5_methods, sizeof(struct adapter) }; /* T5 
port (cxl) interface */ static driver_t cxl_driver = { "cxl", cxgbe_methods, sizeof(struct port_info) }; /* T5 VI (vcxl) interface */ static driver_t vcxl_driver = { "vcxl", vcxgbe_methods, sizeof(struct vi_info) }; /* T6 bus driver interface */ static int t6_probe(device_t); static device_method_t t6_methods[] = { DEVMETHOD(device_probe, t6_probe), DEVMETHOD(device_attach, t4_attach), DEVMETHOD(device_detach, t4_detach), DEVMETHOD(t4_is_main_ready, t4_ready), DEVMETHOD(t4_read_port_device, t4_read_port_device), DEVMETHOD_END }; static driver_t t6_driver = { "t6nex", t6_methods, sizeof(struct adapter) }; /* T6 port (cc) interface */ static driver_t cc_driver = { "cc", cxgbe_methods, sizeof(struct port_info) }; /* T6 VI (vcc) interface */ static driver_t vcc_driver = { "vcc", vcxgbe_methods, sizeof(struct vi_info) }; /* ifnet + media interface */ static void cxgbe_init(void *); static int cxgbe_ioctl(struct ifnet *, unsigned long, caddr_t); static int cxgbe_transmit(struct ifnet *, struct mbuf *); static void cxgbe_qflush(struct ifnet *); static int cxgbe_media_change(struct ifnet *); static void cxgbe_media_status(struct ifnet *, struct ifmediareq *); MALLOC_DEFINE(M_CXGBE, "cxgbe", "Chelsio T4/T5 Ethernet driver and services"); /* * Correct lock order when you need to acquire multiple locks is t4_list_lock, * then ADAPTER_LOCK, then t4_uld_list_lock. */ static struct sx t4_list_lock; SLIST_HEAD(, adapter) t4_list; #ifdef TCP_OFFLOAD static struct sx t4_uld_list_lock; SLIST_HEAD(, uld_info) t4_uld_list; #endif /* * Tunables. See tweak_tunables() too. * * Each tunable is set to a default value here if it's known at compile-time. * Otherwise it is set to -n as an indication to tweak_tunables() that it should * provide a reasonable default (upto n) when the driver is loaded. * * Tunables applicable to both T4 and T5 are under hw.cxgbe. Those specific to * T5 are under hw.cxl. */ /* * Number of queues for tx and rx, NIC and offload. 
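An illustration of the sign convention described in the comment above: a positive tunable is taken literally, while a default of -n asks tweak_tunables() to choose a sensible value capped at n. The helper below is hypothetical; the real policy (and what it is capped against) lives in tweak_tunables().

static int
effective_queue_count(int tunable, int suggested)
{
	if (tunable > 0)
		return (tunable);	/* administrator forced a value */
	return (suggested < -tunable ? suggested : -tunable);	/* auto-pick, capped at n */
}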
*/ #define NTXQ 16 int t4_ntxq = -NTXQ; TUNABLE_INT("hw.cxgbe.ntxq", &t4_ntxq); TUNABLE_INT("hw.cxgbe.ntxq10g", &t4_ntxq); /* Old name, undocumented */ #define NRXQ 8 int t4_nrxq = -NRXQ; TUNABLE_INT("hw.cxgbe.nrxq", &t4_nrxq); TUNABLE_INT("hw.cxgbe.nrxq10g", &t4_nrxq); /* Old name, undocumented */ #define NTXQ_VI 1 static int t4_ntxq_vi = -NTXQ_VI; TUNABLE_INT("hw.cxgbe.ntxq_vi", &t4_ntxq_vi); #define NRXQ_VI 1 static int t4_nrxq_vi = -NRXQ_VI; TUNABLE_INT("hw.cxgbe.nrxq_vi", &t4_nrxq_vi); static int t4_rsrv_noflowq = 0; TUNABLE_INT("hw.cxgbe.rsrv_noflowq", &t4_rsrv_noflowq); #ifdef TCP_OFFLOAD #define NOFLDTXQ 8 static int t4_nofldtxq = -NOFLDTXQ; TUNABLE_INT("hw.cxgbe.nofldtxq", &t4_nofldtxq); #define NOFLDRXQ 2 static int t4_nofldrxq = -NOFLDRXQ; TUNABLE_INT("hw.cxgbe.nofldrxq", &t4_nofldrxq); #define NOFLDTXQ_VI 1 static int t4_nofldtxq_vi = -NOFLDTXQ_VI; TUNABLE_INT("hw.cxgbe.nofldtxq_vi", &t4_nofldtxq_vi); #define NOFLDRXQ_VI 1 static int t4_nofldrxq_vi = -NOFLDRXQ_VI; TUNABLE_INT("hw.cxgbe.nofldrxq_vi", &t4_nofldrxq_vi); #define TMR_IDX_OFLD 1 int t4_tmr_idx_ofld = TMR_IDX_OFLD; TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_ofld", &t4_tmr_idx_ofld); #define PKTC_IDX_OFLD (-1) int t4_pktc_idx_ofld = PKTC_IDX_OFLD; TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_ofld", &t4_pktc_idx_ofld); /* 0 means chip/fw default, non-zero number is value in microseconds */ static u_long t4_toe_keepalive_idle = 0; TUNABLE_ULONG("hw.cxgbe.toe.keepalive_idle", &t4_toe_keepalive_idle); /* 0 means chip/fw default, non-zero number is value in microseconds */ static u_long t4_toe_keepalive_interval = 0; TUNABLE_ULONG("hw.cxgbe.toe.keepalive_interval", &t4_toe_keepalive_interval); /* 0 means chip/fw default, non-zero number is # of keepalives before abort */ static int t4_toe_keepalive_count = 0; TUNABLE_INT("hw.cxgbe.toe.keepalive_count", &t4_toe_keepalive_count); /* 0 means chip/fw default, non-zero number is value in microseconds */ static u_long t4_toe_rexmt_min = 0; TUNABLE_ULONG("hw.cxgbe.toe.rexmt_min", &t4_toe_rexmt_min); /* 0 means chip/fw default, non-zero number is value in microseconds */ static u_long t4_toe_rexmt_max = 0; TUNABLE_ULONG("hw.cxgbe.toe.rexmt_max", &t4_toe_rexmt_max); /* 0 means chip/fw default, non-zero number is # of rexmt before abort */ static int t4_toe_rexmt_count = 0; TUNABLE_INT("hw.cxgbe.toe.rexmt_count", &t4_toe_rexmt_count); /* -1 means chip/fw default, other values are raw backoff values to use */ static int t4_toe_rexmt_backoff[16] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }; TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.0", &t4_toe_rexmt_backoff[0]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.1", &t4_toe_rexmt_backoff[1]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.2", &t4_toe_rexmt_backoff[2]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.3", &t4_toe_rexmt_backoff[3]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.4", &t4_toe_rexmt_backoff[4]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.5", &t4_toe_rexmt_backoff[5]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.6", &t4_toe_rexmt_backoff[6]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.7", &t4_toe_rexmt_backoff[7]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.8", &t4_toe_rexmt_backoff[8]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.9", &t4_toe_rexmt_backoff[9]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.10", &t4_toe_rexmt_backoff[10]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.11", &t4_toe_rexmt_backoff[11]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.12", &t4_toe_rexmt_backoff[12]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.13", 
&t4_toe_rexmt_backoff[13]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.14", &t4_toe_rexmt_backoff[14]); TUNABLE_INT("hw.cxgbe.toe.rexmt_backoff.15", &t4_toe_rexmt_backoff[15]); #endif #ifdef DEV_NETMAP #define NNMTXQ_VI 2 static int t4_nnmtxq_vi = -NNMTXQ_VI; TUNABLE_INT("hw.cxgbe.nnmtxq_vi", &t4_nnmtxq_vi); #define NNMRXQ_VI 2 static int t4_nnmrxq_vi = -NNMRXQ_VI; TUNABLE_INT("hw.cxgbe.nnmrxq_vi", &t4_nnmrxq_vi); #endif /* * Holdoff parameters for ports. */ #define TMR_IDX 1 int t4_tmr_idx = TMR_IDX; TUNABLE_INT("hw.cxgbe.holdoff_timer_idx", &t4_tmr_idx); TUNABLE_INT("hw.cxgbe.holdoff_timer_idx_10G", &t4_tmr_idx); /* Old name */ #define PKTC_IDX (-1) int t4_pktc_idx = PKTC_IDX; TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx", &t4_pktc_idx); TUNABLE_INT("hw.cxgbe.holdoff_pktc_idx_10G", &t4_pktc_idx); /* Old name */ /* * Size (# of entries) of each tx and rx queue. */ unsigned int t4_qsize_txq = TX_EQ_QSIZE; TUNABLE_INT("hw.cxgbe.qsize_txq", &t4_qsize_txq); unsigned int t4_qsize_rxq = RX_IQ_QSIZE; TUNABLE_INT("hw.cxgbe.qsize_rxq", &t4_qsize_rxq); /* * Interrupt types allowed (bits 0, 1, 2 = INTx, MSI, MSI-X respectively). */ int t4_intr_types = INTR_MSIX | INTR_MSI | INTR_INTX; TUNABLE_INT("hw.cxgbe.interrupt_types", &t4_intr_types); /* * Configuration file. */ #define DEFAULT_CF "default" #define FLASH_CF "flash" #define UWIRE_CF "uwire" #define FPGA_CF "fpga" static char t4_cfg_file[32] = DEFAULT_CF; TUNABLE_STR("hw.cxgbe.config_file", t4_cfg_file, sizeof(t4_cfg_file)); /* * PAUSE settings (bit 0, 1 = rx_pause, tx_pause respectively). * rx_pause = 1 to heed incoming PAUSE frames, 0 to ignore them. * tx_pause = 1 to emit PAUSE frames when the rx FIFO reaches its high water * mark or when signalled to do so, 0 to never emit PAUSE. */ static int t4_pause_settings = PAUSE_TX | PAUSE_RX; TUNABLE_INT("hw.cxgbe.pause_settings", &t4_pause_settings); /* * Forward Error Correction settings (bit 0, 1, 2 = FEC_RS, FEC_BASER_RS, * FEC_RESERVED respectively). * -1 to run with the firmware default. * 0 to disable FEC. */ static int t4_fec = -1; TUNABLE_INT("hw.cxgbe.fec", &t4_fec); /* * Link autonegotiation. * -1 to run with the firmware default. * 0 to disable. * 1 to enable. */ static int t4_autoneg = -1; TUNABLE_INT("hw.cxgbe.autoneg", &t4_autoneg); /* * Firmware auto-install by driver during attach (0, 1, 2 = prohibited, allowed, * encouraged respectively). */ static unsigned int t4_fw_install = 1; TUNABLE_INT("hw.cxgbe.fw_install", &t4_fw_install); /* * ASIC features that will be used. Disable the ones you don't want so that the * chip resources aren't wasted on features that will not be used. */ static int t4_nbmcaps_allowed = 0; TUNABLE_INT("hw.cxgbe.nbmcaps_allowed", &t4_nbmcaps_allowed); static int t4_linkcaps_allowed = 0; /* No DCBX, PPP, etc. 
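The bit-encoded tunables above decode as their comments state: hw.cxgbe.interrupt_types uses bits 0/1/2 for INTx/MSI/MSI-X and hw.cxgbe.pause_settings uses bits 0/1 for rx_pause/tx_pause. A small illustrative decoder:

#include <stdio.h>

static void
print_cxgbe_bits(int intr_types, int pause_settings)
{
	printf("allowed interrupts:%s%s%s\n",
	    (intr_types & 1) ? " INTx" : "",
	    (intr_types & 2) ? " MSI" : "",
	    (intr_types & 4) ? " MSI-X" : "");
	printf("rx_pause=%d tx_pause=%d\n",
	    pause_settings & 1, (pause_settings >> 1) & 1);
}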
by default */ TUNABLE_INT("hw.cxgbe.linkcaps_allowed", &t4_linkcaps_allowed); static int t4_switchcaps_allowed = FW_CAPS_CONFIG_SWITCH_INGRESS | FW_CAPS_CONFIG_SWITCH_EGRESS; TUNABLE_INT("hw.cxgbe.switchcaps_allowed", &t4_switchcaps_allowed); static int t4_niccaps_allowed = FW_CAPS_CONFIG_NIC; TUNABLE_INT("hw.cxgbe.niccaps_allowed", &t4_niccaps_allowed); static int t4_toecaps_allowed = -1; TUNABLE_INT("hw.cxgbe.toecaps_allowed", &t4_toecaps_allowed); static int t4_rdmacaps_allowed = -1; TUNABLE_INT("hw.cxgbe.rdmacaps_allowed", &t4_rdmacaps_allowed); static int t4_cryptocaps_allowed = 0; TUNABLE_INT("hw.cxgbe.cryptocaps_allowed", &t4_cryptocaps_allowed); static int t4_iscsicaps_allowed = -1; TUNABLE_INT("hw.cxgbe.iscsicaps_allowed", &t4_iscsicaps_allowed); static int t4_fcoecaps_allowed = 0; TUNABLE_INT("hw.cxgbe.fcoecaps_allowed", &t4_fcoecaps_allowed); static int t5_write_combine = 1; TUNABLE_INT("hw.cxl.write_combine", &t5_write_combine); static int t4_num_vis = 1; TUNABLE_INT("hw.cxgbe.num_vis", &t4_num_vis); /* * PCIe Relaxed Ordering. * -1: driver should figure out a good value. * 0: disable RO. * 1: enable RO. * 2: leave RO alone. */ static int pcie_relaxed_ordering = -1; TUNABLE_INT("hw.cxgbe.pcie_relaxed_ordering", &pcie_relaxed_ordering); /* Functions used by VIs to obtain unique MAC addresses for each VI. */ static int vi_mac_funcs[] = { FW_VI_FUNC_ETH, FW_VI_FUNC_OFLD, FW_VI_FUNC_IWARP, FW_VI_FUNC_OPENISCSI, FW_VI_FUNC_OPENFCOE, FW_VI_FUNC_FOISCSI, FW_VI_FUNC_FOFCOE, }; struct intrs_and_queues { uint16_t intr_type; /* INTx, MSI, or MSI-X */ uint16_t num_vis; /* number of VIs for each port */ uint16_t nirq; /* Total # of vectors */ uint16_t ntxq; /* # of NIC txq's for each port */ uint16_t nrxq; /* # of NIC rxq's for each port */ uint16_t nofldtxq; /* # of TOE txq's for each port */ uint16_t nofldrxq; /* # of TOE rxq's for each port */ /* The vcxgbe/vcxl interfaces use these and not the ones above. 
*/ uint16_t ntxq_vi; /* # of NIC txq's */ uint16_t nrxq_vi; /* # of NIC rxq's */ uint16_t nofldtxq_vi; /* # of TOE txq's */ uint16_t nofldrxq_vi; /* # of TOE rxq's */ uint16_t nnmtxq_vi; /* # of netmap txq's */ uint16_t nnmrxq_vi; /* # of netmap rxq's */ }; struct filter_entry { uint32_t valid:1; /* filter allocated and valid */ uint32_t locked:1; /* filter is administratively locked */ uint32_t pending:1; /* filter action is pending firmware reply */ uint32_t smtidx:8; /* Source MAC Table index for smac */ struct l2t_entry *l2t; /* Layer Two Table entry for dmac */ struct t4_filter_specification fs; }; static void setup_memwin(struct adapter *); static void position_memwin(struct adapter *, int, uint32_t); static int rw_via_memwin(struct adapter *, int, uint32_t, uint32_t *, int, int); static inline int read_via_memwin(struct adapter *, int, uint32_t, uint32_t *, int); static inline int write_via_memwin(struct adapter *, int, uint32_t, const uint32_t *, int); static int validate_mem_range(struct adapter *, uint32_t, int); static int fwmtype_to_hwmtype(int); static int validate_mt_off_len(struct adapter *, int, uint32_t, int, uint32_t *); static int fixup_devlog_params(struct adapter *); static int cfg_itype_and_nqueues(struct adapter *, struct intrs_and_queues *); static int prep_firmware(struct adapter *); static int partition_resources(struct adapter *, const struct firmware *, const char *); static int get_params__pre_init(struct adapter *); static int get_params__post_init(struct adapter *); static int set_params__post_init(struct adapter *); static void t4_set_desc(struct adapter *); static void build_medialist(struct port_info *, struct ifmedia *); static void init_l1cfg(struct port_info *); static int cxgbe_init_synchronized(struct vi_info *); static int cxgbe_uninit_synchronized(struct vi_info *); static void quiesce_txq(struct adapter *, struct sge_txq *); static void quiesce_wrq(struct adapter *, struct sge_wrq *); static void quiesce_iq(struct adapter *, struct sge_iq *); static void quiesce_fl(struct adapter *, struct sge_fl *); static int t4_alloc_irq(struct adapter *, struct irq *, int rid, driver_intr_t *, void *, char *); static int t4_free_irq(struct adapter *, struct irq *); static void get_regs(struct adapter *, struct t4_regdump *, uint8_t *); static void vi_refresh_stats(struct adapter *, struct vi_info *); static void cxgbe_refresh_stats(struct adapter *, struct port_info *); static void cxgbe_tick(void *); static void cxgbe_vlan_config(void *, struct ifnet *, uint16_t); static void cxgbe_sysctls(struct port_info *); static int sysctl_int_array(SYSCTL_HANDLER_ARGS); static int sysctl_bitfield(SYSCTL_HANDLER_ARGS); static int sysctl_btphy(SYSCTL_HANDLER_ARGS); static int sysctl_noflowq(SYSCTL_HANDLER_ARGS); static int sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS); static int sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS); static int sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS); static int sysctl_qsize_txq(SYSCTL_HANDLER_ARGS); static int sysctl_pause_settings(SYSCTL_HANDLER_ARGS); static int sysctl_fec(SYSCTL_HANDLER_ARGS); static int sysctl_autoneg(SYSCTL_HANDLER_ARGS); static int sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS); static int sysctl_temperature(SYSCTL_HANDLER_ARGS); #ifdef SBUF_DRAIN static int sysctl_cctrl(SYSCTL_HANDLER_ARGS); static int sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS); static int sysctl_cim_la(SYSCTL_HANDLER_ARGS); static int sysctl_cim_la_t6(SYSCTL_HANDLER_ARGS); static int sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS); static int 
sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS); static int sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS); static int sysctl_cpl_stats(SYSCTL_HANDLER_ARGS); static int sysctl_ddp_stats(SYSCTL_HANDLER_ARGS); static int sysctl_devlog(SYSCTL_HANDLER_ARGS); static int sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS); static int sysctl_hw_sched(SYSCTL_HANDLER_ARGS); static int sysctl_lb_stats(SYSCTL_HANDLER_ARGS); static int sysctl_linkdnrc(SYSCTL_HANDLER_ARGS); static int sysctl_meminfo(SYSCTL_HANDLER_ARGS); static int sysctl_mps_tcam(SYSCTL_HANDLER_ARGS); static int sysctl_mps_tcam_t6(SYSCTL_HANDLER_ARGS); static int sysctl_path_mtus(SYSCTL_HANDLER_ARGS); static int sysctl_pm_stats(SYSCTL_HANDLER_ARGS); static int sysctl_rdma_stats(SYSCTL_HANDLER_ARGS); static int sysctl_tcp_stats(SYSCTL_HANDLER_ARGS); static int sysctl_tids(SYSCTL_HANDLER_ARGS); static int sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS); static int sysctl_tp_la_mask(SYSCTL_HANDLER_ARGS); static int sysctl_tp_la(SYSCTL_HANDLER_ARGS); static int sysctl_tx_rate(SYSCTL_HANDLER_ARGS); static int sysctl_ulprx_la(SYSCTL_HANDLER_ARGS); static int sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS); static int sysctl_tc_params(SYSCTL_HANDLER_ARGS); #endif #ifdef TCP_OFFLOAD static int sysctl_tp_tick(SYSCTL_HANDLER_ARGS); static int sysctl_tp_dack_timer(SYSCTL_HANDLER_ARGS); static int sysctl_tp_timer(SYSCTL_HANDLER_ARGS); static int sysctl_tp_shift_cnt(SYSCTL_HANDLER_ARGS); static int sysctl_tp_backoff(SYSCTL_HANDLER_ARGS); static int sysctl_holdoff_tmr_idx_ofld(SYSCTL_HANDLER_ARGS); static int sysctl_holdoff_pktc_idx_ofld(SYSCTL_HANDLER_ARGS); #endif static uint32_t fconf_iconf_to_mode(uint32_t, uint32_t); static uint32_t mode_to_fconf(uint32_t); static uint32_t mode_to_iconf(uint32_t); static int check_fspec_against_fconf_iconf(struct adapter *, struct t4_filter_specification *); static int get_filter_mode(struct adapter *, uint32_t *); static int set_filter_mode(struct adapter *, uint32_t); static inline uint64_t get_filter_hits(struct adapter *, uint32_t); static int get_filter(struct adapter *, struct t4_filter *); static int set_filter(struct adapter *, struct t4_filter *); static int del_filter(struct adapter *, struct t4_filter *); static void clear_filter(struct filter_entry *); static int set_filter_wr(struct adapter *, int); static int del_filter_wr(struct adapter *, int); static int set_tcb_rpl(struct sge_iq *, const struct rss_header *, struct mbuf *); static int get_sge_context(struct adapter *, struct t4_sge_context *); static int load_fw(struct adapter *, struct t4_data *); static int load_cfg(struct adapter *, struct t4_data *); static int load_boot(struct adapter *, struct t4_bootrom *); static int load_bootcfg(struct adapter *, struct t4_data *); static int cudbg_dump(struct adapter *, struct t4_cudbg_dump *); static int read_card_mem(struct adapter *, int, struct t4_mem_range *); static int read_i2c(struct adapter *, struct t4_i2c_data *); #ifdef TCP_OFFLOAD static int toe_capability(struct vi_info *, int); #endif static int mod_event(module_t, int, void *); static int notify_siblings(device_t, int); struct { uint16_t device; char *desc; } t4_pciids[] = { {0xa000, "Chelsio Terminator 4 FPGA"}, {0x4400, "Chelsio T440-dbg"}, {0x4401, "Chelsio T420-CR"}, {0x4402, "Chelsio T422-CR"}, {0x4403, "Chelsio T440-CR"}, {0x4404, "Chelsio T420-BCH"}, {0x4405, "Chelsio T440-BCH"}, {0x4406, "Chelsio T440-CH"}, {0x4407, "Chelsio T420-SO"}, {0x4408, "Chelsio T420-CX"}, {0x4409, "Chelsio T420-BT"}, {0x440a, "Chelsio T404-BT"}, {0x440e, "Chelsio T440-LP-CR"}, }, t5_pciids[] = { 
{0xb000, "Chelsio Terminator 5 FPGA"}, {0x5400, "Chelsio T580-dbg"}, {0x5401, "Chelsio T520-CR"}, /* 2 x 10G */ {0x5402, "Chelsio T522-CR"}, /* 2 x 10G, 2 X 1G */ {0x5403, "Chelsio T540-CR"}, /* 4 x 10G */ {0x5407, "Chelsio T520-SO"}, /* 2 x 10G, nomem */ {0x5409, "Chelsio T520-BT"}, /* 2 x 10GBaseT */ {0x540a, "Chelsio T504-BT"}, /* 4 x 1G */ {0x540d, "Chelsio T580-CR"}, /* 2 x 40G */ {0x540e, "Chelsio T540-LP-CR"}, /* 4 x 10G */ {0x5410, "Chelsio T580-LP-CR"}, /* 2 x 40G */ {0x5411, "Chelsio T520-LL-CR"}, /* 2 x 10G */ {0x5412, "Chelsio T560-CR"}, /* 1 x 40G, 2 x 10G */ {0x5414, "Chelsio T580-LP-SO-CR"}, /* 2 x 40G, nomem */ {0x5415, "Chelsio T502-BT"}, /* 2 x 1G */ #ifdef notyet {0x5404, "Chelsio T520-BCH"}, {0x5405, "Chelsio T540-BCH"}, {0x5406, "Chelsio T540-CH"}, {0x5408, "Chelsio T520-CX"}, {0x540b, "Chelsio B520-SR"}, {0x540c, "Chelsio B504-BT"}, {0x540f, "Chelsio Amsterdam"}, {0x5413, "Chelsio T580-CHR"}, #endif }, t6_pciids[] = { {0xc006, "Chelsio Terminator 6 FPGA"}, /* T6 PE10K6 FPGA (PF0) */ {0x6400, "Chelsio T6-DBG-25"}, /* 2 x 10/25G, debug */ {0x6401, "Chelsio T6225-CR"}, /* 2 x 10/25G */ {0x6402, "Chelsio T6225-SO-CR"}, /* 2 x 10/25G, nomem */ {0x6403, "Chelsio T6425-CR"}, /* 4 x 10/25G */ {0x6404, "Chelsio T6425-SO-CR"}, /* 4 x 10/25G, nomem */ {0x6405, "Chelsio T6225-OCP-SO"}, /* 2 x 10/25G, nomem */ {0x6406, "Chelsio T62100-OCP-SO"}, /* 2 x 40/50/100G, nomem */ {0x6407, "Chelsio T62100-LP-CR"}, /* 2 x 40/50/100G */ {0x6408, "Chelsio T62100-SO-CR"}, /* 2 x 40/50/100G, nomem */ {0x6409, "Chelsio T6210-BT"}, /* 2 x 10GBASE-T */ {0x640d, "Chelsio T62100-CR"}, /* 2 x 40/50/100G */ {0x6410, "Chelsio T6-DBG-100"}, /* 2 x 40/50/100G, debug */ {0x6411, "Chelsio T6225-LL-CR"}, /* 2 x 10/25G */ {0x6414, "Chelsio T61100-OCP-SO"}, /* 1 x 40/50/100G, nomem */ {0x6415, "Chelsio T6201-BT"}, /* 2 x 1000BASE-T */ /* Custom */ {0x6480, "Chelsio T6225 80"}, {0x6481, "Chelsio T62100 81"}, {0x6484, "Chelsio T62100 84"}, }; #ifdef TCP_OFFLOAD /* * service_iq() has an iq and needs the fl. Offset of fl from the iq should be * exactly the same for both rxq and ofld_rxq. 
*/ CTASSERT(offsetof(struct sge_ofld_rxq, iq) == offsetof(struct sge_rxq, iq)); CTASSERT(offsetof(struct sge_ofld_rxq, fl) == offsetof(struct sge_rxq, fl)); #endif CTASSERT(sizeof(struct cluster_metadata) <= CL_METADATA_SIZE); static int t4_probe(device_t dev) { int i; uint16_t v = pci_get_vendor(dev); uint16_t d = pci_get_device(dev); uint8_t f = pci_get_function(dev); if (v != PCI_VENDOR_ID_CHELSIO) return (ENXIO); /* Attach only to PF0 of the FPGA */ if (d == 0xa000 && f != 0) return (ENXIO); for (i = 0; i < nitems(t4_pciids); i++) { if (d == t4_pciids[i].device) { device_set_desc(dev, t4_pciids[i].desc); return (BUS_PROBE_DEFAULT); } } return (ENXIO); } static int t5_probe(device_t dev) { int i; uint16_t v = pci_get_vendor(dev); uint16_t d = pci_get_device(dev); uint8_t f = pci_get_function(dev); if (v != PCI_VENDOR_ID_CHELSIO) return (ENXIO); /* Attach only to PF0 of the FPGA */ if (d == 0xb000 && f != 0) return (ENXIO); for (i = 0; i < nitems(t5_pciids); i++) { if (d == t5_pciids[i].device) { device_set_desc(dev, t5_pciids[i].desc); return (BUS_PROBE_DEFAULT); } } return (ENXIO); } static int t6_probe(device_t dev) { int i; uint16_t v = pci_get_vendor(dev); uint16_t d = pci_get_device(dev); if (v != PCI_VENDOR_ID_CHELSIO) return (ENXIO); for (i = 0; i < nitems(t6_pciids); i++) { if (d == t6_pciids[i].device) { device_set_desc(dev, t6_pciids[i].desc); return (BUS_PROBE_DEFAULT); } } return (ENXIO); } static void t5_attribute_workaround(device_t dev) { device_t root_port; uint32_t v; /* * The T5 chips do not properly echo the No Snoop and Relaxed * Ordering attributes when replying to a TLP from a Root * Port. As a workaround, find the parent Root Port and * disable No Snoop and Relaxed Ordering. Note that this * affects all devices under this root port. 
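 *
 * As the code below relies on, pcie_adjust_config() performs a
 * read-modify-write of the Root Port's PCIe device control register
 * (roughly: old = read(reg); write(reg, (old & ~mask) | value); return old),
 * so passing both enable bits as the mask with a value of 0 clears them, and
 * the returned pre-modification value tells us whether anything was actually
 * disabled and is worth reporting.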
*/ root_port = pci_find_pcie_root_port(dev); if (root_port == NULL) { device_printf(dev, "Unable to find parent root port\n"); return; } v = pcie_adjust_config(root_port, PCIER_DEVICE_CTL, PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE, 0, 2); if ((v & (PCIEM_CTL_RELAXED_ORD_ENABLE | PCIEM_CTL_NOSNOOP_ENABLE)) != 0) device_printf(dev, "Disabled No Snoop/Relaxed Ordering on %s\n", device_get_nameunit(root_port)); } static const struct devnames devnames[] = { { .nexus_name = "t4nex", .ifnet_name = "cxgbe", .vi_ifnet_name = "vcxgbe", .pf03_drv_name = "t4iov", .vf_nexus_name = "t4vf", .vf_ifnet_name = "cxgbev" }, { .nexus_name = "t5nex", .ifnet_name = "cxl", .vi_ifnet_name = "vcxl", .pf03_drv_name = "t5iov", .vf_nexus_name = "t5vf", .vf_ifnet_name = "cxlv" }, { .nexus_name = "t6nex", .ifnet_name = "cc", .vi_ifnet_name = "vcc", .pf03_drv_name = "t6iov", .vf_nexus_name = "t6vf", .vf_ifnet_name = "ccv" } }; void t4_init_devnames(struct adapter *sc) { int id; id = chip_id(sc); if (id >= CHELSIO_T4 && id - CHELSIO_T4 < nitems(devnames)) sc->names = &devnames[id - CHELSIO_T4]; else { device_printf(sc->dev, "chip id %d is not supported.\n", id); sc->names = NULL; } } static int t4_attach(device_t dev) { struct adapter *sc; int rc = 0, i, j, rqidx, tqidx, nports; struct make_dev_args mda; struct intrs_and_queues iaq; struct sge *s; uint32_t *buf; #ifdef TCP_OFFLOAD int ofld_rqidx, ofld_tqidx; #endif #ifdef DEV_NETMAP int nm_rqidx, nm_tqidx; #endif int num_vis; sc = device_get_softc(dev); sc->dev = dev; TUNABLE_INT_FETCH("hw.cxgbe.dflags", &sc->debug_flags); if ((pci_get_device(dev) & 0xff00) == 0x5400) t5_attribute_workaround(dev); pci_enable_busmaster(dev); if (pci_find_cap(dev, PCIY_EXPRESS, &i) == 0) { uint32_t v; pci_set_max_read_req(dev, 4096); v = pci_read_config(dev, i + PCIER_DEVICE_CTL, 2); sc->params.pci.mps = 128 << ((v & PCIEM_CTL_MAX_PAYLOAD) >> 5); if (pcie_relaxed_ordering == 0 && (v & PCIEM_CTL_RELAXED_ORD_ENABLE) != 0) { v &= ~PCIEM_CTL_RELAXED_ORD_ENABLE; pci_write_config(dev, i + PCIER_DEVICE_CTL, v, 2); } else if (pcie_relaxed_ordering == 1 && (v & PCIEM_CTL_RELAXED_ORD_ENABLE) == 0) { v |= PCIEM_CTL_RELAXED_ORD_ENABLE; pci_write_config(dev, i + PCIER_DEVICE_CTL, v, 2); } } sc->sge_gts_reg = MYPF_REG(A_SGE_PF_GTS); sc->sge_kdoorbell_reg = MYPF_REG(A_SGE_PF_KDOORBELL); sc->traceq = -1; mtx_init(&sc->ifp_lock, sc->ifp_lockname, 0, MTX_DEF); snprintf(sc->ifp_lockname, sizeof(sc->ifp_lockname), "%s tracer", device_get_nameunit(dev)); snprintf(sc->lockname, sizeof(sc->lockname), "%s", device_get_nameunit(dev)); mtx_init(&sc->sc_lock, sc->lockname, 0, MTX_DEF); t4_add_adapter(sc); mtx_init(&sc->sfl_lock, "starving freelists", 0, MTX_DEF); TAILQ_INIT(&sc->sfl); callout_init_mtx(&sc->sfl_callout, &sc->sfl_lock, 0); mtx_init(&sc->reg_lock, "indirect register access", 0, MTX_DEF); rc = t4_map_bars_0_and_4(sc); if (rc != 0) goto done; /* error message displayed already */ memset(sc->chan_map, 0xff, sizeof(sc->chan_map)); /* Prepare the adapter for operation. */ buf = malloc(PAGE_SIZE, M_CXGBE, M_ZERO | M_WAITOK); rc = -t4_prep_adapter(sc, buf); free(buf, M_CXGBE); if (rc != 0) { device_printf(dev, "failed to prepare adapter: %d.\n", rc); goto done; } /* * This is the real PF# to which we're attaching. Works from within PCI * passthrough environments too, where pci_get_function() could return a * different PF# depending on the passthrough configuration. We need to * use the real PF# in all our communication with the firmware.
*/ j = t4_read_reg(sc, A_PL_WHOAMI); sc->pf = chip_id(sc) <= CHELSIO_T5 ? G_SOURCEPF(j) : G_T6_SOURCEPF(j); sc->mbox = sc->pf; t4_init_devnames(sc); if (sc->names == NULL) { rc = ENOTSUP; goto done; /* error message displayed already */ } /* * Do this really early, with the memory windows set up even before the * character device. The userland tool's register i/o and mem read * will work even in "recovery mode". */ setup_memwin(sc); if (t4_init_devlog_params(sc, 0) == 0) fixup_devlog_params(sc); make_dev_args_init(&mda); mda.mda_devsw = &t4_cdevsw; mda.mda_uid = UID_ROOT; mda.mda_gid = GID_WHEEL; mda.mda_mode = 0600; mda.mda_si_drv1 = sc; rc = make_dev_s(&mda, &sc->cdev, "%s", device_get_nameunit(dev)); if (rc != 0) device_printf(dev, "failed to create nexus char device: %d.\n", rc); /* Go no further if recovery mode has been requested. */ if (TUNABLE_INT_FETCH("hw.cxgbe.sos", &i) && i != 0) { device_printf(dev, "recovery mode.\n"); goto done; } #if defined(__i386__) if ((cpu_feature & CPUID_CX8) == 0) { device_printf(dev, "64 bit atomics not available.\n"); rc = ENOTSUP; goto done; } #endif /* Prepare the firmware for operation */ rc = prep_firmware(sc); if (rc != 0) goto done; /* error message displayed already */ rc = get_params__post_init(sc); if (rc != 0) goto done; /* error message displayed already */ rc = set_params__post_init(sc); if (rc != 0) goto done; /* error message displayed already */ rc = t4_map_bar_2(sc); if (rc != 0) goto done; /* error message displayed already */ rc = t4_create_dma_tag(sc); if (rc != 0) goto done; /* error message displayed already */ /* * First pass over all the ports - allocate VIs and initialize some * basic parameters like mac address, port type, etc. */ for_each_port(sc, i) { struct port_info *pi; pi = malloc(sizeof(*pi), M_CXGBE, M_ZERO | M_WAITOK); sc->port[i] = pi; /* These must be set before t4_port_init */ pi->adapter = sc; pi->port_id = i; /* * XXX: vi[0] is special so we can't delay this allocation until * pi->nvi's final value is known. */ pi->vi = malloc(sizeof(struct vi_info) * t4_num_vis, M_CXGBE, M_ZERO | M_WAITOK); /* * Allocate the "main" VI and initialize parameters * like mac addr. */ rc = -t4_port_init(sc, sc->mbox, sc->pf, 0, i); if (rc != 0) { device_printf(dev, "unable to initialize port %d: %d\n", i, rc); free(pi->vi, M_CXGBE); free(pi, M_CXGBE); sc->port[i] = NULL; goto done; } snprintf(pi->lockname, sizeof(pi->lockname), "%sp%d", device_get_nameunit(dev), i); mtx_init(&pi->pi_lock, pi->lockname, 0, MTX_DEF); sc->chan_map[pi->tx_chan] = i; /* All VIs on this port share this media. */ ifmedia_init(&pi->media, IFM_IMASK, cxgbe_media_change, cxgbe_media_status); pi->dev = device_add_child(dev, sc->names->ifnet_name, -1); if (pi->dev == NULL) { device_printf(dev, "failed to add device for port %d.\n", i); rc = ENXIO; goto done; } pi->vi[0].dev = pi->dev; device_set_softc(pi->dev, pi); } /* * Interrupt type, # of interrupts, # of rx/tx queues, etc. 
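 *
 * Worked example (hypothetical configuration): with nports = 2, num_vis = 1,
 * iaq.nrxq = 8 and iaq.ntxq = 8, the accounting below yields s->nrxq = 16,
 * s->ntxq = 16, s->neq = 16 + 16 + (2 + 1) = 35 and s->niq = 16 + 1 = 17,
 * before any TOE or netmap queues are added.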
*/ nports = sc->params.nports; rc = cfg_itype_and_nqueues(sc, &iaq); if (rc != 0) goto done; /* error message displayed already */ num_vis = iaq.num_vis; sc->intr_type = iaq.intr_type; sc->intr_count = iaq.nirq; s = &sc->sge; s->nrxq = nports * iaq.nrxq; s->ntxq = nports * iaq.ntxq; if (num_vis > 1) { s->nrxq += nports * (num_vis - 1) * iaq.nrxq_vi; s->ntxq += nports * (num_vis - 1) * iaq.ntxq_vi; } s->neq = s->ntxq + s->nrxq; /* the free list in an rxq is an eq */ s->neq += nports + 1;/* ctrl queues: 1 per port + 1 mgmt */ s->niq = s->nrxq + 1; /* 1 extra for firmware event queue */ #ifdef TCP_OFFLOAD if (is_offload(sc)) { s->nofldrxq = nports * iaq.nofldrxq; s->nofldtxq = nports * iaq.nofldtxq; if (num_vis > 1) { s->nofldrxq += nports * (num_vis - 1) * iaq.nofldrxq_vi; s->nofldtxq += nports * (num_vis - 1) * iaq.nofldtxq_vi; } s->neq += s->nofldtxq + s->nofldrxq; s->niq += s->nofldrxq; s->ofld_rxq = malloc(s->nofldrxq * sizeof(struct sge_ofld_rxq), M_CXGBE, M_ZERO | M_WAITOK); s->ofld_txq = malloc(s->nofldtxq * sizeof(struct sge_wrq), M_CXGBE, M_ZERO | M_WAITOK); } #endif #ifdef DEV_NETMAP if (num_vis > 1) { s->nnmrxq = nports * (num_vis - 1) * iaq.nnmrxq_vi; s->nnmtxq = nports * (num_vis - 1) * iaq.nnmtxq_vi; } s->neq += s->nnmtxq + s->nnmrxq; s->niq += s->nnmrxq; s->nm_rxq = malloc(s->nnmrxq * sizeof(struct sge_nm_rxq), M_CXGBE, M_ZERO | M_WAITOK); s->nm_txq = malloc(s->nnmtxq * sizeof(struct sge_nm_txq), M_CXGBE, M_ZERO | M_WAITOK); #endif s->ctrlq = malloc(nports * sizeof(struct sge_wrq), M_CXGBE, M_ZERO | M_WAITOK); s->rxq = malloc(s->nrxq * sizeof(struct sge_rxq), M_CXGBE, M_ZERO | M_WAITOK); s->txq = malloc(s->ntxq * sizeof(struct sge_txq), M_CXGBE, M_ZERO | M_WAITOK); s->iqmap = malloc(s->niq * sizeof(struct sge_iq *), M_CXGBE, M_ZERO | M_WAITOK); s->eqmap = malloc(s->neq * sizeof(struct sge_eq *), M_CXGBE, M_ZERO | M_WAITOK); sc->irq = malloc(sc->intr_count * sizeof(struct irq), M_CXGBE, M_ZERO | M_WAITOK); t4_init_l2t(sc, M_WAITOK); t4_init_tx_sched(sc); /* * Second pass over the ports. This time we know the number of rx and * tx queues that each port should get. */ rqidx = tqidx = 0; #ifdef TCP_OFFLOAD ofld_rqidx = ofld_tqidx = 0; #endif #ifdef DEV_NETMAP nm_rqidx = nm_tqidx = 0; #endif for_each_port(sc, i) { struct port_info *pi = sc->port[i]; struct vi_info *vi; if (pi == NULL) continue; pi->nvi = num_vis; for_each_vi(pi, j, vi) { vi->pi = pi; vi->qsize_rxq = t4_qsize_rxq; vi->qsize_txq = t4_qsize_txq; vi->first_rxq = rqidx; vi->first_txq = tqidx; vi->tmr_idx = t4_tmr_idx; vi->pktc_idx = t4_pktc_idx; vi->nrxq = j == 0 ? iaq.nrxq : iaq.nrxq_vi; vi->ntxq = j == 0 ? iaq.ntxq : iaq.ntxq_vi; rqidx += vi->nrxq; tqidx += vi->ntxq; if (j == 0 && vi->ntxq > 1) vi->rsrv_noflowq = t4_rsrv_noflowq ? 1 : 0; else vi->rsrv_noflowq = 0; #ifdef TCP_OFFLOAD vi->ofld_tmr_idx = t4_tmr_idx_ofld; vi->ofld_pktc_idx = t4_pktc_idx_ofld; vi->first_ofld_rxq = ofld_rqidx; vi->first_ofld_txq = ofld_tqidx; vi->nofldrxq = j == 0 ? iaq.nofldrxq : iaq.nofldrxq_vi; vi->nofldtxq = j == 0 ? 
iaq.nofldtxq : iaq.nofldtxq_vi; ofld_rqidx += vi->nofldrxq; ofld_tqidx += vi->nofldtxq; #endif #ifdef DEV_NETMAP if (j > 0) { vi->first_nm_rxq = nm_rqidx; vi->first_nm_txq = nm_tqidx; vi->nnmrxq = iaq.nnmrxq_vi; vi->nnmtxq = iaq.nnmtxq_vi; nm_rqidx += vi->nnmrxq; nm_tqidx += vi->nnmtxq; } #endif } } rc = t4_setup_intr_handlers(sc); if (rc != 0) { device_printf(dev, "failed to setup interrupt handlers: %d\n", rc); goto done; } rc = bus_generic_probe(dev); if (rc != 0) { device_printf(dev, "failed to probe child drivers: %d\n", rc); goto done; } /* * Ensure thread-safe mailbox access (in debug builds). * * So far this was the only thread accessing the mailbox but various * ifnets and sysctls are about to be created and their handlers/ioctls * will access the mailbox from different threads. */ sc->flags |= CHK_MBOX_ACCESS; rc = bus_generic_attach(dev); if (rc != 0) { device_printf(dev, "failed to attach all child ports: %d\n", rc); goto done; } device_printf(dev, "PCIe gen%d x%d, %d ports, %d %s interrupt%s, %d eq, %d iq\n", sc->params.pci.speed, sc->params.pci.width, sc->params.nports, sc->intr_count, sc->intr_type == INTR_MSIX ? "MSI-X" : (sc->intr_type == INTR_MSI ? "MSI" : "INTx"), sc->intr_count > 1 ? "s" : "", sc->sge.neq, sc->sge.niq); t4_set_desc(sc); notify_siblings(dev, 0); done: if (rc != 0 && sc->cdev) { /* cdev was created and so cxgbetool works; recover that way. */ device_printf(dev, "error during attach, adapter is now in recovery mode.\n"); rc = 0; } if (rc != 0) t4_detach_common(dev); else t4_sysctls(sc); return (rc); } static int t4_ready(device_t dev) { struct adapter *sc; sc = device_get_softc(dev); if (sc->flags & FW_OK) return (0); return (ENXIO); } static int t4_read_port_device(device_t dev, int port, device_t *child) { struct adapter *sc; struct port_info *pi; sc = device_get_softc(dev); if (port < 0 || port >= MAX_NPORTS) return (EINVAL); pi = sc->port[port]; if (pi == NULL || pi->dev == NULL) return (ENXIO); *child = pi->dev; return (0); } static int notify_siblings(device_t dev, int detaching) { device_t sibling; int error, i; error = 0; for (i = 0; i < PCI_FUNCMAX; i++) { if (i == pci_get_function(dev)) continue; sibling = pci_find_dbsf(pci_get_domain(dev), pci_get_bus(dev), pci_get_slot(dev), i); if (sibling == NULL || !device_is_attached(sibling)) continue; if (detaching) error = T4_DETACH_CHILD(sibling); else (void)T4_ATTACH_CHILD(sibling); if (error) break; } return (error); } /* * Idempotent */ static int t4_detach(device_t dev) { struct adapter *sc; int rc; sc = device_get_softc(dev); rc = notify_siblings(dev, 1); if (rc) { device_printf(dev, "failed to detach sibling devices: %d\n", rc); return (rc); } return (t4_detach_common(dev)); } int t4_detach_common(device_t dev) { struct adapter *sc; struct port_info *pi; int i, rc; sc = device_get_softc(dev); sc->flags &= ~CHK_MBOX_ACCESS; if (sc->flags & FULL_INIT_DONE) { if (!(sc->flags & IS_VF)) t4_intr_disable(sc); } if (sc->cdev) { destroy_dev(sc->cdev); sc->cdev = NULL; } if (device_is_attached(dev)) { rc = bus_generic_detach(dev); if (rc) { device_printf(dev, "failed to detach child devices: %d\n", rc); return (rc); } } for (i = 0; i < sc->intr_count; i++) t4_free_irq(sc, &sc->irq[i]); if ((sc->flags & (IS_VF | FW_OK)) == FW_OK) t4_free_tx_sched(sc); for (i = 0; i < MAX_NPORTS; i++) { pi = sc->port[i]; if (pi) { t4_free_vi(sc, sc->mbox, sc->pf, 0, pi->vi[0].viid); if (pi->dev) device_delete_child(dev, pi->dev); mtx_destroy(&pi->pi_lock); free(pi->vi, M_CXGBE); free(pi, M_CXGBE); } } 
device_delete_children(dev); if (sc->flags & FULL_INIT_DONE) adapter_full_uninit(sc); if ((sc->flags & (IS_VF | FW_OK)) == FW_OK) t4_fw_bye(sc, sc->mbox); if (sc->intr_type == INTR_MSI || sc->intr_type == INTR_MSIX) pci_release_msi(dev); if (sc->regs_res) bus_release_resource(dev, SYS_RES_MEMORY, sc->regs_rid, sc->regs_res); if (sc->udbs_res) bus_release_resource(dev, SYS_RES_MEMORY, sc->udbs_rid, sc->udbs_res); if (sc->msix_res) bus_release_resource(dev, SYS_RES_MEMORY, sc->msix_rid, sc->msix_res); if (sc->l2t) t4_free_l2t(sc->l2t); #ifdef TCP_OFFLOAD free(sc->sge.ofld_rxq, M_CXGBE); free(sc->sge.ofld_txq, M_CXGBE); #endif #ifdef DEV_NETMAP free(sc->sge.nm_rxq, M_CXGBE); free(sc->sge.nm_txq, M_CXGBE); #endif free(sc->irq, M_CXGBE); free(sc->sge.rxq, M_CXGBE); free(sc->sge.txq, M_CXGBE); free(sc->sge.ctrlq, M_CXGBE); free(sc->sge.iqmap, M_CXGBE); free(sc->sge.eqmap, M_CXGBE); free(sc->tids.ftid_tab, M_CXGBE); t4_destroy_dma_tag(sc); if (mtx_initialized(&sc->sc_lock)) { sx_xlock(&t4_list_lock); SLIST_REMOVE(&t4_list, sc, adapter, link); sx_xunlock(&t4_list_lock); mtx_destroy(&sc->sc_lock); } callout_drain(&sc->sfl_callout); if (mtx_initialized(&sc->tids.ftid_lock)) mtx_destroy(&sc->tids.ftid_lock); if (mtx_initialized(&sc->sfl_lock)) mtx_destroy(&sc->sfl_lock); if (mtx_initialized(&sc->ifp_lock)) mtx_destroy(&sc->ifp_lock); if (mtx_initialized(&sc->reg_lock)) mtx_destroy(&sc->reg_lock); for (i = 0; i < NUM_MEMWIN; i++) { struct memwin *mw = &sc->memwin[i]; if (rw_initialized(&mw->mw_lock)) rw_destroy(&mw->mw_lock); } bzero(sc, sizeof(*sc)); return (0); } static int cxgbe_probe(device_t dev) { char buf[128]; struct port_info *pi = device_get_softc(dev); snprintf(buf, sizeof(buf), "port %d", pi->port_id); device_set_desc_copy(dev, buf); return (BUS_PROBE_DEFAULT); } #define T4_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \ IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \ IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWCSUM_IPV6 | IFCAP_HWSTATS) #define T4_CAP_ENABLE (T4_CAP) static int cxgbe_vi_attach(device_t dev, struct vi_info *vi) { struct ifnet *ifp; struct sbuf *sb; vi->xact_addr_filt = -1; callout_init(&vi->tick, 1); /* Allocate an ifnet and set it up */ ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "Cannot allocate ifnet\n"); return (ENOMEM); } vi->ifp = ifp; ifp->if_softc = vi; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_init = cxgbe_init; ifp->if_ioctl = cxgbe_ioctl; ifp->if_transmit = cxgbe_transmit; ifp->if_qflush = cxgbe_qflush; ifp->if_get_counter = cxgbe_get_counter; ifp->if_capabilities = T4_CAP; #ifdef TCP_OFFLOAD if (vi->nofldrxq != 0) ifp->if_capabilities |= IFCAP_TOE; #endif #ifdef DEV_NETMAP if (vi->nnmrxq != 0) ifp->if_capabilities |= IFCAP_NETMAP; #endif ifp->if_capenable = T4_CAP_ENABLE; ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO | CSUM_UDP_IPV6 | CSUM_TCP_IPV6; ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS; ifp->if_hw_tsomaxsegsize = 65536; vi->vlan_c = EVENTHANDLER_REGISTER(vlan_config, cxgbe_vlan_config, ifp, EVENTHANDLER_PRI_ANY); ether_ifattach(ifp, vi->hw_addr); #ifdef DEV_NETMAP if (ifp->if_capabilities & IFCAP_NETMAP) cxgbe_nm_attach(vi); #endif sb = sbuf_new_auto(); sbuf_printf(sb, "%d txq, %d rxq (NIC)", vi->ntxq, vi->nrxq); #ifdef TCP_OFFLOAD if (ifp->if_capabilities & IFCAP_TOE) sbuf_printf(sb, "; %d txq, %d rxq (TOE)", vi->nofldtxq, vi->nofldrxq); 
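	/*
	 * Illustrative only: the sbuf assembled here and just below ends up in
	 * dmesg as something like "4 txq, 4 rxq (NIC); 2 txq, 2 rxq (TOE);
	 * 1 txq, 1 rxq (netmap)", with the counts depending on the
	 * configuration.
	 */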
#endif #ifdef DEV_NETMAP if (ifp->if_capabilities & IFCAP_NETMAP) sbuf_printf(sb, "; %d txq, %d rxq (netmap)", vi->nnmtxq, vi->nnmrxq); #endif sbuf_finish(sb); device_printf(dev, "%s\n", sbuf_data(sb)); sbuf_delete(sb); vi_sysctls(vi); return (0); } static int cxgbe_attach(device_t dev) { struct port_info *pi = device_get_softc(dev); struct adapter *sc = pi->adapter; struct vi_info *vi; int i, rc; callout_init_mtx(&pi->tick, &pi->pi_lock, 0); rc = cxgbe_vi_attach(dev, &pi->vi[0]); if (rc) return (rc); for_each_vi(pi, i, vi) { if (i == 0) continue; vi->dev = device_add_child(dev, sc->names->vi_ifnet_name, -1); if (vi->dev == NULL) { device_printf(dev, "failed to add VI %d\n", i); continue; } device_set_softc(vi->dev, vi); } cxgbe_sysctls(pi); bus_generic_attach(dev); return (0); } static void cxgbe_vi_detach(struct vi_info *vi) { struct ifnet *ifp = vi->ifp; ether_ifdetach(ifp); if (vi->vlan_c) EVENTHANDLER_DEREGISTER(vlan_config, vi->vlan_c); /* Let detach proceed even if these fail. */ #ifdef DEV_NETMAP if (ifp->if_capabilities & IFCAP_NETMAP) cxgbe_nm_detach(vi); #endif cxgbe_uninit_synchronized(vi); callout_drain(&vi->tick); vi_full_uninit(vi); if_free(vi->ifp); vi->ifp = NULL; } static int cxgbe_detach(device_t dev) { struct port_info *pi = device_get_softc(dev); struct adapter *sc = pi->adapter; int rc; /* Detach the extra VIs first. */ rc = bus_generic_detach(dev); if (rc) return (rc); device_delete_children(dev); doom_vi(sc, &pi->vi[0]); if (pi->flags & HAS_TRACEQ) { sc->traceq = -1; /* cloner should not create ifnet */ t4_tracer_port_detach(sc); } cxgbe_vi_detach(&pi->vi[0]); callout_drain(&pi->tick); ifmedia_removeall(&pi->media); end_synchronized_op(sc, 0); return (0); } static void cxgbe_init(void *arg) { struct vi_info *vi = arg; struct adapter *sc = vi->pi->adapter; if (begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4init") != 0) return; cxgbe_init_synchronized(vi); end_synchronized_op(sc, 0); } static int cxgbe_ioctl(struct ifnet *ifp, unsigned long cmd, caddr_t data) { int rc = 0, mtu, flags, can_sleep; struct vi_info *vi = ifp->if_softc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct ifreq *ifr = (struct ifreq *)data; uint32_t mask; switch (cmd) { case SIOCSIFMTU: mtu = ifr->ifr_mtu; if (mtu < ETHERMIN || mtu > MAX_MTU) return (EINVAL); rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4mtu"); if (rc) return (rc); ifp->if_mtu = mtu; if (vi->flags & VI_INIT_DONE) { t4_update_fl_bufsize(ifp); if (ifp->if_drv_flags & IFF_DRV_RUNNING) rc = update_mac_settings(ifp, XGMAC_MTU); } end_synchronized_op(sc, 0); break; case SIOCSIFFLAGS: can_sleep = 0; redo_sifflags: rc = begin_synchronized_op(sc, vi, can_sleep ? (SLEEP_OK | INTR_OK) : HOLD_LOCK, "t4flg"); if (rc) return (rc); if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { flags = vi->if_flags; if ((ifp->if_flags ^ flags) & (IFF_PROMISC | IFF_ALLMULTI)) { if (can_sleep == 1) { end_synchronized_op(sc, 0); can_sleep = 0; goto redo_sifflags; } rc = update_mac_settings(ifp, XGMAC_PROMISC | XGMAC_ALLMULTI); } } else { if (can_sleep == 0) { end_synchronized_op(sc, LOCK_HELD); can_sleep = 1; goto redo_sifflags; } rc = cxgbe_init_synchronized(vi); } vi->if_flags = ifp->if_flags; } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) { if (can_sleep == 0) { end_synchronized_op(sc, LOCK_HELD); can_sleep = 1; goto redo_sifflags; } rc = cxgbe_uninit_synchronized(vi); } end_synchronized_op(sc, can_sleep ? 
0 : LOCK_HELD); break; case SIOCADDMULTI: case SIOCDELMULTI: /* these two are called with a mutex held :-( */ rc = begin_synchronized_op(sc, vi, HOLD_LOCK, "t4multi"); if (rc) return (rc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) rc = update_mac_settings(ifp, XGMAC_MCADDRS); end_synchronized_op(sc, LOCK_HELD); break; case SIOCSIFCAP: rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4cap"); if (rc) return (rc); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { ifp->if_capenable ^= IFCAP_TXCSUM; ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP); if (IFCAP_TSO4 & ifp->if_capenable && !(IFCAP_TXCSUM & ifp->if_capenable)) { ifp->if_capenable &= ~IFCAP_TSO4; if_printf(ifp, "tso4 disabled due to -txcsum.\n"); } } if (mask & IFCAP_TXCSUM_IPV6) { ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6); if (IFCAP_TSO6 & ifp->if_capenable && !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) { ifp->if_capenable &= ~IFCAP_TSO6; if_printf(ifp, "tso6 disabled due to -txcsum6.\n"); } } if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_RXCSUM_IPV6) ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; /* * Note that we leave CSUM_TSO alone (it is always set). The * kernel takes both IFCAP_TSOx and CSUM_TSO into account before * sending a TSO request our way, so it's sufficient to toggle * IFCAP_TSOx only. */ if (mask & IFCAP_TSO4) { if (!(IFCAP_TSO4 & ifp->if_capenable) && !(IFCAP_TXCSUM & ifp->if_capenable)) { if_printf(ifp, "enable txcsum first.\n"); rc = EAGAIN; goto fail; } ifp->if_capenable ^= IFCAP_TSO4; } if (mask & IFCAP_TSO6) { if (!(IFCAP_TSO6 & ifp->if_capenable) && !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) { if_printf(ifp, "enable txcsum6 first.\n"); rc = EAGAIN; goto fail; } ifp->if_capenable ^= IFCAP_TSO6; } if (mask & IFCAP_LRO) { #if defined(INET) || defined(INET6) int i; struct sge_rxq *rxq; ifp->if_capenable ^= IFCAP_LRO; for_each_rxq(vi, i, rxq) { if (ifp->if_capenable & IFCAP_LRO) rxq->iq.flags |= IQ_LRO_ENABLED; else rxq->iq.flags &= ~IQ_LRO_ENABLED; } #endif } #ifdef TCP_OFFLOAD if (mask & IFCAP_TOE) { int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE; rc = toe_capability(vi, enable); if (rc != 0) goto fail; ifp->if_capenable ^= mask; } #endif if (mask & IFCAP_VLAN_HWTAGGING) { ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (ifp->if_drv_flags & IFF_DRV_RUNNING) rc = update_mac_settings(ifp, XGMAC_VLANEX); } if (mask & IFCAP_VLAN_MTU) { ifp->if_capenable ^= IFCAP_VLAN_MTU; /* Need to find out how to disable auto-mtu-inflation */ } if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if (mask & IFCAP_VLAN_HWCSUM) ifp->if_capenable ^= IFCAP_VLAN_HWCSUM; #ifdef VLAN_CAPABILITIES VLAN_CAPABILITIES(ifp); #endif fail: end_synchronized_op(sc, 0); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: case SIOCGIFXMEDIA: ifmedia_ioctl(ifp, ifr, &pi->media, cmd); break; case SIOCGI2C: { struct ifi2creq i2c; - rc = copyin(ifr->ifr_data, &i2c, sizeof(i2c)); + rc = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c)); if (rc != 0) break; if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) { rc = EPERM; break; } if (i2c.len > sizeof(i2c.data)) { rc = EINVAL; break; } rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4i2c"); if (rc) return (rc); rc = -t4_i2c_rd(sc, sc->mbox, pi->port_id, i2c.dev_addr, i2c.offset, i2c.len, &i2c.data[0]); end_synchronized_op(sc, 0); if (rc == 0) - rc = copyout(&i2c, ifr->ifr_data, sizeof(i2c)); + rc = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c)); break; } default: rc = ether_ioctl(ifp, 
cmd, data); } return (rc); } static int cxgbe_transmit(struct ifnet *ifp, struct mbuf *m) { struct vi_info *vi = ifp->if_softc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct sge_txq *txq; void *items[1]; int rc; M_ASSERTPKTHDR(m); MPASS(m->m_nextpkt == NULL); /* not quite ready for this yet */ if (__predict_false(pi->link_cfg.link_ok == 0)) { m_freem(m); return (ENETDOWN); } rc = parse_pkt(sc, &m); if (__predict_false(rc != 0)) { MPASS(m == NULL); /* was freed already */ atomic_add_int(&pi->tx_parse_error, 1); /* rare, atomic is ok */ return (rc); } /* Select a txq. */ txq = &sc->sge.txq[vi->first_txq]; if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) txq += ((m->m_pkthdr.flowid % (vi->ntxq - vi->rsrv_noflowq)) + vi->rsrv_noflowq); items[0] = m; rc = mp_ring_enqueue(txq->r, items, 1, 4096); if (__predict_false(rc != 0)) m_freem(m); return (rc); } static void cxgbe_qflush(struct ifnet *ifp) { struct vi_info *vi = ifp->if_softc; struct sge_txq *txq; int i; /* queues do not exist if !VI_INIT_DONE. */ if (vi->flags & VI_INIT_DONE) { for_each_txq(vi, i, txq) { TXQ_LOCK(txq); txq->eq.flags |= EQ_QFLUSH; TXQ_UNLOCK(txq); while (!mp_ring_is_idle(txq->r)) { mp_ring_check_drainage(txq->r, 0); pause("qflush", 1); } TXQ_LOCK(txq); txq->eq.flags &= ~EQ_QFLUSH; TXQ_UNLOCK(txq); } } if_qflush(ifp); } static uint64_t vi_get_counter(struct ifnet *ifp, ift_counter c) { struct vi_info *vi = ifp->if_softc; struct fw_vi_stats_vf *s = &vi->stats; vi_refresh_stats(vi->pi->adapter, vi); switch (c) { case IFCOUNTER_IPACKETS: return (s->rx_bcast_frames + s->rx_mcast_frames + s->rx_ucast_frames); case IFCOUNTER_IERRORS: return (s->rx_err_frames); case IFCOUNTER_OPACKETS: return (s->tx_bcast_frames + s->tx_mcast_frames + s->tx_ucast_frames + s->tx_offload_frames); case IFCOUNTER_OERRORS: return (s->tx_drop_frames); case IFCOUNTER_IBYTES: return (s->rx_bcast_bytes + s->rx_mcast_bytes + s->rx_ucast_bytes); case IFCOUNTER_OBYTES: return (s->tx_bcast_bytes + s->tx_mcast_bytes + s->tx_ucast_bytes + s->tx_offload_bytes); case IFCOUNTER_IMCASTS: return (s->rx_mcast_frames); case IFCOUNTER_OMCASTS: return (s->tx_mcast_frames); case IFCOUNTER_OQDROPS: { uint64_t drops; drops = 0; if (vi->flags & VI_INIT_DONE) { int i; struct sge_txq *txq; for_each_txq(vi, i, txq) drops += counter_u64_fetch(txq->r->drops); } return (drops); } default: return (if_get_counter_default(ifp, c)); } } uint64_t cxgbe_get_counter(struct ifnet *ifp, ift_counter c) { struct vi_info *vi = ifp->if_softc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct port_stats *s = &pi->stats; if (pi->nvi > 1 || sc->flags & IS_VF) return (vi_get_counter(ifp, c)); cxgbe_refresh_stats(sc, pi); switch (c) { case IFCOUNTER_IPACKETS: return (s->rx_frames); case IFCOUNTER_IERRORS: return (s->rx_jabber + s->rx_runt + s->rx_too_long + s->rx_fcs_err + s->rx_len_err); case IFCOUNTER_OPACKETS: return (s->tx_frames); case IFCOUNTER_OERRORS: return (s->tx_error_frames); case IFCOUNTER_IBYTES: return (s->rx_octets); case IFCOUNTER_OBYTES: return (s->tx_octets); case IFCOUNTER_IMCASTS: return (s->rx_mcast_frames); case IFCOUNTER_OMCASTS: return (s->tx_mcast_frames); case IFCOUNTER_IQDROPS: return (s->rx_ovflow0 + s->rx_ovflow1 + s->rx_ovflow2 + s->rx_ovflow3 + s->rx_trunc0 + s->rx_trunc1 + s->rx_trunc2 + s->rx_trunc3 + pi->tnl_cong_drops); case IFCOUNTER_OQDROPS: { uint64_t drops; drops = s->tx_drop; if (vi->flags & VI_INIT_DONE) { int i; struct sge_txq *txq; for_each_txq(vi, i, txq) drops += counter_u64_fetch(txq->r->drops); } return 
(drops); } default: return (if_get_counter_default(ifp, c)); } } static int cxgbe_media_change(struct ifnet *ifp) { struct vi_info *vi = ifp->if_softc; device_printf(vi->dev, "%s unimplemented.\n", __func__); return (EOPNOTSUPP); } static void cxgbe_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) { struct vi_info *vi = ifp->if_softc; struct port_info *pi = vi->pi; struct ifmedia_entry *cur; struct link_config *lc = &pi->link_cfg; /* * If all the interfaces are administratively down the firmware does not * report transceiver changes. Refresh port info here so that ifconfig * displays accurate information at all times. */ if (begin_synchronized_op(pi->adapter, NULL, SLEEP_OK | INTR_OK, "t4med") == 0) { PORT_LOCK(pi); if (pi->up_vis == 0) { t4_update_port_info(pi); build_medialist(pi, &pi->media); } PORT_UNLOCK(pi); end_synchronized_op(pi->adapter, 0); } ifmr->ifm_status = IFM_AVALID; if (lc->link_ok == 0) return; ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active &= ~(IFM_ETH_TXPAUSE | IFM_ETH_RXPAUSE); if (lc->fc & PAUSE_RX) ifmr->ifm_active |= IFM_ETH_RXPAUSE; if (lc->fc & PAUSE_TX) ifmr->ifm_active |= IFM_ETH_TXPAUSE; /* active and current will differ iff current media is autoselect. */ cur = pi->media.ifm_cur; if (cur != NULL && IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO) return; ifmr->ifm_active = IFM_ETHER | IFM_FDX; if (lc->fc & PAUSE_RX) ifmr->ifm_active |= IFM_ETH_RXPAUSE; if (lc->fc & PAUSE_TX) ifmr->ifm_active |= IFM_ETH_TXPAUSE; switch (lc->speed) { case 10000: ifmr->ifm_active |= IFM_10G_T; break; case 1000: ifmr->ifm_active |= IFM_1000_T; break; case 100: ifmr->ifm_active |= IFM_100_TX; break; case 10: ifmr->ifm_active |= IFM_10_T; break; default: device_printf(vi->dev, "link up but speed unknown (%u)\n", lc->speed); } } static int vcxgbe_probe(device_t dev) { char buf[128]; struct vi_info *vi = device_get_softc(dev); snprintf(buf, sizeof(buf), "port %d vi %td", vi->pi->port_id, vi - vi->pi->vi); device_set_desc_copy(dev, buf); return (BUS_PROBE_DEFAULT); } static int alloc_extra_vi(struct adapter *sc, struct port_info *pi, struct vi_info *vi) { int func, index, rc; uint32_t param, val; ASSERT_SYNCHRONIZED_OP(sc); index = vi - pi->vi; MPASS(index > 0); /* This function deals with _extra_ VIs only */ KASSERT(index < nitems(vi_mac_funcs), ("%s: VI %s doesn't have a MAC func", __func__, device_get_nameunit(vi->dev))); func = vi_mac_funcs[index]; rc = t4_alloc_vi_func(sc, sc->mbox, pi->tx_chan, sc->pf, 0, 1, vi->hw_addr, &vi->rss_size, func, 0); if (rc < 0) { device_printf(vi->dev, "failed to allocate virtual interface %d" "for port %d: %d\n", index, pi->port_id, -rc); return (-rc); } vi->viid = rc; if (chip_id(sc) <= CHELSIO_T5) vi->smt_idx = (rc & 0x7f) << 1; else vi->smt_idx = (rc & 0x7f); if (vi->rss_size == 1) { /* * This VI didn't get a slice of the RSS table. Reduce the * number of VIs being created (hw.cxgbe.num_vis) or modify the * configuration file (nvi, rssnvi for this PF) if this is a * problem. 
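 *
 * For example, dropping back to a single VI per port from /boot/loader.conf
 * avoids the shortage entirely (illustrative value for the tunable named in
 * this comment):
 *
 *	hw.cxgbe.num_vis="1"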
*/ device_printf(vi->dev, "RSS table not available.\n"); vi->rss_base = 0xffff; return (0); } param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_RSSINFO) | V_FW_PARAMS_PARAM_YZ(vi->viid); rc = t4_query_params(sc, sc->mbox, sc->pf, 0, 1, ¶m, &val); if (rc) vi->rss_base = 0xffff; else { MPASS((val >> 16) == vi->rss_size); vi->rss_base = val & 0xffff; } return (0); } static int vcxgbe_attach(device_t dev) { struct vi_info *vi; struct port_info *pi; struct adapter *sc; int rc; vi = device_get_softc(dev); pi = vi->pi; sc = pi->adapter; rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4via"); if (rc) return (rc); rc = alloc_extra_vi(sc, pi, vi); end_synchronized_op(sc, 0); if (rc) return (rc); rc = cxgbe_vi_attach(dev, vi); if (rc) { t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid); return (rc); } return (0); } static int vcxgbe_detach(device_t dev) { struct vi_info *vi; struct adapter *sc; vi = device_get_softc(dev); sc = vi->pi->adapter; doom_vi(sc, vi); cxgbe_vi_detach(vi); t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid); end_synchronized_op(sc, 0); return (0); } void t4_fatal_err(struct adapter *sc) { t4_set_reg_field(sc, A_SGE_CONTROL, F_GLOBALENABLE, 0); t4_intr_disable(sc); log(LOG_EMERG, "%s: encountered fatal error, adapter stopped.\n", device_get_nameunit(sc->dev)); } void t4_add_adapter(struct adapter *sc) { sx_xlock(&t4_list_lock); SLIST_INSERT_HEAD(&t4_list, sc, link); sx_xunlock(&t4_list_lock); } int t4_map_bars_0_and_4(struct adapter *sc) { sc->regs_rid = PCIR_BAR(0); sc->regs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, &sc->regs_rid, RF_ACTIVE); if (sc->regs_res == NULL) { device_printf(sc->dev, "cannot map registers.\n"); return (ENXIO); } sc->bt = rman_get_bustag(sc->regs_res); sc->bh = rman_get_bushandle(sc->regs_res); sc->mmio_len = rman_get_size(sc->regs_res); setbit(&sc->doorbells, DOORBELL_KDB); sc->msix_rid = PCIR_BAR(4); sc->msix_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, &sc->msix_rid, RF_ACTIVE); if (sc->msix_res == NULL) { device_printf(sc->dev, "cannot map MSI-X BAR.\n"); return (ENXIO); } return (0); } int t4_map_bar_2(struct adapter *sc) { /* * T4: only iWARP driver uses the userspace doorbells. There is no need * to map it if RDMA is disabled. */ if (is_t4(sc) && sc->rdmacaps == 0) return (0); sc->udbs_rid = PCIR_BAR(2); sc->udbs_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, &sc->udbs_rid, RF_ACTIVE); if (sc->udbs_res == NULL) { device_printf(sc->dev, "cannot map doorbell BAR.\n"); return (ENXIO); } sc->udbs_base = rman_get_virtual(sc->udbs_res); if (chip_id(sc) >= CHELSIO_T5) { setbit(&sc->doorbells, DOORBELL_UDB); #if defined(__i386__) || defined(__amd64__) if (t5_write_combine) { int rc, mode; /* * Enable write combining on BAR2. This is the * userspace doorbell BAR and is split into 128B * (UDBS_SEG_SIZE) doorbell regions, each associated * with an egress queue. The first 64B has the doorbell * and the second 64B can be used to submit a tx work * request with an implicit doorbell. */ rc = pmap_change_attr((vm_offset_t)sc->udbs_base, rman_get_size(sc->udbs_res), PAT_WRITE_COMBINING); if (rc == 0) { clrbit(&sc->doorbells, DOORBELL_UDB); setbit(&sc->doorbells, DOORBELL_WCWR); setbit(&sc->doorbells, DOORBELL_UDBWC); } else { t5_write_combine = 0; device_printf(sc->dev, "couldn't enable write combining: %d\n", rc); } mode = is_t5(sc) ? 
V_STATMODE(0) : V_T6_STATMODE(0); t4_write_reg(sc, A_SGE_STAT_CFG, V_STATSOURCE_T5(7) | mode); } #else t5_write_combine = 0; #endif sc->iwt.wc_en = t5_write_combine; } return (0); } struct memwin_init { uint32_t base; uint32_t aperture; }; static const struct memwin_init t4_memwin[NUM_MEMWIN] = { { MEMWIN0_BASE, MEMWIN0_APERTURE }, { MEMWIN1_BASE, MEMWIN1_APERTURE }, { MEMWIN2_BASE_T4, MEMWIN2_APERTURE_T4 } }; static const struct memwin_init t5_memwin[NUM_MEMWIN] = { { MEMWIN0_BASE, MEMWIN0_APERTURE }, { MEMWIN1_BASE, MEMWIN1_APERTURE }, { MEMWIN2_BASE_T5, MEMWIN2_APERTURE_T5 }, }; static void setup_memwin(struct adapter *sc) { const struct memwin_init *mw_init; struct memwin *mw; int i; uint32_t bar0; if (is_t4(sc)) { /* * Read low 32b of bar0 indirectly via the hardware backdoor * mechanism. Works from within PCI passthrough environments * too, where rman_get_start() can return a different value. We * need to program the T4 memory window decoders with the actual * addresses that will be coming across the PCIe link. */ bar0 = t4_hw_pci_read_cfg4(sc, PCIR_BAR(0)); bar0 &= (uint32_t) PCIM_BAR_MEM_BASE; mw_init = &t4_memwin[0]; } else { /* T5+ use the relative offset inside the PCIe BAR */ bar0 = 0; mw_init = &t5_memwin[0]; } for (i = 0, mw = &sc->memwin[0]; i < NUM_MEMWIN; i++, mw_init++, mw++) { rw_init(&mw->mw_lock, "memory window access"); mw->mw_base = mw_init->base; mw->mw_aperture = mw_init->aperture; mw->mw_curpos = 0; t4_write_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, i), (mw->mw_base + bar0) | V_BIR(0) | V_WINDOW(ilog2(mw->mw_aperture) - 10)); rw_wlock(&mw->mw_lock); position_memwin(sc, i, 0); rw_wunlock(&mw->mw_lock); } /* flush */ t4_read_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_BASE_WIN, 2)); } /* * Positions the memory window at the given address in the card's address space. * There are some alignment requirements and the actual position may be at an * address prior to the requested address. mw->mw_curpos always has the actual * position of the window. 
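 *
 * For example (illustrative numbers): a request for address 0x12345 leaves
 * mw->mw_curpos at 0x12340 on a T4 (16B alignment) or at 0x12300 on a T5/T6
 * (128B alignment); the caller then accesses its target at the corresponding
 * offset within the window.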
*/ static void position_memwin(struct adapter *sc, int idx, uint32_t addr) { struct memwin *mw; uint32_t pf; uint32_t reg; MPASS(idx >= 0 && idx < NUM_MEMWIN); mw = &sc->memwin[idx]; rw_assert(&mw->mw_lock, RA_WLOCKED); if (is_t4(sc)) { pf = 0; mw->mw_curpos = addr & ~0xf; /* start must be 16B aligned */ } else { pf = V_PFNUM(sc->pf); mw->mw_curpos = addr & ~0x7f; /* start must be 128B aligned */ } reg = PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, idx); t4_write_reg(sc, reg, mw->mw_curpos | pf); t4_read_reg(sc, reg); /* flush */ } static int rw_via_memwin(struct adapter *sc, int idx, uint32_t addr, uint32_t *val, int len, int rw) { struct memwin *mw; uint32_t mw_end, v; MPASS(idx >= 0 && idx < NUM_MEMWIN); /* Memory can only be accessed in naturally aligned 4 byte units */ if (addr & 3 || len & 3 || len <= 0) return (EINVAL); mw = &sc->memwin[idx]; while (len > 0) { rw_rlock(&mw->mw_lock); mw_end = mw->mw_curpos + mw->mw_aperture; if (addr >= mw_end || addr < mw->mw_curpos) { /* Will need to reposition the window */ if (!rw_try_upgrade(&mw->mw_lock)) { rw_runlock(&mw->mw_lock); rw_wlock(&mw->mw_lock); } rw_assert(&mw->mw_lock, RA_WLOCKED); position_memwin(sc, idx, addr); rw_downgrade(&mw->mw_lock); mw_end = mw->mw_curpos + mw->mw_aperture; } rw_assert(&mw->mw_lock, RA_RLOCKED); while (addr < mw_end && len > 0) { if (rw == 0) { v = t4_read_reg(sc, mw->mw_base + addr - mw->mw_curpos); *val++ = le32toh(v); } else { v = *val++; t4_write_reg(sc, mw->mw_base + addr - mw->mw_curpos, htole32(v)); } addr += 4; len -= 4; } rw_runlock(&mw->mw_lock); } return (0); } static inline int read_via_memwin(struct adapter *sc, int idx, uint32_t addr, uint32_t *val, int len) { return (rw_via_memwin(sc, idx, addr, val, len, 0)); } static inline int write_via_memwin(struct adapter *sc, int idx, uint32_t addr, const uint32_t *val, int len) { return (rw_via_memwin(sc, idx, addr, (void *)(uintptr_t)val, len, 1)); } static int t4_range_cmp(const void *a, const void *b) { return ((const struct t4_range *)a)->start - ((const struct t4_range *)b)->start; } /* * Verify that the memory range specified by the addr/len pair is valid within * the card's address space. 
*/ static int validate_mem_range(struct adapter *sc, uint32_t addr, int len) { struct t4_range mem_ranges[4], *r, *next; uint32_t em, addr_len; int i, n, remaining; /* Memory can only be accessed in naturally aligned 4 byte units */ if (addr & 3 || len & 3 || len <= 0) return (EINVAL); /* Enabled memories */ em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE); r = &mem_ranges[0]; n = 0; bzero(r, sizeof(mem_ranges)); if (em & F_EDRAM0_ENABLE) { addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR); r->size = G_EDRAM0_SIZE(addr_len) << 20; if (r->size > 0) { r->start = G_EDRAM0_BASE(addr_len) << 20; if (addr >= r->start && addr + len <= r->start + r->size) return (0); r++; n++; } } if (em & F_EDRAM1_ENABLE) { addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR); r->size = G_EDRAM1_SIZE(addr_len) << 20; if (r->size > 0) { r->start = G_EDRAM1_BASE(addr_len) << 20; if (addr >= r->start && addr + len <= r->start + r->size) return (0); r++; n++; } } if (em & F_EXT_MEM_ENABLE) { addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR); r->size = G_EXT_MEM_SIZE(addr_len) << 20; if (r->size > 0) { r->start = G_EXT_MEM_BASE(addr_len) << 20; if (addr >= r->start && addr + len <= r->start + r->size) return (0); r++; n++; } } if (is_t5(sc) && em & F_EXT_MEM1_ENABLE) { addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR); r->size = G_EXT_MEM1_SIZE(addr_len) << 20; if (r->size > 0) { r->start = G_EXT_MEM1_BASE(addr_len) << 20; if (addr >= r->start && addr + len <= r->start + r->size) return (0); r++; n++; } } MPASS(n <= nitems(mem_ranges)); if (n > 1) { /* Sort and merge the ranges. */ qsort(mem_ranges, n, sizeof(struct t4_range), t4_range_cmp); /* Start from index 0 and examine the next n - 1 entries. */ r = &mem_ranges[0]; for (remaining = n - 1; remaining > 0; remaining--, r++) { MPASS(r->size > 0); /* r is a valid entry. */ next = r + 1; MPASS(next->size > 0); /* and so is the next one. */ while (r->start + r->size >= next->start) { /* Merge the next one into the current entry. */ r->size = max(r->start + r->size, next->start + next->size) - r->start; n--; /* One fewer entry in total. */ if (--remaining == 0) goto done; /* short circuit */ next++; } if (next != r + 1) { /* * Some entries were merged into r and next * points to the first valid entry that couldn't * be merged. */ MPASS(next->size > 0); /* must be valid */ memcpy(r + 1, next, remaining * sizeof(*r)); #ifdef INVARIANTS /* * This so that the foo->size assertion in the * next iteration of the loop do the right * thing for entries that were pulled up and are * no longer valid. */ MPASS(n < nitems(mem_ranges)); bzero(&mem_ranges[n], (nitems(mem_ranges) - n) * sizeof(struct t4_range)); #endif } } done: /* Done merging the ranges. */ MPASS(n > 0); r = &mem_ranges[0]; for (i = 0; i < n; i++, r++) { if (addr >= r->start && addr + len <= r->start + r->size) return (0); } } return (EFAULT); } static int fwmtype_to_hwmtype(int mtype) { switch (mtype) { case FW_MEMTYPE_EDC0: return (MEM_EDC0); case FW_MEMTYPE_EDC1: return (MEM_EDC1); case FW_MEMTYPE_EXTMEM: return (MEM_MC0); case FW_MEMTYPE_EXTMEM1: return (MEM_MC1); default: panic("%s: cannot translate fw mtype %d.", __func__, mtype); } } /* * Verify that the memory range specified by the memtype/offset/len pair is * valid and lies entirely within the memtype specified. The global address of * the start of the range is returned in addr. 
*/ static int validate_mt_off_len(struct adapter *sc, int mtype, uint32_t off, int len, uint32_t *addr) { uint32_t em, addr_len, maddr; /* Memory can only be accessed in naturally aligned 4 byte units */ if (off & 3 || len & 3 || len == 0) return (EINVAL); em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE); switch (fwmtype_to_hwmtype(mtype)) { case MEM_EDC0: if (!(em & F_EDRAM0_ENABLE)) return (EINVAL); addr_len = t4_read_reg(sc, A_MA_EDRAM0_BAR); maddr = G_EDRAM0_BASE(addr_len) << 20; break; case MEM_EDC1: if (!(em & F_EDRAM1_ENABLE)) return (EINVAL); addr_len = t4_read_reg(sc, A_MA_EDRAM1_BAR); maddr = G_EDRAM1_BASE(addr_len) << 20; break; case MEM_MC: if (!(em & F_EXT_MEM_ENABLE)) return (EINVAL); addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR); maddr = G_EXT_MEM_BASE(addr_len) << 20; break; case MEM_MC1: if (!is_t5(sc) || !(em & F_EXT_MEM1_ENABLE)) return (EINVAL); addr_len = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR); maddr = G_EXT_MEM1_BASE(addr_len) << 20; break; default: return (EINVAL); } *addr = maddr + off; /* global address */ return (validate_mem_range(sc, *addr, len)); } static int fixup_devlog_params(struct adapter *sc) { struct devlog_params *dparams = &sc->params.devlog; int rc; rc = validate_mt_off_len(sc, dparams->memtype, dparams->start, dparams->size, &dparams->addr); return (rc); } static void update_nirq(struct intrs_and_queues *iaq, int nports) { int extra = T4_EXTRA_INTR; iaq->nirq = extra; iaq->nirq += nports * (iaq->nrxq + iaq->nofldrxq); iaq->nirq += nports * (iaq->num_vis - 1) * max(iaq->nrxq_vi, iaq->nnmrxq_vi); iaq->nirq += nports * (iaq->num_vis - 1) * iaq->nofldrxq_vi; } /* * Adjust requirements to fit the number of interrupts available. */ static void calculate_iaq(struct adapter *sc, struct intrs_and_queues *iaq, int itype, int navail) { int old_nirq; const int nports = sc->params.nports; MPASS(nports > 0); MPASS(navail > 0); bzero(iaq, sizeof(*iaq)); iaq->intr_type = itype; iaq->num_vis = t4_num_vis; iaq->ntxq = t4_ntxq; iaq->ntxq_vi = t4_ntxq_vi; iaq->nrxq = t4_nrxq; iaq->nrxq_vi = t4_nrxq_vi; #ifdef TCP_OFFLOAD if (is_offload(sc)) { iaq->nofldtxq = t4_nofldtxq; iaq->nofldtxq_vi = t4_nofldtxq_vi; iaq->nofldrxq = t4_nofldrxq; iaq->nofldrxq_vi = t4_nofldrxq_vi; } #endif #ifdef DEV_NETMAP iaq->nnmtxq_vi = t4_nnmtxq_vi; iaq->nnmrxq_vi = t4_nnmrxq_vi; #endif update_nirq(iaq, nports); if (iaq->nirq <= navail && (itype != INTR_MSI || powerof2(iaq->nirq))) { /* * This is the normal case -- there are enough interrupts for * everything. */ goto done; } /* * If extra VIs have been configured try reducing their count and see if * that works. */ while (iaq->num_vis > 1) { iaq->num_vis--; update_nirq(iaq, nports); if (iaq->nirq <= navail && (itype != INTR_MSI || powerof2(iaq->nirq))) { device_printf(sc->dev, "virtual interfaces per port " "reduced to %d from %d. nrxq=%u, nofldrxq=%u, " "nrxq_vi=%u nofldrxq_vi=%u, nnmrxq_vi=%u. " "itype %d, navail %u, nirq %d.\n", iaq->num_vis, t4_num_vis, iaq->nrxq, iaq->nofldrxq, iaq->nrxq_vi, iaq->nofldrxq_vi, iaq->nnmrxq_vi, itype, navail, iaq->nirq); goto done; } } /* * Extra VIs will not be created. Log a message if they were requested. */ MPASS(iaq->num_vis == 1); iaq->ntxq_vi = iaq->nrxq_vi = 0; iaq->nofldtxq_vi = iaq->nofldrxq_vi = 0; iaq->nnmtxq_vi = iaq->nnmrxq_vi = 0; if (iaq->num_vis != t4_num_vis) { device_printf(sc->dev, "extra virtual interfaces disabled. " "nrxq=%u, nofldrxq=%u, nrxq_vi=%u nofldrxq_vi=%u, " "nnmrxq_vi=%u. 
itype %d, navail %u, nirq %d.\n", iaq->nrxq, iaq->nofldrxq, iaq->nrxq_vi, iaq->nofldrxq_vi, iaq->nnmrxq_vi, itype, navail, iaq->nirq); } /* * Keep reducing the number of NIC rx queues to the next lower power of * 2 (for even RSS distribution) and halving the TOE rx queues and see * if that works. */ do { if (iaq->nrxq > 1) { do { iaq->nrxq--; } while (!powerof2(iaq->nrxq)); } if (iaq->nofldrxq > 1) iaq->nofldrxq >>= 1; old_nirq = iaq->nirq; update_nirq(iaq, nports); if (iaq->nirq <= navail && (itype != INTR_MSI || powerof2(iaq->nirq))) { device_printf(sc->dev, "running with reduced number of " "rx queues because of shortage of interrupts. " "nrxq=%u, nofldrxq=%u. " "itype %d, navail %u, nirq %d.\n", iaq->nrxq, iaq->nofldrxq, itype, navail, iaq->nirq); goto done; } } while (old_nirq != iaq->nirq); /* One interrupt for everything. Ugh. */ device_printf(sc->dev, "running with minimal number of queues. " "itype %d, navail %u.\n", itype, navail); iaq->nirq = 1; MPASS(iaq->nrxq == 1); iaq->ntxq = 1; if (iaq->nofldrxq > 1) iaq->nofldtxq = 1; done: MPASS(iaq->num_vis > 0); if (iaq->num_vis > 1) { MPASS(iaq->nrxq_vi > 0); MPASS(iaq->ntxq_vi > 0); } MPASS(iaq->nirq > 0); MPASS(iaq->nrxq > 0); MPASS(iaq->ntxq > 0); if (itype == INTR_MSI) { MPASS(powerof2(iaq->nirq)); } } static int cfg_itype_and_nqueues(struct adapter *sc, struct intrs_and_queues *iaq) { int rc, itype, navail, nalloc; for (itype = INTR_MSIX; itype; itype >>= 1) { if ((itype & t4_intr_types) == 0) continue; /* not allowed */ if (itype == INTR_MSIX) navail = pci_msix_count(sc->dev); else if (itype == INTR_MSI) navail = pci_msi_count(sc->dev); else navail = 1; restart: if (navail == 0) continue; calculate_iaq(sc, iaq, itype, navail); nalloc = iaq->nirq; rc = 0; if (itype == INTR_MSIX) rc = pci_alloc_msix(sc->dev, &nalloc); else if (itype == INTR_MSI) rc = pci_alloc_msi(sc->dev, &nalloc); if (rc == 0 && nalloc > 0) { if (nalloc == iaq->nirq) return (0); /* * Didn't get the number requested. Use whatever number * the kernel is willing to allocate. */ device_printf(sc->dev, "fewer vectors than requested, " "type=%d, req=%d, rcvd=%d; will downshift req.\n", itype, iaq->nirq, nalloc); pci_release_msi(sc->dev); navail = nalloc; goto restart; } device_printf(sc->dev, "failed to allocate vectors:%d, type=%d, req=%d, rcvd=%d\n", itype, rc, iaq->nirq, nalloc); } device_printf(sc->dev, "failed to find a usable interrupt type. 
" "allowed=%d, msi-x=%d, msi=%d, intx=1", t4_intr_types, pci_msix_count(sc->dev), pci_msi_count(sc->dev)); return (ENXIO); } #define FW_VERSION(chip) ( \ V_FW_HDR_FW_VER_MAJOR(chip##FW_VERSION_MAJOR) | \ V_FW_HDR_FW_VER_MINOR(chip##FW_VERSION_MINOR) | \ V_FW_HDR_FW_VER_MICRO(chip##FW_VERSION_MICRO) | \ V_FW_HDR_FW_VER_BUILD(chip##FW_VERSION_BUILD)) #define FW_INTFVER(chip, intf) (chip##FW_HDR_INTFVER_##intf) struct fw_info { uint8_t chip; char *kld_name; char *fw_mod_name; struct fw_hdr fw_hdr; /* XXX: waste of space, need a sparse struct */ } fw_info[] = { { .chip = CHELSIO_T4, .kld_name = "t4fw_cfg", .fw_mod_name = "t4fw", .fw_hdr = { .chip = FW_HDR_CHIP_T4, .fw_ver = htobe32_const(FW_VERSION(T4)), .intfver_nic = FW_INTFVER(T4, NIC), .intfver_vnic = FW_INTFVER(T4, VNIC), .intfver_ofld = FW_INTFVER(T4, OFLD), .intfver_ri = FW_INTFVER(T4, RI), .intfver_iscsipdu = FW_INTFVER(T4, ISCSIPDU), .intfver_iscsi = FW_INTFVER(T4, ISCSI), .intfver_fcoepdu = FW_INTFVER(T4, FCOEPDU), .intfver_fcoe = FW_INTFVER(T4, FCOE), }, }, { .chip = CHELSIO_T5, .kld_name = "t5fw_cfg", .fw_mod_name = "t5fw", .fw_hdr = { .chip = FW_HDR_CHIP_T5, .fw_ver = htobe32_const(FW_VERSION(T5)), .intfver_nic = FW_INTFVER(T5, NIC), .intfver_vnic = FW_INTFVER(T5, VNIC), .intfver_ofld = FW_INTFVER(T5, OFLD), .intfver_ri = FW_INTFVER(T5, RI), .intfver_iscsipdu = FW_INTFVER(T5, ISCSIPDU), .intfver_iscsi = FW_INTFVER(T5, ISCSI), .intfver_fcoepdu = FW_INTFVER(T5, FCOEPDU), .intfver_fcoe = FW_INTFVER(T5, FCOE), }, }, { .chip = CHELSIO_T6, .kld_name = "t6fw_cfg", .fw_mod_name = "t6fw", .fw_hdr = { .chip = FW_HDR_CHIP_T6, .fw_ver = htobe32_const(FW_VERSION(T6)), .intfver_nic = FW_INTFVER(T6, NIC), .intfver_vnic = FW_INTFVER(T6, VNIC), .intfver_ofld = FW_INTFVER(T6, OFLD), .intfver_ri = FW_INTFVER(T6, RI), .intfver_iscsipdu = FW_INTFVER(T6, ISCSIPDU), .intfver_iscsi = FW_INTFVER(T6, ISCSI), .intfver_fcoepdu = FW_INTFVER(T6, FCOEPDU), .intfver_fcoe = FW_INTFVER(T6, FCOE), }, } }; static struct fw_info * find_fw_info(int chip) { int i; for (i = 0; i < nitems(fw_info); i++) { if (fw_info[i].chip == chip) return (&fw_info[i]); } return (NULL); } /* * Is the given firmware API compatible with the one the driver was compiled * with? */ static int fw_compatible(const struct fw_hdr *hdr1, const struct fw_hdr *hdr2) { /* short circuit if it's the exact same firmware version */ if (hdr1->chip == hdr2->chip && hdr1->fw_ver == hdr2->fw_ver) return (1); /* * XXX: Is this too conservative? Perhaps I should limit this to the * features that are supported in the driver. */ #define SAME_INTF(x) (hdr1->intfver_##x == hdr2->intfver_##x) if (hdr1->chip == hdr2->chip && SAME_INTF(nic) && SAME_INTF(vnic) && SAME_INTF(ofld) && SAME_INTF(ri) && SAME_INTF(iscsipdu) && SAME_INTF(iscsi) && SAME_INTF(fcoepdu) && SAME_INTF(fcoe)) return (1); #undef SAME_INTF return (0); } /* * The firmware in the KLD is usable, but should it be installed? This routine * explains itself in detail if it indicates the KLD firmware should be * installed. 
*/ static int should_install_kld_fw(struct adapter *sc, int card_fw_usable, int k, int c) { const char *reason; if (!card_fw_usable) { reason = "incompatible or unusable"; goto install; } if (k > c) { reason = "older than the version bundled with this driver"; goto install; } if (t4_fw_install == 2 && k != c) { reason = "different than the version bundled with this driver"; goto install; } return (0); install: if (t4_fw_install == 0) { device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, " "but the driver is prohibited from installing a different " "firmware on the card.\n", G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c), G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason); return (0); } device_printf(sc->dev, "firmware on card (%u.%u.%u.%u) is %s, " "installing firmware %u.%u.%u.%u on card.\n", G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c), G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), reason, G_FW_HDR_FW_VER_MAJOR(k), G_FW_HDR_FW_VER_MINOR(k), G_FW_HDR_FW_VER_MICRO(k), G_FW_HDR_FW_VER_BUILD(k)); return (1); } /* * Establish contact with the firmware and determine if we are the master driver * or not, and whether we are responsible for chip initialization. */ static int prep_firmware(struct adapter *sc) { const struct firmware *fw = NULL, *default_cfg; int rc, pf, card_fw_usable, kld_fw_usable, need_fw_reset = 1; enum dev_state state; struct fw_info *fw_info; struct fw_hdr *card_fw; /* fw on the card */ const struct fw_hdr *kld_fw; /* fw in the KLD */ const struct fw_hdr *drv_fw; /* fw header the driver was compiled against */ /* This is the firmware whose headers the driver was compiled against */ fw_info = find_fw_info(chip_id(sc)); if (fw_info == NULL) { device_printf(sc->dev, "unable to look up firmware information for chip %d.\n", chip_id(sc)); return (EINVAL); } drv_fw = &fw_info->fw_hdr; /* * The firmware KLD contains many modules. The KLD name is also the * name of the module that contains the default config file. */ default_cfg = firmware_get(fw_info->kld_name); /* This is the firmware in the KLD */ fw = firmware_get(fw_info->fw_mod_name); if (fw != NULL) { kld_fw = (const void *)fw->data; kld_fw_usable = fw_compatible(drv_fw, kld_fw); } else { kld_fw = NULL; kld_fw_usable = 0; } /* Read the header of the firmware on the card */ card_fw = malloc(sizeof(*card_fw), M_CXGBE, M_ZERO | M_WAITOK); rc = -t4_read_flash(sc, FLASH_FW_START, sizeof (*card_fw) / sizeof (uint32_t), (uint32_t *)card_fw, 1); if (rc == 0) { card_fw_usable = fw_compatible(drv_fw, (const void*)card_fw); if (card_fw->fw_ver == be32toh(0xffffffff)) { uint32_t d = be32toh(kld_fw->fw_ver); if (!kld_fw_usable) { device_printf(sc->dev, "no firmware on the card and no usable " "firmware bundled with the driver.\n"); rc = EIO; goto done; } else if (t4_fw_install == 0) { device_printf(sc->dev, "no firmware on the card and the driver " "is prohibited from installing new " "firmware.\n"); rc = EIO; goto done; } device_printf(sc->dev, "no firmware on the card, " "installing firmware %d.%d.%d.%d\n", G_FW_HDR_FW_VER_MAJOR(d), G_FW_HDR_FW_VER_MINOR(d), G_FW_HDR_FW_VER_MICRO(d), G_FW_HDR_FW_VER_BUILD(d)); rc = t4_fw_forceinstall(sc, fw->data, fw->datasize); if (rc < 0) { rc = -rc; device_printf(sc->dev, "firmware install failed: %d.\n", rc); goto done; } memcpy(card_fw, kld_fw, sizeof(*card_fw)); card_fw_usable = 1; need_fw_reset = 0; } } else { device_printf(sc->dev, "Unable to read card's firmware header: %d\n", rc); card_fw_usable = 0; } /* Contact firmware. 
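 * t4_fw_hello() below asks to become the master PF (MASTER_MAY) and returns
 * the PF that actually holds that role; if it is our own mailbox we take on
 * MASTER_PF duties, otherwise the device must already have been initialized
 * by whichever function won.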
*/ rc = t4_fw_hello(sc, sc->mbox, sc->mbox, MASTER_MAY, &state); if (rc < 0 || state == DEV_STATE_ERR) { rc = -rc; device_printf(sc->dev, "failed to connect to the firmware: %d, %d.\n", rc, state); goto done; } pf = rc; if (pf == sc->mbox) sc->flags |= MASTER_PF; else if (state == DEV_STATE_UNINIT) { /* * We didn't get to be the master so we definitely won't be * configuring the chip. It's a bug if someone else hasn't * configured it already. */ device_printf(sc->dev, "couldn't be master(%d), " "device not already initialized either(%d).\n", rc, state); rc = EPROTO; goto done; } if (card_fw_usable && card_fw->fw_ver == drv_fw->fw_ver && (!kld_fw_usable || kld_fw->fw_ver == drv_fw->fw_ver)) { /* * Common case: the firmware on the card is an exact match and * the KLD is an exact match too, or the KLD is * absent/incompatible. Note that t4_fw_install = 2 is ignored * here -- use cxgbetool loadfw if you want to reinstall the * same firmware as the one on the card. */ } else if (kld_fw_usable && state == DEV_STATE_UNINIT && should_install_kld_fw(sc, card_fw_usable, be32toh(kld_fw->fw_ver), be32toh(card_fw->fw_ver))) { rc = -t4_fw_upgrade(sc, sc->mbox, fw->data, fw->datasize, 0); if (rc != 0) { device_printf(sc->dev, "failed to install firmware: %d\n", rc); goto done; } /* Installed successfully, update the cached header too. */ memcpy(card_fw, kld_fw, sizeof(*card_fw)); card_fw_usable = 1; need_fw_reset = 0; /* already reset as part of load_fw */ } if (!card_fw_usable) { uint32_t d, c, k; d = ntohl(drv_fw->fw_ver); c = ntohl(card_fw->fw_ver); k = kld_fw ? ntohl(kld_fw->fw_ver) : 0; device_printf(sc->dev, "Cannot find a usable firmware: " "fw_install %d, chip state %d, " "driver compiled with %d.%d.%d.%d, " "card has %d.%d.%d.%d, KLD has %d.%d.%d.%d\n", t4_fw_install, state, G_FW_HDR_FW_VER_MAJOR(d), G_FW_HDR_FW_VER_MINOR(d), G_FW_HDR_FW_VER_MICRO(d), G_FW_HDR_FW_VER_BUILD(d), G_FW_HDR_FW_VER_MAJOR(c), G_FW_HDR_FW_VER_MINOR(c), G_FW_HDR_FW_VER_MICRO(c), G_FW_HDR_FW_VER_BUILD(c), G_FW_HDR_FW_VER_MAJOR(k), G_FW_HDR_FW_VER_MINOR(k), G_FW_HDR_FW_VER_MICRO(k), G_FW_HDR_FW_VER_BUILD(k)); rc = EINVAL; goto done; } /* Reset device */ if (need_fw_reset && (rc = -t4_fw_reset(sc, sc->mbox, F_PIORSTMODE | F_PIORST)) != 0) { device_printf(sc->dev, "firmware reset failed: %d.\n", rc); if (rc != ETIMEDOUT && rc != EIO) t4_fw_bye(sc, sc->mbox); goto done; } sc->flags |= FW_OK; rc = get_params__pre_init(sc); if (rc != 0) goto done; /* error message displayed already */ /* Partition adapter resources as specified in the config file. */ if (state == DEV_STATE_UNINIT) { KASSERT(sc->flags & MASTER_PF, ("%s: trying to change chip settings when not master.", __func__)); rc = partition_resources(sc, default_cfg, fw_info->kld_name); if (rc != 0) goto done; /* error message displayed already */ t4_tweak_chip_settings(sc); /* get basic stuff going */ rc = -t4_fw_initialize(sc, sc->mbox); if (rc != 0) { device_printf(sc->dev, "fw init failed: %d.\n", rc); goto done; } } else { snprintf(sc->cfg_file, sizeof(sc->cfg_file), "pf%d", pf); sc->cfcsum = 0; } done: free(card_fw, M_CXGBE); if (fw != NULL) firmware_put(fw, FIRMWARE_UNLOAD); if (default_cfg != NULL) firmware_put(default_cfg, FIRMWARE_UNLOAD); return (rc); } #define FW_PARAM_DEV(param) \ (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | \ V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param)) #define FW_PARAM_PFVF(param) \ (V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | \ V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param)) /* * Partition chip resources for use between various PFs, VFs, etc. 
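 *
 * The profile comes from the config-file tunable handled below: "default" and
 * "flash" are built in, anything else is fetched as a firmware(9) module named
 * <prefix>_<profile>.  For instance (assuming the hw.cxgbe.config_file
 * spelling of the tunable), selecting the "uwire" profile on a T5 makes the
 * code look for a module called "t5fw_cfg_uwire":
 *
 *	hw.cxgbe.config_file="uwire"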
*/ static int partition_resources(struct adapter *sc, const struct firmware *default_cfg, const char *name_prefix) { const struct firmware *cfg = NULL; int rc = 0; struct fw_caps_config_cmd caps; uint32_t mtype, moff, finicsum, cfcsum; /* * Figure out what configuration file to use. Pick the default config * file for the card if the user hasn't specified one explicitly. */ snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", t4_cfg_file); if (strncmp(t4_cfg_file, DEFAULT_CF, sizeof(t4_cfg_file)) == 0) { /* Card specific overrides go here. */ if (pci_get_device(sc->dev) == 0x440a) snprintf(sc->cfg_file, sizeof(sc->cfg_file), UWIRE_CF); if (is_fpga(sc)) snprintf(sc->cfg_file, sizeof(sc->cfg_file), FPGA_CF); } /* * We need to load another module if the profile is anything except * "default" or "flash". */ if (strncmp(sc->cfg_file, DEFAULT_CF, sizeof(sc->cfg_file)) != 0 && strncmp(sc->cfg_file, FLASH_CF, sizeof(sc->cfg_file)) != 0) { char s[32]; snprintf(s, sizeof(s), "%s_%s", name_prefix, sc->cfg_file); cfg = firmware_get(s); if (cfg == NULL) { if (default_cfg != NULL) { device_printf(sc->dev, "unable to load module \"%s\" for " "configuration profile \"%s\", will use " "the default config file instead.\n", s, sc->cfg_file); snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", DEFAULT_CF); } else { device_printf(sc->dev, "unable to load module \"%s\" for " "configuration profile \"%s\", will use " "the config file on the card's flash " "instead.\n", s, sc->cfg_file); snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", FLASH_CF); } } } if (strncmp(sc->cfg_file, DEFAULT_CF, sizeof(sc->cfg_file)) == 0 && default_cfg == NULL) { device_printf(sc->dev, "default config file not available, will use the config " "file on the card's flash instead.\n"); snprintf(sc->cfg_file, sizeof(sc->cfg_file), "%s", FLASH_CF); } if (strncmp(sc->cfg_file, FLASH_CF, sizeof(sc->cfg_file)) != 0) { u_int cflen; const uint32_t *cfdata; uint32_t param, val, addr; KASSERT(cfg != NULL || default_cfg != NULL, ("%s: no config to upload", __func__)); /* * Ask the firmware where it wants us to upload the config file. */ param = FW_PARAM_DEV(CF); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val); if (rc != 0) { /* No support for config file? Shouldn't happen. */ device_printf(sc->dev, "failed to query config file location: %d.\n", rc); goto done; } mtype = G_FW_PARAMS_PARAM_Y(val); moff = G_FW_PARAMS_PARAM_Z(val) << 16; /* * XXX: sheer laziness. We deliberately added 4 bytes of * useless stuffing/comments at the end of the config file so * it's ok to simply throw away the last remaining bytes when * the config file is not an exact multiple of 4. This also * helps with the validate_mt_off_len check. */ if (cfg != NULL) { cflen = cfg->datasize & ~3; cfdata = cfg->data; } else { cflen = default_cfg->datasize & ~3; cfdata = default_cfg->data; } if (cflen > FLASH_CFG_MAX_SIZE) { device_printf(sc->dev, "config file too long (%d, max allowed is %d). " "Will try to use the config on the card, if any.\n", cflen, FLASH_CFG_MAX_SIZE); goto use_config_on_flash; } rc = validate_mt_off_len(sc, mtype, moff, cflen, &addr); if (rc != 0) { device_printf(sc->dev, "%s: addr (%d/0x%x) or len %d is not valid: %d. 
" "Will try to use the config on the card, if any.\n", __func__, mtype, moff, cflen, rc); goto use_config_on_flash; } write_via_memwin(sc, 2, addr, cfdata, cflen); } else { use_config_on_flash: mtype = FW_MEMTYPE_FLASH; moff = t4_flash_cfg_addr(sc); } bzero(&caps, sizeof(caps)); caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_READ); caps.cfvalid_to_len16 = htobe32(F_FW_CAPS_CONFIG_CMD_CFVALID | V_FW_CAPS_CONFIG_CMD_MEMTYPE_CF(mtype) | V_FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(moff >> 16) | FW_LEN16(caps)); rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps); if (rc != 0) { device_printf(sc->dev, "failed to pre-process config file: %d " "(mtype %d, moff 0x%x).\n", rc, mtype, moff); goto done; } finicsum = be32toh(caps.finicsum); cfcsum = be32toh(caps.cfcsum); if (finicsum != cfcsum) { device_printf(sc->dev, "WARNING: config file checksum mismatch: %08x %08x\n", finicsum, cfcsum); } sc->cfcsum = cfcsum; #define LIMIT_CAPS(x) do { \ caps.x &= htobe16(t4_##x##_allowed); \ } while (0) /* * Let the firmware know what features will (not) be used so it can tune * things accordingly. */ LIMIT_CAPS(nbmcaps); LIMIT_CAPS(linkcaps); LIMIT_CAPS(switchcaps); LIMIT_CAPS(niccaps); LIMIT_CAPS(toecaps); LIMIT_CAPS(rdmacaps); LIMIT_CAPS(cryptocaps); LIMIT_CAPS(iscsicaps); LIMIT_CAPS(fcoecaps); #undef LIMIT_CAPS caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_WRITE); caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps)); rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), NULL); if (rc != 0) { device_printf(sc->dev, "failed to process config file: %d.\n", rc); } done: if (cfg != NULL) firmware_put(cfg, FIRMWARE_UNLOAD); return (rc); } /* * Retrieve parameters that are needed (or nice to have) very early. */ static int get_params__pre_init(struct adapter *sc) { int rc; uint32_t param[2], val[2]; t4_get_version_info(sc); snprintf(sc->fw_version, sizeof(sc->fw_version), "%u.%u.%u.%u", G_FW_HDR_FW_VER_MAJOR(sc->params.fw_vers), G_FW_HDR_FW_VER_MINOR(sc->params.fw_vers), G_FW_HDR_FW_VER_MICRO(sc->params.fw_vers), G_FW_HDR_FW_VER_BUILD(sc->params.fw_vers)); snprintf(sc->bs_version, sizeof(sc->bs_version), "%u.%u.%u.%u", G_FW_HDR_FW_VER_MAJOR(sc->params.bs_vers), G_FW_HDR_FW_VER_MINOR(sc->params.bs_vers), G_FW_HDR_FW_VER_MICRO(sc->params.bs_vers), G_FW_HDR_FW_VER_BUILD(sc->params.bs_vers)); snprintf(sc->tp_version, sizeof(sc->tp_version), "%u.%u.%u.%u", G_FW_HDR_FW_VER_MAJOR(sc->params.tp_vers), G_FW_HDR_FW_VER_MINOR(sc->params.tp_vers), G_FW_HDR_FW_VER_MICRO(sc->params.tp_vers), G_FW_HDR_FW_VER_BUILD(sc->params.tp_vers)); snprintf(sc->er_version, sizeof(sc->er_version), "%u.%u.%u.%u", G_FW_HDR_FW_VER_MAJOR(sc->params.er_vers), G_FW_HDR_FW_VER_MINOR(sc->params.er_vers), G_FW_HDR_FW_VER_MICRO(sc->params.er_vers), G_FW_HDR_FW_VER_BUILD(sc->params.er_vers)); param[0] = FW_PARAM_DEV(PORTVEC); param[1] = FW_PARAM_DEV(CCLK); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query parameters (pre_init): %d.\n", rc); return (rc); } sc->params.portvec = val[0]; sc->params.nports = bitcount32(val[0]); sc->params.vpd.cclk = val[1]; /* Read device log parameters. */ rc = -t4_init_devlog_params(sc, 1); if (rc == 0) fixup_devlog_params(sc); else { device_printf(sc->dev, "failed to get devlog parameters: %d.\n", rc); rc = 0; /* devlog isn't critical for device operation */ } return (rc); } /* * Retrieve various parameters that are of interest to the driver. 
The device * has been initialized by the firmware at this point. */ static int get_params__post_init(struct adapter *sc) { int rc; uint32_t param[7], val[7]; struct fw_caps_config_cmd caps; param[0] = FW_PARAM_PFVF(IQFLINT_START); param[1] = FW_PARAM_PFVF(EQ_START); param[2] = FW_PARAM_PFVF(FILTER_START); param[3] = FW_PARAM_PFVF(FILTER_END); param[4] = FW_PARAM_PFVF(L2T_START); param[5] = FW_PARAM_PFVF(L2T_END); param[6] = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) | V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_VDD); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 7, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query parameters (post_init): %d.\n", rc); return (rc); } sc->sge.iq_start = val[0]; sc->sge.eq_start = val[1]; sc->tids.ftid_base = val[2]; sc->tids.nftids = val[3] - val[2] + 1; sc->params.ftid_min = val[2]; sc->params.ftid_max = val[3]; sc->vres.l2t.start = val[4]; sc->vres.l2t.size = val[5] - val[4] + 1; KASSERT(sc->vres.l2t.size <= L2T_SIZE, ("%s: L2 table size (%u) larger than expected (%u)", __func__, sc->vres.l2t.size, L2T_SIZE)); sc->params.core_vdd = val[6]; /* * MPSBGMAP is queried separately because only recent firmwares support * it as a parameter and we don't want the compound query above to fail * on older firmwares. */ param[0] = FW_PARAM_DEV(MPSBGMAP); val[0] = 0; rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, param, val); if (rc == 0) sc->params.mps_bg_map = val[0]; else sc->params.mps_bg_map = 0; /* get capabilites */ bzero(&caps, sizeof(caps)); caps.op_to_write = htobe32(V_FW_CMD_OP(FW_CAPS_CONFIG_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_READ); caps.cfvalid_to_len16 = htobe32(FW_LEN16(caps)); rc = -t4_wr_mbox(sc, sc->mbox, &caps, sizeof(caps), &caps); if (rc != 0) { device_printf(sc->dev, "failed to get card capabilities: %d.\n", rc); return (rc); } #define READ_CAPS(x) do { \ sc->x = htobe16(caps.x); \ } while (0) READ_CAPS(nbmcaps); READ_CAPS(linkcaps); READ_CAPS(switchcaps); READ_CAPS(niccaps); READ_CAPS(toecaps); READ_CAPS(rdmacaps); READ_CAPS(cryptocaps); READ_CAPS(iscsicaps); READ_CAPS(fcoecaps); /* * The firmware attempts memfree TOE configuration for -SO cards and * will report toecaps=0 if it runs out of resources (this depends on * the config file). It may not report 0 for other capabilities * dependent on the TOE in this case. Set them to 0 here so that the * driver doesn't bother tracking resources that will never be used. 
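* (iSCSI and RDMA both sit on top of the TOE, so their capabilities are meaningless without it.)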
*/ if (sc->toecaps == 0) { sc->iscsicaps = 0; sc->rdmacaps = 0; } if (sc->niccaps & FW_CAPS_CONFIG_NIC_ETHOFLD) { param[0] = FW_PARAM_PFVF(ETHOFLD_START); param[1] = FW_PARAM_PFVF(ETHOFLD_END); param[2] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 3, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query NIC parameters: %d.\n", rc); return (rc); } sc->tids.etid_base = val[0]; sc->params.etid_min = val[0]; sc->tids.netids = val[1] - val[0] + 1; sc->params.netids = sc->tids.netids; sc->params.eo_wr_cred = val[2]; sc->params.ethoffload = 1; } if (sc->toecaps) { /* query offload-related parameters */ param[0] = FW_PARAM_DEV(NTID); param[1] = FW_PARAM_PFVF(SERVER_START); param[2] = FW_PARAM_PFVF(SERVER_END); param[3] = FW_PARAM_PFVF(TDDP_START); param[4] = FW_PARAM_PFVF(TDDP_END); param[5] = FW_PARAM_DEV(FLOWC_BUFFIFO_SZ); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query TOE parameters: %d.\n", rc); return (rc); } sc->tids.ntids = val[0]; sc->tids.natids = min(sc->tids.ntids / 2, MAX_ATIDS); sc->tids.stid_base = val[1]; sc->tids.nstids = val[2] - val[1] + 1; sc->vres.ddp.start = val[3]; sc->vres.ddp.size = val[4] - val[3] + 1; sc->params.ofldq_wr_cred = val[5]; sc->params.offload = 1; } if (sc->rdmacaps) { param[0] = FW_PARAM_PFVF(STAG_START); param[1] = FW_PARAM_PFVF(STAG_END); param[2] = FW_PARAM_PFVF(RQ_START); param[3] = FW_PARAM_PFVF(RQ_END); param[4] = FW_PARAM_PFVF(PBL_START); param[5] = FW_PARAM_PFVF(PBL_END); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query RDMA parameters(1): %d.\n", rc); return (rc); } sc->vres.stag.start = val[0]; sc->vres.stag.size = val[1] - val[0] + 1; sc->vres.rq.start = val[2]; sc->vres.rq.size = val[3] - val[2] + 1; sc->vres.pbl.start = val[4]; sc->vres.pbl.size = val[5] - val[4] + 1; param[0] = FW_PARAM_PFVF(SQRQ_START); param[1] = FW_PARAM_PFVF(SQRQ_END); param[2] = FW_PARAM_PFVF(CQ_START); param[3] = FW_PARAM_PFVF(CQ_END); param[4] = FW_PARAM_PFVF(OCQ_START); param[5] = FW_PARAM_PFVF(OCQ_END); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 6, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query RDMA parameters(2): %d.\n", rc); return (rc); } sc->vres.qp.start = val[0]; sc->vres.qp.size = val[1] - val[0] + 1; sc->vres.cq.start = val[2]; sc->vres.cq.size = val[3] - val[2] + 1; sc->vres.ocq.start = val[4]; sc->vres.ocq.size = val[5] - val[4] + 1; param[0] = FW_PARAM_PFVF(SRQ_START); param[1] = FW_PARAM_PFVF(SRQ_END); param[2] = FW_PARAM_DEV(MAXORDIRD_QP); param[3] = FW_PARAM_DEV(MAXIRD_ADAPTER); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 4, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query RDMA parameters(3): %d.\n", rc); return (rc); } sc->vres.srq.start = val[0]; sc->vres.srq.size = val[1] - val[0] + 1; sc->params.max_ordird_qp = val[2]; sc->params.max_ird_adapter = val[3]; } if (sc->iscsicaps) { param[0] = FW_PARAM_PFVF(ISCSI_START); param[1] = FW_PARAM_PFVF(ISCSI_END); rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 2, param, val); if (rc != 0) { device_printf(sc->dev, "failed to query iSCSI parameters: %d.\n", rc); return (rc); } sc->vres.iscsi.start = val[0]; sc->vres.iscsi.size = val[1] - val[0] + 1; } t4_init_sge_params(sc); /* * We've got the params we wanted to query via the firmware. Now grab * some others directly from the chip. 
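* t4_read_chip_settings() reads those from chip registers rather than through the firmware mailbox.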
*/ rc = t4_read_chip_settings(sc); return (rc); } static int set_params__post_init(struct adapter *sc) { uint32_t param, val; #ifdef TCP_OFFLOAD int i, v, shift; #endif /* ask for encapsulated CPLs */ param = FW_PARAM_PFVF(CPLFW4MSG_ENCAP); val = 1; (void)t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val); #ifdef TCP_OFFLOAD /* * Override the TOE timers with user provided tunables. This is not the * recommended way to change the timers (the firmware config file is) so * these tunables are not documented. * * All the timer tunables are in microseconds. */ if (t4_toe_keepalive_idle != 0) { v = us_to_tcp_ticks(sc, t4_toe_keepalive_idle); v &= M_KEEPALIVEIDLE; t4_set_reg_field(sc, A_TP_KEEP_IDLE, V_KEEPALIVEIDLE(M_KEEPALIVEIDLE), V_KEEPALIVEIDLE(v)); } if (t4_toe_keepalive_interval != 0) { v = us_to_tcp_ticks(sc, t4_toe_keepalive_interval); v &= M_KEEPALIVEINTVL; t4_set_reg_field(sc, A_TP_KEEP_INTVL, V_KEEPALIVEINTVL(M_KEEPALIVEINTVL), V_KEEPALIVEINTVL(v)); } if (t4_toe_keepalive_count != 0) { v = t4_toe_keepalive_count & M_KEEPALIVEMAXR2; t4_set_reg_field(sc, A_TP_SHIFT_CNT, V_KEEPALIVEMAXR1(M_KEEPALIVEMAXR1) | V_KEEPALIVEMAXR2(M_KEEPALIVEMAXR2), V_KEEPALIVEMAXR1(1) | V_KEEPALIVEMAXR2(v)); } if (t4_toe_rexmt_min != 0) { v = us_to_tcp_ticks(sc, t4_toe_rexmt_min); v &= M_RXTMIN; t4_set_reg_field(sc, A_TP_RXT_MIN, V_RXTMIN(M_RXTMIN), V_RXTMIN(v)); } if (t4_toe_rexmt_max != 0) { v = us_to_tcp_ticks(sc, t4_toe_rexmt_max); v &= M_RXTMAX; t4_set_reg_field(sc, A_TP_RXT_MAX, V_RXTMAX(M_RXTMAX), V_RXTMAX(v)); } if (t4_toe_rexmt_count != 0) { v = t4_toe_rexmt_count & M_RXTSHIFTMAXR2; t4_set_reg_field(sc, A_TP_SHIFT_CNT, V_RXTSHIFTMAXR1(M_RXTSHIFTMAXR1) | V_RXTSHIFTMAXR2(M_RXTSHIFTMAXR2), V_RXTSHIFTMAXR1(1) | V_RXTSHIFTMAXR2(v)); } for (i = 0; i < nitems(t4_toe_rexmt_backoff); i++) { if (t4_toe_rexmt_backoff[i] != -1) { v = t4_toe_rexmt_backoff[i] & M_TIMERBACKOFFINDEX0; shift = (i & 3) << 3; t4_set_reg_field(sc, A_TP_TCP_BACKOFF_REG0 + (i & ~3), M_TIMERBACKOFFINDEX0 << shift, v << shift); } } #endif return (0); } #undef FW_PARAM_PFVF #undef FW_PARAM_DEV static void t4_set_desc(struct adapter *sc) { char buf[128]; struct adapter_params *p = &sc->params; snprintf(buf, sizeof(buf), "Chelsio %s", p->vpd.id); device_set_desc_copy(sc->dev, buf); } static void build_medialist(struct port_info *pi, struct ifmedia *media) { int m; PORT_LOCK_ASSERT_OWNED(pi); ifmedia_removeall(media); /* * XXX: Would it be better to ifmedia_add all 4 combinations of pause * settings for every speed instead of just txpause|rxpause? ifconfig * media display looks much better if autoselect is the only case where * ifm_current is different from ifm_active. If the user picks anything * except txpause|rxpause the display is ugly. 
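* For now each speed gets a single txpause|rxpause entry (plus autoselect where the port supports it); the switch below keys off the port type and, for modular ports, the installed transceiver type.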
*/ m = IFM_ETHER | IFM_FDX | IFM_ETH_TXPAUSE | IFM_ETH_RXPAUSE; switch(pi->port_type) { case FW_PORT_TYPE_BT_XFI: case FW_PORT_TYPE_BT_XAUI: ifmedia_add(media, m | IFM_10G_T, 0, NULL); /* fall through */ case FW_PORT_TYPE_BT_SGMII: ifmedia_add(media, m | IFM_1000_T, 0, NULL); ifmedia_add(media, m | IFM_100_TX, 0, NULL); ifmedia_add(media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(media, IFM_ETHER | IFM_AUTO); break; case FW_PORT_TYPE_CX4: ifmedia_add(media, m | IFM_10G_CX4, 0, NULL); ifmedia_set(media, m | IFM_10G_CX4); break; case FW_PORT_TYPE_QSFP_10G: case FW_PORT_TYPE_SFP: case FW_PORT_TYPE_FIBER_XFI: case FW_PORT_TYPE_FIBER_XAUI: switch (pi->mod_type) { case FW_PORT_MOD_TYPE_LR: ifmedia_add(media, m | IFM_10G_LR, 0, NULL); ifmedia_set(media, m | IFM_10G_LR); break; case FW_PORT_MOD_TYPE_SR: ifmedia_add(media, m | IFM_10G_SR, 0, NULL); ifmedia_set(media, m | IFM_10G_SR); break; case FW_PORT_MOD_TYPE_LRM: ifmedia_add(media, m | IFM_10G_LRM, 0, NULL); ifmedia_set(media, m | IFM_10G_LRM); break; case FW_PORT_MOD_TYPE_TWINAX_PASSIVE: case FW_PORT_MOD_TYPE_TWINAX_ACTIVE: ifmedia_add(media, m | IFM_10G_TWINAX, 0, NULL); ifmedia_set(media, m | IFM_10G_TWINAX); break; case FW_PORT_MOD_TYPE_NONE: m &= ~IFM_FDX; ifmedia_add(media, m | IFM_NONE, 0, NULL); ifmedia_set(media, m | IFM_NONE); break; case FW_PORT_MOD_TYPE_NA: case FW_PORT_MOD_TYPE_ER: default: device_printf(pi->dev, "unknown port_type (%d), mod_type (%d)\n", pi->port_type, pi->mod_type); ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL); ifmedia_set(media, m | IFM_UNKNOWN); break; } break; case FW_PORT_TYPE_CR_QSFP: case FW_PORT_TYPE_SFP28: case FW_PORT_TYPE_KR_SFP28: switch (pi->mod_type) { case FW_PORT_MOD_TYPE_SR: ifmedia_add(media, m | IFM_25G_SR, 0, NULL); ifmedia_set(media, m | IFM_25G_SR); break; case FW_PORT_MOD_TYPE_TWINAX_PASSIVE: case FW_PORT_MOD_TYPE_TWINAX_ACTIVE: ifmedia_add(media, m | IFM_25G_CR, 0, NULL); ifmedia_set(media, m | IFM_25G_CR); break; case FW_PORT_MOD_TYPE_NONE: m &= ~IFM_FDX; ifmedia_add(media, m | IFM_NONE, 0, NULL); ifmedia_set(media, m | IFM_NONE); break; default: device_printf(pi->dev, "unknown port_type (%d), mod_type (%d)\n", pi->port_type, pi->mod_type); ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL); ifmedia_set(media, m | IFM_UNKNOWN); break; } break; case FW_PORT_TYPE_QSFP: switch (pi->mod_type) { case FW_PORT_MOD_TYPE_LR: ifmedia_add(media, m | IFM_40G_LR4, 0, NULL); ifmedia_set(media, m | IFM_40G_LR4); break; case FW_PORT_MOD_TYPE_SR: ifmedia_add(media, m | IFM_40G_SR4, 0, NULL); ifmedia_set(media, m | IFM_40G_SR4); break; case FW_PORT_MOD_TYPE_TWINAX_PASSIVE: case FW_PORT_MOD_TYPE_TWINAX_ACTIVE: ifmedia_add(media, m | IFM_40G_CR4, 0, NULL); ifmedia_set(media, m | IFM_40G_CR4); break; case FW_PORT_MOD_TYPE_NONE: m &= ~IFM_FDX; ifmedia_add(media, m | IFM_NONE, 0, NULL); ifmedia_set(media, m | IFM_NONE); break; default: device_printf(pi->dev, "unknown port_type (%d), mod_type (%d)\n", pi->port_type, pi->mod_type); ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL); ifmedia_set(media, m | IFM_UNKNOWN); break; } break; case FW_PORT_TYPE_KR4_100G: case FW_PORT_TYPE_CR4_QSFP: switch (pi->mod_type) { case FW_PORT_MOD_TYPE_LR: ifmedia_add(media, m | IFM_100G_LR4, 0, NULL); ifmedia_set(media, m | IFM_100G_LR4); break; case FW_PORT_MOD_TYPE_SR: ifmedia_add(media, m | IFM_100G_SR4, 0, NULL); ifmedia_set(media, m | IFM_100G_SR4); break; case FW_PORT_MOD_TYPE_TWINAX_PASSIVE: case FW_PORT_MOD_TYPE_TWINAX_ACTIVE: ifmedia_add(media, m | IFM_100G_CR4, 0, NULL); ifmedia_set(media, m | IFM_100G_CR4); break; case 
FW_PORT_MOD_TYPE_NONE: m &= ~IFM_FDX; ifmedia_add(media, m | IFM_NONE, 0, NULL); ifmedia_set(media, m | IFM_NONE); break; default: device_printf(pi->dev, "unknown port_type (%d), mod_type (%d)\n", pi->port_type, pi->mod_type); ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL); ifmedia_set(media, m | IFM_UNKNOWN); break; } break; default: device_printf(pi->dev, "unknown port_type (%d), mod_type (%d)\n", pi->port_type, pi->mod_type); ifmedia_add(media, m | IFM_UNKNOWN, 0, NULL); ifmedia_set(media, m | IFM_UNKNOWN); break; } } /* * Update all the requested_* fields in the link config and then send a mailbox * command to apply the settings. */ static void init_l1cfg(struct port_info *pi) { struct adapter *sc = pi->adapter; struct link_config *lc = &pi->link_cfg; int rc; ASSERT_SYNCHRONIZED_OP(sc); if (t4_autoneg != 0 && lc->supported & FW_PORT_CAP_ANEG) { lc->requested_aneg = AUTONEG_ENABLE; lc->requested_speed = 0; } else { lc->requested_aneg = AUTONEG_DISABLE; lc->requested_speed = port_top_speed(pi); /* in Gbps */ } lc->requested_fc = t4_pause_settings & (PAUSE_TX | PAUSE_RX); if (t4_fec != -1) { lc->requested_fec = t4_fec & (FEC_RS | FEC_BASER_RS | FEC_RESERVED); } else { /* Use the suggested value provided by the firmware in acaps */ if (lc->advertising & FW_PORT_CAP_FEC_RS) lc->requested_fec = FEC_RS; else if (lc->advertising & FW_PORT_CAP_FEC_BASER_RS) lc->requested_fec = FEC_BASER_RS; else if (lc->advertising & FW_PORT_CAP_FEC_RESERVED) lc->requested_fec = FEC_RESERVED; else lc->requested_fec = 0; } rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, lc); if (rc != 0) { device_printf(pi->dev, "l1cfg failed: %d\n", rc); } else { lc->fc = lc->requested_fc; lc->fec = lc->requested_fec; } } #define FW_MAC_EXACT_CHUNK 7 /* * Program the port's XGMAC based on parameters in ifnet. The caller also * indicates which parameters should be programmed (the rest are left alone). */ int update_mac_settings(struct ifnet *ifp, int flags) { int rc = 0; struct vi_info *vi = ifp->if_softc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; int mtu = -1, promisc = -1, allmulti = -1, vlanex = -1; ASSERT_SYNCHRONIZED_OP(sc); KASSERT(flags, ("%s: not told what to update.", __func__)); if (flags & XGMAC_MTU) mtu = ifp->if_mtu; if (flags & XGMAC_PROMISC) promisc = ifp->if_flags & IFF_PROMISC ? 1 : 0; if (flags & XGMAC_ALLMULTI) allmulti = ifp->if_flags & IFF_ALLMULTI ? 1 : 0; if (flags & XGMAC_VLANEX) vlanex = ifp->if_capenable & IFCAP_VLAN_HWTAGGING ? 
1 : 0; if (flags & (XGMAC_MTU|XGMAC_PROMISC|XGMAC_ALLMULTI|XGMAC_VLANEX)) { rc = -t4_set_rxmode(sc, sc->mbox, vi->viid, mtu, promisc, allmulti, 1, vlanex, false); if (rc) { if_printf(ifp, "set_rxmode (%x) failed: %d\n", flags, rc); return (rc); } } if (flags & XGMAC_UCADDR) { uint8_t ucaddr[ETHER_ADDR_LEN]; bcopy(IF_LLADDR(ifp), ucaddr, sizeof(ucaddr)); rc = t4_change_mac(sc, sc->mbox, vi->viid, vi->xact_addr_filt, ucaddr, true, true); if (rc < 0) { rc = -rc; if_printf(ifp, "change_mac failed: %d\n", rc); return (rc); } else { vi->xact_addr_filt = rc; rc = 0; } } if (flags & XGMAC_MCADDRS) { const uint8_t *mcaddr[FW_MAC_EXACT_CHUNK]; int del = 1; uint64_t hash = 0; struct ifmultiaddr *ifma; int i = 0, j; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; mcaddr[i] = LLADDR((struct sockaddr_dl *)ifma->ifma_addr); MPASS(ETHER_IS_MULTICAST(mcaddr[i])); i++; if (i == FW_MAC_EXACT_CHUNK) { rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid, del, i, mcaddr, NULL, &hash, 0); if (rc < 0) { rc = -rc; for (j = 0; j < i; j++) { if_printf(ifp, "failed to add mc address" " %02x:%02x:%02x:" "%02x:%02x:%02x rc=%d\n", mcaddr[j][0], mcaddr[j][1], mcaddr[j][2], mcaddr[j][3], mcaddr[j][4], mcaddr[j][5], rc); } goto mcfail; } del = 0; i = 0; } } if (i > 0) { rc = t4_alloc_mac_filt(sc, sc->mbox, vi->viid, del, i, mcaddr, NULL, &hash, 0); if (rc < 0) { rc = -rc; for (j = 0; j < i; j++) { if_printf(ifp, "failed to add mc address" " %02x:%02x:%02x:" "%02x:%02x:%02x rc=%d\n", mcaddr[j][0], mcaddr[j][1], mcaddr[j][2], mcaddr[j][3], mcaddr[j][4], mcaddr[j][5], rc); } goto mcfail; } } rc = -t4_set_addr_hash(sc, sc->mbox, vi->viid, 0, hash, 0); if (rc != 0) if_printf(ifp, "failed to set mc address hash: %d", rc); mcfail: if_maddr_runlock(ifp); } return (rc); } /* * {begin|end}_synchronized_op must be called from the same thread. */ int begin_synchronized_op(struct adapter *sc, struct vi_info *vi, int flags, char *wmesg) { int rc, pri; #ifdef WITNESS /* the caller thinks it's ok to sleep, but is it really? */ if (flags & SLEEP_OK) WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "begin_synchronized_op"); #endif if (INTR_OK) pri = PCATCH; else pri = 0; ADAPTER_LOCK(sc); for (;;) { if (vi && IS_DOOMED(vi)) { rc = ENXIO; goto done; } if (!IS_BUSY(sc)) { rc = 0; break; } if (!(flags & SLEEP_OK)) { rc = EBUSY; goto done; } if (mtx_sleep(&sc->flags, &sc->sc_lock, pri, wmesg, 0)) { rc = EINTR; goto done; } } KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__)); SET_BUSY(sc); #ifdef INVARIANTS sc->last_op = wmesg; sc->last_op_thr = curthread; sc->last_op_flags = flags; #endif done: if (!(flags & HOLD_LOCK) || rc) ADAPTER_UNLOCK(sc); return (rc); } /* * Tell if_ioctl and if_init that the VI is going away. This is * special variant of begin_synchronized_op and must be paired with a * call to end_synchronized_op. */ void doom_vi(struct adapter *sc, struct vi_info *vi) { ADAPTER_LOCK(sc); SET_DOOMED(vi); wakeup(&sc->flags); while (IS_BUSY(sc)) mtx_sleep(&sc->flags, &sc->sc_lock, 0, "t4detach", 0); SET_BUSY(sc); #ifdef INVARIANTS sc->last_op = "t4detach"; sc->last_op_thr = curthread; sc->last_op_flags = 0; #endif ADAPTER_UNLOCK(sc); } /* * {begin|end}_synchronized_op must be called from the same thread. 
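* The serialization is a busy flag guarded by the adapter lock: begin_synchronized_op() sleeps on &sc->flags until the flag clears and then sets it, while end_synchronized_op() clears it and issues the wakeup.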
*/ void end_synchronized_op(struct adapter *sc, int flags) { if (flags & LOCK_HELD) ADAPTER_LOCK_ASSERT_OWNED(sc); else ADAPTER_LOCK(sc); KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__)); CLR_BUSY(sc); wakeup(&sc->flags); ADAPTER_UNLOCK(sc); } static int cxgbe_init_synchronized(struct vi_info *vi) { struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct ifnet *ifp = vi->ifp; int rc = 0, i; struct sge_txq *txq; ASSERT_SYNCHRONIZED_OP(sc); if (ifp->if_drv_flags & IFF_DRV_RUNNING) return (0); /* already running */ if (!(sc->flags & FULL_INIT_DONE) && ((rc = adapter_full_init(sc)) != 0)) return (rc); /* error message displayed already */ if (!(vi->flags & VI_INIT_DONE) && ((rc = vi_full_init(vi)) != 0)) return (rc); /* error message displayed already */ rc = update_mac_settings(ifp, XGMAC_ALL); if (rc) goto done; /* error message displayed already */ rc = -t4_enable_vi(sc, sc->mbox, vi->viid, true, true); if (rc != 0) { if_printf(ifp, "enable_vi failed: %d\n", rc); goto done; } /* * Can't fail from this point onwards. Review cxgbe_uninit_synchronized * if this changes. */ for_each_txq(vi, i, txq) { TXQ_LOCK(txq); txq->eq.flags |= EQ_ENABLED; TXQ_UNLOCK(txq); } /* * The first iq of the first port to come up is used for tracing. */ if (sc->traceq < 0 && IS_MAIN_VI(vi)) { sc->traceq = sc->sge.rxq[vi->first_rxq].iq.abs_id; t4_write_reg(sc, is_t4(sc) ? A_MPS_TRC_RSS_CONTROL : A_MPS_T5_TRC_RSS_CONTROL, V_RSSCONTROL(pi->tx_chan) | V_QUEUENUMBER(sc->traceq)); pi->flags |= HAS_TRACEQ; } /* all ok */ PORT_LOCK(pi); if (pi->up_vis++ == 0) { t4_update_port_info(pi); build_medialist(pi, &pi->media); init_l1cfg(pi); } ifp->if_drv_flags |= IFF_DRV_RUNNING; if (pi->nvi > 1 || sc->flags & IS_VF) callout_reset(&vi->tick, hz, vi_tick, vi); else callout_reset(&pi->tick, hz, cxgbe_tick, pi); PORT_UNLOCK(pi); done: if (rc != 0) cxgbe_uninit_synchronized(vi); return (rc); } /* * Idempotent. */ static int cxgbe_uninit_synchronized(struct vi_info *vi) { struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; struct ifnet *ifp = vi->ifp; int rc, i; struct sge_txq *txq; ASSERT_SYNCHRONIZED_OP(sc); if (!(vi->flags & VI_INIT_DONE)) { KASSERT(!(ifp->if_drv_flags & IFF_DRV_RUNNING), ("uninited VI is running")); return (0); } /* * Disable the VI so that all its data in either direction is discarded * by the MPS. Leave everything else (the queues, interrupts, and 1Hz * tick) intact as the TP can deliver negative advice or data that it's * holding in its RAM (for an offloaded connection) even after the VI is * disabled. */ rc = -t4_enable_vi(sc, sc->mbox, vi->viid, false, false); if (rc) { if_printf(ifp, "disable_vi failed: %d\n", rc); return (rc); } for_each_txq(vi, i, txq) { TXQ_LOCK(txq); txq->eq.flags &= ~EQ_ENABLED; TXQ_UNLOCK(txq); } PORT_LOCK(pi); if (pi->nvi > 1 || sc->flags & IS_VF) callout_stop(&vi->tick); else callout_stop(&pi->tick); if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { PORT_UNLOCK(pi); return (0); } ifp->if_drv_flags &= ~IFF_DRV_RUNNING; pi->up_vis--; if (pi->up_vis > 0) { PORT_UNLOCK(pi); return (0); } PORT_UNLOCK(pi); pi->link_cfg.link_ok = 0; pi->link_cfg.speed = 0; pi->link_cfg.link_down_rc = 255; t4_os_link_changed(pi); pi->old_link_cfg = pi->link_cfg; return (0); } /* * It is ok for this function to fail midway and return right away. t4_detach * will walk the entire sc->irq list and clean up whatever is valid. 
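* Vector layout when rx interrupts are not forwarded to the firmware event queue: an error interrupt first (PFs only), then the firmware event queue, then one vector per NIC/netmap rx queue and per TOE rx queue of every VI.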
*/ int t4_setup_intr_handlers(struct adapter *sc) { int rc, rid, p, q, v; char s[8]; struct irq *irq; struct port_info *pi; struct vi_info *vi; struct sge *sge = &sc->sge; struct sge_rxq *rxq; #ifdef TCP_OFFLOAD struct sge_ofld_rxq *ofld_rxq; #endif #ifdef DEV_NETMAP struct sge_nm_rxq *nm_rxq; #endif #ifdef RSS int nbuckets = rss_getnumbuckets(); #endif /* * Setup interrupts. */ irq = &sc->irq[0]; rid = sc->intr_type == INTR_INTX ? 0 : 1; if (forwarding_intr_to_fwq(sc)) return (t4_alloc_irq(sc, irq, rid, t4_intr_all, sc, "all")); /* Multiple interrupts. */ if (sc->flags & IS_VF) KASSERT(sc->intr_count >= T4VF_EXTRA_INTR + sc->params.nports, ("%s: too few intr.", __func__)); else KASSERT(sc->intr_count >= T4_EXTRA_INTR + sc->params.nports, ("%s: too few intr.", __func__)); /* The first one is always error intr on PFs */ if (!(sc->flags & IS_VF)) { rc = t4_alloc_irq(sc, irq, rid, t4_intr_err, sc, "err"); if (rc != 0) return (rc); irq++; rid++; } /* The second one is always the firmware event queue (first on VFs) */ rc = t4_alloc_irq(sc, irq, rid, t4_intr_evt, &sge->fwq, "evt"); if (rc != 0) return (rc); irq++; rid++; for_each_port(sc, p) { pi = sc->port[p]; for_each_vi(pi, v, vi) { vi->first_intr = rid - 1; if (vi->nnmrxq > 0) { int n = max(vi->nrxq, vi->nnmrxq); rxq = &sge->rxq[vi->first_rxq]; #ifdef DEV_NETMAP nm_rxq = &sge->nm_rxq[vi->first_nm_rxq]; #endif for (q = 0; q < n; q++) { snprintf(s, sizeof(s), "%x%c%x", p, 'a' + v, q); if (q < vi->nrxq) irq->rxq = rxq++; #ifdef DEV_NETMAP if (q < vi->nnmrxq) irq->nm_rxq = nm_rxq++; #endif rc = t4_alloc_irq(sc, irq, rid, t4_vi_intr, irq, s); if (rc != 0) return (rc); #ifdef RSS if (q < vi->nrxq) { bus_bind_intr(sc->dev, irq->res, rss_getcpu(q % nbuckets)); } #endif irq++; rid++; vi->nintr++; } } else { for_each_rxq(vi, q, rxq) { snprintf(s, sizeof(s), "%x%c%x", p, 'a' + v, q); rc = t4_alloc_irq(sc, irq, rid, t4_intr, rxq, s); if (rc != 0) return (rc); #ifdef RSS bus_bind_intr(sc->dev, irq->res, rss_getcpu(q % nbuckets)); #endif irq++; rid++; vi->nintr++; } } #ifdef TCP_OFFLOAD for_each_ofld_rxq(vi, q, ofld_rxq) { snprintf(s, sizeof(s), "%x%c%x", p, 'A' + v, q); rc = t4_alloc_irq(sc, irq, rid, t4_intr, ofld_rxq, s); if (rc != 0) return (rc); irq++; rid++; vi->nintr++; } #endif } } MPASS(irq == &sc->irq[sc->intr_count]); return (0); } int adapter_full_init(struct adapter *sc) { int rc, i; #ifdef RSS uint32_t raw_rss_key[RSS_KEYSIZE / sizeof(uint32_t)]; uint32_t rss_key[RSS_KEYSIZE / sizeof(uint32_t)]; #endif ASSERT_SYNCHRONIZED_OP(sc); ADAPTER_LOCK_ASSERT_NOTOWNED(sc); KASSERT((sc->flags & FULL_INIT_DONE) == 0, ("%s: FULL_INIT_DONE already", __func__)); /* * queues that belong to the adapter (not any particular port). 
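* t4_setup_adapter_queues() handles those; the per-adapter taskqueues and, when the kernel is built with RSS, the global RSS key are set up right after.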
*/ rc = t4_setup_adapter_queues(sc); if (rc != 0) goto done; for (i = 0; i < nitems(sc->tq); i++) { sc->tq[i] = taskqueue_create("t4 taskq", M_NOWAIT, taskqueue_thread_enqueue, &sc->tq[i]); if (sc->tq[i] == NULL) { device_printf(sc->dev, "failed to allocate task queue %d\n", i); rc = ENOMEM; goto done; } taskqueue_start_threads(&sc->tq[i], 1, PI_NET, "%s tq%d", device_get_nameunit(sc->dev), i); } #ifdef RSS MPASS(RSS_KEYSIZE == 40); rss_getkey((void *)&raw_rss_key[0]); for (i = 0; i < nitems(rss_key); i++) { rss_key[i] = htobe32(raw_rss_key[nitems(rss_key) - 1 - i]); } t4_write_rss_key(sc, &rss_key[0], -1, 1); #endif if (!(sc->flags & IS_VF)) t4_intr_enable(sc); sc->flags |= FULL_INIT_DONE; done: if (rc != 0) adapter_full_uninit(sc); return (rc); } int adapter_full_uninit(struct adapter *sc) { int i; ADAPTER_LOCK_ASSERT_NOTOWNED(sc); t4_teardown_adapter_queues(sc); for (i = 0; i < nitems(sc->tq) && sc->tq[i]; i++) { taskqueue_free(sc->tq[i]); sc->tq[i] = NULL; } sc->flags &= ~FULL_INIT_DONE; return (0); } #ifdef RSS #define SUPPORTED_RSS_HASHTYPES (RSS_HASHTYPE_RSS_IPV4 | \ RSS_HASHTYPE_RSS_TCP_IPV4 | RSS_HASHTYPE_RSS_IPV6 | \ RSS_HASHTYPE_RSS_TCP_IPV6 | RSS_HASHTYPE_RSS_UDP_IPV4 | \ RSS_HASHTYPE_RSS_UDP_IPV6) /* Translates kernel hash types to hardware. */ static int hashconfig_to_hashen(int hashconfig) { int hashen = 0; if (hashconfig & RSS_HASHTYPE_RSS_IPV4) hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN; if (hashconfig & RSS_HASHTYPE_RSS_IPV6) hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN; if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV4) { hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN | F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN; } if (hashconfig & RSS_HASHTYPE_RSS_UDP_IPV6) { hashen |= F_FW_RSS_VI_CONFIG_CMD_UDPEN | F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN; } if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV4) hashen |= F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN; if (hashconfig & RSS_HASHTYPE_RSS_TCP_IPV6) hashen |= F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN; return (hashen); } /* Translates hardware hash types to kernel. */ static int hashen_to_hashconfig(int hashen) { int hashconfig = 0; if (hashen & F_FW_RSS_VI_CONFIG_CMD_UDPEN) { /* * If UDP hashing was enabled it must have been enabled for * either IPv4 or IPv6 (inclusive or). Enabling UDP without * enabling any 4-tuple hash is nonsense configuration. */ MPASS(hashen & (F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN | F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN)); if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN) hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV4; if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN) hashconfig |= RSS_HASHTYPE_RSS_UDP_IPV6; } if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN) hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV4; if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN) hashconfig |= RSS_HASHTYPE_RSS_TCP_IPV6; if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN) hashconfig |= RSS_HASHTYPE_RSS_IPV4; if (hashen & F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN) hashconfig |= RSS_HASHTYPE_RSS_IPV6; return (hashconfig); } #endif int vi_full_init(struct vi_info *vi) { struct adapter *sc = vi->pi->adapter; struct ifnet *ifp = vi->ifp; uint16_t *rss; struct sge_rxq *rxq; int rc, i, j, hashen; #ifdef RSS int nbuckets = rss_getnumbuckets(); int hashconfig = rss_gethashconfig(); int extra; #endif ASSERT_SYNCHRONIZED_OP(sc); KASSERT((vi->flags & VI_INIT_DONE) == 0, ("%s: VI_INIT_DONE already", __func__)); sysctl_ctx_init(&vi->ctx); vi->flags |= VI_SYSCTL_CTX; /* * Allocate tx/rx/fl queues for this VI. 
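* Once the queues exist, the VI's RSS indirection table is filled with rx queue ids (following the kernel's RSS bucket mapping when it is compiled in) and the hash configuration is programmed with t4_config_vi_rss().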
*/ rc = t4_setup_vi_queues(vi); if (rc != 0) goto done; /* error message displayed already */ /* * Setup RSS for this VI. Save a copy of the RSS table for later use. */ if (vi->nrxq > vi->rss_size) { if_printf(ifp, "nrxq (%d) > hw RSS table size (%d); " "some queues will never receive traffic.\n", vi->nrxq, vi->rss_size); } else if (vi->rss_size % vi->nrxq) { if_printf(ifp, "nrxq (%d), hw RSS table size (%d); " "expect uneven traffic distribution.\n", vi->nrxq, vi->rss_size); } #ifdef RSS if (vi->nrxq != nbuckets) { if_printf(ifp, "nrxq (%d) != kernel RSS buckets (%d);" "performance will be impacted.\n", vi->nrxq, nbuckets); } #endif rss = malloc(vi->rss_size * sizeof (*rss), M_CXGBE, M_ZERO | M_WAITOK); for (i = 0; i < vi->rss_size;) { #ifdef RSS j = rss_get_indirection_to_bucket(i); j %= vi->nrxq; rxq = &sc->sge.rxq[vi->first_rxq + j]; rss[i++] = rxq->iq.abs_id; #else for_each_rxq(vi, j, rxq) { rss[i++] = rxq->iq.abs_id; if (i == vi->rss_size) break; } #endif } rc = -t4_config_rss_range(sc, sc->mbox, vi->viid, 0, vi->rss_size, rss, vi->rss_size); if (rc != 0) { if_printf(ifp, "rss_config failed: %d\n", rc); goto done; } #ifdef RSS hashen = hashconfig_to_hashen(hashconfig); /* * We may have had to enable some hashes even though the global config * wants them disabled. This is a potential problem that must be * reported to the user. */ extra = hashen_to_hashconfig(hashen) ^ hashconfig; /* * If we consider only the supported hash types, then the enabled hashes * are a superset of the requested hashes. In other words, there cannot * be any supported hash that was requested but not enabled, but there * can be hashes that were not requested but had to be enabled. */ extra &= SUPPORTED_RSS_HASHTYPES; MPASS((extra & hashconfig) == 0); if (extra) { if_printf(ifp, "global RSS config (0x%x) cannot be accommodated.\n", hashconfig); } if (extra & RSS_HASHTYPE_RSS_IPV4) if_printf(ifp, "IPv4 2-tuple hashing forced on.\n"); if (extra & RSS_HASHTYPE_RSS_TCP_IPV4) if_printf(ifp, "TCP/IPv4 4-tuple hashing forced on.\n"); if (extra & RSS_HASHTYPE_RSS_IPV6) if_printf(ifp, "IPv6 2-tuple hashing forced on.\n"); if (extra & RSS_HASHTYPE_RSS_TCP_IPV6) if_printf(ifp, "TCP/IPv6 4-tuple hashing forced on.\n"); if (extra & RSS_HASHTYPE_RSS_UDP_IPV4) if_printf(ifp, "UDP/IPv4 4-tuple hashing forced on.\n"); if (extra & RSS_HASHTYPE_RSS_UDP_IPV6) if_printf(ifp, "UDP/IPv6 4-tuple hashing forced on.\n"); #else hashen = F_FW_RSS_VI_CONFIG_CMD_IP6FOURTUPEN | F_FW_RSS_VI_CONFIG_CMD_IP6TWOTUPEN | F_FW_RSS_VI_CONFIG_CMD_IP4FOURTUPEN | F_FW_RSS_VI_CONFIG_CMD_IP4TWOTUPEN | F_FW_RSS_VI_CONFIG_CMD_UDPEN; #endif rc = -t4_config_vi_rss(sc, sc->mbox, vi->viid, hashen, rss[0], 0, 0); if (rc != 0) { if_printf(ifp, "rss hash/defaultq config failed: %d\n", rc); goto done; } vi->rss = rss; vi->flags |= VI_INIT_DONE; done: if (rc != 0) vi_full_uninit(vi); return (rc); } /* * Idempotent. */ int vi_full_uninit(struct vi_info *vi) { struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; int i; struct sge_rxq *rxq; struct sge_txq *txq; #ifdef TCP_OFFLOAD struct sge_ofld_rxq *ofld_rxq; struct sge_wrq *ofld_txq; #endif if (vi->flags & VI_INIT_DONE) { /* Need to quiesce queues. */ /* XXX: Only for the first VI? 
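* Quiesce order below: the port's control wrq (main VI on a PF only), then every txq and offload txq, then each rx iq/fl pair, before the queues themselves are torn down.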
*/ if (IS_MAIN_VI(vi) && !(sc->flags & IS_VF)) quiesce_wrq(sc, &sc->sge.ctrlq[pi->port_id]); for_each_txq(vi, i, txq) { quiesce_txq(sc, txq); } #ifdef TCP_OFFLOAD for_each_ofld_txq(vi, i, ofld_txq) { quiesce_wrq(sc, ofld_txq); } #endif for_each_rxq(vi, i, rxq) { quiesce_iq(sc, &rxq->iq); quiesce_fl(sc, &rxq->fl); } #ifdef TCP_OFFLOAD for_each_ofld_rxq(vi, i, ofld_rxq) { quiesce_iq(sc, &ofld_rxq->iq); quiesce_fl(sc, &ofld_rxq->fl); } #endif free(vi->rss, M_CXGBE); free(vi->nm_rss, M_CXGBE); } t4_teardown_vi_queues(vi); vi->flags &= ~VI_INIT_DONE; return (0); } static void quiesce_txq(struct adapter *sc, struct sge_txq *txq) { struct sge_eq *eq = &txq->eq; struct sge_qstat *spg = (void *)&eq->desc[eq->sidx]; (void) sc; /* unused */ #ifdef INVARIANTS TXQ_LOCK(txq); MPASS((eq->flags & EQ_ENABLED) == 0); TXQ_UNLOCK(txq); #endif /* Wait for the mp_ring to empty. */ while (!mp_ring_is_idle(txq->r)) { mp_ring_check_drainage(txq->r, 0); pause("rquiesce", 1); } /* Then wait for the hardware to finish. */ while (spg->cidx != htobe16(eq->pidx)) pause("equiesce", 1); /* Finally, wait for the driver to reclaim all descriptors. */ while (eq->cidx != eq->pidx) pause("dquiesce", 1); } static void quiesce_wrq(struct adapter *sc, struct sge_wrq *wrq) { /* XXXTX */ } static void quiesce_iq(struct adapter *sc, struct sge_iq *iq) { (void) sc; /* unused */ /* Synchronize with the interrupt handler */ while (!atomic_cmpset_int(&iq->state, IQS_IDLE, IQS_DISABLED)) pause("iqfree", 1); } static void quiesce_fl(struct adapter *sc, struct sge_fl *fl) { mtx_lock(&sc->sfl_lock); FL_LOCK(fl); fl->flags |= FL_DOOMED; FL_UNLOCK(fl); callout_stop(&sc->sfl_callout); mtx_unlock(&sc->sfl_lock); KASSERT((fl->flags & FL_STARVING) == 0, ("%s: still starving", __func__)); } static int t4_alloc_irq(struct adapter *sc, struct irq *irq, int rid, driver_intr_t *handler, void *arg, char *name) { int rc; irq->rid = rid; irq->res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &irq->rid, RF_SHAREABLE | RF_ACTIVE); if (irq->res == NULL) { device_printf(sc->dev, "failed to allocate IRQ for rid %d, name %s.\n", rid, name); return (ENOMEM); } rc = bus_setup_intr(sc->dev, irq->res, INTR_MPSAFE | INTR_TYPE_NET, NULL, handler, arg, &irq->tag); if (rc != 0) { device_printf(sc->dev, "failed to setup interrupt for rid %d, name %s: %d\n", rid, name, rc); } else if (name) bus_describe_intr(sc->dev, irq->res, irq->tag, "%s", name); return (rc); } static int t4_free_irq(struct adapter *sc, struct irq *irq) { if (irq->tag) bus_teardown_intr(sc->dev, irq->res, irq->tag); if (irq->res) bus_release_resource(sc->dev, SYS_RES_IRQ, irq->rid, irq->res); bzero(irq, sizeof(*irq)); return (0); } static void get_regs(struct adapter *sc, struct t4_regdump *regs, uint8_t *buf) { regs->version = chip_id(sc) | chip_rev(sc) << 10; t4_get_regs(sc, buf, regs->len); } #define A_PL_INDIR_CMD 0x1f8 #define S_PL_AUTOINC 31 #define M_PL_AUTOINC 0x1U #define V_PL_AUTOINC(x) ((x) << S_PL_AUTOINC) #define G_PL_AUTOINC(x) (((x) >> S_PL_AUTOINC) & M_PL_AUTOINC) #define S_PL_VFID 20 #define M_PL_VFID 0xffU #define V_PL_VFID(x) ((x) << S_PL_VFID) #define G_PL_VFID(x) (((x) >> S_PL_VFID) & M_PL_VFID) #define S_PL_ADDR 0 #define M_PL_ADDR 0xfffffU #define V_PL_ADDR(x) ((x) << S_PL_ADDR) #define G_PL_ADDR(x) (((x) >> S_PL_ADDR) & M_PL_ADDR) #define A_PL_INDIR_DATA 0x1fc static uint64_t read_vf_stat(struct adapter *sc, unsigned int viid, int reg) { u32 stats[2]; mtx_assert(&sc->reg_lock, MA_OWNED); if (sc->flags & IS_VF) { stats[0] = t4_read_reg(sc, VF_MPS_REG(reg)); stats[1] = 
t4_read_reg(sc, VF_MPS_REG(reg + 4)); } else { t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) | V_PL_VFID(G_FW_VIID_VIN(viid)) | V_PL_ADDR(VF_MPS_REG(reg))); stats[0] = t4_read_reg(sc, A_PL_INDIR_DATA); stats[1] = t4_read_reg(sc, A_PL_INDIR_DATA); } return (((uint64_t)stats[1]) << 32 | stats[0]); } static void t4_get_vi_stats(struct adapter *sc, unsigned int viid, struct fw_vi_stats_vf *stats) { #define GET_STAT(name) \ read_vf_stat(sc, viid, A_MPS_VF_STAT_##name##_L) stats->tx_bcast_bytes = GET_STAT(TX_VF_BCAST_BYTES); stats->tx_bcast_frames = GET_STAT(TX_VF_BCAST_FRAMES); stats->tx_mcast_bytes = GET_STAT(TX_VF_MCAST_BYTES); stats->tx_mcast_frames = GET_STAT(TX_VF_MCAST_FRAMES); stats->tx_ucast_bytes = GET_STAT(TX_VF_UCAST_BYTES); stats->tx_ucast_frames = GET_STAT(TX_VF_UCAST_FRAMES); stats->tx_drop_frames = GET_STAT(TX_VF_DROP_FRAMES); stats->tx_offload_bytes = GET_STAT(TX_VF_OFFLOAD_BYTES); stats->tx_offload_frames = GET_STAT(TX_VF_OFFLOAD_FRAMES); stats->rx_bcast_bytes = GET_STAT(RX_VF_BCAST_BYTES); stats->rx_bcast_frames = GET_STAT(RX_VF_BCAST_FRAMES); stats->rx_mcast_bytes = GET_STAT(RX_VF_MCAST_BYTES); stats->rx_mcast_frames = GET_STAT(RX_VF_MCAST_FRAMES); stats->rx_ucast_bytes = GET_STAT(RX_VF_UCAST_BYTES); stats->rx_ucast_frames = GET_STAT(RX_VF_UCAST_FRAMES); stats->rx_err_frames = GET_STAT(RX_VF_ERR_FRAMES); #undef GET_STAT } static void t4_clr_vi_stats(struct adapter *sc, unsigned int viid) { int reg; t4_write_reg(sc, A_PL_INDIR_CMD, V_PL_AUTOINC(1) | V_PL_VFID(G_FW_VIID_VIN(viid)) | V_PL_ADDR(VF_MPS_REG(A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L))); for (reg = A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L; reg <= A_MPS_VF_STAT_RX_VF_ERR_FRAMES_H; reg += 4) t4_write_reg(sc, A_PL_INDIR_DATA, 0); } static void vi_refresh_stats(struct adapter *sc, struct vi_info *vi) { struct timeval tv; const struct timeval interval = {0, 250000}; /* 250ms */ if (!(vi->flags & VI_INIT_DONE)) return; getmicrotime(&tv); timevalsub(&tv, &interval); if (timevalcmp(&tv, &vi->last_refreshed, <)) return; mtx_lock(&sc->reg_lock); t4_get_vi_stats(sc, vi->viid, &vi->stats); getmicrotime(&vi->last_refreshed); mtx_unlock(&sc->reg_lock); } static void cxgbe_refresh_stats(struct adapter *sc, struct port_info *pi) { u_int i, v, tnl_cong_drops, bg_map; struct timeval tv; const struct timeval interval = {0, 250000}; /* 250ms */ getmicrotime(&tv); timevalsub(&tv, &interval); if (timevalcmp(&tv, &pi->last_refreshed, <)) return; tnl_cong_drops = 0; t4_get_port_stats(sc, pi->tx_chan, &pi->stats); bg_map = pi->mps_bg_map; while (bg_map) { i = ffs(bg_map) - 1; mtx_lock(&sc->reg_lock); t4_read_indirect(sc, A_TP_MIB_INDEX, A_TP_MIB_DATA, &v, 1, A_TP_MIB_TNL_CNG_DROP_0 + i); mtx_unlock(&sc->reg_lock); tnl_cong_drops += v; bg_map &= ~(1 << i); } pi->tnl_cong_drops = tnl_cong_drops; getmicrotime(&pi->last_refreshed); } static void cxgbe_tick(void *arg) { struct port_info *pi = arg; struct adapter *sc = pi->adapter; PORT_LOCK_ASSERT_OWNED(pi); cxgbe_refresh_stats(sc, pi); callout_schedule(&pi->tick, hz); } void vi_tick(void *arg) { struct vi_info *vi = arg; struct adapter *sc = vi->pi->adapter; vi_refresh_stats(sc, vi); callout_schedule(&vi->tick, hz); } static void cxgbe_vlan_config(void *arg, struct ifnet *ifp, uint16_t vid) { struct ifnet *vlan; if (arg != ifp || ifp->if_type != IFT_ETHER) return; vlan = VLAN_DEVAT(ifp, vid); VLAN_SETCOOKIE(vlan, ifp); } /* * Should match fw_caps_config_ enums in t4fw_interface.h */ static char *caps_decoder[] = { "\20\001IPMI\002NCSI", /* 0: NBM */ "\20\001PPP\002QFC\003DCBX", /* 1: link */ 
"\20\001INGRESS\002EGRESS", /* 2: switch */ "\20\001NIC\002VM\003IDS\004UM\005UM_ISGL" /* 3: NIC */ "\006HASHFILTER\007ETHOFLD", "\20\001TOE", /* 4: TOE */ "\20\001RDDP\002RDMAC", /* 5: RDMA */ "\20\001INITIATOR_PDU\002TARGET_PDU" /* 6: iSCSI */ "\003INITIATOR_CNXOFLD\004TARGET_CNXOFLD" "\005INITIATOR_SSNOFLD\006TARGET_SSNOFLD" "\007T10DIF" "\010INITIATOR_CMDOFLD\011TARGET_CMDOFLD", "\20\001LOOKASIDE\002TLSKEYS", /* 7: Crypto */ "\20\001INITIATOR\002TARGET\003CTRL_OFLD" /* 8: FCoE */ "\004PO_INITIATOR\005PO_TARGET", }; void t4_sysctls(struct adapter *sc) { struct sysctl_ctx_list *ctx; struct sysctl_oid *oid; struct sysctl_oid_list *children, *c0; static char *doorbells = {"\20\1UDB\2WCWR\3UDBWC\4KDB"}; ctx = device_get_sysctl_ctx(sc->dev); /* * dev.t4nex.X. */ oid = device_get_sysctl_tree(sc->dev); c0 = children = SYSCTL_CHILDREN(oid); sc->sc_do_rxcopy = 1; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "do_rx_copy", CTLFLAG_RW, &sc->sc_do_rxcopy, 1, "Do RX copy of small frames"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nports", CTLFLAG_RD, NULL, sc->params.nports, "# of ports"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "doorbells", CTLTYPE_STRING | CTLFLAG_RD, doorbells, sc->doorbells, sysctl_bitfield, "A", "available doorbells"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "core_clock", CTLFLAG_RD, NULL, sc->params.vpd.cclk, "core clock frequency (in KHz)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_timers", CTLTYPE_STRING | CTLFLAG_RD, sc->params.sge.timer_val, sizeof(sc->params.sge.timer_val), sysctl_int_array, "A", "interrupt holdoff timer values (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pkt_counts", CTLTYPE_STRING | CTLFLAG_RD, sc->params.sge.counter_val, sizeof(sc->params.sge.counter_val), sysctl_int_array, "A", "interrupt holdoff packet counter values"); t4_sge_sysctls(sc, ctx, children); sc->lro_timeout = 100; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "lro_timeout", CTLFLAG_RW, &sc->lro_timeout, 0, "lro inactive-flush timeout (in us)"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "dflags", CTLFLAG_RW, &sc->debug_flags, 0, "flags to enable runtime debugging"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "tp_version", CTLFLAG_RD, sc->tp_version, 0, "TP microcode version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "firmware_version", CTLFLAG_RD, sc->fw_version, 0, "firmware version"); if (sc->flags & IS_VF) return; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "hw_revision", CTLFLAG_RD, NULL, chip_rev(sc), "chip hardware revision"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "sn", CTLFLAG_RD, sc->params.vpd.sn, 0, "serial number"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "pn", CTLFLAG_RD, sc->params.vpd.pn, 0, "part number"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "ec", CTLFLAG_RD, sc->params.vpd.ec, 0, "engineering change"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "md_version", CTLFLAG_RD, sc->params.vpd.md, 0, "manufacturing diags version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "na", CTLFLAG_RD, sc->params.vpd.na, 0, "network address"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "er_version", CTLFLAG_RD, sc->er_version, 0, "expansion ROM version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "bs_version", CTLFLAG_RD, sc->bs_version, 0, "bootstrap firmware version"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "scfg_version", CTLFLAG_RD, NULL, sc->params.scfg_vers, "serial config version"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "vpd_version", CTLFLAG_RD, NULL, sc->params.vpd_vers, "VPD version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "cf", CTLFLAG_RD, 
sc->cfg_file, 0, "configuration file"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "cfcsum", CTLFLAG_RD, NULL, sc->cfcsum, "config file checksum"); #define SYSCTL_CAP(name, n, text) \ SYSCTL_ADD_PROC(ctx, children, OID_AUTO, #name, \ CTLTYPE_STRING | CTLFLAG_RD, caps_decoder[n], sc->name, \ sysctl_bitfield, "A", "available " text " capabilities") SYSCTL_CAP(nbmcaps, 0, "NBM"); SYSCTL_CAP(linkcaps, 1, "link"); SYSCTL_CAP(switchcaps, 2, "switch"); SYSCTL_CAP(niccaps, 3, "NIC"); SYSCTL_CAP(toecaps, 4, "TCP offload"); SYSCTL_CAP(rdmacaps, 5, "RDMA"); SYSCTL_CAP(iscsicaps, 6, "iSCSI"); SYSCTL_CAP(cryptocaps, 7, "crypto"); SYSCTL_CAP(fcoecaps, 8, "FCoE"); #undef SYSCTL_CAP SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nfilters", CTLFLAG_RD, NULL, sc->tids.nftids, "number of filters"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature", CTLTYPE_INT | CTLFLAG_RD, sc, 0, sysctl_temperature, "I", "chip temperature (in Celsius)"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "core_vdd", CTLFLAG_RD, &sc->params.core_vdd, 0, "core Vdd (in mV)"); #ifdef SBUF_DRAIN /* * dev.t4nex.X.misc. Marked CTLFLAG_SKIP to avoid information overload. */ oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "misc", CTLFLAG_RD | CTLFLAG_SKIP, NULL, "logs and miscellaneous information"); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cctrl", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cctrl, "A", "congestion control"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp0", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cim_ibq_obq, "A", "CIM IBQ 0 (TP0)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_tp1", CTLTYPE_STRING | CTLFLAG_RD, sc, 1, sysctl_cim_ibq_obq, "A", "CIM IBQ 1 (TP1)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ulp", CTLTYPE_STRING | CTLFLAG_RD, sc, 2, sysctl_cim_ibq_obq, "A", "CIM IBQ 2 (ULP)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge0", CTLTYPE_STRING | CTLFLAG_RD, sc, 3, sysctl_cim_ibq_obq, "A", "CIM IBQ 3 (SGE0)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_sge1", CTLTYPE_STRING | CTLFLAG_RD, sc, 4, sysctl_cim_ibq_obq, "A", "CIM IBQ 4 (SGE1)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ibq_ncsi", CTLTYPE_STRING | CTLFLAG_RD, sc, 5, sysctl_cim_ibq_obq, "A", "CIM IBQ 5 (NCSI)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, chip_id(sc) <= CHELSIO_T5 ? 
sysctl_cim_la : sysctl_cim_la_t6, "A", "CIM logic analyzer"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_ma_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cim_ma_la, "A", "CIM MA logic analyzer"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp0", CTLTYPE_STRING | CTLFLAG_RD, sc, 0 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 0 (ULP0)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp1", CTLTYPE_STRING | CTLFLAG_RD, sc, 1 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 1 (ULP1)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp2", CTLTYPE_STRING | CTLFLAG_RD, sc, 2 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 2 (ULP2)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ulp3", CTLTYPE_STRING | CTLFLAG_RD, sc, 3 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 3 (ULP3)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge", CTLTYPE_STRING | CTLFLAG_RD, sc, 4 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 4 (SGE)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_ncsi", CTLTYPE_STRING | CTLFLAG_RD, sc, 5 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 5 (NCSI)"); if (chip_id(sc) > CHELSIO_T4) { SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge0_rx", CTLTYPE_STRING | CTLFLAG_RD, sc, 6 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 6 (SGE0-RX)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_obq_sge1_rx", CTLTYPE_STRING | CTLFLAG_RD, sc, 7 + CIM_NUM_IBQ, sysctl_cim_ibq_obq, "A", "CIM OBQ 7 (SGE1-RX)"); } SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_pif_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cim_pif_la, "A", "CIM PIF logic analyzer"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cim_qcfg", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cim_qcfg, "A", "CIM queue configuration"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "cpl_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_cpl_stats, "A", "CPL statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ddp_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_ddp_stats, "A", "non-TCP DDP statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "devlog", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_devlog, "A", "firmware's device log"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fcoe_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_fcoe_stats, "A", "FCoE statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "hw_sched", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_hw_sched, "A", "hardware scheduler "); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "l2t", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_l2t, "A", "hardware L2 table"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "lb_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_lb_stats, "A", "loopback statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "meminfo", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_meminfo, "A", "memory regions"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "mps_tcam", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, chip_id(sc) <= CHELSIO_T5 ? 
sysctl_mps_tcam : sysctl_mps_tcam_t6, "A", "MPS TCAM entries"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "path_mtus", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_path_mtus, "A", "path MTUs"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pm_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_pm_stats, "A", "PM statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rdma_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_rdma_stats, "A", "RDMA statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tcp_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tcp_stats, "A", "TCP statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tids", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tids, "A", "TID information"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_err_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tp_err_stats, "A", "TP error statistics"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_la_mask", CTLTYPE_INT | CTLFLAG_RW, sc, 0, sysctl_tp_la_mask, "I", "TP logic analyzer event capture mask"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tp_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tp_la, "A", "TP logic analyzer"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tx_rate", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tx_rate, "A", "Tx rate"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "ulprx_la", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_ulprx_la, "A", "ULPRX logic analyzer"); if (chip_id(sc) >= CHELSIO_T5) { SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "wcwr_stats", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_wcwr_stats, "A", "write combined work requests"); } #endif #ifdef TCP_OFFLOAD if (is_offload(sc)) { int i; char s[4]; /* * dev.t4nex.X.toe. */ oid = SYSCTL_ADD_NODE(ctx, c0, OID_AUTO, "toe", CTLFLAG_RD, NULL, "TOE parameters"); children = SYSCTL_CHILDREN(oid); sc->tt.cong_algorithm = -1; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "cong_algorithm", CTLFLAG_RW, &sc->tt.cong_algorithm, 0, "congestion control " "(-1 = default, 0 = reno, 1 = tahoe, 2 = newreno, " "3 = highspeed)"); sc->tt.sndbuf = 256 * 1024; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "sndbuf", CTLFLAG_RW, &sc->tt.sndbuf, 0, "max hardware send buffer size"); sc->tt.ddp = 0; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ddp", CTLFLAG_RW, &sc->tt.ddp, 0, "DDP allowed"); sc->tt.rx_coalesce = 1; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_coalesce", CTLFLAG_RW, &sc->tt.rx_coalesce, 0, "receive coalescing"); sc->tt.tx_align = 1; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_align", CTLFLAG_RW, &sc->tt.tx_align, 0, "chop and align payload"); sc->tt.tx_zcopy = 0; SYSCTL_ADD_INT(ctx, children, OID_AUTO, "tx_zcopy", CTLFLAG_RW, &sc->tt.tx_zcopy, 0, "Enable zero-copy aio_write(2)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "timer_tick", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, sysctl_tp_tick, "A", "TP timer tick (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "timestamp_tick", CTLTYPE_STRING | CTLFLAG_RD, sc, 1, sysctl_tp_tick, "A", "TCP timestamp tick (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dack_tick", CTLTYPE_STRING | CTLFLAG_RD, sc, 2, sysctl_tp_tick, "A", "DACK tick (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "dack_timer", CTLTYPE_UINT | CTLFLAG_RD, sc, 0, sysctl_tp_dack_timer, "IU", "DACK timer (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_min", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_RXT_MIN, sysctl_tp_timer, "LU", "Minimum retransmit interval (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_max", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_RXT_MAX, sysctl_tp_timer, "LU", "Maximum retransmit interval (us)"); SYSCTL_ADD_PROC(ctx, 
children, OID_AUTO, "persist_min", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_PERS_MIN, sysctl_tp_timer, "LU", "Persist timer min (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "persist_max", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_PERS_MAX, sysctl_tp_timer, "LU", "Persist timer max (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_idle", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_KEEP_IDLE, sysctl_tp_timer, "LU", "Keepalive idle timer (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_interval", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_KEEP_INTVL, sysctl_tp_timer, "LU", "Keepalive interval timer (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "initial_srtt", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_INIT_SRTT, sysctl_tp_timer, "LU", "Initial SRTT (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "finwait2_timer", CTLTYPE_ULONG | CTLFLAG_RD, sc, A_TP_FINWAIT2_TIMER, sysctl_tp_timer, "LU", "FINWAIT2 timer (us)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "syn_rexmt_count", CTLTYPE_UINT | CTLFLAG_RD, sc, S_SYNSHIFTMAX, sysctl_tp_shift_cnt, "IU", "Number of SYN retransmissions before abort"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rexmt_count", CTLTYPE_UINT | CTLFLAG_RD, sc, S_RXTSHIFTMAXR2, sysctl_tp_shift_cnt, "IU", "Number of retransmissions before abort"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "keepalive_count", CTLTYPE_UINT | CTLFLAG_RD, sc, S_KEEPALIVEMAXR2, sysctl_tp_shift_cnt, "IU", "Number of keepalive probes before abort"); oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "rexmt_backoff", CTLFLAG_RD, NULL, "TOE retransmit backoffs"); children = SYSCTL_CHILDREN(oid); for (i = 0; i < 16; i++) { snprintf(s, sizeof(s), "%u", i); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, s, CTLTYPE_UINT | CTLFLAG_RD, sc, i, sysctl_tp_backoff, "IU", "TOE retransmit backoff"); } } #endif } void vi_sysctls(struct vi_info *vi) { struct sysctl_ctx_list *ctx; struct sysctl_oid *oid; struct sysctl_oid_list *children; ctx = device_get_sysctl_ctx(vi->dev); /* * dev.v?(cxgbe|cxl).X. 
 */
	oid = device_get_sysctl_tree(vi->dev);
	children = SYSCTL_CHILDREN(oid);

	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "viid", CTLFLAG_RD, NULL,
	    vi->viid, "VI identifier");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nrxq", CTLFLAG_RD,
	    &vi->nrxq, 0, "# of rx queues");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "ntxq", CTLFLAG_RD,
	    &vi->ntxq, 0, "# of tx queues");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_rxq", CTLFLAG_RD,
	    &vi->first_rxq, 0, "index of first rx queue");
	SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_txq", CTLFLAG_RD,
	    &vi->first_txq, 0, "index of first tx queue");
	SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rss_size", CTLFLAG_RD, NULL,
	    vi->rss_size, "size of RSS indirection table");
	if (IS_MAIN_VI(vi)) {
		SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "rsrv_noflowq",
		    CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_noflowq, "IU",
		    "Reserve queue 0 for non-flowid packets");
	}

#ifdef TCP_OFFLOAD
	if (vi->nofldrxq != 0) {
		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldrxq", CTLFLAG_RD,
		    &vi->nofldrxq, 0,
		    "# of rx queues for offloaded TCP connections");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldtxq", CTLFLAG_RD,
		    &vi->nofldtxq, 0,
		    "# of tx queues for offloaded TCP connections");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_rxq",
		    CTLFLAG_RD, &vi->first_ofld_rxq, 0,
		    "index of first TOE rx queue");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_ofld_txq",
		    CTLFLAG_RD, &vi->first_ofld_txq, 0,
		    "index of first TOE tx queue");
		SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
		    "holdoff_tmr_idx_ofld", CTLTYPE_INT | CTLFLAG_RW, vi, 0,
		    sysctl_holdoff_tmr_idx_ofld, "I",
		    "holdoff timer index for TOE queues");
		SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
		    "holdoff_pktc_idx_ofld", CTLTYPE_INT | CTLFLAG_RW, vi, 0,
		    sysctl_holdoff_pktc_idx_ofld, "I",
		    "holdoff packet counter index for TOE queues");
	}
#endif
#ifdef DEV_NETMAP
	if (vi->nnmrxq != 0) {
		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmrxq", CTLFLAG_RD,
		    &vi->nnmrxq, 0, "# of netmap rx queues");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nnmtxq", CTLFLAG_RD,
		    &vi->nnmtxq, 0, "# of netmap tx queues");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_rxq",
		    CTLFLAG_RD, &vi->first_nm_rxq, 0,
		    "index of first netmap rx queue");
		SYSCTL_ADD_INT(ctx, children, OID_AUTO, "first_nm_txq",
		    CTLFLAG_RD, &vi->first_nm_txq, 0,
		    "index of first netmap tx queue");
	}
#endif

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_tmr_idx",
	    CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_tmr_idx, "I",
	    "holdoff timer index");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "holdoff_pktc_idx",
	    CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_holdoff_pktc_idx, "I",
	    "holdoff packet counter index");

	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_rxq",
	    CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_qsize_rxq, "I",
	    "rx queue size");
	SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "qsize_txq",
	    CTLTYPE_INT | CTLFLAG_RW, vi, 0, sysctl_qsize_txq, "I",
	    "tx queue size");
}

static void
cxgbe_sysctls(struct port_info *pi)
{
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid *oid;
	struct sysctl_oid_list *children, *children2;
	struct adapter *sc = pi->adapter;
	int i;
	char name[16];

	ctx = device_get_sysctl_ctx(pi->dev);

	/*
	 * dev.cxgbe.X.
*/ oid = device_get_sysctl_tree(pi->dev); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "linkdnrc", CTLTYPE_STRING | CTLFLAG_RD, pi, 0, sysctl_linkdnrc, "A", "reason why link is down"); if (pi->port_type == FW_PORT_TYPE_BT_XAUI) { SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "temperature", CTLTYPE_INT | CTLFLAG_RD, pi, 0, sysctl_btphy, "I", "PHY temperature (in Celsius)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fw_version", CTLTYPE_INT | CTLFLAG_RD, pi, 1, sysctl_btphy, "I", "PHY firmware version"); } SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "pause_settings", CTLTYPE_STRING | CTLFLAG_RW, pi, 0, sysctl_pause_settings, "A", "PAUSE settings (bit 0 = rx_pause, bit 1 = tx_pause)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "fec", CTLTYPE_STRING | CTLFLAG_RW, pi, 0, sysctl_fec, "A", "Forward Error Correction (bit 0 = RS, bit 1 = BASER_RS)"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "autoneg", CTLTYPE_INT | CTLFLAG_RW, pi, 0, sysctl_autoneg, "I", "autonegotiation (-1 = not supported)"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "max_speed", CTLFLAG_RD, NULL, port_top_speed(pi), "max speed (in Gbps)"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "mps_bg_map", CTLFLAG_RD, NULL, pi->mps_bg_map, "MPS buffer group map"); SYSCTL_ADD_INT(ctx, children, OID_AUTO, "rx_e_chan_map", CTLFLAG_RD, NULL, pi->rx_e_chan_map, "TP rx e-channel map"); if (sc->flags & IS_VF) return; /* * dev.(cxgbe|cxl).X.tc. */ oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "tc", CTLFLAG_RD, NULL, "Tx scheduler traffic classes (cl_rl)"); for (i = 0; i < sc->chip_params->nsched_cls; i++) { struct tx_cl_rl_params *tc = &pi->sched_params->cl_rl[i]; snprintf(name, sizeof(name), "%d", i); children2 = SYSCTL_CHILDREN(SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(oid), OID_AUTO, name, CTLFLAG_RD, NULL, "traffic class")); SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "flags", CTLFLAG_RD, &tc->flags, 0, "flags"); SYSCTL_ADD_UINT(ctx, children2, OID_AUTO, "refcount", CTLFLAG_RD, &tc->refcount, 0, "references to this class"); #ifdef SBUF_DRAIN SYSCTL_ADD_PROC(ctx, children2, OID_AUTO, "params", CTLTYPE_STRING | CTLFLAG_RD, sc, (pi->port_id << 16) | i, sysctl_tc_params, "A", "traffic class parameters"); #endif } /* * dev.cxgbe.X.stats. 
*/ oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "stats", CTLFLAG_RD, NULL, "port statistics"); children = SYSCTL_CHILDREN(oid); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "tx_parse_error", CTLFLAG_RD, &pi->tx_parse_error, 0, "# of tx packets with invalid length or # of segments"); #define SYSCTL_ADD_T4_REG64(pi, name, desc, reg) \ SYSCTL_ADD_OID(ctx, children, OID_AUTO, name, \ CTLTYPE_U64 | CTLFLAG_RD, sc, reg, \ sysctl_handle_t4_reg64, "QU", desc) SYSCTL_ADD_T4_REG64(pi, "tx_octets", "# of octets in good frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BYTES_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames", "total # of good frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_FRAMES_L)); SYSCTL_ADD_T4_REG64(pi, "tx_bcast_frames", "# of broadcast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_BCAST_L)); SYSCTL_ADD_T4_REG64(pi, "tx_mcast_frames", "# of multicast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_MCAST_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ucast_frames", "# of unicast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_UCAST_L)); SYSCTL_ADD_T4_REG64(pi, "tx_error_frames", "# of error frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_64", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_64B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_65_127", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_65B_127B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_128_255", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_128B_255B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_256_511", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_256B_511B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_512_1023", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_512B_1023B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_1024_1518", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_1024B_1518B_L)); SYSCTL_ADD_T4_REG64(pi, "tx_frames_1519_max", "# of tx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_1519B_MAX_L)); SYSCTL_ADD_T4_REG64(pi, "tx_drop", "# of dropped tx frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_DROP_L)); SYSCTL_ADD_T4_REG64(pi, "tx_pause", "# of pause frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PAUSE_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp0", "# of PPP prio 0 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP0_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp1", "# of PPP prio 1 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP1_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp2", "# of PPP prio 2 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP2_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp3", "# of PPP prio 3 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP3_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp4", "# of PPP prio 4 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP4_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp5", "# of PPP prio 5 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP5_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp6", "# of PPP prio 6 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP6_L)); SYSCTL_ADD_T4_REG64(pi, "tx_ppp7", "# of PPP prio 7 frames transmitted", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_TX_PORT_PPP7_L)); SYSCTL_ADD_T4_REG64(pi, "rx_octets", "# of octets in good frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BYTES_L)); 
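	/*
	 * The tx_* and rx_* counters above and below are 64-bit MPS port
	 * statistics registers; sysctl_handle_t4_reg64 reads the register
	 * anew each time the sysctl is queried.
	 */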
SYSCTL_ADD_T4_REG64(pi, "rx_frames", "total # of good frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_FRAMES_L)); SYSCTL_ADD_T4_REG64(pi, "rx_bcast_frames", "# of broadcast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_BCAST_L)); SYSCTL_ADD_T4_REG64(pi, "rx_mcast_frames", "# of multicast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MCAST_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ucast_frames", "# of unicast frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_UCAST_L)); SYSCTL_ADD_T4_REG64(pi, "rx_too_long", "# of frames exceeding MTU", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_jabber", "# of jabber frames", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_MTU_CRC_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_fcs_err", "# of frames received with bad FCS", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_CRC_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_len_err", "# of frames received with length error", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LEN_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_symbol_err", "symbol errors", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_SYM_ERROR_L)); SYSCTL_ADD_T4_REG64(pi, "rx_runt", "# of short frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_LESS_64B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_64", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_64B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_65_127", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_65B_127B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_128_255", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_128B_255B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_256_511", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_256B_511B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_512_1023", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_512B_1023B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_1024_1518", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1024B_1518B_L)); SYSCTL_ADD_T4_REG64(pi, "rx_frames_1519_max", "# of rx frames in this range", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_1519B_MAX_L)); SYSCTL_ADD_T4_REG64(pi, "rx_pause", "# of pause frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PAUSE_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp0", "# of PPP prio 0 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP0_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp1", "# of PPP prio 1 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP1_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp2", "# of PPP prio 2 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP2_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp3", "# of PPP prio 3 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP3_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp4", "# of PPP prio 4 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP4_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp5", "# of PPP prio 5 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP5_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp6", "# of PPP prio 6 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP6_L)); SYSCTL_ADD_T4_REG64(pi, "rx_ppp7", "# of PPP prio 7 frames received", PORT_REG(pi->tx_chan, A_MPS_PORT_STAT_RX_PORT_PPP7_L)); #undef SYSCTL_ADD_T4_REG64 #define SYSCTL_ADD_T4_PORTSTAT(name, desc) \ SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, #name, CTLFLAG_RD, \ &pi->stats.name, desc) /* We get these from port_stats and 
they may be stale by up to 1s */ SYSCTL_ADD_T4_PORTSTAT(rx_ovflow0, "# drops due to buffer-group 0 overflows"); SYSCTL_ADD_T4_PORTSTAT(rx_ovflow1, "# drops due to buffer-group 1 overflows"); SYSCTL_ADD_T4_PORTSTAT(rx_ovflow2, "# drops due to buffer-group 2 overflows"); SYSCTL_ADD_T4_PORTSTAT(rx_ovflow3, "# drops due to buffer-group 3 overflows"); SYSCTL_ADD_T4_PORTSTAT(rx_trunc0, "# of buffer-group 0 truncated packets"); SYSCTL_ADD_T4_PORTSTAT(rx_trunc1, "# of buffer-group 1 truncated packets"); SYSCTL_ADD_T4_PORTSTAT(rx_trunc2, "# of buffer-group 2 truncated packets"); SYSCTL_ADD_T4_PORTSTAT(rx_trunc3, "# of buffer-group 3 truncated packets"); #undef SYSCTL_ADD_T4_PORTSTAT } static int sysctl_int_array(SYSCTL_HANDLER_ARGS) { int rc, *i, space = 0; struct sbuf sb; sbuf_new_for_sysctl(&sb, NULL, 64, req); for (i = arg1; arg2; arg2 -= sizeof(int), i++) { if (space) sbuf_printf(&sb, " "); sbuf_printf(&sb, "%d", *i); space = 1; } rc = sbuf_finish(&sb); sbuf_delete(&sb); return (rc); } static int sysctl_bitfield(SYSCTL_HANDLER_ARGS) { int rc; struct sbuf *sb; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return(rc); sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "%b", (int)arg2, (char *)arg1); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_btphy(SYSCTL_HANDLER_ARGS) { struct port_info *pi = arg1; int op = arg2; struct adapter *sc = pi->adapter; u_int v; int rc; rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK, "t4btt"); if (rc) return (rc); /* XXX: magic numbers */ rc = -t4_mdio_rd(sc, sc->mbox, pi->mdio_addr, 0x1e, op ? 0x20 : 0xc820, &v); end_synchronized_op(sc, 0); if (rc) return (rc); if (op == 0) v /= 256; rc = sysctl_handle_int(oidp, &v, 0, req); return (rc); } static int sysctl_noflowq(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; int rc, val; val = vi->rsrv_noflowq; rc = sysctl_handle_int(oidp, &val, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if ((val >= 1) && (vi->ntxq > 1)) vi->rsrv_noflowq = 1; else vi->rsrv_noflowq = 0; return (rc); } static int sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct adapter *sc = vi->pi->adapter; int idx, rc, i; struct sge_rxq *rxq; uint8_t v; idx = vi->tmr_idx; rc = sysctl_handle_int(oidp, &idx, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (idx < 0 || idx >= SGE_NTIMERS) return (EINVAL); rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4tmr"); if (rc) return (rc); v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(vi->pktc_idx != -1); for_each_rxq(vi, i, rxq) { #ifdef atomic_store_rel_8 atomic_store_rel_8(&rxq->iq.intr_params, v); #else rxq->iq.intr_params = v; #endif } vi->tmr_idx = idx; end_synchronized_op(sc, LOCK_HELD); return (0); } static int sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct adapter *sc = vi->pi->adapter; int idx, rc; idx = vi->pktc_idx; rc = sysctl_handle_int(oidp, &idx, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (idx < -1 || idx >= SGE_NCOUNTERS) return (EINVAL); rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4pktc"); if (rc) return (rc); if (vi->flags & VI_INIT_DONE) rc = EBUSY; /* cannot be changed once the queues are created */ else vi->pktc_idx = idx; end_synchronized_op(sc, LOCK_HELD); return (rc); } static int sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct adapter *sc = vi->pi->adapter; int qsize, rc; qsize = vi->qsize_rxq; rc = sysctl_handle_int(oidp, &qsize, 
0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (qsize < 128 || (qsize & 7)) return (EINVAL); rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4rxqs"); if (rc) return (rc); if (vi->flags & VI_INIT_DONE) rc = EBUSY; /* cannot be changed once the queues are created */ else vi->qsize_rxq = qsize; end_synchronized_op(sc, LOCK_HELD); return (rc); } static int sysctl_qsize_txq(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct adapter *sc = vi->pi->adapter; int qsize, rc; qsize = vi->qsize_txq; rc = sysctl_handle_int(oidp, &qsize, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (qsize < 128 || qsize > 65536) return (EINVAL); rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4txqs"); if (rc) return (rc); if (vi->flags & VI_INIT_DONE) rc = EBUSY; /* cannot be changed once the queues are created */ else vi->qsize_txq = qsize; end_synchronized_op(sc, LOCK_HELD); return (rc); } static int sysctl_pause_settings(SYSCTL_HANDLER_ARGS) { struct port_info *pi = arg1; struct adapter *sc = pi->adapter; struct link_config *lc = &pi->link_cfg; int rc; if (req->newptr == NULL) { struct sbuf *sb; static char *bits = "\20\1PAUSE_RX\2PAUSE_TX"; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return(rc); sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "%b", lc->fc & (PAUSE_TX | PAUSE_RX), bits); rc = sbuf_finish(sb); sbuf_delete(sb); } else { char s[2]; int n; s[0] = '0' + (lc->requested_fc & (PAUSE_TX | PAUSE_RX)); s[1] = 0; rc = sysctl_handle_string(oidp, s, sizeof(s), req); if (rc != 0) return(rc); if (s[1] != 0) return (EINVAL); if (s[0] < '0' || s[0] > '9') return (EINVAL); /* not a number */ n = s[0] - '0'; if (n & ~(PAUSE_TX | PAUSE_RX)) return (EINVAL); /* some other bit is set too */ rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK, "t4PAUSE"); if (rc) return (rc); if ((lc->requested_fc & (PAUSE_TX | PAUSE_RX)) != n) { lc->requested_fc &= ~(PAUSE_TX | PAUSE_RX); lc->requested_fc |= n; rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, lc); if (rc == 0) { lc->fc = lc->requested_fc; } } end_synchronized_op(sc, 0); } return (rc); } static int sysctl_fec(SYSCTL_HANDLER_ARGS) { struct port_info *pi = arg1; struct adapter *sc = pi->adapter; struct link_config *lc = &pi->link_cfg; int rc; if (req->newptr == NULL) { struct sbuf *sb; static char *bits = "\20\1RS\2BASER_RS\3RESERVED"; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return(rc); sb = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "%b", lc->fec & M_FW_PORT_CAP_FEC, bits); rc = sbuf_finish(sb); sbuf_delete(sb); } else { char s[2]; int n; s[0] = '0' + (lc->requested_fec & M_FW_PORT_CAP_FEC); s[1] = 0; rc = sysctl_handle_string(oidp, s, sizeof(s), req); if (rc != 0) return(rc); if (s[1] != 0) return (EINVAL); if (s[0] < '0' || s[0] > '9') return (EINVAL); /* not a number */ n = s[0] - '0'; if (n & ~M_FW_PORT_CAP_FEC) return (EINVAL); /* some other bit is set too */ rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK, "t4fec"); if (rc) return (rc); if ((lc->requested_fec & M_FW_PORT_CAP_FEC) != n) { lc->requested_fec = n & G_FW_PORT_CAP_FEC(lc->supported); rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, lc); if (rc == 0) { lc->fec = lc->requested_fec; } } end_synchronized_op(sc, 0); } return (rc); } static int sysctl_autoneg(SYSCTL_HANDLER_ARGS) { struct port_info *pi = arg1; struct adapter *sc = pi->adapter; struct link_config *lc = &pi->link_cfg; int rc, val, old; if 
(lc->supported & FW_PORT_CAP_ANEG)
		val = lc->requested_aneg == AUTONEG_ENABLE ? 1 : 0;
	else
		val = -1;
	rc = sysctl_handle_int(oidp, &val, 0, req);
	if (rc != 0 || req->newptr == NULL)
		return (rc);
	if ((lc->supported & FW_PORT_CAP_ANEG) == 0)
		return (ENOTSUP);

	if (val == 0)
		val = AUTONEG_DISABLE;
	else if (val == 1)
		val = AUTONEG_ENABLE;
	else
		return (EINVAL);
	if (lc->requested_aneg == val)
		return (0);	/* no change */

	rc = begin_synchronized_op(sc, &pi->vi[0], SLEEP_OK | INTR_OK,
	    "t4aneg");
	if (rc)
		return (rc);
	old = lc->requested_aneg;
	lc->requested_aneg = val;
	rc = -t4_link_l1cfg(sc, sc->mbox, pi->tx_chan, lc);
	if (rc != 0)
		lc->requested_aneg = old;
	end_synchronized_op(sc, 0);
	return (rc);
}

static int
sysctl_handle_t4_reg64(SYSCTL_HANDLER_ARGS)
{
	struct adapter *sc = arg1;
	int reg = arg2;
	uint64_t val;

	val = t4_read_reg64(sc, reg);

	return (sysctl_handle_64(oidp, &val, 0, req));
}

static int
sysctl_temperature(SYSCTL_HANDLER_ARGS)
{
	struct adapter *sc = arg1;
	int rc, t;
	uint32_t param, val;

	rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4temp");
	if (rc)
		return (rc);
	param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) |
	    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_DIAG) |
	    V_FW_PARAMS_PARAM_Y(FW_PARAM_DEV_DIAG_TMP);
	rc = -t4_query_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
	end_synchronized_op(sc, 0);
	if (rc)
		return (rc);

	/* unknown is returned as 0 but we display -1 in that case */
	t = val == 0 ? -1 : val;

	rc = sysctl_handle_int(oidp, &t, 0, req);
	return (rc);
}

#ifdef SBUF_DRAIN
static int
sysctl_cctrl(SYSCTL_HANDLER_ARGS)
{
	struct adapter *sc = arg1;
	struct sbuf *sb;
	int rc, i;
	uint16_t incr[NMTUS][NCCTRL_WIN];
	static const char *dec_fac[] = {
		"0.5", "0.5625", "0.625", "0.6875", "0.75", "0.8125", "0.875",
		"0.9375"
	};

	rc = sysctl_wire_old_buffer(req, 0);
	if (rc != 0)
		return (rc);

	sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req);
	if (sb == NULL)
		return (ENOMEM);

	t4_read_cong_tbl(sc, incr);

	for (i = 0; i < NCCTRL_WIN; ++i) {
		sbuf_printf(sb, "%2d: %4u %4u %4u %4u %4u %4u %4u %4u\n", i,
		    incr[0][i], incr[1][i], incr[2][i], incr[3][i], incr[4][i],
		    incr[5][i], incr[6][i], incr[7][i]);
		sbuf_printf(sb, "%8u %4u %4u %4u %4u %4u %4u %4u %5u %s\n",
		    incr[8][i], incr[9][i], incr[10][i], incr[11][i],
		    incr[12][i], incr[13][i], incr[14][i], incr[15][i],
		    sc->params.a_wnd[i], dec_fac[sc->params.b_wnd[i]]);
	}

	rc = sbuf_finish(sb);
	sbuf_delete(sb);

	return (rc);
}

static const char *qname[CIM_NUM_IBQ + CIM_NUM_OBQ_T5] = {
	"TP0", "TP1", "ULP", "SGE0", "SGE1", "NC-SI",	/* ibq's */
	"ULP0", "ULP1", "ULP2", "ULP3", "SGE", "NC-SI",	/* obq's */
	"SGE0-RX", "SGE1-RX"	/* additional obq's (T5 onwards) */
};

static int
sysctl_cim_ibq_obq(SYSCTL_HANDLER_ARGS)
{
	struct adapter *sc = arg1;
	struct sbuf *sb;
	int rc, i, n, qid = arg2;
	uint32_t *buf, *p;
	char *qtype;
	u_int cim_num_obq = sc->chip_params->cim_num_obq;

	KASSERT(qid >= 0 && qid < CIM_NUM_IBQ + cim_num_obq,
	    ("%s: bad qid %d\n", __func__, qid));

	if (qid < CIM_NUM_IBQ) {
		/* inbound queue */
		qtype = "IBQ";
		n = 4 * CIM_IBQ_SIZE;
		buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK);
		rc = t4_read_cim_ibq(sc, qid, buf, n);
	} else {
		/* outbound queue */
		qtype = "OBQ";
		qid -= CIM_NUM_IBQ;
		n = 4 * cim_num_obq * CIM_OBQ_SIZE;
		buf = malloc(n * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK);
		rc = t4_read_cim_obq(sc, qid, buf, n);
	}

	if (rc < 0) {
		rc = -rc;
		goto done;
	}
	n = rc * sizeof(uint32_t);	/* rc has # of words actually read */

	rc = sysctl_wire_old_buffer(req, 0);
	if (rc != 0)
		goto done;

	sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req);
	if (sb == NULL) {
		rc = ENOMEM;
goto done; } sbuf_printf(sb, "%s%d %s", qtype , qid, qname[arg2]); for (i = 0, p = buf; i < n; i += 16, p += 4) sbuf_printf(sb, "\n%#06x: %08x %08x %08x %08x", i, p[0], p[1], p[2], p[3]); rc = sbuf_finish(sb); sbuf_delete(sb); done: free(buf, M_CXGBE); return (rc); } static int sysctl_cim_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; u_int cfg; struct sbuf *sb; uint32_t *buf, *p; int rc; MPASS(chip_id(sc) <= CHELSIO_T5); rc = -t4_cim_read(sc, A_UP_UP_DBG_LA_CFG, 1, &cfg); if (rc != 0) return (rc); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(sc->params.cim_la_size * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); rc = -t4_cim_read_la(sc, buf, NULL); if (rc != 0) goto done; sbuf_printf(sb, "Status Data PC%s", cfg & F_UPDBGLACAPTPCONLY ? "" : " LS0Stat LS0Addr LS0Data"); for (p = buf; p <= &buf[sc->params.cim_la_size - 8]; p += 8) { if (cfg & F_UPDBGLACAPTPCONLY) { sbuf_printf(sb, "\n %02x %08x %08x", p[5] & 0xff, p[6], p[7]); sbuf_printf(sb, "\n %02x %02x%06x %02x%06x", (p[3] >> 8) & 0xff, p[3] & 0xff, p[4] >> 8, p[4] & 0xff, p[5] >> 8); sbuf_printf(sb, "\n %02x %x%07x %x%07x", (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4, p[1] & 0xf, p[2] >> 4); } else { sbuf_printf(sb, "\n %02x %x%07x %x%07x %08x %08x " "%08x%08x%08x%08x", (p[0] >> 4) & 0xff, p[0] & 0xf, p[1] >> 4, p[1] & 0xf, p[2] >> 4, p[2] & 0xf, p[3], p[4], p[5], p[6], p[7]); } } rc = sbuf_finish(sb); sbuf_delete(sb); done: free(buf, M_CXGBE); return (rc); } static int sysctl_cim_la_t6(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; u_int cfg; struct sbuf *sb; uint32_t *buf, *p; int rc; MPASS(chip_id(sc) > CHELSIO_T5); rc = -t4_cim_read(sc, A_UP_UP_DBG_LA_CFG, 1, &cfg); if (rc != 0) return (rc); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(sc->params.cim_la_size * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); rc = -t4_cim_read_la(sc, buf, NULL); if (rc != 0) goto done; sbuf_printf(sb, "Status Inst Data PC%s", cfg & F_UPDBGLACAPTPCONLY ? 
"" : " LS0Stat LS0Addr LS0Data LS1Stat LS1Addr LS1Data"); for (p = buf; p <= &buf[sc->params.cim_la_size - 10]; p += 10) { if (cfg & F_UPDBGLACAPTPCONLY) { sbuf_printf(sb, "\n %02x %08x %08x %08x", p[3] & 0xff, p[2], p[1], p[0]); sbuf_printf(sb, "\n %02x %02x%06x %02x%06x %02x%06x", (p[6] >> 8) & 0xff, p[6] & 0xff, p[5] >> 8, p[5] & 0xff, p[4] >> 8, p[4] & 0xff, p[3] >> 8); sbuf_printf(sb, "\n %02x %04x%04x %04x%04x %04x%04x", (p[9] >> 16) & 0xff, p[9] & 0xffff, p[8] >> 16, p[8] & 0xffff, p[7] >> 16, p[7] & 0xffff, p[6] >> 16); } else { sbuf_printf(sb, "\n %02x %04x%04x %04x%04x %04x%04x " "%08x %08x %08x %08x %08x %08x", (p[9] >> 16) & 0xff, p[9] & 0xffff, p[8] >> 16, p[8] & 0xffff, p[7] >> 16, p[7] & 0xffff, p[6] >> 16, p[2], p[1], p[0], p[5], p[4], p[3]); } } rc = sbuf_finish(sb); sbuf_delete(sb); done: free(buf, M_CXGBE); return (rc); } static int sysctl_cim_ma_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; u_int i; struct sbuf *sb; uint32_t *buf, *p; int rc; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(2 * CIM_MALA_SIZE * 5 * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); t4_cim_read_ma_la(sc, buf, buf + 5 * CIM_MALA_SIZE); p = buf; for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) { sbuf_printf(sb, "\n%02x%08x%08x%08x%08x", p[4], p[3], p[2], p[1], p[0]); } sbuf_printf(sb, "\n\nCnt ID Tag UE Data RDY VLD"); for (i = 0; i < CIM_MALA_SIZE; i++, p += 5) { sbuf_printf(sb, "\n%3u %2u %x %u %08x%08x %u %u", (p[2] >> 10) & 0xff, (p[2] >> 7) & 7, (p[2] >> 3) & 0xf, (p[2] >> 2) & 1, (p[1] >> 2) | ((p[2] & 3) << 30), (p[0] >> 2) | ((p[1] & 3) << 30), (p[0] >> 1) & 1, p[0] & 1); } rc = sbuf_finish(sb); sbuf_delete(sb); free(buf, M_CXGBE); return (rc); } static int sysctl_cim_pif_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; u_int i; struct sbuf *sb; uint32_t *buf, *p; int rc; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(2 * CIM_PIFLA_SIZE * 6 * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); t4_cim_read_pif_la(sc, buf, buf + 6 * CIM_PIFLA_SIZE, NULL, NULL); p = buf; sbuf_printf(sb, "Cntl ID DataBE Addr Data"); for (i = 0; i < CIM_PIFLA_SIZE; i++, p += 6) { sbuf_printf(sb, "\n %02x %02x %04x %08x %08x%08x%08x%08x", (p[5] >> 22) & 0xff, (p[5] >> 16) & 0x3f, p[5] & 0xffff, p[4], p[3], p[2], p[1], p[0]); } sbuf_printf(sb, "\n\nCntl ID Data"); for (i = 0; i < CIM_PIFLA_SIZE; i++, p += 6) { sbuf_printf(sb, "\n %02x %02x %08x%08x%08x%08x", (p[4] >> 6) & 0xff, p[4] & 0x3f, p[3], p[2], p[1], p[0]); } rc = sbuf_finish(sb); sbuf_delete(sb); free(buf, M_CXGBE); return (rc); } static int sysctl_cim_qcfg(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; uint16_t base[CIM_NUM_IBQ + CIM_NUM_OBQ_T5]; uint16_t size[CIM_NUM_IBQ + CIM_NUM_OBQ_T5]; uint16_t thres[CIM_NUM_IBQ]; uint32_t obq_wr[2 * CIM_NUM_OBQ_T5], *wr = obq_wr; uint32_t stat[4 * (CIM_NUM_IBQ + CIM_NUM_OBQ_T5)], *p = stat; u_int cim_num_obq, ibq_rdaddr, obq_rdaddr, nq; cim_num_obq = sc->chip_params->cim_num_obq; if (is_t4(sc)) { ibq_rdaddr = A_UP_IBQ_0_RDADDR; obq_rdaddr = A_UP_OBQ_0_REALADDR; } else { ibq_rdaddr = A_UP_IBQ_0_SHADOW_RDADDR; obq_rdaddr = A_UP_OBQ_0_SHADOW_REALADDR; } nq = CIM_NUM_IBQ + cim_num_obq; rc = -t4_cim_read(sc, ibq_rdaddr, 4 * nq, stat); if (rc == 0) rc = -t4_cim_read(sc, obq_rdaddr, 2 * cim_num_obq, obq_wr); if (rc != 0) return (rc); t4_read_cimq_cfg(sc, base, size, 
thres); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, PAGE_SIZE, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, " Queue Base Size Thres RdPtr WrPtr SOP EOP Avail"); for (i = 0; i < CIM_NUM_IBQ; i++, p += 4) sbuf_printf(sb, "\n%7s %5x %5u %5u %6x %4x %4u %4u %5u", qname[i], base[i], size[i], thres[i], G_IBQRDADDR(p[0]), G_IBQWRADDR(p[1]), G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]), G_QUEREMFLITS(p[2]) * 16); for ( ; i < nq; i++, p += 4, wr += 2) sbuf_printf(sb, "\n%7s %5x %5u %12x %4x %4u %4u %5u", qname[i], base[i], size[i], G_QUERDADDR(p[0]) & 0x3fff, wr[0] - base[i], G_QUESOPCNT(p[3]), G_QUEEOPCNT(p[3]), G_QUEREMFLITS(p[2]) * 16); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_cpl_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_cpl_stats stats; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); mtx_lock(&sc->reg_lock); t4_tp_get_cpl_stats(sc, &stats, 0); mtx_unlock(&sc->reg_lock); if (sc->chip_params->nchan > 2) { sbuf_printf(sb, " channel 0 channel 1" " channel 2 channel 3"); sbuf_printf(sb, "\nCPL requests: %10u %10u %10u %10u", stats.req[0], stats.req[1], stats.req[2], stats.req[3]); sbuf_printf(sb, "\nCPL responses: %10u %10u %10u %10u", stats.rsp[0], stats.rsp[1], stats.rsp[2], stats.rsp[3]); } else { sbuf_printf(sb, " channel 0 channel 1"); sbuf_printf(sb, "\nCPL requests: %10u %10u", stats.req[0], stats.req[1]); sbuf_printf(sb, "\nCPL responses: %10u %10u", stats.rsp[0], stats.rsp[1]); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_ddp_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_usm_stats stats; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return(rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_get_usm_stats(sc, &stats, 1); sbuf_printf(sb, "Frames: %u\n", stats.frames); sbuf_printf(sb, "Octets: %ju\n", stats.octets); sbuf_printf(sb, "Drops: %u", stats.drops); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static const char * const devlog_level_strings[] = { [FW_DEVLOG_LEVEL_EMERG] = "EMERG", [FW_DEVLOG_LEVEL_CRIT] = "CRIT", [FW_DEVLOG_LEVEL_ERR] = "ERR", [FW_DEVLOG_LEVEL_NOTICE] = "NOTICE", [FW_DEVLOG_LEVEL_INFO] = "INFO", [FW_DEVLOG_LEVEL_DEBUG] = "DEBUG" }; static const char * const devlog_facility_strings[] = { [FW_DEVLOG_FACILITY_CORE] = "CORE", [FW_DEVLOG_FACILITY_CF] = "CF", [FW_DEVLOG_FACILITY_SCHED] = "SCHED", [FW_DEVLOG_FACILITY_TIMER] = "TIMER", [FW_DEVLOG_FACILITY_RES] = "RES", [FW_DEVLOG_FACILITY_HW] = "HW", [FW_DEVLOG_FACILITY_FLR] = "FLR", [FW_DEVLOG_FACILITY_DMAQ] = "DMAQ", [FW_DEVLOG_FACILITY_PHY] = "PHY", [FW_DEVLOG_FACILITY_MAC] = "MAC", [FW_DEVLOG_FACILITY_PORT] = "PORT", [FW_DEVLOG_FACILITY_VI] = "VI", [FW_DEVLOG_FACILITY_FILTER] = "FILTER", [FW_DEVLOG_FACILITY_ACL] = "ACL", [FW_DEVLOG_FACILITY_TM] = "TM", [FW_DEVLOG_FACILITY_QFC] = "QFC", [FW_DEVLOG_FACILITY_DCB] = "DCB", [FW_DEVLOG_FACILITY_ETH] = "ETH", [FW_DEVLOG_FACILITY_OFLD] = "OFLD", [FW_DEVLOG_FACILITY_RI] = "RI", [FW_DEVLOG_FACILITY_ISCSI] = "ISCSI", [FW_DEVLOG_FACILITY_FCOE] = "FCOE", [FW_DEVLOG_FACILITY_FOISCSI] = "FOISCSI", [FW_DEVLOG_FACILITY_FOFCOE] = "FOFCOE", [FW_DEVLOG_FACILITY_CHNET] = "CHNET", }; static int sysctl_devlog(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct devlog_params *dparams = &sc->params.devlog; struct fw_devlog_e *buf, *e; int 
i, j, rc, nentries, first = 0; struct sbuf *sb; uint64_t ftstamp = UINT64_MAX; if (dparams->addr == 0) return (ENXIO); buf = malloc(dparams->size, M_CXGBE, M_NOWAIT); if (buf == NULL) return (ENOMEM); rc = read_via_memwin(sc, 1, dparams->addr, (void *)buf, dparams->size); if (rc != 0) goto done; nentries = dparams->size / sizeof(struct fw_devlog_e); for (i = 0; i < nentries; i++) { e = &buf[i]; if (e->timestamp == 0) break; /* end */ e->timestamp = be64toh(e->timestamp); e->seqno = be32toh(e->seqno); for (j = 0; j < 8; j++) e->params[j] = be32toh(e->params[j]); if (e->timestamp < ftstamp) { ftstamp = e->timestamp; first = i; } } if (buf[first].timestamp == 0) goto done; /* nothing in the log */ rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) goto done; sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) { rc = ENOMEM; goto done; } sbuf_printf(sb, "%10s %15s %8s %8s %s\n", "Seq#", "Tstamp", "Level", "Facility", "Message"); i = first; do { e = &buf[i]; if (e->timestamp == 0) break; /* end */ sbuf_printf(sb, "%10d %15ju %8s %8s ", e->seqno, e->timestamp, (e->level < nitems(devlog_level_strings) ? devlog_level_strings[e->level] : "UNKNOWN"), (e->facility < nitems(devlog_facility_strings) ? devlog_facility_strings[e->facility] : "UNKNOWN")); sbuf_printf(sb, e->fmt, e->params[0], e->params[1], e->params[2], e->params[3], e->params[4], e->params[5], e->params[6], e->params[7]); if (++i == nentries) i = 0; } while (i != first); rc = sbuf_finish(sb); sbuf_delete(sb); done: free(buf, M_CXGBE); return (rc); } static int sysctl_fcoe_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_fcoe_stats stats[MAX_NCHAN]; int i, nchan = sc->chip_params->nchan; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); for (i = 0; i < nchan; i++) t4_get_fcoe_stats(sc, i, &stats[i], 1); if (nchan > 2) { sbuf_printf(sb, " channel 0 channel 1" " channel 2 channel 3"); sbuf_printf(sb, "\noctetsDDP: %16ju %16ju %16ju %16ju", stats[0].octets_ddp, stats[1].octets_ddp, stats[2].octets_ddp, stats[3].octets_ddp); sbuf_printf(sb, "\nframesDDP: %16u %16u %16u %16u", stats[0].frames_ddp, stats[1].frames_ddp, stats[2].frames_ddp, stats[3].frames_ddp); sbuf_printf(sb, "\nframesDrop: %16u %16u %16u %16u", stats[0].frames_drop, stats[1].frames_drop, stats[2].frames_drop, stats[3].frames_drop); } else { sbuf_printf(sb, " channel 0 channel 1"); sbuf_printf(sb, "\noctetsDDP: %16ju %16ju", stats[0].octets_ddp, stats[1].octets_ddp); sbuf_printf(sb, "\nframesDDP: %16u %16u", stats[0].frames_ddp, stats[1].frames_ddp); sbuf_printf(sb, "\nframesDrop: %16u %16u", stats[0].frames_drop, stats[1].frames_drop); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_hw_sched(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; unsigned int map, kbps, ipg, mode; unsigned int pace_tab[NTX_SCHED]; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); map = t4_read_reg(sc, A_TP_TX_MOD_QUEUE_REQ_MAP); mode = G_TIMERMODE(t4_read_reg(sc, A_TP_MOD_CONFIG)); t4_read_pace_tbl(sc, pace_tab); sbuf_printf(sb, "Scheduler Mode Channel Rate (Kbps) " "Class IPG (0.1 ns) Flow IPG (us)"); for (i = 0; i < NTX_SCHED; ++i, map >>= 2) { t4_get_tx_sched(sc, i, &kbps, &ipg, 1); sbuf_printf(sb, "\n %u %-5s %u ", i, (mode & (1 << i)) ? 
"flow" : "class", map & 3); if (kbps) sbuf_printf(sb, "%9u ", kbps); else sbuf_printf(sb, " disabled "); if (ipg) sbuf_printf(sb, "%13u ", ipg); else sbuf_printf(sb, " disabled "); if (pace_tab[i]) sbuf_printf(sb, "%10u", pace_tab[i]); else sbuf_printf(sb, " disabled"); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_lb_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i, j; uint64_t *p0, *p1; struct lb_port_stats s[2]; static const char *stat_name[] = { "OctetsOK:", "FramesOK:", "BcastFrames:", "McastFrames:", "UcastFrames:", "ErrorFrames:", "Frames64:", "Frames65To127:", "Frames128To255:", "Frames256To511:", "Frames512To1023:", "Frames1024To1518:", "Frames1519ToMax:", "FramesDropped:", "BG0FramesDropped:", "BG1FramesDropped:", "BG2FramesDropped:", "BG3FramesDropped:", "BG0FramesTrunc:", "BG1FramesTrunc:", "BG2FramesTrunc:", "BG3FramesTrunc:" }; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); memset(s, 0, sizeof(s)); for (i = 0; i < sc->chip_params->nchan; i += 2) { t4_get_lb_stats(sc, i, &s[0]); t4_get_lb_stats(sc, i + 1, &s[1]); p0 = &s[0].octets; p1 = &s[1].octets; sbuf_printf(sb, "%s Loopback %u" " Loopback %u", i == 0 ? "" : "\n", i, i + 1); for (j = 0; j < nitems(stat_name); j++) sbuf_printf(sb, "\n%-17s %20ju %20ju", stat_name[j], *p0++, *p1++); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_linkdnrc(SYSCTL_HANDLER_ARGS) { int rc = 0; struct port_info *pi = arg1; struct link_config *lc = &pi->link_cfg; struct sbuf *sb; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return(rc); sb = sbuf_new_for_sysctl(NULL, NULL, 64, req); if (sb == NULL) return (ENOMEM); if (lc->link_ok || lc->link_down_rc == 255) sbuf_printf(sb, "n/a"); else sbuf_printf(sb, "%s", t4_link_down_rc_str(lc->link_down_rc)); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } struct mem_desc { unsigned int base; unsigned int limit; unsigned int idx; }; static int mem_desc_cmp(const void *a, const void *b) { return ((const struct mem_desc *)a)->base - ((const struct mem_desc *)b)->base; } static void mem_region_show(struct sbuf *sb, const char *name, unsigned int from, unsigned int to) { unsigned int size; if (from == to) return; size = to - from + 1; if (size == 0) return; /* XXX: need humanize_number(3) in libkern for a more readable 'size' */ sbuf_printf(sb, "%-15s %#x-%#x [%u]\n", name, from, to, size); } static int sysctl_meminfo(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i, n; uint32_t lo, hi, used, alloc; static const char *memory[] = {"EDC0:", "EDC1:", "MC:", "MC0:", "MC1:"}; static const char *region[] = { "DBQ contexts:", "IMSG contexts:", "FLM cache:", "TCBs:", "Pstructs:", "Timers:", "Rx FL:", "Tx FL:", "Pstruct FL:", "Tx payload:", "Rx payload:", "LE hash:", "iSCSI region:", "TDDP region:", "TPT region:", "STAG region:", "RQ region:", "RQUDP region:", "PBL region:", "TXPBL region:", "DBVFIFO region:", "ULPRX state:", "ULPTX state:", "On-chip queues:" }; struct mem_desc avail[4]; struct mem_desc mem[nitems(region) + 3]; /* up to 3 holes */ struct mem_desc *md = mem; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); for (i = 0; i < nitems(mem); i++) { mem[i].limit = 0; mem[i].idx = i; } /* Find and sort the populated memory ranges */ i = 0; lo = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE); if (lo & 
F_EDRAM0_ENABLE) { hi = t4_read_reg(sc, A_MA_EDRAM0_BAR); avail[i].base = G_EDRAM0_BASE(hi) << 20; avail[i].limit = avail[i].base + (G_EDRAM0_SIZE(hi) << 20); avail[i].idx = 0; i++; } if (lo & F_EDRAM1_ENABLE) { hi = t4_read_reg(sc, A_MA_EDRAM1_BAR); avail[i].base = G_EDRAM1_BASE(hi) << 20; avail[i].limit = avail[i].base + (G_EDRAM1_SIZE(hi) << 20); avail[i].idx = 1; i++; } if (lo & F_EXT_MEM_ENABLE) { hi = t4_read_reg(sc, A_MA_EXT_MEMORY_BAR); avail[i].base = G_EXT_MEM_BASE(hi) << 20; avail[i].limit = avail[i].base + (G_EXT_MEM_SIZE(hi) << 20); avail[i].idx = is_t5(sc) ? 3 : 2; /* Call it MC0 for T5 */ i++; } if (is_t5(sc) && lo & F_EXT_MEM1_ENABLE) { hi = t4_read_reg(sc, A_MA_EXT_MEMORY1_BAR); avail[i].base = G_EXT_MEM1_BASE(hi) << 20; avail[i].limit = avail[i].base + (G_EXT_MEM1_SIZE(hi) << 20); avail[i].idx = 4; i++; } if (!i) /* no memory available */ return 0; qsort(avail, i, sizeof(struct mem_desc), mem_desc_cmp); (md++)->base = t4_read_reg(sc, A_SGE_DBQ_CTXT_BADDR); (md++)->base = t4_read_reg(sc, A_SGE_IMSG_CTXT_BADDR); (md++)->base = t4_read_reg(sc, A_SGE_FLM_CACHE_BADDR); (md++)->base = t4_read_reg(sc, A_TP_CMM_TCB_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_TIMER_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_RX_FLST_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_TX_FLST_BASE); (md++)->base = t4_read_reg(sc, A_TP_CMM_MM_PS_FLST_BASE); /* the next few have explicit upper bounds */ md->base = t4_read_reg(sc, A_TP_PMM_TX_BASE); md->limit = md->base - 1 + t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE) * G_PMTXMAXPAGE(t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE)); md++; md->base = t4_read_reg(sc, A_TP_PMM_RX_BASE); md->limit = md->base - 1 + t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) * G_PMRXMAXPAGE(t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE)); md++; if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) { if (chip_id(sc) <= CHELSIO_T5) md->base = t4_read_reg(sc, A_LE_DB_HASH_TID_BASE); else md->base = t4_read_reg(sc, A_LE_DB_HASH_TBL_BASE_ADDR); md->limit = 0; } else { md->base = 0; md->idx = nitems(region); /* hide it */ } md++; #define ulp_region(reg) \ md->base = t4_read_reg(sc, A_ULP_ ## reg ## _LLIMIT);\ (md++)->limit = t4_read_reg(sc, A_ULP_ ## reg ## _ULIMIT) ulp_region(RX_ISCSI); ulp_region(RX_TDDP); ulp_region(TX_TPT); ulp_region(RX_STAG); ulp_region(RX_RQ); ulp_region(RX_RQUDP); ulp_region(RX_PBL); ulp_region(TX_PBL); #undef ulp_region md->base = 0; md->idx = nitems(region); if (!is_t4(sc)) { uint32_t size = 0; uint32_t sge_ctrl = t4_read_reg(sc, A_SGE_CONTROL2); uint32_t fifo_size = t4_read_reg(sc, A_SGE_DBVFIFO_SIZE); if (is_t5(sc)) { if (sge_ctrl & F_VFIFO_ENABLE) size = G_DBVFIFO_SIZE(fifo_size); } else size = G_T6_DBVFIFO_SIZE(fifo_size); if (size) { md->base = G_BASEADDR(t4_read_reg(sc, A_SGE_DBVFIFO_BADDR)); md->limit = md->base + (size << 2) - 1; } } md++; md->base = t4_read_reg(sc, A_ULP_RX_CTX_BASE); md->limit = 0; md++; md->base = t4_read_reg(sc, A_ULP_TX_ERR_TABLE_BASE); md->limit = 0; md++; md->base = sc->vres.ocq.start; if (sc->vres.ocq.size) md->limit = md->base + sc->vres.ocq.size - 1; else md->idx = nitems(region); /* hide it */ md++; /* add any address-space holes, there can be up to 3 */ for (n = 0; n < i - 1; n++) if (avail[n].limit < avail[n + 1].base) (md++)->base = avail[n].limit; if (avail[n].limit) (md++)->base = avail[n].limit; n = md - mem; qsort(mem, n, sizeof(struct mem_desc), mem_desc_cmp); for (lo = 0; lo < i; lo++) mem_region_show(sb, memory[avail[lo].idx], avail[lo].base, avail[lo].limit - 1); sbuf_printf(sb, 
"\n"); for (i = 0; i < n; i++) { if (mem[i].idx >= nitems(region)) continue; /* skip holes */ if (!mem[i].limit) mem[i].limit = i < n - 1 ? mem[i + 1].base - 1 : ~0; mem_region_show(sb, region[mem[i].idx], mem[i].base, mem[i].limit); } sbuf_printf(sb, "\n"); lo = t4_read_reg(sc, A_CIM_SDRAM_BASE_ADDR); hi = t4_read_reg(sc, A_CIM_SDRAM_ADDR_SIZE) + lo - 1; mem_region_show(sb, "uP RAM:", lo, hi); lo = t4_read_reg(sc, A_CIM_EXTMEM2_BASE_ADDR); hi = t4_read_reg(sc, A_CIM_EXTMEM2_ADDR_SIZE) + lo - 1; mem_region_show(sb, "uP Extmem2:", lo, hi); lo = t4_read_reg(sc, A_TP_PMM_RX_MAX_PAGE); sbuf_printf(sb, "\n%u Rx pages of size %uKiB for %u channels\n", G_PMRXMAXPAGE(lo), t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE) >> 10, (lo & F_PMRXNUMCHN) ? 2 : 1); lo = t4_read_reg(sc, A_TP_PMM_TX_MAX_PAGE); hi = t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE); sbuf_printf(sb, "%u Tx pages of size %u%ciB for %u channels\n", G_PMTXMAXPAGE(lo), hi >= (1 << 20) ? (hi >> 20) : (hi >> 10), hi >= (1 << 20) ? 'M' : 'K', 1 << G_PMTXNUMCHN(lo)); sbuf_printf(sb, "%u p-structs\n", t4_read_reg(sc, A_TP_CMM_MM_MAX_PSTRUCT)); for (i = 0; i < 4; i++) { if (chip_id(sc) > CHELSIO_T5) lo = t4_read_reg(sc, A_MPS_RX_MAC_BG_PG_CNT0 + i * 4); else lo = t4_read_reg(sc, A_MPS_RX_PG_RSV0 + i * 4); if (is_t5(sc)) { used = G_T5_USED(lo); alloc = G_T5_ALLOC(lo); } else { used = G_USED(lo); alloc = G_ALLOC(lo); } /* For T6 these are MAC buffer groups */ sbuf_printf(sb, "\nPort %d using %u pages out of %u allocated", i, used, alloc); } for (i = 0; i < sc->chip_params->nchan; i++) { if (chip_id(sc) > CHELSIO_T5) lo = t4_read_reg(sc, A_MPS_RX_LPBK_BG_PG_CNT0 + i * 4); else lo = t4_read_reg(sc, A_MPS_RX_PG_RSV4 + i * 4); if (is_t5(sc)) { used = G_T5_USED(lo); alloc = G_T5_ALLOC(lo); } else { used = G_USED(lo); alloc = G_ALLOC(lo); } /* For T6 these are MAC buffer groups */ sbuf_printf(sb, "\nLoopback %d using %u pages out of %u allocated", i, used, alloc); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static inline void tcamxy2valmask(uint64_t x, uint64_t y, uint8_t *addr, uint64_t *mask) { *mask = x | y; y = htobe64(y); memcpy(addr, (char *)&y + 2, ETHER_ADDR_LEN); } static int sysctl_mps_tcam(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; MPASS(chip_id(sc) <= CHELSIO_T5); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "Idx Ethernet address Mask Vld Ports PF" " VF Replication P0 P1 P2 P3 ML"); for (i = 0; i < sc->chip_params->mps_tcam_size; i++) { uint64_t tcamx, tcamy, mask; uint32_t cls_lo, cls_hi; uint8_t addr[ETHER_ADDR_LEN]; tcamy = t4_read_reg64(sc, MPS_CLS_TCAM_Y_L(i)); tcamx = t4_read_reg64(sc, MPS_CLS_TCAM_X_L(i)); if (tcamx & tcamy) continue; tcamxy2valmask(tcamx, tcamy, addr, &mask); cls_lo = t4_read_reg(sc, MPS_CLS_SRAM_L(i)); cls_hi = t4_read_reg(sc, MPS_CLS_SRAM_H(i)); sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x %012jx" " %c %#x%4u%4d", i, addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], (uintmax_t)mask, (cls_lo & F_SRAM_VLD) ? 'Y' : 'N', G_PORTMAP(cls_hi), G_PF(cls_lo), (cls_lo & F_VF_VALID) ? 
G_VF(cls_lo) : -1); if (cls_lo & F_REPLICATE) { struct fw_ldst_cmd ldst_cmd; memset(&ldst_cmd, 0, sizeof(ldst_cmd)); ldst_cmd.op_to_addrspace = htobe32(V_FW_CMD_OP(FW_LDST_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_READ | V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_MPS)); ldst_cmd.cycles_to_len16 = htobe32(FW_LEN16(ldst_cmd)); ldst_cmd.u.mps.rplc.fid_idx = htobe16(V_FW_LDST_CMD_FID(FW_LDST_MPS_RPLC) | V_FW_LDST_CMD_IDX(i)); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4mps"); if (rc) break; rc = -t4_wr_mbox(sc, sc->mbox, &ldst_cmd, sizeof(ldst_cmd), &ldst_cmd); end_synchronized_op(sc, 0); if (rc != 0) { sbuf_printf(sb, "%36d", rc); rc = 0; } else { sbuf_printf(sb, " %08x %08x %08x %08x", be32toh(ldst_cmd.u.mps.rplc.rplc127_96), be32toh(ldst_cmd.u.mps.rplc.rplc95_64), be32toh(ldst_cmd.u.mps.rplc.rplc63_32), be32toh(ldst_cmd.u.mps.rplc.rplc31_0)); } } else sbuf_printf(sb, "%36s", ""); sbuf_printf(sb, "%4u%3u%3u%3u %#3x", G_SRAM_PRIO0(cls_lo), G_SRAM_PRIO1(cls_lo), G_SRAM_PRIO2(cls_lo), G_SRAM_PRIO3(cls_lo), (cls_lo >> S_MULTILISTEN0) & 0xf); } if (rc) (void) sbuf_finish(sb); else rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_mps_tcam_t6(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; MPASS(chip_id(sc) > CHELSIO_T5); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); sbuf_printf(sb, "Idx Ethernet address Mask VNI Mask" " IVLAN Vld DIP_Hit Lookup Port Vld Ports PF VF" " Replication" " P0 P1 P2 P3 ML\n"); for (i = 0; i < sc->chip_params->mps_tcam_size; i++) { uint8_t dip_hit, vlan_vld, lookup_type, port_num; uint16_t ivlan; uint64_t tcamx, tcamy, val, mask; uint32_t cls_lo, cls_hi, ctl, data2, vnix, vniy; uint8_t addr[ETHER_ADDR_LEN]; ctl = V_CTLREQID(1) | V_CTLCMDTYPE(0) | V_CTLXYBITSEL(0); if (i < 256) ctl |= V_CTLTCAMINDEX(i) | V_CTLTCAMSEL(0); else ctl |= V_CTLTCAMINDEX(i - 256) | V_CTLTCAMSEL(1); t4_write_reg(sc, A_MPS_CLS_TCAM_DATA2_CTL, ctl); val = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA1_REQ_ID1); tcamy = G_DMACH(val) << 32; tcamy |= t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA0_REQ_ID1); data2 = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA2_REQ_ID1); lookup_type = G_DATALKPTYPE(data2); port_num = G_DATAPORTNUM(data2); if (lookup_type && lookup_type != M_DATALKPTYPE) { /* Inner header VNI */ vniy = ((data2 & F_DATAVIDH2) << 23) | (G_DATAVIDH1(data2) << 16) | G_VIDL(val); dip_hit = data2 & F_DATADIPHIT; vlan_vld = 0; } else { vniy = 0; dip_hit = 0; vlan_vld = data2 & F_DATAVIDH2; ivlan = G_VIDL(val); } ctl |= V_CTLXYBITSEL(1); t4_write_reg(sc, A_MPS_CLS_TCAM_DATA2_CTL, ctl); val = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA1_REQ_ID1); tcamx = G_DMACH(val) << 32; tcamx |= t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA0_REQ_ID1); data2 = t4_read_reg(sc, A_MPS_CLS_TCAM_RDATA2_REQ_ID1); if (lookup_type && lookup_type != M_DATALKPTYPE) { /* Inner header VNI mask */ vnix = ((data2 & F_DATAVIDH2) << 23) | (G_DATAVIDH1(data2) << 16) | G_VIDL(val); } else vnix = 0; if (tcamx & tcamy) continue; tcamxy2valmask(tcamx, tcamy, addr, &mask); cls_lo = t4_read_reg(sc, MPS_CLS_SRAM_L(i)); cls_hi = t4_read_reg(sc, MPS_CLS_SRAM_H(i)); if (lookup_type && lookup_type != M_DATALKPTYPE) { sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x " "%012jx %06x %06x - - %3c" " 'I' %4x %3c %#x%4u%4d", i, addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], (uintmax_t)mask, vniy, vnix, dip_hit ? 'Y' : 'N', port_num, cls_lo & F_T6_SRAM_VLD ? 
'Y' : 'N', G_PORTMAP(cls_hi), G_T6_PF(cls_lo), cls_lo & F_T6_VF_VALID ? G_T6_VF(cls_lo) : -1); } else { sbuf_printf(sb, "\n%3u %02x:%02x:%02x:%02x:%02x:%02x " "%012jx - - ", i, addr[0], addr[1], addr[2], addr[3], addr[4], addr[5], (uintmax_t)mask); if (vlan_vld) sbuf_printf(sb, "%4u Y ", ivlan); else sbuf_printf(sb, " - N "); sbuf_printf(sb, "- %3c %4x %3c %#x%4u%4d", lookup_type ? 'I' : 'O', port_num, cls_lo & F_T6_SRAM_VLD ? 'Y' : 'N', G_PORTMAP(cls_hi), G_T6_PF(cls_lo), cls_lo & F_T6_VF_VALID ? G_T6_VF(cls_lo) : -1); } if (cls_lo & F_T6_REPLICATE) { struct fw_ldst_cmd ldst_cmd; memset(&ldst_cmd, 0, sizeof(ldst_cmd)); ldst_cmd.op_to_addrspace = htobe32(V_FW_CMD_OP(FW_LDST_CMD) | F_FW_CMD_REQUEST | F_FW_CMD_READ | V_FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_MPS)); ldst_cmd.cycles_to_len16 = htobe32(FW_LEN16(ldst_cmd)); ldst_cmd.u.mps.rplc.fid_idx = htobe16(V_FW_LDST_CMD_FID(FW_LDST_MPS_RPLC) | V_FW_LDST_CMD_IDX(i)); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t6mps"); if (rc) break; rc = -t4_wr_mbox(sc, sc->mbox, &ldst_cmd, sizeof(ldst_cmd), &ldst_cmd); end_synchronized_op(sc, 0); if (rc != 0) { sbuf_printf(sb, "%72d", rc); rc = 0; } else { sbuf_printf(sb, " %08x %08x %08x %08x" " %08x %08x %08x %08x", be32toh(ldst_cmd.u.mps.rplc.rplc255_224), be32toh(ldst_cmd.u.mps.rplc.rplc223_192), be32toh(ldst_cmd.u.mps.rplc.rplc191_160), be32toh(ldst_cmd.u.mps.rplc.rplc159_128), be32toh(ldst_cmd.u.mps.rplc.rplc127_96), be32toh(ldst_cmd.u.mps.rplc.rplc95_64), be32toh(ldst_cmd.u.mps.rplc.rplc63_32), be32toh(ldst_cmd.u.mps.rplc.rplc31_0)); } } else sbuf_printf(sb, "%72s", ""); sbuf_printf(sb, "%4u%3u%3u%3u %#x", G_T6_SRAM_PRIO0(cls_lo), G_T6_SRAM_PRIO1(cls_lo), G_T6_SRAM_PRIO2(cls_lo), G_T6_SRAM_PRIO3(cls_lo), (cls_lo >> S_T6_MULTILISTEN0) & 0xf); } if (rc) (void) sbuf_finish(sb); else rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_path_mtus(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; uint16_t mtus[NMTUS]; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_read_mtu_tbl(sc, mtus, NULL); sbuf_printf(sb, "%u %u %u %u %u %u %u %u %u %u %u %u %u %u %u %u", mtus[0], mtus[1], mtus[2], mtus[3], mtus[4], mtus[5], mtus[6], mtus[7], mtus[8], mtus[9], mtus[10], mtus[11], mtus[12], mtus[13], mtus[14], mtus[15]); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_pm_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, i; uint32_t tx_cnt[MAX_PM_NSTATS], rx_cnt[MAX_PM_NSTATS]; uint64_t tx_cyc[MAX_PM_NSTATS], rx_cyc[MAX_PM_NSTATS]; static const char *tx_stats[MAX_PM_NSTATS] = { "Read:", "Write bypass:", "Write mem:", "Bypass + mem:", "Tx FIFO wait", NULL, "Tx latency" }; static const char *rx_stats[MAX_PM_NSTATS] = { "Read:", "Write bypass:", "Write mem:", "Flush:", "Rx FIFO wait", NULL, "Rx latency" }; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_pmtx_get_stats(sc, tx_cnt, tx_cyc); t4_pmrx_get_stats(sc, rx_cnt, rx_cyc); sbuf_printf(sb, " Tx pcmds Tx bytes"); for (i = 0; i < 4; i++) { sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i], tx_cyc[i]); } sbuf_printf(sb, "\n Rx pcmds Rx bytes"); for (i = 0; i < 4; i++) { sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i], rx_cyc[i]); } if (chip_id(sc) > CHELSIO_T5) { sbuf_printf(sb, "\n Total wait Total occupancy"); sbuf_printf(sb, "\n%-13s %10u 
%20ju", tx_stats[i], tx_cnt[i], tx_cyc[i]); sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i], rx_cyc[i]); i += 2; MPASS(i < nitems(tx_stats)); sbuf_printf(sb, "\n Reads Total wait"); sbuf_printf(sb, "\n%-13s %10u %20ju", tx_stats[i], tx_cnt[i], tx_cyc[i]); sbuf_printf(sb, "\n%-13s %10u %20ju", rx_stats[i], rx_cnt[i], rx_cyc[i]); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_rdma_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_rdma_stats stats; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); mtx_lock(&sc->reg_lock); t4_tp_get_rdma_stats(sc, &stats, 0); mtx_unlock(&sc->reg_lock); sbuf_printf(sb, "NoRQEModDefferals: %u\n", stats.rqe_dfr_mod); sbuf_printf(sb, "NoRQEPktDefferals: %u", stats.rqe_dfr_pkt); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_tcp_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_tcp_stats v4, v6; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); mtx_lock(&sc->reg_lock); t4_tp_get_tcp_stats(sc, &v4, &v6, 0); mtx_unlock(&sc->reg_lock); sbuf_printf(sb, " IP IPv6\n"); sbuf_printf(sb, "OutRsts: %20u %20u\n", v4.tcp_out_rsts, v6.tcp_out_rsts); sbuf_printf(sb, "InSegs: %20ju %20ju\n", v4.tcp_in_segs, v6.tcp_in_segs); sbuf_printf(sb, "OutSegs: %20ju %20ju\n", v4.tcp_out_segs, v6.tcp_out_segs); sbuf_printf(sb, "RetransSegs: %20ju %20ju", v4.tcp_retrans_segs, v6.tcp_retrans_segs); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_tids(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tid_info *t = &sc->tids; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); if (t->natids) { sbuf_printf(sb, "ATID range: 0-%u, in use: %u\n", t->natids - 1, t->atids_in_use); } if (t->ntids) { sbuf_printf(sb, "TID range: "); if (t4_read_reg(sc, A_LE_DB_CONFIG) & F_HASHEN) { uint32_t b, hb; if (chip_id(sc) <= CHELSIO_T5) { b = t4_read_reg(sc, A_LE_DB_SERVER_INDEX) / 4; hb = t4_read_reg(sc, A_LE_DB_TID_HASHBASE) / 4; } else { b = t4_read_reg(sc, A_LE_DB_SRVR_START_INDEX); hb = t4_read_reg(sc, A_T6_LE_DB_HASH_TID_BASE); } if (b) sbuf_printf(sb, "0-%u, ", b - 1); sbuf_printf(sb, "%u-%u", hb, t->ntids - 1); } else sbuf_printf(sb, "0-%u", t->ntids - 1); sbuf_printf(sb, ", in use: %u\n", atomic_load_acq_int(&t->tids_in_use)); } if (t->nstids) { sbuf_printf(sb, "STID range: %u-%u, in use: %u\n", t->stid_base, t->stid_base + t->nstids - 1, t->stids_in_use); } if (t->nftids) { sbuf_printf(sb, "FTID range: %u-%u\n", t->ftid_base, t->ftid_base + t->nftids - 1); } if (t->netids) { sbuf_printf(sb, "ETID range: %u-%u\n", t->etid_base, t->etid_base + t->netids - 1); } sbuf_printf(sb, "HW TID usage: %u IP users, %u IPv6 users", t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV4), t4_read_reg(sc, A_LE_DB_ACT_CNT_IPV6)); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_tp_err_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; struct tp_err_stats stats; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); mtx_lock(&sc->reg_lock); t4_tp_get_err_stats(sc, &stats, 0); mtx_unlock(&sc->reg_lock); if 
(sc->chip_params->nchan > 2) { sbuf_printf(sb, " channel 0 channel 1" " channel 2 channel 3\n"); sbuf_printf(sb, "macInErrs: %10u %10u %10u %10u\n", stats.mac_in_errs[0], stats.mac_in_errs[1], stats.mac_in_errs[2], stats.mac_in_errs[3]); sbuf_printf(sb, "hdrInErrs: %10u %10u %10u %10u\n", stats.hdr_in_errs[0], stats.hdr_in_errs[1], stats.hdr_in_errs[2], stats.hdr_in_errs[3]); sbuf_printf(sb, "tcpInErrs: %10u %10u %10u %10u\n", stats.tcp_in_errs[0], stats.tcp_in_errs[1], stats.tcp_in_errs[2], stats.tcp_in_errs[3]); sbuf_printf(sb, "tcp6InErrs: %10u %10u %10u %10u\n", stats.tcp6_in_errs[0], stats.tcp6_in_errs[1], stats.tcp6_in_errs[2], stats.tcp6_in_errs[3]); sbuf_printf(sb, "tnlCongDrops: %10u %10u %10u %10u\n", stats.tnl_cong_drops[0], stats.tnl_cong_drops[1], stats.tnl_cong_drops[2], stats.tnl_cong_drops[3]); sbuf_printf(sb, "tnlTxDrops: %10u %10u %10u %10u\n", stats.tnl_tx_drops[0], stats.tnl_tx_drops[1], stats.tnl_tx_drops[2], stats.tnl_tx_drops[3]); sbuf_printf(sb, "ofldVlanDrops: %10u %10u %10u %10u\n", stats.ofld_vlan_drops[0], stats.ofld_vlan_drops[1], stats.ofld_vlan_drops[2], stats.ofld_vlan_drops[3]); sbuf_printf(sb, "ofldChanDrops: %10u %10u %10u %10u\n\n", stats.ofld_chan_drops[0], stats.ofld_chan_drops[1], stats.ofld_chan_drops[2], stats.ofld_chan_drops[3]); } else { sbuf_printf(sb, " channel 0 channel 1\n"); sbuf_printf(sb, "macInErrs: %10u %10u\n", stats.mac_in_errs[0], stats.mac_in_errs[1]); sbuf_printf(sb, "hdrInErrs: %10u %10u\n", stats.hdr_in_errs[0], stats.hdr_in_errs[1]); sbuf_printf(sb, "tcpInErrs: %10u %10u\n", stats.tcp_in_errs[0], stats.tcp_in_errs[1]); sbuf_printf(sb, "tcp6InErrs: %10u %10u\n", stats.tcp6_in_errs[0], stats.tcp6_in_errs[1]); sbuf_printf(sb, "tnlCongDrops: %10u %10u\n", stats.tnl_cong_drops[0], stats.tnl_cong_drops[1]); sbuf_printf(sb, "tnlTxDrops: %10u %10u\n", stats.tnl_tx_drops[0], stats.tnl_tx_drops[1]); sbuf_printf(sb, "ofldVlanDrops: %10u %10u\n", stats.ofld_vlan_drops[0], stats.ofld_vlan_drops[1]); sbuf_printf(sb, "ofldChanDrops: %10u %10u\n\n", stats.ofld_chan_drops[0], stats.ofld_chan_drops[1]); } sbuf_printf(sb, "ofldNoNeigh: %u\nofldCongDefer: %u", stats.ofld_no_neigh, stats.ofld_cong_defer); rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_tp_la_mask(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct tp_params *tpp = &sc->params.tp; u_int mask; int rc; mask = tpp->la_mask >> 16; rc = sysctl_handle_int(oidp, &mask, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (mask > 0xffff) return (EINVAL); tpp->la_mask = mask << 16; t4_set_reg_field(sc, A_TP_DBG_LA_CONFIG, 0xffff0000U, tpp->la_mask); return (0); } struct field_desc { const char *name; u_int start; u_int width; }; static void field_desc_show(struct sbuf *sb, uint64_t v, const struct field_desc *f) { char buf[32]; int line_size = 0; while (f->name) { uint64_t mask = (1ULL << f->width) - 1; int len = snprintf(buf, sizeof(buf), "%s: %ju", f->name, ((uintmax_t)v >> f->start) & mask); if (line_size + len >= 79) { line_size = 8; sbuf_printf(sb, "\n "); } sbuf_printf(sb, "%s ", buf); line_size += len + 1; f++; } sbuf_printf(sb, "\n"); } static const struct field_desc tp_la0[] = { { "RcfOpCodeOut", 60, 4 }, { "State", 56, 4 }, { "WcfState", 52, 4 }, { "RcfOpcSrcOut", 50, 2 }, { "CRxError", 49, 1 }, { "ERxError", 48, 1 }, { "SanityFailed", 47, 1 }, { "SpuriousMsg", 46, 1 }, { "FlushInputMsg", 45, 1 }, { "FlushInputCpl", 44, 1 }, { "RssUpBit", 43, 1 }, { "RssFilterHit", 42, 1 }, { "Tid", 32, 10 }, { "InitTcb", 31, 1 }, { "LineNumber", 24, 7 }, { "Emsg", 
23, 1 }, { "EdataOut", 22, 1 }, { "Cmsg", 21, 1 }, { "CdataOut", 20, 1 }, { "EreadPdu", 19, 1 }, { "CreadPdu", 18, 1 }, { "TunnelPkt", 17, 1 }, { "RcfPeerFin", 16, 1 }, { "RcfReasonOut", 12, 4 }, { "TxCchannel", 10, 2 }, { "RcfTxChannel", 8, 2 }, { "RxEchannel", 6, 2 }, { "RcfRxChannel", 5, 1 }, { "RcfDataOutSrdy", 4, 1 }, { "RxDvld", 3, 1 }, { "RxOoDvld", 2, 1 }, { "RxCongestion", 1, 1 }, { "TxCongestion", 0, 1 }, { NULL } }; static const struct field_desc tp_la1[] = { { "CplCmdIn", 56, 8 }, { "CplCmdOut", 48, 8 }, { "ESynOut", 47, 1 }, { "EAckOut", 46, 1 }, { "EFinOut", 45, 1 }, { "ERstOut", 44, 1 }, { "SynIn", 43, 1 }, { "AckIn", 42, 1 }, { "FinIn", 41, 1 }, { "RstIn", 40, 1 }, { "DataIn", 39, 1 }, { "DataInVld", 38, 1 }, { "PadIn", 37, 1 }, { "RxBufEmpty", 36, 1 }, { "RxDdp", 35, 1 }, { "RxFbCongestion", 34, 1 }, { "TxFbCongestion", 33, 1 }, { "TxPktSumSrdy", 32, 1 }, { "RcfUlpType", 28, 4 }, { "Eread", 27, 1 }, { "Ebypass", 26, 1 }, { "Esave", 25, 1 }, { "Static0", 24, 1 }, { "Cread", 23, 1 }, { "Cbypass", 22, 1 }, { "Csave", 21, 1 }, { "CPktOut", 20, 1 }, { "RxPagePoolFull", 18, 2 }, { "RxLpbkPkt", 17, 1 }, { "TxLpbkPkt", 16, 1 }, { "RxVfValid", 15, 1 }, { "SynLearned", 14, 1 }, { "SetDelEntry", 13, 1 }, { "SetInvEntry", 12, 1 }, { "CpcmdDvld", 11, 1 }, { "CpcmdSave", 10, 1 }, { "RxPstructsFull", 8, 2 }, { "EpcmdDvld", 7, 1 }, { "EpcmdFlush", 6, 1 }, { "EpcmdTrimPrefix", 5, 1 }, { "EpcmdTrimPostfix", 4, 1 }, { "ERssIp4Pkt", 3, 1 }, { "ERssIp6Pkt", 2, 1 }, { "ERssTcpUdpPkt", 1, 1 }, { "ERssFceFipPkt", 0, 1 }, { NULL } }; static const struct field_desc tp_la2[] = { { "CplCmdIn", 56, 8 }, { "MpsVfVld", 55, 1 }, { "MpsPf", 52, 3 }, { "MpsVf", 44, 8 }, { "SynIn", 43, 1 }, { "AckIn", 42, 1 }, { "FinIn", 41, 1 }, { "RstIn", 40, 1 }, { "DataIn", 39, 1 }, { "DataInVld", 38, 1 }, { "PadIn", 37, 1 }, { "RxBufEmpty", 36, 1 }, { "RxDdp", 35, 1 }, { "RxFbCongestion", 34, 1 }, { "TxFbCongestion", 33, 1 }, { "TxPktSumSrdy", 32, 1 }, { "RcfUlpType", 28, 4 }, { "Eread", 27, 1 }, { "Ebypass", 26, 1 }, { "Esave", 25, 1 }, { "Static0", 24, 1 }, { "Cread", 23, 1 }, { "Cbypass", 22, 1 }, { "Csave", 21, 1 }, { "CPktOut", 20, 1 }, { "RxPagePoolFull", 18, 2 }, { "RxLpbkPkt", 17, 1 }, { "TxLpbkPkt", 16, 1 }, { "RxVfValid", 15, 1 }, { "SynLearned", 14, 1 }, { "SetDelEntry", 13, 1 }, { "SetInvEntry", 12, 1 }, { "CpcmdDvld", 11, 1 }, { "CpcmdSave", 10, 1 }, { "RxPstructsFull", 8, 2 }, { "EpcmdDvld", 7, 1 }, { "EpcmdFlush", 6, 1 }, { "EpcmdTrimPrefix", 5, 1 }, { "EpcmdTrimPostfix", 4, 1 }, { "ERssIp4Pkt", 3, 1 }, { "ERssIp6Pkt", 2, 1 }, { "ERssTcpUdpPkt", 1, 1 }, { "ERssFceFipPkt", 0, 1 }, { NULL } }; static void tp_la_show(struct sbuf *sb, uint64_t *p, int idx) { field_desc_show(sb, *p, tp_la0); } static void tp_la_show2(struct sbuf *sb, uint64_t *p, int idx) { if (idx) sbuf_printf(sb, "\n"); field_desc_show(sb, p[0], tp_la0); if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL) field_desc_show(sb, p[1], tp_la0); } static void tp_la_show3(struct sbuf *sb, uint64_t *p, int idx) { if (idx) sbuf_printf(sb, "\n"); field_desc_show(sb, p[0], tp_la0); if (idx < (TPLA_SIZE / 2 - 1) || p[1] != ~0ULL) field_desc_show(sb, p[1], (p[0] & (1 << 17)) ? 
tp_la2 : tp_la1); } static int sysctl_tp_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; uint64_t *buf, *p; int rc; u_int i, inc; void (*show_func)(struct sbuf *, uint64_t *, int); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(TPLA_SIZE * sizeof(uint64_t), M_CXGBE, M_ZERO | M_WAITOK); t4_tp_read_la(sc, buf, NULL); p = buf; switch (G_DBGLAMODE(t4_read_reg(sc, A_TP_DBG_LA_CONFIG))) { case 2: inc = 2; show_func = tp_la_show2; break; case 3: inc = 2; show_func = tp_la_show3; break; default: inc = 1; show_func = tp_la_show; } for (i = 0; i < TPLA_SIZE / inc; i++, p += inc) (*show_func)(sb, p, i); rc = sbuf_finish(sb); sbuf_delete(sb); free(buf, M_CXGBE); return (rc); } static int sysctl_tx_rate(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc; u64 nrate[MAX_NCHAN], orate[MAX_NCHAN]; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); if (sb == NULL) return (ENOMEM); t4_get_chan_txrate(sc, nrate, orate); if (sc->chip_params->nchan > 2) { sbuf_printf(sb, " channel 0 channel 1" " channel 2 channel 3\n"); sbuf_printf(sb, "NIC B/s: %10ju %10ju %10ju %10ju\n", nrate[0], nrate[1], nrate[2], nrate[3]); sbuf_printf(sb, "Offload B/s: %10ju %10ju %10ju %10ju", orate[0], orate[1], orate[2], orate[3]); } else { sbuf_printf(sb, " channel 0 channel 1\n"); sbuf_printf(sb, "NIC B/s: %10ju %10ju\n", nrate[0], nrate[1]); sbuf_printf(sb, "Offload B/s: %10ju %10ju", orate[0], orate[1]); } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_ulprx_la(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; uint32_t *buf, *p; int rc, i; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); buf = malloc(ULPRX_LA_SIZE * 8 * sizeof(uint32_t), M_CXGBE, M_ZERO | M_WAITOK); t4_ulprx_read_la(sc, buf); p = buf; sbuf_printf(sb, " Pcmd Type Message" " Data"); for (i = 0; i < ULPRX_LA_SIZE; i++, p += 8) { sbuf_printf(sb, "\n%08x%08x %4x %08x %08x%08x%08x%08x", p[1], p[0], p[2], p[3], p[7], p[6], p[5], p[4]); } rc = sbuf_finish(sb); sbuf_delete(sb); free(buf, M_CXGBE); return (rc); } static int sysctl_wcwr_stats(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct sbuf *sb; int rc, v; MPASS(chip_id(sc) >= CHELSIO_T5); rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); v = t4_read_reg(sc, A_SGE_STAT_CFG); if (G_STATSOURCE_T5(v) == 7) { int mode; mode = is_t5(sc) ? 
G_STATMODE(v) : G_T6_STATMODE(v); if (mode == 0) { sbuf_printf(sb, "total %d, incomplete %d", t4_read_reg(sc, A_SGE_STAT_TOTAL), t4_read_reg(sc, A_SGE_STAT_MATCH)); } else if (mode == 1) { sbuf_printf(sb, "total %d, data overflow %d", t4_read_reg(sc, A_SGE_STAT_TOTAL), t4_read_reg(sc, A_SGE_STAT_MATCH)); } else { sbuf_printf(sb, "unknown mode %d", mode); } } rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } static int sysctl_tc_params(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; struct tx_cl_rl_params tc; struct sbuf *sb; int i, rc, port_id, mbps, gbps; rc = sysctl_wire_old_buffer(req, 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 4096, req); if (sb == NULL) return (ENOMEM); port_id = arg2 >> 16; MPASS(port_id < sc->params.nports); MPASS(sc->port[port_id] != NULL); i = arg2 & 0xffff; MPASS(i < sc->chip_params->nsched_cls); mtx_lock(&sc->tc_lock); tc = sc->port[port_id]->sched_params->cl_rl[i]; mtx_unlock(&sc->tc_lock); if (tc.flags & TX_CLRL_ERROR) { sbuf_printf(sb, "error"); goto done; } if (tc.ratemode == SCHED_CLASS_RATEMODE_REL) { /* XXX: top speed or actual link speed? */ gbps = port_top_speed(sc->port[port_id]); sbuf_printf(sb, " %u%% of %uGbps", tc.maxrate, gbps); } else if (tc.ratemode == SCHED_CLASS_RATEMODE_ABS) { switch (tc.rateunit) { case SCHED_CLASS_RATEUNIT_BITS: mbps = tc.maxrate / 1000; gbps = tc.maxrate / 1000000; if (tc.maxrate == gbps * 1000000) sbuf_printf(sb, " %uGbps", gbps); else if (tc.maxrate == mbps * 1000) sbuf_printf(sb, " %uMbps", mbps); else sbuf_printf(sb, " %uKbps", tc.maxrate); break; case SCHED_CLASS_RATEUNIT_PKTS: sbuf_printf(sb, " %upps", tc.maxrate); break; default: rc = ENXIO; goto done; } } switch (tc.mode) { case SCHED_CLASS_MODE_CLASS: sbuf_printf(sb, " aggregate"); break; case SCHED_CLASS_MODE_FLOW: sbuf_printf(sb, " per-flow"); break; default: rc = ENXIO; goto done; } done: if (rc == 0) rc = sbuf_finish(sb); sbuf_delete(sb); return (rc); } #endif #ifdef TCP_OFFLOAD static void unit_conv(char *buf, size_t len, u_int val, u_int factor) { u_int rem = val % factor; if (rem == 0) snprintf(buf, len, "%u", val / factor); else { while (rem % 10 == 0) rem /= 10; snprintf(buf, len, "%u.%u", val / factor, rem); } } static int sysctl_tp_tick(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; char buf[16]; u_int res, re; u_int cclk_ps = 1000000000 / sc->params.vpd.cclk; res = t4_read_reg(sc, A_TP_TIMER_RESOLUTION); switch (arg2) { case 0: /* timer_tick */ re = G_TIMERRESOLUTION(res); break; case 1: /* TCP timestamp tick */ re = G_TIMESTAMPRESOLUTION(res); break; case 2: /* DACK tick */ re = G_DELAYEDACKRESOLUTION(res); break; default: return (EDOOFUS); } unit_conv(buf, sizeof(buf), (cclk_ps << re), 1000000); return (sysctl_handle_string(oidp, buf, sizeof(buf), req)); } static int sysctl_tp_dack_timer(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; u_int res, dack_re, v; u_int cclk_ps = 1000000000 / sc->params.vpd.cclk; res = t4_read_reg(sc, A_TP_TIMER_RESOLUTION); dack_re = G_DELAYEDACKRESOLUTION(res); v = ((cclk_ps << dack_re) / 1000000) * t4_read_reg(sc, A_TP_DACK_TIMER); return (sysctl_handle_int(oidp, &v, 0, req)); } static int sysctl_tp_timer(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; int reg = arg2; u_int tre; u_long tp_tick_us, v; u_int cclk_ps = 1000000000 / sc->params.vpd.cclk; MPASS(reg == A_TP_RXT_MIN || reg == A_TP_RXT_MAX || reg == A_TP_PERS_MIN || reg == A_TP_PERS_MAX || reg == A_TP_KEEP_IDLE || reg == A_TP_KEEP_INTVL || reg == A_TP_INIT_SRTT || reg == A_TP_FINWAIT2_TIMER); tre = 
G_TIMERRESOLUTION(t4_read_reg(sc, A_TP_TIMER_RESOLUTION)); tp_tick_us = (cclk_ps << tre) / 1000000; if (reg == A_TP_INIT_SRTT) v = tp_tick_us * G_INITSRTT(t4_read_reg(sc, reg)); else v = tp_tick_us * t4_read_reg(sc, reg); return (sysctl_handle_long(oidp, &v, 0, req)); } /* * All fields in TP_SHIFT_CNT are 4b and the starting location of the field is * passed to this function. */ static int sysctl_tp_shift_cnt(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; int idx = arg2; u_int v; MPASS(idx >= 0 && idx <= 24); v = (t4_read_reg(sc, A_TP_SHIFT_CNT) >> idx) & 0xf; return (sysctl_handle_int(oidp, &v, 0, req)); } static int sysctl_tp_backoff(SYSCTL_HANDLER_ARGS) { struct adapter *sc = arg1; int idx = arg2; u_int shift, v, r; MPASS(idx >= 0 && idx < 16); r = A_TP_TCP_BACKOFF_REG0 + (idx & ~3); shift = (idx & 3) << 3; v = (t4_read_reg(sc, r) >> shift) & M_TIMERBACKOFFINDEX0; return (sysctl_handle_int(oidp, &v, 0, req)); } static int sysctl_holdoff_tmr_idx_ofld(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct adapter *sc = vi->pi->adapter; int idx, rc, i; struct sge_ofld_rxq *ofld_rxq; uint8_t v; idx = vi->ofld_tmr_idx; rc = sysctl_handle_int(oidp, &idx, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (idx < 0 || idx >= SGE_NTIMERS) return (EINVAL); rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4otmr"); if (rc) return (rc); v = V_QINTR_TIMER_IDX(idx) | V_QINTR_CNT_EN(vi->ofld_pktc_idx != -1); for_each_ofld_rxq(vi, i, ofld_rxq) { #ifdef atomic_store_rel_8 atomic_store_rel_8(&ofld_rxq->iq.intr_params, v); #else ofld_rxq->iq.intr_params = v; #endif } vi->ofld_tmr_idx = idx; end_synchronized_op(sc, LOCK_HELD); return (0); } static int sysctl_holdoff_pktc_idx_ofld(SYSCTL_HANDLER_ARGS) { struct vi_info *vi = arg1; struct adapter *sc = vi->pi->adapter; int idx, rc; idx = vi->ofld_pktc_idx; rc = sysctl_handle_int(oidp, &idx, 0, req); if (rc != 0 || req->newptr == NULL) return (rc); if (idx < -1 || idx >= SGE_NCOUNTERS) return (EINVAL); rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4opktc"); if (rc) return (rc); if (vi->flags & VI_INIT_DONE) rc = EBUSY; /* cannot be changed once the queues are created */ else vi->ofld_pktc_idx = idx; end_synchronized_op(sc, LOCK_HELD); return (rc); } #endif static uint32_t fconf_iconf_to_mode(uint32_t fconf, uint32_t iconf) { uint32_t mode; mode = T4_FILTER_IPv4 | T4_FILTER_IPv6 | T4_FILTER_IP_SADDR | T4_FILTER_IP_DADDR | T4_FILTER_IP_SPORT | T4_FILTER_IP_DPORT; if (fconf & F_FRAGMENTATION) mode |= T4_FILTER_IP_FRAGMENT; if (fconf & F_MPSHITTYPE) mode |= T4_FILTER_MPS_HIT_TYPE; if (fconf & F_MACMATCH) mode |= T4_FILTER_MAC_IDX; if (fconf & F_ETHERTYPE) mode |= T4_FILTER_ETH_TYPE; if (fconf & F_PROTOCOL) mode |= T4_FILTER_IP_PROTO; if (fconf & F_TOS) mode |= T4_FILTER_IP_TOS; if (fconf & F_VLAN) mode |= T4_FILTER_VLAN; if (fconf & F_VNIC_ID) { mode |= T4_FILTER_VNIC; if (iconf & F_VNIC) mode |= T4_FILTER_IC_VNIC; } if (fconf & F_PORT) mode |= T4_FILTER_PORT; if (fconf & F_FCOE) mode |= T4_FILTER_FCoE; return (mode); } static uint32_t mode_to_fconf(uint32_t mode) { uint32_t fconf = 0; if (mode & T4_FILTER_IP_FRAGMENT) fconf |= F_FRAGMENTATION; if (mode & T4_FILTER_MPS_HIT_TYPE) fconf |= F_MPSHITTYPE; if (mode & T4_FILTER_MAC_IDX) fconf |= F_MACMATCH; if (mode & T4_FILTER_ETH_TYPE) fconf |= F_ETHERTYPE; if (mode & T4_FILTER_IP_PROTO) fconf |= F_PROTOCOL; if (mode & T4_FILTER_IP_TOS) fconf |= F_TOS; if (mode & T4_FILTER_VLAN) fconf |= F_VLAN; if (mode & T4_FILTER_VNIC) fconf |= F_VNIC_ID; if (mode & 
T4_FILTER_PORT) fconf |= F_PORT; if (mode & T4_FILTER_FCoE) fconf |= F_FCOE; return (fconf); } static uint32_t mode_to_iconf(uint32_t mode) { if (mode & T4_FILTER_IC_VNIC) return (F_VNIC); return (0); } static int check_fspec_against_fconf_iconf(struct adapter *sc, struct t4_filter_specification *fs) { struct tp_params *tpp = &sc->params.tp; uint32_t fconf = 0; if (fs->val.frag || fs->mask.frag) fconf |= F_FRAGMENTATION; if (fs->val.matchtype || fs->mask.matchtype) fconf |= F_MPSHITTYPE; if (fs->val.macidx || fs->mask.macidx) fconf |= F_MACMATCH; if (fs->val.ethtype || fs->mask.ethtype) fconf |= F_ETHERTYPE; if (fs->val.proto || fs->mask.proto) fconf |= F_PROTOCOL; if (fs->val.tos || fs->mask.tos) fconf |= F_TOS; if (fs->val.vlan_vld || fs->mask.vlan_vld) fconf |= F_VLAN; if (fs->val.ovlan_vld || fs->mask.ovlan_vld) { fconf |= F_VNIC_ID; if (tpp->ingress_config & F_VNIC) return (EINVAL); } if (fs->val.pfvf_vld || fs->mask.pfvf_vld) { fconf |= F_VNIC_ID; if ((tpp->ingress_config & F_VNIC) == 0) return (EINVAL); } if (fs->val.iport || fs->mask.iport) fconf |= F_PORT; if (fs->val.fcoe || fs->mask.fcoe) fconf |= F_FCOE; if ((tpp->vlan_pri_map | fconf) != tpp->vlan_pri_map) return (E2BIG); return (0); } static int get_filter_mode(struct adapter *sc, uint32_t *mode) { struct tp_params *tpp = &sc->params.tp; /* * We trust the cached values of the relevant TP registers. This means * things work reliably only if writes to those registers are always via * t4_set_filter_mode. */ *mode = fconf_iconf_to_mode(tpp->vlan_pri_map, tpp->ingress_config); return (0); } static int set_filter_mode(struct adapter *sc, uint32_t mode) { struct tp_params *tpp = &sc->params.tp; uint32_t fconf, iconf; int rc; iconf = mode_to_iconf(mode); if ((iconf ^ tpp->ingress_config) & F_VNIC) { /* * For now we just complain if A_TP_INGRESS_CONFIG is not * already set to the correct value for the requested filter * mode. It's not clear if it's safe to write to this register * on the fly. (And we trust the cached value of the register). */ return (EBUSY); } fconf = mode_to_fconf(mode); rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4setfm"); if (rc) return (rc); if (sc->tids.ftids_in_use > 0) { rc = EBUSY; goto done; } #ifdef TCP_OFFLOAD if (uld_active(sc, ULD_TOM)) { rc = EBUSY; goto done; } #endif rc = -t4_set_filter_mode(sc, fconf, true); done: end_synchronized_op(sc, LOCK_HELD); return (rc); } static inline uint64_t get_filter_hits(struct adapter *sc, uint32_t fid) { uint32_t tcb_addr; tcb_addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE) + (fid + sc->tids.ftid_base) * TCB_SIZE; if (is_t4(sc)) { uint64_t hits; read_via_memwin(sc, 0, tcb_addr + 16, (uint32_t *)&hits, 8); return (be64toh(hits)); } else { uint32_t hits; read_via_memwin(sc, 0, tcb_addr + 24, &hits, 4); return (be32toh(hits)); } } static int get_filter(struct adapter *sc, struct t4_filter *t) { int i, rc, nfilters = sc->tids.nftids; struct filter_entry *f; rc = begin_synchronized_op(sc, NULL, HOLD_LOCK | SLEEP_OK | INTR_OK, "t4getf"); if (rc) return (rc); if (sc->tids.ftids_in_use == 0 || sc->tids.ftid_tab == NULL || t->idx >= nfilters) { t->idx = 0xffffffff; goto done; } f = &sc->tids.ftid_tab[t->idx]; for (i = t->idx; i < nfilters; i++, f++) { if (f->valid) { t->idx = i; t->l2tidx = f->l2t ? 
f->l2t->idx : 0; t->smtidx = f->smtidx; if (f->fs.hitcnts) t->hits = get_filter_hits(sc, t->idx); else t->hits = UINT64_MAX; t->fs = f->fs; goto done; } } t->idx = 0xffffffff; done: end_synchronized_op(sc, LOCK_HELD); return (0); } static int set_filter(struct adapter *sc, struct t4_filter *t) { unsigned int nfilters, nports; struct filter_entry *f; int i, rc; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4setf"); if (rc) return (rc); nfilters = sc->tids.nftids; nports = sc->params.nports; if (nfilters == 0) { rc = ENOTSUP; goto done; } if (t->idx >= nfilters) { rc = EINVAL; goto done; } /* Validate against the global filter mode and ingress config */ rc = check_fspec_against_fconf_iconf(sc, &t->fs); if (rc != 0) goto done; if (t->fs.action == FILTER_SWITCH && t->fs.eport >= nports) { rc = EINVAL; goto done; } if (t->fs.val.iport >= nports) { rc = EINVAL; goto done; } /* Can't specify an iq if not steering to it */ if (!t->fs.dirsteer && t->fs.iq) { rc = EINVAL; goto done; } /* IPv6 filter idx must be 4 aligned */ if (t->fs.type == 1 && ((t->idx & 0x3) || t->idx + 4 >= nfilters)) { rc = EINVAL; goto done; } if (!(sc->flags & FULL_INIT_DONE) && ((rc = adapter_full_init(sc)) != 0)) goto done; if (sc->tids.ftid_tab == NULL) { KASSERT(sc->tids.ftids_in_use == 0, ("%s: no memory allocated but filters_in_use > 0", __func__)); sc->tids.ftid_tab = malloc(sizeof (struct filter_entry) * nfilters, M_CXGBE, M_NOWAIT | M_ZERO); if (sc->tids.ftid_tab == NULL) { rc = ENOMEM; goto done; } mtx_init(&sc->tids.ftid_lock, "T4 filters", 0, MTX_DEF); } for (i = 0; i < 4; i++) { f = &sc->tids.ftid_tab[t->idx + i]; if (f->pending || f->valid) { rc = EBUSY; goto done; } if (f->locked) { rc = EPERM; goto done; } if (t->fs.type == 0) break; } f = &sc->tids.ftid_tab[t->idx]; f->fs = t->fs; rc = set_filter_wr(sc, t->idx); done: end_synchronized_op(sc, 0); if (rc == 0) { mtx_lock(&sc->tids.ftid_lock); for (;;) { if (f->pending == 0) { rc = f->valid ? 0 : EIO; break; } if (mtx_sleep(&sc->tids.ftid_tab, &sc->tids.ftid_lock, PCATCH, "t4setfw", 0)) { rc = EINPROGRESS; break; } } mtx_unlock(&sc->tids.ftid_lock); } return (rc); } static int del_filter(struct adapter *sc, struct t4_filter *t) { unsigned int nfilters; struct filter_entry *f; int rc; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4delf"); if (rc) return (rc); nfilters = sc->tids.nftids; if (nfilters == 0) { rc = ENOTSUP; goto done; } if (sc->tids.ftid_tab == NULL || sc->tids.ftids_in_use == 0 || t->idx >= nfilters) { rc = EINVAL; goto done; } if (!(sc->flags & FULL_INIT_DONE)) { rc = EAGAIN; goto done; } f = &sc->tids.ftid_tab[t->idx]; if (f->pending) { rc = EBUSY; goto done; } if (f->locked) { rc = EPERM; goto done; } if (f->valid) { t->fs = f->fs; /* extra info for the caller */ rc = del_filter_wr(sc, t->idx); } done: end_synchronized_op(sc, 0); if (rc == 0) { mtx_lock(&sc->tids.ftid_lock); for (;;) { if (f->pending == 0) { rc = f->valid ? 
EIO : 0; break; } if (mtx_sleep(&sc->tids.ftid_tab, &sc->tids.ftid_lock, PCATCH, "t4delfw", 0)) { rc = EINPROGRESS; break; } } mtx_unlock(&sc->tids.ftid_lock); } return (rc); } static void clear_filter(struct filter_entry *f) { if (f->l2t) t4_l2t_release(f->l2t); bzero(f, sizeof (*f)); } static int set_filter_wr(struct adapter *sc, int fidx) { struct filter_entry *f = &sc->tids.ftid_tab[fidx]; struct fw_filter_wr *fwr; unsigned int ftid, vnic_vld, vnic_vld_mask; struct wrq_cookie cookie; ASSERT_SYNCHRONIZED_OP(sc); if (f->fs.newdmac || f->fs.newvlan) { /* This filter needs an L2T entry; allocate one. */ f->l2t = t4_l2t_alloc_switching(sc->l2t); if (f->l2t == NULL) return (EAGAIN); if (t4_l2t_set_switching(sc, f->l2t, f->fs.vlan, f->fs.eport, f->fs.dmac)) { t4_l2t_release(f->l2t); f->l2t = NULL; return (ENOMEM); } } /* Already validated against fconf, iconf */ MPASS((f->fs.val.pfvf_vld & f->fs.val.ovlan_vld) == 0); MPASS((f->fs.mask.pfvf_vld & f->fs.mask.ovlan_vld) == 0); if (f->fs.val.pfvf_vld || f->fs.val.ovlan_vld) vnic_vld = 1; else vnic_vld = 0; if (f->fs.mask.pfvf_vld || f->fs.mask.ovlan_vld) vnic_vld_mask = 1; else vnic_vld_mask = 0; ftid = sc->tids.ftid_base + fidx; fwr = start_wrq_wr(&sc->sge.mgmtq, howmany(sizeof(*fwr), 16), &cookie); if (fwr == NULL) return (ENOMEM); bzero(fwr, sizeof(*fwr)); fwr->op_pkd = htobe32(V_FW_WR_OP(FW_FILTER_WR)); fwr->len16_pkd = htobe32(FW_LEN16(*fwr)); fwr->tid_to_iq = htobe32(V_FW_FILTER_WR_TID(ftid) | V_FW_FILTER_WR_RQTYPE(f->fs.type) | V_FW_FILTER_WR_NOREPLY(0) | V_FW_FILTER_WR_IQ(f->fs.iq)); fwr->del_filter_to_l2tix = htobe32(V_FW_FILTER_WR_RPTTID(f->fs.rpttid) | V_FW_FILTER_WR_DROP(f->fs.action == FILTER_DROP) | V_FW_FILTER_WR_DIRSTEER(f->fs.dirsteer) | V_FW_FILTER_WR_MASKHASH(f->fs.maskhash) | V_FW_FILTER_WR_DIRSTEERHASH(f->fs.dirsteerhash) | V_FW_FILTER_WR_LPBK(f->fs.action == FILTER_SWITCH) | V_FW_FILTER_WR_DMAC(f->fs.newdmac) | V_FW_FILTER_WR_SMAC(f->fs.newsmac) | V_FW_FILTER_WR_INSVLAN(f->fs.newvlan == VLAN_INSERT || f->fs.newvlan == VLAN_REWRITE) | V_FW_FILTER_WR_RMVLAN(f->fs.newvlan == VLAN_REMOVE || f->fs.newvlan == VLAN_REWRITE) | V_FW_FILTER_WR_HITCNTS(f->fs.hitcnts) | V_FW_FILTER_WR_TXCHAN(f->fs.eport) | V_FW_FILTER_WR_PRIO(f->fs.prio) | V_FW_FILTER_WR_L2TIX(f->l2t ? 
f->l2t->idx : 0)); fwr->ethtype = htobe16(f->fs.val.ethtype); fwr->ethtypem = htobe16(f->fs.mask.ethtype); fwr->frag_to_ovlan_vldm = (V_FW_FILTER_WR_FRAG(f->fs.val.frag) | V_FW_FILTER_WR_FRAGM(f->fs.mask.frag) | V_FW_FILTER_WR_IVLAN_VLD(f->fs.val.vlan_vld) | V_FW_FILTER_WR_OVLAN_VLD(vnic_vld) | V_FW_FILTER_WR_IVLAN_VLDM(f->fs.mask.vlan_vld) | V_FW_FILTER_WR_OVLAN_VLDM(vnic_vld_mask)); fwr->smac_sel = 0; fwr->rx_chan_rx_rpl_iq = htobe16(V_FW_FILTER_WR_RX_CHAN(0) | V_FW_FILTER_WR_RX_RPL_IQ(sc->sge.fwq.abs_id)); fwr->maci_to_matchtypem = htobe32(V_FW_FILTER_WR_MACI(f->fs.val.macidx) | V_FW_FILTER_WR_MACIM(f->fs.mask.macidx) | V_FW_FILTER_WR_FCOE(f->fs.val.fcoe) | V_FW_FILTER_WR_FCOEM(f->fs.mask.fcoe) | V_FW_FILTER_WR_PORT(f->fs.val.iport) | V_FW_FILTER_WR_PORTM(f->fs.mask.iport) | V_FW_FILTER_WR_MATCHTYPE(f->fs.val.matchtype) | V_FW_FILTER_WR_MATCHTYPEM(f->fs.mask.matchtype)); fwr->ptcl = f->fs.val.proto; fwr->ptclm = f->fs.mask.proto; fwr->ttyp = f->fs.val.tos; fwr->ttypm = f->fs.mask.tos; fwr->ivlan = htobe16(f->fs.val.vlan); fwr->ivlanm = htobe16(f->fs.mask.vlan); fwr->ovlan = htobe16(f->fs.val.vnic); fwr->ovlanm = htobe16(f->fs.mask.vnic); bcopy(f->fs.val.dip, fwr->lip, sizeof (fwr->lip)); bcopy(f->fs.mask.dip, fwr->lipm, sizeof (fwr->lipm)); bcopy(f->fs.val.sip, fwr->fip, sizeof (fwr->fip)); bcopy(f->fs.mask.sip, fwr->fipm, sizeof (fwr->fipm)); fwr->lp = htobe16(f->fs.val.dport); fwr->lpm = htobe16(f->fs.mask.dport); fwr->fp = htobe16(f->fs.val.sport); fwr->fpm = htobe16(f->fs.mask.sport); if (f->fs.newsmac) bcopy(f->fs.smac, fwr->sma, sizeof (fwr->sma)); f->pending = 1; sc->tids.ftids_in_use++; commit_wrq_wr(&sc->sge.mgmtq, fwr, &cookie); return (0); } static int del_filter_wr(struct adapter *sc, int fidx) { struct filter_entry *f = &sc->tids.ftid_tab[fidx]; struct fw_filter_wr *fwr; unsigned int ftid; struct wrq_cookie cookie; ftid = sc->tids.ftid_base + fidx; fwr = start_wrq_wr(&sc->sge.mgmtq, howmany(sizeof(*fwr), 16), &cookie); if (fwr == NULL) return (ENOMEM); bzero(fwr, sizeof (*fwr)); t4_mk_filtdelwr(ftid, fwr, sc->sge.fwq.abs_id); f->pending = 1; commit_wrq_wr(&sc->sge.mgmtq, fwr, &cookie); return (0); } int t4_filter_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { struct adapter *sc = iq->adapter; const struct cpl_set_tcb_rpl *rpl = (const void *)(rss + 1); unsigned int idx = GET_TID(rpl); unsigned int rc; struct filter_entry *f; KASSERT(m == NULL, ("%s: payload with opcode %02x", __func__, rss->opcode)); MPASS(iq == &sc->sge.fwq); MPASS(is_ftid(sc, idx)); idx -= sc->tids.ftid_base; f = &sc->tids.ftid_tab[idx]; rc = G_COOKIE(rpl->cookie); mtx_lock(&sc->tids.ftid_lock); if (rc == FW_FILTER_WR_FLT_ADDED) { KASSERT(f->pending, ("%s: filter[%u] isn't pending.", __func__, idx)); f->smtidx = (be64toh(rpl->oldval) >> 24) & 0xff; f->pending = 0; /* asynchronous setup completed */ f->valid = 1; } else { if (rc != FW_FILTER_WR_FLT_DELETED) { /* Add or delete failed, display an error */ log(LOG_ERR, "filter %u setup failed with error %u\n", idx, rc); } clear_filter(f); sc->tids.ftids_in_use--; } wakeup(&sc->tids.ftid_tab); mtx_unlock(&sc->tids.ftid_lock); return (0); } static int set_tcb_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { MPASS(iq->set_tcb_rpl != NULL); return (iq->set_tcb_rpl(iq, rss, m)); } static int l2t_write_rpl(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m) { MPASS(iq->l2t_write_rpl != NULL); return (iq->l2t_write_rpl(iq, rss, m)); } static int get_sge_context(struct adapter *sc, struct t4_sge_context 
*cntxt) { int rc; if (cntxt->cid > M_CTXTQID) return (EINVAL); if (cntxt->mem_id != CTXT_EGRESS && cntxt->mem_id != CTXT_INGRESS && cntxt->mem_id != CTXT_FLM && cntxt->mem_id != CTXT_CNM) return (EINVAL); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ctxt"); if (rc) return (rc); if (sc->flags & FW_OK) { rc = -t4_sge_ctxt_rd(sc, sc->mbox, cntxt->cid, cntxt->mem_id, &cntxt->data[0]); if (rc == 0) goto done; } /* * Read via firmware failed or wasn't even attempted. Read directly via * the backdoor. */ rc = -t4_sge_ctxt_rd_bd(sc, cntxt->cid, cntxt->mem_id, &cntxt->data[0]); done: end_synchronized_op(sc, 0); return (rc); } static int load_fw(struct adapter *sc, struct t4_data *fw) { int rc; uint8_t *fw_data; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldfw"); if (rc) return (rc); /* * The firmware, with the sole exception of the memory parity error * handler, runs from memory and not flash. It is almost always safe to * install a new firmware on a running system. Just set bit 1 in * hw.cxgbe.dflags or dev...dflags first. */ if (sc->flags & FULL_INIT_DONE && (sc->debug_flags & DF_LOAD_FW_ANYTIME) == 0) { rc = EBUSY; goto done; } fw_data = malloc(fw->len, M_CXGBE, M_WAITOK); if (fw_data == NULL) { rc = ENOMEM; goto done; } rc = copyin(fw->data, fw_data, fw->len); if (rc == 0) rc = -t4_load_fw(sc, fw_data, fw->len); free(fw_data, M_CXGBE); done: end_synchronized_op(sc, 0); return (rc); } static int load_cfg(struct adapter *sc, struct t4_data *cfg) { int rc; uint8_t *cfg_data = NULL; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldcf"); if (rc) return (rc); if (cfg->len == 0) { /* clear */ rc = -t4_load_cfg(sc, NULL, 0); goto done; } cfg_data = malloc(cfg->len, M_CXGBE, M_WAITOK); if (cfg_data == NULL) { rc = ENOMEM; goto done; } rc = copyin(cfg->data, cfg_data, cfg->len); if (rc == 0) rc = -t4_load_cfg(sc, cfg_data, cfg->len); free(cfg_data, M_CXGBE); done: end_synchronized_op(sc, 0); return (rc); } static int load_boot(struct adapter *sc, struct t4_bootrom *br) { int rc; uint8_t *br_data = NULL; u_int offset; if (br->len > 1024 * 1024) return (EFBIG); if (br->pf_offset == 0) { /* pfidx */ if (br->pfidx_addr > 7) return (EINVAL); offset = G_OFFSET(t4_read_reg(sc, PF_REG(br->pfidx_addr, A_PCIE_PF_EXPROM_OFST))); } else if (br->pf_offset == 1) { /* offset */ offset = G_OFFSET(br->pfidx_addr); } else { return (EINVAL); } rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldbr"); if (rc) return (rc); if (br->len == 0) { /* clear */ rc = -t4_load_boot(sc, NULL, offset, 0); goto done; } br_data = malloc(br->len, M_CXGBE, M_WAITOK); if (br_data == NULL) { rc = ENOMEM; goto done; } rc = copyin(br->data, br_data, br->len); if (rc == 0) rc = -t4_load_boot(sc, br_data, offset, br->len); free(br_data, M_CXGBE); done: end_synchronized_op(sc, 0); return (rc); } static int load_bootcfg(struct adapter *sc, struct t4_data *bc) { int rc; uint8_t *bc_data = NULL; rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4ldcf"); if (rc) return (rc); if (bc->len == 0) { /* clear */ rc = -t4_load_bootcfg(sc, NULL, 0); goto done; } bc_data = malloc(bc->len, M_CXGBE, M_WAITOK); if (bc_data == NULL) { rc = ENOMEM; goto done; } rc = copyin(bc->data, bc_data, bc->len); if (rc == 0) rc = -t4_load_bootcfg(sc, bc_data, bc->len); free(bc_data, M_CXGBE); done: end_synchronized_op(sc, 0); return (rc); } static int cudbg_dump(struct adapter *sc, struct t4_cudbg_dump *dump) { int rc; struct cudbg_init *cudbg; void *handle, *buf; /* buf is large, don't block if no memory is 
available */ buf = malloc(dump->len, M_CXGBE, M_NOWAIT | M_ZERO); if (buf == NULL) return (ENOMEM); handle = cudbg_alloc_handle(); if (handle == NULL) { rc = ENOMEM; goto done; } cudbg = cudbg_get_init(handle); cudbg->adap = sc; cudbg->print = (cudbg_print_cb)printf; #ifndef notyet device_printf(sc->dev, "%s: wr_flash %u, len %u, data %p.\n", __func__, dump->wr_flash, dump->len, dump->data); #endif if (dump->wr_flash) cudbg->use_flash = 1; MPASS(sizeof(cudbg->dbg_bitmap) == sizeof(dump->bitmap)); memcpy(cudbg->dbg_bitmap, dump->bitmap, sizeof(cudbg->dbg_bitmap)); rc = cudbg_collect(handle, buf, &dump->len); if (rc != 0) goto done; rc = copyout(buf, dump->data, dump->len); done: cudbg_free_handle(handle); free(buf, M_CXGBE); return (rc); } #define MAX_READ_BUF_SIZE (128 * 1024) static int read_card_mem(struct adapter *sc, int win, struct t4_mem_range *mr) { uint32_t addr, remaining, n; uint32_t *buf; int rc; uint8_t *dst; rc = validate_mem_range(sc, mr->addr, mr->len); if (rc != 0) return (rc); buf = malloc(min(mr->len, MAX_READ_BUF_SIZE), M_CXGBE, M_WAITOK); addr = mr->addr; remaining = mr->len; dst = (void *)mr->data; while (remaining) { n = min(remaining, MAX_READ_BUF_SIZE); read_via_memwin(sc, 2, addr, buf, n); rc = copyout(buf, dst, n); if (rc != 0) break; dst += n; remaining -= n; addr += n; } free(buf, M_CXGBE); return (rc); } #undef MAX_READ_BUF_SIZE static int read_i2c(struct adapter *sc, struct t4_i2c_data *i2cd) { int rc; if (i2cd->len == 0 || i2cd->port_id >= sc->params.nports) return (EINVAL); if (i2cd->len > sizeof(i2cd->data)) return (EFBIG); rc = begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4i2crd"); if (rc) return (rc); rc = -t4_i2c_rd(sc, sc->mbox, i2cd->port_id, i2cd->dev_addr, i2cd->offset, i2cd->len, &i2cd->data[0]); end_synchronized_op(sc, 0); return (rc); } int t4_os_find_pci_capability(struct adapter *sc, int cap) { int i; return (pci_find_cap(sc->dev, cap, &i) == 0 ? 
i : 0); } int t4_os_pci_save_state(struct adapter *sc) { device_t dev; struct pci_devinfo *dinfo; dev = sc->dev; dinfo = device_get_ivars(dev); pci_cfg_save(dev, dinfo, 0); return (0); } int t4_os_pci_restore_state(struct adapter *sc) { device_t dev; struct pci_devinfo *dinfo; dev = sc->dev; dinfo = device_get_ivars(dev); pci_cfg_restore(dev, dinfo); return (0); } void t4_os_portmod_changed(struct port_info *pi) { struct adapter *sc = pi->adapter; struct vi_info *vi; struct ifnet *ifp; static const char *mod_str[] = { NULL, "LR", "SR", "ER", "TWINAX", "active TWINAX", "LRM" }; PORT_LOCK(pi); build_medialist(pi, &pi->media); PORT_UNLOCK(pi); vi = &pi->vi[0]; if (begin_synchronized_op(sc, vi, HOLD_LOCK, "t4mod") == 0) { init_l1cfg(pi); end_synchronized_op(sc, LOCK_HELD); } ifp = vi->ifp; if (pi->mod_type == FW_PORT_MOD_TYPE_NONE) if_printf(ifp, "transceiver unplugged.\n"); else if (pi->mod_type == FW_PORT_MOD_TYPE_UNKNOWN) if_printf(ifp, "unknown transceiver inserted.\n"); else if (pi->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED) if_printf(ifp, "unsupported transceiver inserted.\n"); else if (pi->mod_type > 0 && pi->mod_type < nitems(mod_str)) { if_printf(ifp, "%dGbps %s transceiver inserted.\n", port_top_speed(pi), mod_str[pi->mod_type]); } else { if_printf(ifp, "transceiver (type %d) inserted.\n", pi->mod_type); } } void t4_os_link_changed(struct port_info *pi) { struct vi_info *vi; struct ifnet *ifp; struct link_config *lc; int v; for_each_vi(pi, v, vi) { ifp = vi->ifp; if (ifp == NULL) continue; lc = &pi->link_cfg; if (lc->link_ok) { ifp->if_baudrate = IF_Mbps(lc->speed); if_link_state_change(ifp, LINK_STATE_UP); } else { if_link_state_change(ifp, LINK_STATE_DOWN); } } } void t4_iterate(void (*func)(struct adapter *, void *), void *arg) { struct adapter *sc; sx_slock(&t4_list_lock); SLIST_FOREACH(sc, &t4_list, link) { /* * func should not make any assumptions about what state sc is * in - the only guarantee is that sc->sc_lock is a valid lock. 
*/ func(sc, arg); } sx_sunlock(&t4_list_lock); } static int t4_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag, struct thread *td) { int rc; struct adapter *sc = dev->si_drv1; rc = priv_check(td, PRIV_DRIVER); if (rc != 0) return (rc); switch (cmd) { case CHELSIO_T4_GETREG: { struct t4_reg *edata = (struct t4_reg *)data; if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len) return (EFAULT); if (edata->size == 4) edata->val = t4_read_reg(sc, edata->addr); else if (edata->size == 8) edata->val = t4_read_reg64(sc, edata->addr); else return (EINVAL); break; } case CHELSIO_T4_SETREG: { struct t4_reg *edata = (struct t4_reg *)data; if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len) return (EFAULT); if (edata->size == 4) { if (edata->val & 0xffffffff00000000) return (EINVAL); t4_write_reg(sc, edata->addr, (uint32_t) edata->val); } else if (edata->size == 8) t4_write_reg64(sc, edata->addr, edata->val); else return (EINVAL); break; } case CHELSIO_T4_REGDUMP: { struct t4_regdump *regs = (struct t4_regdump *)data; int reglen = t4_get_regs_len(sc); uint8_t *buf; if (regs->len < reglen) { regs->len = reglen; /* hint to the caller */ return (ENOBUFS); } regs->len = reglen; buf = malloc(reglen, M_CXGBE, M_WAITOK | M_ZERO); get_regs(sc, regs, buf); rc = copyout(buf, regs->data, reglen); free(buf, M_CXGBE); break; } case CHELSIO_T4_GET_FILTER_MODE: rc = get_filter_mode(sc, (uint32_t *)data); break; case CHELSIO_T4_SET_FILTER_MODE: rc = set_filter_mode(sc, *(uint32_t *)data); break; case CHELSIO_T4_GET_FILTER: rc = get_filter(sc, (struct t4_filter *)data); break; case CHELSIO_T4_SET_FILTER: rc = set_filter(sc, (struct t4_filter *)data); break; case CHELSIO_T4_DEL_FILTER: rc = del_filter(sc, (struct t4_filter *)data); break; case CHELSIO_T4_GET_SGE_CONTEXT: rc = get_sge_context(sc, (struct t4_sge_context *)data); break; case CHELSIO_T4_LOAD_FW: rc = load_fw(sc, (struct t4_data *)data); break; case CHELSIO_T4_GET_MEM: rc = read_card_mem(sc, 2, (struct t4_mem_range *)data); break; case CHELSIO_T4_GET_I2C: rc = read_i2c(sc, (struct t4_i2c_data *)data); break; case CHELSIO_T4_CLEAR_STATS: { int i, v; u_int port_id = *(uint32_t *)data; struct port_info *pi; struct vi_info *vi; if (port_id >= sc->params.nports) return (EINVAL); pi = sc->port[port_id]; if (pi == NULL) return (EIO); /* MAC stats */ t4_clr_port_stats(sc, pi->tx_chan); pi->tx_parse_error = 0; mtx_lock(&sc->reg_lock); for_each_vi(pi, v, vi) { if (vi->flags & VI_INIT_DONE) t4_clr_vi_stats(sc, vi->viid); } mtx_unlock(&sc->reg_lock); /* * Since this command accepts a port, clear stats for * all VIs on this port. 
*/ for_each_vi(pi, v, vi) { if (vi->flags & VI_INIT_DONE) { struct sge_rxq *rxq; struct sge_txq *txq; struct sge_wrq *wrq; for_each_rxq(vi, i, rxq) { #if defined(INET) || defined(INET6) rxq->lro.lro_queued = 0; rxq->lro.lro_flushed = 0; #endif rxq->rxcsum = 0; rxq->vlan_extraction = 0; } for_each_txq(vi, i, txq) { txq->txcsum = 0; txq->tso_wrs = 0; txq->vlan_insertion = 0; txq->imm_wrs = 0; txq->sgl_wrs = 0; txq->txpkt_wrs = 0; txq->txpkts0_wrs = 0; txq->txpkts1_wrs = 0; txq->txpkts0_pkts = 0; txq->txpkts1_pkts = 0; mp_ring_reset_stats(txq->r); } #ifdef TCP_OFFLOAD /* nothing to clear for each ofld_rxq */ for_each_ofld_txq(vi, i, wrq) { wrq->tx_wrs_direct = 0; wrq->tx_wrs_copied = 0; } #endif if (IS_MAIN_VI(vi)) { wrq = &sc->sge.ctrlq[pi->port_id]; wrq->tx_wrs_direct = 0; wrq->tx_wrs_copied = 0; } } } break; } case CHELSIO_T4_SCHED_CLASS: rc = t4_set_sched_class(sc, (struct t4_sched_params *)data); break; case CHELSIO_T4_SCHED_QUEUE: rc = t4_set_sched_queue(sc, (struct t4_sched_queue *)data); break; case CHELSIO_T4_GET_TRACER: rc = t4_get_tracer(sc, (struct t4_tracer *)data); break; case CHELSIO_T4_SET_TRACER: rc = t4_set_tracer(sc, (struct t4_tracer *)data); break; case CHELSIO_T4_LOAD_CFG: rc = load_cfg(sc, (struct t4_data *)data); break; case CHELSIO_T4_LOAD_BOOT: rc = load_boot(sc, (struct t4_bootrom *)data); break; case CHELSIO_T4_LOAD_BOOTCFG: rc = load_bootcfg(sc, (struct t4_data *)data); break; case CHELSIO_T4_CUDBG_DUMP: rc = cudbg_dump(sc, (struct t4_cudbg_dump *)data); break; default: rc = ENOTTY; } return (rc); } void t4_db_full(struct adapter *sc) { CXGBE_UNIMPLEMENTED(__func__); } void t4_db_dropped(struct adapter *sc) { CXGBE_UNIMPLEMENTED(__func__); } #ifdef TCP_OFFLOAD void t4_iscsi_init(struct adapter *sc, u_int tag_mask, const u_int *pgsz_order) { t4_write_reg(sc, A_ULP_RX_ISCSI_TAGMASK, tag_mask); t4_write_reg(sc, A_ULP_RX_ISCSI_PSZ, V_HPZ0(pgsz_order[0]) | V_HPZ1(pgsz_order[1]) | V_HPZ2(pgsz_order[2]) | V_HPZ3(pgsz_order[3])); } static int toe_capability(struct vi_info *vi, int enable) { int rc; struct port_info *pi = vi->pi; struct adapter *sc = pi->adapter; ASSERT_SYNCHRONIZED_OP(sc); if (!is_offload(sc)) return (ENODEV); if (enable) { if ((vi->ifp->if_capenable & IFCAP_TOE) != 0) { /* TOE is already enabled. */ return (0); } /* * We need the port's queues around so that we're able to send * and receive CPLs to/from the TOE even if the ifnet for this * port has never been UP'd administratively. */ if (!(vi->flags & VI_INIT_DONE)) { rc = vi_full_init(vi); if (rc) return (rc); } if (!(pi->vi[0].flags & VI_INIT_DONE)) { rc = vi_full_init(&pi->vi[0]); if (rc) return (rc); } if (isset(&sc->offload_map, pi->port_id)) { /* TOE is enabled on another VI of this port. */ pi->uld_vis++; return (0); } if (!uld_active(sc, ULD_TOM)) { rc = t4_activate_uld(sc, ULD_TOM); if (rc == EAGAIN) { log(LOG_WARNING, "You must kldload t4_tom.ko before trying " "to enable TOE on a cxgbe interface.\n"); } if (rc != 0) return (rc); KASSERT(sc->tom_softc != NULL, ("%s: TOM activated but softc NULL", __func__)); KASSERT(uld_active(sc, ULD_TOM), ("%s: TOM activated but flag not set", __func__)); } /* Activate iWARP and iSCSI too, if the modules are loaded. 
*/ if (!uld_active(sc, ULD_IWARP)) (void) t4_activate_uld(sc, ULD_IWARP); if (!uld_active(sc, ULD_ISCSI)) (void) t4_activate_uld(sc, ULD_ISCSI); pi->uld_vis++; setbit(&sc->offload_map, pi->port_id); } else { pi->uld_vis--; if (!isset(&sc->offload_map, pi->port_id) || pi->uld_vis > 0) return (0); KASSERT(uld_active(sc, ULD_TOM), ("%s: TOM never initialized?", __func__)); clrbit(&sc->offload_map, pi->port_id); } return (0); } /* * Add an upper layer driver to the global list. */ int t4_register_uld(struct uld_info *ui) { int rc = 0; struct uld_info *u; sx_xlock(&t4_uld_list_lock); SLIST_FOREACH(u, &t4_uld_list, link) { if (u->uld_id == ui->uld_id) { rc = EEXIST; goto done; } } SLIST_INSERT_HEAD(&t4_uld_list, ui, link); ui->refcount = 0; done: sx_xunlock(&t4_uld_list_lock); return (rc); } int t4_unregister_uld(struct uld_info *ui) { int rc = EINVAL; struct uld_info *u; sx_xlock(&t4_uld_list_lock); SLIST_FOREACH(u, &t4_uld_list, link) { if (u == ui) { if (ui->refcount > 0) { rc = EBUSY; goto done; } SLIST_REMOVE(&t4_uld_list, ui, uld_info, link); rc = 0; goto done; } } done: sx_xunlock(&t4_uld_list_lock); return (rc); } int t4_activate_uld(struct adapter *sc, int id) { int rc; struct uld_info *ui; ASSERT_SYNCHRONIZED_OP(sc); if (id < 0 || id > ULD_MAX) return (EINVAL); rc = EAGAIN; /* kldload the module with this ULD and try again. */ sx_slock(&t4_uld_list_lock); SLIST_FOREACH(ui, &t4_uld_list, link) { if (ui->uld_id == id) { if (!(sc->flags & FULL_INIT_DONE)) { rc = adapter_full_init(sc); if (rc != 0) break; } rc = ui->activate(sc); if (rc == 0) { setbit(&sc->active_ulds, id); ui->refcount++; } break; } } sx_sunlock(&t4_uld_list_lock); return (rc); } int t4_deactivate_uld(struct adapter *sc, int id) { int rc; struct uld_info *ui; ASSERT_SYNCHRONIZED_OP(sc); if (id < 0 || id > ULD_MAX) return (EINVAL); rc = ENXIO; sx_slock(&t4_uld_list_lock); SLIST_FOREACH(ui, &t4_uld_list, link) { if (ui->uld_id == id) { rc = ui->deactivate(sc); if (rc == 0) { clrbit(&sc->active_ulds, id); ui->refcount--; } break; } } sx_sunlock(&t4_uld_list_lock); return (rc); } int uld_active(struct adapter *sc, int uld_id) { MPASS(uld_id >= 0 && uld_id <= ULD_MAX); return (isset(&sc->active_ulds, uld_id)); } #endif /* * t = ptr to tunable. * nc = number of CPUs. * c = compiled in default for that tunable. */ static void calculate_nqueues(int *t, int nc, const int c) { int nq; if (*t > 0) return; nq = *t < 0 ? -*t : c; *t = min(nc, nq); } /* * Come up with reasonable defaults for some of the tunables, provided they're * not set by the user (in which case we'll use the values as is). 
*/ static void tweak_tunables(void) { int nc = mp_ncpus; /* our snapshot of the number of CPUs */ if (t4_ntxq < 1) { #ifdef RSS t4_ntxq = rss_getnumbuckets(); #else calculate_nqueues(&t4_ntxq, nc, NTXQ); #endif } calculate_nqueues(&t4_ntxq_vi, nc, NTXQ_VI); if (t4_nrxq < 1) { #ifdef RSS t4_nrxq = rss_getnumbuckets(); #else calculate_nqueues(&t4_nrxq, nc, NRXQ); #endif } calculate_nqueues(&t4_nrxq_vi, nc, NRXQ_VI); #ifdef TCP_OFFLOAD calculate_nqueues(&t4_nofldtxq, nc, NOFLDTXQ); calculate_nqueues(&t4_nofldtxq_vi, nc, NOFLDTXQ_VI); calculate_nqueues(&t4_nofldrxq, nc, NOFLDRXQ); calculate_nqueues(&t4_nofldrxq_vi, nc, NOFLDRXQ_VI); if (t4_toecaps_allowed == -1) t4_toecaps_allowed = FW_CAPS_CONFIG_TOE; if (t4_rdmacaps_allowed == -1) { t4_rdmacaps_allowed = FW_CAPS_CONFIG_RDMA_RDDP | FW_CAPS_CONFIG_RDMA_RDMAC; } if (t4_iscsicaps_allowed == -1) { t4_iscsicaps_allowed = FW_CAPS_CONFIG_ISCSI_INITIATOR_PDU | FW_CAPS_CONFIG_ISCSI_TARGET_PDU | FW_CAPS_CONFIG_ISCSI_T10DIF; } if (t4_tmr_idx_ofld < 0 || t4_tmr_idx_ofld >= SGE_NTIMERS) t4_tmr_idx_ofld = TMR_IDX_OFLD; if (t4_pktc_idx_ofld < -1 || t4_pktc_idx_ofld >= SGE_NCOUNTERS) t4_pktc_idx_ofld = PKTC_IDX_OFLD; #else if (t4_toecaps_allowed == -1) t4_toecaps_allowed = 0; if (t4_rdmacaps_allowed == -1) t4_rdmacaps_allowed = 0; if (t4_iscsicaps_allowed == -1) t4_iscsicaps_allowed = 0; #endif #ifdef DEV_NETMAP calculate_nqueues(&t4_nnmtxq_vi, nc, NNMTXQ_VI); calculate_nqueues(&t4_nnmrxq_vi, nc, NNMRXQ_VI); #endif if (t4_tmr_idx < 0 || t4_tmr_idx >= SGE_NTIMERS) t4_tmr_idx = TMR_IDX; if (t4_pktc_idx < -1 || t4_pktc_idx >= SGE_NCOUNTERS) t4_pktc_idx = PKTC_IDX; if (t4_qsize_txq < 128) t4_qsize_txq = 128; if (t4_qsize_rxq < 128) t4_qsize_rxq = 128; while (t4_qsize_rxq & 7) t4_qsize_rxq++; t4_intr_types &= INTR_MSIX | INTR_MSI | INTR_INTX; /* * Number of VIs to create per-port. The first VI is the "main" regular * VI for the port. The rest are additional virtual interfaces on the * same physical port. Note that the main VI does not have native * netmap support but the extra VIs do. * * Limit the number of VIs per port to the number of available * MAC addresses per port. 
*/ if (t4_num_vis < 1) t4_num_vis = 1; if (t4_num_vis > nitems(vi_mac_funcs)) { t4_num_vis = nitems(vi_mac_funcs); printf("cxgbe: number of VIs limited to %d\n", t4_num_vis); } if (pcie_relaxed_ordering < 0 || pcie_relaxed_ordering > 2) { pcie_relaxed_ordering = 1; #if defined(__i386__) || defined(__amd64__) if (cpu_vendor_id == CPU_VENDOR_INTEL) pcie_relaxed_ordering = 0; #endif } } #ifdef DDB static void t4_dump_tcb(struct adapter *sc, int tid) { uint32_t base, i, j, off, pf, reg, save, tcb_addr, win_pos; reg = PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, 2); save = t4_read_reg(sc, reg); base = sc->memwin[2].mw_base; /* Dump TCB for the tid */ tcb_addr = t4_read_reg(sc, A_TP_CMM_TCB_BASE); tcb_addr += tid * TCB_SIZE; if (is_t4(sc)) { pf = 0; win_pos = tcb_addr & ~0xf; /* start must be 16B aligned */ } else { pf = V_PFNUM(sc->pf); win_pos = tcb_addr & ~0x7f; /* start must be 128B aligned */ } t4_write_reg(sc, reg, win_pos | pf); t4_read_reg(sc, reg); off = tcb_addr - win_pos; for (i = 0; i < 4; i++) { uint32_t buf[8]; for (j = 0; j < 8; j++, off += 4) buf[j] = htonl(t4_read_reg(sc, base + off)); db_printf("%08x %08x %08x %08x %08x %08x %08x %08x\n", buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7]); } t4_write_reg(sc, reg, save); t4_read_reg(sc, reg); } static void t4_dump_devlog(struct adapter *sc) { struct devlog_params *dparams = &sc->params.devlog; struct fw_devlog_e e; int i, first, j, m, nentries, rc; uint64_t ftstamp = UINT64_MAX; if (dparams->start == 0) { db_printf("devlog params not valid\n"); return; } nentries = dparams->size / sizeof(struct fw_devlog_e); m = fwmtype_to_hwmtype(dparams->memtype); /* Find the first entry. */ first = -1; for (i = 0; i < nentries && !db_pager_quit; i++) { rc = -t4_mem_read(sc, m, dparams->start + i * sizeof(e), sizeof(e), (void *)&e); if (rc != 0) break; if (e.timestamp == 0) break; e.timestamp = be64toh(e.timestamp); if (e.timestamp < ftstamp) { ftstamp = e.timestamp; first = i; } } if (first == -1) return; i = first; do { rc = -t4_mem_read(sc, m, dparams->start + i * sizeof(e), sizeof(e), (void *)&e); if (rc != 0) return; if (e.timestamp == 0) return; e.timestamp = be64toh(e.timestamp); e.seqno = be32toh(e.seqno); for (j = 0; j < 8; j++) e.params[j] = be32toh(e.params[j]); db_printf("%10d %15ju %8s %8s ", e.seqno, e.timestamp, (e.level < nitems(devlog_level_strings) ? devlog_level_strings[e.level] : "UNKNOWN"), (e.facility < nitems(devlog_facility_strings) ? 
devlog_facility_strings[e.facility] : "UNKNOWN")); db_printf(e.fmt, e.params[0], e.params[1], e.params[2], e.params[3], e.params[4], e.params[5], e.params[6], e.params[7]); if (++i == nentries) i = 0; } while (i != first && !db_pager_quit); } static struct command_table db_t4_table = LIST_HEAD_INITIALIZER(db_t4_table); _DB_SET(_show, t4, NULL, db_show_table, 0, &db_t4_table); DB_FUNC(devlog, db_show_devlog, db_t4_table, CS_OWN, NULL) { device_t dev; int t; bool valid; valid = false; t = db_read_token(); if (t == tIDENT) { dev = device_lookup_by_name(db_tok_string); valid = true; } db_skip_to_eol(); if (!valid) { db_printf("usage: show t4 devlog <nexus>\n"); return; } if (dev == NULL) { db_printf("device not found\n"); return; } t4_dump_devlog(device_get_softc(dev)); } DB_FUNC(tcb, db_show_t4tcb, db_t4_table, CS_OWN, NULL) { device_t dev; int radix, tid, t; bool valid; valid = false; radix = db_radix; db_radix = 10; t = db_read_token(); if (t == tIDENT) { dev = device_lookup_by_name(db_tok_string); t = db_read_token(); if (t == tNUMBER) { tid = db_tok_number; valid = true; } } db_radix = radix; db_skip_to_eol(); if (!valid) { db_printf("usage: show t4 tcb <nexus> <tid>\n"); return; } if (dev == NULL) { db_printf("device not found\n"); return; } if (tid < 0) { db_printf("invalid tid\n"); return; } t4_dump_tcb(device_get_softc(dev), tid); } #endif static struct sx mlu; /* mod load unload */ SX_SYSINIT(cxgbe_mlu, &mlu, "cxgbe mod load/unload"); static int mod_event(module_t mod, int cmd, void *arg) { int rc = 0; static int loaded = 0; switch (cmd) { case MOD_LOAD: sx_xlock(&mlu); if (loaded++ == 0) { t4_sge_modload(); t4_register_cpl_handler(CPL_SET_TCB_RPL, set_tcb_rpl); t4_register_cpl_handler(CPL_L2T_WRITE_RPL, l2t_write_rpl); t4_register_cpl_handler(CPL_TRACE_PKT, t4_trace_pkt); t4_register_cpl_handler(CPL_T5_TRACE_PKT, t5_trace_pkt); sx_init(&t4_list_lock, "T4/T5 adapters"); SLIST_INIT(&t4_list); #ifdef TCP_OFFLOAD sx_init(&t4_uld_list_lock, "T4/T5 ULDs"); SLIST_INIT(&t4_uld_list); #endif t4_tracer_modload(); tweak_tunables(); } sx_xunlock(&mlu); break; case MOD_UNLOAD: sx_xlock(&mlu); if (--loaded == 0) { int tries; sx_slock(&t4_list_lock); if (!SLIST_EMPTY(&t4_list)) { rc = EBUSY; sx_sunlock(&t4_list_lock); goto done_unload; } #ifdef TCP_OFFLOAD sx_slock(&t4_uld_list_lock); if (!SLIST_EMPTY(&t4_uld_list)) { rc = EBUSY; sx_sunlock(&t4_uld_list_lock); sx_sunlock(&t4_list_lock); goto done_unload; } #endif tries = 0; while (tries++ < 5 && t4_sge_extfree_refs() != 0) { uprintf("%ju clusters with custom free routine " "still in use.\n", t4_sge_extfree_refs()); pause("t4unload", 2 * hz); } #ifdef TCP_OFFLOAD sx_sunlock(&t4_uld_list_lock); #endif sx_sunlock(&t4_list_lock); if (t4_sge_extfree_refs() == 0) { t4_tracer_modunload(); #ifdef TCP_OFFLOAD sx_destroy(&t4_uld_list_lock); #endif sx_destroy(&t4_list_lock); t4_sge_modunload(); loaded = 0; } else { rc = EBUSY; loaded++; /* undo earlier decrement */ } } done_unload: sx_xunlock(&mlu); break; } return (rc); } static devclass_t t4_devclass, t5_devclass, t6_devclass; static devclass_t cxgbe_devclass, cxl_devclass, cc_devclass; static devclass_t vcxgbe_devclass, vcxl_devclass, vcc_devclass; DRIVER_MODULE(t4nex, pci, t4_driver, t4_devclass, mod_event, 0); MODULE_VERSION(t4nex, 1); MODULE_DEPEND(t4nex, firmware, 1, 1, 1); #ifdef DEV_NETMAP MODULE_DEPEND(t4nex, netmap, 1, 1, 1); #endif /* DEV_NETMAP */ DRIVER_MODULE(t5nex, pci, t5_driver, t5_devclass, mod_event, 0); MODULE_VERSION(t5nex, 1); MODULE_DEPEND(t5nex, firmware, 1, 1, 1); #ifdef DEV_NETMAP MODULE_DEPEND(t5nex, 
netmap, 1, 1, 1); #endif /* DEV_NETMAP */ DRIVER_MODULE(t6nex, pci, t6_driver, t6_devclass, mod_event, 0); MODULE_VERSION(t6nex, 1); MODULE_DEPEND(t6nex, firmware, 1, 1, 1); #ifdef DEV_NETMAP MODULE_DEPEND(t6nex, netmap, 1, 1, 1); #endif /* DEV_NETMAP */ DRIVER_MODULE(cxgbe, t4nex, cxgbe_driver, cxgbe_devclass, 0, 0); MODULE_VERSION(cxgbe, 1); DRIVER_MODULE(cxl, t5nex, cxl_driver, cxl_devclass, 0, 0); MODULE_VERSION(cxl, 1); DRIVER_MODULE(cc, t6nex, cc_driver, cc_devclass, 0, 0); MODULE_VERSION(cc, 1); DRIVER_MODULE(vcxgbe, cxgbe, vcxgbe_driver, vcxgbe_devclass, 0, 0); MODULE_VERSION(vcxgbe, 1); DRIVER_MODULE(vcxl, cxl, vcxl_driver, vcxl_devclass, 0, 0); MODULE_VERSION(vcxl, 1); DRIVER_MODULE(vcc, cc, vcc_driver, vcc_devclass, 0, 0); MODULE_VERSION(vcc, 1); Index: stable/11/sys/dev/de/if_de.c =================================================================== --- stable/11/sys/dev/de/if_de.c (revision 332287) +++ stable/11/sys/dev/de/if_de.c (revision 332288) @@ -1,5017 +1,5017 @@ /* $NetBSD: if_de.c,v 1.86 1999/06/01 19:17:59 thorpej Exp $ */ /*- * Copyright (c) 1994-1997 Matt Thomas (matt@3am-software.com) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Id: if_de.c,v 1.94 1997/07/03 16:55:07 thomas Exp */ /* * DEC 21040 PCI Ethernet Controller * * Written by Matt Thomas * BPF support code stolen directly from if_ec.c * * This driver supports the DEC DE435 or any other PCI * board which support 21040, 21041, or 21140 (mostly). */ #include __FBSDID("$FreeBSD$"); #define TULIP_HDR_DATA #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #endif #include #include #include #include #include #include #ifdef DDB #include #endif /* * Intel CPUs should use I/O mapped access. */ #if defined(__i386__) #define TULIP_IOMAPPED #endif #if 0 /* This enables KTR traces at KTR_DEV. */ #define KTR_TULIP KTR_DEV #else #define KTR_TULIP 0 #endif #if 0 /* * This turns on all sort of debugging stuff and make the * driver much larger. */ #define TULIP_DEBUG #endif #if 0 #define TULIP_PERFSTATS #endif #define TULIP_HZ 10 #include #define SYNC_NONE 0 #define SYNC_RX 1 #define SYNC_TX 2 /* * This module supports * the DEC 21040 PCI Ethernet Controller. * the DEC 21041 PCI Ethernet Controller. 
* the DEC 21140 PCI Fast Ethernet Controller. */ static void tulip_addr_filter(tulip_softc_t * const sc); static int tulip_ifmedia_change(struct ifnet * const ifp); static void tulip_ifmedia_status(struct ifnet * const ifp, struct ifmediareq *req); static void tulip_init(void *); static void tulip_init_locked(tulip_softc_t * const sc); static void tulip_intr_shared(void *arg); static void tulip_intr_normal(void *arg); static void tulip_mii_autonegotiate(tulip_softc_t * const sc, const unsigned phyaddr); static int tulip_mii_map_abilities(tulip_softc_t * const sc, unsigned abilities); static tulip_media_t tulip_mii_phy_readspecific(tulip_softc_t * const sc); static unsigned tulip_mii_readreg(tulip_softc_t * const sc, unsigned devaddr, unsigned regno); static void tulip_mii_writereg(tulip_softc_t * const sc, unsigned devaddr, unsigned regno, unsigned data); static void tulip_reset(tulip_softc_t * const sc); static void tulip_rx_intr(tulip_softc_t * const sc); static int tulip_srom_decode(tulip_softc_t * const sc); static void tulip_start(struct ifnet *ifp); static void tulip_start_locked(tulip_softc_t * const sc); static struct mbuf * tulip_txput(tulip_softc_t * const sc, struct mbuf *m); static void tulip_txput_setup(tulip_softc_t * const sc); static void tulip_watchdog(void *arg); struct mbuf * tulip_dequeue_mbuf(tulip_ringinfo_t *ri, tulip_descinfo_t *di, int sync); static void tulip_dma_map_addr(void *, bus_dma_segment_t *, int, int); static void tulip_dma_map_rxbuf(void *, bus_dma_segment_t *, int, bus_size_t, int); static void tulip_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error) { bus_addr_t *paddr; if (error) return; paddr = arg; *paddr = segs->ds_addr; } static void tulip_dma_map_rxbuf(void *arg, bus_dma_segment_t *segs, int nseg, bus_size_t mapsize, int error) { tulip_desc_t *desc; if (error) return; desc = arg; KASSERT(nseg == 1, ("too many DMA segments")); KASSERT(segs[0].ds_len >= TULIP_RX_BUFLEN, ("receive buffer too small")); desc->d_addr1 = segs[0].ds_addr & 0xffffffff; desc->d_length1 = TULIP_RX_BUFLEN; #ifdef not_needed /* These should already always be zero. */ desc->d_addr2 = 0; desc->d_length2 = 0; #endif } struct mbuf * tulip_dequeue_mbuf(tulip_ringinfo_t *ri, tulip_descinfo_t *di, int sync) { struct mbuf *m; m = di->di_mbuf; if (m != NULL) { switch (sync) { case SYNC_NONE: break; case SYNC_RX: TULIP_RXMAP_POSTSYNC(ri, di); break; case SYNC_TX: TULIP_TXMAP_POSTSYNC(ri, di); break; default: panic("bad sync flag: %d", sync); } bus_dmamap_unload(ri->ri_data_tag, *di->di_map); di->di_mbuf = NULL; } return (m); } static void tulip_timeout_callback(void *arg) { tulip_softc_t * const sc = arg; TULIP_PERFSTART(timeout) TULIP_LOCK_ASSERT(sc); sc->tulip_flags &= ~TULIP_TIMEOUTPENDING; sc->tulip_probe_timeout -= 1000 / TULIP_HZ; (sc->tulip_boardsw->bd_media_poll)(sc, TULIP_MEDIAPOLL_TIMER); TULIP_PERFEND(timeout); } static void tulip_timeout(tulip_softc_t * const sc) { TULIP_LOCK_ASSERT(sc); if (sc->tulip_flags & TULIP_TIMEOUTPENDING) return; sc->tulip_flags |= TULIP_TIMEOUTPENDING; callout_reset(&sc->tulip_callout, (hz + TULIP_HZ / 2) / TULIP_HZ, tulip_timeout_callback, sc); } static int tulip_txprobe(tulip_softc_t * const sc) { struct mbuf *m; u_char *enaddr; /* * Before we are sure this is the right media we need * to send a small packet to make sure there's carrier. * Strangely, BNC and AUI will "see" receive data if * either is connected so the transmit is the only way * to verify the connectivity. 
*/ TULIP_LOCK_ASSERT(sc); MGETHDR(m, M_NOWAIT, MT_DATA); if (m == NULL) return 0; /* * Construct a LLC TEST message which will point to ourselves. */ if (sc->tulip_ifp->if_input != NULL) enaddr = IF_LLADDR(sc->tulip_ifp); else enaddr = sc->tulip_enaddr; bcopy(enaddr, mtod(m, struct ether_header *)->ether_dhost, ETHER_ADDR_LEN); bcopy(enaddr, mtod(m, struct ether_header *)->ether_shost, ETHER_ADDR_LEN); mtod(m, struct ether_header *)->ether_type = htons(3); mtod(m, unsigned char *)[14] = 0; mtod(m, unsigned char *)[15] = 0; mtod(m, unsigned char *)[16] = 0xE3; /* LLC Class1 TEST (no poll) */ m->m_len = m->m_pkthdr.len = sizeof(struct ether_header) + 3; /* * send it! */ sc->tulip_cmdmode |= TULIP_CMD_TXRUN; sc->tulip_intrmask |= TULIP_STS_TXINTR; sc->tulip_flags |= TULIP_TXPROBE_ACTIVE; TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode); TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); if ((m = tulip_txput(sc, m)) != NULL) m_freem(m); sc->tulip_probe.probe_txprobes++; return 1; } static void tulip_media_set(tulip_softc_t * const sc, tulip_media_t media) { const tulip_media_info_t *mi = sc->tulip_mediums[media]; TULIP_LOCK_ASSERT(sc); if (mi == NULL) return; /* * If we are switching media, make sure we don't think there's * any stale RX activity */ sc->tulip_flags &= ~TULIP_RXACT; if (mi->mi_type == TULIP_MEDIAINFO_SIA) { TULIP_CSR_WRITE(sc, csr_sia_connectivity, TULIP_SIACONN_RESET); TULIP_CSR_WRITE(sc, csr_sia_tx_rx, mi->mi_sia_tx_rx); if (sc->tulip_features & TULIP_HAVE_SIAGP) { TULIP_CSR_WRITE(sc, csr_sia_general, mi->mi_sia_gp_control|mi->mi_sia_general); DELAY(50); TULIP_CSR_WRITE(sc, csr_sia_general, mi->mi_sia_gp_data|mi->mi_sia_general); } else { TULIP_CSR_WRITE(sc, csr_sia_general, mi->mi_sia_general); } TULIP_CSR_WRITE(sc, csr_sia_connectivity, mi->mi_sia_connectivity); } else if (mi->mi_type == TULIP_MEDIAINFO_GPR) { #define TULIP_GPR_CMDBITS (TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION|TULIP_CMD_SCRAMBLER|TULIP_CMD_TXTHRSHLDCTL) /* * If the cmdmode bits don't match the currently operating mode, * set the cmdmode appropriately and reset the chip. */ if (((mi->mi_cmdmode ^ TULIP_CSR_READ(sc, csr_command)) & TULIP_GPR_CMDBITS) != 0) { sc->tulip_cmdmode &= ~TULIP_GPR_CMDBITS; sc->tulip_cmdmode |= mi->mi_cmdmode; tulip_reset(sc); } TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_PINSET|sc->tulip_gpinit); DELAY(10); TULIP_CSR_WRITE(sc, csr_gp, (u_int8_t) mi->mi_gpdata); } else if (mi->mi_type == TULIP_MEDIAINFO_SYM) { /* * If the cmdmode bits don't match the currently operating mode, * set the cmdmode appropriately and reset the chip. 
*/ if (((mi->mi_cmdmode ^ TULIP_CSR_READ(sc, csr_command)) & TULIP_GPR_CMDBITS) != 0) { sc->tulip_cmdmode &= ~TULIP_GPR_CMDBITS; sc->tulip_cmdmode |= mi->mi_cmdmode; tulip_reset(sc); } TULIP_CSR_WRITE(sc, csr_sia_general, mi->mi_gpcontrol); TULIP_CSR_WRITE(sc, csr_sia_general, mi->mi_gpdata); } else if (mi->mi_type == TULIP_MEDIAINFO_MII && sc->tulip_probe_state != TULIP_PROBE_INACTIVE) { int idx; if (sc->tulip_features & TULIP_HAVE_SIAGP) { const u_int8_t *dp; dp = &sc->tulip_rombuf[mi->mi_reset_offset]; for (idx = 0; idx < mi->mi_reset_length; idx++, dp += 2) { DELAY(10); TULIP_CSR_WRITE(sc, csr_sia_general, (dp[0] + 256 * dp[1]) << 16); } sc->tulip_phyaddr = mi->mi_phyaddr; dp = &sc->tulip_rombuf[mi->mi_gpr_offset]; for (idx = 0; idx < mi->mi_gpr_length; idx++, dp += 2) { DELAY(10); TULIP_CSR_WRITE(sc, csr_sia_general, (dp[0] + 256 * dp[1]) << 16); } } else { for (idx = 0; idx < mi->mi_reset_length; idx++) { DELAY(10); TULIP_CSR_WRITE(sc, csr_gp, sc->tulip_rombuf[mi->mi_reset_offset + idx]); } sc->tulip_phyaddr = mi->mi_phyaddr; for (idx = 0; idx < mi->mi_gpr_length; idx++) { DELAY(10); TULIP_CSR_WRITE(sc, csr_gp, sc->tulip_rombuf[mi->mi_gpr_offset + idx]); } } if (sc->tulip_flags & TULIP_TRYNWAY) { tulip_mii_autonegotiate(sc, sc->tulip_phyaddr); } else if ((sc->tulip_flags & TULIP_DIDNWAY) == 0) { u_int32_t data = tulip_mii_readreg(sc, sc->tulip_phyaddr, PHYREG_CONTROL); data &= ~(PHYCTL_SELECT_100MB|PHYCTL_FULL_DUPLEX|PHYCTL_AUTONEG_ENABLE); sc->tulip_flags &= ~TULIP_DIDNWAY; if (TULIP_IS_MEDIA_FD(media)) data |= PHYCTL_FULL_DUPLEX; if (TULIP_IS_MEDIA_100MB(media)) data |= PHYCTL_SELECT_100MB; tulip_mii_writereg(sc, sc->tulip_phyaddr, PHYREG_CONTROL, data); } } } static void tulip_linkup(tulip_softc_t * const sc, tulip_media_t media) { TULIP_LOCK_ASSERT(sc); if ((sc->tulip_flags & TULIP_LINKUP) == 0) sc->tulip_flags |= TULIP_PRINTLINKUP; sc->tulip_flags |= TULIP_LINKUP; sc->tulip_ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; #if 0 /* XXX how does with work with ifmedia? */ if ((sc->tulip_flags & TULIP_DIDNWAY) == 0) { if (sc->tulip_ifp->if_flags & IFF_FULLDUPLEX) { if (TULIP_CAN_MEDIA_FD(media) && sc->tulip_mediums[TULIP_FD_MEDIA_OF(media)] != NULL) media = TULIP_FD_MEDIA_OF(media); } else { if (TULIP_IS_MEDIA_FD(media) && sc->tulip_mediums[TULIP_HD_MEDIA_OF(media)] != NULL) media = TULIP_HD_MEDIA_OF(media); } } #endif if (sc->tulip_media != media) { #ifdef TULIP_DEBUG sc->tulip_dbg.dbg_last_media = sc->tulip_media; #endif sc->tulip_media = media; sc->tulip_flags |= TULIP_PRINTMEDIA; if (TULIP_IS_MEDIA_FD(sc->tulip_media)) { sc->tulip_cmdmode |= TULIP_CMD_FULLDUPLEX; } else if (sc->tulip_chipid != TULIP_21041 || (sc->tulip_flags & TULIP_DIDNWAY) == 0) { sc->tulip_cmdmode &= ~TULIP_CMD_FULLDUPLEX; } } /* * We could set probe_timeout to 0 but setting to 3000 puts this * in one central place and the only matters is tulip_link is * followed by a tulip_timeout. Therefore setting it should not * result in aberrant behaviour. */ sc->tulip_probe_timeout = 3000; sc->tulip_probe_state = TULIP_PROBE_INACTIVE; sc->tulip_flags &= ~(TULIP_TXPROBE_ACTIVE|TULIP_TRYNWAY); if (sc->tulip_flags & TULIP_INRESET) { tulip_media_set(sc, sc->tulip_media); } else if (sc->tulip_probe_media != sc->tulip_media) { /* * No reason to change media if we have the right media. 
*/ tulip_reset(sc); } tulip_init_locked(sc); } static void tulip_media_print(tulip_softc_t * const sc) { TULIP_LOCK_ASSERT(sc); if ((sc->tulip_flags & TULIP_LINKUP) == 0) return; if (sc->tulip_flags & TULIP_PRINTMEDIA) { device_printf(sc->tulip_dev, "enabling %s port\n", tulip_mediums[sc->tulip_media]); sc->tulip_flags &= ~(TULIP_PRINTMEDIA|TULIP_PRINTLINKUP); } else if (sc->tulip_flags & TULIP_PRINTLINKUP) { device_printf(sc->tulip_dev, "link up\n"); sc->tulip_flags &= ~TULIP_PRINTLINKUP; } } #if defined(TULIP_DO_GPR_SENSE) static tulip_media_t tulip_21140_gpr_media_sense(tulip_softc_t * const sc) { struct ifnet *ifp sc->tulip_ifp; tulip_media_t maybe_media = TULIP_MEDIA_UNKNOWN; tulip_media_t last_media = TULIP_MEDIA_UNKNOWN; tulip_media_t media; TULIP_LOCK_ASSERT(sc); /* * If one of the media blocks contained a default media flag, * use that. */ for (media = TULIP_MEDIA_UNKNOWN; media < TULIP_MEDIA_MAX; media++) { const tulip_media_info_t *mi; /* * Media is not supported (or is full-duplex). */ if ((mi = sc->tulip_mediums[media]) == NULL || TULIP_IS_MEDIA_FD(media)) continue; if (mi->mi_type != TULIP_MEDIAINFO_GPR) continue; /* * Remember the media is this is the "default" media. */ if (mi->mi_default && maybe_media == TULIP_MEDIA_UNKNOWN) maybe_media = media; /* * No activity mask? Can't see if it is active if there's no mask. */ if (mi->mi_actmask == 0) continue; /* * Does the activity data match? */ if ((TULIP_CSR_READ(sc, csr_gp) & mi->mi_actmask) != mi->mi_actdata) continue; #if defined(TULIP_DEBUG) device_printf(sc->tulip_dev, "%s: %s: 0x%02x & 0x%02x == 0x%02x\n", __func__, tulip_mediums[media], TULIP_CSR_READ(sc, csr_gp) & 0xFF, mi->mi_actmask, mi->mi_actdata); #endif /* * It does! If this is the first media we detected, then * remember this media. If isn't the first, then there were * multiple matches which we equate to no match (since we don't * which to select (if any). */ if (last_media == TULIP_MEDIA_UNKNOWN) { last_media = media; } else if (last_media != media) { last_media = TULIP_MEDIA_UNKNOWN; } } return (last_media != TULIP_MEDIA_UNKNOWN) ? last_media : maybe_media; } #endif /* TULIP_DO_GPR_SENSE */ static tulip_link_status_t tulip_media_link_monitor(tulip_softc_t * const sc) { const tulip_media_info_t * const mi = sc->tulip_mediums[sc->tulip_media]; tulip_link_status_t linkup = TULIP_LINK_DOWN; TULIP_LOCK_ASSERT(sc); if (mi == NULL) { #if defined(DIAGNOSTIC) || defined(TULIP_DEBUG) panic("tulip_media_link_monitor: %s: botch at line %d\n", tulip_mediums[sc->tulip_media],__LINE__); #else return TULIP_LINK_UNKNOWN; #endif } /* * Have we seen some packets? If so, the link must be good. */ if ((sc->tulip_flags & (TULIP_RXACT|TULIP_LINKUP)) == (TULIP_RXACT|TULIP_LINKUP)) { sc->tulip_flags &= ~TULIP_RXACT; sc->tulip_probe_timeout = 3000; return TULIP_LINK_UP; } sc->tulip_flags &= ~TULIP_RXACT; if (mi->mi_type == TULIP_MEDIAINFO_MII) { u_int32_t status; /* * Read the PHY status register. */ status = tulip_mii_readreg(sc, sc->tulip_phyaddr, PHYREG_STATUS); if (status & PHYSTS_AUTONEG_DONE) { /* * If the PHY has completed autonegotiation, see the if the * remote systems abilities have changed. If so, upgrade or * downgrade as appropriate. 
*/ u_int32_t abilities = tulip_mii_readreg(sc, sc->tulip_phyaddr, PHYREG_AUTONEG_ABILITIES); abilities = (abilities << 6) & status; if (abilities != sc->tulip_abilities) { #if defined(TULIP_DEBUG) loudprintf("%s(phy%d): autonegotiation changed: 0x%04x -> 0x%04x\n", ifp->if_xname, sc->tulip_phyaddr, sc->tulip_abilities, abilities); #endif if (tulip_mii_map_abilities(sc, abilities)) { tulip_linkup(sc, sc->tulip_probe_media); return TULIP_LINK_UP; } /* * if we had selected media because of autonegotiation, * we need to probe for the new media. */ sc->tulip_probe_state = TULIP_PROBE_INACTIVE; if (sc->tulip_flags & TULIP_DIDNWAY) return TULIP_LINK_DOWN; } } /* * The link is now up. If was down, say its back up. */ if ((status & (PHYSTS_LINK_UP|PHYSTS_REMOTE_FAULT)) == PHYSTS_LINK_UP) linkup = TULIP_LINK_UP; } else if (mi->mi_type == TULIP_MEDIAINFO_GPR) { /* * No activity sensor? Assume all's well. */ if (mi->mi_actmask == 0) return TULIP_LINK_UNKNOWN; /* * Does the activity data match? */ if ((TULIP_CSR_READ(sc, csr_gp) & mi->mi_actmask) == mi->mi_actdata) linkup = TULIP_LINK_UP; } else if (mi->mi_type == TULIP_MEDIAINFO_SIA) { /* * Assume non TP ok for now. */ if (!TULIP_IS_MEDIA_TP(sc->tulip_media)) return TULIP_LINK_UNKNOWN; if ((TULIP_CSR_READ(sc, csr_sia_status) & TULIP_SIASTS_LINKFAIL) == 0) linkup = TULIP_LINK_UP; #if defined(TULIP_DEBUG) if (sc->tulip_probe_timeout <= 0) device_printf(sc->tulip_dev, "sia status = 0x%08x\n", TULIP_CSR_READ(sc, csr_sia_status)); #endif } else if (mi->mi_type == TULIP_MEDIAINFO_SYM) { return TULIP_LINK_UNKNOWN; } /* * We will wait for 3 seconds until the link goes into suspect mode. */ if (sc->tulip_flags & TULIP_LINKUP) { if (linkup == TULIP_LINK_UP) sc->tulip_probe_timeout = 3000; if (sc->tulip_probe_timeout > 0) return TULIP_LINK_UP; sc->tulip_flags &= ~TULIP_LINKUP; device_printf(sc->tulip_dev, "link down: cable problem?\n"); } #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_link_downed++; #endif return TULIP_LINK_DOWN; } static void tulip_media_poll(tulip_softc_t * const sc, tulip_mediapoll_event_t event) { TULIP_LOCK_ASSERT(sc); #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_events[event]++; #endif if (sc->tulip_probe_state == TULIP_PROBE_INACTIVE && event == TULIP_MEDIAPOLL_TIMER) { switch (tulip_media_link_monitor(sc)) { case TULIP_LINK_DOWN: { /* * Link Monitor failed. Probe for new media. */ event = TULIP_MEDIAPOLL_LINKFAIL; break; } case TULIP_LINK_UP: { /* * Check again soon. */ tulip_timeout(sc); return; } case TULIP_LINK_UNKNOWN: { /* * We can't tell so don't bother. */ return; } } } if (event == TULIP_MEDIAPOLL_LINKFAIL) { if (sc->tulip_probe_state == TULIP_PROBE_INACTIVE) { if (TULIP_DO_AUTOSENSE(sc)) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_link_failures++; #endif sc->tulip_media = TULIP_MEDIA_UNKNOWN; if (sc->tulip_ifp->if_flags & IFF_UP) tulip_reset(sc); /* restart probe */ } return; } #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_link_pollintrs++; #endif } if (event == TULIP_MEDIAPOLL_START) { sc->tulip_ifp->if_drv_flags |= IFF_DRV_OACTIVE; if (sc->tulip_probe_state != TULIP_PROBE_INACTIVE) return; sc->tulip_probe_mediamask = 0; sc->tulip_probe_passes = 0; #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_media_probes++; #endif /* * If the SROM contained an explicit media to use, use it. */ sc->tulip_cmdmode &= ~(TULIP_CMD_RXRUN|TULIP_CMD_FULLDUPLEX); sc->tulip_flags |= TULIP_TRYNWAY|TULIP_PROBE1STPASS; sc->tulip_flags &= ~(TULIP_DIDNWAY|TULIP_PRINTMEDIA|TULIP_PRINTLINKUP); /* * connidx is defaulted to a media_unknown type. 
*/ sc->tulip_probe_media = tulip_srom_conninfo[sc->tulip_connidx].sc_media; if (sc->tulip_probe_media != TULIP_MEDIA_UNKNOWN) { tulip_linkup(sc, sc->tulip_probe_media); tulip_timeout(sc); return; } if (sc->tulip_features & TULIP_HAVE_GPR) { sc->tulip_probe_state = TULIP_PROBE_GPRTEST; sc->tulip_probe_timeout = 2000; } else { sc->tulip_probe_media = TULIP_MEDIA_MAX; sc->tulip_probe_timeout = 0; sc->tulip_probe_state = TULIP_PROBE_MEDIATEST; } } /* * Ignore txprobe failures or spurious callbacks. */ if (event == TULIP_MEDIAPOLL_TXPROBE_FAILED && sc->tulip_probe_state != TULIP_PROBE_MEDIATEST) { sc->tulip_flags &= ~TULIP_TXPROBE_ACTIVE; return; } /* * If we really transmitted a packet, then that's the media we'll use. */ if (event == TULIP_MEDIAPOLL_TXPROBE_OK || event == TULIP_MEDIAPOLL_LINKPASS) { if (event == TULIP_MEDIAPOLL_LINKPASS) { /* XXX Check media status just to be sure */ sc->tulip_probe_media = TULIP_MEDIA_10BASET; #if defined(TULIP_DEBUG) } else { sc->tulip_dbg.dbg_txprobes_ok[sc->tulip_probe_media]++; #endif } tulip_linkup(sc, sc->tulip_probe_media); tulip_timeout(sc); return; } if (sc->tulip_probe_state == TULIP_PROBE_GPRTEST) { #if defined(TULIP_DO_GPR_SENSE) /* * Check for media via the general purpose register. * * Try to sense the media via the GPR. If the same value * occurs 3 times in a row then just use that. */ if (sc->tulip_probe_timeout > 0) { tulip_media_t new_probe_media = tulip_21140_gpr_media_sense(sc); #if defined(TULIP_DEBUG) device_printf(sc->tulip_dev, "%s: gpr sensing = %s\n", __func__, tulip_mediums[new_probe_media]); #endif if (new_probe_media != TULIP_MEDIA_UNKNOWN) { if (new_probe_media == sc->tulip_probe_media) { if (--sc->tulip_probe_count == 0) tulip_linkup(sc, sc->tulip_probe_media); } else { sc->tulip_probe_count = 10; } } sc->tulip_probe_media = new_probe_media; tulip_timeout(sc); return; } #endif /* TULIP_DO_GPR_SENSE */ /* * Brute force. We cycle through each of the media types * and try to transmit a packet. */ sc->tulip_probe_state = TULIP_PROBE_MEDIATEST; sc->tulip_probe_media = TULIP_MEDIA_MAX; sc->tulip_probe_timeout = 0; tulip_timeout(sc); return; } if (sc->tulip_probe_state != TULIP_PROBE_MEDIATEST && (sc->tulip_features & TULIP_HAVE_MII)) { tulip_media_t old_media = sc->tulip_probe_media; tulip_mii_autonegotiate(sc, sc->tulip_phyaddr); switch (sc->tulip_probe_state) { case TULIP_PROBE_FAILED: case TULIP_PROBE_MEDIATEST: { /* * Try the next media. */ sc->tulip_probe_mediamask |= sc->tulip_mediums[sc->tulip_probe_media]->mi_mediamask; sc->tulip_probe_timeout = 0; #ifdef notyet if (sc->tulip_probe_state == TULIP_PROBE_FAILED) break; if (sc->tulip_probe_media != tulip_mii_phy_readspecific(sc)) break; sc->tulip_probe_timeout = TULIP_IS_MEDIA_TP(sc->tulip_probe_media) ? 2500 : 300; #endif break; } case TULIP_PROBE_PHYAUTONEG: { return; } case TULIP_PROBE_INACTIVE: { /* * Only probe if we autonegotiated a media that hasn't failed. */ sc->tulip_probe_timeout = 0; if (sc->tulip_probe_mediamask & TULIP_BIT(sc->tulip_probe_media)) { sc->tulip_probe_media = old_media; break; } tulip_linkup(sc, sc->tulip_probe_media); tulip_timeout(sc); return; } default: { #if defined(DIAGNOSTIC) || defined(TULIP_DEBUG) panic("tulip_media_poll: botch at line %d\n", __LINE__); #endif break; } } } if (event == TULIP_MEDIAPOLL_TXPROBE_FAILED) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_txprobes_failed[sc->tulip_probe_media]++; #endif sc->tulip_flags &= ~TULIP_TXPROBE_ACTIVE; return; } /* * switch to another media if we tried this one enough. 
*/ if (/* event == TULIP_MEDIAPOLL_TXPROBE_FAILED || */ sc->tulip_probe_timeout <= 0) { #if defined(TULIP_DEBUG) if (sc->tulip_probe_media == TULIP_MEDIA_UNKNOWN) { device_printf(sc->tulip_dev, "poll media unknown!\n"); sc->tulip_probe_media = TULIP_MEDIA_MAX; } #endif /* * Find the next media type to check for. Full Duplex * types are not allowed. */ do { sc->tulip_probe_media -= 1; if (sc->tulip_probe_media == TULIP_MEDIA_UNKNOWN) { if (++sc->tulip_probe_passes == 3) { device_printf(sc->tulip_dev, "autosense failed: cable problem?\n"); if ((sc->tulip_ifp->if_flags & IFF_UP) == 0) { sc->tulip_ifp->if_drv_flags &= ~IFF_DRV_RUNNING; sc->tulip_probe_state = TULIP_PROBE_INACTIVE; return; } } sc->tulip_flags ^= TULIP_TRYNWAY; /* XXX */ sc->tulip_probe_mediamask = 0; sc->tulip_probe_media = TULIP_MEDIA_MAX - 1; } } while (sc->tulip_mediums[sc->tulip_probe_media] == NULL || (sc->tulip_probe_mediamask & TULIP_BIT(sc->tulip_probe_media)) || TULIP_IS_MEDIA_FD(sc->tulip_probe_media)); #if defined(TULIP_DEBUG) device_printf(sc->tulip_dev, "%s: probing %s\n", event == TULIP_MEDIAPOLL_TXPROBE_FAILED ? "txprobe failed" : "timeout", tulip_mediums[sc->tulip_probe_media]); #endif sc->tulip_probe_timeout = TULIP_IS_MEDIA_TP(sc->tulip_probe_media) ? 2500 : 1000; sc->tulip_probe_state = TULIP_PROBE_MEDIATEST; sc->tulip_probe.probe_txprobes = 0; tulip_reset(sc); tulip_media_set(sc, sc->tulip_probe_media); sc->tulip_flags &= ~TULIP_TXPROBE_ACTIVE; } tulip_timeout(sc); /* * If this is hanging off a phy, we know are doing NWAY and we have * forced the phy to a specific speed. Wait for link up before * before sending a packet. */ switch (sc->tulip_mediums[sc->tulip_probe_media]->mi_type) { case TULIP_MEDIAINFO_MII: { if (sc->tulip_probe_media != tulip_mii_phy_readspecific(sc)) return; break; } case TULIP_MEDIAINFO_SIA: { if (TULIP_IS_MEDIA_TP(sc->tulip_probe_media)) { if (TULIP_CSR_READ(sc, csr_sia_status) & TULIP_SIASTS_LINKFAIL) return; tulip_linkup(sc, sc->tulip_probe_media); #ifdef notyet if (sc->tulip_features & TULIP_HAVE_MII) tulip_timeout(sc); #endif return; } break; } case TULIP_MEDIAINFO_RESET: case TULIP_MEDIAINFO_SYM: case TULIP_MEDIAINFO_NONE: case TULIP_MEDIAINFO_GPR: { break; } } /* * Try to send a packet. 
*/ tulip_txprobe(sc); } static void tulip_media_select(tulip_softc_t * const sc) { TULIP_LOCK_ASSERT(sc); if (sc->tulip_features & TULIP_HAVE_GPR) { TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_PINSET|sc->tulip_gpinit); DELAY(10); TULIP_CSR_WRITE(sc, csr_gp, sc->tulip_gpdata); } /* * If this board has no media, just return */ if (sc->tulip_features & TULIP_HAVE_NOMEDIA) return; if (sc->tulip_media == TULIP_MEDIA_UNKNOWN) { TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); (*sc->tulip_boardsw->bd_media_poll)(sc, TULIP_MEDIAPOLL_START); } else { tulip_media_set(sc, sc->tulip_media); } } static void tulip_21040_mediainfo_init(tulip_softc_t * const sc, tulip_media_t media) { TULIP_LOCK_ASSERT(sc); sc->tulip_cmdmode |= TULIP_CMD_CAPTREFFCT|TULIP_CMD_THRSHLD160 |TULIP_CMD_BACKOFFCTR; sc->tulip_ifp->if_baudrate = 10000000; if (media == TULIP_MEDIA_10BASET || media == TULIP_MEDIA_UNKNOWN) { TULIP_MEDIAINFO_SIA_INIT(sc, &sc->tulip_mediainfo[0], 21040, 10BASET); TULIP_MEDIAINFO_SIA_INIT(sc, &sc->tulip_mediainfo[1], 21040, 10BASET_FD); sc->tulip_intrmask |= TULIP_STS_LINKPASS|TULIP_STS_LINKFAIL; } if (media == TULIP_MEDIA_AUIBNC || media == TULIP_MEDIA_UNKNOWN) { TULIP_MEDIAINFO_SIA_INIT(sc, &sc->tulip_mediainfo[2], 21040, AUIBNC); } if (media == TULIP_MEDIA_UNKNOWN) { TULIP_MEDIAINFO_SIA_INIT(sc, &sc->tulip_mediainfo[3], 21040, EXTSIA); } } static void tulip_21040_media_probe(tulip_softc_t * const sc) { TULIP_LOCK_ASSERT(sc); tulip_21040_mediainfo_init(sc, TULIP_MEDIA_UNKNOWN); return; } static void tulip_21040_10baset_only_media_probe(tulip_softc_t * const sc) { TULIP_LOCK_ASSERT(sc); tulip_21040_mediainfo_init(sc, TULIP_MEDIA_10BASET); tulip_media_set(sc, TULIP_MEDIA_10BASET); sc->tulip_media = TULIP_MEDIA_10BASET; } static void tulip_21040_10baset_only_media_select(tulip_softc_t * const sc) { TULIP_LOCK_ASSERT(sc); sc->tulip_flags |= TULIP_LINKUP; if (sc->tulip_media == TULIP_MEDIA_10BASET_FD) { sc->tulip_cmdmode |= TULIP_CMD_FULLDUPLEX; sc->tulip_flags &= ~TULIP_SQETEST; } else { sc->tulip_cmdmode &= ~TULIP_CMD_FULLDUPLEX; sc->tulip_flags |= TULIP_SQETEST; } tulip_media_set(sc, sc->tulip_media); } static void tulip_21040_auibnc_only_media_probe(tulip_softc_t * const sc) { TULIP_LOCK_ASSERT(sc); tulip_21040_mediainfo_init(sc, TULIP_MEDIA_AUIBNC); sc->tulip_flags |= TULIP_SQETEST|TULIP_LINKUP; tulip_media_set(sc, TULIP_MEDIA_AUIBNC); sc->tulip_media = TULIP_MEDIA_AUIBNC; } static void tulip_21040_auibnc_only_media_select(tulip_softc_t * const sc) { TULIP_LOCK_ASSERT(sc); tulip_media_set(sc, TULIP_MEDIA_AUIBNC); sc->tulip_cmdmode &= ~TULIP_CMD_FULLDUPLEX; } static const tulip_boardsw_t tulip_21040_boardsw = { TULIP_21040_GENERIC, tulip_21040_media_probe, tulip_media_select, tulip_media_poll, }; static const tulip_boardsw_t tulip_21040_10baset_only_boardsw = { TULIP_21040_GENERIC, tulip_21040_10baset_only_media_probe, tulip_21040_10baset_only_media_select, NULL, }; static const tulip_boardsw_t tulip_21040_auibnc_only_boardsw = { TULIP_21040_GENERIC, tulip_21040_auibnc_only_media_probe, tulip_21040_auibnc_only_media_select, NULL, }; static void tulip_21041_mediainfo_init(tulip_softc_t * const sc) { tulip_media_info_t * const mi = sc->tulip_mediainfo; TULIP_LOCK_ASSERT(sc); #ifdef notyet if (sc->tulip_revinfo >= 0x20) { TULIP_MEDIAINFO_SIA_INIT(sc, &mi[0], 21041P2, 10BASET); TULIP_MEDIAINFO_SIA_INIT(sc, &mi[1], 21041P2, 10BASET_FD); TULIP_MEDIAINFO_SIA_INIT(sc, &mi[0], 21041P2, AUI); TULIP_MEDIAINFO_SIA_INIT(sc, &mi[1], 21041P2, BNC); return; } #endif TULIP_MEDIAINFO_SIA_INIT(sc, &mi[0], 21041, 10BASET); 
TULIP_MEDIAINFO_SIA_INIT(sc, &mi[1], 21041, 10BASET_FD); TULIP_MEDIAINFO_SIA_INIT(sc, &mi[2], 21041, AUI); TULIP_MEDIAINFO_SIA_INIT(sc, &mi[3], 21041, BNC); } static void tulip_21041_media_probe(tulip_softc_t * const sc) { TULIP_LOCK_ASSERT(sc); sc->tulip_ifp->if_baudrate = 10000000; sc->tulip_cmdmode |= TULIP_CMD_CAPTREFFCT|TULIP_CMD_ENHCAPTEFFCT |TULIP_CMD_THRSHLD160|TULIP_CMD_BACKOFFCTR; sc->tulip_intrmask |= TULIP_STS_LINKPASS|TULIP_STS_LINKFAIL; tulip_21041_mediainfo_init(sc); } static void tulip_21041_media_poll(tulip_softc_t * const sc, const tulip_mediapoll_event_t event) { u_int32_t sia_status; TULIP_LOCK_ASSERT(sc); #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_events[event]++; #endif if (event == TULIP_MEDIAPOLL_LINKFAIL) { if (sc->tulip_probe_state != TULIP_PROBE_INACTIVE || !TULIP_DO_AUTOSENSE(sc)) return; sc->tulip_media = TULIP_MEDIA_UNKNOWN; tulip_reset(sc); /* start probe */ return; } /* * If we've been been asked to start a poll or link change interrupt * restart the probe (and reset the tulip to a known state). */ if (event == TULIP_MEDIAPOLL_START) { sc->tulip_ifp->if_drv_flags |= IFF_DRV_OACTIVE; sc->tulip_cmdmode &= ~(TULIP_CMD_FULLDUPLEX|TULIP_CMD_RXRUN); #ifdef notyet if (sc->tulip_revinfo >= 0x20) { sc->tulip_cmdmode |= TULIP_CMD_FULLDUPLEX; sc->tulip_flags |= TULIP_DIDNWAY; } #endif TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode); sc->tulip_probe_state = TULIP_PROBE_MEDIATEST; sc->tulip_probe_media = TULIP_MEDIA_10BASET; sc->tulip_probe_timeout = TULIP_21041_PROBE_10BASET_TIMEOUT; tulip_media_set(sc, TULIP_MEDIA_10BASET); tulip_timeout(sc); return; } if (sc->tulip_probe_state == TULIP_PROBE_INACTIVE) return; if (event == TULIP_MEDIAPOLL_TXPROBE_OK) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_txprobes_ok[sc->tulip_probe_media]++; #endif tulip_linkup(sc, sc->tulip_probe_media); return; } sia_status = TULIP_CSR_READ(sc, csr_sia_status); TULIP_CSR_WRITE(sc, csr_sia_status, sia_status); if ((sia_status & TULIP_SIASTS_LINKFAIL) == 0) { if (sc->tulip_revinfo >= 0x20) { if (sia_status & (PHYSTS_10BASET_FD << (16 - 6))) sc->tulip_probe_media = TULIP_MEDIA_10BASET_FD; } /* * If the link has passed LinkPass, 10baseT is the * proper media to use. */ tulip_linkup(sc, sc->tulip_probe_media); return; } /* * wait for up to 2.4 seconds for the link to reach pass state. * Only then start scanning the other media for activity. * choose media with receive activity over those without. */ if (sc->tulip_probe_media == TULIP_MEDIA_10BASET) { if (event != TULIP_MEDIAPOLL_TIMER) return; if (sc->tulip_probe_timeout > 0 && (sia_status & TULIP_SIASTS_OTHERRXACTIVITY) == 0) { tulip_timeout(sc); return; } sc->tulip_probe_timeout = TULIP_21041_PROBE_AUIBNC_TIMEOUT; sc->tulip_flags |= TULIP_WANTRXACT; if (sia_status & TULIP_SIASTS_OTHERRXACTIVITY) { sc->tulip_probe_media = TULIP_MEDIA_BNC; } else { sc->tulip_probe_media = TULIP_MEDIA_AUI; } tulip_media_set(sc, sc->tulip_probe_media); tulip_timeout(sc); return; } /* * If we failed, clear the txprobe active flag. */ if (event == TULIP_MEDIAPOLL_TXPROBE_FAILED) sc->tulip_flags &= ~TULIP_TXPROBE_ACTIVE; if (event == TULIP_MEDIAPOLL_TIMER) { /* * If we've received something, then that's our link! 
*/ if (sc->tulip_flags & TULIP_RXACT) { tulip_linkup(sc, sc->tulip_probe_media); return; } /* * if no txprobe active */ if ((sc->tulip_flags & TULIP_TXPROBE_ACTIVE) == 0 && ((sc->tulip_flags & TULIP_WANTRXACT) == 0 || (sia_status & TULIP_SIASTS_RXACTIVITY))) { sc->tulip_probe_timeout = TULIP_21041_PROBE_AUIBNC_TIMEOUT; tulip_txprobe(sc); tulip_timeout(sc); return; } /* * Take 2 passes through before deciding to not * wait for receive activity. Then take another * two passes before spitting out a warning. */ if (sc->tulip_probe_timeout <= 0) { if (sc->tulip_flags & TULIP_WANTRXACT) { sc->tulip_flags &= ~TULIP_WANTRXACT; sc->tulip_probe_timeout = TULIP_21041_PROBE_AUIBNC_TIMEOUT; } else { device_printf(sc->tulip_dev, "autosense failed: cable problem?\n"); if ((sc->tulip_ifp->if_flags & IFF_UP) == 0) { sc->tulip_ifp->if_drv_flags &= ~IFF_DRV_RUNNING; sc->tulip_probe_state = TULIP_PROBE_INACTIVE; return; } } } } /* * Since this media failed to probe, try the other one. */ sc->tulip_probe_timeout = TULIP_21041_PROBE_AUIBNC_TIMEOUT; if (sc->tulip_probe_media == TULIP_MEDIA_AUI) { sc->tulip_probe_media = TULIP_MEDIA_BNC; } else { sc->tulip_probe_media = TULIP_MEDIA_AUI; } tulip_media_set(sc, sc->tulip_probe_media); sc->tulip_flags &= ~TULIP_TXPROBE_ACTIVE; tulip_timeout(sc); } static const tulip_boardsw_t tulip_21041_boardsw = { TULIP_21041_GENERIC, tulip_21041_media_probe, tulip_media_select, tulip_21041_media_poll }; static const tulip_phy_attr_t tulip_mii_phy_attrlist[] = { { 0x20005c00, 0, /* 08-00-17 */ { { 0x19, 0x0040, 0x0040 }, /* 10TX */ { 0x19, 0x0040, 0x0000 }, /* 100TX */ }, #if defined(TULIP_DEBUG) "NS DP83840", #endif }, { 0x0281F400, 0, /* 00-A0-7D */ { { 0x12, 0x0010, 0x0000 }, /* 10T */ { }, /* 100TX */ { 0x12, 0x0010, 0x0010 }, /* 100T4 */ { 0x12, 0x0008, 0x0008 }, /* FULL_DUPLEX */ }, #if defined(TULIP_DEBUG) "Seeq 80C240" #endif }, #if 0 { 0x0015F420, 0, /* 00-A0-7D */ { { 0x12, 0x0010, 0x0000 }, /* 10T */ { }, /* 100TX */ { 0x12, 0x0010, 0x0010 }, /* 100T4 */ { 0x12, 0x0008, 0x0008 }, /* FULL_DUPLEX */ }, #if defined(TULIP_DEBUG) "Broadcom BCM5000" #endif }, #endif { 0x0281F400, 0, /* 00-A0-BE */ { { 0x11, 0x8000, 0x0000 }, /* 10T */ { 0x11, 0x8000, 0x8000 }, /* 100TX */ { }, /* 100T4 */ { 0x11, 0x4000, 0x4000 }, /* FULL_DUPLEX */ }, #if defined(TULIP_DEBUG) "ICS 1890" #endif }, { 0 } }; static tulip_media_t tulip_mii_phy_readspecific(tulip_softc_t * const sc) { const tulip_phy_attr_t *attr; u_int16_t data; u_int32_t id; unsigned idx = 0; static const tulip_media_t table[] = { TULIP_MEDIA_UNKNOWN, TULIP_MEDIA_10BASET, TULIP_MEDIA_100BASETX, TULIP_MEDIA_100BASET4, TULIP_MEDIA_UNKNOWN, TULIP_MEDIA_10BASET_FD, TULIP_MEDIA_100BASETX_FD, TULIP_MEDIA_UNKNOWN }; TULIP_LOCK_ASSERT(sc); /* * Don't read phy specific registers if link is not up. 
*/ data = tulip_mii_readreg(sc, sc->tulip_phyaddr, PHYREG_STATUS); if ((data & (PHYSTS_LINK_UP|PHYSTS_EXTENDED_REGS)) != (PHYSTS_LINK_UP|PHYSTS_EXTENDED_REGS)) return TULIP_MEDIA_UNKNOWN; id = (tulip_mii_readreg(sc, sc->tulip_phyaddr, PHYREG_IDLOW) << 16) | tulip_mii_readreg(sc, sc->tulip_phyaddr, PHYREG_IDHIGH); for (attr = tulip_mii_phy_attrlist;; attr++) { if (attr->attr_id == 0) return TULIP_MEDIA_UNKNOWN; if ((id & ~0x0F) == attr->attr_id) break; } if (attr->attr_modes[PHY_MODE_100TX].pm_regno) { const tulip_phy_modedata_t * const pm = &attr->attr_modes[PHY_MODE_100TX]; data = tulip_mii_readreg(sc, sc->tulip_phyaddr, pm->pm_regno); if ((data & pm->pm_mask) == pm->pm_value) idx = 2; } if (idx == 0 && attr->attr_modes[PHY_MODE_100T4].pm_regno) { const tulip_phy_modedata_t * const pm = &attr->attr_modes[PHY_MODE_100T4]; data = tulip_mii_readreg(sc, sc->tulip_phyaddr, pm->pm_regno); if ((data & pm->pm_mask) == pm->pm_value) idx = 3; } if (idx == 0 && attr->attr_modes[PHY_MODE_10T].pm_regno) { const tulip_phy_modedata_t * const pm = &attr->attr_modes[PHY_MODE_10T]; data = tulip_mii_readreg(sc, sc->tulip_phyaddr, pm->pm_regno); if ((data & pm->pm_mask) == pm->pm_value) idx = 1; } if (idx != 0 && attr->attr_modes[PHY_MODE_FULLDUPLEX].pm_regno) { const tulip_phy_modedata_t * const pm = &attr->attr_modes[PHY_MODE_FULLDUPLEX]; data = tulip_mii_readreg(sc, sc->tulip_phyaddr, pm->pm_regno); idx += ((data & pm->pm_mask) == pm->pm_value ? 4 : 0); } return table[idx]; } static unsigned tulip_mii_get_phyaddr(tulip_softc_t * const sc, unsigned offset) { unsigned phyaddr; TULIP_LOCK_ASSERT(sc); for (phyaddr = 1; phyaddr < 32; phyaddr++) { unsigned status = tulip_mii_readreg(sc, phyaddr, PHYREG_STATUS); if (status == 0 || status == 0xFFFF || status < PHYSTS_10BASET) continue; if (offset == 0) return phyaddr; offset--; } if (offset == 0) { unsigned status = tulip_mii_readreg(sc, 0, PHYREG_STATUS); if (status == 0 || status == 0xFFFF || status < PHYSTS_10BASET) return TULIP_MII_NOPHY; return 0; } return TULIP_MII_NOPHY; } static int tulip_mii_map_abilities(tulip_softc_t * const sc, unsigned abilities) { TULIP_LOCK_ASSERT(sc); sc->tulip_abilities = abilities; if (abilities & PHYSTS_100BASETX_FD) { sc->tulip_probe_media = TULIP_MEDIA_100BASETX_FD; } else if (abilities & PHYSTS_100BASET4) { sc->tulip_probe_media = TULIP_MEDIA_100BASET4; } else if (abilities & PHYSTS_100BASETX) { sc->tulip_probe_media = TULIP_MEDIA_100BASETX; } else if (abilities & PHYSTS_10BASET_FD) { sc->tulip_probe_media = TULIP_MEDIA_10BASET_FD; } else if (abilities & PHYSTS_10BASET) { sc->tulip_probe_media = TULIP_MEDIA_10BASET; } else { sc->tulip_probe_state = TULIP_PROBE_MEDIATEST; return 0; } sc->tulip_probe_state = TULIP_PROBE_INACTIVE; return 1; } static void tulip_mii_autonegotiate(tulip_softc_t * const sc, const unsigned phyaddr) { struct ifnet *ifp = sc->tulip_ifp; TULIP_LOCK_ASSERT(sc); switch (sc->tulip_probe_state) { case TULIP_PROBE_MEDIATEST: case TULIP_PROBE_INACTIVE: { sc->tulip_flags |= TULIP_DIDNWAY; tulip_mii_writereg(sc, phyaddr, PHYREG_CONTROL, PHYCTL_RESET); sc->tulip_probe_timeout = 3000; sc->tulip_intrmask |= TULIP_STS_ABNRMLINTR|TULIP_STS_NORMALINTR; sc->tulip_probe_state = TULIP_PROBE_PHYRESET; } /* FALLTHROUGH */ case TULIP_PROBE_PHYRESET: { u_int32_t status; u_int32_t data = tulip_mii_readreg(sc, phyaddr, PHYREG_CONTROL); if (data & PHYCTL_RESET) { if (sc->tulip_probe_timeout > 0) { tulip_timeout(sc); return; } printf("%s(phy%d): error: reset of PHY never completed!\n", ifp->if_xname, phyaddr); sc->tulip_flags 
&= ~TULIP_TXPROBE_ACTIVE; sc->tulip_probe_state = TULIP_PROBE_FAILED; sc->tulip_ifp->if_flags &= ~IFF_UP; sc->tulip_ifp->if_drv_flags &= ~IFF_DRV_RUNNING; return; } status = tulip_mii_readreg(sc, phyaddr, PHYREG_STATUS); if ((status & PHYSTS_CAN_AUTONEG) == 0) { #if defined(TULIP_DEBUG) loudprintf("%s(phy%d): autonegotiation disabled\n", ifp->if_xname, phyaddr); #endif sc->tulip_flags &= ~TULIP_DIDNWAY; sc->tulip_probe_state = TULIP_PROBE_MEDIATEST; return; } if (tulip_mii_readreg(sc, phyaddr, PHYREG_AUTONEG_ADVERTISEMENT) != ((status >> 6) | 0x01)) tulip_mii_writereg(sc, phyaddr, PHYREG_AUTONEG_ADVERTISEMENT, (status >> 6) | 0x01); tulip_mii_writereg(sc, phyaddr, PHYREG_CONTROL, data|PHYCTL_AUTONEG_RESTART|PHYCTL_AUTONEG_ENABLE); data = tulip_mii_readreg(sc, phyaddr, PHYREG_CONTROL); #if defined(TULIP_DEBUG) if ((data & PHYCTL_AUTONEG_ENABLE) == 0) loudprintf("%s(phy%d): oops: enable autonegotiation failed: 0x%04x\n", ifp->if_xname, phyaddr, data); else loudprintf("%s(phy%d): autonegotiation restarted: 0x%04x\n", ifp->if_xname, phyaddr, data); sc->tulip_dbg.dbg_nway_starts++; #endif sc->tulip_probe_state = TULIP_PROBE_PHYAUTONEG; sc->tulip_probe_timeout = 3000; } /* FALLTHROUGH */ case TULIP_PROBE_PHYAUTONEG: { u_int32_t status = tulip_mii_readreg(sc, phyaddr, PHYREG_STATUS); u_int32_t data; if ((status & PHYSTS_AUTONEG_DONE) == 0) { if (sc->tulip_probe_timeout > 0) { tulip_timeout(sc); return; } #if defined(TULIP_DEBUG) loudprintf("%s(phy%d): autonegotiation timeout: sts=0x%04x, ctl=0x%04x\n", ifp->if_xname, phyaddr, status, tulip_mii_readreg(sc, phyaddr, PHYREG_CONTROL)); #endif sc->tulip_flags &= ~TULIP_DIDNWAY; sc->tulip_probe_state = TULIP_PROBE_MEDIATEST; return; } data = tulip_mii_readreg(sc, phyaddr, PHYREG_AUTONEG_ABILITIES); #if defined(TULIP_DEBUG) loudprintf("%s(phy%d): autonegotiation complete: 0x%04x\n", ifp->if_xname, phyaddr, data); #endif data = (data << 6) & status; if (!tulip_mii_map_abilities(sc, data)) sc->tulip_flags &= ~TULIP_DIDNWAY; return; } default: { #if defined(DIAGNOSTIC) panic("tulip_media_poll: botch at line %d\n", __LINE__); #endif break; } } #if defined(TULIP_DEBUG) loudprintf("%s(phy%d): autonegotiation failure: state = %d\n", ifp->if_xname, phyaddr, sc->tulip_probe_state); sc->tulip_dbg.dbg_nway_failures++; #endif } static void tulip_2114x_media_preset(tulip_softc_t * const sc) { const tulip_media_info_t *mi = NULL; tulip_media_t media = sc->tulip_media; TULIP_LOCK_ASSERT(sc); if (sc->tulip_probe_state == TULIP_PROBE_INACTIVE) media = sc->tulip_media; else media = sc->tulip_probe_media; sc->tulip_cmdmode &= ~TULIP_CMD_PORTSELECT; sc->tulip_flags &= ~TULIP_SQETEST; if (media != TULIP_MEDIA_UNKNOWN && media != TULIP_MEDIA_MAX) { #if defined(TULIP_DEBUG) if (media < TULIP_MEDIA_MAX && sc->tulip_mediums[media] != NULL) { #endif mi = sc->tulip_mediums[media]; if (mi->mi_type == TULIP_MEDIAINFO_MII) { sc->tulip_cmdmode |= TULIP_CMD_PORTSELECT; } else if (mi->mi_type == TULIP_MEDIAINFO_GPR || mi->mi_type == TULIP_MEDIAINFO_SYM) { sc->tulip_cmdmode &= ~TULIP_GPR_CMDBITS; sc->tulip_cmdmode |= mi->mi_cmdmode; } else if (mi->mi_type == TULIP_MEDIAINFO_SIA) { TULIP_CSR_WRITE(sc, csr_sia_connectivity, TULIP_SIACONN_RESET); } #if defined(TULIP_DEBUG) } else { device_printf(sc->tulip_dev, "preset: bad media %d!\n", media); } #endif } switch (media) { case TULIP_MEDIA_BNC: case TULIP_MEDIA_AUI: case TULIP_MEDIA_10BASET: { sc->tulip_cmdmode &= ~TULIP_CMD_FULLDUPLEX; sc->tulip_cmdmode |= TULIP_CMD_TXTHRSHLDCTL; sc->tulip_ifp->if_baudrate = 10000000; sc->tulip_flags |= 
TULIP_SQETEST; break; } case TULIP_MEDIA_10BASET_FD: { sc->tulip_cmdmode |= TULIP_CMD_FULLDUPLEX|TULIP_CMD_TXTHRSHLDCTL; sc->tulip_ifp->if_baudrate = 10000000; break; } case TULIP_MEDIA_100BASEFX: case TULIP_MEDIA_100BASET4: case TULIP_MEDIA_100BASETX: { sc->tulip_cmdmode &= ~(TULIP_CMD_FULLDUPLEX|TULIP_CMD_TXTHRSHLDCTL); sc->tulip_cmdmode |= TULIP_CMD_PORTSELECT; sc->tulip_ifp->if_baudrate = 100000000; break; } case TULIP_MEDIA_100BASEFX_FD: case TULIP_MEDIA_100BASETX_FD: { sc->tulip_cmdmode |= TULIP_CMD_FULLDUPLEX|TULIP_CMD_PORTSELECT; sc->tulip_cmdmode &= ~TULIP_CMD_TXTHRSHLDCTL; sc->tulip_ifp->if_baudrate = 100000000; break; } default: { break; } } TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode); } /* ******************************************************************** * Start of 21140/21140A support which does not use the MII interface */ static void tulip_null_media_poll(tulip_softc_t * const sc, tulip_mediapoll_event_t event) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_events[event]++; #endif #if defined(DIAGNOSTIC) device_printf(sc->tulip_dev, "botch(media_poll) at line %d\n", __LINE__); #endif } static inline void tulip_21140_mediainit(tulip_softc_t * const sc, tulip_media_info_t * const mip, tulip_media_t const media, unsigned gpdata, unsigned cmdmode) { TULIP_LOCK_ASSERT(sc); sc->tulip_mediums[media] = mip; mip->mi_type = TULIP_MEDIAINFO_GPR; mip->mi_cmdmode = cmdmode; mip->mi_gpdata = gpdata; } static void tulip_21140_evalboard_media_probe(tulip_softc_t * const sc) { tulip_media_info_t *mip = sc->tulip_mediainfo; TULIP_LOCK_ASSERT(sc); sc->tulip_gpinit = TULIP_GP_EB_PINS; sc->tulip_gpdata = TULIP_GP_EB_INIT; TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_EB_PINS); TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_EB_INIT); TULIP_CSR_WRITE(sc, csr_command, TULIP_CSR_READ(sc, csr_command) | TULIP_CMD_PORTSELECT | TULIP_CMD_PCSFUNCTION | TULIP_CMD_SCRAMBLER | TULIP_CMD_MUSTBEONE); TULIP_CSR_WRITE(sc, csr_command, TULIP_CSR_READ(sc, csr_command) & ~TULIP_CMD_TXTHRSHLDCTL); DELAY(1000000); if ((TULIP_CSR_READ(sc, csr_gp) & TULIP_GP_EB_OK100) != 0) { sc->tulip_media = TULIP_MEDIA_10BASET; } else { sc->tulip_media = TULIP_MEDIA_100BASETX; } tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET, TULIP_GP_EB_INIT, TULIP_CMD_TXTHRSHLDCTL); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET_FD, TULIP_GP_EB_INIT, TULIP_CMD_TXTHRSHLDCTL|TULIP_CMD_FULLDUPLEX); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX, TULIP_GP_EB_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX_FD, TULIP_GP_EB_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER|TULIP_CMD_FULLDUPLEX); } static const tulip_boardsw_t tulip_21140_eb_boardsw = { TULIP_21140_DEC_EB, tulip_21140_evalboard_media_probe, tulip_media_select, tulip_null_media_poll, tulip_2114x_media_preset, }; static void tulip_21140_accton_media_probe(tulip_softc_t * const sc) { tulip_media_info_t *mip = sc->tulip_mediainfo; unsigned gpdata; TULIP_LOCK_ASSERT(sc); sc->tulip_gpinit = TULIP_GP_EB_PINS; sc->tulip_gpdata = TULIP_GP_EB_INIT; TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_EB_PINS); TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_EB_INIT); TULIP_CSR_WRITE(sc, csr_command, TULIP_CSR_READ(sc, csr_command) | TULIP_CMD_PORTSELECT | TULIP_CMD_PCSFUNCTION | TULIP_CMD_SCRAMBLER | TULIP_CMD_MUSTBEONE); TULIP_CSR_WRITE(sc, csr_command, TULIP_CSR_READ(sc, csr_command) & ~TULIP_CMD_TXTHRSHLDCTL); DELAY(1000000); gpdata = TULIP_CSR_READ(sc, csr_gp); if ((gpdata & TULIP_GP_EN1207_UTP_INIT) == 0) { 
sc->tulip_media = TULIP_MEDIA_10BASET; } else { if ((gpdata & TULIP_GP_EN1207_BNC_INIT) == 0) { sc->tulip_media = TULIP_MEDIA_BNC; } else { sc->tulip_media = TULIP_MEDIA_100BASETX; } } tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_BNC, TULIP_GP_EN1207_BNC_INIT, TULIP_CMD_TXTHRSHLDCTL); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET, TULIP_GP_EN1207_UTP_INIT, TULIP_CMD_TXTHRSHLDCTL); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET_FD, TULIP_GP_EN1207_UTP_INIT, TULIP_CMD_TXTHRSHLDCTL|TULIP_CMD_FULLDUPLEX); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX, TULIP_GP_EN1207_100_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX_FD, TULIP_GP_EN1207_100_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER|TULIP_CMD_FULLDUPLEX); } static const tulip_boardsw_t tulip_21140_accton_boardsw = { TULIP_21140_EN1207, tulip_21140_accton_media_probe, tulip_media_select, tulip_null_media_poll, tulip_2114x_media_preset, }; static void tulip_21140_smc9332_media_probe(tulip_softc_t * const sc) { tulip_media_info_t *mip = sc->tulip_mediainfo; int idx, cnt = 0; TULIP_LOCK_ASSERT(sc); TULIP_CSR_WRITE(sc, csr_command, TULIP_CMD_PORTSELECT|TULIP_CMD_MUSTBEONE); TULIP_CSR_WRITE(sc, csr_busmode, TULIP_BUSMODE_SWRESET); DELAY(10); /* Wait 10 microseconds (actually 50 PCI cycles but at 33MHz that comes to two microseconds but wait a bit longer anyways) */ TULIP_CSR_WRITE(sc, csr_command, TULIP_CMD_PORTSELECT | TULIP_CMD_PCSFUNCTION | TULIP_CMD_SCRAMBLER | TULIP_CMD_MUSTBEONE); sc->tulip_gpinit = TULIP_GP_SMC_9332_PINS; sc->tulip_gpdata = TULIP_GP_SMC_9332_INIT; TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_SMC_9332_PINS|TULIP_GP_PINSET); TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_SMC_9332_INIT); DELAY(200000); for (idx = 1000; idx > 0; idx--) { u_int32_t csr = TULIP_CSR_READ(sc, csr_gp); if ((csr & (TULIP_GP_SMC_9332_OK10|TULIP_GP_SMC_9332_OK100)) == (TULIP_GP_SMC_9332_OK10|TULIP_GP_SMC_9332_OK100)) { if (++cnt > 100) break; } else if ((csr & TULIP_GP_SMC_9332_OK10) == 0) { break; } else { cnt = 0; } DELAY(1000); } sc->tulip_media = cnt > 100 ? 
TULIP_MEDIA_100BASETX : TULIP_MEDIA_10BASET; tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX, TULIP_GP_SMC_9332_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX_FD, TULIP_GP_SMC_9332_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER|TULIP_CMD_FULLDUPLEX); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET, TULIP_GP_SMC_9332_INIT, TULIP_CMD_TXTHRSHLDCTL); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET_FD, TULIP_GP_SMC_9332_INIT, TULIP_CMD_TXTHRSHLDCTL|TULIP_CMD_FULLDUPLEX); } static const tulip_boardsw_t tulip_21140_smc9332_boardsw = { TULIP_21140_SMC_9332, tulip_21140_smc9332_media_probe, tulip_media_select, tulip_null_media_poll, tulip_2114x_media_preset, }; static void tulip_21140_cogent_em100_media_probe(tulip_softc_t * const sc) { tulip_media_info_t *mip = sc->tulip_mediainfo; u_int32_t cmdmode = TULIP_CSR_READ(sc, csr_command); TULIP_LOCK_ASSERT(sc); sc->tulip_gpinit = TULIP_GP_EM100_PINS; sc->tulip_gpdata = TULIP_GP_EM100_INIT; TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_EM100_PINS); TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_EM100_INIT); cmdmode = TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION|TULIP_CMD_MUSTBEONE; cmdmode &= ~(TULIP_CMD_TXTHRSHLDCTL|TULIP_CMD_SCRAMBLER); if (sc->tulip_rombuf[32] == TULIP_COGENT_EM100FX_ID) { TULIP_CSR_WRITE(sc, csr_command, cmdmode); sc->tulip_media = TULIP_MEDIA_100BASEFX; tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASEFX, TULIP_GP_EM100_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASEFX_FD, TULIP_GP_EM100_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_FULLDUPLEX); } else { TULIP_CSR_WRITE(sc, csr_command, cmdmode|TULIP_CMD_SCRAMBLER); sc->tulip_media = TULIP_MEDIA_100BASETX; tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX, TULIP_GP_EM100_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX_FD, TULIP_GP_EM100_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER|TULIP_CMD_FULLDUPLEX); } } static const tulip_boardsw_t tulip_21140_cogent_em100_boardsw = { TULIP_21140_COGENT_EM100, tulip_21140_cogent_em100_media_probe, tulip_media_select, tulip_null_media_poll, tulip_2114x_media_preset }; static void tulip_21140_znyx_zx34x_media_probe(tulip_softc_t * const sc) { tulip_media_info_t *mip = sc->tulip_mediainfo; int cnt10 = 0, cnt100 = 0, idx; TULIP_LOCK_ASSERT(sc); sc->tulip_gpinit = TULIP_GP_ZX34X_PINS; sc->tulip_gpdata = TULIP_GP_ZX34X_INIT; TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_ZX34X_PINS); TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_ZX34X_INIT); TULIP_CSR_WRITE(sc, csr_command, TULIP_CSR_READ(sc, csr_command) | TULIP_CMD_PORTSELECT | TULIP_CMD_PCSFUNCTION | TULIP_CMD_SCRAMBLER | TULIP_CMD_MUSTBEONE); TULIP_CSR_WRITE(sc, csr_command, TULIP_CSR_READ(sc, csr_command) & ~TULIP_CMD_TXTHRSHLDCTL); DELAY(200000); for (idx = 1000; idx > 0; idx--) { u_int32_t csr = TULIP_CSR_READ(sc, csr_gp); if ((csr & (TULIP_GP_ZX34X_LNKFAIL|TULIP_GP_ZX34X_SYMDET|TULIP_GP_ZX34X_SIGDET)) == (TULIP_GP_ZX34X_LNKFAIL|TULIP_GP_ZX34X_SYMDET|TULIP_GP_ZX34X_SIGDET)) { if (++cnt100 > 100) break; } else if ((csr & TULIP_GP_ZX34X_LNKFAIL) == 0) { if (++cnt10 > 100) break; } else { cnt10 = 0; cnt100 = 0; } DELAY(1000); } sc->tulip_media = cnt100 > 100 ? 
TULIP_MEDIA_100BASETX : TULIP_MEDIA_10BASET; tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET, TULIP_GP_ZX34X_INIT, TULIP_CMD_TXTHRSHLDCTL); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET_FD, TULIP_GP_ZX34X_INIT, TULIP_CMD_TXTHRSHLDCTL|TULIP_CMD_FULLDUPLEX); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX, TULIP_GP_ZX34X_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX_FD, TULIP_GP_ZX34X_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER|TULIP_CMD_FULLDUPLEX); } static const tulip_boardsw_t tulip_21140_znyx_zx34x_boardsw = { TULIP_21140_ZNYX_ZX34X, tulip_21140_znyx_zx34x_media_probe, tulip_media_select, tulip_null_media_poll, tulip_2114x_media_preset, }; static void tulip_2114x_media_probe(tulip_softc_t * const sc) { TULIP_LOCK_ASSERT(sc); sc->tulip_cmdmode |= TULIP_CMD_MUSTBEONE |TULIP_CMD_BACKOFFCTR|TULIP_CMD_THRSHLD72; } static const tulip_boardsw_t tulip_2114x_isv_boardsw = { TULIP_21140_ISV, tulip_2114x_media_probe, tulip_media_select, tulip_media_poll, tulip_2114x_media_preset, }; /* * ******** END of chip-specific handlers. *********** */ /* * Code the read the SROM and MII bit streams (I2C) */ #define EMIT do { TULIP_CSR_WRITE(sc, csr_srom_mii, csr); DELAY(1); } while (0) static void tulip_srom_idle(tulip_softc_t * const sc) { unsigned bit, csr; csr = SROMSEL ; EMIT; csr = SROMSEL | SROMRD; EMIT; csr ^= SROMCS; EMIT; csr ^= SROMCLKON; EMIT; /* * Write 25 cycles of 0 which will force the SROM to be idle. */ for (bit = 3 + SROM_BITWIDTH + 16; bit > 0; bit--) { csr ^= SROMCLKOFF; EMIT; /* clock low; data not valid */ csr ^= SROMCLKON; EMIT; /* clock high; data valid */ } csr ^= SROMCLKOFF; EMIT; csr ^= SROMCS; EMIT; csr = 0; EMIT; } static void tulip_srom_read(tulip_softc_t * const sc) { unsigned idx; const unsigned bitwidth = SROM_BITWIDTH; const unsigned cmdmask = (SROMCMD_RD << bitwidth); const unsigned msb = 1 << (bitwidth + 3 - 1); unsigned lastidx = (1 << bitwidth) - 1; tulip_srom_idle(sc); for (idx = 0; idx <= lastidx; idx++) { unsigned lastbit, data, bits, bit, csr; csr = SROMSEL ; EMIT; csr = SROMSEL | SROMRD; EMIT; csr ^= SROMCSON; EMIT; csr ^= SROMCLKON; EMIT; lastbit = 0; for (bits = idx|cmdmask, bit = bitwidth + 3; bit > 0; bit--, bits <<= 1) { const unsigned thisbit = bits & msb; csr ^= SROMCLKOFF; EMIT; /* clock low; data not valid */ if (thisbit != lastbit) { csr ^= SROMDOUT; EMIT; /* clock low; invert data */ } else { EMIT; } csr ^= SROMCLKON; EMIT; /* clock high; data valid */ lastbit = thisbit; } csr ^= SROMCLKOFF; EMIT; for (data = 0, bits = 0; bits < 16; bits++) { data <<= 1; csr ^= SROMCLKON; EMIT; /* clock high; data valid */ data |= TULIP_CSR_READ(sc, csr_srom_mii) & SROMDIN ? 1 : 0; csr ^= SROMCLKOFF; EMIT; /* clock low; data not valid */ } sc->tulip_rombuf[idx*2] = data & 0xFF; sc->tulip_rombuf[idx*2+1] = data >> 8; csr = SROMSEL | SROMRD; EMIT; csr = 0; EMIT; } tulip_srom_idle(sc); } #define MII_EMIT do { TULIP_CSR_WRITE(sc, csr_srom_mii, csr); DELAY(1); } while (0) static void tulip_mii_writebits(tulip_softc_t * const sc, unsigned data, unsigned bits) { unsigned msb = 1 << (bits - 1); unsigned csr = TULIP_CSR_READ(sc, csr_srom_mii) & (MII_RD|MII_DOUT|MII_CLK); unsigned lastbit = (csr & MII_DOUT) ? 
msb : 0; TULIP_LOCK_ASSERT(sc); csr |= MII_WR; MII_EMIT; /* clock low; assert write */ for (; bits > 0; bits--, data <<= 1) { const unsigned thisbit = data & msb; if (thisbit != lastbit) { csr ^= MII_DOUT; MII_EMIT; /* clock low; invert data */ } csr ^= MII_CLKON; MII_EMIT; /* clock high; data valid */ lastbit = thisbit; csr ^= MII_CLKOFF; MII_EMIT; /* clock low; data not valid */ } } static void tulip_mii_turnaround(tulip_softc_t * const sc, unsigned cmd) { unsigned csr = TULIP_CSR_READ(sc, csr_srom_mii) & (MII_RD|MII_DOUT|MII_CLK); TULIP_LOCK_ASSERT(sc); if (cmd == MII_WRCMD) { csr |= MII_DOUT; MII_EMIT; /* clock low; change data */ csr ^= MII_CLKON; MII_EMIT; /* clock high; data valid */ csr ^= MII_CLKOFF; MII_EMIT; /* clock low; data not valid */ csr ^= MII_DOUT; MII_EMIT; /* clock low; change data */ } else { csr |= MII_RD; MII_EMIT; /* clock low; switch to read */ } csr ^= MII_CLKON; MII_EMIT; /* clock high; data valid */ csr ^= MII_CLKOFF; MII_EMIT; /* clock low; data not valid */ } static unsigned tulip_mii_readbits(tulip_softc_t * const sc) { unsigned data; unsigned csr = TULIP_CSR_READ(sc, csr_srom_mii) & (MII_RD|MII_DOUT|MII_CLK); int idx; TULIP_LOCK_ASSERT(sc); for (idx = 0, data = 0; idx < 16; idx++) { data <<= 1; /* this is NOOP on the first pass through */ csr ^= MII_CLKON; MII_EMIT; /* clock high; data valid */ if (TULIP_CSR_READ(sc, csr_srom_mii) & MII_DIN) data |= 1; csr ^= MII_CLKOFF; MII_EMIT; /* clock low; data not valid */ } csr ^= MII_RD; MII_EMIT; /* clock low; turn off read */ return data; } static unsigned tulip_mii_readreg(tulip_softc_t * const sc, unsigned devaddr, unsigned regno) { unsigned csr = TULIP_CSR_READ(sc, csr_srom_mii) & (MII_RD|MII_DOUT|MII_CLK); unsigned data; TULIP_LOCK_ASSERT(sc); csr &= ~(MII_RD|MII_CLK); MII_EMIT; tulip_mii_writebits(sc, MII_PREAMBLE, 32); tulip_mii_writebits(sc, MII_RDCMD, 8); tulip_mii_writebits(sc, devaddr, 5); tulip_mii_writebits(sc, regno, 5); tulip_mii_turnaround(sc, MII_RDCMD); data = tulip_mii_readbits(sc); #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_phyregs[regno][0] = data; sc->tulip_dbg.dbg_phyregs[regno][1]++; #endif return data; } static void tulip_mii_writereg(tulip_softc_t * const sc, unsigned devaddr, unsigned regno, unsigned data) { unsigned csr = TULIP_CSR_READ(sc, csr_srom_mii) & (MII_RD|MII_DOUT|MII_CLK); TULIP_LOCK_ASSERT(sc); csr &= ~(MII_RD|MII_CLK); MII_EMIT; tulip_mii_writebits(sc, MII_PREAMBLE, 32); tulip_mii_writebits(sc, MII_WRCMD, 8); tulip_mii_writebits(sc, devaddr, 5); tulip_mii_writebits(sc, regno, 5); tulip_mii_turnaround(sc, MII_WRCMD); tulip_mii_writebits(sc, data, 16); #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_phyregs[regno][2] = data; sc->tulip_dbg.dbg_phyregs[regno][3]++; #endif } #define tulip_mchash(mca) (ether_crc32_le(mca, 6) & 0x1FF) #define tulip_srom_crcok(databuf) ( \ ((ether_crc32_le(databuf, 126) & 0xFFFFU) ^ 0xFFFFU) == \ ((databuf)[126] | ((databuf)[127] << 8))) static void tulip_identify_dec_nic(tulip_softc_t * const sc) { TULIP_LOCK_ASSERT(sc); strcpy(sc->tulip_boardid, "DEC "); #define D0 4 if (sc->tulip_chipid <= TULIP_21040) return; if (bcmp(sc->tulip_rombuf + 29, "DE500", 5) == 0 || bcmp(sc->tulip_rombuf + 29, "DE450", 5) == 0) { bcopy(sc->tulip_rombuf + 29, &sc->tulip_boardid[D0], 8); sc->tulip_boardid[D0+8] = ' '; } #undef D0 } static void tulip_identify_znyx_nic(tulip_softc_t * const sc) { unsigned id = 0; TULIP_LOCK_ASSERT(sc); strcpy(sc->tulip_boardid, "ZNYX ZX3XX "); if (sc->tulip_chipid == TULIP_21140 || sc->tulip_chipid == TULIP_21140A) { unsigned znyx_ptr; 
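	/*
	 * A descriptive note on the code that follows (summarizing what is
	 * already visible below, not adding new behaviour): for 21140/21140A
	 * boards the ZNYX id block is located through a pointer stored at SROM
	 * offsets 124/125; a 0x4A/0x52/0x01 ('J','R',1) signature there is
	 * followed by the 16-bit board id (high byte at +4, low byte at +5)
	 * that the checks below use to refine the board name and board switch.
	 */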
sc->tulip_boardid[8] = '4'; znyx_ptr = sc->tulip_rombuf[124] + 256 * sc->tulip_rombuf[125]; if (znyx_ptr < 26 || znyx_ptr > 116) { sc->tulip_boardsw = &tulip_21140_znyx_zx34x_boardsw; return; } /* ZX344 = 0010 .. 0013FF */ if (sc->tulip_rombuf[znyx_ptr] == 0x4A && sc->tulip_rombuf[znyx_ptr + 1] == 0x52 && sc->tulip_rombuf[znyx_ptr + 2] == 0x01) { id = sc->tulip_rombuf[znyx_ptr + 5] + 256 * sc->tulip_rombuf[znyx_ptr + 4]; if ((id >> 8) == (TULIP_ZNYX_ID_ZX342 >> 8)) { sc->tulip_boardid[9] = '2'; if (id == TULIP_ZNYX_ID_ZX342B) { sc->tulip_boardid[10] = 'B'; sc->tulip_boardid[11] = ' '; } sc->tulip_boardsw = &tulip_21140_znyx_zx34x_boardsw; } else if (id == TULIP_ZNYX_ID_ZX344) { sc->tulip_boardid[10] = '4'; sc->tulip_boardsw = &tulip_21140_znyx_zx34x_boardsw; } else if (id == TULIP_ZNYX_ID_ZX345) { sc->tulip_boardid[9] = (sc->tulip_rombuf[19] > 1) ? '8' : '5'; } else if (id == TULIP_ZNYX_ID_ZX346) { sc->tulip_boardid[9] = '6'; } else if (id == TULIP_ZNYX_ID_ZX351) { sc->tulip_boardid[8] = '5'; sc->tulip_boardid[9] = '1'; } } if (id == 0) { /* * Assume it's a ZX342... */ sc->tulip_boardsw = &tulip_21140_znyx_zx34x_boardsw; } return; } sc->tulip_boardid[8] = '1'; if (sc->tulip_chipid == TULIP_21041) { sc->tulip_boardid[10] = '1'; return; } if (sc->tulip_rombuf[32] == 0x4A && sc->tulip_rombuf[33] == 0x52) { id = sc->tulip_rombuf[37] + 256 * sc->tulip_rombuf[36]; if (id == TULIP_ZNYX_ID_ZX312T) { sc->tulip_boardid[9] = '2'; sc->tulip_boardid[10] = 'T'; sc->tulip_boardid[11] = ' '; sc->tulip_boardsw = &tulip_21040_10baset_only_boardsw; } else if (id == TULIP_ZNYX_ID_ZX314_INTA) { sc->tulip_boardid[9] = '4'; sc->tulip_boardsw = &tulip_21040_10baset_only_boardsw; sc->tulip_features |= TULIP_HAVE_SHAREDINTR|TULIP_HAVE_BASEROM; } else if (id == TULIP_ZNYX_ID_ZX314) { sc->tulip_boardid[9] = '4'; sc->tulip_boardsw = &tulip_21040_10baset_only_boardsw; sc->tulip_features |= TULIP_HAVE_BASEROM; } else if (id == TULIP_ZNYX_ID_ZX315_INTA) { sc->tulip_boardid[9] = '5'; sc->tulip_features |= TULIP_HAVE_SHAREDINTR|TULIP_HAVE_BASEROM; } else if (id == TULIP_ZNYX_ID_ZX315) { sc->tulip_boardid[9] = '5'; sc->tulip_features |= TULIP_HAVE_BASEROM; } else { id = 0; } } if (id == 0) { if ((sc->tulip_enaddr[3] & ~3) == 0xF0 && (sc->tulip_enaddr[5] & 2) == 0) { sc->tulip_boardid[9] = '4'; sc->tulip_boardsw = &tulip_21040_10baset_only_boardsw; sc->tulip_features |= TULIP_HAVE_SHAREDINTR|TULIP_HAVE_BASEROM; } else if ((sc->tulip_enaddr[3] & ~3) == 0xF4 && (sc->tulip_enaddr[5] & 1) == 0) { sc->tulip_boardid[9] = '5'; sc->tulip_boardsw = &tulip_21040_boardsw; sc->tulip_features |= TULIP_HAVE_SHAREDINTR|TULIP_HAVE_BASEROM; } else if ((sc->tulip_enaddr[3] & ~3) == 0xEC) { sc->tulip_boardid[9] = '2'; sc->tulip_boardsw = &tulip_21040_boardsw; } } } static void tulip_identify_smc_nic(tulip_softc_t * const sc) { u_int32_t id1, id2, ei; int auibnc = 0, utp = 0; char *cp; TULIP_LOCK_ASSERT(sc); strcpy(sc->tulip_boardid, "SMC "); if (sc->tulip_chipid == TULIP_21041) return; if (sc->tulip_chipid != TULIP_21040) { if (sc->tulip_boardsw != &tulip_2114x_isv_boardsw) { strcpy(&sc->tulip_boardid[4], "9332DST "); sc->tulip_boardsw = &tulip_21140_smc9332_boardsw; } else if (sc->tulip_features & (TULIP_HAVE_BASEROM|TULIP_HAVE_SLAVEDROM)) { strcpy(&sc->tulip_boardid[4], "9334BDT "); } else { strcpy(&sc->tulip_boardid[4], "9332BDT "); } return; } id1 = sc->tulip_rombuf[0x60] | (sc->tulip_rombuf[0x61] << 8); id2 = sc->tulip_rombuf[0x62] | (sc->tulip_rombuf[0x63] << 8); ei = sc->tulip_rombuf[0x66] | (sc->tulip_rombuf[0x67] << 8); 
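	/*
	 * Descriptive note for the tests below: id1, id2 and ei were just read
	 * from the SMC-specific SROM words at offsets 0x60-0x67.  Their bits
	 * select the 8432 variant letters appended to the board id (B, T, A
	 * and the -CH1/-CH2 suffix) and record whether the board is UTP and/or
	 * AUI/BNC so the matching 21040 board switch can be chosen at the end.
	 */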
strcpy(&sc->tulip_boardid[4], "8432"); cp = &sc->tulip_boardid[8]; if ((id1 & 1) == 0) *cp++ = 'B', auibnc = 1; if ((id1 & 0xFF) > 0x32) *cp++ = 'T', utp = 1; if ((id1 & 0x4000) == 0) *cp++ = 'A', auibnc = 1; if (id2 == 0x15) { sc->tulip_boardid[7] = '4'; *cp++ = '-'; *cp++ = 'C'; *cp++ = 'H'; *cp++ = (ei ? '2' : '1'); } *cp++ = ' '; *cp = '\0'; if (utp && !auibnc) sc->tulip_boardsw = &tulip_21040_10baset_only_boardsw; else if (!utp && auibnc) sc->tulip_boardsw = &tulip_21040_auibnc_only_boardsw; } static void tulip_identify_cogent_nic(tulip_softc_t * const sc) { TULIP_LOCK_ASSERT(sc); strcpy(sc->tulip_boardid, "Cogent "); if (sc->tulip_chipid == TULIP_21140 || sc->tulip_chipid == TULIP_21140A) { if (sc->tulip_rombuf[32] == TULIP_COGENT_EM100TX_ID) { strcat(sc->tulip_boardid, "EM100TX "); sc->tulip_boardsw = &tulip_21140_cogent_em100_boardsw; #if defined(TULIP_COGENT_EM110TX_ID) } else if (sc->tulip_rombuf[32] == TULIP_COGENT_EM110TX_ID) { strcat(sc->tulip_boardid, "EM110TX "); sc->tulip_boardsw = &tulip_21140_cogent_em100_boardsw; #endif } else if (sc->tulip_rombuf[32] == TULIP_COGENT_EM100FX_ID) { strcat(sc->tulip_boardid, "EM100FX "); sc->tulip_boardsw = &tulip_21140_cogent_em100_boardsw; } /* * Magic number (0x24001109U) is the SubVendor (0x2400) and * SubDevId (0x1109) for the ANA6944TX (EM440TX). */ if (*(u_int32_t *) sc->tulip_rombuf == 0x24001109U && (sc->tulip_features & TULIP_HAVE_BASEROM)) { /* * Cogent (Adaptec) is still mapping all INTs to INTA of * first 21140. Dumb! Dumb! */ strcat(sc->tulip_boardid, "EM440TX "); sc->tulip_features |= TULIP_HAVE_SHAREDINTR; } } else if (sc->tulip_chipid == TULIP_21040) { sc->tulip_features |= TULIP_HAVE_SHAREDINTR|TULIP_HAVE_BASEROM; } } static void tulip_identify_accton_nic(tulip_softc_t * const sc) { TULIP_LOCK_ASSERT(sc); strcpy(sc->tulip_boardid, "ACCTON "); switch (sc->tulip_chipid) { case TULIP_21140A: strcat(sc->tulip_boardid, "EN1207 "); if (sc->tulip_boardsw != &tulip_2114x_isv_boardsw) sc->tulip_boardsw = &tulip_21140_accton_boardsw; break; case TULIP_21140: strcat(sc->tulip_boardid, "EN1207TX "); if (sc->tulip_boardsw != &tulip_2114x_isv_boardsw) sc->tulip_boardsw = &tulip_21140_eb_boardsw; break; case TULIP_21040: strcat(sc->tulip_boardid, "EN1203 "); sc->tulip_boardsw = &tulip_21040_boardsw; break; case TULIP_21041: strcat(sc->tulip_boardid, "EN1203 "); sc->tulip_boardsw = &tulip_21041_boardsw; break; default: sc->tulip_boardsw = &tulip_2114x_isv_boardsw; break; } } static void tulip_identify_asante_nic(tulip_softc_t * const sc) { TULIP_LOCK_ASSERT(sc); strcpy(sc->tulip_boardid, "Asante "); if ((sc->tulip_chipid == TULIP_21140 || sc->tulip_chipid == TULIP_21140A) && sc->tulip_boardsw != &tulip_2114x_isv_boardsw) { tulip_media_info_t *mi = sc->tulip_mediainfo; int idx; /* * The Asante Fast Ethernet doesn't always ship with a valid * new format SROM. So if isn't in the new format, we cheat * set it up as if we had. 
*/ sc->tulip_gpinit = TULIP_GP_ASANTE_PINS; sc->tulip_gpdata = 0; TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_ASANTE_PINS|TULIP_GP_PINSET); TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_ASANTE_PHYRESET); DELAY(100); TULIP_CSR_WRITE(sc, csr_gp, 0); mi->mi_type = TULIP_MEDIAINFO_MII; mi->mi_gpr_length = 0; mi->mi_gpr_offset = 0; mi->mi_reset_length = 0; mi->mi_reset_offset = 0; mi->mi_phyaddr = TULIP_MII_NOPHY; for (idx = 20; idx > 0 && mi->mi_phyaddr == TULIP_MII_NOPHY; idx--) { DELAY(10000); mi->mi_phyaddr = tulip_mii_get_phyaddr(sc, 0); } if (mi->mi_phyaddr == TULIP_MII_NOPHY) { device_printf(sc->tulip_dev, "can't find phy 0\n"); return; } sc->tulip_features |= TULIP_HAVE_MII; mi->mi_capabilities = PHYSTS_10BASET|PHYSTS_10BASET_FD|PHYSTS_100BASETX|PHYSTS_100BASETX_FD; mi->mi_advertisement = PHYSTS_10BASET|PHYSTS_10BASET_FD|PHYSTS_100BASETX|PHYSTS_100BASETX_FD; mi->mi_full_duplex = PHYSTS_10BASET_FD|PHYSTS_100BASETX_FD; mi->mi_tx_threshold = PHYSTS_10BASET|PHYSTS_10BASET_FD; TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASETX_FD); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASETX); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASET4); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 10BASET_FD); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 10BASET); mi->mi_phyid = (tulip_mii_readreg(sc, mi->mi_phyaddr, PHYREG_IDLOW) << 16) | tulip_mii_readreg(sc, mi->mi_phyaddr, PHYREG_IDHIGH); sc->tulip_boardsw = &tulip_2114x_isv_boardsw; } } static void tulip_identify_compex_nic(tulip_softc_t * const sc) { TULIP_LOCK_ASSERT(sc); strcpy(sc->tulip_boardid, "COMPEX "); if (sc->tulip_chipid == TULIP_21140A) { int root_unit; tulip_softc_t *root_sc = NULL; strcat(sc->tulip_boardid, "400TX/PCI "); /* * All 4 chips on these boards share an interrupt. This code * copied from tulip_read_macaddr. */ sc->tulip_features |= TULIP_HAVE_SHAREDINTR; for (root_unit = sc->tulip_unit - 1; root_unit >= 0; root_unit--) { root_sc = tulips[root_unit]; if (root_sc == NULL || !(root_sc->tulip_features & TULIP_HAVE_SLAVEDINTR)) break; root_sc = NULL; } if (root_sc != NULL && root_sc->tulip_chipid == sc->tulip_chipid && root_sc->tulip_pci_busno == sc->tulip_pci_busno) { sc->tulip_features |= TULIP_HAVE_SLAVEDINTR; sc->tulip_slaves = root_sc->tulip_slaves; root_sc->tulip_slaves = sc; } else if(sc->tulip_features & TULIP_HAVE_SLAVEDINTR) { printf("\nCannot find master device for %s interrupts", sc->tulip_ifp->if_xname); } } else { strcat(sc->tulip_boardid, "unknown "); } /* sc->tulip_boardsw = &tulip_21140_eb_boardsw; */ return; } static int tulip_srom_decode(tulip_softc_t * const sc) { unsigned idx1, idx2, idx3; const tulip_srom_header_t *shp = (const tulip_srom_header_t *) &sc->tulip_rombuf[0]; const tulip_srom_adapter_info_t *saip = (const tulip_srom_adapter_info_t *) (shp + 1); tulip_srom_media_t srom_media; tulip_media_info_t *mi = sc->tulip_mediainfo; const u_int8_t *dp; u_int32_t leaf_offset, blocks, data; TULIP_LOCK_ASSERT(sc); for (idx1 = 0; idx1 < shp->sh_adapter_count; idx1++, saip++) { if (shp->sh_adapter_count == 1) break; if (saip->sai_device == sc->tulip_pci_devno) break; } /* * Didn't find the right media block for this card. */ if (idx1 == shp->sh_adapter_count) return 0; /* * Save the hardware address. */ bcopy(shp->sh_ieee802_address, sc->tulip_enaddr, 6); /* * If this is a multiple port card, add the adapter index to the last * byte of the hardware address. (if it isn't multiport, adding 0 * won't hurt. 
*/ sc->tulip_enaddr[5] += idx1; leaf_offset = saip->sai_leaf_offset_lowbyte + saip->sai_leaf_offset_highbyte * 256; dp = sc->tulip_rombuf + leaf_offset; sc->tulip_conntype = (tulip_srom_connection_t) (dp[0] + dp[1] * 256); dp += 2; for (idx2 = 0;; idx2++) { if (tulip_srom_conninfo[idx2].sc_type == sc->tulip_conntype || tulip_srom_conninfo[idx2].sc_type == TULIP_SROM_CONNTYPE_NOT_USED) break; } sc->tulip_connidx = idx2; if (sc->tulip_chipid == TULIP_21041) { blocks = *dp++; for (idx2 = 0; idx2 < blocks; idx2++) { tulip_media_t media; data = *dp++; srom_media = (tulip_srom_media_t) (data & 0x3F); for (idx3 = 0; tulip_srom_mediums[idx3].sm_type != TULIP_MEDIA_UNKNOWN; idx3++) { if (tulip_srom_mediums[idx3].sm_srom_type == srom_media) break; } media = tulip_srom_mediums[idx3].sm_type; if (media != TULIP_MEDIA_UNKNOWN) { if (data & TULIP_SROM_21041_EXTENDED) { mi->mi_type = TULIP_MEDIAINFO_SIA; sc->tulip_mediums[media] = mi; mi->mi_sia_connectivity = dp[0] + dp[1] * 256; mi->mi_sia_tx_rx = dp[2] + dp[3] * 256; mi->mi_sia_general = dp[4] + dp[5] * 256; mi++; } else { switch (media) { case TULIP_MEDIA_BNC: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, BNC); mi++; break; } case TULIP_MEDIA_AUI: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, AUI); mi++; break; } case TULIP_MEDIA_10BASET: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, 10BASET); mi++; break; } case TULIP_MEDIA_10BASET_FD: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, 10BASET_FD); mi++; break; } default: { break; } } } } if (data & TULIP_SROM_21041_EXTENDED) dp += 6; } #ifdef notdef if (blocks == 0) { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, BNC); mi++; TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, AUI); mi++; TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, 10BASET); mi++; TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, 10BASET_FD); mi++; } #endif } else { unsigned length, type; tulip_media_t gp_media = TULIP_MEDIA_UNKNOWN; if (sc->tulip_features & TULIP_HAVE_GPR) sc->tulip_gpinit = *dp++; blocks = *dp++; for (idx2 = 0; idx2 < blocks; idx2++) { const u_int8_t *ep; if ((*dp & 0x80) == 0) { length = 4; type = 0; } else { length = (*dp++ & 0x7f) - 1; type = *dp++ & 0x3f; } ep = dp + length; switch (type & 0x3f) { case 0: { /* 21140[A] GPR block */ tulip_media_t media; srom_media = (tulip_srom_media_t)(dp[0] & 0x3f); for (idx3 = 0; tulip_srom_mediums[idx3].sm_type != TULIP_MEDIA_UNKNOWN; idx3++) { if (tulip_srom_mediums[idx3].sm_srom_type == srom_media) break; } media = tulip_srom_mediums[idx3].sm_type; if (media == TULIP_MEDIA_UNKNOWN) break; mi->mi_type = TULIP_MEDIAINFO_GPR; sc->tulip_mediums[media] = mi; mi->mi_gpdata = dp[1]; if (media > gp_media && !TULIP_IS_MEDIA_FD(media)) { sc->tulip_gpdata = mi->mi_gpdata; gp_media = media; } data = dp[2] + dp[3] * 256; mi->mi_cmdmode = TULIP_SROM_2114X_CMDBITS(data); if (data & TULIP_SROM_2114X_NOINDICATOR) { mi->mi_actmask = 0; } else { #if 0 mi->mi_default = (data & TULIP_SROM_2114X_DEFAULT) != 0; #endif mi->mi_actmask = TULIP_SROM_2114X_BITPOS(data); mi->mi_actdata = (data & TULIP_SROM_2114X_POLARITY) ? 0 : mi->mi_actmask; } mi++; break; } case 1: { /* 21140[A] MII block */ const unsigned phyno = *dp++; mi->mi_type = TULIP_MEDIAINFO_MII; mi->mi_gpr_length = *dp++; mi->mi_gpr_offset = dp - sc->tulip_rombuf; dp += mi->mi_gpr_length; mi->mi_reset_length = *dp++; mi->mi_reset_offset = dp - sc->tulip_rombuf; dp += mi->mi_reset_length; /* * Before we probe for a PHY, use the GPR information * to select it. If we don't, it may be inaccessible. 
*/ TULIP_CSR_WRITE(sc, csr_gp, sc->tulip_gpinit|TULIP_GP_PINSET); for (idx3 = 0; idx3 < mi->mi_reset_length; idx3++) { DELAY(10); TULIP_CSR_WRITE(sc, csr_gp, sc->tulip_rombuf[mi->mi_reset_offset + idx3]); } sc->tulip_phyaddr = mi->mi_phyaddr; for (idx3 = 0; idx3 < mi->mi_gpr_length; idx3++) { DELAY(10); TULIP_CSR_WRITE(sc, csr_gp, sc->tulip_rombuf[mi->mi_gpr_offset + idx3]); } /* * At least write something! */ if (mi->mi_reset_length == 0 && mi->mi_gpr_length == 0) TULIP_CSR_WRITE(sc, csr_gp, 0); mi->mi_phyaddr = TULIP_MII_NOPHY; for (idx3 = 20; idx3 > 0 && mi->mi_phyaddr == TULIP_MII_NOPHY; idx3--) { DELAY(10000); mi->mi_phyaddr = tulip_mii_get_phyaddr(sc, phyno); } if (mi->mi_phyaddr == TULIP_MII_NOPHY) { #if defined(TULIP_DEBUG) device_printf(sc->tulip_dev, "can't find phy %d\n", phyno); #endif break; } sc->tulip_features |= TULIP_HAVE_MII; mi->mi_capabilities = dp[0] + dp[1] * 256; dp += 2; mi->mi_advertisement = dp[0] + dp[1] * 256; dp += 2; mi->mi_full_duplex = dp[0] + dp[1] * 256; dp += 2; mi->mi_tx_threshold = dp[0] + dp[1] * 256; dp += 2; TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASETX_FD); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASETX); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASET4); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 10BASET_FD); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 10BASET); mi->mi_phyid = (tulip_mii_readreg(sc, mi->mi_phyaddr, PHYREG_IDLOW) << 16) | tulip_mii_readreg(sc, mi->mi_phyaddr, PHYREG_IDHIGH); mi++; break; } case 2: { /* 2114[23] SIA block */ tulip_media_t media; srom_media = (tulip_srom_media_t)(dp[0] & 0x3f); for (idx3 = 0; tulip_srom_mediums[idx3].sm_type != TULIP_MEDIA_UNKNOWN; idx3++) { if (tulip_srom_mediums[idx3].sm_srom_type == srom_media) break; } media = tulip_srom_mediums[idx3].sm_type; if (media == TULIP_MEDIA_UNKNOWN) break; mi->mi_type = TULIP_MEDIAINFO_SIA; sc->tulip_mediums[media] = mi; if (dp[0] & 0x40) { mi->mi_sia_connectivity = dp[1] + dp[2] * 256; mi->mi_sia_tx_rx = dp[3] + dp[4] * 256; mi->mi_sia_general = dp[5] + dp[6] * 256; dp += 6; } else { switch (media) { case TULIP_MEDIA_BNC: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21142, BNC); break; } case TULIP_MEDIA_AUI: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21142, AUI); break; } case TULIP_MEDIA_10BASET: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21142, 10BASET); sc->tulip_intrmask |= TULIP_STS_LINKPASS|TULIP_STS_LINKFAIL; break; } case TULIP_MEDIA_10BASET_FD: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21142, 10BASET_FD); sc->tulip_intrmask |= TULIP_STS_LINKPASS|TULIP_STS_LINKFAIL; break; } default: { goto bad_media; } } } mi->mi_sia_gp_control = (dp[1] + dp[2] * 256) << 16; mi->mi_sia_gp_data = (dp[3] + dp[4] * 256) << 16; mi++; bad_media: break; } case 3: { /* 2114[23] MII PHY block */ const unsigned phyno = *dp++; const u_int8_t *dp0; mi->mi_type = TULIP_MEDIAINFO_MII; mi->mi_gpr_length = *dp++; mi->mi_gpr_offset = dp - sc->tulip_rombuf; dp += 2 * mi->mi_gpr_length; mi->mi_reset_length = *dp++; mi->mi_reset_offset = dp - sc->tulip_rombuf; dp += 2 * mi->mi_reset_length; dp0 = &sc->tulip_rombuf[mi->mi_reset_offset]; for (idx3 = 0; idx3 < mi->mi_reset_length; idx3++, dp0 += 2) { DELAY(10); TULIP_CSR_WRITE(sc, csr_sia_general, (dp0[0] + 256 * dp0[1]) << 16); } sc->tulip_phyaddr = mi->mi_phyaddr; dp0 = &sc->tulip_rombuf[mi->mi_gpr_offset]; for (idx3 = 0; idx3 < mi->mi_gpr_length; idx3++, dp0 += 2) { DELAY(10); TULIP_CSR_WRITE(sc, csr_sia_general, (dp0[0] + 256 * dp0[1]) << 16); } if (mi->mi_reset_length == 0 && mi->mi_gpr_length == 0) TULIP_CSR_WRITE(sc, csr_sia_general, 0); mi->mi_phyaddr = TULIP_MII_NOPHY; for 
(idx3 = 20; idx3 > 0 && mi->mi_phyaddr == TULIP_MII_NOPHY; idx3--) { DELAY(10000); mi->mi_phyaddr = tulip_mii_get_phyaddr(sc, phyno); } if (mi->mi_phyaddr == TULIP_MII_NOPHY) { #if defined(TULIP_DEBUG) device_printf(sc->tulip_dev, "can't find phy %d\n", phyno); #endif break; } sc->tulip_features |= TULIP_HAVE_MII; mi->mi_capabilities = dp[0] + dp[1] * 256; dp += 2; mi->mi_advertisement = dp[0] + dp[1] * 256; dp += 2; mi->mi_full_duplex = dp[0] + dp[1] * 256; dp += 2; mi->mi_tx_threshold = dp[0] + dp[1] * 256; dp += 2; mi->mi_mii_interrupt = dp[0] + dp[1] * 256; dp += 2; TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASETX_FD); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASETX); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASET4); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 10BASET_FD); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 10BASET); mi->mi_phyid = (tulip_mii_readreg(sc, mi->mi_phyaddr, PHYREG_IDLOW) << 16) | tulip_mii_readreg(sc, mi->mi_phyaddr, PHYREG_IDHIGH); mi++; break; } case 4: { /* 21143 SYM block */ tulip_media_t media; srom_media = (tulip_srom_media_t) dp[0]; for (idx3 = 0; tulip_srom_mediums[idx3].sm_type != TULIP_MEDIA_UNKNOWN; idx3++) { if (tulip_srom_mediums[idx3].sm_srom_type == srom_media) break; } media = tulip_srom_mediums[idx3].sm_type; if (media == TULIP_MEDIA_UNKNOWN) break; mi->mi_type = TULIP_MEDIAINFO_SYM; sc->tulip_mediums[media] = mi; mi->mi_gpcontrol = (dp[1] + dp[2] * 256) << 16; mi->mi_gpdata = (dp[3] + dp[4] * 256) << 16; data = dp[5] + dp[6] * 256; mi->mi_cmdmode = TULIP_SROM_2114X_CMDBITS(data); if (data & TULIP_SROM_2114X_NOINDICATOR) { mi->mi_actmask = 0; } else { mi->mi_default = (data & TULIP_SROM_2114X_DEFAULT) != 0; mi->mi_actmask = TULIP_SROM_2114X_BITPOS(data); mi->mi_actdata = (data & TULIP_SROM_2114X_POLARITY) ? 0 : mi->mi_actmask; } if (TULIP_IS_MEDIA_TP(media)) sc->tulip_intrmask |= TULIP_STS_LINKPASS|TULIP_STS_LINKFAIL; mi++; break; } #if 0 case 5: { /* 21143 Reset block */ mi->mi_type = TULIP_MEDIAINFO_RESET; mi->mi_reset_length = *dp++; mi->mi_reset_offset = dp - sc->tulip_rombuf; dp += 2 * mi->mi_reset_length; mi++; break; } #endif default: { } } dp = ep; } } return mi - sc->tulip_mediainfo; } static const struct { void (*vendor_identify_nic)(tulip_softc_t * const sc); unsigned char vendor_oui[3]; } tulip_vendors[] = { { tulip_identify_dec_nic, { 0x08, 0x00, 0x2B } }, { tulip_identify_dec_nic, { 0x00, 0x00, 0xF8 } }, { tulip_identify_smc_nic, { 0x00, 0x00, 0xC0 } }, { tulip_identify_smc_nic, { 0x00, 0xE0, 0x29 } }, { tulip_identify_znyx_nic, { 0x00, 0xC0, 0x95 } }, { tulip_identify_cogent_nic, { 0x00, 0x00, 0x92 } }, { tulip_identify_asante_nic, { 0x00, 0x00, 0x94 } }, { tulip_identify_cogent_nic, { 0x00, 0x00, 0xD1 } }, { tulip_identify_accton_nic, { 0x00, 0x00, 0xE8 } }, { tulip_identify_compex_nic, { 0x00, 0x80, 0x48 } }, { NULL } }; /* * This deals with the vagaries of the address roms and the * brain-deadness that various vendors commit in using them. 
*/ static int tulip_read_macaddr(tulip_softc_t * const sc) { unsigned cksum, rom_cksum, idx; u_int32_t csr; unsigned char tmpbuf[8]; static const u_char testpat[] = { 0xFF, 0, 0x55, 0xAA, 0xFF, 0, 0x55, 0xAA }; sc->tulip_connidx = TULIP_SROM_LASTCONNIDX; if (sc->tulip_chipid == TULIP_21040) { TULIP_CSR_WRITE(sc, csr_enetrom, 1); for (idx = 0; idx < sizeof(sc->tulip_rombuf); idx++) { int cnt = 0; while (((csr = TULIP_CSR_READ(sc, csr_enetrom)) & 0x80000000L) && cnt < 10000) cnt++; sc->tulip_rombuf[idx] = csr & 0xFF; } sc->tulip_boardsw = &tulip_21040_boardsw; } else { if (sc->tulip_chipid == TULIP_21041) { /* * Thankfully all 21041's act the same. */ sc->tulip_boardsw = &tulip_21041_boardsw; } else { /* * Assume all 21140 boards are compatible with the * DEC 10/100 evaluation board. Not really valid but * it's the best we can do until everyone switches to * the new SROM format. */ sc->tulip_boardsw = &tulip_21140_eb_boardsw; } tulip_srom_read(sc); if (tulip_srom_crcok(sc->tulip_rombuf)) { /* * SROM CRC is valid, therefore it must be in the * new format. */ sc->tulip_features |= TULIP_HAVE_ISVSROM|TULIP_HAVE_OKSROM; } else if (sc->tulip_rombuf[126] == 0xff && sc->tulip_rombuf[127] == 0xFF) { /* * No checksum is present. See if the SROM id checks out; * the first 18 bytes should be 0 followed by a 1 followed * by the number of adapters (which we don't deal with yet). */ for (idx = 0; idx < 18; idx++) { if (sc->tulip_rombuf[idx] != 0) break; } if (idx == 18 && sc->tulip_rombuf[18] == 1 && sc->tulip_rombuf[19] != 0) sc->tulip_features |= TULIP_HAVE_ISVSROM; } else if (sc->tulip_chipid >= TULIP_21142) { sc->tulip_features |= TULIP_HAVE_ISVSROM; sc->tulip_boardsw = &tulip_2114x_isv_boardsw; } if ((sc->tulip_features & TULIP_HAVE_ISVSROM) && tulip_srom_decode(sc)) { if (sc->tulip_chipid != TULIP_21041) sc->tulip_boardsw = &tulip_2114x_isv_boardsw; /* * If the SROM specifies more than one adapter, tag this as a * BASE rom. */ if (sc->tulip_rombuf[19] > 1) sc->tulip_features |= TULIP_HAVE_BASEROM; if (sc->tulip_boardsw == NULL) return -6; goto check_oui; } } if (bcmp(&sc->tulip_rombuf[0], &sc->tulip_rombuf[16], 8) != 0) { /* * Some folks don't use the standard ethernet rom format * but instead just put the address in the first 6 bytes * of the rom and let the rest be all 0xffs. (Can we say * ZNYX?) (well sometimes they put in a checksum so we'll * start at 8). */ for (idx = 8; idx < 32; idx++) { if (sc->tulip_rombuf[idx] != 0xFF) return -4; } /* * Make sure the address is not multicast or locally assigned and * that the OUI is not 00-00-00. */ if ((sc->tulip_rombuf[0] & 3) != 0) return -4; if (sc->tulip_rombuf[0] == 0 && sc->tulip_rombuf[1] == 0 && sc->tulip_rombuf[2] == 0) return -4; bcopy(sc->tulip_rombuf, sc->tulip_enaddr, 6); sc->tulip_features |= TULIP_HAVE_OKROM; goto check_oui; } else { /* * A number of makers of multiport boards (ZNYX and Cogent) * only put one address ROM on their 21040 boards. So * if the ROM is all zeros (or all 0xFFs), look at the * previously configured boards (as long as they are on the same * PCI bus and the bus number is non-zero) until we find the * master board with the address ROM. We then use its address ROM * as the base for this board. (We add our relative board number * to the last byte of its address.) */
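/*
 * Editorial aside, not driver code: the standard DEC address-ROM test that
 * follows checks a test pattern, a byte-reversed copy of the address, and a
 * 16-bit checksum stored at offset 6 of the ROM.  The sketch below mirrors
 * the checksum arithmetic used in tulip_read_macaddr (16-bit words read in
 * host byte order, folded back under 65535); it exists only to illustrate
 * that computation and is not part of the driver.
 */
static unsigned
example_dec_rom_cksum(const unsigned char *enaddr)
{
	unsigned cksum;

	cksum = *(const u_int16_t *) &enaddr[0];
	cksum *= 2;
	if (cksum > 65535)
		cksum -= 65535;
	cksum += *(const u_int16_t *) &enaddr[2];
	if (cksum > 65535)
		cksum -= 65535;
	cksum *= 2;
	if (cksum > 65535)
		cksum -= 65535;
	cksum += *(const u_int16_t *) &enaddr[4];
	if (cksum >= 65535)
		cksum -= 65535;
	/* The result is compared against the word stored at rombuf[6]. */
	return (cksum);
}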
*/ for (idx = 0; idx < sizeof(sc->tulip_rombuf); idx++) { if (sc->tulip_rombuf[idx] != 0 && sc->tulip_rombuf[idx] != 0xFF) break; } if (idx == sizeof(sc->tulip_rombuf)) { int root_unit; tulip_softc_t *root_sc = NULL; for (root_unit = sc->tulip_unit - 1; root_unit >= 0; root_unit--) { root_sc = tulips[root_unit]; if (root_sc == NULL || (root_sc->tulip_features & (TULIP_HAVE_OKROM|TULIP_HAVE_SLAVEDROM)) == TULIP_HAVE_OKROM) break; root_sc = NULL; } if (root_sc != NULL && (root_sc->tulip_features & TULIP_HAVE_BASEROM) && root_sc->tulip_chipid == sc->tulip_chipid && root_sc->tulip_pci_busno == sc->tulip_pci_busno) { sc->tulip_features |= TULIP_HAVE_SLAVEDROM; sc->tulip_boardsw = root_sc->tulip_boardsw; strcpy(sc->tulip_boardid, root_sc->tulip_boardid); if (sc->tulip_boardsw->bd_type == TULIP_21140_ISV) { bcopy(root_sc->tulip_rombuf, sc->tulip_rombuf, sizeof(sc->tulip_rombuf)); if (!tulip_srom_decode(sc)) return -5; } else { bcopy(root_sc->tulip_enaddr, sc->tulip_enaddr, 6); sc->tulip_enaddr[5] += sc->tulip_unit - root_sc->tulip_unit; } /* * Now for a truly disgusting kludge: all 4 21040s on * the ZX314 share the same INTA line so the mapping * setup by the BIOS on the PCI bridge is worthless. * Rather than reprogramming the value in the config * register, we will handle this internally. */ if (root_sc->tulip_features & TULIP_HAVE_SHAREDINTR) { sc->tulip_slaves = root_sc->tulip_slaves; root_sc->tulip_slaves = sc; sc->tulip_features |= TULIP_HAVE_SLAVEDINTR; } return 0; } } } /* * This is the standard DEC address ROM test. */ if (bcmp(&sc->tulip_rombuf[24], testpat, 8) != 0) return -3; tmpbuf[0] = sc->tulip_rombuf[15]; tmpbuf[1] = sc->tulip_rombuf[14]; tmpbuf[2] = sc->tulip_rombuf[13]; tmpbuf[3] = sc->tulip_rombuf[12]; tmpbuf[4] = sc->tulip_rombuf[11]; tmpbuf[5] = sc->tulip_rombuf[10]; tmpbuf[6] = sc->tulip_rombuf[9]; tmpbuf[7] = sc->tulip_rombuf[8]; if (bcmp(&sc->tulip_rombuf[0], tmpbuf, 8) != 0) return -2; bcopy(sc->tulip_rombuf, sc->tulip_enaddr, 6); cksum = *(u_int16_t *) &sc->tulip_enaddr[0]; cksum *= 2; if (cksum > 65535) cksum -= 65535; cksum += *(u_int16_t *) &sc->tulip_enaddr[2]; if (cksum > 65535) cksum -= 65535; cksum *= 2; if (cksum > 65535) cksum -= 65535; cksum += *(u_int16_t *) &sc->tulip_enaddr[4]; if (cksum >= 65535) cksum -= 65535; rom_cksum = *(u_int16_t *) &sc->tulip_rombuf[6]; if (cksum != rom_cksum) return -1; check_oui: /* * Check for various boards based on OUI. Did I say braindead? 
*/ for (idx = 0; tulip_vendors[idx].vendor_identify_nic != NULL; idx++) { if (bcmp(sc->tulip_enaddr, tulip_vendors[idx].vendor_oui, 3) == 0) { (*tulip_vendors[idx].vendor_identify_nic)(sc); break; } } sc->tulip_features |= TULIP_HAVE_OKROM; return 0; } static void tulip_ifmedia_add(tulip_softc_t * const sc) { tulip_media_t media; int medias = 0; TULIP_LOCK_ASSERT(sc); for (media = TULIP_MEDIA_UNKNOWN; media < TULIP_MEDIA_MAX; media++) { if (sc->tulip_mediums[media] != NULL) { ifmedia_add(&sc->tulip_ifmedia, tulip_media_to_ifmedia[media], 0, 0); medias++; } } if (medias == 0) { sc->tulip_features |= TULIP_HAVE_NOMEDIA; ifmedia_add(&sc->tulip_ifmedia, IFM_ETHER | IFM_NONE, 0, 0); ifmedia_set(&sc->tulip_ifmedia, IFM_ETHER | IFM_NONE); } else if (sc->tulip_media == TULIP_MEDIA_UNKNOWN) { ifmedia_add(&sc->tulip_ifmedia, IFM_ETHER | IFM_AUTO, 0, 0); ifmedia_set(&sc->tulip_ifmedia, IFM_ETHER | IFM_AUTO); } else { ifmedia_set(&sc->tulip_ifmedia, tulip_media_to_ifmedia[sc->tulip_media]); sc->tulip_flags |= TULIP_PRINTMEDIA; tulip_linkup(sc, sc->tulip_media); } } static int tulip_ifmedia_change(struct ifnet * const ifp) { tulip_softc_t * const sc = (tulip_softc_t *)ifp->if_softc; TULIP_LOCK(sc); sc->tulip_flags |= TULIP_NEEDRESET; sc->tulip_probe_state = TULIP_PROBE_INACTIVE; sc->tulip_media = TULIP_MEDIA_UNKNOWN; if (IFM_SUBTYPE(sc->tulip_ifmedia.ifm_media) != IFM_AUTO) { tulip_media_t media; for (media = TULIP_MEDIA_UNKNOWN; media < TULIP_MEDIA_MAX; media++) { if (sc->tulip_mediums[media] != NULL && sc->tulip_ifmedia.ifm_media == tulip_media_to_ifmedia[media]) { sc->tulip_flags |= TULIP_PRINTMEDIA; sc->tulip_flags &= ~TULIP_DIDNWAY; tulip_linkup(sc, media); TULIP_UNLOCK(sc); return 0; } } } sc->tulip_flags &= ~(TULIP_TXPROBE_ACTIVE|TULIP_WANTRXACT); tulip_reset(sc); tulip_init_locked(sc); TULIP_UNLOCK(sc); return 0; } /* * Media status callback */ static void tulip_ifmedia_status(struct ifnet * const ifp, struct ifmediareq *req) { tulip_softc_t *sc = (tulip_softc_t *)ifp->if_softc; TULIP_LOCK(sc); if (sc->tulip_media == TULIP_MEDIA_UNKNOWN) { TULIP_UNLOCK(sc); return; } req->ifm_status = IFM_AVALID; if (sc->tulip_flags & TULIP_LINKUP) req->ifm_status |= IFM_ACTIVE; req->ifm_active = tulip_media_to_ifmedia[sc->tulip_media]; TULIP_UNLOCK(sc); } static void tulip_addr_filter(tulip_softc_t * const sc) { struct ifmultiaddr *ifma; struct ifnet *ifp; u_char *addrp; u_int16_t eaddr[ETHER_ADDR_LEN/2]; int multicnt; TULIP_LOCK_ASSERT(sc); sc->tulip_flags &= ~(TULIP_WANTHASHPERFECT|TULIP_WANTHASHONLY|TULIP_ALLMULTI); sc->tulip_flags |= TULIP_WANTSETUP|TULIP_WANTTXSTART; sc->tulip_cmdmode &= ~TULIP_CMD_RXRUN; sc->tulip_intrmask &= ~TULIP_STS_RXSTOPPED; #if defined(IFF_ALLMULTI) if (sc->tulip_ifp->if_flags & IFF_ALLMULTI) sc->tulip_flags |= TULIP_ALLMULTI ; #endif multicnt = 0; ifp = sc->tulip_ifp; if_maddr_rlock(ifp); /* Copy MAC address on stack to align. */ if (ifp->if_input != NULL) bcopy(IF_LLADDR(ifp), eaddr, ETHER_ADDR_LEN); else bcopy(sc->tulip_enaddr, eaddr, ETHER_ADDR_LEN); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family == AF_LINK) multicnt++; } if (multicnt > 14) { u_int32_t *sp = sc->tulip_setupdata; unsigned hash; /* * Some early passes of the 21140 have broken implementations of * hash-perfect mode. When we get too many multicasts for perfect * filtering with these chips, we need to switch into hash-only * mode (this is better than all-multicast on network with lots * of multicast traffic). 
*/ if (sc->tulip_features & TULIP_HAVE_BROKEN_HASH) sc->tulip_flags |= TULIP_WANTHASHONLY; else sc->tulip_flags |= TULIP_WANTHASHPERFECT; /* * If we have more than 14 multicasts, we have to * go into hash perfect mode (512 bit multicast * hash and one perfect hardware address). */ bzero(sc->tulip_setupdata, sizeof(sc->tulip_setupdata)); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; hash = tulip_mchash(LLADDR((struct sockaddr_dl *)ifma->ifma_addr)); sp[hash >> 4] |= htole32(1 << (hash & 0xF)); } /* * No reason to use a hash if we are going to be * receiving every multicast. */ if ((sc->tulip_flags & TULIP_ALLMULTI) == 0) { hash = tulip_mchash(ifp->if_broadcastaddr); sp[hash >> 4] |= htole32(1 << (hash & 0xF)); if (sc->tulip_flags & TULIP_WANTHASHONLY) { hash = tulip_mchash((caddr_t)eaddr); sp[hash >> 4] |= htole32(1 << (hash & 0xF)); } else { sp[39] = TULIP_SP_MAC(eaddr[0]); sp[40] = TULIP_SP_MAC(eaddr[1]); sp[41] = TULIP_SP_MAC(eaddr[2]); } } } if ((sc->tulip_flags & (TULIP_WANTHASHPERFECT|TULIP_WANTHASHONLY)) == 0) { u_int32_t *sp = sc->tulip_setupdata; int idx = 0; if ((sc->tulip_flags & TULIP_ALLMULTI) == 0) { /* * Otherwise we can get perfect filtering for 16 addresses. */ TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; addrp = LLADDR((struct sockaddr_dl *)ifma->ifma_addr); *sp++ = TULIP_SP_MAC(((u_int16_t *)addrp)[0]); *sp++ = TULIP_SP_MAC(((u_int16_t *)addrp)[1]); *sp++ = TULIP_SP_MAC(((u_int16_t *)addrp)[2]); idx++; } /* * Add the broadcast address. */ idx++; *sp++ = TULIP_SP_MAC(0xFFFF); *sp++ = TULIP_SP_MAC(0xFFFF); *sp++ = TULIP_SP_MAC(0xFFFF); } /* * Pad the rest with our hardware address. */ for (; idx < 16; idx++) { *sp++ = TULIP_SP_MAC(eaddr[0]); *sp++ = TULIP_SP_MAC(eaddr[1]); *sp++ = TULIP_SP_MAC(eaddr[2]); } } if_maddr_runlock(ifp); } static void tulip_reset(tulip_softc_t * const sc) { tulip_ringinfo_t *ri; tulip_descinfo_t *di; struct mbuf *m; u_int32_t inreset = (sc->tulip_flags & TULIP_INRESET); TULIP_LOCK_ASSERT(sc); CTR1(KTR_TULIP, "tulip_reset: inreset %d", inreset); /* * Brilliant. Simply brilliant. When switching modes/speeds * on a 2114*, you need to set the appropriate MII/PCS/SCL/PS * bits in CSR6 and then do a software reset to get the 21140 * to properly reset its internal pathways to the right places. * Grrrr. */ if ((sc->tulip_flags & TULIP_DEVICEPROBE) == 0 && sc->tulip_boardsw->bd_media_preset != NULL) (*sc->tulip_boardsw->bd_media_preset)(sc); TULIP_CSR_WRITE(sc, csr_busmode, TULIP_BUSMODE_SWRESET); DELAY(10); /* Wait 10 microseconds (actually 50 PCI cycles but at 33MHz that comes to two microseconds but wait a bit longer anyway) */ if (!inreset) { sc->tulip_flags |= TULIP_INRESET; sc->tulip_flags &= ~(TULIP_NEEDRESET|TULIP_RXBUFSLOW); sc->tulip_ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; } TULIP_CSR_WRITE(sc, csr_txlist, sc->tulip_txinfo.ri_dma_addr & 0xffffffff); TULIP_CSR_WRITE(sc, csr_rxlist, sc->tulip_rxinfo.ri_dma_addr & 0xffffffff); TULIP_CSR_WRITE(sc, csr_busmode, (1 << (3 /*pci_max_burst_len*/ + 8)) |TULIP_BUSMODE_CACHE_ALIGN8 |TULIP_BUSMODE_READMULTIPLE |(BYTE_ORDER != LITTLE_ENDIAN ? TULIP_BUSMODE_DESC_BIGENDIAN : 0)); sc->tulip_txtimer = 0; /* * Free all the mbufs that were on the transmit ring. */
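/*
 * Editorial aside, not driver code: in hash-filter mode the setup buffer
 * filled by tulip_addr_filter above holds a 512-bit multicast hash table,
 * 16 useful bits per 32-bit longword.  A hash value therefore selects
 * longword (hash >> 4) and bit (hash & 0xF) within it, which is exactly the
 * update performed on sp[] above.  tulip_mchash() itself is defined outside
 * this excerpt; the helper name below is invented for the example.
 */
static void
example_set_hash_bit(u_int32_t *sp, unsigned hash)
{
	/* sp points at the setup buffer viewed as 32-bit longwords. */
	sp[hash >> 4] |= htole32(1 << (hash & 0xF));
}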
*/ CTR0(KTR_TULIP, "tulip_reset: drain transmit ring"); ri = &sc->tulip_txinfo; for (di = ri->ri_first; di < ri->ri_last; di++) { m = tulip_dequeue_mbuf(ri, di, SYNC_NONE); if (m != NULL) m_freem(m); di->di_desc->d_status = 0; } ri->ri_nextin = ri->ri_nextout = ri->ri_first; ri->ri_free = ri->ri_max; TULIP_TXDESC_PRESYNC(ri); /* * We need to collect all the mbufs that were on the * receive ring before we reinit it either to put * them back on or to know if we have to allocate * more. */ CTR0(KTR_TULIP, "tulip_reset: drain receive ring"); ri = &sc->tulip_rxinfo; ri->ri_nextin = ri->ri_nextout = ri->ri_first; ri->ri_free = ri->ri_max; for (di = ri->ri_first; di < ri->ri_last; di++) { di->di_desc->d_status = 0; di->di_desc->d_length1 = 0; di->di_desc->d_addr1 = 0; di->di_desc->d_length2 = 0; di->di_desc->d_addr2 = 0; } TULIP_RXDESC_PRESYNC(ri); for (di = ri->ri_first; di < ri->ri_last; di++) { m = tulip_dequeue_mbuf(ri, di, SYNC_NONE); if (m != NULL) m_freem(m); } /* * If tulip_reset is being called recursively, exit quickly knowing * that when the outer tulip_reset returns all the right stuff will * have happened. */ if (inreset) return; sc->tulip_intrmask |= TULIP_STS_NORMALINTR|TULIP_STS_RXINTR|TULIP_STS_TXINTR |TULIP_STS_ABNRMLINTR|TULIP_STS_SYSERROR|TULIP_STS_TXSTOPPED |TULIP_STS_TXUNDERFLOW|TULIP_STS_TXBABBLE |TULIP_STS_RXSTOPPED; if ((sc->tulip_flags & TULIP_DEVICEPROBE) == 0) (*sc->tulip_boardsw->bd_media_select)(sc); #if defined(TULIP_DEBUG) if ((sc->tulip_flags & TULIP_NEEDRESET) == TULIP_NEEDRESET) device_printf(sc->tulip_dev, "tulip_reset: additional reset needed?!?\n"); #endif if (bootverbose) tulip_media_print(sc); if (sc->tulip_features & TULIP_HAVE_DUALSENSE) TULIP_CSR_WRITE(sc, csr_sia_status, TULIP_CSR_READ(sc, csr_sia_status)); sc->tulip_flags &= ~(TULIP_DOINGSETUP|TULIP_WANTSETUP|TULIP_INRESET |TULIP_RXACT); } static void tulip_init(void *arg) { tulip_softc_t *sc = (tulip_softc_t *)arg; TULIP_LOCK(sc); tulip_init_locked(sc); TULIP_UNLOCK(sc); } static void tulip_init_locked(tulip_softc_t * const sc) { CTR0(KTR_TULIP, "tulip_init_locked"); if (sc->tulip_ifp->if_flags & IFF_UP) { if ((sc->tulip_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { /* initialize the media */ CTR0(KTR_TULIP, "tulip_init_locked: up but not running, reset chip"); tulip_reset(sc); } tulip_addr_filter(sc); sc->tulip_ifp->if_drv_flags |= IFF_DRV_RUNNING; if (sc->tulip_ifp->if_flags & IFF_PROMISC) { sc->tulip_flags |= TULIP_PROMISC; sc->tulip_cmdmode |= TULIP_CMD_PROMISCUOUS; sc->tulip_intrmask |= TULIP_STS_TXINTR; } else { sc->tulip_flags &= ~TULIP_PROMISC; sc->tulip_cmdmode &= ~TULIP_CMD_PROMISCUOUS; if (sc->tulip_flags & TULIP_ALLMULTI) { sc->tulip_cmdmode |= TULIP_CMD_ALLMULTI; } else { sc->tulip_cmdmode &= ~TULIP_CMD_ALLMULTI; } } sc->tulip_cmdmode |= TULIP_CMD_TXRUN; if ((sc->tulip_flags & (TULIP_TXPROBE_ACTIVE|TULIP_WANTSETUP)) == 0) { tulip_rx_intr(sc); sc->tulip_cmdmode |= TULIP_CMD_RXRUN; sc->tulip_intrmask |= TULIP_STS_RXSTOPPED; } else { sc->tulip_ifp->if_drv_flags |= IFF_DRV_OACTIVE; sc->tulip_cmdmode &= ~TULIP_CMD_RXRUN; sc->tulip_intrmask &= ~TULIP_STS_RXSTOPPED; } CTR2(KTR_TULIP, "tulip_init_locked: intr mask %08x cmdmode %08x", sc->tulip_intrmask, sc->tulip_cmdmode); TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode); CTR1(KTR_TULIP, "tulip_init_locked: status %08x\n", TULIP_CSR_READ(sc, csr_status)); if ((sc->tulip_flags & (TULIP_WANTSETUP|TULIP_TXPROBE_ACTIVE)) == TULIP_WANTSETUP) tulip_txput_setup(sc); 
callout_reset(&sc->tulip_stat_timer, hz, tulip_watchdog, sc); } else { CTR0(KTR_TULIP, "tulip_init_locked: not up, reset chip"); sc->tulip_ifp->if_drv_flags &= ~IFF_DRV_RUNNING; tulip_reset(sc); tulip_addr_filter(sc); callout_stop(&sc->tulip_stat_timer); } } #define DESC_STATUS(di) (((volatile tulip_desc_t *)((di)->di_desc))->d_status) #define DESC_FLAG(di) ((di)->di_desc->d_flag) static void tulip_rx_intr(tulip_softc_t * const sc) { TULIP_PERFSTART(rxintr) tulip_ringinfo_t * const ri = &sc->tulip_rxinfo; struct ifnet * const ifp = sc->tulip_ifp; int fillok = 1; #if defined(TULIP_DEBUG) int cnt = 0; #endif TULIP_LOCK_ASSERT(sc); CTR0(KTR_TULIP, "tulip_rx_intr: start"); for (;;) { TULIP_PERFSTART(rxget) tulip_descinfo_t *eop = ri->ri_nextin, *dip; int total_len = 0, last_offset = 0; struct mbuf *ms = NULL, *me = NULL; int accept = 0; int error; if (fillok && (ri->ri_max - ri->ri_free) < TULIP_RXQ_TARGET) goto queue_mbuf; #if defined(TULIP_DEBUG) if (cnt == ri->ri_max) break; #endif /* * If the TULIP has no descriptors, there can't be any receive * descriptors to process. */ if (eop == ri->ri_nextout) break; /* * 90% of the packets will fit in one descriptor. So we optimize * for that case. */ TULIP_RXDESC_POSTSYNC(ri); if ((DESC_STATUS(eop) & (TULIP_DSTS_OWNER|TULIP_DSTS_RxFIRSTDESC|TULIP_DSTS_RxLASTDESC)) == (TULIP_DSTS_RxFIRSTDESC|TULIP_DSTS_RxLASTDESC)) { ms = tulip_dequeue_mbuf(ri, eop, SYNC_RX); CTR2(KTR_TULIP, "tulip_rx_intr: single packet mbuf %p from descriptor %td", ms, eop - ri->ri_first); me = ms; ri->ri_free++; } else { /* * If still owned by the TULIP, don't touch it. */ if (DESC_STATUS(eop) & TULIP_DSTS_OWNER) break; /* * It is possible (though improbable unless MCLBYTES < 1518) for * a received packet to cross more than one receive descriptor. * We first loop through the descriptor ring making sure we have * received a complete packet. If not, we bail until the next * interrupt. */ dip = eop; while ((DESC_STATUS(eop) & TULIP_DSTS_RxLASTDESC) == 0) { if (++eop == ri->ri_last) eop = ri->ri_first; TULIP_RXDESC_POSTSYNC(ri); if (eop == ri->ri_nextout || DESC_STATUS(eop) & TULIP_DSTS_OWNER) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_rxintrs++; sc->tulip_dbg.dbg_rxpktsperintr[cnt]++; #endif TULIP_PERFEND(rxget); TULIP_PERFEND(rxintr); return; } total_len++; } /* * Dequeue the first buffer for the start of the packet. Hopefully * this will be the only one we need to dequeue. However, if the * packet consumed multiple descriptors, then we need to dequeue * those buffers and chain to the starting mbuf. All buffers but * the last buffer have the same length so we can set that now. * (we add to last_offset instead of multiplying since we normally * won't go into the loop and thereby saving ourselves from * doing a multiplication by 0 in the normal case). */ ms = tulip_dequeue_mbuf(ri, dip, SYNC_RX); CTR2(KTR_TULIP, "tulip_rx_intr: start packet mbuf %p from descriptor %td", ms, dip - ri->ri_first); ri->ri_free++; for (me = ms; total_len > 0; total_len--) { me->m_len = TULIP_RX_BUFLEN; last_offset += TULIP_RX_BUFLEN; if (++dip == ri->ri_last) dip = ri->ri_first; me->m_next = tulip_dequeue_mbuf(ri, dip, SYNC_RX); ri->ri_free++; me = me->m_next; CTR2(KTR_TULIP, "tulip_rx_intr: cont packet mbuf %p from descriptor %td", me, dip - ri->ri_first); } KASSERT(dip == eop, ("mismatched descinfo structs")); } /* * Now get the size of received packet (minus the CRC). 
*/ total_len = ((DESC_STATUS(eop) >> 16) & 0x7FFF) - ETHER_CRC_LEN; if ((sc->tulip_flags & TULIP_RXIGNORE) == 0 && ((DESC_STATUS(eop) & TULIP_DSTS_ERRSUM) == 0)) { me->m_len = total_len - last_offset; sc->tulip_flags |= TULIP_RXACT; accept = 1; CTR1(KTR_TULIP, "tulip_rx_intr: good packet; length %d", total_len); } else { CTR1(KTR_TULIP, "tulip_rx_intr: bad packet; status %08x", DESC_STATUS(eop)); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); if (DESC_STATUS(eop) & (TULIP_DSTS_RxBADLENGTH|TULIP_DSTS_RxOVERFLOW|TULIP_DSTS_RxWATCHDOG)) { sc->tulip_dot3stats.dot3StatsInternalMacReceiveErrors++; } else { #if defined(TULIP_VERBOSE) const char *error = NULL; #endif if (DESC_STATUS(eop) & TULIP_DSTS_RxTOOLONG) { sc->tulip_dot3stats.dot3StatsFrameTooLongs++; #if defined(TULIP_VERBOSE) error = "frame too long"; #endif } if (DESC_STATUS(eop) & TULIP_DSTS_RxBADCRC) { if (DESC_STATUS(eop) & TULIP_DSTS_RxDRBBLBIT) { sc->tulip_dot3stats.dot3StatsAlignmentErrors++; #if defined(TULIP_VERBOSE) error = "alignment error"; #endif } else { sc->tulip_dot3stats.dot3StatsFCSErrors++; #if defined(TULIP_VERBOSE) error = "bad crc"; #endif } } #if defined(TULIP_VERBOSE) if (error != NULL && (sc->tulip_flags & TULIP_NOMESSAGES) == 0) { device_printf(sc->tulip_dev, "receive: %6D: %s\n", mtod(ms, u_char *) + 6, ":", error); sc->tulip_flags |= TULIP_NOMESSAGES; } #endif } } #if defined(TULIP_DEBUG) cnt++; #endif if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if (++eop == ri->ri_last) eop = ri->ri_first; ri->ri_nextin = eop; queue_mbuf: /* * We have received a good packet that needs to be passed up the * stack. */ if (accept) { struct mbuf *m0; KASSERT(ms != NULL, ("no packet to accept")); #ifndef __NO_STRICT_ALIGNMENT /* * Copy the data into a new mbuf that is properly aligned. If * we fail to allocate a new mbuf, then drop the packet. We will * reuse the same rx buffer ('ms') below for another packet * regardless. */ m0 = m_devget(mtod(ms, caddr_t), total_len, ETHER_ALIGN, ifp, NULL); if (m0 == NULL) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); goto skip_input; } #else /* * Update the header for the mbuf referencing this receive * buffer and pass it up the stack. Allocate a new mbuf cluster * to replace the one we just passed up the stack. * * Note that if this packet crossed multiple descriptors * we don't even try to reallocate all the mbufs here. * Instead we rely on the test at the beginning of * the loop to refill for the extra consumed mbufs. */ ms->m_pkthdr.len = total_len; ms->m_pkthdr.rcvif = ifp; m0 = ms; ms = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); #endif TULIP_UNLOCK(sc); CTR1(KTR_TULIP, "tulip_rx_intr: passing %p to upper layer", m0); (*ifp->if_input)(ifp, m0); TULIP_LOCK(sc); } else if (ms == NULL) /* * If we are priming the TULIP with mbufs, then allocate * a new cluster for the next descriptor. */ ms = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); #ifndef __NO_STRICT_ALIGNMENT skip_input: #endif if (ms == NULL) { /* * Couldn't allocate a new buffer. Don't bother * trying to replenish the receive queue. */ fillok = 0; sc->tulip_flags |= TULIP_RXBUFSLOW; #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_rxlowbufs++; #endif TULIP_PERFEND(rxget); continue; } /* * Now give the buffer(s) to the TULIP and save in our * receive queue. 
*/ do { tulip_descinfo_t * const nextout = ri->ri_nextout; M_ASSERTPKTHDR(ms); KASSERT(ms->m_data == ms->m_ext.ext_buf, ("rx mbuf data doesn't point to cluster")); ms->m_len = ms->m_pkthdr.len = TULIP_RX_BUFLEN; error = bus_dmamap_load_mbuf(ri->ri_data_tag, *nextout->di_map, ms, tulip_dma_map_rxbuf, nextout->di_desc, BUS_DMA_NOWAIT); if (error) { device_printf(sc->tulip_dev, "unable to load rx map, error = %d\n", error); panic("tulip_rx_intr"); /* XXX */ } nextout->di_desc->d_status = TULIP_DSTS_OWNER; KASSERT(nextout->di_mbuf == NULL, ("clobbering earlier rx mbuf")); nextout->di_mbuf = ms; CTR2(KTR_TULIP, "tulip_rx_intr: enqueued mbuf %p to descriptor %td", ms, nextout - ri->ri_first); TULIP_RXDESC_POSTSYNC(ri); if (++ri->ri_nextout == ri->ri_last) ri->ri_nextout = ri->ri_first; ri->ri_free--; me = ms->m_next; ms->m_next = NULL; } while ((ms = me) != NULL); if ((ri->ri_max - ri->ri_free) >= TULIP_RXQ_TARGET) sc->tulip_flags &= ~TULIP_RXBUFSLOW; TULIP_PERFEND(rxget); } #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_rxintrs++; sc->tulip_dbg.dbg_rxpktsperintr[cnt]++; #endif TULIP_PERFEND(rxintr); } static int tulip_tx_intr(tulip_softc_t * const sc) { TULIP_PERFSTART(txintr) tulip_ringinfo_t * const ri = &sc->tulip_txinfo; struct mbuf *m; int xmits = 0; int descs = 0; CTR0(KTR_TULIP, "tulip_tx_intr: start"); TULIP_LOCK_ASSERT(sc); while (ri->ri_free < ri->ri_max) { u_int32_t d_flag; TULIP_TXDESC_POSTSYNC(ri); if (DESC_STATUS(ri->ri_nextin) & TULIP_DSTS_OWNER) break; ri->ri_free++; descs++; d_flag = DESC_FLAG(ri->ri_nextin); if (d_flag & TULIP_DFLAG_TxLASTSEG) { if (d_flag & TULIP_DFLAG_TxSETUPPKT) { CTR2(KTR_TULIP, "tulip_tx_intr: setup packet from descriptor %td: %08x", ri->ri_nextin - ri->ri_first, DESC_STATUS(ri->ri_nextin)); /* * We've just finished processing a setup packet. * Mark that we finished it. If there's not * another pending, startup the TULIP receiver. * Make sure we ack the RXSTOPPED so we won't get * an abormal interrupt indication. */ bus_dmamap_sync(sc->tulip_setup_tag, sc->tulip_setup_map, BUS_DMASYNC_POSTWRITE); sc->tulip_flags &= ~(TULIP_DOINGSETUP|TULIP_HASHONLY); if (DESC_FLAG(ri->ri_nextin) & TULIP_DFLAG_TxINVRSFILT) sc->tulip_flags |= TULIP_HASHONLY; if ((sc->tulip_flags & (TULIP_WANTSETUP|TULIP_TXPROBE_ACTIVE)) == 0) { tulip_rx_intr(sc); sc->tulip_cmdmode |= TULIP_CMD_RXRUN; sc->tulip_intrmask |= TULIP_STS_RXSTOPPED; CTR2(KTR_TULIP, "tulip_tx_intr: intr mask %08x cmdmode %08x", sc->tulip_intrmask, sc->tulip_cmdmode); TULIP_CSR_WRITE(sc, csr_status, TULIP_STS_RXSTOPPED); TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode); } } else { const u_int32_t d_status = DESC_STATUS(ri->ri_nextin); m = tulip_dequeue_mbuf(ri, ri->ri_nextin, SYNC_TX); CTR2(KTR_TULIP, "tulip_tx_intr: data packet %p from descriptor %td", m, ri->ri_nextin - ri->ri_first); if (m != NULL) { m_freem(m); #if defined(TULIP_DEBUG) } else { device_printf(sc->tulip_dev, "tx_intr: failed to dequeue mbuf?!?\n"); #endif } if (sc->tulip_flags & TULIP_TXPROBE_ACTIVE) { tulip_mediapoll_event_t event = TULIP_MEDIAPOLL_TXPROBE_OK; if (d_status & (TULIP_DSTS_TxNOCARR|TULIP_DSTS_TxEXCCOLL)) { #if defined(TULIP_DEBUG) if (d_status & TULIP_DSTS_TxNOCARR) sc->tulip_dbg.dbg_txprobe_nocarr++; if (d_status & TULIP_DSTS_TxEXCCOLL) sc->tulip_dbg.dbg_txprobe_exccoll++; #endif event = TULIP_MEDIAPOLL_TXPROBE_FAILED; } (*sc->tulip_boardsw->bd_media_poll)(sc, event); /* * Escape from the loop before media poll has reset the TULIP! 
*/ break; } else { xmits++; if (d_status & TULIP_DSTS_ERRSUM) { CTR1(KTR_TULIP, "tulip_tx_intr: output error: %08x", d_status); if_inc_counter(sc->tulip_ifp, IFCOUNTER_OERRORS, 1); if (d_status & TULIP_DSTS_TxEXCCOLL) sc->tulip_dot3stats.dot3StatsExcessiveCollisions++; if (d_status & TULIP_DSTS_TxLATECOLL) sc->tulip_dot3stats.dot3StatsLateCollisions++; if (d_status & (TULIP_DSTS_TxNOCARR|TULIP_DSTS_TxCARRLOSS)) sc->tulip_dot3stats.dot3StatsCarrierSenseErrors++; if (d_status & (TULIP_DSTS_TxUNDERFLOW|TULIP_DSTS_TxBABBLE)) sc->tulip_dot3stats.dot3StatsInternalMacTransmitErrors++; if (d_status & TULIP_DSTS_TxUNDERFLOW) sc->tulip_dot3stats.dot3StatsInternalTransmitUnderflows++; if (d_status & TULIP_DSTS_TxBABBLE) sc->tulip_dot3stats.dot3StatsInternalTransmitBabbles++; } else { u_int32_t collisions = (d_status & TULIP_DSTS_TxCOLLMASK) >> TULIP_DSTS_V_TxCOLLCNT; CTR2(KTR_TULIP, "tulip_tx_intr: output ok, collisions %d, status %08x", collisions, d_status); if_inc_counter(sc->tulip_ifp, IFCOUNTER_COLLISIONS, collisions); if (collisions == 1) sc->tulip_dot3stats.dot3StatsSingleCollisionFrames++; else if (collisions > 1) sc->tulip_dot3stats.dot3StatsMultipleCollisionFrames++; else if (d_status & TULIP_DSTS_TxDEFERRED) sc->tulip_dot3stats.dot3StatsDeferredTransmissions++; /* * SQE is only valid for 10baseT/BNC/AUI when not * running in full-duplex. In order to speed up the * test, the corresponding bit in tulip_flags needs to be * set as well to get us to count SQE Test Errors. */ if (d_status & TULIP_DSTS_TxNOHRTBT & sc->tulip_flags) sc->tulip_dot3stats.dot3StatsSQETestErrors++; } } } if (++ri->ri_nextin == ri->ri_last) ri->ri_nextin = ri->ri_first; if ((sc->tulip_flags & TULIP_TXPROBE_ACTIVE) == 0) sc->tulip_ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; } /* * If nothing is left to transmit, disable the timer. * Else, if progress was made, reset the timer back to 2 ticks. */
*/ if (ri->ri_free == ri->ri_max || (sc->tulip_flags & TULIP_TXPROBE_ACTIVE)) sc->tulip_txtimer = 0; else if (xmits > 0) sc->tulip_txtimer = TULIP_TXTIMER; if_inc_counter(sc->tulip_ifp, IFCOUNTER_OPACKETS, xmits); TULIP_PERFEND(txintr); return descs; } static void tulip_print_abnormal_interrupt(tulip_softc_t * const sc, u_int32_t csr) { const char * const *msgp = tulip_status_bits; const char *sep; u_int32_t mask; const char thrsh[] = "72|128\0\0\0" "96|256\0\0\0" "128|512\0\0" "160|1024"; TULIP_LOCK_ASSERT(sc); csr &= (1 << (sizeof(tulip_status_bits)/sizeof(tulip_status_bits[0]))) - 1; device_printf(sc->tulip_dev, "abnormal interrupt:"); for (sep = " ", mask = 1; mask <= csr; mask <<= 1, msgp++) { if ((csr & mask) && *msgp != NULL) { printf("%s%s", sep, *msgp); if (mask == TULIP_STS_TXUNDERFLOW && (sc->tulip_flags & TULIP_NEWTXTHRESH)) { sc->tulip_flags &= ~TULIP_NEWTXTHRESH; if (sc->tulip_cmdmode & TULIP_CMD_STOREFWD) { printf(" (switching to store-and-forward mode)"); } else { printf(" (raising TX threshold to %s)", &thrsh[9 * ((sc->tulip_cmdmode & TULIP_CMD_THRESHOLDCTL) >> 14)]); } } sep = ", "; } } printf("\n"); } static void tulip_intr_handler(tulip_softc_t * const sc) { TULIP_PERFSTART(intr) u_int32_t csr; CTR0(KTR_TULIP, "tulip_intr_handler invoked"); TULIP_LOCK_ASSERT(sc); while ((csr = TULIP_CSR_READ(sc, csr_status)) & sc->tulip_intrmask) { TULIP_CSR_WRITE(sc, csr_status, csr); if (csr & TULIP_STS_SYSERROR) { sc->tulip_last_system_error = (csr & TULIP_STS_ERRORMASK) >> TULIP_STS_ERR_SHIFT; if (sc->tulip_flags & TULIP_NOMESSAGES) { sc->tulip_flags |= TULIP_SYSTEMERROR; } else { device_printf(sc->tulip_dev, "system error: %s\n", tulip_system_errors[sc->tulip_last_system_error]); } sc->tulip_flags |= TULIP_NEEDRESET; sc->tulip_system_errors++; break; } if (csr & (TULIP_STS_LINKPASS|TULIP_STS_LINKFAIL) & sc->tulip_intrmask) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_link_intrs++; #endif if (sc->tulip_boardsw->bd_media_poll != NULL) { (*sc->tulip_boardsw->bd_media_poll)(sc, csr & TULIP_STS_LINKFAIL ? TULIP_MEDIAPOLL_LINKFAIL : TULIP_MEDIAPOLL_LINKPASS); csr &= ~TULIP_STS_ABNRMLINTR; } tulip_media_print(sc); } if (csr & (TULIP_STS_RXINTR|TULIP_STS_RXNOBUF)) { u_int32_t misses = TULIP_CSR_READ(sc, csr_missed_frames); if (csr & TULIP_STS_RXNOBUF) sc->tulip_dot3stats.dot3StatsMissedFrames += misses & 0xFFFF; /* * Pass 2.[012] of the 21140A-A[CDE] may hang and/or corrupt data * on receive overflows. */ if ((misses & 0x0FFE0000) && (sc->tulip_features & TULIP_HAVE_RXBADOVRFLW)) { sc->tulip_dot3stats.dot3StatsInternalMacReceiveErrors++; /* * Stop the receiver process and spin until it's stopped. * Tell rx_intr to drop the packets it dequeues. */ TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode & ~TULIP_CMD_RXRUN); while ((TULIP_CSR_READ(sc, csr_status) & TULIP_STS_RXSTOPPED) == 0) ; TULIP_CSR_WRITE(sc, csr_status, TULIP_STS_RXSTOPPED); sc->tulip_flags |= TULIP_RXIGNORE; } tulip_rx_intr(sc); if (sc->tulip_flags & TULIP_RXIGNORE) { /* * Restart the receiver. 
*/ sc->tulip_flags &= ~TULIP_RXIGNORE; TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode); } } if (csr & TULIP_STS_ABNRMLINTR) { u_int32_t tmp = csr & sc->tulip_intrmask & ~(TULIP_STS_NORMALINTR|TULIP_STS_ABNRMLINTR); if (csr & TULIP_STS_TXUNDERFLOW) { if ((sc->tulip_cmdmode & TULIP_CMD_THRESHOLDCTL) != TULIP_CMD_THRSHLD160) { sc->tulip_cmdmode += TULIP_CMD_THRSHLD96; sc->tulip_flags |= TULIP_NEWTXTHRESH; } else if (sc->tulip_features & TULIP_HAVE_STOREFWD) { sc->tulip_cmdmode |= TULIP_CMD_STOREFWD; sc->tulip_flags |= TULIP_NEWTXTHRESH; } } if (sc->tulip_flags & TULIP_NOMESSAGES) { sc->tulip_statusbits |= tmp; } else { tulip_print_abnormal_interrupt(sc, tmp); sc->tulip_flags |= TULIP_NOMESSAGES; } TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode); } if (sc->tulip_flags & (TULIP_WANTTXSTART|TULIP_TXPROBE_ACTIVE|TULIP_DOINGSETUP|TULIP_PROMISC)) { tulip_tx_intr(sc); if ((sc->tulip_flags & TULIP_TXPROBE_ACTIVE) == 0) tulip_start_locked(sc); } } if (sc->tulip_flags & TULIP_NEEDRESET) { tulip_reset(sc); tulip_init_locked(sc); } TULIP_PERFEND(intr); } static void tulip_intr_shared(void *arg) { tulip_softc_t * sc = arg; for (; sc != NULL; sc = sc->tulip_slaves) { TULIP_LOCK(sc); #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_intrs++; #endif tulip_intr_handler(sc); TULIP_UNLOCK(sc); } } static void tulip_intr_normal(void *arg) { tulip_softc_t * sc = (tulip_softc_t *) arg; TULIP_LOCK(sc); #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_intrs++; #endif tulip_intr_handler(sc); TULIP_UNLOCK(sc); } static struct mbuf * tulip_txput(tulip_softc_t * const sc, struct mbuf *m) { TULIP_PERFSTART(txput) tulip_ringinfo_t * const ri = &sc->tulip_txinfo; tulip_descinfo_t *eop, *nextout; int segcnt, free; u_int32_t d_status; bus_dma_segment_t segs[TULIP_MAX_TXSEG]; bus_dmamap_t *map; int error, nsegs; struct mbuf *m0; TULIP_LOCK_ASSERT(sc); #if defined(TULIP_DEBUG) if ((sc->tulip_cmdmode & TULIP_CMD_TXRUN) == 0) { device_printf(sc->tulip_dev, "txput%s: tx not running\n", (sc->tulip_flags & TULIP_TXPROBE_ACTIVE) ? "(probe)" : ""); sc->tulip_flags |= TULIP_WANTTXSTART; sc->tulip_dbg.dbg_txput_finishes[0]++; goto finish; } #endif /* * Now we try to fill in our transmit descriptors. This is * a bit reminiscent of going on the Ark two by two * since each descriptor for the TULIP can describe * two buffers. So we advance through packet filling * each of the two entries at a time to fill each * descriptor. Clear the first and last segment bits * in each descriptor (actually just clear everything * but the end-of-ring or chain bits) to make sure * we don't get messed up by previously sent packets. * * We may fail to put the entire packet on the ring if * there is either not enough ring entries free or if the * packet has more than MAX_TXSEG segments. In the former * case we will just wait for the ring to empty. In the * latter case we have to recopy. */ #if defined(KTR) && KTR_TULIP segcnt = 1; m0 = m; while (m0->m_next != NULL) { segcnt++; m0 = m0->m_next; } CTR2(KTR_TULIP, "tulip_txput: sending packet %p (%d chunks)", m, segcnt); #endif d_status = 0; eop = nextout = ri->ri_nextout; segcnt = 0; free = ri->ri_free; /* * Reclaim some tx descriptors if we are out since we need at least one * free descriptor so that we have a dma_map to load the mbuf. 
*/ if (free == 0) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_no_txmaps++; #endif free += tulip_tx_intr(sc); } if (free == 0) { sc->tulip_flags |= TULIP_WANTTXSTART; #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_txput_finishes[1]++; #endif goto finish; } error = bus_dmamap_load_mbuf_sg(ri->ri_data_tag, *eop->di_map, m, segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { if (error == EFBIG) { /* * The packet exceeds the number of transmit buffer * entries that we can use for one packet, so we have * to recopy it into one mbuf and then try again. If * we can't recopy it, try again later. */ m0 = m_defrag(m, M_NOWAIT); if (m0 == NULL) { sc->tulip_flags |= TULIP_WANTTXSTART; #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_txput_finishes[2]++; #endif goto finish; } m = m0; error = bus_dmamap_load_mbuf_sg(ri->ri_data_tag, *eop->di_map, m, segs, &nsegs, BUS_DMA_NOWAIT); } if (error != 0) { device_printf(sc->tulip_dev, "unable to load tx map, error = %d\n", error); #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_txput_finishes[3]++; #endif goto finish; } } CTR1(KTR_TULIP, "tulip_txput: nsegs %d", nsegs); /* * Each descriptor allows for up to 2 fragments since we don't use * the descriptor chaining mode in this driver. */ if ((free -= (nsegs + 1) / 2) <= 0 /* * See if there's any unclaimed space in the transmit ring. */ && (free += tulip_tx_intr(sc)) <= 0) { /* * There's no more room but since nothing * has been committed at this point, just * show output is active, put back the * mbuf and return. */ sc->tulip_flags |= TULIP_WANTTXSTART; #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_txput_finishes[4]++; #endif bus_dmamap_unload(ri->ri_data_tag, *eop->di_map); goto finish; } for (; nsegs - segcnt > 1; segcnt += 2) { eop = nextout; eop->di_desc->d_flag &= TULIP_DFLAG_ENDRING|TULIP_DFLAG_CHAIN; eop->di_desc->d_status = d_status; eop->di_desc->d_addr1 = segs[segcnt].ds_addr & 0xffffffff; eop->di_desc->d_length1 = segs[segcnt].ds_len; eop->di_desc->d_addr2 = segs[segcnt+1].ds_addr & 0xffffffff; eop->di_desc->d_length2 = segs[segcnt+1].ds_len; d_status = TULIP_DSTS_OWNER; if (++nextout == ri->ri_last) nextout = ri->ri_first; } if (segcnt < nsegs) { eop = nextout; eop->di_desc->d_flag &= TULIP_DFLAG_ENDRING|TULIP_DFLAG_CHAIN; eop->di_desc->d_status = d_status; eop->di_desc->d_addr1 = segs[segcnt].ds_addr & 0xffffffff; eop->di_desc->d_length1 = segs[segcnt].ds_len; eop->di_desc->d_addr2 = 0; eop->di_desc->d_length2 = 0; if (++nextout == ri->ri_last) nextout = ri->ri_first; } /* * tulip_tx_intr() harvests the mbuf from the last descriptor in the * frame. We just used the dmamap in the first descriptor for the * load operation however. Thus, to let the tulip_dequeue_mbuf() call * in tulip_tx_intr() unload the correct dmamap, we swap the dmamap * pointers in the two descriptors if this is a multiple-descriptor * packet. */ if (eop != ri->ri_nextout) { map = eop->di_map; eop->di_map = ri->ri_nextout->di_map; ri->ri_nextout->di_map = map; } /* * bounce a copy to the bpf listener, if any. */ if (!(sc->tulip_flags & TULIP_DEVICEPROBE)) BPF_MTAP(sc->tulip_ifp, m); /* * The descriptors have been filled in. Now get ready * to transmit. 
*/ CTR3(KTR_TULIP, "tulip_txput: enqueued mbuf %p to descriptors %td - %td", m, ri->ri_nextout - ri->ri_first, eop - ri->ri_first); KASSERT(eop->di_mbuf == NULL, ("clobbering earlier tx mbuf")); eop->di_mbuf = m; TULIP_TXMAP_PRESYNC(ri, ri->ri_nextout); m = NULL; /* * Make sure the next descriptor after this packet is owned * by us since it may have been set up above if we ran out * of room in the ring. */ nextout->di_desc->d_status = 0; TULIP_TXDESC_PRESYNC(ri); /* * Mark the last and first segments, indicate we want a transmit * complete interrupt, and tell it to transmit! */ eop->di_desc->d_flag |= TULIP_DFLAG_TxLASTSEG|TULIP_DFLAG_TxWANTINTR; /* * Note that ri->ri_nextout is still the start of the packet * and until we set the OWNER bit, we can still back out of * everything we have done. */ ri->ri_nextout->di_desc->d_flag |= TULIP_DFLAG_TxFIRSTSEG; TULIP_TXDESC_PRESYNC(ri); ri->ri_nextout->di_desc->d_status = TULIP_DSTS_OWNER; TULIP_TXDESC_PRESYNC(ri); /* * This advances the ring for us. */ ri->ri_nextout = nextout; ri->ri_free = free; TULIP_PERFEND(txput); if (sc->tulip_flags & TULIP_TXPROBE_ACTIVE) { TULIP_CSR_WRITE(sc, csr_txpoll, 1); sc->tulip_ifp->if_drv_flags |= IFF_DRV_OACTIVE; TULIP_PERFEND(txput); return NULL; } /* * switch back to the single queueing ifstart. */ sc->tulip_flags &= ~TULIP_WANTTXSTART; if (sc->tulip_txtimer == 0) sc->tulip_txtimer = TULIP_TXTIMER; #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_txput_finishes[5]++; #endif /* * If we want a txstart, there must be not enough space in the * transmit ring. So we want to enable transmit done interrupts * so we can immediately reclaim some space. When the transmit * interrupt is posted, the interrupt handler will call tx_intr * to reclaim space and then txstart (since WANTTXSTART is set). * txstart will move the packet into the transmit ring and clear * WANTTXSTART thereby causing TXINTR to be cleared. */ finish: #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_txput_finishes[6]++; #endif if (sc->tulip_flags & (TULIP_WANTTXSTART|TULIP_DOINGSETUP)) { sc->tulip_ifp->if_drv_flags |= IFF_DRV_OACTIVE; if ((sc->tulip_intrmask & TULIP_STS_TXINTR) == 0) { sc->tulip_intrmask |= TULIP_STS_TXINTR; TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); } } else if ((sc->tulip_flags & TULIP_PROMISC) == 0) { if (sc->tulip_intrmask & TULIP_STS_TXINTR) { sc->tulip_intrmask &= ~TULIP_STS_TXINTR; TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); } } TULIP_CSR_WRITE(sc, csr_txpoll, 1); TULIP_PERFEND(txput); return m; } static void tulip_txput_setup(tulip_softc_t * const sc) { tulip_ringinfo_t * const ri = &sc->tulip_txinfo; tulip_desc_t *nextout; TULIP_LOCK_ASSERT(sc); /* * We will transmit, at most, one setup packet per call to ifstart. */ #if defined(TULIP_DEBUG) if ((sc->tulip_cmdmode & TULIP_CMD_TXRUN) == 0) { device_printf(sc->tulip_dev, "txput_setup: tx not running\n"); sc->tulip_flags |= TULIP_WANTTXSTART; return; } #endif /* * Try to reclaim some free descriptors.. */ if (ri->ri_free < 2) tulip_tx_intr(sc); if ((sc->tulip_flags & TULIP_DOINGSETUP) || ri->ri_free == 1) { sc->tulip_flags |= TULIP_WANTTXSTART; return; } bcopy(sc->tulip_setupdata, sc->tulip_setupbuf, sizeof(sc->tulip_setupdata)); /* * Clear WANTSETUP and set DOINGSETUP. Since we know that WANTSETUP is * set and DOINGSETUP is clear doing an XOR of the two will DTRT. 
*/ sc->tulip_flags ^= TULIP_WANTSETUP|TULIP_DOINGSETUP; ri->ri_free--; nextout = ri->ri_nextout->di_desc; nextout->d_flag &= TULIP_DFLAG_ENDRING|TULIP_DFLAG_CHAIN; nextout->d_flag |= TULIP_DFLAG_TxFIRSTSEG|TULIP_DFLAG_TxLASTSEG |TULIP_DFLAG_TxSETUPPKT|TULIP_DFLAG_TxWANTINTR; if (sc->tulip_flags & TULIP_WANTHASHPERFECT) nextout->d_flag |= TULIP_DFLAG_TxHASHFILT; else if (sc->tulip_flags & TULIP_WANTHASHONLY) nextout->d_flag |= TULIP_DFLAG_TxHASHFILT|TULIP_DFLAG_TxINVRSFILT; nextout->d_length2 = 0; nextout->d_addr2 = 0; nextout->d_length1 = sizeof(sc->tulip_setupdata); nextout->d_addr1 = sc->tulip_setup_dma_addr & 0xffffffff; bus_dmamap_sync(sc->tulip_setup_tag, sc->tulip_setup_map, BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE); TULIP_TXDESC_PRESYNC(ri); CTR1(KTR_TULIP, "tulip_txput_setup: using descriptor %td", ri->ri_nextout - ri->ri_first); /* * Advance the ring for the next transmit packet. */ if (++ri->ri_nextout == ri->ri_last) ri->ri_nextout = ri->ri_first; /* * Make sure the next descriptor is owned by us since it * may have been set up above if we ran out of room in the * ring. */ ri->ri_nextout->di_desc->d_status = 0; TULIP_TXDESC_PRESYNC(ri); nextout->d_status = TULIP_DSTS_OWNER; /* * Flush the ownwership of the current descriptor */ TULIP_TXDESC_PRESYNC(ri); TULIP_CSR_WRITE(sc, csr_txpoll, 1); if ((sc->tulip_intrmask & TULIP_STS_TXINTR) == 0) { sc->tulip_intrmask |= TULIP_STS_TXINTR; TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); } } static int tulip_ifioctl(struct ifnet * ifp, u_long cmd, caddr_t data) { TULIP_PERFSTART(ifioctl) tulip_softc_t * const sc = (tulip_softc_t *)ifp->if_softc; struct ifreq *ifr = (struct ifreq *) data; int error = 0; switch (cmd) { case SIOCSIFFLAGS: { TULIP_LOCK(sc); tulip_init_locked(sc); TULIP_UNLOCK(sc); break; } case SIOCSIFMEDIA: case SIOCGIFMEDIA: { error = ifmedia_ioctl(ifp, ifr, &sc->tulip_ifmedia, cmd); break; } case SIOCADDMULTI: case SIOCDELMULTI: { /* * Update multicast listeners */ TULIP_LOCK(sc); tulip_init_locked(sc); TULIP_UNLOCK(sc); error = 0; break; } #ifdef SIOCGADDRROM case SIOCGADDRROM: { - error = copyout(sc->tulip_rombuf, ifr->ifr_data, sizeof(sc->tulip_rombuf)); + error = copyout(sc->tulip_rombuf, ifr_data_get_ptr(ifr), sizeof(sc->tulip_rombuf)); break; } #endif #ifdef SIOCGCHIPID case SIOCGCHIPID: { ifr->ifr_metric = (int) sc->tulip_chipid; break; } #endif default: { error = ether_ioctl(ifp, cmd, data); break; } } TULIP_PERFEND(ifioctl); return error; } static void tulip_start(struct ifnet * const ifp) { TULIP_PERFSTART(ifstart) tulip_softc_t * const sc = (tulip_softc_t *)ifp->if_softc; TULIP_LOCK(sc); tulip_start_locked(sc); TULIP_UNLOCK(sc); TULIP_PERFEND(ifstart); } static void tulip_start_locked(tulip_softc_t * const sc) { struct mbuf *m; TULIP_LOCK_ASSERT(sc); CTR0(KTR_TULIP, "tulip_start_locked invoked"); if ((sc->tulip_flags & (TULIP_WANTSETUP|TULIP_TXPROBE_ACTIVE)) == TULIP_WANTSETUP) tulip_txput_setup(sc); CTR1(KTR_TULIP, "tulip_start_locked: %d tx packets pending", sc->tulip_ifp->if_snd.ifq_len); while (!IFQ_DRV_IS_EMPTY(&sc->tulip_ifp->if_snd)) { IFQ_DRV_DEQUEUE(&sc->tulip_ifp->if_snd, m); if(m == NULL) break; if ((m = tulip_txput(sc, m)) != NULL) { IFQ_DRV_PREPEND(&sc->tulip_ifp->if_snd, m); break; } } } static void tulip_watchdog(void *arg) { TULIP_PERFSTART(stat) tulip_softc_t *sc = arg; #if defined(TULIP_DEBUG) u_int32_t rxintrs; #endif TULIP_LOCK_ASSERT(sc); callout_reset(&sc->tulip_stat_timer, hz, tulip_watchdog, sc); #if defined(TULIP_DEBUG) rxintrs = sc->tulip_dbg.dbg_rxintrs - 
sc->tulip_dbg.dbg_last_rxintrs; if (rxintrs > sc->tulip_dbg.dbg_high_rxintrs_hz) sc->tulip_dbg.dbg_high_rxintrs_hz = rxintrs; sc->tulip_dbg.dbg_last_rxintrs = sc->tulip_dbg.dbg_rxintrs; #endif /* TULIP_DEBUG */ /* * These should be rare so do a bulk test up front so we can just skip * them if needed. */ if (sc->tulip_flags & (TULIP_SYSTEMERROR|TULIP_RXBUFSLOW|TULIP_NOMESSAGES)) { /* * If the number of receive buffer is low, try to refill */ if (sc->tulip_flags & TULIP_RXBUFSLOW) tulip_rx_intr(sc); if (sc->tulip_flags & TULIP_SYSTEMERROR) { if_printf(sc->tulip_ifp, "%d system errors: last was %s\n", sc->tulip_system_errors, tulip_system_errors[sc->tulip_last_system_error]); } if (sc->tulip_statusbits) { tulip_print_abnormal_interrupt(sc, sc->tulip_statusbits); sc->tulip_statusbits = 0; } sc->tulip_flags &= ~(TULIP_NOMESSAGES|TULIP_SYSTEMERROR); } if (sc->tulip_txtimer) tulip_tx_intr(sc); if (sc->tulip_txtimer && --sc->tulip_txtimer == 0) { if_printf(sc->tulip_ifp, "transmission timeout\n"); if (TULIP_DO_AUTOSENSE(sc)) { sc->tulip_media = TULIP_MEDIA_UNKNOWN; sc->tulip_probe_state = TULIP_PROBE_INACTIVE; sc->tulip_flags &= ~(TULIP_WANTRXACT|TULIP_LINKUP); } tulip_reset(sc); tulip_init_locked(sc); } TULIP_PERFEND(stat); TULIP_PERFMERGE(sc, perf_intr_cycles); TULIP_PERFMERGE(sc, perf_ifstart_cycles); TULIP_PERFMERGE(sc, perf_ifioctl_cycles); TULIP_PERFMERGE(sc, perf_stat_cycles); TULIP_PERFMERGE(sc, perf_timeout_cycles); TULIP_PERFMERGE(sc, perf_ifstart_one_cycles); TULIP_PERFMERGE(sc, perf_txput_cycles); TULIP_PERFMERGE(sc, perf_txintr_cycles); TULIP_PERFMERGE(sc, perf_rxintr_cycles); TULIP_PERFMERGE(sc, perf_rxget_cycles); TULIP_PERFMERGE(sc, perf_intr); TULIP_PERFMERGE(sc, perf_ifstart); TULIP_PERFMERGE(sc, perf_ifioctl); TULIP_PERFMERGE(sc, perf_stat); TULIP_PERFMERGE(sc, perf_timeout); TULIP_PERFMERGE(sc, perf_ifstart_one); TULIP_PERFMERGE(sc, perf_txput); TULIP_PERFMERGE(sc, perf_txintr); TULIP_PERFMERGE(sc, perf_rxintr); TULIP_PERFMERGE(sc, perf_rxget); } static void tulip_attach(tulip_softc_t * const sc) { struct ifnet *ifp; ifp = sc->tulip_ifp = if_alloc(IFT_ETHER); /* XXX: driver name/unit should be set some other way */ if_initname(ifp, "de", sc->tulip_unit); ifp->if_softc = sc; ifp->if_flags = IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST; ifp->if_ioctl = tulip_ifioctl; ifp->if_start = tulip_start; ifp->if_init = tulip_init; IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); ifp->if_snd.ifq_drv_maxlen = ifqmaxlen; IFQ_SET_READY(&ifp->if_snd); device_printf(sc->tulip_dev, "%s%s pass %d.%d%s\n", sc->tulip_boardid, tulip_chipdescs[sc->tulip_chipid], (sc->tulip_revinfo & 0xF0) >> 4, sc->tulip_revinfo & 0x0F, (sc->tulip_features & (TULIP_HAVE_ISVSROM|TULIP_HAVE_OKSROM)) == TULIP_HAVE_ISVSROM ? " (invalid EESPROM checksum)" : ""); TULIP_LOCK(sc); (*sc->tulip_boardsw->bd_media_probe)(sc); ifmedia_init(&sc->tulip_ifmedia, 0, tulip_ifmedia_change, tulip_ifmedia_status); tulip_ifmedia_add(sc); tulip_reset(sc); TULIP_UNLOCK(sc); ether_ifattach(sc->tulip_ifp, sc->tulip_enaddr); TULIP_LOCK(sc); sc->tulip_flags &= ~TULIP_DEVICEPROBE; TULIP_UNLOCK(sc); } /* Release memory for a single descriptor ring. */ static void tulip_busdma_freering(tulip_ringinfo_t *ri) { int i; /* Release the DMA maps and tag for data buffers. 
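 * Added note: the per-buffer maps are destroyed before their tag because
 * bus_dma_tag_destroy(9) is expected to fail with EBUSY while maps created
 * from that tag still exist; the descriptor ring memory and its tag below
 * are torn down in the same order for the same reason.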
*/ if (ri->ri_data_maps != NULL) { for (i = 0; i < ri->ri_max; i++) { if (ri->ri_data_maps[i] != NULL) { bus_dmamap_destroy(ri->ri_data_tag, ri->ri_data_maps[i]); ri->ri_data_maps[i] = NULL; } } free(ri->ri_data_maps, M_DEVBUF); ri->ri_data_maps = NULL; } if (ri->ri_data_tag != NULL) { bus_dma_tag_destroy(ri->ri_data_tag); ri->ri_data_tag = NULL; } /* Release the DMA memory and tag for the ring descriptors. */ if (ri->ri_dma_addr != 0) { bus_dmamap_unload(ri->ri_ring_tag, ri->ri_ring_map); ri->ri_dma_addr = 0; } if (ri->ri_descs != NULL) { bus_dmamem_free(ri->ri_ring_tag, ri->ri_descs, ri->ri_ring_map); ri->ri_descs = NULL; } if (ri->ri_ring_tag != NULL) { bus_dma_tag_destroy(ri->ri_ring_tag); ri->ri_ring_tag = NULL; } } /* Allocate memory for a single descriptor ring. */ static int tulip_busdma_allocring(device_t dev, tulip_softc_t * const sc, size_t count, bus_size_t align, int nsegs, tulip_ringinfo_t *ri, const char *name) { size_t size; int error, i; /* First, setup a tag. */ ri->ri_max = count; size = count * sizeof(tulip_desc_t); error = bus_dma_tag_create(bus_get_dma_tag(dev), 32, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, size, 1, size, 0, NULL, NULL, &ri->ri_ring_tag); if (error) { device_printf(dev, "failed to allocate %s descriptor ring dma tag\n", name); return (error); } /* Next, allocate memory for the descriptors. */ error = bus_dmamem_alloc(ri->ri_ring_tag, (void **)&ri->ri_descs, BUS_DMA_NOWAIT | BUS_DMA_ZERO, &ri->ri_ring_map); if (error) { device_printf(dev, "failed to allocate memory for %s descriptor ring\n", name); return (error); } /* Map the descriptors. */ error = bus_dmamap_load(ri->ri_ring_tag, ri->ri_ring_map, ri->ri_descs, size, tulip_dma_map_addr, &ri->ri_dma_addr, BUS_DMA_NOWAIT); if (error) { device_printf(dev, "failed to get dma address for %s descriptor ring\n", name); return (error); } /* Allocate a tag for the data buffers. */ error = bus_dma_tag_create(bus_get_dma_tag(dev), align, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES * nsegs, nsegs, MCLBYTES, 0, NULL, NULL, &ri->ri_data_tag); if (error) { device_printf(dev, "failed to allocate %s buffer dma tag\n", name); return (error); } /* Allocate maps for the data buffers. */ ri->ri_data_maps = malloc(sizeof(bus_dmamap_t) * count, M_DEVBUF, M_WAITOK | M_ZERO); for (i = 0; i < count; i++) { error = bus_dmamap_create(ri->ri_data_tag, 0, &ri->ri_data_maps[i]); if (error) { device_printf(dev, "failed to create map for %s buffer %d\n", name, i); return (error); } } return (0); } /* Release busdma maps, tags, and memory. */ static void tulip_busdma_cleanup(tulip_softc_t * const sc) { /* Release resources for the setup descriptor. */ if (sc->tulip_setup_dma_addr != 0) { bus_dmamap_unload(sc->tulip_setup_tag, sc->tulip_setup_map); sc->tulip_setup_dma_addr = 0; } if (sc->tulip_setupbuf != NULL) { bus_dmamem_free(sc->tulip_setup_tag, sc->tulip_setupbuf, sc->tulip_setup_map); sc->tulip_setupbuf = NULL; } if (sc->tulip_setup_tag != NULL) { bus_dma_tag_destroy(sc->tulip_setup_tag); sc->tulip_setup_tag = NULL; } /* Release the transmit ring. */ tulip_busdma_freering(&sc->tulip_txinfo); /* Release the receive ring. */ tulip_busdma_freering(&sc->tulip_rxinfo); } static int tulip_busdma_init(device_t dev, tulip_softc_t * const sc) { int error; /* * Allocate space and dmamap for transmit ring. */ error = tulip_busdma_allocring(dev, sc, TULIP_TXDESCS, 1, TULIP_MAX_TXSEG, &sc->tulip_txinfo, "transmit"); if (error) return (error); /* * Allocate space and dmamap for receive ring. 
We tell bus_dma that * we can map MCLBYTES so that it will accept a full MCLBYTES cluster, * but we will only map the first TULIP_RX_BUFLEN bytes. This is not * a waste in practice though as an ethernet frame can easily fit * in TULIP_RX_BUFLEN bytes. */ error = tulip_busdma_allocring(dev, sc, TULIP_RXDESCS, 4, 1, &sc->tulip_rxinfo, "receive"); if (error) return (error); /* * Allocate a DMA tag, memory, and map for setup descriptor */ error = bus_dma_tag_create(bus_get_dma_tag(dev), 32, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, sizeof(sc->tulip_setupdata), 1, sizeof(sc->tulip_setupdata), 0, NULL, NULL, &sc->tulip_setup_tag); if (error) { device_printf(dev, "failed to allocate setup descriptor dma tag\n"); return (error); } error = bus_dmamem_alloc(sc->tulip_setup_tag, (void **)&sc->tulip_setupbuf, BUS_DMA_NOWAIT | BUS_DMA_ZERO, &sc->tulip_setup_map); if (error) { device_printf(dev, "failed to allocate memory for setup descriptor\n"); return (error); } error = bus_dmamap_load(sc->tulip_setup_tag, sc->tulip_setup_map, sc->tulip_setupbuf, sizeof(sc->tulip_setupdata), tulip_dma_map_addr, &sc->tulip_setup_dma_addr, BUS_DMA_NOWAIT); if (error) { device_printf(dev, "failed to get dma address for setup descriptor\n"); return (error); } return error; } static void tulip_initcsrs(tulip_softc_t * const sc, tulip_csrptr_t csr_base, size_t csr_size) { sc->tulip_csrs.csr_busmode = csr_base + 0 * csr_size; sc->tulip_csrs.csr_txpoll = csr_base + 1 * csr_size; sc->tulip_csrs.csr_rxpoll = csr_base + 2 * csr_size; sc->tulip_csrs.csr_rxlist = csr_base + 3 * csr_size; sc->tulip_csrs.csr_txlist = csr_base + 4 * csr_size; sc->tulip_csrs.csr_status = csr_base + 5 * csr_size; sc->tulip_csrs.csr_command = csr_base + 6 * csr_size; sc->tulip_csrs.csr_intr = csr_base + 7 * csr_size; sc->tulip_csrs.csr_missed_frames = csr_base + 8 * csr_size; sc->tulip_csrs.csr_9 = csr_base + 9 * csr_size; sc->tulip_csrs.csr_10 = csr_base + 10 * csr_size; sc->tulip_csrs.csr_11 = csr_base + 11 * csr_size; sc->tulip_csrs.csr_12 = csr_base + 12 * csr_size; sc->tulip_csrs.csr_13 = csr_base + 13 * csr_size; sc->tulip_csrs.csr_14 = csr_base + 14 * csr_size; sc->tulip_csrs.csr_15 = csr_base + 15 * csr_size; } static int tulip_initring( device_t dev, tulip_softc_t * const sc, tulip_ringinfo_t * const ri, int ndescs) { int i; ri->ri_descinfo = malloc(sizeof(tulip_descinfo_t) * ndescs, M_DEVBUF, M_WAITOK | M_ZERO); for (i = 0; i < ndescs; i++) { ri->ri_descinfo[i].di_desc = &ri->ri_descs[i]; ri->ri_descinfo[i].di_map = &ri->ri_data_maps[i]; } ri->ri_first = ri->ri_descinfo; ri->ri_max = ndescs; ri->ri_last = ri->ri_first + ri->ri_max; bzero(ri->ri_descs, sizeof(tulip_desc_t) * ri->ri_max); ri->ri_last[-1].di_desc->d_flag = TULIP_DFLAG_ENDRING; return (0); } /* * This is the PCI configuration support. 
*/ #define PCI_CBIO PCIR_BAR(0) /* Configuration Base IO Address */ #define PCI_CBMA PCIR_BAR(1) /* Configuration Base Memory Address */ #define PCI_CFDA 0x40 /* Configuration Driver Area */ static int tulip_pci_probe(device_t dev) { const char *name = NULL; if (pci_get_vendor(dev) != DEC_VENDORID) return ENXIO; /* * Some LanMedia WAN cards use the Tulip chip, but they have * their own driver, and we should not recognize them */ if (pci_get_subvendor(dev) == 0x1376) return ENXIO; switch (pci_get_device(dev)) { case CHIPID_21040: name = "Digital 21040 Ethernet"; break; case CHIPID_21041: name = "Digital 21041 Ethernet"; break; case CHIPID_21140: if (pci_get_revid(dev) >= 0x20) name = "Digital 21140A Fast Ethernet"; else name = "Digital 21140 Fast Ethernet"; break; case CHIPID_21142: if (pci_get_revid(dev) >= 0x20) name = "Digital 21143 Fast Ethernet"; else name = "Digital 21142 Fast Ethernet"; break; } if (name) { device_set_desc(dev, name); return BUS_PROBE_LOW_PRIORITY; } return ENXIO; } static int tulip_shutdown(device_t dev) { tulip_softc_t * const sc = device_get_softc(dev); TULIP_CSR_WRITE(sc, csr_busmode, TULIP_BUSMODE_SWRESET); DELAY(10); /* Wait 10 microseconds (actually 50 PCI cycles but at 33MHz that comes to two microseconds but wait a bit longer anyways) */ return 0; } static int tulip_pci_attach(device_t dev) { tulip_softc_t *sc; int retval, idx; u_int32_t revinfo, cfdainfo; unsigned csroffset = TULIP_PCI_CSROFFSET; unsigned csrsize = TULIP_PCI_CSRSIZE; tulip_csrptr_t csr_base; tulip_chipid_t chipid = TULIP_CHIPID_UNKNOWN; struct resource *res; int rid, unit; unit = device_get_unit(dev); if (unit >= TULIP_MAX_DEVICES) { device_printf(dev, "not configured; limit of %d reached or exceeded\n", TULIP_MAX_DEVICES); return ENXIO; } revinfo = pci_get_revid(dev); cfdainfo = pci_read_config(dev, PCI_CFDA, 4); /* turn busmaster on in case BIOS doesn't set it */ pci_enable_busmaster(dev); if (pci_get_vendor(dev) == DEC_VENDORID) { if (pci_get_device(dev) == CHIPID_21040) chipid = TULIP_21040; else if (pci_get_device(dev) == CHIPID_21041) chipid = TULIP_21041; else if (pci_get_device(dev) == CHIPID_21140) chipid = (revinfo >= 0x20) ? TULIP_21140A : TULIP_21140; else if (pci_get_device(dev) == CHIPID_21142) chipid = (revinfo >= 0x20) ? 
TULIP_21143 : TULIP_21142; } if (chipid == TULIP_CHIPID_UNKNOWN) return ENXIO; if (chipid == TULIP_21040 && revinfo < 0x20) { device_printf(dev, "not configured; 21040 pass 2.0 required (%d.%d found)\n", revinfo >> 4, revinfo & 0x0f); return ENXIO; } else if (chipid == TULIP_21140 && revinfo < 0x11) { device_printf(dev, "not configured; 21140 pass 1.1 required (%d.%d found)\n", revinfo >> 4, revinfo & 0x0f); return ENXIO; } sc = device_get_softc(dev); sc->tulip_dev = dev; sc->tulip_pci_busno = pci_get_bus(dev); sc->tulip_pci_devno = pci_get_slot(dev); sc->tulip_chipid = chipid; sc->tulip_flags |= TULIP_DEVICEPROBE; if (chipid == TULIP_21140 || chipid == TULIP_21140A) sc->tulip_features |= TULIP_HAVE_GPR|TULIP_HAVE_STOREFWD; if (chipid == TULIP_21140A && revinfo <= 0x22) sc->tulip_features |= TULIP_HAVE_RXBADOVRFLW; if (chipid == TULIP_21140) sc->tulip_features |= TULIP_HAVE_BROKEN_HASH; if (chipid != TULIP_21040 && chipid != TULIP_21140) sc->tulip_features |= TULIP_HAVE_POWERMGMT; if (chipid == TULIP_21041 || chipid == TULIP_21142 || chipid == TULIP_21143) { sc->tulip_features |= TULIP_HAVE_DUALSENSE; if (chipid != TULIP_21041 || revinfo >= 0x20) sc->tulip_features |= TULIP_HAVE_SIANWAY; if (chipid != TULIP_21041) sc->tulip_features |= TULIP_HAVE_SIAGP|TULIP_HAVE_RXBADOVRFLW|TULIP_HAVE_STOREFWD; if (chipid != TULIP_21041 && revinfo >= 0x20) sc->tulip_features |= TULIP_HAVE_SIA100; } if (sc->tulip_features & TULIP_HAVE_POWERMGMT && (cfdainfo & (TULIP_CFDA_SLEEP|TULIP_CFDA_SNOOZE))) { cfdainfo &= ~(TULIP_CFDA_SLEEP|TULIP_CFDA_SNOOZE); pci_write_config(dev, PCI_CFDA, cfdainfo, 4); DELAY(11*1000); } sc->tulip_unit = unit; sc->tulip_revinfo = revinfo; #if defined(TULIP_IOMAPPED) rid = PCI_CBIO; res = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &rid, RF_ACTIVE); #else rid = PCI_CBMA; res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); #endif if (!res) return ENXIO; sc->tulip_csrs_bst = rman_get_bustag(res); sc->tulip_csrs_bsh = rman_get_bushandle(res); csr_base = 0; mtx_init(TULIP_MUTEX(sc), MTX_NETWORK_LOCK, device_get_nameunit(dev), MTX_DEF); callout_init_mtx(&sc->tulip_callout, TULIP_MUTEX(sc), 0); callout_init_mtx(&sc->tulip_stat_timer, TULIP_MUTEX(sc), 0); tulips[unit] = sc; tulip_initcsrs(sc, csr_base + csroffset, csrsize); if ((retval = tulip_busdma_init(dev, sc)) != 0) { device_printf(dev, "error initing bus_dma: %d\n", retval); tulip_busdma_cleanup(sc); mtx_destroy(TULIP_MUTEX(sc)); return ENXIO; } retval = tulip_initring(dev, sc, &sc->tulip_rxinfo, TULIP_RXDESCS); if (retval == 0) retval = tulip_initring(dev, sc, &sc->tulip_txinfo, TULIP_TXDESCS); if (retval) { tulip_busdma_cleanup(sc); mtx_destroy(TULIP_MUTEX(sc)); return retval; } /* * Make sure there won't be any interrupts or such... 
*/ TULIP_CSR_WRITE(sc, csr_busmode, TULIP_BUSMODE_SWRESET); DELAY(100); /* Wait 10 microseconds (actually 50 PCI cycles but at 33MHz that comes to two microseconds but wait a bit longer anyways) */ TULIP_LOCK(sc); retval = tulip_read_macaddr(sc); TULIP_UNLOCK(sc); if (retval < 0) { device_printf(dev, "can't read ENET ROM (why=%d) (", retval); for (idx = 0; idx < 32; idx++) printf("%02x", sc->tulip_rombuf[idx]); printf("\n"); device_printf(dev, "%s%s pass %d.%d\n", sc->tulip_boardid, tulip_chipdescs[sc->tulip_chipid], (sc->tulip_revinfo & 0xF0) >> 4, sc->tulip_revinfo & 0x0F); device_printf(dev, "address unknown\n"); } else { void (*intr_rtn)(void *) = tulip_intr_normal; if (sc->tulip_features & TULIP_HAVE_SHAREDINTR) intr_rtn = tulip_intr_shared; tulip_attach(sc); /* Setup interrupt last. */ if ((sc->tulip_features & TULIP_HAVE_SLAVEDINTR) == 0) { void *ih; rid = 0; res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (res == NULL || bus_setup_intr(dev, res, INTR_TYPE_NET | INTR_MPSAFE, NULL, intr_rtn, sc, &ih)) { device_printf(dev, "couldn't map interrupt\n"); tulip_busdma_cleanup(sc); ether_ifdetach(sc->tulip_ifp); if_free(sc->tulip_ifp); mtx_destroy(TULIP_MUTEX(sc)); return ENXIO; } } } return 0; } static device_method_t tulip_pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, tulip_pci_probe), DEVMETHOD(device_attach, tulip_pci_attach), DEVMETHOD(device_shutdown, tulip_shutdown), { 0, 0 } }; static driver_t tulip_pci_driver = { "de", tulip_pci_methods, sizeof(tulip_softc_t), }; static devclass_t tulip_devclass; DRIVER_MODULE(de, pci, tulip_pci_driver, tulip_devclass, 0, 0); #ifdef DDB void tulip_dumpring(int unit, int ring); void tulip_dumpdesc(int unit, int ring, int desc); void tulip_status(int unit); void tulip_dumpring(int unit, int ring) { tulip_softc_t *sc; tulip_ringinfo_t *ri; tulip_descinfo_t *di; if (unit < 0 || unit >= TULIP_MAX_DEVICES) { db_printf("invalid unit %d\n", unit); return; } sc = tulips[unit]; if (sc == NULL) { db_printf("unit %d not present\n", unit); return; } switch (ring) { case 0: db_printf("receive ring:\n"); ri = &sc->tulip_rxinfo; break; case 1: db_printf("transmit ring:\n"); ri = &sc->tulip_txinfo; break; default: db_printf("invalid ring %d\n", ring); return; } db_printf(" nextin: %td, nextout: %td, max: %d, free: %d\n", ri->ri_nextin - ri->ri_first, ri->ri_nextout - ri->ri_first, ri->ri_max, ri->ri_free); for (di = ri->ri_first; di != ri->ri_last; di++) { if (di->di_mbuf != NULL) db_printf(" descriptor %td: mbuf %p\n", di - ri->ri_first, di->di_mbuf); else if (di->di_desc->d_flag & TULIP_DFLAG_TxSETUPPKT) db_printf(" descriptor %td: setup packet\n", di - ri->ri_first); } } void tulip_dumpdesc(int unit, int ring, int desc) { tulip_softc_t *sc; tulip_ringinfo_t *ri; tulip_descinfo_t *di; char *s; if (unit < 0 || unit >= TULIP_MAX_DEVICES) { db_printf("invalid unit %d\n", unit); return; } sc = tulips[unit]; if (sc == NULL) { db_printf("unit %d not present\n", unit); return; } switch (ring) { case 0: s = "receive"; ri = &sc->tulip_rxinfo; break; case 1: s = "transmit"; ri = &sc->tulip_txinfo; break; default: db_printf("invalid ring %d\n", ring); return; } if (desc < 0 || desc >= ri->ri_max) { db_printf("invalid descriptor %d\n", desc); return; } db_printf("%s descriptor %d:\n", s, desc); di = &ri->ri_first[desc]; db_printf(" mbuf: %p\n", di->di_mbuf); db_printf(" status: %08x flag: %03x\n", di->di_desc->d_status, di->di_desc->d_flag); db_printf(" addr1: %08x len1: %03x\n", di->di_desc->d_addr1, 
di->di_desc->d_length1); db_printf(" addr2: %08x len2: %03x\n", di->di_desc->d_addr2, di->di_desc->d_length2); } #endif Index: stable/11/sys/dev/en/midway.c =================================================================== --- stable/11/sys/dev/en/midway.c (revision 332287) +++ stable/11/sys/dev/en/midway.c (revision 332288) @@ -1,3367 +1,3367 @@ /* $NetBSD: midway.c,v 1.30 1997/09/29 17:40:38 chuck Exp $ */ /* (sync'd to midway.c 1.68) */ /*- * Copyright (c) 1996 Charles D. Cranor and Washington University. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Charles D. Cranor and * Washington University. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * * m i d w a y . c e n i 1 5 5 d r i v e r * * author: Chuck Cranor * started: spring, 1996 (written from scratch). * * notes from the author: * Extra special thanks go to Werner Almesberger, EPFL LRC. Werner's * ENI driver was especially useful in figuring out how this card works. * I would also like to thank Werner for promptly answering email and being * generally helpful. */ #define EN_DIAG #define EN_DDBHOOK 1 /* compile in ddb functions */ /* * Note on EN_ENIDMAFIX: the byte aligner on the ENI version of the card * appears to be broken. it works just fine if there is no load... however * when the card is loaded the data get corrupted. to see this, one only * has to use "telnet" over ATM. do the following command in "telnet": * cat /usr/share/misc/termcap * "telnet" seems to generate lots of 1023 byte mbufs (which make great * use of the byte aligner). watch "netstat -s" for checksum errors. * * I further tested this by adding a function that compared the transmit * data on the card's SRAM with the data in the mbuf chain _after_ the * "transmit DMA complete" interrupt. using the "telnet" test I got data * mismatches where the byte-aligned data should have been. using ddb * and en_dumpmem() I verified that the DTQs fed into the card were * absolutely correct. thus, we are forced to concluded that the ENI * hardware is buggy. 
note that the Adaptec version of the card works * just fine with byte DMA. * * bottom line: we set EN_ENIDMAFIX to 1 to avoid byte DMAs on the ENI * card. */ #if defined(DIAGNOSTIC) && !defined(EN_DIAG) #define EN_DIAG /* link in with master DIAG option */ #endif #define EN_COUNT(X) (X)++ #ifdef EN_DEBUG #undef EN_DDBHOOK #define EN_DDBHOOK 1 /* * This macro removes almost all the EN_DEBUG conditionals in the code that make * to code a good deal less readable. */ #define DBG(SC, FL, PRINT) do { \ if ((SC)->debug & DBG_##FL) { \ device_printf((SC)->dev, "%s: "#FL": ", __func__); \ printf PRINT; \ printf("\n"); \ } \ } while (0) enum { DBG_INIT = 0x0001, /* debug attach/detach */ DBG_TX = 0x0002, /* debug transmitting */ DBG_SERV = 0x0004, /* debug service interrupts */ DBG_IOCTL = 0x0008, /* debug ioctls */ DBG_VC = 0x0010, /* debug VC handling */ DBG_INTR = 0x0020, /* debug interrupts */ DBG_DMA = 0x0040, /* debug DMA probing */ DBG_IPACKETS = 0x0080, /* print input packets */ DBG_REG = 0x0100, /* print all register access */ DBG_LOCK = 0x0200, /* debug locking */ }; #else /* EN_DEBUG */ #define DBG(SC, FL, PRINT) do { } while (0) #endif /* EN_DEBUG */ #include "opt_inet.h" #include "opt_natm.h" #include "opt_ddb.h" #ifdef DDB #undef EN_DDBHOOK #define EN_DDBHOOK 1 #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(NATM) || defined(INET) || defined(INET6) #include #if defined(INET) || defined(INET6) #include #endif #endif #ifdef NATM #include #endif #include #include #include #include #include #include #include #include #include #include #include /* * params */ #ifndef EN_TXHIWAT #define EN_TXHIWAT (64 * 1024) /* max 64 KB waiting to be DMAd out */ #endif SYSCTL_DECL(_hw_atm); /* * dma tables * * The plan is indexed by the number of words to transfer. * The maximum index is 15 for 60 words. */ struct en_dmatab { uint8_t bcode; /* code */ uint8_t divshift; /* byte divisor */ }; static const struct en_dmatab en_dmaplan[] = { { 0, 0 }, /* 0 */ { MIDDMA_WORD, 2}, /* 1 */ { MIDDMA_2WORD, 3}, /* 2 */ { MIDDMA_WORD, 2}, /* 3 */ { MIDDMA_4WORD, 4}, /* 4 */ { MIDDMA_WORD, 2}, /* 5 */ { MIDDMA_2WORD, 3}, /* 6 */ { MIDDMA_WORD, 2}, /* 7 */ { MIDDMA_8WORD, 5}, /* 8 */ { MIDDMA_WORD, 2}, /* 9 */ { MIDDMA_2WORD, 3}, /* 10 */ { MIDDMA_WORD, 2}, /* 11 */ { MIDDMA_4WORD, 4}, /* 12 */ { MIDDMA_WORD, 2}, /* 13 */ { MIDDMA_2WORD, 3}, /* 14 */ { MIDDMA_WORD, 2}, /* 15 */ { MIDDMA_16WORD,6}, /* 16 */ }; /* * prototypes */ #ifdef EN_DDBHOOK int en_dump(int unit, int level); int en_dumpmem(int,int,int); #endif static void en_close_finish(struct en_softc *sc, struct en_vcc *vc); #define EN_LOCK(SC) do { \ DBG(SC, LOCK, ("ENLOCK %d\n", __LINE__)); \ mtx_lock(&sc->en_mtx); \ } while (0) #define EN_UNLOCK(SC) do { \ DBG(SC, LOCK, ("ENUNLOCK %d\n", __LINE__)); \ mtx_unlock(&sc->en_mtx); \ } while (0) #define EN_CHECKLOCK(sc) mtx_assert(&sc->en_mtx, MA_OWNED) /* * While a transmit mbuf is waiting to get transmit DMA resources we * need to keep some information with it. We don't want to allocate * additional memory for this so we stuff it into free fields in the * mbuf packet header. Neither the checksum fields nor the rcvif field are used * so use these. 
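 *
 * Illustrative sketch (added commentary, the numbers are made up): the
 * MBUF_SET_TX()/MBUF_GET_TX() macros below pack the VCI and flags into
 * csum_data, the data length and pad count into csum_flags, and the DMA
 * map pointer into rcvif.  For example, vci 42 with TX_AAL5|TX_HAS_TBD,
 * datalen 1500 and pad 36 would be stored as
 *
 *	csum_data  = 42 | ((TX_AAL5|TX_HAS_TBD) << MID_VCI_BITS);
 *	csum_flags = 1500 | (36 << 16);
 *
 * and MBUF_GET_TX() recovers exactly those values again.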
*/ #define TX_AAL5 0x1 /* transmit AAL5 PDU */ #define TX_HAS_TBD 0x2 /* TBD did fit into mbuf */ #define TX_HAS_PAD 0x4 /* padding did fit into mbuf */ #define TX_HAS_PDU 0x8 /* PDU trailer did fit into mbuf */ #define MBUF_SET_TX(M, VCI, FLAGS, DATALEN, PAD, MAP) do { \ (M)->m_pkthdr.csum_data = (VCI) | ((FLAGS) << MID_VCI_BITS); \ (M)->m_pkthdr.csum_flags = ((DATALEN) & 0xffff) | \ ((PAD & 0x3f) << 16); \ (M)->m_pkthdr.rcvif = (void *)(MAP); \ } while (0) #define MBUF_GET_TX(M, VCI, FLAGS, DATALEN, PAD, MAP) do { \ (VCI) = (M)->m_pkthdr.csum_data & ((1 << MID_VCI_BITS) - 1); \ (FLAGS) = ((M)->m_pkthdr.csum_data >> MID_VCI_BITS) & 0xf; \ (DATALEN) = (M)->m_pkthdr.csum_flags & 0xffff; \ (PAD) = ((M)->m_pkthdr.csum_flags >> 16) & 0x3f; \ (MAP) = (void *)((M)->m_pkthdr.rcvif); \ } while (0) #define EN_WRAPADD(START, STOP, CUR, VAL) do { \ (CUR) = (CUR) + (VAL); \ if ((CUR) >= (STOP)) \ (CUR) = (START) + ((CUR) - (STOP)); \ } while (0) #define WORD_IDX(START, X) (((X) - (START)) / sizeof(uint32_t)) #define SETQ_END(SC, VAL) ((SC)->is_adaptec ? \ ((VAL) | (MID_DMA_END >> 4)) : \ ((VAL) | (MID_DMA_END))) /* * The dtq and drq members are set for each END entry in the corresponding * card queue entry. It is used to find out, when a buffer has been * finished DMAing and can be freed. * * We store sc->dtq and sc->drq data in the following format... * the 0x80000 ensures we != 0 */ #define EN_DQ_MK(SLOT, LEN) (((SLOT) << 20) | (LEN) | (0x80000)) #define EN_DQ_SLOT(X) ((X) >> 20) #define EN_DQ_LEN(X) ((X) & 0x3ffff) /* * Variables */ static uma_zone_t en_vcc_zone; /***********************************************************************/ /* * en_read{x}: read a word from the card. These are the only functions * that read from the card. */ static __inline uint32_t en_readx(struct en_softc *sc, uint32_t r) { uint32_t v; #ifdef EN_DIAG if (r > MID_MAXOFF || (r % 4)) panic("en_read out of range, r=0x%x", r); #endif v = bus_space_read_4(sc->en_memt, sc->en_base, r); return (v); } static __inline uint32_t en_read(struct en_softc *sc, uint32_t r) { uint32_t v; #ifdef EN_DIAG if (r > MID_MAXOFF || (r % 4)) panic("en_read out of range, r=0x%x", r); #endif v = bus_space_read_4(sc->en_memt, sc->en_base, r); DBG(sc, REG, ("en_read(%#x) -> %08x", r, v)); return (v); } /* * en_write: write a word to the card. This is the only function that * writes to the card. 
*/ static __inline void en_write(struct en_softc *sc, uint32_t r, uint32_t v) { #ifdef EN_DIAG if (r > MID_MAXOFF || (r % 4)) panic("en_write out of range, r=0x%x", r); #endif DBG(sc, REG, ("en_write(%#x) <- %08x", r, v)); bus_space_write_4(sc->en_memt, sc->en_base, r, v); } /* * en_k2sz: convert KBytes to a size parameter (a log2) */ static __inline int en_k2sz(int k) { switch(k) { case 1: return (0); case 2: return (1); case 4: return (2); case 8: return (3); case 16: return (4); case 32: return (5); case 64: return (6); case 128: return (7); default: panic("en_k2sz"); } return (0); } #define en_log2(X) en_k2sz(X) #if 0 /* * en_b2sz: convert a DMA burst code to its byte size */ static __inline int en_b2sz(int b) { switch (b) { case MIDDMA_WORD: return (1*4); case MIDDMA_2WMAYBE: case MIDDMA_2WORD: return (2*4); case MIDDMA_4WMAYBE: case MIDDMA_4WORD: return (4*4); case MIDDMA_8WMAYBE: case MIDDMA_8WORD: return (8*4); case MIDDMA_16WMAYBE: case MIDDMA_16WORD: return (16*4); default: panic("en_b2sz"); } return (0); } #endif /* * en_sz2b: convert a burst size (bytes) to DMA burst code */ static __inline int en_sz2b(int sz) { switch (sz) { case 1*4: return (MIDDMA_WORD); case 2*4: return (MIDDMA_2WORD); case 4*4: return (MIDDMA_4WORD); case 8*4: return (MIDDMA_8WORD); case 16*4: return (MIDDMA_16WORD); default: panic("en_sz2b"); } return(0); } #ifdef EN_DEBUG /* * Dump a packet */ static void en_dump_packet(struct en_softc *sc, struct mbuf *m) { int plen = m->m_pkthdr.len; u_int pos = 0; u_int totlen = 0; int len; u_char *ptr; device_printf(sc->dev, "packet len=%d", plen); while (m != NULL) { totlen += m->m_len; ptr = mtod(m, u_char *); for (len = 0; len < m->m_len; len++, pos++, ptr++) { if (pos % 16 == 8) printf(" "); if (pos % 16 == 0) printf("\n"); printf(" %02x", *ptr); } m = m->m_next; } printf("\n"); if (totlen != plen) printf("sum of m_len=%u\n", totlen); } #endif /*********************************************************************/ /* * DMA maps */ /* * Map constructor for a MAP. * * This is called each time when a map is allocated * from the pool and about to be returned to the user. Here we actually * allocate the map if there isn't one. The problem is that we may fail * to allocate the DMA map yet have no means to signal this error. Therefor * when allocating a map, the call must check that there is a map. An * additional problem is, that i386 maps will be NULL, yet are ok and must * be freed so let's use a flag to signal allocation. * * Caveat: we have no way to know that we are called from an interrupt context * here. We rely on the fact, that bus_dmamap_create uses M_NOWAIT in all * its allocations. * * LOCK: any, not needed */ static int en_map_ctor(void *mem, int size, void *arg, int flags) { struct en_softc *sc = arg; struct en_map *map = mem; int err; err = bus_dmamap_create(sc->txtag, 0, &map->map); if (err != 0) { device_printf(sc->dev, "cannot create DMA map %d\n", err); return (err); } map->flags = ENMAP_ALLOC; map->sc = sc; return (0); } /* * Map destructor. * * Called when a map is disposed into the zone. If the map is loaded, unload * it. * * LOCK: any, not needed */ static void en_map_dtor(void *mem, int size, void *arg) { struct en_map *map = mem; if (map->flags & ENMAP_LOADED) { bus_dmamap_unload(map->sc->txtag, map->map); map->flags &= ~ENMAP_LOADED; } } /* * Map finializer. * * This is called each time a map is returned from the zone to the system. * Get rid of the dmamap here. 
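 *
 * Added sketch (not from the original; the zone name here is invented):
 * ctor, dtor and fini pair up with a zone created elsewhere in the driver
 * roughly along the lines of
 *
 *	zone = uma_zcreate("en dma maps", sizeof(struct en_map),
 *	    en_map_ctor, en_map_dtor, NULL, en_map_fini,
 *	    UMA_ALIGN_PTR, 0);
 *
 * so the ctor runs on every allocation from the zone, the dtor on every
 * free back into it, and fini only when an item leaves the zone for good.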
* * LOCK: any, not needed */ static void en_map_fini(void *mem, int size) { struct en_map *map = mem; bus_dmamap_destroy(map->sc->txtag, map->map); } /*********************************************************************/ /* * Transmission */ /* * Argument structure to load a transmit DMA map */ struct txarg { struct en_softc *sc; struct mbuf *m; u_int vci; u_int chan; /* transmit channel */ u_int datalen; /* length of user data */ u_int flags; u_int wait; /* return: out of resources */ }; /* * TX DMA map loader helper. This function is the callback when the map * is loaded. It should fill the DMA segment descriptors into the hardware. * * LOCK: locked, needed */ static void en_txdma_load(void *uarg, bus_dma_segment_t *segs, int nseg, bus_size_t mapsize, int error) { struct txarg *tx = uarg; struct en_softc *sc = tx->sc; struct en_txslot *slot = &sc->txslot[tx->chan]; uint32_t cur; /* on-card buffer position (bytes offset) */ uint32_t dtq; /* on-card queue position (byte offset) */ uint32_t last_dtq; /* last DTQ we have written */ uint32_t tmp; u_int free; /* free queue entries on card */ u_int needalign, cnt; bus_size_t rest; /* remaining bytes in current segment */ bus_addr_t addr; bus_dma_segment_t *s; uint32_t count, bcode; int i; if (error != 0) return; cur = slot->cur; dtq = sc->dtq_us; free = sc->dtq_free; last_dtq = 0; /* make gcc happy */ /* * Local macro to add an entry to the transmit DMA area. If there * are no entries left, return. Save the byte offset of the entry * in last_dtq for later use. */ #define PUT_DTQ_ENTRY(ENI, BCODE, COUNT, ADDR) \ if (free == 0) { \ EN_COUNT(sc->stats.txdtqout); \ tx->wait = 1; \ return; \ } \ last_dtq = dtq; \ en_write(sc, dtq + 0, (ENI || !sc->is_adaptec) ? \ MID_MK_TXQ_ENI(COUNT, tx->chan, 0, BCODE) : \ MID_MK_TXQ_ADP(COUNT, tx->chan, 0, BCODE)); \ en_write(sc, dtq + 4, ADDR); \ \ EN_WRAPADD(MID_DTQOFF, MID_DTQEND, dtq, 8); \ free--; /* * Local macro to generate a DMA entry to DMA cnt bytes. Updates * the current buffer byte offset accordingly. */ #define DO_DTQ(TYPE) do { \ rest -= cnt; \ EN_WRAPADD(slot->start, slot->stop, cur, cnt); \ DBG(sc, TX, ("tx%d: "TYPE" %u bytes, %ju left, cur %#x", \ tx->chan, cnt, (uintmax_t)rest, cur)); \ \ PUT_DTQ_ENTRY(1, bcode, count, addr); \ \ addr += cnt; \ } while (0) if (!(tx->flags & TX_HAS_TBD)) { /* * Prepend the TBD - it did not fit into the first mbuf */ tmp = MID_TBD_MK1((tx->flags & TX_AAL5) ? MID_TBD_AAL5 : MID_TBD_NOAAL5, sc->vccs[tx->vci]->txspeed, tx->m->m_pkthdr.len / MID_ATMDATASZ); en_write(sc, cur, tmp); EN_WRAPADD(slot->start, slot->stop, cur, 4); tmp = MID_TBD_MK2(tx->vci, 0, 0); en_write(sc, cur, tmp); EN_WRAPADD(slot->start, slot->stop, cur, 4); /* update DMA address */ PUT_DTQ_ENTRY(0, MIDDMA_JK, WORD_IDX(slot->start, cur), 0); } for (i = 0, s = segs; i < nseg; i++, s++) { rest = s->ds_len; addr = s->ds_addr; if (sc->is_adaptec) { /* adaptec card - simple */ /* advance the on-card buffer pointer */ EN_WRAPADD(slot->start, slot->stop, cur, rest); DBG(sc, TX, ("tx%d: adp %ju bytes %#jx (cur now 0x%x)", tx->chan, (uintmax_t)rest, (uintmax_t)addr, cur)); PUT_DTQ_ENTRY(0, 0, rest, addr); continue; } /* * do we need to do a DMA op to align to the maximum * burst? Note, that we are alway 32-bit aligned. 
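 *
 * Worked example (added commentary, the numbers are only illustrative):
 * with a best burst of 64 bytes and a segment that starts 16 bytes past a
 * burst boundary, the first DTQ below moves 64 - 16 = 48 bytes to reach
 * the boundary, the bulk of the segment then goes out in full 64-byte
 * bursts, and any tail shorter than one burst is swept up by the final
 * cleanup DTQ.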
*/ if (sc->alburst && (needalign = (addr & sc->bestburstmask)) != 0) { /* compute number of bytes, words and code */ cnt = sc->bestburstlen - needalign; if (cnt > rest) cnt = rest; count = cnt / sizeof(uint32_t); if (sc->noalbursts) { bcode = MIDDMA_WORD; } else { bcode = en_dmaplan[count].bcode; count = cnt >> en_dmaplan[count].divshift; } DO_DTQ("al_dma"); } /* do we need to do a max-sized burst? */ if (rest >= sc->bestburstlen) { count = rest >> sc->bestburstshift; cnt = count << sc->bestburstshift; bcode = sc->bestburstcode; DO_DTQ("best_dma"); } /* do we need to do a cleanup burst? */ if (rest != 0) { cnt = rest; count = rest / sizeof(uint32_t); if (sc->noalbursts) { bcode = MIDDMA_WORD; } else { bcode = en_dmaplan[count].bcode; count = cnt >> en_dmaplan[count].divshift; } DO_DTQ("clean_dma"); } } KASSERT (tx->flags & TX_HAS_PAD, ("PDU not padded")); if ((tx->flags & TX_AAL5) && !(tx->flags & TX_HAS_PDU)) { /* * Append the AAL5 PDU trailer */ tmp = MID_PDU_MK1(0, 0, tx->datalen); en_write(sc, cur, tmp); EN_WRAPADD(slot->start, slot->stop, cur, 4); en_write(sc, cur, 0); EN_WRAPADD(slot->start, slot->stop, cur, 4); /* update DMA address */ PUT_DTQ_ENTRY(0, MIDDMA_JK, WORD_IDX(slot->start, cur), 0); } /* record the end for the interrupt routine */ sc->dtq[MID_DTQ_A2REG(last_dtq)] = EN_DQ_MK(tx->chan, tx->m->m_pkthdr.len); /* set the end flag in the last descriptor */ en_write(sc, last_dtq + 0, SETQ_END(sc, en_read(sc, last_dtq + 0))); #undef PUT_DTQ_ENTRY #undef DO_DTQ /* commit */ slot->cur = cur; sc->dtq_free = free; sc->dtq_us = dtq; /* tell card */ en_write(sc, MID_DMA_WRTX, MID_DTQ_A2REG(sc->dtq_us)); } /* * en_txdma: start transmit DMA on the given channel, if possible * * This is called from two places: when we got new packets from the upper * layer or when we found that buffer space has freed up during interrupt * processing. * * LOCK: locked, needed */ static void en_txdma(struct en_softc *sc, struct en_txslot *slot) { struct en_map *map; struct mbuf *lastm; struct txarg tx; u_int pad; int error; DBG(sc, TX, ("tx%td: starting ...", slot - sc->txslot)); again: bzero(&tx, sizeof(tx)); tx.chan = slot - sc->txslot; tx.sc = sc; /* * get an mbuf waiting for DMA */ _IF_DEQUEUE(&slot->q, tx.m); if (tx.m == NULL) { DBG(sc, TX, ("tx%td: ...done!", slot - sc->txslot)); return; } MBUF_GET_TX(tx.m, tx.vci, tx.flags, tx.datalen, pad, map); /* * note: don't use the entire buffer space. if WRTX becomes equal * to RDTX, the transmitter stops assuming the buffer is empty! 
--kjc */ if (tx.m->m_pkthdr.len >= slot->bfree) { EN_COUNT(sc->stats.txoutspace); DBG(sc, TX, ("tx%td: out of transmit space", slot - sc->txslot)); goto waitres; } lastm = NULL; if (!(tx.flags & TX_HAS_PAD)) { if (pad != 0) { /* Append the padding buffer */ (void)m_length(tx.m, &lastm); lastm->m_next = sc->padbuf; sc->padbuf->m_len = pad; } tx.flags |= TX_HAS_PAD; } /* * Try to load that map */ error = bus_dmamap_load_mbuf(sc->txtag, map->map, tx.m, en_txdma_load, &tx, BUS_DMA_NOWAIT); if (lastm != NULL) lastm->m_next = NULL; if (error != 0) { device_printf(sc->dev, "loading TX map failed %d\n", error); goto dequeue_drop; } map->flags |= ENMAP_LOADED; if (tx.wait) { /* probably not enough space */ bus_dmamap_unload(map->sc->txtag, map->map); map->flags &= ~ENMAP_LOADED; sc->need_dtqs = 1; DBG(sc, TX, ("tx%td: out of transmit DTQs", slot - sc->txslot)); goto waitres; } EN_COUNT(sc->stats.launch); if_inc_counter(sc->ifp, IFCOUNTER_OPACKETS, 1); sc->vccs[tx.vci]->opackets++; sc->vccs[tx.vci]->obytes += tx.datalen; #ifdef ENABLE_BPF if (bpf_peers_present(sc->ifp->if_bpf)) { /* * adjust the top of the mbuf to skip the TBD if present * before passing the packet to bpf. * Also remove padding and the PDU trailer. Assume both of * them to be in the same mbuf. pktlen, m_len and m_data * are not needed anymore so we can change them. */ if (tx.flags & TX_HAS_TBD) { tx.m->m_data += MID_TBD_SIZE; tx.m->m_len -= MID_TBD_SIZE; } tx.m->m_pkthdr.len = m_length(tx.m, &lastm); if (tx.m->m_pkthdr.len > tx.datalen) { lastm->m_len -= tx.m->m_pkthdr.len - tx.datalen; tx.m->m_pkthdr.len = tx.datalen; } bpf_mtap(sc->ifp->if_bpf, tx.m); } #endif /* * do some housekeeping and get the next packet */ slot->bfree -= tx.m->m_pkthdr.len; _IF_ENQUEUE(&slot->indma, tx.m); goto again; /* * error handling. This is jumped to when we just want to drop * the packet. Must be unlocked here. */ dequeue_drop: if (map != NULL) uma_zfree(sc->map_zone, map); slot->mbsize -= tx.m->m_pkthdr.len; m_freem(tx.m); goto again; waitres: _IF_PREPEND(&slot->q, tx.m); } /* * Create a copy of a single mbuf. It can have either internal or * external data, it may have a packet header. External data is really * copied, so the new buffer is writeable. * * LOCK: any, not needed */ static struct mbuf * copy_mbuf(struct mbuf *m) { struct mbuf *new; MGET(new, M_WAITOK, MT_DATA); if (m->m_flags & M_PKTHDR) { M_MOVE_PKTHDR(new, m); if (m->m_len > MHLEN) MCLGET(new, M_WAITOK); } else { if (m->m_len > MLEN) MCLGET(new, M_WAITOK); } bcopy(m->m_data, new->m_data, m->m_len); new->m_len = m->m_len; new->m_flags &= ~M_RDONLY; return (new); } /* * This function is called when we have an ENI adapter. It fixes the * mbuf chain, so that all addresses and lengths are 4 byte aligned. * The overall length is already padded to multiple of cells plus the * TBD so this must always succeed. The routine can fail, when it * needs to copy an mbuf (this may happen if an mbuf is readonly). * * We assume here, that aligning the virtual addresses to 4 bytes also * aligns the physical addresses. 
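 *
 * Added note: that assumption holds because virtual-to-physical mappings
 * are page granular, so the low-order bits of a virtual address and of the
 * physical address it maps to are the same, well below the 4-byte mark.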
* * LOCK: locked, needed */ static struct mbuf * en_fix_mchain(struct en_softc *sc, struct mbuf *m0, u_int *pad) { struct mbuf **prev = &m0; struct mbuf *m = m0; struct mbuf *new; u_char *d; int off; while (m != NULL) { d = mtod(m, u_char *); if ((off = (uintptr_t)d % sizeof(uint32_t)) != 0) { EN_COUNT(sc->stats.mfixaddr); if (M_WRITABLE(m)) { bcopy(d, d - off, m->m_len); m->m_data -= off; } else { if ((new = copy_mbuf(m)) == NULL) { EN_COUNT(sc->stats.mfixfail); m_freem(m0); return (NULL); } new->m_next = m_free(m); *prev = m = new; } } if ((off = m->m_len % sizeof(uint32_t)) != 0) { EN_COUNT(sc->stats.mfixlen); if (!M_WRITABLE(m)) { if ((new = copy_mbuf(m)) == NULL) { EN_COUNT(sc->stats.mfixfail); m_freem(m0); return (NULL); } new->m_next = m_free(m); *prev = m = new; } d = mtod(m, u_char *) + m->m_len; off = 4 - off; while (off) { while (m->m_next && m->m_next->m_len == 0) m->m_next = m_free(m->m_next); if (m->m_next == NULL) { *d++ = 0; KASSERT(*pad > 0, ("no padding space")); (*pad)--; } else { *d++ = *mtod(m->m_next, u_char *); m->m_next->m_len--; m->m_next->m_data++; } m->m_len++; off--; } } prev = &m->m_next; m = m->m_next; } return (m0); } /* * en_start: start transmitting the next packet that needs to go out * if there is one. We take off all packets from the interface's queue and * put them into the channels queue. * * Here we also prepend the transmit packet descriptor and append the padding * and (for aal5) the PDU trailer. This is different from the original driver: * we assume, that allocating one or two additional mbufs is actually cheaper * than all this algorithmic fiddling we would need otherwise. * * While the packet is on the channels wait queue we use the csum_* fields * in the packet header to hold the original datalen, the AAL5 flag and the * VCI. The packet length field in the header holds the needed buffer space. * This may actually be more than the length of the current mbuf chain (when * one or more of TBD, padding and PDU do not fit). * * LOCK: unlocked, needed */ static void en_start(struct ifnet *ifp) { struct en_softc *sc = (struct en_softc *)ifp->if_softc; struct mbuf *m, *lastm; struct atm_pseudohdr *ap; u_int pad; /* 0-bytes to pad at PDU end */ u_int datalen; /* length of user data */ u_int vci; /* the VCI we are transmitting on */ u_int flags; uint32_t tbd[2]; uint32_t pdu[2]; struct en_vcc *vc; struct en_map *map; struct en_txslot *tx; while (1) { IF_DEQUEUE(&ifp->if_snd, m); if (m == NULL) return; flags = 0; ap = mtod(m, struct atm_pseudohdr *); vci = ATM_PH_VCI(ap); if (ATM_PH_VPI(ap) != 0 || vci >= MID_N_VC || (vc = sc->vccs[vci]) == NULL || (vc->vflags & VCC_CLOSE_RX)) { DBG(sc, TX, ("output vpi=%u, vci=%u -- drop", ATM_PH_VPI(ap), vci)); m_freem(m); continue; } if (vc->vcc.aal == ATMIO_AAL_5) flags |= TX_AAL5; m_adj(m, sizeof(struct atm_pseudohdr)); /* * (re-)calculate size of packet (in bytes) */ m->m_pkthdr.len = datalen = m_length(m, &lastm); /* * computing how much padding we need on the end of the mbuf, * then see if we can put the TBD at the front of the mbuf * where the link header goes (well behaved protocols will * reserve room for us). Last, check if room for PDU tail. 
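 *
 * Worked example (added commentary; assumes MID_ATMDATASZ is the 48-byte
 * cell payload and MID_TBD_SIZE/MID_PDU_SIZE are 8 bytes each): an AAL5
 * packet with datalen 1234 grows to 1234 + 8 = 1242 with the PDU trailer,
 * rounds up to 26 cells = 1248, so pad = 1248 - 1234 - 8 = 6, and adding
 * the 8-byte TBD gives 1256 bytes of card buffer space in total.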
*/ if (flags & TX_AAL5) m->m_pkthdr.len += MID_PDU_SIZE; m->m_pkthdr.len = roundup(m->m_pkthdr.len, MID_ATMDATASZ); pad = m->m_pkthdr.len - datalen; if (flags & TX_AAL5) pad -= MID_PDU_SIZE; m->m_pkthdr.len += MID_TBD_SIZE; DBG(sc, TX, ("txvci%d: buflen=%u datalen=%u lead=%d trail=%d", vci, m->m_pkthdr.len, datalen, (int)M_LEADINGSPACE(m), (int)M_TRAILINGSPACE(lastm))); /* * From here on we need access to sc */ EN_LOCK(sc); /* * Allocate a map. We do this here rather then in en_txdma, * because en_txdma is also called from the interrupt handler * and we are going to have a locking problem then. We must * use NOWAIT here, because the ip_output path holds various * locks. */ map = uma_zalloc_arg(sc->map_zone, sc, M_NOWAIT); if (map == NULL) { /* drop that packet */ EN_COUNT(sc->stats.txnomap); EN_UNLOCK(sc); m_freem(m); continue; } if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { EN_UNLOCK(sc); uma_zfree(sc->map_zone, map); m_freem(m); continue; } /* * Look, whether we can prepend the TBD (8 byte) */ if (M_WRITABLE(m) && M_LEADINGSPACE(m) >= MID_TBD_SIZE) { tbd[0] = htobe32(MID_TBD_MK1((flags & TX_AAL5) ? MID_TBD_AAL5 : MID_TBD_NOAAL5, vc->txspeed, m->m_pkthdr.len / MID_ATMDATASZ)); tbd[1] = htobe32(MID_TBD_MK2(vci, 0, 0)); m->m_data -= MID_TBD_SIZE; bcopy(tbd, m->m_data, MID_TBD_SIZE); m->m_len += MID_TBD_SIZE; flags |= TX_HAS_TBD; } /* * Check whether the padding fits (must be writeable - * we pad with zero). */ if (M_WRITABLE(lastm) && M_TRAILINGSPACE(lastm) >= pad) { bzero(lastm->m_data + lastm->m_len, pad); lastm->m_len += pad; flags |= TX_HAS_PAD; if ((flags & TX_AAL5) && M_TRAILINGSPACE(lastm) > MID_PDU_SIZE) { pdu[0] = htobe32(MID_PDU_MK1(0, 0, datalen)); pdu[1] = 0; bcopy(pdu, lastm->m_data + lastm->m_len, MID_PDU_SIZE); lastm->m_len += MID_PDU_SIZE; flags |= TX_HAS_PDU; } } if (!sc->is_adaptec && (m = en_fix_mchain(sc, m, &pad)) == NULL) { EN_UNLOCK(sc); uma_zfree(sc->map_zone, map); continue; } /* * get assigned channel (will be zero unless txspeed is set) */ tx = vc->txslot; if (m->m_pkthdr.len > EN_TXSZ * 1024) { DBG(sc, TX, ("tx%td: packet larger than xmit buffer " "(%d > %d)\n", tx - sc->txslot, m->m_pkthdr.len, EN_TXSZ * 1024)); EN_UNLOCK(sc); m_freem(m); uma_zfree(sc->map_zone, map); continue; } if (tx->mbsize > EN_TXHIWAT) { EN_COUNT(sc->stats.txmbovr); DBG(sc, TX, ("tx%td: buffer space shortage", tx - sc->txslot)); EN_UNLOCK(sc); m_freem(m); uma_zfree(sc->map_zone, map); continue; } /* commit */ tx->mbsize += m->m_pkthdr.len; DBG(sc, TX, ("tx%td: VCI=%d, speed=0x%x, buflen=%d, mbsize=%d", tx - sc->txslot, vci, sc->vccs[vci]->txspeed, m->m_pkthdr.len, tx->mbsize)); MBUF_SET_TX(m, vci, flags, datalen, pad, map); _IF_ENQUEUE(&tx->q, m); en_txdma(sc, tx); EN_UNLOCK(sc); } } /*********************************************************************/ /* * VCs */ /* * en_loadvc: load a vc tab entry from a slot * * LOCK: locked, needed */ static void en_loadvc(struct en_softc *sc, struct en_vcc *vc) { uint32_t reg = en_read(sc, MID_VC(vc->vcc.vci)); reg = MIDV_SETMODE(reg, MIDV_TRASH); en_write(sc, MID_VC(vc->vcc.vci), reg); DELAY(27); /* no need to set CRC */ /* read pointer = 0, desc. start = 0 */ en_write(sc, MID_DST_RP(vc->vcc.vci), 0); /* write pointer = 0 */ en_write(sc, MID_WP_ST_CNT(vc->vcc.vci), 0); /* set mode, size, loc */ en_write(sc, MID_VC(vc->vcc.vci), vc->rxslot->mode); vc->rxslot->cur = vc->rxslot->start; DBG(sc, VC, ("rx%td: assigned to VCI %d", vc->rxslot - sc->rxslot, vc->vcc.vci)); } /* * Open the given vcc. 
* * LOCK: unlocked, needed */ static int en_open_vcc(struct en_softc *sc, struct atmio_openvcc *op) { uint32_t oldmode, newmode; struct en_rxslot *slot; struct en_vcc *vc; int error = 0; DBG(sc, IOCTL, ("enable vpi=%d, vci=%d, flags=%#x", op->param.vpi, op->param.vci, op->param.flags)); if (op->param.vpi != 0 || op->param.vci >= MID_N_VC) return (EINVAL); vc = uma_zalloc(en_vcc_zone, M_NOWAIT | M_ZERO); if (vc == NULL) return (ENOMEM); EN_LOCK(sc); if (sc->vccs[op->param.vci] != NULL) { error = EBUSY; goto done; } /* find a free receive slot */ for (slot = sc->rxslot; slot < &sc->rxslot[sc->en_nrx]; slot++) if (slot->vcc == NULL) break; if (slot == &sc->rxslot[sc->en_nrx]) { error = ENOSPC; goto done; } vc->rxslot = slot; vc->rxhand = op->rxhand; vc->vcc = op->param; oldmode = slot->mode; newmode = (op->param.aal == ATMIO_AAL_5) ? MIDV_AAL5 : MIDV_NOAAL; slot->mode = MIDV_SETMODE(oldmode, newmode); slot->vcc = vc; KASSERT (_IF_QLEN(&slot->indma) == 0 && _IF_QLEN(&slot->q) == 0, ("en_rxctl: left over mbufs on enable slot=%td", vc->rxslot - sc->rxslot)); vc->txspeed = 0; vc->txslot = sc->txslot; vc->txslot->nref++; /* bump reference count */ en_loadvc(sc, vc); /* does debug printf for us */ /* don't free below */ sc->vccs[vc->vcc.vci] = vc; vc = NULL; sc->vccs_open++; done: if (vc != NULL) uma_zfree(en_vcc_zone, vc); EN_UNLOCK(sc); return (error); } /* * Close finished */ static void en_close_finish(struct en_softc *sc, struct en_vcc *vc) { if (vc->rxslot != NULL) vc->rxslot->vcc = NULL; DBG(sc, VC, ("vci: %u free (%p)", vc->vcc.vci, vc)); sc->vccs[vc->vcc.vci] = NULL; uma_zfree(en_vcc_zone, vc); sc->vccs_open--; } /* * LOCK: unlocked, needed */ static int en_close_vcc(struct en_softc *sc, struct atmio_closevcc *cl) { uint32_t oldmode, newmode; struct en_vcc *vc; int error = 0; DBG(sc, IOCTL, ("disable vpi=%d, vci=%d", cl->vpi, cl->vci)); if (cl->vpi != 0 || cl->vci >= MID_N_VC) return (EINVAL); EN_LOCK(sc); if ((vc = sc->vccs[cl->vci]) == NULL) { error = ENOTCONN; goto done; } /* * turn off VCI */ if (vc->rxslot == NULL) { error = ENOTCONN; goto done; } if (vc->vflags & VCC_DRAIN) { error = EINVAL; goto done; } oldmode = en_read(sc, MID_VC(cl->vci)); newmode = MIDV_SETMODE(oldmode, MIDV_TRASH) & ~MIDV_INSERVICE; en_write(sc, MID_VC(cl->vci), (newmode | (oldmode & MIDV_INSERVICE))); /* halt in tracks, be careful to preserve inservice bit */ DELAY(27); vc->rxslot->mode = newmode; vc->txslot->nref--; /* if stuff is still going on we are going to have to drain it out */ if (_IF_QLEN(&vc->rxslot->indma) == 0 && _IF_QLEN(&vc->rxslot->q) == 0 && (vc->vflags & VCC_SWSL) == 0) { en_close_finish(sc, vc); goto done; } vc->vflags |= VCC_DRAIN; DBG(sc, IOCTL, ("VCI %u now draining", cl->vci)); if (vc->vcc.flags & ATMIO_FLAG_ASYNC) goto done; vc->vflags |= VCC_CLOSE_RX; while ((sc->ifp->if_drv_flags & IFF_DRV_RUNNING) && (vc->vflags & VCC_DRAIN)) cv_wait(&sc->cv_close, &sc->en_mtx); en_close_finish(sc, vc); if (!(sc->ifp->if_drv_flags & IFF_DRV_RUNNING)) { error = EIO; goto done; } done: EN_UNLOCK(sc); return (error); } /*********************************************************************/ /* * starting/stopping the card */ /* * en_reset_ul: reset the board, throw away work in progress. * must en_init to recover. 
* * LOCK: locked, needed */ static void en_reset_ul(struct en_softc *sc) { struct en_map *map; struct mbuf *m; struct en_rxslot *rx; int lcv; device_printf(sc->dev, "reset\n"); sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; if (sc->en_busreset) sc->en_busreset(sc); en_write(sc, MID_RESID, 0x0); /* reset hardware */ /* * recv: dump any mbufs we are dma'ing into, if DRAINing, then a reset * will free us! Don't release the rxslot from the channel. */ for (lcv = 0 ; lcv < MID_N_VC ; lcv++) { if (sc->vccs[lcv] == NULL) continue; rx = sc->vccs[lcv]->rxslot; for (;;) { _IF_DEQUEUE(&rx->indma, m); if (m == NULL) break; map = (void *)m->m_pkthdr.rcvif; uma_zfree(sc->map_zone, map); m_freem(m); } for (;;) { _IF_DEQUEUE(&rx->q, m); if (m == NULL) break; m_freem(m); } sc->vccs[lcv]->vflags = 0; } /* * xmit: dump everything */ for (lcv = 0 ; lcv < EN_NTX ; lcv++) { for (;;) { _IF_DEQUEUE(&sc->txslot[lcv].indma, m); if (m == NULL) break; map = (void *)m->m_pkthdr.rcvif; uma_zfree(sc->map_zone, map); m_freem(m); } for (;;) { _IF_DEQUEUE(&sc->txslot[lcv].q, m); if (m == NULL) break; map = (void *)m->m_pkthdr.rcvif; uma_zfree(sc->map_zone, map); m_freem(m); } sc->txslot[lcv].mbsize = 0; } /* * Unstop all waiters */ cv_broadcast(&sc->cv_close); } /* * en_reset: reset the board, throw away work in progress. * must en_init to recover. * * LOCK: unlocked, needed * * Use en_reset_ul if you alreay have the lock */ void en_reset(struct en_softc *sc) { EN_LOCK(sc); en_reset_ul(sc); EN_UNLOCK(sc); } /* * en_init: init board and sync the card with the data in the softc. * * LOCK: locked, needed */ static void en_init(struct en_softc *sc) { int vc, slot; uint32_t loc; if ((sc->ifp->if_flags & IFF_UP) == 0) { DBG(sc, INIT, ("going down")); en_reset(sc); /* to be safe */ return; } DBG(sc, INIT, ("going up")); sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; /* enable */ if (sc->en_busreset) sc->en_busreset(sc); en_write(sc, MID_RESID, 0x0); /* reset */ /* zero memory */ bus_space_set_region_4(sc->en_memt, sc->en_base, MID_RAMOFF, 0, sc->en_obmemsz / 4); /* * init obmem data structures: vc tab, dma q's, slist. * * note that we set drq_free/dtq_free to one less than the total number * of DTQ/DRQs present. we do this because the card uses the condition * (drq_chip == drq_us) to mean "list is empty"... but if you allow the * circular list to be completely full then (drq_chip == drq_us) [i.e. * the drq_us pointer will wrap all the way around]. by restricting * the number of active requests to (N - 1) we prevent the list from * becoming completely full. note that the card will sometimes give * us an interrupt for a DTQ/DRQ we have already processes... this helps * keep that interrupt from messing us up. 
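 *
 * Added illustration: with, say, 4 queue slots and both pointers at slot 0,
 * "chip == us" must mean the queue is empty; if all 4 slots could fill, the
 * producer would wrap back onto the consumer and the same equality would
 * also mean "completely full".  Capping outstanding entries at N - 1 keeps
 * the two states distinguishable.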
*/ bzero(&sc->drq, sizeof(sc->drq)); sc->drq_free = MID_DRQ_N - 1; sc->drq_chip = MID_DRQ_REG2A(en_read(sc, MID_DMA_RDRX)); en_write(sc, MID_DMA_WRRX, MID_DRQ_A2REG(sc->drq_chip)); sc->drq_us = sc->drq_chip; bzero(&sc->dtq, sizeof(sc->dtq)); sc->dtq_free = MID_DTQ_N - 1; sc->dtq_chip = MID_DTQ_REG2A(en_read(sc, MID_DMA_RDTX)); en_write(sc, MID_DMA_WRTX, MID_DRQ_A2REG(sc->dtq_chip)); sc->dtq_us = sc->dtq_chip; sc->hwslistp = MID_SL_REG2A(en_read(sc, MID_SERV_WRITE)); sc->swsl_size = sc->swsl_head = sc->swsl_tail = 0; DBG(sc, INIT, ("drq free/chip: %d/0x%x, dtq free/chip: %d/0x%x, " "hwslist: 0x%x", sc->drq_free, sc->drq_chip, sc->dtq_free, sc->dtq_chip, sc->hwslistp)); for (slot = 0 ; slot < EN_NTX ; slot++) { sc->txslot[slot].bfree = EN_TXSZ * 1024; en_write(sc, MIDX_READPTR(slot), 0); en_write(sc, MIDX_DESCSTART(slot), 0); loc = sc->txslot[slot].cur = sc->txslot[slot].start; loc = loc - MID_RAMOFF; /* mask, cvt to words */ loc = rounddown2(loc, EN_TXSZ * 1024) >> 2; /* top 11 bits */ loc = loc >> MIDV_LOCTOPSHFT; en_write(sc, MIDX_PLACE(slot), MIDX_MKPLACE(en_k2sz(EN_TXSZ), loc)); DBG(sc, INIT, ("tx%d: place 0x%x", slot, (u_int)en_read(sc, MIDX_PLACE(slot)))); } for (vc = 0; vc < MID_N_VC; vc++) if (sc->vccs[vc] != NULL) en_loadvc(sc, sc->vccs[vc]); /* * enable! */ en_write(sc, MID_INTENA, MID_INT_TX | MID_INT_DMA_OVR | MID_INT_IDENT | MID_INT_LERR | MID_INT_DMA_ERR | MID_INT_DMA_RX | MID_INT_DMA_TX | MID_INT_SERVICE | MID_INT_SUNI | MID_INT_STATS); en_write(sc, MID_MAST_CSR, MID_SETIPL(sc->ipl) | MID_MCSR_ENDMA | MID_MCSR_ENTX | MID_MCSR_ENRX); } /*********************************************************************/ /* * Ioctls */ /* * en_ioctl: handle ioctl requests * * NOTE: if you add an ioctl to set txspeed, you should choose a new * TX channel/slot. Choose the one with the lowest sc->txslot[slot].nref * value, subtract one from sc->txslot[0].nref, add one to the * sc->txslot[slot].nref, set sc->txvc2slot[vci] = slot, and then set * txspeed[vci]. * * LOCK: unlocked, needed */ static int en_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct en_softc *sc = (struct en_softc *)ifp->if_softc; #if defined(INET) || defined(INET6) struct ifaddr *ifa = (struct ifaddr *)data; #endif struct ifreq *ifr = (struct ifreq *)data; struct atmio_vcctable *vtab; int error = 0; switch (cmd) { case SIOCSIFADDR: EN_LOCK(sc); ifp->if_flags |= IFF_UP; #if defined(INET) || defined(INET6) if (ifa->ifa_addr->sa_family == AF_INET || ifa->ifa_addr->sa_family == AF_INET6) { if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { en_reset_ul(sc); en_init(sc); } ifa->ifa_rtrequest = atm_rtrequest; /* ??? */ EN_UNLOCK(sc); break; } #endif /* INET */ if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { en_reset_ul(sc); en_init(sc); } EN_UNLOCK(sc); break; case SIOCSIFFLAGS: EN_LOCK(sc); if (ifp->if_flags & IFF_UP) { if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) en_init(sc); } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) en_reset_ul(sc); } EN_UNLOCK(sc); break; case SIOCSIFMTU: /* * Set the interface MTU. 
*/ if (ifr->ifr_mtu > ATMMTU) { error = EINVAL; break; } ifp->if_mtu = ifr->ifr_mtu; break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->media, cmd); break; case SIOCATMOPENVCC: /* kernel internal use */ error = en_open_vcc(sc, (struct atmio_openvcc *)data); break; case SIOCATMCLOSEVCC: /* kernel internal use */ error = en_close_vcc(sc, (struct atmio_closevcc *)data); break; case SIOCATMGETVCCS: /* internal netgraph use */ vtab = atm_getvccs((struct atmio_vcc **)sc->vccs, MID_N_VC, sc->vccs_open, &sc->en_mtx, 0); if (vtab == NULL) { error = ENOMEM; break; } *(void **)data = vtab; break; case SIOCATMGVCCS: /* return vcc table */ vtab = atm_getvccs((struct atmio_vcc **)sc->vccs, MID_N_VC, sc->vccs_open, &sc->en_mtx, 1); - error = copyout(vtab, ifr->ifr_data, sizeof(*vtab) + + error = copyout(vtab, ifr_data_get_ptr(ifr), sizeof(*vtab) + vtab->count * sizeof(vtab->vccs[0])); free(vtab, M_DEVBUF); break; default: error = EINVAL; break; } return (error); } /*********************************************************************/ /* * Sysctl's */ /* * Sysctl handler for internal statistics * * LOCK: unlocked, needed */ static int en_sysctl_istats(SYSCTL_HANDLER_ARGS) { struct en_softc *sc = arg1; uint32_t *ret; int error; ret = malloc(sizeof(sc->stats), M_TEMP, M_WAITOK); EN_LOCK(sc); bcopy(&sc->stats, ret, sizeof(sc->stats)); EN_UNLOCK(sc); error = SYSCTL_OUT(req, ret, sizeof(sc->stats)); free(ret, M_TEMP); return (error); } /*********************************************************************/ /* * Interrupts */ /* * Transmit interrupt handler * * check for tx complete, if detected then this means that some space * has come free on the card. we must account for it and arrange to * kick the channel to life (in case it is stalled waiting on the card). * * LOCK: locked, needed */ static uint32_t en_intr_tx(struct en_softc *sc, uint32_t reg) { uint32_t kick; uint32_t mask; uint32_t val; int chan; kick = 0; /* bitmask of channels to kick */ for (mask = 1, chan = 0; chan < EN_NTX; chan++, mask *= 2) { if (!(reg & MID_TXCHAN(chan))) continue; kick = kick | mask; /* current read pointer */ val = en_read(sc, MIDX_READPTR(chan)); /* as offset */ val = (val * sizeof(uint32_t)) + sc->txslot[chan].start; if (val > sc->txslot[chan].cur) sc->txslot[chan].bfree = val - sc->txslot[chan].cur; else sc->txslot[chan].bfree = (val + (EN_TXSZ * 1024)) - sc->txslot[chan].cur; DBG(sc, INTR, ("tx%d: transmit done. %d bytes now free in " "buffer", chan, sc->txslot[chan].bfree)); } return (kick); } /* * TX DMA interrupt * * check for TX DMA complete, if detected then this means * that some DTQs are now free. it also means some indma * mbufs can be freed. if we needed DTQs, kick all channels. * * LOCK: locked, needed */ static uint32_t en_intr_tx_dma(struct en_softc *sc) { uint32_t kick = 0; uint32_t val; uint32_t idx; uint32_t slot; uint32_t dtq; struct en_map *map; struct mbuf *m; val = en_read(sc, MID_DMA_RDTX); /* chip's current location */ idx = MID_DTQ_A2REG(sc->dtq_chip); /* where we last saw chip */ if (sc->need_dtqs) { kick = MID_NTX_CH - 1; /* assume power of 2, kick all! 
*/ sc->need_dtqs = 0; /* recalculated in "kick" loop below */ DBG(sc, INTR, ("cleared need DTQ condition")); } while (idx != val) { sc->dtq_free++; if ((dtq = sc->dtq[idx]) != 0) { /* don't forget to zero it out when done */ sc->dtq[idx] = 0; slot = EN_DQ_SLOT(dtq); _IF_DEQUEUE(&sc->txslot[slot].indma, m); if (m == NULL) panic("enintr: dtqsync"); map = (void *)m->m_pkthdr.rcvif; uma_zfree(sc->map_zone, map); m_freem(m); sc->txslot[slot].mbsize -= EN_DQ_LEN(dtq); DBG(sc, INTR, ("tx%d: free %d dma bytes, mbsize now " "%d", slot, EN_DQ_LEN(dtq), sc->txslot[slot].mbsize)); } EN_WRAPADD(0, MID_DTQ_N, idx, 1); } sc->dtq_chip = MID_DTQ_REG2A(val); /* sync softc */ return (kick); } /* * Service interrupt * * LOCK: locked, needed */ static int en_intr_service(struct en_softc *sc) { uint32_t chip; uint32_t vci; int need_softserv = 0; struct en_vcc *vc; chip = MID_SL_REG2A(en_read(sc, MID_SERV_WRITE)); while (sc->hwslistp != chip) { /* fetch and remove it from hardware service list */ vci = en_read(sc, sc->hwslistp); EN_WRAPADD(MID_SLOFF, MID_SLEND, sc->hwslistp, 4); if ((vc = sc->vccs[vci]) == NULL || (vc->vcc.flags & ATMIO_FLAG_NORX)) { DBG(sc, INTR, ("unexpected rx interrupt VCI %d", vci)); en_write(sc, MID_VC(vci), MIDV_TRASH); /* rx off */ continue; } /* remove from hwsl */ en_write(sc, MID_VC(vci), vc->rxslot->mode); EN_COUNT(sc->stats.hwpull); DBG(sc, INTR, ("pulled VCI %d off hwslist", vci)); /* add it to the software service list (if needed) */ if ((vc->vflags & VCC_SWSL) == 0) { EN_COUNT(sc->stats.swadd); need_softserv = 1; vc->vflags |= VCC_SWSL; sc->swslist[sc->swsl_tail] = vci; EN_WRAPADD(0, MID_SL_N, sc->swsl_tail, 1); sc->swsl_size++; DBG(sc, INTR, ("added VCI %d to swslist", vci)); } } return (need_softserv); } /* * Handle a receive DMA completion */ static void en_rx_drain(struct en_softc *sc, u_int drq) { struct en_rxslot *slot; struct en_vcc *vc; struct mbuf *m; struct atm_pseudohdr ah; slot = &sc->rxslot[EN_DQ_SLOT(drq)]; m = NULL; /* assume "JK" trash DMA */ if (EN_DQ_LEN(drq) != 0) { _IF_DEQUEUE(&slot->indma, m); KASSERT(m != NULL, ("drqsync: %s: lost mbuf in slot %td!", sc->ifp->if_xname, slot - sc->rxslot)); uma_zfree(sc->map_zone, (struct en_map *)m->m_pkthdr.rcvif); } if ((vc = slot->vcc) == NULL) { /* ups */ if (m != NULL) m_freem(m); return; } /* do something with this mbuf */ if (vc->vflags & VCC_DRAIN) { /* drain? 
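 * The VC is being torn down: drop the data and, once the indma and
 * pending queues are empty and the chip has taken the VC out of
 * service, finish the close (asynchronous VCs) or wake the thread
 * sleeping on cv_close.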
*/ if (m != NULL) m_freem(m); if (_IF_QLEN(&slot->indma) == 0 && _IF_QLEN(&slot->q) == 0 && (en_read(sc, MID_VC(vc->vcc.vci)) & MIDV_INSERVICE) == 0 && (vc->vflags & VCC_SWSL) == 0) { vc->vflags &= ~VCC_CLOSE_RX; if (vc->vcc.flags & ATMIO_FLAG_ASYNC) en_close_finish(sc, vc); else cv_signal(&sc->cv_close); } return; } if (m != NULL) { ATM_PH_FLAGS(&ah) = vc->vcc.flags; ATM_PH_VPI(&ah) = 0; ATM_PH_SETVCI(&ah, vc->vcc.vci); DBG(sc, INTR, ("rx%td: rxvci%d: atm_input, mbuf %p, len %d, " "hand %p", slot - sc->rxslot, vc->vcc.vci, m, EN_DQ_LEN(drq), vc->rxhand)); m->m_pkthdr.rcvif = sc->ifp; if_inc_counter(sc->ifp, IFCOUNTER_IPACKETS, 1); vc->ipackets++; vc->ibytes += m->m_pkthdr.len; #ifdef EN_DEBUG if (sc->debug & DBG_IPACKETS) en_dump_packet(sc, m); #endif #ifdef ENABLE_BPF BPF_MTAP(sc->ifp, m); #endif EN_UNLOCK(sc); atm_input(sc->ifp, &ah, m, vc->rxhand); EN_LOCK(sc); } } /* * check for RX DMA complete, and pass the data "upstairs" * * LOCK: locked, needed */ static int en_intr_rx_dma(struct en_softc *sc) { uint32_t val; uint32_t idx; uint32_t drq; val = en_read(sc, MID_DMA_RDRX); /* chip's current location */ idx = MID_DRQ_A2REG(sc->drq_chip); /* where we last saw chip */ while (idx != val) { sc->drq_free++; if ((drq = sc->drq[idx]) != 0) { /* don't forget to zero it out when done */ sc->drq[idx] = 0; en_rx_drain(sc, drq); } EN_WRAPADD(0, MID_DRQ_N, idx, 1); } sc->drq_chip = MID_DRQ_REG2A(val); /* sync softc */ if (sc->need_drqs) { /* true if we had a DRQ shortage */ sc->need_drqs = 0; DBG(sc, INTR, ("cleared need DRQ condition")); return (1); } else return (0); } /* * en_mget: get an mbuf chain that can hold totlen bytes and return it * (for recv). For the actual allocation totlen is rounded up to a multiple * of 4. We also ensure, that each mbuf has a multiple of 4 bytes. * * After this call the sum of all the m_len's in the chain will be totlen. * This is called at interrupt time, so we can't wait here. * * LOCK: any, not needed */ static struct mbuf * en_mget(struct en_softc *sc, u_int pktlen) { struct mbuf *m, *tmp; u_int totlen, pad; totlen = roundup(pktlen, sizeof(uint32_t)); pad = totlen - pktlen; /* * First get an mbuf with header. Keep space for a couple of * words at the begin. */ /* called from interrupt context */ MGETHDR(m, M_NOWAIT, MT_DATA); if (m == NULL) return (NULL); m->m_pkthdr.rcvif = NULL; m->m_pkthdr.len = pktlen; m->m_len = EN_RX1BUF; M_ALIGN(m, EN_RX1BUF); if (m->m_len >= totlen) { m->m_len = totlen; } else { totlen -= m->m_len; /* called from interrupt context */ tmp = m_getm(m, totlen, M_NOWAIT, MT_DATA); if (tmp == NULL) { m_free(m); return (NULL); } tmp = m->m_next; /* m_getm could do this for us */ while (tmp != NULL) { tmp->m_len = min(MCLBYTES, totlen); totlen -= tmp->m_len; tmp = tmp->m_next; } } return (m); } /* * Argument for RX DMAMAP loader. */ struct rxarg { struct en_softc *sc; struct mbuf *m; u_int pre_skip; /* number of bytes to skip at begin */ u_int post_skip; /* number of bytes to skip at end */ struct en_vcc *vc; /* vc we are receiving on */ int wait; /* wait for DRQ entries */ }; /* * Copy the segment table to the buffer for later use. And compute the * number of dma queue entries we need. 
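 * Each DMA segment becomes one or more DRQ entries: an optional entry
 * to align the transfer to the best burst size, full-sized bursts and
 * a final cleanup entry for the remainder. "JK" entries are used to
 * skip the RBD at the front and any padding at the end of the buffer.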
* * LOCK: locked, needed */ static void en_rxdma_load(void *uarg, bus_dma_segment_t *segs, int nseg, bus_size_t mapsize, int error) { struct rxarg *rx = uarg; struct en_softc *sc = rx->sc; struct en_rxslot *slot = rx->vc->rxslot; u_int free; /* number of free DRQ entries */ uint32_t cur; /* current buffer offset */ uint32_t drq; /* DRQ entry pointer */ uint32_t last_drq; /* where we have written last */ u_int needalign, cnt, count, bcode; bus_addr_t addr; bus_size_t rest; int i; if (error != 0) return; if (nseg > EN_MAX_DMASEG) panic("too many DMA segments"); rx->wait = 0; free = sc->drq_free; drq = sc->drq_us; cur = slot->cur; last_drq = 0; /* * Local macro to add an entry to the receive DMA area. If there * are no entries left, return. Save the byte offset of the entry * in last_drq for later use. */ #define PUT_DRQ_ENTRY(ENI, BCODE, COUNT, ADDR) \ if (free == 0) { \ EN_COUNT(sc->stats.rxdrqout); \ rx->wait = 1; \ return; \ } \ last_drq = drq; \ en_write(sc, drq + 0, (ENI || !sc->is_adaptec) ? \ MID_MK_RXQ_ENI(COUNT, rx->vc->vcc.vci, 0, BCODE) : \ MID_MK_RXQ_ADP(COUNT, rx->vc->vcc.vci, 0, BCODE)); \ en_write(sc, drq + 4, ADDR); \ \ EN_WRAPADD(MID_DRQOFF, MID_DRQEND, drq, 8); \ free--; /* * Local macro to generate a DMA entry to DMA cnt bytes. Updates * the current buffer byte offset accordingly. */ #define DO_DRQ(TYPE) do { \ rest -= cnt; \ EN_WRAPADD(slot->start, slot->stop, cur, cnt); \ DBG(sc, SERV, ("rx%td: "TYPE" %u bytes, %ju left, cur %#x", \ slot - sc->rxslot, cnt, (uintmax_t)rest, cur)); \ \ PUT_DRQ_ENTRY(1, bcode, count, addr); \ \ addr += cnt; \ } while (0) /* * Skip the RBD at the beginning */ if (rx->pre_skip > 0) { /* update DMA address */ EN_WRAPADD(slot->start, slot->stop, cur, rx->pre_skip); PUT_DRQ_ENTRY(0, MIDDMA_JK, WORD_IDX(slot->start, cur), 0); } for (i = 0; i < nseg; i++, segs++) { addr = segs->ds_addr; rest = segs->ds_len; if (sc->is_adaptec) { /* adaptec card - simple */ /* advance the on-card buffer pointer */ EN_WRAPADD(slot->start, slot->stop, cur, rest); DBG(sc, SERV, ("rx%td: adp %ju bytes %#jx " "(cur now 0x%x)", slot - sc->rxslot, (uintmax_t)rest, (uintmax_t)addr, cur)); PUT_DRQ_ENTRY(0, 0, rest, addr); continue; } /* * do we need to do a DMA op to align to the maximum * burst? Note, that we are alway 32-bit aligned. */ if (sc->alburst && (needalign = (addr & sc->bestburstmask)) != 0) { /* compute number of bytes, words and code */ cnt = sc->bestburstlen - needalign; if (cnt > rest) cnt = rest; count = cnt / sizeof(uint32_t); if (sc->noalbursts) { bcode = MIDDMA_WORD; } else { bcode = en_dmaplan[count].bcode; count = cnt >> en_dmaplan[count].divshift; } DO_DRQ("al_dma"); } /* do we need to do a max-sized burst? */ if (rest >= sc->bestburstlen) { count = rest >> sc->bestburstshift; cnt = count << sc->bestburstshift; bcode = sc->bestburstcode; DO_DRQ("best_dma"); } /* do we need to do a cleanup burst? 
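 * Whatever remains is smaller than a full best-sized burst; transfer
 * it with a burst code taken from en_dmaplan (or as plain words when
 * unaligned bursts are disabled).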
*/ if (rest != 0) { cnt = rest; count = rest / sizeof(uint32_t); if (sc->noalbursts) { bcode = MIDDMA_WORD; } else { bcode = en_dmaplan[count].bcode; count = cnt >> en_dmaplan[count].divshift; } DO_DRQ("clean_dma"); } } /* * Skip stuff at the end */ if (rx->post_skip > 0) { /* update DMA address */ EN_WRAPADD(slot->start, slot->stop, cur, rx->post_skip); PUT_DRQ_ENTRY(0, MIDDMA_JK, WORD_IDX(slot->start, cur), 0); } /* record the end for the interrupt routine */ sc->drq[MID_DRQ_A2REG(last_drq)] = EN_DQ_MK(slot - sc->rxslot, rx->m->m_pkthdr.len); /* set the end flag in the last descriptor */ en_write(sc, last_drq + 0, SETQ_END(sc, en_read(sc, last_drq + 0))); #undef PUT_DRQ_ENTRY #undef DO_DRQ /* commit */ slot->cur = cur; sc->drq_free = free; sc->drq_us = drq; /* signal to card */ en_write(sc, MID_DMA_WRRX, MID_DRQ_A2REG(sc->drq_us)); } /* * en_service: handle a service interrupt * * Q: why do we need a software service list? * * A: if we remove a VCI from the hardware list and we find that we are * out of DRQs we must defer processing until some DRQs become free. * so we must remember to look at this RX VCI/slot later, but we can't * put it back on the hardware service list (since that isn't allowed). * so we instead save it on the software service list. it would be nice * if we could peek at the VCI on top of the hwservice list without removing * it, however this leads to a race condition: if we peek at it and * decide we are done with it new data could come in before we have a * chance to remove it from the hwslist. by the time we get it out of * the list the interrupt for the new data will be lost. oops! * * LOCK: locked, needed */ static void en_service(struct en_softc *sc) { struct mbuf *m, *lastm; struct en_map *map; struct rxarg rx; uint32_t cur; uint32_t dstart; /* data start (as reported by card) */ uint32_t rbd; /* receive buffer descriptor */ uint32_t pdu; /* AAL5 trailer */ int mlen; int error; struct en_rxslot *slot; struct en_vcc *vc; rx.sc = sc; next_vci: if (sc->swsl_size == 0) { DBG(sc, SERV, ("en_service done")); return; } /* * get vcc to service */ rx.vc = vc = sc->vccs[sc->swslist[sc->swsl_head]]; slot = vc->rxslot; KASSERT (slot->vcc->rxslot == slot, ("en_service: rx slot/vci sync")); /* * determine our mode and if we've got any work to do */ DBG(sc, SERV, ("rx%td: service vci=%d start/stop/cur=0x%x 0x%x " "0x%x", slot - sc->rxslot, vc->vcc.vci, slot->start, slot->stop, slot->cur)); same_vci: cur = slot->cur; dstart = MIDV_DSTART(en_read(sc, MID_DST_RP(vc->vcc.vci))); dstart = (dstart * sizeof(uint32_t)) + slot->start; /* check to see if there is any data at all */ if (dstart == cur) { EN_WRAPADD(0, MID_SL_N, sc->swsl_head, 1); /* remove from swslist */ vc->vflags &= ~VCC_SWSL; sc->swsl_size--; DBG(sc, SERV, ("rx%td: remove vci %d from swslist", slot - sc->rxslot, vc->vcc.vci)); goto next_vci; } /* * figure out how many bytes we need * [mlen = # bytes to go in mbufs] */ rbd = en_read(sc, cur); if (MID_RBD_ID(rbd) != MID_RBD_STDID) panic("en_service: id mismatch"); if (rbd & MID_RBD_T) { mlen = 0; /* we've got trash */ rx.pre_skip = MID_RBD_SIZE; rx.post_skip = 0; EN_COUNT(sc->stats.ttrash); DBG(sc, SERV, ("RX overflow lost %d cells!", MID_RBD_CNT(rbd))); } else if (vc->vcc.aal != ATMIO_AAL_5) { /* 1 cell (ick!) 
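 * Non-AAL5 VCs deliver exactly one raw cell per buffer: the cell
 * header plus the cell payload (MID_CHDR_SIZE + MID_ATMDATASZ).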
*/ mlen = MID_CHDR_SIZE + MID_ATMDATASZ; rx.pre_skip = MID_RBD_SIZE; rx.post_skip = 0; } else { rx.pre_skip = MID_RBD_SIZE; /* get PDU trailer in correct byte order */ pdu = cur + MID_RBD_CNT(rbd) * MID_ATMDATASZ + MID_RBD_SIZE - MID_PDU_SIZE; if (pdu >= slot->stop) pdu -= EN_RXSZ * 1024; pdu = en_read(sc, pdu); if (MID_RBD_CNT(rbd) * MID_ATMDATASZ < MID_PDU_LEN(pdu)) { device_printf(sc->dev, "invalid AAL5 length\n"); rx.post_skip = MID_RBD_CNT(rbd) * MID_ATMDATASZ; mlen = 0; if_inc_counter(sc->ifp, IFCOUNTER_IERRORS, 1); } else if (rbd & MID_RBD_CRCERR) { device_printf(sc->dev, "CRC error\n"); rx.post_skip = MID_RBD_CNT(rbd) * MID_ATMDATASZ; mlen = 0; if_inc_counter(sc->ifp, IFCOUNTER_IERRORS, 1); } else { mlen = MID_PDU_LEN(pdu); rx.post_skip = MID_RBD_CNT(rbd) * MID_ATMDATASZ - mlen; } } /* * now allocate mbufs for mlen bytes of data, if out of mbufs, trash all * * notes: * 1. it is possible that we've already allocated an mbuf for this pkt * but ran out of DRQs, in which case we saved the allocated mbuf * on "q". * 2. if we save an buf in "q" we store the "cur" (pointer) in the * buf as an identity (that we can check later). * 3. after this block of code, if m is still NULL then we ran out of * mbufs */ _IF_DEQUEUE(&slot->q, m); if (m != NULL) { if (m->m_pkthdr.csum_data != cur) { /* wasn't ours */ DBG(sc, SERV, ("rx%td: q'ed buf %p not ours", slot - sc->rxslot, m)); _IF_PREPEND(&slot->q, m); m = NULL; EN_COUNT(sc->stats.rxqnotus); } else { EN_COUNT(sc->stats.rxqus); DBG(sc, SERV, ("rx%td: recovered q'ed buf %p", slot - sc->rxslot, m)); } } if (mlen == 0 && m != NULL) { /* should not happen */ m_freem(m); m = NULL; } if (mlen != 0 && m == NULL) { m = en_mget(sc, mlen); if (m == NULL) { rx.post_skip += mlen; mlen = 0; EN_COUNT(sc->stats.rxmbufout); DBG(sc, SERV, ("rx%td: out of mbufs", slot - sc->rxslot)); } else rx.post_skip -= roundup(mlen, sizeof(uint32_t)) - mlen; DBG(sc, SERV, ("rx%td: allocate buf %p, mlen=%d", slot - sc->rxslot, m, mlen)); } DBG(sc, SERV, ("rx%td: VCI %d, rbuf %p, mlen %d, skip %u/%u", slot - sc->rxslot, vc->vcc.vci, m, mlen, rx.pre_skip, rx.post_skip)); if (m != NULL) { /* M_NOWAIT - called from interrupt context */ map = uma_zalloc_arg(sc->map_zone, sc, M_NOWAIT); if (map == NULL) { rx.post_skip += mlen; m_freem(m); DBG(sc, SERV, ("rx%td: out of maps", slot - sc->rxslot)); goto skip; } rx.m = m; error = bus_dmamap_load_mbuf(sc->txtag, map->map, m, en_rxdma_load, &rx, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc->dev, "loading RX map failed " "%d\n", error); uma_zfree(sc->map_zone, map); m_freem(m); rx.post_skip += mlen; goto skip; } map->flags |= ENMAP_LOADED; if (rx.wait) { /* out of DRQs - wait */ uma_zfree(sc->map_zone, map); m->m_pkthdr.csum_data = cur; _IF_ENQUEUE(&slot->q, m); EN_COUNT(sc->stats.rxdrqout); sc->need_drqs = 1; /* flag condition */ return; } (void)m_length(m, &lastm); lastm->m_len -= roundup(mlen, sizeof(uint32_t)) - mlen; m->m_pkthdr.rcvif = (void *)map; _IF_ENQUEUE(&slot->indma, m); /* get next packet in this slot */ goto same_vci; } skip: /* * Here we end if we should drop the packet from the receive buffer. * The number of bytes to drop is in fill. We can do this with on * JK entry. If we don't even have that one - wait. 
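 * (A JK entry advances the chip's read pointer past the data without
 * DMAing anything to host memory, so a single free DRQ slot suffices.)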
*/ if (sc->drq_free == 0) { sc->need_drqs = 1; /* flag condition */ return; } rx.post_skip += rx.pre_skip; DBG(sc, SERV, ("rx%td: skipping %u", slot - sc->rxslot, rx.post_skip)); /* advance buffer address */ EN_WRAPADD(slot->start, slot->stop, cur, rx.post_skip); /* write DRQ entry */ if (sc->is_adaptec) en_write(sc, sc->drq_us, MID_MK_RXQ_ADP(WORD_IDX(slot->start, cur), vc->vcc.vci, MID_DMA_END, MIDDMA_JK)); else en_write(sc, sc->drq_us, MID_MK_RXQ_ENI(WORD_IDX(slot->start, cur), vc->vcc.vci, MID_DMA_END, MIDDMA_JK)); en_write(sc, sc->drq_us + 4, 0); EN_WRAPADD(MID_DRQOFF, MID_DRQEND, sc->drq_us, 8); sc->drq_free--; /* signal to RX interrupt */ sc->drq[MID_DRQ_A2REG(sc->drq_us)] = EN_DQ_MK(slot - sc->rxslot, 0); slot->cur = cur; /* signal to card */ en_write(sc, MID_DMA_WRRX, MID_DRQ_A2REG(sc->drq_us)); goto same_vci; } /* * interrupt handler * * LOCK: unlocked, needed */ void en_intr(void *arg) { struct en_softc *sc = arg; uint32_t reg, kick, mask; int lcv, need_softserv; EN_LOCK(sc); reg = en_read(sc, MID_INTACK); DBG(sc, INTR, ("interrupt=0x%b", reg, MID_INTBITS)); if ((reg & MID_INT_ANY) == 0) { EN_UNLOCK(sc); return; } /* * unexpected errors that need a reset */ if ((reg & (MID_INT_IDENT | MID_INT_LERR | MID_INT_DMA_ERR)) != 0) { device_printf(sc->dev, "unexpected interrupt=0x%b, " "resetting\n", reg, MID_INTBITS); #ifdef EN_DEBUG panic("en: unexpected error"); #else en_reset_ul(sc); en_init(sc); #endif EN_UNLOCK(sc); return; } if (reg & MID_INT_SUNI) utopia_intr(&sc->utopia); kick = 0; if (reg & MID_INT_TX) kick |= en_intr_tx(sc, reg); if (reg & MID_INT_DMA_TX) kick |= en_intr_tx_dma(sc); /* * kick xmit channels as needed. */ if (kick) { DBG(sc, INTR, ("tx kick mask = 0x%x", kick)); for (mask = 1, lcv = 0 ; lcv < EN_NTX ; lcv++, mask = mask * 2) if ((kick & mask) && _IF_QLEN(&sc->txslot[lcv].q) != 0) en_txdma(sc, &sc->txslot[lcv]); } need_softserv = 0; if (reg & MID_INT_DMA_RX) need_softserv |= en_intr_rx_dma(sc); if (reg & MID_INT_SERVICE) need_softserv |= en_intr_service(sc); if (need_softserv) en_service(sc); /* * keep our stats */ if (reg & MID_INT_DMA_OVR) { EN_COUNT(sc->stats.dmaovr); DBG(sc, INTR, ("MID_INT_DMA_OVR")); } reg = en_read(sc, MID_STAT); sc->stats.otrash += MID_OTRASH(reg); sc->stats.vtrash += MID_VTRASH(reg); EN_UNLOCK(sc); } /* * Read at most n SUNI regs starting at reg into val */ static int en_utopia_readregs(struct ifatm *ifatm, u_int reg, uint8_t *val, u_int *n) { struct en_softc *sc = ifatm->ifp->if_softc; u_int i; EN_CHECKLOCK(sc); if (reg >= MID_NSUNI) return (EINVAL); if (reg + *n > MID_NSUNI) *n = MID_NSUNI - reg; for (i = 0; i < *n; i++) val[i] = en_read(sc, MID_SUNIOFF + 4 * (reg + i)); return (0); } /* * change the bits given by mask to them in val in register reg */ static int en_utopia_writereg(struct ifatm *ifatm, u_int reg, u_int mask, u_int val) { struct en_softc *sc = ifatm->ifp->if_softc; uint32_t regval; EN_CHECKLOCK(sc); if (reg >= MID_NSUNI) return (EINVAL); regval = en_read(sc, MID_SUNIOFF + 4 * reg); regval = (regval & ~mask) | (val & mask); en_write(sc, MID_SUNIOFF + 4 * reg, regval); return (0); } static const struct utopia_methods en_utopia_methods = { en_utopia_readregs, en_utopia_writereg }; /*********************************************************************/ /* * Probing the DMA brokeness of the card */ /* * Physical address load helper function for DMA probe * * LOCK: unlocked, not needed */ static void en_dmaprobe_load(void *uarg, bus_dma_segment_t *segs, int nseg, int error) { if (error == 0) *(bus_addr_t *)uarg = 
segs[0].ds_addr; } /* * en_dmaprobe: helper function for en_attach. * * see how the card handles DMA by running a few DMA tests. we need * to figure out the largest number of bytes we can DMA in one burst * ("bestburstlen"), and if the starting address for a burst needs to * be aligned on any sort of boundary or not ("alburst"). * * Things turn out more complex than that, because on my (harti) brand * new motherboard (2.4GHz) we can do 64byte aligned DMAs, but everything * we more than 4 bytes fails (with an RX DMA timeout) for physical * addresses that end with 0xc. Therefor we search not only the largest * burst that is supported (hopefully 64) but also check what is the largerst * unaligned supported size. If that appears to be lesser than 4 words, * set the noalbursts flag. That will be set only if also alburst is set. */ /* * en_dmaprobe_doit: do actual testing for the DMA test. * Cycle through all bursts sizes from 8 up to 64 and try whether it works. * Return the largest one that works. * * LOCK: unlocked, not needed */ static int en_dmaprobe_doit(struct en_softc *sc, uint8_t *sp, bus_addr_t psp) { uint8_t *dp = sp + MIDDMA_MAXBURST; bus_addr_t pdp = psp + MIDDMA_MAXBURST; int lcv, retval = 4, cnt; uint32_t reg, bcode, midvloc; if (sc->en_busreset) sc->en_busreset(sc); en_write(sc, MID_RESID, 0x0); /* reset card before touching RAM */ /* * set up a 1k buffer at MID_BUFOFF */ midvloc = ((MID_BUFOFF - MID_RAMOFF) / sizeof(uint32_t)) >> MIDV_LOCTOPSHFT; en_write(sc, MIDX_PLACE(0), MIDX_MKPLACE(en_k2sz(1), midvloc)); en_write(sc, MID_VC(0), (midvloc << MIDV_LOCSHIFT) | (en_k2sz(1) << MIDV_SZSHIFT) | MIDV_TRASH); en_write(sc, MID_DST_RP(0), 0); en_write(sc, MID_WP_ST_CNT(0), 0); /* set up sample data */ for (lcv = 0 ; lcv < MIDDMA_MAXBURST; lcv++) sp[lcv] = lcv + 1; /* enable DMA (only) */ en_write(sc, MID_MAST_CSR, MID_MCSR_ENDMA); sc->drq_chip = MID_DRQ_REG2A(en_read(sc, MID_DMA_RDRX)); sc->dtq_chip = MID_DTQ_REG2A(en_read(sc, MID_DMA_RDTX)); /* * try it now . . . DMA it out, then DMA it back in and compare * * note: in order to get the dma stuff to reverse directions it wants * the "end" flag set! since we are not dma'ing valid data we may * get an ident mismatch interrupt (which we will ignore). */ DBG(sc, DMA, ("test sp=%p/%#lx, dp=%p/%#lx", sp, (u_long)psp, dp, (u_long)pdp)); for (lcv = 8 ; lcv <= MIDDMA_MAXBURST ; lcv = lcv * 2) { DBG(sc, DMA, ("test lcv=%d", lcv)); /* zero SRAM and dest buffer */ bus_space_set_region_4(sc->en_memt, sc->en_base, MID_BUFOFF, 0, 1024 / 4); bzero(dp, MIDDMA_MAXBURST); bcode = en_sz2b(lcv); /* build lcv-byte-DMA x NBURSTS */ if (sc->is_adaptec) en_write(sc, sc->dtq_chip, MID_MK_TXQ_ADP(lcv, 0, MID_DMA_END, 0)); else en_write(sc, sc->dtq_chip, MID_MK_TXQ_ENI(1, 0, MID_DMA_END, bcode)); en_write(sc, sc->dtq_chip + 4, psp); EN_WRAPADD(MID_DTQOFF, MID_DTQEND, sc->dtq_chip, 8); en_write(sc, MID_DMA_WRTX, MID_DTQ_A2REG(sc->dtq_chip)); cnt = 1000; while ((reg = en_readx(sc, MID_DMA_RDTX)) != MID_DTQ_A2REG(sc->dtq_chip)) { DELAY(1); if (--cnt == 0) { DBG(sc, DMA, ("unexpected timeout in tx " "DMA test\n alignment=0x%lx, burst size=%d" ", dma addr reg=%#x, rdtx=%#x, stat=%#x\n", (u_long)sp & 63, lcv, en_read(sc, MID_DMA_ADDR), reg, en_read(sc, MID_INTSTAT))); return (retval); } } reg = en_read(sc, MID_INTACK); if ((reg & MID_INT_DMA_TX) != MID_INT_DMA_TX) { DBG(sc, DMA, ("unexpected status in tx DMA test: %#x\n", reg)); return (retval); } /* re-enable DMA (only) */ en_write(sc, MID_MAST_CSR, MID_MCSR_ENDMA); /* "return to sender..." address is known ... 
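 * Now DMA the pattern back from card SRAM into dp with an RX DRQ and
 * compare the two host buffers with bcmp(); a timeout or mismatch ends
 * the probe with the last burst size that worked.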
*/ /* build lcv-byte-DMA x NBURSTS */ if (sc->is_adaptec) en_write(sc, sc->drq_chip, MID_MK_RXQ_ADP(lcv, 0, MID_DMA_END, 0)); else en_write(sc, sc->drq_chip, MID_MK_RXQ_ENI(1, 0, MID_DMA_END, bcode)); en_write(sc, sc->drq_chip + 4, pdp); EN_WRAPADD(MID_DRQOFF, MID_DRQEND, sc->drq_chip, 8); en_write(sc, MID_DMA_WRRX, MID_DRQ_A2REG(sc->drq_chip)); cnt = 1000; while ((reg = en_readx(sc, MID_DMA_RDRX)) != MID_DRQ_A2REG(sc->drq_chip)) { DELAY(1); cnt--; if (--cnt == 0) { DBG(sc, DMA, ("unexpected timeout in rx " "DMA test, rdrx=%#x\n", reg)); return (retval); } } reg = en_read(sc, MID_INTACK); if ((reg & MID_INT_DMA_RX) != MID_INT_DMA_RX) { DBG(sc, DMA, ("unexpected status in rx DMA " "test: 0x%x\n", reg)); return (retval); } if (bcmp(sp, dp, lcv)) { DBG(sc, DMA, ("DMA test failed! lcv=%d, sp=%p, " "dp=%p", lcv, sp, dp)); return (retval); } retval = lcv; } return (retval); /* studly 64 byte DMA present! oh baby!! */ } /* * Find the best DMA parameters * * LOCK: unlocked, not needed */ static void en_dmaprobe(struct en_softc *sc) { bus_dma_tag_t tag; bus_dmamap_t map; int err; void *buffer; int bestalgn, lcv, try, bestnoalgn; bus_addr_t phys; uint8_t *addr; sc->alburst = 0; sc->noalbursts = 0; /* * Allocate some DMA-able memory. * We need 3 times the max burst size aligned to the max burst size. */ err = bus_dma_tag_create(bus_get_dma_tag(sc->dev), MIDDMA_MAXBURST, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, 3 * MIDDMA_MAXBURST, 1, 3 * MIDDMA_MAXBURST, 0, NULL, NULL, &tag); if (err) panic("%s: cannot create test DMA tag %d", __func__, err); err = bus_dmamem_alloc(tag, &buffer, 0, &map); if (err) panic("%s: cannot allocate test DMA memory %d", __func__, err); err = bus_dmamap_load(tag, map, buffer, 3 * MIDDMA_MAXBURST, en_dmaprobe_load, &phys, BUS_DMA_NOWAIT); if (err) panic("%s: cannot load test DMA map %d", __func__, err); addr = buffer; DBG(sc, DMA, ("phys=%#lx addr=%p", (u_long)phys, addr)); /* * Now get the best burst size of the aligned case. */ bestalgn = bestnoalgn = en_dmaprobe_doit(sc, addr, phys); /* * Now try unaligned. */ for (lcv = 4; lcv < MIDDMA_MAXBURST; lcv += 4) { try = en_dmaprobe_doit(sc, addr + lcv, phys + lcv); if (try < bestnoalgn) bestnoalgn = try; } if (bestnoalgn < bestalgn) { sc->alburst = 1; if (bestnoalgn < 32) sc->noalbursts = 1; } sc->bestburstlen = bestalgn; sc->bestburstshift = en_log2(bestalgn); sc->bestburstmask = sc->bestburstlen - 1; /* must be power of 2 */ sc->bestburstcode = en_sz2b(bestalgn); /* * Reset the chip before freeing the buffer. It may still be trying * to DMA. */ if (sc->en_busreset) sc->en_busreset(sc); en_write(sc, MID_RESID, 0x0); /* reset card before touching RAM */ DELAY(10000); /* may still do DMA */ /* * Free the DMA stuff */ bus_dmamap_unload(tag, map); bus_dmamem_free(tag, buffer, map); bus_dma_tag_destroy(tag); } /*********************************************************************/ /* * Attach/detach. */ /* * Attach to the card. * * LOCK: unlocked, not needed (but initialized) */ int en_attach(struct en_softc *sc) { struct ifnet *ifp = sc->ifp; int sz; uint32_t reg, lcv, check, ptr, sav, midvloc; #ifdef EN_DEBUG sc->debug = EN_DEBUG; #endif /* * Probe card to determine memory size. * * The stupid ENI card always reports to PCI that it needs 4MB of * space (2MB regs and 2MB RAM). If it has less than 2MB RAM the * addresses wrap in the RAM address space (i.e. on a 512KB card * addresses 0x3ffffc, 0x37fffc, and 0x2ffffc are aliases for * 0x27fffc [note that RAM starts at offset 0x200000]). 
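 * The probe below writes each probe offset's own address into RAM and
 * then re-reads all previously written locations; the first offset
 * that aliases an earlier one marks the wrap point and therefore the
 * RAM size.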
*/ /* reset card before touching RAM */ if (sc->en_busreset) sc->en_busreset(sc); en_write(sc, MID_RESID, 0x0); for (lcv = MID_PROBEOFF; lcv <= MID_MAXOFF ; lcv += MID_PROBSIZE) { en_write(sc, lcv, lcv); /* data[address] = address */ for (check = MID_PROBEOFF; check < lcv ;check += MID_PROBSIZE) { reg = en_read(sc, check); if (reg != check) /* found an alias! - quit */ goto done_probe; } } done_probe: lcv -= MID_PROBSIZE; /* take one step back */ sc->en_obmemsz = (lcv + 4) - MID_RAMOFF; /* * determine the largest DMA burst supported */ en_dmaprobe(sc); /* * "hello world" */ /* reset */ if (sc->en_busreset) sc->en_busreset(sc); en_write(sc, MID_RESID, 0x0); /* reset */ /* zero memory */ bus_space_set_region_4(sc->en_memt, sc->en_base, MID_RAMOFF, 0, sc->en_obmemsz / 4); reg = en_read(sc, MID_RESID); device_printf(sc->dev, "ATM midway v%d, board IDs %d.%d, %s%s%s, " "%ldKB on-board RAM\n", MID_VER(reg), MID_MID(reg), MID_DID(reg), (MID_IS_SABRE(reg)) ? "sabre controller, " : "", (MID_IS_SUNI(reg)) ? "SUNI" : "Utopia", (!MID_IS_SUNI(reg) && MID_IS_UPIPE(reg)) ? " (pipelined)" : "", (long)sc->en_obmemsz / 1024); /* * fill in common ATM interface stuff */ IFP2IFATM(sc->ifp)->mib.hw_version = (MID_VER(reg) << 16) | (MID_MID(reg) << 8) | MID_DID(reg); if (MID_DID(reg) & 0x4) IFP2IFATM(sc->ifp)->mib.media = IFM_ATM_UTP_155; else IFP2IFATM(sc->ifp)->mib.media = IFM_ATM_MM_155; IFP2IFATM(sc->ifp)->mib.pcr = ATM_RATE_155M; IFP2IFATM(sc->ifp)->mib.vpi_bits = 0; IFP2IFATM(sc->ifp)->mib.vci_bits = MID_VCI_BITS; IFP2IFATM(sc->ifp)->mib.max_vccs = MID_N_VC; IFP2IFATM(sc->ifp)->mib.max_vpcs = 0; if (sc->is_adaptec) { IFP2IFATM(sc->ifp)->mib.device = ATM_DEVICE_ADP155P; if (sc->bestburstlen == 64 && sc->alburst == 0) device_printf(sc->dev, "passed 64 byte DMA test\n"); else device_printf(sc->dev, "FAILED DMA TEST: " "burst=%d, alburst=%d\n", sc->bestburstlen, sc->alburst); } else { IFP2IFATM(sc->ifp)->mib.device = ATM_DEVICE_ENI155P; device_printf(sc->dev, "maximum DMA burst length = %d " "bytes%s\n", sc->bestburstlen, sc->alburst ? sc->noalbursts ? 
" (no large bursts)" : " (must align)" : ""); } /* * link into network subsystem and prepare card */ sc->ifp->if_softc = sc; ifp->if_flags = IFF_SIMPLEX; ifp->if_ioctl = en_ioctl; ifp->if_start = en_start; mtx_init(&sc->en_mtx, device_get_nameunit(sc->dev), MTX_NETWORK_LOCK, MTX_DEF); cv_init(&sc->cv_close, "VC close"); /* * Make the sysctl tree */ sysctl_ctx_init(&sc->sysctl_ctx); if ((sc->sysctl_tree = SYSCTL_ADD_NODE(&sc->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_hw_atm), OID_AUTO, device_get_nameunit(sc->dev), CTLFLAG_RD, 0, "")) == NULL) goto fail; if (SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, "istats", CTLTYPE_OPAQUE | CTLFLAG_RD, sc, 0, en_sysctl_istats, "S", "internal statistics") == NULL) goto fail; #ifdef EN_DEBUG if (SYSCTL_ADD_UINT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, "debug", CTLFLAG_RW , &sc->debug, 0, "") == NULL) goto fail; #endif IFP2IFATM(sc->ifp)->phy = &sc->utopia; utopia_attach(&sc->utopia, IFP2IFATM(sc->ifp), &sc->media, &sc->en_mtx, &sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), &en_utopia_methods); utopia_init_media(&sc->utopia); MGET(sc->padbuf, M_WAITOK, MT_DATA); bzero(sc->padbuf->m_data, MLEN); if (bus_dma_tag_create(bus_get_dma_tag(sc->dev), 1, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, EN_TXSZ * 1024, EN_MAX_DMASEG, EN_TXSZ * 1024, 0, NULL, NULL, &sc->txtag)) goto fail; sc->map_zone = uma_zcreate("en dma maps", sizeof(struct en_map), en_map_ctor, en_map_dtor, NULL, en_map_fini, UMA_ALIGN_PTR, UMA_ZONE_ZINIT); if (sc->map_zone == NULL) goto fail; uma_zone_set_max(sc->map_zone, EN_MAX_MAPS); /* * init softc */ sc->vccs = malloc(MID_N_VC * sizeof(sc->vccs[0]), M_DEVBUF, M_ZERO | M_WAITOK); sz = sc->en_obmemsz - (MID_BUFOFF - MID_RAMOFF); ptr = sav = MID_BUFOFF; ptr = roundup(ptr, EN_TXSZ * 1024); /* align */ sz = sz - (ptr - sav); if (EN_TXSZ*1024 * EN_NTX > sz) { device_printf(sc->dev, "EN_NTX/EN_TXSZ too big\n"); goto fail; } for (lcv = 0 ;lcv < EN_NTX ;lcv++) { sc->txslot[lcv].mbsize = 0; sc->txslot[lcv].start = ptr; ptr += (EN_TXSZ * 1024); sz -= (EN_TXSZ * 1024); sc->txslot[lcv].stop = ptr; sc->txslot[lcv].nref = 0; DBG(sc, INIT, ("tx%d: start 0x%x, stop 0x%x", lcv, sc->txslot[lcv].start, sc->txslot[lcv].stop)); } sav = ptr; ptr = roundup(ptr, EN_RXSZ * 1024); /* align */ sz = sz - (ptr - sav); sc->en_nrx = sz / (EN_RXSZ * 1024); if (sc->en_nrx <= 0) { device_printf(sc->dev, "EN_NTX/EN_TXSZ/EN_RXSZ too big\n"); goto fail; } /* * ensure that there is always one VC slot on the service list free * so that we can tell the difference between a full and empty list. */ if (sc->en_nrx >= MID_N_VC) sc->en_nrx = MID_N_VC - 1; for (lcv = 0 ; lcv < sc->en_nrx ; lcv++) { sc->rxslot[lcv].vcc = NULL; midvloc = sc->rxslot[lcv].start = ptr; ptr += (EN_RXSZ * 1024); sz -= (EN_RXSZ * 1024); sc->rxslot[lcv].stop = ptr; midvloc = midvloc - MID_RAMOFF; /* mask, cvt to words */ midvloc = rounddown2(midvloc, EN_RXSZ * 1024) >> 2; /* we only want the top 11 bits */ midvloc = midvloc >> MIDV_LOCTOPSHFT; midvloc = (midvloc & MIDV_LOCMASK) << MIDV_LOCSHIFT; sc->rxslot[lcv].mode = midvloc | (en_k2sz(EN_RXSZ) << MIDV_SZSHIFT) | MIDV_TRASH; DBG(sc, INIT, ("rx%d: start 0x%x, stop 0x%x, mode 0x%x", lcv, sc->rxslot[lcv].start, sc->rxslot[lcv].stop, sc->rxslot[lcv].mode)); } device_printf(sc->dev, "%d %dKB receive buffers, %d %dKB transmit " "buffers\n", sc->en_nrx, EN_RXSZ, EN_NTX, EN_TXSZ); device_printf(sc->dev, "end station identifier (mac address) " "%6D\n", IFP2IFATM(sc->ifp)->mib.esi, ":"); /* * Start SUNI stuff. 
This will call our readregs/writeregs * functions and these assume the lock to be held so we must get it * here. */ EN_LOCK(sc); utopia_start(&sc->utopia); utopia_reset(&sc->utopia); EN_UNLOCK(sc); /* * final commit */ atm_ifattach(ifp); #ifdef ENABLE_BPF bpfattach(ifp, DLT_ATM_RFC1483, sizeof(struct atmllc)); #endif return (0); fail: en_destroy(sc); return (-1); } /* * Free all internal resources. No access to bus resources here. * No locking required here (interrupt is already disabled). * * LOCK: unlocked, needed (but destroyed) */ void en_destroy(struct en_softc *sc) { u_int i; if (sc->utopia.state & UTP_ST_ATTACHED) { /* these assume the lock to be held */ EN_LOCK(sc); utopia_stop(&sc->utopia); utopia_detach(&sc->utopia); EN_UNLOCK(sc); } if (sc->vccs != NULL) { /* get rid of sticky VCCs */ for (i = 0; i < MID_N_VC; i++) if (sc->vccs[i] != NULL) uma_zfree(en_vcc_zone, sc->vccs[i]); free(sc->vccs, M_DEVBUF); } if (sc->padbuf != NULL) m_free(sc->padbuf); /* * Destroy the map zone before the tag (the fini function will * destroy the DMA maps using the tag) */ if (sc->map_zone != NULL) uma_zdestroy(sc->map_zone); if (sc->txtag != NULL) bus_dma_tag_destroy(sc->txtag); (void)sysctl_ctx_free(&sc->sysctl_ctx); cv_destroy(&sc->cv_close); mtx_destroy(&sc->en_mtx); } /* * Module loaded/unloaded */ int en_modevent(module_t mod __unused, int event, void *arg __unused) { switch (event) { case MOD_LOAD: en_vcc_zone = uma_zcreate("EN vccs", sizeof(struct en_vcc), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); if (en_vcc_zone == NULL) return (ENOMEM); break; case MOD_UNLOAD: uma_zdestroy(en_vcc_zone); break; } return (0); } /*********************************************************************/ /* * Debugging support */ #ifdef EN_DDBHOOK /* * functions we can call from ddb */ /* * en_dump: dump the state */ #define END_SWSL 0x00000040 /* swsl state */ #define END_DRQ 0x00000020 /* drq state */ #define END_DTQ 0x00000010 /* dtq state */ #define END_RX 0x00000008 /* rx state */ #define END_TX 0x00000004 /* tx state */ #define END_MREGS 0x00000002 /* registers */ #define END_STATS 0x00000001 /* dump stats */ #define END_BITS "\20\7SWSL\6DRQ\5DTQ\4RX\3TX\2MREGS\1STATS" static void en_dump_stats(const struct en_stats *s) { printf("en_stats:\n"); printf("\t%d/%d mfix (%d failed)\n", s->mfixaddr, s->mfixlen, s->mfixfail); printf("\t%d rx dma overflow interrupts\n", s->dmaovr); printf("\t%d times out of TX space and stalled\n", s->txoutspace); printf("\t%d times out of DTQs\n", s->txdtqout); printf("\t%d times launched a packet\n", s->launch); printf("\t%d times pulled the hw service list\n", s->hwpull); printf("\t%d times pushed a vci on the sw service list\n", s->swadd); printf("\t%d times RX pulled an mbuf from Q that wasn't ours\n", s->rxqnotus); printf("\t%d times RX pulled a good mbuf from Q\n", s->rxqus); printf("\t%d times ran out of DRQs\n", s->rxdrqout); printf("\t%d transmit packets dropped due to mbsize\n", s->txmbovr); printf("\t%d cells trashed due to turned off rxvc\n", s->vtrash); printf("\t%d cells trashed due to totally full buffer\n", s->otrash); printf("\t%d cells trashed due almost full buffer\n", s->ttrash); printf("\t%d rx mbuf allocation failures\n", s->rxmbufout); printf("\t%d times out of tx maps\n", s->txnomap); #ifdef NATM #ifdef NATM_STAT printf("\tnatmintr so_rcv: ok/drop cnt: %d/%d, ok/drop bytes: %d/%d\n", natm_sookcnt, natm_sodropcnt, natm_sookbytes, natm_sodropbytes); #endif #endif } static void en_dump_mregs(struct en_softc *sc) { u_int cnt; printf("mregs:\n"); printf("resid = 
0x%x\n", en_read(sc, MID_RESID)); printf("interrupt status = 0x%b\n", (int)en_read(sc, MID_INTSTAT), MID_INTBITS); printf("interrupt enable = 0x%b\n", (int)en_read(sc, MID_INTENA), MID_INTBITS); printf("mcsr = 0x%b\n", (int)en_read(sc, MID_MAST_CSR), MID_MCSRBITS); printf("serv_write = [chip=%u] [us=%u]\n", en_read(sc, MID_SERV_WRITE), MID_SL_A2REG(sc->hwslistp)); printf("dma addr = 0x%x\n", en_read(sc, MID_DMA_ADDR)); printf("DRQ: chip[rd=0x%x,wr=0x%x], sc[chip=0x%x,us=0x%x]\n", MID_DRQ_REG2A(en_read(sc, MID_DMA_RDRX)), MID_DRQ_REG2A(en_read(sc, MID_DMA_WRRX)), sc->drq_chip, sc->drq_us); printf("DTQ: chip[rd=0x%x,wr=0x%x], sc[chip=0x%x,us=0x%x]\n", MID_DTQ_REG2A(en_read(sc, MID_DMA_RDTX)), MID_DTQ_REG2A(en_read(sc, MID_DMA_WRTX)), sc->dtq_chip, sc->dtq_us); printf(" unusal txspeeds:"); for (cnt = 0 ; cnt < MID_N_VC ; cnt++) if (sc->vccs[cnt]->txspeed) printf(" vci%d=0x%x", cnt, sc->vccs[cnt]->txspeed); printf("\n"); printf(" rxvc slot mappings:"); for (cnt = 0 ; cnt < MID_N_VC ; cnt++) if (sc->vccs[cnt]->rxslot != NULL) printf(" %d->%td", cnt, sc->vccs[cnt]->rxslot - sc->rxslot); printf("\n"); } static void en_dump_tx(struct en_softc *sc) { u_int slot; printf("tx:\n"); for (slot = 0 ; slot < EN_NTX; slot++) { printf("tx%d: start/stop/cur=0x%x/0x%x/0x%x [%d] ", slot, sc->txslot[slot].start, sc->txslot[slot].stop, sc->txslot[slot].cur, (sc->txslot[slot].cur - sc->txslot[slot].start) / 4); printf("mbsize=%d, bfree=%d\n", sc->txslot[slot].mbsize, sc->txslot[slot].bfree); printf("txhw: base_address=0x%x, size=%u, read=%u, " "descstart=%u\n", (u_int)MIDX_BASE(en_read(sc, MIDX_PLACE(slot))), MIDX_SZ(en_read(sc, MIDX_PLACE(slot))), en_read(sc, MIDX_READPTR(slot)), en_read(sc, MIDX_DESCSTART(slot))); } } static void en_dump_rx(struct en_softc *sc) { struct en_rxslot *slot; printf(" recv slots:\n"); for (slot = sc->rxslot ; slot < &sc->rxslot[sc->en_nrx]; slot++) { printf("rx%td: start/stop/cur=0x%x/0x%x/0x%x mode=0x%x ", slot - sc->rxslot, slot->start, slot->stop, slot->cur, slot->mode); if (slot->vcc != NULL) { printf("vci=%u\n", slot->vcc->vcc.vci); printf("RXHW: mode=0x%x, DST_RP=0x%x, WP_ST_CNT=0x%x\n", en_read(sc, MID_VC(slot->vcc->vcc.vci)), en_read(sc, MID_DST_RP(slot->vcc->vcc.vci)), en_read(sc, MID_WP_ST_CNT(slot->vcc->vcc.vci))); } } } /* * This is only correct for non-adaptec adapters */ static void en_dump_dtqs(struct en_softc *sc) { uint32_t ptr, reg; printf(" dtq [need_dtqs=%d,dtq_free=%d]:\n", sc->need_dtqs, sc->dtq_free); ptr = sc->dtq_chip; while (ptr != sc->dtq_us) { reg = en_read(sc, ptr); printf("\t0x%x=[%#x cnt=%d, chan=%d, end=%d, type=%d @ 0x%x]\n", sc->dtq[MID_DTQ_A2REG(ptr)], reg, MID_DMA_CNT(reg), MID_DMA_TXCHAN(reg), (reg & MID_DMA_END) != 0, MID_DMA_TYPE(reg), en_read(sc, ptr + 4)); EN_WRAPADD(MID_DTQOFF, MID_DTQEND, ptr, 8); } } static void en_dump_drqs(struct en_softc *sc) { uint32_t ptr, reg; printf(" drq [need_drqs=%d,drq_free=%d]:\n", sc->need_drqs, sc->drq_free); ptr = sc->drq_chip; while (ptr != sc->drq_us) { reg = en_read(sc, ptr); printf("\t0x%x=[cnt=%d, chan=%d, end=%d, type=%d @ 0x%x]\n", sc->drq[MID_DRQ_A2REG(ptr)], MID_DMA_CNT(reg), MID_DMA_RXVCI(reg), (reg & MID_DMA_END) != 0, MID_DMA_TYPE(reg), en_read(sc, ptr + 4)); EN_WRAPADD(MID_DRQOFF, MID_DRQEND, ptr, 8); } } /* Do not staticize - meant for calling from DDB! 
*/ int en_dump(int unit, int level) { struct en_softc *sc; int lcv, cnt; devclass_t dc; int maxunit; dc = devclass_find("en"); if (dc == NULL) { printf("%s: can't find devclass!\n", __func__); return (0); } maxunit = devclass_get_maxunit(dc); for (lcv = 0 ; lcv < maxunit ; lcv++) { sc = devclass_get_softc(dc, lcv); if (sc == NULL) continue; if (unit != -1 && unit != lcv) continue; device_printf(sc->dev, "dumping device at level 0x%b\n", level, END_BITS); if (sc->dtq_us == 0) { printf("\n"); continue; } if (level & END_STATS) en_dump_stats(&sc->stats); if (level & END_MREGS) en_dump_mregs(sc); if (level & END_TX) en_dump_tx(sc); if (level & END_RX) en_dump_rx(sc); if (level & END_DTQ) en_dump_dtqs(sc); if (level & END_DRQ) en_dump_drqs(sc); if (level & END_SWSL) { printf(" swslist [size=%d]: ", sc->swsl_size); for (cnt = sc->swsl_head ; cnt != sc->swsl_tail ; cnt = (cnt + 1) % MID_SL_N) printf("0x%x ", sc->swslist[cnt]); printf("\n"); } } return (0); } /* * en_dumpmem: dump the memory * * Do not staticize - meant for calling from DDB! */ int en_dumpmem(int unit, int addr, int len) { struct en_softc *sc; uint32_t reg; devclass_t dc; dc = devclass_find("en"); if (dc == NULL) { printf("%s: can't find devclass\n", __func__); return (0); } sc = devclass_get_softc(dc, unit); if (sc == NULL) { printf("%s: invalid unit number: %d\n", __func__, unit); return (0); } addr = addr & ~3; if (addr < MID_RAMOFF || addr + len * 4 > MID_MAXOFF || len <= 0) { printf("invalid addr/len number: %d, %d\n", addr, len); return (0); } printf("dumping %d words starting at offset 0x%x\n", len, addr); while (len--) { reg = en_read(sc, addr); printf("mem[0x%x] = 0x%x\n", addr, reg); addr += 4; } return (0); } #endif Index: stable/11/sys/dev/fatm/if_fatm.c =================================================================== --- stable/11/sys/dev/fatm/if_fatm.c (revision 332287) +++ stable/11/sys/dev/fatm/if_fatm.c (revision 332288) @@ -1,3091 +1,3091 @@ /*- * Copyright (c) 2001-2003 * Fraunhofer Institute for Open Communication Systems (FhG Fokus). * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * Author: Hartmut Brandt * * Fore PCA200E driver for NATM */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_natm.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef ENABLE_BPF #include #endif #ifdef INET #include #include #endif #include #include #include #include #include #include #include #include #include #include devclass_t fatm_devclass; static const struct { uint16_t vid; uint16_t did; const char *name; } fatm_devs[] = { { 0x1127, 0x300, "FORE PCA200E" }, { 0, 0, NULL } }; static const struct rate { uint32_t ratio; uint32_t cell_rate; } rate_table[] = { #include }; #define RATE_TABLE_SIZE (sizeof(rate_table) / sizeof(rate_table[0])) SYSCTL_DECL(_hw_atm); MODULE_DEPEND(fatm, utopia, 1, 1, 1); static int fatm_utopia_readregs(struct ifatm *, u_int, uint8_t *, u_int *); static int fatm_utopia_writereg(struct ifatm *, u_int, u_int, u_int); static const struct utopia_methods fatm_utopia_methods = { fatm_utopia_readregs, fatm_utopia_writereg }; #define VC_OK(SC, VPI, VCI) \ (rounddown2(VPI, 1 << IFP2IFATM((SC)->ifp)->mib.vpi_bits) == 0 && \ (VCI) != 0 && rounddown2(VCI, 1 << IFP2IFATM((SC)->ifp)->mib.vci_bits) == 0) static int fatm_load_vc(struct fatm_softc *sc, struct card_vcc *vc); /* * Probing is easy: step trough the list of known vendor and device * ids and compare. If one is found - it's our. */ static int fatm_probe(device_t dev) { int i; for (i = 0; fatm_devs[i].name; i++) if (pci_get_vendor(dev) == fatm_devs[i].vid && pci_get_device(dev) == fatm_devs[i].did) { device_set_desc(dev, fatm_devs[i].name); return (BUS_PROBE_DEFAULT); } return (ENXIO); } /* * Function called at completion of a SUNI writeregs/readregs command. * This is called from the interrupt handler while holding the softc lock. * We use the queue entry as the randevouze point. */ static void fatm_utopia_writeregs_complete(struct fatm_softc *sc, struct cmdqueue *q) { H_SYNCSTAT_POSTREAD(sc, q->q.statp); if(H_GETSTAT(q->q.statp) & FATM_STAT_ERROR) { sc->istats.suni_reg_errors++; q->error = EIO; } wakeup(q); } /* * Write a SUNI register. The bits that are 1 in mask are written from val * into register reg. We wait for the command to complete by sleeping on * the register memory. * * We assume, that we already hold the softc mutex. 
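 * The request is queued as a SETOC3 operation and we msleep() on the
 * queue entry until fatm_utopia_writeregs_complete() wakes us from the
 * interrupt handler; a one-second timeout is reported as EIO.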
*/ static int fatm_utopia_writereg(struct ifatm *ifatm, u_int reg, u_int mask, u_int val) { int error; struct cmdqueue *q; struct fatm_softc *sc; sc = ifatm->ifp->if_softc; FATM_CHECKLOCK(sc); if (!(ifatm->ifp->if_drv_flags & IFF_DRV_RUNNING)) return (EIO); /* get queue element and fill it */ q = GET_QUEUE(sc->cmdqueue, struct cmdqueue, sc->cmdqueue.head); H_SYNCSTAT_POSTREAD(sc, q->q.statp); if (!(H_GETSTAT(q->q.statp) & FATM_STAT_FREE)) { sc->istats.cmd_queue_full++; return (EIO); } NEXT_QUEUE_ENTRY(sc->cmdqueue.head, FATM_CMD_QLEN); q->error = 0; q->cb = fatm_utopia_writeregs_complete; H_SETSTAT(q->q.statp, FATM_STAT_PENDING); H_SYNCSTAT_PREWRITE(sc, q->q.statp); WRITE4(sc, q->q.card + FATMOC_GETOC3_BUF, 0); BARRIER_W(sc); WRITE4(sc, q->q.card + FATMOC_OP, FATM_MAKE_SETOC3(reg, val, mask) | FATM_OP_INTERRUPT_SEL); BARRIER_W(sc); /* * Wait for the command to complete */ error = msleep(q, &sc->mtx, PZERO | PCATCH, "fatm_setreg", hz); switch(error) { case EWOULDBLOCK: error = EIO; break; case ERESTART: error = EINTR; break; case 0: error = q->error; break; } return (error); } /* * Function called at completion of a SUNI readregs command. * This is called from the interrupt handler while holding the softc lock. * We use reg_mem as the randevouze point. */ static void fatm_utopia_readregs_complete(struct fatm_softc *sc, struct cmdqueue *q) { H_SYNCSTAT_POSTREAD(sc, q->q.statp); if (H_GETSTAT(q->q.statp) & FATM_STAT_ERROR) { sc->istats.suni_reg_errors++; q->error = EIO; } wakeup(&sc->reg_mem); } /* * Read SUNI registers * * We use a preallocated buffer to read the registers. Therefor we need * to protect against multiple threads trying to read registers. We do this * with a condition variable and a flag. We wait for the command to complete by sleeping on * the register memory. * * We assume, that we already hold the softc mutex. */ static int fatm_utopia_readregs_internal(struct fatm_softc *sc) { int error, i; uint32_t *ptr; struct cmdqueue *q; /* get the buffer */ for (;;) { if (!(sc->ifp->if_drv_flags & IFF_DRV_RUNNING)) return (EIO); if (!(sc->flags & FATM_REGS_INUSE)) break; cv_wait(&sc->cv_regs, &sc->mtx); } sc->flags |= FATM_REGS_INUSE; q = GET_QUEUE(sc->cmdqueue, struct cmdqueue, sc->cmdqueue.head); H_SYNCSTAT_POSTREAD(sc, q->q.statp); if (!(H_GETSTAT(q->q.statp) & FATM_STAT_FREE)) { sc->istats.cmd_queue_full++; return (EIO); } NEXT_QUEUE_ENTRY(sc->cmdqueue.head, FATM_CMD_QLEN); q->error = 0; q->cb = fatm_utopia_readregs_complete; H_SETSTAT(q->q.statp, FATM_STAT_PENDING); H_SYNCSTAT_PREWRITE(sc, q->q.statp); bus_dmamap_sync(sc->reg_mem.dmat, sc->reg_mem.map, BUS_DMASYNC_PREREAD); WRITE4(sc, q->q.card + FATMOC_GETOC3_BUF, sc->reg_mem.paddr); BARRIER_W(sc); WRITE4(sc, q->q.card + FATMOC_OP, FATM_OP_OC3_GET_REG | FATM_OP_INTERRUPT_SEL); BARRIER_W(sc); /* * Wait for the command to complete */ error = msleep(&sc->reg_mem, &sc->mtx, PZERO | PCATCH, "fatm_getreg", hz); switch(error) { case EWOULDBLOCK: error = EIO; break; case ERESTART: error = EINTR; break; case 0: bus_dmamap_sync(sc->reg_mem.dmat, sc->reg_mem.map, BUS_DMASYNC_POSTREAD); error = q->error; break; } if (error != 0) { /* declare buffer to be free */ sc->flags &= ~FATM_REGS_INUSE; cv_signal(&sc->cv_regs); return (error); } /* swap if needed */ ptr = (uint32_t *)sc->reg_mem.mem; for (i = 0; i < FATM_NREGS; i++) ptr[i] = le32toh(ptr[i]) & 0xff; return (0); } /* * Read SUNI registers for the SUNI module. * * We assume, that we already hold the mutex. 
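 * The card DMAs the register file into reg_mem as little-endian 32-bit
 * words of which only the low byte is significant; the requested range
 * is then copied out of that buffer.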
*/
static int
fatm_utopia_readregs(struct ifatm *ifatm, u_int reg, uint8_t *valp, u_int *np)
{
	int err;
	int i;
	struct fatm_softc *sc;

	if (reg >= FATM_NREGS)
		return (EINVAL);
	if (reg + *np > FATM_NREGS)
		*np = FATM_NREGS - reg;
	sc = ifatm->ifp->if_softc;
	FATM_CHECKLOCK(sc);

	err = fatm_utopia_readregs_internal(sc);
	if (err != 0)
		return (err);

	for (i = 0; i < *np; i++)
		valp[i] = ((uint32_t *)sc->reg_mem.mem)[reg + i];

	/* declare buffer to be free */
	sc->flags &= ~FATM_REGS_INUSE;
	cv_signal(&sc->cv_regs);

	return (0);
}

/*
 * Check whether the heart is beating. We remember the last heartbeat and
 * compare it to the current one. If it has not changed for 10 consecutive
 * checks, we have a problem.
 *
 * Assume we hold the lock.
 */
static void
fatm_check_heartbeat(struct fatm_softc *sc)
{
	uint32_t h;

	FATM_CHECKLOCK(sc);

	h = READ4(sc, FATMO_HEARTBEAT);
	DBG(sc, BEAT, ("heartbeat %08x", h));

	if (sc->stop_cnt == 10)
		return;

	if (h == sc->heartbeat) {
		if (++sc->stop_cnt == 10) {
			log(LOG_ERR, "i960 stopped???\n");
			WRITE4(sc, FATMO_HIMR, 1);
		}
		return;
	}

	sc->stop_cnt = 0;
	sc->heartbeat = h;
}

/*
 * Ensure that the heart is still beating.
 */
static void
fatm_watchdog(void *arg)
{
	struct fatm_softc *sc;

	sc = arg;
	FATM_CHECKLOCK(sc);
	fatm_check_heartbeat(sc);
	callout_reset(&sc->watchdog_timer, hz * 5, fatm_watchdog, sc);
}

/*
 * Hard reset the i960 on the board. This is done by initializing registers,
 * clearing interrupts and waiting for the selftest to finish. Not sure
 * whether all these barriers are actually needed.
 *
 * Assumes that we hold the lock.
 */
static int
fatm_reset(struct fatm_softc *sc)
{
	int w;
	uint32_t val;

	FATM_CHECKLOCK(sc);

	WRITE4(sc, FATMO_APP_BASE, FATMO_COMMON_ORIGIN);
	BARRIER_W(sc);

	WRITE4(sc, FATMO_UART_TO_960, XMIT_READY);
	BARRIER_W(sc);

	WRITE4(sc, FATMO_UART_TO_HOST, XMIT_READY);
	BARRIER_W(sc);

	WRITE4(sc, FATMO_BOOT_STATUS, COLD_START);
	BARRIER_W(sc);

	WRITE1(sc, FATMO_HCR, FATM_HCR_RESET);
	BARRIER_W(sc);

	DELAY(1000);

	WRITE1(sc, FATMO_HCR, 0);
	BARRIER_RW(sc);

	DELAY(1000);

	for (w = 100; w; w--) {
		BARRIER_R(sc);
		val = READ4(sc, FATMO_BOOT_STATUS);
		switch (val) {
		case SELF_TEST_OK:
			return (0);
		case SELF_TEST_FAIL:
			return (EIO);
		}
		DELAY(1000);
	}
	return (EIO);
}

/*
 * Stop the card. Must be called WITH the lock held.
 * Reset, free transmit and receive buffers. Wake up everybody who may sleep.
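 * Pending commands are marked with FATM_STAT_ERROR and their sleepers
 * woken, used receive buffers are returned to the free list, and VCCs
 * that are neither open nor being opened are released.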
*/ static void fatm_stop(struct fatm_softc *sc) { int i; struct cmdqueue *q; struct rbuf *rb; struct txqueue *tx; uint32_t stat; FATM_CHECKLOCK(sc); /* Stop the board */ utopia_stop(&sc->utopia); (void)fatm_reset(sc); /* stop watchdog */ callout_stop(&sc->watchdog_timer); if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) { sc->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); ATMEV_SEND_IFSTATE_CHANGED(IFP2IFATM(sc->ifp), sc->utopia.carrier == UTP_CARR_OK); /* * Collect transmit mbufs, partial receive mbufs and * supplied mbufs */ for (i = 0; i < FATM_TX_QLEN; i++) { tx = GET_QUEUE(sc->txqueue, struct txqueue, i); if (tx->m) { bus_dmamap_unload(sc->tx_tag, tx->map); m_freem(tx->m); tx->m = NULL; } } /* Collect supplied mbufs */ while ((rb = LIST_FIRST(&sc->rbuf_used)) != NULL) { LIST_REMOVE(rb, link); bus_dmamap_unload(sc->rbuf_tag, rb->map); m_free(rb->m); rb->m = NULL; LIST_INSERT_HEAD(&sc->rbuf_free, rb, link); } /* Unwait any waiters */ wakeup(&sc->sadi_mem); /* wakeup all threads waiting for STAT or REG buffers */ cv_broadcast(&sc->cv_stat); cv_broadcast(&sc->cv_regs); sc->flags &= ~(FATM_STAT_INUSE | FATM_REGS_INUSE); /* wakeup all threads waiting on commands */ for (i = 0; i < FATM_CMD_QLEN; i++) { q = GET_QUEUE(sc->cmdqueue, struct cmdqueue, i); H_SYNCSTAT_POSTREAD(sc, q->q.statp); if ((stat = H_GETSTAT(q->q.statp)) != FATM_STAT_FREE) { H_SETSTAT(q->q.statp, stat | FATM_STAT_ERROR); H_SYNCSTAT_PREWRITE(sc, q->q.statp); wakeup(q); } } utopia_reset_media(&sc->utopia); } sc->small_cnt = sc->large_cnt = 0; /* Reset vcc info */ if (sc->vccs != NULL) { sc->open_vccs = 0; for (i = 0; i < FORE_MAX_VCC + 1; i++) { if (sc->vccs[i] != NULL) { if ((sc->vccs[i]->vflags & (FATM_VCC_OPEN | FATM_VCC_TRY_OPEN)) == 0) { uma_zfree(sc->vcc_zone, sc->vccs[i]); sc->vccs[i] = NULL; } else { sc->vccs[i]->vflags = 0; sc->open_vccs++; } } } } } /* * Load the firmware into the board and save the entry point. */ static uint32_t firmware_load(struct fatm_softc *sc) { struct firmware *fw = (struct firmware *)firmware; DBG(sc, INIT, ("loading - entry=%x", fw->entry)); bus_space_write_region_4(sc->memt, sc->memh, fw->offset, firmware, sizeof(firmware) / sizeof(firmware[0])); BARRIER_RW(sc); return (fw->entry); } /* * Read a character from the virtual UART. The availability of a character * is signaled by a non-null value of the 32 bit register. The eating of * the character by us is signalled to the card by setting that register * to zero. */ static int rx_getc(struct fatm_softc *sc) { int w = 50; int c; while (w--) { c = READ4(sc, FATMO_UART_TO_HOST); BARRIER_RW(sc); if (c != 0) { WRITE4(sc, FATMO_UART_TO_HOST, 0); DBGC(sc, UART, ("%c", c & 0xff)); return (c & 0xff); } DELAY(1000); } return (-1); } /* * Eat up characters from the board and stuff them in the bit-bucket. */ static void rx_flush(struct fatm_softc *sc) { int w = 10000; while (w-- && rx_getc(sc) >= 0) ; } /* * Write a character to the card. The UART is available if the register * is zero. */ static int tx_putc(struct fatm_softc *sc, u_char c) { int w = 10; int c1; while (w--) { c1 = READ4(sc, FATMO_UART_TO_960); BARRIER_RW(sc); if (c1 == 0) { WRITE4(sc, FATMO_UART_TO_960, c | CHAR_AVAIL); DBGC(sc, UART, ("%c", c & 0xff)); return (0); } DELAY(1000); } return (-1); } /* * Start the firmware. This is doing by issuing a 'go' command with * the hex entry address of the firmware. Then we wait for the self-test to * succeed. 
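 * The command is typed character by character over the virtual UART
 * ("go <entry>\r", with the entry point sent as four hex digits) and
 * FATMO_BOOT_STATUS is then polled until it reports CP_RUNNING.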
*/ static int fatm_start_firmware(struct fatm_softc *sc, uint32_t start) { static char hex[] = "0123456789abcdef"; u_int w, val; DBG(sc, INIT, ("starting")); rx_flush(sc); tx_putc(sc, '\r'); DELAY(1000); rx_flush(sc); tx_putc(sc, 'g'); (void)rx_getc(sc); tx_putc(sc, 'o'); (void)rx_getc(sc); tx_putc(sc, ' '); (void)rx_getc(sc); tx_putc(sc, hex[(start >> 12) & 0xf]); (void)rx_getc(sc); tx_putc(sc, hex[(start >> 8) & 0xf]); (void)rx_getc(sc); tx_putc(sc, hex[(start >> 4) & 0xf]); (void)rx_getc(sc); tx_putc(sc, hex[(start >> 0) & 0xf]); (void)rx_getc(sc); tx_putc(sc, '\r'); rx_flush(sc); for (w = 100; w; w--) { BARRIER_R(sc); val = READ4(sc, FATMO_BOOT_STATUS); switch (val) { case CP_RUNNING: return (0); case SELF_TEST_FAIL: return (EIO); } DELAY(1000); } return (EIO); } /* * Initialize one card and host queue. */ static void init_card_queue(struct fatm_softc *sc, struct fqueue *queue, int qlen, size_t qel_size, size_t desc_size, cardoff_t off, u_char **statpp, uint32_t *cardstat, u_char *descp, uint32_t carddesc) { struct fqelem *el = queue->chunk; while (qlen--) { el->card = off; off += 8; /* size of card entry */ el->statp = (uint32_t *)(*statpp); (*statpp) += sizeof(uint32_t); H_SETSTAT(el->statp, FATM_STAT_FREE); H_SYNCSTAT_PREWRITE(sc, el->statp); WRITE4(sc, el->card + FATMOS_STATP, (*cardstat)); (*cardstat) += sizeof(uint32_t); el->ioblk = descp; descp += desc_size; el->card_ioblk = carddesc; carddesc += desc_size; el = (struct fqelem *)((u_char *)el + qel_size); } queue->tail = queue->head = 0; } /* * Issue the initialize operation to the card, wait for completion and * initialize the on-board and host queue structures with offsets and * addresses. */ static int fatm_init_cmd(struct fatm_softc *sc) { int w, c; u_char *statp; uint32_t card_stat; u_int cnt; struct fqelem *el; cardoff_t off; DBG(sc, INIT, ("command")); WRITE4(sc, FATMO_ISTAT, 0); WRITE4(sc, FATMO_IMASK, 1); WRITE4(sc, FATMO_HLOGGER, 0); WRITE4(sc, FATMO_INIT + FATMOI_RECEIVE_TRESHOLD, 0); WRITE4(sc, FATMO_INIT + FATMOI_NUM_CONNECT, FORE_MAX_VCC); WRITE4(sc, FATMO_INIT + FATMOI_CQUEUE_LEN, FATM_CMD_QLEN); WRITE4(sc, FATMO_INIT + FATMOI_TQUEUE_LEN, FATM_TX_QLEN); WRITE4(sc, FATMO_INIT + FATMOI_RQUEUE_LEN, FATM_RX_QLEN); WRITE4(sc, FATMO_INIT + FATMOI_RPD_EXTENSION, RPD_EXTENSIONS); WRITE4(sc, FATMO_INIT + FATMOI_TPD_EXTENSION, TPD_EXTENSIONS); /* * initialize buffer descriptors */ WRITE4(sc, FATMO_INIT + FATMOI_SMALL_B1 + FATMOB_QUEUE_LENGTH, SMALL_SUPPLY_QLEN); WRITE4(sc, FATMO_INIT + FATMOI_SMALL_B1 + FATMOB_BUFFER_SIZE, SMALL_BUFFER_LEN); WRITE4(sc, FATMO_INIT + FATMOI_SMALL_B1 + FATMOB_POOL_SIZE, SMALL_POOL_SIZE); WRITE4(sc, FATMO_INIT + FATMOI_SMALL_B1 + FATMOB_SUPPLY_BLKSIZE, SMALL_SUPPLY_BLKSIZE); WRITE4(sc, FATMO_INIT + FATMOI_LARGE_B1 + FATMOB_QUEUE_LENGTH, LARGE_SUPPLY_QLEN); WRITE4(sc, FATMO_INIT + FATMOI_LARGE_B1 + FATMOB_BUFFER_SIZE, LARGE_BUFFER_LEN); WRITE4(sc, FATMO_INIT + FATMOI_LARGE_B1 + FATMOB_POOL_SIZE, LARGE_POOL_SIZE); WRITE4(sc, FATMO_INIT + FATMOI_LARGE_B1 + FATMOB_SUPPLY_BLKSIZE, LARGE_SUPPLY_BLKSIZE); WRITE4(sc, FATMO_INIT + FATMOI_SMALL_B2 + FATMOB_QUEUE_LENGTH, 0); WRITE4(sc, FATMO_INIT + FATMOI_SMALL_B2 + FATMOB_BUFFER_SIZE, 0); WRITE4(sc, FATMO_INIT + FATMOI_SMALL_B2 + FATMOB_POOL_SIZE, 0); WRITE4(sc, FATMO_INIT + FATMOI_SMALL_B2 + FATMOB_SUPPLY_BLKSIZE, 0); WRITE4(sc, FATMO_INIT + FATMOI_LARGE_B2 + FATMOB_QUEUE_LENGTH, 0); WRITE4(sc, FATMO_INIT + FATMOI_LARGE_B2 + FATMOB_BUFFER_SIZE, 0); WRITE4(sc, FATMO_INIT + FATMOI_LARGE_B2 + FATMOB_POOL_SIZE, 0); WRITE4(sc, FATMO_INIT + FATMOI_LARGE_B2 + 
FATMOB_SUPPLY_BLKSIZE, 0); /* * Start the command */ BARRIER_W(sc); WRITE4(sc, FATMO_INIT + FATMOI_STATUS, FATM_STAT_PENDING); BARRIER_W(sc); WRITE4(sc, FATMO_INIT + FATMOI_OP, FATM_OP_INITIALIZE); BARRIER_W(sc); /* * Busy wait for completion */ w = 100; while (w--) { c = READ4(sc, FATMO_INIT + FATMOI_STATUS); BARRIER_R(sc); if (c & FATM_STAT_COMPLETE) break; DELAY(1000); } if (c & FATM_STAT_ERROR) return (EIO); /* * Initialize the queues */ statp = sc->stat_mem.mem; card_stat = sc->stat_mem.paddr; /* * Command queue. This is special in that it's on the card. */ el = sc->cmdqueue.chunk; off = READ4(sc, FATMO_COMMAND_QUEUE); DBG(sc, INIT, ("cmd queue=%x", off)); for (cnt = 0; cnt < FATM_CMD_QLEN; cnt++) { el = &((struct cmdqueue *)sc->cmdqueue.chunk + cnt)->q; el->card = off; off += 32; /* size of card structure */ el->statp = (uint32_t *)statp; statp += sizeof(uint32_t); H_SETSTAT(el->statp, FATM_STAT_FREE); H_SYNCSTAT_PREWRITE(sc, el->statp); WRITE4(sc, el->card + FATMOC_STATP, card_stat); card_stat += sizeof(uint32_t); } sc->cmdqueue.tail = sc->cmdqueue.head = 0; /* * Now the other queues. These are in memory */ init_card_queue(sc, &sc->txqueue, FATM_TX_QLEN, sizeof(struct txqueue), TPD_SIZE, READ4(sc, FATMO_TRANSMIT_QUEUE), &statp, &card_stat, sc->txq_mem.mem, sc->txq_mem.paddr); init_card_queue(sc, &sc->rxqueue, FATM_RX_QLEN, sizeof(struct rxqueue), RPD_SIZE, READ4(sc, FATMO_RECEIVE_QUEUE), &statp, &card_stat, sc->rxq_mem.mem, sc->rxq_mem.paddr); init_card_queue(sc, &sc->s1queue, SMALL_SUPPLY_QLEN, sizeof(struct supqueue), BSUP_BLK2SIZE(SMALL_SUPPLY_BLKSIZE), READ4(sc, FATMO_SMALL_B1_QUEUE), &statp, &card_stat, sc->s1q_mem.mem, sc->s1q_mem.paddr); init_card_queue(sc, &sc->l1queue, LARGE_SUPPLY_QLEN, sizeof(struct supqueue), BSUP_BLK2SIZE(LARGE_SUPPLY_BLKSIZE), READ4(sc, FATMO_LARGE_B1_QUEUE), &statp, &card_stat, sc->l1q_mem.mem, sc->l1q_mem.paddr); sc->txcnt = 0; return (0); } /* * Read PROM. Called only from attach code. Here we spin because the interrupt * handler is not yet set up. 
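 * A FATM_OP_GET_PROM_DATA command is issued with the bus address of
 * prom_mem and the status word polled for up to a second; on success
 * the ESI, serial number and hardware version are copied out of the
 * PROM into the interface MIB.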
*/ static int fatm_getprom(struct fatm_softc *sc) { int i; struct prom *prom; struct cmdqueue *q; DBG(sc, INIT, ("reading prom")); q = GET_QUEUE(sc->cmdqueue, struct cmdqueue, sc->cmdqueue.head); NEXT_QUEUE_ENTRY(sc->cmdqueue.head, FATM_CMD_QLEN); q->error = 0; q->cb = NULL; H_SETSTAT(q->q.statp, FATM_STAT_PENDING); H_SYNCSTAT_PREWRITE(sc, q->q.statp); bus_dmamap_sync(sc->prom_mem.dmat, sc->prom_mem.map, BUS_DMASYNC_PREREAD); WRITE4(sc, q->q.card + FATMOC_GPROM_BUF, sc->prom_mem.paddr); BARRIER_W(sc); WRITE4(sc, q->q.card + FATMOC_OP, FATM_OP_GET_PROM_DATA); BARRIER_W(sc); for (i = 0; i < 1000; i++) { H_SYNCSTAT_POSTREAD(sc, q->q.statp); if (H_GETSTAT(q->q.statp) & (FATM_STAT_COMPLETE | FATM_STAT_ERROR)) break; DELAY(1000); } if (i == 1000) { if_printf(sc->ifp, "getprom timeout\n"); return (EIO); } H_SYNCSTAT_POSTREAD(sc, q->q.statp); if (H_GETSTAT(q->q.statp) & FATM_STAT_ERROR) { if_printf(sc->ifp, "getprom error\n"); return (EIO); } H_SETSTAT(q->q.statp, FATM_STAT_FREE); H_SYNCSTAT_PREWRITE(sc, q->q.statp); NEXT_QUEUE_ENTRY(sc->cmdqueue.tail, FATM_CMD_QLEN); bus_dmamap_sync(sc->prom_mem.dmat, sc->prom_mem.map, BUS_DMASYNC_POSTREAD); #ifdef notdef { u_int i; printf("PROM: "); u_char *ptr = (u_char *)sc->prom_mem.mem; for (i = 0; i < sizeof(struct prom); i++) printf("%02x ", *ptr++); printf("\n"); } #endif prom = (struct prom *)sc->prom_mem.mem; bcopy(prom->mac + 2, IFP2IFATM(sc->ifp)->mib.esi, 6); IFP2IFATM(sc->ifp)->mib.serial = le32toh(prom->serial); IFP2IFATM(sc->ifp)->mib.hw_version = le32toh(prom->version); IFP2IFATM(sc->ifp)->mib.sw_version = READ4(sc, FATMO_FIRMWARE_RELEASE); if_printf(sc->ifp, "ESI=%02x:%02x:%02x:%02x:%02x:%02x " "serial=%u hw=0x%x sw=0x%x\n", IFP2IFATM(sc->ifp)->mib.esi[0], IFP2IFATM(sc->ifp)->mib.esi[1], IFP2IFATM(sc->ifp)->mib.esi[2], IFP2IFATM(sc->ifp)->mib.esi[3], IFP2IFATM(sc->ifp)->mib.esi[4], IFP2IFATM(sc->ifp)->mib.esi[5], IFP2IFATM(sc->ifp)->mib.serial, IFP2IFATM(sc->ifp)->mib.hw_version, IFP2IFATM(sc->ifp)->mib.sw_version); return (0); } /* * This is the callback function for bus_dmamap_load. We assume, that we * have a 32-bit bus and so have always one segment. */ static void dmaload_helper(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { bus_addr_t *ptr = (bus_addr_t *)arg; if (error != 0) { printf("%s: error=%d\n", __func__, error); return; } KASSERT(nsegs == 1, ("too many DMA segments")); KASSERT(segs[0].ds_addr <= 0xffffffff, ("DMA address too large %lx", (u_long)segs[0].ds_addr)); *ptr = segs[0].ds_addr; } /* * Allocate a chunk of DMA-able memory and map it. 
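 *
 * This follows the usual three-step busdma sequence; a condensed sketch of
 * the calls below (error unwinding omitted):
 *
 *	bus_dma_tag_create(sc->parent_dmat, mem->align, 0,
 *	    BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
 *	    mem->size, 1, BUS_SPACE_MAXSIZE_32BIT, BUS_DMA_ALLOCNOW,
 *	    NULL, NULL, &mem->dmat);
 *	bus_dmamem_alloc(mem->dmat, &mem->mem, 0, &mem->map);
 *	bus_dmamap_load(mem->dmat, mem->map, mem->mem, mem->size,
 *	    dmaload_helper, &mem->paddr, BUS_DMA_NOWAIT);
 *
 * dmaload_helper() stores the single physical segment address in mem->paddr.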
*/ static int alloc_dma_memory(struct fatm_softc *sc, const char *nm, struct fatm_mem *mem) { int error; mem->mem = NULL; if (bus_dma_tag_create(sc->parent_dmat, mem->align, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, mem->size, 1, BUS_SPACE_MAXSIZE_32BIT, BUS_DMA_ALLOCNOW, NULL, NULL, &mem->dmat)) { if_printf(sc->ifp, "could not allocate %s DMA tag\n", nm); return (ENOMEM); } error = bus_dmamem_alloc(mem->dmat, &mem->mem, 0, &mem->map); if (error) { if_printf(sc->ifp, "could not allocate %s DMA memory: " "%d\n", nm, error); bus_dma_tag_destroy(mem->dmat); mem->mem = NULL; return (error); } error = bus_dmamap_load(mem->dmat, mem->map, mem->mem, mem->size, dmaload_helper, &mem->paddr, BUS_DMA_NOWAIT); if (error) { if_printf(sc->ifp, "could not load %s DMA memory: " "%d\n", nm, error); bus_dmamem_free(mem->dmat, mem->mem, mem->map); bus_dma_tag_destroy(mem->dmat); mem->mem = NULL; return (error); } DBG(sc, DMA, ("DMA %s V/P/S/Z %p/%lx/%x/%x", nm, mem->mem, (u_long)mem->paddr, mem->size, mem->align)); return (0); } #ifdef TEST_DMA_SYNC static int alloc_dma_memoryX(struct fatm_softc *sc, const char *nm, struct fatm_mem *mem) { int error; mem->mem = NULL; if (bus_dma_tag_create(NULL, mem->align, 0, BUS_SPACE_MAXADDR_24BIT, BUS_SPACE_MAXADDR, NULL, NULL, mem->size, 1, mem->size, BUS_DMA_ALLOCNOW, NULL, NULL, &mem->dmat)) { if_printf(sc->ifp, "could not allocate %s DMA tag\n", nm); return (ENOMEM); } mem->mem = contigmalloc(mem->size, M_DEVBUF, M_WAITOK, BUS_SPACE_MAXADDR_24BIT, BUS_SPACE_MAXADDR_32BIT, mem->align, 0); error = bus_dmamap_create(mem->dmat, 0, &mem->map); if (error) { if_printf(sc->ifp, "could not allocate %s DMA map: " "%d\n", nm, error); contigfree(mem->mem, mem->size, M_DEVBUF); bus_dma_tag_destroy(mem->dmat); mem->mem = NULL; return (error); } error = bus_dmamap_load(mem->dmat, mem->map, mem->mem, mem->size, dmaload_helper, &mem->paddr, BUS_DMA_NOWAIT); if (error) { if_printf(sc->ifp, "could not load %s DMA memory: " "%d\n", nm, error); bus_dmamap_destroy(mem->dmat, mem->map); contigfree(mem->mem, mem->size, M_DEVBUF); bus_dma_tag_destroy(mem->dmat); mem->mem = NULL; return (error); } DBG(sc, DMA, ("DMAX %s V/P/S/Z %p/%lx/%x/%x", nm, mem->mem, (u_long)mem->paddr, mem->size, mem->align)); printf("DMAX: %s V/P/S/Z %p/%lx/%x/%x", nm, mem->mem, (u_long)mem->paddr, mem->size, mem->align); return (0); } #endif /* TEST_DMA_SYNC */ /* * Destroy all resources of an dma-able memory chunk */ static void destroy_dma_memory(struct fatm_mem *mem) { if (mem->mem != NULL) { bus_dmamap_unload(mem->dmat, mem->map); bus_dmamem_free(mem->dmat, mem->mem, mem->map); bus_dma_tag_destroy(mem->dmat); mem->mem = NULL; } } #ifdef TEST_DMA_SYNC static void destroy_dma_memoryX(struct fatm_mem *mem) { if (mem->mem != NULL) { bus_dmamap_unload(mem->dmat, mem->map); bus_dmamap_destroy(mem->dmat, mem->map); contigfree(mem->mem, mem->size, M_DEVBUF); bus_dma_tag_destroy(mem->dmat); mem->mem = NULL; } } #endif /* TEST_DMA_SYNC */ /* * Try to supply buffers to the card if there are free entries in the queues */ static void fatm_supply_small_buffers(struct fatm_softc *sc) { int nblocks, nbufs; struct supqueue *q; struct rbd *bd; int i, j, error, cnt; struct mbuf *m; struct rbuf *rb; bus_addr_t phys; nbufs = max(4 * sc->open_vccs, 32); nbufs = min(nbufs, SMALL_POOL_SIZE); nbufs -= sc->small_cnt; nblocks = howmany(nbufs, SMALL_SUPPLY_BLKSIZE); for (cnt = 0; cnt < nblocks; cnt++) { q = GET_QUEUE(sc->s1queue, struct supqueue, sc->s1queue.head); H_SYNCSTAT_POSTREAD(sc, q->q.statp); if 
(H_GETSTAT(q->q.statp) != FATM_STAT_FREE) break; bd = (struct rbd *)q->q.ioblk; for (i = 0; i < SMALL_SUPPLY_BLKSIZE; i++) { if ((rb = LIST_FIRST(&sc->rbuf_free)) == NULL) { if_printf(sc->ifp, "out of rbufs\n"); break; } MGETHDR(m, M_NOWAIT, MT_DATA); if (m == NULL) { LIST_INSERT_HEAD(&sc->rbuf_free, rb, link); break; } M_ALIGN(m, SMALL_BUFFER_LEN); error = bus_dmamap_load(sc->rbuf_tag, rb->map, m->m_data, SMALL_BUFFER_LEN, dmaload_helper, &phys, BUS_DMA_NOWAIT); if (error) { if_printf(sc->ifp, "dmamap_load mbuf failed %d", error); m_freem(m); LIST_INSERT_HEAD(&sc->rbuf_free, rb, link); break; } bus_dmamap_sync(sc->rbuf_tag, rb->map, BUS_DMASYNC_PREREAD); LIST_REMOVE(rb, link); LIST_INSERT_HEAD(&sc->rbuf_used, rb, link); rb->m = m; bd[i].handle = rb - sc->rbufs; H_SETDESC(bd[i].buffer, phys); } if (i < SMALL_SUPPLY_BLKSIZE) { for (j = 0; j < i; j++) { rb = sc->rbufs + bd[j].handle; bus_dmamap_unload(sc->rbuf_tag, rb->map); m_free(rb->m); rb->m = NULL; LIST_REMOVE(rb, link); LIST_INSERT_HEAD(&sc->rbuf_free, rb, link); } break; } H_SYNCQ_PREWRITE(&sc->s1q_mem, bd, sizeof(struct rbd) * SMALL_SUPPLY_BLKSIZE); H_SETSTAT(q->q.statp, FATM_STAT_PENDING); H_SYNCSTAT_PREWRITE(sc, q->q.statp); WRITE4(sc, q->q.card, q->q.card_ioblk); BARRIER_W(sc); sc->small_cnt += SMALL_SUPPLY_BLKSIZE; NEXT_QUEUE_ENTRY(sc->s1queue.head, SMALL_SUPPLY_QLEN); } } /* * Try to supply buffers to the card if there are free entries in the queues * We assume that all buffers are within the address space accessible by the * card (32-bit), so we don't need bounce buffers. */ static void fatm_supply_large_buffers(struct fatm_softc *sc) { int nbufs, nblocks, cnt; struct supqueue *q; struct rbd *bd; int i, j, error; struct mbuf *m; struct rbuf *rb; bus_addr_t phys; nbufs = max(4 * sc->open_vccs, 32); nbufs = min(nbufs, LARGE_POOL_SIZE); nbufs -= sc->large_cnt; nblocks = howmany(nbufs, LARGE_SUPPLY_BLKSIZE); for (cnt = 0; cnt < nblocks; cnt++) { q = GET_QUEUE(sc->l1queue, struct supqueue, sc->l1queue.head); H_SYNCSTAT_POSTREAD(sc, q->q.statp); if (H_GETSTAT(q->q.statp) != FATM_STAT_FREE) break; bd = (struct rbd *)q->q.ioblk; for (i = 0; i < LARGE_SUPPLY_BLKSIZE; i++) { if ((rb = LIST_FIRST(&sc->rbuf_free)) == NULL) { if_printf(sc->ifp, "out of rbufs\n"); break; } if ((m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR)) == NULL) { LIST_INSERT_HEAD(&sc->rbuf_free, rb, link); break; } /* No MEXT_ALIGN */ m->m_data += MCLBYTES - LARGE_BUFFER_LEN; error = bus_dmamap_load(sc->rbuf_tag, rb->map, m->m_data, LARGE_BUFFER_LEN, dmaload_helper, &phys, BUS_DMA_NOWAIT); if (error) { if_printf(sc->ifp, "dmamap_load mbuf failed %d", error); m_freem(m); LIST_INSERT_HEAD(&sc->rbuf_free, rb, link); break; } bus_dmamap_sync(sc->rbuf_tag, rb->map, BUS_DMASYNC_PREREAD); LIST_REMOVE(rb, link); LIST_INSERT_HEAD(&sc->rbuf_used, rb, link); rb->m = m; bd[i].handle = rb - sc->rbufs; H_SETDESC(bd[i].buffer, phys); } if (i < LARGE_SUPPLY_BLKSIZE) { for (j = 0; j < i; j++) { rb = sc->rbufs + bd[j].handle; bus_dmamap_unload(sc->rbuf_tag, rb->map); m_free(rb->m); rb->m = NULL; LIST_REMOVE(rb, link); LIST_INSERT_HEAD(&sc->rbuf_free, rb, link); } break; } H_SYNCQ_PREWRITE(&sc->l1q_mem, bd, sizeof(struct rbd) * LARGE_SUPPLY_BLKSIZE); H_SETSTAT(q->q.statp, FATM_STAT_PENDING); H_SYNCSTAT_PREWRITE(sc, q->q.statp); WRITE4(sc, q->q.card, q->q.card_ioblk); BARRIER_W(sc); sc->large_cnt += LARGE_SUPPLY_BLKSIZE; NEXT_QUEUE_ENTRY(sc->l1queue.head, LARGE_SUPPLY_QLEN); } } /* * Actually start the card. The lock must be held here. 
* Reset, load the firmware, start it, initializes queues, read the PROM * and supply receive buffers to the card. */ static void fatm_init_locked(struct fatm_softc *sc) { struct rxqueue *q; int i, c, error; uint32_t start; DBG(sc, INIT, ("initialize")); if (sc->ifp->if_drv_flags & IFF_DRV_RUNNING) fatm_stop(sc); /* * Hard reset the board */ if (fatm_reset(sc)) return; start = firmware_load(sc); if (fatm_start_firmware(sc, start) || fatm_init_cmd(sc) || fatm_getprom(sc)) { fatm_reset(sc); return; } /* * Handle media */ c = READ4(sc, FATMO_MEDIA_TYPE); switch (c) { case FORE_MT_TAXI_100: IFP2IFATM(sc->ifp)->mib.media = IFM_ATM_TAXI_100; IFP2IFATM(sc->ifp)->mib.pcr = 227273; break; case FORE_MT_TAXI_140: IFP2IFATM(sc->ifp)->mib.media = IFM_ATM_TAXI_140; IFP2IFATM(sc->ifp)->mib.pcr = 318181; break; case FORE_MT_UTP_SONET: IFP2IFATM(sc->ifp)->mib.media = IFM_ATM_UTP_155; IFP2IFATM(sc->ifp)->mib.pcr = 353207; break; case FORE_MT_MM_OC3_ST: case FORE_MT_MM_OC3_SC: IFP2IFATM(sc->ifp)->mib.media = IFM_ATM_MM_155; IFP2IFATM(sc->ifp)->mib.pcr = 353207; break; case FORE_MT_SM_OC3_ST: case FORE_MT_SM_OC3_SC: IFP2IFATM(sc->ifp)->mib.media = IFM_ATM_SM_155; IFP2IFATM(sc->ifp)->mib.pcr = 353207; break; default: log(LOG_ERR, "fatm: unknown media type %d\n", c); IFP2IFATM(sc->ifp)->mib.media = IFM_ATM_UNKNOWN; IFP2IFATM(sc->ifp)->mib.pcr = 353207; break; } sc->ifp->if_baudrate = 53 * 8 * IFP2IFATM(sc->ifp)->mib.pcr; utopia_init_media(&sc->utopia); /* * Initialize the RBDs */ for (i = 0; i < FATM_RX_QLEN; i++) { q = GET_QUEUE(sc->rxqueue, struct rxqueue, i); WRITE4(sc, q->q.card + 0, q->q.card_ioblk); } BARRIER_W(sc); /* * Supply buffers to the card */ fatm_supply_small_buffers(sc); fatm_supply_large_buffers(sc); /* * Now set flags, that we are ready */ sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; /* * Start the watchdog timer */ callout_reset(&sc->watchdog_timer, hz * 5, fatm_watchdog, sc); /* start SUNI */ utopia_start(&sc->utopia); ATMEV_SEND_IFSTATE_CHANGED(IFP2IFATM(sc->ifp), sc->utopia.carrier == UTP_CARR_OK); /* start all channels */ for (i = 0; i < FORE_MAX_VCC + 1; i++) if (sc->vccs[i] != NULL) { sc->vccs[i]->vflags |= FATM_VCC_REOPEN; error = fatm_load_vc(sc, sc->vccs[i]); if (error != 0) { if_printf(sc->ifp, "reopening %u " "failed: %d\n", i, error); sc->vccs[i]->vflags &= ~FATM_VCC_REOPEN; } } DBG(sc, INIT, ("done")); } /* * This is the exported as initialisation function. */ static void fatm_init(void *p) { struct fatm_softc *sc = p; FATM_LOCK(sc); fatm_init_locked(sc); FATM_UNLOCK(sc); } /************************************************************/ /* * The INTERRUPT handling */ /* * Check the command queue. If a command was completed, call the completion * function for that command. */ static void fatm_intr_drain_cmd(struct fatm_softc *sc) { struct cmdqueue *q; int stat; /* * Drain command queue */ for (;;) { q = GET_QUEUE(sc->cmdqueue, struct cmdqueue, sc->cmdqueue.tail); H_SYNCSTAT_POSTREAD(sc, q->q.statp); stat = H_GETSTAT(q->q.statp); if (stat != FATM_STAT_COMPLETE && stat != (FATM_STAT_COMPLETE | FATM_STAT_ERROR) && stat != FATM_STAT_ERROR) break; (*q->cb)(sc, q); H_SETSTAT(q->q.statp, FATM_STAT_FREE); H_SYNCSTAT_PREWRITE(sc, q->q.statp); NEXT_QUEUE_ENTRY(sc->cmdqueue.tail, FATM_CMD_QLEN); } } /* * Drain the small buffer supply queue. 
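 *
 * Each queue is a simple ring: the driver submits at "head" and reaps
 * completions at "tail". NEXT_QUEUE_ENTRY() presumably just advances an
 * index modulo the queue length; a minimal stand-alone equivalent would be:
 *
 *	static inline u_int
 *	ring_next(u_int idx, u_int qlen)
 *	{
 *		return ((idx + 1) % qlen);
 *	}
 *
 * The drain loop below stops at the first entry whose status word does not
 * have FATM_STAT_COMPLETE set.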
*/ static void fatm_intr_drain_small_buffers(struct fatm_softc *sc) { struct supqueue *q; int stat; for (;;) { q = GET_QUEUE(sc->s1queue, struct supqueue, sc->s1queue.tail); H_SYNCSTAT_POSTREAD(sc, q->q.statp); stat = H_GETSTAT(q->q.statp); if ((stat & FATM_STAT_COMPLETE) == 0) break; if (stat & FATM_STAT_ERROR) log(LOG_ERR, "%s: status %x\n", __func__, stat); H_SETSTAT(q->q.statp, FATM_STAT_FREE); H_SYNCSTAT_PREWRITE(sc, q->q.statp); NEXT_QUEUE_ENTRY(sc->s1queue.tail, SMALL_SUPPLY_QLEN); } } /* * Drain the large buffer supply queue. */ static void fatm_intr_drain_large_buffers(struct fatm_softc *sc) { struct supqueue *q; int stat; for (;;) { q = GET_QUEUE(sc->l1queue, struct supqueue, sc->l1queue.tail); H_SYNCSTAT_POSTREAD(sc, q->q.statp); stat = H_GETSTAT(q->q.statp); if ((stat & FATM_STAT_COMPLETE) == 0) break; if (stat & FATM_STAT_ERROR) log(LOG_ERR, "%s status %x\n", __func__, stat); H_SETSTAT(q->q.statp, FATM_STAT_FREE); H_SYNCSTAT_PREWRITE(sc, q->q.statp); NEXT_QUEUE_ENTRY(sc->l1queue.tail, LARGE_SUPPLY_QLEN); } } /* * Check the receive queue. Send any received PDU up the protocol stack * (except when there was an error or the VCI appears to be closed. In this * case discard the PDU). */ static void fatm_intr_drain_rx(struct fatm_softc *sc) { struct rxqueue *q; int stat, mlen; u_int i; uint32_t h; struct mbuf *last, *m0; struct rpd *rpd; struct rbuf *rb; u_int vci, vpi, pt; struct atm_pseudohdr aph; struct ifnet *ifp; struct card_vcc *vc; for (;;) { q = GET_QUEUE(sc->rxqueue, struct rxqueue, sc->rxqueue.tail); H_SYNCSTAT_POSTREAD(sc, q->q.statp); stat = H_GETSTAT(q->q.statp); if ((stat & FATM_STAT_COMPLETE) == 0) break; rpd = (struct rpd *)q->q.ioblk; H_SYNCQ_POSTREAD(&sc->rxq_mem, rpd, RPD_SIZE); rpd->nseg = le32toh(rpd->nseg); mlen = 0; m0 = last = NULL; for (i = 0; i < rpd->nseg; i++) { rb = sc->rbufs + rpd->segment[i].handle; if (m0 == NULL) { m0 = last = rb->m; } else { last->m_next = rb->m; last = rb->m; } last->m_next = NULL; if (last->m_flags & M_EXT) sc->large_cnt--; else sc->small_cnt--; bus_dmamap_sync(sc->rbuf_tag, rb->map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(sc->rbuf_tag, rb->map); rb->m = NULL; LIST_REMOVE(rb, link); LIST_INSERT_HEAD(&sc->rbuf_free, rb, link); last->m_len = le32toh(rpd->segment[i].length); mlen += last->m_len; } m0->m_pkthdr.len = mlen; m0->m_pkthdr.rcvif = sc->ifp; h = le32toh(rpd->atm_header); vpi = (h >> 20) & 0xff; vci = (h >> 4 ) & 0xffff; pt = (h >> 1 ) & 0x7; /* * Locate the VCC this packet belongs to */ if (!VC_OK(sc, vpi, vci)) vc = NULL; else if ((vc = sc->vccs[vci]) == NULL || !(sc->vccs[vci]->vflags & FATM_VCC_OPEN)) { sc->istats.rx_closed++; vc = NULL; } DBG(sc, RCV, ("RCV: vc=%u.%u pt=%u mlen=%d %s", vpi, vci, pt, mlen, vc == NULL ? "dropped" : "")); if (vc == NULL) { m_freem(m0); } else { #ifdef ENABLE_BPF if (!(vc->param.flags & ATMIO_FLAG_NG) && vc->param.aal == ATMIO_AAL_5 && (vc->param.flags & ATM_PH_LLCSNAP)) BPF_MTAP(sc->ifp, m0); #endif ATM_PH_FLAGS(&aph) = vc->param.flags; ATM_PH_VPI(&aph) = vpi; ATM_PH_SETVCI(&aph, vci); ifp = sc->ifp; if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); vc->ipackets++; vc->ibytes += m0->m_pkthdr.len; atm_input(ifp, &aph, m0, vc->rxhand); } H_SETSTAT(q->q.statp, FATM_STAT_FREE); H_SYNCSTAT_PREWRITE(sc, q->q.statp); WRITE4(sc, q->q.card, q->q.card_ioblk); BARRIER_W(sc); NEXT_QUEUE_ENTRY(sc->rxqueue.tail, FATM_RX_QLEN); } } /* * Check the transmit queue. Free the mbuf chains that we were transmitting. 
 */
static void
fatm_intr_drain_tx(struct fatm_softc *sc)
{
	struct txqueue *q;
	int stat;

	/*
	 * Drain tx queue
	 */
	for (;;) {
		q = GET_QUEUE(sc->txqueue, struct txqueue, sc->txqueue.tail);

		H_SYNCSTAT_POSTREAD(sc, q->q.statp);
		stat = H_GETSTAT(q->q.statp);

		if (stat != FATM_STAT_COMPLETE &&
		    stat != (FATM_STAT_COMPLETE | FATM_STAT_ERROR) &&
		    stat != FATM_STAT_ERROR)
			break;

		H_SETSTAT(q->q.statp, FATM_STAT_FREE);
		H_SYNCSTAT_PREWRITE(sc, q->q.statp);

		bus_dmamap_sync(sc->tx_tag, q->map, BUS_DMASYNC_POSTWRITE);
		bus_dmamap_unload(sc->tx_tag, q->map);

		m_freem(q->m);
		q->m = NULL;
		sc->txcnt--;

		NEXT_QUEUE_ENTRY(sc->txqueue.tail, FATM_TX_QLEN);
	}
}

/*
 * Interrupt handler
 */
static void
fatm_intr(void *p)
{
	struct fatm_softc *sc = (struct fatm_softc *)p;

	FATM_LOCK(sc);
	if (!READ4(sc, FATMO_PSR)) {
		FATM_UNLOCK(sc);
		return;
	}
	WRITE4(sc, FATMO_HCR, FATM_HCR_CLRIRQ);

	if (!(sc->ifp->if_drv_flags & IFF_DRV_RUNNING)) {
		FATM_UNLOCK(sc);
		return;
	}

	fatm_intr_drain_cmd(sc);
	fatm_intr_drain_rx(sc);
	fatm_intr_drain_tx(sc);
	fatm_intr_drain_small_buffers(sc);
	fatm_intr_drain_large_buffers(sc);
	fatm_supply_small_buffers(sc);
	fatm_supply_large_buffers(sc);

	FATM_UNLOCK(sc);

	if (sc->retry_tx && _IF_QLEN(&sc->ifp->if_snd))
		(*sc->ifp->if_start)(sc->ifp);
}

/*
 * Get device statistics. This must be called with the softc locked.
 * We use a preallocated buffer, so we need to protect this buffer.
 * We do this by using a condition variable and a flag. If the flag is set,
 * the buffer is in use by one thread (one thread is executing a GETSTAT
 * card command). In this case all other threads that are trying to get
 * statistics block on that condition variable. When the thread finishes
 * using the buffer, it resets the flag and signals the condition variable.
 * This will wake up the next thread that is waiting for the buffer. If the
 * interface is stopped, the stopping function will broadcast the cv; all
 * threads will then find that the interface has been stopped and return.
 *
 * The buffer is acquired in fatm_getstat(); it must be released by the
 * caller when it is done with the buffer.
*/ static void fatm_getstat_complete(struct fatm_softc *sc, struct cmdqueue *q) { H_SYNCSTAT_POSTREAD(sc, q->q.statp); if (H_GETSTAT(q->q.statp) & FATM_STAT_ERROR) { sc->istats.get_stat_errors++; q->error = EIO; } wakeup(&sc->sadi_mem); } static int fatm_getstat(struct fatm_softc *sc) { int error; struct cmdqueue *q; /* * Wait until either the interface is stopped or we can get the * statistics buffer */ for (;;) { if (!(sc->ifp->if_drv_flags & IFF_DRV_RUNNING)) return (EIO); if (!(sc->flags & FATM_STAT_INUSE)) break; cv_wait(&sc->cv_stat, &sc->mtx); } sc->flags |= FATM_STAT_INUSE; q = GET_QUEUE(sc->cmdqueue, struct cmdqueue, sc->cmdqueue.head); H_SYNCSTAT_POSTREAD(sc, q->q.statp); if (!(H_GETSTAT(q->q.statp) & FATM_STAT_FREE)) { sc->istats.cmd_queue_full++; return (EIO); } NEXT_QUEUE_ENTRY(sc->cmdqueue.head, FATM_CMD_QLEN); q->error = 0; q->cb = fatm_getstat_complete; H_SETSTAT(q->q.statp, FATM_STAT_PENDING); H_SYNCSTAT_PREWRITE(sc, q->q.statp); bus_dmamap_sync(sc->sadi_mem.dmat, sc->sadi_mem.map, BUS_DMASYNC_PREREAD); WRITE4(sc, q->q.card + FATMOC_GSTAT_BUF, sc->sadi_mem.paddr); BARRIER_W(sc); WRITE4(sc, q->q.card + FATMOC_OP, FATM_OP_REQUEST_STATS | FATM_OP_INTERRUPT_SEL); BARRIER_W(sc); /* * Wait for the command to complete */ error = msleep(&sc->sadi_mem, &sc->mtx, PZERO | PCATCH, "fatm_stat", hz); switch (error) { case EWOULDBLOCK: error = EIO; break; case ERESTART: error = EINTR; break; case 0: bus_dmamap_sync(sc->sadi_mem.dmat, sc->sadi_mem.map, BUS_DMASYNC_POSTREAD); error = q->error; break; } /* * Swap statistics */ if (q->error == 0) { u_int i; uint32_t *p = (uint32_t *)sc->sadi_mem.mem; for (i = 0; i < sizeof(struct fatm_stats) / sizeof(uint32_t); i++, p++) *p = be32toh(*p); } return (error); } /* * Create a copy of a single mbuf. It can have either internal or * external data, it may have a packet header. External data is really * copied, so the new buffer is writeable. */ static struct mbuf * copy_mbuf(struct mbuf *m) { struct mbuf *new; MGET(new, M_NOWAIT, MT_DATA); if (new == NULL) return (NULL); if (m->m_flags & M_PKTHDR) { M_MOVE_PKTHDR(new, m); if (m->m_len > MHLEN) MCLGET(new, M_WAITOK); } else { if (m->m_len > MLEN) MCLGET(new, M_WAITOK); } bcopy(m->m_data, new->m_data, m->m_len); new->m_len = m->m_len; new->m_flags &= ~M_RDONLY; return (new); } /* * All segments must have a four byte aligned buffer address and a four * byte aligned length. Step through an mbuf chain and check these conditions. * If the buffer address is not aligned and this is a normal mbuf, move * the data down. Else make a copy of the mbuf with aligned data. * If the buffer length is not aligned steel data from the next mbuf. * We don't need to check whether this has more than one external reference, * because steeling data doesn't change the external cluster. * If the last mbuf is not aligned, fill with zeroes. * * Return packet length (well we should have this in the packet header), * but be careful not to count the zero fill at the end. * * If fixing fails free the chain and zero the pointer. * * We assume, that aligning the virtual address also aligns the mapped bus * address. 
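 *
 * For example, a 6-byte segment that is followed by more data has two bytes
 * stolen from the next mbuf, while a trailing 6-byte segment is padded with
 * two zero bytes that are not counted in the returned length. The number of
 * bytes to steal or pad is simply (illustrative helper, not used by the
 * driver):
 *
 *	static inline u_int
 *	pad_to_4(u_int len)
 *	{
 *		return ((4 - (len % 4)) % 4);
 *	}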
*/ static u_int fatm_fix_chain(struct fatm_softc *sc, struct mbuf **mp) { struct mbuf *m = *mp, *prev = NULL, *next, *new; u_int mlen = 0, fill = 0; int first, off; u_char *d, *cp; do { next = m->m_next; if ((uintptr_t)mtod(m, void *) % 4 != 0 || (m->m_len % 4 != 0 && next)) { /* * Needs fixing */ first = (m == *mp); d = mtod(m, u_char *); if ((off = (uintptr_t)(void *)d % 4) != 0) { if (M_WRITABLE(m)) { sc->istats.fix_addr_copy++; bcopy(d, d - off, m->m_len); m->m_data = (caddr_t)(d - off); } else { if ((new = copy_mbuf(m)) == NULL) { sc->istats.fix_addr_noext++; goto fail; } sc->istats.fix_addr_ext++; if (prev) prev->m_next = new; new->m_next = next; m_free(m); m = new; } } if ((off = m->m_len % 4) != 0) { if (!M_WRITABLE(m)) { if ((new = copy_mbuf(m)) == NULL) { sc->istats.fix_len_noext++; goto fail; } sc->istats.fix_len_copy++; if (prev) prev->m_next = new; new->m_next = next; m_free(m); m = new; } else sc->istats.fix_len++; d = mtod(m, u_char *) + m->m_len; off = 4 - off; while (off) { if (next == NULL) { *d++ = 0; fill++; } else if (next->m_len == 0) { sc->istats.fix_empty++; next = m_free(next); continue; } else { cp = mtod(next, u_char *); *d++ = *cp++; next->m_len--; next->m_data = (caddr_t)cp; } off--; m->m_len++; } } if (first) *mp = m; } mlen += m->m_len; prev = m; } while ((m = next) != NULL); return (mlen - fill); fail: m_freem(*mp); *mp = NULL; return (0); } /* * The helper function is used to load the computed physical addresses * into the transmit descriptor. */ static void fatm_tpd_load(void *varg, bus_dma_segment_t *segs, int nsegs, bus_size_t mapsize, int error) { struct tpd *tpd = varg; if (error) return; KASSERT(nsegs <= TPD_EXTENSIONS + TXD_FIXED, ("too many segments")); tpd->spec = 0; while (nsegs--) { H_SETDESC(tpd->segment[tpd->spec].buffer, segs->ds_addr); H_SETDESC(tpd->segment[tpd->spec].length, segs->ds_len); tpd->spec++; segs++; } } /* * Start output. * * Note, that we update the internal statistics without the lock here. */ static int fatm_tx(struct fatm_softc *sc, struct mbuf *m, struct card_vcc *vc, u_int mlen) { struct txqueue *q; u_int nblks; int error, aal, nsegs; struct tpd *tpd; /* * Get a queue element. * If there isn't one - try to drain the transmit queue * We used to sleep here if that doesn't help, but we * should not sleep here, because we are called with locks. */ q = GET_QUEUE(sc->txqueue, struct txqueue, sc->txqueue.head); H_SYNCSTAT_POSTREAD(sc, q->q.statp); if (H_GETSTAT(q->q.statp) != FATM_STAT_FREE) { fatm_intr_drain_tx(sc); H_SYNCSTAT_POSTREAD(sc, q->q.statp); if (H_GETSTAT(q->q.statp) != FATM_STAT_FREE) { if (sc->retry_tx) { sc->istats.tx_retry++; IF_PREPEND(&sc->ifp->if_snd, m); return (1); } sc->istats.tx_queue_full++; m_freem(m); return (0); } sc->istats.tx_queue_almost_full++; } tpd = q->q.ioblk; m->m_data += sizeof(struct atm_pseudohdr); m->m_len -= sizeof(struct atm_pseudohdr); #ifdef ENABLE_BPF if (!(vc->param.flags & ATMIO_FLAG_NG) && vc->param.aal == ATMIO_AAL_5 && (vc->param.flags & ATM_PH_LLCSNAP)) BPF_MTAP(sc->ifp, m); #endif /* map the mbuf */ error = bus_dmamap_load_mbuf(sc->tx_tag, q->map, m, fatm_tpd_load, tpd, BUS_DMA_NOWAIT); if(error) { if_inc_counter(sc->ifp, IFCOUNTER_OERRORS, 1); if_printf(sc->ifp, "mbuf loaded error=%d\n", error); m_freem(m); return (0); } nsegs = tpd->spec; bus_dmamap_sync(sc->tx_tag, q->map, BUS_DMASYNC_PREWRITE); /* * OK. Now go and do it. */ aal = (vc->param.aal == ATMIO_AAL_5) ? 
5 : 0; H_SETSTAT(q->q.statp, FATM_STAT_PENDING); H_SYNCSTAT_PREWRITE(sc, q->q.statp); q->m = m; /* * If the transmit queue is almost full, schedule a * transmit interrupt so that transmit descriptors can * be recycled. */ H_SETDESC(tpd->spec, TDX_MKSPEC((sc->txcnt >= (4 * FATM_TX_QLEN) / 5), aal, nsegs, mlen)); H_SETDESC(tpd->atm_header, TDX_MKHDR(vc->param.vpi, vc->param.vci, 0, 0)); if (vc->param.traffic == ATMIO_TRAFFIC_UBR) H_SETDESC(tpd->stream, 0); else { u_int i; for (i = 0; i < RATE_TABLE_SIZE; i++) if (rate_table[i].cell_rate < vc->param.tparam.pcr) break; if (i > 0) i--; H_SETDESC(tpd->stream, rate_table[i].ratio); } H_SYNCQ_PREWRITE(&sc->txq_mem, tpd, TPD_SIZE); nblks = TDX_SEGS2BLKS(nsegs); DBG(sc, XMIT, ("XMIT: mlen=%d spec=0x%x nsegs=%d blocks=%d", mlen, le32toh(tpd->spec), nsegs, nblks)); WRITE4(sc, q->q.card + 0, q->q.card_ioblk | nblks); BARRIER_W(sc); sc->txcnt++; if_inc_counter(sc->ifp, IFCOUNTER_OPACKETS, 1); vc->obytes += m->m_pkthdr.len; vc->opackets++; NEXT_QUEUE_ENTRY(sc->txqueue.head, FATM_TX_QLEN); return (0); } static void fatm_start(struct ifnet *ifp) { struct atm_pseudohdr aph; struct fatm_softc *sc; struct mbuf *m; u_int mlen, vpi, vci; struct card_vcc *vc; sc = ifp->if_softc; while (1) { IF_DEQUEUE(&ifp->if_snd, m); if (m == NULL) break; /* * Loop through the mbuf chain and compute the total length * of the packet. Check that all data pointer are * 4 byte aligned. If they are not, call fatm_mfix to * fix that problem. This comes more or less from the * en driver. */ mlen = fatm_fix_chain(sc, &m); if (m == NULL) continue; if (m->m_len < sizeof(struct atm_pseudohdr) && (m = m_pullup(m, sizeof(struct atm_pseudohdr))) == NULL) continue; aph = *mtod(m, struct atm_pseudohdr *); mlen -= sizeof(struct atm_pseudohdr); if (mlen == 0) { m_freem(m); continue; } if (mlen > FATM_MAXPDU) { sc->istats.tx_pdu2big++; m_freem(m); continue; } vci = ATM_PH_VCI(&aph); vpi = ATM_PH_VPI(&aph); /* * From here on we need the softc */ FATM_LOCK(sc); if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { FATM_UNLOCK(sc); m_freem(m); break; } if (!VC_OK(sc, vpi, vci) || (vc = sc->vccs[vci]) == NULL || !(vc->vflags & FATM_VCC_OPEN)) { FATM_UNLOCK(sc); m_freem(m); continue; } if (fatm_tx(sc, m, vc, mlen)) { FATM_UNLOCK(sc); break; } FATM_UNLOCK(sc); } } /* * VCC management * * This may seem complicated. The reason for this is, that we need an * asynchronuous open/close for the NATM VCCs because our ioctl handler * is called with the radix node head of the routing table locked. Therefor * we cannot sleep there and wait for the open/close to succeed. For this * reason we just initiate the operation from the ioctl. */ /* * Command the card to open/close a VC. * Return the queue entry for waiting if we are successful. 
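 *
 * The caller chooses how completion is handled: synchronous opens/closes
 * pass fatm_cmd_complete(), which just wakes up the thread sleeping in
 * fatm_waitvcc(), while ATMIO_FLAG_ASYNC VCCs pass fatm_open_complete() or
 * fatm_close_complete(), which finish the bookkeeping from the interrupt
 * path. Roughly ("async" is shorthand for the flag test):
 *
 *	q = fatm_start_vcc(sc, vpi, vci, cmd, mtu,
 *	    async ? fatm_open_complete : fatm_cmd_complete);
 *	if (!async)
 *		error = fatm_waitvcc(sc, q);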
*/ static struct cmdqueue * fatm_start_vcc(struct fatm_softc *sc, u_int vpi, u_int vci, uint32_t cmd, u_int mtu, void (*func)(struct fatm_softc *, struct cmdqueue *)) { struct cmdqueue *q; q = GET_QUEUE(sc->cmdqueue, struct cmdqueue, sc->cmdqueue.head); H_SYNCSTAT_POSTREAD(sc, q->q.statp); if (!(H_GETSTAT(q->q.statp) & FATM_STAT_FREE)) { sc->istats.cmd_queue_full++; return (NULL); } NEXT_QUEUE_ENTRY(sc->cmdqueue.head, FATM_CMD_QLEN); q->error = 0; q->cb = func; H_SETSTAT(q->q.statp, FATM_STAT_PENDING); H_SYNCSTAT_PREWRITE(sc, q->q.statp); WRITE4(sc, q->q.card + FATMOC_ACTIN_VPVC, MKVPVC(vpi, vci)); BARRIER_W(sc); WRITE4(sc, q->q.card + FATMOC_ACTIN_MTU, mtu); BARRIER_W(sc); WRITE4(sc, q->q.card + FATMOC_OP, cmd); BARRIER_W(sc); return (q); } /* * The VC has been opened/closed and somebody has been waiting for this. * Wake him up. */ static void fatm_cmd_complete(struct fatm_softc *sc, struct cmdqueue *q) { H_SYNCSTAT_POSTREAD(sc, q->q.statp); if (H_GETSTAT(q->q.statp) & FATM_STAT_ERROR) { sc->istats.get_stat_errors++; q->error = EIO; } wakeup(q); } /* * Open complete */ static void fatm_open_finish(struct fatm_softc *sc, struct card_vcc *vc) { vc->vflags &= ~FATM_VCC_TRY_OPEN; vc->vflags |= FATM_VCC_OPEN; if (vc->vflags & FATM_VCC_REOPEN) { vc->vflags &= ~FATM_VCC_REOPEN; return; } /* inform management if this is not an NG * VCC or it's an NG PVC. */ if (!(vc->param.flags & ATMIO_FLAG_NG) || (vc->param.flags & ATMIO_FLAG_PVC)) ATMEV_SEND_VCC_CHANGED(IFP2IFATM(sc->ifp), 0, vc->param.vci, 1); } /* * The VC that we have tried to open asynchronuosly has been opened. */ static void fatm_open_complete(struct fatm_softc *sc, struct cmdqueue *q) { u_int vci; struct card_vcc *vc; vci = GETVCI(READ4(sc, q->q.card + FATMOC_ACTIN_VPVC)); vc = sc->vccs[vci]; H_SYNCSTAT_POSTREAD(sc, q->q.statp); if (H_GETSTAT(q->q.statp) & FATM_STAT_ERROR) { sc->istats.get_stat_errors++; sc->vccs[vci] = NULL; uma_zfree(sc->vcc_zone, vc); if_printf(sc->ifp, "opening VCI %u failed\n", vci); return; } fatm_open_finish(sc, vc); } /* * Wait on the queue entry until the VCC is opened/closed. */ static int fatm_waitvcc(struct fatm_softc *sc, struct cmdqueue *q) { int error; /* * Wait for the command to complete */ error = msleep(q, &sc->mtx, PZERO | PCATCH, "fatm_vci", hz); if (error != 0) return (error); return (q->error); } /* * Start to open a VCC. This just initiates the operation. 
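 *
 * A hypothetical parameter block that would pass the checks below (an AAL5
 * UBR channel on VPI 0/VCI 42; values are for illustration only, rxhand is
 * normally supplied by the caller):
 *
 *	struct atmio_openvcc op;
 *
 *	memset(&op, 0, sizeof(op));
 *	op.param.vpi = 0;
 *	op.param.vci = 42;
 *	op.param.aal = ATMIO_AAL_5;
 *	op.param.traffic = ATMIO_TRAFFIC_UBR;
 *	error = fatm_open_vcc(sc, &op);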
*/ static int fatm_open_vcc(struct fatm_softc *sc, struct atmio_openvcc *op) { int error; struct card_vcc *vc; /* * Check parameters */ if ((op->param.flags & ATMIO_FLAG_NOTX) && (op->param.flags & ATMIO_FLAG_NORX)) return (EINVAL); if (!VC_OK(sc, op->param.vpi, op->param.vci)) return (EINVAL); if (op->param.aal != ATMIO_AAL_0 && op->param.aal != ATMIO_AAL_5) return (EINVAL); vc = uma_zalloc(sc->vcc_zone, M_NOWAIT | M_ZERO); if (vc == NULL) return (ENOMEM); error = 0; FATM_LOCK(sc); if (!(sc->ifp->if_drv_flags & IFF_DRV_RUNNING)) { error = EIO; goto done; } if (sc->vccs[op->param.vci] != NULL) { error = EBUSY; goto done; } vc->param = op->param; vc->rxhand = op->rxhand; switch (op->param.traffic) { case ATMIO_TRAFFIC_UBR: break; case ATMIO_TRAFFIC_CBR: if (op->param.tparam.pcr == 0 || op->param.tparam.pcr > IFP2IFATM(sc->ifp)->mib.pcr) { error = EINVAL; goto done; } break; default: error = EINVAL; goto done; } vc->ibytes = vc->obytes = 0; vc->ipackets = vc->opackets = 0; vc->vflags = FATM_VCC_TRY_OPEN; sc->vccs[op->param.vci] = vc; sc->open_vccs++; error = fatm_load_vc(sc, vc); if (error != 0) { sc->vccs[op->param.vci] = NULL; sc->open_vccs--; goto done; } /* don't free below */ vc = NULL; done: FATM_UNLOCK(sc); if (vc != NULL) uma_zfree(sc->vcc_zone, vc); return (error); } /* * Try to initialize the given VC */ static int fatm_load_vc(struct fatm_softc *sc, struct card_vcc *vc) { uint32_t cmd; struct cmdqueue *q; int error; /* Command and buffer strategy */ cmd = FATM_OP_ACTIVATE_VCIN | FATM_OP_INTERRUPT_SEL | (0 << 16); if (vc->param.aal == ATMIO_AAL_0) cmd |= (0 << 8); else cmd |= (5 << 8); q = fatm_start_vcc(sc, vc->param.vpi, vc->param.vci, cmd, 1, (vc->param.flags & ATMIO_FLAG_ASYNC) ? fatm_open_complete : fatm_cmd_complete); if (q == NULL) return (EIO); if (!(vc->param.flags & ATMIO_FLAG_ASYNC)) { error = fatm_waitvcc(sc, q); if (error != 0) return (error); fatm_open_finish(sc, vc); } return (0); } /* * Finish close */ static void fatm_close_finish(struct fatm_softc *sc, struct card_vcc *vc) { /* inform management of this is not an NG * VCC or it's an NG PVC. */ if (!(vc->param.flags & ATMIO_FLAG_NG) || (vc->param.flags & ATMIO_FLAG_PVC)) ATMEV_SEND_VCC_CHANGED(IFP2IFATM(sc->ifp), 0, vc->param.vci, 0); sc->vccs[vc->param.vci] = NULL; sc->open_vccs--; uma_zfree(sc->vcc_zone, vc); } /* * The VC has been closed. */ static void fatm_close_complete(struct fatm_softc *sc, struct cmdqueue *q) { u_int vci; struct card_vcc *vc; vci = GETVCI(READ4(sc, q->q.card + FATMOC_ACTIN_VPVC)); vc = sc->vccs[vci]; H_SYNCSTAT_POSTREAD(sc, q->q.statp); if (H_GETSTAT(q->q.statp) & FATM_STAT_ERROR) { sc->istats.get_stat_errors++; /* keep the VCC in that state */ if_printf(sc->ifp, "closing VCI %u failed\n", vci); return; } fatm_close_finish(sc, vc); } /* * Initiate closing a VCC */ static int fatm_close_vcc(struct fatm_softc *sc, struct atmio_closevcc *cl) { int error; struct cmdqueue *q; struct card_vcc *vc; if (!VC_OK(sc, cl->vpi, cl->vci)) return (EINVAL); error = 0; FATM_LOCK(sc); if (!(sc->ifp->if_drv_flags & IFF_DRV_RUNNING)) { error = EIO; goto done; } vc = sc->vccs[cl->vci]; if (vc == NULL || !(vc->vflags & (FATM_VCC_OPEN | FATM_VCC_TRY_OPEN))) { error = ENOENT; goto done; } q = fatm_start_vcc(sc, cl->vpi, cl->vci, FATM_OP_DEACTIVATE_VCIN | FATM_OP_INTERRUPT_SEL, 1, (vc->param.flags & ATMIO_FLAG_ASYNC) ? 
fatm_close_complete : fatm_cmd_complete); if (q == NULL) { error = EIO; goto done; } vc->vflags &= ~(FATM_VCC_OPEN | FATM_VCC_TRY_OPEN); vc->vflags |= FATM_VCC_TRY_CLOSE; if (!(vc->param.flags & ATMIO_FLAG_ASYNC)) { error = fatm_waitvcc(sc, q); if (error != 0) goto done; fatm_close_finish(sc, vc); } done: FATM_UNLOCK(sc); return (error); } /* * IOCTL handler */ static int fatm_ioctl(struct ifnet *ifp, u_long cmd, caddr_t arg) { int error; struct fatm_softc *sc = ifp->if_softc; struct ifaddr *ifa = (struct ifaddr *)arg; struct ifreq *ifr = (struct ifreq *)arg; struct atmio_closevcc *cl = (struct atmio_closevcc *)arg; struct atmio_openvcc *op = (struct atmio_openvcc *)arg; struct atmio_vcctable *vtab; error = 0; switch (cmd) { case SIOCATMOPENVCC: /* kernel internal use */ error = fatm_open_vcc(sc, op); break; case SIOCATMCLOSEVCC: /* kernel internal use */ error = fatm_close_vcc(sc, cl); break; case SIOCSIFADDR: FATM_LOCK(sc); ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) fatm_init_locked(sc); switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: case AF_INET6: ifa->ifa_rtrequest = atm_rtrequest; break; #endif default: break; } FATM_UNLOCK(sc); break; case SIOCSIFFLAGS: FATM_LOCK(sc); if (ifp->if_flags & IFF_UP) { if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { fatm_init_locked(sc); } } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { fatm_stop(sc); } } FATM_UNLOCK(sc); break; case SIOCGIFMEDIA: case SIOCSIFMEDIA: if (ifp->if_drv_flags & IFF_DRV_RUNNING) error = ifmedia_ioctl(ifp, ifr, &sc->media, cmd); else error = EINVAL; break; case SIOCATMGVCCS: /* return vcc table */ vtab = atm_getvccs((struct atmio_vcc **)sc->vccs, FORE_MAX_VCC + 1, sc->open_vccs, &sc->mtx, 1); - error = copyout(vtab, ifr->ifr_data, sizeof(*vtab) + + error = copyout(vtab, ifr_data_get_ptr(ifr), sizeof(*vtab) + vtab->count * sizeof(vtab->vccs[0])); free(vtab, M_DEVBUF); break; case SIOCATMGETVCCS: /* internal netgraph use */ vtab = atm_getvccs((struct atmio_vcc **)sc->vccs, FORE_MAX_VCC + 1, sc->open_vccs, &sc->mtx, 0); if (vtab == NULL) { error = ENOMEM; break; } *(void **)arg = vtab; break; default: DBG(sc, IOCTL, ("+++ cmd=%08lx arg=%p", cmd, arg)); error = EINVAL; break; } return (error); } /* * Detach from the interface and free all resources allocated during * initialisation and later. 
*/ static int fatm_detach(device_t dev) { u_int i; struct rbuf *rb; struct fatm_softc *sc; struct txqueue *tx; sc = device_get_softc(dev); if (device_is_alive(dev)) { FATM_LOCK(sc); fatm_stop(sc); utopia_detach(&sc->utopia); FATM_UNLOCK(sc); atm_ifdetach(sc->ifp); /* XXX race */ } callout_drain(&sc->watchdog_timer); if (sc->ih != NULL) bus_teardown_intr(dev, sc->irqres, sc->ih); while ((rb = LIST_FIRST(&sc->rbuf_used)) != NULL) { if_printf(sc->ifp, "rbuf %p still in use!\n", rb); bus_dmamap_unload(sc->rbuf_tag, rb->map); m_freem(rb->m); LIST_REMOVE(rb, link); LIST_INSERT_HEAD(&sc->rbuf_free, rb, link); } if (sc->txqueue.chunk != NULL) { for (i = 0; i < FATM_TX_QLEN; i++) { tx = GET_QUEUE(sc->txqueue, struct txqueue, i); bus_dmamap_destroy(sc->tx_tag, tx->map); } } while ((rb = LIST_FIRST(&sc->rbuf_free)) != NULL) { bus_dmamap_destroy(sc->rbuf_tag, rb->map); LIST_REMOVE(rb, link); } if (sc->rbufs != NULL) free(sc->rbufs, M_DEVBUF); if (sc->vccs != NULL) { for (i = 0; i < FORE_MAX_VCC + 1; i++) if (sc->vccs[i] != NULL) { uma_zfree(sc->vcc_zone, sc->vccs[i]); sc->vccs[i] = NULL; } free(sc->vccs, M_DEVBUF); } if (sc->vcc_zone != NULL) uma_zdestroy(sc->vcc_zone); if (sc->l1queue.chunk != NULL) free(sc->l1queue.chunk, M_DEVBUF); if (sc->s1queue.chunk != NULL) free(sc->s1queue.chunk, M_DEVBUF); if (sc->rxqueue.chunk != NULL) free(sc->rxqueue.chunk, M_DEVBUF); if (sc->txqueue.chunk != NULL) free(sc->txqueue.chunk, M_DEVBUF); if (sc->cmdqueue.chunk != NULL) free(sc->cmdqueue.chunk, M_DEVBUF); destroy_dma_memory(&sc->reg_mem); destroy_dma_memory(&sc->sadi_mem); destroy_dma_memory(&sc->prom_mem); #ifdef TEST_DMA_SYNC destroy_dma_memoryX(&sc->s1q_mem); destroy_dma_memoryX(&sc->l1q_mem); destroy_dma_memoryX(&sc->rxq_mem); destroy_dma_memoryX(&sc->txq_mem); destroy_dma_memoryX(&sc->stat_mem); #endif if (sc->tx_tag != NULL) if (bus_dma_tag_destroy(sc->tx_tag)) printf("tx DMA tag busy!\n"); if (sc->rbuf_tag != NULL) if (bus_dma_tag_destroy(sc->rbuf_tag)) printf("rbuf DMA tag busy!\n"); if (sc->parent_dmat != NULL) if (bus_dma_tag_destroy(sc->parent_dmat)) printf("parent DMA tag busy!\n"); if (sc->irqres != NULL) bus_release_resource(dev, SYS_RES_IRQ, sc->irqid, sc->irqres); if (sc->memres != NULL) bus_release_resource(dev, SYS_RES_MEMORY, sc->memid, sc->memres); (void)sysctl_ctx_free(&sc->sysctl_ctx); cv_destroy(&sc->cv_stat); cv_destroy(&sc->cv_regs); mtx_destroy(&sc->mtx); if_free(sc->ifp); return (0); } /* * Sysctl handler */ static int fatm_sysctl_istats(SYSCTL_HANDLER_ARGS) { struct fatm_softc *sc = arg1; u_long *ret; int error; ret = malloc(sizeof(sc->istats), M_TEMP, M_WAITOK); FATM_LOCK(sc); bcopy(&sc->istats, ret, sizeof(sc->istats)); FATM_UNLOCK(sc); error = SYSCTL_OUT(req, ret, sizeof(sc->istats)); free(ret, M_TEMP); return (error); } /* * Sysctl handler for card statistics * This is disable because it destroys the PHY statistics. 
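 *
 * Both this handler and the istats handler above export an opaque array of
 * u_long. A minimal userland sketch for reading them ("fatm0" is only an
 * example unit name, headers and error details omitted):
 *
 *	size_t len;
 *	u_long *buf;
 *
 *	if (sysctlbyname("hw.atm.fatm0.istats", NULL, &len, NULL, 0) == -1)
 *		err(1, "sysctl");
 *	buf = malloc(len);
 *	if (sysctlbyname("hw.atm.fatm0.istats", buf, &len, NULL, 0) == -1)
 *		err(1, "sysctl");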
*/ static int fatm_sysctl_stats(SYSCTL_HANDLER_ARGS) { struct fatm_softc *sc = arg1; int error; const struct fatm_stats *s; u_long *ret; u_int i; ret = malloc(sizeof(u_long) * FATM_NSTATS, M_TEMP, M_WAITOK); FATM_LOCK(sc); if ((error = fatm_getstat(sc)) == 0) { s = sc->sadi_mem.mem; i = 0; ret[i++] = s->phy_4b5b.crc_header_errors; ret[i++] = s->phy_4b5b.framing_errors; ret[i++] = s->phy_oc3.section_bip8_errors; ret[i++] = s->phy_oc3.path_bip8_errors; ret[i++] = s->phy_oc3.line_bip24_errors; ret[i++] = s->phy_oc3.line_febe_errors; ret[i++] = s->phy_oc3.path_febe_errors; ret[i++] = s->phy_oc3.corr_hcs_errors; ret[i++] = s->phy_oc3.ucorr_hcs_errors; ret[i++] = s->atm.cells_transmitted; ret[i++] = s->atm.cells_received; ret[i++] = s->atm.vpi_bad_range; ret[i++] = s->atm.vpi_no_conn; ret[i++] = s->atm.vci_bad_range; ret[i++] = s->atm.vci_no_conn; ret[i++] = s->aal0.cells_transmitted; ret[i++] = s->aal0.cells_received; ret[i++] = s->aal0.cells_dropped; ret[i++] = s->aal4.cells_transmitted; ret[i++] = s->aal4.cells_received; ret[i++] = s->aal4.cells_crc_errors; ret[i++] = s->aal4.cels_protocol_errors; ret[i++] = s->aal4.cells_dropped; ret[i++] = s->aal4.cspdus_transmitted; ret[i++] = s->aal4.cspdus_received; ret[i++] = s->aal4.cspdus_protocol_errors; ret[i++] = s->aal4.cspdus_dropped; ret[i++] = s->aal5.cells_transmitted; ret[i++] = s->aal5.cells_received; ret[i++] = s->aal5.congestion_experienced; ret[i++] = s->aal5.cells_dropped; ret[i++] = s->aal5.cspdus_transmitted; ret[i++] = s->aal5.cspdus_received; ret[i++] = s->aal5.cspdus_crc_errors; ret[i++] = s->aal5.cspdus_protocol_errors; ret[i++] = s->aal5.cspdus_dropped; ret[i++] = s->aux.small_b1_failed; ret[i++] = s->aux.large_b1_failed; ret[i++] = s->aux.small_b2_failed; ret[i++] = s->aux.large_b2_failed; ret[i++] = s->aux.rpd_alloc_failed; ret[i++] = s->aux.receive_carrier; } /* declare the buffer free */ sc->flags &= ~FATM_STAT_INUSE; cv_signal(&sc->cv_stat); FATM_UNLOCK(sc); if (error == 0) error = SYSCTL_OUT(req, ret, sizeof(u_long) * FATM_NSTATS); free(ret, M_TEMP); return (error); } #define MAXDMASEGS 32 /* maximum number of receive descriptors */ /* * Attach to the device. * * We assume, that there is a global lock (Giant in this case) that protects * multiple threads from entering this function. This makes sense, doesn't it? */ static int fatm_attach(device_t dev) { struct ifnet *ifp; struct fatm_softc *sc; int unit; uint16_t cfg; int error = 0; struct rbuf *rb; u_int i; struct txqueue *tx; sc = device_get_softc(dev); unit = device_get_unit(dev); ifp = sc->ifp = if_alloc(IFT_ATM); if (ifp == NULL) { error = ENOSPC; goto fail; } IFP2IFATM(sc->ifp)->mib.device = ATM_DEVICE_PCA200E; IFP2IFATM(sc->ifp)->mib.serial = 0; IFP2IFATM(sc->ifp)->mib.hw_version = 0; IFP2IFATM(sc->ifp)->mib.sw_version = 0; IFP2IFATM(sc->ifp)->mib.vpi_bits = 0; IFP2IFATM(sc->ifp)->mib.vci_bits = FORE_VCIBITS; IFP2IFATM(sc->ifp)->mib.max_vpcs = 0; IFP2IFATM(sc->ifp)->mib.max_vccs = FORE_MAX_VCC; IFP2IFATM(sc->ifp)->mib.media = IFM_ATM_UNKNOWN; IFP2IFATM(sc->ifp)->phy = &sc->utopia; LIST_INIT(&sc->rbuf_free); LIST_INIT(&sc->rbuf_used); /* * Initialize mutex and condition variables. 
*/ mtx_init(&sc->mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF); cv_init(&sc->cv_stat, "fatm_stat"); cv_init(&sc->cv_regs, "fatm_regs"); sysctl_ctx_init(&sc->sysctl_ctx); callout_init_mtx(&sc->watchdog_timer, &sc->mtx, 0); /* * Make the sysctl tree */ if ((sc->sysctl_tree = SYSCTL_ADD_NODE(&sc->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_hw_atm), OID_AUTO, device_get_nameunit(dev), CTLFLAG_RD, 0, "")) == NULL) goto fail; if (SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, "istats", CTLTYPE_ULONG | CTLFLAG_RD, sc, 0, fatm_sysctl_istats, "LU", "internal statistics") == NULL) goto fail; if (SYSCTL_ADD_PROC(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, "stats", CTLTYPE_ULONG | CTLFLAG_RD, sc, 0, fatm_sysctl_stats, "LU", "card statistics") == NULL) goto fail; if (SYSCTL_ADD_INT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, "retry_tx", CTLFLAG_RW, &sc->retry_tx, 0, "retry flag") == NULL) goto fail; #ifdef FATM_DEBUG if (SYSCTL_ADD_UINT(&sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, "debug", CTLFLAG_RW, &sc->debug, 0, "debug flags") == NULL) goto fail; sc->debug = FATM_DEBUG; #endif /* * Network subsystem stuff */ ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_flags = IFF_SIMPLEX; ifp->if_ioctl = fatm_ioctl; ifp->if_start = fatm_start; ifp->if_init = fatm_init; ifp->if_linkmib = &IFP2IFATM(sc->ifp)->mib; ifp->if_linkmiblen = sizeof(IFP2IFATM(sc->ifp)->mib); /* * Enable busmaster */ pci_enable_busmaster(dev); /* * Map memory */ sc->memid = 0x10; sc->memres = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->memid, RF_ACTIVE); if (sc->memres == NULL) { if_printf(ifp, "could not map memory\n"); error = ENXIO; goto fail; } sc->memh = rman_get_bushandle(sc->memres); sc->memt = rman_get_bustag(sc->memres); /* * Convert endianness of slave access */ cfg = pci_read_config(dev, FATM_PCIR_MCTL, 1); cfg |= FATM_PCIM_SWAB; pci_write_config(dev, FATM_PCIR_MCTL, cfg, 1); /* * Allocate interrupt (activate at the end) */ sc->irqid = 0; sc->irqres = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->irqid, RF_SHAREABLE | RF_ACTIVE); if (sc->irqres == NULL) { if_printf(ifp, "could not allocate irq\n"); error = ENXIO; goto fail; } /* * Allocate the parent DMA tag. This is used simply to hold overall * restrictions for the controller (and PCI bus) and is never used * to do anything. */ if (bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE_32BIT, MAXDMASEGS, BUS_SPACE_MAXSIZE_32BIT, 0, NULL, NULL, &sc->parent_dmat)) { if_printf(ifp, "could not allocate parent DMA tag\n"); error = ENOMEM; goto fail; } /* * Allocate the receive buffer DMA tag. This tag must map a maximum of * a mbuf cluster. */ if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, MCLBYTES, 0, NULL, NULL, &sc->rbuf_tag)) { if_printf(ifp, "could not allocate rbuf DMA tag\n"); error = ENOMEM; goto fail; } /* * Allocate the transmission DMA tag. Must add 1, because * rounded up PDU will be 65536 bytes long. */ if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, FATM_MAXPDU + 1, TPD_EXTENSIONS + TXD_FIXED, MCLBYTES, 0, NULL, NULL, &sc->tx_tag)) { if_printf(ifp, "could not allocate tx DMA tag\n"); error = ENOMEM; goto fail; } /* * Allocate DMAable memory. 
*/ sc->stat_mem.size = sizeof(uint32_t) * (FATM_CMD_QLEN + FATM_TX_QLEN + FATM_RX_QLEN + SMALL_SUPPLY_QLEN + LARGE_SUPPLY_QLEN); sc->stat_mem.align = 4; sc->txq_mem.size = FATM_TX_QLEN * TPD_SIZE; sc->txq_mem.align = 32; sc->rxq_mem.size = FATM_RX_QLEN * RPD_SIZE; sc->rxq_mem.align = 32; sc->s1q_mem.size = SMALL_SUPPLY_QLEN * BSUP_BLK2SIZE(SMALL_SUPPLY_BLKSIZE); sc->s1q_mem.align = 32; sc->l1q_mem.size = LARGE_SUPPLY_QLEN * BSUP_BLK2SIZE(LARGE_SUPPLY_BLKSIZE); sc->l1q_mem.align = 32; #ifdef TEST_DMA_SYNC if ((error = alloc_dma_memoryX(sc, "STATUS", &sc->stat_mem)) != 0 || (error = alloc_dma_memoryX(sc, "TXQ", &sc->txq_mem)) != 0 || (error = alloc_dma_memoryX(sc, "RXQ", &sc->rxq_mem)) != 0 || (error = alloc_dma_memoryX(sc, "S1Q", &sc->s1q_mem)) != 0 || (error = alloc_dma_memoryX(sc, "L1Q", &sc->l1q_mem)) != 0) goto fail; #else if ((error = alloc_dma_memory(sc, "STATUS", &sc->stat_mem)) != 0 || (error = alloc_dma_memory(sc, "TXQ", &sc->txq_mem)) != 0 || (error = alloc_dma_memory(sc, "RXQ", &sc->rxq_mem)) != 0 || (error = alloc_dma_memory(sc, "S1Q", &sc->s1q_mem)) != 0 || (error = alloc_dma_memory(sc, "L1Q", &sc->l1q_mem)) != 0) goto fail; #endif sc->prom_mem.size = sizeof(struct prom); sc->prom_mem.align = 32; if ((error = alloc_dma_memory(sc, "PROM", &sc->prom_mem)) != 0) goto fail; sc->sadi_mem.size = sizeof(struct fatm_stats); sc->sadi_mem.align = 32; if ((error = alloc_dma_memory(sc, "STATISTICS", &sc->sadi_mem)) != 0) goto fail; sc->reg_mem.size = sizeof(uint32_t) * FATM_NREGS; sc->reg_mem.align = 32; if ((error = alloc_dma_memory(sc, "REGISTERS", &sc->reg_mem)) != 0) goto fail; /* * Allocate queues */ sc->cmdqueue.chunk = malloc(FATM_CMD_QLEN * sizeof(struct cmdqueue), M_DEVBUF, M_ZERO | M_WAITOK); sc->txqueue.chunk = malloc(FATM_TX_QLEN * sizeof(struct txqueue), M_DEVBUF, M_ZERO | M_WAITOK); sc->rxqueue.chunk = malloc(FATM_RX_QLEN * sizeof(struct rxqueue), M_DEVBUF, M_ZERO | M_WAITOK); sc->s1queue.chunk = malloc(SMALL_SUPPLY_QLEN * sizeof(struct supqueue), M_DEVBUF, M_ZERO | M_WAITOK); sc->l1queue.chunk = malloc(LARGE_SUPPLY_QLEN * sizeof(struct supqueue), M_DEVBUF, M_ZERO | M_WAITOK); sc->vccs = malloc((FORE_MAX_VCC + 1) * sizeof(sc->vccs[0]), M_DEVBUF, M_ZERO | M_WAITOK); sc->vcc_zone = uma_zcreate("FATM vccs", sizeof(struct card_vcc), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); if (sc->vcc_zone == NULL) { error = ENOMEM; goto fail; } /* * Allocate memory for the receive buffer headers. The total number * of headers should probably also include the maximum number of * buffers on the receive queue. */ sc->rbuf_total = SMALL_POOL_SIZE + LARGE_POOL_SIZE; sc->rbufs = malloc(sc->rbuf_total * sizeof(struct rbuf), M_DEVBUF, M_ZERO | M_WAITOK); /* * Put all rbuf headers on the free list and create DMA maps. */ for (rb = sc->rbufs, i = 0; i < sc->rbuf_total; i++, rb++) { if ((error = bus_dmamap_create(sc->rbuf_tag, 0, &rb->map))) { if_printf(sc->ifp, "creating rx map: %d\n", error); goto fail; } LIST_INSERT_HEAD(&sc->rbuf_free, rb, link); } /* * Create dma maps for transmission. In case of an error, free the * allocated DMA maps, because on some architectures maps are NULL * and we cannot distinguish between a failure and a NULL map in * the detach routine. 
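 *
 * This is the usual partial-failure unwind: if creating map i fails, destroy
 * maps 0..i-1 before bailing out. Generic sketch (create/destroy stand in
 * for bus_dmamap_create/bus_dmamap_destroy):
 *
 *	for (i = 0; i < n; i++)
 *		if (create(i) != 0) {
 *			while (i-- > 0)
 *				destroy(i);
 *			return (error);
 *		}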
*/ for (i = 0; i < FATM_TX_QLEN; i++) { tx = GET_QUEUE(sc->txqueue, struct txqueue, i); if ((error = bus_dmamap_create(sc->tx_tag, 0, &tx->map))) { if_printf(sc->ifp, "creating tx map: %d\n", error); while (i > 0) { tx = GET_QUEUE(sc->txqueue, struct txqueue, i - 1); bus_dmamap_destroy(sc->tx_tag, tx->map); i--; } goto fail; } } utopia_attach(&sc->utopia, IFP2IFATM(sc->ifp), &sc->media, &sc->mtx, &sc->sysctl_ctx, SYSCTL_CHILDREN(sc->sysctl_tree), &fatm_utopia_methods); sc->utopia.flags |= UTP_FL_NORESET | UTP_FL_POLL_CARRIER; /* * Attach the interface */ atm_ifattach(ifp); ifp->if_snd.ifq_maxlen = 512; #ifdef ENABLE_BPF bpfattach(ifp, DLT_ATM_RFC1483, sizeof(struct atmllc)); #endif error = bus_setup_intr(dev, sc->irqres, INTR_TYPE_NET | INTR_MPSAFE, NULL, fatm_intr, sc, &sc->ih); if (error) { if_printf(ifp, "couldn't setup irq\n"); goto fail; } fail: if (error) fatm_detach(dev); return (error); } #if defined(FATM_DEBUG) && 0 static void dump_s1_queue(struct fatm_softc *sc) { int i; struct supqueue *q; for(i = 0; i < SMALL_SUPPLY_QLEN; i++) { q = GET_QUEUE(sc->s1queue, struct supqueue, i); printf("%2d: card=%x(%x,%x) stat=%x\n", i, q->q.card, READ4(sc, q->q.card), READ4(sc, q->q.card + 4), *q->q.statp); } } #endif /* * Driver infrastructure. */ static device_method_t fatm_methods[] = { DEVMETHOD(device_probe, fatm_probe), DEVMETHOD(device_attach, fatm_attach), DEVMETHOD(device_detach, fatm_detach), { 0, 0 } }; static driver_t fatm_driver = { "fatm", fatm_methods, sizeof(struct fatm_softc), }; DRIVER_MODULE(fatm, pci, fatm_driver, fatm_devclass, 0, 0); Index: stable/11/sys/dev/hatm/if_hatm_ioctl.c =================================================================== --- stable/11/sys/dev/hatm/if_hatm_ioctl.c (revision 332287) +++ stable/11/sys/dev/hatm/if_hatm_ioctl.c (revision 332288) @@ -1,385 +1,385 @@ /*- * Copyright (c) 2001-2003 * Fraunhofer Institute for Open Communication Systems (FhG Fokus). * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Author: Hartmut Brandt * * ForeHE driver. * * Ioctl handler. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_natm.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static u_int hatm_natm_traffic = ATMIO_TRAFFIC_UBR; static u_int hatm_natm_pcr = 0; static int hatm_sysctl_natm_traffic(SYSCTL_HANDLER_ARGS); SYSCTL_DECL(_hw_atm); SYSCTL_PROC(_hw_atm, OID_AUTO, natm_traffic, CTLTYPE_UINT | CTLFLAG_RW, &hatm_natm_traffic, sizeof(hatm_natm_traffic), hatm_sysctl_natm_traffic, "IU", "traffic type for NATM connections"); SYSCTL_UINT(_hw_atm, OID_AUTO, natm_pcr, CTLFLAG_RW, &hatm_natm_pcr, 0, "PCR for NATM connections"); /* * Try to open the given VCC. */ static int hatm_open_vcc(struct hatm_softc *sc, struct atmio_openvcc *arg) { u_int cid; struct hevcc *vcc; int error = 0; DBG(sc, VCC, ("Open VCC: %u.%u flags=%#x", arg->param.vpi, arg->param.vci, arg->param.flags)); if ((arg->param.vpi & ~HE_VPI_MASK) || (arg->param.vci & ~HE_VCI_MASK) || (arg->param.vci == 0)) return (EINVAL); cid = HE_CID(arg->param.vpi, arg->param.vci); if ((arg->param.flags & ATMIO_FLAG_NOTX) && (arg->param.flags & ATMIO_FLAG_NORX)) return (EINVAL); vcc = uma_zalloc(sc->vcc_zone, M_NOWAIT | M_ZERO); if (vcc == NULL) return (ENOMEM); mtx_lock(&sc->mtx); if (!(sc->ifp->if_drv_flags & IFF_DRV_RUNNING)) { error = EIO; goto done; } if (sc->vccs[cid] != NULL) { error = EBUSY; goto done; } vcc->param = arg->param; vcc->rxhand = arg->rxhand; switch (vcc->param.aal) { case ATMIO_AAL_0: case ATMIO_AAL_5: case ATMIO_AAL_RAW: break; default: error = EINVAL; goto done; } switch (vcc->param.traffic) { case ATMIO_TRAFFIC_UBR: case ATMIO_TRAFFIC_CBR: case ATMIO_TRAFFIC_ABR: break; default: error = EINVAL; goto done; } vcc->ntpds = 0; vcc->chain = vcc->last = NULL; vcc->ibytes = vcc->ipackets = 0; vcc->obytes = vcc->opackets = 0; if (!(vcc->param.flags & ATMIO_FLAG_NOTX) && (error = hatm_tx_vcc_can_open(sc, cid, vcc)) != 0) goto done; /* ok - go ahead */ sc->vccs[cid] = vcc; hatm_load_vc(sc, cid, 0); /* don't free below */ vcc = NULL; sc->open_vccs++; done: mtx_unlock(&sc->mtx); if (vcc != NULL) uma_zfree(sc->vcc_zone, vcc); return (error); } void hatm_load_vc(struct hatm_softc *sc, u_int cid, int reopen) { struct hevcc *vcc = sc->vccs[cid]; if (!(vcc->param.flags & ATMIO_FLAG_NOTX)) hatm_tx_vcc_open(sc, cid); if (!(vcc->param.flags & ATMIO_FLAG_NORX)) hatm_rx_vcc_open(sc, cid); if (reopen) return; /* inform management about non-NG and NG-PVCs */ if (!(vcc->param.flags & ATMIO_FLAG_NG) || (vcc->param.flags & ATMIO_FLAG_PVC)) ATMEV_SEND_VCC_CHANGED(IFP2IFATM(sc->ifp), vcc->param.vpi, vcc->param.vci, 1); } /* * VCC has been finally closed. 
*/ void hatm_vcc_closed(struct hatm_softc *sc, u_int cid) { struct hevcc *vcc = sc->vccs[cid]; /* inform management about non-NG and NG-PVCs */ if (!(vcc->param.flags & ATMIO_FLAG_NG) || (vcc->param.flags & ATMIO_FLAG_PVC)) ATMEV_SEND_VCC_CHANGED(IFP2IFATM(sc->ifp), HE_VPI(cid), HE_VCI(cid), 0); sc->open_vccs--; uma_zfree(sc->vcc_zone, vcc); sc->vccs[cid] = NULL; } /* * Try to close the given VCC */ static int hatm_close_vcc(struct hatm_softc *sc, struct atmio_closevcc *arg) { u_int cid; struct hevcc *vcc; int error = 0; DBG(sc, VCC, ("Close VCC: %u.%u", arg->vpi, arg->vci)); if((arg->vpi & ~HE_VPI_MASK) || (arg->vci & ~HE_VCI_MASK) || (arg->vci == 0)) return (EINVAL); cid = HE_CID(arg->vpi, arg->vci); mtx_lock(&sc->mtx); vcc = sc->vccs[cid]; if (!(sc->ifp->if_drv_flags & IFF_DRV_RUNNING)) { error = EIO; goto done; } if (vcc == NULL || !(vcc->vflags & HE_VCC_OPEN)) { error = ENOENT; goto done; } if (vcc->vflags & HE_VCC_TX_OPEN) hatm_tx_vcc_close(sc, cid); if (vcc->vflags & HE_VCC_RX_OPEN) hatm_rx_vcc_close(sc, cid); if (vcc->param.flags & ATMIO_FLAG_ASYNC) goto done; while ((sc->ifp->if_drv_flags & IFF_DRV_RUNNING) && (vcc->vflags & (HE_VCC_TX_CLOSING | HE_VCC_RX_CLOSING))) cv_wait(&sc->vcc_cv, &sc->mtx); if (!(sc->ifp->if_drv_flags & IFF_DRV_RUNNING)) { error = EIO; goto done; } if (!(vcc->vflags & ATMIO_FLAG_NOTX)) hatm_tx_vcc_closed(sc, cid); hatm_vcc_closed(sc, cid); done: mtx_unlock(&sc->mtx); return (error); } /* * IOCTL handler */ int hatm_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct ifreq *ifr = (struct ifreq *)data; struct ifaddr *ifa = (struct ifaddr *)data; struct hatm_softc *sc = ifp->if_softc; struct atmio_vcctable *vtab; int error = 0; switch (cmd) { case SIOCSIFADDR: mtx_lock(&sc->mtx); ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) hatm_initialize(sc); switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: case AF_INET6: ifa->ifa_rtrequest = atm_rtrequest; break; #endif default: break; } mtx_unlock(&sc->mtx); break; case SIOCSIFFLAGS: mtx_lock(&sc->mtx); if (ifp->if_flags & IFF_UP) { if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { hatm_initialize(sc); } } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { hatm_stop(sc); } } mtx_unlock(&sc->mtx); break; case SIOCGIFMEDIA: case SIOCSIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->media, cmd); break; case SIOCSIFMTU: /* * Set the interface MTU. 
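 *
 * ATMMTU is 9180 bytes, the RFC 1626 default for IP over AAL5, so e.g.
 * "ifconfig hatm0 mtu 9180" is the largest value this will accept
 * ("hatm0" is just an example unit name).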
*/ if (ifr->ifr_mtu > ATMMTU) error = EINVAL; else ifp->if_mtu = ifr->ifr_mtu; break; case SIOCATMGVCCS: /* return vcc table */ vtab = atm_getvccs((struct atmio_vcc **)sc->vccs, HE_MAX_VCCS, sc->open_vccs, &sc->mtx, 1); - error = copyout(vtab, ifr->ifr_data, sizeof(*vtab) + + error = copyout(vtab, ifr_data_get_ptr(ifr), sizeof(*vtab) + vtab->count * sizeof(vtab->vccs[0])); free(vtab, M_DEVBUF); break; case SIOCATMGETVCCS: /* netgraph internal use */ vtab = atm_getvccs((struct atmio_vcc **)sc->vccs, HE_MAX_VCCS, sc->open_vccs, &sc->mtx, 0); if (vtab == NULL) { error = ENOMEM; break; } *(void **)data = vtab; break; case SIOCATMOPENVCC: /* kernel internal use */ error = hatm_open_vcc(sc, (struct atmio_openvcc *)data); break; case SIOCATMCLOSEVCC: /* kernel internal use */ error = hatm_close_vcc(sc, (struct atmio_closevcc *)data); break; default: DBG(sc, IOCTL, ("cmd=%08lx arg=%p", cmd, data)); error = EINVAL; break; } return (error); } static int hatm_sysctl_natm_traffic(SYSCTL_HANDLER_ARGS) { int error; int tmp; tmp = hatm_natm_traffic; error = sysctl_handle_int(oidp, &tmp, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (tmp != ATMIO_TRAFFIC_UBR && tmp != ATMIO_TRAFFIC_CBR) return (EINVAL); hatm_natm_traffic = tmp; return (0); } Index: stable/11/sys/dev/if_ndis/if_ndis.c =================================================================== --- stable/11/sys/dev/if_ndis/if_ndis.c (revision 332287) +++ stable/11/sys/dev/if_ndis/if_ndis.c (revision 332288) @@ -1,3424 +1,3427 @@ /*- * Copyright (c) 2003 * Bill Paul . All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Bill Paul. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. * * WPA support originally contributed by Arvind Srinivasan * then hacked upon mercilessly by my. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define NDIS_DEBUG #ifdef NDIS_DEBUG #define DPRINTF(x) do { if (ndis_debug > 0) printf x; } while (0) int ndis_debug = 0; SYSCTL_INT(_debug, OID_AUTO, ndis, CTLFLAG_RW, &ndis_debug, 0, "if_ndis debug level"); #else #define DPRINTF(x) #endif SYSCTL_DECL(_hw_ndisusb); int ndisusb_halt = 1; SYSCTL_INT(_hw_ndisusb, OID_AUTO, halt, CTLFLAG_RW, &ndisusb_halt, 0, "Halt NDIS USB driver when it's attached"); /* 0 - 30 dBm to mW conversion table */ static const uint16_t dBm2mW[] = { 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 6, 6, 7, 8, 9, 10, 11, 13, 14, 16, 18, 20, 22, 25, 28, 32, 35, 40, 45, 50, 56, 63, 71, 79, 89, 100, 112, 126, 141, 158, 178, 200, 224, 251, 282, 316, 355, 398, 447, 501, 562, 631, 708, 794, 891, 1000 }; MODULE_DEPEND(ndis, ether, 1, 1, 1); MODULE_DEPEND(ndis, wlan, 1, 1, 1); MODULE_DEPEND(ndis, ndisapi, 1, 1, 1); MODULE_VERSION(ndis, 1); int ndis_attach (device_t); int ndis_detach (device_t); int ndis_suspend (device_t); int ndis_resume (device_t); void ndis_shutdown (device_t); int ndisdrv_modevent (module_t, int, void *); static void ndis_txeof (ndis_handle, ndis_packet *, ndis_status); static void ndis_rxeof (ndis_handle, ndis_packet **, uint32_t); static void ndis_rxeof_eth (ndis_handle, ndis_handle, char *, void *, uint32_t, void *, uint32_t, uint32_t); static void ndis_rxeof_done (ndis_handle); static void ndis_rxeof_xfr (kdpc *, ndis_handle, void *, void *); static void ndis_rxeof_xfr_done (ndis_handle, ndis_packet *, uint32_t, uint32_t); static void ndis_linksts (ndis_handle, ndis_status, void *, uint32_t); static void ndis_linksts_done (ndis_handle); /* We need to wrap these functions for amd64. 
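 * The Windows x64 calling convention differs from the SysV AMD64 ABI, so
 * every callback handed to the binary miniport goes through a thunk
 * created with windrv_wrap() in ndisdrv_modevent() below.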
*/ static funcptr ndis_txeof_wrap; static funcptr ndis_rxeof_wrap; static funcptr ndis_rxeof_eth_wrap; static funcptr ndis_rxeof_done_wrap; static funcptr ndis_rxeof_xfr_wrap; static funcptr ndis_rxeof_xfr_done_wrap; static funcptr ndis_linksts_wrap; static funcptr ndis_linksts_done_wrap; static funcptr ndis_ticktask_wrap; static funcptr ndis_ifstarttask_wrap; static funcptr ndis_resettask_wrap; static funcptr ndis_inputtask_wrap; static struct ieee80211vap *ndis_vap_create(struct ieee80211com *, const char [IFNAMSIZ], int, enum ieee80211_opmode, int, const uint8_t [IEEE80211_ADDR_LEN], const uint8_t [IEEE80211_ADDR_LEN]); static void ndis_vap_delete (struct ieee80211vap *); static void ndis_tick (void *); static void ndis_ticktask (device_object *, void *); static int ndis_raw_xmit (struct ieee80211_node *, struct mbuf *, const struct ieee80211_bpf_params *); static void ndis_update_mcast (struct ieee80211com *); static void ndis_update_promisc (struct ieee80211com *); static void ndis_ifstart (struct ifnet *); static void ndis_ifstarttask (device_object *, void *); static void ndis_resettask (device_object *, void *); static void ndis_inputtask (device_object *, void *); static int ndis_ifioctl (struct ifnet *, u_long, caddr_t); static int ndis_newstate (struct ieee80211vap *, enum ieee80211_state, int); static int ndis_nettype_chan (uint32_t); static int ndis_nettype_mode (uint32_t); static void ndis_scan (void *); static void ndis_scan_results (struct ndis_softc *); static void ndis_scan_start (struct ieee80211com *); static void ndis_scan_end (struct ieee80211com *); static void ndis_set_channel (struct ieee80211com *); static void ndis_scan_curchan (struct ieee80211_scan_state *, unsigned long); static void ndis_scan_mindwell (struct ieee80211_scan_state *); static void ndis_init (void *); static void ndis_stop (struct ndis_softc *); static int ndis_ifmedia_upd (struct ifnet *); static void ndis_ifmedia_sts (struct ifnet *, struct ifmediareq *); static int ndis_get_bssid_list (struct ndis_softc *, ndis_80211_bssid_list_ex **); static int ndis_get_assoc (struct ndis_softc *, ndis_wlan_bssid_ex **); static int ndis_probe_offload (struct ndis_softc *); static int ndis_set_offload (struct ndis_softc *); static void ndis_getstate_80211 (struct ndis_softc *); static void ndis_setstate_80211 (struct ndis_softc *); static void ndis_auth_and_assoc (struct ndis_softc *, struct ieee80211vap *); static void ndis_media_status (struct ifnet *, struct ifmediareq *); static int ndis_set_cipher (struct ndis_softc *, int); static int ndis_set_wpa (struct ndis_softc *, void *, int); static int ndis_add_key (struct ieee80211vap *, const struct ieee80211_key *); static int ndis_del_key (struct ieee80211vap *, const struct ieee80211_key *); static void ndis_setmulti (struct ndis_softc *); static void ndis_map_sclist (void *, bus_dma_segment_t *, int, bus_size_t, int); static int ndis_ifattach(struct ndis_softc *); static int ndis_80211attach(struct ndis_softc *); static int ndis_80211ioctl(struct ieee80211com *, u_long , void *); static int ndis_80211transmit(struct ieee80211com *, struct mbuf *); static void ndis_80211parent(struct ieee80211com *); static int ndisdrv_loaded = 0; /* * This routine should call windrv_load() once for each driver * image. This will do the relocation and dynalinking for the * image, and create a Windows driver object which will be * saved in our driver database. 
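 * MOD_LOAD creates the callback wrappers exactly once, guarded by the
 * ndisdrv_loaded reference count; the final MOD_UNLOAD (or MOD_SHUTDOWN)
 * unwraps them again.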
*/ int ndisdrv_modevent(mod, cmd, arg) module_t mod; int cmd; void *arg; { int error = 0; switch (cmd) { case MOD_LOAD: ndisdrv_loaded++; if (ndisdrv_loaded > 1) break; windrv_wrap((funcptr)ndis_rxeof, &ndis_rxeof_wrap, 3, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_rxeof_eth, &ndis_rxeof_eth_wrap, 8, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_rxeof_done, &ndis_rxeof_done_wrap, 1, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_rxeof_xfr, &ndis_rxeof_xfr_wrap, 4, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_rxeof_xfr_done, &ndis_rxeof_xfr_done_wrap, 4, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_txeof, &ndis_txeof_wrap, 3, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_linksts, &ndis_linksts_wrap, 4, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_linksts_done, &ndis_linksts_done_wrap, 1, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_ticktask, &ndis_ticktask_wrap, 2, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_ifstarttask, &ndis_ifstarttask_wrap, 2, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_resettask, &ndis_resettask_wrap, 2, WINDRV_WRAP_STDCALL); windrv_wrap((funcptr)ndis_inputtask, &ndis_inputtask_wrap, 2, WINDRV_WRAP_STDCALL); break; case MOD_UNLOAD: ndisdrv_loaded--; if (ndisdrv_loaded > 0) break; /* fallthrough */ case MOD_SHUTDOWN: windrv_unwrap(ndis_rxeof_wrap); windrv_unwrap(ndis_rxeof_eth_wrap); windrv_unwrap(ndis_rxeof_done_wrap); windrv_unwrap(ndis_rxeof_xfr_wrap); windrv_unwrap(ndis_rxeof_xfr_done_wrap); windrv_unwrap(ndis_txeof_wrap); windrv_unwrap(ndis_linksts_wrap); windrv_unwrap(ndis_linksts_done_wrap); windrv_unwrap(ndis_ticktask_wrap); windrv_unwrap(ndis_ifstarttask_wrap); windrv_unwrap(ndis_resettask_wrap); windrv_unwrap(ndis_inputtask_wrap); break; default: error = EINVAL; break; } return (error); } /* * Program the 64-bit multicast hash filter. 
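 * In practice the NDIS interface takes an explicit list of addresses
 * rather than a hash: the list is bounded by OID_802_3_MAXIMUM_LIST_SIZE
 * and handed down via OID_802_3_MULTICAST_LIST, and if it would overflow
 * (or IFF_ALLMULTI/IFF_PROMISC is set) we fall back to
 * NDIS_PACKET_TYPE_ALL_MULTICAST.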
*/ static void ndis_setmulti(sc) struct ndis_softc *sc; { struct ifnet *ifp; struct ifmultiaddr *ifma; int len, mclistsz, error; uint8_t *mclist; if (!NDIS_INITIALIZED(sc)) return; if (sc->ndis_80211) return; ifp = sc->ifp; if (ifp->if_flags & IFF_ALLMULTI || ifp->if_flags & IFF_PROMISC) { sc->ndis_filter |= NDIS_PACKET_TYPE_ALL_MULTICAST; len = sizeof(sc->ndis_filter); error = ndis_set_info(sc, OID_GEN_CURRENT_PACKET_FILTER, &sc->ndis_filter, &len); if (error) device_printf(sc->ndis_dev, "set allmulti failed: %d\n", error); return; } if (TAILQ_EMPTY(&ifp->if_multiaddrs)) return; len = sizeof(mclistsz); ndis_get_info(sc, OID_802_3_MAXIMUM_LIST_SIZE, &mclistsz, &len); mclist = malloc(ETHER_ADDR_LEN * mclistsz, M_TEMP, M_NOWAIT|M_ZERO); if (mclist == NULL) { sc->ndis_filter |= NDIS_PACKET_TYPE_ALL_MULTICAST; goto out; } sc->ndis_filter |= NDIS_PACKET_TYPE_MULTICAST; len = 0; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), mclist + (ETHER_ADDR_LEN * len), ETHER_ADDR_LEN); len++; if (len > mclistsz) { if_maddr_runlock(ifp); sc->ndis_filter |= NDIS_PACKET_TYPE_ALL_MULTICAST; sc->ndis_filter &= ~NDIS_PACKET_TYPE_MULTICAST; goto out; } } if_maddr_runlock(ifp); len = len * ETHER_ADDR_LEN; error = ndis_set_info(sc, OID_802_3_MULTICAST_LIST, mclist, &len); if (error) { device_printf(sc->ndis_dev, "set mclist failed: %d\n", error); sc->ndis_filter |= NDIS_PACKET_TYPE_ALL_MULTICAST; sc->ndis_filter &= ~NDIS_PACKET_TYPE_MULTICAST; } out: free(mclist, M_TEMP); len = sizeof(sc->ndis_filter); error = ndis_set_info(sc, OID_GEN_CURRENT_PACKET_FILTER, &sc->ndis_filter, &len); if (error) device_printf(sc->ndis_dev, "set multi failed: %d\n", error); } static int ndis_set_offload(sc) struct ndis_softc *sc; { ndis_task_offload *nto; ndis_task_offload_hdr *ntoh; ndis_task_tcpip_csum *nttc; struct ifnet *ifp; int len, error; if (!NDIS_INITIALIZED(sc)) return (EINVAL); if (sc->ndis_80211) return (EINVAL); /* See if there's anything to set. 
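 * The request built here is a task-offload header followed by a single
 * NDIS_TASK_TCPIP_CSUM task; the IPv4 TX/RX checksum bits discovered by
 * ndis_probe_offload() are filled in only when IFCAP_TXCSUM/IFCAP_RXCSUM
 * are enabled, and the whole structure is pushed with OID_TCP_TASK_OFFLOAD.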
*/ ifp = sc->ifp; error = ndis_probe_offload(sc); if (error) return (error); if (sc->ndis_hwassist == 0 && ifp->if_capabilities == 0) return (0); len = sizeof(ndis_task_offload_hdr) + sizeof(ndis_task_offload) + sizeof(ndis_task_tcpip_csum); ntoh = malloc(len, M_TEMP, M_NOWAIT|M_ZERO); if (ntoh == NULL) return (ENOMEM); ntoh->ntoh_vers = NDIS_TASK_OFFLOAD_VERSION; ntoh->ntoh_len = sizeof(ndis_task_offload_hdr); ntoh->ntoh_offset_firsttask = sizeof(ndis_task_offload_hdr); ntoh->ntoh_encapfmt.nef_encaphdrlen = sizeof(struct ether_header); ntoh->ntoh_encapfmt.nef_encap = NDIS_ENCAP_IEEE802_3; ntoh->ntoh_encapfmt.nef_flags = NDIS_ENCAPFLAG_FIXEDHDRLEN; nto = (ndis_task_offload *)((char *)ntoh + ntoh->ntoh_offset_firsttask); nto->nto_vers = NDIS_TASK_OFFLOAD_VERSION; nto->nto_len = sizeof(ndis_task_offload); nto->nto_task = NDIS_TASK_TCPIP_CSUM; nto->nto_offset_nexttask = 0; nto->nto_taskbuflen = sizeof(ndis_task_tcpip_csum); nttc = (ndis_task_tcpip_csum *)nto->nto_taskbuf; if (ifp->if_capenable & IFCAP_TXCSUM) nttc->nttc_v4tx = sc->ndis_v4tx; if (ifp->if_capenable & IFCAP_RXCSUM) nttc->nttc_v4rx = sc->ndis_v4rx; error = ndis_set_info(sc, OID_TCP_TASK_OFFLOAD, ntoh, &len); free(ntoh, M_TEMP); return (error); } static int ndis_probe_offload(sc) struct ndis_softc *sc; { ndis_task_offload *nto; ndis_task_offload_hdr *ntoh; ndis_task_tcpip_csum *nttc = NULL; struct ifnet *ifp; int len, error, dummy; ifp = sc->ifp; len = sizeof(dummy); error = ndis_get_info(sc, OID_TCP_TASK_OFFLOAD, &dummy, &len); if (error != ENOSPC) return (error); ntoh = malloc(len, M_TEMP, M_NOWAIT|M_ZERO); if (ntoh == NULL) return (ENOMEM); ntoh->ntoh_vers = NDIS_TASK_OFFLOAD_VERSION; ntoh->ntoh_len = sizeof(ndis_task_offload_hdr); ntoh->ntoh_encapfmt.nef_encaphdrlen = sizeof(struct ether_header); ntoh->ntoh_encapfmt.nef_encap = NDIS_ENCAP_IEEE802_3; ntoh->ntoh_encapfmt.nef_flags = NDIS_ENCAPFLAG_FIXEDHDRLEN; error = ndis_get_info(sc, OID_TCP_TASK_OFFLOAD, ntoh, &len); if (error) { free(ntoh, M_TEMP); return (error); } if (ntoh->ntoh_vers != NDIS_TASK_OFFLOAD_VERSION) { free(ntoh, M_TEMP); return (EINVAL); } nto = (ndis_task_offload *)((char *)ntoh + ntoh->ntoh_offset_firsttask); while (1) { switch (nto->nto_task) { case NDIS_TASK_TCPIP_CSUM: nttc = (ndis_task_tcpip_csum *)nto->nto_taskbuf; break; /* Don't handle these yet. 
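 * NDIS_TASK_IPSEC and NDIS_TASK_TCP_LARGESEND (TSO) entries are walked
 * past but ignored; only the TCP/IP checksum task is used to derive
 * ndis_hwassist and the IFCAP_TXCSUM/IFCAP_RXCSUM capabilities.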
*/ case NDIS_TASK_IPSEC: case NDIS_TASK_TCP_LARGESEND: default: break; } if (nto->nto_offset_nexttask == 0) break; nto = (ndis_task_offload *)((char *)nto + nto->nto_offset_nexttask); } if (nttc == NULL) { free(ntoh, M_TEMP); return (ENOENT); } sc->ndis_v4tx = nttc->nttc_v4tx; sc->ndis_v4rx = nttc->nttc_v4rx; if (nttc->nttc_v4tx & NDIS_TCPSUM_FLAGS_IP_CSUM) sc->ndis_hwassist |= CSUM_IP; if (nttc->nttc_v4tx & NDIS_TCPSUM_FLAGS_TCP_CSUM) sc->ndis_hwassist |= CSUM_TCP; if (nttc->nttc_v4tx & NDIS_TCPSUM_FLAGS_UDP_CSUM) sc->ndis_hwassist |= CSUM_UDP; if (sc->ndis_hwassist) ifp->if_capabilities |= IFCAP_TXCSUM; if (nttc->nttc_v4rx & NDIS_TCPSUM_FLAGS_IP_CSUM) ifp->if_capabilities |= IFCAP_RXCSUM; if (nttc->nttc_v4rx & NDIS_TCPSUM_FLAGS_TCP_CSUM) ifp->if_capabilities |= IFCAP_RXCSUM; if (nttc->nttc_v4rx & NDIS_TCPSUM_FLAGS_UDP_CSUM) ifp->if_capabilities |= IFCAP_RXCSUM; free(ntoh, M_TEMP); return (0); } static int ndis_nettype_chan(uint32_t type) { switch (type) { case NDIS_80211_NETTYPE_11FH: return (IEEE80211_CHAN_FHSS); case NDIS_80211_NETTYPE_11DS: return (IEEE80211_CHAN_B); case NDIS_80211_NETTYPE_11OFDM5: return (IEEE80211_CHAN_A); case NDIS_80211_NETTYPE_11OFDM24: return (IEEE80211_CHAN_G); } DPRINTF(("unknown channel nettype %d\n", type)); return (IEEE80211_CHAN_B); /* Default to 11B chan */ } static int ndis_nettype_mode(uint32_t type) { switch (type) { case NDIS_80211_NETTYPE_11FH: return (IEEE80211_MODE_FH); case NDIS_80211_NETTYPE_11DS: return (IEEE80211_MODE_11B); case NDIS_80211_NETTYPE_11OFDM5: return (IEEE80211_MODE_11A); case NDIS_80211_NETTYPE_11OFDM24: return (IEEE80211_MODE_11G); } DPRINTF(("unknown mode nettype %d\n", type)); return (IEEE80211_MODE_AUTO); } /* * Attach the interface. Allocate softc structures, do ifmedia * setup and ethernet/BPF attach. */ int ndis_attach(device_t dev) { struct ndis_softc *sc; driver_object *pdrv; device_object *pdo; int error = 0, len; int i; sc = device_get_softc(dev); mtx_init(&sc->ndis_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK, MTX_DEF); KeInitializeSpinLock(&sc->ndis_rxlock); KeInitializeSpinLock(&sc->ndisusb_tasklock); KeInitializeSpinLock(&sc->ndisusb_xferdonelock); InitializeListHead(&sc->ndis_shlist); InitializeListHead(&sc->ndisusb_tasklist); InitializeListHead(&sc->ndisusb_xferdonelist); callout_init(&sc->ndis_stat_callout, 1); mbufq_init(&sc->ndis_rxqueue, INT_MAX); /* XXXGL: sane maximum */ if (sc->ndis_iftype == PCMCIABus) { error = ndis_alloc_amem(sc); if (error) { device_printf(dev, "failed to allocate " "attribute memory\n"); goto fail; } } /* Create sysctl registry nodes */ ndis_create_sysctls(sc); /* Find the PDO for this device instance. */ if (sc->ndis_iftype == PCIBus) pdrv = windrv_lookup(0, "PCI Bus"); else if (sc->ndis_iftype == PCMCIABus) pdrv = windrv_lookup(0, "PCCARD Bus"); else pdrv = windrv_lookup(0, "USB Bus"); pdo = windrv_find_pdo(pdrv, dev); /* * Create a new functional device object for this * device. This is what creates the miniport block * for this device instance. */ if (NdisAddDevice(sc->ndis_dobj, pdo) != STATUS_SUCCESS) { device_printf(dev, "failed to create FDO!\n"); error = ENXIO; goto fail; } /* Tell the user what version of the API the driver is using. */ device_printf(dev, "NDIS API version: %d.%d\n", sc->ndis_chars->nmc_version_major, sc->ndis_chars->nmc_version_minor); /* Do resource conversion. */ if (sc->ndis_iftype == PCMCIABus || sc->ndis_iftype == PCIBus) ndis_convert_res(sc); else sc->ndis_block->nmb_rlist = NULL; /* Install our RX and TX interrupt handlers. 
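 * These hooks point the miniport block at our wrapped completion and
 * indication routines: send-done, packet indication, Ethernet RX
 * indicate/done and the transfer-data completion routine.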
*/ sc->ndis_block->nmb_senddone_func = ndis_txeof_wrap; sc->ndis_block->nmb_pktind_func = ndis_rxeof_wrap; sc->ndis_block->nmb_ethrxindicate_func = ndis_rxeof_eth_wrap; sc->ndis_block->nmb_ethrxdone_func = ndis_rxeof_done_wrap; sc->ndis_block->nmb_tdcond_func = ndis_rxeof_xfr_done_wrap; /* Override the status handler so we can detect link changes. */ sc->ndis_block->nmb_status_func = ndis_linksts_wrap; sc->ndis_block->nmb_statusdone_func = ndis_linksts_done_wrap; /* Set up work item handlers. */ sc->ndis_tickitem = IoAllocateWorkItem(sc->ndis_block->nmb_deviceobj); sc->ndis_startitem = IoAllocateWorkItem(sc->ndis_block->nmb_deviceobj); sc->ndis_resetitem = IoAllocateWorkItem(sc->ndis_block->nmb_deviceobj); sc->ndis_inputitem = IoAllocateWorkItem(sc->ndis_block->nmb_deviceobj); sc->ndisusb_xferdoneitem = IoAllocateWorkItem(sc->ndis_block->nmb_deviceobj); sc->ndisusb_taskitem = IoAllocateWorkItem(sc->ndis_block->nmb_deviceobj); KeInitializeDpc(&sc->ndis_rxdpc, ndis_rxeof_xfr_wrap, sc->ndis_block); /* Call driver's init routine. */ if (ndis_init_nic(sc)) { device_printf(dev, "init handler failed\n"); error = ENXIO; goto fail; } /* * Figure out how big to make the TX buffer pool. */ len = sizeof(sc->ndis_maxpkts); if (ndis_get_info(sc, OID_GEN_MAXIMUM_SEND_PACKETS, &sc->ndis_maxpkts, &len)) { device_printf(dev, "failed to get max TX packets\n"); error = ENXIO; goto fail; } /* * If this is a deserialized miniport, we don't have * to honor the OID_GEN_MAXIMUM_SEND_PACKETS result. */ if (!NDIS_SERIALIZED(sc->ndis_block)) sc->ndis_maxpkts = NDIS_TXPKTS; /* Enforce some sanity, just in case. */ if (sc->ndis_maxpkts == 0) sc->ndis_maxpkts = 10; sc->ndis_txarray = malloc(sizeof(ndis_packet *) * sc->ndis_maxpkts, M_DEVBUF, M_NOWAIT|M_ZERO); /* Allocate a pool of ndis_packets for TX encapsulation. */ NdisAllocatePacketPool(&i, &sc->ndis_txpool, sc->ndis_maxpkts, PROTOCOL_RESERVED_SIZE_IN_PACKET); if (i != NDIS_STATUS_SUCCESS) { sc->ndis_txpool = NULL; device_printf(dev, "failed to allocate TX packet pool"); error = ENOMEM; goto fail; } sc->ndis_txpending = sc->ndis_maxpkts; sc->ndis_oidcnt = 0; /* Get supported oid list. */ ndis_get_supported_oids(sc, &sc->ndis_oids, &sc->ndis_oidcnt); /* If the NDIS module requested scatter/gather, init maps. */ if (sc->ndis_sc) ndis_init_dma(sc); /* * See if the OID_802_11_CONFIGURATION OID is * supported by this driver. If it is, then this an 802.11 * wireless driver, and we should set up media for wireless. */ for (i = 0; i < sc->ndis_oidcnt; i++) if (sc->ndis_oids[i] == OID_802_11_CONFIGURATION) { sc->ndis_80211 = 1; break; } if (sc->ndis_80211) error = ndis_80211attach(sc); else error = ndis_ifattach(sc); fail: if (error) { ndis_detach(dev); return (error); } if (sc->ndis_iftype == PNPBus && ndisusb_halt == 0) return (error); DPRINTF(("attach done.\n")); /* We're done talking to the NIC for now; halt it. 
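 * Attach only needed the NIC long enough to query its OIDs (maximum send
 * packets, the supported OID list, 802.11 support); ndis_init() brings it
 * up again when the interface is marked up.  USB devices are left running
 * when the hw.ndisusb.halt sysctl is cleared.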
*/ ndis_halt_nic(sc); DPRINTF(("halting done.\n")); return (error); } static int ndis_80211attach(struct ndis_softc *sc) { struct ieee80211com *ic = &sc->ndis_ic; ndis_80211_rates_ex rates; struct ndis_80211_nettype_list *ntl; uint32_t arg; int mode, i, r, len, nonettypes = 1; uint8_t bands[IEEE80211_MODE_BYTES] = { 0 }; callout_init(&sc->ndis_scan_callout, 1); ic->ic_softc = sc; ic->ic_ioctl = ndis_80211ioctl; ic->ic_name = device_get_nameunit(sc->ndis_dev); ic->ic_opmode = IEEE80211_M_STA; ic->ic_phytype = IEEE80211_T_DS; ic->ic_caps = IEEE80211_C_8023ENCAP | IEEE80211_C_STA | IEEE80211_C_IBSS; setbit(ic->ic_modecaps, IEEE80211_MODE_AUTO); len = 0; r = ndis_get_info(sc, OID_802_11_NETWORK_TYPES_SUPPORTED, NULL, &len); if (r != ENOSPC) goto nonettypes; ntl = malloc(len, M_DEVBUF, M_WAITOK | M_ZERO); r = ndis_get_info(sc, OID_802_11_NETWORK_TYPES_SUPPORTED, ntl, &len); if (r != 0) { free(ntl, M_DEVBUF); goto nonettypes; } for (i = 0; i < ntl->ntl_items; i++) { mode = ndis_nettype_mode(ntl->ntl_type[i]); if (mode) { nonettypes = 0; setbit(ic->ic_modecaps, mode); setbit(bands, mode); } else device_printf(sc->ndis_dev, "Unknown nettype %d\n", ntl->ntl_type[i]); } free(ntl, M_DEVBUF); nonettypes: /* Default to 11b channels if the card did not supply any */ if (nonettypes) { setbit(ic->ic_modecaps, IEEE80211_MODE_11B); setbit(bands, IEEE80211_MODE_11B); } len = sizeof(rates); bzero((char *)&rates, len); r = ndis_get_info(sc, OID_802_11_SUPPORTED_RATES, (void *)rates, &len); if (r != 0) device_printf(sc->ndis_dev, "get rates failed: 0x%x\n", r); /* * Since the supported rates only up to 8 can be supported, * if this is not 802.11b we're just going to be faking it * all up to heck. */ #define TESTSETRATE(x, y) \ do { \ int i; \ for (i = 0; i < ic->ic_sup_rates[x].rs_nrates; i++) { \ if (ic->ic_sup_rates[x].rs_rates[i] == (y)) \ break; \ } \ if (i == ic->ic_sup_rates[x].rs_nrates) { \ ic->ic_sup_rates[x].rs_rates[i] = (y); \ ic->ic_sup_rates[x].rs_nrates++; \ } \ } while (0) #define SETRATE(x, y) \ ic->ic_sup_rates[x].rs_rates[ic->ic_sup_rates[x].rs_nrates] = (y) #define INCRATE(x) \ ic->ic_sup_rates[x].rs_nrates++ ic->ic_curmode = IEEE80211_MODE_AUTO; if (isset(ic->ic_modecaps, IEEE80211_MODE_11A)) ic->ic_sup_rates[IEEE80211_MODE_11A].rs_nrates = 0; if (isset(ic->ic_modecaps, IEEE80211_MODE_11B)) ic->ic_sup_rates[IEEE80211_MODE_11B].rs_nrates = 0; if (isset(ic->ic_modecaps, IEEE80211_MODE_11G)) ic->ic_sup_rates[IEEE80211_MODE_11G].rs_nrates = 0; for (i = 0; i < len; i++) { switch (rates[i] & IEEE80211_RATE_VAL) { case 2: case 4: case 11: case 10: case 22: if (isclr(ic->ic_modecaps, IEEE80211_MODE_11B)) { /* Lazy-init 802.11b. */ setbit(ic->ic_modecaps, IEEE80211_MODE_11B); ic->ic_sup_rates[IEEE80211_MODE_11B]. rs_nrates = 0; } SETRATE(IEEE80211_MODE_11B, rates[i]); INCRATE(IEEE80211_MODE_11B); break; default: if (isset(ic->ic_modecaps, IEEE80211_MODE_11A)) { SETRATE(IEEE80211_MODE_11A, rates[i]); INCRATE(IEEE80211_MODE_11A); } if (isset(ic->ic_modecaps, IEEE80211_MODE_11G)) { SETRATE(IEEE80211_MODE_11G, rates[i]); INCRATE(IEEE80211_MODE_11G); } break; } } /* * If the hardware supports 802.11g, it most * likely supports 802.11b and all of the * 802.11b and 802.11g speeds, so maybe we can * just cheat here. Just how in the heck do * we detect turbo modes, though? 
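 * Note that net80211 rate table entries are in 0.5 Mb/s units, so the
 * values forced in below are 1/2/5.5/11 Mb/s for the basic 11b set and
 * 24/36/48/54 Mb/s (48/72/96/108) for the OFDM modes.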
*/ if (isset(ic->ic_modecaps, IEEE80211_MODE_11B)) { TESTSETRATE(IEEE80211_MODE_11B, IEEE80211_RATE_BASIC|2); TESTSETRATE(IEEE80211_MODE_11B, IEEE80211_RATE_BASIC|4); TESTSETRATE(IEEE80211_MODE_11B, IEEE80211_RATE_BASIC|11); TESTSETRATE(IEEE80211_MODE_11B, IEEE80211_RATE_BASIC|22); } if (isset(ic->ic_modecaps, IEEE80211_MODE_11G)) { TESTSETRATE(IEEE80211_MODE_11G, 48); TESTSETRATE(IEEE80211_MODE_11G, 72); TESTSETRATE(IEEE80211_MODE_11G, 96); TESTSETRATE(IEEE80211_MODE_11G, 108); } if (isset(ic->ic_modecaps, IEEE80211_MODE_11A)) { TESTSETRATE(IEEE80211_MODE_11A, 48); TESTSETRATE(IEEE80211_MODE_11A, 72); TESTSETRATE(IEEE80211_MODE_11A, 96); TESTSETRATE(IEEE80211_MODE_11A, 108); } #undef SETRATE #undef INCRATE #undef TESTSETRATE ieee80211_init_channels(ic, NULL, bands); /* * To test for WPA support, we need to see if we can * set AUTHENTICATION_MODE to WPA and read it back * successfully. */ i = sizeof(arg); arg = NDIS_80211_AUTHMODE_WPA; r = ndis_set_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &i); if (r == 0) { r = ndis_get_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &i); if (r == 0 && arg == NDIS_80211_AUTHMODE_WPA) ic->ic_caps |= IEEE80211_C_WPA; } /* * To test for supported ciphers, we set each * available encryption type in descending order. * If ENC3 works, then we have WEP, TKIP and AES. * If only ENC2 works, then we have WEP and TKIP. * If only ENC1 works, then we have just WEP. */ i = sizeof(arg); arg = NDIS_80211_WEPSTAT_ENC3ENABLED; r = ndis_set_info(sc, OID_802_11_ENCRYPTION_STATUS, &arg, &i); if (r == 0) { ic->ic_cryptocaps |= IEEE80211_CRYPTO_WEP | IEEE80211_CRYPTO_TKIP | IEEE80211_CRYPTO_AES_CCM; goto got_crypto; } arg = NDIS_80211_WEPSTAT_ENC2ENABLED; r = ndis_set_info(sc, OID_802_11_ENCRYPTION_STATUS, &arg, &i); if (r == 0) { ic->ic_cryptocaps |= IEEE80211_CRYPTO_WEP | IEEE80211_CRYPTO_TKIP; goto got_crypto; } arg = NDIS_80211_WEPSTAT_ENC1ENABLED; r = ndis_set_info(sc, OID_802_11_ENCRYPTION_STATUS, &arg, &i); if (r == 0) ic->ic_cryptocaps |= IEEE80211_CRYPTO_WEP; got_crypto: i = sizeof(arg); r = ndis_get_info(sc, OID_802_11_POWER_MODE, &arg, &i); if (r == 0) ic->ic_caps |= IEEE80211_C_PMGT; r = ndis_get_info(sc, OID_802_11_TX_POWER_LEVEL, &arg, &i); if (r == 0) ic->ic_caps |= IEEE80211_C_TXPMGT; /* * Get station address from the driver. */ len = sizeof(ic->ic_macaddr); ndis_get_info(sc, OID_802_3_CURRENT_ADDRESS, &ic->ic_macaddr, &len); ieee80211_ifattach(ic); ic->ic_raw_xmit = ndis_raw_xmit; ic->ic_scan_start = ndis_scan_start; ic->ic_scan_end = ndis_scan_end; ic->ic_set_channel = ndis_set_channel; ic->ic_scan_curchan = ndis_scan_curchan; ic->ic_scan_mindwell = ndis_scan_mindwell; ic->ic_bsschan = IEEE80211_CHAN_ANYC; ic->ic_vap_create = ndis_vap_create; ic->ic_vap_delete = ndis_vap_delete; ic->ic_update_mcast = ndis_update_mcast; ic->ic_update_promisc = ndis_update_promisc; ic->ic_transmit = ndis_80211transmit; ic->ic_parent = ndis_80211parent; if (bootverbose) ieee80211_announce(ic); return (0); } static int ndis_ifattach(struct ndis_softc *sc) { struct ifnet *ifp; u_char eaddr[ETHER_ADDR_LEN]; int len; ifp = if_alloc(IFT_ETHER); if (ifp == NULL) return (ENOSPC); sc->ifp = ifp; ifp->if_softc = sc; /* Check for task offload support. */ ndis_probe_offload(sc); /* * Get station address from the driver. 
*/ len = sizeof(eaddr); ndis_get_info(sc, OID_802_3_CURRENT_ADDRESS, eaddr, &len); if_initname(ifp, device_get_name(sc->ndis_dev), device_get_unit(sc->ndis_dev)); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = ndis_ifioctl; ifp->if_start = ndis_ifstart; ifp->if_init = ndis_init; ifp->if_baudrate = 10000000; IFQ_SET_MAXLEN(&ifp->if_snd, 50); ifp->if_snd.ifq_drv_maxlen = 25; IFQ_SET_READY(&ifp->if_snd); ifp->if_capenable = ifp->if_capabilities; ifp->if_hwassist = sc->ndis_hwassist; ifmedia_init(&sc->ifmedia, IFM_IMASK, ndis_ifmedia_upd, ndis_ifmedia_sts); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_10_T, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_10_T|IFM_FDX, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_100_TX, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_100_TX|IFM_FDX, 0, NULL); ifmedia_add(&sc->ifmedia, IFM_ETHER|IFM_AUTO, 0, NULL); ifmedia_set(&sc->ifmedia, IFM_ETHER|IFM_AUTO); ether_ifattach(ifp, eaddr); return (0); } static struct ieee80211vap * ndis_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ], int unit, enum ieee80211_opmode opmode, int flags, const uint8_t bssid[IEEE80211_ADDR_LEN], const uint8_t mac[IEEE80211_ADDR_LEN]) { struct ndis_vap *nvp; struct ieee80211vap *vap; if (!TAILQ_EMPTY(&ic->ic_vaps)) /* only one at a time */ return NULL; nvp = malloc(sizeof(struct ndis_vap), M_80211_VAP, M_WAITOK | M_ZERO); vap = &nvp->vap; ieee80211_vap_setup(ic, vap, name, unit, opmode, flags, bssid); /* override with driver methods */ nvp->newstate = vap->iv_newstate; vap->iv_newstate = ndis_newstate; /* complete setup */ ieee80211_vap_attach(vap, ieee80211_media_change, ndis_media_status, mac); ic->ic_opmode = opmode; /* install key handing routines */ vap->iv_key_set = ndis_add_key; vap->iv_key_delete = ndis_del_key; return vap; } static void ndis_vap_delete(struct ieee80211vap *vap) { struct ndis_vap *nvp = NDIS_VAP(vap); struct ieee80211com *ic = vap->iv_ic; struct ndis_softc *sc = ic->ic_softc; ndis_stop(sc); callout_drain(&sc->ndis_scan_callout); ieee80211_vap_detach(vap); free(nvp, M_80211_VAP); } /* * Shutdown hardware and free up resources. This can be called any * time after the mutex has been initialized. It is called in both * the error case in attach and the normal detach case so it needs * to be careful about only freeing resources that have actually been * allocated. 
*/ int ndis_detach(device_t dev) { struct ifnet *ifp; struct ndis_softc *sc; driver_object *drv; sc = device_get_softc(dev); NDIS_LOCK(sc); if (!sc->ndis_80211) ifp = sc->ifp; else ifp = NULL; if (ifp != NULL) ifp->if_flags &= ~IFF_UP; if (device_is_attached(dev)) { NDIS_UNLOCK(sc); ndis_stop(sc); if (sc->ndis_80211) ieee80211_ifdetach(&sc->ndis_ic); else if (ifp != NULL) ether_ifdetach(ifp); } else NDIS_UNLOCK(sc); if (sc->ndis_tickitem != NULL) IoFreeWorkItem(sc->ndis_tickitem); if (sc->ndis_startitem != NULL) IoFreeWorkItem(sc->ndis_startitem); if (sc->ndis_resetitem != NULL) IoFreeWorkItem(sc->ndis_resetitem); if (sc->ndis_inputitem != NULL) IoFreeWorkItem(sc->ndis_inputitem); if (sc->ndisusb_xferdoneitem != NULL) IoFreeWorkItem(sc->ndisusb_xferdoneitem); if (sc->ndisusb_taskitem != NULL) IoFreeWorkItem(sc->ndisusb_taskitem); bus_generic_detach(dev); ndis_unload_driver(sc); if (sc->ndis_irq) bus_release_resource(dev, SYS_RES_IRQ, 0, sc->ndis_irq); if (sc->ndis_res_io) bus_release_resource(dev, SYS_RES_IOPORT, sc->ndis_io_rid, sc->ndis_res_io); if (sc->ndis_res_mem) bus_release_resource(dev, SYS_RES_MEMORY, sc->ndis_mem_rid, sc->ndis_res_mem); if (sc->ndis_res_altmem) bus_release_resource(dev, SYS_RES_MEMORY, sc->ndis_altmem_rid, sc->ndis_res_altmem); if (ifp != NULL) if_free(ifp); if (sc->ndis_iftype == PCMCIABus) ndis_free_amem(sc); if (sc->ndis_sc) ndis_destroy_dma(sc); if (sc->ndis_txarray) free(sc->ndis_txarray, M_DEVBUF); if (!sc->ndis_80211) ifmedia_removeall(&sc->ifmedia); if (sc->ndis_txpool != NULL) NdisFreePacketPool(sc->ndis_txpool); /* Destroy the PDO for this device. */ if (sc->ndis_iftype == PCIBus) drv = windrv_lookup(0, "PCI Bus"); else if (sc->ndis_iftype == PCMCIABus) drv = windrv_lookup(0, "PCCARD Bus"); else drv = windrv_lookup(0, "USB Bus"); if (drv == NULL) panic("couldn't find driver object"); windrv_destroy_pdo(drv, dev); if (sc->ndis_iftype == PCIBus) bus_dma_tag_destroy(sc->ndis_parent_tag); return (0); } int ndis_suspend(dev) device_t dev; { struct ndis_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); ifp = sc->ifp; #ifdef notdef if (NDIS_INITIALIZED(sc)) ndis_stop(sc); #endif return (0); } int ndis_resume(dev) device_t dev; { struct ndis_softc *sc; struct ifnet *ifp; sc = device_get_softc(dev); ifp = sc->ifp; if (NDIS_INITIALIZED(sc)) ndis_init(sc); return (0); } /* * The following bunch of routines are here to support drivers that * use the NdisMEthIndicateReceive()/MiniportTransferData() mechanism. * The NdisMEthIndicateReceive() handler runs at DISPATCH_LEVEL for * serialized miniports, or IRQL <= DISPATCH_LEVEL for deserialized * miniports. */ static void ndis_rxeof_eth(adapter, ctx, addr, hdr, hdrlen, lookahead, lookaheadlen, pktlen) ndis_handle adapter; ndis_handle ctx; char *addr; void *hdr; uint32_t hdrlen; void *lookahead; uint32_t lookaheadlen; uint32_t pktlen; { ndis_miniport_block *block; uint8_t irql = 0; uint32_t status; ndis_buffer *b; ndis_packet *p; struct mbuf *m; ndis_ethpriv *priv; block = adapter; m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) return; /* Save the data provided to us so far. 
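 * The header and lookahead bytes are copied into a fresh mbuf, wrapped in
 * a fake ndis_packet/MDL pair with the RX context stashed in the
 * protocol-reserved area, and queued on the miniport block's packet list;
 * ndis_rxeof_done() then schedules the DPC that calls
 * MiniportTransferData() for the rest of the frame.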
*/ m->m_len = lookaheadlen + hdrlen; m->m_pkthdr.len = pktlen + hdrlen; m->m_next = NULL; m_copyback(m, 0, hdrlen, hdr); m_copyback(m, hdrlen, lookaheadlen, lookahead); /* Now create a fake NDIS_PACKET to hold the data */ NdisAllocatePacket(&status, &p, block->nmb_rxpool); if (status != NDIS_STATUS_SUCCESS) { m_freem(m); return; } p->np_m0 = m; b = IoAllocateMdl(m->m_data, m->m_pkthdr.len, FALSE, FALSE, NULL); if (b == NULL) { NdisFreePacket(p); m_freem(m); return; } p->np_private.npp_head = p->np_private.npp_tail = b; p->np_private.npp_totlen = m->m_pkthdr.len; /* Save the packet RX context somewhere. */ priv = (ndis_ethpriv *)&p->np_protocolreserved; priv->nep_ctx = ctx; if (!NDIS_SERIALIZED(block)) KeAcquireSpinLock(&block->nmb_lock, &irql); InsertTailList((&block->nmb_packetlist), (&p->np_list)); if (!NDIS_SERIALIZED(block)) KeReleaseSpinLock(&block->nmb_lock, irql); } /* * NdisMEthIndicateReceiveComplete() handler, runs at DISPATCH_LEVEL * for serialized miniports, or IRQL <= DISPATCH_LEVEL for deserialized * miniports. */ static void ndis_rxeof_done(adapter) ndis_handle adapter; { struct ndis_softc *sc; ndis_miniport_block *block; block = adapter; /* Schedule transfer/RX of queued packets. */ sc = device_get_softc(block->nmb_physdeviceobj->do_devext); KeInsertQueueDpc(&sc->ndis_rxdpc, NULL, NULL); } /* * MiniportTransferData() handler, runs at DISPATCH_LEVEL. */ static void ndis_rxeof_xfr(dpc, adapter, sysarg1, sysarg2) kdpc *dpc; ndis_handle adapter; void *sysarg1; void *sysarg2; { ndis_miniport_block *block; struct ndis_softc *sc; ndis_packet *p; list_entry *l; uint32_t status; ndis_ethpriv *priv; struct ifnet *ifp; struct mbuf *m; block = adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); ifp = sc->ifp; KeAcquireSpinLockAtDpcLevel(&block->nmb_lock); l = block->nmb_packetlist.nle_flink; while(!IsListEmpty(&block->nmb_packetlist)) { l = RemoveHeadList((&block->nmb_packetlist)); p = CONTAINING_RECORD(l, ndis_packet, np_list); InitializeListHead((&p->np_list)); priv = (ndis_ethpriv *)&p->np_protocolreserved; m = p->np_m0; p->np_softc = sc; p->np_m0 = NULL; KeReleaseSpinLockFromDpcLevel(&block->nmb_lock); status = MSCALL6(sc->ndis_chars->nmc_transferdata_func, p, &p->np_private.npp_totlen, block, priv->nep_ctx, m->m_len, m->m_pkthdr.len - m->m_len); KeAcquireSpinLockAtDpcLevel(&block->nmb_lock); /* * If status is NDIS_STATUS_PENDING, do nothing and * wait for a callback to the ndis_rxeof_xfr_done() * handler. */ m->m_len = m->m_pkthdr.len; m->m_pkthdr.rcvif = ifp; if (status == NDIS_STATUS_SUCCESS) { IoFreeMdl(p->np_private.npp_head); NdisFreePacket(p); KeAcquireSpinLockAtDpcLevel(&sc->ndis_rxlock); mbufq_enqueue(&sc->ndis_rxqueue, m); KeReleaseSpinLockFromDpcLevel(&sc->ndis_rxlock); IoQueueWorkItem(sc->ndis_inputitem, (io_workitem_func)ndis_inputtask_wrap, WORKQUEUE_CRITICAL, sc); } if (status == NDIS_STATUS_FAILURE) m_freem(m); /* Advance to next packet */ l = block->nmb_packetlist.nle_flink; } KeReleaseSpinLockFromDpcLevel(&block->nmb_lock); } /* * NdisMTransferDataComplete() handler, runs at DISPATCH_LEVEL. 
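 * Called when a MiniportTransferData() request that returned
 * NDIS_STATUS_PENDING completes: the MDL and packet wrapper are freed and,
 * on success, the finished mbuf is queued for the PASSIVE_LEVEL input
 * task; on failure the mbuf is dropped.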
*/ static void ndis_rxeof_xfr_done(adapter, packet, status, len) ndis_handle adapter; ndis_packet *packet; uint32_t status; uint32_t len; { ndis_miniport_block *block; struct ndis_softc *sc; struct ifnet *ifp; struct mbuf *m; block = adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); ifp = sc->ifp; m = packet->np_m0; IoFreeMdl(packet->np_private.npp_head); NdisFreePacket(packet); if (status != NDIS_STATUS_SUCCESS) { m_freem(m); return; } m->m_len = m->m_pkthdr.len; m->m_pkthdr.rcvif = ifp; KeAcquireSpinLockAtDpcLevel(&sc->ndis_rxlock); mbufq_enqueue(&sc->ndis_rxqueue, m); KeReleaseSpinLockFromDpcLevel(&sc->ndis_rxlock); IoQueueWorkItem(sc->ndis_inputitem, (io_workitem_func)ndis_inputtask_wrap, WORKQUEUE_CRITICAL, sc); } /* * A frame has been uploaded: pass the resulting mbuf chain up to * the higher level protocols. * * When handling received NDIS packets, the 'status' field in the * out-of-band portion of the ndis_packet has special meaning. In the * most common case, the underlying NDIS driver will set this field * to NDIS_STATUS_SUCCESS, which indicates that it's ok for us to * take possession of it. We then change the status field to * NDIS_STATUS_PENDING to tell the driver that we now own the packet, * and that we will return it at some point in the future via the * return packet handler. * * If the driver hands us a packet with a status of NDIS_STATUS_RESOURCES, * this means the driver is running out of packet/buffer resources and * wants to maintain ownership of the packet. In this case, we have to * copy the packet data into local storage and let the driver keep the * packet. */ static void ndis_rxeof(adapter, packets, pktcnt) ndis_handle adapter; ndis_packet **packets; uint32_t pktcnt; { struct ndis_softc *sc; ndis_miniport_block *block; ndis_packet *p; uint32_t s; ndis_tcpip_csum *csum; struct ifnet *ifp; struct mbuf *m0, *m; int i; block = (ndis_miniport_block *)adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); ifp = sc->ifp; /* * There's a slim chance the driver may indicate some packets * before we're completely ready to handle them. If we detect this, * we need to return them to the miniport and ignore them. */ if (!sc->ndis_running) { for (i = 0; i < pktcnt; i++) { p = packets[i]; if (p->np_oob.npo_status == NDIS_STATUS_SUCCESS) { p->np_refcnt++; (void)ndis_return_packet(NULL ,p, block); } } return; } for (i = 0; i < pktcnt; i++) { p = packets[i]; /* Stash the softc here so ptom can use it. */ p->np_softc = sc; if (ndis_ptom(&m0, p)) { device_printf(sc->ndis_dev, "ptom failed\n"); if (p->np_oob.npo_status == NDIS_STATUS_SUCCESS) (void)ndis_return_packet(NULL, p, block); } else { #ifdef notdef if (p->np_oob.npo_status == NDIS_STATUS_RESOURCES) { m = m_dup(m0, M_NOWAIT); /* * NOTE: we want to destroy the mbuf here, but * we don't actually want to return it to the * driver via the return packet handler. By * bumping np_refcnt, we can prevent the * ndis_return_packet() routine from actually * doing anything. */ p->np_refcnt++; m_freem(m0); if (m == NULL) if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); else m0 = m; } else p->np_oob.npo_status = NDIS_STATUS_PENDING; #endif m = m_dup(m0, M_NOWAIT); if (p->np_oob.npo_status == NDIS_STATUS_RESOURCES) p->np_refcnt++; else p->np_oob.npo_status = NDIS_STATUS_PENDING; m_freem(m0); if (m == NULL) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); continue; } m0 = m; m0->m_pkthdr.rcvif = ifp; /* Deal with checksum offload. 
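 * The checksum results ride in the packet extension
 * (ndis_tcpipcsum_info): NDIS_RXCSUM_IP_PASSED maps to
 * CSUM_IP_CHECKED|CSUM_IP_VALID, and a passed TCP or UDP checksum to
 * CSUM_DATA_VALID|CSUM_PSEUDO_HDR with csum_data set to 0xffff, so the
 * stack skips re-verification.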
*/ if (ifp->if_capenable & IFCAP_RXCSUM && p->np_ext.npe_info[ndis_tcpipcsum_info] != NULL) { s = (uintptr_t) p->np_ext.npe_info[ndis_tcpipcsum_info]; csum = (ndis_tcpip_csum *)&s; if (csum->u.ntc_rxflags & NDIS_RXCSUM_IP_PASSED) m0->m_pkthdr.csum_flags |= CSUM_IP_CHECKED|CSUM_IP_VALID; if (csum->u.ntc_rxflags & (NDIS_RXCSUM_TCP_PASSED | NDIS_RXCSUM_UDP_PASSED)) { m0->m_pkthdr.csum_flags |= CSUM_DATA_VALID|CSUM_PSEUDO_HDR; m0->m_pkthdr.csum_data = 0xFFFF; } } KeAcquireSpinLockAtDpcLevel(&sc->ndis_rxlock); mbufq_enqueue(&sc->ndis_rxqueue, m0); KeReleaseSpinLockFromDpcLevel(&sc->ndis_rxlock); IoQueueWorkItem(sc->ndis_inputitem, (io_workitem_func)ndis_inputtask_wrap, WORKQUEUE_CRITICAL, sc); } } } /* * This routine is run at PASSIVE_LEVEL. We use this routine to pass * packets into the stack in order to avoid calling (*ifp->if_input)() * with any locks held (at DISPATCH_LEVEL, we'll be holding the * 'dispatch level' per-cpu sleep lock). */ static void ndis_inputtask(device_object *dobj, void *arg) { ndis_miniport_block *block; struct ndis_softc *sc = arg; struct mbuf *m; uint8_t irql; block = dobj->do_devext; KeAcquireSpinLock(&sc->ndis_rxlock, &irql); while ((m = mbufq_dequeue(&sc->ndis_rxqueue)) != NULL) { KeReleaseSpinLock(&sc->ndis_rxlock, irql); if ((sc->ndis_80211 != 0)) { struct ieee80211com *ic = &sc->ndis_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); if (vap != NULL) vap->iv_deliver_data(vap, vap->iv_bss, m); } else { struct ifnet *ifp = sc->ifp; (*ifp->if_input)(ifp, m); } KeAcquireSpinLock(&sc->ndis_rxlock, &irql); } KeReleaseSpinLock(&sc->ndis_rxlock, irql); } /* * A frame was downloaded to the chip. It's safe for us to clean up * the list buffers. */ static void ndis_txeof(adapter, packet, status) ndis_handle adapter; ndis_packet *packet; ndis_status status; { struct ndis_softc *sc; ndis_miniport_block *block; struct ifnet *ifp; int idx; struct mbuf *m; block = (ndis_miniport_block *)adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); ifp = sc->ifp; m = packet->np_m0; idx = packet->np_txidx; if (sc->ndis_sc) bus_dmamap_unload(sc->ndis_ttag, sc->ndis_tmaps[idx]); ndis_free_packet(packet); m_freem(m); NDIS_LOCK(sc); sc->ndis_txarray[idx] = NULL; sc->ndis_txpending++; if (!sc->ndis_80211) { struct ifnet *ifp = sc->ifp; if (status == NDIS_STATUS_SUCCESS) if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); else if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; } sc->ndis_tx_timer = 0; NDIS_UNLOCK(sc); if (!sc->ndis_80211) IoQueueWorkItem(sc->ndis_startitem, (io_workitem_func)ndis_ifstarttask_wrap, WORKQUEUE_CRITICAL, sc); DPRINTF(("%s: ndis_ifstarttask_wrap sc=%p\n", __func__, sc)); } static void ndis_linksts(adapter, status, sbuf, slen) ndis_handle adapter; ndis_status status; void *sbuf; uint32_t slen; { ndis_miniport_block *block; struct ndis_softc *sc; block = adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); sc->ndis_sts = status; /* Event list is all full up, drop this one. */ NDIS_LOCK(sc); if (sc->ndis_evt[sc->ndis_evtpidx].ne_sts) { NDIS_UNLOCK(sc); return; } /* Cache the event. 
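 * The status code, length and a private copy of the buffer are stored in
 * the ndis_evt[] ring (the producer index advances via NDIS_EVTINC),
 * presumably for later retrieval from userland; if the copy cannot be
 * allocated the event is dropped.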
*/ if (slen) { sc->ndis_evt[sc->ndis_evtpidx].ne_buf = malloc(slen, M_TEMP, M_NOWAIT); if (sc->ndis_evt[sc->ndis_evtpidx].ne_buf == NULL) { NDIS_UNLOCK(sc); return; } bcopy((char *)sbuf, sc->ndis_evt[sc->ndis_evtpidx].ne_buf, slen); } sc->ndis_evt[sc->ndis_evtpidx].ne_sts = status; sc->ndis_evt[sc->ndis_evtpidx].ne_len = slen; NDIS_EVTINC(sc->ndis_evtpidx); NDIS_UNLOCK(sc); } static void ndis_linksts_done(adapter) ndis_handle adapter; { ndis_miniport_block *block; struct ndis_softc *sc; struct ifnet *ifp; block = adapter; sc = device_get_softc(block->nmb_physdeviceobj->do_devext); ifp = sc->ifp; if (!NDIS_INITIALIZED(sc)) return; switch (sc->ndis_sts) { case NDIS_STATUS_MEDIA_CONNECT: IoQueueWorkItem(sc->ndis_tickitem, (io_workitem_func)ndis_ticktask_wrap, WORKQUEUE_CRITICAL, sc); if (!sc->ndis_80211) IoQueueWorkItem(sc->ndis_startitem, (io_workitem_func)ndis_ifstarttask_wrap, WORKQUEUE_CRITICAL, sc); break; case NDIS_STATUS_MEDIA_DISCONNECT: if (sc->ndis_link) IoQueueWorkItem(sc->ndis_tickitem, (io_workitem_func)ndis_ticktask_wrap, WORKQUEUE_CRITICAL, sc); break; default: break; } } static void ndis_tick(xsc) void *xsc; { struct ndis_softc *sc; sc = xsc; if (sc->ndis_hang_timer && --sc->ndis_hang_timer == 0) { IoQueueWorkItem(sc->ndis_tickitem, (io_workitem_func)ndis_ticktask_wrap, WORKQUEUE_CRITICAL, sc); sc->ndis_hang_timer = sc->ndis_block->nmb_checkforhangsecs; } if (sc->ndis_tx_timer && --sc->ndis_tx_timer == 0) { if_inc_counter(sc->ifp, IFCOUNTER_OERRORS, 1); device_printf(sc->ndis_dev, "watchdog timeout\n"); IoQueueWorkItem(sc->ndis_resetitem, (io_workitem_func)ndis_resettask_wrap, WORKQUEUE_CRITICAL, sc); if (!sc->ndis_80211) IoQueueWorkItem(sc->ndis_startitem, (io_workitem_func)ndis_ifstarttask_wrap, WORKQUEUE_CRITICAL, sc); } callout_reset(&sc->ndis_stat_callout, hz, ndis_tick, sc); } static void ndis_ticktask(device_object *d, void *xsc) { struct ndis_softc *sc = xsc; ndis_checkforhang_handler hangfunc; uint8_t rval; NDIS_LOCK(sc); if (!NDIS_INITIALIZED(sc)) { NDIS_UNLOCK(sc); return; } NDIS_UNLOCK(sc); hangfunc = sc->ndis_chars->nmc_checkhang_func; if (hangfunc != NULL) { rval = MSCALL1(hangfunc, sc->ndis_block->nmb_miniportadapterctx); if (rval == TRUE) { ndis_reset_nic(sc); return; } } NDIS_LOCK(sc); if (sc->ndis_link == 0 && sc->ndis_sts == NDIS_STATUS_MEDIA_CONNECT) { sc->ndis_link = 1; if (sc->ndis_80211 != 0) { struct ieee80211com *ic = &sc->ndis_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); if (vap != NULL) { NDIS_UNLOCK(sc); ndis_getstate_80211(sc); ieee80211_new_state(vap, IEEE80211_S_RUN, -1); NDIS_LOCK(sc); if_link_state_change(vap->iv_ifp, LINK_STATE_UP); } } else if_link_state_change(sc->ifp, LINK_STATE_UP); } if (sc->ndis_link == 1 && sc->ndis_sts == NDIS_STATUS_MEDIA_DISCONNECT) { sc->ndis_link = 0; if (sc->ndis_80211 != 0) { struct ieee80211com *ic = &sc->ndis_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); if (vap != NULL) { NDIS_UNLOCK(sc); ieee80211_new_state(vap, IEEE80211_S_SCAN, 0); NDIS_LOCK(sc); if_link_state_change(vap->iv_ifp, LINK_STATE_DOWN); } } else if_link_state_change(sc->ifp, LINK_STATE_DOWN); } NDIS_UNLOCK(sc); } static void ndis_map_sclist(arg, segs, nseg, mapsize, error) void *arg; bus_dma_segment_t *segs; int nseg; bus_size_t mapsize; int error; { struct ndis_sc_list *sclist; int i; if (error || arg == NULL) return; sclist = arg; sclist->nsl_frags = nseg; for (i = 0; i < nseg; i++) { sclist->nsl_elements[i].nse_addr.np_quad = segs[i].ds_addr; sclist->nsl_elements[i].nse_len = segs[i].ds_len; } } static int 
ndis_raw_xmit(struct ieee80211_node *ni, struct mbuf *m, const struct ieee80211_bpf_params *params) { /* no support; just discard */ m_freem(m); ieee80211_free_node(ni); return (0); } static void ndis_update_mcast(struct ieee80211com *ic) { struct ndis_softc *sc = ic->ic_softc; ndis_setmulti(sc); } static void ndis_update_promisc(struct ieee80211com *ic) { /* not supported */ } static void ndis_ifstarttask(device_object *d, void *arg) { struct ndis_softc *sc = arg; DPRINTF(("%s: sc=%p, ifp=%p\n", __func__, sc, sc->ifp)); if (sc->ndis_80211) return; struct ifnet *ifp = sc->ifp; if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) ndis_ifstart(ifp); } /* * Main transmit routine. To make NDIS drivers happy, we need to * transform mbuf chains into NDIS packets and feed them to the * send packet routines. Most drivers allow you to send several * packets at once (up to the maxpkts limit). Unfortunately, rather * that accepting them in the form of a linked list, they expect * a contiguous array of pointers to packets. * * For those drivers which use the NDIS scatter/gather DMA mechanism, * we need to perform busdma work here. Those that use map registers * will do the mapping themselves on a buffer by buffer basis. */ static void ndis_ifstart(struct ifnet *ifp) { struct ndis_softc *sc; struct mbuf *m = NULL; ndis_packet **p0 = NULL, *p = NULL; ndis_tcpip_csum *csum; int pcnt = 0, status; sc = ifp->if_softc; NDIS_LOCK(sc); if (!sc->ndis_link || ifp->if_drv_flags & IFF_DRV_OACTIVE) { NDIS_UNLOCK(sc); return; } p0 = &sc->ndis_txarray[sc->ndis_txidx]; while(sc->ndis_txpending) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m); if (m == NULL) break; NdisAllocatePacket(&status, &sc->ndis_txarray[sc->ndis_txidx], sc->ndis_txpool); if (status != NDIS_STATUS_SUCCESS) break; if (ndis_mtop(m, &sc->ndis_txarray[sc->ndis_txidx])) { IFQ_DRV_PREPEND(&ifp->if_snd, m); NDIS_UNLOCK(sc); return; } /* * Save pointer to original mbuf * so we can free it later. */ p = sc->ndis_txarray[sc->ndis_txidx]; p->np_txidx = sc->ndis_txidx; p->np_m0 = m; p->np_oob.npo_status = NDIS_STATUS_PENDING; /* * Do scatter/gather processing, if driver requested it. */ if (sc->ndis_sc) { bus_dmamap_load_mbuf(sc->ndis_ttag, sc->ndis_tmaps[sc->ndis_txidx], m, ndis_map_sclist, &p->np_sclist, BUS_DMA_NOWAIT); bus_dmamap_sync(sc->ndis_ttag, sc->ndis_tmaps[sc->ndis_txidx], BUS_DMASYNC_PREREAD); p->np_ext.npe_info[ndis_sclist_info] = &p->np_sclist; } /* Handle checksum offload. */ if (ifp->if_capenable & IFCAP_TXCSUM && m->m_pkthdr.csum_flags) { csum = (ndis_tcpip_csum *) &p->np_ext.npe_info[ndis_tcpipcsum_info]; csum->u.ntc_txflags = NDIS_TXCSUM_DO_IPV4; if (m->m_pkthdr.csum_flags & CSUM_IP) csum->u.ntc_txflags |= NDIS_TXCSUM_DO_IP; if (m->m_pkthdr.csum_flags & CSUM_TCP) csum->u.ntc_txflags |= NDIS_TXCSUM_DO_TCP; if (m->m_pkthdr.csum_flags & CSUM_UDP) csum->u.ntc_txflags |= NDIS_TXCSUM_DO_UDP; p->np_private.npp_flags = NDIS_PROTOCOL_ID_TCP_IP; } NDIS_INC(sc); sc->ndis_txpending--; pcnt++; /* * If there's a BPF listener, bounce a copy of this frame * to him. */ if (!sc->ndis_80211) /* XXX handle 80211 */ BPF_MTAP(ifp, m); /* * The array that p0 points to must appear contiguous, * so we must not wrap past the end of sc->ndis_txarray[]. * If it looks like we're about to wrap, break out here * so the this batch of packets can be transmitted, then * wait for txeof to ask us to send the rest. 
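 * NDIS_INC() has already advanced ndis_txidx, so a value of zero means
 * the next packet would start over at the front of the array and the
 * batch handed to the send routine would no longer be contiguous; any
 * leftover mbufs stay on the send queue until ndis_txeof() kicks
 * ndis_ifstart() again.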
*/ if (sc->ndis_txidx == 0) break; } if (pcnt == 0) { NDIS_UNLOCK(sc); return; } if (sc->ndis_txpending == 0) ifp->if_drv_flags |= IFF_DRV_OACTIVE; /* * Set a timeout in case the chip goes out to lunch. */ sc->ndis_tx_timer = 5; NDIS_UNLOCK(sc); /* * According to NDIS documentation, if a driver exports * a MiniportSendPackets() routine, we prefer that over * a MiniportSend() routine (which sends just a single * packet). */ if (sc->ndis_chars->nmc_sendmulti_func != NULL) ndis_send_packets(sc, p0, pcnt); else ndis_send_packet(sc, p); return; } static int ndis_80211transmit(struct ieee80211com *ic, struct mbuf *m) { struct ndis_softc *sc = ic->ic_softc; ndis_packet **p0 = NULL, *p = NULL; int status; NDIS_LOCK(sc); if (!sc->ndis_link || !sc->ndis_running) { NDIS_UNLOCK(sc); return (ENXIO); } if (sc->ndis_txpending == 0) { NDIS_UNLOCK(sc); return (ENOBUFS); } p0 = &sc->ndis_txarray[sc->ndis_txidx]; NdisAllocatePacket(&status, &sc->ndis_txarray[sc->ndis_txidx], sc->ndis_txpool); if (status != NDIS_STATUS_SUCCESS) { NDIS_UNLOCK(sc); return (ENOBUFS); } if (ndis_mtop(m, &sc->ndis_txarray[sc->ndis_txidx])) { NDIS_UNLOCK(sc); return (ENOBUFS); } /* * Save pointer to original mbuf * so we can free it later. */ p = sc->ndis_txarray[sc->ndis_txidx]; p->np_txidx = sc->ndis_txidx; p->np_m0 = m; p->np_oob.npo_status = NDIS_STATUS_PENDING; /* * Do scatter/gather processing, if driver requested it. */ if (sc->ndis_sc) { bus_dmamap_load_mbuf(sc->ndis_ttag, sc->ndis_tmaps[sc->ndis_txidx], m, ndis_map_sclist, &p->np_sclist, BUS_DMA_NOWAIT); bus_dmamap_sync(sc->ndis_ttag, sc->ndis_tmaps[sc->ndis_txidx], BUS_DMASYNC_PREREAD); p->np_ext.npe_info[ndis_sclist_info] = &p->np_sclist; } NDIS_INC(sc); sc->ndis_txpending--; /* * Set a timeout in case the chip goes out to lunch. */ sc->ndis_tx_timer = 5; NDIS_UNLOCK(sc); /* * According to NDIS documentation, if a driver exports * a MiniportSendPackets() routine, we prefer that over * a MiniportSend() routine (which sends just a single * packet). */ if (sc->ndis_chars->nmc_sendmulti_func != NULL) ndis_send_packets(sc, p0, 1); else ndis_send_packet(sc, p); return (0); } static void ndis_80211parent(struct ieee80211com *ic) { struct ndis_softc *sc = ic->ic_softc; /*NDIS_LOCK(sc);*/ if (ic->ic_nrunning > 0) { if (!sc->ndis_running) ndis_init(sc); } else if (sc->ndis_running) ndis_stop(sc); /*NDIS_UNLOCK(sc);*/ } static void ndis_init(void *xsc) { struct ndis_softc *sc = xsc; int i, len, error; /* * Avoid reintializing the link unnecessarily. * This should be dealt with in a better way by * fixing the upper layer modules so they don't * call ifp->if_init() quite as often. */ if (sc->ndis_link) return; /* * Cancel pending I/O and free all RX/TX buffers. */ ndis_stop(sc); if (!(sc->ndis_iftype == PNPBus && ndisusb_halt == 0)) { error = ndis_init_nic(sc); if (error != 0) { device_printf(sc->ndis_dev, "failed to initialize the device: %d\n", error); return; } } /* Program the packet filter */ sc->ndis_filter = NDIS_PACKET_TYPE_DIRECTED | NDIS_PACKET_TYPE_BROADCAST; if (sc->ndis_80211) { struct ieee80211com *ic = &sc->ndis_ic; if (ic->ic_promisc > 0) sc->ndis_filter |= NDIS_PACKET_TYPE_PROMISCUOUS; } else { struct ifnet *ifp = sc->ifp; if (ifp->if_flags & IFF_PROMISC) sc->ndis_filter |= NDIS_PACKET_TYPE_PROMISCUOUS; } len = sizeof(sc->ndis_filter); error = ndis_set_info(sc, OID_GEN_CURRENT_PACKET_FILTER, &sc->ndis_filter, &len); if (error) device_printf(sc->ndis_dev, "set filter failed: %d\n", error); /* * Set lookahead. 
*/ if (sc->ndis_80211) i = ETHERMTU; else i = sc->ifp->if_mtu; len = sizeof(i); ndis_set_info(sc, OID_GEN_CURRENT_LOOKAHEAD, &i, &len); /* * Program the multicast filter, if necessary. */ ndis_setmulti(sc); /* Setup task offload. */ ndis_set_offload(sc); NDIS_LOCK(sc); sc->ndis_txidx = 0; sc->ndis_txpending = sc->ndis_maxpkts; sc->ndis_link = 0; if (!sc->ndis_80211) { if_link_state_change(sc->ifp, LINK_STATE_UNKNOWN); sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; } sc->ndis_tx_timer = 0; /* * Some drivers don't set this value. The NDIS spec says * the default checkforhang timeout is "approximately 2 * seconds." We use 3 seconds, because it seems for some * drivers, exactly 2 seconds is too fast. */ if (sc->ndis_block->nmb_checkforhangsecs == 0) sc->ndis_block->nmb_checkforhangsecs = 3; sc->ndis_hang_timer = sc->ndis_block->nmb_checkforhangsecs; callout_reset(&sc->ndis_stat_callout, hz, ndis_tick, sc); sc->ndis_running = 1; NDIS_UNLOCK(sc); /* XXX force handling */ if (sc->ndis_80211) ieee80211_start_all(&sc->ndis_ic); /* start all vap's */ } /* * Set media options. */ static int ndis_ifmedia_upd(ifp) struct ifnet *ifp; { struct ndis_softc *sc; sc = ifp->if_softc; if (NDIS_INITIALIZED(sc)) ndis_init(sc); return (0); } /* * Report current media status. */ static void ndis_ifmedia_sts(ifp, ifmr) struct ifnet *ifp; struct ifmediareq *ifmr; { struct ndis_softc *sc; uint32_t media_info; ndis_media_state linkstate; int len; ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; sc = ifp->if_softc; if (!NDIS_INITIALIZED(sc)) return; len = sizeof(linkstate); ndis_get_info(sc, OID_GEN_MEDIA_CONNECT_STATUS, (void *)&linkstate, &len); len = sizeof(media_info); ndis_get_info(sc, OID_GEN_LINK_SPEED, (void *)&media_info, &len); if (linkstate == nmc_connected) ifmr->ifm_status |= IFM_ACTIVE; switch (media_info) { case 100000: ifmr->ifm_active |= IFM_10_T; break; case 1000000: ifmr->ifm_active |= IFM_100_TX; break; case 10000000: ifmr->ifm_active |= IFM_1000_T; break; default: device_printf(sc->ndis_dev, "unknown speed: %d\n", media_info); break; } } static int ndis_set_cipher(struct ndis_softc *sc, int cipher) { struct ieee80211com *ic = &sc->ndis_ic; int rval = 0, len; uint32_t arg, save; len = sizeof(arg); if (cipher == WPA_CSE_WEP40 || cipher == WPA_CSE_WEP104) { if (!(ic->ic_cryptocaps & IEEE80211_CRYPTO_WEP)) return (ENOTSUP); arg = NDIS_80211_WEPSTAT_ENC1ENABLED; } if (cipher == WPA_CSE_TKIP) { if (!(ic->ic_cryptocaps & IEEE80211_CRYPTO_TKIP)) return (ENOTSUP); arg = NDIS_80211_WEPSTAT_ENC2ENABLED; } if (cipher == WPA_CSE_CCMP) { if (!(ic->ic_cryptocaps & IEEE80211_CRYPTO_AES_CCM)) return (ENOTSUP); arg = NDIS_80211_WEPSTAT_ENC3ENABLED; } DPRINTF(("Setting cipher to %d\n", arg)); save = arg; rval = ndis_set_info(sc, OID_802_11_ENCRYPTION_STATUS, &arg, &len); if (rval) return (rval); /* Check that the cipher was set correctly. */ len = sizeof(save); rval = ndis_get_info(sc, OID_802_11_ENCRYPTION_STATUS, &arg, &len); if (rval != 0 || arg != save) return (ENODEV); return (0); } /* * WPA is hairy to set up. Do the work in a separate routine * so we don't clutter the setstate function too much. * Important yet undocumented fact: first we have to set the * authentication mode, _then_ we enable the ciphers. If one * of the WPA authentication modes isn't enabled, the driver * might not permit the TKIP or AES ciphers to be selected. 
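 * The flow below mirrors that: the WPA IE supplied by the supplicant is
 * parsed, the authentication-suite selector (WPA_ASE_NONE,
 * WPA_ASE_8021X_UNSPEC or WPA_ASE_8021X_PSK) is mapped onto
 * OID_802_11_AUTHENTICATION_MODE, and only then are the group cipher and
 * each unicast cipher programmed through ndis_set_cipher().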
*/ static int ndis_set_wpa(sc, ie, ielen) struct ndis_softc *sc; void *ie; int ielen; { struct ieee80211_ie_wpa *w; struct ndis_ie *n; char *pos; uint32_t arg; int i; /* * Apparently, the only way for us to know what ciphers * and key management/authentication mode to use is for * us to inspect the optional information element (IE) * stored in the 802.11 state machine. This IE should be * supplied by the WPA supplicant. */ w = (struct ieee80211_ie_wpa *)ie; /* Check for the right kind of IE. */ if (w->wpa_id != IEEE80211_ELEMID_VENDOR) { DPRINTF(("Incorrect IE type %d\n", w->wpa_id)); return (EINVAL); } /* Skip over the ucast cipher OIDs. */ pos = (char *)&w->wpa_uciphers[0]; pos += w->wpa_uciphercnt * sizeof(struct ndis_ie); /* Skip over the authmode count. */ pos += sizeof(u_int16_t); /* * Check for the authentication modes. I'm * pretty sure there's only supposed to be one. */ n = (struct ndis_ie *)pos; if (n->ni_val == WPA_ASE_NONE) arg = NDIS_80211_AUTHMODE_WPANONE; if (n->ni_val == WPA_ASE_8021X_UNSPEC) arg = NDIS_80211_AUTHMODE_WPA; if (n->ni_val == WPA_ASE_8021X_PSK) arg = NDIS_80211_AUTHMODE_WPAPSK; DPRINTF(("Setting WPA auth mode to %d\n", arg)); i = sizeof(arg); if (ndis_set_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &i)) return (ENOTSUP); i = sizeof(arg); ndis_get_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &i); /* Now configure the desired ciphers. */ /* First, set up the multicast group cipher. */ n = (struct ndis_ie *)&w->wpa_mcipher[0]; if (ndis_set_cipher(sc, n->ni_val)) return (ENOTSUP); /* Now start looking around for the unicast ciphers. */ pos = (char *)&w->wpa_uciphers[0]; n = (struct ndis_ie *)pos; for (i = 0; i < w->wpa_uciphercnt; i++) { if (ndis_set_cipher(sc, n->ni_val)) return (ENOTSUP); n++; } return (0); } static void ndis_media_status(struct ifnet *ifp, struct ifmediareq *imr) { struct ieee80211vap *vap = ifp->if_softc; struct ndis_softc *sc = vap->iv_ic->ic_softc; uint32_t txrate; int len; if (!NDIS_INITIALIZED(sc)) return; len = sizeof(txrate); if (ndis_get_info(sc, OID_GEN_LINK_SPEED, &txrate, &len) == 0) vap->iv_bss->ni_txrate = txrate / 5000; ieee80211_media_status(ifp, imr); } static void ndis_setstate_80211(struct ndis_softc *sc) { struct ieee80211com *ic = &sc->ndis_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); ndis_80211_macaddr bssid; ndis_80211_config config; int rval = 0, len; uint32_t arg; if (!NDIS_INITIALIZED(sc)) { DPRINTF(("%s: NDIS not initialized\n", __func__)); return; } /* Disassociate and turn off radio. */ len = sizeof(arg); arg = 1; ndis_set_info(sc, OID_802_11_DISASSOCIATE, &arg, &len); /* Set network infrastructure mode. */ len = sizeof(arg); if (ic->ic_opmode == IEEE80211_M_IBSS) arg = NDIS_80211_NET_INFRA_IBSS; else arg = NDIS_80211_NET_INFRA_BSS; rval = ndis_set_info(sc, OID_802_11_INFRASTRUCTURE_MODE, &arg, &len); if (rval) device_printf (sc->ndis_dev, "set infra failed: %d\n", rval); /* Set power management */ len = sizeof(arg); if (vap->iv_flags & IEEE80211_F_PMGTON) arg = NDIS_80211_POWERMODE_FAST_PSP; else arg = NDIS_80211_POWERMODE_CAM; ndis_set_info(sc, OID_802_11_POWER_MODE, &arg, &len); /* Set TX power */ if ((ic->ic_caps & IEEE80211_C_TXPMGT) && ic->ic_txpowlimit < nitems(dBm2mW)) { arg = dBm2mW[ic->ic_txpowlimit]; len = sizeof(arg); ndis_set_info(sc, OID_802_11_TX_POWER_LEVEL, &arg, &len); } /* * Default encryption mode to off, authentication * to open and privacy to 'accept everything.' 
*/ len = sizeof(arg); arg = NDIS_80211_WEPSTAT_DISABLED; ndis_set_info(sc, OID_802_11_ENCRYPTION_STATUS, &arg, &len); len = sizeof(arg); arg = NDIS_80211_AUTHMODE_OPEN; ndis_set_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &len); /* * Note that OID_802_11_PRIVACY_FILTER is optional: * not all drivers implement it. */ len = sizeof(arg); arg = NDIS_80211_PRIVFILT_8021XWEP; ndis_set_info(sc, OID_802_11_PRIVACY_FILTER, &arg, &len); len = sizeof(config); bzero((char *)&config, len); config.nc_length = len; config.nc_fhconfig.ncf_length = sizeof(ndis_80211_config_fh); rval = ndis_get_info(sc, OID_802_11_CONFIGURATION, &config, &len); /* * Some drivers expect us to initialize these values, so * provide some defaults. */ if (config.nc_beaconperiod == 0) config.nc_beaconperiod = 100; if (config.nc_atimwin == 0) config.nc_atimwin = 100; if (config.nc_fhconfig.ncf_dwelltime == 0) config.nc_fhconfig.ncf_dwelltime = 200; if (rval == 0 && ic->ic_bsschan != IEEE80211_CHAN_ANYC) { int chan, chanflag; chan = ieee80211_chan2ieee(ic, ic->ic_bsschan); chanflag = config.nc_dsconfig > 2500000 ? IEEE80211_CHAN_2GHZ : IEEE80211_CHAN_5GHZ; if (chan != ieee80211_mhz2ieee(config.nc_dsconfig / 1000, 0)) { config.nc_dsconfig = ic->ic_bsschan->ic_freq * 1000; len = sizeof(config); config.nc_length = len; config.nc_fhconfig.ncf_length = sizeof(ndis_80211_config_fh); DPRINTF(("Setting channel to %ukHz\n", config.nc_dsconfig)); rval = ndis_set_info(sc, OID_802_11_CONFIGURATION, &config, &len); if (rval) device_printf(sc->ndis_dev, "couldn't change " "DS config to %ukHz: %d\n", config.nc_dsconfig, rval); } } else if (rval) device_printf(sc->ndis_dev, "couldn't retrieve " "channel info: %d\n", rval); /* Set the BSSID to our value so the driver doesn't associate */ len = IEEE80211_ADDR_LEN; bcopy(vap->iv_myaddr, bssid, len); DPRINTF(("Setting BSSID to %6D\n", (uint8_t *)&bssid, ":")); rval = ndis_set_info(sc, OID_802_11_BSSID, &bssid, &len); if (rval) device_printf(sc->ndis_dev, "setting BSSID failed: %d\n", rval); } static void ndis_auth_and_assoc(struct ndis_softc *sc, struct ieee80211vap *vap) { struct ieee80211_node *ni = vap->iv_bss; ndis_80211_ssid ssid; ndis_80211_macaddr bssid; ndis_80211_wep wep; int i, rval = 0, len, error; uint32_t arg; if (!NDIS_INITIALIZED(sc)) { DPRINTF(("%s: NDIS not initialized\n", __func__)); return; } /* Initial setup */ ndis_setstate_80211(sc); /* Set network infrastructure mode. 
*/ len = sizeof(arg); if (vap->iv_opmode == IEEE80211_M_IBSS) arg = NDIS_80211_NET_INFRA_IBSS; else arg = NDIS_80211_NET_INFRA_BSS; rval = ndis_set_info(sc, OID_802_11_INFRASTRUCTURE_MODE, &arg, &len); if (rval) device_printf (sc->ndis_dev, "set infra failed: %d\n", rval); /* Set RTS threshold */ len = sizeof(arg); arg = vap->iv_rtsthreshold; ndis_set_info(sc, OID_802_11_RTS_THRESHOLD, &arg, &len); /* Set fragmentation threshold */ len = sizeof(arg); arg = vap->iv_fragthreshold; ndis_set_info(sc, OID_802_11_FRAGMENTATION_THRESHOLD, &arg, &len); /* Set WEP */ if (vap->iv_flags & IEEE80211_F_PRIVACY && !(vap->iv_flags & IEEE80211_F_WPA)) { int keys_set = 0; if (ni->ni_authmode == IEEE80211_AUTH_SHARED) { len = sizeof(arg); arg = NDIS_80211_AUTHMODE_SHARED; DPRINTF(("Setting shared auth\n")); ndis_set_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &len); } for (i = 0; i < IEEE80211_WEP_NKID; i++) { if (vap->iv_nw_keys[i].wk_keylen) { if (vap->iv_nw_keys[i].wk_cipher->ic_cipher != IEEE80211_CIPHER_WEP) continue; bzero((char *)&wep, sizeof(wep)); wep.nw_keylen = vap->iv_nw_keys[i].wk_keylen; /* * 5, 13 and 16 are the only valid * key lengths. Anything in between * will be zero padded out to the * next highest boundary. */ if (vap->iv_nw_keys[i].wk_keylen < 5) wep.nw_keylen = 5; else if (vap->iv_nw_keys[i].wk_keylen > 5 && vap->iv_nw_keys[i].wk_keylen < 13) wep.nw_keylen = 13; else if (vap->iv_nw_keys[i].wk_keylen > 13 && vap->iv_nw_keys[i].wk_keylen < 16) wep.nw_keylen = 16; wep.nw_keyidx = i; wep.nw_length = (sizeof(uint32_t) * 3) + wep.nw_keylen; if (i == vap->iv_def_txkey) wep.nw_keyidx |= NDIS_80211_WEPKEY_TX; bcopy(vap->iv_nw_keys[i].wk_key, wep.nw_keydata, wep.nw_length); len = sizeof(wep); DPRINTF(("Setting WEP key %d\n", i)); rval = ndis_set_info(sc, OID_802_11_ADD_WEP, &wep, &len); if (rval) device_printf(sc->ndis_dev, "set wepkey failed: %d\n", rval); keys_set++; } } if (keys_set) { DPRINTF(("Setting WEP on\n")); arg = NDIS_80211_WEPSTAT_ENABLED; len = sizeof(arg); rval = ndis_set_info(sc, OID_802_11_WEP_STATUS, &arg, &len); if (rval) device_printf(sc->ndis_dev, "enable WEP failed: %d\n", rval); if (vap->iv_flags & IEEE80211_F_DROPUNENC) arg = NDIS_80211_PRIVFILT_8021XWEP; else arg = NDIS_80211_PRIVFILT_ACCEPTALL; len = sizeof(arg); ndis_set_info(sc, OID_802_11_PRIVACY_FILTER, &arg, &len); } } /* Set up WPA. */ if ((vap->iv_flags & IEEE80211_F_WPA) && vap->iv_appie_assocreq != NULL) { struct ieee80211_appie *ie = vap->iv_appie_assocreq; error = ndis_set_wpa(sc, ie->ie_data, ie->ie_len); if (error != 0) device_printf(sc->ndis_dev, "WPA setup failed\n"); } #ifdef notyet /* Set network type. */ arg = 0; switch (vap->iv_curmode) { case IEEE80211_MODE_11A: arg = NDIS_80211_NETTYPE_11OFDM5; break; case IEEE80211_MODE_11B: arg = NDIS_80211_NETTYPE_11DS; break; case IEEE80211_MODE_11G: arg = NDIS_80211_NETTYPE_11OFDM24; break; default: device_printf(sc->ndis_dev, "unknown mode: %d\n", vap->iv_curmode); } if (arg) { DPRINTF(("Setting network type to %d\n", arg)); len = sizeof(arg); rval = ndis_set_info(sc, OID_802_11_NETWORK_TYPE_IN_USE, &arg, &len); if (rval) device_printf(sc->ndis_dev, "set nettype failed: %d\n", rval); } #endif /* * If the user selected a specific BSSID, try * to use that one. This is useful in the case where * there are several APs in range with the same network * name. To delete the BSSID, we use the broadcast * address as the BSSID. 
* Note that some drivers seem to allow setting a BSSID * in ad-hoc mode, which has the effect of forcing the * NIC to create an ad-hoc cell with a specific BSSID, * instead of a randomly chosen one. However, the net80211 * code makes the assumtion that the BSSID setting is invalid * when you're in ad-hoc mode, so we don't allow that here. */ len = IEEE80211_ADDR_LEN; if (vap->iv_flags & IEEE80211_F_DESBSSID && vap->iv_opmode != IEEE80211_M_IBSS) bcopy(ni->ni_bssid, bssid, len); else bcopy(ieee80211broadcastaddr, bssid, len); DPRINTF(("Setting BSSID to %6D\n", (uint8_t *)&bssid, ":")); rval = ndis_set_info(sc, OID_802_11_BSSID, &bssid, &len); if (rval) device_printf(sc->ndis_dev, "setting BSSID failed: %d\n", rval); /* Set SSID -- always do this last. */ #ifdef NDIS_DEBUG if (ndis_debug > 0) { printf("Setting ESSID to "); ieee80211_print_essid(ni->ni_essid, ni->ni_esslen); printf("\n"); } #endif len = sizeof(ssid); bzero((char *)&ssid, len); ssid.ns_ssidlen = ni->ni_esslen; if (ssid.ns_ssidlen == 0) { ssid.ns_ssidlen = 1; } else bcopy(ni->ni_essid, ssid.ns_ssid, ssid.ns_ssidlen); rval = ndis_set_info(sc, OID_802_11_SSID, &ssid, &len); if (rval) device_printf (sc->ndis_dev, "set ssid failed: %d\n", rval); return; } static int ndis_get_bssid_list(sc, bl) struct ndis_softc *sc; ndis_80211_bssid_list_ex **bl; { int len, error; len = sizeof(uint32_t) + (sizeof(ndis_wlan_bssid_ex) * 16); *bl = malloc(len, M_DEVBUF, M_NOWAIT | M_ZERO); if (*bl == NULL) return (ENOMEM); error = ndis_get_info(sc, OID_802_11_BSSID_LIST, *bl, &len); if (error == ENOSPC) { free(*bl, M_DEVBUF); *bl = malloc(len, M_DEVBUF, M_NOWAIT | M_ZERO); if (*bl == NULL) return (ENOMEM); error = ndis_get_info(sc, OID_802_11_BSSID_LIST, *bl, &len); } if (error) { DPRINTF(("%s: failed to read\n", __func__)); free(*bl, M_DEVBUF); return (error); } return (0); } static int ndis_get_assoc(struct ndis_softc *sc, ndis_wlan_bssid_ex **assoc) { struct ieee80211com *ic = &sc->ndis_ic; struct ieee80211vap *vap; struct ieee80211_node *ni; ndis_80211_bssid_list_ex *bl; ndis_wlan_bssid_ex *bs; ndis_80211_macaddr bssid; int i, len, error; if (!sc->ndis_link) return (ENOENT); len = sizeof(bssid); error = ndis_get_info(sc, OID_802_11_BSSID, &bssid, &len); if (error) { device_printf(sc->ndis_dev, "failed to get bssid\n"); return (ENOENT); } vap = TAILQ_FIRST(&ic->ic_vaps); ni = vap->iv_bss; error = ndis_get_bssid_list(sc, &bl); if (error) return (error); bs = (ndis_wlan_bssid_ex *)&bl->nblx_bssid[0]; for (i = 0; i < bl->nblx_items; i++) { if (bcmp(bs->nwbx_macaddr, bssid, sizeof(bssid)) == 0) { *assoc = malloc(bs->nwbx_len, M_TEMP, M_NOWAIT); if (*assoc == NULL) { free(bl, M_TEMP); return (ENOMEM); } bcopy((char *)bs, (char *)*assoc, bs->nwbx_len); free(bl, M_TEMP); if (ic->ic_opmode == IEEE80211_M_STA) ni->ni_associd = 1 | 0xc000; /* fake associd */ return (0); } bs = (ndis_wlan_bssid_ex *)((char *)bs + bs->nwbx_len); } free(bl, M_TEMP); return (ENOENT); } static void ndis_getstate_80211(struct ndis_softc *sc) { struct ieee80211com *ic = &sc->ndis_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); struct ieee80211_node *ni = vap->iv_bss; ndis_wlan_bssid_ex *bs; int rval, len, i = 0; int chanflag; uint32_t arg; if (!NDIS_INITIALIZED(sc)) return; if ((rval = ndis_get_assoc(sc, &bs)) != 0) return; /* We're associated, retrieve info on the current bssid. 
*/ ic->ic_curmode = ndis_nettype_mode(bs->nwbx_nettype); chanflag = ndis_nettype_chan(bs->nwbx_nettype); IEEE80211_ADDR_COPY(ni->ni_bssid, bs->nwbx_macaddr); /* Get SSID from current association info. */ bcopy(bs->nwbx_ssid.ns_ssid, ni->ni_essid, bs->nwbx_ssid.ns_ssidlen); ni->ni_esslen = bs->nwbx_ssid.ns_ssidlen; if (ic->ic_caps & IEEE80211_C_PMGT) { len = sizeof(arg); rval = ndis_get_info(sc, OID_802_11_POWER_MODE, &arg, &len); if (rval) device_printf(sc->ndis_dev, "get power mode failed: %d\n", rval); if (arg == NDIS_80211_POWERMODE_CAM) vap->iv_flags &= ~IEEE80211_F_PMGTON; else vap->iv_flags |= IEEE80211_F_PMGTON; } /* Get TX power */ if (ic->ic_caps & IEEE80211_C_TXPMGT) { len = sizeof(arg); ndis_get_info(sc, OID_802_11_TX_POWER_LEVEL, &arg, &len); for (i = 0; i < nitems(dBm2mW); i++) if (dBm2mW[i] >= arg) break; ic->ic_txpowlimit = i; } /* * Use the current association information to reflect * what channel we're on. */ ic->ic_curchan = ieee80211_find_channel(ic, bs->nwbx_config.nc_dsconfig / 1000, chanflag); if (ic->ic_curchan == NULL) ic->ic_curchan = &ic->ic_channels[0]; ni->ni_chan = ic->ic_curchan; ic->ic_bsschan = ic->ic_curchan; free(bs, M_TEMP); /* * Determine current authentication mode. */ len = sizeof(arg); rval = ndis_get_info(sc, OID_802_11_AUTHENTICATION_MODE, &arg, &len); if (rval) device_printf(sc->ndis_dev, "get authmode status failed: %d\n", rval); else { vap->iv_flags &= ~IEEE80211_F_WPA; switch (arg) { case NDIS_80211_AUTHMODE_OPEN: ni->ni_authmode = IEEE80211_AUTH_OPEN; break; case NDIS_80211_AUTHMODE_SHARED: ni->ni_authmode = IEEE80211_AUTH_SHARED; break; case NDIS_80211_AUTHMODE_AUTO: ni->ni_authmode = IEEE80211_AUTH_AUTO; break; case NDIS_80211_AUTHMODE_WPA: case NDIS_80211_AUTHMODE_WPAPSK: case NDIS_80211_AUTHMODE_WPANONE: ni->ni_authmode = IEEE80211_AUTH_WPA; vap->iv_flags |= IEEE80211_F_WPA1; break; case NDIS_80211_AUTHMODE_WPA2: case NDIS_80211_AUTHMODE_WPA2PSK: ni->ni_authmode = IEEE80211_AUTH_WPA; vap->iv_flags |= IEEE80211_F_WPA2; break; default: ni->ni_authmode = IEEE80211_AUTH_NONE; break; } } len = sizeof(arg); rval = ndis_get_info(sc, OID_802_11_WEP_STATUS, &arg, &len); if (rval) device_printf(sc->ndis_dev, "get wep status failed: %d\n", rval); if (arg == NDIS_80211_WEPSTAT_ENABLED) vap->iv_flags |= IEEE80211_F_PRIVACY|IEEE80211_F_DROPUNENC; else vap->iv_flags &= ~(IEEE80211_F_PRIVACY|IEEE80211_F_DROPUNENC); } static int ndis_ifioctl(ifp, command, data) struct ifnet *ifp; u_long command; caddr_t data; { struct ndis_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *) data; int i, error = 0; /*NDIS_LOCK(sc);*/ switch (command) { case SIOCSIFFLAGS: if (ifp->if_flags & IFF_UP) { if (sc->ndis_running && ifp->if_flags & IFF_PROMISC && !(sc->ndis_if_flags & IFF_PROMISC)) { sc->ndis_filter |= NDIS_PACKET_TYPE_PROMISCUOUS; i = sizeof(sc->ndis_filter); error = ndis_set_info(sc, OID_GEN_CURRENT_PACKET_FILTER, &sc->ndis_filter, &i); } else if (sc->ndis_running && !(ifp->if_flags & IFF_PROMISC) && sc->ndis_if_flags & IFF_PROMISC) { sc->ndis_filter &= ~NDIS_PACKET_TYPE_PROMISCUOUS; i = sizeof(sc->ndis_filter); error = ndis_set_info(sc, OID_GEN_CURRENT_PACKET_FILTER, &sc->ndis_filter, &i); } else ndis_init(sc); } else { if (sc->ndis_running) ndis_stop(sc); } sc->ndis_if_flags = ifp->if_flags; error = 0; break; case SIOCADDMULTI: case SIOCDELMULTI: ndis_setmulti(sc); error = 0; break; case SIOCGIFMEDIA: case SIOCSIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->ifmedia, command); break; case SIOCSIFCAP: ifp->if_capenable = ifr->ifr_reqcap; if 
(ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist = sc->ndis_hwassist; else ifp->if_hwassist = 0; ndis_set_offload(sc); break; default: error = ether_ioctl(ifp, command, data); break; } /*NDIS_UNLOCK(sc);*/ return(error); } static int ndis_80211ioctl(struct ieee80211com *ic, u_long cmd, void *data) { struct ndis_softc *sc = ic->ic_softc; struct ifreq *ifr = data; struct ndis_oid_data oid; struct ndis_evt evt; void *oidbuf = NULL; int error = 0; if ((error = priv_check(curthread, PRIV_DRIVER)) != 0) return (error); switch (cmd) { case SIOCGDRVSPEC: case SIOCSDRVSPEC: - error = copyin(ifr->ifr_data, &oid, sizeof(oid)); + error = copyin(ifr_data_get_ptr(ifr), &oid, sizeof(oid)); if (error) break; oidbuf = malloc(oid.len, M_TEMP, M_WAITOK | M_ZERO); - error = copyin(ifr->ifr_data + sizeof(oid), oidbuf, oid.len); + error = copyin((caddr_t)ifr_data_get_ptr(ifr) + sizeof(oid), + oidbuf, oid.len); } if (error) { free(oidbuf, M_TEMP); return (error); } switch (cmd) { case SIOCGDRVSPEC: error = ndis_get_info(sc, oid.oid, oidbuf, &oid.len); break; case SIOCSDRVSPEC: error = ndis_set_info(sc, oid.oid, oidbuf, &oid.len); break; case SIOCGPRIVATE_0: NDIS_LOCK(sc); if (sc->ndis_evt[sc->ndis_evtcidx].ne_sts == 0) { error = ENOENT; NDIS_UNLOCK(sc); break; } - error = copyin(ifr->ifr_data, &evt, sizeof(evt)); + error = copyin(ifr_data_get_ptr(ifr), &evt, sizeof(evt)); if (error) { NDIS_UNLOCK(sc); break; } if (evt.ne_len < sc->ndis_evt[sc->ndis_evtcidx].ne_len) { error = ENOSPC; NDIS_UNLOCK(sc); break; } error = copyout(&sc->ndis_evt[sc->ndis_evtcidx], - ifr->ifr_data, sizeof(uint32_t) * 2); + ifr_data_get_ptr(ifr), sizeof(uint32_t) * 2); if (error) { NDIS_UNLOCK(sc); break; } if (sc->ndis_evt[sc->ndis_evtcidx].ne_len) { error = copyout(sc->ndis_evt[sc->ndis_evtcidx].ne_buf, - ifr->ifr_data + (sizeof(uint32_t) * 2), + (caddr_t)ifr_data_get_ptr(ifr) + + (sizeof(uint32_t) * 2), sc->ndis_evt[sc->ndis_evtcidx].ne_len); if (error) { NDIS_UNLOCK(sc); break; } free(sc->ndis_evt[sc->ndis_evtcidx].ne_buf, M_TEMP); sc->ndis_evt[sc->ndis_evtcidx].ne_buf = NULL; } sc->ndis_evt[sc->ndis_evtcidx].ne_len = 0; sc->ndis_evt[sc->ndis_evtcidx].ne_sts = 0; NDIS_EVTINC(sc->ndis_evtcidx); NDIS_UNLOCK(sc); break; default: error = ENOTTY; break; } switch (cmd) { case SIOCGDRVSPEC: case SIOCSDRVSPEC: - error = copyout(&oid, ifr->ifr_data, sizeof(oid)); + error = copyout(&oid, ifr_data_get_ptr(ifr), sizeof(oid)); if (error) break; - error = copyout(oidbuf, ifr->ifr_data + sizeof(oid), oid.len); + error = copyout(oidbuf, + (caddr_t)ifr_data_get_ptr(ifr) + sizeof(oid), oid.len); } free(oidbuf, M_TEMP); return (error); } int ndis_del_key(struct ieee80211vap *vap, const struct ieee80211_key *key) { struct ndis_softc *sc = vap->iv_ic->ic_softc; ndis_80211_key rkey; int len, error = 0; bzero((char *)&rkey, sizeof(rkey)); len = sizeof(rkey); rkey.nk_len = len; rkey.nk_keyidx = key->wk_keyix; bcopy(vap->iv_ifp->if_broadcastaddr, rkey.nk_bssid, IEEE80211_ADDR_LEN); error = ndis_set_info(sc, OID_802_11_REMOVE_KEY, &rkey, &len); if (error) return (0); return (1); } /* * In theory this could be called for any key, but we'll * only use it for WPA TKIP or AES keys. These need to be * set after initial authentication with the AP. 
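 * NDIS overloads the 32-bit key index field: bit 31 marks the transmit
 * key, bit 30 a pairwise (rather than group) key and bit 29 indicates
 * that a key RSC value is supplied; the low bits hold the real index.
 * For TKIP the two 8-byte Michael MIC halves are also swapped when
 * copied into nk_keydata, apparently because NDIS expects them in the
 * opposite order from net80211.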
*/ static int ndis_add_key(struct ieee80211vap *vap, const struct ieee80211_key *key) { struct ndis_softc *sc = vap->iv_ic->ic_softc; ndis_80211_key rkey; int len, error = 0; switch (key->wk_cipher->ic_cipher) { case IEEE80211_CIPHER_TKIP: len = sizeof(ndis_80211_key); bzero((char *)&rkey, sizeof(rkey)); rkey.nk_len = len; rkey.nk_keylen = key->wk_keylen; if (key->wk_flags & IEEE80211_KEY_SWMIC) rkey.nk_keylen += 16; /* key index - gets weird in NDIS */ if (key->wk_keyix != IEEE80211_KEYIX_NONE) rkey.nk_keyidx = key->wk_keyix; else rkey.nk_keyidx = 0; if (key->wk_flags & IEEE80211_KEY_XMIT) rkey.nk_keyidx |= 1 << 31; if (key->wk_flags & IEEE80211_KEY_GROUP) { bcopy(ieee80211broadcastaddr, rkey.nk_bssid, IEEE80211_ADDR_LEN); } else { bcopy(vap->iv_bss->ni_bssid, rkey.nk_bssid, IEEE80211_ADDR_LEN); /* pairwise key */ rkey.nk_keyidx |= 1 << 30; } /* need to set bit 29 based on keyrsc */ rkey.nk_keyrsc = key->wk_keyrsc[0]; /* XXX need tid */ if (rkey.nk_keyrsc) rkey.nk_keyidx |= 1 << 29; if (key->wk_flags & IEEE80211_KEY_SWMIC) { bcopy(key->wk_key, rkey.nk_keydata, 16); bcopy(key->wk_key + 24, rkey.nk_keydata + 16, 8); bcopy(key->wk_key + 16, rkey.nk_keydata + 24, 8); } else bcopy(key->wk_key, rkey.nk_keydata, key->wk_keylen); error = ndis_set_info(sc, OID_802_11_ADD_KEY, &rkey, &len); break; case IEEE80211_CIPHER_WEP: error = 0; break; /* * I don't know how to set up keys for the AES * cipher yet. Is it the same as TKIP? */ case IEEE80211_CIPHER_AES_CCM: default: error = ENOTTY; break; } /* We need to return 1 for success, 0 for failure. */ if (error) return (0); return (1); } static void ndis_resettask(d, arg) device_object *d; void *arg; { struct ndis_softc *sc; sc = arg; ndis_reset_nic(sc); } /* * Stop the adapter and free any mbufs allocated to the * RX and TX lists. */ static void ndis_stop(struct ndis_softc *sc) { int i; callout_drain(&sc->ndis_stat_callout); NDIS_LOCK(sc); sc->ndis_tx_timer = 0; sc->ndis_link = 0; if (!sc->ndis_80211) sc->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); sc->ndis_running = 0; NDIS_UNLOCK(sc); if (sc->ndis_iftype != PNPBus || (sc->ndis_iftype == PNPBus && !(sc->ndisusb_status & NDISUSB_STATUS_DETACH) && ndisusb_halt != 0)) ndis_halt_nic(sc); NDIS_LOCK(sc); for (i = 0; i < NDIS_EVENTS; i++) { if (sc->ndis_evt[i].ne_sts && sc->ndis_evt[i].ne_buf != NULL) { free(sc->ndis_evt[i].ne_buf, M_TEMP); sc->ndis_evt[i].ne_buf = NULL; } sc->ndis_evt[i].ne_sts = 0; sc->ndis_evt[i].ne_len = 0; } sc->ndis_evtcidx = 0; sc->ndis_evtpidx = 0; NDIS_UNLOCK(sc); } /* * Stop all chip I/O so that the kernel's probe routines don't * get confused by errant DMAs when rebooting. 
*/ void ndis_shutdown(dev) device_t dev; { struct ndis_softc *sc; sc = device_get_softc(dev); ndis_stop(sc); } static int ndis_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) { struct ndis_vap *nvp = NDIS_VAP(vap); struct ieee80211com *ic = vap->iv_ic; struct ndis_softc *sc = ic->ic_softc; enum ieee80211_state ostate; DPRINTF(("%s: %s -> %s\n", __func__, ieee80211_state_name[vap->iv_state], ieee80211_state_name[nstate])); ostate = vap->iv_state; vap->iv_state = nstate; switch (nstate) { /* pass on to net80211 */ case IEEE80211_S_INIT: case IEEE80211_S_SCAN: return nvp->newstate(vap, nstate, arg); case IEEE80211_S_ASSOC: if (ostate != IEEE80211_S_AUTH) { IEEE80211_UNLOCK(ic); ndis_auth_and_assoc(sc, vap); IEEE80211_LOCK(ic); } break; case IEEE80211_S_AUTH: IEEE80211_UNLOCK(ic); ndis_auth_and_assoc(sc, vap); if (vap->iv_state == IEEE80211_S_AUTH) /* XXX */ ieee80211_new_state(vap, IEEE80211_S_ASSOC, 0); IEEE80211_LOCK(ic); break; default: break; } return (0); } static void ndis_scan(void *arg) { struct ieee80211vap *vap = arg; ieee80211_scan_done(vap); } static void ndis_scan_results(struct ndis_softc *sc) { struct ieee80211com *ic = &sc->ndis_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); ndis_80211_bssid_list_ex *bl; ndis_wlan_bssid_ex *wb; struct ieee80211_scanparams sp; struct ieee80211_frame wh; struct ieee80211_channel *saved_chan; int i, j; int rssi, noise, freq, chanflag; uint8_t ssid[2+IEEE80211_NWID_LEN]; uint8_t rates[2+IEEE80211_RATE_MAXSIZE]; uint8_t *frm, *efrm; saved_chan = ic->ic_curchan; noise = -96; if (ndis_get_bssid_list(sc, &bl)) return; DPRINTF(("%s: %d results\n", __func__, bl->nblx_items)); wb = &bl->nblx_bssid[0]; for (i = 0; i < bl->nblx_items; i++) { memset(&sp, 0, sizeof(sp)); memcpy(wh.i_addr2, wb->nwbx_macaddr, sizeof(wh.i_addr2)); memcpy(wh.i_addr3, wb->nwbx_macaddr, sizeof(wh.i_addr3)); rssi = 100 * (wb->nwbx_rssi - noise) / (-32 - noise); rssi = max(0, min(rssi, 100)); /* limit 0 <= rssi <= 100 */ if (wb->nwbx_privacy) sp.capinfo |= IEEE80211_CAPINFO_PRIVACY; sp.bintval = wb->nwbx_config.nc_beaconperiod; switch (wb->nwbx_netinfra) { case NDIS_80211_NET_INFRA_IBSS: sp.capinfo |= IEEE80211_CAPINFO_IBSS; break; case NDIS_80211_NET_INFRA_BSS: sp.capinfo |= IEEE80211_CAPINFO_ESS; break; } sp.rates = &rates[0]; for (j = 0; j < IEEE80211_RATE_MAXSIZE; j++) { /* XXX - check units */ if (wb->nwbx_supportedrates[j] == 0) break; rates[2 + j] = wb->nwbx_supportedrates[j] & 0x7f; } rates[1] = j; sp.ssid = (uint8_t *)&ssid[0]; memcpy(sp.ssid + 2, &wb->nwbx_ssid.ns_ssid, wb->nwbx_ssid.ns_ssidlen); sp.ssid[1] = wb->nwbx_ssid.ns_ssidlen; chanflag = ndis_nettype_chan(wb->nwbx_nettype); freq = wb->nwbx_config.nc_dsconfig / 1000; sp.chan = sp.bchan = ieee80211_mhz2ieee(freq, chanflag); /* Hack ic->ic_curchan to be in sync with the scan result */ ic->ic_curchan = ieee80211_find_channel(ic, freq, chanflag); if (ic->ic_curchan == NULL) ic->ic_curchan = &ic->ic_channels[0]; /* Process extended info from AP */ if (wb->nwbx_len > sizeof(ndis_wlan_bssid)) { frm = (uint8_t *)&wb->nwbx_ies; efrm = frm + wb->nwbx_ielen; if (efrm - frm < 12) goto done; sp.tstamp = frm; frm += 8; sp.bintval = le16toh(*(uint16_t *)frm); frm += 2; sp.capinfo = le16toh(*(uint16_t *)frm); frm += 2; sp.ies = frm; sp.ies_len = efrm - frm; } done: DPRINTF(("scan: bssid %s chan %dMHz (%d/%d) rssi %d\n", ether_sprintf(wb->nwbx_macaddr), freq, sp.bchan, chanflag, rssi)); ieee80211_add_scan(vap, ic->ic_curchan, &sp, &wh, 0, rssi, noise); wb = (ndis_wlan_bssid_ex *)((char *)wb + 
wb->nwbx_len); } free(bl, M_DEVBUF); /* Restore the channel after messing with it */ ic->ic_curchan = saved_chan; } static void ndis_scan_start(struct ieee80211com *ic) { struct ndis_softc *sc = ic->ic_softc; struct ieee80211vap *vap; struct ieee80211_scan_state *ss; ndis_80211_ssid ssid; int error, len; ss = ic->ic_scan; vap = TAILQ_FIRST(&ic->ic_vaps); if (!NDIS_INITIALIZED(sc)) { DPRINTF(("%s: scan aborted\n", __func__)); ieee80211_cancel_scan(vap); return; } len = sizeof(ssid); bzero((char *)&ssid, len); if (ss->ss_nssid == 0) ssid.ns_ssidlen = 1; else { /* Perform a directed scan */ ssid.ns_ssidlen = ss->ss_ssid[0].len; bcopy(ss->ss_ssid[0].ssid, ssid.ns_ssid, ssid.ns_ssidlen); } error = ndis_set_info(sc, OID_802_11_SSID, &ssid, &len); if (error) DPRINTF(("%s: set ESSID failed\n", __func__)); len = 0; error = ndis_set_info(sc, OID_802_11_BSSID_LIST_SCAN, NULL, &len); if (error) { DPRINTF(("%s: scan command failed\n", __func__)); ieee80211_cancel_scan(vap); return; } /* Set a timer to collect the results */ callout_reset(&sc->ndis_scan_callout, hz * 3, ndis_scan, vap); } static void ndis_set_channel(struct ieee80211com *ic) { /* ignore */ } static void ndis_scan_curchan(struct ieee80211_scan_state *ss, unsigned long maxdwell) { /* ignore */ } static void ndis_scan_mindwell(struct ieee80211_scan_state *ss) { /* NB: don't try to abort scan; wait for firmware to finish */ } static void ndis_scan_end(struct ieee80211com *ic) { struct ndis_softc *sc = ic->ic_softc; ndis_scan_results(sc); } Index: stable/11/sys/dev/iwi/if_iwi.c =================================================================== --- stable/11/sys/dev/iwi/if_iwi.c (revision 332287) +++ stable/11/sys/dev/iwi/if_iwi.c (revision 332288) @@ -1,3617 +1,3617 @@ /*- * Copyright (c) 2004, 2005 * Damien Bergamini . All rights reserved. * Copyright (c) 2005-2006 Sam Leffler, Errno Consulting * Copyright (c) 2007 Andrew Thompson * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); /*- * Intel(R) PRO/Wireless 2200BG/2225BG/2915ABG driver * http://www.intel.com/network/connectivity/products/wireless/prowireless_mobile.htm */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define IWI_DEBUG #ifdef IWI_DEBUG #define DPRINTF(x) do { if (iwi_debug > 0) printf x; } while (0) #define DPRINTFN(n, x) do { if (iwi_debug >= (n)) printf x; } while (0) int iwi_debug = 0; SYSCTL_INT(_debug, OID_AUTO, iwi, CTLFLAG_RW, &iwi_debug, 0, "iwi debug level"); static const char *iwi_fw_states[] = { "IDLE", /* IWI_FW_IDLE */ "LOADING", /* IWI_FW_LOADING */ "ASSOCIATING", /* IWI_FW_ASSOCIATING */ "DISASSOCIATING", /* IWI_FW_DISASSOCIATING */ "SCANNING", /* IWI_FW_SCANNING */ }; #else #define DPRINTF(x) #define DPRINTFN(n, x) #endif MODULE_DEPEND(iwi, pci, 1, 1, 1); MODULE_DEPEND(iwi, wlan, 1, 1, 1); MODULE_DEPEND(iwi, firmware, 1, 1, 1); enum { IWI_LED_TX, IWI_LED_RX, IWI_LED_POLL, }; struct iwi_ident { uint16_t vendor; uint16_t device; const char *name; }; static const struct iwi_ident iwi_ident_table[] = { { 0x8086, 0x4220, "Intel(R) PRO/Wireless 2200BG" }, { 0x8086, 0x4221, "Intel(R) PRO/Wireless 2225BG" }, { 0x8086, 0x4223, "Intel(R) PRO/Wireless 2915ABG" }, { 0x8086, 0x4224, "Intel(R) PRO/Wireless 2915ABG" }, { 0, 0, NULL } }; static const uint8_t def_chan_2ghz[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 }; static const uint8_t def_chan_5ghz_band1[] = { 36, 40, 44, 48, 52, 56, 60, 64 }; static const uint8_t def_chan_5ghz_band2[] = { 100, 104, 108, 112, 116, 120, 124, 128, 132, 136, 140 }; static const uint8_t def_chan_5ghz_band3[] = { 149, 153, 157, 161, 165 }; static struct ieee80211vap *iwi_vap_create(struct ieee80211com *, const char [IFNAMSIZ], int, enum ieee80211_opmode, int, const uint8_t [IEEE80211_ADDR_LEN], const uint8_t [IEEE80211_ADDR_LEN]); static void iwi_vap_delete(struct ieee80211vap *); static void iwi_dma_map_addr(void *, bus_dma_segment_t *, int, int); static int iwi_alloc_cmd_ring(struct iwi_softc *, struct iwi_cmd_ring *, int); static void iwi_reset_cmd_ring(struct iwi_softc *, struct iwi_cmd_ring *); static void iwi_free_cmd_ring(struct iwi_softc *, struct iwi_cmd_ring *); static int iwi_alloc_tx_ring(struct iwi_softc *, struct iwi_tx_ring *, int, bus_addr_t, bus_addr_t); static void iwi_reset_tx_ring(struct iwi_softc *, struct iwi_tx_ring *); static void iwi_free_tx_ring(struct iwi_softc *, struct iwi_tx_ring *); static int iwi_alloc_rx_ring(struct iwi_softc *, struct iwi_rx_ring *, int); static void iwi_reset_rx_ring(struct iwi_softc *, struct iwi_rx_ring *); static void iwi_free_rx_ring(struct iwi_softc *, struct iwi_rx_ring *); static struct ieee80211_node *iwi_node_alloc(struct ieee80211vap *, const uint8_t [IEEE80211_ADDR_LEN]); static void iwi_node_free(struct ieee80211_node *); static void iwi_media_status(struct ifnet *, struct ifmediareq *); static int iwi_newstate(struct ieee80211vap *, enum ieee80211_state, int); static void iwi_wme_init(struct iwi_softc *); static int iwi_wme_setparams(struct iwi_softc *); static int iwi_wme_update(struct ieee80211com *); static uint16_t iwi_read_prom_word(struct iwi_softc *, uint8_t); static void iwi_frame_intr(struct iwi_softc *, 
struct iwi_rx_data *, int, struct iwi_frame *); static void iwi_notification_intr(struct iwi_softc *, struct iwi_notif *); static void iwi_rx_intr(struct iwi_softc *); static void iwi_tx_intr(struct iwi_softc *, struct iwi_tx_ring *); static void iwi_intr(void *); static int iwi_cmd(struct iwi_softc *, uint8_t, void *, uint8_t); static void iwi_write_ibssnode(struct iwi_softc *, const u_int8_t [], int); static int iwi_tx_start(struct iwi_softc *, struct mbuf *, struct ieee80211_node *, int); static int iwi_raw_xmit(struct ieee80211_node *, struct mbuf *, const struct ieee80211_bpf_params *); static void iwi_start(struct iwi_softc *); static int iwi_transmit(struct ieee80211com *, struct mbuf *); static void iwi_watchdog(void *); static int iwi_ioctl(struct ieee80211com *, u_long, void *); static void iwi_parent(struct ieee80211com *); static void iwi_stop_master(struct iwi_softc *); static int iwi_reset(struct iwi_softc *); static int iwi_load_ucode(struct iwi_softc *, const struct iwi_fw *); static int iwi_load_firmware(struct iwi_softc *, const struct iwi_fw *); static void iwi_release_fw_dma(struct iwi_softc *sc); static int iwi_config(struct iwi_softc *); static int iwi_get_firmware(struct iwi_softc *, enum ieee80211_opmode); static void iwi_put_firmware(struct iwi_softc *); static void iwi_monitor_scan(void *, int); static int iwi_scanchan(struct iwi_softc *, unsigned long, int); static void iwi_scan_start(struct ieee80211com *); static void iwi_scan_end(struct ieee80211com *); static void iwi_set_channel(struct ieee80211com *); static void iwi_scan_curchan(struct ieee80211_scan_state *, unsigned long maxdwell); static void iwi_scan_mindwell(struct ieee80211_scan_state *); static int iwi_auth_and_assoc(struct iwi_softc *, struct ieee80211vap *); static void iwi_disassoc(void *, int); static int iwi_disassociate(struct iwi_softc *, int quiet); static void iwi_init_locked(struct iwi_softc *); static void iwi_init(void *); static int iwi_init_fw_dma(struct iwi_softc *, int); static void iwi_stop_locked(void *); static void iwi_stop(struct iwi_softc *); static void iwi_restart(void *, int); static int iwi_getrfkill(struct iwi_softc *); static void iwi_radio_on(void *, int); static void iwi_radio_off(void *, int); static void iwi_sysctlattach(struct iwi_softc *); static void iwi_led_event(struct iwi_softc *, int); static void iwi_ledattach(struct iwi_softc *); static void iwi_collect_bands(struct ieee80211com *, uint8_t [], size_t); static void iwi_getradiocaps(struct ieee80211com *, int, int *, struct ieee80211_channel []); static int iwi_probe(device_t); static int iwi_attach(device_t); static int iwi_detach(device_t); static int iwi_shutdown(device_t); static int iwi_suspend(device_t); static int iwi_resume(device_t); static device_method_t iwi_methods[] = { /* Device interface */ DEVMETHOD(device_probe, iwi_probe), DEVMETHOD(device_attach, iwi_attach), DEVMETHOD(device_detach, iwi_detach), DEVMETHOD(device_shutdown, iwi_shutdown), DEVMETHOD(device_suspend, iwi_suspend), DEVMETHOD(device_resume, iwi_resume), DEVMETHOD_END }; static driver_t iwi_driver = { "iwi", iwi_methods, sizeof (struct iwi_softc) }; static devclass_t iwi_devclass; DRIVER_MODULE(iwi, pci, iwi_driver, iwi_devclass, NULL, NULL); MODULE_VERSION(iwi, 1); static __inline uint8_t MEM_READ_1(struct iwi_softc *sc, uint32_t addr) { CSR_WRITE_4(sc, IWI_CSR_INDIRECT_ADDR, addr); return CSR_READ_1(sc, IWI_CSR_INDIRECT_DATA); } static __inline uint32_t MEM_READ_4(struct iwi_softc *sc, uint32_t addr) { CSR_WRITE_4(sc, 
IWI_CSR_INDIRECT_ADDR, addr); return CSR_READ_4(sc, IWI_CSR_INDIRECT_DATA); } static int iwi_probe(device_t dev) { const struct iwi_ident *ident; for (ident = iwi_ident_table; ident->name != NULL; ident++) { if (pci_get_vendor(dev) == ident->vendor && pci_get_device(dev) == ident->device) { device_set_desc(dev, ident->name); return (BUS_PROBE_DEFAULT); } } return ENXIO; } static int iwi_attach(device_t dev) { struct iwi_softc *sc = device_get_softc(dev); struct ieee80211com *ic = &sc->sc_ic; uint16_t val; int i, error; sc->sc_dev = dev; sc->sc_ledevent = ticks; IWI_LOCK_INIT(sc); mbufq_init(&sc->sc_snd, ifqmaxlen); sc->sc_unr = new_unrhdr(1, IWI_MAX_IBSSNODE-1, &sc->sc_mtx); TASK_INIT(&sc->sc_radiontask, 0, iwi_radio_on, sc); TASK_INIT(&sc->sc_radiofftask, 0, iwi_radio_off, sc); TASK_INIT(&sc->sc_restarttask, 0, iwi_restart, sc); TASK_INIT(&sc->sc_disassoctask, 0, iwi_disassoc, sc); TASK_INIT(&sc->sc_monitortask, 0, iwi_monitor_scan, sc); callout_init_mtx(&sc->sc_wdtimer, &sc->sc_mtx, 0); callout_init_mtx(&sc->sc_rftimer, &sc->sc_mtx, 0); pci_write_config(dev, 0x41, 0, 1); /* enable bus-mastering */ pci_enable_busmaster(dev); i = PCIR_BAR(0); sc->mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &i, RF_ACTIVE); if (sc->mem == NULL) { device_printf(dev, "could not allocate memory resource\n"); goto fail; } sc->sc_st = rman_get_bustag(sc->mem); sc->sc_sh = rman_get_bushandle(sc->mem); i = 0; sc->irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &i, RF_ACTIVE | RF_SHAREABLE); if (sc->irq == NULL) { device_printf(dev, "could not allocate interrupt resource\n"); goto fail; } if (iwi_reset(sc) != 0) { device_printf(dev, "could not reset adapter\n"); goto fail; } /* * Allocate rings. */ if (iwi_alloc_cmd_ring(sc, &sc->cmdq, IWI_CMD_RING_COUNT) != 0) { device_printf(dev, "could not allocate Cmd ring\n"); goto fail; } for (i = 0; i < 4; i++) { error = iwi_alloc_tx_ring(sc, &sc->txq[i], IWI_TX_RING_COUNT, IWI_CSR_TX1_RIDX + i * 4, IWI_CSR_TX1_WIDX + i * 4); if (error != 0) { device_printf(dev, "could not allocate Tx ring %d\n", i+i); goto fail; } } if (iwi_alloc_rx_ring(sc, &sc->rxq, IWI_RX_RING_COUNT) != 0) { device_printf(dev, "could not allocate Rx ring\n"); goto fail; } iwi_wme_init(sc); ic->ic_softc = sc; ic->ic_name = device_get_nameunit(dev); ic->ic_opmode = IEEE80211_M_STA; ic->ic_phytype = IEEE80211_T_OFDM; /* not only, but not used */ /* set device capabilities */ ic->ic_caps = IEEE80211_C_STA /* station mode supported */ | IEEE80211_C_IBSS /* IBSS mode supported */ | IEEE80211_C_MONITOR /* monitor mode supported */ | IEEE80211_C_PMGT /* power save supported */ | IEEE80211_C_SHPREAMBLE /* short preamble supported */ | IEEE80211_C_WPA /* 802.11i */ | IEEE80211_C_WME /* 802.11e */ #if 0 | IEEE80211_C_BGSCAN /* capable of bg scanning */ #endif ; /* read MAC address from EEPROM */ val = iwi_read_prom_word(sc, IWI_EEPROM_MAC + 0); ic->ic_macaddr[0] = val & 0xff; ic->ic_macaddr[1] = val >> 8; val = iwi_read_prom_word(sc, IWI_EEPROM_MAC + 1); ic->ic_macaddr[2] = val & 0xff; ic->ic_macaddr[3] = val >> 8; val = iwi_read_prom_word(sc, IWI_EEPROM_MAC + 2); ic->ic_macaddr[4] = val & 0xff; ic->ic_macaddr[5] = val >> 8; iwi_getradiocaps(ic, IEEE80211_CHAN_MAX, &ic->ic_nchans, ic->ic_channels); ieee80211_ifattach(ic); /* override default methods */ ic->ic_node_alloc = iwi_node_alloc; sc->sc_node_free = ic->ic_node_free; ic->ic_node_free = iwi_node_free; ic->ic_raw_xmit = iwi_raw_xmit; ic->ic_scan_start = iwi_scan_start; ic->ic_scan_end = iwi_scan_end; ic->ic_set_channel = iwi_set_channel; 
ic->ic_scan_curchan = iwi_scan_curchan; ic->ic_scan_mindwell = iwi_scan_mindwell; ic->ic_wme.wme_update = iwi_wme_update; ic->ic_vap_create = iwi_vap_create; ic->ic_vap_delete = iwi_vap_delete; ic->ic_ioctl = iwi_ioctl; ic->ic_transmit = iwi_transmit; ic->ic_parent = iwi_parent; ic->ic_getradiocaps = iwi_getradiocaps; ieee80211_radiotap_attach(ic, &sc->sc_txtap.wt_ihdr, sizeof(sc->sc_txtap), IWI_TX_RADIOTAP_PRESENT, &sc->sc_rxtap.wr_ihdr, sizeof(sc->sc_rxtap), IWI_RX_RADIOTAP_PRESENT); iwi_sysctlattach(sc); iwi_ledattach(sc); /* * Hook our interrupt after all initialization is complete. */ error = bus_setup_intr(dev, sc->irq, INTR_TYPE_NET | INTR_MPSAFE, NULL, iwi_intr, sc, &sc->sc_ih); if (error != 0) { device_printf(dev, "could not set up interrupt\n"); goto fail; } if (bootverbose) ieee80211_announce(ic); return 0; fail: /* XXX fix */ iwi_detach(dev); return ENXIO; } static int iwi_detach(device_t dev) { struct iwi_softc *sc = device_get_softc(dev); struct ieee80211com *ic = &sc->sc_ic; bus_teardown_intr(dev, sc->irq, sc->sc_ih); /* NB: do early to drain any pending tasks */ ieee80211_draintask(ic, &sc->sc_radiontask); ieee80211_draintask(ic, &sc->sc_radiofftask); ieee80211_draintask(ic, &sc->sc_restarttask); ieee80211_draintask(ic, &sc->sc_disassoctask); ieee80211_draintask(ic, &sc->sc_monitortask); iwi_stop(sc); ieee80211_ifdetach(ic); iwi_put_firmware(sc); iwi_release_fw_dma(sc); iwi_free_cmd_ring(sc, &sc->cmdq); iwi_free_tx_ring(sc, &sc->txq[0]); iwi_free_tx_ring(sc, &sc->txq[1]); iwi_free_tx_ring(sc, &sc->txq[2]); iwi_free_tx_ring(sc, &sc->txq[3]); iwi_free_rx_ring(sc, &sc->rxq); bus_release_resource(dev, SYS_RES_IRQ, rman_get_rid(sc->irq), sc->irq); bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(sc->mem), sc->mem); delete_unrhdr(sc->sc_unr); mbufq_drain(&sc->sc_snd); IWI_LOCK_DESTROY(sc); return 0; } static struct ieee80211vap * iwi_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ], int unit, enum ieee80211_opmode opmode, int flags, const uint8_t bssid[IEEE80211_ADDR_LEN], const uint8_t mac[IEEE80211_ADDR_LEN]) { struct iwi_softc *sc = ic->ic_softc; struct iwi_vap *ivp; struct ieee80211vap *vap; int i; if (!TAILQ_EMPTY(&ic->ic_vaps)) /* only one at a time */ return NULL; /* * Get firmware image (and possibly dma memory) on mode change. */ if (iwi_get_firmware(sc, opmode)) return NULL; /* allocate DMA memory for mapping firmware image */ i = sc->fw_fw.size; if (sc->fw_boot.size > i) i = sc->fw_boot.size; /* XXX do we dma the ucode as well ? 
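 * The firmware DMA area is sized to the largest of the boot, ucode and
 * runtime images so the one allocation can be reused for whichever
 * image is being loaded at the time.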
*/ if (sc->fw_uc.size > i) i = sc->fw_uc.size; if (iwi_init_fw_dma(sc, i)) return NULL; ivp = malloc(sizeof(struct iwi_vap), M_80211_VAP, M_WAITOK | M_ZERO); vap = &ivp->iwi_vap; ieee80211_vap_setup(ic, vap, name, unit, opmode, flags, bssid); /* override the default, the setting comes from the linux driver */ vap->iv_bmissthreshold = 24; /* override with driver methods */ ivp->iwi_newstate = vap->iv_newstate; vap->iv_newstate = iwi_newstate; /* complete setup */ ieee80211_vap_attach(vap, ieee80211_media_change, iwi_media_status, mac); ic->ic_opmode = opmode; return vap; } static void iwi_vap_delete(struct ieee80211vap *vap) { struct iwi_vap *ivp = IWI_VAP(vap); ieee80211_vap_detach(vap); free(ivp, M_80211_VAP); } static void iwi_dma_map_addr(void *arg, bus_dma_segment_t *segs, int nseg, int error) { if (error != 0) return; KASSERT(nseg == 1, ("too many DMA segments, %d should be 1", nseg)); *(bus_addr_t *)arg = segs[0].ds_addr; } static int iwi_alloc_cmd_ring(struct iwi_softc *sc, struct iwi_cmd_ring *ring, int count) { int error; ring->count = count; ring->queued = 0; ring->cur = ring->next = 0; error = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), 4, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, count * IWI_CMD_DESC_SIZE, 1, count * IWI_CMD_DESC_SIZE, 0, NULL, NULL, &ring->desc_dmat); if (error != 0) { device_printf(sc->sc_dev, "could not create desc DMA tag\n"); goto fail; } error = bus_dmamem_alloc(ring->desc_dmat, (void **)&ring->desc, BUS_DMA_NOWAIT | BUS_DMA_ZERO, &ring->desc_map); if (error != 0) { device_printf(sc->sc_dev, "could not allocate DMA memory\n"); goto fail; } error = bus_dmamap_load(ring->desc_dmat, ring->desc_map, ring->desc, count * IWI_CMD_DESC_SIZE, iwi_dma_map_addr, &ring->physaddr, 0); if (error != 0) { device_printf(sc->sc_dev, "could not load desc DMA map\n"); goto fail; } return 0; fail: iwi_free_cmd_ring(sc, ring); return error; } static void iwi_reset_cmd_ring(struct iwi_softc *sc, struct iwi_cmd_ring *ring) { ring->queued = 0; ring->cur = ring->next = 0; } static void iwi_free_cmd_ring(struct iwi_softc *sc, struct iwi_cmd_ring *ring) { if (ring->desc != NULL) { bus_dmamap_sync(ring->desc_dmat, ring->desc_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->desc_dmat, ring->desc_map); bus_dmamem_free(ring->desc_dmat, ring->desc, ring->desc_map); } if (ring->desc_dmat != NULL) bus_dma_tag_destroy(ring->desc_dmat); } static int iwi_alloc_tx_ring(struct iwi_softc *sc, struct iwi_tx_ring *ring, int count, bus_addr_t csr_ridx, bus_addr_t csr_widx) { int i, error; ring->count = count; ring->queued = 0; ring->cur = ring->next = 0; ring->csr_ridx = csr_ridx; ring->csr_widx = csr_widx; error = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), 4, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, count * IWI_TX_DESC_SIZE, 1, count * IWI_TX_DESC_SIZE, 0, NULL, NULL, &ring->desc_dmat); if (error != 0) { device_printf(sc->sc_dev, "could not create desc DMA tag\n"); goto fail; } error = bus_dmamem_alloc(ring->desc_dmat, (void **)&ring->desc, BUS_DMA_NOWAIT | BUS_DMA_ZERO, &ring->desc_map); if (error != 0) { device_printf(sc->sc_dev, "could not allocate DMA memory\n"); goto fail; } error = bus_dmamap_load(ring->desc_dmat, ring->desc_map, ring->desc, count * IWI_TX_DESC_SIZE, iwi_dma_map_addr, &ring->physaddr, 0); if (error != 0) { device_printf(sc->sc_dev, "could not load desc DMA map\n"); goto fail; } ring->data = malloc(count * sizeof (struct iwi_tx_data), M_DEVBUF, M_NOWAIT | M_ZERO); if (ring->data == NULL) { device_printf(sc->sc_dev, "could not 
allocate soft data\n"); error = ENOMEM; goto fail; } error = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), 1, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, IWI_MAX_NSEG, MCLBYTES, 0, NULL, NULL, &ring->data_dmat); if (error != 0) { device_printf(sc->sc_dev, "could not create data DMA tag\n"); goto fail; } for (i = 0; i < count; i++) { error = bus_dmamap_create(ring->data_dmat, 0, &ring->data[i].map); if (error != 0) { device_printf(sc->sc_dev, "could not create DMA map\n"); goto fail; } } return 0; fail: iwi_free_tx_ring(sc, ring); return error; } static void iwi_reset_tx_ring(struct iwi_softc *sc, struct iwi_tx_ring *ring) { struct iwi_tx_data *data; int i; for (i = 0; i < ring->count; i++) { data = &ring->data[i]; if (data->m != NULL) { bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->data_dmat, data->map); m_freem(data->m); data->m = NULL; } if (data->ni != NULL) { ieee80211_free_node(data->ni); data->ni = NULL; } } ring->queued = 0; ring->cur = ring->next = 0; } static void iwi_free_tx_ring(struct iwi_softc *sc, struct iwi_tx_ring *ring) { struct iwi_tx_data *data; int i; if (ring->desc != NULL) { bus_dmamap_sync(ring->desc_dmat, ring->desc_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->desc_dmat, ring->desc_map); bus_dmamem_free(ring->desc_dmat, ring->desc, ring->desc_map); } if (ring->desc_dmat != NULL) bus_dma_tag_destroy(ring->desc_dmat); if (ring->data != NULL) { for (i = 0; i < ring->count; i++) { data = &ring->data[i]; if (data->m != NULL) { bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ring->data_dmat, data->map); m_freem(data->m); } if (data->ni != NULL) ieee80211_free_node(data->ni); if (data->map != NULL) bus_dmamap_destroy(ring->data_dmat, data->map); } free(ring->data, M_DEVBUF); } if (ring->data_dmat != NULL) bus_dma_tag_destroy(ring->data_dmat); } static int iwi_alloc_rx_ring(struct iwi_softc *sc, struct iwi_rx_ring *ring, int count) { struct iwi_rx_data *data; int i, error; ring->count = count; ring->cur = 0; ring->data = malloc(count * sizeof (struct iwi_rx_data), M_DEVBUF, M_NOWAIT | M_ZERO); if (ring->data == NULL) { device_printf(sc->sc_dev, "could not allocate soft data\n"); error = ENOMEM; goto fail; } error = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), 1, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1, MCLBYTES, 0, NULL, NULL, &ring->data_dmat); if (error != 0) { device_printf(sc->sc_dev, "could not create data DMA tag\n"); goto fail; } for (i = 0; i < count; i++) { data = &ring->data[i]; error = bus_dmamap_create(ring->data_dmat, 0, &data->map); if (error != 0) { device_printf(sc->sc_dev, "could not create DMA map\n"); goto fail; } data->m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (data->m == NULL) { device_printf(sc->sc_dev, "could not allocate rx mbuf\n"); error = ENOMEM; goto fail; } error = bus_dmamap_load(ring->data_dmat, data->map, mtod(data->m, void *), MCLBYTES, iwi_dma_map_addr, &data->physaddr, 0); if (error != 0) { device_printf(sc->sc_dev, "could not load rx buf DMA map"); goto fail; } data->reg = IWI_CSR_RX_BASE + i * 4; } return 0; fail: iwi_free_rx_ring(sc, ring); return error; } static void iwi_reset_rx_ring(struct iwi_softc *sc, struct iwi_rx_ring *ring) { ring->cur = 0; } static void iwi_free_rx_ring(struct iwi_softc *sc, struct iwi_rx_ring *ring) { struct iwi_rx_data *data; int i; if (ring->data != NULL) { for (i = 0; i < ring->count; i++) { data = &ring->data[i]; if (data->m != NULL) { 
bus_dmamap_sync(ring->data_dmat, data->map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(ring->data_dmat, data->map); m_freem(data->m); } if (data->map != NULL) bus_dmamap_destroy(ring->data_dmat, data->map); } free(ring->data, M_DEVBUF); } if (ring->data_dmat != NULL) bus_dma_tag_destroy(ring->data_dmat); } static int iwi_shutdown(device_t dev) { struct iwi_softc *sc = device_get_softc(dev); iwi_stop(sc); iwi_put_firmware(sc); /* ??? XXX */ return 0; } static int iwi_suspend(device_t dev) { struct iwi_softc *sc = device_get_softc(dev); struct ieee80211com *ic = &sc->sc_ic; ieee80211_suspend_all(ic); return 0; } static int iwi_resume(device_t dev) { struct iwi_softc *sc = device_get_softc(dev); struct ieee80211com *ic = &sc->sc_ic; pci_write_config(dev, 0x41, 0, 1); ieee80211_resume_all(ic); return 0; } static struct ieee80211_node * iwi_node_alloc(struct ieee80211vap *vap, const uint8_t mac[IEEE80211_ADDR_LEN]) { struct iwi_node *in; in = malloc(sizeof (struct iwi_node), M_80211_NODE, M_NOWAIT | M_ZERO); if (in == NULL) return NULL; /* XXX assign sta table entry for adhoc */ in->in_station = -1; return &in->in_node; } static void iwi_node_free(struct ieee80211_node *ni) { struct ieee80211com *ic = ni->ni_ic; struct iwi_softc *sc = ic->ic_softc; struct iwi_node *in = (struct iwi_node *)ni; if (in->in_station != -1) { DPRINTF(("%s mac %6D station %u\n", __func__, ni->ni_macaddr, ":", in->in_station)); free_unr(sc->sc_unr, in->in_station); } sc->sc_node_free(ni); } /* * Convert h/w rate code to IEEE rate code. */ static int iwi_cvtrate(int iwirate) { switch (iwirate) { case IWI_RATE_DS1: return 2; case IWI_RATE_DS2: return 4; case IWI_RATE_DS5: return 11; case IWI_RATE_DS11: return 22; case IWI_RATE_OFDM6: return 12; case IWI_RATE_OFDM9: return 18; case IWI_RATE_OFDM12: return 24; case IWI_RATE_OFDM18: return 36; case IWI_RATE_OFDM24: return 48; case IWI_RATE_OFDM36: return 72; case IWI_RATE_OFDM48: return 96; case IWI_RATE_OFDM54: return 108; } return 0; } /* * The firmware automatically adapts the transmit speed. We report its current * value here. */ static void iwi_media_status(struct ifnet *ifp, struct ifmediareq *imr) { struct ieee80211vap *vap = ifp->if_softc; struct ieee80211com *ic = vap->iv_ic; struct iwi_softc *sc = ic->ic_softc; struct ieee80211_node *ni; /* read current transmission rate from adapter */ ni = ieee80211_ref_node(vap->iv_bss); ni->ni_txrate = iwi_cvtrate(CSR_READ_4(sc, IWI_CSR_CURRENT_TX_RATE)); ieee80211_free_node(ni); ieee80211_media_status(ifp, imr); } static int iwi_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) { struct iwi_vap *ivp = IWI_VAP(vap); struct ieee80211com *ic = vap->iv_ic; struct iwi_softc *sc = ic->ic_softc; IWI_LOCK_DECL; DPRINTF(("%s: %s -> %s flags 0x%x\n", __func__, ieee80211_state_name[vap->iv_state], ieee80211_state_name[nstate], sc->flags)); IEEE80211_UNLOCK(ic); IWI_LOCK(sc); switch (nstate) { case IEEE80211_S_INIT: /* * NB: don't try to do this if iwi_stop_master has * shutdown the firmware and disabled interrupts. */ if (vap->iv_state == IEEE80211_S_RUN && (sc->flags & IWI_FLAG_FW_INITED)) iwi_disassociate(sc, 0); break; case IEEE80211_S_AUTH: iwi_auth_and_assoc(sc, vap); break; case IEEE80211_S_RUN: if (vap->iv_opmode == IEEE80211_M_IBSS && vap->iv_state == IEEE80211_S_SCAN) { /* * XXX when joining an ibss network we are called * with a SCAN -> RUN transition on scan complete. * Use that to call iwi_auth_and_assoc. 
On completing * the join we are then called again with an * AUTH -> RUN transition and we want to do nothing. * This is all totally bogus and needs to be redone. */ iwi_auth_and_assoc(sc, vap); } else if (vap->iv_opmode == IEEE80211_M_MONITOR) ieee80211_runtask(ic, &sc->sc_monitortask); break; case IEEE80211_S_ASSOC: /* * If we are transitioning from AUTH then just wait * for the ASSOC status to come back from the firmware. * Otherwise we need to issue the association request. */ if (vap->iv_state == IEEE80211_S_AUTH) break; iwi_auth_and_assoc(sc, vap); break; default: break; } IWI_UNLOCK(sc); IEEE80211_LOCK(ic); return ivp->iwi_newstate(vap, nstate, arg); } /* * WME parameters coming from IEEE 802.11e specification. These values are * already declared in ieee80211_proto.c, but they are static so they can't * be reused here. */ static const struct wmeParams iwi_wme_cck_params[WME_NUM_AC] = { { 0, 3, 5, 7, 0 }, /* WME_AC_BE */ { 0, 3, 5, 10, 0 }, /* WME_AC_BK */ { 0, 2, 4, 5, 188 }, /* WME_AC_VI */ { 0, 2, 3, 4, 102 } /* WME_AC_VO */ }; static const struct wmeParams iwi_wme_ofdm_params[WME_NUM_AC] = { { 0, 3, 4, 6, 0 }, /* WME_AC_BE */ { 0, 3, 4, 10, 0 }, /* WME_AC_BK */ { 0, 2, 3, 4, 94 }, /* WME_AC_VI */ { 0, 2, 2, 3, 47 } /* WME_AC_VO */ }; #define IWI_EXP2(v) htole16((1 << (v)) - 1) #define IWI_USEC(v) htole16(IEEE80211_TXOP_TO_US(v)) static void iwi_wme_init(struct iwi_softc *sc) { const struct wmeParams *wmep; int ac; memset(sc->wme, 0, sizeof sc->wme); for (ac = 0; ac < WME_NUM_AC; ac++) { /* set WME values for CCK modulation */ wmep = &iwi_wme_cck_params[ac]; sc->wme[1].aifsn[ac] = wmep->wmep_aifsn; sc->wme[1].cwmin[ac] = IWI_EXP2(wmep->wmep_logcwmin); sc->wme[1].cwmax[ac] = IWI_EXP2(wmep->wmep_logcwmax); sc->wme[1].burst[ac] = IWI_USEC(wmep->wmep_txopLimit); sc->wme[1].acm[ac] = wmep->wmep_acm; /* set WME values for OFDM modulation */ wmep = &iwi_wme_ofdm_params[ac]; sc->wme[2].aifsn[ac] = wmep->wmep_aifsn; sc->wme[2].cwmin[ac] = IWI_EXP2(wmep->wmep_logcwmin); sc->wme[2].cwmax[ac] = IWI_EXP2(wmep->wmep_logcwmax); sc->wme[2].burst[ac] = IWI_USEC(wmep->wmep_txopLimit); sc->wme[2].acm[ac] = wmep->wmep_acm; } } static int iwi_wme_setparams(struct iwi_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; const struct wmeParams *wmep; int ac; for (ac = 0; ac < WME_NUM_AC; ac++) { /* set WME values for current operating mode */ wmep = &ic->ic_wme.wme_chanParams.cap_wmeParams[ac]; sc->wme[0].aifsn[ac] = wmep->wmep_aifsn; sc->wme[0].cwmin[ac] = IWI_EXP2(wmep->wmep_logcwmin); sc->wme[0].cwmax[ac] = IWI_EXP2(wmep->wmep_logcwmax); sc->wme[0].burst[ac] = IWI_USEC(wmep->wmep_txopLimit); sc->wme[0].acm[ac] = wmep->wmep_acm; } DPRINTF(("Setting WME parameters\n")); return iwi_cmd(sc, IWI_CMD_SET_WME_PARAMS, sc->wme, sizeof sc->wme); } #undef IWI_USEC #undef IWI_EXP2 static int iwi_wme_update(struct ieee80211com *ic) { struct iwi_softc *sc = ic->ic_softc; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); IWI_LOCK_DECL; /* * We may be called to update the WME parameters in * the adapter at various places. If we're already * associated then initiate the request immediately; * otherwise we assume the params will get sent down * to the adapter as part of the work iwi_auth_and_assoc * does. 
*/ if (vap->iv_state == IEEE80211_S_RUN) { IWI_LOCK(sc); iwi_wme_setparams(sc); IWI_UNLOCK(sc); } return (0); } static int iwi_wme_setie(struct iwi_softc *sc) { struct ieee80211_wme_info wme; memset(&wme, 0, sizeof wme); wme.wme_id = IEEE80211_ELEMID_VENDOR; wme.wme_len = sizeof (struct ieee80211_wme_info) - 2; wme.wme_oui[0] = 0x00; wme.wme_oui[1] = 0x50; wme.wme_oui[2] = 0xf2; wme.wme_type = WME_OUI_TYPE; wme.wme_subtype = WME_INFO_OUI_SUBTYPE; wme.wme_version = WME_VERSION; wme.wme_info = 0; DPRINTF(("Setting WME IE (len=%u)\n", wme.wme_len)); return iwi_cmd(sc, IWI_CMD_SET_WMEIE, &wme, sizeof wme); } /* * Read 16 bits at address 'addr' from the serial EEPROM. */ static uint16_t iwi_read_prom_word(struct iwi_softc *sc, uint8_t addr) { uint32_t tmp; uint16_t val; int n; /* clock C once before the first command */ IWI_EEPROM_CTL(sc, 0); IWI_EEPROM_CTL(sc, IWI_EEPROM_S); IWI_EEPROM_CTL(sc, IWI_EEPROM_S | IWI_EEPROM_C); IWI_EEPROM_CTL(sc, IWI_EEPROM_S); /* write start bit (1) */ IWI_EEPROM_CTL(sc, IWI_EEPROM_S | IWI_EEPROM_D); IWI_EEPROM_CTL(sc, IWI_EEPROM_S | IWI_EEPROM_D | IWI_EEPROM_C); /* write READ opcode (10) */ IWI_EEPROM_CTL(sc, IWI_EEPROM_S | IWI_EEPROM_D); IWI_EEPROM_CTL(sc, IWI_EEPROM_S | IWI_EEPROM_D | IWI_EEPROM_C); IWI_EEPROM_CTL(sc, IWI_EEPROM_S); IWI_EEPROM_CTL(sc, IWI_EEPROM_S | IWI_EEPROM_C); /* write address A7-A0 */ for (n = 7; n >= 0; n--) { IWI_EEPROM_CTL(sc, IWI_EEPROM_S | (((addr >> n) & 1) << IWI_EEPROM_SHIFT_D)); IWI_EEPROM_CTL(sc, IWI_EEPROM_S | (((addr >> n) & 1) << IWI_EEPROM_SHIFT_D) | IWI_EEPROM_C); } IWI_EEPROM_CTL(sc, IWI_EEPROM_S); /* read data Q15-Q0 */ val = 0; for (n = 15; n >= 0; n--) { IWI_EEPROM_CTL(sc, IWI_EEPROM_S | IWI_EEPROM_C); IWI_EEPROM_CTL(sc, IWI_EEPROM_S); tmp = MEM_READ_4(sc, IWI_MEM_EEPROM_CTL); val |= ((tmp & IWI_EEPROM_Q) >> IWI_EEPROM_SHIFT_Q) << n; } IWI_EEPROM_CTL(sc, 0); /* clear Chip Select and clock C */ IWI_EEPROM_CTL(sc, IWI_EEPROM_S); IWI_EEPROM_CTL(sc, 0); IWI_EEPROM_CTL(sc, IWI_EEPROM_C); return val; } static void iwi_setcurchan(struct iwi_softc *sc, int chan) { struct ieee80211com *ic = &sc->sc_ic; sc->curchan = chan; ieee80211_radiotap_chan_change(ic); } static void iwi_frame_intr(struct iwi_softc *sc, struct iwi_rx_data *data, int i, struct iwi_frame *frame) { struct ieee80211com *ic = &sc->sc_ic; struct mbuf *mnew, *m; struct ieee80211_node *ni; int type, error, framelen; int8_t rssi, nf; IWI_LOCK_DECL; framelen = le16toh(frame->len); if (framelen < IEEE80211_MIN_LEN || framelen > MCLBYTES) { /* * XXX >MCLBYTES is bogus as it means the h/w dma'd * out of bounds; need to figure out how to limit * frame size in the firmware */ /* XXX stat */ DPRINTFN(1, ("drop rx frame len=%u chan=%u rssi=%u rssi_dbm=%u\n", le16toh(frame->len), frame->chan, frame->rssi, frame->rssi_dbm)); return; } DPRINTFN(5, ("received frame len=%u chan=%u rssi=%u rssi_dbm=%u\n", le16toh(frame->len), frame->chan, frame->rssi, frame->rssi_dbm)); if (frame->chan != sc->curchan) iwi_setcurchan(sc, frame->chan); /* * Try to allocate a new mbuf for this ring element and load it before * processing the current mbuf. If the ring element cannot be loaded, * drop the received packet and reuse the old mbuf. In the unlikely * case that the old mbuf can't be reloaded either, explicitly panic. 
*/ mnew = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (mnew == NULL) { counter_u64_add(ic->ic_ierrors, 1); return; } bus_dmamap_unload(sc->rxq.data_dmat, data->map); error = bus_dmamap_load(sc->rxq.data_dmat, data->map, mtod(mnew, void *), MCLBYTES, iwi_dma_map_addr, &data->physaddr, 0); if (error != 0) { m_freem(mnew); /* try to reload the old mbuf */ error = bus_dmamap_load(sc->rxq.data_dmat, data->map, mtod(data->m, void *), MCLBYTES, iwi_dma_map_addr, &data->physaddr, 0); if (error != 0) { /* very unlikely that it will fail... */ panic("%s: could not load old rx mbuf", device_get_name(sc->sc_dev)); } counter_u64_add(ic->ic_ierrors, 1); return; } /* * New mbuf successfully loaded, update Rx ring and continue * processing. */ m = data->m; data->m = mnew; CSR_WRITE_4(sc, data->reg, data->physaddr); /* finalize mbuf */ m->m_pkthdr.len = m->m_len = sizeof (struct iwi_hdr) + sizeof (struct iwi_frame) + framelen; m_adj(m, sizeof (struct iwi_hdr) + sizeof (struct iwi_frame)); rssi = frame->rssi_dbm; nf = -95; if (ieee80211_radiotap_active(ic)) { struct iwi_rx_radiotap_header *tap = &sc->sc_rxtap; tap->wr_flags = 0; tap->wr_antsignal = rssi; tap->wr_antnoise = nf; tap->wr_rate = iwi_cvtrate(frame->rate); tap->wr_antenna = frame->antenna; } IWI_UNLOCK(sc); ni = ieee80211_find_rxnode(ic, mtod(m, struct ieee80211_frame_min *)); if (ni != NULL) { type = ieee80211_input(ni, m, rssi, nf); ieee80211_free_node(ni); } else type = ieee80211_input_all(ic, m, rssi, nf); IWI_LOCK(sc); if (sc->sc_softled) { /* * Blink for any data frame. Otherwise do a * heartbeat-style blink when idle. The latter * is mainly for station mode where we depend on * periodic beacon frames to trigger the poll event. */ if (type == IEEE80211_FC0_TYPE_DATA) { sc->sc_rxrate = frame->rate; iwi_led_event(sc, IWI_LED_RX); } else if (ticks - sc->sc_ledevent >= sc->sc_ledidle) iwi_led_event(sc, IWI_LED_POLL); } } /* * Check for an association response frame to see if QoS * has been negotiated. We parse just enough to figure * out if we're supposed to use QoS. The proper solution * is to pass the frame up so ieee80211_input can do the * work but that's made hard by how things currently are * done in the driver. 
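 * "Just enough" means the fixed fields (capability, status, association
 * ID) followed by a walk of the IEs looking only for the WME vendor IE,
 * i.e. the Microsoft OUI 00:50:f2 with OUI type 2 (the same values
 * iwi_wme_setie() writes on the way out).  Finding it is what sets
 * IEEE80211_NODE_QOS on the bss node below.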
*/ static void iwi_checkforqos(struct ieee80211vap *vap, const struct ieee80211_frame *wh, int len) { #define SUBTYPE(wh) ((wh)->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK) const uint8_t *frm, *efrm, *wme; struct ieee80211_node *ni; uint16_t capinfo, status, associd; /* NB: +8 for capinfo, status, associd, and first ie */ if (!(sizeof(*wh)+8 < len && len < IEEE80211_MAX_LEN) || SUBTYPE(wh) != IEEE80211_FC0_SUBTYPE_ASSOC_RESP) return; /* * asresp frame format * [2] capability information * [2] status * [2] association ID * [tlv] supported rates * [tlv] extended supported rates * [tlv] WME */ frm = (const uint8_t *)&wh[1]; efrm = ((const uint8_t *) wh) + len; capinfo = le16toh(*(const uint16_t *)frm); frm += 2; status = le16toh(*(const uint16_t *)frm); frm += 2; associd = le16toh(*(const uint16_t *)frm); frm += 2; wme = NULL; while (efrm - frm > 1) { IEEE80211_VERIFY_LENGTH(efrm - frm, frm[1] + 2, return); switch (*frm) { case IEEE80211_ELEMID_VENDOR: if (iswmeoui(frm)) wme = frm; break; } frm += frm[1] + 2; } ni = ieee80211_ref_node(vap->iv_bss); ni->ni_capinfo = capinfo; ni->ni_associd = associd & 0x3fff; if (wme != NULL) ni->ni_flags |= IEEE80211_NODE_QOS; else ni->ni_flags &= ~IEEE80211_NODE_QOS; ieee80211_free_node(ni); #undef SUBTYPE } static void iwi_notif_link_quality(struct iwi_softc *sc, struct iwi_notif *notif) { struct iwi_notif_link_quality *lq; int len; len = le16toh(notif->len); DPRINTFN(5, ("Notification (%u) - len=%d, sizeof=%zu\n", notif->type, len, sizeof(struct iwi_notif_link_quality) )); /* enforce length */ if (len != sizeof(struct iwi_notif_link_quality)) { DPRINTFN(5, ("Notification: (%u) too short (%d)\n", notif->type, len)); return; } lq = (struct iwi_notif_link_quality *)(notif + 1); memcpy(&sc->sc_linkqual, lq, sizeof(sc->sc_linkqual)); sc->sc_linkqual_valid = 1; } /* * Task queue callbacks for iwi_notification_intr used to avoid LOR's. */ static void iwi_notification_intr(struct iwi_softc *sc, struct iwi_notif *notif) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); struct iwi_notif_scan_channel *chan; struct iwi_notif_scan_complete *scan; struct iwi_notif_authentication *auth; struct iwi_notif_association *assoc; struct iwi_notif_beacon_state *beacon; switch (notif->type) { case IWI_NOTIF_TYPE_SCAN_CHANNEL: chan = (struct iwi_notif_scan_channel *)(notif + 1); DPRINTFN(3, ("Scan of channel %u complete (%u)\n", ieee80211_ieee2mhz(chan->nchan, 0), chan->nchan)); /* Reset the timer, the scan is still going */ sc->sc_state_timer = 3; break; case IWI_NOTIF_TYPE_SCAN_COMPLETE: scan = (struct iwi_notif_scan_complete *)(notif + 1); DPRINTFN(2, ("Scan completed (%u, %u)\n", scan->nchan, scan->status)); IWI_STATE_END(sc, IWI_FW_SCANNING); /* * Monitor mode works by doing a passive scan to set * the channel and enable rx. Because we don't want * to abort a scan lest the firmware crash we scan * for a short period of time and automatically restart * the scan when notified the sweep has completed. */ if (vap->iv_opmode == IEEE80211_M_MONITOR) { ieee80211_runtask(ic, &sc->sc_monitortask); break; } if (scan->status == IWI_SCAN_COMPLETED) { /* NB: don't need to defer, net80211 does it for us */ ieee80211_scan_next(vap); } break; case IWI_NOTIF_TYPE_AUTHENTICATION: auth = (struct iwi_notif_authentication *)(notif + 1); switch (auth->state) { case IWI_AUTH_SUCCESS: DPRINTFN(2, ("Authentication succeeeded\n")); ieee80211_new_state(vap, IEEE80211_S_ASSOC, -1); break; case IWI_AUTH_FAIL: /* * These are delivered as an unsolicited deauth * (e.g. 
due to inactivity) or in response to an * associate request. */ sc->flags &= ~IWI_FLAG_ASSOCIATED; if (vap->iv_state != IEEE80211_S_RUN) { DPRINTFN(2, ("Authentication failed\n")); vap->iv_stats.is_rx_auth_fail++; IWI_STATE_END(sc, IWI_FW_ASSOCIATING); } else { DPRINTFN(2, ("Deauthenticated\n")); vap->iv_stats.is_rx_deauth++; } ieee80211_new_state(vap, IEEE80211_S_SCAN, -1); break; case IWI_AUTH_SENT_1: case IWI_AUTH_RECV_2: case IWI_AUTH_SEQ1_PASS: break; case IWI_AUTH_SEQ1_FAIL: DPRINTFN(2, ("Initial authentication handshake failed; " "you probably need shared key\n")); vap->iv_stats.is_rx_auth_fail++; IWI_STATE_END(sc, IWI_FW_ASSOCIATING); /* XXX retry shared key when in auto */ break; default: device_printf(sc->sc_dev, "unknown authentication state %u\n", auth->state); break; } break; case IWI_NOTIF_TYPE_ASSOCIATION: assoc = (struct iwi_notif_association *)(notif + 1); switch (assoc->state) { case IWI_AUTH_SUCCESS: /* re-association, do nothing */ break; case IWI_ASSOC_SUCCESS: DPRINTFN(2, ("Association succeeded\n")); sc->flags |= IWI_FLAG_ASSOCIATED; IWI_STATE_END(sc, IWI_FW_ASSOCIATING); iwi_checkforqos(vap, (const struct ieee80211_frame *)(assoc+1), le16toh(notif->len) - sizeof(*assoc) - 1); ieee80211_new_state(vap, IEEE80211_S_RUN, -1); break; case IWI_ASSOC_INIT: sc->flags &= ~IWI_FLAG_ASSOCIATED; switch (sc->fw_state) { case IWI_FW_ASSOCIATING: DPRINTFN(2, ("Association failed\n")); IWI_STATE_END(sc, IWI_FW_ASSOCIATING); ieee80211_new_state(vap, IEEE80211_S_SCAN, -1); break; case IWI_FW_DISASSOCIATING: DPRINTFN(2, ("Dissassociated\n")); IWI_STATE_END(sc, IWI_FW_DISASSOCIATING); vap->iv_stats.is_rx_disassoc++; ieee80211_new_state(vap, IEEE80211_S_SCAN, -1); break; } break; default: device_printf(sc->sc_dev, "unknown association state %u\n", assoc->state); break; } break; case IWI_NOTIF_TYPE_BEACON: /* XXX check struct length */ beacon = (struct iwi_notif_beacon_state *)(notif + 1); DPRINTFN(5, ("Beacon state (%u, %u)\n", beacon->state, le32toh(beacon->number))); if (beacon->state == IWI_BEACON_MISS) { /* * The firmware notifies us of every beacon miss * so we need to track the count against the * configured threshold before notifying the * 802.11 layer. * XXX try to roam, drop assoc only on much higher count */ if (le32toh(beacon->number) >= vap->iv_bmissthreshold) { DPRINTF(("Beacon miss: %u >= %u\n", le32toh(beacon->number), vap->iv_bmissthreshold)); vap->iv_stats.is_beacon_miss++; /* * It's pointless to notify the 802.11 layer * as it'll try to send a probe request (which * we'll discard) and then timeout and drop us * into scan state. Instead tell the firmware * to disassociate and then on completion we'll * kick the state machine to scan. */ ieee80211_runtask(ic, &sc->sc_disassoctask); } } break; case IWI_NOTIF_TYPE_CALIBRATION: case IWI_NOTIF_TYPE_NOISE: /* XXX handle? 
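 * If these notifications were ever handled, the natural shape would
 * mirror iwi_notif_link_quality() above: validate le16toh(notif->len)
 * against the expected payload size, then cast (notif + 1) to the
 * payload type.  A rough sketch follows; the payload structure name is
 * made up for illustration and does not exist in the driver.
 */
#if 0
		{
			struct iwi_notif_noise *noise;	/* hypothetical */

			if (le16toh(notif->len) != sizeof(*noise))
				break;		/* malformed, ignore */
			noise = (struct iwi_notif_noise *)(notif + 1);
			/* ...record the measurement in the softc... */
		}
#endif
		/*
		 * For now the driver only logs that such a notification
		 * arrived.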
*/ DPRINTFN(5, ("Notification (%u)\n", notif->type)); break; case IWI_NOTIF_TYPE_LINK_QUALITY: iwi_notif_link_quality(sc, notif); break; default: DPRINTF(("unknown notification type %u flags 0x%x len %u\n", notif->type, notif->flags, le16toh(notif->len))); break; } } static void iwi_rx_intr(struct iwi_softc *sc) { struct iwi_rx_data *data; struct iwi_hdr *hdr; uint32_t hw; hw = CSR_READ_4(sc, IWI_CSR_RX_RIDX); for (; sc->rxq.cur != hw;) { data = &sc->rxq.data[sc->rxq.cur]; bus_dmamap_sync(sc->rxq.data_dmat, data->map, BUS_DMASYNC_POSTREAD); hdr = mtod(data->m, struct iwi_hdr *); switch (hdr->type) { case IWI_HDR_TYPE_FRAME: iwi_frame_intr(sc, data, sc->rxq.cur, (struct iwi_frame *)(hdr + 1)); break; case IWI_HDR_TYPE_NOTIF: iwi_notification_intr(sc, (struct iwi_notif *)(hdr + 1)); break; default: device_printf(sc->sc_dev, "unknown hdr type %u\n", hdr->type); } DPRINTFN(15, ("rx done idx=%u\n", sc->rxq.cur)); sc->rxq.cur = (sc->rxq.cur + 1) % IWI_RX_RING_COUNT; } /* tell the firmware what we have processed */ hw = (hw == 0) ? IWI_RX_RING_COUNT - 1 : hw - 1; CSR_WRITE_4(sc, IWI_CSR_RX_WIDX, hw); } static void iwi_tx_intr(struct iwi_softc *sc, struct iwi_tx_ring *txq) { struct iwi_tx_data *data; uint32_t hw; hw = CSR_READ_4(sc, txq->csr_ridx); while (txq->next != hw) { data = &txq->data[txq->next]; DPRINTFN(15, ("tx done idx=%u\n", txq->next)); bus_dmamap_sync(txq->data_dmat, data->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txq->data_dmat, data->map); ieee80211_tx_complete(data->ni, data->m, 0); data->ni = NULL; data->m = NULL; txq->queued--; txq->next = (txq->next + 1) % IWI_TX_RING_COUNT; } sc->sc_tx_timer = 0; if (sc->sc_softled) iwi_led_event(sc, IWI_LED_TX); iwi_start(sc); } static void iwi_fatal_error_intr(struct iwi_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = TAILQ_FIRST(&ic->ic_vaps); device_printf(sc->sc_dev, "firmware error\n"); if (vap != NULL) ieee80211_cancel_scan(vap); ieee80211_runtask(ic, &sc->sc_restarttask); sc->flags &= ~IWI_FLAG_BUSY; sc->sc_busy_timer = 0; wakeup(sc); } static void iwi_radio_off_intr(struct iwi_softc *sc) { ieee80211_runtask(&sc->sc_ic, &sc->sc_radiofftask); } static void iwi_intr(void *arg) { struct iwi_softc *sc = arg; uint32_t r; IWI_LOCK_DECL; IWI_LOCK(sc); if ((r = CSR_READ_4(sc, IWI_CSR_INTR)) == 0 || r == 0xffffffff) { IWI_UNLOCK(sc); return; } /* acknowledge interrupts */ CSR_WRITE_4(sc, IWI_CSR_INTR, r); if (r & IWI_INTR_FATAL_ERROR) { iwi_fatal_error_intr(sc); goto done; } if (r & IWI_INTR_FW_INITED) { if (!(r & (IWI_INTR_FATAL_ERROR | IWI_INTR_PARITY_ERROR))) wakeup(sc); } if (r & IWI_INTR_RADIO_OFF) iwi_radio_off_intr(sc); if (r & IWI_INTR_CMD_DONE) { sc->flags &= ~IWI_FLAG_BUSY; sc->sc_busy_timer = 0; wakeup(sc); } if (r & IWI_INTR_TX1_DONE) iwi_tx_intr(sc, &sc->txq[0]); if (r & IWI_INTR_TX2_DONE) iwi_tx_intr(sc, &sc->txq[1]); if (r & IWI_INTR_TX3_DONE) iwi_tx_intr(sc, &sc->txq[2]); if (r & IWI_INTR_TX4_DONE) iwi_tx_intr(sc, &sc->txq[3]); if (r & IWI_INTR_RX_DONE) iwi_rx_intr(sc); if (r & IWI_INTR_PARITY_ERROR) { /* XXX rate-limit */ device_printf(sc->sc_dev, "parity error\n"); } done: IWI_UNLOCK(sc); } static int iwi_cmd(struct iwi_softc *sc, uint8_t type, void *data, uint8_t len) { struct iwi_cmd_desc *desc; IWI_LOCK_ASSERT(sc); if (sc->flags & IWI_FLAG_BUSY) { device_printf(sc->sc_dev, "%s: cmd %d not sent, busy\n", __func__, type); return EAGAIN; } sc->flags |= IWI_FLAG_BUSY; sc->sc_busy_timer = 2; desc = &sc->cmdq.desc[sc->cmdq.cur]; desc->hdr.type = IWI_HDR_TYPE_COMMAND; desc->hdr.flags = 
IWI_HDR_FLAG_IRQ; desc->type = type; desc->len = len; memcpy(desc->data, data, len); bus_dmamap_sync(sc->cmdq.desc_dmat, sc->cmdq.desc_map, BUS_DMASYNC_PREWRITE); DPRINTFN(2, ("sending command idx=%u type=%u len=%u\n", sc->cmdq.cur, type, len)); sc->cmdq.cur = (sc->cmdq.cur + 1) % IWI_CMD_RING_COUNT; CSR_WRITE_4(sc, IWI_CSR_CMD_WIDX, sc->cmdq.cur); return msleep(sc, &sc->sc_mtx, 0, "iwicmd", hz); } static void iwi_write_ibssnode(struct iwi_softc *sc, const u_int8_t addr[IEEE80211_ADDR_LEN], int entry) { struct iwi_ibssnode node; /* write node information into NIC memory */ memset(&node, 0, sizeof node); IEEE80211_ADDR_COPY(node.bssid, addr); DPRINTF(("%s mac %6D station %u\n", __func__, node.bssid, ":", entry)); CSR_WRITE_REGION_1(sc, IWI_CSR_NODE_BASE + entry * sizeof node, (uint8_t *)&node, sizeof node); } static int iwi_tx_start(struct iwi_softc *sc, struct mbuf *m0, struct ieee80211_node *ni, int ac) { struct ieee80211vap *vap = ni->ni_vap; struct ieee80211com *ic = ni->ni_ic; struct iwi_node *in = (struct iwi_node *)ni; const struct ieee80211_frame *wh; struct ieee80211_key *k; const struct chanAccParams *cap; struct iwi_tx_ring *txq = &sc->txq[ac]; struct iwi_tx_data *data; struct iwi_tx_desc *desc; struct mbuf *mnew; bus_dma_segment_t segs[IWI_MAX_NSEG]; int error, nsegs, hdrlen, i; int ismcast, flags, xflags, staid; IWI_LOCK_ASSERT(sc); wh = mtod(m0, const struct ieee80211_frame *); /* NB: only data frames use this path */ hdrlen = ieee80211_hdrsize(wh); ismcast = IEEE80211_IS_MULTICAST(wh->i_addr1); flags = xflags = 0; if (!ismcast) flags |= IWI_DATA_FLAG_NEED_ACK; if (vap->iv_flags & IEEE80211_F_SHPREAMBLE) flags |= IWI_DATA_FLAG_SHPREAMBLE; if (IEEE80211_QOS_HAS_SEQ(wh)) { xflags |= IWI_DATA_XFLAG_QOS; cap = &ic->ic_wme.wme_chanParams; if (!cap->cap_wmeParams[ac].wmep_noackPolicy) flags &= ~IWI_DATA_FLAG_NEED_ACK; } /* * This is only used in IBSS mode where the firmware expect an index * in a h/w table instead of a destination address. */ if (vap->iv_opmode == IEEE80211_M_IBSS) { if (!ismcast) { if (in->in_station == -1) { in->in_station = alloc_unr(sc->sc_unr); if (in->in_station == -1) { /* h/w table is full */ if_inc_counter(ni->ni_vap->iv_ifp, IFCOUNTER_OERRORS, 1); m_freem(m0); ieee80211_free_node(ni); return 0; } iwi_write_ibssnode(sc, ni->ni_macaddr, in->in_station); } staid = in->in_station; } else { /* * Multicast addresses have no associated node * so there will be no station entry. We reserve * entry 0 for one mcast address and use that. * If there are many being used this will be * expensive and we'll need to do a better job * but for now this handles the broadcast case. 
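 * In effect sc_mcast is a one-entry cache keyed on the most recent
 * multicast destination: consecutive frames to the same group (the
 * common, broadcast-only case) only pay the address compare below, while
 * alternating between two groups rewrites NIC entry 0 on every frame.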
*/ if (!IEEE80211_ADDR_EQ(wh->i_addr1, sc->sc_mcast)) { IEEE80211_ADDR_COPY(sc->sc_mcast, wh->i_addr1); iwi_write_ibssnode(sc, sc->sc_mcast, 0); } staid = 0; } } else staid = 0; if (wh->i_fc[1] & IEEE80211_FC1_PROTECTED) { k = ieee80211_crypto_encap(ni, m0); if (k == NULL) { m_freem(m0); return ENOBUFS; } /* packet header may have moved, reset our local pointer */ wh = mtod(m0, struct ieee80211_frame *); } if (ieee80211_radiotap_active_vap(vap)) { struct iwi_tx_radiotap_header *tap = &sc->sc_txtap; tap->wt_flags = 0; ieee80211_radiotap_tx(vap, m0); } data = &txq->data[txq->cur]; desc = &txq->desc[txq->cur]; /* save and trim IEEE802.11 header */ m_copydata(m0, 0, hdrlen, (caddr_t)&desc->wh); m_adj(m0, hdrlen); error = bus_dmamap_load_mbuf_sg(txq->data_dmat, data->map, m0, segs, &nsegs, 0); if (error != 0 && error != EFBIG) { device_printf(sc->sc_dev, "could not map mbuf (error %d)\n", error); m_freem(m0); return error; } if (error != 0) { mnew = m_defrag(m0, M_NOWAIT); if (mnew == NULL) { device_printf(sc->sc_dev, "could not defragment mbuf\n"); m_freem(m0); return ENOBUFS; } m0 = mnew; error = bus_dmamap_load_mbuf_sg(txq->data_dmat, data->map, m0, segs, &nsegs, 0); if (error != 0) { device_printf(sc->sc_dev, "could not map mbuf (error %d)\n", error); m_freem(m0); return error; } } data->m = m0; data->ni = ni; desc->hdr.type = IWI_HDR_TYPE_DATA; desc->hdr.flags = IWI_HDR_FLAG_IRQ; desc->station = staid; desc->cmd = IWI_DATA_CMD_TX; desc->len = htole16(m0->m_pkthdr.len); desc->flags = flags; desc->xflags = xflags; #if 0 if (vap->iv_flags & IEEE80211_F_PRIVACY) desc->wep_txkey = vap->iv_def_txkey; else #endif desc->flags |= IWI_DATA_FLAG_NO_WEP; desc->nseg = htole32(nsegs); for (i = 0; i < nsegs; i++) { desc->seg_addr[i] = htole32(segs[i].ds_addr); desc->seg_len[i] = htole16(segs[i].ds_len); } bus_dmamap_sync(txq->data_dmat, data->map, BUS_DMASYNC_PREWRITE); bus_dmamap_sync(txq->desc_dmat, txq->desc_map, BUS_DMASYNC_PREWRITE); DPRINTFN(5, ("sending data frame txq=%u idx=%u len=%u nseg=%u\n", ac, txq->cur, le16toh(desc->len), nsegs)); txq->queued++; txq->cur = (txq->cur + 1) % IWI_TX_RING_COUNT; CSR_WRITE_4(sc, txq->csr_widx, txq->cur); return 0; } static int iwi_raw_xmit(struct ieee80211_node *ni, struct mbuf *m, const struct ieee80211_bpf_params *params) { /* no support; just discard */ m_freem(m); ieee80211_free_node(ni); return 0; } static int iwi_transmit(struct ieee80211com *ic, struct mbuf *m) { struct iwi_softc *sc = ic->ic_softc; int error; IWI_LOCK_DECL; IWI_LOCK(sc); if (!sc->sc_running) { IWI_UNLOCK(sc); return (ENXIO); } error = mbufq_enqueue(&sc->sc_snd, m); if (error) { IWI_UNLOCK(sc); return (error); } iwi_start(sc); IWI_UNLOCK(sc); return (0); } static void iwi_start(struct iwi_softc *sc) { struct mbuf *m; struct ieee80211_node *ni; int ac; IWI_LOCK_ASSERT(sc); while ((m = mbufq_dequeue(&sc->sc_snd)) != NULL) { ac = M_WME_GETAC(m); if (sc->txq[ac].queued > IWI_TX_RING_COUNT - 8) { /* there is no place left in this ring; tail drop */ /* XXX tail drop */ mbufq_prepend(&sc->sc_snd, m); break; } ni = (struct ieee80211_node *) m->m_pkthdr.rcvif; if (iwi_tx_start(sc, m, ni, ac) != 0) { if_inc_counter(ni->ni_vap->iv_ifp, IFCOUNTER_OERRORS, 1); ieee80211_free_node(ni); break; } sc->sc_tx_timer = 5; } } static void iwi_watchdog(void *arg) { struct iwi_softc *sc = arg; struct ieee80211com *ic = &sc->sc_ic; IWI_LOCK_ASSERT(sc); if (sc->sc_tx_timer > 0) { if (--sc->sc_tx_timer == 0) { device_printf(sc->sc_dev, "device timeout\n"); counter_u64_add(ic->ic_oerrors, 1); ieee80211_runtask(ic, 
&sc->sc_restarttask); } } if (sc->sc_state_timer > 0) { if (--sc->sc_state_timer == 0) { device_printf(sc->sc_dev, "firmware stuck in state %d, resetting\n", sc->fw_state); if (sc->fw_state == IWI_FW_SCANNING) ieee80211_cancel_scan(TAILQ_FIRST(&ic->ic_vaps)); ieee80211_runtask(ic, &sc->sc_restarttask); sc->sc_state_timer = 3; } } if (sc->sc_busy_timer > 0) { if (--sc->sc_busy_timer == 0) { device_printf(sc->sc_dev, "firmware command timeout, resetting\n"); ieee80211_runtask(ic, &sc->sc_restarttask); } } callout_reset(&sc->sc_wdtimer, hz, iwi_watchdog, sc); } static void iwi_parent(struct ieee80211com *ic) { struct iwi_softc *sc = ic->ic_softc; int startall = 0; IWI_LOCK_DECL; IWI_LOCK(sc); if (ic->ic_nrunning > 0) { if (!sc->sc_running) { iwi_init_locked(sc); startall = 1; } } else if (sc->sc_running) iwi_stop_locked(sc); IWI_UNLOCK(sc); if (startall) ieee80211_start_all(ic); } static int iwi_ioctl(struct ieee80211com *ic, u_long cmd, void *data) { struct ifreq *ifr = data; struct iwi_softc *sc = ic->ic_softc; int error; IWI_LOCK_DECL; IWI_LOCK(sc); switch (cmd) { case SIOCGIWISTATS: /* XXX validate permissions/memory/etc? */ - error = copyout(&sc->sc_linkqual, ifr->ifr_data, + error = copyout(&sc->sc_linkqual, ifr_data_get_ptr(ifr), sizeof(struct iwi_notif_link_quality)); break; case SIOCZIWISTATS: memset(&sc->sc_linkqual, 0, sizeof(struct iwi_notif_link_quality)); error = 0; break; default: error = ENOTTY; break; } IWI_UNLOCK(sc); return (error); } static void iwi_stop_master(struct iwi_softc *sc) { uint32_t tmp; int ntries; /* disable interrupts */ CSR_WRITE_4(sc, IWI_CSR_INTR_MASK, 0); CSR_WRITE_4(sc, IWI_CSR_RST, IWI_RST_STOP_MASTER); for (ntries = 0; ntries < 5; ntries++) { if (CSR_READ_4(sc, IWI_CSR_RST) & IWI_RST_MASTER_DISABLED) break; DELAY(10); } if (ntries == 5) device_printf(sc->sc_dev, "timeout waiting for master\n"); tmp = CSR_READ_4(sc, IWI_CSR_RST); CSR_WRITE_4(sc, IWI_CSR_RST, tmp | IWI_RST_PRINCETON_RESET); sc->flags &= ~IWI_FLAG_FW_INITED; } static int iwi_reset(struct iwi_softc *sc) { uint32_t tmp; int i, ntries; iwi_stop_master(sc); tmp = CSR_READ_4(sc, IWI_CSR_CTL); CSR_WRITE_4(sc, IWI_CSR_CTL, tmp | IWI_CTL_INIT); CSR_WRITE_4(sc, IWI_CSR_READ_INT, IWI_READ_INT_INIT_HOST); /* wait for clock stabilization */ for (ntries = 0; ntries < 1000; ntries++) { if (CSR_READ_4(sc, IWI_CSR_CTL) & IWI_CTL_CLOCK_READY) break; DELAY(200); } if (ntries == 1000) { device_printf(sc->sc_dev, "timeout waiting for clock stabilization\n"); return EIO; } tmp = CSR_READ_4(sc, IWI_CSR_RST); CSR_WRITE_4(sc, IWI_CSR_RST, tmp | IWI_RST_SOFT_RESET); DELAY(10); tmp = CSR_READ_4(sc, IWI_CSR_CTL); CSR_WRITE_4(sc, IWI_CSR_CTL, tmp | IWI_CTL_INIT); /* clear NIC memory */ CSR_WRITE_4(sc, IWI_CSR_AUTOINC_ADDR, 0); for (i = 0; i < 0xc000; i++) CSR_WRITE_4(sc, IWI_CSR_AUTOINC_DATA, 0); return 0; } static const struct iwi_firmware_ohdr * iwi_setup_ofw(struct iwi_softc *sc, struct iwi_fw *fw) { const struct firmware *fp = fw->fp; const struct iwi_firmware_ohdr *hdr; if (fp->datasize < sizeof (struct iwi_firmware_ohdr)) { device_printf(sc->sc_dev, "image '%s' too small\n", fp->name); return NULL; } hdr = (const struct iwi_firmware_ohdr *)fp->data; if ((IWI_FW_GET_MAJOR(le32toh(hdr->version)) != IWI_FW_REQ_MAJOR) || (IWI_FW_GET_MINOR(le32toh(hdr->version)) != IWI_FW_REQ_MINOR)) { device_printf(sc->sc_dev, "version for '%s' %d.%d != %d.%d\n", fp->name, IWI_FW_GET_MAJOR(le32toh(hdr->version)), IWI_FW_GET_MINOR(le32toh(hdr->version)), IWI_FW_REQ_MAJOR, IWI_FW_REQ_MINOR); return NULL; } fw->data = ((const char *) 
fp->data) + sizeof(struct iwi_firmware_ohdr); fw->size = fp->datasize - sizeof(struct iwi_firmware_ohdr); fw->name = fp->name; return hdr; } static const struct iwi_firmware_ohdr * iwi_setup_oucode(struct iwi_softc *sc, struct iwi_fw *fw) { const struct iwi_firmware_ohdr *hdr; hdr = iwi_setup_ofw(sc, fw); if (hdr != NULL && le32toh(hdr->mode) != IWI_FW_MODE_UCODE) { device_printf(sc->sc_dev, "%s is not a ucode image\n", fw->name); hdr = NULL; } return hdr; } static void iwi_getfw(struct iwi_fw *fw, const char *fwname, struct iwi_fw *uc, const char *ucname) { if (fw->fp == NULL) fw->fp = firmware_get(fwname); /* NB: pre-3.0 ucode is packaged separately */ if (uc->fp == NULL && fw->fp != NULL && fw->fp->version < 300) uc->fp = firmware_get(ucname); } /* * Get the required firmware images if not already loaded. * Note that we hold firmware images so long as the device * is marked up in case we need to reload them on device init. * This is necessary because we re-init the device sometimes * from a context where we cannot read from the filesystem * (e.g. from the taskqueue thread when rfkill is re-enabled). * XXX return 0 on success, 1 on error. * * NB: the order of get'ing and put'ing images here is * intentional to support handling firmware images bundled * by operating mode and/or all together in one file with * the boot firmware as "master". */ static int iwi_get_firmware(struct iwi_softc *sc, enum ieee80211_opmode opmode) { const struct iwi_firmware_hdr *hdr; const struct firmware *fp; /* invalidate cached firmware on mode change */ if (sc->fw_mode != opmode) iwi_put_firmware(sc); switch (opmode) { case IEEE80211_M_STA: iwi_getfw(&sc->fw_fw, "iwi_bss", &sc->fw_uc, "iwi_ucode_bss"); break; case IEEE80211_M_IBSS: iwi_getfw(&sc->fw_fw, "iwi_ibss", &sc->fw_uc, "iwi_ucode_ibss"); break; case IEEE80211_M_MONITOR: iwi_getfw(&sc->fw_fw, "iwi_monitor", &sc->fw_uc, "iwi_ucode_monitor"); break; default: device_printf(sc->sc_dev, "unknown opmode %d\n", opmode); return EINVAL; } fp = sc->fw_fw.fp; if (fp == NULL) { device_printf(sc->sc_dev, "could not load firmware\n"); goto bad; } if (fp->version < 300) { /* * Firmware prior to 3.0 was packaged as separate * boot, firmware, and ucode images. Verify the * ucode image was read in, retrieve the boot image * if needed, and check version stamps for consistency. * The version stamps in the data are also checked * above; this is a bit paranoid but is a cheap * safeguard against mis-packaging. */ if (sc->fw_uc.fp == NULL) { device_printf(sc->sc_dev, "could not load ucode\n"); goto bad; } if (sc->fw_boot.fp == NULL) { sc->fw_boot.fp = firmware_get("iwi_boot"); if (sc->fw_boot.fp == NULL) { device_printf(sc->sc_dev, "could not load boot firmware\n"); goto bad; } } if (sc->fw_boot.fp->version != sc->fw_fw.fp->version || sc->fw_boot.fp->version != sc->fw_uc.fp->version) { device_printf(sc->sc_dev, "firmware version mismatch: " "'%s' is %d, '%s' is %d, '%s' is %d\n", sc->fw_boot.fp->name, sc->fw_boot.fp->version, sc->fw_uc.fp->name, sc->fw_uc.fp->version, sc->fw_fw.fp->name, sc->fw_fw.fp->version ); goto bad; } /* * Check and setup each image. */ if (iwi_setup_oucode(sc, &sc->fw_uc) == NULL || iwi_setup_ofw(sc, &sc->fw_boot) == NULL || iwi_setup_ofw(sc, &sc->fw_fw) == NULL) goto bad; } else { /* * Check and setup combined image. 
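 * A 3.0+ image is a single blob: an iwi_firmware_hdr holding three
 * little-endian lengths, immediately followed by the boot image, the
 * microcode and the runtime firmware, in that order.  The carving done
 * below is simply:
 *
 *	boot = data + sizeof(hdr)            (le32toh(hdr->bsize) bytes)
 *	uc   = boot + le32toh(hdr->bsize)    (le32toh(hdr->usize) bytes)
 *	fw   = uc   + le32toh(hdr->usize)    (le32toh(hdr->fsize) bytes)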
*/ if (fp->datasize < sizeof(struct iwi_firmware_hdr)) { device_printf(sc->sc_dev, "image '%s' too small\n", fp->name); goto bad; } hdr = (const struct iwi_firmware_hdr *)fp->data; if (fp->datasize < sizeof(*hdr) + le32toh(hdr->bsize) + le32toh(hdr->usize) + le32toh(hdr->fsize)) { device_printf(sc->sc_dev, "image '%s' too small (2)\n", fp->name); goto bad; } sc->fw_boot.data = ((const char *) fp->data) + sizeof(*hdr); sc->fw_boot.size = le32toh(hdr->bsize); sc->fw_boot.name = fp->name; sc->fw_uc.data = sc->fw_boot.data + sc->fw_boot.size; sc->fw_uc.size = le32toh(hdr->usize); sc->fw_uc.name = fp->name; sc->fw_fw.data = sc->fw_uc.data + sc->fw_uc.size; sc->fw_fw.size = le32toh(hdr->fsize); sc->fw_fw.name = fp->name; } #if 0 device_printf(sc->sc_dev, "boot %d ucode %d fw %d bytes\n", sc->fw_boot.size, sc->fw_uc.size, sc->fw_fw.size); #endif sc->fw_mode = opmode; return 0; bad: iwi_put_firmware(sc); return 1; } static void iwi_put_fw(struct iwi_fw *fw) { if (fw->fp != NULL) { firmware_put(fw->fp, FIRMWARE_UNLOAD); fw->fp = NULL; } fw->data = NULL; fw->size = 0; fw->name = NULL; } /* * Release any cached firmware images. */ static void iwi_put_firmware(struct iwi_softc *sc) { iwi_put_fw(&sc->fw_uc); iwi_put_fw(&sc->fw_fw); iwi_put_fw(&sc->fw_boot); } static int iwi_load_ucode(struct iwi_softc *sc, const struct iwi_fw *fw) { uint32_t tmp; const uint16_t *w; const char *uc = fw->data; size_t size = fw->size; int i, ntries, error; IWI_LOCK_ASSERT(sc); error = 0; CSR_WRITE_4(sc, IWI_CSR_RST, CSR_READ_4(sc, IWI_CSR_RST) | IWI_RST_STOP_MASTER); for (ntries = 0; ntries < 5; ntries++) { if (CSR_READ_4(sc, IWI_CSR_RST) & IWI_RST_MASTER_DISABLED) break; DELAY(10); } if (ntries == 5) { device_printf(sc->sc_dev, "timeout waiting for master\n"); error = EIO; goto fail; } MEM_WRITE_4(sc, 0x3000e0, 0x80000000); DELAY(5000); tmp = CSR_READ_4(sc, IWI_CSR_RST); tmp &= ~IWI_RST_PRINCETON_RESET; CSR_WRITE_4(sc, IWI_CSR_RST, tmp); DELAY(5000); MEM_WRITE_4(sc, 0x3000e0, 0); DELAY(1000); MEM_WRITE_4(sc, IWI_MEM_EEPROM_EVENT, 1); DELAY(1000); MEM_WRITE_4(sc, IWI_MEM_EEPROM_EVENT, 0); DELAY(1000); MEM_WRITE_1(sc, 0x200000, 0x00); MEM_WRITE_1(sc, 0x200000, 0x40); DELAY(1000); /* write microcode into adapter memory */ for (w = (const uint16_t *)uc; size > 0; w++, size -= 2) MEM_WRITE_2(sc, 0x200010, htole16(*w)); MEM_WRITE_1(sc, 0x200000, 0x00); MEM_WRITE_1(sc, 0x200000, 0x80); /* wait until we get an answer */ for (ntries = 0; ntries < 100; ntries++) { if (MEM_READ_1(sc, 0x200000) & 1) break; DELAY(100); } if (ntries == 100) { device_printf(sc->sc_dev, "timeout waiting for ucode to initialize\n"); error = EIO; goto fail; } /* read the answer or the firmware will not initialize properly */ for (i = 0; i < 7; i++) MEM_READ_4(sc, 0x200004); MEM_WRITE_1(sc, 0x200000, 0x00); fail: return error; } /* macro to handle unaligned little endian data in firmware image */ #define GETLE32(p) ((p)[0] | (p)[1] << 8 | (p)[2] << 16 | (p)[3] << 24) static int iwi_load_firmware(struct iwi_softc *sc, const struct iwi_fw *fw) { u_char *p, *end; uint32_t sentinel, ctl, src, dst, sum, len, mlen, tmp; int ntries, error; IWI_LOCK_ASSERT(sc); /* copy firmware image to DMA memory */ memcpy(sc->fw_virtaddr, fw->data, fw->size); /* make sure the adapter will get up-to-date values */ bus_dmamap_sync(sc->fw_dmat, sc->fw_map, BUS_DMASYNC_PREWRITE); /* tell the adapter where the command blocks are stored */ MEM_WRITE_4(sc, 0x3000a0, 0x27000); /* * Store command blocks into adapter's internal memory using register * indirections. 
The adapter will read the firmware image through DMA * using information stored in command blocks. */ src = sc->fw_physaddr; p = sc->fw_virtaddr; end = p + fw->size; CSR_WRITE_4(sc, IWI_CSR_AUTOINC_ADDR, 0x27000); while (p < end) { dst = GETLE32(p); p += 4; src += 4; len = GETLE32(p); p += 4; src += 4; p += len; while (len > 0) { mlen = min(len, IWI_CB_MAXDATALEN); ctl = IWI_CB_DEFAULT_CTL | mlen; sum = ctl ^ src ^ dst; /* write a command block */ CSR_WRITE_4(sc, IWI_CSR_AUTOINC_DATA, ctl); CSR_WRITE_4(sc, IWI_CSR_AUTOINC_DATA, src); CSR_WRITE_4(sc, IWI_CSR_AUTOINC_DATA, dst); CSR_WRITE_4(sc, IWI_CSR_AUTOINC_DATA, sum); src += mlen; dst += mlen; len -= mlen; } } /* write a fictive final command block (sentinel) */ sentinel = CSR_READ_4(sc, IWI_CSR_AUTOINC_ADDR); CSR_WRITE_4(sc, IWI_CSR_AUTOINC_DATA, 0); tmp = CSR_READ_4(sc, IWI_CSR_RST); tmp &= ~(IWI_RST_MASTER_DISABLED | IWI_RST_STOP_MASTER); CSR_WRITE_4(sc, IWI_CSR_RST, tmp); /* tell the adapter to start processing command blocks */ MEM_WRITE_4(sc, 0x3000a4, 0x540100); /* wait until the adapter reaches the sentinel */ for (ntries = 0; ntries < 400; ntries++) { if (MEM_READ_4(sc, 0x3000d0) >= sentinel) break; DELAY(100); } /* sync dma, just in case */ bus_dmamap_sync(sc->fw_dmat, sc->fw_map, BUS_DMASYNC_POSTWRITE); if (ntries == 400) { device_printf(sc->sc_dev, "timeout processing command blocks for %s firmware\n", fw->name); return EIO; } /* we're done with command blocks processing */ MEM_WRITE_4(sc, 0x3000a4, 0x540c00); /* allow interrupts so we know when the firmware is ready */ CSR_WRITE_4(sc, IWI_CSR_INTR_MASK, IWI_INTR_MASK); /* tell the adapter to initialize the firmware */ CSR_WRITE_4(sc, IWI_CSR_RST, 0); tmp = CSR_READ_4(sc, IWI_CSR_CTL); CSR_WRITE_4(sc, IWI_CSR_CTL, tmp | IWI_CTL_ALLOW_STANDBY); /* wait at most one second for firmware initialization to complete */ if ((error = msleep(sc, &sc->sc_mtx, 0, "iwiinit", hz)) != 0) { device_printf(sc->sc_dev, "timeout waiting for %s firmware " "initialization to complete\n", fw->name); } return error; } static int iwi_setpowermode(struct iwi_softc *sc, struct ieee80211vap *vap) { uint32_t data; if (vap->iv_flags & IEEE80211_F_PMGTON) { /* XXX set more fine-grained operation */ data = htole32(IWI_POWER_MODE_MAX); } else data = htole32(IWI_POWER_MODE_CAM); DPRINTF(("Setting power mode to %u\n", le32toh(data))); return iwi_cmd(sc, IWI_CMD_SET_POWER_MODE, &data, sizeof data); } static int iwi_setwepkeys(struct iwi_softc *sc, struct ieee80211vap *vap) { struct iwi_wep_key wepkey; struct ieee80211_key *wk; int error, i; for (i = 0; i < IEEE80211_WEP_NKID; i++) { wk = &vap->iv_nw_keys[i]; wepkey.cmd = IWI_WEP_KEY_CMD_SETKEY; wepkey.idx = i; wepkey.len = wk->wk_keylen; memset(wepkey.key, 0, sizeof wepkey.key); memcpy(wepkey.key, wk->wk_key, wk->wk_keylen); DPRINTF(("Setting wep key index %u len %u\n", wepkey.idx, wepkey.len)); error = iwi_cmd(sc, IWI_CMD_SET_WEP_KEY, &wepkey, sizeof wepkey); if (error != 0) return error; } return 0; } static int iwi_config(struct iwi_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct iwi_configuration config; struct iwi_rateset rs; struct iwi_txpower power; uint32_t data; int error, i; IWI_LOCK_ASSERT(sc); DPRINTF(("Setting MAC address to %6D\n", ic->ic_macaddr, ":")); error = iwi_cmd(sc, IWI_CMD_SET_MAC_ADDRESS, ic->ic_macaddr, IEEE80211_ADDR_LEN); if (error != 0) return error; memset(&config, 0, sizeof config); config.bluetooth_coexistence = sc->bluetooth; config.silence_threshold = 0x1e; config.antenna = sc->antenna; config.multicast_enabled = 1; 
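	/*
	 * NB: the firmware is told to answer (broadcast) probe requests
	 * only in IBSS mode (answer_pbreq below), and both decryption
	 * offloads are switched off, leaving crypto to net80211; compare
	 * the IWI_DATA_FLAG_NO_WEP handling in iwi_tx_start().
	 */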
config.answer_pbreq = (ic->ic_opmode == IEEE80211_M_IBSS) ? 1 : 0; config.disable_unicast_decryption = 1; config.disable_multicast_decryption = 1; if (ic->ic_opmode == IEEE80211_M_MONITOR) { config.allow_invalid_frames = 1; config.allow_beacon_and_probe_resp = 1; config.allow_mgt = 1; } DPRINTF(("Configuring adapter\n")); error = iwi_cmd(sc, IWI_CMD_SET_CONFIG, &config, sizeof config); if (error != 0) return error; if (ic->ic_opmode == IEEE80211_M_IBSS) { power.mode = IWI_MODE_11B; power.nchan = 11; for (i = 0; i < 11; i++) { power.chan[i].chan = i + 1; power.chan[i].power = IWI_TXPOWER_MAX; } DPRINTF(("Setting .11b channels tx power\n")); error = iwi_cmd(sc, IWI_CMD_SET_TX_POWER, &power, sizeof power); if (error != 0) return error; power.mode = IWI_MODE_11G; DPRINTF(("Setting .11g channels tx power\n")); error = iwi_cmd(sc, IWI_CMD_SET_TX_POWER, &power, sizeof power); if (error != 0) return error; } memset(&rs, 0, sizeof rs); rs.mode = IWI_MODE_11G; rs.type = IWI_RATESET_TYPE_SUPPORTED; rs.nrates = ic->ic_sup_rates[IEEE80211_MODE_11G].rs_nrates; memcpy(rs.rates, ic->ic_sup_rates[IEEE80211_MODE_11G].rs_rates, rs.nrates); DPRINTF(("Setting .11bg supported rates (%u)\n", rs.nrates)); error = iwi_cmd(sc, IWI_CMD_SET_RATES, &rs, sizeof rs); if (error != 0) return error; memset(&rs, 0, sizeof rs); rs.mode = IWI_MODE_11A; rs.type = IWI_RATESET_TYPE_SUPPORTED; rs.nrates = ic->ic_sup_rates[IEEE80211_MODE_11A].rs_nrates; memcpy(rs.rates, ic->ic_sup_rates[IEEE80211_MODE_11A].rs_rates, rs.nrates); DPRINTF(("Setting .11a supported rates (%u)\n", rs.nrates)); error = iwi_cmd(sc, IWI_CMD_SET_RATES, &rs, sizeof rs); if (error != 0) return error; data = htole32(arc4random()); DPRINTF(("Setting initialization vector to %u\n", le32toh(data))); error = iwi_cmd(sc, IWI_CMD_SET_IV, &data, sizeof data); if (error != 0) return error; /* enable adapter */ DPRINTF(("Enabling adapter\n")); return iwi_cmd(sc, IWI_CMD_ENABLE, NULL, 0); } static __inline void set_scan_type(struct iwi_scan_ext *scan, int ix, int scan_type) { uint8_t *st = &scan->scan_type[ix / 2]; if (ix % 2) *st = (*st & 0xf0) | ((scan_type & 0xf) << 0); else *st = (*st & 0x0f) | ((scan_type & 0xf) << 4); } static int scan_type(const struct ieee80211_scan_state *ss, const struct ieee80211_channel *chan) { /* We can only set one essid for a directed scan */ if (ss->ss_nssid != 0) return IWI_SCAN_TYPE_BDIRECTED; if ((ss->ss_flags & IEEE80211_SCAN_ACTIVE) && (chan->ic_flags & IEEE80211_CHAN_PASSIVE) == 0) return IWI_SCAN_TYPE_BROADCAST; return IWI_SCAN_TYPE_PASSIVE; } static __inline int scan_band(const struct ieee80211_channel *c) { return IEEE80211_IS_CHAN_5GHZ(c) ? IWI_CHAN_5GHZ : IWI_CHAN_2GHZ; } static void iwi_monitor_scan(void *arg, int npending) { struct iwi_softc *sc = arg; IWI_LOCK_DECL; IWI_LOCK(sc); (void) iwi_scanchan(sc, 2000, 0); IWI_UNLOCK(sc); } /* * Start a scan on the current channel or all channels. 
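 *
 * The channel list handed to the firmware (struct iwi_scan_ext) is
 * run-length encoded by band: each run starts with a byte carrying an
 * IWI_CHAN_2GHZ/IWI_CHAN_5GHZ flag plus a length in its low 6 bits,
 * followed by the IEEE channel numbers of that run, in the order
 * net80211 supplied them.  The per-channel scan types are packed two to
 * a byte in scan_type[], even indices in the high nibble and odd ones in
 * the low nibble (see set_scan_type() above).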
*/ static int iwi_scanchan(struct iwi_softc *sc, unsigned long maxdwell, int allchan) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211_channel *chan; struct ieee80211_scan_state *ss; struct iwi_scan_ext scan; int error = 0; IWI_LOCK_ASSERT(sc); if (sc->fw_state == IWI_FW_SCANNING) { /* * This should not happen as we only trigger scan_next after * completion */ DPRINTF(("%s: called too early - still scanning\n", __func__)); return (EBUSY); } IWI_STATE_BEGIN(sc, IWI_FW_SCANNING); ss = ic->ic_scan; memset(&scan, 0, sizeof scan); scan.full_scan_index = htole32(++sc->sc_scangen); scan.dwell_time[IWI_SCAN_TYPE_PASSIVE] = htole16(maxdwell); if (ic->ic_flags_ext & IEEE80211_FEXT_BGSCAN) { /* * Use very short dwell times for when we send probe request * frames. Without this bg scans hang. Ideally this should * be handled with early-termination as done by net80211 but * that's not feasible (aborting a scan is problematic). */ scan.dwell_time[IWI_SCAN_TYPE_BROADCAST] = htole16(30); scan.dwell_time[IWI_SCAN_TYPE_BDIRECTED] = htole16(30); } else { scan.dwell_time[IWI_SCAN_TYPE_BROADCAST] = htole16(maxdwell); scan.dwell_time[IWI_SCAN_TYPE_BDIRECTED] = htole16(maxdwell); } /* We can only set one essid for a directed scan */ if (ss->ss_nssid != 0) { error = iwi_cmd(sc, IWI_CMD_SET_ESSID, ss->ss_ssid[0].ssid, ss->ss_ssid[0].len); if (error) return (error); } if (allchan) { int i, next, band, b, bstart; /* * Convert scan list to run-length encoded channel list * the firmware requires (preserving the order setup by * net80211). The first entry in each run specifies the * band and the count of items in the run. */ next = 0; /* next open slot */ bstart = 0; /* NB: not needed, silence compiler */ band = -1; /* NB: impossible value */ KASSERT(ss->ss_last > 0, ("no channels")); for (i = 0; i < ss->ss_last; i++) { chan = ss->ss_chans[i]; b = scan_band(chan); if (b != band) { if (band != -1) scan.channels[bstart] = (next - bstart) | band; /* NB: this allocates a slot for the run-len */ band = b, bstart = next++; } if (next >= IWI_SCAN_CHANNELS) { DPRINTF(("truncating scan list\n")); break; } scan.channels[next] = ieee80211_chan2ieee(ic, chan); set_scan_type(&scan, next, scan_type(ss, chan)); next++; } scan.channels[bstart] = (next - bstart) | band; } else { /* Scan the current channel only */ chan = ic->ic_curchan; scan.channels[0] = 1 | scan_band(chan); scan.channels[1] = ieee80211_chan2ieee(ic, chan); set_scan_type(&scan, 1, scan_type(ss, chan)); } #ifdef IWI_DEBUG if (iwi_debug > 0) { static const char *scantype[8] = { "PSTOP", "PASV", "DIR", "BCAST", "BDIR", "5", "6", "7" }; int i; printf("Scan request: index %u dwell %d/%d/%d\n" , le32toh(scan.full_scan_index) , le16toh(scan.dwell_time[IWI_SCAN_TYPE_PASSIVE]) , le16toh(scan.dwell_time[IWI_SCAN_TYPE_BROADCAST]) , le16toh(scan.dwell_time[IWI_SCAN_TYPE_BDIRECTED]) ); i = 0; do { int run = scan.channels[i]; if (run == 0) break; printf("Scan %d %s channels:", run & 0x3f, run & IWI_CHAN_2GHZ ? "2.4GHz" : "5GHz"); for (run &= 0x3f, i++; run > 0; run--, i++) { uint8_t type = scan.scan_type[i/2]; printf(" %u/%s", scan.channels[i], scantype[(i & 1 ? 
type : type>>4) & 7]); } printf("\n"); } while (i < IWI_SCAN_CHANNELS); } #endif return (iwi_cmd(sc, IWI_CMD_SCAN_EXT, &scan, sizeof scan)); } static int iwi_set_sensitivity(struct iwi_softc *sc, int8_t rssi_dbm) { struct iwi_sensitivity sens; DPRINTF(("Setting sensitivity to %d\n", rssi_dbm)); memset(&sens, 0, sizeof sens); sens.rssi = htole16(rssi_dbm); return iwi_cmd(sc, IWI_CMD_SET_SENSITIVITY, &sens, sizeof sens); } static int iwi_auth_and_assoc(struct iwi_softc *sc, struct ieee80211vap *vap) { struct ieee80211com *ic = vap->iv_ic; struct ifnet *ifp = vap->iv_ifp; struct ieee80211_node *ni; struct iwi_configuration config; struct iwi_associate *assoc = &sc->assoc; struct iwi_rateset rs; uint16_t capinfo; uint32_t data; int error, mode; IWI_LOCK_ASSERT(sc); ni = ieee80211_ref_node(vap->iv_bss); if (sc->flags & IWI_FLAG_ASSOCIATED) { DPRINTF(("Already associated\n")); return (-1); } IWI_STATE_BEGIN(sc, IWI_FW_ASSOCIATING); error = 0; mode = 0; if (IEEE80211_IS_CHAN_A(ic->ic_curchan)) mode = IWI_MODE_11A; else if (IEEE80211_IS_CHAN_G(ic->ic_curchan)) mode = IWI_MODE_11G; if (IEEE80211_IS_CHAN_B(ic->ic_curchan)) mode = IWI_MODE_11B; if (IEEE80211_IS_CHAN_2GHZ(ic->ic_curchan)) { memset(&config, 0, sizeof config); config.bluetooth_coexistence = sc->bluetooth; config.antenna = sc->antenna; config.multicast_enabled = 1; if (mode == IWI_MODE_11G) config.use_protection = 1; config.answer_pbreq = (vap->iv_opmode == IEEE80211_M_IBSS) ? 1 : 0; config.disable_unicast_decryption = 1; config.disable_multicast_decryption = 1; DPRINTF(("Configuring adapter\n")); error = iwi_cmd(sc, IWI_CMD_SET_CONFIG, &config, sizeof config); if (error != 0) goto done; } #ifdef IWI_DEBUG if (iwi_debug > 0) { printf("Setting ESSID to "); ieee80211_print_essid(ni->ni_essid, ni->ni_esslen); printf("\n"); } #endif error = iwi_cmd(sc, IWI_CMD_SET_ESSID, ni->ni_essid, ni->ni_esslen); if (error != 0) goto done; error = iwi_setpowermode(sc, vap); if (error != 0) goto done; data = htole32(vap->iv_rtsthreshold); DPRINTF(("Setting RTS threshold to %u\n", le32toh(data))); error = iwi_cmd(sc, IWI_CMD_SET_RTS_THRESHOLD, &data, sizeof data); if (error != 0) goto done; data = htole32(vap->iv_fragthreshold); DPRINTF(("Setting fragmentation threshold to %u\n", le32toh(data))); error = iwi_cmd(sc, IWI_CMD_SET_FRAG_THRESHOLD, &data, sizeof data); if (error != 0) goto done; /* the rate set has already been "negotiated" */ memset(&rs, 0, sizeof rs); rs.mode = mode; rs.type = IWI_RATESET_TYPE_NEGOTIATED; rs.nrates = ni->ni_rates.rs_nrates; if (rs.nrates > IWI_RATESET_SIZE) { DPRINTF(("Truncating negotiated rate set from %u\n", rs.nrates)); rs.nrates = IWI_RATESET_SIZE; } memcpy(rs.rates, ni->ni_rates.rs_rates, rs.nrates); DPRINTF(("Setting negotiated rates (%u)\n", rs.nrates)); error = iwi_cmd(sc, IWI_CMD_SET_RATES, &rs, sizeof rs); if (error != 0) goto done; memset(assoc, 0, sizeof *assoc); if ((vap->iv_flags & IEEE80211_F_WME) && ni->ni_ies.wme_ie != NULL) { /* NB: don't treat WME setup as failure */ if (iwi_wme_setparams(sc) == 0 && iwi_wme_setie(sc) == 0) assoc->policy |= htole16(IWI_POLICY_WME); /* XXX complain on failure? 
*/ } if (vap->iv_appie_wpa != NULL) { struct ieee80211_appie *ie = vap->iv_appie_wpa; DPRINTF(("Setting optional IE (len=%u)\n", ie->ie_len)); error = iwi_cmd(sc, IWI_CMD_SET_OPTIE, ie->ie_data, ie->ie_len); if (error != 0) goto done; } error = iwi_set_sensitivity(sc, ic->ic_node_getrssi(ni)); if (error != 0) goto done; assoc->mode = mode; assoc->chan = ic->ic_curchan->ic_ieee; /* * NB: do not arrange for shared key auth w/o privacy * (i.e. a wep key); it causes a firmware error. */ if ((vap->iv_flags & IEEE80211_F_PRIVACY) && ni->ni_authmode == IEEE80211_AUTH_SHARED) { assoc->auth = IWI_AUTH_SHARED; /* * It's possible to have privacy marked but no default * key setup. This typically is due to a user app bug * but if we blindly grab the key the firmware will * barf so avoid it for now. */ if (vap->iv_def_txkey != IEEE80211_KEYIX_NONE) assoc->auth |= vap->iv_def_txkey << 4; error = iwi_setwepkeys(sc, vap); if (error != 0) goto done; } if (vap->iv_flags & IEEE80211_F_WPA) assoc->policy |= htole16(IWI_POLICY_WPA); if (vap->iv_opmode == IEEE80211_M_IBSS && ni->ni_tstamp.tsf == 0) assoc->type = IWI_HC_IBSS_START; else assoc->type = IWI_HC_ASSOC; memcpy(assoc->tstamp, ni->ni_tstamp.data, 8); if (vap->iv_opmode == IEEE80211_M_IBSS) capinfo = IEEE80211_CAPINFO_IBSS; else capinfo = IEEE80211_CAPINFO_ESS; if (vap->iv_flags & IEEE80211_F_PRIVACY) capinfo |= IEEE80211_CAPINFO_PRIVACY; if ((ic->ic_flags & IEEE80211_F_SHPREAMBLE) && IEEE80211_IS_CHAN_2GHZ(ic->ic_curchan)) capinfo |= IEEE80211_CAPINFO_SHORT_PREAMBLE; if (ni->ni_capinfo & IEEE80211_CAPINFO_SHORT_SLOTTIME) capinfo |= IEEE80211_CAPINFO_SHORT_SLOTTIME; assoc->capinfo = htole16(capinfo); assoc->lintval = htole16(ic->ic_lintval); assoc->intval = htole16(ni->ni_intval); IEEE80211_ADDR_COPY(assoc->bssid, ni->ni_bssid); if (vap->iv_opmode == IEEE80211_M_IBSS) IEEE80211_ADDR_COPY(assoc->dst, ifp->if_broadcastaddr); else IEEE80211_ADDR_COPY(assoc->dst, ni->ni_bssid); DPRINTF(("%s bssid %6D dst %6D channel %u policy 0x%x " "auth %u capinfo 0x%x lintval %u bintval %u\n", assoc->type == IWI_HC_IBSS_START ? 
"Start" : "Join", assoc->bssid, ":", assoc->dst, ":", assoc->chan, le16toh(assoc->policy), assoc->auth, le16toh(assoc->capinfo), le16toh(assoc->lintval), le16toh(assoc->intval))); error = iwi_cmd(sc, IWI_CMD_ASSOCIATE, assoc, sizeof *assoc); done: ieee80211_free_node(ni); if (error) IWI_STATE_END(sc, IWI_FW_ASSOCIATING); return (error); } static void iwi_disassoc(void *arg, int pending) { struct iwi_softc *sc = arg; IWI_LOCK_DECL; IWI_LOCK(sc); iwi_disassociate(sc, 0); IWI_UNLOCK(sc); } static int iwi_disassociate(struct iwi_softc *sc, int quiet) { struct iwi_associate *assoc = &sc->assoc; if ((sc->flags & IWI_FLAG_ASSOCIATED) == 0) { DPRINTF(("Not associated\n")); return (-1); } IWI_STATE_BEGIN(sc, IWI_FW_DISASSOCIATING); if (quiet) assoc->type = IWI_HC_DISASSOC_QUIET; else assoc->type = IWI_HC_DISASSOC; DPRINTF(("Trying to disassociate from %6D channel %u\n", assoc->bssid, ":", assoc->chan)); return iwi_cmd(sc, IWI_CMD_ASSOCIATE, assoc, sizeof *assoc); } /* * release dma resources for the firmware */ static void iwi_release_fw_dma(struct iwi_softc *sc) { if (sc->fw_flags & IWI_FW_HAVE_PHY) bus_dmamap_unload(sc->fw_dmat, sc->fw_map); if (sc->fw_flags & IWI_FW_HAVE_MAP) bus_dmamem_free(sc->fw_dmat, sc->fw_virtaddr, sc->fw_map); if (sc->fw_flags & IWI_FW_HAVE_DMAT) bus_dma_tag_destroy(sc->fw_dmat); sc->fw_flags = 0; sc->fw_dma_size = 0; sc->fw_dmat = NULL; sc->fw_map = NULL; sc->fw_physaddr = 0; sc->fw_virtaddr = NULL; } /* * allocate the dma descriptor for the firmware. * Return 0 on success, 1 on error. * Must be called unlocked, protected by IWI_FLAG_FW_LOADING. */ static int iwi_init_fw_dma(struct iwi_softc *sc, int size) { if (sc->fw_dma_size >= size) return 0; if (bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), 4, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, size, 1, size, 0, NULL, NULL, &sc->fw_dmat) != 0) { device_printf(sc->sc_dev, "could not create firmware DMA tag\n"); goto error; } sc->fw_flags |= IWI_FW_HAVE_DMAT; if (bus_dmamem_alloc(sc->fw_dmat, &sc->fw_virtaddr, 0, &sc->fw_map) != 0) { device_printf(sc->sc_dev, "could not allocate firmware DMA memory\n"); goto error; } sc->fw_flags |= IWI_FW_HAVE_MAP; if (bus_dmamap_load(sc->fw_dmat, sc->fw_map, sc->fw_virtaddr, size, iwi_dma_map_addr, &sc->fw_physaddr, 0) != 0) { device_printf(sc->sc_dev, "could not load firmware DMA map\n"); goto error; } sc->fw_flags |= IWI_FW_HAVE_PHY; sc->fw_dma_size = size; return 0; error: iwi_release_fw_dma(sc); return 1; } static void iwi_init_locked(struct iwi_softc *sc) { struct iwi_rx_data *data; int i; IWI_LOCK_ASSERT(sc); if (sc->fw_state == IWI_FW_LOADING) { device_printf(sc->sc_dev, "%s: already loading\n", __func__); return; /* XXX: condvar? 
*/ } iwi_stop_locked(sc); IWI_STATE_BEGIN(sc, IWI_FW_LOADING); if (iwi_reset(sc) != 0) { device_printf(sc->sc_dev, "could not reset adapter\n"); goto fail; } if (iwi_load_firmware(sc, &sc->fw_boot) != 0) { device_printf(sc->sc_dev, "could not load boot firmware %s\n", sc->fw_boot.name); goto fail; } if (iwi_load_ucode(sc, &sc->fw_uc) != 0) { device_printf(sc->sc_dev, "could not load microcode %s\n", sc->fw_uc.name); goto fail; } iwi_stop_master(sc); CSR_WRITE_4(sc, IWI_CSR_CMD_BASE, sc->cmdq.physaddr); CSR_WRITE_4(sc, IWI_CSR_CMD_SIZE, sc->cmdq.count); CSR_WRITE_4(sc, IWI_CSR_CMD_WIDX, sc->cmdq.cur); CSR_WRITE_4(sc, IWI_CSR_TX1_BASE, sc->txq[0].physaddr); CSR_WRITE_4(sc, IWI_CSR_TX1_SIZE, sc->txq[0].count); CSR_WRITE_4(sc, IWI_CSR_TX1_WIDX, sc->txq[0].cur); CSR_WRITE_4(sc, IWI_CSR_TX2_BASE, sc->txq[1].physaddr); CSR_WRITE_4(sc, IWI_CSR_TX2_SIZE, sc->txq[1].count); CSR_WRITE_4(sc, IWI_CSR_TX2_WIDX, sc->txq[1].cur); CSR_WRITE_4(sc, IWI_CSR_TX3_BASE, sc->txq[2].physaddr); CSR_WRITE_4(sc, IWI_CSR_TX3_SIZE, sc->txq[2].count); CSR_WRITE_4(sc, IWI_CSR_TX3_WIDX, sc->txq[2].cur); CSR_WRITE_4(sc, IWI_CSR_TX4_BASE, sc->txq[3].physaddr); CSR_WRITE_4(sc, IWI_CSR_TX4_SIZE, sc->txq[3].count); CSR_WRITE_4(sc, IWI_CSR_TX4_WIDX, sc->txq[3].cur); for (i = 0; i < sc->rxq.count; i++) { data = &sc->rxq.data[i]; CSR_WRITE_4(sc, data->reg, data->physaddr); } CSR_WRITE_4(sc, IWI_CSR_RX_WIDX, sc->rxq.count - 1); if (iwi_load_firmware(sc, &sc->fw_fw) != 0) { device_printf(sc->sc_dev, "could not load main firmware %s\n", sc->fw_fw.name); goto fail; } sc->flags |= IWI_FLAG_FW_INITED; IWI_STATE_END(sc, IWI_FW_LOADING); if (iwi_config(sc) != 0) { device_printf(sc->sc_dev, "unable to enable adapter\n"); goto fail2; } callout_reset(&sc->sc_wdtimer, hz, iwi_watchdog, sc); sc->sc_running = 1; return; fail: IWI_STATE_END(sc, IWI_FW_LOADING); fail2: iwi_stop_locked(sc); } static void iwi_init(void *priv) { struct iwi_softc *sc = priv; struct ieee80211com *ic = &sc->sc_ic; IWI_LOCK_DECL; IWI_LOCK(sc); iwi_init_locked(sc); IWI_UNLOCK(sc); if (sc->sc_running) ieee80211_start_all(ic); } static void iwi_stop_locked(void *priv) { struct iwi_softc *sc = priv; IWI_LOCK_ASSERT(sc); sc->sc_running = 0; if (sc->sc_softled) { callout_stop(&sc->sc_ledtimer); sc->sc_blinking = 0; } callout_stop(&sc->sc_wdtimer); callout_stop(&sc->sc_rftimer); iwi_stop_master(sc); CSR_WRITE_4(sc, IWI_CSR_RST, IWI_RST_SOFT_RESET); /* reset rings */ iwi_reset_cmd_ring(sc, &sc->cmdq); iwi_reset_tx_ring(sc, &sc->txq[0]); iwi_reset_tx_ring(sc, &sc->txq[1]); iwi_reset_tx_ring(sc, &sc->txq[2]); iwi_reset_tx_ring(sc, &sc->txq[3]); iwi_reset_rx_ring(sc, &sc->rxq); sc->sc_tx_timer = 0; sc->sc_state_timer = 0; sc->sc_busy_timer = 0; sc->flags &= ~(IWI_FLAG_BUSY | IWI_FLAG_ASSOCIATED); sc->fw_state = IWI_FW_IDLE; wakeup(sc); } static void iwi_stop(struct iwi_softc *sc) { IWI_LOCK_DECL; IWI_LOCK(sc); iwi_stop_locked(sc); IWI_UNLOCK(sc); } static void iwi_restart(void *arg, int npending) { struct iwi_softc *sc = arg; iwi_init(sc); } /* * Return whether or not the radio is enabled in hardware * (i.e. the rfkill switch is "off"). 
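 * More precisely: the return value is non-zero when the kill switch is
 * engaged and the radio is off, zero when the radio is enabled; callers
 * such as iwi_rfkill_poll() and the "radio" sysctl negate it accordingly.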
*/ static int iwi_getrfkill(struct iwi_softc *sc) { return (CSR_READ_4(sc, IWI_CSR_IO) & IWI_IO_RADIO_ENABLED) == 0; } static void iwi_radio_on(void *arg, int pending) { struct iwi_softc *sc = arg; struct ieee80211com *ic = &sc->sc_ic; device_printf(sc->sc_dev, "radio turned on\n"); iwi_init(sc); ieee80211_notify_radio(ic, 1); } static void iwi_rfkill_poll(void *arg) { struct iwi_softc *sc = arg; IWI_LOCK_ASSERT(sc); /* * Check for a change in rfkill state. We get an * interrupt when a radio is disabled but not when * it is enabled so we must poll for the latter. */ if (!iwi_getrfkill(sc)) { ieee80211_runtask(&sc->sc_ic, &sc->sc_radiontask); return; } callout_reset(&sc->sc_rftimer, 2*hz, iwi_rfkill_poll, sc); } static void iwi_radio_off(void *arg, int pending) { struct iwi_softc *sc = arg; struct ieee80211com *ic = &sc->sc_ic; IWI_LOCK_DECL; device_printf(sc->sc_dev, "radio turned off\n"); ieee80211_notify_radio(ic, 0); IWI_LOCK(sc); iwi_stop_locked(sc); iwi_rfkill_poll(sc); IWI_UNLOCK(sc); } static int iwi_sysctl_stats(SYSCTL_HANDLER_ARGS) { struct iwi_softc *sc = arg1; uint32_t size, buf[128]; memset(buf, 0, sizeof buf); if (!(sc->flags & IWI_FLAG_FW_INITED)) return SYSCTL_OUT(req, buf, sizeof buf); size = min(CSR_READ_4(sc, IWI_CSR_TABLE0_SIZE), 128 - 1); CSR_READ_REGION_4(sc, IWI_CSR_TABLE0_BASE, &buf[1], size); return SYSCTL_OUT(req, buf, size); } static int iwi_sysctl_radio(SYSCTL_HANDLER_ARGS) { struct iwi_softc *sc = arg1; int val = !iwi_getrfkill(sc); return SYSCTL_OUT(req, &val, sizeof val); } /* * Add sysctl knobs. */ static void iwi_sysctlattach(struct iwi_softc *sc) { struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->sc_dev); struct sysctl_oid *tree = device_get_sysctl_tree(sc->sc_dev); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "radio", CTLTYPE_INT | CTLFLAG_RD, sc, 0, iwi_sysctl_radio, "I", "radio transmitter switch state (0=off, 1=on)"); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "stats", CTLTYPE_OPAQUE | CTLFLAG_RD, sc, 0, iwi_sysctl_stats, "S", "statistics"); sc->bluetooth = 0; SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "bluetooth", CTLFLAG_RW, &sc->bluetooth, 0, "bluetooth coexistence"); sc->antenna = IWI_ANTENNA_AUTO; SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "antenna", CTLFLAG_RW, &sc->antenna, 0, "antenna (0=auto)"); } /* * LED support. * * Different cards have different capabilities. Some have three * led's while others have only one. The linux ipw driver defines * led's for link state (associated or not), band (11a, 11g, 11b), * and for link activity. We use one led and vary the blink rate * according to the tx/rx traffic a la the ath driver. */ static __inline uint32_t iwi_toggle_event(uint32_t r) { return r &~ (IWI_RST_STANDBY | IWI_RST_GATE_ODMA | IWI_RST_GATE_IDMA | IWI_RST_GATE_ADMA); } static uint32_t iwi_read_event(struct iwi_softc *sc) { return MEM_READ_4(sc, IWI_MEM_EEPROM_EVENT); } static void iwi_write_event(struct iwi_softc *sc, uint32_t v) { MEM_WRITE_4(sc, IWI_MEM_EEPROM_EVENT, v); } static void iwi_led_done(void *arg) { struct iwi_softc *sc = arg; sc->sc_blinking = 0; } /* * Turn the activity LED off: flip the pin and then set a timer so no * update will happen for the specified duration. */ static void iwi_led_off(void *arg) { struct iwi_softc *sc = arg; uint32_t v; v = iwi_read_event(sc); v &= ~sc->sc_ledpin; iwi_write_event(sc, iwi_toggle_event(v)); callout_reset(&sc->sc_ledtimer, sc->sc_ledoff, iwi_led_done, sc); } /* * Blink the LED according to the specified on/off times. 
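 *
 * Both times are given in ticks.  iwi_led_event() below converts the
 * millisecond pairs from its rate table with (ms * hz) / 1000, so at
 * hz=1000 the 54Mb/s OFDM entry (40ms on, 10ms off) blinks with on=40
 * and off=10 ticks; that multiplication is what the "XXX beware of
 * overflow" note there refers to.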
*/ static void iwi_led_blink(struct iwi_softc *sc, int on, int off) { uint32_t v; v = iwi_read_event(sc); v |= sc->sc_ledpin; iwi_write_event(sc, iwi_toggle_event(v)); sc->sc_blinking = 1; sc->sc_ledoff = off; callout_reset(&sc->sc_ledtimer, on, iwi_led_off, sc); } static void iwi_led_event(struct iwi_softc *sc, int event) { /* NB: on/off times from the Atheros NDIS driver, w/ permission */ static const struct { u_int rate; /* tx/rx iwi rate */ u_int16_t timeOn; /* LED on time (ms) */ u_int16_t timeOff; /* LED off time (ms) */ } blinkrates[] = { { IWI_RATE_OFDM54, 40, 10 }, { IWI_RATE_OFDM48, 44, 11 }, { IWI_RATE_OFDM36, 50, 13 }, { IWI_RATE_OFDM24, 57, 14 }, { IWI_RATE_OFDM18, 67, 16 }, { IWI_RATE_OFDM12, 80, 20 }, { IWI_RATE_DS11, 100, 25 }, { IWI_RATE_OFDM9, 133, 34 }, { IWI_RATE_OFDM6, 160, 40 }, { IWI_RATE_DS5, 200, 50 }, { 6, 240, 58 }, /* XXX 3Mb/s if it existed */ { IWI_RATE_DS2, 267, 66 }, { IWI_RATE_DS1, 400, 100 }, { 0, 500, 130 }, /* unknown rate/polling */ }; uint32_t txrate; int j = 0; /* XXX silence compiler */ sc->sc_ledevent = ticks; /* time of last event */ if (sc->sc_blinking) /* don't interrupt active blink */ return; switch (event) { case IWI_LED_POLL: j = nitems(blinkrates)-1; break; case IWI_LED_TX: /* read current transmission rate from adapter */ txrate = CSR_READ_4(sc, IWI_CSR_CURRENT_TX_RATE); if (blinkrates[sc->sc_txrix].rate != txrate) { for (j = 0; j < nitems(blinkrates)-1; j++) if (blinkrates[j].rate == txrate) break; sc->sc_txrix = j; } else j = sc->sc_txrix; break; case IWI_LED_RX: if (blinkrates[sc->sc_rxrix].rate != sc->sc_rxrate) { for (j = 0; j < nitems(blinkrates)-1; j++) if (blinkrates[j].rate == sc->sc_rxrate) break; sc->sc_rxrix = j; } else j = sc->sc_rxrix; break; } /* XXX beware of overflow */ iwi_led_blink(sc, (blinkrates[j].timeOn * hz) / 1000, (blinkrates[j].timeOff * hz) / 1000); } static int iwi_sysctl_softled(SYSCTL_HANDLER_ARGS) { struct iwi_softc *sc = arg1; int softled = sc->sc_softled; int error; error = sysctl_handle_int(oidp, &softled, 0, req); if (error || !req->newptr) return error; softled = (softled != 0); if (softled != sc->sc_softled) { if (softled) { uint32_t v = iwi_read_event(sc); v &= ~sc->sc_ledpin; iwi_write_event(sc, iwi_toggle_event(v)); } sc->sc_softled = softled; } return 0; } static void iwi_ledattach(struct iwi_softc *sc) { struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->sc_dev); struct sysctl_oid *tree = device_get_sysctl_tree(sc->sc_dev); sc->sc_blinking = 0; sc->sc_ledstate = 1; sc->sc_ledidle = (2700*hz)/1000; /* 2.7sec */ callout_init_mtx(&sc->sc_ledtimer, &sc->sc_mtx, 0); SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "softled", CTLTYPE_INT | CTLFLAG_RW, sc, 0, iwi_sysctl_softled, "I", "enable/disable software LED support"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "ledpin", CTLFLAG_RW, &sc->sc_ledpin, 0, "pin setting to turn activity LED on"); SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "ledidle", CTLFLAG_RW, &sc->sc_ledidle, 0, "idle time for inactivity LED (ticks)"); /* XXX for debugging */ SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "nictype", CTLFLAG_RD, &sc->sc_nictype, 0, "NIC type from EEPROM"); sc->sc_ledpin = IWI_RST_LED_ACTIVITY; sc->sc_softled = 1; sc->sc_nictype = (iwi_read_prom_word(sc, IWI_EEPROM_NIC) >> 8) & 0xff; if (sc->sc_nictype == 1) { /* * NB: led's are reversed. 
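 * That is, on NIC type 1 (read from the EEPROM just above) the activity
 * and association LEDs are wired the other way around, so activity
 * blinking has to be driven through IWI_RST_LED_ASSOCIATED instead of
 * the default IWI_RST_LED_ACTIVITY.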
*/ sc->sc_ledpin = IWI_RST_LED_ASSOCIATED; } } static void iwi_scan_start(struct ieee80211com *ic) { /* ignore */ } static void iwi_set_channel(struct ieee80211com *ic) { struct iwi_softc *sc = ic->ic_softc; if (sc->fw_state == IWI_FW_IDLE) iwi_setcurchan(sc, ic->ic_curchan->ic_ieee); } static void iwi_scan_curchan(struct ieee80211_scan_state *ss, unsigned long maxdwell) { struct ieee80211vap *vap = ss->ss_vap; struct iwi_softc *sc = vap->iv_ic->ic_softc; IWI_LOCK_DECL; IWI_LOCK(sc); if (iwi_scanchan(sc, maxdwell, 0)) ieee80211_cancel_scan(vap); IWI_UNLOCK(sc); } static void iwi_scan_mindwell(struct ieee80211_scan_state *ss) { /* NB: don't try to abort scan; wait for firmware to finish */ } static void iwi_scan_end(struct ieee80211com *ic) { struct iwi_softc *sc = ic->ic_softc; IWI_LOCK_DECL; IWI_LOCK(sc); sc->flags &= ~IWI_FLAG_CHANNEL_SCAN; /* NB: make sure we're still scanning */ if (sc->fw_state == IWI_FW_SCANNING) iwi_cmd(sc, IWI_CMD_ABORT_SCAN, NULL, 0); IWI_UNLOCK(sc); } static void iwi_collect_bands(struct ieee80211com *ic, uint8_t bands[], size_t bands_sz) { struct iwi_softc *sc = ic->ic_softc; device_t dev = sc->sc_dev; memset(bands, 0, bands_sz); setbit(bands, IEEE80211_MODE_11B); setbit(bands, IEEE80211_MODE_11G); if (pci_get_device(dev) >= 0x4223) setbit(bands, IEEE80211_MODE_11A); } static void iwi_getradiocaps(struct ieee80211com *ic, int maxchans, int *nchans, struct ieee80211_channel chans[]) { uint8_t bands[IEEE80211_MODE_BYTES]; iwi_collect_bands(ic, bands, sizeof(bands)); *nchans = 0; if (isset(bands, IEEE80211_MODE_11B) || isset(bands, IEEE80211_MODE_11G)) ieee80211_add_channel_list_2ghz(chans, maxchans, nchans, def_chan_2ghz, nitems(def_chan_2ghz), bands, 0); if (isset(bands, IEEE80211_MODE_11A)) { ieee80211_add_channel_list_5ghz(chans, maxchans, nchans, def_chan_5ghz_band1, nitems(def_chan_5ghz_band1), bands, 0); ieee80211_add_channel_list_5ghz(chans, maxchans, nchans, def_chan_5ghz_band2, nitems(def_chan_5ghz_band2), bands, 0); ieee80211_add_channel_list_5ghz(chans, maxchans, nchans, def_chan_5ghz_band3, nitems(def_chan_5ghz_band3), bands, 0); } } Index: stable/11/sys/dev/ixgbe/if_ix.c =================================================================== --- stable/11/sys/dev/ixgbe/if_ix.c (revision 332287) +++ stable/11/sys/dev/ixgbe/if_ix.c (revision 332288) @@ -1,5274 +1,5274 @@ /****************************************************************************** Copyright (c) 2001-2017, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #ifndef IXGBE_STANDALONE_BUILD #include "opt_inet.h" #include "opt_inet6.h" #include "opt_rss.h" #endif #include "ixgbe.h" /************************************************************************ * Driver version ************************************************************************/ char ixgbe_driver_version[] = "3.2.12-k"; /************************************************************************ * PCI Device ID Table * * Used by probe to select devices to load on * Last field stores an index into ixgbe_strings * Last entry must be all 0s * * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, String Index } ************************************************************************/ static ixgbe_vendor_info_t ixgbe_vendor_info_array[] = { {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_DUAL_PORT, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AF_SINGLE_PORT, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_CX4, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598AT2, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_DA_DUAL_PORT, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_CX4_DUAL_PORT, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_XF_LR, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82598EB_SFP_LOM, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_KX4_MEZZ, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_XAUI_LOM, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_CX4, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_T3_LOM, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_COMBO_BACKPLANE, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BACKPLANE_FCOE, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF2, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_FCOE, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599EN_SFP, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_SFP_SF_QP, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_QSFP_SF_QP, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540T1, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550T, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550T1, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_KR, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_KX4, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_10G_T, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_1G_T, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_X_SFP, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_KR, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_KR_L, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, 
IXGBE_DEV_ID_X550EM_A_SFP, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_SFP_N, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_SGMII, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_SGMII_L, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_10G_T, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_1G_T, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X550EM_A_1G_T_L, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_X540_BYPASS, 0, 0, 0}, {IXGBE_INTEL_VENDOR_ID, IXGBE_DEV_ID_82599_BYPASS, 0, 0, 0}, /* required last entry */ {0, 0, 0, 0, 0} }; /************************************************************************ * Table of branding strings ************************************************************************/ static char *ixgbe_strings[] = { "Intel(R) PRO/10GbE PCI-Express Network Driver" }; /************************************************************************ * Function prototypes ************************************************************************/ static int ixgbe_probe(device_t); static int ixgbe_attach(device_t); static int ixgbe_detach(device_t); static int ixgbe_shutdown(device_t); static int ixgbe_suspend(device_t); static int ixgbe_resume(device_t); static int ixgbe_ioctl(struct ifnet *, u_long, caddr_t); static void ixgbe_init(void *); static void ixgbe_init_locked(struct adapter *); static void ixgbe_stop(void *); #if __FreeBSD_version >= 1100036 static uint64_t ixgbe_get_counter(struct ifnet *, ift_counter); #endif static void ixgbe_init_device_features(struct adapter *); static void ixgbe_check_fan_failure(struct adapter *, u32, bool); static void ixgbe_add_media_types(struct adapter *); static void ixgbe_media_status(struct ifnet *, struct ifmediareq *); static int ixgbe_media_change(struct ifnet *); static int ixgbe_allocate_pci_resources(struct adapter *); static void ixgbe_get_slot_info(struct adapter *); static int ixgbe_allocate_msix(struct adapter *); static int ixgbe_allocate_legacy(struct adapter *); static int ixgbe_configure_interrupts(struct adapter *); static void ixgbe_free_pci_resources(struct adapter *); static void ixgbe_local_timer(void *); static int ixgbe_setup_interface(device_t, struct adapter *); static void ixgbe_config_gpie(struct adapter *); static void ixgbe_config_dmac(struct adapter *); static void ixgbe_config_delay_values(struct adapter *); static void ixgbe_config_link(struct adapter *); static void ixgbe_check_wol_support(struct adapter *); static int ixgbe_setup_low_power_mode(struct adapter *); static void ixgbe_rearm_queues(struct adapter *, u64); static void ixgbe_initialize_transmit_units(struct adapter *); static void ixgbe_initialize_receive_units(struct adapter *); static void ixgbe_enable_rx_drop(struct adapter *); static void ixgbe_disable_rx_drop(struct adapter *); static void ixgbe_initialize_rss_mapping(struct adapter *); static void ixgbe_enable_intr(struct adapter *); static void ixgbe_disable_intr(struct adapter *); static void ixgbe_update_stats_counters(struct adapter *); static void ixgbe_set_promisc(struct adapter *); static void ixgbe_set_multi(struct adapter *); static void ixgbe_update_link_status(struct adapter *); static void ixgbe_set_ivar(struct adapter *, u8, u8, s8); static void ixgbe_configure_ivars(struct adapter *); static u8 *ixgbe_mc_array_itr(struct ixgbe_hw *, u8 **, u32 *); static void ixgbe_setup_vlan_hw_support(struct adapter *); static void ixgbe_register_vlan(void *, struct ifnet *, u16); static void ixgbe_unregister_vlan(void *, struct ifnet *, u16); 
static void ixgbe_add_device_sysctls(struct adapter *); static void ixgbe_add_hw_stats(struct adapter *); static int ixgbe_set_flowcntl(struct adapter *, int); static int ixgbe_set_advertise(struct adapter *, int); static int ixgbe_get_advertise(struct adapter *); /* Sysctl handlers */ static void ixgbe_set_sysctl_value(struct adapter *, const char *, const char *, int *, int); static int ixgbe_sysctl_flowcntl(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_advertise(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_dmac(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_phy_temp(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_phy_overtemp_occurred(SYSCTL_HANDLER_ARGS); #ifdef IXGBE_DEBUG static int ixgbe_sysctl_power_state(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_print_rss_config(SYSCTL_HANDLER_ARGS); #endif static int ixgbe_sysctl_rdh_handler(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_rdt_handler(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_tdt_handler(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_tdh_handler(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_eee_state(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_wol_enable(SYSCTL_HANDLER_ARGS); static int ixgbe_sysctl_wufc(SYSCTL_HANDLER_ARGS); /* Support for pluggable optic modules */ static bool ixgbe_sfp_probe(struct adapter *); /* Legacy (single vector) interrupt handler */ static void ixgbe_legacy_irq(void *); /* The MSI/MSI-X Interrupt handlers */ static void ixgbe_msix_que(void *); static void ixgbe_msix_link(void *); /* Deferred interrupt tasklets */ static void ixgbe_handle_que(void *, int); static void ixgbe_handle_link(void *, int); static void ixgbe_handle_msf(void *, int); static void ixgbe_handle_mod(void *, int); static void ixgbe_handle_phy(void *, int); /************************************************************************ * FreeBSD Device Interface Entry Points ************************************************************************/ static device_method_t ix_methods[] = { /* Device interface */ DEVMETHOD(device_probe, ixgbe_probe), DEVMETHOD(device_attach, ixgbe_attach), DEVMETHOD(device_detach, ixgbe_detach), DEVMETHOD(device_shutdown, ixgbe_shutdown), DEVMETHOD(device_suspend, ixgbe_suspend), DEVMETHOD(device_resume, ixgbe_resume), #ifdef PCI_IOV DEVMETHOD(pci_iov_init, ixgbe_init_iov), DEVMETHOD(pci_iov_uninit, ixgbe_uninit_iov), DEVMETHOD(pci_iov_add_vf, ixgbe_add_vf), #endif /* PCI_IOV */ DEVMETHOD_END }; static driver_t ix_driver = { "ix", ix_methods, sizeof(struct adapter), }; devclass_t ix_devclass; DRIVER_MODULE(ix, pci, ix_driver, ix_devclass, 0, 0); MODULE_DEPEND(ix, pci, 1, 1, 1); MODULE_DEPEND(ix, ether, 1, 1, 1); #ifdef DEV_NETMAP MODULE_DEPEND(ix, netmap, 1, 1, 1); #endif /* * TUNEABLE PARAMETERS: */ static SYSCTL_NODE(_hw, OID_AUTO, ix, CTLFLAG_RD, 0, "IXGBE driver parameters"); /* * AIM: Adaptive Interrupt Moderation * which means that the interrupt rate * is varied over time based on the * traffic for that interrupt vector */ static int ixgbe_enable_aim = TRUE; SYSCTL_INT(_hw_ix, OID_AUTO, enable_aim, CTLFLAG_RDTUN, &ixgbe_enable_aim, 0, "Enable adaptive interrupt moderation"); static int ixgbe_max_interrupt_rate = (4000000 / IXGBE_LOW_LATENCY); SYSCTL_INT(_hw_ix, OID_AUTO, max_interrupt_rate, CTLFLAG_RDTUN, &ixgbe_max_interrupt_rate, 0, "Maximum interrupts per second"); /* How many packets rxeof tries to clean at a time */ static int ixgbe_rx_process_limit = 256; SYSCTL_INT(_hw_ix, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN, 
&ixgbe_rx_process_limit, 0, "Maximum number of received packets to process at a time, -1 means unlimited"); /* How many packets txeof tries to clean at a time */ static int ixgbe_tx_process_limit = 256; SYSCTL_INT(_hw_ix, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN, &ixgbe_tx_process_limit, 0, "Maximum number of sent packets to process at a time, -1 means unlimited"); /* Flow control setting, default to full */ static int ixgbe_flow_control = ixgbe_fc_full; SYSCTL_INT(_hw_ix, OID_AUTO, flow_control, CTLFLAG_RDTUN, &ixgbe_flow_control, 0, "Default flow control used for all adapters"); /* Advertise Speed, default to 0 (auto) */ static int ixgbe_advertise_speed = 0; SYSCTL_INT(_hw_ix, OID_AUTO, advertise_speed, CTLFLAG_RDTUN, &ixgbe_advertise_speed, 0, "Default advertised speed for all adapters"); /* * Smart speed setting, default to on * this only works as a compile option * right now as its during attach, set * this to 'ixgbe_smart_speed_off' to * disable. */ static int ixgbe_smart_speed = ixgbe_smart_speed_on; /* * MSI-X should be the default for best performance, * but this allows it to be forced off for testing. */ static int ixgbe_enable_msix = 1; SYSCTL_INT(_hw_ix, OID_AUTO, enable_msix, CTLFLAG_RDTUN, &ixgbe_enable_msix, 0, "Enable MSI-X interrupts"); /* * Number of Queues, can be set to 0, * it then autoconfigures based on the * number of cpus with a max of 8. This * can be overriden manually here. */ static int ixgbe_num_queues = 0; SYSCTL_INT(_hw_ix, OID_AUTO, num_queues, CTLFLAG_RDTUN, &ixgbe_num_queues, 0, "Number of queues to configure, 0 indicates autoconfigure"); /* * Number of TX descriptors per ring, * setting higher than RX as this seems * the better performing choice. */ static int ixgbe_txd = PERFORM_TXD; SYSCTL_INT(_hw_ix, OID_AUTO, txd, CTLFLAG_RDTUN, &ixgbe_txd, 0, "Number of transmit descriptors per queue"); /* Number of RX descriptors per ring */ static int ixgbe_rxd = PERFORM_RXD; SYSCTL_INT(_hw_ix, OID_AUTO, rxd, CTLFLAG_RDTUN, &ixgbe_rxd, 0, "Number of receive descriptors per queue"); /* * Defining this on will allow the use * of unsupported SFP+ modules, note that * doing so you are on your own :) */ static int allow_unsupported_sfp = FALSE; SYSCTL_INT(_hw_ix, OID_AUTO, unsupported_sfp, CTLFLAG_RDTUN, &allow_unsupported_sfp, 0, "Allow unsupported SFP modules...use at your own risk"); /* * Not sure if Flow Director is fully baked, * so we'll default to turning it off. 
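* It can be re-enabled at boot time through the hw.ix.enable_fdir loader tunable defined just below (added note).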
*/ static int ixgbe_enable_fdir = 0; SYSCTL_INT(_hw_ix, OID_AUTO, enable_fdir, CTLFLAG_RDTUN, &ixgbe_enable_fdir, 0, "Enable Flow Director"); /* Legacy Transmit (single queue) */ static int ixgbe_enable_legacy_tx = 0; SYSCTL_INT(_hw_ix, OID_AUTO, enable_legacy_tx, CTLFLAG_RDTUN, &ixgbe_enable_legacy_tx, 0, "Enable Legacy TX flow"); /* Receive-Side Scaling */ static int ixgbe_enable_rss = 1; SYSCTL_INT(_hw_ix, OID_AUTO, enable_rss, CTLFLAG_RDTUN, &ixgbe_enable_rss, 0, "Enable Receive-Side Scaling (RSS)"); /* Keep running tab on them for sanity check */ static int ixgbe_total_ports; static int (*ixgbe_start_locked)(struct ifnet *, struct tx_ring *); static int (*ixgbe_ring_empty)(struct ifnet *, struct buf_ring *); MALLOC_DEFINE(M_IXGBE, "ix", "ix driver allocations"); /************************************************************************ * ixgbe_initialize_rss_mapping ************************************************************************/ static void ixgbe_initialize_rss_mapping(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; u32 reta = 0, mrqc, rss_key[10]; int queue_id, table_size, index_mult; int i, j; u32 rss_hash_config; if (adapter->feat_en & IXGBE_FEATURE_RSS) { /* Fetch the configured RSS key */ rss_getkey((uint8_t *)&rss_key); } else { /* set up random bits */ arc4rand(&rss_key, sizeof(rss_key), 0); } /* Set multiplier for RETA setup and table size based on MAC */ index_mult = 0x1; table_size = 128; switch (adapter->hw.mac.type) { case ixgbe_mac_82598EB: index_mult = 0x11; break; case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_X550EM_a: table_size = 512; break; default: break; } /* Set up the redirection table */ for (i = 0, j = 0; i < table_size; i++, j++) { if (j == adapter->num_queues) j = 0; if (adapter->feat_en & IXGBE_FEATURE_RSS) { /* * Fetch the RSS bucket id for the given indirection * entry. Cap it at the number of configured buckets * (which is num_queues.) */ queue_id = rss_get_indirection_to_bucket(i); queue_id = queue_id % adapter->num_queues; } else queue_id = (j * index_mult); /* * The low 8 bits are for hash value (n+0); * The next 8 bits are for hash value (n+1), etc. */ reta = reta >> 8; reta = reta | (((uint32_t)queue_id) << 24); if ((i & 3) == 3) { if (i < 128) IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta); else IXGBE_WRITE_REG(hw, IXGBE_ERETA((i >> 2) - 32), reta); reta = 0; } } /* Now fill our hash function seeds */ for (i = 0; i < 10; i++) IXGBE_WRITE_REG(hw, IXGBE_RSSRK(i), rss_key[i]); /* Perform hash on these packet types */ if (adapter->feat_en & IXGBE_FEATURE_RSS) rss_hash_config = rss_gethashconfig(); else { /* * Disable UDP - IP fragments aren't currently being handled * and so we end up with a mix of 2-tuple and 4-tuple * traffic. 
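* Added note: without reassembly, the first fragment of a UDP datagram hashes on the full 4-tuple while later fragments hash on addresses only, so pieces of one datagram could be steered to different queues.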
*/ rss_hash_config = RSS_HASHTYPE_RSS_IPV4 | RSS_HASHTYPE_RSS_TCP_IPV4 | RSS_HASHTYPE_RSS_IPV6 | RSS_HASHTYPE_RSS_TCP_IPV6 | RSS_HASHTYPE_RSS_IPV6_EX | RSS_HASHTYPE_RSS_TCP_IPV6_EX; } mrqc = IXGBE_MRQC_RSSEN; if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4; if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_TCP; if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6; if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_TCP; if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6_EX) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX; if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6_EX) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP; if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP; if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4_EX) device_printf(adapter->dev, "%s: RSS_HASHTYPE_RSS_UDP_IPV4_EX defined, but not supported\n", __func__); if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP; if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6_EX) mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP; mrqc |= ixgbe_get_mrqc(adapter->iov_mode); IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc); } /* ixgbe_initialize_rss_mapping */ /************************************************************************ * ixgbe_initialize_receive_units - Setup receive registers and features. ************************************************************************/ #define BSIZEPKT_ROUNDUP ((1<<IXGBE_SRRCTL_BSIZEPKT_SHIFT)-1) static void ixgbe_initialize_receive_units(struct adapter *adapter) { struct rx_ring *rxr = adapter->rx_rings; struct ixgbe_hw *hw = &adapter->hw; struct ifnet *ifp = adapter->ifp; int i, j; u32 bufsz, fctrl, srrctl, rxcsum; u32 hlreg; /* * Make sure receives are disabled while * setting up the descriptor ring */ ixgbe_disable_rx(hw); /* Enable broadcasts */ fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL); fctrl |= IXGBE_FCTRL_BAM; if (adapter->hw.mac.type == ixgbe_mac_82598EB) { fctrl |= IXGBE_FCTRL_DPF; fctrl |= IXGBE_FCTRL_PMCF; } IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl); /* Set for Jumbo Frames? */ hlreg = IXGBE_READ_REG(hw, IXGBE_HLREG0); if (ifp->if_mtu > ETHERMTU) hlreg |= IXGBE_HLREG0_JUMBOEN; else hlreg &= ~IXGBE_HLREG0_JUMBOEN; #ifdef DEV_NETMAP /* CRC stripping is conditional in Netmap */ if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && (ifp->if_capenable & IFCAP_NETMAP) && !ix_crcstrip) hlreg &= ~IXGBE_HLREG0_RXCRCSTRP; else #endif /* DEV_NETMAP */ hlreg |= IXGBE_HLREG0_RXCRCSTRP; IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg); bufsz = (adapter->rx_mbuf_sz + BSIZEPKT_ROUNDUP) >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; for (i = 0; i < adapter->num_queues; i++, rxr++) { u64 rdba = rxr->rxdma.dma_paddr; j = rxr->me; /* Setup the Base and Length of the Rx Descriptor Ring */ IXGBE_WRITE_REG(hw, IXGBE_RDBAL(j), (rdba & 0x00000000ffffffffULL)); IXGBE_WRITE_REG(hw, IXGBE_RDBAH(j), (rdba >> 32)); IXGBE_WRITE_REG(hw, IXGBE_RDLEN(j), adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc)); /* Set up the SRRCTL register */ srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(j)); srrctl &= ~IXGBE_SRRCTL_BSIZEHDR_MASK; srrctl &= ~IXGBE_SRRCTL_BSIZEPKT_MASK; srrctl |= bufsz; srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; /* * Set DROP_EN iff we have no flow control and >1 queue. * Note that srrctl was cleared shortly before during reset, * so we do not need to clear the bit, but do it just in case * this code is moved elsewhere.
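* Added note: with several queues and no flow control, a single full ring would otherwise back up the shared packet buffer and stall the other queues; DROP_EN lets the hardware drop for that ring alone.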
*/ if (adapter->num_queues > 1 && adapter->hw.fc.requested_mode == ixgbe_fc_none) { srrctl |= IXGBE_SRRCTL_DROP_EN; } else { srrctl &= ~IXGBE_SRRCTL_DROP_EN; } IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(j), srrctl); /* Setup the HW Rx Head and Tail Descriptor Pointers */ IXGBE_WRITE_REG(hw, IXGBE_RDH(j), 0); IXGBE_WRITE_REG(hw, IXGBE_RDT(j), 0); /* Set the driver rx tail address */ rxr->tail = IXGBE_RDT(rxr->me); } if (adapter->hw.mac.type != ixgbe_mac_82598EB) { u32 psrtype = IXGBE_PSRTYPE_TCPHDR | IXGBE_PSRTYPE_UDPHDR | IXGBE_PSRTYPE_IPV4HDR | IXGBE_PSRTYPE_IPV6HDR; IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0), psrtype); } rxcsum = IXGBE_READ_REG(hw, IXGBE_RXCSUM); ixgbe_initialize_rss_mapping(adapter); if (adapter->num_queues > 1) { /* RSS and RX IPP Checksum are mutually exclusive */ rxcsum |= IXGBE_RXCSUM_PCSD; } if (ifp->if_capenable & IFCAP_RXCSUM) rxcsum |= IXGBE_RXCSUM_PCSD; /* This is useful for calculating UDP/IP fragment checksums */ if (!(rxcsum & IXGBE_RXCSUM_PCSD)) rxcsum |= IXGBE_RXCSUM_IPPCSE; IXGBE_WRITE_REG(hw, IXGBE_RXCSUM, rxcsum); return; } /* ixgbe_initialize_receive_units */ /************************************************************************ * ixgbe_initialize_transmit_units - Enable transmit units. ************************************************************************/ static void ixgbe_initialize_transmit_units(struct adapter *adapter) { struct tx_ring *txr = adapter->tx_rings; struct ixgbe_hw *hw = &adapter->hw; /* Setup the Base and Length of the Tx Descriptor Ring */ for (int i = 0; i < adapter->num_queues; i++, txr++) { u64 tdba = txr->txdma.dma_paddr; u32 txctrl = 0; int j = txr->me; IXGBE_WRITE_REG(hw, IXGBE_TDBAL(j), (tdba & 0x00000000ffffffffULL)); IXGBE_WRITE_REG(hw, IXGBE_TDBAH(j), (tdba >> 32)); IXGBE_WRITE_REG(hw, IXGBE_TDLEN(j), adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc)); /* Setup the HW Tx Head and Tail descriptor pointers */ IXGBE_WRITE_REG(hw, IXGBE_TDH(j), 0); IXGBE_WRITE_REG(hw, IXGBE_TDT(j), 0); /* Cache the tail address */ txr->tail = IXGBE_TDT(j); /* Disable Head Writeback */ /* * Note: for X550 series devices, these registers are actually * prefixed with TPH_ isntead of DCA_, but the addresses and * fields remain the same. */ switch (hw->mac.type) { case ixgbe_mac_82598EB: txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(j)); break; default: txctrl = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(j)); break; } txctrl &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN; switch (hw->mac.type) { case ixgbe_mac_82598EB: IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(j), txctrl); break; default: IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(j), txctrl); break; } } if (hw->mac.type != ixgbe_mac_82598EB) { u32 dmatxctl, rttdcs; dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL); dmatxctl |= IXGBE_DMATXCTL_TE; IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl); /* Disable arbiter to set MTQC */ rttdcs = IXGBE_READ_REG(hw, IXGBE_RTTDCS); rttdcs |= IXGBE_RTTDCS_ARBDIS; IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs); IXGBE_WRITE_REG(hw, IXGBE_MTQC, ixgbe_get_mtqc(adapter->iov_mode)); rttdcs &= ~IXGBE_RTTDCS_ARBDIS; IXGBE_WRITE_REG(hw, IXGBE_RTTDCS, rttdcs); } return; } /* ixgbe_initialize_transmit_units */ /************************************************************************ * ixgbe_attach - Device initialization routine * * Called when the driver is being loaded. * Identifies the type of hardware, allocates all resources * and initializes the hardware. 
* * return 0 on success, positive on failure ************************************************************************/ static int ixgbe_attach(device_t dev) { struct adapter *adapter; struct ixgbe_hw *hw; int error = 0; u32 ctrl_ext; INIT_DEBUGOUT("ixgbe_attach: begin"); /* Allocate, clear, and link in our adapter structure */ adapter = device_get_softc(dev); adapter->hw.back = adapter; adapter->dev = dev; hw = &adapter->hw; /* Core Lock Init*/ IXGBE_CORE_LOCK_INIT(adapter, device_get_nameunit(dev)); /* Set up the timer callout */ callout_init_mtx(&adapter->timer, &adapter->core_mtx, 0); /* Determine hardware revision */ hw->vendor_id = pci_get_vendor(dev); hw->device_id = pci_get_device(dev); hw->revision_id = pci_get_revid(dev); hw->subsystem_vendor_id = pci_get_subvendor(dev); hw->subsystem_device_id = pci_get_subdevice(dev); /* * Make sure BUSMASTER is set */ pci_enable_busmaster(dev); /* Do base PCI setup - map BAR0 */ if (ixgbe_allocate_pci_resources(adapter)) { device_printf(dev, "Allocation of PCI resources failed\n"); error = ENXIO; goto err_out; } /* let hardware know driver is loaded */ ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT); ctrl_ext |= IXGBE_CTRL_EXT_DRV_LOAD; IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext); hw->allow_unsupported_sfp = allow_unsupported_sfp; /* * Initialize the shared code */ if (ixgbe_init_shared_code(hw)) { device_printf(dev, "Unable to initialize the shared code\n"); error = ENXIO; goto err_out; } if (hw->mbx.ops.init_params) hw->mbx.ops.init_params(hw); /* Pick up the 82599 settings */ if (hw->mac.type != ixgbe_mac_82598EB) { hw->phy.smart_speed = ixgbe_smart_speed; adapter->num_segs = IXGBE_82599_SCATTER; } else adapter->num_segs = IXGBE_82598_SCATTER; ixgbe_init_device_features(adapter); if (ixgbe_configure_interrupts(adapter)) { error = ENXIO; goto err_out; } /* Allocate multicast array memory. 
*/ adapter->mta = malloc(sizeof(*adapter->mta) * MAX_NUM_MULTICAST_ADDRESSES, M_IXGBE, M_NOWAIT); if (adapter->mta == NULL) { device_printf(dev, "Can not allocate multicast setup array\n"); error = ENOMEM; goto err_out; } /* Enable WoL (if supported) */ ixgbe_check_wol_support(adapter); /* Register for VLAN events */ adapter->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, ixgbe_register_vlan, adapter, EVENTHANDLER_PRI_FIRST); adapter->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, ixgbe_unregister_vlan, adapter, EVENTHANDLER_PRI_FIRST); /* Verify adapter fan is still functional (if applicable) */ if (adapter->feat_en & IXGBE_FEATURE_FAN_FAIL) { u32 esdp = IXGBE_READ_REG(hw, IXGBE_ESDP); ixgbe_check_fan_failure(adapter, esdp, FALSE); } /* Ensure SW/FW semaphore is free */ ixgbe_init_swfw_semaphore(hw); /* Enable EEE power saving */ if (adapter->feat_en & IXGBE_FEATURE_EEE) hw->mac.ops.setup_eee(hw, TRUE); /* Set an initial default flow control value */ hw->fc.requested_mode = ixgbe_flow_control; /* Sysctls for limiting the amount of work done in the taskqueues */ ixgbe_set_sysctl_value(adapter, "rx_processing_limit", "max number of rx packets to process", &adapter->rx_process_limit, ixgbe_rx_process_limit); ixgbe_set_sysctl_value(adapter, "tx_processing_limit", "max number of tx packets to process", &adapter->tx_process_limit, ixgbe_tx_process_limit); /* Do descriptor calc and sanity checks */ if (((ixgbe_txd * sizeof(union ixgbe_adv_tx_desc)) % DBA_ALIGN) != 0 || ixgbe_txd < MIN_TXD || ixgbe_txd > MAX_TXD) { device_printf(dev, "TXD config issue, using default!\n"); adapter->num_tx_desc = DEFAULT_TXD; } else adapter->num_tx_desc = ixgbe_txd; /* * With many RX rings it is easy to exceed the * system mbuf allocation. Tuning nmbclusters * can alleviate this. */ if (nmbclusters > 0) { int s; s = (ixgbe_rxd * adapter->num_queues) * ixgbe_total_ports; if (s > nmbclusters) { device_printf(dev, "RX Descriptors exceed system mbuf max, using default instead!\n"); ixgbe_rxd = DEFAULT_RXD; } } if (((ixgbe_rxd * sizeof(union ixgbe_adv_rx_desc)) % DBA_ALIGN) != 0 || ixgbe_rxd < MIN_RXD || ixgbe_rxd > MAX_RXD) { device_printf(dev, "RXD config issue, using default!\n"); adapter->num_rx_desc = DEFAULT_RXD; } else adapter->num_rx_desc = ixgbe_rxd; /* Allocate our TX/RX Queues */ if (ixgbe_allocate_queues(adapter)) { error = ENOMEM; goto err_out; } hw->phy.reset_if_overtemp = TRUE; error = ixgbe_reset_hw(hw); hw->phy.reset_if_overtemp = FALSE; if (error == IXGBE_ERR_SFP_NOT_PRESENT) { /* * No optics in this port, set up * so the timer routine will probe * for later insertion. */ adapter->sfp_probe = TRUE; error = IXGBE_SUCCESS; } else if (error == IXGBE_ERR_SFP_NOT_SUPPORTED) { device_printf(dev, "Unsupported SFP+ module detected!\n"); error = EIO; goto err_late; } else if (error) { device_printf(dev, "Hardware initialization failed\n"); error = EIO; goto err_late; } /* Make sure we have a good EEPROM before we read from it */ if (ixgbe_validate_eeprom_checksum(&adapter->hw, NULL) < 0) { device_printf(dev, "The EEPROM Checksum Is Not Valid\n"); error = EIO; goto err_late; } /* Setup OS specific network interface */ if (ixgbe_setup_interface(dev, adapter) != 0) goto err_late; if (adapter->feat_en & IXGBE_FEATURE_MSIX) error = ixgbe_allocate_msix(adapter); else error = ixgbe_allocate_legacy(adapter); if (error) goto err_late; error = ixgbe_start_hw(hw); switch (error) { case IXGBE_ERR_EEPROM_VERSION: device_printf(dev, "This device is a pre-production adapter/LOM. 
Please be aware there may be issues associated with your hardware.\nIf you are experiencing problems please contact your Intel or hardware representative who provided you with this hardware.\n"); break; case IXGBE_ERR_SFP_NOT_SUPPORTED: device_printf(dev, "Unsupported SFP+ Module\n"); error = EIO; goto err_late; case IXGBE_ERR_SFP_NOT_PRESENT: device_printf(dev, "No SFP+ Module found\n"); /* falls thru */ default: break; } /* Enable the optics for 82599 SFP+ fiber */ ixgbe_enable_tx_laser(hw); /* Enable power to the phy. */ ixgbe_set_phy_power(hw, TRUE); /* Initialize statistics */ ixgbe_update_stats_counters(adapter); /* Check PCIE slot type/speed/width */ ixgbe_get_slot_info(adapter); /* * Do time init and sysctl init here, but * only on the first port of a bypass adapter. */ ixgbe_bypass_init(adapter); /* Set an initial dmac value */ adapter->dmac = 0; /* Set initial advertised speeds (if applicable) */ adapter->advertise = ixgbe_get_advertise(adapter); if (adapter->feat_cap & IXGBE_FEATURE_SRIOV) ixgbe_define_iov_schemas(dev, &error); /* Add sysctls */ ixgbe_add_device_sysctls(adapter); ixgbe_add_hw_stats(adapter); /* For Netmap */ adapter->init_locked = ixgbe_init_locked; adapter->stop_locked = ixgbe_stop; if (adapter->feat_en & IXGBE_FEATURE_NETMAP) ixgbe_netmap_attach(adapter); INIT_DEBUGOUT("ixgbe_attach: end"); return (0); err_late: ixgbe_free_transmit_structures(adapter); ixgbe_free_receive_structures(adapter); free(adapter->queues, M_DEVBUF); err_out: if (adapter->ifp != NULL) if_free(adapter->ifp); ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT); ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD; IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext); ixgbe_free_pci_resources(adapter); free(adapter->mta, M_IXGBE); IXGBE_CORE_LOCK_DESTROY(adapter); return (error); } /* ixgbe_attach */ /************************************************************************ * ixgbe_check_wol_support * * Checks whether the adapter's ports are capable of * Wake On LAN by reading the adapter's NVM. * * Sets each port's hw->wol_enabled value depending * on the value read here. ************************************************************************/ static void ixgbe_check_wol_support(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; u16 dev_caps = 0; /* Find out WoL support for port */ adapter->wol_support = hw->wol_enabled = 0; ixgbe_get_device_caps(hw, &dev_caps); if ((dev_caps & IXGBE_DEVICE_CAPS_WOL_PORT0_1) || ((dev_caps & IXGBE_DEVICE_CAPS_WOL_PORT0) && hw->bus.func == 0)) adapter->wol_support = hw->wol_enabled = 1; /* Save initial wake up filter configuration */ adapter->wufc = IXGBE_READ_REG(hw, IXGBE_WUFC); return; } /* ixgbe_check_wol_support */ /************************************************************************ * ixgbe_setup_interface * * Setup networking device structure and register an interface. 
************************************************************************/ static int ixgbe_setup_interface(device_t dev, struct adapter *adapter) { struct ifnet *ifp; INIT_DEBUGOUT("ixgbe_setup_interface: begin"); ifp = adapter->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not allocate ifnet structure\n"); return (-1); } if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_baudrate = IF_Gbps(10); ifp->if_init = ixgbe_init; ifp->if_softc = adapter; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = ixgbe_ioctl; #if __FreeBSD_version >= 1100036 if_setgetcounterfn(ifp, ixgbe_get_counter); #endif #if __FreeBSD_version >= 1100045 /* TSO parameters */ ifp->if_hw_tsomax = 65518; ifp->if_hw_tsomaxsegcount = IXGBE_82599_SCATTER; ifp->if_hw_tsomaxsegsize = 2048; #endif if (adapter->feat_en & IXGBE_FEATURE_LEGACY_TX) { ifp->if_start = ixgbe_legacy_start; IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 2); ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 2; IFQ_SET_READY(&ifp->if_snd); ixgbe_start_locked = ixgbe_legacy_start_locked; ixgbe_ring_empty = ixgbe_legacy_ring_empty; } else { ifp->if_transmit = ixgbe_mq_start; ifp->if_qflush = ixgbe_qflush; ixgbe_start_locked = ixgbe_mq_start_locked; ixgbe_ring_empty = drbr_empty; } ether_ifattach(ifp, adapter->hw.mac.addr); adapter->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; /* * Tell the upper layer(s) we support long frames. */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); /* Set capability flags */ ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6 | IFCAP_TSO | IFCAP_LRO | IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | IFCAP_VLAN_HWCSUM | IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU | IFCAP_HWSTATS; /* Enable the above capabilities by default */ ifp->if_capenable = ifp->if_capabilities; /* * Don't turn this on by default, if vlans are * created on another pseudo device (eg. lagg) * then vlan events are not passed thru, breaking * operation, but with HW FILTER off it works. If * using vlans directly on the ixgbe driver you can * enable this and get full hardware tag filtering. 
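* The capability can also be toggled per interface at run time with the vlanhwfilter option of ifconfig(8) (added note).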
*/ ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; /* * Specify the media types supported by this adapter and register * callbacks to update media and link information */ ifmedia_init(&adapter->media, IFM_IMASK, ixgbe_media_change, ixgbe_media_status); adapter->phy_layer = ixgbe_get_supported_physical_layer(&adapter->hw); ixgbe_add_media_types(adapter); /* Set autoselect media by default */ ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); return (0); } /* ixgbe_setup_interface */ #if __FreeBSD_version >= 1100036 /************************************************************************ * ixgbe_get_counter ************************************************************************/ static uint64_t ixgbe_get_counter(struct ifnet *ifp, ift_counter cnt) { struct adapter *adapter; struct tx_ring *txr; uint64_t rv; adapter = if_getsoftc(ifp); switch (cnt) { case IFCOUNTER_IPACKETS: return (adapter->ipackets); case IFCOUNTER_OPACKETS: return (adapter->opackets); case IFCOUNTER_IBYTES: return (adapter->ibytes); case IFCOUNTER_OBYTES: return (adapter->obytes); case IFCOUNTER_IMCASTS: return (adapter->imcasts); case IFCOUNTER_OMCASTS: return (adapter->omcasts); case IFCOUNTER_COLLISIONS: return (0); case IFCOUNTER_IQDROPS: return (adapter->iqdrops); case IFCOUNTER_OQDROPS: rv = 0; txr = adapter->tx_rings; for (int i = 0; i < adapter->num_queues; i++, txr++) rv += txr->br->br_drops; return (rv); case IFCOUNTER_IERRORS: return (adapter->ierrors); default: return (if_get_counter_default(ifp, cnt)); } } /* ixgbe_get_counter */ #endif /************************************************************************ * ixgbe_add_media_types ************************************************************************/ static void ixgbe_add_media_types(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; device_t dev = adapter->dev; u64 layer; layer = adapter->phy_layer; /* Media types with matching FreeBSD media defines */ if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T) ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_T, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_T) ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_100BASE_TX) ifmedia_add(&adapter->media, IFM_ETHER | IFM_100_TX, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_10BASE_T) ifmedia_add(&adapter->media, IFM_ETHER | IFM_10_T, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU || layer & IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA) ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_TWINAX, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LR) { ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_LR, 0, NULL); if (hw->phy.multispeed_fiber) ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_LX, 0, NULL); } if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR) { ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_SR, 0, NULL); if (hw->phy.multispeed_fiber) ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL); } else if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_SX, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_CX4) ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_CX4, 0, NULL); #ifdef IFM_ETH_XTYPE if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_KR, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4) ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_KX4, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_KX, 0, NULL); if (layer & IXGBE_PHYSICAL_LAYER_2500BASE_KX) 
ifmedia_add(&adapter->media, IFM_ETHER | IFM_2500_KX, 0, NULL); #else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) { device_printf(dev, "Media supported: 10GbaseKR\n"); device_printf(dev, "10GbaseKR mapped to 10GbaseSR\n"); ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_SR, 0, NULL); } if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4) { device_printf(dev, "Media supported: 10GbaseKX4\n"); device_printf(dev, "10GbaseKX4 mapped to 10GbaseCX4\n"); ifmedia_add(&adapter->media, IFM_ETHER | IFM_10G_CX4, 0, NULL); } if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) { device_printf(dev, "Media supported: 1000baseKX\n"); device_printf(dev, "1000baseKX mapped to 1000baseCX\n"); ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_CX, 0, NULL); } if (layer & IXGBE_PHYSICAL_LAYER_2500BASE_KX) { device_printf(dev, "Media supported: 2500baseKX\n"); device_printf(dev, "2500baseKX mapped to 2500baseSX\n"); ifmedia_add(&adapter->media, IFM_ETHER | IFM_2500_SX, 0, NULL); } #endif if (layer & IXGBE_PHYSICAL_LAYER_1000BASE_BX) device_printf(dev, "Media supported: 1000baseBX\n"); if (hw->device_id == IXGBE_DEV_ID_82598AT) { ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T | IFM_FDX, 0, NULL); ifmedia_add(&adapter->media, IFM_ETHER | IFM_1000_T, 0, NULL); } ifmedia_add(&adapter->media, IFM_ETHER | IFM_AUTO, 0, NULL); } /* ixgbe_add_media_types */ /************************************************************************ * ixgbe_is_sfp ************************************************************************/ static inline bool ixgbe_is_sfp(struct ixgbe_hw *hw) { switch (hw->mac.type) { case ixgbe_mac_82598EB: if (hw->phy.type == ixgbe_phy_nl) return TRUE; return FALSE; case ixgbe_mac_82599EB: switch (hw->mac.ops.get_media_type(hw)) { case ixgbe_media_type_fiber: case ixgbe_media_type_fiber_qsfp: return TRUE; default: return FALSE; } case ixgbe_mac_X550EM_x: case ixgbe_mac_X550EM_a: if (hw->mac.ops.get_media_type(hw) == ixgbe_media_type_fiber) return TRUE; return FALSE; default: return FALSE; } } /* ixgbe_is_sfp */ /************************************************************************ * ixgbe_config_link ************************************************************************/ static void ixgbe_config_link(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; u32 autoneg, err = 0; bool sfp, negotiate; sfp = ixgbe_is_sfp(hw); if (sfp) { if (hw->phy.multispeed_fiber) { hw->mac.ops.setup_sfp(hw); ixgbe_enable_tx_laser(hw); taskqueue_enqueue(adapter->tq, &adapter->msf_task); } else taskqueue_enqueue(adapter->tq, &adapter->mod_task); } else { if (hw->mac.ops.check_link) err = ixgbe_check_link(hw, &adapter->link_speed, &adapter->link_up, FALSE); if (err) goto out; autoneg = hw->phy.autoneg_advertised; if ((!autoneg) && (hw->mac.ops.get_link_capabilities)) err = hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate); if (err) goto out; if (hw->mac.ops.setup_link) err = hw->mac.ops.setup_link(hw, autoneg, adapter->link_up); } out: return; } /* ixgbe_config_link */ /************************************************************************ * ixgbe_update_stats_counters - Update board statistics counters. 
************************************************************************/ static void ixgbe_update_stats_counters(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; struct ixgbe_hw_stats *stats = &adapter->stats.pf; u32 missed_rx = 0, bprc, lxon, lxoff, total; u64 total_missed_rx = 0; stats->crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS); stats->illerrc += IXGBE_READ_REG(hw, IXGBE_ILLERRC); stats->errbc += IXGBE_READ_REG(hw, IXGBE_ERRBC); stats->mspdc += IXGBE_READ_REG(hw, IXGBE_MSPDC); stats->mpc[0] += IXGBE_READ_REG(hw, IXGBE_MPC(0)); for (int i = 0; i < 16; i++) { stats->qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i)); stats->qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i)); stats->qprdc[i] += IXGBE_READ_REG(hw, IXGBE_QPRDC(i)); } stats->mlfc += IXGBE_READ_REG(hw, IXGBE_MLFC); stats->mrfc += IXGBE_READ_REG(hw, IXGBE_MRFC); stats->rlec += IXGBE_READ_REG(hw, IXGBE_RLEC); /* Hardware workaround, gprc counts missed packets */ stats->gprc += IXGBE_READ_REG(hw, IXGBE_GPRC); stats->gprc -= missed_rx; if (hw->mac.type != ixgbe_mac_82598EB) { stats->gorc += IXGBE_READ_REG(hw, IXGBE_GORCL) + ((u64)IXGBE_READ_REG(hw, IXGBE_GORCH) << 32); stats->gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL) + ((u64)IXGBE_READ_REG(hw, IXGBE_GOTCH) << 32); stats->tor += IXGBE_READ_REG(hw, IXGBE_TORL) + ((u64)IXGBE_READ_REG(hw, IXGBE_TORH) << 32); stats->lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT); stats->lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT); } else { stats->lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC); stats->lxoffrxc += IXGBE_READ_REG(hw, IXGBE_LXOFFRXC); /* 82598 only has a counter in the high register */ stats->gorc += IXGBE_READ_REG(hw, IXGBE_GORCH); stats->gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH); stats->tor += IXGBE_READ_REG(hw, IXGBE_TORH); } /* * Workaround: mprc hardware is incorrectly counting * broadcasts, so for now we subtract those. 
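* Added note: the subtraction below is applied only on 82598-class MACs, where the miscount occurs; counters on newer MACs are left as read.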
*/ bprc = IXGBE_READ_REG(hw, IXGBE_BPRC); stats->bprc += bprc; stats->mprc += IXGBE_READ_REG(hw, IXGBE_MPRC); if (hw->mac.type == ixgbe_mac_82598EB) stats->mprc -= bprc; stats->prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64); stats->prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127); stats->prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255); stats->prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511); stats->prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023); stats->prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522); lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC); stats->lxontxc += lxon; lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC); stats->lxofftxc += lxoff; total = lxon + lxoff; stats->gptc += IXGBE_READ_REG(hw, IXGBE_GPTC); stats->mptc += IXGBE_READ_REG(hw, IXGBE_MPTC); stats->ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64); stats->gptc -= total; stats->mptc -= total; stats->ptc64 -= total; stats->gotc -= total * ETHER_MIN_LEN; stats->ruc += IXGBE_READ_REG(hw, IXGBE_RUC); stats->rfc += IXGBE_READ_REG(hw, IXGBE_RFC); stats->roc += IXGBE_READ_REG(hw, IXGBE_ROC); stats->rjc += IXGBE_READ_REG(hw, IXGBE_RJC); stats->mngprc += IXGBE_READ_REG(hw, IXGBE_MNGPRC); stats->mngpdc += IXGBE_READ_REG(hw, IXGBE_MNGPDC); stats->mngptc += IXGBE_READ_REG(hw, IXGBE_MNGPTC); stats->tpr += IXGBE_READ_REG(hw, IXGBE_TPR); stats->tpt += IXGBE_READ_REG(hw, IXGBE_TPT); stats->ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127); stats->ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255); stats->ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511); stats->ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023); stats->ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522); stats->bptc += IXGBE_READ_REG(hw, IXGBE_BPTC); stats->xec += IXGBE_READ_REG(hw, IXGBE_XEC); stats->fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC); stats->fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST); /* Only read FCOE on 82599 */ if (hw->mac.type != ixgbe_mac_82598EB) { stats->fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC); stats->fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC); stats->fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC); stats->fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC); stats->fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC); } /* Fill out the OS statistics structure */ IXGBE_SET_IPACKETS(adapter, stats->gprc); IXGBE_SET_OPACKETS(adapter, stats->gptc); IXGBE_SET_IBYTES(adapter, stats->gorc); IXGBE_SET_OBYTES(adapter, stats->gotc); IXGBE_SET_IMCASTS(adapter, stats->mprc); IXGBE_SET_OMCASTS(adapter, stats->mptc); IXGBE_SET_COLLISIONS(adapter, 0); IXGBE_SET_IQDROPS(adapter, total_missed_rx); IXGBE_SET_IERRORS(adapter, stats->crcerrs + stats->rlec); } /* ixgbe_update_stats_counters */ /************************************************************************ * ixgbe_add_hw_stats * * Add sysctl variables, one per statistic, to the system. 
************************************************************************/ static void ixgbe_add_hw_stats(struct adapter *adapter) { device_t dev = adapter->dev; struct tx_ring *txr = adapter->tx_rings; struct rx_ring *rxr = adapter->rx_rings; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); struct ixgbe_hw_stats *stats = &adapter->stats.pf; struct sysctl_oid *stat_node, *queue_node; struct sysctl_oid_list *stat_list, *queue_list; #define QUEUE_NAME_LEN 32 char namebuf[QUEUE_NAME_LEN]; /* Driver Statistics */ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "dropped", CTLFLAG_RD, &adapter->dropped_pkts, "Driver dropped packets"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_defrag_failed", CTLFLAG_RD, &adapter->mbuf_defrag_failed, "m_defrag() failed"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_events", CTLFLAG_RD, &adapter->watchdog_events, "Watchdog timeouts"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "link_irq", CTLFLAG_RD, &adapter->link_irq, "Link MSI-X IRQ Handled"); for (int i = 0; i < adapter->num_queues; i++, txr++) { snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "interrupt_rate", CTLTYPE_UINT | CTLFLAG_RW, &adapter->queues[i], sizeof(&adapter->queues[i]), ixgbe_sysctl_interrupt_rate_handler, "IU", "Interrupt Rate"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs", CTLFLAG_RD, &(adapter->queues[i].irqs), "irqs on this queue"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_head", CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr), ixgbe_sysctl_tdh_handler, "IU", "Transmit Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "txd_tail", CTLTYPE_UINT | CTLFLAG_RD, txr, sizeof(txr), ixgbe_sysctl_tdt_handler, "IU", "Transmit Descriptor Tail"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tso_tx", CTLFLAG_RD, &txr->tso_tx, "TSO"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "no_tx_dma_setup", CTLFLAG_RD, &txr->no_tx_dma_setup, "Driver tx dma failure in xmit"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "no_desc_avail", CTLFLAG_RD, &txr->no_desc_avail, "Queue No Descriptor Available"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets", CTLFLAG_RD, &txr->total_packets, "Queue Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "br_drops", CTLFLAG_RD, &txr->br->br_drops, "Packets dropped in buf_ring"); } for (int i = 0; i < adapter->num_queues; i++, rxr++) { struct lro_ctrl *lro = &rxr->lro; snprintf(namebuf, QUEUE_NAME_LEN, "queue%d", i); queue_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_head", CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr), ixgbe_sysctl_rdh_handler, "IU", "Receive Descriptor Head"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "rxd_tail", CTLTYPE_UINT | CTLFLAG_RD, rxr, sizeof(rxr), ixgbe_sysctl_rdt_handler, "IU", "Receive Descriptor Tail"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_packets", CTLFLAG_RD, &rxr->rx_packets, "Queue Packets Received"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes", CTLFLAG_RD, &rxr->rx_bytes, "Queue Bytes Received"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_copies", CTLFLAG_RD, &rxr->rx_copies, "Copied RX Frames"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_discarded", 
CTLFLAG_RD, &rxr->rx_discarded, "Discarded RX packets"); SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_queued", CTLFLAG_RD, &lro->lro_queued, 0, "LRO Queued"); SYSCTL_ADD_U64(ctx, queue_list, OID_AUTO, "lro_flushed", CTLFLAG_RD, &lro->lro_flushed, 0, "LRO Flushed"); } /* MAC stats get their own sub node */ stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac_stats", CTLFLAG_RD, NULL, "MAC Statistics"); stat_list = SYSCTL_CHILDREN(stat_node); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "crc_errs", CTLFLAG_RD, &stats->crcerrs, "CRC Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "ill_errs", CTLFLAG_RD, &stats->illerrc, "Illegal Byte Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "byte_errs", CTLFLAG_RD, &stats->errbc, "Byte Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "short_discards", CTLFLAG_RD, &stats->mspdc, "MAC Short Packets Discarded"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "local_faults", CTLFLAG_RD, &stats->mlfc, "MAC Local Faults"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "remote_faults", CTLFLAG_RD, &stats->mrfc, "MAC Remote Faults"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rec_len_errs", CTLFLAG_RD, &stats->rlec, "Receive Length Errors"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_missed_packets", CTLFLAG_RD, &stats->mpc[0], "RX Missed Packet Count"); /* Flow Control stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_txd", CTLFLAG_RD, &stats->lxontxc, "Link XON Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xon_recvd", CTLFLAG_RD, &stats->lxonrxc, "Link XON Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_txd", CTLFLAG_RD, &stats->lxofftxc, "Link XOFF Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "xoff_recvd", CTLFLAG_RD, &stats->lxoffrxc, "Link XOFF Received"); /* Packet Reception Stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_octets_rcvd", CTLFLAG_RD, &stats->tor, "Total Octets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_rcvd", CTLFLAG_RD, &stats->gorc, "Good Octets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_rcvd", CTLFLAG_RD, &stats->tpr, "Total Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_rcvd", CTLFLAG_RD, &stats->gprc, "Good Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_rcvd", CTLFLAG_RD, &stats->mprc, "Multicast Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_rcvd", CTLFLAG_RD, &stats->bprc, "Broadcast Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_64", CTLFLAG_RD, &stats->prc64, "64 byte frames received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_65_127", CTLFLAG_RD, &stats->prc127, "65-127 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_128_255", CTLFLAG_RD, &stats->prc255, "128-255 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_256_511", CTLFLAG_RD, &stats->prc511, "256-511 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_512_1023", CTLFLAG_RD, &stats->prc1023, "512-1023 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "rx_frames_1024_1522", CTLFLAG_RD, &stats->prc1522, "1023-1522 byte frames received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_undersized", CTLFLAG_RD, &stats->ruc, "Receive Undersized"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_fragmented", CTLFLAG_RD, &stats->rfc, "Fragmented Packets Received "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_oversized", CTLFLAG_RD, 
&stats->roc, "Oversized Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "recv_jabberd", CTLFLAG_RD, &stats->rjc, "Received Jabber"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_rcvd", CTLFLAG_RD, &stats->mngprc, "Management Packets Received"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_drpd", CTLFLAG_RD, &stats->mngptc, "Management Packets Dropped"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "checksum_errs", CTLFLAG_RD, &stats->xec, "Checksum Errors"); /* Packet Transmission Stats */ SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_octets_txd", CTLFLAG_RD, &stats->gotc, "Good Octets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "total_pkts_txd", CTLFLAG_RD, &stats->tpt, "Total Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "good_pkts_txd", CTLFLAG_RD, &stats->gptc, "Good Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "bcast_pkts_txd", CTLFLAG_RD, &stats->bptc, "Broadcast Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "mcast_pkts_txd", CTLFLAG_RD, &stats->mptc, "Multicast Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "management_pkts_txd", CTLFLAG_RD, &stats->mngptc, "Management Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_64", CTLFLAG_RD, &stats->ptc64, "64 byte frames transmitted "); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_65_127", CTLFLAG_RD, &stats->ptc127, "65-127 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_128_255", CTLFLAG_RD, &stats->ptc255, "128-255 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_256_511", CTLFLAG_RD, &stats->ptc511, "256-511 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_512_1023", CTLFLAG_RD, &stats->ptc1023, "512-1023 byte frames transmitted"); SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, "tx_frames_1024_1522", CTLFLAG_RD, &stats->ptc1522, "1024-1522 byte frames transmitted"); } /* ixgbe_add_hw_stats */ /************************************************************************ * ixgbe_sysctl_tdh_handler - Transmit Descriptor Head handler function * * Retrieves the TDH value from the hardware ************************************************************************/ static int ixgbe_sysctl_tdh_handler(SYSCTL_HANDLER_ARGS) { struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1); int error; unsigned int val; if (!txr) return (0); val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDH(txr->me)); error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr) return error; return (0); } /* ixgbe_sysctl_tdh_handler */ /************************************************************************ * ixgbe_sysctl_tdt_handler - Transmit Descriptor Tail handler function * * Retrieves the TDT value from the hardware ************************************************************************/ static int ixgbe_sysctl_tdt_handler(SYSCTL_HANDLER_ARGS) { struct tx_ring *txr = ((struct tx_ring *)oidp->oid_arg1); int error; unsigned int val; if (!txr) return (0); val = IXGBE_READ_REG(&txr->adapter->hw, IXGBE_TDT(txr->me)); error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr) return error; return (0); } /* ixgbe_sysctl_tdt_handler */ /************************************************************************ * ixgbe_sysctl_rdh_handler - Receive Descriptor Head handler function * * Retrieves the RDH value from the hardware 
************************************************************************/ static int ixgbe_sysctl_rdh_handler(SYSCTL_HANDLER_ARGS) { struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1); int error; unsigned int val; if (!rxr) return (0); val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDH(rxr->me)); error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr) return error; return (0); } /* ixgbe_sysctl_rdh_handler */ /************************************************************************ * ixgbe_sysctl_rdt_handler - Receive Descriptor Tail handler function * * Retrieves the RDT value from the hardware ************************************************************************/ static int ixgbe_sysctl_rdt_handler(SYSCTL_HANDLER_ARGS) { struct rx_ring *rxr = ((struct rx_ring *)oidp->oid_arg1); int error; unsigned int val; if (!rxr) return (0); val = IXGBE_READ_REG(&rxr->adapter->hw, IXGBE_RDT(rxr->me)); error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr) return error; return (0); } /* ixgbe_sysctl_rdt_handler */ /************************************************************************ * ixgbe_register_vlan * * Run via vlan config EVENT, it enables us to use the * HW Filter table since we can get the vlan id. This * just creates the entry in the soft version of the * VFTA, init will repopulate the real table. ************************************************************************/ static void ixgbe_register_vlan(void *arg, struct ifnet *ifp, u16 vtag) { struct adapter *adapter = ifp->if_softc; u16 index, bit; if (ifp->if_softc != arg) /* Not our event */ return; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; IXGBE_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] |= (1 << bit); ++adapter->num_vlans; ixgbe_setup_vlan_hw_support(adapter); IXGBE_CORE_UNLOCK(adapter); } /* ixgbe_register_vlan */ /************************************************************************ * ixgbe_unregister_vlan * * Run via vlan unconfig EVENT, remove our entry in the soft vfta. ************************************************************************/ static void ixgbe_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag) { struct adapter *adapter = ifp->if_softc; u16 index, bit; if (ifp->if_softc != arg) return; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; IXGBE_CORE_LOCK(adapter); index = (vtag >> 5) & 0x7F; bit = vtag & 0x1F; adapter->shadow_vfta[index] &= ~(1 << bit); --adapter->num_vlans; /* Re-init to load the changes */ ixgbe_setup_vlan_hw_support(adapter); IXGBE_CORE_UNLOCK(adapter); } /* ixgbe_unregister_vlan */ /************************************************************************ * ixgbe_setup_vlan_hw_support ************************************************************************/ static void ixgbe_setup_vlan_hw_support(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; struct ixgbe_hw *hw = &adapter->hw; struct rx_ring *rxr; int i; u32 ctrl; /* * We get here thru init_locked, meaning * a soft reset, this has already cleared * the VFTA and other state, so if there * have been no vlan's registered do nothing. 
*/ if (adapter->num_vlans == 0) return; /* Setup the queues for vlans */ for (i = 0; i < adapter->num_queues; i++) { rxr = &adapter->rx_rings[i]; /* On 82599 the VLAN enable is per/queue in RXDCTL */ if (hw->mac.type != ixgbe_mac_82598EB) { ctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxr->me)); ctrl |= IXGBE_RXDCTL_VME; IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxr->me), ctrl); } rxr->vtag_strip = TRUE; } if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0) return; /* * A soft reset zero's out the VFTA, so * we need to repopulate it now. */ for (i = 0; i < IXGBE_VFTA_SIZE; i++) if (adapter->shadow_vfta[i] != 0) IXGBE_WRITE_REG(hw, IXGBE_VFTA(i), adapter->shadow_vfta[i]); ctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); /* Enable the Filter Table if enabled */ if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) { ctrl &= ~IXGBE_VLNCTRL_CFIEN; ctrl |= IXGBE_VLNCTRL_VFE; } if (hw->mac.type == ixgbe_mac_82598EB) ctrl |= IXGBE_VLNCTRL_VME; IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, ctrl); } /* ixgbe_setup_vlan_hw_support */ /************************************************************************ * ixgbe_get_slot_info * * Get the width and transaction speed of * the slot this adapter is plugged into. ************************************************************************/ static void ixgbe_get_slot_info(struct adapter *adapter) { device_t dev = adapter->dev; struct ixgbe_hw *hw = &adapter->hw; u32 offset; u16 link; int bus_info_valid = TRUE; /* Some devices are behind an internal bridge */ switch (hw->device_id) { case IXGBE_DEV_ID_82599_SFP_SF_QP: case IXGBE_DEV_ID_82599_QSFP_SF_QP: goto get_parent_info; default: break; } ixgbe_get_bus_info(hw); /* * Some devices don't use PCI-E, but there is no need * to display "Unknown" for bus speed and width. */ switch (hw->mac.type) { case ixgbe_mac_X550EM_x: case ixgbe_mac_X550EM_a: return; default: goto display; } get_parent_info: /* * For the Quad port adapter we need to parse back * up the PCI tree to find the speed of the expansion * slot into which this adapter is plugged. A bit more work. */ dev = device_get_parent(device_get_parent(dev)); #ifdef IXGBE_DEBUG device_printf(dev, "parent pcib = %x,%x,%x\n", pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev)); #endif dev = device_get_parent(device_get_parent(dev)); #ifdef IXGBE_DEBUG device_printf(dev, "slot pcib = %x,%x,%x\n", pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev)); #endif /* Now get the PCI Express Capabilities offset */ if (pci_find_cap(dev, PCIY_EXPRESS, &offset)) { /* * Hmm...can't get PCI-Express capabilities. * Falling back to default method. */ bus_info_valid = FALSE; ixgbe_get_bus_info(hw); goto display; } /* ...and read the Link Status Register */ link = pci_read_config(dev, offset + PCIER_LINK_STA, 2); ixgbe_set_pci_config_data_generic(hw, link); display: device_printf(dev, "PCI Express Bus: Speed %s %s\n", ((hw->bus.speed == ixgbe_bus_speed_8000) ? "8.0GT/s" : (hw->bus.speed == ixgbe_bus_speed_5000) ? "5.0GT/s" : (hw->bus.speed == ixgbe_bus_speed_2500) ? "2.5GT/s" : "Unknown"), ((hw->bus.width == ixgbe_bus_width_pcie_x8) ? "Width x8" : (hw->bus.width == ixgbe_bus_width_pcie_x4) ? "Width x4" : (hw->bus.width == ixgbe_bus_width_pcie_x1) ? 
"Width x1" : "Unknown")); if (bus_info_valid) { if ((hw->device_id != IXGBE_DEV_ID_82599_SFP_SF_QP) && ((hw->bus.width <= ixgbe_bus_width_pcie_x4) && (hw->bus.speed == ixgbe_bus_speed_2500))) { device_printf(dev, "PCI-Express bandwidth available for this card\n is not sufficient for optimal performance.\n"); device_printf(dev, "For optimal performance a x8 PCIE, or x4 PCIE Gen2 slot is required.\n"); } if ((hw->device_id == IXGBE_DEV_ID_82599_SFP_SF_QP) && ((hw->bus.width <= ixgbe_bus_width_pcie_x8) && (hw->bus.speed < ixgbe_bus_speed_8000))) { device_printf(dev, "PCI-Express bandwidth available for this card\n is not sufficient for optimal performance.\n"); device_printf(dev, "For optimal performance a x8 PCIE Gen3 slot is required.\n"); } } else device_printf(dev, "Unable to determine slot speed/width. The speed/width reported are that of the internal switch.\n"); return; } /* ixgbe_get_slot_info */ /************************************************************************ * ixgbe_enable_queue - MSI-X Interrupt Handlers and Tasklets ************************************************************************/ static inline void ixgbe_enable_queue(struct adapter *adapter, u32 vector) { struct ixgbe_hw *hw = &adapter->hw; u64 queue = (u64)(1 << vector); u32 mask; if (hw->mac.type == ixgbe_mac_82598EB) { mask = (IXGBE_EIMS_RTX_QUEUE & queue); IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask); } else { mask = (queue & 0xFFFFFFFF); if (mask) IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask); mask = (queue >> 32); if (mask) IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask); } } /* ixgbe_enable_queue */ /************************************************************************ * ixgbe_disable_queue ************************************************************************/ static inline void ixgbe_disable_queue(struct adapter *adapter, u32 vector) { struct ixgbe_hw *hw = &adapter->hw; u64 queue = (u64)(1 << vector); u32 mask; if (hw->mac.type == ixgbe_mac_82598EB) { mask = (IXGBE_EIMS_RTX_QUEUE & queue); IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask); } else { mask = (queue & 0xFFFFFFFF); if (mask) IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask); mask = (queue >> 32); if (mask) IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask); } } /* ixgbe_disable_queue */ /************************************************************************ * ixgbe_msix_que - MSI-X Queue Interrupt Service routine ************************************************************************/ void ixgbe_msix_que(void *arg) { struct ix_queue *que = arg; struct adapter *adapter = que->adapter; struct ifnet *ifp = adapter->ifp; struct tx_ring *txr = que->txr; struct rx_ring *rxr = que->rxr; bool more; u32 newitr = 0; /* Protect against spurious interrupts */ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; ixgbe_disable_queue(adapter, que->msix); ++que->irqs; more = ixgbe_rxeof(que); IXGBE_TX_LOCK(txr); ixgbe_txeof(txr); if (!ixgbe_ring_empty(ifp, txr->br)) ixgbe_start_locked(ifp, txr); IXGBE_TX_UNLOCK(txr); /* Do AIM now? */ if (adapter->enable_aim == FALSE) goto no_calc; /* * Do Adaptive Interrupt Moderation: * - Write out last calculated setting * - Calculate based on average size over * the last interval. 
*/ if (que->eitr_setting) IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITR(que->msix), que->eitr_setting); que->eitr_setting = 0; /* Idle, do nothing */ if ((txr->bytes == 0) && (rxr->bytes == 0)) goto no_calc; if ((txr->bytes) && (txr->packets)) newitr = txr->bytes/txr->packets; if ((rxr->bytes) && (rxr->packets)) newitr = max(newitr, (rxr->bytes / rxr->packets)); newitr += 24; /* account for hardware frame, crc */ /* set an upper boundary */ newitr = min(newitr, 3000); /* Be nice to the mid range */ if ((newitr > 300) && (newitr < 1200)) newitr = (newitr / 3); else newitr = (newitr / 2); if (adapter->hw.mac.type == ixgbe_mac_82598EB) newitr |= newitr << 16; else newitr |= IXGBE_EITR_CNT_WDIS; /* save for next interrupt */ que->eitr_setting = newitr; /* Reset state */ txr->bytes = 0; txr->packets = 0; rxr->bytes = 0; rxr->packets = 0; no_calc: if (more) taskqueue_enqueue(que->tq, &que->que_task); else ixgbe_enable_queue(adapter, que->msix); return; } /* ixgbe_msix_que */ /************************************************************************ * ixgbe_media_status - Media Ioctl callback * * Called whenever the user queries the status of * the interface using ifconfig. ************************************************************************/ static void ixgbe_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) { struct adapter *adapter = ifp->if_softc; struct ixgbe_hw *hw = &adapter->hw; int layer; INIT_DEBUGOUT("ixgbe_media_status: begin"); IXGBE_CORE_LOCK(adapter); ixgbe_update_link_status(adapter); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!adapter->link_active) { IXGBE_CORE_UNLOCK(adapter); return; } ifmr->ifm_status |= IFM_ACTIVE; layer = adapter->phy_layer; if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_T || layer & IXGBE_PHYSICAL_LAYER_1000BASE_T || layer & IXGBE_PHYSICAL_LAYER_100BASE_TX || layer & IXGBE_PHYSICAL_LAYER_10BASE_T) switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_T | IFM_FDX; break; case IXGBE_LINK_SPEED_1GB_FULL: ifmr->ifm_active |= IFM_1000_T | IFM_FDX; break; case IXGBE_LINK_SPEED_100_FULL: ifmr->ifm_active |= IFM_100_TX | IFM_FDX; break; case IXGBE_LINK_SPEED_10_FULL: ifmr->ifm_active |= IFM_10_T | IFM_FDX; break; } if (layer & IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU || layer & IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA) switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_TWINAX | IFM_FDX; break; } if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LR) switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_LR | IFM_FDX; break; case IXGBE_LINK_SPEED_1GB_FULL: ifmr->ifm_active |= IFM_1000_LX | IFM_FDX; break; } if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_LRM) switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_LRM | IFM_FDX; break; case IXGBE_LINK_SPEED_1GB_FULL: ifmr->ifm_active |= IFM_1000_LX | IFM_FDX; break; } if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_SR || layer & IXGBE_PHYSICAL_LAYER_1000BASE_SX) switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_SR | IFM_FDX; break; case IXGBE_LINK_SPEED_1GB_FULL: ifmr->ifm_active |= IFM_1000_SX | IFM_FDX; break; } if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_CX4) switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_CX4 | IFM_FDX; break; } /* * XXX: These need to use the proper media types once * they're added. 
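 *
 * Illustrative note: when the headers predate IFM_ETH_XTYPE, the
 * backplane KR/KX4/KX physical layers are reported with the closest
 * pre-existing subtypes (10G_SR, 10G_CX4, 1000_CX); with newer headers
 * the dedicated IFM_10G_KR/KX4 and IFM_1000_KX values are used instead,
 * as the two preprocessor branches below show.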
*/ #ifndef IFM_ETH_XTYPE if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_SR | IFM_FDX; break; case IXGBE_LINK_SPEED_2_5GB_FULL: ifmr->ifm_active |= IFM_2500_SX | IFM_FDX; break; case IXGBE_LINK_SPEED_1GB_FULL: ifmr->ifm_active |= IFM_1000_CX | IFM_FDX; break; } else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4 || layer & IXGBE_PHYSICAL_LAYER_2500BASE_KX || layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_CX4 | IFM_FDX; break; case IXGBE_LINK_SPEED_2_5GB_FULL: ifmr->ifm_active |= IFM_2500_SX | IFM_FDX; break; case IXGBE_LINK_SPEED_1GB_FULL: ifmr->ifm_active |= IFM_1000_CX | IFM_FDX; break; } #else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KR) switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_KR | IFM_FDX; break; case IXGBE_LINK_SPEED_2_5GB_FULL: ifmr->ifm_active |= IFM_2500_KX | IFM_FDX; break; case IXGBE_LINK_SPEED_1GB_FULL: ifmr->ifm_active |= IFM_1000_KX | IFM_FDX; break; } else if (layer & IXGBE_PHYSICAL_LAYER_10GBASE_KX4 || layer & IXGBE_PHYSICAL_LAYER_2500BASE_KX || layer & IXGBE_PHYSICAL_LAYER_1000BASE_KX) switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: ifmr->ifm_active |= IFM_10G_KX4 | IFM_FDX; break; case IXGBE_LINK_SPEED_2_5GB_FULL: ifmr->ifm_active |= IFM_2500_KX | IFM_FDX; break; case IXGBE_LINK_SPEED_1GB_FULL: ifmr->ifm_active |= IFM_1000_KX | IFM_FDX; break; } #endif /* If nothing is recognized... */ if (IFM_SUBTYPE(ifmr->ifm_active) == 0) ifmr->ifm_active |= IFM_UNKNOWN; #if __FreeBSD_version >= 900025 /* Display current flow control setting used on link */ if (hw->fc.current_mode == ixgbe_fc_rx_pause || hw->fc.current_mode == ixgbe_fc_full) ifmr->ifm_active |= IFM_ETH_RXPAUSE; if (hw->fc.current_mode == ixgbe_fc_tx_pause || hw->fc.current_mode == ixgbe_fc_full) ifmr->ifm_active |= IFM_ETH_TXPAUSE; #endif IXGBE_CORE_UNLOCK(adapter); return; } /* ixgbe_media_status */ /************************************************************************ * ixgbe_media_change - Media Ioctl callback * * Called when the user changes speed/duplex using * media/mediopt option with ifconfig. ************************************************************************/ static int ixgbe_media_change(struct ifnet *ifp) { struct adapter *adapter = ifp->if_softc; struct ifmedia *ifm = &adapter->media; struct ixgbe_hw *hw = &adapter->hw; ixgbe_link_speed speed = 0; INIT_DEBUGOUT("ixgbe_media_change: begin"); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); if (hw->phy.media_type == ixgbe_media_type_backplane) return (ENODEV); /* * We don't actually need to check against the supported * media types of the adapter; ifmedia will take care of * that for us. 
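 *
 * Illustrative note: each recognised subtype below is folded into an
 * IXGBE_LINK_SPEED_* mask handed to setup_link(), and the same mask is
 * then re-encoded into adapter->advertise with the bit values used by
 * ixgbe_set_advertise() (0x1 = 100M, 0x2 = 1G, 0x4 = 10G, 0x8 = 10M).
 * Selecting 1000baseT with ifconfig, for example, requests 100M + 1G
 * and leaves advertise = 0x3.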
*/ switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: case IFM_10G_T: speed |= IXGBE_LINK_SPEED_100_FULL; speed |= IXGBE_LINK_SPEED_1GB_FULL; speed |= IXGBE_LINK_SPEED_10GB_FULL; break; case IFM_10G_LRM: case IFM_10G_LR: #ifndef IFM_ETH_XTYPE case IFM_10G_SR: /* KR, too */ case IFM_10G_CX4: /* KX4 */ #else case IFM_10G_KR: case IFM_10G_KX4: #endif speed |= IXGBE_LINK_SPEED_1GB_FULL; speed |= IXGBE_LINK_SPEED_10GB_FULL; break; #ifndef IFM_ETH_XTYPE case IFM_1000_CX: /* KX */ #else case IFM_1000_KX: #endif case IFM_1000_LX: case IFM_1000_SX: speed |= IXGBE_LINK_SPEED_1GB_FULL; break; case IFM_1000_T: speed |= IXGBE_LINK_SPEED_100_FULL; speed |= IXGBE_LINK_SPEED_1GB_FULL; break; case IFM_10G_TWINAX: speed |= IXGBE_LINK_SPEED_10GB_FULL; break; case IFM_100_TX: speed |= IXGBE_LINK_SPEED_100_FULL; break; case IFM_10_T: speed |= IXGBE_LINK_SPEED_10_FULL; break; default: goto invalid; } hw->mac.autotry_restart = TRUE; hw->mac.ops.setup_link(hw, speed, TRUE); adapter->advertise = ((speed & IXGBE_LINK_SPEED_10GB_FULL) ? 4 : 0) | ((speed & IXGBE_LINK_SPEED_1GB_FULL) ? 2 : 0) | ((speed & IXGBE_LINK_SPEED_100_FULL) ? 1 : 0) | ((speed & IXGBE_LINK_SPEED_10_FULL) ? 8 : 0); return (0); invalid: device_printf(adapter->dev, "Invalid media type!\n"); return (EINVAL); } /* ixgbe_media_change */ /************************************************************************ * ixgbe_set_promisc ************************************************************************/ static void ixgbe_set_promisc(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; int mcnt = 0; u32 rctl; rctl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL); rctl &= (~IXGBE_FCTRL_UPE); if (ifp->if_flags & IFF_ALLMULTI) mcnt = MAX_NUM_MULTICAST_ADDRESSES; else { struct ifmultiaddr *ifma; #if __FreeBSD_version < 800000 IF_ADDR_LOCK(ifp); #else if_maddr_rlock(ifp); #endif TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (mcnt == MAX_NUM_MULTICAST_ADDRESSES) break; mcnt++; } #if __FreeBSD_version < 800000 IF_ADDR_UNLOCK(ifp); #else if_maddr_runlock(ifp); #endif } if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) rctl &= (~IXGBE_FCTRL_MPE); IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, rctl); if (ifp->if_flags & IFF_PROMISC) { rctl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, rctl); } else if (ifp->if_flags & IFF_ALLMULTI) { rctl |= IXGBE_FCTRL_MPE; rctl &= ~IXGBE_FCTRL_UPE; IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, rctl); } } /* ixgbe_set_promisc */ /************************************************************************ * ixgbe_msix_link - Link status change ISR (MSI/MSI-X) ************************************************************************/ static void ixgbe_msix_link(void *arg) { struct adapter *adapter = arg; struct ixgbe_hw *hw = &adapter->hw; u32 eicr, eicr_mask; s32 retval; ++adapter->link_irq; /* Pause other interrupts */ IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EIMC_OTHER); /* First get the cause */ eicr = IXGBE_READ_REG(hw, IXGBE_EICS); /* Be sure the queue bits are not cleared */ eicr &= ~IXGBE_EICR_RTX_QUEUE; /* Clear interrupt with write */ IXGBE_WRITE_REG(hw, IXGBE_EICR, eicr); /* Link status change */ if (eicr & IXGBE_EICR_LSC) { IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EIMC_LSC); taskqueue_enqueue(adapter->tq, &adapter->link_task); } if (adapter->hw.mac.type != ixgbe_mac_82598EB) { if ((adapter->feat_en & IXGBE_FEATURE_FDIR) && (eicr & IXGBE_EICR_FLOW_DIR)) { /* This is probably overkill :) */ if (!atomic_cmpset_int(&adapter->fdir_reinit, 0, 1)) 
return; /* Disable the interrupt */ IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EIMC_FLOW_DIR); taskqueue_enqueue(adapter->tq, &adapter->fdir_task); } if (eicr & IXGBE_EICR_ECC) { device_printf(adapter->dev, "CRITICAL: ECC ERROR!! Please Reboot!!\n"); IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC); } /* Check for over temp condition */ if (adapter->feat_en & IXGBE_FEATURE_TEMP_SENSOR) { switch (adapter->hw.mac.type) { case ixgbe_mac_X550EM_a: if (!(eicr & IXGBE_EICR_GPI_SDP0_X550EM_a)) break; IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EICR_GPI_SDP0_X550EM_a); IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP0_X550EM_a); retval = hw->phy.ops.check_overtemp(hw); if (retval != IXGBE_ERR_OVERTEMP) break; device_printf(adapter->dev, "CRITICAL: OVER TEMP!! PHY IS SHUT DOWN!!\n"); device_printf(adapter->dev, "System shutdown required!\n"); break; default: if (!(eicr & IXGBE_EICR_TS)) break; retval = hw->phy.ops.check_overtemp(hw); if (retval != IXGBE_ERR_OVERTEMP) break; device_printf(adapter->dev, "CRITICAL: OVER TEMP!! PHY IS SHUT DOWN!!\n"); device_printf(adapter->dev, "System shutdown required!\n"); IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_TS); break; } } /* Check for VF message */ if ((adapter->feat_en & IXGBE_FEATURE_SRIOV) && (eicr & IXGBE_EICR_MAILBOX)) taskqueue_enqueue(adapter->tq, &adapter->mbx_task); } if (ixgbe_is_sfp(hw)) { /* Pluggable optics-related interrupt */ if (hw->mac.type >= ixgbe_mac_X540) eicr_mask = IXGBE_EICR_GPI_SDP0_X540; else eicr_mask = IXGBE_EICR_GPI_SDP2_BY_MAC(hw); if (eicr & eicr_mask) { IXGBE_WRITE_REG(hw, IXGBE_EICR, eicr_mask); taskqueue_enqueue(adapter->tq, &adapter->mod_task); } if ((hw->mac.type == ixgbe_mac_82599EB) && (eicr & IXGBE_EICR_GPI_SDP1_BY_MAC(hw))) { IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1_BY_MAC(hw)); taskqueue_enqueue(adapter->tq, &adapter->msf_task); } } /* Check for fan failure */ if (adapter->feat_en & IXGBE_FEATURE_FAN_FAIL) { ixgbe_check_fan_failure(adapter, eicr, TRUE); IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1_BY_MAC(hw)); } /* External PHY interrupt */ if ((hw->phy.type == ixgbe_phy_x550em_ext_t) && (eicr & IXGBE_EICR_GPI_SDP0_X540)) { IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP0_X540); taskqueue_enqueue(adapter->tq, &adapter->phy_task); } /* Re-enable other interrupts */ IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_OTHER); } /* ixgbe_msix_link */ /************************************************************************ * ixgbe_sysctl_interrupt_rate_handler ************************************************************************/ static int ixgbe_sysctl_interrupt_rate_handler(SYSCTL_HANDLER_ARGS) { struct ix_queue *que = ((struct ix_queue *)oidp->oid_arg1); int error; unsigned int reg, usec, rate; reg = IXGBE_READ_REG(&que->adapter->hw, IXGBE_EITR(que->msix)); usec = ((reg & 0x0FF8) >> 3); if (usec > 0) rate = 500000 / usec; else rate = 0; error = sysctl_handle_int(oidp, &rate, 0, req); if (error || !req->newptr) return error; reg &= ~0xfff; /* default, no limitation */ ixgbe_max_interrupt_rate = 0; if (rate > 0 && rate < 500000) { if (rate < 1000) rate = 1000; ixgbe_max_interrupt_rate = rate; reg |= ((4000000/rate) & 0xff8); } IXGBE_WRITE_REG(&que->adapter->hw, IXGBE_EITR(que->msix), reg); return (0); } /* ixgbe_sysctl_interrupt_rate_handler */ /************************************************************************ * ixgbe_add_device_sysctls ************************************************************************/ static void ixgbe_add_device_sysctls(struct adapter *adapter) { device_t dev = adapter->dev; struct 
ixgbe_hw *hw = &adapter->hw; struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; ctx = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); /* Sysctls for all devices */ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "fc", CTLTYPE_INT | CTLFLAG_RW, adapter, 0, ixgbe_sysctl_flowcntl, "I", IXGBE_SYSCTL_DESC_SET_FC); adapter->enable_aim = ixgbe_enable_aim; SYSCTL_ADD_INT(ctx, child, OID_AUTO, "enable_aim", CTLFLAG_RW, &adapter->enable_aim, 1, "Interrupt Moderation"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "advertise_speed", CTLTYPE_INT | CTLFLAG_RW, adapter, 0, ixgbe_sysctl_advertise, "I", IXGBE_SYSCTL_DESC_ADV_SPEED); #ifdef IXGBE_DEBUG /* testing sysctls (for all devices) */ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "power_state", CTLTYPE_INT | CTLFLAG_RW, adapter, 0, ixgbe_sysctl_power_state, "I", "PCI Power State"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "print_rss_config", CTLTYPE_STRING | CTLFLAG_RD, adapter, 0, ixgbe_sysctl_print_rss_config, "A", "Prints RSS Configuration"); #endif /* for X550 series devices */ if (hw->mac.type >= ixgbe_mac_X550) SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "dmac", CTLTYPE_INT | CTLFLAG_RW, adapter, 0, ixgbe_sysctl_dmac, "I", "DMA Coalesce"); /* for WoL-capable devices */ if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) { SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "wol_enable", CTLTYPE_INT | CTLFLAG_RW, adapter, 0, ixgbe_sysctl_wol_enable, "I", "Enable/Disable Wake on LAN"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "wufc", CTLTYPE_INT | CTLFLAG_RW, adapter, 0, ixgbe_sysctl_wufc, "I", "Enable/Disable Wake Up Filters"); } /* for X552/X557-AT devices */ if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) { struct sysctl_oid *phy_node; struct sysctl_oid_list *phy_list; phy_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "phy", CTLFLAG_RD, NULL, "External PHY sysctls"); phy_list = SYSCTL_CHILDREN(phy_node); SYSCTL_ADD_PROC(ctx, phy_list, OID_AUTO, "temp", CTLTYPE_INT | CTLFLAG_RD, adapter, 0, ixgbe_sysctl_phy_temp, "I", "Current External PHY Temperature (Celsius)"); SYSCTL_ADD_PROC(ctx, phy_list, OID_AUTO, "overtemp_occurred", CTLTYPE_INT | CTLFLAG_RD, adapter, 0, ixgbe_sysctl_phy_overtemp_occurred, "I", "External PHY High Temperature Event Occurred"); } if (adapter->feat_cap & IXGBE_FEATURE_EEE) { SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "eee_state", CTLTYPE_INT | CTLFLAG_RW, adapter, 0, ixgbe_sysctl_eee_state, "I", "EEE Power Save State"); } } /* ixgbe_add_device_sysctls */ /************************************************************************ * ixgbe_allocate_pci_resources ************************************************************************/ static int ixgbe_allocate_pci_resources(struct adapter *adapter) { device_t dev = adapter->dev; int rid; rid = PCIR_BAR(0); adapter->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!(adapter->pci_mem)) { device_printf(dev, "Unable to allocate bus resource: memory\n"); return (ENXIO); } /* Save bus_space values for READ/WRITE_REG macros */ adapter->osdep.mem_bus_space_tag = rman_get_bustag(adapter->pci_mem); adapter->osdep.mem_bus_space_handle = rman_get_bushandle(adapter->pci_mem); /* Set hw values for shared code */ adapter->hw.hw_addr = (u8 *)&adapter->osdep.mem_bus_space_handle; return (0); } /* ixgbe_allocate_pci_resources */ /************************************************************************ * ixgbe_detach - Device removal routine * * Called when the driver is being removed. * Stops the adapter and deallocates all the resources * that were allocated for driver operation. 
* * return 0 on success, positive on failure ************************************************************************/ static int ixgbe_detach(device_t dev) { struct adapter *adapter = device_get_softc(dev); struct ix_queue *que = adapter->queues; struct tx_ring *txr = adapter->tx_rings; u32 ctrl_ext; INIT_DEBUGOUT("ixgbe_detach: begin"); /* Make sure VLANS are not using driver */ if (adapter->ifp->if_vlantrunk != NULL) { device_printf(dev, "Vlan in use, detach first\n"); return (EBUSY); } if (ixgbe_pci_iov_detach(dev) != 0) { device_printf(dev, "SR-IOV in use; detach first.\n"); return (EBUSY); } ether_ifdetach(adapter->ifp); /* Stop the adapter */ IXGBE_CORE_LOCK(adapter); ixgbe_setup_low_power_mode(adapter); IXGBE_CORE_UNLOCK(adapter); for (int i = 0; i < adapter->num_queues; i++, que++, txr++) { if (que->tq) { if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) taskqueue_drain(que->tq, &txr->txq_task); taskqueue_drain(que->tq, &que->que_task); taskqueue_free(que->tq); } } /* Drain the Link queue */ if (adapter->tq) { taskqueue_drain(adapter->tq, &adapter->link_task); taskqueue_drain(adapter->tq, &adapter->mod_task); taskqueue_drain(adapter->tq, &adapter->msf_task); if (adapter->feat_cap & IXGBE_FEATURE_SRIOV) taskqueue_drain(adapter->tq, &adapter->mbx_task); taskqueue_drain(adapter->tq, &adapter->phy_task); if (adapter->feat_en & IXGBE_FEATURE_FDIR) taskqueue_drain(adapter->tq, &adapter->fdir_task); taskqueue_free(adapter->tq); } /* let hardware know driver is unloading */ ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT); ctrl_ext &= ~IXGBE_CTRL_EXT_DRV_LOAD; IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, ctrl_ext); /* Unregister VLAN events */ if (adapter->vlan_attach != NULL) EVENTHANDLER_DEREGISTER(vlan_config, adapter->vlan_attach); if (adapter->vlan_detach != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); callout_drain(&adapter->timer); if (adapter->feat_en & IXGBE_FEATURE_NETMAP) netmap_detach(adapter->ifp); ixgbe_free_pci_resources(adapter); bus_generic_detach(dev); if_free(adapter->ifp); ixgbe_free_transmit_structures(adapter); ixgbe_free_receive_structures(adapter); free(adapter->queues, M_DEVBUF); free(adapter->mta, M_IXGBE); IXGBE_CORE_LOCK_DESTROY(adapter); return (0); } /* ixgbe_detach */ /************************************************************************ * ixgbe_setup_low_power_mode - LPLU/WoL preparation * * Prepare the adapter/port for LPLU and/or WoL ************************************************************************/ static int ixgbe_setup_low_power_mode(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; device_t dev = adapter->dev; s32 error = 0; mtx_assert(&adapter->core_mtx, MA_OWNED); /* Limit power management flow to X550EM baseT */ if (hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T && hw->phy.ops.enter_lplu) { /* Turn off support for APM wakeup. (Using ACPI instead) */ IXGBE_WRITE_REG(hw, IXGBE_GRC, IXGBE_READ_REG(hw, IXGBE_GRC) & ~(u32)2); /* * Clear Wake Up Status register to prevent any previous wakeup * events from waking us up immediately after we suspend. 
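 *
 * Illustrative note: three registers cooperate here.  WUS latches the
 * wake-up events that have fired (writing all ones below clears it),
 * WUFC selects which events are allowed to wake the port (the
 * user-configured adapter->wufc, typically including the magic-packet
 * filter when WoL is enabled), and WUC arms wake-up and PME generation
 * through the WKEN and PME_EN bits.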
*/ IXGBE_WRITE_REG(hw, IXGBE_WUS, 0xffffffff); /* * Program the Wakeup Filter Control register with user filter * settings */ IXGBE_WRITE_REG(hw, IXGBE_WUFC, adapter->wufc); /* Enable wakeups and power management in Wakeup Control */ IXGBE_WRITE_REG(hw, IXGBE_WUC, IXGBE_WUC_WKEN | IXGBE_WUC_PME_EN); /* X550EM baseT adapters need a special LPLU flow */ hw->phy.reset_disable = true; ixgbe_stop(adapter); error = hw->phy.ops.enter_lplu(hw); if (error) device_printf(dev, "Error entering LPLU: %d\n", error); hw->phy.reset_disable = false; } else { /* Just stop for other adapters */ ixgbe_stop(adapter); } return error; } /* ixgbe_setup_low_power_mode */ /************************************************************************ * ixgbe_shutdown - Shutdown entry point ************************************************************************/ static int ixgbe_shutdown(device_t dev) { struct adapter *adapter = device_get_softc(dev); int error = 0; INIT_DEBUGOUT("ixgbe_shutdown: begin"); IXGBE_CORE_LOCK(adapter); error = ixgbe_setup_low_power_mode(adapter); IXGBE_CORE_UNLOCK(adapter); return (error); } /* ixgbe_shutdown */ /************************************************************************ * ixgbe_suspend * * From D0 to D3 ************************************************************************/ static int ixgbe_suspend(device_t dev) { struct adapter *adapter = device_get_softc(dev); int error = 0; INIT_DEBUGOUT("ixgbe_suspend: begin"); IXGBE_CORE_LOCK(adapter); error = ixgbe_setup_low_power_mode(adapter); IXGBE_CORE_UNLOCK(adapter); return (error); } /* ixgbe_suspend */ /************************************************************************ * ixgbe_resume * * From D3 to D0 ************************************************************************/ static int ixgbe_resume(device_t dev) { struct adapter *adapter = device_get_softc(dev); struct ifnet *ifp = adapter->ifp; struct ixgbe_hw *hw = &adapter->hw; u32 wus; INIT_DEBUGOUT("ixgbe_resume: begin"); IXGBE_CORE_LOCK(adapter); /* Read & clear WUS register */ wus = IXGBE_READ_REG(hw, IXGBE_WUS); if (wus) device_printf(dev, "Woken up by (WUS): %#010x\n", IXGBE_READ_REG(hw, IXGBE_WUS)); IXGBE_WRITE_REG(hw, IXGBE_WUS, 0xffffffff); /* And clear WUFC until next low-power transition */ IXGBE_WRITE_REG(hw, IXGBE_WUFC, 0); /* * Required after D3->D0 transition; * will re-advertise all previous advertised speeds */ if (ifp->if_flags & IFF_UP) ixgbe_init_locked(adapter); IXGBE_CORE_UNLOCK(adapter); return (0); } /* ixgbe_resume */ /************************************************************************ * ixgbe_set_if_hwassist - Set the various hardware offload abilities. * * Takes the ifnet's if_capenable flags (e.g. set by the user using * ifconfig) and indicates to the OS via the ifnet's if_hwassist * field what mbuf offload flags the driver will understand. 
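 *
 * Illustrative note: on FreeBSD 10 and later this is a direct mapping:
 * IFCAP_TXCSUM enables CSUM_IP | CSUM_IP_UDP | CSUM_IP_TCP (plus
 * CSUM_IP_SCTP on everything newer than 82598), IFCAP_TXCSUM_IPV6 the
 * CSUM_IP6_* equivalents, and IFCAP_TSO4/IFCAP_TSO6 map to CSUM_IP_TSO
 * and CSUM_IP6_TSO.  Toggling a capability with ifconfig therefore
 * changes which offload flags the stack may set on outgoing mbufs.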
************************************************************************/ static void ixgbe_set_if_hwassist(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; ifp->if_hwassist = 0; #if __FreeBSD_version >= 1000000 if (ifp->if_capenable & IFCAP_TSO4) ifp->if_hwassist |= CSUM_IP_TSO; if (ifp->if_capenable & IFCAP_TSO6) ifp->if_hwassist |= CSUM_IP6_TSO; if (ifp->if_capenable & IFCAP_TXCSUM) { ifp->if_hwassist |= (CSUM_IP | CSUM_IP_UDP | CSUM_IP_TCP); if (adapter->hw.mac.type != ixgbe_mac_82598EB) ifp->if_hwassist |= CSUM_IP_SCTP; } if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) { ifp->if_hwassist |= (CSUM_IP6_UDP | CSUM_IP6_TCP); if (adapter->hw.mac.type != ixgbe_mac_82598EB) ifp->if_hwassist |= CSUM_IP6_SCTP; } #else if (ifp->if_capenable & IFCAP_TSO) ifp->if_hwassist |= CSUM_TSO; if (ifp->if_capenable & IFCAP_TXCSUM) { ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); if (adapter->hw.mac.type != ixgbe_mac_82598EB) ifp->if_hwassist |= CSUM_SCTP; } #endif } /* ixgbe_set_if_hwassist */ /************************************************************************ * ixgbe_init_locked - Init entry point * * Used in two ways: It is used by the stack as an init * entry point in network interface structure. It is also * used by the driver as a hw/sw initialization routine to * get to a consistent state. * * return 0 on success, positive on failure ************************************************************************/ void ixgbe_init_locked(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; device_t dev = adapter->dev; struct ixgbe_hw *hw = &adapter->hw; struct tx_ring *txr; struct rx_ring *rxr; u32 txdctl, mhadd; u32 rxdctl, rxctrl; u32 ctrl_ext; int err = 0; mtx_assert(&adapter->core_mtx, MA_OWNED); INIT_DEBUGOUT("ixgbe_init_locked: begin"); hw->adapter_stopped = FALSE; ixgbe_stop_adapter(hw); callout_stop(&adapter->timer); /* Queue indices may change with IOV mode */ ixgbe_align_all_queue_indices(adapter); /* reprogram the RAR[0] in case user changed it. 
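 *
 * Illustrative note: RAR[0] is the primary unicast receive address; it
 * is written twice on purpose -- once with the address the shared code
 * already holds and again after copying IF_LLADDR(ifp), so a locally
 * administered address set with ifconfig survives the reset.  The
 * non-zero final argument is treated by the shared ixgbe_set_rar() code
 * as "mark this entry valid".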
*/ ixgbe_set_rar(hw, 0, hw->mac.addr, adapter->pool, IXGBE_RAH_AV); /* Get the latest mac address, User can use a LAA */ bcopy(IF_LLADDR(ifp), hw->mac.addr, IXGBE_ETH_LENGTH_OF_ADDRESS); ixgbe_set_rar(hw, 0, hw->mac.addr, adapter->pool, 1); hw->addr_ctrl.rar_used_count = 1; /* Set hardware offload abilities from ifnet flags */ ixgbe_set_if_hwassist(adapter); /* Prepare transmit descriptors and buffers */ if (ixgbe_setup_transmit_structures(adapter)) { device_printf(dev, "Could not setup transmit structures\n"); ixgbe_stop(adapter); return; } ixgbe_init_hw(hw); ixgbe_initialize_iov(adapter); ixgbe_initialize_transmit_units(adapter); /* Setup Multicast table */ ixgbe_set_multi(adapter); /* Determine the correct mbuf pool, based on frame size */ if (adapter->max_frame_size <= MCLBYTES) adapter->rx_mbuf_sz = MCLBYTES; else adapter->rx_mbuf_sz = MJUMPAGESIZE; /* Prepare receive descriptors and buffers */ if (ixgbe_setup_receive_structures(adapter)) { device_printf(dev, "Could not setup receive structures\n"); ixgbe_stop(adapter); return; } /* Configure RX settings */ ixgbe_initialize_receive_units(adapter); /* Enable SDP & MSI-X interrupts based on adapter */ ixgbe_config_gpie(adapter); /* Set MTU size */ if (ifp->if_mtu > ETHERMTU) { /* aka IXGBE_MAXFRS on 82599 and newer */ mhadd = IXGBE_READ_REG(hw, IXGBE_MHADD); mhadd &= ~IXGBE_MHADD_MFS_MASK; mhadd |= adapter->max_frame_size << IXGBE_MHADD_MFS_SHIFT; IXGBE_WRITE_REG(hw, IXGBE_MHADD, mhadd); } /* Now enable all the queues */ for (int i = 0; i < adapter->num_queues; i++) { txr = &adapter->tx_rings[i]; txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(txr->me)); txdctl |= IXGBE_TXDCTL_ENABLE; /* Set WTHRESH to 8, burst writeback */ txdctl |= (8 << 16); /* * When the internal queue falls below PTHRESH (32), * start prefetching as long as there are at least * HTHRESH (1) buffers ready. The values are taken * from the Intel linux driver 3.8.21. * Prefetching enables tx line rate even with 1 queue. */ txdctl |= (32 << 0) | (1 << 8); IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(txr->me), txdctl); } for (int i = 0, j = 0; i < adapter->num_queues; i++) { rxr = &adapter->rx_rings[i]; rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxr->me)); if (hw->mac.type == ixgbe_mac_82598EB) { /* * PTHRESH = 21 * HTHRESH = 4 * WTHRESH = 8 */ rxdctl &= ~0x3FFFFF; rxdctl |= 0x080420; } rxdctl |= IXGBE_RXDCTL_ENABLE; IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(rxr->me), rxdctl); for (; j < 10; j++) { if (IXGBE_READ_REG(hw, IXGBE_RXDCTL(rxr->me)) & IXGBE_RXDCTL_ENABLE) break; else msec_delay(1); } wmb(); /* * In netmap mode, we must preserve the buffers made * available to userspace before the if_init() * (this is true by default on the TX side, because * init makes all buffers available to userspace). * * netmap_reset() and the device specific routines * (e.g. ixgbe_setup_receive_rings()) map these * buffers at the end of the NIC ring, so here we * must set the RDT (tail) register to make sure * they are not overwritten. * * In this driver the NIC ring starts at RDH = 0, * RDT points to the last slot available for reception (?), * so RDT = num_rx_desc - 1 means the whole ring is available. 
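 *
 * Illustrative example (values assumed): with a 1024-entry RX ring and
 * nm_kr_rxspace() reporting 100 slots still held by userspace, the tail
 * below is written as 1024 - 1 - 100 = 923, so the NIC will not touch
 * those 100 buffers until netmap hands them back.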
*/ #ifdef DEV_NETMAP if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && (ifp->if_capenable & IFCAP_NETMAP)) { struct netmap_adapter *na = NA(adapter->ifp); struct netmap_kring *kring = &na->rx_rings[i]; int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring); IXGBE_WRITE_REG(hw, IXGBE_RDT(rxr->me), t); } else #endif /* DEV_NETMAP */ IXGBE_WRITE_REG(hw, IXGBE_RDT(rxr->me), adapter->num_rx_desc - 1); } /* Enable Receive engine */ rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL); if (hw->mac.type == ixgbe_mac_82598EB) rxctrl |= IXGBE_RXCTRL_DMBYPS; rxctrl |= IXGBE_RXCTRL_RXEN; ixgbe_enable_rx_dma(hw, rxctrl); callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter); /* Set up MSI-X routing */ if (adapter->feat_en & IXGBE_FEATURE_MSIX) { ixgbe_configure_ivars(adapter); /* Set up auto-mask */ if (hw->mac.type == ixgbe_mac_82598EB) IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE); else { IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF); IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF); } } else { /* Simple settings for Legacy/MSI */ ixgbe_set_ivar(adapter, 0, 0, 0); ixgbe_set_ivar(adapter, 0, 0, 1); IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE); } ixgbe_init_fdir(adapter); /* * Check on any SFP devices that * need to be kick-started */ if (hw->phy.type == ixgbe_phy_none) { err = hw->phy.ops.identify(hw); if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { device_printf(dev, "Unsupported SFP+ module type was detected.\n"); return; } } /* Set moderation on the Link interrupt */ IXGBE_WRITE_REG(hw, IXGBE_EITR(adapter->vector), IXGBE_LINK_ITR); /* Config/Enable Link */ ixgbe_config_link(adapter); /* Hardware Packet Buffer & Flow Control setup */ ixgbe_config_delay_values(adapter); /* Initialize the FC settings */ ixgbe_start_hw(hw); /* Set up VLAN support and filter */ ixgbe_setup_vlan_hw_support(adapter); /* Setup DMA Coalescing */ ixgbe_config_dmac(adapter); /* And now turn on interrupts */ ixgbe_enable_intr(adapter); /* Enable the use of the MBX by the VF's */ if (adapter->feat_en & IXGBE_FEATURE_SRIOV) { ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT); ctrl_ext |= IXGBE_CTRL_EXT_PFRSTD; IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext); } /* Now inform the stack we're ready */ ifp->if_drv_flags |= IFF_DRV_RUNNING; return; } /* ixgbe_init_locked */ /************************************************************************ * ixgbe_init ************************************************************************/ static void ixgbe_init(void *arg) { struct adapter *adapter = arg; IXGBE_CORE_LOCK(adapter); ixgbe_init_locked(adapter); IXGBE_CORE_UNLOCK(adapter); return; } /* ixgbe_init */ /************************************************************************ * ixgbe_set_ivar * * Setup the correct IVAR register for a particular MSI-X interrupt * (yes this is all very magic and confusing :) * - entry is the register array entry * - vector is the MSI-X vector for this queue * - type is RX/TX/MISC ************************************************************************/ static void ixgbe_set_ivar(struct adapter *adapter, u8 entry, u8 vector, s8 type) { struct ixgbe_hw *hw = &adapter->hw; u32 ivar, index; vector |= IXGBE_IVAR_ALLOC_VAL; switch (hw->mac.type) { case ixgbe_mac_82598EB: if (type == -1) entry = IXGBE_IVAR_OTHER_CAUSES_INDEX; else entry += (type * 64); index = (entry >> 2) & 0x1F; ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index)); ivar &= ~(0xFF << (8 * (entry & 0x3))); ivar |= (vector << (8 * (entry & 0x3))); IXGBE_WRITE_REG(&adapter->hw, IXGBE_IVAR(index), ivar); break; case ixgbe_mac_82599EB: case 
ixgbe_mac_X540: case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_X550EM_a: if (type == -1) { /* MISC IVAR */ index = (entry & 1) * 8; ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC); ivar &= ~(0xFF << index); ivar |= (vector << index); IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar); } else { /* RX/TX IVARS */ index = (16 * (entry & 1)) + (8 * type); ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(entry >> 1)); ivar &= ~(0xFF << index); ivar |= (vector << index); IXGBE_WRITE_REG(hw, IXGBE_IVAR(entry >> 1), ivar); } default: break; } } /* ixgbe_set_ivar */ /************************************************************************ * ixgbe_configure_ivars ************************************************************************/ static void ixgbe_configure_ivars(struct adapter *adapter) { struct ix_queue *que = adapter->queues; u32 newitr; if (ixgbe_max_interrupt_rate > 0) newitr = (4000000 / ixgbe_max_interrupt_rate) & 0x0FF8; else { /* * Disable DMA coalescing if interrupt moderation is * disabled. */ adapter->dmac = 0; newitr = 0; } for (int i = 0; i < adapter->num_queues; i++, que++) { struct rx_ring *rxr = &adapter->rx_rings[i]; struct tx_ring *txr = &adapter->tx_rings[i]; /* First the RX queue entry */ ixgbe_set_ivar(adapter, rxr->me, que->msix, 0); /* ... and the TX */ ixgbe_set_ivar(adapter, txr->me, que->msix, 1); /* Set an Initial EITR value */ IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITR(que->msix), newitr); } /* For the Link interrupt */ ixgbe_set_ivar(adapter, 1, adapter->vector, -1); } /* ixgbe_configure_ivars */ /************************************************************************ * ixgbe_config_gpie ************************************************************************/ static void ixgbe_config_gpie(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; u32 gpie; gpie = IXGBE_READ_REG(hw, IXGBE_GPIE); if (adapter->feat_en & IXGBE_FEATURE_MSIX) { /* Enable Enhanced MSI-X mode */ gpie |= IXGBE_GPIE_MSIX_MODE | IXGBE_GPIE_EIAME | IXGBE_GPIE_PBA_SUPPORT | IXGBE_GPIE_OCD; } /* Fan Failure Interrupt */ if (adapter->feat_en & IXGBE_FEATURE_FAN_FAIL) gpie |= IXGBE_SDP1_GPIEN; /* Thermal Sensor Interrupt */ if (adapter->feat_en & IXGBE_FEATURE_TEMP_SENSOR) gpie |= IXGBE_SDP0_GPIEN_X540; /* Link detection */ switch (hw->mac.type) { case ixgbe_mac_82599EB: gpie |= IXGBE_SDP1_GPIEN | IXGBE_SDP2_GPIEN; break; case ixgbe_mac_X550EM_x: case ixgbe_mac_X550EM_a: gpie |= IXGBE_SDP0_GPIEN_X540; break; default: break; } IXGBE_WRITE_REG(hw, IXGBE_GPIE, gpie); return; } /* ixgbe_config_gpie */ /************************************************************************ * ixgbe_config_delay_values * * Requires adapter->max_frame_size to be set. 
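 *
 * Illustrative note: the IXGBE_DV*() "delay value" macros estimate how
 * much data can still arrive after an XOFF pause frame is sent (frames
 * in flight, wire and response latency).  The high-water mark is the RX
 * packet-buffer size minus that allowance, converted to kilobytes with
 * IXGBE_BT2KB(), and the low-water mark comes from the matching
 * IXGBE_LOW_DV*() macro; both feed the XOFF/XON thresholds used by flow
 * control.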
************************************************************************/ static void ixgbe_config_delay_values(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; u32 rxpb, frame, size, tmp; frame = adapter->max_frame_size; /* Calculate High Water */ switch (hw->mac.type) { case ixgbe_mac_X540: case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_X550EM_a: tmp = IXGBE_DV_X540(frame, frame); break; default: tmp = IXGBE_DV(frame, frame); break; } size = IXGBE_BT2KB(tmp); rxpb = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(0)) >> 10; hw->fc.high_water[0] = rxpb - size; /* Now calculate Low Water */ switch (hw->mac.type) { case ixgbe_mac_X540: case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_X550EM_a: tmp = IXGBE_LOW_DV_X540(frame); break; default: tmp = IXGBE_LOW_DV(frame); break; } hw->fc.low_water[0] = IXGBE_BT2KB(tmp); hw->fc.pause_time = IXGBE_FC_PAUSE; hw->fc.send_xon = TRUE; } /* ixgbe_config_delay_values */ /************************************************************************ * ixgbe_set_multi - Multicast Update * * Called whenever multicast address list is updated. ************************************************************************/ static void ixgbe_set_multi(struct adapter *adapter) { struct ifmultiaddr *ifma; struct ixgbe_mc_addr *mta; struct ifnet *ifp = adapter->ifp; u8 *update_ptr; int mcnt = 0; u32 fctrl; IOCTL_DEBUGOUT("ixgbe_set_multi: begin"); mta = adapter->mta; bzero(mta, sizeof(*mta) * MAX_NUM_MULTICAST_ADDRESSES); #if __FreeBSD_version < 800000 IF_ADDR_LOCK(ifp); #else if_maddr_rlock(ifp); #endif TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (mcnt == MAX_NUM_MULTICAST_ADDRESSES) break; bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr), mta[mcnt].addr, IXGBE_ETH_LENGTH_OF_ADDRESS); mta[mcnt].vmdq = adapter->pool; mcnt++; } #if __FreeBSD_version < 800000 IF_ADDR_UNLOCK(ifp); #else if_maddr_runlock(ifp); #endif fctrl = IXGBE_READ_REG(&adapter->hw, IXGBE_FCTRL); fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); if (ifp->if_flags & IFF_PROMISC) fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); else if (mcnt >= MAX_NUM_MULTICAST_ADDRESSES || ifp->if_flags & IFF_ALLMULTI) { fctrl |= IXGBE_FCTRL_MPE; fctrl &= ~IXGBE_FCTRL_UPE; } else fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); IXGBE_WRITE_REG(&adapter->hw, IXGBE_FCTRL, fctrl); if (mcnt < MAX_NUM_MULTICAST_ADDRESSES) { update_ptr = (u8 *)mta; ixgbe_update_mc_addr_list(&adapter->hw, update_ptr, mcnt, ixgbe_mc_array_itr, TRUE); } return; } /* ixgbe_set_multi */ /************************************************************************ * ixgbe_mc_array_itr * * An iterator function needed by the multicast shared code. * It feeds the shared code routine the addresses in the * array of ixgbe_set_multi() one by one. ************************************************************************/ static u8 * ixgbe_mc_array_itr(struct ixgbe_hw *hw, u8 **update_ptr, u32 *vmdq) { struct ixgbe_mc_addr *mta; mta = (struct ixgbe_mc_addr *)*update_ptr; *vmdq = mta->vmdq; *update_ptr = (u8*)(mta + 1); return (mta->addr); } /* ixgbe_mc_array_itr */ /************************************************************************ * ixgbe_local_timer - Timer routine * * Checks for link status, updates statistics, * and runs the watchdog check. 
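 *
 * Illustrative note: "hung" detection is per queue.  txeof bumps a busy
 * counter whenever it runs without making progress while descriptors
 * remain outstanding; a queue reaching IXGBE_MAX_TX_BUSY is flagged
 * IXGBE_QUEUE_HUNG, and the full watchdog reset fires only when every
 * queue is in that state.  Queues that merely have work pending get
 * their interrupt rearmed through ixgbe_rearm_queues() instead.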
************************************************************************/ static void ixgbe_local_timer(void *arg) { struct adapter *adapter = arg; device_t dev = adapter->dev; struct ix_queue *que = adapter->queues; u64 queues = 0; int hung = 0; mtx_assert(&adapter->core_mtx, MA_OWNED); /* Check for pluggable optics */ if (adapter->sfp_probe) if (!ixgbe_sfp_probe(adapter)) goto out; /* Nothing to do */ ixgbe_update_link_status(adapter); ixgbe_update_stats_counters(adapter); /* * Check the TX queues status * - mark hung queues so we don't schedule on them * - watchdog only if all queues show hung */ for (int i = 0; i < adapter->num_queues; i++, que++) { /* Keep track of queues with work for soft irq */ if (que->txr->busy) queues |= ((u64)1 << que->me); /* * Each time txeof runs without cleaning, but there * are uncleaned descriptors it increments busy. If * we get to the MAX we declare it hung. */ if (que->busy == IXGBE_QUEUE_HUNG) { ++hung; /* Mark the queue as inactive */ adapter->active_queues &= ~((u64)1 << que->me); continue; } else { /* Check if we've come back from hung */ if ((adapter->active_queues & ((u64)1 << que->me)) == 0) adapter->active_queues |= ((u64)1 << que->me); } if (que->busy >= IXGBE_MAX_TX_BUSY) { device_printf(dev, "Warning queue %d appears to be hung!\n", i); que->txr->busy = IXGBE_QUEUE_HUNG; ++hung; } } /* Only truly watchdog if all queues show hung */ if (hung == adapter->num_queues) goto watchdog; else if (queues != 0) { /* Force an IRQ on queues with work */ ixgbe_rearm_queues(adapter, queues); } out: callout_reset(&adapter->timer, hz, ixgbe_local_timer, adapter); return; watchdog: device_printf(adapter->dev, "Watchdog timeout -- resetting\n"); adapter->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; adapter->watchdog_events++; ixgbe_init_locked(adapter); } /* ixgbe_local_timer */ /************************************************************************ * ixgbe_sfp_probe * * Determine if a port had optics inserted. 
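 *
 * Illustrative note: this polled probe only applies to the older NL PHY
 * (hw->phy.type == ixgbe_phy_nl) brought up with no module present;
 * other parts learn about module insertion through the SDP interrupts
 * serviced by ixgbe_msix_link() and ixgbe_handle_mod().  Once a module
 * has been identified, sfp_probe is cleared and the local timer stops
 * calling back in here.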
************************************************************************/ static bool ixgbe_sfp_probe(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; device_t dev = adapter->dev; bool result = FALSE; if ((hw->phy.type == ixgbe_phy_nl) && (hw->phy.sfp_type == ixgbe_sfp_type_not_present)) { s32 ret = hw->phy.ops.identify_sfp(hw); if (ret) goto out; ret = hw->phy.ops.reset(hw); adapter->sfp_probe = FALSE; if (ret == IXGBE_ERR_SFP_NOT_SUPPORTED) { device_printf(dev, "Unsupported SFP+ module detected!"); device_printf(dev, "Reload driver with supported module.\n"); goto out; } else device_printf(dev, "SFP+ module detected!\n"); /* We now have supported optics */ result = TRUE; } out: return (result); } /* ixgbe_sfp_probe */ /************************************************************************ * ixgbe_handle_mod - Tasklet for SFP module interrupts ************************************************************************/ static void ixgbe_handle_mod(void *context, int pending) { struct adapter *adapter = context; struct ixgbe_hw *hw = &adapter->hw; device_t dev = adapter->dev; u32 err, cage_full = 0; if (adapter->hw.need_crosstalk_fix) { switch (hw->mac.type) { case ixgbe_mac_82599EB: cage_full = IXGBE_READ_REG(hw, IXGBE_ESDP) & IXGBE_ESDP_SDP2; break; case ixgbe_mac_X550EM_x: case ixgbe_mac_X550EM_a: cage_full = IXGBE_READ_REG(hw, IXGBE_ESDP) & IXGBE_ESDP_SDP0; break; default: break; } if (!cage_full) return; } err = hw->phy.ops.identify_sfp(hw); if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { device_printf(dev, "Unsupported SFP+ module type was detected.\n"); return; } err = hw->mac.ops.setup_sfp(hw); if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { device_printf(dev, "Setup failure - unsupported SFP+ module type.\n"); return; } taskqueue_enqueue(adapter->tq, &adapter->msf_task); } /* ixgbe_handle_mod */ /************************************************************************ * ixgbe_handle_msf - Tasklet for MSF (multispeed fiber) interrupts ************************************************************************/ static void ixgbe_handle_msf(void *context, int pending) { struct adapter *adapter = context; struct ixgbe_hw *hw = &adapter->hw; u32 autoneg; bool negotiate; /* get_supported_phy_layer will call hw->phy.ops.identify_sfp() */ adapter->phy_layer = ixgbe_get_supported_physical_layer(hw); autoneg = hw->phy.autoneg_advertised; if ((!autoneg) && (hw->mac.ops.get_link_capabilities)) hw->mac.ops.get_link_capabilities(hw, &autoneg, &negotiate); if (hw->mac.ops.setup_link) hw->mac.ops.setup_link(hw, autoneg, TRUE); /* Adjust media types shown in ifconfig */ ifmedia_removeall(&adapter->media); ixgbe_add_media_types(adapter); ifmedia_set(&adapter->media, IFM_ETHER | IFM_AUTO); } /* ixgbe_handle_msf */ /************************************************************************ * ixgbe_handle_phy - Tasklet for external PHY interrupts ************************************************************************/ static void ixgbe_handle_phy(void *context, int pending) { struct adapter *adapter = context; struct ixgbe_hw *hw = &adapter->hw; int error; error = hw->phy.ops.handle_lasi(hw); if (error == IXGBE_ERR_OVERTEMP) device_printf(adapter->dev, "CRITICAL: EXTERNAL PHY OVER TEMP!! 
PHY will downshift to lower power state!\n"); else if (error) device_printf(adapter->dev, "Error handling LASI interrupt: %d\n", error); } /* ixgbe_handle_phy */ /************************************************************************ * ixgbe_stop - Stop the hardware * * Disables all traffic on the adapter by issuing a * global reset on the MAC and deallocates TX/RX buffers. ************************************************************************/ static void ixgbe_stop(void *arg) { struct ifnet *ifp; struct adapter *adapter = arg; struct ixgbe_hw *hw = &adapter->hw; ifp = adapter->ifp; mtx_assert(&adapter->core_mtx, MA_OWNED); INIT_DEBUGOUT("ixgbe_stop: begin\n"); ixgbe_disable_intr(adapter); callout_stop(&adapter->timer); /* Let the stack know...*/ ifp->if_drv_flags &= ~IFF_DRV_RUNNING; ixgbe_reset_hw(hw); hw->adapter_stopped = FALSE; ixgbe_stop_adapter(hw); if (hw->mac.type == ixgbe_mac_82599EB) ixgbe_stop_mac_link_on_d3_82599(hw); /* Turn off the laser - noop with no optics */ ixgbe_disable_tx_laser(hw); /* Update the stack */ adapter->link_up = FALSE; ixgbe_update_link_status(adapter); /* reprogram the RAR[0] in case user changed it. */ ixgbe_set_rar(&adapter->hw, 0, adapter->hw.mac.addr, 0, IXGBE_RAH_AV); return; } /* ixgbe_stop */ /************************************************************************ * ixgbe_update_link_status - Update OS on link state * * Note: Only updates the OS on the cached link state. * The real check of the hardware only happens with * a link interrupt. ************************************************************************/ static void ixgbe_update_link_status(struct adapter *adapter) { struct ifnet *ifp = adapter->ifp; device_t dev = adapter->dev; if (adapter->link_up) { if (adapter->link_active == FALSE) { if (bootverbose) device_printf(dev, "Link is up %d Gbps %s \n", ((adapter->link_speed == 128) ? 
10 : 1), "Full Duplex"); adapter->link_active = TRUE; /* Update any Flow Control changes */ ixgbe_fc_enable(&adapter->hw); /* Update DMA coalescing config */ ixgbe_config_dmac(adapter); if_link_state_change(ifp, LINK_STATE_UP); if (adapter->feat_en & IXGBE_FEATURE_SRIOV) ixgbe_ping_all_vfs(adapter); } } else { /* Link down */ if (adapter->link_active == TRUE) { if (bootverbose) device_printf(dev, "Link is Down\n"); if_link_state_change(ifp, LINK_STATE_DOWN); adapter->link_active = FALSE; if (adapter->feat_en & IXGBE_FEATURE_SRIOV) ixgbe_ping_all_vfs(adapter); } } return; } /* ixgbe_update_link_status */ /************************************************************************ * ixgbe_config_dmac - Configure DMA Coalescing ************************************************************************/ static void ixgbe_config_dmac(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; struct ixgbe_dmac_config *dcfg = &hw->mac.dmac_config; if (hw->mac.type < ixgbe_mac_X550 || !hw->mac.ops.dmac_config) return; if (dcfg->watchdog_timer ^ adapter->dmac || dcfg->link_speed ^ adapter->link_speed) { dcfg->watchdog_timer = adapter->dmac; dcfg->fcoe_en = false; dcfg->link_speed = adapter->link_speed; dcfg->num_tcs = 1; INIT_DEBUGOUT2("dmac settings: watchdog %d, link speed %d\n", dcfg->watchdog_timer, dcfg->link_speed); hw->mac.ops.dmac_config(hw); } } /* ixgbe_config_dmac */ /************************************************************************ * ixgbe_enable_intr ************************************************************************/ static void ixgbe_enable_intr(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; struct ix_queue *que = adapter->queues; u32 mask, fwsm; mask = (IXGBE_EIMS_ENABLE_MASK & ~IXGBE_EIMS_RTX_QUEUE); switch (adapter->hw.mac.type) { case ixgbe_mac_82599EB: mask |= IXGBE_EIMS_ECC; /* Temperature sensor on some adapters */ mask |= IXGBE_EIMS_GPI_SDP0; /* SFP+ (RX_LOS_N & MOD_ABS_N) */ mask |= IXGBE_EIMS_GPI_SDP1; mask |= IXGBE_EIMS_GPI_SDP2; break; case ixgbe_mac_X540: /* Detect if Thermal Sensor is enabled */ fwsm = IXGBE_READ_REG(hw, IXGBE_FWSM); if (fwsm & IXGBE_FWSM_TS_ENABLED) mask |= IXGBE_EIMS_TS; mask |= IXGBE_EIMS_ECC; break; case ixgbe_mac_X550: /* MAC thermal sensor is automatically enabled */ mask |= IXGBE_EIMS_TS; mask |= IXGBE_EIMS_ECC; break; case ixgbe_mac_X550EM_x: case ixgbe_mac_X550EM_a: /* Some devices use SDP0 for important information */ if (hw->device_id == IXGBE_DEV_ID_X550EM_X_SFP || hw->device_id == IXGBE_DEV_ID_X550EM_A_SFP || hw->device_id == IXGBE_DEV_ID_X550EM_A_SFP_N || hw->device_id == IXGBE_DEV_ID_X550EM_X_10G_T) mask |= IXGBE_EIMS_GPI_SDP0_BY_MAC(hw); if (hw->phy.type == ixgbe_phy_x550em_ext_t) mask |= IXGBE_EICR_GPI_SDP0_X540; mask |= IXGBE_EIMS_ECC; break; default: break; } /* Enable Fan Failure detection */ if (adapter->feat_en & IXGBE_FEATURE_FAN_FAIL) mask |= IXGBE_EIMS_GPI_SDP1; /* Enable SR-IOV */ if (adapter->feat_en & IXGBE_FEATURE_SRIOV) mask |= IXGBE_EIMS_MAILBOX; /* Enable Flow Director */ if (adapter->feat_en & IXGBE_FEATURE_FDIR) mask |= IXGBE_EIMS_FLOW_DIR; IXGBE_WRITE_REG(hw, IXGBE_EIMS, mask); /* With MSI-X we use auto clear */ if (adapter->msix_mem) { mask = IXGBE_EIMS_ENABLE_MASK; /* Don't autoclear Link */ mask &= ~IXGBE_EIMS_OTHER; mask &= ~IXGBE_EIMS_LSC; if (adapter->feat_cap & IXGBE_FEATURE_SRIOV) mask &= ~IXGBE_EIMS_MAILBOX; IXGBE_WRITE_REG(hw, IXGBE_EIAC, mask); } /* * Now enable all queues, this is done separately to * allow for handling the extended (beyond 32) MSI-X * vectors that can be used by 
82599 */ for (int i = 0; i < adapter->num_queues; i++, que++) ixgbe_enable_queue(adapter, que->msix); IXGBE_WRITE_FLUSH(hw); return; } /* ixgbe_enable_intr */ /************************************************************************ * ixgbe_disable_intr ************************************************************************/ static void ixgbe_disable_intr(struct adapter *adapter) { if (adapter->msix_mem) IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, 0); if (adapter->hw.mac.type == ixgbe_mac_82598EB) { IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0); } else { IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000); IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0); IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0); } IXGBE_WRITE_FLUSH(&adapter->hw); return; } /* ixgbe_disable_intr */ /************************************************************************ * ixgbe_legacy_irq - Legacy Interrupt Service routine ************************************************************************/ static void ixgbe_legacy_irq(void *arg) { struct ix_queue *que = arg; struct adapter *adapter = que->adapter; struct ixgbe_hw *hw = &adapter->hw; struct ifnet *ifp = adapter->ifp; struct tx_ring *txr = adapter->tx_rings; bool more = false; u32 eicr, eicr_mask; /* Silicon errata #26 on 82598 */ IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_IRQ_CLEAR_MASK); eicr = IXGBE_READ_REG(hw, IXGBE_EICR); ++que->irqs; if (eicr == 0) { ixgbe_enable_intr(adapter); return; } if (ifp->if_drv_flags & IFF_DRV_RUNNING) { more = ixgbe_rxeof(que); IXGBE_TX_LOCK(txr); ixgbe_txeof(txr); if (!ixgbe_ring_empty(ifp, txr->br)) ixgbe_start_locked(ifp, txr); IXGBE_TX_UNLOCK(txr); } /* Check for fan failure */ if (adapter->feat_en & IXGBE_FEATURE_FAN_FAIL) { ixgbe_check_fan_failure(adapter, eicr, true); IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EICR_GPI_SDP1_BY_MAC(hw)); } /* Link status change */ if (eicr & IXGBE_EICR_LSC) taskqueue_enqueue(adapter->tq, &adapter->link_task); if (ixgbe_is_sfp(hw)) { /* Pluggable optics-related interrupt */ if (hw->mac.type >= ixgbe_mac_X540) eicr_mask = IXGBE_EICR_GPI_SDP0_X540; else eicr_mask = IXGBE_EICR_GPI_SDP2_BY_MAC(hw); if (eicr & eicr_mask) { IXGBE_WRITE_REG(hw, IXGBE_EICR, eicr_mask); taskqueue_enqueue(adapter->tq, &adapter->mod_task); } if ((hw->mac.type == ixgbe_mac_82599EB) && (eicr & IXGBE_EICR_GPI_SDP1_BY_MAC(hw))) { IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_GPI_SDP1_BY_MAC(hw)); taskqueue_enqueue(adapter->tq, &adapter->msf_task); } } /* External PHY interrupt */ if ((hw->phy.type == ixgbe_phy_x550em_ext_t) && (eicr & IXGBE_EICR_GPI_SDP0_X540)) taskqueue_enqueue(adapter->tq, &adapter->phy_task); if (more) taskqueue_enqueue(que->tq, &que->que_task); else ixgbe_enable_intr(adapter); return; } /* ixgbe_legacy_irq */ /************************************************************************ * ixgbe_free_pci_resources ************************************************************************/ static void ixgbe_free_pci_resources(struct adapter *adapter) { struct ix_queue *que = adapter->queues; device_t dev = adapter->dev; int rid, memrid; if (adapter->hw.mac.type == ixgbe_mac_82598EB) memrid = PCIR_BAR(MSIX_82598_BAR); else memrid = PCIR_BAR(MSIX_82599_BAR); /* * There is a slight possibility of a failure mode * in attach that will result in entering this function * before interrupt resources have been initialized, and * in that case we do not want to execute the loops below * We can detect this reliably by the state of the adapter * res pointer. 
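 *
 * Illustrative note: the teardown below is ordered -- for each queue the
 * interrupt handler is torn down before its IRQ resource is released
 * (the resource id is que->msix + 1, since IRQ rids for MSI/MSI-X
 * vectors start at 1), the link/legacy interrupt is released last, and
 * only then are the MSI(-X) allocation and the memory BARs handed back
 * to the bus.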
*/ if (adapter->res == NULL) goto mem; /* * Release all msix queue resources: */ for (int i = 0; i < adapter->num_queues; i++, que++) { rid = que->msix + 1; if (que->tag != NULL) { bus_teardown_intr(dev, que->res, que->tag); que->tag = NULL; } if (que->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, que->res); } if (adapter->tag != NULL) { bus_teardown_intr(dev, adapter->res, adapter->tag); adapter->tag = NULL; } /* Clean the Legacy or Link interrupt last */ if (adapter->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, adapter->link_rid, adapter->res); mem: if ((adapter->feat_en & IXGBE_FEATURE_MSI) || (adapter->feat_en & IXGBE_FEATURE_MSIX)) pci_release_msi(dev); if (adapter->msix_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, memrid, adapter->msix_mem); if (adapter->pci_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0), adapter->pci_mem); return; } /* ixgbe_free_pci_resources */ /************************************************************************ * ixgbe_set_sysctl_value ************************************************************************/ static void ixgbe_set_sysctl_value(struct adapter *adapter, const char *name, const char *description, int *limit, int value) { *limit = value; SYSCTL_ADD_INT(device_get_sysctl_ctx(adapter->dev), SYSCTL_CHILDREN(device_get_sysctl_tree(adapter->dev)), OID_AUTO, name, CTLFLAG_RW, limit, value, description); } /* ixgbe_set_sysctl_value */ /************************************************************************ * ixgbe_sysctl_flowcntl * * SYSCTL wrapper around setting Flow Control ************************************************************************/ static int ixgbe_sysctl_flowcntl(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; int error, fc; adapter = (struct adapter *)arg1; fc = adapter->hw.fc.current_mode; error = sysctl_handle_int(oidp, &fc, 0, req); if ((error) || (req->newptr == NULL)) return (error); /* Don't bother if it's not changed */ if (fc == adapter->hw.fc.current_mode) return (0); return ixgbe_set_flowcntl(adapter, fc); } /* ixgbe_sysctl_flowcntl */ /************************************************************************ * ixgbe_set_flowcntl - Set flow control * * Flow control values: * 0 - off * 1 - rx pause * 2 - tx pause * 3 - full ************************************************************************/ static int ixgbe_set_flowcntl(struct adapter *adapter, int fc) { switch (fc) { case ixgbe_fc_rx_pause: case ixgbe_fc_tx_pause: case ixgbe_fc_full: adapter->hw.fc.requested_mode = fc; if (adapter->num_queues > 1) ixgbe_disable_rx_drop(adapter); break; case ixgbe_fc_none: adapter->hw.fc.requested_mode = ixgbe_fc_none; if (adapter->num_queues > 1) ixgbe_enable_rx_drop(adapter); break; default: return (EINVAL); } /* Don't autoneg if forcing a value */ adapter->hw.fc.disable_fc_autoneg = TRUE; ixgbe_fc_enable(&adapter->hw); return (0); } /* ixgbe_set_flowcntl */ /************************************************************************ * ixgbe_enable_rx_drop * * Enable the hardware to drop packets when the buffer is * full. This is useful with multiqueue, so that no single * queue being full stalls the entire RX engine. We only * enable this when Multiqueue is enabled AND Flow Control * is disabled. 
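 *
 * Note that ixgbe_set_flowcntl() above drives this: with more than one
 * queue it enables RX drop when flow control is turned off and disables
 * it again whenever a flow-control mode is requested.  The same drop
 * policy is also programmed per VF through the QDE register.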
************************************************************************/ static void ixgbe_enable_rx_drop(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; struct rx_ring *rxr; u32 srrctl; for (int i = 0; i < adapter->num_queues; i++) { rxr = &adapter->rx_rings[i]; srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxr->me)); srrctl |= IXGBE_SRRCTL_DROP_EN; IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxr->me), srrctl); } /* enable drop for each vf */ for (int i = 0; i < adapter->num_vfs; i++) { IXGBE_WRITE_REG(hw, IXGBE_QDE, (IXGBE_QDE_WRITE | (i << IXGBE_QDE_IDX_SHIFT) | IXGBE_QDE_ENABLE)); } } /* ixgbe_enable_rx_drop */ /************************************************************************ * ixgbe_disable_rx_drop ************************************************************************/ static void ixgbe_disable_rx_drop(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; struct rx_ring *rxr; u32 srrctl; for (int i = 0; i < adapter->num_queues; i++) { rxr = &adapter->rx_rings[i]; srrctl = IXGBE_READ_REG(hw, IXGBE_SRRCTL(rxr->me)); srrctl &= ~IXGBE_SRRCTL_DROP_EN; IXGBE_WRITE_REG(hw, IXGBE_SRRCTL(rxr->me), srrctl); } /* disable drop for each vf */ for (int i = 0; i < adapter->num_vfs; i++) { IXGBE_WRITE_REG(hw, IXGBE_QDE, (IXGBE_QDE_WRITE | (i << IXGBE_QDE_IDX_SHIFT))); } } /* ixgbe_disable_rx_drop */ /************************************************************************ * ixgbe_sysctl_advertise * * SYSCTL wrapper around setting advertised speed ************************************************************************/ static int ixgbe_sysctl_advertise(SYSCTL_HANDLER_ARGS) { struct adapter *adapter; int error, advertise; adapter = (struct adapter *)arg1; advertise = adapter->advertise; error = sysctl_handle_int(oidp, &advertise, 0, req); if ((error) || (req->newptr == NULL)) return (error); return ixgbe_set_advertise(adapter, advertise); } /* ixgbe_sysctl_advertise */ /************************************************************************ * ixgbe_set_advertise - Control advertised link speed * * Flags: * 0x1 - advertise 100 Mb * 0x2 - advertise 1G * 0x4 - advertise 10G * 0x8 - advertise 10 Mb (yes, Mb) ************************************************************************/ static int ixgbe_set_advertise(struct adapter *adapter, int advertise) { device_t dev; struct ixgbe_hw *hw; ixgbe_link_speed speed = 0; ixgbe_link_speed link_caps = 0; s32 err = IXGBE_NOT_IMPLEMENTED; bool negotiate = FALSE; /* Checks to validate new value */ if (adapter->advertise == advertise) /* no change */ return (0); dev = adapter->dev; hw = &adapter->hw; /* No speed changes for backplane media */ if (hw->phy.media_type == ixgbe_media_type_backplane) return (ENODEV); if (!((hw->phy.media_type == ixgbe_media_type_copper) || (hw->phy.multispeed_fiber))) { device_printf(dev, "Advertised speed can only be set on copper or multispeed fiber media types.\n"); return (EINVAL); } if (advertise < 0x1 || advertise > 0xF) { device_printf(dev, "Invalid advertised speed; valid modes are 0x1 through 0xF\n"); return (EINVAL); } if (hw->mac.ops.get_link_capabilities) { err = hw->mac.ops.get_link_capabilities(hw, &link_caps, &negotiate); if (err != IXGBE_SUCCESS) { device_printf(dev, "Unable to determine supported advertise speeds\n"); return (ENODEV); } } /* Set new value and report new advertised mode */ if (advertise & 0x1) { if (!(link_caps & IXGBE_LINK_SPEED_100_FULL)) { device_printf(dev, "Interface does not support 100Mb advertised speed\n"); return (EINVAL); } speed |= IXGBE_LINK_SPEED_100_FULL; } if (advertise & 
0x2) { if (!(link_caps & IXGBE_LINK_SPEED_1GB_FULL)) { device_printf(dev, "Interface does not support 1Gb advertised speed\n"); return (EINVAL); } speed |= IXGBE_LINK_SPEED_1GB_FULL; } if (advertise & 0x4) { if (!(link_caps & IXGBE_LINK_SPEED_10GB_FULL)) { device_printf(dev, "Interface does not support 10Gb advertised speed\n"); return (EINVAL); } speed |= IXGBE_LINK_SPEED_10GB_FULL; } if (advertise & 0x8) { if (!(link_caps & IXGBE_LINK_SPEED_10_FULL)) { device_printf(dev, "Interface does not support 10Mb advertised speed\n"); return (EINVAL); } speed |= IXGBE_LINK_SPEED_10_FULL; } hw->mac.autotry_restart = TRUE; hw->mac.ops.setup_link(hw, speed, TRUE); adapter->advertise = advertise; return (0); } /* ixgbe_set_advertise */ /************************************************************************ * ixgbe_get_advertise - Get current advertised speed settings * * Formatted for sysctl usage. * Flags: * 0x1 - advertise 100 Mb * 0x2 - advertise 1G * 0x4 - advertise 10G * 0x8 - advertise 10 Mb (yes, Mb) ************************************************************************/ static int ixgbe_get_advertise(struct adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; int speed; ixgbe_link_speed link_caps = 0; s32 err; bool negotiate = FALSE; /* * Advertised speed means nothing unless it's copper or * multi-speed fiber */ if (!(hw->phy.media_type == ixgbe_media_type_copper) && !(hw->phy.multispeed_fiber)) return (0); err = hw->mac.ops.get_link_capabilities(hw, &link_caps, &negotiate); if (err != IXGBE_SUCCESS) return (0); speed = ((link_caps & IXGBE_LINK_SPEED_10GB_FULL) ? 4 : 0) | ((link_caps & IXGBE_LINK_SPEED_1GB_FULL) ? 2 : 0) | ((link_caps & IXGBE_LINK_SPEED_100_FULL) ? 1 : 0) | ((link_caps & IXGBE_LINK_SPEED_10_FULL) ? 8 : 0); return speed; } /* ixgbe_get_advertise */ /************************************************************************ * ixgbe_sysctl_dmac - Manage DMA Coalescing * * Control values: * 0/1 - off / on (use default value of 1000) * * Legal timer values are: * 50,100,250,500,1000,2000,5000,10000 * * Turning off interrupt moderation will also turn this off. 
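 *
 * Illustrative userland usage (a sketch only; the sysctl OID name and
 * unit number below are assumed, not taken from this file):
 *
 *	#include <sys/types.h>
 *	#include <sys/sysctl.h>
 *
 *	int v = 1000;	// one of the legal watchdog values listed above
 *	sysctlbyname("dev.ix.0.dmac", NULL, NULL, &v, sizeof(v));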
************************************************************************/ static int ixgbe_sysctl_dmac(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; struct ifnet *ifp = adapter->ifp; int error; u32 newval; newval = adapter->dmac; error = sysctl_handle_int(oidp, &newval, 0, req); if ((error) || (req->newptr == NULL)) return (error); switch (newval) { case 0: /* Disabled */ adapter->dmac = 0; break; case 1: /* Enable and use default */ adapter->dmac = 1000; break; case 50: case 100: case 250: case 500: case 1000: case 2000: case 5000: case 10000: /* Legal values - allow */ adapter->dmac = newval; break; default: /* Do nothing, illegal value */ return (EINVAL); } /* Re-initialize hardware if it's already running */ if (ifp->if_drv_flags & IFF_DRV_RUNNING) ixgbe_init(adapter); return (0); } /* ixgbe_sysctl_dmac */ #ifdef IXGBE_DEBUG /************************************************************************ * ixgbe_sysctl_power_state * * Sysctl to test power states * Values: * 0 - set device to D0 * 3 - set device to D3 * (none) - get current device power state ************************************************************************/ static int ixgbe_sysctl_power_state(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; device_t dev = adapter->dev; int curr_ps, new_ps, error = 0; curr_ps = new_ps = pci_get_powerstate(dev); error = sysctl_handle_int(oidp, &new_ps, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (new_ps == curr_ps) return (0); if (new_ps == 3 && curr_ps == 0) error = DEVICE_SUSPEND(dev); else if (new_ps == 0 && curr_ps == 3) error = DEVICE_RESUME(dev); else return (EINVAL); device_printf(dev, "New state: %d\n", pci_get_powerstate(dev)); return (error); } /* ixgbe_sysctl_power_state */ #endif /************************************************************************ * ixgbe_sysctl_wol_enable * * Sysctl to enable/disable the WoL capability, * if supported by the adapter. * * Values: * 0 - disabled * 1 - enabled ************************************************************************/ static int ixgbe_sysctl_wol_enable(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; struct ixgbe_hw *hw = &adapter->hw; int new_wol_enabled; int error = 0; new_wol_enabled = hw->wol_enabled; error = sysctl_handle_int(oidp, &new_wol_enabled, 0, req); if ((error) || (req->newptr == NULL)) return (error); new_wol_enabled = !!(new_wol_enabled); if (new_wol_enabled == hw->wol_enabled) return (0); if (new_wol_enabled > 0 && !adapter->wol_support) return (ENODEV); else hw->wol_enabled = new_wol_enabled; return (0); } /* ixgbe_sysctl_wol_enable */ /************************************************************************ * ixgbe_sysctl_wufc - Wake Up Filter Control * * Sysctl to enable/disable the types of packets that the * adapter will wake up on upon receipt. * Flags: * 0x1 - Link Status Change * 0x2 - Magic Packet * 0x4 - Direct Exact * 0x8 - Directed Multicast * 0x10 - Broadcast * 0x20 - ARP/IPv4 Request Packet * 0x40 - Direct IPv4 Packet * 0x80 - Direct IPv6 Packet * * Settings not listed above will cause the sysctl to return an error. 
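 *
 * For example, a value of 0x22 (0x2 | 0x20) wakes the adapter on Magic
 * Packet and ARP/IPv4 request packets, while any value with bits above
 * 0x80 set (e.g. 0x100) is rejected with EINVAL.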
************************************************************************/ static int ixgbe_sysctl_wufc(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; int error = 0; u32 new_wufc; new_wufc = adapter->wufc; error = sysctl_handle_int(oidp, &new_wufc, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (new_wufc == adapter->wufc) return (0); if (new_wufc & 0xffffff00) return (EINVAL); new_wufc &= 0xff; new_wufc |= (0xffffff & adapter->wufc); adapter->wufc = new_wufc; return (0); } /* ixgbe_sysctl_wufc */ #ifdef IXGBE_DEBUG /************************************************************************ * ixgbe_sysctl_print_rss_config ************************************************************************/ static int ixgbe_sysctl_print_rss_config(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; struct ixgbe_hw *hw = &adapter->hw; device_t dev = adapter->dev; struct sbuf *buf; int error = 0, reta_size; u32 reg; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for output.\n"); return (ENOMEM); } // TODO: use sbufs to make a string to print out /* Set multiplier for RETA setup and table size based on MAC */ switch (adapter->hw.mac.type) { case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_X550EM_a: reta_size = 128; break; default: reta_size = 32; break; } /* Print out the redirection table */ sbuf_cat(buf, "\n"); for (int i = 0; i < reta_size; i++) { if (i < 32) { reg = IXGBE_READ_REG(hw, IXGBE_RETA(i)); sbuf_printf(buf, "RETA(%2d): 0x%08x\n", i, reg); } else { reg = IXGBE_READ_REG(hw, IXGBE_ERETA(i - 32)); sbuf_printf(buf, "ERETA(%2d): 0x%08x\n", i - 32, reg); } } // TODO: print more config error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (0); } /* ixgbe_sysctl_print_rss_config */ #endif /* IXGBE_DEBUG */ /************************************************************************ * ixgbe_sysctl_phy_temp - Retrieve temperature of PHY * * For X552/X557-AT devices using an external PHY ************************************************************************/ static int ixgbe_sysctl_phy_temp(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; struct ixgbe_hw *hw = &adapter->hw; u16 reg; if (hw->device_id != IXGBE_DEV_ID_X550EM_X_10G_T) { device_printf(adapter->dev, "Device has no supported external thermal sensor.\n"); return (ENODEV); } if (hw->phy.ops.read_reg(hw, IXGBE_PHY_CURRENT_TEMP, IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, ®)) { device_printf(adapter->dev, "Error reading from PHY's current temperature register\n"); return (EAGAIN); } /* Shift temp for output */ reg = reg >> 8; return (sysctl_handle_int(oidp, NULL, reg, req)); } /* ixgbe_sysctl_phy_temp */ /************************************************************************ * ixgbe_sysctl_phy_overtemp_occurred * * Reports (directly from the PHY) whether the current PHY * temperature is over the overtemp threshold. 
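 *
 * The status is read from IXGBE_PHY_OVERTEMP_STATUS over MDIO and bit
 * 14 (0x4000) of that register is reported back as a 0/1 value.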
************************************************************************/ static int ixgbe_sysctl_phy_overtemp_occurred(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; struct ixgbe_hw *hw = &adapter->hw; u16 reg; if (hw->device_id != IXGBE_DEV_ID_X550EM_X_10G_T) { device_printf(adapter->dev, "Device has no supported external thermal sensor.\n"); return (ENODEV); } if (hw->phy.ops.read_reg(hw, IXGBE_PHY_OVERTEMP_STATUS, IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, ®)) { device_printf(adapter->dev, "Error reading from PHY's temperature status register\n"); return (EAGAIN); } /* Get occurrence bit */ reg = !!(reg & 0x4000); return (sysctl_handle_int(oidp, 0, reg, req)); } /* ixgbe_sysctl_phy_overtemp_occurred */ /************************************************************************ * ixgbe_sysctl_eee_state * * Sysctl to set EEE power saving feature * Values: * 0 - disable EEE * 1 - enable EEE * (none) - get current device EEE state ************************************************************************/ static int ixgbe_sysctl_eee_state(SYSCTL_HANDLER_ARGS) { struct adapter *adapter = (struct adapter *)arg1; device_t dev = adapter->dev; int curr_eee, new_eee, error = 0; s32 retval; curr_eee = new_eee = !!(adapter->feat_en & IXGBE_FEATURE_EEE); error = sysctl_handle_int(oidp, &new_eee, 0, req); if ((error) || (req->newptr == NULL)) return (error); /* Nothing to do */ if (new_eee == curr_eee) return (0); /* Not supported */ if (!(adapter->feat_cap & IXGBE_FEATURE_EEE)) return (EINVAL); /* Bounds checking */ if ((new_eee < 0) || (new_eee > 1)) return (EINVAL); retval = adapter->hw.mac.ops.setup_eee(&adapter->hw, new_eee); if (retval) { device_printf(dev, "Error in EEE setup: 0x%08X\n", retval); return (EINVAL); } /* Restart auto-neg */ ixgbe_init(adapter); device_printf(dev, "New EEE state: %d\n", new_eee); /* Cache new value */ if (new_eee) adapter->feat_en |= IXGBE_FEATURE_EEE; else adapter->feat_en &= ~IXGBE_FEATURE_EEE; return (error); } /* ixgbe_sysctl_eee_state */ /************************************************************************ * ixgbe_init_device_features ************************************************************************/ static void ixgbe_init_device_features(struct adapter *adapter) { adapter->feat_cap = IXGBE_FEATURE_NETMAP | IXGBE_FEATURE_RSS | IXGBE_FEATURE_MSI | IXGBE_FEATURE_MSIX | IXGBE_FEATURE_LEGACY_IRQ | IXGBE_FEATURE_LEGACY_TX; /* Set capabilities first... 
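 * feat_cap records what the hardware could support; feat_en is derived
 * from it further below, based on tunables and dependency checks (for
 * instance RSS and SR-IOV are cleared again when MSI-X is unavailable).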
*/ switch (adapter->hw.mac.type) { case ixgbe_mac_82598EB: if (adapter->hw.device_id == IXGBE_DEV_ID_82598AT) adapter->feat_cap |= IXGBE_FEATURE_FAN_FAIL; break; case ixgbe_mac_X540: adapter->feat_cap |= IXGBE_FEATURE_SRIOV; adapter->feat_cap |= IXGBE_FEATURE_FDIR; if ((adapter->hw.device_id == IXGBE_DEV_ID_X540_BYPASS) && (adapter->hw.bus.func == 0)) adapter->feat_cap |= IXGBE_FEATURE_BYPASS; break; case ixgbe_mac_X550: adapter->feat_cap |= IXGBE_FEATURE_TEMP_SENSOR; adapter->feat_cap |= IXGBE_FEATURE_SRIOV; adapter->feat_cap |= IXGBE_FEATURE_FDIR; break; case ixgbe_mac_X550EM_x: adapter->feat_cap |= IXGBE_FEATURE_SRIOV; adapter->feat_cap |= IXGBE_FEATURE_FDIR; if (adapter->hw.device_id == IXGBE_DEV_ID_X550EM_X_KR) adapter->feat_cap |= IXGBE_FEATURE_EEE; break; case ixgbe_mac_X550EM_a: adapter->feat_cap |= IXGBE_FEATURE_SRIOV; adapter->feat_cap |= IXGBE_FEATURE_FDIR; adapter->feat_cap &= ~IXGBE_FEATURE_LEGACY_IRQ; if ((adapter->hw.device_id == IXGBE_DEV_ID_X550EM_A_1G_T) || (adapter->hw.device_id == IXGBE_DEV_ID_X550EM_A_1G_T_L)) { adapter->feat_cap |= IXGBE_FEATURE_TEMP_SENSOR; adapter->feat_cap |= IXGBE_FEATURE_EEE; } break; case ixgbe_mac_82599EB: adapter->feat_cap |= IXGBE_FEATURE_SRIOV; adapter->feat_cap |= IXGBE_FEATURE_FDIR; if ((adapter->hw.device_id == IXGBE_DEV_ID_82599_BYPASS) && (adapter->hw.bus.func == 0)) adapter->feat_cap |= IXGBE_FEATURE_BYPASS; if (adapter->hw.device_id == IXGBE_DEV_ID_82599_QSFP_SF_QP) adapter->feat_cap &= ~IXGBE_FEATURE_LEGACY_IRQ; break; default: break; } /* Enabled by default... */ /* Fan failure detection */ if (adapter->feat_cap & IXGBE_FEATURE_FAN_FAIL) adapter->feat_en |= IXGBE_FEATURE_FAN_FAIL; /* Netmap */ if (adapter->feat_cap & IXGBE_FEATURE_NETMAP) adapter->feat_en |= IXGBE_FEATURE_NETMAP; /* EEE */ if (adapter->feat_cap & IXGBE_FEATURE_EEE) adapter->feat_en |= IXGBE_FEATURE_EEE; /* Thermal Sensor */ if (adapter->feat_cap & IXGBE_FEATURE_TEMP_SENSOR) adapter->feat_en |= IXGBE_FEATURE_TEMP_SENSOR; /* Enabled via global sysctl... */ /* Flow Director */ if (ixgbe_enable_fdir) { if (adapter->feat_cap & IXGBE_FEATURE_FDIR) adapter->feat_en |= IXGBE_FEATURE_FDIR; else device_printf(adapter->dev, "Device does not support Flow Director. Leaving disabled."); } /* Legacy (single queue) transmit */ if ((adapter->feat_cap & IXGBE_FEATURE_LEGACY_TX) && ixgbe_enable_legacy_tx) adapter->feat_en |= IXGBE_FEATURE_LEGACY_TX; /* * Message Signal Interrupts - Extended (MSI-X) * Normal MSI is only enabled if MSI-X calls fail. */ if (!ixgbe_enable_msix) adapter->feat_cap &= ~IXGBE_FEATURE_MSIX; /* Receive-Side Scaling (RSS) */ if ((adapter->feat_cap & IXGBE_FEATURE_RSS) && ixgbe_enable_rss) adapter->feat_en |= IXGBE_FEATURE_RSS; /* Disable features with unmet dependencies... */ /* No MSI-X */ if (!(adapter->feat_cap & IXGBE_FEATURE_MSIX)) { adapter->feat_cap &= ~IXGBE_FEATURE_RSS; adapter->feat_cap &= ~IXGBE_FEATURE_SRIOV; adapter->feat_en &= ~IXGBE_FEATURE_RSS; adapter->feat_en &= ~IXGBE_FEATURE_SRIOV; } } /* ixgbe_init_device_features */ /************************************************************************ * ixgbe_probe - Device identification routine * * Determines if the driver should be loaded on * adapter based on its PCI vendor/device ID. 
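 * The IDs are matched against ixgbe_vendor_info_array, with zero
 * subvendor/subdevice entries treated as wildcards.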
* * return BUS_PROBE_DEFAULT on success, positive on failure ************************************************************************/ static int ixgbe_probe(device_t dev) { ixgbe_vendor_info_t *ent; u16 pci_vendor_id = 0; u16 pci_device_id = 0; u16 pci_subvendor_id = 0; u16 pci_subdevice_id = 0; char adapter_name[256]; INIT_DEBUGOUT("ixgbe_probe: begin"); pci_vendor_id = pci_get_vendor(dev); if (pci_vendor_id != IXGBE_INTEL_VENDOR_ID) return (ENXIO); pci_device_id = pci_get_device(dev); pci_subvendor_id = pci_get_subvendor(dev); pci_subdevice_id = pci_get_subdevice(dev); ent = ixgbe_vendor_info_array; while (ent->vendor_id != 0) { if ((pci_vendor_id == ent->vendor_id) && (pci_device_id == ent->device_id) && ((pci_subvendor_id == ent->subvendor_id) || (ent->subvendor_id == 0)) && ((pci_subdevice_id == ent->subdevice_id) || (ent->subdevice_id == 0))) { sprintf(adapter_name, "%s, Version - %s", ixgbe_strings[ent->index], ixgbe_driver_version); device_set_desc_copy(dev, adapter_name); ++ixgbe_total_ports; return (BUS_PROBE_DEFAULT); } ent++; } return (ENXIO); } /* ixgbe_probe */ /************************************************************************ * ixgbe_ioctl - Ioctl entry point * * Called when the user wants to configure the interface. * * return 0 on success, positive on failure ************************************************************************/ static int ixgbe_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct adapter *adapter = ifp->if_softc; struct ifreq *ifr = (struct ifreq *) data; #if defined(INET) || defined(INET6) struct ifaddr *ifa = (struct ifaddr *)data; #endif int error = 0; bool avoid_reset = FALSE; switch (command) { case SIOCSIFADDR: #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) avoid_reset = TRUE; #endif #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) avoid_reset = TRUE; #endif /* * Calling init results in link renegotiation, * so we avoid doing it when possible. 
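 * For INET/INET6 address assignment the interface is simply marked up
 * (and initialized if it was not running) and, for IPv4, ARP is
 * re-initialized, rather than falling through to ether_ioctl().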
*/ if (avoid_reset) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) ixgbe_init(adapter); #ifdef INET if (!(ifp->if_flags & IFF_NOARP)) arp_ifinit(ifp, ifa); #endif } else error = ether_ioctl(ifp, command, data); break; case SIOCSIFMTU: IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)"); if (ifr->ifr_mtu > IXGBE_MAX_MTU) { error = EINVAL; } else { IXGBE_CORE_LOCK(adapter); ifp->if_mtu = ifr->ifr_mtu; adapter->max_frame_size = ifp->if_mtu + IXGBE_MTU_HDR; if (ifp->if_drv_flags & IFF_DRV_RUNNING) ixgbe_init_locked(adapter); ixgbe_recalculate_max_frame(adapter); IXGBE_CORE_UNLOCK(adapter); } break; case SIOCSIFFLAGS: IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)"); IXGBE_CORE_LOCK(adapter); if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { if ((ifp->if_flags ^ adapter->if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) { ixgbe_set_promisc(adapter); } } else ixgbe_init_locked(adapter); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) ixgbe_stop(adapter); adapter->if_flags = ifp->if_flags; IXGBE_CORE_UNLOCK(adapter); break; case SIOCADDMULTI: case SIOCDELMULTI: IOCTL_DEBUGOUT("ioctl: SIOC(ADD|DEL)MULTI"); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { IXGBE_CORE_LOCK(adapter); ixgbe_disable_intr(adapter); ixgbe_set_multi(adapter); ixgbe_enable_intr(adapter); IXGBE_CORE_UNLOCK(adapter); } break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)"); error = ifmedia_ioctl(ifp, ifr, &adapter->media, command); break; case SIOCSIFCAP: { IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)"); int mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (!mask) break; /* HW cannot turn these on/off separately */ if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) { ifp->if_capenable ^= IFCAP_RXCSUM; ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; } if (mask & IFCAP_TXCSUM) ifp->if_capenable ^= IFCAP_TXCSUM; if (mask & IFCAP_TXCSUM_IPV6) ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; if (mask & IFCAP_TSO4) ifp->if_capenable ^= IFCAP_TSO4; if (mask & IFCAP_TSO6) ifp->if_capenable ^= IFCAP_TSO6; if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_VLAN_HWFILTER) ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { IXGBE_CORE_LOCK(adapter); ixgbe_init_locked(adapter); IXGBE_CORE_UNLOCK(adapter); } VLAN_CAPABILITIES(ifp); break; } #if __FreeBSD_version >= 1100036 case SIOCGI2C: { struct ixgbe_hw *hw = &adapter->hw; struct ifi2creq i2c; int i; IOCTL_DEBUGOUT("ioctl: SIOCGI2C (Get I2C Data)"); - error = copyin(ifr->ifr_data, &i2c, sizeof(i2c)); + error = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c)); if (error != 0) break; if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) { error = EINVAL; break; } if (i2c.len > sizeof(i2c.data)) { error = EINVAL; break; } for (i = 0; i < i2c.len; i++) hw->phy.ops.read_i2c_byte(hw, i2c.offset + i, i2c.dev_addr, &i2c.data[i]); - error = copyout(&i2c, ifr->ifr_data, sizeof(i2c)); + error = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c)); break; } #endif default: IOCTL_DEBUGOUT1("ioctl: UNKNOWN (0x%X)\n", (int)command); error = ether_ioctl(ifp, command, data); break; } return (error); } /* ixgbe_ioctl */ /************************************************************************ * ixgbe_check_fan_failure ************************************************************************/ static void 
ixgbe_check_fan_failure(struct adapter *adapter, u32 reg, bool in_interrupt) { u32 mask; mask = (in_interrupt) ? IXGBE_EICR_GPI_SDP1_BY_MAC(&adapter->hw) : IXGBE_ESDP_SDP1; if (reg & mask) device_printf(adapter->dev, "\nCRITICAL: FAN FAILURE!! REPLACE IMMEDIATELY!!\n"); } /* ixgbe_check_fan_failure */ /************************************************************************ * ixgbe_handle_que ************************************************************************/ static void ixgbe_handle_que(void *context, int pending) { struct ix_queue *que = context; struct adapter *adapter = que->adapter; struct tx_ring *txr = que->txr; struct ifnet *ifp = adapter->ifp; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { ixgbe_rxeof(que); IXGBE_TX_LOCK(txr); ixgbe_txeof(txr); if (!ixgbe_ring_empty(ifp, txr->br)) ixgbe_start_locked(ifp, txr); IXGBE_TX_UNLOCK(txr); } /* Re-enable this interrupt */ if (que->res != NULL) ixgbe_enable_queue(adapter, que->msix); else ixgbe_enable_intr(adapter); return; } /* ixgbe_handle_que */ /************************************************************************ * ixgbe_allocate_legacy - Setup the Legacy or MSI Interrupt handler ************************************************************************/ static int ixgbe_allocate_legacy(struct adapter *adapter) { device_t dev = adapter->dev; struct ix_queue *que = adapter->queues; struct tx_ring *txr = adapter->tx_rings; int error; /* We allocate a single interrupt resource */ adapter->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &adapter->link_rid, RF_SHAREABLE | RF_ACTIVE); if (adapter->res == NULL) { device_printf(dev, "Unable to allocate bus resource: interrupt\n"); return (ENXIO); } /* * Try allocating a fast interrupt and the associated deferred * processing contexts. */ if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr); TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que); que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT, taskqueue_thread_enqueue, &que->tq); taskqueue_start_threads(&que->tq, 1, PI_NET, "%s ixq", device_get_nameunit(adapter->dev)); /* Tasklets for Link, SFP and Multispeed Fiber */ TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter); TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter); TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter); TASK_INIT(&adapter->phy_task, 0, ixgbe_handle_phy, adapter); if (adapter->feat_en & IXGBE_FEATURE_FDIR) TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter); adapter->tq = taskqueue_create_fast("ixgbe_link", M_NOWAIT, taskqueue_thread_enqueue, &adapter->tq); taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq", device_get_nameunit(adapter->dev)); if ((error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, ixgbe_legacy_irq, que, &adapter->tag)) != 0) { device_printf(dev, "Failed to register fast interrupt handler: %d\n", error); taskqueue_free(que->tq); taskqueue_free(adapter->tq); que->tq = NULL; adapter->tq = NULL; return (error); } /* For simplicity in the handlers */ adapter->active_queues = IXGBE_EIMS_ENABLE_MASK; return (0); } /* ixgbe_allocate_legacy */ /************************************************************************ * ixgbe_allocate_msix - Setup MSI-X Interrupt resources and handlers ************************************************************************/ static int ixgbe_allocate_msix(struct adapter *adapter) { device_t dev = adapter->dev; struct ix_queue *que = adapter->queues; struct tx_ring *txr = adapter->tx_rings; int error, 
rid, vector = 0; int cpu_id = 0; unsigned int rss_buckets = 0; cpuset_t cpu_mask; /* * If we're doing RSS, the number of queues needs to * match the number of RSS buckets that are configured. * * + If there's more queues than RSS buckets, we'll end * up with queues that get no traffic. * * + If there's more RSS buckets than queues, we'll end * up having multiple RSS buckets map to the same queue, * so there'll be some contention. */ rss_buckets = rss_getnumbuckets(); if ((adapter->feat_en & IXGBE_FEATURE_RSS) && (adapter->num_queues != rss_buckets)) { device_printf(dev, "%s: number of queues (%d) != number of RSS buckets (%d); performance will be impacted.\n", __func__, adapter->num_queues, rss_buckets); } for (int i = 0; i < adapter->num_queues; i++, vector++, que++, txr++) { rid = vector + 1; que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (que->res == NULL) { device_printf(dev, "Unable to allocate bus resource: que interrupt [%d]\n", vector); return (ENXIO); } /* Set the handler function */ error = bus_setup_intr(dev, que->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, ixgbe_msix_que, que, &que->tag); if (error) { que->res = NULL; device_printf(dev, "Failed to register QUE handler"); return (error); } #if __FreeBSD_version >= 800504 bus_describe_intr(dev, que->res, que->tag, "q%d", i); #endif que->msix = vector; adapter->active_queues |= (u64)(1 << que->msix); if (adapter->feat_en & IXGBE_FEATURE_RSS) { /* * The queue ID is used as the RSS layer bucket ID. * We look up the queue ID -> RSS CPU ID and select * that. */ cpu_id = rss_getcpu(i % rss_buckets); CPU_SETOF(cpu_id, &cpu_mask); } else { /* * Bind the MSI-X vector, and thus the * rings to the corresponding CPU. * * This just happens to match the default RSS * round-robin bucket -> queue -> CPU allocation. 
*/ if (adapter->num_queues > 1) cpu_id = i; } if (adapter->num_queues > 1) bus_bind_intr(dev, que->res, cpu_id); #ifdef IXGBE_DEBUG if (adapter->feat_en & IXGBE_FEATURE_RSS) device_printf(dev, "Bound RSS bucket %d to CPU %d\n", i, cpu_id); else device_printf(dev, "Bound queue %d to cpu %d\n", i, cpu_id); #endif /* IXGBE_DEBUG */ if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) TASK_INIT(&txr->txq_task, 0, ixgbe_deferred_mq_start, txr); TASK_INIT(&que->que_task, 0, ixgbe_handle_que, que); que->tq = taskqueue_create_fast("ixgbe_que", M_NOWAIT, taskqueue_thread_enqueue, &que->tq); #if __FreeBSD_version < 1100000 taskqueue_start_threads(&que->tq, 1, PI_NET, "%s:q%d", device_get_nameunit(adapter->dev), i); #else if (adapter->feat_en & IXGBE_FEATURE_RSS) taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET, &cpu_mask, "%s (bucket %d)", device_get_nameunit(adapter->dev), cpu_id); else taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET, NULL, "%s:q%d", device_get_nameunit(adapter->dev), i); #endif } /* and Link */ adapter->link_rid = vector + 1; adapter->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &adapter->link_rid, RF_SHAREABLE | RF_ACTIVE); if (!adapter->res) { device_printf(dev, "Unable to allocate bus resource: Link interrupt [%d]\n", adapter->link_rid); return (ENXIO); } /* Set the link handler function */ error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, ixgbe_msix_link, adapter, &adapter->tag); if (error) { adapter->res = NULL; device_printf(dev, "Failed to register LINK handler"); return (error); } #if __FreeBSD_version >= 800504 bus_describe_intr(dev, adapter->res, adapter->tag, "link"); #endif adapter->vector = vector; /* Tasklets for Link, SFP and Multispeed Fiber */ TASK_INIT(&adapter->link_task, 0, ixgbe_handle_link, adapter); TASK_INIT(&adapter->mod_task, 0, ixgbe_handle_mod, adapter); TASK_INIT(&adapter->msf_task, 0, ixgbe_handle_msf, adapter); if (adapter->feat_cap & IXGBE_FEATURE_SRIOV) TASK_INIT(&adapter->mbx_task, 0, ixgbe_handle_mbx, adapter); TASK_INIT(&adapter->phy_task, 0, ixgbe_handle_phy, adapter); if (adapter->feat_en & IXGBE_FEATURE_FDIR) TASK_INIT(&adapter->fdir_task, 0, ixgbe_reinit_fdir, adapter); adapter->tq = taskqueue_create_fast("ixgbe_link", M_NOWAIT, taskqueue_thread_enqueue, &adapter->tq); taskqueue_start_threads(&adapter->tq, 1, PI_NET, "%s linkq", device_get_nameunit(adapter->dev)); return (0); } /* ixgbe_allocate_msix */ /************************************************************************ * ixgbe_configure_interrupts * * Setup MSI-X, MSI, or legacy interrupts (in that order). * This will also depend on user settings. 
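 * If the MSI-X table cannot be mapped or the vector allocation comes up
 * short, the driver falls back to MSI and finally to a legacy INTx
 * interrupt; features that require MSI-X (RSS, SR-IOV) are disabled on
 * that fallback path.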
************************************************************************/ static int ixgbe_configure_interrupts(struct adapter *adapter) { device_t dev = adapter->dev; int rid, want, queues, msgs; /* Default to 1 queue if MSI-X setup fails */ adapter->num_queues = 1; /* Override by tuneable */ if (!(adapter->feat_cap & IXGBE_FEATURE_MSIX)) goto msi; /* First try MSI-X */ msgs = pci_msix_count(dev); if (msgs == 0) goto msi; rid = PCIR_BAR(MSIX_82598_BAR); adapter->msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (adapter->msix_mem == NULL) { rid += 4; /* 82599 maps in higher BAR */ adapter->msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); } if (adapter->msix_mem == NULL) { /* May not be enabled */ device_printf(adapter->dev, "Unable to map MSI-X table.\n"); goto msi; } /* Figure out a reasonable auto config value */ queues = min(mp_ncpus, msgs - 1); /* If we're doing RSS, clamp at the number of RSS buckets */ if (adapter->feat_en & IXGBE_FEATURE_RSS) queues = min(queues, rss_getnumbuckets()); if (ixgbe_num_queues > queues) { device_printf(adapter->dev, "ixgbe_num_queues (%d) is too large, using reduced amount (%d).\n", ixgbe_num_queues, queues); ixgbe_num_queues = queues; } if (ixgbe_num_queues != 0) queues = ixgbe_num_queues; /* Set max queues to 8 when autoconfiguring */ else queues = min(queues, 8); /* reflect correct sysctl value */ ixgbe_num_queues = queues; /* * Want one vector (RX/TX pair) per queue * plus an additional for Link. */ want = queues + 1; if (msgs >= want) msgs = want; else { device_printf(adapter->dev, "MSI-X Configuration Problem, %d vectors but %d queues wanted!\n", msgs, want); goto msi; } if ((pci_alloc_msix(dev, &msgs) == 0) && (msgs == want)) { device_printf(adapter->dev, "Using MSI-X interrupts with %d vectors\n", msgs); adapter->num_queues = queues; adapter->feat_en |= IXGBE_FEATURE_MSIX; return (0); } /* * MSI-X allocation failed or provided us with * less vectors than needed. Free MSI-X resources * and we'll try enabling MSI. 
*/ pci_release_msi(dev); msi: /* Without MSI-X, some features are no longer supported */ adapter->feat_cap &= ~IXGBE_FEATURE_RSS; adapter->feat_en &= ~IXGBE_FEATURE_RSS; adapter->feat_cap &= ~IXGBE_FEATURE_SRIOV; adapter->feat_en &= ~IXGBE_FEATURE_SRIOV; if (adapter->msix_mem != NULL) { bus_release_resource(dev, SYS_RES_MEMORY, rid, adapter->msix_mem); adapter->msix_mem = NULL; } msgs = 1; if (pci_alloc_msi(dev, &msgs) == 0) { adapter->feat_en |= IXGBE_FEATURE_MSI; adapter->link_rid = 1; device_printf(adapter->dev, "Using an MSI interrupt\n"); return (0); } if (!(adapter->feat_cap & IXGBE_FEATURE_LEGACY_IRQ)) { device_printf(adapter->dev, "Device does not support legacy interrupts.\n"); return 1; } adapter->feat_en |= IXGBE_FEATURE_LEGACY_IRQ; adapter->link_rid = 0; device_printf(adapter->dev, "Using a Legacy interrupt\n"); return (0); } /* ixgbe_configure_interrupts */ /************************************************************************ * ixgbe_handle_link - Tasklet for MSI-X Link interrupts * * Done outside of interrupt context since the driver might sleep ************************************************************************/ static void ixgbe_handle_link(void *context, int pending) { struct adapter *adapter = context; struct ixgbe_hw *hw = &adapter->hw; ixgbe_check_link(hw, &adapter->link_speed, &adapter->link_up, 0); ixgbe_update_link_status(adapter); /* Re-enable link interrupts */ IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_LSC); } /* ixgbe_handle_link */ /************************************************************************ * ixgbe_rearm_queues ************************************************************************/ static void ixgbe_rearm_queues(struct adapter *adapter, u64 queues) { u32 mask; switch (adapter->hw.mac.type) { case ixgbe_mac_82598EB: mask = (IXGBE_EIMS_RTX_QUEUE & queues); IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, mask); break; case ixgbe_mac_82599EB: case ixgbe_mac_X540: case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_X550EM_a: mask = (queues & 0xFFFFFFFF); IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(0), mask); mask = (queues >> 32); IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS_EX(1), mask); break; default: break; } } /* ixgbe_rearm_queues */ Index: stable/11/sys/dev/ixl/ixl_pf_main.c =================================================================== --- stable/11/sys/dev/ixl/ixl_pf_main.c (revision 332287) +++ stable/11/sys/dev/ixl/ixl_pf_main.c (revision 332288) @@ -1,6089 +1,6089 @@ /****************************************************************************** Copyright (c) 2013-2015, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the Intel Corporation nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************/ /*$FreeBSD$*/ #include "ixl_pf.h" #ifdef PCI_IOV #include "ixl_pf_iov.h" #endif #ifdef IXL_IW #include "ixl_iw.h" #include "ixl_iw_int.h" #endif #ifdef DEV_NETMAP #include #include #include #endif /* DEV_NETMAP */ static int ixl_setup_queue(struct ixl_queue *, struct ixl_pf *, int); static u64 ixl_max_aq_speed_to_value(u8); static u8 ixl_convert_sysctl_aq_link_speed(u8, bool); /* Sysctls */ static int ixl_set_flowcntl(SYSCTL_HANDLER_ARGS); static int ixl_set_advertise(SYSCTL_HANDLER_ARGS); static int ixl_current_speed(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_show_fw(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_unallocated_queues(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_pf_tx_itr(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_pf_rx_itr(SYSCTL_HANDLER_ARGS); /* Debug Sysctls */ static int ixl_sysctl_link_status(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_phy_abilities(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_sw_filter_list(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_hw_res_alloc(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_switch_config(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_hkey(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_hena(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_hlut(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_fw_link_management(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_read_i2c_byte(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_write_i2c_byte(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_fec_fc_ability(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_fec_rs_ability(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_fec_fc_request(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_fec_rs_request(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_fec_auto_enable(SYSCTL_HANDLER_ARGS); #ifdef IXL_DEBUG static int ixl_sysctl_qtx_tail_handler(SYSCTL_HANDLER_ARGS); static int ixl_sysctl_qrx_tail_handler(SYSCTL_HANDLER_ARGS); #endif #ifdef IXL_IW extern int ixl_enable_iwarp; #endif const uint8_t ixl_bcast_addr[ETHER_ADDR_LEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; const char * const ixl_fc_string[6] = { "None", "Rx", "Tx", "Full", "Priority", "Default" }; MALLOC_DEFINE(M_IXL, "ixl", "ixl driver allocations"); void ixl_debug_core(struct ixl_pf *pf, enum ixl_dbg_mask mask, char *fmt, ...) 
{ va_list args; if (!(mask & pf->dbg_mask)) return; /* Re-implement device_printf() */ device_print_prettyname(pf->dev); va_start(args, fmt); vprintf(fmt, args); va_end(args); } /* ** Put the FW, API, NVM, EEtrackID, and OEM version information into a string */ void ixl_nvm_version_str(struct i40e_hw *hw, struct sbuf *buf) { u8 oem_ver = (u8)(hw->nvm.oem_ver >> 24); u16 oem_build = (u16)((hw->nvm.oem_ver >> 16) & 0xFFFF); u8 oem_patch = (u8)(hw->nvm.oem_ver & 0xFF); sbuf_printf(buf, "fw %d.%d.%05d api %d.%d nvm %x.%02x etid %08x oem %d.%d.%d", hw->aq.fw_maj_ver, hw->aq.fw_min_ver, hw->aq.fw_build, hw->aq.api_maj_ver, hw->aq.api_min_ver, (hw->nvm.version & IXL_NVM_VERSION_HI_MASK) >> IXL_NVM_VERSION_HI_SHIFT, (hw->nvm.version & IXL_NVM_VERSION_LO_MASK) >> IXL_NVM_VERSION_LO_SHIFT, hw->nvm.eetrack, oem_ver, oem_build, oem_patch); } void ixl_print_nvm_version(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct sbuf *sbuf; sbuf = sbuf_new_auto(); ixl_nvm_version_str(hw, sbuf); sbuf_finish(sbuf); device_printf(dev, "%s\n", sbuf_data(sbuf)); sbuf_delete(sbuf); } static void ixl_configure_tx_itr(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct ixl_queue *que = vsi->queues; vsi->tx_itr_setting = pf->tx_itr; for (int i = 0; i < vsi->num_queues; i++, que++) { struct tx_ring *txr = &que->txr; wr32(hw, I40E_PFINT_ITRN(IXL_TX_ITR, i), vsi->tx_itr_setting); txr->itr = vsi->tx_itr_setting; txr->latency = IXL_AVE_LATENCY; } } static void ixl_configure_rx_itr(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct ixl_queue *que = vsi->queues; vsi->rx_itr_setting = pf->rx_itr; for (int i = 0; i < vsi->num_queues; i++, que++) { struct rx_ring *rxr = &que->rxr; wr32(hw, I40E_PFINT_ITRN(IXL_RX_ITR, i), vsi->rx_itr_setting); rxr->itr = vsi->rx_itr_setting; rxr->latency = IXL_AVE_LATENCY; } } /* * Write PF ITR values to queue ITR registers. */ void ixl_configure_itr(struct ixl_pf *pf) { ixl_configure_tx_itr(pf); ixl_configure_rx_itr(pf); } /********************************************************************* * Init entry point * * This routine is used in two ways. It is used by the stack as * init entry point in network interface structure. It is also used * by the driver as a hw/sw initialization routine to get to a * consistent state. * * return 0 on success, positive on failure **********************************************************************/ void ixl_init_locked(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct ifnet *ifp = vsi->ifp; device_t dev = pf->dev; struct i40e_filter_control_settings filter; u8 tmpaddr[ETHER_ADDR_LEN]; int ret; INIT_DEBUGOUT("ixl_init_locked: begin"); IXL_PF_LOCK_ASSERT(pf); ixl_stop_locked(pf); /* * If the aq is dead here, it probably means something outside of the driver * did something to the adapter, like a PF reset. * So rebuild the driver's state here if that occurs. */ if (!i40e_check_asq_alive(&pf->hw)) { device_printf(dev, "Admin Queue is down; resetting...\n"); ixl_teardown_hw_structs(pf); ixl_reset(pf); } /* Get the latest mac address... 
User might use a LAA */ bcopy(IF_LLADDR(vsi->ifp), tmpaddr, I40E_ETH_LENGTH_OF_ADDRESS); if (!cmp_etheraddr(hw->mac.addr, tmpaddr) && (i40e_validate_mac_addr(tmpaddr) == I40E_SUCCESS)) { ixl_del_filter(vsi, hw->mac.addr, IXL_VLAN_ANY); bcopy(tmpaddr, hw->mac.addr, I40E_ETH_LENGTH_OF_ADDRESS); ret = i40e_aq_mac_address_write(hw, I40E_AQC_WRITE_TYPE_LAA_ONLY, hw->mac.addr, NULL); if (ret) { device_printf(dev, "LLA address" "change failed!!\n"); return; } } ixl_add_filter(vsi, hw->mac.addr, IXL_VLAN_ANY); /* Set the various hardware offload abilities */ ifp->if_hwassist = 0; if (ifp->if_capenable & IFCAP_TSO) ifp->if_hwassist |= CSUM_TSO; if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) ifp->if_hwassist |= (CSUM_TCP_IPV6 | CSUM_UDP_IPV6); /* Set up the device filtering */ bzero(&filter, sizeof(filter)); filter.enable_ethtype = TRUE; filter.enable_macvlan = TRUE; filter.enable_fdir = FALSE; filter.hash_lut_size = I40E_HASH_LUT_SIZE_512; if (i40e_set_filter_control(hw, &filter)) device_printf(dev, "i40e_set_filter_control() failed\n"); /* Prepare the VSI: rings, hmc contexts, etc... */ if (ixl_initialize_vsi(vsi)) { device_printf(dev, "initialize vsi failed!!\n"); return; } /* Set up RSS */ ixl_config_rss(pf); /* Add protocol filters to list */ ixl_init_filters(vsi); /* Setup vlan's if needed */ ixl_setup_vlan_filters(vsi); /* Set up MSI/X routing and the ITR settings */ if (pf->msix > 1) { ixl_configure_queue_intr_msix(pf); ixl_configure_itr(pf); } else ixl_configure_legacy(pf); ixl_enable_rings(vsi); i40e_aq_set_default_vsi(hw, vsi->seid, NULL); ixl_reconfigure_filters(vsi); /* And now turn on interrupts */ ixl_enable_intr(vsi); /* Get link info */ hw->phy.get_link_info = TRUE; i40e_get_link_status(hw, &pf->link_up); ixl_update_link_status(pf); /* Start the local timer */ callout_reset(&pf->timer, hz, ixl_local_timer, pf); /* Now inform the stack we're ready */ ifp->if_drv_flags |= IFF_DRV_RUNNING; #ifdef IXL_IW if (ixl_enable_iwarp && pf->iw_enabled) { ret = ixl_iw_pf_init(pf); if (ret) device_printf(dev, "initialize iwarp failed, code %d\n", ret); } #endif } /********************************************************************* * * Get the hardware capabilities * **********************************************************************/ int ixl_get_hw_capabilities(struct ixl_pf *pf) { struct i40e_aqc_list_capabilities_element_resp *buf; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; int error, len; u16 needed; bool again = TRUE; len = 40 * sizeof(struct i40e_aqc_list_capabilities_element_resp); retry: if (!(buf = (struct i40e_aqc_list_capabilities_element_resp *) malloc(len, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate cap memory\n"); return (ENOMEM); } /* This populates the hw struct */ error = i40e_aq_discover_capabilities(hw, buf, len, &needed, i40e_aqc_opc_list_func_capabilities, NULL); free(buf, M_DEVBUF); if ((pf->hw.aq.asq_last_status == I40E_AQ_RC_ENOMEM) && (again == TRUE)) { /* retry once with a larger buffer */ again = FALSE; len = needed; goto retry; } else if (pf->hw.aq.asq_last_status != I40E_AQ_RC_OK) { device_printf(dev, "capability discovery failed: %d\n", pf->hw.aq.asq_last_status); return (ENODEV); } /* Capture this PF's starting queue pair */ pf->qbase = hw->func_caps.base_queue; #ifdef IXL_DEBUG device_printf(dev, "pf_id=%d, num_vfs=%d, msix_pf=%d, " "msix_vf=%d, fd_g=%d, fd_b=%d, tx_qp=%d rx_qp=%d qbase=%d\n", hw->pf_id, hw->func_caps.num_vfs, 
hw->func_caps.num_msix_vectors, hw->func_caps.num_msix_vectors_vf, hw->func_caps.fd_filters_guaranteed, hw->func_caps.fd_filters_best_effort, hw->func_caps.num_tx_qp, hw->func_caps.num_rx_qp, hw->func_caps.base_queue); #endif /* Print a subset of the capability information. */ device_printf(dev, "PF-ID[%d]: VFs %d, MSIX %d, VF MSIX %d, QPs %d, %s\n", hw->pf_id, hw->func_caps.num_vfs, hw->func_caps.num_msix_vectors, hw->func_caps.num_msix_vectors_vf, hw->func_caps.num_tx_qp, (hw->func_caps.mdio_port_mode == 2) ? "I2C" : (hw->func_caps.mdio_port_mode == 1) ? "MDIO dedicated" : "MDIO shared"); struct i40e_osdep *osdep = (struct i40e_osdep *)hw->back; osdep->i2c_intfc_num = ixl_find_i2c_interface(pf); if (osdep->i2c_intfc_num != -1) pf->has_i2c = true; return (error); } void ixl_cap_txcsum_tso(struct ixl_vsi *vsi, struct ifnet *ifp, int mask) { device_t dev = vsi->dev; /* Enable/disable TXCSUM/TSO4 */ if (!(ifp->if_capenable & IFCAP_TXCSUM) && !(ifp->if_capenable & IFCAP_TSO4)) { if (mask & IFCAP_TXCSUM) { ifp->if_capenable |= IFCAP_TXCSUM; /* enable TXCSUM, restore TSO if previously enabled */ if (vsi->flags & IXL_FLAGS_KEEP_TSO4) { vsi->flags &= ~IXL_FLAGS_KEEP_TSO4; ifp->if_capenable |= IFCAP_TSO4; } } else if (mask & IFCAP_TSO4) { ifp->if_capenable |= (IFCAP_TXCSUM | IFCAP_TSO4); vsi->flags &= ~IXL_FLAGS_KEEP_TSO4; device_printf(dev, "TSO4 requires txcsum, enabling both...\n"); } } else if((ifp->if_capenable & IFCAP_TXCSUM) && !(ifp->if_capenable & IFCAP_TSO4)) { if (mask & IFCAP_TXCSUM) ifp->if_capenable &= ~IFCAP_TXCSUM; else if (mask & IFCAP_TSO4) ifp->if_capenable |= IFCAP_TSO4; } else if((ifp->if_capenable & IFCAP_TXCSUM) && (ifp->if_capenable & IFCAP_TSO4)) { if (mask & IFCAP_TXCSUM) { vsi->flags |= IXL_FLAGS_KEEP_TSO4; ifp->if_capenable &= ~(IFCAP_TXCSUM | IFCAP_TSO4); device_printf(dev, "TSO4 requires txcsum, disabling both...\n"); } else if (mask & IFCAP_TSO4) ifp->if_capenable &= ~IFCAP_TSO4; } /* Enable/disable TXCSUM_IPV6/TSO6 */ if (!(ifp->if_capenable & IFCAP_TXCSUM_IPV6) && !(ifp->if_capenable & IFCAP_TSO6)) { if (mask & IFCAP_TXCSUM_IPV6) { ifp->if_capenable |= IFCAP_TXCSUM_IPV6; if (vsi->flags & IXL_FLAGS_KEEP_TSO6) { vsi->flags &= ~IXL_FLAGS_KEEP_TSO6; ifp->if_capenable |= IFCAP_TSO6; } } else if (mask & IFCAP_TSO6) { ifp->if_capenable |= (IFCAP_TXCSUM_IPV6 | IFCAP_TSO6); vsi->flags &= ~IXL_FLAGS_KEEP_TSO6; device_printf(dev, "TSO6 requires txcsum6, enabling both...\n"); } } else if((ifp->if_capenable & IFCAP_TXCSUM_IPV6) && !(ifp->if_capenable & IFCAP_TSO6)) { if (mask & IFCAP_TXCSUM_IPV6) ifp->if_capenable &= ~IFCAP_TXCSUM_IPV6; else if (mask & IFCAP_TSO6) ifp->if_capenable |= IFCAP_TSO6; } else if ((ifp->if_capenable & IFCAP_TXCSUM_IPV6) && (ifp->if_capenable & IFCAP_TSO6)) { if (mask & IFCAP_TXCSUM_IPV6) { vsi->flags |= IXL_FLAGS_KEEP_TSO6; ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 | IFCAP_TSO6); device_printf(dev, "TSO6 requires txcsum6, disabling both...\n"); } else if (mask & IFCAP_TSO6) ifp->if_capenable &= ~IFCAP_TSO6; } } /* For the set_advertise sysctl */ void ixl_get_initial_advertised_speeds(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; enum i40e_status_code status; struct i40e_aq_get_phy_abilities_resp abilities; /* Set initial sysctl values */ status = i40e_aq_get_phy_capabilities(hw, FALSE, false, &abilities, NULL); if (status) { /* Non-fatal error */ device_printf(dev, "%s: i40e_aq_get_phy_capabilities() error %d\n", __func__, status); return; } pf->advertised_speed = ixl_convert_sysctl_aq_link_speed(abilities.link_speed, 
false); } int ixl_teardown_hw_structs(struct ixl_pf *pf) { enum i40e_status_code status = 0; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; /* Shutdown LAN HMC */ if (hw->hmc.hmc_obj) { status = i40e_shutdown_lan_hmc(hw); if (status) { device_printf(dev, "init: LAN HMC shutdown failure; status %d\n", status); goto err_out; } } // XXX: This gets called when we know the adminq is inactive; // so we already know it's setup when we get here. /* Shutdown admin queue */ status = i40e_shutdown_adminq(hw); if (status) device_printf(dev, "init: Admin Queue shutdown failure; status %d\n", status); err_out: return (status); } int ixl_reset(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; u8 set_fc_err_mask; int error = 0; // XXX: clear_hw() actually writes to hw registers -- maybe this isn't necessary i40e_clear_hw(hw); error = i40e_pf_reset(hw); if (error) { device_printf(dev, "init: PF reset failure"); error = EIO; goto err_out; } error = i40e_init_adminq(hw); if (error) { device_printf(dev, "init: Admin queue init failure;" " status code %d", error); error = EIO; goto err_out; } i40e_clear_pxe_mode(hw); error = ixl_get_hw_capabilities(pf); if (error) { device_printf(dev, "init: Error retrieving HW capabilities;" " status code %d\n", error); goto err_out; } error = i40e_init_lan_hmc(hw, hw->func_caps.num_tx_qp, hw->func_caps.num_rx_qp, 0, 0); if (error) { device_printf(dev, "init: LAN HMC init failed; status code %d\n", error); error = EIO; goto err_out; } error = i40e_configure_lan_hmc(hw, I40E_HMC_MODEL_DIRECT_ONLY); if (error) { device_printf(dev, "init: LAN HMC config failed; status code %d\n", error); error = EIO; goto err_out; } // XXX: possible fix for panic, but our failure recovery is still broken error = ixl_switch_config(pf); if (error) { device_printf(dev, "init: ixl_switch_config() failed: %d\n", error); goto err_out; } error = i40e_aq_set_phy_int_mask(hw, IXL_DEFAULT_PHY_INT_MASK, NULL); if (error) { device_printf(dev, "init: i40e_aq_set_phy_mask() failed: err %d," " aq_err %d\n", error, hw->aq.asq_last_status); error = EIO; goto err_out; } error = i40e_set_fc(hw, &set_fc_err_mask, true); if (error) { device_printf(dev, "init: setting link flow control failed; retcode %d," " fc_err_mask 0x%02x\n", error, set_fc_err_mask); goto err_out; } // XXX: (Rebuild VSIs?) 
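	// The block below works around a limitation of older firmware:
	// on firmware before 4.33 the driver waits 75 ms and then
	// explicitly restarts link autonegotiation.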
/* Firmware delay workaround */ if (((hw->aq.fw_maj_ver == 4) && (hw->aq.fw_min_ver < 33)) || (hw->aq.fw_maj_ver < 4)) { i40e_msec_delay(75); error = i40e_aq_set_link_restart_an(hw, TRUE, NULL); if (error) { device_printf(dev, "init: link restart failed, aq_err %d\n", hw->aq.asq_last_status); goto err_out; } } err_out: return (error); } /* ** MSIX Interrupt Handlers and Tasklets */ void ixl_handle_que(void *context, int pending) { struct ixl_queue *que = context; struct ixl_vsi *vsi = que->vsi; struct i40e_hw *hw = vsi->hw; struct tx_ring *txr = &que->txr; struct ifnet *ifp = vsi->ifp; bool more; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { more = ixl_rxeof(que, IXL_RX_LIMIT); IXL_TX_LOCK(txr); ixl_txeof(que); if (!drbr_empty(ifp, txr->br)) ixl_mq_start_locked(ifp, txr); IXL_TX_UNLOCK(txr); if (more) { taskqueue_enqueue(que->tq, &que->task); return; } } /* Reenable this interrupt - hmmm */ ixl_enable_queue(hw, que->me); return; } /********************************************************************* * * Legacy Interrupt Service routine * **********************************************************************/ void ixl_intr(void *arg) { struct ixl_pf *pf = arg; struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct ixl_queue *que = vsi->queues; struct ifnet *ifp = vsi->ifp; struct tx_ring *txr = &que->txr; u32 icr0; bool more_tx, more_rx; pf->admin_irq++; /* Protect against spurious interrupts */ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) return; icr0 = rd32(hw, I40E_PFINT_ICR0); #ifdef PCI_IOV if (icr0 & I40E_PFINT_ICR0_VFLR_MASK) taskqueue_enqueue(pf->tq, &pf->vflr_task); #endif if (icr0 & I40E_PFINT_ICR0_ADMINQ_MASK) { taskqueue_enqueue(pf->tq, &pf->adminq); } if (icr0 & I40E_PFINT_ICR0_QUEUE_0_MASK) { ++que->irqs; more_rx = ixl_rxeof(que, IXL_RX_LIMIT); IXL_TX_LOCK(txr); more_tx = ixl_txeof(que); if (!drbr_empty(vsi->ifp, txr->br)) more_tx = 1; IXL_TX_UNLOCK(txr); } ixl_enable_intr0(hw); } /********************************************************************* * * MSIX VSI Interrupt Service routine * **********************************************************************/ void ixl_msix_que(void *arg) { struct ixl_queue *que = arg; struct ixl_vsi *vsi = que->vsi; struct i40e_hw *hw = vsi->hw; struct tx_ring *txr = &que->txr; bool more_tx, more_rx; /* Protect against spurious interrupts */ if (!(vsi->ifp->if_drv_flags & IFF_DRV_RUNNING)) return; ++que->irqs; more_rx = ixl_rxeof(que, IXL_RX_LIMIT); IXL_TX_LOCK(txr); more_tx = ixl_txeof(que); /* ** Make certain that if the stack ** has anything queued the task gets ** scheduled to handle it. 
*/ if (!drbr_empty(vsi->ifp, txr->br)) more_tx = 1; IXL_TX_UNLOCK(txr); ixl_set_queue_rx_itr(que); ixl_set_queue_tx_itr(que); if (more_tx || more_rx) taskqueue_enqueue(que->tq, &que->task); else ixl_enable_queue(hw, que->me); return; } /********************************************************************* * * MSIX Admin Queue Interrupt Service routine * **********************************************************************/ void ixl_msix_adminq(void *arg) { struct ixl_pf *pf = arg; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; u32 reg, mask, rstat_reg; bool do_task = FALSE; ++pf->admin_irq; reg = rd32(hw, I40E_PFINT_ICR0); mask = rd32(hw, I40E_PFINT_ICR0_ENA); /* Check on the cause */ if (reg & I40E_PFINT_ICR0_ADMINQ_MASK) { mask &= ~I40E_PFINT_ICR0_ADMINQ_MASK; do_task = TRUE; } if (reg & I40E_PFINT_ICR0_MAL_DETECT_MASK) { ixl_handle_mdd_event(pf); mask &= ~I40E_PFINT_ICR0_MAL_DETECT_MASK; } if (reg & I40E_PFINT_ICR0_GRST_MASK) { device_printf(dev, "Reset Requested!\n"); rstat_reg = rd32(hw, I40E_GLGEN_RSTAT); rstat_reg = (rstat_reg & I40E_GLGEN_RSTAT_RESET_TYPE_MASK) >> I40E_GLGEN_RSTAT_RESET_TYPE_SHIFT; device_printf(dev, "Reset type: "); switch (rstat_reg) { /* These others might be handled similarly to an EMPR reset */ case I40E_RESET_CORER: printf("CORER\n"); break; case I40E_RESET_GLOBR: printf("GLOBR\n"); break; case I40E_RESET_EMPR: printf("EMPR\n"); atomic_set_int(&pf->state, IXL_PF_STATE_EMPR_RESETTING); break; default: printf("POR\n"); break; } /* overload admin queue task to check reset progress */ do_task = TRUE; } if (reg & I40E_PFINT_ICR0_ECC_ERR_MASK) { device_printf(dev, "ECC Error detected!\n"); } if (reg & I40E_PFINT_ICR0_HMC_ERR_MASK) { reg = rd32(hw, I40E_PFHMC_ERRORINFO); if (reg & I40E_PFHMC_ERRORINFO_ERROR_DETECTED_MASK) { device_printf(dev, "HMC Error detected!\n"); device_printf(dev, "INFO 0x%08x\n", reg); reg = rd32(hw, I40E_PFHMC_ERRORDATA); device_printf(dev, "DATA 0x%08x\n", reg); wr32(hw, I40E_PFHMC_ERRORINFO, 0); } } if (reg & I40E_PFINT_ICR0_PCI_EXCEPTION_MASK) { device_printf(dev, "PCI Exception detected!\n"); } #ifdef PCI_IOV if (reg & I40E_PFINT_ICR0_VFLR_MASK) { mask &= ~I40E_PFINT_ICR0_ENA_VFLR_MASK; taskqueue_enqueue(pf->tq, &pf->vflr_task); } #endif if (do_task) taskqueue_enqueue(pf->tq, &pf->adminq); else ixl_enable_intr0(hw); } void ixl_set_promisc(struct ixl_vsi *vsi) { struct ifnet *ifp = vsi->ifp; struct i40e_hw *hw = vsi->hw; int err, mcnt = 0; bool uni = FALSE, multi = FALSE; if (ifp->if_flags & IFF_ALLMULTI) multi = TRUE; else { /* Need to count the multicast addresses */ struct ifmultiaddr *ifma; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (mcnt == MAX_MULTICAST_ADDR) break; mcnt++; } if_maddr_runlock(ifp); } if (mcnt >= MAX_MULTICAST_ADDR) multi = TRUE; if (ifp->if_flags & IFF_PROMISC) uni = TRUE; err = i40e_aq_set_vsi_unicast_promiscuous(hw, vsi->seid, uni, NULL, TRUE); err = i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid, multi, NULL); return; } /********************************************************************* * Filter Routines * * Routines for multicast and vlan filter management. 
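 * The driver keeps a software list of ixl_mac_filter entries per VSI
 * (vsi->ftl); entries are flagged IXL_FILTER_ADD/DEL/USED (and MC for
 * multicast) and then pushed to or pulled from the firmware in batches
 * by ixl_add_hw_filters()/ixl_del_hw_filters().
 *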
* *********************************************************************/ void ixl_add_multi(struct ixl_vsi *vsi) { struct ifmultiaddr *ifma; struct ifnet *ifp = vsi->ifp; struct i40e_hw *hw = vsi->hw; int mcnt = 0, flags; IOCTL_DEBUGOUT("ixl_add_multi: begin"); if_maddr_rlock(ifp); /* ** First just get a count, to decide if we ** we simply use multicast promiscuous. */ TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; mcnt++; } if_maddr_runlock(ifp); if (__predict_false(mcnt >= MAX_MULTICAST_ADDR)) { /* delete existing MC filters */ ixl_del_hw_filters(vsi, mcnt); i40e_aq_set_vsi_multicast_promiscuous(hw, vsi->seid, TRUE, NULL); return; } mcnt = 0; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; ixl_add_mc_filter(vsi, (u8*)LLADDR((struct sockaddr_dl *) ifma->ifma_addr)); mcnt++; } if_maddr_runlock(ifp); if (mcnt > 0) { flags = (IXL_FILTER_ADD | IXL_FILTER_USED | IXL_FILTER_MC); ixl_add_hw_filters(vsi, flags, mcnt); } IOCTL_DEBUGOUT("ixl_add_multi: end"); return; } void ixl_del_multi(struct ixl_vsi *vsi) { struct ifnet *ifp = vsi->ifp; struct ifmultiaddr *ifma; struct ixl_mac_filter *f; int mcnt = 0; bool match = FALSE; IOCTL_DEBUGOUT("ixl_del_multi: begin"); /* Search for removed multicast addresses */ if_maddr_rlock(ifp); SLIST_FOREACH(f, &vsi->ftl, next) { if ((f->flags & IXL_FILTER_USED) && (f->flags & IXL_FILTER_MC)) { match = FALSE; TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; u8 *mc_addr = (u8 *)LLADDR((struct sockaddr_dl *)ifma->ifma_addr); if (cmp_etheraddr(f->macaddr, mc_addr)) { match = TRUE; break; } } if (match == FALSE) { f->flags |= IXL_FILTER_DEL; mcnt++; } } } if_maddr_runlock(ifp); if (mcnt > 0) ixl_del_hw_filters(vsi, mcnt); } /********************************************************************* * Timer routine * * This routine checks for link status,updates statistics, * and runs the watchdog check. * * Only runs when the driver is configured UP and RUNNING. * **********************************************************************/ void ixl_local_timer(void *arg) { struct ixl_pf *pf = arg; struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct ixl_queue *que = vsi->queues; device_t dev = pf->dev; struct tx_ring *txr; int hung = 0; u32 mask; s32 timer, new_timer; IXL_PF_LOCK_ASSERT(pf); /* Fire off the adminq task */ taskqueue_enqueue(pf->tq, &pf->adminq); /* Update stats */ ixl_update_stats_counters(pf); /* Check status of the queues */ mask = (I40E_PFINT_DYN_CTLN_INTENA_MASK | I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK | I40E_PFINT_DYN_CTLN_ITR_INDX_MASK); for (int i = 0; i < vsi->num_queues; i++, que++) { txr = &que->txr; timer = atomic_load_acq_32(&txr->watchdog_timer); if (timer > 0) { new_timer = timer - hz; if (new_timer <= 0) { atomic_store_rel_32(&txr->watchdog_timer, -1); device_printf(dev, "WARNING: queue %d " "appears to be hung!\n", que->me); ++hung; } else { /* * If this fails, that means something in the TX path has updated * the watchdog, so it means the TX path is still working and * the watchdog doesn't need to countdown. 
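 * Worked example: with hz = 1000, a timer the TX path armed at, say,
 * 5000 counts down 5000 -> 4000 -> 3000 on successive ticks; if the
 * TX path re-arms it between our read and this cmpset, the cmpset
 * fails and the countdown restarts from the refreshed value.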
*/ atomic_cmpset_rel_32(&txr->watchdog_timer, timer, new_timer); /* Any queues with outstanding work get a sw irq */ wr32(hw, I40E_PFINT_DYN_CTLN(que->me), mask); } } } /* Reset when a queue shows hung */ if (hung) goto hung; callout_reset(&pf->timer, hz, ixl_local_timer, pf); return; hung: device_printf(dev, "WARNING: Resetting!\n"); pf->watchdog_events++; ixl_init_locked(pf); } void ixl_link_up_msg(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ifnet *ifp = pf->vsi.ifp; log(LOG_NOTICE, "%s: Link is up, %s Full Duplex, FEC: %s, Autoneg: %s, Flow Control: %s\n", ifp->if_xname, ixl_aq_speed_to_str(hw->phy.link_info.link_speed), (hw->phy.link_info.fec_info & I40E_AQ_CONFIG_FEC_KR_ENA) ? "Clause 74 BASE-R FEC" : (hw->phy.link_info.fec_info & I40E_AQ_CONFIG_FEC_RS_ENA) ? "Clause 108 RS-FEC" : "None", (hw->phy.link_info.an_info & I40E_AQ_AN_COMPLETED) ? "True" : "False", (hw->phy.link_info.an_info & I40E_AQ_LINK_PAUSE_TX && hw->phy.link_info.an_info & I40E_AQ_LINK_PAUSE_RX) ? ixl_fc_string[3] : (hw->phy.link_info.an_info & I40E_AQ_LINK_PAUSE_TX) ? ixl_fc_string[2] : (hw->phy.link_info.an_info & I40E_AQ_LINK_PAUSE_RX) ? ixl_fc_string[1] : ixl_fc_string[0]); } /* ** Note: this routine updates the OS on the link state ** the real check of the hardware only happens with ** a link interrupt. */ void ixl_update_link_status(struct ixl_pf *pf) { struct ixl_vsi *vsi = &pf->vsi; struct ifnet *ifp = vsi->ifp; device_t dev = pf->dev; if (pf->link_up) { if (vsi->link_active == FALSE) { vsi->link_active = TRUE; ifp->if_baudrate = ixl_max_aq_speed_to_value(pf->link_speed); if_link_state_change(ifp, LINK_STATE_UP); ixl_link_up_msg(pf); } } else { /* Link down */ if (vsi->link_active == TRUE) { if (bootverbose) device_printf(dev, "Link is Down\n"); if_link_state_change(ifp, LINK_STATE_DOWN); vsi->link_active = FALSE; } } return; } /********************************************************************* * * This routine disables all traffic on the adapter by issuing a * global reset on the MAC and deallocates TX/RX buffers. 
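 * (Note: the locked stop path below stops the local timer, disables
 * the ring interrupts and rings, and clears IFF_DRV_RUNNING; it does
 * not itself issue a MAC reset or free the TX/RX buffers.)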
* **********************************************************************/ void ixl_stop_locked(struct ixl_pf *pf) { struct ixl_vsi *vsi = &pf->vsi; struct ifnet *ifp = vsi->ifp; INIT_DEBUGOUT("ixl_stop: begin\n"); IXL_PF_LOCK_ASSERT(pf); #ifdef IXL_IW /* Stop iWARP device */ if (ixl_enable_iwarp && pf->iw_enabled) ixl_iw_pf_stop(pf); #endif /* Stop the local timer */ callout_stop(&pf->timer); ixl_disable_rings_intr(vsi); ixl_disable_rings(vsi); /* Tell the stack that the interface is no longer active */ ifp->if_drv_flags &= ~(IFF_DRV_RUNNING); } void ixl_stop(struct ixl_pf *pf) { IXL_PF_LOCK(pf); ixl_stop_locked(pf); IXL_PF_UNLOCK(pf); } /********************************************************************* * * Setup MSIX Interrupt resources and handlers for the VSI * **********************************************************************/ int ixl_setup_legacy(struct ixl_pf *pf) { device_t dev = pf->dev; int error, rid = 0; if (pf->msix == 1) rid = 1; pf->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (pf->res == NULL) { device_printf(dev, "bus_alloc_resource_any() for" " legacy/msi interrupt\n"); return (ENXIO); } /* Set the handler function */ error = bus_setup_intr(dev, pf->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, ixl_intr, pf, &pf->tag); if (error) { pf->res = NULL; device_printf(dev, "bus_setup_intr() for legacy/msi" " interrupt handler failed, error %d\n", error); return (ENXIO); } error = bus_describe_intr(dev, pf->res, pf->tag, "irq"); if (error) { /* non-fatal */ device_printf(dev, "bus_describe_intr() for Admin Queue" " interrupt name failed, error %d\n", error); } return (0); } int ixl_setup_adminq_tq(struct ixl_pf *pf) { device_t dev = pf->dev; int error = 0; /* Tasklet for Admin Queue interrupts */ TASK_INIT(&pf->adminq, 0, ixl_do_adminq, pf); #ifdef PCI_IOV /* VFLR Tasklet */ TASK_INIT(&pf->vflr_task, 0, ixl_handle_vflr, pf); #endif /* Create and start Admin Queue taskqueue */ pf->tq = taskqueue_create_fast("ixl_aq", M_NOWAIT, taskqueue_thread_enqueue, &pf->tq); if (!pf->tq) { device_printf(dev, "taskqueue_create_fast (for AQ) returned NULL!\n"); return (ENOMEM); } error = taskqueue_start_threads(&pf->tq, 1, PI_NET, "%s aq", device_get_nameunit(dev)); if (error) { device_printf(dev, "taskqueue_start_threads (for AQ) error: %d\n", error); taskqueue_free(pf->tq); return (error); } return (0); } int ixl_setup_queue_tqs(struct ixl_vsi *vsi) { struct ixl_queue *que = vsi->queues; device_t dev = vsi->dev; #ifdef RSS int cpu_id = 0; cpuset_t cpu_mask; #endif /* Create queue tasks and start queue taskqueues */ for (int i = 0; i < vsi->num_queues; i++, que++) { TASK_INIT(&que->tx_task, 0, ixl_deferred_mq_start, que); TASK_INIT(&que->task, 0, ixl_handle_que, que); que->tq = taskqueue_create_fast("ixl_que", M_NOWAIT, taskqueue_thread_enqueue, &que->tq); #ifdef RSS CPU_SETOF(cpu_id, &cpu_mask); taskqueue_start_threads_cpuset(&que->tq, 1, PI_NET, &cpu_mask, "%s (bucket %d)", device_get_nameunit(dev), cpu_id); #else taskqueue_start_threads(&que->tq, 1, PI_NET, "%s (que %d)", device_get_nameunit(dev), que->me); #endif } return (0); } void ixl_free_adminq_tq(struct ixl_pf *pf) { if (pf->tq) { taskqueue_free(pf->tq); pf->tq = NULL; } } void ixl_free_queue_tqs(struct ixl_vsi *vsi) { struct ixl_queue *que = vsi->queues; for (int i = 0; i < vsi->num_queues; i++, que++) { if (que->tq) { taskqueue_free(que->tq); que->tq = NULL; } } } int ixl_setup_adminq_msix(struct ixl_pf *pf) { device_t dev = pf->dev; int rid, error = 0; /* Admin IRQ rid is 1, vector is 0 */ rid 
= 1; /* Get interrupt resource from bus */ pf->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (!pf->res) { device_printf(dev, "bus_alloc_resource_any() for Admin Queue" " interrupt failed [rid=%d]\n", rid); return (ENXIO); } /* Then associate interrupt with handler */ error = bus_setup_intr(dev, pf->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, ixl_msix_adminq, pf, &pf->tag); if (error) { pf->res = NULL; device_printf(dev, "bus_setup_intr() for Admin Queue" " interrupt handler failed, error %d\n", error); return (ENXIO); } error = bus_describe_intr(dev, pf->res, pf->tag, "aq"); if (error) { /* non-fatal */ device_printf(dev, "bus_describe_intr() for Admin Queue" " interrupt name failed, error %d\n", error); } pf->admvec = 0; return (0); } /* * Allocate interrupt resources from bus and associate an interrupt handler * to those for the VSI's queues. */ int ixl_setup_queue_msix(struct ixl_vsi *vsi) { device_t dev = vsi->dev; struct ixl_queue *que = vsi->queues; struct tx_ring *txr; int error, rid, vector = 1; /* Queue interrupt vector numbers start at 1 (adminq intr is 0) */ for (int i = 0; i < vsi->num_queues; i++, vector++, que++) { int cpu_id = i; rid = vector + 1; txr = &que->txr; que->res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE); if (!que->res) { device_printf(dev, "bus_alloc_resource_any() for" " Queue %d interrupt failed [rid=%d]\n", que->me, rid); return (ENXIO); } /* Set the handler function */ error = bus_setup_intr(dev, que->res, INTR_TYPE_NET | INTR_MPSAFE, NULL, ixl_msix_que, que, &que->tag); if (error) { device_printf(dev, "bus_setup_intr() for Queue %d" " interrupt handler failed, error %d\n", que->me, error); bus_release_resource(dev, SYS_RES_IRQ, rid, que->res); return (error); } error = bus_describe_intr(dev, que->res, que->tag, "q%d", i); if (error) { device_printf(dev, "bus_describe_intr() for Queue %d" " interrupt name failed, error %d\n", que->me, error); } /* Bind the vector to a CPU */ #ifdef RSS cpu_id = rss_getcpu(i % rss_getnumbuckets()); #endif error = bus_bind_intr(dev, que->res, cpu_id); if (error) { device_printf(dev, "bus_bind_intr() for Queue %d" " to CPU %d failed, error %d\n", que->me, cpu_id, error); } que->msix = vector; } return (0); } /* * When used in a virtualized environment PCI BUSMASTER capability may not be set * so explicity set it here and rewrite the ENABLE in the MSIX control register * at this point to cause the host to successfully initialize us. */ void ixl_set_busmaster(device_t dev) { u16 pci_cmd_word; pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); pci_cmd_word |= PCIM_CMD_BUSMASTEREN; pci_write_config(dev, PCIR_COMMAND, pci_cmd_word, 2); } /* * rewrite the ENABLE in the MSIX control register * to cause the host to successfully initialize us. */ void ixl_set_msix_enable(device_t dev) { int msix_ctrl, rid; pci_find_cap(dev, PCIY_MSIX, &rid); rid += PCIR_MSIX_CTRL; msix_ctrl = pci_read_config(dev, rid, 2); msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE; pci_write_config(dev, rid, msix_ctrl, 2); } /* * Allocate MSI/X vectors from the OS. * Returns 0 for legacy, 1 for MSI, >1 for MSIX. 
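 * The request is one vector per RX/TX queue pair plus one for the
 * admin queue, e.g. 8 queues -> 9 vectors (plus extra vectors for
 * iWARP when IXL_IW is enabled and spares remain).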
*/ int ixl_init_msix(struct ixl_pf *pf) { device_t dev = pf->dev; struct i40e_hw *hw = &pf->hw; int auto_max_queues; int rid, want, vectors, queues, available; #ifdef IXL_IW int iw_want, iw_vectors; pf->iw_msix = 0; #endif /* Override by tuneable */ if (!pf->enable_msix) goto no_msix; /* Ensure proper operation in virtualized environment */ ixl_set_busmaster(dev); /* First try MSI/X */ rid = PCIR_BAR(IXL_MSIX_BAR); pf->msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!pf->msix_mem) { /* May not be enabled */ device_printf(pf->dev, "Unable to map MSIX table\n"); goto no_msix; } available = pci_msix_count(dev); if (available < 2) { /* system has msix disabled (0), or only one vector (1) */ bus_release_resource(dev, SYS_RES_MEMORY, rid, pf->msix_mem); pf->msix_mem = NULL; goto no_msix; } /* Clamp max number of queues based on: * - # of MSI-X vectors available * - # of cpus available * - # of queues that can be assigned to the LAN VSI */ auto_max_queues = min(mp_ncpus, available - 1); if (hw->mac.type == I40E_MAC_X722) auto_max_queues = min(auto_max_queues, 128); else auto_max_queues = min(auto_max_queues, 64); /* Override with tunable value if tunable is less than autoconfig count */ if ((pf->max_queues != 0) && (pf->max_queues <= auto_max_queues)) queues = pf->max_queues; /* Use autoconfig amount if that's lower */ else if ((pf->max_queues != 0) && (pf->max_queues > auto_max_queues)) { device_printf(dev, "ixl_max_queues (%d) is too large, using " "autoconfig amount (%d)...\n", pf->max_queues, auto_max_queues); queues = auto_max_queues; } /* Limit maximum auto-configured queues to 8 if no user value is set */ else queues = min(auto_max_queues, 8); #ifdef RSS /* If we're doing RSS, clamp at the number of RSS buckets */ if (queues > rss_getnumbuckets()) queues = rss_getnumbuckets(); #endif /* ** Want one vector (RX/TX pair) per queue ** plus an additional for the admin queue. */ want = queues + 1; if (want <= available) /* Have enough */ vectors = want; else { device_printf(pf->dev, "MSIX Configuration Problem, " "%d vectors available but %d wanted!\n", available, want); pf->msix_mem = NULL; goto no_msix; /* Will go to Legacy setup */ } #ifdef IXL_IW if (ixl_enable_iwarp) { /* iWARP wants additional vector for CQP */ iw_want = mp_ncpus + 1; available -= vectors; if (available > 0) { iw_vectors = (available >= iw_want) ? iw_want : available; vectors += iw_vectors; } else iw_vectors = 0; } #endif ixl_set_msix_enable(dev); if (pci_alloc_msix(dev, &vectors) == 0) { device_printf(pf->dev, "Using MSIX interrupts with %d vectors\n", vectors); pf->msix = vectors; #ifdef IXL_IW if (ixl_enable_iwarp) pf->iw_msix = iw_vectors; #endif pf->vsi.num_queues = queues; #ifdef RSS /* * If we're doing RSS, the number of queues needs to * match the number of RSS buckets that are configured. * * + If there's more queues than RSS buckets, we'll end * up with queues that get no traffic. * * + If there's more RSS buckets than queues, we'll end * up having multiple RSS buckets map to the same queue, * so there'll be some contention. 
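 * (The queue count was already clamped to the bucket count above, so
 * a mismatch at this point means fewer queues than buckets; the check
 * below only warns and continues.)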
*/ if (queues != rss_getnumbuckets()) { device_printf(dev, "%s: queues (%d) != RSS buckets (%d)" "; performance will be impacted.\n", __func__, queues, rss_getnumbuckets()); } #endif return (vectors); } no_msix: vectors = pci_msi_count(dev); pf->vsi.num_queues = 1; pf->max_queues = 1; if (vectors == 1 && pci_alloc_msi(dev, &vectors) == 0) device_printf(pf->dev, "Using an MSI interrupt\n"); else { vectors = 0; device_printf(pf->dev, "Using a Legacy interrupt\n"); } return (vectors); } /* * Configure admin queue/misc interrupt cause registers in hardware. */ void ixl_configure_intr0_msix(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; u32 reg; /* First set up the adminq - vector 0 */ wr32(hw, I40E_PFINT_ICR0_ENA, 0); /* disable all */ rd32(hw, I40E_PFINT_ICR0); /* read to clear */ reg = I40E_PFINT_ICR0_ENA_ECC_ERR_MASK | I40E_PFINT_ICR0_ENA_GRST_MASK | I40E_PFINT_ICR0_ENA_HMC_ERR_MASK | I40E_PFINT_ICR0_ENA_ADMINQ_MASK | I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK | I40E_PFINT_ICR0_ENA_VFLR_MASK | I40E_PFINT_ICR0_ENA_PCI_EXCEPTION_MASK; wr32(hw, I40E_PFINT_ICR0_ENA, reg); /* * 0x7FF is the end of the queue list. * This means we won't use MSI-X vector 0 for a queue interrupt * in MSIX mode. */ wr32(hw, I40E_PFINT_LNKLST0, 0x7FF); /* Value is in 2 usec units, so 0x3E is 62*2 = 124 usecs. */ wr32(hw, I40E_PFINT_ITR0(IXL_RX_ITR), 0x3E); wr32(hw, I40E_PFINT_DYN_CTL0, I40E_PFINT_DYN_CTL0_SW_ITR_INDX_MASK | I40E_PFINT_DYN_CTL0_INTENA_MSK_MASK); wr32(hw, I40E_PFINT_STAT_CTL0, 0); } /* * Configure queue interrupt cause registers in hardware. */ void ixl_configure_queue_intr_msix(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; u32 reg; u16 vector = 1; for (int i = 0; i < vsi->num_queues; i++, vector++) { wr32(hw, I40E_PFINT_DYN_CTLN(i), 0); /* First queue type is RX / 0 */ wr32(hw, I40E_PFINT_LNKLSTN(i), i); reg = I40E_QINT_RQCTL_CAUSE_ENA_MASK | (IXL_RX_ITR << I40E_QINT_RQCTL_ITR_INDX_SHIFT) | (vector << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) | (i << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) | (I40E_QUEUE_TYPE_TX << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT); wr32(hw, I40E_QINT_RQCTL(i), reg); reg = I40E_QINT_TQCTL_CAUSE_ENA_MASK | (IXL_TX_ITR << I40E_QINT_TQCTL_ITR_INDX_SHIFT) | (vector << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) | (IXL_QUEUE_EOL << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT) | (I40E_QUEUE_TYPE_RX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT); wr32(hw, I40E_QINT_TQCTL(i), reg); } } /* * Configure for MSI single vector operation */ void ixl_configure_legacy(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct ixl_queue *que = vsi->queues; struct rx_ring *rxr = &que->rxr; struct tx_ring *txr = &que->txr; u32 reg; /* Configure ITR */ vsi->tx_itr_setting = pf->tx_itr; wr32(hw, I40E_PFINT_ITR0(IXL_TX_ITR), vsi->tx_itr_setting); txr->itr = vsi->tx_itr_setting; vsi->rx_itr_setting = pf->rx_itr; wr32(hw, I40E_PFINT_ITR0(IXL_RX_ITR), vsi->rx_itr_setting); rxr->itr = vsi->rx_itr_setting; /* Setup "other" causes */ reg = I40E_PFINT_ICR0_ENA_ECC_ERR_MASK | I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK | I40E_PFINT_ICR0_ENA_GRST_MASK | I40E_PFINT_ICR0_ENA_PCI_EXCEPTION_MASK | I40E_PFINT_ICR0_ENA_GPIO_MASK | I40E_PFINT_ICR0_ENA_LINK_STAT_CHANGE_MASK | I40E_PFINT_ICR0_ENA_HMC_ERR_MASK | I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK | I40E_PFINT_ICR0_ENA_VFLR_MASK | I40E_PFINT_ICR0_ENA_ADMINQ_MASK ; wr32(hw, I40E_PFINT_ICR0_ENA, reg); /* No ITR for non-queue interrupts */ wr32(hw, I40E_PFINT_STAT_CTL0, IXL_ITR_NONE << I40E_PFINT_STAT_CTL0_OTHER_ITR_INDX_SHIFT); /* FIRSTQ_INDX = 0, FIRSTQ_TYPE = 0 (rx) 
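 * The legacy/MSI interrupt chain set up below is: PFINT_LNKLST0
 * points at RX queue 0, QINT_RQCTL(0) chains on to TX queue 0, and
 * QINT_TQCTL(0) terminates the list with IXL_QUEUE_EOL.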
*/ wr32(hw, I40E_PFINT_LNKLST0, 0); /* Associate the queue pair to the vector and enable the q int */ reg = I40E_QINT_RQCTL_CAUSE_ENA_MASK | (IXL_RX_ITR << I40E_QINT_RQCTL_ITR_INDX_SHIFT) | (I40E_QUEUE_TYPE_TX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT); wr32(hw, I40E_QINT_RQCTL(0), reg); reg = I40E_QINT_TQCTL_CAUSE_ENA_MASK | (IXL_TX_ITR << I40E_QINT_TQCTL_ITR_INDX_SHIFT) | (IXL_QUEUE_EOL << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT); wr32(hw, I40E_QINT_TQCTL(0), reg); } int ixl_allocate_pci_resources(struct ixl_pf *pf) { int rid; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; /* Map BAR0 */ rid = PCIR_BAR(0); pf->pci_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (!(pf->pci_mem)) { device_printf(dev, "Unable to allocate bus resource: PCI memory\n"); return (ENXIO); } /* Save off the PCI information */ hw->vendor_id = pci_get_vendor(dev); hw->device_id = pci_get_device(dev); hw->revision_id = pci_read_config(dev, PCIR_REVID, 1); hw->subsystem_vendor_id = pci_read_config(dev, PCIR_SUBVEND_0, 2); hw->subsystem_device_id = pci_read_config(dev, PCIR_SUBDEV_0, 2); hw->bus.device = pci_get_slot(dev); hw->bus.func = pci_get_function(dev); /* Save off register access information */ pf->osdep.mem_bus_space_tag = rman_get_bustag(pf->pci_mem); pf->osdep.mem_bus_space_handle = rman_get_bushandle(pf->pci_mem); pf->osdep.mem_bus_space_size = rman_get_size(pf->pci_mem); pf->osdep.flush_reg = I40E_GLGEN_STAT; pf->hw.hw_addr = (u8 *) &pf->osdep.mem_bus_space_handle; pf->hw.back = &pf->osdep; return (0); } /* * Teardown and release the admin queue/misc vector * interrupt. */ int ixl_teardown_adminq_msix(struct ixl_pf *pf) { device_t dev = pf->dev; int rid, error = 0; if (pf->admvec) /* we are doing MSIX */ rid = pf->admvec + 1; else (pf->msix != 0) ? 
(rid = 1):(rid = 0); if (pf->tag != NULL) { bus_teardown_intr(dev, pf->res, pf->tag); if (error) { device_printf(dev, "bus_teardown_intr() for" " interrupt 0 failed\n"); // return (ENXIO); } pf->tag = NULL; } if (pf->res != NULL) { bus_release_resource(dev, SYS_RES_IRQ, rid, pf->res); if (error) { device_printf(dev, "bus_release_resource() for" " interrupt 0 failed [rid=%d]\n", rid); // return (ENXIO); } pf->res = NULL; } return (0); } int ixl_teardown_queue_msix(struct ixl_vsi *vsi) { struct ixl_pf *pf = (struct ixl_pf *)vsi->back; struct ixl_queue *que = vsi->queues; device_t dev = vsi->dev; int rid, error = 0; /* We may get here before stations are setup */ if ((pf->msix < 2) || (que == NULL)) return (0); /* Release all MSIX queue resources */ for (int i = 0; i < vsi->num_queues; i++, que++) { rid = que->msix + 1; if (que->tag != NULL) { error = bus_teardown_intr(dev, que->res, que->tag); if (error) { device_printf(dev, "bus_teardown_intr() for" " Queue %d interrupt failed\n", que->me); // return (ENXIO); } que->tag = NULL; } if (que->res != NULL) { error = bus_release_resource(dev, SYS_RES_IRQ, rid, que->res); if (error) { device_printf(dev, "bus_release_resource() for" " Queue %d interrupt failed [rid=%d]\n", que->me, rid); // return (ENXIO); } que->res = NULL; } } return (0); } void ixl_free_pci_resources(struct ixl_pf *pf) { device_t dev = pf->dev; int memrid; ixl_teardown_queue_msix(&pf->vsi); ixl_teardown_adminq_msix(pf); if (pf->msix > 0) pci_release_msi(dev); memrid = PCIR_BAR(IXL_MSIX_BAR); if (pf->msix_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, memrid, pf->msix_mem); if (pf->pci_mem != NULL) bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(0), pf->pci_mem); return; } void ixl_add_ifmedia(struct ixl_vsi *vsi, u64 phy_types) { /* Display supported media types */ if (phy_types & (I40E_CAP_PHY_TYPE_100BASE_TX)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_100_TX, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_1000BASE_T)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_1000_T, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_1000BASE_SX)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_1000_SX, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_1000BASE_LX)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_1000_LX, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_XAUI) || phy_types & (I40E_CAP_PHY_TYPE_XFI) || phy_types & (I40E_CAP_PHY_TYPE_10GBASE_SFPP_CU)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_TWINAX, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_10GBASE_SR)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_SR, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_10GBASE_LR)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_LR, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_10GBASE_T)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_T, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_40GBASE_CR4) || phy_types & (I40E_CAP_PHY_TYPE_40GBASE_CR4_CU) || phy_types & (I40E_CAP_PHY_TYPE_40GBASE_AOC) || phy_types & (I40E_CAP_PHY_TYPE_XLAUI) || phy_types & (I40E_CAP_PHY_TYPE_40GBASE_KR4)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_40G_CR4, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_40GBASE_SR4)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_40G_SR4, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_40GBASE_LR4)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_40G_LR4, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_1000BASE_KX)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_1000_KX, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_10GBASE_CR1_CU) || phy_types & (I40E_CAP_PHY_TYPE_10GBASE_CR1)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_CR1, 0, NULL); if 
(phy_types & (I40E_CAP_PHY_TYPE_10GBASE_AOC)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_TWINAX_LONG, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_SFI)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_SFI, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_10GBASE_KX4)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_KX4, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_10GBASE_KR)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_10G_KR, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_20GBASE_KR2)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_20G_KR2, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_40GBASE_KR4)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_40G_KR4, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_XLPPI)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_40G_XLPPI, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_25GBASE_KR)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_25G_KR, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_25GBASE_CR)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_25G_CR, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_25GBASE_SR)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_25G_SR, 0, NULL); if (phy_types & (I40E_CAP_PHY_TYPE_25GBASE_LR)) ifmedia_add(&vsi->media, IFM_ETHER | IFM_UNKNOWN, 0, NULL); } /********************************************************************* * * Setup networking device structure and register an interface. * **********************************************************************/ int ixl_setup_interface(device_t dev, struct ixl_vsi *vsi) { struct ixl_pf *pf = (struct ixl_pf *)vsi->back; struct ifnet *ifp; struct i40e_hw *hw = vsi->hw; struct ixl_queue *que = vsi->queues; struct i40e_aq_get_phy_abilities_resp abilities; enum i40e_status_code aq_error = 0; INIT_DEBUGOUT("ixl_setup_interface: begin"); ifp = vsi->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not allocate ifnet structure\n"); return (-1); } if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_mtu = ETHERMTU; ifp->if_init = ixl_init; ifp->if_softc = vsi; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = ixl_ioctl; #if __FreeBSD_version >= 1100036 if_setgetcounterfn(ifp, ixl_get_counter); #endif ifp->if_transmit = ixl_mq_start; ifp->if_qflush = ixl_qflush; ifp->if_snd.ifq_maxlen = que->num_desc - 2; vsi->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN; /* Set TSO limits */ ifp->if_hw_tsomax = IP_MAXPACKET - (ETHER_HDR_LEN + ETHER_CRC_LEN); ifp->if_hw_tsomaxsegcount = IXL_MAX_TSO_SEGS; ifp->if_hw_tsomaxsegsize = PAGE_SIZE; /* * Tell the upper layer(s) we support long frames. */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_HWCSUM; ifp->if_capabilities |= IFCAP_HWCSUM_IPV6; ifp->if_capabilities |= IFCAP_TSO; ifp->if_capabilities |= IFCAP_JUMBO_MTU; ifp->if_capabilities |= IFCAP_LRO; /* VLAN capabilties */ ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWTSO | IFCAP_VLAN_MTU | IFCAP_VLAN_HWCSUM; ifp->if_capenable = ifp->if_capabilities; /* ** Don't turn this on by default, if vlans are ** created on another pseudo device (eg. lagg) ** then vlan events are not passed thru, breaking ** operation, but with HW FILTER off it works. If ** using vlans directly on the ixl driver you can ** enable this and get full hardware tag filtering. 
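** Because if_capenable was snapshotted just above, HWFILTER ends up
** advertised but disabled by default; it can be turned on per
** interface with ifconfig's vlanhwfilter capability flag when vlans
** are configured directly on ixl.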
*/ ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; /* * Specify the media types supported by this adapter and register * callbacks to update media and link information */ ifmedia_init(&vsi->media, IFM_IMASK, ixl_media_change, ixl_media_status); aq_error = i40e_aq_get_phy_capabilities(hw, FALSE, TRUE, &abilities, NULL); /* May need delay to detect fiber correctly */ if (aq_error == I40E_ERR_UNKNOWN_PHY) { i40e_msec_delay(200); aq_error = i40e_aq_get_phy_capabilities(hw, FALSE, TRUE, &abilities, NULL); } if (aq_error) { if (aq_error == I40E_ERR_UNKNOWN_PHY) device_printf(dev, "Unknown PHY type detected!\n"); else device_printf(dev, "Error getting supported media types, err %d," " AQ error %d\n", aq_error, hw->aq.asq_last_status); return (0); } pf->supported_speeds = abilities.link_speed; ifp->if_baudrate = ixl_max_aq_speed_to_value(pf->supported_speeds); ixl_add_ifmedia(vsi, hw->phy.phy_types); /* Use autoselect media by default */ ifmedia_add(&vsi->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&vsi->media, IFM_ETHER | IFM_AUTO); ether_ifattach(ifp, hw->mac.addr); return (0); } /* ** Run when the Admin Queue gets a link state change interrupt. */ void ixl_link_event(struct ixl_pf *pf, struct i40e_arq_event_info *e) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct i40e_aqc_get_link_status *status = (struct i40e_aqc_get_link_status *)&e->desc.params.raw; /* Request link status from adapter */ hw->phy.get_link_info = TRUE; i40e_get_link_status(hw, &pf->link_up); /* Print out message if an unqualified module is found */ if ((status->link_info & I40E_AQ_MEDIA_AVAILABLE) && (!(status->an_info & I40E_AQ_QUALIFIED_MODULE)) && (!(status->link_info & I40E_AQ_LINK_UP))) device_printf(dev, "Link failed because " "an unqualified module was detected!\n"); /* Update OS link info */ ixl_update_link_status(pf); } /********************************************************************* * * Get Firmware Switch configuration * - this will need to be more robust when more complex * switch configurations are enabled. 
* **********************************************************************/ int ixl_switch_config(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; device_t dev = vsi->dev; struct i40e_aqc_get_switch_config_resp *sw_config; u8 aq_buf[I40E_AQ_LARGE_BUF]; int ret; u16 next = 0; memset(&aq_buf, 0, sizeof(aq_buf)); sw_config = (struct i40e_aqc_get_switch_config_resp *)aq_buf; ret = i40e_aq_get_switch_config(hw, sw_config, sizeof(aq_buf), &next, NULL); if (ret) { device_printf(dev, "aq_get_switch_config() failed, error %d," " aq_error %d\n", ret, pf->hw.aq.asq_last_status); return (ret); } if (pf->dbg_mask & IXL_DBG_SWITCH_INFO) { device_printf(dev, "Switch config: header reported: %d in structure, %d total\n", sw_config->header.num_reported, sw_config->header.num_total); for (int i = 0; i < sw_config->header.num_reported; i++) { device_printf(dev, "-> %d: type=%d seid=%d uplink=%d downlink=%d\n", i, sw_config->element[i].element_type, sw_config->element[i].seid, sw_config->element[i].uplink_seid, sw_config->element[i].downlink_seid); } } /* Simplified due to a single VSI */ vsi->uplink_seid = sw_config->element[0].uplink_seid; vsi->downlink_seid = sw_config->element[0].downlink_seid; vsi->seid = sw_config->element[0].seid; return (ret); } /********************************************************************* * * Initialize the VSI: this handles contexts, which means things * like the number of descriptors, buffer size, * plus we init the rings thru this function. * **********************************************************************/ int ixl_initialize_vsi(struct ixl_vsi *vsi) { struct ixl_pf *pf = vsi->back; struct ixl_queue *que = vsi->queues; device_t dev = vsi->dev; struct i40e_hw *hw = vsi->hw; struct i40e_vsi_context ctxt; int tc_queues; int err = 0; memset(&ctxt, 0, sizeof(ctxt)); ctxt.seid = vsi->seid; if (pf->veb_seid != 0) ctxt.uplink_seid = pf->veb_seid; ctxt.pf_num = hw->pf_id; err = i40e_aq_get_vsi_params(hw, &ctxt, NULL); if (err) { device_printf(dev, "i40e_aq_get_vsi_params() failed, error %d" " aq_error %d\n", err, hw->aq.asq_last_status); return (err); } ixl_dbg(pf, IXL_DBG_SWITCH_INFO, "get_vsi_params: seid: %d, uplinkseid: %d, vsi_number: %d, " "vsis_allocated: %d, vsis_unallocated: %d, flags: 0x%x, " "pfnum: %d, vfnum: %d, stat idx: %d, enabled: %d\n", ctxt.seid, ctxt.uplink_seid, ctxt.vsi_number, ctxt.vsis_allocated, ctxt.vsis_unallocated, ctxt.flags, ctxt.pf_num, ctxt.vf_num, ctxt.info.stat_counter_idx, ctxt.info.up_enable_bits); /* ** Set the queue and traffic class bits ** - when multiple traffic classes are supported ** this will need to be more robust. */ ctxt.info.valid_sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID; ctxt.info.mapping_flags |= I40E_AQ_VSI_QUE_MAP_CONTIG; /* In contig mode, que_mapping[0] is first queue index used by this VSI */ ctxt.info.queue_mapping[0] = 0; /* * This VSI will only use traffic class 0; start traffic class 0's * queue allocation at queue 0, and assign it 2^tc_queues queues (though * the driver may not use all of them). 
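 * For example, with 8 queues allocated to the PF, bsrl(8) == 3 (index
 * of the highest set bit), so the TC queue-count field below encodes
 * 2^3 = 8 queues for TC 0.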
*/ tc_queues = bsrl(pf->qtag.num_allocated); ctxt.info.tc_mapping[0] = ((0 << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) & I40E_AQ_VSI_TC_QUE_OFFSET_MASK) | ((tc_queues << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT) & I40E_AQ_VSI_TC_QUE_NUMBER_MASK); /* Set VLAN receive stripping mode */ ctxt.info.valid_sections |= I40E_AQ_VSI_PROP_VLAN_VALID; ctxt.info.port_vlan_flags = I40E_AQ_VSI_PVLAN_MODE_ALL; if (vsi->ifp->if_capenable & IFCAP_VLAN_HWTAGGING) ctxt.info.port_vlan_flags |= I40E_AQ_VSI_PVLAN_EMOD_STR_BOTH; else ctxt.info.port_vlan_flags |= I40E_AQ_VSI_PVLAN_EMOD_NOTHING; #ifdef IXL_IW /* Set TCP Enable for iWARP capable VSI */ if (ixl_enable_iwarp && pf->iw_enabled) { ctxt.info.valid_sections |= htole16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID); ctxt.info.queueing_opt_flags |= I40E_AQ_VSI_QUE_OPT_TCP_ENA; } #endif /* Save VSI number and info for use later */ vsi->vsi_num = ctxt.vsi_number; bcopy(&ctxt.info, &vsi->info, sizeof(vsi->info)); /* Reset VSI statistics */ ixl_vsi_reset_stats(vsi); vsi->hw_filters_add = 0; vsi->hw_filters_del = 0; ctxt.flags = htole16(I40E_AQ_VSI_TYPE_PF); err = i40e_aq_update_vsi_params(hw, &ctxt, NULL); if (err) { device_printf(dev, "i40e_aq_update_vsi_params() failed, error %d," " aq_error %d\n", err, hw->aq.asq_last_status); return (err); } for (int i = 0; i < vsi->num_queues; i++, que++) { struct tx_ring *txr = &que->txr; struct rx_ring *rxr = &que->rxr; struct i40e_hmc_obj_txq tctx; struct i40e_hmc_obj_rxq rctx; u32 txctl; u16 size; /* Setup the HMC TX Context */ size = que->num_desc * sizeof(struct i40e_tx_desc); memset(&tctx, 0, sizeof(struct i40e_hmc_obj_txq)); tctx.new_context = 1; tctx.base = (txr->dma.pa/IXL_TX_CTX_BASE_UNITS); tctx.qlen = que->num_desc; tctx.fc_ena = 0; tctx.rdylist = vsi->info.qs_handle[0]; /* index is TC */ /* Enable HEAD writeback */ tctx.head_wb_ena = 1; tctx.head_wb_addr = txr->dma.pa + (que->num_desc * sizeof(struct i40e_tx_desc)); tctx.rdylist_act = 0; err = i40e_clear_lan_tx_queue_context(hw, i); if (err) { device_printf(dev, "Unable to clear TX context\n"); break; } err = i40e_set_lan_tx_queue_context(hw, i, &tctx); if (err) { device_printf(dev, "Unable to set TX context\n"); break; } /* Associate the ring with this PF */ txctl = I40E_QTX_CTL_PF_QUEUE; txctl |= ((hw->pf_id << I40E_QTX_CTL_PF_INDX_SHIFT) & I40E_QTX_CTL_PF_INDX_MASK); wr32(hw, I40E_QTX_CTL(i), txctl); ixl_flush(hw); /* Do ring (re)init */ ixl_init_tx_ring(que); /* Next setup the HMC RX Context */ if (vsi->max_frame_size <= MCLBYTES) rxr->mbuf_sz = MCLBYTES; else rxr->mbuf_sz = MJUMPAGESIZE; u16 max_rxmax = rxr->mbuf_sz * hw->func_caps.rx_buf_chain_len; /* Set up an RX context for the HMC */ memset(&rctx, 0, sizeof(struct i40e_hmc_obj_rxq)); rctx.dbuff = rxr->mbuf_sz >> I40E_RXQ_CTX_DBUFF_SHIFT; /* ignore header split for now */ rctx.hbuff = 0 >> I40E_RXQ_CTX_HBUFF_SHIFT; rctx.rxmax = (vsi->max_frame_size < max_rxmax) ? 
vsi->max_frame_size : max_rxmax; rctx.dtype = 0; rctx.dsize = 1; /* do 32byte descriptors */ rctx.hsplit_0 = 0; /* no HDR split initially */ rctx.base = (rxr->dma.pa/IXL_RX_CTX_BASE_UNITS); rctx.qlen = que->num_desc; rctx.tphrdesc_ena = 1; rctx.tphwdesc_ena = 1; rctx.tphdata_ena = 0; rctx.tphhead_ena = 0; rctx.lrxqthresh = 2; rctx.crcstrip = 1; rctx.l2tsel = 1; rctx.showiv = 1; rctx.fc_ena = 0; rctx.prefena = 1; err = i40e_clear_lan_rx_queue_context(hw, i); if (err) { device_printf(dev, "Unable to clear RX context %d\n", i); break; } err = i40e_set_lan_rx_queue_context(hw, i, &rctx); if (err) { device_printf(dev, "Unable to set RX context %d\n", i); break; } err = ixl_init_rx_ring(que); if (err) { device_printf(dev, "Fail in init_rx_ring %d\n", i); break; } #ifdef DEV_NETMAP /* preserve queue */ if (vsi->ifp->if_capenable & IFCAP_NETMAP) { struct netmap_adapter *na = NA(vsi->ifp); struct netmap_kring *kring = &na->rx_rings[i]; int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring); wr32(vsi->hw, I40E_QRX_TAIL(que->me), t); } else #endif /* DEV_NETMAP */ wr32(vsi->hw, I40E_QRX_TAIL(que->me), que->num_desc - 1); } return (err); } /********************************************************************* * * Free all VSI structs. * **********************************************************************/ void ixl_free_vsi(struct ixl_vsi *vsi) { struct ixl_pf *pf = (struct ixl_pf *)vsi->back; struct ixl_queue *que = vsi->queues; /* Free station queues */ if (!vsi->queues) goto free_filters; for (int i = 0; i < vsi->num_queues; i++, que++) { struct tx_ring *txr = &que->txr; struct rx_ring *rxr = &que->rxr; if (!mtx_initialized(&txr->mtx)) /* uninitialized */ continue; IXL_TX_LOCK(txr); ixl_free_que_tx(que); if (txr->base) i40e_free_dma_mem(&pf->hw, &txr->dma); IXL_TX_UNLOCK(txr); IXL_TX_LOCK_DESTROY(txr); if (!mtx_initialized(&rxr->mtx)) /* uninitialized */ continue; IXL_RX_LOCK(rxr); ixl_free_que_rx(que); if (rxr->base) i40e_free_dma_mem(&pf->hw, &rxr->dma); IXL_RX_UNLOCK(rxr); IXL_RX_LOCK_DESTROY(rxr); } free(vsi->queues, M_DEVBUF); free_filters: /* Free VSI filter list */ ixl_free_mac_filters(vsi); } void ixl_free_mac_filters(struct ixl_vsi *vsi) { struct ixl_mac_filter *f; while (!SLIST_EMPTY(&vsi->ftl)) { f = SLIST_FIRST(&vsi->ftl); SLIST_REMOVE_HEAD(&vsi->ftl, next); free(f, M_DEVBUF); } } /* * Fill out fields in queue struct and setup tx/rx memory and structs */ static int ixl_setup_queue(struct ixl_queue *que, struct ixl_pf *pf, int index) { device_t dev = pf->dev; struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct tx_ring *txr = &que->txr; struct rx_ring *rxr = &que->rxr; int error = 0; int rsize, tsize; que->num_desc = pf->ringsz; que->me = index; que->vsi = vsi; txr->que = que; txr->tail = I40E_QTX_TAIL(que->me); /* Initialize the TX lock */ snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)", device_get_nameunit(dev), que->me); mtx_init(&txr->mtx, txr->mtx_name, NULL, MTX_DEF); /* Create the TX descriptor ring */ tsize = roundup2((que->num_desc * sizeof(struct i40e_tx_desc)) + sizeof(u32), DBA_ALIGN); if (i40e_allocate_dma_mem(hw, &txr->dma, i40e_mem_reserved, tsize, DBA_ALIGN)) { device_printf(dev, "Unable to allocate TX Descriptor memory\n"); error = ENOMEM; goto fail; } txr->base = (struct i40e_tx_desc *)txr->dma.va; bzero((void *)txr->base, tsize); /* Now allocate transmit soft structs for the ring */ if (ixl_allocate_tx_data(que)) { device_printf(dev, "Critical Failure setting up TX structures\n"); error = ENOMEM; goto fail; } /* Allocate a buf ring */ txr->br = 
buf_ring_alloc(DEFAULT_TXBRSZ, M_DEVBUF, M_NOWAIT, &txr->mtx); if (txr->br == NULL) { device_printf(dev, "Critical Failure setting up TX buf ring\n"); error = ENOMEM; goto fail; } rsize = roundup2(que->num_desc * sizeof(union i40e_rx_desc), DBA_ALIGN); rxr->que = que; rxr->tail = I40E_QRX_TAIL(que->me); /* Initialize the RX side lock */ snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)", device_get_nameunit(dev), que->me); mtx_init(&rxr->mtx, rxr->mtx_name, NULL, MTX_DEF); if (i40e_allocate_dma_mem(hw, &rxr->dma, i40e_mem_reserved, rsize, 4096)) { device_printf(dev, "Unable to allocate RX Descriptor memory\n"); error = ENOMEM; goto fail; } rxr->base = (union i40e_rx_desc *)rxr->dma.va; bzero((void *)rxr->base, rsize); /* Allocate receive soft structs for the ring*/ if (ixl_allocate_rx_data(que)) { device_printf(dev, "Critical Failure setting up receive structs\n"); error = ENOMEM; goto fail; } return (0); fail: if (rxr->base) i40e_free_dma_mem(&pf->hw, &rxr->dma); if (mtx_initialized(&rxr->mtx)) mtx_destroy(&rxr->mtx); if (txr->br) { buf_ring_free(txr->br, M_DEVBUF); txr->br = NULL; } if (txr->base) i40e_free_dma_mem(&pf->hw, &txr->dma); if (mtx_initialized(&txr->mtx)) mtx_destroy(&txr->mtx); return (error); } /********************************************************************* * * Allocate memory for the VSI (virtual station interface) and their * associated queues, rings and the descriptors associated with each, * called only once at attach. * **********************************************************************/ int ixl_setup_stations(struct ixl_pf *pf) { device_t dev = pf->dev; struct ixl_vsi *vsi; struct ixl_queue *que; int error = 0; vsi = &pf->vsi; vsi->back = (void *)pf; vsi->hw = &pf->hw; vsi->id = 0; vsi->num_vlans = 0; vsi->back = pf; /* Get memory for the station queues */ if (!(vsi->queues = (struct ixl_queue *) malloc(sizeof(struct ixl_queue) * vsi->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate queue memory\n"); error = ENOMEM; return (error); } /* Then setup each queue */ for (int i = 0; i < vsi->num_queues; i++) { que = &vsi->queues[i]; error = ixl_setup_queue(que, pf, i); if (error) return (error); } return (0); } /* ** Provide a update to the queue RX ** interrupt moderation value. 
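** When dynamic ITR is enabled, the routine classifies recent traffic
** into low/average/bulk latency buckets and then blends toward the
** new target with new = (10 * target * cur) / ((9 * target) + cur);
** e.g. cur = 50 and target = 100 gives 52, so the interval creeps
** toward the target instead of jumping straight to it.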
*/ void ixl_set_queue_rx_itr(struct ixl_queue *que) { struct ixl_vsi *vsi = que->vsi; struct ixl_pf *pf = (struct ixl_pf *)vsi->back; struct i40e_hw *hw = vsi->hw; struct rx_ring *rxr = &que->rxr; u16 rx_itr; u16 rx_latency = 0; int rx_bytes; /* Idle, do nothing */ if (rxr->bytes == 0) return; if (pf->dynamic_rx_itr) { rx_bytes = rxr->bytes/rxr->itr; rx_itr = rxr->itr; /* Adjust latency range */ switch (rxr->latency) { case IXL_LOW_LATENCY: if (rx_bytes > 10) { rx_latency = IXL_AVE_LATENCY; rx_itr = IXL_ITR_20K; } break; case IXL_AVE_LATENCY: if (rx_bytes > 20) { rx_latency = IXL_BULK_LATENCY; rx_itr = IXL_ITR_8K; } else if (rx_bytes <= 10) { rx_latency = IXL_LOW_LATENCY; rx_itr = IXL_ITR_100K; } break; case IXL_BULK_LATENCY: if (rx_bytes <= 20) { rx_latency = IXL_AVE_LATENCY; rx_itr = IXL_ITR_20K; } break; } rxr->latency = rx_latency; if (rx_itr != rxr->itr) { /* do an exponential smoothing */ rx_itr = (10 * rx_itr * rxr->itr) / ((9 * rx_itr) + rxr->itr); rxr->itr = min(rx_itr, IXL_MAX_ITR); wr32(hw, I40E_PFINT_ITRN(IXL_RX_ITR, que->me), rxr->itr); } } else { /* We may have have toggled to non-dynamic */ if (vsi->rx_itr_setting & IXL_ITR_DYNAMIC) vsi->rx_itr_setting = pf->rx_itr; /* Update the hardware if needed */ if (rxr->itr != vsi->rx_itr_setting) { rxr->itr = vsi->rx_itr_setting; wr32(hw, I40E_PFINT_ITRN(IXL_RX_ITR, que->me), rxr->itr); } } rxr->bytes = 0; rxr->packets = 0; return; } /* ** Provide a update to the queue TX ** interrupt moderation value. */ void ixl_set_queue_tx_itr(struct ixl_queue *que) { struct ixl_vsi *vsi = que->vsi; struct ixl_pf *pf = (struct ixl_pf *)vsi->back; struct i40e_hw *hw = vsi->hw; struct tx_ring *txr = &que->txr; u16 tx_itr; u16 tx_latency = 0; int tx_bytes; /* Idle, do nothing */ if (txr->bytes == 0) return; if (pf->dynamic_tx_itr) { tx_bytes = txr->bytes/txr->itr; tx_itr = txr->itr; switch (txr->latency) { case IXL_LOW_LATENCY: if (tx_bytes > 10) { tx_latency = IXL_AVE_LATENCY; tx_itr = IXL_ITR_20K; } break; case IXL_AVE_LATENCY: if (tx_bytes > 20) { tx_latency = IXL_BULK_LATENCY; tx_itr = IXL_ITR_8K; } else if (tx_bytes <= 10) { tx_latency = IXL_LOW_LATENCY; tx_itr = IXL_ITR_100K; } break; case IXL_BULK_LATENCY: if (tx_bytes <= 20) { tx_latency = IXL_AVE_LATENCY; tx_itr = IXL_ITR_20K; } break; } txr->latency = tx_latency; if (tx_itr != txr->itr) { /* do an exponential smoothing */ tx_itr = (10 * tx_itr * txr->itr) / ((9 * tx_itr) + txr->itr); txr->itr = min(tx_itr, IXL_MAX_ITR); wr32(hw, I40E_PFINT_ITRN(IXL_TX_ITR, que->me), txr->itr); } } else { /* We may have have toggled to non-dynamic */ if (vsi->tx_itr_setting & IXL_ITR_DYNAMIC) vsi->tx_itr_setting = pf->tx_itr; /* Update the hardware if needed */ if (txr->itr != vsi->tx_itr_setting) { txr->itr = vsi->tx_itr_setting; wr32(hw, I40E_PFINT_ITRN(IXL_TX_ITR, que->me), txr->itr); } } txr->bytes = 0; txr->packets = 0; return; } void ixl_add_vsi_sysctls(struct ixl_pf *pf, struct ixl_vsi *vsi, struct sysctl_ctx_list *ctx, const char *sysctl_name) { struct sysctl_oid *tree; struct sysctl_oid_list *child; struct sysctl_oid_list *vsi_list; tree = device_get_sysctl_tree(pf->dev); child = SYSCTL_CHILDREN(tree); vsi->vsi_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, sysctl_name, CTLFLAG_RD, NULL, "VSI Number"); vsi_list = SYSCTL_CHILDREN(vsi->vsi_node); ixl_add_sysctls_eth_stats(ctx, vsi_list, &vsi->eth_stats); } #ifdef IXL_DEBUG /** * ixl_sysctl_qtx_tail_handler * Retrieves I40E_QTX_TAIL value from hardware * for a sysctl. 
*/ static int ixl_sysctl_qtx_tail_handler(SYSCTL_HANDLER_ARGS) { struct ixl_queue *que; int error; u32 val; que = ((struct ixl_queue *)oidp->oid_arg1); if (!que) return 0; val = rd32(que->vsi->hw, que->txr.tail); error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr) return error; return (0); } /** * ixl_sysctl_qrx_tail_handler * Retrieves I40E_QRX_TAIL value from hardware * for a sysctl. */ static int ixl_sysctl_qrx_tail_handler(SYSCTL_HANDLER_ARGS) { struct ixl_queue *que; int error; u32 val; que = ((struct ixl_queue *)oidp->oid_arg1); if (!que) return 0; val = rd32(que->vsi->hw, que->rxr.tail); error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr) return error; return (0); } #endif /* * Used to set the Tx ITR value for all of the PF LAN VSI's queues. * Writes to the ITR registers immediately. */ static int ixl_sysctl_pf_tx_itr(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; device_t dev = pf->dev; int error = 0; int requested_tx_itr; requested_tx_itr = pf->tx_itr; error = sysctl_handle_int(oidp, &requested_tx_itr, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (pf->dynamic_tx_itr) { device_printf(dev, "Cannot set TX itr value while dynamic TX itr is enabled\n"); return (EINVAL); } if (requested_tx_itr < 0 || requested_tx_itr > IXL_MAX_ITR) { device_printf(dev, "Invalid TX itr value; value must be between 0 and %d\n", IXL_MAX_ITR); return (EINVAL); } pf->tx_itr = requested_tx_itr; ixl_configure_tx_itr(pf); return (error); } /* * Used to set the Rx ITR value for all of the PF LAN VSI's queues. * Writes to the ITR registers immediately. */ static int ixl_sysctl_pf_rx_itr(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; device_t dev = pf->dev; int error = 0; int requested_rx_itr; requested_rx_itr = pf->rx_itr; error = sysctl_handle_int(oidp, &requested_rx_itr, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (pf->dynamic_rx_itr) { device_printf(dev, "Cannot set RX itr value while dynamic RX itr is enabled\n"); return (EINVAL); } if (requested_rx_itr < 0 || requested_rx_itr > IXL_MAX_ITR) { device_printf(dev, "Invalid RX itr value; value must be between 0 and %d\n", IXL_MAX_ITR); return (EINVAL); } pf->rx_itr = requested_rx_itr; ixl_configure_rx_itr(pf); return (error); } void ixl_add_hw_stats(struct ixl_pf *pf) { device_t dev = pf->dev; struct ixl_vsi *vsi = &pf->vsi; struct ixl_queue *queues = vsi->queues; struct i40e_hw_port_stats *pf_stats = &pf->stats; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid *tree = device_get_sysctl_tree(dev); struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree); struct sysctl_oid_list *vsi_list; struct sysctl_oid *queue_node; struct sysctl_oid_list *queue_list; struct tx_ring *txr; struct rx_ring *rxr; char queue_namebuf[QUEUE_NAME_LEN]; /* Driver statistics */ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "watchdog_events", CTLFLAG_RD, &pf->watchdog_events, "Watchdog timeouts"); SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "admin_irq", CTLFLAG_RD, &pf->admin_irq, "Admin Queue IRQ Handled"); ixl_add_vsi_sysctls(pf, &pf->vsi, ctx, "pf"); vsi_list = SYSCTL_CHILDREN(pf->vsi.vsi_node); /* Queue statistics */ for (int q = 0; q < vsi->num_queues; q++) { snprintf(queue_namebuf, QUEUE_NAME_LEN, "que%d", q); queue_node = SYSCTL_ADD_NODE(ctx, vsi_list, OID_AUTO, queue_namebuf, CTLFLAG_RD, NULL, "Queue #"); queue_list = SYSCTL_CHILDREN(queue_node); txr = &(queues[q].txr); rxr = &(queues[q].rxr); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, 
"mbuf_defrag_failed", CTLFLAG_RD, &(queues[q].mbuf_defrag_failed), "m_defrag() failed"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "irqs", CTLFLAG_RD, &(queues[q].irqs), "irqs on this queue"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tso_tx", CTLFLAG_RD, &(queues[q].tso), "TSO"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_dmamap_failed", CTLFLAG_RD, &(queues[q].tx_dmamap_failed), "Driver tx dma failure in xmit"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "mss_too_small", CTLFLAG_RD, &(queues[q].mss_too_small), "TSO sends with an MSS less than 64"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "no_desc_avail", CTLFLAG_RD, &(txr->no_desc), "Queue No Descriptor Available"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_packets", CTLFLAG_RD, &(txr->total_packets), "Queue Packets Transmitted"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "tx_bytes", CTLFLAG_RD, &(txr->tx_bytes), "Queue Bytes Transmitted"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_packets", CTLFLAG_RD, &(rxr->rx_packets), "Queue Packets Received"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_bytes", CTLFLAG_RD, &(rxr->rx_bytes), "Queue Bytes Received"); SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_desc_err", CTLFLAG_RD, &(rxr->desc_errs), "Queue Rx Descriptor Errors"); SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "rx_itr", CTLFLAG_RD, &(rxr->itr), 0, "Queue Rx ITR Interval"); SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "tx_itr", CTLFLAG_RD, &(txr->itr), 0, "Queue Tx ITR Interval"); #ifdef IXL_DEBUG SYSCTL_ADD_UQUAD(ctx, queue_list, OID_AUTO, "rx_not_done", CTLFLAG_RD, &(rxr->not_done), "Queue Rx Descriptors not Done"); SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "rx_next_refresh", CTLFLAG_RD, &(rxr->next_refresh), 0, "Queue Rx Descriptors not Done"); SYSCTL_ADD_UINT(ctx, queue_list, OID_AUTO, "rx_next_check", CTLFLAG_RD, &(rxr->next_check), 0, "Queue Rx Descriptors not Done"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "qtx_tail", CTLTYPE_UINT | CTLFLAG_RD, &queues[q], sizeof(struct ixl_queue), ixl_sysctl_qtx_tail_handler, "IU", "Queue Transmit Descriptor Tail"); SYSCTL_ADD_PROC(ctx, queue_list, OID_AUTO, "qrx_tail", CTLTYPE_UINT | CTLFLAG_RD, &queues[q], sizeof(struct ixl_queue), ixl_sysctl_qrx_tail_handler, "IU", "Queue Receive Descriptor Tail"); #endif } /* MAC stats */ ixl_add_sysctls_mac_stats(ctx, child, pf_stats); } void ixl_add_sysctls_eth_stats(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child, struct i40e_eth_stats *eth_stats) { struct ixl_sysctl_info ctls[] = { {ð_stats->rx_bytes, "good_octets_rcvd", "Good Octets Received"}, {ð_stats->rx_unicast, "ucast_pkts_rcvd", "Unicast Packets Received"}, {ð_stats->rx_multicast, "mcast_pkts_rcvd", "Multicast Packets Received"}, {ð_stats->rx_broadcast, "bcast_pkts_rcvd", "Broadcast Packets Received"}, {ð_stats->rx_discards, "rx_discards", "Discarded RX packets"}, {ð_stats->tx_bytes, "good_octets_txd", "Good Octets Transmitted"}, {ð_stats->tx_unicast, "ucast_pkts_txd", "Unicast Packets Transmitted"}, {ð_stats->tx_multicast, "mcast_pkts_txd", "Multicast Packets Transmitted"}, {ð_stats->tx_broadcast, "bcast_pkts_txd", "Broadcast Packets Transmitted"}, // end {0,0,0} }; struct ixl_sysctl_info *entry = ctls; while (entry->stat != 0) { SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, entry->name, CTLFLAG_RD, entry->stat, entry->description); entry++; } } void ixl_add_sysctls_mac_stats(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child, struct i40e_hw_port_stats *stats) { struct sysctl_oid *stat_node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "mac", 
CTLFLAG_RD, NULL, "Mac Statistics"); struct sysctl_oid_list *stat_list = SYSCTL_CHILDREN(stat_node); struct i40e_eth_stats *eth_stats = &stats->eth; ixl_add_sysctls_eth_stats(ctx, stat_list, eth_stats); struct ixl_sysctl_info ctls[] = { {&stats->crc_errors, "crc_errors", "CRC Errors"}, {&stats->illegal_bytes, "illegal_bytes", "Illegal Byte Errors"}, {&stats->mac_local_faults, "local_faults", "MAC Local Faults"}, {&stats->mac_remote_faults, "remote_faults", "MAC Remote Faults"}, {&stats->rx_length_errors, "rx_length_errors", "Receive Length Errors"}, /* Packet Reception Stats */ {&stats->rx_size_64, "rx_frames_64", "64 byte frames received"}, {&stats->rx_size_127, "rx_frames_65_127", "65-127 byte frames received"}, {&stats->rx_size_255, "rx_frames_128_255", "128-255 byte frames received"}, {&stats->rx_size_511, "rx_frames_256_511", "256-511 byte frames received"}, {&stats->rx_size_1023, "rx_frames_512_1023", "512-1023 byte frames received"}, {&stats->rx_size_1522, "rx_frames_1024_1522", "1024-1522 byte frames received"}, {&stats->rx_size_big, "rx_frames_big", "1523-9522 byte frames received"}, {&stats->rx_undersize, "rx_undersize", "Undersized packets received"}, {&stats->rx_fragments, "rx_fragmented", "Fragmented packets received"}, {&stats->rx_oversize, "rx_oversized", "Oversized packets received"}, {&stats->rx_jabber, "rx_jabber", "Received Jabber"}, {&stats->checksum_error, "checksum_errors", "Checksum Errors"}, /* Packet Transmission Stats */ {&stats->tx_size_64, "tx_frames_64", "64 byte frames transmitted"}, {&stats->tx_size_127, "tx_frames_65_127", "65-127 byte frames transmitted"}, {&stats->tx_size_255, "tx_frames_128_255", "128-255 byte frames transmitted"}, {&stats->tx_size_511, "tx_frames_256_511", "256-511 byte frames transmitted"}, {&stats->tx_size_1023, "tx_frames_512_1023", "512-1023 byte frames transmitted"}, {&stats->tx_size_1522, "tx_frames_1024_1522", "1024-1522 byte frames transmitted"}, {&stats->tx_size_big, "tx_frames_big", "1523-9522 byte frames transmitted"}, /* Flow control */ {&stats->link_xon_tx, "xon_txd", "Link XON transmitted"}, {&stats->link_xon_rx, "xon_recvd", "Link XON received"}, {&stats->link_xoff_tx, "xoff_txd", "Link XOFF transmitted"}, {&stats->link_xoff_rx, "xoff_recvd", "Link XOFF received"}, /* End */ {0,0,0} }; struct ixl_sysctl_info *entry = ctls; while (entry->stat != 0) { SYSCTL_ADD_UQUAD(ctx, stat_list, OID_AUTO, entry->name, CTLFLAG_RD, entry->stat, entry->description); entry++; } } void ixl_set_rss_key(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; device_t dev = pf->dev; enum i40e_status_code status; #ifdef RSS u32 rss_seed[IXL_RSS_KEY_SIZE_REG]; #else u32 rss_seed[IXL_RSS_KEY_SIZE_REG] = {0x41b01687, 0x183cfd8c, 0xce880440, 0x580cbc3c, 0x35897377, 0x328b25e1, 0x4fa98922, 0xb7d90c14, 0xd5bad70d, 0xcd15a2c1, 0x0, 0x0, 0x0}; #endif #ifdef RSS /* Fetch the configured RSS key */ rss_getkey((uint8_t *) &rss_seed); #endif /* Fill out hash function seed */ if (hw->mac.type == I40E_MAC_X722) { struct i40e_aqc_get_set_rss_key_data key_data; bcopy(rss_seed, key_data.standard_rss_key, 40); status = i40e_aq_set_rss_key(hw, vsi->vsi_num, &key_data); if (status) device_printf(dev, "i40e_aq_set_rss_key status %s, error %s\n", i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); } else { for (int i = 0; i < IXL_RSS_KEY_SIZE_REG; i++) i40e_write_rx_ctl(hw, I40E_PFQF_HKEY(i), rss_seed[i]); } } /* * Configure enabled PCTYPES for RSS. 
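 * The enabled packet-classifier types live in a 64-bit mask split
 * across the I40E_PFQF_HENA(0)/(1) registers; the routine ORs the new
 * bits into whatever is already set rather than overwriting the mask.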
*/ void ixl_set_rss_pctypes(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; u64 set_hena = 0, hena; #ifdef RSS u32 rss_hash_config; rss_hash_config = rss_gethashconfig(); if (rss_hash_config & RSS_HASHTYPE_RSS_IPV4) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_OTHER); if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV4) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_TCP); if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV4) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_UDP); if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_OTHER); if (rss_hash_config & RSS_HASHTYPE_RSS_IPV6_EX) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV6); if (rss_hash_config & RSS_HASHTYPE_RSS_TCP_IPV6) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_TCP); if (rss_hash_config & RSS_HASHTYPE_RSS_UDP_IPV6) set_hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_UDP); #else if (hw->mac.type == I40E_MAC_X722) set_hena = IXL_DEFAULT_RSS_HENA_X722; else set_hena = IXL_DEFAULT_RSS_HENA_XL710; #endif hena = (u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0)) | ((u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1)) << 32); hena |= set_hena; i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), (u32)hena); i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), (u32)(hena >> 32)); } void ixl_set_rss_hlut(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct ixl_vsi *vsi = &pf->vsi; int i, que_id; int lut_entry_width; u32 lut = 0; enum i40e_status_code status; if (hw->mac.type == I40E_MAC_X722) lut_entry_width = 7; else lut_entry_width = pf->hw.func_caps.rss_table_entry_width; /* Populate the LUT with max no. of queues in round robin fashion */ u8 hlut_buf[512]; for (i = 0; i < pf->hw.func_caps.rss_table_size; i++) { #ifdef RSS /* * Fetch the RSS bucket id for the given indirection entry. * Cap it at the number of configured buckets (which is * num_queues.) */ que_id = rss_get_indirection_to_bucket(i); que_id = que_id % vsi->num_queues; #else que_id = i % vsi->num_queues; #endif lut = (que_id & ((0x1 << lut_entry_width) - 1)); hlut_buf[i] = lut; } if (hw->mac.type == I40E_MAC_X722) { status = i40e_aq_set_rss_lut(hw, vsi->vsi_num, TRUE, hlut_buf, sizeof(hlut_buf)); if (status) device_printf(dev, "i40e_aq_set_rss_lut status %s, error %s\n", i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); } else { for (i = 0; i < pf->hw.func_caps.rss_table_size >> 2; i++) wr32(hw, I40E_PFQF_HLUT(i), ((u32 *)hlut_buf)[i]); ixl_flush(hw); } } /* ** Setup the PF's RSS parameters. */ void ixl_config_rss(struct ixl_pf *pf) { ixl_set_rss_key(pf); ixl_set_rss_pctypes(pf); ixl_set_rss_hlut(pf); } /* ** This routine is run via a vlan config EVENT; ** it enables us to use the HW Filter table since ** we can get the vlan id. This just creates the ** entry in the soft version of the VFTA; init will ** repopulate the real table. */ void ixl_register_vlan(void *arg, struct ifnet *ifp, u16 vtag) { struct ixl_vsi *vsi = ifp->if_softc; struct i40e_hw *hw = vsi->hw; struct ixl_pf *pf = (struct ixl_pf *)vsi->back; if (ifp->if_softc != arg) /* Not our event */ return; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; IXL_PF_LOCK(pf); ++vsi->num_vlans; ixl_add_filter(vsi, hw->mac.addr, vtag); IXL_PF_UNLOCK(pf); } /* ** This routine is run via a vlan ** unconfig EVENT; it removes our entry ** from the soft vfta.
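** (The matching hardware filter itself is removed below via ixl_del_filter.)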
*/ void ixl_unregister_vlan(void *arg, struct ifnet *ifp, u16 vtag) { struct ixl_vsi *vsi = ifp->if_softc; struct i40e_hw *hw = vsi->hw; struct ixl_pf *pf = (struct ixl_pf *)vsi->back; if (ifp->if_softc != arg) return; if ((vtag == 0) || (vtag > 4095)) /* Invalid */ return; IXL_PF_LOCK(pf); --vsi->num_vlans; ixl_del_filter(vsi, hw->mac.addr, vtag); IXL_PF_UNLOCK(pf); } /* ** This routine updates vlan filters, called by init; ** it scans the filter table and then updates the hw ** after a soft reset. */ void ixl_setup_vlan_filters(struct ixl_vsi *vsi) { struct ixl_mac_filter *f; int cnt = 0, flags; if (vsi->num_vlans == 0) return; /* ** Scan the filter list for vlan entries, ** mark them for addition and then call ** for the AQ update. */ SLIST_FOREACH(f, &vsi->ftl, next) { if (f->flags & IXL_FILTER_VLAN) { f->flags |= (IXL_FILTER_ADD | IXL_FILTER_USED); cnt++; } } if (cnt == 0) { printf("setup vlan: no filters found!\n"); return; } flags = IXL_FILTER_VLAN; flags |= (IXL_FILTER_ADD | IXL_FILTER_USED); ixl_add_hw_filters(vsi, flags, cnt); return; } /* ** Initialize filter list and add filters that the hardware ** needs to know about. ** ** Requires VSI's filter list & seid to be set before calling. */ void ixl_init_filters(struct ixl_vsi *vsi) { struct ixl_pf *pf = (struct ixl_pf *)vsi->back; /* Add broadcast address */ ixl_add_filter(vsi, ixl_bcast_addr, IXL_VLAN_ANY); /* * Prevent Tx flow control frames from being sent out by * non-firmware transmitters. * This affects every VSI in the PF. */ if (pf->enable_tx_fc_filter) i40e_add_filter_to_drop_tx_flow_control_frames(vsi->hw, vsi->seid); } /* ** This routine adds multicast filters */ void ixl_add_mc_filter(struct ixl_vsi *vsi, u8 *macaddr) { struct ixl_mac_filter *f; /* Does one already exist? */ f = ixl_find_filter(vsi, macaddr, IXL_VLAN_ANY); if (f != NULL) return; f = ixl_get_filter(vsi); if (f == NULL) { printf("WARNING: no filter available!!\n"); return; } bcopy(macaddr, f->macaddr, ETHER_ADDR_LEN); f->vlan = IXL_VLAN_ANY; f->flags |= (IXL_FILTER_ADD | IXL_FILTER_USED | IXL_FILTER_MC); return; } void ixl_reconfigure_filters(struct ixl_vsi *vsi) { ixl_add_hw_filters(vsi, IXL_FILTER_USED, vsi->num_macs); } /* ** This routine adds macvlan filters */ void ixl_add_filter(struct ixl_vsi *vsi, const u8 *macaddr, s16 vlan) { struct ixl_mac_filter *f, *tmp; struct ixl_pf *pf; device_t dev; DEBUGOUT("ixl_add_filter: begin"); pf = vsi->back; dev = pf->dev; /* Does one already exist? */ f = ixl_find_filter(vsi, macaddr, vlan); if (f != NULL) return; /* ** Is this the first vlan being registered? If so, we ** need to remove the ANY filter that indicates we are ** not in a vlan, and replace that with a 0 filter.
*/ if ((vlan != IXL_VLAN_ANY) && (vsi->num_vlans == 1)) { tmp = ixl_find_filter(vsi, macaddr, IXL_VLAN_ANY); if (tmp != NULL) { ixl_del_filter(vsi, macaddr, IXL_VLAN_ANY); ixl_add_filter(vsi, macaddr, 0); } } f = ixl_get_filter(vsi); if (f == NULL) { device_printf(dev, "WARNING: no filter available!!\n"); return; } bcopy(macaddr, f->macaddr, ETHER_ADDR_LEN); f->vlan = vlan; f->flags |= (IXL_FILTER_ADD | IXL_FILTER_USED); if (f->vlan != IXL_VLAN_ANY) f->flags |= IXL_FILTER_VLAN; else vsi->num_macs++; ixl_add_hw_filters(vsi, f->flags, 1); return; } void ixl_del_filter(struct ixl_vsi *vsi, const u8 *macaddr, s16 vlan) { struct ixl_mac_filter *f; f = ixl_find_filter(vsi, macaddr, vlan); if (f == NULL) return; f->flags |= IXL_FILTER_DEL; ixl_del_hw_filters(vsi, 1); vsi->num_macs--; /* Check if this is the last vlan removal */ if (vlan != IXL_VLAN_ANY && vsi->num_vlans == 0) { /* Switch back to a non-vlan filter */ ixl_del_filter(vsi, macaddr, 0); ixl_add_filter(vsi, macaddr, IXL_VLAN_ANY); } return; } /* ** Find the filter with both matching mac addr and vlan id */ struct ixl_mac_filter * ixl_find_filter(struct ixl_vsi *vsi, const u8 *macaddr, s16 vlan) { struct ixl_mac_filter *f; bool match = FALSE; SLIST_FOREACH(f, &vsi->ftl, next) { if (!cmp_etheraddr(f->macaddr, macaddr)) continue; if (f->vlan == vlan) { match = TRUE; break; } } if (!match) f = NULL; return (f); } /* ** This routine takes additions to the vsi filter ** table and creates an Admin Queue call to create ** the filters in the hardware. */ void ixl_add_hw_filters(struct ixl_vsi *vsi, int flags, int cnt) { struct i40e_aqc_add_macvlan_element_data *a, *b; struct ixl_mac_filter *f; struct ixl_pf *pf; struct i40e_hw *hw; device_t dev; int err, j = 0; pf = vsi->back; dev = pf->dev; hw = &pf->hw; IXL_PF_LOCK_ASSERT(pf); a = malloc(sizeof(struct i40e_aqc_add_macvlan_element_data) * cnt, M_DEVBUF, M_NOWAIT | M_ZERO); if (a == NULL) { device_printf(dev, "add_hw_filters failed to get memory\n"); return; } /* ** Scan the filter list, each time we find one ** we add it to the admin queue array and turn off ** the add bit. */ SLIST_FOREACH(f, &vsi->ftl, next) { if (f->flags == flags) { b = &a[j]; // a pox on fvl long names :) bcopy(f->macaddr, b->mac_addr, ETHER_ADDR_LEN); if (f->vlan == IXL_VLAN_ANY) { b->vlan_tag = 0; b->flags = I40E_AQC_MACVLAN_ADD_IGNORE_VLAN; } else { b->vlan_tag = f->vlan; b->flags = 0; } b->flags |= I40E_AQC_MACVLAN_ADD_PERFECT_MATCH; f->flags &= ~IXL_FILTER_ADD; j++; } if (j == cnt) break; } if (j > 0) { err = i40e_aq_add_macvlan(hw, vsi->seid, a, j, NULL); if (err) device_printf(dev, "aq_add_macvlan err %d, " "aq_error %d\n", err, hw->aq.asq_last_status); else vsi->hw_filters_add += j; } free(a, M_DEVBUF); return; } /* ** This routine takes removals in the vsi filter ** table and creates an Admin Queue call to delete ** the filters in the hardware. 
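** Entries flagged IXL_FILTER_DEL are also unlinked from the software filter
** list and freed once they have been queued for removal.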
*/ void ixl_del_hw_filters(struct ixl_vsi *vsi, int cnt) { struct i40e_aqc_remove_macvlan_element_data *d, *e; struct ixl_pf *pf; struct i40e_hw *hw; device_t dev; struct ixl_mac_filter *f, *f_temp; int err, j = 0; DEBUGOUT("ixl_del_hw_filters: begin\n"); pf = vsi->back; hw = &pf->hw; dev = pf->dev; d = malloc(sizeof(struct i40e_aqc_remove_macvlan_element_data) * cnt, M_DEVBUF, M_NOWAIT | M_ZERO); if (d == NULL) { printf("del hw filter failed to get memory\n"); return; } SLIST_FOREACH_SAFE(f, &vsi->ftl, next, f_temp) { if (f->flags & IXL_FILTER_DEL) { e = &d[j]; // a pox on fvl long names :) bcopy(f->macaddr, e->mac_addr, ETHER_ADDR_LEN); e->vlan_tag = (f->vlan == IXL_VLAN_ANY ? 0 : f->vlan); e->flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH; /* delete entry from vsi list */ SLIST_REMOVE(&vsi->ftl, f, ixl_mac_filter, next); free(f, M_DEVBUF); j++; } if (j == cnt) break; } if (j > 0) { err = i40e_aq_remove_macvlan(hw, vsi->seid, d, j, NULL); if (err && hw->aq.asq_last_status != I40E_AQ_RC_ENOENT) { int sc = 0; for (int i = 0; i < j; i++) sc += (!d[i].error_code); vsi->hw_filters_del += sc; device_printf(dev, "Failed to remove %d/%d filters, aq error %d\n", j - sc, j, hw->aq.asq_last_status); } else vsi->hw_filters_del += j; } free(d, M_DEVBUF); DEBUGOUT("ixl_del_hw_filters: end\n"); return; } int ixl_enable_tx_ring(struct ixl_pf *pf, struct ixl_pf_qtag *qtag, u16 vsi_qidx) { struct i40e_hw *hw = &pf->hw; int error = 0; u32 reg; u16 pf_qidx; pf_qidx = ixl_pf_qidx_from_vsi_qidx(qtag, vsi_qidx); ixl_dbg(pf, IXL_DBG_EN_DIS, "Enabling PF TX ring %4d / VSI TX ring %4d...\n", pf_qidx, vsi_qidx); i40e_pre_tx_queue_cfg(hw, pf_qidx, TRUE); reg = rd32(hw, I40E_QTX_ENA(pf_qidx)); reg |= I40E_QTX_ENA_QENA_REQ_MASK | I40E_QTX_ENA_QENA_STAT_MASK; wr32(hw, I40E_QTX_ENA(pf_qidx), reg); /* Verify the enable took */ for (int j = 0; j < 10; j++) { reg = rd32(hw, I40E_QTX_ENA(pf_qidx)); if (reg & I40E_QTX_ENA_QENA_STAT_MASK) break; i40e_msec_delay(10); } if ((reg & I40E_QTX_ENA_QENA_STAT_MASK) == 0) { device_printf(pf->dev, "TX queue %d still disabled!\n", pf_qidx); error = ETIMEDOUT; } return (error); } int ixl_enable_rx_ring(struct ixl_pf *pf, struct ixl_pf_qtag *qtag, u16 vsi_qidx) { struct i40e_hw *hw = &pf->hw; int error = 0; u32 reg; u16 pf_qidx; pf_qidx = ixl_pf_qidx_from_vsi_qidx(qtag, vsi_qidx); ixl_dbg(pf, IXL_DBG_EN_DIS, "Enabling PF RX ring %4d / VSI RX ring %4d...\n", pf_qidx, vsi_qidx); reg = rd32(hw, I40E_QRX_ENA(pf_qidx)); reg |= I40E_QRX_ENA_QENA_REQ_MASK | I40E_QRX_ENA_QENA_STAT_MASK; wr32(hw, I40E_QRX_ENA(pf_qidx), reg); /* Verify the enable took */ for (int j = 0; j < 10; j++) { reg = rd32(hw, I40E_QRX_ENA(pf_qidx)); if (reg & I40E_QRX_ENA_QENA_STAT_MASK) break; i40e_msec_delay(10); } if ((reg & I40E_QRX_ENA_QENA_STAT_MASK) == 0) { device_printf(pf->dev, "RX queue %d still disabled!\n", pf_qidx); error = ETIMEDOUT; } return (error); } int ixl_enable_ring(struct ixl_pf *pf, struct ixl_pf_qtag *qtag, u16 vsi_qidx) { int error = 0; error = ixl_enable_tx_ring(pf, qtag, vsi_qidx); /* Called function already prints error message */ if (error) return (error); error = ixl_enable_rx_ring(pf, qtag, vsi_qidx); return (error); } /* For PF VSI only */ int ixl_enable_rings(struct ixl_vsi *vsi) { struct ixl_pf *pf = vsi->back; int error = 0; for (int i = 0; i < vsi->num_queues; i++) { error = ixl_enable_ring(pf, &pf->qtag, i); if (error) return (error); } return (error); } int ixl_disable_tx_ring(struct ixl_pf *pf, struct ixl_pf_qtag *qtag, u16 vsi_qidx) { struct i40e_hw *hw = &pf->hw; int error = 0; u32 reg; 
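/* Disable sequence: pre-configure the queue, wait 500 usec, clear QENA_REQ, then poll up to 10 x 10 msec for QENA_STAT to clear. */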
u16 pf_qidx; pf_qidx = ixl_pf_qidx_from_vsi_qidx(qtag, vsi_qidx); i40e_pre_tx_queue_cfg(hw, pf_qidx, FALSE); i40e_usec_delay(500); reg = rd32(hw, I40E_QTX_ENA(pf_qidx)); reg &= ~I40E_QTX_ENA_QENA_REQ_MASK; wr32(hw, I40E_QTX_ENA(pf_qidx), reg); /* Verify the disable took */ for (int j = 0; j < 10; j++) { reg = rd32(hw, I40E_QTX_ENA(pf_qidx)); if (!(reg & I40E_QTX_ENA_QENA_STAT_MASK)) break; i40e_msec_delay(10); } if (reg & I40E_QTX_ENA_QENA_STAT_MASK) { device_printf(pf->dev, "TX queue %d still enabled!\n", pf_qidx); error = ETIMEDOUT; } return (error); } int ixl_disable_rx_ring(struct ixl_pf *pf, struct ixl_pf_qtag *qtag, u16 vsi_qidx) { struct i40e_hw *hw = &pf->hw; int error = 0; u32 reg; u16 pf_qidx; pf_qidx = ixl_pf_qidx_from_vsi_qidx(qtag, vsi_qidx); reg = rd32(hw, I40E_QRX_ENA(pf_qidx)); reg &= ~I40E_QRX_ENA_QENA_REQ_MASK; wr32(hw, I40E_QRX_ENA(pf_qidx), reg); /* Verify the disable took */ for (int j = 0; j < 10; j++) { reg = rd32(hw, I40E_QRX_ENA(pf_qidx)); if (!(reg & I40E_QRX_ENA_QENA_STAT_MASK)) break; i40e_msec_delay(10); } if (reg & I40E_QRX_ENA_QENA_STAT_MASK) { device_printf(pf->dev, "RX queue %d still enabled!\n", pf_qidx); error = ETIMEDOUT; } return (error); } int ixl_disable_ring(struct ixl_pf *pf, struct ixl_pf_qtag *qtag, u16 vsi_qidx) { int error = 0; error = ixl_disable_tx_ring(pf, qtag, vsi_qidx); /* Called function already prints error message */ if (error) return (error); error = ixl_disable_rx_ring(pf, qtag, vsi_qidx); return (error); } /* For PF VSI only */ int ixl_disable_rings(struct ixl_vsi *vsi) { struct ixl_pf *pf = vsi->back; int error = 0; for (int i = 0; i < vsi->num_queues; i++) { error = ixl_disable_ring(pf, &pf->qtag, i); if (error) return (error); } return (error); } /** * ixl_handle_mdd_event * * Called from interrupt handler to identify possibly malicious vfs * (But also detects events from the PF, as well) **/ void ixl_handle_mdd_event(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; bool mdd_detected = false; bool pf_mdd_detected = false; u32 reg; /* find what triggered the MDD event */ reg = rd32(hw, I40E_GL_MDET_TX); if (reg & I40E_GL_MDET_TX_VALID_MASK) { u8 pf_num = (reg & I40E_GL_MDET_TX_PF_NUM_MASK) >> I40E_GL_MDET_TX_PF_NUM_SHIFT; u8 event = (reg & I40E_GL_MDET_TX_EVENT_MASK) >> I40E_GL_MDET_TX_EVENT_SHIFT; u16 queue = (reg & I40E_GL_MDET_TX_QUEUE_MASK) >> I40E_GL_MDET_TX_QUEUE_SHIFT; device_printf(dev, "Malicious Driver Detection event %d" " on TX queue %d, pf number %d\n", event, queue, pf_num); wr32(hw, I40E_GL_MDET_TX, 0xffffffff); mdd_detected = true; } reg = rd32(hw, I40E_GL_MDET_RX); if (reg & I40E_GL_MDET_RX_VALID_MASK) { u8 pf_num = (reg & I40E_GL_MDET_RX_FUNCTION_MASK) >> I40E_GL_MDET_RX_FUNCTION_SHIFT; u8 event = (reg & I40E_GL_MDET_RX_EVENT_MASK) >> I40E_GL_MDET_RX_EVENT_SHIFT; u16 queue = (reg & I40E_GL_MDET_RX_QUEUE_MASK) >> I40E_GL_MDET_RX_QUEUE_SHIFT; device_printf(dev, "Malicious Driver Detection event %d" " on RX queue %d, pf number %d\n", event, queue, pf_num); wr32(hw, I40E_GL_MDET_RX, 0xffffffff); mdd_detected = true; } if (mdd_detected) { reg = rd32(hw, I40E_PF_MDET_TX); if (reg & I40E_PF_MDET_TX_VALID_MASK) { wr32(hw, I40E_PF_MDET_TX, 0xFFFF); device_printf(dev, "MDD TX event is for this function!"); pf_mdd_detected = true; } reg = rd32(hw, I40E_PF_MDET_RX); if (reg & I40E_PF_MDET_RX_VALID_MASK) { wr32(hw, I40E_PF_MDET_RX, 0xFFFF); device_printf(dev, "MDD RX event is for this function!"); pf_mdd_detected = true; } } /* re-enable mdd interrupt cause */ reg = rd32(hw, I40E_PFINT_ICR0_ENA); reg 
|= I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK; wr32(hw, I40E_PFINT_ICR0_ENA, reg); ixl_flush(hw); } void ixl_enable_intr(struct ixl_vsi *vsi) { struct ixl_pf *pf = (struct ixl_pf *)vsi->back; struct i40e_hw *hw = vsi->hw; struct ixl_queue *que = vsi->queues; if (pf->msix > 1) { for (int i = 0; i < vsi->num_queues; i++, que++) ixl_enable_queue(hw, que->me); } else ixl_enable_intr0(hw); } void ixl_disable_rings_intr(struct ixl_vsi *vsi) { struct i40e_hw *hw = vsi->hw; struct ixl_queue *que = vsi->queues; for (int i = 0; i < vsi->num_queues; i++, que++) ixl_disable_queue(hw, que->me); } void ixl_enable_intr0(struct i40e_hw *hw) { u32 reg; /* Use IXL_ITR_NONE so ITR isn't updated here */ reg = I40E_PFINT_DYN_CTL0_INTENA_MASK | I40E_PFINT_DYN_CTL0_CLEARPBA_MASK | (IXL_ITR_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT); wr32(hw, I40E_PFINT_DYN_CTL0, reg); } void ixl_disable_intr0(struct i40e_hw *hw) { u32 reg; reg = IXL_ITR_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT; wr32(hw, I40E_PFINT_DYN_CTL0, reg); ixl_flush(hw); } void ixl_enable_queue(struct i40e_hw *hw, int id) { u32 reg; reg = I40E_PFINT_DYN_CTLN_INTENA_MASK | I40E_PFINT_DYN_CTLN_CLEARPBA_MASK | (IXL_ITR_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT); wr32(hw, I40E_PFINT_DYN_CTLN(id), reg); } void ixl_disable_queue(struct i40e_hw *hw, int id) { u32 reg; reg = IXL_ITR_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT; wr32(hw, I40E_PFINT_DYN_CTLN(id), reg); } void ixl_update_stats_counters(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; struct ixl_vsi *vsi = &pf->vsi; struct ixl_vf *vf; struct i40e_hw_port_stats *nsd = &pf->stats; struct i40e_hw_port_stats *osd = &pf->stats_offsets; /* Update hw stats */ ixl_stat_update32(hw, I40E_GLPRT_CRCERRS(hw->port), pf->stat_offsets_loaded, &osd->crc_errors, &nsd->crc_errors); ixl_stat_update32(hw, I40E_GLPRT_ILLERRC(hw->port), pf->stat_offsets_loaded, &osd->illegal_bytes, &nsd->illegal_bytes); ixl_stat_update48(hw, I40E_GLPRT_GORCH(hw->port), I40E_GLPRT_GORCL(hw->port), pf->stat_offsets_loaded, &osd->eth.rx_bytes, &nsd->eth.rx_bytes); ixl_stat_update48(hw, I40E_GLPRT_GOTCH(hw->port), I40E_GLPRT_GOTCL(hw->port), pf->stat_offsets_loaded, &osd->eth.tx_bytes, &nsd->eth.tx_bytes); ixl_stat_update32(hw, I40E_GLPRT_RDPC(hw->port), pf->stat_offsets_loaded, &osd->eth.rx_discards, &nsd->eth.rx_discards); ixl_stat_update48(hw, I40E_GLPRT_UPRCH(hw->port), I40E_GLPRT_UPRCL(hw->port), pf->stat_offsets_loaded, &osd->eth.rx_unicast, &nsd->eth.rx_unicast); ixl_stat_update48(hw, I40E_GLPRT_UPTCH(hw->port), I40E_GLPRT_UPTCL(hw->port), pf->stat_offsets_loaded, &osd->eth.tx_unicast, &nsd->eth.tx_unicast); ixl_stat_update48(hw, I40E_GLPRT_MPRCH(hw->port), I40E_GLPRT_MPRCL(hw->port), pf->stat_offsets_loaded, &osd->eth.rx_multicast, &nsd->eth.rx_multicast); ixl_stat_update48(hw, I40E_GLPRT_MPTCH(hw->port), I40E_GLPRT_MPTCL(hw->port), pf->stat_offsets_loaded, &osd->eth.tx_multicast, &nsd->eth.tx_multicast); ixl_stat_update48(hw, I40E_GLPRT_BPRCH(hw->port), I40E_GLPRT_BPRCL(hw->port), pf->stat_offsets_loaded, &osd->eth.rx_broadcast, &nsd->eth.rx_broadcast); ixl_stat_update48(hw, I40E_GLPRT_BPTCH(hw->port), I40E_GLPRT_BPTCL(hw->port), pf->stat_offsets_loaded, &osd->eth.tx_broadcast, &nsd->eth.tx_broadcast); ixl_stat_update32(hw, I40E_GLPRT_TDOLD(hw->port), pf->stat_offsets_loaded, &osd->tx_dropped_link_down, &nsd->tx_dropped_link_down); ixl_stat_update32(hw, I40E_GLPRT_MLFC(hw->port), pf->stat_offsets_loaded, &osd->mac_local_faults, &nsd->mac_local_faults); ixl_stat_update32(hw, I40E_GLPRT_MRFC(hw->port), pf->stat_offsets_loaded, 
&osd->mac_remote_faults, &nsd->mac_remote_faults); ixl_stat_update32(hw, I40E_GLPRT_RLEC(hw->port), pf->stat_offsets_loaded, &osd->rx_length_errors, &nsd->rx_length_errors); /* Flow control (LFC) stats */ ixl_stat_update32(hw, I40E_GLPRT_LXONRXC(hw->port), pf->stat_offsets_loaded, &osd->link_xon_rx, &nsd->link_xon_rx); ixl_stat_update32(hw, I40E_GLPRT_LXONTXC(hw->port), pf->stat_offsets_loaded, &osd->link_xon_tx, &nsd->link_xon_tx); ixl_stat_update32(hw, I40E_GLPRT_LXOFFRXC(hw->port), pf->stat_offsets_loaded, &osd->link_xoff_rx, &nsd->link_xoff_rx); ixl_stat_update32(hw, I40E_GLPRT_LXOFFTXC(hw->port), pf->stat_offsets_loaded, &osd->link_xoff_tx, &nsd->link_xoff_tx); /* Packet size stats rx */ ixl_stat_update48(hw, I40E_GLPRT_PRC64H(hw->port), I40E_GLPRT_PRC64L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_64, &nsd->rx_size_64); ixl_stat_update48(hw, I40E_GLPRT_PRC127H(hw->port), I40E_GLPRT_PRC127L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_127, &nsd->rx_size_127); ixl_stat_update48(hw, I40E_GLPRT_PRC255H(hw->port), I40E_GLPRT_PRC255L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_255, &nsd->rx_size_255); ixl_stat_update48(hw, I40E_GLPRT_PRC511H(hw->port), I40E_GLPRT_PRC511L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_511, &nsd->rx_size_511); ixl_stat_update48(hw, I40E_GLPRT_PRC1023H(hw->port), I40E_GLPRT_PRC1023L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_1023, &nsd->rx_size_1023); ixl_stat_update48(hw, I40E_GLPRT_PRC1522H(hw->port), I40E_GLPRT_PRC1522L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_1522, &nsd->rx_size_1522); ixl_stat_update48(hw, I40E_GLPRT_PRC9522H(hw->port), I40E_GLPRT_PRC9522L(hw->port), pf->stat_offsets_loaded, &osd->rx_size_big, &nsd->rx_size_big); /* Packet size stats tx */ ixl_stat_update48(hw, I40E_GLPRT_PTC64H(hw->port), I40E_GLPRT_PTC64L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_64, &nsd->tx_size_64); ixl_stat_update48(hw, I40E_GLPRT_PTC127H(hw->port), I40E_GLPRT_PTC127L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_127, &nsd->tx_size_127); ixl_stat_update48(hw, I40E_GLPRT_PTC255H(hw->port), I40E_GLPRT_PTC255L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_255, &nsd->tx_size_255); ixl_stat_update48(hw, I40E_GLPRT_PTC511H(hw->port), I40E_GLPRT_PTC511L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_511, &nsd->tx_size_511); ixl_stat_update48(hw, I40E_GLPRT_PTC1023H(hw->port), I40E_GLPRT_PTC1023L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_1023, &nsd->tx_size_1023); ixl_stat_update48(hw, I40E_GLPRT_PTC1522H(hw->port), I40E_GLPRT_PTC1522L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_1522, &nsd->tx_size_1522); ixl_stat_update48(hw, I40E_GLPRT_PTC9522H(hw->port), I40E_GLPRT_PTC9522L(hw->port), pf->stat_offsets_loaded, &osd->tx_size_big, &nsd->tx_size_big); ixl_stat_update32(hw, I40E_GLPRT_RUC(hw->port), pf->stat_offsets_loaded, &osd->rx_undersize, &nsd->rx_undersize); ixl_stat_update32(hw, I40E_GLPRT_RFC(hw->port), pf->stat_offsets_loaded, &osd->rx_fragments, &nsd->rx_fragments); ixl_stat_update32(hw, I40E_GLPRT_ROC(hw->port), pf->stat_offsets_loaded, &osd->rx_oversize, &nsd->rx_oversize); ixl_stat_update32(hw, I40E_GLPRT_RJC(hw->port), pf->stat_offsets_loaded, &osd->rx_jabber, &nsd->rx_jabber); pf->stat_offsets_loaded = true; /* End hw stats */ /* Update vsi stats */ ixl_update_vsi_stats(vsi); for (int i = 0; i < pf->num_vfs; i++) { vf = &pf->vfs[i]; if (vf->vf_flags & VF_FLAG_ENABLED) ixl_update_eth_stats(&pf->vfs[i].vsi); } } int ixl_rebuild_hw_structs_after_reset(struct ixl_pf *pf) { struct i40e_hw *hw = 
&pf->hw; struct ixl_vsi *vsi = &pf->vsi; device_t dev = pf->dev; bool is_up = false; int error = 0; is_up = !!(vsi->ifp->if_drv_flags & IFF_DRV_RUNNING); /* Teardown */ if (is_up) ixl_stop(pf); error = i40e_shutdown_lan_hmc(hw); if (error) device_printf(dev, "Shutdown LAN HMC failed with code %d\n", error); ixl_disable_intr0(hw); ixl_teardown_adminq_msix(pf); error = i40e_shutdown_adminq(hw); if (error) device_printf(dev, "Shutdown Admin queue failed with code %d\n", error); /* Setup */ error = i40e_init_adminq(hw); if (error != 0 && error != I40E_ERR_FIRMWARE_API_VERSION) { device_printf(dev, "Unable to initialize Admin Queue, error %d\n", error); } error = ixl_setup_adminq_msix(pf); if (error) { device_printf(dev, "ixl_setup_adminq_msix error: %d\n", error); } ixl_configure_intr0_msix(pf); ixl_enable_intr0(hw); error = i40e_init_lan_hmc(hw, hw->func_caps.num_tx_qp, hw->func_caps.num_rx_qp, 0, 0); if (error) { device_printf(dev, "init_lan_hmc failed: %d\n", error); } error = i40e_configure_lan_hmc(hw, I40E_HMC_MODEL_DIRECT_ONLY); if (error) { device_printf(dev, "configure_lan_hmc failed: %d\n", error); } if (is_up) ixl_init(pf); return (0); } void ixl_handle_empr_reset(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; int count = 0; u32 reg; /* Typically finishes within 3-4 seconds */ while (count++ < 100) { reg = rd32(hw, I40E_GLGEN_RSTAT) & I40E_GLGEN_RSTAT_DEVSTATE_MASK; if (reg) i40e_msec_delay(100); else break; } ixl_dbg(pf, IXL_DBG_INFO, "EMPR reset wait count: %d\n", count); device_printf(dev, "Rebuilding driver state...\n"); ixl_rebuild_hw_structs_after_reset(pf); device_printf(dev, "Rebuilding driver state done.\n"); atomic_clear_int(&pf->state, IXL_PF_STATE_EMPR_RESETTING); } /* ** Tasklet handler for MSIX Adminq interrupts ** - do outside interrupt since it might sleep */ void ixl_do_adminq(void *context, int pending) { struct ixl_pf *pf = context; struct i40e_hw *hw = &pf->hw; struct i40e_arq_event_info event; i40e_status ret; device_t dev = pf->dev; u32 loop = 0; u16 opcode, result; if (pf->state & IXL_PF_STATE_EMPR_RESETTING) { /* Flag cleared at end of this function */ ixl_handle_empr_reset(pf); return; } /* Admin Queue handling */ event.buf_len = IXL_AQ_BUF_SZ; event.msg_buf = malloc(event.buf_len, M_DEVBUF, M_NOWAIT | M_ZERO); if (!event.msg_buf) { device_printf(dev, "%s: Unable to allocate memory for Admin" " Queue event!\n", __func__); return; } IXL_PF_LOCK(pf); /* clean and process any events */ do { ret = i40e_clean_arq_element(hw, &event, &result); if (ret) break; opcode = LE16_TO_CPU(event.desc.opcode); ixl_dbg(pf, IXL_DBG_AQ, "Admin Queue event: %#06x\n", opcode); switch (opcode) { case i40e_aqc_opc_get_link_status: ixl_link_event(pf, &event); break; case i40e_aqc_opc_send_msg_to_pf: #ifdef PCI_IOV ixl_handle_vf_msg(pf, &event); #endif break; case i40e_aqc_opc_event_lan_overflow: default: break; } } while (result && (loop++ < IXL_ADM_LIMIT)); free(event.msg_buf, M_DEVBUF); /* * If there are still messages to process, reschedule ourselves. * Otherwise, re-enable our interrupt. */ if (result > 0) taskqueue_enqueue(pf->tq, &pf->adminq); else ixl_enable_intr0(hw); IXL_PF_UNLOCK(pf); } /** * Update VSI-specific ethernet statistics counters. 
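* Counters are read via the VSI's stat_counter_idx; the first read after a
* reset latches offsets so that reported values count from zero.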
**/ void ixl_update_eth_stats(struct ixl_vsi *vsi) { struct ixl_pf *pf = (struct ixl_pf *)vsi->back; struct i40e_hw *hw = &pf->hw; struct i40e_eth_stats *es; struct i40e_eth_stats *oes; struct i40e_hw_port_stats *nsd; u16 stat_idx = vsi->info.stat_counter_idx; es = &vsi->eth_stats; oes = &vsi->eth_stats_offsets; nsd = &pf->stats; /* Gather up the stats that the hw collects */ ixl_stat_update32(hw, I40E_GLV_TEPC(stat_idx), vsi->stat_offsets_loaded, &oes->tx_errors, &es->tx_errors); ixl_stat_update32(hw, I40E_GLV_RDPC(stat_idx), vsi->stat_offsets_loaded, &oes->rx_discards, &es->rx_discards); ixl_stat_update48(hw, I40E_GLV_GORCH(stat_idx), I40E_GLV_GORCL(stat_idx), vsi->stat_offsets_loaded, &oes->rx_bytes, &es->rx_bytes); ixl_stat_update48(hw, I40E_GLV_UPRCH(stat_idx), I40E_GLV_UPRCL(stat_idx), vsi->stat_offsets_loaded, &oes->rx_unicast, &es->rx_unicast); ixl_stat_update48(hw, I40E_GLV_MPRCH(stat_idx), I40E_GLV_MPRCL(stat_idx), vsi->stat_offsets_loaded, &oes->rx_multicast, &es->rx_multicast); ixl_stat_update48(hw, I40E_GLV_BPRCH(stat_idx), I40E_GLV_BPRCL(stat_idx), vsi->stat_offsets_loaded, &oes->rx_broadcast, &es->rx_broadcast); ixl_stat_update48(hw, I40E_GLV_GOTCH(stat_idx), I40E_GLV_GOTCL(stat_idx), vsi->stat_offsets_loaded, &oes->tx_bytes, &es->tx_bytes); ixl_stat_update48(hw, I40E_GLV_UPTCH(stat_idx), I40E_GLV_UPTCL(stat_idx), vsi->stat_offsets_loaded, &oes->tx_unicast, &es->tx_unicast); ixl_stat_update48(hw, I40E_GLV_MPTCH(stat_idx), I40E_GLV_MPTCL(stat_idx), vsi->stat_offsets_loaded, &oes->tx_multicast, &es->tx_multicast); ixl_stat_update48(hw, I40E_GLV_BPTCH(stat_idx), I40E_GLV_BPTCL(stat_idx), vsi->stat_offsets_loaded, &oes->tx_broadcast, &es->tx_broadcast); vsi->stat_offsets_loaded = true; } void ixl_update_vsi_stats(struct ixl_vsi *vsi) { struct ixl_pf *pf; struct ifnet *ifp; struct i40e_eth_stats *es; u64 tx_discards; struct i40e_hw_port_stats *nsd; pf = vsi->back; ifp = vsi->ifp; es = &vsi->eth_stats; nsd = &pf->stats; ixl_update_eth_stats(vsi); tx_discards = es->tx_discards + nsd->tx_dropped_link_down; for (int i = 0; i < vsi->num_queues; i++) tx_discards += vsi->queues[i].txr.br->br_drops; /* Update ifnet stats */ IXL_SET_IPACKETS(vsi, es->rx_unicast + es->rx_multicast + es->rx_broadcast); IXL_SET_OPACKETS(vsi, es->tx_unicast + es->tx_multicast + es->tx_broadcast); IXL_SET_IBYTES(vsi, es->rx_bytes); IXL_SET_OBYTES(vsi, es->tx_bytes); IXL_SET_IMCASTS(vsi, es->rx_multicast); IXL_SET_OMCASTS(vsi, es->tx_multicast); IXL_SET_IERRORS(vsi, nsd->crc_errors + nsd->illegal_bytes + nsd->rx_undersize + nsd->rx_oversize + nsd->rx_fragments + nsd->rx_jabber); IXL_SET_OERRORS(vsi, es->tx_errors); IXL_SET_IQDROPS(vsi, es->rx_discards + nsd->eth.rx_discards); IXL_SET_OQDROPS(vsi, tx_discards); IXL_SET_NOPROTO(vsi, es->rx_unknown_protocol); IXL_SET_COLLISIONS(vsi, 0); } /** * Reset all of the stats for the given pf **/ void ixl_pf_reset_stats(struct ixl_pf *pf) { bzero(&pf->stats, sizeof(struct i40e_hw_port_stats)); bzero(&pf->stats_offsets, sizeof(struct i40e_hw_port_stats)); pf->stat_offsets_loaded = false; } /** * Resets all stats of the given vsi **/ void ixl_vsi_reset_stats(struct ixl_vsi *vsi) { bzero(&vsi->eth_stats, sizeof(struct i40e_eth_stats)); bzero(&vsi->eth_stats_offsets, sizeof(struct i40e_eth_stats)); vsi->stat_offsets_loaded = false; } /** * Read and update a 48 bit stat from the hw * * Since the device stats are not reset at PFReset, they likely will not * be zeroed when the driver starts. 
We'll save the first values read * and use them as offsets to be subtracted from the raw values in order * to report stats that count from zero. **/ void ixl_stat_update48(struct i40e_hw *hw, u32 hireg, u32 loreg, bool offset_loaded, u64 *offset, u64 *stat) { u64 new_data; #if defined(__FreeBSD__) && (__FreeBSD_version >= 1000000) && defined(__amd64__) new_data = rd64(hw, loreg); #else /* * Use two rd32's instead of one rd64; FreeBSD versions before * 10 don't support 64-bit bus reads/writes. */ new_data = rd32(hw, loreg); new_data |= ((u64)(rd32(hw, hireg) & 0xFFFF)) << 32; #endif if (!offset_loaded) *offset = new_data; if (new_data >= *offset) *stat = new_data - *offset; else *stat = (new_data + ((u64)1 << 48)) - *offset; *stat &= 0xFFFFFFFFFFFFULL; } /** * Read and update a 32 bit stat from the hw **/ void ixl_stat_update32(struct i40e_hw *hw, u32 reg, bool offset_loaded, u64 *offset, u64 *stat) { u32 new_data; new_data = rd32(hw, reg); if (!offset_loaded) *offset = new_data; if (new_data >= *offset) *stat = (u32)(new_data - *offset); else *stat = (u32)((new_data + ((u64)1 << 32)) - *offset); } void ixl_add_device_sysctls(struct ixl_pf *pf) { device_t dev = pf->dev; struct i40e_hw *hw = &pf->hw; struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev); struct sysctl_oid_list *ctx_list = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); struct sysctl_oid *debug_node; struct sysctl_oid_list *debug_list; struct sysctl_oid *fec_node; struct sysctl_oid_list *fec_list; /* Set up sysctls */ SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "fc", CTLTYPE_INT | CTLFLAG_RW, pf, 0, ixl_set_flowcntl, "I", IXL_SYSCTL_HELP_FC); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "advertise_speed", CTLTYPE_INT | CTLFLAG_RW, pf, 0, ixl_set_advertise, "I", IXL_SYSCTL_HELP_SET_ADVERTISE); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "current_speed", CTLTYPE_STRING | CTLFLAG_RD, pf, 0, ixl_current_speed, "A", "Current Port Speed"); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD, pf, 0, ixl_sysctl_show_fw, "A", "Firmware version"); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "unallocated_queues", CTLTYPE_INT | CTLFLAG_RD, pf, 0, ixl_sysctl_unallocated_queues, "I", "Queues not allocated to a PF or VF"); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "tx_itr", CTLTYPE_INT | CTLFLAG_RW, pf, 0, ixl_sysctl_pf_tx_itr, "I", "Immediately set TX ITR value for all queues"); SYSCTL_ADD_PROC(ctx, ctx_list, OID_AUTO, "rx_itr", CTLTYPE_INT | CTLFLAG_RW, pf, 0, ixl_sysctl_pf_rx_itr, "I", "Immediately set RX ITR value for all queues"); SYSCTL_ADD_INT(ctx, ctx_list, OID_AUTO, "dynamic_rx_itr", CTLFLAG_RW, &pf->dynamic_rx_itr, 0, "Enable dynamic RX ITR"); SYSCTL_ADD_INT(ctx, ctx_list, OID_AUTO, "dynamic_tx_itr", CTLFLAG_RW, &pf->dynamic_tx_itr, 0, "Enable dynamic TX ITR"); /* Add FEC sysctls for 25G adapters */ /* * XXX: These settings can be changed, but that isn't supported, * so these are read-only for now. 
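* They are only created for the 25G device IDs checked below.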
*/ if (hw->device_id == I40E_DEV_ID_25G_B || hw->device_id == I40E_DEV_ID_25G_SFP28) { fec_node = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "fec", CTLFLAG_RD, NULL, "FEC Sysctls"); fec_list = SYSCTL_CHILDREN(fec_node); SYSCTL_ADD_PROC(ctx, fec_list, OID_AUTO, "fc_ability", CTLTYPE_INT | CTLFLAG_RD, pf, 0, ixl_sysctl_fec_fc_ability, "I", "FC FEC ability enabled"); SYSCTL_ADD_PROC(ctx, fec_list, OID_AUTO, "rs_ability", CTLTYPE_INT | CTLFLAG_RD, pf, 0, ixl_sysctl_fec_rs_ability, "I", "RS FEC ability enabled"); SYSCTL_ADD_PROC(ctx, fec_list, OID_AUTO, "fc_requested", CTLTYPE_INT | CTLFLAG_RD, pf, 0, ixl_sysctl_fec_fc_request, "I", "FC FEC mode requested on link"); SYSCTL_ADD_PROC(ctx, fec_list, OID_AUTO, "rs_requested", CTLTYPE_INT | CTLFLAG_RD, pf, 0, ixl_sysctl_fec_rs_request, "I", "RS FEC mode requested on link"); SYSCTL_ADD_PROC(ctx, fec_list, OID_AUTO, "auto_fec_enabled", CTLTYPE_INT | CTLFLAG_RD, pf, 0, ixl_sysctl_fec_auto_enable, "I", "Let FW decide FEC ability/request modes"); } /* Add sysctls meant to print debug information, but don't list them * in "sysctl -a" output. */ debug_node = SYSCTL_ADD_NODE(ctx, ctx_list, OID_AUTO, "debug", CTLFLAG_RD | CTLFLAG_SKIP, NULL, "Debug Sysctls"); debug_list = SYSCTL_CHILDREN(debug_node); SYSCTL_ADD_UINT(ctx, debug_list, OID_AUTO, "shared_debug_mask", CTLFLAG_RW, &pf->hw.debug_mask, 0, "Shared code debug message level"); SYSCTL_ADD_UINT(ctx, debug_list, OID_AUTO, "core_debug_mask", CTLFLAG_RW, &pf->dbg_mask, 0, "Non-shared code debug message level"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "link_status", CTLTYPE_STRING | CTLFLAG_RD, pf, 0, ixl_sysctl_link_status, "A", IXL_SYSCTL_HELP_LINK_STATUS); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "phy_abilities", CTLTYPE_STRING | CTLFLAG_RD, pf, 0, ixl_sysctl_phy_abilities, "A", "PHY Abilities"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "filter_list", CTLTYPE_STRING | CTLFLAG_RD, pf, 0, ixl_sysctl_sw_filter_list, "A", "SW Filter List"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "hw_res_alloc", CTLTYPE_STRING | CTLFLAG_RD, pf, 0, ixl_sysctl_hw_res_alloc, "A", "HW Resource Allocation"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "switch_config", CTLTYPE_STRING | CTLFLAG_RD, pf, 0, ixl_sysctl_switch_config, "A", "HW Switch Configuration"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "rss_key", CTLTYPE_STRING | CTLFLAG_RD, pf, 0, ixl_sysctl_hkey, "A", "View RSS key"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "rss_lut", CTLTYPE_STRING | CTLFLAG_RD, pf, 0, ixl_sysctl_hlut, "A", "View RSS lookup table"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "rss_hena", CTLTYPE_ULONG | CTLFLAG_RD, pf, 0, ixl_sysctl_hena, "LU", "View enabled packet types for RSS"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "disable_fw_link_management", CTLTYPE_INT | CTLFLAG_WR, pf, 0, ixl_sysctl_fw_link_management, "I", "Disable FW Link Management"); if (pf->has_i2c) { SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "read_i2c_byte", CTLTYPE_INT | CTLFLAG_RW, pf, 0, ixl_sysctl_read_i2c_byte, "I", "Read byte from I2C bus"); SYSCTL_ADD_PROC(ctx, debug_list, OID_AUTO, "write_i2c_byte", CTLTYPE_INT | CTLFLAG_RW, pf, 0, ixl_sysctl_write_i2c_byte, "I", "Write byte to I2C bus"); } #ifdef PCI_IOV SYSCTL_ADD_UINT(ctx, debug_list, OID_AUTO, "vc_debug_level", CTLFLAG_RW, &pf->vc_debug_lvl, 0, "PF/VF Virtual Channel debug level"); #endif } /* * Primarily for finding out how many queues can be assigned to VFs, * at runtime.
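* The value reported is the PF queue manager's current free-queue count.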
*/ static int ixl_sysctl_unallocated_queues(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; int queues; IXL_PF_LOCK(pf); queues = (int)ixl_pf_qmgr_get_num_free(&pf->qmgr); IXL_PF_UNLOCK(pf); return sysctl_handle_int(oidp, NULL, queues, req); } /* ** Set flow control using sysctl: ** 0 - off ** 1 - rx pause ** 2 - tx pause ** 3 - full */ int ixl_set_flowcntl(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; int requested_fc, error = 0; enum i40e_status_code aq_error = 0; u8 fc_aq_err = 0; /* Get request */ requested_fc = pf->fc; error = sysctl_handle_int(oidp, &requested_fc, 0, req); if ((error) || (req->newptr == NULL)) return (error); if (requested_fc < 0 || requested_fc > 3) { device_printf(dev, "Invalid fc mode; valid modes are 0 through 3\n"); return (EINVAL); } /* Set fc ability for port */ hw->fc.requested_mode = requested_fc; aq_error = i40e_set_fc(hw, &fc_aq_err, TRUE); if (aq_error) { device_printf(dev, "%s: Error setting new fc mode %d; fc_err %#x\n", __func__, aq_error, fc_aq_err); return (EIO); } pf->fc = requested_fc; /* Get new link state */ i40e_msec_delay(250); hw->phy.get_link_info = TRUE; i40e_get_link_status(hw, &pf->link_up); return (0); } char * ixl_aq_speed_to_str(enum i40e_aq_link_speed link_speed) { int index; char *speeds[] = { "Unknown", "100 Mbps", "1 Gbps", "10 Gbps", "40 Gbps", "20 Gbps", "25 Gbps", }; switch (link_speed) { case I40E_LINK_SPEED_100MB: index = 1; break; case I40E_LINK_SPEED_1GB: index = 2; break; case I40E_LINK_SPEED_10GB: index = 3; break; case I40E_LINK_SPEED_40GB: index = 4; break; case I40E_LINK_SPEED_20GB: index = 5; break; case I40E_LINK_SPEED_25GB: index = 6; break; case I40E_LINK_SPEED_UNKNOWN: default: index = 0; break; } return speeds[index]; } int ixl_current_speed(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; int error = 0; ixl_update_link_status(pf); error = sysctl_handle_string(oidp, ixl_aq_speed_to_str(hw->phy.link_info.link_speed), 8, req); return (error); } static u8 ixl_convert_sysctl_aq_link_speed(u8 speeds, bool to_aq) { static u16 speedmap[6] = { (I40E_LINK_SPEED_100MB | (0x1 << 8)), (I40E_LINK_SPEED_1GB | (0x2 << 8)), (I40E_LINK_SPEED_10GB | (0x4 << 8)), (I40E_LINK_SPEED_20GB | (0x8 << 8)), (I40E_LINK_SPEED_25GB | (0x10 << 8)), (I40E_LINK_SPEED_40GB | (0x20 << 8)) }; u8 retval = 0; for (int i = 0; i < 6; i++) { if (to_aq) retval |= (speeds & (speedmap[i] >> 8)) ? (speedmap[i] & 0xff) : 0; else retval |= (speeds & speedmap[i]) ? 
(speedmap[i] >> 8) : 0; } return (retval); } int ixl_set_advertised_speeds(struct ixl_pf *pf, int speeds) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct i40e_aq_get_phy_abilities_resp abilities; struct i40e_aq_set_phy_config config; enum i40e_status_code aq_error = 0; /* Get current capability information */ aq_error = i40e_aq_get_phy_capabilities(hw, FALSE, FALSE, &abilities, NULL); if (aq_error) { device_printf(dev, "%s: Error getting phy capabilities %d," " aq error: %d\n", __func__, aq_error, hw->aq.asq_last_status); return (EIO); } /* Prepare new config */ bzero(&config, sizeof(config)); config.link_speed = ixl_convert_sysctl_aq_link_speed(speeds, true); config.phy_type = abilities.phy_type; config.phy_type_ext = abilities.phy_type_ext; config.abilities = abilities.abilities | I40E_AQ_PHY_ENABLE_ATOMIC_LINK; config.eee_capability = abilities.eee_capability; config.eeer = abilities.eeer_val; config.low_power_ctrl = abilities.d3_lpan; /* Do aq command & restart link */ aq_error = i40e_aq_set_phy_config(hw, &config, NULL); if (aq_error) { device_printf(dev, "%s: Error setting new phy config %d," " aq error: %d\n", __func__, aq_error, hw->aq.asq_last_status); return (EIO); } return (0); } /* ** Control link advertise speed: ** Flags: ** 0x1 - advertise 100 Mb ** 0x2 - advertise 1G ** 0x4 - advertise 10G ** 0x8 - advertise 20G ** 0x10 - advertise 25G ** 0x20 - advertise 40G ** ** Set to 0 to disable link */ int ixl_set_advertise(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; u8 converted_speeds; int requested_ls = 0; int error = 0; /* Read in new mode */ requested_ls = pf->advertised_speed; error = sysctl_handle_int(oidp, &requested_ls, 0, req); if ((error) || (req->newptr == NULL)) return (error); /* Check if changing speeds is supported */ switch (hw->device_id) { case I40E_DEV_ID_25G_B: case I40E_DEV_ID_25G_SFP28: device_printf(dev, "Changing advertised speeds not supported" " on this device.\n"); return (EINVAL); } if (requested_ls < 0 || requested_ls > 0xff) { } /* Check for valid value */ converted_speeds = ixl_convert_sysctl_aq_link_speed((u8)requested_ls, true); if ((converted_speeds | pf->supported_speeds) != pf->supported_speeds) { device_printf(dev, "Invalid advertised speed; " "valid flags are: 0x%02x\n", ixl_convert_sysctl_aq_link_speed(pf->supported_speeds, false)); return (EINVAL); } error = ixl_set_advertised_speeds(pf, requested_ls); if (error) return (error); pf->advertised_speed = requested_ls; ixl_update_link_status(pf); return (0); } /* * Input: bitmap of enum i40e_aq_link_speed */ static u64 ixl_max_aq_speed_to_value(u8 link_speeds) { if (link_speeds & I40E_LINK_SPEED_40GB) return IF_Gbps(40); if (link_speeds & I40E_LINK_SPEED_25GB) return IF_Gbps(25); if (link_speeds & I40E_LINK_SPEED_20GB) return IF_Gbps(20); if (link_speeds & I40E_LINK_SPEED_10GB) return IF_Gbps(10); if (link_speeds & I40E_LINK_SPEED_1GB) return IF_Gbps(1); if (link_speeds & I40E_LINK_SPEED_100MB) return IF_Mbps(100); else /* Minimum supported link speed */ return IF_Mbps(100); } /* ** Get the width and transaction speed of ** the bus this adapter is plugged into. 
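** A warning is printed when the negotiated link width/speed looks too small
** for the number of ports running at their maximum supported speed.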
*/ void ixl_get_bus_info(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; u16 link; u32 offset, num_ports; u64 max_speed; /* Some devices don't use PCIE */ if (hw->mac.type == I40E_MAC_X722) return; /* Read PCI Express Capabilities Link Status Register */ pci_find_cap(dev, PCIY_EXPRESS, &offset); link = pci_read_config(dev, offset + PCIER_LINK_STA, 2); /* Fill out hw struct with PCIE info */ i40e_set_pci_config_data(hw, link); /* Use info to print out bandwidth messages */ device_printf(dev,"PCI Express Bus: Speed %s %s\n", ((hw->bus.speed == i40e_bus_speed_8000) ? "8.0GT/s": (hw->bus.speed == i40e_bus_speed_5000) ? "5.0GT/s": (hw->bus.speed == i40e_bus_speed_2500) ? "2.5GT/s":"Unknown"), (hw->bus.width == i40e_bus_width_pcie_x8) ? "Width x8" : (hw->bus.width == i40e_bus_width_pcie_x4) ? "Width x4" : (hw->bus.width == i40e_bus_width_pcie_x2) ? "Width x2" : (hw->bus.width == i40e_bus_width_pcie_x1) ? "Width x1" : ("Unknown")); /* * If adapter is in slot with maximum supported speed, * no warning message needs to be printed out. */ if (hw->bus.speed >= i40e_bus_speed_8000 && hw->bus.width >= i40e_bus_width_pcie_x8) return; num_ports = bitcount32(hw->func_caps.valid_functions); max_speed = ixl_max_aq_speed_to_value(pf->supported_speeds) / 1000000; if ((num_ports * max_speed) > hw->bus.speed * hw->bus.width) { device_printf(dev, "PCI-Express bandwidth available" " for this device may be insufficient for" " optimal performance.\n"); device_printf(dev, "Please move the device to a different" " PCI-e link with more lanes and/or higher" " transfer rate.\n"); } } static int ixl_sysctl_show_fw(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; struct sbuf *sbuf; sbuf = sbuf_new_for_sysctl(NULL, NULL, 128, req); ixl_nvm_version_str(hw, sbuf); sbuf_finish(sbuf); sbuf_delete(sbuf); return 0; } void ixl_print_nvm_cmd(device_t dev, struct i40e_nvm_access *nvma) { if ((nvma->command == I40E_NVM_READ) && ((nvma->config & 0xFF) == 0xF) && (((nvma->config & 0xF00) >> 8) == 0xF) && (nvma->offset == 0) && (nvma->data_size == 1)) { // device_printf(dev, "- Get Driver Status Command\n"); } else if (nvma->command == I40E_NVM_READ) { } else { switch (nvma->command) { case 0xB: device_printf(dev, "- command: I40E_NVM_READ\n"); break; case 0xC: device_printf(dev, "- command: I40E_NVM_WRITE\n"); break; default: device_printf(dev, "- command: unknown 0x%08x\n", nvma->command); break; } device_printf(dev, "- config (ptr) : 0x%02x\n", nvma->config & 0xFF); device_printf(dev, "- config (flags): 0x%01x\n", (nvma->config & 0xF00) >> 8); device_printf(dev, "- offset : 0x%08x\n", nvma->offset); device_printf(dev, "- data_s : 0x%08x\n", nvma->data_size); } } int ixl_handle_nvmupd_cmd(struct ixl_pf *pf, struct ifdrv *ifd) { struct i40e_hw *hw = &pf->hw; struct i40e_nvm_access *nvma; device_t dev = pf->dev; enum i40e_status_code status = 0; int perrno; DEBUGFUNC("ixl_handle_nvmupd_cmd"); /* Sanity checks */ if (ifd->ifd_len < sizeof(struct i40e_nvm_access) || ifd->ifd_data == NULL) { device_printf(dev, "%s: incorrect ifdrv length or data pointer\n", __func__); device_printf(dev, "%s: ifdrv length: %lu, sizeof(struct i40e_nvm_access): %lu\n", __func__, ifd->ifd_len, sizeof(struct i40e_nvm_access)); device_printf(dev, "%s: data pointer: %p\n", __func__, ifd->ifd_data); return (EINVAL); } nvma = (struct i40e_nvm_access *)ifd->ifd_data; if (pf->dbg_mask & IXL_DBG_NVMUPD) ixl_print_nvm_cmd(dev, nvma); if (pf->state & IXL_PF_STATE_EMPR_RESETTING) { int count = 0; 
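/* Wait up to 10 seconds (100 x 100 msec) for an in-progress EMPR reset to finish before issuing the NVM update command. */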
while (count++ < 100) { i40e_msec_delay(100); if (!(pf->state & IXL_PF_STATE_EMPR_RESETTING)) break; } } if (!(pf->state & IXL_PF_STATE_EMPR_RESETTING)) { IXL_PF_LOCK(pf); status = i40e_nvmupd_command(hw, nvma, nvma->data, &perrno); IXL_PF_UNLOCK(pf); } else { perrno = -EBUSY; } if (status) device_printf(dev, "i40e_nvmupd_command status %s, perrno %d\n", i40e_stat_str(hw, status), perrno); /* * -EPERM is actually ERESTART, which the kernel interprets as it needing * to run this ioctl again. So use -EACCES for -EPERM instead. */ if (perrno == -EPERM) return (-EACCES); else return (perrno); } /********************************************************************* * * Media Ioctl callback * * This routine is called whenever the user queries the status of * the interface using ifconfig. * **********************************************************************/ void ixl_media_status(struct ifnet * ifp, struct ifmediareq * ifmr) { struct ixl_vsi *vsi = ifp->if_softc; struct ixl_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; INIT_DEBUGOUT("ixl_media_status: begin"); IXL_PF_LOCK(pf); hw->phy.get_link_info = TRUE; i40e_get_link_status(hw, &pf->link_up); ixl_update_link_status(pf); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!pf->link_up) { IXL_PF_UNLOCK(pf); return; } ifmr->ifm_status |= IFM_ACTIVE; /* Hardware always does full-duplex */ ifmr->ifm_active |= IFM_FDX; switch (hw->phy.link_info.phy_type) { /* 100 M */ case I40E_PHY_TYPE_100BASE_TX: ifmr->ifm_active |= IFM_100_TX; break; /* 1 G */ case I40E_PHY_TYPE_1000BASE_T: ifmr->ifm_active |= IFM_1000_T; break; case I40E_PHY_TYPE_1000BASE_SX: ifmr->ifm_active |= IFM_1000_SX; break; case I40E_PHY_TYPE_1000BASE_LX: ifmr->ifm_active |= IFM_1000_LX; break; case I40E_PHY_TYPE_1000BASE_T_OPTICAL: ifmr->ifm_active |= IFM_OTHER; break; /* 10 G */ case I40E_PHY_TYPE_10GBASE_SFPP_CU: ifmr->ifm_active |= IFM_10G_TWINAX; break; case I40E_PHY_TYPE_10GBASE_SR: ifmr->ifm_active |= IFM_10G_SR; break; case I40E_PHY_TYPE_10GBASE_LR: ifmr->ifm_active |= IFM_10G_LR; break; case I40E_PHY_TYPE_10GBASE_T: ifmr->ifm_active |= IFM_10G_T; break; case I40E_PHY_TYPE_XAUI: case I40E_PHY_TYPE_XFI: case I40E_PHY_TYPE_10GBASE_AOC: ifmr->ifm_active |= IFM_OTHER; break; /* 25 G */ case I40E_PHY_TYPE_25GBASE_KR: ifmr->ifm_active |= IFM_25G_KR; break; case I40E_PHY_TYPE_25GBASE_CR: ifmr->ifm_active |= IFM_25G_CR; break; case I40E_PHY_TYPE_25GBASE_SR: ifmr->ifm_active |= IFM_25G_SR; break; case I40E_PHY_TYPE_25GBASE_LR: ifmr->ifm_active |= IFM_UNKNOWN; break; /* 40 G */ case I40E_PHY_TYPE_40GBASE_CR4: case I40E_PHY_TYPE_40GBASE_CR4_CU: ifmr->ifm_active |= IFM_40G_CR4; break; case I40E_PHY_TYPE_40GBASE_SR4: ifmr->ifm_active |= IFM_40G_SR4; break; case I40E_PHY_TYPE_40GBASE_LR4: ifmr->ifm_active |= IFM_40G_LR4; break; case I40E_PHY_TYPE_XLAUI: ifmr->ifm_active |= IFM_OTHER; break; case I40E_PHY_TYPE_1000BASE_KX: ifmr->ifm_active |= IFM_1000_KX; break; case I40E_PHY_TYPE_SGMII: ifmr->ifm_active |= IFM_1000_SGMII; break; /* ERJ: What's the difference between these? 
*/ case I40E_PHY_TYPE_10GBASE_CR1_CU: case I40E_PHY_TYPE_10GBASE_CR1: ifmr->ifm_active |= IFM_10G_CR1; break; case I40E_PHY_TYPE_10GBASE_KX4: ifmr->ifm_active |= IFM_10G_KX4; break; case I40E_PHY_TYPE_10GBASE_KR: ifmr->ifm_active |= IFM_10G_KR; break; case I40E_PHY_TYPE_SFI: ifmr->ifm_active |= IFM_10G_SFI; break; /* Our single 20G media type */ case I40E_PHY_TYPE_20GBASE_KR2: ifmr->ifm_active |= IFM_20G_KR2; break; case I40E_PHY_TYPE_40GBASE_KR4: ifmr->ifm_active |= IFM_40G_KR4; break; case I40E_PHY_TYPE_XLPPI: case I40E_PHY_TYPE_40GBASE_AOC: ifmr->ifm_active |= IFM_40G_XLPPI; break; /* Unknown to driver */ default: ifmr->ifm_active |= IFM_UNKNOWN; break; } /* Report flow control status as well */ if (hw->phy.link_info.an_info & I40E_AQ_LINK_PAUSE_TX) ifmr->ifm_active |= IFM_ETH_TXPAUSE; if (hw->phy.link_info.an_info & I40E_AQ_LINK_PAUSE_RX) ifmr->ifm_active |= IFM_ETH_RXPAUSE; IXL_PF_UNLOCK(pf); } void ixl_init(void *arg) { struct ixl_pf *pf = arg; IXL_PF_LOCK(pf); ixl_init_locked(pf); IXL_PF_UNLOCK(pf); } /* * NOTE: Fortville does not support forcing media speeds. Instead, * use the set_advertise sysctl to set the speeds Fortville * will advertise or be allowed to operate at. */ int ixl_media_change(struct ifnet * ifp) { struct ixl_vsi *vsi = ifp->if_softc; struct ifmedia *ifm = &vsi->media; INIT_DEBUGOUT("ixl_media_change: begin"); if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); if_printf(ifp, "Use 'advertise_speed' sysctl to change advertised speeds\n"); return (ENODEV); } /********************************************************************* * Ioctl entry point * * ixl_ioctl is called when the user wants to configure the * interface. * * return 0 on success, positive on failure **********************************************************************/ int ixl_ioctl(struct ifnet * ifp, u_long command, caddr_t data) { struct ixl_vsi *vsi = ifp->if_softc; struct ixl_pf *pf = vsi->back; struct ifreq *ifr = (struct ifreq *)data; struct ifdrv *ifd = (struct ifdrv *)data; #if defined(INET) || defined(INET6) struct ifaddr *ifa = (struct ifaddr *)data; bool avoid_reset = FALSE; #endif int error = 0; switch (command) { case SIOCSIFADDR: IOCTL_DEBUGOUT("ioctl: SIOCSIFADDR (Set Interface Address)"); #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) avoid_reset = TRUE; #endif #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) avoid_reset = TRUE; #endif #if defined(INET) || defined(INET6) /* ** Calling init results in link renegotiation, ** so we avoid doing it when possible. 
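** When an INET/INET6 address is being set, the interface is simply marked
** up (calling ixl_init() only if it is not already running) and ARP is
** re-initialized for INET, instead of falling through to ether_ioctl().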
*/ if (avoid_reset) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) ixl_init(pf); #ifdef INET if (!(ifp->if_flags & IFF_NOARP)) arp_ifinit(ifp, ifa); #endif } else error = ether_ioctl(ifp, command, data); break; #endif case SIOCSIFMTU: IOCTL_DEBUGOUT("ioctl: SIOCSIFMTU (Set Interface MTU)"); if (ifr->ifr_mtu > IXL_MAX_FRAME - ETHER_HDR_LEN - ETHER_CRC_LEN - ETHER_VLAN_ENCAP_LEN) { error = EINVAL; } else { IXL_PF_LOCK(pf); ifp->if_mtu = ifr->ifr_mtu; vsi->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN + ETHER_VLAN_ENCAP_LEN; ixl_init_locked(pf); IXL_PF_UNLOCK(pf); } break; case SIOCSIFFLAGS: IOCTL_DEBUGOUT("ioctl: SIOCSIFFLAGS (Set Interface Flags)"); IXL_PF_LOCK(pf); if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { if ((ifp->if_flags ^ pf->if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) { ixl_set_promisc(vsi); } } else { IXL_PF_UNLOCK(pf); ixl_init(pf); IXL_PF_LOCK(pf); } } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { ixl_stop_locked(pf); } } pf->if_flags = ifp->if_flags; IXL_PF_UNLOCK(pf); break; case SIOCSDRVSPEC: case SIOCGDRVSPEC: IOCTL_DEBUGOUT("ioctl: SIOCxDRVSPEC (Get/Set Driver-specific " "Info)\n"); /* NVM update command */ if (ifd->ifd_cmd == I40E_NVM_ACCESS) error = ixl_handle_nvmupd_cmd(pf, ifd); else error = EINVAL; break; case SIOCADDMULTI: IOCTL_DEBUGOUT("ioctl: SIOCADDMULTI"); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { IXL_PF_LOCK(pf); ixl_disable_rings_intr(vsi); ixl_add_multi(vsi); ixl_enable_intr(vsi); IXL_PF_UNLOCK(pf); } break; case SIOCDELMULTI: IOCTL_DEBUGOUT("ioctl: SIOCDELMULTI"); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { IXL_PF_LOCK(pf); ixl_disable_rings_intr(vsi); ixl_del_multi(vsi); ixl_enable_intr(vsi); IXL_PF_UNLOCK(pf); } break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: case SIOCGIFXMEDIA: IOCTL_DEBUGOUT("ioctl: SIOCxIFMEDIA (Get/Set Interface Media)"); error = ifmedia_ioctl(ifp, ifr, &vsi->media, command); break; case SIOCSIFCAP: { int mask = ifr->ifr_reqcap ^ ifp->if_capenable; IOCTL_DEBUGOUT("ioctl: SIOCSIFCAP (Set Capabilities)"); ixl_cap_txcsum_tso(vsi, ifp, mask); if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_RXCSUM_IPV6) ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_VLAN_HWFILTER) ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { IXL_PF_LOCK(pf); ixl_init_locked(pf); IXL_PF_UNLOCK(pf); } VLAN_CAPABILITIES(ifp); break; } #if __FreeBSD_version >= 1003000 case SIOCGI2C: { struct ifi2creq i2c; int i; IOCTL_DEBUGOUT("ioctl: SIOCGI2C (Get I2C Data)"); if (!pf->has_i2c) return (ENOTTY); - error = copyin(ifr->ifr_data, &i2c, sizeof(i2c)); + error = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c)); if (error != 0) break; if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) { error = EINVAL; break; } if (i2c.len > sizeof(i2c.data)) { error = EINVAL; break; } for (i = 0; i < i2c.len; i++) if (ixl_read_i2c_byte(pf, i2c.offset + i, i2c.dev_addr, &i2c.data[i])) return (EIO); - error = copyout(&i2c, ifr->ifr_data, sizeof(i2c)); + error = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c)); break; } #endif default: IOCTL_DEBUGOUT("ioctl: UNKNOWN (0x%X)\n", (int)command); error = ether_ioctl(ifp, command, data); break; } return (error); } int ixl_find_i2c_interface(struct ixl_pf *pf) { struct i40e_hw *hw = &pf->hw; bool i2c_en, port_matched; u32 
reg; for (int i = 0; i < 4; i++) { reg = rd32(hw, I40E_GLGEN_MDIO_I2C_SEL(i)); i2c_en = (reg & I40E_GLGEN_MDIO_I2C_SEL_MDIO_I2C_SEL_MASK); port_matched = ((reg & I40E_GLGEN_MDIO_I2C_SEL_PHY_PORT_NUM_MASK) >> I40E_GLGEN_MDIO_I2C_SEL_PHY_PORT_NUM_SHIFT) & BIT(hw->port); if (i2c_en && port_matched) return (i); } return (-1); } static char * ixl_phy_type_string(u32 bit_pos, bool ext) { static char * phy_types_str[32] = { "SGMII", "1000BASE-KX", "10GBASE-KX4", "10GBASE-KR", "40GBASE-KR4", "XAUI", "XFI", "SFI", "XLAUI", "XLPPI", "40GBASE-CR4", "10GBASE-CR1", "Reserved (12)", "Reserved (13)", "Reserved (14)", "Reserved (15)", "Reserved (16)", "100BASE-TX", "1000BASE-T", "10GBASE-T", "10GBASE-SR", "10GBASE-LR", "10GBASE-SFP+Cu", "10GBASE-CR1", "40GBASE-CR4", "40GBASE-SR4", "40GBASE-LR4", "1000BASE-SX", "1000BASE-LX", "1000BASE-T Optical", "20GBASE-KR2", "Reserved (31)" }; static char * ext_phy_types_str[4] = { "25GBASE-KR", "25GBASE-CR", "25GBASE-SR", "25GBASE-LR" }; if (ext && bit_pos > 3) return "Invalid_Ext"; if (bit_pos > 31) return "Invalid"; return (ext) ? ext_phy_types_str[bit_pos] : phy_types_str[bit_pos]; } int ixl_aq_get_link_status(struct ixl_pf *pf, struct i40e_aqc_get_link_status *link_status) { device_t dev = pf->dev; struct i40e_hw *hw = &pf->hw; struct i40e_aq_desc desc; enum i40e_status_code status; struct i40e_aqc_get_link_status *aq_link_status = (struct i40e_aqc_get_link_status *)&desc.params.raw; i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_get_link_status); link_status->command_flags = CPU_TO_LE16(I40E_AQ_LSE_ENABLE); status = i40e_asq_send_command(hw, &desc, NULL, 0, NULL); if (status) { device_printf(dev, "%s: i40e_aqc_opc_get_link_status status %s, aq error %s\n", __func__, i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); return (EIO); } bcopy(aq_link_status, link_status, sizeof(struct i40e_aqc_get_link_status)); return (0); } static char * ixl_phy_type_string_ls(u8 val) { if (val >= 0x1F) return ixl_phy_type_string(val - 0x1F, true); else return ixl_phy_type_string(val, false); } static int ixl_sysctl_link_status(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; device_t dev = pf->dev; struct sbuf *buf; int error = 0; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for sysctl output.\n"); return (ENOMEM); } struct i40e_aqc_get_link_status link_status; error = ixl_aq_get_link_status(pf, &link_status); if (error) { sbuf_delete(buf); return (error); } /* TODO: Add 25G types */ sbuf_printf(buf, "\n" "PHY Type : 0x%02x<%s>\n" "Speed : 0x%02x\n" "Link info: 0x%02x\n" "AN info : 0x%02x\n" "Ext info : 0x%02x\n" "Loopback : 0x%02x\n" "Max Frame: %d\n" "Config : 0x%02x\n" "Power : 0x%02x", link_status.phy_type, ixl_phy_type_string_ls(link_status.phy_type), link_status.link_speed, link_status.link_info, link_status.an_info, link_status.ext_info, link_status.loopback, link_status.max_frame_size, link_status.config, link_status.power_desc); error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (error); } static int ixl_sysctl_phy_abilities(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; enum i40e_status_code status; struct i40e_aq_get_phy_abilities_resp abilities; struct sbuf *buf; int error = 0; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for sysctl output.\n"); return (ENOMEM); } status = 
i40e_aq_get_phy_capabilities(hw, FALSE, FALSE, &abilities, NULL); if (status) { device_printf(dev, "%s: i40e_aq_get_phy_capabilities() status %s, aq error %s\n", __func__, i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); sbuf_delete(buf); return (EIO); } sbuf_printf(buf, "\n" "PHY Type : %08x", abilities.phy_type); if (abilities.phy_type != 0) { sbuf_printf(buf, "<"); for (int i = 0; i < 32; i++) if ((1 << i) & abilities.phy_type) sbuf_printf(buf, "%s,", ixl_phy_type_string(i, false)); sbuf_printf(buf, ">\n"); } sbuf_printf(buf, "PHY Ext : %02x", abilities.phy_type_ext); if (abilities.phy_type_ext != 0) { sbuf_printf(buf, "<"); for (int i = 0; i < 4; i++) if ((1 << i) & abilities.phy_type_ext) sbuf_printf(buf, "%s,", ixl_phy_type_string(i, true)); sbuf_printf(buf, ">"); } sbuf_printf(buf, "\n"); sbuf_printf(buf, "Speed : %02x\n" "Abilities: %02x\n" "EEE cap : %04x\n" "EEER reg : %08x\n" "D3 Lpan : %02x\n" "ID : %02x %02x %02x %02x\n" "ModType : %02x %02x %02x\n" "ModType E: %01x\n" "FEC Cfg : %02x\n" "Ext CC : %02x", abilities.link_speed, abilities.abilities, abilities.eee_capability, abilities.eeer_val, abilities.d3_lpan, abilities.phy_id[0], abilities.phy_id[1], abilities.phy_id[2], abilities.phy_id[3], abilities.module_type[0], abilities.module_type[1], abilities.module_type[2], abilities.phy_type_ext >> 5, abilities.phy_type_ext & 0x1F, abilities.ext_comp_code); error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (error); } static int ixl_sysctl_sw_filter_list(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct ixl_vsi *vsi = &pf->vsi; struct ixl_mac_filter *f; char *buf, *buf_i; int error = 0; int ftl_len = 0; int ftl_counter = 0; int buf_len = 0; int entry_len = 42; SLIST_FOREACH(f, &vsi->ftl, next) { ftl_len++; } if (ftl_len < 1) { sysctl_handle_string(oidp, "(none)", 6, req); return (0); } buf_len = sizeof(char) * (entry_len + 1) * ftl_len + 2; buf = buf_i = malloc(buf_len, M_DEVBUF, M_NOWAIT); sprintf(buf_i++, "\n"); SLIST_FOREACH(f, &vsi->ftl, next) { sprintf(buf_i, MAC_FORMAT ", vlan %4d, flags %#06x", MAC_FORMAT_ARGS(f->macaddr), f->vlan, f->flags); buf_i += entry_len; /* don't print '\n' for last entry */ if (++ftl_counter != ftl_len) { sprintf(buf_i, "\n"); buf_i++; } } error = sysctl_handle_string(oidp, buf, strlen(buf), req); if (error) printf("sysctl error: %d\n", error); free(buf, M_DEVBUF); return error; } #define IXL_SW_RES_SIZE 0x14 int ixl_res_alloc_cmp(const void *a, const void *b) { const struct i40e_aqc_switch_resource_alloc_element_resp *one, *two; one = (const struct i40e_aqc_switch_resource_alloc_element_resp *)a; two = (const struct i40e_aqc_switch_resource_alloc_element_resp *)b; return ((int)one->resource_type - (int)two->resource_type); } /* * Longest string length: 25 */ char * ixl_switch_res_type_string(u8 type) { static char * ixl_switch_res_type_strings[0x14] = { "VEB", "VSI", "Perfect Match MAC address", "S-tag", "(Reserved)", "Multicast hash entry", "Unicast hash entry", "VLAN", "VSI List entry", "(Reserved)", "VLAN Statistic Pool", "Mirror Rule", "Queue Set", "Inner VLAN Forward filter", "(Reserved)", "Inner MAC", "IP", "GRE/VN1 Key", "VN2 Key", "Tunneling Port" }; if (type < 0x14) return ixl_switch_res_type_strings[type]; else return "(Reserved)"; } static int ixl_sysctl_hw_res_alloc(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct sbuf *buf; enum i40e_status_code 
status; int error = 0; u8 num_entries; struct i40e_aqc_switch_resource_alloc_element_resp resp[IXL_SW_RES_SIZE]; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for output.\n"); return (ENOMEM); } bzero(resp, sizeof(resp)); status = i40e_aq_get_switch_resource_alloc(hw, &num_entries, resp, IXL_SW_RES_SIZE, NULL); if (status) { device_printf(dev, "%s: get_switch_resource_alloc() error %s, aq error %s\n", __func__, i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); sbuf_delete(buf); return (error); } /* Sort entries by type for display */ qsort(resp, num_entries, sizeof(struct i40e_aqc_switch_resource_alloc_element_resp), &ixl_res_alloc_cmp); sbuf_cat(buf, "\n"); sbuf_printf(buf, "# of entries: %d\n", num_entries); sbuf_printf(buf, " Type | Guaranteed | Total | Used | Un-allocated\n" " | (this) | (all) | (this) | (all) \n"); for (int i = 0; i < num_entries; i++) { sbuf_printf(buf, "%25s | %10d %5d %6d %12d", ixl_switch_res_type_string(resp[i].resource_type), resp[i].guaranteed, resp[i].total, resp[i].used, resp[i].total_unalloced); if (i < num_entries - 1) sbuf_cat(buf, "\n"); } error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (error); } /* ** Caller must init and delete sbuf; this function will clear and ** finish it for caller. ** ** XXX: Cannot use the SEID for this, since there is no longer a ** fixed mapping between SEID and element type. */ char * ixl_switch_element_string(struct sbuf *s, struct i40e_aqc_switch_config_element_resp *element) { sbuf_clear(s); switch (element->element_type) { case I40E_AQ_SW_ELEM_TYPE_MAC: sbuf_printf(s, "MAC %3d", element->element_info); break; case I40E_AQ_SW_ELEM_TYPE_PF: sbuf_printf(s, "PF %3d", element->element_info); break; case I40E_AQ_SW_ELEM_TYPE_VF: sbuf_printf(s, "VF %3d", element->element_info); break; case I40E_AQ_SW_ELEM_TYPE_EMP: sbuf_cat(s, "EMP"); break; case I40E_AQ_SW_ELEM_TYPE_BMC: sbuf_cat(s, "BMC"); break; case I40E_AQ_SW_ELEM_TYPE_PV: sbuf_cat(s, "PV"); break; case I40E_AQ_SW_ELEM_TYPE_VEB: sbuf_cat(s, "VEB"); break; case I40E_AQ_SW_ELEM_TYPE_PA: sbuf_cat(s, "PA"); break; case I40E_AQ_SW_ELEM_TYPE_VSI: sbuf_printf(s, "VSI %3d", element->element_info); break; default: sbuf_cat(s, "?"); break; } sbuf_finish(s); return sbuf_data(s); } static int ixl_sysctl_switch_config(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct sbuf *buf; struct sbuf *nmbuf; enum i40e_status_code status; int error = 0; u16 next = 0; u8 aq_buf[I40E_AQ_LARGE_BUF]; struct i40e_aqc_get_switch_config_resp *sw_config; sw_config = (struct i40e_aqc_get_switch_config_resp *)aq_buf; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for sysctl output.\n"); return (ENOMEM); } status = i40e_aq_get_switch_config(hw, sw_config, sizeof(aq_buf), &next, NULL); if (status) { device_printf(dev, "%s: aq_get_switch_config() error %s, aq error %s\n", __func__, i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); sbuf_delete(buf); return error; } if (next) device_printf(dev, "%s: TODO: get more config with SEID %d\n", __func__, next); nmbuf = sbuf_new_auto(); if (!nmbuf) { device_printf(dev, "Could not allocate sbuf for name output.\n"); sbuf_delete(buf); return (ENOMEM); } sbuf_cat(buf, "\n"); /* Assuming <= 255 elements in switch */ sbuf_printf(buf, "# of reported elements: %d\n", 
sw_config->header.num_reported); sbuf_printf(buf, "total # of elements: %d\n", sw_config->header.num_total); /* Exclude: ** Revision -- all elements are revision 1 for now */ sbuf_printf(buf, "SEID ( Name ) | Uplink | Downlink | Conn Type\n" " | | | (uplink)\n"); for (int i = 0; i < sw_config->header.num_reported; i++) { // "%4d (%8s) | %8s %8s %#8x", sbuf_printf(buf, "%4d", sw_config->element[i].seid); sbuf_cat(buf, " "); sbuf_printf(buf, "(%8s)", ixl_switch_element_string(nmbuf, &sw_config->element[i])); sbuf_cat(buf, " | "); sbuf_printf(buf, "%8d", sw_config->element[i].uplink_seid); sbuf_cat(buf, " "); sbuf_printf(buf, "%8d", sw_config->element[i].downlink_seid); sbuf_cat(buf, " "); sbuf_printf(buf, "%#8x", sw_config->element[i].connection_type); if (i < sw_config->header.num_reported - 1) sbuf_cat(buf, "\n"); } sbuf_delete(nmbuf); error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (error); } static int ixl_sysctl_hkey(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct sbuf *buf; int error = 0; enum i40e_status_code status; u32 reg; struct i40e_aqc_get_set_rss_key_data key_data; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for output.\n"); return (ENOMEM); } sbuf_cat(buf, "\n"); if (hw->mac.type == I40E_MAC_X722) { bzero(key_data.standard_rss_key, sizeof(key_data.standard_rss_key)); status = i40e_aq_get_rss_key(hw, pf->vsi.vsi_num, &key_data); if (status) device_printf(dev, "i40e_aq_get_rss_key status %s, error %s\n", i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); sbuf_printf(buf, "%40D", (u_char *)key_data.standard_rss_key, ""); } else { for (int i = 0; i < IXL_RSS_KEY_SIZE_REG; i++) { reg = i40e_read_rx_ctl(hw, I40E_PFQF_HKEY(i)); sbuf_printf(buf, "%4D", (u_char *)®, ""); } } error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (error); } static int ixl_sysctl_hlut(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; struct sbuf *buf; int error = 0; enum i40e_status_code status; u8 hlut[512]; u32 reg; buf = sbuf_new_for_sysctl(NULL, NULL, 128, req); if (!buf) { device_printf(dev, "Could not allocate sbuf for output.\n"); return (ENOMEM); } sbuf_cat(buf, "\n"); if (hw->mac.type == I40E_MAC_X722) { bzero(hlut, sizeof(hlut)); status = i40e_aq_get_rss_lut(hw, pf->vsi.vsi_num, TRUE, hlut, sizeof(hlut)); if (status) device_printf(dev, "i40e_aq_get_rss_lut status %s, error %s\n", i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); sbuf_printf(buf, "%512D", (u_char *)hlut, ""); } else { for (int i = 0; i < hw->func_caps.rss_table_size >> 2; i++) { reg = rd32(hw, I40E_PFQF_HLUT(i)); sbuf_printf(buf, "%4D", (u_char *)®, ""); } } error = sbuf_finish(buf); if (error) device_printf(dev, "Error finishing sbuf: %d\n", error); sbuf_delete(buf); return (error); } static int ixl_sysctl_hena(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; u64 hena; hena = (u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0)) | ((u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1)) << 32); return sysctl_handle_long(oidp, NULL, hena, req); } /* * Sysctl to disable firmware's link management * * 1 - Disable link management on this port * 0 - Re-enable link management * * On normal NVMs, firmware manages link by default. 
*/ static int ixl_sysctl_fw_link_management(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; struct i40e_hw *hw = &pf->hw; device_t dev = pf->dev; int requested_mode = -1; enum i40e_status_code status = 0; int error = 0; /* Read in new mode */ error = sysctl_handle_int(oidp, &requested_mode, 0, req); if ((error) || (req->newptr == NULL)) return (error); /* Check for sane value */ if (requested_mode < 0 || requested_mode > 1) { device_printf(dev, "Valid modes are 0 or 1\n"); return (EINVAL); } /* Set new mode */ status = i40e_aq_set_phy_debug(hw, !!(requested_mode) << 4, NULL); if (status) { device_printf(dev, "%s: Error setting new phy debug mode %s," " aq error: %s\n", __func__, i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); return (EIO); } return (0); } /* * Sysctl to read a byte from I2C bus. * * Input: 32-bit value: * bits 0-7: device address (0xA0 or 0xA2) * bits 8-15: offset (0-255) * bits 16-31: unused * Output: 8-bit value read */ static int ixl_sysctl_read_i2c_byte(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; device_t dev = pf->dev; int input = -1, error = 0; device_printf(dev, "%s: start\n", __func__); u8 dev_addr, offset, output; /* Read in I2C read parameters */ error = sysctl_handle_int(oidp, &input, 0, req); if ((error) || (req->newptr == NULL)) return (error); /* Validate device address */ dev_addr = input & 0xFF; if (dev_addr != 0xA0 && dev_addr != 0xA2) { return (EINVAL); } offset = (input >> 8) & 0xFF; error = ixl_read_i2c_byte(pf, offset, dev_addr, &output); if (error) return (error); device_printf(dev, "%02X\n", output); return (0); } /* * Sysctl to write a byte to the I2C bus. * * Input: 32-bit value: * bits 0-7: device address (0xA0 or 0xA2) * bits 8-15: offset (0-255) * bits 16-23: value to write * bits 24-31: unused * Output: 8-bit value written */ static int ixl_sysctl_write_i2c_byte(SYSCTL_HANDLER_ARGS) { struct ixl_pf *pf = (struct ixl_pf *)arg1; device_t dev = pf->dev; int input = -1, error = 0; u8 dev_addr, offset, value; /* Read in I2C write parameters */ error = sysctl_handle_int(oidp, &input, 0, req); if ((error) || (req->newptr == NULL)) return (error); /* Validate device address */ dev_addr = input & 0xFF; if (dev_addr != 0xA0 && dev_addr != 0xA2) { return (EINVAL); } offset = (input >> 8) & 0xFF; value = (input >> 16) & 0xFF; error = ixl_write_i2c_byte(pf, offset, dev_addr, value); if (error) return (error); device_printf(dev, "%02X written\n", value); return (0); } static int ixl_get_fec_config(struct ixl_pf *pf, struct i40e_aq_get_phy_abilities_resp *abilities, u8 bit_pos, int *is_set) { device_t dev = pf->dev; struct i40e_hw *hw = &pf->hw; enum i40e_status_code status; status = i40e_aq_get_phy_capabilities(hw, FALSE, FALSE, abilities, NULL); if (status) { device_printf(dev, "%s: i40e_aq_get_phy_capabilities() status %s, aq error %s\n", __func__, i40e_stat_str(hw, status), i40e_aq_str(hw, hw->aq.asq_last_status)); return (EIO); } *is_set = !!(abilities->phy_type_ext & bit_pos); return (0); } static int ixl_set_fec_config(struct ixl_pf *pf, struct i40e_aq_get_phy_abilities_resp *abilities, u8 bit_pos, int set) { device_t dev = pf->dev; struct i40e_hw *hw = &pf->hw; struct i40e_aq_set_phy_config config; enum i40e_status_code status; /* Set new PHY config */ memset(&config, 0, sizeof(config)); config.fec_config = abilities->phy_type_ext & ~(bit_pos); if (set) config.fec_config |= bit_pos; if (config.fec_config != abilities->phy_type_ext) { config.abilities |= I40E_AQ_PHY_ENABLE_ATOMIC_LINK; 
		config.phy_type = abilities->phy_type;
		config.phy_type_ext = abilities->phy_type_ext;
		config.link_speed = abilities->link_speed;
		config.eee_capability = abilities->eee_capability;
		config.eeer = abilities->eeer_val;
		config.low_power_ctrl = abilities->d3_lpan;
		status = i40e_aq_set_phy_config(hw, &config, NULL);
		if (status) {
			device_printf(dev,
			    "%s: i40e_aq_set_phy_config() status %s, aq error %s\n",
			    __func__, i40e_stat_str(hw, status),
			    i40e_aq_str(hw, hw->aq.asq_last_status));
			return (EIO);
		}
	}

	return (0);
}

static int
ixl_sysctl_fec_fc_ability(SYSCTL_HANDLER_ARGS)
{
	struct ixl_pf *pf = (struct ixl_pf *)arg1;
	int mode, error = 0;
	struct i40e_aq_get_phy_abilities_resp abilities;

	error = ixl_get_fec_config(pf, &abilities, I40E_AQ_SET_FEC_ABILITY_KR, &mode);
	if (error)
		return (error);
	/* Read in new mode */
	error = sysctl_handle_int(oidp, &mode, 0, req);
	if ((error) || (req->newptr == NULL))
		return (error);

	return ixl_set_fec_config(pf, &abilities, I40E_AQ_SET_FEC_ABILITY_KR, !!(mode));
}

static int
ixl_sysctl_fec_rs_ability(SYSCTL_HANDLER_ARGS)
{
	struct ixl_pf *pf = (struct ixl_pf *)arg1;
	int mode, error = 0;
	struct i40e_aq_get_phy_abilities_resp abilities;

	error = ixl_get_fec_config(pf, &abilities, I40E_AQ_SET_FEC_ABILITY_RS, &mode);
	if (error)
		return (error);
	/* Read in new mode */
	error = sysctl_handle_int(oidp, &mode, 0, req);
	if ((error) || (req->newptr == NULL))
		return (error);

	return ixl_set_fec_config(pf, &abilities, I40E_AQ_SET_FEC_ABILITY_RS, !!(mode));
}

static int
ixl_sysctl_fec_fc_request(SYSCTL_HANDLER_ARGS)
{
	struct ixl_pf *pf = (struct ixl_pf *)arg1;
	int mode, error = 0;
	struct i40e_aq_get_phy_abilities_resp abilities;

	error = ixl_get_fec_config(pf, &abilities, I40E_AQ_SET_FEC_REQUEST_KR, &mode);
	if (error)
		return (error);
	/* Read in new mode */
	error = sysctl_handle_int(oidp, &mode, 0, req);
	if ((error) || (req->newptr == NULL))
		return (error);

	return ixl_set_fec_config(pf, &abilities, I40E_AQ_SET_FEC_REQUEST_KR, !!(mode));
}

static int
ixl_sysctl_fec_rs_request(SYSCTL_HANDLER_ARGS)
{
	struct ixl_pf *pf = (struct ixl_pf *)arg1;
	int mode, error = 0;
	struct i40e_aq_get_phy_abilities_resp abilities;

	error = ixl_get_fec_config(pf, &abilities, I40E_AQ_SET_FEC_REQUEST_RS, &mode);
	if (error)
		return (error);
	/* Read in new mode */
	error = sysctl_handle_int(oidp, &mode, 0, req);
	if ((error) || (req->newptr == NULL))
		return (error);

	return ixl_set_fec_config(pf, &abilities, I40E_AQ_SET_FEC_REQUEST_RS, !!(mode));
}

static int
ixl_sysctl_fec_auto_enable(SYSCTL_HANDLER_ARGS)
{
	struct ixl_pf *pf = (struct ixl_pf *)arg1;
	int mode, error = 0;
	struct i40e_aq_get_phy_abilities_resp abilities;

	error = ixl_get_fec_config(pf, &abilities, I40E_AQ_SET_FEC_AUTO, &mode);
	if (error)
		return (error);
	/* Read in new mode */
	error = sysctl_handle_int(oidp, &mode, 0, req);
	if ((error) || (req->newptr == NULL))
		return (error);

	return ixl_set_fec_config(pf, &abilities, I40E_AQ_SET_FEC_AUTO, !!(mode));
}

Index: stable/11/sys/dev/mlx4/mlx4_en/mlx4_en_netdev.c
===================================================================
--- stable/11/sys/dev/mlx4/mlx4_en/mlx4_en_netdev.c	(revision 332287)
+++ stable/11/sys/dev/mlx4/mlx4_en/mlx4_en_netdev.c	(revision 332288)
@@ -1,2881 +1,2881 @@
/*
 * Copyright (c) 2007, 2014 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.
You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #include #include #include #include #ifdef CONFIG_NET_RX_BUSY_POLL #include #endif #include #include #include #include #include #include #include #include #include "en.h" #include "en_port.h" static void mlx4_en_sysctl_stat(struct mlx4_en_priv *priv); static void mlx4_en_sysctl_conf(struct mlx4_en_priv *priv); #ifdef CONFIG_NET_RX_BUSY_POLL /* must be called with local_bh_disable()d */ static int mlx4_en_low_latency_recv(struct napi_struct *napi) { struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi); struct net_device *dev = cq->dev; struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_rx_ring *rx_ring = priv->rx_ring[cq->ring]; int done; if (!priv->port_up) return LL_FLUSH_FAILED; if (!mlx4_en_cq_lock_poll(cq)) return LL_FLUSH_BUSY; done = mlx4_en_process_rx_cq(dev, cq, 4); #ifdef LL_EXTENDED_STATS if (likely(done)) rx_ring->cleaned += done; else rx_ring->misses++; #endif mlx4_en_cq_unlock_poll(cq); return done; } #endif /* CONFIG_NET_RX_BUSY_POLL */ #ifdef CONFIG_RFS_ACCEL struct mlx4_en_filter { struct list_head next; struct work_struct work; u8 ip_proto; __be32 src_ip; __be32 dst_ip; __be16 src_port; __be16 dst_port; int rxq_index; struct mlx4_en_priv *priv; u32 flow_id; /* RFS infrastructure id */ int id; /* mlx4_en driver id */ u64 reg_id; /* Flow steering API id */ u8 activated; /* Used to prevent expiry before filter * is attached */ struct hlist_node filter_chain; }; static void mlx4_en_filter_rfs_expire(struct mlx4_en_priv *priv); static enum mlx4_net_trans_rule_id mlx4_ip_proto_to_trans_rule_id(u8 ip_proto) { switch (ip_proto) { case IPPROTO_UDP: return MLX4_NET_TRANS_RULE_ID_UDP; case IPPROTO_TCP: return MLX4_NET_TRANS_RULE_ID_TCP; default: return MLX4_NET_TRANS_RULE_NUM; } }; static void mlx4_en_filter_work(struct work_struct *work) { struct mlx4_en_filter *filter = container_of(work, struct mlx4_en_filter, work); struct mlx4_en_priv *priv = filter->priv; struct mlx4_spec_list spec_tcp_udp = { .id = mlx4_ip_proto_to_trans_rule_id(filter->ip_proto), { .tcp_udp = { .dst_port = filter->dst_port, .dst_port_msk = (__force __be16)-1, .src_port = filter->src_port, .src_port_msk = (__force __be16)-1, }, }, }; struct mlx4_spec_list spec_ip = { .id = MLX4_NET_TRANS_RULE_ID_IPV4, { .ipv4 = { .dst_ip = filter->dst_ip, .dst_ip_msk = (__force __be32)-1, .src_ip = filter->src_ip, .src_ip_msk 
= (__force __be32)-1, }, }, }; struct mlx4_spec_list spec_eth = { .id = MLX4_NET_TRANS_RULE_ID_ETH, }; struct mlx4_net_trans_rule rule = { .list = LIST_HEAD_INIT(rule.list), .queue_mode = MLX4_NET_TRANS_Q_LIFO, .exclusive = 1, .allow_loopback = 1, .promisc_mode = MLX4_FS_REGULAR, .port = priv->port, .priority = MLX4_DOMAIN_RFS, }; int rc; __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16); if (spec_tcp_udp.id >= MLX4_NET_TRANS_RULE_NUM) { en_warn(priv, "RFS: ignoring unsupported ip protocol (%d)\n", filter->ip_proto); goto ignore; } list_add_tail(&spec_eth.list, &rule.list); list_add_tail(&spec_ip.list, &rule.list); list_add_tail(&spec_tcp_udp.list, &rule.list); rule.qpn = priv->rss_map.qps[filter->rxq_index].qpn; memcpy(spec_eth.eth.dst_mac, priv->dev->dev_addr, ETH_ALEN); memcpy(spec_eth.eth.dst_mac_msk, &mac_mask, ETH_ALEN); filter->activated = 0; if (filter->reg_id) { rc = mlx4_flow_detach(priv->mdev->dev, filter->reg_id); if (rc && rc != -ENOENT) en_err(priv, "Error detaching flow. rc = %d\n", rc); } rc = mlx4_flow_attach(priv->mdev->dev, &rule, &filter->reg_id); if (rc) en_err(priv, "Error attaching flow. err = %d\n", rc); ignore: mlx4_en_filter_rfs_expire(priv); filter->activated = 1; } static inline struct hlist_head * filter_hash_bucket(struct mlx4_en_priv *priv, __be32 src_ip, __be32 dst_ip, __be16 src_port, __be16 dst_port) { unsigned long l; int bucket_idx; l = (__force unsigned long)src_port | ((__force unsigned long)dst_port << 2); l ^= (__force unsigned long)(src_ip ^ dst_ip); bucket_idx = hash_long(l, MLX4_EN_FILTER_HASH_SHIFT); return &priv->filter_hash[bucket_idx]; } static struct mlx4_en_filter * mlx4_en_filter_alloc(struct mlx4_en_priv *priv, int rxq_index, __be32 src_ip, __be32 dst_ip, u8 ip_proto, __be16 src_port, __be16 dst_port, u32 flow_id) { struct mlx4_en_filter *filter = NULL; filter = kzalloc(sizeof(struct mlx4_en_filter), GFP_ATOMIC); if (!filter) return NULL; filter->priv = priv; filter->rxq_index = rxq_index; INIT_WORK(&filter->work, mlx4_en_filter_work); filter->src_ip = src_ip; filter->dst_ip = dst_ip; filter->ip_proto = ip_proto; filter->src_port = src_port; filter->dst_port = dst_port; filter->flow_id = flow_id; filter->id = priv->last_filter_id++ % RPS_NO_FILTER; list_add_tail(&filter->next, &priv->filters); hlist_add_head(&filter->filter_chain, filter_hash_bucket(priv, src_ip, dst_ip, src_port, dst_port)); return filter; } static void mlx4_en_filter_free(struct mlx4_en_filter *filter) { struct mlx4_en_priv *priv = filter->priv; int rc; list_del(&filter->next); rc = mlx4_flow_detach(priv->mdev->dev, filter->reg_id); if (rc && rc != -ENOENT) en_err(priv, "Error detaching flow. 
rc = %d\n", rc); kfree(filter); } static inline struct mlx4_en_filter * mlx4_en_filter_find(struct mlx4_en_priv *priv, __be32 src_ip, __be32 dst_ip, u8 ip_proto, __be16 src_port, __be16 dst_port) { struct mlx4_en_filter *filter; struct mlx4_en_filter *ret = NULL; hlist_for_each_entry(filter, filter_hash_bucket(priv, src_ip, dst_ip, src_port, dst_port), filter_chain) { if (filter->src_ip == src_ip && filter->dst_ip == dst_ip && filter->ip_proto == ip_proto && filter->src_port == src_port && filter->dst_port == dst_port) { ret = filter; break; } } return ret; } static int mlx4_en_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, u16 rxq_index, u32 flow_id) { struct mlx4_en_priv *priv = netdev_priv(net_dev); struct mlx4_en_filter *filter; const struct iphdr *ip; const __be16 *ports; u8 ip_proto; __be32 src_ip; __be32 dst_ip; __be16 src_port; __be16 dst_port; int nhoff = skb_network_offset(skb); int ret = 0; if (skb->protocol != htons(ETH_P_IP)) return -EPROTONOSUPPORT; ip = (const struct iphdr *)(skb->data + nhoff); if (ip_is_fragment(ip)) return -EPROTONOSUPPORT; if ((ip->protocol != IPPROTO_TCP) && (ip->protocol != IPPROTO_UDP)) return -EPROTONOSUPPORT; ports = (const __be16 *)(skb->data + nhoff + 4 * ip->ihl); ip_proto = ip->protocol; src_ip = ip->saddr; dst_ip = ip->daddr; src_port = ports[0]; dst_port = ports[1]; spin_lock_bh(&priv->filters_lock); filter = mlx4_en_filter_find(priv, src_ip, dst_ip, ip_proto, src_port, dst_port); if (filter) { if (filter->rxq_index == rxq_index) goto out; filter->rxq_index = rxq_index; } else { filter = mlx4_en_filter_alloc(priv, rxq_index, src_ip, dst_ip, ip_proto, src_port, dst_port, flow_id); if (!filter) { ret = -ENOMEM; goto err; } } queue_work(priv->mdev->workqueue, &filter->work); out: ret = filter->id; err: spin_unlock_bh(&priv->filters_lock); return ret; } void mlx4_en_cleanup_filters(struct mlx4_en_priv *priv) { struct mlx4_en_filter *filter, *tmp; LIST_HEAD(del_list); spin_lock_bh(&priv->filters_lock); list_for_each_entry_safe(filter, tmp, &priv->filters, next) { list_move(&filter->next, &del_list); hlist_del(&filter->filter_chain); } spin_unlock_bh(&priv->filters_lock); list_for_each_entry_safe(filter, tmp, &del_list, next) { cancel_work_sync(&filter->work); mlx4_en_filter_free(filter); } } static void mlx4_en_filter_rfs_expire(struct mlx4_en_priv *priv) { struct mlx4_en_filter *filter = NULL, *tmp, *last_filter = NULL; LIST_HEAD(del_list); int i = 0; spin_lock_bh(&priv->filters_lock); list_for_each_entry_safe(filter, tmp, &priv->filters, next) { if (i > MLX4_EN_FILTER_EXPIRY_QUOTA) break; if (filter->activated && !work_pending(&filter->work) && rps_may_expire_flow(priv->dev, filter->rxq_index, filter->flow_id, filter->id)) { list_move(&filter->next, &del_list); hlist_del(&filter->filter_chain); } else last_filter = filter; i++; } if (last_filter && (&last_filter->next != priv->filters.next)) list_move(&priv->filters, &last_filter->next); spin_unlock_bh(&priv->filters_lock); list_for_each_entry_safe(filter, tmp, &del_list, next) mlx4_en_filter_free(filter); } #endif static void mlx4_en_vlan_rx_add_vid(void *arg, struct net_device *dev, u16 vid) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int err; int idx; if (arg != priv) return; en_dbg(HW, priv, "adding VLAN:%d\n", vid); set_bit(vid, priv->active_vlans); /* Add VID to port VLAN filter */ mutex_lock(&mdev->state_lock); if (mdev->device_up && priv->port_up) { err = mlx4_SET_VLAN_FLTR(mdev->dev, priv); if (err) en_err(priv, "Failed 
configuring VLAN filter\n"); } if (mlx4_register_vlan(mdev->dev, priv->port, vid, &idx)) en_dbg(HW, priv, "failed adding vlan %d\n", vid); mutex_unlock(&mdev->state_lock); } static void mlx4_en_vlan_rx_kill_vid(void *arg, struct net_device *dev, u16 vid) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int err; if (arg != priv) return; en_dbg(HW, priv, "Killing VID:%d\n", vid); clear_bit(vid, priv->active_vlans); /* Remove VID from port VLAN filter */ mutex_lock(&mdev->state_lock); mlx4_unregister_vlan(mdev->dev, priv->port, vid); if (mdev->device_up && priv->port_up) { err = mlx4_SET_VLAN_FLTR(mdev->dev, priv); if (err) en_err(priv, "Failed configuring VLAN filter\n"); } mutex_unlock(&mdev->state_lock); } static int mlx4_en_tunnel_steer_add(struct mlx4_en_priv *priv, unsigned char *addr, int qpn, u64 *reg_id) { int err; if (priv->mdev->dev->caps.tunnel_offload_mode != MLX4_TUNNEL_OFFLOAD_MODE_VXLAN || priv->mdev->dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC) return 0; /* do nothing */ err = mlx4_tunnel_steer_add(priv->mdev->dev, addr, priv->port, qpn, MLX4_DOMAIN_NIC, reg_id); if (err) { en_err(priv, "failed to add vxlan steering rule, err %d\n", err); return err; } en_dbg(DRV, priv, "added vxlan steering rule, mac %pM reg_id %llx\n", addr, (long long)*reg_id); return 0; } static int mlx4_en_uc_steer_add(struct mlx4_en_priv *priv, unsigned char *mac, int *qpn, u64 *reg_id) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_dev *dev = mdev->dev; int err; switch (dev->caps.steering_mode) { case MLX4_STEERING_MODE_B0: { struct mlx4_qp qp; u8 gid[16] = {0}; qp.qpn = *qpn; memcpy(&gid[10], mac, ETH_ALEN); gid[5] = priv->port; err = mlx4_unicast_attach(dev, &qp, gid, 0, MLX4_PROT_ETH); break; } case MLX4_STEERING_MODE_DEVICE_MANAGED: { struct mlx4_spec_list spec_eth = { {NULL} }; __be64 mac_mask = cpu_to_be64(MLX4_MAC_MASK << 16); struct mlx4_net_trans_rule rule = { .queue_mode = MLX4_NET_TRANS_Q_FIFO, .exclusive = 0, .allow_loopback = 1, .promisc_mode = MLX4_FS_REGULAR, .priority = MLX4_DOMAIN_NIC, }; rule.port = priv->port; rule.qpn = *qpn; INIT_LIST_HEAD(&rule.list); spec_eth.id = MLX4_NET_TRANS_RULE_ID_ETH; memcpy(spec_eth.eth.dst_mac, mac, ETH_ALEN); memcpy(spec_eth.eth.dst_mac_msk, &mac_mask, ETH_ALEN); list_add_tail(&spec_eth.list, &rule.list); err = mlx4_flow_attach(dev, &rule, reg_id); break; } default: return -EINVAL; } if (err) en_warn(priv, "Failed Attaching Unicast\n"); return err; } static void mlx4_en_uc_steer_release(struct mlx4_en_priv *priv, unsigned char *mac, int qpn, u64 reg_id) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_dev *dev = mdev->dev; switch (dev->caps.steering_mode) { case MLX4_STEERING_MODE_B0: { struct mlx4_qp qp; u8 gid[16] = {0}; qp.qpn = qpn; memcpy(&gid[10], mac, ETH_ALEN); gid[5] = priv->port; mlx4_unicast_detach(dev, &qp, gid, MLX4_PROT_ETH); break; } case MLX4_STEERING_MODE_DEVICE_MANAGED: { mlx4_flow_detach(dev, reg_id); break; } default: en_err(priv, "Invalid steering mode.\n"); } } static int mlx4_en_get_qp(struct mlx4_en_priv *priv) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_dev *dev = mdev->dev; int index = 0; int err = 0; int *qpn = &priv->base_qpn; u64 mac = mlx4_mac_to_u64(IF_LLADDR(priv->dev)); en_dbg(DRV, priv, "Registering MAC: %pM for adding\n", IF_LLADDR(priv->dev)); index = mlx4_register_mac(dev, priv->port, mac); if (index < 0) { err = index; en_err(priv, "Failed adding MAC: %pM\n", IF_LLADDR(priv->dev)); return err; } if (dev->caps.steering_mode == 
MLX4_STEERING_MODE_A0) { int base_qpn = mlx4_get_base_qpn(dev, priv->port); *qpn = base_qpn + index; return 0; } err = mlx4_qp_reserve_range(dev, 1, 1, qpn, MLX4_RESERVE_A0_QP); en_dbg(DRV, priv, "Reserved qp %d\n", *qpn); if (err) { en_err(priv, "Failed to reserve qp for mac registration\n"); mlx4_unregister_mac(dev, priv->port, mac); return err; } return 0; } static void mlx4_en_put_qp(struct mlx4_en_priv *priv) { struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_dev *dev = mdev->dev; int qpn = priv->base_qpn; if (dev->caps.steering_mode == MLX4_STEERING_MODE_A0) { u64 mac = mlx4_mac_to_u64(IF_LLADDR(priv->dev)); en_dbg(DRV, priv, "Registering MAC: %pM for deleting\n", IF_LLADDR(priv->dev)); mlx4_unregister_mac(dev, priv->port, mac); } else { en_dbg(DRV, priv, "Releasing qp: port %d, qpn %d\n", priv->port, qpn); mlx4_qp_release_range(dev, qpn, 1); priv->flags &= ~MLX4_EN_FLAG_FORCE_PROMISC; } } static void mlx4_en_clear_uclist(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_addr_list *tmp, *uc_to_del; list_for_each_entry_safe(uc_to_del, tmp, &priv->uc_list, list) { list_del(&uc_to_del->list); kfree(uc_to_del); } } static void mlx4_en_cache_uclist(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_addr_list *tmp; struct ifaddr *ifa; mlx4_en_clear_uclist(dev); if_addr_rlock(dev); TAILQ_FOREACH(ifa, &dev->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_LINK) continue; if (((struct sockaddr_dl *)ifa->ifa_addr)->sdl_alen != ETHER_ADDR_LEN) continue; tmp = kzalloc(sizeof(struct mlx4_en_addr_list), GFP_ATOMIC); if (tmp == NULL) { en_err(priv, "Failed to allocate address list\n"); break; } memcpy(tmp->addr, LLADDR((struct sockaddr_dl *)ifa->ifa_addr), ETH_ALEN); list_add_tail(&tmp->list, &priv->uc_list); } if_addr_runlock(dev); } static void mlx4_en_clear_mclist(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_addr_list *tmp, *mc_to_del; list_for_each_entry_safe(mc_to_del, tmp, &priv->mc_list, list) { list_del(&mc_to_del->list); kfree(mc_to_del); } } static void mlx4_en_cache_mclist(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_addr_list *tmp; struct ifmultiaddr *ifma; mlx4_en_clear_mclist(dev); if_maddr_rlock(dev); TAILQ_FOREACH(ifma, &dev->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (((struct sockaddr_dl *)ifma->ifma_addr)->sdl_alen != ETHER_ADDR_LEN) continue; tmp = kzalloc(sizeof(struct mlx4_en_addr_list), GFP_ATOMIC); if (tmp == NULL) { en_err(priv, "Failed to allocate address list\n"); break; } memcpy(tmp->addr, LLADDR((struct sockaddr_dl *)ifma->ifma_addr), ETH_ALEN); list_add_tail(&tmp->list, &priv->mc_list); } if_maddr_runlock(dev); } static void update_addr_list_flags(struct mlx4_en_priv *priv, struct list_head *dst, struct list_head *src) { struct mlx4_en_addr_list *dst_tmp, *src_tmp, *new_mc; bool found; /* Find all the entries that should be removed from dst, * These are the entries that are not found in src */ list_for_each_entry(dst_tmp, dst, list) { found = false; list_for_each_entry(src_tmp, src, list) { if (!memcmp(dst_tmp->addr, src_tmp->addr, ETH_ALEN)) { found = true; break; } } if (!found) dst_tmp->action = MLX4_ADDR_LIST_REM; } /* Add entries that exist in src but not in dst * mark them as need to add */ list_for_each_entry(src_tmp, src, list) { found = false; list_for_each_entry(dst_tmp, dst, list) { if (!memcmp(dst_tmp->addr, src_tmp->addr, ETH_ALEN)) { 
dst_tmp->action = MLX4_ADDR_LIST_NONE; found = true; break; } } if (!found) { new_mc = kmalloc(sizeof(struct mlx4_en_addr_list), GFP_KERNEL); if (!new_mc) { en_err(priv, "Failed to allocate current multicast list\n"); return; } memcpy(new_mc, src_tmp, sizeof(struct mlx4_en_addr_list)); new_mc->action = MLX4_ADDR_LIST_ADD; list_add_tail(&new_mc->list, dst); } } } static void mlx4_en_set_rx_mode(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); if (!priv->port_up) return; queue_work(priv->mdev->workqueue, &priv->rx_mode_task); } static void mlx4_en_set_promisc_mode(struct mlx4_en_priv *priv, struct mlx4_en_dev *mdev) { int err = 0; if (!(priv->flags & MLX4_EN_FLAG_PROMISC)) { priv->flags |= MLX4_EN_FLAG_PROMISC; /* Enable promiscouos mode */ switch (mdev->dev->caps.steering_mode) { case MLX4_STEERING_MODE_DEVICE_MANAGED: err = mlx4_flow_steer_promisc_add(mdev->dev, priv->port, priv->base_qpn, MLX4_FS_ALL_DEFAULT); if (err) en_err(priv, "Failed enabling promiscuous mode\n"); priv->flags |= MLX4_EN_FLAG_MC_PROMISC; break; case MLX4_STEERING_MODE_B0: err = mlx4_unicast_promisc_add(mdev->dev, priv->base_qpn, priv->port); if (err) en_err(priv, "Failed enabling unicast promiscuous mode\n"); /* Add the default qp number as multicast * promisc */ if (!(priv->flags & MLX4_EN_FLAG_MC_PROMISC)) { err = mlx4_multicast_promisc_add(mdev->dev, priv->base_qpn, priv->port); if (err) en_err(priv, "Failed enabling multicast promiscuous mode\n"); priv->flags |= MLX4_EN_FLAG_MC_PROMISC; } break; case MLX4_STEERING_MODE_A0: err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port, priv->base_qpn, 1); if (err) en_err(priv, "Failed enabling promiscuous mode\n"); break; } /* Disable port multicast filter (unconditionally) */ err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 0, MLX4_MCAST_DISABLE); if (err) en_err(priv, "Failed disabling multicast filter\n"); } } static void mlx4_en_clear_promisc_mode(struct mlx4_en_priv *priv, struct mlx4_en_dev *mdev) { int err = 0; priv->flags &= ~MLX4_EN_FLAG_PROMISC; /* Disable promiscouos mode */ switch (mdev->dev->caps.steering_mode) { case MLX4_STEERING_MODE_DEVICE_MANAGED: err = mlx4_flow_steer_promisc_remove(mdev->dev, priv->port, MLX4_FS_ALL_DEFAULT); if (err) en_err(priv, "Failed disabling promiscuous mode\n"); priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC; break; case MLX4_STEERING_MODE_B0: err = mlx4_unicast_promisc_remove(mdev->dev, priv->base_qpn, priv->port); if (err) en_err(priv, "Failed disabling unicast promiscuous mode\n"); /* Disable Multicast promisc */ if (priv->flags & MLX4_EN_FLAG_MC_PROMISC) { err = mlx4_multicast_promisc_remove(mdev->dev, priv->base_qpn, priv->port); if (err) en_err(priv, "Failed disabling multicast promiscuous mode\n"); priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC; } break; case MLX4_STEERING_MODE_A0: err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port, priv->base_qpn, 0); if (err) en_err(priv, "Failed disabling promiscuous mode\n"); break; } } static void mlx4_en_do_multicast(struct mlx4_en_priv *priv, struct net_device *dev, struct mlx4_en_dev *mdev) { struct mlx4_en_addr_list *addr_list, *tmp; u8 mc_list[16] = {0}; int err = 0; u64 mcast_addr = 0; /* Enable/disable the multicast filter according to IFF_ALLMULTI */ if (dev->if_flags & IFF_ALLMULTI) { err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 0, MLX4_MCAST_DISABLE); if (err) en_err(priv, "Failed disabling multicast filter\n"); /* Add the default qp number as multicast promisc */ if (!(priv->flags & MLX4_EN_FLAG_MC_PROMISC)) { switch (mdev->dev->caps.steering_mode) { case 
MLX4_STEERING_MODE_DEVICE_MANAGED: err = mlx4_flow_steer_promisc_add(mdev->dev, priv->port, priv->base_qpn, MLX4_FS_MC_DEFAULT); break; case MLX4_STEERING_MODE_B0: err = mlx4_multicast_promisc_add(mdev->dev, priv->base_qpn, priv->port); break; case MLX4_STEERING_MODE_A0: break; } if (err) en_err(priv, "Failed entering multicast promisc mode\n"); priv->flags |= MLX4_EN_FLAG_MC_PROMISC; } } else { /* Disable Multicast promisc */ if (priv->flags & MLX4_EN_FLAG_MC_PROMISC) { switch (mdev->dev->caps.steering_mode) { case MLX4_STEERING_MODE_DEVICE_MANAGED: err = mlx4_flow_steer_promisc_remove(mdev->dev, priv->port, MLX4_FS_MC_DEFAULT); break; case MLX4_STEERING_MODE_B0: err = mlx4_multicast_promisc_remove(mdev->dev, priv->base_qpn, priv->port); break; case MLX4_STEERING_MODE_A0: break; } if (err) en_err(priv, "Failed disabling multicast promiscuous mode\n"); priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC; } /* Update unicast list */ mlx4_en_cache_uclist(dev); update_addr_list_flags(priv, &priv->curr_uc_list, &priv->uc_list); list_for_each_entry_safe(addr_list, tmp, &priv->curr_uc_list, list) { if (addr_list->action == MLX4_ADDR_LIST_REM) { mlx4_en_uc_steer_release(priv, addr_list->addr, priv->rss_map.indir_qp.qpn, addr_list->reg_id); /* remove from list */ list_del(&addr_list->list); kfree(addr_list); } else if (addr_list->action == MLX4_ADDR_LIST_ADD) { err = mlx4_en_uc_steer_add(priv, addr_list->addr, &priv->rss_map.indir_qp.qpn, &addr_list->reg_id); if (err) en_err(priv, "Fail to add unicast address\n"); } } err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 0, MLX4_MCAST_DISABLE); if (err) en_err(priv, "Failed disabling multicast filter\n"); /* Flush mcast filter and init it with broadcast address */ mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, ETH_BCAST, 1, MLX4_MCAST_CONFIG); /* Update multicast list - we cache all addresses so they won't * change while HW is updated holding the command semaphor */ mlx4_en_cache_mclist(dev); list_for_each_entry(addr_list, &priv->mc_list, list) { mcast_addr = mlx4_mac_to_u64(addr_list->addr); mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, mcast_addr, 0, MLX4_MCAST_CONFIG); } err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 0, MLX4_MCAST_ENABLE); if (err) en_err(priv, "Failed enabling multicast filter\n"); update_addr_list_flags(priv, &priv->curr_mc_list, &priv->mc_list); list_for_each_entry_safe(addr_list, tmp, &priv->curr_mc_list, list) { if (addr_list->action == MLX4_ADDR_LIST_REM) { /* detach this address and delete from list */ memcpy(&mc_list[10], addr_list->addr, ETH_ALEN); mc_list[5] = priv->port; err = mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp, mc_list, MLX4_PROT_ETH, addr_list->reg_id); if (err) en_err(priv, "Fail to detach multicast address\n"); if (addr_list->tunnel_reg_id) { err = mlx4_flow_detach(priv->mdev->dev, addr_list->tunnel_reg_id); if (err) en_err(priv, "Failed to detach multicast address\n"); } /* remove from list */ list_del(&addr_list->list); kfree(addr_list); } else if (addr_list->action == MLX4_ADDR_LIST_ADD) { /* attach the address */ memcpy(&mc_list[10], addr_list->addr, ETH_ALEN); /* needed for B0 steering support */ mc_list[5] = priv->port; err = mlx4_multicast_attach(mdev->dev, &priv->rss_map.indir_qp, mc_list, priv->port, 0, MLX4_PROT_ETH, &addr_list->reg_id); if (err) en_err(priv, "Fail to attach multicast address\n"); err = mlx4_en_tunnel_steer_add(priv, &mc_list[10], priv->base_qpn, &addr_list->tunnel_reg_id); if (err) en_err(priv, "Failed to attach multicast address\n"); } } } } static void 
mlx4_en_do_set_rx_mode(struct work_struct *work) { struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, rx_mode_task); struct mlx4_en_dev *mdev = priv->mdev; struct net_device *dev = priv->dev; mutex_lock(&mdev->state_lock); if (!mdev->device_up) { en_dbg(HW, priv, "Card is not up, ignoring rx mode change.\n"); goto out; } if (!priv->port_up) { en_dbg(HW, priv, "Port is down, ignoring rx mode change.\n"); goto out; } if (!mlx4_en_QUERY_PORT(mdev, priv->port)) { if (priv->port_state.link_state) { priv->last_link_state = MLX4_DEV_EVENT_PORT_UP; /* update netif baudrate */ priv->dev->if_baudrate = IF_Mbps(priv->port_state.link_speed); /* Important note: the following call for if_link_state_change * is needed for interface up scenario (start port, link state * change) */ if_link_state_change(priv->dev, LINK_STATE_UP); en_dbg(HW, priv, "Link Up\n"); } } /* Promsicuous mode: disable all filters */ if ((dev->if_flags & IFF_PROMISC) || (priv->flags & MLX4_EN_FLAG_FORCE_PROMISC)) { mlx4_en_set_promisc_mode(priv, mdev); goto out; } /* Not in promiscuous mode */ if (priv->flags & MLX4_EN_FLAG_PROMISC) mlx4_en_clear_promisc_mode(priv, mdev); mlx4_en_do_multicast(priv, dev, mdev); out: mutex_unlock(&mdev->state_lock); } static void mlx4_en_watchdog_timeout(void *arg) { struct mlx4_en_priv *priv = arg; struct mlx4_en_dev *mdev = priv->mdev; en_dbg(DRV, priv, "Scheduling watchdog\n"); queue_work(mdev->workqueue, &priv->watchdog_task); if (priv->port_up) callout_reset(&priv->watchdog_timer, MLX4_EN_WATCHDOG_TIMEOUT, mlx4_en_watchdog_timeout, priv); } static void mlx4_en_set_default_moderation(struct mlx4_en_priv *priv) { struct mlx4_en_cq *cq; int i; /* If we haven't received a specific coalescing setting * (module param), we set the moderation parameters as follows: * - moder_cnt is set to the number of mtu sized packets to * satisfy our coalescing target. * - moder_time is set to a fixed value. 
 */
	priv->rx_frames = MLX4_EN_RX_COAL_TARGET;
	priv->rx_usecs = MLX4_EN_RX_COAL_TIME;
	priv->tx_frames = MLX4_EN_TX_COAL_PKTS;
	priv->tx_usecs = MLX4_EN_TX_COAL_TIME;
	en_dbg(INTR, priv, "Default coalescing params for mtu: %u - "
	       "rx_frames:%d rx_usecs:%d\n",
	       (unsigned)priv->dev->if_mtu, priv->rx_frames, priv->rx_usecs);

	/* Setup cq moderation params */
	for (i = 0; i < priv->rx_ring_num; i++) {
		cq = priv->rx_cq[i];
		cq->moder_cnt = priv->rx_frames;
		cq->moder_time = priv->rx_usecs;
		priv->last_moder_time[i] = MLX4_EN_AUTO_CONF;
		priv->last_moder_packets[i] = 0;
		priv->last_moder_bytes[i] = 0;
	}

	for (i = 0; i < priv->tx_ring_num; i++) {
		cq = priv->tx_cq[i];
		cq->moder_cnt = priv->tx_frames;
		cq->moder_time = priv->tx_usecs;
	}

	/* Reset auto-moderation params */
	priv->pkt_rate_low = MLX4_EN_RX_RATE_LOW;
	priv->rx_usecs_low = MLX4_EN_RX_COAL_TIME_LOW;
	priv->pkt_rate_high = MLX4_EN_RX_RATE_HIGH;
	priv->rx_usecs_high = MLX4_EN_RX_COAL_TIME_HIGH;
	priv->sample_interval = MLX4_EN_SAMPLE_INTERVAL;
	priv->adaptive_rx_coal = 1;
	priv->last_moder_jiffies = 0;
	priv->last_moder_tx_packets = 0;
}

static void mlx4_en_auto_moderation(struct mlx4_en_priv *priv)
{
	unsigned long period = (unsigned long) (jiffies - priv->last_moder_jiffies);
	struct mlx4_en_cq *cq;
	unsigned long packets;
	unsigned long rate;
	unsigned long avg_pkt_size;
	unsigned long rx_packets;
	unsigned long rx_bytes;
	unsigned long rx_pkt_diff;
	int moder_time;
	int ring, err;

	if (!priv->adaptive_rx_coal || period < priv->sample_interval * HZ)
		return;

	for (ring = 0; ring < priv->rx_ring_num; ring++) {
		spin_lock(&priv->stats_lock);
		rx_packets = priv->rx_ring[ring]->packets;
		rx_bytes = priv->rx_ring[ring]->bytes;
		spin_unlock(&priv->stats_lock);

		rx_pkt_diff = ((unsigned long) (rx_packets -
				priv->last_moder_packets[ring]));
		packets = rx_pkt_diff;
		rate = packets * HZ / period;
		avg_pkt_size = packets ?
((unsigned long) (rx_bytes - priv->last_moder_bytes[ring])) / packets : 0; /* Apply auto-moderation only when packet rate * exceeds a rate that it matters */ if (rate > (MLX4_EN_RX_RATE_THRESH / priv->rx_ring_num) && avg_pkt_size > MLX4_EN_AVG_PKT_SMALL) { if (rate < priv->pkt_rate_low) moder_time = priv->rx_usecs_low; else if (rate > priv->pkt_rate_high) moder_time = priv->rx_usecs_high; else moder_time = (rate - priv->pkt_rate_low) * (priv->rx_usecs_high - priv->rx_usecs_low) / (priv->pkt_rate_high - priv->pkt_rate_low) + priv->rx_usecs_low; } else { moder_time = priv->rx_usecs_low; } if (moder_time != priv->last_moder_time[ring]) { priv->last_moder_time[ring] = moder_time; cq = priv->rx_cq[ring]; cq->moder_time = moder_time; cq->moder_cnt = priv->rx_frames; err = mlx4_en_set_cq_moder(priv, cq); if (err) en_err(priv, "Failed modifying moderation for cq:%d\n", ring); } priv->last_moder_packets[ring] = rx_packets; priv->last_moder_bytes[ring] = rx_bytes; } priv->last_moder_jiffies = jiffies; } static void mlx4_en_do_get_stats(struct work_struct *work) { struct delayed_work *delay = to_delayed_work(work); struct mlx4_en_priv *priv = container_of(delay, struct mlx4_en_priv, stats_task); struct mlx4_en_dev *mdev = priv->mdev; int err; mutex_lock(&mdev->state_lock); if (mdev->device_up) { if (priv->port_up) { if (mlx4_is_slave(mdev->dev)) err = mlx4_en_get_vport_stats(mdev, priv->port); else err = mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 0); if (err) en_dbg(HW, priv, "Could not update stats\n"); mlx4_en_auto_moderation(priv); } queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY); } mutex_unlock(&mdev->state_lock); } /* mlx4_en_service_task - Run service task for tasks that needed to be done * periodically */ static void mlx4_en_service_task(struct work_struct *work) { struct delayed_work *delay = to_delayed_work(work); struct mlx4_en_priv *priv = container_of(delay, struct mlx4_en_priv, service_task); struct mlx4_en_dev *mdev = priv->mdev; mutex_lock(&mdev->state_lock); if (mdev->device_up) { queue_delayed_work(mdev->workqueue, &priv->service_task, SERVICE_TASK_DELAY); } mutex_unlock(&mdev->state_lock); } static void mlx4_en_linkstate(struct work_struct *work) { struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, linkstate_task); struct mlx4_en_dev *mdev = priv->mdev; int linkstate = priv->link_state; mutex_lock(&mdev->state_lock); /* If observable port state changed set carrier state and * report to system log */ if (priv->last_link_state != linkstate) { if (linkstate == MLX4_DEV_EVENT_PORT_DOWN) { en_info(priv, "Link Down\n"); if_link_state_change(priv->dev, LINK_STATE_DOWN); /* update netif baudrate */ priv->dev->if_baudrate = 0; /* make sure the port is up before notifying the OS. * This is tricky since we get here on INIT_PORT and * in such case we can't tell the OS the port is up. * To solve this there is a call to if_link_state_change * in set_rx_mode. 
* */ } else if (priv->port_up && (linkstate == MLX4_DEV_EVENT_PORT_UP)){ if (mlx4_en_QUERY_PORT(priv->mdev, priv->port)) en_info(priv, "Query port failed\n"); priv->dev->if_baudrate = IF_Mbps(priv->port_state.link_speed); en_info(priv, "Link Up\n"); if_link_state_change(priv->dev, LINK_STATE_UP); } } priv->last_link_state = linkstate; mutex_unlock(&mdev->state_lock); } int mlx4_en_start_port(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_cq *cq; struct mlx4_en_tx_ring *tx_ring; int rx_index = 0; int tx_index = 0; int err = 0; int i; int j; u8 mc_list[16] = {0}; if (priv->port_up) { en_dbg(DRV, priv, "start port called while port already up\n"); return 0; } INIT_LIST_HEAD(&priv->mc_list); INIT_LIST_HEAD(&priv->uc_list); INIT_LIST_HEAD(&priv->curr_mc_list); INIT_LIST_HEAD(&priv->curr_uc_list); INIT_LIST_HEAD(&priv->ethtool_list); /* Calculate Rx buf size */ dev->if_mtu = min(dev->if_mtu, priv->max_mtu); mlx4_en_calc_rx_buf(dev); en_dbg(DRV, priv, "Rx buf size:%d\n", priv->rx_mb_size); /* Configure rx cq's and rings */ err = mlx4_en_activate_rx_rings(priv); if (err) { en_err(priv, "Failed to activate RX rings\n"); return err; } for (i = 0; i < priv->rx_ring_num; i++) { cq = priv->rx_cq[i]; mlx4_en_cq_init_lock(cq); err = mlx4_en_activate_cq(priv, cq, i); if (err) { en_err(priv, "Failed activating Rx CQ\n"); goto cq_err; } for (j = 0; j < cq->size; j++) cq->buf[j].owner_sr_opcode = MLX4_CQE_OWNER_MASK; err = mlx4_en_set_cq_moder(priv, cq); if (err) { en_err(priv, "Failed setting cq moderation parameters"); mlx4_en_deactivate_cq(priv, cq); goto cq_err; } mlx4_en_arm_cq(priv, cq); priv->rx_ring[i]->cqn = cq->mcq.cqn; ++rx_index; } /* Set qp number */ en_dbg(DRV, priv, "Getting qp number for port %d\n", priv->port); err = mlx4_en_get_qp(priv); if (err) { en_err(priv, "Failed getting eth qp\n"); goto cq_err; } mdev->mac_removed[priv->port] = 0; priv->counter_index = mlx4_get_default_counter_index(mdev->dev, priv->port); err = mlx4_en_config_rss_steer(priv); if (err) { en_err(priv, "Failed configuring rss steering\n"); goto mac_err; } err = mlx4_en_create_drop_qp(priv); if (err) goto rss_err; /* Configure tx cq's and rings */ for (i = 0; i < priv->tx_ring_num; i++) { /* Configure cq */ cq = priv->tx_cq[i]; err = mlx4_en_activate_cq(priv, cq, i); if (err) { en_err(priv, "Failed activating Tx CQ\n"); goto tx_err; } err = mlx4_en_set_cq_moder(priv, cq); if (err) { en_err(priv, "Failed setting cq moderation parameters"); mlx4_en_deactivate_cq(priv, cq); goto tx_err; } en_dbg(DRV, priv, "Resetting index of collapsed CQ:%d to -1\n", i); cq->buf->wqe_index = cpu_to_be16(0xffff); /* Configure ring */ tx_ring = priv->tx_ring[i]; err = mlx4_en_activate_tx_ring(priv, tx_ring, cq->mcq.cqn, i / priv->num_tx_rings_p_up); if (err) { en_err(priv, "Failed activating Tx ring %d\n", i); mlx4_en_deactivate_cq(priv, cq); goto tx_err; } /* Arm CQ for TX completions */ mlx4_en_arm_cq(priv, cq); /* Set initial ownership of all Tx TXBBs to SW (1) */ for (j = 0; j < tx_ring->buf_size; j += STAMP_STRIDE) *((u32 *) (tx_ring->buf + j)) = INIT_OWNER_BIT; ++tx_index; } /* Configure port */ err = mlx4_SET_PORT_general(mdev->dev, priv->port, priv->rx_mb_size, priv->prof->tx_pause, priv->prof->tx_ppp, priv->prof->rx_pause, priv->prof->rx_ppp); if (err) { en_err(priv, "Failed setting port general configurations for port %d, with error %d\n", priv->port, err); goto tx_err; } /* Set default qp number */ err = mlx4_SET_PORT_qpn_calc(mdev->dev, priv->port, 
priv->base_qpn, 0); if (err) { en_err(priv, "Failed setting default qp numbers\n"); goto tx_err; } /* Init port */ en_dbg(HW, priv, "Initializing port\n"); err = mlx4_INIT_PORT(mdev->dev, priv->port); if (err) { en_err(priv, "Failed Initializing port\n"); goto tx_err; } /* Attach rx QP to bradcast address */ memset(&mc_list[10], 0xff, ETH_ALEN); mc_list[5] = priv->port; /* needed for B0 steering support */ if (mlx4_multicast_attach(mdev->dev, &priv->rss_map.indir_qp, mc_list, priv->port, 0, MLX4_PROT_ETH, &priv->broadcast_id)) mlx4_warn(mdev, "Failed Attaching Broadcast\n"); /* Must redo promiscuous mode setup. */ priv->flags &= ~(MLX4_EN_FLAG_PROMISC | MLX4_EN_FLAG_MC_PROMISC); /* Schedule multicast task to populate multicast list */ queue_work(mdev->workqueue, &priv->rx_mode_task); priv->port_up = true; /* Enable the queues. */ dev->if_drv_flags &= ~IFF_DRV_OACTIVE; dev->if_drv_flags |= IFF_DRV_RUNNING; #ifdef CONFIG_DEBUG_FS mlx4_en_create_debug_files(priv); #endif callout_reset(&priv->watchdog_timer, MLX4_EN_WATCHDOG_TIMEOUT, mlx4_en_watchdog_timeout, priv); return 0; tx_err: while (tx_index--) { mlx4_en_deactivate_tx_ring(priv, priv->tx_ring[tx_index]); mlx4_en_deactivate_cq(priv, priv->tx_cq[tx_index]); } mlx4_en_destroy_drop_qp(priv); rss_err: mlx4_en_release_rss_steer(priv); mac_err: mlx4_en_put_qp(priv); cq_err: while (rx_index--) mlx4_en_deactivate_cq(priv, priv->rx_cq[rx_index]); for (i = 0; i < priv->rx_ring_num; i++) mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]); return err; /* need to close devices */ } void mlx4_en_stop_port(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_addr_list *addr_list, *tmp; int i; u8 mc_list[16] = {0}; if (!priv->port_up) { en_dbg(DRV, priv, "stop port called while port already down\n"); return; } #ifdef CONFIG_DEBUG_FS mlx4_en_delete_debug_files(priv); #endif /* close port*/ mlx4_CLOSE_PORT(mdev->dev, priv->port); /* Set port as not active */ priv->port_up = false; priv->counter_index = MLX4_SINK_COUNTER_INDEX(mdev->dev); /* Promsicuous mode */ if (mdev->dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) { priv->flags &= ~(MLX4_EN_FLAG_PROMISC | MLX4_EN_FLAG_MC_PROMISC); mlx4_flow_steer_promisc_remove(mdev->dev, priv->port, MLX4_FS_ALL_DEFAULT); mlx4_flow_steer_promisc_remove(mdev->dev, priv->port, MLX4_FS_MC_DEFAULT); } else if (priv->flags & MLX4_EN_FLAG_PROMISC) { priv->flags &= ~MLX4_EN_FLAG_PROMISC; /* Disable promiscouos mode */ mlx4_unicast_promisc_remove(mdev->dev, priv->base_qpn, priv->port); /* Disable Multicast promisc */ if (priv->flags & MLX4_EN_FLAG_MC_PROMISC) { mlx4_multicast_promisc_remove(mdev->dev, priv->base_qpn, priv->port); priv->flags &= ~MLX4_EN_FLAG_MC_PROMISC; } } /* Detach All unicasts */ list_for_each_entry(addr_list, &priv->curr_uc_list, list) { mlx4_en_uc_steer_release(priv, addr_list->addr, priv->rss_map.indir_qp.qpn, addr_list->reg_id); } mlx4_en_clear_uclist(dev); list_for_each_entry_safe(addr_list, tmp, &priv->curr_uc_list, list) { list_del(&addr_list->list); kfree(addr_list); } /* Detach All multicasts */ memset(&mc_list[10], 0xff, ETH_ALEN); mc_list[5] = priv->port; /* needed for B0 steering support */ mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp, mc_list, MLX4_PROT_ETH, priv->broadcast_id); list_for_each_entry(addr_list, &priv->curr_mc_list, list) { memcpy(&mc_list[10], addr_list->addr, ETH_ALEN); mc_list[5] = priv->port; mlx4_multicast_detach(mdev->dev, &priv->rss_map.indir_qp, mc_list, MLX4_PROT_ETH, 
addr_list->reg_id); } mlx4_en_clear_mclist(dev); list_for_each_entry_safe(addr_list, tmp, &priv->curr_mc_list, list) { list_del(&addr_list->list); kfree(addr_list); } /* Flush multicast filter */ mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 1, MLX4_MCAST_CONFIG); mlx4_en_destroy_drop_qp(priv); /* Free TX Rings */ for (i = 0; i < priv->tx_ring_num; i++) { mlx4_en_deactivate_tx_ring(priv, priv->tx_ring[i]); mlx4_en_deactivate_cq(priv, priv->tx_cq[i]); } msleep(10); for (i = 0; i < priv->tx_ring_num; i++) mlx4_en_free_tx_buf(dev, priv->tx_ring[i]); /* Free RSS qps */ mlx4_en_release_rss_steer(priv); /* Unregister Mac address for the port */ mlx4_en_put_qp(priv); mdev->mac_removed[priv->port] = 1; /* Free RX Rings */ for (i = 0; i < priv->rx_ring_num; i++) { struct mlx4_en_cq *cq = priv->rx_cq[i]; mlx4_en_deactivate_rx_ring(priv, priv->rx_ring[i]); mlx4_en_deactivate_cq(priv, cq); } callout_stop(&priv->watchdog_timer); dev->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); } static void mlx4_en_restart(struct work_struct *work) { struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, watchdog_task); struct mlx4_en_dev *mdev = priv->mdev; struct net_device *dev = priv->dev; struct mlx4_en_tx_ring *ring; int i; if (priv->blocked == 0 || priv->port_up == 0) return; for (i = 0; i < priv->tx_ring_num; i++) { ring = priv->tx_ring[i]; if (ring->blocked && ring->watchdog_time + MLX4_EN_WATCHDOG_TIMEOUT < ticks) goto reset; } return; reset: priv->port_stats.tx_timeout++; en_dbg(DRV, priv, "Watchdog task called for port %d\n", priv->port); mutex_lock(&mdev->state_lock); if (priv->port_up) { mlx4_en_stop_port(dev); //for (i = 0; i < priv->tx_ring_num; i++) // netdev_tx_reset_queue(priv->tx_ring[i]->tx_queue); if (mlx4_en_start_port(dev)) en_err(priv, "Failed restarting port %d\n", priv->port); } mutex_unlock(&mdev->state_lock); } static void mlx4_en_clear_stats(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int i; if (!mlx4_is_slave(mdev->dev)) if (mlx4_en_DUMP_ETH_STATS(mdev, priv->port, 1)) en_dbg(HW, priv, "Failed dumping statistics\n"); memset(&priv->pstats, 0, sizeof(priv->pstats)); memset(&priv->pkstats, 0, sizeof(priv->pkstats)); memset(&priv->port_stats, 0, sizeof(priv->port_stats)); memset(&priv->vport_stats, 0, sizeof(priv->vport_stats)); for (i = 0; i < priv->tx_ring_num; i++) { priv->tx_ring[i]->bytes = 0; priv->tx_ring[i]->packets = 0; priv->tx_ring[i]->tx_csum = 0; priv->tx_ring[i]->oversized_packets = 0; } for (i = 0; i < priv->rx_ring_num; i++) { priv->rx_ring[i]->bytes = 0; priv->rx_ring[i]->packets = 0; priv->rx_ring[i]->csum_ok = 0; priv->rx_ring[i]->csum_none = 0; } } static void mlx4_en_open(void* arg) { struct mlx4_en_priv *priv; struct mlx4_en_dev *mdev; struct net_device *dev; int err = 0; priv = arg; mdev = priv->mdev; dev = priv->dev; mutex_lock(&mdev->state_lock); if (!mdev->device_up) { en_err(priv, "Cannot open - device down/disabled\n"); goto out; } /* Reset HW statistics and SW counters */ mlx4_en_clear_stats(dev); err = mlx4_en_start_port(dev); if (err) en_err(priv, "Failed starting port:%d\n", priv->port); out: mutex_unlock(&mdev->state_lock); return; } void mlx4_en_free_resources(struct mlx4_en_priv *priv) { int i; #ifdef CONFIG_RFS_ACCEL if (priv->dev->rx_cpu_rmap) { free_irq_cpu_rmap(priv->dev->rx_cpu_rmap); priv->dev->rx_cpu_rmap = NULL; } #endif for (i = 0; i < priv->tx_ring_num; i++) { if (priv->tx_ring && priv->tx_ring[i]) mlx4_en_destroy_tx_ring(priv, &priv->tx_ring[i]); if 
(priv->tx_cq && priv->tx_cq[i]) mlx4_en_destroy_cq(priv, &priv->tx_cq[i]); } for (i = 0; i < priv->rx_ring_num; i++) { if (priv->rx_ring[i]) mlx4_en_destroy_rx_ring(priv, &priv->rx_ring[i], priv->prof->rx_ring_size, priv->stride); if (priv->rx_cq[i]) mlx4_en_destroy_cq(priv, &priv->rx_cq[i]); } if (priv->stat_sysctl != NULL) sysctl_ctx_free(&priv->stat_ctx); } int mlx4_en_alloc_resources(struct mlx4_en_priv *priv) { struct mlx4_en_port_profile *prof = priv->prof; int i; int node = 0; /* Create rx Rings */ for (i = 0; i < priv->rx_ring_num; i++) { if (mlx4_en_create_cq(priv, &priv->rx_cq[i], prof->rx_ring_size, i, RX, node)) goto err; if (mlx4_en_create_rx_ring(priv, &priv->rx_ring[i], prof->rx_ring_size, node)) goto err; } /* Create tx Rings */ for (i = 0; i < priv->tx_ring_num; i++) { if (mlx4_en_create_cq(priv, &priv->tx_cq[i], prof->tx_ring_size, i, TX, node)) goto err; if (mlx4_en_create_tx_ring(priv, &priv->tx_ring[i], prof->tx_ring_size, TXBB_SIZE, node, i)) goto err; } #ifdef CONFIG_RFS_ACCEL priv->dev->rx_cpu_rmap = alloc_irq_cpu_rmap(priv->rx_ring_num); if (!priv->dev->rx_cpu_rmap) goto err; #endif /* Re-create stat sysctls in case the number of rings changed. */ mlx4_en_sysctl_stat(priv); return 0; err: en_err(priv, "Failed to allocate NIC resources\n"); for (i = 0; i < priv->rx_ring_num; i++) { if (priv->rx_ring[i]) mlx4_en_destroy_rx_ring(priv, &priv->rx_ring[i], prof->rx_ring_size, priv->stride); if (priv->rx_cq[i]) mlx4_en_destroy_cq(priv, &priv->rx_cq[i]); } for (i = 0; i < priv->tx_ring_num; i++) { if (priv->tx_ring[i]) mlx4_en_destroy_tx_ring(priv, &priv->tx_ring[i]); if (priv->tx_cq[i]) mlx4_en_destroy_cq(priv, &priv->tx_cq[i]); } priv->port_up = false; return -ENOMEM; } struct en_port_attribute { struct attribute attr; ssize_t (*show)(struct en_port *, struct en_port_attribute *, char *buf); ssize_t (*store)(struct en_port *, struct en_port_attribute *, char *buf, size_t count); }; #define PORT_ATTR_RO(_name) \ struct en_port_attribute en_port_attr_##_name = __ATTR_RO(_name) #define EN_PORT_ATTR(_name, _mode, _show, _store) \ struct en_port_attribute en_port_attr_##_name = __ATTR(_name, _mode, _show, _store) void mlx4_en_destroy_netdev(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; en_dbg(DRV, priv, "Destroying netdev on port:%d\n", priv->port); /* don't allow more IOCTLs */ priv->gone = 1; /* XXX wait a bit to allow IOCTL handlers to complete */ pause("W", hz); if (priv->vlan_attach != NULL) EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach); if (priv->vlan_detach != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, priv->vlan_detach); /* Unregister device - this will close the port if it was up */ if (priv->registered) { mutex_lock(&mdev->state_lock); ether_ifdetach(dev); mutex_unlock(&mdev->state_lock); } mutex_lock(&mdev->state_lock); mlx4_en_stop_port(dev); mutex_unlock(&mdev->state_lock); if (priv->allocated) mlx4_free_hwq_res(mdev->dev, &priv->res, MLX4_EN_PAGE_SIZE); cancel_delayed_work(&priv->stats_task); cancel_delayed_work(&priv->service_task); /* flush any pending task for this netdev */ flush_workqueue(mdev->workqueue); callout_drain(&priv->watchdog_timer); /* Detach the netdev so tasks would not attempt to access it */ mutex_lock(&mdev->state_lock); mdev->pndev[priv->port] = NULL; mutex_unlock(&mdev->state_lock); mlx4_en_free_resources(priv); /* freeing the sysctl conf cannot be called from within mlx4_en_free_resources */ if (priv->conf_sysctl != NULL) sysctl_ctx_free(&priv->conf_ctx); 
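/* Finally release the TX ring and CQ pointer arrays, the private data and the ifnet itself. */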
kfree(priv->tx_ring); kfree(priv->tx_cq); kfree(priv); if_free(dev); } static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int err = 0; en_dbg(DRV, priv, "Change MTU called - current:%u new:%u\n", (unsigned)dev->if_mtu, (unsigned)new_mtu); if ((new_mtu < MLX4_EN_MIN_MTU) || (new_mtu > priv->max_mtu)) { en_err(priv, "Bad MTU size:%d, max %u.\n", new_mtu, priv->max_mtu); return -EPERM; } mutex_lock(&mdev->state_lock); dev->if_mtu = new_mtu; if (dev->if_drv_flags & IFF_DRV_RUNNING) { if (!mdev->device_up) { /* NIC is probably restarting - let watchdog task reset * * the port */ en_dbg(DRV, priv, "Change MTU called with card down!?\n"); } else { mlx4_en_stop_port(dev); err = mlx4_en_start_port(dev); if (err) { en_err(priv, "Failed restarting port:%d\n", priv->port); queue_work(mdev->workqueue, &priv->watchdog_task); } } } mutex_unlock(&mdev->state_lock); return 0; } static int mlx4_en_calc_media(struct mlx4_en_priv *priv) { int trans_type; int active; active = IFM_ETHER; if (priv->last_link_state == MLX4_DEV_EVENT_PORT_DOWN) return (active); active |= IFM_FDX; trans_type = priv->port_state.transceiver; /* XXX I don't know all of the transceiver values. */ switch (priv->port_state.link_speed) { case 100: active |= IFM_100_T; break; case 1000: active |= IFM_1000_T; break; case 10000: if (trans_type > 0 && trans_type <= 0xC) active |= IFM_10G_SR; else if (trans_type == 0x80 || trans_type == 0) active |= IFM_10G_CX4; break; case 40000: active |= IFM_40G_CR4; break; } if (priv->prof->tx_pause) active |= IFM_ETH_TXPAUSE; if (priv->prof->rx_pause) active |= IFM_ETH_RXPAUSE; return (active); } static void mlx4_en_media_status(struct ifnet *dev, struct ifmediareq *ifmr) { struct mlx4_en_priv *priv; priv = dev->if_softc; ifmr->ifm_status = IFM_AVALID; if (priv->last_link_state != MLX4_DEV_EVENT_PORT_DOWN) ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active = mlx4_en_calc_media(priv); return; } static int mlx4_en_media_change(struct ifnet *dev) { struct mlx4_en_priv *priv; struct ifmedia *ifm; int rxpause; int txpause; int error; priv = dev->if_softc; ifm = &priv->media; rxpause = txpause = 0; error = 0; if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) return (EINVAL); switch (IFM_SUBTYPE(ifm->ifm_media)) { case IFM_AUTO: break; case IFM_10G_SR: case IFM_10G_CX4: case IFM_1000_T: case IFM_40G_CR4: if ((IFM_SUBTYPE(ifm->ifm_media) == IFM_SUBTYPE(mlx4_en_calc_media(priv))) && (ifm->ifm_media & IFM_FDX)) break; /* Fallthrough */ default: printf("%s: Only auto media type\n", if_name(dev)); return (EINVAL); } /* Allow user to set/clear pause */ if (IFM_OPTIONS(ifm->ifm_media) & IFM_ETH_RXPAUSE) rxpause = 1; if (IFM_OPTIONS(ifm->ifm_media) & IFM_ETH_TXPAUSE) txpause = 1; if (priv->prof->tx_pause != txpause || priv->prof->rx_pause != rxpause) { priv->prof->tx_pause = txpause; priv->prof->rx_pause = rxpause; error = -mlx4_SET_PORT_general(priv->mdev->dev, priv->port, priv->rx_mb_size + ETHER_CRC_LEN, priv->prof->tx_pause, priv->prof->tx_ppp, priv->prof->rx_pause, priv->prof->rx_ppp); } return (error); } static int mlx4_en_ioctl(struct ifnet *dev, u_long command, caddr_t data) { struct mlx4_en_priv *priv; struct mlx4_en_dev *mdev; struct ifreq *ifr; int error; int mask; struct ifrsskey *ifrk; const u32 *key; struct ifrsshash *ifrh; u8 rss_mask; error = 0; mask = 0; priv = dev->if_softc; /* check if detaching */ if (priv == NULL || priv->gone != 0) return (ENXIO); mdev = priv->mdev; ifr = (struct ifreq *) data; switch 
(command) { case SIOCSIFMTU: error = -mlx4_en_change_mtu(dev, ifr->ifr_mtu); break; case SIOCSIFFLAGS: if (dev->if_flags & IFF_UP) { if ((dev->if_drv_flags & IFF_DRV_RUNNING) == 0) { mutex_lock(&mdev->state_lock); mlx4_en_start_port(dev); mutex_unlock(&mdev->state_lock); } else { mlx4_en_set_rx_mode(dev); } } else { mutex_lock(&mdev->state_lock); if (dev->if_drv_flags & IFF_DRV_RUNNING) { mlx4_en_stop_port(dev); if_link_state_change(dev, LINK_STATE_DOWN); } mutex_unlock(&mdev->state_lock); } break; case SIOCADDMULTI: case SIOCDELMULTI: mlx4_en_set_rx_mode(dev); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(dev, ifr, &priv->media, command); break; case SIOCSIFCAP: mutex_lock(&mdev->state_lock); mask = ifr->ifr_reqcap ^ dev->if_capenable; if (mask & IFCAP_TXCSUM) { dev->if_capenable ^= IFCAP_TXCSUM; dev->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP); if (IFCAP_TSO4 & dev->if_capenable && !(IFCAP_TXCSUM & dev->if_capenable)) { dev->if_capenable &= ~IFCAP_TSO4; dev->if_hwassist &= ~CSUM_IP_TSO; if_printf(dev, "tso4 disabled due to -txcsum.\n"); } } if (mask & IFCAP_TXCSUM_IPV6) { dev->if_capenable ^= IFCAP_TXCSUM_IPV6; dev->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6); if (IFCAP_TSO6 & dev->if_capenable && !(IFCAP_TXCSUM_IPV6 & dev->if_capenable)) { dev->if_capenable &= ~IFCAP_TSO6; dev->if_hwassist &= ~CSUM_IP6_TSO; if_printf(dev, "tso6 disabled due to -txcsum6.\n"); } } if (mask & IFCAP_RXCSUM) dev->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_RXCSUM_IPV6) dev->if_capenable ^= IFCAP_RXCSUM_IPV6; if (mask & IFCAP_TSO4) { if (!(IFCAP_TSO4 & dev->if_capenable) && !(IFCAP_TXCSUM & dev->if_capenable)) { if_printf(dev, "enable txcsum first.\n"); error = EAGAIN; goto out; } dev->if_capenable ^= IFCAP_TSO4; dev->if_hwassist ^= CSUM_IP_TSO; } if (mask & IFCAP_TSO6) { if (!(IFCAP_TSO6 & dev->if_capenable) && !(IFCAP_TXCSUM_IPV6 & dev->if_capenable)) { if_printf(dev, "enable txcsum6 first.\n"); error = EAGAIN; goto out; } dev->if_capenable ^= IFCAP_TSO6; dev->if_hwassist ^= CSUM_IP6_TSO; } if (mask & IFCAP_LRO) dev->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_VLAN_HWTAGGING) dev->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_VLAN_HWFILTER) dev->if_capenable ^= IFCAP_VLAN_HWFILTER; if (mask & IFCAP_WOL_MAGIC) dev->if_capenable ^= IFCAP_WOL_MAGIC; if (dev->if_drv_flags & IFF_DRV_RUNNING) mlx4_en_start_port(dev); out: mutex_unlock(&mdev->state_lock); VLAN_CAPABILITIES(dev); break; #if __FreeBSD_version >= 1100036 case SIOCGI2C: { struct ifi2creq i2c; - error = copyin(ifr->ifr_data, &i2c, sizeof(i2c)); + error = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c)); if (error) break; if (i2c.len > sizeof(i2c.data)) { error = EINVAL; break; } /* * Note that we ignore i2c.addr here. The driver hardcodes * the address to 0x50, while standard expects it to be 0xA0. 
*/ error = mlx4_get_module_info(mdev->dev, priv->port, i2c.offset, i2c.len, i2c.data); if (error < 0) { error = -error; break; } - error = copyout(&i2c, ifr->ifr_data, sizeof(i2c)); + error = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c)); break; } #endif case SIOCGIFRSSKEY: ifrk = (struct ifrsskey *)data; ifrk->ifrk_func = RSS_FUNC_TOEPLITZ; mutex_lock(&mdev->state_lock); key = mlx4_en_get_rss_key(priv, &ifrk->ifrk_keylen); if (ifrk->ifrk_keylen > RSS_KEYLEN) error = EINVAL; else memcpy(ifrk->ifrk_key, key, ifrk->ifrk_keylen); mutex_unlock(&mdev->state_lock); break; case SIOCGIFRSSHASH: mutex_lock(&mdev->state_lock); rss_mask = mlx4_en_get_rss_mask(priv); mutex_unlock(&mdev->state_lock); ifrh = (struct ifrsshash *)data; ifrh->ifrh_func = RSS_FUNC_TOEPLITZ; ifrh->ifrh_types = 0; if (rss_mask & MLX4_RSS_IPV4) ifrh->ifrh_types |= RSS_TYPE_IPV4; if (rss_mask & MLX4_RSS_TCP_IPV4) ifrh->ifrh_types |= RSS_TYPE_TCP_IPV4; if (rss_mask & MLX4_RSS_IPV6) ifrh->ifrh_types |= RSS_TYPE_IPV6; if (rss_mask & MLX4_RSS_TCP_IPV6) ifrh->ifrh_types |= RSS_TYPE_TCP_IPV6; if (rss_mask & MLX4_RSS_UDP_IPV4) ifrh->ifrh_types |= RSS_TYPE_UDP_IPV4; if (rss_mask & MLX4_RSS_UDP_IPV6) ifrh->ifrh_types |= RSS_TYPE_UDP_IPV6; break; default: error = ether_ioctl(dev, command, data); break; } return (error); } int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, struct mlx4_en_port_profile *prof) { struct net_device *dev; struct mlx4_en_priv *priv; uint8_t dev_addr[ETHER_ADDR_LEN]; int err; int i; priv = kzalloc(sizeof(*priv), GFP_KERNEL); dev = priv->dev = if_alloc(IFT_ETHER); if (dev == NULL) { en_err(priv, "Net device allocation failed\n"); kfree(priv); return -ENOMEM; } dev->if_softc = priv; if_initname(dev, "mlxen", (device_get_unit( mdev->pdev->dev.bsddev) * MLX4_MAX_PORTS) + port - 1); dev->if_mtu = ETHERMTU; dev->if_init = mlx4_en_open; dev->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; dev->if_ioctl = mlx4_en_ioctl; dev->if_transmit = mlx4_en_transmit; dev->if_qflush = mlx4_en_qflush; dev->if_snd.ifq_maxlen = prof->tx_ring_size; /* * Initialize driver private data */ priv->counter_index = 0xff; spin_lock_init(&priv->stats_lock); INIT_WORK(&priv->rx_mode_task, mlx4_en_do_set_rx_mode); INIT_WORK(&priv->watchdog_task, mlx4_en_restart); INIT_WORK(&priv->linkstate_task, mlx4_en_linkstate); INIT_DELAYED_WORK(&priv->stats_task, mlx4_en_do_get_stats); INIT_DELAYED_WORK(&priv->service_task, mlx4_en_service_task); callout_init(&priv->watchdog_timer, 1); #ifdef CONFIG_RFS_ACCEL INIT_LIST_HEAD(&priv->filters); spin_lock_init(&priv->filters_lock); #endif priv->msg_enable = MLX4_EN_MSG_LEVEL; priv->dev = dev; priv->mdev = mdev; priv->ddev = &mdev->pdev->dev; priv->prof = prof; priv->port = port; priv->port_up = false; priv->flags = prof->flags; priv->num_tx_rings_p_up = mdev->profile.num_tx_rings_p_up; priv->tx_ring_num = prof->tx_ring_num; priv->tx_ring = kcalloc(MAX_TX_RINGS, sizeof(struct mlx4_en_tx_ring *), GFP_KERNEL); if (!priv->tx_ring) { err = -ENOMEM; goto out; } priv->tx_cq = kcalloc(sizeof(struct mlx4_en_cq *), MAX_TX_RINGS, GFP_KERNEL); if (!priv->tx_cq) { err = -ENOMEM; goto out; } priv->rx_ring_num = prof->rx_ring_num; priv->cqe_factor = (mdev->dev->caps.cqe_size == 64) ? 
1 : 0; priv->mac_index = -1; priv->last_ifq_jiffies = 0; priv->if_counters_rx_errors = 0; priv->if_counters_rx_no_buffer = 0; #ifdef CONFIG_MLX4_EN_DCB if (!mlx4_is_slave(priv->mdev->dev)) { priv->dcbx_cap = DCB_CAP_DCBX_HOST; priv->flags |= MLX4_EN_FLAG_DCB_ENABLED; if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ETS_CFG) { dev->dcbnl_ops = &mlx4_en_dcbnl_ops; } else { en_info(priv, "QoS disabled - no HW support\n"); dev->dcbnl_ops = &mlx4_en_dcbnl_pfc_ops; } } #endif /* Query for default mac and max mtu */ priv->max_mtu = mdev->dev->caps.eth_mtu_cap[priv->port]; priv->mac = mdev->dev->caps.def_mac[priv->port]; if (ILLEGAL_MAC(priv->mac)) { #if BITS_PER_LONG == 64 en_err(priv, "Port: %d, invalid mac burned: 0x%lx, quiting\n", priv->port, priv->mac); #elif BITS_PER_LONG == 32 en_err(priv, "Port: %d, invalid mac burned: 0x%llx, quiting\n", priv->port, priv->mac); #endif err = -EINVAL; goto out; } priv->stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) + DS_SIZE); mlx4_en_sysctl_conf(priv); err = mlx4_en_alloc_resources(priv); if (err) goto out; /* Allocate page for receive rings */ err = mlx4_alloc_hwq_res(mdev->dev, &priv->res, MLX4_EN_PAGE_SIZE, MLX4_EN_PAGE_SIZE); if (err) { en_err(priv, "Failed to allocate page for rx qps\n"); goto out; } priv->allocated = 1; /* * Set driver features */ dev->if_capabilities |= IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6; dev->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING; dev->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER; dev->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU; dev->if_capabilities |= IFCAP_LRO; dev->if_capabilities |= IFCAP_HWSTATS; if (mdev->LSO_support) dev->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6 | IFCAP_VLAN_HWTSO; #if __FreeBSD_version >= 1100000 /* set TSO limits so that we don't have to drop TX packets */ dev->if_hw_tsomax = MLX4_EN_TX_MAX_PAYLOAD_SIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN) /* hdr */; dev->if_hw_tsomaxsegcount = MLX4_EN_TX_MAX_MBUF_FRAGS - 1 /* hdr */; dev->if_hw_tsomaxsegsize = MLX4_EN_TX_MAX_MBUF_SIZE; #endif dev->if_capenable = dev->if_capabilities; dev->if_hwassist = 0; if (dev->if_capenable & (IFCAP_TSO4 | IFCAP_TSO6)) dev->if_hwassist |= CSUM_TSO; if (dev->if_capenable & IFCAP_TXCSUM) dev->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP); if (dev->if_capenable & IFCAP_TXCSUM_IPV6) dev->if_hwassist |= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6); /* Register for VLAN events */ priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, mlx4_en_vlan_rx_add_vid, priv, EVENTHANDLER_PRI_FIRST); priv->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, mlx4_en_vlan_rx_kill_vid, priv, EVENTHANDLER_PRI_FIRST); mdev->pndev[priv->port] = dev; priv->last_link_state = MLX4_DEV_EVENT_PORT_DOWN; mlx4_en_set_default_moderation(priv); /* Set default MAC */ for (i = 0; i < ETHER_ADDR_LEN; i++) dev_addr[ETHER_ADDR_LEN - 1 - i] = (u8) (priv->mac >> (8 * i)); ether_ifattach(dev, dev_addr); if_link_state_change(dev, LINK_STATE_DOWN); ifmedia_init(&priv->media, IFM_IMASK | IFM_ETH_FMASK, mlx4_en_media_change, mlx4_en_media_status); ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_1000_T, 0, NULL); ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_10G_SR, 0, NULL); ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_10G_CX4, 0, NULL); ifmedia_add(&priv->media, IFM_ETHER | IFM_FDX | IFM_40G_CR4, 0, NULL); ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO); en_warn(priv, "Using %d TX rings\n", prof->tx_ring_num); en_warn(priv, "Using %d RX rings\n", 
prof->rx_ring_num); priv->registered = 1; en_warn(priv, "Using %d TX rings\n", prof->tx_ring_num); en_warn(priv, "Using %d RX rings\n", prof->rx_ring_num); priv->rx_mb_size = dev->if_mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN; err = mlx4_SET_PORT_general(mdev->dev, priv->port, priv->rx_mb_size, prof->tx_pause, prof->tx_ppp, prof->rx_pause, prof->rx_ppp); if (err) { en_err(priv, "Failed setting port general configurations " "for port %d, with error %d\n", priv->port, err); goto out; } /* Init port */ en_warn(priv, "Initializing port\n"); err = mlx4_INIT_PORT(mdev->dev, priv->port); if (err) { en_err(priv, "Failed Initializing port\n"); goto out; } queue_delayed_work(mdev->workqueue, &priv->stats_task, STATS_DELAY); if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_TS) queue_delayed_work(mdev->workqueue, &priv->service_task, SERVICE_TASK_DELAY); return 0; out: mlx4_en_destroy_netdev(dev); return err; } static int mlx4_en_set_ring_size(struct net_device *dev, int rx_size, int tx_size) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int port_up = 0; int err = 0; rx_size = roundup_pow_of_two(rx_size); rx_size = max_t(u32, rx_size, MLX4_EN_MIN_RX_SIZE); rx_size = min_t(u32, rx_size, MLX4_EN_MAX_RX_SIZE); tx_size = roundup_pow_of_two(tx_size); tx_size = max_t(u32, tx_size, MLX4_EN_MIN_TX_SIZE); tx_size = min_t(u32, tx_size, MLX4_EN_MAX_TX_SIZE); if (rx_size == (priv->port_up ? priv->rx_ring[0]->actual_size : priv->rx_ring[0]->size) && tx_size == priv->tx_ring[0]->size) return 0; mutex_lock(&mdev->state_lock); if (priv->port_up) { port_up = 1; mlx4_en_stop_port(dev); } mlx4_en_free_resources(priv); priv->prof->tx_ring_size = tx_size; priv->prof->rx_ring_size = rx_size; err = mlx4_en_alloc_resources(priv); if (err) { en_err(priv, "Failed reallocating port resources\n"); goto out; } if (port_up) { err = mlx4_en_start_port(dev); if (err) en_err(priv, "Failed starting port\n"); } out: mutex_unlock(&mdev->state_lock); return err; } static int mlx4_en_set_rx_ring_size(SYSCTL_HANDLER_ARGS) { struct mlx4_en_priv *priv; int size; int error; priv = arg1; size = priv->prof->rx_ring_size; error = sysctl_handle_int(oidp, &size, 0, req); if (error || !req->newptr) return (error); error = -mlx4_en_set_ring_size(priv->dev, size, priv->prof->tx_ring_size); return (error); } static int mlx4_en_set_tx_ring_size(SYSCTL_HANDLER_ARGS) { struct mlx4_en_priv *priv; int size; int error; priv = arg1; size = priv->prof->tx_ring_size; error = sysctl_handle_int(oidp, &size, 0, req); if (error || !req->newptr) return (error); error = -mlx4_en_set_ring_size(priv->dev, priv->prof->rx_ring_size, size); return (error); } static int mlx4_en_get_module_info(struct net_device *dev, struct ethtool_modinfo *modinfo) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int ret; u8 data[4]; /* Read first 2 bytes to get Module & REV ID */ ret = mlx4_get_module_info(mdev->dev, priv->port, 0/*offset*/, 2/*size*/, data); if (ret < 2) { en_err(priv, "Failed to read eeprom module first two bytes, error: 0x%x\n", -ret); return -EIO; } switch (data[0] /* identifier */) { case MLX4_MODULE_ID_QSFP: modinfo->type = ETH_MODULE_SFF_8436; modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; break; case MLX4_MODULE_ID_QSFP_PLUS: if (data[1] >= 0x3) { /* revision id */ modinfo->type = ETH_MODULE_SFF_8636; modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN; } else { modinfo->type = ETH_MODULE_SFF_8436; modinfo->eeprom_len = ETH_MODULE_SFF_8436_LEN; } break; case MLX4_MODULE_ID_QSFP28: 
modinfo->type = ETH_MODULE_SFF_8636; modinfo->eeprom_len = ETH_MODULE_SFF_8636_LEN; break; case MLX4_MODULE_ID_SFP: modinfo->type = ETH_MODULE_SFF_8472; modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; break; default: en_err(priv, "mlx4_en_get_module_info : Not recognized cable type\n"); return -EINVAL; } return 0; } static int mlx4_en_get_module_eeprom(struct net_device *dev, struct ethtool_eeprom *ee, u8 *data) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; int offset = ee->offset; int i = 0, ret; if (ee->len == 0) return -EINVAL; memset(data, 0, ee->len); while (i < ee->len) { en_dbg(DRV, priv, "mlx4_get_module_info i(%d) offset(%d) len(%d)\n", i, offset, ee->len - i); ret = mlx4_get_module_info(mdev->dev, priv->port, offset, ee->len - i, data + i); if (!ret) /* Done reading */ return 0; if (ret < 0) { en_err(priv, "mlx4_get_module_info i(%d) offset(%d) bytes_to_read(%d) - FAILED (0x%x)\n", i, offset, ee->len - i, ret); return -1; } i += ret; offset += ret; } return 0; } static void mlx4_en_print_eeprom(u8 *data, __u32 len) { int i; int j = 0; int row = 0; const int NUM_OF_BYTES = 16; printf("\nOffset\t\tValues\n"); printf("------\t\t------\n"); while(row < len){ printf("0x%04x\t\t",row); for(i=0; i < NUM_OF_BYTES; i++){ printf("%02x ", data[j]); row++; j++; } printf("\n"); } } /* Read cable EEPROM module information by first inspecting the first * two bytes to get the length and then read the rest of the information. * The information is printed to dmesg. */ static int mlx4_en_read_eeprom(SYSCTL_HANDLER_ARGS) { u8* data; int error; int result = 0; struct mlx4_en_priv *priv; struct net_device *dev; struct ethtool_modinfo modinfo; struct ethtool_eeprom ee; error = sysctl_handle_int(oidp, &result, 0, req); if (error || !req->newptr) return (error); if (result == 1) { priv = arg1; dev = priv->dev; data = kmalloc(PAGE_SIZE, GFP_KERNEL); error = mlx4_en_get_module_info(dev, &modinfo); if (error) { en_err(priv, "mlx4_en_get_module_info returned with error - FAILED (0x%x)\n", -error); goto out; } ee.len = modinfo.eeprom_len; ee.offset = 0; error = mlx4_en_get_module_eeprom(dev, &ee, data); if (error) { en_err(priv, "mlx4_en_get_module_eeprom returned with error - FAILED (0x%x)\n", -error); /* Continue printing partial information in case of an error */ } /* EEPROM information will be printed in dmesg */ mlx4_en_print_eeprom(data, ee.len); out: kfree(data); } /* Return zero to prevent sysctl failure. */ return (0); } static int mlx4_en_set_tx_ppp(SYSCTL_HANDLER_ARGS) { struct mlx4_en_priv *priv; int ppp; int error; priv = arg1; ppp = priv->prof->tx_ppp; error = sysctl_handle_int(oidp, &ppp, 0, req); if (error || !req->newptr) return (error); if (ppp > 0xff || ppp < 0) return (-EINVAL); priv->prof->tx_ppp = ppp; error = -mlx4_SET_PORT_general(priv->mdev->dev, priv->port, priv->rx_mb_size + ETHER_CRC_LEN, priv->prof->tx_pause, priv->prof->tx_ppp, priv->prof->rx_pause, priv->prof->rx_ppp); return (error); } static int mlx4_en_set_rx_ppp(SYSCTL_HANDLER_ARGS) { struct mlx4_en_priv *priv; struct mlx4_en_dev *mdev; int ppp; int error; int port_up; port_up = 0; priv = arg1; mdev = priv->mdev; ppp = priv->prof->rx_ppp; error = sysctl_handle_int(oidp, &ppp, 0, req); if (error || !req->newptr) return (error); if (ppp > 0xff || ppp < 0) return (-EINVAL); /* See if we have to change the number of tx queues. 
*/ if (!ppp != !priv->prof->rx_ppp) { mutex_lock(&mdev->state_lock); if (priv->port_up) { port_up = 1; mlx4_en_stop_port(priv->dev); } mlx4_en_free_resources(priv); priv->prof->rx_ppp = ppp; error = -mlx4_en_alloc_resources(priv); if (error) en_err(priv, "Failed reallocating port resources\n"); if (error == 0 && port_up) { error = -mlx4_en_start_port(priv->dev); if (error) en_err(priv, "Failed starting port\n"); } mutex_unlock(&mdev->state_lock); return (error); } priv->prof->rx_ppp = ppp; error = -mlx4_SET_PORT_general(priv->mdev->dev, priv->port, priv->rx_mb_size + ETHER_CRC_LEN, priv->prof->tx_pause, priv->prof->tx_ppp, priv->prof->rx_pause, priv->prof->rx_ppp); return (error); } static void mlx4_en_sysctl_conf(struct mlx4_en_priv *priv) { struct net_device *dev; struct sysctl_ctx_list *ctx; struct sysctl_oid *node; struct sysctl_oid_list *node_list; struct sysctl_oid *coal; struct sysctl_oid_list *coal_list; const char *pnameunit; dev = priv->dev; ctx = &priv->conf_ctx; pnameunit = device_get_nameunit(priv->mdev->pdev->dev.bsddev); sysctl_ctx_init(ctx); priv->conf_sysctl = SYSCTL_ADD_NODE(ctx, SYSCTL_STATIC_CHILDREN(_hw), OID_AUTO, dev->if_xname, CTLFLAG_RD, 0, "mlx4 10gig ethernet"); node = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(priv->conf_sysctl), OID_AUTO, "conf", CTLFLAG_RD, NULL, "Configuration"); node_list = SYSCTL_CHILDREN(node); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "msg_enable", CTLFLAG_RW, &priv->msg_enable, 0, "Driver message enable bitfield"); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "rx_rings", CTLFLAG_RD, &priv->rx_ring_num, 0, "Number of receive rings"); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "tx_rings", CTLFLAG_RD, &priv->tx_ring_num, 0, "Number of transmit rings"); SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "rx_size", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, mlx4_en_set_rx_ring_size, "I", "Receive ring size"); SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "tx_size", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, mlx4_en_set_tx_ring_size, "I", "Transmit ring size"); SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "tx_ppp", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, mlx4_en_set_tx_ppp, "I", "TX Per-priority pause"); SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "rx_ppp", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, mlx4_en_set_rx_ppp, "I", "RX Per-priority pause"); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "port_num", CTLFLAG_RD, &priv->port, 0, "Port Number"); SYSCTL_ADD_STRING(ctx, node_list, OID_AUTO, "device_name", CTLFLAG_RD, __DECONST(void *, pnameunit), 0, "PCI device name"); /* Add coalescer configuration. 
*/ coal = SYSCTL_ADD_NODE(ctx, node_list, OID_AUTO, "coalesce", CTLFLAG_RD, NULL, "Interrupt coalesce configuration"); coal_list = SYSCTL_CHILDREN(coal); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "pkt_rate_low", CTLFLAG_RW, &priv->pkt_rate_low, 0, "Packets per-second for minimum delay"); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "rx_usecs_low", CTLFLAG_RW, &priv->rx_usecs_low, 0, "Minimum RX delay in micro-seconds"); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "pkt_rate_high", CTLFLAG_RW, &priv->pkt_rate_high, 0, "Packets per-second for maximum delay"); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "rx_usecs_high", CTLFLAG_RW, &priv->rx_usecs_high, 0, "Maximum RX delay in micro-seconds"); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "sample_interval", CTLFLAG_RW, &priv->sample_interval, 0, "adaptive frequency in units of HZ ticks"); SYSCTL_ADD_UINT(ctx, coal_list, OID_AUTO, "adaptive_rx_coal", CTLFLAG_RW, &priv->adaptive_rx_coal, 0, "Enable adaptive rx coalescing"); /* EEPROM support */ SYSCTL_ADD_PROC(ctx, node_list, OID_AUTO, "eeprom_info", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, priv, 0, mlx4_en_read_eeprom, "I", "EEPROM information"); } static void mlx4_en_sysctl_stat(struct mlx4_en_priv *priv) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *node_list; struct sysctl_oid *ring_node; struct sysctl_oid_list *ring_list; struct mlx4_en_tx_ring *tx_ring; struct mlx4_en_rx_ring *rx_ring; char namebuf[128]; int i; ctx = &priv->stat_ctx; sysctl_ctx_init(ctx); priv->stat_sysctl = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(priv->conf_sysctl), OID_AUTO, "stat", CTLFLAG_RD, NULL, "Statistics"); node_list = SYSCTL_CHILDREN(priv->stat_sysctl); #ifdef MLX4_EN_PERF_STAT SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "tx_poll", CTLFLAG_RD, &priv->pstats.tx_poll, "TX Poll calls"); SYSCTL_ADD_QUAD(ctx, node_list, OID_AUTO, "tx_pktsz_avg", CTLFLAG_RD, &priv->pstats.tx_pktsz_avg, "TX average packet size"); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "inflight_avg", CTLFLAG_RD, &priv->pstats.inflight_avg, "TX average packets in-flight"); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "tx_coal_avg", CTLFLAG_RD, &priv->pstats.tx_coal_avg, "TX average coalesced completions"); SYSCTL_ADD_UINT(ctx, node_list, OID_AUTO, "rx_coal_avg", CTLFLAG_RD, &priv->pstats.rx_coal_avg, "RX average coalesced completions"); #endif SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tso_packets", CTLFLAG_RD, &priv->port_stats.tso_packets, 0, "TSO packets sent"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "queue_stopped", CTLFLAG_RD, &priv->port_stats.queue_stopped, 0, "Queue full"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "wake_queue", CTLFLAG_RD, &priv->port_stats.wake_queue, 0, "Queue resumed after full"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_timeout", CTLFLAG_RD, &priv->port_stats.tx_timeout, 0, "Transmit timeouts"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_oversized_packets", CTLFLAG_RD, &priv->port_stats.oversized_packets, 0, "TX oversized packets, m_defrag failed"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_alloc_failed", CTLFLAG_RD, &priv->port_stats.rx_alloc_failed, 0, "RX failed to allocate mbuf"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_chksum_good", CTLFLAG_RD, &priv->port_stats.rx_chksum_good, 0, "RX checksum offload success"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_chksum_none", CTLFLAG_RD, &priv->port_stats.rx_chksum_none, 0, "RX without checksum offload"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_chksum_offload", CTLFLAG_RD, &priv->port_stats.tx_chksum_offload, 0, "TX checksum offloads"); SYSCTL_ADD_U64(ctx, 
node_list, OID_AUTO, "defrag_attempts", CTLFLAG_RD, &priv->port_stats.defrag_attempts, 0, "Oversized chains defragged"); /* Could strdup the names and add in a loop. This is simpler. */ SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_bytes", CTLFLAG_RD, &priv->pkstats.rx_bytes, 0, "RX Bytes"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_packets", CTLFLAG_RD, &priv->pkstats.rx_packets, 0, "RX packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_multicast_packets", CTLFLAG_RD, &priv->pkstats.rx_multicast_packets, 0, "RX Multicast Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_broadcast_packets", CTLFLAG_RD, &priv->pkstats.rx_broadcast_packets, 0, "RX Broadcast Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_errors", CTLFLAG_RD, &priv->pkstats.rx_errors, 0, "RX Errors"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_dropped", CTLFLAG_RD, &priv->pkstats.rx_dropped, 0, "RX Dropped"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_length_errors", CTLFLAG_RD, &priv->pkstats.rx_length_errors, 0, "RX Length Errors"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_over_errors", CTLFLAG_RD, &priv->pkstats.rx_over_errors, 0, "RX Over Errors"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_crc_errors", CTLFLAG_RD, &priv->pkstats.rx_crc_errors, 0, "RX CRC Errors"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_jabbers", CTLFLAG_RD, &priv->pkstats.rx_jabbers, 0, "RX Jabbers"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_in_range_length_error", CTLFLAG_RD, &priv->pkstats.rx_in_range_length_error, 0, "RX IN_Range Length Error"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_out_range_length_error", CTLFLAG_RD, &priv->pkstats.rx_out_range_length_error, 0, "RX Out Range Length Error"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_lt_64_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_lt_64_bytes_packets, 0, "RX Lt 64 Bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_127_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_127_bytes_packets, 0, "RX 127 bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_255_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_255_bytes_packets, 0, "RX 255 bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_511_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_511_bytes_packets, 0, "RX 511 bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_1023_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_1023_bytes_packets, 0, "RX 1023 bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_1518_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_1518_bytes_packets, 0, "RX 1518 bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_1522_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_1522_bytes_packets, 0, "RX 1522 bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_1548_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_1548_bytes_packets, 0, "RX 1548 bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "rx_gt_1548_bytes_packets", CTLFLAG_RD, &priv->pkstats.rx_gt_1548_bytes_packets, 0, "RX Greater Then 1548 bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_packets", CTLFLAG_RD, &priv->pkstats.tx_packets, 0, "TX packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_bytes", CTLFLAG_RD, &priv->pkstats.tx_bytes, 0, "TX Bytes"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_multicast_packets", CTLFLAG_RD, &priv->pkstats.tx_multicast_packets, 0, "TX Multicast Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_broadcast_packets", CTLFLAG_RD, &priv->pkstats.tx_broadcast_packets, 0, "TX Broadcast Packets"); SYSCTL_ADD_U64(ctx, 
node_list, OID_AUTO, "tx_errors", CTLFLAG_RD, &priv->pkstats.tx_errors, 0, "TX Errors"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_dropped", CTLFLAG_RD, &priv->pkstats.tx_dropped, 0, "TX Dropped"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_lt_64_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_lt_64_bytes_packets, 0, "TX Less Then 64 Bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_127_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_127_bytes_packets, 0, "TX 127 Bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_255_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_255_bytes_packets, 0, "TX 255 Bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_511_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_511_bytes_packets, 0, "TX 511 Bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_1023_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_1023_bytes_packets, 0, "TX 1023 Bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_1518_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_1518_bytes_packets, 0, "TX 1518 Bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_1522_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_1522_bytes_packets, 0, "TX 1522 Bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_1548_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_1548_bytes_packets, 0, "TX 1548 Bytes Packets"); SYSCTL_ADD_U64(ctx, node_list, OID_AUTO, "tx_gt_1548_bytes_packets", CTLFLAG_RD, &priv->pkstats.tx_gt_1548_bytes_packets, 0, "TX Greater Then 1548 Bytes Packets"); for (i = 0; i < priv->tx_ring_num; i++) { tx_ring = priv->tx_ring[i]; snprintf(namebuf, sizeof(namebuf), "tx_ring%d", i); ring_node = SYSCTL_ADD_NODE(ctx, node_list, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "TX Ring"); ring_list = SYSCTL_CHILDREN(ring_node); SYSCTL_ADD_U64(ctx, ring_list, OID_AUTO, "packets", CTLFLAG_RD, &tx_ring->packets, 0, "TX packets"); SYSCTL_ADD_U64(ctx, ring_list, OID_AUTO, "bytes", CTLFLAG_RD, &tx_ring->bytes, 0, "TX bytes"); SYSCTL_ADD_U64(ctx, ring_list, OID_AUTO, "tso_packets", CTLFLAG_RD, &tx_ring->tso_packets, 0, "TSO packets"); SYSCTL_ADD_U64(ctx, ring_list, OID_AUTO, "defrag_attempts", CTLFLAG_RD, &tx_ring->defrag_attempts, 0, "Oversized chains defragged"); } for (i = 0; i < priv->rx_ring_num; i++) { rx_ring = priv->rx_ring[i]; snprintf(namebuf, sizeof(namebuf), "rx_ring%d", i); ring_node = SYSCTL_ADD_NODE(ctx, node_list, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "RX Ring"); ring_list = SYSCTL_CHILDREN(ring_node); SYSCTL_ADD_U64(ctx, ring_list, OID_AUTO, "packets", CTLFLAG_RD, &rx_ring->packets, 0, "RX packets"); SYSCTL_ADD_U64(ctx, ring_list, OID_AUTO, "bytes", CTLFLAG_RD, &rx_ring->bytes, 0, "RX bytes"); SYSCTL_ADD_U64(ctx, ring_list, OID_AUTO, "error", CTLFLAG_RD, &rx_ring->errors, 0, "RX soft errors"); } } Index: stable/11/sys/dev/mlx5/mlx5_en/mlx5_en_main.c =================================================================== --- stable/11/sys/dev/mlx5/mlx5_en/mlx5_en_main.c (revision 332287) +++ stable/11/sys/dev/mlx5/mlx5_en/mlx5_en_main.c (revision 332288) @@ -1,3664 +1,3664 @@ /*- * Copyright (c) 2015 Mellanox Technologies. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "en.h" #include #include #define ETH_DRIVER_VERSION "3.1.0-dev" char mlx5e_version[] = "Mellanox Ethernet driver" " (" ETH_DRIVER_VERSION ")"; struct mlx5e_channel_param { struct mlx5e_rq_param rq; struct mlx5e_sq_param sq; struct mlx5e_cq_param rx_cq; struct mlx5e_cq_param tx_cq; }; static const struct { u32 subtype; u64 baudrate; } mlx5e_mode_table[MLX5E_LINK_MODES_NUMBER] = { [MLX5E_1000BASE_CX_SGMII] = { .subtype = IFM_1000_CX_SGMII, .baudrate = IF_Mbps(1000ULL), }, [MLX5E_1000BASE_KX] = { .subtype = IFM_1000_KX, .baudrate = IF_Mbps(1000ULL), }, [MLX5E_10GBASE_CX4] = { .subtype = IFM_10G_CX4, .baudrate = IF_Gbps(10ULL), }, [MLX5E_10GBASE_KX4] = { .subtype = IFM_10G_KX4, .baudrate = IF_Gbps(10ULL), }, [MLX5E_10GBASE_KR] = { .subtype = IFM_10G_KR, .baudrate = IF_Gbps(10ULL), }, [MLX5E_20GBASE_KR2] = { .subtype = IFM_20G_KR2, .baudrate = IF_Gbps(20ULL), }, [MLX5E_40GBASE_CR4] = { .subtype = IFM_40G_CR4, .baudrate = IF_Gbps(40ULL), }, [MLX5E_40GBASE_KR4] = { .subtype = IFM_40G_KR4, .baudrate = IF_Gbps(40ULL), }, [MLX5E_56GBASE_R4] = { .subtype = IFM_56G_R4, .baudrate = IF_Gbps(56ULL), }, [MLX5E_10GBASE_CR] = { .subtype = IFM_10G_CR1, .baudrate = IF_Gbps(10ULL), }, [MLX5E_10GBASE_SR] = { .subtype = IFM_10G_SR, .baudrate = IF_Gbps(10ULL), }, [MLX5E_10GBASE_ER] = { .subtype = IFM_10G_ER, .baudrate = IF_Gbps(10ULL), }, [MLX5E_40GBASE_SR4] = { .subtype = IFM_40G_SR4, .baudrate = IF_Gbps(40ULL), }, [MLX5E_40GBASE_LR4] = { .subtype = IFM_40G_LR4, .baudrate = IF_Gbps(40ULL), }, [MLX5E_100GBASE_CR4] = { .subtype = IFM_100G_CR4, .baudrate = IF_Gbps(100ULL), }, [MLX5E_100GBASE_SR4] = { .subtype = IFM_100G_SR4, .baudrate = IF_Gbps(100ULL), }, [MLX5E_100GBASE_KR4] = { .subtype = IFM_100G_KR4, .baudrate = IF_Gbps(100ULL), }, [MLX5E_100GBASE_LR4] = { .subtype = IFM_100G_LR4, .baudrate = IF_Gbps(100ULL), }, [MLX5E_100BASE_TX] = { .subtype = IFM_100_TX, .baudrate = IF_Mbps(100ULL), }, [MLX5E_1000BASE_T] = { .subtype = IFM_1000_T, .baudrate = IF_Mbps(1000ULL), }, [MLX5E_10GBASE_T] = { .subtype = IFM_10G_T, .baudrate = IF_Gbps(10ULL), }, [MLX5E_25GBASE_CR] = { .subtype = IFM_25G_CR, .baudrate = IF_Gbps(25ULL), }, [MLX5E_25GBASE_KR] = { .subtype = IFM_25G_KR, .baudrate = IF_Gbps(25ULL), }, [MLX5E_25GBASE_SR] = { .subtype = IFM_25G_SR, .baudrate = IF_Gbps(25ULL), }, [MLX5E_50GBASE_CR2] = { .subtype = IFM_50G_CR2, .baudrate = IF_Gbps(50ULL), }, [MLX5E_50GBASE_KR2] = { .subtype = IFM_50G_KR2, .baudrate = IF_Gbps(50ULL), }, }; MALLOC_DEFINE(M_MLX5EN, "MLX5EN", "MLX5 Ethernet"); static void mlx5e_update_carrier(struct mlx5e_priv *priv) { struct 
mlx5_core_dev *mdev = priv->mdev; u32 out[MLX5_ST_SZ_DW(ptys_reg)]; u32 eth_proto_oper; int error; u8 port_state; u8 i; port_state = mlx5_query_vport_state(mdev, MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0); if (port_state == VPORT_STATE_UP) { priv->media_status_last |= IFM_ACTIVE; } else { priv->media_status_last &= ~IFM_ACTIVE; priv->media_active_last = IFM_ETHER; if_link_state_change(priv->ifp, LINK_STATE_DOWN); return; } error = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1); if (error) { priv->media_active_last = IFM_ETHER; priv->ifp->if_baudrate = 1; if_printf(priv->ifp, "%s: query port ptys failed: 0x%x\n", __func__, error); return; } eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); for (i = 0; i != MLX5E_LINK_MODES_NUMBER; i++) { if (mlx5e_mode_table[i].baudrate == 0) continue; if (MLX5E_PROT_MASK(i) & eth_proto_oper) { priv->ifp->if_baudrate = mlx5e_mode_table[i].baudrate; priv->media_active_last = mlx5e_mode_table[i].subtype | IFM_ETHER | IFM_FDX; } } if_link_state_change(priv->ifp, LINK_STATE_UP); } static void mlx5e_media_status(struct ifnet *dev, struct ifmediareq *ifmr) { struct mlx5e_priv *priv = dev->if_softc; ifmr->ifm_status = priv->media_status_last; ifmr->ifm_active = priv->media_active_last | (priv->params.rx_pauseframe_control ? IFM_ETH_RXPAUSE : 0) | (priv->params.tx_pauseframe_control ? IFM_ETH_TXPAUSE : 0); } static u32 mlx5e_find_link_mode(u32 subtype) { u32 i; u32 link_mode = 0; for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { if (mlx5e_mode_table[i].baudrate == 0) continue; if (mlx5e_mode_table[i].subtype == subtype) link_mode |= MLX5E_PROT_MASK(i); } return (link_mode); } static int mlx5e_set_port_pause_and_pfc(struct mlx5e_priv *priv) { return (mlx5_set_port_pause_and_pfc(priv->mdev, 1, priv->params.rx_pauseframe_control, priv->params.tx_pauseframe_control, priv->params.rx_priority_flow_control, priv->params.tx_priority_flow_control)); } static int mlx5e_set_port_pfc(struct mlx5e_priv *priv) { int error; if (priv->params.rx_pauseframe_control || priv->params.tx_pauseframe_control) { if_printf(priv->ifp, "Global pauseframes must be disabled before enabling PFC.\n"); error = -EINVAL; } else { error = mlx5e_set_port_pause_and_pfc(priv); } return (error); } static int mlx5e_media_change(struct ifnet *dev) { struct mlx5e_priv *priv = dev->if_softc; struct mlx5_core_dev *mdev = priv->mdev; u32 eth_proto_cap; u32 link_mode; int was_opened; int locked; int error; locked = PRIV_LOCKED(priv); if (!locked) PRIV_LOCK(priv); if (IFM_TYPE(priv->media.ifm_media) != IFM_ETHER) { error = EINVAL; goto done; } link_mode = mlx5e_find_link_mode(IFM_SUBTYPE(priv->media.ifm_media)); /* query supported capabilities */ error = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN); if (error != 0) { if_printf(dev, "Query port media capability failed\n"); goto done; } /* check for autoselect */ if (IFM_SUBTYPE(priv->media.ifm_media) == IFM_AUTO) { link_mode = eth_proto_cap; if (link_mode == 0) { if_printf(dev, "Port media capability is zero\n"); error = EINVAL; goto done; } } else { link_mode = link_mode & eth_proto_cap; if (link_mode == 0) { if_printf(dev, "Not supported link mode requested\n"); error = EINVAL; goto done; } } if (priv->media.ifm_media & (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE)) { /* check if PFC is enabled */ if (priv->params.rx_priority_flow_control || priv->params.tx_priority_flow_control) { if_printf(dev, "PFC must be disabled before enabling global pauseframes.\n"); error = EINVAL; goto done; } } /* update pauseframe control bits */
priv->params.rx_pauseframe_control = (priv->media.ifm_media & IFM_ETH_RXPAUSE) ? 1 : 0; priv->params.tx_pauseframe_control = (priv->media.ifm_media & IFM_ETH_TXPAUSE) ? 1 : 0; /* check if device is opened */ was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); /* reconfigure the hardware */ mlx5_set_port_status(mdev, MLX5_PORT_DOWN); mlx5_set_port_proto(mdev, link_mode, MLX5_PTYS_EN); error = -mlx5e_set_port_pause_and_pfc(priv); if (was_opened) mlx5_set_port_status(mdev, MLX5_PORT_UP); done: if (!locked) PRIV_UNLOCK(priv); return (error); } static void mlx5e_update_carrier_work(struct work_struct *work) { struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, update_carrier_work); PRIV_LOCK(priv); if (test_bit(MLX5E_STATE_OPENED, &priv->state)) mlx5e_update_carrier(priv); PRIV_UNLOCK(priv); } /* * This function reads the physical port counters from the firmware * using a pre-defined layout defined by various MLX5E_PPORT_XXX() * macros. The output is converted from big-endian 64-bit values into * host endian ones and stored in the "priv->stats.pport" structure. */ static void mlx5e_update_pport_counters(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_pport_stats *s = &priv->stats.pport; struct mlx5e_port_stats_debug *s_debug = &priv->stats.port_stats_debug; u32 *in; u32 *out; const u64 *ptr; unsigned sz = MLX5_ST_SZ_BYTES(ppcnt_reg); unsigned x; unsigned y; unsigned z; /* allocate firmware request structures */ in = mlx5_vzalloc(sz); out = mlx5_vzalloc(sz); if (in == NULL || out == NULL) goto free_out; /* * Get pointer to the 64-bit counter set which is located at a * fixed offset in the output firmware request structure: */ ptr = (const uint64_t *)MLX5_ADDR_OF(ppcnt_reg, out, counter_set); MLX5_SET(ppcnt_reg, in, local_port, 1); /* read IEEE802_3 counter group using predefined counter layout */ MLX5_SET(ppcnt_reg, in, grp, MLX5_IEEE_802_3_COUNTERS_GROUP); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); for (x = 0, y = MLX5E_PPORT_PER_PRIO_STATS_NUM; x != MLX5E_PPORT_IEEE802_3_STATS_NUM; x++, y++) s->arg[y] = be64toh(ptr[x]); /* read RFC2819 counter group using predefined counter layout */ MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2819_COUNTERS_GROUP); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); for (x = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM; x++, y++) s->arg[y] = be64toh(ptr[x]); for (y = 0; x != MLX5E_PPORT_RFC2819_STATS_NUM + MLX5E_PPORT_RFC2819_STATS_DEBUG_NUM; x++, y++) s_debug->arg[y] = be64toh(ptr[x]); /* read RFC2863 counter group using predefined counter layout */ MLX5_SET(ppcnt_reg, in, grp, MLX5_RFC_2863_COUNTERS_GROUP); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); for (x = 0; x != MLX5E_PPORT_RFC2863_STATS_DEBUG_NUM; x++, y++) s_debug->arg[y] = be64toh(ptr[x]); /* read physical layer stats counter group using predefined counter layout */ MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); for (x = 0; x != MLX5E_PPORT_PHYSICAL_LAYER_STATS_DEBUG_NUM; x++, y++) s_debug->arg[y] = be64toh(ptr[x]); /* read per-priority counters */ MLX5_SET(ppcnt_reg, in, grp, MLX5_PER_PRIORITY_COUNTERS_GROUP); /* iterate all the priorities */ for (y = z = 0; z != MLX5E_PPORT_PER_PRIO_STATS_NUM_PRIO; z++) { MLX5_SET(ppcnt_reg, in, prio_tc, z); mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0); /* read per priority stats counter group using predefined counter layout */ for (x = 0; x != 
(MLX5E_PPORT_PER_PRIO_STATS_NUM / MLX5E_PPORT_PER_PRIO_STATS_NUM_PRIO); x++, y++) s->arg[y] = be64toh(ptr[x]); } free_out: /* free firmware request structures */ kvfree(in); kvfree(out); } /* * This function is called regularly to collect all statistics * counters from the firmware. The values can be viewed through the * sysctl interface. Execution is serialized using the priv's global * configuration lock. */ static void mlx5e_update_stats_work(struct work_struct *work) { struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv, update_stats_work); struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_vport_stats *s = &priv->stats.vport; struct mlx5e_rq_stats *rq_stats; struct mlx5e_sq_stats *sq_stats; struct buf_ring *sq_br; #if (__FreeBSD_version < 1100000) struct ifnet *ifp = priv->ifp; #endif u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)]; u32 *out; int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out); u64 tso_packets = 0; u64 tso_bytes = 0; u64 tx_queue_dropped = 0; u64 tx_defragged = 0; u64 tx_offload_none = 0; u64 lro_packets = 0; u64 lro_bytes = 0; u64 sw_lro_queued = 0; u64 sw_lro_flushed = 0; u64 rx_csum_none = 0; u64 rx_wqe_err = 0; u32 rx_out_of_buffer = 0; int i; int j; PRIV_LOCK(priv); out = mlx5_vzalloc(outlen); if (out == NULL) goto free_out; if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0) goto free_out; /* Collect firts the SW counters and then HW for consistency */ for (i = 0; i < priv->params.num_channels; i++) { struct mlx5e_rq *rq = &priv->channel[i]->rq; rq_stats = &priv->channel[i]->rq.stats; /* collect stats from LRO */ rq_stats->sw_lro_queued = rq->lro.lro_queued; rq_stats->sw_lro_flushed = rq->lro.lro_flushed; sw_lro_queued += rq_stats->sw_lro_queued; sw_lro_flushed += rq_stats->sw_lro_flushed; lro_packets += rq_stats->lro_packets; lro_bytes += rq_stats->lro_bytes; rx_csum_none += rq_stats->csum_none; rx_wqe_err += rq_stats->wqe_err; for (j = 0; j < priv->num_tc; j++) { sq_stats = &priv->channel[i]->sq[j].stats; sq_br = priv->channel[i]->sq[j].br; tso_packets += sq_stats->tso_packets; tso_bytes += sq_stats->tso_bytes; tx_queue_dropped += sq_stats->dropped; if (sq_br != NULL) tx_queue_dropped += sq_br->br_drops; tx_defragged += sq_stats->defragged; tx_offload_none += sq_stats->csum_offload_none; } } /* update counters */ s->tso_packets = tso_packets; s->tso_bytes = tso_bytes; s->tx_queue_dropped = tx_queue_dropped; s->tx_defragged = tx_defragged; s->lro_packets = lro_packets; s->lro_bytes = lro_bytes; s->sw_lro_queued = sw_lro_queued; s->sw_lro_flushed = sw_lro_flushed; s->rx_csum_none = rx_csum_none; s->rx_wqe_err = rx_wqe_err; /* HW counters */ memset(in, 0, sizeof(in)); MLX5_SET(query_vport_counter_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_COUNTER); MLX5_SET(query_vport_counter_in, in, op_mod, 0); MLX5_SET(query_vport_counter_in, in, other_vport, 0); memset(out, 0, outlen); /* get number of out-of-buffer drops first */ if (mlx5_vport_query_out_of_rx_buffer(mdev, priv->counter_set_id, &rx_out_of_buffer)) goto free_out; /* accumulate difference into a 64-bit counter */ s->rx_out_of_buffer += (u64)(u32)(rx_out_of_buffer - s->rx_out_of_buffer_prev); s->rx_out_of_buffer_prev = rx_out_of_buffer; /* get port statistics */ if (mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen)) goto free_out; #define MLX5_GET_CTR(out, x) \ MLX5_GET64(query_vport_counter_out, out, x) s->rx_error_packets = MLX5_GET_CTR(out, received_errors.packets); s->rx_error_bytes = MLX5_GET_CTR(out, received_errors.octets); s->tx_error_packets = MLX5_GET_CTR(out, transmit_errors.packets); 
s->tx_error_bytes = MLX5_GET_CTR(out, transmit_errors.octets); s->rx_unicast_packets = MLX5_GET_CTR(out, received_eth_unicast.packets); s->rx_unicast_bytes = MLX5_GET_CTR(out, received_eth_unicast.octets); s->tx_unicast_packets = MLX5_GET_CTR(out, transmitted_eth_unicast.packets); s->tx_unicast_bytes = MLX5_GET_CTR(out, transmitted_eth_unicast.octets); s->rx_multicast_packets = MLX5_GET_CTR(out, received_eth_multicast.packets); s->rx_multicast_bytes = MLX5_GET_CTR(out, received_eth_multicast.octets); s->tx_multicast_packets = MLX5_GET_CTR(out, transmitted_eth_multicast.packets); s->tx_multicast_bytes = MLX5_GET_CTR(out, transmitted_eth_multicast.octets); s->rx_broadcast_packets = MLX5_GET_CTR(out, received_eth_broadcast.packets); s->rx_broadcast_bytes = MLX5_GET_CTR(out, received_eth_broadcast.octets); s->tx_broadcast_packets = MLX5_GET_CTR(out, transmitted_eth_broadcast.packets); s->tx_broadcast_bytes = MLX5_GET_CTR(out, transmitted_eth_broadcast.octets); s->rx_packets = s->rx_unicast_packets + s->rx_multicast_packets + s->rx_broadcast_packets - s->rx_out_of_buffer; s->rx_bytes = s->rx_unicast_bytes + s->rx_multicast_bytes + s->rx_broadcast_bytes; s->tx_packets = s->tx_unicast_packets + s->tx_multicast_packets + s->tx_broadcast_packets; s->tx_bytes = s->tx_unicast_bytes + s->tx_multicast_bytes + s->tx_broadcast_bytes; /* Update calculated offload counters */ s->tx_csum_offload = s->tx_packets - tx_offload_none; s->rx_csum_good = s->rx_packets - s->rx_csum_none; /* Get physical port counters */ mlx5e_update_pport_counters(priv); #if (__FreeBSD_version < 1100000) /* no get_counters interface in fbsd 10 */ ifp->if_ipackets = s->rx_packets; ifp->if_ierrors = s->rx_error_packets + priv->stats.pport.alignment_err + priv->stats.pport.check_seq_err + priv->stats.pport.crc_align_errors + priv->stats.pport.in_range_len_errors + priv->stats.pport.jabbers + priv->stats.pport.out_of_range_len + priv->stats.pport.oversize_pkts + priv->stats.pport.symbol_err + priv->stats.pport.too_long_errors + priv->stats.pport.undersize_pkts + priv->stats.pport.unsupported_op_rx; ifp->if_iqdrops = s->rx_out_of_buffer + priv->stats.pport.drop_events; ifp->if_opackets = s->tx_packets; ifp->if_oerrors = s->tx_error_packets; ifp->if_snd.ifq_drops = s->tx_queue_dropped; ifp->if_ibytes = s->rx_bytes; ifp->if_obytes = s->tx_bytes; ifp->if_collisions = priv->stats.pport.collisions; #endif free_out: kvfree(out); /* Update diagnostics, if any */ if (priv->params_ethtool.diag_pci_enable || priv->params_ethtool.diag_general_enable) { int error = mlx5_core_get_diagnostics_full(mdev, priv->params_ethtool.diag_pci_enable ? &priv->params_pci : NULL, priv->params_ethtool.diag_general_enable ? 
&priv->params_general : NULL); if (error != 0) if_printf(priv->ifp, "Failed reading diagnostics: %d\n", error); } PRIV_UNLOCK(priv); } static void mlx5e_update_stats(void *arg) { struct mlx5e_priv *priv = arg; queue_work(priv->wq, &priv->update_stats_work); callout_reset(&priv->watchdog, hz, &mlx5e_update_stats, priv); } static void mlx5e_async_event_sub(struct mlx5e_priv *priv, enum mlx5_dev_event event) { switch (event) { case MLX5_DEV_EVENT_PORT_UP: case MLX5_DEV_EVENT_PORT_DOWN: queue_work(priv->wq, &priv->update_carrier_work); break; default: break; } } static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv, enum mlx5_dev_event event, unsigned long param) { struct mlx5e_priv *priv = vpriv; mtx_lock(&priv->async_events_mtx); if (test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state)) mlx5e_async_event_sub(priv, event); mtx_unlock(&priv->async_events_mtx); } static void mlx5e_enable_async_events(struct mlx5e_priv *priv) { set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state); } static void mlx5e_disable_async_events(struct mlx5e_priv *priv) { mtx_lock(&priv->async_events_mtx); clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state); mtx_unlock(&priv->async_events_mtx); } static const char *mlx5e_rq_stats_desc[] = { MLX5E_RQ_STATS(MLX5E_STATS_DESC) }; static int mlx5e_create_rq(struct mlx5e_channel *c, struct mlx5e_rq_param *param, struct mlx5e_rq *rq) { struct mlx5e_priv *priv = c->priv; struct mlx5_core_dev *mdev = priv->mdev; char buffer[16]; void *rqc = param->rqc; void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); int wq_sz; int err; int i; /* Create DMA descriptor TAG */ if ((err = -bus_dma_tag_create( bus_get_dma_tag(mdev->pdev->dev.bsddev), 1, /* any alignment */ 0, /* no boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MJUM16BYTES, /* maxsize */ 1, /* nsegments */ MJUM16BYTES, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockfuncarg */ &rq->dma_tag))) goto done; err = mlx5_wq_ll_create(mdev, &param->wq, rqc_wq, &rq->wq, &rq->wq_ctrl); if (err) goto err_free_dma_tag; rq->wq.db = &rq->wq.db[MLX5_RCV_DBR]; if (priv->params.hw_lro_en) { rq->wqe_sz = priv->params.lro_wqe_sz; } else { rq->wqe_sz = MLX5E_SW2MB_MTU(priv->ifp->if_mtu); } if (rq->wqe_sz > MJUM16BYTES) { err = -ENOMEM; goto err_rq_wq_destroy; } else if (rq->wqe_sz > MJUM9BYTES) { rq->wqe_sz = MJUM16BYTES; } else if (rq->wqe_sz > MJUMPAGESIZE) { rq->wqe_sz = MJUM9BYTES; } else if (rq->wqe_sz > MCLBYTES) { rq->wqe_sz = MJUMPAGESIZE; } else { rq->wqe_sz = MCLBYTES; } wq_sz = mlx5_wq_ll_get_size(&rq->wq); err = -tcp_lro_init_args(&rq->lro, c->ifp, TCP_LRO_ENTRIES, wq_sz); if (err) goto err_rq_wq_destroy; rq->mbuf = malloc(wq_sz * sizeof(rq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO); for (i = 0; i != wq_sz; i++) { struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i); uint32_t byte_count = rq->wqe_sz - MLX5E_NET_IP_ALIGN; err = -bus_dmamap_create(rq->dma_tag, 0, &rq->mbuf[i].dma_map); if (err != 0) { while (i--) bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map); goto err_rq_mbuf_free; } wqe->data.lkey = c->mkey_be; wqe->data.byte_count = cpu_to_be32(byte_count | MLX5_HW_START_PADDING); } rq->ifp = c->ifp; rq->channel = c; rq->ix = c->ix; snprintf(buffer, sizeof(buffer), "rxstat%d", c->ix); mlx5e_create_stats(&rq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), buffer, mlx5e_rq_stats_desc, MLX5E_RQ_STATS_NUM, rq->stats.arg); return (0); err_rq_mbuf_free: free(rq->mbuf, M_MLX5EN); tcp_lro_free(&rq->lro); err_rq_wq_destroy:
mlx5_wq_destroy(&rq->wq_ctrl); err_free_dma_tag: bus_dma_tag_destroy(rq->dma_tag); done: return (err); } static void mlx5e_destroy_rq(struct mlx5e_rq *rq) { int wq_sz; int i; /* destroy all sysctl nodes */ sysctl_ctx_free(&rq->stats.ctx); /* free leftover LRO packets, if any */ tcp_lro_free(&rq->lro); wq_sz = mlx5_wq_ll_get_size(&rq->wq); for (i = 0; i != wq_sz; i++) { if (rq->mbuf[i].mbuf != NULL) { bus_dmamap_unload(rq->dma_tag, rq->mbuf[i].dma_map); m_freem(rq->mbuf[i].mbuf); } bus_dmamap_destroy(rq->dma_tag, rq->mbuf[i].dma_map); } free(rq->mbuf, M_MLX5EN); mlx5_wq_destroy(&rq->wq_ctrl); } static int mlx5e_enable_rq(struct mlx5e_rq *rq, struct mlx5e_rq_param *param) { struct mlx5e_channel *c = rq->channel; struct mlx5e_priv *priv = c->priv; struct mlx5_core_dev *mdev = priv->mdev; void *in; void *rqc; void *wq; int inlen; int err; inlen = MLX5_ST_SZ_BYTES(create_rq_in) + sizeof(u64) * rq->wq_ctrl.buf.npages; in = mlx5_vzalloc(inlen); if (in == NULL) return (-ENOMEM); rqc = MLX5_ADDR_OF(create_rq_in, in, ctx); wq = MLX5_ADDR_OF(rqc, rqc, wq); memcpy(rqc, param->rqc, sizeof(param->rqc)); MLX5_SET(rqc, rqc, cqn, c->rq.cq.mcq.cqn); MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST); MLX5_SET(rqc, rqc, flush_in_error_en, 1); if (priv->counter_set_id >= 0) MLX5_SET(rqc, rqc, counter_set_id, priv->counter_set_id); MLX5_SET(wq, wq, log_wq_pg_sz, rq->wq_ctrl.buf.page_shift - PAGE_SHIFT); MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma); mlx5_fill_page_array(&rq->wq_ctrl.buf, (__be64 *) MLX5_ADDR_OF(wq, wq, pas)); err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn); kvfree(in); return (err); } static int mlx5e_modify_rq(struct mlx5e_rq *rq, int curr_state, int next_state) { struct mlx5e_channel *c = rq->channel; struct mlx5e_priv *priv = c->priv; struct mlx5_core_dev *mdev = priv->mdev; void *in; void *rqc; int inlen; int err; inlen = MLX5_ST_SZ_BYTES(modify_rq_in); in = mlx5_vzalloc(inlen); if (in == NULL) return (-ENOMEM); rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); MLX5_SET(modify_rq_in, in, rqn, rq->rqn); MLX5_SET(modify_rq_in, in, rq_state, curr_state); MLX5_SET(rqc, rqc, state, next_state); err = mlx5_core_modify_rq(mdev, in, inlen); kvfree(in); return (err); } static void mlx5e_disable_rq(struct mlx5e_rq *rq) { struct mlx5e_channel *c = rq->channel; struct mlx5e_priv *priv = c->priv; struct mlx5_core_dev *mdev = priv->mdev; mlx5_core_destroy_rq(mdev, rq->rqn); } static int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq) { struct mlx5e_channel *c = rq->channel; struct mlx5e_priv *priv = c->priv; struct mlx5_wq_ll *wq = &rq->wq; int i; for (i = 0; i < 1000; i++) { if (wq->cur_sz >= priv->params.min_rx_wqes) return (0); msleep(4); } return (-ETIMEDOUT); } static int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_rq_param *param, struct mlx5e_rq *rq) { int err; err = mlx5e_create_rq(c, param, rq); if (err) return (err); err = mlx5e_enable_rq(rq, param); if (err) goto err_destroy_rq; err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); if (err) goto err_disable_rq; c->rq.enabled = 1; return (0); err_disable_rq: mlx5e_disable_rq(rq); err_destroy_rq: mlx5e_destroy_rq(rq); return (err); } static void mlx5e_close_rq(struct mlx5e_rq *rq) { mtx_lock(&rq->mtx); rq->enabled = 0; callout_stop(&rq->watchdog); mtx_unlock(&rq->mtx); callout_drain(&rq->watchdog); mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR); } static void mlx5e_close_rq_wait(struct mlx5e_rq *rq) { struct mlx5_core_dev *mdev = rq->channel->priv->mdev; /* wait till RQ is empty */ while 
(!mlx5_wq_ll_is_empty(&rq->wq) && (mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)) { msleep(4); rq->cq.mcq.comp(&rq->cq.mcq); } mlx5e_disable_rq(rq); mlx5e_destroy_rq(rq); } void mlx5e_free_sq_db(struct mlx5e_sq *sq) { int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); int x; for (x = 0; x != wq_sz; x++) bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map); free(sq->mbuf, M_MLX5EN); } int mlx5e_alloc_sq_db(struct mlx5e_sq *sq) { int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); int err; int x; sq->mbuf = malloc(wq_sz * sizeof(sq->mbuf[0]), M_MLX5EN, M_WAITOK | M_ZERO); /* Create DMA descriptor MAPs */ for (x = 0; x != wq_sz; x++) { err = -bus_dmamap_create(sq->dma_tag, 0, &sq->mbuf[x].dma_map); if (err != 0) { while (x--) bus_dmamap_destroy(sq->dma_tag, sq->mbuf[x].dma_map); free(sq->mbuf, M_MLX5EN); return (err); } } return (0); } static const char *mlx5e_sq_stats_desc[] = { MLX5E_SQ_STATS(MLX5E_STATS_DESC) }; static int mlx5e_create_sq(struct mlx5e_channel *c, int tc, struct mlx5e_sq_param *param, struct mlx5e_sq *sq) { struct mlx5e_priv *priv = c->priv; struct mlx5_core_dev *mdev = priv->mdev; char buffer[16]; void *sqc = param->sqc; void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq); #ifdef RSS cpuset_t cpu_mask; int cpu_id; #endif int err; /* Create DMA descriptor TAG */ if ((err = -bus_dma_tag_create( bus_get_dma_tag(mdev->pdev->dev.bsddev), 1, /* any alignment */ 0, /* no boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MLX5E_MAX_TX_PAYLOAD_SIZE, /* maxsize */ MLX5E_MAX_TX_MBUF_FRAGS, /* nsegments */ MLX5E_MAX_TX_MBUF_SIZE, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockfuncarg */ &sq->dma_tag))) goto done; err = mlx5_alloc_map_uar(mdev, &sq->uar); if (err) goto err_free_dma_tag; err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, &sq->wq, &sq->wq_ctrl); if (err) goto err_unmap_free_uar; sq->wq.db = &sq->wq.db[MLX5_SND_DBR]; sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2; err = mlx5e_alloc_sq_db(sq); if (err) goto err_sq_wq_destroy; sq->mkey_be = c->mkey_be; sq->ifp = priv->ifp; sq->priv = priv; sq->tc = tc; /* check if we should allocate a second packet buffer */ if (priv->params_ethtool.tx_bufring_disable == 0) { sq->br = buf_ring_alloc(MLX5E_SQ_TX_QUEUE_SIZE, M_MLX5EN, M_WAITOK, &sq->lock); if (sq->br == NULL) { if_printf(c->ifp, "%s: Failed allocating sq drbr buffer\n", __func__); err = -ENOMEM; goto err_free_sq_db; } sq->sq_tq = taskqueue_create_fast("mlx5e_que", M_WAITOK, taskqueue_thread_enqueue, &sq->sq_tq); if (sq->sq_tq == NULL) { if_printf(c->ifp, "%s: Failed allocating taskqueue\n", __func__); err = -ENOMEM; goto err_free_drbr; } TASK_INIT(&sq->sq_task, 0, mlx5e_tx_que, sq); #ifdef RSS cpu_id = rss_getcpu(c->ix % rss_getnumbuckets()); CPU_SETOF(cpu_id, &cpu_mask); taskqueue_start_threads_cpuset(&sq->sq_tq, 1, PI_NET, &cpu_mask, "%s TX SQ%d.%d CPU%d", c->ifp->if_xname, c->ix, tc, cpu_id); #else taskqueue_start_threads(&sq->sq_tq, 1, PI_NET, "%s TX SQ%d.%d", c->ifp->if_xname, c->ix, tc); #endif } snprintf(buffer, sizeof(buffer), "txstat%dtc%d", c->ix, tc); mlx5e_create_stats(&sq->stats.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), buffer, mlx5e_sq_stats_desc, MLX5E_SQ_STATS_NUM, sq->stats.arg); return (0); err_free_drbr: buf_ring_free(sq->br, M_MLX5EN); err_free_sq_db: mlx5e_free_sq_db(sq); err_sq_wq_destroy: mlx5_wq_destroy(&sq->wq_ctrl); err_unmap_free_uar: mlx5_unmap_free_uar(mdev, &sq->uar); err_free_dma_tag: bus_dma_tag_destroy(sq->dma_tag); done: return (err); } static void mlx5e_destroy_sq(struct 
mlx5e_sq *sq) { /* destroy all sysctl nodes */ sysctl_ctx_free(&sq->stats.ctx); mlx5e_free_sq_db(sq); mlx5_wq_destroy(&sq->wq_ctrl); mlx5_unmap_free_uar(sq->priv->mdev, &sq->uar); if (sq->sq_tq != NULL) { taskqueue_drain(sq->sq_tq, &sq->sq_task); taskqueue_free(sq->sq_tq); } if (sq->br != NULL) buf_ring_free(sq->br, M_MLX5EN); } int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param, int tis_num) { void *in; void *sqc; void *wq; int inlen; int err; inlen = MLX5_ST_SZ_BYTES(create_sq_in) + sizeof(u64) * sq->wq_ctrl.buf.npages; in = mlx5_vzalloc(inlen); if (in == NULL) return (-ENOMEM); sqc = MLX5_ADDR_OF(create_sq_in, in, ctx); wq = MLX5_ADDR_OF(sqc, sqc, wq); memcpy(sqc, param->sqc, sizeof(param->sqc)); MLX5_SET(sqc, sqc, tis_num_0, tis_num); MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn); MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); MLX5_SET(sqc, sqc, tis_lst_sz, 1); MLX5_SET(sqc, sqc, flush_in_error_en, 1); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); MLX5_SET(wq, wq, uar_page, sq->uar.index); MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift - PAGE_SHIFT); MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma); mlx5_fill_page_array(&sq->wq_ctrl.buf, (__be64 *) MLX5_ADDR_OF(wq, wq, pas)); err = mlx5_core_create_sq(sq->priv->mdev, in, inlen, &sq->sqn); kvfree(in); return (err); } int mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, int next_state) { void *in; void *sqc; int inlen; int err; inlen = MLX5_ST_SZ_BYTES(modify_sq_in); in = mlx5_vzalloc(inlen); if (in == NULL) return (-ENOMEM); sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); MLX5_SET(modify_sq_in, in, sqn, sq->sqn); MLX5_SET(modify_sq_in, in, sq_state, curr_state); MLX5_SET(sqc, sqc, state, next_state); err = mlx5_core_modify_sq(sq->priv->mdev, in, inlen); kvfree(in); return (err); } void mlx5e_disable_sq(struct mlx5e_sq *sq) { mlx5_core_destroy_sq(sq->priv->mdev, sq->sqn); } static int mlx5e_open_sq(struct mlx5e_channel *c, int tc, struct mlx5e_sq_param *param, struct mlx5e_sq *sq) { int err; err = mlx5e_create_sq(c, tc, param, sq); if (err) return (err); err = mlx5e_enable_sq(sq, param, c->priv->tisn[tc]); if (err) goto err_destroy_sq; err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY); if (err) goto err_disable_sq; atomic_store_rel_int(&sq->queue_state, MLX5E_SQ_READY); return (0); err_disable_sq: mlx5e_disable_sq(sq); err_destroy_sq: mlx5e_destroy_sq(sq); return (err); } static void mlx5e_sq_send_nops_locked(struct mlx5e_sq *sq, int can_sleep) { /* fill up remainder with NOPs */ while (sq->cev_counter != 0) { while (!mlx5e_sq_has_room_for(sq, 1)) { if (can_sleep != 0) { mtx_unlock(&sq->lock); msleep(4); mtx_lock(&sq->lock); } else { goto done; } } /* send a single NOP */ mlx5e_send_nop(sq, 1); atomic_thread_fence_rel(); } done: /* Check if we need to write the doorbell */ if (likely(sq->doorbell.d64 != 0)) { mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0); sq->doorbell.d64 = 0; } } void mlx5e_sq_cev_timeout(void *arg) { struct mlx5e_sq *sq = arg; mtx_assert(&sq->lock, MA_OWNED); /* check next state */ switch (sq->cev_next_state) { case MLX5E_CEV_STATE_SEND_NOPS: /* fill TX ring with NOPs, if any */ mlx5e_sq_send_nops_locked(sq, 0); /* check if completed */ if (sq->cev_counter == 0) { sq->cev_next_state = MLX5E_CEV_STATE_INITIAL; return; } break; default: /* send NOPs on next timeout */ sq->cev_next_state = MLX5E_CEV_STATE_SEND_NOPS; break; } /* restart timer */ callout_reset_curcpu(&sq->cev_callout, hz, mlx5e_sq_cev_timeout, sq); } void mlx5e_drain_sq(struct mlx5e_sq *sq) { int error; struct 
mlx5_core_dev *mdev= sq->priv->mdev; /* * Check if already stopped. * * NOTE: The "stopped" variable is only written when both the * priv's configuration lock and the SQ's lock is locked. It * can therefore safely be read when only one of the two locks * is locked. This function is always called when the priv's * configuration lock is locked. */ if (sq->stopped != 0) return; mtx_lock(&sq->lock); /* don't put more packets into the SQ */ sq->stopped = 1; /* teardown event factor timer, if any */ sq->cev_next_state = MLX5E_CEV_STATE_HOLD_NOPS; callout_stop(&sq->cev_callout); /* send dummy NOPs in order to flush the transmit ring */ mlx5e_sq_send_nops_locked(sq, 1); mtx_unlock(&sq->lock); /* make sure it is safe to free the callout */ callout_drain(&sq->cev_callout); /* wait till SQ is empty or link is down */ mtx_lock(&sq->lock); while (sq->cc != sq->pc && (sq->priv->media_status_last & IFM_ACTIVE) != 0 && mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) { mtx_unlock(&sq->lock); msleep(1); sq->cq.mcq.comp(&sq->cq.mcq); mtx_lock(&sq->lock); } mtx_unlock(&sq->lock); /* error out remaining requests */ error = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR); if (error != 0) { if_printf(sq->ifp, "mlx5e_modify_sq() from RDY to ERR failed: %d\n", error); } /* wait till SQ is empty */ mtx_lock(&sq->lock); while (sq->cc != sq->pc && mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) { mtx_unlock(&sq->lock); msleep(1); sq->cq.mcq.comp(&sq->cq.mcq); mtx_lock(&sq->lock); } mtx_unlock(&sq->lock); } static void mlx5e_close_sq_wait(struct mlx5e_sq *sq) { mlx5e_drain_sq(sq); mlx5e_disable_sq(sq); mlx5e_destroy_sq(sq); } static int mlx5e_create_cq(struct mlx5e_priv *priv, struct mlx5e_cq_param *param, struct mlx5e_cq *cq, mlx5e_cq_comp_t *comp, int eq_ix) { struct mlx5_core_dev *mdev = priv->mdev; struct mlx5_core_cq *mcq = &cq->mcq; int eqn_not_used; int irqn; int err; u32 i; param->wq.buf_numa_node = 0; param->wq.db_numa_node = 0; err = mlx5_cqwq_create(mdev, ¶m->wq, param->cqc, &cq->wq, &cq->wq_ctrl); if (err) return (err); mlx5_vector2eqn(mdev, eq_ix, &eqn_not_used, &irqn); mcq->cqe_sz = 64; mcq->set_ci_db = cq->wq_ctrl.db.db; mcq->arm_db = cq->wq_ctrl.db.db + 1; *mcq->set_ci_db = 0; *mcq->arm_db = 0; mcq->vector = eq_ix; mcq->comp = comp; mcq->event = mlx5e_cq_error_event; mcq->irqn = irqn; mcq->uar = &priv->cq_uar; for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) { struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i); cqe->op_own = 0xf1; } cq->priv = priv; return (0); } static void mlx5e_destroy_cq(struct mlx5e_cq *cq) { mlx5_wq_destroy(&cq->wq_ctrl); } static int mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param, int eq_ix) { struct mlx5_core_cq *mcq = &cq->mcq; void *in; void *cqc; int inlen; int irqn_not_used; int eqn; int err; inlen = MLX5_ST_SZ_BYTES(create_cq_in) + sizeof(u64) * cq->wq_ctrl.buf.npages; in = mlx5_vzalloc(inlen); if (in == NULL) return (-ENOMEM); cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context); memcpy(cqc, param->cqc, sizeof(param->cqc)); mlx5_fill_page_array(&cq->wq_ctrl.buf, (__be64 *) MLX5_ADDR_OF(create_cq_in, in, pas)); mlx5_vector2eqn(cq->priv->mdev, eq_ix, &eqn, &irqn_not_used); MLX5_SET(cqc, cqc, c_eqn, eqn); MLX5_SET(cqc, cqc, uar_page, mcq->uar->index); MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - PAGE_SHIFT); MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma); err = mlx5_core_create_cq(cq->priv->mdev, mcq, in, inlen); kvfree(in); if (err) return (err); mlx5e_cq_arm(cq, MLX5_GET_DOORBELL_LOCK(&cq->priv->doorbell_lock)); 
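/* arm the newly created CQ so that the first completion generates an event */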
return (0); } static void mlx5e_disable_cq(struct mlx5e_cq *cq) { mlx5_core_destroy_cq(cq->priv->mdev, &cq->mcq); } int mlx5e_open_cq(struct mlx5e_priv *priv, struct mlx5e_cq_param *param, struct mlx5e_cq *cq, mlx5e_cq_comp_t *comp, int eq_ix) { int err; err = mlx5e_create_cq(priv, param, cq, comp, eq_ix); if (err) return (err); err = mlx5e_enable_cq(cq, param, eq_ix); if (err) goto err_destroy_cq; return (0); err_destroy_cq: mlx5e_destroy_cq(cq); return (err); } void mlx5e_close_cq(struct mlx5e_cq *cq) { mlx5e_disable_cq(cq); mlx5e_destroy_cq(cq); } static int mlx5e_open_tx_cqs(struct mlx5e_channel *c, struct mlx5e_channel_param *cparam) { int err; int tc; for (tc = 0; tc < c->num_tc; tc++) { /* open completion queue */ err = mlx5e_open_cq(c->priv, &cparam->tx_cq, &c->sq[tc].cq, &mlx5e_tx_cq_comp, c->ix); if (err) goto err_close_tx_cqs; } return (0); err_close_tx_cqs: for (tc--; tc >= 0; tc--) mlx5e_close_cq(&c->sq[tc].cq); return (err); } static void mlx5e_close_tx_cqs(struct mlx5e_channel *c) { int tc; for (tc = 0; tc < c->num_tc; tc++) mlx5e_close_cq(&c->sq[tc].cq); } static int mlx5e_open_sqs(struct mlx5e_channel *c, struct mlx5e_channel_param *cparam) { int err; int tc; for (tc = 0; tc < c->num_tc; tc++) { err = mlx5e_open_sq(c, tc, &cparam->sq, &c->sq[tc]); if (err) goto err_close_sqs; } return (0); err_close_sqs: for (tc--; tc >= 0; tc--) mlx5e_close_sq_wait(&c->sq[tc]); return (err); } static void mlx5e_close_sqs_wait(struct mlx5e_channel *c) { int tc; for (tc = 0; tc < c->num_tc; tc++) mlx5e_close_sq_wait(&c->sq[tc]); } static void mlx5e_chan_mtx_init(struct mlx5e_channel *c) { int tc; mtx_init(&c->rq.mtx, "mlx5rx", MTX_NETWORK_LOCK, MTX_DEF); callout_init_mtx(&c->rq.watchdog, &c->rq.mtx, 0); for (tc = 0; tc < c->num_tc; tc++) { struct mlx5e_sq *sq = c->sq + tc; mtx_init(&sq->lock, "mlx5tx", MTX_NETWORK_LOCK " TX", MTX_DEF); mtx_init(&sq->comp_lock, "mlx5comp", MTX_NETWORK_LOCK " TX", MTX_DEF); callout_init_mtx(&sq->cev_callout, &sq->lock, 0); sq->cev_factor = c->priv->params_ethtool.tx_completion_fact; /* ensure the TX completion event factor is not zero */ if (sq->cev_factor == 0) sq->cev_factor = 1; } } static void mlx5e_chan_mtx_destroy(struct mlx5e_channel *c) { int tc; mtx_destroy(&c->rq.mtx); for (tc = 0; tc < c->num_tc; tc++) { mtx_destroy(&c->sq[tc].lock); mtx_destroy(&c->sq[tc].comp_lock); } } static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, struct mlx5e_channel_param *cparam, struct mlx5e_channel *volatile *cp) { struct mlx5e_channel *c; int err; c = malloc(sizeof(*c), M_MLX5EN, M_WAITOK | M_ZERO); c->priv = priv; c->ix = ix; c->cpu = 0; c->ifp = priv->ifp; c->mkey_be = cpu_to_be32(priv->mr.key); c->num_tc = priv->num_tc; /* init mutexes */ mlx5e_chan_mtx_init(c); /* open transmit completion queue */ err = mlx5e_open_tx_cqs(c, cparam); if (err) goto err_free; /* open receive completion queue */ err = mlx5e_open_cq(c->priv, &cparam->rx_cq, &c->rq.cq, &mlx5e_rx_cq_comp, c->ix); if (err) goto err_close_tx_cqs; err = mlx5e_open_sqs(c, cparam); if (err) goto err_close_rx_cq; err = mlx5e_open_rq(c, &cparam->rq, &c->rq); if (err) goto err_close_sqs; /* store channel pointer */ *cp = c; /* poll receive queue initially */ c->rq.cq.mcq.comp(&c->rq.cq.mcq); return (0); err_close_sqs: mlx5e_close_sqs_wait(c); err_close_rx_cq: mlx5e_close_cq(&c->rq.cq); err_close_tx_cqs: mlx5e_close_tx_cqs(c); err_free: /* destroy mutexes */ mlx5e_chan_mtx_destroy(c); free(c, M_MLX5EN); return (err); } static void mlx5e_close_channel(struct mlx5e_channel *volatile *pp) { struct 
mlx5e_channel *c = *pp; /* check if channel is already closed */ if (c == NULL) return; mlx5e_close_rq(&c->rq); } static void mlx5e_close_channel_wait(struct mlx5e_channel *volatile *pp) { struct mlx5e_channel *c = *pp; /* check if channel is already closed */ if (c == NULL) return; /* ensure channel pointer is no longer used */ *pp = NULL; mlx5e_close_rq_wait(&c->rq); mlx5e_close_sqs_wait(c); mlx5e_close_cq(&c->rq.cq); mlx5e_close_tx_cqs(c); /* destroy mutexes */ mlx5e_chan_mtx_destroy(c); free(c, M_MLX5EN); } static void mlx5e_build_rq_param(struct mlx5e_priv *priv, struct mlx5e_rq_param *param) { void *rqc = param->rqc; void *wq = MLX5_ADDR_OF(rqc, rqc, wq); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST); MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe))); MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size); MLX5_SET(wq, wq, pd, priv->pdn); param->wq.buf_numa_node = 0; param->wq.db_numa_node = 0; param->wq.linear = 1; } static void mlx5e_build_sq_param(struct mlx5e_priv *priv, struct mlx5e_sq_param *param) { void *sqc = param->sqc; void *wq = MLX5_ADDR_OF(sqc, sqc, wq); MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size); MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); MLX5_SET(wq, wq, pd, priv->pdn); param->wq.buf_numa_node = 0; param->wq.db_numa_node = 0; param->wq.linear = 1; } static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, struct mlx5e_cq_param *param) { void *cqc = param->cqc; MLX5_SET(cqc, cqc, uar_page, priv->cq_uar.index); } static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, struct mlx5e_cq_param *param) { void *cqc = param->cqc; /* * TODO The sysctl to control on/off is a bool value for now, which means * we only support CSUM, once HASH is implemnted we'll need to address that. 
*/ if (priv->params.cqe_zipping_en) { MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM); MLX5_SET(cqc, cqc, cqe_compression_en, 1); } MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_rq_size); MLX5_SET(cqc, cqc, cq_period, priv->params.rx_cq_moderation_usec); MLX5_SET(cqc, cqc, cq_max_count, priv->params.rx_cq_moderation_pkts); switch (priv->params.rx_cq_moderation_mode) { case 0: MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); break; default: if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe)) MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE); else MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); break; } mlx5e_build_common_cq_param(priv, param); } static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, struct mlx5e_cq_param *param) { void *cqc = param->cqc; MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size); MLX5_SET(cqc, cqc, cq_period, priv->params.tx_cq_moderation_usec); MLX5_SET(cqc, cqc, cq_max_count, priv->params.tx_cq_moderation_pkts); switch (priv->params.tx_cq_moderation_mode) { case 0: MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); break; default: if (MLX5_CAP_GEN(priv->mdev, cq_period_start_from_cqe)) MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_CQE); else MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); break; } mlx5e_build_common_cq_param(priv, param); } static void mlx5e_build_channel_param(struct mlx5e_priv *priv, struct mlx5e_channel_param *cparam) { memset(cparam, 0, sizeof(*cparam)); mlx5e_build_rq_param(priv, &cparam->rq); mlx5e_build_sq_param(priv, &cparam->sq); mlx5e_build_rx_cq_param(priv, &cparam->rx_cq); mlx5e_build_tx_cq_param(priv, &cparam->tx_cq); } static int mlx5e_open_channels(struct mlx5e_priv *priv) { struct mlx5e_channel_param cparam; void *ptr; int err; int i; int j; priv->channel = malloc(priv->params.num_channels * sizeof(struct mlx5e_channel *), M_MLX5EN, M_WAITOK | M_ZERO); mlx5e_build_channel_param(priv, &cparam); for (i = 0; i < priv->params.num_channels; i++) { err = mlx5e_open_channel(priv, i, &cparam, &priv->channel[i]); if (err) goto err_close_channels; } for (j = 0; j < priv->params.num_channels; j++) { err = mlx5e_wait_for_min_rx_wqes(&priv->channel[j]->rq); if (err) goto err_close_channels; } return (0); err_close_channels: for (i--; i >= 0; i--) { mlx5e_close_channel(&priv->channel[i]); mlx5e_close_channel_wait(&priv->channel[i]); } /* remove "volatile" attribute from "channel" pointer */ ptr = __DECONST(void *, priv->channel); priv->channel = NULL; free(ptr, M_MLX5EN); return (err); } static void mlx5e_close_channels(struct mlx5e_priv *priv) { void *ptr; int i; if (priv->channel == NULL) return; for (i = 0; i < priv->params.num_channels; i++) mlx5e_close_channel(&priv->channel[i]); for (i = 0; i < priv->params.num_channels; i++) mlx5e_close_channel_wait(&priv->channel[i]); /* remove "volatile" attribute from "channel" pointer */ ptr = __DECONST(void *, priv->channel); priv->channel = NULL; free(ptr, M_MLX5EN); } static int mlx5e_refresh_sq_params(struct mlx5e_priv *priv, struct mlx5e_sq *sq) { if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) { uint8_t cq_mode; switch (priv->params.tx_cq_moderation_mode) { case 0: cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE; break; default: cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE; break; } return (mlx5_core_modify_cq_moderation_mode(priv->mdev, &sq->cq.mcq, priv->params.tx_cq_moderation_usec, priv->params.tx_cq_moderation_pkts, 
cq_mode)); } return (mlx5_core_modify_cq_moderation(priv->mdev, &sq->cq.mcq, priv->params.tx_cq_moderation_usec, priv->params.tx_cq_moderation_pkts)); } static int mlx5e_refresh_rq_params(struct mlx5e_priv *priv, struct mlx5e_rq *rq) { if (MLX5_CAP_GEN(priv->mdev, cq_period_mode_modify)) { uint8_t cq_mode; int retval; switch (priv->params.rx_cq_moderation_mode) { case 0: cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE; break; default: cq_mode = MLX5_CQ_PERIOD_MODE_START_FROM_CQE; break; } retval = mlx5_core_modify_cq_moderation_mode(priv->mdev, &rq->cq.mcq, priv->params.rx_cq_moderation_usec, priv->params.rx_cq_moderation_pkts, cq_mode); return (retval); } return (mlx5_core_modify_cq_moderation(priv->mdev, &rq->cq.mcq, priv->params.rx_cq_moderation_usec, priv->params.rx_cq_moderation_pkts)); } static int mlx5e_refresh_channel_params_sub(struct mlx5e_priv *priv, struct mlx5e_channel *c) { int err; int i; if (c == NULL) return (EINVAL); err = mlx5e_refresh_rq_params(priv, &c->rq); if (err) goto done; for (i = 0; i != c->num_tc; i++) { err = mlx5e_refresh_sq_params(priv, &c->sq[i]); if (err) goto done; } done: return (err); } int mlx5e_refresh_channel_params(struct mlx5e_priv *priv) { int i; if (priv->channel == NULL) return (EINVAL); for (i = 0; i < priv->params.num_channels; i++) { int err; err = mlx5e_refresh_channel_params_sub(priv, priv->channel[i]); if (err) return (err); } return (0); } static int mlx5e_open_tis(struct mlx5e_priv *priv, int tc) { struct mlx5_core_dev *mdev = priv->mdev; u32 in[MLX5_ST_SZ_DW(create_tis_in)]; void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); memset(in, 0, sizeof(in)); MLX5_SET(tisc, tisc, prio, tc); MLX5_SET(tisc, tisc, transport_domain, priv->tdn); return (mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc])); } static void mlx5e_close_tis(struct mlx5e_priv *priv, int tc) { mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc]); } static int mlx5e_open_tises(struct mlx5e_priv *priv) { int num_tc = priv->num_tc; int err; int tc; for (tc = 0; tc < num_tc; tc++) { err = mlx5e_open_tis(priv, tc); if (err) goto err_close_tises; } return (0); err_close_tises: for (tc--; tc >= 0; tc--) mlx5e_close_tis(priv, tc); return (err); } static void mlx5e_close_tises(struct mlx5e_priv *priv) { int num_tc = priv->num_tc; int tc; for (tc = 0; tc < num_tc; tc++) mlx5e_close_tis(priv, tc); } static int mlx5e_open_rqt(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; u32 *in; u32 out[MLX5_ST_SZ_DW(create_rqt_out)] = {0}; void *rqtc; int inlen; int err; int sz; int i; sz = 1 << priv->params.rx_hash_log_tbl_sz; inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + sizeof(u32) * sz; in = mlx5_vzalloc(inlen); if (in == NULL) return (-ENOMEM); rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context); MLX5_SET(rqtc, rqtc, rqt_actual_size, sz); MLX5_SET(rqtc, rqtc, rqt_max_size, sz); for (i = 0; i < sz; i++) { int ix; #ifdef RSS ix = rss_get_indirection_to_bucket(i); #else ix = i; #endif /* ensure we don't overflow */ ix %= priv->params.num_channels; MLX5_SET(rqtc, rqtc, rq_num[i], priv->channel[ix]->rq.rqn); } MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT); err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); if (!err) priv->rqtn = MLX5_GET(create_rqt_out, out, rqtn); kvfree(in); return (err); } static void mlx5e_close_rqt(struct mlx5e_priv *priv) { u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {0}; u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)] = {0}; MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT); MLX5_SET(destroy_rqt_in, in, rqtn, priv->rqtn); 
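/* issue the DESTROY_RQT command; the status is not checked during teardown */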
mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out)); } static void mlx5e_build_tir_ctx(struct mlx5e_priv *priv, u32 * tirc, int tt) { void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); __be32 *hkey; MLX5_SET(tirc, tirc, transport_domain, priv->tdn); #define ROUGH_MAX_L2_L3_HDR_SZ 256 #define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\ MLX5_HASH_FIELD_SEL_DST_IP) #define MLX5_HASH_ALL (MLX5_HASH_FIELD_SEL_SRC_IP |\ MLX5_HASH_FIELD_SEL_DST_IP |\ MLX5_HASH_FIELD_SEL_L4_SPORT |\ MLX5_HASH_FIELD_SEL_L4_DPORT) #define MLX5_HASH_IP_IPSEC_SPI (MLX5_HASH_FIELD_SEL_SRC_IP |\ MLX5_HASH_FIELD_SEL_DST_IP |\ MLX5_HASH_FIELD_SEL_IPSEC_SPI) if (priv->params.hw_lro_en) { MLX5_SET(tirc, tirc, lro_enable_mask, MLX5_TIRC_LRO_ENABLE_MASK_IPV4_LRO | MLX5_TIRC_LRO_ENABLE_MASK_IPV6_LRO); MLX5_SET(tirc, tirc, lro_max_msg_sz, (priv->params.lro_wqe_sz - ROUGH_MAX_L2_L3_HDR_SZ) >> 8); /* TODO: add the option to choose timer value dynamically */ MLX5_SET(tirc, tirc, lro_timeout_period_usecs, MLX5_CAP_ETH(priv->mdev, lro_timer_supported_periods[2])); } /* setup parameters for hashing TIR type, if any */ switch (tt) { case MLX5E_TT_ANY: MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_DIRECT); MLX5_SET(tirc, tirc, inline_rqn, priv->channel[0]->rq.rqn); break; default: MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); MLX5_SET(tirc, tirc, indirect_table, priv->rqtn); MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_TIRC_RX_HASH_FN_HASH_TOEPLITZ); hkey = (__be32 *) MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key); #ifdef RSS /* * The FreeBSD RSS implementation does currently not * support symmetric Toeplitz hashes: */ MLX5_SET(tirc, tirc, rx_hash_symmetric, 0); rss_getkey((uint8_t *)hkey); #else MLX5_SET(tirc, tirc, rx_hash_symmetric, 1); hkey[0] = cpu_to_be32(0xD181C62C); hkey[1] = cpu_to_be32(0xF7F4DB5B); hkey[2] = cpu_to_be32(0x1983A2FC); hkey[3] = cpu_to_be32(0x943E1ADB); hkey[4] = cpu_to_be32(0xD9389E6B); hkey[5] = cpu_to_be32(0xD1039C2C); hkey[6] = cpu_to_be32(0xA74499AD); hkey[7] = cpu_to_be32(0x593D56D9); hkey[8] = cpu_to_be32(0xF3253C06); hkey[9] = cpu_to_be32(0x2ADC1FFC); #endif break; } switch (tt) { case MLX5E_TT_IPV4_TCP: MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4); MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, MLX5_L4_PROT_TYPE_TCP); #ifdef RSS if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV4)) { MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_IP); } else #endif MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_ALL); break; case MLX5E_TT_IPV6_TCP: MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, MLX5_L3_PROT_TYPE_IPV6); MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, MLX5_L4_PROT_TYPE_TCP); #ifdef RSS if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_TCP_IPV6)) { MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_IP); } else #endif MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_ALL); break; case MLX5E_TT_IPV4_UDP: MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4); MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, MLX5_L4_PROT_TYPE_UDP); #ifdef RSS if (!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV4)) { MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_IP); } else #endif MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_ALL); break; case MLX5E_TT_IPV6_UDP: MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, MLX5_L3_PROT_TYPE_IPV6); MLX5_SET(rx_hash_field_select, hfso, l4_prot_type, MLX5_L4_PROT_TYPE_UDP); #ifdef RSS if 
(!(rss_gethashconfig() & RSS_HASHTYPE_RSS_UDP_IPV6)) { MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_IP); } else #endif MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_ALL); break; case MLX5E_TT_IPV4_IPSEC_AH: MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4); MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_IP_IPSEC_SPI); break; case MLX5E_TT_IPV6_IPSEC_AH: MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, MLX5_L3_PROT_TYPE_IPV6); MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_IP_IPSEC_SPI); break; case MLX5E_TT_IPV4_IPSEC_ESP: MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4); MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_IP_IPSEC_SPI); break; case MLX5E_TT_IPV6_IPSEC_ESP: MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, MLX5_L3_PROT_TYPE_IPV6); MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_IP_IPSEC_SPI); break; case MLX5E_TT_IPV4: MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4); MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_IP); break; case MLX5E_TT_IPV6: MLX5_SET(rx_hash_field_select, hfso, l3_prot_type, MLX5_L3_PROT_TYPE_IPV6); MLX5_SET(rx_hash_field_select, hfso, selected_fields, MLX5_HASH_IP); break; default: break; } } static int mlx5e_open_tir(struct mlx5e_priv *priv, int tt) { struct mlx5_core_dev *mdev = priv->mdev; u32 *in; void *tirc; int inlen; int err; inlen = MLX5_ST_SZ_BYTES(create_tir_in); in = mlx5_vzalloc(inlen); if (in == NULL) return (-ENOMEM); tirc = MLX5_ADDR_OF(create_tir_in, in, tir_context); mlx5e_build_tir_ctx(priv, tirc, tt); err = mlx5_core_create_tir(mdev, in, inlen, &priv->tirn[tt]); kvfree(in); return (err); } static void mlx5e_close_tir(struct mlx5e_priv *priv, int tt) { mlx5_core_destroy_tir(priv->mdev, priv->tirn[tt]); } static int mlx5e_open_tirs(struct mlx5e_priv *priv) { int err; int i; for (i = 0; i < MLX5E_NUM_TT; i++) { err = mlx5e_open_tir(priv, i); if (err) goto err_close_tirs; } return (0); err_close_tirs: for (i--; i >= 0; i--) mlx5e_close_tir(priv, i); return (err); } static void mlx5e_close_tirs(struct mlx5e_priv *priv) { int i; for (i = 0; i < MLX5E_NUM_TT; i++) mlx5e_close_tir(priv, i); } /* * SW MTU does not include headers, * HW MTU includes all headers and checksums. 
*/ static int mlx5e_set_dev_port_mtu(struct ifnet *ifp, int sw_mtu) { struct mlx5e_priv *priv = ifp->if_softc; struct mlx5_core_dev *mdev = priv->mdev; int hw_mtu; int err; hw_mtu = MLX5E_SW2HW_MTU(sw_mtu); err = mlx5_set_port_mtu(mdev, hw_mtu); if (err) { if_printf(ifp, "%s: mlx5_set_port_mtu failed setting %d, err=%d\n", __func__, sw_mtu, err); return (err); } /* Update vport context MTU */ err = mlx5_set_vport_mtu(mdev, hw_mtu); if (err) { if_printf(ifp, "%s: Failed updating vport context with MTU size, err=%d\n", __func__, err); } ifp->if_mtu = sw_mtu; err = mlx5_query_vport_mtu(mdev, &hw_mtu); if (err || !hw_mtu) { /* fallback to port oper mtu */ err = mlx5_query_port_oper_mtu(mdev, &hw_mtu); } if (err) { if_printf(ifp, "Query port MTU, after setting new " "MTU value, failed\n"); return (err); } else if (MLX5E_HW2SW_MTU(hw_mtu) < sw_mtu) { err = -E2BIG, if_printf(ifp, "Port MTU %d is smaller than " "ifp mtu %d\n", hw_mtu, sw_mtu); } else if (MLX5E_HW2SW_MTU(hw_mtu) > sw_mtu) { err = -EINVAL; if_printf(ifp, "Port MTU %d is bigger than " "ifp mtu %d\n", hw_mtu, sw_mtu); } priv->params_ethtool.hw_mtu = hw_mtu; return (err); } int mlx5e_open_locked(struct ifnet *ifp) { struct mlx5e_priv *priv = ifp->if_softc; int err; u16 set_id; /* check if already opened */ if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0) return (0); #ifdef RSS if (rss_getnumbuckets() > priv->params.num_channels) { if_printf(ifp, "NOTE: There are more RSS buckets(%u) than " "channels(%u) available\n", rss_getnumbuckets(), priv->params.num_channels); } #endif err = mlx5e_open_tises(priv); if (err) { if_printf(ifp, "%s: mlx5e_open_tises failed, %d\n", __func__, err); return (err); } err = mlx5_vport_alloc_q_counter(priv->mdev, MLX5_INTERFACE_PROTOCOL_ETH, &set_id); if (err) { if_printf(priv->ifp, "%s: mlx5_vport_alloc_q_counter failed: %d\n", __func__, err); goto err_close_tises; } /* store counter set ID */ priv->counter_set_id = set_id; err = mlx5e_open_channels(priv); if (err) { if_printf(ifp, "%s: mlx5e_open_channels failed, %d\n", __func__, err); goto err_dalloc_q_counter; } err = mlx5e_open_rqt(priv); if (err) { if_printf(ifp, "%s: mlx5e_open_rqt failed, %d\n", __func__, err); goto err_close_channels; } err = mlx5e_open_tirs(priv); if (err) { if_printf(ifp, "%s: mlx5e_open_tir failed, %d\n", __func__, err); goto err_close_rqls; } err = mlx5e_open_flow_table(priv); if (err) { if_printf(ifp, "%s: mlx5e_open_flow_table failed, %d\n", __func__, err); goto err_close_tirs; } err = mlx5e_add_all_vlan_rules(priv); if (err) { if_printf(ifp, "%s: mlx5e_add_all_vlan_rules failed, %d\n", __func__, err); goto err_close_flow_table; } set_bit(MLX5E_STATE_OPENED, &priv->state); mlx5e_update_carrier(priv); mlx5e_set_rx_mode_core(priv); return (0); err_close_flow_table: mlx5e_close_flow_table(priv); err_close_tirs: mlx5e_close_tirs(priv); err_close_rqls: mlx5e_close_rqt(priv); err_close_channels: mlx5e_close_channels(priv); err_dalloc_q_counter: mlx5_vport_dealloc_q_counter(priv->mdev, MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id); err_close_tises: mlx5e_close_tises(priv); return (err); } static void mlx5e_open(void *arg) { struct mlx5e_priv *priv = arg; PRIV_LOCK(priv); if (mlx5_set_port_status(priv->mdev, MLX5_PORT_UP)) if_printf(priv->ifp, "%s: Setting port status to up failed\n", __func__); mlx5e_open_locked(priv->ifp); priv->ifp->if_drv_flags |= IFF_DRV_RUNNING; PRIV_UNLOCK(priv); } int mlx5e_close_locked(struct ifnet *ifp) { struct mlx5e_priv *priv = ifp->if_softc; /* check if already closed */ if 
(test_bit(MLX5E_STATE_OPENED, &priv->state) == 0) return (0); clear_bit(MLX5E_STATE_OPENED, &priv->state); mlx5e_set_rx_mode_core(priv); mlx5e_del_all_vlan_rules(priv); if_link_state_change(priv->ifp, LINK_STATE_DOWN); mlx5e_close_flow_table(priv); mlx5e_close_tirs(priv); mlx5e_close_rqt(priv); mlx5e_close_channels(priv); mlx5_vport_dealloc_q_counter(priv->mdev, MLX5_INTERFACE_PROTOCOL_ETH, priv->counter_set_id); mlx5e_close_tises(priv); return (0); } #if (__FreeBSD_version >= 1100000) static uint64_t mlx5e_get_counter(struct ifnet *ifp, ift_counter cnt) { struct mlx5e_priv *priv = ifp->if_softc; u64 retval; /* PRIV_LOCK(priv); XXX not allowed */ switch (cnt) { case IFCOUNTER_IPACKETS: retval = priv->stats.vport.rx_packets; break; case IFCOUNTER_IERRORS: retval = priv->stats.vport.rx_error_packets + priv->stats.pport.alignment_err + priv->stats.pport.check_seq_err + priv->stats.pport.crc_align_errors + priv->stats.pport.in_range_len_errors + priv->stats.pport.jabbers + priv->stats.pport.out_of_range_len + priv->stats.pport.oversize_pkts + priv->stats.pport.symbol_err + priv->stats.pport.too_long_errors + priv->stats.pport.undersize_pkts + priv->stats.pport.unsupported_op_rx; break; case IFCOUNTER_IQDROPS: retval = priv->stats.vport.rx_out_of_buffer + priv->stats.pport.drop_events; break; case IFCOUNTER_OPACKETS: retval = priv->stats.vport.tx_packets; break; case IFCOUNTER_OERRORS: retval = priv->stats.vport.tx_error_packets; break; case IFCOUNTER_IBYTES: retval = priv->stats.vport.rx_bytes; break; case IFCOUNTER_OBYTES: retval = priv->stats.vport.tx_bytes; break; case IFCOUNTER_IMCASTS: retval = priv->stats.vport.rx_multicast_packets; break; case IFCOUNTER_OMCASTS: retval = priv->stats.vport.tx_multicast_packets; break; case IFCOUNTER_OQDROPS: retval = priv->stats.vport.tx_queue_dropped; break; case IFCOUNTER_COLLISIONS: retval = priv->stats.pport.collisions; break; default: retval = if_get_counter_default(ifp, cnt); break; } /* PRIV_UNLOCK(priv); XXX not allowed */ return (retval); } #endif static void mlx5e_set_rx_mode(struct ifnet *ifp) { struct mlx5e_priv *priv = ifp->if_softc; queue_work(priv->wq, &priv->set_rx_mode_work); } static int mlx5e_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct mlx5e_priv *priv; struct ifreq *ifr; struct ifi2creq i2c; int error = 0; int mask = 0; int size_read = 0; int module_status; int module_num; int max_mtu; uint8_t read_addr; priv = ifp->if_softc; /* check if detaching */ if (priv == NULL || priv->gone != 0) return (ENXIO); switch (command) { case SIOCSIFMTU: ifr = (struct ifreq *)data; PRIV_LOCK(priv); mlx5_query_port_max_mtu(priv->mdev, &max_mtu); if (ifr->ifr_mtu >= MLX5E_MTU_MIN && ifr->ifr_mtu <= MIN(MLX5E_MTU_MAX, max_mtu)) { int was_opened; was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); if (was_opened) mlx5e_close_locked(ifp); /* set new MTU */ mlx5e_set_dev_port_mtu(ifp, ifr->ifr_mtu); if (was_opened) mlx5e_open_locked(ifp); } else { error = EINVAL; if_printf(ifp, "Invalid MTU value. 
Min val: %d, Max val: %d\n", MLX5E_MTU_MIN, MIN(MLX5E_MTU_MAX, max_mtu)); } PRIV_UNLOCK(priv); break; case SIOCSIFFLAGS: if ((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING)) { mlx5e_set_rx_mode(ifp); break; } PRIV_LOCK(priv); if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { if (test_bit(MLX5E_STATE_OPENED, &priv->state) == 0) mlx5e_open_locked(ifp); ifp->if_drv_flags |= IFF_DRV_RUNNING; mlx5_set_port_status(priv->mdev, MLX5_PORT_UP); } } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { mlx5_set_port_status(priv->mdev, MLX5_PORT_DOWN); if (test_bit(MLX5E_STATE_OPENED, &priv->state) != 0) mlx5e_close_locked(ifp); mlx5e_update_carrier(priv); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; } } PRIV_UNLOCK(priv); break; case SIOCADDMULTI: case SIOCDELMULTI: mlx5e_set_rx_mode(ifp); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: case SIOCGIFXMEDIA: ifr = (struct ifreq *)data; error = ifmedia_ioctl(ifp, ifr, &priv->media, command); break; case SIOCSIFCAP: ifr = (struct ifreq *)data; PRIV_LOCK(priv); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { ifp->if_capenable ^= IFCAP_TXCSUM; ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP); if (IFCAP_TSO4 & ifp->if_capenable && !(IFCAP_TXCSUM & ifp->if_capenable)) { ifp->if_capenable &= ~IFCAP_TSO4; ifp->if_hwassist &= ~CSUM_IP_TSO; if_printf(ifp, "tso4 disabled due to -txcsum.\n"); } } if (mask & IFCAP_TXCSUM_IPV6) { ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6); if (IFCAP_TSO6 & ifp->if_capenable && !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) { ifp->if_capenable &= ~IFCAP_TSO6; ifp->if_hwassist &= ~CSUM_IP6_TSO; if_printf(ifp, "tso6 disabled due to -txcsum6.\n"); } } if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_RXCSUM_IPV6) ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; if (mask & IFCAP_TSO4) { if (!(IFCAP_TSO4 & ifp->if_capenable) && !(IFCAP_TXCSUM & ifp->if_capenable)) { if_printf(ifp, "enable txcsum first.\n"); error = EAGAIN; goto out; } ifp->if_capenable ^= IFCAP_TSO4; ifp->if_hwassist ^= CSUM_IP_TSO; } if (mask & IFCAP_TSO6) { if (!(IFCAP_TSO6 & ifp->if_capenable) && !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) { if_printf(ifp, "enable txcsum6 first.\n"); error = EAGAIN; goto out; } ifp->if_capenable ^= IFCAP_TSO6; ifp->if_hwassist ^= CSUM_IP6_TSO; } if (mask & IFCAP_VLAN_HWFILTER) { if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) mlx5e_disable_vlan_filter(priv); else mlx5e_enable_vlan_filter(priv); ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; } if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_WOL_MAGIC) ifp->if_capenable ^= IFCAP_WOL_MAGIC; VLAN_CAPABILITIES(ifp); /* turn off LRO means also turn of HW LRO - if it's on */ if (mask & IFCAP_LRO) { int was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); bool need_restart = false; ifp->if_capenable ^= IFCAP_LRO; if (!(ifp->if_capenable & IFCAP_LRO)) { if (priv->params.hw_lro_en) { priv->params.hw_lro_en = false; need_restart = true; /* Not sure this is the correct way */ priv->params_ethtool.hw_lro = priv->params.hw_lro_en; } } if (was_opened && need_restart) { mlx5e_close_locked(ifp); mlx5e_open_locked(ifp); } } out: PRIV_UNLOCK(priv); break; case SIOCGI2C: ifr = (struct ifreq *)data; /* * Copy from the user-space address ifr_data to the * kernel-space address i2c */ - error = copyin(ifr->ifr_data, &i2c, sizeof(i2c)); + error = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c)); if (error) break; if (i2c.len > sizeof(i2c.data)) { 
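/* reject requests which do not fit in the i2c data buffer */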
error = EINVAL; break; } PRIV_LOCK(priv); /* Get module_num which is required for the query_eeprom */ error = mlx5_query_module_num(priv->mdev, &module_num); if (error) { if_printf(ifp, "Query module num failed, eeprom " "reading is not supported\n"); error = EINVAL; goto err_i2c; } /* Check if module is present before doing an access */ module_status = mlx5_query_module_status(priv->mdev, module_num); if (module_status != MLX5_MODULE_STATUS_PLUGGED_ENABLED && module_status != MLX5_MODULE_STATUS_PLUGGED_DISABLED) { error = EINVAL; goto err_i2c; } /* * Currently 0XA0 and 0xA2 are the only addresses permitted. * The internal conversion is as follows: */ if (i2c.dev_addr == 0xA0) read_addr = MLX5E_I2C_ADDR_LOW; else if (i2c.dev_addr == 0xA2) read_addr = MLX5E_I2C_ADDR_HIGH; else { if_printf(ifp, "Query eeprom failed, " "Invalid Address: %X\n", i2c.dev_addr); error = EINVAL; goto err_i2c; } error = mlx5_query_eeprom(priv->mdev, read_addr, MLX5E_EEPROM_LOW_PAGE, (uint32_t)i2c.offset, (uint32_t)i2c.len, module_num, (uint32_t *)i2c.data, &size_read); if (error) { if_printf(ifp, "Query eeprom failed, eeprom " "reading is not supported\n"); error = EINVAL; goto err_i2c; } if (i2c.len > MLX5_EEPROM_MAX_BYTES) { error = mlx5_query_eeprom(priv->mdev, read_addr, MLX5E_EEPROM_LOW_PAGE, (uint32_t)(i2c.offset + size_read), (uint32_t)(i2c.len - size_read), module_num, (uint32_t *)(i2c.data + size_read), &size_read); } if (error) { if_printf(ifp, "Query eeprom failed, eeprom " "reading is not supported\n"); error = EINVAL; goto err_i2c; } - error = copyout(&i2c, ifr->ifr_data, sizeof(i2c)); + error = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c)); err_i2c: PRIV_UNLOCK(priv); break; default: error = ether_ioctl(ifp, command, data); break; } return (error); } static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) { /* * TODO: uncoment once FW really sets all these bits if * (!mdev->caps.eth.rss_ind_tbl_cap || !mdev->caps.eth.csum_cap || * !mdev->caps.eth.max_lso_cap || !mdev->caps.eth.vlan_cap || * !(mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_SCQE_BRK_MOD)) return * -ENOTSUPP; */ /* TODO: add more must-to-have features */ if (MLX5_CAP_GEN(mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) return (-ENODEV); return (0); } static void mlx5e_build_ifp_priv(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv, int num_comp_vectors) { /* * TODO: Consider link speed for setting "log_sq_size", * "log_rq_size" and "cq_moderation_xxx": */ priv->params.log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; priv->params.rx_cq_moderation_usec = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE : MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC; priv->params.rx_cq_moderation_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? 1 : 0; priv->params.rx_cq_moderation_pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS; priv->params.tx_cq_moderation_usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC; priv->params.tx_cq_moderation_pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; priv->params.min_rx_wqes = MLX5E_PARAMS_DEFAULT_MIN_RX_WQES; priv->params.rx_hash_log_tbl_sz = (order_base_2(num_comp_vectors) > MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ) ? order_base_2(num_comp_vectors) : MLX5E_PARAMS_DEFAULT_RX_HASH_LOG_TBL_SZ; priv->params.num_tc = 1; priv->params.default_vlan_prio = 0; priv->counter_set_id = -1; /* * hw lro is currently defaulted to off. 
when it won't anymore we * will consider the HW capability: "!!MLX5_CAP_ETH(mdev, lro_cap)" */ priv->params.hw_lro_en = false; priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; priv->params.cqe_zipping_en = !!MLX5_CAP_GEN(mdev, cqe_compression); priv->mdev = mdev; priv->params.num_channels = num_comp_vectors; priv->order_base_2_num_channels = order_base_2(num_comp_vectors); priv->queue_mapping_channel_mask = roundup_pow_of_two(num_comp_vectors) - 1; priv->num_tc = priv->params.num_tc; priv->default_vlan_prio = priv->params.default_vlan_prio; INIT_WORK(&priv->update_stats_work, mlx5e_update_stats_work); INIT_WORK(&priv->update_carrier_work, mlx5e_update_carrier_work); INIT_WORK(&priv->set_rx_mode_work, mlx5e_set_rx_mode_work); } static int mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn, struct mlx5_core_mr *mkey) { struct ifnet *ifp = priv->ifp; struct mlx5_core_dev *mdev = priv->mdev; int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); void *mkc; u32 *in; int err; in = mlx5_vzalloc(inlen); if (in == NULL) { if_printf(ifp, "%s: failed to allocate inbox\n", __func__); return (-ENOMEM); } mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); MLX5_SET(mkc, mkc, access_mode, MLX5_ACCESS_MODE_PA); MLX5_SET(mkc, mkc, lw, 1); MLX5_SET(mkc, mkc, lr, 1); MLX5_SET(mkc, mkc, pd, pdn); MLX5_SET(mkc, mkc, length64, 1); MLX5_SET(mkc, mkc, qpn, 0xffffff); err = mlx5_core_create_mkey(mdev, mkey, in, inlen); if (err) if_printf(ifp, "%s: mlx5_core_create_mkey failed, %d\n", __func__, err); kvfree(in); return (err); } static const char *mlx5e_vport_stats_desc[] = { MLX5E_VPORT_STATS(MLX5E_STATS_DESC) }; static const char *mlx5e_pport_stats_desc[] = { MLX5E_PPORT_STATS(MLX5E_STATS_DESC) }; static void mlx5e_priv_mtx_init(struct mlx5e_priv *priv) { mtx_init(&priv->async_events_mtx, "mlx5async", MTX_NETWORK_LOCK, MTX_DEF); sx_init(&priv->state_lock, "mlx5state"); callout_init_mtx(&priv->watchdog, &priv->async_events_mtx, 0); MLX5_INIT_DOORBELL_LOCK(&priv->doorbell_lock); } static void mlx5e_priv_mtx_destroy(struct mlx5e_priv *priv) { mtx_destroy(&priv->async_events_mtx); sx_destroy(&priv->state_lock); } static int sysctl_firmware(SYSCTL_HANDLER_ARGS) { /* * %d.%d%.d the string format. * fw_rev_{maj,min,sub} return u16, 2^16 = 65536. * We need at most 5 chars to store that. * It also has: two "." and NULL at the end, which means we need 18 * (5*3 + 3) chars at most. 
*/ char fw[18]; struct mlx5e_priv *priv = arg1; int error; snprintf(fw, sizeof(fw), "%d.%d.%d", fw_rev_maj(priv->mdev), fw_rev_min(priv->mdev), fw_rev_sub(priv->mdev)); error = sysctl_handle_string(oidp, fw, sizeof(fw), req); return (error); } static void mlx5e_disable_tx_dma(struct mlx5e_channel *ch) { int i; for (i = 0; i < ch->num_tc; i++) mlx5e_drain_sq(&ch->sq[i]); } static void mlx5e_reset_sq_doorbell_record(struct mlx5e_sq *sq) { sq->doorbell.d32[0] = cpu_to_be32(MLX5_OPCODE_NOP); sq->doorbell.d32[1] = cpu_to_be32(sq->sqn << 8); mlx5e_tx_notify_hw(sq, sq->doorbell.d32, 0); sq->doorbell.d64 = 0; } void mlx5e_resume_sq(struct mlx5e_sq *sq) { int err; /* check if already enabled */ if (sq->stopped == 0) return; err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_ERR, MLX5_SQC_STATE_RST); if (err != 0) { if_printf(sq->ifp, "mlx5e_modify_sq() from ERR to RST failed: %d\n", err); } sq->cc = 0; sq->pc = 0; /* reset doorbell prior to moving from RST to RDY */ mlx5e_reset_sq_doorbell_record(sq); err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY); if (err != 0) { if_printf(sq->ifp, "mlx5e_modify_sq() from RST to RDY failed: %d\n", err); } mtx_lock(&sq->lock); sq->cev_next_state = MLX5E_CEV_STATE_INITIAL; sq->stopped = 0; mtx_unlock(&sq->lock); } static void mlx5e_enable_tx_dma(struct mlx5e_channel *ch) { int i; for (i = 0; i < ch->num_tc; i++) mlx5e_resume_sq(&ch->sq[i]); } static void mlx5e_disable_rx_dma(struct mlx5e_channel *ch) { struct mlx5e_rq *rq = &ch->rq; int err; mtx_lock(&rq->mtx); rq->enabled = 0; callout_stop(&rq->watchdog); mtx_unlock(&rq->mtx); callout_drain(&rq->watchdog); err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RDY, MLX5_RQC_STATE_ERR); if (err != 0) { if_printf(rq->ifp, "mlx5e_modify_rq() from RDY to RST failed: %d\n", err); } while (!mlx5_wq_ll_is_empty(&rq->wq)) { msleep(1); rq->cq.mcq.comp(&rq->cq.mcq); } /* * Transitioning into RST state will allow the FW to track less ERR state queues, * thus reducing the recv queue flushing time */ err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_ERR, MLX5_RQC_STATE_RST); if (err != 0) { if_printf(rq->ifp, "mlx5e_modify_rq() from ERR to RST failed: %d\n", err); } } static void mlx5e_enable_rx_dma(struct mlx5e_channel *ch) { struct mlx5e_rq *rq = &ch->rq; int err; rq->wq.wqe_ctr = 0; mlx5_wq_ll_update_db_record(&rq->wq); err = mlx5e_modify_rq(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY); if (err != 0) { if_printf(rq->ifp, "mlx5e_modify_rq() from RST to RDY failed: %d\n", err); } rq->enabled = 1; rq->cq.mcq.comp(&rq->cq.mcq); } void mlx5e_modify_tx_dma(struct mlx5e_priv *priv, uint8_t value) { int i; if (priv->channel == NULL) return; for (i = 0; i < priv->params.num_channels; i++) { if (!priv->channel[i]) continue; if (value) mlx5e_disable_tx_dma(priv->channel[i]); else mlx5e_enable_tx_dma(priv->channel[i]); } } void mlx5e_modify_rx_dma(struct mlx5e_priv *priv, uint8_t value) { int i; if (priv->channel == NULL) return; for (i = 0; i < priv->params.num_channels; i++) { if (!priv->channel[i]) continue; if (value) mlx5e_disable_rx_dma(priv->channel[i]); else mlx5e_enable_rx_dma(priv->channel[i]); } } static void mlx5e_add_hw_stats(struct mlx5e_priv *priv) { SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw), OID_AUTO, "fw_version", CTLTYPE_STRING | CTLFLAG_RD, priv, 0, sysctl_firmware, "A", "HCA firmware version"); SYSCTL_ADD_STRING(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_hw), OID_AUTO, "board_id", CTLFLAG_RD, priv->mdev->board_id, 0, "Board ID"); } static int mlx5e_sysctl_tx_priority_flow_control(SYSCTL_HANDLER_ARGS) 
{ struct mlx5e_priv *priv = arg1; uint32_t tx_pfc; uint32_t value; int error; PRIV_LOCK(priv); tx_pfc = priv->params.tx_priority_flow_control; /* get current value */ value = (tx_pfc >> arg2) & 1; error = sysctl_handle_32(oidp, &value, 0, req); /* range check value */ if (value != 0) priv->params.tx_priority_flow_control |= (1 << arg2); else priv->params.tx_priority_flow_control &= ~(1 << arg2); /* check if update is required */ if (error == 0 && priv->gone == 0 && tx_pfc != priv->params.tx_priority_flow_control) { error = -mlx5e_set_port_pfc(priv); /* restore previous value */ if (error != 0) priv->params.tx_priority_flow_control= tx_pfc; } PRIV_UNLOCK(priv); return (error); } static int mlx5e_sysctl_rx_priority_flow_control(SYSCTL_HANDLER_ARGS) { struct mlx5e_priv *priv = arg1; uint32_t rx_pfc; uint32_t value; int error; PRIV_LOCK(priv); rx_pfc = priv->params.rx_priority_flow_control; /* get current value */ value = (rx_pfc >> arg2) & 1; error = sysctl_handle_32(oidp, &value, 0, req); /* range check value */ if (value != 0) priv->params.rx_priority_flow_control |= (1 << arg2); else priv->params.rx_priority_flow_control &= ~(1 << arg2); /* check if update is required */ if (error == 0 && priv->gone == 0 && rx_pfc != priv->params.rx_priority_flow_control) { error = -mlx5e_set_port_pfc(priv); /* restore previous value */ if (error != 0) priv->params.rx_priority_flow_control= rx_pfc; } PRIV_UNLOCK(priv); return (error); } static void mlx5e_setup_pauseframes(struct mlx5e_priv *priv) { unsigned int x; char path[96]; int error; /* Only receiving pauseframes is enabled by default */ priv->params.tx_pauseframe_control = 0; priv->params.rx_pauseframe_control = 1; /* disable ports flow control, PFC, by default */ priv->params.tx_priority_flow_control = 0; priv->params.rx_priority_flow_control = 0; #if (__FreeBSD_version < 1100000) /* compute path for sysctl */ snprintf(path, sizeof(path), "dev.mce.%d.tx_pauseframe_control", device_get_unit(priv->mdev->pdev->dev.bsddev)); /* try to fetch tunable, if any */ TUNABLE_INT_FETCH(path, &priv->params.tx_pauseframe_control); /* compute path for sysctl */ snprintf(path, sizeof(path), "dev.mce.%d.rx_pauseframe_control", device_get_unit(priv->mdev->pdev->dev.bsddev)); /* try to fetch tunable, if any */ TUNABLE_INT_FETCH(path, &priv->params.rx_pauseframe_control); for (x = 0; x != 8; x++) { /* compute path for sysctl */ snprintf(path, sizeof(path), "dev.mce.%d.tx_priority_flow_control_%u", device_get_unit(priv->mdev->pdev->dev.bsddev), x); /* try to fetch tunable, if any */ if (TUNABLE_INT_FETCH(path, &value) == 0 && value != 0) priv->params.tx_priority_flow_control |= 1 << x; /* compute path for sysctl */ snprintf(path, sizeof(path), "dev.mce.%d.rx_priority_flow_control_%u", device_get_unit(priv->mdev->pdev->dev.bsddev), x); /* try to fetch tunable, if any */ if (TUNABLE_INT_FETCH(path, &value) == 0 && value != 0) priv->params.rx_priority_flow_control |= 1 << x; } #endif /* register pauseframe SYSCTLs */ SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO, "tx_pauseframe_control", CTLFLAG_RDTUN, &priv->params.tx_pauseframe_control, 0, "Set to enable TX pause frames. Clear to disable."); SYSCTL_ADD_INT(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO, "rx_pauseframe_control", CTLFLAG_RDTUN, &priv->params.rx_pauseframe_control, 0, "Set to enable RX pause frames. 
Clear to disable."); /* register priority_flow control, PFC, SYSCTLs */ for (x = 0; x != 8; x++) { snprintf(path, sizeof(path), "tx_priority_flow_control_%u", x); SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO, path, CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, priv, x, &mlx5e_sysctl_tx_priority_flow_control, "IU", "Set to enable TX ports flow control frames for given priority. Clear to disable."); snprintf(path, sizeof(path), "rx_priority_flow_control_%u", x); SYSCTL_ADD_PROC(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO, path, CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, priv, x, &mlx5e_sysctl_rx_priority_flow_control, "IU", "Set to enable RX ports flow control frames for given priority. Clear to disable."); } PRIV_LOCK(priv); /* range check */ priv->params.tx_pauseframe_control = priv->params.tx_pauseframe_control ? 1 : 0; priv->params.rx_pauseframe_control = priv->params.rx_pauseframe_control ? 1 : 0; /* update firmware */ error = mlx5e_set_port_pause_and_pfc(priv); if (error == -EINVAL) { if_printf(priv->ifp, "Global pauseframes must be disabled before enabling PFC.\n"); priv->params.rx_priority_flow_control = 0; priv->params.tx_priority_flow_control = 0; /* update firmware */ (void) mlx5e_set_port_pause_and_pfc(priv); } PRIV_UNLOCK(priv); } static void * mlx5e_create_ifp(struct mlx5_core_dev *mdev) { struct ifnet *ifp; struct mlx5e_priv *priv; u8 dev_addr[ETHER_ADDR_LEN] __aligned(4); struct sysctl_oid_list *child; int ncv = mdev->priv.eq_table.num_comp_vectors; char unit[16]; int err; int i; u32 eth_proto_cap; if (mlx5e_check_required_hca_cap(mdev)) { mlx5_core_dbg(mdev, "mlx5e_check_required_hca_cap() failed\n"); return (NULL); } priv = malloc(sizeof(*priv), M_MLX5EN, M_WAITOK | M_ZERO); mlx5e_priv_mtx_init(priv); ifp = priv->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { mlx5_core_err(mdev, "if_alloc() failed\n"); goto err_free_priv; } ifp->if_softc = priv; if_initname(ifp, "mce", device_get_unit(mdev->pdev->dev.bsddev)); ifp->if_mtu = ETHERMTU; ifp->if_init = mlx5e_open; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = mlx5e_ioctl; ifp->if_transmit = mlx5e_xmit; ifp->if_qflush = if_qflush; #if (__FreeBSD_version >= 1100000) ifp->if_get_counter = mlx5e_get_counter; #endif ifp->if_snd.ifq_maxlen = ifqmaxlen; /* * Set driver features */ ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6; ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING; ifp->if_capabilities |= IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWFILTER; ifp->if_capabilities |= IFCAP_LINKSTATE | IFCAP_JUMBO_MTU; ifp->if_capabilities |= IFCAP_LRO; ifp->if_capabilities |= IFCAP_TSO | IFCAP_VLAN_HWTSO; ifp->if_capabilities |= IFCAP_HWSTATS; /* set TSO limits so that we don't have to drop TX packets */ ifp->if_hw_tsomax = MLX5E_MAX_TX_PAYLOAD_SIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); ifp->if_hw_tsomaxsegcount = MLX5E_MAX_TX_MBUF_FRAGS - 1 /* hdr */; ifp->if_hw_tsomaxsegsize = MLX5E_MAX_TX_MBUF_SIZE; ifp->if_capenable = ifp->if_capabilities; ifp->if_hwassist = 0; if (ifp->if_capenable & IFCAP_TSO) ifp->if_hwassist |= CSUM_TSO; if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP | CSUM_IP); if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) ifp->if_hwassist |= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6); /* ifnet sysctl tree */ sysctl_ctx_init(&priv->sysctl_ctx); priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dev), OID_AUTO, ifp->if_dname, CTLFLAG_RD, 0, "MLX5 ethernet - interface name"); if 
(priv->sysctl_ifnet == NULL) { mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n"); goto err_free_sysctl; } snprintf(unit, sizeof(unit), "%d", ifp->if_dunit); priv->sysctl_ifnet = SYSCTL_ADD_NODE(&priv->sysctl_ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), OID_AUTO, unit, CTLFLAG_RD, 0, "MLX5 ethernet - interface unit"); if (priv->sysctl_ifnet == NULL) { mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n"); goto err_free_sysctl; } /* HW sysctl tree */ child = SYSCTL_CHILDREN(device_get_sysctl_tree(mdev->pdev->dev.bsddev)); priv->sysctl_hw = SYSCTL_ADD_NODE(&priv->sysctl_ctx, child, OID_AUTO, "hw", CTLFLAG_RD, 0, "MLX5 ethernet dev hw"); if (priv->sysctl_hw == NULL) { mlx5_core_err(mdev, "SYSCTL_ADD_NODE() failed\n"); goto err_free_sysctl; } mlx5e_build_ifp_priv(mdev, priv, ncv); snprintf(unit, sizeof(unit), "mce%u_wq", device_get_unit(mdev->pdev->dev.bsddev)); priv->wq = alloc_workqueue(unit, 0, 1); if (priv->wq == NULL) { if_printf(ifp, "%s: alloc_workqueue failed\n", __func__); goto err_free_sysctl; } err = mlx5_alloc_map_uar(mdev, &priv->cq_uar); if (err) { if_printf(ifp, "%s: mlx5_alloc_map_uar failed, %d\n", __func__, err); goto err_free_wq; } err = mlx5_core_alloc_pd(mdev, &priv->pdn); if (err) { if_printf(ifp, "%s: mlx5_core_alloc_pd failed, %d\n", __func__, err); goto err_unmap_free_uar; } err = mlx5_alloc_transport_domain(mdev, &priv->tdn); if (err) { if_printf(ifp, "%s: mlx5_alloc_transport_domain failed, %d\n", __func__, err); goto err_dealloc_pd; } err = mlx5e_create_mkey(priv, priv->pdn, &priv->mr); if (err) { if_printf(ifp, "%s: mlx5e_create_mkey failed, %d\n", __func__, err); goto err_dealloc_transport_domain; } mlx5_query_nic_vport_mac_address(priv->mdev, 0, dev_addr); /* check if we should generate a random MAC address */ if (MLX5_CAP_GEN(priv->mdev, vport_group_manager) == 0 && is_zero_ether_addr(dev_addr)) { random_ether_addr(dev_addr); if_printf(ifp, "Assigned random MAC address\n"); } /* set default MTU */ mlx5e_set_dev_port_mtu(ifp, ifp->if_mtu); /* Set desc */ device_set_desc(mdev->pdev->dev.bsddev, mlx5e_version); /* Set default media status */ priv->media_status_last = IFM_AVALID; priv->media_active_last = IFM_ETHER | IFM_AUTO | IFM_ETH_RXPAUSE | IFM_FDX; /* setup default pauseframes configuration */ mlx5e_setup_pauseframes(priv); err = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN); if (err) { eth_proto_cap = 0; if_printf(ifp, "%s: Query port media capability failed, %d\n", __func__, err); } /* Setup supported medias */ ifmedia_init(&priv->media, IFM_IMASK | IFM_ETH_FMASK, mlx5e_media_change, mlx5e_media_status); for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { if (mlx5e_mode_table[i].baudrate == 0) continue; if (MLX5E_PROT_MASK(i) & eth_proto_cap) { ifmedia_add(&priv->media, mlx5e_mode_table[i].subtype | IFM_ETHER, 0, NULL); ifmedia_add(&priv->media, mlx5e_mode_table[i].subtype | IFM_ETHER | IFM_FDX | IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL); } } ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_add(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX | IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE, 0, NULL); /* Set autoselect by default */ ifmedia_set(&priv->media, IFM_ETHER | IFM_AUTO | IFM_FDX | IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE); ether_ifattach(ifp, dev_addr); /* Register for VLAN events */ priv->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, mlx5e_vlan_rx_add_vid, priv, EVENTHANDLER_PRI_FIRST); priv->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, mlx5e_vlan_rx_kill_vid, priv, EVENTHANDLER_PRI_FIRST); /* Link is down by default */
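/*
 * Aside (illustrative only, not part of the attach path): the per-priority
 * flow control knobs registered by mlx5e_setup_pauseframes() above are
 * plain sysctls once the interface exists, e.g.
 *
 *	sysctl dev.mce.0.tx_priority_flow_control_3=1
 *
 * (unit 0 and priority 3 are example values).  The handler flips the
 * corresponding bit in priv->params.tx_priority_flow_control and, if the
 * mask changed, pushes it to firmware via mlx5e_set_port_pfc(), restoring
 * the previous mask on failure.
 */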
if_link_state_change(ifp, LINK_STATE_DOWN); mlx5e_enable_async_events(priv); mlx5e_add_hw_stats(priv); mlx5e_create_stats(&priv->stats.vport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), "vstats", mlx5e_vport_stats_desc, MLX5E_VPORT_STATS_NUM, priv->stats.vport.arg); mlx5e_create_stats(&priv->stats.pport.ctx, SYSCTL_CHILDREN(priv->sysctl_ifnet), "pstats", mlx5e_pport_stats_desc, MLX5E_PPORT_STATS_NUM, priv->stats.pport.arg); mlx5e_create_ethtool(priv); mtx_lock(&priv->async_events_mtx); mlx5e_update_stats(priv); mtx_unlock(&priv->async_events_mtx); return (priv); err_dealloc_transport_domain: mlx5_dealloc_transport_domain(mdev, priv->tdn); err_dealloc_pd: mlx5_core_dealloc_pd(mdev, priv->pdn); err_unmap_free_uar: mlx5_unmap_free_uar(mdev, &priv->cq_uar); err_free_wq: destroy_workqueue(priv->wq); err_free_sysctl: sysctl_ctx_free(&priv->sysctl_ctx); if_free(ifp); err_free_priv: mlx5e_priv_mtx_destroy(priv); free(priv, M_MLX5EN); return (NULL); } static void mlx5e_destroy_ifp(struct mlx5_core_dev *mdev, void *vpriv) { struct mlx5e_priv *priv = vpriv; struct ifnet *ifp = priv->ifp; /* don't allow more IOCTLs */ priv->gone = 1; /* * Clear the device description to avoid use after free, * because the bsddev is not destroyed when this module is * unloaded: */ device_set_desc(mdev->pdev->dev.bsddev, NULL); /* XXX wait a bit to allow IOCTL handlers to complete */ pause("W", hz); /* stop watchdog timer */ callout_drain(&priv->watchdog); if (priv->vlan_attach != NULL) EVENTHANDLER_DEREGISTER(vlan_config, priv->vlan_attach); if (priv->vlan_detach != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, priv->vlan_detach); /* make sure device gets closed */ PRIV_LOCK(priv); mlx5e_close_locked(ifp); PRIV_UNLOCK(priv); /* unregister device */ ifmedia_removeall(&priv->media); ether_ifdetach(ifp); if_free(ifp); /* destroy all remaining sysctl nodes */ if (priv->sysctl_debug) sysctl_ctx_free(&priv->stats.port_stats_debug.ctx); sysctl_ctx_free(&priv->stats.vport.ctx); sysctl_ctx_free(&priv->stats.pport.ctx); sysctl_ctx_free(&priv->sysctl_ctx); mlx5_core_destroy_mkey(priv->mdev, &priv->mr); mlx5_dealloc_transport_domain(priv->mdev, priv->tdn); mlx5_core_dealloc_pd(priv->mdev, priv->pdn); mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar); mlx5e_disable_async_events(priv); destroy_workqueue(priv->wq); mlx5e_priv_mtx_destroy(priv); free(priv, M_MLX5EN); } static void * mlx5e_get_ifp(void *vpriv) { struct mlx5e_priv *priv = vpriv; return (priv->ifp); } static struct mlx5_interface mlx5e_interface = { .add = mlx5e_create_ifp, .remove = mlx5e_destroy_ifp, .event = mlx5e_async_event, .protocol = MLX5_INTERFACE_PROTOCOL_ETH, .get_dev = mlx5e_get_ifp, }; void mlx5e_init(void) { mlx5_register_interface(&mlx5e_interface); } void mlx5e_cleanup(void) { mlx5_unregister_interface(&mlx5e_interface); } module_init_order(mlx5e_init, SI_ORDER_THIRD); module_exit_order(mlx5e_cleanup, SI_ORDER_THIRD); #if (__FreeBSD_version >= 1100000) MODULE_DEPEND(mlx5en, linuxkpi, 1, 1, 1); #endif MODULE_DEPEND(mlx5en, mlx5, 1, 1, 1); MODULE_VERSION(mlx5en, 1); Index: stable/11/sys/dev/mwl/if_mwl.c =================================================================== --- stable/11/sys/dev/mwl/if_mwl.c (revision 332287) +++ stable/11/sys/dev/mwl/if_mwl.c (revision 332288) @@ -1,4834 +1,4834 @@ /*- * Copyright (c) 2007-2009 Sam Leffler, Errno Consulting * Copyright (c) 2007-2008 Marvell Semiconductor, Inc. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification. * 2. Redistributions in binary form must reproduce at minimum a disclaimer * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any * redistribution must be conditioned upon including a substantially * similar Disclaimer requirement for further binary redistribution. * * NO WARRANTY * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGES. */ #include __FBSDID("$FreeBSD$"); /* * Driver for the Marvell 88W8363 Wireless LAN controller. */ #include "opt_inet.h" #include "opt_mwl.h" #include "opt_wlan.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #endif /* INET */ #include #include /* idiomatic shorthands: MS = mask+shift, SM = shift+mask */ #define MS(v,x) (((v) & x) >> x##_S) #define SM(v,x) (((v) << x##_S) & x) static struct ieee80211vap *mwl_vap_create(struct ieee80211com *, const char [IFNAMSIZ], int, enum ieee80211_opmode, int, const uint8_t [IEEE80211_ADDR_LEN], const uint8_t [IEEE80211_ADDR_LEN]); static void mwl_vap_delete(struct ieee80211vap *); static int mwl_setupdma(struct mwl_softc *); static int mwl_hal_reset(struct mwl_softc *sc); static int mwl_init(struct mwl_softc *); static void mwl_parent(struct ieee80211com *); static int mwl_reset(struct ieee80211vap *, u_long); static void mwl_stop(struct mwl_softc *); static void mwl_start(struct mwl_softc *); static int mwl_transmit(struct ieee80211com *, struct mbuf *); static int mwl_raw_xmit(struct ieee80211_node *, struct mbuf *, const struct ieee80211_bpf_params *); static int mwl_media_change(struct ifnet *); static void mwl_watchdog(void *); static int mwl_ioctl(struct ieee80211com *, u_long, void *); static void mwl_radar_proc(void *, int); static void mwl_chanswitch_proc(void *, int); static void mwl_bawatchdog_proc(void *, int); static int mwl_key_alloc(struct ieee80211vap *, struct ieee80211_key *, ieee80211_keyix *, ieee80211_keyix *); static int mwl_key_delete(struct ieee80211vap *, const struct ieee80211_key *); static int mwl_key_set(struct ieee80211vap *, const struct ieee80211_key *); static int _mwl_key_set(struct ieee80211vap *, const struct ieee80211_key *, const uint8_t mac[IEEE80211_ADDR_LEN]); static int mwl_mode_init(struct mwl_softc *); static void mwl_update_mcast(struct ieee80211com *); static void mwl_update_promisc(struct ieee80211com *); static void 
mwl_updateslot(struct ieee80211com *); static int mwl_beacon_setup(struct ieee80211vap *); static void mwl_beacon_update(struct ieee80211vap *, int); #ifdef MWL_HOST_PS_SUPPORT static void mwl_update_ps(struct ieee80211vap *, int); static int mwl_set_tim(struct ieee80211_node *, int); #endif static int mwl_dma_setup(struct mwl_softc *); static void mwl_dma_cleanup(struct mwl_softc *); static struct ieee80211_node *mwl_node_alloc(struct ieee80211vap *, const uint8_t [IEEE80211_ADDR_LEN]); static void mwl_node_cleanup(struct ieee80211_node *); static void mwl_node_drain(struct ieee80211_node *); static void mwl_node_getsignal(const struct ieee80211_node *, int8_t *, int8_t *); static void mwl_node_getmimoinfo(const struct ieee80211_node *, struct ieee80211_mimo_info *); static int mwl_rxbuf_init(struct mwl_softc *, struct mwl_rxbuf *); static void mwl_rx_proc(void *, int); static void mwl_txq_init(struct mwl_softc *sc, struct mwl_txq *, int); static int mwl_tx_setup(struct mwl_softc *, int, int); static int mwl_wme_update(struct ieee80211com *); static void mwl_tx_cleanupq(struct mwl_softc *, struct mwl_txq *); static void mwl_tx_cleanup(struct mwl_softc *); static uint16_t mwl_calcformat(uint8_t rate, const struct ieee80211_node *); static int mwl_tx_start(struct mwl_softc *, struct ieee80211_node *, struct mwl_txbuf *, struct mbuf *); static void mwl_tx_proc(void *, int); static int mwl_chan_set(struct mwl_softc *, struct ieee80211_channel *); static void mwl_draintxq(struct mwl_softc *); static void mwl_cleartxq(struct mwl_softc *, struct ieee80211vap *); static int mwl_recv_action(struct ieee80211_node *, const struct ieee80211_frame *, const uint8_t *, const uint8_t *); static int mwl_addba_request(struct ieee80211_node *, struct ieee80211_tx_ampdu *, int dialogtoken, int baparamset, int batimeout); static int mwl_addba_response(struct ieee80211_node *, struct ieee80211_tx_ampdu *, int status, int baparamset, int batimeout); static void mwl_addba_stop(struct ieee80211_node *, struct ieee80211_tx_ampdu *); static int mwl_startrecv(struct mwl_softc *); static MWL_HAL_APMODE mwl_getapmode(const struct ieee80211vap *, struct ieee80211_channel *); static int mwl_setapmode(struct ieee80211vap *, struct ieee80211_channel*); static void mwl_scan_start(struct ieee80211com *); static void mwl_scan_end(struct ieee80211com *); static void mwl_set_channel(struct ieee80211com *); static int mwl_peerstadb(struct ieee80211_node *, int aid, int staid, MWL_HAL_PEERINFO *pi); static int mwl_localstadb(struct ieee80211vap *); static int mwl_newstate(struct ieee80211vap *, enum ieee80211_state, int); static int allocstaid(struct mwl_softc *sc, int aid); static void delstaid(struct mwl_softc *sc, int staid); static void mwl_newassoc(struct ieee80211_node *, int); static void mwl_agestations(void *); static int mwl_setregdomain(struct ieee80211com *, struct ieee80211_regdomain *, int, struct ieee80211_channel []); static void mwl_getradiocaps(struct ieee80211com *, int, int *, struct ieee80211_channel []); static int mwl_getchannels(struct mwl_softc *); static void mwl_sysctlattach(struct mwl_softc *); static void mwl_announce(struct mwl_softc *); SYSCTL_NODE(_hw, OID_AUTO, mwl, CTLFLAG_RD, 0, "Marvell driver parameters"); static int mwl_rxdesc = MWL_RXDESC; /* # rx desc's to allocate */ SYSCTL_INT(_hw_mwl, OID_AUTO, rxdesc, CTLFLAG_RW, &mwl_rxdesc, 0, "rx descriptors allocated"); static int mwl_rxbuf = MWL_RXBUF; /* # rx buffers to allocate */ SYSCTL_INT(_hw_mwl, OID_AUTO, rxbuf, CTLFLAG_RWTUN, &mwl_rxbuf, 
0, "rx buffers allocated"); static int mwl_txbuf = MWL_TXBUF; /* # tx buffers to allocate */ SYSCTL_INT(_hw_mwl, OID_AUTO, txbuf, CTLFLAG_RWTUN, &mwl_txbuf, 0, "tx buffers allocated"); static int mwl_txcoalesce = 8; /* # tx packets to q before poking f/w*/ SYSCTL_INT(_hw_mwl, OID_AUTO, txcoalesce, CTLFLAG_RWTUN, &mwl_txcoalesce, 0, "tx buffers to send at once"); static int mwl_rxquota = MWL_RXBUF; /* # max buffers to process */ SYSCTL_INT(_hw_mwl, OID_AUTO, rxquota, CTLFLAG_RWTUN, &mwl_rxquota, 0, "max rx buffers to process per interrupt"); static int mwl_rxdmalow = 3; /* # min buffers for wakeup */ SYSCTL_INT(_hw_mwl, OID_AUTO, rxdmalow, CTLFLAG_RWTUN, &mwl_rxdmalow, 0, "min free rx buffers before restarting traffic"); #ifdef MWL_DEBUG static int mwl_debug = 0; SYSCTL_INT(_hw_mwl, OID_AUTO, debug, CTLFLAG_RWTUN, &mwl_debug, 0, "control debugging printfs"); enum { MWL_DEBUG_XMIT = 0x00000001, /* basic xmit operation */ MWL_DEBUG_XMIT_DESC = 0x00000002, /* xmit descriptors */ MWL_DEBUG_RECV = 0x00000004, /* basic recv operation */ MWL_DEBUG_RECV_DESC = 0x00000008, /* recv descriptors */ MWL_DEBUG_RESET = 0x00000010, /* reset processing */ MWL_DEBUG_BEACON = 0x00000020, /* beacon handling */ MWL_DEBUG_INTR = 0x00000040, /* ISR */ MWL_DEBUG_TX_PROC = 0x00000080, /* tx ISR proc */ MWL_DEBUG_RX_PROC = 0x00000100, /* rx ISR proc */ MWL_DEBUG_KEYCACHE = 0x00000200, /* key cache management */ MWL_DEBUG_STATE = 0x00000400, /* 802.11 state transitions */ MWL_DEBUG_NODE = 0x00000800, /* node management */ MWL_DEBUG_RECV_ALL = 0x00001000, /* trace all frames (beacons) */ MWL_DEBUG_TSO = 0x00002000, /* TSO processing */ MWL_DEBUG_AMPDU = 0x00004000, /* BA stream handling */ MWL_DEBUG_ANY = 0xffffffff }; #define IS_BEACON(wh) \ ((wh->i_fc[0] & (IEEE80211_FC0_TYPE_MASK|IEEE80211_FC0_SUBTYPE_MASK)) == \ (IEEE80211_FC0_TYPE_MGT|IEEE80211_FC0_SUBTYPE_BEACON)) #define IFF_DUMPPKTS_RECV(sc, wh) \ ((sc->sc_debug & MWL_DEBUG_RECV) && \ ((sc->sc_debug & MWL_DEBUG_RECV_ALL) || !IS_BEACON(wh))) #define IFF_DUMPPKTS_XMIT(sc) \ (sc->sc_debug & MWL_DEBUG_XMIT) #define DPRINTF(sc, m, fmt, ...) do { \ if (sc->sc_debug & (m)) \ printf(fmt, __VA_ARGS__); \ } while (0) #define KEYPRINTF(sc, hk, mac) do { \ if (sc->sc_debug & MWL_DEBUG_KEYCACHE) \ mwl_keyprint(sc, __func__, hk, mac); \ } while (0) static void mwl_printrxbuf(const struct mwl_rxbuf *bf, u_int ix); static void mwl_printtxbuf(const struct mwl_txbuf *bf, u_int qnum, u_int ix); #else #define IFF_DUMPPKTS_RECV(sc, wh) 0 #define IFF_DUMPPKTS_XMIT(sc) 0 #define DPRINTF(sc, m, fmt, ...) do { (void )sc; } while (0) #define KEYPRINTF(sc, k, mac) do { (void )sc; } while (0) #endif static MALLOC_DEFINE(M_MWLDEV, "mwldev", "mwl driver dma buffers"); /* * Each packet has fixed front matter: a 2-byte length * of the payload, followed by a 4-address 802.11 header * (regardless of the actual header and always w/o any * QoS header). The payload then follows. */ struct mwltxrec { uint16_t fwlen; struct ieee80211_frame_addr4 wh; } __packed; /* * Read/Write shorthands for accesses to BAR 0. Note * that all BAR 1 operations are done in the "hal" and * there should be no reference to them here. 
*/ #ifdef MWL_DEBUG static __inline uint32_t RD4(struct mwl_softc *sc, bus_size_t off) { return bus_space_read_4(sc->sc_io0t, sc->sc_io0h, off); } #endif static __inline void WR4(struct mwl_softc *sc, bus_size_t off, uint32_t val) { bus_space_write_4(sc->sc_io0t, sc->sc_io0h, off, val); } int mwl_attach(uint16_t devid, struct mwl_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct mwl_hal *mh; int error = 0; DPRINTF(sc, MWL_DEBUG_ANY, "%s: devid 0x%x\n", __func__, devid); /* * Setup the RX free list lock early, so it can be consistently * removed. */ MWL_RXFREE_INIT(sc); mh = mwl_hal_attach(sc->sc_dev, devid, sc->sc_io1h, sc->sc_io1t, sc->sc_dmat); if (mh == NULL) { device_printf(sc->sc_dev, "unable to attach HAL\n"); error = EIO; goto bad; } sc->sc_mh = mh; /* * Load firmware so we can get setup. We arbitrarily * pick station firmware; we'll re-load firmware as * needed so setting up the wrong mode isn't a big deal. */ if (mwl_hal_fwload(mh, NULL) != 0) { device_printf(sc->sc_dev, "unable to setup builtin firmware\n"); error = EIO; goto bad1; } if (mwl_hal_gethwspecs(mh, &sc->sc_hwspecs) != 0) { device_printf(sc->sc_dev, "unable to fetch h/w specs\n"); error = EIO; goto bad1; } error = mwl_getchannels(sc); if (error != 0) goto bad1; sc->sc_txantenna = 0; /* h/w default */ sc->sc_rxantenna = 0; /* h/w default */ sc->sc_invalid = 0; /* ready to go, enable int handling */ sc->sc_ageinterval = MWL_AGEINTERVAL; /* * Allocate tx+rx descriptors and populate the lists. * We immediately push the information to the firmware * as otherwise it gets upset. */ error = mwl_dma_setup(sc); if (error != 0) { device_printf(sc->sc_dev, "failed to setup descriptors: %d\n", error); goto bad1; } error = mwl_setupdma(sc); /* push to firmware */ if (error != 0) /* NB: mwl_setupdma prints msg */ goto bad1; callout_init(&sc->sc_timer, 1); callout_init_mtx(&sc->sc_watchdog, &sc->sc_mtx, 0); mbufq_init(&sc->sc_snd, ifqmaxlen); sc->sc_tq = taskqueue_create("mwl_taskq", M_NOWAIT, taskqueue_thread_enqueue, &sc->sc_tq); taskqueue_start_threads(&sc->sc_tq, 1, PI_NET, "%s taskq", device_get_nameunit(sc->sc_dev)); TASK_INIT(&sc->sc_rxtask, 0, mwl_rx_proc, sc); TASK_INIT(&sc->sc_radartask, 0, mwl_radar_proc, sc); TASK_INIT(&sc->sc_chanswitchtask, 0, mwl_chanswitch_proc, sc); TASK_INIT(&sc->sc_bawatchdogtask, 0, mwl_bawatchdog_proc, sc); /* NB: insure BK queue is the lowest priority h/w queue */ if (!mwl_tx_setup(sc, WME_AC_BK, MWL_WME_AC_BK)) { device_printf(sc->sc_dev, "unable to setup xmit queue for %s traffic!\n", ieee80211_wme_acnames[WME_AC_BK]); error = EIO; goto bad2; } if (!mwl_tx_setup(sc, WME_AC_BE, MWL_WME_AC_BE) || !mwl_tx_setup(sc, WME_AC_VI, MWL_WME_AC_VI) || !mwl_tx_setup(sc, WME_AC_VO, MWL_WME_AC_VO)) { /* * Not enough hardware tx queues to properly do WME; * just punt and assign them all to the same h/w queue. * We could do a better job of this if, for example, * we allocate queues when we switch from station to * AP mode. 
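 *
 * The fallback below simply aliases BE, VI and VO to the BK queue that
 * was set up above, e.g.
 *
 *	sc->sc_ac2q[WME_AC_BE] = sc->sc_ac2q[WME_AC_BK];
 *
 * so queue selection via M_WME_GETAC(m) in mwl_start() keeps working; it
 * merely loses the per-AC priority separation.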
*/ if (sc->sc_ac2q[WME_AC_VI] != NULL) mwl_tx_cleanupq(sc, sc->sc_ac2q[WME_AC_VI]); if (sc->sc_ac2q[WME_AC_BE] != NULL) mwl_tx_cleanupq(sc, sc->sc_ac2q[WME_AC_BE]); sc->sc_ac2q[WME_AC_BE] = sc->sc_ac2q[WME_AC_BK]; sc->sc_ac2q[WME_AC_VI] = sc->sc_ac2q[WME_AC_BK]; sc->sc_ac2q[WME_AC_VO] = sc->sc_ac2q[WME_AC_BK]; } TASK_INIT(&sc->sc_txtask, 0, mwl_tx_proc, sc); ic->ic_softc = sc; ic->ic_name = device_get_nameunit(sc->sc_dev); /* XXX not right but it's not used anywhere important */ ic->ic_phytype = IEEE80211_T_OFDM; ic->ic_opmode = IEEE80211_M_STA; ic->ic_caps = IEEE80211_C_STA /* station mode supported */ | IEEE80211_C_HOSTAP /* hostap mode */ | IEEE80211_C_MONITOR /* monitor mode */ #if 0 | IEEE80211_C_IBSS /* ibss, nee adhoc, mode */ | IEEE80211_C_AHDEMO /* adhoc demo mode */ #endif | IEEE80211_C_MBSS /* mesh point link mode */ | IEEE80211_C_WDS /* WDS supported */ | IEEE80211_C_SHPREAMBLE /* short preamble supported */ | IEEE80211_C_SHSLOT /* short slot time supported */ | IEEE80211_C_WME /* WME/WMM supported */ | IEEE80211_C_BURST /* xmit bursting supported */ | IEEE80211_C_WPA /* capable of WPA1+WPA2 */ | IEEE80211_C_BGSCAN /* capable of bg scanning */ | IEEE80211_C_TXFRAG /* handle tx frags */ | IEEE80211_C_TXPMGT /* capable of txpow mgt */ | IEEE80211_C_DFS /* DFS supported */ ; ic->ic_htcaps = IEEE80211_HTCAP_SMPS_ENA /* SM PS mode enabled */ | IEEE80211_HTCAP_CHWIDTH40 /* 40MHz channel width */ | IEEE80211_HTCAP_SHORTGI20 /* short GI in 20MHz */ | IEEE80211_HTCAP_SHORTGI40 /* short GI in 40MHz */ | IEEE80211_HTCAP_RXSTBC_2STREAM/* 1-2 spatial streams */ #if MWL_AGGR_SIZE == 7935 | IEEE80211_HTCAP_MAXAMSDU_7935 /* max A-MSDU length */ #else | IEEE80211_HTCAP_MAXAMSDU_3839 /* max A-MSDU length */ #endif #if 0 | IEEE80211_HTCAP_PSMP /* PSMP supported */ | IEEE80211_HTCAP_40INTOLERANT /* 40MHz intolerant */ #endif /* s/w capabilities */ | IEEE80211_HTC_HT /* HT operation */ | IEEE80211_HTC_AMPDU /* tx A-MPDU */ | IEEE80211_HTC_AMSDU /* tx A-MSDU */ | IEEE80211_HTC_SMPS /* SMPS available */ ; /* * Mark h/w crypto support. * XXX no way to query h/w support. */ ic->ic_cryptocaps |= IEEE80211_CRYPTO_WEP | IEEE80211_CRYPTO_AES_CCM | IEEE80211_CRYPTO_TKIP | IEEE80211_CRYPTO_TKIPMIC ; /* * Transmit requires space in the packet for a special * format transmit record and optional padding between * this record and the payload. Ask the net80211 layer * to arrange this when encapsulating packets so we can * add it efficiently. */ ic->ic_headroom = sizeof(struct mwltxrec) - sizeof(struct ieee80211_frame); IEEE80211_ADDR_COPY(ic->ic_macaddr, sc->sc_hwspecs.macAddr); /* call MI attach routine. 
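 *
 * ieee80211_ifattach() installs the net80211 default methods; several of
 * them are then interposed below, saving the original pointer so the
 * driver can chain back to it, in the usual pattern
 *
 *	sc->sc_recv_action = ic->ic_recv_action;
 *	ic->ic_recv_action = mwl_recv_action;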
*/ ieee80211_ifattach(ic); ic->ic_setregdomain = mwl_setregdomain; ic->ic_getradiocaps = mwl_getradiocaps; /* override default methods */ ic->ic_raw_xmit = mwl_raw_xmit; ic->ic_newassoc = mwl_newassoc; ic->ic_updateslot = mwl_updateslot; ic->ic_update_mcast = mwl_update_mcast; ic->ic_update_promisc = mwl_update_promisc; ic->ic_wme.wme_update = mwl_wme_update; ic->ic_transmit = mwl_transmit; ic->ic_ioctl = mwl_ioctl; ic->ic_parent = mwl_parent; ic->ic_node_alloc = mwl_node_alloc; sc->sc_node_cleanup = ic->ic_node_cleanup; ic->ic_node_cleanup = mwl_node_cleanup; sc->sc_node_drain = ic->ic_node_drain; ic->ic_node_drain = mwl_node_drain; ic->ic_node_getsignal = mwl_node_getsignal; ic->ic_node_getmimoinfo = mwl_node_getmimoinfo; ic->ic_scan_start = mwl_scan_start; ic->ic_scan_end = mwl_scan_end; ic->ic_set_channel = mwl_set_channel; sc->sc_recv_action = ic->ic_recv_action; ic->ic_recv_action = mwl_recv_action; sc->sc_addba_request = ic->ic_addba_request; ic->ic_addba_request = mwl_addba_request; sc->sc_addba_response = ic->ic_addba_response; ic->ic_addba_response = mwl_addba_response; sc->sc_addba_stop = ic->ic_addba_stop; ic->ic_addba_stop = mwl_addba_stop; ic->ic_vap_create = mwl_vap_create; ic->ic_vap_delete = mwl_vap_delete; ieee80211_radiotap_attach(ic, &sc->sc_tx_th.wt_ihdr, sizeof(sc->sc_tx_th), MWL_TX_RADIOTAP_PRESENT, &sc->sc_rx_th.wr_ihdr, sizeof(sc->sc_rx_th), MWL_RX_RADIOTAP_PRESENT); /* * Setup dynamic sysctl's now that country code and * regdomain are available from the hal. */ mwl_sysctlattach(sc); if (bootverbose) ieee80211_announce(ic); mwl_announce(sc); return 0; bad2: mwl_dma_cleanup(sc); bad1: mwl_hal_detach(mh); bad: MWL_RXFREE_DESTROY(sc); sc->sc_invalid = 1; return error; } int mwl_detach(struct mwl_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; MWL_LOCK(sc); mwl_stop(sc); MWL_UNLOCK(sc); /* * NB: the order of these is important: * o call the 802.11 layer before detaching the hal to * insure callbacks into the driver to delete global * key cache entries can be handled * o reclaim the tx queue data structures after calling * the 802.11 layer as we'll get called back to reclaim * node state and potentially want to use them * o to cleanup the tx queues the hal is called, so detach * it last * Other than that, it's straightforward... */ ieee80211_ifdetach(ic); callout_drain(&sc->sc_watchdog); mwl_dma_cleanup(sc); MWL_RXFREE_DESTROY(sc); mwl_tx_cleanup(sc); mwl_hal_detach(sc->sc_mh); mbufq_drain(&sc->sc_snd); return 0; } /* * MAC address handling for multiple BSS on the same radio. * The first vap uses the MAC address from the EEPROM. For * subsequent vap's we set the U/L bit (bit 1) in the MAC * address and use the next six bits as an index. 
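 *
 * For example (addresses are illustrative): with an EEPROM address of
 * 00:1b:2f:aa:bb:cc, the vap assigned index 1 uses 06:1b:2f:aa:bb:cc;
 * the 0x02 in the first octet is the locally-administered (U/L) bit and
 * reclaim_address() recovers the index again with mac[0] >> 2.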
*/ static void assign_address(struct mwl_softc *sc, uint8_t mac[IEEE80211_ADDR_LEN], int clone) { int i; if (clone && mwl_hal_ismbsscapable(sc->sc_mh)) { /* NB: we only do this if h/w supports multiple bssid */ for (i = 0; i < 32; i++) if ((sc->sc_bssidmask & (1<<i)) == 0) break; if (i != 0) mac[0] |= (i << 2)|0x2; } else i = 0; sc->sc_bssidmask |= 1<<i; if (i == 0) sc->sc_nbssid0++; } static void reclaim_address(struct mwl_softc *sc, const uint8_t mac[IEEE80211_ADDR_LEN]) { int i = mac[0] >> 2; if (i != 0 || --sc->sc_nbssid0 == 0) sc->sc_bssidmask &= ~(1<<i); } static struct ieee80211vap * mwl_vap_create(struct ieee80211com *ic, const char name[IFNAMSIZ], int unit, enum ieee80211_opmode opmode, int flags, const uint8_t bssid[IEEE80211_ADDR_LEN], const uint8_t mac0[IEEE80211_ADDR_LEN]) { struct mwl_softc *sc = ic->ic_softc; struct mwl_hal *mh = sc->sc_mh; struct ieee80211vap *vap, *apvap; struct mwl_hal_vap *hvap; struct mwl_vap *mvp; uint8_t mac[IEEE80211_ADDR_LEN]; IEEE80211_ADDR_COPY(mac, mac0); switch (opmode) { case IEEE80211_M_HOSTAP: case IEEE80211_M_MBSS: if ((flags & IEEE80211_CLONE_MACADDR) == 0) assign_address(sc, mac, flags & IEEE80211_CLONE_BSSID); hvap = mwl_hal_newvap(mh, MWL_HAL_AP, mac); if (hvap == NULL) { if ((flags & IEEE80211_CLONE_MACADDR) == 0) reclaim_address(sc, mac); return NULL; } break; case IEEE80211_M_STA: if ((flags & IEEE80211_CLONE_MACADDR) == 0) assign_address(sc, mac, flags & IEEE80211_CLONE_BSSID); hvap = mwl_hal_newvap(mh, MWL_HAL_STA, mac); if (hvap == NULL) { if ((flags & IEEE80211_CLONE_MACADDR) == 0) reclaim_address(sc, mac); return NULL; } /* no h/w beacon miss support; always use s/w */ flags |= IEEE80211_CLONE_NOBEACONS; break; case IEEE80211_M_WDS: hvap = NULL; /* NB: we use associated AP vap */ if (sc->sc_napvaps == 0) return NULL; /* no existing AP vap */ break; case IEEE80211_M_MONITOR: hvap = NULL; break; case IEEE80211_M_IBSS: case IEEE80211_M_AHDEMO: default: return NULL; } mvp = malloc(sizeof(struct mwl_vap), M_80211_VAP, M_WAITOK | M_ZERO); mvp->mv_hvap = hvap; if (opmode == IEEE80211_M_WDS) { /* * WDS vaps must have an associated AP vap; find one. * XXX not right. */ TAILQ_FOREACH(apvap, &ic->ic_vaps, iv_next) if (apvap->iv_opmode == IEEE80211_M_HOSTAP) { mvp->mv_ap_hvap = MWL_VAP(apvap)->mv_hvap; break; } KASSERT(mvp->mv_ap_hvap != NULL, ("no ap vap")); } vap = &mvp->mv_vap; ieee80211_vap_setup(ic, vap, name, unit, opmode, flags, bssid); /* override with driver methods */ mvp->mv_newstate = vap->iv_newstate; vap->iv_newstate = mwl_newstate; vap->iv_max_keyix = 0; /* XXX */ vap->iv_key_alloc = mwl_key_alloc; vap->iv_key_delete = mwl_key_delete; vap->iv_key_set = mwl_key_set; #ifdef MWL_HOST_PS_SUPPORT if (opmode == IEEE80211_M_HOSTAP || opmode == IEEE80211_M_MBSS) { vap->iv_update_ps = mwl_update_ps; mvp->mv_set_tim = vap->iv_set_tim; vap->iv_set_tim = mwl_set_tim; } #endif vap->iv_reset = mwl_reset; vap->iv_update_beacon = mwl_beacon_update; /* override max aid so sta's cannot assoc when we're out of sta id's */ vap->iv_max_aid = MWL_MAXSTAID; /* override default A-MPDU rx parameters */ vap->iv_ampdu_rxmax = IEEE80211_HTCAP_MAXRXAMPDU_64K; vap->iv_ampdu_density = IEEE80211_HTCAP_MPDUDENSITY_4; /* complete setup */ ieee80211_vap_attach(vap, mwl_media_change, ieee80211_media_status, mac); switch (vap->iv_opmode) { case IEEE80211_M_HOSTAP: case IEEE80211_M_MBSS: case IEEE80211_M_STA: /* * Setup sta db entry for local address. */ mwl_localstadb(vap); if (vap->iv_opmode == IEEE80211_M_HOSTAP || vap->iv_opmode == IEEE80211_M_MBSS) sc->sc_napvaps++; else sc->sc_nstavaps++; break; case IEEE80211_M_WDS: sc->sc_nwdsvaps++; break; default: break; } /* * Setup overall operating mode.
*/ if (sc->sc_napvaps) ic->ic_opmode = IEEE80211_M_HOSTAP; else if (sc->sc_nstavaps) ic->ic_opmode = IEEE80211_M_STA; else ic->ic_opmode = opmode; return vap; } static void mwl_vap_delete(struct ieee80211vap *vap) { struct mwl_vap *mvp = MWL_VAP(vap); struct mwl_softc *sc = vap->iv_ic->ic_softc; struct mwl_hal *mh = sc->sc_mh; struct mwl_hal_vap *hvap = mvp->mv_hvap; enum ieee80211_opmode opmode = vap->iv_opmode; /* XXX disallow ap vap delete if WDS still present */ if (sc->sc_running) { /* quiesce h/w while we remove the vap */ mwl_hal_intrset(mh, 0); /* disable interrupts */ } ieee80211_vap_detach(vap); switch (opmode) { case IEEE80211_M_HOSTAP: case IEEE80211_M_MBSS: case IEEE80211_M_STA: KASSERT(hvap != NULL, ("no hal vap handle")); (void) mwl_hal_delstation(hvap, vap->iv_myaddr); mwl_hal_delvap(hvap); if (opmode == IEEE80211_M_HOSTAP || opmode == IEEE80211_M_MBSS) sc->sc_napvaps--; else sc->sc_nstavaps--; /* XXX don't do it for IEEE80211_CLONE_MACADDR */ reclaim_address(sc, vap->iv_myaddr); break; case IEEE80211_M_WDS: sc->sc_nwdsvaps--; break; default: break; } mwl_cleartxq(sc, vap); free(mvp, M_80211_VAP); if (sc->sc_running) mwl_hal_intrset(mh, sc->sc_imask); } void mwl_suspend(struct mwl_softc *sc) { MWL_LOCK(sc); mwl_stop(sc); MWL_UNLOCK(sc); } void mwl_resume(struct mwl_softc *sc) { int error = EDOOFUS; MWL_LOCK(sc); if (sc->sc_ic.ic_nrunning > 0) error = mwl_init(sc); MWL_UNLOCK(sc); if (error == 0) ieee80211_start_all(&sc->sc_ic); /* start all vap's */ } void mwl_shutdown(void *arg) { struct mwl_softc *sc = arg; MWL_LOCK(sc); mwl_stop(sc); MWL_UNLOCK(sc); } /* * Interrupt handler. Most of the actual processing is deferred. */ void mwl_intr(void *arg) { struct mwl_softc *sc = arg; struct mwl_hal *mh = sc->sc_mh; uint32_t status; if (sc->sc_invalid) { /* * The hardware is not ready/present, don't touch anything. * Note this can happen early on if the IRQ is shared. */ DPRINTF(sc, MWL_DEBUG_ANY, "%s: invalid; ignored\n", __func__); return; } /* * Figure out the reason(s) for the interrupt. */ mwl_hal_getisr(mh, &status); /* NB: clears ISR too */ if (status == 0) /* must be a shared irq */ return; DPRINTF(sc, MWL_DEBUG_INTR, "%s: status 0x%x imask 0x%x\n", __func__, status, sc->sc_imask); if (status & MACREG_A2HRIC_BIT_RX_RDY) taskqueue_enqueue(sc->sc_tq, &sc->sc_rxtask); if (status & MACREG_A2HRIC_BIT_TX_DONE) taskqueue_enqueue(sc->sc_tq, &sc->sc_txtask); if (status & MACREG_A2HRIC_BIT_BA_WATCHDOG) taskqueue_enqueue(sc->sc_tq, &sc->sc_bawatchdogtask); if (status & MACREG_A2HRIC_BIT_OPC_DONE) mwl_hal_cmddone(mh); if (status & MACREG_A2HRIC_BIT_MAC_EVENT) { ; } if (status & MACREG_A2HRIC_BIT_ICV_ERROR) { /* TKIP ICV error */ sc->sc_stats.mst_rx_badtkipicv++; } if (status & MACREG_A2HRIC_BIT_QUEUE_EMPTY) { /* 11n aggregation queue is empty, re-fill */ ; } if (status & MACREG_A2HRIC_BIT_QUEUE_FULL) { ; } if (status & MACREG_A2HRIC_BIT_RADAR_DETECT) { /* radar detected, process event */ taskqueue_enqueue(sc->sc_tq, &sc->sc_radartask); } if (status & MACREG_A2HRIC_BIT_CHAN_SWITCH) { /* DFS channel switch */ taskqueue_enqueue(sc->sc_tq, &sc->sc_chanswitchtask); } } static void mwl_radar_proc(void *arg, int pending) { struct mwl_softc *sc = arg; struct ieee80211com *ic = &sc->sc_ic; DPRINTF(sc, MWL_DEBUG_ANY, "%s: radar detected, pending %u\n", __func__, pending); sc->sc_stats.mst_radardetect++; /* XXX stop h/w BA streams? 
*/ IEEE80211_LOCK(ic); ieee80211_dfs_notify_radar(ic, ic->ic_curchan); IEEE80211_UNLOCK(ic); } static void mwl_chanswitch_proc(void *arg, int pending) { struct mwl_softc *sc = arg; struct ieee80211com *ic = &sc->sc_ic; DPRINTF(sc, MWL_DEBUG_ANY, "%s: channel switch notice, pending %u\n", __func__, pending); IEEE80211_LOCK(ic); sc->sc_csapending = 0; ieee80211_csa_completeswitch(ic); IEEE80211_UNLOCK(ic); } static void mwl_bawatchdog(const MWL_HAL_BASTREAM *sp) { struct ieee80211_node *ni = sp->data[0]; /* send DELBA and drop the stream */ ieee80211_ampdu_stop(ni, sp->data[1], IEEE80211_REASON_UNSPECIFIED); } static void mwl_bawatchdog_proc(void *arg, int pending) { struct mwl_softc *sc = arg; struct mwl_hal *mh = sc->sc_mh; const MWL_HAL_BASTREAM *sp; uint8_t bitmap, n; sc->sc_stats.mst_bawatchdog++; if (mwl_hal_getwatchdogbitmap(mh, &bitmap) != 0) { DPRINTF(sc, MWL_DEBUG_AMPDU, "%s: could not get bitmap\n", __func__); sc->sc_stats.mst_bawatchdog_failed++; return; } DPRINTF(sc, MWL_DEBUG_AMPDU, "%s: bitmap 0x%x\n", __func__, bitmap); if (bitmap == 0xff) { n = 0; /* disable all ba streams */ for (bitmap = 0; bitmap < 8; bitmap++) { sp = mwl_hal_bastream_lookup(mh, bitmap); if (sp != NULL) { mwl_bawatchdog(sp); n++; } } if (n == 0) { DPRINTF(sc, MWL_DEBUG_AMPDU, "%s: no BA streams found\n", __func__); sc->sc_stats.mst_bawatchdog_empty++; } } else if (bitmap != 0xaa) { /* disable a single ba stream */ sp = mwl_hal_bastream_lookup(mh, bitmap); if (sp != NULL) { mwl_bawatchdog(sp); } else { DPRINTF(sc, MWL_DEBUG_AMPDU, "%s: no BA stream %d\n", __func__, bitmap); sc->sc_stats.mst_bawatchdog_notfound++; } } } /* * Convert net80211 channel to a HAL channel. */ static void mwl_mapchan(MWL_HAL_CHANNEL *hc, const struct ieee80211_channel *chan) { hc->channel = chan->ic_ieee; *(uint32_t *)&hc->channelFlags = 0; if (IEEE80211_IS_CHAN_2GHZ(chan)) hc->channelFlags.FreqBand = MWL_FREQ_BAND_2DOT4GHZ; else if (IEEE80211_IS_CHAN_5GHZ(chan)) hc->channelFlags.FreqBand = MWL_FREQ_BAND_5GHZ; if (IEEE80211_IS_CHAN_HT40(chan)) { hc->channelFlags.ChnlWidth = MWL_CH_40_MHz_WIDTH; if (IEEE80211_IS_CHAN_HT40U(chan)) hc->channelFlags.ExtChnlOffset = MWL_EXT_CH_ABOVE_CTRL_CH; else hc->channelFlags.ExtChnlOffset = MWL_EXT_CH_BELOW_CTRL_CH; } else hc->channelFlags.ChnlWidth = MWL_CH_20_MHz_WIDTH; /* XXX 10MHz channels */ } /* * Inform firmware of our tx/rx dma setup. The BAR 0 * writes below are for compatibility with older firmware. * For current firmware we send this information with a * cmd block via mwl_hal_sethwdma. */ static int mwl_setupdma(struct mwl_softc *sc) { int error, i; sc->sc_hwdma.rxDescRead = sc->sc_rxdma.dd_desc_paddr; WR4(sc, sc->sc_hwspecs.rxDescRead, sc->sc_hwdma.rxDescRead); WR4(sc, sc->sc_hwspecs.rxDescWrite, sc->sc_hwdma.rxDescRead); for (i = 0; i < MWL_NUM_TX_QUEUES-MWL_NUM_ACK_QUEUES; i++) { struct mwl_txq *txq = &sc->sc_txq[i]; sc->sc_hwdma.wcbBase[i] = txq->dma.dd_desc_paddr; WR4(sc, sc->sc_hwspecs.wcbBase[i], sc->sc_hwdma.wcbBase[i]); } sc->sc_hwdma.maxNumTxWcb = mwl_txbuf; sc->sc_hwdma.maxNumWCB = MWL_NUM_TX_QUEUES-MWL_NUM_ACK_QUEUES; error = mwl_hal_sethwdma(sc->sc_mh, &sc->sc_hwdma); if (error != 0) { device_printf(sc->sc_dev, "unable to setup tx/rx dma; hal status %u\n", error); /* XXX */ } return error; } /* * Inform firmware of tx rate parameters. * Called after a channel change. 
*/ static int mwl_setcurchanrates(struct mwl_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; const struct ieee80211_rateset *rs; MWL_HAL_TXRATE rates; memset(&rates, 0, sizeof(rates)); rs = ieee80211_get_suprates(ic, ic->ic_curchan); /* rate used to send management frames */ rates.MgtRate = rs->rs_rates[0] & IEEE80211_RATE_VAL; /* rate used to send multicast frames */ rates.McastRate = rates.MgtRate; return mwl_hal_settxrate_auto(sc->sc_mh, &rates); } /* * Inform firmware of tx rate parameters. Called whenever * user-settable params change and after a channel change. */ static int mwl_setrates(struct ieee80211vap *vap) { struct mwl_vap *mvp = MWL_VAP(vap); struct ieee80211_node *ni = vap->iv_bss; const struct ieee80211_txparam *tp = ni->ni_txparms; MWL_HAL_TXRATE rates; KASSERT(vap->iv_state == IEEE80211_S_RUN, ("state %d", vap->iv_state)); /* * Update the h/w rate map. * NB: 0x80 for MCS is passed through unchanged */ memset(&rates, 0, sizeof(rates)); /* rate used to send management frames */ rates.MgtRate = tp->mgmtrate; /* rate used to send multicast frames */ rates.McastRate = tp->mcastrate; /* while here calculate EAPOL fixed rate cookie */ mvp->mv_eapolformat = htole16(mwl_calcformat(rates.MgtRate, ni)); return mwl_hal_settxrate(mvp->mv_hvap, tp->ucastrate != IEEE80211_FIXED_RATE_NONE ? RATE_FIXED : RATE_AUTO, &rates); } /* * Setup a fixed xmit rate cookie for EAPOL frames. */ static void mwl_seteapolformat(struct ieee80211vap *vap) { struct mwl_vap *mvp = MWL_VAP(vap); struct ieee80211_node *ni = vap->iv_bss; enum ieee80211_phymode mode; uint8_t rate; KASSERT(vap->iv_state == IEEE80211_S_RUN, ("state %d", vap->iv_state)); mode = ieee80211_chan2mode(ni->ni_chan); /* * Use legacy rates when operating a mixed HT+non-HT bss. * NB: this may violate POLA for sta and wds vap's. */ if (mode == IEEE80211_MODE_11NA && (vap->iv_flags_ht & IEEE80211_FHT_PUREN) == 0) rate = vap->iv_txparms[IEEE80211_MODE_11A].mgmtrate; else if (mode == IEEE80211_MODE_11NG && (vap->iv_flags_ht & IEEE80211_FHT_PUREN) == 0) rate = vap->iv_txparms[IEEE80211_MODE_11G].mgmtrate; else rate = vap->iv_txparms[mode].mgmtrate; mvp->mv_eapolformat = htole16(mwl_calcformat(rate, ni)); } /* * Map SKU+country code to region code for radar bin'ing. */ static int mwl_map2regioncode(const struct ieee80211_regdomain *rd) { switch (rd->regdomain) { case SKU_FCC: case SKU_FCC3: return DOMAIN_CODE_FCC; case SKU_CA: return DOMAIN_CODE_IC; case SKU_ETSI: case SKU_ETSI2: case SKU_ETSI3: if (rd->country == CTRY_SPAIN) return DOMAIN_CODE_SPAIN; if (rd->country == CTRY_FRANCE || rd->country == CTRY_FRANCE2) return DOMAIN_CODE_FRANCE; /* XXX force 1.3.1 radar type */ return DOMAIN_CODE_ETSI_131; case SKU_JAPAN: return DOMAIN_CODE_MKK; case SKU_ROW: return DOMAIN_CODE_DGT; /* Taiwan */ case SKU_APAC: case SKU_APAC2: case SKU_APAC3: return DOMAIN_CODE_AUS; /* Australia */ } /* XXX KOREA? */ return DOMAIN_CODE_FCC; /* XXX? 
*/ } static int mwl_hal_reset(struct mwl_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct mwl_hal *mh = sc->sc_mh; mwl_hal_setantenna(mh, WL_ANTENNATYPE_RX, sc->sc_rxantenna); mwl_hal_setantenna(mh, WL_ANTENNATYPE_TX, sc->sc_txantenna); mwl_hal_setradio(mh, 1, WL_AUTO_PREAMBLE); mwl_hal_setwmm(sc->sc_mh, (ic->ic_flags & IEEE80211_F_WME) != 0); mwl_chan_set(sc, ic->ic_curchan); /* NB: RF/RA performance tuned for indoor mode */ mwl_hal_setrateadaptmode(mh, 0); mwl_hal_setoptimizationlevel(mh, (ic->ic_flags & IEEE80211_F_BURST) != 0); mwl_hal_setregioncode(mh, mwl_map2regioncode(&ic->ic_regdomain)); mwl_hal_setaggampduratemode(mh, 1, 80); /* XXX */ mwl_hal_setcfend(mh, 0); /* XXX */ return 1; } static int mwl_init(struct mwl_softc *sc) { struct mwl_hal *mh = sc->sc_mh; int error = 0; MWL_LOCK_ASSERT(sc); /* * Stop anything previously setup. This is safe * whether this is the first time through or not. */ mwl_stop(sc); /* * Push vap-independent state to the firmware. */ if (!mwl_hal_reset(sc)) { device_printf(sc->sc_dev, "unable to reset hardware\n"); return EIO; } /* * Setup recv (once); transmit is already good to go. */ error = mwl_startrecv(sc); if (error != 0) { device_printf(sc->sc_dev, "unable to start recv logic\n"); return error; } /* * Enable interrupts. */ sc->sc_imask = MACREG_A2HRIC_BIT_RX_RDY | MACREG_A2HRIC_BIT_TX_DONE | MACREG_A2HRIC_BIT_OPC_DONE #if 0 | MACREG_A2HRIC_BIT_MAC_EVENT #endif | MACREG_A2HRIC_BIT_ICV_ERROR | MACREG_A2HRIC_BIT_RADAR_DETECT | MACREG_A2HRIC_BIT_CHAN_SWITCH #if 0 | MACREG_A2HRIC_BIT_QUEUE_EMPTY #endif | MACREG_A2HRIC_BIT_BA_WATCHDOG | MACREQ_A2HRIC_BIT_TX_ACK ; sc->sc_running = 1; mwl_hal_intrset(mh, sc->sc_imask); callout_reset(&sc->sc_watchdog, hz, mwl_watchdog, sc); return 0; } static void mwl_stop(struct mwl_softc *sc) { MWL_LOCK_ASSERT(sc); if (sc->sc_running) { /* * Shutdown the hardware and driver. */ sc->sc_running = 0; callout_stop(&sc->sc_watchdog); sc->sc_tx_timer = 0; mwl_draintxq(sc); } } static int mwl_reset_vap(struct ieee80211vap *vap, int state) { struct mwl_hal_vap *hvap = MWL_VAP(vap)->mv_hvap; struct ieee80211com *ic = vap->iv_ic; if (state == IEEE80211_S_RUN) mwl_setrates(vap); /* XXX off by 1? */ mwl_hal_setrtsthreshold(hvap, vap->iv_rtsthreshold); /* XXX auto? 20/40 split? */ mwl_hal_sethtgi(hvap, (vap->iv_flags_ht & (IEEE80211_FHT_SHORTGI20|IEEE80211_FHT_SHORTGI40)) ? 1 : 0); mwl_hal_setnprot(hvap, ic->ic_htprotmode == IEEE80211_PROT_NONE ? HTPROTECT_NONE : HTPROTECT_AUTO); /* XXX txpower cap */ /* re-setup beacons */ if (state == IEEE80211_S_RUN && (vap->iv_opmode == IEEE80211_M_HOSTAP || vap->iv_opmode == IEEE80211_M_MBSS || vap->iv_opmode == IEEE80211_M_IBSS)) { mwl_setapmode(vap, vap->iv_bss->ni_chan); mwl_hal_setnprotmode(hvap, MS(ic->ic_curhtprotmode, IEEE80211_HTINFO_OPMODE)); return mwl_beacon_setup(vap); } return 0; } /* * Reset the hardware w/o losing operational state. * Used to reset or reload hardware state for a vap. */ static int mwl_reset(struct ieee80211vap *vap, u_long cmd) { struct mwl_hal_vap *hvap = MWL_VAP(vap)->mv_hvap; int error = 0; if (hvap != NULL) { /* WDS, MONITOR, etc. */ struct ieee80211com *ic = vap->iv_ic; struct mwl_softc *sc = ic->ic_softc; struct mwl_hal *mh = sc->sc_mh; /* XXX handle DWDS sta vap change */ /* XXX do we need to disable interrupts? */ mwl_hal_intrset(mh, 0); /* disable interrupts */ error = mwl_reset_vap(vap, vap->iv_state); mwl_hal_intrset(mh, sc->sc_imask); } return error; } /* * Allocate a tx buffer for sending a frame. 
The * packet is assumed to have the WME AC stored so * we can use it to select the appropriate h/w queue. */ static struct mwl_txbuf * mwl_gettxbuf(struct mwl_softc *sc, struct mwl_txq *txq) { struct mwl_txbuf *bf; /* * Grab a TX buffer and associated resources. */ MWL_TXQ_LOCK(txq); bf = STAILQ_FIRST(&txq->free); if (bf != NULL) { STAILQ_REMOVE_HEAD(&txq->free, bf_list); txq->nfree--; } MWL_TXQ_UNLOCK(txq); if (bf == NULL) DPRINTF(sc, MWL_DEBUG_XMIT, "%s: out of xmit buffers on q %d\n", __func__, txq->qnum); return bf; } /* * Return a tx buffer to the queue it came from. Note there * are two cases because we must preserve the order of buffers * as it reflects the fixed order of descriptors in memory * (the firmware pre-fetches descriptors so we cannot reorder). */ static void mwl_puttxbuf_head(struct mwl_txq *txq, struct mwl_txbuf *bf) { bf->bf_m = NULL; bf->bf_node = NULL; MWL_TXQ_LOCK(txq); STAILQ_INSERT_HEAD(&txq->free, bf, bf_list); txq->nfree++; MWL_TXQ_UNLOCK(txq); } static void mwl_puttxbuf_tail(struct mwl_txq *txq, struct mwl_txbuf *bf) { bf->bf_m = NULL; bf->bf_node = NULL; MWL_TXQ_LOCK(txq); STAILQ_INSERT_TAIL(&txq->free, bf, bf_list); txq->nfree++; MWL_TXQ_UNLOCK(txq); } static int mwl_transmit(struct ieee80211com *ic, struct mbuf *m) { struct mwl_softc *sc = ic->ic_softc; int error; MWL_LOCK(sc); if (!sc->sc_running) { MWL_UNLOCK(sc); return (ENXIO); } error = mbufq_enqueue(&sc->sc_snd, m); if (error) { MWL_UNLOCK(sc); return (error); } mwl_start(sc); MWL_UNLOCK(sc); return (0); } static void mwl_start(struct mwl_softc *sc) { struct ieee80211_node *ni; struct mwl_txbuf *bf; struct mbuf *m; struct mwl_txq *txq = NULL; /* XXX silence gcc */ int nqueued; MWL_LOCK_ASSERT(sc); if (!sc->sc_running || sc->sc_invalid) return; nqueued = 0; while ((m = mbufq_dequeue(&sc->sc_snd)) != NULL) { /* * Grab the node for the destination. */ ni = (struct ieee80211_node *) m->m_pkthdr.rcvif; KASSERT(ni != NULL, ("no node")); m->m_pkthdr.rcvif = NULL; /* committed, clear ref */ /* * Grab a TX buffer and associated resources. * We honor the classification by the 802.11 layer. */ txq = sc->sc_ac2q[M_WME_GETAC(m)]; bf = mwl_gettxbuf(sc, txq); if (bf == NULL) { m_freem(m); ieee80211_free_node(ni); #ifdef MWL_TX_NODROP sc->sc_stats.mst_tx_qstop++; break; #else DPRINTF(sc, MWL_DEBUG_XMIT, "%s: tail drop on q %d\n", __func__, txq->qnum); sc->sc_stats.mst_tx_qdrop++; continue; #endif /* MWL_TX_NODROP */ } /* * Pass the frame to the h/w for transmission. */ if (mwl_tx_start(sc, ni, bf, m)) { if_inc_counter(ni->ni_vap->iv_ifp, IFCOUNTER_OERRORS, 1); mwl_puttxbuf_head(txq, bf); ieee80211_free_node(ni); continue; } nqueued++; if (nqueued >= mwl_txcoalesce) { /* * Poke the firmware to process queued frames; * see below about (lack of) locking. */ nqueued = 0; mwl_hal_txstart(sc->sc_mh, 0/*XXX*/); } } if (nqueued) { /* * NB: We don't need to lock against tx done because * this just prods the firmware to check the transmit * descriptors. The firmware will also start fetching * descriptors by itself if it notices new ones are * present when it goes to deliver a tx done interrupt * to the host. So if we race with tx done processing * it's ok. Delivering the kick here rather than in * mwl_tx_start is an optimization to avoid poking the * firmware for each packet. * * NB: the queue id isn't used so 0 is ok. 
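 *
 * In the loop above the same kick is issued every mwl_txcoalesce frames
 * (sysctl hw.mwl.txcoalesce, default 8), roughly
 *
 *	nqueued++;
 *	if (nqueued >= mwl_txcoalesce) {
 *		nqueued = 0;
 *		mwl_hal_txstart(sc->sc_mh, 0);
 *	}
 *
 * so the call below only covers whatever remainder is still queued.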
*/ mwl_hal_txstart(sc->sc_mh, 0/*XXX*/); } } static int mwl_raw_xmit(struct ieee80211_node *ni, struct mbuf *m, const struct ieee80211_bpf_params *params) { struct ieee80211com *ic = ni->ni_ic; struct mwl_softc *sc = ic->ic_softc; struct mwl_txbuf *bf; struct mwl_txq *txq; if (!sc->sc_running || sc->sc_invalid) { m_freem(m); return ENETDOWN; } /* * Grab a TX buffer and associated resources. * Note that we depend on the classification * by the 802.11 layer to get to the right h/w * queue. Management frames must ALWAYS go on * queue 1 but we cannot just force that here * because we may receive non-mgt frames. */ txq = sc->sc_ac2q[M_WME_GETAC(m)]; bf = mwl_gettxbuf(sc, txq); if (bf == NULL) { sc->sc_stats.mst_tx_qstop++; m_freem(m); return ENOBUFS; } /* * Pass the frame to the h/w for transmission. */ if (mwl_tx_start(sc, ni, bf, m)) { mwl_puttxbuf_head(txq, bf); return EIO; /* XXX */ } /* * NB: We don't need to lock against tx done because * this just prods the firmware to check the transmit * descriptors. The firmware will also start fetching * descriptors by itself if it notices new ones are * present when it goes to deliver a tx done interrupt * to the host. So if we race with tx done processing * it's ok. Delivering the kick here rather than in * mwl_tx_start is an optimization to avoid poking the * firmware for each packet. * * NB: the queue id isn't used so 0 is ok. */ mwl_hal_txstart(sc->sc_mh, 0/*XXX*/); return 0; } static int mwl_media_change(struct ifnet *ifp) { struct ieee80211vap *vap = ifp->if_softc; int error; error = ieee80211_media_change(ifp); /* NB: only the fixed rate can change and that doesn't need a reset */ if (error == ENETRESET) { mwl_setrates(vap); error = 0; } return error; } #ifdef MWL_DEBUG static void mwl_keyprint(struct mwl_softc *sc, const char *tag, const MWL_HAL_KEYVAL *hk, const uint8_t mac[IEEE80211_ADDR_LEN]) { static const char *ciphers[] = { "WEP", "TKIP", "AES-CCM", }; int i, n; printf("%s: [%u] %-7s", tag, hk->keyIndex, ciphers[hk->keyTypeId]); for (i = 0, n = hk->keyLen; i < n; i++) printf(" %02x", hk->key.aes[i]); printf(" mac %s", ether_sprintf(mac)); if (hk->keyTypeId == KEY_TYPE_ID_TKIP) { printf(" %s", "rxmic"); for (i = 0; i < sizeof(hk->key.tkip.rxMic); i++) printf(" %02x", hk->key.tkip.rxMic[i]); printf(" txmic"); for (i = 0; i < sizeof(hk->key.tkip.txMic); i++) printf(" %02x", hk->key.tkip.txMic[i]); } printf(" flags 0x%x\n", hk->keyFlags); } #endif /* * Allocate a key cache slot for a unicast key. The * firmware handles key allocation and every station is * guaranteed key space so we are always successful. */ static int mwl_key_alloc(struct ieee80211vap *vap, struct ieee80211_key *k, ieee80211_keyix *keyix, ieee80211_keyix *rxkeyix) { struct mwl_softc *sc = vap->iv_ic->ic_softc; if (k->wk_keyix != IEEE80211_KEYIX_NONE || (k->wk_flags & IEEE80211_KEY_GROUP)) { if (!(&vap->iv_nw_keys[0] <= k && k < &vap->iv_nw_keys[IEEE80211_WEP_NKID])) { /* should not happen */ DPRINTF(sc, MWL_DEBUG_KEYCACHE, "%s: bogus group key\n", __func__); return 0; } /* give the caller what they requested */ *keyix = *rxkeyix = k - vap->iv_nw_keys; } else { /* * Firmware handles key allocation. */ *keyix = *rxkeyix = 0; } return 1; } /* * Delete a key entry allocated by mwl_key_alloc. 
*/ static int mwl_key_delete(struct ieee80211vap *vap, const struct ieee80211_key *k) { struct mwl_softc *sc = vap->iv_ic->ic_softc; struct mwl_hal_vap *hvap = MWL_VAP(vap)->mv_hvap; MWL_HAL_KEYVAL hk; const uint8_t bcastaddr[IEEE80211_ADDR_LEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; if (hvap == NULL) { if (vap->iv_opmode != IEEE80211_M_WDS) { /* XXX monitor mode? */ DPRINTF(sc, MWL_DEBUG_KEYCACHE, "%s: no hvap for opmode %d\n", __func__, vap->iv_opmode); return 0; } hvap = MWL_VAP(vap)->mv_ap_hvap; } DPRINTF(sc, MWL_DEBUG_KEYCACHE, "%s: delete key %u\n", __func__, k->wk_keyix); memset(&hk, 0, sizeof(hk)); hk.keyIndex = k->wk_keyix; switch (k->wk_cipher->ic_cipher) { case IEEE80211_CIPHER_WEP: hk.keyTypeId = KEY_TYPE_ID_WEP; break; case IEEE80211_CIPHER_TKIP: hk.keyTypeId = KEY_TYPE_ID_TKIP; break; case IEEE80211_CIPHER_AES_CCM: hk.keyTypeId = KEY_TYPE_ID_AES; break; default: /* XXX should not happen */ DPRINTF(sc, MWL_DEBUG_KEYCACHE, "%s: unknown cipher %d\n", __func__, k->wk_cipher->ic_cipher); return 0; } return (mwl_hal_keyreset(hvap, &hk, bcastaddr) == 0); /*XXX*/ } static __inline int addgroupflags(MWL_HAL_KEYVAL *hk, const struct ieee80211_key *k) { if (k->wk_flags & IEEE80211_KEY_GROUP) { if (k->wk_flags & IEEE80211_KEY_XMIT) hk->keyFlags |= KEY_FLAG_TXGROUPKEY; if (k->wk_flags & IEEE80211_KEY_RECV) hk->keyFlags |= KEY_FLAG_RXGROUPKEY; return 1; } else return 0; } /* * Set the key cache contents for the specified key. Key cache * slot(s) must already have been allocated by mwl_key_alloc. */ static int mwl_key_set(struct ieee80211vap *vap, const struct ieee80211_key *k) { return (_mwl_key_set(vap, k, k->wk_macaddr)); } static int _mwl_key_set(struct ieee80211vap *vap, const struct ieee80211_key *k, const uint8_t mac[IEEE80211_ADDR_LEN]) { #define GRPXMIT (IEEE80211_KEY_XMIT | IEEE80211_KEY_GROUP) /* NB: static wep keys are marked GROUP+tx/rx; GTK will be tx or rx */ #define IEEE80211_IS_STATICKEY(k) \ (((k)->wk_flags & (GRPXMIT|IEEE80211_KEY_RECV)) == \ (GRPXMIT|IEEE80211_KEY_RECV)) struct mwl_softc *sc = vap->iv_ic->ic_softc; struct mwl_hal_vap *hvap = MWL_VAP(vap)->mv_hvap; const struct ieee80211_cipher *cip = k->wk_cipher; const uint8_t *macaddr; MWL_HAL_KEYVAL hk; KASSERT((k->wk_flags & IEEE80211_KEY_SWCRYPT) == 0, ("s/w crypto set?")); if (hvap == NULL) { if (vap->iv_opmode != IEEE80211_M_WDS) { /* XXX monitor mode? 
*/ DPRINTF(sc, MWL_DEBUG_KEYCACHE, "%s: no hvap for opmode %d\n", __func__, vap->iv_opmode); return 0; } hvap = MWL_VAP(vap)->mv_ap_hvap; } memset(&hk, 0, sizeof(hk)); hk.keyIndex = k->wk_keyix; switch (cip->ic_cipher) { case IEEE80211_CIPHER_WEP: hk.keyTypeId = KEY_TYPE_ID_WEP; hk.keyLen = k->wk_keylen; if (k->wk_keyix == vap->iv_def_txkey) hk.keyFlags = KEY_FLAG_WEP_TXKEY; if (!IEEE80211_IS_STATICKEY(k)) { /* NB: WEP is never used for the PTK */ (void) addgroupflags(&hk, k); } break; case IEEE80211_CIPHER_TKIP: hk.keyTypeId = KEY_TYPE_ID_TKIP; hk.key.tkip.tsc.high = (uint32_t)(k->wk_keytsc >> 16); hk.key.tkip.tsc.low = (uint16_t)k->wk_keytsc; hk.keyFlags = KEY_FLAG_TSC_VALID | KEY_FLAG_MICKEY_VALID; hk.keyLen = k->wk_keylen + IEEE80211_MICBUF_SIZE; if (!addgroupflags(&hk, k)) hk.keyFlags |= KEY_FLAG_PAIRWISE; break; case IEEE80211_CIPHER_AES_CCM: hk.keyTypeId = KEY_TYPE_ID_AES; hk.keyLen = k->wk_keylen; if (!addgroupflags(&hk, k)) hk.keyFlags |= KEY_FLAG_PAIRWISE; break; default: /* XXX should not happen */ DPRINTF(sc, MWL_DEBUG_KEYCACHE, "%s: unknown cipher %d\n", __func__, k->wk_cipher->ic_cipher); return 0; } /* * NB: tkip mic keys get copied here too; the layout * just happens to match that in ieee80211_key. */ memcpy(hk.key.aes, k->wk_key, hk.keyLen); /* * Locate address of sta db entry for writing key; * the convention unfortunately is somewhat different * than how net80211, hostapd, and wpa_supplicant think. */ if (vap->iv_opmode == IEEE80211_M_STA) { /* * NB: keys plumbed before the sta reaches AUTH state * will be discarded or written to the wrong sta db * entry because iv_bss is meaningless. This is ok * (right now) because we handle deferred plumbing of * WEP keys when the sta reaches AUTH state. */ macaddr = vap->iv_bss->ni_bssid; if ((k->wk_flags & IEEE80211_KEY_GROUP) == 0) { /* XXX plumb to local sta db too for static key wep */ mwl_hal_keyset(hvap, &hk, vap->iv_myaddr); } } else if (vap->iv_opmode == IEEE80211_M_WDS && vap->iv_state != IEEE80211_S_RUN) { /* * Prior to RUN state a WDS vap will not it's BSS node * setup so we will plumb the key to the wrong mac * address (it'll be our local address). Workaround * this for the moment by grabbing the correct address. */ macaddr = vap->iv_des_bssid; } else if ((k->wk_flags & GRPXMIT) == GRPXMIT) macaddr = vap->iv_myaddr; else macaddr = mac; KEYPRINTF(sc, &hk, macaddr); return (mwl_hal_keyset(hvap, &hk, macaddr) == 0); #undef IEEE80211_IS_STATICKEY #undef GRPXMIT } /* * Set the multicast filter contents into the hardware. * XXX f/w has no support; just defer to the os. */ static void mwl_setmcastfilter(struct mwl_softc *sc) { #if 0 struct ether_multi *enm; struct ether_multistep estep; uint8_t macs[IEEE80211_ADDR_LEN*MWL_HAL_MCAST_MAX];/* XXX stack use */ uint8_t *mp; int nmc; mp = macs; nmc = 0; ETHER_FIRST_MULTI(estep, &sc->sc_ec, enm); while (enm != NULL) { /* XXX Punt on ranges. */ if (nmc == MWL_HAL_MCAST_MAX || !IEEE80211_ADDR_EQ(enm->enm_addrlo, enm->enm_addrhi)) { ifp->if_flags |= IFF_ALLMULTI; return; } IEEE80211_ADDR_COPY(mp, enm->enm_addrlo); mp += IEEE80211_ADDR_LEN, nmc++; ETHER_NEXT_MULTI(estep, enm); } ifp->if_flags &= ~IFF_ALLMULTI; mwl_hal_setmcast(sc->sc_mh, nmc, macs); #endif } static int mwl_mode_init(struct mwl_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; struct mwl_hal *mh = sc->sc_mh; mwl_hal_setpromisc(mh, ic->ic_promisc > 0); mwl_setmcastfilter(sc); return 0; } /* * Callback from the 802.11 layer after a multicast state change. 
*/ static void mwl_update_mcast(struct ieee80211com *ic) { struct mwl_softc *sc = ic->ic_softc; mwl_setmcastfilter(sc); } /* * Callback from the 802.11 layer after a promiscuous mode change. * Note this interface does not check the operating mode as this * is an internal callback and we are expected to honor the current * state (e.g. this is used for setting the interface in promiscuous * mode when operating in hostap mode to do ACS). */ static void mwl_update_promisc(struct ieee80211com *ic) { struct mwl_softc *sc = ic->ic_softc; mwl_hal_setpromisc(sc->sc_mh, ic->ic_promisc > 0); } /* * Callback from the 802.11 layer to update the slot time * based on the current setting. We use it to notify the * firmware of ERP changes and the f/w takes care of things * like slot time and preamble. */ static void mwl_updateslot(struct ieee80211com *ic) { struct mwl_softc *sc = ic->ic_softc; struct mwl_hal *mh = sc->sc_mh; int prot; /* NB: can be called early; suppress needless cmds */ if (!sc->sc_running) return; /* * Calculate the ERP flags. The firwmare will use * this to carry out the appropriate measures. */ prot = 0; if (IEEE80211_IS_CHAN_ANYG(ic->ic_curchan)) { if ((ic->ic_flags & IEEE80211_F_SHSLOT) == 0) prot |= IEEE80211_ERP_NON_ERP_PRESENT; if (ic->ic_flags & IEEE80211_F_USEPROT) prot |= IEEE80211_ERP_USE_PROTECTION; if (ic->ic_flags & IEEE80211_F_USEBARKER) prot |= IEEE80211_ERP_LONG_PREAMBLE; } DPRINTF(sc, MWL_DEBUG_RESET, "%s: chan %u MHz/flags 0x%x %s slot, (prot 0x%x ic_flags 0x%x)\n", __func__, ic->ic_curchan->ic_freq, ic->ic_curchan->ic_flags, ic->ic_flags & IEEE80211_F_SHSLOT ? "short" : "long", prot, ic->ic_flags); mwl_hal_setgprot(mh, prot); } /* * Setup the beacon frame. */ static int mwl_beacon_setup(struct ieee80211vap *vap) { struct mwl_hal_vap *hvap = MWL_VAP(vap)->mv_hvap; struct ieee80211_node *ni = vap->iv_bss; struct mbuf *m; m = ieee80211_beacon_alloc(ni); if (m == NULL) return ENOBUFS; mwl_hal_setbeacon(hvap, mtod(m, const void *), m->m_len); m_free(m); return 0; } /* * Update the beacon frame in response to a change. */ static void mwl_beacon_update(struct ieee80211vap *vap, int item) { struct mwl_hal_vap *hvap = MWL_VAP(vap)->mv_hvap; struct ieee80211com *ic = vap->iv_ic; KASSERT(hvap != NULL, ("no beacon")); switch (item) { case IEEE80211_BEACON_ERP: mwl_updateslot(ic); break; case IEEE80211_BEACON_HTINFO: mwl_hal_setnprotmode(hvap, MS(ic->ic_curhtprotmode, IEEE80211_HTINFO_OPMODE)); break; case IEEE80211_BEACON_CAPS: case IEEE80211_BEACON_WME: case IEEE80211_BEACON_APPIE: case IEEE80211_BEACON_CSA: break; case IEEE80211_BEACON_TIM: /* NB: firmware always forms TIM */ return; } /* XXX retain beacon frame and update */ mwl_beacon_setup(vap); } static void mwl_load_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { bus_addr_t *paddr = (bus_addr_t*) arg; KASSERT(error == 0, ("error %u on bus_dma callback", error)); *paddr = segs->ds_addr; } #ifdef MWL_HOST_PS_SUPPORT /* * Handle power save station occupancy changes. */ static void mwl_update_ps(struct ieee80211vap *vap, int nsta) { struct mwl_vap *mvp = MWL_VAP(vap); if (nsta == 0 || mvp->mv_last_ps_sta == 0) mwl_hal_setpowersave_bss(mvp->mv_hvap, nsta); mvp->mv_last_ps_sta = nsta; } /* * Handle associated station power save state changes. 
*/ static int mwl_set_tim(struct ieee80211_node *ni, int set) { struct ieee80211vap *vap = ni->ni_vap; struct mwl_vap *mvp = MWL_VAP(vap); if (mvp->mv_set_tim(ni, set)) { /* NB: state change */ mwl_hal_setpowersave_sta(mvp->mv_hvap, IEEE80211_AID(ni->ni_associd), set); return 1; } else return 0; } #endif /* MWL_HOST_PS_SUPPORT */ static int mwl_desc_setup(struct mwl_softc *sc, const char *name, struct mwl_descdma *dd, int nbuf, size_t bufsize, int ndesc, size_t descsize) { uint8_t *ds; int error; DPRINTF(sc, MWL_DEBUG_RESET, "%s: %s DMA: %u bufs (%ju) %u desc/buf (%ju)\n", __func__, name, nbuf, (uintmax_t) bufsize, ndesc, (uintmax_t) descsize); dd->dd_name = name; dd->dd_desc_len = nbuf * ndesc * descsize; /* * Setup DMA descriptor area. */ error = bus_dma_tag_create(bus_get_dma_tag(sc->sc_dev), /* parent */ PAGE_SIZE, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ dd->dd_desc_len, /* maxsize */ 1, /* nsegments */ dd->dd_desc_len, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &dd->dd_dmat); if (error != 0) { device_printf(sc->sc_dev, "cannot allocate %s DMA tag\n", dd->dd_name); return error; } /* allocate descriptors */ error = bus_dmamem_alloc(dd->dd_dmat, (void**) &dd->dd_desc, BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &dd->dd_dmamap); if (error != 0) { device_printf(sc->sc_dev, "unable to alloc memory for %u %s descriptors, " "error %u\n", nbuf * ndesc, dd->dd_name, error); goto fail1; } error = bus_dmamap_load(dd->dd_dmat, dd->dd_dmamap, dd->dd_desc, dd->dd_desc_len, mwl_load_cb, &dd->dd_desc_paddr, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc->sc_dev, "unable to map %s descriptors, error %u\n", dd->dd_name, error); goto fail2; } ds = dd->dd_desc; memset(ds, 0, dd->dd_desc_len); DPRINTF(sc, MWL_DEBUG_RESET, "%s: %s DMA map: %p (%lu) -> 0x%jx (%lu)\n", __func__, dd->dd_name, ds, (u_long) dd->dd_desc_len, (uintmax_t) dd->dd_desc_paddr, /*XXX*/ (u_long) dd->dd_desc_len); return 0; fail2: bus_dmamem_free(dd->dd_dmat, dd->dd_desc, dd->dd_dmamap); fail1: bus_dma_tag_destroy(dd->dd_dmat); memset(dd, 0, sizeof(*dd)); return error; #undef DS2PHYS } static void mwl_desc_cleanup(struct mwl_softc *sc, struct mwl_descdma *dd) { bus_dmamap_unload(dd->dd_dmat, dd->dd_dmamap); bus_dmamem_free(dd->dd_dmat, dd->dd_desc, dd->dd_dmamap); bus_dma_tag_destroy(dd->dd_dmat); memset(dd, 0, sizeof(*dd)); } /* * Construct a tx q's free list. The order of entries on * the list must reflect the physical layout of tx descriptors * because the firmware pre-fetches descriptors. * * XXX might be better to use indices into the buffer array. 
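 * For illustration (using the DS2PHYS macro defined just below): buffer i
 * gets descriptor block dd_desc + i*MWL_TXDESC, whose bus address is
 * dd_desc_paddr plus the same byte offset, so inserting buffers on the
 * free list in index order also matches the firmware's physical
 * pre-fetch order.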
*/ static void mwl_txq_reset(struct mwl_softc *sc, struct mwl_txq *txq) { struct mwl_txbuf *bf; int i; bf = txq->dma.dd_bufptr; STAILQ_INIT(&txq->free); for (i = 0; i < mwl_txbuf; i++, bf++) STAILQ_INSERT_TAIL(&txq->free, bf, bf_list); txq->nfree = i; } #define DS2PHYS(_dd, _ds) \ ((_dd)->dd_desc_paddr + ((caddr_t)(_ds) - (caddr_t)(_dd)->dd_desc)) static int mwl_txdma_setup(struct mwl_softc *sc, struct mwl_txq *txq) { int error, bsize, i; struct mwl_txbuf *bf; struct mwl_txdesc *ds; error = mwl_desc_setup(sc, "tx", &txq->dma, mwl_txbuf, sizeof(struct mwl_txbuf), MWL_TXDESC, sizeof(struct mwl_txdesc)); if (error != 0) return error; /* allocate and setup tx buffers */ bsize = mwl_txbuf * sizeof(struct mwl_txbuf); bf = malloc(bsize, M_MWLDEV, M_NOWAIT | M_ZERO); if (bf == NULL) { device_printf(sc->sc_dev, "malloc of %u tx buffers failed\n", mwl_txbuf); return ENOMEM; } txq->dma.dd_bufptr = bf; ds = txq->dma.dd_desc; for (i = 0; i < mwl_txbuf; i++, bf++, ds += MWL_TXDESC) { bf->bf_desc = ds; bf->bf_daddr = DS2PHYS(&txq->dma, ds); error = bus_dmamap_create(sc->sc_dmat, BUS_DMA_NOWAIT, &bf->bf_dmamap); if (error != 0) { device_printf(sc->sc_dev, "unable to create dmamap for tx " "buffer %u, error %u\n", i, error); return error; } } mwl_txq_reset(sc, txq); return 0; } static void mwl_txdma_cleanup(struct mwl_softc *sc, struct mwl_txq *txq) { struct mwl_txbuf *bf; int i; bf = txq->dma.dd_bufptr; for (i = 0; i < mwl_txbuf; i++, bf++) { KASSERT(bf->bf_m == NULL, ("mbuf on free list")); KASSERT(bf->bf_node == NULL, ("node on free list")); if (bf->bf_dmamap != NULL) bus_dmamap_destroy(sc->sc_dmat, bf->bf_dmamap); } STAILQ_INIT(&txq->free); txq->nfree = 0; if (txq->dma.dd_bufptr != NULL) { free(txq->dma.dd_bufptr, M_MWLDEV); txq->dma.dd_bufptr = NULL; } if (txq->dma.dd_desc_len != 0) mwl_desc_cleanup(sc, &txq->dma); } static int mwl_rxdma_setup(struct mwl_softc *sc) { int error, jumbosize, bsize, i; struct mwl_rxbuf *bf; struct mwl_jumbo *rbuf; struct mwl_rxdesc *ds; caddr_t data; error = mwl_desc_setup(sc, "rx", &sc->sc_rxdma, mwl_rxdesc, sizeof(struct mwl_rxbuf), 1, sizeof(struct mwl_rxdesc)); if (error != 0) return error; /* * Receive is done to a private pool of jumbo buffers. * This allows us to attach to mbuf's and avoid re-mapping * memory on each rx we post. We allocate a large chunk * of memory and manage it in the driver. The mbuf free * callback method is used to reclaim frames after sending * them up the stack. By default we allocate 2x the number of * rx descriptors configured so we have some slop to hold * us while frames are processed. 
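 * For example (an illustrative summary of the code that follows):
 * mwl_rxbuf is bumped to at least 2*mwl_rxdesc, each jumbo buffer
 * occupies roundup(MWL_AGGR_SIZE, PAGE_SIZE) bytes, and the whole
 * sc_rxmemsize = mwl_rxbuf*jumbosize byte pool is loaded as a single
 * DMA map shared by all rx buffers.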
*/ if (mwl_rxbuf < 2*mwl_rxdesc) { device_printf(sc->sc_dev, "too few rx dma buffers (%d); increasing to %d\n", mwl_rxbuf, 2*mwl_rxdesc); mwl_rxbuf = 2*mwl_rxdesc; } jumbosize = roundup(MWL_AGGR_SIZE, PAGE_SIZE); sc->sc_rxmemsize = mwl_rxbuf*jumbosize; error = bus_dma_tag_create(sc->sc_dmat, /* parent */ PAGE_SIZE, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR_32BIT, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ sc->sc_rxmemsize, /* maxsize */ 1, /* nsegments */ sc->sc_rxmemsize, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &sc->sc_rxdmat); if (error != 0) { device_printf(sc->sc_dev, "could not create rx DMA tag\n"); return error; } error = bus_dmamem_alloc(sc->sc_rxdmat, (void**) &sc->sc_rxmem, BUS_DMA_NOWAIT | BUS_DMA_COHERENT, &sc->sc_rxmap); if (error != 0) { device_printf(sc->sc_dev, "could not alloc %ju bytes of rx DMA memory\n", (uintmax_t) sc->sc_rxmemsize); return error; } error = bus_dmamap_load(sc->sc_rxdmat, sc->sc_rxmap, sc->sc_rxmem, sc->sc_rxmemsize, mwl_load_cb, &sc->sc_rxmem_paddr, BUS_DMA_NOWAIT); if (error != 0) { device_printf(sc->sc_dev, "could not load rx DMA map\n"); return error; } /* * Allocate rx buffers and set them up. */ bsize = mwl_rxdesc * sizeof(struct mwl_rxbuf); bf = malloc(bsize, M_MWLDEV, M_NOWAIT | M_ZERO); if (bf == NULL) { device_printf(sc->sc_dev, "malloc of %u rx buffers failed\n", bsize); return error; } sc->sc_rxdma.dd_bufptr = bf; STAILQ_INIT(&sc->sc_rxbuf); ds = sc->sc_rxdma.dd_desc; for (i = 0; i < mwl_rxdesc; i++, bf++, ds++) { bf->bf_desc = ds; bf->bf_daddr = DS2PHYS(&sc->sc_rxdma, ds); /* pre-assign dma buffer */ bf->bf_data = ((uint8_t *)sc->sc_rxmem) + (i*jumbosize); /* NB: tail is intentional to preserve descriptor order */ STAILQ_INSERT_TAIL(&sc->sc_rxbuf, bf, bf_list); } /* * Place remainder of dma memory buffers on the free list. 
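 * E.g. with mwl_rxbuf == 2*mwl_rxdesc (the default enforced above) the
 * loop below pushes buffers mwl_rxdesc..mwl_rxbuf-1 onto sc_rxfree via
 * MWL_JUMBO_DATA2BUF(), leaving sc_nrxfree == mwl_rxbuf - mwl_rxdesc
 * spare buffers for mwl_getrxdma() to hand out.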
*/ SLIST_INIT(&sc->sc_rxfree); for (; i < mwl_rxbuf; i++) { data = ((uint8_t *)sc->sc_rxmem) + (i*jumbosize); rbuf = MWL_JUMBO_DATA2BUF(data); SLIST_INSERT_HEAD(&sc->sc_rxfree, rbuf, next); sc->sc_nrxfree++; } return 0; } #undef DS2PHYS static void mwl_rxdma_cleanup(struct mwl_softc *sc) { if (sc->sc_rxmem_paddr != 0) { bus_dmamap_unload(sc->sc_rxdmat, sc->sc_rxmap); sc->sc_rxmem_paddr = 0; } if (sc->sc_rxmem != NULL) { bus_dmamem_free(sc->sc_rxdmat, sc->sc_rxmem, sc->sc_rxmap); sc->sc_rxmem = NULL; } if (sc->sc_rxdma.dd_bufptr != NULL) { free(sc->sc_rxdma.dd_bufptr, M_MWLDEV); sc->sc_rxdma.dd_bufptr = NULL; } if (sc->sc_rxdma.dd_desc_len != 0) mwl_desc_cleanup(sc, &sc->sc_rxdma); } static int mwl_dma_setup(struct mwl_softc *sc) { int error, i; error = mwl_rxdma_setup(sc); if (error != 0) { mwl_rxdma_cleanup(sc); return error; } for (i = 0; i < MWL_NUM_TX_QUEUES; i++) { error = mwl_txdma_setup(sc, &sc->sc_txq[i]); if (error != 0) { mwl_dma_cleanup(sc); return error; } } return 0; } static void mwl_dma_cleanup(struct mwl_softc *sc) { int i; for (i = 0; i < MWL_NUM_TX_QUEUES; i++) mwl_txdma_cleanup(sc, &sc->sc_txq[i]); mwl_rxdma_cleanup(sc); } static struct ieee80211_node * mwl_node_alloc(struct ieee80211vap *vap, const uint8_t mac[IEEE80211_ADDR_LEN]) { struct ieee80211com *ic = vap->iv_ic; struct mwl_softc *sc = ic->ic_softc; const size_t space = sizeof(struct mwl_node); struct mwl_node *mn; mn = malloc(space, M_80211_NODE, M_NOWAIT|M_ZERO); if (mn == NULL) { /* XXX stat+msg */ return NULL; } DPRINTF(sc, MWL_DEBUG_NODE, "%s: mn %p\n", __func__, mn); return &mn->mn_node; } static void mwl_node_cleanup(struct ieee80211_node *ni) { struct ieee80211com *ic = ni->ni_ic; struct mwl_softc *sc = ic->ic_softc; struct mwl_node *mn = MWL_NODE(ni); DPRINTF(sc, MWL_DEBUG_NODE, "%s: ni %p ic %p staid %d\n", __func__, ni, ni->ni_ic, mn->mn_staid); if (mn->mn_staid != 0) { struct ieee80211vap *vap = ni->ni_vap; if (mn->mn_hvap != NULL) { if (vap->iv_opmode == IEEE80211_M_STA) mwl_hal_delstation(mn->mn_hvap, vap->iv_myaddr); else mwl_hal_delstation(mn->mn_hvap, ni->ni_macaddr); } /* * NB: legacy WDS peer sta db entry is installed using * the associate ap's hvap; use it again to delete it. * XXX can vap be NULL? */ else if (vap->iv_opmode == IEEE80211_M_WDS && MWL_VAP(vap)->mv_ap_hvap != NULL) mwl_hal_delstation(MWL_VAP(vap)->mv_ap_hvap, ni->ni_macaddr); delstaid(sc, mn->mn_staid); mn->mn_staid = 0; } sc->sc_node_cleanup(ni); } /* * Reclaim rx dma buffers from packets sitting on the ampdu * reorder queue for a station. We replace buffers with a * system cluster (if available). */ static void mwl_ampdu_rxdma_reclaim(struct ieee80211_rx_ampdu *rap) { #if 0 int i, n, off; struct mbuf *m; void *cl; n = rap->rxa_qframes; for (i = 0; i < rap->rxa_wnd && n > 0; i++) { m = rap->rxa_m[i]; if (m == NULL) continue; n--; /* our dma buffers have a well-known free routine */ if ((m->m_flags & M_EXT) == 0 || m->m_ext.ext_free != mwl_ext_free) continue; /* * Try to allocate a cluster and move the data. */ off = m->m_data - m->m_ext.ext_buf; if (off + m->m_pkthdr.len > MCLBYTES) { /* XXX no AMSDU for now */ continue; } cl = pool_cache_get_paddr(&mclpool_cache, 0, &m->m_ext.ext_paddr); if (cl != NULL) { /* * Copy the existing data to the cluster, remove * the rx dma buffer, and attach the cluster in * its place. Note we preserve the offset to the * data so frames being bridged can still prepend * their headers without adding another mbuf. 
*/ memcpy((caddr_t) cl + off, m->m_data, m->m_pkthdr.len); MEXTREMOVE(m); MEXTADD(m, cl, MCLBYTES, 0, NULL, &mclpool_cache); /* setup mbuf like _MCLGET does */ m->m_flags |= M_CLUSTER | M_EXT_RW; _MOWNERREF(m, M_EXT | M_CLUSTER); /* NB: m_data is clobbered by MEXTADDR, adjust */ m->m_data += off; } } #endif } /* * Callback to reclaim resources. We first let the * net80211 layer do it's thing, then if we are still * blocked by a lack of rx dma buffers we walk the ampdu * reorder q's to reclaim buffers by copying to a system * cluster. */ static void mwl_node_drain(struct ieee80211_node *ni) { struct ieee80211com *ic = ni->ni_ic; struct mwl_softc *sc = ic->ic_softc; struct mwl_node *mn = MWL_NODE(ni); DPRINTF(sc, MWL_DEBUG_NODE, "%s: ni %p vap %p staid %d\n", __func__, ni, ni->ni_vap, mn->mn_staid); /* NB: call up first to age out ampdu q's */ sc->sc_node_drain(ni); /* XXX better to not check low water mark? */ if (sc->sc_rxblocked && mn->mn_staid != 0 && (ni->ni_flags & IEEE80211_NODE_HT)) { uint8_t tid; /* * Walk the reorder q and reclaim rx dma buffers by copying * the packet contents into clusters. */ for (tid = 0; tid < WME_NUM_TID; tid++) { struct ieee80211_rx_ampdu *rap; rap = &ni->ni_rx_ampdu[tid]; if ((rap->rxa_flags & IEEE80211_AGGR_XCHGPEND) == 0) continue; if (rap->rxa_qframes) mwl_ampdu_rxdma_reclaim(rap); } } } static void mwl_node_getsignal(const struct ieee80211_node *ni, int8_t *rssi, int8_t *noise) { *rssi = ni->ni_ic->ic_node_getrssi(ni); #ifdef MWL_ANT_INFO_SUPPORT #if 0 /* XXX need to smooth data */ *noise = -MWL_NODE_CONST(ni)->mn_ai.nf; #else *noise = -95; /* XXX */ #endif #else *noise = -95; /* XXX */ #endif } /* * Convert Hardware per-antenna rssi info to common format: * Let a1, a2, a3 represent the amplitudes per chain * Let amax represent max[a1, a2, a3] * Rssi1_dBm = RSSI_dBm + 20*log10(a1/amax) * Rssi1_dBm = RSSI_dBm + 20*log10(a1) - 20*log10(amax) * We store a table that is 4*20*log10(idx) - the extra 4 is to store or * maintain some extra precision. * * Values are stored in .5 db format capped at 127. */ static void mwl_node_getmimoinfo(const struct ieee80211_node *ni, struct ieee80211_mimo_info *mi) { #define CVT(_dst, _src) do { \ (_dst) = rssi + ((logdbtbl[_src] - logdbtbl[rssi_max]) >> 2); \ (_dst) = (_dst) > 64 ? 127 : ((_dst) << 1); \ } while (0) static const int8_t logdbtbl[32] = { 0, 0, 24, 38, 48, 56, 62, 68, 72, 76, 80, 83, 86, 89, 92, 94, 96, 98, 100, 102, 104, 106, 107, 109, 110, 112, 113, 115, 116, 117, 118, 119 }; const struct mwl_node *mn = MWL_NODE_CONST(ni); uint8_t rssi = mn->mn_ai.rsvd1/2; /* XXX */ uint32_t rssi_max; rssi_max = mn->mn_ai.rssi_a; if (mn->mn_ai.rssi_b > rssi_max) rssi_max = mn->mn_ai.rssi_b; if (mn->mn_ai.rssi_c > rssi_max) rssi_max = mn->mn_ai.rssi_c; CVT(mi->rssi[0], mn->mn_ai.rssi_a); CVT(mi->rssi[1], mn->mn_ai.rssi_b); CVT(mi->rssi[2], mn->mn_ai.rssi_c); mi->noise[0] = mn->mn_ai.nf_a; mi->noise[1] = mn->mn_ai.nf_b; mi->noise[2] = mn->mn_ai.nf_c; #undef CVT } static __inline void * mwl_getrxdma(struct mwl_softc *sc) { struct mwl_jumbo *buf; void *data; /* * Allocate from jumbo pool. 
*/ MWL_RXFREE_LOCK(sc); buf = SLIST_FIRST(&sc->sc_rxfree); if (buf == NULL) { DPRINTF(sc, MWL_DEBUG_ANY, "%s: out of rx dma buffers\n", __func__); sc->sc_stats.mst_rx_nodmabuf++; data = NULL; } else { SLIST_REMOVE_HEAD(&sc->sc_rxfree, next); sc->sc_nrxfree--; data = MWL_JUMBO_BUF2DATA(buf); } MWL_RXFREE_UNLOCK(sc); return data; } static __inline void mwl_putrxdma(struct mwl_softc *sc, void *data) { struct mwl_jumbo *buf; /* XXX bounds check data */ MWL_RXFREE_LOCK(sc); buf = MWL_JUMBO_DATA2BUF(data); SLIST_INSERT_HEAD(&sc->sc_rxfree, buf, next); sc->sc_nrxfree++; MWL_RXFREE_UNLOCK(sc); } static int mwl_rxbuf_init(struct mwl_softc *sc, struct mwl_rxbuf *bf) { struct mwl_rxdesc *ds; ds = bf->bf_desc; if (bf->bf_data == NULL) { bf->bf_data = mwl_getrxdma(sc); if (bf->bf_data == NULL) { /* mark descriptor to be skipped */ ds->RxControl = EAGLE_RXD_CTRL_OS_OWN; /* NB: don't need PREREAD */ MWL_RXDESC_SYNC(sc, ds, BUS_DMASYNC_PREWRITE); sc->sc_stats.mst_rxbuf_failed++; return ENOMEM; } } /* * NB: DMA buffer contents is known to be unmodified * so there's no need to flush the data cache. */ /* * Setup descriptor. */ ds->QosCtrl = 0; ds->RSSI = 0; ds->Status = EAGLE_RXD_STATUS_IDLE; ds->Channel = 0; ds->PktLen = htole16(MWL_AGGR_SIZE); ds->SQ2 = 0; ds->pPhysBuffData = htole32(MWL_JUMBO_DMA_ADDR(sc, bf->bf_data)); /* NB: don't touch pPhysNext, set once */ ds->RxControl = EAGLE_RXD_CTRL_DRIVER_OWN; MWL_RXDESC_SYNC(sc, ds, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return 0; } static void mwl_ext_free(struct mbuf *m, void *data, void *arg) { struct mwl_softc *sc = arg; /* XXX bounds check data */ mwl_putrxdma(sc, data); /* * If we were previously blocked by a lack of rx dma buffers * check if we now have enough to restart rx interrupt handling. * NB: we know we are called at splvm which is above splnet. */ if (sc->sc_rxblocked && sc->sc_nrxfree > mwl_rxdmalow) { sc->sc_rxblocked = 0; mwl_hal_intrset(sc->sc_mh, sc->sc_imask); } } struct mwl_frame_bar { u_int8_t i_fc[2]; u_int8_t i_dur[2]; u_int8_t i_ra[IEEE80211_ADDR_LEN]; u_int8_t i_ta[IEEE80211_ADDR_LEN]; /* ctl, seq, FCS */ } __packed; /* * Like ieee80211_anyhdrsize, but handles BAR frames * specially so the logic below to piece the 802.11 * header together works. */ static __inline int mwl_anyhdrsize(const void *data) { const struct ieee80211_frame *wh = data; if ((wh->i_fc[0]&IEEE80211_FC0_TYPE_MASK) == IEEE80211_FC0_TYPE_CTL) { switch (wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK) { case IEEE80211_FC0_SUBTYPE_CTS: case IEEE80211_FC0_SUBTYPE_ACK: return sizeof(struct ieee80211_frame_ack); case IEEE80211_FC0_SUBTYPE_BAR: return sizeof(struct mwl_frame_bar); } return sizeof(struct ieee80211_frame_min); } else return ieee80211_hdrsize(data); } static void mwl_handlemicerror(struct ieee80211com *ic, const uint8_t *data) { const struct ieee80211_frame *wh; struct ieee80211_node *ni; wh = (const struct ieee80211_frame *)(data + sizeof(uint16_t)); ni = ieee80211_find_rxnode(ic, (const struct ieee80211_frame_min *) wh); if (ni != NULL) { ieee80211_notify_michael_failure(ni->ni_vap, wh, 0); ieee80211_free_node(ni); } } /* * Convert hardware signal strength to rssi. The value * provided by the device has the noise floor added in; * we need to compensate for this but we don't have that * so we use a fixed value. * * The offset of 8 is good for both 2.4 and 5GHz. The LNA * offset is already set as part of the initial gain. This * will give at least +/- 3dB for 2.4GHz and +/- 5dB for 5GHz. 
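 * Worked example (illustrative input): a hardware ssi of 60 becomes
 * 60 + 8 = 68, then 2*(87 - 68) = 38 in .5 dBm units (i.e. 19 dB),
 * and the result is clamped to 0..127.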
*/ static __inline int cvtrssi(uint8_t ssi) { int rssi = (int) ssi + 8; /* XXX hack guess until we have a real noise floor */ rssi = 2*(87 - rssi); /* NB: .5 dBm units */ return (rssi < 0 ? 0 : rssi > 127 ? 127 : rssi); } static void mwl_rx_proc(void *arg, int npending) { struct mwl_softc *sc = arg; struct ieee80211com *ic = &sc->sc_ic; struct mwl_rxbuf *bf; struct mwl_rxdesc *ds; struct mbuf *m; struct ieee80211_qosframe *wh; struct ieee80211_qosframe_addr4 *wh4; struct ieee80211_node *ni; struct mwl_node *mn; int off, len, hdrlen, pktlen, rssi, ntodo; uint8_t *data, status; void *newdata; int16_t nf; DPRINTF(sc, MWL_DEBUG_RX_PROC, "%s: pending %u rdptr 0x%x wrptr 0x%x\n", __func__, npending, RD4(sc, sc->sc_hwspecs.rxDescRead), RD4(sc, sc->sc_hwspecs.rxDescWrite)); nf = -96; /* XXX */ bf = sc->sc_rxnext; for (ntodo = mwl_rxquota; ntodo > 0; ntodo--) { if (bf == NULL) bf = STAILQ_FIRST(&sc->sc_rxbuf); ds = bf->bf_desc; data = bf->bf_data; if (data == NULL) { /* * If data allocation failed previously there * will be no buffer; try again to re-populate it. * Note the firmware will not advance to the next * descriptor with a dma buffer so we must mimic * this or we'll get out of sync. */ DPRINTF(sc, MWL_DEBUG_ANY, "%s: rx buf w/o dma memory\n", __func__); (void) mwl_rxbuf_init(sc, bf); sc->sc_stats.mst_rx_dmabufmissing++; break; } MWL_RXDESC_SYNC(sc, ds, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); if (ds->RxControl != EAGLE_RXD_CTRL_DMA_OWN) break; #ifdef MWL_DEBUG if (sc->sc_debug & MWL_DEBUG_RECV_DESC) mwl_printrxbuf(bf, 0); #endif status = ds->Status; if (status & EAGLE_RXD_STATUS_DECRYPT_ERR_MASK) { counter_u64_add(ic->ic_ierrors, 1); sc->sc_stats.mst_rx_crypto++; /* * NB: Check EAGLE_RXD_STATUS_GENERAL_DECRYPT_ERR * for backwards compatibility. */ if (status != EAGLE_RXD_STATUS_GENERAL_DECRYPT_ERR && (status & EAGLE_RXD_STATUS_TKIP_MIC_DECRYPT_ERR)) { /* * MIC error, notify upper layers. */ bus_dmamap_sync(sc->sc_rxdmat, sc->sc_rxmap, BUS_DMASYNC_POSTREAD); mwl_handlemicerror(ic, data); sc->sc_stats.mst_rx_tkipmic++; } /* XXX too painful to tap packets */ goto rx_next; } /* * Sync the data buffer. */ len = le16toh(ds->PktLen); bus_dmamap_sync(sc->sc_rxdmat, sc->sc_rxmap, BUS_DMASYNC_POSTREAD); /* * The 802.11 header is provided all or in part at the front; * use it to calculate the true size of the header that we'll * construct below. We use this to figure out where to copy * payload prior to constructing the header. */ hdrlen = mwl_anyhdrsize(data + sizeof(uint16_t)); off = sizeof(uint16_t) + sizeof(struct ieee80211_frame_addr4); /* calculate rssi early so we can re-use for each aggregate */ rssi = cvtrssi(ds->RSSI); pktlen = hdrlen + (len - off); /* * NB: we know our frame is at least as large as * IEEE80211_MIN_LEN because there is a 4-address * frame at the front. Hence there's no need to * vet the packet length. If the frame in fact * is too small it should be discarded at the * net80211 layer. */ /* * Attach dma buffer to an mbuf. We tried * doing this based on the packet size (i.e. * copying small packets) but it turns out to * be a net loss. The tradeoff might be system * dependent (cache architecture is important). */ MGETHDR(m, M_NOWAIT, MT_DATA); if (m == NULL) { DPRINTF(sc, MWL_DEBUG_ANY, "%s: no rx mbuf\n", __func__); sc->sc_stats.mst_rx_nombuf++; goto rx_next; } /* * Acquire the replacement dma buffer before * processing the frame. 
If we're out of dma * buffers we disable rx interrupts and wait * for the free pool to reach mwl_rxdmalow buffers * before starting to do work again. If the firmware * runs out of descriptors then it will toss frames * which is better than our doing it as that can * starve our processing. It is also important that * we always process rx'd frames in case they are * A-MPDU as otherwise the host's view of the BA * window may get out of sync with the firmware. */ newdata = mwl_getrxdma(sc); if (newdata == NULL) { /* NB: stat+msg in mwl_getrxdma */ m_free(m); /* disable RX interrupt and mark state */ mwl_hal_intrset(sc->sc_mh, sc->sc_imask &~ MACREG_A2HRIC_BIT_RX_RDY); sc->sc_rxblocked = 1; ieee80211_drain(ic); /* XXX check rxblocked and immediately start again? */ goto rx_stop; } bf->bf_data = newdata; /* * Attach the dma buffer to the mbuf; * mwl_rxbuf_init will re-setup the rx * descriptor using the replacement dma * buffer we just installed above. */ MEXTADD(m, data, MWL_AGGR_SIZE, mwl_ext_free, data, sc, 0, EXT_NET_DRV); m->m_data += off - hdrlen; m->m_pkthdr.len = m->m_len = pktlen; /* NB: dma buffer assumed read-only */ /* * Piece 802.11 header together. */ wh = mtod(m, struct ieee80211_qosframe *); /* NB: don't need to do this sometimes but ... */ /* XXX special case so we can memcpy after m_devget? */ ovbcopy(data + sizeof(uint16_t), wh, hdrlen); if (IEEE80211_QOS_HAS_SEQ(wh)) { if (IEEE80211_IS_DSTODS(wh)) { wh4 = mtod(m, struct ieee80211_qosframe_addr4*); *(uint16_t *)wh4->i_qos = ds->QosCtrl; } else { *(uint16_t *)wh->i_qos = ds->QosCtrl; } } /* * The f/w strips the WEP header but doesn't clear * the WEP bit; mark the packet with M_WEP so * net80211 will treat the data as decrypted. * While here also clear the PWR_MGT bit since * power save is handled by the firmware and * passing this up will potentially cause the * upper layer to put a station in power save * (except when configured with MWL_HOST_PS_SUPPORT).
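 * For example, a CCMP-protected data frame reaches us already decrypted
 * by the f/w but with the PROTECTED bit still set in i_fc[1]; tagging it
 * M_WEP keeps net80211 from trying to decrypt it a second time.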
*/ if (wh->i_fc[1] & IEEE80211_FC1_PROTECTED) m->m_flags |= M_WEP; #ifdef MWL_HOST_PS_SUPPORT wh->i_fc[1] &= ~IEEE80211_FC1_PROTECTED; #else wh->i_fc[1] &= ~(IEEE80211_FC1_PROTECTED | IEEE80211_FC1_PWR_MGT); #endif if (ieee80211_radiotap_active(ic)) { struct mwl_rx_radiotap_header *tap = &sc->sc_rx_th; tap->wr_flags = 0; tap->wr_rate = ds->Rate; tap->wr_antsignal = rssi + nf; tap->wr_antnoise = nf; } if (IFF_DUMPPKTS_RECV(sc, wh)) { ieee80211_dump_pkt(ic, mtod(m, caddr_t), len, ds->Rate, rssi); } /* dispatch */ ni = ieee80211_find_rxnode(ic, (const struct ieee80211_frame_min *) wh); if (ni != NULL) { mn = MWL_NODE(ni); #ifdef MWL_ANT_INFO_SUPPORT mn->mn_ai.rssi_a = ds->ai.rssi_a; mn->mn_ai.rssi_b = ds->ai.rssi_b; mn->mn_ai.rssi_c = ds->ai.rssi_c; mn->mn_ai.rsvd1 = rssi; #endif /* tag AMPDU aggregates for reorder processing */ if (ni->ni_flags & IEEE80211_NODE_HT) m->m_flags |= M_AMPDU; (void) ieee80211_input(ni, m, rssi, nf); ieee80211_free_node(ni); } else (void) ieee80211_input_all(ic, m, rssi, nf); rx_next: /* NB: ignore ENOMEM so we process more descriptors */ (void) mwl_rxbuf_init(sc, bf); bf = STAILQ_NEXT(bf, bf_list); } rx_stop: sc->sc_rxnext = bf; if (mbufq_first(&sc->sc_snd) != NULL) { /* NB: kick fw; the tx thread may have been preempted */ mwl_hal_txstart(sc->sc_mh, 0); mwl_start(sc); } } static void mwl_txq_init(struct mwl_softc *sc, struct mwl_txq *txq, int qnum) { struct mwl_txbuf *bf, *bn; struct mwl_txdesc *ds; MWL_TXQ_LOCK_INIT(sc, txq); txq->qnum = qnum; txq->txpri = 0; /* XXX */ #if 0 /* NB: q setup by mwl_txdma_setup XXX */ STAILQ_INIT(&txq->free); #endif STAILQ_FOREACH(bf, &txq->free, bf_list) { bf->bf_txq = txq; ds = bf->bf_desc; bn = STAILQ_NEXT(bf, bf_list); if (bn == NULL) bn = STAILQ_FIRST(&txq->free); ds->pPhysNext = htole32(bn->bf_daddr); } STAILQ_INIT(&txq->active); } /* * Setup a hardware data transmit queue for the specified * access control. We record the mapping from ac's * to h/w queues for use by mwl_tx_start. */ static int mwl_tx_setup(struct mwl_softc *sc, int ac, int mvtype) { struct mwl_txq *txq; if (ac >= nitems(sc->sc_ac2q)) { device_printf(sc->sc_dev, "AC %u out of range, max %zu!\n", ac, nitems(sc->sc_ac2q)); return 0; } if (mvtype >= MWL_NUM_TX_QUEUES) { device_printf(sc->sc_dev, "mvtype %u out of range, max %u!\n", mvtype, MWL_NUM_TX_QUEUES); return 0; } txq = &sc->sc_txq[mvtype]; mwl_txq_init(sc, txq, mvtype); sc->sc_ac2q[ac] = txq; return 1; } /* * Update WME parameters for a transmit queue. */ static int mwl_txq_update(struct mwl_softc *sc, int ac) { #define MWL_EXPONENT_TO_VALUE(v) ((1<<v)-1) struct ieee80211com *ic = &sc->sc_ic; struct mwl_txq *txq = sc->sc_ac2q[ac]; struct wmeParams *wmep = &ic->ic_wme.wme_chanParams.cap_wmeParams[ac]; struct mwl_hal *mh = sc->sc_mh; int aifs, cwmin, cwmax, txoplim; aifs = wmep->wmep_aifsn; /* XXX in sta mode need to pass log values for cwmin/max */ cwmin = MWL_EXPONENT_TO_VALUE(wmep->wmep_logcwmin); cwmax = MWL_EXPONENT_TO_VALUE(wmep->wmep_logcwmax); txoplim = wmep->wmep_txopLimit; /* NB: units of 32us */ if (mwl_hal_setedcaparams(mh, txq->qnum, cwmin, cwmax, aifs, txoplim)) { device_printf(sc->sc_dev, "unable to update hardware queue " "parameters for %s traffic!\n", ieee80211_wme_acnames[ac]); return 0; } return 1; #undef MWL_EXPONENT_TO_VALUE } /* * Callback from the 802.11 layer to update WME parameters. */ static int mwl_wme_update(struct ieee80211com *ic) { struct mwl_softc *sc = ic->ic_softc; return !mwl_txq_update(sc, WME_AC_BE) || !mwl_txq_update(sc, WME_AC_BK) || !mwl_txq_update(sc, WME_AC_VI) || !mwl_txq_update(sc, WME_AC_VO) ?
EIO : 0; } /* * Reclaim resources for a setup queue. */ static void mwl_tx_cleanupq(struct mwl_softc *sc, struct mwl_txq *txq) { /* XXX hal work? */ MWL_TXQ_LOCK_DESTROY(txq); } /* * Reclaim all tx queue resources. */ static void mwl_tx_cleanup(struct mwl_softc *sc) { int i; for (i = 0; i < MWL_NUM_TX_QUEUES; i++) mwl_tx_cleanupq(sc, &sc->sc_txq[i]); } static int mwl_tx_dmasetup(struct mwl_softc *sc, struct mwl_txbuf *bf, struct mbuf *m0) { struct mbuf *m; int error; /* * Load the DMA map so any coalescing is done. This * also calculates the number of descriptors we need. */ error = bus_dmamap_load_mbuf_sg(sc->sc_dmat, bf->bf_dmamap, m0, bf->bf_segs, &bf->bf_nseg, BUS_DMA_NOWAIT); if (error == EFBIG) { /* XXX packet requires too many descriptors */ bf->bf_nseg = MWL_TXDESC+1; } else if (error != 0) { sc->sc_stats.mst_tx_busdma++; m_freem(m0); return error; } /* * Discard null packets and check for packets that * require too many TX descriptors. We try to convert * the latter to a cluster. */ if (error == EFBIG) { /* too many desc's, linearize */ sc->sc_stats.mst_tx_linear++; #if MWL_TXDESC > 1 m = m_collapse(m0, M_NOWAIT, MWL_TXDESC); #else m = m_defrag(m0, M_NOWAIT); #endif if (m == NULL) { m_freem(m0); sc->sc_stats.mst_tx_nombuf++; return ENOMEM; } m0 = m; error = bus_dmamap_load_mbuf_sg(sc->sc_dmat, bf->bf_dmamap, m0, bf->bf_segs, &bf->bf_nseg, BUS_DMA_NOWAIT); if (error != 0) { sc->sc_stats.mst_tx_busdma++; m_freem(m0); return error; } KASSERT(bf->bf_nseg <= MWL_TXDESC, ("too many segments after defrag; nseg %u", bf->bf_nseg)); } else if (bf->bf_nseg == 0) { /* null packet, discard */ sc->sc_stats.mst_tx_nodata++; m_freem(m0); return EIO; } DPRINTF(sc, MWL_DEBUG_XMIT, "%s: m %p len %u\n", __func__, m0, m0->m_pkthdr.len); bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap, BUS_DMASYNC_PREWRITE); bf->bf_m = m0; return 0; } static __inline int mwl_cvtlegacyrate(int rate) { switch (rate) { case 2: return 0; case 4: return 1; case 11: return 2; case 22: return 3; case 44: return 4; case 12: return 5; case 18: return 6; case 24: return 7; case 36: return 8; case 48: return 9; case 72: return 10; case 96: return 11; case 108:return 12; } return 0; } /* * Calculate fixed tx rate information per client state; * this value is suitable for writing to the Format field * of a tx descriptor. */ static uint16_t mwl_calcformat(uint8_t rate, const struct ieee80211_node *ni) { uint16_t fmt; fmt = SM(3, EAGLE_TXD_ANTENNA) | (IEEE80211_IS_CHAN_HT40D(ni->ni_chan) ? EAGLE_TXD_EXTCHAN_LO : EAGLE_TXD_EXTCHAN_HI); if (rate & IEEE80211_RATE_MCS) { /* HT MCS */ fmt |= EAGLE_TXD_FORMAT_HT /* NB: 0x80 implicitly stripped from ucastrate */ | SM(rate, EAGLE_TXD_RATE); /* XXX short/long GI may be wrong; re-check */ if (IEEE80211_IS_CHAN_HT40(ni->ni_chan)) { fmt |= EAGLE_TXD_CHW_40 | (ni->ni_htcap & IEEE80211_HTCAP_SHORTGI40 ? EAGLE_TXD_GI_SHORT : EAGLE_TXD_GI_LONG); } else { fmt |= EAGLE_TXD_CHW_20 | (ni->ni_htcap & IEEE80211_HTCAP_SHORTGI20 ? EAGLE_TXD_GI_SHORT : EAGLE_TXD_GI_LONG); } } else { /* legacy rate */ fmt |= EAGLE_TXD_FORMAT_LEGACY | SM(mwl_cvtlegacyrate(rate), EAGLE_TXD_RATE) | EAGLE_TXD_CHW_20 /* XXX iv_flags & IEEE80211_F_SHPREAMBLE? */ | (ni->ni_capinfo & IEEE80211_CAPINFO_SHORT_PREAMBLE ? 
EAGLE_TXD_PREAMBLE_SHORT : EAGLE_TXD_PREAMBLE_LONG); } return fmt; } static int mwl_tx_start(struct mwl_softc *sc, struct ieee80211_node *ni, struct mwl_txbuf *bf, struct mbuf *m0) { struct ieee80211com *ic = &sc->sc_ic; struct ieee80211vap *vap = ni->ni_vap; int error, iswep, ismcast; int hdrlen, copyhdrlen, pktlen; struct mwl_txdesc *ds; struct mwl_txq *txq; struct ieee80211_frame *wh; struct mwltxrec *tr; struct mwl_node *mn; uint16_t qos; #if MWL_TXDESC > 1 int i; #endif wh = mtod(m0, struct ieee80211_frame *); iswep = wh->i_fc[1] & IEEE80211_FC1_PROTECTED; ismcast = IEEE80211_IS_MULTICAST(wh->i_addr1); hdrlen = ieee80211_anyhdrsize(wh); copyhdrlen = hdrlen; pktlen = m0->m_pkthdr.len; if (IEEE80211_QOS_HAS_SEQ(wh)) { if (IEEE80211_IS_DSTODS(wh)) { qos = *(uint16_t *) (((struct ieee80211_qosframe_addr4 *) wh)->i_qos); copyhdrlen -= sizeof(qos); } else qos = *(uint16_t *) (((struct ieee80211_qosframe *) wh)->i_qos); } else qos = 0; if (iswep) { const struct ieee80211_cipher *cip; struct ieee80211_key *k; /* * Construct the 802.11 header+trailer for an encrypted * frame. The only reason this can fail is because of an * unknown or unsupported cipher/key type. * * NB: we do this even though the firmware will ignore * what we've done for WEP and TKIP as we need the * ExtIV filled in for CCMP and this also adjusts * the headers which simplifies our work below. */ k = ieee80211_crypto_encap(ni, m0); if (k == NULL) { /* * This can happen when the key is yanked after the * frame was queued. Just discard the frame; the * 802.11 layer counts failures and provides * debugging/diagnostics. */ m_freem(m0); return EIO; } /* * Adjust the packet length for the crypto additions * done during encap and any other bits that the f/w * will add later on. */ cip = k->wk_cipher; pktlen += cip->ic_header + cip->ic_miclen + cip->ic_trailer; /* packet header may have moved, reset our local pointer */ wh = mtod(m0, struct ieee80211_frame *); } if (ieee80211_radiotap_active_vap(vap)) { sc->sc_tx_th.wt_flags = 0; /* XXX */ if (iswep) sc->sc_tx_th.wt_flags |= IEEE80211_RADIOTAP_F_WEP; #if 0 sc->sc_tx_th.wt_rate = ds->DataRate; #endif sc->sc_tx_th.wt_txpower = ni->ni_txpower; sc->sc_tx_th.wt_antenna = sc->sc_txantenna; ieee80211_radiotap_tx(vap, m0); } /* * Copy up/down the 802.11 header; the firmware requires * we present a 2-byte payload length followed by a * 4-address header (w/o QoS), followed (optionally) by * any WEP/ExtIV header (but only filled in for CCMP). * We are assured the mbuf has sufficient headroom to * prepend in-place by the setup of ic_headroom in * mwl_attach. */ if (hdrlen < sizeof(struct mwltxrec)) { const int space = sizeof(struct mwltxrec) - hdrlen; if (M_LEADINGSPACE(m0) < space) { /* NB: should never happen */ device_printf(sc->sc_dev, "not enough headroom, need %d found %zd, " "m_flags 0x%x m_len %d\n", space, M_LEADINGSPACE(m0), m0->m_flags, m0->m_len); ieee80211_dump_pkt(ic, mtod(m0, const uint8_t *), m0->m_len, 0, -1); m_freem(m0); sc->sc_stats.mst_tx_noheadroom++; return EIO; } M_PREPEND(m0, space, M_NOWAIT); } tr = mtod(m0, struct mwltxrec *); if (wh != (struct ieee80211_frame *) &tr->wh) ovbcopy(wh, &tr->wh, hdrlen); /* * Note: the "firmware length" is actually the length * of the fully formed "802.11 payload". That is, it's * everything except for the 802.11 header. In particular * this includes all crypto material including the MIC! */ tr->fwlen = htole16(pktlen - hdrlen); /* * Load the DMA map so any coalescing is done. This * also calculates the number of descriptors we need. 
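 * For instance (illustrative case): a frame scattered across more than
 * MWL_TXDESC segments makes bus_dmamap_load_mbuf_sg() return EFBIG;
 * mwl_tx_dmasetup() then linearizes the chain (m_collapse or m_defrag)
 * and retries the load before giving up.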
*/ error = mwl_tx_dmasetup(sc, bf, m0); if (error != 0) { /* NB: stat collected in mwl_tx_dmasetup */ DPRINTF(sc, MWL_DEBUG_XMIT, "%s: unable to setup dma\n", __func__); return error; } bf->bf_node = ni; /* NB: held reference */ m0 = bf->bf_m; /* NB: may have changed */ tr = mtod(m0, struct mwltxrec *); wh = (struct ieee80211_frame *)&tr->wh; /* * Formulate tx descriptor. */ ds = bf->bf_desc; txq = bf->bf_txq; ds->QosCtrl = qos; /* NB: already little-endian */ #if MWL_TXDESC == 1 /* * NB: multiframes should be zero because the descriptors * are initialized to zero. This should handle the case * where the driver is built with MWL_TXDESC=1 but we are * using firmware with multi-segment support. */ ds->PktPtr = htole32(bf->bf_segs[0].ds_addr); ds->PktLen = htole16(bf->bf_segs[0].ds_len); #else ds->multiframes = htole32(bf->bf_nseg); ds->PktLen = htole16(m0->m_pkthdr.len); for (i = 0; i < bf->bf_nseg; i++) { ds->PktPtrArray[i] = htole32(bf->bf_segs[i].ds_addr); ds->PktLenArray[i] = htole16(bf->bf_segs[i].ds_len); } #endif /* NB: pPhysNext, DataRate, and SapPktInfo setup once, don't touch */ ds->Format = 0; ds->pad = 0; ds->ack_wcb_addr = 0; mn = MWL_NODE(ni); /* * Select transmit rate. */ switch (wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK) { case IEEE80211_FC0_TYPE_MGT: sc->sc_stats.mst_tx_mgmt++; /* fall thru... */ case IEEE80211_FC0_TYPE_CTL: /* NB: assign to BE q to avoid bursting */ ds->TxPriority = MWL_WME_AC_BE; break; case IEEE80211_FC0_TYPE_DATA: if (!ismcast) { const struct ieee80211_txparam *tp = ni->ni_txparms; /* * EAPOL frames get forced to a fixed rate and w/o * aggregation; otherwise check for any fixed rate * for the client (may depend on association state). */ if (m0->m_flags & M_EAPOL) { const struct mwl_vap *mvp = MWL_VAP_CONST(vap); ds->Format = mvp->mv_eapolformat; ds->pad = htole16( EAGLE_TXD_FIXED_RATE | EAGLE_TXD_DONT_AGGR); } else if (tp->ucastrate != IEEE80211_FIXED_RATE_NONE) { /* XXX pre-calculate per node */ ds->Format = htole16( mwl_calcformat(tp->ucastrate, ni)); ds->pad = htole16(EAGLE_TXD_FIXED_RATE); } /* NB: EAPOL frames will never have qos set */ if (qos == 0) ds->TxPriority = txq->qnum; #if MWL_MAXBA > 3 else if (mwl_bastream_match(&mn->mn_ba[3], qos)) ds->TxPriority = mn->mn_ba[3].txq; #endif #if MWL_MAXBA > 2 else if (mwl_bastream_match(&mn->mn_ba[2], qos)) ds->TxPriority = mn->mn_ba[2].txq; #endif #if MWL_MAXBA > 1 else if (mwl_bastream_match(&mn->mn_ba[1], qos)) ds->TxPriority = mn->mn_ba[1].txq; #endif #if MWL_MAXBA > 0 else if (mwl_bastream_match(&mn->mn_ba[0], qos)) ds->TxPriority = mn->mn_ba[0].txq; #endif else ds->TxPriority = txq->qnum; } else ds->TxPriority = txq->qnum; break; default: device_printf(sc->sc_dev, "bogus frame type 0x%x (%s)\n", wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK, __func__); sc->sc_stats.mst_tx_badframetype++; m_freem(m0); return EIO; } if (IFF_DUMPPKTS_XMIT(sc)) ieee80211_dump_pkt(ic, mtod(m0, const uint8_t *)+sizeof(uint16_t), m0->m_len - sizeof(uint16_t), ds->DataRate, -1); MWL_TXQ_LOCK(txq); ds->Status = htole32(EAGLE_TXD_STATUS_FW_OWNED); STAILQ_INSERT_TAIL(&txq->active, bf, bf_list); MWL_TXDESC_SYNC(txq, ds, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); sc->sc_tx_timer = 5; MWL_TXQ_UNLOCK(txq); return 0; } static __inline int mwl_cvtlegacyrix(int rix) { static const int ieeerates[] = { 2, 4, 11, 22, 44, 12, 18, 24, 36, 48, 72, 96, 108 }; return (rix < nitems(ieeerates) ? ieeerates[rix] : 0); } /* * Process completed xmit descriptors from the specified queue. 
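 * For illustration of the rate readback done below: a completed legacy
 * frame whose Format field holds EAGLE_TXD_RATE index 2 maps through
 * mwl_cvtlegacyrix() to 11 (5.5 Mb/s in net80211's half-Mb/s codes),
 * while HT completions keep the MCS index and set IEEE80211_RATE_MCS.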
*/ static int mwl_tx_processq(struct mwl_softc *sc, struct mwl_txq *txq) { #define EAGLE_TXD_STATUS_MCAST \ (EAGLE_TXD_STATUS_MULTICAST_TX | EAGLE_TXD_STATUS_BROADCAST_TX) struct ieee80211com *ic = &sc->sc_ic; struct mwl_txbuf *bf; struct mwl_txdesc *ds; struct ieee80211_node *ni; struct mwl_node *an; int nreaped; uint32_t status; DPRINTF(sc, MWL_DEBUG_TX_PROC, "%s: tx queue %u\n", __func__, txq->qnum); for (nreaped = 0;; nreaped++) { MWL_TXQ_LOCK(txq); bf = STAILQ_FIRST(&txq->active); if (bf == NULL) { MWL_TXQ_UNLOCK(txq); break; } ds = bf->bf_desc; MWL_TXDESC_SYNC(txq, ds, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); if (ds->Status & htole32(EAGLE_TXD_STATUS_FW_OWNED)) { MWL_TXQ_UNLOCK(txq); break; } STAILQ_REMOVE_HEAD(&txq->active, bf_list); MWL_TXQ_UNLOCK(txq); #ifdef MWL_DEBUG if (sc->sc_debug & MWL_DEBUG_XMIT_DESC) mwl_printtxbuf(bf, txq->qnum, nreaped); #endif ni = bf->bf_node; if (ni != NULL) { an = MWL_NODE(ni); status = le32toh(ds->Status); if (status & EAGLE_TXD_STATUS_OK) { uint16_t Format = le16toh(ds->Format); uint8_t txant = MS(Format, EAGLE_TXD_ANTENNA); sc->sc_stats.mst_ant_tx[txant]++; if (status & EAGLE_TXD_STATUS_OK_RETRY) sc->sc_stats.mst_tx_retries++; if (status & EAGLE_TXD_STATUS_OK_MORE_RETRY) sc->sc_stats.mst_tx_mretries++; if (txq->qnum >= MWL_WME_AC_VO) ic->ic_wme.wme_hipri_traffic++; ni->ni_txrate = MS(Format, EAGLE_TXD_RATE); if ((Format & EAGLE_TXD_FORMAT_HT) == 0) { ni->ni_txrate = mwl_cvtlegacyrix( ni->ni_txrate); } else ni->ni_txrate |= IEEE80211_RATE_MCS; sc->sc_stats.mst_tx_rate = ni->ni_txrate; } else { if (status & EAGLE_TXD_STATUS_FAILED_LINK_ERROR) sc->sc_stats.mst_tx_linkerror++; if (status & EAGLE_TXD_STATUS_FAILED_XRETRY) sc->sc_stats.mst_tx_xretries++; if (status & EAGLE_TXD_STATUS_FAILED_AGING) sc->sc_stats.mst_tx_aging++; if (bf->bf_m->m_flags & M_FF) sc->sc_stats.mst_ff_txerr++; } if (bf->bf_m->m_flags & M_TXCB) /* XXX strip fw len in case header inspected */ m_adj(bf->bf_m, sizeof(uint16_t)); ieee80211_tx_complete(ni, bf->bf_m, (status & EAGLE_TXD_STATUS_OK) == 0); } else m_freem(bf->bf_m); ds->Status = htole32(EAGLE_TXD_STATUS_IDLE); bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(sc->sc_dmat, bf->bf_dmamap); mwl_puttxbuf_tail(txq, bf); } return nreaped; #undef EAGLE_TXD_STATUS_MCAST } /* * Deferred processing of transmit interrupt; special-cased * for four hardware queues, 0-3. */ static void mwl_tx_proc(void *arg, int npending) { struct mwl_softc *sc = arg; int nreaped; /* * Process each active queue. 
*/ nreaped = 0; if (!STAILQ_EMPTY(&sc->sc_txq[0].active)) nreaped += mwl_tx_processq(sc, &sc->sc_txq[0]); if (!STAILQ_EMPTY(&sc->sc_txq[1].active)) nreaped += mwl_tx_processq(sc, &sc->sc_txq[1]); if (!STAILQ_EMPTY(&sc->sc_txq[2].active)) nreaped += mwl_tx_processq(sc, &sc->sc_txq[2]); if (!STAILQ_EMPTY(&sc->sc_txq[3].active)) nreaped += mwl_tx_processq(sc, &sc->sc_txq[3]); if (nreaped != 0) { sc->sc_tx_timer = 0; if (mbufq_first(&sc->sc_snd) != NULL) { /* NB: kick fw; the tx thread may have been preempted */ mwl_hal_txstart(sc->sc_mh, 0); mwl_start(sc); } } } static void mwl_tx_draintxq(struct mwl_softc *sc, struct mwl_txq *txq) { struct ieee80211_node *ni; struct mwl_txbuf *bf; u_int ix; /* * NB: this assumes output has been stopped and * we do not need to block mwl_tx_tasklet */ for (ix = 0;; ix++) { MWL_TXQ_LOCK(txq); bf = STAILQ_FIRST(&txq->active); if (bf == NULL) { MWL_TXQ_UNLOCK(txq); break; } STAILQ_REMOVE_HEAD(&txq->active, bf_list); MWL_TXQ_UNLOCK(txq); #ifdef MWL_DEBUG if (sc->sc_debug & MWL_DEBUG_RESET) { struct ieee80211com *ic = &sc->sc_ic; const struct mwltxrec *tr = mtod(bf->bf_m, const struct mwltxrec *); mwl_printtxbuf(bf, txq->qnum, ix); ieee80211_dump_pkt(ic, (const uint8_t *)&tr->wh, bf->bf_m->m_len - sizeof(tr->fwlen), 0, -1); } #endif /* MWL_DEBUG */ bus_dmamap_unload(sc->sc_dmat, bf->bf_dmamap); ni = bf->bf_node; if (ni != NULL) { /* * Reclaim node reference. */ ieee80211_free_node(ni); } m_freem(bf->bf_m); mwl_puttxbuf_tail(txq, bf); } } /* * Drain the transmit queues and reclaim resources. */ static void mwl_draintxq(struct mwl_softc *sc) { int i; for (i = 0; i < MWL_NUM_TX_QUEUES; i++) mwl_tx_draintxq(sc, &sc->sc_txq[i]); sc->sc_tx_timer = 0; } #ifdef MWL_DIAGAPI /* * Reset the transmit queues to a pristine state after a fw download. */ static void mwl_resettxq(struct mwl_softc *sc) { int i; for (i = 0; i < MWL_NUM_TX_QUEUES; i++) mwl_txq_reset(sc, &sc->sc_txq[i]); } #endif /* MWL_DIAGAPI */ /* * Clear the transmit queues of any frames submitted for the * specified vap. This is done when the vap is deleted so we * don't potentially reference the vap after it is gone. * Note we cannot remove the frames; we only reclaim the node * reference. 
*/ static void mwl_cleartxq(struct mwl_softc *sc, struct ieee80211vap *vap) { struct mwl_txq *txq; struct mwl_txbuf *bf; int i; for (i = 0; i < MWL_NUM_TX_QUEUES; i++) { txq = &sc->sc_txq[i]; MWL_TXQ_LOCK(txq); STAILQ_FOREACH(bf, &txq->active, bf_list) { struct ieee80211_node *ni = bf->bf_node; if (ni != NULL && ni->ni_vap == vap) { bf->bf_node = NULL; ieee80211_free_node(ni); } } MWL_TXQ_UNLOCK(txq); } } static int mwl_recv_action(struct ieee80211_node *ni, const struct ieee80211_frame *wh, const uint8_t *frm, const uint8_t *efrm) { struct mwl_softc *sc = ni->ni_ic->ic_softc; const struct ieee80211_action *ia; ia = (const struct ieee80211_action *) frm; if (ia->ia_category == IEEE80211_ACTION_CAT_HT && ia->ia_action == IEEE80211_ACTION_HT_MIMOPWRSAVE) { const struct ieee80211_action_ht_mimopowersave *mps = (const struct ieee80211_action_ht_mimopowersave *) ia; mwl_hal_setmimops(sc->sc_mh, ni->ni_macaddr, mps->am_control & IEEE80211_A_HT_MIMOPWRSAVE_ENA, MS(mps->am_control, IEEE80211_A_HT_MIMOPWRSAVE_MODE)); return 0; } else return sc->sc_recv_action(ni, wh, frm, efrm); } static int mwl_addba_request(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap, int dialogtoken, int baparamset, int batimeout) { struct mwl_softc *sc = ni->ni_ic->ic_softc; struct ieee80211vap *vap = ni->ni_vap; struct mwl_node *mn = MWL_NODE(ni); struct mwl_bastate *bas; bas = tap->txa_private; if (bas == NULL) { const MWL_HAL_BASTREAM *sp; /* * Check for a free BA stream slot. */ #if MWL_MAXBA > 3 if (mn->mn_ba[3].bastream == NULL) bas = &mn->mn_ba[3]; else #endif #if MWL_MAXBA > 2 if (mn->mn_ba[2].bastream == NULL) bas = &mn->mn_ba[2]; else #endif #if MWL_MAXBA > 1 if (mn->mn_ba[1].bastream == NULL) bas = &mn->mn_ba[1]; else #endif #if MWL_MAXBA > 0 if (mn->mn_ba[0].bastream == NULL) bas = &mn->mn_ba[0]; else #endif { /* sta already has max BA streams */ /* XXX assign BA stream to highest priority tid */ DPRINTF(sc, MWL_DEBUG_AMPDU, "%s: already has max bastreams\n", __func__); sc->sc_stats.mst_ampdu_reject++; return 0; } /* NB: no held reference to ni */ sp = mwl_hal_bastream_alloc(MWL_VAP(vap)->mv_hvap, (baparamset & IEEE80211_BAPS_POLICY_IMMEDIATE) != 0, ni->ni_macaddr, tap->txa_tid, ni->ni_htparam, ni, tap); if (sp == NULL) { /* * No available stream, return 0 so no * a-mpdu aggregation will be done. */ DPRINTF(sc, MWL_DEBUG_AMPDU, "%s: no bastream available\n", __func__); sc->sc_stats.mst_ampdu_nostream++; return 0; } DPRINTF(sc, MWL_DEBUG_AMPDU, "%s: alloc bastream %p\n", __func__, sp); /* NB: qos is left zero so we won't match in mwl_tx_start */ bas->bastream = sp; tap->txa_private = bas; } /* fetch current seq# from the firmware; if available */ if (mwl_hal_bastream_get_seqno(sc->sc_mh, bas->bastream, vap->iv_opmode == IEEE80211_M_STA ? 
vap->iv_myaddr : ni->ni_macaddr, &tap->txa_start) != 0) tap->txa_start = 0; return sc->sc_addba_request(ni, tap, dialogtoken, baparamset, batimeout); } static int mwl_addba_response(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap, int code, int baparamset, int batimeout) { struct mwl_softc *sc = ni->ni_ic->ic_softc; struct mwl_bastate *bas; bas = tap->txa_private; if (bas == NULL) { /* XXX should not happen */ DPRINTF(sc, MWL_DEBUG_AMPDU, "%s: no BA stream allocated, TID %d\n", __func__, tap->txa_tid); sc->sc_stats.mst_addba_nostream++; return 0; } if (code == IEEE80211_STATUS_SUCCESS) { struct ieee80211vap *vap = ni->ni_vap; int bufsiz, error; /* * Tell the firmware to setup the BA stream; * we know resources are available because we * pre-allocated one before forming the request. */ bufsiz = MS(baparamset, IEEE80211_BAPS_BUFSIZ); if (bufsiz == 0) bufsiz = IEEE80211_AGGR_BAWMAX; error = mwl_hal_bastream_create(MWL_VAP(vap)->mv_hvap, bas->bastream, bufsiz, bufsiz, tap->txa_start); if (error != 0) { /* * Setup failed, return immediately so no a-mpdu * aggregation will be done. */ mwl_hal_bastream_destroy(sc->sc_mh, bas->bastream); mwl_bastream_free(bas); tap->txa_private = NULL; DPRINTF(sc, MWL_DEBUG_AMPDU, "%s: create failed, error %d, bufsiz %d TID %d " "htparam 0x%x\n", __func__, error, bufsiz, tap->txa_tid, ni->ni_htparam); sc->sc_stats.mst_bacreate_failed++; return 0; } /* NB: cache txq to avoid ptr indirect */ mwl_bastream_setup(bas, tap->txa_tid, bas->bastream->txq); DPRINTF(sc, MWL_DEBUG_AMPDU, "%s: bastream %p assigned to txq %d TID %d bufsiz %d " "htparam 0x%x\n", __func__, bas->bastream, bas->txq, tap->txa_tid, bufsiz, ni->ni_htparam); } else { /* * Other side NAK'd us; return the resources. */ DPRINTF(sc, MWL_DEBUG_AMPDU, "%s: request failed with code %d, destroy bastream %p\n", __func__, code, bas->bastream); mwl_hal_bastream_destroy(sc->sc_mh, bas->bastream); mwl_bastream_free(bas); tap->txa_private = NULL; } /* NB: firmware sends BAR so we don't need to */ return sc->sc_addba_response(ni, tap, code, baparamset, batimeout); } static void mwl_addba_stop(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap) { struct mwl_softc *sc = ni->ni_ic->ic_softc; struct mwl_bastate *bas; bas = tap->txa_private; if (bas != NULL) { DPRINTF(sc, MWL_DEBUG_AMPDU, "%s: destroy bastream %p\n", __func__, bas->bastream); mwl_hal_bastream_destroy(sc->sc_mh, bas->bastream); mwl_bastream_free(bas); tap->txa_private = NULL; } sc->sc_addba_stop(ni, tap); } /* * Setup the rx data structures. This should only be * done once or we may get out of sync with the firmware. */ static int mwl_startrecv(struct mwl_softc *sc) { if (!sc->sc_recvsetup) { struct mwl_rxbuf *bf, *prev; struct mwl_rxdesc *ds; prev = NULL; STAILQ_FOREACH(bf, &sc->sc_rxbuf, bf_list) { int error = mwl_rxbuf_init(sc, bf); if (error != 0) { DPRINTF(sc, MWL_DEBUG_RECV, "%s: mwl_rxbuf_init failed %d\n", __func__, error); return error; } if (prev != NULL) { ds = prev->bf_desc; ds->pPhysNext = htole32(bf->bf_daddr); } prev = bf; } if (prev != NULL) { ds = prev->bf_desc; ds->pPhysNext = htole32(STAILQ_FIRST(&sc->sc_rxbuf)->bf_daddr); } sc->sc_recvsetup = 1; } mwl_mode_init(sc); /* set filters, etc. 
*/ return 0; } static MWL_HAL_APMODE mwl_getapmode(const struct ieee80211vap *vap, struct ieee80211_channel *chan) { MWL_HAL_APMODE mode; if (IEEE80211_IS_CHAN_HT(chan)) { if (vap->iv_flags_ht & IEEE80211_FHT_PUREN) mode = AP_MODE_N_ONLY; else if (IEEE80211_IS_CHAN_5GHZ(chan)) mode = AP_MODE_AandN; else if (vap->iv_flags & IEEE80211_F_PUREG) mode = AP_MODE_GandN; else mode = AP_MODE_BandGandN; } else if (IEEE80211_IS_CHAN_ANYG(chan)) { if (vap->iv_flags & IEEE80211_F_PUREG) mode = AP_MODE_G_ONLY; else mode = AP_MODE_MIXED; } else if (IEEE80211_IS_CHAN_B(chan)) mode = AP_MODE_B_ONLY; else if (IEEE80211_IS_CHAN_A(chan)) mode = AP_MODE_A_ONLY; else mode = AP_MODE_MIXED; /* XXX should not happen? */ return mode; } static int mwl_setapmode(struct ieee80211vap *vap, struct ieee80211_channel *chan) { struct mwl_hal_vap *hvap = MWL_VAP(vap)->mv_hvap; return mwl_hal_setapmode(hvap, mwl_getapmode(vap, chan)); } /* * Set/change channels. */ static int mwl_chan_set(struct mwl_softc *sc, struct ieee80211_channel *chan) { struct mwl_hal *mh = sc->sc_mh; struct ieee80211com *ic = &sc->sc_ic; MWL_HAL_CHANNEL hchan; int maxtxpow; DPRINTF(sc, MWL_DEBUG_RESET, "%s: chan %u MHz/flags 0x%x\n", __func__, chan->ic_freq, chan->ic_flags); /* * Convert to a HAL channel description with * the flags constrained to reflect the current * operating mode. */ mwl_mapchan(&hchan, chan); mwl_hal_intrset(mh, 0); /* disable interrupts */ #if 0 mwl_draintxq(sc); /* clear pending tx frames */ #endif mwl_hal_setchannel(mh, &hchan); /* * Tx power is cap'd by the regulatory setting and * possibly a user-set limit. We pass the min of * these to the hal to apply them to the cal data * for this channel. * XXX min bound? */ maxtxpow = 2*chan->ic_maxregpower; if (maxtxpow > ic->ic_txpowlimit) maxtxpow = ic->ic_txpowlimit; mwl_hal_settxpower(mh, &hchan, maxtxpow / 2); /* NB: potentially change mcast/mgt rates */ mwl_setcurchanrates(sc); /* * Update internal state. */ sc->sc_tx_th.wt_chan_freq = htole16(chan->ic_freq); sc->sc_rx_th.wr_chan_freq = htole16(chan->ic_freq); if (IEEE80211_IS_CHAN_A(chan)) { sc->sc_tx_th.wt_chan_flags = htole16(IEEE80211_CHAN_A); sc->sc_rx_th.wr_chan_flags = htole16(IEEE80211_CHAN_A); } else if (IEEE80211_IS_CHAN_ANYG(chan)) { sc->sc_tx_th.wt_chan_flags = htole16(IEEE80211_CHAN_G); sc->sc_rx_th.wr_chan_flags = htole16(IEEE80211_CHAN_G); } else { sc->sc_tx_th.wt_chan_flags = htole16(IEEE80211_CHAN_B); sc->sc_rx_th.wr_chan_flags = htole16(IEEE80211_CHAN_B); } sc->sc_curchan = hchan; mwl_hal_intrset(mh, sc->sc_imask); return 0; } static void mwl_scan_start(struct ieee80211com *ic) { struct mwl_softc *sc = ic->ic_softc; DPRINTF(sc, MWL_DEBUG_STATE, "%s\n", __func__); } static void mwl_scan_end(struct ieee80211com *ic) { struct mwl_softc *sc = ic->ic_softc; DPRINTF(sc, MWL_DEBUG_STATE, "%s\n", __func__); } static void mwl_set_channel(struct ieee80211com *ic) { struct mwl_softc *sc = ic->ic_softc; (void) mwl_chan_set(sc, ic->ic_curchan); } /* * Handle a channel switch request. We inform the firmware * and mark the global state to suppress various actions. * NB: we issue only one request to the fw; we may be called * multiple times if there are multiple vap's. 
*/ static void mwl_startcsa(struct ieee80211vap *vap) { struct ieee80211com *ic = vap->iv_ic; struct mwl_softc *sc = ic->ic_softc; MWL_HAL_CHANNEL hchan; if (sc->sc_csapending) return; mwl_mapchan(&hchan, ic->ic_csa_newchan); /* 1 =>'s quiet channel */ mwl_hal_setchannelswitchie(sc->sc_mh, &hchan, 1, ic->ic_csa_count); sc->sc_csapending = 1; } /* * Plumb any static WEP key for the station. This is * necessary as we must propagate the key from the * global key table of the vap to each sta db entry. */ static void mwl_setanywepkey(struct ieee80211vap *vap, const uint8_t mac[IEEE80211_ADDR_LEN]) { if ((vap->iv_flags & (IEEE80211_F_PRIVACY|IEEE80211_F_WPA)) == IEEE80211_F_PRIVACY && vap->iv_def_txkey != IEEE80211_KEYIX_NONE && vap->iv_nw_keys[vap->iv_def_txkey].wk_keyix != IEEE80211_KEYIX_NONE) (void) _mwl_key_set(vap, &vap->iv_nw_keys[vap->iv_def_txkey], mac); } static int mwl_peerstadb(struct ieee80211_node *ni, int aid, int staid, MWL_HAL_PEERINFO *pi) { #define WME(ie) ((const struct ieee80211_wme_info *) ie) struct ieee80211vap *vap = ni->ni_vap; struct mwl_hal_vap *hvap; int error; if (vap->iv_opmode == IEEE80211_M_WDS) { /* * WDS vap's do not have a f/w vap; instead they piggyback * on an AP vap and we must install the sta db entry and * crypto state using that AP's handle (the WDS vap has none). */ hvap = MWL_VAP(vap)->mv_ap_hvap; } else hvap = MWL_VAP(vap)->mv_hvap; error = mwl_hal_newstation(hvap, ni->ni_macaddr, aid, staid, pi, ni->ni_flags & (IEEE80211_NODE_QOS | IEEE80211_NODE_HT), ni->ni_ies.wme_ie != NULL ? WME(ni->ni_ies.wme_ie)->wme_info : 0); if (error == 0) { /* * Setup security for this station. For sta mode this is * needed even though we do the same thing on transition to * AUTH state because the call to mwl_hal_newstation * clobbers the crypto state we setup. */ mwl_setanywepkey(vap, ni->ni_macaddr); } return error; #undef WME } static void mwl_setglobalkeys(struct ieee80211vap *vap) { struct ieee80211_key *wk; wk = &vap->iv_nw_keys[0]; for (; wk < &vap->iv_nw_keys[IEEE80211_WEP_NKID]; wk++) if (wk->wk_keyix != IEEE80211_KEYIX_NONE) (void) _mwl_key_set(vap, wk, vap->iv_myaddr); } /* * Convert a legacy rate set to a firmware bitmask. */ static uint32_t get_rate_bitmap(const struct ieee80211_rateset *rs) { uint32_t rates; int i; rates = 0; for (i = 0; i < rs->rs_nrates; i++) switch (rs->rs_rates[i] & IEEE80211_RATE_VAL) { case 2: rates |= 0x001; break; case 4: rates |= 0x002; break; case 11: rates |= 0x004; break; case 22: rates |= 0x008; break; case 44: rates |= 0x010; break; case 12: rates |= 0x020; break; case 18: rates |= 0x040; break; case 24: rates |= 0x080; break; case 36: rates |= 0x100; break; case 48: rates |= 0x200; break; case 72: rates |= 0x400; break; case 96: rates |= 0x800; break; case 108: rates |= 0x1000; break; } return rates; } /* * Construct an HT firmware bitmask from an HT rate set. */ static uint32_t get_htrate_bitmap(const struct ieee80211_htrateset *rs) { uint32_t rates; int i; rates = 0; for (i = 0; i < rs->rs_nrates; i++) { if (rs->rs_rates[i] < 16) rates |= 1<<rs->rs_rates[i]; } return rates; } /* * Craft station database entry for station. * NB: use host byte order here, the hal handles byte swapping.
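 * For illustration (hypothetical station): a basic 11b rate set of
 * 1/2/5.5/11 Mb/s (rate codes 2, 4, 11, 22) folds into LegacyRateBitMap
 * 0x00f via get_rate_bitmap() above, and MCS 0-7 from an HT station
 * gives HTRateBitMap 0xff via get_htrate_bitmap().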
*/ static MWL_HAL_PEERINFO * mkpeerinfo(MWL_HAL_PEERINFO *pi, const struct ieee80211_node *ni) { const struct ieee80211vap *vap = ni->ni_vap; memset(pi, 0, sizeof(*pi)); pi->LegacyRateBitMap = get_rate_bitmap(&ni->ni_rates); pi->CapInfo = ni->ni_capinfo; if (ni->ni_flags & IEEE80211_NODE_HT) { /* HT capabilities, etc */ pi->HTCapabilitiesInfo = ni->ni_htcap; /* XXX pi.HTCapabilitiesInfo */ pi->MacHTParamInfo = ni->ni_htparam; pi->HTRateBitMap = get_htrate_bitmap(&ni->ni_htrates); pi->AddHtInfo.ControlChan = ni->ni_htctlchan; pi->AddHtInfo.AddChan = ni->ni_ht2ndchan; pi->AddHtInfo.OpMode = ni->ni_htopmode; pi->AddHtInfo.stbc = ni->ni_htstbc; /* constrain according to local configuration */ if ((vap->iv_flags_ht & IEEE80211_FHT_SHORTGI40) == 0) pi->HTCapabilitiesInfo &= ~IEEE80211_HTCAP_SHORTGI40; if ((vap->iv_flags_ht & IEEE80211_FHT_SHORTGI20) == 0) pi->HTCapabilitiesInfo &= ~IEEE80211_HTCAP_SHORTGI20; if (ni->ni_chw != 40) pi->HTCapabilitiesInfo &= ~IEEE80211_HTCAP_CHWIDTH40; } return pi; } /* * Re-create the local sta db entry for a vap to ensure * up to date WME state is pushed to the firmware. Because * this resets crypto state this must be followed by a * reload of any keys in the global key table. */ static int mwl_localstadb(struct ieee80211vap *vap) { #define WME(ie) ((const struct ieee80211_wme_info *) ie) struct mwl_hal_vap *hvap = MWL_VAP(vap)->mv_hvap; struct ieee80211_node *bss; MWL_HAL_PEERINFO pi; int error; switch (vap->iv_opmode) { case IEEE80211_M_STA: bss = vap->iv_bss; error = mwl_hal_newstation(hvap, vap->iv_myaddr, 0, 0, vap->iv_state == IEEE80211_S_RUN ? mkpeerinfo(&pi, bss) : NULL, (bss->ni_flags & (IEEE80211_NODE_QOS | IEEE80211_NODE_HT)), bss->ni_ies.wme_ie != NULL ? WME(bss->ni_ies.wme_ie)->wme_info : 0); if (error == 0) mwl_setglobalkeys(vap); break; case IEEE80211_M_HOSTAP: case IEEE80211_M_MBSS: error = mwl_hal_newstation(hvap, vap->iv_myaddr, 0, 0, NULL, vap->iv_flags & IEEE80211_F_WME, 0); if (error == 0) mwl_setglobalkeys(vap); break; default: error = 0; break; } return error; #undef WME } static int mwl_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) { struct mwl_vap *mvp = MWL_VAP(vap); struct mwl_hal_vap *hvap = mvp->mv_hvap; struct ieee80211com *ic = vap->iv_ic; struct ieee80211_node *ni = NULL; struct mwl_softc *sc = ic->ic_softc; struct mwl_hal *mh = sc->sc_mh; enum ieee80211_state ostate = vap->iv_state; int error; DPRINTF(sc, MWL_DEBUG_STATE, "%s: %s: %s -> %s\n", vap->iv_ifp->if_xname, __func__, ieee80211_state_name[ostate], ieee80211_state_name[nstate]); callout_stop(&sc->sc_timer); /* * Clear current radar detection state. */ if (ostate == IEEE80211_S_CAC) { /* stop quiet mode radar detection */ mwl_hal_setradardetection(mh, DR_CHK_CHANNEL_AVAILABLE_STOP); } else if (sc->sc_radarena) { /* stop in-service radar detection */ mwl_hal_setradardetection(mh, DR_DFS_DISABLE); sc->sc_radarena = 0; } /* * Carry out per-state actions before doing net80211 work. */ if (nstate == IEEE80211_S_INIT) { /* NB: only ap+sta vap's have a fw entity */ if (hvap != NULL) mwl_hal_stop(hvap); } else if (nstate == IEEE80211_S_SCAN) { mwl_hal_start(hvap); /* NB: this disables beacon frames */ mwl_hal_setinframode(hvap); } else if (nstate == IEEE80211_S_AUTH) { /* * Must create a sta db entry in case a WEP key needs to * be plumbed. This entry will be overwritten if we * associate; otherwise it will be reclaimed on node free. 
*/ ni = vap->iv_bss; MWL_NODE(ni)->mn_hvap = hvap; (void) mwl_peerstadb(ni, 0, 0, NULL); } else if (nstate == IEEE80211_S_CSA) { /* XXX move to below? */ if (vap->iv_opmode == IEEE80211_M_HOSTAP || vap->iv_opmode == IEEE80211_M_MBSS) mwl_startcsa(vap); } else if (nstate == IEEE80211_S_CAC) { /* XXX move to below? */ /* stop ap xmit and enable quiet mode radar detection */ mwl_hal_setradardetection(mh, DR_CHK_CHANNEL_AVAILABLE_START); } /* * Invoke the parent method to do net80211 work. */ error = mvp->mv_newstate(vap, nstate, arg); /* * Carry out work that must be done after net80211 runs; * this work requires up to date state (e.g. iv_bss). */ if (error == 0 && nstate == IEEE80211_S_RUN) { /* NB: collect bss node again, it may have changed */ ni = vap->iv_bss; DPRINTF(sc, MWL_DEBUG_STATE, "%s: %s(RUN): iv_flags 0x%08x bintvl %d bssid %s " "capinfo 0x%04x chan %d\n", vap->iv_ifp->if_xname, __func__, vap->iv_flags, ni->ni_intval, ether_sprintf(ni->ni_bssid), ni->ni_capinfo, ieee80211_chan2ieee(ic, ic->ic_curchan)); /* * Recreate local sta db entry to update WME/HT state. */ mwl_localstadb(vap); switch (vap->iv_opmode) { case IEEE80211_M_HOSTAP: case IEEE80211_M_MBSS: if (ostate == IEEE80211_S_CAC) { /* enable in-service radar detection */ mwl_hal_setradardetection(mh, DR_IN_SERVICE_MONITOR_START); sc->sc_radarena = 1; } /* * Allocate and setup the beacon frame * (and related state). */ error = mwl_reset_vap(vap, IEEE80211_S_RUN); if (error != 0) { DPRINTF(sc, MWL_DEBUG_STATE, "%s: beacon setup failed, error %d\n", __func__, error); goto bad; } /* NB: must be after setting up beacon */ mwl_hal_start(hvap); break; case IEEE80211_M_STA: DPRINTF(sc, MWL_DEBUG_STATE, "%s: %s: aid 0x%x\n", vap->iv_ifp->if_xname, __func__, ni->ni_associd); /* * Set state now that we're associated. */ mwl_hal_setassocid(hvap, ni->ni_bssid, ni->ni_associd); mwl_setrates(vap); mwl_hal_setrtsthreshold(hvap, vap->iv_rtsthreshold); if ((vap->iv_flags & IEEE80211_F_DWDS) && sc->sc_ndwdsvaps++ == 0) mwl_hal_setdwds(mh, 1); break; case IEEE80211_M_WDS: DPRINTF(sc, MWL_DEBUG_STATE, "%s: %s: bssid %s\n", vap->iv_ifp->if_xname, __func__, ether_sprintf(ni->ni_bssid)); mwl_seteapolformat(vap); break; default: break; } /* * Set CS mode according to operating channel; * this mostly an optimization for 5GHz. * * NB: must follow mwl_hal_start which resets csmode */ if (IEEE80211_IS_CHAN_5GHZ(ic->ic_bsschan)) mwl_hal_setcsmode(mh, CSMODE_AGGRESSIVE); else mwl_hal_setcsmode(mh, CSMODE_AUTO_ENA); /* * Start timer to prod firmware. */ if (sc->sc_ageinterval != 0) callout_reset(&sc->sc_timer, sc->sc_ageinterval*hz, mwl_agestations, sc); } else if (nstate == IEEE80211_S_SLEEP) { /* XXX set chip in power save */ } else if ((vap->iv_flags & IEEE80211_F_DWDS) && --sc->sc_ndwdsvaps == 0) mwl_hal_setdwds(mh, 0); bad: return error; } /* * Manage station id's; these are separate from AID's * as AID's may have values out of the range of possible * station id's acceptable to the firmware. */ static int allocstaid(struct mwl_softc *sc, int aid) { int staid; if (!(0 < aid && aid < MWL_MAXSTAID) || isset(sc->sc_staid, aid)) { /* NB: don't use 0 */ for (staid = 1; staid < MWL_MAXSTAID; staid++) if (isclr(sc->sc_staid, staid)) break; } else staid = aid; setbit(sc->sc_staid, staid); return staid; } static void delstaid(struct mwl_softc *sc, int staid) { clrbit(sc->sc_staid, staid); } /* * Setup driver-specific state for a newly associated node. * Note that we're called also on a re-associate, the isnew * param tells us if this is the first time or not. 
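 * On a first-time association a station id is taken from allocstaid(): the
 * AID is reused when it is in range and free, otherwise the first free slot
 * is picked.  On re-associate the previously assigned staid is kept.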
*/ static void mwl_newassoc(struct ieee80211_node *ni, int isnew) { struct ieee80211vap *vap = ni->ni_vap; struct mwl_softc *sc = vap->iv_ic->ic_softc; struct mwl_node *mn = MWL_NODE(ni); MWL_HAL_PEERINFO pi; uint16_t aid; int error; aid = IEEE80211_AID(ni->ni_associd); if (isnew) { mn->mn_staid = allocstaid(sc, aid); mn->mn_hvap = MWL_VAP(vap)->mv_hvap; } else { mn = MWL_NODE(ni); /* XXX reset BA stream? */ } DPRINTF(sc, MWL_DEBUG_NODE, "%s: mac %s isnew %d aid %d staid %d\n", __func__, ether_sprintf(ni->ni_macaddr), isnew, aid, mn->mn_staid); error = mwl_peerstadb(ni, aid, mn->mn_staid, mkpeerinfo(&pi, ni)); if (error != 0) { DPRINTF(sc, MWL_DEBUG_NODE, "%s: error %d creating sta db entry\n", __func__, error); /* XXX how to deal with error? */ } } /* * Periodically poke the firmware to age out station state * (power save queues, pending tx aggregates). */ static void mwl_agestations(void *arg) { struct mwl_softc *sc = arg; mwl_hal_setkeepalive(sc->sc_mh); if (sc->sc_ageinterval != 0) /* NB: catch dynamic changes */ callout_schedule(&sc->sc_timer, sc->sc_ageinterval*hz); } static const struct mwl_hal_channel * findhalchannel(const MWL_HAL_CHANNELINFO *ci, int ieee) { int i; for (i = 0; i < ci->nchannels; i++) { const struct mwl_hal_channel *hc = &ci->channels[i]; if (hc->ieee == ieee) return hc; } return NULL; } static int mwl_setregdomain(struct ieee80211com *ic, struct ieee80211_regdomain *rd, int nchan, struct ieee80211_channel chans[]) { struct mwl_softc *sc = ic->ic_softc; struct mwl_hal *mh = sc->sc_mh; const MWL_HAL_CHANNELINFO *ci; int i; for (i = 0; i < nchan; i++) { struct ieee80211_channel *c = &chans[i]; const struct mwl_hal_channel *hc; if (IEEE80211_IS_CHAN_2GHZ(c)) { mwl_hal_getchannelinfo(mh, MWL_FREQ_BAND_2DOT4GHZ, IEEE80211_IS_CHAN_HT40(c) ? MWL_CH_40_MHz_WIDTH : MWL_CH_20_MHz_WIDTH, &ci); } else if (IEEE80211_IS_CHAN_5GHZ(c)) { mwl_hal_getchannelinfo(mh, MWL_FREQ_BAND_5GHZ, IEEE80211_IS_CHAN_HT40(c) ? MWL_CH_40_MHz_WIDTH : MWL_CH_20_MHz_WIDTH, &ci); } else { device_printf(sc->sc_dev, "%s: channel %u freq %u/0x%x not 2.4/5GHz\n", __func__, c->ic_ieee, c->ic_freq, c->ic_flags); return EINVAL; } /* * Verify channel has cal data and cap tx power. */ hc = findhalchannel(ci, c->ic_ieee); if (hc != NULL) { if (c->ic_maxpower > 2*hc->maxTxPow) c->ic_maxpower = 2*hc->maxTxPow; goto next; } if (IEEE80211_IS_CHAN_HT40(c)) { /* * Look for the extension channel since the * hal table only has the primary channel. 
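			 * (i.e. an HT40 channel whose primary IEEE number has
			 * no cal data is retried with c->ic_extieee before the
			 * channel is finally rejected with EINVAL.)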
*/ hc = findhalchannel(ci, c->ic_extieee); if (hc != NULL) { if (c->ic_maxpower > 2*hc->maxTxPow) c->ic_maxpower = 2*hc->maxTxPow; goto next; } } device_printf(sc->sc_dev, "%s: no cal data for channel %u ext %u freq %u/0x%x\n", __func__, c->ic_ieee, c->ic_extieee, c->ic_freq, c->ic_flags); return EINVAL; next: ; } return 0; } #define IEEE80211_CHAN_HTG (IEEE80211_CHAN_HT|IEEE80211_CHAN_G) #define IEEE80211_CHAN_HTA (IEEE80211_CHAN_HT|IEEE80211_CHAN_A) static void addht40channels(struct ieee80211_channel chans[], int maxchans, int *nchans, const MWL_HAL_CHANNELINFO *ci, int flags) { int i, error; for (i = 0; i < ci->nchannels; i++) { const struct mwl_hal_channel *hc = &ci->channels[i]; error = ieee80211_add_channel_ht40(chans, maxchans, nchans, hc->ieee, hc->maxTxPow, flags); if (error != 0 && error != ENOENT) break; } } static void addchannels(struct ieee80211_channel chans[], int maxchans, int *nchans, const MWL_HAL_CHANNELINFO *ci, const uint8_t bands[]) { int i, error; error = 0; for (i = 0; i < ci->nchannels && error == 0; i++) { const struct mwl_hal_channel *hc = &ci->channels[i]; error = ieee80211_add_channel(chans, maxchans, nchans, hc->ieee, hc->freq, hc->maxTxPow, 0, bands); } } static void getchannels(struct mwl_softc *sc, int maxchans, int *nchans, struct ieee80211_channel chans[]) { const MWL_HAL_CHANNELINFO *ci; uint8_t bands[IEEE80211_MODE_BYTES]; /* * Use the channel info from the hal to craft the * channel list. Note that we pass back an unsorted * list; the caller is required to sort it for us * (if desired). */ *nchans = 0; if (mwl_hal_getchannelinfo(sc->sc_mh, MWL_FREQ_BAND_2DOT4GHZ, MWL_CH_20_MHz_WIDTH, &ci) == 0) { memset(bands, 0, sizeof(bands)); setbit(bands, IEEE80211_MODE_11B); setbit(bands, IEEE80211_MODE_11G); setbit(bands, IEEE80211_MODE_11NG); addchannels(chans, maxchans, nchans, ci, bands); } if (mwl_hal_getchannelinfo(sc->sc_mh, MWL_FREQ_BAND_5GHZ, MWL_CH_20_MHz_WIDTH, &ci) == 0) { memset(bands, 0, sizeof(bands)); setbit(bands, IEEE80211_MODE_11A); setbit(bands, IEEE80211_MODE_11NA); addchannels(chans, maxchans, nchans, ci, bands); } if (mwl_hal_getchannelinfo(sc->sc_mh, MWL_FREQ_BAND_2DOT4GHZ, MWL_CH_40_MHz_WIDTH, &ci) == 0) addht40channels(chans, maxchans, nchans, ci, IEEE80211_CHAN_HTG); if (mwl_hal_getchannelinfo(sc->sc_mh, MWL_FREQ_BAND_5GHZ, MWL_CH_40_MHz_WIDTH, &ci) == 0) addht40channels(chans, maxchans, nchans, ci, IEEE80211_CHAN_HTA); } static void mwl_getradiocaps(struct ieee80211com *ic, int maxchans, int *nchans, struct ieee80211_channel chans[]) { struct mwl_softc *sc = ic->ic_softc; getchannels(sc, maxchans, nchans, chans); } static int mwl_getchannels(struct mwl_softc *sc) { struct ieee80211com *ic = &sc->sc_ic; /* * Use the channel info from the hal to craft the * channel list for net80211. Note that we pass up * an unsorted list; net80211 will sort it for us. */ memset(ic->ic_channels, 0, sizeof(ic->ic_channels)); ic->ic_nchans = 0; getchannels(sc, IEEE80211_CHAN_MAX, &ic->ic_nchans, ic->ic_channels); ic->ic_regdomain.regdomain = SKU_DEBUG; ic->ic_regdomain.country = CTRY_DEFAULT; ic->ic_regdomain.location = 'I'; ic->ic_regdomain.isocc[0] = ' '; /* XXX? */ ic->ic_regdomain.isocc[1] = ' '; return (ic->ic_nchans == 0 ? 
EIO : 0); } #undef IEEE80211_CHAN_HTA #undef IEEE80211_CHAN_HTG #ifdef MWL_DEBUG static void mwl_printrxbuf(const struct mwl_rxbuf *bf, u_int ix) { const struct mwl_rxdesc *ds = bf->bf_desc; uint32_t status = le32toh(ds->Status); printf("R[%2u] (DS.V:%p DS.P:0x%jx) NEXT:%08x DATA:%08x RC:%02x%s\n" " STAT:%02x LEN:%04x RSSI:%02x CHAN:%02x RATE:%02x QOS:%04x HT:%04x\n", ix, ds, (uintmax_t)bf->bf_daddr, le32toh(ds->pPhysNext), le32toh(ds->pPhysBuffData), ds->RxControl, ds->RxControl != EAGLE_RXD_CTRL_DRIVER_OWN ? "" : (status & EAGLE_RXD_STATUS_OK) ? " *" : " !", ds->Status, le16toh(ds->PktLen), ds->RSSI, ds->Channel, ds->Rate, le16toh(ds->QosCtrl), le16toh(ds->HtSig2)); } static void mwl_printtxbuf(const struct mwl_txbuf *bf, u_int qnum, u_int ix) { const struct mwl_txdesc *ds = bf->bf_desc; uint32_t status = le32toh(ds->Status); printf("Q%u[%3u]", qnum, ix); printf(" (DS.V:%p DS.P:0x%jx)\n", ds, (uintmax_t)bf->bf_daddr); printf(" NEXT:%08x DATA:%08x LEN:%04x STAT:%08x%s\n", le32toh(ds->pPhysNext), le32toh(ds->PktPtr), le16toh(ds->PktLen), status, status & EAGLE_TXD_STATUS_USED ? "" : (status & 3) != 0 ? " *" : " !"); printf(" RATE:%02x PRI:%x QOS:%04x SAP:%08x FORMAT:%04x\n", ds->DataRate, ds->TxPriority, le16toh(ds->QosCtrl), le32toh(ds->SapPktInfo), le16toh(ds->Format)); #if MWL_TXDESC > 1 printf(" MULTIFRAMES:%u LEN:%04x %04x %04x %04x %04x %04x\n" , le32toh(ds->multiframes) , le16toh(ds->PktLenArray[0]), le16toh(ds->PktLenArray[1]) , le16toh(ds->PktLenArray[2]), le16toh(ds->PktLenArray[3]) , le16toh(ds->PktLenArray[4]), le16toh(ds->PktLenArray[5]) ); printf(" DATA:%08x %08x %08x %08x %08x %08x\n" , le32toh(ds->PktPtrArray[0]), le32toh(ds->PktPtrArray[1]) , le32toh(ds->PktPtrArray[2]), le32toh(ds->PktPtrArray[3]) , le32toh(ds->PktPtrArray[4]), le32toh(ds->PktPtrArray[5]) ); #endif #if 0 { const uint8_t *cp = (const uint8_t *) ds; int i; for (i = 0; i < sizeof(struct mwl_txdesc); i++) { printf("%02x ", cp[i]); if (((i+1) % 16) == 0) printf("\n"); } printf("\n"); } #endif } #endif /* MWL_DEBUG */ #if 0 static void mwl_txq_dump(struct mwl_txq *txq) { struct mwl_txbuf *bf; int i = 0; MWL_TXQ_LOCK(txq); STAILQ_FOREACH(bf, &txq->active, bf_list) { struct mwl_txdesc *ds = bf->bf_desc; MWL_TXDESC_SYNC(txq, ds, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); #ifdef MWL_DEBUG mwl_printtxbuf(bf, txq->qnum, i); #endif i++; } MWL_TXQ_UNLOCK(txq); } #endif static void mwl_watchdog(void *arg) { struct mwl_softc *sc = arg; callout_reset(&sc->sc_watchdog, hz, mwl_watchdog, sc); if (sc->sc_tx_timer == 0 || --sc->sc_tx_timer > 0) return; if (sc->sc_running && !sc->sc_invalid) { if (mwl_hal_setkeepalive(sc->sc_mh)) device_printf(sc->sc_dev, "transmit timeout (firmware hung?)\n"); else device_printf(sc->sc_dev, "transmit timeout\n"); #if 0 mwl_reset(sc); mwl_txq_dump(&sc->sc_txq[0]);/*XXX*/ #endif counter_u64_add(sc->sc_ic.ic_oerrors, 1); sc->sc_stats.mst_watchdog++; } } #ifdef MWL_DIAGAPI /* * Diagnostic interface to the HAL. This is used by various * tools to do things like retrieve register contents for * debugging. The mechanism is intentionally opaque so that * it can change frequently w/o concern for compatibility. */ static int mwl_ioctl_diag(struct mwl_softc *sc, struct mwl_diag *md) { struct mwl_hal *mh = sc->sc_mh; u_int id = md->md_id & MWL_DIAG_ID; void *indata = NULL; void *outdata = NULL; u_int32_t insize = md->md_in_size; u_int32_t outsize = md->md_out_size; int error = 0; if (md->md_id & MWL_DIAG_IN) { /* * Copy in data. 
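		 * The request supplies md_in_size bytes at md_in_data; they
		 * are copied into a temporary kernel buffer (released at the
		 * end of the handler) before being passed to the hal.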
*/ indata = malloc(insize, M_TEMP, M_NOWAIT); if (indata == NULL) { error = ENOMEM; goto bad; } error = copyin(md->md_in_data, indata, insize); if (error) goto bad; } if (md->md_id & MWL_DIAG_DYN) { /* * Allocate a buffer for the results (otherwise the HAL * returns a pointer to a buffer where we can read the * results). Note that we depend on the HAL leaving this * pointer for us to use below in reclaiming the buffer; * may want to be more defensive. */ outdata = malloc(outsize, M_TEMP, M_NOWAIT); if (outdata == NULL) { error = ENOMEM; goto bad; } } if (mwl_hal_getdiagstate(mh, id, indata, insize, &outdata, &outsize)) { if (outsize < md->md_out_size) md->md_out_size = outsize; if (outdata != NULL) error = copyout(outdata, md->md_out_data, md->md_out_size); } else { error = EINVAL; } bad: if ((md->md_id & MWL_DIAG_IN) && indata != NULL) free(indata, M_TEMP); if ((md->md_id & MWL_DIAG_DYN) && outdata != NULL) free(outdata, M_TEMP); return error; } static int mwl_ioctl_reset(struct mwl_softc *sc, struct mwl_diag *md) { struct mwl_hal *mh = sc->sc_mh; int error; MWL_LOCK_ASSERT(sc); if (md->md_id == 0 && mwl_hal_fwload(mh, NULL) != 0) { device_printf(sc->sc_dev, "unable to load firmware\n"); return EIO; } if (mwl_hal_gethwspecs(mh, &sc->sc_hwspecs) != 0) { device_printf(sc->sc_dev, "unable to fetch h/w specs\n"); return EIO; } error = mwl_setupdma(sc); if (error != 0) { /* NB: mwl_setupdma prints a msg */ return error; } /* * Reset tx/rx data structures; after reload we must * re-start the driver's notion of the next xmit/recv. */ mwl_draintxq(sc); /* clear pending frames */ mwl_resettxq(sc); /* rebuild tx q lists */ sc->sc_rxnext = NULL; /* force rx to start at the list head */ return 0; } #endif /* MWL_DIAGAPI */ static void mwl_parent(struct ieee80211com *ic) { struct mwl_softc *sc = ic->ic_softc; int startall = 0; MWL_LOCK(sc); if (ic->ic_nrunning > 0) { if (sc->sc_running) { /* * To avoid rescanning another access point, * do not call mwl_init() here. Instead, * only reflect promisc mode settings. */ mwl_mode_init(sc); } else { /* * Beware of being called during attach/detach * to reset promiscuous mode. In that case we * will still be marked UP but not RUNNING. * However trying to re-init the interface * is the wrong thing to do as we've already * torn down much of our state. There's * probably a better way to deal with this. */ if (!sc->sc_invalid) { mwl_init(sc); /* XXX lose error */ startall = 1; } } } else mwl_stop(sc); MWL_UNLOCK(sc); if (startall) ieee80211_start_all(ic); } static int mwl_ioctl(struct ieee80211com *ic, u_long cmd, void *data) { struct mwl_softc *sc = ic->ic_softc; struct ifreq *ifr = data; int error = 0; switch (cmd) { case SIOCGMVSTATS: mwl_hal_gethwstats(sc->sc_mh, &sc->sc_stats.hw_stats); #if 0 /* NB: embed these numbers to get a consistent view */ sc->sc_stats.mst_tx_packets = ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS); sc->sc_stats.mst_rx_packets = ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS); #endif /* * NB: Drop the softc lock in case of a page fault; * we'll accept any potential inconsisentcy in the * statistics. The alternative is to copy the data * to a local structure. 
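		 * (The statistics are handed straight to copyout(9); the user
		 * buffer pointer is obtained via the ifr_data_get_ptr()
		 * accessor rather than by reading ifr_data directly, matching
		 * the ifr_data_get_ptr() conversion made throughout this
		 * change.)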
*/ - return (copyout(&sc->sc_stats, - ifr->ifr_data, sizeof (sc->sc_stats))); + return (copyout(&sc->sc_stats, ifr_data_get_ptr(ifr), + sizeof (sc->sc_stats))); #ifdef MWL_DIAGAPI case SIOCGMVDIAG: /* XXX check privs */ return mwl_ioctl_diag(sc, (struct mwl_diag *) ifr); case SIOCGMVRESET: /* XXX check privs */ MWL_LOCK(sc); error = mwl_ioctl_reset(sc,(struct mwl_diag *) ifr); MWL_UNLOCK(sc); break; #endif /* MWL_DIAGAPI */ default: error = ENOTTY; break; } return (error); } #ifdef MWL_DEBUG static int mwl_sysctl_debug(SYSCTL_HANDLER_ARGS) { struct mwl_softc *sc = arg1; int debug, error; debug = sc->sc_debug | (mwl_hal_getdebug(sc->sc_mh) << 24); error = sysctl_handle_int(oidp, &debug, 0, req); if (error || !req->newptr) return error; mwl_hal_setdebug(sc->sc_mh, debug >> 24); sc->sc_debug = debug & 0x00ffffff; return 0; } #endif /* MWL_DEBUG */ static void mwl_sysctlattach(struct mwl_softc *sc) { #ifdef MWL_DEBUG struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->sc_dev); struct sysctl_oid *tree = device_get_sysctl_tree(sc->sc_dev); sc->sc_debug = mwl_debug; SYSCTL_ADD_PROC(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "debug", CTLTYPE_INT | CTLFLAG_RW, sc, 0, mwl_sysctl_debug, "I", "control debugging printfs"); #endif } /* * Announce various information on device/driver attach. */ static void mwl_announce(struct mwl_softc *sc) { device_printf(sc->sc_dev, "Rev A%d hardware, v%d.%d.%d.%d firmware (regioncode %d)\n", sc->sc_hwspecs.hwVersion, (sc->sc_hwspecs.fwReleaseNumber>>24) & 0xff, (sc->sc_hwspecs.fwReleaseNumber>>16) & 0xff, (sc->sc_hwspecs.fwReleaseNumber>>8) & 0xff, (sc->sc_hwspecs.fwReleaseNumber>>0) & 0xff, sc->sc_hwspecs.regionCode); sc->sc_fwrelease = sc->sc_hwspecs.fwReleaseNumber; if (bootverbose) { int i; for (i = 0; i <= WME_AC_VO; i++) { struct mwl_txq *txq = sc->sc_ac2q[i]; device_printf(sc->sc_dev, "Use hw queue %u for %s traffic\n", txq->qnum, ieee80211_wme_acnames[i]); } } if (bootverbose || mwl_rxdesc != MWL_RXDESC) device_printf(sc->sc_dev, "using %u rx descriptors\n", mwl_rxdesc); if (bootverbose || mwl_rxbuf != MWL_RXBUF) device_printf(sc->sc_dev, "using %u rx buffers\n", mwl_rxbuf); if (bootverbose || mwl_txbuf != MWL_TXBUF) device_printf(sc->sc_dev, "using %u tx buffers\n", mwl_txbuf); if (bootverbose && mwl_hal_ismbsscapable(sc->sc_mh)) device_printf(sc->sc_dev, "multi-bss support\n"); #ifdef MWL_TX_NODROP if (bootverbose) device_printf(sc->sc_dev, "no tx drop\n"); #endif } Index: stable/11/sys/dev/nxge/if_nxge.c =================================================================== --- stable/11/sys/dev/nxge/if_nxge.c (revision 332287) +++ stable/11/sys/dev/nxge/if_nxge.c (revision 332288) @@ -1,3534 +1,3536 @@ /*- * Copyright (c) 2002-2007 Neterion, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include int copyright_print = 0; int hal_driver_init_count = 0; size_t size = sizeof(int); static void inline xge_flush_txds(xge_hal_channel_h); /** * xge_probe * Probes for Xframe devices * * @dev Device handle * * Returns * BUS_PROBE_DEFAULT if device is supported * ENXIO if device is not supported */ int xge_probe(device_t dev) { int devid = pci_get_device(dev); int vendorid = pci_get_vendor(dev); int retValue = ENXIO; if(vendorid == XGE_PCI_VENDOR_ID) { if((devid == XGE_PCI_DEVICE_ID_XENA_2) || (devid == XGE_PCI_DEVICE_ID_HERC_2)) { if(!copyright_print) { xge_os_printf(XGE_COPYRIGHT); copyright_print = 1; } device_set_desc_copy(dev, "Neterion Xframe 10 Gigabit Ethernet Adapter"); retValue = BUS_PROBE_DEFAULT; } } return retValue; } /** * xge_init_params * Sets HAL parameter values (from kenv). * * @dconfig Device Configuration * @dev Device Handle */ void xge_init_params(xge_hal_device_config_t *dconfig, device_t dev) { int qindex, tindex, revision; device_t checkdev; xge_lldev_t *lldev = (xge_lldev_t *)device_get_softc(dev); dconfig->mtu = XGE_DEFAULT_INITIAL_MTU; dconfig->pci_freq_mherz = XGE_DEFAULT_USER_HARDCODED; dconfig->device_poll_millis = XGE_HAL_DEFAULT_DEVICE_POLL_MILLIS; dconfig->link_stability_period = XGE_HAL_DEFAULT_LINK_STABILITY_PERIOD; dconfig->mac.rmac_bcast_en = XGE_DEFAULT_MAC_RMAC_BCAST_EN; dconfig->fifo.alignment_size = XGE_DEFAULT_FIFO_ALIGNMENT_SIZE; XGE_GET_PARAM("hw.xge.enable_tso", (*lldev), enabled_tso, XGE_DEFAULT_ENABLED_TSO); XGE_GET_PARAM("hw.xge.enable_lro", (*lldev), enabled_lro, XGE_DEFAULT_ENABLED_LRO); XGE_GET_PARAM("hw.xge.enable_msi", (*lldev), enabled_msi, XGE_DEFAULT_ENABLED_MSI); XGE_GET_PARAM("hw.xge.latency_timer", (*dconfig), latency_timer, XGE_DEFAULT_LATENCY_TIMER); XGE_GET_PARAM("hw.xge.max_splits_trans", (*dconfig), max_splits_trans, XGE_DEFAULT_MAX_SPLITS_TRANS); XGE_GET_PARAM("hw.xge.mmrb_count", (*dconfig), mmrb_count, XGE_DEFAULT_MMRB_COUNT); XGE_GET_PARAM("hw.xge.shared_splits", (*dconfig), shared_splits, XGE_DEFAULT_SHARED_SPLITS); XGE_GET_PARAM("hw.xge.isr_polling_cnt", (*dconfig), isr_polling_cnt, XGE_DEFAULT_ISR_POLLING_CNT); XGE_GET_PARAM("hw.xge.stats_refresh_time_sec", (*dconfig), stats_refresh_time_sec, XGE_DEFAULT_STATS_REFRESH_TIME_SEC); XGE_GET_PARAM_MAC("hw.xge.mac_tmac_util_period", tmac_util_period, XGE_DEFAULT_MAC_TMAC_UTIL_PERIOD); XGE_GET_PARAM_MAC("hw.xge.mac_rmac_util_period", rmac_util_period, XGE_DEFAULT_MAC_RMAC_UTIL_PERIOD); XGE_GET_PARAM_MAC("hw.xge.mac_rmac_pause_gen_en", rmac_pause_gen_en, XGE_DEFAULT_MAC_RMAC_PAUSE_GEN_EN); XGE_GET_PARAM_MAC("hw.xge.mac_rmac_pause_rcv_en", rmac_pause_rcv_en, XGE_DEFAULT_MAC_RMAC_PAUSE_RCV_EN); XGE_GET_PARAM_MAC("hw.xge.mac_rmac_pause_time", rmac_pause_time, XGE_DEFAULT_MAC_RMAC_PAUSE_TIME); XGE_GET_PARAM_MAC("hw.xge.mac_mc_pause_threshold_q0q3", mc_pause_threshold_q0q3, XGE_DEFAULT_MAC_MC_PAUSE_THRESHOLD_Q0Q3); XGE_GET_PARAM_MAC("hw.xge.mac_mc_pause_threshold_q4q7", mc_pause_threshold_q4q7, 
XGE_DEFAULT_MAC_MC_PAUSE_THRESHOLD_Q4Q7); XGE_GET_PARAM_FIFO("hw.xge.fifo_memblock_size", memblock_size, XGE_DEFAULT_FIFO_MEMBLOCK_SIZE); XGE_GET_PARAM_FIFO("hw.xge.fifo_reserve_threshold", reserve_threshold, XGE_DEFAULT_FIFO_RESERVE_THRESHOLD); XGE_GET_PARAM_FIFO("hw.xge.fifo_max_frags", max_frags, XGE_DEFAULT_FIFO_MAX_FRAGS); for(qindex = 0; qindex < XGE_FIFO_COUNT; qindex++) { XGE_GET_PARAM_FIFO_QUEUE("hw.xge.fifo_queue_intr", intr, qindex, XGE_DEFAULT_FIFO_QUEUE_INTR); XGE_GET_PARAM_FIFO_QUEUE("hw.xge.fifo_queue_max", max, qindex, XGE_DEFAULT_FIFO_QUEUE_MAX); XGE_GET_PARAM_FIFO_QUEUE("hw.xge.fifo_queue_initial", initial, qindex, XGE_DEFAULT_FIFO_QUEUE_INITIAL); for (tindex = 0; tindex < XGE_HAL_MAX_FIFO_TTI_NUM; tindex++) { dconfig->fifo.queue[qindex].tti[tindex].enabled = 1; dconfig->fifo.queue[qindex].configured = 1; XGE_GET_PARAM_FIFO_QUEUE_TTI("hw.xge.fifo_queue_tti_urange_a", urange_a, qindex, tindex, XGE_DEFAULT_FIFO_QUEUE_TTI_URANGE_A); XGE_GET_PARAM_FIFO_QUEUE_TTI("hw.xge.fifo_queue_tti_urange_b", urange_b, qindex, tindex, XGE_DEFAULT_FIFO_QUEUE_TTI_URANGE_B); XGE_GET_PARAM_FIFO_QUEUE_TTI("hw.xge.fifo_queue_tti_urange_c", urange_c, qindex, tindex, XGE_DEFAULT_FIFO_QUEUE_TTI_URANGE_C); XGE_GET_PARAM_FIFO_QUEUE_TTI("hw.xge.fifo_queue_tti_ufc_a", ufc_a, qindex, tindex, XGE_DEFAULT_FIFO_QUEUE_TTI_UFC_A); XGE_GET_PARAM_FIFO_QUEUE_TTI("hw.xge.fifo_queue_tti_ufc_b", ufc_b, qindex, tindex, XGE_DEFAULT_FIFO_QUEUE_TTI_UFC_B); XGE_GET_PARAM_FIFO_QUEUE_TTI("hw.xge.fifo_queue_tti_ufc_c", ufc_c, qindex, tindex, XGE_DEFAULT_FIFO_QUEUE_TTI_UFC_C); XGE_GET_PARAM_FIFO_QUEUE_TTI("hw.xge.fifo_queue_tti_ufc_d", ufc_d, qindex, tindex, XGE_DEFAULT_FIFO_QUEUE_TTI_UFC_D); XGE_GET_PARAM_FIFO_QUEUE_TTI( "hw.xge.fifo_queue_tti_timer_ci_en", timer_ci_en, qindex, tindex, XGE_DEFAULT_FIFO_QUEUE_TTI_TIMER_CI_EN); XGE_GET_PARAM_FIFO_QUEUE_TTI( "hw.xge.fifo_queue_tti_timer_ac_en", timer_ac_en, qindex, tindex, XGE_DEFAULT_FIFO_QUEUE_TTI_TIMER_AC_EN); XGE_GET_PARAM_FIFO_QUEUE_TTI( "hw.xge.fifo_queue_tti_timer_val_us", timer_val_us, qindex, tindex, XGE_DEFAULT_FIFO_QUEUE_TTI_TIMER_VAL_US); } } XGE_GET_PARAM_RING("hw.xge.ring_memblock_size", memblock_size, XGE_DEFAULT_RING_MEMBLOCK_SIZE); XGE_GET_PARAM_RING("hw.xge.ring_strip_vlan_tag", strip_vlan_tag, XGE_DEFAULT_RING_STRIP_VLAN_TAG); XGE_GET_PARAM("hw.xge.buffer_mode", (*lldev), buffer_mode, XGE_DEFAULT_BUFFER_MODE); if((lldev->buffer_mode < XGE_HAL_RING_QUEUE_BUFFER_MODE_1) || (lldev->buffer_mode > XGE_HAL_RING_QUEUE_BUFFER_MODE_2)) { xge_trace(XGE_ERR, "Supported buffer modes are 1 and 2"); lldev->buffer_mode = XGE_HAL_RING_QUEUE_BUFFER_MODE_1; } for (qindex = 0; qindex < XGE_RING_COUNT; qindex++) { dconfig->ring.queue[qindex].max_frm_len = XGE_HAL_RING_USE_MTU; dconfig->ring.queue[qindex].priority = 0; dconfig->ring.queue[qindex].configured = 1; dconfig->ring.queue[qindex].buffer_mode = (lldev->buffer_mode == XGE_HAL_RING_QUEUE_BUFFER_MODE_2) ? 
XGE_HAL_RING_QUEUE_BUFFER_MODE_3 : lldev->buffer_mode; XGE_GET_PARAM_RING_QUEUE("hw.xge.ring_queue_max", max, qindex, XGE_DEFAULT_RING_QUEUE_MAX); XGE_GET_PARAM_RING_QUEUE("hw.xge.ring_queue_initial", initial, qindex, XGE_DEFAULT_RING_QUEUE_INITIAL); XGE_GET_PARAM_RING_QUEUE("hw.xge.ring_queue_dram_size_mb", dram_size_mb, qindex, XGE_DEFAULT_RING_QUEUE_DRAM_SIZE_MB); XGE_GET_PARAM_RING_QUEUE("hw.xge.ring_queue_indicate_max_pkts", indicate_max_pkts, qindex, XGE_DEFAULT_RING_QUEUE_INDICATE_MAX_PKTS); XGE_GET_PARAM_RING_QUEUE("hw.xge.ring_queue_backoff_interval_us", backoff_interval_us, qindex, XGE_DEFAULT_RING_QUEUE_BACKOFF_INTERVAL_US); XGE_GET_PARAM_RING_QUEUE_RTI("hw.xge.ring_queue_rti_ufc_a", ufc_a, qindex, XGE_DEFAULT_RING_QUEUE_RTI_UFC_A); XGE_GET_PARAM_RING_QUEUE_RTI("hw.xge.ring_queue_rti_ufc_b", ufc_b, qindex, XGE_DEFAULT_RING_QUEUE_RTI_UFC_B); XGE_GET_PARAM_RING_QUEUE_RTI("hw.xge.ring_queue_rti_ufc_c", ufc_c, qindex, XGE_DEFAULT_RING_QUEUE_RTI_UFC_C); XGE_GET_PARAM_RING_QUEUE_RTI("hw.xge.ring_queue_rti_ufc_d", ufc_d, qindex, XGE_DEFAULT_RING_QUEUE_RTI_UFC_D); XGE_GET_PARAM_RING_QUEUE_RTI("hw.xge.ring_queue_rti_timer_ac_en", timer_ac_en, qindex, XGE_DEFAULT_RING_QUEUE_RTI_TIMER_AC_EN); XGE_GET_PARAM_RING_QUEUE_RTI("hw.xge.ring_queue_rti_timer_val_us", timer_val_us, qindex, XGE_DEFAULT_RING_QUEUE_RTI_TIMER_VAL_US); XGE_GET_PARAM_RING_QUEUE_RTI("hw.xge.ring_queue_rti_urange_a", urange_a, qindex, XGE_DEFAULT_RING_QUEUE_RTI_URANGE_A); XGE_GET_PARAM_RING_QUEUE_RTI("hw.xge.ring_queue_rti_urange_b", urange_b, qindex, XGE_DEFAULT_RING_QUEUE_RTI_URANGE_B); XGE_GET_PARAM_RING_QUEUE_RTI("hw.xge.ring_queue_rti_urange_c", urange_c, qindex, XGE_DEFAULT_RING_QUEUE_RTI_URANGE_C); } if(dconfig->fifo.max_frags > (PAGE_SIZE/32)) { xge_os_printf("fifo_max_frags = %d", dconfig->fifo.max_frags) xge_os_printf("fifo_max_frags should be <= (PAGE_SIZE / 32) = %d", (int)(PAGE_SIZE / 32)) xge_os_printf("Using fifo_max_frags = %d", (int)(PAGE_SIZE / 32)) dconfig->fifo.max_frags = (PAGE_SIZE / 32); } checkdev = pci_find_device(VENDOR_ID_AMD, DEVICE_ID_8131_PCI_BRIDGE); if(checkdev != NULL) { /* Check Revision for 0x12 */ revision = pci_read_config(checkdev, xge_offsetof(xge_hal_pci_config_t, revision), 1); if(revision <= 0x12) { /* Set mmrb_count to 1k and max splits = 2 */ dconfig->mmrb_count = 1; dconfig->max_splits_trans = XGE_HAL_THREE_SPLIT_TRANSACTION; } } } /** * xge_buffer_sizes_set * Set buffer sizes based on Rx buffer mode * * @lldev Per-adapter Data * @buffer_mode Rx Buffer Mode */ void xge_rx_buffer_sizes_set(xge_lldev_t *lldev, int buffer_mode, int mtu) { int index = 0; int frame_header = XGE_HAL_MAC_HEADER_MAX_SIZE; int buffer_size = mtu + frame_header; xge_os_memzero(lldev->rxd_mbuf_len, sizeof(lldev->rxd_mbuf_len)); if(buffer_mode != XGE_HAL_RING_QUEUE_BUFFER_MODE_5) lldev->rxd_mbuf_len[buffer_mode - 1] = mtu; lldev->rxd_mbuf_len[0] = (buffer_mode == 1) ? 
buffer_size:frame_header; if(buffer_mode == XGE_HAL_RING_QUEUE_BUFFER_MODE_5) lldev->rxd_mbuf_len[1] = XGE_HAL_TCPIP_HEADER_MAX_SIZE; if(buffer_mode == XGE_HAL_RING_QUEUE_BUFFER_MODE_5) { index = 2; buffer_size -= XGE_HAL_TCPIP_HEADER_MAX_SIZE; while(buffer_size > MJUMPAGESIZE) { lldev->rxd_mbuf_len[index++] = MJUMPAGESIZE; buffer_size -= MJUMPAGESIZE; } XGE_ALIGN_TO(buffer_size, 128); lldev->rxd_mbuf_len[index] = buffer_size; lldev->rxd_mbuf_cnt = index + 1; } for(index = 0; index < buffer_mode; index++) xge_trace(XGE_TRACE, "Buffer[%d] %d\n", index, lldev->rxd_mbuf_len[index]); } /** * xge_buffer_mode_init * Init Rx buffer mode * * @lldev Per-adapter Data * @mtu Interface MTU */ void xge_buffer_mode_init(xge_lldev_t *lldev, int mtu) { int index = 0, buffer_size = 0; xge_hal_ring_config_t *ring_config = &((lldev->devh)->config.ring); buffer_size = mtu + XGE_HAL_MAC_HEADER_MAX_SIZE; if(lldev->enabled_lro) (lldev->ifnetp)->if_capenable |= IFCAP_LRO; else (lldev->ifnetp)->if_capenable &= ~IFCAP_LRO; lldev->rxd_mbuf_cnt = lldev->buffer_mode; if(lldev->buffer_mode == XGE_HAL_RING_QUEUE_BUFFER_MODE_2) { XGE_SET_BUFFER_MODE_IN_RINGS(XGE_HAL_RING_QUEUE_BUFFER_MODE_3); ring_config->scatter_mode = XGE_HAL_RING_QUEUE_SCATTER_MODE_B; } else { XGE_SET_BUFFER_MODE_IN_RINGS(lldev->buffer_mode); ring_config->scatter_mode = XGE_HAL_RING_QUEUE_SCATTER_MODE_A; } xge_rx_buffer_sizes_set(lldev, lldev->buffer_mode, mtu); xge_os_printf("%s: TSO %s", device_get_nameunit(lldev->device), ((lldev->enabled_tso) ? "Enabled":"Disabled")); xge_os_printf("%s: LRO %s", device_get_nameunit(lldev->device), ((lldev->ifnetp)->if_capenable & IFCAP_LRO) ? "Enabled":"Disabled"); xge_os_printf("%s: Rx %d Buffer Mode Enabled", device_get_nameunit(lldev->device), lldev->buffer_mode); } /** * xge_driver_initialize * Initializes HAL driver (common for all devices) * * Returns * XGE_HAL_OK if success * XGE_HAL_ERR_BAD_DRIVER_CONFIG if driver configuration parameters are invalid */ int xge_driver_initialize(void) { xge_hal_uld_cbs_t uld_callbacks; xge_hal_driver_config_t driver_config; xge_hal_status_e status = XGE_HAL_OK; /* Initialize HAL driver */ if(!hal_driver_init_count) { xge_os_memzero(&uld_callbacks, sizeof(xge_hal_uld_cbs_t)); xge_os_memzero(&driver_config, sizeof(xge_hal_driver_config_t)); /* * Initial and maximum size of the queue used to store the events * like Link up/down (xge_hal_event_e) */ driver_config.queue_size_initial = XGE_HAL_MIN_QUEUE_SIZE_INITIAL; driver_config.queue_size_max = XGE_HAL_MAX_QUEUE_SIZE_MAX; uld_callbacks.link_up = xge_callback_link_up; uld_callbacks.link_down = xge_callback_link_down; uld_callbacks.crit_err = xge_callback_crit_err; uld_callbacks.event = xge_callback_event; status = xge_hal_driver_initialize(&driver_config, &uld_callbacks); if(status != XGE_HAL_OK) { XGE_EXIT_ON_ERR("xgeX: Initialization of HAL driver failed", xdi_out, status); } } hal_driver_init_count = hal_driver_init_count + 1; xge_hal_driver_debug_module_mask_set(0xffffffff); xge_hal_driver_debug_level_set(XGE_TRACE); xdi_out: return status; } /** * xge_media_init * Initializes, adds and sets media * * @devc Device Handle */ void xge_media_init(device_t devc) { xge_lldev_t *lldev = (xge_lldev_t *)device_get_softc(devc); /* Initialize Media */ ifmedia_init(&lldev->media, IFM_IMASK, xge_ifmedia_change, xge_ifmedia_status); /* Add supported media */ ifmedia_add(&lldev->media, IFM_ETHER | IFM_1000_SX | IFM_FDX, 0, NULL); ifmedia_add(&lldev->media, IFM_ETHER | IFM_1000_SX, 0, NULL); ifmedia_add(&lldev->media, IFM_ETHER | 
IFM_AUTO, 0, NULL); ifmedia_add(&lldev->media, IFM_ETHER | IFM_10G_SR, 0, NULL); ifmedia_add(&lldev->media, IFM_ETHER | IFM_10G_LR, 0, NULL); /* Set media */ ifmedia_set(&lldev->media, IFM_ETHER | IFM_AUTO); } /** * xge_pci_space_save * Save PCI configuration space * * @dev Device Handle */ void xge_pci_space_save(device_t dev) { struct pci_devinfo *dinfo = NULL; dinfo = device_get_ivars(dev); xge_trace(XGE_TRACE, "Saving PCI configuration space"); pci_cfg_save(dev, dinfo, 0); } /** * xge_pci_space_restore * Restore saved PCI configuration space * * @dev Device Handle */ void xge_pci_space_restore(device_t dev) { struct pci_devinfo *dinfo = NULL; dinfo = device_get_ivars(dev); xge_trace(XGE_TRACE, "Restoring PCI configuration space"); pci_cfg_restore(dev, dinfo); } /** * xge_msi_info_save * Save MSI info * * @lldev Per-adapter Data */ void xge_msi_info_save(xge_lldev_t * lldev) { xge_os_pci_read16(lldev->pdev, NULL, xge_offsetof(xge_hal_pci_config_le_t, msi_control), &lldev->msi_info.msi_control); xge_os_pci_read32(lldev->pdev, NULL, xge_offsetof(xge_hal_pci_config_le_t, msi_lower_address), &lldev->msi_info.msi_lower_address); xge_os_pci_read32(lldev->pdev, NULL, xge_offsetof(xge_hal_pci_config_le_t, msi_higher_address), &lldev->msi_info.msi_higher_address); xge_os_pci_read16(lldev->pdev, NULL, xge_offsetof(xge_hal_pci_config_le_t, msi_data), &lldev->msi_info.msi_data); } /** * xge_msi_info_restore * Restore saved MSI info * * @dev Device Handle */ void xge_msi_info_restore(xge_lldev_t *lldev) { /* * If interface is made down and up, traffic fails. It was observed that * MSI information were getting reset on down. Restoring them. */ xge_os_pci_write16(lldev->pdev, NULL, xge_offsetof(xge_hal_pci_config_le_t, msi_control), lldev->msi_info.msi_control); xge_os_pci_write32(lldev->pdev, NULL, xge_offsetof(xge_hal_pci_config_le_t, msi_lower_address), lldev->msi_info.msi_lower_address); xge_os_pci_write32(lldev->pdev, NULL, xge_offsetof(xge_hal_pci_config_le_t, msi_higher_address), lldev->msi_info.msi_higher_address); xge_os_pci_write16(lldev->pdev, NULL, xge_offsetof(xge_hal_pci_config_le_t, msi_data), lldev->msi_info.msi_data); } /** * xge_init_mutex * Initializes mutexes used in driver * * @lldev Per-adapter Data */ void xge_mutex_init(xge_lldev_t *lldev) { int qindex; sprintf(lldev->mtx_name_drv, "%s_drv", device_get_nameunit(lldev->device)); mtx_init(&lldev->mtx_drv, lldev->mtx_name_drv, MTX_NETWORK_LOCK, MTX_DEF); for(qindex = 0; qindex < XGE_FIFO_COUNT; qindex++) { sprintf(lldev->mtx_name_tx[qindex], "%s_tx_%d", device_get_nameunit(lldev->device), qindex); mtx_init(&lldev->mtx_tx[qindex], lldev->mtx_name_tx[qindex], NULL, MTX_DEF); } } /** * xge_mutex_destroy * Destroys mutexes used in driver * * @lldev Per-adapter Data */ void xge_mutex_destroy(xge_lldev_t *lldev) { int qindex; for(qindex = 0; qindex < XGE_FIFO_COUNT; qindex++) mtx_destroy(&lldev->mtx_tx[qindex]); mtx_destroy(&lldev->mtx_drv); } /** * xge_print_info * Print device and driver information * * @lldev Per-adapter Data */ void xge_print_info(xge_lldev_t *lldev) { device_t dev = lldev->device; xge_hal_device_t *hldev = lldev->devh; xge_hal_status_e status = XGE_HAL_OK; u64 val64 = 0; const char *xge_pci_bus_speeds[17] = { "PCI 33MHz Bus", "PCI 66MHz Bus", "PCIX(M1) 66MHz Bus", "PCIX(M1) 100MHz Bus", "PCIX(M1) 133MHz Bus", "PCIX(M2) 133MHz Bus", "PCIX(M2) 200MHz Bus", "PCIX(M2) 266MHz Bus", "PCIX(M1) Reserved", "PCIX(M1) 66MHz Bus (Not Supported)", "PCIX(M1) 100MHz Bus (Not Supported)", "PCIX(M1) 133MHz Bus (Not Supported)", 
"PCIX(M2) Reserved", "PCIX 533 Reserved", "PCI Basic Mode", "PCIX Basic Mode", "PCI Invalid Mode" }; xge_os_printf("%s: Xframe%s %s Revision %d Driver v%s", device_get_nameunit(dev), ((hldev->device_id == XGE_PCI_DEVICE_ID_XENA_2) ? "I" : "II"), hldev->vpd_data.product_name, hldev->revision, XGE_DRIVER_VERSION); xge_os_printf("%s: Serial Number %s", device_get_nameunit(dev), hldev->vpd_data.serial_num); if(pci_get_device(dev) == XGE_PCI_DEVICE_ID_HERC_2) { status = xge_hal_mgmt_reg_read(hldev, 0, xge_offsetof(xge_hal_pci_bar0_t, pci_info), &val64); if(status != XGE_HAL_OK) xge_trace(XGE_ERR, "Error for getting bus speed"); xge_os_printf("%s: Adapter is on %s bit %s", device_get_nameunit(dev), ((val64 & BIT(8)) ? "32":"64"), (xge_pci_bus_speeds[((val64 & XGE_HAL_PCI_INFO) >> 60)])); } xge_os_printf("%s: Using %s Interrupts", device_get_nameunit(dev), (lldev->enabled_msi == XGE_HAL_INTR_MODE_MSI) ? "MSI":"Line"); } /** * xge_create_dma_tags * Creates DMA tags for both Tx and Rx * * @dev Device Handle * * Returns XGE_HAL_OK or XGE_HAL_FAIL (if errors) */ xge_hal_status_e xge_create_dma_tags(device_t dev) { xge_lldev_t *lldev = (xge_lldev_t *)device_get_softc(dev); xge_hal_status_e status = XGE_HAL_FAIL; int mtu = (lldev->ifnetp)->if_mtu, maxsize; /* DMA tag for Tx */ status = bus_dma_tag_create( bus_get_dma_tag(dev), /* Parent */ PAGE_SIZE, /* Alignment */ 0, /* Bounds */ BUS_SPACE_MAXADDR, /* Low Address */ BUS_SPACE_MAXADDR, /* High Address */ NULL, /* Filter Function */ NULL, /* Filter Function Arguments */ MCLBYTES * XGE_MAX_SEGS, /* Maximum Size */ XGE_MAX_SEGS, /* Number of Segments */ MCLBYTES, /* Maximum Segment Size */ BUS_DMA_ALLOCNOW, /* Flags */ NULL, /* Lock Function */ NULL, /* Lock Function Arguments */ (&lldev->dma_tag_tx)); /* DMA Tag */ if(status != 0) goto _exit; maxsize = mtu + XGE_HAL_MAC_HEADER_MAX_SIZE; if(maxsize <= MCLBYTES) { maxsize = MCLBYTES; } else { if(lldev->buffer_mode == XGE_HAL_RING_QUEUE_BUFFER_MODE_5) maxsize = MJUMPAGESIZE; else maxsize = (maxsize <= MJUMPAGESIZE) ? MJUMPAGESIZE : MJUM9BYTES; } /* DMA tag for Rx */ status = bus_dma_tag_create( bus_get_dma_tag(dev), /* Parent */ PAGE_SIZE, /* Alignment */ 0, /* Bounds */ BUS_SPACE_MAXADDR, /* Low Address */ BUS_SPACE_MAXADDR, /* High Address */ NULL, /* Filter Function */ NULL, /* Filter Function Arguments */ maxsize, /* Maximum Size */ 1, /* Number of Segments */ maxsize, /* Maximum Segment Size */ BUS_DMA_ALLOCNOW, /* Flags */ NULL, /* Lock Function */ NULL, /* Lock Function Arguments */ (&lldev->dma_tag_rx)); /* DMA Tag */ if(status != 0) goto _exit1; status = bus_dmamap_create(lldev->dma_tag_rx, BUS_DMA_NOWAIT, &lldev->extra_dma_map); if(status != 0) goto _exit2; status = XGE_HAL_OK; goto _exit; _exit2: status = bus_dma_tag_destroy(lldev->dma_tag_rx); if(status != 0) xge_trace(XGE_ERR, "Rx DMA tag destroy failed"); _exit1: status = bus_dma_tag_destroy(lldev->dma_tag_tx); if(status != 0) xge_trace(XGE_ERR, "Tx DMA tag destroy failed"); status = XGE_HAL_FAIL; _exit: return status; } /** * xge_confirm_changes * Disables and Enables interface to apply requested change * * @lldev Per-adapter Data * @mtu_set Is it called for changing MTU? 
(Yes: 1, No: 0) * * Returns 0 or Error Number */ void xge_confirm_changes(xge_lldev_t *lldev, xge_option_e option) { if(lldev->initialized == 0) goto _exit1; mtx_lock(&lldev->mtx_drv); if_down(lldev->ifnetp); xge_device_stop(lldev, XGE_HAL_CHANNEL_OC_NORMAL); if(option == XGE_SET_MTU) (lldev->ifnetp)->if_mtu = lldev->mtu; else xge_buffer_mode_init(lldev, lldev->mtu); xge_device_init(lldev, XGE_HAL_CHANNEL_OC_NORMAL); if_up(lldev->ifnetp); mtx_unlock(&lldev->mtx_drv); goto _exit; _exit1: /* Request was to change MTU and device not initialized */ if(option == XGE_SET_MTU) { (lldev->ifnetp)->if_mtu = lldev->mtu; xge_buffer_mode_init(lldev, lldev->mtu); } _exit: return; } /** * xge_change_lro_status * Enable/Disable LRO feature * * @SYSCTL_HANDLER_ARGS sysctl_oid structure with arguments * * Returns 0 or error number. */ static int xge_change_lro_status(SYSCTL_HANDLER_ARGS) { xge_lldev_t *lldev = (xge_lldev_t *)arg1; int request = lldev->enabled_lro, status = XGE_HAL_OK; status = sysctl_handle_int(oidp, &request, arg2, req); if((status != XGE_HAL_OK) || (!req->newptr)) goto _exit; if((request < 0) || (request > 1)) { status = EINVAL; goto _exit; } /* Return if current and requested states are same */ if(request == lldev->enabled_lro){ xge_trace(XGE_ERR, "LRO is already %s", ((request) ? "enabled" : "disabled")); goto _exit; } lldev->enabled_lro = request; xge_confirm_changes(lldev, XGE_CHANGE_LRO); arg2 = lldev->enabled_lro; _exit: return status; } /** * xge_add_sysctl_handlers * Registers sysctl parameter value update handlers * * @lldev Per-adapter data */ void xge_add_sysctl_handlers(xge_lldev_t *lldev) { struct sysctl_ctx_list *context_list = device_get_sysctl_ctx(lldev->device); struct sysctl_oid *oid = device_get_sysctl_tree(lldev->device); SYSCTL_ADD_PROC(context_list, SYSCTL_CHILDREN(oid), OID_AUTO, "enable_lro", CTLTYPE_INT | CTLFLAG_RW, lldev, 0, xge_change_lro_status, "I", "Enable or disable LRO feature"); } /** * xge_attach * Connects driver to the system if probe was success * * @dev Device Handle */ int xge_attach(device_t dev) { xge_hal_device_config_t *device_config; xge_hal_device_attr_t attr; xge_lldev_t *lldev; xge_hal_device_t *hldev; xge_pci_info_t *pci_info; struct ifnet *ifnetp; int rid, rid0, rid1, error; int msi_count = 0, status = XGE_HAL_OK; int enable_msi = XGE_HAL_INTR_MODE_IRQLINE; device_config = xge_os_malloc(NULL, sizeof(xge_hal_device_config_t)); if(!device_config) { XGE_EXIT_ON_ERR("Memory allocation for device configuration failed", attach_out_config, ENOMEM); } lldev = (xge_lldev_t *) device_get_softc(dev); if(!lldev) { XGE_EXIT_ON_ERR("Adapter softc is NULL", attach_out, ENOMEM); } lldev->device = dev; xge_mutex_init(lldev); error = xge_driver_initialize(); if(error != XGE_HAL_OK) { xge_resources_free(dev, xge_free_mutex); XGE_EXIT_ON_ERR("Initializing driver failed", attach_out, ENXIO); } /* HAL device */ hldev = (xge_hal_device_t *)xge_os_malloc(NULL, sizeof(xge_hal_device_t)); if(!hldev) { xge_resources_free(dev, xge_free_terminate_hal_driver); XGE_EXIT_ON_ERR("Memory allocation for HAL device failed", attach_out, ENOMEM); } lldev->devh = hldev; /* Our private structure */ pci_info = (xge_pci_info_t*) xge_os_malloc(NULL, sizeof(xge_pci_info_t)); if(!pci_info) { xge_resources_free(dev, xge_free_hal_device); XGE_EXIT_ON_ERR("Memory allocation for PCI info. 
failed", attach_out, ENOMEM); } lldev->pdev = pci_info; pci_info->device = dev; /* Set bus master */ pci_enable_busmaster(dev); /* Get virtual address for BAR0 */ rid0 = PCIR_BAR(0); pci_info->regmap0 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid0, RF_ACTIVE); if(pci_info->regmap0 == NULL) { xge_resources_free(dev, xge_free_pci_info); XGE_EXIT_ON_ERR("Bus resource allocation for BAR0 failed", attach_out, ENOMEM); } attr.bar0 = (char *)pci_info->regmap0; pci_info->bar0resource = (xge_bus_resource_t*) xge_os_malloc(NULL, sizeof(xge_bus_resource_t)); if(pci_info->bar0resource == NULL) { xge_resources_free(dev, xge_free_bar0); XGE_EXIT_ON_ERR("Memory allocation for BAR0 Resources failed", attach_out, ENOMEM); } ((xge_bus_resource_t *)(pci_info->bar0resource))->bus_tag = rman_get_bustag(pci_info->regmap0); ((xge_bus_resource_t *)(pci_info->bar0resource))->bus_handle = rman_get_bushandle(pci_info->regmap0); ((xge_bus_resource_t *)(pci_info->bar0resource))->bar_start_addr = pci_info->regmap0; /* Get virtual address for BAR1 */ rid1 = PCIR_BAR(2); pci_info->regmap1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid1, RF_ACTIVE); if(pci_info->regmap1 == NULL) { xge_resources_free(dev, xge_free_bar0_resource); XGE_EXIT_ON_ERR("Bus resource allocation for BAR1 failed", attach_out, ENOMEM); } attr.bar1 = (char *)pci_info->regmap1; pci_info->bar1resource = (xge_bus_resource_t*) xge_os_malloc(NULL, sizeof(xge_bus_resource_t)); if(pci_info->bar1resource == NULL) { xge_resources_free(dev, xge_free_bar1); XGE_EXIT_ON_ERR("Memory allocation for BAR1 Resources failed", attach_out, ENOMEM); } ((xge_bus_resource_t *)(pci_info->bar1resource))->bus_tag = rman_get_bustag(pci_info->regmap1); ((xge_bus_resource_t *)(pci_info->bar1resource))->bus_handle = rman_get_bushandle(pci_info->regmap1); ((xge_bus_resource_t *)(pci_info->bar1resource))->bar_start_addr = pci_info->regmap1; /* Save PCI config space */ xge_pci_space_save(dev); attr.regh0 = (xge_bus_resource_t *) pci_info->bar0resource; attr.regh1 = (xge_bus_resource_t *) pci_info->bar1resource; attr.irqh = lldev->irqhandle; attr.cfgh = pci_info; attr.pdev = pci_info; /* Initialize device configuration parameters */ xge_init_params(device_config, dev); rid = 0; if(lldev->enabled_msi) { /* Number of MSI messages supported by device */ msi_count = pci_msi_count(dev); if(msi_count > 1) { /* Device supports MSI */ if(bootverbose) { xge_trace(XGE_ERR, "MSI count: %d", msi_count); xge_trace(XGE_ERR, "Now, driver supporting 1 message"); } msi_count = 1; error = pci_alloc_msi(dev, &msi_count); if(error == 0) { if(bootverbose) xge_trace(XGE_ERR, "Allocated messages: %d", msi_count); enable_msi = XGE_HAL_INTR_MODE_MSI; rid = 1; } else { if(bootverbose) xge_trace(XGE_ERR, "pci_alloc_msi failed, %d", error); } } } lldev->enabled_msi = enable_msi; /* Allocate resource for irq */ lldev->irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, (RF_SHAREABLE | RF_ACTIVE)); if(lldev->irq == NULL) { xge_trace(XGE_ERR, "Allocating irq resource for %s failed", ((rid == 0) ? 
"line interrupt" : "MSI")); if(rid == 1) { error = pci_release_msi(dev); if(error != 0) { xge_trace(XGE_ERR, "Releasing MSI resources failed %d", error); xge_trace(XGE_ERR, "Requires reboot to use MSI again"); } xge_trace(XGE_ERR, "Trying line interrupts"); rid = 0; lldev->enabled_msi = XGE_HAL_INTR_MODE_IRQLINE; lldev->irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, (RF_SHAREABLE | RF_ACTIVE)); } if(lldev->irq == NULL) { xge_trace(XGE_ERR, "Allocating irq resource failed"); xge_resources_free(dev, xge_free_bar1_resource); status = ENOMEM; goto attach_out; } } device_config->intr_mode = lldev->enabled_msi; if(bootverbose) { xge_trace(XGE_TRACE, "rid: %d, Mode: %d, MSI count: %d", rid, lldev->enabled_msi, msi_count); } /* Initialize HAL device */ error = xge_hal_device_initialize(hldev, &attr, device_config); if(error != XGE_HAL_OK) { xge_resources_free(dev, xge_free_irq_resource); XGE_EXIT_ON_ERR("Initializing HAL device failed", attach_out, ENXIO); } xge_hal_device_private_set(hldev, lldev); error = xge_interface_setup(dev); if(error != 0) { status = error; goto attach_out; } ifnetp = lldev->ifnetp; ifnetp->if_mtu = device_config->mtu; xge_media_init(dev); /* Associate interrupt handler with the device */ if(lldev->enabled_msi == XGE_HAL_INTR_MODE_MSI) { error = bus_setup_intr(dev, lldev->irq, (INTR_TYPE_NET | INTR_MPSAFE), #if __FreeBSD_version > 700030 NULL, #endif xge_isr_msi, lldev, &lldev->irqhandle); xge_msi_info_save(lldev); } else { error = bus_setup_intr(dev, lldev->irq, (INTR_TYPE_NET | INTR_MPSAFE), #if __FreeBSD_version > 700030 xge_isr_filter, #endif xge_isr_line, lldev, &lldev->irqhandle); } if(error != 0) { xge_resources_free(dev, xge_free_media_interface); XGE_EXIT_ON_ERR("Associating interrupt handler with device failed", attach_out, ENXIO); } xge_print_info(lldev); xge_add_sysctl_handlers(lldev); xge_buffer_mode_init(lldev, device_config->mtu); attach_out: xge_os_free(NULL, device_config, sizeof(xge_hal_device_config_t)); attach_out_config: return status; } /** * xge_resources_free * Undo what-all we did during load/attach * * @dev Device Handle * @error Identifies what-all to undo */ void xge_resources_free(device_t dev, xge_lables_e error) { xge_lldev_t *lldev; xge_pci_info_t *pci_info; xge_hal_device_t *hldev; int rid, status; /* LL Device */ lldev = (xge_lldev_t *) device_get_softc(dev); pci_info = lldev->pdev; /* HAL Device */ hldev = lldev->devh; switch(error) { case xge_free_all: /* Teardown interrupt handler - device association */ bus_teardown_intr(dev, lldev->irq, lldev->irqhandle); case xge_free_media_interface: /* Media */ ifmedia_removeall(&lldev->media); /* Detach Ether */ ether_ifdetach(lldev->ifnetp); if_free(lldev->ifnetp); xge_hal_device_private_set(hldev, NULL); xge_hal_device_disable(hldev); case xge_free_terminate_hal_device: /* HAL Device */ xge_hal_device_terminate(hldev); case xge_free_irq_resource: /* Release IRQ resource */ bus_release_resource(dev, SYS_RES_IRQ, ((lldev->enabled_msi == XGE_HAL_INTR_MODE_IRQLINE) ? 
0:1), lldev->irq); if(lldev->enabled_msi == XGE_HAL_INTR_MODE_MSI) { status = pci_release_msi(dev); if(status != 0) { if(bootverbose) { xge_trace(XGE_ERR, "pci_release_msi returned %d", status); } } } case xge_free_bar1_resource: /* Restore PCI configuration space */ xge_pci_space_restore(dev); /* Free bar1resource */ xge_os_free(NULL, pci_info->bar1resource, sizeof(xge_bus_resource_t)); case xge_free_bar1: /* Release BAR1 */ rid = PCIR_BAR(2); bus_release_resource(dev, SYS_RES_MEMORY, rid, pci_info->regmap1); case xge_free_bar0_resource: /* Free bar0resource */ xge_os_free(NULL, pci_info->bar0resource, sizeof(xge_bus_resource_t)); case xge_free_bar0: /* Release BAR0 */ rid = PCIR_BAR(0); bus_release_resource(dev, SYS_RES_MEMORY, rid, pci_info->regmap0); case xge_free_pci_info: /* Disable Bus Master */ pci_disable_busmaster(dev); /* Free pci_info_t */ lldev->pdev = NULL; xge_os_free(NULL, pci_info, sizeof(xge_pci_info_t)); case xge_free_hal_device: /* Free device configuration struct and HAL device */ xge_os_free(NULL, hldev, sizeof(xge_hal_device_t)); case xge_free_terminate_hal_driver: /* Terminate HAL driver */ hal_driver_init_count = hal_driver_init_count - 1; if(!hal_driver_init_count) { xge_hal_driver_terminate(); } case xge_free_mutex: xge_mutex_destroy(lldev); } } /** * xge_detach * Detaches driver from the Kernel subsystem * * @dev Device Handle */ int xge_detach(device_t dev) { xge_lldev_t *lldev = (xge_lldev_t *)device_get_softc(dev); if(lldev->in_detach == 0) { lldev->in_detach = 1; xge_stop(lldev); xge_resources_free(dev, xge_free_all); } return 0; } /** * xge_shutdown * To shutdown device before system shutdown * * @dev Device Handle */ int xge_shutdown(device_t dev) { xge_lldev_t *lldev = (xge_lldev_t *) device_get_softc(dev); xge_stop(lldev); return 0; } /** * xge_interface_setup * Setup interface * * @dev Device Handle * * Returns 0 on success, ENXIO/ENOMEM on failure */ int xge_interface_setup(device_t dev) { u8 mcaddr[ETHER_ADDR_LEN]; xge_hal_status_e status; xge_lldev_t *lldev = (xge_lldev_t *)device_get_softc(dev); struct ifnet *ifnetp; xge_hal_device_t *hldev = lldev->devh; /* Get the MAC address of the device */ status = xge_hal_device_macaddr_get(hldev, 0, &mcaddr); if(status != XGE_HAL_OK) { xge_resources_free(dev, xge_free_terminate_hal_device); XGE_EXIT_ON_ERR("Getting MAC address failed", ifsetup_out, ENXIO); } /* Get interface ifnet structure for this Ether device */ ifnetp = lldev->ifnetp = if_alloc(IFT_ETHER); if(ifnetp == NULL) { xge_resources_free(dev, xge_free_terminate_hal_device); XGE_EXIT_ON_ERR("Allocation ifnet failed", ifsetup_out, ENOMEM); } /* Initialize interface ifnet structure */ if_initname(ifnetp, device_get_name(dev), device_get_unit(dev)); ifnetp->if_mtu = XGE_HAL_DEFAULT_MTU; ifnetp->if_baudrate = XGE_BAUDRATE; ifnetp->if_init = xge_init; ifnetp->if_softc = lldev; ifnetp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifnetp->if_ioctl = xge_ioctl; ifnetp->if_start = xge_send; /* TODO: Check and assign optimal value */ ifnetp->if_snd.ifq_maxlen = ifqmaxlen; ifnetp->if_capabilities = IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM; if(lldev->enabled_tso) ifnetp->if_capabilities |= IFCAP_TSO4; if(lldev->enabled_lro) ifnetp->if_capabilities |= IFCAP_LRO; ifnetp->if_capenable = ifnetp->if_capabilities; /* Attach the interface */ ether_ifattach(ifnetp, mcaddr); ifsetup_out: return status; } /** * xge_callback_link_up * Callback for Link-up indication from HAL * * @userdata Per-adapter data */ void xge_callback_link_up(void 
*userdata) { xge_lldev_t *lldev = (xge_lldev_t *)userdata; struct ifnet *ifnetp = lldev->ifnetp; ifnetp->if_flags &= ~IFF_DRV_OACTIVE; if_link_state_change(ifnetp, LINK_STATE_UP); } /** * xge_callback_link_down * Callback for Link-down indication from HAL * * @userdata Per-adapter data */ void xge_callback_link_down(void *userdata) { xge_lldev_t *lldev = (xge_lldev_t *)userdata; struct ifnet *ifnetp = lldev->ifnetp; ifnetp->if_flags |= IFF_DRV_OACTIVE; if_link_state_change(ifnetp, LINK_STATE_DOWN); } /** * xge_callback_crit_err * Callback for Critical error indication from HAL * * @userdata Per-adapter data * @type Event type (Enumerated hardware error) * @serr_data Hardware status */ void xge_callback_crit_err(void *userdata, xge_hal_event_e type, u64 serr_data) { xge_trace(XGE_ERR, "Critical Error"); xge_reset(userdata); } /** * xge_callback_event * Callback from HAL indicating that some event has been queued * * @item Queued event item */ void xge_callback_event(xge_queue_item_t *item) { xge_lldev_t *lldev = NULL; xge_hal_device_t *hldev = NULL; struct ifnet *ifnetp = NULL; hldev = item->context; lldev = xge_hal_device_private(hldev); ifnetp = lldev->ifnetp; switch((int)item->event_type) { case XGE_LL_EVENT_TRY_XMIT_AGAIN: if(lldev->initialized) { if(xge_hal_channel_dtr_count(lldev->fifo_channel[0]) > 0) { ifnetp->if_flags &= ~IFF_DRV_OACTIVE; } else { xge_queue_produce_context( xge_hal_device_queue(lldev->devh), XGE_LL_EVENT_TRY_XMIT_AGAIN, lldev->devh); } } break; case XGE_LL_EVENT_DEVICE_RESETTING: xge_reset(item->context); break; default: break; } } /** * xge_ifmedia_change * Media change driver callback * * @ifnetp Interface Handle * * Returns 0 if media is Ether else EINVAL */ int xge_ifmedia_change(struct ifnet *ifnetp) { xge_lldev_t *lldev = ifnetp->if_softc; struct ifmedia *ifmediap = &lldev->media; return (IFM_TYPE(ifmediap->ifm_media) != IFM_ETHER) ? 
	EINVAL:0;
}

/**
 * xge_ifmedia_status
 * Media status driver callback
 *
 * @ifnetp Interface Handle
 * @ifmr Interface Media Settings
 */
void
xge_ifmedia_status(struct ifnet *ifnetp, struct ifmediareq *ifmr)
{
    xge_hal_status_e status;
    u64 regvalue;
    xge_lldev_t *lldev = ifnetp->if_softc;
    xge_hal_device_t *hldev = lldev->devh;

    ifmr->ifm_status = IFM_AVALID;
    ifmr->ifm_active = IFM_ETHER;

    status = xge_hal_mgmt_reg_read(hldev, 0,
        xge_offsetof(xge_hal_pci_bar0_t, adapter_status), &regvalue);
    if(status != XGE_HAL_OK) {
        xge_trace(XGE_TRACE, "Getting adapter status failed");
        goto _exit;
    }

    if((regvalue & (XGE_HAL_ADAPTER_STATUS_RMAC_REMOTE_FAULT |
        XGE_HAL_ADAPTER_STATUS_RMAC_LOCAL_FAULT)) == 0) {
        ifmr->ifm_status |= IFM_ACTIVE;
        ifmr->ifm_active |= IFM_10G_SR | IFM_FDX;
        if_link_state_change(ifnetp, LINK_STATE_UP);
    }
    else {
        if_link_state_change(ifnetp, LINK_STATE_DOWN);
    }

_exit:
    return;
}

/**
 * xge_ioctl_stats
 * IOCTL to get statistics
 *
 * @lldev Per-adapter data
 * @ifreqp Interface request
 */
int
xge_ioctl_stats(xge_lldev_t *lldev, struct ifreq *ifreqp)
{
    xge_hal_status_e status = XGE_HAL_OK;
    char cmd, mode;
    void *info = NULL;
    int retValue;

-   cmd = retValue = fubyte(ifreqp->ifr_data);
+   cmd = retValue = fubyte(ifr_data_get_ptr(ifreqp));
    if (retValue == -1)
        return (EFAULT);

    retValue = EINVAL;
    switch(cmd) {
        case XGE_QUERY_STATS:
            mtx_lock(&lldev->mtx_drv);
            status = xge_hal_stats_hw(lldev->devh,
                (xge_hal_stats_hw_info_t **)&info);
            mtx_unlock(&lldev->mtx_drv);
            if(status == XGE_HAL_OK) {
-               if(copyout(info, ifreqp->ifr_data,
+               if(copyout(info, ifr_data_get_ptr(ifreqp),
                    sizeof(xge_hal_stats_hw_info_t)) == 0)
                    retValue = 0;
            }
            else {
                xge_trace(XGE_ERR, "Getting statistics failed (Status: %d)",
                    status);
            }
            break;

        case XGE_QUERY_PCICONF:
            info = xge_os_malloc(NULL, sizeof(xge_hal_pci_config_t));
            if(info != NULL) {
                mtx_lock(&lldev->mtx_drv);
                status = xge_hal_mgmt_pci_config(lldev->devh, info,
                    sizeof(xge_hal_pci_config_t));
                mtx_unlock(&lldev->mtx_drv);
                if(status == XGE_HAL_OK) {
-                   if(copyout(info, ifreqp->ifr_data,
+                   if(copyout(info, ifr_data_get_ptr(ifreqp),
                        sizeof(xge_hal_pci_config_t)) == 0)
                        retValue = 0;
                }
                else {
                    xge_trace(XGE_ERR,
                        "Getting PCI configuration failed (%d)", status);
                }
                xge_os_free(NULL, info, sizeof(xge_hal_pci_config_t));
            }
            break;

        case XGE_QUERY_DEVSTATS:
            info = xge_os_malloc(NULL, sizeof(xge_hal_stats_device_info_t));
            if(info != NULL) {
                mtx_lock(&lldev->mtx_drv);
                status = xge_hal_mgmt_device_stats(lldev->devh, info,
                    sizeof(xge_hal_stats_device_info_t));
                mtx_unlock(&lldev->mtx_drv);
                if(status == XGE_HAL_OK) {
-                   if(copyout(info, ifreqp->ifr_data,
+                   if(copyout(info, ifr_data_get_ptr(ifreqp),
                        sizeof(xge_hal_stats_device_info_t)) == 0)
                        retValue = 0;
                }
                else {
                    xge_trace(XGE_ERR, "Getting device info failed (%d)",
                        status);
                }
                xge_os_free(NULL, info, sizeof(xge_hal_stats_device_info_t));
            }
            break;

        case XGE_QUERY_SWSTATS:
            info = xge_os_malloc(NULL, sizeof(xge_hal_stats_sw_err_t));
            if(info != NULL) {
                mtx_lock(&lldev->mtx_drv);
                status = xge_hal_mgmt_sw_stats(lldev->devh, info,
                    sizeof(xge_hal_stats_sw_err_t));
                mtx_unlock(&lldev->mtx_drv);
                if(status == XGE_HAL_OK) {
-                   if(copyout(info, ifreqp->ifr_data,
+                   if(copyout(info, ifr_data_get_ptr(ifreqp),
                        sizeof(xge_hal_stats_sw_err_t)) == 0)
                        retValue = 0;
                }
                else {
                    xge_trace(XGE_ERR,
                        "Getting tcode statistics failed (%d)", status);
                }
                xge_os_free(NULL, info, sizeof(xge_hal_stats_sw_err_t));
            }
            break;

        case XGE_QUERY_DRIVERSTATS:
-           if(copyout(&lldev->driver_stats, ifreqp->ifr_data,
+           if(copyout(&lldev->driver_stats, ifr_data_get_ptr(ifreqp),
                sizeof(xge_driver_stats_t)) == 0) {
retValue = 0; } else { xge_trace(XGE_ERR, "Copyout of driver statistics failed (%d)", status); } break; case XGE_READ_VERSION: info = xge_os_malloc(NULL, XGE_BUFFER_SIZE); if(info != NULL) { strcpy(info, XGE_DRIVER_VERSION); - if(copyout(info, ifreqp->ifr_data, XGE_BUFFER_SIZE) == 0) + if(copyout(info, ifr_data_get_ptr(ifreqp), + XGE_BUFFER_SIZE) == 0) retValue = 0; xge_os_free(NULL, info, XGE_BUFFER_SIZE); } break; case XGE_QUERY_DEVCONF: info = xge_os_malloc(NULL, sizeof(xge_hal_device_config_t)); if(info != NULL) { mtx_lock(&lldev->mtx_drv); status = xge_hal_mgmt_device_config(lldev->devh, info, sizeof(xge_hal_device_config_t)); mtx_unlock(&lldev->mtx_drv); if(status == XGE_HAL_OK) { - if(copyout(info, ifreqp->ifr_data, + if(copyout(info, ifr_data_get_ptr(ifreqp), sizeof(xge_hal_device_config_t)) == 0) retValue = 0; } else { xge_trace(XGE_ERR, "Getting devconfig failed (%d)", status); } xge_os_free(NULL, info, sizeof(xge_hal_device_config_t)); } break; case XGE_QUERY_BUFFER_MODE: - if(copyout(&lldev->buffer_mode, ifreqp->ifr_data, + if(copyout(&lldev->buffer_mode, ifr_data_get_ptr(ifreqp), sizeof(int)) == 0) retValue = 0; break; case XGE_SET_BUFFER_MODE_1: case XGE_SET_BUFFER_MODE_2: case XGE_SET_BUFFER_MODE_5: mode = (cmd == XGE_SET_BUFFER_MODE_1) ? 'Y':'N'; - if(copyout(&mode, ifreqp->ifr_data, sizeof(mode)) == 0) + if(copyout(&mode, ifr_data_get_ptr(ifreqp), sizeof(mode)) == 0) retValue = 0; break; default: xge_trace(XGE_TRACE, "Nothing is matching"); retValue = ENOTTY; break; } return retValue; } /** * xge_ioctl_registers * IOCTL to get registers * * @lldev Per-adapter data * @ifreqp Interface request */ int xge_ioctl_registers(xge_lldev_t *lldev, struct ifreq *ifreqp) { xge_register_t tmpdata; xge_register_t *data; xge_hal_status_e status = XGE_HAL_OK; int retValue = EINVAL, offset = 0, index = 0; int error; u64 val64 = 0; - error = copyin(ifreqp->ifr_data, &tmpdata, sizeof(tmpdata)); + error = copyin(ifr_data_get_ptr(ifreqp), &tmpdata, sizeof(tmpdata)); if (error != 0) return (error); data = &tmpdata; /* Reading a register */ if(strcmp(data->option, "-r") == 0) { data->value = 0x0000; mtx_lock(&lldev->mtx_drv); status = xge_hal_mgmt_reg_read(lldev->devh, 0, data->offset, &data->value); mtx_unlock(&lldev->mtx_drv); if(status == XGE_HAL_OK) { - if(copyout(data, ifreqp->ifr_data, sizeof(xge_register_t)) == 0) + if(copyout(data, ifr_data_get_ptr(ifreqp), + sizeof(xge_register_t)) == 0) retValue = 0; } } /* Writing to a register */ else if(strcmp(data->option, "-w") == 0) { mtx_lock(&lldev->mtx_drv); status = xge_hal_mgmt_reg_write(lldev->devh, 0, data->offset, data->value); if(status == XGE_HAL_OK) { val64 = 0x0000; status = xge_hal_mgmt_reg_read(lldev->devh, 0, data->offset, &val64); if(status != XGE_HAL_OK) { xge_trace(XGE_ERR, "Reading back updated register failed"); } else { if(val64 != data->value) { xge_trace(XGE_ERR, "Read and written register values mismatched"); } else retValue = 0; } } else { xge_trace(XGE_ERR, "Getting register value failed"); } mtx_unlock(&lldev->mtx_drv); } else { mtx_lock(&lldev->mtx_drv); for(index = 0, offset = 0; offset <= XGE_OFFSET_OF_LAST_REG; index++, offset += 0x0008) { val64 = 0; status = xge_hal_mgmt_reg_read(lldev->devh, 0, offset, &val64); if(status != XGE_HAL_OK) { xge_trace(XGE_ERR, "Getting register value failed"); break; } *((u64 *)((u64 *)data + index)) = val64; retValue = 0; } mtx_unlock(&lldev->mtx_drv); if(retValue == 0) { - if(copyout(data, ifreqp->ifr_data, + if(copyout(data, ifr_data_get_ptr(ifreqp), sizeof(xge_hal_pci_bar0_t)) != 
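/*
 * Editorial note on the hunks above: every direct use of ifreqp->ifr_data as
 * a user pointer (fubyte, copyin, copyout) is replaced with
 * ifr_data_get_ptr(ifreqp), which returns the user-space address carried in
 * the ifreq in an ABI-safe way (e.g. for 32-bit compat ioctls).  A minimal
 * sketch of the resulting pattern -- the helper name example_get_drv_stats
 * is illustrative only and not part of this driver:
 *
 *     static int
 *     example_get_drv_stats(xge_lldev_t *lldev, struct ifreq *ifreqp)
 *     {
 *         xge_driver_stats_t snap;
 *
 *         mtx_lock(&lldev->mtx_drv);
 *         snap = lldev->driver_stats;      // take a consistent snapshot
 *         mtx_unlock(&lldev->mtx_drv);
 *
 *         // copyout() returns 0 on success or EFAULT on a bad user pointer
 *         return (copyout(&snap, ifr_data_get_ptr(ifreqp), sizeof(snap)));
 *     }
 */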
0) { xge_trace(XGE_ERR, "Copyout of register values failed"); retValue = EINVAL; } } else { xge_trace(XGE_ERR, "Getting register values failed"); } } return retValue; } /** * xge_ioctl * Callback to control the device - Interface configuration * * @ifnetp Interface Handle * @command Device control command * @data Parameters associated with command (if any) */ int xge_ioctl(struct ifnet *ifnetp, unsigned long command, caddr_t data) { struct ifreq *ifreqp = (struct ifreq *)data; xge_lldev_t *lldev = ifnetp->if_softc; struct ifmedia *ifmediap = &lldev->media; int retValue = 0, mask = 0; if(lldev->in_detach) { return retValue; } switch(command) { /* Set/Get ifnet address */ case SIOCSIFADDR: case SIOCGIFADDR: ether_ioctl(ifnetp, command, data); break; /* Set ifnet MTU */ case SIOCSIFMTU: retValue = xge_change_mtu(lldev, ifreqp->ifr_mtu); break; /* Set ifnet flags */ case SIOCSIFFLAGS: if(ifnetp->if_flags & IFF_UP) { /* Link status is UP */ if(!(ifnetp->if_drv_flags & IFF_DRV_RUNNING)) { xge_init(lldev); } xge_disable_promisc(lldev); xge_enable_promisc(lldev); } else { /* Link status is DOWN */ /* If device is in running, make it down */ if(ifnetp->if_drv_flags & IFF_DRV_RUNNING) { xge_stop(lldev); } } break; /* Add/delete multicast address */ case SIOCADDMULTI: case SIOCDELMULTI: if(ifnetp->if_drv_flags & IFF_DRV_RUNNING) { xge_setmulti(lldev); } break; /* Set/Get net media */ case SIOCSIFMEDIA: case SIOCGIFMEDIA: retValue = ifmedia_ioctl(ifnetp, ifreqp, ifmediap, command); break; /* Set capabilities */ case SIOCSIFCAP: mtx_lock(&lldev->mtx_drv); mask = ifreqp->ifr_reqcap ^ ifnetp->if_capenable; if(mask & IFCAP_TXCSUM) { if(ifnetp->if_capenable & IFCAP_TXCSUM) { ifnetp->if_capenable &= ~(IFCAP_TSO4 | IFCAP_TXCSUM); ifnetp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP | CSUM_TSO); } else { ifnetp->if_capenable |= IFCAP_TXCSUM; ifnetp->if_hwassist |= (CSUM_TCP | CSUM_UDP); } } if(mask & IFCAP_TSO4) { if(ifnetp->if_capenable & IFCAP_TSO4) { ifnetp->if_capenable &= ~IFCAP_TSO4; ifnetp->if_hwassist &= ~CSUM_TSO; xge_os_printf("%s: TSO Disabled", device_get_nameunit(lldev->device)); } else if(ifnetp->if_capenable & IFCAP_TXCSUM) { ifnetp->if_capenable |= IFCAP_TSO4; ifnetp->if_hwassist |= CSUM_TSO; xge_os_printf("%s: TSO Enabled", device_get_nameunit(lldev->device)); } } mtx_unlock(&lldev->mtx_drv); break; /* Custom IOCTL 0 */ case SIOCGPRIVATE_0: retValue = xge_ioctl_stats(lldev, ifreqp); break; /* Custom IOCTL 1 */ case SIOCGPRIVATE_1: retValue = xge_ioctl_registers(lldev, ifreqp); break; default: retValue = EINVAL; break; } return retValue; } /** * xge_init * Initialize the interface * * @plldev Per-adapter Data */ void xge_init(void *plldev) { xge_lldev_t *lldev = (xge_lldev_t *)plldev; mtx_lock(&lldev->mtx_drv); xge_os_memzero(&lldev->driver_stats, sizeof(xge_driver_stats_t)); xge_device_init(lldev, XGE_HAL_CHANNEL_OC_NORMAL); mtx_unlock(&lldev->mtx_drv); } /** * xge_device_init * Initialize the interface (called by holding lock) * * @pdevin Per-adapter Data */ void xge_device_init(xge_lldev_t *lldev, xge_hal_channel_reopen_e option) { struct ifnet *ifnetp = lldev->ifnetp; xge_hal_device_t *hldev = lldev->devh; struct ifaddr *ifaddrp; unsigned char *macaddr; struct sockaddr_dl *sockaddrp; int status = XGE_HAL_OK; mtx_assert((&lldev->mtx_drv), MA_OWNED); /* If device is in running state, initializing is not required */ if(ifnetp->if_drv_flags & IFF_DRV_RUNNING) return; /* Initializing timer */ callout_init(&lldev->timer, 1); xge_trace(XGE_TRACE, "Set MTU size"); status = xge_hal_device_mtu_set(hldev, 
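/*
 * Editorial note on the SIOCSIFCAP handling above: the driver XORs the
 * requested capabilities against if_capenable and walks the changed bits,
 * keeping the dependency that TSO4 may only be enabled while TXCSUM is on,
 * and dropping TSO4 whenever TXCSUM is turned off.  A condensed sketch of
 * that pattern, for reference only (not a byte-for-byte copy of the code):
 *
 *     mask = ifreqp->ifr_reqcap ^ ifnetp->if_capenable;
 *     if (mask & IFCAP_TXCSUM) {
 *         ifnetp->if_capenable ^= IFCAP_TXCSUM;
 *         if (!(ifnetp->if_capenable & IFCAP_TXCSUM)) {
 *             ifnetp->if_capenable &= ~IFCAP_TSO4;   // TSO needs TX csum
 *             ifnetp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP | CSUM_TSO);
 *         } else
 *             ifnetp->if_hwassist |= CSUM_TCP | CSUM_UDP;
 *     }
 *     if ((mask & IFCAP_TSO4) && (ifnetp->if_capenable & IFCAP_TXCSUM)) {
 *         ifnetp->if_capenable ^= IFCAP_TSO4;
 *         ifnetp->if_hwassist ^= CSUM_TSO;
 *     }
 */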
ifnetp->if_mtu); if(status != XGE_HAL_OK) { xge_trace(XGE_ERR, "Setting MTU in HAL device failed"); goto _exit; } /* Enable HAL device */ xge_hal_device_enable(hldev); /* Get MAC address and update in HAL */ ifaddrp = ifnetp->if_addr; sockaddrp = (struct sockaddr_dl *)ifaddrp->ifa_addr; sockaddrp->sdl_type = IFT_ETHER; sockaddrp->sdl_alen = ifnetp->if_addrlen; macaddr = LLADDR(sockaddrp); xge_trace(XGE_TRACE, "Setting MAC address: %02x:%02x:%02x:%02x:%02x:%02x\n", *macaddr, *(macaddr + 1), *(macaddr + 2), *(macaddr + 3), *(macaddr + 4), *(macaddr + 5)); status = xge_hal_device_macaddr_set(hldev, 0, macaddr); if(status != XGE_HAL_OK) xge_trace(XGE_ERR, "Setting MAC address failed (%d)", status); /* Opening channels */ mtx_unlock(&lldev->mtx_drv); status = xge_channel_open(lldev, option); mtx_lock(&lldev->mtx_drv); if(status != XGE_HAL_OK) goto _exit; /* Set appropriate flags */ ifnetp->if_drv_flags |= IFF_DRV_RUNNING; ifnetp->if_flags &= ~IFF_DRV_OACTIVE; /* Checksum capability */ ifnetp->if_hwassist = (ifnetp->if_capenable & IFCAP_TXCSUM) ? (CSUM_TCP | CSUM_UDP) : 0; if((lldev->enabled_tso) && (ifnetp->if_capenable & IFCAP_TSO4)) ifnetp->if_hwassist |= CSUM_TSO; /* Enable interrupts */ xge_hal_device_intr_enable(hldev); callout_reset(&lldev->timer, 10*hz, xge_timer, lldev); /* Disable promiscuous mode */ xge_trace(XGE_TRACE, "If opted, enable promiscuous mode"); xge_enable_promisc(lldev); /* Device is initialized */ lldev->initialized = 1; xge_os_mdelay(1000); _exit: return; } /** * xge_timer * Timer timeout function to handle link status * * @devp Per-adapter Data */ void xge_timer(void *devp) { xge_lldev_t *lldev = (xge_lldev_t *)devp; xge_hal_device_t *hldev = lldev->devh; /* Poll for changes */ xge_hal_device_poll(hldev); /* Reset timer */ callout_reset(&lldev->timer, hz, xge_timer, lldev); return; } /** * xge_stop * De-activate the interface * * @lldev Per-adater Data */ void xge_stop(xge_lldev_t *lldev) { mtx_lock(&lldev->mtx_drv); xge_device_stop(lldev, XGE_HAL_CHANNEL_OC_NORMAL); mtx_unlock(&lldev->mtx_drv); } /** * xge_isr_filter * ISR filter function - to filter interrupts from other devices (shared) * * @handle Per-adapter Data * * Returns * FILTER_STRAY if interrupt is from other device * FILTER_SCHEDULE_THREAD if interrupt is from Xframe device */ int xge_isr_filter(void *handle) { xge_lldev_t *lldev = (xge_lldev_t *)handle; xge_hal_pci_bar0_t *bar0 = (xge_hal_pci_bar0_t *)((lldev->devh)->bar0); u16 retValue = FILTER_STRAY; u64 val64 = 0; XGE_DRV_STATS(isr_filter); val64 = xge_os_pio_mem_read64(lldev->pdev, (lldev->devh)->regh0, &bar0->general_int_status); retValue = (!val64) ? 
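/*
 * Editorial note: xge_isr_filter() above runs in filter (interrupt) context
 * on a possibly shared line; it only reads general_int_status and returns
 * FILTER_STRAY when this adapter did not raise the interrupt, otherwise
 * FILTER_SCHEDULE_THREAD so that xge_isr_line() does the real work in the
 * ithread.  A hedged sketch of how such a filter/handler pair is typically
 * registered (resource and cookie names are illustrative):
 *
 *     error = bus_setup_intr(dev, irq_res, INTR_TYPE_NET | INTR_MPSAFE,
 *         xge_isr_filter,          // fast filter: claim or reject the IRQ
 *         xge_isr_line,            // threaded handler: Rx/Tx processing
 *         lldev, &irq_cookie);
 *     if (error)
 *         device_printf(dev, "bus_setup_intr failed: %d\n", error);
 */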
FILTER_STRAY : FILTER_SCHEDULE_THREAD; return retValue; } /** * xge_isr_line * Interrupt service routine for Line interrupts * * @plldev Per-adapter Data */ void xge_isr_line(void *plldev) { xge_hal_status_e status; xge_lldev_t *lldev = (xge_lldev_t *)plldev; xge_hal_device_t *hldev = (xge_hal_device_t *)lldev->devh; struct ifnet *ifnetp = lldev->ifnetp; XGE_DRV_STATS(isr_line); if(ifnetp->if_drv_flags & IFF_DRV_RUNNING) { status = xge_hal_device_handle_irq(hldev); if(!(IFQ_DRV_IS_EMPTY(&ifnetp->if_snd))) xge_send(ifnetp); } } /* * xge_isr_msi * ISR for Message signaled interrupts */ void xge_isr_msi(void *plldev) { xge_lldev_t *lldev = (xge_lldev_t *)plldev; XGE_DRV_STATS(isr_msi); xge_hal_device_continue_irq(lldev->devh); } /** * xge_rx_open * Initiate and open all Rx channels * * @qid Ring Index * @lldev Per-adapter Data * @rflag Channel open/close/reopen flag * * Returns 0 or Error Number */ int xge_rx_open(int qid, xge_lldev_t *lldev, xge_hal_channel_reopen_e rflag) { u64 adapter_status = 0x0; xge_hal_status_e status = XGE_HAL_FAIL; xge_hal_channel_attr_t attr = { .post_qid = qid, .compl_qid = 0, .callback = xge_rx_compl, .per_dtr_space = sizeof(xge_rx_priv_t), .flags = 0, .type = XGE_HAL_CHANNEL_TYPE_RING, .userdata = lldev, .dtr_init = xge_rx_initial_replenish, .dtr_term = xge_rx_term }; /* If device is not ready, return */ status = xge_hal_device_status(lldev->devh, &adapter_status); if(status != XGE_HAL_OK) { xge_os_printf("Adapter Status: 0x%llx", (long long) adapter_status); XGE_EXIT_ON_ERR("Device is not ready", _exit, XGE_HAL_FAIL); } else { status = xge_hal_channel_open(lldev->devh, &attr, &lldev->ring_channel[qid], rflag); } _exit: return status; } /** * xge_tx_open * Initialize and open all Tx channels * * @lldev Per-adapter Data * @tflag Channel open/close/reopen flag * * Returns 0 or Error Number */ int xge_tx_open(xge_lldev_t *lldev, xge_hal_channel_reopen_e tflag) { xge_hal_status_e status = XGE_HAL_FAIL; u64 adapter_status = 0x0; int qindex, index; xge_hal_channel_attr_t attr = { .compl_qid = 0, .callback = xge_tx_compl, .per_dtr_space = sizeof(xge_tx_priv_t), .flags = 0, .type = XGE_HAL_CHANNEL_TYPE_FIFO, .userdata = lldev, .dtr_init = xge_tx_initial_replenish, .dtr_term = xge_tx_term }; /* If device is not ready, return */ status = xge_hal_device_status(lldev->devh, &adapter_status); if(status != XGE_HAL_OK) { xge_os_printf("Adapter Status: 0x%llx", (long long) adapter_status); XGE_EXIT_ON_ERR("Device is not ready", _exit, XGE_HAL_FAIL); } for(qindex = 0; qindex < XGE_FIFO_COUNT; qindex++) { attr.post_qid = qindex, status = xge_hal_channel_open(lldev->devh, &attr, &lldev->fifo_channel[qindex], tflag); if(status != XGE_HAL_OK) { for(index = 0; index < qindex; index++) xge_hal_channel_close(lldev->fifo_channel[index], tflag); } } _exit: return status; } /** * xge_enable_msi * Enables MSI * * @lldev Per-adapter Data */ void xge_enable_msi(xge_lldev_t *lldev) { xge_list_t *item = NULL; xge_hal_device_t *hldev = lldev->devh; xge_hal_channel_t *channel = NULL; u16 offset = 0, val16 = 0; xge_os_pci_read16(lldev->pdev, NULL, xge_offsetof(xge_hal_pci_config_le_t, msi_control), &val16); /* Update msi_data */ offset = (val16 & 0x80) ? 
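/*
 * Editorial note on xge_tx_open() above: the same xge_hal_channel_attr_t is
 * reused for every FIFO queue with only post_qid changing, and a failure
 * while opening queue N closes the queues already opened so no channel is
 * leaked.  A compact restatement of that open-then-unwind shape, for
 * reference only:
 *
 *     for (q = 0; q < XGE_FIFO_COUNT; q++) {
 *         attr.post_qid = q;
 *         status = xge_hal_channel_open(lldev->devh, &attr,
 *             &lldev->fifo_channel[q], tflag);
 *         if (status != XGE_HAL_OK) {
 *             while (q-- > 0)      // unwind the queues opened so far
 *                 xge_hal_channel_close(lldev->fifo_channel[q], tflag);
 *             break;
 *         }
 *     }
 */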
0x4c : 0x48; xge_os_pci_read16(lldev->pdev, NULL, offset, &val16); if(val16 & 0x1) val16 &= 0xfffe; else val16 |= 0x1; xge_os_pci_write16(lldev->pdev, NULL, offset, val16); /* Update msi_control */ xge_os_pci_read16(lldev->pdev, NULL, xge_offsetof(xge_hal_pci_config_le_t, msi_control), &val16); val16 |= 0x10; xge_os_pci_write16(lldev->pdev, NULL, xge_offsetof(xge_hal_pci_config_le_t, msi_control), val16); /* Set TxMAT and RxMAT registers with MSI */ xge_list_for_each(item, &hldev->free_channels) { channel = xge_container_of(item, xge_hal_channel_t, item); xge_hal_channel_msi_set(channel, 1, (u32)val16); } } /** * xge_channel_open * Open both Tx and Rx channels * * @lldev Per-adapter Data * @option Channel reopen option */ int xge_channel_open(xge_lldev_t *lldev, xge_hal_channel_reopen_e option) { xge_lro_entry_t *lro_session = NULL; xge_hal_status_e status = XGE_HAL_OK; int index = 0, index2 = 0; if(lldev->enabled_msi == XGE_HAL_INTR_MODE_MSI) { xge_msi_info_restore(lldev); xge_enable_msi(lldev); } _exit2: status = xge_create_dma_tags(lldev->device); if(status != XGE_HAL_OK) XGE_EXIT_ON_ERR("DMA tag creation failed", _exit, status); /* Open ring (Rx) channel */ for(index = 0; index < XGE_RING_COUNT; index++) { status = xge_rx_open(index, lldev, option); if(status != XGE_HAL_OK) { /* * DMA mapping fails in the unpatched Kernel which can't * allocate contiguous memory for Jumbo frames. * Try using 5 buffer mode. */ if((lldev->buffer_mode == XGE_HAL_RING_QUEUE_BUFFER_MODE_1) && (((lldev->ifnetp)->if_mtu + XGE_HAL_MAC_HEADER_MAX_SIZE) > MJUMPAGESIZE)) { /* Close so far opened channels */ for(index2 = 0; index2 < index; index2++) { xge_hal_channel_close(lldev->ring_channel[index2], option); } /* Destroy DMA tags intended to use for 1 buffer mode */ if(bus_dmamap_destroy(lldev->dma_tag_rx, lldev->extra_dma_map)) { xge_trace(XGE_ERR, "Rx extra DMA map destroy failed"); } if(bus_dma_tag_destroy(lldev->dma_tag_rx)) xge_trace(XGE_ERR, "Rx DMA tag destroy failed"); if(bus_dma_tag_destroy(lldev->dma_tag_tx)) xge_trace(XGE_ERR, "Tx DMA tag destroy failed"); /* Switch to 5 buffer mode */ lldev->buffer_mode = XGE_HAL_RING_QUEUE_BUFFER_MODE_5; xge_buffer_mode_init(lldev, (lldev->ifnetp)->if_mtu); /* Restart init */ goto _exit2; } else { XGE_EXIT_ON_ERR("Opening Rx channel failed", _exit1, status); } } } if(lldev->enabled_lro) { SLIST_INIT(&lldev->lro_free); SLIST_INIT(&lldev->lro_active); lldev->lro_num = XGE_LRO_DEFAULT_ENTRIES; for(index = 0; index < lldev->lro_num; index++) { lro_session = (xge_lro_entry_t *) xge_os_malloc(NULL, sizeof(xge_lro_entry_t)); if(lro_session == NULL) { lldev->lro_num = index; break; } SLIST_INSERT_HEAD(&lldev->lro_free, lro_session, next); } } /* Open FIFO (Tx) channel */ status = xge_tx_open(lldev, option); if(status != XGE_HAL_OK) XGE_EXIT_ON_ERR("Opening Tx channel failed", _exit1, status); goto _exit; _exit1: /* * Opening Rx channel(s) failed (index is ) or * Initialization of LRO failed (index is XGE_RING_COUNT) * Opening Tx channel failed (index is XGE_RING_COUNT) */ for(index2 = 0; index2 < index; index2++) xge_hal_channel_close(lldev->ring_channel[index2], option); _exit: return status; } /** * xge_channel_close * Close both Tx and Rx channels * * @lldev Per-adapter Data * @option Channel reopen option * */ void xge_channel_close(xge_lldev_t *lldev, xge_hal_channel_reopen_e option) { int qindex = 0; DELAY(1000 * 1000); /* Close FIFO (Tx) channel */ for(qindex = 0; qindex < XGE_FIFO_COUNT; qindex++) xge_hal_channel_close(lldev->fifo_channel[qindex], option); /* Close 
Ring (Rx) channels */ for(qindex = 0; qindex < XGE_RING_COUNT; qindex++) xge_hal_channel_close(lldev->ring_channel[qindex], option); if(bus_dmamap_destroy(lldev->dma_tag_rx, lldev->extra_dma_map)) xge_trace(XGE_ERR, "Rx extra map destroy failed"); if(bus_dma_tag_destroy(lldev->dma_tag_rx)) xge_trace(XGE_ERR, "Rx DMA tag destroy failed"); if(bus_dma_tag_destroy(lldev->dma_tag_tx)) xge_trace(XGE_ERR, "Tx DMA tag destroy failed"); } /** * dmamap_cb * DMA map callback * * @arg Parameter passed from dmamap * @segs Segments * @nseg Number of segments * @error Error */ void dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { if(!error) { *(bus_addr_t *) arg = segs->ds_addr; } } /** * xge_reset * Device Reset * * @lldev Per-adapter Data */ void xge_reset(xge_lldev_t *lldev) { xge_trace(XGE_TRACE, "Resetting the chip"); /* If the device is not initialized, return */ if(lldev->initialized) { mtx_lock(&lldev->mtx_drv); xge_device_stop(lldev, XGE_HAL_CHANNEL_OC_NORMAL); xge_device_init(lldev, XGE_HAL_CHANNEL_OC_NORMAL); mtx_unlock(&lldev->mtx_drv); } return; } /** * xge_setmulti * Set an address as a multicast address * * @lldev Per-adapter Data */ void xge_setmulti(xge_lldev_t *lldev) { struct ifmultiaddr *ifma; u8 *lladdr; xge_hal_device_t *hldev = (xge_hal_device_t *)lldev->devh; struct ifnet *ifnetp = lldev->ifnetp; int index = 0; int offset = 1; int table_size = 47; xge_hal_status_e status = XGE_HAL_OK; u8 initial_addr[]= {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; if((ifnetp->if_flags & IFF_MULTICAST) && (!lldev->all_multicast)) { status = xge_hal_device_mcast_enable(hldev); lldev->all_multicast = 1; } else if((ifnetp->if_flags & IFF_MULTICAST) && (lldev->all_multicast)) { status = xge_hal_device_mcast_disable(hldev); lldev->all_multicast = 0; } if(status != XGE_HAL_OK) { xge_trace(XGE_ERR, "Enabling/disabling multicast failed"); goto _exit; } /* Updating address list */ if_maddr_rlock(ifnetp); index = 0; TAILQ_FOREACH(ifma, &ifnetp->if_multiaddrs, ifma_link) { if(ifma->ifma_addr->sa_family != AF_LINK) { continue; } lladdr = LLADDR((struct sockaddr_dl *)ifma->ifma_addr); index += 1; } if_maddr_runlock(ifnetp); if((!lldev->all_multicast) && (index)) { lldev->macaddr_count = (index + 1); if(lldev->macaddr_count > table_size) { goto _exit; } /* Clear old addresses */ for(index = 0; index < 48; index++) { xge_hal_device_macaddr_set(hldev, (offset + index), initial_addr); } } /* Add new addresses */ if_maddr_rlock(ifnetp); index = 0; TAILQ_FOREACH(ifma, &ifnetp->if_multiaddrs, ifma_link) { if(ifma->ifma_addr->sa_family != AF_LINK) { continue; } lladdr = LLADDR((struct sockaddr_dl *)ifma->ifma_addr); xge_hal_device_macaddr_set(hldev, (offset + index), lladdr); index += 1; } if_maddr_runlock(ifnetp); _exit: return; } /** * xge_enable_promisc * Enable Promiscuous Mode * * @lldev Per-adapter Data */ void xge_enable_promisc(xge_lldev_t *lldev) { struct ifnet *ifnetp = lldev->ifnetp; xge_hal_device_t *hldev = lldev->devh; xge_hal_pci_bar0_t *bar0 = NULL; u64 val64 = 0; bar0 = (xge_hal_pci_bar0_t *) hldev->bar0; if(ifnetp->if_flags & IFF_PROMISC) { xge_hal_device_promisc_enable(lldev->devh); /* * When operating in promiscuous mode, don't strip the VLAN tag */ val64 = xge_os_pio_mem_read64(lldev->pdev, hldev->regh0, &bar0->rx_pa_cfg); val64 &= ~XGE_HAL_RX_PA_CFG_STRIP_VLAN_TAG_MODE(1); val64 |= XGE_HAL_RX_PA_CFG_STRIP_VLAN_TAG_MODE(0); xge_os_pio_mem_write64(lldev->pdev, hldev->regh0, val64, &bar0->rx_pa_cfg); xge_trace(XGE_TRACE, "Promiscuous mode ON"); } } /** * xge_disable_promisc * Disable 
Promiscuous Mode * * @lldev Per-adapter Data */ void xge_disable_promisc(xge_lldev_t *lldev) { xge_hal_device_t *hldev = lldev->devh; xge_hal_pci_bar0_t *bar0 = NULL; u64 val64 = 0; bar0 = (xge_hal_pci_bar0_t *) hldev->bar0; xge_hal_device_promisc_disable(lldev->devh); /* * Strip VLAN tag when operating in non-promiscuous mode */ val64 = xge_os_pio_mem_read64(lldev->pdev, hldev->regh0, &bar0->rx_pa_cfg); val64 &= ~XGE_HAL_RX_PA_CFG_STRIP_VLAN_TAG_MODE(1); val64 |= XGE_HAL_RX_PA_CFG_STRIP_VLAN_TAG_MODE(1); xge_os_pio_mem_write64(lldev->pdev, hldev->regh0, val64, &bar0->rx_pa_cfg); xge_trace(XGE_TRACE, "Promiscuous mode OFF"); } /** * xge_change_mtu * Change interface MTU to a requested valid size * * @lldev Per-adapter Data * @NewMtu Requested MTU * * Returns 0 or Error Number */ int xge_change_mtu(xge_lldev_t *lldev, int new_mtu) { int status = XGE_HAL_OK; /* Check requested MTU size for boundary */ if(xge_hal_device_mtu_check(lldev->devh, new_mtu) != XGE_HAL_OK) { XGE_EXIT_ON_ERR("Invalid MTU", _exit, EINVAL); } lldev->mtu = new_mtu; xge_confirm_changes(lldev, XGE_SET_MTU); _exit: return status; } /** * xge_device_stop * * Common code for both stop and part of reset. Disables device, interrupts and * closes channels * * @dev Device Handle * @option Channel normal/reset option */ void xge_device_stop(xge_lldev_t *lldev, xge_hal_channel_reopen_e option) { xge_hal_device_t *hldev = lldev->devh; struct ifnet *ifnetp = lldev->ifnetp; u64 val64 = 0; mtx_assert((&lldev->mtx_drv), MA_OWNED); /* If device is not in "Running" state, return */ if (!(ifnetp->if_drv_flags & IFF_DRV_RUNNING)) goto _exit; /* Set appropriate flags */ ifnetp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); /* Stop timer */ callout_stop(&lldev->timer); /* Disable interrupts */ xge_hal_device_intr_disable(hldev); mtx_unlock(&lldev->mtx_drv); xge_queue_flush(xge_hal_device_queue(lldev->devh)); mtx_lock(&lldev->mtx_drv); /* Disable HAL device */ if(xge_hal_device_disable(hldev) != XGE_HAL_OK) { xge_trace(XGE_ERR, "Disabling HAL device failed"); xge_hal_device_status(hldev, &val64); xge_trace(XGE_ERR, "Adapter Status: 0x%llx", (long long)val64); } /* Close Tx and Rx channels */ xge_channel_close(lldev, option); /* Reset HAL device */ xge_hal_device_reset(hldev); xge_os_mdelay(1000); lldev->initialized = 0; if_link_state_change(ifnetp, LINK_STATE_DOWN); _exit: return; } /** * xge_set_mbuf_cflags * set checksum flag for the mbuf * * @pkt Packet */ void xge_set_mbuf_cflags(mbuf_t pkt) { pkt->m_pkthdr.csum_flags = CSUM_IP_CHECKED; pkt->m_pkthdr.csum_flags |= CSUM_IP_VALID; pkt->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); pkt->m_pkthdr.csum_data = htons(0xffff); } /** * xge_lro_flush_sessions * Flush LRO session and send accumulated LRO packet to upper layer * * @lldev Per-adapter Data */ void xge_lro_flush_sessions(xge_lldev_t *lldev) { xge_lro_entry_t *lro_session = NULL; while(!SLIST_EMPTY(&lldev->lro_active)) { lro_session = SLIST_FIRST(&lldev->lro_active); SLIST_REMOVE_HEAD(&lldev->lro_active, next); xge_lro_flush(lldev, lro_session); } } /** * xge_lro_flush * Flush LRO session. 
Send accumulated LRO packet to upper layer * * @lldev Per-adapter Data * @lro LRO session to be flushed */ static void xge_lro_flush(xge_lldev_t *lldev, xge_lro_entry_t *lro_session) { struct ip *header_ip; struct tcphdr *header_tcp; u32 *ptr; if(lro_session->append_cnt) { header_ip = lro_session->lro_header_ip; header_ip->ip_len = htons(lro_session->len - ETHER_HDR_LEN); lro_session->m_head->m_pkthdr.len = lro_session->len; header_tcp = (struct tcphdr *)(header_ip + 1); header_tcp->th_ack = lro_session->ack_seq; header_tcp->th_win = lro_session->window; if(lro_session->timestamp) { ptr = (u32 *)(header_tcp + 1); ptr[1] = htonl(lro_session->tsval); ptr[2] = lro_session->tsecr; } } (*lldev->ifnetp->if_input)(lldev->ifnetp, lro_session->m_head); lro_session->m_head = NULL; lro_session->timestamp = 0; lro_session->append_cnt = 0; SLIST_INSERT_HEAD(&lldev->lro_free, lro_session, next); } /** * xge_lro_accumulate * Accumulate packets to form a large LRO packet based on various conditions * * @lldev Per-adapter Data * @m_head Current Packet * * Returns XGE_HAL_OK or XGE_HAL_FAIL (failure) */ static int xge_lro_accumulate(xge_lldev_t *lldev, struct mbuf *m_head) { struct ether_header *header_ethernet; struct ip *header_ip; struct tcphdr *header_tcp; u32 seq, *ptr; struct mbuf *buffer_next, *buffer_tail; xge_lro_entry_t *lro_session; xge_hal_status_e status = XGE_HAL_FAIL; int hlen, ip_len, tcp_hdr_len, tcp_data_len, tot_len, tcp_options; int trim; /* Get Ethernet header */ header_ethernet = mtod(m_head, struct ether_header *); /* Return if it is not IP packet */ if(header_ethernet->ether_type != htons(ETHERTYPE_IP)) goto _exit; /* Get IP header */ header_ip = lldev->buffer_mode == XGE_HAL_RING_QUEUE_BUFFER_MODE_1 ? (struct ip *)(header_ethernet + 1) : mtod(m_head->m_next, struct ip *); /* Return if it is not TCP packet */ if(header_ip->ip_p != IPPROTO_TCP) goto _exit; /* Return if packet has options */ if((header_ip->ip_hl << 2) != sizeof(*header_ip)) goto _exit; /* Return if packet is fragmented */ if(header_ip->ip_off & htons(IP_MF | IP_OFFMASK)) goto _exit; /* Get TCP header */ header_tcp = (struct tcphdr *)(header_ip + 1); /* Return if not ACK or PUSH */ if((header_tcp->th_flags & ~(TH_ACK | TH_PUSH)) != 0) goto _exit; /* Only timestamp option is handled */ tcp_options = (header_tcp->th_off << 2) - sizeof(*header_tcp); tcp_hdr_len = sizeof(*header_tcp) + tcp_options; ptr = (u32 *)(header_tcp + 1); if(tcp_options != 0) { if(__predict_false(tcp_options != TCPOLEN_TSTAMP_APPA) || (*ptr != ntohl(TCPOPT_NOP << 24 | TCPOPT_NOP << 16 | TCPOPT_TIMESTAMP << 8 | TCPOLEN_TIMESTAMP))) { goto _exit; } } /* Total length of packet (IP) */ ip_len = ntohs(header_ip->ip_len); /* TCP data size */ tcp_data_len = ip_len - (header_tcp->th_off << 2) - sizeof(*header_ip); /* If the frame is padded, trim it */ tot_len = m_head->m_pkthdr.len; trim = tot_len - (ip_len + ETHER_HDR_LEN); if(trim != 0) { if(trim < 0) goto _exit; m_adj(m_head, -trim); tot_len = m_head->m_pkthdr.len; } buffer_next = m_head; buffer_tail = NULL; while(buffer_next != NULL) { buffer_tail = buffer_next; buffer_next = buffer_tail->m_next; } /* Total size of only headers */ hlen = ip_len + ETHER_HDR_LEN - tcp_data_len; /* Get sequence number */ seq = ntohl(header_tcp->th_seq); SLIST_FOREACH(lro_session, &lldev->lro_active, next) { if(lro_session->source_port == header_tcp->th_sport && lro_session->dest_port == header_tcp->th_dport && lro_session->source_ip == header_ip->ip_src.s_addr && lro_session->dest_ip == header_ip->ip_dst.s_addr) { /* 
Unmatched sequence number, flush LRO session */ if(__predict_false(seq != lro_session->next_seq)) { SLIST_REMOVE(&lldev->lro_active, lro_session, xge_lro_entry_t, next); xge_lro_flush(lldev, lro_session); goto _exit; } /* Handle timestamp option */ if(tcp_options) { u32 tsval = ntohl(*(ptr + 1)); if(__predict_false(lro_session->tsval > tsval || *(ptr + 2) == 0)) { goto _exit; } lro_session->tsval = tsval; lro_session->tsecr = *(ptr + 2); } lro_session->next_seq += tcp_data_len; lro_session->ack_seq = header_tcp->th_ack; lro_session->window = header_tcp->th_win; /* If TCP data/payload is of 0 size, free mbuf */ if(tcp_data_len == 0) { m_freem(m_head); status = XGE_HAL_OK; goto _exit; } lro_session->append_cnt++; lro_session->len += tcp_data_len; /* Adjust mbuf so that m_data points to payload than headers */ m_adj(m_head, hlen); /* Append this packet to LRO accumulated packet */ lro_session->m_tail->m_next = m_head; lro_session->m_tail = buffer_tail; /* Flush if LRO packet is exceeding maximum size */ if(lro_session->len > (XGE_HAL_LRO_DEFAULT_FRM_LEN - lldev->ifnetp->if_mtu)) { SLIST_REMOVE(&lldev->lro_active, lro_session, xge_lro_entry_t, next); xge_lro_flush(lldev, lro_session); } status = XGE_HAL_OK; goto _exit; } } if(SLIST_EMPTY(&lldev->lro_free)) goto _exit; /* Start a new LRO session */ lro_session = SLIST_FIRST(&lldev->lro_free); SLIST_REMOVE_HEAD(&lldev->lro_free, next); SLIST_INSERT_HEAD(&lldev->lro_active, lro_session, next); lro_session->source_port = header_tcp->th_sport; lro_session->dest_port = header_tcp->th_dport; lro_session->source_ip = header_ip->ip_src.s_addr; lro_session->dest_ip = header_ip->ip_dst.s_addr; lro_session->next_seq = seq + tcp_data_len; lro_session->mss = tcp_data_len; lro_session->ack_seq = header_tcp->th_ack; lro_session->window = header_tcp->th_win; lro_session->lro_header_ip = header_ip; /* Handle timestamp option */ if(tcp_options) { lro_session->timestamp = 1; lro_session->tsval = ntohl(*(ptr + 1)); lro_session->tsecr = *(ptr + 2); } lro_session->len = tot_len; lro_session->m_head = m_head; lro_session->m_tail = buffer_tail; status = XGE_HAL_OK; _exit: return status; } /** * xge_accumulate_large_rx * Accumulate packets to form a large LRO packet based on various conditions * * @lldev Per-adapter Data * @pkt Current packet * @pkt_length Packet Length * @rxd_priv Rx Descriptor Private Data */ void xge_accumulate_large_rx(xge_lldev_t *lldev, struct mbuf *pkt, int pkt_length, xge_rx_priv_t *rxd_priv) { if(xge_lro_accumulate(lldev, pkt) != XGE_HAL_OK) { bus_dmamap_sync(lldev->dma_tag_rx, rxd_priv->dmainfo[0].dma_map, BUS_DMASYNC_POSTREAD); (*lldev->ifnetp->if_input)(lldev->ifnetp, pkt); } } /** * xge_rx_compl * If the interrupt is due to received frame (Rx completion), send it up * * @channelh Ring Channel Handle * @dtr Current Descriptor * @t_code Transfer Code indicating success or error * @userdata Per-adapter Data * * Returns XGE_HAL_OK or HAL error enums */ xge_hal_status_e xge_rx_compl(xge_hal_channel_h channelh, xge_hal_dtr_h dtr, u8 t_code, void *userdata) { struct ifnet *ifnetp; xge_rx_priv_t *rxd_priv = NULL; mbuf_t mbuf_up = NULL; xge_hal_status_e status = XGE_HAL_OK; xge_hal_dtr_info_t ext_info; int index; u16 vlan_tag; /*get the user data portion*/ xge_lldev_t *lldev = xge_hal_channel_userdata(channelh); if(!lldev) { XGE_EXIT_ON_ERR("Failed to get user data", _exit, XGE_HAL_FAIL); } XGE_DRV_STATS(rx_completions); /* get the interface pointer */ ifnetp = lldev->ifnetp; do { XGE_DRV_STATS(rx_desc_compl); if(!(ifnetp->if_drv_flags & 
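/*
 * Editorial summary of the software LRO above (xge_lro_accumulate /
 * xge_lro_flush): a frame is only merged when it is IPv4 TCP with no IP
 * options, not fragmented, carries at most the TCP timestamp option, has
 * only ACK/PUSH flags set, and its sequence number continues an active
 * session keyed on the usual 4-tuple; anything else is handed to if_input()
 * unmerged.  Sessions are flushed when out-of-order data arrives, when the
 * aggregate would exceed XGE_HAL_LRO_DEFAULT_FRM_LEN minus the MTU, or in
 * bulk by xge_lro_flush_sessions() at the end of a completion run.  The
 * 4-tuple lookup used to find a session looks like:
 *
 *     if (lro->source_port == th->th_sport &&
 *         lro->dest_port   == th->th_dport &&
 *         lro->source_ip   == ip->ip_src.s_addr &&
 *         lro->dest_ip     == ip->ip_dst.s_addr)
 *         ;   // extend this session with the new segment's payload
 */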
IFF_DRV_RUNNING)) { status = XGE_HAL_FAIL; goto _exit; } if(t_code) { xge_trace(XGE_TRACE, "Packet dropped because of %d", t_code); XGE_DRV_STATS(rx_tcode); xge_hal_device_handle_tcode(channelh, dtr, t_code); xge_hal_ring_dtr_post(channelh,dtr); continue; } /* Get the private data for this descriptor*/ rxd_priv = (xge_rx_priv_t *) xge_hal_ring_dtr_private(channelh, dtr); if(!rxd_priv) { XGE_EXIT_ON_ERR("Failed to get descriptor private data", _exit, XGE_HAL_FAIL); } /* * Prepare one buffer to send it to upper layer -- since the upper * layer frees the buffer do not use rxd_priv->buffer. Meanwhile * prepare a new buffer, do mapping, use it in the current * descriptor and post descriptor back to ring channel */ mbuf_up = rxd_priv->bufferArray[0]; /* Gets details of mbuf i.e., packet length */ xge_ring_dtr_get(mbuf_up, channelh, dtr, lldev, rxd_priv); status = (lldev->buffer_mode == XGE_HAL_RING_QUEUE_BUFFER_MODE_1) ? xge_get_buf(dtr, rxd_priv, lldev, 0) : xge_get_buf_3b_5b(dtr, rxd_priv, lldev); if(status != XGE_HAL_OK) { xge_trace(XGE_ERR, "No memory"); XGE_DRV_STATS(rx_no_buf); /* * Unable to allocate buffer. Instead of discarding, post * descriptor back to channel for future processing of same * packet. */ xge_hal_ring_dtr_post(channelh, dtr); continue; } /* Get the extended information */ xge_hal_ring_dtr_info_get(channelh, dtr, &ext_info); /* * As we have allocated a new mbuf for this descriptor, post this * descriptor with new mbuf back to ring channel */ vlan_tag = ext_info.vlan; xge_hal_ring_dtr_post(channelh, dtr); if ((!(ext_info.proto & XGE_HAL_FRAME_PROTO_IP_FRAGMENTED) && (ext_info.proto & XGE_HAL_FRAME_PROTO_TCP_OR_UDP) && (ext_info.l3_cksum == XGE_HAL_L3_CKSUM_OK) && (ext_info.l4_cksum == XGE_HAL_L4_CKSUM_OK))) { /* set Checksum Flag */ xge_set_mbuf_cflags(mbuf_up); if(lldev->enabled_lro) { xge_accumulate_large_rx(lldev, mbuf_up, mbuf_up->m_len, rxd_priv); } else { /* Post-Read sync for buffers*/ for(index = 0; index < lldev->rxd_mbuf_cnt; index++) { bus_dmamap_sync(lldev->dma_tag_rx, rxd_priv->dmainfo[0].dma_map, BUS_DMASYNC_POSTREAD); } (*ifnetp->if_input)(ifnetp, mbuf_up); } } else { /* * Packet with erroneous checksum , let the upper layer deal * with it */ /* Post-Read sync for buffers*/ for(index = 0; index < lldev->rxd_mbuf_cnt; index++) { bus_dmamap_sync(lldev->dma_tag_rx, rxd_priv->dmainfo[0].dma_map, BUS_DMASYNC_POSTREAD); } if(vlan_tag) { mbuf_up->m_pkthdr.ether_vtag = vlan_tag; mbuf_up->m_flags |= M_VLANTAG; } if(lldev->enabled_lro) xge_lro_flush_sessions(lldev); (*ifnetp->if_input)(ifnetp, mbuf_up); } } while(xge_hal_ring_dtr_next_completed(channelh, &dtr, &t_code) == XGE_HAL_OK); if(lldev->enabled_lro) xge_lro_flush_sessions(lldev); _exit: return status; } /** * xge_ring_dtr_get * Get descriptors * * @mbuf_up Packet to send up * @channelh Ring Channel Handle * @dtr Descriptor * @lldev Per-adapter Data * @rxd_priv Rx Descriptor Private Data * * Returns XGE_HAL_OK or HAL error enums */ int xge_ring_dtr_get(mbuf_t mbuf_up, xge_hal_channel_h channelh, xge_hal_dtr_h dtr, xge_lldev_t *lldev, xge_rx_priv_t *rxd_priv) { mbuf_t m; int pkt_length[5]={0,0}, pkt_len=0; dma_addr_t dma_data[5]; int index; m = mbuf_up; pkt_len = 0; if(lldev->buffer_mode != XGE_HAL_RING_QUEUE_BUFFER_MODE_1) { xge_os_memzero(pkt_length, sizeof(pkt_length)); /* * Retrieve data of interest from the completed descriptor -- This * returns the packet length */ if(lldev->buffer_mode == XGE_HAL_RING_QUEUE_BUFFER_MODE_5) { xge_hal_ring_dtr_5b_get(channelh, dtr, dma_data, pkt_length); } else { 
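/*
 * Editorial note on the Rx completion loop above: the filled mbuf is handed
 * up the stack and the descriptor is immediately re-armed with a freshly
 * allocated buffer; if that allocation fails the descriptor is reposted with
 * its old buffer, so the frame is dropped but the ring never loses a
 * descriptor.  When the hardware reports good L3/L4 checksums the driver
 * marks the mbuf so the stack skips re-checking, roughly:
 *
 *     if ((ext_info.proto & XGE_HAL_FRAME_PROTO_TCP_OR_UDP) &&
 *         ext_info.l3_cksum == XGE_HAL_L3_CKSUM_OK &&
 *         ext_info.l4_cksum == XGE_HAL_L4_CKSUM_OK) {
 *         m->m_pkthdr.csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID |
 *             CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
 *         m->m_pkthdr.csum_data = 0xffff;
 *     }
 */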
xge_hal_ring_dtr_3b_get(channelh, dtr, dma_data, pkt_length); } for(index = 0; index < lldev->rxd_mbuf_cnt; index++) { m->m_len = pkt_length[index]; if(index < (lldev->rxd_mbuf_cnt-1)) { m->m_next = rxd_priv->bufferArray[index + 1]; m = m->m_next; } else { m->m_next = NULL; } pkt_len+=pkt_length[index]; } /* * Since 2 buffer mode is an exceptional case where data is in 3rd * buffer but not in 2nd buffer */ if(lldev->buffer_mode == XGE_HAL_RING_QUEUE_BUFFER_MODE_2) { m->m_len = pkt_length[2]; pkt_len+=pkt_length[2]; } /* * Update length of newly created buffer to be sent up with packet * length */ mbuf_up->m_pkthdr.len = pkt_len; } else { /* * Retrieve data of interest from the completed descriptor -- This * returns the packet length */ xge_hal_ring_dtr_1b_get(channelh, dtr,&dma_data[0], &pkt_length[0]); /* * Update length of newly created buffer to be sent up with packet * length */ mbuf_up->m_len = mbuf_up->m_pkthdr.len = pkt_length[0]; } return XGE_HAL_OK; } /** * xge_flush_txds * Flush Tx descriptors * * @channelh Channel handle */ static void inline xge_flush_txds(xge_hal_channel_h channelh) { xge_lldev_t *lldev = xge_hal_channel_userdata(channelh); xge_hal_dtr_h tx_dtr; xge_tx_priv_t *tx_priv; u8 t_code; while(xge_hal_fifo_dtr_next_completed(channelh, &tx_dtr, &t_code) == XGE_HAL_OK) { XGE_DRV_STATS(tx_desc_compl); if(t_code) { xge_trace(XGE_TRACE, "Tx descriptor with t_code %d", t_code); XGE_DRV_STATS(tx_tcode); xge_hal_device_handle_tcode(channelh, tx_dtr, t_code); } tx_priv = xge_hal_fifo_dtr_private(tx_dtr); bus_dmamap_unload(lldev->dma_tag_tx, tx_priv->dma_map); m_freem(tx_priv->buffer); tx_priv->buffer = NULL; xge_hal_fifo_dtr_free(channelh, tx_dtr); } } /** * xge_send * Transmit function * * @ifnetp Interface Handle */ void xge_send(struct ifnet *ifnetp) { int qindex = 0; xge_lldev_t *lldev = ifnetp->if_softc; for(qindex = 0; qindex < XGE_FIFO_COUNT; qindex++) { if(mtx_trylock(&lldev->mtx_tx[qindex]) == 0) { XGE_DRV_STATS(tx_lock_fail); break; } xge_send_locked(ifnetp, qindex); mtx_unlock(&lldev->mtx_tx[qindex]); } } static void inline xge_send_locked(struct ifnet *ifnetp, int qindex) { xge_hal_dtr_h dtr; static bus_dma_segment_t segs[XGE_MAX_SEGS]; xge_hal_status_e status; unsigned int max_fragments; xge_lldev_t *lldev = ifnetp->if_softc; xge_hal_channel_h channelh = lldev->fifo_channel[qindex]; mbuf_t m_head = NULL; mbuf_t m_buf = NULL; xge_tx_priv_t *ll_tx_priv = NULL; register unsigned int count = 0; unsigned int nsegs = 0; u16 vlan_tag; max_fragments = ((xge_hal_fifo_t *)channelh)->config->max_frags; /* If device is not initialized, return */ if((!lldev->initialized) || (!(ifnetp->if_drv_flags & IFF_DRV_RUNNING))) return; XGE_DRV_STATS(tx_calls); /* * This loop will be executed for each packet in the kernel maintained * queue -- each packet can be with fragments as an mbuf chain */ for(;;) { IF_DEQUEUE(&ifnetp->if_snd, m_head); if (m_head == NULL) { ifnetp->if_drv_flags &= ~(IFF_DRV_OACTIVE); return; } for(m_buf = m_head; m_buf != NULL; m_buf = m_buf->m_next) { if(m_buf->m_len) count += 1; } if(count >= max_fragments) { m_buf = m_defrag(m_head, M_NOWAIT); if(m_buf != NULL) m_head = m_buf; XGE_DRV_STATS(tx_defrag); } /* Reserve descriptors */ status = xge_hal_fifo_dtr_reserve(channelh, &dtr); if(status != XGE_HAL_OK) { XGE_DRV_STATS(tx_no_txd); xge_flush_txds(channelh); break; } vlan_tag = (m_head->m_flags & M_VLANTAG) ? 
m_head->m_pkthdr.ether_vtag : 0; xge_hal_fifo_dtr_vlan_set(dtr, vlan_tag); /* Update Tx private structure for this descriptor */ ll_tx_priv = xge_hal_fifo_dtr_private(dtr); ll_tx_priv->buffer = m_head; /* * Do mapping -- Required DMA tag has been created in xge_init * function and DMA maps have already been created in the * xgell_tx_replenish function. * Returns number of segments through nsegs */ if(bus_dmamap_load_mbuf_sg(lldev->dma_tag_tx, ll_tx_priv->dma_map, m_head, segs, &nsegs, BUS_DMA_NOWAIT)) { xge_trace(XGE_TRACE, "DMA map load failed"); XGE_DRV_STATS(tx_map_fail); break; } if(lldev->driver_stats.tx_max_frags < nsegs) lldev->driver_stats.tx_max_frags = nsegs; /* Set descriptor buffer for header and each fragment/segment */ count = 0; do { xge_hal_fifo_dtr_buffer_set(channelh, dtr, count, (dma_addr_t)htole64(segs[count].ds_addr), segs[count].ds_len); count++; } while(count < nsegs); /* Pre-write Sync of mapping */ bus_dmamap_sync(lldev->dma_tag_tx, ll_tx_priv->dma_map, BUS_DMASYNC_PREWRITE); if((lldev->enabled_tso) && (m_head->m_pkthdr.csum_flags & CSUM_TSO)) { XGE_DRV_STATS(tx_tso); xge_hal_fifo_dtr_mss_set(dtr, m_head->m_pkthdr.tso_segsz); } /* Checksum */ if(ifnetp->if_hwassist > 0) { xge_hal_fifo_dtr_cksum_set_bits(dtr, XGE_HAL_TXD_TX_CKO_IPV4_EN | XGE_HAL_TXD_TX_CKO_TCP_EN | XGE_HAL_TXD_TX_CKO_UDP_EN); } /* Post descriptor to FIFO channel */ xge_hal_fifo_dtr_post(channelh, dtr); XGE_DRV_STATS(tx_posted); /* Send the same copy of mbuf packet to BPF (Berkely Packet Filter) * listener so that we can use tools like tcpdump */ ETHER_BPF_MTAP(ifnetp, m_head); } /* Prepend the packet back to queue */ IF_PREPEND(&ifnetp->if_snd, m_head); ifnetp->if_drv_flags |= IFF_DRV_OACTIVE; xge_queue_produce_context(xge_hal_device_queue(lldev->devh), XGE_LL_EVENT_TRY_XMIT_AGAIN, lldev->devh); XGE_DRV_STATS(tx_again); } /** * xge_get_buf * Allocates new mbufs to be placed into descriptors * * @dtrh Descriptor Handle * @rxd_priv Rx Descriptor Private Data * @lldev Per-adapter Data * @index Buffer Index (if multi-buffer mode) * * Returns XGE_HAL_OK or HAL error enums */ int xge_get_buf(xge_hal_dtr_h dtrh, xge_rx_priv_t *rxd_priv, xge_lldev_t *lldev, int index) { register mbuf_t mp = NULL; struct ifnet *ifnetp = lldev->ifnetp; int status = XGE_HAL_OK; int buffer_size = 0, cluster_size = 0, count; bus_dmamap_t map = rxd_priv->dmainfo[index].dma_map; bus_dma_segment_t segs[3]; buffer_size = (lldev->buffer_mode == XGE_HAL_RING_QUEUE_BUFFER_MODE_1) ? 
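/*
 * Editorial note on xge_send_locked() above: each dequeued packet chain is
 * collapsed with m_defrag() when it carries more fragments than a FIFO
 * descriptor can describe, and when no descriptor can be reserved the packet
 * is prepended back onto if_snd, IFF_DRV_OACTIVE is set, and a
 * XGE_LL_EVENT_TRY_XMIT_AGAIN event is queued to retry later.  The defrag
 * step, roughly:
 *
 *     if (nfrags >= max_fragments) {
 *         struct mbuf *m_new = m_defrag(m_head, M_NOWAIT);
 *         if (m_new != NULL)
 *             m_head = m_new;   // on success the original chain is consumed
 *         // on failure keep the original chain and let the DMA load decide
 *     }
 */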
ifnetp->if_mtu + XGE_HAL_MAC_HEADER_MAX_SIZE : lldev->rxd_mbuf_len[index]; if(buffer_size <= MCLBYTES) { cluster_size = MCLBYTES; mp = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); } else { cluster_size = MJUMPAGESIZE; if((lldev->buffer_mode != XGE_HAL_RING_QUEUE_BUFFER_MODE_5) && (buffer_size > MJUMPAGESIZE)) { cluster_size = MJUM9BYTES; } mp = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, cluster_size); } if(!mp) { xge_trace(XGE_ERR, "Out of memory to allocate mbuf"); status = XGE_HAL_FAIL; goto getbuf_out; } /* Update mbuf's length, packet length and receive interface */ mp->m_len = mp->m_pkthdr.len = buffer_size; mp->m_pkthdr.rcvif = ifnetp; /* Load DMA map */ if(bus_dmamap_load_mbuf_sg(lldev->dma_tag_rx, lldev->extra_dma_map, mp, segs, &count, BUS_DMA_NOWAIT)) { XGE_DRV_STATS(rx_map_fail); m_freem(mp); XGE_EXIT_ON_ERR("DMA map load failed", getbuf_out, XGE_HAL_FAIL); } /* Update descriptor private data */ rxd_priv->bufferArray[index] = mp; rxd_priv->dmainfo[index].dma_phyaddr = htole64(segs->ds_addr); rxd_priv->dmainfo[index].dma_map = lldev->extra_dma_map; lldev->extra_dma_map = map; /* Pre-Read/Write sync */ bus_dmamap_sync(lldev->dma_tag_rx, map, BUS_DMASYNC_POSTREAD); /* Unload DMA map of mbuf in current descriptor */ bus_dmamap_unload(lldev->dma_tag_rx, map); /* Set descriptor buffer */ if(lldev->buffer_mode == XGE_HAL_RING_QUEUE_BUFFER_MODE_1) { xge_hal_ring_dtr_1b_set(dtrh, rxd_priv->dmainfo[0].dma_phyaddr, cluster_size); } getbuf_out: return status; } /** * xge_get_buf_3b_5b * Allocates new mbufs to be placed into descriptors (in multi-buffer modes) * * @dtrh Descriptor Handle * @rxd_priv Rx Descriptor Private Data * @lldev Per-adapter Data * * Returns XGE_HAL_OK or HAL error enums */ int xge_get_buf_3b_5b(xge_hal_dtr_h dtrh, xge_rx_priv_t *rxd_priv, xge_lldev_t *lldev) { bus_addr_t dma_pointers[5]; int dma_sizes[5]; int status = XGE_HAL_OK, index; int newindex = 0; for(index = 0; index < lldev->rxd_mbuf_cnt; index++) { status = xge_get_buf(dtrh, rxd_priv, lldev, index); if(status != XGE_HAL_OK) { for(newindex = 0; newindex < index; newindex++) { m_freem(rxd_priv->bufferArray[newindex]); } XGE_EXIT_ON_ERR("mbuf allocation failed", _exit, status); } } for(index = 0; index < lldev->buffer_mode; index++) { if(lldev->rxd_mbuf_len[index] != 0) { dma_pointers[index] = rxd_priv->dmainfo[index].dma_phyaddr; dma_sizes[index] = lldev->rxd_mbuf_len[index]; } else { dma_pointers[index] = rxd_priv->dmainfo[index-1].dma_phyaddr; dma_sizes[index] = 1; } } /* Assigning second buffer to third pointer in 2 buffer mode */ if(lldev->buffer_mode == XGE_HAL_RING_QUEUE_BUFFER_MODE_2) { dma_pointers[2] = dma_pointers[1]; dma_sizes[2] = dma_sizes[1]; dma_sizes[1] = 1; } if(lldev->buffer_mode == XGE_HAL_RING_QUEUE_BUFFER_MODE_5) { xge_hal_ring_dtr_5b_set(dtrh, dma_pointers, dma_sizes); } else { xge_hal_ring_dtr_3b_set(dtrh, dma_pointers, dma_sizes); } _exit: return status; } /** * xge_tx_compl * If the interrupt is due to Tx completion, free the sent buffer * * @channelh Channel Handle * @dtr Descriptor * @t_code Transfer Code indicating success or error * @userdata Per-adapter Data * * Returns XGE_HAL_OK or HAL error enum */ xge_hal_status_e xge_tx_compl(xge_hal_channel_h channelh, xge_hal_dtr_h dtr, u8 t_code, void *userdata) { xge_tx_priv_t *ll_tx_priv = NULL; xge_lldev_t *lldev = (xge_lldev_t *)userdata; struct ifnet *ifnetp = lldev->ifnetp; mbuf_t m_buffer = NULL; int qindex = xge_hal_channel_id(channelh); mtx_lock(&lldev->mtx_tx[qindex]); XGE_DRV_STATS(tx_completions); /* * For each completed descriptor: Get 
private structure, free buffer, * do unmapping, and free descriptor */ do { XGE_DRV_STATS(tx_desc_compl); if(t_code) { XGE_DRV_STATS(tx_tcode); xge_trace(XGE_TRACE, "t_code %d", t_code); xge_hal_device_handle_tcode(channelh, dtr, t_code); } ll_tx_priv = xge_hal_fifo_dtr_private(dtr); m_buffer = ll_tx_priv->buffer; bus_dmamap_unload(lldev->dma_tag_tx, ll_tx_priv->dma_map); m_freem(m_buffer); ll_tx_priv->buffer = NULL; xge_hal_fifo_dtr_free(channelh, dtr); } while(xge_hal_fifo_dtr_next_completed(channelh, &dtr, &t_code) == XGE_HAL_OK); xge_send_locked(ifnetp, qindex); ifnetp->if_drv_flags &= ~IFF_DRV_OACTIVE; mtx_unlock(&lldev->mtx_tx[qindex]); return XGE_HAL_OK; } /** * xge_tx_initial_replenish * Initially allocate buffers and set them into descriptors for later use * * @channelh Tx Channel Handle * @dtrh Descriptor Handle * @index * @userdata Per-adapter Data * @reopen Channel open/reopen option * * Returns XGE_HAL_OK or HAL error enums */ xge_hal_status_e xge_tx_initial_replenish(xge_hal_channel_h channelh, xge_hal_dtr_h dtrh, int index, void *userdata, xge_hal_channel_reopen_e reopen) { xge_tx_priv_t *txd_priv = NULL; int status = XGE_HAL_OK; /* Get the user data portion from channel handle */ xge_lldev_t *lldev = xge_hal_channel_userdata(channelh); if(lldev == NULL) { XGE_EXIT_ON_ERR("Failed to get user data from channel", txinit_out, XGE_HAL_FAIL); } /* Get the private data */ txd_priv = (xge_tx_priv_t *) xge_hal_fifo_dtr_private(dtrh); if(txd_priv == NULL) { XGE_EXIT_ON_ERR("Failed to get descriptor private data", txinit_out, XGE_HAL_FAIL); } /* Create DMA map for this descriptor */ if(bus_dmamap_create(lldev->dma_tag_tx, BUS_DMA_NOWAIT, &txd_priv->dma_map)) { XGE_EXIT_ON_ERR("DMA map creation for Tx descriptor failed", txinit_out, XGE_HAL_FAIL); } txinit_out: return status; } /** * xge_rx_initial_replenish * Initially allocate buffers and set them into descriptors for later use * * @channelh Tx Channel Handle * @dtrh Descriptor Handle * @index Ring Index * @userdata Per-adapter Data * @reopen Channel open/reopen option * * Returns XGE_HAL_OK or HAL error enums */ xge_hal_status_e xge_rx_initial_replenish(xge_hal_channel_h channelh, xge_hal_dtr_h dtrh, int index, void *userdata, xge_hal_channel_reopen_e reopen) { xge_rx_priv_t *rxd_priv = NULL; int status = XGE_HAL_OK; int index1 = 0, index2 = 0; /* Get the user data portion from channel handle */ xge_lldev_t *lldev = xge_hal_channel_userdata(channelh); if(lldev == NULL) { XGE_EXIT_ON_ERR("Failed to get user data from channel", rxinit_out, XGE_HAL_FAIL); } /* Get the private data */ rxd_priv = (xge_rx_priv_t *) xge_hal_ring_dtr_private(channelh, dtrh); if(rxd_priv == NULL) { XGE_EXIT_ON_ERR("Failed to get descriptor private data", rxinit_out, XGE_HAL_FAIL); } rxd_priv->bufferArray = xge_os_malloc(NULL, (sizeof(rxd_priv->bufferArray) * lldev->rxd_mbuf_cnt)); if(rxd_priv->bufferArray == NULL) { XGE_EXIT_ON_ERR("Failed to allocate Rxd private", rxinit_out, XGE_HAL_FAIL); } if(lldev->buffer_mode == XGE_HAL_RING_QUEUE_BUFFER_MODE_1) { /* Create DMA map for these descriptors*/ if(bus_dmamap_create(lldev->dma_tag_rx , BUS_DMA_NOWAIT, &rxd_priv->dmainfo[0].dma_map)) { XGE_EXIT_ON_ERR("DMA map creation for Rx descriptor failed", rxinit_err_out, XGE_HAL_FAIL); } /* Get a buffer, attach it to this descriptor */ status = xge_get_buf(dtrh, rxd_priv, lldev, 0); } else { for(index1 = 0; index1 < lldev->rxd_mbuf_cnt; index1++) { /* Create DMA map for this descriptor */ if(bus_dmamap_create(lldev->dma_tag_rx , BUS_DMA_NOWAIT , 
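/*
 * Editorial note on the Rx buffer handling above: xge_get_buf() loads every
 * replacement mbuf on the spare map lldev->extra_dma_map and then swaps that
 * map with the one attached to the descriptor, so no bus_dmamap is created
 * or destroyed in the data path; the per-descriptor maps themselves are
 * created once in xge_rx_initial_replenish() and torn down in xge_rx_term().
 * The swap, in outline:
 *
 *     map = rxd_priv->dmainfo[index].dma_map;       // map of the old buffer
 *     bus_dmamap_load_mbuf_sg(lldev->dma_tag_rx, lldev->extra_dma_map,
 *         mp, segs, &count, BUS_DMA_NOWAIT);        // map the new mbuf
 *     rxd_priv->dmainfo[index].dma_map = lldev->extra_dma_map;
 *     lldev->extra_dma_map = map;                   // old map becomes the spare
 *     bus_dmamap_unload(lldev->dma_tag_rx, map);    // old buffer is now owned by the stack
 */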
&rxd_priv->dmainfo[index1].dma_map)) { for(index2 = index1 - 1; index2 >= 0; index2--) { bus_dmamap_destroy(lldev->dma_tag_rx, rxd_priv->dmainfo[index2].dma_map); } XGE_EXIT_ON_ERR( "Jumbo DMA map creation for Rx descriptor failed", rxinit_err_out, XGE_HAL_FAIL); } } status = xge_get_buf_3b_5b(dtrh, rxd_priv, lldev); } if(status != XGE_HAL_OK) { for(index1 = 0; index1 < lldev->rxd_mbuf_cnt; index1++) { bus_dmamap_destroy(lldev->dma_tag_rx, rxd_priv->dmainfo[index1].dma_map); } goto rxinit_err_out; } else { goto rxinit_out; } rxinit_err_out: xge_os_free(NULL, rxd_priv->bufferArray, (sizeof(rxd_priv->bufferArray) * lldev->rxd_mbuf_cnt)); rxinit_out: return status; } /** * xge_rx_term * During unload terminate and free all descriptors * * @channelh Rx Channel Handle * @dtrh Rx Descriptor Handle * @state Descriptor State * @userdata Per-adapter Data * @reopen Channel open/reopen option */ void xge_rx_term(xge_hal_channel_h channelh, xge_hal_dtr_h dtrh, xge_hal_dtr_state_e state, void *userdata, xge_hal_channel_reopen_e reopen) { xge_rx_priv_t *rxd_priv = NULL; xge_lldev_t *lldev = NULL; int index = 0; /* Descriptor state is not "Posted" */ if(state != XGE_HAL_DTR_STATE_POSTED) goto rxterm_out; /* Get the user data portion */ lldev = xge_hal_channel_userdata(channelh); /* Get the private data */ rxd_priv = (xge_rx_priv_t *) xge_hal_ring_dtr_private(channelh, dtrh); for(index = 0; index < lldev->rxd_mbuf_cnt; index++) { if(rxd_priv->dmainfo[index].dma_map != NULL) { bus_dmamap_sync(lldev->dma_tag_rx, rxd_priv->dmainfo[index].dma_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(lldev->dma_tag_rx, rxd_priv->dmainfo[index].dma_map); if(rxd_priv->bufferArray[index] != NULL) m_free(rxd_priv->bufferArray[index]); bus_dmamap_destroy(lldev->dma_tag_rx, rxd_priv->dmainfo[index].dma_map); } } xge_os_free(NULL, rxd_priv->bufferArray, (sizeof(rxd_priv->bufferArray) * lldev->rxd_mbuf_cnt)); /* Free the descriptor */ xge_hal_ring_dtr_free(channelh, dtrh); rxterm_out: return; } /** * xge_tx_term * During unload terminate and free all descriptors * * @channelh Rx Channel Handle * @dtrh Rx Descriptor Handle * @state Descriptor State * @userdata Per-adapter Data * @reopen Channel open/reopen option */ void xge_tx_term(xge_hal_channel_h channelh, xge_hal_dtr_h dtr, xge_hal_dtr_state_e state, void *userdata, xge_hal_channel_reopen_e reopen) { xge_tx_priv_t *ll_tx_priv = xge_hal_fifo_dtr_private(dtr); xge_lldev_t *lldev = (xge_lldev_t *)userdata; /* Destroy DMA map */ bus_dmamap_destroy(lldev->dma_tag_tx, ll_tx_priv->dma_map); } /** * xge_methods * * FreeBSD device interface entry points */ static device_method_t xge_methods[] = { DEVMETHOD(device_probe, xge_probe), DEVMETHOD(device_attach, xge_attach), DEVMETHOD(device_detach, xge_detach), DEVMETHOD(device_shutdown, xge_shutdown), DEVMETHOD_END }; static driver_t xge_driver = { "nxge", xge_methods, sizeof(xge_lldev_t), }; static devclass_t xge_devclass; DRIVER_MODULE(nxge, pci, xge_driver, xge_devclass, 0, 0); Index: stable/11/sys/dev/oce/oce_if.c =================================================================== --- stable/11/sys/dev/oce/oce_if.c (revision 332287) +++ stable/11/sys/dev/oce/oce_if.c (revision 332288) @@ -1,2355 +1,2355 @@ /*- * Copyright (C) 2013 Emulex * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. 
Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. Neither the name of the Emulex Corporation nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * Contact Information: * freebsd-drivers@emulex.com * * Emulex * 3333 Susan Street * Costa Mesa, CA 92626 */ /* $FreeBSD$ */ #include "opt_inet6.h" #include "opt_inet.h" #include "oce_if.h" /* UE Status Low CSR */ static char *ue_status_low_desc[] = { "CEV", "CTX", "DBUF", "ERX", "Host", "MPU", "NDMA", "PTC ", "RDMA ", "RXF ", "RXIPS ", "RXULP0 ", "RXULP1 ", "RXULP2 ", "TIM ", "TPOST ", "TPRE ", "TXIPS ", "TXULP0 ", "TXULP1 ", "UC ", "WDMA ", "TXULP2 ", "HOST1 ", "P0_OB_LINK ", "P1_OB_LINK ", "HOST_GPIO ", "MBOX ", "AXGMAC0", "AXGMAC1", "JTAG", "MPU_INTPEND" }; /* UE Status High CSR */ static char *ue_status_hi_desc[] = { "LPCMEMHOST", "MGMT_MAC", "PCS0ONLINE", "MPU_IRAM", "PCS1ONLINE", "PCTL0", "PCTL1", "PMEM", "RR", "TXPB", "RXPP", "XAUI", "TXP", "ARM", "IPC", "HOST2", "HOST3", "HOST4", "HOST5", "HOST6", "HOST7", "HOST8", "HOST9", "NETC", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown", "Unknown" }; /* Driver entry points prototypes */ static int oce_probe(device_t dev); static int oce_attach(device_t dev); static int oce_detach(device_t dev); static int oce_shutdown(device_t dev); static int oce_ioctl(struct ifnet *ifp, u_long command, caddr_t data); static void oce_init(void *xsc); static int oce_multiq_start(struct ifnet *ifp, struct mbuf *m); static void oce_multiq_flush(struct ifnet *ifp); /* Driver interrupt routines protypes */ static void oce_intr(void *arg, int pending); static int oce_setup_intr(POCE_SOFTC sc); static int oce_fast_isr(void *arg); static int oce_alloc_intr(POCE_SOFTC sc, int vector, void (*isr) (void *arg, int pending)); /* Media callbacks prototypes */ static void oce_media_status(struct ifnet *ifp, struct ifmediareq *req); static int oce_media_change(struct ifnet *ifp); /* Transmit routines prototypes */ static int oce_tx(POCE_SOFTC sc, struct mbuf **mpp, int wq_index); static void oce_tx_restart(POCE_SOFTC sc, struct oce_wq *wq); static void oce_tx_complete(struct oce_wq *wq, uint32_t wqe_idx, uint32_t status); static int oce_multiq_transmit(struct ifnet *ifp, struct mbuf *m, struct oce_wq *wq); /* Receive routines prototypes */ static void oce_discard_rx_comp(struct oce_rq *rq, struct oce_nic_rx_cqe *cqe); static int 
oce_cqe_vtp_valid(POCE_SOFTC sc, struct oce_nic_rx_cqe *cqe); static int oce_cqe_portid_valid(POCE_SOFTC sc, struct oce_nic_rx_cqe *cqe); static void oce_rx(struct oce_rq *rq, uint32_t rqe_idx, struct oce_nic_rx_cqe *cqe); /* Helper function prototypes in this file */ static int oce_attach_ifp(POCE_SOFTC sc); static void oce_add_vlan(void *arg, struct ifnet *ifp, uint16_t vtag); static void oce_del_vlan(void *arg, struct ifnet *ifp, uint16_t vtag); static int oce_vid_config(POCE_SOFTC sc); static void oce_mac_addr_set(POCE_SOFTC sc); static int oce_handle_passthrough(struct ifnet *ifp, caddr_t data); static void oce_local_timer(void *arg); static void oce_if_deactivate(POCE_SOFTC sc); static void oce_if_activate(POCE_SOFTC sc); static void setup_max_queues_want(POCE_SOFTC sc); static void update_queues_got(POCE_SOFTC sc); static void process_link_state(POCE_SOFTC sc, struct oce_async_cqe_link_state *acqe); static int oce_tx_asic_stall_verify(POCE_SOFTC sc, struct mbuf *m); static void oce_get_config(POCE_SOFTC sc); static struct mbuf *oce_insert_vlan_tag(POCE_SOFTC sc, struct mbuf *m, boolean_t *complete); /* IP specific */ #if defined(INET6) || defined(INET) static int oce_init_lro(POCE_SOFTC sc); static void oce_rx_flush_lro(struct oce_rq *rq); static struct mbuf * oce_tso_setup(POCE_SOFTC sc, struct mbuf **mpp); #endif static device_method_t oce_dispatch[] = { DEVMETHOD(device_probe, oce_probe), DEVMETHOD(device_attach, oce_attach), DEVMETHOD(device_detach, oce_detach), DEVMETHOD(device_shutdown, oce_shutdown), DEVMETHOD_END }; static driver_t oce_driver = { "oce", oce_dispatch, sizeof(OCE_SOFTC) }; static devclass_t oce_devclass; DRIVER_MODULE(oce, pci, oce_driver, oce_devclass, 0, 0); MODULE_DEPEND(oce, pci, 1, 1, 1); MODULE_DEPEND(oce, ether, 1, 1, 1); MODULE_VERSION(oce, 1); /* global vars */ const char component_revision[32] = {"///" COMPONENT_REVISION "///"}; /* Module capabilites and parameters */ uint32_t oce_max_rsp_handled = OCE_MAX_RSP_HANDLED; uint32_t oce_enable_rss = OCE_MODCAP_RSS; TUNABLE_INT("hw.oce.max_rsp_handled", &oce_max_rsp_handled); TUNABLE_INT("hw.oce.enable_rss", &oce_enable_rss); /* Supported devices table */ static uint32_t supportedDevices[] = { (PCI_VENDOR_SERVERENGINES << 16) | PCI_PRODUCT_BE2, (PCI_VENDOR_SERVERENGINES << 16) | PCI_PRODUCT_BE3, (PCI_VENDOR_EMULEX << 16) | PCI_PRODUCT_BE3, (PCI_VENDOR_EMULEX << 16) | PCI_PRODUCT_XE201, (PCI_VENDOR_EMULEX << 16) | PCI_PRODUCT_XE201_VF, (PCI_VENDOR_EMULEX << 16) | PCI_PRODUCT_SH }; /***************************************************************************** * Driver entry points functions * *****************************************************************************/ static int oce_probe(device_t dev) { uint16_t vendor = 0; uint16_t device = 0; int i = 0; char str[256] = {0}; POCE_SOFTC sc; sc = device_get_softc(dev); bzero(sc, sizeof(OCE_SOFTC)); sc->dev = dev; vendor = pci_get_vendor(dev); device = pci_get_device(dev); for (i = 0; i < (sizeof(supportedDevices) / sizeof(uint32_t)); i++) { if (vendor == ((supportedDevices[i] >> 16) & 0xffff)) { if (device == (supportedDevices[i] & 0xffff)) { sprintf(str, "%s:%s", "Emulex CNA NIC function", component_revision); device_set_desc_copy(dev, str); switch (device) { case PCI_PRODUCT_BE2: sc->flags |= OCE_FLAGS_BE2; break; case PCI_PRODUCT_BE3: sc->flags |= OCE_FLAGS_BE3; break; case PCI_PRODUCT_XE201: case PCI_PRODUCT_XE201_VF: sc->flags |= OCE_FLAGS_XE201; break; case PCI_PRODUCT_SH: sc->flags |= OCE_FLAGS_SH; break; default: return ENXIO; } return 
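/*
 * Editorial note on oce_probe() above: supportedDevices[] packs the PCI
 * vendor ID in the high 16 bits and the device ID in the low 16 bits of each
 * entry; a match sets the device description and records the chip family
 * (BE2/BE3/XE201/SH) in sc->flags for later use.  A compact equivalent of
 * the match loop, for reference only:
 *
 *     uint32_t id = ((uint32_t)pci_get_vendor(dev) << 16) |
 *         pci_get_device(dev);
 *     for (i = 0; i < nitems(supportedDevices); i++)
 *         if (id == supportedDevices[i])
 *             return (BUS_PROBE_DEFAULT);
 *     return (ENXIO);
 */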
BUS_PROBE_DEFAULT; } } } return ENXIO; } static int oce_attach(device_t dev) { POCE_SOFTC sc; int rc = 0; sc = device_get_softc(dev); rc = oce_hw_pci_alloc(sc); if (rc) return rc; sc->tx_ring_size = OCE_TX_RING_SIZE; sc->rx_ring_size = OCE_RX_RING_SIZE; sc->rq_frag_size = OCE_RQ_BUF_SIZE; sc->flow_control = OCE_DEFAULT_FLOW_CONTROL; sc->promisc = OCE_DEFAULT_PROMISCUOUS; LOCK_CREATE(&sc->bmbx_lock, "Mailbox_lock"); LOCK_CREATE(&sc->dev_lock, "Device_lock"); /* initialise the hardware */ rc = oce_hw_init(sc); if (rc) goto pci_res_free; oce_get_config(sc); setup_max_queues_want(sc); rc = oce_setup_intr(sc); if (rc) goto mbox_free; rc = oce_queue_init_all(sc); if (rc) goto intr_free; rc = oce_attach_ifp(sc); if (rc) goto queues_free; #if defined(INET6) || defined(INET) rc = oce_init_lro(sc); if (rc) goto ifp_free; #endif rc = oce_hw_start(sc); if (rc) goto lro_free; sc->vlan_attach = EVENTHANDLER_REGISTER(vlan_config, oce_add_vlan, sc, EVENTHANDLER_PRI_FIRST); sc->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, oce_del_vlan, sc, EVENTHANDLER_PRI_FIRST); rc = oce_stats_init(sc); if (rc) goto vlan_free; oce_add_sysctls(sc); callout_init(&sc->timer, 1); rc = callout_reset(&sc->timer, 2 * hz, oce_local_timer, sc); if (rc) goto stats_free; return 0; stats_free: callout_drain(&sc->timer); oce_stats_free(sc); vlan_free: if (sc->vlan_attach) EVENTHANDLER_DEREGISTER(vlan_config, sc->vlan_attach); if (sc->vlan_detach) EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vlan_detach); oce_hw_intr_disable(sc); lro_free: #if defined(INET6) || defined(INET) oce_free_lro(sc); ifp_free: #endif ether_ifdetach(sc->ifp); if_free(sc->ifp); queues_free: oce_queue_release_all(sc); intr_free: oce_intr_free(sc); mbox_free: oce_dma_free(sc, &sc->bsmbx); pci_res_free: oce_hw_pci_free(sc); LOCK_DESTROY(&sc->dev_lock); LOCK_DESTROY(&sc->bmbx_lock); return rc; } static int oce_detach(device_t dev) { POCE_SOFTC sc = device_get_softc(dev); LOCK(&sc->dev_lock); oce_if_deactivate(sc); UNLOCK(&sc->dev_lock); callout_drain(&sc->timer); if (sc->vlan_attach != NULL) EVENTHANDLER_DEREGISTER(vlan_config, sc->vlan_attach); if (sc->vlan_detach != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vlan_detach); ether_ifdetach(sc->ifp); if_free(sc->ifp); oce_hw_shutdown(sc); bus_generic_detach(dev); return 0; } static int oce_shutdown(device_t dev) { int rc; rc = oce_detach(dev); return rc; } static int oce_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct ifreq *ifr = (struct ifreq *)data; POCE_SOFTC sc = ifp->if_softc; int rc = 0; uint32_t u; switch (command) { case SIOCGIFMEDIA: rc = ifmedia_ioctl(ifp, ifr, &sc->media, command); break; case SIOCSIFMTU: if (ifr->ifr_mtu > OCE_MAX_MTU) rc = EINVAL; else ifp->if_mtu = ifr->ifr_mtu; break; case SIOCSIFFLAGS: if (ifp->if_flags & IFF_UP) { if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; oce_init(sc); } device_printf(sc->dev, "Interface Up\n"); } else { LOCK(&sc->dev_lock); sc->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); oce_if_deactivate(sc); UNLOCK(&sc->dev_lock); device_printf(sc->dev, "Interface Down\n"); } if ((ifp->if_flags & IFF_PROMISC) && !sc->promisc) { if (!oce_rxf_set_promiscuous(sc, (1 | (1 << 1)))) sc->promisc = TRUE; } else if (!(ifp->if_flags & IFF_PROMISC) && sc->promisc) { if (!oce_rxf_set_promiscuous(sc, 0)) sc->promisc = FALSE; } break; case SIOCADDMULTI: case SIOCDELMULTI: rc = oce_hw_update_multicast(sc); if (rc) device_printf(sc->dev, "Update multicast address failed\n"); break; case SIOCSIFCAP: u = 
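/* XOR of requested and currently-enabled capabilities: each set bit is a capability being toggled below */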
ifr->ifr_reqcap ^ ifp->if_capenable; if (u & IFCAP_TXCSUM) { ifp->if_capenable ^= IFCAP_TXCSUM; ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP); if (IFCAP_TSO & ifp->if_capenable && !(IFCAP_TXCSUM & ifp->if_capenable)) { ifp->if_capenable &= ~IFCAP_TSO; ifp->if_hwassist &= ~CSUM_TSO; if_printf(ifp, "TSO disabled due to -txcsum.\n"); } } if (u & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (u & IFCAP_TSO4) { ifp->if_capenable ^= IFCAP_TSO4; if (IFCAP_TSO & ifp->if_capenable) { if (IFCAP_TXCSUM & ifp->if_capenable) ifp->if_hwassist |= CSUM_TSO; else { ifp->if_capenable &= ~IFCAP_TSO; ifp->if_hwassist &= ~CSUM_TSO; if_printf(ifp, "Enable txcsum first.\n"); rc = EAGAIN; } } else ifp->if_hwassist &= ~CSUM_TSO; } if (u & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (u & IFCAP_VLAN_HWFILTER) { ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; oce_vid_config(sc); } #if defined(INET6) || defined(INET) if (u & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; #endif break; case SIOCGPRIVATE_0: rc = oce_handle_passthrough(ifp, data); break; default: rc = ether_ioctl(ifp, command, data); break; } return rc; } static void oce_init(void *arg) { POCE_SOFTC sc = arg; LOCK(&sc->dev_lock); if (sc->ifp->if_flags & IFF_UP) { oce_if_deactivate(sc); oce_if_activate(sc); } UNLOCK(&sc->dev_lock); } static int oce_multiq_start(struct ifnet *ifp, struct mbuf *m) { POCE_SOFTC sc = ifp->if_softc; struct oce_wq *wq = NULL; int queue_index = 0; int status = 0; if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) queue_index = m->m_pkthdr.flowid % sc->nwqs; wq = sc->wq[queue_index]; LOCK(&wq->tx_lock); status = oce_multiq_transmit(ifp, m, wq); UNLOCK(&wq->tx_lock); return status; } static void oce_multiq_flush(struct ifnet *ifp) { POCE_SOFTC sc = ifp->if_softc; struct mbuf *m; int i = 0; for (i = 0; i < sc->nwqs; i++) { while ((m = buf_ring_dequeue_sc(sc->wq[i]->br)) != NULL) m_freem(m); } if_qflush(ifp); } /***************************************************************************** * Driver interrupt routines functions * *****************************************************************************/ static void oce_intr(void *arg, int pending) { POCE_INTR_INFO ii = (POCE_INTR_INFO) arg; POCE_SOFTC sc = ii->sc; struct oce_eq *eq = ii->eq; struct oce_eqe *eqe; struct oce_cq *cq = NULL; int i, num_eqes = 0; bus_dmamap_sync(eq->ring->dma.tag, eq->ring->dma.map, BUS_DMASYNC_POSTWRITE); do { eqe = RING_GET_CONSUMER_ITEM_VA(eq->ring, struct oce_eqe); if (eqe->evnt == 0) break; eqe->evnt = 0; bus_dmamap_sync(eq->ring->dma.tag, eq->ring->dma.map, BUS_DMASYNC_POSTWRITE); RING_GET(eq->ring, 1); num_eqes++; } while (TRUE); if (!num_eqes) goto eq_arm; /* Spurious */ /* Clear EQ entries, but dont arm */ oce_arm_eq(sc, eq->eq_id, num_eqes, FALSE, FALSE); /* Process TX, RX and MCC. 
But dont arm CQ*/ for (i = 0; i < eq->cq_valid; i++) { cq = eq->cq[i]; (*cq->cq_handler)(cq->cb_arg); } /* Arm all cqs connected to this EQ */ for (i = 0; i < eq->cq_valid; i++) { cq = eq->cq[i]; oce_arm_cq(sc, cq->cq_id, 0, TRUE); } eq_arm: oce_arm_eq(sc, eq->eq_id, 0, TRUE, FALSE); return; } static int oce_setup_intr(POCE_SOFTC sc) { int rc = 0, use_intx = 0; int vector = 0, req_vectors = 0; if (is_rss_enabled(sc)) req_vectors = MAX((sc->nrqs - 1), sc->nwqs); else req_vectors = 1; if (sc->flags & OCE_FLAGS_MSIX_CAPABLE) { sc->intr_count = req_vectors; rc = pci_alloc_msix(sc->dev, &sc->intr_count); if (rc != 0) { use_intx = 1; pci_release_msi(sc->dev); } else sc->flags |= OCE_FLAGS_USING_MSIX; } else use_intx = 1; if (use_intx) sc->intr_count = 1; /* Scale number of queues based on intr we got */ update_queues_got(sc); if (use_intx) { device_printf(sc->dev, "Using legacy interrupt\n"); rc = oce_alloc_intr(sc, vector, oce_intr); if (rc) goto error; } else { for (; vector < sc->intr_count; vector++) { rc = oce_alloc_intr(sc, vector, oce_intr); if (rc) goto error; } } return 0; error: oce_intr_free(sc); return rc; } static int oce_fast_isr(void *arg) { POCE_INTR_INFO ii = (POCE_INTR_INFO) arg; POCE_SOFTC sc = ii->sc; if (ii->eq == NULL) return FILTER_STRAY; oce_arm_eq(sc, ii->eq->eq_id, 0, FALSE, TRUE); taskqueue_enqueue(ii->tq, &ii->task); ii->eq->intr++; return FILTER_HANDLED; } static int oce_alloc_intr(POCE_SOFTC sc, int vector, void (*isr) (void *arg, int pending)) { POCE_INTR_INFO ii = &sc->intrs[vector]; int rc = 0, rr; if (vector >= OCE_MAX_EQ) return (EINVAL); /* Set the resource id for the interrupt. * MSIx is vector + 1 for the resource id, * INTx is 0 for the resource id. */ if (sc->flags & OCE_FLAGS_USING_MSIX) rr = vector + 1; else rr = 0; ii->intr_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rr, RF_ACTIVE|RF_SHAREABLE); ii->irq_rr = rr; if (ii->intr_res == NULL) { device_printf(sc->dev, "Could not allocate interrupt\n"); rc = ENXIO; return rc; } TASK_INIT(&ii->task, 0, isr, ii); ii->vector = vector; sprintf(ii->task_name, "oce_task[%d]", ii->vector); ii->tq = taskqueue_create_fast(ii->task_name, M_NOWAIT, taskqueue_thread_enqueue, &ii->tq); taskqueue_start_threads(&ii->tq, 1, PI_NET, "%s taskq", device_get_nameunit(sc->dev)); ii->sc = sc; rc = bus_setup_intr(sc->dev, ii->intr_res, INTR_TYPE_NET, oce_fast_isr, NULL, ii, &ii->tag); return rc; } void oce_intr_free(POCE_SOFTC sc) { int i = 0; for (i = 0; i < sc->intr_count; i++) { if (sc->intrs[i].tag != NULL) bus_teardown_intr(sc->dev, sc->intrs[i].intr_res, sc->intrs[i].tag); if (sc->intrs[i].tq != NULL) taskqueue_free(sc->intrs[i].tq); if (sc->intrs[i].intr_res != NULL) bus_release_resource(sc->dev, SYS_RES_IRQ, sc->intrs[i].irq_rr, sc->intrs[i].intr_res); sc->intrs[i].tag = NULL; sc->intrs[i].intr_res = NULL; } if (sc->flags & OCE_FLAGS_USING_MSIX) pci_release_msi(sc->dev); } /****************************************************************************** * Media callbacks functions * ******************************************************************************/ static void oce_media_status(struct ifnet *ifp, struct ifmediareq *req) { POCE_SOFTC sc = (POCE_SOFTC) ifp->if_softc; req->ifm_status = IFM_AVALID; req->ifm_active = IFM_ETHER; if (sc->link_status == 1) req->ifm_status |= IFM_ACTIVE; else return; switch (sc->link_speed) { case 1: /* 10 Mbps */ req->ifm_active |= IFM_10_T | IFM_FDX; sc->speed = 10; break; case 2: /* 100 Mbps */ req->ifm_active |= IFM_100_TX | IFM_FDX; sc->speed = 100; break; case 3: /* 1 Gbps */ 
req->ifm_active |= IFM_1000_T | IFM_FDX; sc->speed = 1000; break; case 4: /* 10 Gbps */ req->ifm_active |= IFM_10G_SR | IFM_FDX; sc->speed = 10000; break; case 5: /* 20 Gbps */ req->ifm_active |= IFM_10G_SR | IFM_FDX; sc->speed = 20000; break; case 6: /* 25 Gbps */ req->ifm_active |= IFM_10G_SR | IFM_FDX; sc->speed = 25000; break; case 7: /* 40 Gbps */ req->ifm_active |= IFM_40G_SR4 | IFM_FDX; sc->speed = 40000; break; default: sc->speed = 0; break; } return; } int oce_media_change(struct ifnet *ifp) { return 0; } /***************************************************************************** * Transmit routines functions * *****************************************************************************/ static int oce_tx(POCE_SOFTC sc, struct mbuf **mpp, int wq_index) { int rc = 0, i, retry_cnt = 0; bus_dma_segment_t segs[OCE_MAX_TX_ELEMENTS]; struct mbuf *m, *m_temp; struct oce_wq *wq = sc->wq[wq_index]; struct oce_packet_desc *pd; struct oce_nic_hdr_wqe *nichdr; struct oce_nic_frag_wqe *nicfrag; int num_wqes; uint32_t reg_value; boolean_t complete = TRUE; m = *mpp; if (!m) return EINVAL; if (!(m->m_flags & M_PKTHDR)) { rc = ENXIO; goto free_ret; } if(oce_tx_asic_stall_verify(sc, m)) { m = oce_insert_vlan_tag(sc, m, &complete); if(!m) { device_printf(sc->dev, "Insertion unsuccessful\n"); return 0; } } if (m->m_pkthdr.csum_flags & CSUM_TSO) { /* consolidate packet buffers for TSO/LSO segment offload */ #if defined(INET6) || defined(INET) m = oce_tso_setup(sc, mpp); #else m = NULL; #endif if (m == NULL) { rc = ENXIO; goto free_ret; } } pd = &wq->pckts[wq->pkt_desc_head]; retry: rc = bus_dmamap_load_mbuf_sg(wq->tag, pd->map, m, segs, &pd->nsegs, BUS_DMA_NOWAIT); if (rc == 0) { num_wqes = pd->nsegs + 1; if (IS_BE(sc) || IS_SH(sc)) { /*Dummy required only for BE3.*/ if (num_wqes & 1) num_wqes++; } if (num_wqes >= RING_NUM_FREE(wq->ring)) { bus_dmamap_unload(wq->tag, pd->map); return EBUSY; } atomic_store_rel_int(&wq->pkt_desc_head, (wq->pkt_desc_head + 1) % \ OCE_WQ_PACKET_ARRAY_SIZE); bus_dmamap_sync(wq->tag, pd->map, BUS_DMASYNC_PREWRITE); pd->mbuf = m; nichdr = RING_GET_PRODUCER_ITEM_VA(wq->ring, struct oce_nic_hdr_wqe); nichdr->u0.dw[0] = 0; nichdr->u0.dw[1] = 0; nichdr->u0.dw[2] = 0; nichdr->u0.dw[3] = 0; nichdr->u0.s.complete = complete; nichdr->u0.s.event = 1; nichdr->u0.s.crc = 1; nichdr->u0.s.forward = 0; nichdr->u0.s.ipcs = (m->m_pkthdr.csum_flags & CSUM_IP) ? 1 : 0; nichdr->u0.s.udpcs = (m->m_pkthdr.csum_flags & CSUM_UDP) ? 1 : 0; nichdr->u0.s.tcpcs = (m->m_pkthdr.csum_flags & CSUM_TCP) ? 
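/* IP/UDP/TCP checksum-offload request bits in the header WQE mirror the mbuf's csum_flags */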
1 : 0; nichdr->u0.s.num_wqe = num_wqes; nichdr->u0.s.total_length = m->m_pkthdr.len; if (m->m_flags & M_VLANTAG) { nichdr->u0.s.vlan = 1; /*Vlan present*/ nichdr->u0.s.vlan_tag = m->m_pkthdr.ether_vtag; } if (m->m_pkthdr.csum_flags & CSUM_TSO) { if (m->m_pkthdr.tso_segsz) { nichdr->u0.s.lso = 1; nichdr->u0.s.lso_mss = m->m_pkthdr.tso_segsz; } if (!IS_BE(sc) || !IS_SH(sc)) nichdr->u0.s.ipcs = 1; } RING_PUT(wq->ring, 1); atomic_add_int(&wq->ring->num_used, 1); for (i = 0; i < pd->nsegs; i++) { nicfrag = RING_GET_PRODUCER_ITEM_VA(wq->ring, struct oce_nic_frag_wqe); nicfrag->u0.s.rsvd0 = 0; nicfrag->u0.s.frag_pa_hi = ADDR_HI(segs[i].ds_addr); nicfrag->u0.s.frag_pa_lo = ADDR_LO(segs[i].ds_addr); nicfrag->u0.s.frag_len = segs[i].ds_len; pd->wqe_idx = wq->ring->pidx; RING_PUT(wq->ring, 1); atomic_add_int(&wq->ring->num_used, 1); } if (num_wqes > (pd->nsegs + 1)) { nicfrag = RING_GET_PRODUCER_ITEM_VA(wq->ring, struct oce_nic_frag_wqe); nicfrag->u0.dw[0] = 0; nicfrag->u0.dw[1] = 0; nicfrag->u0.dw[2] = 0; nicfrag->u0.dw[3] = 0; pd->wqe_idx = wq->ring->pidx; RING_PUT(wq->ring, 1); atomic_add_int(&wq->ring->num_used, 1); pd->nsegs++; } if_inc_counter(sc->ifp, IFCOUNTER_OPACKETS, 1); wq->tx_stats.tx_reqs++; wq->tx_stats.tx_wrbs += num_wqes; wq->tx_stats.tx_bytes += m->m_pkthdr.len; wq->tx_stats.tx_pkts++; bus_dmamap_sync(wq->ring->dma.tag, wq->ring->dma.map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); reg_value = (num_wqes << 16) | wq->wq_id; OCE_WRITE_REG32(sc, db, wq->db_offset, reg_value); } else if (rc == EFBIG) { if (retry_cnt == 0) { m_temp = m_defrag(m, M_NOWAIT); if (m_temp == NULL) goto free_ret; m = m_temp; *mpp = m_temp; retry_cnt = retry_cnt + 1; goto retry; } else goto free_ret; } else if (rc == ENOMEM) return rc; else goto free_ret; return 0; free_ret: m_freem(*mpp); *mpp = NULL; return rc; } static void oce_tx_complete(struct oce_wq *wq, uint32_t wqe_idx, uint32_t status) { struct oce_packet_desc *pd; POCE_SOFTC sc = (POCE_SOFTC) wq->parent; struct mbuf *m; pd = &wq->pckts[wq->pkt_desc_tail]; atomic_store_rel_int(&wq->pkt_desc_tail, (wq->pkt_desc_tail + 1) % OCE_WQ_PACKET_ARRAY_SIZE); atomic_subtract_int(&wq->ring->num_used, pd->nsegs + 1); bus_dmamap_sync(wq->tag, pd->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(wq->tag, pd->map); m = pd->mbuf; m_freem(m); pd->mbuf = NULL; if (sc->ifp->if_drv_flags & IFF_DRV_OACTIVE) { if (wq->ring->num_used < (wq->ring->num_items / 2)) { sc->ifp->if_drv_flags &= ~(IFF_DRV_OACTIVE); oce_tx_restart(sc, wq); } } } static void oce_tx_restart(POCE_SOFTC sc, struct oce_wq *wq) { if ((sc->ifp->if_drv_flags & IFF_DRV_RUNNING) != IFF_DRV_RUNNING) return; #if __FreeBSD_version >= 800000 if (!drbr_empty(sc->ifp, wq->br)) #else if (!IFQ_DRV_IS_EMPTY(&sc->ifp->if_snd)) #endif taskqueue_enqueue(taskqueue_swi, &wq->txtask); } #if defined(INET6) || defined(INET) static struct mbuf * oce_tso_setup(POCE_SOFTC sc, struct mbuf **mpp) { struct mbuf *m; #ifdef INET struct ip *ip; #endif #ifdef INET6 struct ip6_hdr *ip6; #endif struct ether_vlan_header *eh; struct tcphdr *th; uint16_t etype; int total_len = 0, ehdrlen = 0; m = *mpp; if (M_WRITABLE(m) == 0) { m = m_dup(*mpp, M_NOWAIT); if (!m) return NULL; m_freem(*mpp); *mpp = m; } eh = mtod(m, struct ether_vlan_header *); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { etype = ntohs(eh->evl_proto); ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { etype = ntohs(eh->evl_encap_proto); ehdrlen = ETHER_HDR_LEN; } switch (etype) { #ifdef INET case ETHERTYPE_IP: ip = (struct ip *)(m->m_data + ehdrlen); if (ip->ip_p 
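/* TSO setup handles TCP only; any other protocol aborts the setup and the caller drops the frame */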
!= IPPROTO_TCP) return NULL; th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); total_len = ehdrlen + (ip->ip_hl << 2) + (th->th_off << 2); break; #endif #ifdef INET6 case ETHERTYPE_IPV6: ip6 = (struct ip6_hdr *)(m->m_data + ehdrlen); if (ip6->ip6_nxt != IPPROTO_TCP) return NULL; th = (struct tcphdr *)((caddr_t)ip6 + sizeof(struct ip6_hdr)); total_len = ehdrlen + sizeof(struct ip6_hdr) + (th->th_off << 2); break; #endif default: return NULL; } m = m_pullup(m, total_len); if (!m) return NULL; *mpp = m; return m; } #endif /* INET6 || INET */ void oce_tx_task(void *arg, int npending) { struct oce_wq *wq = arg; POCE_SOFTC sc = wq->parent; struct ifnet *ifp = sc->ifp; int rc = 0; #if __FreeBSD_version >= 800000 LOCK(&wq->tx_lock); rc = oce_multiq_transmit(ifp, NULL, wq); if (rc) { device_printf(sc->dev, "TX[%d] restart failed\n", wq->queue_index); } UNLOCK(&wq->tx_lock); #else oce_start(ifp); #endif } void oce_start(struct ifnet *ifp) { POCE_SOFTC sc = ifp->if_softc; struct mbuf *m; int rc = 0; int def_q = 0; /* Defualt tx queue is 0*/ if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return; if (!sc->link_status) return; do { IF_DEQUEUE(&sc->ifp->if_snd, m); if (m == NULL) break; LOCK(&sc->wq[def_q]->tx_lock); rc = oce_tx(sc, &m, def_q); UNLOCK(&sc->wq[def_q]->tx_lock); if (rc) { if (m != NULL) { sc->wq[def_q]->tx_stats.tx_stops ++; ifp->if_drv_flags |= IFF_DRV_OACTIVE; IFQ_DRV_PREPEND(&ifp->if_snd, m); m = NULL; } break; } if (m != NULL) ETHER_BPF_MTAP(ifp, m); } while (TRUE); return; } /* Handle the Completion Queue for transmit */ uint16_t oce_wq_handler(void *arg) { struct oce_wq *wq = (struct oce_wq *)arg; POCE_SOFTC sc = wq->parent; struct oce_cq *cq = wq->cq; struct oce_nic_tx_cqe *cqe; int num_cqes = 0; bus_dmamap_sync(cq->ring->dma.tag, cq->ring->dma.map, BUS_DMASYNC_POSTWRITE); cqe = RING_GET_CONSUMER_ITEM_VA(cq->ring, struct oce_nic_tx_cqe); while (cqe->u0.dw[3]) { DW_SWAP((uint32_t *) cqe, sizeof(oce_wq_cqe)); wq->ring->cidx = cqe->u0.s.wqe_index + 1; if (wq->ring->cidx >= wq->ring->num_items) wq->ring->cidx -= wq->ring->num_items; oce_tx_complete(wq, cqe->u0.s.wqe_index, cqe->u0.s.status); wq->tx_stats.tx_compl++; cqe->u0.dw[3] = 0; RING_GET(cq->ring, 1); bus_dmamap_sync(cq->ring->dma.tag, cq->ring->dma.map, BUS_DMASYNC_POSTWRITE); cqe = RING_GET_CONSUMER_ITEM_VA(cq->ring, struct oce_nic_tx_cqe); num_cqes++; } if (num_cqes) oce_arm_cq(sc, cq->cq_id, num_cqes, FALSE); return 0; } static int oce_multiq_transmit(struct ifnet *ifp, struct mbuf *m, struct oce_wq *wq) { POCE_SOFTC sc = ifp->if_softc; int status = 0, queue_index = 0; struct mbuf *next = NULL; struct buf_ring *br = NULL; br = wq->br; queue_index = wq->queue_index; if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) { if (m != NULL) status = drbr_enqueue(ifp, br, m); return status; } if (m != NULL) { if ((status = drbr_enqueue(ifp, br, m)) != 0) return status; } while ((next = drbr_peek(ifp, br)) != NULL) { if (oce_tx(sc, &next, queue_index)) { if (next == NULL) { drbr_advance(ifp, br); } else { drbr_putback(ifp, br, next); wq->tx_stats.tx_stops ++; ifp->if_drv_flags |= IFF_DRV_OACTIVE; } break; } drbr_advance(ifp, br); if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len); if (next->m_flags & M_MCAST) if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); ETHER_BPF_MTAP(ifp, next); } return 0; } /***************************************************************************** * Receive routines functions * 
*****************************************************************************/ static void oce_rx(struct oce_rq *rq, uint32_t rqe_idx, struct oce_nic_rx_cqe *cqe) { uint32_t out; struct oce_packet_desc *pd; POCE_SOFTC sc = (POCE_SOFTC) rq->parent; int i, len, frag_len; struct mbuf *m = NULL, *tail = NULL; uint16_t vtag; len = cqe->u0.s.pkt_size; if (!len) { /*partial DMA workaround for Lancer*/ oce_discard_rx_comp(rq, cqe); goto exit; } /* Get vlan_tag value */ if(IS_BE(sc) || IS_SH(sc)) vtag = BSWAP_16(cqe->u0.s.vlan_tag); else vtag = cqe->u0.s.vlan_tag; for (i = 0; i < cqe->u0.s.num_fragments; i++) { if (rq->packets_out == rq->packets_in) { device_printf(sc->dev, "RQ transmit descriptor missing\n"); } out = rq->packets_out + 1; if (out == OCE_RQ_PACKET_ARRAY_SIZE) out = 0; pd = &rq->pckts[rq->packets_out]; rq->packets_out = out; bus_dmamap_sync(rq->tag, pd->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(rq->tag, pd->map); rq->pending--; frag_len = (len > rq->cfg.frag_size) ? rq->cfg.frag_size : len; pd->mbuf->m_len = frag_len; if (tail != NULL) { /* additional fragments */ pd->mbuf->m_flags &= ~M_PKTHDR; tail->m_next = pd->mbuf; tail = pd->mbuf; } else { /* first fragment, fill out much of the packet header */ pd->mbuf->m_pkthdr.len = len; pd->mbuf->m_pkthdr.csum_flags = 0; if (IF_CSUM_ENABLED(sc)) { if (cqe->u0.s.l4_cksum_pass) { pd->mbuf->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); pd->mbuf->m_pkthdr.csum_data = 0xffff; } if (cqe->u0.s.ip_cksum_pass) { if (!cqe->u0.s.ip_ver) { /* IPV4 */ pd->mbuf->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED|CSUM_IP_VALID); } } } m = tail = pd->mbuf; } pd->mbuf = NULL; len -= frag_len; } if (m) { if (!oce_cqe_portid_valid(sc, cqe)) { m_freem(m); goto exit; } m->m_pkthdr.rcvif = sc->ifp; #if __FreeBSD_version >= 800000 if (rq->queue_index) m->m_pkthdr.flowid = (rq->queue_index - 1); else m->m_pkthdr.flowid = rq->queue_index; M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); #endif /* This deternies if vlan tag is Valid */ if (oce_cqe_vtp_valid(sc, cqe)) { if (sc->function_mode & FNM_FLEX10_MODE) { /* FLEX10. If QnQ is not set, neglect VLAN */ if (cqe->u0.s.qnq) { m->m_pkthdr.ether_vtag = vtag; m->m_flags |= M_VLANTAG; } } else if (sc->pvid != (vtag & VLAN_VID_MASK)) { /* In UMC mode generally pvid will be striped by hw. But in some cases we have seen it comes with pvid. So if pvid == vlan, neglect vlan. 
*/ m->m_pkthdr.ether_vtag = vtag; m->m_flags |= M_VLANTAG; } } if_inc_counter(sc->ifp, IFCOUNTER_IPACKETS, 1); #if defined(INET6) || defined(INET) /* Try to queue to LRO */ if (IF_LRO_ENABLED(sc) && (cqe->u0.s.ip_cksum_pass) && (cqe->u0.s.l4_cksum_pass) && (!cqe->u0.s.ip_ver) && (rq->lro.lro_cnt != 0)) { if (tcp_lro_rx(&rq->lro, m, 0) == 0) { rq->lro_pkts_queued ++; goto post_done; } /* If LRO posting fails then try to post to STACK */ } #endif (*sc->ifp->if_input) (sc->ifp, m); #if defined(INET6) || defined(INET) post_done: #endif /* Update rx stats per queue */ rq->rx_stats.rx_pkts++; rq->rx_stats.rx_bytes += cqe->u0.s.pkt_size; rq->rx_stats.rx_frags += cqe->u0.s.num_fragments; if (cqe->u0.s.pkt_type == OCE_MULTICAST_PACKET) rq->rx_stats.rx_mcast_pkts++; if (cqe->u0.s.pkt_type == OCE_UNICAST_PACKET) rq->rx_stats.rx_ucast_pkts++; } exit: return; } static void oce_discard_rx_comp(struct oce_rq *rq, struct oce_nic_rx_cqe *cqe) { uint32_t out, i = 0; struct oce_packet_desc *pd; POCE_SOFTC sc = (POCE_SOFTC) rq->parent; int num_frags = cqe->u0.s.num_fragments; for (i = 0; i < num_frags; i++) { if (rq->packets_out == rq->packets_in) { device_printf(sc->dev, "RQ transmit descriptor missing\n"); } out = rq->packets_out + 1; if (out == OCE_RQ_PACKET_ARRAY_SIZE) out = 0; pd = &rq->pckts[rq->packets_out]; rq->packets_out = out; bus_dmamap_sync(rq->tag, pd->map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(rq->tag, pd->map); rq->pending--; m_freem(pd->mbuf); } } static int oce_cqe_vtp_valid(POCE_SOFTC sc, struct oce_nic_rx_cqe *cqe) { struct oce_nic_rx_cqe_v1 *cqe_v1; int vtp = 0; if (sc->be3_native) { cqe_v1 = (struct oce_nic_rx_cqe_v1 *)cqe; vtp = cqe_v1->u0.s.vlan_tag_present; } else vtp = cqe->u0.s.vlan_tag_present; return vtp; } static int oce_cqe_portid_valid(POCE_SOFTC sc, struct oce_nic_rx_cqe *cqe) { struct oce_nic_rx_cqe_v1 *cqe_v1; int port_id = 0; if (sc->be3_native && (IS_BE(sc) || IS_SH(sc))) { cqe_v1 = (struct oce_nic_rx_cqe_v1 *)cqe; port_id = cqe_v1->u0.s.port; if (sc->port_id != port_id) return 0; } else ;/* For BE3 legacy and Lancer this is dummy */ return 1; } #if defined(INET6) || defined(INET) static void oce_rx_flush_lro(struct oce_rq *rq) { struct lro_ctrl *lro = &rq->lro; POCE_SOFTC sc = (POCE_SOFTC) rq->parent; if (!IF_LRO_ENABLED(sc)) return; tcp_lro_flush_all(lro); rq->lro_pkts_queued = 0; return; } static int oce_init_lro(POCE_SOFTC sc) { struct lro_ctrl *lro = NULL; int i = 0, rc = 0; for (i = 0; i < sc->nrqs; i++) { lro = &sc->rq[i]->lro; rc = tcp_lro_init(lro); if (rc != 0) { device_printf(sc->dev, "LRO init failed\n"); return rc; } lro->ifp = sc->ifp; } return rc; } void oce_free_lro(POCE_SOFTC sc) { struct lro_ctrl *lro = NULL; int i = 0; for (i = 0; i < sc->nrqs; i++) { lro = &sc->rq[i]->lro; if (lro) tcp_lro_free(lro); } } #endif int oce_alloc_rx_bufs(struct oce_rq *rq, int count) { POCE_SOFTC sc = (POCE_SOFTC) rq->parent; int i, in, rc; struct oce_packet_desc *pd; bus_dma_segment_t segs[6]; int nsegs, added = 0; struct oce_nic_rqe *rqe; pd_rxulp_db_t rxdb_reg; bzero(&rxdb_reg, sizeof(pd_rxulp_db_t)); for (i = 0; i < count; i++) { in = rq->packets_in + 1; if (in == OCE_RQ_PACKET_ARRAY_SIZE) in = 0; if (in == rq->packets_out) break; /* no more room */ pd = &rq->pckts[rq->packets_in]; pd->mbuf = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (pd->mbuf == NULL) break; pd->mbuf->m_len = pd->mbuf->m_pkthdr.len = MCLBYTES; rc = bus_dmamap_load_mbuf_sg(rq->tag, pd->map, pd->mbuf, segs, &nsegs, BUS_DMA_NOWAIT); if (rc) { m_free(pd->mbuf); break; } if (nsegs != 1) { i--; continue; } 
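/* single-segment mapping succeeded: advance the producer index and post the fragment's bus address into the RQ ring */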
rq->packets_in = in; bus_dmamap_sync(rq->tag, pd->map, BUS_DMASYNC_PREREAD); rqe = RING_GET_PRODUCER_ITEM_VA(rq->ring, struct oce_nic_rqe); rqe->u0.s.frag_pa_hi = ADDR_HI(segs[0].ds_addr); rqe->u0.s.frag_pa_lo = ADDR_LO(segs[0].ds_addr); DW_SWAP(u32ptr(rqe), sizeof(struct oce_nic_rqe)); RING_PUT(rq->ring, 1); added++; rq->pending++; } if (added != 0) { for (i = added / OCE_MAX_RQ_POSTS; i > 0; i--) { rxdb_reg.bits.num_posted = OCE_MAX_RQ_POSTS; rxdb_reg.bits.qid = rq->rq_id; OCE_WRITE_REG32(sc, db, PD_RXULP_DB, rxdb_reg.dw0); added -= OCE_MAX_RQ_POSTS; } if (added > 0) { rxdb_reg.bits.qid = rq->rq_id; rxdb_reg.bits.num_posted = added; OCE_WRITE_REG32(sc, db, PD_RXULP_DB, rxdb_reg.dw0); } } return 0; } /* Handle the Completion Queue for receive */ uint16_t oce_rq_handler(void *arg) { struct oce_rq *rq = (struct oce_rq *)arg; struct oce_cq *cq = rq->cq; POCE_SOFTC sc = rq->parent; struct oce_nic_rx_cqe *cqe; int num_cqes = 0, rq_buffers_used = 0; bus_dmamap_sync(cq->ring->dma.tag, cq->ring->dma.map, BUS_DMASYNC_POSTWRITE); cqe = RING_GET_CONSUMER_ITEM_VA(cq->ring, struct oce_nic_rx_cqe); while (cqe->u0.dw[2]) { DW_SWAP((uint32_t *) cqe, sizeof(oce_rq_cqe)); RING_GET(rq->ring, 1); if (cqe->u0.s.error == 0) { oce_rx(rq, cqe->u0.s.frag_index, cqe); } else { rq->rx_stats.rxcp_err++; if_inc_counter(sc->ifp, IFCOUNTER_IERRORS, 1); /* Post L3/L4 errors to stack.*/ oce_rx(rq, cqe->u0.s.frag_index, cqe); } rq->rx_stats.rx_compl++; cqe->u0.dw[2] = 0; #if defined(INET6) || defined(INET) if (IF_LRO_ENABLED(sc) && rq->lro_pkts_queued >= 16) { oce_rx_flush_lro(rq); } #endif RING_GET(cq->ring, 1); bus_dmamap_sync(cq->ring->dma.tag, cq->ring->dma.map, BUS_DMASYNC_POSTWRITE); cqe = RING_GET_CONSUMER_ITEM_VA(cq->ring, struct oce_nic_rx_cqe); num_cqes++; if (num_cqes >= (IS_XE201(sc) ? 
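/* bound the work done per call: Lancer (XE201) handles at most 8 RX completions per pass, other ASICs up to oce_max_rsp_handled */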
8 : oce_max_rsp_handled)) break; } #if defined(INET6) || defined(INET) if (IF_LRO_ENABLED(sc)) oce_rx_flush_lro(rq); #endif if (num_cqes) { oce_arm_cq(sc, cq->cq_id, num_cqes, FALSE); rq_buffers_used = OCE_RQ_PACKET_ARRAY_SIZE - rq->pending; if (rq_buffers_used > 1) oce_alloc_rx_bufs(rq, (rq_buffers_used - 1)); } return 0; } /***************************************************************************** * Helper function prototypes in this file * *****************************************************************************/ static int oce_attach_ifp(POCE_SOFTC sc) { sc->ifp = if_alloc(IFT_ETHER); if (!sc->ifp) return ENOMEM; ifmedia_init(&sc->media, IFM_IMASK, oce_media_change, oce_media_status); ifmedia_add(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&sc->media, IFM_ETHER | IFM_AUTO); sc->ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST; sc->ifp->if_ioctl = oce_ioctl; sc->ifp->if_start = oce_start; sc->ifp->if_init = oce_init; sc->ifp->if_mtu = ETHERMTU; sc->ifp->if_softc = sc; #if __FreeBSD_version >= 800000 sc->ifp->if_transmit = oce_multiq_start; sc->ifp->if_qflush = oce_multiq_flush; #endif if_initname(sc->ifp, device_get_name(sc->dev), device_get_unit(sc->dev)); sc->ifp->if_snd.ifq_drv_maxlen = OCE_MAX_TX_DESC - 1; IFQ_SET_MAXLEN(&sc->ifp->if_snd, sc->ifp->if_snd.ifq_drv_maxlen); IFQ_SET_READY(&sc->ifp->if_snd); sc->ifp->if_hwassist = OCE_IF_HWASSIST; sc->ifp->if_hwassist |= CSUM_TSO; sc->ifp->if_hwassist |= (CSUM_IP | CSUM_TCP | CSUM_UDP); sc->ifp->if_capabilities = OCE_IF_CAPABILITIES; sc->ifp->if_capabilities |= IFCAP_HWCSUM; sc->ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; #if defined(INET6) || defined(INET) sc->ifp->if_capabilities |= IFCAP_TSO; sc->ifp->if_capabilities |= IFCAP_LRO; sc->ifp->if_capabilities |= IFCAP_VLAN_HWTSO; #endif sc->ifp->if_capenable = sc->ifp->if_capabilities; sc->ifp->if_baudrate = IF_Gbps(10); #if __FreeBSD_version >= 1000000 sc->ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); sc->ifp->if_hw_tsomaxsegcount = OCE_MAX_TX_ELEMENTS; sc->ifp->if_hw_tsomaxsegsize = 4096; #endif ether_ifattach(sc->ifp, sc->macaddr.mac_addr); return 0; } static void oce_add_vlan(void *arg, struct ifnet *ifp, uint16_t vtag) { POCE_SOFTC sc = ifp->if_softc; if (ifp->if_softc != arg) return; if ((vtag == 0) || (vtag > 4095)) return; sc->vlan_tag[vtag] = 1; sc->vlans_added++; if (sc->vlans_added <= (sc->max_vlans + 1)) oce_vid_config(sc); } static void oce_del_vlan(void *arg, struct ifnet *ifp, uint16_t vtag) { POCE_SOFTC sc = ifp->if_softc; if (ifp->if_softc != arg) return; if ((vtag == 0) || (vtag > 4095)) return; sc->vlan_tag[vtag] = 0; sc->vlans_added--; oce_vid_config(sc); } /* * A max of 64 vlans can be configured in BE. If the user configures * more, place the card in vlan promiscuous mode. 
*/ static int oce_vid_config(POCE_SOFTC sc) { struct normal_vlan vtags[MAX_VLANFILTER_SIZE]; uint16_t ntags = 0, i; int status = 0; if ((sc->vlans_added <= MAX_VLANFILTER_SIZE) && (sc->ifp->if_capenable & IFCAP_VLAN_HWFILTER)) { for (i = 0; i < MAX_VLANS; i++) { if (sc->vlan_tag[i]) { vtags[ntags].vtag = i; ntags++; } } if (ntags) status = oce_config_vlan(sc, (uint8_t) sc->if_id, vtags, ntags, 1, 0); } else status = oce_config_vlan(sc, (uint8_t) sc->if_id, NULL, 0, 1, 1); return status; } static void oce_mac_addr_set(POCE_SOFTC sc) { uint32_t old_pmac_id = sc->pmac_id; int status = 0; status = bcmp((IF_LLADDR(sc->ifp)), sc->macaddr.mac_addr, sc->macaddr.size_of_struct); if (!status) return; status = oce_mbox_macaddr_add(sc, (uint8_t *)(IF_LLADDR(sc->ifp)), sc->if_id, &sc->pmac_id); if (!status) { status = oce_mbox_macaddr_del(sc, sc->if_id, old_pmac_id); bcopy((IF_LLADDR(sc->ifp)), sc->macaddr.mac_addr, sc->macaddr.size_of_struct); } if (status) device_printf(sc->dev, "Failed update macaddress\n"); } static int oce_handle_passthrough(struct ifnet *ifp, caddr_t data) { POCE_SOFTC sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; int rc = ENXIO; char cookie[32] = {0}; - void *priv_data = (void *)ifr->ifr_data; + void *priv_data = ifr_data_get_ptr(ifr); void *ioctl_ptr; uint32_t req_size; struct mbx_hdr req; OCE_DMA_MEM dma_mem; struct mbx_common_get_cntl_attr *fw_cmd; if (copyin(priv_data, cookie, strlen(IOCTL_COOKIE))) return EFAULT; if (memcmp(cookie, IOCTL_COOKIE, strlen(IOCTL_COOKIE))) return EINVAL; ioctl_ptr = (char *)priv_data + strlen(IOCTL_COOKIE); if (copyin(ioctl_ptr, &req, sizeof(struct mbx_hdr))) return EFAULT; req_size = le32toh(req.u0.req.request_length); if (req_size > 65536) return EINVAL; req_size += sizeof(struct mbx_hdr); rc = oce_dma_alloc(sc, req_size, &dma_mem, 0); if (rc) return ENOMEM; if (copyin(ioctl_ptr, OCE_DMAPTR(&dma_mem,char), req_size)) { rc = EFAULT; goto dma_free; } rc = oce_pass_through_mbox(sc, &dma_mem, req_size); if (rc) { rc = EIO; goto dma_free; } if (copyout(OCE_DMAPTR(&dma_mem,char), ioctl_ptr, req_size)) rc = EFAULT; /* firmware is filling all the attributes for this ioctl except the driver version..so fill it */ if(req.u0.rsp.opcode == OPCODE_COMMON_GET_CNTL_ATTRIBUTES) { fw_cmd = (struct mbx_common_get_cntl_attr *) ioctl_ptr; strncpy(fw_cmd->params.rsp.cntl_attr_info.hba_attr.drv_ver_str, COMPONENT_REVISION, strlen(COMPONENT_REVISION)); } dma_free: oce_dma_free(sc, &dma_mem); return rc; } static void oce_eqd_set_periodic(POCE_SOFTC sc) { struct oce_set_eqd set_eqd[OCE_MAX_EQ]; struct oce_aic_obj *aic; struct oce_eq *eqo; uint64_t now = 0, delta; int eqd, i, num = 0; uint32_t ips = 0; int tps; for (i = 0 ; i < sc->neqs; i++) { eqo = sc->eq[i]; aic = &sc->aic_obj[i]; /* When setting the static eq delay from the user space */ if (!aic->enable) { eqd = aic->et_eqd; goto modify_eqd; } now = ticks; /* Over flow check */ if ((now < aic->ticks) || (eqo->intr < aic->intr_prev)) goto done; delta = now - aic->ticks; tps = delta/hz; /* Interrupt rate based on elapsed ticks */ if(tps) ips = (uint32_t)(eqo->intr - aic->intr_prev) / tps; if (ips > INTR_RATE_HWM) eqd = aic->cur_eqd + 20; else if (ips < INTR_RATE_LWM) eqd = aic->cur_eqd / 2; else goto done; if (eqd < 10) eqd = 0; /* Make sure that the eq delay is in the known range */ eqd = min(eqd, aic->max_eqd); eqd = max(eqd, aic->min_eqd); modify_eqd: if (eqd != aic->cur_eqd) { set_eqd[num].delay_multiplier = (eqd * 65)/100; set_eqd[num].eq_id = eqo->eq_id; aic->cur_eqd = eqd; num++; } done: 
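/* remember this pass's interrupt count and tick stamp for the next adaptive-EQ-delay calculation */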
aic->intr_prev = eqo->intr; aic->ticks = now; } /* Is there atleast one eq that needs to be modified? */ if(num) oce_mbox_eqd_modify_periodic(sc, set_eqd, num); } static void oce_detect_hw_error(POCE_SOFTC sc) { uint32_t ue_low = 0, ue_high = 0, ue_low_mask = 0, ue_high_mask = 0; uint32_t sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0; uint32_t i; if (sc->hw_error) return; if (IS_XE201(sc)) { sliport_status = OCE_READ_REG32(sc, db, SLIPORT_STATUS_OFFSET); if (sliport_status & SLIPORT_STATUS_ERR_MASK) { sliport_err1 = OCE_READ_REG32(sc, db, SLIPORT_ERROR1_OFFSET); sliport_err2 = OCE_READ_REG32(sc, db, SLIPORT_ERROR2_OFFSET); } } else { ue_low = OCE_READ_REG32(sc, devcfg, PCICFG_UE_STATUS_LOW); ue_high = OCE_READ_REG32(sc, devcfg, PCICFG_UE_STATUS_HIGH); ue_low_mask = OCE_READ_REG32(sc, devcfg, PCICFG_UE_STATUS_LOW_MASK); ue_high_mask = OCE_READ_REG32(sc, devcfg, PCICFG_UE_STATUS_HI_MASK); ue_low = (ue_low & ~ue_low_mask); ue_high = (ue_high & ~ue_high_mask); } /* On certain platforms BE hardware can indicate spurious UEs. * Allow the h/w to stop working completely in case of a real UE. * Hence not setting the hw_error for UE detection. */ if (sliport_status & SLIPORT_STATUS_ERR_MASK) { sc->hw_error = TRUE; device_printf(sc->dev, "Error detected in the card\n"); } if (sliport_status & SLIPORT_STATUS_ERR_MASK) { device_printf(sc->dev, "ERR: sliport status 0x%x\n", sliport_status); device_printf(sc->dev, "ERR: sliport error1 0x%x\n", sliport_err1); device_printf(sc->dev, "ERR: sliport error2 0x%x\n", sliport_err2); } if (ue_low) { for (i = 0; ue_low; ue_low >>= 1, i++) { if (ue_low & 1) device_printf(sc->dev, "UE: %s bit set\n", ue_status_low_desc[i]); } } if (ue_high) { for (i = 0; ue_high; ue_high >>= 1, i++) { if (ue_high & 1) device_printf(sc->dev, "UE: %s bit set\n", ue_status_hi_desc[i]); } } } static void oce_local_timer(void *arg) { POCE_SOFTC sc = arg; int i = 0; oce_detect_hw_error(sc); oce_refresh_nic_stats(sc); oce_refresh_queue_stats(sc); oce_mac_addr_set(sc); /* TX Watch Dog*/ for (i = 0; i < sc->nwqs; i++) oce_tx_restart(sc, sc->wq[i]); /* calculate and set the eq delay for optimal interrupt rate */ if (IS_BE(sc) || IS_SH(sc)) oce_eqd_set_periodic(sc); callout_reset(&sc->timer, hz, oce_local_timer, sc); } /* NOTE : This should only be called holding * DEVICE_LOCK. */ static void oce_if_deactivate(POCE_SOFTC sc) { int i, mtime = 0; int wait_req = 0; struct oce_rq *rq; struct oce_wq *wq; struct oce_eq *eq; sc->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); /*Wait for max of 400ms for TX completions to be done */ while (mtime < 400) { wait_req = 0; for_all_wq_queues(sc, wq, i) { if (wq->ring->num_used) { wait_req = 1; DELAY(1); break; } } mtime += 1; if (!wait_req) break; } /* Stop intrs and finish any bottom halves pending */ oce_hw_intr_disable(sc); /* Since taskqueue_drain takes a Gaint Lock, We should not acquire any other lock. So unlock device lock and require after completing taskqueue_drain. */ UNLOCK(&sc->dev_lock); for (i = 0; i < sc->intr_count; i++) { if (sc->intrs[i].tq != NULL) { taskqueue_drain(sc->intrs[i].tq, &sc->intrs[i].task); } } LOCK(&sc->dev_lock); /* Delete RX queue in card with flush param */ oce_stop_rx(sc); /* Invalidate any pending cq and eq entries*/ for_all_evnt_queues(sc, eq, i) oce_drain_eq(eq); for_all_rq_queues(sc, rq, i) oce_drain_rq_cq(rq); for_all_wq_queues(sc, wq, i) oce_drain_wq_cq(wq); /* But still we need to get MCC aync events. 
So enable intrs and also arm first EQ */ oce_hw_intr_enable(sc); oce_arm_eq(sc, sc->eq[0]->eq_id, 0, TRUE, FALSE); DELAY(10); } static void oce_if_activate(POCE_SOFTC sc) { struct oce_eq *eq; struct oce_rq *rq; struct oce_wq *wq; int i, rc = 0; sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; oce_hw_intr_disable(sc); oce_start_rx(sc); for_all_rq_queues(sc, rq, i) { rc = oce_start_rq(rq); if (rc) device_printf(sc->dev, "Unable to start RX\n"); } for_all_wq_queues(sc, wq, i) { rc = oce_start_wq(wq); if (rc) device_printf(sc->dev, "Unable to start TX\n"); } for_all_evnt_queues(sc, eq, i) oce_arm_eq(sc, eq->eq_id, 0, TRUE, FALSE); oce_hw_intr_enable(sc); } static void process_link_state(POCE_SOFTC sc, struct oce_async_cqe_link_state *acqe) { /* Update Link status */ if ((acqe->u0.s.link_status & ~ASYNC_EVENT_LOGICAL) == ASYNC_EVENT_LINK_UP) { sc->link_status = ASYNC_EVENT_LINK_UP; if_link_state_change(sc->ifp, LINK_STATE_UP); } else { sc->link_status = ASYNC_EVENT_LINK_DOWN; if_link_state_change(sc->ifp, LINK_STATE_DOWN); } } /* Handle the Completion Queue for the Mailbox/Async notifications */ uint16_t oce_mq_handler(void *arg) { struct oce_mq *mq = (struct oce_mq *)arg; POCE_SOFTC sc = mq->parent; struct oce_cq *cq = mq->cq; int num_cqes = 0, evt_type = 0, optype = 0; struct oce_mq_cqe *cqe; struct oce_async_cqe_link_state *acqe; struct oce_async_event_grp5_pvid_state *gcqe; struct oce_async_event_qnq *dbgcqe; bus_dmamap_sync(cq->ring->dma.tag, cq->ring->dma.map, BUS_DMASYNC_POSTWRITE); cqe = RING_GET_CONSUMER_ITEM_VA(cq->ring, struct oce_mq_cqe); while (cqe->u0.dw[3]) { DW_SWAP((uint32_t *) cqe, sizeof(oce_mq_cqe)); if (cqe->u0.s.async_event) { evt_type = cqe->u0.s.event_type; optype = cqe->u0.s.async_type; if (evt_type == ASYNC_EVENT_CODE_LINK_STATE) { /* Link status evt */ acqe = (struct oce_async_cqe_link_state *)cqe; process_link_state(sc, acqe); } else if ((evt_type == ASYNC_EVENT_GRP5) && (optype == ASYNC_EVENT_PVID_STATE)) { /* GRP5 PVID */ gcqe = (struct oce_async_event_grp5_pvid_state *)cqe; if (gcqe->enabled) sc->pvid = gcqe->tag & VLAN_VID_MASK; else sc->pvid = 0; } else if(evt_type == ASYNC_EVENT_CODE_DEBUG && optype == ASYNC_EVENT_DEBUG_QNQ) { dbgcqe = (struct oce_async_event_qnq *)cqe; if(dbgcqe->valid) sc->qnqid = dbgcqe->vlan_tag; sc->qnq_debug_event = TRUE; } } cqe->u0.dw[3] = 0; RING_GET(cq->ring, 1); bus_dmamap_sync(cq->ring->dma.tag, cq->ring->dma.map, BUS_DMASYNC_POSTWRITE); cqe = RING_GET_CONSUMER_ITEM_VA(cq->ring, struct oce_mq_cqe); num_cqes++; } if (num_cqes) oce_arm_cq(sc, cq->cq_id, num_cqes, FALSE); return 0; } static void setup_max_queues_want(POCE_SOFTC sc) { /* Check if it is FLEX machine. 
Is so dont use RSS */ if ((sc->function_mode & FNM_FLEX10_MODE) || (sc->function_mode & FNM_UMC_MODE) || (sc->function_mode & FNM_VNIC_MODE) || (!is_rss_enabled(sc)) || IS_BE2(sc)) { sc->nrqs = 1; sc->nwqs = 1; } else { sc->nrqs = MIN(OCE_NCPUS, sc->nrssqs) + 1; sc->nwqs = MIN(OCE_NCPUS, sc->nrssqs); } if (IS_BE2(sc) && is_rss_enabled(sc)) sc->nrqs = MIN(OCE_NCPUS, sc->nrssqs) + 1; } static void update_queues_got(POCE_SOFTC sc) { if (is_rss_enabled(sc)) { sc->nrqs = sc->intr_count + 1; sc->nwqs = sc->intr_count; } else { sc->nrqs = 1; sc->nwqs = 1; } if (IS_BE2(sc)) sc->nwqs = 1; } static int oce_check_ipv6_ext_hdr(struct mbuf *m) { struct ether_header *eh = mtod(m, struct ether_header *); caddr_t m_datatemp = m->m_data; if (eh->ether_type == htons(ETHERTYPE_IPV6)) { m->m_data += sizeof(struct ether_header); struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); if((ip6->ip6_nxt != IPPROTO_TCP) && \ (ip6->ip6_nxt != IPPROTO_UDP)){ struct ip6_ext *ip6e = NULL; m->m_data += sizeof(struct ip6_hdr); ip6e = (struct ip6_ext *) mtod(m, struct ip6_ext *); if(ip6e->ip6e_len == 0xff) { m->m_data = m_datatemp; return TRUE; } } m->m_data = m_datatemp; } return FALSE; } static int is_be3_a1(POCE_SOFTC sc) { if((sc->flags & OCE_FLAGS_BE3) && ((sc->asic_revision & 0xFF) < 2)) { return TRUE; } return FALSE; } static struct mbuf * oce_insert_vlan_tag(POCE_SOFTC sc, struct mbuf *m, boolean_t *complete) { uint16_t vlan_tag = 0; if(!M_WRITABLE(m)) return NULL; /* Embed vlan tag in the packet if it is not part of it */ if(m->m_flags & M_VLANTAG) { vlan_tag = EVL_VLANOFTAG(m->m_pkthdr.ether_vtag); m->m_flags &= ~M_VLANTAG; } /* if UMC, ignore vlan tag insertion and instead insert pvid */ if(sc->pvid) { if(!vlan_tag) vlan_tag = sc->pvid; *complete = FALSE; } if(vlan_tag) { m = ether_vlanencap(m, vlan_tag); } if(sc->qnqid) { m = ether_vlanencap(m, sc->qnqid); *complete = FALSE; } return m; } static int oce_tx_asic_stall_verify(POCE_SOFTC sc, struct mbuf *m) { if(is_be3_a1(sc) && IS_QNQ_OR_UMC(sc) && \ oce_check_ipv6_ext_hdr(m)) { return TRUE; } return FALSE; } static void oce_get_config(POCE_SOFTC sc) { int rc = 0; uint32_t max_rss = 0; if ((IS_BE(sc) || IS_SH(sc)) && (!sc->be3_native)) max_rss = OCE_LEGACY_MODE_RSS; else max_rss = OCE_MAX_RSS; if (!IS_BE(sc)) { rc = oce_get_profile_config(sc, max_rss); if (rc) { sc->nwqs = OCE_MAX_WQ; sc->nrssqs = max_rss; sc->nrqs = sc->nrssqs + 1; } } else { /* For BE3 don't rely on fw for determining the resources */ sc->nrssqs = max_rss; sc->nrqs = sc->nrssqs + 1; sc->nwqs = OCE_MAX_WQ; sc->max_vlans = MAX_VLANFILTER_SIZE; } } Index: stable/11/sys/dev/patm/if_patm_ioctl.c =================================================================== --- stable/11/sys/dev/patm/if_patm_ioctl.c (revision 332287) +++ stable/11/sys/dev/patm/if_patm_ioctl.c (revision 332288) @@ -1,412 +1,412 @@ /*- * Copyright (c) 2003 * Fraunhofer Institute for Open Communication Systems (FhG Fokus). * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Author: Hartmut Brandt * * Driver for IDT77252 based cards like ProSum's. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_natm.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Open the VCC with the given parameters */ static int patm_open_vcc(struct patm_softc *sc, struct atmio_openvcc *arg) { u_int cid; struct patm_vcc *vcc; int error = 0; patm_debug(sc, VCC, "Open VCC: %u.%u flags=%#x", arg->param.vpi, arg->param.vci, arg->param.flags); if (!LEGAL_VPI(sc, arg->param.vpi) || !LEGAL_VCI(sc, arg->param.vci)) return (EINVAL); if (arg->param.vci == 0 && (arg->param.vpi != 0 || !(arg->param.flags & ATMIO_FLAG_NOTX) || arg->param.aal != ATMIO_AAL_RAW)) return (EINVAL); cid = PATM_CID(sc, arg->param.vpi, arg->param.vci); if ((arg->param.flags & ATMIO_FLAG_NOTX) && (arg->param.flags & ATMIO_FLAG_NORX)) return (EINVAL); if ((arg->param.traffic == ATMIO_TRAFFIC_ABR) && (arg->param.flags & (ATMIO_FLAG_NOTX | ATMIO_FLAG_NORX))) return (EINVAL); /* allocate vcc */ vcc = uma_zalloc(sc->vcc_zone, M_NOWAIT | M_ZERO); if (vcc == NULL) return (ENOMEM); mtx_lock(&sc->mtx); if (!(sc->ifp->if_drv_flags & IFF_DRV_RUNNING)) { /* stopped while we have analyzed the arguments */ error = EIO; goto done; } if (sc->vccs[cid] != NULL) { /* ups, already open */ error = EBUSY; goto done; } /* check some parameters */ vcc->cid = cid; vcc->vcc = arg->param; vcc->vflags = 0; vcc->rxhand = arg->rxhand; switch (vcc->vcc.aal) { case ATMIO_AAL_0: case ATMIO_AAL_34: case ATMIO_AAL_5: break; case ATMIO_AAL_RAW: if (arg->param.vci == 0 && !(arg->param.flags & ATMIO_FLAG_NOTX)) { error = EINVAL; goto done; } break; default: error = EINVAL; goto done; } switch (vcc->vcc.traffic) { case ATMIO_TRAFFIC_VBR: case ATMIO_TRAFFIC_UBR: case ATMIO_TRAFFIC_CBR: case ATMIO_TRAFFIC_ABR: break; default: error = EINVAL; goto done; } /* initialize */ vcc->chain = NULL; vcc->last = NULL; vcc->ibytes = vcc->ipackets = 0; vcc->obytes = vcc->opackets = 0; /* ask the TX and RX sides */ patm_debug(sc, VCC, "Open VCC: asking Rx/Tx"); if (!(vcc->vcc.flags & ATMIO_FLAG_NOTX) && (error = patm_tx_vcc_can_open(sc, vcc)) != 0) goto done; if (!(vcc->vcc.flags & ATMIO_FLAG_NORX) && (error = patm_rx_vcc_can_open(sc, vcc)) != 0) goto done; /* ok - go ahead */ sc->vccs[cid] = vcc; patm_load_vc(sc, vcc, 0); /* don't free below */ vcc = NULL; sc->vccs_open++; /* done */ done: mtx_unlock(&sc->mtx); if (vcc != NULL) uma_zfree(sc->vcc_zone, vcc); return (error); } void patm_load_vc(struct patm_softc *sc, struct patm_vcc *vcc, int reload) { 
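/* open the TX and RX sides of the VCC and, unless this is a reload, notify management (for non-NG and NG-PVC channels) that the VCC is up */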
patm_debug(sc, VCC, "Open VCC: opening"); if (!(vcc->vcc.flags & ATMIO_FLAG_NOTX)) patm_tx_vcc_open(sc, vcc); if (!(vcc->vcc.flags & ATMIO_FLAG_NORX)) patm_rx_vcc_open(sc, vcc); if (!reload) { /* inform management about non-NG and NG-PVCs */ if (!(vcc->vcc.flags & ATMIO_FLAG_NG) || (vcc->vcc.flags & ATMIO_FLAG_PVC)) ATMEV_SEND_VCC_CHANGED(IFP2IFATM(sc->ifp), vcc->vcc.vpi, vcc->vcc.vci, 1); } patm_debug(sc, VCC, "Open VCC: now open"); } /* * Try to close the given VCC */ static int patm_close_vcc(struct patm_softc *sc, struct atmio_closevcc *arg) { u_int cid; struct patm_vcc *vcc; int error = 0; patm_debug(sc, VCC, "Close VCC: %u.%u", arg->vpi, arg->vci); if (!LEGAL_VPI(sc, arg->vpi) || !LEGAL_VCI(sc, arg->vci)) return (EINVAL); cid = PATM_CID(sc, arg->vpi, arg->vci); mtx_lock(&sc->mtx); if (!(sc->ifp->if_drv_flags & IFF_DRV_RUNNING)) { /* stopped while we have analyzed the arguments */ error = EIO; goto done; } vcc = sc->vccs[cid]; if (vcc == NULL || !(vcc->vflags & PATM_VCC_OPEN)) { error = ENOENT; goto done; } if (vcc->vflags & PATM_VCC_TX_OPEN) patm_tx_vcc_close(sc, vcc); if (vcc->vflags & PATM_VCC_RX_OPEN) patm_rx_vcc_close(sc, vcc); if (vcc->vcc.flags & ATMIO_FLAG_ASYNC) goto done; while (vcc->vflags & (PATM_VCC_TX_CLOSING | PATM_VCC_RX_CLOSING)) { cv_wait(&sc->vcc_cv, &sc->mtx); if (!(sc->ifp->if_drv_flags & IFF_DRV_RUNNING)) { /* ups, has been stopped */ error = EIO; goto done; } } if (!(vcc->vcc.flags & ATMIO_FLAG_NOTX)) patm_tx_vcc_closed(sc, vcc); if (!(vcc->vcc.flags & ATMIO_FLAG_NORX)) patm_rx_vcc_closed(sc, vcc); patm_vcc_closed(sc, vcc); done: mtx_unlock(&sc->mtx); return (error); } /* * VCC has been finally closed. */ void patm_vcc_closed(struct patm_softc *sc, struct patm_vcc *vcc) { /* inform management about non-NG and NG-PVCs */ if (!(vcc->vcc.flags & ATMIO_FLAG_NG) || (vcc->vcc.flags & ATMIO_FLAG_PVC)) ATMEV_SEND_VCC_CHANGED(IFP2IFATM(sc->ifp), vcc->vcc.vpi, vcc->vcc.vci, 0); sc->vccs_open--; sc->vccs[vcc->cid] = NULL; uma_zfree(sc->vcc_zone, vcc); } int patm_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct ifreq *ifr = (struct ifreq *)data; struct ifaddr *ifa = (struct ifaddr *)data; struct patm_softc *sc = ifp->if_softc; int error = 0; uint32_t cfg; struct atmio_vcctable *vtab; switch (cmd) { case SIOCSIFADDR: mtx_lock(&sc->mtx); ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) patm_initialize(sc); switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: case AF_INET6: ifa->ifa_rtrequest = atm_rtrequest; break; #endif default: break; } mtx_unlock(&sc->mtx); break; case SIOCSIFFLAGS: mtx_lock(&sc->mtx); if (ifp->if_flags & IFF_UP) { if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { patm_initialize(sc); } } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { patm_stop(sc); } } mtx_unlock(&sc->mtx); break; case SIOCGIFMEDIA: case SIOCSIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->media, cmd); /* * We need to toggle unassigned/idle cells ourself because * the 77252 generates null cells for spacing. When switching * null cells of it gets the timing wrong. 
*/ mtx_lock(&sc->mtx); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { if (sc->utopia.state & UTP_ST_UNASS) { if (!(sc->flags & PATM_UNASS)) { cfg = patm_nor_read(sc, IDT_NOR_CFG); cfg &= ~IDT_CFG_IDLECLP; patm_nor_write(sc, IDT_NOR_CFG, cfg); sc->flags |= PATM_UNASS; } } else { if (sc->flags & PATM_UNASS) { cfg = patm_nor_read(sc, IDT_NOR_CFG); cfg |= IDT_CFG_IDLECLP; patm_nor_write(sc, IDT_NOR_CFG, cfg); sc->flags &= ~PATM_UNASS; } } } else { if (sc->utopia.state & UTP_ST_UNASS) sc->flags |= PATM_UNASS; else sc->flags &= ~PATM_UNASS; } mtx_unlock(&sc->mtx); break; case SIOCSIFMTU: /* * Set the interface MTU. */ if (ifr->ifr_mtu > ATMMTU) error = EINVAL; else ifp->if_mtu = ifr->ifr_mtu; break; case SIOCATMOPENVCC: /* kernel internal use */ error = patm_open_vcc(sc, (struct atmio_openvcc *)data); break; case SIOCATMCLOSEVCC: /* kernel internal use */ error = patm_close_vcc(sc, (struct atmio_closevcc *)data); break; case SIOCATMGVCCS: /* external use */ /* return vcc table */ vtab = atm_getvccs((struct atmio_vcc **)sc->vccs, sc->mmap->max_conn, sc->vccs_open, &sc->mtx, 1); - error = copyout(vtab, ifr->ifr_data, sizeof(*vtab) + + error = copyout(vtab, ifr_data_get_ptr(ifr), sizeof(*vtab) + vtab->count * sizeof(vtab->vccs[0])); free(vtab, M_DEVBUF); break; case SIOCATMGETVCCS: /* netgraph internal use */ vtab = atm_getvccs((struct atmio_vcc **)sc->vccs, sc->mmap->max_conn, sc->vccs_open, &sc->mtx, 0); if (vtab == NULL) { error = ENOMEM; break; } *(void **)data = vtab; break; default: patm_debug(sc, IOCTL, "unknown cmd=%08lx arg=%p", cmd, data); error = EINVAL; break; } return (error); } Index: stable/11/sys/dev/qlnx/qlnxe/qlnx_os.c =================================================================== --- stable/11/sys/dev/qlnx/qlnxe/qlnx_os.c (revision 332287) +++ stable/11/sys/dev/qlnx/qlnxe/qlnx_os.c (revision 332288) @@ -1,7443 +1,7443 @@ /* * Copyright (c) 2017-2018 Cavium, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * File: qlnx_os.c * Author : David C Somayajulu, Cavium, Inc., San Jose, CA 95131. 
*/ #include __FBSDID("$FreeBSD$"); #include "qlnx_os.h" #include "bcm_osal.h" #include "reg_addr.h" #include "ecore_gtt_reg_addr.h" #include "ecore.h" #include "ecore_chain.h" #include "ecore_status.h" #include "ecore_hw.h" #include "ecore_rt_defs.h" #include "ecore_init_ops.h" #include "ecore_int.h" #include "ecore_cxt.h" #include "ecore_spq.h" #include "ecore_init_fw_funcs.h" #include "ecore_sp_commands.h" #include "ecore_dev_api.h" #include "ecore_l2_api.h" #include "ecore_mcp.h" #include "ecore_hw_defs.h" #include "mcp_public.h" #include "ecore_iro.h" #include "nvm_cfg.h" #include "ecore_dev_api.h" #include "ecore_dbg_fw_funcs.h" #include "qlnx_ioctl.h" #include "qlnx_def.h" #include "qlnx_ver.h" #include /* * static functions */ /* * ioctl related functions */ static void qlnx_add_sysctls(qlnx_host_t *ha); /* * main driver */ static void qlnx_release(qlnx_host_t *ha); static void qlnx_fp_isr(void *arg); static void qlnx_init_ifnet(device_t dev, qlnx_host_t *ha); static void qlnx_init(void *arg); static void qlnx_init_locked(qlnx_host_t *ha); static int qlnx_set_multi(qlnx_host_t *ha, uint32_t add_multi); static int qlnx_set_promisc(qlnx_host_t *ha); static int qlnx_set_allmulti(qlnx_host_t *ha); static int qlnx_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); static int qlnx_media_change(struct ifnet *ifp); static void qlnx_media_status(struct ifnet *ifp, struct ifmediareq *ifmr); static void qlnx_stop(qlnx_host_t *ha); static int qlnx_send(qlnx_host_t *ha, struct qlnx_fastpath *fp, struct mbuf **m_headp); static int qlnx_get_ifq_snd_maxlen(qlnx_host_t *ha); static uint32_t qlnx_get_optics(qlnx_host_t *ha, struct qlnx_link_output *if_link); static int qlnx_transmit(struct ifnet *ifp, struct mbuf *mp); static int qlnx_transmit_locked(struct ifnet *ifp, struct qlnx_fastpath *fp, struct mbuf *mp); static void qlnx_qflush(struct ifnet *ifp); static int qlnx_alloc_parent_dma_tag(qlnx_host_t *ha); static void qlnx_free_parent_dma_tag(qlnx_host_t *ha); static int qlnx_alloc_tx_dma_tag(qlnx_host_t *ha); static void qlnx_free_tx_dma_tag(qlnx_host_t *ha); static int qlnx_alloc_rx_dma_tag(qlnx_host_t *ha); static void qlnx_free_rx_dma_tag(qlnx_host_t *ha); static int qlnx_get_mfw_version(qlnx_host_t *ha, uint32_t *mfw_ver); static int qlnx_get_flash_size(qlnx_host_t *ha, uint32_t *flash_size); static int qlnx_nic_setup(struct ecore_dev *cdev, struct ecore_pf_params *func_params); static int qlnx_nic_start(struct ecore_dev *cdev); static int qlnx_slowpath_start(qlnx_host_t *ha); static int qlnx_slowpath_stop(qlnx_host_t *ha); static int qlnx_init_hw(qlnx_host_t *ha); static void qlnx_set_id(struct ecore_dev *cdev, char name[NAME_SIZE], char ver_str[VER_SIZE]); static void qlnx_unload(qlnx_host_t *ha); static int qlnx_load(qlnx_host_t *ha); static void qlnx_hw_set_multi(qlnx_host_t *ha, uint8_t *mta, uint32_t mcnt, uint32_t add_mac); static void qlnx_dump_buf8(qlnx_host_t *ha, const char *msg, void *dbuf, uint32_t len); static int qlnx_alloc_rx_buffer(qlnx_host_t *ha, struct qlnx_rx_queue *rxq); static void qlnx_reuse_rx_data(struct qlnx_rx_queue *rxq); static void qlnx_update_rx_prod(struct ecore_hwfn *p_hwfn, struct qlnx_rx_queue *rxq); static int qlnx_set_rx_accept_filter(qlnx_host_t *ha, uint8_t filter); static int qlnx_grc_dumpsize(qlnx_host_t *ha, uint32_t *num_dwords, int hwfn_index); static int qlnx_idle_chk_size(qlnx_host_t *ha, uint32_t *num_dwords, int hwfn_index); static void qlnx_timer(void *arg); static int qlnx_alloc_tx_br(qlnx_host_t *ha, struct qlnx_fastpath *fp); static void 
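/* remaining helper prototypes: buf_ring teardown, firmware dump trigger, TX/RX completion processing and TPA buffer management */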
qlnx_free_tx_br(qlnx_host_t *ha, struct qlnx_fastpath *fp); static void qlnx_trigger_dump(qlnx_host_t *ha); static uint16_t qlnx_num_tx_compl(qlnx_host_t *ha, struct qlnx_fastpath *fp, struct qlnx_tx_queue *txq); static void qlnx_tx_int(qlnx_host_t *ha, struct qlnx_fastpath *fp, struct qlnx_tx_queue *txq); static int qlnx_rx_int(qlnx_host_t *ha, struct qlnx_fastpath *fp, int budget, int lro_enable); static void qlnx_fp_taskqueue(void *context, int pending); static void qlnx_sample_storm_stats(qlnx_host_t *ha); static int qlnx_alloc_tpa_mbuf(qlnx_host_t *ha, uint16_t rx_buf_size, struct qlnx_agg_info *tpa); static void qlnx_free_tpa_mbuf(qlnx_host_t *ha, struct qlnx_agg_info *tpa); #if __FreeBSD_version >= 1100000 static uint64_t qlnx_get_counter(if_t ifp, ift_counter cnt); #endif /* * Hooks to the Operating Systems */ static int qlnx_pci_probe (device_t); static int qlnx_pci_attach (device_t); static int qlnx_pci_detach (device_t); static device_method_t qlnx_pci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, qlnx_pci_probe), DEVMETHOD(device_attach, qlnx_pci_attach), DEVMETHOD(device_detach, qlnx_pci_detach), { 0, 0 } }; static driver_t qlnx_pci_driver = { "ql", qlnx_pci_methods, sizeof (qlnx_host_t), }; static devclass_t qlnx_devclass; MODULE_VERSION(if_qlnxe,1); DRIVER_MODULE(if_qlnxe, pci, qlnx_pci_driver, qlnx_devclass, 0, 0); MODULE_DEPEND(if_qlnxe, pci, 1, 1, 1); MODULE_DEPEND(if_qlnxe, ether, 1, 1, 1); MALLOC_DEFINE(M_QLNXBUF, "qlnxbuf", "Buffers for qlnx driver"); char qlnx_dev_str[64]; char qlnx_ver_str[VER_SIZE]; char qlnx_name_str[NAME_SIZE]; /* * Some PCI Configuration Space Related Defines */ #ifndef PCI_VENDOR_QLOGIC #define PCI_VENDOR_QLOGIC 0x1077 #endif /* 40G Adapter QLE45xxx*/ #ifndef QLOGIC_PCI_DEVICE_ID_1634 #define QLOGIC_PCI_DEVICE_ID_1634 0x1634 #endif /* 100G Adapter QLE45xxx*/ #ifndef QLOGIC_PCI_DEVICE_ID_1644 #define QLOGIC_PCI_DEVICE_ID_1644 0x1644 #endif /* 25G Adapter QLE45xxx*/ #ifndef QLOGIC_PCI_DEVICE_ID_1656 #define QLOGIC_PCI_DEVICE_ID_1656 0x1656 #endif /* 50G Adapter QLE45xxx*/ #ifndef QLOGIC_PCI_DEVICE_ID_1654 #define QLOGIC_PCI_DEVICE_ID_1654 0x1654 #endif /* 10G/25G/40G Adapter QLE41xxx*/ #ifndef QLOGIC_PCI_DEVICE_ID_8070 #define QLOGIC_PCI_DEVICE_ID_8070 0x8070 #endif SYSCTL_NODE(_hw, OID_AUTO, qlnxe, CTLFLAG_RD, 0, "qlnxe driver parameters"); /* Number of Queues: 0 (Auto) or 1 to 32 (fixed queue number) */ static int qlnxe_queue_count = QLNX_DEFAULT_RSS; SYSCTL_INT(_hw_qlnxe, OID_AUTO, queue_count, CTLFLAG_RDTUN, &qlnxe_queue_count, 0, "Multi-Queue queue count"); static int qlnx_valid_device(device_t dev) { uint16_t device_id; device_id = pci_get_device(dev); if ((device_id == QLOGIC_PCI_DEVICE_ID_1634) || (device_id == QLOGIC_PCI_DEVICE_ID_1644) || (device_id == QLOGIC_PCI_DEVICE_ID_1656) || (device_id == QLOGIC_PCI_DEVICE_ID_1654) || (device_id == QLOGIC_PCI_DEVICE_ID_8070)) return 0; return -1; } /* * Name: qlnx_pci_probe * Function: Validate the PCI device to be a QLA80XX device */ static int qlnx_pci_probe(device_t dev) { snprintf(qlnx_ver_str, sizeof(qlnx_ver_str), "v%d.%d.%d", QLNX_VERSION_MAJOR, QLNX_VERSION_MINOR, QLNX_VERSION_BUILD); snprintf(qlnx_name_str, sizeof(qlnx_name_str), "qlnx"); if (pci_get_vendor(dev) != PCI_VENDOR_QLOGIC) { return (ENXIO); } switch (pci_get_device(dev)) { case QLOGIC_PCI_DEVICE_ID_1644: snprintf(qlnx_dev_str, sizeof(qlnx_dev_str), "%s v%d.%d.%d", "Qlogic 100GbE PCI CNA Adapter-Ethernet Function", QLNX_VERSION_MAJOR, QLNX_VERSION_MINOR, QLNX_VERSION_BUILD); device_set_desc_copy(dev, 
qlnx_dev_str); break; case QLOGIC_PCI_DEVICE_ID_1634: snprintf(qlnx_dev_str, sizeof(qlnx_dev_str), "%s v%d.%d.%d", "Qlogic 40GbE PCI CNA Adapter-Ethernet Function", QLNX_VERSION_MAJOR, QLNX_VERSION_MINOR, QLNX_VERSION_BUILD); device_set_desc_copy(dev, qlnx_dev_str); break; case QLOGIC_PCI_DEVICE_ID_1656: snprintf(qlnx_dev_str, sizeof(qlnx_dev_str), "%s v%d.%d.%d", "Qlogic 25GbE PCI CNA Adapter-Ethernet Function", QLNX_VERSION_MAJOR, QLNX_VERSION_MINOR, QLNX_VERSION_BUILD); device_set_desc_copy(dev, qlnx_dev_str); break; case QLOGIC_PCI_DEVICE_ID_1654: snprintf(qlnx_dev_str, sizeof(qlnx_dev_str), "%s v%d.%d.%d", "Qlogic 50GbE PCI CNA Adapter-Ethernet Function", QLNX_VERSION_MAJOR, QLNX_VERSION_MINOR, QLNX_VERSION_BUILD); device_set_desc_copy(dev, qlnx_dev_str); break; case QLOGIC_PCI_DEVICE_ID_8070: snprintf(qlnx_dev_str, sizeof(qlnx_dev_str), "%s v%d.%d.%d", "Qlogic 10GbE/25GbE/40GbE PCI CNA (AH) " "Adapter-Ethernet Function", QLNX_VERSION_MAJOR, QLNX_VERSION_MINOR, QLNX_VERSION_BUILD); device_set_desc_copy(dev, qlnx_dev_str); break; default: return (ENXIO); } return (BUS_PROBE_DEFAULT); } static uint16_t qlnx_num_tx_compl(qlnx_host_t *ha, struct qlnx_fastpath *fp, struct qlnx_tx_queue *txq) { u16 hw_bd_cons; u16 ecore_cons_idx; uint16_t diff; hw_bd_cons = le16toh(*txq->hw_cons_ptr); ecore_cons_idx = ecore_chain_get_cons_idx(&txq->tx_pbl); if (hw_bd_cons < ecore_cons_idx) { diff = (1 << 16) - (ecore_cons_idx - hw_bd_cons); } else { diff = hw_bd_cons - ecore_cons_idx; } return diff; } static void qlnx_sp_intr(void *arg) { struct ecore_hwfn *p_hwfn; qlnx_host_t *ha; int i; p_hwfn = arg; if (p_hwfn == NULL) { printf("%s: spurious slowpath intr\n", __func__); return; } ha = (qlnx_host_t *)p_hwfn->p_dev; QL_DPRINT2(ha, "enter\n"); for (i = 0; i < ha->cdev.num_hwfns; i++) { if (&ha->cdev.hwfns[i] == p_hwfn) { taskqueue_enqueue(ha->sp_taskqueue[i], &ha->sp_task[i]); break; } } QL_DPRINT2(ha, "exit\n"); return; } static void qlnx_sp_taskqueue(void *context, int pending) { struct ecore_hwfn *p_hwfn; p_hwfn = context; if (p_hwfn != NULL) { qlnx_sp_isr(p_hwfn); } return; } static int qlnx_create_sp_taskqueues(qlnx_host_t *ha) { int i; uint8_t tq_name[32]; for (i = 0; i < ha->cdev.num_hwfns; i++) { struct ecore_hwfn *p_hwfn = &ha->cdev.hwfns[i]; bzero(tq_name, sizeof (tq_name)); snprintf(tq_name, sizeof (tq_name), "ql_sp_tq_%d", i); TASK_INIT(&ha->sp_task[i], 0, qlnx_sp_taskqueue, p_hwfn); ha->sp_taskqueue[i] = taskqueue_create_fast(tq_name, M_NOWAIT, taskqueue_thread_enqueue, &ha->sp_taskqueue[i]); if (ha->sp_taskqueue[i] == NULL) return (-1); taskqueue_start_threads(&ha->sp_taskqueue[i], 1, PI_NET, "%s", tq_name); QL_DPRINT1(ha, "%p\n", ha->sp_taskqueue[i]); } return (0); } static void qlnx_destroy_sp_taskqueues(qlnx_host_t *ha) { int i; for (i = 0; i < ha->cdev.num_hwfns; i++) { if (ha->sp_taskqueue[i] != NULL) { taskqueue_drain(ha->sp_taskqueue[i], &ha->sp_task[i]); taskqueue_free(ha->sp_taskqueue[i]); } } return; } static void qlnx_fp_taskqueue(void *context, int pending) { struct qlnx_fastpath *fp; qlnx_host_t *ha; struct ifnet *ifp; #ifdef QLNX_RCV_IN_TASKQ int lro_enable; int rx_int = 0, total_rx_count = 0; struct thread *cthread; #endif /* #ifdef QLNX_RCV_IN_TASKQ */ fp = context; if (fp == NULL) return; ha = (qlnx_host_t *)fp->edev; ifp = ha->ifp; #ifdef QLNX_RCV_IN_TASKQ cthread = curthread; thread_lock(cthread); if (!sched_is_bound(cthread)) sched_bind(cthread, fp->rss_id); thread_unlock(cthread); lro_enable = ifp->if_capenable & IFCAP_LRO; rx_int = qlnx_rx_int(ha, fp, 
ha->rx_pkt_threshold, lro_enable); if (rx_int) { fp->rx_pkts += rx_int; total_rx_count += rx_int; } #ifdef QLNX_SOFT_LRO { struct lro_ctrl *lro; lro = &fp->rxq->lro; if (lro_enable && total_rx_count) { #if (__FreeBSD_version >= 1100101) || (defined QLNX_QSORT_LRO) if (ha->dbg_trace_lro_cnt) { if (lro->lro_mbuf_count & ~1023) fp->lro_cnt_1024++; else if (lro->lro_mbuf_count & ~511) fp->lro_cnt_512++; else if (lro->lro_mbuf_count & ~255) fp->lro_cnt_256++; else if (lro->lro_mbuf_count & ~127) fp->lro_cnt_128++; else if (lro->lro_mbuf_count & ~63) fp->lro_cnt_64++; } tcp_lro_flush_all(lro); #else struct lro_entry *queued; while ((!SLIST_EMPTY(&lro->lro_active))) { queued = SLIST_FIRST(&lro->lro_active); SLIST_REMOVE_HEAD(&lro->lro_active, next); tcp_lro_flush(lro, queued); } #endif /* #if (__FreeBSD_version >= 1100101) || (defined QLNX_QSORT_LRO) */ } } #endif /* #ifdef QLNX_SOFT_LRO */ ecore_sb_update_sb_idx(fp->sb_info); rmb(); #endif /* #ifdef QLNX_RCV_IN_TASKQ */ if(ifp->if_drv_flags & IFF_DRV_RUNNING) { if (!drbr_empty(ifp, fp->tx_br)) { if(mtx_trylock(&fp->tx_mtx)) { #ifdef QLNX_TRACE_PERF_DATA tx_pkts = fp->tx_pkts_transmitted; tx_compl = fp->tx_pkts_completed; #endif qlnx_transmit_locked(ifp, fp, NULL); #ifdef QLNX_TRACE_PERF_DATA fp->tx_pkts_trans_fp += (fp->tx_pkts_transmitted - tx_pkts); fp->tx_pkts_compl_fp += (fp->tx_pkts_completed - tx_compl); #endif mtx_unlock(&fp->tx_mtx); } } } #ifdef QLNX_RCV_IN_TASKQ if (rx_int) { if (fp->fp_taskqueue != NULL) taskqueue_enqueue(fp->fp_taskqueue, &fp->fp_task); } else { if (fp->tx_ring_full) { qlnx_mdelay(__func__, 100); } ecore_sb_ack(fp->sb_info, IGU_INT_ENABLE, 1); } #endif /* #ifdef QLNX_RCV_IN_TASKQ */ QL_DPRINT2(ha, "exit \n"); return; } static int qlnx_create_fp_taskqueues(qlnx_host_t *ha) { int i; uint8_t tq_name[32]; struct qlnx_fastpath *fp; for (i = 0; i < ha->num_rss; i++) { fp = &ha->fp_array[i]; bzero(tq_name, sizeof (tq_name)); snprintf(tq_name, sizeof (tq_name), "ql_fp_tq_%d", i); TASK_INIT(&fp->fp_task, 0, qlnx_fp_taskqueue, fp); fp->fp_taskqueue = taskqueue_create_fast(tq_name, M_NOWAIT, taskqueue_thread_enqueue, &fp->fp_taskqueue); if (fp->fp_taskqueue == NULL) return (-1); taskqueue_start_threads(&fp->fp_taskqueue, 1, PI_NET, "%s", tq_name); QL_DPRINT1(ha, "%p\n",fp->fp_taskqueue); } return (0); } static void qlnx_destroy_fp_taskqueues(qlnx_host_t *ha) { int i; struct qlnx_fastpath *fp; for (i = 0; i < ha->num_rss; i++) { fp = &ha->fp_array[i]; if (fp->fp_taskqueue != NULL) { taskqueue_drain(fp->fp_taskqueue, &fp->fp_task); taskqueue_free(fp->fp_taskqueue); fp->fp_taskqueue = NULL; } } return; } static void qlnx_drain_fp_taskqueues(qlnx_host_t *ha) { int i; struct qlnx_fastpath *fp; for (i = 0; i < ha->num_rss; i++) { fp = &ha->fp_array[i]; if (fp->fp_taskqueue != NULL) { QLNX_UNLOCK(ha); taskqueue_drain(fp->fp_taskqueue, &fp->fp_task); QLNX_LOCK(ha); } } return; } static void qlnx_get_params(qlnx_host_t *ha) { if ((qlnxe_queue_count < 0) || (qlnxe_queue_count > QLNX_MAX_RSS)) { device_printf(ha->pci_dev, "invalid queue_count value (%d)\n", qlnxe_queue_count); qlnxe_queue_count = 0; } return; } /* * Name: qlnx_pci_attach * Function: attaches the device to the operating system */ static int qlnx_pci_attach(device_t dev) { qlnx_host_t *ha = NULL; uint32_t rsrc_len_reg = 0; uint32_t rsrc_len_dbells = 0; uint32_t rsrc_len_msix = 0; int i; uint32_t mfw_ver; if ((ha = device_get_softc(dev)) == NULL) { device_printf(dev, "cannot get softc\n"); return (ENOMEM); } memset(ha, 0, sizeof (qlnx_host_t)); if (qlnx_valid_device(dev) != 
0) { device_printf(dev, "device is not valid device\n"); return (ENXIO); } ha->pci_func = pci_get_function(dev); ha->pci_dev = dev; mtx_init(&ha->hw_lock, "qlnx_hw_lock", MTX_NETWORK_LOCK, MTX_DEF); ha->flags.lock_init = 1; pci_enable_busmaster(dev); /* * map the PCI BARs */ ha->reg_rid = PCIR_BAR(0); ha->pci_reg = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &ha->reg_rid, RF_ACTIVE); if (ha->pci_reg == NULL) { device_printf(dev, "unable to map BAR0\n"); goto qlnx_pci_attach_err; } rsrc_len_reg = (uint32_t) bus_get_resource_count(dev, SYS_RES_MEMORY, ha->reg_rid); ha->dbells_rid = PCIR_BAR(2); ha->pci_dbells = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &ha->dbells_rid, RF_ACTIVE); if (ha->pci_dbells == NULL) { device_printf(dev, "unable to map BAR1\n"); goto qlnx_pci_attach_err; } rsrc_len_dbells = (uint32_t) bus_get_resource_count(dev, SYS_RES_MEMORY, ha->dbells_rid); ha->dbells_phys_addr = (uint64_t) bus_get_resource_start(dev, SYS_RES_MEMORY, ha->dbells_rid);; ha->dbells_size = rsrc_len_dbells; ha->msix_rid = PCIR_BAR(4); ha->msix_bar = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &ha->msix_rid, RF_ACTIVE); if (ha->msix_bar == NULL) { device_printf(dev, "unable to map BAR2\n"); goto qlnx_pci_attach_err; } rsrc_len_msix = (uint32_t) bus_get_resource_count(dev, SYS_RES_MEMORY, ha->msix_rid); /* * allocate dma tags */ if (qlnx_alloc_parent_dma_tag(ha)) goto qlnx_pci_attach_err; if (qlnx_alloc_tx_dma_tag(ha)) goto qlnx_pci_attach_err; if (qlnx_alloc_rx_dma_tag(ha)) goto qlnx_pci_attach_err; if (qlnx_init_hw(ha) != 0) goto qlnx_pci_attach_err; qlnx_get_params(ha); if((pci_get_device(dev) == QLOGIC_PCI_DEVICE_ID_1644) && (qlnxe_queue_count == QLNX_DEFAULT_RSS)) { qlnxe_queue_count = QLNX_MAX_RSS; } /* * Allocate MSI-x vectors */ if(qlnxe_queue_count == 0) ha->num_rss = QLNX_DEFAULT_RSS; else ha->num_rss = qlnxe_queue_count; ha->num_tc = QLNX_MAX_TC; ha->msix_count = pci_msix_count(dev); if (ha->msix_count > (mp_ncpus + ha->cdev.num_hwfns)) ha->msix_count = mp_ncpus + ha->cdev.num_hwfns; if (!ha->msix_count || (ha->msix_count < (ha->cdev.num_hwfns + 1 ))) { device_printf(dev, "%s: msix_count[%d] not enough\n", __func__, ha->msix_count); goto qlnx_pci_attach_err; } if (ha->msix_count > (ha->num_rss + ha->cdev.num_hwfns )) ha->msix_count = ha->num_rss + ha->cdev.num_hwfns; else ha->num_rss = ha->msix_count - ha->cdev.num_hwfns; QL_DPRINT1(ha, "\n\t\t\tpci_reg [%p, 0x%08x 0x%08x]" "\n\t\t\tdbells [%p, 0x%08x 0x%08x]" "\n\t\t\tmsix [%p, 0x%08x 0x%08x 0x%x 0x%x]" "\n\t\t\t[ncpus = %d][num_rss = 0x%x] [num_tc = 0x%x]\n", ha->pci_reg, rsrc_len_reg, ha->reg_rid, ha->pci_dbells, rsrc_len_dbells, ha->dbells_rid, ha->msix_bar, rsrc_len_msix, ha->msix_rid, pci_msix_count(dev), ha->msix_count, mp_ncpus, ha->num_rss, ha->num_tc); if (pci_alloc_msix(dev, &ha->msix_count)) { device_printf(dev, "%s: pci_alloc_msix[%d] failed\n", __func__, ha->msix_count); ha->msix_count = 0; goto qlnx_pci_attach_err; } /* * Initialize slow path interrupt and task queue */ if (qlnx_create_sp_taskqueues(ha) != 0) goto qlnx_pci_attach_err; for (i = 0; i < ha->cdev.num_hwfns; i++) { struct ecore_hwfn *p_hwfn = &ha->cdev.hwfns[i]; ha->sp_irq_rid[i] = i + 1; ha->sp_irq[i] = bus_alloc_resource_any(dev, SYS_RES_IRQ, &ha->sp_irq_rid[i], (RF_ACTIVE | RF_SHAREABLE)); if (ha->sp_irq[i] == NULL) { device_printf(dev, "could not allocate mbx interrupt\n"); goto qlnx_pci_attach_err; } if (bus_setup_intr(dev, ha->sp_irq[i], (INTR_TYPE_NET | INTR_MPSAFE), NULL, qlnx_sp_intr, p_hwfn, &ha->sp_handle[i])) { device_printf(dev, "could not setup slow 
path interrupt\n"); goto qlnx_pci_attach_err; } QL_DPRINT1(ha, "p_hwfn [%p] sp_irq_rid %d" " sp_irq %p sp_handle %p\n", p_hwfn, ha->sp_irq_rid[i], ha->sp_irq[i], ha->sp_handle[i]); } /* * initialize fast path interrupt */ if (qlnx_create_fp_taskqueues(ha) != 0) goto qlnx_pci_attach_err; for (i = 0; i < ha->num_rss; i++) { ha->irq_vec[i].rss_idx = i; ha->irq_vec[i].ha = ha; ha->irq_vec[i].irq_rid = (1 + ha->cdev.num_hwfns) + i; ha->irq_vec[i].irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &ha->irq_vec[i].irq_rid, (RF_ACTIVE | RF_SHAREABLE)); if (ha->irq_vec[i].irq == NULL) { device_printf(dev, "could not allocate interrupt[%d]\n", i); goto qlnx_pci_attach_err; } if (qlnx_alloc_tx_br(ha, &ha->fp_array[i])) { device_printf(dev, "could not allocate tx_br[%d]\n", i); goto qlnx_pci_attach_err; } } callout_init(&ha->qlnx_callout, 1); ha->flags.callout_init = 1; for (i = 0; i < ha->cdev.num_hwfns; i++) { if (qlnx_grc_dumpsize(ha, &ha->grcdump_size[i], i) != 0) goto qlnx_pci_attach_err; if (ha->grcdump_size[i] == 0) goto qlnx_pci_attach_err; ha->grcdump_size[i] = ha->grcdump_size[i] << 2; QL_DPRINT1(ha, "grcdump_size[%d] = 0x%08x\n", i, ha->grcdump_size[i]); ha->grcdump[i] = qlnx_zalloc(ha->grcdump_size[i]); if (ha->grcdump[i] == NULL) { device_printf(dev, "grcdump alloc[%d] failed\n", i); goto qlnx_pci_attach_err; } if (qlnx_idle_chk_size(ha, &ha->idle_chk_size[i], i) != 0) goto qlnx_pci_attach_err; if (ha->idle_chk_size[i] == 0) goto qlnx_pci_attach_err; ha->idle_chk_size[i] = ha->idle_chk_size[i] << 2; QL_DPRINT1(ha, "idle_chk_size[%d] = 0x%08x\n", i, ha->idle_chk_size[i]); ha->idle_chk[i] = qlnx_zalloc(ha->idle_chk_size[i]); if (ha->idle_chk[i] == NULL) { device_printf(dev, "idle_chk alloc failed\n"); goto qlnx_pci_attach_err; } } if (qlnx_slowpath_start(ha) != 0) { qlnx_mdelay(__func__, 1000); qlnx_trigger_dump(ha); goto qlnx_pci_attach_err0; } else ha->flags.slowpath_start = 1; if (qlnx_get_flash_size(ha, &ha->flash_size) != 0) { qlnx_mdelay(__func__, 1000); qlnx_trigger_dump(ha); goto qlnx_pci_attach_err0; } if (qlnx_get_mfw_version(ha, &mfw_ver) != 0) { qlnx_mdelay(__func__, 1000); qlnx_trigger_dump(ha); goto qlnx_pci_attach_err0; } snprintf(ha->mfw_ver, sizeof(ha->mfw_ver), "%d.%d.%d.%d", ((mfw_ver >> 24) & 0xFF), ((mfw_ver >> 16) & 0xFF), ((mfw_ver >> 8) & 0xFF), (mfw_ver & 0xFF)); snprintf(ha->stormfw_ver, sizeof(ha->stormfw_ver), "%d.%d.%d.%d", FW_MAJOR_VERSION, FW_MINOR_VERSION, FW_REVISION_VERSION, FW_ENGINEERING_VERSION); QL_DPRINT1(ha, "STORM_FW version %s MFW version %s\n", ha->stormfw_ver, ha->mfw_ver); qlnx_init_ifnet(dev, ha); /* * add sysctls */ qlnx_add_sysctls(ha); qlnx_pci_attach_err0: /* * create ioctl device interface */ if (qlnx_make_cdev(ha)) { device_printf(dev, "%s: ql_make_cdev failed\n", __func__); goto qlnx_pci_attach_err; } QL_DPRINT2(ha, "success\n"); return (0); qlnx_pci_attach_err: qlnx_release(ha); return (ENXIO); } /* * Name: qlnx_pci_detach * Function: Unhooks the device from the operating system */ static int qlnx_pci_detach(device_t dev) { qlnx_host_t *ha = NULL; if ((ha = device_get_softc(dev)) == NULL) { device_printf(dev, "cannot get softc\n"); return (ENOMEM); } QLNX_LOCK(ha); qlnx_stop(ha); QLNX_UNLOCK(ha); qlnx_release(ha); return (0); } static int qlnx_init_hw(qlnx_host_t *ha) { int rval = 0; struct ecore_hw_prepare_params params; ecore_init_struct(&ha->cdev); /* ha->dp_module = ECORE_MSG_PROBE | ECORE_MSG_INTR | ECORE_MSG_SP | ECORE_MSG_LINK | ECORE_MSG_SPQ | ECORE_MSG_RDMA; ha->dp_level = ECORE_LEVEL_VERBOSE;*/ ha->dp_level = ECORE_LEVEL_NOTICE; 
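/*
 * Descriptive note (added): the code below hands the mapped register
 * window (BAR0) and doorbell window (BAR2) over to the ecore layer and
 * calls ecore_hw_prepare() to set up the hardware functions.  The
 * commented-out dp_module/dp_level assignment above shows how ecore debug
 * verbosity could be raised; ECORE_LEVEL_NOTICE is the default used here.
 */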
ecore_init_dp(&ha->cdev, ha->dp_module, ha->dp_level, ha->pci_dev); ha->cdev.regview = ha->pci_reg; ha->cdev.doorbells = ha->pci_dbells; ha->cdev.db_phys_addr = ha->dbells_phys_addr; ha->cdev.db_size = ha->dbells_size; bzero(&params, sizeof (struct ecore_hw_prepare_params)); ha->personality = ECORE_PCI_DEFAULT; params.personality = ha->personality; params.drv_resc_alloc = false; params.chk_reg_fifo = false; params.initiate_pf_flr = true; params.epoch = 0; ecore_hw_prepare(&ha->cdev, &params); qlnx_set_id(&ha->cdev, qlnx_name_str, qlnx_ver_str); return (rval); } static void qlnx_release(qlnx_host_t *ha) { device_t dev; int i; dev = ha->pci_dev; QL_DPRINT2(ha, "enter\n"); for (i = 0; i < QLNX_MAX_HW_FUNCS; i++) { if (ha->idle_chk[i] != NULL) { free(ha->idle_chk[i], M_QLNXBUF); ha->idle_chk[i] = NULL; } if (ha->grcdump[i] != NULL) { free(ha->grcdump[i], M_QLNXBUF); ha->grcdump[i] = NULL; } } if (ha->flags.callout_init) callout_drain(&ha->qlnx_callout); if (ha->flags.slowpath_start) { qlnx_slowpath_stop(ha); } ecore_hw_remove(&ha->cdev); qlnx_del_cdev(ha); if (ha->ifp != NULL) ether_ifdetach(ha->ifp); qlnx_free_tx_dma_tag(ha); qlnx_free_rx_dma_tag(ha); qlnx_free_parent_dma_tag(ha); for (i = 0; i < ha->num_rss; i++) { struct qlnx_fastpath *fp = &ha->fp_array[i]; if (ha->irq_vec[i].handle) { (void)bus_teardown_intr(dev, ha->irq_vec[i].irq, ha->irq_vec[i].handle); } if (ha->irq_vec[i].irq) { (void)bus_release_resource(dev, SYS_RES_IRQ, ha->irq_vec[i].irq_rid, ha->irq_vec[i].irq); } qlnx_free_tx_br(ha, fp); } qlnx_destroy_fp_taskqueues(ha); for (i = 0; i < ha->cdev.num_hwfns; i++) { if (ha->sp_handle[i]) (void)bus_teardown_intr(dev, ha->sp_irq[i], ha->sp_handle[i]); if (ha->sp_irq[i]) (void) bus_release_resource(dev, SYS_RES_IRQ, ha->sp_irq_rid[i], ha->sp_irq[i]); } qlnx_destroy_sp_taskqueues(ha); if (ha->msix_count) pci_release_msi(dev); if (ha->flags.lock_init) { mtx_destroy(&ha->hw_lock); } if (ha->pci_reg) (void) bus_release_resource(dev, SYS_RES_MEMORY, ha->reg_rid, ha->pci_reg); if (ha->pci_dbells) (void) bus_release_resource(dev, SYS_RES_MEMORY, ha->dbells_rid, ha->pci_dbells); if (ha->msix_bar) (void) bus_release_resource(dev, SYS_RES_MEMORY, ha->msix_rid, ha->msix_bar); QL_DPRINT2(ha, "exit\n"); return; } static void qlnx_trigger_dump(qlnx_host_t *ha) { int i; if (ha->ifp != NULL) ha->ifp->if_drv_flags &= ~(IFF_DRV_OACTIVE | IFF_DRV_RUNNING); QL_DPRINT2(ha, "enter\n"); for (i = 0; i < ha->cdev.num_hwfns; i++) { qlnx_grc_dump(ha, &ha->grcdump_dwords[i], i); qlnx_idle_chk(ha, &ha->idle_chk_dwords[i], i); } QL_DPRINT2(ha, "exit\n"); return; } static int qlnx_trigger_dump_sysctl(SYSCTL_HANDLER_ARGS) { int err, ret = 0; qlnx_host_t *ha; err = sysctl_handle_int(oidp, &ret, 0, req); if (err || !req->newptr) return (err); if (ret == 1) { ha = (qlnx_host_t *)arg1; qlnx_trigger_dump(ha); } return (err); } static int qlnx_set_tx_coalesce(SYSCTL_HANDLER_ARGS) { int err, i, ret = 0, usecs = 0; qlnx_host_t *ha; struct ecore_hwfn *p_hwfn; struct qlnx_fastpath *fp; err = sysctl_handle_int(oidp, &usecs, 0, req); if (err || !req->newptr || !usecs || (usecs > 255)) return (err); ha = (qlnx_host_t *)arg1; for (i = 0; i < ha->num_rss; i++) { p_hwfn = &ha->cdev.hwfns[(i % ha->cdev.num_hwfns)]; fp = &ha->fp_array[i]; if (fp->txq[0]->handle != NULL) { ret = ecore_set_queue_coalesce(p_hwfn, 0, (uint16_t)usecs, fp->txq[0]->handle); } } if (!ret) ha->tx_coalesce_usecs = (uint8_t)usecs; return (err); } static int qlnx_set_rx_coalesce(SYSCTL_HANDLER_ARGS) { int err, i, ret = 0, usecs = 0; qlnx_host_t *ha; struct ecore_hwfn
*p_hwfn; struct qlnx_fastpath *fp; err = sysctl_handle_int(oidp, &usecs, 0, req); if (err || !req->newptr || !usecs || (usecs > 255)) return (err); ha = (qlnx_host_t *)arg1; for (i = 0; i < ha->num_rss; i++) { p_hwfn = &ha->cdev.hwfns[(i % ha->cdev.num_hwfns)]; fp = &ha->fp_array[i]; if (fp->rxq->handle != NULL) { ret = ecore_set_queue_coalesce(p_hwfn, (uint16_t)usecs, 0, fp->rxq->handle); } } if (!ret) ha->rx_coalesce_usecs = (uint8_t)usecs; return (err); } static void qlnx_add_sp_stats_sysctls(qlnx_host_t *ha) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *children; struct sysctl_oid *ctx_oid; ctx = device_get_sysctl_ctx(ha->pci_dev); children = SYSCTL_CHILDREN(device_get_sysctl_tree(ha->pci_dev)); ctx_oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "spstat", CTLFLAG_RD, NULL, "spstat"); children = SYSCTL_CHILDREN(ctx_oid); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "sp_interrupts", CTLFLAG_RD, &ha->sp_interrupts, "No. of slowpath interrupts"); return; } static void qlnx_add_fp_stats_sysctls(qlnx_host_t *ha) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *children; struct sysctl_oid_list *node_children; struct sysctl_oid *ctx_oid; int i, j; uint8_t name_str[16]; ctx = device_get_sysctl_ctx(ha->pci_dev); children = SYSCTL_CHILDREN(device_get_sysctl_tree(ha->pci_dev)); ctx_oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "fpstat", CTLFLAG_RD, NULL, "fpstat"); children = SYSCTL_CHILDREN(ctx_oid); for (i = 0; i < ha->num_rss; i++) { bzero(name_str, (sizeof(uint8_t) * sizeof(name_str))); snprintf(name_str, sizeof(name_str), "%d", i); ctx_oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, name_str, CTLFLAG_RD, NULL, name_str); node_children = SYSCTL_CHILDREN(ctx_oid); /* Tx Related */ SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "tx_pkts_processed", CTLFLAG_RD, &ha->fp_array[i].tx_pkts_processed, "No. of packets processed for transmission"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "tx_pkts_freed", CTLFLAG_RD, &ha->fp_array[i].tx_pkts_freed, "No. of freed packets"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "tx_pkts_transmitted", CTLFLAG_RD, &ha->fp_array[i].tx_pkts_transmitted, "No. of transmitted packets"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "tx_pkts_completed", CTLFLAG_RD, &ha->fp_array[i].tx_pkts_completed, "No. of transmit completions"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "tx_non_tso_pkts", CTLFLAG_RD, &ha->fp_array[i].tx_non_tso_pkts, "No. of non LSO transmited packets"); #ifdef QLNX_TRACE_PERF_DATA SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "tx_pkts_trans_ctx", CTLFLAG_RD, &ha->fp_array[i].tx_pkts_trans_ctx, "No. of transmitted packets in transmit context"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "tx_pkts_compl_ctx", CTLFLAG_RD, &ha->fp_array[i].tx_pkts_compl_ctx, "No. of transmit completions in transmit context"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "tx_pkts_trans_fp", CTLFLAG_RD, &ha->fp_array[i].tx_pkts_trans_fp, "No. of transmitted packets in taskqueue"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "tx_pkts_compl_fp", CTLFLAG_RD, &ha->fp_array[i].tx_pkts_compl_fp, "No. of transmit completions in taskqueue"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "tx_pkts_compl_intr", CTLFLAG_RD, &ha->fp_array[i].tx_pkts_compl_intr, "No. of transmit completions in interrupt ctx"); #endif SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "tx_tso_pkts", CTLFLAG_RD, &ha->fp_array[i].tx_tso_pkts, "No. 
of LSO transmited packets"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "tx_lso_wnd_min_len", CTLFLAG_RD, &ha->fp_array[i].tx_lso_wnd_min_len, "tx_lso_wnd_min_len"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "tx_defrag", CTLFLAG_RD, &ha->fp_array[i].tx_defrag, "tx_defrag"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "tx_nsegs_gt_elem_left", CTLFLAG_RD, &ha->fp_array[i].tx_nsegs_gt_elem_left, "tx_nsegs_gt_elem_left"); SYSCTL_ADD_UINT(ctx, node_children, OID_AUTO, "tx_tso_max_nsegs", CTLFLAG_RD, &ha->fp_array[i].tx_tso_max_nsegs, ha->fp_array[i].tx_tso_max_nsegs, "tx_tso_max_nsegs"); SYSCTL_ADD_UINT(ctx, node_children, OID_AUTO, "tx_tso_min_nsegs", CTLFLAG_RD, &ha->fp_array[i].tx_tso_min_nsegs, ha->fp_array[i].tx_tso_min_nsegs, "tx_tso_min_nsegs"); SYSCTL_ADD_UINT(ctx, node_children, OID_AUTO, "tx_tso_max_pkt_len", CTLFLAG_RD, &ha->fp_array[i].tx_tso_max_pkt_len, ha->fp_array[i].tx_tso_max_pkt_len, "tx_tso_max_pkt_len"); SYSCTL_ADD_UINT(ctx, node_children, OID_AUTO, "tx_tso_min_pkt_len", CTLFLAG_RD, &ha->fp_array[i].tx_tso_min_pkt_len, ha->fp_array[i].tx_tso_min_pkt_len, "tx_tso_min_pkt_len"); for (j = 0; j < QLNX_FP_MAX_SEGS; j++) { bzero(name_str, (sizeof(uint8_t) * sizeof(name_str))); snprintf(name_str, sizeof(name_str), "tx_pkts_nseg_%02d", (j+1)); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, name_str, CTLFLAG_RD, &ha->fp_array[i].tx_pkts[j], name_str); } #ifdef QLNX_TRACE_PERF_DATA for (j = 0; j < 18; j++) { bzero(name_str, (sizeof(uint8_t) * sizeof(name_str))); snprintf(name_str, sizeof(name_str), "tx_pkts_hist_%02d", (j+1)); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, name_str, CTLFLAG_RD, &ha->fp_array[i].tx_pkts_hist[j], name_str); } for (j = 0; j < 5; j++) { bzero(name_str, (sizeof(uint8_t) * sizeof(name_str))); snprintf(name_str, sizeof(name_str), "tx_comInt_%02d", (j+1)); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, name_str, CTLFLAG_RD, &ha->fp_array[i].tx_comInt[j], name_str); } for (j = 0; j < 18; j++) { bzero(name_str, (sizeof(uint8_t) * sizeof(name_str))); snprintf(name_str, sizeof(name_str), "tx_pkts_q_%02d", (j+1)); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, name_str, CTLFLAG_RD, &ha->fp_array[i].tx_pkts_q[j], name_str); } #endif SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "err_tx_nsegs_gt_elem_left", CTLFLAG_RD, &ha->fp_array[i].err_tx_nsegs_gt_elem_left, "err_tx_nsegs_gt_elem_left"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "err_tx_dmamap_create", CTLFLAG_RD, &ha->fp_array[i].err_tx_dmamap_create, "err_tx_dmamap_create"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "err_tx_defrag_dmamap_load", CTLFLAG_RD, &ha->fp_array[i].err_tx_defrag_dmamap_load, "err_tx_defrag_dmamap_load"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "err_tx_non_tso_max_seg", CTLFLAG_RD, &ha->fp_array[i].err_tx_non_tso_max_seg, "err_tx_non_tso_max_seg"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "err_tx_dmamap_load", CTLFLAG_RD, &ha->fp_array[i].err_tx_dmamap_load, "err_tx_dmamap_load"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "err_tx_defrag", CTLFLAG_RD, &ha->fp_array[i].err_tx_defrag, "err_tx_defrag"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "err_tx_free_pkt_null", CTLFLAG_RD, &ha->fp_array[i].err_tx_free_pkt_null, "err_tx_free_pkt_null"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "err_tx_cons_idx_conflict", CTLFLAG_RD, &ha->fp_array[i].err_tx_cons_idx_conflict, "err_tx_cons_idx_conflict"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "lro_cnt_64", CTLFLAG_RD, &ha->fp_array[i].lro_cnt_64, "lro_cnt_64"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, 
"lro_cnt_128", CTLFLAG_RD, &ha->fp_array[i].lro_cnt_128, "lro_cnt_128"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "lro_cnt_256", CTLFLAG_RD, &ha->fp_array[i].lro_cnt_256, "lro_cnt_256"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "lro_cnt_512", CTLFLAG_RD, &ha->fp_array[i].lro_cnt_512, "lro_cnt_512"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "lro_cnt_1024", CTLFLAG_RD, &ha->fp_array[i].lro_cnt_1024, "lro_cnt_1024"); /* Rx Related */ SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "rx_pkts", CTLFLAG_RD, &ha->fp_array[i].rx_pkts, "No. of received packets"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "tpa_start", CTLFLAG_RD, &ha->fp_array[i].tpa_start, "No. of tpa_start packets"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "tpa_cont", CTLFLAG_RD, &ha->fp_array[i].tpa_cont, "No. of tpa_cont packets"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "tpa_end", CTLFLAG_RD, &ha->fp_array[i].tpa_end, "No. of tpa_end packets"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "err_m_getcl", CTLFLAG_RD, &ha->fp_array[i].err_m_getcl, "err_m_getcl"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "err_m_getjcl", CTLFLAG_RD, &ha->fp_array[i].err_m_getjcl, "err_m_getjcl"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "err_rx_hw_errors", CTLFLAG_RD, &ha->fp_array[i].err_rx_hw_errors, "err_rx_hw_errors"); SYSCTL_ADD_QUAD(ctx, node_children, OID_AUTO, "err_rx_alloc_errors", CTLFLAG_RD, &ha->fp_array[i].err_rx_alloc_errors, "err_rx_alloc_errors"); } return; } static void qlnx_add_hw_stats_sysctls(qlnx_host_t *ha) { struct sysctl_ctx_list *ctx; struct sysctl_oid_list *children; struct sysctl_oid *ctx_oid; ctx = device_get_sysctl_ctx(ha->pci_dev); children = SYSCTL_CHILDREN(device_get_sysctl_tree(ha->pci_dev)); ctx_oid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO, "hwstat", CTLFLAG_RD, NULL, "hwstat"); children = SYSCTL_CHILDREN(ctx_oid); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "no_buff_discards", CTLFLAG_RD, &ha->hw_stats.common.no_buff_discards, "No. of packets discarded due to lack of buffer"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "packet_too_big_discard", CTLFLAG_RD, &ha->hw_stats.common.packet_too_big_discard, "No. 
of packets discarded because packet was too big"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "ttl0_discard", CTLFLAG_RD, &ha->hw_stats.common.ttl0_discard, "ttl0_discard"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_ucast_bytes", CTLFLAG_RD, &ha->hw_stats.common.rx_ucast_bytes, "rx_ucast_bytes"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_mcast_bytes", CTLFLAG_RD, &ha->hw_stats.common.rx_mcast_bytes, "rx_mcast_bytes"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_bcast_bytes", CTLFLAG_RD, &ha->hw_stats.common.rx_bcast_bytes, "rx_bcast_bytes"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_ucast_pkts", CTLFLAG_RD, &ha->hw_stats.common.rx_ucast_pkts, "rx_ucast_pkts"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_mcast_pkts", CTLFLAG_RD, &ha->hw_stats.common.rx_mcast_pkts, "rx_mcast_pkts"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_bcast_pkts", CTLFLAG_RD, &ha->hw_stats.common.rx_bcast_pkts, "rx_bcast_pkts"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "mftag_filter_discards", CTLFLAG_RD, &ha->hw_stats.common.mftag_filter_discards, "mftag_filter_discards"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "mac_filter_discards", CTLFLAG_RD, &ha->hw_stats.common.mac_filter_discards, "mac_filter_discards"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_ucast_bytes", CTLFLAG_RD, &ha->hw_stats.common.tx_ucast_bytes, "tx_ucast_bytes"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_mcast_bytes", CTLFLAG_RD, &ha->hw_stats.common.tx_mcast_bytes, "tx_mcast_bytes"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_bcast_bytes", CTLFLAG_RD, &ha->hw_stats.common.tx_bcast_bytes, "tx_bcast_bytes"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_ucast_pkts", CTLFLAG_RD, &ha->hw_stats.common.tx_ucast_pkts, "tx_ucast_pkts"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_mcast_pkts", CTLFLAG_RD, &ha->hw_stats.common.tx_mcast_pkts, "tx_mcast_pkts"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_bcast_pkts", CTLFLAG_RD, &ha->hw_stats.common.tx_bcast_pkts, "tx_bcast_pkts"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_err_drop_pkts", CTLFLAG_RD, &ha->hw_stats.common.tx_err_drop_pkts, "tx_err_drop_pkts"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tpa_coalesced_pkts", CTLFLAG_RD, &ha->hw_stats.common.tpa_coalesced_pkts, "tpa_coalesced_pkts"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tpa_coalesced_events", CTLFLAG_RD, &ha->hw_stats.common.tpa_coalesced_events, "tpa_coalesced_events"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tpa_aborts_num", CTLFLAG_RD, &ha->hw_stats.common.tpa_aborts_num, "tpa_aborts_num"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tpa_not_coalesced_pkts", CTLFLAG_RD, &ha->hw_stats.common.tpa_not_coalesced_pkts, "tpa_not_coalesced_pkts"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tpa_coalesced_bytes", CTLFLAG_RD, &ha->hw_stats.common.tpa_coalesced_bytes, "tpa_coalesced_bytes"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_64_byte_packets", CTLFLAG_RD, &ha->hw_stats.common.rx_64_byte_packets, "rx_64_byte_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_65_to_127_byte_packets", CTLFLAG_RD, &ha->hw_stats.common.rx_65_to_127_byte_packets, "rx_65_to_127_byte_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_128_to_255_byte_packets", CTLFLAG_RD, &ha->hw_stats.common.rx_128_to_255_byte_packets, "rx_128_to_255_byte_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_256_to_511_byte_packets", CTLFLAG_RD, &ha->hw_stats.common.rx_256_to_511_byte_packets, "rx_256_to_511_byte_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_512_to_1023_byte_packets", CTLFLAG_RD, 
&ha->hw_stats.common.rx_512_to_1023_byte_packets, "rx_512_to_1023_byte_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_1024_to_1518_byte_packets", CTLFLAG_RD, &ha->hw_stats.common.rx_1024_to_1518_byte_packets, "rx_1024_to_1518_byte_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_1519_to_1522_byte_packets", CTLFLAG_RD, &ha->hw_stats.bb.rx_1519_to_1522_byte_packets, "rx_1519_to_1522_byte_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_1523_to_2047_byte_packets", CTLFLAG_RD, &ha->hw_stats.bb.rx_1519_to_2047_byte_packets, "rx_1523_to_2047_byte_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_2048_to_4095_byte_packets", CTLFLAG_RD, &ha->hw_stats.bb.rx_2048_to_4095_byte_packets, "rx_2048_to_4095_byte_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_4096_to_9216_byte_packets", CTLFLAG_RD, &ha->hw_stats.bb.rx_4096_to_9216_byte_packets, "rx_4096_to_9216_byte_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_9217_to_16383_byte_packets", CTLFLAG_RD, &ha->hw_stats.bb.rx_9217_to_16383_byte_packets, "rx_9217_to_16383_byte_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_crc_errors", CTLFLAG_RD, &ha->hw_stats.common.rx_crc_errors, "rx_crc_errors"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_mac_crtl_frames", CTLFLAG_RD, &ha->hw_stats.common.rx_mac_crtl_frames, "rx_mac_crtl_frames"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_pause_frames", CTLFLAG_RD, &ha->hw_stats.common.rx_pause_frames, "rx_pause_frames"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_pfc_frames", CTLFLAG_RD, &ha->hw_stats.common.rx_pfc_frames, "rx_pfc_frames"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_align_errors", CTLFLAG_RD, &ha->hw_stats.common.rx_align_errors, "rx_align_errors"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_carrier_errors", CTLFLAG_RD, &ha->hw_stats.common.rx_carrier_errors, "rx_carrier_errors"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_oversize_packets", CTLFLAG_RD, &ha->hw_stats.common.rx_oversize_packets, "rx_oversize_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_jabbers", CTLFLAG_RD, &ha->hw_stats.common.rx_jabbers, "rx_jabbers"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_undersize_packets", CTLFLAG_RD, &ha->hw_stats.common.rx_undersize_packets, "rx_undersize_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_fragments", CTLFLAG_RD, &ha->hw_stats.common.rx_fragments, "rx_fragments"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_64_byte_packets", CTLFLAG_RD, &ha->hw_stats.common.tx_64_byte_packets, "tx_64_byte_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_65_to_127_byte_packets", CTLFLAG_RD, &ha->hw_stats.common.tx_65_to_127_byte_packets, "tx_65_to_127_byte_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_128_to_255_byte_packets", CTLFLAG_RD, &ha->hw_stats.common.tx_128_to_255_byte_packets, "tx_128_to_255_byte_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_256_to_511_byte_packets", CTLFLAG_RD, &ha->hw_stats.common.tx_256_to_511_byte_packets, "tx_256_to_511_byte_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_512_to_1023_byte_packets", CTLFLAG_RD, &ha->hw_stats.common.tx_512_to_1023_byte_packets, "tx_512_to_1023_byte_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_1024_to_1518_byte_packets", CTLFLAG_RD, &ha->hw_stats.common.tx_1024_to_1518_byte_packets, "tx_1024_to_1518_byte_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_1519_to_2047_byte_packets", CTLFLAG_RD, &ha->hw_stats.bb.tx_1519_to_2047_byte_packets, "tx_1519_to_2047_byte_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, 
"tx_2048_to_4095_byte_packets", CTLFLAG_RD, &ha->hw_stats.bb.tx_2048_to_4095_byte_packets, "tx_2048_to_4095_byte_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_4096_to_9216_byte_packets", CTLFLAG_RD, &ha->hw_stats.bb.tx_4096_to_9216_byte_packets, "tx_4096_to_9216_byte_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_9217_to_16383_byte_packets", CTLFLAG_RD, &ha->hw_stats.bb.tx_9217_to_16383_byte_packets, "tx_9217_to_16383_byte_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_pause_frames", CTLFLAG_RD, &ha->hw_stats.common.tx_pause_frames, "tx_pause_frames"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_pfc_frames", CTLFLAG_RD, &ha->hw_stats.common.tx_pfc_frames, "tx_pfc_frames"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_lpi_entry_count", CTLFLAG_RD, &ha->hw_stats.bb.tx_lpi_entry_count, "tx_lpi_entry_count"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_total_collisions", CTLFLAG_RD, &ha->hw_stats.bb.tx_total_collisions, "tx_total_collisions"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "brb_truncates", CTLFLAG_RD, &ha->hw_stats.common.brb_truncates, "brb_truncates"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "brb_discards", CTLFLAG_RD, &ha->hw_stats.common.brb_discards, "brb_discards"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_mac_bytes", CTLFLAG_RD, &ha->hw_stats.common.rx_mac_bytes, "rx_mac_bytes"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_mac_uc_packets", CTLFLAG_RD, &ha->hw_stats.common.rx_mac_uc_packets, "rx_mac_uc_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_mac_mc_packets", CTLFLAG_RD, &ha->hw_stats.common.rx_mac_mc_packets, "rx_mac_mc_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_mac_bc_packets", CTLFLAG_RD, &ha->hw_stats.common.rx_mac_bc_packets, "rx_mac_bc_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "rx_mac_frames_ok", CTLFLAG_RD, &ha->hw_stats.common.rx_mac_frames_ok, "rx_mac_frames_ok"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_mac_bytes", CTLFLAG_RD, &ha->hw_stats.common.tx_mac_bytes, "tx_mac_bytes"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_mac_uc_packets", CTLFLAG_RD, &ha->hw_stats.common.tx_mac_uc_packets, "tx_mac_uc_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_mac_mc_packets", CTLFLAG_RD, &ha->hw_stats.common.tx_mac_mc_packets, "tx_mac_mc_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_mac_bc_packets", CTLFLAG_RD, &ha->hw_stats.common.tx_mac_bc_packets, "tx_mac_bc_packets"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tx_mac_ctrl_frames", CTLFLAG_RD, &ha->hw_stats.common.tx_mac_ctrl_frames, "tx_mac_ctrl_frames"); return; } static void qlnx_add_sysctls(qlnx_host_t *ha) { device_t dev = ha->pci_dev; struct sysctl_ctx_list *ctx; struct sysctl_oid_list *children; ctx = device_get_sysctl_ctx(dev); children = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); qlnx_add_fp_stats_sysctls(ha); qlnx_add_sp_stats_sysctls(ha); qlnx_add_hw_stats_sysctls(ha); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Driver_Version", CTLFLAG_RD, qlnx_ver_str, 0, "Driver Version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "STORMFW_Version", CTLFLAG_RD, ha->stormfw_ver, 0, "STORM Firmware Version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "MFW_Version", CTLFLAG_RD, ha->mfw_ver, 0, "Management Firmware Version"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "personality", CTLFLAG_RD, &ha->personality, ha->personality, "\tpersonality = 0 => Ethernet Only\n" "\tpersonality = 3 => Ethernet and RoCE\n" "\tpersonality = 4 => Ethernet and iWARP\n" "\tpersonality = 6 => Default in Shared Memory\n"); ha->dbg_level = 0; 
SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "debug", CTLFLAG_RW, &ha->dbg_level, ha->dbg_level, "Debug Level"); ha->dp_level = 0x01; SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "dp_level", CTLFLAG_RW, &ha->dp_level, ha->dp_level, "DP Level"); ha->dbg_trace_lro_cnt = 0; SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "dbg_trace_lro_cnt", CTLFLAG_RW, &ha->dbg_trace_lro_cnt, ha->dbg_trace_lro_cnt, "Trace LRO Counts"); ha->dbg_trace_tso_pkt_len = 0; SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "dbg_trace_tso_pkt_len", CTLFLAG_RW, &ha->dbg_trace_tso_pkt_len, ha->dbg_trace_tso_pkt_len, "Trace TSO packet lengths"); ha->dp_module = 0; SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "dp_module", CTLFLAG_RW, &ha->dp_module, ha->dp_module, "DP Module"); ha->err_inject = 0; SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "err_inject", CTLFLAG_RW, &ha->err_inject, ha->err_inject, "Error Inject"); ha->storm_stats_enable = 0; SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "storm_stats_enable", CTLFLAG_RW, &ha->storm_stats_enable, ha->storm_stats_enable, "Enable Storm Statistics Gathering"); ha->storm_stats_index = 0; SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "storm_stats_index", CTLFLAG_RD, &ha->storm_stats_index, ha->storm_stats_index, "Enable Storm Statistics Gathering Current Index"); ha->grcdump_taken = 0; SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "grcdump_taken", CTLFLAG_RD, &ha->grcdump_taken, ha->grcdump_taken, "grcdump_taken"); ha->idle_chk_taken = 0; SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "idle_chk_taken", CTLFLAG_RD, &ha->idle_chk_taken, ha->idle_chk_taken, "idle_chk_taken"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rx_coalesce_usecs", CTLFLAG_RD, &ha->rx_coalesce_usecs, ha->rx_coalesce_usecs, "rx_coalesce_usecs"); SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "tx_coalesce_usecs", CTLFLAG_RD, &ha->tx_coalesce_usecs, ha->tx_coalesce_usecs, "tx_coalesce_usecs"); ha->rx_pkt_threshold = 128; SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rx_pkt_threshold", CTLFLAG_RW, &ha->rx_pkt_threshold, ha->rx_pkt_threshold, "No. 
of Rx Pkts to process at a time"); ha->rx_jumbo_buf_eq_mtu = 0; SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "rx_jumbo_buf_eq_mtu", CTLFLAG_RW, &ha->rx_jumbo_buf_eq_mtu, ha->rx_jumbo_buf_eq_mtu, "== 0 => Rx Jumbo buffers are capped to 4Kbytes\n" "otherwise Rx Jumbo buffers are set to >= MTU size\n"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "trigger_dump", CTLTYPE_INT | CTLFLAG_RW, (void *)ha, 0, qlnx_trigger_dump_sysctl, "I", "trigger_dump"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "set_rx_coalesce_usecs", CTLTYPE_INT | CTLFLAG_RW, (void *)ha, 0, qlnx_set_rx_coalesce, "I", "rx interrupt coalesce period microseconds"); SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "set_tx_coalesce_usecs", CTLTYPE_INT | CTLFLAG_RW, (void *)ha, 0, qlnx_set_tx_coalesce, "I", "tx interrupt coalesce period microseconds"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "err_illegal_intr", CTLFLAG_RD, &ha->err_illegal_intr, "err_illegal_intr"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "err_fp_null", CTLFLAG_RD, &ha->err_fp_null, "err_fp_null"); SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "err_get_proto_invalid_type", CTLFLAG_RD, &ha->err_get_proto_invalid_type, "err_get_proto_invalid_type"); return; } /***************************************************************************** * Operating System Network Interface Functions *****************************************************************************/ static void qlnx_init_ifnet(device_t dev, qlnx_host_t *ha) { uint16_t device_id; struct ifnet *ifp; ifp = ha->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) panic("%s: cannot if_alloc()\n", device_get_nameunit(dev)); if_initname(ifp, device_get_name(dev), device_get_unit(dev)); device_id = pci_get_device(ha->pci_dev); #if __FreeBSD_version >= 1000000 if (device_id == QLOGIC_PCI_DEVICE_ID_1634) ifp->if_baudrate = IF_Gbps(40); else if ((device_id == QLOGIC_PCI_DEVICE_ID_1656) || (device_id == QLOGIC_PCI_DEVICE_ID_8070)) ifp->if_baudrate = IF_Gbps(25); else if (device_id == QLOGIC_PCI_DEVICE_ID_1654) ifp->if_baudrate = IF_Gbps(50); else if (device_id == QLOGIC_PCI_DEVICE_ID_1644) ifp->if_baudrate = IF_Gbps(100); ifp->if_capabilities = IFCAP_LINKSTATE; #else ifp->if_mtu = ETHERMTU; ifp->if_baudrate = (1 * 1000 * 1000 *1000); #endif /* #if __FreeBSD_version >= 1000000 */ ifp->if_init = qlnx_init; ifp->if_softc = ha; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = qlnx_ioctl; ifp->if_transmit = qlnx_transmit; ifp->if_qflush = qlnx_qflush; IFQ_SET_MAXLEN(&ifp->if_snd, qlnx_get_ifq_snd_maxlen(ha)); ifp->if_snd.ifq_drv_maxlen = qlnx_get_ifq_snd_maxlen(ha); IFQ_SET_READY(&ifp->if_snd); #if __FreeBSD_version >= 1100036 if_setgetcounterfn(ifp, qlnx_get_counter); #endif ha->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; memcpy(ha->primary_mac, qlnx_get_mac_addr(ha), ETH_ALEN); ether_ifattach(ifp, ha->primary_mac); bcopy(IF_LLADDR(ha->ifp), ha->primary_mac, ETHER_ADDR_LEN); ifp->if_capabilities = IFCAP_HWCSUM; ifp->if_capabilities |= IFCAP_JUMBO_MTU; ifp->if_capabilities |= IFCAP_VLAN_MTU; ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING; ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; ifp->if_capabilities |= IFCAP_VLAN_HWCSUM; ifp->if_capabilities |= IFCAP_VLAN_HWTSO; ifp->if_capabilities |= IFCAP_TSO4; ifp->if_capabilities |= IFCAP_TSO6; ifp->if_capabilities |= IFCAP_LRO; ifp->if_hw_tsomax = QLNX_MAX_TSO_FRAME_SIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); ifp->if_hw_tsomaxsegcount = QLNX_MAX_SEGMENTS - 1 /* hdr */; ifp->if_hw_tsomaxsegsize = QLNX_MAX_TX_MBUF_SIZE; ifp->if_capenable = ifp->if_capabilities; 
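/*
 * Descriptive note (added): the if_hwassist bits below advertise the
 * checksum and TSO work the NIC will take over from the stack; they
 * should mirror the IFCAP_* capabilities enabled above so the stack only
 * hands down offload work the hardware actually accepts.
 */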
ifp->if_hwassist = CSUM_IP; ifp->if_hwassist |= CSUM_TCP | CSUM_UDP; ifp->if_hwassist |= CSUM_TCP_IPV6 | CSUM_UDP_IPV6; ifp->if_hwassist |= CSUM_TSO; ifp->if_hdrlen = sizeof(struct ether_vlan_header); ifmedia_init(&ha->media, IFM_IMASK, qlnx_media_change,\ qlnx_media_status); if (device_id == QLOGIC_PCI_DEVICE_ID_1634) { ifmedia_add(&ha->media, (IFM_ETHER | IFM_40G_LR4), 0, NULL); ifmedia_add(&ha->media, (IFM_ETHER | IFM_40G_SR4), 0, NULL); ifmedia_add(&ha->media, (IFM_ETHER | IFM_40G_CR4), 0, NULL); } else if ((device_id == QLOGIC_PCI_DEVICE_ID_1656) || (device_id == QLOGIC_PCI_DEVICE_ID_8070)) { ifmedia_add(&ha->media, (IFM_ETHER | QLNX_IFM_25G_SR), 0, NULL); ifmedia_add(&ha->media, (IFM_ETHER | QLNX_IFM_25G_CR), 0, NULL); } else if (device_id == QLOGIC_PCI_DEVICE_ID_1654) { ifmedia_add(&ha->media, (IFM_ETHER | IFM_50G_KR2), 0, NULL); ifmedia_add(&ha->media, (IFM_ETHER | IFM_50G_CR2), 0, NULL); } else if (device_id == QLOGIC_PCI_DEVICE_ID_1644) { ifmedia_add(&ha->media, (IFM_ETHER | QLNX_IFM_100G_LR4), 0, NULL); ifmedia_add(&ha->media, (IFM_ETHER | QLNX_IFM_100G_SR4), 0, NULL); ifmedia_add(&ha->media, (IFM_ETHER | QLNX_IFM_100G_CR4), 0, NULL); } ifmedia_add(&ha->media, (IFM_ETHER | IFM_FDX), 0, NULL); ifmedia_add(&ha->media, (IFM_ETHER | IFM_AUTO), 0, NULL); ifmedia_set(&ha->media, (IFM_ETHER | IFM_AUTO)); QL_DPRINT2(ha, "exit\n"); return; } static void qlnx_init_locked(qlnx_host_t *ha) { struct ifnet *ifp = ha->ifp; QL_DPRINT1(ha, "Driver Initialization start \n"); qlnx_stop(ha); if (qlnx_load(ha) == 0) { ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; } return; } static void qlnx_init(void *arg) { qlnx_host_t *ha; ha = (qlnx_host_t *)arg; QL_DPRINT2(ha, "enter\n"); QLNX_LOCK(ha); qlnx_init_locked(ha); QLNX_UNLOCK(ha); QL_DPRINT2(ha, "exit\n"); return; } static int qlnx_config_mcast_mac_addr(qlnx_host_t *ha, uint8_t *mac_addr, uint32_t add_mac) { struct ecore_filter_mcast *mcast; struct ecore_dev *cdev; int rc; cdev = &ha->cdev; mcast = &ha->ecore_mcast; bzero(mcast, sizeof(struct ecore_filter_mcast)); if (add_mac) mcast->opcode = ECORE_FILTER_ADD; else mcast->opcode = ECORE_FILTER_REMOVE; mcast->num_mc_addrs = 1; memcpy(mcast->mac, mac_addr, ETH_ALEN); rc = ecore_filter_mcast_cmd(cdev, mcast, ECORE_SPQ_MODE_CB, NULL); return (rc); } static int qlnx_hw_add_mcast(qlnx_host_t *ha, uint8_t *mta) { int i; for (i = 0; i < QLNX_MAX_NUM_MULTICAST_ADDRS; i++) { if (QL_MAC_CMP(ha->mcast[i].addr, mta) == 0) return 0; /* its been already added */ } for (i = 0; i < QLNX_MAX_NUM_MULTICAST_ADDRS; i++) { if ((ha->mcast[i].addr[0] == 0) && (ha->mcast[i].addr[1] == 0) && (ha->mcast[i].addr[2] == 0) && (ha->mcast[i].addr[3] == 0) && (ha->mcast[i].addr[4] == 0) && (ha->mcast[i].addr[5] == 0)) { if (qlnx_config_mcast_mac_addr(ha, mta, 1)) return (-1); bcopy(mta, ha->mcast[i].addr, ETH_ALEN); ha->nmcast++; return 0; } } return 0; } static int qlnx_hw_del_mcast(qlnx_host_t *ha, uint8_t *mta) { int i; for (i = 0; i < QLNX_MAX_NUM_MULTICAST_ADDRS; i++) { if (QL_MAC_CMP(ha->mcast[i].addr, mta) == 0) { if (qlnx_config_mcast_mac_addr(ha, mta, 0)) return (-1); ha->mcast[i].addr[0] = 0; ha->mcast[i].addr[1] = 0; ha->mcast[i].addr[2] = 0; ha->mcast[i].addr[3] = 0; ha->mcast[i].addr[4] = 0; ha->mcast[i].addr[5] = 0; ha->nmcast--; return 0; } } return 0; } /* * Name: qls_hw_set_multi * Function: Sets the Multicast Addresses provided the host O.S into the * hardware (for the given interface) */ static void qlnx_hw_set_multi(qlnx_host_t *ha, uint8_t *mta, uint32_t mcnt, uint32_t add_mac) { 
int i; for (i = 0; i < mcnt; i++) { if (add_mac) { if (qlnx_hw_add_mcast(ha, mta)) break; } else { if (qlnx_hw_del_mcast(ha, mta)) break; } mta += ETHER_HDR_LEN; } return; } #define QLNX_MCAST_ADDRS_SIZE (QLNX_MAX_NUM_MULTICAST_ADDRS * ETHER_HDR_LEN) static int qlnx_set_multi(qlnx_host_t *ha, uint32_t add_multi) { uint8_t mta[QLNX_MCAST_ADDRS_SIZE]; struct ifmultiaddr *ifma; int mcnt = 0; struct ifnet *ifp = ha->ifp; int ret = 0; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (mcnt == QLNX_MAX_NUM_MULTICAST_ADDRS) break; bcopy(LLADDR((struct sockaddr_dl *) ifma->ifma_addr), &mta[mcnt * ETHER_HDR_LEN], ETHER_HDR_LEN); mcnt++; } if_maddr_runlock(ifp); QLNX_LOCK(ha); qlnx_hw_set_multi(ha, mta, mcnt, add_multi); QLNX_UNLOCK(ha); return (ret); } static int qlnx_set_promisc(qlnx_host_t *ha) { int rc = 0; uint8_t filter; filter = ha->filter; filter |= ECORE_ACCEPT_MCAST_UNMATCHED; filter |= ECORE_ACCEPT_UCAST_UNMATCHED; rc = qlnx_set_rx_accept_filter(ha, filter); return (rc); } static int qlnx_set_allmulti(qlnx_host_t *ha) { int rc = 0; uint8_t filter; filter = ha->filter; filter |= ECORE_ACCEPT_MCAST_UNMATCHED; rc = qlnx_set_rx_accept_filter(ha, filter); return (rc); } static int qlnx_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { int ret = 0, mask; struct ifreq *ifr = (struct ifreq *)data; struct ifaddr *ifa = (struct ifaddr *)data; qlnx_host_t *ha; ha = (qlnx_host_t *)ifp->if_softc; switch (cmd) { case SIOCSIFADDR: QL_DPRINT4(ha, "SIOCSIFADDR (0x%lx)\n", cmd); if (ifa->ifa_addr->sa_family == AF_INET) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { QLNX_LOCK(ha); qlnx_init_locked(ha); QLNX_UNLOCK(ha); } QL_DPRINT4(ha, "SIOCSIFADDR (0x%lx) ipv4 [0x%08x]\n", cmd, ntohl(IA_SIN(ifa)->sin_addr.s_addr)); arp_ifinit(ifp, ifa); } else { ether_ioctl(ifp, cmd, data); } break; case SIOCSIFMTU: QL_DPRINT4(ha, "SIOCSIFMTU (0x%lx)\n", cmd); if (ifr->ifr_mtu > QLNX_MAX_MTU) { ret = EINVAL; } else { QLNX_LOCK(ha); ifp->if_mtu = ifr->ifr_mtu; ha->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { qlnx_init_locked(ha); } QLNX_UNLOCK(ha); } break; case SIOCSIFFLAGS: QL_DPRINT4(ha, "SIOCSIFFLAGS (0x%lx)\n", cmd); QLNX_LOCK(ha); if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { if ((ifp->if_flags ^ ha->if_flags) & IFF_PROMISC) { ret = qlnx_set_promisc(ha); } else if ((ifp->if_flags ^ ha->if_flags) & IFF_ALLMULTI) { ret = qlnx_set_allmulti(ha); } } else { ha->max_frame_size = ifp->if_mtu + ETHER_HDR_LEN + ETHER_CRC_LEN; qlnx_init_locked(ha); } } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) qlnx_stop(ha); ha->if_flags = ifp->if_flags; } QLNX_UNLOCK(ha); break; case SIOCADDMULTI: QL_DPRINT4(ha, "%s (0x%lx)\n", "SIOCADDMULTI", cmd); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { if (qlnx_set_multi(ha, 1)) ret = EINVAL; } break; case SIOCDELMULTI: QL_DPRINT4(ha, "%s (0x%lx)\n", "SIOCDELMULTI", cmd); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { if (qlnx_set_multi(ha, 0)) ret = EINVAL; } break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: QL_DPRINT4(ha, "SIOCSIFMEDIA/SIOCGIFMEDIA (0x%lx)\n", cmd); ret = ifmedia_ioctl(ifp, ifr, &ha->media, cmd); break; case SIOCSIFCAP: mask = ifr->ifr_reqcap ^ ifp->if_capenable; QL_DPRINT4(ha, "SIOCSIFCAP (0x%lx)\n", cmd); if (mask & IFCAP_HWCSUM) ifp->if_capenable ^= IFCAP_HWCSUM; if (mask & IFCAP_TSO4) ifp->if_capenable ^= IFCAP_TSO4; if (mask & IFCAP_TSO6) ifp->if_capenable ^= IFCAP_TSO6; if (mask & 
IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) qlnx_init(ha); VLAN_CAPABILITIES(ifp); break; #if (__FreeBSD_version >= 1100101) case SIOCGI2C: { struct ifi2creq i2c; struct ecore_hwfn *p_hwfn = &ha->cdev.hwfns[0]; struct ecore_ptt *p_ptt; - ret = copyin(ifr->ifr_data, &i2c, sizeof(i2c)); + ret = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c)); if (ret) break; if ((i2c.len > sizeof (i2c.data)) || (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2)) { ret = EINVAL; break; } p_ptt = ecore_ptt_acquire(p_hwfn); if (!p_ptt) { QL_DPRINT1(ha, "ecore_ptt_acquire failed\n"); ret = -1; break; } ret = ecore_mcp_phy_sfp_read(p_hwfn, p_ptt, (ha->pci_func & 0x1), i2c.dev_addr, i2c.offset, i2c.len, &i2c.data[0]); ecore_ptt_release(p_hwfn, p_ptt); if (ret) { ret = -1; break; } - ret = copyout(&i2c, ifr->ifr_data, sizeof(i2c)); + ret = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c)); QL_DPRINT8(ha, "SIOCGI2C copyout ret = %d \ len = %d addr = 0x%02x offset = 0x%04x \ data[0..7]=0x%02x 0x%02x 0x%02x 0x%02x 0x%02x \ 0x%02x 0x%02x 0x%02x\n", ret, i2c.len, i2c.dev_addr, i2c.offset, i2c.data[0], i2c.data[1], i2c.data[2], i2c.data[3], i2c.data[4], i2c.data[5], i2c.data[6], i2c.data[7]); break; } #endif /* #if (__FreeBSD_version >= 1100101) */ default: QL_DPRINT4(ha, "default (0x%lx)\n", cmd); ret = ether_ioctl(ifp, cmd, data); break; } return (ret); } static int qlnx_media_change(struct ifnet *ifp) { qlnx_host_t *ha; struct ifmedia *ifm; int ret = 0; ha = (qlnx_host_t *)ifp->if_softc; QL_DPRINT2(ha, "enter\n"); ifm = &ha->media; if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) ret = EINVAL; QL_DPRINT2(ha, "exit\n"); return (ret); } static void qlnx_media_status(struct ifnet *ifp, struct ifmediareq *ifmr) { qlnx_host_t *ha; ha = (qlnx_host_t *)ifp->if_softc; QL_DPRINT2(ha, "enter\n"); ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (ha->link_up) { ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= (IFM_FDX | qlnx_get_optics(ha, &ha->if_link)); if (ha->if_link.link_partner_caps & (QLNX_LINK_CAP_Pause | QLNX_LINK_CAP_Asym_Pause)) ifmr->ifm_active |= (IFM_ETH_RXPAUSE | IFM_ETH_TXPAUSE); } QL_DPRINT2(ha, "exit (%s)\n", (ha->link_up ? 
"link_up" : "link_down")); return; } static void qlnx_free_tx_pkt(qlnx_host_t *ha, struct qlnx_fastpath *fp, struct qlnx_tx_queue *txq) { u16 idx; struct mbuf *mp; bus_dmamap_t map; int i; struct eth_tx_bd *tx_data_bd; struct eth_tx_1st_bd *first_bd; int nbds = 0; idx = txq->sw_tx_cons; mp = txq->sw_tx_ring[idx].mp; map = txq->sw_tx_ring[idx].map; if ((mp == NULL) || QL_ERR_INJECT(ha, QL_ERR_INJCT_TX_INT_MBUF_NULL)){ QL_RESET_ERR_INJECT(ha, QL_ERR_INJCT_TX_INT_MBUF_NULL); QL_DPRINT1(ha, "(mp == NULL) " " tx_idx = 0x%x" " ecore_prod_idx = 0x%x" " ecore_cons_idx = 0x%x" " hw_bd_cons = 0x%x" " txq_db_last = 0x%x" " elem_left = 0x%x\n", fp->rss_id, ecore_chain_get_prod_idx(&txq->tx_pbl), ecore_chain_get_cons_idx(&txq->tx_pbl), le16toh(*txq->hw_cons_ptr), txq->tx_db.raw, ecore_chain_get_elem_left(&txq->tx_pbl)); fp->err_tx_free_pkt_null++; //DEBUG qlnx_trigger_dump(ha); return; } else { QLNX_INC_OPACKETS((ha->ifp)); QLNX_INC_OBYTES((ha->ifp), (mp->m_pkthdr.len)); bus_dmamap_sync(ha->tx_tag, map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(ha->tx_tag, map); fp->tx_pkts_freed++; fp->tx_pkts_completed++; m_freem(mp); } first_bd = (struct eth_tx_1st_bd *)ecore_chain_consume(&txq->tx_pbl); nbds = first_bd->data.nbds; // BD_SET_UNMAP_ADDR_LEN(first_bd, 0, 0); for (i = 1; i < nbds; i++) { tx_data_bd = ecore_chain_consume(&txq->tx_pbl); // BD_SET_UNMAP_ADDR_LEN(tx_data_bd, 0, 0); } txq->sw_tx_ring[idx].flags = 0; txq->sw_tx_ring[idx].mp = NULL; txq->sw_tx_ring[idx].map = (bus_dmamap_t)0; return; } static void qlnx_tx_int(qlnx_host_t *ha, struct qlnx_fastpath *fp, struct qlnx_tx_queue *txq) { u16 hw_bd_cons; u16 ecore_cons_idx; uint16_t diff; uint16_t idx, idx2; hw_bd_cons = le16toh(*txq->hw_cons_ptr); while (hw_bd_cons != (ecore_cons_idx = ecore_chain_get_cons_idx(&txq->tx_pbl))) { if (hw_bd_cons < ecore_cons_idx) { diff = (1 << 16) - (ecore_cons_idx - hw_bd_cons); } else { diff = hw_bd_cons - ecore_cons_idx; } if ((diff > TX_RING_SIZE) || QL_ERR_INJECT(ha, QL_ERR_INJCT_TX_INT_DIFF)){ QL_RESET_ERR_INJECT(ha, QL_ERR_INJCT_TX_INT_DIFF); QL_DPRINT1(ha, "(diff = 0x%x) " " tx_idx = 0x%x" " ecore_prod_idx = 0x%x" " ecore_cons_idx = 0x%x" " hw_bd_cons = 0x%x" " txq_db_last = 0x%x" " elem_left = 0x%x\n", diff, fp->rss_id, ecore_chain_get_prod_idx(&txq->tx_pbl), ecore_chain_get_cons_idx(&txq->tx_pbl), le16toh(*txq->hw_cons_ptr), txq->tx_db.raw, ecore_chain_get_elem_left(&txq->tx_pbl)); fp->err_tx_cons_idx_conflict++; //DEBUG qlnx_trigger_dump(ha); } idx = (txq->sw_tx_cons + 1) & (TX_RING_SIZE - 1); idx2 = (txq->sw_tx_cons + 2) & (TX_RING_SIZE - 1); prefetch(txq->sw_tx_ring[idx].mp); prefetch(txq->sw_tx_ring[idx2].mp); qlnx_free_tx_pkt(ha, fp, txq); txq->sw_tx_cons = (txq->sw_tx_cons + 1) & (TX_RING_SIZE - 1); } return; } static int qlnx_transmit_locked(struct ifnet *ifp,struct qlnx_fastpath *fp, struct mbuf *mp) { int ret = 0; struct qlnx_tx_queue *txq; qlnx_host_t * ha; uint16_t elem_left; txq = fp->txq[0]; ha = (qlnx_host_t *)fp->edev; if ((!(ifp->if_drv_flags & IFF_DRV_RUNNING)) || (!ha->link_up)) { if(mp != NULL) ret = drbr_enqueue(ifp, fp->tx_br, mp); return (ret); } if(mp != NULL) ret = drbr_enqueue(ifp, fp->tx_br, mp); mp = drbr_peek(ifp, fp->tx_br); while (mp != NULL) { if (qlnx_send(ha, fp, &mp)) { if (mp != NULL) { drbr_putback(ifp, fp->tx_br, mp); } else { fp->tx_pkts_processed++; drbr_advance(ifp, fp->tx_br); } goto qlnx_transmit_locked_exit; } else { drbr_advance(ifp, fp->tx_br); fp->tx_pkts_transmitted++; fp->tx_pkts_processed++; } mp = drbr_peek(ifp, fp->tx_br); } qlnx_transmit_locked_exit: 
if((qlnx_num_tx_compl(ha,fp, fp->txq[0]) > QLNX_TX_COMPL_THRESH) || ((int)(elem_left = ecore_chain_get_elem_left(&txq->tx_pbl)) < QLNX_TX_ELEM_MAX_THRESH)) (void)qlnx_tx_int(ha, fp, fp->txq[0]); QL_DPRINT2(ha, "%s: exit ret = %d\n", __func__, ret); return ret; } static int qlnx_transmit(struct ifnet *ifp, struct mbuf *mp) { qlnx_host_t *ha = (qlnx_host_t *)ifp->if_softc; struct qlnx_fastpath *fp; int rss_id = 0, ret = 0; #ifdef QLNX_TRACEPERF_DATA uint64_t tx_pkts = 0, tx_compl = 0; #endif QL_DPRINT2(ha, "enter\n"); #if __FreeBSD_version >= 1100000 if (M_HASHTYPE_GET(mp) != M_HASHTYPE_NONE) #else if (mp->m_flags & M_FLOWID) #endif rss_id = (mp->m_pkthdr.flowid % ECORE_RSS_IND_TABLE_SIZE) % ha->num_rss; fp = &ha->fp_array[rss_id]; if (fp->tx_br == NULL) { ret = EINVAL; goto qlnx_transmit_exit; } if (mtx_trylock(&fp->tx_mtx)) { #ifdef QLNX_TRACEPERF_DATA tx_pkts = fp->tx_pkts_transmitted; tx_compl = fp->tx_pkts_completed; #endif ret = qlnx_transmit_locked(ifp, fp, mp); #ifdef QLNX_TRACEPERF_DATA fp->tx_pkts_trans_ctx += (fp->tx_pkts_transmitted - tx_pkts); fp->tx_pkts_compl_ctx += (fp->tx_pkts_completed - tx_compl); #endif mtx_unlock(&fp->tx_mtx); } else { if (mp != NULL && (fp->fp_taskqueue != NULL)) { ret = drbr_enqueue(ifp, fp->tx_br, mp); taskqueue_enqueue(fp->fp_taskqueue, &fp->fp_task); } } qlnx_transmit_exit: QL_DPRINT2(ha, "exit ret = %d\n", ret); return ret; } static void qlnx_qflush(struct ifnet *ifp) { int rss_id; struct qlnx_fastpath *fp; struct mbuf *mp; qlnx_host_t *ha; ha = (qlnx_host_t *)ifp->if_softc; QL_DPRINT2(ha, "enter\n"); for (rss_id = 0; rss_id < ha->num_rss; rss_id++) { fp = &ha->fp_array[rss_id]; if (fp == NULL) continue; if (fp->tx_br) { mtx_lock(&fp->tx_mtx); while ((mp = drbr_dequeue(ifp, fp->tx_br)) != NULL) { fp->tx_pkts_freed++; m_freem(mp); } mtx_unlock(&fp->tx_mtx); } } QL_DPRINT2(ha, "exit\n"); return; } static void qlnx_txq_doorbell_wr32(qlnx_host_t *ha, void *reg_addr, uint32_t value) { struct ecore_dev *cdev; uint32_t offset; cdev = &ha->cdev; offset = (uint32_t)((uint8_t *)reg_addr - (uint8_t *)cdev->doorbells); bus_write_4(ha->pci_dbells, offset, value); bus_barrier(ha->pci_reg, 0, 0, BUS_SPACE_BARRIER_READ); bus_barrier(ha->pci_dbells, 0, 0, BUS_SPACE_BARRIER_READ); return; } static uint32_t qlnx_tcp_offset(qlnx_host_t *ha, struct mbuf *mp) { struct ether_vlan_header *eh = NULL; struct ip *ip = NULL; struct ip6_hdr *ip6 = NULL; struct tcphdr *th = NULL; uint32_t ehdrlen = 0, ip_hlen = 0, offset = 0; uint16_t etype = 0; device_t dev; uint8_t buf[sizeof(struct ip6_hdr)]; dev = ha->pci_dev; eh = mtod(mp, struct ether_vlan_header *); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; etype = ntohs(eh->evl_proto); } else { ehdrlen = ETHER_HDR_LEN; etype = ntohs(eh->evl_encap_proto); } switch (etype) { case ETHERTYPE_IP: ip = (struct ip *)(mp->m_data + ehdrlen); ip_hlen = sizeof (struct ip); if (mp->m_len < (ehdrlen + ip_hlen)) { m_copydata(mp, ehdrlen, sizeof(struct ip), buf); ip = (struct ip *)buf; } th = (struct tcphdr *)(ip + 1); offset = ip_hlen + ehdrlen + (th->th_off << 2); break; case ETHERTYPE_IPV6: ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); ip_hlen = sizeof(struct ip6_hdr); if (mp->m_len < (ehdrlen + ip_hlen)) { m_copydata(mp, ehdrlen, sizeof (struct ip6_hdr), buf); ip6 = (struct ip6_hdr *)buf; } th = (struct tcphdr *)(ip6 + 1); offset = ip_hlen + ehdrlen + (th->th_off << 2); break; default: break; } return (offset); } static __inline int qlnx_tso_check(struct qlnx_fastpath *fp, 
bus_dma_segment_t *segs, int nsegs, uint32_t offset) { int i; uint32_t sum, nbds_in_hdr = 1; uint32_t window; bus_dma_segment_t *s_seg; /* If the header spans mulitple segments, skip those segments */ if (nsegs < ETH_TX_LSO_WINDOW_BDS_NUM) return (0); i = 0; while ((i < nsegs) && (offset >= segs->ds_len)) { offset = offset - segs->ds_len; segs++; i++; nbds_in_hdr++; } window = ETH_TX_LSO_WINDOW_BDS_NUM - nbds_in_hdr; nsegs = nsegs - i; while (nsegs >= window) { sum = 0; s_seg = segs; for (i = 0; i < window; i++){ sum += s_seg->ds_len; s_seg++; } if (sum < ETH_TX_LSO_WINDOW_MIN_LEN) { fp->tx_lso_wnd_min_len++; return (-1); } nsegs = nsegs - 1; segs++; } return (0); } static int qlnx_send(qlnx_host_t *ha, struct qlnx_fastpath *fp, struct mbuf **m_headp) { bus_dma_segment_t *segs; bus_dmamap_t map = 0; uint32_t nsegs = 0; int ret = -1; struct mbuf *m_head = *m_headp; uint16_t idx = 0; uint16_t elem_left; uint8_t nbd = 0; struct qlnx_tx_queue *txq; struct eth_tx_1st_bd *first_bd; struct eth_tx_2nd_bd *second_bd; struct eth_tx_3rd_bd *third_bd; struct eth_tx_bd *tx_data_bd; int seg_idx = 0; uint32_t nbds_in_hdr = 0; uint32_t offset = 0; #ifdef QLNX_TRACE_PERF_DATA uint16_t bd_used; #endif QL_DPRINT8(ha, "enter\n"); if (!ha->link_up) return (-1); first_bd = NULL; second_bd = NULL; third_bd = NULL; tx_data_bd = NULL; txq = fp->txq[0]; if ((int)(elem_left = ecore_chain_get_elem_left(&txq->tx_pbl)) < QLNX_TX_ELEM_MIN_THRESH) { fp->tx_nsegs_gt_elem_left++; fp->err_tx_nsegs_gt_elem_left++; return (ENOBUFS); } idx = txq->sw_tx_prod; map = txq->sw_tx_ring[idx].map; segs = txq->segs; ret = bus_dmamap_load_mbuf_sg(ha->tx_tag, map, m_head, segs, &nsegs, BUS_DMA_NOWAIT); if (ha->dbg_trace_tso_pkt_len) { if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { if (!fp->tx_tso_min_pkt_len) { fp->tx_tso_min_pkt_len = m_head->m_pkthdr.len; fp->tx_tso_min_pkt_len = m_head->m_pkthdr.len; } else { if (fp->tx_tso_min_pkt_len > m_head->m_pkthdr.len) fp->tx_tso_min_pkt_len = m_head->m_pkthdr.len; if (fp->tx_tso_max_pkt_len < m_head->m_pkthdr.len) fp->tx_tso_max_pkt_len = m_head->m_pkthdr.len; } } } if (m_head->m_pkthdr.csum_flags & CSUM_TSO) offset = qlnx_tcp_offset(ha, m_head); if ((ret == EFBIG) || ((nsegs > QLNX_MAX_SEGMENTS_NON_TSO) && ( (!(m_head->m_pkthdr.csum_flags & CSUM_TSO)) || ((m_head->m_pkthdr.csum_flags & CSUM_TSO) && qlnx_tso_check(fp, segs, nsegs, offset))))) { struct mbuf *m; QL_DPRINT8(ha, "EFBIG [%d]\n", m_head->m_pkthdr.len); fp->tx_defrag++; m = m_defrag(m_head, M_NOWAIT); if (m == NULL) { fp->err_tx_defrag++; fp->tx_pkts_freed++; m_freem(m_head); *m_headp = NULL; QL_DPRINT1(ha, "m_defrag() = NULL [%d]\n", ret); return (ENOBUFS); } m_head = m; *m_headp = m_head; if ((ret = bus_dmamap_load_mbuf_sg(ha->tx_tag, map, m_head, segs, &nsegs, BUS_DMA_NOWAIT))) { fp->err_tx_defrag_dmamap_load++; QL_DPRINT1(ha, "bus_dmamap_load_mbuf_sg failed0 [%d, %d]\n", ret, m_head->m_pkthdr.len); fp->tx_pkts_freed++; m_freem(m_head); *m_headp = NULL; return (ret); } if ((nsegs > QLNX_MAX_SEGMENTS_NON_TSO) && !(m_head->m_pkthdr.csum_flags & CSUM_TSO)) { fp->err_tx_non_tso_max_seg++; QL_DPRINT1(ha, "(%d) nsegs too many for non-TSO [%d, %d]\n", ret, nsegs, m_head->m_pkthdr.len); fp->tx_pkts_freed++; m_freem(m_head); *m_headp = NULL; return (ret); } if (m_head->m_pkthdr.csum_flags & CSUM_TSO) offset = qlnx_tcp_offset(ha, m_head); } else if (ret) { fp->err_tx_dmamap_load++; QL_DPRINT1(ha, "bus_dmamap_load_mbuf_sg failed1 [%d, %d]\n", ret, m_head->m_pkthdr.len); fp->tx_pkts_freed++; m_freem(m_head); *m_headp = NULL; return (ret); } 
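/*
 * At this point the mbuf chain is DMA-mapped: 'segs' holds 'nsegs' physical
 * segments and, for TSO frames, 'offset' is the byte offset of the TCP payload
 * computed by qlnx_tcp_offset(), used below to split the packet header across
 * the first BDs.
 */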
QL_ASSERT(ha, (nsegs != 0), ("qlnx_send: empty packet")); if (ha->dbg_trace_tso_pkt_len) { if (nsegs < QLNX_FP_MAX_SEGS) fp->tx_pkts[(nsegs - 1)]++; else fp->tx_pkts[(QLNX_FP_MAX_SEGS - 1)]++; } #ifdef QLNX_TRACE_PERF_DATA if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { if(m_head->m_pkthdr.len <= 2048) fp->tx_pkts_hist[0]++; else if((m_head->m_pkthdr.len > 2048) && (m_head->m_pkthdr.len <= 4096)) fp->tx_pkts_hist[1]++; else if((m_head->m_pkthdr.len > 4096) && (m_head->m_pkthdr.len <= 8192)) fp->tx_pkts_hist[2]++; else if((m_head->m_pkthdr.len > 8192) && (m_head->m_pkthdr.len <= 12288 )) fp->tx_pkts_hist[3]++; else if((m_head->m_pkthdr.len > 11288) && (m_head->m_pkthdr.len <= 16394)) fp->tx_pkts_hist[4]++; else if((m_head->m_pkthdr.len > 16384) && (m_head->m_pkthdr.len <= 20480)) fp->tx_pkts_hist[5]++; else if((m_head->m_pkthdr.len > 20480) && (m_head->m_pkthdr.len <= 24576)) fp->tx_pkts_hist[6]++; else if((m_head->m_pkthdr.len > 24576) && (m_head->m_pkthdr.len <= 28672)) fp->tx_pkts_hist[7]++; else if((m_head->m_pkthdr.len > 28762) && (m_head->m_pkthdr.len <= 32768)) fp->tx_pkts_hist[8]++; else if((m_head->m_pkthdr.len > 32768) && (m_head->m_pkthdr.len <= 36864)) fp->tx_pkts_hist[9]++; else if((m_head->m_pkthdr.len > 36864) && (m_head->m_pkthdr.len <= 40960)) fp->tx_pkts_hist[10]++; else if((m_head->m_pkthdr.len > 40960) && (m_head->m_pkthdr.len <= 45056)) fp->tx_pkts_hist[11]++; else if((m_head->m_pkthdr.len > 45056) && (m_head->m_pkthdr.len <= 49152)) fp->tx_pkts_hist[12]++; else if((m_head->m_pkthdr.len > 49512) && (m_head->m_pkthdr.len <= 53248)) fp->tx_pkts_hist[13]++; else if((m_head->m_pkthdr.len > 53248) && (m_head->m_pkthdr.len <= 57344)) fp->tx_pkts_hist[14]++; else if((m_head->m_pkthdr.len > 53248) && (m_head->m_pkthdr.len <= 57344)) fp->tx_pkts_hist[15]++; else if((m_head->m_pkthdr.len > 57344) && (m_head->m_pkthdr.len <= 61440)) fp->tx_pkts_hist[16]++; else fp->tx_pkts_hist[17]++; } if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { elem_left = ecore_chain_get_elem_left(&txq->tx_pbl); bd_used = TX_RING_SIZE - elem_left; if(bd_used <= 100) fp->tx_pkts_q[0]++; else if((bd_used > 100) && (bd_used <= 500)) fp->tx_pkts_q[1]++; else if((bd_used > 500) && (bd_used <= 1000)) fp->tx_pkts_q[2]++; else if((bd_used > 1000) && (bd_used <= 2000)) fp->tx_pkts_q[3]++; else if((bd_used > 3000) && (bd_used <= 4000)) fp->tx_pkts_q[4]++; else if((bd_used > 4000) && (bd_used <= 5000)) fp->tx_pkts_q[5]++; else if((bd_used > 6000) && (bd_used <= 7000)) fp->tx_pkts_q[6]++; else if((bd_used > 7000) && (bd_used <= 8000)) fp->tx_pkts_q[7]++; else if((bd_used > 8000) && (bd_used <= 9000)) fp->tx_pkts_q[8]++; else if((bd_used > 9000) && (bd_used <= 10000)) fp->tx_pkts_q[9]++; else if((bd_used > 10000) && (bd_used <= 11000)) fp->tx_pkts_q[10]++; else if((bd_used > 11000) && (bd_used <= 12000)) fp->tx_pkts_q[11]++; else if((bd_used > 12000) && (bd_used <= 13000)) fp->tx_pkts_q[12]++; else if((bd_used > 13000) && (bd_used <= 14000)) fp->tx_pkts_q[13]++; else if((bd_used > 14000) && (bd_used <= 15000)) fp->tx_pkts_q[14]++; else if((bd_used > 15000) && (bd_used <= 16000)) fp->tx_pkts_q[15]++; else fp->tx_pkts_q[16]++; } #endif /* end of QLNX_TRACE_PERF_DATA */ if ((nsegs + QLNX_TX_ELEM_RESERVE) > (int)(elem_left = ecore_chain_get_elem_left(&txq->tx_pbl))) { QL_DPRINT1(ha, "(%d, 0x%x) insufficient BDs" " in chain[%d] trying to free packets\n", nsegs, elem_left, fp->rss_id); fp->tx_nsegs_gt_elem_left++; (void)qlnx_tx_int(ha, fp, txq); if ((nsegs + QLNX_TX_ELEM_RESERVE) > (int)(elem_left =
ecore_chain_get_elem_left(&txq->tx_pbl))) { QL_DPRINT1(ha, "(%d, 0x%x) insufficient BDs in chain[%d]\n", nsegs, elem_left, fp->rss_id); fp->err_tx_nsegs_gt_elem_left++; fp->tx_ring_full = 1; if (ha->storm_stats_enable) ha->storm_stats_gather = 1; return (ENOBUFS); } } bus_dmamap_sync(ha->tx_tag, map, BUS_DMASYNC_PREWRITE); txq->sw_tx_ring[idx].mp = m_head; first_bd = (struct eth_tx_1st_bd *)ecore_chain_produce(&txq->tx_pbl); memset(first_bd, 0, sizeof(*first_bd)); first_bd->data.bd_flags.bitfields = 1 << ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT; BD_SET_UNMAP_ADDR_LEN(first_bd, segs->ds_addr, segs->ds_len); nbd++; if (m_head->m_pkthdr.csum_flags & CSUM_IP) { first_bd->data.bd_flags.bitfields |= (1 << ETH_TX_1ST_BD_FLAGS_IP_CSUM_SHIFT); } if (m_head->m_pkthdr.csum_flags & (CSUM_UDP | CSUM_TCP | CSUM_TCP_IPV6 | CSUM_UDP_IPV6)) { first_bd->data.bd_flags.bitfields |= (1 << ETH_TX_1ST_BD_FLAGS_L4_CSUM_SHIFT); } if (m_head->m_flags & M_VLANTAG) { first_bd->data.vlan = m_head->m_pkthdr.ether_vtag; first_bd->data.bd_flags.bitfields |= (1 << ETH_TX_1ST_BD_FLAGS_VLAN_INSERTION_SHIFT); } if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { first_bd->data.bd_flags.bitfields |= (1 << ETH_TX_1ST_BD_FLAGS_LSO_SHIFT); first_bd->data.bd_flags.bitfields |= (1 << ETH_TX_1ST_BD_FLAGS_IP_CSUM_SHIFT); nbds_in_hdr = 1; if (offset == segs->ds_len) { BD_SET_UNMAP_ADDR_LEN(first_bd, segs->ds_addr, offset); segs++; seg_idx++; second_bd = (struct eth_tx_2nd_bd *) ecore_chain_produce(&txq->tx_pbl); memset(second_bd, 0, sizeof(*second_bd)); nbd++; if (seg_idx < nsegs) { BD_SET_UNMAP_ADDR_LEN(second_bd, \ (segs->ds_addr), (segs->ds_len)); segs++; seg_idx++; } third_bd = (struct eth_tx_3rd_bd *) ecore_chain_produce(&txq->tx_pbl); memset(third_bd, 0, sizeof(*third_bd)); third_bd->data.lso_mss = m_head->m_pkthdr.tso_segsz; third_bd->data.bitfields |= (nbds_in_hdr << ETH_TX_DATA_3RD_BD_HDR_NBD_SHIFT); nbd++; if (seg_idx < nsegs) { BD_SET_UNMAP_ADDR_LEN(third_bd, \ (segs->ds_addr), (segs->ds_len)); segs++; seg_idx++; } for (; seg_idx < nsegs; seg_idx++) { tx_data_bd = (struct eth_tx_bd *) ecore_chain_produce(&txq->tx_pbl); memset(tx_data_bd, 0, sizeof(*tx_data_bd)); BD_SET_UNMAP_ADDR_LEN(tx_data_bd, \ segs->ds_addr,\ segs->ds_len); segs++; nbd++; } } else if (offset < segs->ds_len) { BD_SET_UNMAP_ADDR_LEN(first_bd, segs->ds_addr, offset); second_bd = (struct eth_tx_2nd_bd *) ecore_chain_produce(&txq->tx_pbl); memset(second_bd, 0, sizeof(*second_bd)); BD_SET_UNMAP_ADDR_LEN(second_bd, \ (segs->ds_addr + offset),\ (segs->ds_len - offset)); nbd++; segs++; third_bd = (struct eth_tx_3rd_bd *) ecore_chain_produce(&txq->tx_pbl); memset(third_bd, 0, sizeof(*third_bd)); BD_SET_UNMAP_ADDR_LEN(third_bd, \ segs->ds_addr,\ segs->ds_len); third_bd->data.lso_mss = m_head->m_pkthdr.tso_segsz; third_bd->data.bitfields |= (nbds_in_hdr << ETH_TX_DATA_3RD_BD_HDR_NBD_SHIFT); segs++; nbd++; for (seg_idx = 2; seg_idx < nsegs; seg_idx++) { tx_data_bd = (struct eth_tx_bd *) ecore_chain_produce(&txq->tx_pbl); memset(tx_data_bd, 0, sizeof(*tx_data_bd)); BD_SET_UNMAP_ADDR_LEN(tx_data_bd, \ segs->ds_addr,\ segs->ds_len); segs++; nbd++; } } else { offset = offset - segs->ds_len; segs++; for (seg_idx = 1; seg_idx < nsegs; seg_idx++) { if (offset) nbds_in_hdr++; tx_data_bd = (struct eth_tx_bd *) ecore_chain_produce(&txq->tx_pbl); memset(tx_data_bd, 0, sizeof(*tx_data_bd)); if (second_bd == NULL) { second_bd = (struct eth_tx_2nd_bd *) tx_data_bd; } else if (third_bd == NULL) { third_bd = (struct eth_tx_3rd_bd *) tx_data_bd; } if (offset && (offset < segs->ds_len)) { BD_SET_UNMAP_ADDR_LEN(tx_data_bd,\ segs->ds_addr, offset); tx_data_bd = (struct eth_tx_bd *) ecore_chain_produce(&txq->tx_pbl); memset(tx_data_bd, 0, sizeof(*tx_data_bd)); if (second_bd == NULL) { second_bd = (struct eth_tx_2nd_bd *)tx_data_bd; } else if (third_bd == NULL) {
third_bd = (struct eth_tx_3rd_bd *)tx_data_bd; } BD_SET_UNMAP_ADDR_LEN(tx_data_bd,\ (segs->ds_addr + offset), \ (segs->ds_len - offset)); nbd++; offset = 0; } else { if (offset) offset = offset - segs->ds_len; BD_SET_UNMAP_ADDR_LEN(tx_data_bd,\ segs->ds_addr, segs->ds_len); } segs++; nbd++; } if (third_bd == NULL) { third_bd = (struct eth_tx_3rd_bd *) ecore_chain_produce(&txq->tx_pbl); memset(third_bd, 0, sizeof(*third_bd)); } third_bd->data.lso_mss = m_head->m_pkthdr.tso_segsz; third_bd->data.bitfields |= (nbds_in_hdr << ETH_TX_DATA_3RD_BD_HDR_NBD_SHIFT); fp->tx_tso_pkts++; } else { segs++; for (seg_idx = 1; seg_idx < nsegs; seg_idx++) { tx_data_bd = (struct eth_tx_bd *) ecore_chain_produce(&txq->tx_pbl); memset(tx_data_bd, 0, sizeof(*tx_data_bd)); BD_SET_UNMAP_ADDR_LEN(tx_data_bd, segs->ds_addr,\ segs->ds_len); segs++; nbd++; } first_bd->data.bitfields = (m_head->m_pkthdr.len & ETH_TX_DATA_1ST_BD_PKT_LEN_MASK) << ETH_TX_DATA_1ST_BD_PKT_LEN_SHIFT; first_bd->data.bitfields = htole16(first_bd->data.bitfields); fp->tx_non_tso_pkts++; } first_bd->data.nbds = nbd; if (ha->dbg_trace_tso_pkt_len) { if (fp->tx_tso_max_nsegs < nsegs) fp->tx_tso_max_nsegs = nsegs; if ((nsegs < fp->tx_tso_min_nsegs) || (!fp->tx_tso_min_nsegs)) fp->tx_tso_min_nsegs = nsegs; } txq->sw_tx_ring[idx].nsegs = nsegs; txq->sw_tx_prod = (txq->sw_tx_prod + 1) & (TX_RING_SIZE - 1); txq->tx_db.data.bd_prod = htole16(ecore_chain_get_prod_idx(&txq->tx_pbl)); qlnx_txq_doorbell_wr32(ha, txq->doorbell_addr, txq->tx_db.raw); QL_DPRINT8(ha, "exit\n"); return (0); } static void qlnx_stop(qlnx_host_t *ha) { struct ifnet *ifp = ha->ifp; device_t dev; int i; dev = ha->pci_dev; ifp->if_drv_flags &= ~(IFF_DRV_OACTIVE | IFF_DRV_RUNNING); /* * We simply lock and unlock each fp->tx_mtx to * propagate the if_drv_flags * state to each tx thread */ QL_DPRINT1(ha, "QLNX STATE = %d\n",ha->state); if (ha->state == QLNX_STATE_OPEN) { for (i = 0; i < ha->num_rss; i++) { struct qlnx_fastpath *fp = &ha->fp_array[i]; mtx_lock(&fp->tx_mtx); mtx_unlock(&fp->tx_mtx); if (fp->fp_taskqueue != NULL) taskqueue_enqueue(fp->fp_taskqueue, &fp->fp_task); } } qlnx_unload(ha); return; } static int qlnx_get_ifq_snd_maxlen(qlnx_host_t *ha) { return(TX_RING_SIZE - 1); } uint8_t * qlnx_get_mac_addr(qlnx_host_t *ha) { struct ecore_hwfn *p_hwfn; p_hwfn = &ha->cdev.hwfns[0]; return (p_hwfn->hw_info.hw_mac_addr); } static uint32_t qlnx_get_optics(qlnx_host_t *ha, struct qlnx_link_output *if_link) { uint32_t ifm_type = 0; switch (if_link->media_type) { case MEDIA_MODULE_FIBER: case MEDIA_UNSPECIFIED: if (if_link->speed == (100 * 1000)) ifm_type = QLNX_IFM_100G_SR4; else if (if_link->speed == (40 * 1000)) ifm_type = IFM_40G_SR4; else if (if_link->speed == (25 * 1000)) ifm_type = QLNX_IFM_25G_SR; else if (if_link->speed == (10 * 1000)) ifm_type = (IFM_10G_LR | IFM_10G_SR); else if (if_link->speed == (1 * 1000)) ifm_type = (IFM_1000_SX | IFM_1000_LX); break; case MEDIA_DA_TWINAX: if (if_link->speed == (100 * 1000)) ifm_type = QLNX_IFM_100G_CR4; else if (if_link->speed == (40 * 1000)) ifm_type = IFM_40G_CR4; else if (if_link->speed == (25 * 1000)) ifm_type = QLNX_IFM_25G_CR; else if (if_link->speed == (10 * 1000)) ifm_type = IFM_10G_TWINAX; break; default : ifm_type = IFM_UNKNOWN; break; } return (ifm_type); } /***************************************************************************** * Interrupt Service Functions *****************************************************************************/ static int qlnx_rx_jumbo_chain(qlnx_host_t *ha, struct qlnx_fastpath *fp, struct mbuf *mp_head, uint16_t len) { struct mbuf *mp,
*mpf, *mpl; struct sw_rx_data *sw_rx_data; struct qlnx_rx_queue *rxq; uint16_t len_in_buffer; rxq = fp->rxq; mpf = mpl = mp = NULL; while (len) { rxq->sw_rx_cons = (rxq->sw_rx_cons + 1) & (RX_RING_SIZE - 1); sw_rx_data = &rxq->sw_rx_ring[rxq->sw_rx_cons]; mp = sw_rx_data->data; if (mp == NULL) { QL_DPRINT1(ha, "mp = NULL\n"); fp->err_rx_mp_null++; rxq->sw_rx_cons = (rxq->sw_rx_cons + 1) & (RX_RING_SIZE - 1); if (mpf != NULL) m_freem(mpf); return (-1); } bus_dmamap_sync(ha->rx_tag, sw_rx_data->map, BUS_DMASYNC_POSTREAD); if (qlnx_alloc_rx_buffer(ha, rxq) != 0) { QL_DPRINT1(ha, "New buffer allocation failed, dropping" " incoming packet and reusing its buffer\n"); qlnx_reuse_rx_data(rxq); fp->err_rx_alloc_errors++; if (mpf != NULL) m_freem(mpf); return (-1); } ecore_chain_consume(&rxq->rx_bd_ring); if (len > rxq->rx_buf_size) len_in_buffer = rxq->rx_buf_size; else len_in_buffer = len; len = len - len_in_buffer; mp->m_flags &= ~M_PKTHDR; mp->m_next = NULL; mp->m_len = len_in_buffer; if (mpf == NULL) mpf = mpl = mp; else { mpl->m_next = mp; mpl = mp; } } if (mpf != NULL) mp_head->m_next = mpf; return (0); } static void qlnx_tpa_start(qlnx_host_t *ha, struct qlnx_fastpath *fp, struct qlnx_rx_queue *rxq, struct eth_fast_path_rx_tpa_start_cqe *cqe) { uint32_t agg_index; struct ifnet *ifp = ha->ifp; struct mbuf *mp; struct mbuf *mpf = NULL, *mpl = NULL, *mpc = NULL; struct sw_rx_data *sw_rx_data; dma_addr_t addr; bus_dmamap_t map; struct eth_rx_bd *rx_bd; int i; device_t dev; #if __FreeBSD_version >= 1100000 uint8_t hash_type; #endif /* #if __FreeBSD_version >= 1100000 */ dev = ha->pci_dev; agg_index = cqe->tpa_agg_index; QL_DPRINT7(ha, "[rss_id = %d]: enter\n \ \t type = 0x%x\n \ \t bitfields = 0x%x\n \ \t seg_len = 0x%x\n \ \t pars_flags = 0x%x\n \ \t vlan_tag = 0x%x\n \ \t rss_hash = 0x%x\n \ \t len_on_first_bd = 0x%x\n \ \t placement_offset = 0x%x\n \ \t tpa_agg_index = 0x%x\n \ \t header_len = 0x%x\n \ \t ext_bd_len_list[0] = 0x%x\n \ \t ext_bd_len_list[1] = 0x%x\n \ \t ext_bd_len_list[2] = 0x%x\n \ \t ext_bd_len_list[3] = 0x%x\n \ \t ext_bd_len_list[4] = 0x%x\n", fp->rss_id, cqe->type, cqe->bitfields, cqe->seg_len, cqe->pars_flags.flags, cqe->vlan_tag, cqe->rss_hash, cqe->len_on_first_bd, cqe->placement_offset, cqe->tpa_agg_index, cqe->header_len, cqe->ext_bd_len_list[0], cqe->ext_bd_len_list[1], cqe->ext_bd_len_list[2], cqe->ext_bd_len_list[3], cqe->ext_bd_len_list[4]); if (agg_index >= ETH_TPA_MAX_AGGS_NUM) { fp->err_rx_tpa_invalid_agg_num++; return; } sw_rx_data = &rxq->sw_rx_ring[rxq->sw_rx_cons]; bus_dmamap_sync(ha->rx_tag, sw_rx_data->map, BUS_DMASYNC_POSTREAD); mp = sw_rx_data->data; QL_DPRINT7(ha, "[rss_id = %d]: mp = %p \n ", fp->rss_id, mp); if (mp == NULL) { QL_DPRINT7(ha, "[%d]: mp = NULL\n", fp->rss_id); fp->err_rx_mp_null++; rxq->sw_rx_cons = (rxq->sw_rx_cons + 1) & (RX_RING_SIZE - 1); return; } if ((le16toh(cqe->pars_flags.flags)) & CQE_FLAGS_ERR) { QL_DPRINT7(ha, "[%d]: CQE in CONS = %u has error," " flags = %x, dropping incoming packet\n", fp->rss_id, rxq->sw_rx_cons, le16toh(cqe->pars_flags.flags)); fp->err_rx_hw_errors++; qlnx_reuse_rx_data(rxq); QLNX_INC_IERRORS(ifp); return; } if (qlnx_alloc_rx_buffer(ha, rxq) != 0) { QL_DPRINT7(ha, "[%d]: New buffer allocation failed," " dropping incoming packet and reusing its buffer\n", fp->rss_id); fp->err_rx_alloc_errors++; QLNX_INC_IQDROPS(ifp); /* * Load the tpa mbuf into the rx ring and save the * posted mbuf */ map = sw_rx_data->map; addr = sw_rx_data->dma_addr; sw_rx_data = &rxq->sw_rx_ring[rxq->sw_rx_prod]; sw_rx_data->data = 
rxq->tpa_info[agg_index].rx_buf.data; sw_rx_data->dma_addr = rxq->tpa_info[agg_index].rx_buf.dma_addr; sw_rx_data->map = rxq->tpa_info[agg_index].rx_buf.map; rxq->tpa_info[agg_index].rx_buf.data = mp; rxq->tpa_info[agg_index].rx_buf.dma_addr = addr; rxq->tpa_info[agg_index].rx_buf.map = map; rx_bd = (struct eth_rx_bd *) ecore_chain_produce(&rxq->rx_bd_ring); rx_bd->addr.hi = htole32(U64_HI(sw_rx_data->dma_addr)); rx_bd->addr.lo = htole32(U64_LO(sw_rx_data->dma_addr)); bus_dmamap_sync(ha->rx_tag, sw_rx_data->map, BUS_DMASYNC_PREREAD); rxq->sw_rx_prod = (rxq->sw_rx_prod + 1) & (RX_RING_SIZE - 1); rxq->sw_rx_cons = (rxq->sw_rx_cons + 1) & (RX_RING_SIZE - 1); ecore_chain_consume(&rxq->rx_bd_ring); /* Now reuse any buffers posted in ext_bd_len_list */ for (i = 0; i < ETH_TPA_CQE_START_LEN_LIST_SIZE; i++) { if (cqe->ext_bd_len_list[i] == 0) break; qlnx_reuse_rx_data(rxq); } rxq->tpa_info[agg_index].agg_state = QLNX_AGG_STATE_ERROR; return; } if (rxq->tpa_info[agg_index].agg_state != QLNX_AGG_STATE_NONE) { QL_DPRINT7(ha, "[%d]: invalid aggregation state," " dropping incoming packet and reusing its buffer\n", fp->rss_id); QLNX_INC_IQDROPS(ifp); /* if we already have mbuf head in aggregation free it */ if (rxq->tpa_info[agg_index].mpf) { m_freem(rxq->tpa_info[agg_index].mpf); rxq->tpa_info[agg_index].mpl = NULL; } rxq->tpa_info[agg_index].mpf = mp; rxq->tpa_info[agg_index].mpl = NULL; rxq->sw_rx_cons = (rxq->sw_rx_cons + 1) & (RX_RING_SIZE - 1); ecore_chain_consume(&rxq->rx_bd_ring); /* Now reuse any buffers posted in ext_bd_len_list */ for (i = 0; i < ETH_TPA_CQE_START_LEN_LIST_SIZE; i++) { if (cqe->ext_bd_len_list[i] == 0) break; qlnx_reuse_rx_data(rxq); } rxq->tpa_info[agg_index].agg_state = QLNX_AGG_STATE_ERROR; return; } /* * first process the ext_bd_len_list * if this fails then we simply drop the packet */ ecore_chain_consume(&rxq->rx_bd_ring); rxq->sw_rx_cons = (rxq->sw_rx_cons + 1) & (RX_RING_SIZE - 1); for (i = 0; i < ETH_TPA_CQE_START_LEN_LIST_SIZE; i++) { QL_DPRINT7(ha, "[%d]: 4\n ", fp->rss_id); if (cqe->ext_bd_len_list[i] == 0) break; sw_rx_data = &rxq->sw_rx_ring[rxq->sw_rx_cons]; bus_dmamap_sync(ha->rx_tag, sw_rx_data->map, BUS_DMASYNC_POSTREAD); mpc = sw_rx_data->data; if (mpc == NULL) { QL_DPRINT7(ha, "[%d]: mpc = NULL\n", fp->rss_id); fp->err_rx_mp_null++; if (mpf != NULL) m_freem(mpf); mpf = mpl = NULL; rxq->tpa_info[agg_index].agg_state = QLNX_AGG_STATE_ERROR; ecore_chain_consume(&rxq->rx_bd_ring); rxq->sw_rx_cons = (rxq->sw_rx_cons + 1) & (RX_RING_SIZE - 1); continue; } if (qlnx_alloc_rx_buffer(ha, rxq) != 0) { QL_DPRINT7(ha, "[%d]: New buffer allocation failed," " dropping incoming packet and reusing its" " buffer\n", fp->rss_id); qlnx_reuse_rx_data(rxq); if (mpf != NULL) m_freem(mpf); mpf = mpl = NULL; rxq->tpa_info[agg_index].agg_state = QLNX_AGG_STATE_ERROR; ecore_chain_consume(&rxq->rx_bd_ring); rxq->sw_rx_cons = (rxq->sw_rx_cons + 1) & (RX_RING_SIZE - 1); continue; } mpc->m_flags &= ~M_PKTHDR; mpc->m_next = NULL; mpc->m_len = cqe->ext_bd_len_list[i]; if (mpf == NULL) { mpf = mpl = mpc; } else { mpl->m_len = ha->rx_buf_size; mpl->m_next = mpc; mpl = mpc; } ecore_chain_consume(&rxq->rx_bd_ring); rxq->sw_rx_cons = (rxq->sw_rx_cons + 1) & (RX_RING_SIZE - 1); } if (rxq->tpa_info[agg_index].agg_state != QLNX_AGG_STATE_NONE) { QL_DPRINT7(ha, "[%d]: invalid aggregation state, dropping" " incoming packet and reusing its buffer\n", fp->rss_id); QLNX_INC_IQDROPS(ifp); rxq->tpa_info[agg_index].mpf = mp; rxq->tpa_info[agg_index].mpl = NULL; return; } 
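/*
 * Aggregation start accepted: record the CQE placement offset and chain the
 * header mbuf to any extra buffers consumed from ext_bd_len_list above before
 * marking this aggregation as started.
 */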
rxq->tpa_info[agg_index].placement_offset = cqe->placement_offset; if (mpf != NULL) { mp->m_len = ha->rx_buf_size; mp->m_next = mpf; rxq->tpa_info[agg_index].mpf = mp; rxq->tpa_info[agg_index].mpl = mpl; } else { mp->m_len = cqe->len_on_first_bd + cqe->placement_offset; rxq->tpa_info[agg_index].mpf = mp; rxq->tpa_info[agg_index].mpl = mp; mp->m_next = NULL; } mp->m_flags |= M_PKTHDR; /* assign packet to this interface interface */ mp->m_pkthdr.rcvif = ifp; /* assume no hardware checksum has complated */ mp->m_pkthdr.csum_flags = 0; //mp->m_pkthdr.flowid = fp->rss_id; mp->m_pkthdr.flowid = cqe->rss_hash; #if __FreeBSD_version >= 1100000 hash_type = cqe->bitfields & (ETH_FAST_PATH_RX_REG_CQE_RSS_HASH_TYPE_MASK << ETH_FAST_PATH_RX_REG_CQE_RSS_HASH_TYPE_SHIFT); switch (hash_type) { case RSS_HASH_TYPE_IPV4: M_HASHTYPE_SET(mp, M_HASHTYPE_RSS_IPV4); break; case RSS_HASH_TYPE_TCP_IPV4: M_HASHTYPE_SET(mp, M_HASHTYPE_RSS_TCP_IPV4); break; case RSS_HASH_TYPE_IPV6: M_HASHTYPE_SET(mp, M_HASHTYPE_RSS_IPV6); break; case RSS_HASH_TYPE_TCP_IPV6: M_HASHTYPE_SET(mp, M_HASHTYPE_RSS_TCP_IPV6); break; default: M_HASHTYPE_SET(mp, M_HASHTYPE_OPAQUE); break; } #else mp->m_flags |= M_FLOWID; #endif mp->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); mp->m_pkthdr.csum_data = 0xFFFF; if (CQE_HAS_VLAN(cqe->pars_flags.flags)) { mp->m_pkthdr.ether_vtag = le16toh(cqe->vlan_tag); mp->m_flags |= M_VLANTAG; } rxq->tpa_info[agg_index].agg_state = QLNX_AGG_STATE_START; QL_DPRINT7(ha, "[%d]: 5\n\tagg_state = %d\n\t mpf = %p mpl = %p\n", fp->rss_id, rxq->tpa_info[agg_index].agg_state, rxq->tpa_info[agg_index].mpf, rxq->tpa_info[agg_index].mpl); return; } static void qlnx_tpa_cont(qlnx_host_t *ha, struct qlnx_fastpath *fp, struct qlnx_rx_queue *rxq, struct eth_fast_path_rx_tpa_cont_cqe *cqe) { struct sw_rx_data *sw_rx_data; int i; struct mbuf *mpf = NULL, *mpl = NULL, *mpc = NULL; struct mbuf *mp; uint32_t agg_index; device_t dev; dev = ha->pci_dev; QL_DPRINT7(ha, "[%d]: enter\n \ \t type = 0x%x\n \ \t tpa_agg_index = 0x%x\n \ \t len_list[0] = 0x%x\n \ \t len_list[1] = 0x%x\n \ \t len_list[2] = 0x%x\n \ \t len_list[3] = 0x%x\n \ \t len_list[4] = 0x%x\n \ \t len_list[5] = 0x%x\n", fp->rss_id, cqe->type, cqe->tpa_agg_index, cqe->len_list[0], cqe->len_list[1], cqe->len_list[2], cqe->len_list[3], cqe->len_list[4], cqe->len_list[5]); agg_index = cqe->tpa_agg_index; if (agg_index >= ETH_TPA_MAX_AGGS_NUM) { QL_DPRINT7(ha, "[%d]: 0\n ", fp->rss_id); fp->err_rx_tpa_invalid_agg_num++; return; } for (i = 0; i < ETH_TPA_CQE_CONT_LEN_LIST_SIZE; i++) { QL_DPRINT7(ha, "[%d]: 1\n ", fp->rss_id); if (cqe->len_list[i] == 0) break; if (rxq->tpa_info[agg_index].agg_state != QLNX_AGG_STATE_START) { qlnx_reuse_rx_data(rxq); continue; } sw_rx_data = &rxq->sw_rx_ring[rxq->sw_rx_cons]; bus_dmamap_sync(ha->rx_tag, sw_rx_data->map, BUS_DMASYNC_POSTREAD); mpc = sw_rx_data->data; if (mpc == NULL) { QL_DPRINT7(ha, "[%d]: mpc = NULL\n", fp->rss_id); fp->err_rx_mp_null++; if (mpf != NULL) m_freem(mpf); mpf = mpl = NULL; rxq->tpa_info[agg_index].agg_state = QLNX_AGG_STATE_ERROR; ecore_chain_consume(&rxq->rx_bd_ring); rxq->sw_rx_cons = (rxq->sw_rx_cons + 1) & (RX_RING_SIZE - 1); continue; } if (qlnx_alloc_rx_buffer(ha, rxq) != 0) { QL_DPRINT7(ha, "[%d]: New buffer allocation failed," " dropping incoming packet and reusing its" " buffer\n", fp->rss_id); qlnx_reuse_rx_data(rxq); if (mpf != NULL) m_freem(mpf); mpf = mpl = NULL; rxq->tpa_info[agg_index].agg_state = QLNX_AGG_STATE_ERROR; 
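/*
 * The aggregation was marked as failed above; consume the BD that backed this
 * length-list entry and advance the software consumer index so the ring stays
 * in sync.
 */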
ecore_chain_consume(&rxq->rx_bd_ring); rxq->sw_rx_cons = (rxq->sw_rx_cons + 1) & (RX_RING_SIZE - 1); continue; } mpc->m_flags &= ~M_PKTHDR; mpc->m_next = NULL; mpc->m_len = cqe->len_list[i]; if (mpf == NULL) { mpf = mpl = mpc; } else { mpl->m_len = ha->rx_buf_size; mpl->m_next = mpc; mpl = mpc; } ecore_chain_consume(&rxq->rx_bd_ring); rxq->sw_rx_cons = (rxq->sw_rx_cons + 1) & (RX_RING_SIZE - 1); } QL_DPRINT7(ha, "[%d]: 2\n" "\tmpf = %p mpl = %p\n", fp->rss_id, mpf, mpl); if (mpf != NULL) { mp = rxq->tpa_info[agg_index].mpl; mp->m_len = ha->rx_buf_size; mp->m_next = mpf; rxq->tpa_info[agg_index].mpl = mpl; } return; } static int qlnx_tpa_end(qlnx_host_t *ha, struct qlnx_fastpath *fp, struct qlnx_rx_queue *rxq, struct eth_fast_path_rx_tpa_end_cqe *cqe) { struct sw_rx_data *sw_rx_data; int i; struct mbuf *mpf = NULL, *mpl = NULL, *mpc = NULL; struct mbuf *mp; uint32_t agg_index; uint32_t len = 0; struct ifnet *ifp = ha->ifp; device_t dev; dev = ha->pci_dev; QL_DPRINT7(ha, "[%d]: enter\n \ \t type = 0x%x\n \ \t tpa_agg_index = 0x%x\n \ \t total_packet_len = 0x%x\n \ \t num_of_bds = 0x%x\n \ \t end_reason = 0x%x\n \ \t num_of_coalesced_segs = 0x%x\n \ \t ts_delta = 0x%x\n \ \t len_list[0] = 0x%x\n \ \t len_list[1] = 0x%x\n \ \t len_list[2] = 0x%x\n \ \t len_list[3] = 0x%x\n", fp->rss_id, cqe->type, cqe->tpa_agg_index, cqe->total_packet_len, cqe->num_of_bds, cqe->end_reason, cqe->num_of_coalesced_segs, cqe->ts_delta, cqe->len_list[0], cqe->len_list[1], cqe->len_list[2], cqe->len_list[3]); agg_index = cqe->tpa_agg_index; if (agg_index >= ETH_TPA_MAX_AGGS_NUM) { QL_DPRINT7(ha, "[%d]: 0\n ", fp->rss_id); fp->err_rx_tpa_invalid_agg_num++; return (0); } for (i = 0; i < ETH_TPA_CQE_END_LEN_LIST_SIZE; i++) { QL_DPRINT7(ha, "[%d]: 1\n ", fp->rss_id); if (cqe->len_list[i] == 0) break; if (rxq->tpa_info[agg_index].agg_state != QLNX_AGG_STATE_START) { QL_DPRINT7(ha, "[%d]: 2\n ", fp->rss_id); qlnx_reuse_rx_data(rxq); continue; } sw_rx_data = &rxq->sw_rx_ring[rxq->sw_rx_cons]; bus_dmamap_sync(ha->rx_tag, sw_rx_data->map, BUS_DMASYNC_POSTREAD); mpc = sw_rx_data->data; if (mpc == NULL) { QL_DPRINT7(ha, "[%d]: mpc = NULL\n", fp->rss_id); fp->err_rx_mp_null++; if (mpf != NULL) m_freem(mpf); mpf = mpl = NULL; rxq->tpa_info[agg_index].agg_state = QLNX_AGG_STATE_ERROR; ecore_chain_consume(&rxq->rx_bd_ring); rxq->sw_rx_cons = (rxq->sw_rx_cons + 1) & (RX_RING_SIZE - 1); continue; } if (qlnx_alloc_rx_buffer(ha, rxq) != 0) { QL_DPRINT7(ha, "[%d]: New buffer allocation failed," " dropping incoming packet and reusing its" " buffer\n", fp->rss_id); qlnx_reuse_rx_data(rxq); if (mpf != NULL) m_freem(mpf); mpf = mpl = NULL; rxq->tpa_info[agg_index].agg_state = QLNX_AGG_STATE_ERROR; ecore_chain_consume(&rxq->rx_bd_ring); rxq->sw_rx_cons = (rxq->sw_rx_cons + 1) & (RX_RING_SIZE - 1); continue; } mpc->m_flags &= ~M_PKTHDR; mpc->m_next = NULL; mpc->m_len = cqe->len_list[i]; if (mpf == NULL) { mpf = mpl = mpc; } else { mpl->m_len = ha->rx_buf_size; mpl->m_next = mpc; mpl = mpc; } ecore_chain_consume(&rxq->rx_bd_ring); rxq->sw_rx_cons = (rxq->sw_rx_cons + 1) & (RX_RING_SIZE - 1); } QL_DPRINT7(ha, "[%d]: 5\n ", fp->rss_id); if (mpf != NULL) { QL_DPRINT7(ha, "[%d]: 6\n ", fp->rss_id); mp = rxq->tpa_info[agg_index].mpl; mp->m_len = ha->rx_buf_size; mp->m_next = mpf; } if (rxq->tpa_info[agg_index].agg_state != QLNX_AGG_STATE_START) { QL_DPRINT7(ha, "[%d]: 7\n ", fp->rss_id); if (rxq->tpa_info[agg_index].mpf != NULL) m_freem(rxq->tpa_info[agg_index].mpf); rxq->tpa_info[agg_index].mpf = NULL; rxq->tpa_info[agg_index].mpl = NULL; 
rxq->tpa_info[agg_index].agg_state = QLNX_AGG_STATE_NONE; return (0); } mp = rxq->tpa_info[agg_index].mpf; m_adj(mp, rxq->tpa_info[agg_index].placement_offset); mp->m_pkthdr.len = cqe->total_packet_len; if (mp->m_next == NULL) mp->m_len = mp->m_pkthdr.len; else { /* compute the total packet length */ mpf = mp; while (mpf != NULL) { len += mpf->m_len; mpf = mpf->m_next; } if (cqe->total_packet_len > len) { mpl = rxq->tpa_info[agg_index].mpl; mpl->m_len += (cqe->total_packet_len - len); } } QLNX_INC_IPACKETS(ifp); QLNX_INC_IBYTES(ifp, (cqe->total_packet_len)); QL_DPRINT7(ha, "[%d]: 8 csum_data = 0x%x csum_flags = 0x%lx\n \ m_len = 0x%x m_pkthdr_len = 0x%x\n", fp->rss_id, mp->m_pkthdr.csum_data, mp->m_pkthdr.csum_flags, mp->m_len, mp->m_pkthdr.len); (*ifp->if_input)(ifp, mp); rxq->tpa_info[agg_index].mpf = NULL; rxq->tpa_info[agg_index].mpl = NULL; rxq->tpa_info[agg_index].agg_state = QLNX_AGG_STATE_NONE; return (cqe->num_of_coalesced_segs); } static int qlnx_rx_int(qlnx_host_t *ha, struct qlnx_fastpath *fp, int budget, int lro_enable) { uint16_t hw_comp_cons, sw_comp_cons; int rx_pkt = 0; struct qlnx_rx_queue *rxq = fp->rxq; struct ifnet *ifp = ha->ifp; struct ecore_dev *cdev = &ha->cdev; struct ecore_hwfn *p_hwfn; #ifdef QLNX_SOFT_LRO struct lro_ctrl *lro; lro = &rxq->lro; #endif /* #ifdef QLNX_SOFT_LRO */ hw_comp_cons = le16toh(*rxq->hw_cons_ptr); sw_comp_cons = ecore_chain_get_cons_idx(&rxq->rx_comp_ring); p_hwfn = &ha->cdev.hwfns[(fp->rss_id % cdev->num_hwfns)]; /* Memory barrier to prevent the CPU from doing speculative reads of CQE * / BD in the while-loop before reading hw_comp_cons. If the CQE is * read before it is written by FW, then FW writes CQE and SB, and then * the CPU reads the hw_comp_cons, it will use an old CQE. */ /* Loop to complete all indicated BDs */ while (sw_comp_cons != hw_comp_cons) { union eth_rx_cqe *cqe; struct eth_fast_path_rx_reg_cqe *fp_cqe; struct sw_rx_data *sw_rx_data; register struct mbuf *mp; enum eth_rx_cqe_type cqe_type; uint16_t len, pad, len_on_first_bd; uint8_t *data; #if __FreeBSD_version >= 1100000 uint8_t hash_type; #endif /* #if __FreeBSD_version >= 1100000 */ /* Get the CQE from the completion ring */ cqe = (union eth_rx_cqe *) ecore_chain_consume(&rxq->rx_comp_ring); cqe_type = cqe->fast_path_regular.type; if (cqe_type == ETH_RX_CQE_TYPE_SLOW_PATH) { QL_DPRINT3(ha, "Got a slowath CQE\n"); ecore_eth_cqe_completion(p_hwfn, (struct eth_slow_path_rx_cqe *)cqe); goto next_cqe; } if (cqe_type != ETH_RX_CQE_TYPE_REGULAR) { switch (cqe_type) { case ETH_RX_CQE_TYPE_TPA_START: qlnx_tpa_start(ha, fp, rxq, &cqe->fast_path_tpa_start); fp->tpa_start++; break; case ETH_RX_CQE_TYPE_TPA_CONT: qlnx_tpa_cont(ha, fp, rxq, &cqe->fast_path_tpa_cont); fp->tpa_cont++; break; case ETH_RX_CQE_TYPE_TPA_END: rx_pkt += qlnx_tpa_end(ha, fp, rxq, &cqe->fast_path_tpa_end); fp->tpa_end++; break; default: break; } goto next_cqe; } /* Get the data from the SW ring */ sw_rx_data = &rxq->sw_rx_ring[rxq->sw_rx_cons]; mp = sw_rx_data->data; if (mp == NULL) { QL_DPRINT1(ha, "mp = NULL\n"); fp->err_rx_mp_null++; rxq->sw_rx_cons = (rxq->sw_rx_cons + 1) & (RX_RING_SIZE - 1); goto next_cqe; } bus_dmamap_sync(ha->rx_tag, sw_rx_data->map, BUS_DMASYNC_POSTREAD); /* non GRO */ fp_cqe = &cqe->fast_path_regular;/* MK CR TPA check assembly */ len = le16toh(fp_cqe->pkt_len); pad = fp_cqe->placement_offset; QL_DPRINT3(ha, "CQE type = %x, flags = %x, vlan = %x," " len %u, parsing flags = %d pad = %d\n", cqe_type, fp_cqe->bitfields, le16toh(fp_cqe->vlan_tag), len, 
le16toh(fp_cqe->pars_flags.flags), pad); data = mtod(mp, uint8_t *); data = data + pad; if (0) qlnx_dump_buf8(ha, __func__, data, len); /* For every Rx BD consumed, we allocate a new BD so the BD ring * is always with a fixed size. If allocation fails, we take the * consumed BD and return it to the ring in the PROD position. * The packet that was received on that BD will be dropped (and * not passed to the upper stack). */ /* If this is an error packet then drop it */ if ((le16toh(cqe->fast_path_regular.pars_flags.flags)) & CQE_FLAGS_ERR) { QL_DPRINT1(ha, "CQE in CONS = %u has error, flags = %x," " dropping incoming packet\n", sw_comp_cons, le16toh(cqe->fast_path_regular.pars_flags.flags)); fp->err_rx_hw_errors++; qlnx_reuse_rx_data(rxq); QLNX_INC_IERRORS(ifp); goto next_cqe; } if (qlnx_alloc_rx_buffer(ha, rxq) != 0) { QL_DPRINT1(ha, "New buffer allocation failed, dropping" " incoming packet and reusing its buffer\n"); qlnx_reuse_rx_data(rxq); fp->err_rx_alloc_errors++; QLNX_INC_IQDROPS(ifp); goto next_cqe; } ecore_chain_consume(&rxq->rx_bd_ring); len_on_first_bd = fp_cqe->len_on_first_bd; m_adj(mp, pad); mp->m_pkthdr.len = len; QL_DPRINT1(ha, "len = %d len_on_first_bd = %d\n", len, len_on_first_bd); if ((len > 60 ) && (len > len_on_first_bd)) { mp->m_len = len_on_first_bd; if (qlnx_rx_jumbo_chain(ha, fp, mp, (len - len_on_first_bd)) != 0) { m_freem(mp); QLNX_INC_IQDROPS(ifp); goto next_cqe; } } else if (len_on_first_bd < len) { fp->err_rx_jumbo_chain_pkts++; } else { mp->m_len = len; } mp->m_flags |= M_PKTHDR; /* assign packet to this interface interface */ mp->m_pkthdr.rcvif = ifp; /* assume no hardware checksum has complated */ mp->m_pkthdr.csum_flags = 0; mp->m_pkthdr.flowid = fp_cqe->rss_hash; #if __FreeBSD_version >= 1100000 hash_type = fp_cqe->bitfields & (ETH_FAST_PATH_RX_REG_CQE_RSS_HASH_TYPE_MASK << ETH_FAST_PATH_RX_REG_CQE_RSS_HASH_TYPE_SHIFT); switch (hash_type) { case RSS_HASH_TYPE_IPV4: M_HASHTYPE_SET(mp, M_HASHTYPE_RSS_IPV4); break; case RSS_HASH_TYPE_TCP_IPV4: M_HASHTYPE_SET(mp, M_HASHTYPE_RSS_TCP_IPV4); break; case RSS_HASH_TYPE_IPV6: M_HASHTYPE_SET(mp, M_HASHTYPE_RSS_IPV6); break; case RSS_HASH_TYPE_TCP_IPV6: M_HASHTYPE_SET(mp, M_HASHTYPE_RSS_TCP_IPV6); break; default: M_HASHTYPE_SET(mp, M_HASHTYPE_OPAQUE); break; } #else mp->m_flags |= M_FLOWID; #endif if (CQE_L3_PACKET(fp_cqe->pars_flags.flags)) { mp->m_pkthdr.csum_flags |= CSUM_IP_CHECKED; } if (!(CQE_IP_HDR_ERR(fp_cqe->pars_flags.flags))) { mp->m_pkthdr.csum_flags |= CSUM_IP_VALID; } if (CQE_L4_HAS_CSUM(fp_cqe->pars_flags.flags)) { mp->m_pkthdr.csum_data = 0xFFFF; mp->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); } if (CQE_HAS_VLAN(fp_cqe->pars_flags.flags)) { mp->m_pkthdr.ether_vtag = le16toh(fp_cqe->vlan_tag); mp->m_flags |= M_VLANTAG; } QLNX_INC_IPACKETS(ifp); QLNX_INC_IBYTES(ifp, len); #ifdef QLNX_SOFT_LRO if (lro_enable) { #if (__FreeBSD_version >= 1100101) || (defined QLNX_QSORT_LRO) tcp_lro_queue_mbuf(lro, mp); #else if (tcp_lro_rx(lro, mp, 0)) (*ifp->if_input)(ifp, mp); #endif /* #if (__FreeBSD_version >= 1100101) || (defined QLNX_QSORT_LRO) */ } else { (*ifp->if_input)(ifp, mp); } #else (*ifp->if_input)(ifp, mp); #endif /* #ifdef QLNX_SOFT_LRO */ rx_pkt++; rxq->sw_rx_cons = (rxq->sw_rx_cons + 1) & (RX_RING_SIZE - 1); next_cqe: /* don't consume bd rx buffer */ ecore_chain_recycle_consumed(&rxq->rx_comp_ring); sw_comp_cons = ecore_chain_get_cons_idx(&rxq->rx_comp_ring); /* CR TPA - revisit how to handle budget in TPA perhaps increase on "end" */ if (rx_pkt == budget) break; } /* repeat while 
sw_comp_cons != hw_comp_cons... */ /* Update producers */ qlnx_update_rx_prod(p_hwfn, rxq); return rx_pkt; } /* * fast path interrupt */ static void qlnx_fp_isr(void *arg) { qlnx_ivec_t *ivec = arg; qlnx_host_t *ha; struct qlnx_fastpath *fp = NULL; int idx; ha = ivec->ha; if (ha->state != QLNX_STATE_OPEN) { return; } idx = ivec->rss_idx; if ((idx = ivec->rss_idx) >= ha->num_rss) { QL_DPRINT1(ha, "illegal interrupt[%d]\n", idx); ha->err_illegal_intr++; return; } fp = &ha->fp_array[idx]; if (fp == NULL) { ha->err_fp_null++; } else { #ifdef QLNX_RCV_IN_TASKQ ecore_sb_ack(fp->sb_info, IGU_INT_DISABLE, 0); if (fp->fp_taskqueue != NULL) taskqueue_enqueue(fp->fp_taskqueue, &fp->fp_task); #else int rx_int = 0, total_rx_count = 0; int lro_enable, tc; struct qlnx_tx_queue *txq; uint16_t elem_left; lro_enable = ha->ifp->if_capenable & IFCAP_LRO; ecore_sb_ack(fp->sb_info, IGU_INT_DISABLE, 0); do { for (tc = 0; tc < ha->num_tc; tc++) { txq = fp->txq[tc]; if((int)(elem_left = ecore_chain_get_elem_left(&txq->tx_pbl)) < QLNX_TX_ELEM_THRESH) { if (mtx_trylock(&fp->tx_mtx)) { #ifdef QLNX_TRACE_PERF_DATA tx_compl = fp->tx_pkts_completed; #endif qlnx_tx_int(ha, fp, fp->txq[tc]); #ifdef QLNX_TRACE_PERF_DATA fp->tx_pkts_compl_intr += (fp->tx_pkts_completed - tx_compl); if ((fp->tx_pkts_completed - tx_compl) <= 32) fp->tx_comInt[0]++; else if (((fp->tx_pkts_completed - tx_compl) > 32) && ((fp->tx_pkts_completed - tx_compl) <= 64)) fp->tx_comInt[1]++; else if(((fp->tx_pkts_completed - tx_compl) > 64) && ((fp->tx_pkts_completed - tx_compl) <= 128)) fp->tx_comInt[2]++; else if(((fp->tx_pkts_completed - tx_compl) > 128)) fp->tx_comInt[3]++; #endif mtx_unlock(&fp->tx_mtx); } } } rx_int = qlnx_rx_int(ha, fp, ha->rx_pkt_threshold, lro_enable); if (rx_int) { fp->rx_pkts += rx_int; total_rx_count += rx_int; } } while (rx_int); #ifdef QLNX_SOFT_LRO { struct lro_ctrl *lro; lro = &fp->rxq->lro; if (lro_enable && total_rx_count) { #if (__FreeBSD_version >= 1100101) || (defined QLNX_QSORT_LRO) #ifdef QLNX_TRACE_LRO_CNT if (lro->lro_mbuf_count & ~1023) fp->lro_cnt_1024++; else if (lro->lro_mbuf_count & ~511) fp->lro_cnt_512++; else if (lro->lro_mbuf_count & ~255) fp->lro_cnt_256++; else if (lro->lro_mbuf_count & ~127) fp->lro_cnt_128++; else if (lro->lro_mbuf_count & ~63) fp->lro_cnt_64++; #endif /* #ifdef QLNX_TRACE_LRO_CNT */ tcp_lro_flush_all(lro); #else struct lro_entry *queued; while ((!SLIST_EMPTY(&lro->lro_active))) { queued = SLIST_FIRST(&lro->lro_active); SLIST_REMOVE_HEAD(&lro->lro_active, \ next); tcp_lro_flush(lro, queued); } #endif /* #if (__FreeBSD_version >= 1100101) || (defined QLNX_QSORT_LRO) */ } } #endif /* #ifdef QLNX_SOFT_LRO */ ecore_sb_update_sb_idx(fp->sb_info); rmb(); ecore_sb_ack(fp->sb_info, IGU_INT_ENABLE, 1); #endif /* #ifdef QLNX_RCV_IN_TASKQ */ } return; } /* * slow path interrupt processing function * can be invoked in polled mode or in interrupt mode via taskqueue. 
*/ void qlnx_sp_isr(void *arg) { struct ecore_hwfn *p_hwfn; qlnx_host_t *ha; p_hwfn = arg; ha = (qlnx_host_t *)p_hwfn->p_dev; ha->sp_interrupts++; QL_DPRINT2(ha, "enter\n"); ecore_int_sp_dpc(p_hwfn); QL_DPRINT2(ha, "exit\n"); return; } /***************************************************************************** * Support Functions for DMA'able Memory *****************************************************************************/ static void qlnx_dmamap_callback(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { *((bus_addr_t *)arg) = 0; if (error) { printf("%s: bus_dmamap_load failed (%d)\n", __func__, error); return; } *((bus_addr_t *)arg) = segs[0].ds_addr; return; } static int qlnx_alloc_dmabuf(qlnx_host_t *ha, qlnx_dma_t *dma_buf) { int ret = 0; device_t dev; bus_addr_t b_addr; dev = ha->pci_dev; ret = bus_dma_tag_create( ha->parent_tag,/* parent */ dma_buf->alignment, ((bus_size_t)(1ULL << 32)),/* boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ dma_buf->size, /* maxsize */ 1, /* nsegments */ dma_buf->size, /* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &dma_buf->dma_tag); if (ret) { QL_DPRINT1(ha, "could not create dma tag\n"); goto qlnx_alloc_dmabuf_exit; } ret = bus_dmamem_alloc(dma_buf->dma_tag, (void **)&dma_buf->dma_b, (BUS_DMA_ZERO | BUS_DMA_COHERENT | BUS_DMA_NOWAIT), &dma_buf->dma_map); if (ret) { bus_dma_tag_destroy(dma_buf->dma_tag); QL_DPRINT1(ha, "bus_dmamem_alloc failed\n"); goto qlnx_alloc_dmabuf_exit; } ret = bus_dmamap_load(dma_buf->dma_tag, dma_buf->dma_map, dma_buf->dma_b, dma_buf->size, qlnx_dmamap_callback, &b_addr, BUS_DMA_NOWAIT); if (ret || !b_addr) { bus_dma_tag_destroy(dma_buf->dma_tag); bus_dmamem_free(dma_buf->dma_tag, dma_buf->dma_b, dma_buf->dma_map); ret = -1; goto qlnx_alloc_dmabuf_exit; } dma_buf->dma_addr = b_addr; qlnx_alloc_dmabuf_exit: return ret; } static void qlnx_free_dmabuf(qlnx_host_t *ha, qlnx_dma_t *dma_buf) { bus_dmamap_unload(dma_buf->dma_tag, dma_buf->dma_map); bus_dmamem_free(dma_buf->dma_tag, dma_buf->dma_b, dma_buf->dma_map); bus_dma_tag_destroy(dma_buf->dma_tag); return; } void * qlnx_dma_alloc_coherent(void *ecore_dev, bus_addr_t *phys, uint32_t size) { qlnx_dma_t dma_buf; qlnx_dma_t *dma_p; qlnx_host_t *ha; device_t dev; ha = (qlnx_host_t *)ecore_dev; dev = ha->pci_dev; size = (size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1); memset(&dma_buf, 0, sizeof (qlnx_dma_t)); dma_buf.size = size + PAGE_SIZE; dma_buf.alignment = 8; if (qlnx_alloc_dmabuf((qlnx_host_t *)ecore_dev, &dma_buf) != 0) return (NULL); bzero((uint8_t *)dma_buf.dma_b, dma_buf.size); *phys = dma_buf.dma_addr; dma_p = (qlnx_dma_t *)((uint8_t *)dma_buf.dma_b + size); memcpy(dma_p, &dma_buf, sizeof(qlnx_dma_t)); /* QL_DPRINT5(ha, "[%p %p %p %p 0x%08x ]\n", (void *)dma_buf.dma_map, (void *)dma_buf.dma_tag, dma_buf.dma_b, (void *)dma_buf.dma_addr, size); */ return (dma_buf.dma_b); } void qlnx_dma_free_coherent(void *ecore_dev, void *v_addr, bus_addr_t phys, uint32_t size) { qlnx_dma_t dma_buf, *dma_p; qlnx_host_t *ha; device_t dev; ha = (qlnx_host_t *)ecore_dev; dev = ha->pci_dev; if (v_addr == NULL) return; size = (size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1); dma_p = (qlnx_dma_t *)((uint8_t *)v_addr + size); /* QL_DPRINT5(ha, "[%p %p %p %p 0x%08x ]\n", (void *)dma_p->dma_map, (void *)dma_p->dma_tag, dma_p->dma_b, (void *)dma_p->dma_addr, size); */ dma_buf = *dma_p; qlnx_free_dmabuf((qlnx_host_t *)ecore_dev, &dma_buf); return; } static int qlnx_alloc_parent_dma_tag(qlnx_host_t *ha) { 
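/*
 * Create the top level DMA tag; qlnx_alloc_dmabuf() derives the tags used for
 * coherent ring-memory allocations from this parent.
 */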
int ret; device_t dev; dev = ha->pci_dev; /* * Allocate parent DMA Tag */ ret = bus_dma_tag_create( bus_get_dma_tag(dev), /* parent */ 1,((bus_size_t)(1ULL << 32)),/* alignment, boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ BUS_SPACE_MAXSIZE_32BIT,/* maxsize */ 0, /* nsegments */ BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */ 0, /* flags */ NULL, NULL, /* lockfunc, lockarg */ &ha->parent_tag); if (ret) { QL_DPRINT1(ha, "could not create parent dma tag\n"); return (-1); } ha->flags.parent_tag = 1; return (0); } static void qlnx_free_parent_dma_tag(qlnx_host_t *ha) { if (ha->parent_tag != NULL) { bus_dma_tag_destroy(ha->parent_tag); ha->parent_tag = NULL; } return; } static int qlnx_alloc_tx_dma_tag(qlnx_host_t *ha) { if (bus_dma_tag_create(NULL, /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ QLNX_MAX_TSO_FRAME_SIZE, /* maxsize */ QLNX_MAX_SEGMENTS, /* nsegments */ QLNX_MAX_TX_MBUF_SIZE, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &ha->tx_tag)) { QL_DPRINT1(ha, "tx_tag alloc failed\n"); return (-1); } return (0); } static void qlnx_free_tx_dma_tag(qlnx_host_t *ha) { if (ha->tx_tag != NULL) { bus_dma_tag_destroy(ha->tx_tag); ha->tx_tag = NULL; } return; } static int qlnx_alloc_rx_dma_tag(qlnx_host_t *ha) { if (bus_dma_tag_create(NULL, /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ MJUM9BYTES, /* maxsize */ 1, /* nsegments */ MJUM9BYTES, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &ha->rx_tag)) { QL_DPRINT1(ha, " rx_tag alloc failed\n"); return (-1); } return (0); } static void qlnx_free_rx_dma_tag(qlnx_host_t *ha) { if (ha->rx_tag != NULL) { bus_dma_tag_destroy(ha->rx_tag); ha->rx_tag = NULL; } return; } /********************************* * Exported functions *********************************/ uint32_t qlnx_pci_bus_get_bar_size(void *ecore_dev, uint8_t bar_id) { uint32_t bar_size; bar_id = bar_id * 2; bar_size = bus_get_resource_count(((qlnx_host_t *)ecore_dev)->pci_dev, SYS_RES_MEMORY, PCIR_BAR(bar_id)); return (bar_size); } uint32_t qlnx_pci_read_config_byte(void *ecore_dev, uint32_t pci_reg, uint8_t *reg_value) { *reg_value = pci_read_config(((qlnx_host_t *)ecore_dev)->pci_dev, pci_reg, 1); return 0; } uint32_t qlnx_pci_read_config_word(void *ecore_dev, uint32_t pci_reg, uint16_t *reg_value) { *reg_value = pci_read_config(((qlnx_host_t *)ecore_dev)->pci_dev, pci_reg, 2); return 0; } uint32_t qlnx_pci_read_config_dword(void *ecore_dev, uint32_t pci_reg, uint32_t *reg_value) { *reg_value = pci_read_config(((qlnx_host_t *)ecore_dev)->pci_dev, pci_reg, 4); return 0; } void qlnx_pci_write_config_byte(void *ecore_dev, uint32_t pci_reg, uint8_t reg_value) { pci_write_config(((qlnx_host_t *)ecore_dev)->pci_dev, pci_reg, reg_value, 1); return; } void qlnx_pci_write_config_word(void *ecore_dev, uint32_t pci_reg, uint16_t reg_value) { pci_write_config(((qlnx_host_t *)ecore_dev)->pci_dev, pci_reg, reg_value, 2); return; } void qlnx_pci_write_config_dword(void *ecore_dev, uint32_t pci_reg, uint32_t reg_value) { pci_write_config(((qlnx_host_t *)ecore_dev)->pci_dev, pci_reg, reg_value, 4); return; } int qlnx_pci_find_capability(void *ecore_dev, int cap) { int reg; qlnx_host_t *ha; ha = ecore_dev; if (pci_find_cap(ha->pci_dev, PCIY_EXPRESS, ®) == 0) return reg; else { QL_DPRINT1(ha, 
"failed\n"); return 0; } } uint32_t qlnx_reg_rd32(void *hwfn, uint32_t reg_addr) { uint32_t data32; struct ecore_dev *cdev; struct ecore_hwfn *p_hwfn; p_hwfn = hwfn; cdev = p_hwfn->p_dev; reg_addr = (uint32_t)((uint8_t *)(p_hwfn->regview) - (uint8_t *)(cdev->regview)) + reg_addr; data32 = bus_read_4(((qlnx_host_t *)cdev)->pci_reg, reg_addr); return (data32); } void qlnx_reg_wr32(void *hwfn, uint32_t reg_addr, uint32_t value) { struct ecore_dev *cdev; struct ecore_hwfn *p_hwfn; p_hwfn = hwfn; cdev = p_hwfn->p_dev; reg_addr = (uint32_t)((uint8_t *)(p_hwfn->regview) - (uint8_t *)(cdev->regview)) + reg_addr; bus_write_4(((qlnx_host_t *)cdev)->pci_reg, reg_addr, value); return; } void qlnx_reg_wr16(void *hwfn, uint32_t reg_addr, uint16_t value) { struct ecore_dev *cdev; struct ecore_hwfn *p_hwfn; p_hwfn = hwfn; cdev = p_hwfn->p_dev; reg_addr = (uint32_t)((uint8_t *)(p_hwfn->regview) - (uint8_t *)(cdev->regview)) + reg_addr; bus_write_2(((qlnx_host_t *)cdev)->pci_reg, reg_addr, value); return; } void qlnx_dbell_wr32(void *hwfn, uint32_t reg_addr, uint32_t value) { struct ecore_dev *cdev; struct ecore_hwfn *p_hwfn; p_hwfn = hwfn; cdev = p_hwfn->p_dev; reg_addr = (uint32_t)((uint8_t *)(p_hwfn->doorbells) - (uint8_t *)(cdev->doorbells)) + reg_addr; bus_write_4(((qlnx_host_t *)cdev)->pci_dbells, reg_addr, value); return; } uint32_t qlnx_direct_reg_rd32(void *p_hwfn, uint32_t *reg_addr) { uint32_t data32; uint32_t offset; struct ecore_dev *cdev; cdev = ((struct ecore_hwfn *)p_hwfn)->p_dev; offset = (uint32_t)((uint8_t *)reg_addr - (uint8_t *)(cdev->regview)); data32 = bus_read_4(((qlnx_host_t *)cdev)->pci_reg, offset); return (data32); } void qlnx_direct_reg_wr32(void *p_hwfn, void *reg_addr, uint32_t value) { uint32_t offset; struct ecore_dev *cdev; cdev = ((struct ecore_hwfn *)p_hwfn)->p_dev; offset = (uint32_t)((uint8_t *)reg_addr - (uint8_t *)(cdev->regview)); bus_write_4(((qlnx_host_t *)cdev)->pci_reg, offset, value); return; } void qlnx_direct_reg_wr64(void *p_hwfn, void *reg_addr, uint64_t value) { uint32_t offset; struct ecore_dev *cdev; cdev = ((struct ecore_hwfn *)p_hwfn)->p_dev; offset = (uint32_t)((uint8_t *)reg_addr - (uint8_t *)(cdev->regview)); bus_write_8(((qlnx_host_t *)cdev)->pci_reg, offset, value); return; } void * qlnx_zalloc(uint32_t size) { caddr_t va; va = malloc((unsigned long)size, M_QLNXBUF, M_NOWAIT); bzero(va, size); return ((void *)va); } void qlnx_barrier(void *p_hwfn) { qlnx_host_t *ha; ha = (qlnx_host_t *)((struct ecore_hwfn *)p_hwfn)->p_dev; bus_barrier(ha->pci_reg, 0, 0, BUS_SPACE_BARRIER_WRITE); } void qlnx_link_update(void *p_hwfn) { qlnx_host_t *ha; int prev_link_state; ha = (qlnx_host_t *)((struct ecore_hwfn *)p_hwfn)->p_dev; qlnx_fill_link(p_hwfn, &ha->if_link); prev_link_state = ha->link_up; ha->link_up = ha->if_link.link_up; if (prev_link_state != ha->link_up) { if (ha->link_up) { if_link_state_change(ha->ifp, LINK_STATE_UP); } else { if_link_state_change(ha->ifp, LINK_STATE_DOWN); } } return; } void qlnx_fill_link(struct ecore_hwfn *hwfn, struct qlnx_link_output *if_link) { struct ecore_mcp_link_params link_params; struct ecore_mcp_link_state link_state; memset(if_link, 0, sizeof(*if_link)); memset(&link_params, 0, sizeof(struct ecore_mcp_link_params)); memset(&link_state, 0, sizeof(struct ecore_mcp_link_state)); /* Prepare source inputs */ /* we only deal with physical functions */ memcpy(&link_params, ecore_mcp_get_link_params(hwfn), sizeof(link_params)); memcpy(&link_state, ecore_mcp_get_link_state(hwfn), sizeof(link_state)); 
ecore_mcp_get_media_type(hwfn->p_dev, &if_link->media_type); /* Set the link parameters to pass to protocol driver */ if (link_state.link_up) { if_link->link_up = true; if_link->speed = link_state.speed; } if_link->supported_caps = QLNX_LINK_CAP_FIBRE; if (link_params.speed.autoneg) if_link->supported_caps |= QLNX_LINK_CAP_Autoneg; if (link_params.pause.autoneg || (link_params.pause.forced_rx && link_params.pause.forced_tx)) if_link->supported_caps |= QLNX_LINK_CAP_Asym_Pause; if (link_params.pause.autoneg || link_params.pause.forced_rx || link_params.pause.forced_tx) if_link->supported_caps |= QLNX_LINK_CAP_Pause; if (link_params.speed.advertised_speeds & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G) if_link->supported_caps |= QLNX_LINK_CAP_1000baseT_Half | QLNX_LINK_CAP_1000baseT_Full; if (link_params.speed.advertised_speeds & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G) if_link->supported_caps |= QLNX_LINK_CAP_10000baseKR_Full; if (link_params.speed.advertised_speeds & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G) if_link->supported_caps |= QLNX_LINK_CAP_25000baseKR_Full; if (link_params.speed.advertised_speeds & NVM_CFG1_PORT_DRV_LINK_SPEED_40G) if_link->supported_caps |= QLNX_LINK_CAP_40000baseLR4_Full; if (link_params.speed.advertised_speeds & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G) if_link->supported_caps |= QLNX_LINK_CAP_50000baseKR2_Full; if (link_params.speed.advertised_speeds & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G) if_link->supported_caps |= QLNX_LINK_CAP_100000baseKR4_Full; if_link->advertised_caps = if_link->supported_caps; if_link->autoneg = link_params.speed.autoneg; if_link->duplex = QLNX_LINK_DUPLEX; /* Link partner capabilities */ if (link_state.partner_adv_speed & ECORE_LINK_PARTNER_SPEED_1G_HD) if_link->link_partner_caps |= QLNX_LINK_CAP_1000baseT_Half; if (link_state.partner_adv_speed & ECORE_LINK_PARTNER_SPEED_1G_FD) if_link->link_partner_caps |= QLNX_LINK_CAP_1000baseT_Full; if (link_state.partner_adv_speed & ECORE_LINK_PARTNER_SPEED_10G) if_link->link_partner_caps |= QLNX_LINK_CAP_10000baseKR_Full; if (link_state.partner_adv_speed & ECORE_LINK_PARTNER_SPEED_25G) if_link->link_partner_caps |= QLNX_LINK_CAP_25000baseKR_Full; if (link_state.partner_adv_speed & ECORE_LINK_PARTNER_SPEED_40G) if_link->link_partner_caps |= QLNX_LINK_CAP_40000baseLR4_Full; if (link_state.partner_adv_speed & ECORE_LINK_PARTNER_SPEED_50G) if_link->link_partner_caps |= QLNX_LINK_CAP_50000baseKR2_Full; if (link_state.partner_adv_speed & ECORE_LINK_PARTNER_SPEED_100G) if_link->link_partner_caps |= QLNX_LINK_CAP_100000baseKR4_Full; if (link_state.an_complete) if_link->link_partner_caps |= QLNX_LINK_CAP_Autoneg; if (link_state.partner_adv_pause) if_link->link_partner_caps |= QLNX_LINK_CAP_Pause; if ((link_state.partner_adv_pause == ECORE_LINK_PARTNER_ASYMMETRIC_PAUSE) || (link_state.partner_adv_pause == ECORE_LINK_PARTNER_BOTH_PAUSE)) if_link->link_partner_caps |= QLNX_LINK_CAP_Asym_Pause; return; } static int qlnx_nic_setup(struct ecore_dev *cdev, struct ecore_pf_params *func_params) { int rc, i; for (i = 0; i < cdev->num_hwfns; i++) { struct ecore_hwfn *p_hwfn = &cdev->hwfns[i]; p_hwfn->pf_params = *func_params; } rc = ecore_resc_alloc(cdev); if (rc) goto qlnx_nic_setup_exit; ecore_resc_setup(cdev); qlnx_nic_setup_exit: return rc; } static int qlnx_nic_start(struct ecore_dev *cdev) { int rc; struct ecore_hw_init_params params; bzero(¶ms, sizeof (struct ecore_hw_init_params)); params.p_tunn = NULL; params.b_hw_start = true; params.int_mode = cdev->int_mode; 
params.allow_npar_tx_switch = true; params.bin_fw_data = NULL; rc = ecore_hw_init(cdev, ¶ms); if (rc) { ecore_resc_free(cdev); return rc; } return 0; } static int qlnx_slowpath_start(qlnx_host_t *ha) { struct ecore_dev *cdev; struct ecore_pf_params pf_params; int rc; memset(&pf_params, 0, sizeof(struct ecore_pf_params)); pf_params.eth_pf_params.num_cons = (ha->num_rss) * (ha->num_tc + 1); cdev = &ha->cdev; rc = qlnx_nic_setup(cdev, &pf_params); if (rc) goto qlnx_slowpath_start_exit; cdev->int_mode = ECORE_INT_MODE_MSIX; cdev->int_coalescing_mode = ECORE_COAL_MODE_ENABLE; #ifdef QLNX_MAX_COALESCE cdev->rx_coalesce_usecs = 255; cdev->tx_coalesce_usecs = 255; #endif rc = qlnx_nic_start(cdev); ha->rx_coalesce_usecs = cdev->rx_coalesce_usecs; ha->tx_coalesce_usecs = cdev->tx_coalesce_usecs; qlnx_slowpath_start_exit: return (rc); } static int qlnx_slowpath_stop(qlnx_host_t *ha) { struct ecore_dev *cdev; device_t dev = ha->pci_dev; int i; cdev = &ha->cdev; ecore_hw_stop(cdev); for (i = 0; i < ha->cdev.num_hwfns; i++) { if (ha->sp_handle[i]) (void)bus_teardown_intr(dev, ha->sp_irq[i], ha->sp_handle[i]); ha->sp_handle[i] = NULL; if (ha->sp_irq[i]) (void) bus_release_resource(dev, SYS_RES_IRQ, ha->sp_irq_rid[i], ha->sp_irq[i]); ha->sp_irq[i] = NULL; } ecore_resc_free(cdev); return 0; } static void qlnx_set_id(struct ecore_dev *cdev, char name[NAME_SIZE], char ver_str[VER_SIZE]) { int i; memcpy(cdev->name, name, NAME_SIZE); for_each_hwfn(cdev, i) { snprintf(cdev->hwfns[i].name, NAME_SIZE, "%s-%d", name, i); } cdev->drv_type = DRV_ID_DRV_TYPE_FREEBSD; return ; } void qlnx_get_protocol_stats(void *cdev, int proto_type, void *proto_stats) { enum ecore_mcp_protocol_type type; union ecore_mcp_protocol_stats *stats; struct ecore_eth_stats eth_stats; qlnx_host_t *ha; ha = cdev; stats = proto_stats; type = proto_type; switch (type) { case ECORE_MCP_LAN_STATS: ecore_get_vport_stats((struct ecore_dev *)cdev, ð_stats); stats->lan_stats.ucast_rx_pkts = eth_stats.common.rx_ucast_pkts; stats->lan_stats.ucast_tx_pkts = eth_stats.common.tx_ucast_pkts; stats->lan_stats.fcs_err = -1; break; default: ha->err_get_proto_invalid_type++; QL_DPRINT1(ha, "invalid protocol type 0x%x\n", type); break; } return; } static int qlnx_get_mfw_version(qlnx_host_t *ha, uint32_t *mfw_ver) { struct ecore_hwfn *p_hwfn; struct ecore_ptt *p_ptt; p_hwfn = &ha->cdev.hwfns[0]; p_ptt = ecore_ptt_acquire(p_hwfn); if (p_ptt == NULL) { QL_DPRINT1(ha, "ecore_ptt_acquire failed\n"); return (-1); } ecore_mcp_get_mfw_ver(p_hwfn, p_ptt, mfw_ver, NULL); ecore_ptt_release(p_hwfn, p_ptt); return (0); } static int qlnx_get_flash_size(qlnx_host_t *ha, uint32_t *flash_size) { struct ecore_hwfn *p_hwfn; struct ecore_ptt *p_ptt; p_hwfn = &ha->cdev.hwfns[0]; p_ptt = ecore_ptt_acquire(p_hwfn); if (p_ptt == NULL) { QL_DPRINT1(ha,"ecore_ptt_acquire failed\n"); return (-1); } ecore_mcp_get_flash_size(p_hwfn, p_ptt, flash_size); ecore_ptt_release(p_hwfn, p_ptt); return (0); } static int qlnx_alloc_mem_arrays(qlnx_host_t *ha) { struct ecore_dev *cdev; cdev = &ha->cdev; bzero(&ha->txq_array[0], (sizeof(struct qlnx_tx_queue) * QLNX_MAX_RSS)); bzero(&ha->rxq_array[0], (sizeof(struct qlnx_rx_queue) * QLNX_MAX_RSS)); bzero(&ha->sb_array[0], (sizeof(struct ecore_sb_info) * QLNX_MAX_RSS)); return 0; } static void qlnx_init_fp(qlnx_host_t *ha) { int rss_id, txq_array_index, tc; for (rss_id = 0; rss_id < ha->num_rss; rss_id++) { struct qlnx_fastpath *fp = &ha->fp_array[rss_id]; fp->rss_id = rss_id; fp->edev = ha; fp->sb_info = &ha->sb_array[rss_id]; fp->rxq = 
&ha->rxq_array[rss_id]; fp->rxq->rxq_id = rss_id; for (tc = 0; tc < ha->num_tc; tc++) { txq_array_index = tc * ha->num_rss + rss_id; fp->txq[tc] = &ha->txq_array[txq_array_index]; fp->txq[tc]->index = txq_array_index; } snprintf(fp->name, sizeof(fp->name), "%s-fp-%d", qlnx_name_str, rss_id); fp->tx_ring_full = 0; /* reset all the statistics counters */ fp->tx_pkts_processed = 0; fp->tx_pkts_freed = 0; fp->tx_pkts_transmitted = 0; fp->tx_pkts_completed = 0; #ifdef QLNX_TRACE_PERF_DATA fp->tx_pkts_trans_ctx = 0; fp->tx_pkts_compl_ctx = 0; fp->tx_pkts_trans_fp = 0; fp->tx_pkts_compl_fp = 0; fp->tx_pkts_compl_intr = 0; #endif fp->tx_lso_wnd_min_len = 0; fp->tx_defrag = 0; fp->tx_nsegs_gt_elem_left = 0; fp->tx_tso_max_nsegs = 0; fp->tx_tso_min_nsegs = 0; fp->err_tx_nsegs_gt_elem_left = 0; fp->err_tx_dmamap_create = 0; fp->err_tx_defrag_dmamap_load = 0; fp->err_tx_non_tso_max_seg = 0; fp->err_tx_dmamap_load = 0; fp->err_tx_defrag = 0; fp->err_tx_free_pkt_null = 0; fp->err_tx_cons_idx_conflict = 0; fp->rx_pkts = 0; fp->err_m_getcl = 0; fp->err_m_getjcl = 0; } return; } static void qlnx_free_mem_sb(qlnx_host_t *ha, struct ecore_sb_info *sb_info) { struct ecore_dev *cdev; cdev = &ha->cdev; if (sb_info->sb_virt) { OSAL_DMA_FREE_COHERENT(cdev, ((void *)sb_info->sb_virt), (sb_info->sb_phys), (sizeof(*sb_info->sb_virt))); sb_info->sb_virt = NULL; } } static int qlnx_sb_init(struct ecore_dev *cdev, struct ecore_sb_info *sb_info, void *sb_virt_addr, bus_addr_t sb_phy_addr, u16 sb_id) { struct ecore_hwfn *p_hwfn; int hwfn_index, rc; u16 rel_sb_id; hwfn_index = sb_id % cdev->num_hwfns; p_hwfn = &cdev->hwfns[hwfn_index]; rel_sb_id = sb_id / cdev->num_hwfns; QL_DPRINT2(((qlnx_host_t *)cdev), "hwfn_index = %d p_hwfn = %p sb_id = 0x%x rel_sb_id = 0x%x \ sb_info = %p sb_virt_addr = %p sb_phy_addr = %p\n", hwfn_index, p_hwfn, sb_id, rel_sb_id, sb_info, sb_virt_addr, (void *)sb_phy_addr); rc = ecore_int_sb_init(p_hwfn, p_hwfn->p_main_ptt, sb_info, sb_virt_addr, sb_phy_addr, rel_sb_id); return rc; } /* This function allocates fast-path status block memory */ static int qlnx_alloc_mem_sb(qlnx_host_t *ha, struct ecore_sb_info *sb_info, u16 sb_id) { struct status_block_e4 *sb_virt; bus_addr_t sb_phys; int rc; uint32_t size; struct ecore_dev *cdev; cdev = &ha->cdev; size = sizeof(*sb_virt); sb_virt = OSAL_DMA_ALLOC_COHERENT(cdev, (&sb_phys), size); if (!sb_virt) { QL_DPRINT1(ha, "Status block allocation failed\n"); return -ENOMEM; } rc = qlnx_sb_init(cdev, sb_info, sb_virt, sb_phys, sb_id); if (rc) { OSAL_DMA_FREE_COHERENT(cdev, sb_virt, sb_phys, size); } return rc; } static void qlnx_free_rx_buffers(qlnx_host_t *ha, struct qlnx_rx_queue *rxq) { int i; struct sw_rx_data *rx_buf; for (i = 0; i < rxq->num_rx_buffers; i++) { rx_buf = &rxq->sw_rx_ring[i]; if (rx_buf->data != NULL) { if (rx_buf->map != NULL) { bus_dmamap_unload(ha->rx_tag, rx_buf->map); bus_dmamap_destroy(ha->rx_tag, rx_buf->map); rx_buf->map = NULL; } m_freem(rx_buf->data); rx_buf->data = NULL; } } return; } static void qlnx_free_mem_rxq(qlnx_host_t *ha, struct qlnx_rx_queue *rxq) { struct ecore_dev *cdev; int i; cdev = &ha->cdev; qlnx_free_rx_buffers(ha, rxq); for (i = 0; i < ETH_TPA_MAX_AGGS_NUM; i++) { qlnx_free_tpa_mbuf(ha, &rxq->tpa_info[i]); if (rxq->tpa_info[i].mpf != NULL) m_freem(rxq->tpa_info[i].mpf); } bzero((void *)&rxq->sw_rx_ring[0], (sizeof (struct sw_rx_data) * RX_RING_SIZE)); /* Free the real RQ ring used by FW */ if (rxq->rx_bd_ring.p_virt_addr) { ecore_chain_free(cdev, &rxq->rx_bd_ring); rxq->rx_bd_ring.p_virt_addr = NULL; } /* Free the 
real completion ring used by FW */ if (rxq->rx_comp_ring.p_virt_addr && rxq->rx_comp_ring.pbl_sp.p_virt_table) { ecore_chain_free(cdev, &rxq->rx_comp_ring); rxq->rx_comp_ring.p_virt_addr = NULL; rxq->rx_comp_ring.pbl_sp.p_virt_table = NULL; } #ifdef QLNX_SOFT_LRO { struct lro_ctrl *lro; lro = &rxq->lro; tcp_lro_free(lro); } #endif /* #ifdef QLNX_SOFT_LRO */ return; } static int qlnx_alloc_rx_buffer(qlnx_host_t *ha, struct qlnx_rx_queue *rxq) { register struct mbuf *mp; uint16_t rx_buf_size; struct sw_rx_data *sw_rx_data; struct eth_rx_bd *rx_bd; dma_addr_t dma_addr; bus_dmamap_t map; bus_dma_segment_t segs[1]; int nsegs; int ret; struct ecore_dev *cdev; cdev = &ha->cdev; rx_buf_size = rxq->rx_buf_size; mp = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rx_buf_size); if (mp == NULL) { QL_DPRINT1(ha, "Failed to allocate Rx data\n"); return -ENOMEM; } mp->m_len = mp->m_pkthdr.len = rx_buf_size; map = (bus_dmamap_t)0; ret = bus_dmamap_load_mbuf_sg(ha->rx_tag, map, mp, segs, &nsegs, BUS_DMA_NOWAIT); dma_addr = segs[0].ds_addr; if (ret || !dma_addr || (nsegs != 1)) { m_freem(mp); QL_DPRINT1(ha, "bus_dmamap_load failed[%d, 0x%016llx, %d]\n", ret, (long long unsigned int)dma_addr, nsegs); return -ENOMEM; } sw_rx_data = &rxq->sw_rx_ring[rxq->sw_rx_prod]; sw_rx_data->data = mp; sw_rx_data->dma_addr = dma_addr; sw_rx_data->map = map; /* Advance PROD and get BD pointer */ rx_bd = (struct eth_rx_bd *)ecore_chain_produce(&rxq->rx_bd_ring); rx_bd->addr.hi = htole32(U64_HI(dma_addr)); rx_bd->addr.lo = htole32(U64_LO(dma_addr)); bus_dmamap_sync(ha->rx_tag, map, BUS_DMASYNC_PREREAD); rxq->sw_rx_prod = (rxq->sw_rx_prod + 1) & (RX_RING_SIZE - 1); return 0; } static int qlnx_alloc_tpa_mbuf(qlnx_host_t *ha, uint16_t rx_buf_size, struct qlnx_agg_info *tpa) { struct mbuf *mp; dma_addr_t dma_addr; bus_dmamap_t map; bus_dma_segment_t segs[1]; int nsegs; int ret; struct sw_rx_data *rx_buf; mp = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, rx_buf_size); if (mp == NULL) { QL_DPRINT1(ha, "Failed to allocate Rx data\n"); return -ENOMEM; } mp->m_len = mp->m_pkthdr.len = rx_buf_size; map = (bus_dmamap_t)0; ret = bus_dmamap_load_mbuf_sg(ha->rx_tag, map, mp, segs, &nsegs, BUS_DMA_NOWAIT); dma_addr = segs[0].ds_addr; if (ret || !dma_addr || (nsegs != 1)) { m_freem(mp); QL_DPRINT1(ha, "bus_dmamap_load failed[%d, 0x%016llx, %d]\n", ret, (long long unsigned int)dma_addr, nsegs); return -ENOMEM; } rx_buf = &tpa->rx_buf; memset(rx_buf, 0, sizeof (struct sw_rx_data)); rx_buf->data = mp; rx_buf->dma_addr = dma_addr; rx_buf->map = map; bus_dmamap_sync(ha->rx_tag, map, BUS_DMASYNC_PREREAD); return (0); } static void qlnx_free_tpa_mbuf(qlnx_host_t *ha, struct qlnx_agg_info *tpa) { struct sw_rx_data *rx_buf; rx_buf = &tpa->rx_buf; if (rx_buf->data != NULL) { if (rx_buf->map != NULL) { bus_dmamap_unload(ha->rx_tag, rx_buf->map); bus_dmamap_destroy(ha->rx_tag, rx_buf->map); rx_buf->map = NULL; } m_freem(rx_buf->data); rx_buf->data = NULL; } return; } /* This function allocates all memory needed per Rx queue */ static int qlnx_alloc_mem_rxq(qlnx_host_t *ha, struct qlnx_rx_queue *rxq) { int i, rc, num_allocated; struct ifnet *ifp; struct ecore_dev *cdev; cdev = &ha->cdev; ifp = ha->ifp; rxq->num_rx_buffers = RX_RING_SIZE; rxq->rx_buf_size = ha->rx_buf_size; /* Allocate the parallel driver ring for Rx buffers */ bzero((void *)&rxq->sw_rx_ring[0], (sizeof (struct sw_rx_data) * RX_RING_SIZE)); /* Allocate FW Rx ring */ rc = ecore_chain_alloc(cdev, ECORE_CHAIN_USE_TO_CONSUME_PRODUCE, ECORE_CHAIN_MODE_NEXT_PTR, ECORE_CHAIN_CNT_TYPE_U16, RX_RING_SIZE, 
sizeof(struct eth_rx_bd), &rxq->rx_bd_ring, NULL); if (rc) goto err; /* Allocate FW completion ring */ rc = ecore_chain_alloc(cdev, ECORE_CHAIN_USE_TO_CONSUME, ECORE_CHAIN_MODE_PBL, ECORE_CHAIN_CNT_TYPE_U16, RX_RING_SIZE, sizeof(union eth_rx_cqe), &rxq->rx_comp_ring, NULL); if (rc) goto err; /* Allocate buffers for the Rx ring */ for (i = 0; i < ETH_TPA_MAX_AGGS_NUM; i++) { rc = qlnx_alloc_tpa_mbuf(ha, rxq->rx_buf_size, &rxq->tpa_info[i]); if (rc) break; } for (i = 0; i < rxq->num_rx_buffers; i++) { rc = qlnx_alloc_rx_buffer(ha, rxq); if (rc) break; } num_allocated = i; if (!num_allocated) { QL_DPRINT1(ha, "Rx buffers allocation failed\n"); goto err; } else if (num_allocated < rxq->num_rx_buffers) { QL_DPRINT1(ha, "Allocated less buffers than" " desired (%d allocated)\n", num_allocated); } #ifdef QLNX_SOFT_LRO { struct lro_ctrl *lro; lro = &rxq->lro; #if (__FreeBSD_version >= 1100101) || (defined QLNX_QSORT_LRO) if (tcp_lro_init_args(lro, ifp, 0, rxq->num_rx_buffers)) { QL_DPRINT1(ha, "tcp_lro_init[%d] failed\n", rxq->rxq_id); goto err; } #else if (tcp_lro_init(lro)) { QL_DPRINT1(ha, "tcp_lro_init[%d] failed\n", rxq->rxq_id); goto err; } #endif /* #if (__FreeBSD_version >= 1100101) || (defined QLNX_QSORT_LRO) */ lro->ifp = ha->ifp; } #endif /* #ifdef QLNX_SOFT_LRO */ return 0; err: qlnx_free_mem_rxq(ha, rxq); return -ENOMEM; } static void qlnx_free_mem_txq(qlnx_host_t *ha, struct qlnx_fastpath *fp, struct qlnx_tx_queue *txq) { struct ecore_dev *cdev; cdev = &ha->cdev; bzero((void *)&txq->sw_tx_ring[0], (sizeof (struct sw_tx_bd) * TX_RING_SIZE)); /* Free the real RQ ring used by FW */ if (txq->tx_pbl.p_virt_addr) { ecore_chain_free(cdev, &txq->tx_pbl); txq->tx_pbl.p_virt_addr = NULL; } return; } /* This function allocates all memory needed per Tx queue */ static int qlnx_alloc_mem_txq(qlnx_host_t *ha, struct qlnx_fastpath *fp, struct qlnx_tx_queue *txq) { int ret = ECORE_SUCCESS; union eth_tx_bd_types *p_virt; struct ecore_dev *cdev; cdev = &ha->cdev; bzero((void *)&txq->sw_tx_ring[0], (sizeof (struct sw_tx_bd) * TX_RING_SIZE)); /* Allocate the real Tx ring to be used by FW */ ret = ecore_chain_alloc(cdev, ECORE_CHAIN_USE_TO_CONSUME_PRODUCE, ECORE_CHAIN_MODE_PBL, ECORE_CHAIN_CNT_TYPE_U16, TX_RING_SIZE, sizeof(*p_virt), &txq->tx_pbl, NULL); if (ret != ECORE_SUCCESS) { goto err; } txq->num_tx_buffers = TX_RING_SIZE; return 0; err: qlnx_free_mem_txq(ha, fp, txq); return -ENOMEM; } static void qlnx_free_tx_br(qlnx_host_t *ha, struct qlnx_fastpath *fp) { struct mbuf *mp; struct ifnet *ifp = ha->ifp; if (mtx_initialized(&fp->tx_mtx)) { if (fp->tx_br != NULL) { mtx_lock(&fp->tx_mtx); while ((mp = drbr_dequeue(ifp, fp->tx_br)) != NULL) { fp->tx_pkts_freed++; m_freem(mp); } mtx_unlock(&fp->tx_mtx); buf_ring_free(fp->tx_br, M_DEVBUF); fp->tx_br = NULL; } mtx_destroy(&fp->tx_mtx); } return; } static void qlnx_free_mem_fp(qlnx_host_t *ha, struct qlnx_fastpath *fp) { int tc; qlnx_free_mem_sb(ha, fp->sb_info); qlnx_free_mem_rxq(ha, fp->rxq); for (tc = 0; tc < ha->num_tc; tc++) qlnx_free_mem_txq(ha, fp, fp->txq[tc]); return; } static int qlnx_alloc_tx_br(qlnx_host_t *ha, struct qlnx_fastpath *fp) { snprintf(fp->tx_mtx_name, sizeof(fp->tx_mtx_name), "qlnx%d_fp%d_tx_mq_lock", ha->dev_unit, fp->rss_id); mtx_init(&fp->tx_mtx, fp->tx_mtx_name, NULL, MTX_DEF); fp->tx_br = buf_ring_alloc(TX_RING_SIZE, M_DEVBUF, M_NOWAIT, &fp->tx_mtx); if (fp->tx_br == NULL) { QL_DPRINT1(ha, "buf_ring_alloc failed for fp[%d, %d]\n", ha->dev_unit, fp->rss_id); return -ENOMEM; } return 0; } static int qlnx_alloc_mem_fp(qlnx_host_t 
*ha, struct qlnx_fastpath *fp) { int rc, tc; rc = qlnx_alloc_mem_sb(ha, fp->sb_info, fp->rss_id); if (rc) goto err; if (ha->rx_jumbo_buf_eq_mtu) { if (ha->max_frame_size <= MCLBYTES) ha->rx_buf_size = MCLBYTES; else if (ha->max_frame_size <= MJUMPAGESIZE) ha->rx_buf_size = MJUMPAGESIZE; else if (ha->max_frame_size <= MJUM9BYTES) ha->rx_buf_size = MJUM9BYTES; else if (ha->max_frame_size <= MJUM16BYTES) ha->rx_buf_size = MJUM16BYTES; } else { if (ha->max_frame_size <= MCLBYTES) ha->rx_buf_size = MCLBYTES; else ha->rx_buf_size = MJUMPAGESIZE; } rc = qlnx_alloc_mem_rxq(ha, fp->rxq); if (rc) goto err; for (tc = 0; tc < ha->num_tc; tc++) { rc = qlnx_alloc_mem_txq(ha, fp, fp->txq[tc]); if (rc) goto err; } return 0; err: qlnx_free_mem_fp(ha, fp); return -ENOMEM; } static void qlnx_free_mem_load(qlnx_host_t *ha) { int i; struct ecore_dev *cdev; cdev = &ha->cdev; for (i = 0; i < ha->num_rss; i++) { struct qlnx_fastpath *fp = &ha->fp_array[i]; qlnx_free_mem_fp(ha, fp); } return; } static int qlnx_alloc_mem_load(qlnx_host_t *ha) { int rc = 0, rss_id; for (rss_id = 0; rss_id < ha->num_rss; rss_id++) { struct qlnx_fastpath *fp = &ha->fp_array[rss_id]; rc = qlnx_alloc_mem_fp(ha, fp); if (rc) break; } return (rc); } static int qlnx_start_vport(struct ecore_dev *cdev, u8 vport_id, u16 mtu, u8 drop_ttl0_flg, u8 inner_vlan_removal_en_flg, u8 tx_switching, u8 hw_lro_enable) { int rc, i; struct ecore_sp_vport_start_params vport_start_params = { 0 }; qlnx_host_t *ha; ha = (qlnx_host_t *)cdev; vport_start_params.remove_inner_vlan = inner_vlan_removal_en_flg; vport_start_params.tx_switching = 0; vport_start_params.handle_ptp_pkts = 0; vport_start_params.only_untagged = 0; vport_start_params.drop_ttl0 = drop_ttl0_flg; vport_start_params.tpa_mode = (hw_lro_enable ? ECORE_TPA_MODE_RSC : ECORE_TPA_MODE_NONE); vport_start_params.max_buffers_per_cqe = QLNX_TPA_MAX_AGG_BUFFERS; vport_start_params.vport_id = vport_id; vport_start_params.mtu = mtu; QL_DPRINT2(ha, "Setting mtu to %d and VPORT ID = %d\n", mtu, vport_id); for_each_hwfn(cdev, i) { struct ecore_hwfn *p_hwfn = &cdev->hwfns[i]; vport_start_params.concrete_fid = p_hwfn->hw_info.concrete_fid; vport_start_params.opaque_fid = p_hwfn->hw_info.opaque_fid; rc = ecore_sp_vport_start(p_hwfn, &vport_start_params); if (rc) { QL_DPRINT1(ha, "Failed to start VPORT V-PORT %d" " with MTU %d\n" , vport_id, mtu); return -ENOMEM; } ecore_hw_start_fastpath(p_hwfn); QL_DPRINT2(ha, "Started V-PORT %d with MTU %d\n", vport_id, mtu); } return 0; } static int qlnx_update_vport(struct ecore_dev *cdev, struct qlnx_update_vport_params *params) { struct ecore_sp_vport_update_params sp_params; int rc, i, j, fp_index; struct ecore_hwfn *p_hwfn; struct ecore_rss_params *rss; qlnx_host_t *ha = (qlnx_host_t *)cdev; struct qlnx_fastpath *fp; memset(&sp_params, 0, sizeof(sp_params)); /* Translate protocol params into sp params */ sp_params.vport_id = params->vport_id; sp_params.update_vport_active_rx_flg = params->update_vport_active_rx_flg; sp_params.vport_active_rx_flg = params->vport_active_rx_flg; sp_params.update_vport_active_tx_flg = params->update_vport_active_tx_flg; sp_params.vport_active_tx_flg = params->vport_active_tx_flg; sp_params.update_inner_vlan_removal_flg = params->update_inner_vlan_removal_flg; sp_params.inner_vlan_removal_flg = params->inner_vlan_removal_flg; sp_params.sge_tpa_params = params->sge_tpa_params; /* RSS - is a bit tricky, since upper-layer isn't familiar with hwfns. * We need to re-fix the rss values per engine for CMT. 
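 *
 * Illustrative example (the loop below is authoritative): on a 2-engine
 * CMT device with num_rss = 4, the indirection-table fix-up computes
 *
 *     fp_index = ((num_hwfns * j) + i) % num_rss
 *
 * so engine i = 0 ends up referencing queues 0,2,0,2,... and engine
 * i = 1 references queues 1,3,1,3,..., meaning each engine's table only
 * points at the Rx queues associated with that engine (rss_id % num_hwfns).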
*/ if (params->rss_params->update_rss_config) sp_params.rss_params = params->rss_params; else sp_params.rss_params = NULL; for_each_hwfn(cdev, i) { p_hwfn = &cdev->hwfns[i]; if ((cdev->num_hwfns > 1) && params->rss_params->update_rss_config && params->rss_params->rss_enable) { rss = params->rss_params; for (j = 0; j < ECORE_RSS_IND_TABLE_SIZE; j++) { fp_index = ((cdev->num_hwfns * j) + i) % ha->num_rss; fp = &ha->fp_array[fp_index]; rss->rss_ind_table[j] = fp->rxq->handle; } for (j = 0; j < ECORE_RSS_IND_TABLE_SIZE;) { QL_DPRINT3(ha, "%p %p %p %p %p %p %p %p \n", rss->rss_ind_table[j], rss->rss_ind_table[j+1], rss->rss_ind_table[j+2], rss->rss_ind_table[j+3], rss->rss_ind_table[j+4], rss->rss_ind_table[j+5], rss->rss_ind_table[j+6], rss->rss_ind_table[j+7]); j += 8; } } sp_params.opaque_fid = p_hwfn->hw_info.opaque_fid; QL_DPRINT1(ha, "Update sp vport ID=%d\n", params->vport_id); rc = ecore_sp_vport_update(p_hwfn, &sp_params, ECORE_SPQ_MODE_EBLOCK, NULL); if (rc) { QL_DPRINT1(ha, "Failed to update VPORT\n"); return rc; } QL_DPRINT2(ha, "Updated V-PORT %d: tx_active_flag %d, \ rx_active_flag %d [tx_update %d], [rx_update %d]\n", params->vport_id, params->vport_active_tx_flg, params->vport_active_rx_flg, params->update_vport_active_tx_flg, params->update_vport_active_rx_flg); } return 0; } static void qlnx_reuse_rx_data(struct qlnx_rx_queue *rxq) { struct eth_rx_bd *rx_bd_cons = ecore_chain_consume(&rxq->rx_bd_ring); struct eth_rx_bd *rx_bd_prod = ecore_chain_produce(&rxq->rx_bd_ring); struct sw_rx_data *sw_rx_data_cons = &rxq->sw_rx_ring[rxq->sw_rx_cons]; struct sw_rx_data *sw_rx_data_prod = &rxq->sw_rx_ring[rxq->sw_rx_prod]; sw_rx_data_prod->data = sw_rx_data_cons->data; memcpy(rx_bd_prod, rx_bd_cons, sizeof(struct eth_rx_bd)); rxq->sw_rx_cons = (rxq->sw_rx_cons + 1) & (RX_RING_SIZE - 1); rxq->sw_rx_prod = (rxq->sw_rx_prod + 1) & (RX_RING_SIZE - 1); return; } static void qlnx_update_rx_prod(struct ecore_hwfn *p_hwfn, struct qlnx_rx_queue *rxq) { uint16_t bd_prod; uint16_t cqe_prod; union { struct eth_rx_prod_data rx_prod_data; uint32_t data32; } rx_prods; bd_prod = ecore_chain_get_prod_idx(&rxq->rx_bd_ring); cqe_prod = ecore_chain_get_prod_idx(&rxq->rx_comp_ring); /* Update producers */ rx_prods.rx_prod_data.bd_prod = htole16(bd_prod); rx_prods.rx_prod_data.cqe_prod = htole16(cqe_prod); /* Make sure that the BD and SGE data is updated before updating the * producers since FW might read the BD/SGE right after the producer * is updated. */ wmb(); internal_ram_wr(p_hwfn, rxq->hw_rxq_prod_addr, sizeof(rx_prods), &rx_prods.data32); /* mmiowb is needed to synchronize doorbell writes from more than one * processor. It guarantees that the write arrives to the device before * the napi lock is released and another qlnx_poll is called (possibly * on another CPU). Without this barrier, the next doorbell can bypass * this doorbell. This is applicable to IA64/Altix systems. 
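 *
 * Sketch of the ordering relied on here (illustrative only, mirroring the
 * statements in this function):
 *
 *     rx_prods.rx_prod_data.bd_prod  = htole16(bd_prod);    (1) stage producers
 *     rx_prods.rx_prod_data.cqe_prod = htole16(cqe_prod);
 *     wmb();                    (2) BD/SGE writes become visible before...
 *     internal_ram_wr(p_hwfn, rxq->hw_rxq_prod_addr,
 *         sizeof(rx_prods), &rx_prods.data32);              (3) ...the producer update
 *     wmb();                    (4) and the update itself lands before any later doorbell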
*/ wmb(); return; } static uint32_t qlnx_hash_key[] = { ((0x6d << 24)|(0x5a << 16)|(0x56 << 8)|0xda), ((0x25 << 24)|(0x5b << 16)|(0x0e << 8)|0xc2), ((0x41 << 24)|(0x67 << 16)|(0x25 << 8)|0x3d), ((0x43 << 24)|(0xa3 << 16)|(0x8f << 8)|0xb0), ((0xd0 << 24)|(0xca << 16)|(0x2b << 8)|0xcb), ((0xae << 24)|(0x7b << 16)|(0x30 << 8)|0xb4), ((0x77 << 24)|(0xcb << 16)|(0x2d << 8)|0xa3), ((0x80 << 24)|(0x30 << 16)|(0xf2 << 8)|0x0c), ((0x6a << 24)|(0x42 << 16)|(0xb7 << 8)|0x3b), ((0xbe << 24)|(0xac << 16)|(0x01 << 8)|0xfa)}; static int qlnx_start_queues(qlnx_host_t *ha) { int rc, tc, i, vport_id = 0, drop_ttl0_flg = 1, vlan_removal_en = 1, tx_switching = 0, hw_lro_enable = 0; struct ecore_dev *cdev = &ha->cdev; struct ecore_rss_params *rss_params = &ha->rss_params; struct qlnx_update_vport_params vport_update_params; struct ifnet *ifp; struct ecore_hwfn *p_hwfn; struct ecore_sge_tpa_params tpa_params; struct ecore_queue_start_common_params qparams; struct qlnx_fastpath *fp; ifp = ha->ifp; QL_DPRINT1(ha, "Num RSS = %d\n", ha->num_rss); if (!ha->num_rss) { QL_DPRINT1(ha, "Cannot update V-VPORT as active as there" " are no Rx queues\n"); return -EINVAL; } #ifndef QLNX_SOFT_LRO hw_lro_enable = ifp->if_capenable & IFCAP_LRO; #endif /* #ifndef QLNX_SOFT_LRO */ rc = qlnx_start_vport(cdev, vport_id, ifp->if_mtu, drop_ttl0_flg, vlan_removal_en, tx_switching, hw_lro_enable); if (rc) { QL_DPRINT1(ha, "Start V-PORT failed %d\n", rc); return rc; } QL_DPRINT2(ha, "Start vport ramrod passed, " "vport_id = %d, MTU = %d, vlan_removal_en = %d\n", vport_id, (int)(ifp->if_mtu + 0xe), vlan_removal_en); for_each_rss(i) { struct ecore_rxq_start_ret_params rx_ret_params; struct ecore_txq_start_ret_params tx_ret_params; fp = &ha->fp_array[i]; p_hwfn = &cdev->hwfns[(fp->rss_id % cdev->num_hwfns)]; bzero(&qparams, sizeof(struct ecore_queue_start_common_params)); bzero(&rx_ret_params, sizeof (struct ecore_rxq_start_ret_params)); qparams.queue_id = i ; qparams.vport_id = vport_id; qparams.stats_id = vport_id; qparams.p_sb = fp->sb_info; qparams.sb_idx = RX_PI; rc = ecore_eth_rx_queue_start(p_hwfn, p_hwfn->hw_info.opaque_fid, &qparams, fp->rxq->rx_buf_size, /* bd_max_bytes */ /* bd_chain_phys_addr */ fp->rxq->rx_bd_ring.p_phys_addr, /* cqe_pbl_addr */ ecore_chain_get_pbl_phys(&fp->rxq->rx_comp_ring), /* cqe_pbl_size */ ecore_chain_get_page_cnt(&fp->rxq->rx_comp_ring), &rx_ret_params); if (rc) { QL_DPRINT1(ha, "Start RXQ #%d failed %d\n", i, rc); return rc; } fp->rxq->hw_rxq_prod_addr = rx_ret_params.p_prod; fp->rxq->handle = rx_ret_params.p_handle; fp->rxq->hw_cons_ptr = &fp->sb_info->sb_virt->pi_array[RX_PI]; qlnx_update_rx_prod(p_hwfn, fp->rxq); for (tc = 0; tc < ha->num_tc; tc++) { struct qlnx_tx_queue *txq = fp->txq[tc]; bzero(&qparams, sizeof(struct ecore_queue_start_common_params)); bzero(&tx_ret_params, sizeof (struct ecore_txq_start_ret_params)); qparams.queue_id = txq->index / cdev->num_hwfns ; qparams.vport_id = vport_id; qparams.stats_id = vport_id; qparams.p_sb = fp->sb_info; qparams.sb_idx = TX_PI(tc); rc = ecore_eth_tx_queue_start(p_hwfn, p_hwfn->hw_info.opaque_fid, &qparams, tc, /* bd_chain_phys_addr */ ecore_chain_get_pbl_phys(&txq->tx_pbl), ecore_chain_get_page_cnt(&txq->tx_pbl), &tx_ret_params); if (rc) { QL_DPRINT1(ha, "Start TXQ #%d failed %d\n", txq->index, rc); return rc; } txq->doorbell_addr = tx_ret_params.p_doorbell; txq->handle = tx_ret_params.p_handle; txq->hw_cons_ptr = &fp->sb_info->sb_virt->pi_array[TX_PI(tc)]; SET_FIELD(txq->tx_db.data.params, ETH_DB_DATA_DEST, DB_DEST_XCM); 
SET_FIELD(txq->tx_db.data.params, ETH_DB_DATA_AGG_CMD, DB_AGG_CMD_SET); SET_FIELD(txq->tx_db.data.params, ETH_DB_DATA_AGG_VAL_SEL, DQ_XCM_ETH_TX_BD_PROD_CMD); txq->tx_db.data.agg_flags = DQ_XCM_ETH_DQ_CF_CMD; } } /* Fill struct with RSS params */ if (ha->num_rss > 1) { rss_params->update_rss_config = 1; rss_params->rss_enable = 1; rss_params->update_rss_capabilities = 1; rss_params->update_rss_ind_table = 1; rss_params->update_rss_key = 1; rss_params->rss_caps = ECORE_RSS_IPV4 | ECORE_RSS_IPV6 | ECORE_RSS_IPV4_TCP | ECORE_RSS_IPV6_TCP; rss_params->rss_table_size_log = 7; /* 2^7 = 128 */ for (i = 0; i < ECORE_RSS_IND_TABLE_SIZE; i++) { fp = &ha->fp_array[(i % ha->num_rss)]; rss_params->rss_ind_table[i] = fp->rxq->handle; } for (i = 0; i < ECORE_RSS_KEY_SIZE; i++) rss_params->rss_key[i] = (__le32)qlnx_hash_key[i]; } else { memset(rss_params, 0, sizeof(*rss_params)); } /* Prepare and send the vport enable */ memset(&vport_update_params, 0, sizeof(vport_update_params)); vport_update_params.vport_id = vport_id; vport_update_params.update_vport_active_tx_flg = 1; vport_update_params.vport_active_tx_flg = 1; vport_update_params.update_vport_active_rx_flg = 1; vport_update_params.vport_active_rx_flg = 1; vport_update_params.rss_params = rss_params; vport_update_params.update_inner_vlan_removal_flg = 1; vport_update_params.inner_vlan_removal_flg = 1; if (hw_lro_enable) { memset(&tpa_params, 0, sizeof (struct ecore_sge_tpa_params)); tpa_params.max_buffers_per_cqe = QLNX_TPA_MAX_AGG_BUFFERS; tpa_params.update_tpa_en_flg = 1; tpa_params.tpa_ipv4_en_flg = 1; tpa_params.tpa_ipv6_en_flg = 1; tpa_params.update_tpa_param_flg = 1; tpa_params.tpa_pkt_split_flg = 0; tpa_params.tpa_hdr_data_split_flg = 0; tpa_params.tpa_gro_consistent_flg = 0; tpa_params.tpa_max_aggs_num = ETH_TPA_MAX_AGGS_NUM; tpa_params.tpa_max_size = (uint16_t)(-1); tpa_params.tpa_min_size_to_start = ifp->if_mtu/2; tpa_params.tpa_min_size_to_cont = ifp->if_mtu/2; vport_update_params.sge_tpa_params = &tpa_params; } rc = qlnx_update_vport(cdev, &vport_update_params); if (rc) { QL_DPRINT1(ha, "Update V-PORT failed %d\n", rc); return rc; } return 0; } static int qlnx_drain_txq(qlnx_host_t *ha, struct qlnx_fastpath *fp, struct qlnx_tx_queue *txq) { uint16_t hw_bd_cons; uint16_t ecore_cons_idx; QL_DPRINT2(ha, "enter\n"); hw_bd_cons = le16toh(*txq->hw_cons_ptr); while (hw_bd_cons != (ecore_cons_idx = ecore_chain_get_cons_idx(&txq->tx_pbl))) { mtx_lock(&fp->tx_mtx); (void)qlnx_tx_int(ha, fp, txq); mtx_unlock(&fp->tx_mtx); qlnx_mdelay(__func__, 2); hw_bd_cons = le16toh(*txq->hw_cons_ptr); } QL_DPRINT2(ha, "[%d, %d]: done\n", fp->rss_id, txq->index); return 0; } static int qlnx_stop_queues(qlnx_host_t *ha) { struct qlnx_update_vport_params vport_update_params; struct ecore_dev *cdev; struct qlnx_fastpath *fp; int rc, tc, i; cdev = &ha->cdev; /* Disable the vport */ memset(&vport_update_params, 0, sizeof(vport_update_params)); vport_update_params.vport_id = 0; vport_update_params.update_vport_active_tx_flg = 1; vport_update_params.vport_active_tx_flg = 0; vport_update_params.update_vport_active_rx_flg = 1; vport_update_params.vport_active_rx_flg = 0; vport_update_params.rss_params = &ha->rss_params; vport_update_params.rss_params->update_rss_config = 0; vport_update_params.rss_params->rss_enable = 0; vport_update_params.update_inner_vlan_removal_flg = 0; vport_update_params.inner_vlan_removal_flg = 0; QL_DPRINT1(ha, "Update vport ID= %d\n", vport_update_params.vport_id); rc = qlnx_update_vport(cdev, &vport_update_params); if (rc) { QL_DPRINT1(ha, 
"Failed to update vport\n"); return rc; } /* Flush Tx queues. If needed, request drain from MCP */ for_each_rss(i) { fp = &ha->fp_array[i]; for (tc = 0; tc < ha->num_tc; tc++) { struct qlnx_tx_queue *txq = fp->txq[tc]; rc = qlnx_drain_txq(ha, fp, txq); if (rc) return rc; } } /* Stop all Queues in reverse order*/ for (i = ha->num_rss - 1; i >= 0; i--) { struct ecore_hwfn *p_hwfn = &cdev->hwfns[(i % cdev->num_hwfns)]; fp = &ha->fp_array[i]; /* Stop the Tx Queue(s)*/ for (tc = 0; tc < ha->num_tc; tc++) { int tx_queue_id; tx_queue_id = tc * ha->num_rss + i; rc = ecore_eth_tx_queue_stop(p_hwfn, fp->txq[tc]->handle); if (rc) { QL_DPRINT1(ha, "Failed to stop TXQ #%d\n", tx_queue_id); return rc; } } /* Stop the Rx Queue*/ rc = ecore_eth_rx_queue_stop(p_hwfn, fp->rxq->handle, false, false); if (rc) { QL_DPRINT1(ha, "Failed to stop RXQ #%d\n", i); return rc; } } /* Stop the vport */ for_each_hwfn(cdev, i) { struct ecore_hwfn *p_hwfn = &cdev->hwfns[i]; rc = ecore_sp_vport_stop(p_hwfn, p_hwfn->hw_info.opaque_fid, 0); if (rc) { QL_DPRINT1(ha, "Failed to stop VPORT\n"); return rc; } } return rc; } static int qlnx_set_ucast_rx_mac(qlnx_host_t *ha, enum ecore_filter_opcode opcode, unsigned char mac[ETH_ALEN]) { struct ecore_filter_ucast ucast; struct ecore_dev *cdev; int rc; cdev = &ha->cdev; bzero(&ucast, sizeof(struct ecore_filter_ucast)); ucast.opcode = opcode; ucast.type = ECORE_FILTER_MAC; ucast.is_rx_filter = 1; ucast.vport_to_add_to = 0; memcpy(&ucast.mac[0], mac, ETH_ALEN); rc = ecore_filter_ucast_cmd(cdev, &ucast, ECORE_SPQ_MODE_CB, NULL); return (rc); } static int qlnx_remove_all_ucast_mac(qlnx_host_t *ha) { struct ecore_filter_ucast ucast; struct ecore_dev *cdev; int rc; bzero(&ucast, sizeof(struct ecore_filter_ucast)); ucast.opcode = ECORE_FILTER_REPLACE; ucast.type = ECORE_FILTER_MAC; ucast.is_rx_filter = 1; cdev = &ha->cdev; rc = ecore_filter_ucast_cmd(cdev, &ucast, ECORE_SPQ_MODE_CB, NULL); return (rc); } static int qlnx_remove_all_mcast_mac(qlnx_host_t *ha) { struct ecore_filter_mcast *mcast; struct ecore_dev *cdev; int rc, i; cdev = &ha->cdev; mcast = &ha->ecore_mcast; bzero(mcast, sizeof(struct ecore_filter_mcast)); mcast->opcode = ECORE_FILTER_REMOVE; for (i = 0; i < QLNX_MAX_NUM_MULTICAST_ADDRS; i++) { if (ha->mcast[i].addr[0] || ha->mcast[i].addr[1] || ha->mcast[i].addr[2] || ha->mcast[i].addr[3] || ha->mcast[i].addr[4] || ha->mcast[i].addr[5]) { memcpy(&mcast->mac[i], &ha->mcast[i].addr[0], ETH_ALEN); mcast->num_mc_addrs++; } } mcast = &ha->ecore_mcast; rc = ecore_filter_mcast_cmd(cdev, mcast, ECORE_SPQ_MODE_CB, NULL); bzero(ha->mcast, (sizeof(qlnx_mcast_t) * QLNX_MAX_NUM_MULTICAST_ADDRS)); ha->nmcast = 0; return (rc); } static int qlnx_clean_filters(qlnx_host_t *ha) { int rc = 0; /* Remove all unicast macs */ rc = qlnx_remove_all_ucast_mac(ha); if (rc) return rc; /* Remove all multicast macs */ rc = qlnx_remove_all_mcast_mac(ha); if (rc) return rc; rc = qlnx_set_ucast_rx_mac(ha, ECORE_FILTER_FLUSH, ha->primary_mac); return (rc); } static int qlnx_set_rx_accept_filter(qlnx_host_t *ha, uint8_t filter) { struct ecore_filter_accept_flags accept; int rc = 0; struct ecore_dev *cdev; cdev = &ha->cdev; bzero(&accept, sizeof(struct ecore_filter_accept_flags)); accept.update_rx_mode_config = 1; accept.rx_accept_filter = filter; accept.update_tx_mode_config = 1; accept.tx_accept_filter = ECORE_ACCEPT_UCAST_MATCHED | ECORE_ACCEPT_MCAST_MATCHED | ECORE_ACCEPT_BCAST; rc = ecore_filter_accept_cmd(cdev, 0, accept, false, false, ECORE_SPQ_MODE_CB, NULL); return (rc); } static int 
qlnx_set_rx_mode(qlnx_host_t *ha) { int rc = 0; uint8_t filter; rc = qlnx_set_ucast_rx_mac(ha, ECORE_FILTER_REPLACE, ha->primary_mac); if (rc) return rc; rc = qlnx_remove_all_mcast_mac(ha); if (rc) return rc; filter = ECORE_ACCEPT_UCAST_MATCHED | ECORE_ACCEPT_MCAST_MATCHED | ECORE_ACCEPT_BCAST; ha->filter = filter; rc = qlnx_set_rx_accept_filter(ha, filter); return (rc); } static int qlnx_set_link(qlnx_host_t *ha, bool link_up) { int i, rc = 0; struct ecore_dev *cdev; struct ecore_hwfn *hwfn; struct ecore_ptt *ptt; cdev = &ha->cdev; for_each_hwfn(cdev, i) { hwfn = &cdev->hwfns[i]; ptt = ecore_ptt_acquire(hwfn); if (!ptt) return -EBUSY; rc = ecore_mcp_set_link(hwfn, ptt, link_up); ecore_ptt_release(hwfn, ptt); if (rc) return rc; } return (rc); } #if __FreeBSD_version >= 1100000 static uint64_t qlnx_get_counter(if_t ifp, ift_counter cnt) { qlnx_host_t *ha; uint64_t count; ha = (qlnx_host_t *)if_getsoftc(ifp); switch (cnt) { case IFCOUNTER_IPACKETS: count = ha->hw_stats.common.rx_ucast_pkts + ha->hw_stats.common.rx_mcast_pkts + ha->hw_stats.common.rx_bcast_pkts; break; case IFCOUNTER_IERRORS: count = ha->hw_stats.common.rx_crc_errors + ha->hw_stats.common.rx_align_errors + ha->hw_stats.common.rx_oversize_packets + ha->hw_stats.common.rx_undersize_packets; break; case IFCOUNTER_OPACKETS: count = ha->hw_stats.common.tx_ucast_pkts + ha->hw_stats.common.tx_mcast_pkts + ha->hw_stats.common.tx_bcast_pkts; break; case IFCOUNTER_OERRORS: count = ha->hw_stats.common.tx_err_drop_pkts; break; case IFCOUNTER_COLLISIONS: return (0); case IFCOUNTER_IBYTES: count = ha->hw_stats.common.rx_ucast_bytes + ha->hw_stats.common.rx_mcast_bytes + ha->hw_stats.common.rx_bcast_bytes; break; case IFCOUNTER_OBYTES: count = ha->hw_stats.common.tx_ucast_bytes + ha->hw_stats.common.tx_mcast_bytes + ha->hw_stats.common.tx_bcast_bytes; break; case IFCOUNTER_IMCASTS: count = ha->hw_stats.common.rx_mcast_bytes; break; case IFCOUNTER_OMCASTS: count = ha->hw_stats.common.tx_mcast_bytes; break; case IFCOUNTER_IQDROPS: case IFCOUNTER_OQDROPS: case IFCOUNTER_NOPROTO: default: return (if_get_counter_default(ifp, cnt)); } return (count); } #endif static void qlnx_timer(void *arg) { qlnx_host_t *ha; ha = (qlnx_host_t *)arg; ecore_get_vport_stats(&ha->cdev, &ha->hw_stats); if (ha->storm_stats_gather) qlnx_sample_storm_stats(ha); callout_reset(&ha->qlnx_callout, hz, qlnx_timer, ha); return; } static int qlnx_load(qlnx_host_t *ha) { int i; int rc = 0; struct ecore_dev *cdev; device_t dev; cdev = &ha->cdev; dev = ha->pci_dev; QL_DPRINT2(ha, "enter\n"); rc = qlnx_alloc_mem_arrays(ha); if (rc) goto qlnx_load_exit0; qlnx_init_fp(ha); rc = qlnx_alloc_mem_load(ha); if (rc) goto qlnx_load_exit1; QL_DPRINT2(ha, "Allocated %d RSS queues on %d TC/s\n", ha->num_rss, ha->num_tc); for (i = 0; i < ha->num_rss; i++) { if ((rc = bus_setup_intr(dev, ha->irq_vec[i].irq, (INTR_TYPE_NET | INTR_MPSAFE), NULL, qlnx_fp_isr, &ha->irq_vec[i], &ha->irq_vec[i].handle))) { QL_DPRINT1(ha, "could not setup interrupt\n"); goto qlnx_load_exit2; } QL_DPRINT2(ha, "rss_id = %d irq_rid %d \ irq %p handle %p\n", i, ha->irq_vec[i].irq_rid, ha->irq_vec[i].irq, ha->irq_vec[i].handle); bus_bind_intr(dev, ha->irq_vec[i].irq, (i % mp_ncpus)); } rc = qlnx_start_queues(ha); if (rc) goto qlnx_load_exit2; QL_DPRINT2(ha, "Start VPORT, RXQ and TXQ succeeded\n"); /* Add primary mac and set Rx filters */ rc = qlnx_set_rx_mode(ha); if (rc) goto qlnx_load_exit2; /* Ask for link-up using current configuration */ qlnx_set_link(ha, true); ha->state = QLNX_STATE_OPEN; bzero(&ha->hw_stats, 
sizeof(struct ecore_eth_stats)); if (ha->flags.callout_init) callout_reset(&ha->qlnx_callout, hz, qlnx_timer, ha); goto qlnx_load_exit0; qlnx_load_exit2: qlnx_free_mem_load(ha); qlnx_load_exit1: ha->num_rss = 0; qlnx_load_exit0: QL_DPRINT2(ha, "exit [%d]\n", rc); return rc; } static void qlnx_drain_soft_lro(qlnx_host_t *ha) { #ifdef QLNX_SOFT_LRO struct ifnet *ifp; int i; ifp = ha->ifp; if (ifp->if_capenable & IFCAP_LRO) { for (i = 0; i < ha->num_rss; i++) { struct qlnx_fastpath *fp = &ha->fp_array[i]; struct lro_ctrl *lro; lro = &fp->rxq->lro; #if (__FreeBSD_version >= 1100101) || (defined QLNX_QSORT_LRO) tcp_lro_flush_all(lro); #else struct lro_entry *queued; while ((!SLIST_EMPTY(&lro->lro_active))){ queued = SLIST_FIRST(&lro->lro_active); SLIST_REMOVE_HEAD(&lro->lro_active, next); tcp_lro_flush(lro, queued); } #endif /* #if (__FreeBSD_version >= 1100101) || (defined QLNX_QSORT_LRO) */ } } #endif /* #ifdef QLNX_SOFT_LRO */ return; } static void qlnx_unload(qlnx_host_t *ha) { struct ecore_dev *cdev; device_t dev; int i; cdev = &ha->cdev; dev = ha->pci_dev; QL_DPRINT2(ha, "enter\n"); QL_DPRINT1(ha, " QLNX STATE = %d\n",ha->state); if (ha->state == QLNX_STATE_OPEN) { qlnx_set_link(ha, false); qlnx_clean_filters(ha); qlnx_stop_queues(ha); ecore_hw_stop_fastpath(cdev); for (i = 0; i < ha->num_rss; i++) { if (ha->irq_vec[i].handle) { (void)bus_teardown_intr(dev, ha->irq_vec[i].irq, ha->irq_vec[i].handle); ha->irq_vec[i].handle = NULL; } } qlnx_drain_fp_taskqueues(ha); qlnx_drain_soft_lro(ha); qlnx_free_mem_load(ha); } if (ha->flags.callout_init) callout_drain(&ha->qlnx_callout); qlnx_mdelay(__func__, 1000); ha->state = QLNX_STATE_CLOSED; QL_DPRINT2(ha, "exit\n"); return; } static int qlnx_grc_dumpsize(qlnx_host_t *ha, uint32_t *num_dwords, int hwfn_index) { int rval = -1; struct ecore_hwfn *p_hwfn; struct ecore_ptt *p_ptt; ecore_dbg_set_app_ver(ecore_dbg_get_fw_func_ver()); p_hwfn = &ha->cdev.hwfns[hwfn_index]; p_ptt = ecore_ptt_acquire(p_hwfn); if (!p_ptt) { QL_DPRINT1(ha, "ecore_ptt_acquire failed\n"); return (rval); } rval = ecore_dbg_grc_get_dump_buf_size(p_hwfn, p_ptt, num_dwords); if (rval == DBG_STATUS_OK) rval = 0; else { QL_DPRINT1(ha, "ecore_dbg_grc_get_dump_buf_size failed" "[0x%x]\n", rval); } ecore_ptt_release(p_hwfn, p_ptt); return (rval); } static int qlnx_idle_chk_size(qlnx_host_t *ha, uint32_t *num_dwords, int hwfn_index) { int rval = -1; struct ecore_hwfn *p_hwfn; struct ecore_ptt *p_ptt; ecore_dbg_set_app_ver(ecore_dbg_get_fw_func_ver()); p_hwfn = &ha->cdev.hwfns[hwfn_index]; p_ptt = ecore_ptt_acquire(p_hwfn); if (!p_ptt) { QL_DPRINT1(ha, "ecore_ptt_acquire failed\n"); return (rval); } rval = ecore_dbg_idle_chk_get_dump_buf_size(p_hwfn, p_ptt, num_dwords); if (rval == DBG_STATUS_OK) rval = 0; else { QL_DPRINT1(ha, "ecore_dbg_idle_chk_get_dump_buf_size failed" " [0x%x]\n", rval); } ecore_ptt_release(p_hwfn, p_ptt); return (rval); } static void qlnx_sample_storm_stats(qlnx_host_t *ha) { int i, index; struct ecore_dev *cdev; qlnx_storm_stats_t *s_stats; uint32_t reg; struct ecore_ptt *p_ptt; struct ecore_hwfn *hwfn; if (ha->storm_stats_index >= QLNX_STORM_STATS_SAMPLES_PER_HWFN) { ha->storm_stats_gather = 0; return; } cdev = &ha->cdev; for_each_hwfn(cdev, i) { hwfn = &cdev->hwfns[i]; p_ptt = ecore_ptt_acquire(hwfn); if (!p_ptt) return; index = ha->storm_stats_index + (i * QLNX_STORM_STATS_SAMPLES_PER_HWFN); s_stats = &ha->storm_stats[index]; /* XSTORM */ reg = XSEM_REG_FAST_MEMORY + SEM_FAST_REG_STORM_ACTIVE_CYCLES_BB_K2; s_stats->xstorm_active_cycles = ecore_rd(hwfn, p_ptt, 
reg); reg = XSEM_REG_FAST_MEMORY + SEM_FAST_REG_STORM_STALL_CYCLES_BB_K2; s_stats->xstorm_stall_cycles = ecore_rd(hwfn, p_ptt, reg); reg = XSEM_REG_FAST_MEMORY + SEM_FAST_REG_IDLE_SLEEPING_CYCLES_BB_K2; s_stats->xstorm_sleeping_cycles = ecore_rd(hwfn, p_ptt, reg); reg = XSEM_REG_FAST_MEMORY + SEM_FAST_REG_IDLE_INACTIVE_CYCLES_BB_K2; s_stats->xstorm_inactive_cycles = ecore_rd(hwfn, p_ptt, reg); /* YSTORM */ reg = YSEM_REG_FAST_MEMORY + SEM_FAST_REG_STORM_ACTIVE_CYCLES_BB_K2; s_stats->ystorm_active_cycles = ecore_rd(hwfn, p_ptt, reg); reg = YSEM_REG_FAST_MEMORY + SEM_FAST_REG_STORM_STALL_CYCLES_BB_K2; s_stats->ystorm_stall_cycles = ecore_rd(hwfn, p_ptt, reg); reg = YSEM_REG_FAST_MEMORY + SEM_FAST_REG_IDLE_SLEEPING_CYCLES_BB_K2; s_stats->ystorm_sleeping_cycles = ecore_rd(hwfn, p_ptt, reg); reg = YSEM_REG_FAST_MEMORY + SEM_FAST_REG_IDLE_INACTIVE_CYCLES_BB_K2; s_stats->ystorm_inactive_cycles = ecore_rd(hwfn, p_ptt, reg); /* PSTORM */ reg = PSEM_REG_FAST_MEMORY + SEM_FAST_REG_STORM_ACTIVE_CYCLES_BB_K2; s_stats->pstorm_active_cycles = ecore_rd(hwfn, p_ptt, reg); reg = PSEM_REG_FAST_MEMORY + SEM_FAST_REG_STORM_STALL_CYCLES_BB_K2; s_stats->pstorm_stall_cycles = ecore_rd(hwfn, p_ptt, reg); reg = PSEM_REG_FAST_MEMORY + SEM_FAST_REG_IDLE_SLEEPING_CYCLES_BB_K2; s_stats->pstorm_sleeping_cycles = ecore_rd(hwfn, p_ptt, reg); reg = PSEM_REG_FAST_MEMORY + SEM_FAST_REG_IDLE_INACTIVE_CYCLES_BB_K2; s_stats->pstorm_inactive_cycles = ecore_rd(hwfn, p_ptt, reg); /* TSTORM */ reg = TSEM_REG_FAST_MEMORY + SEM_FAST_REG_STORM_ACTIVE_CYCLES_BB_K2; s_stats->tstorm_active_cycles = ecore_rd(hwfn, p_ptt, reg); reg = TSEM_REG_FAST_MEMORY + SEM_FAST_REG_STORM_STALL_CYCLES_BB_K2; s_stats->tstorm_stall_cycles = ecore_rd(hwfn, p_ptt, reg); reg = TSEM_REG_FAST_MEMORY + SEM_FAST_REG_IDLE_SLEEPING_CYCLES_BB_K2; s_stats->tstorm_sleeping_cycles = ecore_rd(hwfn, p_ptt, reg); reg = TSEM_REG_FAST_MEMORY + SEM_FAST_REG_IDLE_INACTIVE_CYCLES_BB_K2; s_stats->tstorm_inactive_cycles = ecore_rd(hwfn, p_ptt, reg); /* MSTORM */ reg = MSEM_REG_FAST_MEMORY + SEM_FAST_REG_STORM_ACTIVE_CYCLES_BB_K2; s_stats->mstorm_active_cycles = ecore_rd(hwfn, p_ptt, reg); reg = MSEM_REG_FAST_MEMORY + SEM_FAST_REG_STORM_STALL_CYCLES_BB_K2; s_stats->mstorm_stall_cycles = ecore_rd(hwfn, p_ptt, reg); reg = MSEM_REG_FAST_MEMORY + SEM_FAST_REG_IDLE_SLEEPING_CYCLES_BB_K2; s_stats->mstorm_sleeping_cycles = ecore_rd(hwfn, p_ptt, reg); reg = MSEM_REG_FAST_MEMORY + SEM_FAST_REG_IDLE_INACTIVE_CYCLES_BB_K2; s_stats->mstorm_inactive_cycles = ecore_rd(hwfn, p_ptt, reg); /* USTORM */ reg = USEM_REG_FAST_MEMORY + SEM_FAST_REG_STORM_ACTIVE_CYCLES_BB_K2; s_stats->ustorm_active_cycles = ecore_rd(hwfn, p_ptt, reg); reg = USEM_REG_FAST_MEMORY + SEM_FAST_REG_STORM_STALL_CYCLES_BB_K2; s_stats->ustorm_stall_cycles = ecore_rd(hwfn, p_ptt, reg); reg = USEM_REG_FAST_MEMORY + SEM_FAST_REG_IDLE_SLEEPING_CYCLES_BB_K2; s_stats->ustorm_sleeping_cycles = ecore_rd(hwfn, p_ptt, reg); reg = USEM_REG_FAST_MEMORY + SEM_FAST_REG_IDLE_INACTIVE_CYCLES_BB_K2; s_stats->ustorm_inactive_cycles = ecore_rd(hwfn, p_ptt, reg); ecore_ptt_release(hwfn, p_ptt); } ha->storm_stats_index++; return; } /* * Name: qlnx_dump_buf8 * Function: dumps a buffer as bytes */ static void qlnx_dump_buf8(qlnx_host_t *ha, const char *msg, void *dbuf, uint32_t len) { device_t dev; uint32_t i = 0; uint8_t *buf; dev = ha->pci_dev; buf = dbuf; device_printf(dev, "%s: %s 0x%x dump start\n", __func__, msg, len); while (len >= 16) { device_printf(dev,"0x%08x:" " %02x %02x %02x %02x %02x %02x %02x %02x" " %02x %02x %02x %02x %02x %02x 
%02x %02x\n", i, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7], buf[8], buf[9], buf[10], buf[11], buf[12], buf[13], buf[14], buf[15]); i += 16; len -= 16; buf += 16; } switch (len) { case 1: device_printf(dev,"0x%08x: %02x\n", i, buf[0]); break; case 2: device_printf(dev,"0x%08x: %02x %02x\n", i, buf[0], buf[1]); break; case 3: device_printf(dev,"0x%08x: %02x %02x %02x\n", i, buf[0], buf[1], buf[2]); break; case 4: device_printf(dev,"0x%08x: %02x %02x %02x %02x\n", i, buf[0], buf[1], buf[2], buf[3]); break; case 5: device_printf(dev,"0x%08x:" " %02x %02x %02x %02x %02x\n", i, buf[0], buf[1], buf[2], buf[3], buf[4]); break; case 6: device_printf(dev,"0x%08x:" " %02x %02x %02x %02x %02x %02x\n", i, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]); break; case 7: device_printf(dev,"0x%08x:" " %02x %02x %02x %02x %02x %02x %02x\n", i, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6]); break; case 8: device_printf(dev,"0x%08x:" " %02x %02x %02x %02x %02x %02x %02x %02x\n", i, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7]); break; case 9: device_printf(dev,"0x%08x:" " %02x %02x %02x %02x %02x %02x %02x %02x" " %02x\n", i, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7], buf[8]); break; case 10: device_printf(dev,"0x%08x:" " %02x %02x %02x %02x %02x %02x %02x %02x" " %02x %02x\n", i, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7], buf[8], buf[9]); break; case 11: device_printf(dev,"0x%08x:" " %02x %02x %02x %02x %02x %02x %02x %02x" " %02x %02x %02x\n", i, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7], buf[8], buf[9], buf[10]); break; case 12: device_printf(dev,"0x%08x:" " %02x %02x %02x %02x %02x %02x %02x %02x" " %02x %02x %02x %02x\n", i, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7], buf[8], buf[9], buf[10], buf[11]); break; case 13: device_printf(dev,"0x%08x:" " %02x %02x %02x %02x %02x %02x %02x %02x" " %02x %02x %02x %02x %02x\n", i, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7], buf[8], buf[9], buf[10], buf[11], buf[12]); break; case 14: device_printf(dev,"0x%08x:" " %02x %02x %02x %02x %02x %02x %02x %02x" " %02x %02x %02x %02x %02x %02x\n", i, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7], buf[8], buf[9], buf[10], buf[11], buf[12], buf[13]); break; case 15: device_printf(dev,"0x%08x:" " %02x %02x %02x %02x %02x %02x %02x %02x" " %02x %02x %02x %02x %02x %02x %02x\n", i, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7], buf[8], buf[9], buf[10], buf[11], buf[12], buf[13], buf[14]); break; default: break; } device_printf(dev, "%s: %s dump end\n", __func__, msg); return; } Index: stable/11/sys/dev/sbni/if_sbni.c =================================================================== --- stable/11/sys/dev/sbni/if_sbni.c (revision 332287) +++ stable/11/sys/dev/sbni/if_sbni.c (revision 332288) @@ -1,1277 +1,1277 @@ /*- * Copyright (c) 1997-2001 Granch, Ltd. All rights reserved. * Author: Denis I.Timofeev * * Redistributon and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * Device driver for Granch SBNI12 leased line adapters * * Revision 2.0.0 1997/08/06 * Initial revision by Alexey Zverev * * Revision 2.0.1 1997/08/11 * Additional internal statistics support (tx statistics) * * Revision 2.0.2 1997/11/05 * if_bpf bug has been fixed * * Revision 2.0.3 1998/12/20 * Memory leakage has been eliminated in * the sbni_st and sbni_timeout routines. * * Revision 3.0 2000/08/10 by Yaroslav Polyakov * Support for PCI cards. 4.1 modification. * * Revision 3.1 2000/09/12 * Removed extra #defines around bpf functions * * Revision 4.0 2000/11/23 by Denis Timofeev * Completely redesigned the buffer management * * Revision 4.1 2001/01/21 * Support for PCI Dual cards and new SBNI12D-10, -11 Dual/ISA cards * * Written with reference to NE2000 driver developed by David Greenman. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void sbni_init(void *); static void sbni_init_locked(struct sbni_softc *); static void sbni_start(struct ifnet *); static void sbni_start_locked(struct ifnet *); static int sbni_ioctl(struct ifnet *, u_long, caddr_t); static void sbni_stop(struct sbni_softc *); static void handle_channel(struct sbni_softc *); static void card_start(struct sbni_softc *); static int recv_frame(struct sbni_softc *); static void send_frame(struct sbni_softc *); static int upload_data(struct sbni_softc *, u_int, u_int, u_int, u_int32_t); static int skip_tail(struct sbni_softc *, u_int, u_int32_t); static void interpret_ack(struct sbni_softc *, u_int); static void download_data(struct sbni_softc *, u_int32_t *); static void prepare_to_send(struct sbni_softc *); static void drop_xmit_queue(struct sbni_softc *); static int get_rx_buf(struct sbni_softc *); static void indicate_pkt(struct sbni_softc *); static void change_level(struct sbni_softc *); static int check_fhdr(struct sbni_softc *, u_int *, u_int *, u_int *, u_int *, u_int32_t *); static int append_frame_to_pkt(struct sbni_softc *, u_int, u_int32_t); static void timeout_change_level(struct sbni_softc *); static void send_frame_header(struct sbni_softc *, u_int32_t *); static void set_initial_values(struct sbni_softc *, struct sbni_flags); static u_int32_t calc_crc32(u_int32_t, caddr_t, u_int); static timeout_t sbni_timeout; static __inline u_char sbni_inb(struct sbni_softc *, enum sbni_reg); static __inline void sbni_outb(struct sbni_softc *, enum sbni_reg, u_char); static __inline void sbni_insb(struct sbni_softc *, u_char *, u_int); static __inline void sbni_outsb(struct sbni_softc *, u_char *, u_int); static u_int32_t crc32tab[]; #ifdef SBNI_DUAL_COMPOUND static struct 
mtx headlist_lock; MTX_SYSINIT(headlist_lock, &headlist_lock, "sbni headlist", MTX_DEF); static struct sbni_softc *sbni_headlist; #endif /* -------------------------------------------------------------------------- */ static __inline u_char sbni_inb(struct sbni_softc *sc, enum sbni_reg reg) { return bus_space_read_1( rman_get_bustag(sc->io_res), rman_get_bushandle(sc->io_res), sc->io_off + reg); } static __inline void sbni_outb(struct sbni_softc *sc, enum sbni_reg reg, u_char value) { bus_space_write_1( rman_get_bustag(sc->io_res), rman_get_bushandle(sc->io_res), sc->io_off + reg, value); } static __inline void sbni_insb(struct sbni_softc *sc, u_char *to, u_int len) { bus_space_read_multi_1( rman_get_bustag(sc->io_res), rman_get_bushandle(sc->io_res), sc->io_off + DAT, to, len); } static __inline void sbni_outsb(struct sbni_softc *sc, u_char *from, u_int len) { bus_space_write_multi_1( rman_get_bustag(sc->io_res), rman_get_bushandle(sc->io_res), sc->io_off + DAT, from, len); } /* Valid combinations in CSR0 (for probing): VALID_DECODER 0000,0011,1011,1010 ; 0 ; - TR_REQ ; 1 ; + TR_RDY ; 2 ; - TR_RDY TR_REQ ; 3 ; + BU_EMP ; 4 ; + BU_EMP TR_REQ ; 5 ; + BU_EMP TR_RDY ; 6 ; - BU_EMP TR_RDY TR_REQ ; 7 ; + RC_RDY ; 8 ; + RC_RDY TR_REQ ; 9 ; + RC_RDY TR_RDY ; 10 ; - RC_RDY TR_RDY TR_REQ ; 11 ; - RC_RDY BU_EMP ; 12 ; - RC_RDY BU_EMP TR_REQ ; 13 ; - RC_RDY BU_EMP TR_RDY ; 14 ; - RC_RDY BU_EMP TR_RDY TR_REQ ; 15 ; - */ #define VALID_DECODER (2 + 8 + 0x10 + 0x20 + 0x80 + 0x100 + 0x200) int sbni_probe(struct sbni_softc *sc) { u_char csr0; csr0 = sbni_inb(sc, CSR0); if (csr0 != 0xff && csr0 != 0x00) { csr0 &= ~EN_INT; if (csr0 & BU_EMP) csr0 |= EN_INT; if (VALID_DECODER & (1 << (csr0 >> 4))) return (0); } return (ENXIO); } /* * Install interface into kernel networking data structures */ int sbni_attach(struct sbni_softc *sc, int unit, struct sbni_flags flags) { struct ifnet *ifp; u_char csr0; ifp = sc->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) return (ENOMEM); sbni_outb(sc, CSR0, 0); set_initial_values(sc, flags); /* Initialize ifnet structure */ ifp->if_softc = sc; if_initname(ifp, "sbni", unit); ifp->if_init = sbni_init; ifp->if_start = sbni_start; ifp->if_ioctl = sbni_ioctl; IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); /* report real baud rate */ csr0 = sbni_inb(sc, CSR0); ifp->if_baudrate = (csr0 & 0x01 ? 
500000 : 2000000) / (1 << flags.rate); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; mtx_init(&sc->lock, ifp->if_xname, MTX_NETWORK_LOCK, MTX_DEF); callout_init_mtx(&sc->wch, &sc->lock, 0); ether_ifattach(ifp, sc->enaddr); /* device attach does transition from UNCONFIGURED to IDLE state */ if_printf(ifp, "speed %ju, rxl ", (uintmax_t)ifp->if_baudrate); if (sc->delta_rxl) printf("auto\n"); else printf("%d (fixed)\n", sc->cur_rxl_index); return (0); } void sbni_detach(struct sbni_softc *sc) { SBNI_LOCK(sc); sbni_stop(sc); SBNI_UNLOCK(sc); callout_drain(&sc->wch); ether_ifdetach(sc->ifp); if (sc->irq_handle) bus_teardown_intr(sc->dev, sc->irq_res, sc->irq_handle); mtx_destroy(&sc->lock); if_free(sc->ifp); } void sbni_release_resources(struct sbni_softc *sc) { if (sc->irq_res) bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid, sc->irq_res); if (sc->io_res && sc->io_off == 0) bus_release_resource(sc->dev, SYS_RES_IOPORT, sc->io_rid, sc->io_res); } /* -------------------------------------------------------------------------- */ static void sbni_init(void *xsc) { struct sbni_softc *sc; sc = (struct sbni_softc *)xsc; SBNI_LOCK(sc); sbni_init_locked(sc); SBNI_UNLOCK(sc); } static void sbni_init_locked(struct sbni_softc *sc) { struct ifnet *ifp; ifp = sc->ifp; /* * kludge to avoid multiple initialization when more than once * protocols configured */ if (ifp->if_drv_flags & IFF_DRV_RUNNING) return; card_start(sc); callout_reset(&sc->wch, hz/SBNI_HZ, sbni_timeout, sc); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; /* attempt to start output */ sbni_start_locked(ifp); } static void sbni_start(struct ifnet *ifp) { struct sbni_softc *sc = ifp->if_softc; SBNI_LOCK(sc); sbni_start_locked(ifp); SBNI_UNLOCK(sc); } static void sbni_start_locked(struct ifnet *ifp) { struct sbni_softc *sc = ifp->if_softc; if (sc->tx_frameno == 0) prepare_to_send(sc); } static void sbni_stop(struct sbni_softc *sc) { sbni_outb(sc, CSR0, 0); drop_xmit_queue(sc); if (sc->rx_buf_p) { m_freem(sc->rx_buf_p); sc->rx_buf_p = NULL; } callout_stop(&sc->wch); sc->ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); } /* -------------------------------------------------------------------------- */ /* interrupt handler */ /* * SBNI12D-10, -11/ISA boards within "common interrupt" mode could not * be looked as two independent single-channel devices. Every channel seems * as Ethernet interface but interrupt handler must be common. Really, first * channel ("master") driver only registers the handler. In it's struct softc * it has got pointer to "slave" channel's struct softc and handles that's * interrupts too. * softc of successfully attached ISA SBNI boards is linked to list. * While next board driver is initialized, it scans this list. If one * has found softc with same irq and ioaddr different by 4 then it assumes * this board to be "master". 
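 *
 * Worked example (illustrative, with made-up I/O bases): channel A at
 * base 0x210 attaches first and is put on sbni_headlist by sbni_add().
 * When channel B at base 0x214 attaches, connect_to_master(B) finds A
 * on the list because the bases differ by 4, unlinks A from the list,
 * sets A->slave_sc = B and returns A as the "master"; only A's handler
 * is registered, and sbni_intr() then services both channels.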
*/ void sbni_intr(void *arg) { struct sbni_softc *sc; int repeat; sc = (struct sbni_softc *)arg; do { repeat = 0; SBNI_LOCK(sc); if (sbni_inb(sc, CSR0) & (RC_RDY | TR_RDY)) { handle_channel(sc); repeat = 1; } SBNI_UNLOCK(sc); if (sc->slave_sc) { /* second channel present */ SBNI_LOCK(sc->slave_sc); if (sbni_inb(sc->slave_sc, CSR0) & (RC_RDY | TR_RDY)) { handle_channel(sc->slave_sc); repeat = 1; } SBNI_UNLOCK(sc->slave_sc); } } while (repeat); } static void handle_channel(struct sbni_softc *sc) { int req_ans; u_char csr0; sbni_outb(sc, CSR0, (sbni_inb(sc, CSR0) & ~EN_INT) | TR_REQ); sc->timer_ticks = CHANGE_LEVEL_START_TICKS; for (;;) { csr0 = sbni_inb(sc, CSR0); if ((csr0 & (RC_RDY | TR_RDY)) == 0) break; req_ans = !(sc->state & FL_PREV_OK); if (csr0 & RC_RDY) req_ans = recv_frame(sc); /* * TR_RDY always equals 1 here because we have owned the marker, * and we set TR_REQ when disabled interrupts */ csr0 = sbni_inb(sc, CSR0); if ((csr0 & TR_RDY) == 0 || (csr0 & RC_RDY) != 0) if_printf(sc->ifp, "internal error!\n"); /* if state & FL_NEED_RESEND != 0 then tx_frameno != 0 */ if (req_ans || sc->tx_frameno != 0) send_frame(sc); else { /* send the marker without any data */ sbni_outb(sc, CSR0, sbni_inb(sc, CSR0) & ~TR_REQ); } } sbni_outb(sc, CSR0, sbni_inb(sc, CSR0) | EN_INT); } /* * Routine returns 1 if it need to acknoweledge received frame. * Empty frame received without errors won't be acknoweledged. */ static int recv_frame(struct sbni_softc *sc) { u_int32_t crc; u_int framelen, frameno, ack; u_int is_first, frame_ok; crc = CRC32_INITIAL; if (check_fhdr(sc, &framelen, &frameno, &ack, &is_first, &crc)) { frame_ok = framelen > 4 ? upload_data(sc, framelen, frameno, is_first, crc) : skip_tail(sc, framelen, crc); if (frame_ok) interpret_ack(sc, ack); } else { framelen = 0; frame_ok = 0; } sbni_outb(sc, CSR0, sbni_inb(sc, CSR0) ^ CT_ZER); if (frame_ok) { sc->state |= FL_PREV_OK; if (framelen > 4) sc->in_stats.all_rx_number++; } else { sc->state &= ~FL_PREV_OK; change_level(sc); sc->in_stats.all_rx_number++; sc->in_stats.bad_rx_number++; } return (!frame_ok || framelen > 4); } static void send_frame(struct sbni_softc *sc) { u_int32_t crc; u_char csr0; crc = CRC32_INITIAL; if (sc->state & FL_NEED_RESEND) { /* if frame was sended but not ACK'ed - resend it */ if (sc->trans_errors) { sc->trans_errors--; if (sc->framelen != 0) sc->in_stats.resend_tx_number++; } else { /* cannot xmit with many attempts */ drop_xmit_queue(sc); goto do_send; } } else sc->trans_errors = TR_ERROR_COUNT; send_frame_header(sc, &crc); sc->state |= FL_NEED_RESEND; /* * FL_NEED_RESEND will be cleared after ACK, but if empty * frame sended then in prepare_to_send next frame */ if (sc->framelen) { download_data(sc, &crc); sc->in_stats.all_tx_number++; sc->state |= FL_WAIT_ACK; } sbni_outsb(sc, (u_char *)&crc, sizeof crc); do_send: csr0 = sbni_inb(sc, CSR0); sbni_outb(sc, CSR0, csr0 & ~TR_REQ); if (sc->tx_frameno) { /* next frame exists - request to send */ sbni_outb(sc, CSR0, csr0 | TR_REQ); } } static void download_data(struct sbni_softc *sc, u_int32_t *crc_p) { struct mbuf *m; caddr_t data_p; u_int data_len, pos, slice; data_p = NULL; /* initialized to avoid warn */ pos = 0; for (m = sc->tx_buf_p; m != NULL && pos < sc->pktlen; m = m->m_next) { if (pos + m->m_len > sc->outpos) { data_len = m->m_len - (sc->outpos - pos); data_p = mtod(m, caddr_t) + (sc->outpos - pos); goto do_copy; } else pos += m->m_len; } data_len = 0; do_copy: pos = 0; do { if (data_len) { slice = min(data_len, sc->framelen - pos); sbni_outsb(sc, data_p, 
slice); *crc_p = calc_crc32(*crc_p, data_p, slice); pos += slice; if (data_len -= slice) data_p += slice; else { do { m = m->m_next; } while (m != NULL && m->m_len == 0); if (m) { data_len = m->m_len; data_p = mtod(m, caddr_t); } } } else { /* frame too short - zero padding */ pos = sc->framelen - pos; while (pos--) { sbni_outb(sc, DAT, 0); *crc_p = CRC32(0, *crc_p); } return; } } while (pos < sc->framelen); } static int upload_data(struct sbni_softc *sc, u_int framelen, u_int frameno, u_int is_first, u_int32_t crc) { int frame_ok; if (is_first) { sc->wait_frameno = frameno; sc->inppos = 0; } if (sc->wait_frameno == frameno) { if (sc->inppos + framelen <= ETHER_MAX_LEN) { frame_ok = append_frame_to_pkt(sc, framelen, crc); /* * if CRC is right but framelen incorrect then transmitter * error was occurred... drop entire packet */ } else if ((frame_ok = skip_tail(sc, framelen, crc)) != 0) { sc->wait_frameno = 0; sc->inppos = 0; if_inc_counter(sc->ifp, IFCOUNTER_IERRORS, 1); /* now skip all frames until is_first != 0 */ } } else frame_ok = skip_tail(sc, framelen, crc); if (is_first && !frame_ok) { /* * Frame has been violated, but we have stored * is_first already... Drop entire packet. */ sc->wait_frameno = 0; if_inc_counter(sc->ifp, IFCOUNTER_IERRORS, 1); } return (frame_ok); } static __inline void send_complete(struct sbni_softc *); static __inline void send_complete(struct sbni_softc *sc) { m_freem(sc->tx_buf_p); sc->tx_buf_p = NULL; if_inc_counter(sc->ifp, IFCOUNTER_OPACKETS, 1); } static void interpret_ack(struct sbni_softc *sc, u_int ack) { if (ack == FRAME_SENT_OK) { sc->state &= ~FL_NEED_RESEND; if (sc->state & FL_WAIT_ACK) { sc->outpos += sc->framelen; if (--sc->tx_frameno) { sc->framelen = min( sc->maxframe, sc->pktlen - sc->outpos); } else { send_complete(sc); prepare_to_send(sc); } } } sc->state &= ~FL_WAIT_ACK; } /* * Glue received frame with previous fragments of packet. * Indicate packet when last frame would be accepted. */ static int append_frame_to_pkt(struct sbni_softc *sc, u_int framelen, u_int32_t crc) { caddr_t p; if (sc->inppos + framelen > ETHER_MAX_LEN) return (0); if (!sc->rx_buf_p && !get_rx_buf(sc)) return (0); p = sc->rx_buf_p->m_data + sc->inppos; sbni_insb(sc, p, framelen); if (calc_crc32(crc, p, framelen) != CRC32_REMAINDER) return (0); sc->inppos += framelen - 4; if (--sc->wait_frameno == 0) { /* last frame received */ indicate_pkt(sc); if_inc_counter(sc->ifp, IFCOUNTER_IPACKETS, 1); } return (1); } /* * Prepare to start output on adapter. Current priority must be set to splimp * before this routine is called. * Transmitter will be actually activated when marker has been accepted. */ static void prepare_to_send(struct sbni_softc *sc) { struct mbuf *m; u_int len; /* sc->tx_buf_p == NULL here! */ if (sc->tx_buf_p) printf("sbni: memory leak!\n"); sc->outpos = 0; sc->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND); for (;;) { IF_DEQUEUE(&sc->ifp->if_snd, sc->tx_buf_p); if (!sc->tx_buf_p) { /* nothing to transmit... 
*/ sc->pktlen = 0; sc->tx_frameno = 0; sc->framelen = 0; sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; return; } for (len = 0, m = sc->tx_buf_p; m; m = m->m_next) len += m->m_len; if (len != 0) break; m_freem(sc->tx_buf_p); } if (len < SBNI_MIN_LEN) len = SBNI_MIN_LEN; sc->pktlen = len; sc->tx_frameno = howmany(len, sc->maxframe); sc->framelen = min(len, sc->maxframe); sbni_outb(sc, CSR0, sbni_inb(sc, CSR0) | TR_REQ); sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; BPF_MTAP(sc->ifp, sc->tx_buf_p); } static void drop_xmit_queue(struct sbni_softc *sc) { struct mbuf *m; if (sc->tx_buf_p) { m_freem(sc->tx_buf_p); sc->tx_buf_p = NULL; if_inc_counter(sc->ifp, IFCOUNTER_OERRORS, 1); } for (;;) { IF_DEQUEUE(&sc->ifp->if_snd, m); if (m == NULL) break; m_freem(m); if_inc_counter(sc->ifp, IFCOUNTER_OERRORS, 1); } sc->tx_frameno = 0; sc->framelen = 0; sc->outpos = 0; sc->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND); sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; } static void send_frame_header(struct sbni_softc *sc, u_int32_t *crc_p) { u_int32_t crc; u_int len_field; u_char value; crc = *crc_p; len_field = sc->framelen + 6; /* CRC + frameno + reserved */ if (sc->state & FL_NEED_RESEND) len_field |= FRAME_RETRY; /* non-first attempt... */ if (sc->outpos == 0) len_field |= FRAME_FIRST; len_field |= (sc->state & FL_PREV_OK) ? FRAME_SENT_OK : FRAME_SENT_BAD; sbni_outb(sc, DAT, SBNI_SIG); value = (u_char)len_field; sbni_outb(sc, DAT, value); crc = CRC32(value, crc); value = (u_char)(len_field >> 8); sbni_outb(sc, DAT, value); crc = CRC32(value, crc); sbni_outb(sc, DAT, sc->tx_frameno); crc = CRC32(sc->tx_frameno, crc); sbni_outb(sc, DAT, 0); crc = CRC32(0, crc); *crc_p = crc; } /* * if frame tail not needed (incorrect number or received twice), * it won't store, but CRC will be calculated */ static int skip_tail(struct sbni_softc *sc, u_int tail_len, u_int32_t crc) { while (tail_len--) crc = CRC32(sbni_inb(sc, DAT), crc); return (crc == CRC32_REMAINDER); } static int check_fhdr(struct sbni_softc *sc, u_int *framelen, u_int *frameno, u_int *ack, u_int *is_first, u_int32_t *crc_p) { u_int32_t crc; u_char value; crc = *crc_p; if (sbni_inb(sc, DAT) != SBNI_SIG) return (0); value = sbni_inb(sc, DAT); *framelen = (u_int)value; crc = CRC32(value, crc); value = sbni_inb(sc, DAT); *framelen |= ((u_int)value) << 8; crc = CRC32(value, crc); *ack = *framelen & FRAME_ACK_MASK; *is_first = (*framelen & FRAME_FIRST) != 0; if ((*framelen &= FRAME_LEN_MASK) < 6 || *framelen > SBNI_MAX_FRAME - 3) return (0); value = sbni_inb(sc, DAT); *frameno = (u_int)value; crc = CRC32(value, crc); crc = CRC32(sbni_inb(sc, DAT), crc); /* reserved byte */ *framelen -= 2; *crc_p = crc; return (1); } static int get_rx_buf(struct sbni_softc *sc) { struct mbuf *m; MGETHDR(m, M_NOWAIT, MT_DATA); if (m == NULL) { if_printf(sc->ifp, "cannot allocate header mbuf\n"); return (0); } /* * We always put the received packet in a single buffer - * either with just an mbuf header or in a cluster attached * to the header. The +2 is to compensate for the alignment * fixup below. */ if (ETHER_MAX_LEN + 2 > MHLEN) { /* Attach an mbuf cluster */ if (!(MCLGET(m, M_NOWAIT))) { m_freem(m); return (0); } } m->m_pkthdr.len = m->m_len = ETHER_MAX_LEN + 2; /* * The +2 is to longword align the start of the real packet. * (sizeof ether_header == 14) * This is important for NFS. 
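 *
 * Illustrative layout after the m_adj(m, 2) below: the frame then starts
 * at offset 2, the 14-byte Ethernet header ends at offset 16, and the
 * IP header that follows begins on a longword (4-byte) boundary.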
*/ m_adj(m, 2); sc->rx_buf_p = m; return (1); } static void indicate_pkt(struct sbni_softc *sc) { struct ifnet *ifp = sc->ifp; struct mbuf *m; m = sc->rx_buf_p; m->m_pkthdr.rcvif = ifp; m->m_pkthdr.len = m->m_len = sc->inppos; sc->rx_buf_p = NULL; SBNI_UNLOCK(sc); (*ifp->if_input)(ifp, m); SBNI_LOCK(sc); } /* -------------------------------------------------------------------------- */ /* * Routine checks periodically wire activity and regenerates marker if * connect was inactive for a long time. */ static void sbni_timeout(void *xsc) { struct sbni_softc *sc; u_char csr0; sc = (struct sbni_softc *)xsc; SBNI_ASSERT_LOCKED(sc); csr0 = sbni_inb(sc, CSR0); if (csr0 & RC_CHK) { if (sc->timer_ticks) { if (csr0 & (RC_RDY | BU_EMP)) /* receiving not active */ sc->timer_ticks--; } else { sc->in_stats.timeout_number++; if (sc->delta_rxl) timeout_change_level(sc); sbni_outb(sc, CSR1, *(u_char *)&sc->csr1 | PR_RES); csr0 = sbni_inb(sc, CSR0); } } sbni_outb(sc, CSR0, csr0 | RC_CHK); callout_reset(&sc->wch, hz/SBNI_HZ, sbni_timeout, sc); } /* -------------------------------------------------------------------------- */ static void card_start(struct sbni_softc *sc) { sc->timer_ticks = CHANGE_LEVEL_START_TICKS; sc->state &= ~(FL_WAIT_ACK | FL_NEED_RESEND); sc->state |= FL_PREV_OK; sc->inppos = 0; sc->wait_frameno = 0; sbni_outb(sc, CSR1, *(u_char *)&sc->csr1 | PR_RES); sbni_outb(sc, CSR0, EN_INT); } /* -------------------------------------------------------------------------- */ static u_char rxl_tab[] = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x08, 0x0a, 0x0c, 0x0f, 0x16, 0x18, 0x1a, 0x1c, 0x1f }; #define SIZE_OF_TIMEOUT_RXL_TAB 4 static u_char timeout_rxl_tab[] = { 0x03, 0x05, 0x08, 0x0b }; static void set_initial_values(struct sbni_softc *sc, struct sbni_flags flags) { if (flags.fixed_rxl) { sc->delta_rxl = 0; /* disable receive level autodetection */ sc->cur_rxl_index = flags.rxl; } else { sc->delta_rxl = DEF_RXL_DELTA; sc->cur_rxl_index = DEF_RXL; } sc->csr1.rate = flags.fixed_rate ? 
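/*
 * Illustrative sketch, not part of the diff: sbni_timeout() above is a
 * periodic watchdog - passes where the receiver looked idle eat into a tick
 * budget, and once the budget is gone the driver counts a timeout, nudges the
 * receive level and pulses PR_RES.  Below is a much-simplified standalone
 * model of that countdown; the real driver leaves the budget at zero until
 * card_start() re-arms it with CHANGE_LEVEL_START_TICKS, whereas the sketch
 * re-arms immediately so repeated timeouts are visible, and the budget value
 * is a stand-in.
 */
#include <stdbool.h>
#include <stdio.h>

#define START_TICKS	8	/* stand-in for CHANGE_LEVEL_START_TICKS */

struct wd {
	int		ticks;		/* remaining idle budget */
	unsigned	timeouts;	/* like in_stats.timeout_number */
};

static void
watchdog_tick(struct wd *wd, bool rx_idle)
{
	if (wd->ticks > 0) {
		if (rx_idle)
			wd->ticks--;	/* only idle passes consume budget */
		return;
	}
	wd->timeouts++;
	printf("timeout #%u: adjust RxL and pulse PR_RES\n", wd->timeouts);
	wd->ticks = START_TICKS;	/* simplification, see lead-in */
}

int
main(void)
{
	struct wd wd = { .ticks = 3, .timeouts = 0 };
	bool idle[] = { true, false, true, true, true, true };

	for (unsigned i = 0; i < sizeof(idle) / sizeof(idle[0]); i++)
		watchdog_tick(&wd, idle[i]);
	return (0);
}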
flags.rate : DEFAULT_RATE; sc->csr1.rxl = rxl_tab[sc->cur_rxl_index]; sc->maxframe = DEFAULT_FRAME_LEN; /* * generate Ethernet address (0x00ff01xxxxxx) */ *(u_int16_t *) sc->enaddr = htons(0x00ff); if (flags.mac_addr) { *(u_int32_t *) (sc->enaddr + 2) = htonl(flags.mac_addr | 0x01000000); } else { *(u_char *) (sc->enaddr + 2) = 0x01; read_random(sc->enaddr + 3, 3); } } #ifdef SBNI_DUAL_COMPOUND void sbni_add(struct sbni_softc *sc) { mtx_lock(&headlist_lock); sc->link = sbni_headlist; sbni_headlist = sc; mtx_unlock(&headlist_lock); } struct sbni_softc * connect_to_master(struct sbni_softc *sc) { struct sbni_softc *p, *p_prev; mtx_lock(&headlist_lock); for (p = sbni_headlist, p_prev = NULL; p; p_prev = p, p = p->link) { if (rman_get_start(p->io_res) == rman_get_start(sc->io_res) + 4 || rman_get_start(p->io_res) == rman_get_start(sc->io_res) - 4) { p->slave_sc = sc; if (p_prev) p_prev->link = p->link; else sbni_headlist = p->link; mtx_unlock(&headlist_lock); return p; } } mtx_unlock(&headlist_lock); return (NULL); } #endif /* SBNI_DUAL_COMPOUND */ /* Receive level auto-selection */ static void change_level(struct sbni_softc *sc) { if (sc->delta_rxl == 0) /* do not auto-negotiate RxL */ return; if (sc->cur_rxl_index == 0) sc->delta_rxl = 1; else if (sc->cur_rxl_index == 15) sc->delta_rxl = -1; else if (sc->cur_rxl_rcvd < sc->prev_rxl_rcvd) sc->delta_rxl = -sc->delta_rxl; sc->csr1.rxl = rxl_tab[sc->cur_rxl_index += sc->delta_rxl]; sbni_inb(sc, CSR0); /* it needed for PCI cards */ sbni_outb(sc, CSR1, *(u_char *)&sc->csr1); sc->prev_rxl_rcvd = sc->cur_rxl_rcvd; sc->cur_rxl_rcvd = 0; } static void timeout_change_level(struct sbni_softc *sc) { sc->cur_rxl_index = timeout_rxl_tab[sc->timeout_rxl]; if (++sc->timeout_rxl >= 4) sc->timeout_rxl = 0; sc->csr1.rxl = rxl_tab[sc->cur_rxl_index]; sbni_inb(sc, CSR0); sbni_outb(sc, CSR1, *(u_char *)&sc->csr1); sc->prev_rxl_rcvd = sc->cur_rxl_rcvd; sc->cur_rxl_rcvd = 0; } /* -------------------------------------------------------------------------- */ /* * Process an ioctl request. This code needs some work - it looks * pretty ugly. */ static int sbni_ioctl(struct ifnet *ifp, u_long command, caddr_t data) { struct sbni_softc *sc; struct ifreq *ifr; struct thread *td; struct sbni_in_stats *in_stats; struct sbni_flags flags; int error; sc = ifp->if_softc; ifr = (struct ifreq *)data; td = curthread; error = 0; switch (command) { case SIOCSIFFLAGS: /* * If the interface is marked up and stopped, then start it. * If it is marked down and running, then stop it. */ SBNI_LOCK(sc); if (ifp->if_flags & IFF_UP) { if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) sbni_init_locked(sc); } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { sbni_stop(sc); } } SBNI_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: /* * Multicast list has changed; set the hardware filter * accordingly. 
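/*
 * Illustrative sketch, not part of the diff: change_level() above auto-tunes
 * the receive level by hill climbing - keep stepping the rxl_tab index in the
 * current direction while more frames arrive than in the previous interval,
 * reverse direction when reception gets worse, and turn around at the ends of
 * the table.  The standalone simulation below uses the same 16-entry table;
 * the starting index/step (DEF_RXL/DEF_RXL_DELTA in the driver) and the frame
 * counts are made-up stand-ins.
 */
#include <stdio.h>

static unsigned char rxl_tab[] = {
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x08,
	0x0a, 0x0c, 0x0f, 0x16, 0x18, 0x1a, 0x1c, 0x1f
};

static int
next_index(int idx, int *delta, unsigned prev_rcvd, unsigned cur_rcvd)
{
	if (idx == 0)
		*delta = 1;		/* bounce off the bottom */
	else if (idx == 15)
		*delta = -1;		/* bounce off the top */
	else if (cur_rcvd < prev_rcvd)
		*delta = -*delta;	/* got worse: reverse direction */
	return (idx + *delta);
}

int
main(void)
{
	unsigned rcvd[] = { 10, 12, 15, 14, 13, 16, 18, 17 };
	unsigned prev = 0;
	int idx = 3, delta = 1;		/* stand-in initial values */

	for (unsigned i = 0; i < sizeof(rcvd) / sizeof(rcvd[0]); i++) {
		idx = next_index(idx, &delta, prev, rcvd[i]);
		printf("interval %u: rcvd=%u -> index %d, rxl byte 0x%02x\n",
		    i, rcvd[i], idx, rxl_tab[idx]);
		prev = rcvd[i];
	}
	return (0);
}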
*/ error = 0; /* if (ifr == NULL) error = EAFNOSUPPORT; */ break; /* * SBNI specific ioctl */ case SIOCGHWFLAGS: /* get flags */ SBNI_LOCK(sc); bcopy((caddr_t)IF_LLADDR(sc->ifp)+3, (caddr_t) &flags, 3); flags.rxl = sc->cur_rxl_index; flags.rate = sc->csr1.rate; flags.fixed_rxl = (sc->delta_rxl == 0); flags.fixed_rate = 1; SBNI_UNLOCK(sc); bcopy(&flags, &ifr->ifr_ifru, sizeof(flags)); break; case SIOCGINSTATS: in_stats = malloc(sizeof(struct sbni_in_stats), M_DEVBUF, M_WAITOK); SBNI_LOCK(sc); bcopy(&sc->in_stats, in_stats, sizeof(struct sbni_in_stats)); SBNI_UNLOCK(sc); - error = copyout(ifr->ifr_data, in_stats, + error = copyout(ifr_data_get_ptr(ifr), in_stats, sizeof(struct sbni_in_stats)); free(in_stats, M_DEVBUF); break; case SIOCSHWFLAGS: /* set flags */ /* root only */ error = priv_check(td, PRIV_DRIVER); if (error) break; bcopy(&ifr->ifr_ifru, &flags, sizeof(flags)); SBNI_LOCK(sc); if (flags.fixed_rxl) { sc->delta_rxl = 0; sc->cur_rxl_index = flags.rxl; } else { sc->delta_rxl = DEF_RXL_DELTA; sc->cur_rxl_index = DEF_RXL; } sc->csr1.rxl = rxl_tab[sc->cur_rxl_index]; sc->csr1.rate = flags.fixed_rate ? flags.rate : DEFAULT_RATE; if (flags.mac_addr) bcopy((caddr_t) &flags, (caddr_t) IF_LLADDR(sc->ifp)+3, 3); /* Don't be afraid... */ sbni_outb(sc, CSR1, *(char*)(&sc->csr1) | PR_RES); SBNI_UNLOCK(sc); break; case SIOCRINSTATS: SBNI_LOCK(sc); if (!(error = priv_check(td, PRIV_DRIVER))) /* root only */ bzero(&sc->in_stats, sizeof(struct sbni_in_stats)); SBNI_UNLOCK(sc); break; default: error = ether_ioctl(ifp, command, data); break; } return (error); } /* -------------------------------------------------------------------------- */ static u_int32_t calc_crc32(u_int32_t crc, caddr_t p, u_int len) { while (len--) crc = CRC32(*p++, crc); return (crc); } static u_int32_t crc32tab[] __aligned(8) = { 0xD202EF8D, 0xA505DF1B, 0x3C0C8EA1, 0x4B0BBE37, 0xD56F2B94, 0xA2681B02, 0x3B614AB8, 0x4C667A2E, 0xDCD967BF, 0xABDE5729, 0x32D70693, 0x45D03605, 0xDBB4A3A6, 0xACB39330, 0x35BAC28A, 0x42BDF21C, 0xCFB5FFE9, 0xB8B2CF7F, 0x21BB9EC5, 0x56BCAE53, 0xC8D83BF0, 0xBFDF0B66, 0x26D65ADC, 0x51D16A4A, 0xC16E77DB, 0xB669474D, 0x2F6016F7, 0x58672661, 0xC603B3C2, 0xB1048354, 0x280DD2EE, 0x5F0AE278, 0xE96CCF45, 0x9E6BFFD3, 0x0762AE69, 0x70659EFF, 0xEE010B5C, 0x99063BCA, 0x000F6A70, 0x77085AE6, 0xE7B74777, 0x90B077E1, 0x09B9265B, 0x7EBE16CD, 0xE0DA836E, 0x97DDB3F8, 0x0ED4E242, 0x79D3D2D4, 0xF4DBDF21, 0x83DCEFB7, 0x1AD5BE0D, 0x6DD28E9B, 0xF3B61B38, 0x84B12BAE, 0x1DB87A14, 0x6ABF4A82, 0xFA005713, 0x8D076785, 0x140E363F, 0x630906A9, 0xFD6D930A, 0x8A6AA39C, 0x1363F226, 0x6464C2B0, 0xA4DEAE1D, 0xD3D99E8B, 0x4AD0CF31, 0x3DD7FFA7, 0xA3B36A04, 0xD4B45A92, 0x4DBD0B28, 0x3ABA3BBE, 0xAA05262F, 0xDD0216B9, 0x440B4703, 0x330C7795, 0xAD68E236, 0xDA6FD2A0, 0x4366831A, 0x3461B38C, 0xB969BE79, 0xCE6E8EEF, 0x5767DF55, 0x2060EFC3, 0xBE047A60, 0xC9034AF6, 0x500A1B4C, 0x270D2BDA, 0xB7B2364B, 0xC0B506DD, 0x59BC5767, 0x2EBB67F1, 0xB0DFF252, 0xC7D8C2C4, 0x5ED1937E, 0x29D6A3E8, 0x9FB08ED5, 0xE8B7BE43, 0x71BEEFF9, 0x06B9DF6F, 0x98DD4ACC, 0xEFDA7A5A, 0x76D32BE0, 0x01D41B76, 0x916B06E7, 0xE66C3671, 0x7F6567CB, 0x0862575D, 0x9606C2FE, 0xE101F268, 0x7808A3D2, 0x0F0F9344, 0x82079EB1, 0xF500AE27, 0x6C09FF9D, 0x1B0ECF0B, 0x856A5AA8, 0xF26D6A3E, 0x6B643B84, 0x1C630B12, 0x8CDC1683, 0xFBDB2615, 0x62D277AF, 0x15D54739, 0x8BB1D29A, 0xFCB6E20C, 0x65BFB3B6, 0x12B88320, 0x3FBA6CAD, 0x48BD5C3B, 0xD1B40D81, 0xA6B33D17, 0x38D7A8B4, 0x4FD09822, 0xD6D9C998, 0xA1DEF90E, 0x3161E49F, 0x4666D409, 0xDF6F85B3, 0xA868B525, 0x360C2086, 0x410B1010, 0xD80241AA, 0xAF05713C, 
0x220D7CC9, 0x550A4C5F, 0xCC031DE5, 0xBB042D73, 0x2560B8D0, 0x52678846, 0xCB6ED9FC, 0xBC69E96A, 0x2CD6F4FB, 0x5BD1C46D, 0xC2D895D7, 0xB5DFA541, 0x2BBB30E2, 0x5CBC0074, 0xC5B551CE, 0xB2B26158, 0x04D44C65, 0x73D37CF3, 0xEADA2D49, 0x9DDD1DDF, 0x03B9887C, 0x74BEB8EA, 0xEDB7E950, 0x9AB0D9C6, 0x0A0FC457, 0x7D08F4C1, 0xE401A57B, 0x930695ED, 0x0D62004E, 0x7A6530D8, 0xE36C6162, 0x946B51F4, 0x19635C01, 0x6E646C97, 0xF76D3D2D, 0x806A0DBB, 0x1E0E9818, 0x6909A88E, 0xF000F934, 0x8707C9A2, 0x17B8D433, 0x60BFE4A5, 0xF9B6B51F, 0x8EB18589, 0x10D5102A, 0x67D220BC, 0xFEDB7106, 0x89DC4190, 0x49662D3D, 0x3E611DAB, 0xA7684C11, 0xD06F7C87, 0x4E0BE924, 0x390CD9B2, 0xA0058808, 0xD702B89E, 0x47BDA50F, 0x30BA9599, 0xA9B3C423, 0xDEB4F4B5, 0x40D06116, 0x37D75180, 0xAEDE003A, 0xD9D930AC, 0x54D13D59, 0x23D60DCF, 0xBADF5C75, 0xCDD86CE3, 0x53BCF940, 0x24BBC9D6, 0xBDB2986C, 0xCAB5A8FA, 0x5A0AB56B, 0x2D0D85FD, 0xB404D447, 0xC303E4D1, 0x5D677172, 0x2A6041E4, 0xB369105E, 0xC46E20C8, 0x72080DF5, 0x050F3D63, 0x9C066CD9, 0xEB015C4F, 0x7565C9EC, 0x0262F97A, 0x9B6BA8C0, 0xEC6C9856, 0x7CD385C7, 0x0BD4B551, 0x92DDE4EB, 0xE5DAD47D, 0x7BBE41DE, 0x0CB97148, 0x95B020F2, 0xE2B71064, 0x6FBF1D91, 0x18B82D07, 0x81B17CBD, 0xF6B64C2B, 0x68D2D988, 0x1FD5E91E, 0x86DCB8A4, 0xF1DB8832, 0x616495A3, 0x1663A535, 0x8F6AF48F, 0xF86DC419, 0x660951BA, 0x110E612C, 0x88073096, 0xFF000000 }; Index: stable/11/sys/dev/sfxge/sfxge.c =================================================================== --- stable/11/sys/dev/sfxge/sfxge.c (revision 332287) +++ stable/11/sys/dev/sfxge/sfxge.c (revision 332288) @@ -1,1200 +1,1202 @@ /*- * Copyright (c) 2010-2016 Solarflare Communications Inc. * All rights reserved. * * This software was developed in part by Philip Paeps under contract for * Solarflare Communications, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * The views and conclusions contained in the software and documentation are * those of the authors and should not be interpreted as representing official * policies, either expressed or implied, of the FreeBSD Project. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_rss.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef RSS #include #endif #include "common/efx.h" #include "sfxge.h" #include "sfxge_rx.h" #include "sfxge_ioc.h" #include "sfxge_version.h" #define SFXGE_CAP (IFCAP_VLAN_MTU | IFCAP_VLAN_HWCSUM | \ IFCAP_RXCSUM | IFCAP_TXCSUM | \ IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6 | \ IFCAP_TSO4 | IFCAP_TSO6 | \ IFCAP_JUMBO_MTU | \ IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWSTATS) #define SFXGE_CAP_ENABLE SFXGE_CAP #define SFXGE_CAP_FIXED (IFCAP_VLAN_MTU | \ IFCAP_JUMBO_MTU | IFCAP_LINKSTATE | IFCAP_HWSTATS) MALLOC_DEFINE(M_SFXGE, "sfxge", "Solarflare 10GigE driver"); SYSCTL_NODE(_hw, OID_AUTO, sfxge, CTLFLAG_RD, 0, "SFXGE driver parameters"); #define SFXGE_PARAM_RX_RING SFXGE_PARAM(rx_ring) static int sfxge_rx_ring_entries = SFXGE_NDESCS; TUNABLE_INT(SFXGE_PARAM_RX_RING, &sfxge_rx_ring_entries); SYSCTL_INT(_hw_sfxge, OID_AUTO, rx_ring, CTLFLAG_RDTUN, &sfxge_rx_ring_entries, 0, "Maximum number of descriptors in a receive ring"); #define SFXGE_PARAM_TX_RING SFXGE_PARAM(tx_ring) static int sfxge_tx_ring_entries = SFXGE_NDESCS; TUNABLE_INT(SFXGE_PARAM_TX_RING, &sfxge_tx_ring_entries); SYSCTL_INT(_hw_sfxge, OID_AUTO, tx_ring, CTLFLAG_RDTUN, &sfxge_tx_ring_entries, 0, "Maximum number of descriptors in a transmit ring"); #define SFXGE_PARAM_RESTART_ATTEMPTS SFXGE_PARAM(restart_attempts) static int sfxge_restart_attempts = 3; TUNABLE_INT(SFXGE_PARAM_RESTART_ATTEMPTS, &sfxge_restart_attempts); SYSCTL_INT(_hw_sfxge, OID_AUTO, restart_attempts, CTLFLAG_RDTUN, &sfxge_restart_attempts, 0, "Maximum number of attempts to bring interface up after reset"); #if EFSYS_OPT_MCDI_LOGGING #define SFXGE_PARAM_MCDI_LOGGING SFXGE_PARAM(mcdi_logging) static int sfxge_mcdi_logging = 0; TUNABLE_INT(SFXGE_PARAM_MCDI_LOGGING, &sfxge_mcdi_logging); #endif static void sfxge_reset(void *arg, int npending); static int sfxge_estimate_rsrc_limits(struct sfxge_softc *sc) { efx_drv_limits_t limits; int rc; unsigned int evq_max; uint32_t evq_allocated; uint32_t rxq_allocated; uint32_t txq_allocated; /* * Limit the number of event queues to: * - number of CPUs * - hardwire maximum RSS channels * - administratively specified maximum RSS channels */ #ifdef RSS /* * Avoid extra limitations so that the number of queues * may be configured at administrator's will */ evq_max = MIN(MAX(rss_getnumbuckets(), 1), EFX_MAXRSS); #else evq_max = MIN(mp_ncpus, EFX_MAXRSS); #endif if (sc->max_rss_channels > 0) evq_max = MIN(evq_max, sc->max_rss_channels); memset(&limits, 0, sizeof(limits)); limits.edl_min_evq_count = 1; limits.edl_max_evq_count = evq_max; limits.edl_min_txq_count = SFXGE_TXQ_NTYPES; limits.edl_max_txq_count = evq_max + SFXGE_TXQ_NTYPES - 1; limits.edl_min_rxq_count = 1; limits.edl_max_rxq_count = evq_max; efx_nic_set_drv_limits(sc->enp, &limits); if ((rc = efx_nic_init(sc->enp)) != 0) return (rc); rc = efx_nic_get_vi_pool(sc->enp, &evq_allocated, &rxq_allocated, &txq_allocated); if (rc != 0) { efx_nic_fini(sc->enp); return (rc); } KASSERT(txq_allocated >= SFXGE_TXQ_NTYPES, ("txq_allocated < SFXGE_TXQ_NTYPES")); sc->evq_max = MIN(evq_allocated, evq_max); sc->evq_max = MIN(rxq_allocated, sc->evq_max); sc->evq_max = MIN(txq_allocated - (SFXGE_TXQ_NTYPES - 1), sc->evq_max); KASSERT(sc->evq_max <= evq_max, ("allocated more than maximum requested")); #ifdef RSS if (sc->evq_max < rss_getnumbuckets()) 
device_printf(sc->dev, "The number of allocated queues (%u) " "is less than the number of RSS buckets (%u); " "performance degradation might be observed", sc->evq_max, rss_getnumbuckets()); #endif /* * NIC is kept initialized in the case of success to be able to * initialize port to find out media types. */ return (0); } static int sfxge_set_drv_limits(struct sfxge_softc *sc) { efx_drv_limits_t limits; memset(&limits, 0, sizeof(limits)); /* Limits are strict since take into account initial estimation */ limits.edl_min_evq_count = limits.edl_max_evq_count = sc->intr.n_alloc; limits.edl_min_txq_count = limits.edl_max_txq_count = sc->intr.n_alloc + SFXGE_TXQ_NTYPES - 1; limits.edl_min_rxq_count = limits.edl_max_rxq_count = sc->intr.n_alloc; return (efx_nic_set_drv_limits(sc->enp, &limits)); } static int sfxge_start(struct sfxge_softc *sc) { int rc; SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc); if (sc->init_state == SFXGE_STARTED) return (0); if (sc->init_state != SFXGE_REGISTERED) { rc = EINVAL; goto fail; } /* Set required resource limits */ if ((rc = sfxge_set_drv_limits(sc)) != 0) goto fail; if ((rc = efx_nic_init(sc->enp)) != 0) goto fail; /* Start processing interrupts. */ if ((rc = sfxge_intr_start(sc)) != 0) goto fail2; /* Start processing events. */ if ((rc = sfxge_ev_start(sc)) != 0) goto fail3; /* Fire up the port. */ if ((rc = sfxge_port_start(sc)) != 0) goto fail4; /* Start the receiver side. */ if ((rc = sfxge_rx_start(sc)) != 0) goto fail5; /* Start the transmitter side. */ if ((rc = sfxge_tx_start(sc)) != 0) goto fail6; sc->init_state = SFXGE_STARTED; /* Tell the stack we're running. */ sc->ifnet->if_drv_flags |= IFF_DRV_RUNNING; sc->ifnet->if_drv_flags &= ~IFF_DRV_OACTIVE; return (0); fail6: sfxge_rx_stop(sc); fail5: sfxge_port_stop(sc); fail4: sfxge_ev_stop(sc); fail3: sfxge_intr_stop(sc); fail2: efx_nic_fini(sc->enp); fail: device_printf(sc->dev, "sfxge_start: %d\n", rc); return (rc); } static void sfxge_if_init(void *arg) { struct sfxge_softc *sc; sc = (struct sfxge_softc *)arg; SFXGE_ADAPTER_LOCK(sc); (void)sfxge_start(sc); SFXGE_ADAPTER_UNLOCK(sc); } static void sfxge_stop(struct sfxge_softc *sc) { SFXGE_ADAPTER_LOCK_ASSERT_OWNED(sc); if (sc->init_state != SFXGE_STARTED) return; sc->init_state = SFXGE_REGISTERED; /* Stop the transmitter. */ sfxge_tx_stop(sc); /* Stop the receiver. */ sfxge_rx_stop(sc); /* Stop the port. */ sfxge_port_stop(sc); /* Stop processing events. */ sfxge_ev_stop(sc); /* Stop processing interrupts. 
*/ sfxge_intr_stop(sc); efx_nic_fini(sc->enp); sc->ifnet->if_drv_flags &= ~IFF_DRV_RUNNING; } static int sfxge_vpd_ioctl(struct sfxge_softc *sc, sfxge_ioc_t *ioc) { efx_vpd_value_t value; int rc = 0; switch (ioc->u.vpd.op) { case SFXGE_VPD_OP_GET_KEYWORD: value.evv_tag = ioc->u.vpd.tag; value.evv_keyword = ioc->u.vpd.keyword; rc = efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value); if (rc != 0) break; ioc->u.vpd.len = MIN(ioc->u.vpd.len, value.evv_length); if (ioc->u.vpd.payload != 0) { rc = copyout(value.evv_value, ioc->u.vpd.payload, ioc->u.vpd.len); } break; case SFXGE_VPD_OP_SET_KEYWORD: if (ioc->u.vpd.len > sizeof(value.evv_value)) return (EINVAL); value.evv_tag = ioc->u.vpd.tag; value.evv_keyword = ioc->u.vpd.keyword; value.evv_length = ioc->u.vpd.len; rc = copyin(ioc->u.vpd.payload, value.evv_value, value.evv_length); if (rc != 0) break; rc = efx_vpd_set(sc->enp, sc->vpd_data, sc->vpd_size, &value); if (rc != 0) break; rc = efx_vpd_verify(sc->enp, sc->vpd_data, sc->vpd_size); if (rc != 0) break; rc = efx_vpd_write(sc->enp, sc->vpd_data, sc->vpd_size); break; default: rc = EOPNOTSUPP; break; } return (rc); } static int sfxge_private_ioctl(struct sfxge_softc *sc, sfxge_ioc_t *ioc) { switch (ioc->op) { case SFXGE_MCDI_IOC: return (sfxge_mcdi_ioctl(sc, ioc)); case SFXGE_NVRAM_IOC: return (sfxge_nvram_ioctl(sc, ioc)); case SFXGE_VPD_IOC: return (sfxge_vpd_ioctl(sc, ioc)); default: return (EOPNOTSUPP); } } static int sfxge_if_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data) { struct sfxge_softc *sc; struct ifreq *ifr; sfxge_ioc_t ioc; int error; ifr = (struct ifreq *)data; sc = ifp->if_softc; error = 0; switch (command) { case SIOCSIFFLAGS: SFXGE_ADAPTER_LOCK(sc); if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { if ((ifp->if_flags ^ sc->if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) { sfxge_mac_filter_set(sc); } } else sfxge_start(sc); } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) sfxge_stop(sc); sc->if_flags = ifp->if_flags; SFXGE_ADAPTER_UNLOCK(sc); break; case SIOCSIFMTU: if (ifr->ifr_mtu == ifp->if_mtu) { /* Nothing to do */ error = 0; } else if (ifr->ifr_mtu > SFXGE_MAX_MTU) { error = EINVAL; } else if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { ifp->if_mtu = ifr->ifr_mtu; error = 0; } else { /* Restart required */ SFXGE_ADAPTER_LOCK(sc); sfxge_stop(sc); ifp->if_mtu = ifr->ifr_mtu; error = sfxge_start(sc); SFXGE_ADAPTER_UNLOCK(sc); if (error != 0) { ifp->if_flags &= ~IFF_UP; ifp->if_drv_flags &= ~IFF_DRV_RUNNING; if_down(ifp); } } break; case SIOCADDMULTI: case SIOCDELMULTI: if (ifp->if_drv_flags & IFF_DRV_RUNNING) sfxge_mac_filter_set(sc); break; case SIOCSIFCAP: { int reqcap = ifr->ifr_reqcap; int capchg_mask; SFXGE_ADAPTER_LOCK(sc); /* Capabilities to be changed in accordance with request */ capchg_mask = ifp->if_capenable ^ reqcap; /* * The networking core already rejects attempts to * enable capabilities we don't have. We still have * to reject attempts to disable capabilities that we * can't (yet) disable. 
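/*
 * Illustrative sketch, not part of the diff: the SIOCSIFCAP checks that follow
 * (the KASSERT plus the TSO4/TSO6 dependency tests) implement the policy in
 * the comment above by XOR-ing the current and requested capability masks and
 * rejecting a request that would end up with TSO enabled but the matching
 * transmit checksum offload off.  The standalone test below repeats that mask
 * arithmetic for the IPv4 case; the bit values are stand-ins, the real
 * IFCAP_* constants live in net/if.h.
 */
#include <stdio.h>

#define IFCAP_TXCSUM	0x0001		/* stand-in value */
#define IFCAP_TSO4	0x0002		/* stand-in value */

static int
tso4_request_ok(int capenable, int reqcap)
{
	int capchg_mask = capenable ^ reqcap;

	/* Toggling TSO4 while the request leaves TXCSUM off is refused. */
	if ((capchg_mask & IFCAP_TSO4) &&
	    (reqcap & (IFCAP_TSO4 | IFCAP_TXCSUM)) == IFCAP_TSO4)
		return (0);		/* "enable txcsum before tso4" */
	return (1);
}

int
main(void)
{
	int capenable = IFCAP_TXCSUM;	/* currently: txcsum on, tso4 off */

	printf("enable tso4, keep txcsum: %s\n",
	    tso4_request_ok(capenable, IFCAP_TXCSUM | IFCAP_TSO4) ?
	    "ok" : "rejected");
	printf("enable tso4, drop txcsum: %s\n",
	    tso4_request_ok(capenable, IFCAP_TSO4) ? "ok" : "rejected");
	return (0);
}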
*/ KASSERT((reqcap & ~ifp->if_capabilities) == 0, ("Unsupported capabilities 0x%x requested 0x%x vs " "supported 0x%x", reqcap & ~ifp->if_capabilities, reqcap , ifp->if_capabilities)); if (capchg_mask & SFXGE_CAP_FIXED) { error = EINVAL; SFXGE_ADAPTER_UNLOCK(sc); break; } /* Check request before any changes */ if ((capchg_mask & IFCAP_TSO4) && (reqcap & (IFCAP_TSO4 | IFCAP_TXCSUM)) == IFCAP_TSO4) { error = EAGAIN; SFXGE_ADAPTER_UNLOCK(sc); if_printf(ifp, "enable txcsum before tso4\n"); break; } if ((capchg_mask & IFCAP_TSO6) && (reqcap & (IFCAP_TSO6 | IFCAP_TXCSUM_IPV6)) == IFCAP_TSO6) { error = EAGAIN; SFXGE_ADAPTER_UNLOCK(sc); if_printf(ifp, "enable txcsum6 before tso6\n"); break; } if (reqcap & IFCAP_TXCSUM) { ifp->if_hwassist |= (CSUM_IP | CSUM_TCP | CSUM_UDP); } else { ifp->if_hwassist &= ~(CSUM_IP | CSUM_TCP | CSUM_UDP); if (reqcap & IFCAP_TSO4) { reqcap &= ~IFCAP_TSO4; if_printf(ifp, "tso4 disabled due to -txcsum\n"); } } if (reqcap & IFCAP_TXCSUM_IPV6) { ifp->if_hwassist |= (CSUM_TCP_IPV6 | CSUM_UDP_IPV6); } else { ifp->if_hwassist &= ~(CSUM_TCP_IPV6 | CSUM_UDP_IPV6); if (reqcap & IFCAP_TSO6) { reqcap &= ~IFCAP_TSO6; if_printf(ifp, "tso6 disabled due to -txcsum6\n"); } } /* * The kernel takes both IFCAP_TSOx and CSUM_TSO into * account before using TSO. So, we do not touch * checksum flags when IFCAP_TSOx is modified. * Note that CSUM_TSO is (CSUM_IP_TSO|CSUM_IP6_TSO), * but both bits are set in IPv4 and IPv6 mbufs. */ ifp->if_capenable = reqcap; SFXGE_ADAPTER_UNLOCK(sc); break; } case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->media, command); break; #ifdef SIOCGI2C case SIOCGI2C: { struct ifi2creq i2c; - error = copyin(ifr->ifr_data, &i2c, sizeof(i2c)); + error = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c)); if (error != 0) break; if (i2c.len > sizeof(i2c.data)) { error = EINVAL; break; } SFXGE_ADAPTER_LOCK(sc); error = efx_phy_module_get_info(sc->enp, i2c.dev_addr, i2c.offset, i2c.len, &i2c.data[0]); SFXGE_ADAPTER_UNLOCK(sc); if (error == 0) - error = copyout(&i2c, ifr->ifr_data, sizeof(i2c)); + error = copyout(&i2c, ifr_data_get_ptr(ifr), + sizeof(i2c)); break; } #endif case SIOCGPRIVATE_0: error = priv_check(curthread, PRIV_DRIVER); if (error != 0) break; - error = copyin(ifr->ifr_data, &ioc, sizeof(ioc)); + error = copyin(ifr_data_get_ptr(ifr), &ioc, sizeof(ioc)); if (error != 0) return (error); error = sfxge_private_ioctl(sc, &ioc); if (error == 0) { - error = copyout(&ioc, ifr->ifr_data, sizeof(ioc)); + error = copyout(&ioc, ifr_data_get_ptr(ifr), + sizeof(ioc)); } break; default: error = ether_ioctl(ifp, command, data); } return (error); } static void sfxge_ifnet_fini(struct ifnet *ifp) { struct sfxge_softc *sc = ifp->if_softc; SFXGE_ADAPTER_LOCK(sc); sfxge_stop(sc); SFXGE_ADAPTER_UNLOCK(sc); ifmedia_removeall(&sc->media); ether_ifdetach(ifp); if_free(ifp); } static int sfxge_ifnet_init(struct ifnet *ifp, struct sfxge_softc *sc) { const efx_nic_cfg_t *encp = efx_nic_cfg_get(sc->enp); device_t dev; int rc; dev = sc->dev; sc->ifnet = ifp; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); ifp->if_init = sfxge_if_init; ifp->if_softc = sc; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = sfxge_if_ioctl; ifp->if_capabilities = SFXGE_CAP; ifp->if_capenable = SFXGE_CAP_ENABLE; ifp->if_hw_tsomax = SFXGE_TSO_MAX_SIZE; ifp->if_hw_tsomaxsegcount = SFXGE_TX_MAPPING_MAX_SEG; ifp->if_hw_tsomaxsegsize = PAGE_SIZE; #ifdef SFXGE_LRO ifp->if_capabilities |= IFCAP_LRO; ifp->if_capenable |= IFCAP_LRO; #endif if 
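/*
 * Illustrative sketch, not part of the diff: the SIOCGI2C and SIOCGPRIVATE_0
 * handlers above copy a request structure in and out of user space through the
 * pointer carried in ifr_data; this diff switches the kernel side to fetch
 * that pointer with ifr_data_get_ptr(ifr) instead of reading ifr->ifr_data
 * directly, presumably so differing (e.g. compat) layouts of struct ifreq
 * resolve to the right user address.  Below is a minimal FreeBSD userland
 * sketch of the caller's side of SIOCGI2C, assuming struct ifi2creq from
 * net/if.h; the interface name, module address 0xA0 and read length are
 * placeholders.
 */
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/sockio.h>

#include <net/if.h>

#include <err.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	struct ifi2creq i2c;
	struct ifreq ifr;
	int s;

	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0)
		err(1, "socket");

	memset(&i2c, 0, sizeof(i2c));
	i2c.dev_addr = 0xA0;		/* module EEPROM address (example) */
	i2c.offset = 0;
	i2c.len = 8;

	memset(&ifr, 0, sizeof(ifr));
	strlcpy(ifr.ifr_name, "sfxge0", sizeof(ifr.ifr_name));
	ifr.ifr_data = (caddr_t)&i2c;	/* pointer the driver copies in/out */

	if (ioctl(s, SIOCGI2C, &ifr) < 0)
		err(1, "SIOCGI2C");

	for (unsigned i = 0; i < i2c.len; i++)
		printf("%02x ", i2c.data[i]);
	printf("\n");
	close(s);
	return (0);
}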
(encp->enc_hw_tx_insert_vlan_enabled) { ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING; ifp->if_capenable |= IFCAP_VLAN_HWTAGGING; } ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO | CSUM_TCP_IPV6 | CSUM_UDP_IPV6; ether_ifattach(ifp, encp->enc_mac_addr); ifp->if_transmit = sfxge_if_transmit; ifp->if_qflush = sfxge_if_qflush; ifp->if_get_counter = sfxge_get_counter; DBGPRINT(sc->dev, "ifmedia_init"); if ((rc = sfxge_port_ifmedia_init(sc)) != 0) goto fail; return (0); fail: ether_ifdetach(sc->ifnet); return (rc); } void sfxge_sram_buf_tbl_alloc(struct sfxge_softc *sc, size_t n, uint32_t *idp) { KASSERT(sc->buffer_table_next + n <= efx_nic_cfg_get(sc->enp)->enc_buftbl_limit, ("buffer table full")); *idp = sc->buffer_table_next; sc->buffer_table_next += n; } static int sfxge_bar_init(struct sfxge_softc *sc) { efsys_bar_t *esbp = &sc->bar; esbp->esb_rid = PCIR_BAR(EFX_MEM_BAR); if ((esbp->esb_res = bus_alloc_resource_any(sc->dev, SYS_RES_MEMORY, &esbp->esb_rid, RF_ACTIVE)) == NULL) { device_printf(sc->dev, "Cannot allocate BAR region %d\n", EFX_MEM_BAR); return (ENXIO); } esbp->esb_tag = rman_get_bustag(esbp->esb_res); esbp->esb_handle = rman_get_bushandle(esbp->esb_res); SFXGE_BAR_LOCK_INIT(esbp, device_get_nameunit(sc->dev)); return (0); } static void sfxge_bar_fini(struct sfxge_softc *sc) { efsys_bar_t *esbp = &sc->bar; bus_release_resource(sc->dev, SYS_RES_MEMORY, esbp->esb_rid, esbp->esb_res); SFXGE_BAR_LOCK_DESTROY(esbp); } static int sfxge_create(struct sfxge_softc *sc) { device_t dev; efx_nic_t *enp; int error; char rss_param_name[sizeof(SFXGE_PARAM(%d.max_rss_channels))]; #if EFSYS_OPT_MCDI_LOGGING char mcdi_log_param_name[sizeof(SFXGE_PARAM(%d.mcdi_logging))]; #endif dev = sc->dev; SFXGE_ADAPTER_LOCK_INIT(sc, device_get_nameunit(sc->dev)); sc->max_rss_channels = 0; snprintf(rss_param_name, sizeof(rss_param_name), SFXGE_PARAM(%d.max_rss_channels), (int)device_get_unit(dev)); TUNABLE_INT_FETCH(rss_param_name, &sc->max_rss_channels); #if EFSYS_OPT_MCDI_LOGGING sc->mcdi_logging = sfxge_mcdi_logging; snprintf(mcdi_log_param_name, sizeof(mcdi_log_param_name), SFXGE_PARAM(%d.mcdi_logging), (int)device_get_unit(dev)); TUNABLE_INT_FETCH(mcdi_log_param_name, &sc->mcdi_logging); #endif sc->stats_node = SYSCTL_ADD_NODE( device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "stats", CTLFLAG_RD, NULL, "Statistics"); if (sc->stats_node == NULL) { error = ENOMEM; goto fail; } TASK_INIT(&sc->task_reset, 0, sfxge_reset, sc); (void) pci_enable_busmaster(dev); /* Initialize DMA mappings. */ DBGPRINT(sc->dev, "dma_init..."); if ((error = sfxge_dma_init(sc)) != 0) goto fail; /* Map the device registers. */ DBGPRINT(sc->dev, "bar_init..."); if ((error = sfxge_bar_init(sc)) != 0) goto fail; error = efx_family(pci_get_vendor(dev), pci_get_device(dev), &sc->family); KASSERT(error == 0, ("Family should be filtered by sfxge_probe()")); DBGPRINT(sc->dev, "nic_create..."); /* Create the common code nic object. */ SFXGE_EFSYS_LOCK_INIT(&sc->enp_lock, device_get_nameunit(sc->dev), "nic"); if ((error = efx_nic_create(sc->family, (efsys_identifier_t *)sc, &sc->bar, &sc->enp_lock, &enp)) != 0) goto fail3; sc->enp = enp; /* Initialize MCDI to talk to the microcontroller. */ DBGPRINT(sc->dev, "mcdi_init..."); if ((error = sfxge_mcdi_init(sc)) != 0) goto fail4; /* Probe the NIC and build the configuration data area. 
*/ DBGPRINT(sc->dev, "nic_probe..."); if ((error = efx_nic_probe(enp)) != 0) goto fail5; if (!ISP2(sfxge_rx_ring_entries) || (sfxge_rx_ring_entries < EFX_RXQ_MINNDESCS) || (sfxge_rx_ring_entries > EFX_RXQ_MAXNDESCS)) { log(LOG_ERR, "%s=%d must be power of 2 from %u to %u", SFXGE_PARAM_RX_RING, sfxge_rx_ring_entries, EFX_RXQ_MINNDESCS, EFX_RXQ_MAXNDESCS); error = EINVAL; goto fail_rx_ring_entries; } sc->rxq_entries = sfxge_rx_ring_entries; if (!ISP2(sfxge_tx_ring_entries) || (sfxge_tx_ring_entries < EFX_TXQ_MINNDESCS) || (sfxge_tx_ring_entries > EFX_TXQ_MAXNDESCS(efx_nic_cfg_get(enp)))) { log(LOG_ERR, "%s=%d must be power of 2 from %u to %u", SFXGE_PARAM_TX_RING, sfxge_tx_ring_entries, EFX_TXQ_MINNDESCS, EFX_TXQ_MAXNDESCS(efx_nic_cfg_get(enp))); error = EINVAL; goto fail_tx_ring_entries; } sc->txq_entries = sfxge_tx_ring_entries; SYSCTL_ADD_STRING(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "version", CTLFLAG_RD, SFXGE_VERSION_STRING, 0, "Driver version"); SYSCTL_ADD_UINT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "phy_type", CTLFLAG_RD, NULL, efx_nic_cfg_get(enp)->enc_phy_type, "PHY type"); /* Initialize the NVRAM. */ DBGPRINT(sc->dev, "nvram_init..."); if ((error = efx_nvram_init(enp)) != 0) goto fail6; /* Initialize the VPD. */ DBGPRINT(sc->dev, "vpd_init..."); if ((error = efx_vpd_init(enp)) != 0) goto fail7; efx_mcdi_new_epoch(enp); /* Reset the NIC. */ DBGPRINT(sc->dev, "nic_reset..."); if ((error = efx_nic_reset(enp)) != 0) goto fail8; /* Initialize buffer table allocation. */ sc->buffer_table_next = 0; /* * Guarantee minimum and estimate maximum number of event queues * to take it into account when MSI-X interrupts are allocated. * It initializes NIC and keeps it initialized on success. */ if ((error = sfxge_estimate_rsrc_limits(sc)) != 0) goto fail8; /* Set up interrupts. */ DBGPRINT(sc->dev, "intr_init..."); if ((error = sfxge_intr_init(sc)) != 0) goto fail9; /* Initialize event processing state. */ DBGPRINT(sc->dev, "ev_init..."); if ((error = sfxge_ev_init(sc)) != 0) goto fail11; /* Initialize port state. */ DBGPRINT(sc->dev, "port_init..."); if ((error = sfxge_port_init(sc)) != 0) goto fail12; /* Initialize receive state. */ DBGPRINT(sc->dev, "rx_init..."); if ((error = sfxge_rx_init(sc)) != 0) goto fail13; /* Initialize transmit state. */ DBGPRINT(sc->dev, "tx_init..."); if ((error = sfxge_tx_init(sc)) != 0) goto fail14; sc->init_state = SFXGE_INITIALIZED; DBGPRINT(sc->dev, "success"); return (0); fail14: sfxge_rx_fini(sc); fail13: sfxge_port_fini(sc); fail12: sfxge_ev_fini(sc); fail11: sfxge_intr_fini(sc); fail9: efx_nic_fini(sc->enp); fail8: efx_vpd_fini(enp); fail7: efx_nvram_fini(enp); fail6: fail_tx_ring_entries: fail_rx_ring_entries: efx_nic_unprobe(enp); fail5: sfxge_mcdi_fini(sc); fail4: sc->enp = NULL; efx_nic_destroy(enp); SFXGE_EFSYS_LOCK_DESTROY(&sc->enp_lock); fail3: sfxge_bar_fini(sc); (void) pci_disable_busmaster(sc->dev); fail: DBGPRINT(sc->dev, "failed %d", error); sc->dev = NULL; SFXGE_ADAPTER_LOCK_DESTROY(sc); return (error); } static void sfxge_destroy(struct sfxge_softc *sc) { efx_nic_t *enp; /* Clean up transmit state. */ sfxge_tx_fini(sc); /* Clean up receive state. */ sfxge_rx_fini(sc); /* Clean up port state. */ sfxge_port_fini(sc); /* Clean up event processing state. */ sfxge_ev_fini(sc); /* Clean up interrupts. */ sfxge_intr_fini(sc); /* Tear down common code subsystems. 
*/ efx_nic_reset(sc->enp); efx_vpd_fini(sc->enp); efx_nvram_fini(sc->enp); efx_nic_unprobe(sc->enp); /* Tear down MCDI. */ sfxge_mcdi_fini(sc); /* Destroy common code context. */ enp = sc->enp; sc->enp = NULL; efx_nic_destroy(enp); /* Free DMA memory. */ sfxge_dma_fini(sc); /* Free mapped BARs. */ sfxge_bar_fini(sc); (void) pci_disable_busmaster(sc->dev); taskqueue_drain(taskqueue_thread, &sc->task_reset); /* Destroy the softc lock. */ SFXGE_ADAPTER_LOCK_DESTROY(sc); } static int sfxge_vpd_handler(SYSCTL_HANDLER_ARGS) { struct sfxge_softc *sc = arg1; efx_vpd_value_t value; int rc; value.evv_tag = arg2 >> 16; value.evv_keyword = arg2 & 0xffff; if ((rc = efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value)) != 0) return (rc); return (SYSCTL_OUT(req, value.evv_value, value.evv_length)); } static void sfxge_vpd_try_add(struct sfxge_softc *sc, struct sysctl_oid_list *list, efx_vpd_tag_t tag, const char *keyword) { struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev); efx_vpd_value_t value; /* Check whether VPD tag/keyword is present */ value.evv_tag = tag; value.evv_keyword = EFX_VPD_KEYWORD(keyword[0], keyword[1]); if (efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value) != 0) return; SYSCTL_ADD_PROC( ctx, list, OID_AUTO, keyword, CTLTYPE_STRING|CTLFLAG_RD, sc, tag << 16 | EFX_VPD_KEYWORD(keyword[0], keyword[1]), sfxge_vpd_handler, "A", ""); } static int sfxge_vpd_init(struct sfxge_softc *sc) { struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(sc->dev); struct sysctl_oid *vpd_node; struct sysctl_oid_list *vpd_list; char keyword[3]; efx_vpd_value_t value; int rc; if ((rc = efx_vpd_size(sc->enp, &sc->vpd_size)) != 0) { /* * Unpriviledged functions deny VPD access. * Simply skip VPD in this case. */ if (rc == EACCES) goto done; goto fail; } sc->vpd_data = malloc(sc->vpd_size, M_SFXGE, M_WAITOK); if ((rc = efx_vpd_read(sc->enp, sc->vpd_data, sc->vpd_size)) != 0) goto fail2; /* Copy ID (product name) into device description, and log it. */ value.evv_tag = EFX_VPD_ID; if (efx_vpd_get(sc->enp, sc->vpd_data, sc->vpd_size, &value) == 0) { value.evv_value[value.evv_length] = 0; device_set_desc_copy(sc->dev, value.evv_value); device_printf(sc->dev, "%s\n", value.evv_value); } vpd_node = SYSCTL_ADD_NODE( ctx, SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev)), OID_AUTO, "vpd", CTLFLAG_RD, NULL, "Vital Product Data"); vpd_list = SYSCTL_CHILDREN(vpd_node); /* Add sysctls for all expected and any vendor-defined keywords. 
*/ sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, "PN"); sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, "EC"); sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, "SN"); keyword[0] = 'V'; keyword[2] = 0; for (keyword[1] = '0'; keyword[1] <= '9'; keyword[1]++) sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, keyword); for (keyword[1] = 'A'; keyword[1] <= 'Z'; keyword[1]++) sfxge_vpd_try_add(sc, vpd_list, EFX_VPD_RO, keyword); done: return (0); fail2: free(sc->vpd_data, M_SFXGE); fail: return (rc); } static void sfxge_vpd_fini(struct sfxge_softc *sc) { free(sc->vpd_data, M_SFXGE); } static void sfxge_reset(void *arg, int npending) { struct sfxge_softc *sc; int rc; unsigned attempt; (void)npending; sc = (struct sfxge_softc *)arg; SFXGE_ADAPTER_LOCK(sc); if (sc->init_state != SFXGE_STARTED) goto done; sfxge_stop(sc); efx_nic_reset(sc->enp); for (attempt = 0; attempt < sfxge_restart_attempts; ++attempt) { if ((rc = sfxge_start(sc)) == 0) goto done; device_printf(sc->dev, "start on reset failed (%d)\n", rc); DELAY(100000); } device_printf(sc->dev, "reset failed; interface is now stopped\n"); done: SFXGE_ADAPTER_UNLOCK(sc); } void sfxge_schedule_reset(struct sfxge_softc *sc) { taskqueue_enqueue(taskqueue_thread, &sc->task_reset); } static int sfxge_attach(device_t dev) { struct sfxge_softc *sc; struct ifnet *ifp; int error; sc = device_get_softc(dev); sc->dev = dev; /* Allocate ifnet. */ ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "Couldn't allocate ifnet\n"); error = ENOMEM; goto fail; } sc->ifnet = ifp; /* Initialize hardware. */ DBGPRINT(sc->dev, "create nic"); if ((error = sfxge_create(sc)) != 0) goto fail2; /* Create the ifnet for the port. */ DBGPRINT(sc->dev, "init ifnet"); if ((error = sfxge_ifnet_init(ifp, sc)) != 0) goto fail3; DBGPRINT(sc->dev, "init vpd"); if ((error = sfxge_vpd_init(sc)) != 0) goto fail4; /* * NIC is initialized inside sfxge_create() and kept inialized * to be able to initialize port to discover media types in * sfxge_ifnet_init(). */ efx_nic_fini(sc->enp); sc->init_state = SFXGE_REGISTERED; DBGPRINT(sc->dev, "success"); return (0); fail4: sfxge_ifnet_fini(ifp); fail3: efx_nic_fini(sc->enp); sfxge_destroy(sc); fail2: if_free(sc->ifnet); fail: DBGPRINT(sc->dev, "failed %d", error); return (error); } static int sfxge_detach(device_t dev) { struct sfxge_softc *sc; sc = device_get_softc(dev); sfxge_vpd_fini(sc); /* Destroy the ifnet. */ sfxge_ifnet_fini(sc->ifnet); /* Tear down hardware. 
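/*
 * Illustrative sketch, not part of the diff: sfxge_reset() above stops the
 * port, resets the NIC and then tries to bring the interface back up at most
 * sfxge_restart_attempts times (default 3, tunable hw.sfxge.restart_attempts),
 * pausing 100 ms between attempts and leaving the interface stopped if every
 * attempt fails.  The standalone loop below has the same shape; fake_start()
 * is a made-up routine that fails twice before succeeding.
 */
#include <stdio.h>
#include <unistd.h>

#define RESTART_ATTEMPTS	3

static int
fake_start(void)
{
	static int calls;

	return (++calls < 3 ? -1 : 0);
}

int
main(void)
{
	for (unsigned attempt = 0; attempt < RESTART_ATTEMPTS; attempt++) {
		if (fake_start() == 0) {
			printf("restarted on attempt %u\n", attempt);
			return (0);
		}
		printf("start on reset failed, retrying\n");
		usleep(100000);		/* like DELAY(100000): 100 ms */
	}
	printf("reset failed; interface is now stopped\n");
	return (1);
}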
*/ sfxge_destroy(sc); return (0); } static int sfxge_probe(device_t dev) { uint16_t pci_vendor_id; uint16_t pci_device_id; efx_family_t family; int rc; pci_vendor_id = pci_get_vendor(dev); pci_device_id = pci_get_device(dev); DBGPRINT(dev, "PCI ID %04x:%04x", pci_vendor_id, pci_device_id); rc = efx_family(pci_vendor_id, pci_device_id, &family); if (rc != 0) { DBGPRINT(dev, "efx_family fail %d", rc); return (ENXIO); } if (family == EFX_FAMILY_SIENA) { device_set_desc(dev, "Solarflare SFC9000 family"); return (0); } if (family == EFX_FAMILY_HUNTINGTON) { device_set_desc(dev, "Solarflare SFC9100 family"); return (0); } if (family == EFX_FAMILY_MEDFORD) { device_set_desc(dev, "Solarflare SFC9200 family"); return (0); } DBGPRINT(dev, "impossible controller family %d", family); return (ENXIO); } static device_method_t sfxge_methods[] = { DEVMETHOD(device_probe, sfxge_probe), DEVMETHOD(device_attach, sfxge_attach), DEVMETHOD(device_detach, sfxge_detach), DEVMETHOD_END }; static devclass_t sfxge_devclass; static driver_t sfxge_driver = { "sfxge", sfxge_methods, sizeof(struct sfxge_softc) }; DRIVER_MODULE(sfxge, pci, sfxge_driver, sfxge_devclass, 0, 0); Index: stable/11/sys/dev/vxge/vxge.c =================================================================== --- stable/11/sys/dev/vxge/vxge.c (revision 332287) +++ stable/11/sys/dev/vxge/vxge.c (revision 332288) @@ -1,4196 +1,4201 @@ /*- * Copyright(c) 2002-2011 Exar Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification are permitted provided the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. Neither the name of the Exar Corporation nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ /*$FreeBSD$*/ #include static int vxge_pci_bd_no = -1; static u32 vxge_drv_copyright = 0; static u32 vxge_dev_ref_count = 0; static u32 vxge_dev_req_reboot = 0; static int vpath_selector[VXGE_HAL_MAX_VIRTUAL_PATHS] = \ {0, 1, 3, 3, 7, 7, 7, 7, 15, 15, 15, 15, 15, 15, 15, 15, 31}; /* * vxge_probe * Probes for x3100 devices */ int vxge_probe(device_t ndev) { int err = ENXIO; u16 pci_bd_no = 0; u16 pci_vendor_id = 0; u16 pci_device_id = 0; char adapter_name[64]; pci_vendor_id = pci_get_vendor(ndev); if (pci_vendor_id != VXGE_PCI_VENDOR_ID) goto _exit0; pci_device_id = pci_get_device(ndev); if (pci_device_id == VXGE_PCI_DEVICE_ID_TITAN_1) { pci_bd_no = (pci_get_bus(ndev) | pci_get_slot(ndev)); snprintf(adapter_name, sizeof(adapter_name), VXGE_ADAPTER_NAME, pci_get_revid(ndev)); device_set_desc_copy(ndev, adapter_name); if (!vxge_drv_copyright) { device_printf(ndev, VXGE_COPYRIGHT); vxge_drv_copyright = 1; } if (vxge_dev_req_reboot == 0) { vxge_pci_bd_no = pci_bd_no; err = BUS_PROBE_DEFAULT; } else { if (pci_bd_no != vxge_pci_bd_no) { vxge_pci_bd_no = pci_bd_no; err = BUS_PROBE_DEFAULT; } } } _exit0: return (err); } /* * vxge_attach * Connects driver to the system if probe was success @ndev handle */ int vxge_attach(device_t ndev) { int err = 0; vxge_dev_t *vdev; vxge_hal_device_t *hldev = NULL; vxge_hal_device_attr_t device_attr; vxge_free_resources_e error_level = VXGE_FREE_NONE; vxge_hal_status_e status = VXGE_HAL_OK; /* Get per-ndev buffer */ vdev = (vxge_dev_t *) device_get_softc(ndev); if (!vdev) goto _exit0; bzero(vdev, sizeof(vxge_dev_t)); vdev->ndev = ndev; strlcpy(vdev->ndev_name, "vxge", sizeof(vdev->ndev_name)); err = vxge_driver_config(vdev); if (err != 0) goto _exit0; /* Initialize HAL driver */ status = vxge_driver_init(vdev); if (status != VXGE_HAL_OK) { device_printf(vdev->ndev, "Failed to initialize driver\n"); goto _exit0; } /* Enable PCI bus-master */ pci_enable_busmaster(ndev); /* Allocate resources */ err = vxge_alloc_resources(vdev); if (err != 0) { device_printf(vdev->ndev, "resource allocation failed\n"); goto _exit0; } err = vxge_device_hw_info_get(vdev); if (err != 0) { error_level = VXGE_FREE_BAR2; goto _exit0; } /* Get firmware default values for Device Configuration */ vxge_hal_device_config_default_get(vdev->device_config); /* Customize Device Configuration based on User request */ vxge_vpath_config(vdev); /* Allocate ISR resources */ err = vxge_alloc_isr_resources(vdev); if (err != 0) { error_level = VXGE_FREE_ISR_RESOURCE; device_printf(vdev->ndev, "isr resource allocation failed\n"); goto _exit0; } /* HAL attributes */ device_attr.bar0 = (u8 *) vdev->pdev->bar_info[0]; device_attr.bar1 = (u8 *) vdev->pdev->bar_info[1]; device_attr.bar2 = (u8 *) vdev->pdev->bar_info[2]; device_attr.regh0 = (vxge_bus_res_t *) vdev->pdev->reg_map[0]; device_attr.regh1 = (vxge_bus_res_t *) vdev->pdev->reg_map[1]; device_attr.regh2 = (vxge_bus_res_t *) vdev->pdev->reg_map[2]; device_attr.irqh = (pci_irq_h) vdev->config.isr_info[0].irq_handle; device_attr.cfgh = vdev->pdev; device_attr.pdev = vdev->pdev; /* Initialize HAL Device */ status = vxge_hal_device_initialize((vxge_hal_device_h *) &hldev, &device_attr, vdev->device_config); if (status != VXGE_HAL_OK) { error_level = VXGE_FREE_ISR_RESOURCE; device_printf(vdev->ndev, "hal device initialization failed\n"); goto _exit0; } vdev->devh = hldev; vxge_hal_device_private_set(hldev, vdev); if (vdev->is_privilaged) { err = vxge_firmware_verify(vdev); if (err != 0) { vxge_dev_req_reboot = 1; error_level = VXGE_FREE_TERMINATE_DEVICE; 
goto _exit0; } } /* Allocate memory for vpath */ vdev->vpaths = (vxge_vpath_t *) vxge_mem_alloc(vdev->no_of_vpath * sizeof(vxge_vpath_t)); if (vdev->vpaths == NULL) { error_level = VXGE_FREE_TERMINATE_DEVICE; device_printf(vdev->ndev, "vpath memory allocation failed\n"); goto _exit0; } vdev->no_of_func = 1; if (vdev->is_privilaged) { vxge_hal_func_mode_count(vdev->devh, vdev->config.hw_info.function_mode, &vdev->no_of_func); vxge_bw_priority_config(vdev); } /* Initialize mutexes */ vxge_mutex_init(vdev); /* Initialize Media */ vxge_media_init(vdev); err = vxge_ifp_setup(ndev); if (err != 0) { error_level = VXGE_FREE_MEDIA; device_printf(vdev->ndev, "setting up interface failed\n"); goto _exit0; } err = vxge_isr_setup(vdev); if (err != 0) { error_level = VXGE_FREE_INTERFACE; device_printf(vdev->ndev, "failed to associate interrupt handler with device\n"); goto _exit0; } vxge_device_hw_info_print(vdev); vdev->is_active = TRUE; _exit0: if (error_level) { vxge_free_resources(ndev, error_level); err = ENXIO; } return (err); } /* * vxge_detach * Detaches driver from the Kernel subsystem */ int vxge_detach(device_t ndev) { vxge_dev_t *vdev; vdev = (vxge_dev_t *) device_get_softc(ndev); if (vdev->is_active) { vdev->is_active = FALSE; vxge_stop(vdev); vxge_free_resources(ndev, VXGE_FREE_ALL); } return (0); } /* * vxge_shutdown * To shutdown device before system shutdown */ int vxge_shutdown(device_t ndev) { vxge_dev_t *vdev = (vxge_dev_t *) device_get_softc(ndev); vxge_stop(vdev); return (0); } /* * vxge_init * Initialize the interface */ void vxge_init(void *vdev_ptr) { vxge_dev_t *vdev = (vxge_dev_t *) vdev_ptr; VXGE_DRV_LOCK(vdev); vxge_init_locked(vdev); VXGE_DRV_UNLOCK(vdev); } /* * vxge_init_locked * Initialize the interface */ void vxge_init_locked(vxge_dev_t *vdev) { int i, err = EINVAL; vxge_hal_device_t *hldev = vdev->devh; vxge_hal_status_e status = VXGE_HAL_OK; vxge_hal_vpath_h vpath_handle; ifnet_t ifp = vdev->ifp; /* If device is in running state, initializing is not required */ if (ifp->if_drv_flags & IFF_DRV_RUNNING) goto _exit0; VXGE_DRV_LOCK_ASSERT(vdev); /* Opening vpaths */ err = vxge_vpath_open(vdev); if (err != 0) goto _exit1; if (vdev->config.rth_enable) { status = vxge_rth_config(vdev); if (status != VXGE_HAL_OK) goto _exit1; } for (i = 0; i < vdev->no_of_vpath; i++) { vpath_handle = vxge_vpath_handle_get(vdev, i); if (!vpath_handle) continue; /* check initial mtu before enabling the device */ status = vxge_hal_device_mtu_check(vpath_handle, ifp->if_mtu); if (status != VXGE_HAL_OK) { device_printf(vdev->ndev, "invalid mtu size %u specified\n", ifp->if_mtu); goto _exit1; } status = vxge_hal_vpath_mtu_set(vpath_handle, ifp->if_mtu); if (status != VXGE_HAL_OK) { device_printf(vdev->ndev, "setting mtu in device failed\n"); goto _exit1; } } /* Enable HAL device */ status = vxge_hal_device_enable(hldev); if (status != VXGE_HAL_OK) { device_printf(vdev->ndev, "failed to enable device\n"); goto _exit1; } if (vdev->config.intr_mode == VXGE_HAL_INTR_MODE_MSIX) vxge_msix_enable(vdev); /* Checksum capability */ ifp->if_hwassist = 0; if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP); if (ifp->if_capenable & IFCAP_TSO4) ifp->if_hwassist |= CSUM_TSO; for (i = 0; i < vdev->no_of_vpath; i++) { vpath_handle = vxge_vpath_handle_get(vdev, i); if (!vpath_handle) continue; /* Enabling mcast for all vpath */ vxge_hal_vpath_mcast_enable(vpath_handle); /* Enabling bcast for all vpath */ status = vxge_hal_vpath_bcast_enable(vpath_handle); if (status != VXGE_HAL_OK) 
device_printf(vdev->ndev, "can't enable bcast on vpath (%d)\n", i); } /* Enable interrupts */ vxge_hal_device_intr_enable(vdev->devh); for (i = 0; i < vdev->no_of_vpath; i++) { vpath_handle = vxge_vpath_handle_get(vdev, i); if (!vpath_handle) continue; bzero(&(vdev->vpaths[i].driver_stats), sizeof(vxge_drv_stats_t)); status = vxge_hal_vpath_enable(vpath_handle); if (status != VXGE_HAL_OK) goto _exit2; } vxge_os_mdelay(1000); /* Device is initialized */ vdev->is_initialized = TRUE; /* Now inform the stack we're ready */ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; ifp->if_drv_flags |= IFF_DRV_RUNNING; goto _exit0; _exit2: vxge_hal_device_intr_disable(vdev->devh); vxge_hal_device_disable(hldev); _exit1: vxge_vpath_close(vdev); _exit0: return; } /* * vxge_driver_init * Initializes HAL driver */ vxge_hal_status_e vxge_driver_init(vxge_dev_t *vdev) { vxge_hal_uld_cbs_t uld_callbacks; vxge_hal_driver_config_t driver_config; vxge_hal_status_e status = VXGE_HAL_OK; /* Initialize HAL driver */ if (!vxge_dev_ref_count) { bzero(&uld_callbacks, sizeof(vxge_hal_uld_cbs_t)); bzero(&driver_config, sizeof(vxge_hal_driver_config_t)); uld_callbacks.link_up = vxge_link_up; uld_callbacks.link_down = vxge_link_down; uld_callbacks.crit_err = vxge_crit_error; uld_callbacks.sched_timer = NULL; uld_callbacks.xpak_alarm_log = NULL; status = vxge_hal_driver_initialize(&driver_config, &uld_callbacks); if (status != VXGE_HAL_OK) { device_printf(vdev->ndev, "failed to initialize driver\n"); goto _exit0; } } vxge_hal_driver_debug_set(VXGE_TRACE); vxge_dev_ref_count++; _exit0: return (status); } /* * vxge_driver_config */ int vxge_driver_config(vxge_dev_t *vdev) { int i, err = 0; char temp_buffer[30]; vxge_bw_info_t bw_info; VXGE_GET_PARAM("hint.vxge.0.no_of_vpath", vdev->config, no_of_vpath, VXGE_DEFAULT_USER_HARDCODED); if (vdev->config.no_of_vpath == VXGE_DEFAULT_USER_HARDCODED) vdev->config.no_of_vpath = mp_ncpus; if (vdev->config.no_of_vpath <= 0) { err = EINVAL; device_printf(vdev->ndev, "Failed to load driver, \ invalid config : \'no_of_vpath\'\n"); goto _exit0; } VXGE_GET_PARAM("hint.vxge.0.intr_coalesce", vdev->config, intr_coalesce, VXGE_DEFAULT_CONFIG_DISABLE); VXGE_GET_PARAM("hint.vxge.0.rth_enable", vdev->config, rth_enable, VXGE_DEFAULT_CONFIG_ENABLE); VXGE_GET_PARAM("hint.vxge.0.rth_bkt_sz", vdev->config, rth_bkt_sz, VXGE_DEFAULT_RTH_BUCKET_SIZE); VXGE_GET_PARAM("hint.vxge.0.lro_enable", vdev->config, lro_enable, VXGE_DEFAULT_CONFIG_ENABLE); VXGE_GET_PARAM("hint.vxge.0.tso_enable", vdev->config, tso_enable, VXGE_DEFAULT_CONFIG_ENABLE); VXGE_GET_PARAM("hint.vxge.0.tx_steering", vdev->config, tx_steering, VXGE_DEFAULT_CONFIG_DISABLE); VXGE_GET_PARAM("hint.vxge.0.msix_enable", vdev->config, intr_mode, VXGE_HAL_INTR_MODE_MSIX); VXGE_GET_PARAM("hint.vxge.0.ifqmaxlen", vdev->config, ifq_maxlen, VXGE_DEFAULT_CONFIG_IFQ_MAXLEN); VXGE_GET_PARAM("hint.vxge.0.port_mode", vdev->config, port_mode, VXGE_DEFAULT_CONFIG_VALUE); if (vdev->config.port_mode == VXGE_DEFAULT_USER_HARDCODED) vdev->config.port_mode = VXGE_DEFAULT_CONFIG_VALUE; VXGE_GET_PARAM("hint.vxge.0.l2_switch", vdev->config, l2_switch, VXGE_DEFAULT_CONFIG_VALUE); if (vdev->config.l2_switch == VXGE_DEFAULT_USER_HARDCODED) vdev->config.l2_switch = VXGE_DEFAULT_CONFIG_VALUE; VXGE_GET_PARAM("hint.vxge.0.fw_upgrade", vdev->config, fw_option, VXGE_FW_UPGRADE_ALL); VXGE_GET_PARAM("hint.vxge.0.low_latency", vdev->config, low_latency, VXGE_DEFAULT_CONFIG_DISABLE); VXGE_GET_PARAM("hint.vxge.0.func_mode", vdev->config, function_mode, VXGE_DEFAULT_CONFIG_VALUE); if 
(vdev->config.function_mode == VXGE_DEFAULT_USER_HARDCODED) vdev->config.function_mode = VXGE_DEFAULT_CONFIG_VALUE; if (!(is_multi_func(vdev->config.function_mode) || is_single_func(vdev->config.function_mode))) vdev->config.function_mode = VXGE_DEFAULT_CONFIG_VALUE; for (i = 0; i < VXGE_HAL_MAX_FUNCTIONS; i++) { bw_info.func_id = i; sprintf(temp_buffer, "hint.vxge.0.bandwidth_%d", i); VXGE_GET_PARAM(temp_buffer, bw_info, bandwidth, VXGE_DEFAULT_USER_HARDCODED); if (bw_info.bandwidth == VXGE_DEFAULT_USER_HARDCODED) bw_info.bandwidth = VXGE_HAL_VPATH_BW_LIMIT_DEFAULT; sprintf(temp_buffer, "hint.vxge.0.priority_%d", i); VXGE_GET_PARAM(temp_buffer, bw_info, priority, VXGE_DEFAULT_USER_HARDCODED); if (bw_info.priority == VXGE_DEFAULT_USER_HARDCODED) bw_info.priority = VXGE_HAL_VPATH_PRIORITY_DEFAULT; vxge_os_memcpy(&vdev->config.bw_info[i], &bw_info, sizeof(vxge_bw_info_t)); } _exit0: return (err); } /* * vxge_stop */ void vxge_stop(vxge_dev_t *vdev) { VXGE_DRV_LOCK(vdev); vxge_stop_locked(vdev); VXGE_DRV_UNLOCK(vdev); } /* * vxge_stop_locked * Common code for both stop and part of reset. * disables device, interrupts and closes vpaths handle */ void vxge_stop_locked(vxge_dev_t *vdev) { u64 adapter_status = 0; vxge_hal_status_e status; vxge_hal_device_t *hldev = vdev->devh; ifnet_t ifp = vdev->ifp; VXGE_DRV_LOCK_ASSERT(vdev); /* If device is not in "Running" state, return */ if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) return; /* Set appropriate flags */ vdev->is_initialized = FALSE; hldev->link_state = VXGE_HAL_LINK_NONE; ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); if_link_state_change(ifp, LINK_STATE_DOWN); /* Disable interrupts */ vxge_hal_device_intr_disable(hldev); /* Disable HAL device */ status = vxge_hal_device_disable(hldev); if (status != VXGE_HAL_OK) { vxge_hal_device_status(hldev, &adapter_status); device_printf(vdev->ndev, "adapter status: 0x%llx\n", adapter_status); } /* reset vpaths */ vxge_vpath_reset(vdev); vxge_os_mdelay(1000); /* Close Vpaths */ vxge_vpath_close(vdev); } void vxge_send(ifnet_t ifp) { vxge_vpath_t *vpath; vxge_dev_t *vdev = (vxge_dev_t *) ifp->if_softc; vpath = &(vdev->vpaths[0]); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { if (VXGE_TX_TRYLOCK(vpath)) { vxge_send_locked(ifp, vpath); VXGE_TX_UNLOCK(vpath); } } } static inline void vxge_send_locked(ifnet_t ifp, vxge_vpath_t *vpath) { mbuf_t m_head = NULL; vxge_dev_t *vdev = vpath->vdev; VXGE_TX_LOCK_ASSERT(vpath); if ((!vdev->is_initialized) || ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING)) return; while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; if (vxge_xmit(ifp, vpath, &m_head)) { if (m_head == NULL) break; ifp->if_drv_flags |= IFF_DRV_OACTIVE; IFQ_DRV_PREPEND(&ifp->if_snd, m_head); VXGE_DRV_STATS(vpath, tx_again); break; } /* Send a copy of the frame to the BPF listener */ ETHER_BPF_MTAP(ifp, m_head); } } #if __FreeBSD_version >= 800000 int vxge_mq_send(ifnet_t ifp, mbuf_t m_head) { int i = 0, err = 0; vxge_vpath_t *vpath; vxge_dev_t *vdev = (vxge_dev_t *) ifp->if_softc; if (vdev->config.tx_steering) { i = vxge_vpath_get(vdev, m_head); } else if (M_HASHTYPE_GET(m_head) != M_HASHTYPE_NONE) { i = m_head->m_pkthdr.flowid % vdev->no_of_vpath; } vpath = &(vdev->vpaths[i]); if (VXGE_TX_TRYLOCK(vpath)) { err = vxge_mq_send_locked(ifp, vpath, m_head); VXGE_TX_UNLOCK(vpath); } else err = drbr_enqueue(ifp, vpath->br, m_head); return (err); } static inline int vxge_mq_send_locked(ifnet_t ifp, vxge_vpath_t *vpath, 
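/*
 * Illustrative sketch, not part of the diff: vxge_mq_send() above picks the
 * transmit vpath either through the driver's own tx_steering lookup or, when
 * the stack supplied an RSS flow id, by reducing that id modulo the number of
 * opened vpaths so each flow sticks to one queue.  The standalone example
 * below shows that modulo spreading; the flow ids and vpath count are made up.
 */
#include <stdint.h>
#include <stdio.h>

static unsigned
pick_vpath(uint32_t flowid, unsigned no_of_vpath)
{
	return (flowid % no_of_vpath);
}

int
main(void)
{
	uint32_t flowids[] = { 0x1a2b, 0x0099, 0xbeef, 0x1234, 0x007f };
	unsigned no_of_vpath = 4;

	for (unsigned i = 0; i < sizeof(flowids) / sizeof(flowids[0]); i++)
		printf("flowid 0x%04x -> vpath %u\n",
		    (unsigned)flowids[i], pick_vpath(flowids[i], no_of_vpath));
	return (0);
}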
mbuf_t m_head) { int err = 0; mbuf_t next = NULL; vxge_dev_t *vdev = vpath->vdev; VXGE_TX_LOCK_ASSERT(vpath); if ((!vdev->is_initialized) || ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING)) { err = drbr_enqueue(ifp, vpath->br, m_head); goto _exit0; } if (m_head == NULL) { next = drbr_dequeue(ifp, vpath->br); } else if (drbr_needs_enqueue(ifp, vpath->br)) { if ((err = drbr_enqueue(ifp, vpath->br, m_head)) != 0) goto _exit0; next = drbr_dequeue(ifp, vpath->br); } else next = m_head; /* Process the queue */ while (next != NULL) { if ((err = vxge_xmit(ifp, vpath, &next)) != 0) { if (next == NULL) break; ifp->if_drv_flags |= IFF_DRV_OACTIVE; err = drbr_enqueue(ifp, vpath->br, next); VXGE_DRV_STATS(vpath, tx_again); break; } if_inc_counter(ifp, IFCOUNTER_OBYTES, next->m_pkthdr.len); if (next->m_flags & M_MCAST) if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); /* Send a copy of the frame to the BPF listener */ ETHER_BPF_MTAP(ifp, next); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) break; next = drbr_dequeue(ifp, vpath->br); } _exit0: return (err); } void vxge_mq_qflush(ifnet_t ifp) { int i; mbuf_t m_head; vxge_vpath_t *vpath; vxge_dev_t *vdev = (vxge_dev_t *) ifp->if_softc; for (i = 0; i < vdev->no_of_vpath; i++) { vpath = &(vdev->vpaths[i]); if (!vpath->handle) continue; VXGE_TX_LOCK(vpath); while ((m_head = buf_ring_dequeue_sc(vpath->br)) != NULL) vxge_free_packet(m_head); VXGE_TX_UNLOCK(vpath); } if_qflush(ifp); } #endif static inline int vxge_xmit(ifnet_t ifp, vxge_vpath_t *vpath, mbuf_t *m_headp) { int err, num_segs = 0; u32 txdl_avail, dma_index, tagged = 0; dma_addr_t dma_addr; bus_size_t dma_sizes; void *dtr_priv; vxge_txdl_priv_t *txdl_priv; vxge_hal_txdl_h txdlh; vxge_hal_status_e status; vxge_dev_t *vdev = vpath->vdev; VXGE_DRV_STATS(vpath, tx_xmit); txdl_avail = vxge_hal_fifo_free_txdl_count_get(vpath->handle); if (txdl_avail < VXGE_TX_LOW_THRESHOLD) { VXGE_DRV_STATS(vpath, tx_low_dtr_cnt); err = ENOBUFS; goto _exit0; } /* Reserve descriptors */ status = vxge_hal_fifo_txdl_reserve(vpath->handle, &txdlh, &dtr_priv); if (status != VXGE_HAL_OK) { VXGE_DRV_STATS(vpath, tx_reserve_failed); err = ENOBUFS; goto _exit0; } /* Update Tx private structure for this descriptor */ txdl_priv = (vxge_txdl_priv_t *) dtr_priv; /* * Map the packet for DMA. * Returns number of segments through num_segs. 
*/ err = vxge_dma_mbuf_coalesce(vpath->dma_tag_tx, txdl_priv->dma_map, m_headp, txdl_priv->dma_buffers, &num_segs); if (vpath->driver_stats.tx_max_frags < num_segs) vpath->driver_stats.tx_max_frags = num_segs; if (err == ENOMEM) { VXGE_DRV_STATS(vpath, tx_no_dma_setup); vxge_hal_fifo_txdl_free(vpath->handle, txdlh); goto _exit0; } else if (err != 0) { vxge_free_packet(*m_headp); VXGE_DRV_STATS(vpath, tx_no_dma_setup); vxge_hal_fifo_txdl_free(vpath->handle, txdlh); goto _exit0; } txdl_priv->mbuf_pkt = *m_headp; /* Set VLAN tag in descriptor only if this packet has it */ if ((*m_headp)->m_flags & M_VLANTAG) vxge_hal_fifo_txdl_vlan_set(txdlh, (*m_headp)->m_pkthdr.ether_vtag); /* Set descriptor buffer for header and each fragment/segment */ for (dma_index = 0; dma_index < num_segs; dma_index++) { dma_sizes = txdl_priv->dma_buffers[dma_index].ds_len; dma_addr = htole64(txdl_priv->dma_buffers[dma_index].ds_addr); vxge_hal_fifo_txdl_buffer_set(vpath->handle, txdlh, dma_index, dma_addr, dma_sizes); } /* Pre-write Sync of mapping */ bus_dmamap_sync(vpath->dma_tag_tx, txdl_priv->dma_map, BUS_DMASYNC_PREWRITE); if ((*m_headp)->m_pkthdr.csum_flags & CSUM_TSO) { if ((*m_headp)->m_pkthdr.tso_segsz) { VXGE_DRV_STATS(vpath, tx_tso); vxge_hal_fifo_txdl_lso_set(txdlh, VXGE_HAL_FIFO_LSO_FRM_ENCAP_AUTO, (*m_headp)->m_pkthdr.tso_segsz); } } /* Checksum */ if (ifp->if_hwassist > 0) { vxge_hal_fifo_txdl_cksum_set_bits(txdlh, VXGE_HAL_FIFO_TXD_TX_CKO_IPV4_EN | VXGE_HAL_FIFO_TXD_TX_CKO_TCP_EN | VXGE_HAL_FIFO_TXD_TX_CKO_UDP_EN); } if ((vxge_hal_device_check_id(vdev->devh) == VXGE_HAL_CARD_TITAN_1A) && (vdev->hw_fw_version >= VXGE_FW_VERSION(1, 8, 0))) tagged = 1; vxge_hal_fifo_txdl_post(vpath->handle, txdlh, tagged); VXGE_DRV_STATS(vpath, tx_posted); _exit0: return (err); } /* * vxge_tx_replenish * Allocate buffers and set them into descriptors for later use */ /* ARGSUSED */ vxge_hal_status_e vxge_tx_replenish(vxge_hal_vpath_h vpath_handle, vxge_hal_txdl_h txdlh, void *dtr_priv, u32 dtr_index, void *userdata, vxge_hal_reopen_e reopen) { int err = 0; vxge_vpath_t *vpath = (vxge_vpath_t *) userdata; vxge_txdl_priv_t *txdl_priv = (vxge_txdl_priv_t *) dtr_priv; err = bus_dmamap_create(vpath->dma_tag_tx, BUS_DMA_NOWAIT, &txdl_priv->dma_map); return ((err == 0) ? 
VXGE_HAL_OK : VXGE_HAL_FAIL); } /* * vxge_tx_compl * If the interrupt is due to Tx completion, free the sent buffer */ vxge_hal_status_e vxge_tx_compl(vxge_hal_vpath_h vpath_handle, vxge_hal_txdl_h txdlh, void *dtr_priv, vxge_hal_fifo_tcode_e t_code, void *userdata) { vxge_hal_status_e status = VXGE_HAL_OK; vxge_txdl_priv_t *txdl_priv; vxge_vpath_t *vpath = (vxge_vpath_t *) userdata; vxge_dev_t *vdev = vpath->vdev; ifnet_t ifp = vdev->ifp; VXGE_TX_LOCK(vpath); /* * For each completed descriptor * Get private structure, free buffer, do unmapping, and free descriptor */ do { VXGE_DRV_STATS(vpath, tx_compl); if (t_code != VXGE_HAL_FIFO_T_CODE_OK) { device_printf(vdev->ndev, "tx transfer code %d\n", t_code); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); VXGE_DRV_STATS(vpath, tx_tcode); vxge_hal_fifo_handle_tcode(vpath_handle, txdlh, t_code); } if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); txdl_priv = (vxge_txdl_priv_t *) dtr_priv; bus_dmamap_unload(vpath->dma_tag_tx, txdl_priv->dma_map); vxge_free_packet(txdl_priv->mbuf_pkt); vxge_hal_fifo_txdl_free(vpath->handle, txdlh); } while (vxge_hal_fifo_txdl_next_completed(vpath_handle, &txdlh, &dtr_priv, &t_code) == VXGE_HAL_OK); ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; VXGE_TX_UNLOCK(vpath); return (status); } /* ARGSUSED */ void vxge_tx_term(vxge_hal_vpath_h vpath_handle, vxge_hal_txdl_h txdlh, void *dtr_priv, vxge_hal_txdl_state_e state, void *userdata, vxge_hal_reopen_e reopen) { vxge_vpath_t *vpath = (vxge_vpath_t *) userdata; vxge_txdl_priv_t *txdl_priv = (vxge_txdl_priv_t *) dtr_priv; if (state != VXGE_HAL_TXDL_STATE_POSTED) return; if (txdl_priv != NULL) { bus_dmamap_sync(vpath->dma_tag_tx, txdl_priv->dma_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(vpath->dma_tag_tx, txdl_priv->dma_map); bus_dmamap_destroy(vpath->dma_tag_tx, txdl_priv->dma_map); vxge_free_packet(txdl_priv->mbuf_pkt); } /* Free the descriptor */ vxge_hal_fifo_txdl_free(vpath->handle, txdlh); } /* * vxge_rx_replenish * Allocate buffers and set them into descriptors for later use */ /* ARGSUSED */ vxge_hal_status_e vxge_rx_replenish(vxge_hal_vpath_h vpath_handle, vxge_hal_rxd_h rxdh, void *dtr_priv, u32 dtr_index, void *userdata, vxge_hal_reopen_e reopen) { int err = 0; vxge_hal_status_e status = VXGE_HAL_OK; vxge_vpath_t *vpath = (vxge_vpath_t *) userdata; vxge_rxd_priv_t *rxd_priv = (vxge_rxd_priv_t *) dtr_priv; /* Create DMA map for these descriptors */ err = bus_dmamap_create(vpath->dma_tag_rx, BUS_DMA_NOWAIT, &rxd_priv->dma_map); if (err == 0) { if (vxge_rx_rxd_1b_set(vpath, rxdh, dtr_priv)) { bus_dmamap_destroy(vpath->dma_tag_rx, rxd_priv->dma_map); status = VXGE_HAL_FAIL; } } return (status); } /* * vxge_rx_compl */ vxge_hal_status_e vxge_rx_compl(vxge_hal_vpath_h vpath_handle, vxge_hal_rxd_h rxdh, void *dtr_priv, u8 t_code, void *userdata) { mbuf_t mbuf_up; vxge_rxd_priv_t *rxd_priv; vxge_hal_ring_rxd_info_t ext_info; vxge_hal_status_e status = VXGE_HAL_OK; vxge_vpath_t *vpath = (vxge_vpath_t *) userdata; vxge_dev_t *vdev = vpath->vdev; struct lro_ctrl *lro = &vpath->lro; /* get the interface pointer */ ifnet_t ifp = vdev->ifp; do { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { vxge_hal_ring_rxd_post(vpath_handle, rxdh); status = VXGE_HAL_FAIL; break; } VXGE_DRV_STATS(vpath, rx_compl); rxd_priv = (vxge_rxd_priv_t *) dtr_priv; /* Gets details of mbuf i.e., packet length */ vxge_rx_rxd_1b_get(vpath, rxdh, dtr_priv); /* * Prepare one buffer to send it to upper layer Since upper * layer frees the buffer do not use rxd_priv->mbuf_pkt. 
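 * (vxge_rx_rxd_1b_set() below performs that swap: it loads a freshly
 * allocated mbuf on the spare DMA map, so the completed buffer can be
 * handed to the stack untouched.)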
* Meanwhile prepare a new buffer, do mapping, use with the * current descriptor and post descriptor back to ring vpath */ mbuf_up = rxd_priv->mbuf_pkt; if (t_code != VXGE_HAL_RING_RXD_T_CODE_OK) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); VXGE_DRV_STATS(vpath, rx_tcode); status = vxge_hal_ring_handle_tcode(vpath_handle, rxdh, t_code); /* * If transfer code is not for unknown protocols and * vxge_hal_device_handle_tcode is NOT returned * VXGE_HAL_OK * drop this packet and increment rx_tcode stats */ if ((status != VXGE_HAL_OK) && (t_code != VXGE_HAL_RING_T_CODE_L3_PKT_ERR)) { vxge_free_packet(mbuf_up); vxge_hal_ring_rxd_post(vpath_handle, rxdh); continue; } } if (vxge_rx_rxd_1b_set(vpath, rxdh, dtr_priv)) { /* * If unable to allocate buffer, post descriptor back * to vpath for future processing of same packet. */ vxge_hal_ring_rxd_post(vpath_handle, rxdh); continue; } /* Get the extended information */ vxge_hal_ring_rxd_1b_info_get(vpath_handle, rxdh, &ext_info); /* post descriptor with newly allocated mbuf back to vpath */ vxge_hal_ring_rxd_post(vpath_handle, rxdh); vpath->rxd_posted++; if (vpath->rxd_posted % VXGE_RXD_REPLENISH_COUNT == 0) vxge_hal_ring_rxd_post_post_db(vpath_handle); /* * Set successfully computed checksums in the mbuf. * Leave the rest to the stack to be reverified. */ vxge_rx_checksum(ext_info, mbuf_up); #if __FreeBSD_version >= 800000 M_HASHTYPE_SET(mbuf_up, M_HASHTYPE_OPAQUE); mbuf_up->m_pkthdr.flowid = vpath->vp_index; #endif /* Post-Read sync for buffers */ bus_dmamap_sync(vpath->dma_tag_rx, rxd_priv->dma_map, BUS_DMASYNC_POSTREAD); vxge_rx_input(ifp, mbuf_up, vpath); } while (vxge_hal_ring_rxd_next_completed(vpath_handle, &rxdh, &dtr_priv, &t_code) == VXGE_HAL_OK); /* Flush any outstanding LRO work */ if (vpath->lro_enable && vpath->lro.lro_cnt) tcp_lro_flush_all(lro); return (status); } static inline void vxge_rx_input(ifnet_t ifp, mbuf_t mbuf_up, vxge_vpath_t *vpath) { if (vpath->lro_enable && vpath->lro.lro_cnt) { if (tcp_lro_rx(&vpath->lro, mbuf_up, 0) == 0) return; } (*ifp->if_input) (ifp, mbuf_up); } static inline void vxge_rx_checksum(vxge_hal_ring_rxd_info_t ext_info, mbuf_t mbuf_up) { if (!(ext_info.proto & VXGE_HAL_FRAME_PROTO_IP_FRAG) && (ext_info.proto & VXGE_HAL_FRAME_PROTO_TCP_OR_UDP) && ext_info.l3_cksum_valid && ext_info.l4_cksum_valid) { mbuf_up->m_pkthdr.csum_data = htons(0xffff); mbuf_up->m_pkthdr.csum_flags = CSUM_IP_CHECKED; mbuf_up->m_pkthdr.csum_flags |= CSUM_IP_VALID; mbuf_up->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); } else { if (ext_info.vlan) { mbuf_up->m_pkthdr.ether_vtag = ext_info.vlan; mbuf_up->m_flags |= M_VLANTAG; } } } /* * vxge_rx_term During unload terminate and free all descriptors * @vpath_handle Rx vpath Handle @rxdh Rx Descriptor Handle @state Descriptor * State @userdata Per-adapter Data @reopen vpath open/reopen option */ /* ARGSUSED */ void vxge_rx_term(vxge_hal_vpath_h vpath_handle, vxge_hal_rxd_h rxdh, void *dtr_priv, vxge_hal_rxd_state_e state, void *userdata, vxge_hal_reopen_e reopen) { vxge_vpath_t *vpath = (vxge_vpath_t *) userdata; vxge_rxd_priv_t *rxd_priv = (vxge_rxd_priv_t *) dtr_priv; if (state != VXGE_HAL_RXD_STATE_POSTED) return; if (rxd_priv != NULL) { bus_dmamap_sync(vpath->dma_tag_rx, rxd_priv->dma_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(vpath->dma_tag_rx, rxd_priv->dma_map); bus_dmamap_destroy(vpath->dma_tag_rx, rxd_priv->dma_map); vxge_free_packet(rxd_priv->mbuf_pkt); } /* Free the descriptor */ vxge_hal_ring_rxd_free(vpath_handle, rxdh); } /* * vxge_rx_rxd_1b_get * Get 
descriptors of packet to send up */ void vxge_rx_rxd_1b_get(vxge_vpath_t *vpath, vxge_hal_rxd_h rxdh, void *dtr_priv) { vxge_rxd_priv_t *rxd_priv = (vxge_rxd_priv_t *) dtr_priv; mbuf_t mbuf_up = rxd_priv->mbuf_pkt; /* Retrieve data from completed descriptor */ vxge_hal_ring_rxd_1b_get(vpath->handle, rxdh, &rxd_priv->dma_addr[0], (u32 *) &rxd_priv->dma_sizes[0]); /* Update newly created buffer to be sent up with packet length */ mbuf_up->m_len = rxd_priv->dma_sizes[0]; mbuf_up->m_pkthdr.len = rxd_priv->dma_sizes[0]; mbuf_up->m_next = NULL; } /* * vxge_rx_rxd_1b_set * Allocates new mbufs to be placed into descriptors */ int vxge_rx_rxd_1b_set(vxge_vpath_t *vpath, vxge_hal_rxd_h rxdh, void *dtr_priv) { int num_segs, err = 0; mbuf_t mbuf_pkt; bus_dmamap_t dma_map; bus_dma_segment_t dma_buffers[1]; vxge_rxd_priv_t *rxd_priv = (vxge_rxd_priv_t *) dtr_priv; vxge_dev_t *vdev = vpath->vdev; mbuf_pkt = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, vdev->rx_mbuf_sz); if (!mbuf_pkt) { err = ENOBUFS; VXGE_DRV_STATS(vpath, rx_no_buf); device_printf(vdev->ndev, "out of memory to allocate mbuf\n"); goto _exit0; } /* Update mbuf's length, packet length and receive interface */ mbuf_pkt->m_len = vdev->rx_mbuf_sz; mbuf_pkt->m_pkthdr.len = vdev->rx_mbuf_sz; mbuf_pkt->m_pkthdr.rcvif = vdev->ifp; /* Load DMA map */ err = vxge_dma_mbuf_coalesce(vpath->dma_tag_rx, vpath->extra_dma_map, &mbuf_pkt, dma_buffers, &num_segs); if (err != 0) { VXGE_DRV_STATS(vpath, rx_map_fail); vxge_free_packet(mbuf_pkt); goto _exit0; } /* Unload DMA map of mbuf in current descriptor */ bus_dmamap_sync(vpath->dma_tag_rx, rxd_priv->dma_map, BUS_DMASYNC_POSTREAD); bus_dmamap_unload(vpath->dma_tag_rx, rxd_priv->dma_map); /* Update descriptor private data */ dma_map = rxd_priv->dma_map; rxd_priv->mbuf_pkt = mbuf_pkt; rxd_priv->dma_addr[0] = htole64(dma_buffers->ds_addr); rxd_priv->dma_map = vpath->extra_dma_map; vpath->extra_dma_map = dma_map; /* Pre-Read/Write sync */ bus_dmamap_sync(vpath->dma_tag_rx, rxd_priv->dma_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* Set descriptor buffer */ vxge_hal_ring_rxd_1b_set(rxdh, rxd_priv->dma_addr[0], vdev->rx_mbuf_sz); _exit0: return (err); } /* * vxge_link_up * Callback for Link-up indication from HAL */ /* ARGSUSED */ void vxge_link_up(vxge_hal_device_h devh, void *userdata) { int i; vxge_vpath_t *vpath; vxge_hal_device_hw_info_t *hw_info; vxge_dev_t *vdev = (vxge_dev_t *) userdata; hw_info = &vdev->config.hw_info; ifnet_t ifp = vdev->ifp; if (vdev->config.intr_mode == VXGE_HAL_INTR_MODE_MSIX) { for (i = 0; i < vdev->no_of_vpath; i++) { vpath = &(vdev->vpaths[i]); vxge_hal_vpath_tti_ci_set(vpath->handle); vxge_hal_vpath_rti_ci_set(vpath->handle); } } if (vdev->is_privilaged && (hw_info->ports > 1)) { vxge_active_port_update(vdev); device_printf(vdev->ndev, "Active Port : %lld\n", vdev->active_port); } ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; if_link_state_change(ifp, LINK_STATE_UP); } /* * vxge_link_down * Callback for Link-down indication from HAL */ /* ARGSUSED */ void vxge_link_down(vxge_hal_device_h devh, void *userdata) { int i; vxge_vpath_t *vpath; vxge_dev_t *vdev = (vxge_dev_t *) userdata; ifnet_t ifp = vdev->ifp; if (vdev->config.intr_mode == VXGE_HAL_INTR_MODE_MSIX) { for (i = 0; i < vdev->no_of_vpath; i++) { vpath = &(vdev->vpaths[i]); vxge_hal_vpath_tti_ci_reset(vpath->handle); vxge_hal_vpath_rti_ci_reset(vpath->handle); } } ifp->if_drv_flags |= IFF_DRV_OACTIVE; if_link_state_change(ifp, LINK_STATE_DOWN); } /* * vxge_reset */ void vxge_reset(vxge_dev_t *vdev) { if (!vdev->is_initialized) 
return; VXGE_DRV_LOCK(vdev); vxge_stop_locked(vdev); vxge_init_locked(vdev); VXGE_DRV_UNLOCK(vdev); } /* * vxge_crit_error * Callback for Critical error indication from HAL */ /* ARGSUSED */ void vxge_crit_error(vxge_hal_device_h devh, void *userdata, vxge_hal_event_e type, u64 serr_data) { vxge_dev_t *vdev = (vxge_dev_t *) userdata; ifnet_t ifp = vdev->ifp; switch (type) { case VXGE_HAL_EVENT_SERR: case VXGE_HAL_EVENT_KDFCCTL: case VXGE_HAL_EVENT_CRITICAL: vxge_hal_device_intr_disable(vdev->devh); ifp->if_drv_flags |= IFF_DRV_OACTIVE; if_link_state_change(ifp, LINK_STATE_DOWN); break; default: break; } } /* * vxge_ifp_setup */ int vxge_ifp_setup(device_t ndev) { ifnet_t ifp; int i, j, err = 0; vxge_dev_t *vdev = (vxge_dev_t *) device_get_softc(ndev); for (i = 0, j = 0; i < VXGE_HAL_MAX_VIRTUAL_PATHS; i++) { if (!bVAL1(vdev->config.hw_info.vpath_mask, i)) continue; if (j >= vdev->no_of_vpath) break; vdev->vpaths[j].vp_id = i; vdev->vpaths[j].vp_index = j; vdev->vpaths[j].vdev = vdev; vdev->vpaths[j].is_configured = TRUE; vxge_os_memcpy((u8 *) vdev->vpaths[j].mac_addr, (u8 *) (vdev->config.hw_info.mac_addrs[i]), (size_t) ETHER_ADDR_LEN); j++; } /* Get interface ifnet structure for this Ether device */ ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(vdev->ndev, "memory allocation for ifnet failed\n"); err = ENXIO; goto _exit0; } vdev->ifp = ifp; /* Initialize interface ifnet structure */ if_initname(ifp, device_get_name(ndev), device_get_unit(ndev)); ifp->if_baudrate = VXGE_BAUDRATE; ifp->if_init = vxge_init; ifp->if_softc = vdev; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = vxge_ioctl; ifp->if_start = vxge_send; #if __FreeBSD_version >= 800000 ifp->if_transmit = vxge_mq_send; ifp->if_qflush = vxge_mq_qflush; #endif ifp->if_snd.ifq_drv_maxlen = max(vdev->config.ifq_maxlen, ifqmaxlen); IFQ_SET_MAXLEN(&ifp->if_snd, ifp->if_snd.ifq_drv_maxlen); /* IFQ_SET_READY(&ifp->if_snd); */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_HWCSUM | IFCAP_VLAN_HWCSUM; ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU; ifp->if_capabilities |= IFCAP_JUMBO_MTU; if (vdev->config.tso_enable) vxge_tso_config(vdev); if (vdev->config.lro_enable) ifp->if_capabilities |= IFCAP_LRO; ifp->if_capenable = ifp->if_capabilities; strlcpy(vdev->ndev_name, device_get_nameunit(ndev), sizeof(vdev->ndev_name)); /* Attach the interface */ ether_ifattach(ifp, vdev->vpaths[0].mac_addr); _exit0: return (err); } /* * vxge_isr_setup * Register isr functions */ int vxge_isr_setup(vxge_dev_t *vdev) { int i, irq_rid, err = 0; vxge_vpath_t *vpath; void *isr_func_arg; void (*isr_func_ptr) (void *); switch (vdev->config.intr_mode) { case VXGE_HAL_INTR_MODE_IRQLINE: err = bus_setup_intr(vdev->ndev, vdev->config.isr_info[0].irq_res, (INTR_TYPE_NET | INTR_MPSAFE), vxge_isr_filter, vxge_isr_line, vdev, &vdev->config.isr_info[0].irq_handle); break; case VXGE_HAL_INTR_MODE_MSIX: for (i = 0; i < vdev->intr_count; i++) { irq_rid = vdev->config.isr_info[i].irq_rid; vpath = &vdev->vpaths[irq_rid / 4]; if ((irq_rid % 4) == 2) { isr_func_ptr = vxge_isr_msix; isr_func_arg = (void *) vpath; } else if ((irq_rid % 4) == 3) { isr_func_ptr = vxge_isr_msix_alarm; isr_func_arg = (void *) vpath; } else break; err = bus_setup_intr(vdev->ndev, vdev->config.isr_info[i].irq_res, (INTR_TYPE_NET | INTR_MPSAFE), NULL, (void *) isr_func_ptr, (void *) isr_func_arg, &vdev->config.isr_info[i].irq_handle); if (err != 0) break; } if (err != 0) { /* Teardown interrupt handler */ while (--i 
> 0)
                bus_teardown_intr(vdev->ndev,
                    vdev->config.isr_info[i].irq_res,
                    vdev->config.isr_info[i].irq_handle);
        }
        break;
    }
    return (err);
}

/*
 * vxge_isr_filter
 * ISR filter function - filter interrupts from other shared devices
 */
int
vxge_isr_filter(void *handle)
{
    u64 val64 = 0;
    vxge_dev_t *vdev = (vxge_dev_t *) handle;
    __hal_device_t *hldev = (__hal_device_t *) vdev->devh;

    vxge_hal_common_reg_t *common_reg =
        (vxge_hal_common_reg_t *) (hldev->common_reg);

    val64 = vxge_os_pio_mem_read64(vdev->pdev, (vdev->devh)->regh0,
        &common_reg->titan_general_int_status);

    return ((val64) ? FILTER_SCHEDULE_THREAD : FILTER_STRAY);
}

/*
 * vxge_isr_line
 * Interrupt service routine for Line interrupts
 */
void
vxge_isr_line(void *vdev_ptr)
{
    vxge_dev_t *vdev = (vxge_dev_t *) vdev_ptr;

    vxge_hal_device_handle_irq(vdev->devh, 0);
}

void
vxge_isr_msix(void *vpath_ptr)
{
    u32 got_rx = 0;
    u32 got_tx = 0;

    __hal_virtualpath_t *hal_vpath;
    vxge_vpath_t *vpath = (vxge_vpath_t *) vpath_ptr;
    vxge_dev_t *vdev = vpath->vdev;

    hal_vpath = ((__hal_vpath_handle_t *) vpath->handle)->vpath;

    VXGE_DRV_STATS(vpath, isr_msix);
    VXGE_HAL_DEVICE_STATS_SW_INFO_TRAFFIC_INTR(vdev->devh);

    vxge_hal_vpath_mf_msix_mask(vpath->handle, vpath->msix_vec);

    /* processing rx */
    vxge_hal_vpath_poll_rx(vpath->handle, &got_rx);

    /* processing tx */
    if (hal_vpath->vp_config->fifo.enable) {
        vxge_intr_coalesce_tx(vpath);
        vxge_hal_vpath_poll_tx(vpath->handle, &got_tx);
    }

    vxge_hal_vpath_mf_msix_unmask(vpath->handle, vpath->msix_vec);
}

void
vxge_isr_msix_alarm(void *vpath_ptr)
{
    int i;
    vxge_hal_status_e status = VXGE_HAL_OK;

    vxge_vpath_t *vpath = (vxge_vpath_t *) vpath_ptr;
    vxge_dev_t *vdev = vpath->vdev;

    VXGE_HAL_DEVICE_STATS_SW_INFO_NOT_TRAFFIC_INTR(vdev->devh);

    /* Process alarms in each vpath */
    for (i = 0; i < vdev->no_of_vpath; i++) {
        vpath = &(vdev->vpaths[i]);

        vxge_hal_vpath_mf_msix_mask(vpath->handle,
            vpath->msix_vec_alarm);

        status = vxge_hal_vpath_alarm_process(vpath->handle, 0);
        if ((status == VXGE_HAL_ERR_EVENT_SLOT_FREEZE) ||
            (status == VXGE_HAL_ERR_EVENT_SERR)) {
            device_printf(vdev->ndev,
                "processing alarms unrecoverable error %x\n", status);

            /* Stop the driver */
            vdev->is_initialized = FALSE;
            break;
        }

        vxge_hal_vpath_mf_msix_unmask(vpath->handle,
            vpath->msix_vec_alarm);
    }
}

/*
 * vxge_msix_enable
 */
vxge_hal_status_e
vxge_msix_enable(vxge_dev_t *vdev)
{
    int i, first_vp_id, msix_id;

    vxge_vpath_t *vpath;
    vxge_hal_status_e status = VXGE_HAL_OK;

    /*
     * Unmasking and Setting MSIX vectors before enabling interrupts
     * tim[] : 0 - Tx ## 1 - Rx ## 2 - UMQ-DMQ ## 0 - BITMAP
     */
    int tim[4] = {0, 1, 0, 0};

    for (i = 0; i < vdev->no_of_vpath; i++) {
        vpath = vdev->vpaths + i;
        first_vp_id = vdev->vpaths[0].vp_id;

        msix_id = vpath->vp_id * VXGE_HAL_VPATH_MSIX_ACTIVE;
        tim[1] = vpath->msix_vec = msix_id + 1;

        vpath->msix_vec_alarm = first_vp_id *
            VXGE_HAL_VPATH_MSIX_ACTIVE + VXGE_HAL_VPATH_MSIX_ALARM_ID;

        status = vxge_hal_vpath_mf_msix_set(vpath->handle, tim,
            VXGE_HAL_VPATH_MSIX_ALARM_ID);

        if (status != VXGE_HAL_OK) {
            device_printf(vdev->ndev,
                "failed to set msix vectors to vpath\n");
            break;
        }

        vxge_hal_vpath_mf_msix_unmask(vpath->handle, vpath->msix_vec);
        vxge_hal_vpath_mf_msix_unmask(vpath->handle,
            vpath->msix_vec_alarm);
    }

    return (status);
}

/*
 * vxge_media_init
 * Initializes, adds and sets media
 */
void
vxge_media_init(vxge_dev_t *vdev)
{
    ifmedia_init(&vdev->media, IFM_IMASK,
        vxge_media_change, vxge_media_status);

    /* Add supported media */
    ifmedia_add(&vdev->media,
        IFM_ETHER | vdev->ifm_optics | IFM_FDX, 0, NULL);

    /* Set media */
    ifmedia_add(&vdev->media, IFM_ETHER |
IFM_AUTO, 0, NULL); ifmedia_set(&vdev->media, IFM_ETHER | IFM_AUTO); } /* * vxge_media_status * Callback for interface media settings */ void vxge_media_status(ifnet_t ifp, struct ifmediareq *ifmr) { vxge_dev_t *vdev = (vxge_dev_t *) ifp->if_softc; vxge_hal_device_t *hldev = vdev->devh; ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; /* set link state */ if (vxge_hal_device_link_state_get(hldev) == VXGE_HAL_LINK_UP) { ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= vdev->ifm_optics | IFM_FDX; if_link_state_change(ifp, LINK_STATE_UP); } } /* * vxge_media_change * Media change driver callback */ int vxge_media_change(ifnet_t ifp) { vxge_dev_t *vdev = (vxge_dev_t *) ifp->if_softc; struct ifmedia *ifmediap = &vdev->media; return (IFM_TYPE(ifmediap->ifm_media) != IFM_ETHER ? EINVAL : 0); } /* * Allocate PCI resources */ int vxge_alloc_resources(vxge_dev_t *vdev) { int err = 0; vxge_pci_info_t *pci_info = NULL; vxge_free_resources_e error_level = VXGE_FREE_NONE; device_t ndev = vdev->ndev; /* Allocate Buffer for HAL Device Configuration */ vdev->device_config = (vxge_hal_device_config_t *) vxge_mem_alloc(sizeof(vxge_hal_device_config_t)); if (!vdev->device_config) { err = ENOMEM; error_level = VXGE_DISABLE_PCI_BUSMASTER; device_printf(vdev->ndev, "failed to allocate memory for device config\n"); goto _exit0; } pci_info = (vxge_pci_info_t *) vxge_mem_alloc(sizeof(vxge_pci_info_t)); if (!pci_info) { error_level = VXGE_FREE_DEVICE_CONFIG; err = ENOMEM; device_printf(vdev->ndev, "failed to allocate memory for pci info\n"); goto _exit0; } pci_info->ndev = ndev; vdev->pdev = pci_info; err = vxge_alloc_bar_resources(vdev, 0); if (err != 0) { error_level = VXGE_FREE_BAR0; goto _exit0; } err = vxge_alloc_bar_resources(vdev, 1); if (err != 0) { error_level = VXGE_FREE_BAR1; goto _exit0; } err = vxge_alloc_bar_resources(vdev, 2); if (err != 0) error_level = VXGE_FREE_BAR2; _exit0: if (error_level) vxge_free_resources(ndev, error_level); return (err); } /* * vxge_alloc_bar_resources * Allocates BAR resources */ int vxge_alloc_bar_resources(vxge_dev_t *vdev, int i) { int err = 0; int res_id = 0; vxge_pci_info_t *pci_info = vdev->pdev; res_id = PCIR_BAR((i == 0) ? 
0 : (i * 2)); pci_info->bar_info[i] = bus_alloc_resource_any(vdev->ndev, SYS_RES_MEMORY, &res_id, RF_ACTIVE); if (pci_info->bar_info[i] == NULL) { device_printf(vdev->ndev, "failed to allocate memory for bus resources\n"); err = ENOMEM; goto _exit0; } pci_info->reg_map[i] = (vxge_bus_res_t *) vxge_mem_alloc(sizeof(vxge_bus_res_t)); if (pci_info->reg_map[i] == NULL) { device_printf(vdev->ndev, "failed to allocate memory bar resources\n"); err = ENOMEM; goto _exit0; } ((vxge_bus_res_t *) (pci_info->reg_map[i]))->bus_space_tag = rman_get_bustag(pci_info->bar_info[i]); ((vxge_bus_res_t *) (pci_info->reg_map[i]))->bus_space_handle = rman_get_bushandle(pci_info->bar_info[i]); ((vxge_bus_res_t *) (pci_info->reg_map[i]))->bar_start_addr = pci_info->bar_info[i]; ((vxge_bus_res_t *) (pci_info->reg_map[i]))->bus_res_len = rman_get_size(pci_info->bar_info[i]); _exit0: return (err); } /* * vxge_alloc_isr_resources */ int vxge_alloc_isr_resources(vxge_dev_t *vdev) { int i, err = 0, irq_rid; int msix_vec_reqd, intr_count, msix_count; int intr_mode = VXGE_HAL_INTR_MODE_IRQLINE; if (vdev->config.intr_mode == VXGE_HAL_INTR_MODE_MSIX) { /* MSI-X messages supported by device */ intr_count = pci_msix_count(vdev->ndev); if (intr_count) { msix_vec_reqd = 4 * vdev->no_of_vpath; if (intr_count >= msix_vec_reqd) { intr_count = msix_vec_reqd; err = pci_alloc_msix(vdev->ndev, &intr_count); if (err == 0) intr_mode = VXGE_HAL_INTR_MODE_MSIX; } if ((err != 0) || (intr_count < msix_vec_reqd)) { device_printf(vdev->ndev, "Unable to allocate " "msi/x vectors switching to INTA mode\n"); } } } err = 0; vdev->intr_count = 0; vdev->config.intr_mode = intr_mode; switch (vdev->config.intr_mode) { case VXGE_HAL_INTR_MODE_IRQLINE: vdev->config.isr_info[0].irq_rid = 0; vdev->config.isr_info[0].irq_res = bus_alloc_resource_any(vdev->ndev, SYS_RES_IRQ, &vdev->config.isr_info[0].irq_rid, (RF_SHAREABLE | RF_ACTIVE)); if (vdev->config.isr_info[0].irq_res == NULL) { device_printf(vdev->ndev, "failed to allocate line interrupt resource\n"); err = ENOMEM; goto _exit0; } vdev->intr_count++; break; case VXGE_HAL_INTR_MODE_MSIX: msix_count = 0; for (i = 0; i < vdev->no_of_vpath; i++) { irq_rid = i * 4; vdev->config.isr_info[msix_count].irq_rid = irq_rid + 2; vdev->config.isr_info[msix_count].irq_res = bus_alloc_resource_any(vdev->ndev, SYS_RES_IRQ, &vdev->config.isr_info[msix_count].irq_rid, (RF_SHAREABLE | RF_ACTIVE)); if (vdev->config.isr_info[msix_count].irq_res == NULL) { device_printf(vdev->ndev, "allocating bus resource (rid %d) failed\n", vdev->config.isr_info[msix_count].irq_rid); err = ENOMEM; goto _exit0; } vdev->intr_count++; err = bus_bind_intr(vdev->ndev, vdev->config.isr_info[msix_count].irq_res, (i % mp_ncpus)); if (err != 0) break; msix_count++; } vdev->config.isr_info[msix_count].irq_rid = 3; vdev->config.isr_info[msix_count].irq_res = bus_alloc_resource_any(vdev->ndev, SYS_RES_IRQ, &vdev->config.isr_info[msix_count].irq_rid, (RF_SHAREABLE | RF_ACTIVE)); if (vdev->config.isr_info[msix_count].irq_res == NULL) { device_printf(vdev->ndev, "allocating bus resource (rid %d) failed\n", vdev->config.isr_info[msix_count].irq_rid); err = ENOMEM; goto _exit0; } vdev->intr_count++; err = bus_bind_intr(vdev->ndev, vdev->config.isr_info[msix_count].irq_res, (i % mp_ncpus)); break; } vdev->device_config->intr_mode = vdev->config.intr_mode; _exit0: return (err); } /* * vxge_free_resources * Undo what-all we did during load/attach */ void vxge_free_resources(device_t ndev, vxge_free_resources_e vxge_free_resource) { int i; vxge_dev_t *vdev; 
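    /*
     * The cases below intentionally fall through: cleaning up at a given
     * error level also releases every resource acquired before that level,
     * down to driver termination.
     */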
vdev = (vxge_dev_t *) device_get_softc(ndev); switch (vxge_free_resource) { case VXGE_FREE_ALL: for (i = 0; i < vdev->intr_count; i++) { bus_teardown_intr(ndev, vdev->config.isr_info[i].irq_res, vdev->config.isr_info[i].irq_handle); } /* FALLTHROUGH */ case VXGE_FREE_INTERFACE: ether_ifdetach(vdev->ifp); bus_generic_detach(ndev); if_free(vdev->ifp); /* FALLTHROUGH */ case VXGE_FREE_MEDIA: ifmedia_removeall(&vdev->media); /* FALLTHROUGH */ case VXGE_FREE_MUTEX: vxge_mutex_destroy(vdev); /* FALLTHROUGH */ case VXGE_FREE_VPATH: vxge_mem_free(vdev->vpaths, vdev->no_of_vpath * sizeof(vxge_vpath_t)); /* FALLTHROUGH */ case VXGE_FREE_TERMINATE_DEVICE: if (vdev->devh != NULL) { vxge_hal_device_private_set(vdev->devh, 0); vxge_hal_device_terminate(vdev->devh); } /* FALLTHROUGH */ case VXGE_FREE_ISR_RESOURCE: vxge_free_isr_resources(vdev); /* FALLTHROUGH */ case VXGE_FREE_BAR2: vxge_free_bar_resources(vdev, 2); /* FALLTHROUGH */ case VXGE_FREE_BAR1: vxge_free_bar_resources(vdev, 1); /* FALLTHROUGH */ case VXGE_FREE_BAR0: vxge_free_bar_resources(vdev, 0); /* FALLTHROUGH */ case VXGE_FREE_PCI_INFO: vxge_mem_free(vdev->pdev, sizeof(vxge_pci_info_t)); /* FALLTHROUGH */ case VXGE_FREE_DEVICE_CONFIG: vxge_mem_free(vdev->device_config, sizeof(vxge_hal_device_config_t)); /* FALLTHROUGH */ case VXGE_DISABLE_PCI_BUSMASTER: pci_disable_busmaster(ndev); /* FALLTHROUGH */ case VXGE_FREE_TERMINATE_DRIVER: if (vxge_dev_ref_count) { --vxge_dev_ref_count; if (0 == vxge_dev_ref_count) vxge_hal_driver_terminate(); } /* FALLTHROUGH */ default: case VXGE_FREE_NONE: break; /* NOTREACHED */ } } void vxge_free_isr_resources(vxge_dev_t *vdev) { int i; switch (vdev->config.intr_mode) { case VXGE_HAL_INTR_MODE_IRQLINE: if (vdev->config.isr_info[0].irq_res) { bus_release_resource(vdev->ndev, SYS_RES_IRQ, vdev->config.isr_info[0].irq_rid, vdev->config.isr_info[0].irq_res); vdev->config.isr_info[0].irq_res = NULL; } break; case VXGE_HAL_INTR_MODE_MSIX: for (i = 0; i < vdev->intr_count; i++) { if (vdev->config.isr_info[i].irq_res) { bus_release_resource(vdev->ndev, SYS_RES_IRQ, vdev->config.isr_info[i].irq_rid, vdev->config.isr_info[i].irq_res); vdev->config.isr_info[i].irq_res = NULL; } } if (vdev->intr_count) pci_release_msi(vdev->ndev); break; } } void vxge_free_bar_resources(vxge_dev_t *vdev, int i) { int res_id = 0; vxge_pci_info_t *pci_info = vdev->pdev; res_id = PCIR_BAR((i == 0) ? 
0 : (i * 2));

    if (pci_info->bar_info[i])
        bus_release_resource(vdev->ndev, SYS_RES_MEMORY,
            res_id, pci_info->bar_info[i]);

    vxge_mem_free(pci_info->reg_map[i], sizeof(vxge_bus_res_t));
}

/*
 * vxge_mutex_init
 * Initializes mutexes used in driver
 */
void
vxge_mutex_init(vxge_dev_t *vdev)
{
    int i;

    snprintf(vdev->mtx_drv_name, sizeof(vdev->mtx_drv_name),
        "%s_drv", vdev->ndev_name);

    mtx_init(&vdev->mtx_drv, vdev->mtx_drv_name,
        MTX_NETWORK_LOCK, MTX_DEF);

    for (i = 0; i < vdev->no_of_vpath; i++) {
        snprintf(vdev->vpaths[i].mtx_tx_name,
            sizeof(vdev->vpaths[i].mtx_tx_name), "%s_tx_%d",
            vdev->ndev_name, i);

        mtx_init(&vdev->vpaths[i].mtx_tx, vdev->vpaths[i].mtx_tx_name,
            NULL, MTX_DEF);
    }
}

/*
 * vxge_mutex_destroy
 * Destroys mutexes used in driver
 */
void
vxge_mutex_destroy(vxge_dev_t *vdev)
{
    int i;

    for (i = 0; i < vdev->no_of_vpath; i++)
        VXGE_TX_LOCK_DESTROY(&(vdev->vpaths[i]));

    VXGE_DRV_LOCK_DESTROY(vdev);
}

/*
 * vxge_rth_config
 */
vxge_hal_status_e
vxge_rth_config(vxge_dev_t *vdev)
{
    int i;
    vxge_hal_vpath_h vpath_handle;
    vxge_hal_rth_hash_types_t hash_types;
    vxge_hal_status_e status = VXGE_HAL_OK;
    u8 mtable[256] = {0};

    /* Filling mtable with bucket-to-vpath mapping */
    vdev->config.rth_bkt_sz = VXGE_DEFAULT_RTH_BUCKET_SIZE;

    for (i = 0; i < (1 << vdev->config.rth_bkt_sz); i++)
        mtable[i] = i % vdev->no_of_vpath;

    /* Fill RTH hash types */
    hash_types.hash_type_tcpipv4_en = VXGE_HAL_RING_HASH_TYPE_TCP_IPV4;
    hash_types.hash_type_tcpipv6_en = VXGE_HAL_RING_HASH_TYPE_TCP_IPV6;
    hash_types.hash_type_tcpipv6ex_en = VXGE_HAL_RING_HASH_TYPE_TCP_IPV6_EX;
    hash_types.hash_type_ipv4_en = VXGE_HAL_RING_HASH_TYPE_IPV4;
    hash_types.hash_type_ipv6_en = VXGE_HAL_RING_HASH_TYPE_IPV6;
    hash_types.hash_type_ipv6ex_en = VXGE_HAL_RING_HASH_TYPE_IPV6_EX;

    /* set indirection table, bucket-to-vpath mapping */
    status = vxge_hal_vpath_rts_rth_itable_set(vdev->vpath_handles,
        vdev->no_of_vpath, mtable,
        ((u32) (1 << vdev->config.rth_bkt_sz)));

    if (status != VXGE_HAL_OK) {
        device_printf(vdev->ndev, "rth configuration failed\n");
        goto _exit0;
    }

    for (i = 0; i < vdev->no_of_vpath; i++) {
        vpath_handle = vxge_vpath_handle_get(vdev, i);
        if (!vpath_handle)
            continue;

        status = vxge_hal_vpath_rts_rth_set(vpath_handle,
            RTH_ALG_JENKINS, &hash_types, vdev->config.rth_bkt_sz, TRUE);

        if (status != VXGE_HAL_OK) {
            device_printf(vdev->ndev,
                "rth configuration failed for vpath (%d)\n",
                vdev->vpaths[i].vp_id);
            break;
        }
    }

_exit0:
    return (status);
}

/*
 * vxge_vpath_config
 * Sets HAL parameter values from kenv
 */
void
vxge_vpath_config(vxge_dev_t *vdev)
{
    int i;
    u32 no_of_vpath = 0;
    vxge_hal_vp_config_t *vp_config;
    vxge_hal_device_config_t *device_config = vdev->device_config;

    device_config->debug_level = VXGE_TRACE;
    device_config->debug_mask = VXGE_COMPONENT_ALL;
    device_config->device_poll_millis = VXGE_DEFAULT_DEVICE_POLL_MILLIS;

    vdev->config.no_of_vpath =
        min(vdev->config.no_of_vpath, vdev->max_supported_vpath);

    for (i = 0; i < VXGE_HAL_MAX_VIRTUAL_PATHS; i++) {
        vp_config = &(device_config->vp_config[i]);
        vp_config->fifo.enable = VXGE_HAL_FIFO_DISABLE;
        vp_config->ring.enable = VXGE_HAL_RING_DISABLE;
    }

    for (i = 0; i < VXGE_HAL_MAX_VIRTUAL_PATHS; i++) {
        if (no_of_vpath >= vdev->config.no_of_vpath)
            break;

        if (!bVAL1(vdev->config.hw_info.vpath_mask, i))
            continue;

        no_of_vpath++;

        vp_config = &(device_config->vp_config[i]);
        vp_config->mtu = VXGE_HAL_DEFAULT_MTU;
        vp_config->ring.enable = VXGE_HAL_RING_ENABLE;
        vp_config->ring.post_mode = VXGE_HAL_RING_POST_MODE_DOORBELL;
        vp_config->ring.buffer_mode = VXGE_HAL_RING_RXD_BUFFER_MODE_1;
        vp_config->ring.ring_length =
vxge_ring_length_get(VXGE_HAL_RING_RXD_BUFFER_MODE_1); vp_config->ring.scatter_mode = VXGE_HAL_RING_SCATTER_MODE_A; vp_config->rpa_all_vid_en = VXGE_DEFAULT_ALL_VID_ENABLE; vp_config->rpa_strip_vlan_tag = VXGE_DEFAULT_STRIP_VLAN_TAG; vp_config->rpa_ucast_all_addr_en = VXGE_HAL_VPATH_RPA_UCAST_ALL_ADDR_DISABLE; vp_config->rti.intr_enable = VXGE_HAL_TIM_INTR_ENABLE; vp_config->rti.txfrm_cnt_en = VXGE_HAL_TXFRM_CNT_EN_ENABLE; vp_config->rti.util_sel = VXGE_HAL_TIM_UTIL_SEL_LEGACY_RX_NET_UTIL; vp_config->rti.uec_a = VXGE_DEFAULT_RTI_RX_UFC_A; vp_config->rti.uec_b = VXGE_DEFAULT_RTI_RX_UFC_B; vp_config->rti.uec_c = VXGE_DEFAULT_RTI_RX_UFC_C; vp_config->rti.uec_d = VXGE_DEFAULT_RTI_RX_UFC_D; vp_config->rti.urange_a = VXGE_DEFAULT_RTI_RX_URANGE_A; vp_config->rti.urange_b = VXGE_DEFAULT_RTI_RX_URANGE_B; vp_config->rti.urange_c = VXGE_DEFAULT_RTI_RX_URANGE_C; vp_config->rti.timer_ac_en = VXGE_HAL_TIM_TIMER_AC_ENABLE; vp_config->rti.timer_ci_en = VXGE_HAL_TIM_TIMER_CI_ENABLE; vp_config->rti.btimer_val = (VXGE_DEFAULT_RTI_BTIMER_VAL * 1000) / 272; vp_config->rti.rtimer_val = (VXGE_DEFAULT_RTI_RTIMER_VAL * 1000) / 272; vp_config->rti.ltimer_val = (VXGE_DEFAULT_RTI_LTIMER_VAL * 1000) / 272; if ((no_of_vpath > 1) && (VXGE_DEFAULT_CONFIG_MQ_ENABLE == 0)) continue; vp_config->fifo.enable = VXGE_HAL_FIFO_ENABLE; vp_config->fifo.max_aligned_frags = VXGE_DEFAULT_FIFO_ALIGNED_FRAGS; vp_config->tti.intr_enable = VXGE_HAL_TIM_INTR_ENABLE; vp_config->tti.txfrm_cnt_en = VXGE_HAL_TXFRM_CNT_EN_ENABLE; vp_config->tti.util_sel = VXGE_HAL_TIM_UTIL_SEL_LEGACY_TX_NET_UTIL; vp_config->tti.uec_a = VXGE_DEFAULT_TTI_TX_UFC_A; vp_config->tti.uec_b = VXGE_DEFAULT_TTI_TX_UFC_B; vp_config->tti.uec_c = VXGE_DEFAULT_TTI_TX_UFC_C; vp_config->tti.uec_d = VXGE_DEFAULT_TTI_TX_UFC_D; vp_config->tti.urange_a = VXGE_DEFAULT_TTI_TX_URANGE_A; vp_config->tti.urange_b = VXGE_DEFAULT_TTI_TX_URANGE_B; vp_config->tti.urange_c = VXGE_DEFAULT_TTI_TX_URANGE_C; vp_config->tti.timer_ac_en = VXGE_HAL_TIM_TIMER_AC_ENABLE; vp_config->tti.timer_ci_en = VXGE_HAL_TIM_TIMER_CI_ENABLE; vp_config->tti.btimer_val = (VXGE_DEFAULT_TTI_BTIMER_VAL * 1000) / 272; vp_config->tti.rtimer_val = (VXGE_DEFAULT_TTI_RTIMER_VAL * 1000) / 272; vp_config->tti.ltimer_val = (VXGE_DEFAULT_TTI_LTIMER_VAL * 1000) / 272; } vdev->no_of_vpath = no_of_vpath; if (vdev->no_of_vpath == 1) vdev->config.tx_steering = 0; if (vdev->config.rth_enable && (vdev->no_of_vpath > 1)) { device_config->rth_en = VXGE_HAL_RTH_ENABLE; device_config->rth_it_type = VXGE_HAL_RTH_IT_TYPE_MULTI_IT; } vdev->config.rth_enable = device_config->rth_en; } /* * vxge_vpath_cb_fn * Virtual path Callback function */ /* ARGSUSED */ static vxge_hal_status_e vxge_vpath_cb_fn(vxge_hal_client_h client_handle, vxge_hal_up_msg_h msgh, vxge_hal_message_type_e msg_type, vxge_hal_obj_id_t obj_id, vxge_hal_result_e result, vxge_hal_opaque_handle_t *opaque_handle) { return (VXGE_HAL_OK); } /* * vxge_vpath_open */ int vxge_vpath_open(vxge_dev_t *vdev) { int i, err = EINVAL; u64 func_id; vxge_vpath_t *vpath; vxge_hal_vpath_attr_t vpath_attr; vxge_hal_status_e status = VXGE_HAL_OK; struct lro_ctrl *lro = NULL; bzero(&vpath_attr, sizeof(vxge_hal_vpath_attr_t)); for (i = 0; i < vdev->no_of_vpath; i++) { vpath = &(vdev->vpaths[i]); lro = &vpath->lro; /* Vpath vpath_attr: FIFO */ vpath_attr.vp_id = vpath->vp_id; vpath_attr.fifo_attr.callback = vxge_tx_compl; vpath_attr.fifo_attr.txdl_init = vxge_tx_replenish; vpath_attr.fifo_attr.txdl_term = vxge_tx_term; vpath_attr.fifo_attr.userdata = vpath; vpath_attr.fifo_attr.per_txdl_space = 
sizeof(vxge_txdl_priv_t); /* Vpath vpath_attr: Ring */ vpath_attr.ring_attr.callback = vxge_rx_compl; vpath_attr.ring_attr.rxd_init = vxge_rx_replenish; vpath_attr.ring_attr.rxd_term = vxge_rx_term; vpath_attr.ring_attr.userdata = vpath; vpath_attr.ring_attr.per_rxd_space = sizeof(vxge_rxd_priv_t); err = vxge_dma_tags_create(vpath); if (err != 0) { device_printf(vdev->ndev, "failed to create dma tags\n"); break; } #if __FreeBSD_version >= 800000 vpath->br = buf_ring_alloc(VXGE_DEFAULT_BR_SIZE, M_DEVBUF, M_WAITOK, &vpath->mtx_tx); if (vpath->br == NULL) { err = ENOMEM; break; } #endif status = vxge_hal_vpath_open(vdev->devh, &vpath_attr, (vxge_hal_vpath_callback_f) vxge_vpath_cb_fn, NULL, &vpath->handle); if (status != VXGE_HAL_OK) { device_printf(vdev->ndev, "failed to open vpath (%d)\n", vpath->vp_id); err = EPERM; break; } vpath->is_open = TRUE; vdev->vpath_handles[i] = vpath->handle; vpath->tx_ticks = ticks; vpath->rx_ticks = ticks; vpath->tti_rtimer_val = VXGE_DEFAULT_TTI_RTIMER_VAL; vpath->rti_rtimer_val = VXGE_DEFAULT_RTI_RTIMER_VAL; vpath->tx_intr_coalesce = vdev->config.intr_coalesce; vpath->rx_intr_coalesce = vdev->config.intr_coalesce; func_id = vdev->config.hw_info.func_id; if (vdev->config.low_latency && (vdev->config.bw_info[func_id].priority == VXGE_DEFAULT_VPATH_PRIORITY_HIGH)) { vpath->tx_intr_coalesce = 0; } if (vdev->ifp->if_capenable & IFCAP_LRO) { err = tcp_lro_init(lro); if (err != 0) { device_printf(vdev->ndev, "LRO Initialization failed!\n"); break; } vpath->lro_enable = TRUE; lro->ifp = vdev->ifp; } } return (err); } void vxge_tso_config(vxge_dev_t *vdev) { u32 func_id, priority; vxge_hal_status_e status = VXGE_HAL_OK; vdev->ifp->if_capabilities |= IFCAP_TSO4; status = vxge_bw_priority_get(vdev, NULL); if (status == VXGE_HAL_OK) { func_id = vdev->config.hw_info.func_id; priority = vdev->config.bw_info[func_id].priority; if (priority != VXGE_DEFAULT_VPATH_PRIORITY_HIGH) vdev->ifp->if_capabilities &= ~IFCAP_TSO4; } #if __FreeBSD_version >= 800000 if (vdev->ifp->if_capabilities & IFCAP_TSO4) vdev->ifp->if_capabilities |= IFCAP_VLAN_HWTSO; #endif } vxge_hal_status_e vxge_bw_priority_get(vxge_dev_t *vdev, vxge_bw_info_t *bw_info) { u32 priority, bandwidth; u32 vpath_count; u64 func_id, func_mode, vpath_list[VXGE_HAL_MAX_VIRTUAL_PATHS]; vxge_hal_status_e status = VXGE_HAL_OK; func_id = vdev->config.hw_info.func_id; if (bw_info) { func_id = bw_info->func_id; func_mode = vdev->config.hw_info.function_mode; if ((is_single_func(func_mode)) && (func_id > 0)) return (VXGE_HAL_FAIL); } if (vdev->hw_fw_version >= VXGE_FW_VERSION(1, 8, 0)) { status = vxge_hal_vf_rx_bw_get(vdev->devh, func_id, &bandwidth, &priority); } else { status = vxge_hal_get_vpath_list(vdev->devh, func_id, vpath_list, &vpath_count); if (status == VXGE_HAL_OK) { status = vxge_hal_bw_priority_get(vdev->devh, vpath_list[0], &bandwidth, &priority); } } if (status == VXGE_HAL_OK) { if (bw_info) { bw_info->priority = priority; bw_info->bandwidth = bandwidth; } else { vdev->config.bw_info[func_id].priority = priority; vdev->config.bw_info[func_id].bandwidth = bandwidth; } } return (status); } /* * close vpaths */ void vxge_vpath_close(vxge_dev_t *vdev) { int i; vxge_vpath_t *vpath; for (i = 0; i < vdev->no_of_vpath; i++) { vpath = &(vdev->vpaths[i]); if (vpath->handle) vxge_hal_vpath_close(vpath->handle); #if __FreeBSD_version >= 800000 if (vpath->br != NULL) buf_ring_free(vpath->br, M_DEVBUF); #endif /* Free LRO memory */ if (vpath->lro_enable) tcp_lro_free(&vpath->lro); if (vpath->dma_tag_rx) { 
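        /*
         * extra_dma_map is the spare map that vxge_rx_rxd_1b_set() swaps
         * with a completed descriptor's map on receive; destroy it along
         * with the Rx DMA tag.
         */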
bus_dmamap_destroy(vpath->dma_tag_rx, vpath->extra_dma_map); bus_dma_tag_destroy(vpath->dma_tag_rx); } if (vpath->dma_tag_tx) bus_dma_tag_destroy(vpath->dma_tag_tx); vpath->handle = NULL; vpath->is_open = FALSE; } } /* * reset vpaths */ void vxge_vpath_reset(vxge_dev_t *vdev) { int i; vxge_hal_vpath_h vpath_handle; vxge_hal_status_e status = VXGE_HAL_OK; for (i = 0; i < vdev->no_of_vpath; i++) { vpath_handle = vxge_vpath_handle_get(vdev, i); if (!vpath_handle) continue; status = vxge_hal_vpath_reset(vpath_handle); if (status != VXGE_HAL_OK) device_printf(vdev->ndev, "failed to reset vpath :%d\n", i); } } static inline int vxge_vpath_get(vxge_dev_t *vdev, mbuf_t mhead) { struct tcphdr *th = NULL; struct udphdr *uh = NULL; struct ip *ip = NULL; struct ip6_hdr *ip6 = NULL; struct ether_vlan_header *eth = NULL; void *ulp = NULL; int ehdrlen, iphlen = 0; u8 ipproto = 0; u16 etype, src_port, dst_port; u16 queue_len, counter = 0; src_port = dst_port = 0; queue_len = vdev->no_of_vpath; eth = mtod(mhead, struct ether_vlan_header *); if (eth->evl_encap_proto == htons(ETHERTYPE_VLAN)) { etype = ntohs(eth->evl_proto); ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { etype = ntohs(eth->evl_encap_proto); ehdrlen = ETHER_HDR_LEN; } switch (etype) { case ETHERTYPE_IP: ip = (struct ip *) (mhead->m_data + ehdrlen); iphlen = ip->ip_hl << 2; ipproto = ip->ip_p; th = (struct tcphdr *) ((caddr_t)ip + iphlen); uh = (struct udphdr *) ((caddr_t)ip + iphlen); break; case ETHERTYPE_IPV6: ip6 = (struct ip6_hdr *) (mhead->m_data + ehdrlen); iphlen = sizeof(struct ip6_hdr); ipproto = ip6->ip6_nxt; ulp = mtod(mhead, char *) + iphlen; th = ((struct tcphdr *) (ulp)); uh = ((struct udphdr *) (ulp)); break; default: break; } switch (ipproto) { case IPPROTO_TCP: src_port = th->th_sport; dst_port = th->th_dport; break; case IPPROTO_UDP: src_port = uh->uh_sport; dst_port = uh->uh_dport; break; default: break; } counter = (ntohs(src_port) + ntohs(dst_port)) & vpath_selector[queue_len - 1]; if (counter >= queue_len) counter = queue_len - 1; return (counter); } static inline vxge_hal_vpath_h vxge_vpath_handle_get(vxge_dev_t *vdev, int i) { return (vdev->vpaths[i].is_open ? 
vdev->vpaths[i].handle : NULL); } int vxge_firmware_verify(vxge_dev_t *vdev) { int err = 0; u64 active_config; vxge_hal_status_e status = VXGE_HAL_FAIL; if (vdev->fw_upgrade) { status = vxge_firmware_upgrade(vdev); if (status == VXGE_HAL_OK) { err = ENXIO; goto _exit0; } } if ((vdev->config.function_mode != VXGE_DEFAULT_CONFIG_VALUE) && (vdev->config.hw_info.function_mode != (u64) vdev->config.function_mode)) { status = vxge_func_mode_set(vdev); if (status == VXGE_HAL_OK) err = ENXIO; } /* l2_switch configuration */ active_config = VXGE_DEFAULT_CONFIG_VALUE; status = vxge_hal_get_active_config(vdev->devh, VXGE_HAL_XMAC_NWIF_ActConfig_L2SwitchEnabled, &active_config); if (status == VXGE_HAL_OK) { vdev->l2_switch = active_config; if (vdev->config.l2_switch != VXGE_DEFAULT_CONFIG_VALUE) { if (vdev->config.l2_switch != active_config) { status = vxge_l2switch_mode_set(vdev); if (status == VXGE_HAL_OK) err = ENXIO; } } } if (vdev->config.hw_info.ports == VXGE_DUAL_PORT_MODE) { if (vxge_port_mode_update(vdev) == ENXIO) err = ENXIO; } _exit0: if (err == ENXIO) device_printf(vdev->ndev, "PLEASE POWER CYCLE THE SYSTEM\n"); return (err); } vxge_hal_status_e vxge_firmware_upgrade(vxge_dev_t *vdev) { u8 *fw_buffer; u32 fw_size; vxge_hal_device_hw_info_t *hw_info; vxge_hal_status_e status = VXGE_HAL_OK; hw_info = &vdev->config.hw_info; fw_size = sizeof(VXGE_FW_ARRAY_NAME); fw_buffer = (u8 *) VXGE_FW_ARRAY_NAME; device_printf(vdev->ndev, "Current firmware version : %s (%s)\n", hw_info->fw_version.version, hw_info->fw_date.date); device_printf(vdev->ndev, "Upgrading firmware to %d.%d.%d\n", VXGE_MIN_FW_MAJOR_VERSION, VXGE_MIN_FW_MINOR_VERSION, VXGE_MIN_FW_BUILD_NUMBER); /* Call HAL API to upgrade firmware */ status = vxge_hal_mrpcim_fw_upgrade(vdev->pdev, (pci_reg_h) vdev->pdev->reg_map[0], (u8 *) vdev->pdev->bar_info[0], fw_buffer, fw_size); device_printf(vdev->ndev, "firmware upgrade %s\n", (status == VXGE_HAL_OK) ? "successful" : "failed"); return (status); } vxge_hal_status_e vxge_func_mode_set(vxge_dev_t *vdev) { u64 active_config; vxge_hal_status_e status = VXGE_HAL_FAIL; status = vxge_hal_mrpcim_pcie_func_mode_set(vdev->devh, vdev->config.function_mode); device_printf(vdev->ndev, "function mode change %s\n", (status == VXGE_HAL_OK) ? "successful" : "failed"); if (status == VXGE_HAL_OK) { vxge_hal_set_fw_api(vdev->devh, 0ULL, VXGE_HAL_API_FUNC_MODE_COMMIT, 0, 0ULL, 0ULL); vxge_hal_get_active_config(vdev->devh, VXGE_HAL_XMAC_NWIF_ActConfig_NWPortMode, &active_config); /* * If in MF + DP mode * if user changes to SF, change port_mode to single port mode */ if (((is_multi_func(vdev->config.hw_info.function_mode)) && is_single_func(vdev->config.function_mode)) && (active_config == VXGE_HAL_DP_NP_MODE_DUAL_PORT)) { vdev->config.port_mode = VXGE_HAL_DP_NP_MODE_SINGLE_PORT; status = vxge_port_mode_set(vdev); } } return (status); } vxge_hal_status_e vxge_port_mode_set(vxge_dev_t *vdev) { vxge_hal_status_e status = VXGE_HAL_FAIL; status = vxge_hal_set_port_mode(vdev->devh, vdev->config.port_mode); device_printf(vdev->ndev, "port mode change %s\n", (status == VXGE_HAL_OK) ? "successful" : "failed"); if (status == VXGE_HAL_OK) { vxge_hal_set_fw_api(vdev->devh, 0ULL, VXGE_HAL_API_FUNC_MODE_COMMIT, 0, 0ULL, 0ULL); /* Configure vpath_mapping for active-active mode only */ if (vdev->config.port_mode == VXGE_HAL_DP_NP_MODE_DUAL_PORT) { status = vxge_hal_config_vpath_map(vdev->devh, VXGE_DUAL_PORT_MAP); device_printf(vdev->ndev, "dual port map change %s\n", (status == VXGE_HAL_OK) ? 
"successful" : "failed"); } } return (status); } int vxge_port_mode_update(vxge_dev_t *vdev) { int err = 0; u64 active_config; vxge_hal_status_e status = VXGE_HAL_FAIL; if ((vdev->config.port_mode == VXGE_HAL_DP_NP_MODE_DUAL_PORT) && is_single_func(vdev->config.hw_info.function_mode)) { device_printf(vdev->ndev, "Adapter in SF mode, dual port mode is not allowed\n"); err = EPERM; goto _exit0; } active_config = VXGE_DEFAULT_CONFIG_VALUE; status = vxge_hal_get_active_config(vdev->devh, VXGE_HAL_XMAC_NWIF_ActConfig_NWPortMode, &active_config); if (status != VXGE_HAL_OK) { err = EINVAL; goto _exit0; } vdev->port_mode = active_config; if (vdev->config.port_mode != VXGE_DEFAULT_CONFIG_VALUE) { if (vdev->config.port_mode != vdev->port_mode) { status = vxge_port_mode_set(vdev); if (status != VXGE_HAL_OK) { err = EINVAL; goto _exit0; } err = ENXIO; vdev->port_mode = vdev->config.port_mode; } } active_config = VXGE_DEFAULT_CONFIG_VALUE; status = vxge_hal_get_active_config(vdev->devh, VXGE_HAL_XMAC_NWIF_ActConfig_BehaviourOnFail, &active_config); if (status != VXGE_HAL_OK) { err = EINVAL; goto _exit0; } vdev->port_failure = active_config; /* * active/active mode : set to NoMove * active/passive mode: set to Failover-Failback */ if (vdev->port_mode == VXGE_HAL_DP_NP_MODE_DUAL_PORT) vdev->config.port_failure = VXGE_HAL_XMAC_NWIF_OnFailure_NoMove; else if (vdev->port_mode == VXGE_HAL_DP_NP_MODE_ACTIVE_PASSIVE) vdev->config.port_failure = VXGE_HAL_XMAC_NWIF_OnFailure_OtherPortBackOnRestore; if ((vdev->port_mode != VXGE_HAL_DP_NP_MODE_SINGLE_PORT) && (vdev->config.port_failure != vdev->port_failure)) { status = vxge_port_behavior_on_failure_set(vdev); if (status == VXGE_HAL_OK) err = ENXIO; } _exit0: return (err); } vxge_hal_status_e vxge_port_mode_get(vxge_dev_t *vdev, vxge_port_info_t *port_info) { int err = 0; u64 active_config; vxge_hal_status_e status = VXGE_HAL_FAIL; active_config = VXGE_DEFAULT_CONFIG_VALUE; status = vxge_hal_get_active_config(vdev->devh, VXGE_HAL_XMAC_NWIF_ActConfig_NWPortMode, &active_config); if (status != VXGE_HAL_OK) { err = ENXIO; goto _exit0; } port_info->port_mode = active_config; active_config = VXGE_DEFAULT_CONFIG_VALUE; status = vxge_hal_get_active_config(vdev->devh, VXGE_HAL_XMAC_NWIF_ActConfig_BehaviourOnFail, &active_config); if (status != VXGE_HAL_OK) { err = ENXIO; goto _exit0; } port_info->port_failure = active_config; _exit0: return (err); } vxge_hal_status_e vxge_port_behavior_on_failure_set(vxge_dev_t *vdev) { vxge_hal_status_e status = VXGE_HAL_FAIL; status = vxge_hal_set_behavior_on_failure(vdev->devh, vdev->config.port_failure); device_printf(vdev->ndev, "port behaviour on failure change %s\n", (status == VXGE_HAL_OK) ? "successful" : "failed"); if (status == VXGE_HAL_OK) vxge_hal_set_fw_api(vdev->devh, 0ULL, VXGE_HAL_API_FUNC_MODE_COMMIT, 0, 0ULL, 0ULL); return (status); } void vxge_active_port_update(vxge_dev_t *vdev) { u64 active_config; vxge_hal_status_e status = VXGE_HAL_FAIL; active_config = VXGE_DEFAULT_CONFIG_VALUE; status = vxge_hal_get_active_config(vdev->devh, VXGE_HAL_XMAC_NWIF_ActConfig_ActivePort, &active_config); if (status == VXGE_HAL_OK) vdev->active_port = active_config; } vxge_hal_status_e vxge_l2switch_mode_set(vxge_dev_t *vdev) { vxge_hal_status_e status = VXGE_HAL_FAIL; status = vxge_hal_set_l2switch_mode(vdev->devh, vdev->config.l2_switch); device_printf(vdev->ndev, "L2 switch %s\n", (status == VXGE_HAL_OK) ? (vdev->config.l2_switch) ? 
"enable" : "disable" : "change failed"); if (status == VXGE_HAL_OK) vxge_hal_set_fw_api(vdev->devh, 0ULL, VXGE_HAL_API_FUNC_MODE_COMMIT, 0, 0ULL, 0ULL); return (status); } /* * vxge_promisc_set * Enable Promiscuous Mode */ void vxge_promisc_set(vxge_dev_t *vdev) { int i; ifnet_t ifp; vxge_hal_vpath_h vpath_handle; if (!vdev->is_initialized) return; ifp = vdev->ifp; for (i = 0; i < vdev->no_of_vpath; i++) { vpath_handle = vxge_vpath_handle_get(vdev, i); if (!vpath_handle) continue; if (ifp->if_flags & IFF_PROMISC) vxge_hal_vpath_promisc_enable(vpath_handle); else vxge_hal_vpath_promisc_disable(vpath_handle); } } /* * vxge_change_mtu * Change interface MTU to a requested valid size */ int vxge_change_mtu(vxge_dev_t *vdev, unsigned long new_mtu) { int err = EINVAL; if ((new_mtu < VXGE_HAL_MIN_MTU) || (new_mtu > VXGE_HAL_MAX_MTU)) goto _exit0; (vdev->ifp)->if_mtu = new_mtu; device_printf(vdev->ndev, "MTU changed to %u\n", (vdev->ifp)->if_mtu); if (vdev->is_initialized) { if_down(vdev->ifp); vxge_reset(vdev); if_up(vdev->ifp); } err = 0; _exit0: return (err); } /* * Creates DMA tags for both Tx and Rx */ int vxge_dma_tags_create(vxge_vpath_t *vpath) { int err = 0; bus_size_t max_size, boundary; vxge_dev_t *vdev = vpath->vdev; ifnet_t ifp = vdev->ifp; max_size = ifp->if_mtu + VXGE_HAL_MAC_HEADER_MAX_SIZE + VXGE_HAL_HEADER_ETHERNET_II_802_3_ALIGN; VXGE_BUFFER_ALIGN(max_size, 128) if (max_size <= MCLBYTES) vdev->rx_mbuf_sz = MCLBYTES; else vdev->rx_mbuf_sz = (max_size > MJUMPAGESIZE) ? MJUM9BYTES : MJUMPAGESIZE; boundary = (max_size > PAGE_SIZE) ? 0 : PAGE_SIZE; /* DMA tag for Tx */ err = bus_dma_tag_create( bus_get_dma_tag(vdev->ndev), 1, PAGE_SIZE, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, VXGE_TSO_SIZE, VXGE_MAX_SEGS, PAGE_SIZE, BUS_DMA_ALLOCNOW, NULL, NULL, &(vpath->dma_tag_tx)); if (err != 0) goto _exit0; /* DMA tag for Rx */ err = bus_dma_tag_create( bus_get_dma_tag(vdev->ndev), 1, boundary, BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, vdev->rx_mbuf_sz, 1, vdev->rx_mbuf_sz, BUS_DMA_ALLOCNOW, NULL, NULL, &(vpath->dma_tag_rx)); if (err != 0) goto _exit1; /* Create DMA map for this descriptor */ err = bus_dmamap_create(vpath->dma_tag_rx, BUS_DMA_NOWAIT, &vpath->extra_dma_map); if (err == 0) goto _exit0; bus_dma_tag_destroy(vpath->dma_tag_rx); _exit1: bus_dma_tag_destroy(vpath->dma_tag_tx); _exit0: return (err); } static inline int vxge_dma_mbuf_coalesce(bus_dma_tag_t dma_tag_tx, bus_dmamap_t dma_map, mbuf_t * m_headp, bus_dma_segment_t * dma_buffers, int *num_segs) { int err = 0; mbuf_t mbuf_pkt = NULL; retry: err = bus_dmamap_load_mbuf_sg(dma_tag_tx, dma_map, *m_headp, dma_buffers, num_segs, BUS_DMA_NOWAIT); if (err == EFBIG) { /* try to defrag, too many segments */ mbuf_pkt = m_defrag(*m_headp, M_NOWAIT); if (mbuf_pkt == NULL) { err = ENOBUFS; goto _exit0; } *m_headp = mbuf_pkt; goto retry; } _exit0: return (err); } int vxge_device_hw_info_get(vxge_dev_t *vdev) { int i, err = ENXIO; u64 vpath_mask = 0; u32 max_supported_vpath = 0; u32 fw_ver_maj_min; vxge_firmware_upgrade_e fw_option; vxge_hal_status_e status = VXGE_HAL_OK; vxge_hal_device_hw_info_t *hw_info; status = vxge_hal_device_hw_info_get(vdev->pdev, (pci_reg_h) vdev->pdev->reg_map[0], (u8 *) vdev->pdev->bar_info[0], &vdev->config.hw_info); if (status != VXGE_HAL_OK) goto _exit0; hw_info = &vdev->config.hw_info; vpath_mask = hw_info->vpath_mask; if (vpath_mask == 0) { device_printf(vdev->ndev, "No vpaths available in device\n"); goto _exit0; } fw_option = vdev->config.fw_option; /* Check how many vpaths are available */ 
for (i = 0; i < VXGE_HAL_MAX_VIRTUAL_PATHS; i++) { if (!((vpath_mask) & mBIT(i))) continue; max_supported_vpath++; } vdev->max_supported_vpath = max_supported_vpath; status = vxge_hal_device_is_privileged(hw_info->host_type, hw_info->func_id); vdev->is_privilaged = (status == VXGE_HAL_OK) ? TRUE : FALSE; vdev->hw_fw_version = VXGE_FW_VERSION( hw_info->fw_version.major, hw_info->fw_version.minor, hw_info->fw_version.build); fw_ver_maj_min = VXGE_FW_MAJ_MIN_VERSION(hw_info->fw_version.major, hw_info->fw_version.minor); if ((fw_option >= VXGE_FW_UPGRADE_FORCE) || (vdev->hw_fw_version != VXGE_DRV_FW_VERSION)) { /* For fw_ver 1.8.1 and above ignore build number. */ if ((fw_option == VXGE_FW_UPGRADE_ALL) && ((vdev->hw_fw_version >= VXGE_FW_VERSION(1, 8, 1)) && (fw_ver_maj_min == VXGE_DRV_FW_MAJ_MIN_VERSION))) { goto _exit1; } if (vdev->hw_fw_version < VXGE_BASE_FW_VERSION) { device_printf(vdev->ndev, "Upgrade driver through vxge_update, " "Unable to load the driver.\n"); goto _exit0; } vdev->fw_upgrade = TRUE; } _exit1: err = 0; _exit0: return (err); } /* * vxge_device_hw_info_print * Print device and driver information */ void vxge_device_hw_info_print(vxge_dev_t *vdev) { u32 i; device_t ndev; struct sysctl_ctx_list *ctx; struct sysctl_oid_list *children; char pmd_type[2][VXGE_PMD_INFO_LEN]; vxge_hal_device_t *hldev; vxge_hal_device_hw_info_t *hw_info; vxge_hal_device_pmd_info_t *pmd_port; hldev = vdev->devh; ndev = vdev->ndev; ctx = device_get_sysctl_ctx(ndev); children = SYSCTL_CHILDREN(device_get_sysctl_tree(ndev)); hw_info = &(vdev->config.hw_info); snprintf(vdev->config.nic_attr[VXGE_PRINT_DRV_VERSION], sizeof(vdev->config.nic_attr[VXGE_PRINT_DRV_VERSION]), "%d.%d.%d.%d", XGELL_VERSION_MAJOR, XGELL_VERSION_MINOR, XGELL_VERSION_FIX, XGELL_VERSION_BUILD); /* Print PCI-e bus type/speed/width info */ snprintf(vdev->config.nic_attr[VXGE_PRINT_PCIE_INFO], sizeof(vdev->config.nic_attr[VXGE_PRINT_PCIE_INFO]), "x%d", hldev->link_width); if (hldev->link_width <= VXGE_HAL_PCI_E_LINK_WIDTH_X4) device_printf(ndev, "For optimal performance a x8 " "PCI-Express slot is required.\n"); vxge_null_terminate((char *) hw_info->serial_number, sizeof(hw_info->serial_number)); vxge_null_terminate((char *) hw_info->part_number, sizeof(hw_info->part_number)); snprintf(vdev->config.nic_attr[VXGE_PRINT_SERIAL_NO], sizeof(vdev->config.nic_attr[VXGE_PRINT_SERIAL_NO]), "%s", hw_info->serial_number); snprintf(vdev->config.nic_attr[VXGE_PRINT_PART_NO], sizeof(vdev->config.nic_attr[VXGE_PRINT_PART_NO]), "%s", hw_info->part_number); snprintf(vdev->config.nic_attr[VXGE_PRINT_FW_VERSION], sizeof(vdev->config.nic_attr[VXGE_PRINT_FW_VERSION]), "%s", hw_info->fw_version.version); snprintf(vdev->config.nic_attr[VXGE_PRINT_FW_DATE], sizeof(vdev->config.nic_attr[VXGE_PRINT_FW_DATE]), "%s", hw_info->fw_date.date); pmd_port = &(hw_info->pmd_port0); for (i = 0; i < hw_info->ports; i++) { vxge_pmd_port_type_get(vdev, pmd_port->type, pmd_type[i], sizeof(pmd_type[i])); strncpy(vdev->config.nic_attr[VXGE_PRINT_PMD_PORTS_0 + i], "vendor=??, sn=??, pn=??, type=??", sizeof(vdev->config.nic_attr[VXGE_PRINT_PMD_PORTS_0 + i])); vxge_null_terminate(pmd_port->vendor, sizeof(pmd_port->vendor)); if (strlen(pmd_port->vendor) == 0) { pmd_port = &(hw_info->pmd_port1); continue; } vxge_null_terminate(pmd_port->ser_num, sizeof(pmd_port->ser_num)); vxge_null_terminate(pmd_port->part_num, sizeof(pmd_port->part_num)); snprintf(vdev->config.nic_attr[VXGE_PRINT_PMD_PORTS_0 + i], sizeof(vdev->config.nic_attr[VXGE_PRINT_PMD_PORTS_0 + i]), "vendor=%s, sn=%s, 
pn=%s, type=%s", pmd_port->vendor, pmd_port->ser_num, pmd_port->part_num, pmd_type[i]); pmd_port = &(hw_info->pmd_port1); } switch (hw_info->function_mode) { case VXGE_HAL_PCIE_FUNC_MODE_SF1_VP17: snprintf(vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE]), "%s %d %s", "Single Function - 1 function(s)", vdev->max_supported_vpath, "VPath(s)/function"); break; case VXGE_HAL_PCIE_FUNC_MODE_MF2_VP8: snprintf(vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE]), "%s %d %s", "Multi Function - 2 function(s)", vdev->max_supported_vpath, "VPath(s)/function"); break; case VXGE_HAL_PCIE_FUNC_MODE_MF4_VP4: snprintf(vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE]), "%s %d %s", "Multi Function - 4 function(s)", vdev->max_supported_vpath, "VPath(s)/function"); break; case VXGE_HAL_PCIE_FUNC_MODE_MF8_VP2: snprintf(vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE]), "%s %d %s", "Multi Function - 8 function(s)", vdev->max_supported_vpath, "VPath(s)/function"); break; case VXGE_HAL_PCIE_FUNC_MODE_MF8P_VP2: snprintf(vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE]), "%s %d %s", "Multi Function (DirectIO) - 8 function(s)", vdev->max_supported_vpath, "VPath(s)/function"); break; } snprintf(vdev->config.nic_attr[VXGE_PRINT_INTR_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_INTR_MODE]), "%s", ((vdev->config.intr_mode == VXGE_HAL_INTR_MODE_MSIX) ? "MSI-X" : "INTA")); snprintf(vdev->config.nic_attr[VXGE_PRINT_VPATH_COUNT], sizeof(vdev->config.nic_attr[VXGE_PRINT_VPATH_COUNT]), "%d", vdev->no_of_vpath); snprintf(vdev->config.nic_attr[VXGE_PRINT_MTU_SIZE], sizeof(vdev->config.nic_attr[VXGE_PRINT_MTU_SIZE]), "%u", vdev->ifp->if_mtu); snprintf(vdev->config.nic_attr[VXGE_PRINT_LRO_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_LRO_MODE]), "%s", ((vdev->config.lro_enable) ? "Enabled" : "Disabled")); snprintf(vdev->config.nic_attr[VXGE_PRINT_RTH_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_RTH_MODE]), "%s", ((vdev->config.rth_enable) ? "Enabled" : "Disabled")); snprintf(vdev->config.nic_attr[VXGE_PRINT_TSO_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_TSO_MODE]), "%s", ((vdev->ifp->if_capenable & IFCAP_TSO4) ? "Enabled" : "Disabled")); snprintf(vdev->config.nic_attr[VXGE_PRINT_ADAPTER_TYPE], sizeof(vdev->config.nic_attr[VXGE_PRINT_ADAPTER_TYPE]), "%s", ((hw_info->ports == 1) ? "Single Port" : "Dual Port")); if (vdev->is_privilaged) { if (hw_info->ports > 1) { snprintf(vdev->config.nic_attr[VXGE_PRINT_PORT_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_PORT_MODE]), "%s", vxge_port_mode[vdev->port_mode]); if (vdev->port_mode != VXGE_HAL_DP_NP_MODE_SINGLE_PORT) snprintf(vdev->config.nic_attr[VXGE_PRINT_PORT_FAILURE], sizeof(vdev->config.nic_attr[VXGE_PRINT_PORT_FAILURE]), "%s", vxge_port_failure[vdev->port_failure]); vxge_active_port_update(vdev); snprintf(vdev->config.nic_attr[VXGE_PRINT_ACTIVE_PORT], sizeof(vdev->config.nic_attr[VXGE_PRINT_ACTIVE_PORT]), "%lld", vdev->active_port); } if (!is_single_func(hw_info->function_mode)) { snprintf(vdev->config.nic_attr[VXGE_PRINT_L2SWITCH_MODE], sizeof(vdev->config.nic_attr[VXGE_PRINT_L2SWITCH_MODE]), "%s", ((vdev->l2_switch) ? 
"Enabled" : "Disabled")); } } device_printf(ndev, "Driver version\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_DRV_VERSION]); device_printf(ndev, "Serial number\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_SERIAL_NO]); device_printf(ndev, "Part number\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_PART_NO]); device_printf(ndev, "Firmware version\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_FW_VERSION]); device_printf(ndev, "Firmware date\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_FW_DATE]); device_printf(ndev, "Link width\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_PCIE_INFO]); if (vdev->is_privilaged) { device_printf(ndev, "Function mode\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE]); } device_printf(ndev, "Interrupt type\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_INTR_MODE]); device_printf(ndev, "VPath(s) opened\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_VPATH_COUNT]); device_printf(ndev, "Adapter Type\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_ADAPTER_TYPE]); device_printf(ndev, "PMD Port 0\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_PMD_PORTS_0]); if (hw_info->ports > 1) { device_printf(ndev, "PMD Port 1\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_PMD_PORTS_1]); if (vdev->is_privilaged) { device_printf(ndev, "Port Mode\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_PORT_MODE]); if (vdev->port_mode != VXGE_HAL_DP_NP_MODE_SINGLE_PORT) device_printf(ndev, "Port Failure\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_PORT_FAILURE]); device_printf(vdev->ndev, "Active Port\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_ACTIVE_PORT]); } } if (vdev->is_privilaged && !is_single_func(hw_info->function_mode)) { device_printf(vdev->ndev, "L2 Switch\t: %s\n", vdev->config.nic_attr[VXGE_PRINT_L2SWITCH_MODE]); } device_printf(ndev, "MTU is %s\n", vdev->config.nic_attr[VXGE_PRINT_MTU_SIZE]); device_printf(ndev, "LRO %s\n", vdev->config.nic_attr[VXGE_PRINT_LRO_MODE]); device_printf(ndev, "RTH %s\n", vdev->config.nic_attr[VXGE_PRINT_RTH_MODE]); device_printf(ndev, "TSO %s\n", vdev->config.nic_attr[VXGE_PRINT_TSO_MODE]); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Driver version", CTLFLAG_RD, vdev->config.nic_attr[VXGE_PRINT_DRV_VERSION], 0, "Driver version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Serial number", CTLFLAG_RD, vdev->config.nic_attr[VXGE_PRINT_SERIAL_NO], 0, "Serial number"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Part number", CTLFLAG_RD, vdev->config.nic_attr[VXGE_PRINT_PART_NO], 0, "Part number"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Firmware version", CTLFLAG_RD, vdev->config.nic_attr[VXGE_PRINT_FW_VERSION], 0, "Firmware version"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Firmware date", CTLFLAG_RD, vdev->config.nic_attr[VXGE_PRINT_FW_DATE], 0, "Firmware date"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Link width", CTLFLAG_RD, vdev->config.nic_attr[VXGE_PRINT_PCIE_INFO], 0, "Link width"); if (vdev->is_privilaged) { SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Function mode", CTLFLAG_RD, vdev->config.nic_attr[VXGE_PRINT_FUNC_MODE], 0, "Function mode"); } SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Interrupt type", CTLFLAG_RD, vdev->config.nic_attr[VXGE_PRINT_INTR_MODE], 0, "Interrupt type"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "VPath(s) opened", CTLFLAG_RD, vdev->config.nic_attr[VXGE_PRINT_VPATH_COUNT], 0, "VPath(s) opened"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Adapter Type", CTLFLAG_RD, vdev->config.nic_attr[VXGE_PRINT_ADAPTER_TYPE], 0, "Adapter Type"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "pmd port 0", CTLFLAG_RD, vdev->config.nic_attr[VXGE_PRINT_PMD_PORTS_0], 
0, "pmd port"); if (hw_info->ports > 1) { SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "pmd port 1", CTLFLAG_RD, vdev->config.nic_attr[VXGE_PRINT_PMD_PORTS_1], 0, "pmd port"); if (vdev->is_privilaged) { SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Port Mode", CTLFLAG_RD, vdev->config.nic_attr[VXGE_PRINT_PORT_MODE], 0, "Port Mode"); if (vdev->port_mode != VXGE_HAL_DP_NP_MODE_SINGLE_PORT) SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "Port Failure", CTLFLAG_RD, vdev->config.nic_attr[VXGE_PRINT_PORT_FAILURE], 0, "Port Failure"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "L2 Switch", CTLFLAG_RD, vdev->config.nic_attr[VXGE_PRINT_L2SWITCH_MODE], 0, "L2 Switch"); } } SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "LRO mode", CTLFLAG_RD, vdev->config.nic_attr[VXGE_PRINT_LRO_MODE], 0, "LRO mode"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "RTH mode", CTLFLAG_RD, vdev->config.nic_attr[VXGE_PRINT_RTH_MODE], 0, "RTH mode"); SYSCTL_ADD_STRING(ctx, children, OID_AUTO, "TSO mode", CTLFLAG_RD, vdev->config.nic_attr[VXGE_PRINT_TSO_MODE], 0, "TSO mode"); } void vxge_pmd_port_type_get(vxge_dev_t *vdev, u32 port_type, char *ifm_name, u8 ifm_len) { vdev->ifm_optics = IFM_UNKNOWN; switch (port_type) { case VXGE_HAL_DEVICE_PMD_TYPE_10G_SR: vdev->ifm_optics = IFM_10G_SR; strlcpy(ifm_name, "10GbE SR", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_10G_LR: vdev->ifm_optics = IFM_10G_LR; strlcpy(ifm_name, "10GbE LR", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_10G_LRM: vdev->ifm_optics = IFM_10G_LRM; strlcpy(ifm_name, "10GbE LRM", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_10G_DIRECT: vdev->ifm_optics = IFM_10G_TWINAX; strlcpy(ifm_name, "10GbE DA (Direct Attached)", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_10G_CX4: vdev->ifm_optics = IFM_10G_CX4; strlcpy(ifm_name, "10GbE CX4", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_10G_BASE_T: #if __FreeBSD_version >= 800000 vdev->ifm_optics = IFM_10G_T; #endif strlcpy(ifm_name, "10GbE baseT", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_10G_OTHER: strlcpy(ifm_name, "10GbE Other", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_1G_SX: vdev->ifm_optics = IFM_1000_SX; strlcpy(ifm_name, "1GbE SX", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_1G_LX: vdev->ifm_optics = IFM_1000_LX; strlcpy(ifm_name, "1GbE LX", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_1G_CX: vdev->ifm_optics = IFM_1000_CX; strlcpy(ifm_name, "1GbE CX", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_1G_BASE_T: vdev->ifm_optics = IFM_1000_T; strlcpy(ifm_name, "1GbE baseT", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_1G_DIRECT: strlcpy(ifm_name, "1GbE DA (Direct Attached)", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_1G_CX4: strlcpy(ifm_name, "1GbE CX4", ifm_len); break; case VXGE_HAL_DEVICE_PMD_TYPE_1G_OTHER: strlcpy(ifm_name, "1GbE Other", ifm_len); break; default: case VXGE_HAL_DEVICE_PMD_TYPE_UNKNOWN: strlcpy(ifm_name, "UNSUP", ifm_len); break; } } u32 vxge_ring_length_get(u32 buffer_mode) { return (VXGE_DEFAULT_RING_BLOCK * vxge_hal_ring_rxds_per_block_get(buffer_mode)); } /* * Removes trailing spaces padded * and NULL terminates strings */ static inline void vxge_null_terminate(char *str, size_t len) { len--; while (*str && (*str != ' ') && (len != 0)) ++str; --len; if (*str) *str = '\0'; } /* * vxge_ioctl * Callback to control the device */ int vxge_ioctl(ifnet_t ifp, u_long command, caddr_t data) { int mask, err = 0; vxge_dev_t *vdev = (vxge_dev_t *) ifp->if_softc; struct ifreq *ifr = (struct ifreq *) data; if (!vdev->is_active) return (EBUSY); switch (command) { /* Set/Get ifnet address */ 
case SIOCSIFADDR: case SIOCGIFADDR: ether_ioctl(ifp, command, data); break; /* Set Interface MTU */ case SIOCSIFMTU: err = vxge_change_mtu(vdev, (unsigned long)ifr->ifr_mtu); break; /* Set Interface Flags */ case SIOCSIFFLAGS: VXGE_DRV_LOCK(vdev); if (ifp->if_flags & IFF_UP) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { if ((ifp->if_flags ^ vdev->if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) vxge_promisc_set(vdev); } else { vxge_init_locked(vdev); } } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) vxge_stop_locked(vdev); } vdev->if_flags = ifp->if_flags; VXGE_DRV_UNLOCK(vdev); break; /* Add/delete multicast address */ case SIOCADDMULTI: case SIOCDELMULTI: break; /* Get/Set Interface Media */ case SIOCSIFMEDIA: case SIOCGIFMEDIA: err = ifmedia_ioctl(ifp, ifr, &vdev->media, command); break; /* Set Capabilities */ case SIOCSIFCAP: VXGE_DRV_LOCK(vdev); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { ifp->if_capenable ^= IFCAP_TXCSUM; ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP); if ((ifp->if_capenable & IFCAP_TSO) && !(ifp->if_capenable & IFCAP_TXCSUM)) { ifp->if_capenable &= ~IFCAP_TSO; ifp->if_hwassist &= ~CSUM_TSO; if_printf(ifp, "TSO Disabled\n"); } } if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_TSO4) { ifp->if_capenable ^= IFCAP_TSO4; if (ifp->if_capenable & IFCAP_TSO) { if (ifp->if_capenable & IFCAP_TXCSUM) { ifp->if_hwassist |= CSUM_TSO; if_printf(ifp, "TSO Enabled\n"); } else { ifp->if_capenable &= ~IFCAP_TSO; ifp->if_hwassist &= ~CSUM_TSO; if_printf(ifp, "Enable tx checksum offload \ first.\n"); err = EAGAIN; } } else { ifp->if_hwassist &= ~CSUM_TSO; if_printf(ifp, "TSO Disabled\n"); } } if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; if (mask & IFCAP_VLAN_MTU) ifp->if_capenable ^= IFCAP_VLAN_MTU; if (mask & IFCAP_VLAN_HWCSUM) ifp->if_capenable ^= IFCAP_VLAN_HWCSUM; #if __FreeBSD_version >= 800000 if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; #endif #if defined(VLAN_CAPABILITIES) VLAN_CAPABILITIES(ifp); #endif VXGE_DRV_UNLOCK(vdev); break; case SIOCGPRIVATE_0: VXGE_DRV_LOCK(vdev); err = vxge_ioctl_stats(vdev, ifr); VXGE_DRV_UNLOCK(vdev); break; case SIOCGPRIVATE_1: VXGE_DRV_LOCK(vdev); err = vxge_ioctl_regs(vdev, ifr); VXGE_DRV_UNLOCK(vdev); break; default: err = ether_ioctl(ifp, command, data); break; } return (err); } /* * vxge_ioctl_regs * IOCTL to get registers */ int vxge_ioctl_regs(vxge_dev_t *vdev, struct ifreq *ifr) { u64 value = 0x0; u32 vp_id = 0; u32 offset, reqd_size = 0; int i, err = EINVAL; - char *command = (char *) ifr->ifr_data; - void *reg_info = (void *) ifr->ifr_data; + char *command = ifr_data_get_ptr(ifr); + void *reg_info = ifr_data_get_ptr(ifr); vxge_vpath_t *vpath; vxge_hal_status_e status = VXGE_HAL_OK; vxge_hal_mgmt_reg_type_e regs_type; switch (*command) { case vxge_hal_mgmt_reg_type_pcicfgmgmt: if (vdev->is_privilaged) { reqd_size = sizeof(vxge_hal_pcicfgmgmt_reg_t); regs_type = vxge_hal_mgmt_reg_type_pcicfgmgmt; } break; case vxge_hal_mgmt_reg_type_mrpcim: if (vdev->is_privilaged) { reqd_size = sizeof(vxge_hal_mrpcim_reg_t); regs_type = vxge_hal_mgmt_reg_type_mrpcim; } break; case vxge_hal_mgmt_reg_type_srpcim: if (vdev->is_privilaged) { reqd_size = sizeof(vxge_hal_srpcim_reg_t); regs_type = vxge_hal_mgmt_reg_type_srpcim; } break; case vxge_hal_mgmt_reg_type_memrepair: if (vdev->is_privilaged) { /* reqd_size = sizeof(vxge_hal_memrepair_reg_t); */ regs_type = vxge_hal_mgmt_reg_type_memrepair; } 
break; case vxge_hal_mgmt_reg_type_legacy: reqd_size = sizeof(vxge_hal_legacy_reg_t); regs_type = vxge_hal_mgmt_reg_type_legacy; break; case vxge_hal_mgmt_reg_type_toc: reqd_size = sizeof(vxge_hal_toc_reg_t); regs_type = vxge_hal_mgmt_reg_type_toc; break; case vxge_hal_mgmt_reg_type_common: reqd_size = sizeof(vxge_hal_common_reg_t); regs_type = vxge_hal_mgmt_reg_type_common; break; case vxge_hal_mgmt_reg_type_vpmgmt: reqd_size = sizeof(vxge_hal_vpmgmt_reg_t); regs_type = vxge_hal_mgmt_reg_type_vpmgmt; vpath = &(vdev->vpaths[*((u32 *) reg_info + 1)]); vp_id = vpath->vp_id; break; case vxge_hal_mgmt_reg_type_vpath: reqd_size = sizeof(vxge_hal_vpath_reg_t); regs_type = vxge_hal_mgmt_reg_type_vpath; vpath = &(vdev->vpaths[*((u32 *) reg_info + 1)]); vp_id = vpath->vp_id; break; case VXGE_GET_VPATH_COUNT: *((u32 *) reg_info) = vdev->no_of_vpath; err = 0; break; default: reqd_size = 0; break; } if (reqd_size) { for (i = 0, offset = 0; offset < reqd_size; i++, offset += 0x0008) { value = 0x0; status = vxge_hal_mgmt_reg_read(vdev->devh, regs_type, vp_id, offset, &value); err = (status != VXGE_HAL_OK) ? EINVAL : 0; if (err == EINVAL) break; *((u64 *) ((u64 *) reg_info + i)) = value; } } return (err); } /* * vxge_ioctl_stats * IOCTL to get statistics */ int vxge_ioctl_stats(vxge_dev_t *vdev, struct ifreq *ifr) { int i, retsize, err = EINVAL; u32 bufsize; vxge_vpath_t *vpath; vxge_bw_info_t *bw_info; vxge_port_info_t *port_info; vxge_drv_stats_t *drv_stat; char *buffer = NULL; - char *command = (char *) ifr->ifr_data; + char *command = ifr_data_get_ptr(ifr); vxge_hal_status_e status = VXGE_HAL_OK; switch (*command) { case VXGE_GET_PCI_CONF: bufsize = VXGE_STATS_BUFFER_SIZE; buffer = (char *) vxge_mem_alloc(bufsize); if (buffer != NULL) { status = vxge_hal_aux_pci_config_read(vdev->devh, bufsize, buffer, &retsize); if (status == VXGE_HAL_OK) - err = copyout(buffer, ifr->ifr_data, retsize); + err = copyout(buffer, ifr_data_get_ptr(ifr), + retsize); else device_printf(vdev->ndev, "failed pciconfig statistics query\n"); vxge_mem_free(buffer, bufsize); } break; case VXGE_GET_MRPCIM_STATS: if (!vdev->is_privilaged) break; bufsize = VXGE_STATS_BUFFER_SIZE; buffer = (char *) vxge_mem_alloc(bufsize); if (buffer != NULL) { status = vxge_hal_aux_stats_mrpcim_read(vdev->devh, bufsize, buffer, &retsize); if (status == VXGE_HAL_OK) - err = copyout(buffer, ifr->ifr_data, retsize); + err = copyout(buffer, ifr_data_get_ptr(ifr), + retsize); else device_printf(vdev->ndev, "failed mrpcim statistics query\n"); vxge_mem_free(buffer, bufsize); } break; case VXGE_GET_DEVICE_STATS: bufsize = VXGE_STATS_BUFFER_SIZE; buffer = (char *) vxge_mem_alloc(bufsize); if (buffer != NULL) { status = vxge_hal_aux_stats_device_read(vdev->devh, bufsize, buffer, &retsize); if (status == VXGE_HAL_OK) - err = copyout(buffer, ifr->ifr_data, retsize); + err = copyout(buffer, ifr_data_get_ptr(ifr), + retsize); else device_printf(vdev->ndev, "failed device statistics query\n"); vxge_mem_free(buffer, bufsize); } break; case VXGE_GET_DEVICE_HWINFO: bufsize = sizeof(vxge_device_hw_info_t); buffer = (char *) vxge_mem_alloc(bufsize); if (buffer != NULL) { vxge_os_memcpy( &(((vxge_device_hw_info_t *) buffer)->hw_info), &vdev->config.hw_info, sizeof(vxge_hal_device_hw_info_t)); ((vxge_device_hw_info_t *) buffer)->port_mode = vdev->port_mode; ((vxge_device_hw_info_t *) buffer)->port_failure = vdev->port_failure; - err = copyout(buffer, ifr->ifr_data, bufsize); + err = copyout(buffer, ifr_data_get_ptr(ifr), bufsize); if (err != 0) 
device_printf(vdev->ndev, "failed device hardware info query\n"); vxge_mem_free(buffer, bufsize); } break; case VXGE_GET_DRIVER_STATS: bufsize = sizeof(vxge_drv_stats_t) * vdev->no_of_vpath; drv_stat = (vxge_drv_stats_t *) vxge_mem_alloc(bufsize); if (drv_stat != NULL) { for (i = 0; i < vdev->no_of_vpath; i++) { vpath = &(vdev->vpaths[i]); vpath->driver_stats.rx_lro_queued += vpath->lro.lro_queued; vpath->driver_stats.rx_lro_flushed += vpath->lro.lro_flushed; vxge_os_memcpy(&drv_stat[i], &(vpath->driver_stats), sizeof(vxge_drv_stats_t)); } - err = copyout(drv_stat, ifr->ifr_data, bufsize); + err = copyout(drv_stat, ifr_data_get_ptr(ifr), bufsize); if (err != 0) device_printf(vdev->ndev, "failed driver statistics query\n"); vxge_mem_free(drv_stat, bufsize); } break; case VXGE_GET_BANDWIDTH: - bw_info = (vxge_bw_info_t *) ifr->ifr_data; + bw_info = ifr_data_get_ptr(ifr); if ((vdev->config.hw_info.func_id != 0) && (vdev->hw_fw_version < VXGE_FW_VERSION(1, 8, 0))) break; if (vdev->config.hw_info.func_id != 0) bw_info->func_id = vdev->config.hw_info.func_id; status = vxge_bw_priority_get(vdev, bw_info); if (status != VXGE_HAL_OK) break; - err = copyout(bw_info, ifr->ifr_data, sizeof(vxge_bw_info_t)); + err = copyout(bw_info, ifr_data_get_ptr(ifr), + sizeof(vxge_bw_info_t)); break; case VXGE_SET_BANDWIDTH: if (vdev->is_privilaged) err = vxge_bw_priority_set(vdev, ifr); break; case VXGE_SET_PORT_MODE: if (vdev->is_privilaged) { if (vdev->config.hw_info.ports == VXGE_DUAL_PORT_MODE) { - port_info = (vxge_port_info_t *) ifr->ifr_data; + port_info = ifr_data_get_ptr(ifr); vdev->config.port_mode = port_info->port_mode; err = vxge_port_mode_update(vdev); if (err != ENXIO) err = VXGE_HAL_FAIL; else { err = VXGE_HAL_OK; device_printf(vdev->ndev, "PLEASE POWER CYCLE THE SYSTEM\n"); } } } break; case VXGE_GET_PORT_MODE: if (vdev->is_privilaged) { if (vdev->config.hw_info.ports == VXGE_DUAL_PORT_MODE) { - port_info = (vxge_port_info_t *) ifr->ifr_data; + port_info = ifr_data_get_ptr(ifr); err = vxge_port_mode_get(vdev, port_info); if (err == VXGE_HAL_OK) { - err = copyout(port_info, ifr->ifr_data, + err = copyout(port_info, + ifr_data_get_ptr(ifr), sizeof(vxge_port_info_t)); } } } break; default: break; } return (err); } int vxge_bw_priority_config(vxge_dev_t *vdev) { u32 i; int err = EINVAL; for (i = 0; i < vdev->no_of_func; i++) { err = vxge_bw_priority_update(vdev, i, TRUE); if (err != 0) break; } return (err); } int vxge_bw_priority_set(vxge_dev_t *vdev, struct ifreq *ifr) { int err; u32 func_id; vxge_bw_info_t *bw_info; - bw_info = (vxge_bw_info_t *) ifr->ifr_data; + bw_info = ifr_data_get_ptr(ifr); func_id = bw_info->func_id; vdev->config.bw_info[func_id].priority = bw_info->priority; vdev->config.bw_info[func_id].bandwidth = bw_info->bandwidth; err = vxge_bw_priority_update(vdev, func_id, FALSE); return (err); } int vxge_bw_priority_update(vxge_dev_t *vdev, u32 func_id, bool binit) { u32 i, set = 0; u32 bandwidth, priority, vpath_count; u64 vpath_list[VXGE_HAL_MAX_VIRTUAL_PATHS]; vxge_hal_device_t *hldev; vxge_hal_vp_config_t *vp_config; vxge_hal_status_e status = VXGE_HAL_OK; hldev = vdev->devh; status = vxge_hal_get_vpath_list(vdev->devh, func_id, vpath_list, &vpath_count); if (status != VXGE_HAL_OK) return (status); for (i = 0; i < vpath_count; i++) { vp_config = &(hldev->config.vp_config[vpath_list[i]]); /* Configure Bandwidth */ if (vdev->config.bw_info[func_id].bandwidth != VXGE_HAL_VPATH_BW_LIMIT_DEFAULT) { set = 1; bandwidth = vdev->config.bw_info[func_id].bandwidth; if (bandwidth < 
VXGE_HAL_VPATH_BW_LIMIT_MIN || bandwidth > VXGE_HAL_VPATH_BW_LIMIT_MAX) { bandwidth = VXGE_HAL_VPATH_BW_LIMIT_DEFAULT; } vp_config->bandwidth = bandwidth; } /* * If b/w limiting is enabled on any of the * VFs, then for remaining VFs set the priority to 3 * and b/w limiting to max i.e 10 Gb) */ if (vp_config->bandwidth == VXGE_HAL_VPATH_BW_LIMIT_DEFAULT) vp_config->bandwidth = VXGE_HAL_VPATH_BW_LIMIT_MAX; if (binit && vdev->config.low_latency) { if (func_id == 0) vdev->config.bw_info[func_id].priority = VXGE_DEFAULT_VPATH_PRIORITY_HIGH; } /* Configure Priority */ if (vdev->config.bw_info[func_id].priority != VXGE_HAL_VPATH_PRIORITY_DEFAULT) { set = 1; priority = vdev->config.bw_info[func_id].priority; if (priority < VXGE_HAL_VPATH_PRIORITY_MIN || priority > VXGE_HAL_VPATH_PRIORITY_MAX) { priority = VXGE_HAL_VPATH_PRIORITY_DEFAULT; } vp_config->priority = priority; } else if (vdev->config.low_latency) { set = 1; vp_config->priority = VXGE_DEFAULT_VPATH_PRIORITY_LOW; } if (set == 1) { status = vxge_hal_rx_bw_priority_set(vdev->devh, vpath_list[i]); if (status != VXGE_HAL_OK) break; if (vpath_list[i] < VXGE_HAL_TX_BW_VPATH_LIMIT) { status = vxge_hal_tx_bw_priority_set( vdev->devh, vpath_list[i]); if (status != VXGE_HAL_OK) break; } } } return ((status == VXGE_HAL_OK) ? 0 : EINVAL); } /* * vxge_intr_coalesce_tx * Changes interrupt coalescing if the interrupts are not within a range * Return Value: Nothing */ void vxge_intr_coalesce_tx(vxge_vpath_t *vpath) { u32 timer; if (!vpath->tx_intr_coalesce) return; vpath->tx_interrupts++; if (ticks > vpath->tx_ticks + hz/100) { vpath->tx_ticks = ticks; timer = vpath->tti_rtimer_val; if (vpath->tx_interrupts > VXGE_MAX_TX_INTERRUPT_COUNT) { if (timer != VXGE_TTI_RTIMER_ADAPT_VAL) { vpath->tti_rtimer_val = VXGE_TTI_RTIMER_ADAPT_VAL; vxge_hal_vpath_dynamic_tti_rtimer_set( vpath->handle, vpath->tti_rtimer_val); } } else { if (timer != 0) { vpath->tti_rtimer_val = 0; vxge_hal_vpath_dynamic_tti_rtimer_set( vpath->handle, vpath->tti_rtimer_val); } } vpath->tx_interrupts = 0; } } /* * vxge_intr_coalesce_rx * Changes interrupt coalescing if the interrupts are not within a range * Return Value: Nothing */ void vxge_intr_coalesce_rx(vxge_vpath_t *vpath) { u32 timer; if (!vpath->rx_intr_coalesce) return; vpath->rx_interrupts++; if (ticks > vpath->rx_ticks + hz/100) { vpath->rx_ticks = ticks; timer = vpath->rti_rtimer_val; if (vpath->rx_interrupts > VXGE_MAX_RX_INTERRUPT_COUNT) { if (timer != VXGE_RTI_RTIMER_ADAPT_VAL) { vpath->rti_rtimer_val = VXGE_RTI_RTIMER_ADAPT_VAL; vxge_hal_vpath_dynamic_rti_rtimer_set( vpath->handle, vpath->rti_rtimer_val); } } else { if (timer != 0) { vpath->rti_rtimer_val = 0; vxge_hal_vpath_dynamic_rti_rtimer_set( vpath->handle, vpath->rti_rtimer_val); } } vpath->rx_interrupts = 0; } } /* * vxge_methods FreeBSD device interface entry points */ static device_method_t vxge_methods[] = { DEVMETHOD(device_probe, vxge_probe), DEVMETHOD(device_attach, vxge_attach), DEVMETHOD(device_detach, vxge_detach), DEVMETHOD(device_shutdown, vxge_shutdown), DEVMETHOD_END }; static driver_t vxge_driver = { "vxge", vxge_methods, sizeof(vxge_dev_t), }; static devclass_t vxge_devclass; DRIVER_MODULE(vxge, pci, vxge_driver, vxge_devclass, 0, 0); Index: stable/11/sys/dev/wl/if_wl.c =================================================================== --- stable/11/sys/dev/wl/if_wl.c (revision 332287) +++ stable/11/sys/dev/wl/if_wl.c (revision 332288) @@ -1,2622 +1,2622 @@ /*- * Redistribution and use in source and binary forms, with or without * modification, are 
permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain all copyright * notices, this list of conditions and the following disclaimer. * 2. The names of the authors may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ /* * if_wl.c - original MACH, then BSDI ISA wavelan driver * ported to mach by Anders Klemets * to BSDI by Robert Morris * to FreeBSD by Jim Binkley * to FreeBSD 2.2+ by Michael Smith * * 2.2 update: * Changed interface to match 2.1-2.2 differences. * Implement IRQ selection logic in wlprobe() * Implement PSA updating. * Pruned heading comments for relevance. * Ripped out all the 'interface counters' cruft. * Cut the missing-interrupt timer back to 100ms. * 2.2.1 update: * now supports all multicast mode (mrouted will work), * but unfortunately must do that by going into promiscuous mode * NWID sysctl added so that normally promiscuous mode is NWID-specific * but can be made NWID-inspecific * 7/14/97 jrb * * Work done: * Ported to FreeBSD, got promiscuous mode working with bpfs, * and rewired timer routine. The i82586 will hang occasionally on output * and the watchdog timer will kick it if so and log an entry. * 2 second timeout there. Apparently the chip loses an interrupt. * Code borrowed from if_ie.c for watchdog timer. * * The wavelan card is a 2mbit radio modem that emulates ethernet; * i.e., it uses MAC addresses. This should not be a surprise since * it uses an ethernet controller as a major hw item. * It can broadcast, unicast or apparently multicast in a base cell * using an omni-directional antennae that is * about 800 feet around the base cell barring walls and metal. * With directional antennae, it can be used point to point over a mile * or so apparently (haven't tried that). * * There are ISA and pcmcia versions (not supported by this code). * The ISA card has an Intel 82586 lan controller on it. It consists * of 2 pieces of hw, the lan controller (intel) and a radio-modem. * The latter has an extra set of controller registers that has nothing * to do with the i82586 and allows setting and monitoring of radio * signal strength, etc. There is a nvram area called the PSA that * contains a number of setup variables including the IRQ and so-called * NWID or Network ID. The NWID must be set the same for all radio * cards to communicate (unless you are using the ATT/NCR roaming feature * with their access points. There is no support for that here. Roaming * involves a link-layer beacon sent out from the access points. End * stations monitor the signal strength and only use the strongest * access point). 
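* For example, two stations whose PSAs are programmed with NWID 0x1234 will
* hear each other, while a third set to 0x5678 will not (unless promiscuous
* mode is used to ignore NWIDs, as described for the wl_ignore_nwid sysctl
* further down).  The NWID values here are arbitrary examples.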
This driver assumes that the base ISA port, IRQ, * and NWID are first set in nvram via the dos-side "instconf.exe" utility * supplied with the card. This driver takes the ISA port from * the kernel configuration setup, and then determines the IRQ either * from the kernel config (if an explicit IRQ is set) or from the * PSA on the card if not. * The hw also magically just uses the IRQ set in the nvram. * The NWID is used magically as well by the radio-modem * to determine which packets to keep or throw out. * * sample config: * * device wl0 at isa? port 0x300 net irq ? * * Ifdefs: * 1. WLDEBUG. (off) - if turned on enables IFF_DEBUG set via ifconfig debug * 2. MULTICAST (on) - turned on and works up to and including mrouted * 3. WLCACHE (off) - define to turn on a signal strength * (and other metric) cache that is indexed by sender MAC address. * Apps can read this out to learn the remote signal strength of a * sender. Note that it has a switch so that it only stores * broadcast/multicast senders but it could be set to store unicast * too only. Size is hardwired in if_wl_wavelan.h * * one further note: promiscuous mode is a curious thing. In this driver, * promiscuous mode apparently CAN catch ALL packets and ignore the NWID * setting. This is probably more useful in a sense (for snoopers) if * you are interested in all traffic as opposed to if you are interested * in just your own. There is a driver specific sysctl to turn promiscuous * from just promiscuous to wildly promiscuous... * * This driver also knows how to load the synthesizers in the 2.4 Gz * ISA Half-card, Product number 847647476 (USA/FCC IEEE Channel set). * This product consists of a "mothercard" that contains the 82586, * NVRAM that holds the PSA, and the ISA-buss interface custom ASIC. * The radio transceiver is a "daughtercard" called the WaveMODEM which * connects to the mothercard through two single-inline connectors: a * 20-pin connector provides DC-power and modem signals, and a 3-pin * connector which exports the antenna connection. The code herein * loads the receive and transmit synthesizers and the corresponding * transmitter output power value from an EEPROM controlled through * additional registers via the MMC. The EEPROM address selected * are those whose values are preset by the DOS utility programs * provided with the product, and this provides compatible operation * with the DOS Packet Driver software. A future modification will * add the necessary functionality to this driver and to the wlconfig * utility to completely replace the DOS Configuration Utilities. * The 2.4 Gz WaveMODEM is described in document number 407-024692/E, * and is available through Lucent Technologies OEM supply channels. * --RAB 1997/06/08. */ #define MULTICAST 1 /* * Olivetti PC586 Mach Ethernet driver v1.0 * Copyright Ing. C. Olivetti & C. S.p.A. 1988, 1989 * All rights reserved. * */ /* Copyright 1988, 1989 by Olivetti Advanced Technology Center, Inc., Cupertino, California. All Rights Reserved Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appears in all copies and that both the copyright notice and this permission notice appear in supporting documentation, and that the name of Olivetti not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. 
OLIVETTI DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL OLIVETTI BE LIABLE FOR ANY SPECIAL, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT, NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUR OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ /* Copyright 1988, 1989 by Intel Corporation, Santa Clara, California. All Rights Reserved Permission to use, copy, modify, and distribute this software and its documentation for any purpose and without fee is hereby granted, provided that the above copyright notice appears in all copies and that both the copyright notice and this permission notice appear in supporting documentation, and that the name of Intel not be used in advertising or publicity pertaining to distribution of the software without specific, written prior permission. INTEL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL INTEL BE LIABLE FOR ANY SPECIAL, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT, NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include __FBSDID("$FreeBSD$"); /* * NOTE: * by rvb: * 1. The best book on the 82586 is: * LAN Components User's Manual by Intel * The copy I found was dated 1984. This really tells you * what the state machines are doing * 2. In the current design, we only do one write at a time, * though the hardware is capable of chaining and possibly * even batching. The problem is that we only make one * transmit buffer available in sram space. 
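* In this driver that single buffer shows up in wlbldcu() below: exactly one
* tbd_t is initialized, with next_tbd_offset = I82586NULL, so frames go to
* the chip strictly one at a time; wlstart() marks the interface busy and the
* next frame is only started from the transmit-complete interrupt.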
*/ #include "opt_wavelan.h" #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #include #include #endif #include #include /* was 1000 in original, fed to DELAY(x) */ #define DELAYCONST 1000 #include /* Definitions for the Intel chip */ #include #include static char t_packet[ETHERMTU + sizeof(struct ether_header) + sizeof(long)]; struct wl_softc { device_t dev; struct ifnet *ifp; u_char psa[0x40]; u_char nwid[2]; /* current radio modem nwid */ int flags; int tbusy; /* flag to determine if xmit is busy */ u_short begin_fd; u_short end_fd; u_short end_rbd; u_short hacr; /* latest host adapter CR command */ short mode; u_char chan24; /* 2.4 Gz: channel number/EEPROM Area # */ u_short freq24; /* 2.4 Gz: resulting frequency */ int rid_ioport; int rid_irq; struct resource *res_ioport; struct resource *res_irq; void *intr_cookie; struct mtx wl_mtx; struct callout watchdog_timer; #ifdef WLCACHE int w_sigitems; /* number of cached entries */ /* array of cache entries */ struct w_sigcache w_sigcache[ MAXCACHEITEMS ]; int w_nextcache; /* next free cache entry */ int w_wrapindex; /* next "free" cache entry */ #endif }; #define WL_LOCK(_sc) mtx_lock(&(_sc)->wl_mtx) #define WL_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->wl_mtx, MA_OWNED) #define WL_UNLOCK(_sc) mtx_unlock(&(_sc)->wl_mtx) static int wlprobe(device_t); static int wlattach(device_t); static int wldetach(device_t); static device_method_t wl_methods[] = { DEVMETHOD(device_probe, wlprobe), DEVMETHOD(device_attach, wlattach), DEVMETHOD(device_detach, wldetach), { 0, 0} }; static driver_t wl_driver = { "wl", wl_methods, sizeof (struct wl_softc) }; devclass_t wl_devclass; DRIVER_MODULE(wl, isa, wl_driver, wl_devclass, 0, 0); MODULE_DEPEND(wl, isa, 1, 1, 1); MODULE_DEPEND(wl, ether, 1, 1, 1); static struct isa_pnp_id wl_ids[] = { {0, NULL} }; /* * XXX The Wavelan appears to be prone to dropping stuff if you talk to * it too fast. This disgusting hack inserts a delay after each packet * is queued which helps avoid this behaviour on fast systems. */ static int wl_xmit_delay = 250; SYSCTL_INT(_machdep, OID_AUTO, wl_xmit_delay, CTLFLAG_RW, &wl_xmit_delay, 0, ""); /* * not XXX, but ZZZ (bizarre). * promiscuous mode can be toggled to ignore NWIDs. By default, * it does not. Caution should be exercised about combining * this mode with IFF_ALLMULTI which puts this driver in * promiscuous mode. 
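*
* Illustrative usage (the MIB name comes from the SYSCTL_INT() declaration
* that follows; the values are just examples):
*
*	sysctl machdep.wl_ignore_nwid=1		(wildly promiscuous: ignore NWID)
*	sysctl machdep.wl_ignore_nwid=0		(default: keep NWID filtering)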
*/ static int wl_ignore_nwid = 0; SYSCTL_INT(_machdep, OID_AUTO, wl_ignore_nwid, CTLFLAG_RW, &wl_ignore_nwid, 0, ""); /* * Emit diagnostics about transmission problems */ static int xmt_watch = 0; SYSCTL_INT(_machdep, OID_AUTO, wl_xmit_watch, CTLFLAG_RW, &xmt_watch, 0, ""); /* * Collect SNR statistics */ static int gathersnr = 0; SYSCTL_INT(_machdep, OID_AUTO, wl_gather_snr, CTLFLAG_RW, &gathersnr, 0, ""); static int wl_allocate_resources(device_t device); static int wl_deallocate_resources(device_t device); static void wlstart(struct ifnet *ifp); static void wlstart_locked(struct ifnet *ifp); static void wlinit(void *xsc); static void wlinit_locked(struct wl_softc *sc); static int wlioctl(struct ifnet *ifp, u_long cmd, caddr_t data); static void wlwatchdog(void *arg); static void wlintr(void *arg); static void wlxmt(struct wl_softc *sc, struct mbuf *m); static int wldiag(struct wl_softc *sc); static int wlconfig(struct wl_softc *sc); static int wlcmd(struct wl_softc *sc, char *str); static void wlmmcstat(struct wl_softc *sc); static u_short wlbldru(struct wl_softc *sc); static u_short wlmmcread(struct wl_softc *sc, u_short reg); static void wlinitmmc(struct wl_softc *sc); static int wlhwrst(struct wl_softc *sc); static void wlrustrt(struct wl_softc *sc); static void wlbldcu(struct wl_softc *sc); static int wlack(struct wl_softc *sc); static int wlread(struct wl_softc *sc, u_short fd_p); static void getsnr(struct wl_softc *sc); static void wlrcv(struct wl_softc *sc); static int wlrequeue(struct wl_softc *sc, u_short fd_p); static void wlsftwsleaze(u_short *countp, u_char **mb_pp, struct mbuf **tm_pp, struct wl_softc *sc); static void wlhdwsleaze(u_short *countp, u_char **mb_pp, struct mbuf **tm_pp, struct wl_softc *sc); #ifdef WLDEBUG static void wltbd(struct wl_softc *sc); #endif static void wlgetpsa(struct wl_softc *sc, u_char *buf); static void wlsetpsa(struct wl_softc *sc); static u_short wlpsacrc(u_char *buf); static void wldump(struct wl_softc *sc); #ifdef WLCACHE static void wl_cache_store(struct wl_softc *, struct ether_header *, struct mbuf *); static void wl_cache_zero(struct wl_softc *sc); #endif /* array for maping irq numbers to values for the irq parameter register */ static int irqvals[16] = { 0, 0, 0, 0x01, 0x02, 0x04, 0, 0x08, 0, 0, 0x10, 0x20, 0x40, 0, 0, 0x80 }; /* * wlprobe: * * This function "probes" or checks for the WaveLAN board on the bus to * see if it is there. As far as I can tell, the best break between this * routine and the attach code is to simply determine whether the board * is configured in properly. Currently my approach to this is to write * and read a word from the SRAM on the board being probed. If the word * comes back properly then we assume the board is there. The config * code expects to see a successful return from the probe routine before * attach will be called. * * input : address device is mapped to, and unit # being checked * output : a '1' is returned if the board exists, and a 0 otherwise * */ static int wlprobe(device_t device) { struct wl_softc *sc; char *str = "wl%d: board out of range [0..%d]\n"; u_char inbuf[100]; rman_res_t junk, sirq; int error, irq; error = ISA_PNP_PROBE(device_get_parent(device), device, wl_ids); if (error == ENXIO || error == 0) return (error); sc = device_get_softc(device); error = wl_allocate_resources(device); if (error) goto errexit; /* TBD. not true. 
* regular CMD() will not work, since no softc yet */ #define PCMD(sc, hacr) WL_WRITE_2((sc), HACR, (hacr)) PCMD(sc, HACR_RESET); /* reset the board */ DELAY(DELAYCONST); /* >> 4 clocks at 6MHz */ PCMD(sc, HACR_RESET); /* reset the board */ DELAY(DELAYCONST); /* >> 4 clocks at 6MHz */ /* clear reset command and set PIO#1 in autoincrement mode */ PCMD(sc, HACR_DEFAULT); PCMD(sc, HACR_DEFAULT); WL_WRITE_2(sc, PIOR1, 0); /* go to beginning of RAM */ WL_WRITE_MULTI_2(sc, PIOP1, str, strlen(str)/2+1); /* write string */ WL_WRITE_2(sc, PIOR1, 0); /* rewind */ WL_READ_MULTI_2(sc, PIOP1, inbuf, strlen(str)/2+1); /* read result */ if (bcmp(str, inbuf, strlen(str))) { error = ENXIO; goto errexit; } sc->chan24 = 0; /* 2.4 Gz: config channel */ sc->freq24 = 0; /* 2.4 Gz: frequency */ /* read the PSA from the board into temporary storage */ wlgetpsa(sc, inbuf); /* We read the IRQ value from the PSA on the board. */ for (irq = 15; irq >= 0; irq--) if (irqvals[irq] == inbuf[WLPSA_IRQNO]) break; if ((irq == 0) || (irqvals[irq] == 0)){ device_printf(device, "PSA corrupt (invalid IRQ value)\n"); } else { /* * If the IRQ requested by the PSA is already claimed by another * device, the board won't work, but the user can still access the * driver to change the IRQ. */ if (bus_get_resource(device, SYS_RES_IRQ, 0, &sirq, &junk)) goto errexit; if (irq != (int)sirq) device_printf(device, "board is configured for interrupt %d\n", irq); } wl_deallocate_resources(device); return (0); errexit: wl_deallocate_resources(device); return (error); } /* * wlattach: * * This function attaches a WaveLAN board to the "system". The rest of * runtime structures are initialized here (this routine is called after * a successful probe of the board). Once the ethernet address is read * and stored, the board's ifnet structure is attached and readied. 
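* The address itself comes out of the PSA: WLPSA_MACSEL selects between the
* universal address (WLPSA_UNIMAC) and the locally administered one
* (WLPSA_LOCALMAC), and the default NWID bytes are copied into sc->nwid[]
* at the same time.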
* * input : isa_dev structure setup in autoconfig * output : board structs and ifnet is setup * */ static int wlattach(device_t device) { struct wl_softc *sc; int error, i, j; struct ifnet *ifp; u_char eaddr[6]; sc = device_get_softc(device); sc->dev = device; ifp = sc->ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { device_printf(device, "can not if_alloc()\n"); return (ENOSPC); } mtx_init(&sc->wl_mtx, device_get_nameunit(device), MTX_NETWORK_LOCK, MTX_DEF); callout_init_mtx(&sc->watchdog_timer, &sc->wl_mtx, 0); error = wl_allocate_resources(device); if (error) { wl_deallocate_resources(device); return (ENXIO); } #ifdef WLDEBUG printf("wlattach: base %jx, unit %d\n", rman_get_start(sc->res_ioport), device_get_unit(device)); #endif sc->flags = 0; sc->mode = 0; sc->hacr = HACR_RESET; CMD(sc); /* reset the board */ DELAY(DELAYCONST); /* >> 4 clocks at 6MHz */ /* clear reset command and set PIO#2 in parameter access mode */ sc->hacr = (HACR_DEFAULT & ~HACR_16BITS); CMD(sc); /* Read the PSA from the board for our later reference */ wlgetpsa(sc, sc->psa); /* fetch NWID */ sc->nwid[0] = sc->psa[WLPSA_NWID]; sc->nwid[1] = sc->psa[WLPSA_NWID+1]; /* fetch MAC address - decide which one first */ if (sc->psa[WLPSA_MACSEL] & 1) j = WLPSA_LOCALMAC; else j = WLPSA_UNIMAC; for (i=0; i < WAVELAN_ADDR_SIZE; ++i) eaddr[i] = sc->psa[j + i]; /* enter normal 16 bit mode operation */ sc->hacr = HACR_DEFAULT; CMD(sc); wlinitmmc(sc); WL_WRITE_2(sc, PIOR1, OFFSET_SCB + 8); /* address of scb_crcerrs */ WL_WRITE_2(sc, PIOP1, 0); /* clear scb_crcerrs */ WL_WRITE_2(sc, PIOP1, 0); /* clear scb_alnerrs */ WL_WRITE_2(sc, PIOP1, 0); /* clear scb_rscerrs */ WL_WRITE_2(sc, PIOP1, 0); /* clear scb_ovrnerrs */ ifp->if_softc = sc; ifp->if_mtu = WAVELAN_MTU; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX; #ifdef WLDEBUG ifp->if_flags |= IFF_DEBUG; #endif #if MULTICAST ifp->if_flags |= IFF_MULTICAST; #endif /* MULTICAST */ if_initname(ifp, device_get_name(device), device_get_unit(device)); ifp->if_init = wlinit; ifp->if_start = wlstart; ifp->if_ioctl = wlioctl; ifp->if_snd.ifq_maxlen = ifqmaxlen; /* no entries ifp->if_done ifp->if_reset */ ether_ifattach(ifp, eaddr); if_printf(ifp, "NWID 0x%02x%02x", sc->nwid[0], sc->nwid[1]); if (sc->freq24) printf(", Freq %d MHz",sc->freq24); /* 2.4 Gz */ printf("\n"); /* 2.4 Gz */ bus_setup_intr(device, sc->res_irq, INTR_TYPE_NET, NULL, wlintr, sc, &sc->intr_cookie); if (bootverbose) wldump(sc); device_printf(device, "WARNING: This driver is deprecated and will be removed.\n"); return (0); } static int wldetach(device_t device) { struct wl_softc *sc = device_get_softc(device); struct ifnet *ifp; ifp = sc->ifp; ether_ifdetach(ifp); WL_LOCK(sc); /* reset the board */ sc->hacr = HACR_RESET; CMD(sc); sc->hacr = HACR_DEFAULT; CMD(sc); callout_stop(&sc->watchdog_timer); WL_UNLOCK(sc); callout_drain(&sc->watchdog_timer); if (sc->intr_cookie != NULL) { bus_teardown_intr(device, sc->res_irq, sc->intr_cookie); sc->intr_cookie = NULL; } wl_deallocate_resources(device); if_free(ifp); mtx_destroy(&sc->wl_mtx); return (0); } static int wl_allocate_resources(device_t device) { struct wl_softc *sc = device_get_softc(device); int ports = 16; /* Number of ports */ sc->res_ioport = bus_alloc_resource_anywhere(device, SYS_RES_IOPORT, &sc->rid_ioport, ports, RF_ACTIVE); if (sc->res_ioport == NULL) goto errexit; sc->res_irq = bus_alloc_resource_any(device, SYS_RES_IRQ, &sc->rid_irq, RF_SHAREABLE|RF_ACTIVE); if (sc->res_irq == NULL) goto errexit; return (0); errexit: wl_deallocate_resources(device); return (ENXIO); } 
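/*
 * Illustrative only: a /boot/device.hints equivalent of the "device wl0 at
 * isa? port 0x300 net irq ?" sample config quoted earlier.  The port and IRQ
 * values are placeholders; they have to match what instconf.exe stored in
 * the PSA, and wlprobe() prints a warning if the configured IRQ disagrees
 * with the one found in the PSA.
 *
 *	hint.wl.0.at="isa"
 *	hint.wl.0.port="0x300"
 *	hint.wl.0.irq="11"
 */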
static int wl_deallocate_resources(device_t device) { struct wl_softc *sc = device_get_softc(device); if (sc->res_irq != 0) { bus_release_resource(device, SYS_RES_IRQ, sc->rid_irq, sc->res_irq); sc->res_irq = 0; } if (sc->res_ioport != 0) { bus_release_resource(device, SYS_RES_IOPORT, sc->rid_ioport, sc->res_ioport); sc->res_ioport = 0; } return (0); } /* * Print out interesting information about the 82596. */ static void wldump(struct wl_softc *sc) { int i; printf("hasr %04x\n", WL_READ_2(sc, HASR)); printf("scb at %04x:\n ", OFFSET_SCB); WL_WRITE_2(sc, PIOR1, OFFSET_SCB); for (i = 0; i < 8; i++) printf("%04x ", WL_READ_2(sc, PIOP1)); printf("\n"); printf("cu at %04x:\n ", OFFSET_CU); WL_WRITE_2(sc, PIOR1, OFFSET_CU); for (i = 0; i < 8; i++) printf("%04x ", WL_READ_2(sc, PIOP1)); printf("\n"); printf("tbd at %04x:\n ", OFFSET_TBD); WL_WRITE_2(sc, PIOR1, OFFSET_TBD); for (i = 0; i < 4; i++) printf("%04x ", WL_READ_2(sc, PIOP1)); printf("\n"); } /* Initialize the Modem Management Controller */ static void wlinitmmc(struct wl_softc *sc) { int configured; int mode = sc->mode; int i; /* 2.4 Gz */ /* enter 8 bit operation */ sc->hacr = (HACR_DEFAULT & ~HACR_16BITS); CMD(sc); configured = sc->psa[WLPSA_CONFIGURED] & 1; /* * Set default modem control parameters. Taken from NCR document * 407-0024326 Rev. A */ MMC_WRITE(MMC_JABBER_ENABLE, 0x01); MMC_WRITE(MMC_ANTEN_SEL, 0x02); MMC_WRITE(MMC_IFS, 0x20); MMC_WRITE(MMC_MOD_DELAY, 0x04); MMC_WRITE(MMC_JAM_TIME, 0x38); MMC_WRITE(MMC_DECAY_PRM, 0x00); /* obsolete ? */ MMC_WRITE(MMC_DECAY_UPDAT_PRM, 0x00); if (!configured) { MMC_WRITE(MMC_LOOPT_SEL, 0x00); if (sc->psa[WLPSA_COMPATNO] & 1) { MMC_WRITE(MMC_THR_PRE_SET, 0x01); /* 0x04 for AT and 0x01 for MCA */ } else { MMC_WRITE(MMC_THR_PRE_SET, 0x04); /* 0x04 for AT and 0x01 for MCA */ } MMC_WRITE(MMC_QUALITY_THR, 0x03); } else { /* use configuration defaults from parameter storage area */ if (sc->psa[WLPSA_NWIDENABLE] & 1) { if ((mode & (MOD_PROM | MOD_ENAL)) && wl_ignore_nwid) { MMC_WRITE(MMC_LOOPT_SEL, 0x40); } else { MMC_WRITE(MMC_LOOPT_SEL, 0x00); } } else { MMC_WRITE(MMC_LOOPT_SEL, 0x40); /* disable network id check */ } MMC_WRITE(MMC_THR_PRE_SET, sc->psa[WLPSA_THRESH]); MMC_WRITE(MMC_QUALITY_THR, sc->psa[WLPSA_QUALTHRESH]); } MMC_WRITE(MMC_FREEZE, 0x00); MMC_WRITE(MMC_ENCR_ENABLE, 0x00); MMC_WRITE(MMC_NETW_ID_L,sc->nwid[1]); /* set NWID */ MMC_WRITE(MMC_NETW_ID_H,sc->nwid[0]); /* enter normal 16 bit mode operation */ sc->hacr = HACR_DEFAULT; CMD(sc); CMD(sc); /* virtualpc1 needs this! 
*/ if (sc->psa[WLPSA_COMPATNO]== /* 2.4 Gz: half-card ver */ WLPSA_COMPATNO_WL24B) { /* 2.4 Gz */ i=sc->chan24<<4; /* 2.4 Gz: position ch # */ MMC_WRITE(MMC_EEADDR,i+0x0f); /* 2.4 Gz: named ch, wc=16 */ MMC_WRITE(MMC_EECTRL,MMC_EECTRL_DWLD+ /* 2.4 Gz: Download Synths */ MMC_EECTRL_EEOP_READ); /* 2.4 Gz: Read EEPROM */ for (i=0; i<1000; ++i) { /* 2.4 Gz: wait for download */ DELAY(40); /* 2.4 Gz */ if ((wlmmcread(sc, MMC_EECTRLstat) /* 2.4 Gz: check DWLD and */ &(MMC_EECTRLstat_DWLD /* 2.4 Gz: EEBUSY */ +MMC_EECTRLstat_EEBUSY))==0) /* 2.4 Gz: */ break; /* 2.4 Gz: download finished */ } /* 2.4 Gz */ if (i==1000) printf("wl: synth load failed\n"); /* 2.4 Gz */ MMC_WRITE(MMC_EEADDR,0x61); /* 2.4 Gz: default pwr, wc=2 */ MMC_WRITE(MMC_EECTRL,MMC_EECTRL_DWLD+ /* 2.4 Gz: Download Xmit Pwr */ MMC_EECTRL_EEOP_READ); /* 2.4 Gz: Read EEPROM */ for (i=0; i<1000; ++i) { /* 2.4 Gz: wait for download */ DELAY(40); /* 2.4 Gz */ if ((wlmmcread(sc, MMC_EECTRLstat) /* 2.4 Gz: check DWLD and */ &(MMC_EECTRLstat_DWLD /* 2.4 Gz: EEBUSY */ +MMC_EECTRLstat_EEBUSY))==0) /* 2.4 Gz: */ break; /* 2.4 Gz: download finished */ } /* 2.4 Gz */ if (i==1000) printf("wl: xmit pwr load failed\n"); /* 2.4 Gz */ MMC_WRITE(MMC_ANALCTRL, /* 2.4 Gz: EXT ant+polarity */ MMC_ANALCTRL_ANTPOL + /* 2.4 Gz: */ MMC_ANALCTRL_EXTANT); /* 2.4 Gz: */ i=sc->chan24<<4; /* 2.4 Gz: position ch # */ MMC_WRITE(MMC_EEADDR,i); /* 2.4 Gz: get frequency */ MMC_WRITE(MMC_EECTRL, /* 2.4 Gz: EEPROM read */ MMC_EECTRL_EEOP_READ); /* 2.4 Gz: */ DELAY(40); /* 2.4 Gz */ i = wlmmcread(sc, MMC_EEDATALrv) /* 2.4 Gz: freq val */ + (wlmmcread(sc, MMC_EEDATAHrv)<<8); /* 2.4 Gz */ sc->freq24 = (i>>6)+2400; /* 2.4 Gz: save real freq */ } } /* * wlinit: * * Another routine that interfaces the "if" layer to this driver. * Simply resets the structures that are used by "upper layers". * As well as calling wlhwrst that does reset the WaveLAN board. * * input : softc pointer for this interface * output : structures (if structs) and board are reset * */ static void wlinit(void *xsc) { struct wl_softc *sc = xsc; WL_LOCK(sc); wlinit_locked(sc); WL_UNLOCK(sc); } static void wlinit_locked(struct wl_softc *sc) { struct ifnet *ifp = sc->ifp; int stat; #ifdef WLDEBUG if (sc->ifp->if_flags & IFF_DEBUG) if_printf(ifp, "entered wlinit()\n"); #endif WL_LOCK_ASSERT(sc); if ((stat = wlhwrst(sc)) == TRUE) { sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; /* same as DSF_RUNNING */ /* * OACTIVE is used by upper-level routines * and must be set */ sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; /* same as tbusy below */ sc->flags |= DSF_RUNNING; sc->tbusy = 0; callout_stop(&sc->watchdog_timer); wlstart_locked(ifp); } else { if_printf(ifp, "init(): trouble resetting board.\n"); } } /* * wlhwrst: * * This routine resets the WaveLAN board that corresponds to the * board number passed in. 
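* The sequence is: pulse HACR_RESET, rebuild the command unit structures
* (wlbldcu()), run the 82586 diagnose and configure actions (wldiag() and
* wlconfig()), restart the receive unit (wlrustrt()) and finally re-enable
* interrupts by setting HACR_INTRON.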
* * input : board number to do a hardware reset * output : board is reset * */ static int wlhwrst(struct wl_softc *sc) { #ifdef WLDEBUG if (sc->ifp->if_flags & IFF_DEBUG) if_printf(sc->ifp, "entered wlhwrst()\n"); #endif sc->hacr = HACR_RESET; CMD(sc); /* reset the board */ /* clear reset command and set PIO#1 in autoincrement mode */ sc->hacr = HACR_DEFAULT; CMD(sc); #ifdef WLDEBUG if (sc->ifp->if_flags & IFF_DEBUG) wlmmcstat(sc); /* Display MMC registers */ #endif /* WLDEBUG */ wlbldcu(sc); /* set up command unit structures */ if (wldiag(sc) == 0) return(0); if (wlconfig(sc) == 0) return(0); /* * insert code for loopback test here */ wlrustrt(sc); /* start receive unit */ /* enable interrupts */ sc->hacr = (HACR_DEFAULT | HACR_INTRON); CMD(sc); return(1); } /* * wlbldcu: * * This function builds up the command unit structures. It inits * the scp, iscp, scb, cb, tbd, and tbuf. * */ static void wlbldcu(struct wl_softc *sc) { scp_t scp; iscp_t iscp; scb_t scb; ac_t cb; tbd_t tbd; int i; bzero(&scp, sizeof(scp)); scp.scp_sysbus = 0; scp.scp_iscp = OFFSET_ISCP; scp.scp_iscp_base = 0; WL_WRITE_2(sc, PIOR1, OFFSET_SCP); WL_WRITE_MULTI_2(sc, PIOP1, &scp, sizeof(scp_t)/2); bzero(&iscp, sizeof(iscp)); iscp.iscp_busy = 1; iscp.iscp_scb_offset = OFFSET_SCB; iscp.iscp_scb = 0; iscp.iscp_scb_base = 0; WL_WRITE_2(sc, PIOR1, OFFSET_ISCP); WL_WRITE_MULTI_2(sc, PIOP1, &iscp, sizeof(iscp_t)/2); scb.scb_status = 0; scb.scb_command = SCB_RESET; scb.scb_cbl_offset = OFFSET_CU; scb.scb_rfa_offset = OFFSET_RU; scb.scb_crcerrs = 0; scb.scb_alnerrs = 0; scb.scb_rscerrs = 0; scb.scb_ovrnerrs = 0; WL_WRITE_2(sc, PIOR1, OFFSET_SCB); WL_WRITE_MULTI_2(sc, PIOP1, &scb, sizeof(scb_t)/2); SET_CHAN_ATTN(sc); WL_WRITE_2(sc, PIOR0, OFFSET_ISCP + 0); /* address of iscp_busy */ for (i = 1000000; WL_READ_2(sc, PIOP0) && (i-- > 0); ) continue; if (i <= 0) device_printf(sc->dev, "bldcu(): iscp_busy timeout.\n"); WL_WRITE_2(sc, PIOR0, OFFSET_SCB + 0); /* address of scb_status */ for (i = STATUS_TRIES; i-- > 0; ) { if (WL_READ_2(sc, PIOP0) == (SCB_SW_CX|SCB_SW_CNA)) break; } if (i <= 0) device_printf(sc->dev, "bldcu(): not ready after reset.\n"); wlack(sc); cb.ac_status = 0; cb.ac_command = AC_CW_EL; /* NOP */ cb.ac_link_offset = OFFSET_CU; WL_WRITE_2(sc, PIOR1, OFFSET_CU); WL_WRITE_MULTI_2(sc, PIOP1, &cb, 6/2); tbd.act_count = 0; tbd.next_tbd_offset = I82586NULL; tbd.buffer_addr = 0; tbd.buffer_base = 0; WL_WRITE_2(sc, PIOR1, OFFSET_TBD); WL_WRITE_MULTI_2(sc, PIOP1, &tbd, sizeof(tbd_t)/2); } /* * wlstart: * * send a packet * * input : board number * output : stuff sent to board if any there * */ static void wlstart(struct ifnet *ifp) { struct wl_softc *sc = ifp->if_softc; WL_LOCK(sc); wlstart_locked(ifp); WL_UNLOCK(sc); } static void wlstart_locked(struct ifnet *ifp) { struct mbuf *m; struct wl_softc *sc = ifp->if_softc; int scb_status, cu_status, scb_command; WL_LOCK_ASSERT(sc); #ifdef WLDEBUG if (sc->ifp->if_flags & IFF_DEBUG) if_printf(ifp, "entered wlstart()\n"); #endif WL_WRITE_2(sc, PIOR1, OFFSET_CU); cu_status = WL_READ_2(sc, PIOP1); WL_WRITE_2(sc, PIOR0,OFFSET_SCB + 0); /* scb_status */ scb_status = WL_READ_2(sc, PIOP0); WL_WRITE_2(sc, PIOR0, OFFSET_SCB + 2); scb_command = WL_READ_2(sc, PIOP0); /* * don't need OACTIVE check as tbusy here checks to see * if we are already busy */ if (sc->tbusy) { if ((scb_status & 0x0700) == SCB_CUS_IDLE && (cu_status & AC_SW_B) == 0){ sc->tbusy = 0; callout_stop(&sc->watchdog_timer); sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; /* * This is probably just a race. 
The xmt'r is just * became idle but WE have masked interrupts so ... */ #ifdef WLDEBUG if_printf(ifp, "CU idle, scb %04x %04x cu %04x\n", scb_status, scb_command, cu_status); #endif if (xmt_watch) printf("!!"); } else { return; /* genuinely still busy */ } } else if ((scb_status & 0x0700) == SCB_CUS_ACTV || (cu_status & AC_SW_B)){ #ifdef WLDEBUG if_printf(ifp, "CU unexpectedly busy; scb %04x cu %04x\n", scb_status, cu_status); #endif if (xmt_watch) if_printf(ifp, "busy?!\n"); return; /* hey, why are we busy? */ } /* get ourselves some data */ IF_DEQUEUE(&ifp->if_snd, m); if (m != NULL) { /* let BPF see it before we commit it */ BPF_MTAP(ifp, m); sc->tbusy++; /* set the watchdog timer so that if the board * fails to interrupt we will restart */ /* try 10 ms, not very long */ callout_reset(&sc->watchdog_timer, hz / 100, wlwatchdog, sc); sc->ifp->if_drv_flags |= IFF_DRV_OACTIVE; if_inc_counter(sc->ifp, IFCOUNTER_OPACKETS, 1); wlxmt(sc, m); } else { sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; } return; } /* * wlread: * * This routine does the actual copy of data (including ethernet header * structure) from the WaveLAN to an mbuf chain that will be passed up * to the "if" (network interface) layer. NOTE: we currently * don't handle trailer protocols, so if that is needed, it will * (at least in part) be added here. For simplicities sake, this * routine copies the receive buffers from the board into a local (stack) * buffer until the frame has been copied from the board. Once in * the local buffer, the contents are copied to an mbuf chain that * is then enqueued onto the appropriate "if" queue. * * input : board number, and a frame descriptor address * output : the packet is put into an mbuf chain, and passed up * assumes : if any errors occur, packet is "dropped on the floor" * */ static int wlread(struct wl_softc *sc, u_short fd_p) { struct ifnet *ifp = sc->ifp; fd_t fd; struct ether_header *eh; struct mbuf *m; rbd_t rbd; u_char *mb_p; u_short mlen, len; u_short bytes_in_msg, bytes_in_mbuf, bytes; WL_LOCK_ASSERT(sc); #ifdef WLDEBUG if (sc->ifp->if_flags & IFF_DEBUG) if_printf(ifp, "entered wlread()\n"); #endif if (!((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING))) { if_printf(ifp, "read(): board is not running.\n"); sc->hacr &= ~HACR_INTRON; CMD(sc); /* turn off interrupts */ } /* * Collect message size. */ WL_WRITE_2(sc, PIOR1, fd_p); WL_READ_MULTI_2(sc, PIOP1, &fd, sizeof(fd_t)/2); if (fd.rbd_offset == I82586NULL) { if (wlhwrst(sc) != TRUE) { sc->hacr &= ~HACR_INTRON; CMD(sc); /* turn off interrupts */ if_printf(ifp, "read(): hwrst trouble.\n"); } return 0; } WL_WRITE_2(sc, PIOR1, fd.rbd_offset); WL_READ_MULTI_2(sc, PIOP1, &rbd, sizeof(rbd_t)/2); bytes_in_msg = rbd.status & RBD_SW_COUNT; /* * Allocate a cluster'd mbuf to receive the packet. */ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) { if (wlhwrst(sc) != TRUE) { sc->hacr &= ~HACR_INTRON; CMD(sc); /* turn off interrupts */ if_printf(ifp, "read(): hwrst trouble.\n"); } return 0; } m->m_pkthdr.len = m->m_len = MCLBYTES; m_adj(m, ETHER_ALIGN); /* align IP header */ /* * Collect the message data. */ mlen = 0; mb_p = mtod(m, u_char *); bytes_in_mbuf = m->m_len; /* Put the ethernet header inside the mbuf. 
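* (That is ETHER_HDR_LEN, 14 bytes, taken from the frame descriptor itself;
* the payload is then gathered from the rbd buffers in the loop below.)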
*/ bcopy(&fd.destination[0], mb_p, 14); mb_p += 14; mlen += 14; bytes_in_mbuf -= 14; bytes = min(bytes_in_mbuf, bytes_in_msg); for (;;) { if (bytes & 1) { len = bytes + 1; } else { len = bytes; } WL_WRITE_2(sc, PIOR1, rbd.buffer_addr); WL_READ_MULTI_2(sc, PIOP1, mb_p, len/2); mlen += bytes; if (bytes > bytes_in_mbuf) { /* XXX something wrong, a packet should fit in 1 cluster */ m_freem(m); if_printf(ifp, "read(): packet too large (%u > %u)\n", bytes, bytes_in_mbuf); if (wlhwrst(sc) != TRUE) { sc->hacr &= ~HACR_INTRON; CMD(sc); /* turn off interrupts */ if_printf(ifp, "read(): hwrst trouble.\n"); } return 0; } mb_p += bytes; bytes_in_mbuf -= bytes; bytes_in_msg -= bytes; if (bytes_in_msg == 0) { if (rbd.status & RBD_SW_EOF || rbd.next_rbd_offset == I82586NULL) { break; } WL_WRITE_2(sc, PIOR1, rbd.next_rbd_offset); WL_READ_MULTI_2(sc, PIOP1, &rbd, sizeof(rbd_t)/2); bytes_in_msg = rbd.status & RBD_SW_COUNT; } else { rbd.buffer_addr += bytes; } bytes = min(bytes_in_mbuf, bytes_in_msg); } m->m_pkthdr.len = m->m_len = mlen; m->m_pkthdr.rcvif = ifp; /* * If hw is in promiscuous mode (note that I said hardware, not if * IFF_PROMISC is set in ifnet flags), then if this is a unicast * packet and the MAC dst is not us, drop it. This check in normally * inside ether_input(), but IFF_MULTI causes hw promisc without * a bpf listener, so this is wrong. * Greg Troxel , 1998-08-07 */ /* * TBD: also discard packets where NWID does not match. * However, there does not appear to be a way to read the nwid * for a received packet. -gdt 1998-08-07 */ /* XXX verify mbuf length */ eh = mtod(m, struct ether_header *); if ( #ifdef WL_USE_IFNET_PROMISC_CHECK /* not defined */ (sc->ifp->if_flags & (IFF_PROMISC|IFF_ALLMULTI)) #else /* hw is in promisc mode if this is true */ (sc->mode & (MOD_PROM | MOD_ENAL)) #endif && (eh->ether_dhost[0] & 1) == 0 && /* !mcast and !bcast */ bcmp(eh->ether_dhost, IF_LLADDR(sc->ifp), sizeof(eh->ether_dhost)) != 0 ) { m_freem(m); return 1; } #ifdef WLDEBUG if (sc->ifp->if_flags & IFF_DEBUG) if_printf(ifp, "wlrecv %u bytes\n", mlen); #endif #ifdef WLCACHE wl_cache_store(sc, eh, m); #endif /* * received packet is now in a chain of mbuf's. next step is * to pass the packet upwards. */ WL_UNLOCK(sc); (*ifp->if_input)(ifp, m); WL_LOCK(sc); return 1; } /* * wlioctl: * * This routine processes an ioctl request from the "if" layer * above. * * input : pointer the appropriate "if" struct, command, and data * output : based on command appropriate action is taken on the * WaveLAN board(s) or related structures * return : error is returned containing exit conditions * */ static int wlioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct ifreq *ifr = (struct ifreq *)data; struct wl_softc *sc = ifp->if_softc; short mode = 0; int error = 0; struct thread *td = curthread; /* XXX */ int irq, irqval, i, isroot; char psa_buf[0x40]; char eeprom_buf[0x80]; #ifdef WLCACHE size_t size; char * cpt; #endif #ifdef WLDEBUG if (sc->ifp->if_flags & IFF_DEBUG) if_printf(ifp, "entered wlioctl()\n"); #endif switch (cmd) { case SIOCSIFFLAGS: WL_LOCK(sc); if (ifp->if_flags & IFF_ALLMULTI) { mode |= MOD_ENAL; } if (ifp->if_flags & IFF_PROMISC) { mode |= MOD_PROM; } if (ifp->if_flags & IFF_LINK0) { mode |= MOD_PROM; } /* * force a complete reset if the receive multicast/ * promiscuous mode changes so that these take * effect immediately. 
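* (MOD_ENAL is set for IFF_ALLMULTI and MOD_PROM for IFF_PROMISC or
* IFF_LINK0; when the computed mode differs from sc->mode, DSF_RUNNING is
* dropped and wlinit_locked() runs again so the hardware picks up the new
* mode.)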
* */ if (sc->mode != mode) { sc->mode = mode; if (sc->flags & DSF_RUNNING) { sc->flags &= ~DSF_RUNNING; wlinit_locked(sc); } } /* if interface is marked DOWN and still running then * stop it. */ if ((ifp->if_flags & IFF_UP) == 0 && sc->flags & DSF_RUNNING) { if_printf(ifp, "ioctl(): board is not running\n"); sc->flags &= ~DSF_RUNNING; sc->hacr &= ~HACR_INTRON; CMD(sc); /* turn off interrupts */ } /* else if interface is UP and RUNNING, start it */ else if (ifp->if_flags & IFF_UP && (sc->flags & DSF_RUNNING) == 0) { wlinit_locked(sc); } /* if WLDEBUG set on interface, then printf rf-modem regs */ if (ifp->if_flags & IFF_DEBUG) wlmmcstat(sc); WL_UNLOCK(sc); break; #if MULTICAST case SIOCADDMULTI: case SIOCDELMULTI: wlinit(sc); break; #endif /* MULTICAST */ /* DEVICE SPECIFIC */ /* copy the PSA out to the caller */ case SIOCGWLPSA: /* work out if they're root */ isroot = (priv_check(td, PRIV_NET80211_GETKEY) == 0); bzero(psa_buf, sizeof(psa_buf)); WL_LOCK(sc); for (i = 0; i < 0x40; i++) { /* don't hand the DES key out to non-root users */ if ((i > WLPSA_DESKEY) && (i < (WLPSA_DESKEY + 8)) && !isroot) continue; psa_buf[i] = sc->psa[i]; } WL_UNLOCK(sc); - error = copyout(psa_buf, ifr->ifr_data, sizeof(psa_buf)); + error = copyout(psa_buf, ifr_data_get_ptr(ifr), sizeof(psa_buf)); break; /* copy the PSA in from the caller; we only copy _some_ values */ case SIOCSWLPSA: /* root only */ if ((error = priv_check(td, PRIV_DRIVER))) break; - error = copyin(ifr->ifr_data, psa_buf, sizeof(psa_buf)); + error = copyin(ifr_data_get_ptr(ifr), psa_buf, sizeof(psa_buf)); if (error) break; /* check IRQ value */ irqval = psa_buf[WLPSA_IRQNO]; for (irq = 15; irq >= 0; irq--) if (irqvals[irq] == irqval) break; if (irq == 0) /* oops */ break; WL_LOCK(sc); /* new IRQ */ sc->psa[WLPSA_IRQNO] = irqval; /* local MAC */ for (i = 0; i < 6; i++) sc->psa[WLPSA_LOCALMAC + i] = psa_buf[WLPSA_LOCALMAC + i]; /* MAC select */ sc->psa[WLPSA_MACSEL] = psa_buf[WLPSA_MACSEL]; /* default nwid */ sc->psa[WLPSA_NWID] = psa_buf[WLPSA_NWID]; sc->psa[WLPSA_NWID + 1] = psa_buf[WLPSA_NWID + 1]; wlsetpsa(sc); /* update the PSA */ WL_UNLOCK(sc); break; /* get the current NWID out of the sc since we stored it there; the value is * returned in the ifr_data field itself, so the direct assignment is kept * (ifr_data_get_ptr() is a read accessor, not an lvalue) */ case SIOCGWLCNWID: WL_LOCK(sc); ifr->ifr_data = (caddr_t) (sc->nwid[0] << 8 | sc->nwid[1]); WL_UNLOCK(sc); break; /* * change the nwid dynamically. This * ONLY changes the radio modem and does not * change the PSA. * * 2 steps: * 1. save in softc "soft registers" * 2.
save in radio modem (MMC) */ case SIOCSWLCNWID: /* root only */ if ((error = priv_check(td, PRIV_DRIVER))) break; WL_LOCK(sc); if (!(ifp->if_flags & IFF_UP)) { error = EIO; /* only allowed while up */ } else { /* * soft c nwid shadows radio modem setting */ - sc->nwid[0] = (int)ifr->ifr_data >> 8; - sc->nwid[1] = (int)ifr->ifr_data & 0xff; + sc->nwid[0] = (int)ifr_data_get_ptr(ifr) >> 8; + sc->nwid[1] = (int)ifr_data_get_ptr(ifr) & 0xff; MMC_WRITE(MMC_NETW_ID_L,sc->nwid[1]); MMC_WRITE(MMC_NETW_ID_H,sc->nwid[0]); } WL_UNLOCK(sc); break; /* copy the EEPROM in 2.4 Gz WaveMODEM out to the caller */ case SIOCGWLEEPROM: /* root only */ if ((error = priv_check(td, PRIV_DRIVER))) break; bzero(eeprom_buf, sizeof(eeprom_buf)); WL_LOCK(sc); for (i=0x00; i<0x80; ++i) { /* 2.4 Gz: size of EEPROM */ MMC_WRITE(MMC_EEADDR,i); /* 2.4 Gz: get frequency */ MMC_WRITE(MMC_EECTRL, /* 2.4 Gz: EEPROM read */ MMC_EECTRL_EEOP_READ); /* 2.4 Gz: */ DELAY(40); /* 2.4 Gz */ eeprom_buf[2 * i] = /* 2.4 Gz: pass low byte of */ wlmmcread(sc, MMC_EEDATALrv); /* 2.4 Gz: EEPROM word */ eeprom_buf[2 * i + 1] = /* 2.4 Gz: pass hi byte of */ wlmmcread(sc, MMC_EEDATALrv); /* 2.4 Gz: EEPROM word */ } WL_UNLOCK(sc); - error = copyout(ifr->ifr_data, eeprom_buf, sizeof(eeprom_buf)); + error = copyout(ifr_data_get_ptr(ifr), eeprom_buf, sizeof(eeprom_buf)); break; #ifdef WLCACHE /* zero (Delete) the wl cache */ case SIOCDWLCACHE: /* root only */ if ((error = priv_check(td, PRIV_DRIVER))) break; WL_LOCK(sc); wl_cache_zero(sc); WL_UNLOCK(sc); break; /* read out the number of used cache elements; the count is returned in the * ifr_data field itself, so the direct assignment is kept here as well */ case SIOCGWLCITEM: WL_LOCK(sc); ifr->ifr_data = (caddr_t) sc->w_sigitems; WL_UNLOCK(sc); break; /* read out the wl cache */ case SIOCGWLCACHE: WL_LOCK(sc); size = sc->w_sigitems * sizeof(struct w_sigcache); cpt = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO); if (cpt == NULL) { WL_UNLOCK(sc); return (ENOMEM); } bcopy(sc->w_sigcache, cpt, size); WL_UNLOCK(sc); - error = copyout(cpt, ifr->ifr_data, size); + error = copyout(cpt, ifr_data_get_ptr(ifr), size); free(cpt, M_DEVBUF); break; #endif default: error = ether_ioctl(ifp, cmd, data); break; } return (error); } /* * wlwatchdog(): * * Called if the timer set in wlstart expires before an interrupt is received * from the wavelan. It seems to lose interrupts sometimes. * The watchdog routine gets called if the transmitter failed to interrupt * * input : which board is timing out * output : board reset * */ static void wlwatchdog(void *vsc) { struct wl_softc *sc = vsc; log(LOG_ERR, "%s: wavelan device timeout on xmit\n", sc->ifp->if_xname); if_inc_counter(sc->ifp, IFCOUNTER_OERRORS, 1); wlinit_locked(sc); } /* * wlintr: * * This function is the interrupt handler for the WaveLAN * board. This routine will be called whenever either a packet * is received, or a packet has successfully been transferred and * the unit is ready to transmit another packet.
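* The loop below acknowledges the SCB status bits one at a time: SCB_SW_FR
* (frame received, handed to wlrcv()), SCB_SW_RNR (receiver not ready,
* restarted via wlrustrt()), SCB_SW_CNA (currently ignored) and SCB_SW_CX
* (transmit command complete, which clears tbusy and calls wlstart_locked()
* again).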
* * input : board number that interrupted * output : either a packet is received, or a packet is transferred * */ static void wlintr(void *arg) { struct wl_softc *sc = (struct wl_softc *)arg; int ac_status; u_short int_type, int_type1; WL_LOCK(sc); #ifdef WLDEBUG if (sc->ifp->if_flags & IFF_DEBUG) if_printf(sc->ifp, "wlintr() called\n"); #endif if ((int_type = WL_READ_2(sc, HASR)) & HASR_MMC_INTR) { /* handle interrupt from the modem management controller */ /* This will clear the interrupt condition */ (void) wlmmcread(sc, MMC_DCE_STATUS); /* ignored for now */ } if (!(int_type & HASR_INTR)){ /* return if no interrupt from 82586 */ /* commented out. jrb. it happens when reinit occurs printf("wlintr: int_type %x, dump follows\n", int_type); wldump(sc); */ WL_UNLOCK(sc); return; } if (gathersnr) getsnr(sc); for (;;) { WL_WRITE_2(sc, PIOR0, OFFSET_SCB + 0); /* get scb status */ int_type = (WL_READ_2(sc, PIOP0) & SCB_SW_INT); if (int_type == 0) /* no interrupts left */ break; int_type1 = wlack(sc); /* acknowledge interrupt(s) */ /* make sure no bits disappeared (others may appear) */ if ((int_type & int_type1) != int_type) printf("wlack() int bits disappeared : %04x != int_type %04x\n", int_type1, int_type); int_type = int_type1; /* go with the new status */ /* * incoming packet */ if (int_type & SCB_SW_FR) { if_inc_counter(sc->ifp, IFCOUNTER_IPACKETS, 1); wlrcv(sc); } /* * receiver not ready */ if (int_type & SCB_SW_RNR) { if_inc_counter(sc->ifp, IFCOUNTER_IERRORS, 1); #ifdef WLDEBUG if (sc->ifp->if_flags & IFF_DEBUG) if_printf(sc->ifp, "intr(): receiver overrun! begin_fd = %x\n", sc->begin_fd); #endif wlrustrt(sc); } /* * CU not ready */ if (int_type & SCB_SW_CNA) { /* * At present, we don't care about CNA's. We * believe they are a side effect of XMT. */ } if (int_type & SCB_SW_CX) { /* * At present, we only request Interrupt for * XMT. */ WL_WRITE_2(sc, PIOR1, OFFSET_CU); /* get command status */ ac_status = WL_READ_2(sc, PIOP1); if (xmt_watch) { /* report some anomalies */ if (sc->tbusy == 0) { if_printf(sc->ifp, "xmt intr but not busy, CU %04x\n", ac_status); } if (ac_status == 0) { if_printf(sc->ifp, "xmt intr but ac_status == 0\n"); } if (ac_status & AC_SW_A) { if_printf(sc->ifp, "xmt aborted\n"); } #ifdef notdef if (ac_status & TC_CARRIER) { if_printf(sc->ifp, "no carrier\n"); } #endif /* notdef */ if (ac_status & TC_CLS) { if_printf(sc->ifp, "no CTS\n"); } if (ac_status & TC_DMA) { if_printf(sc->ifp, "DMA underrun\n"); } if (ac_status & TC_DEFER) { if_printf(sc->ifp, "xmt deferred\n"); } if (ac_status & TC_SQE) { if_printf(sc->ifp, "heart beat\n"); } if (ac_status & TC_COLLISION) { if_printf(sc->ifp, "too many collisions\n"); } } /* if the transmit actually failed, or returned some status */ if ((!(ac_status & AC_SW_OK)) || (ac_status & 0xfff)) { if (ac_status & (TC_COLLISION | TC_CLS | TC_DMA)) { if_inc_counter(sc->ifp, IFCOUNTER_OERRORS, 1); } /* count collisions */ if_inc_counter(sc->ifp, IFCOUNTER_COLLISIONS, (ac_status & 0xf)); /* if TC_COLLISION set and collision count zero, 16 collisions */ if ((ac_status & 0x20) == 0x20) { if_inc_counter(sc->ifp, IFCOUNTER_COLLISIONS, 0x10); } } sc->tbusy = 0; callout_stop(&sc->watchdog_timer); sc->ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; wlstart_locked(sc->ifp); } } WL_UNLOCK(sc); return; } /* * wlrcv: * * This routine is called by the interrupt handler to initiate a * packet transfer from the board to the "if" layer above this * driver. This routine checks if a buffer has been successfully * received by the WaveLAN. 
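The transmit-complete branch above credits collisions from the low four bits of the 82586 status word and adds sixteen more when the 0x20 bit reports that the retry counter wrapped. A stand-alone rendition of that accounting, with constant names local to this sketch:

#include <stdio.h>

#define EX_COLL_MASK    0x0f    /* collision counter field */
#define EX_COLL_16      0x20    /* "too many collisions" flag */

static int
collisions_from_status(unsigned status)
{
    int n = status & EX_COLL_MASK;

    if (status & EX_COLL_16)
        n += 0x10;              /* counter wrapped: 16 collisions */
    return (n);
}

int
main(void)
{
    printf("%d\n", collisions_from_status(0x03));   /* prints 3 */
    printf("%d\n", collisions_from_status(0x20));   /* prints 16 */
    return (0);
}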
If so, the routine wlread is called * to do the actual transfer of the board data (including the * ethernet header) into a packet (consisting of an mbuf chain). * * input : number of the board to check * output : if a packet is available, it is "sent up" * */ static void wlrcv(struct wl_softc *sc) { u_short fd_p, status, offset, link_offset; #ifdef WLDEBUG if (sc->ifp->if_flags & IFF_DEBUG) if_printf(sc->ifp, "entered wlrcv()\n"); #endif for (fd_p = sc->begin_fd; fd_p != I82586NULL; fd_p = sc->begin_fd) { WL_WRITE_2(sc, PIOR0, fd_p + 0); /* address of status */ status = WL_READ_2(sc, PIOP0); WL_WRITE_2(sc, PIOR1, fd_p + 4); /* address of link_offset */ link_offset = WL_READ_2(sc, PIOP1); offset = WL_READ_2(sc, PIOP1); /* rbd_offset */ if (status == 0xffff || offset == 0xffff /*I82586NULL*/) { if (wlhwrst(sc) != TRUE) if_printf(sc->ifp, "rcv(): hwrst ffff trouble.\n"); return; } else if (status & AC_SW_C) { if (status == (RFD_DONE|RFD_RSC)) { /* lost one */ #ifdef WLDEBUG if (sc->ifp->if_flags & IFF_DEBUG) if_printf(sc->ifp, "RCV: RSC %x\n", status); #endif if_inc_counter(sc->ifp, IFCOUNTER_IERRORS, 1); } else if (!(status & RFD_OK)) { if_printf(sc->ifp, "RCV: !OK %x\n", status); if_inc_counter(sc->ifp, IFCOUNTER_IERRORS, 1); } else if (status & 0xfff) { /* can't happen */ if_printf(sc->ifp, "RCV: ERRs %x\n", status); if_inc_counter(sc->ifp, IFCOUNTER_IERRORS, 1); } else if (!wlread(sc, fd_p)) return; if (!wlrequeue(sc, fd_p)) { /* abort on chain error */ if (wlhwrst(sc) != TRUE) if_printf(sc->ifp, "rcv(): hwrst trouble.\n"); return; } sc->begin_fd = link_offset; } else { break; } } return; } /* * wlrequeue: * * This routine puts rbd's used in the last receive back onto the * free list for the next receive. * */ static int wlrequeue(struct wl_softc *sc, u_short fd_p) { fd_t fd; u_short l_rbdp, f_rbdp, rbd_offset; WL_WRITE_2(sc, PIOR0, fd_p + 6); rbd_offset = WL_READ_2(sc, PIOP0); if ((f_rbdp = rbd_offset) != I82586NULL) { l_rbdp = f_rbdp; for (;;) { WL_WRITE_2(sc, PIOR0, l_rbdp + 0); /* address of status */ if (WL_READ_2(sc, PIOP0) & RBD_SW_EOF) break; WL_WRITE_2(sc, PIOP0, 0); WL_WRITE_2(sc, PIOR0, l_rbdp + 2); /* next_rbd_offset */ if ((l_rbdp = WL_READ_2(sc, PIOP0)) == I82586NULL) break; } WL_WRITE_2(sc, PIOP0, 0); WL_WRITE_2(sc, PIOR0, l_rbdp + 2); /* next_rbd_offset */ WL_WRITE_2(sc, PIOP0, I82586NULL); WL_WRITE_2(sc, PIOR0, l_rbdp + 8); /* address of size */ WL_WRITE_2(sc, PIOP0, WL_READ_2(sc, PIOP0) | AC_CW_EL); WL_WRITE_2(sc, PIOR0, sc->end_rbd + 2); WL_WRITE_2(sc, PIOP0, f_rbdp); /* end_rbd->next_rbd_offset */ WL_WRITE_2(sc, PIOR0, sc->end_rbd + 8); /* size */ WL_WRITE_2(sc, PIOP0, WL_READ_2(sc, PIOP0) & ~AC_CW_EL); sc->end_rbd = l_rbdp; } fd.status = 0; fd.command = AC_CW_EL; fd.link_offset = I82586NULL; fd.rbd_offset = I82586NULL; WL_WRITE_2(sc, PIOR1, fd_p); WL_WRITE_MULTI_2(sc, PIOP1, &fd, 8/2); WL_WRITE_2(sc, PIOR1, sc->end_fd + 2); /* addr of command */ WL_WRITE_2(sc, PIOP1, 0); /* command = 0 */ WL_WRITE_2(sc, PIOP1, fd_p); /* end_fd->link_offset = fd_p */ sc->end_fd = fd_p; return 1; } #ifdef WLDEBUG static int xmt_debug = 0; #endif /* WLDEBUG */ /* * wlxmt: * * This routine fills in the appropriate registers and memory * locations on the WaveLAN board and starts the board off on * the transmit. 
* * input : pointers to board of interest's softc and the mbuf * output : board memory and registers are set for xfer and attention * */ static void wlxmt(struct wl_softc *sc, struct mbuf *m) { u_short xmtdata_p = OFFSET_TBUF; u_short xmtshort_p; struct mbuf *tm_p = m; struct ether_header *eh_p = mtod(m, struct ether_header *); u_char *mb_p = mtod(m, u_char *) + sizeof(struct ether_header); u_short count = m->m_len - sizeof(struct ether_header); ac_t cb; u_short tbd_p = OFFSET_TBD; u_short len, clen = 0; int spin; #ifdef WLDEBUG if (sc->ifp->if_flags & IFF_DEBUG) if_printf(sc->ifp, "entered wlxmt()\n"); #endif cb.ac_status = 0; cb.ac_command = (AC_CW_EL|AC_TRANSMIT|AC_CW_I); cb.ac_link_offset = I82586NULL; WL_WRITE_2(sc, PIOR1, OFFSET_CU); WL_WRITE_MULTI_2(sc, PIOP1, &cb, 6/2); WL_WRITE_2(sc, PIOP1, OFFSET_TBD); /* cb.cmd.transmit.tbd_offset */ WL_WRITE_MULTI_2(sc, PIOP1, eh_p->ether_dhost, WAVELAN_ADDR_SIZE/2); WL_WRITE_2(sc, PIOP1, eh_p->ether_type); #ifdef WLDEBUG if (sc->ifp->if_flags & IFF_DEBUG) { if (xmt_debug) { printf("XMT mbuf: L%d @%p ", count, (void *)mb_p); printf("ether type %x\n", eh_p->ether_type); } } #endif /* WLDEBUG */ WL_WRITE_2(sc, PIOR0, OFFSET_TBD); WL_WRITE_2(sc, PIOP0, 0); /* act_count */ WL_WRITE_2(sc, PIOR1, OFFSET_TBD + 4); WL_WRITE_2(sc, PIOP1, xmtdata_p); /* buffer_addr */ WL_WRITE_2(sc, PIOP1, 0); /* buffer_base */ for (;;) { if (count) { if (clen + count > WAVELAN_MTU) break; if (count & 1) len = count + 1; else len = count; WL_WRITE_2(sc, PIOR1, xmtdata_p); WL_WRITE_MULTI_2(sc, PIOP1, mb_p, len/2); clen += count; WL_WRITE_2(sc, PIOR0, tbd_p); /* address of act_count */ WL_WRITE_2(sc, PIOP0, WL_READ_2(sc, PIOP0) + count); xmtdata_p += len; if ((tm_p = tm_p->m_next) == (struct mbuf *)0) break; if (count & 1) { /* go to the next descriptor */ WL_WRITE_2(sc, PIOR0, tbd_p + 2); tbd_p += sizeof (tbd_t); WL_WRITE_2(sc, PIOP0, tbd_p); /* next_tbd_offset */ WL_WRITE_2(sc, PIOR0, tbd_p); WL_WRITE_2(sc, PIOP0, 0); /* act_count */ WL_WRITE_2(sc, PIOR1, tbd_p + 4); WL_WRITE_2(sc, PIOP1, xmtdata_p); /* buffer_addr */ WL_WRITE_2(sc, PIOP1, 0); /* buffer_base */ /* at the end -> coallesce remaining mbufs */ if (tbd_p == OFFSET_TBD + (N_TBD-1) * sizeof (tbd_t)) { wlsftwsleaze(&count, &mb_p, &tm_p, sc); continue; } /* next mbuf short -> coallesce as needed */ if ( (tm_p->m_next == (struct mbuf *) 0) || #define HDW_THRESHOLD 55 tm_p->m_len > HDW_THRESHOLD) /* ok */; else { wlhdwsleaze(&count, &mb_p, &tm_p, sc); continue; } } } else if ((tm_p = tm_p->m_next) == (struct mbuf *)0) break; count = tm_p->m_len; mb_p = mtod(tm_p, u_char *); #ifdef WLDEBUG if (sc->ifp->if_flags & IFF_DEBUG) if (xmt_debug) printf("mbuf+ L%d @%p ", count, (void *)mb_p); #endif /* WLDEBUG */ } #ifdef WLDEBUG if (sc->ifp->if_flags & IFF_DEBUG) if (xmt_debug) printf("CLEN = %d\n", clen); #endif /* WLDEBUG */ WL_WRITE_2(sc, PIOR0, tbd_p); if (clen < ETHERMIN) { WL_WRITE_2(sc, PIOP0, WL_READ_2(sc, PIOP0) + ETHERMIN - clen); WL_WRITE_2(sc, PIOR1, xmtdata_p); for (xmtshort_p = xmtdata_p; clen < ETHERMIN; clen += 2) WL_WRITE_2(sc, PIOP1, 0); } WL_WRITE_2(sc, PIOP0, WL_READ_2(sc, PIOP0) | TBD_SW_EOF); WL_WRITE_2(sc, PIOR0, tbd_p + 2); WL_WRITE_2(sc, PIOP0, I82586NULL); #ifdef WLDEBUG if (sc->ifp->if_flags & IFF_DEBUG) { if (xmt_debug) { wltbd(sc); printf("\n"); } } #endif /* WLDEBUG */ WL_WRITE_2(sc, PIOR0, OFFSET_SCB + 2); /* address of scb_command */ /* * wait for 586 to clear previous command, complain if it takes * too long */ for (spin = 1;;spin = (spin + 1) % 10000) { if (WL_READ_2(sc, PIOP0) == 0) { /* 
it's done, we can go */ break; } if ((spin == 0) && xmt_watch) { /* not waking up, and we care */ if_printf(sc->ifp, "slow accepting xmit\n"); } } WL_WRITE_2(sc, PIOP0, SCB_CU_STRT); /* new command */ SET_CHAN_ATTN(sc); m_freem(m); /* XXX * Pause to avoid transmit overrun problems. * The required delay tends to vary with platform type, and may be * related to interrupt loss. */ if (wl_xmit_delay) { DELAY(wl_xmit_delay); } return; } /* * wlbldru: * * This function builds the linear linked lists of fd's and * rbd's. Based on page 4-32 of 1986 Intel microcom handbook. * */ static u_short wlbldru(struct wl_softc *sc) { fd_t fd; rbd_t rbd; u_short fd_p = OFFSET_RU; u_short rbd_p = OFFSET_RBD; int i; sc->begin_fd = fd_p; for (i = 0; i < N_FD; i++) { fd.status = 0; fd.command = 0; fd.link_offset = fd_p + sizeof(fd_t); fd.rbd_offset = I82586NULL; WL_WRITE_2(sc, PIOR1, fd_p); WL_WRITE_MULTI_2(sc, PIOP1, &fd, 8/2); fd_p = fd.link_offset; } fd_p -= sizeof(fd_t); sc->end_fd = fd_p; WL_WRITE_2(sc, PIOR1, fd_p + 2); WL_WRITE_2(sc, PIOP1, AC_CW_EL); /* command */ WL_WRITE_2(sc, PIOP1, I82586NULL); /* link_offset */ fd_p = OFFSET_RU; WL_WRITE_2(sc, PIOR0, fd_p + 6); /* address of rbd_offset */ WL_WRITE_2(sc, PIOP0, rbd_p); WL_WRITE_2(sc, PIOR1, rbd_p); for (i = 0; i < N_RBD; i++) { rbd.status = 0; rbd.buffer_addr = rbd_p + sizeof(rbd_t) + 2; rbd.buffer_base = 0; rbd.size = RCVBUFSIZE; if (i != N_RBD-1) { rbd_p += sizeof(ru_t); rbd.next_rbd_offset = rbd_p; } else { rbd.next_rbd_offset = I82586NULL; rbd.size |= AC_CW_EL; sc->end_rbd = rbd_p; } WL_WRITE_MULTI_2(sc, PIOP1, &rbd, sizeof(rbd_t)/2); WL_WRITE_2(sc, PIOR1, rbd_p); } return sc->begin_fd; } /* * wlrustrt: * * This routine starts the receive unit running. First checks if the * board is actually ready, then the board is instructed to receive * packets again. * */ static void wlrustrt(struct wl_softc *sc) { u_short rfa; #ifdef WLDEBUG if (sc->ifp->if_flags & IFF_DEBUG) if_printf(sc->ifp, "entered wlrustrt()\n"); #endif WL_WRITE_2(sc, PIOR0, OFFSET_SCB); if (WL_READ_2(sc, PIOP0) & SCB_RUS_READY){ printf("wlrustrt: RUS_READY\n"); return; } WL_WRITE_2(sc, PIOR0, OFFSET_SCB + 2); WL_WRITE_2(sc, PIOP0, SCB_RU_STRT); /* command */ rfa = wlbldru(sc); WL_WRITE_2(sc, PIOR0, OFFSET_SCB + 6); /* address of scb_rfa_offset */ WL_WRITE_2(sc, PIOP0, rfa); SET_CHAN_ATTN(sc); return; } /* * wldiag: * * This routine does a 586 op-code number 7, and obtains the * diagnose status for the WaveLAN. * */ static int wldiag(struct wl_softc *sc) { short status; #ifdef WLDEBUG if (sc->ifp->if_flags & IFF_DEBUG) if_printf(sc->ifp, "entered wldiag()\n"); #endif WL_WRITE_2(sc, PIOR0, OFFSET_SCB); status = WL_READ_2(sc, PIOP0); if (status & SCB_SW_INT) { /* state is 2000 which seems ok if_printf(sc->ifp, "diag(): unexpected initial state %\n", WL_READ_2(sc, PIOP0)); */ wlack(sc); } WL_WRITE_2(sc, PIOR1, OFFSET_CU); WL_WRITE_2(sc, PIOP1, 0); /* ac_status */ WL_WRITE_2(sc, PIOP1, AC_DIAGNOSE|AC_CW_EL);/* ac_command */ if (wlcmd(sc, "diag()") == 0) return 0; WL_WRITE_2(sc, PIOR0, OFFSET_CU); if (WL_READ_2(sc, PIOP0) & 0x0800) { if_printf(sc->ifp, "i82586 Self Test failed!\n"); return 0; } return TRUE; } /* * wlconfig: * * This routine does a standard config of the WaveLAN board. 
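wlbldru() above chains the frame descriptors through explicit next offsets, terminates the chain with I82586NULL, and sets the end-of-list bit only on the final entry. The same shape in plain C, using array indices instead of board offsets and placeholder constants:

#include <stdio.h>

#define N_DESC  4
#define D_NULL  0xffff          /* stands in for I82586NULL */
#define D_EL    0x8000          /* stands in for AC_CW_EL */

struct desc {
    unsigned short cmd;
    unsigned short next;
};

int
main(void)
{
    struct desc d[N_DESC];
    int i;

    for (i = 0; i < N_DESC; i++) {
        d[i].cmd = 0;
        d[i].next = (i == N_DESC - 1) ? D_NULL : i + 1;
    }
    d[N_DESC - 1].cmd |= D_EL;  /* last descriptor ends the list */

    for (i = 0; i < N_DESC; i++)
        printf("desc %d: cmd %04x next %04x\n", i, d[i].cmd, d[i].next);
    return (0);
}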
* */ static int wlconfig(struct wl_softc *sc) { configure_t configure; #if MULTICAST struct ifmultiaddr *ifma; u_char *addrp; int cnt = 0; #endif /* MULTICAST */ #ifdef WLDEBUG if (sc->ifp->if_flags & IFF_DEBUG) if_printf(sc->ifp, "entered wlconfig()\n"); #endif WL_WRITE_2(sc, PIOR0, OFFSET_SCB); if (WL_READ_2(sc, PIOP0) & SCB_SW_INT) { /* if_printf(sc->ifp, "config(): unexpected initial state %x\n", WL_READ_2(sc, PIOP0)); */ } wlack(sc); WL_WRITE_2(sc, PIOR1, OFFSET_CU); WL_WRITE_2(sc, PIOP1, 0); /* ac_status */ WL_WRITE_2(sc, PIOP1, AC_CONFIGURE|AC_CW_EL); /* ac_command */ /* jrb hack */ configure.fifolim_bytecnt = 0x080c; configure.addrlen_mode = 0x0600; configure.linprio_interframe = 0x2060; configure.slot_time = 0xf200; configure.hardware = 0x0008; /* tx even w/o CD */ configure.min_frame_len = 0x0040; #if 0 /* This is the configuration block suggested by Marc Meertens * in an e-mail message to John * Ioannidis on 10 Nov 92. */ configure.fifolim_bytecnt = 0x040c; configure.addrlen_mode = 0x0600; configure.linprio_interframe = 0x2060; configure.slot_time = 0xf000; configure.hardware = 0x0008; /* tx even w/o CD */ configure.min_frame_len = 0x0040; #else /* * below is the default board configuration from p2-28 from 586 book */ configure.fifolim_bytecnt = 0x080c; configure.addrlen_mode = 0x2600; configure.linprio_interframe = 0x7820; /* IFS=120, ACS=2 */ configure.slot_time = 0xf00c; /* slottime=12 */ configure.hardware = 0x0008; /* tx even w/o CD */ configure.min_frame_len = 0x0040; #endif if (sc->mode & (MOD_PROM | MOD_ENAL)) configure.hardware |= 1; WL_WRITE_2(sc, PIOR1, OFFSET_CU + 6); WL_WRITE_MULTI_2(sc, PIOP1, &configure, sizeof(configure_t)/2); if (wlcmd(sc, "config()-configure") == 0) return 0; #if MULTICAST WL_WRITE_2(sc, PIOR1, OFFSET_CU); WL_WRITE_2(sc, PIOP1, 0); /* ac_status */ WL_WRITE_2(sc, PIOP1, AC_MCSETUP|AC_CW_EL); /* ac_command */ WL_WRITE_2(sc, PIOR1, OFFSET_CU + 8); if_maddr_rlock(sc->ifp); TAILQ_FOREACH(ifma, &sc->ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; addrp = LLADDR((struct sockaddr_dl *)ifma->ifma_addr); WL_WRITE_2(sc, PIOP1, addrp[0] + (addrp[1] << 8)); WL_WRITE_2(sc, PIOP1, addrp[2] + (addrp[3] << 8)); WL_WRITE_2(sc, PIOP1, addrp[4] + (addrp[5] << 8)); ++cnt; } if_maddr_runlock(sc->ifp); WL_WRITE_2(sc, PIOR1, OFFSET_CU + 6); /* mc-cnt */ WL_WRITE_2(sc, PIOP1, cnt * WAVELAN_ADDR_SIZE); if (wlcmd(sc, "config()-mcaddress") == 0) return 0; #endif /* MULTICAST */ WL_WRITE_2(sc, PIOR1, OFFSET_CU); WL_WRITE_2(sc, PIOP1, 0); /* ac_status */ WL_WRITE_2(sc, PIOP1, AC_IASETUP|AC_CW_EL); /* ac_command */ WL_WRITE_2(sc, PIOR1, OFFSET_CU + 6); WL_WRITE_MULTI_2(sc, PIOP1, IF_LLADDR(sc->ifp), WAVELAN_ADDR_SIZE/2); if (wlcmd(sc, "config()-address") == 0) return(0); wlinitmmc(sc); return(1); } /* * wlcmd: * * Set channel attention bit and busy wait until command has * completed. Then acknowledge the command completion. 
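The multicast setup in wlconfig() above hands each six-byte link-level address to the card as three little-endian 16-bit words (addrp[0] + (addrp[1] << 8), and so on). Pulled out into a small stand-alone helper:

#include <stdio.h>

static void
mac_to_words(const unsigned char mac[6], unsigned short w[3])
{
    int i;

    /* Each pair of bytes becomes one little-endian 16-bit word. */
    for (i = 0; i < 3; i++)
        w[i] = mac[2 * i] | (mac[2 * i + 1] << 8);
}

int
main(void)
{
    const unsigned char mac[6] = { 0x08, 0x00, 0x0e, 0x20, 0x3e, 0x7f };
    unsigned short w[3];

    mac_to_words(mac, w);
    printf("%04x %04x %04x\n", w[0], w[1], w[2]);
    return (0);
}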
*/ static int wlcmd(struct wl_softc *sc, char *str) { int i; WL_WRITE_2(sc, PIOR0, OFFSET_SCB + 2); /* address of scb_command */ WL_WRITE_2(sc, PIOP0, SCB_CU_STRT); SET_CHAN_ATTN(sc); WL_WRITE_2(sc, PIOR0, OFFSET_CU); for (i = 0; i < 0xffff; i++) if (WL_READ_2(sc, PIOP0) & AC_SW_C) break; if (i == 0xffff || !(WL_READ_2(sc, PIOP0) & AC_SW_OK)) { if_printf(sc->ifp, "%s failed; status = %d, inw = %x, outw = %x\n", str, WL_READ_2(sc, PIOP0) & AC_SW_OK, WL_READ_2(sc, PIOP0), WL_READ_2(sc, PIOR0)); WL_WRITE_2(sc, PIOR0, OFFSET_SCB); printf("scb_status %x\n", WL_READ_2(sc, PIOP0)); WL_WRITE_2(sc, PIOR0, OFFSET_SCB+2); printf("scb_command %x\n", WL_READ_2(sc, PIOP0)); WL_WRITE_2(sc, PIOR0, OFFSET_SCB+4); printf("scb_cbl %x\n", WL_READ_2(sc, PIOP0)); WL_WRITE_2(sc, PIOR0, OFFSET_CU+2); printf("cu_cmd %x\n", WL_READ_2(sc, PIOP0)); return(0); } WL_WRITE_2(sc, PIOR0, OFFSET_SCB); if ((WL_READ_2(sc, PIOP0) & SCB_SW_INT) && (WL_READ_2(sc, PIOP0) != SCB_SW_CNA)) { /* if_printf(sc->ifp, "%s: unexpected final state %x\n", str, WL_READ_2(sc, PIOP0)); */ } wlack(sc); return(TRUE); } /* * wlack: if the 82596 wants attention because it has finished * sending or receiving a packet, acknowledge its desire and * return bits indicating the kind of attention. wlack() returns * these bits so that the caller can service exactly the * conditions that wlack() acknowledged. */ static int wlack(struct wl_softc *sc) { int i; u_short cmd; WL_WRITE_2(sc, PIOR1, OFFSET_SCB); if (!(cmd = (WL_READ_2(sc, PIOP1) & SCB_SW_INT))) return(0); #ifdef WLDEBUG if (sc->ifp->if_flags & IFF_DEBUG) if_printf(sc->ifp, "doing a wlack()\n"); #endif WL_WRITE_2(sc, PIOP1, cmd); SET_CHAN_ATTN(sc); WL_WRITE_2(sc, PIOR0, OFFSET_SCB + 2); /* address of scb_command */ for (i = 1000000; WL_READ_2(sc, PIOP0) && (i-- > 0); ) continue; if (i < 1) if_printf(sc->ifp, "wlack(): board not accepting command.\n"); return(cmd); } #ifdef WLDEBUG static void wltbd(struct wl_softc *sc) { u_short tbd_p = OFFSET_TBD; tbd_t tbd; int i = 0; int sum = 0; for (;;) { WL_WRITE_2(sc, PIOR1, tbd_p); WL_READ_MULTI_2(sc, PIOP1, &tbd, sizeof(tbd_t)/2); sum += (tbd.act_count & ~TBD_SW_EOF); printf("%d: addr %x, count %d (%d), next %x, base %x\n", i++, tbd.buffer_addr, (tbd.act_count & ~TBD_SW_EOF), sum, tbd.next_tbd_offset, tbd.buffer_base); if (tbd.act_count & TBD_SW_EOF) break; tbd_p = tbd.next_tbd_offset; } } #endif static void wlhdwsleaze(u_short *countp, u_char **mb_pp, struct mbuf **tm_pp, struct wl_softc *sc) { struct mbuf *tm_p = *tm_pp; u_char *mb_p = *mb_pp; u_short count = 0; u_char *cp; int len; /* * can we get a run that will be coallesced or * that terminates before breaking */ do { count += tm_p->m_len; if (tm_p->m_len & 1) break; } while ((tm_p = tm_p->m_next) != (struct mbuf *)0); if ( (tm_p == (struct mbuf *)0) || count > HDW_THRESHOLD) { *countp = (*tm_pp)->m_len; *mb_pp = mtod((*tm_pp), u_char *); return; } /* we need to copy */ tm_p = *tm_pp; mb_p = *mb_pp; count = 0; cp = (u_char *) t_packet; for (;;) { bcopy(mtod(tm_p, u_char *), cp, len = tm_p->m_len); count += len; if (count > HDW_THRESHOLD) break; cp += len; if (tm_p->m_next == (struct mbuf *)0) break; tm_p = tm_p->m_next; } *countp = count; *mb_pp = (u_char *) t_packet; *tm_pp = tm_p; return; } static void wlsftwsleaze(u_short *countp, u_char **mb_pp, struct mbuf **tm_pp, struct wl_softc *sc) { struct mbuf *tm_p = *tm_pp; u_short count = 0; u_char *cp = (u_char *) t_packet; int len; /* we need to copy */ for (;;) { bcopy(mtod(tm_p, u_char *), cp, len = tm_p->m_len); count += len; cp += len; if 
(tm_p->m_next == (struct mbuf *)0) break; tm_p = tm_p->m_next; } *countp = count; *mb_pp = (u_char *) t_packet; *tm_pp = tm_p; return; } static void wlmmcstat(struct wl_softc *sc) { u_short tmp; device_printf(sc->dev, "DCE_STATUS: 0x%x, ", wlmmcread(sc, MMC_DCE_STATUS) & 0x0f); tmp = wlmmcread(sc, MMC_CORRECT_NWID_H) << 8; tmp |= wlmmcread(sc, MMC_CORRECT_NWID_L); printf("Correct NWID's: %d, ", tmp); tmp = wlmmcread(sc, MMC_WRONG_NWID_H) << 8; tmp |= wlmmcread(sc, MMC_WRONG_NWID_L); printf("Wrong NWID's: %d\n", tmp); printf("THR_PRE_SET: 0x%x, ", wlmmcread(sc, MMC_THR_PRE_SET)); printf("SIGNAL_LVL: %d, SILENCE_LVL: %d\n", wlmmcread(sc, MMC_SIGNAL_LVL), wlmmcread(sc, MMC_SILENCE_LVL)); printf("SIGN_QUAL: 0x%x, NETW_ID: %x:%x, DES: %d\n", wlmmcread(sc, MMC_SIGN_QUAL), wlmmcread(sc, MMC_NETW_ID_H), wlmmcread(sc, MMC_NETW_ID_L), wlmmcread(sc, MMC_DES_AVAIL)); } static u_short wlmmcread(struct wl_softc *sc, u_short reg) { while (WL_READ_2(sc, HASR) & HASR_MMC_BUSY) continue; WL_WRITE_2(sc, MMCR,reg << 1); while (WL_READ_2(sc, HASR) & HASR_MMC_BUSY) continue; return (u_short)WL_READ_2(sc, MMCR) >> 8; } static void getsnr(struct wl_softc *sc) { MMC_WRITE(MMC_FREEZE,1); /* * SNR retrieval procedure : * * read signal level : wlmmcread(sc, MMC_SIGNAL_LVL); * read silence level : wlmmcread(sc, MMC_SILENCE_LVL); */ MMC_WRITE(MMC_FREEZE,0); /* * SNR is signal:silence ratio. */ } /* ** wlgetpsa ** ** Reads the psa for the wavelan at (sc) into (buf) */ static void wlgetpsa(struct wl_softc *sc, u_char *buf) { int i; PCMD(sc, HACR_DEFAULT & ~HACR_16BITS); PCMD(sc, HACR_DEFAULT & ~HACR_16BITS); for (i = 0; i < 0x40; i++) { WL_WRITE_2(sc, PIOR2, i); buf[i] = WL_READ_1(sc, PIOP2); } PCMD(sc, HACR_DEFAULT); PCMD(sc, HACR_DEFAULT); } /* ** wlsetpsa ** ** Writes the psa for wavelan (unit) from the softc back to the ** board. Updates the CRC and sets the CRC OK flag. ** ** Do not call this when the board is operating, as it doesn't ** preserve the hacr. */ static void wlsetpsa(struct wl_softc *sc) { int i; u_short crc; crc = wlpsacrc(sc->psa); /* calculate CRC of PSA */ sc->psa[WLPSA_CRCLOW] = crc & 0xff; sc->psa[WLPSA_CRCHIGH] = (crc >> 8) & 0xff; sc->psa[WLPSA_CRCOK] = 0x55; /* default to 'bad' until programming complete */ PCMD(sc, HACR_DEFAULT & ~HACR_16BITS); PCMD(sc, HACR_DEFAULT & ~HACR_16BITS); for (i = 0; i < 0x40; i++) { DELAY(DELAYCONST); WL_WRITE_2(sc, PIOR2, i); /* write param memory */ DELAY(DELAYCONST); WL_WRITE_1(sc, PIOP2, sc->psa[i]); } DELAY(DELAYCONST); WL_WRITE_2(sc, PIOR2, WLPSA_CRCOK); /* update CRC flag*/ DELAY(DELAYCONST); sc->psa[WLPSA_CRCOK] = 0xaa; /* OK now */ WL_WRITE_1(sc, PIOP2, 0xaa); /* all OK */ DELAY(DELAYCONST); PCMD(sc, HACR_DEFAULT); PCMD(sc, HACR_DEFAULT); } /* ** CRC routine provided by Christopher Giordano , ** from original code by Tomi Mikkonen (tomitm@remedy.fi) */ static u_int crc16_table[16] = { 0x0000, 0xCC01, 0xD801, 0x1400, 0xF001, 0x3C00, 0x2800, 0xE401, 0xA001, 0x6C00, 0x7800, 0xB401, 0x5000, 0x9C01, 0x8801, 0x4400 }; static u_short wlpsacrc(u_char *buf) { u_short crc = 0; int i, r1; for (i = 0; i < 0x3d; i++, buf++) { /* lower 4 bits */ r1 = crc16_table[crc & 0xF]; crc = (crc >> 4) & 0x0FFF; crc = crc ^ r1 ^ crc16_table[*buf & 0xF]; /* upper 4 bits */ r1 = crc16_table[crc & 0xF]; crc = (crc >> 4) & 0x0FFF; crc = crc ^ r1 ^ crc16_table[(*buf >> 4) & 0xF]; } return(crc); } #ifdef WLCACHE /* * wl_cache_store * * take input packet and cache various radio hw characteristics * indexed by MAC address. * * Some things to think about: * note that no space is malloced. 
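wlpsacrc() above computes the PSA checksum a nibble at a time from a 16-entry lookup table. The same routine lifted into a stand-alone program; the table and update steps are copied from the driver, and the four-byte input is an arbitrary sample:

#include <stdio.h>

static const unsigned short crc16_table[16] = {
    0x0000, 0xCC01, 0xD801, 0x1400, 0xF001, 0x3C00, 0x2800, 0xE401,
    0xA001, 0x6C00, 0x7800, 0xB401, 0x5000, 0x9C01, 0x8801, 0x4400
};

static unsigned short
crc16(const unsigned char *buf, int len)
{
    unsigned short crc = 0, r1;
    int i;

    for (i = 0; i < len; i++, buf++) {
        /* lower nibble */
        r1 = crc16_table[crc & 0xF];
        crc = (crc >> 4) & 0x0FFF;
        crc = crc ^ r1 ^ crc16_table[*buf & 0xF];
        /* upper nibble */
        r1 = crc16_table[crc & 0xF];
        crc = (crc >> 4) & 0x0FFF;
        crc = crc ^ r1 ^ crc16_table[(*buf >> 4) & 0xF];
    }
    return (crc);
}

int
main(void)
{
    const unsigned char psa[4] = { 0x01, 0x02, 0x03, 0x04 };

    printf("crc = 0x%04x\n", crc16(psa, 4));
    return (0);
}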
* We might hash the mac address if the cache were bigger. * It is not clear that the cache is big enough. * It is also not clear how big it should be. * The cache is IP-specific. We don't care about that as * we want it to be IP-specific. * The last N recv. packets are saved. This will tend * to reward agents and mobile hosts that beacon. * That is probably fine for mobile ip. */ /* globals for wavelan signal strength cache */ /* this should go into softc structure above. */ /* set true if you want to limit cache items to broadcast/mcast * only packets (not unicast) */ static int wl_cache_mcastonly = 1; SYSCTL_INT(_machdep, OID_AUTO, wl_cache_mcastonly, CTLFLAG_RW, &wl_cache_mcastonly, 0, ""); /* set true if you want to limit cache items to IP packets only */ static int wl_cache_iponly = 1; SYSCTL_INT(_machdep, OID_AUTO, wl_cache_iponly, CTLFLAG_RW, &wl_cache_iponly, 0, ""); /* zero out the cache */ static void wl_cache_zero(struct wl_softc *sc) { bzero(&sc->w_sigcache[0], sizeof(struct w_sigcache) * MAXCACHEITEMS); sc->w_sigitems = 0; sc->w_nextcache = 0; sc->w_wrapindex = 0; } /* store hw signal info in cache. * index is MAC address, but an ip src gets stored too * There are two filters here controllable via sysctl: * throw out unicast (on by default, but can be turned off) * throw out non-ip (on by default, but can be turned off) */ static void wl_cache_store (struct wl_softc *sc, struct ether_header *eh, struct mbuf *m) { #ifdef INET struct ip *ip = NULL; /* Avoid GCC warning */ int i; int signal, silence; int w_insertcache; /* computed index for cache entry storage */ int ipflag = wl_cache_iponly; #endif /* filters: * 1. ip only * 2. configurable filter to throw out unicast packets, * keep multicast only. */ #ifdef INET /* reject if not IP packet */ if ( wl_cache_iponly && (ntohs(eh->ether_type) != 0x800)) { return; } /* check if broadcast or multicast packet. we toss * unicast packets */ if (wl_cache_mcastonly && ((eh->ether_dhost[0] & 1) == 0)) { return; } /* find the ip header. we want to store the ip_src * address. use the mtod macro(in mbuf.h) * to typecast m to struct ip * */ if (ipflag) { ip = mtod(m, struct ip *); } /* do a linear search for a matching MAC address * in the cache table * . MAC address is 6 bytes, * . var w_nextcache holds total number of entries already cached */ for (i = 0; i < sc->w_nextcache; i++) { if (! bcmp(eh->ether_shost, sc->w_sigcache[i].macsrc, 6 )) { /* Match!, * so we already have this entry, * update the data, and LRU age */ break; } } /* did we find a matching mac address? * if yes, then overwrite a previously existing cache entry */ if (i < sc->w_nextcache ) { w_insertcache = i; } /* else, have a new address entry,so * add this new entry, * if table full, then we need to replace entry */ else { /* check for space in cache table * note: w_nextcache also holds number of entries * added in the cache table */ if ( sc->w_nextcache < MAXCACHEITEMS ) { w_insertcache = sc->w_nextcache; sc->w_nextcache++; sc->w_sigitems = sc->w_nextcache; } /* no space found, so simply wrap with wrap index * and "zap" the next entry */ else { if (sc->w_wrapindex == MAXCACHEITEMS) { sc->w_wrapindex = 0; } w_insertcache = sc->w_wrapindex++; } } /* invariant: w_insertcache now points at some slot * in cache. 
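The slot selection sketched in the comment above, and carried out just below, boils down to: overwrite the entry whose MAC address already matches, otherwise append while the table has room, otherwise recycle slots round-robin through the wrap index. A compact stand-alone model of that policy, with types and names local to the sketch:

#include <stdio.h>
#include <string.h>

#define NSLOTS  8

struct slot {
    unsigned char mac[6];
    int signal;
};

static struct slot table[NSLOTS];
static int nused, wrap;

static int
pick_slot(const unsigned char mac[6])
{
    int i;

    for (i = 0; i < nused; i++)         /* already cached? overwrite it */
        if (memcmp(table[i].mac, mac, 6) == 0)
            return (i);
    if (nused < NSLOTS)                 /* room left: append */
        return (nused++);
    if (wrap == NSLOTS)                 /* full: recycle round-robin */
        wrap = 0;
    return (wrap++);
}

int
main(void)
{
    unsigned char mac[6] = { 2, 0, 0, 0, 0, 1 };
    int idx = pick_slot(mac);

    memcpy(table[idx].mac, mac, 6);
    table[idx].signal = 42;
    printf("stored in slot %d\n", idx);
    return (0);
}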
*/ if (w_insertcache < 0 || w_insertcache >= MAXCACHEITEMS) { log(LOG_ERR, "wl_cache_store, bad index: %d of [0..%d], gross cache error\n", w_insertcache, MAXCACHEITEMS); return; } /* store items in cache * .ipsrc * .macsrc * .signal (0..63) ,silence (0..63) ,quality (0..15) */ if (ipflag) { sc->w_sigcache[w_insertcache].ipsrc = ip->ip_src.s_addr; } bcopy( eh->ether_shost, sc->w_sigcache[w_insertcache].macsrc, 6); signal = sc->w_sigcache[w_insertcache].signal = wlmmcread(sc, MMC_SIGNAL_LVL) & 0x3f; silence = sc->w_sigcache[w_insertcache].silence = wlmmcread(sc, MMC_SILENCE_LVL) & 0x3f; sc->w_sigcache[w_insertcache].quality = wlmmcread(sc, MMC_SIGN_QUAL) & 0x0f; if (signal > 0) sc->w_sigcache[w_insertcache].snr = signal - silence; else sc->w_sigcache[w_insertcache].snr = 0; #endif /* INET */ } #endif /* WLCACHE */ Index: stable/11/sys/net/if.c =================================================================== --- stable/11/sys/net/if.c (revision 332287) +++ stable/11/sys/net/if.c (revision 332288) @@ -1,4302 +1,4317 @@ /*- * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)if.c 8.5 (Berkeley) 1/9/95 * $FreeBSD$ */ #include "opt_compat.h" #include "opt_inet6.h" #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #include #include #include #include #ifdef INET #include #endif /* INET */ #ifdef INET6 #include #include #endif /* INET6 */ #endif /* INET || INET6 */ #include #ifdef COMPAT_FREEBSD32 #include #include struct ifreq_buffer32 { uint32_t length; /* (size_t) */ uint32_t buffer; /* (void *) */ }; /* * Interface request structure used for socket * ioctl's. 
All interface ioctl's must have parameter * definitions which begin with ifr_name. The * remainder may be interface specific. */ struct ifreq32 { char ifr_name[IFNAMSIZ]; /* if name, e.g. "en0" */ union { struct sockaddr ifru_addr; struct sockaddr ifru_dstaddr; struct sockaddr ifru_broadaddr; struct ifreq_buffer32 ifru_buffer; short ifru_flags[2]; short ifru_index; int ifru_jid; int ifru_metric; int ifru_mtu; int ifru_phys; int ifru_media; uint32_t ifru_data; int ifru_cap[2]; u_int ifru_fib; u_char ifru_vlan_pcp; } ifr_ifru; }; CTASSERT(sizeof(struct ifreq) == sizeof(struct ifreq32)); CTASSERT(__offsetof(struct ifreq, ifr_ifru) == __offsetof(struct ifreq32, ifr_ifru)); #endif union ifreq_union { struct ifreq ifr; #ifdef COMPAT_FREEBSD32 struct ifreq32 ifr32; #endif }; SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers"); SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management"); SYSCTL_INT(_net_link, OID_AUTO, ifqmaxlen, CTLFLAG_RDTUN, &ifqmaxlen, 0, "max send queue size"); /* Log link state change events */ static int log_link_state_change = 1; SYSCTL_INT(_net_link, OID_AUTO, log_link_state_change, CTLFLAG_RW, &log_link_state_change, 0, "log interface link state change events"); /* Log promiscuous mode change events */ static int log_promisc_mode_change = 1; SYSCTL_INT(_net_link, OID_AUTO, log_promisc_mode_change, CTLFLAG_RDTUN, &log_promisc_mode_change, 1, "log promiscuous mode change events"); /* Interface description */ static unsigned int ifdescr_maxlen = 1024; SYSCTL_UINT(_net, OID_AUTO, ifdescr_maxlen, CTLFLAG_RW, &ifdescr_maxlen, 0, "administrative maximum length for interface description"); static MALLOC_DEFINE(M_IFDESCR, "ifdescr", "ifnet descriptions"); /* global sx for non-critical path ifdescr */ static struct sx ifdescr_sx; SX_SYSINIT(ifdescr_sx, &ifdescr_sx, "ifnet descr"); void (*bridge_linkstate_p)(struct ifnet *ifp); void (*ng_ether_link_state_p)(struct ifnet *ifp, int state); void (*lagg_linkstate_p)(struct ifnet *ifp, int state); /* These are external hooks for CARP. */ void (*carp_linkstate_p)(struct ifnet *ifp); void (*carp_demote_adj_p)(int, char *); int (*carp_master_p)(struct ifaddr *); #if defined(INET) || defined(INET6) int (*carp_forus_p)(struct ifnet *ifp, u_char *dhost); int (*carp_output_p)(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa); int (*carp_ioctl_p)(struct ifreq *, u_long, struct thread *); int (*carp_attach_p)(struct ifaddr *, int); void (*carp_detach_p)(struct ifaddr *); #endif #ifdef INET int (*carp_iamatch_p)(struct ifaddr *, uint8_t **); #endif #ifdef INET6 struct ifaddr *(*carp_iamatch6_p)(struct ifnet *ifp, struct in6_addr *taddr6); caddr_t (*carp_macmatch6_p)(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr); #endif struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL; /* * XXX: Style; these should be sorted alphabetically, and unprototyped * static functions should be prototyped. Currently they are sorted by * declaration order. 
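The CTASSERT pair above pins struct ifreq32 to the same size and ifr_ifru offset as the native struct ifreq, which is what lets the accessor functions overlay one layout on the other. Outside the kernel the same kind of check can be written with C11 _Static_assert; the two structs below are simplified stand-ins, not the real definitions:

#include <stddef.h>
#include <stdint.h>

struct native_req { char name[16]; void *data; };
struct compat_req { char name[16]; uint32_t data; };

/* Both layouts must agree on where the payload begins. */
_Static_assert(offsetof(struct native_req, data) ==
    offsetof(struct compat_req, data),
    "payload offset differs between layouts");

int
main(void)
{
    return (0);
}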
*/ static void if_attachdomain(void *); static void if_attachdomain1(struct ifnet *); static int ifconf(u_long, caddr_t); static void if_freemulti(struct ifmultiaddr *); static void if_grow(void); static void if_input_default(struct ifnet *, struct mbuf *); static int if_requestencap_default(struct ifnet *, struct if_encap_req *); static void if_route(struct ifnet *, int flag, int fam); static int if_setflag(struct ifnet *, int, int, int *, int); static int if_transmit(struct ifnet *ifp, struct mbuf *m); static void if_unroute(struct ifnet *, int flag, int fam); static void link_rtrequest(int, struct rtentry *, struct rt_addrinfo *); static int ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *); static int if_delmulti_locked(struct ifnet *, struct ifmultiaddr *, int); static void do_link_state_change(void *, int); static int if_getgroup(struct ifgroupreq *, struct ifnet *); static int if_getgroupmembers(struct ifgroupreq *); static void if_delgroups(struct ifnet *); static void if_attach_internal(struct ifnet *, int, struct if_clone *); static int if_detach_internal(struct ifnet *, int, struct if_clone **); #ifdef VIMAGE static void if_vmove(struct ifnet *, struct vnet *); #endif #ifdef INET6 /* * XXX: declare here to avoid to include many inet6 related files.. * should be more generalized? */ extern void nd6_setmtu(struct ifnet *); #endif /* ipsec helper hooks */ VNET_DEFINE(struct hhook_head *, ipsec_hhh_in[HHOOK_IPSEC_COUNT]); VNET_DEFINE(struct hhook_head *, ipsec_hhh_out[HHOOK_IPSEC_COUNT]); VNET_DEFINE(int, if_index); int ifqmaxlen = IFQ_MAXLEN; VNET_DEFINE(struct ifnethead, ifnet); /* depend on static init XXX */ VNET_DEFINE(struct ifgrouphead, ifg_head); static VNET_DEFINE(int, if_indexlim) = 8; /* Table of ifnet by index. */ VNET_DEFINE(struct ifnet **, ifindex_table); #define V_if_indexlim VNET(if_indexlim) #define V_ifindex_table VNET(ifindex_table) /* * The global network interface list (V_ifnet) and related state (such as * if_index, if_indexlim, and ifindex_table) are protected by an sxlock and * an rwlock. Either may be acquired shared to stablize the list, but both * must be acquired writable to modify the list. This model allows us to * both stablize the interface list during interrupt thread processing, but * also to stablize it over long-running ioctls, without introducing priority * inversions and deadlocks. */ struct rwlock ifnet_rwlock; RW_SYSINIT_FLAGS(ifnet_rw, &ifnet_rwlock, "ifnet_rw", RW_RECURSE); struct sx ifnet_sxlock; SX_SYSINIT_FLAGS(ifnet_sx, &ifnet_sxlock, "ifnet_sx", SX_RECURSE); /* * The allocation of network interfaces is a rather non-atomic affair; we * need to select an index before we are ready to expose the interface for * use, so will use this pointer value to indicate reservation. 
*/ #define IFNET_HOLD (void *)(uintptr_t)(-1) static if_com_alloc_t *if_com_alloc[256]; static if_com_free_t *if_com_free[256]; static MALLOC_DEFINE(M_IFNET, "ifnet", "interface internals"); MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address"); MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address"); struct ifnet * ifnet_byindex_locked(u_short idx) { if (idx > V_if_index) return (NULL); if (V_ifindex_table[idx] == IFNET_HOLD) return (NULL); return (V_ifindex_table[idx]); } struct ifnet * ifnet_byindex(u_short idx) { struct ifnet *ifp; IFNET_RLOCK_NOSLEEP(); ifp = ifnet_byindex_locked(idx); IFNET_RUNLOCK_NOSLEEP(); return (ifp); } struct ifnet * ifnet_byindex_ref(u_short idx) { struct ifnet *ifp; IFNET_RLOCK_NOSLEEP(); ifp = ifnet_byindex_locked(idx); if (ifp == NULL || (ifp->if_flags & IFF_DYING)) { IFNET_RUNLOCK_NOSLEEP(); return (NULL); } if_ref(ifp); IFNET_RUNLOCK_NOSLEEP(); return (ifp); } /* * Allocate an ifindex array entry; return 0 on success or an error on * failure. */ static u_short ifindex_alloc(void) { u_short idx; IFNET_WLOCK_ASSERT(); retry: /* * Try to find an empty slot below V_if_index. If we fail, take the * next slot. */ for (idx = 1; idx <= V_if_index; idx++) { if (V_ifindex_table[idx] == NULL) break; } /* Catch if_index overflow. */ if (idx >= V_if_indexlim) { if_grow(); goto retry; } if (idx > V_if_index) V_if_index = idx; return (idx); } static void ifindex_free_locked(u_short idx) { IFNET_WLOCK_ASSERT(); V_ifindex_table[idx] = NULL; while (V_if_index > 0 && V_ifindex_table[V_if_index] == NULL) V_if_index--; } static void ifindex_free(u_short idx) { IFNET_WLOCK(); ifindex_free_locked(idx); IFNET_WUNLOCK(); } static void ifnet_setbyindex_locked(u_short idx, struct ifnet *ifp) { IFNET_WLOCK_ASSERT(); V_ifindex_table[idx] = ifp; } static void ifnet_setbyindex(u_short idx, struct ifnet *ifp) { IFNET_WLOCK(); ifnet_setbyindex_locked(idx, ifp); IFNET_WUNLOCK(); } struct ifaddr * ifaddr_byindex(u_short idx) { struct ifnet *ifp; struct ifaddr *ifa = NULL; IFNET_RLOCK_NOSLEEP(); ifp = ifnet_byindex_locked(idx); if (ifp != NULL && (ifa = ifp->if_addr) != NULL) ifa_ref(ifa); IFNET_RUNLOCK_NOSLEEP(); return (ifa); } /* * Network interface utility routines. * * Routines with ifa_ifwith* names take sockaddr *'s as * parameters. */ static void vnet_if_init(const void *unused __unused) { TAILQ_INIT(&V_ifnet); TAILQ_INIT(&V_ifg_head); IFNET_WLOCK(); if_grow(); /* create initial table */ IFNET_WUNLOCK(); vnet_if_clone_init(); } VNET_SYSINIT(vnet_if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, vnet_if_init, NULL); #ifdef VIMAGE static void vnet_if_uninit(const void *unused __unused) { VNET_ASSERT(TAILQ_EMPTY(&V_ifnet), ("%s:%d tailq &V_ifnet=%p " "not empty", __func__, __LINE__, &V_ifnet)); VNET_ASSERT(TAILQ_EMPTY(&V_ifg_head), ("%s:%d tailq &V_ifg_head=%p " "not empty", __func__, __LINE__, &V_ifg_head)); free((caddr_t)V_ifindex_table, M_IFNET); } VNET_SYSUNINIT(vnet_if_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST, vnet_if_uninit, NULL); static void vnet_if_return(const void *unused __unused) { struct ifnet *ifp, *nifp; /* Return all inherited interfaces to their parent vnets. 
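ifindex_alloc() above takes the first unused slot at or below if_index and, when the chosen index would reach if_indexlim, calls if_grow() (which appears just below) to double the table and retries. A user-space model of that grow-and-retry loop, using realloc in place of the kernel allocator:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void **table;
static size_t limit = 8, top;   /* top = highest index in use */

static void
grow(void)
{
    size_t newlim = limit * 2;

    table = realloc(table, newlim * sizeof(*table));
    if (table == NULL)
        abort();
    memset(table + limit, 0, (newlim - limit) * sizeof(*table));
    limit = newlim;
}

static size_t
index_alloc(void *obj)
{
    size_t idx;

retry:
    for (idx = 1; idx <= top; idx++)    /* reuse a freed slot first */
        if (table[idx] == NULL)
            break;
    if (idx >= limit) {                 /* table full: double it */
        grow();
        goto retry;
    }
    if (idx > top)
        top = idx;
    table[idx] = obj;
    return (idx);
}

int
main(void)
{
    int dummy;
    size_t i;

    table = calloc(limit, sizeof(*table));
    for (i = 0; i < 20; i++)
        printf("allocated index %zu\n", index_alloc(&dummy));
    return (0);
}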
*/ TAILQ_FOREACH_SAFE(ifp, &V_ifnet, if_link, nifp) { if (ifp->if_home_vnet != ifp->if_vnet) if_vmove(ifp, ifp->if_home_vnet); } } VNET_SYSUNINIT(vnet_if_return, SI_SUB_VNET_DONE, SI_ORDER_ANY, vnet_if_return, NULL); #endif static void if_grow(void) { int oldlim; u_int n; struct ifnet **e; IFNET_WLOCK_ASSERT(); oldlim = V_if_indexlim; IFNET_WUNLOCK(); n = (oldlim << 1) * sizeof(*e); e = malloc(n, M_IFNET, M_WAITOK | M_ZERO); IFNET_WLOCK(); if (V_if_indexlim != oldlim) { free(e, M_IFNET); return; } if (V_ifindex_table != NULL) { memcpy((caddr_t)e, (caddr_t)V_ifindex_table, n/2); free((caddr_t)V_ifindex_table, M_IFNET); } V_if_indexlim <<= 1; V_ifindex_table = e; } /* * Allocate a struct ifnet and an index for an interface. A layer 2 * common structure will also be allocated if an allocation routine is * registered for the passed type. */ struct ifnet * if_alloc(u_char type) { struct ifnet *ifp; u_short idx; ifp = malloc(sizeof(struct ifnet), M_IFNET, M_WAITOK|M_ZERO); IFNET_WLOCK(); idx = ifindex_alloc(); ifnet_setbyindex_locked(idx, IFNET_HOLD); IFNET_WUNLOCK(); ifp->if_index = idx; ifp->if_type = type; ifp->if_alloctype = type; #ifdef VIMAGE ifp->if_vnet = curvnet; #endif if (if_com_alloc[type] != NULL) { ifp->if_l2com = if_com_alloc[type](type, ifp); if (ifp->if_l2com == NULL) { free(ifp, M_IFNET); ifindex_free(idx); return (NULL); } } IF_ADDR_LOCK_INIT(ifp); TASK_INIT(&ifp->if_linktask, 0, do_link_state_change, ifp); ifp->if_afdata_initialized = 0; IF_AFDATA_LOCK_INIT(ifp); TAILQ_INIT(&ifp->if_addrhead); TAILQ_INIT(&ifp->if_multiaddrs); TAILQ_INIT(&ifp->if_groups); #ifdef MAC mac_ifnet_init(ifp); #endif ifq_init(&ifp->if_snd, ifp); refcount_init(&ifp->if_refcount, 1); /* Index reference. */ for (int i = 0; i < IFCOUNTERS; i++) ifp->if_counters[i] = counter_u64_alloc(M_WAITOK); ifp->if_get_counter = if_get_counter_default; ifnet_setbyindex(ifp->if_index, ifp); return (ifp); } /* * Do the actual work of freeing a struct ifnet, and layer 2 common * structure. This call is made when the last reference to an * interface is released. */ static void if_free_internal(struct ifnet *ifp) { KASSERT((ifp->if_flags & IFF_DYING), ("if_free_internal: interface not dying")); if (if_com_free[ifp->if_alloctype] != NULL) if_com_free[ifp->if_alloctype](ifp->if_l2com, ifp->if_alloctype); #ifdef MAC mac_ifnet_destroy(ifp); #endif /* MAC */ if (ifp->if_description != NULL) free(ifp->if_description, M_IFDESCR); IF_AFDATA_DESTROY(ifp); IF_ADDR_LOCK_DESTROY(ifp); ifq_delete(&ifp->if_snd); for (int i = 0; i < IFCOUNTERS; i++) counter_u64_free(ifp->if_counters[i]); free(ifp, M_IFNET); } /* * Deregister an interface and free the associated storage. */ void if_free(struct ifnet *ifp) { ifp->if_flags |= IFF_DYING; /* XXX: Locking */ CURVNET_SET_QUIET(ifp->if_vnet); IFNET_WLOCK(); KASSERT(ifp == ifnet_byindex_locked(ifp->if_index), ("%s: freeing unallocated ifnet", ifp->if_xname)); ifindex_free_locked(ifp->if_index); IFNET_WUNLOCK(); if (refcount_release(&ifp->if_refcount)) if_free_internal(ifp); CURVNET_RESTORE(); } /* * Interfaces to keep an ifnet type-stable despite the possibility of the * driver calling if_free(). If there are additional references, we defer * freeing the underlying data structure. */ void if_ref(struct ifnet *ifp) { /* We don't assert the ifnet list lock here, but arguably should. 
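if_alloc() and if_free() above, together with if_ref()/if_rele() just below, tie the lifetime of a struct ifnet to a reference count: the ifindex entry holds the initial reference, and whoever drops the last one ends up in if_free_internal(). The same lifetime rule in a minimal stand-alone form; a plain int stands in for the kernel's refcount(9) primitives, so this sketch ignores concurrency:

#include <stdio.h>
#include <stdlib.h>

struct obj {
    int refcount;
    int payload;
};

static struct obj *
obj_alloc(void)
{
    struct obj *o = calloc(1, sizeof(*o));

    if (o == NULL)
        abort();
    o->refcount = 1;        /* the "index" reference */
    return (o);
}

static void
obj_ref(struct obj *o)
{
    o->refcount++;
}

static void
obj_rele(struct obj *o)
{
    if (--o->refcount > 0)
        return;
    printf("last reference dropped, freeing\n");
    free(o);                /* stands in for if_free_internal() */
}

int
main(void)
{
    struct obj *o = obj_alloc();

    obj_ref(o);     /* a second holder, e.g. an in-flight lookup */
    obj_rele(o);    /* the "if_free" side drops the index reference */
    obj_rele(o);    /* final holder: object is destroyed here */
    return (0);
}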
*/ refcount_acquire(&ifp->if_refcount); } void if_rele(struct ifnet *ifp) { if (!refcount_release(&ifp->if_refcount)) return; if_free_internal(ifp); } void ifq_init(struct ifaltq *ifq, struct ifnet *ifp) { mtx_init(&ifq->ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF); if (ifq->ifq_maxlen == 0) ifq->ifq_maxlen = ifqmaxlen; ifq->altq_type = 0; ifq->altq_disc = NULL; ifq->altq_flags &= ALTQF_CANTCHANGE; ifq->altq_tbr = NULL; ifq->altq_ifp = ifp; } void ifq_delete(struct ifaltq *ifq) { mtx_destroy(&ifq->ifq_mtx); } /* * Perform generic interface initialization tasks and attach the interface * to the list of "active" interfaces. If vmove flag is set on entry * to if_attach_internal(), perform only a limited subset of initialization * tasks, given that we are moving from one vnet to another an ifnet which * has already been fully initialized. * * Note that if_detach_internal() removes group membership unconditionally * even when vmove flag is set, and if_attach_internal() adds only IFG_ALL. * Thus, when if_vmove() is applied to a cloned interface, group membership * is lost while a cloned one always joins a group whose name is * ifc->ifc_name. To recover this after if_detach_internal() and * if_attach_internal(), the cloner should be specified to * if_attach_internal() via ifc. If it is non-NULL, if_attach_internal() * attempts to join a group whose name is ifc->ifc_name. * * XXX: * - The decision to return void and thus require this function to * succeed is questionable. * - We should probably do more sanity checking. For instance we don't * do anything to insure if_xname is unique or non-empty. */ void if_attach(struct ifnet *ifp) { if_attach_internal(ifp, 0, NULL); } /* * Compute the least common TSO limit. */ void if_hw_tsomax_common(if_t ifp, struct ifnet_hw_tsomax *pmax) { /* * 1) If there is no limit currently, take the limit from * the network adapter. * * 2) If the network adapter has a limit below the current * limit, apply it. */ if (pmax->tsomaxbytes == 0 || (ifp->if_hw_tsomax != 0 && ifp->if_hw_tsomax < pmax->tsomaxbytes)) { pmax->tsomaxbytes = ifp->if_hw_tsomax; } if (pmax->tsomaxsegcount == 0 || (ifp->if_hw_tsomaxsegcount != 0 && ifp->if_hw_tsomaxsegcount < pmax->tsomaxsegcount)) { pmax->tsomaxsegcount = ifp->if_hw_tsomaxsegcount; } if (pmax->tsomaxsegsize == 0 || (ifp->if_hw_tsomaxsegsize != 0 && ifp->if_hw_tsomaxsegsize < pmax->tsomaxsegsize)) { pmax->tsomaxsegsize = ifp->if_hw_tsomaxsegsize; } } /* * Update TSO limit of a network adapter. * * Returns zero if no change. Else non-zero. */ int if_hw_tsomax_update(if_t ifp, struct ifnet_hw_tsomax *pmax) { int retval = 0; if (ifp->if_hw_tsomax != pmax->tsomaxbytes) { ifp->if_hw_tsomax = pmax->tsomaxbytes; retval++; } if (ifp->if_hw_tsomaxsegsize != pmax->tsomaxsegsize) { ifp->if_hw_tsomaxsegsize = pmax->tsomaxsegsize; retval++; } if (ifp->if_hw_tsomaxsegcount != pmax->tsomaxsegcount) { ifp->if_hw_tsomaxsegcount = pmax->tsomaxsegcount; retval++; } return (retval); } static void if_attach_internal(struct ifnet *ifp, int vmove, struct if_clone *ifc) { unsigned socksize, ifasize; int namelen, masklen; struct sockaddr_dl *sdl; struct ifaddr *ifa; if (ifp->if_index == 0 || ifp != ifnet_byindex(ifp->if_index)) panic ("%s: BUG: if_attach called without if_alloc'd input()\n", ifp->if_xname); #ifdef VIMAGE ifp->if_vnet = curvnet; if (ifp->if_home_vnet == NULL) ifp->if_home_vnet = curvnet; #endif if_addgroup(ifp, IFG_ALL); /* Restore group membership for cloned interfaces. 
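if_hw_tsomax_common() above folds each adapter's TSO limits into a running minimum, treating zero as "no limit advertised yet". That rule, extracted into a small helper:

#include <stdio.h>

/* Merge one adapter limit into the current minimum; 0 means "unset". */
static unsigned
tso_limit_merge(unsigned current, unsigned adapter)
{
    if (current == 0 || (adapter != 0 && adapter < current))
        return (adapter);
    return (current);
}

int
main(void)
{
    unsigned maxbytes = 0;

    maxbytes = tso_limit_merge(maxbytes, 65536);    /* first NIC */
    maxbytes = tso_limit_merge(maxbytes, 32768);    /* smaller limit wins */
    maxbytes = tso_limit_merge(maxbytes, 0);        /* unset: keep current */
    printf("common TSO max: %u bytes\n", maxbytes);
    return (0);
}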
*/ if (vmove && ifc != NULL) if_clone_addgroup(ifp, ifc); getmicrotime(&ifp->if_lastchange); ifp->if_epoch = time_uptime; KASSERT((ifp->if_transmit == NULL && ifp->if_qflush == NULL) || (ifp->if_transmit != NULL && ifp->if_qflush != NULL), ("transmit and qflush must both either be set or both be NULL")); if (ifp->if_transmit == NULL) { ifp->if_transmit = if_transmit; ifp->if_qflush = if_qflush; } if (ifp->if_input == NULL) ifp->if_input = if_input_default; if (ifp->if_requestencap == NULL) ifp->if_requestencap = if_requestencap_default; if (!vmove) { #ifdef MAC mac_ifnet_create(ifp); #endif /* * Create a Link Level name for this device. */ namelen = strlen(ifp->if_xname); /* * Always save enough space for any possiable name so we * can do a rename in place later. */ masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + IFNAMSIZ; socksize = masklen + ifp->if_addrlen; if (socksize < sizeof(*sdl)) socksize = sizeof(*sdl); socksize = roundup2(socksize, sizeof(long)); ifasize = sizeof(*ifa) + 2 * socksize; ifa = ifa_alloc(ifasize, M_WAITOK); sdl = (struct sockaddr_dl *)(ifa + 1); sdl->sdl_len = socksize; sdl->sdl_family = AF_LINK; bcopy(ifp->if_xname, sdl->sdl_data, namelen); sdl->sdl_nlen = namelen; sdl->sdl_index = ifp->if_index; sdl->sdl_type = ifp->if_type; ifp->if_addr = ifa; ifa->ifa_ifp = ifp; ifa->ifa_rtrequest = link_rtrequest; ifa->ifa_addr = (struct sockaddr *)sdl; sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl); ifa->ifa_netmask = (struct sockaddr *)sdl; sdl->sdl_len = masklen; while (namelen != 0) sdl->sdl_data[--namelen] = 0xff; TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link); /* Reliably crash if used uninitialized. */ ifp->if_broadcastaddr = NULL; if (ifp->if_type == IFT_ETHER) { ifp->if_hw_addr = malloc(ifp->if_addrlen, M_IFADDR, M_WAITOK | M_ZERO); } #if defined(INET) || defined(INET6) /* Use defaults for TSO, if nothing is set */ if (ifp->if_hw_tsomax == 0 && ifp->if_hw_tsomaxsegcount == 0 && ifp->if_hw_tsomaxsegsize == 0) { /* * The TSO defaults needs to be such that an * NFS mbuf list of 35 mbufs totalling just * below 64K works and that a chain of mbufs * can be defragged into at most 32 segments: */ ifp->if_hw_tsomax = min(IP_MAXPACKET, (32 * MCLBYTES) - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)); ifp->if_hw_tsomaxsegcount = 35; ifp->if_hw_tsomaxsegsize = 2048; /* 2K */ /* XXX some drivers set IFCAP_TSO after ethernet attach */ if (ifp->if_capabilities & IFCAP_TSO) { if_printf(ifp, "Using defaults for TSO: %u/%u/%u\n", ifp->if_hw_tsomax, ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize); } } #endif } #ifdef VIMAGE else { /* * Update the interface index in the link layer address * of the interface. */ for (ifa = ifp->if_addr; ifa != NULL; ifa = TAILQ_NEXT(ifa, ifa_link)) { if (ifa->ifa_addr->sa_family == AF_LINK) { sdl = (struct sockaddr_dl *)ifa->ifa_addr; sdl->sdl_index = ifp->if_index; } } } #endif IFNET_WLOCK(); TAILQ_INSERT_TAIL(&V_ifnet, ifp, if_link); #ifdef VIMAGE curvnet->vnet_ifcnt++; #endif IFNET_WUNLOCK(); if (domain_init_status >= 2) if_attachdomain1(ifp); EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp); if (IS_DEFAULT_VNET(curvnet)) devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL); /* Announce the interface. 
*/ rt_ifannouncemsg(ifp, IFAN_ARRIVAL); } static void if_attachdomain(void *dummy) { struct ifnet *ifp; TAILQ_FOREACH(ifp, &V_ifnet, if_link) if_attachdomain1(ifp); } SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_SECOND, if_attachdomain, NULL); static void if_attachdomain1(struct ifnet *ifp) { struct domain *dp; /* * Since dp->dom_ifattach calls malloc() with M_WAITOK, we * cannot lock ifp->if_afdata initialization, entirely. */ IF_AFDATA_LOCK(ifp); if (ifp->if_afdata_initialized >= domain_init_status) { IF_AFDATA_UNLOCK(ifp); log(LOG_WARNING, "%s called more than once on %s\n", __func__, ifp->if_xname); return; } ifp->if_afdata_initialized = domain_init_status; IF_AFDATA_UNLOCK(ifp); /* address family dependent data region */ bzero(ifp->if_afdata, sizeof(ifp->if_afdata)); for (dp = domains; dp; dp = dp->dom_next) { if (dp->dom_ifattach) ifp->if_afdata[dp->dom_family] = (*dp->dom_ifattach)(ifp); } } /* * Remove any unicast or broadcast network addresses from an interface. */ void if_purgeaddrs(struct ifnet *ifp) { struct ifaddr *ifa, *next; /* XXX cannot hold IF_ADDR_WLOCK over called functions. */ TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) { if (ifa->ifa_addr->sa_family == AF_LINK) continue; #ifdef INET /* XXX: Ugly!! ad hoc just for INET */ if (ifa->ifa_addr->sa_family == AF_INET) { struct ifaliasreq ifr; bzero(&ifr, sizeof(ifr)); ifr.ifra_addr = *ifa->ifa_addr; if (ifa->ifa_dstaddr) ifr.ifra_broadaddr = *ifa->ifa_dstaddr; if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp, NULL) == 0) continue; } #endif /* INET */ #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) { in6_purgeaddr(ifa); /* ifp_addrhead is already updated */ continue; } #endif /* INET6 */ IF_ADDR_WLOCK(ifp); TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link); IF_ADDR_WUNLOCK(ifp); ifa_free(ifa); } } /* * Remove any multicast network addresses from an interface when an ifnet * is going away. */ static void if_purgemaddrs(struct ifnet *ifp) { struct ifmultiaddr *ifma; struct ifmultiaddr *next; IF_ADDR_WLOCK(ifp); TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next) if_delmulti_locked(ifp, ifma, 1); IF_ADDR_WUNLOCK(ifp); } /* * Detach an interface, removing it from the list of "active" interfaces. * If vmove flag is set on entry to if_detach_internal(), perform only a * limited subset of cleanup tasks, given that we are moving an ifnet from * one vnet to another, where it must be fully operational. * * XXXRW: There are some significant questions about event ordering, and * how to prevent things from starting to use the interface during detach. */ void if_detach(struct ifnet *ifp) { CURVNET_SET_QUIET(ifp->if_vnet); if_detach_internal(ifp, 0, NULL); CURVNET_RESTORE(); } /* * The vmove flag, if set, indicates that we are called from a callpath * that is moving an interface to a different vnet instance. * * The shutdown flag, if set, indicates that we are called in the * process of shutting down a vnet instance. Currently only the * vnet_if_return SYSUNINIT function sets it. Note: we can be called * on a vnet instance shutdown without this flag being set, e.g., when * the cloned interfaces are destoyed as first thing of teardown. */ static int if_detach_internal(struct ifnet *ifp, int vmove, struct if_clone **ifcp) { struct ifaddr *ifa; int i; struct domain *dp; struct ifnet *iter; int found = 0; #ifdef VIMAGE int shutdown; shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET && ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 
1 : 0; #endif IFNET_WLOCK(); TAILQ_FOREACH(iter, &V_ifnet, if_link) if (iter == ifp) { TAILQ_REMOVE(&V_ifnet, ifp, if_link); found = 1; break; } IFNET_WUNLOCK(); if (!found) { /* * While we would want to panic here, we cannot * guarantee that the interface is indeed still on * the list given we don't hold locks all the way. */ return (ENOENT); #if 0 if (vmove) panic("%s: ifp=%p not on the ifnet tailq %p", __func__, ifp, &V_ifnet); else return; /* XXX this should panic as well? */ #endif } /* * At this point we know the interface still was on the ifnet list * and we removed it so we are in a stable state. */ #ifdef VIMAGE curvnet->vnet_ifcnt--; #endif /* * In any case (destroy or vmove) detach us from the groups * and remove/wait for pending events on the taskq. * XXX-BZ in theory an interface could still enqueue a taskq change? */ if_delgroups(ifp); taskqueue_drain(taskqueue_swi, &ifp->if_linktask); /* * Check if this is a cloned interface or not. Must do even if * shutting down as a if_vmove_reclaim() would move the ifp and * the if_clone_addgroup() will have a corrupted string overwise * from a gibberish pointer. */ if (vmove && ifcp != NULL) *ifcp = if_clone_findifc(ifp); if_down(ifp); #ifdef VIMAGE /* * On VNET shutdown abort here as the stack teardown will do all * the work top-down for us. */ if (shutdown) { /* * In case of a vmove we are done here without error. * If we would signal an error it would lead to the same * abort as if we did not find the ifnet anymore. * if_detach() calls us in void context and does not care * about an early abort notification, so life is splendid :) */ goto finish_vnet_shutdown; } #endif /* * At this point we are not tearing down a VNET and are either * going to destroy or vmove the interface and have to cleanup * accordingly. */ /* * Remove routes and flush queues. */ #ifdef ALTQ if (ALTQ_IS_ENABLED(&ifp->if_snd)) altq_disable(&ifp->if_snd); if (ALTQ_IS_ATTACHED(&ifp->if_snd)) altq_detach(&ifp->if_snd); #endif if_purgeaddrs(ifp); #ifdef INET in_ifdetach(ifp); #endif #ifdef INET6 /* * Remove all IPv6 kernel structs related to ifp. This should be done * before removing routing entries below, since IPv6 interface direct * routes are expected to be removed by the IPv6-specific kernel API. * Otherwise, the kernel will detect some inconsistency and bark it. */ in6_ifdetach(ifp); #endif if_purgemaddrs(ifp); /* Announce that the interface is gone. */ rt_ifannouncemsg(ifp, IFAN_DEPARTURE); EVENTHANDLER_INVOKE(ifnet_departure_event, ifp); if (IS_DEFAULT_VNET(curvnet)) devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL); if (!vmove) { /* * Prevent further calls into the device driver via ifnet. */ if_dead(ifp); /* * Remove link ifaddr pointer and maybe decrement if_index. * Clean up all addresses. */ free(ifp->if_hw_addr, M_IFADDR); ifp->if_hw_addr = NULL; ifp->if_addr = NULL; /* We can now free link ifaddr. */ IF_ADDR_WLOCK(ifp); if (!TAILQ_EMPTY(&ifp->if_addrhead)) { ifa = TAILQ_FIRST(&ifp->if_addrhead); TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link); IF_ADDR_WUNLOCK(ifp); ifa_free(ifa); } else IF_ADDR_WUNLOCK(ifp); } rt_flushifroutes(ifp); #ifdef VIMAGE finish_vnet_shutdown: #endif /* * We cannot hold the lock over dom_ifdetach calls as they might * sleep, for example trying to drain a callout, thus open up the * theoretical race with re-attaching. 
*/ IF_AFDATA_LOCK(ifp); i = ifp->if_afdata_initialized; ifp->if_afdata_initialized = 0; IF_AFDATA_UNLOCK(ifp); for (dp = domains; i > 0 && dp; dp = dp->dom_next) { if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family]) { (*dp->dom_ifdetach)(ifp, ifp->if_afdata[dp->dom_family]); ifp->if_afdata[dp->dom_family] = NULL; } } return (0); } #ifdef VIMAGE /* * if_vmove() performs a limited version of if_detach() in current * vnet and if_attach()es the ifnet to the vnet specified as 2nd arg. * An attempt is made to shrink if_index in current vnet, find an * unused if_index in target vnet and calls if_grow() if necessary, * and finally find an unused if_xname for the target vnet. */ static void if_vmove(struct ifnet *ifp, struct vnet *new_vnet) { struct if_clone *ifc; u_int bif_dlt, bif_hdrlen; int rc; /* * if_detach_internal() will call the eventhandler to notify * interface departure. That will detach if_bpf. We need to * safe the dlt and hdrlen so we can re-attach it later. */ bpf_get_bp_params(ifp->if_bpf, &bif_dlt, &bif_hdrlen); /* * Detach from current vnet, but preserve LLADDR info, do not * mark as dead etc. so that the ifnet can be reattached later. * If we cannot find it, we lost the race to someone else. */ rc = if_detach_internal(ifp, 1, &ifc); if (rc != 0) return; /* * Unlink the ifnet from ifindex_table[] in current vnet, and shrink * the if_index for that vnet if possible. * * NOTE: IFNET_WLOCK/IFNET_WUNLOCK() are assumed to be unvirtualized, * or we'd lock on one vnet and unlock on another. */ IFNET_WLOCK(); ifindex_free_locked(ifp->if_index); IFNET_WUNLOCK(); /* * Perform interface-specific reassignment tasks, if provided by * the driver. */ if (ifp->if_reassign != NULL) ifp->if_reassign(ifp, new_vnet, NULL); /* * Switch to the context of the target vnet. */ CURVNET_SET_QUIET(new_vnet); IFNET_WLOCK(); ifp->if_index = ifindex_alloc(); ifnet_setbyindex_locked(ifp->if_index, ifp); IFNET_WUNLOCK(); if_attach_internal(ifp, 1, ifc); if (ifp->if_bpf == NULL) bpfattach(ifp, bif_dlt, bif_hdrlen); CURVNET_RESTORE(); } /* * Move an ifnet to or from another child prison/vnet, specified by the jail id. */ static int if_vmove_loan(struct thread *td, struct ifnet *ifp, char *ifname, int jid) { struct prison *pr; struct ifnet *difp; int shutdown; /* Try to find the prison within our visibility. */ sx_slock(&allprison_lock); pr = prison_find_child(td->td_ucred->cr_prison, jid); sx_sunlock(&allprison_lock); if (pr == NULL) return (ENXIO); prison_hold_locked(pr); mtx_unlock(&pr->pr_mtx); /* Do not try to move the iface from and to the same prison. */ if (pr->pr_vnet == ifp->if_vnet) { prison_free(pr); return (EEXIST); } /* Make sure the named iface does not exists in the dst. prison/vnet. */ /* XXX Lock interfaces to avoid races. */ CURVNET_SET_QUIET(pr->pr_vnet); difp = ifunit(ifname); if (difp != NULL) { CURVNET_RESTORE(); prison_free(pr); return (EEXIST); } /* Make sure the VNET is stable. */ shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET && ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0; if (shutdown) { CURVNET_RESTORE(); prison_free(pr); return (EBUSY); } CURVNET_RESTORE(); /* Move the interface into the child jail/vnet. */ if_vmove(ifp, pr->pr_vnet); /* Report the new if_xname back to the userland. */ sprintf(ifname, "%s", ifp->if_xname); prison_free(pr); return (0); } static int if_vmove_reclaim(struct thread *td, char *ifname, int jid) { struct prison *pr; struct vnet *vnet_dst; struct ifnet *ifp; int shutdown; /* Try to find the prison within our visibility. 
*/ sx_slock(&allprison_lock); pr = prison_find_child(td->td_ucred->cr_prison, jid); sx_sunlock(&allprison_lock); if (pr == NULL) return (ENXIO); prison_hold_locked(pr); mtx_unlock(&pr->pr_mtx); /* Make sure the named iface exists in the source prison/vnet. */ CURVNET_SET(pr->pr_vnet); ifp = ifunit(ifname); /* XXX Lock to avoid races. */ if (ifp == NULL) { CURVNET_RESTORE(); prison_free(pr); return (ENXIO); } /* Do not try to move the iface from and to the same prison. */ vnet_dst = TD_TO_VNET(td); if (vnet_dst == ifp->if_vnet) { CURVNET_RESTORE(); prison_free(pr); return (EEXIST); } /* Make sure the VNET is stable. */ shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET && ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0; if (shutdown) { CURVNET_RESTORE(); prison_free(pr); return (EBUSY); } /* Get interface back from child jail/vnet. */ if_vmove(ifp, vnet_dst); CURVNET_RESTORE(); /* Report the new if_xname back to the userland. */ sprintf(ifname, "%s", ifp->if_xname); prison_free(pr); return (0); } #endif /* VIMAGE */ /* * Add a group to an interface */ int if_addgroup(struct ifnet *ifp, const char *groupname) { struct ifg_list *ifgl; struct ifg_group *ifg = NULL; struct ifg_member *ifgm; int new = 0; if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' && groupname[strlen(groupname) - 1] <= '9') return (EINVAL); IFNET_WLOCK(); TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) if (!strcmp(ifgl->ifgl_group->ifg_group, groupname)) { IFNET_WUNLOCK(); return (EEXIST); } if ((ifgl = (struct ifg_list *)malloc(sizeof(struct ifg_list), M_TEMP, M_NOWAIT)) == NULL) { IFNET_WUNLOCK(); return (ENOMEM); } if ((ifgm = (struct ifg_member *)malloc(sizeof(struct ifg_member), M_TEMP, M_NOWAIT)) == NULL) { free(ifgl, M_TEMP); IFNET_WUNLOCK(); return (ENOMEM); } TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next) if (!strcmp(ifg->ifg_group, groupname)) break; if (ifg == NULL) { if ((ifg = (struct ifg_group *)malloc(sizeof(struct ifg_group), M_TEMP, M_NOWAIT)) == NULL) { free(ifgl, M_TEMP); free(ifgm, M_TEMP); IFNET_WUNLOCK(); return (ENOMEM); } strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group)); ifg->ifg_refcnt = 0; TAILQ_INIT(&ifg->ifg_members); TAILQ_INSERT_TAIL(&V_ifg_head, ifg, ifg_next); new = 1; } ifg->ifg_refcnt++; ifgl->ifgl_group = ifg; ifgm->ifgm_ifp = ifp; IF_ADDR_WLOCK(ifp); TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next); TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next); IF_ADDR_WUNLOCK(ifp); IFNET_WUNLOCK(); if (new) EVENTHANDLER_INVOKE(group_attach_event, ifg); EVENTHANDLER_INVOKE(group_change_event, groupname); return (0); } /* * Remove a group from an interface */ int if_delgroup(struct ifnet *ifp, const char *groupname) { struct ifg_list *ifgl; struct ifg_member *ifgm; IFNET_WLOCK(); TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) if (!strcmp(ifgl->ifgl_group->ifg_group, groupname)) break; if (ifgl == NULL) { IFNET_WUNLOCK(); return (ENOENT); } IF_ADDR_WLOCK(ifp); TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next); IF_ADDR_WUNLOCK(ifp); TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next) if (ifgm->ifgm_ifp == ifp) break; if (ifgm != NULL) { TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next); free(ifgm, M_TEMP); } if (--ifgl->ifgl_group->ifg_refcnt == 0) { TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next); IFNET_WUNLOCK(); EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group); free(ifgl->ifgl_group, M_TEMP); } else IFNET_WUNLOCK(); free(ifgl, M_TEMP); EVENTHANDLER_INVOKE(group_change_event, groupname); return (0); } /* * Remove an interface from all 
groups */ static void if_delgroups(struct ifnet *ifp) { struct ifg_list *ifgl; struct ifg_member *ifgm; char groupname[IFNAMSIZ]; IFNET_WLOCK(); while (!TAILQ_EMPTY(&ifp->if_groups)) { ifgl = TAILQ_FIRST(&ifp->if_groups); strlcpy(groupname, ifgl->ifgl_group->ifg_group, IFNAMSIZ); IF_ADDR_WLOCK(ifp); TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next); IF_ADDR_WUNLOCK(ifp); TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next) if (ifgm->ifgm_ifp == ifp) break; if (ifgm != NULL) { TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next); free(ifgm, M_TEMP); } if (--ifgl->ifgl_group->ifg_refcnt == 0) { TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next); IFNET_WUNLOCK(); EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group); free(ifgl->ifgl_group, M_TEMP); } else IFNET_WUNLOCK(); free(ifgl, M_TEMP); EVENTHANDLER_INVOKE(group_change_event, groupname); IFNET_WLOCK(); } IFNET_WUNLOCK(); } /* * Stores all groups from an interface in memory pointed * to by data */ static int if_getgroup(struct ifgroupreq *data, struct ifnet *ifp) { int len, error; struct ifg_list *ifgl; struct ifg_req ifgrq, *ifgp; struct ifgroupreq *ifgr = data; if (ifgr->ifgr_len == 0) { IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) ifgr->ifgr_len += sizeof(struct ifg_req); IF_ADDR_RUNLOCK(ifp); return (0); } len = ifgr->ifgr_len; ifgp = ifgr->ifgr_groups; /* XXX: wire */ IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) { if (len < sizeof(ifgrq)) { IF_ADDR_RUNLOCK(ifp); return (EINVAL); } bzero(&ifgrq, sizeof ifgrq); strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group, sizeof(ifgrq.ifgrq_group)); if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) { IF_ADDR_RUNLOCK(ifp); return (error); } len -= sizeof(ifgrq); ifgp++; } IF_ADDR_RUNLOCK(ifp); return (0); } /* * Stores all members of a group in memory pointed to by data */ static int if_getgroupmembers(struct ifgroupreq *data) { struct ifgroupreq *ifgr = data; struct ifg_group *ifg; struct ifg_member *ifgm; struct ifg_req ifgrq, *ifgp; int len, error; IFNET_RLOCK(); TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next) if (!strcmp(ifg->ifg_group, ifgr->ifgr_name)) break; if (ifg == NULL) { IFNET_RUNLOCK(); return (ENOENT); } if (ifgr->ifgr_len == 0) { TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) ifgr->ifgr_len += sizeof(ifgrq); IFNET_RUNLOCK(); return (0); } len = ifgr->ifgr_len; ifgp = ifgr->ifgr_groups; TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) { if (len < sizeof(ifgrq)) { IFNET_RUNLOCK(); return (EINVAL); } bzero(&ifgrq, sizeof ifgrq); strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname, sizeof(ifgrq.ifgrq_member)); if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) { IFNET_RUNLOCK(); return (error); } len -= sizeof(ifgrq); ifgp++; } IFNET_RUNLOCK(); return (0); } /* * Return counter values from counter(9)s stored in ifnet. */ uint64_t if_get_counter_default(struct ifnet *ifp, ift_counter cnt) { KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt)); return (counter_u64_fetch(ifp->if_counters[cnt])); } /* * Increase an ifnet counter. Usually used for counters shared * between the stack and a driver, but function supports them all. */ void if_inc_counter(struct ifnet *ifp, ift_counter cnt, int64_t inc) { KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt)); counter_u64_add(ifp->if_counters[cnt], inc); } /* * Copy data from ifnet to userland API structure if_data. 
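 *
 * The values exported here are the counter(9) counters kept in the ifnet;
 * drivers and the stack account into them through if_inc_counter() above.
 * A minimal, illustrative receive-path sketch (not taken from this file):
 *
 *	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
 *	if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);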
*/ void if_data_copy(struct ifnet *ifp, struct if_data *ifd) { ifd->ifi_type = ifp->if_type; ifd->ifi_physical = 0; ifd->ifi_addrlen = ifp->if_addrlen; ifd->ifi_hdrlen = ifp->if_hdrlen; ifd->ifi_link_state = ifp->if_link_state; ifd->ifi_vhid = 0; ifd->ifi_datalen = sizeof(struct if_data); ifd->ifi_mtu = ifp->if_mtu; ifd->ifi_metric = ifp->if_metric; ifd->ifi_baudrate = ifp->if_baudrate; ifd->ifi_hwassist = ifp->if_hwassist; ifd->ifi_epoch = ifp->if_epoch; ifd->ifi_lastchange = ifp->if_lastchange; ifd->ifi_ipackets = ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS); ifd->ifi_ierrors = ifp->if_get_counter(ifp, IFCOUNTER_IERRORS); ifd->ifi_opackets = ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS); ifd->ifi_oerrors = ifp->if_get_counter(ifp, IFCOUNTER_OERRORS); ifd->ifi_collisions = ifp->if_get_counter(ifp, IFCOUNTER_COLLISIONS); ifd->ifi_ibytes = ifp->if_get_counter(ifp, IFCOUNTER_IBYTES); ifd->ifi_obytes = ifp->if_get_counter(ifp, IFCOUNTER_OBYTES); ifd->ifi_imcasts = ifp->if_get_counter(ifp, IFCOUNTER_IMCASTS); ifd->ifi_omcasts = ifp->if_get_counter(ifp, IFCOUNTER_OMCASTS); ifd->ifi_iqdrops = ifp->if_get_counter(ifp, IFCOUNTER_IQDROPS); ifd->ifi_oqdrops = ifp->if_get_counter(ifp, IFCOUNTER_OQDROPS); ifd->ifi_noproto = ifp->if_get_counter(ifp, IFCOUNTER_NOPROTO); } /* * Wrapper functions for struct ifnet address list locking macros. These are * used by kernel modules to avoid encoding programming interface or binary * interface assumptions that may be violated when kernel-internal locking * approaches change. */ void if_addr_rlock(struct ifnet *ifp) { IF_ADDR_RLOCK(ifp); } void if_addr_runlock(struct ifnet *ifp) { IF_ADDR_RUNLOCK(ifp); } void if_maddr_rlock(if_t ifp) { IF_ADDR_RLOCK((struct ifnet *)ifp); } void if_maddr_runlock(if_t ifp) { IF_ADDR_RUNLOCK((struct ifnet *)ifp); } /* * Initialization, destruction and refcounting functions for ifaddrs. 
*/ struct ifaddr * ifa_alloc(size_t size, int flags) { struct ifaddr *ifa; KASSERT(size >= sizeof(struct ifaddr), ("%s: invalid size %zu", __func__, size)); ifa = malloc(size, M_IFADDR, M_ZERO | flags); if (ifa == NULL) return (NULL); if ((ifa->ifa_opackets = counter_u64_alloc(flags)) == NULL) goto fail; if ((ifa->ifa_ipackets = counter_u64_alloc(flags)) == NULL) goto fail; if ((ifa->ifa_obytes = counter_u64_alloc(flags)) == NULL) goto fail; if ((ifa->ifa_ibytes = counter_u64_alloc(flags)) == NULL) goto fail; refcount_init(&ifa->ifa_refcnt, 1); return (ifa); fail: /* free(NULL) is okay */ counter_u64_free(ifa->ifa_opackets); counter_u64_free(ifa->ifa_ipackets); counter_u64_free(ifa->ifa_obytes); counter_u64_free(ifa->ifa_ibytes); free(ifa, M_IFADDR); return (NULL); } void ifa_ref(struct ifaddr *ifa) { refcount_acquire(&ifa->ifa_refcnt); } void ifa_free(struct ifaddr *ifa) { if (refcount_release(&ifa->ifa_refcnt)) { counter_u64_free(ifa->ifa_opackets); counter_u64_free(ifa->ifa_ipackets); counter_u64_free(ifa->ifa_obytes); counter_u64_free(ifa->ifa_ibytes); free(ifa, M_IFADDR); } } static int ifa_maintain_loopback_route(int cmd, const char *otype, struct ifaddr *ifa, struct sockaddr *ia) { int error; struct rt_addrinfo info; struct sockaddr_dl null_sdl; struct ifnet *ifp; ifp = ifa->ifa_ifp; bzero(&info, sizeof(info)); if (cmd != RTM_DELETE) info.rti_ifp = V_loif; info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC | RTF_PINNED; info.rti_info[RTAX_DST] = ia; info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl; link_init_sdl(ifp, (struct sockaddr *)&null_sdl, ifp->if_type); error = rtrequest1_fib(cmd, &info, NULL, ifp->if_fib); if (error != 0) log(LOG_DEBUG, "%s: %s failed for interface %s: %u\n", __func__, otype, if_name(ifp), error); return (error); } int ifa_add_loopback_route(struct ifaddr *ifa, struct sockaddr *ia) { return (ifa_maintain_loopback_route(RTM_ADD, "insertion", ifa, ia)); } int ifa_del_loopback_route(struct ifaddr *ifa, struct sockaddr *ia) { return (ifa_maintain_loopback_route(RTM_DELETE, "deletion", ifa, ia)); } int ifa_switch_loopback_route(struct ifaddr *ifa, struct sockaddr *ia) { return (ifa_maintain_loopback_route(RTM_CHANGE, "switch", ifa, ia)); } /* * XXX: Because sockaddr_dl has deeper structure than the sockaddr * structs used to represent other address families, it is necessary * to perform a different comparison. */ #define sa_dl_equal(a1, a2) \ ((((const struct sockaddr_dl *)(a1))->sdl_len == \ ((const struct sockaddr_dl *)(a2))->sdl_len) && \ (bcmp(CLLADDR((const struct sockaddr_dl *)(a1)), \ CLLADDR((const struct sockaddr_dl *)(a2)), \ ((const struct sockaddr_dl *)(a1))->sdl_alen) == 0)) /* * Locate an interface based on a complete address. 
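 *
 * The ifa_ifwith*() lookups that follow hand back a referenced ifaddr
 * (via ifa_ref() above), so the caller is expected to drop that reference
 * when done.  Illustrative sketch only, with a caller-supplied sockaddr:
 *
 *	ifa = ifa_ifwithaddr(addr);
 *	if (ifa != NULL) {
 *		... use ifa ...
 *		ifa_free(ifa);
 *	}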
*/ /*ARGSUSED*/ static struct ifaddr * ifa_ifwithaddr_internal(const struct sockaddr *addr, int getref) { struct ifnet *ifp; struct ifaddr *ifa; IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != addr->sa_family) continue; if (sa_equal(addr, ifa->ifa_addr)) { if (getref) ifa_ref(ifa); IF_ADDR_RUNLOCK(ifp); goto done; } /* IP6 doesn't have broadcast */ if ((ifp->if_flags & IFF_BROADCAST) && ifa->ifa_broadaddr && ifa->ifa_broadaddr->sa_len != 0 && sa_equal(ifa->ifa_broadaddr, addr)) { if (getref) ifa_ref(ifa); IF_ADDR_RUNLOCK(ifp); goto done; } } IF_ADDR_RUNLOCK(ifp); } ifa = NULL; done: IFNET_RUNLOCK_NOSLEEP(); return (ifa); } struct ifaddr * ifa_ifwithaddr(const struct sockaddr *addr) { return (ifa_ifwithaddr_internal(addr, 1)); } int ifa_ifwithaddr_check(const struct sockaddr *addr) { return (ifa_ifwithaddr_internal(addr, 0) != NULL); } /* * Locate an interface based on the broadcast address. */ /* ARGSUSED */ struct ifaddr * ifa_ifwithbroadaddr(const struct sockaddr *addr, int fibnum) { struct ifnet *ifp; struct ifaddr *ifa; IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum)) continue; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != addr->sa_family) continue; if ((ifp->if_flags & IFF_BROADCAST) && ifa->ifa_broadaddr && ifa->ifa_broadaddr->sa_len != 0 && sa_equal(ifa->ifa_broadaddr, addr)) { ifa_ref(ifa); IF_ADDR_RUNLOCK(ifp); goto done; } } IF_ADDR_RUNLOCK(ifp); } ifa = NULL; done: IFNET_RUNLOCK_NOSLEEP(); return (ifa); } /* * Locate the point to point interface with a given destination address. */ /*ARGSUSED*/ struct ifaddr * ifa_ifwithdstaddr(const struct sockaddr *addr, int fibnum) { struct ifnet *ifp; struct ifaddr *ifa; IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { if ((ifp->if_flags & IFF_POINTOPOINT) == 0) continue; if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum)) continue; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != addr->sa_family) continue; if (ifa->ifa_dstaddr != NULL && sa_equal(addr, ifa->ifa_dstaddr)) { ifa_ref(ifa); IF_ADDR_RUNLOCK(ifp); goto done; } } IF_ADDR_RUNLOCK(ifp); } ifa = NULL; done: IFNET_RUNLOCK_NOSLEEP(); return (ifa); } /* * Find an interface on a specific network. If many, choice * is most specific found. */ struct ifaddr * ifa_ifwithnet(const struct sockaddr *addr, int ignore_ptp, int fibnum) { struct ifnet *ifp; struct ifaddr *ifa; struct ifaddr *ifa_maybe = NULL; u_int af = addr->sa_family; const char *addr_data = addr->sa_data, *cplim; /* * AF_LINK addresses can be looked up directly by their index number, * so do that if we can. */ if (af == AF_LINK) { const struct sockaddr_dl *sdl = (const struct sockaddr_dl *)addr; if (sdl->sdl_index && sdl->sdl_index <= V_if_index) return (ifaddr_byindex(sdl->sdl_index)); } /* * Scan though each interface, looking for ones that have addresses * in this address family and the requested fib. Maintain a reference * on ifa_maybe once we find one, as we release the IF_ADDR_RLOCK() that * kept it stable when we move onto the next interface. 
*/ IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum)) continue; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { const char *cp, *cp2, *cp3; if (ifa->ifa_addr->sa_family != af) next: continue; if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT && !ignore_ptp) { /* * This is a bit broken as it doesn't * take into account that the remote end may * be a single node in the network we are * looking for. * The trouble is that we don't know the * netmask for the remote end. */ if (ifa->ifa_dstaddr != NULL && sa_equal(addr, ifa->ifa_dstaddr)) { ifa_ref(ifa); IF_ADDR_RUNLOCK(ifp); goto done; } } else { /* * Scan all the bits in the ifa's address. * If a bit dissagrees with what we are * looking for, mask it with the netmask * to see if it really matters. * (A byte at a time) */ if (ifa->ifa_netmask == 0) continue; cp = addr_data; cp2 = ifa->ifa_addr->sa_data; cp3 = ifa->ifa_netmask->sa_data; cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask; while (cp3 < cplim) if ((*cp++ ^ *cp2++) & *cp3++) goto next; /* next address! */ /* * If the netmask of what we just found * is more specific than what we had before * (if we had one), or if the virtual status * of new prefix is better than of the old one, * then remember the new one before continuing * to search for an even better one. */ if (ifa_maybe == NULL || ifa_preferred(ifa_maybe, ifa) || rn_refines((caddr_t)ifa->ifa_netmask, (caddr_t)ifa_maybe->ifa_netmask)) { if (ifa_maybe != NULL) ifa_free(ifa_maybe); ifa_maybe = ifa; ifa_ref(ifa_maybe); } } } IF_ADDR_RUNLOCK(ifp); } ifa = ifa_maybe; ifa_maybe = NULL; done: IFNET_RUNLOCK_NOSLEEP(); if (ifa_maybe != NULL) ifa_free(ifa_maybe); return (ifa); } /* * Find an interface address specific to an interface best matching * a given address. */ struct ifaddr * ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp) { struct ifaddr *ifa; const char *cp, *cp2, *cp3; char *cplim; struct ifaddr *ifa_maybe = NULL; u_int af = addr->sa_family; if (af >= AF_MAX) return (NULL); IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != af) continue; if (ifa_maybe == NULL) ifa_maybe = ifa; if (ifa->ifa_netmask == 0) { if (sa_equal(addr, ifa->ifa_addr) || (ifa->ifa_dstaddr && sa_equal(addr, ifa->ifa_dstaddr))) goto done; continue; } if (ifp->if_flags & IFF_POINTOPOINT) { if (sa_equal(addr, ifa->ifa_dstaddr)) goto done; } else { cp = addr->sa_data; cp2 = ifa->ifa_addr->sa_data; cp3 = ifa->ifa_netmask->sa_data; cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask; for (; cp3 < cplim; cp3++) if ((*cp++ ^ *cp2++) & *cp3) break; if (cp3 == cplim) goto done; } } ifa = ifa_maybe; done: if (ifa != NULL) ifa_ref(ifa); IF_ADDR_RUNLOCK(ifp); return (ifa); } /* * See whether new ifa is better than current one: * 1) A non-virtual one is preferred over virtual. * 2) A virtual in master state preferred over any other state. * * Used in several address selecting functions. */ int ifa_preferred(struct ifaddr *cur, struct ifaddr *next) { return (cur->ifa_carp && (!next->ifa_carp || ((*carp_master_p)(next) && !(*carp_master_p)(cur)))); } #include /* * Default action when installing a route with a Link Level gateway. * Lookup an appropriate real ifa to point to. * This should be moved to /sys/net/link.c eventually. 
*/ static void link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info) { struct ifaddr *ifa, *oifa; struct sockaddr *dst; struct ifnet *ifp; if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == NULL) || ((ifp = ifa->ifa_ifp) == NULL) || ((dst = rt_key(rt)) == NULL)) return; ifa = ifaof_ifpforaddr(dst, ifp); if (ifa) { oifa = rt->rt_ifa; rt->rt_ifa = ifa; ifa_free(oifa); if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest) ifa->ifa_rtrequest(cmd, rt, info); } } struct sockaddr_dl * link_alloc_sdl(size_t size, int flags) { return (malloc(size, M_TEMP, flags)); } void link_free_sdl(struct sockaddr *sa) { free(sa, M_TEMP); } /* * Fills in given sdl with interface basic info. * Returns pointer to filled sdl. */ struct sockaddr_dl * link_init_sdl(struct ifnet *ifp, struct sockaddr *paddr, u_char iftype) { struct sockaddr_dl *sdl; sdl = (struct sockaddr_dl *)paddr; memset(sdl, 0, sizeof(struct sockaddr_dl)); sdl->sdl_len = sizeof(struct sockaddr_dl); sdl->sdl_family = AF_LINK; sdl->sdl_index = ifp->if_index; sdl->sdl_type = iftype; return (sdl); } /* * Mark an interface down and notify protocols of * the transition. */ static void if_unroute(struct ifnet *ifp, int flag, int fam) { struct ifaddr *ifa; KASSERT(flag == IFF_UP, ("if_unroute: flag != IFF_UP")); ifp->if_flags &= ~flag; getmicrotime(&ifp->if_lastchange); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family)) pfctlinput(PRC_IFDOWN, ifa->ifa_addr); ifp->if_qflush(ifp); if (ifp->if_carp) (*carp_linkstate_p)(ifp); rt_ifmsg(ifp); } /* * Mark an interface up and notify protocols of * the transition. */ static void if_route(struct ifnet *ifp, int flag, int fam) { struct ifaddr *ifa; KASSERT(flag == IFF_UP, ("if_route: flag != IFF_UP")); ifp->if_flags |= flag; getmicrotime(&ifp->if_lastchange); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family)) pfctlinput(PRC_IFUP, ifa->ifa_addr); if (ifp->if_carp) (*carp_linkstate_p)(ifp); rt_ifmsg(ifp); #ifdef INET6 in6_if_up(ifp); #endif } void (*vlan_link_state_p)(struct ifnet *); /* XXX: private from if_vlan */ void (*vlan_trunk_cap_p)(struct ifnet *); /* XXX: private from if_vlan */ struct ifnet *(*vlan_trunkdev_p)(struct ifnet *); struct ifnet *(*vlan_devat_p)(struct ifnet *, uint16_t); int (*vlan_tag_p)(struct ifnet *, uint16_t *); int (*vlan_setcookie_p)(struct ifnet *, void *); void *(*vlan_cookie_p)(struct ifnet *); /* * Handle a change in the interface link state. To avoid LORs * between driver lock and upper layer locks, as well as possible * recursions, we post event to taskqueue, and all job * is done in static do_link_state_change(). */ void if_link_state_change(struct ifnet *ifp, int link_state) { /* Return if state hasn't changed. */ if (ifp->if_link_state == link_state) return; ifp->if_link_state = link_state; taskqueue_enqueue(taskqueue_swi, &ifp->if_linktask); } static void do_link_state_change(void *arg, int pending) { struct ifnet *ifp = (struct ifnet *)arg; int link_state = ifp->if_link_state; CURVNET_SET(ifp->if_vnet); /* Notify that the link state has changed. 
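 *
 * The producer side of this handoff is a call to if_link_state_change()
 * above, typically from a driver's link/MII status callback (that caller
 * is illustrative and not shown in this file); the work below is deferred
 * to the taskqueue so it runs in the interface's vnet:
 *
 *	if_link_state_change(ifp, LINK_STATE_UP);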
*/ rt_ifmsg(ifp); if (ifp->if_vlantrunk != NULL) (*vlan_link_state_p)(ifp); if ((ifp->if_type == IFT_ETHER || ifp->if_type == IFT_L2VLAN) && ifp->if_l2com != NULL) (*ng_ether_link_state_p)(ifp, link_state); if (ifp->if_carp) (*carp_linkstate_p)(ifp); if (ifp->if_bridge) (*bridge_linkstate_p)(ifp); if (ifp->if_lagg) (*lagg_linkstate_p)(ifp, link_state); if (IS_DEFAULT_VNET(curvnet)) devctl_notify("IFNET", ifp->if_xname, (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN", NULL); if (pending > 1) if_printf(ifp, "%d link states coalesced\n", pending); if (log_link_state_change) log(LOG_NOTICE, "%s: link state changed to %s\n", ifp->if_xname, (link_state == LINK_STATE_UP) ? "UP" : "DOWN" ); EVENTHANDLER_INVOKE(ifnet_link_event, ifp, link_state); CURVNET_RESTORE(); } /* * Mark an interface down and notify protocols of * the transition. */ void if_down(struct ifnet *ifp) { EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_DOWN); if_unroute(ifp, IFF_UP, AF_UNSPEC); } /* * Mark an interface up and notify protocols of * the transition. */ void if_up(struct ifnet *ifp) { if_route(ifp, IFF_UP, AF_UNSPEC); EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_UP); } /* * Flush an interface queue. */ void if_qflush(struct ifnet *ifp) { struct mbuf *m, *n; struct ifaltq *ifq; ifq = &ifp->if_snd; IFQ_LOCK(ifq); #ifdef ALTQ if (ALTQ_IS_ENABLED(ifq)) ALTQ_PURGE(ifq); #endif n = ifq->ifq_head; while ((m = n) != NULL) { n = m->m_nextpkt; m_freem(m); } ifq->ifq_head = 0; ifq->ifq_tail = 0; ifq->ifq_len = 0; IFQ_UNLOCK(ifq); } /* * Map interface name to interface structure pointer, with or without * returning a reference. */ struct ifnet * ifunit_ref(const char *name) { struct ifnet *ifp; IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0 && !(ifp->if_flags & IFF_DYING)) break; } if (ifp != NULL) if_ref(ifp); IFNET_RUNLOCK_NOSLEEP(); return (ifp); } struct ifnet * ifunit(const char *name) { struct ifnet *ifp; IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0) break; } IFNET_RUNLOCK_NOSLEEP(); return (ifp); } static void * ifr_buffer_get_buffer(void *data) { union ifreq_union *ifrup; ifrup = data; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) return ((void *)(uintptr_t) ifrup->ifr32.ifr_ifru.ifru_buffer.buffer); #endif return (ifrup->ifr.ifr_ifru.ifru_buffer.buffer); } static void ifr_buffer_set_buffer_null(void *data) { union ifreq_union *ifrup; ifrup = data; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) ifrup->ifr32.ifr_ifru.ifru_buffer.buffer = 0; else #endif ifrup->ifr.ifr_ifru.ifru_buffer.buffer = NULL; } static size_t ifr_buffer_get_length(void *data) { union ifreq_union *ifrup; ifrup = data; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) return (ifrup->ifr32.ifr_ifru.ifru_buffer.length); #endif return (ifrup->ifr.ifr_ifru.ifru_buffer.length); } static void ifr_buffer_set_length(void *data, size_t len) { union ifreq_union *ifrup; ifrup = data; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) ifrup->ifr32.ifr_ifru.ifru_buffer.length = len; else #endif ifrup->ifr.ifr_ifru.ifru_buffer.length = len; } +void * +ifr_data_get_ptr(void *ifrp) +{ + union ifreq_union *ifrup; + + ifrup = ifrp; +#ifdef COMPAT_FREEBSD32 + if (SV_CURPROC_FLAG(SV_ILP32)) + return ((void *)(uintptr_t) + ifrup->ifr32.ifr_ifru.ifru_data); +#endif + return (ifrup->ifr.ifr_ifru.ifru_data); +} + /* * Hardware specific interface ioctls. 
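 *
 * The ifr_data_get_ptr() accessor introduced above follows the same
 * pattern as the ifr_buffer_*() helpers: for a 32-bit process running
 * under COMPAT_FREEBSD32 (SV_ILP32) ifru_data is stored as a 32-bit
 * value and has to be widened through uintptr_t before it can be used
 * as a kernel pointer.  Handlers below therefore fetch the user pointer
 * through the accessor instead of reading ifr->ifr_data directly, e.g.
 * the SIOCSIFNAME path as changed in this revision:
 *
 *	error = copyinstr(ifr_data_get_ptr(ifr), new_name, IFNAMSIZ, NULL);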
*/ static int ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td) { struct ifreq *ifr; int error = 0, do_ifup = 0; int new_flags, temp_flags; size_t namelen, onamelen; size_t descrlen; char *descrbuf, *odescrbuf; char new_name[IFNAMSIZ]; struct ifaddr *ifa; struct sockaddr_dl *sdl; ifr = (struct ifreq *)data; switch (cmd) { case SIOCGIFINDEX: ifr->ifr_index = ifp->if_index; break; case SIOCGIFFLAGS: temp_flags = ifp->if_flags | ifp->if_drv_flags; ifr->ifr_flags = temp_flags & 0xffff; ifr->ifr_flagshigh = temp_flags >> 16; break; case SIOCGIFCAP: ifr->ifr_reqcap = ifp->if_capabilities; ifr->ifr_curcap = ifp->if_capenable; break; #ifdef MAC case SIOCGIFMAC: error = mac_ifnet_ioctl_get(td->td_ucred, ifr, ifp); break; #endif case SIOCGIFMETRIC: ifr->ifr_metric = ifp->if_metric; break; case SIOCGIFMTU: ifr->ifr_mtu = ifp->if_mtu; break; case SIOCGIFPHYS: /* XXXGL: did this ever worked? */ ifr->ifr_phys = 0; break; case SIOCGIFDESCR: error = 0; sx_slock(&ifdescr_sx); if (ifp->if_description == NULL) error = ENOMSG; else { /* space for terminating nul */ descrlen = strlen(ifp->if_description) + 1; if (ifr_buffer_get_length(ifr) < descrlen) ifr_buffer_set_buffer_null(ifr); else error = copyout(ifp->if_description, ifr_buffer_get_buffer(ifr), descrlen); ifr_buffer_set_length(ifr, descrlen); } sx_sunlock(&ifdescr_sx); break; case SIOCSIFDESCR: error = priv_check(td, PRIV_NET_SETIFDESCR); if (error) return (error); /* * Copy only (length-1) bytes to make sure that * if_description is always nul terminated. The * length parameter is supposed to count the * terminating nul in. */ if (ifr_buffer_get_length(ifr) > ifdescr_maxlen) return (ENAMETOOLONG); else if (ifr_buffer_get_length(ifr) == 0) descrbuf = NULL; else { descrbuf = malloc(ifr_buffer_get_length(ifr), M_IFDESCR, M_WAITOK | M_ZERO); error = copyin(ifr_buffer_get_buffer(ifr), descrbuf, ifr_buffer_get_length(ifr) - 1); if (error) { free(descrbuf, M_IFDESCR); break; } } sx_xlock(&ifdescr_sx); odescrbuf = ifp->if_description; ifp->if_description = descrbuf; sx_xunlock(&ifdescr_sx); getmicrotime(&ifp->if_lastchange); free(odescrbuf, M_IFDESCR); break; case SIOCGIFFIB: ifr->ifr_fib = ifp->if_fib; break; case SIOCSIFFIB: error = priv_check(td, PRIV_NET_SETIFFIB); if (error) return (error); if (ifr->ifr_fib >= rt_numfibs) return (EINVAL); ifp->if_fib = ifr->ifr_fib; break; case SIOCSIFFLAGS: error = priv_check(td, PRIV_NET_SETIFFLAGS); if (error) return (error); /* * Currently, no driver owned flags pass the IFF_CANTCHANGE * check, so we don't need special handling here yet. */ new_flags = (ifr->ifr_flags & 0xffff) | (ifr->ifr_flagshigh << 16); if (ifp->if_flags & IFF_UP && (new_flags & IFF_UP) == 0) { if_down(ifp); } else if (new_flags & IFF_UP && (ifp->if_flags & IFF_UP) == 0) { do_ifup = 1; } /* See if permanently promiscuous mode bit is about to flip */ if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) { if (new_flags & IFF_PPROMISC) ifp->if_flags |= IFF_PROMISC; else if (ifp->if_pcount == 0) ifp->if_flags &= ~IFF_PROMISC; if (log_promisc_mode_change) log(LOG_INFO, "%s: permanently promiscuous mode %s\n", ifp->if_xname, ((new_flags & IFF_PPROMISC) ? 
"enabled" : "disabled")); } ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) | (new_flags &~ IFF_CANTCHANGE); if (ifp->if_ioctl) { (void) (*ifp->if_ioctl)(ifp, cmd, data); } if (do_ifup) if_up(ifp); getmicrotime(&ifp->if_lastchange); break; case SIOCSIFCAP: error = priv_check(td, PRIV_NET_SETIFCAP); if (error) return (error); if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); if (ifr->ifr_reqcap & ~ifp->if_capabilities) return (EINVAL); error = (*ifp->if_ioctl)(ifp, cmd, data); if (error == 0) getmicrotime(&ifp->if_lastchange); break; #ifdef MAC case SIOCSIFMAC: error = mac_ifnet_ioctl_set(td->td_ucred, ifr, ifp); break; #endif case SIOCSIFNAME: error = priv_check(td, PRIV_NET_SETIFNAME); if (error) return (error); - error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL); + error = copyinstr(ifr_data_get_ptr(ifr), new_name, IFNAMSIZ, + NULL); if (error != 0) return (error); if (new_name[0] == '\0') return (EINVAL); if (new_name[IFNAMSIZ-1] != '\0') { new_name[IFNAMSIZ-1] = '\0'; if (strlen(new_name) == IFNAMSIZ-1) return (EINVAL); } if (ifunit(new_name) != NULL) return (EEXIST); /* * XXX: Locking. Nothing else seems to lock if_flags, * and there are numerous other races with the * ifunit() checks not being atomic with namespace * changes (renames, vmoves, if_attach, etc). */ ifp->if_flags |= IFF_RENAMING; /* Announce the departure of the interface. */ rt_ifannouncemsg(ifp, IFAN_DEPARTURE); EVENTHANDLER_INVOKE(ifnet_departure_event, ifp); log(LOG_INFO, "%s: changing name to '%s'\n", ifp->if_xname, new_name); IF_ADDR_WLOCK(ifp); strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname)); ifa = ifp->if_addr; sdl = (struct sockaddr_dl *)ifa->ifa_addr; namelen = strlen(new_name); onamelen = sdl->sdl_nlen; /* * Move the address if needed. This is safe because we * allocate space for a name of length IFNAMSIZ when we * create this in if_attach(). */ if (namelen != onamelen) { bcopy(sdl->sdl_data + onamelen, sdl->sdl_data + namelen, sdl->sdl_alen); } bcopy(new_name, sdl->sdl_data, namelen); sdl->sdl_nlen = namelen; sdl = (struct sockaddr_dl *)ifa->ifa_netmask; bzero(sdl->sdl_data, onamelen); while (namelen != 0) sdl->sdl_data[--namelen] = 0xff; IF_ADDR_WUNLOCK(ifp); EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp); /* Announce the return of the interface. */ rt_ifannouncemsg(ifp, IFAN_ARRIVAL); ifp->if_flags &= ~IFF_RENAMING; break; #ifdef VIMAGE case SIOCSIFVNET: error = priv_check(td, PRIV_NET_SETIFVNET); if (error) return (error); error = if_vmove_loan(td, ifp, ifr->ifr_name, ifr->ifr_jid); break; #endif case SIOCSIFMETRIC: error = priv_check(td, PRIV_NET_SETIFMETRIC); if (error) return (error); ifp->if_metric = ifr->ifr_metric; getmicrotime(&ifp->if_lastchange); break; case SIOCSIFPHYS: error = priv_check(td, PRIV_NET_SETIFPHYS); if (error) return (error); if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); error = (*ifp->if_ioctl)(ifp, cmd, data); if (error == 0) getmicrotime(&ifp->if_lastchange); break; case SIOCSIFMTU: { u_long oldmtu = ifp->if_mtu; error = priv_check(td, PRIV_NET_SETIFMTU); if (error) return (error); if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU) return (EINVAL); if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); error = (*ifp->if_ioctl)(ifp, cmd, data); if (error == 0) { getmicrotime(&ifp->if_lastchange); rt_ifmsg(ifp); } /* * If the link MTU changed, do network layer specific procedure. 
*/ if (ifp->if_mtu != oldmtu) { #ifdef INET6 nd6_setmtu(ifp); #endif rt_updatemtu(ifp); } break; } case SIOCADDMULTI: case SIOCDELMULTI: if (cmd == SIOCADDMULTI) error = priv_check(td, PRIV_NET_ADDMULTI); else error = priv_check(td, PRIV_NET_DELMULTI); if (error) return (error); /* Don't allow group membership on non-multicast interfaces. */ if ((ifp->if_flags & IFF_MULTICAST) == 0) return (EOPNOTSUPP); /* Don't let users screw up protocols' entries. */ if (ifr->ifr_addr.sa_family != AF_LINK) return (EINVAL); if (cmd == SIOCADDMULTI) { struct ifmultiaddr *ifma; /* * Userland is only permitted to join groups once * via the if_addmulti() KPI, because it cannot hold * struct ifmultiaddr * between calls. It may also * lose a race while we check if the membership * already exists. */ IF_ADDR_RLOCK(ifp); ifma = if_findmulti(ifp, &ifr->ifr_addr); IF_ADDR_RUNLOCK(ifp); if (ifma != NULL) error = EADDRINUSE; else error = if_addmulti(ifp, &ifr->ifr_addr, &ifma); } else { error = if_delmulti(ifp, &ifr->ifr_addr); } if (error == 0) getmicrotime(&ifp->if_lastchange); break; case SIOCSIFPHYADDR: case SIOCDIFPHYADDR: #ifdef INET6 case SIOCSIFPHYADDR_IN6: #endif case SIOCSIFMEDIA: case SIOCSIFGENERIC: error = priv_check(td, PRIV_NET_HWIOCTL); if (error) return (error); if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); error = (*ifp->if_ioctl)(ifp, cmd, data); if (error == 0) getmicrotime(&ifp->if_lastchange); break; case SIOCGIFSTATUS: case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: case SIOCGIFMEDIA: case SIOCGIFXMEDIA: case SIOCGIFGENERIC: case SIOCGIFRSSKEY: case SIOCGIFRSSHASH: if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); error = (*ifp->if_ioctl)(ifp, cmd, data); break; case SIOCSIFLLADDR: error = priv_check(td, PRIV_NET_SETLLADDR); if (error) return (error); error = if_setlladdr(ifp, ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len); break; case SIOCGHWADDR: error = if_gethwaddr(ifp, ifr); break; case SIOCAIFGROUP: { struct ifgroupreq *ifgr = (struct ifgroupreq *)ifr; error = priv_check(td, PRIV_NET_ADDIFGROUP); if (error) return (error); if ((error = if_addgroup(ifp, ifgr->ifgr_group))) return (error); break; } case SIOCGIFGROUP: if ((error = if_getgroup((struct ifgroupreq *)ifr, ifp))) return (error); break; case SIOCDIFGROUP: { struct ifgroupreq *ifgr = (struct ifgroupreq *)ifr; error = priv_check(td, PRIV_NET_DELIFGROUP); if (error) return (error); if ((error = if_delgroup(ifp, ifgr->ifgr_group))) return (error); break; } default: error = ENOIOCTL; break; } return (error); } /* COMPAT_SVR4 */ #define OSIOCGIFCONF _IOWR('i', 20, struct ifconf) #ifdef COMPAT_FREEBSD32 struct ifconf32 { int32_t ifc_len; union { uint32_t ifcu_buf; uint32_t ifcu_req; } ifc_ifcu; }; #define SIOCGIFCONF32 _IOWR('i', 36, struct ifconf32) #endif /* * Interface ioctls. */ int ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td) { struct ifnet *ifp; struct ifreq *ifr; int error; int oif_flags; #ifdef VIMAGE int shutdown; #endif CURVNET_SET(so->so_vnet); #ifdef VIMAGE /* Make sure the VNET is stable. */ shutdown = (so->so_vnet->vnet_state > SI_SUB_VNET && so->so_vnet->vnet_state < SI_SUB_VNET_DONE) ? 
1 : 0; if (shutdown) { CURVNET_RESTORE(); return (EBUSY); } #endif switch (cmd) { case SIOCGIFCONF: case OSIOCGIFCONF: /* COMPAT_SVR4 */ error = ifconf(cmd, data); CURVNET_RESTORE(); return (error); #ifdef COMPAT_FREEBSD32 case SIOCGIFCONF32: { struct ifconf32 *ifc32; struct ifconf ifc; ifc32 = (struct ifconf32 *)data; ifc.ifc_len = ifc32->ifc_len; ifc.ifc_buf = PTRIN(ifc32->ifc_buf); error = ifconf(SIOCGIFCONF, (void *)&ifc); CURVNET_RESTORE(); if (error == 0) ifc32->ifc_len = ifc.ifc_len; return (error); } #endif } ifr = (struct ifreq *)data; switch (cmd) { #ifdef VIMAGE case SIOCSIFRVNET: error = priv_check(td, PRIV_NET_SETIFVNET); if (error == 0) error = if_vmove_reclaim(td, ifr->ifr_name, ifr->ifr_jid); CURVNET_RESTORE(); return (error); #endif case SIOCIFCREATE: case SIOCIFCREATE2: error = priv_check(td, PRIV_NET_IFCREATE); if (error == 0) error = if_clone_create(ifr->ifr_name, - sizeof(ifr->ifr_name), - cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL); + sizeof(ifr->ifr_name), cmd == SIOCIFCREATE2 ? + ifr_data_get_ptr(ifr) : NULL); CURVNET_RESTORE(); return (error); case SIOCIFDESTROY: error = priv_check(td, PRIV_NET_IFDESTROY); if (error == 0) error = if_clone_destroy(ifr->ifr_name); CURVNET_RESTORE(); return (error); case SIOCIFGCLONERS: error = if_clone_list((struct if_clonereq *)data); CURVNET_RESTORE(); return (error); case SIOCGIFGMEMB: error = if_getgroupmembers((struct ifgroupreq *)data); CURVNET_RESTORE(); return (error); #if defined(INET) || defined(INET6) case SIOCSVH: case SIOCGVH: if (carp_ioctl_p == NULL) error = EPROTONOSUPPORT; else error = (*carp_ioctl_p)(ifr, cmd, td); CURVNET_RESTORE(); return (error); #endif } ifp = ifunit_ref(ifr->ifr_name); if (ifp == NULL) { CURVNET_RESTORE(); return (ENXIO); } error = ifhwioctl(cmd, ifp, data, td); if (error != ENOIOCTL) { if_rele(ifp); CURVNET_RESTORE(); return (error); } oif_flags = ifp->if_flags; if (so->so_proto == NULL) { if_rele(ifp); CURVNET_RESTORE(); return (EOPNOTSUPP); } /* * Pass the request on to the socket control method, and if the * latter returns EOPNOTSUPP, directly to the interface. * * Make an exception for the legacy SIOCSIF* requests. Drivers * trust SIOCSIFADDR et al to come from an already privileged * layer, and do not perform any credentials checks or input * validation. */ error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd, data, ifp, td)); if (error == EOPNOTSUPP && ifp != NULL && ifp->if_ioctl != NULL && cmd != SIOCSIFADDR && cmd != SIOCSIFBRDADDR && cmd != SIOCSIFDSTADDR && cmd != SIOCSIFNETMASK) error = (*ifp->if_ioctl)(ifp, cmd, data); if ((oif_flags ^ ifp->if_flags) & IFF_UP) { #ifdef INET6 if (ifp->if_flags & IFF_UP) in6_if_up(ifp); #endif } if_rele(ifp); CURVNET_RESTORE(); return (error); } /* * The code common to handling reference counted flags, * e.g., in ifpromisc() and if_allmulti(). * The "pflag" argument can specify a permanent mode flag to check, * such as IFF_PPROMISC for promiscuous mode; should be 0 if none. * * Only to be used on stack-owned flags, not driver-owned flags. 
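 *
 * Calls through the wrappers are reference counted: only the first "on"
 * and the matching final "off" actually toggle the interface flag and
 * call down to the driver.  Minimal, illustrative sketch using the
 * promiscuous-mode wrapper defined below:
 *
 *	error = ifpromisc(ifp, 1);	(first caller sets IFF_PROMISC)
 *	...
 *	error = ifpromisc(ifp, 0);	(last caller clears it again)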
*/ static int if_setflag(struct ifnet *ifp, int flag, int pflag, int *refcount, int onswitch) { struct ifreq ifr; int error; int oldflags, oldcount; /* Sanity checks to catch programming errors */ KASSERT((flag & (IFF_DRV_OACTIVE|IFF_DRV_RUNNING)) == 0, ("%s: setting driver-owned flag %d", __func__, flag)); if (onswitch) KASSERT(*refcount >= 0, ("%s: increment negative refcount %d for flag %d", __func__, *refcount, flag)); else KASSERT(*refcount > 0, ("%s: decrement non-positive refcount %d for flag %d", __func__, *refcount, flag)); /* In case this mode is permanent, just touch refcount */ if (ifp->if_flags & pflag) { *refcount += onswitch ? 1 : -1; return (0); } /* Save ifnet parameters for if_ioctl() may fail */ oldcount = *refcount; oldflags = ifp->if_flags; /* * See if we aren't the only and touching refcount is enough. * Actually toggle interface flag if we are the first or last. */ if (onswitch) { if ((*refcount)++) return (0); ifp->if_flags |= flag; } else { if (--(*refcount)) return (0); ifp->if_flags &= ~flag; } /* Call down the driver since we've changed interface flags */ if (ifp->if_ioctl == NULL) { error = EOPNOTSUPP; goto recover; } ifr.ifr_flags = ifp->if_flags & 0xffff; ifr.ifr_flagshigh = ifp->if_flags >> 16; error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr); if (error) goto recover; /* Notify userland that interface flags have changed */ rt_ifmsg(ifp); return (0); recover: /* Recover after driver error */ *refcount = oldcount; ifp->if_flags = oldflags; return (error); } /* * Set/clear promiscuous mode on interface ifp based on the truth value * of pswitch. The calls are reference counted so that only the first * "on" request actually has an effect, as does the final "off" request. * Results are undefined if the "off" and "on" requests are not matched. */ int ifpromisc(struct ifnet *ifp, int pswitch) { int error; int oldflags = ifp->if_flags; error = if_setflag(ifp, IFF_PROMISC, IFF_PPROMISC, &ifp->if_pcount, pswitch); /* If promiscuous mode status has changed, log a message */ if (error == 0 && ((ifp->if_flags ^ oldflags) & IFF_PROMISC) && log_promisc_mode_change) log(LOG_INFO, "%s: promiscuous mode %s\n", ifp->if_xname, (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled"); return (error); } /* * Return interface configuration * of system. List may be used * in later ioctl's (above) to get * other information. */ /*ARGSUSED*/ static int ifconf(u_long cmd, caddr_t data) { struct ifconf *ifc = (struct ifconf *)data; struct ifnet *ifp; struct ifaddr *ifa; struct ifreq ifr; struct sbuf *sb; int error, full = 0, valid_len, max_len; /* Limit initial buffer size to MAXPHYS to avoid DoS from userspace. */ max_len = MAXPHYS - 1; /* Prevent hostile input from being able to crash the system */ if (ifc->ifc_len <= 0) return (EINVAL); again: if (ifc->ifc_len <= max_len) { max_len = ifc->ifc_len; full = 1; } sb = sbuf_new(NULL, NULL, max_len + 1, SBUF_FIXEDLEN); max_len = 0; valid_len = 0; IFNET_RLOCK(); TAILQ_FOREACH(ifp, &V_ifnet, if_link) { int addrs; /* * Zero the ifr_name buffer to make sure we don't * disclose the contents of the stack. 
*/ memset(ifr.ifr_name, 0, sizeof(ifr.ifr_name)); if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name)) >= sizeof(ifr.ifr_name)) { sbuf_delete(sb); IFNET_RUNLOCK(); return (ENAMETOOLONG); } addrs = 0; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { struct sockaddr *sa = ifa->ifa_addr; if (prison_if(curthread->td_ucred, sa) != 0) continue; addrs++; /* COMPAT_SVR4 */ if (cmd == OSIOCGIFCONF) { struct osockaddr *osa = (struct osockaddr *)&ifr.ifr_addr; ifr.ifr_addr = *sa; osa->sa_family = sa->sa_family; sbuf_bcat(sb, &ifr, sizeof(ifr)); max_len += sizeof(ifr); } else if (sa->sa_len <= sizeof(*sa)) { ifr.ifr_addr = *sa; sbuf_bcat(sb, &ifr, sizeof(ifr)); max_len += sizeof(ifr); } else { sbuf_bcat(sb, &ifr, offsetof(struct ifreq, ifr_addr)); max_len += offsetof(struct ifreq, ifr_addr); sbuf_bcat(sb, sa, sa->sa_len); max_len += sa->sa_len; } if (sbuf_error(sb) == 0) valid_len = sbuf_len(sb); } IF_ADDR_RUNLOCK(ifp); if (addrs == 0) { bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr)); sbuf_bcat(sb, &ifr, sizeof(ifr)); max_len += sizeof(ifr); if (sbuf_error(sb) == 0) valid_len = sbuf_len(sb); } } IFNET_RUNLOCK(); /* * If we didn't allocate enough space (uncommon), try again. If * we have already allocated as much space as we are allowed, * return what we've got. */ if (valid_len != max_len && !full) { sbuf_delete(sb); goto again; } ifc->ifc_len = valid_len; sbuf_finish(sb); error = copyout(sbuf_data(sb), ifc->ifc_req, ifc->ifc_len); sbuf_delete(sb); return (error); } /* * Just like ifpromisc(), but for all-multicast-reception mode. */ int if_allmulti(struct ifnet *ifp, int onswitch) { return (if_setflag(ifp, IFF_ALLMULTI, 0, &ifp->if_amcount, onswitch)); } struct ifmultiaddr * if_findmulti(struct ifnet *ifp, const struct sockaddr *sa) { struct ifmultiaddr *ifma; IF_ADDR_LOCK_ASSERT(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (sa->sa_family == AF_LINK) { if (sa_dl_equal(ifma->ifma_addr, sa)) break; } else { if (sa_equal(ifma->ifma_addr, sa)) break; } } return ifma; } /* * Allocate a new ifmultiaddr and initialize based on passed arguments. We * make copies of passed sockaddrs. The ifmultiaddr will not be added to * the ifnet multicast address list here, so the caller must do that and * other setup work (such as notifying the device driver). The reference * count is initialized to 1. */ static struct ifmultiaddr * if_allocmulti(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr *llsa, int mflags) { struct ifmultiaddr *ifma; struct sockaddr *dupsa; ifma = malloc(sizeof *ifma, M_IFMADDR, mflags | M_ZERO); if (ifma == NULL) return (NULL); dupsa = malloc(sa->sa_len, M_IFMADDR, mflags); if (dupsa == NULL) { free(ifma, M_IFMADDR); return (NULL); } bcopy(sa, dupsa, sa->sa_len); ifma->ifma_addr = dupsa; ifma->ifma_ifp = ifp; ifma->ifma_refcount = 1; ifma->ifma_protospec = NULL; if (llsa == NULL) { ifma->ifma_lladdr = NULL; return (ifma); } dupsa = malloc(llsa->sa_len, M_IFMADDR, mflags); if (dupsa == NULL) { free(ifma->ifma_addr, M_IFMADDR); free(ifma, M_IFMADDR); return (NULL); } bcopy(llsa, dupsa, llsa->sa_len); ifma->ifma_lladdr = dupsa; return (ifma); } /* * if_freemulti: free ifmultiaddr structure and possibly attached related * addresses. The caller is responsible for implementing reference * counting, notifying the driver, handling routing messages, and releasing * any dependent link layer state. 
*/ static void if_freemulti(struct ifmultiaddr *ifma) { KASSERT(ifma->ifma_refcount == 0, ("if_freemulti: refcount %d", ifma->ifma_refcount)); if (ifma->ifma_lladdr != NULL) free(ifma->ifma_lladdr, M_IFMADDR); free(ifma->ifma_addr, M_IFMADDR); free(ifma, M_IFMADDR); } /* * Register an additional multicast address with a network interface. * * - If the address is already present, bump the reference count on the * address and return. * - If the address is not link-layer, look up a link layer address. * - Allocate address structures for one or both addresses, and attach to the * multicast address list on the interface. If automatically adding a link * layer address, the protocol address will own a reference to the link * layer address, to be freed when it is freed. * - Notify the network device driver of an addition to the multicast address * list. * * 'sa' points to caller-owned memory with the desired multicast address. * * 'retifma' will be used to return a pointer to the resulting multicast * address reference, if desired. */ int if_addmulti(struct ifnet *ifp, struct sockaddr *sa, struct ifmultiaddr **retifma) { struct ifmultiaddr *ifma, *ll_ifma; struct sockaddr *llsa; struct sockaddr_dl sdl; int error; /* * If the address is already present, return a new reference to it; * otherwise, allocate storage and set up a new address. */ IF_ADDR_WLOCK(ifp); ifma = if_findmulti(ifp, sa); if (ifma != NULL) { ifma->ifma_refcount++; if (retifma != NULL) *retifma = ifma; IF_ADDR_WUNLOCK(ifp); return (0); } /* * The address isn't already present; resolve the protocol address * into a link layer address, and then look that up, bump its * refcount or allocate an ifma for that also. * Most link layer resolving functions returns address data which * fits inside default sockaddr_dl structure. However callback * can allocate another sockaddr structure, in that case we need to * free it later. */ llsa = NULL; ll_ifma = NULL; if (ifp->if_resolvemulti != NULL) { /* Provide called function with buffer size information */ sdl.sdl_len = sizeof(sdl); llsa = (struct sockaddr *)&sdl; error = ifp->if_resolvemulti(ifp, &llsa, sa); if (error) goto unlock_out; } /* * Allocate the new address. Don't hook it up yet, as we may also * need to allocate a link layer multicast address. */ ifma = if_allocmulti(ifp, sa, llsa, M_NOWAIT); if (ifma == NULL) { error = ENOMEM; goto free_llsa_out; } /* * If a link layer address is found, we'll need to see if it's * already present in the address list, or allocate is as well. * When this block finishes, the link layer address will be on the * list. */ if (llsa != NULL) { ll_ifma = if_findmulti(ifp, llsa); if (ll_ifma == NULL) { ll_ifma = if_allocmulti(ifp, llsa, NULL, M_NOWAIT); if (ll_ifma == NULL) { --ifma->ifma_refcount; if_freemulti(ifma); error = ENOMEM; goto free_llsa_out; } TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ll_ifma, ifma_link); } else ll_ifma->ifma_refcount++; ifma->ifma_llifma = ll_ifma; } /* * We now have a new multicast address, ifma, and possibly a new or * referenced link layer address. Add the primary address to the * ifnet address list. */ TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link); if (retifma != NULL) *retifma = ifma; /* * Must generate the message while holding the lock so that 'ifma' * pointer is still valid. */ rt_newmaddrmsg(RTM_NEWMADDR, ifma); IF_ADDR_WUNLOCK(ifp); /* * We are certain we have added something, so call down to the * interface to let them know about it. 
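 *
 * From a consumer's point of view the membership installed here is later
 * dropped with if_delmulti() (or if_delmulti_ifma() for network-layer
 * protocol domains).  Minimal, illustrative pairing with a caller-owned
 * sockaddr:
 *
 *	error = if_addmulti(ifp, sa, &ifma);
 *	...
 *	error = if_delmulti(ifp, sa);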
*/ if (ifp->if_ioctl != NULL) { (void) (*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0); } if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl)) link_free_sdl(llsa); return (0); free_llsa_out: if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl)) link_free_sdl(llsa); unlock_out: IF_ADDR_WUNLOCK(ifp); return (error); } /* * Delete a multicast group membership by network-layer group address. * * Returns ENOENT if the entry could not be found. If ifp no longer * exists, results are undefined. This entry point should only be used * from subsystems which do appropriate locking to hold ifp for the * duration of the call. * Network-layer protocol domains must use if_delmulti_ifma(). */ int if_delmulti(struct ifnet *ifp, struct sockaddr *sa) { struct ifmultiaddr *ifma; int lastref; #ifdef INVARIANTS struct ifnet *oifp; IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(oifp, &V_ifnet, if_link) if (ifp == oifp) break; if (ifp != oifp) ifp = NULL; IFNET_RUNLOCK_NOSLEEP(); KASSERT(ifp != NULL, ("%s: ifnet went away", __func__)); #endif if (ifp == NULL) return (ENOENT); IF_ADDR_WLOCK(ifp); lastref = 0; ifma = if_findmulti(ifp, sa); if (ifma != NULL) lastref = if_delmulti_locked(ifp, ifma, 0); IF_ADDR_WUNLOCK(ifp); if (ifma == NULL) return (ENOENT); if (lastref && ifp->if_ioctl != NULL) { (void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0); } return (0); } /* * Delete all multicast group membership for an interface. * Should be used to quickly flush all multicast filters. */ void if_delallmulti(struct ifnet *ifp) { struct ifmultiaddr *ifma; struct ifmultiaddr *next; IF_ADDR_WLOCK(ifp); TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next) if_delmulti_locked(ifp, ifma, 0); IF_ADDR_WUNLOCK(ifp); } /* * Delete a multicast group membership by group membership pointer. * Network-layer protocol domains must use this routine. * * It is safe to call this routine if the ifp disappeared. */ void if_delmulti_ifma(struct ifmultiaddr *ifma) { struct ifnet *ifp; int lastref; ifp = ifma->ifma_ifp; #ifdef DIAGNOSTIC if (ifp == NULL) { printf("%s: ifma_ifp seems to be detached\n", __func__); } else { struct ifnet *oifp; IFNET_RLOCK_NOSLEEP(); TAILQ_FOREACH(oifp, &V_ifnet, if_link) if (ifp == oifp) break; if (ifp != oifp) { printf("%s: ifnet %p disappeared\n", __func__, ifp); ifp = NULL; } IFNET_RUNLOCK_NOSLEEP(); } #endif /* * If and only if the ifnet instance exists: Acquire the address lock. */ if (ifp != NULL) IF_ADDR_WLOCK(ifp); lastref = if_delmulti_locked(ifp, ifma, 0); if (ifp != NULL) { /* * If and only if the ifnet instance exists: * Release the address lock. * If the group was left: update the hardware hash filter. */ IF_ADDR_WUNLOCK(ifp); if (lastref && ifp->if_ioctl != NULL) { (void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0); } } } /* * Perform deletion of network-layer and/or link-layer multicast address. * * Return 0 if the reference count was decremented. * Return 1 if the final reference was released, indicating that the * hardware hash filter should be reprogrammed. */ static int if_delmulti_locked(struct ifnet *ifp, struct ifmultiaddr *ifma, int detaching) { struct ifmultiaddr *ll_ifma; if (ifp != NULL && ifma->ifma_ifp != NULL) { KASSERT(ifma->ifma_ifp == ifp, ("%s: inconsistent ifp %p", __func__, ifp)); IF_ADDR_WLOCK_ASSERT(ifp); } ifp = ifma->ifma_ifp; /* * If the ifnet is detaching, null out references to ifnet, * so that upper protocol layers will notice, and not attempt * to obtain locks for an ifnet which no longer exists. 
The * routing socket announcement must happen before the ifnet * instance is detached from the system. */ if (detaching) { #ifdef DIAGNOSTIC printf("%s: detaching ifnet instance %p\n", __func__, ifp); #endif /* * ifp may already be nulled out if we are being reentered * to delete the ll_ifma. */ if (ifp != NULL) { rt_newmaddrmsg(RTM_DELMADDR, ifma); ifma->ifma_ifp = NULL; } } if (--ifma->ifma_refcount > 0) return 0; /* * If this ifma is a network-layer ifma, a link-layer ifma may * have been associated with it. Release it first if so. */ ll_ifma = ifma->ifma_llifma; if (ll_ifma != NULL) { KASSERT(ifma->ifma_lladdr != NULL, ("%s: llifma w/o lladdr", __func__)); if (detaching) ll_ifma->ifma_ifp = NULL; /* XXX */ if (--ll_ifma->ifma_refcount == 0) { if (ifp != NULL) { TAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifma_link); } if_freemulti(ll_ifma); } } if (ifp != NULL) TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link); if_freemulti(ifma); /* * The last reference to this instance of struct ifmultiaddr * was released; the hardware should be notified of this change. */ return 1; } /* * Set the link layer address on an interface. * * At this time we only support certain types of interfaces, * and we don't allow the length of the address to change. * * Set noinline to be dtrace-friendly */ __noinline int if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len) { struct sockaddr_dl *sdl; struct ifaddr *ifa; struct ifreq ifr; IF_ADDR_RLOCK(ifp); ifa = ifp->if_addr; if (ifa == NULL) { IF_ADDR_RUNLOCK(ifp); return (EINVAL); } ifa_ref(ifa); IF_ADDR_RUNLOCK(ifp); sdl = (struct sockaddr_dl *)ifa->ifa_addr; if (sdl == NULL) { ifa_free(ifa); return (EINVAL); } if (len != sdl->sdl_alen) { /* don't allow length to change */ ifa_free(ifa); return (EINVAL); } switch (ifp->if_type) { case IFT_ETHER: case IFT_FDDI: case IFT_XETHER: case IFT_ISO88025: case IFT_L2VLAN: case IFT_BRIDGE: case IFT_ARCNET: case IFT_IEEE8023ADLAG: case IFT_IEEE80211: bcopy(lladdr, LLADDR(sdl), len); ifa_free(ifa); break; default: ifa_free(ifa); return (ENODEV); } /* * If the interface is already up, we need * to re-init it in order to reprogram its * address filter. */ if ((ifp->if_flags & IFF_UP) != 0) { if (ifp->if_ioctl) { ifp->if_flags &= ~IFF_UP; ifr.ifr_flags = ifp->if_flags & 0xffff; ifr.ifr_flagshigh = ifp->if_flags >> 16; (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr); ifp->if_flags |= IFF_UP; ifr.ifr_flags = ifp->if_flags & 0xffff; ifr.ifr_flagshigh = ifp->if_flags >> 16; (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr); } } EVENTHANDLER_INVOKE(iflladdr_event, ifp); return (0); } /* * Compat function for handling basic encapsulation requests. * Not converted stacks (FDDI, IB, ..) supports traditional * output model: ARP (and other similar L2 protocols) are handled * inside output routine, arpresolve/nd6_resolve() returns MAC * address instead of full prepend. * * This function creates calculated header==MAC for IPv4/IPv6 and * returns EAFNOSUPPORT (which is then handled in ARP code) for other * address families. */ static int if_requestencap_default(struct ifnet *ifp, struct if_encap_req *req) { if (req->rtype != IFENCAP_LL) return (EOPNOTSUPP); if (req->bufsize < req->lladdr_len) return (ENOMEM); switch (req->family) { case AF_INET: case AF_INET6: break; default: return (EAFNOSUPPORT); } /* Copy lladdr to storage as is */ memmove(req->buf, req->lladdr, req->lladdr_len); req->bufsize = req->lladdr_len; req->lladdr_off = 0; return (0); } /* * Get the link layer address that was read from the hardware at attach. 
* * This is only set by Ethernet NICs (IFT_ETHER), but laggX interfaces re-type * their component interfaces as IFT_IEEE8023ADLAG. */ int if_gethwaddr(struct ifnet *ifp, struct ifreq *ifr) { if (ifp->if_hw_addr == NULL) return (ENODEV); switch (ifp->if_type) { case IFT_ETHER: case IFT_IEEE8023ADLAG: bcopy(ifp->if_hw_addr, ifr->ifr_addr.sa_data, ifp->if_addrlen); return (0); default: return (ENODEV); } } /* * The name argument must be a pointer to storage which will last as * long as the interface does. For physical devices, the result of * device_get_name(dev) is a good choice and for pseudo-devices a * static string works well. */ void if_initname(struct ifnet *ifp, const char *name, int unit) { ifp->if_dname = name; ifp->if_dunit = unit; if (unit != IF_DUNIT_NONE) snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit); else strlcpy(ifp->if_xname, name, IFNAMSIZ); } int if_printf(struct ifnet *ifp, const char * fmt, ...) { va_list ap; int retval; retval = printf("%s: ", ifp->if_xname); va_start(ap, fmt); retval += vprintf(fmt, ap); va_end(ap); return (retval); } void if_start(struct ifnet *ifp) { (*(ifp)->if_start)(ifp); } /* * Backwards compatibility interface for drivers * that have not implemented it */ static int if_transmit(struct ifnet *ifp, struct mbuf *m) { int error; IFQ_HANDOFF(ifp, m, error); return (error); } static void if_input_default(struct ifnet *ifp __unused, struct mbuf *m) { m_freem(m); } int if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, int adjust) { int active = 0; IF_LOCK(ifq); if (_IF_QFULL(ifq)) { IF_UNLOCK(ifq); if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); m_freem(m); return (0); } if (ifp != NULL) { if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len + adjust); if (m->m_flags & (M_BCAST|M_MCAST)) if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); active = ifp->if_drv_flags & IFF_DRV_OACTIVE; } _IF_ENQUEUE(ifq, m); IF_UNLOCK(ifq); if (ifp != NULL && !active) (*(ifp)->if_start)(ifp); return (1); } void if_register_com_alloc(u_char type, if_com_alloc_t *a, if_com_free_t *f) { KASSERT(if_com_alloc[type] == NULL, ("if_register_com_alloc: %d already registered", type)); KASSERT(if_com_free[type] == NULL, ("if_register_com_alloc: %d free already registered", type)); if_com_alloc[type] = a; if_com_free[type] = f; } void if_deregister_com_alloc(u_char type) { KASSERT(if_com_alloc[type] != NULL, ("if_deregister_com_alloc: %d not registered", type)); KASSERT(if_com_free[type] != NULL, ("if_deregister_com_alloc: %d free not registered", type)); if_com_alloc[type] = NULL; if_com_free[type] = NULL; } /* API for driver access to network stack owned ifnet.*/ uint64_t if_setbaudrate(struct ifnet *ifp, uint64_t baudrate) { uint64_t oldbrate; oldbrate = ifp->if_baudrate; ifp->if_baudrate = baudrate; return (oldbrate); } uint64_t if_getbaudrate(if_t ifp) { return (((struct ifnet *)ifp)->if_baudrate); } int if_setcapabilities(if_t ifp, int capabilities) { ((struct ifnet *)ifp)->if_capabilities = capabilities; return (0); } int if_setcapabilitiesbit(if_t ifp, int setbit, int clearbit) { ((struct ifnet *)ifp)->if_capabilities |= setbit; ((struct ifnet *)ifp)->if_capabilities &= ~clearbit; return (0); } int if_getcapabilities(if_t ifp) { return ((struct ifnet *)ifp)->if_capabilities; } int if_setcapenable(if_t ifp, int capabilities) { ((struct ifnet *)ifp)->if_capenable = capabilities; return (0); } int if_setcapenablebit(if_t ifp, int setcap, int clearcap) { if(setcap) ((struct ifnet *)ifp)->if_capenable |= setcap; if(clearcap) ((struct ifnet *)ifp)->if_capenable &= 
~clearcap; return (0); } const char * if_getdname(if_t ifp) { return ((struct ifnet *)ifp)->if_dname; } int if_togglecapenable(if_t ifp, int togglecap) { ((struct ifnet *)ifp)->if_capenable ^= togglecap; return (0); } int if_getcapenable(if_t ifp) { return ((struct ifnet *)ifp)->if_capenable; } /* * This is largely undesirable because it ties ifnet to a device, but does * provide flexiblity for an embedded product vendor. Should be used with * the understanding that it violates the interface boundaries, and should be * a last resort only. */ int if_setdev(if_t ifp, void *dev) { return (0); } int if_setdrvflagbits(if_t ifp, int set_flags, int clear_flags) { ((struct ifnet *)ifp)->if_drv_flags |= set_flags; ((struct ifnet *)ifp)->if_drv_flags &= ~clear_flags; return (0); } int if_getdrvflags(if_t ifp) { return ((struct ifnet *)ifp)->if_drv_flags; } int if_setdrvflags(if_t ifp, int flags) { ((struct ifnet *)ifp)->if_drv_flags = flags; return (0); } int if_setflags(if_t ifp, int flags) { ((struct ifnet *)ifp)->if_flags = flags; return (0); } int if_setflagbits(if_t ifp, int set, int clear) { ((struct ifnet *)ifp)->if_flags |= set; ((struct ifnet *)ifp)->if_flags &= ~clear; return (0); } int if_getflags(if_t ifp) { return ((struct ifnet *)ifp)->if_flags; } int if_clearhwassist(if_t ifp) { ((struct ifnet *)ifp)->if_hwassist = 0; return (0); } int if_sethwassistbits(if_t ifp, int toset, int toclear) { ((struct ifnet *)ifp)->if_hwassist |= toset; ((struct ifnet *)ifp)->if_hwassist &= ~toclear; return (0); } int if_sethwassist(if_t ifp, int hwassist_bit) { ((struct ifnet *)ifp)->if_hwassist = hwassist_bit; return (0); } int if_gethwassist(if_t ifp) { return ((struct ifnet *)ifp)->if_hwassist; } int if_setmtu(if_t ifp, int mtu) { ((struct ifnet *)ifp)->if_mtu = mtu; return (0); } int if_getmtu(if_t ifp) { return ((struct ifnet *)ifp)->if_mtu; } int if_getmtu_family(if_t ifp, int family) { struct domain *dp; for (dp = domains; dp; dp = dp->dom_next) { if (dp->dom_family == family && dp->dom_ifmtu != NULL) return (dp->dom_ifmtu((struct ifnet *)ifp)); } return (((struct ifnet *)ifp)->if_mtu); } int if_setsoftc(if_t ifp, void *softc) { ((struct ifnet *)ifp)->if_softc = softc; return (0); } void * if_getsoftc(if_t ifp) { return ((struct ifnet *)ifp)->if_softc; } void if_setrcvif(struct mbuf *m, if_t ifp) { m->m_pkthdr.rcvif = (struct ifnet *)ifp; } void if_setvtag(struct mbuf *m, uint16_t tag) { m->m_pkthdr.ether_vtag = tag; } uint16_t if_getvtag(struct mbuf *m) { return (m->m_pkthdr.ether_vtag); } int if_sendq_empty(if_t ifp) { return IFQ_DRV_IS_EMPTY(&((struct ifnet *)ifp)->if_snd); } struct ifaddr * if_getifaddr(if_t ifp) { return ((struct ifnet *)ifp)->if_addr; } int if_getamcount(if_t ifp) { return ((struct ifnet *)ifp)->if_amcount; } int if_setsendqready(if_t ifp) { IFQ_SET_READY(&((struct ifnet *)ifp)->if_snd); return (0); } int if_setsendqlen(if_t ifp, int tx_desc_count) { IFQ_SET_MAXLEN(&((struct ifnet *)ifp)->if_snd, tx_desc_count); ((struct ifnet *)ifp)->if_snd.ifq_drv_maxlen = tx_desc_count; return (0); } int if_vlantrunkinuse(if_t ifp) { return ((struct ifnet *)ifp)->if_vlantrunk != NULL?1:0; } int if_input(if_t ifp, struct mbuf* sendmp) { (*((struct ifnet *)ifp)->if_input)((struct ifnet *)ifp, sendmp); return (0); } /* XXX */ #ifndef ETH_ADDR_LEN #define ETH_ADDR_LEN 6 #endif int if_setupmultiaddr(if_t ifp, void *mta, int *cnt, int max) { struct ifmultiaddr *ifma; uint8_t *lmta = (uint8_t *)mta; int mcnt = 0; TAILQ_FOREACH(ifma, &((struct ifnet *)ifp)->if_multiaddrs, ifma_link) { if 
(ifma->ifma_addr->sa_family != AF_LINK) continue; if (mcnt == max) break; bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), &lmta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN); mcnt++; } *cnt = mcnt; return (0); } int if_multiaddr_array(if_t ifp, void *mta, int *cnt, int max) { int error; if_maddr_rlock(ifp); error = if_setupmultiaddr(ifp, mta, cnt, max); if_maddr_runlock(ifp); return (error); } int if_multiaddr_count(if_t ifp, int max) { struct ifmultiaddr *ifma; int count; count = 0; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &((struct ifnet *)ifp)->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; count++; if (count == max) break; } if_maddr_runlock(ifp); return (count); } int if_multi_apply(struct ifnet *ifp, int (*filter)(void *, struct ifmultiaddr *, int), void *arg) { struct ifmultiaddr *ifma; int cnt = 0; if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) cnt += filter(arg, ifma, cnt); if_maddr_runlock(ifp); return (cnt); } struct mbuf * if_dequeue(if_t ifp) { struct mbuf *m; IFQ_DRV_DEQUEUE(&((struct ifnet *)ifp)->if_snd, m); return (m); } int if_sendq_prepend(if_t ifp, struct mbuf *m) { IFQ_DRV_PREPEND(&((struct ifnet *)ifp)->if_snd, m); return (0); } int if_setifheaderlen(if_t ifp, int len) { ((struct ifnet *)ifp)->if_hdrlen = len; return (0); } caddr_t if_getlladdr(if_t ifp) { return (IF_LLADDR((struct ifnet *)ifp)); } void * if_gethandle(u_char type) { return (if_alloc(type)); } void if_bpfmtap(if_t ifh, struct mbuf *m) { struct ifnet *ifp = (struct ifnet *)ifh; BPF_MTAP(ifp, m); } void if_etherbpfmtap(if_t ifh, struct mbuf *m) { struct ifnet *ifp = (struct ifnet *)ifh; ETHER_BPF_MTAP(ifp, m); } void if_vlancap(if_t ifh) { struct ifnet *ifp = (struct ifnet *)ifh; VLAN_CAPABILITIES(ifp); } void if_setinitfn(if_t ifp, void (*init_fn)(void *)) { ((struct ifnet *)ifp)->if_init = init_fn; } void if_setioctlfn(if_t ifp, int (*ioctl_fn)(if_t, u_long, caddr_t)) { ((struct ifnet *)ifp)->if_ioctl = (void *)ioctl_fn; } void if_setstartfn(if_t ifp, void (*start_fn)(if_t)) { ((struct ifnet *)ifp)->if_start = (void *)start_fn; } void if_settransmitfn(if_t ifp, if_transmit_fn_t start_fn) { ((struct ifnet *)ifp)->if_transmit = start_fn; } void if_setqflushfn(if_t ifp, if_qflush_fn_t flush_fn) { ((struct ifnet *)ifp)->if_qflush = flush_fn; } void if_setgetcounterfn(if_t ifp, if_get_counter_t fn) { ifp->if_get_counter = fn; } /* Revisit these - These are inline functions originally. */ int drbr_inuse_drv(if_t ifh, struct buf_ring *br) { return drbr_inuse(ifh, br); } struct mbuf* drbr_dequeue_drv(if_t ifh, struct buf_ring *br) { return drbr_dequeue(ifh, br); } int drbr_needs_enqueue_drv(if_t ifh, struct buf_ring *br) { return drbr_needs_enqueue(ifh, br); } int drbr_enqueue_drv(if_t ifh, struct buf_ring *br, struct mbuf *m) { return drbr_enqueue(ifh, br, m); } Index: stable/11/sys/net/if.h =================================================================== --- stable/11/sys/net/if.h (revision 332287) +++ stable/11/sys/net/if.h (revision 332288) @@ -1,588 +1,590 @@ /*- * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)if.h 8.1 (Berkeley) 6/10/93 * $FreeBSD$ */ #ifndef _NET_IF_H_ #define _NET_IF_H_ #include #if __BSD_VISIBLE /* * does not depend on on most other systems. This * helps userland compatibility. (struct timeval ifi_lastchange) * The same holds for . (struct sockaddr ifru_addr) */ #ifndef _KERNEL #include #include #endif #endif /* * Length of interface external name, including terminating '\0'. * Note: this is the same size as a generic device's external name. */ #define IF_NAMESIZE 16 #if __BSD_VISIBLE #define IFNAMSIZ IF_NAMESIZE #define IF_MAXUNIT 0x7fff /* historical value */ #endif #if __BSD_VISIBLE /* * Structure used to query names of interface cloners. */ struct if_clonereq { int ifcr_total; /* total cloners (out) */ int ifcr_count; /* room for this many in user buffer */ char *ifcr_buffer; /* buffer for cloner names */ }; /* * Structure describing information about an interface * which may be of interest to management entities. */ struct if_data { /* generic interface information */ uint8_t ifi_type; /* ethernet, tokenring, etc */ uint8_t ifi_physical; /* e.g., AUI, Thinnet, 10base-T, etc */ uint8_t ifi_addrlen; /* media address length */ uint8_t ifi_hdrlen; /* media header length */ uint8_t ifi_link_state; /* current link state */ uint8_t ifi_vhid; /* carp vhid */ uint16_t ifi_datalen; /* length of this data struct */ uint32_t ifi_mtu; /* maximum transmission unit */ uint32_t ifi_metric; /* routing metric (external only) */ uint64_t ifi_baudrate; /* linespeed */ /* volatile statistics */ uint64_t ifi_ipackets; /* packets received on interface */ uint64_t ifi_ierrors; /* input errors on interface */ uint64_t ifi_opackets; /* packets sent on interface */ uint64_t ifi_oerrors; /* output errors on interface */ uint64_t ifi_collisions; /* collisions on csma interfaces */ uint64_t ifi_ibytes; /* total number of octets received */ uint64_t ifi_obytes; /* total number of octets sent */ uint64_t ifi_imcasts; /* packets received via multicast */ uint64_t ifi_omcasts; /* packets sent via multicast */ uint64_t ifi_iqdrops; /* dropped on input */ uint64_t ifi_oqdrops; /* dropped on output */ uint64_t ifi_noproto; /* destined for unsupported protocol */ uint64_t ifi_hwassist; /* HW offload capabilities, see IFCAP */ /* Unions are here to make sizes MI. 
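 * ("MI" = machine-independent.)  time_t and struct timeval have ABI-dependent
 * sizes, so each is overlaid with explicit uint64_t placeholders below; this
 * keeps sizeof(struct if_data) identical on 32-bit and 64-bit systems, which
 * matters because the structure is embedded verbatim in routing-socket
 * messages such as struct if_msghdr.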
*/ union { /* uptime at attach or stat reset */ time_t tt; uint64_t ph; } __ifi_epoch; #define ifi_epoch __ifi_epoch.tt union { /* time of last administrative change */ struct timeval tv; struct { uint64_t ph1; uint64_t ph2; } ph; } __ifi_lastchange; #define ifi_lastchange __ifi_lastchange.tv }; /*- * Interface flags are of two types: network stack owned flags, and driver * owned flags. Historically, these values were stored in the same ifnet * flags field, but with the advent of fine-grained locking, they have been * broken out such that the network stack is responsible for synchronizing * the stack-owned fields, and the device driver the device-owned fields. * Both halves can perform lockless reads of the other half's field, subject * to accepting the involved races. * * Both sets of flags come from the same number space, and should not be * permitted to conflict, as they are exposed to user space via a single * field. * * The following symbols identify read and write requirements for fields: * * (i) if_flags field set by device driver before attach, read-only there * after. * (n) if_flags field written only by the network stack, read by either the * stack or driver. * (d) if_drv_flags field written only by the device driver, read by either * the stack or driver. */ #define IFF_UP 0x1 /* (n) interface is up */ #define IFF_BROADCAST 0x2 /* (i) broadcast address valid */ #define IFF_DEBUG 0x4 /* (n) turn on debugging */ #define IFF_LOOPBACK 0x8 /* (i) is a loopback net */ #define IFF_POINTOPOINT 0x10 /* (i) is a point-to-point link */ /* 0x20 was IFF_SMART */ #define IFF_DRV_RUNNING 0x40 /* (d) resources allocated */ #define IFF_NOARP 0x80 /* (n) no address resolution protocol */ #define IFF_PROMISC 0x100 /* (n) receive all packets */ #define IFF_ALLMULTI 0x200 /* (n) receive all multicast packets */ #define IFF_DRV_OACTIVE 0x400 /* (d) tx hardware queue is full */ #define IFF_SIMPLEX 0x800 /* (i) can't hear own transmissions */ #define IFF_LINK0 0x1000 /* per link layer defined bit */ #define IFF_LINK1 0x2000 /* per link layer defined bit */ #define IFF_LINK2 0x4000 /* per link layer defined bit */ #define IFF_ALTPHYS IFF_LINK2 /* use alternate physical connection */ #define IFF_MULTICAST 0x8000 /* (i) supports multicast */ #define IFF_CANTCONFIG 0x10000 /* (i) unconfigurable using ioctl(2) */ #define IFF_PPROMISC 0x20000 /* (n) user-requested promisc mode */ #define IFF_MONITOR 0x40000 /* (n) user-requested monitor mode */ #define IFF_STATICARP 0x80000 /* (n) static ARP */ #define IFF_DYING 0x200000 /* (n) interface is winding down */ #define IFF_RENAMING 0x400000 /* (n) interface is being renamed */ /* * Old names for driver flags so that user space tools can continue to use * the old (portable) names. */ #ifndef _KERNEL #define IFF_RUNNING IFF_DRV_RUNNING #define IFF_OACTIVE IFF_DRV_OACTIVE #endif /* flags set internally only: */ #define IFF_CANTCHANGE \ (IFF_BROADCAST|IFF_POINTOPOINT|IFF_DRV_RUNNING|IFF_DRV_OACTIVE|\ IFF_SIMPLEX|IFF_MULTICAST|IFF_ALLMULTI|IFF_PROMISC|\ IFF_DYING|IFF_CANTCONFIG) /* * Values for if_link_state. */ #define LINK_STATE_UNKNOWN 0 /* link invalid/unknown */ #define LINK_STATE_DOWN 1 /* link is down */ #define LINK_STATE_UP 2 /* link is up */ /* * Some convenience macros used for setting ifi_baudrate. * XXX 1000 vs. 1024? --thorpej@netbsd.org */ #define IF_Kbps(x) ((uintmax_t)(x) * 1000) /* kilobits/sec. */ #define IF_Mbps(x) (IF_Kbps((x) * 1000)) /* megabits/sec. */ #define IF_Gbps(x) (IF_Mbps((x) * 1000)) /* gigabits/sec. 
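 * A worked example of the three macros above (decimal SI units, not powers
 * of two), all in bits per second as expected by ifi_baudrate/if_baudrate:
 *
 *	IF_Kbps(64)  == 64000
 *	IF_Mbps(100) == 100000000
 *	IF_Gbps(10)  == 10000000000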
*/ /* * Capabilities that interfaces can advertise. * * struct ifnet.if_capabilities * contains the optional features & capabilities a particular interface * supports (not only the driver but also the detected hw revision). * Capabilities are defined by IFCAP_* below. * struct ifnet.if_capenable * contains the enabled (either by default or through ifconfig) optional * features & capabilities on this interface. * Capabilities are defined by IFCAP_* below. * struct if_data.ifi_hwassist in mbuf CSUM_ flag form, controlled by above * contains the enabled optional feature & capabilites that can be used * individually per packet and are specified in the mbuf pkthdr.csum_flags * field. IFCAP_* and CSUM_* do not match one to one and CSUM_* may be * more detailed or differenciated than IFCAP_*. * Hwassist features are defined CSUM_* in sys/mbuf.h * * Capabilities that cannot be arbitrarily changed with ifconfig/ioctl * are listed in IFCAP_CANTCHANGE, similar to IFF_CANTCHANGE. * This is not strictly necessary because the common code never * changes capabilities, and it is left to the individual driver * to do the right thing. However, having the filter here * avoids replication of the same code in all individual drivers. */ #define IFCAP_RXCSUM 0x00001 /* can offload checksum on RX */ #define IFCAP_TXCSUM 0x00002 /* can offload checksum on TX */ #define IFCAP_NETCONS 0x00004 /* can be a network console */ #define IFCAP_VLAN_MTU 0x00008 /* VLAN-compatible MTU */ #define IFCAP_VLAN_HWTAGGING 0x00010 /* hardware VLAN tag support */ #define IFCAP_JUMBO_MTU 0x00020 /* 9000 byte MTU supported */ #define IFCAP_POLLING 0x00040 /* driver supports polling */ #define IFCAP_VLAN_HWCSUM 0x00080 /* can do IFCAP_HWCSUM on VLANs */ #define IFCAP_TSO4 0x00100 /* can do TCP Segmentation Offload */ #define IFCAP_TSO6 0x00200 /* can do TCP6 Segmentation Offload */ #define IFCAP_LRO 0x00400 /* can do Large Receive Offload */ #define IFCAP_WOL_UCAST 0x00800 /* wake on any unicast frame */ #define IFCAP_WOL_MCAST 0x01000 /* wake on any multicast frame */ #define IFCAP_WOL_MAGIC 0x02000 /* wake on any Magic Packet */ #define IFCAP_TOE4 0x04000 /* interface can offload TCP */ #define IFCAP_TOE6 0x08000 /* interface can offload TCP6 */ #define IFCAP_VLAN_HWFILTER 0x10000 /* interface hw can filter vlan tag */ #define IFCAP_POLLING_NOCOUNT 0x20000 /* polling ticks cannot be fragmented */ #define IFCAP_VLAN_HWTSO 0x40000 /* can do IFCAP_TSO on VLANs */ #define IFCAP_LINKSTATE 0x80000 /* the runtime link state is dynamic */ #define IFCAP_NETMAP 0x100000 /* netmap mode supported/enabled */ #define IFCAP_RXCSUM_IPV6 0x200000 /* can offload checksum on IPv6 RX */ #define IFCAP_TXCSUM_IPV6 0x400000 /* can offload checksum on IPv6 TX */ #define IFCAP_HWSTATS 0x800000 /* manages counters internally */ #define IFCAP_HWCSUM_IPV6 (IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6) #define IFCAP_HWCSUM (IFCAP_RXCSUM | IFCAP_TXCSUM) #define IFCAP_TSO (IFCAP_TSO4 | IFCAP_TSO6) #define IFCAP_WOL (IFCAP_WOL_UCAST | IFCAP_WOL_MCAST | IFCAP_WOL_MAGIC) #define IFCAP_TOE (IFCAP_TOE4 | IFCAP_TOE6) #define IFCAP_CANTCHANGE (IFCAP_NETMAP) #define IFQ_MAXLEN 50 #define IFNET_SLOWHZ 1 /* granularity is 1 second */ /* * Message format for use in obtaining information about interfaces * from getkerninfo and the routing socket * For the new, extensible interface see struct if_msghdrl below. 
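 * A minimal, illustrative userland sketch of consuming these messages; it
 * assumes buf/len describe a buffer filled by sysctl(3) with the
 * { CTL_NET, PF_ROUTE, 0, AF_UNSPEC, NET_RT_IFLIST, 0 } MIB, and relies on
 * ifm_msglen to step over messages, including ones it does not understand:
 *
 *	char *next;
 *	struct if_msghdr *ifm;
 *
 *	for (next = buf; next < buf + len; next += ifm->ifm_msglen) {
 *		ifm = (struct if_msghdr *)(void *)next;
 *		if (ifm->ifm_type != RTM_IFINFO)
 *			continue;
 *		... inspect ifm->ifm_index, ifm->ifm_flags, ifm->ifm_data ...
 *	}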
*/ struct if_msghdr { u_short ifm_msglen; /* to skip over non-understood messages */ u_char ifm_version; /* future binary compatibility */ u_char ifm_type; /* message type */ int ifm_addrs; /* like rtm_addrs */ int ifm_flags; /* value of if_flags */ u_short ifm_index; /* index for associated ifp */ struct if_data ifm_data;/* statistics and other data about if */ }; /* * The 'l' version shall be used by new interfaces, like NET_RT_IFLISTL. It is * extensible after ifm_data_off or within ifm_data. Both the if_msghdr and * if_data now have a member field detailing the struct length in addition to * the routing message length. Macros are provided to find the start of * ifm_data and the start of the socket address strucutres immediately following * struct if_msghdrl given a pointer to struct if_msghdrl. */ #define IF_MSGHDRL_IFM_DATA(_l) \ (struct if_data *)((char *)(_l) + (_l)->ifm_data_off) #define IF_MSGHDRL_RTA(_l) \ (void *)((uintptr_t)(_l) + (_l)->ifm_len) struct if_msghdrl { u_short ifm_msglen; /* to skip over non-understood messages */ u_char ifm_version; /* future binary compatibility */ u_char ifm_type; /* message type */ int ifm_addrs; /* like rtm_addrs */ int ifm_flags; /* value of if_flags */ u_short ifm_index; /* index for associated ifp */ u_short _ifm_spare1; /* spare space to grow if_index, see if_var.h */ u_short ifm_len; /* length of if_msghdrl incl. if_data */ u_short ifm_data_off; /* offset of if_data from beginning */ struct if_data ifm_data;/* statistics and other data about if */ }; /* * Message format for use in obtaining information about interface addresses * from getkerninfo and the routing socket * For the new, extensible interface see struct ifa_msghdrl below. */ struct ifa_msghdr { u_short ifam_msglen; /* to skip over non-understood messages */ u_char ifam_version; /* future binary compatibility */ u_char ifam_type; /* message type */ int ifam_addrs; /* like rtm_addrs */ int ifam_flags; /* value of ifa_flags */ u_short ifam_index; /* index for associated ifp */ int ifam_metric; /* value of ifa_ifp->if_metric */ }; /* * The 'l' version shall be used by new interfaces, like NET_RT_IFLISTL. It is * extensible after ifam_metric or within ifam_data. Both the ifa_msghdrl and * if_data now have a member field detailing the struct length in addition to * the routing message length. Macros are provided to find the start of * ifm_data and the start of the socket address strucutres immediately following * struct ifa_msghdrl given a pointer to struct ifa_msghdrl. */ #define IFA_MSGHDRL_IFAM_DATA(_l) \ (struct if_data *)((char *)(_l) + (_l)->ifam_data_off) #define IFA_MSGHDRL_RTA(_l) \ (void *)((uintptr_t)(_l) + (_l)->ifam_len) struct ifa_msghdrl { u_short ifam_msglen; /* to skip over non-understood messages */ u_char ifam_version; /* future binary compatibility */ u_char ifam_type; /* message type */ int ifam_addrs; /* like rtm_addrs */ int ifam_flags; /* value of ifa_flags */ u_short ifam_index; /* index for associated ifp */ u_short _ifam_spare1; /* spare space to grow if_index, see if_var.h */ u_short ifam_len; /* length of ifa_msghdrl incl. 
if_data */ u_short ifam_data_off; /* offset of if_data from beginning */ int ifam_metric; /* value of ifa_ifp->if_metric */ struct if_data ifam_data;/* statistics and other data about if or * address */ }; /* * Message format for use in obtaining information about multicast addresses * from the routing socket */ struct ifma_msghdr { u_short ifmam_msglen; /* to skip over non-understood messages */ u_char ifmam_version; /* future binary compatibility */ u_char ifmam_type; /* message type */ int ifmam_addrs; /* like rtm_addrs */ int ifmam_flags; /* value of ifa_flags */ u_short ifmam_index; /* index for associated ifp */ }; /* * Message format announcing the arrival or departure of a network interface. */ struct if_announcemsghdr { u_short ifan_msglen; /* to skip over non-understood messages */ u_char ifan_version; /* future binary compatibility */ u_char ifan_type; /* message type */ u_short ifan_index; /* index for associated ifp */ char ifan_name[IFNAMSIZ]; /* if name, e.g. "en0" */ u_short ifan_what; /* what type of announcement */ }; #define IFAN_ARRIVAL 0 /* interface arrival */ #define IFAN_DEPARTURE 1 /* interface departure */ /* * Buffer with length to be used in SIOCGIFDESCR/SIOCSIFDESCR requests */ struct ifreq_buffer { size_t length; void *buffer; }; /* * Interface request structure used for socket * ioctl's. All interface ioctl's must have parameter * definitions which begin with ifr_name. The * remainder may be interface specific. */ struct ifreq { char ifr_name[IFNAMSIZ]; /* if name, e.g. "en0" */ union { struct sockaddr ifru_addr; struct sockaddr ifru_dstaddr; struct sockaddr ifru_broadaddr; struct ifreq_buffer ifru_buffer; short ifru_flags[2]; short ifru_index; int ifru_jid; int ifru_metric; int ifru_mtu; int ifru_phys; int ifru_media; caddr_t ifru_data; int ifru_cap[2]; u_int ifru_fib; u_char ifru_vlan_pcp; } ifr_ifru; #define ifr_addr ifr_ifru.ifru_addr /* address */ #define ifr_dstaddr ifr_ifru.ifru_dstaddr /* other end of p-to-p link */ #define ifr_broadaddr ifr_ifru.ifru_broadaddr /* broadcast address */ #ifndef _KERNEL #define ifr_buffer ifr_ifru.ifru_buffer /* user supplied buffer with its length */ #endif #define ifr_flags ifr_ifru.ifru_flags[0] /* flags (low 16 bits) */ #define ifr_flagshigh ifr_ifru.ifru_flags[1] /* flags (high 16 bits) */ #define ifr_jid ifr_ifru.ifru_jid /* jail/vnet */ #define ifr_metric ifr_ifru.ifru_metric /* metric */ #define ifr_mtu ifr_ifru.ifru_mtu /* mtu */ #define ifr_phys ifr_ifru.ifru_phys /* physical wire */ #define ifr_media ifr_ifru.ifru_media /* physical media */ +#ifndef _KERNEL #define ifr_data ifr_ifru.ifru_data /* for use by interface */ +#endif #define ifr_reqcap ifr_ifru.ifru_cap[0] /* requested capabilities */ #define ifr_curcap ifr_ifru.ifru_cap[1] /* current capabilities */ #define ifr_index ifr_ifru.ifru_index /* interface index */ #define ifr_fib ifr_ifru.ifru_fib /* interface fib */ #define ifr_vlan_pcp ifr_ifru.ifru_vlan_pcp /* VLAN priority */ }; #define _SIZEOF_ADDR_IFREQ(ifr) \ ((ifr).ifr_addr.sa_len > sizeof(struct sockaddr) ? \ (sizeof(struct ifreq) - sizeof(struct sockaddr) + \ (ifr).ifr_addr.sa_len) : sizeof(struct ifreq)) struct ifaliasreq { char ifra_name[IFNAMSIZ]; /* if name, e.g. "en0" */ struct sockaddr ifra_addr; struct sockaddr ifra_broadaddr; struct sockaddr ifra_mask; int ifra_vhid; }; /* 9.x compat */ struct oifaliasreq { char ifra_name[IFNAMSIZ]; struct sockaddr ifra_addr; struct sockaddr ifra_broadaddr; struct sockaddr ifra_mask; }; struct ifmediareq { char ifm_name[IFNAMSIZ]; /* if name, e.g. 
"en0" */ int ifm_current; /* current media options */ int ifm_mask; /* don't care mask */ int ifm_status; /* media status */ int ifm_active; /* active options */ int ifm_count; /* # entries in ifm_ulist array */ int *ifm_ulist; /* media words */ }; struct ifdrv { char ifd_name[IFNAMSIZ]; /* if name, e.g. "en0" */ unsigned long ifd_cmd; size_t ifd_len; void *ifd_data; }; /* * Structure used to retrieve aux status data from interfaces. * Kernel suppliers to this interface should respect the formatting * needed by ifconfig(8): each line starts with a TAB and ends with * a newline. The canonical example to copy and paste is in if_tun.c. */ #define IFSTATMAX 800 /* 10 lines of text */ struct ifstat { char ifs_name[IFNAMSIZ]; /* if name, e.g. "en0" */ char ascii[IFSTATMAX + 1]; }; /* * Structure used in SIOCGIFCONF request. * Used to retrieve interface configuration * for machine (useful for programs which * must know all networks accessible). */ struct ifconf { int ifc_len; /* size of associated buffer */ union { caddr_t ifcu_buf; struct ifreq *ifcu_req; } ifc_ifcu; #define ifc_buf ifc_ifcu.ifcu_buf /* buffer address */ #define ifc_req ifc_ifcu.ifcu_req /* array of structures returned */ }; /* * interface groups */ #define IFG_ALL "all" /* group contains all interfaces */ /* XXX: will we implement this? */ #define IFG_EGRESS "egress" /* if(s) default route(s) point to */ struct ifg_req { union { char ifgrqu_group[IFNAMSIZ]; char ifgrqu_member[IFNAMSIZ]; } ifgrq_ifgrqu; #define ifgrq_group ifgrq_ifgrqu.ifgrqu_group #define ifgrq_member ifgrq_ifgrqu.ifgrqu_member }; /* * Used to lookup groups for an interface */ struct ifgroupreq { char ifgr_name[IFNAMSIZ]; u_int ifgr_len; union { char ifgru_group[IFNAMSIZ]; struct ifg_req *ifgru_groups; } ifgr_ifgru; #define ifgr_group ifgr_ifgru.ifgru_group #define ifgr_groups ifgr_ifgru.ifgru_groups }; /* * Structure used to request i2c data * from interface transceivers. */ struct ifi2creq { uint8_t dev_addr; /* i2c address (0xA0, 0xA2) */ uint8_t offset; /* read offset */ uint8_t len; /* read length */ uint8_t spare0; uint32_t spare1; uint8_t data[8]; /* read buffer */ }; /* * RSS hash. */ #define RSS_FUNC_NONE 0 /* RSS disabled */ #define RSS_FUNC_PRIVATE 1 /* non-standard */ #define RSS_FUNC_TOEPLITZ 2 #define RSS_TYPE_IPV4 0x00000001 #define RSS_TYPE_TCP_IPV4 0x00000002 #define RSS_TYPE_IPV6 0x00000004 #define RSS_TYPE_IPV6_EX 0x00000008 #define RSS_TYPE_TCP_IPV6 0x00000010 #define RSS_TYPE_TCP_IPV6_EX 0x00000020 #define RSS_TYPE_UDP_IPV4 0x00000040 #define RSS_TYPE_UDP_IPV6 0x00000080 #define RSS_TYPE_UDP_IPV6_EX 0x00000100 #define RSS_KEYLEN 128 struct ifrsskey { char ifrk_name[IFNAMSIZ]; /* if name, e.g. "en0" */ uint8_t ifrk_func; /* RSS_FUNC_ */ uint8_t ifrk_spare0; uint16_t ifrk_keylen; uint8_t ifrk_key[RSS_KEYLEN]; }; struct ifrsshash { char ifrh_name[IFNAMSIZ]; /* if name, e.g. "en0" */ uint8_t ifrh_func; /* RSS_FUNC_ */ uint8_t ifrh_spare0; uint16_t ifrh_spare1; uint32_t ifrh_types; /* RSS_TYPE_ */ }; #endif /* __BSD_VISIBLE */ #ifdef _KERNEL #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_IFADDR); MALLOC_DECLARE(M_IFMADDR); #endif #endif #ifndef _KERNEL struct if_nameindex { unsigned int if_index; /* 1, 2, ... */ char *if_name; /* null terminated name: "le0", ... 
*/ }; __BEGIN_DECLS void if_freenameindex(struct if_nameindex *); char *if_indextoname(unsigned int, char *); struct if_nameindex *if_nameindex(void); unsigned int if_nametoindex(const char *); __END_DECLS #endif #endif /* !_NET_IF_H_ */ Index: stable/11/sys/net/if_gif.c =================================================================== --- stable/11/sys/net/if_gif.c (revision 332287) +++ stable/11/sys/net/if_gif.c (revision 332288) @@ -1,1072 +1,1074 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: if_gif.c,v 1.87 2001/10/19 08:50:27 itojun Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #endif /* INET */ #ifdef INET6 #ifndef INET #include #endif #include #include #include #include #include #include #endif /* INET6 */ #include #include #include #include #include static const char gifname[] = "gif"; /* * gif_mtx protects a per-vnet gif_softc_list. 
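 * Both the mutex and the list are virtualized (VNET_DEFINE), so each vnet
 * gets its own instance; the V_gif_mtx and V_gif_softc_list macros below
 * simply select the current vnet's copy.  The list records every gif(4)
 * softc in the vnet and is consulted, for example, by the duplicate-tunnel
 * check in gif_set_tunnel().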
*/ static VNET_DEFINE(struct mtx, gif_mtx); #define V_gif_mtx VNET(gif_mtx) static MALLOC_DEFINE(M_GIF, "gif", "Generic Tunnel Interface"); static VNET_DEFINE(LIST_HEAD(, gif_softc), gif_softc_list); #define V_gif_softc_list VNET(gif_softc_list) static struct sx gif_ioctl_sx; SX_SYSINIT(gif_ioctl_sx, &gif_ioctl_sx, "gif_ioctl"); #define GIF_LIST_LOCK_INIT(x) mtx_init(&V_gif_mtx, "gif_mtx", \ NULL, MTX_DEF) #define GIF_LIST_LOCK_DESTROY(x) mtx_destroy(&V_gif_mtx) #define GIF_LIST_LOCK(x) mtx_lock(&V_gif_mtx) #define GIF_LIST_UNLOCK(x) mtx_unlock(&V_gif_mtx) void (*ng_gif_input_p)(struct ifnet *ifp, struct mbuf **mp, int af); void (*ng_gif_input_orphan_p)(struct ifnet *ifp, struct mbuf *m, int af); void (*ng_gif_attach_p)(struct ifnet *ifp); void (*ng_gif_detach_p)(struct ifnet *ifp); static int gif_check_nesting(struct ifnet *, struct mbuf *); static int gif_set_tunnel(struct ifnet *, struct sockaddr *, struct sockaddr *); static void gif_delete_tunnel(struct ifnet *); static int gif_ioctl(struct ifnet *, u_long, caddr_t); static int gif_transmit(struct ifnet *, struct mbuf *); static void gif_qflush(struct ifnet *); static int gif_clone_create(struct if_clone *, int, caddr_t); static void gif_clone_destroy(struct ifnet *); static VNET_DEFINE(struct if_clone *, gif_cloner); #define V_gif_cloner VNET(gif_cloner) static int gifmodevent(module_t, int, void *); SYSCTL_DECL(_net_link); static SYSCTL_NODE(_net_link, IFT_GIF, gif, CTLFLAG_RW, 0, "Generic Tunnel Interface"); #ifndef MAX_GIF_NEST /* * This macro controls the default upper limitation on nesting of gif tunnels. * Since, setting a large value to this macro with a careless configuration * may introduce system crash, we don't allow any nestings by default. * If you need to configure nested gif tunnels, you can define this macro * in your kernel configuration file. However, if you do so, please be * careful to configure the tunnels so that it won't make a loop. */ #define MAX_GIF_NEST 1 #endif static VNET_DEFINE(int, max_gif_nesting) = MAX_GIF_NEST; #define V_max_gif_nesting VNET(max_gif_nesting) SYSCTL_INT(_net_link_gif, OID_AUTO, max_nesting, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(max_gif_nesting), 0, "Max nested tunnels"); /* * By default, we disallow creation of multiple tunnels between the same * pair of addresses. Some applications require this functionality so * we allow control over this check here. 
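 * The check is controlled by the read/write sysctl declared below, so it can
 * be relaxed at runtime without a rebuild, e.g.:
 *
 *	sysctl net.link.gif.parallel_tunnels=1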
*/ #ifdef XBONEHACK static VNET_DEFINE(int, parallel_tunnels) = 1; #else static VNET_DEFINE(int, parallel_tunnels) = 0; #endif #define V_parallel_tunnels VNET(parallel_tunnels) SYSCTL_INT(_net_link_gif, OID_AUTO, parallel_tunnels, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(parallel_tunnels), 0, "Allow parallel tunnels?"); /* copy from src/sys/net/if_ethersubr.c */ static const u_char etherbroadcastaddr[ETHER_ADDR_LEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; #ifndef ETHER_IS_BROADCAST #define ETHER_IS_BROADCAST(addr) \ (bcmp(etherbroadcastaddr, (addr), ETHER_ADDR_LEN) == 0) #endif static int gif_clone_create(struct if_clone *ifc, int unit, caddr_t params) { struct gif_softc *sc; sc = malloc(sizeof(struct gif_softc), M_GIF, M_WAITOK | M_ZERO); sc->gif_fibnum = curthread->td_proc->p_fibnum; GIF2IFP(sc) = if_alloc(IFT_GIF); GIF_LOCK_INIT(sc); GIF2IFP(sc)->if_softc = sc; if_initname(GIF2IFP(sc), gifname, unit); GIF2IFP(sc)->if_addrlen = 0; GIF2IFP(sc)->if_mtu = GIF_MTU; GIF2IFP(sc)->if_flags = IFF_POINTOPOINT | IFF_MULTICAST; #if 0 /* turn off ingress filter */ GIF2IFP(sc)->if_flags |= IFF_LINK2; #endif GIF2IFP(sc)->if_ioctl = gif_ioctl; GIF2IFP(sc)->if_transmit = gif_transmit; GIF2IFP(sc)->if_qflush = gif_qflush; GIF2IFP(sc)->if_output = gif_output; GIF2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE; GIF2IFP(sc)->if_capenable |= IFCAP_LINKSTATE; if_attach(GIF2IFP(sc)); bpfattach(GIF2IFP(sc), DLT_NULL, sizeof(u_int32_t)); if (ng_gif_attach_p != NULL) (*ng_gif_attach_p)(GIF2IFP(sc)); GIF_LIST_LOCK(); LIST_INSERT_HEAD(&V_gif_softc_list, sc, gif_list); GIF_LIST_UNLOCK(); return (0); } static void gif_clone_destroy(struct ifnet *ifp) { struct gif_softc *sc; sx_xlock(&gif_ioctl_sx); sc = ifp->if_softc; gif_delete_tunnel(ifp); GIF_LIST_LOCK(); LIST_REMOVE(sc, gif_list); GIF_LIST_UNLOCK(); if (ng_gif_detach_p != NULL) (*ng_gif_detach_p)(ifp); bpfdetach(ifp); if_detach(ifp); ifp->if_softc = NULL; sx_xunlock(&gif_ioctl_sx); if_free(ifp); GIF_LOCK_DESTROY(sc); free(sc, M_GIF); } static void vnet_gif_init(const void *unused __unused) { LIST_INIT(&V_gif_softc_list); GIF_LIST_LOCK_INIT(); V_gif_cloner = if_clone_simple(gifname, gif_clone_create, gif_clone_destroy, 0); } VNET_SYSINIT(vnet_gif_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_gif_init, NULL); static void vnet_gif_uninit(const void *unused __unused) { if_clone_detach(V_gif_cloner); GIF_LIST_LOCK_DESTROY(); } VNET_SYSUNINIT(vnet_gif_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_gif_uninit, NULL); static int gifmodevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: case MOD_UNLOAD: break; default: return (EOPNOTSUPP); } return (0); } static moduledata_t gif_mod = { "if_gif", gifmodevent, 0 }; DECLARE_MODULE(if_gif, gif_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(if_gif, 1); int gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg) { GIF_RLOCK_TRACKER; const struct ip *ip; struct gif_softc *sc; int ret; sc = (struct gif_softc *)arg; if (sc == NULL || (GIF2IFP(sc)->if_flags & IFF_UP) == 0) return (0); ret = 0; GIF_RLOCK(sc); /* no physical address */ if (sc->gif_family == 0) goto done; switch (proto) { #ifdef INET case IPPROTO_IPV4: #endif #ifdef INET6 case IPPROTO_IPV6: #endif case IPPROTO_ETHERIP: break; default: goto done; } /* Bail on short packets */ M_ASSERTPKTHDR(m); if (m->m_pkthdr.len < sizeof(struct ip)) goto done; ip = mtod(m, const struct ip *); switch (ip->ip_v) { #ifdef INET case 4: if (sc->gif_family != AF_INET) goto done; ret = in_gif_encapcheck(m, off, proto, arg); break; #endif 
#ifdef INET6 case 6: if (m->m_pkthdr.len < sizeof(struct ip6_hdr)) goto done; if (sc->gif_family != AF_INET6) goto done; ret = in6_gif_encapcheck(m, off, proto, arg); break; #endif } done: GIF_RUNLOCK(sc); return (ret); } static int gif_transmit(struct ifnet *ifp, struct mbuf *m) { struct gif_softc *sc; struct etherip_header *eth; #ifdef INET struct ip *ip; #endif #ifdef INET6 struct ip6_hdr *ip6; uint32_t t; #endif uint32_t af; uint8_t proto, ecn; int error; #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) { m_freem(m); goto err; } #endif error = ENETDOWN; sc = ifp->if_softc; if ((ifp->if_flags & IFF_MONITOR) != 0 || (ifp->if_flags & IFF_UP) == 0 || sc->gif_family == 0 || (error = gif_check_nesting(ifp, m)) != 0) { m_freem(m); goto err; } /* Now pull back the af that we stashed in the csum_data. */ if (ifp->if_bridge) af = AF_LINK; else af = m->m_pkthdr.csum_data; m->m_flags &= ~(M_BCAST|M_MCAST); M_SETFIB(m, sc->gif_fibnum); BPF_MTAP2(ifp, &af, sizeof(af), m); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); /* inner AF-specific encapsulation */ ecn = 0; switch (af) { #ifdef INET case AF_INET: proto = IPPROTO_IPV4; if (m->m_len < sizeof(struct ip)) m = m_pullup(m, sizeof(struct ip)); if (m == NULL) { error = ENOBUFS; goto err; } ip = mtod(m, struct ip *); ip_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED: ECN_NOCARE, &ecn, &ip->ip_tos); break; #endif #ifdef INET6 case AF_INET6: proto = IPPROTO_IPV6; if (m->m_len < sizeof(struct ip6_hdr)) m = m_pullup(m, sizeof(struct ip6_hdr)); if (m == NULL) { error = ENOBUFS; goto err; } t = 0; ip6 = mtod(m, struct ip6_hdr *); ip6_ecn_ingress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED: ECN_NOCARE, &t, &ip6->ip6_flow); ecn = (ntohl(t) >> 20) & 0xff; break; #endif case AF_LINK: proto = IPPROTO_ETHERIP; M_PREPEND(m, sizeof(struct etherip_header), M_NOWAIT); if (m == NULL) { error = ENOBUFS; goto err; } eth = mtod(m, struct etherip_header *); eth->eip_resvh = 0; eth->eip_ver = ETHERIP_VERSION; eth->eip_resvl = 0; break; default: error = EAFNOSUPPORT; m_freem(m); goto err; } /* XXX should we check if our outer source is legal? */ /* dispatch to output logic based on outer AF */ switch (sc->gif_family) { #ifdef INET case AF_INET: error = in_gif_output(ifp, m, proto, ecn); break; #endif #ifdef INET6 case AF_INET6: error = in6_gif_output(ifp, m, proto, ecn); break; #endif default: m_freem(m); } err: if (error) if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (error); } static void gif_qflush(struct ifnet *ifp __unused) { } #define MTAG_GIF 1080679712 static int gif_check_nesting(struct ifnet *ifp, struct mbuf *m) { struct m_tag *mtag; int count; /* * gif may cause infinite recursion calls when misconfigured. * We'll prevent this by detecting loops. * * High nesting level may cause stack exhaustion. * We'll prevent this by introducing upper limit. 
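 * Both checks use mbuf tags: after the checks pass, this function prepends
 * an MTAG_GIF tag carrying the transmitting ifp.  Finding our own ifp in an
 * existing tag means the packet has looped back to this interface, and the
 * number of tags already present is the nesting depth, which is bounded by
 * V_max_gif_nesting (the net.link.gif.max_nesting sysctl).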
*/ count = 1; mtag = NULL; while ((mtag = m_tag_locate(m, MTAG_GIF, 0, mtag)) != NULL) { if (*(struct ifnet **)(mtag + 1) == ifp) { log(LOG_NOTICE, "%s: loop detected\n", if_name(ifp)); return (EIO); } count++; } if (count > V_max_gif_nesting) { log(LOG_NOTICE, "%s: if_output recursively called too many times(%d)\n", if_name(ifp), count); return (EIO); } mtag = m_tag_alloc(MTAG_GIF, 0, sizeof(struct ifnet *), M_NOWAIT); if (mtag == NULL) return (ENOMEM); *(struct ifnet **)(mtag + 1) = ifp; m_tag_prepend(m, mtag); return (0); } int gif_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { uint32_t af; if (dst->sa_family == AF_UNSPEC) bcopy(dst->sa_data, &af, sizeof(af)); else af = dst->sa_family; /* * Now save the af in the inbound pkt csum data, this is a cheat since * we are using the inbound csum_data field to carry the af over to * the gif_transmit() routine, avoiding using yet another mtag. */ m->m_pkthdr.csum_data = af; return (ifp->if_transmit(ifp, m)); } void gif_input(struct mbuf *m, struct ifnet *ifp, int proto, uint8_t ecn) { struct etherip_header *eip; #ifdef INET struct ip *ip; #endif #ifdef INET6 struct ip6_hdr *ip6; uint32_t t; #endif struct gif_softc *sc; struct ether_header *eh; struct ifnet *oldifp; int isr, n, af; if (ifp == NULL) { /* just in case */ m_freem(m); return; } sc = ifp->if_softc; m->m_pkthdr.rcvif = ifp; m_clrprotoflags(m); switch (proto) { #ifdef INET case IPPROTO_IPV4: af = AF_INET; if (m->m_len < sizeof(struct ip)) m = m_pullup(m, sizeof(struct ip)); if (m == NULL) goto drop; ip = mtod(m, struct ip *); if (ip_ecn_egress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED: ECN_NOCARE, &ecn, &ip->ip_tos) == 0) { m_freem(m); goto drop; } break; #endif #ifdef INET6 case IPPROTO_IPV6: af = AF_INET6; if (m->m_len < sizeof(struct ip6_hdr)) m = m_pullup(m, sizeof(struct ip6_hdr)); if (m == NULL) goto drop; t = htonl((uint32_t)ecn << 20); ip6 = mtod(m, struct ip6_hdr *); if (ip6_ecn_egress((ifp->if_flags & IFF_LINK1) ? ECN_ALLOWED: ECN_NOCARE, &t, &ip6->ip6_flow) == 0) { m_freem(m); goto drop; } break; #endif case IPPROTO_ETHERIP: af = AF_LINK; break; default: m_freem(m); goto drop; } #ifdef MAC mac_ifnet_create_mbuf(ifp, m); #endif if (bpf_peers_present(ifp->if_bpf)) { uint32_t af1 = af; bpf_mtap2(ifp->if_bpf, &af1, sizeof(af1), m); } if ((ifp->if_flags & IFF_MONITOR) != 0) { if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); m_freem(m); return; } if (ng_gif_input_p != NULL) { (*ng_gif_input_p)(ifp, &m, af); if (m == NULL) goto drop; } /* * Put the packet to the network layer input queue according to the * specified address family. * Note: older versions of gif_input directly called network layer * input functions, e.g. ip6_input, here. We changed the policy to * prevent too many recursive calls of such input functions, which * might cause kernel panic. But the change may introduce another * problem; if the input queue is full, packets are discarded. * The kernel stack overflow really happened, and we believed * queue-full rarely occurs, so we changed the policy. 
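 * Concretely, the switch below maps the inner address family to a netisr
 * protocol (AF_INET -> NETISR_IP, AF_INET6 -> NETISR_IPV6) and hands the
 * packet to netisr_dispatch() rather than calling ip_input()/ip6_input()
 * here; AF_LINK (EtherIP) frames are instead passed to the bridge, if any,
 * or freed.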
*/ switch (af) { #ifdef INET case AF_INET: isr = NETISR_IP; break; #endif #ifdef INET6 case AF_INET6: isr = NETISR_IPV6; break; #endif case AF_LINK: n = sizeof(struct etherip_header) + sizeof(struct ether_header); if (n > m->m_len) m = m_pullup(m, n); if (m == NULL) goto drop; eip = mtod(m, struct etherip_header *); if (eip->eip_ver != ETHERIP_VERSION) { /* discard unknown versions */ m_freem(m); goto drop; } m_adj(m, sizeof(struct etherip_header)); m->m_flags &= ~(M_BCAST|M_MCAST); m->m_pkthdr.rcvif = ifp; if (ifp->if_bridge) { oldifp = ifp; eh = mtod(m, struct ether_header *); if (ETHER_IS_MULTICAST(eh->ether_dhost)) { if (ETHER_IS_BROADCAST(eh->ether_dhost)) m->m_flags |= M_BCAST; else m->m_flags |= M_MCAST; if_inc_counter(ifp, IFCOUNTER_IMCASTS, 1); } BRIDGE_INPUT(ifp, m); if (m != NULL && ifp != oldifp) { /* * The bridge gave us back itself or one of the * members for which the frame is addressed. */ ether_demux(ifp, m); return; } } if (m != NULL) m_freem(m); return; default: if (ng_gif_input_orphan_p != NULL) (*ng_gif_input_orphan_p)(ifp, m, af); else m_freem(m); return; } if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); M_SETFIB(m, ifp->if_fib); netisr_dispatch(isr, m); return; drop: if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); } /* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */ int gif_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { GIF_RLOCK_TRACKER; struct ifreq *ifr = (struct ifreq*)data; struct sockaddr *dst, *src; struct gif_softc *sc; #ifdef INET struct sockaddr_in *sin = NULL; #endif #ifdef INET6 struct sockaddr_in6 *sin6 = NULL; #endif u_int options; int error; switch (cmd) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP; case SIOCADDMULTI: case SIOCDELMULTI: case SIOCGIFMTU: case SIOCSIFFLAGS: return (0); case SIOCSIFMTU: if (ifr->ifr_mtu < GIF_MTU_MIN || ifr->ifr_mtu > GIF_MTU_MAX) return (EINVAL); else ifp->if_mtu = ifr->ifr_mtu; return (0); } sx_xlock(&gif_ioctl_sx); sc = ifp->if_softc; if (sc == NULL) { error = ENXIO; goto bad; } error = 0; switch (cmd) { case SIOCSIFPHYADDR: #ifdef INET6 case SIOCSIFPHYADDR_IN6: #endif error = EINVAL; switch (cmd) { #ifdef INET case SIOCSIFPHYADDR: src = (struct sockaddr *) &(((struct in_aliasreq *)data)->ifra_addr); dst = (struct sockaddr *) &(((struct in_aliasreq *)data)->ifra_dstaddr); break; #endif #ifdef INET6 case SIOCSIFPHYADDR_IN6: src = (struct sockaddr *) &(((struct in6_aliasreq *)data)->ifra_addr); dst = (struct sockaddr *) &(((struct in6_aliasreq *)data)->ifra_dstaddr); break; #endif default: goto bad; } /* sa_family must be equal */ if (src->sa_family != dst->sa_family || src->sa_len != dst->sa_len) goto bad; /* validate sa_len */ /* check sa_family looks sane for the cmd */ switch (src->sa_family) { #ifdef INET case AF_INET: if (src->sa_len != sizeof(struct sockaddr_in)) goto bad; if (cmd != SIOCSIFPHYADDR) { error = EAFNOSUPPORT; goto bad; } if (satosin(src)->sin_addr.s_addr == INADDR_ANY || satosin(dst)->sin_addr.s_addr == INADDR_ANY) { error = EADDRNOTAVAIL; goto bad; } break; #endif #ifdef INET6 case AF_INET6: if (src->sa_len != sizeof(struct sockaddr_in6)) goto bad; if (cmd != SIOCSIFPHYADDR_IN6) { error = EAFNOSUPPORT; goto bad; } error = EADDRNOTAVAIL; if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr) || IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr)) goto bad; /* * Check validity of the scope zone ID of the * addresses, and convert it into the kernel * internal form if necessary. 
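 * (sa6_embedscope() folds the sin6_scope_id of a scoped address, e.g. a
 * link-local one, into the KAME-style embedded form used inside the kernel;
 * sa6_recoverscope(), used in the SIOCGIFPSRCADDR_IN6/SIOCGIFPDSTADDR_IN6
 * cases below, performs the reverse conversion for userland.)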
*/ error = sa6_embedscope(satosin6(src), 0); if (error != 0) goto bad; error = sa6_embedscope(satosin6(dst), 0); if (error != 0) goto bad; break; #endif default: error = EAFNOSUPPORT; goto bad; } error = gif_set_tunnel(ifp, src, dst); break; case SIOCDIFPHYADDR: gif_delete_tunnel(ifp); break; case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: #ifdef INET6 case SIOCGIFPSRCADDR_IN6: case SIOCGIFPDSTADDR_IN6: #endif if (sc->gif_family == 0) { error = EADDRNOTAVAIL; break; } GIF_RLOCK(sc); switch (cmd) { #ifdef INET case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: if (sc->gif_family != AF_INET) { error = EADDRNOTAVAIL; break; } sin = (struct sockaddr_in *)&ifr->ifr_addr; memset(sin, 0, sizeof(*sin)); sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); break; #endif #ifdef INET6 case SIOCGIFPSRCADDR_IN6: case SIOCGIFPDSTADDR_IN6: if (sc->gif_family != AF_INET6) { error = EADDRNOTAVAIL; break; } sin6 = (struct sockaddr_in6 *) &(((struct in6_ifreq *)data)->ifr_addr); memset(sin6, 0, sizeof(*sin6)); sin6->sin6_family = AF_INET6; sin6->sin6_len = sizeof(*sin6); break; #endif default: error = EAFNOSUPPORT; } if (error == 0) { switch (cmd) { #ifdef INET case SIOCGIFPSRCADDR: sin->sin_addr = sc->gif_iphdr->ip_src; break; case SIOCGIFPDSTADDR: sin->sin_addr = sc->gif_iphdr->ip_dst; break; #endif #ifdef INET6 case SIOCGIFPSRCADDR_IN6: sin6->sin6_addr = sc->gif_ip6hdr->ip6_src; break; case SIOCGIFPDSTADDR_IN6: sin6->sin6_addr = sc->gif_ip6hdr->ip6_dst; break; #endif } } GIF_RUNLOCK(sc); if (error != 0) break; switch (cmd) { #ifdef INET case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: error = prison_if(curthread->td_ucred, (struct sockaddr *)sin); if (error != 0) memset(sin, 0, sizeof(*sin)); break; #endif #ifdef INET6 case SIOCGIFPSRCADDR_IN6: case SIOCGIFPDSTADDR_IN6: error = prison_if(curthread->td_ucred, (struct sockaddr *)sin6); if (error == 0) error = sa6_recoverscope(sin6); if (error != 0) memset(sin6, 0, sizeof(*sin6)); #endif } break; case SIOCGTUNFIB: ifr->ifr_fib = sc->gif_fibnum; break; case SIOCSTUNFIB: if ((error = priv_check(curthread, PRIV_NET_GIF)) != 0) break; if (ifr->ifr_fib >= rt_numfibs) error = EINVAL; else sc->gif_fibnum = ifr->ifr_fib; break; case GIFGOPTS: options = sc->gif_options; - error = copyout(&options, ifr->ifr_data, sizeof(options)); + error = copyout(&options, ifr_data_get_ptr(ifr), + sizeof(options)); break; case GIFSOPTS: if ((error = priv_check(curthread, PRIV_NET_GIF)) != 0) break; - error = copyin(ifr->ifr_data, &options, sizeof(options)); + error = copyin(ifr_data_get_ptr(ifr), &options, + sizeof(options)); if (error) break; if (options & ~GIF_OPTMASK) error = EINVAL; else sc->gif_options = options; break; default: error = EINVAL; break; } bad: sx_xunlock(&gif_ioctl_sx); return (error); } static void gif_detach(struct gif_softc *sc) { sx_assert(&gif_ioctl_sx, SA_XLOCKED); if (sc->gif_ecookie != NULL) encap_detach(sc->gif_ecookie); sc->gif_ecookie = NULL; } static int gif_attach(struct gif_softc *sc, int af) { sx_assert(&gif_ioctl_sx, SA_XLOCKED); switch (af) { #ifdef INET case AF_INET: return (in_gif_attach(sc)); #endif #ifdef INET6 case AF_INET6: return (in6_gif_attach(sc)); #endif } return (EAFNOSUPPORT); } static int gif_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst) { struct gif_softc *sc = ifp->if_softc; struct gif_softc *tsc; #ifdef INET struct ip *ip; #endif #ifdef INET6 struct ip6_hdr *ip6; #endif void *hdr; int error = 0; if (sc == NULL) return (ENXIO); /* Disallow parallel tunnels unless instructed otherwise. 
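 * With V_parallel_tunnels at its default of 0, the loop below scans the
 * per-vnet gif list and fails with EADDRNOTAVAIL if another gif(4) interface
 * of the same outer family already uses this exact src/dst pair.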
*/ if (V_parallel_tunnels == 0) { GIF_LIST_LOCK(); LIST_FOREACH(tsc, &V_gif_softc_list, gif_list) { if (tsc == sc || tsc->gif_family != src->sa_family) continue; #ifdef INET if (tsc->gif_family == AF_INET && tsc->gif_iphdr->ip_src.s_addr == satosin(src)->sin_addr.s_addr && tsc->gif_iphdr->ip_dst.s_addr == satosin(dst)->sin_addr.s_addr) { error = EADDRNOTAVAIL; GIF_LIST_UNLOCK(); goto bad; } #endif #ifdef INET6 if (tsc->gif_family == AF_INET6 && IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_src, &satosin6(src)->sin6_addr) && IN6_ARE_ADDR_EQUAL(&tsc->gif_ip6hdr->ip6_dst, &satosin6(dst)->sin6_addr)) { error = EADDRNOTAVAIL; GIF_LIST_UNLOCK(); goto bad; } #endif } GIF_LIST_UNLOCK(); } switch (src->sa_family) { #ifdef INET case AF_INET: hdr = ip = malloc(sizeof(struct ip), M_GIF, M_WAITOK | M_ZERO); ip->ip_src.s_addr = satosin(src)->sin_addr.s_addr; ip->ip_dst.s_addr = satosin(dst)->sin_addr.s_addr; break; #endif #ifdef INET6 case AF_INET6: hdr = ip6 = malloc(sizeof(struct ip6_hdr), M_GIF, M_WAITOK | M_ZERO); ip6->ip6_src = satosin6(src)->sin6_addr; ip6->ip6_dst = satosin6(dst)->sin6_addr; ip6->ip6_vfc = IPV6_VERSION; break; #endif default: return (EAFNOSUPPORT); } if (sc->gif_family != src->sa_family) gif_detach(sc); if (sc->gif_family == 0 || sc->gif_family != src->sa_family) error = gif_attach(sc, src->sa_family); GIF_WLOCK(sc); if (sc->gif_family != 0) free(sc->gif_hdr, M_GIF); sc->gif_family = src->sa_family; sc->gif_hdr = hdr; GIF_WUNLOCK(sc); #if defined(INET) || defined(INET6) bad: #endif if (error == 0 && sc->gif_family != 0) { ifp->if_drv_flags |= IFF_DRV_RUNNING; if_link_state_change(ifp, LINK_STATE_UP); } else { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; if_link_state_change(ifp, LINK_STATE_DOWN); } return (error); } static void gif_delete_tunnel(struct ifnet *ifp) { struct gif_softc *sc = ifp->if_softc; int family; if (sc == NULL) return; GIF_WLOCK(sc); family = sc->gif_family; sc->gif_family = 0; GIF_WUNLOCK(sc); if (family != 0) { gif_detach(sc); free(sc->gif_hdr, M_GIF); } ifp->if_drv_flags &= ~IFF_DRV_RUNNING; if_link_state_change(ifp, LINK_STATE_DOWN); } Index: stable/11/sys/net/if_gre.c =================================================================== --- stable/11/sys/net/if_gre.c (revision 332287) +++ stable/11/sys/net/if_gre.c (revision 332288) @@ -1,995 +1,997 @@ /*- * Copyright (c) 1998 The NetBSD Foundation, Inc. * Copyright (c) 2014 Andrey V. Elsukov * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Heiko W.Rupp * * IPv6-over-GRE contributed by Gert Doering * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $NetBSD: if_gre.c,v 1.49 2003/12/11 00:22:29 itojun Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #include #include #endif #ifdef INET6 #include #include #include #include #endif #include #include #include #include #include #define GREMTU 1476 static const char grename[] = "gre"; static MALLOC_DEFINE(M_GRE, grename, "Generic Routing Encapsulation"); static VNET_DEFINE(struct mtx, gre_mtx); #define V_gre_mtx VNET(gre_mtx) #define GRE_LIST_LOCK_INIT(x) mtx_init(&V_gre_mtx, "gre_mtx", NULL, \ MTX_DEF) #define GRE_LIST_LOCK_DESTROY(x) mtx_destroy(&V_gre_mtx) #define GRE_LIST_LOCK(x) mtx_lock(&V_gre_mtx) #define GRE_LIST_UNLOCK(x) mtx_unlock(&V_gre_mtx) static VNET_DEFINE(LIST_HEAD(, gre_softc), gre_softc_list); #define V_gre_softc_list VNET(gre_softc_list) static struct sx gre_ioctl_sx; SX_SYSINIT(gre_ioctl_sx, &gre_ioctl_sx, "gre_ioctl"); static int gre_clone_create(struct if_clone *, int, caddr_t); static void gre_clone_destroy(struct ifnet *); static VNET_DEFINE(struct if_clone *, gre_cloner); #define V_gre_cloner VNET(gre_cloner) static void gre_qflush(struct ifnet *); static int gre_transmit(struct ifnet *, struct mbuf *); static int gre_ioctl(struct ifnet *, u_long, caddr_t); static int gre_output(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); static void gre_updatehdr(struct gre_softc *); static int gre_set_tunnel(struct ifnet *, struct sockaddr *, struct sockaddr *); static void gre_delete_tunnel(struct ifnet *); SYSCTL_DECL(_net_link); static SYSCTL_NODE(_net_link, IFT_TUNNEL, gre, CTLFLAG_RW, 0, "Generic Routing Encapsulation"); #ifndef MAX_GRE_NEST /* * This macro controls the default upper limitation on nesting of gre tunnels. * Since, setting a large value to this macro with a careless configuration * may introduce system crash, we don't allow any nestings by default. * If you need to configure nested gre tunnels, you can define this macro * in your kernel configuration file. However, if you do so, please be * careful to configure the tunnels so that it won't make a loop. 
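 * MAX_GRE_NEST only provides the compile-time default; the effective limit
 * is V_max_gre_nesting, exposed below as a read/write sysctl, so it can also
 * be adjusted at runtime, e.g.:
 *
 *	sysctl net.link.gre.max_nesting=3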
*/ #define MAX_GRE_NEST 1 #endif static VNET_DEFINE(int, max_gre_nesting) = MAX_GRE_NEST; #define V_max_gre_nesting VNET(max_gre_nesting) SYSCTL_INT(_net_link_gre, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(max_gre_nesting), 0, "Max nested tunnels"); static void vnet_gre_init(const void *unused __unused) { LIST_INIT(&V_gre_softc_list); GRE_LIST_LOCK_INIT(); V_gre_cloner = if_clone_simple(grename, gre_clone_create, gre_clone_destroy, 0); } VNET_SYSINIT(vnet_gre_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_gre_init, NULL); static void vnet_gre_uninit(const void *unused __unused) { if_clone_detach(V_gre_cloner); GRE_LIST_LOCK_DESTROY(); } VNET_SYSUNINIT(vnet_gre_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_gre_uninit, NULL); static int gre_clone_create(struct if_clone *ifc, int unit, caddr_t params) { struct gre_softc *sc; sc = malloc(sizeof(struct gre_softc), M_GRE, M_WAITOK | M_ZERO); sc->gre_fibnum = curthread->td_proc->p_fibnum; GRE2IFP(sc) = if_alloc(IFT_TUNNEL); GRE_LOCK_INIT(sc); GRE2IFP(sc)->if_softc = sc; if_initname(GRE2IFP(sc), grename, unit); GRE2IFP(sc)->if_mtu = GREMTU; GRE2IFP(sc)->if_flags = IFF_POINTOPOINT|IFF_MULTICAST; GRE2IFP(sc)->if_output = gre_output; GRE2IFP(sc)->if_ioctl = gre_ioctl; GRE2IFP(sc)->if_transmit = gre_transmit; GRE2IFP(sc)->if_qflush = gre_qflush; GRE2IFP(sc)->if_capabilities |= IFCAP_LINKSTATE; GRE2IFP(sc)->if_capenable |= IFCAP_LINKSTATE; if_attach(GRE2IFP(sc)); bpfattach(GRE2IFP(sc), DLT_NULL, sizeof(u_int32_t)); GRE_LIST_LOCK(); LIST_INSERT_HEAD(&V_gre_softc_list, sc, gre_list); GRE_LIST_UNLOCK(); return (0); } static void gre_clone_destroy(struct ifnet *ifp) { struct gre_softc *sc; sx_xlock(&gre_ioctl_sx); sc = ifp->if_softc; gre_delete_tunnel(ifp); GRE_LIST_LOCK(); LIST_REMOVE(sc, gre_list); GRE_LIST_UNLOCK(); bpfdetach(ifp); if_detach(ifp); ifp->if_softc = NULL; sx_xunlock(&gre_ioctl_sx); if_free(ifp); GRE_LOCK_DESTROY(sc); free(sc, M_GRE); } static int gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { GRE_RLOCK_TRACKER; struct ifreq *ifr = (struct ifreq *)data; struct sockaddr *src, *dst; struct gre_softc *sc; #ifdef INET struct sockaddr_in *sin = NULL; #endif #ifdef INET6 struct sockaddr_in6 *sin6 = NULL; #endif uint32_t opt; int error; switch (cmd) { case SIOCSIFMTU: /* XXX: */ if (ifr->ifr_mtu < 576) return (EINVAL); ifp->if_mtu = ifr->ifr_mtu; return (0); case SIOCSIFADDR: ifp->if_flags |= IFF_UP; case SIOCSIFFLAGS: case SIOCADDMULTI: case SIOCDELMULTI: return (0); case GRESADDRS: case GRESADDRD: case GREGADDRS: case GREGADDRD: case GRESPROTO: case GREGPROTO: return (EOPNOTSUPP); } src = dst = NULL; sx_xlock(&gre_ioctl_sx); sc = ifp->if_softc; if (sc == NULL) { error = ENXIO; goto end; } error = 0; switch (cmd) { case SIOCSIFPHYADDR: #ifdef INET6 case SIOCSIFPHYADDR_IN6: #endif error = EINVAL; switch (cmd) { #ifdef INET case SIOCSIFPHYADDR: src = (struct sockaddr *) &(((struct in_aliasreq *)data)->ifra_addr); dst = (struct sockaddr *) &(((struct in_aliasreq *)data)->ifra_dstaddr); break; #endif #ifdef INET6 case SIOCSIFPHYADDR_IN6: src = (struct sockaddr *) &(((struct in6_aliasreq *)data)->ifra_addr); dst = (struct sockaddr *) &(((struct in6_aliasreq *)data)->ifra_dstaddr); break; #endif default: error = EAFNOSUPPORT; goto end; } /* sa_family must be equal */ if (src->sa_family != dst->sa_family || src->sa_len != dst->sa_len) goto end; /* validate sa_len */ switch (src->sa_family) { #ifdef INET case AF_INET: if (src->sa_len != sizeof(struct sockaddr_in)) goto end; break; #endif #ifdef INET6 case 
AF_INET6: if (src->sa_len != sizeof(struct sockaddr_in6)) goto end; break; #endif default: error = EAFNOSUPPORT; goto end; } /* check sa_family looks sane for the cmd */ error = EAFNOSUPPORT; switch (cmd) { #ifdef INET case SIOCSIFPHYADDR: if (src->sa_family == AF_INET) break; goto end; #endif #ifdef INET6 case SIOCSIFPHYADDR_IN6: if (src->sa_family == AF_INET6) break; goto end; #endif } error = EADDRNOTAVAIL; switch (src->sa_family) { #ifdef INET case AF_INET: if (satosin(src)->sin_addr.s_addr == INADDR_ANY || satosin(dst)->sin_addr.s_addr == INADDR_ANY) goto end; break; #endif #ifdef INET6 case AF_INET6: if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr) || IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr)) goto end; /* * Check validity of the scope zone ID of the * addresses, and convert it into the kernel * internal form if necessary. */ error = sa6_embedscope(satosin6(src), 0); if (error != 0) goto end; error = sa6_embedscope(satosin6(dst), 0); if (error != 0) goto end; #endif } error = gre_set_tunnel(ifp, src, dst); break; case SIOCDIFPHYADDR: gre_delete_tunnel(ifp); break; case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: #ifdef INET6 case SIOCGIFPSRCADDR_IN6: case SIOCGIFPDSTADDR_IN6: #endif if (sc->gre_family == 0) { error = EADDRNOTAVAIL; break; } GRE_RLOCK(sc); switch (cmd) { #ifdef INET case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: if (sc->gre_family != AF_INET) { error = EADDRNOTAVAIL; break; } sin = (struct sockaddr_in *)&ifr->ifr_addr; memset(sin, 0, sizeof(*sin)); sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); break; #endif #ifdef INET6 case SIOCGIFPSRCADDR_IN6: case SIOCGIFPDSTADDR_IN6: if (sc->gre_family != AF_INET6) { error = EADDRNOTAVAIL; break; } sin6 = (struct sockaddr_in6 *) &(((struct in6_ifreq *)data)->ifr_addr); memset(sin6, 0, sizeof(*sin6)); sin6->sin6_family = AF_INET6; sin6->sin6_len = sizeof(*sin6); break; #endif } if (error == 0) { switch (cmd) { #ifdef INET case SIOCGIFPSRCADDR: sin->sin_addr = sc->gre_oip.ip_src; break; case SIOCGIFPDSTADDR: sin->sin_addr = sc->gre_oip.ip_dst; break; #endif #ifdef INET6 case SIOCGIFPSRCADDR_IN6: sin6->sin6_addr = sc->gre_oip6.ip6_src; break; case SIOCGIFPDSTADDR_IN6: sin6->sin6_addr = sc->gre_oip6.ip6_dst; break; #endif } } GRE_RUNLOCK(sc); if (error != 0) break; switch (cmd) { #ifdef INET case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: error = prison_if(curthread->td_ucred, (struct sockaddr *)sin); if (error != 0) memset(sin, 0, sizeof(*sin)); break; #endif #ifdef INET6 case SIOCGIFPSRCADDR_IN6: case SIOCGIFPDSTADDR_IN6: error = prison_if(curthread->td_ucred, (struct sockaddr *)sin6); if (error == 0) error = sa6_recoverscope(sin6); if (error != 0) memset(sin6, 0, sizeof(*sin6)); #endif } break; case SIOCGTUNFIB: ifr->ifr_fib = sc->gre_fibnum; break; case SIOCSTUNFIB: if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0) break; if (ifr->ifr_fib >= rt_numfibs) error = EINVAL; else sc->gre_fibnum = ifr->ifr_fib; break; case GRESKEY: if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0) break; - if ((error = copyin(ifr->ifr_data, &opt, sizeof(opt))) != 0) + if ((error = copyin(ifr_data_get_ptr(ifr), &opt, + sizeof(opt))) != 0) break; if (sc->gre_key != opt) { GRE_WLOCK(sc); sc->gre_key = opt; gre_updatehdr(sc); GRE_WUNLOCK(sc); } break; case GREGKEY: - error = copyout(&sc->gre_key, ifr->ifr_data, + error = copyout(&sc->gre_key, ifr_data_get_ptr(ifr), sizeof(sc->gre_key)); break; case GRESOPTS: if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0) break; - if ((error = copyin(ifr->ifr_data, &opt, sizeof(opt))) != 0) + 
if ((error = copyin(ifr_data_get_ptr(ifr), &opt, + sizeof(opt))) != 0) break; if (opt & ~GRE_OPTMASK) error = EINVAL; else { if (sc->gre_options != opt) { GRE_WLOCK(sc); sc->gre_options = opt; gre_updatehdr(sc); GRE_WUNLOCK(sc); } } break; case GREGOPTS: - error = copyout(&sc->gre_options, ifr->ifr_data, + error = copyout(&sc->gre_options, ifr_data_get_ptr(ifr), sizeof(sc->gre_options)); break; default: error = EINVAL; break; } end: sx_xunlock(&gre_ioctl_sx); return (error); } static void gre_updatehdr(struct gre_softc *sc) { struct grehdr *gh = NULL; uint32_t *opts; uint16_t flags; GRE_WLOCK_ASSERT(sc); switch (sc->gre_family) { #ifdef INET case AF_INET: sc->gre_hlen = sizeof(struct greip); sc->gre_oip.ip_v = IPPROTO_IPV4; sc->gre_oip.ip_hl = sizeof(struct ip) >> 2; sc->gre_oip.ip_p = IPPROTO_GRE; gh = &sc->gre_gihdr->gi_gre; break; #endif #ifdef INET6 case AF_INET6: sc->gre_hlen = sizeof(struct greip6); sc->gre_oip6.ip6_vfc = IPV6_VERSION; sc->gre_oip6.ip6_nxt = IPPROTO_GRE; gh = &sc->gre_gi6hdr->gi6_gre; break; #endif default: return; } flags = 0; opts = gh->gre_opts; if (sc->gre_options & GRE_ENABLE_CSUM) { flags |= GRE_FLAGS_CP; sc->gre_hlen += 2 * sizeof(uint16_t); *opts++ = 0; } if (sc->gre_key != 0) { flags |= GRE_FLAGS_KP; sc->gre_hlen += sizeof(uint32_t); *opts++ = htonl(sc->gre_key); } if (sc->gre_options & GRE_ENABLE_SEQ) { flags |= GRE_FLAGS_SP; sc->gre_hlen += sizeof(uint32_t); *opts++ = 0; } else sc->gre_oseq = 0; gh->gre_flags = htons(flags); } static void gre_detach(struct gre_softc *sc) { sx_assert(&gre_ioctl_sx, SA_XLOCKED); if (sc->gre_ecookie != NULL) encap_detach(sc->gre_ecookie); sc->gre_ecookie = NULL; } static int gre_set_tunnel(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst) { struct gre_softc *sc, *tsc; #ifdef INET6 struct ip6_hdr *ip6; #endif #ifdef INET struct ip *ip; #endif void *hdr; int error; sx_assert(&gre_ioctl_sx, SA_XLOCKED); GRE_LIST_LOCK(); sc = ifp->if_softc; LIST_FOREACH(tsc, &V_gre_softc_list, gre_list) { if (tsc == sc || tsc->gre_family != src->sa_family) continue; #ifdef INET if (tsc->gre_family == AF_INET && tsc->gre_oip.ip_src.s_addr == satosin(src)->sin_addr.s_addr && tsc->gre_oip.ip_dst.s_addr == satosin(dst)->sin_addr.s_addr) { GRE_LIST_UNLOCK(); return (EADDRNOTAVAIL); } #endif #ifdef INET6 if (tsc->gre_family == AF_INET6 && IN6_ARE_ADDR_EQUAL(&tsc->gre_oip6.ip6_src, &satosin6(src)->sin6_addr) && IN6_ARE_ADDR_EQUAL(&tsc->gre_oip6.ip6_dst, &satosin6(dst)->sin6_addr)) { GRE_LIST_UNLOCK(); return (EADDRNOTAVAIL); } #endif } GRE_LIST_UNLOCK(); switch (src->sa_family) { #ifdef INET case AF_INET: hdr = ip = malloc(sizeof(struct greip) + 3 * sizeof(uint32_t), M_GRE, M_WAITOK | M_ZERO); ip->ip_src = satosin(src)->sin_addr; ip->ip_dst = satosin(dst)->sin_addr; break; #endif #ifdef INET6 case AF_INET6: hdr = ip6 = malloc(sizeof(struct greip6) + 3 * sizeof(uint32_t), M_GRE, M_WAITOK | M_ZERO); ip6->ip6_src = satosin6(src)->sin6_addr; ip6->ip6_dst = satosin6(dst)->sin6_addr; break; #endif default: return (EAFNOSUPPORT); } if (sc->gre_family != 0) gre_detach(sc); GRE_WLOCK(sc); if (sc->gre_family != 0) free(sc->gre_hdr, M_GRE); sc->gre_family = src->sa_family; sc->gre_hdr = hdr; sc->gre_oseq = 0; sc->gre_iseq = UINT32_MAX; gre_updatehdr(sc); GRE_WUNLOCK(sc); error = 0; switch (src->sa_family) { #ifdef INET case AF_INET: error = in_gre_attach(sc); break; #endif #ifdef INET6 case AF_INET6: error = in6_gre_attach(sc); break; #endif } if (error == 0) { ifp->if_drv_flags |= IFF_DRV_RUNNING; if_link_state_change(ifp, LINK_STATE_UP); } return 
(error); } static void gre_delete_tunnel(struct ifnet *ifp) { struct gre_softc *sc = ifp->if_softc; int family; GRE_WLOCK(sc); family = sc->gre_family; sc->gre_family = 0; GRE_WUNLOCK(sc); if (family != 0) { gre_detach(sc); free(sc->gre_hdr, M_GRE); } ifp->if_drv_flags &= ~IFF_DRV_RUNNING; if_link_state_change(ifp, LINK_STATE_DOWN); } int gre_input(struct mbuf **mp, int *offp, int proto) { struct gre_softc *sc; struct grehdr *gh; struct ifnet *ifp; struct mbuf *m; uint32_t *opts; #ifdef notyet uint32_t key; #endif uint16_t flags; int hlen, isr, af; m = *mp; sc = encap_getarg(m); KASSERT(sc != NULL, ("encap_getarg returned NULL")); ifp = GRE2IFP(sc); hlen = *offp + sizeof(struct grehdr) + 4 * sizeof(uint32_t); if (m->m_pkthdr.len < hlen) goto drop; if (m->m_len < hlen) { m = m_pullup(m, hlen); if (m == NULL) goto drop; } gh = (struct grehdr *)mtodo(m, *offp); flags = ntohs(gh->gre_flags); if (flags & ~GRE_FLAGS_MASK) goto drop; opts = gh->gre_opts; hlen = 2 * sizeof(uint16_t); if (flags & GRE_FLAGS_CP) { /* reserved1 field must be zero */ if (((uint16_t *)opts)[1] != 0) goto drop; if (in_cksum_skip(m, m->m_pkthdr.len, *offp) != 0) goto drop; hlen += 2 * sizeof(uint16_t); opts++; } if (flags & GRE_FLAGS_KP) { #ifdef notyet /* * XXX: The current implementation uses the key only for outgoing * packets. But we can check the key value here, or even in the * encapcheck function. */ key = ntohl(*opts); #endif hlen += sizeof(uint32_t); opts++; } #ifdef notyet } else key = 0; if (sc->gre_key != 0 && (key != sc->gre_key || key != 0)) goto drop; #endif if (flags & GRE_FLAGS_SP) { #ifdef notyet seq = ntohl(*opts); #endif hlen += sizeof(uint32_t); } switch (ntohs(gh->gre_proto)) { case ETHERTYPE_WCCP: /* * For WCCP skip an additional 4 bytes if after GRE header * doesn't follow an IP header. 
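 * (Clarifying note added for illustration: WCCPv2 is understood to insert a 4-byte redirect header between the GRE header and the encapsulated IP packet; the check below treats a first nibble other than 4, that is, not an IPv4 version field, as a sign that the extra word is present and skips it as well.)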
*/ if (flags == 0 && (*(uint8_t *)gh->gre_opts & 0xF0) != 0x40) hlen += sizeof(uint32_t); /* FALLTHROUGH */ case ETHERTYPE_IP: isr = NETISR_IP; af = AF_INET; break; case ETHERTYPE_IPV6: isr = NETISR_IPV6; af = AF_INET6; break; default: goto drop; } m_adj(m, *offp + hlen); m_clrprotoflags(m); m->m_pkthdr.rcvif = ifp; M_SETFIB(m, ifp->if_fib); #ifdef MAC mac_ifnet_create_mbuf(ifp, m); #endif BPF_MTAP2(ifp, &af, sizeof(af), m); if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); if ((ifp->if_flags & IFF_MONITOR) != 0) m_freem(m); else netisr_dispatch(isr, m); return (IPPROTO_DONE); drop: if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); m_freem(m); return (IPPROTO_DONE); } #define MTAG_GRE 1307983903 static int gre_check_nesting(struct ifnet *ifp, struct mbuf *m) { struct m_tag *mtag; int count; count = 1; mtag = NULL; while ((mtag = m_tag_locate(m, MTAG_GRE, 0, mtag)) != NULL) { if (*(struct ifnet **)(mtag + 1) == ifp) { log(LOG_NOTICE, "%s: loop detected\n", ifp->if_xname); return (EIO); } count++; } if (count > V_max_gre_nesting) { log(LOG_NOTICE, "%s: if_output recursively called too many times(%d)\n", ifp->if_xname, count); return (EIO); } mtag = m_tag_alloc(MTAG_GRE, 0, sizeof(struct ifnet *), M_NOWAIT); if (mtag == NULL) return (ENOMEM); *(struct ifnet **)(mtag + 1) = ifp; m_tag_prepend(m, mtag); return (0); } static int gre_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { uint32_t af; int error; #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error != 0) goto drop; #endif if ((ifp->if_flags & IFF_MONITOR) != 0 || (ifp->if_flags & IFF_UP) == 0) { error = ENETDOWN; goto drop; } error = gre_check_nesting(ifp, m); if (error != 0) goto drop; m->m_flags &= ~(M_BCAST|M_MCAST); if (dst->sa_family == AF_UNSPEC) bcopy(dst->sa_data, &af, sizeof(af)); else af = dst->sa_family; BPF_MTAP2(ifp, &af, sizeof(af), m); m->m_pkthdr.csum_data = af; /* save af for if_transmit */ return (ifp->if_transmit(ifp, m)); drop: m_freem(m); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (error); } static void gre_setseqn(struct grehdr *gh, uint32_t seq) { uint32_t *opts; uint16_t flags; opts = gh->gre_opts; flags = ntohs(gh->gre_flags); KASSERT((flags & GRE_FLAGS_SP) != 0, ("gre_setseqn called, but GRE_FLAGS_SP isn't set ")); if (flags & GRE_FLAGS_CP) opts++; if (flags & GRE_FLAGS_KP) opts++; *opts = htonl(seq); } static int gre_transmit(struct ifnet *ifp, struct mbuf *m) { GRE_RLOCK_TRACKER; struct gre_softc *sc; struct grehdr *gh; uint32_t iaf, oaf, oseq; int error, hlen, olen, plen; int want_seq, want_csum; plen = 0; sc = ifp->if_softc; if (sc == NULL) { error = ENETDOWN; m_freem(m); goto drop; } GRE_RLOCK(sc); if (sc->gre_family == 0) { GRE_RUNLOCK(sc); error = ENETDOWN; m_freem(m); goto drop; } iaf = m->m_pkthdr.csum_data; oaf = sc->gre_family; hlen = sc->gre_hlen; want_seq = (sc->gre_options & GRE_ENABLE_SEQ) != 0; if (want_seq) oseq = sc->gre_oseq++; /* XXX */ else oseq = 0; /* Make compiler happy. 
*/ want_csum = (sc->gre_options & GRE_ENABLE_CSUM) != 0; M_SETFIB(m, sc->gre_fibnum); M_PREPEND(m, hlen, M_NOWAIT); if (m == NULL) { GRE_RUNLOCK(sc); error = ENOBUFS; goto drop; } bcopy(sc->gre_hdr, mtod(m, void *), hlen); GRE_RUNLOCK(sc); switch (oaf) { #ifdef INET case AF_INET: olen = sizeof(struct ip); break; #endif #ifdef INET6 case AF_INET6: olen = sizeof(struct ip6_hdr); break; #endif default: error = ENETDOWN; goto drop; } gh = (struct grehdr *)mtodo(m, olen); switch (iaf) { #ifdef INET case AF_INET: gh->gre_proto = htons(ETHERTYPE_IP); break; #endif #ifdef INET6 case AF_INET6: gh->gre_proto = htons(ETHERTYPE_IPV6); break; #endif default: error = ENETDOWN; goto drop; } if (want_seq) gre_setseqn(gh, oseq); if (want_csum) { *(uint16_t *)gh->gre_opts = in_cksum_skip(m, m->m_pkthdr.len, olen); } plen = m->m_pkthdr.len - hlen; switch (oaf) { #ifdef INET case AF_INET: error = in_gre_output(m, iaf, hlen); break; #endif #ifdef INET6 case AF_INET6: error = in6_gre_output(m, iaf, hlen); break; #endif default: m_freem(m); error = ENETDOWN; } drop: if (error) if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); else { if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_OBYTES, plen); } return (error); } static void gre_qflush(struct ifnet *ifp __unused) { } static int gremodevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: case MOD_UNLOAD: break; default: return (EOPNOTSUPP); } return (0); } static moduledata_t gre_mod = { "if_gre", gremodevent, 0 }; DECLARE_MODULE(if_gre, gre_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(if_gre, 1); Index: stable/11/sys/net/if_ipsec.c =================================================================== --- stable/11/sys/net/if_ipsec.c (revision 332287) +++ stable/11/sys/net/if_ipsec.c (revision 332288) @@ -1,1002 +1,1002 @@ /*- * Copyright (c) 2016 Yandex LLC * Copyright (c) 2016 Andrey V. Elsukov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET6 #include #endif #include #include #include static MALLOC_DEFINE(M_IPSEC, "ipsec", "IPsec Virtual Tunnel Interface"); static const char ipsecname[] = "ipsec"; #if defined(INET) && defined(INET6) #define IPSEC_SPCOUNT 4 #else #define IPSEC_SPCOUNT 2 #endif struct ipsec_softc { struct ifnet *ifp; struct rmlock lock; struct secpolicy *sp[IPSEC_SPCOUNT]; uint32_t reqid; u_int family; u_int fibnum; LIST_ENTRY(ipsec_softc) chain; LIST_ENTRY(ipsec_softc) hash; }; #define IPSEC_LOCK_INIT(sc) rm_init(&(sc)->lock, "if_ipsec softc") #define IPSEC_LOCK_DESTROY(sc) rm_destroy(&(sc)->lock) #define IPSEC_RLOCK_TRACKER struct rm_priotracker ipsec_tracker #define IPSEC_RLOCK(sc) rm_rlock(&(sc)->lock, &ipsec_tracker) #define IPSEC_RUNLOCK(sc) rm_runlock(&(sc)->lock, &ipsec_tracker) #define IPSEC_RLOCK_ASSERT(sc) rm_assert(&(sc)->lock, RA_RLOCKED) #define IPSEC_WLOCK(sc) rm_wlock(&(sc)->lock) #define IPSEC_WUNLOCK(sc) rm_wunlock(&(sc)->lock) #define IPSEC_WLOCK_ASSERT(sc) rm_assert(&(sc)->lock, RA_WLOCKED) static struct rmlock ipsec_sc_lock; RM_SYSINIT(ipsec_sc_lock, &ipsec_sc_lock, "if_ipsec softc list"); #define IPSEC_SC_RLOCK_TRACKER struct rm_priotracker ipsec_sc_tracker #define IPSEC_SC_RLOCK() rm_rlock(&ipsec_sc_lock, &ipsec_sc_tracker) #define IPSEC_SC_RUNLOCK() rm_runlock(&ipsec_sc_lock, &ipsec_sc_tracker) #define IPSEC_SC_RLOCK_ASSERT() rm_assert(&ipsec_sc_lock, RA_RLOCKED) #define IPSEC_SC_WLOCK() rm_wlock(&ipsec_sc_lock) #define IPSEC_SC_WUNLOCK() rm_wunlock(&ipsec_sc_lock) #define IPSEC_SC_WLOCK_ASSERT() rm_assert(&ipsec_sc_lock, RA_WLOCKED) LIST_HEAD(ipsec_iflist, ipsec_softc); static VNET_DEFINE(struct ipsec_iflist, ipsec_sc_list); static VNET_DEFINE(struct ipsec_iflist *, ipsec_sc_htbl); static VNET_DEFINE(u_long, ipsec_sc_hmask); #define V_ipsec_sc_list VNET(ipsec_sc_list) #define V_ipsec_sc_htbl VNET(ipsec_sc_htbl) #define V_ipsec_sc_hmask VNET(ipsec_sc_hmask) static uint32_t ipsec_hash(uint32_t id) { return (fnv_32_buf(&id, sizeof(id), FNV1_32_INIT)); } #define SCHASH_NHASH_LOG2 5 #define SCHASH_NHASH (1 << SCHASH_NHASH_LOG2) #define SCHASH_HASHVAL(id) (ipsec_hash((id)) & V_ipsec_sc_hmask) #define SCHASH_HASH(id) &V_ipsec_sc_htbl[SCHASH_HASHVAL(id)] /* * ipsec_ioctl_sx protects from concurrent ioctls. 
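 * It also keeps the softc from disappearing underneath an ioctl: ipsec_clone_destroy() takes the same lock before clearing if_softc, and ipsec_ioctl() re-checks the pointer after acquiring it.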
*/ static struct sx ipsec_ioctl_sx; SX_SYSINIT(ipsec_ioctl_sx, &ipsec_ioctl_sx, "ipsec_ioctl"); static int ipsec_init_reqid(struct ipsec_softc *); static int ipsec_set_tunnel(struct ipsec_softc *, struct sockaddr *, struct sockaddr *, uint32_t); static void ipsec_delete_tunnel(struct ifnet *, int); static int ipsec_set_addresses(struct ifnet *, struct sockaddr *, struct sockaddr *); static int ipsec_set_reqid(struct ifnet *, uint32_t); static int ipsec_ioctl(struct ifnet *, u_long, caddr_t); static int ipsec_transmit(struct ifnet *, struct mbuf *); static int ipsec_output(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); static void ipsec_qflush(struct ifnet *); static int ipsec_clone_create(struct if_clone *, int, caddr_t); static void ipsec_clone_destroy(struct ifnet *); static VNET_DEFINE(struct if_clone *, ipsec_cloner); #define V_ipsec_cloner VNET(ipsec_cloner) static int ipsec_clone_create(struct if_clone *ifc, int unit, caddr_t params) { struct ipsec_softc *sc; struct ifnet *ifp; sc = malloc(sizeof(*sc), M_IPSEC, M_WAITOK | M_ZERO); sc->fibnum = curthread->td_proc->p_fibnum; sc->ifp = ifp = if_alloc(IFT_TUNNEL); IPSEC_LOCK_INIT(sc); ifp->if_softc = sc; if_initname(ifp, ipsecname, unit); ifp->if_addrlen = 0; ifp->if_mtu = IPSEC_MTU; ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST; ifp->if_ioctl = ipsec_ioctl; ifp->if_transmit = ipsec_transmit; ifp->if_qflush = ipsec_qflush; ifp->if_output = ipsec_output; if_attach(ifp); bpfattach(ifp, DLT_NULL, sizeof(uint32_t)); IPSEC_SC_WLOCK(); LIST_INSERT_HEAD(&V_ipsec_sc_list, sc, chain); IPSEC_SC_WUNLOCK(); return (0); } static void ipsec_clone_destroy(struct ifnet *ifp) { struct ipsec_softc *sc; sx_xlock(&ipsec_ioctl_sx); sc = ifp->if_softc; IPSEC_SC_WLOCK(); ipsec_delete_tunnel(ifp, 1); LIST_REMOVE(sc, chain); IPSEC_SC_WUNLOCK(); bpfdetach(ifp); if_detach(ifp); ifp->if_softc = NULL; sx_xunlock(&ipsec_ioctl_sx); if_free(ifp); IPSEC_LOCK_DESTROY(sc); free(sc, M_IPSEC); } static void vnet_ipsec_init(const void *unused __unused) { LIST_INIT(&V_ipsec_sc_list); V_ipsec_sc_htbl = hashinit(SCHASH_NHASH, M_IPSEC, &V_ipsec_sc_hmask); V_ipsec_cloner = if_clone_simple(ipsecname, ipsec_clone_create, ipsec_clone_destroy, 0); } VNET_SYSINIT(vnet_ipsec_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_ipsec_init, NULL); static void vnet_ipsec_uninit(const void *unused __unused) { if_clone_detach(V_ipsec_cloner); hashdestroy(V_ipsec_sc_htbl, M_IPSEC, V_ipsec_sc_hmask); } VNET_SYSUNINIT(vnet_ipsec_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_ipsec_uninit, NULL); static struct secpolicy * ipsec_getpolicy(struct ipsec_softc *sc, int dir, sa_family_t af) { switch (af) { #ifdef INET case AF_INET: return (sc->sp[(dir == IPSEC_DIR_INBOUND ? 0: 1)]); #endif #ifdef INET6 case AF_INET6: return (sc->sp[(dir == IPSEC_DIR_INBOUND ? 
0: 1) #ifdef INET + 2 #endif ]); #endif } return (NULL); } static struct secasindex * ipsec_getsaidx(struct ipsec_softc *sc, int dir, sa_family_t af) { struct secpolicy *sp; sp = ipsec_getpolicy(sc, dir, af); if (sp == NULL) return (NULL); return (&sp->req[0]->saidx); } static int ipsec_transmit(struct ifnet *ifp, struct mbuf *m) { IPSEC_RLOCK_TRACKER; struct ipsec_softc *sc; struct secpolicy *sp; struct ip *ip; uint32_t af; int error; #ifdef MAC error = mac_ifnet_check_transmit(ifp, m); if (error) { m_freem(m); goto err; } #endif error = ENETDOWN; sc = ifp->if_softc; if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || (ifp->if_flags & IFF_MONITOR) != 0 || (ifp->if_flags & IFF_UP) == 0) { m_freem(m); goto err; } /* Determine address family to correctly handle packet in BPF */ ip = mtod(m, struct ip *); switch (ip->ip_v) { #ifdef INET case IPVERSION: af = AF_INET; break; #endif #ifdef INET6 case (IPV6_VERSION >> 4): af = AF_INET6; break; #endif default: error = EAFNOSUPPORT; m_freem(m); goto err; } /* * Loop prevention. * XXX: for now just check presence of IPSEC_OUT_DONE mbuf tag. * We can read full chain and compare destination address, * proto and mode from xform_history with values from softc. */ if (m_tag_find(m, PACKET_TAG_IPSEC_OUT_DONE, NULL) != NULL) { m_freem(m); goto err; } IPSEC_RLOCK(sc); if (sc->family == 0) { IPSEC_RUNLOCK(sc); m_freem(m); goto err; } sp = ipsec_getpolicy(sc, IPSEC_DIR_OUTBOUND, af); key_addref(sp); M_SETFIB(m, sc->fibnum); IPSEC_RUNLOCK(sc); BPF_MTAP2(ifp, &af, sizeof(af), m); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); switch (af) { #ifdef INET case AF_INET: error = ipsec4_process_packet(m, sp, NULL); break; #endif #ifdef INET6 case AF_INET6: error = ipsec6_process_packet(m, sp, NULL); break; #endif default: panic("%s: unknown address family\n", __func__); } err: if (error != 0) if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (error); } static void ipsec_qflush(struct ifnet *ifp __unused) { } static int ipsec_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { return (ifp->if_transmit(ifp, m)); } int ipsec_if_input(struct mbuf *m, struct secasvar *sav, uint32_t af) { IPSEC_SC_RLOCK_TRACKER; struct secasindex *saidx; struct ipsec_softc *sc; struct ifnet *ifp; if (sav->state != SADB_SASTATE_MATURE && sav->state != SADB_SASTATE_DYING) { m_freem(m); return (ENETDOWN); } if (sav->sah->saidx.mode != IPSEC_MODE_TUNNEL || sav->sah->saidx.proto != IPPROTO_ESP) return (0); IPSEC_SC_RLOCK(); /* * We only acquire SC_RLOCK() while we are doing search in * ipsec_sc_htbl. It is safe, because removing softc or changing * of reqid/addresses requires removing from hash table. */ LIST_FOREACH(sc, SCHASH_HASH(sav->sah->saidx.reqid), hash) { saidx = ipsec_getsaidx(sc, IPSEC_DIR_INBOUND, sav->sah->saidx.src.sa.sa_family); /* SA's reqid should match reqid in SP */ if (saidx == NULL || sav->sah->saidx.reqid != saidx->reqid) continue; /* SAH's addresses should match tunnel endpoints. */ if (key_sockaddrcmp(&sav->sah->saidx.dst.sa, &saidx->dst.sa, 0) != 0) continue; if (key_sockaddrcmp(&sav->sah->saidx.src.sa, &saidx->src.sa, 0) == 0) break; } if (sc == NULL) { IPSEC_SC_RUNLOCK(); /* Tunnel was not found. Nothing to do. */ return (0); } ifp = sc->ifp; if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || (ifp->if_flags & IFF_UP) == 0) { IPSEC_SC_RUNLOCK(); m_freem(m); return (ENETDOWN); } /* * We found matching and working tunnel. * Set its ifnet as receiving interface. 
*/ m->m_pkthdr.rcvif = ifp; IPSEC_SC_RUNLOCK(); /* m_clrprotoflags(m); */ M_SETFIB(m, ifp->if_fib); BPF_MTAP2(ifp, &af, sizeof(af), m); if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); if ((ifp->if_flags & IFF_MONITOR) != 0) { m_freem(m); return (ENETDOWN); } return (0); } /* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */ int ipsec_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { IPSEC_RLOCK_TRACKER; struct ifreq *ifr = (struct ifreq*)data; struct sockaddr *dst, *src; struct ipsec_softc *sc; struct secasindex *saidx; #ifdef INET struct sockaddr_in *sin = NULL; #endif #ifdef INET6 struct sockaddr_in6 *sin6 = NULL; #endif uint32_t reqid; int error; switch (cmd) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP; case SIOCADDMULTI: case SIOCDELMULTI: case SIOCGIFMTU: case SIOCSIFFLAGS: return (0); case SIOCSIFMTU: if (ifr->ifr_mtu < IPSEC_MTU_MIN || ifr->ifr_mtu > IPSEC_MTU_MAX) return (EINVAL); else ifp->if_mtu = ifr->ifr_mtu; return (0); } sx_xlock(&ipsec_ioctl_sx); sc = ifp->if_softc; /* Check that softc is still here */ if (sc == NULL) { error = ENXIO; goto bad; } error = 0; switch (cmd) { case SIOCSIFPHYADDR: #ifdef INET6 case SIOCSIFPHYADDR_IN6: #endif error = EINVAL; switch (cmd) { #ifdef INET case SIOCSIFPHYADDR: src = (struct sockaddr *) &(((struct in_aliasreq *)data)->ifra_addr); dst = (struct sockaddr *) &(((struct in_aliasreq *)data)->ifra_dstaddr); break; #endif #ifdef INET6 case SIOCSIFPHYADDR_IN6: src = (struct sockaddr *) &(((struct in6_aliasreq *)data)->ifra_addr); dst = (struct sockaddr *) &(((struct in6_aliasreq *)data)->ifra_dstaddr); break; #endif default: goto bad; } /* sa_family must be equal */ if (src->sa_family != dst->sa_family || src->sa_len != dst->sa_len) goto bad; /* validate sa_len */ switch (src->sa_family) { #ifdef INET case AF_INET: if (src->sa_len != sizeof(struct sockaddr_in)) goto bad; break; #endif #ifdef INET6 case AF_INET6: if (src->sa_len != sizeof(struct sockaddr_in6)) goto bad; break; #endif default: error = EAFNOSUPPORT; goto bad; } /* check sa_family looks sane for the cmd */ error = EAFNOSUPPORT; switch (cmd) { #ifdef INET case SIOCSIFPHYADDR: if (src->sa_family == AF_INET) break; goto bad; #endif #ifdef INET6 case SIOCSIFPHYADDR_IN6: if (src->sa_family == AF_INET6) break; goto bad; #endif } error = EADDRNOTAVAIL; switch (src->sa_family) { #ifdef INET case AF_INET: if (satosin(src)->sin_addr.s_addr == INADDR_ANY || satosin(dst)->sin_addr.s_addr == INADDR_ANY) goto bad; break; #endif #ifdef INET6 case AF_INET6: if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr) || IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr)) goto bad; /* * Check validity of the scope zone ID of the * addresses, and convert it into the kernel * internal form if necessary. 
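 * (Illustrative note: for a link-local endpoint written e.g. as fe80::1%ix0, sa6_embedscope() folds the zone id into the address bytes, so later address comparisons and hash lookups can work on plain in6_addr values.)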
*/ error = sa6_embedscope(satosin6(src), 0); if (error != 0) goto bad; error = sa6_embedscope(satosin6(dst), 0); if (error != 0) goto bad; #endif }; error = ipsec_set_addresses(ifp, src, dst); break; case SIOCDIFPHYADDR: ipsec_delete_tunnel(ifp, 0); break; case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: #ifdef INET6 case SIOCGIFPSRCADDR_IN6: case SIOCGIFPDSTADDR_IN6: #endif IPSEC_RLOCK(sc); if (sc->family == 0) { IPSEC_RUNLOCK(sc); error = EADDRNOTAVAIL; break; } saidx = ipsec_getsaidx(sc, IPSEC_DIR_OUTBOUND, sc->family); switch (cmd) { #ifdef INET case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: if (saidx->src.sa.sa_family != AF_INET) { error = EADDRNOTAVAIL; break; } sin = (struct sockaddr_in *)&ifr->ifr_addr; memset(sin, 0, sizeof(*sin)); sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); break; #endif #ifdef INET6 case SIOCGIFPSRCADDR_IN6: case SIOCGIFPDSTADDR_IN6: if (saidx->src.sa.sa_family != AF_INET6) { error = EADDRNOTAVAIL; break; } sin6 = (struct sockaddr_in6 *) &(((struct in6_ifreq *)data)->ifr_addr); memset(sin6, 0, sizeof(*sin6)); sin6->sin6_family = AF_INET6; sin6->sin6_len = sizeof(*sin6); break; #endif default: error = EAFNOSUPPORT; } if (error == 0) { switch (cmd) { #ifdef INET case SIOCGIFPSRCADDR: sin->sin_addr = saidx->src.sin.sin_addr; break; case SIOCGIFPDSTADDR: sin->sin_addr = saidx->dst.sin.sin_addr; break; #endif #ifdef INET6 case SIOCGIFPSRCADDR_IN6: sin6->sin6_addr = saidx->src.sin6.sin6_addr; break; case SIOCGIFPDSTADDR_IN6: sin6->sin6_addr = saidx->dst.sin6.sin6_addr; break; #endif } } IPSEC_RUNLOCK(sc); if (error != 0) break; switch (cmd) { #ifdef INET case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: error = prison_if(curthread->td_ucred, (struct sockaddr *)sin); if (error != 0) memset(sin, 0, sizeof(*sin)); break; #endif #ifdef INET6 case SIOCGIFPSRCADDR_IN6: case SIOCGIFPDSTADDR_IN6: error = prison_if(curthread->td_ucred, (struct sockaddr *)sin6); if (error == 0) error = sa6_recoverscope(sin6); if (error != 0) memset(sin6, 0, sizeof(*sin6)); #endif } break; case SIOCGTUNFIB: ifr->ifr_fib = sc->fibnum; break; case SIOCSTUNFIB: if ((error = priv_check(curthread, PRIV_NET_SETIFFIB)) != 0) break; if (ifr->ifr_fib >= rt_numfibs) error = EINVAL; else sc->fibnum = ifr->ifr_fib; break; case IPSECGREQID: reqid = sc->reqid; - error = copyout(&reqid, ifr->ifr_data, sizeof(reqid)); + error = copyout(&reqid, ifr_data_get_ptr(ifr), sizeof(reqid)); break; case IPSECSREQID: if ((error = priv_check(curthread, PRIV_NET_SETIFCAP)) != 0) break; - error = copyin(ifr->ifr_data, &reqid, sizeof(reqid)); + error = copyin(ifr_data_get_ptr(ifr), &reqid, sizeof(reqid)); if (error != 0) break; error = ipsec_set_reqid(ifp, reqid); break; default: error = EINVAL; break; } bad: sx_xunlock(&ipsec_ioctl_sx); return (error); } /* * Allocate new private security policies for tunneling interface. 
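 * (One inbound and one outbound policy per enabled address family, i.e. IPSEC_SPCOUNT entries in total.)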
* Each tunneling interface has following security policies for * both AF: * 0.0.0.0/0[any] 0.0.0.0/0[any] -P in \ * ipsec esp/tunnel/RemoteIP-LocalIP/unique:reqid * 0.0.0.0/0[any] 0.0.0.0/0[any] -P out \ * ipsec esp/tunnel/LocalIP-RemoteIP/unique:reqid */ static int ipsec_newpolicies(struct ipsec_softc *sc, struct secpolicy *sp[IPSEC_SPCOUNT], const struct sockaddr *src, const struct sockaddr *dst, uint32_t reqid) { struct ipsecrequest *isr; int i; memset(sp, 0, sizeof(struct secpolicy *) * IPSEC_SPCOUNT); for (i = 0; i < IPSEC_SPCOUNT; i++) { if ((sp[i] = key_newsp()) == NULL) goto fail; if ((isr = ipsec_newisr()) == NULL) goto fail; sp[i]->policy = IPSEC_POLICY_IPSEC; sp[i]->state = IPSEC_SPSTATE_DEAD; sp[i]->req[sp[i]->tcount++] = isr; sp[i]->created = time_second; /* Use priority field to store if_index */ sp[i]->priority = sc->ifp->if_index; isr->level = IPSEC_LEVEL_UNIQUE; isr->saidx.proto = IPPROTO_ESP; isr->saidx.mode = IPSEC_MODE_TUNNEL; isr->saidx.reqid = reqid; if (i % 2 == 0) { sp[i]->spidx.dir = IPSEC_DIR_INBOUND; bcopy(src, &isr->saidx.dst, src->sa_len); bcopy(dst, &isr->saidx.src, dst->sa_len); } else { sp[i]->spidx.dir = IPSEC_DIR_OUTBOUND; bcopy(src, &isr->saidx.src, src->sa_len); bcopy(dst, &isr->saidx.dst, dst->sa_len); } sp[i]->spidx.ul_proto = IPSEC_ULPROTO_ANY; #ifdef INET if (i < 2) { sp[i]->spidx.src.sa.sa_family = sp[i]->spidx.dst.sa.sa_family = AF_INET; sp[i]->spidx.src.sa.sa_len = sp[i]->spidx.dst.sa.sa_len = sizeof(struct sockaddr_in); continue; } #endif #ifdef INET6 sp[i]->spidx.src.sa.sa_family = sp[i]->spidx.dst.sa.sa_family = AF_INET6; sp[i]->spidx.src.sa.sa_len = sp[i]->spidx.dst.sa.sa_len = sizeof(struct sockaddr_in6); #endif } return (0); fail: for (i = 0; i < IPSEC_SPCOUNT; i++) key_freesp(&sp[i]); return (ENOMEM); } static int ipsec_check_reqid(uint32_t reqid) { struct ipsec_softc *sc; IPSEC_SC_RLOCK_ASSERT(); LIST_FOREACH(sc, &V_ipsec_sc_list, chain) { if (sc->reqid == reqid) return (EEXIST); } return (0); } /* * We use key_newreqid() to automatically obtain unique reqid. * Then we check that given id is unique, i.e. it is not used by * another if_ipsec(4) interface. This macro limits the number of * tries to get unique id. */ #define IPSEC_REQID_TRYCNT 64 static int ipsec_init_reqid(struct ipsec_softc *sc) { uint32_t reqid; int trycount; IPSEC_SC_RLOCK_ASSERT(); if (sc->reqid != 0) /* already initialized */ return (0); trycount = IPSEC_REQID_TRYCNT; while (--trycount > 0) { reqid = key_newreqid(); if (ipsec_check_reqid(reqid) == 0) break; } if (trycount == 0) return (EEXIST); sc->reqid = reqid; return (0); } /* * Set or update reqid for given tunneling interface. * When specified reqid is zero, generate new one. * We are protected by ioctl_sx lock from concurrent id generation. * Also softc would not disappear while we hold ioctl_sx lock. */ static int ipsec_set_reqid(struct ifnet *ifp, uint32_t reqid) { IPSEC_SC_RLOCK_TRACKER; struct ipsec_softc *sc; struct secasindex *saidx; sx_assert(&ipsec_ioctl_sx, SA_XLOCKED); sc = ifp->if_softc; if (sc->reqid == reqid && reqid != 0) return (0); IPSEC_SC_RLOCK(); if (reqid != 0) { /* Check that specified reqid doesn't exist */ if (ipsec_check_reqid(reqid) != 0) { IPSEC_SC_RUNLOCK(); return (EEXIST); } sc->reqid = reqid; } else { /* Generate new reqid */ if (ipsec_init_reqid(sc) != 0) { IPSEC_SC_RUNLOCK(); return (EEXIST); } } IPSEC_SC_RUNLOCK(); /* Tunnel isn't fully configured, just return. 
*/ if (sc->family == 0) return (0); saidx = ipsec_getsaidx(sc, IPSEC_DIR_OUTBOUND, sc->family); KASSERT(saidx != NULL, ("saidx is NULL, but family is %d", sc->family)); return (ipsec_set_tunnel(sc, &saidx->src.sa, &saidx->dst.sa, sc->reqid)); } /* * Set tunnel endpoints addresses. */ static int ipsec_set_addresses(struct ifnet *ifp, struct sockaddr *src, struct sockaddr *dst) { IPSEC_SC_RLOCK_TRACKER; struct ipsec_softc *sc, *tsc; struct secasindex *saidx; sx_assert(&ipsec_ioctl_sx, SA_XLOCKED); sc = ifp->if_softc; if (sc->family != 0) { saidx = ipsec_getsaidx(sc, IPSEC_DIR_OUTBOUND, src->sa_family); if (saidx != NULL && saidx->reqid == sc->reqid && key_sockaddrcmp(&saidx->src.sa, src, 0) == 0 && key_sockaddrcmp(&saidx->dst.sa, dst, 0) == 0) return (0); /* Nothing has been changed. */ } /* * We cannot service IPsec tunnel when source address is * not our own. */ #ifdef INET if (src->sa_family == AF_INET && in_localip(satosin(src)->sin_addr) == 0) return (EADDRNOTAVAIL); #endif #ifdef INET6 /* * NOTE: IPv6 addresses are in kernel internal form with * embedded scope zone id. */ if (src->sa_family == AF_INET6 && in6_localip(&satosin6(src)->sin6_addr) == 0) return (EADDRNOTAVAIL); #endif /* Check that given addresses aren't already configured */ IPSEC_SC_RLOCK(); LIST_FOREACH(tsc, &V_ipsec_sc_list, chain) { if (tsc == sc || tsc->family != src->sa_family) continue; saidx = ipsec_getsaidx(tsc, IPSEC_DIR_OUTBOUND, tsc->family); if (key_sockaddrcmp(&saidx->src.sa, src, 0) == 0 && key_sockaddrcmp(&saidx->dst.sa, dst, 0) == 0) { /* We already have tunnel with such addresses */ IPSEC_SC_RUNLOCK(); return (EADDRNOTAVAIL); } } /* If reqid is not set, generate new one. */ if (ipsec_init_reqid(sc) != 0) { IPSEC_SC_RUNLOCK(); return (EEXIST); } IPSEC_SC_RUNLOCK(); return (ipsec_set_tunnel(sc, src, dst, sc->reqid)); } static int ipsec_set_tunnel(struct ipsec_softc *sc, struct sockaddr *src, struct sockaddr *dst, uint32_t reqid) { struct secpolicy *sp[IPSEC_SPCOUNT]; struct secpolicy *oldsp[IPSEC_SPCOUNT]; int i, f; sx_assert(&ipsec_ioctl_sx, SA_XLOCKED); /* Allocate SP with new addresses. 
*/ if (ipsec_newpolicies(sc, sp, src, dst, reqid) == 0) { /* Add new policies to SPDB */ if (key_register_ifnet(sp, IPSEC_SPCOUNT) != 0) { for (i = 0; i < IPSEC_SPCOUNT; i++) key_freesp(&sp[i]); return (EAGAIN); } IPSEC_SC_WLOCK(); if ((f = sc->family) != 0) LIST_REMOVE(sc, hash); IPSEC_WLOCK(sc); for (i = 0; i < IPSEC_SPCOUNT; i++) { oldsp[i] = sc->sp[i]; sc->sp[i] = sp[i]; } sc->family = src->sa_family; IPSEC_WUNLOCK(sc); LIST_INSERT_HEAD(SCHASH_HASH(sc->reqid), sc, hash); IPSEC_SC_WUNLOCK(); } else { sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; return (ENOMEM); } sc->ifp->if_drv_flags |= IFF_DRV_RUNNING; if (f != 0) { key_unregister_ifnet(oldsp, IPSEC_SPCOUNT); for (i = 0; i < IPSEC_SPCOUNT; i++) key_freesp(&oldsp[i]); } return (0); } static void ipsec_delete_tunnel(struct ifnet *ifp, int locked) { struct ipsec_softc *sc = ifp->if_softc; struct secpolicy *oldsp[IPSEC_SPCOUNT]; int i; sx_assert(&ipsec_ioctl_sx, SA_XLOCKED); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; if (sc->family != 0) { if (!locked) IPSEC_SC_WLOCK(); /* Remove from hash table */ LIST_REMOVE(sc, hash); IPSEC_WLOCK(sc); for (i = 0; i < IPSEC_SPCOUNT; i++) { oldsp[i] = sc->sp[i]; sc->sp[i] = NULL; } sc->family = 0; IPSEC_WUNLOCK(sc); if (!locked) IPSEC_SC_WUNLOCK(); key_unregister_ifnet(oldsp, IPSEC_SPCOUNT); for (i = 0; i < IPSEC_SPCOUNT; i++) key_freesp(&oldsp[i]); } } Index: stable/11/sys/net/if_spppsubr.c =================================================================== --- stable/11/sys/net/if_spppsubr.c (revision 332287) +++ stable/11/sys/net/if_spppsubr.c (revision 332288) @@ -1,5423 +1,5423 @@ /* * Synchronous PPP/Cisco/Frame Relay link level subroutines. * Keepalive protocol implemented in both Cisco and PPP modes. */ /*- * Copyright (C) 1994-2000 Cronyx Engineering. * Author: Serge Vakulenko, * * Heavily revamped to conform to RFC 1661. * Copyright (C) 1997, 2001 Joerg Wunsch. * * This software is distributed with NO WARRANTIES, not even the implied * warranties for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * * Authors grant any other persons or organisations permission to use * or modify this software as long as this message is kept with the software, * all derivative works or modified versions. * * From: Version 2.4, Thu Apr 30 17:17:21 MSD 1997 * * $FreeBSD$ */ #include #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #endif #ifdef INET6 #include #endif #include #include #define IOCTL_CMD_T u_long #define MAXALIVECNT 3 /* max. alive packets */ /* * Interface flags that can be set in an ifconfig command. * * Setting link0 will make the link passive, i.e. it will be marked * as being administrative openable, but won't be opened to begin * with. Incoming calls will be answered, or subsequent calls with * -link1 will cause the administrative open of the LCP layer. * * Setting link1 will cause the link to auto-dial only as packets * arrive to be sent. * * Setting IFF_DEBUG will syslog the option negotiation and state * transitions at level kern.debug. Note: all logs consistently look * like * * : * * with being something like "bppp0", and * being one of "lcp", "ipcp", "cisco", "chap", "pap", etc. 
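 * (Hedged usage sketch, not from the original text: a command along the lines of "ifconfig bppp0 link1 debug" asks for auto-dial with negotiation logging, while "ifconfig bppp0 link0" marks the link passive as described above.)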
*/ #define IFF_PASSIVE IFF_LINK0 /* wait passively for connection */ #define IFF_AUTO IFF_LINK1 /* auto-dial on output */ #define IFF_CISCO IFF_LINK2 /* auto-dial on output */ #define PPP_ALLSTATIONS 0xff /* All-Stations broadcast address */ #define PPP_UI 0x03 /* Unnumbered Information */ #define PPP_IP 0x0021 /* Internet Protocol */ #define PPP_ISO 0x0023 /* ISO OSI Protocol */ #define PPP_XNS 0x0025 /* Xerox NS Protocol */ #define PPP_IPX 0x002b /* Novell IPX Protocol */ #define PPP_VJ_COMP 0x002d /* VJ compressed TCP/IP */ #define PPP_VJ_UCOMP 0x002f /* VJ uncompressed TCP/IP */ #define PPP_IPV6 0x0057 /* Internet Protocol Version 6 */ #define PPP_LCP 0xc021 /* Link Control Protocol */ #define PPP_PAP 0xc023 /* Password Authentication Protocol */ #define PPP_CHAP 0xc223 /* Challenge-Handshake Auth Protocol */ #define PPP_IPCP 0x8021 /* Internet Protocol Control Protocol */ #define PPP_IPV6CP 0x8057 /* IPv6 Control Protocol */ #define CONF_REQ 1 /* PPP configure request */ #define CONF_ACK 2 /* PPP configure acknowledge */ #define CONF_NAK 3 /* PPP configure negative ack */ #define CONF_REJ 4 /* PPP configure reject */ #define TERM_REQ 5 /* PPP terminate request */ #define TERM_ACK 6 /* PPP terminate acknowledge */ #define CODE_REJ 7 /* PPP code reject */ #define PROTO_REJ 8 /* PPP protocol reject */ #define ECHO_REQ 9 /* PPP echo request */ #define ECHO_REPLY 10 /* PPP echo reply */ #define DISC_REQ 11 /* PPP discard request */ #define LCP_OPT_MRU 1 /* maximum receive unit */ #define LCP_OPT_ASYNC_MAP 2 /* async control character map */ #define LCP_OPT_AUTH_PROTO 3 /* authentication protocol */ #define LCP_OPT_QUAL_PROTO 4 /* quality protocol */ #define LCP_OPT_MAGIC 5 /* magic number */ #define LCP_OPT_RESERVED 6 /* reserved */ #define LCP_OPT_PROTO_COMP 7 /* protocol field compression */ #define LCP_OPT_ADDR_COMP 8 /* address/control field compression */ #define IPCP_OPT_ADDRESSES 1 /* both IP addresses; deprecated */ #define IPCP_OPT_COMPRESSION 2 /* IP compression protocol (VJ) */ #define IPCP_OPT_ADDRESS 3 /* local IP address */ #define IPV6CP_OPT_IFID 1 /* interface identifier */ #define IPV6CP_OPT_COMPRESSION 2 /* IPv6 compression protocol */ #define IPCP_COMP_VJ 0x2d /* Code for VJ compression */ #define PAP_REQ 1 /* PAP name/password request */ #define PAP_ACK 2 /* PAP acknowledge */ #define PAP_NAK 3 /* PAP fail */ #define CHAP_CHALLENGE 1 /* CHAP challenge request */ #define CHAP_RESPONSE 2 /* CHAP challenge response */ #define CHAP_SUCCESS 3 /* CHAP response ok */ #define CHAP_FAILURE 4 /* CHAP response failed */ #define CHAP_MD5 5 /* hash algorithm - MD5 */ #define CISCO_MULTICAST 0x8f /* Cisco multicast address */ #define CISCO_UNICAST 0x0f /* Cisco unicast address */ #define CISCO_KEEPALIVE 0x8035 /* Cisco keepalive protocol */ #define CISCO_ADDR_REQ 0 /* Cisco address request */ #define CISCO_ADDR_REPLY 1 /* Cisco address reply */ #define CISCO_KEEPALIVE_REQ 2 /* Cisco keepalive request */ /* states are named and numbered according to RFC 1661 */ #define STATE_INITIAL 0 #define STATE_STARTING 1 #define STATE_CLOSED 2 #define STATE_STOPPED 3 #define STATE_CLOSING 4 #define STATE_STOPPING 5 #define STATE_REQ_SENT 6 #define STATE_ACK_RCVD 7 #define STATE_ACK_SENT 8 #define STATE_OPENED 9 static MALLOC_DEFINE(M_SPPP, "sppp", "synchronous PPP interface internals"); struct ppp_header { u_char address; u_char control; u_short protocol; } __packed; #define PPP_HEADER_LEN sizeof (struct ppp_header) struct lcp_header { u_char type; u_char ident; u_short len; } __packed; #define 
LCP_HEADER_LEN sizeof (struct lcp_header) struct cisco_packet { u_long type; u_long par1; u_long par2; u_short rel; u_short time0; u_short time1; } __packed; #define CISCO_PACKET_LEN sizeof (struct cisco_packet) /* * We follow the spelling and capitalization of RFC 1661 here, to make * it easier comparing with the standard. Please refer to this RFC in * case you can't make sense out of these abbreviation; it will also * explain the semantics related to the various events and actions. */ struct cp { u_short proto; /* PPP control protocol number */ u_char protoidx; /* index into state table in struct sppp */ u_char flags; #define CP_LCP 0x01 /* this is the LCP */ #define CP_AUTH 0x02 /* this is an authentication protocol */ #define CP_NCP 0x04 /* this is a NCP */ #define CP_QUAL 0x08 /* this is a quality reporting protocol */ const char *name; /* name of this control protocol */ /* event handlers */ void (*Up)(struct sppp *sp); void (*Down)(struct sppp *sp); void (*Open)(struct sppp *sp); void (*Close)(struct sppp *sp); void (*TO)(void *sp); int (*RCR)(struct sppp *sp, struct lcp_header *h, int len); void (*RCN_rej)(struct sppp *sp, struct lcp_header *h, int len); void (*RCN_nak)(struct sppp *sp, struct lcp_header *h, int len); /* actions */ void (*tlu)(struct sppp *sp); void (*tld)(struct sppp *sp); void (*tls)(struct sppp *sp); void (*tlf)(struct sppp *sp); void (*scr)(struct sppp *sp); }; #define SPP_FMT "%s: " #define SPP_ARGS(ifp) (ifp)->if_xname #define SPPP_LOCK(sp) mtx_lock (&(sp)->mtx) #define SPPP_UNLOCK(sp) mtx_unlock (&(sp)->mtx) #define SPPP_LOCK_ASSERT(sp) mtx_assert (&(sp)->mtx, MA_OWNED) #define SPPP_LOCK_OWNED(sp) mtx_owned (&(sp)->mtx) #ifdef INET /* * The following disgusting hack gets around the problem that IP TOS * can't be set yet. We want to put "interactive" traffic on a high * priority queue. To decide if traffic is interactive, we check that * a) it is TCP and b) one of its ports is telnet, rlogin or ftp control. * * XXX is this really still necessary? 
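 * (Worked example added for illustration, independent of the remark above: the table below hashes a TCP port into 8 slots, so INTERACTIVE(23) is true because 23 & 7 == 7 and interactive_ports[7] == 23; ports 513 and 21 land in slots 1 and 5 the same way, while a port such as 22 maps to slot 6, which holds 0, and is therefore not classed as interactive.)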
- joerg - */ static const u_short interactive_ports[8] = { 0, 513, 0, 0, 0, 21, 0, 23, }; #define INTERACTIVE(p) (interactive_ports[(p) & 7] == (p)) #endif /* almost every function needs these */ #define STDDCL \ struct ifnet *ifp = SP2IFP(sp); \ int debug = ifp->if_flags & IFF_DEBUG static int sppp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro); static void sppp_cisco_send(struct sppp *sp, int type, long par1, long par2); static void sppp_cisco_input(struct sppp *sp, struct mbuf *m); static void sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m); static void sppp_cp_send(struct sppp *sp, u_short proto, u_char type, u_char ident, u_short len, void *data); /* static void sppp_cp_timeout(void *arg); */ static void sppp_cp_change_state(const struct cp *cp, struct sppp *sp, int newstate); static void sppp_auth_send(const struct cp *cp, struct sppp *sp, unsigned int type, unsigned int id, ...); static void sppp_up_event(const struct cp *cp, struct sppp *sp); static void sppp_down_event(const struct cp *cp, struct sppp *sp); static void sppp_open_event(const struct cp *cp, struct sppp *sp); static void sppp_close_event(const struct cp *cp, struct sppp *sp); static void sppp_to_event(const struct cp *cp, struct sppp *sp); static void sppp_null(struct sppp *sp); static void sppp_pp_up(struct sppp *sp); static void sppp_pp_down(struct sppp *sp); static void sppp_lcp_init(struct sppp *sp); static void sppp_lcp_up(struct sppp *sp); static void sppp_lcp_down(struct sppp *sp); static void sppp_lcp_open(struct sppp *sp); static void sppp_lcp_close(struct sppp *sp); static void sppp_lcp_TO(void *sp); static int sppp_lcp_RCR(struct sppp *sp, struct lcp_header *h, int len); static void sppp_lcp_RCN_rej(struct sppp *sp, struct lcp_header *h, int len); static void sppp_lcp_RCN_nak(struct sppp *sp, struct lcp_header *h, int len); static void sppp_lcp_tlu(struct sppp *sp); static void sppp_lcp_tld(struct sppp *sp); static void sppp_lcp_tls(struct sppp *sp); static void sppp_lcp_tlf(struct sppp *sp); static void sppp_lcp_scr(struct sppp *sp); static void sppp_lcp_check_and_close(struct sppp *sp); static int sppp_ncp_check(struct sppp *sp); static void sppp_ipcp_init(struct sppp *sp); static void sppp_ipcp_up(struct sppp *sp); static void sppp_ipcp_down(struct sppp *sp); static void sppp_ipcp_open(struct sppp *sp); static void sppp_ipcp_close(struct sppp *sp); static void sppp_ipcp_TO(void *sp); static int sppp_ipcp_RCR(struct sppp *sp, struct lcp_header *h, int len); static void sppp_ipcp_RCN_rej(struct sppp *sp, struct lcp_header *h, int len); static void sppp_ipcp_RCN_nak(struct sppp *sp, struct lcp_header *h, int len); static void sppp_ipcp_tlu(struct sppp *sp); static void sppp_ipcp_tld(struct sppp *sp); static void sppp_ipcp_tls(struct sppp *sp); static void sppp_ipcp_tlf(struct sppp *sp); static void sppp_ipcp_scr(struct sppp *sp); static void sppp_ipv6cp_init(struct sppp *sp); static void sppp_ipv6cp_up(struct sppp *sp); static void sppp_ipv6cp_down(struct sppp *sp); static void sppp_ipv6cp_open(struct sppp *sp); static void sppp_ipv6cp_close(struct sppp *sp); static void sppp_ipv6cp_TO(void *sp); static int sppp_ipv6cp_RCR(struct sppp *sp, struct lcp_header *h, int len); static void sppp_ipv6cp_RCN_rej(struct sppp *sp, struct lcp_header *h, int len); static void sppp_ipv6cp_RCN_nak(struct sppp *sp, struct lcp_header *h, int len); static void sppp_ipv6cp_tlu(struct sppp *sp); static void sppp_ipv6cp_tld(struct sppp *sp); static void 
sppp_ipv6cp_tls(struct sppp *sp); static void sppp_ipv6cp_tlf(struct sppp *sp); static void sppp_ipv6cp_scr(struct sppp *sp); static void sppp_pap_input(struct sppp *sp, struct mbuf *m); static void sppp_pap_init(struct sppp *sp); static void sppp_pap_open(struct sppp *sp); static void sppp_pap_close(struct sppp *sp); static void sppp_pap_TO(void *sp); static void sppp_pap_my_TO(void *sp); static void sppp_pap_tlu(struct sppp *sp); static void sppp_pap_tld(struct sppp *sp); static void sppp_pap_scr(struct sppp *sp); static void sppp_chap_input(struct sppp *sp, struct mbuf *m); static void sppp_chap_init(struct sppp *sp); static void sppp_chap_open(struct sppp *sp); static void sppp_chap_close(struct sppp *sp); static void sppp_chap_TO(void *sp); static void sppp_chap_tlu(struct sppp *sp); static void sppp_chap_tld(struct sppp *sp); static void sppp_chap_scr(struct sppp *sp); static const char *sppp_auth_type_name(u_short proto, u_char type); static const char *sppp_cp_type_name(u_char type); #ifdef INET static const char *sppp_dotted_quad(u_long addr); static const char *sppp_ipcp_opt_name(u_char opt); #endif #ifdef INET6 static const char *sppp_ipv6cp_opt_name(u_char opt); #endif static const char *sppp_lcp_opt_name(u_char opt); static const char *sppp_phase_name(enum ppp_phase phase); static const char *sppp_proto_name(u_short proto); static const char *sppp_state_name(int state); static int sppp_params(struct sppp *sp, u_long cmd, void *data); static int sppp_strnlen(u_char *p, int max); static void sppp_keepalive(void *dummy); static void sppp_phase_network(struct sppp *sp); static void sppp_print_bytes(const u_char *p, u_short len); static void sppp_print_string(const char *p, u_short len); static void sppp_qflush(struct ifqueue *ifq); #ifdef INET static void sppp_set_ip_addr(struct sppp *sp, u_long src); #endif #ifdef INET6 static void sppp_get_ip6_addrs(struct sppp *sp, struct in6_addr *src, struct in6_addr *dst, struct in6_addr *srcmask); #ifdef IPV6CP_MYIFID_DYN static void sppp_set_ip6_addr(struct sppp *sp, const struct in6_addr *src); static void sppp_gen_ip6_addr(struct sppp *sp, const struct in6_addr *src); #endif static void sppp_suggest_ip6_addr(struct sppp *sp, struct in6_addr *src); #endif /* if_start () wrapper */ static void sppp_ifstart (struct ifnet *ifp); /* our control protocol descriptors */ static const struct cp lcp = { PPP_LCP, IDX_LCP, CP_LCP, "lcp", sppp_lcp_up, sppp_lcp_down, sppp_lcp_open, sppp_lcp_close, sppp_lcp_TO, sppp_lcp_RCR, sppp_lcp_RCN_rej, sppp_lcp_RCN_nak, sppp_lcp_tlu, sppp_lcp_tld, sppp_lcp_tls, sppp_lcp_tlf, sppp_lcp_scr }; static const struct cp ipcp = { PPP_IPCP, IDX_IPCP, #ifdef INET /* don't run IPCP if there's no IPv4 support */ CP_NCP, #else 0, #endif "ipcp", sppp_ipcp_up, sppp_ipcp_down, sppp_ipcp_open, sppp_ipcp_close, sppp_ipcp_TO, sppp_ipcp_RCR, sppp_ipcp_RCN_rej, sppp_ipcp_RCN_nak, sppp_ipcp_tlu, sppp_ipcp_tld, sppp_ipcp_tls, sppp_ipcp_tlf, sppp_ipcp_scr }; static const struct cp ipv6cp = { PPP_IPV6CP, IDX_IPV6CP, #ifdef INET6 /*don't run IPv6CP if there's no IPv6 support*/ CP_NCP, #else 0, #endif "ipv6cp", sppp_ipv6cp_up, sppp_ipv6cp_down, sppp_ipv6cp_open, sppp_ipv6cp_close, sppp_ipv6cp_TO, sppp_ipv6cp_RCR, sppp_ipv6cp_RCN_rej, sppp_ipv6cp_RCN_nak, sppp_ipv6cp_tlu, sppp_ipv6cp_tld, sppp_ipv6cp_tls, sppp_ipv6cp_tlf, sppp_ipv6cp_scr }; static const struct cp pap = { PPP_PAP, IDX_PAP, CP_AUTH, "pap", sppp_null, sppp_null, sppp_pap_open, sppp_pap_close, sppp_pap_TO, 0, 0, 0, sppp_pap_tlu, sppp_pap_tld, sppp_null, sppp_null, sppp_pap_scr 
}; static const struct cp chap = { PPP_CHAP, IDX_CHAP, CP_AUTH, "chap", sppp_null, sppp_null, sppp_chap_open, sppp_chap_close, sppp_chap_TO, 0, 0, 0, sppp_chap_tlu, sppp_chap_tld, sppp_null, sppp_null, sppp_chap_scr }; static const struct cp *cps[IDX_COUNT] = { &lcp, /* IDX_LCP */ &ipcp, /* IDX_IPCP */ &ipv6cp, /* IDX_IPV6CP */ &pap, /* IDX_PAP */ &chap, /* IDX_CHAP */ }; static void* sppp_alloc(u_char type, struct ifnet *ifp) { struct sppp *sp; sp = malloc(sizeof(struct sppp), M_SPPP, M_WAITOK | M_ZERO); sp->pp_ifp = ifp; return (sp); } static void sppp_free(void *com, u_char type) { free(com, M_SPPP); } static int sppp_modevent(module_t mod, int type, void *unused) { switch (type) { case MOD_LOAD: /* * XXX: should probably be IFT_SPPP, but it's fairly * harmless to allocate struct sppp's for non-sppp * interfaces. */ if_register_com_alloc(IFT_PPP, sppp_alloc, sppp_free); break; case MOD_UNLOAD: /* if_deregister_com_alloc(IFT_PPP); */ return EACCES; default: return EOPNOTSUPP; } return 0; } static moduledata_t spppmod = { "sppp", sppp_modevent, 0 }; MODULE_VERSION(sppp, 1); DECLARE_MODULE(sppp, spppmod, SI_SUB_DRIVERS, SI_ORDER_ANY); /* * Exported functions, comprising our interface to the lower layer. */ /* * Process the received packet. */ void sppp_input(struct ifnet *ifp, struct mbuf *m) { struct ppp_header *h; int isr = -1; struct sppp *sp = IFP2SP(ifp); int debug, do_account = 0; #ifdef INET int hlen, vjlen; u_char *iphdr; #endif SPPP_LOCK(sp); debug = ifp->if_flags & IFF_DEBUG; if (ifp->if_flags & IFF_UP) /* Count received bytes, add FCS and one flag */ if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len + 3); if (m->m_pkthdr.len <= PPP_HEADER_LEN) { /* Too small packet, drop it. */ if (debug) log(LOG_DEBUG, SPP_FMT "input packet is too small, %d bytes\n", SPP_ARGS(ifp), m->m_pkthdr.len); drop: m_freem (m); SPPP_UNLOCK(sp); drop2: if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); return; } if (sp->pp_mode == PP_FR) { sppp_fr_input (sp, m); SPPP_UNLOCK(sp); return; } /* Get PPP header. 
*/ h = mtod (m, struct ppp_header*); m_adj (m, PPP_HEADER_LEN); switch (h->address) { case PPP_ALLSTATIONS: if (h->control != PPP_UI) goto invalid; if (sp->pp_mode == IFF_CISCO) { if (debug) log(LOG_DEBUG, SPP_FMT "PPP packet in Cisco mode " "\n", SPP_ARGS(ifp), h->address, h->control, ntohs(h->protocol)); goto drop; } switch (ntohs (h->protocol)) { default: if (debug) log(LOG_DEBUG, SPP_FMT "rejecting protocol " "\n", SPP_ARGS(ifp), h->address, h->control, ntohs(h->protocol)); if (sp->state[IDX_LCP] == STATE_OPENED) sppp_cp_send (sp, PPP_LCP, PROTO_REJ, ++sp->pp_seq[IDX_LCP], m->m_pkthdr.len + 2, &h->protocol); if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto drop; case PPP_LCP: sppp_cp_input(&lcp, sp, m); m_freem (m); SPPP_UNLOCK(sp); return; case PPP_PAP: if (sp->pp_phase >= PHASE_AUTHENTICATE) sppp_pap_input(sp, m); m_freem (m); SPPP_UNLOCK(sp); return; case PPP_CHAP: if (sp->pp_phase >= PHASE_AUTHENTICATE) sppp_chap_input(sp, m); m_freem (m); SPPP_UNLOCK(sp); return; #ifdef INET case PPP_IPCP: if (sp->pp_phase == PHASE_NETWORK) sppp_cp_input(&ipcp, sp, m); m_freem (m); SPPP_UNLOCK(sp); return; case PPP_IP: if (sp->state[IDX_IPCP] == STATE_OPENED) { isr = NETISR_IP; } do_account++; break; case PPP_VJ_COMP: if (sp->state[IDX_IPCP] == STATE_OPENED) { if ((vjlen = sl_uncompress_tcp_core(mtod(m, u_char *), m->m_len, m->m_len, TYPE_COMPRESSED_TCP, sp->pp_comp, &iphdr, &hlen)) <= 0) { if (debug) log(LOG_INFO, SPP_FMT "VJ uncompress failed on compressed packet\n", SPP_ARGS(ifp)); goto drop; } /* * Trim the VJ header off the packet, and prepend * the uncompressed IP header (which will usually * end up in two chained mbufs since there's not * enough leading space in the existing mbuf). */ m_adj(m, vjlen); M_PREPEND(m, hlen, M_NOWAIT); if (m == NULL) { SPPP_UNLOCK(sp); goto drop2; } bcopy(iphdr, mtod(m, u_char *), hlen); isr = NETISR_IP; } do_account++; break; case PPP_VJ_UCOMP: if (sp->state[IDX_IPCP] == STATE_OPENED) { if (sl_uncompress_tcp_core(mtod(m, u_char *), m->m_len, m->m_len, TYPE_UNCOMPRESSED_TCP, sp->pp_comp, &iphdr, &hlen) != 0) { if (debug) log(LOG_INFO, SPP_FMT "VJ uncompress failed on uncompressed packet\n", SPP_ARGS(ifp)); goto drop; } isr = NETISR_IP; } do_account++; break; #endif #ifdef INET6 case PPP_IPV6CP: if (sp->pp_phase == PHASE_NETWORK) sppp_cp_input(&ipv6cp, sp, m); m_freem (m); SPPP_UNLOCK(sp); return; case PPP_IPV6: if (sp->state[IDX_IPV6CP] == STATE_OPENED) isr = NETISR_IPV6; do_account++; break; #endif } break; case CISCO_MULTICAST: case CISCO_UNICAST: /* Don't check the control field here (RFC 1547). */ if (sp->pp_mode != IFF_CISCO) { if (debug) log(LOG_DEBUG, SPP_FMT "Cisco packet in PPP mode " "\n", SPP_ARGS(ifp), h->address, h->control, ntohs(h->protocol)); goto drop; } switch (ntohs (h->protocol)) { default: if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); goto invalid; case CISCO_KEEPALIVE: sppp_cisco_input (sp, m); m_freem (m); SPPP_UNLOCK(sp); return; #ifdef INET case ETHERTYPE_IP: isr = NETISR_IP; do_account++; break; #endif #ifdef INET6 case ETHERTYPE_IPV6: isr = NETISR_IPV6; do_account++; break; #endif } break; default: /* Invalid PPP packet. */ invalid: if (debug) log(LOG_DEBUG, SPP_FMT "invalid input packet " "\n", SPP_ARGS(ifp), h->address, h->control, ntohs(h->protocol)); goto drop; } if (! (ifp->if_flags & IFF_UP) || isr == -1) goto drop; SPPP_UNLOCK(sp); M_SETFIB(m, ifp->if_fib); /* Check queue. */ if (netisr_queue(isr, m)) { /* (0) on success. 
*/ if (debug) log(LOG_DEBUG, SPP_FMT "protocol queue overflow\n", SPP_ARGS(ifp)); goto drop2; } if (do_account) /* * Do only account for network packets, not for control * packets. This is used by some subsystems to detect * idle lines. */ sp->pp_last_recv = time_uptime; } static void sppp_ifstart_sched(void *dummy) { struct sppp *sp = dummy; sp->if_start(SP2IFP(sp)); } /* if_start () wrapper function. We use it to schedule real if_start () for * execution. We can't call it directly */ static void sppp_ifstart(struct ifnet *ifp) { struct sppp *sp = IFP2SP(ifp); if (SPPP_LOCK_OWNED(sp)) { if (callout_pending(&sp->ifstart_callout)) return; callout_reset(&sp->ifstart_callout, 1, sppp_ifstart_sched, (void *)sp); } else { sp->if_start(ifp); } } /* * Enqueue transmit packet. */ static int sppp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *ro) { struct sppp *sp = IFP2SP(ifp); struct ppp_header *h; struct ifqueue *ifq = NULL; int error, rv = 0; #ifdef INET int ipproto = PPP_IP; #endif int debug = ifp->if_flags & IFF_DEBUG; SPPP_LOCK(sp); if (!(ifp->if_flags & IFF_UP) || (!(ifp->if_flags & IFF_AUTO) && !(ifp->if_drv_flags & IFF_DRV_RUNNING))) { #ifdef INET6 drop: #endif m_freem (m); SPPP_UNLOCK(sp); return (ENETDOWN); } if ((ifp->if_flags & IFF_AUTO) && !(ifp->if_drv_flags & IFF_DRV_RUNNING)) { #ifdef INET6 /* * XXX * * Hack to prevent the initialization-time generated * IPv6 multicast packet to erroneously cause a * dialout event in case IPv6 has been * administratively disabled on that interface. */ if (dst->sa_family == AF_INET6 && !(sp->confflags & CONF_ENABLE_IPV6)) goto drop; #endif /* * Interface is not yet running, but auto-dial. Need * to start LCP for it. */ ifp->if_drv_flags |= IFF_DRV_RUNNING; lcp.Open(sp); } #ifdef INET if (dst->sa_family == AF_INET) { /* XXX Check mbuf length here? */ struct ip *ip = mtod (m, struct ip*); struct tcphdr *tcp = (struct tcphdr*) ((long*)ip + ip->ip_hl); /* * When using dynamic local IP address assignment by using * 0.0.0.0 as a local address, the first TCP session will * not connect because the local TCP checksum is computed * using 0.0.0.0 which will later become our real IP address * so the TCP checksum computed at the remote end will * become invalid. So we * - don't let packets with src ip addr 0 thru * - we flag TCP packets with src ip 0 as an error */ if(ip->ip_src.s_addr == INADDR_ANY) /* -hm */ { m_freem(m); SPPP_UNLOCK(sp); if(ip->ip_p == IPPROTO_TCP) return(EADDRNOTAVAIL); else return(0); } /* * Put low delay, telnet, rlogin and ftp control packets * in front of the queue or let ALTQ take care. */ if (ALTQ_IS_ENABLED(&ifp->if_snd)) ; else if (_IF_QFULL(&sp->pp_fastq)) ; else if (ip->ip_tos & IPTOS_LOWDELAY) ifq = &sp->pp_fastq; else if (m->m_len < sizeof *ip + sizeof *tcp) ; else if (ip->ip_p != IPPROTO_TCP) ; else if (INTERACTIVE (ntohs (tcp->th_sport))) ifq = &sp->pp_fastq; else if (INTERACTIVE (ntohs (tcp->th_dport))) ifq = &sp->pp_fastq; /* * Do IP Header compression */ if (sp->pp_mode != IFF_CISCO && sp->pp_mode != PP_FR && (sp->ipcp.flags & IPCP_VJ) && ip->ip_p == IPPROTO_TCP) switch (sl_compress_tcp(m, ip, sp->pp_comp, sp->ipcp.compress_cid)) { case TYPE_COMPRESSED_TCP: ipproto = PPP_VJ_COMP; break; case TYPE_UNCOMPRESSED_TCP: ipproto = PPP_VJ_UCOMP; break; case TYPE_IP: ipproto = PPP_IP; break; default: m_freem(m); SPPP_UNLOCK(sp); return (EINVAL); } } #endif #ifdef INET6 if (dst->sa_family == AF_INET6) { /* XXX do something tricky here? 
*/ } #endif if (sp->pp_mode == PP_FR) { /* Add frame relay header. */ m = sppp_fr_header (sp, m, dst->sa_family); if (! m) goto nobufs; goto out; } /* * Prepend general data packet PPP header. For now, IP only. */ M_PREPEND (m, PPP_HEADER_LEN, M_NOWAIT); if (! m) { nobufs: if (debug) log(LOG_DEBUG, SPP_FMT "no memory for transmit header\n", SPP_ARGS(ifp)); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); SPPP_UNLOCK(sp); return (ENOBUFS); } /* * May want to check size of packet * (albeit due to the implementation it's always enough) */ h = mtod (m, struct ppp_header*); if (sp->pp_mode == IFF_CISCO) { h->address = CISCO_UNICAST; /* unicast address */ h->control = 0; } else { h->address = PPP_ALLSTATIONS; /* broadcast address */ h->control = PPP_UI; /* Unnumbered Info */ } switch (dst->sa_family) { #ifdef INET case AF_INET: /* Internet Protocol */ if (sp->pp_mode == IFF_CISCO) h->protocol = htons (ETHERTYPE_IP); else { /* * Don't choke with an ENETDOWN early. It's * possible that we just started dialing out, * so don't drop the packet immediately. If * we notice that we run out of buffer space * below, we will however remember that we are * not ready to carry IP packets, and return * ENETDOWN, as opposed to ENOBUFS. */ h->protocol = htons(ipproto); if (sp->state[IDX_IPCP] != STATE_OPENED) rv = ENETDOWN; } break; #endif #ifdef INET6 case AF_INET6: /* Internet Protocol */ if (sp->pp_mode == IFF_CISCO) h->protocol = htons (ETHERTYPE_IPV6); else { /* * Don't choke with an ENETDOWN early. It's * possible that we just started dialing out, * so don't drop the packet immediately. If * we notice that we run out of buffer space * below, we will however remember that we are * not ready to carry IP packets, and return * ENETDOWN, as opposed to ENOBUFS. */ h->protocol = htons(PPP_IPV6); if (sp->state[IDX_IPV6CP] != STATE_OPENED) rv = ENETDOWN; } break; #endif default: m_freem (m); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); SPPP_UNLOCK(sp); return (EAFNOSUPPORT); } /* * Queue message on interface, and start output if interface * not yet active. */ out: if (ifq != NULL) error = !(IF_HANDOFF_ADJ(ifq, m, ifp, 3)); else IFQ_HANDOFF_ADJ(ifp, m, 3, error); if (error) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); SPPP_UNLOCK(sp); return (rv? rv: ENOBUFS); } SPPP_UNLOCK(sp); /* * Unlike in sppp_input(), we can always bump the timestamp * here since sppp_output() is only called on behalf of * network-layer traffic; control-layer traffic is handled * by sppp_cp_send(). */ sp->pp_last_sent = time_uptime; return (0); } void sppp_attach(struct ifnet *ifp) { struct sppp *sp = IFP2SP(ifp); /* Initialize mtx lock */ mtx_init(&sp->mtx, "sppp", MTX_NETWORK_LOCK, MTX_DEF | MTX_RECURSE); /* Initialize keepalive handler. 
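 * The callout armed below fires every ten seconds (hz * 10 ticks) and
 * runs sppp_keepalive(), defined elsewhere in this file; it is expected
 * to emit LCP echo requests (or Cisco keepalives in Cisco mode) and to
 * declare the link dead once too many replies in a row have been
 * missed, which is why pp_alivecnt is reset here and again whenever a
 * valid echo reply or keepalive request arrives.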
*/ callout_init(&sp->keepalive_callout, 1); callout_reset(&sp->keepalive_callout, hz * 10, sppp_keepalive, (void *)sp); ifp->if_mtu = PP_MTU; ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST; ifp->if_output = sppp_output; #if 0 sp->pp_flags = PP_KEEPALIVE; #endif ifp->if_snd.ifq_maxlen = 32; sp->pp_fastq.ifq_maxlen = 32; sp->pp_cpq.ifq_maxlen = 20; sp->pp_loopcnt = 0; sp->pp_alivecnt = 0; bzero(&sp->pp_seq[0], sizeof(sp->pp_seq)); bzero(&sp->pp_rseq[0], sizeof(sp->pp_rseq)); sp->pp_phase = PHASE_DEAD; sp->pp_up = sppp_pp_up; sp->pp_down = sppp_pp_down; if(!mtx_initialized(&sp->pp_cpq.ifq_mtx)) mtx_init(&sp->pp_cpq.ifq_mtx, "sppp_cpq", NULL, MTX_DEF); if(!mtx_initialized(&sp->pp_fastq.ifq_mtx)) mtx_init(&sp->pp_fastq.ifq_mtx, "sppp_fastq", NULL, MTX_DEF); sp->pp_last_recv = sp->pp_last_sent = time_uptime; sp->confflags = 0; #ifdef INET sp->confflags |= CONF_ENABLE_VJ; #endif #ifdef INET6 sp->confflags |= CONF_ENABLE_IPV6; #endif callout_init(&sp->ifstart_callout, 1); sp->if_start = ifp->if_start; ifp->if_start = sppp_ifstart; sp->pp_comp = malloc(sizeof(struct slcompress), M_TEMP, M_WAITOK); sl_compress_init(sp->pp_comp, -1); sppp_lcp_init(sp); sppp_ipcp_init(sp); sppp_ipv6cp_init(sp); sppp_pap_init(sp); sppp_chap_init(sp); } void sppp_detach(struct ifnet *ifp) { struct sppp *sp = IFP2SP(ifp); int i; KASSERT(mtx_initialized(&sp->mtx), ("sppp mutex is not initialized")); /* Stop keepalive handler. */ if (!callout_drain(&sp->keepalive_callout)) callout_stop(&sp->keepalive_callout); for (i = 0; i < IDX_COUNT; i++) { if (!callout_drain(&sp->ch[i])) callout_stop(&sp->ch[i]); } if (!callout_drain(&sp->pap_my_to_ch)) callout_stop(&sp->pap_my_to_ch); mtx_destroy(&sp->pp_cpq.ifq_mtx); mtx_destroy(&sp->pp_fastq.ifq_mtx); mtx_destroy(&sp->mtx); } /* * Flush the interface output queue. */ static void sppp_flush_unlocked(struct ifnet *ifp) { struct sppp *sp = IFP2SP(ifp); sppp_qflush ((struct ifqueue *)&SP2IFP(sp)->if_snd); sppp_qflush (&sp->pp_fastq); sppp_qflush (&sp->pp_cpq); } void sppp_flush(struct ifnet *ifp) { struct sppp *sp = IFP2SP(ifp); SPPP_LOCK(sp); sppp_flush_unlocked (ifp); SPPP_UNLOCK(sp); } /* * Check if the output queue is empty. */ int sppp_isempty(struct ifnet *ifp) { struct sppp *sp = IFP2SP(ifp); int empty; SPPP_LOCK(sp); empty = !sp->pp_fastq.ifq_head && !sp->pp_cpq.ifq_head && !SP2IFP(sp)->if_snd.ifq_head; SPPP_UNLOCK(sp); return (empty); } /* * Get next packet to send. */ struct mbuf * sppp_dequeue(struct ifnet *ifp) { struct sppp *sp = IFP2SP(ifp); struct mbuf *m; SPPP_LOCK(sp); /* * Process only the control protocol queue until we have at * least one NCP open. * * Do always serve all three queues in Cisco mode. */ IF_DEQUEUE(&sp->pp_cpq, m); if (m == NULL && (sppp_ncp_check(sp) || sp->pp_mode == IFF_CISCO || sp->pp_mode == PP_FR)) { IF_DEQUEUE(&sp->pp_fastq, m); if (m == NULL) IF_DEQUEUE (&SP2IFP(sp)->if_snd, m); } SPPP_UNLOCK(sp); return m; } /* * Pick the next packet, do not remove it from the queue. */ struct mbuf * sppp_pick(struct ifnet *ifp) { struct sppp *sp = IFP2SP(ifp); struct mbuf *m; SPPP_LOCK(sp); m = sp->pp_cpq.ifq_head; if (m == NULL && (sp->pp_phase == PHASE_NETWORK || sp->pp_mode == IFF_CISCO || sp->pp_mode == PP_FR)) if ((m = sp->pp_fastq.ifq_head) == NULL) m = SP2IFP(sp)->if_snd.ifq_head; SPPP_UNLOCK(sp); return (m); } /* * Process an ioctl request. Called on low priority level. 
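 *
 * The requests handled below are SIOCAIFADDR (accepted but otherwise a
 * no-op), SIOCSIFADDR (marks the interface up, then handled like a
 * flags change), SIOCSIFFLAGS (switching between plain PPP, IFF_CISCO,
 * IFF_AUTO and IFF_PASSIVE modes, opening or closing LCP as required),
 * the MTU get/set requests, the multicast add/delete requests, and
 * SIOCGIFGENERIC/SIOCSIFGENERIC, which hand off to sppp_params() for
 * the user-level parameter interface.  Anything else returns ENOTTY.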
*/ int sppp_ioctl(struct ifnet *ifp, IOCTL_CMD_T cmd, void *data) { struct ifreq *ifr = (struct ifreq*) data; struct sppp *sp = IFP2SP(ifp); int rv, going_up, going_down, newmode; SPPP_LOCK(sp); rv = 0; switch (cmd) { case SIOCAIFADDR: break; case SIOCSIFADDR: /* set the interface "up" when assigning an IP address */ ifp->if_flags |= IFF_UP; /* FALLTHROUGH */ case SIOCSIFFLAGS: going_up = ifp->if_flags & IFF_UP && (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0; going_down = (ifp->if_flags & IFF_UP) == 0 && ifp->if_drv_flags & IFF_DRV_RUNNING; newmode = ifp->if_flags & IFF_PASSIVE; if (!newmode) newmode = ifp->if_flags & IFF_AUTO; if (!newmode) newmode = ifp->if_flags & IFF_CISCO; ifp->if_flags &= ~(IFF_PASSIVE | IFF_AUTO | IFF_CISCO); ifp->if_flags |= newmode; if (!newmode) newmode = sp->pp_flags & PP_FR; if (newmode != sp->pp_mode) { going_down = 1; if (!going_up) going_up = ifp->if_drv_flags & IFF_DRV_RUNNING; } if (going_down) { if (sp->pp_mode != IFF_CISCO && sp->pp_mode != PP_FR) lcp.Close(sp); else if (sp->pp_tlf) (sp->pp_tlf)(sp); sppp_flush_unlocked(ifp); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; sp->pp_mode = newmode; } if (going_up) { if (sp->pp_mode != IFF_CISCO && sp->pp_mode != PP_FR) lcp.Close(sp); sp->pp_mode = newmode; if (sp->pp_mode == 0) { ifp->if_drv_flags |= IFF_DRV_RUNNING; lcp.Open(sp); } if ((sp->pp_mode == IFF_CISCO) || (sp->pp_mode == PP_FR)) { if (sp->pp_tls) (sp->pp_tls)(sp); ifp->if_drv_flags |= IFF_DRV_RUNNING; } } break; #ifdef SIOCSIFMTU #ifndef ifr_mtu #define ifr_mtu ifr_metric #endif case SIOCSIFMTU: if (ifr->ifr_mtu < 128 || ifr->ifr_mtu > sp->lcp.their_mru) return (EINVAL); ifp->if_mtu = ifr->ifr_mtu; break; #endif #ifdef SLIOCSETMTU case SLIOCSETMTU: if (*(short*)data < 128 || *(short*)data > sp->lcp.their_mru) return (EINVAL); ifp->if_mtu = *(short*)data; break; #endif #ifdef SIOCGIFMTU case SIOCGIFMTU: ifr->ifr_mtu = ifp->if_mtu; break; #endif #ifdef SLIOCGETMTU case SLIOCGETMTU: *(short*)data = ifp->if_mtu; break; #endif case SIOCADDMULTI: case SIOCDELMULTI: break; case SIOCGIFGENERIC: case SIOCSIFGENERIC: rv = sppp_params(sp, cmd, data); break; default: rv = ENOTTY; } SPPP_UNLOCK(sp); return rv; } /* * Cisco framing implementation. */ /* * Handle incoming Cisco keepalive protocol packets. */ static void sppp_cisco_input(struct sppp *sp, struct mbuf *m) { STDDCL; struct cisco_packet *h; u_long me, mymask; if (m->m_pkthdr.len < CISCO_PACKET_LEN) { if (debug) log(LOG_DEBUG, SPP_FMT "cisco invalid packet length: %d bytes\n", SPP_ARGS(ifp), m->m_pkthdr.len); return; } h = mtod (m, struct cisco_packet*); if (debug) log(LOG_DEBUG, SPP_FMT "cisco input: %d bytes " "<0x%lx 0x%lx 0x%lx 0x%x 0x%x-0x%x>\n", SPP_ARGS(ifp), m->m_pkthdr.len, (u_long)ntohl (h->type), (u_long)h->par1, (u_long)h->par2, (u_int)h->rel, (u_int)h->time0, (u_int)h->time1); switch (ntohl (h->type)) { default: if (debug) log(-1, SPP_FMT "cisco unknown packet type: 0x%lx\n", SPP_ARGS(ifp), (u_long)ntohl (h->type)); break; case CISCO_ADDR_REPLY: /* Reply on address request, ignore */ break; case CISCO_KEEPALIVE_REQ: sp->pp_alivecnt = 0; sp->pp_rseq[IDX_LCP] = ntohl (h->par1); if (sp->pp_seq[IDX_LCP] == sp->pp_rseq[IDX_LCP]) { /* Local and remote sequence numbers are equal. * Probably, the line is in loopback mode. 
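 * Our own keepalives carry our sequence number (pp_seq) in par1, so an
 * incoming request whose par1 matches the value we last sent strongly
 * suggests we are hearing our own frames echoed back.  After
 * MAXALIVECNT consecutive matches the interface is taken down and a
 * fresh random sequence number is picked to break the loop.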
*/ if (sp->pp_loopcnt >= MAXALIVECNT) { printf (SPP_FMT "loopback\n", SPP_ARGS(ifp)); sp->pp_loopcnt = 0; if (ifp->if_flags & IFF_UP) { if_down (ifp); sppp_qflush (&sp->pp_cpq); } } ++sp->pp_loopcnt; /* Generate new local sequence number */ sp->pp_seq[IDX_LCP] = random(); break; } sp->pp_loopcnt = 0; if (! (ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING)) { if_up(ifp); printf (SPP_FMT "up\n", SPP_ARGS(ifp)); } break; case CISCO_ADDR_REQ: sppp_get_ip_addrs(sp, &me, 0, &mymask); if (me != 0L) sppp_cisco_send(sp, CISCO_ADDR_REPLY, me, mymask); break; } } /* * Send Cisco keepalive packet. */ static void sppp_cisco_send(struct sppp *sp, int type, long par1, long par2) { STDDCL; struct ppp_header *h; struct cisco_packet *ch; struct mbuf *m; struct timeval tv; getmicrouptime(&tv); MGETHDR (m, M_NOWAIT, MT_DATA); if (! m) return; m->m_pkthdr.len = m->m_len = PPP_HEADER_LEN + CISCO_PACKET_LEN; m->m_pkthdr.rcvif = 0; h = mtod (m, struct ppp_header*); h->address = CISCO_MULTICAST; h->control = 0; h->protocol = htons (CISCO_KEEPALIVE); ch = (struct cisco_packet*) (h + 1); ch->type = htonl (type); ch->par1 = htonl (par1); ch->par2 = htonl (par2); ch->rel = -1; ch->time0 = htons ((u_short) (tv.tv_sec >> 16)); ch->time1 = htons ((u_short) tv.tv_sec); if (debug) log(LOG_DEBUG, SPP_FMT "cisco output: <0x%lx 0x%lx 0x%lx 0x%x 0x%x-0x%x>\n", SPP_ARGS(ifp), (u_long)ntohl (ch->type), (u_long)ch->par1, (u_long)ch->par2, (u_int)ch->rel, (u_int)ch->time0, (u_int)ch->time1); if (! IF_HANDOFF_ADJ(&sp->pp_cpq, m, ifp, 3)) if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } /* * PPP protocol implementation. */ /* * Send PPP control protocol packet. */ static void sppp_cp_send(struct sppp *sp, u_short proto, u_char type, u_char ident, u_short len, void *data) { STDDCL; struct ppp_header *h; struct lcp_header *lh; struct mbuf *m; if (len > MHLEN - PPP_HEADER_LEN - LCP_HEADER_LEN) len = MHLEN - PPP_HEADER_LEN - LCP_HEADER_LEN; MGETHDR (m, M_NOWAIT, MT_DATA); if (! m) return; m->m_pkthdr.len = m->m_len = PPP_HEADER_LEN + LCP_HEADER_LEN + len; m->m_pkthdr.rcvif = 0; h = mtod (m, struct ppp_header*); h->address = PPP_ALLSTATIONS; /* broadcast address */ h->control = PPP_UI; /* Unnumbered Info */ h->protocol = htons (proto); /* Link Control Protocol */ lh = (struct lcp_header*) (h + 1); lh->type = type; lh->ident = ident; lh->len = htons (LCP_HEADER_LEN + len); if (len) bcopy (data, lh+1, len); if (debug) { log(LOG_DEBUG, SPP_FMT "%s output <%s id=0x%x len=%d", SPP_ARGS(ifp), sppp_proto_name(proto), sppp_cp_type_name (lh->type), lh->ident, ntohs (lh->len)); sppp_print_bytes ((u_char*) (lh+1), len); log(-1, ">\n"); } if (! IF_HANDOFF_ADJ(&sp->pp_cpq, m, ifp, 3)) if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } /* * Handle incoming PPP control protocol packets. 
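 *
 * Every control packet handled here begins with a four-byte header,
 * roughly as sketched below (see the actual lcp_header declaration for
 * the authoritative layout):
 *
 *	struct lcp_header {
 *		u_char	type;	-- CONF_REQ, CONF_ACK, TERM_REQ, ...
 *		u_char	ident;	-- matches replies to requests
 *		u_short	len;	-- header plus data, network byte order
 *	};
 *
 * followed by (len - 4) bytes of options or data.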
*/ static void sppp_cp_input(const struct cp *cp, struct sppp *sp, struct mbuf *m) { STDDCL; struct lcp_header *h; int len = m->m_pkthdr.len; int rv; u_char *p; if (len < 4) { if (debug) log(LOG_DEBUG, SPP_FMT "%s invalid packet length: %d bytes\n", SPP_ARGS(ifp), cp->name, len); return; } h = mtod (m, struct lcp_header*); if (debug) { log(LOG_DEBUG, SPP_FMT "%s input(%s): <%s id=0x%x len=%d", SPP_ARGS(ifp), cp->name, sppp_state_name(sp->state[cp->protoidx]), sppp_cp_type_name (h->type), h->ident, ntohs (h->len)); sppp_print_bytes ((u_char*) (h+1), len-4); log(-1, ">\n"); } if (len > ntohs (h->len)) len = ntohs (h->len); p = (u_char *)(h + 1); switch (h->type) { case CONF_REQ: if (len < 4) { if (debug) log(-1, SPP_FMT "%s invalid conf-req length %d\n", SPP_ARGS(ifp), cp->name, len); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); break; } /* handle states where RCR doesn't get a SCA/SCN */ switch (sp->state[cp->protoidx]) { case STATE_CLOSING: case STATE_STOPPING: return; case STATE_CLOSED: sppp_cp_send(sp, cp->proto, TERM_ACK, h->ident, 0, 0); return; } rv = (cp->RCR)(sp, h, len); switch (sp->state[cp->protoidx]) { case STATE_OPENED: (cp->tld)(sp); (cp->scr)(sp); /* FALLTHROUGH */ case STATE_ACK_SENT: case STATE_REQ_SENT: /* * sppp_cp_change_state() have the side effect of * restarting the timeouts. We want to avoid that * if the state don't change, otherwise we won't * ever timeout and resend a configuration request * that got lost. */ if (sp->state[cp->protoidx] == (rv ? STATE_ACK_SENT: STATE_REQ_SENT)) break; sppp_cp_change_state(cp, sp, rv? STATE_ACK_SENT: STATE_REQ_SENT); break; case STATE_STOPPED: sp->rst_counter[cp->protoidx] = sp->lcp.max_configure; (cp->scr)(sp); sppp_cp_change_state(cp, sp, rv? STATE_ACK_SENT: STATE_REQ_SENT); break; case STATE_ACK_RCVD: if (rv) { sppp_cp_change_state(cp, sp, STATE_OPENED); if (debug) log(LOG_DEBUG, SPP_FMT "%s tlu\n", SPP_ARGS(ifp), cp->name); (cp->tlu)(sp); } else sppp_cp_change_state(cp, sp, STATE_ACK_RCVD); break; default: printf(SPP_FMT "%s illegal %s in state %s\n", SPP_ARGS(ifp), cp->name, sppp_cp_type_name(h->type), sppp_state_name(sp->state[cp->protoidx])); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); } break; case CONF_ACK: if (h->ident != sp->confid[cp->protoidx]) { if (debug) log(-1, SPP_FMT "%s id mismatch 0x%x != 0x%x\n", SPP_ARGS(ifp), cp->name, h->ident, sp->confid[cp->protoidx]); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); break; } switch (sp->state[cp->protoidx]) { case STATE_CLOSED: case STATE_STOPPED: sppp_cp_send(sp, cp->proto, TERM_ACK, h->ident, 0, 0); break; case STATE_CLOSING: case STATE_STOPPING: break; case STATE_REQ_SENT: sp->rst_counter[cp->protoidx] = sp->lcp.max_configure; sppp_cp_change_state(cp, sp, STATE_ACK_RCVD); break; case STATE_OPENED: (cp->tld)(sp); /* FALLTHROUGH */ case STATE_ACK_RCVD: (cp->scr)(sp); sppp_cp_change_state(cp, sp, STATE_REQ_SENT); break; case STATE_ACK_SENT: sp->rst_counter[cp->protoidx] = sp->lcp.max_configure; sppp_cp_change_state(cp, sp, STATE_OPENED); if (debug) log(LOG_DEBUG, SPP_FMT "%s tlu\n", SPP_ARGS(ifp), cp->name); (cp->tlu)(sp); break; default: printf(SPP_FMT "%s illegal %s in state %s\n", SPP_ARGS(ifp), cp->name, sppp_cp_type_name(h->type), sppp_state_name(sp->state[cp->protoidx])); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); } break; case CONF_NAK: case CONF_REJ: if (h->ident != sp->confid[cp->protoidx]) { if (debug) log(-1, SPP_FMT "%s id mismatch 0x%x != 0x%x\n", SPP_ARGS(ifp), cp->name, h->ident, sp->confid[cp->protoidx]); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); break; } if 
(h->type == CONF_NAK) (cp->RCN_nak)(sp, h, len); else /* CONF_REJ */ (cp->RCN_rej)(sp, h, len); switch (sp->state[cp->protoidx]) { case STATE_CLOSED: case STATE_STOPPED: sppp_cp_send(sp, cp->proto, TERM_ACK, h->ident, 0, 0); break; case STATE_REQ_SENT: case STATE_ACK_SENT: sp->rst_counter[cp->protoidx] = sp->lcp.max_configure; /* * Slow things down a bit if we think we might be * in loopback. Depend on the timeout to send the * next configuration request. */ if (sp->pp_loopcnt) break; (cp->scr)(sp); break; case STATE_OPENED: (cp->tld)(sp); /* FALLTHROUGH */ case STATE_ACK_RCVD: sppp_cp_change_state(cp, sp, STATE_REQ_SENT); (cp->scr)(sp); break; case STATE_CLOSING: case STATE_STOPPING: break; default: printf(SPP_FMT "%s illegal %s in state %s\n", SPP_ARGS(ifp), cp->name, sppp_cp_type_name(h->type), sppp_state_name(sp->state[cp->protoidx])); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); } break; case TERM_REQ: switch (sp->state[cp->protoidx]) { case STATE_ACK_RCVD: case STATE_ACK_SENT: sppp_cp_change_state(cp, sp, STATE_REQ_SENT); /* FALLTHROUGH */ case STATE_CLOSED: case STATE_STOPPED: case STATE_CLOSING: case STATE_STOPPING: case STATE_REQ_SENT: sta: /* Send Terminate-Ack packet. */ if (debug) log(LOG_DEBUG, SPP_FMT "%s send terminate-ack\n", SPP_ARGS(ifp), cp->name); sppp_cp_send(sp, cp->proto, TERM_ACK, h->ident, 0, 0); break; case STATE_OPENED: (cp->tld)(sp); sp->rst_counter[cp->protoidx] = 0; sppp_cp_change_state(cp, sp, STATE_STOPPING); goto sta; break; default: printf(SPP_FMT "%s illegal %s in state %s\n", SPP_ARGS(ifp), cp->name, sppp_cp_type_name(h->type), sppp_state_name(sp->state[cp->protoidx])); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); } break; case TERM_ACK: switch (sp->state[cp->protoidx]) { case STATE_CLOSED: case STATE_STOPPED: case STATE_REQ_SENT: case STATE_ACK_SENT: break; case STATE_CLOSING: sppp_cp_change_state(cp, sp, STATE_CLOSED); (cp->tlf)(sp); break; case STATE_STOPPING: sppp_cp_change_state(cp, sp, STATE_STOPPED); (cp->tlf)(sp); break; case STATE_ACK_RCVD: sppp_cp_change_state(cp, sp, STATE_REQ_SENT); break; case STATE_OPENED: (cp->tld)(sp); (cp->scr)(sp); sppp_cp_change_state(cp, sp, STATE_ACK_RCVD); break; default: printf(SPP_FMT "%s illegal %s in state %s\n", SPP_ARGS(ifp), cp->name, sppp_cp_type_name(h->type), sppp_state_name(sp->state[cp->protoidx])); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); } break; case CODE_REJ: /* XXX catastrophic rejects (RXJ-) aren't handled yet. */ log(LOG_INFO, SPP_FMT "%s: ignoring RXJ (%s) for proto 0x%x, " "danger will robinson\n", SPP_ARGS(ifp), cp->name, sppp_cp_type_name(h->type), ntohs(*((u_short *)p))); switch (sp->state[cp->protoidx]) { case STATE_CLOSED: case STATE_STOPPED: case STATE_REQ_SENT: case STATE_ACK_SENT: case STATE_CLOSING: case STATE_STOPPING: case STATE_OPENED: break; case STATE_ACK_RCVD: sppp_cp_change_state(cp, sp, STATE_REQ_SENT); break; default: printf(SPP_FMT "%s illegal %s in state %s\n", SPP_ARGS(ifp), cp->name, sppp_cp_type_name(h->type), sppp_state_name(sp->state[cp->protoidx])); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); } break; case PROTO_REJ: { int catastrophic; const struct cp *upper; int i; u_int16_t proto; catastrophic = 0; upper = NULL; proto = ntohs(*((u_int16_t *)p)); for (i = 0; i < IDX_COUNT; i++) { if (cps[i]->proto == proto) { upper = cps[i]; break; } } if (upper == NULL) catastrophic++; if (catastrophic || debug) log(catastrophic? LOG_INFO: LOG_DEBUG, SPP_FMT "%s: RXJ%c (%s) for proto 0x%x (%s/%s)\n", SPP_ARGS(ifp), cp->name, catastrophic ? 
'-' : '+', sppp_cp_type_name(h->type), proto, upper ? upper->name : "unknown", upper ? sppp_state_name(sp->state[upper->protoidx]) : "?"); /* * if we got RXJ+ against conf-req, the peer does not implement * this particular protocol type. terminate the protocol. */ if (upper && !catastrophic) { if (sp->state[upper->protoidx] == STATE_REQ_SENT) { upper->Close(sp); break; } } /* XXX catastrophic rejects (RXJ-) aren't handled yet. */ switch (sp->state[cp->protoidx]) { case STATE_CLOSED: case STATE_STOPPED: case STATE_REQ_SENT: case STATE_ACK_SENT: case STATE_CLOSING: case STATE_STOPPING: case STATE_OPENED: break; case STATE_ACK_RCVD: sppp_cp_change_state(cp, sp, STATE_REQ_SENT); break; default: printf(SPP_FMT "%s illegal %s in state %s\n", SPP_ARGS(ifp), cp->name, sppp_cp_type_name(h->type), sppp_state_name(sp->state[cp->protoidx])); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); } break; } case DISC_REQ: if (cp->proto != PPP_LCP) goto illegal; /* Discard the packet. */ break; case ECHO_REQ: if (cp->proto != PPP_LCP) goto illegal; if (sp->state[cp->protoidx] != STATE_OPENED) { if (debug) log(-1, SPP_FMT "lcp echo req but lcp closed\n", SPP_ARGS(ifp)); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); break; } if (len < 8) { if (debug) log(-1, SPP_FMT "invalid lcp echo request " "packet length: %d bytes\n", SPP_ARGS(ifp), len); break; } if ((sp->lcp.opts & (1 << LCP_OPT_MAGIC)) && ntohl (*(long*)(h+1)) == sp->lcp.magic) { /* Line loopback mode detected. */ printf(SPP_FMT "loopback\n", SPP_ARGS(ifp)); sp->pp_loopcnt = MAXALIVECNT * 5; if_down (ifp); sppp_qflush (&sp->pp_cpq); /* Shut down the PPP link. */ /* XXX */ lcp.Down(sp); lcp.Up(sp); break; } *(long*)(h+1) = htonl (sp->lcp.magic); if (debug) log(-1, SPP_FMT "got lcp echo req, sending echo rep\n", SPP_ARGS(ifp)); sppp_cp_send (sp, PPP_LCP, ECHO_REPLY, h->ident, len-4, h+1); break; case ECHO_REPLY: if (cp->proto != PPP_LCP) goto illegal; if (h->ident != sp->lcp.echoid) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); break; } if (len < 8) { if (debug) log(-1, SPP_FMT "lcp invalid echo reply " "packet length: %d bytes\n", SPP_ARGS(ifp), len); break; } if (debug) log(-1, SPP_FMT "lcp got echo rep\n", SPP_ARGS(ifp)); if (!(sp->lcp.opts & (1 << LCP_OPT_MAGIC)) || ntohl (*(long*)(h+1)) != sp->lcp.magic) sp->pp_alivecnt = 0; break; default: /* Unknown packet type -- send Code-Reject packet. */ illegal: if (debug) log(-1, SPP_FMT "%s send code-rej for 0x%x\n", SPP_ARGS(ifp), cp->name, h->type); sppp_cp_send(sp, cp->proto, CODE_REJ, ++sp->pp_seq[cp->protoidx], m->m_pkthdr.len, h); if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); } } /* * The generic part of all Up/Down/Open/Close/TO event handlers. * Basically, the state transition handling in the automaton. 
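 *
 * The state names and action callbacks map onto the RFC 1661 option
 * negotiation automaton: the ten states STATE_INITIAL through
 * STATE_OPENED, plus the actions
 *
 *	tls/tlf	 This-Layer-Started / This-Layer-Finished
 *	tlu/tld	 This-Layer-Up / This-Layer-Down
 *	scr	 Send-Configure-Request
 *	RCR	 Receive-Configure-Request (good or bad)
 *	RCN	 Receive-Configure-Nak/Reject
 *	TO	 restart-timer timeout (TO+ while the restart counter
 *		 is still positive, TO- once it has run out)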
*/ static void sppp_up_event(const struct cp *cp, struct sppp *sp) { STDDCL; if (debug) log(LOG_DEBUG, SPP_FMT "%s up(%s)\n", SPP_ARGS(ifp), cp->name, sppp_state_name(sp->state[cp->protoidx])); switch (sp->state[cp->protoidx]) { case STATE_INITIAL: sppp_cp_change_state(cp, sp, STATE_CLOSED); break; case STATE_STARTING: sp->rst_counter[cp->protoidx] = sp->lcp.max_configure; (cp->scr)(sp); sppp_cp_change_state(cp, sp, STATE_REQ_SENT); break; default: printf(SPP_FMT "%s illegal up in state %s\n", SPP_ARGS(ifp), cp->name, sppp_state_name(sp->state[cp->protoidx])); } } static void sppp_down_event(const struct cp *cp, struct sppp *sp) { STDDCL; if (debug) log(LOG_DEBUG, SPP_FMT "%s down(%s)\n", SPP_ARGS(ifp), cp->name, sppp_state_name(sp->state[cp->protoidx])); switch (sp->state[cp->protoidx]) { case STATE_CLOSED: case STATE_CLOSING: sppp_cp_change_state(cp, sp, STATE_INITIAL); break; case STATE_STOPPED: sppp_cp_change_state(cp, sp, STATE_STARTING); (cp->tls)(sp); break; case STATE_STOPPING: case STATE_REQ_SENT: case STATE_ACK_RCVD: case STATE_ACK_SENT: sppp_cp_change_state(cp, sp, STATE_STARTING); break; case STATE_OPENED: (cp->tld)(sp); sppp_cp_change_state(cp, sp, STATE_STARTING); break; default: printf(SPP_FMT "%s illegal down in state %s\n", SPP_ARGS(ifp), cp->name, sppp_state_name(sp->state[cp->protoidx])); } } static void sppp_open_event(const struct cp *cp, struct sppp *sp) { STDDCL; if (debug) log(LOG_DEBUG, SPP_FMT "%s open(%s)\n", SPP_ARGS(ifp), cp->name, sppp_state_name(sp->state[cp->protoidx])); switch (sp->state[cp->protoidx]) { case STATE_INITIAL: sppp_cp_change_state(cp, sp, STATE_STARTING); (cp->tls)(sp); break; case STATE_STARTING: break; case STATE_CLOSED: sp->rst_counter[cp->protoidx] = sp->lcp.max_configure; (cp->scr)(sp); sppp_cp_change_state(cp, sp, STATE_REQ_SENT); break; case STATE_STOPPED: /* * Try escaping stopped state. This seems to bite * people occasionally, in particular for IPCP, * presumably following previous IPCP negotiation * aborts. Somehow, we must have missed a Down event * which would have caused a transition into starting * state, so as a bandaid we force the Down event now. * This effectively implements (something like the) * `restart' option mentioned in the state transition * table of RFC 1661. 
*/ sppp_cp_change_state(cp, sp, STATE_STARTING); (cp->tls)(sp); break; case STATE_STOPPING: case STATE_REQ_SENT: case STATE_ACK_RCVD: case STATE_ACK_SENT: case STATE_OPENED: break; case STATE_CLOSING: sppp_cp_change_state(cp, sp, STATE_STOPPING); break; } } static void sppp_close_event(const struct cp *cp, struct sppp *sp) { STDDCL; if (debug) log(LOG_DEBUG, SPP_FMT "%s close(%s)\n", SPP_ARGS(ifp), cp->name, sppp_state_name(sp->state[cp->protoidx])); switch (sp->state[cp->protoidx]) { case STATE_INITIAL: case STATE_CLOSED: case STATE_CLOSING: break; case STATE_STARTING: sppp_cp_change_state(cp, sp, STATE_INITIAL); (cp->tlf)(sp); break; case STATE_STOPPED: sppp_cp_change_state(cp, sp, STATE_CLOSED); break; case STATE_STOPPING: sppp_cp_change_state(cp, sp, STATE_CLOSING); break; case STATE_OPENED: (cp->tld)(sp); /* FALLTHROUGH */ case STATE_REQ_SENT: case STATE_ACK_RCVD: case STATE_ACK_SENT: sp->rst_counter[cp->protoidx] = sp->lcp.max_terminate; sppp_cp_send(sp, cp->proto, TERM_REQ, ++sp->pp_seq[cp->protoidx], 0, 0); sppp_cp_change_state(cp, sp, STATE_CLOSING); break; } } static void sppp_to_event(const struct cp *cp, struct sppp *sp) { STDDCL; SPPP_LOCK(sp); if (debug) log(LOG_DEBUG, SPP_FMT "%s TO(%s) rst_counter = %d\n", SPP_ARGS(ifp), cp->name, sppp_state_name(sp->state[cp->protoidx]), sp->rst_counter[cp->protoidx]); if (--sp->rst_counter[cp->protoidx] < 0) /* TO- event */ switch (sp->state[cp->protoidx]) { case STATE_CLOSING: sppp_cp_change_state(cp, sp, STATE_CLOSED); (cp->tlf)(sp); break; case STATE_STOPPING: sppp_cp_change_state(cp, sp, STATE_STOPPED); (cp->tlf)(sp); break; case STATE_REQ_SENT: case STATE_ACK_RCVD: case STATE_ACK_SENT: sppp_cp_change_state(cp, sp, STATE_STOPPED); (cp->tlf)(sp); break; } else /* TO+ event */ switch (sp->state[cp->protoidx]) { case STATE_CLOSING: case STATE_STOPPING: sppp_cp_send(sp, cp->proto, TERM_REQ, ++sp->pp_seq[cp->protoidx], 0, 0); callout_reset(&sp->ch[cp->protoidx], sp->lcp.timeout, cp->TO, (void *)sp); break; case STATE_REQ_SENT: case STATE_ACK_RCVD: (cp->scr)(sp); /* sppp_cp_change_state() will restart the timer */ sppp_cp_change_state(cp, sp, STATE_REQ_SENT); break; case STATE_ACK_SENT: (cp->scr)(sp); callout_reset(&sp->ch[cp->protoidx], sp->lcp.timeout, cp->TO, (void *)sp); break; } SPPP_UNLOCK(sp); } /* * Change the state of a control protocol in the state automaton. * Takes care of starting/stopping the restart timer. */ static void sppp_cp_change_state(const struct cp *cp, struct sppp *sp, int newstate) { sp->state[cp->protoidx] = newstate; callout_stop (&sp->ch[cp->protoidx]); switch (newstate) { case STATE_INITIAL: case STATE_STARTING: case STATE_CLOSED: case STATE_STOPPED: case STATE_OPENED: break; case STATE_CLOSING: case STATE_STOPPING: case STATE_REQ_SENT: case STATE_ACK_RCVD: case STATE_ACK_SENT: callout_reset(&sp->ch[cp->protoidx], sp->lcp.timeout, cp->TO, (void *)sp); break; } } /* *--------------------------------------------------------------------------* * * * The LCP implementation. 
* * * *--------------------------------------------------------------------------* */ static void sppp_pp_up(struct sppp *sp) { SPPP_LOCK(sp); lcp.Up(sp); SPPP_UNLOCK(sp); } static void sppp_pp_down(struct sppp *sp) { SPPP_LOCK(sp); lcp.Down(sp); SPPP_UNLOCK(sp); } static void sppp_lcp_init(struct sppp *sp) { sp->lcp.opts = (1 << LCP_OPT_MAGIC); sp->lcp.magic = 0; sp->state[IDX_LCP] = STATE_INITIAL; sp->fail_counter[IDX_LCP] = 0; sp->pp_seq[IDX_LCP] = 0; sp->pp_rseq[IDX_LCP] = 0; sp->lcp.protos = 0; sp->lcp.mru = sp->lcp.their_mru = PP_MTU; /* Note that these values are relevant for all control protocols */ sp->lcp.timeout = 3 * hz; sp->lcp.max_terminate = 2; sp->lcp.max_configure = 10; sp->lcp.max_failure = 10; callout_init(&sp->ch[IDX_LCP], 1); } static void sppp_lcp_up(struct sppp *sp) { STDDCL; sp->pp_alivecnt = 0; sp->lcp.opts = (1 << LCP_OPT_MAGIC); sp->lcp.magic = 0; sp->lcp.protos = 0; sp->lcp.mru = sp->lcp.their_mru = PP_MTU; /* * If we are authenticator, negotiate LCP_AUTH */ if (sp->hisauth.proto != 0) sp->lcp.opts |= (1 << LCP_OPT_AUTH_PROTO); else sp->lcp.opts &= ~(1 << LCP_OPT_AUTH_PROTO); sp->pp_flags &= ~PP_NEEDAUTH; /* * If this interface is passive or dial-on-demand, and we are * still in Initial state, it means we've got an incoming * call. Activate the interface. */ if ((ifp->if_flags & (IFF_AUTO | IFF_PASSIVE)) != 0) { if (debug) log(LOG_DEBUG, SPP_FMT "Up event", SPP_ARGS(ifp)); ifp->if_drv_flags |= IFF_DRV_RUNNING; if (sp->state[IDX_LCP] == STATE_INITIAL) { if (debug) log(-1, "(incoming call)\n"); sp->pp_flags |= PP_CALLIN; lcp.Open(sp); } else if (debug) log(-1, "\n"); } else if ((ifp->if_flags & (IFF_AUTO | IFF_PASSIVE)) == 0 && (sp->state[IDX_LCP] == STATE_INITIAL)) { ifp->if_drv_flags |= IFF_DRV_RUNNING; lcp.Open(sp); } sppp_up_event(&lcp, sp); } static void sppp_lcp_down(struct sppp *sp) { STDDCL; sppp_down_event(&lcp, sp); /* * If this is neither a dial-on-demand nor a passive * interface, simulate an ``ifconfig down'' action, so the * administrator can force a redial by another ``ifconfig * up''. XXX For leased line operation, should we immediately * try to reopen the connection here? */ if ((ifp->if_flags & (IFF_AUTO | IFF_PASSIVE)) == 0) { log(LOG_INFO, SPP_FMT "Down event, taking interface down.\n", SPP_ARGS(ifp)); if_down(ifp); } else { if (debug) log(LOG_DEBUG, SPP_FMT "Down event (carrier loss)\n", SPP_ARGS(ifp)); sp->pp_flags &= ~PP_CALLIN; if (sp->state[IDX_LCP] != STATE_INITIAL) lcp.Close(sp); ifp->if_drv_flags &= ~IFF_DRV_RUNNING; } } static void sppp_lcp_open(struct sppp *sp) { sppp_open_event(&lcp, sp); } static void sppp_lcp_close(struct sppp *sp) { sppp_close_event(&lcp, sp); } static void sppp_lcp_TO(void *cookie) { sppp_to_event(&lcp, (struct sppp *)cookie); } /* * Analyze a configure request. Return true if it was agreeable, and * caused action sca, false if it has been rejected or nak'ed, and * caused action scn. (The return value is used to make the state * transition decision in the state automaton.) */ static int sppp_lcp_RCR(struct sppp *sp, struct lcp_header *h, int len) { STDDCL; u_char *buf, *r, *p; int origlen, rlen; u_long nmagic; u_short authproto; len -= 4; origlen = len; buf = r = malloc (len, M_TEMP, M_NOWAIT); if (! 
buf) return (0); if (debug) log(LOG_DEBUG, SPP_FMT "lcp parse opts: ", SPP_ARGS(ifp)); /* pass 1: check for things that need to be rejected */ p = (void*) (h+1); for (rlen=0; len >= 2 && p[1] >= 2 && len >= p[1]; len-=p[1], p+=p[1]) { if (debug) log(-1, " %s ", sppp_lcp_opt_name(*p)); switch (*p) { case LCP_OPT_MAGIC: /* Magic number. */ if (len >= 6 && p[1] == 6) continue; if (debug) log(-1, "[invalid] "); break; case LCP_OPT_ASYNC_MAP: /* Async control character map. */ if (len >= 6 && p[1] == 6) continue; if (debug) log(-1, "[invalid] "); break; case LCP_OPT_MRU: /* Maximum receive unit. */ if (len >= 4 && p[1] == 4) continue; if (debug) log(-1, "[invalid] "); break; case LCP_OPT_AUTH_PROTO: if (len < 4) { if (debug) log(-1, "[invalid] "); break; } authproto = (p[2] << 8) + p[3]; if (authproto == PPP_CHAP && p[1] != 5) { if (debug) log(-1, "[invalid chap len] "); break; } if (sp->myauth.proto == 0) { /* we are not configured to do auth */ if (debug) log(-1, "[not configured] "); break; } /* * Remote want us to authenticate, remember this, * so we stay in PHASE_AUTHENTICATE after LCP got * up. */ sp->pp_flags |= PP_NEEDAUTH; continue; default: /* Others not supported. */ if (debug) log(-1, "[rej] "); break; } /* Add the option to rejected list. */ bcopy (p, r, p[1]); r += p[1]; rlen += p[1]; } if (rlen) { if (debug) log(-1, " send conf-rej\n"); sppp_cp_send (sp, PPP_LCP, CONF_REJ, h->ident, rlen, buf); return 0; } else if (debug) log(-1, "\n"); /* * pass 2: check for option values that are unacceptable and * thus require to be nak'ed. */ if (debug) log(LOG_DEBUG, SPP_FMT "lcp parse opt values: ", SPP_ARGS(ifp)); p = (void*) (h+1); len = origlen; for (rlen=0; len >= 2 && p[1] >= 2 && len >= p[1]; len-=p[1], p+=p[1]) { if (debug) log(-1, " %s ", sppp_lcp_opt_name(*p)); switch (*p) { case LCP_OPT_MAGIC: /* Magic number -- extract. */ nmagic = (u_long)p[2] << 24 | (u_long)p[3] << 16 | p[4] << 8 | p[5]; if (nmagic != sp->lcp.magic) { sp->pp_loopcnt = 0; if (debug) log(-1, "0x%lx ", nmagic); continue; } if (debug && sp->pp_loopcnt < MAXALIVECNT*5) log(-1, "[glitch] "); ++sp->pp_loopcnt; /* * We negate our magic here, and NAK it. If * we see it later in an NAK packet, we * suggest a new one. */ nmagic = ~sp->lcp.magic; /* Gonna NAK it. */ p[2] = nmagic >> 24; p[3] = nmagic >> 16; p[4] = nmagic >> 8; p[5] = nmagic; break; case LCP_OPT_ASYNC_MAP: /* * Async control character map -- just ignore it. * * Quote from RFC 1662, chapter 6: * To enable this functionality, synchronous PPP * implementations MUST always respond to the * Async-Control-Character-Map Configuration * Option with the LCP Configure-Ack. However, * acceptance of the Configuration Option does * not imply that the synchronous implementation * will do any ACCM mapping. Instead, all such * octet mapping will be performed by the * asynchronous-to-synchronous converter. */ continue; case LCP_OPT_MRU: /* * Maximum receive unit. Always agreeable, * but ignored by now. */ sp->lcp.their_mru = p[2] * 256 + p[3]; if (debug) log(-1, "%lu ", sp->lcp.their_mru); continue; case LCP_OPT_AUTH_PROTO: authproto = (p[2] << 8) + p[3]; if (sp->myauth.proto != authproto) { /* not agreed, nak */ if (debug) log(-1, "[mine %s != his %s] ", sppp_proto_name(sp->hisauth.proto), sppp_proto_name(authproto)); p[2] = sp->myauth.proto >> 8; p[3] = sp->myauth.proto; break; } if (authproto == PPP_CHAP && p[4] != CHAP_MD5) { if (debug) log(-1, "[chap not MD5] "); p[4] = CHAP_MD5; break; } continue; } /* Add the option to nak'ed list. 
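 * (Whatever accumulates in buf/rlen here is sent back as a conf-nak
 * after the loop, unless the repeated magic-number glitches point to a
 * loopback, in which case the interface is bounced, or max_failure has
 * been exceeded, in which case a conf-rej goes out instead; an empty
 * list means the request is conf-ack'd as is.)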
*/ bcopy (p, r, p[1]); r += p[1]; rlen += p[1]; } if (rlen) { /* * Local and remote magics equal -- loopback? */ if (sp->pp_loopcnt >= MAXALIVECNT*5) { if (sp->pp_loopcnt == MAXALIVECNT*5) printf (SPP_FMT "loopback\n", SPP_ARGS(ifp)); if (ifp->if_flags & IFF_UP) { if_down(ifp); sppp_qflush(&sp->pp_cpq); /* XXX ? */ lcp.Down(sp); lcp.Up(sp); } } else if (!sp->pp_loopcnt && ++sp->fail_counter[IDX_LCP] >= sp->lcp.max_failure) { if (debug) log(-1, " max_failure (%d) exceeded, " "send conf-rej\n", sp->lcp.max_failure); sppp_cp_send(sp, PPP_LCP, CONF_REJ, h->ident, rlen, buf); } else { if (debug) log(-1, " send conf-nak\n"); sppp_cp_send (sp, PPP_LCP, CONF_NAK, h->ident, rlen, buf); } } else { if (debug) log(-1, " send conf-ack\n"); sp->fail_counter[IDX_LCP] = 0; sp->pp_loopcnt = 0; sppp_cp_send (sp, PPP_LCP, CONF_ACK, h->ident, origlen, h+1); } free (buf, M_TEMP); return (rlen == 0); } /* * Analyze the LCP Configure-Reject option list, and adjust our * negotiation. */ static void sppp_lcp_RCN_rej(struct sppp *sp, struct lcp_header *h, int len) { STDDCL; u_char *buf, *p; len -= 4; buf = malloc (len, M_TEMP, M_NOWAIT); if (!buf) return; if (debug) log(LOG_DEBUG, SPP_FMT "lcp rej opts: ", SPP_ARGS(ifp)); p = (void*) (h+1); for (; len >= 2 && p[1] >= 2 && len >= p[1]; len -= p[1], p += p[1]) { if (debug) log(-1, " %s ", sppp_lcp_opt_name(*p)); switch (*p) { case LCP_OPT_MAGIC: /* Magic number -- can't use it, use 0 */ sp->lcp.opts &= ~(1 << LCP_OPT_MAGIC); sp->lcp.magic = 0; break; case LCP_OPT_MRU: /* * Should not be rejected anyway, since we only * negotiate a MRU if explicitly requested by * peer. */ sp->lcp.opts &= ~(1 << LCP_OPT_MRU); break; case LCP_OPT_AUTH_PROTO: /* * Peer doesn't want to authenticate himself, * deny unless this is a dialout call, and * AUTHFLAG_NOCALLOUT is set. */ if ((sp->pp_flags & PP_CALLIN) == 0 && (sp->hisauth.flags & AUTHFLAG_NOCALLOUT) != 0) { if (debug) log(-1, "[don't insist on auth " "for callout]"); sp->lcp.opts &= ~(1 << LCP_OPT_AUTH_PROTO); break; } if (debug) log(-1, "[access denied]\n"); lcp.Close(sp); break; } } if (debug) log(-1, "\n"); free (buf, M_TEMP); return; } /* * Analyze the LCP Configure-NAK option list, and adjust our * negotiation. */ static void sppp_lcp_RCN_nak(struct sppp *sp, struct lcp_header *h, int len) { STDDCL; u_char *buf, *p; u_long magic; len -= 4; buf = malloc (len, M_TEMP, M_NOWAIT); if (!buf) return; if (debug) log(LOG_DEBUG, SPP_FMT "lcp nak opts: ", SPP_ARGS(ifp)); p = (void*) (h+1); for (; len >= 2 && p[1] >= 2 && len >= p[1]; len -= p[1], p += p[1]) { if (debug) log(-1, " %s ", sppp_lcp_opt_name(*p)); switch (*p) { case LCP_OPT_MAGIC: /* Magic number -- renegotiate */ if ((sp->lcp.opts & (1 << LCP_OPT_MAGIC)) && len >= 6 && p[1] == 6) { magic = (u_long)p[2] << 24 | (u_long)p[3] << 16 | p[4] << 8 | p[5]; /* * If the remote magic is our negated one, * this looks like a loopback problem. * Suggest a new magic to make sure. */ if (magic == ~sp->lcp.magic) { if (debug) log(-1, "magic glitch "); sp->lcp.magic = random(); } else { sp->lcp.magic = magic; if (debug) log(-1, "%lu ", magic); } } break; case LCP_OPT_MRU: /* * Peer wants to advise us to negotiate an MRU. * Agree on it if it's reasonable, or use * default otherwise. */ if (len >= 4 && p[1] == 4) { u_int mru = p[2] * 256 + p[3]; if (debug) log(-1, "%d ", mru); if (mru < PP_MTU || mru > PP_MAX_MRU) mru = PP_MTU; sp->lcp.mru = mru; sp->lcp.opts |= (1 << LCP_OPT_MRU); } break; case LCP_OPT_AUTH_PROTO: /* * Peer doesn't like our authentication method, * deny. 
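 * We only ever ask for the protocol configured in hisauth.proto, so
 * there is nothing sensible to fall back to; the link is closed
 * instead.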
*/ if (debug) log(-1, "[access denied]\n"); lcp.Close(sp); break; } } if (debug) log(-1, "\n"); free (buf, M_TEMP); return; } static void sppp_lcp_tlu(struct sppp *sp) { STDDCL; int i; u_long mask; /* XXX ? */ if (! (ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING)) { /* Coming out of loopback mode. */ if_up(ifp); printf (SPP_FMT "up\n", SPP_ARGS(ifp)); } for (i = 0; i < IDX_COUNT; i++) if ((cps[i])->flags & CP_QUAL) (cps[i])->Open(sp); if ((sp->lcp.opts & (1 << LCP_OPT_AUTH_PROTO)) != 0 || (sp->pp_flags & PP_NEEDAUTH) != 0) sp->pp_phase = PHASE_AUTHENTICATE; else sp->pp_phase = PHASE_NETWORK; if (debug) log(LOG_DEBUG, SPP_FMT "phase %s\n", SPP_ARGS(ifp), sppp_phase_name(sp->pp_phase)); /* * Open all authentication protocols. This is even required * if we already proceeded to network phase, since it might be * that remote wants us to authenticate, so we might have to * send a PAP request. Undesired authentication protocols * don't do anything when they get an Open event. */ for (i = 0; i < IDX_COUNT; i++) if ((cps[i])->flags & CP_AUTH) (cps[i])->Open(sp); if (sp->pp_phase == PHASE_NETWORK) { /* Notify all NCPs. */ for (i = 0; i < IDX_COUNT; i++) if (((cps[i])->flags & CP_NCP) && /* * XXX * Hack to administratively disable IPv6 if * not desired. Perhaps we should have another * flag for this, but right now, we can make * all struct cp's read/only. */ (cps[i] != &ipv6cp || (sp->confflags & CONF_ENABLE_IPV6))) (cps[i])->Open(sp); } /* Send Up events to all started protos. */ for (i = 0, mask = 1; i < IDX_COUNT; i++, mask <<= 1) if ((sp->lcp.protos & mask) && ((cps[i])->flags & CP_LCP) == 0) (cps[i])->Up(sp); /* notify low-level driver of state change */ if (sp->pp_chg) sp->pp_chg(sp, (int)sp->pp_phase); if (sp->pp_phase == PHASE_NETWORK) /* if no NCP is starting, close down */ sppp_lcp_check_and_close(sp); } static void sppp_lcp_tld(struct sppp *sp) { STDDCL; int i; u_long mask; sp->pp_phase = PHASE_TERMINATE; if (debug) log(LOG_DEBUG, SPP_FMT "phase %s\n", SPP_ARGS(ifp), sppp_phase_name(sp->pp_phase)); /* * Take upper layers down. We send the Down event first and * the Close second to prevent the upper layers from sending * ``a flurry of terminate-request packets'', as the RFC * describes it. */ for (i = 0, mask = 1; i < IDX_COUNT; i++, mask <<= 1) if ((sp->lcp.protos & mask) && ((cps[i])->flags & CP_LCP) == 0) { (cps[i])->Down(sp); (cps[i])->Close(sp); } } static void sppp_lcp_tls(struct sppp *sp) { STDDCL; sp->pp_phase = PHASE_ESTABLISH; if (debug) log(LOG_DEBUG, SPP_FMT "phase %s\n", SPP_ARGS(ifp), sppp_phase_name(sp->pp_phase)); /* Notify lower layer if desired. */ if (sp->pp_tls) (sp->pp_tls)(sp); else (sp->pp_up)(sp); } static void sppp_lcp_tlf(struct sppp *sp) { STDDCL; sp->pp_phase = PHASE_DEAD; if (debug) log(LOG_DEBUG, SPP_FMT "phase %s\n", SPP_ARGS(ifp), sppp_phase_name(sp->pp_phase)); /* Notify lower layer if desired. */ if (sp->pp_tlf) (sp->pp_tlf)(sp); else (sp->pp_down)(sp); } static void sppp_lcp_scr(struct sppp *sp) { char opt[6 /* magicnum */ + 4 /* mru */ + 5 /* chap */]; int i = 0; u_short authproto; if (sp->lcp.opts & (1 << LCP_OPT_MAGIC)) { if (! 
sp->lcp.magic) sp->lcp.magic = random(); opt[i++] = LCP_OPT_MAGIC; opt[i++] = 6; opt[i++] = sp->lcp.magic >> 24; opt[i++] = sp->lcp.magic >> 16; opt[i++] = sp->lcp.magic >> 8; opt[i++] = sp->lcp.magic; } if (sp->lcp.opts & (1 << LCP_OPT_MRU)) { opt[i++] = LCP_OPT_MRU; opt[i++] = 4; opt[i++] = sp->lcp.mru >> 8; opt[i++] = sp->lcp.mru; } if (sp->lcp.opts & (1 << LCP_OPT_AUTH_PROTO)) { authproto = sp->hisauth.proto; opt[i++] = LCP_OPT_AUTH_PROTO; opt[i++] = authproto == PPP_CHAP? 5: 4; opt[i++] = authproto >> 8; opt[i++] = authproto; if (authproto == PPP_CHAP) opt[i++] = CHAP_MD5; } sp->confid[IDX_LCP] = ++sp->pp_seq[IDX_LCP]; sppp_cp_send (sp, PPP_LCP, CONF_REQ, sp->confid[IDX_LCP], i, &opt); } /* * Check the open NCPs, return true if at least one NCP is open. */ static int sppp_ncp_check(struct sppp *sp) { int i, mask; for (i = 0, mask = 1; i < IDX_COUNT; i++, mask <<= 1) if ((sp->lcp.protos & mask) && (cps[i])->flags & CP_NCP) return 1; return 0; } /* * Re-check the open NCPs and see if we should terminate the link. * Called by the NCPs during their tlf action handling. */ static void sppp_lcp_check_and_close(struct sppp *sp) { if (sp->pp_phase < PHASE_NETWORK) /* don't bother, we are already going down */ return; if (sppp_ncp_check(sp)) return; lcp.Close(sp); } /* *--------------------------------------------------------------------------* * * * The IPCP implementation. * * * *--------------------------------------------------------------------------* */ #ifdef INET static void sppp_ipcp_init(struct sppp *sp) { sp->ipcp.opts = 0; sp->ipcp.flags = 0; sp->state[IDX_IPCP] = STATE_INITIAL; sp->fail_counter[IDX_IPCP] = 0; sp->pp_seq[IDX_IPCP] = 0; sp->pp_rseq[IDX_IPCP] = 0; callout_init(&sp->ch[IDX_IPCP], 1); } static void sppp_ipcp_up(struct sppp *sp) { sppp_up_event(&ipcp, sp); } static void sppp_ipcp_down(struct sppp *sp) { sppp_down_event(&ipcp, sp); } static void sppp_ipcp_open(struct sppp *sp) { STDDCL; u_long myaddr, hisaddr; sp->ipcp.flags &= ~(IPCP_HISADDR_SEEN | IPCP_MYADDR_SEEN | IPCP_MYADDR_DYN | IPCP_VJ); sp->ipcp.opts = 0; sppp_get_ip_addrs(sp, &myaddr, &hisaddr, 0); /* * If we don't have his address, this probably means our * interface doesn't want to talk IP at all. (This could * be the case if somebody wants to speak only IPX, for * example.) Don't open IPCP in this case. */ if (hisaddr == 0L) { /* XXX this message should go away */ if (debug) log(LOG_DEBUG, SPP_FMT "ipcp_open(): no IP interface\n", SPP_ARGS(ifp)); return; } if (myaddr == 0L) { /* * I don't have an assigned address, so i need to * negotiate my address. */ sp->ipcp.flags |= IPCP_MYADDR_DYN; sp->ipcp.opts |= (1 << IPCP_OPT_ADDRESS); } else sp->ipcp.flags |= IPCP_MYADDR_SEEN; if (sp->confflags & CONF_ENABLE_VJ) { sp->ipcp.opts |= (1 << IPCP_OPT_COMPRESSION); sp->ipcp.max_state = MAX_STATES - 1; sp->ipcp.compress_cid = 1; } sppp_open_event(&ipcp, sp); } static void sppp_ipcp_close(struct sppp *sp) { sppp_close_event(&ipcp, sp); if (sp->ipcp.flags & IPCP_MYADDR_DYN) /* * My address was dynamic, clear it again. */ sppp_set_ip_addr(sp, 0L); } static void sppp_ipcp_TO(void *cookie) { sppp_to_event(&ipcp, (struct sppp *)cookie); } /* * Analyze a configure request. Return true if it was agreeable, and * caused action sca, false if it has been rejected or nak'ed, and * caused action scn. (The return value is used to make the state * transition decision in the state automaton.) 
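 *
 * As a rough example, when the peer asks us to assign it an address
 * (IP-Address option, type 3 in RFC 1332) and a remote address is
 * configured on the interface, the exchange usually runs:
 *
 *	peer:  conf-req  IP-Address 0.0.0.0      (please pick one for me)
 *	 us:   conf-nak  IP-Address <hisaddr>    (use this one)
 *	peer:  conf-req  IP-Address <hisaddr>
 *	 us:   conf-ack  IP-Address <hisaddr>
 *
 * where <hisaddr> is whatever sppp_get_ip_addrs() reports as the
 * remote address of the point-to-point interface.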
*/ static int sppp_ipcp_RCR(struct sppp *sp, struct lcp_header *h, int len) { u_char *buf, *r, *p; struct ifnet *ifp = SP2IFP(sp); int rlen, origlen, debug = ifp->if_flags & IFF_DEBUG; u_long hisaddr, desiredaddr; int gotmyaddr = 0; int desiredcomp; len -= 4; origlen = len; /* * Make sure to allocate a buf that can at least hold a * conf-nak with an `address' option. We might need it below. */ buf = r = malloc ((len < 6? 6: len), M_TEMP, M_NOWAIT); if (! buf) return (0); /* pass 1: see if we can recognize them */ if (debug) log(LOG_DEBUG, SPP_FMT "ipcp parse opts: ", SPP_ARGS(ifp)); p = (void*) (h+1); for (rlen=0; len >= 2 && p[1] >= 2 && len >= p[1]; len-=p[1], p+=p[1]) { if (debug) log(-1, " %s ", sppp_ipcp_opt_name(*p)); switch (*p) { case IPCP_OPT_COMPRESSION: if (!(sp->confflags & CONF_ENABLE_VJ)) { /* VJ compression administratively disabled */ if (debug) log(-1, "[locally disabled] "); break; } /* * In theory, we should only conf-rej an * option that is shorter than RFC 1618 * requires (i.e. < 4), and should conf-nak * anything else that is not VJ. However, * since our algorithm always uses the * original option to NAK it with new values, * things would become more complicated. In * practice, the only commonly implemented IP * compression option is VJ anyway, so the * difference is negligible. */ if (len >= 6 && p[1] == 6) { /* * correctly formed compression option * that could be VJ compression */ continue; } if (debug) log(-1, "optlen %d [invalid/unsupported] ", p[1]); break; case IPCP_OPT_ADDRESS: if (len >= 6 && p[1] == 6) { /* correctly formed address option */ continue; } if (debug) log(-1, "[invalid] "); break; default: /* Others not supported. */ if (debug) log(-1, "[rej] "); break; } /* Add the option to rejected list. */ bcopy (p, r, p[1]); r += p[1]; rlen += p[1]; } if (rlen) { if (debug) log(-1, " send conf-rej\n"); sppp_cp_send (sp, PPP_IPCP, CONF_REJ, h->ident, rlen, buf); return 0; } else if (debug) log(-1, "\n"); /* pass 2: parse option values */ sppp_get_ip_addrs(sp, 0, &hisaddr, 0); if (debug) log(LOG_DEBUG, SPP_FMT "ipcp parse opt values: ", SPP_ARGS(ifp)); p = (void*) (h+1); len = origlen; for (rlen=0; len >= 2 && p[1] >= 2 && len >= p[1]; len-=p[1], p+=p[1]) { if (debug) log(-1, " %s ", sppp_ipcp_opt_name(*p)); switch (*p) { case IPCP_OPT_COMPRESSION: desiredcomp = p[2] << 8 | p[3]; /* We only support VJ */ if (desiredcomp == IPCP_COMP_VJ) { if (debug) log(-1, "VJ [ack] "); sp->ipcp.flags |= IPCP_VJ; sl_compress_init(sp->pp_comp, p[4]); sp->ipcp.max_state = p[4]; sp->ipcp.compress_cid = p[5]; continue; } if (debug) log(-1, "compproto %#04x [not supported] ", desiredcomp); p[2] = IPCP_COMP_VJ >> 8; p[3] = IPCP_COMP_VJ; p[4] = sp->ipcp.max_state; p[5] = sp->ipcp.compress_cid; break; case IPCP_OPT_ADDRESS: /* This is the address he wants in his end */ desiredaddr = p[2] << 24 | p[3] << 16 | p[4] << 8 | p[5]; if (desiredaddr == hisaddr || (hisaddr >= 1 && hisaddr <= 254 && desiredaddr != 0)) { /* * Peer's address is same as our value, * or we have set it to 0.0.0.* to * indicate that we do not really care, * this is agreeable. Gonna conf-ack * it. */ if (debug) log(-1, "%s [ack] ", sppp_dotted_quad(hisaddr)); /* record that we've seen it already */ sp->ipcp.flags |= IPCP_HISADDR_SEEN; continue; } /* * The address wasn't agreeable. This is either * he sent us 0.0.0.0, asking to assign him an * address, or he send us another address not * matching our value. Either case, we gonna * conf-nak it with our value. 
* XXX: we should "rej" if hisaddr == 0 */ if (debug) { if (desiredaddr == 0) log(-1, "[addr requested] "); else log(-1, "%s [not agreed] ", sppp_dotted_quad(desiredaddr)); } p[2] = hisaddr >> 24; p[3] = hisaddr >> 16; p[4] = hisaddr >> 8; p[5] = hisaddr; break; } /* Add the option to nak'ed list. */ bcopy (p, r, p[1]); r += p[1]; rlen += p[1]; } /* * If we are about to conf-ack the request, but haven't seen * his address so far, gonna conf-nak it instead, with the * `address' option present and our idea of his address being * filled in there, to request negotiation of both addresses. * * XXX This can result in an endless req - nak loop if peer * doesn't want to send us his address. Q: What should we do * about it? XXX A: implement the max-failure counter. */ if (rlen == 0 && !(sp->ipcp.flags & IPCP_HISADDR_SEEN) && !gotmyaddr) { buf[0] = IPCP_OPT_ADDRESS; buf[1] = 6; buf[2] = hisaddr >> 24; buf[3] = hisaddr >> 16; buf[4] = hisaddr >> 8; buf[5] = hisaddr; rlen = 6; if (debug) log(-1, "still need hisaddr "); } if (rlen) { if (debug) log(-1, " send conf-nak\n"); sppp_cp_send (sp, PPP_IPCP, CONF_NAK, h->ident, rlen, buf); } else { if (debug) log(-1, " send conf-ack\n"); sppp_cp_send (sp, PPP_IPCP, CONF_ACK, h->ident, origlen, h+1); } free (buf, M_TEMP); return (rlen == 0); } /* * Analyze the IPCP Configure-Reject option list, and adjust our * negotiation. */ static void sppp_ipcp_RCN_rej(struct sppp *sp, struct lcp_header *h, int len) { u_char *buf, *p; struct ifnet *ifp = SP2IFP(sp); int debug = ifp->if_flags & IFF_DEBUG; len -= 4; buf = malloc (len, M_TEMP, M_NOWAIT); if (!buf) return; if (debug) log(LOG_DEBUG, SPP_FMT "ipcp rej opts: ", SPP_ARGS(ifp)); p = (void*) (h+1); for (; len >= 2 && p[1] >= 2 && len >= p[1]; len -= p[1], p += p[1]) { if (debug) log(-1, " %s ", sppp_ipcp_opt_name(*p)); switch (*p) { case IPCP_OPT_COMPRESSION: sp->ipcp.opts &= ~(1 << IPCP_OPT_COMPRESSION); break; case IPCP_OPT_ADDRESS: /* * Peer doesn't grok address option. This is * bad. XXX Should we better give up here? * XXX We could try old "addresses" option... */ sp->ipcp.opts &= ~(1 << IPCP_OPT_ADDRESS); break; } } if (debug) log(-1, "\n"); free (buf, M_TEMP); return; } /* * Analyze the IPCP Configure-NAK option list, and adjust our * negotiation. */ static void sppp_ipcp_RCN_nak(struct sppp *sp, struct lcp_header *h, int len) { u_char *buf, *p; struct ifnet *ifp = SP2IFP(sp); int debug = ifp->if_flags & IFF_DEBUG; int desiredcomp; u_long wantaddr; len -= 4; buf = malloc (len, M_TEMP, M_NOWAIT); if (!buf) return; if (debug) log(LOG_DEBUG, SPP_FMT "ipcp nak opts: ", SPP_ARGS(ifp)); p = (void*) (h+1); for (; len >= 2 && p[1] >= 2 && len >= p[1]; len -= p[1], p += p[1]) { if (debug) log(-1, " %s ", sppp_ipcp_opt_name(*p)); switch (*p) { case IPCP_OPT_COMPRESSION: if (len >= 6 && p[1] == 6) { desiredcomp = p[2] << 8 | p[3]; if (debug) log(-1, "[wantcomp %#04x] ", desiredcomp); if (desiredcomp == IPCP_COMP_VJ) { sl_compress_init(sp->pp_comp, p[4]); sp->ipcp.max_state = p[4]; sp->ipcp.compress_cid = p[5]; if (debug) log(-1, "[agree] "); } else sp->ipcp.opts &= ~(1 << IPCP_OPT_COMPRESSION); } break; case IPCP_OPT_ADDRESS: /* * Peer doesn't like our local IP address. See * if we can do something for him. We'll drop * him our address then. */ if (len >= 6 && p[1] == 6) { wantaddr = p[2] << 24 | p[3] << 16 | p[4] << 8 | p[5]; sp->ipcp.opts |= (1 << IPCP_OPT_ADDRESS); if (debug) log(-1, "[wantaddr %s] ", sppp_dotted_quad(wantaddr)); /* * When doing dynamic address assignment, * we accept his offer. 
Otherwise, we * ignore it and thus continue to negotiate * our already existing value. * XXX: Bogus, if he said no once, he'll * just say no again, might as well die. */ if (sp->ipcp.flags & IPCP_MYADDR_DYN) { sppp_set_ip_addr(sp, wantaddr); if (debug) log(-1, "[agree] "); sp->ipcp.flags |= IPCP_MYADDR_SEEN; } } break; } } if (debug) log(-1, "\n"); free (buf, M_TEMP); return; } static void sppp_ipcp_tlu(struct sppp *sp) { /* we are up - notify isdn daemon */ if (sp->pp_con) sp->pp_con(sp); } static void sppp_ipcp_tld(struct sppp *sp) { } static void sppp_ipcp_tls(struct sppp *sp) { /* indicate to LCP that it must stay alive */ sp->lcp.protos |= (1 << IDX_IPCP); } static void sppp_ipcp_tlf(struct sppp *sp) { /* we no longer need LCP */ sp->lcp.protos &= ~(1 << IDX_IPCP); sppp_lcp_check_and_close(sp); } static void sppp_ipcp_scr(struct sppp *sp) { char opt[6 /* compression */ + 6 /* address */]; u_long ouraddr; int i = 0; if (sp->ipcp.opts & (1 << IPCP_OPT_COMPRESSION)) { opt[i++] = IPCP_OPT_COMPRESSION; opt[i++] = 6; opt[i++] = IPCP_COMP_VJ >> 8; opt[i++] = IPCP_COMP_VJ; opt[i++] = sp->ipcp.max_state; opt[i++] = sp->ipcp.compress_cid; } if (sp->ipcp.opts & (1 << IPCP_OPT_ADDRESS)) { sppp_get_ip_addrs(sp, &ouraddr, 0, 0); opt[i++] = IPCP_OPT_ADDRESS; opt[i++] = 6; opt[i++] = ouraddr >> 24; opt[i++] = ouraddr >> 16; opt[i++] = ouraddr >> 8; opt[i++] = ouraddr; } sp->confid[IDX_IPCP] = ++sp->pp_seq[IDX_IPCP]; sppp_cp_send(sp, PPP_IPCP, CONF_REQ, sp->confid[IDX_IPCP], i, &opt); } #else /* !INET */ static void sppp_ipcp_init(struct sppp *sp) { } static void sppp_ipcp_up(struct sppp *sp) { } static void sppp_ipcp_down(struct sppp *sp) { } static void sppp_ipcp_open(struct sppp *sp) { } static void sppp_ipcp_close(struct sppp *sp) { } static void sppp_ipcp_TO(void *cookie) { } static int sppp_ipcp_RCR(struct sppp *sp, struct lcp_header *h, int len) { return (0); } static void sppp_ipcp_RCN_rej(struct sppp *sp, struct lcp_header *h, int len) { } static void sppp_ipcp_RCN_nak(struct sppp *sp, struct lcp_header *h, int len) { } static void sppp_ipcp_tlu(struct sppp *sp) { } static void sppp_ipcp_tld(struct sppp *sp) { } static void sppp_ipcp_tls(struct sppp *sp) { } static void sppp_ipcp_tlf(struct sppp *sp) { } static void sppp_ipcp_scr(struct sppp *sp) { } #endif /* *--------------------------------------------------------------------------* * * * The IPv6CP implementation. * * * *--------------------------------------------------------------------------* */ #ifdef INET6 static void sppp_ipv6cp_init(struct sppp *sp) { sp->ipv6cp.opts = 0; sp->ipv6cp.flags = 0; sp->state[IDX_IPV6CP] = STATE_INITIAL; sp->fail_counter[IDX_IPV6CP] = 0; sp->pp_seq[IDX_IPV6CP] = 0; sp->pp_rseq[IDX_IPV6CP] = 0; callout_init(&sp->ch[IDX_IPV6CP], 1); } static void sppp_ipv6cp_up(struct sppp *sp) { sppp_up_event(&ipv6cp, sp); } static void sppp_ipv6cp_down(struct sppp *sp) { sppp_down_event(&ipv6cp, sp); } static void sppp_ipv6cp_open(struct sppp *sp) { STDDCL; struct in6_addr myaddr, hisaddr; #ifdef IPV6CP_MYIFID_DYN sp->ipv6cp.flags &= ~(IPV6CP_MYIFID_SEEN|IPV6CP_MYIFID_DYN); #else sp->ipv6cp.flags &= ~IPV6CP_MYIFID_SEEN; #endif sppp_get_ip6_addrs(sp, &myaddr, &hisaddr, 0); /* * If we don't have our address, this probably means our * interface doesn't want to talk IPv6 at all. (This could * be the case if somebody wants to speak only IPX, for * example.) Don't open IPv6CP in this case. 
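 * Note that IPv6CP negotiates only the 64-bit interface identifier,
 * the low half of the fe80::/64 link-local address, never a full
 * address; the RCR/RCN handlers below therefore copy only bytes 8
 * through 15 of the identifier and fill in the fe80:: prefix and
 * scope themselves.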
*/ if (IN6_IS_ADDR_UNSPECIFIED(&myaddr)) { /* XXX this message should go away */ if (debug) log(LOG_DEBUG, SPP_FMT "ipv6cp_open(): no IPv6 interface\n", SPP_ARGS(ifp)); return; } sp->ipv6cp.flags |= IPV6CP_MYIFID_SEEN; sp->ipv6cp.opts |= (1 << IPV6CP_OPT_IFID); sppp_open_event(&ipv6cp, sp); } static void sppp_ipv6cp_close(struct sppp *sp) { sppp_close_event(&ipv6cp, sp); } static void sppp_ipv6cp_TO(void *cookie) { sppp_to_event(&ipv6cp, (struct sppp *)cookie); } /* * Analyze a configure request. Return true if it was agreeable, and * caused action sca, false if it has been rejected or nak'ed, and * caused action scn. (The return value is used to make the state * transition decision in the state automaton.) */ static int sppp_ipv6cp_RCR(struct sppp *sp, struct lcp_header *h, int len) { u_char *buf, *r, *p; struct ifnet *ifp = SP2IFP(sp); int rlen, origlen, debug = ifp->if_flags & IFF_DEBUG; struct in6_addr myaddr, desiredaddr, suggestaddr; int ifidcount; int type; int collision, nohisaddr; char ip6buf[INET6_ADDRSTRLEN]; len -= 4; origlen = len; /* * Make sure to allocate a buf that can at least hold a * conf-nak with an `address' option. We might need it below. */ buf = r = malloc ((len < 6? 6: len), M_TEMP, M_NOWAIT); if (! buf) return (0); /* pass 1: see if we can recognize them */ if (debug) log(LOG_DEBUG, SPP_FMT "ipv6cp parse opts:", SPP_ARGS(ifp)); p = (void*) (h+1); ifidcount = 0; for (rlen=0; len >= 2 && p[1] >= 2 && len >= p[1]; len-=p[1], p+=p[1]) { if (debug) log(-1, " %s", sppp_ipv6cp_opt_name(*p)); switch (*p) { case IPV6CP_OPT_IFID: if (len >= 10 && p[1] == 10 && ifidcount == 0) { /* correctly formed address option */ ifidcount++; continue; } if (debug) log(-1, " [invalid]"); break; #ifdef notyet case IPV6CP_OPT_COMPRESSION: if (len >= 4 && p[1] >= 4) { /* correctly formed compress option */ continue; } if (debug) log(-1, " [invalid]"); break; #endif default: /* Others not supported. */ if (debug) log(-1, " [rej]"); break; } /* Add the option to rejected list. 
*/ bcopy (p, r, p[1]); r += p[1]; rlen += p[1]; } if (rlen) { if (debug) log(-1, " send conf-rej\n"); sppp_cp_send (sp, PPP_IPV6CP, CONF_REJ, h->ident, rlen, buf); goto end; } else if (debug) log(-1, "\n"); /* pass 2: parse option values */ sppp_get_ip6_addrs(sp, &myaddr, 0, 0); if (debug) log(LOG_DEBUG, SPP_FMT "ipv6cp parse opt values: ", SPP_ARGS(ifp)); p = (void*) (h+1); len = origlen; type = CONF_ACK; for (rlen=0; len >= 2 && p[1] >= 2 && len >= p[1]; len-=p[1], p+=p[1]) { if (debug) log(-1, " %s", sppp_ipv6cp_opt_name(*p)); switch (*p) { #ifdef notyet case IPV6CP_OPT_COMPRESSION: continue; #endif case IPV6CP_OPT_IFID: bzero(&desiredaddr, sizeof(desiredaddr)); bcopy(&p[2], &desiredaddr.s6_addr[8], 8); collision = (bcmp(&desiredaddr.s6_addr[8], &myaddr.s6_addr[8], 8) == 0); nohisaddr = IN6_IS_ADDR_UNSPECIFIED(&desiredaddr); desiredaddr.s6_addr16[0] = htons(0xfe80); (void)in6_setscope(&desiredaddr, SP2IFP(sp), NULL); if (!collision && !nohisaddr) { /* no collision, hisaddr known - Conf-Ack */ type = CONF_ACK; if (debug) { log(-1, " %s [%s]", ip6_sprintf(ip6buf, &desiredaddr), sppp_cp_type_name(type)); } continue; } bzero(&suggestaddr, sizeof(suggestaddr)); if (collision && nohisaddr) { /* collision, hisaddr unknown - Conf-Rej */ type = CONF_REJ; bzero(&p[2], 8); } else { /* * - no collision, hisaddr unknown, or * - collision, hisaddr known * Conf-Nak, suggest hisaddr */ type = CONF_NAK; sppp_suggest_ip6_addr(sp, &suggestaddr); bcopy(&suggestaddr.s6_addr[8], &p[2], 8); } if (debug) log(-1, " %s [%s]", ip6_sprintf(ip6buf, &desiredaddr), sppp_cp_type_name(type)); break; } /* Add the option to nak'ed list. */ bcopy (p, r, p[1]); r += p[1]; rlen += p[1]; } if (rlen == 0 && type == CONF_ACK) { if (debug) log(-1, " send %s\n", sppp_cp_type_name(type)); sppp_cp_send (sp, PPP_IPV6CP, type, h->ident, origlen, h+1); } else { #ifdef DIAGNOSTIC if (type == CONF_ACK) panic("IPv6CP RCR: CONF_ACK with non-zero rlen"); #endif if (debug) { log(-1, " send %s suggest %s\n", sppp_cp_type_name(type), ip6_sprintf(ip6buf, &suggestaddr)); } sppp_cp_send (sp, PPP_IPV6CP, type, h->ident, rlen, buf); } end: free (buf, M_TEMP); return (rlen == 0); } /* * Analyze the IPv6CP Configure-Reject option list, and adjust our * negotiation. */ static void sppp_ipv6cp_RCN_rej(struct sppp *sp, struct lcp_header *h, int len) { u_char *buf, *p; struct ifnet *ifp = SP2IFP(sp); int debug = ifp->if_flags & IFF_DEBUG; len -= 4; buf = malloc (len, M_TEMP, M_NOWAIT); if (!buf) return; if (debug) log(LOG_DEBUG, SPP_FMT "ipv6cp rej opts:", SPP_ARGS(ifp)); p = (void*) (h+1); for (; len >= 2 && p[1] >= 2 && len >= p[1]; len -= p[1], p += p[1]) { if (debug) log(-1, " %s", sppp_ipv6cp_opt_name(*p)); switch (*p) { case IPV6CP_OPT_IFID: /* * Peer doesn't grok address option. This is * bad. XXX Should we better give up here? */ sp->ipv6cp.opts &= ~(1 << IPV6CP_OPT_IFID); break; #ifdef notyet case IPV6CP_OPT_COMPRESS: sp->ipv6cp.opts &= ~(1 << IPV6CP_OPT_COMPRESS); break; #endif } } if (debug) log(-1, "\n"); free (buf, M_TEMP); return; } /* * Analyze the IPv6CP Configure-NAK option list, and adjust our * negotiation. 
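 *
 * A nak of IPV6CP_OPT_IFID carries the interface identifier the peer
 * would rather see us use.  Unless IPV6CP_MYIFID_DYN is compiled in,
 * we keep our existing (normally MAC-derived) identifier and simply
 * re-request it, accepting the small risk of a request/nak loop noted
 * in the handler below.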
*/ static void sppp_ipv6cp_RCN_nak(struct sppp *sp, struct lcp_header *h, int len) { u_char *buf, *p; struct ifnet *ifp = SP2IFP(sp); int debug = ifp->if_flags & IFF_DEBUG; struct in6_addr suggestaddr; char ip6buf[INET6_ADDRSTRLEN]; len -= 4; buf = malloc (len, M_TEMP, M_NOWAIT); if (!buf) return; if (debug) log(LOG_DEBUG, SPP_FMT "ipv6cp nak opts:", SPP_ARGS(ifp)); p = (void*) (h+1); for (; len >= 2 && p[1] >= 2 && len >= p[1]; len -= p[1], p += p[1]) { if (debug) log(-1, " %s", sppp_ipv6cp_opt_name(*p)); switch (*p) { case IPV6CP_OPT_IFID: /* * Peer doesn't like our local ifid. See * if we can do something for him. We'll drop * him our address then. */ if (len < 10 || p[1] != 10) break; bzero(&suggestaddr, sizeof(suggestaddr)); suggestaddr.s6_addr16[0] = htons(0xfe80); (void)in6_setscope(&suggestaddr, SP2IFP(sp), NULL); bcopy(&p[2], &suggestaddr.s6_addr[8], 8); sp->ipv6cp.opts |= (1 << IPV6CP_OPT_IFID); if (debug) log(-1, " [suggestaddr %s]", ip6_sprintf(ip6buf, &suggestaddr)); #ifdef IPV6CP_MYIFID_DYN /* * When doing dynamic address assignment, * we accept his offer. */ if (sp->ipv6cp.flags & IPV6CP_MYIFID_DYN) { struct in6_addr lastsuggest; /* * If equals to * , * we have a collision. generate new random * ifid. */ sppp_suggest_ip6_addr(&lastsuggest); if (IN6_ARE_ADDR_EQUAL(&suggestaddr, lastsuggest)) { if (debug) log(-1, " [random]"); sppp_gen_ip6_addr(sp, &suggestaddr); } sppp_set_ip6_addr(sp, &suggestaddr, 0); if (debug) log(-1, " [agree]"); sp->ipv6cp.flags |= IPV6CP_MYIFID_SEEN; } #else /* * Since we do not do dynamic address assignment, * we ignore it and thus continue to negotiate * our already existing value. This can possibly * go into infinite request-reject loop. * * This is not likely because we normally use * ifid based on MAC-address. * If you have no ethernet card on the node, too bad. * XXX should we use fail_counter? */ #endif break; #ifdef notyet case IPV6CP_OPT_COMPRESS: /* * Peer wants different compression parameters. 
*/ break; #endif } } if (debug) log(-1, "\n"); free (buf, M_TEMP); return; } static void sppp_ipv6cp_tlu(struct sppp *sp) { /* we are up - notify isdn daemon */ if (sp->pp_con) sp->pp_con(sp); } static void sppp_ipv6cp_tld(struct sppp *sp) { } static void sppp_ipv6cp_tls(struct sppp *sp) { /* indicate to LCP that it must stay alive */ sp->lcp.protos |= (1 << IDX_IPV6CP); } static void sppp_ipv6cp_tlf(struct sppp *sp) { #if 0 /* need #if 0 to close IPv6CP properly */ /* we no longer need LCP */ sp->lcp.protos &= ~(1 << IDX_IPV6CP); sppp_lcp_check_and_close(sp); #endif } static void sppp_ipv6cp_scr(struct sppp *sp) { char opt[10 /* ifid */ + 4 /* compression, minimum */]; struct in6_addr ouraddr; int i = 0; if (sp->ipv6cp.opts & (1 << IPV6CP_OPT_IFID)) { sppp_get_ip6_addrs(sp, &ouraddr, 0, 0); opt[i++] = IPV6CP_OPT_IFID; opt[i++] = 10; bcopy(&ouraddr.s6_addr[8], &opt[i], 8); i += 8; } #ifdef notyet if (sp->ipv6cp.opts & (1 << IPV6CP_OPT_COMPRESSION)) { opt[i++] = IPV6CP_OPT_COMPRESSION; opt[i++] = 4; opt[i++] = 0; /* TBD */ opt[i++] = 0; /* TBD */ /* variable length data may follow */ } #endif sp->confid[IDX_IPV6CP] = ++sp->pp_seq[IDX_IPV6CP]; sppp_cp_send(sp, PPP_IPV6CP, CONF_REQ, sp->confid[IDX_IPV6CP], i, &opt); } #else /*INET6*/ static void sppp_ipv6cp_init(struct sppp *sp) { } static void sppp_ipv6cp_up(struct sppp *sp) { } static void sppp_ipv6cp_down(struct sppp *sp) { } static void sppp_ipv6cp_open(struct sppp *sp) { } static void sppp_ipv6cp_close(struct sppp *sp) { } static void sppp_ipv6cp_TO(void *sp) { } static int sppp_ipv6cp_RCR(struct sppp *sp, struct lcp_header *h, int len) { return 0; } static void sppp_ipv6cp_RCN_rej(struct sppp *sp, struct lcp_header *h, int len) { } static void sppp_ipv6cp_RCN_nak(struct sppp *sp, struct lcp_header *h, int len) { } static void sppp_ipv6cp_tlu(struct sppp *sp) { } static void sppp_ipv6cp_tld(struct sppp *sp) { } static void sppp_ipv6cp_tls(struct sppp *sp) { } static void sppp_ipv6cp_tlf(struct sppp *sp) { } static void sppp_ipv6cp_scr(struct sppp *sp) { } #endif /*INET6*/ /* *--------------------------------------------------------------------------* * * * The CHAP implementation. * * * *--------------------------------------------------------------------------* */ /* * The authentication protocols don't employ a full-fledged state machine as * the control protocols do, since they do have Open and Close events, but * not Up and Down, nor are they explicitly terminated. Also, use of the * authentication protocols may be different in both directions (this makes * sense, think of a machine that never accepts incoming calls but only * calls out, it doesn't require the called party to authenticate itself). 
* * Our state machine for the local authentication protocol (we are requesting * the peer to authenticate) looks like: * * RCA- * +--------------------------------------------+ * V scn,tld| * +--------+ Close +---------+ RCA+ * | |<----------------------------------| |------+ * +--->| Closed | TO* | Opened | sca | * | | |-----+ +-------| |<-----+ * | +--------+ irc | | +---------+ * | ^ | | ^ * | | | | | * | | | | | * | TO-| | | | * | |tld TO+ V | | * | | +------->+ | | * | | | | | | * | +--------+ V | | * | | |<----+<--------------------+ | * | | Req- | scr | * | | Sent | | * | | | | * | +--------+ | * | RCA- | | RCA+ | * +------+ +------------------------------------------+ * scn,tld sca,irc,ict,tlu * * * with: * * Open: LCP reached authentication phase * Close: LCP reached terminate phase * * RCA+: received reply (pap-req, chap-response), acceptable * RCN: received reply (pap-req, chap-response), not acceptable * TO+: timeout with restart counter >= 0 * TO-: timeout with restart counter < 0 * TO*: reschedule timeout for CHAP * * scr: send request packet (none for PAP, chap-challenge) * sca: send ack packet (pap-ack, chap-success) * scn: send nak packet (pap-nak, chap-failure) * ict: initialize re-challenge timer (CHAP only) * * tlu: this-layer-up, LCP reaches network phase * tld: this-layer-down, LCP enters terminate phase * * Note that in CHAP mode, after sending a new challenge, while the state * automaton falls back into Req-Sent state, it doesn't signal a tld * event to LCP, so LCP remains in network phase. Only after not getting * any response (or after getting an unacceptable response), CHAP closes, * causing LCP to enter terminate phase. * * With PAP, there is no initial request that can be sent. The peer is * expected to send one based on the successful negotiation of PAP as * the authentication protocol during the LCP option negotiation. * * Incoming authentication protocol requests (remote requests * authentication, we are peer) don't employ a state machine at all, * they are simply answered. Some peers [Ascend P50 firmware rev * 4.50] react allergically when sending IPCP requests while they are * still in authentication phase (thereby violating the standard that * demands that these NCP packets are to be discarded), so we keep * track of the peer demanding us to authenticate, and only proceed to * phase network once we've seen a positive acknowledge for the * authentication. */ /* * Handle incoming CHAP packets. 
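/*
 * Illustrative sketch (not part of this change): a CHAP Challenge/Response
 * packet is laid out as code(1) id(1) length(2) value-size(1) value(N)
 * name(rest), which is why sppp_chap_input() below computes
 * "name_len = len - value_len - 5" -- four header bytes plus the value-size
 * octet.  Stand-alone parse of the same layout (hypothetical packet):
 */
#include <stdio.h>

int
main(void)
{
        /* code=1 (Challenge), id=0x42, length=0x000d, value-size=4 */
        const unsigned char pkt[] = {
                1, 0x42, 0x00, 0x0d, 4,
                0xde, 0xad, 0xbe, 0xef,         /* challenge value */
                'p', 'e', 'e', 'r'              /* system name */
        };
        int len = (int)sizeof(pkt);
        const unsigned char *value = pkt + 4 + 1; /* skip header + size octet */
        int value_len = pkt[4];
        const unsigned char *name = value + value_len;
        int name_len = len - value_len - 5;

        if (name_len < 0) {
                printf("corrupted challenge\n");
                return (1);
        }
        printf("value-size=%d name=%.*s\n", value_len, name_len,
            (const char *)name);
        return (0);
}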
*/ static void sppp_chap_input(struct sppp *sp, struct mbuf *m) { STDDCL; struct lcp_header *h; int len; u_char *value, *name, digest[AUTHKEYLEN], dsize; int value_len, name_len; MD5_CTX ctx; len = m->m_pkthdr.len; if (len < 4) { if (debug) log(LOG_DEBUG, SPP_FMT "chap invalid packet length: %d bytes\n", SPP_ARGS(ifp), len); return; } h = mtod (m, struct lcp_header*); if (len > ntohs (h->len)) len = ntohs (h->len); switch (h->type) { /* challenge, failure and success are his authproto */ case CHAP_CHALLENGE: value = 1 + (u_char*)(h+1); value_len = value[-1]; name = value + value_len; name_len = len - value_len - 5; if (name_len < 0) { if (debug) { log(LOG_DEBUG, SPP_FMT "chap corrupted challenge " "<%s id=0x%x len=%d", SPP_ARGS(ifp), sppp_auth_type_name(PPP_CHAP, h->type), h->ident, ntohs(h->len)); sppp_print_bytes((u_char*) (h+1), len-4); log(-1, ">\n"); } break; } if (debug) { log(LOG_DEBUG, SPP_FMT "chap input <%s id=0x%x len=%d name=", SPP_ARGS(ifp), sppp_auth_type_name(PPP_CHAP, h->type), h->ident, ntohs(h->len)); sppp_print_string((char*) name, name_len); log(-1, " value-size=%d value=", value_len); sppp_print_bytes(value, value_len); log(-1, ">\n"); } /* Compute reply value. */ MD5Init(&ctx); MD5Update(&ctx, &h->ident, 1); MD5Update(&ctx, sp->myauth.secret, sppp_strnlen(sp->myauth.secret, AUTHKEYLEN)); MD5Update(&ctx, value, value_len); MD5Final(digest, &ctx); dsize = sizeof digest; sppp_auth_send(&chap, sp, CHAP_RESPONSE, h->ident, sizeof dsize, (const char *)&dsize, sizeof digest, digest, (size_t)sppp_strnlen(sp->myauth.name, AUTHNAMELEN), sp->myauth.name, 0); break; case CHAP_SUCCESS: if (debug) { log(LOG_DEBUG, SPP_FMT "chap success", SPP_ARGS(ifp)); if (len > 4) { log(-1, ": "); sppp_print_string((char*)(h + 1), len - 4); } log(-1, "\n"); } SPPP_LOCK(sp); sp->pp_flags &= ~PP_NEEDAUTH; if (sp->myauth.proto == PPP_CHAP && (sp->lcp.opts & (1 << LCP_OPT_AUTH_PROTO)) && (sp->lcp.protos & (1 << IDX_CHAP)) == 0) { /* * We are authenticator for CHAP but didn't * complete yet. Leave it to tlu to proceed * to network phase. 
*/ SPPP_UNLOCK(sp); break; } SPPP_UNLOCK(sp); sppp_phase_network(sp); break; case CHAP_FAILURE: if (debug) { log(LOG_INFO, SPP_FMT "chap failure", SPP_ARGS(ifp)); if (len > 4) { log(-1, ": "); sppp_print_string((char*)(h + 1), len - 4); } log(-1, "\n"); } else log(LOG_INFO, SPP_FMT "chap failure\n", SPP_ARGS(ifp)); /* await LCP shutdown by authenticator */ break; /* response is my authproto */ case CHAP_RESPONSE: value = 1 + (u_char*)(h+1); value_len = value[-1]; name = value + value_len; name_len = len - value_len - 5; if (name_len < 0) { if (debug) { log(LOG_DEBUG, SPP_FMT "chap corrupted response " "<%s id=0x%x len=%d", SPP_ARGS(ifp), sppp_auth_type_name(PPP_CHAP, h->type), h->ident, ntohs(h->len)); sppp_print_bytes((u_char*)(h+1), len-4); log(-1, ">\n"); } break; } if (h->ident != sp->confid[IDX_CHAP]) { if (debug) log(LOG_DEBUG, SPP_FMT "chap dropping response for old ID " "(got %d, expected %d)\n", SPP_ARGS(ifp), h->ident, sp->confid[IDX_CHAP]); break; } if (name_len != sppp_strnlen(sp->hisauth.name, AUTHNAMELEN) || bcmp(name, sp->hisauth.name, name_len) != 0) { log(LOG_INFO, SPP_FMT "chap response, his name ", SPP_ARGS(ifp)); sppp_print_string(name, name_len); log(-1, " != expected "); sppp_print_string(sp->hisauth.name, sppp_strnlen(sp->hisauth.name, AUTHNAMELEN)); log(-1, "\n"); } if (debug) { log(LOG_DEBUG, SPP_FMT "chap input(%s) " "<%s id=0x%x len=%d name=", SPP_ARGS(ifp), sppp_state_name(sp->state[IDX_CHAP]), sppp_auth_type_name(PPP_CHAP, h->type), h->ident, ntohs (h->len)); sppp_print_string((char*)name, name_len); log(-1, " value-size=%d value=", value_len); sppp_print_bytes(value, value_len); log(-1, ">\n"); } if (value_len != AUTHKEYLEN) { if (debug) log(LOG_DEBUG, SPP_FMT "chap bad hash value length: " "%d bytes, should be %d\n", SPP_ARGS(ifp), value_len, AUTHKEYLEN); break; } MD5Init(&ctx); MD5Update(&ctx, &h->ident, 1); MD5Update(&ctx, sp->hisauth.secret, sppp_strnlen(sp->hisauth.secret, AUTHKEYLEN)); MD5Update(&ctx, sp->myauth.challenge, AUTHKEYLEN); MD5Final(digest, &ctx); #define FAILMSG "Failed..." #define SUCCMSG "Welcome!" if (value_len != sizeof digest || bcmp(digest, value, value_len) != 0) { /* action scn, tld */ sppp_auth_send(&chap, sp, CHAP_FAILURE, h->ident, sizeof(FAILMSG) - 1, (u_char *)FAILMSG, 0); chap.tld(sp); break; } /* action sca, perhaps tlu */ if (sp->state[IDX_CHAP] == STATE_REQ_SENT || sp->state[IDX_CHAP] == STATE_OPENED) sppp_auth_send(&chap, sp, CHAP_SUCCESS, h->ident, sizeof(SUCCMSG) - 1, (u_char *)SUCCMSG, 0); if (sp->state[IDX_CHAP] == STATE_REQ_SENT) { sppp_cp_change_state(&chap, sp, STATE_OPENED); chap.tlu(sp); } break; default: /* Unknown CHAP packet type -- ignore. */ if (debug) { log(LOG_DEBUG, SPP_FMT "chap unknown input(%s) " "<0x%x id=0x%xh len=%d", SPP_ARGS(ifp), sppp_state_name(sp->state[IDX_CHAP]), h->type, h->ident, ntohs(h->len)); sppp_print_bytes((u_char*)(h+1), len-4); log(-1, ">\n"); } break; } } static void sppp_chap_init(struct sppp *sp) { /* Chap doesn't have STATE_INITIAL at all. 
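/*
 * Illustrative sketch (not part of this change): both directions above hash
 * the same three items in the same order -- identifier octet, shared secret,
 * challenge value -- so verifying a response is just recomputing the digest
 * and comparing the 16 bytes.  A userland model of that, assuming OpenSSL's
 * MD5_* API (the kernel's sys/md5.h MD5Init/MD5Update/MD5Final has the same
 * shape); challenge and secret here are made up:
 */
#include <stdio.h>
#include <string.h>
#include <openssl/md5.h>

static void
chap_hash(unsigned char ident, const char *secret, const unsigned char *chal,
    size_t chal_len, unsigned char out[MD5_DIGEST_LENGTH])
{
        MD5_CTX ctx;

        MD5_Init(&ctx);
        MD5_Update(&ctx, &ident, 1);
        MD5_Update(&ctx, secret, strlen(secret));
        MD5_Update(&ctx, chal, chal_len);
        MD5_Final(out, &ctx);
}

int
main(void)
{
        const unsigned char chal[] = "0123456789abcdef";
        unsigned char mine[MD5_DIGEST_LENGTH], theirs[MD5_DIGEST_LENGTH];

        chap_hash(0x42, "sekrit", chal, sizeof(chal) - 1, theirs); /* peer */
        chap_hash(0x42, "sekrit", chal, sizeof(chal) - 1, mine);   /* our check */
        printf("%s\n", memcmp(mine, theirs, sizeof(mine)) == 0 ?
            "chap-success" : "chap-failure");
        return (0);
}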
*/ sp->state[IDX_CHAP] = STATE_CLOSED; sp->fail_counter[IDX_CHAP] = 0; sp->pp_seq[IDX_CHAP] = 0; sp->pp_rseq[IDX_CHAP] = 0; callout_init(&sp->ch[IDX_CHAP], 1); } static void sppp_chap_open(struct sppp *sp) { if (sp->myauth.proto == PPP_CHAP && (sp->lcp.opts & (1 << LCP_OPT_AUTH_PROTO)) != 0) { /* we are authenticator for CHAP, start it */ chap.scr(sp); sp->rst_counter[IDX_CHAP] = sp->lcp.max_configure; sppp_cp_change_state(&chap, sp, STATE_REQ_SENT); } /* nothing to be done if we are peer, await a challenge */ } static void sppp_chap_close(struct sppp *sp) { if (sp->state[IDX_CHAP] != STATE_CLOSED) sppp_cp_change_state(&chap, sp, STATE_CLOSED); } static void sppp_chap_TO(void *cookie) { struct sppp *sp = (struct sppp *)cookie; STDDCL; SPPP_LOCK(sp); if (debug) log(LOG_DEBUG, SPP_FMT "chap TO(%s) rst_counter = %d\n", SPP_ARGS(ifp), sppp_state_name(sp->state[IDX_CHAP]), sp->rst_counter[IDX_CHAP]); if (--sp->rst_counter[IDX_CHAP] < 0) /* TO- event */ switch (sp->state[IDX_CHAP]) { case STATE_REQ_SENT: chap.tld(sp); sppp_cp_change_state(&chap, sp, STATE_CLOSED); break; } else /* TO+ (or TO*) event */ switch (sp->state[IDX_CHAP]) { case STATE_OPENED: /* TO* event */ sp->rst_counter[IDX_CHAP] = sp->lcp.max_configure; /* FALLTHROUGH */ case STATE_REQ_SENT: chap.scr(sp); /* sppp_cp_change_state() will restart the timer */ sppp_cp_change_state(&chap, sp, STATE_REQ_SENT); break; } SPPP_UNLOCK(sp); } static void sppp_chap_tlu(struct sppp *sp) { STDDCL; int i; i = 0; sp->rst_counter[IDX_CHAP] = sp->lcp.max_configure; /* * Some broken CHAP implementations (Conware CoNet, firmware * 4.0.?) don't want to re-authenticate their CHAP once the * initial challenge-response exchange has taken place. * Provide for an option to avoid rechallenges. */ if ((sp->hisauth.flags & AUTHFLAG_NORECHALLENGE) == 0) { /* * Compute the re-challenge timeout. This will yield * a number between 300 and 810 seconds. */ i = 300 + ((unsigned)(random() & 0xff00) >> 7); callout_reset(&sp->ch[IDX_CHAP], i * hz, chap.TO, (void *)sp); } if (debug) { log(LOG_DEBUG, SPP_FMT "chap %s, ", SPP_ARGS(ifp), sp->pp_phase == PHASE_NETWORK? "reconfirmed": "tlu"); if ((sp->hisauth.flags & AUTHFLAG_NORECHALLENGE) == 0) log(-1, "next re-challenge in %d seconds\n", i); else log(-1, "re-challenging suppressed\n"); } SPPP_LOCK(sp); /* indicate to LCP that we need to be closed down */ sp->lcp.protos |= (1 << IDX_CHAP); if (sp->pp_flags & PP_NEEDAUTH) { /* * Remote is authenticator, but his auth proto didn't * complete yet. Defer the transition to network * phase. */ SPPP_UNLOCK(sp); return; } SPPP_UNLOCK(sp); /* * If we are already in phase network, we are done here. This * is the case if this is a dummy tlu event after a re-challenge. */ if (sp->pp_phase != PHASE_NETWORK) sppp_phase_network(sp); } static void sppp_chap_tld(struct sppp *sp) { STDDCL; if (debug) log(LOG_DEBUG, SPP_FMT "chap tld\n", SPP_ARGS(ifp)); callout_stop(&sp->ch[IDX_CHAP]); sp->lcp.protos &= ~(1 << IDX_CHAP); lcp.Close(sp); } static void sppp_chap_scr(struct sppp *sp) { u_long *ch, seed; u_char clen; /* Compute random challenge. 
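/*
 * Illustrative sketch (not part of this change): the re-challenge interval in
 * sppp_chap_tlu() above is "300 + ((random() & 0xff00) >> 7)".  The mask
 * keeps bits 8..15 (0..0xff00 in steps of 0x100) and the shift maps that to
 * an even value in 0..510, giving the 300..810 second range the comment
 * quotes.  Quick stand-alone check of the bounds:
 */
#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
        long lo = 811, hi = -1;
        int n;

        for (n = 0; n < 100000; n++) {
                long i = 300 + ((unsigned long)(random() & 0xff00) >> 7);

                if (i < lo)
                        lo = i;
                if (i > hi)
                        hi = i;
        }
        printf("observed interval range: %ld..%ld seconds\n", lo, hi);
        return (0);
}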
*/ ch = (u_long *)sp->myauth.challenge; read_random(&seed, sizeof seed); ch[0] = seed ^ random(); ch[1] = seed ^ random(); ch[2] = seed ^ random(); ch[3] = seed ^ random(); clen = AUTHKEYLEN; sp->confid[IDX_CHAP] = ++sp->pp_seq[IDX_CHAP]; sppp_auth_send(&chap, sp, CHAP_CHALLENGE, sp->confid[IDX_CHAP], sizeof clen, (const char *)&clen, (size_t)AUTHKEYLEN, sp->myauth.challenge, (size_t)sppp_strnlen(sp->myauth.name, AUTHNAMELEN), sp->myauth.name, 0); } /* *--------------------------------------------------------------------------* * * * The PAP implementation. * * * *--------------------------------------------------------------------------* */ /* * For PAP, we need to keep a little state also if we are the peer, not the * authenticator. This is since we don't get a request to authenticate, but * have to repeatedly authenticate ourself until we got a response (or the * retry counter is expired). */ /* * Handle incoming PAP packets. */ static void sppp_pap_input(struct sppp *sp, struct mbuf *m) { STDDCL; struct lcp_header *h; int len; u_char *name, *passwd, mlen; int name_len, passwd_len; len = m->m_pkthdr.len; if (len < 5) { if (debug) log(LOG_DEBUG, SPP_FMT "pap invalid packet length: %d bytes\n", SPP_ARGS(ifp), len); return; } h = mtod (m, struct lcp_header*); if (len > ntohs (h->len)) len = ntohs (h->len); switch (h->type) { /* PAP request is my authproto */ case PAP_REQ: name = 1 + (u_char*)(h+1); name_len = name[-1]; passwd = name + name_len + 1; if (name_len > len - 6 || (passwd_len = passwd[-1]) > len - 6 - name_len) { if (debug) { log(LOG_DEBUG, SPP_FMT "pap corrupted input " "<%s id=0x%x len=%d", SPP_ARGS(ifp), sppp_auth_type_name(PPP_PAP, h->type), h->ident, ntohs(h->len)); sppp_print_bytes((u_char*)(h+1), len-4); log(-1, ">\n"); } break; } if (debug) { log(LOG_DEBUG, SPP_FMT "pap input(%s) " "<%s id=0x%x len=%d name=", SPP_ARGS(ifp), sppp_state_name(sp->state[IDX_PAP]), sppp_auth_type_name(PPP_PAP, h->type), h->ident, ntohs(h->len)); sppp_print_string((char*)name, name_len); log(-1, " passwd="); sppp_print_string((char*)passwd, passwd_len); log(-1, ">\n"); } if (name_len != sppp_strnlen(sp->hisauth.name, AUTHNAMELEN) || passwd_len != sppp_strnlen(sp->hisauth.secret, AUTHKEYLEN) || bcmp(name, sp->hisauth.name, name_len) != 0 || bcmp(passwd, sp->hisauth.secret, passwd_len) != 0) { /* action scn, tld */ mlen = sizeof(FAILMSG) - 1; sppp_auth_send(&pap, sp, PAP_NAK, h->ident, sizeof mlen, (const char *)&mlen, sizeof(FAILMSG) - 1, (u_char *)FAILMSG, 0); pap.tld(sp); break; } /* action sca, perhaps tlu */ if (sp->state[IDX_PAP] == STATE_REQ_SENT || sp->state[IDX_PAP] == STATE_OPENED) { mlen = sizeof(SUCCMSG) - 1; sppp_auth_send(&pap, sp, PAP_ACK, h->ident, sizeof mlen, (const char *)&mlen, sizeof(SUCCMSG) - 1, (u_char *)SUCCMSG, 0); } if (sp->state[IDX_PAP] == STATE_REQ_SENT) { sppp_cp_change_state(&pap, sp, STATE_OPENED); pap.tlu(sp); } break; /* ack and nak are his authproto */ case PAP_ACK: callout_stop(&sp->pap_my_to_ch); if (debug) { log(LOG_DEBUG, SPP_FMT "pap success", SPP_ARGS(ifp)); name_len = *((char *)h); if (len > 5 && name_len) { log(-1, ": "); sppp_print_string((char*)(h+1), name_len); } log(-1, "\n"); } SPPP_LOCK(sp); sp->pp_flags &= ~PP_NEEDAUTH; if (sp->myauth.proto == PPP_PAP && (sp->lcp.opts & (1 << LCP_OPT_AUTH_PROTO)) && (sp->lcp.protos & (1 << IDX_PAP)) == 0) { /* * We are authenticator for PAP but didn't * complete yet. Leave it to tlu to proceed * to network phase. 
*/ SPPP_UNLOCK(sp); break; } SPPP_UNLOCK(sp); sppp_phase_network(sp); break; case PAP_NAK: callout_stop (&sp->pap_my_to_ch); if (debug) { log(LOG_INFO, SPP_FMT "pap failure", SPP_ARGS(ifp)); name_len = *((char *)h); if (len > 5 && name_len) { log(-1, ": "); sppp_print_string((char*)(h+1), name_len); } log(-1, "\n"); } else log(LOG_INFO, SPP_FMT "pap failure\n", SPP_ARGS(ifp)); /* await LCP shutdown by authenticator */ break; default: /* Unknown PAP packet type -- ignore. */ if (debug) { log(LOG_DEBUG, SPP_FMT "pap corrupted input " "<0x%x id=0x%x len=%d", SPP_ARGS(ifp), h->type, h->ident, ntohs(h->len)); sppp_print_bytes((u_char*)(h+1), len-4); log(-1, ">\n"); } break; } } static void sppp_pap_init(struct sppp *sp) { /* PAP doesn't have STATE_INITIAL at all. */ sp->state[IDX_PAP] = STATE_CLOSED; sp->fail_counter[IDX_PAP] = 0; sp->pp_seq[IDX_PAP] = 0; sp->pp_rseq[IDX_PAP] = 0; callout_init(&sp->ch[IDX_PAP], 1); callout_init(&sp->pap_my_to_ch, 1); } static void sppp_pap_open(struct sppp *sp) { if (sp->hisauth.proto == PPP_PAP && (sp->lcp.opts & (1 << LCP_OPT_AUTH_PROTO)) != 0) { /* we are authenticator for PAP, start our timer */ sp->rst_counter[IDX_PAP] = sp->lcp.max_configure; sppp_cp_change_state(&pap, sp, STATE_REQ_SENT); } if (sp->myauth.proto == PPP_PAP) { /* we are peer, send a request, and start a timer */ pap.scr(sp); callout_reset(&sp->pap_my_to_ch, sp->lcp.timeout, sppp_pap_my_TO, (void *)sp); } } static void sppp_pap_close(struct sppp *sp) { if (sp->state[IDX_PAP] != STATE_CLOSED) sppp_cp_change_state(&pap, sp, STATE_CLOSED); } /* * That's the timeout routine if we are authenticator. Since the * authenticator is basically passive in PAP, we can't do much here. */ static void sppp_pap_TO(void *cookie) { struct sppp *sp = (struct sppp *)cookie; STDDCL; SPPP_LOCK(sp); if (debug) log(LOG_DEBUG, SPP_FMT "pap TO(%s) rst_counter = %d\n", SPP_ARGS(ifp), sppp_state_name(sp->state[IDX_PAP]), sp->rst_counter[IDX_PAP]); if (--sp->rst_counter[IDX_PAP] < 0) /* TO- event */ switch (sp->state[IDX_PAP]) { case STATE_REQ_SENT: pap.tld(sp); sppp_cp_change_state(&pap, sp, STATE_CLOSED); break; } else /* TO+ event, not very much we could do */ switch (sp->state[IDX_PAP]) { case STATE_REQ_SENT: /* sppp_cp_change_state() will restart the timer */ sppp_cp_change_state(&pap, sp, STATE_REQ_SENT); break; } SPPP_UNLOCK(sp); } /* * That's the timeout handler if we are peer. Since the peer is active, * we need to retransmit our PAP request since it is apparently lost. * XXX We should impose a max counter. */ static void sppp_pap_my_TO(void *cookie) { struct sppp *sp = (struct sppp *)cookie; STDDCL; if (debug) log(LOG_DEBUG, SPP_FMT "pap peer TO\n", SPP_ARGS(ifp)); SPPP_LOCK(sp); pap.scr(sp); SPPP_UNLOCK(sp); } static void sppp_pap_tlu(struct sppp *sp) { STDDCL; sp->rst_counter[IDX_PAP] = sp->lcp.max_configure; if (debug) log(LOG_DEBUG, SPP_FMT "%s tlu\n", SPP_ARGS(ifp), pap.name); SPPP_LOCK(sp); /* indicate to LCP that we need to be closed down */ sp->lcp.protos |= (1 << IDX_PAP); if (sp->pp_flags & PP_NEEDAUTH) { /* * Remote is authenticator, but his auth proto didn't * complete yet. Defer the transition to network * phase. 
*/ SPPP_UNLOCK(sp); return; } SPPP_UNLOCK(sp); sppp_phase_network(sp); } static void sppp_pap_tld(struct sppp *sp) { STDDCL; if (debug) log(LOG_DEBUG, SPP_FMT "pap tld\n", SPP_ARGS(ifp)); callout_stop (&sp->ch[IDX_PAP]); callout_stop (&sp->pap_my_to_ch); sp->lcp.protos &= ~(1 << IDX_PAP); lcp.Close(sp); } static void sppp_pap_scr(struct sppp *sp) { u_char idlen, pwdlen; sp->confid[IDX_PAP] = ++sp->pp_seq[IDX_PAP]; pwdlen = sppp_strnlen(sp->myauth.secret, AUTHKEYLEN); idlen = sppp_strnlen(sp->myauth.name, AUTHNAMELEN); sppp_auth_send(&pap, sp, PAP_REQ, sp->confid[IDX_PAP], sizeof idlen, (const char *)&idlen, (size_t)idlen, sp->myauth.name, sizeof pwdlen, (const char *)&pwdlen, (size_t)pwdlen, sp->myauth.secret, 0); } /* * Random miscellaneous functions. */ /* * Send a PAP or CHAP proto packet. * * Varadic function, each of the elements for the ellipsis is of type * ``size_t mlen, const u_char *msg''. Processing will stop iff * mlen == 0. * NOTE: never declare variadic functions with types subject to type * promotion (i.e. u_char). This is asking for big trouble depending * on the architecture you are on... */ static void sppp_auth_send(const struct cp *cp, struct sppp *sp, unsigned int type, unsigned int id, ...) { STDDCL; struct ppp_header *h; struct lcp_header *lh; struct mbuf *m; u_char *p; int len; unsigned int mlen; const char *msg; va_list ap; MGETHDR (m, M_NOWAIT, MT_DATA); if (! m) return; m->m_pkthdr.rcvif = 0; h = mtod (m, struct ppp_header*); h->address = PPP_ALLSTATIONS; /* broadcast address */ h->control = PPP_UI; /* Unnumbered Info */ h->protocol = htons(cp->proto); lh = (struct lcp_header*)(h + 1); lh->type = type; lh->ident = id; p = (u_char*) (lh+1); va_start(ap, id); len = 0; while ((mlen = (unsigned int)va_arg(ap, size_t)) != 0) { msg = va_arg(ap, const char *); len += mlen; if (len > MHLEN - PPP_HEADER_LEN - LCP_HEADER_LEN) { va_end(ap); m_freem(m); return; } bcopy(msg, p, mlen); p += mlen; } va_end(ap); m->m_pkthdr.len = m->m_len = PPP_HEADER_LEN + LCP_HEADER_LEN + len; lh->len = htons (LCP_HEADER_LEN + len); if (debug) { log(LOG_DEBUG, SPP_FMT "%s output <%s id=0x%x len=%d", SPP_ARGS(ifp), cp->name, sppp_auth_type_name(cp->proto, lh->type), lh->ident, ntohs(lh->len)); sppp_print_bytes((u_char*) (lh+1), len); log(-1, ">\n"); } if (! IF_HANDOFF_ADJ(&sp->pp_cpq, m, ifp, 3)) if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } /* * Flush interface queue. */ static void sppp_qflush(struct ifqueue *ifq) { struct mbuf *m, *n; n = ifq->ifq_head; while ((m = n)) { n = m->m_nextpkt; m_freem (m); } ifq->ifq_head = 0; ifq->ifq_tail = 0; ifq->ifq_len = 0; } /* * Send keepalive packets, every 10 seconds. */ static void sppp_keepalive(void *dummy) { struct sppp *sp = (struct sppp*)dummy; struct ifnet *ifp = SP2IFP(sp); SPPP_LOCK(sp); /* Keepalive mode disabled or channel down? */ if (! (sp->pp_flags & PP_KEEPALIVE) || ! (ifp->if_drv_flags & IFF_DRV_RUNNING)) goto out; if (sp->pp_mode == PP_FR) { sppp_fr_keepalive (sp); goto out; } /* No keepalive in PPP mode if LCP not opened yet. */ if (sp->pp_mode != IFF_CISCO && sp->pp_phase < PHASE_AUTHENTICATE) goto out; if (sp->pp_alivecnt == MAXALIVECNT) { /* No keepalive packets got. Stop the interface. */ printf (SPP_FMT "down\n", SPP_ARGS(ifp)); if_down (ifp); sppp_qflush (&sp->pp_cpq); if (sp->pp_mode != IFF_CISCO) { /* XXX */ /* Shut down the PPP link. */ lcp.Down(sp); /* Initiate negotiation. 
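/*
 * Illustrative sketch (not part of this change): sppp_auth_send() above takes
 * its payload as variadic (size_t mlen, const u_char *msg) pairs terminated
 * by mlen == 0; its NOTE warns against variadic parameters of promotable
 * types because va_arg() must name the type the argument actually has after
 * default promotion.  A userland model of the same calling convention:
 */
#include <stdarg.h>
#include <stdio.h>
#include <string.h>

static size_t
concat(char *dst, size_t dstsize, ...)
{
        va_list ap;
        size_t mlen, off = 0;
        const char *msg;

        va_start(ap, dstsize);
        while ((mlen = va_arg(ap, size_t)) != 0) {
                msg = va_arg(ap, const char *);
                if (off + mlen >= dstsize)
                        break;                  /* would overflow: stop */
                memcpy(dst + off, msg, mlen);
                off += mlen;
        }
        va_end(ap);
        dst[off] = '\0';
        return (off);
}

int
main(void)
{
        char buf[32];

        concat(buf, sizeof(buf),
            (size_t)6, "hello ", (size_t)5, "world", (size_t)0);
        printf("%s\n", buf);
        return (0);
}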
XXX */ lcp.Up(sp); } } if (sp->pp_alivecnt <= MAXALIVECNT) ++sp->pp_alivecnt; if (sp->pp_mode == IFF_CISCO) sppp_cisco_send (sp, CISCO_KEEPALIVE_REQ, ++sp->pp_seq[IDX_LCP], sp->pp_rseq[IDX_LCP]); else if (sp->pp_phase >= PHASE_AUTHENTICATE) { long nmagic = htonl (sp->lcp.magic); sp->lcp.echoid = ++sp->pp_seq[IDX_LCP]; sppp_cp_send (sp, PPP_LCP, ECHO_REQ, sp->lcp.echoid, 4, &nmagic); } out: SPPP_UNLOCK(sp); callout_reset(&sp->keepalive_callout, hz * 10, sppp_keepalive, (void *)sp); } /* * Get both IP addresses. */ void sppp_get_ip_addrs(struct sppp *sp, u_long *src, u_long *dst, u_long *srcmask) { struct ifnet *ifp = SP2IFP(sp); struct ifaddr *ifa; struct sockaddr_in *si, *sm; u_long ssrc, ddst; sm = NULL; ssrc = ddst = 0L; /* * Pick the first AF_INET address from the list, * aliases don't make any sense on a p2p link anyway. */ si = NULL; if_addr_rlock(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) if (ifa->ifa_addr->sa_family == AF_INET) { si = (struct sockaddr_in *)ifa->ifa_addr; sm = (struct sockaddr_in *)ifa->ifa_netmask; if (si) break; } if (ifa) { if (si && si->sin_addr.s_addr) { ssrc = si->sin_addr.s_addr; if (srcmask) *srcmask = ntohl(sm->sin_addr.s_addr); } si = (struct sockaddr_in *)ifa->ifa_dstaddr; if (si && si->sin_addr.s_addr) ddst = si->sin_addr.s_addr; } if_addr_runlock(ifp); if (dst) *dst = ntohl(ddst); if (src) *src = ntohl(ssrc); } #ifdef INET /* * Set my IP address. */ static void sppp_set_ip_addr(struct sppp *sp, u_long src) { STDDCL; struct ifaddr *ifa; struct sockaddr_in *si; struct in_ifaddr *ia; /* * Pick the first AF_INET address from the list, * aliases don't make any sense on a p2p link anyway. */ si = NULL; if_addr_rlock(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family == AF_INET) { si = (struct sockaddr_in *)ifa->ifa_addr; if (si != NULL) { ifa_ref(ifa); break; } } } if_addr_runlock(ifp); if (ifa != NULL) { int error; /* delete old route */ error = rtinit(ifa, (int)RTM_DELETE, RTF_HOST); if (debug && error) { log(LOG_DEBUG, SPP_FMT "sppp_set_ip_addr: rtinit DEL failed, error=%d\n", SPP_ARGS(ifp), error); } /* set new address */ si->sin_addr.s_addr = htonl(src); ia = ifatoia(ifa); IN_IFADDR_WLOCK(); LIST_REMOVE(ia, ia_hash); LIST_INSERT_HEAD(INADDR_HASH(si->sin_addr.s_addr), ia, ia_hash); IN_IFADDR_WUNLOCK(); /* add new route */ error = rtinit(ifa, (int)RTM_ADD, RTF_HOST); if (debug && error) { log(LOG_DEBUG, SPP_FMT "sppp_set_ip_addr: rtinit ADD failed, error=%d", SPP_ARGS(ifp), error); } ifa_free(ifa); } } #endif #ifdef INET6 /* * Get both IPv6 addresses. */ static void sppp_get_ip6_addrs(struct sppp *sp, struct in6_addr *src, struct in6_addr *dst, struct in6_addr *srcmask) { struct ifnet *ifp = SP2IFP(sp); struct ifaddr *ifa; struct sockaddr_in6 *si, *sm; struct in6_addr ssrc, ddst; sm = NULL; bzero(&ssrc, sizeof(ssrc)); bzero(&ddst, sizeof(ddst)); /* * Pick the first link-local AF_INET6 address from the list, * aliases don't make any sense on a p2p link anyway. 
*/ si = NULL; if_addr_rlock(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) if (ifa->ifa_addr->sa_family == AF_INET6) { si = (struct sockaddr_in6 *)ifa->ifa_addr; sm = (struct sockaddr_in6 *)ifa->ifa_netmask; if (si && IN6_IS_ADDR_LINKLOCAL(&si->sin6_addr)) break; } if (ifa) { if (si && !IN6_IS_ADDR_UNSPECIFIED(&si->sin6_addr)) { bcopy(&si->sin6_addr, &ssrc, sizeof(ssrc)); if (srcmask) { bcopy(&sm->sin6_addr, srcmask, sizeof(*srcmask)); } } si = (struct sockaddr_in6 *)ifa->ifa_dstaddr; if (si && !IN6_IS_ADDR_UNSPECIFIED(&si->sin6_addr)) bcopy(&si->sin6_addr, &ddst, sizeof(ddst)); } if (dst) bcopy(&ddst, dst, sizeof(*dst)); if (src) bcopy(&ssrc, src, sizeof(*src)); if_addr_runlock(ifp); } #ifdef IPV6CP_MYIFID_DYN /* * Generate random ifid. */ static void sppp_gen_ip6_addr(struct sppp *sp, struct in6_addr *addr) { /* TBD */ } /* * Set my IPv6 address. */ static void sppp_set_ip6_addr(struct sppp *sp, const struct in6_addr *src) { STDDCL; struct ifaddr *ifa; struct sockaddr_in6 *sin6; /* * Pick the first link-local AF_INET6 address from the list, * aliases don't make any sense on a p2p link anyway. */ sin6 = NULL; if_addr_rlock(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family == AF_INET6) { sin6 = (struct sockaddr_in6 *)ifa->ifa_addr; if (sin6 && IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { ifa_ref(ifa); break; } } } if_addr_runlock(ifp); if (ifa != NULL) { int error; struct sockaddr_in6 new_sin6 = *sin6; bcopy(src, &new_sin6.sin6_addr, sizeof(new_sin6.sin6_addr)); error = in6_ifinit(ifp, ifatoia6(ifa), &new_sin6, 1); if (debug && error) { log(LOG_DEBUG, SPP_FMT "sppp_set_ip6_addr: in6_ifinit " " failed, error=%d\n", SPP_ARGS(ifp), error); } ifa_free(ifa); } } #endif /* * Suggest a candidate address to be used by peer. */ static void sppp_suggest_ip6_addr(struct sppp *sp, struct in6_addr *suggest) { struct in6_addr myaddr; struct timeval tv; sppp_get_ip6_addrs(sp, &myaddr, 0, 0); myaddr.s6_addr[8] &= ~0x02; /* u bit to "local" */ microtime(&tv); if ((tv.tv_usec & 0xff) == 0 && (tv.tv_sec & 0xff) == 0) { myaddr.s6_addr[14] ^= 0xff; myaddr.s6_addr[15] ^= 0xff; } else { myaddr.s6_addr[14] ^= (tv.tv_usec & 0xff); myaddr.s6_addr[15] ^= (tv.tv_sec & 0xff); } if (suggest) bcopy(&myaddr, suggest, sizeof(myaddr)); } #endif /*INET6*/ static int sppp_params(struct sppp *sp, u_long cmd, void *data) { u_long subcmd; struct ifreq *ifr = (struct ifreq *)data; struct spppreq *spr; int rv = 0; if ((spr = malloc(sizeof(struct spppreq), M_TEMP, M_NOWAIT)) == NULL) return (EAGAIN); /* - * ifr->ifr_data is supposed to point to a struct spppreq. + * ifr_data_get_ptr(ifr) is supposed to point to a struct spppreq. * Check the cmd word first before attempting to fetch all the * data. */ - rv = fueword(ifr->ifr_data, &subcmd); + rv = fueword(ifr_data_get_ptr(ifr), &subcmd); if (rv == -1) { rv = EFAULT; goto quit; } - if (copyin((caddr_t)ifr->ifr_data, spr, sizeof(struct spppreq)) != 0) { + if (copyin(ifr_data_get_ptr(ifr), spr, sizeof(struct spppreq)) != 0) { rv = EFAULT; goto quit; } switch (subcmd) { case (u_long)SPPPIOGDEFS: if (cmd != SIOCGIFGENERIC) { rv = EINVAL; break; } /* * We copy over the entire current state, but clean * out some of the stuff we don't wanna pass up. * Remember, SIOCGIFGENERIC is unprotected, and can be * called by any user. No need to ever get PAP or * CHAP secrets back to userland anyway. 
*/ spr->defs.pp_phase = sp->pp_phase; spr->defs.enable_vj = (sp->confflags & CONF_ENABLE_VJ) != 0; spr->defs.enable_ipv6 = (sp->confflags & CONF_ENABLE_IPV6) != 0; spr->defs.lcp = sp->lcp; spr->defs.ipcp = sp->ipcp; spr->defs.ipv6cp = sp->ipv6cp; spr->defs.myauth = sp->myauth; spr->defs.hisauth = sp->hisauth; bzero(spr->defs.myauth.secret, AUTHKEYLEN); bzero(spr->defs.myauth.challenge, AUTHKEYLEN); bzero(spr->defs.hisauth.secret, AUTHKEYLEN); bzero(spr->defs.hisauth.challenge, AUTHKEYLEN); /* * Fixup the LCP timeout value to milliseconds so * spppcontrol doesn't need to bother about the value * of "hz". We do the reverse calculation below when * setting it. */ spr->defs.lcp.timeout = sp->lcp.timeout * 1000 / hz; - rv = copyout(spr, (caddr_t)ifr->ifr_data, - sizeof(struct spppreq)); + rv = copyout(spr, ifr_data_get_ptr(ifr), + sizeof(struct spppreq)); break; case (u_long)SPPPIOSDEFS: if (cmd != SIOCSIFGENERIC) { rv = EINVAL; break; } /* * We have a very specific idea of which fields we * allow being passed back from userland, so to not * clobber our current state. For one, we only allow * setting anything if LCP is in dead or establish * phase. Once the authentication negotiations * started, the authentication settings must not be * changed again. (The administrator can force an * ifconfig down in order to get LCP back into dead * phase.) * * Also, we only allow for authentication parameters to be * specified. * * XXX Should allow to set or clear pp_flags. * * Finally, if the respective authentication protocol to * be used is set differently than 0, but the secret is * passed as all zeros, we don't trash the existing secret. * This allows an administrator to change the system name * only without clobbering the secret (which he didn't get * back in a previous SPPPIOGDEFS call). However, the * secrets are cleared if the authentication protocol is * reset to 0. 
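/*
 * Illustrative sketch (not part of this change): the timeout fixup above and
 * the reverse calculation below just convert between kernel ticks and
 * milliseconds -- ticks * 1000 / hz on the way out, ms * hz / 1000 on the way
 * back in -- so spppcontrol never needs to know hz.  E.g. with hz = 100:
 */
#include <stdio.h>

int
main(void)
{
        int hz = 100;                   /* hypothetical kernel tick rate */
        int ticks = 50;
        int ms = ticks * 1000 / hz;     /* what SPPPIOGDEFS reports */

        printf("%d ticks -> %d ms -> %d ticks\n", ticks, ms, ms * hz / 1000);
        return (0);
}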
*/ if (sp->pp_phase != PHASE_DEAD && sp->pp_phase != PHASE_ESTABLISH) { rv = EBUSY; break; } if ((spr->defs.myauth.proto != 0 && spr->defs.myauth.proto != PPP_PAP && spr->defs.myauth.proto != PPP_CHAP) || (spr->defs.hisauth.proto != 0 && spr->defs.hisauth.proto != PPP_PAP && spr->defs.hisauth.proto != PPP_CHAP)) { rv = EINVAL; break; } if (spr->defs.myauth.proto == 0) /* resetting myauth */ bzero(&sp->myauth, sizeof sp->myauth); else { /* setting/changing myauth */ sp->myauth.proto = spr->defs.myauth.proto; bcopy(spr->defs.myauth.name, sp->myauth.name, AUTHNAMELEN); if (spr->defs.myauth.secret[0] != '\0') bcopy(spr->defs.myauth.secret, sp->myauth.secret, AUTHKEYLEN); } if (spr->defs.hisauth.proto == 0) /* resetting hisauth */ bzero(&sp->hisauth, sizeof sp->hisauth); else { /* setting/changing hisauth */ sp->hisauth.proto = spr->defs.hisauth.proto; sp->hisauth.flags = spr->defs.hisauth.flags; bcopy(spr->defs.hisauth.name, sp->hisauth.name, AUTHNAMELEN); if (spr->defs.hisauth.secret[0] != '\0') bcopy(spr->defs.hisauth.secret, sp->hisauth.secret, AUTHKEYLEN); } /* set LCP restart timer timeout */ if (spr->defs.lcp.timeout != 0) sp->lcp.timeout = spr->defs.lcp.timeout * hz / 1000; /* set VJ enable and IPv6 disable flags */ #ifdef INET if (spr->defs.enable_vj) sp->confflags |= CONF_ENABLE_VJ; else sp->confflags &= ~CONF_ENABLE_VJ; #endif #ifdef INET6 if (spr->defs.enable_ipv6) sp->confflags |= CONF_ENABLE_IPV6; else sp->confflags &= ~CONF_ENABLE_IPV6; #endif break; default: rv = EINVAL; } quit: free(spr, M_TEMP); return (rv); } static void sppp_phase_network(struct sppp *sp) { STDDCL; int i; u_long mask; sp->pp_phase = PHASE_NETWORK; if (debug) log(LOG_DEBUG, SPP_FMT "phase %s\n", SPP_ARGS(ifp), sppp_phase_name(sp->pp_phase)); /* Notify NCPs now. */ for (i = 0; i < IDX_COUNT; i++) if ((cps[i])->flags & CP_NCP) (cps[i])->Open(sp); /* Send Up events to all NCPs. 
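/*
 * Illustrative sketch (not part of this change): the functional change to
 * sppp_params() above is mechanical -- each direct dereference of
 * ifr->ifr_data is replaced by the ifr_data_get_ptr() accessor, so recovering
 * the user data pointer from a struct ifreq is centralized (e.g. so compat
 * ifreq layouts can be handled in one place).  The resulting shape of a
 * generic-ioctl handler, with a hypothetical driver request struct, would be:
 */
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/systm.h>
#include <sys/sockio.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/if_var.h>

struct foo_req {                        /* hypothetical per-driver request */
        int     fr_flags;
};

static int
foo_params(struct ifnet *ifp, u_long cmd, struct ifreq *ifr)
{
        struct foo_req req;
        int error;

        /* Fetch the whole request from userland through the accessor. */
        error = copyin(ifr_data_get_ptr(ifr), &req, sizeof(req));
        if (error != 0)
                return (error);
        if (cmd != SIOCGIFGENERIC)
                return (EINVAL);
        req.fr_flags = ifp->if_flags;   /* fill in what userland may see */
        /* Copy the result back out the same way. */
        return (copyout(&req, ifr_data_get_ptr(ifr), sizeof(req)));
}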
*/ for (i = 0, mask = 1; i < IDX_COUNT; i++, mask <<= 1) if ((sp->lcp.protos & mask) && ((cps[i])->flags & CP_NCP)) (cps[i])->Up(sp); /* if no NCP is starting, all this was in vain, close down */ sppp_lcp_check_and_close(sp); } static const char * sppp_cp_type_name(u_char type) { static char buf[12]; switch (type) { case CONF_REQ: return "conf-req"; case CONF_ACK: return "conf-ack"; case CONF_NAK: return "conf-nak"; case CONF_REJ: return "conf-rej"; case TERM_REQ: return "term-req"; case TERM_ACK: return "term-ack"; case CODE_REJ: return "code-rej"; case PROTO_REJ: return "proto-rej"; case ECHO_REQ: return "echo-req"; case ECHO_REPLY: return "echo-reply"; case DISC_REQ: return "discard-req"; } snprintf (buf, sizeof(buf), "cp/0x%x", type); return buf; } static const char * sppp_auth_type_name(u_short proto, u_char type) { static char buf[12]; switch (proto) { case PPP_CHAP: switch (type) { case CHAP_CHALLENGE: return "challenge"; case CHAP_RESPONSE: return "response"; case CHAP_SUCCESS: return "success"; case CHAP_FAILURE: return "failure"; } case PPP_PAP: switch (type) { case PAP_REQ: return "req"; case PAP_ACK: return "ack"; case PAP_NAK: return "nak"; } } snprintf (buf, sizeof(buf), "auth/0x%x", type); return buf; } static const char * sppp_lcp_opt_name(u_char opt) { static char buf[12]; switch (opt) { case LCP_OPT_MRU: return "mru"; case LCP_OPT_ASYNC_MAP: return "async-map"; case LCP_OPT_AUTH_PROTO: return "auth-proto"; case LCP_OPT_QUAL_PROTO: return "qual-proto"; case LCP_OPT_MAGIC: return "magic"; case LCP_OPT_PROTO_COMP: return "proto-comp"; case LCP_OPT_ADDR_COMP: return "addr-comp"; } snprintf (buf, sizeof(buf), "lcp/0x%x", opt); return buf; } #ifdef INET static const char * sppp_ipcp_opt_name(u_char opt) { static char buf[12]; switch (opt) { case IPCP_OPT_ADDRESSES: return "addresses"; case IPCP_OPT_COMPRESSION: return "compression"; case IPCP_OPT_ADDRESS: return "address"; } snprintf (buf, sizeof(buf), "ipcp/0x%x", opt); return buf; } #endif #ifdef INET6 static const char * sppp_ipv6cp_opt_name(u_char opt) { static char buf[12]; switch (opt) { case IPV6CP_OPT_IFID: return "ifid"; case IPV6CP_OPT_COMPRESSION: return "compression"; } sprintf (buf, "0x%x", opt); return buf; } #endif static const char * sppp_state_name(int state) { switch (state) { case STATE_INITIAL: return "initial"; case STATE_STARTING: return "starting"; case STATE_CLOSED: return "closed"; case STATE_STOPPED: return "stopped"; case STATE_CLOSING: return "closing"; case STATE_STOPPING: return "stopping"; case STATE_REQ_SENT: return "req-sent"; case STATE_ACK_RCVD: return "ack-rcvd"; case STATE_ACK_SENT: return "ack-sent"; case STATE_OPENED: return "opened"; } return "illegal"; } static const char * sppp_phase_name(enum ppp_phase phase) { switch (phase) { case PHASE_DEAD: return "dead"; case PHASE_ESTABLISH: return "establish"; case PHASE_TERMINATE: return "terminate"; case PHASE_AUTHENTICATE: return "authenticate"; case PHASE_NETWORK: return "network"; } return "illegal"; } static const char * sppp_proto_name(u_short proto) { static char buf[12]; switch (proto) { case PPP_LCP: return "lcp"; case PPP_IPCP: return "ipcp"; case PPP_PAP: return "pap"; case PPP_CHAP: return "chap"; case PPP_IPV6CP: return "ipv6cp"; } snprintf(buf, sizeof(buf), "proto/0x%x", (unsigned)proto); return buf; } static void sppp_print_bytes(const u_char *p, u_short len) { if (len) log(-1, " %*D", len, p, "-"); } static void sppp_print_string(const char *p, u_short len) { u_char c; while (len-- > 0) { c = *p++; /* * Print only ASCII chars 
directly. RFC 1994 recommends * using only them, but we don't rely on it. */ if (c < ' ' || c > '~') log(-1, "\\x%x", c); else log(-1, "%c", c); } } #ifdef INET static const char * sppp_dotted_quad(u_long addr) { static char s[16]; sprintf(s, "%d.%d.%d.%d", (int)((addr >> 24) & 0xff), (int)((addr >> 16) & 0xff), (int)((addr >> 8) & 0xff), (int)(addr & 0xff)); return s; } #endif static int sppp_strnlen(u_char *p, int max) { int len; for (len = 0; len < max && *p; ++p) ++len; return len; } /* a dummy, used to drop uninteresting events */ static void sppp_null(struct sppp *unused) { /* do just nothing */ } Index: stable/11/sys/net/if_var.h =================================================================== --- stable/11/sys/net/if_var.h (revision 332287) +++ stable/11/sys/net/if_var.h (revision 332288) @@ -1,673 +1,676 @@ /*- * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * From: @(#)if.h 8.1 (Berkeley) 6/10/93 * $FreeBSD$ */ #ifndef _NET_IF_VAR_H_ #define _NET_IF_VAR_H_ /* * Structures defining a network interface, providing a packet * transport mechanism (ala level 0 of the PUP protocols). * * Each interface accepts output datagrams of a specified maximum * length, and provides higher level routines with input datagrams * received from its medium. * * Output occurs when the routine if_output is called, with three parameters: * (*ifp->if_output)(ifp, m, dst, rt) * Here m is the mbuf chain to be sent and dst is the destination address. * The output routine encapsulates the supplied datagram if necessary, * and then transmits it on its medium. * * On input, each interface unwraps the data received by it, and either * places it on the input queue of an internetwork datagram routine * and posts the associated software interrupt, or passes the datagram to a raw * packet input routine. * * Routines exist for locating interfaces by their addresses * or for locating an interface on a certain network, as well as more general * routing and gateway routines maintaining information used to locate * interfaces. 
These routines live in the files if.c and route.c */ struct rtentry; /* ifa_rtrequest */ struct rt_addrinfo; /* ifa_rtrequest */ struct socket; struct carp_if; struct carp_softc; struct ifvlantrunk; struct route; /* if_output */ struct vnet; struct ifmedia; struct netmap_adapter; #ifdef _KERNEL #include /* ifqueue only? */ #include #include #endif /* _KERNEL */ #include #include /* XXX */ #include /* struct ifqueue */ #include /* XXX */ #include /* XXX */ #include /* if_link_task */ #define IF_DUNIT_NONE -1 #include TAILQ_HEAD(ifnethead, ifnet); /* we use TAILQs so that the order of */ TAILQ_HEAD(ifaddrhead, ifaddr); /* instantiation is preserved in the list */ TAILQ_HEAD(ifmultihead, ifmultiaddr); TAILQ_HEAD(ifgrouphead, ifg_group); #ifdef _KERNEL VNET_DECLARE(struct pfil_head, link_pfil_hook); /* packet filter hooks */ #define V_link_pfil_hook VNET(link_pfil_hook) #define HHOOK_IPSEC_INET 0 #define HHOOK_IPSEC_INET6 1 #define HHOOK_IPSEC_COUNT 2 VNET_DECLARE(struct hhook_head *, ipsec_hhh_in[HHOOK_IPSEC_COUNT]); VNET_DECLARE(struct hhook_head *, ipsec_hhh_out[HHOOK_IPSEC_COUNT]); #define V_ipsec_hhh_in VNET(ipsec_hhh_in) #define V_ipsec_hhh_out VNET(ipsec_hhh_out) #endif /* _KERNEL */ typedef enum { IFCOUNTER_IPACKETS = 0, IFCOUNTER_IERRORS, IFCOUNTER_OPACKETS, IFCOUNTER_OERRORS, IFCOUNTER_COLLISIONS, IFCOUNTER_IBYTES, IFCOUNTER_OBYTES, IFCOUNTER_IMCASTS, IFCOUNTER_OMCASTS, IFCOUNTER_IQDROPS, IFCOUNTER_OQDROPS, IFCOUNTER_NOPROTO, IFCOUNTERS /* Array size. */ } ift_counter; typedef struct ifnet * if_t; typedef void (*if_start_fn_t)(if_t); typedef int (*if_ioctl_fn_t)(if_t, u_long, caddr_t); typedef void (*if_init_fn_t)(void *); typedef void (*if_qflush_fn_t)(if_t); typedef int (*if_transmit_fn_t)(if_t, struct mbuf *); typedef uint64_t (*if_get_counter_t)(if_t, ift_counter); struct ifnet_hw_tsomax { u_int tsomaxbytes; /* TSO total burst length limit in bytes */ u_int tsomaxsegcount; /* TSO maximum segment count */ u_int tsomaxsegsize; /* TSO maximum segment size in bytes */ }; /* Interface encap request types */ typedef enum { IFENCAP_LL = 1 /* pre-calculate link-layer header */ } ife_type; /* * The structure below allows to request various pre-calculated L2/L3 headers * for different media. Requests varies by type (rtype field). * * IFENCAP_LL type: pre-calculates link header based on address family * and destination lladdr. * * Input data fields: * buf: pointer to destination buffer * bufsize: buffer size * flags: IFENCAP_FLAG_BROADCAST if destination is broadcast * family: address family defined by AF_ constant. * lladdr: pointer to link-layer address * lladdr_len: length of link-layer address * hdata: pointer to L3 header (optional, used for ARP requests). * Output data fields: * buf: encap data is stored here * bufsize: resulting encap length is stored here * lladdr_off: offset of link-layer address from encap hdr start * hdata: L3 header may be altered if necessary */ struct if_encap_req { u_char *buf; /* Destination buffer (w) */ size_t bufsize; /* size of provided buffer (r) */ ife_type rtype; /* request type (r) */ uint32_t flags; /* Request flags (r) */ int family; /* Address family AF_* (r) */ int lladdr_off; /* offset from header start (w) */ int lladdr_len; /* lladdr length (r) */ char *lladdr; /* link-level address pointer (r) */ char *hdata; /* Upper layer header data (rw) */ }; #define IFENCAP_FLAG_BROADCAST 0x02 /* Destination is broadcast */ /* * Structure defining a network interface. 
* * Size ILP32: 592 (approx) * LP64: 1048 (approx) */ struct ifnet { /* General book keeping of interface lists. */ TAILQ_ENTRY(ifnet) if_link; /* all struct ifnets are chained */ LIST_ENTRY(ifnet) if_clones; /* interfaces of a cloner */ TAILQ_HEAD(, ifg_list) if_groups; /* linked list of groups per if */ /* protected by if_addr_lock */ u_char if_alloctype; /* if_type at time of allocation */ /* Driver and protocol specific information that remains stable. */ void *if_softc; /* pointer to driver state */ void *if_llsoftc; /* link layer softc */ void *if_l2com; /* pointer to protocol bits */ const char *if_dname; /* driver name */ int if_dunit; /* unit or IF_DUNIT_NONE */ u_short if_index; /* numeric abbreviation for this if */ short if_index_reserved; /* spare space to grow if_index */ char if_xname[IFNAMSIZ]; /* external name (name + unit) */ char *if_description; /* interface description */ /* Variable fields that are touched by the stack and drivers. */ int if_flags; /* up/down, broadcast, etc. */ int if_drv_flags; /* driver-managed status flags */ int if_capabilities; /* interface features & capabilities */ int if_capenable; /* enabled features & capabilities */ void *if_linkmib; /* link-type-specific MIB data */ size_t if_linkmiblen; /* length of above data */ u_int if_refcount; /* reference count */ /* These fields are shared with struct if_data. */ uint8_t if_type; /* ethernet, tokenring, etc */ uint8_t if_addrlen; /* media address length */ uint8_t if_hdrlen; /* media header length */ uint8_t if_link_state; /* current link state */ uint32_t if_mtu; /* maximum transmission unit */ uint32_t if_metric; /* routing metric (external only) */ uint64_t if_baudrate; /* linespeed */ uint64_t if_hwassist; /* HW offload capabilities, see IFCAP */ time_t if_epoch; /* uptime at attach or stat reset */ struct timeval if_lastchange; /* time of last administrative change */ struct ifaltq if_snd; /* output queue (includes altq) */ struct task if_linktask; /* task for link change events */ /* Addresses of different protocol families assigned to this if. */ struct rwlock if_addr_lock; /* lock to protect address lists */ /* * if_addrhead is the list of all addresses associated to * an interface. * Some code in the kernel assumes that first element * of the list has type AF_LINK, and contains sockaddr_dl * addresses which store the link-level address and the name * of the interface. * However, access to the AF_LINK address through this * field is deprecated. Use if_addr or ifaddr_byindex() instead. */ struct ifaddrhead if_addrhead; /* linked list of addresses per if */ struct ifmultihead if_multiaddrs; /* multicast addresses configured */ int if_amcount; /* number of all-multicast requests */ struct ifaddr *if_addr; /* pointer to link-level address */ const u_int8_t *if_broadcastaddr; /* linklevel broadcast bytestring */ struct rwlock if_afdata_lock; void *if_afdata[AF_MAX]; int if_afdata_initialized; /* Additional features hung off the interface. 
*/ u_int if_fib; /* interface FIB */ struct vnet *if_vnet; /* pointer to network stack instance */ struct vnet *if_home_vnet; /* where this ifnet originates from */ struct ifvlantrunk *if_vlantrunk; /* pointer to 802.1q data */ struct bpf_if *if_bpf; /* packet filter structure */ int if_pcount; /* number of promiscuous listeners */ void *if_bridge; /* bridge glue */ void *if_lagg; /* lagg glue */ void *if_pf_kif; /* pf glue */ struct carp_if *if_carp; /* carp interface structure */ struct label *if_label; /* interface MAC label */ struct netmap_adapter *if_netmap; /* netmap(4) softc */ /* Various procedures of the layer2 encapsulation and drivers. */ int (*if_output) /* output routine (enqueue) */ (struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); void (*if_input) /* input routine (from h/w driver) */ (struct ifnet *, struct mbuf *); if_start_fn_t if_start; /* initiate output routine */ if_ioctl_fn_t if_ioctl; /* ioctl routine */ if_init_fn_t if_init; /* Init routine */ int (*if_resolvemulti) /* validate/resolve multicast */ (struct ifnet *, struct sockaddr **, struct sockaddr *); if_qflush_fn_t if_qflush; /* flush any queue */ if_transmit_fn_t if_transmit; /* initiate output routine */ void (*if_reassign) /* reassign to vnet routine */ (struct ifnet *, struct vnet *, char *); if_get_counter_t if_get_counter; /* get counter values */ int (*if_requestencap) /* make link header from request */ (struct ifnet *, struct if_encap_req *); /* Statistics. */ counter_u64_t if_counters[IFCOUNTERS]; /* Stuff that's only temporary and doesn't belong here. */ /* * Network adapter TSO limits: * =========================== * * If the "if_hw_tsomax" field is zero the maximum segment * length limit does not apply. If the "if_hw_tsomaxsegcount" * or the "if_hw_tsomaxsegsize" field is zero the TSO segment * count limit does not apply. If all three fields are zero, * there is no TSO limit. * * NOTE: The TSO limits should reflect the values used in the * BUSDMA tag a network adapter is using to load a mbuf chain * for transmission. The TCP/IP network stack will subtract * space for all linklevel and protocol level headers and * ensure that the full mbuf chain passed to the network * adapter fits within the given limits. */ u_int if_hw_tsomax; /* TSO maximum size in bytes */ u_int if_hw_tsomaxsegcount; /* TSO maximum segment count */ u_int if_hw_tsomaxsegsize; /* TSO maximum segment size in bytes */ /* * Spare fields to be added before branching a stable branch, so * that structure can be enhanced without changing the kernel * binary interface. */ void *if_pspare[3]; /* packet pacing / general use */ void *if_hw_addr; /* hardware link-level address */ int if_ispare[4]; /* packet pacing / general use */ }; /* for compatibility with other BSDs */ #define if_addrlist if_addrhead #define if_list if_link #define if_name(ifp) ((ifp)->if_xname) /* * Locks for address lists on the network interface. 
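/*
 * Illustrative sketch (not part of this change): the if_get_counter method
 * above lets a driver answer statistics queries itself rather than relying on
 * the generic if_counters[] array; anything it does not track can be punted
 * to if_get_counter_default(), declared further down in this header.  Minimal
 * driver-side example (softc and field names hypothetical):
 */
#include <sys/param.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/if_var.h>

struct foo_softc {
        struct ifnet    *sc_ifp;
        uint64_t         sc_ipackets;
        uint64_t         sc_ierrors;
};

static uint64_t
foo_get_counter(struct ifnet *ifp, ift_counter cnt)
{
        struct foo_softc *sc = ifp->if_softc;

        switch (cnt) {
        case IFCOUNTER_IPACKETS:
                return (sc->sc_ipackets);
        case IFCOUNTER_IERRORS:
                return (sc->sc_ierrors);
        default:
                /* Everything else falls back to the stack's counters. */
                return (if_get_counter_default(ifp, cnt));
        }
}
/* Hooked up at attach time with: ifp->if_get_counter = foo_get_counter; */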
*/ #define IF_ADDR_LOCK_INIT(if) rw_init(&(if)->if_addr_lock, "if_addr_lock") #define IF_ADDR_LOCK_DESTROY(if) rw_destroy(&(if)->if_addr_lock) #define IF_ADDR_WLOCK(if) rw_wlock(&(if)->if_addr_lock) #define IF_ADDR_WUNLOCK(if) rw_wunlock(&(if)->if_addr_lock) #define IF_ADDR_RLOCK(if) rw_rlock(&(if)->if_addr_lock) #define IF_ADDR_RUNLOCK(if) rw_runlock(&(if)->if_addr_lock) #define IF_ADDR_LOCK_ASSERT(if) rw_assert(&(if)->if_addr_lock, RA_LOCKED) #define IF_ADDR_WLOCK_ASSERT(if) rw_assert(&(if)->if_addr_lock, RA_WLOCKED) /* * Function variations on locking macros intended to be used by loadable * kernel modules in order to divorce them from the internals of address list * locking. */ void if_addr_rlock(struct ifnet *ifp); /* if_addrhead */ void if_addr_runlock(struct ifnet *ifp); /* if_addrhead */ void if_maddr_rlock(if_t ifp); /* if_multiaddrs */ void if_maddr_runlock(if_t ifp); /* if_multiaddrs */ #ifdef _KERNEL #ifdef _SYS_EVENTHANDLER_H_ /* interface link layer address change event */ typedef void (*iflladdr_event_handler_t)(void *, struct ifnet *); EVENTHANDLER_DECLARE(iflladdr_event, iflladdr_event_handler_t); /* interface address change event */ typedef void (*ifaddr_event_handler_t)(void *, struct ifnet *); EVENTHANDLER_DECLARE(ifaddr_event, ifaddr_event_handler_t); /* new interface arrival event */ typedef void (*ifnet_arrival_event_handler_t)(void *, struct ifnet *); EVENTHANDLER_DECLARE(ifnet_arrival_event, ifnet_arrival_event_handler_t); /* interface departure event */ typedef void (*ifnet_departure_event_handler_t)(void *, struct ifnet *); EVENTHANDLER_DECLARE(ifnet_departure_event, ifnet_departure_event_handler_t); /* Interface link state change event */ typedef void (*ifnet_link_event_handler_t)(void *, struct ifnet *, int); EVENTHANDLER_DECLARE(ifnet_link_event, ifnet_link_event_handler_t); /* Interface up/down event */ #define IFNET_EVENT_UP 0 #define IFNET_EVENT_DOWN 1 typedef void (*ifnet_event_fn)(void *, struct ifnet *ifp, int event); EVENTHANDLER_DECLARE(ifnet_event, ifnet_event_fn); #endif /* _SYS_EVENTHANDLER_H_ */ /* * interface groups */ struct ifg_group { char ifg_group[IFNAMSIZ]; u_int ifg_refcnt; void *ifg_pf_kif; TAILQ_HEAD(, ifg_member) ifg_members; TAILQ_ENTRY(ifg_group) ifg_next; }; struct ifg_member { TAILQ_ENTRY(ifg_member) ifgm_next; struct ifnet *ifgm_ifp; }; struct ifg_list { struct ifg_group *ifgl_group; TAILQ_ENTRY(ifg_list) ifgl_next; }; #ifdef _SYS_EVENTHANDLER_H_ /* group attach event */ typedef void (*group_attach_event_handler_t)(void *, struct ifg_group *); EVENTHANDLER_DECLARE(group_attach_event, group_attach_event_handler_t); /* group detach event */ typedef void (*group_detach_event_handler_t)(void *, struct ifg_group *); EVENTHANDLER_DECLARE(group_detach_event, group_detach_event_handler_t); /* group change event */ typedef void (*group_change_event_handler_t)(void *, const char *); EVENTHANDLER_DECLARE(group_change_event, group_change_event_handler_t); #endif /* _SYS_EVENTHANDLER_H_ */ #define IF_AFDATA_LOCK_INIT(ifp) \ rw_init(&(ifp)->if_afdata_lock, "if_afdata") #define IF_AFDATA_WLOCK(ifp) rw_wlock(&(ifp)->if_afdata_lock) #define IF_AFDATA_RLOCK(ifp) rw_rlock(&(ifp)->if_afdata_lock) #define IF_AFDATA_WUNLOCK(ifp) rw_wunlock(&(ifp)->if_afdata_lock) #define IF_AFDATA_RUNLOCK(ifp) rw_runlock(&(ifp)->if_afdata_lock) #define IF_AFDATA_LOCK(ifp) IF_AFDATA_WLOCK(ifp) #define IF_AFDATA_UNLOCK(ifp) IF_AFDATA_WUNLOCK(ifp) #define IF_AFDATA_TRYLOCK(ifp) rw_try_wlock(&(ifp)->if_afdata_lock) #define IF_AFDATA_DESTROY(ifp) 
rw_destroy(&(ifp)->if_afdata_lock) #define IF_AFDATA_LOCK_ASSERT(ifp) rw_assert(&(ifp)->if_afdata_lock, RA_LOCKED) #define IF_AFDATA_RLOCK_ASSERT(ifp) rw_assert(&(ifp)->if_afdata_lock, RA_RLOCKED) #define IF_AFDATA_WLOCK_ASSERT(ifp) rw_assert(&(ifp)->if_afdata_lock, RA_WLOCKED) #define IF_AFDATA_UNLOCK_ASSERT(ifp) rw_assert(&(ifp)->if_afdata_lock, RA_UNLOCKED) /* * 72 was chosen below because it is the size of a TCP/IP * header (40) + the minimum mss (32). */ #define IF_MINMTU 72 #define IF_MAXMTU 65535 #define TOEDEV(ifp) ((ifp)->if_llsoftc) /* * The ifaddr structure contains information about one address * of an interface. They are maintained by the different address families, * are allocated and attached when an address is set, and are linked * together so all addresses for an interface can be located. * * NOTE: a 'struct ifaddr' is always at the beginning of a larger * chunk of malloc'ed memory, where we store the three addresses * (ifa_addr, ifa_dstaddr and ifa_netmask) referenced here. */ struct ifaddr { struct sockaddr *ifa_addr; /* address of interface */ struct sockaddr *ifa_dstaddr; /* other end of p-to-p link */ #define ifa_broadaddr ifa_dstaddr /* broadcast address interface */ struct sockaddr *ifa_netmask; /* used to determine subnet */ struct ifnet *ifa_ifp; /* back-pointer to interface */ struct carp_softc *ifa_carp; /* pointer to CARP data */ TAILQ_ENTRY(ifaddr) ifa_link; /* queue macro glue */ void (*ifa_rtrequest) /* check or clean routes (+ or -)'d */ (int, struct rtentry *, struct rt_addrinfo *); u_short ifa_flags; /* mostly rt_flags for cloning */ #define IFA_ROUTE RTF_UP /* route installed */ #define IFA_RTSELF RTF_HOST /* loopback route to self installed */ u_int ifa_refcnt; /* references to this structure */ counter_u64_t ifa_ipackets; counter_u64_t ifa_opackets; counter_u64_t ifa_ibytes; counter_u64_t ifa_obytes; }; /* For compatibility with other BSDs. SCTP uses it. */ #define ifa_list ifa_link struct ifaddr * ifa_alloc(size_t size, int flags); void ifa_free(struct ifaddr *ifa); void ifa_ref(struct ifaddr *ifa); /* * Multicast address structure. This is analogous to the ifaddr * structure except that it keeps track of multicast addresses. */ struct ifmultiaddr { TAILQ_ENTRY(ifmultiaddr) ifma_link; /* queue macro glue */ struct sockaddr *ifma_addr; /* address this membership is for */ struct sockaddr *ifma_lladdr; /* link-layer translation, if any */ struct ifnet *ifma_ifp; /* back-pointer to interface */ u_int ifma_refcount; /* reference count */ void *ifma_protospec; /* protocol-specific state, if any */ struct ifmultiaddr *ifma_llifma; /* pointer to ifma for ifma_lladdr */ }; extern struct rwlock ifnet_rwlock; extern struct sx ifnet_sxlock; #define IFNET_WLOCK() do { \ sx_xlock(&ifnet_sxlock); \ rw_wlock(&ifnet_rwlock); \ } while (0) #define IFNET_WUNLOCK() do { \ rw_wunlock(&ifnet_rwlock); \ sx_xunlock(&ifnet_sxlock); \ } while (0) /* * To assert the ifnet lock, you must know not only whether it's for read or * write, but also whether it was acquired with sleep support or not. 
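/*
 * Illustrative sketch (not part of this change): a module walking if_addrhead
 * follows the same pattern the sppp code above uses -- take the address-list
 * read lock through the accessor functions, find the ifaddr of interest, take
 * a reference before dropping the lock, and ifa_free() it when done:
 */
#include <sys/param.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/if_var.h>

static struct ifaddr *
find_first_inet_addr(struct ifnet *ifp)
{
        struct ifaddr *ifa, *ret = NULL;

        if_addr_rlock(ifp);
        TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
                if (ifa->ifa_addr->sa_family == AF_INET) {
                        ifa_ref(ifa);   /* keep it valid past the unlock */
                        ret = ifa;
                        break;
                }
        }
        if_addr_runlock(ifp);
        return (ret);   /* caller must ifa_free() a non-NULL result */
}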
*/ #define IFNET_RLOCK_ASSERT() sx_assert(&ifnet_sxlock, SA_SLOCKED) #define IFNET_RLOCK_NOSLEEP_ASSERT() rw_assert(&ifnet_rwlock, RA_RLOCKED) #define IFNET_WLOCK_ASSERT() do { \ sx_assert(&ifnet_sxlock, SA_XLOCKED); \ rw_assert(&ifnet_rwlock, RA_WLOCKED); \ } while (0) #define IFNET_RLOCK() sx_slock(&ifnet_sxlock) #define IFNET_RLOCK_NOSLEEP() rw_rlock(&ifnet_rwlock) #define IFNET_RUNLOCK() sx_sunlock(&ifnet_sxlock) #define IFNET_RUNLOCK_NOSLEEP() rw_runlock(&ifnet_rwlock) /* * Look up an ifnet given its index; the _ref variant also acquires a * reference that must be freed using if_rele(). It is almost always a bug * to call ifnet_byindex() instead if ifnet_byindex_ref(). */ struct ifnet *ifnet_byindex(u_short idx); struct ifnet *ifnet_byindex_locked(u_short idx); struct ifnet *ifnet_byindex_ref(u_short idx); /* * Given the index, ifaddr_byindex() returns the one and only * link-level ifaddr for the interface. You are not supposed to use * it to traverse the list of addresses associated to the interface. */ struct ifaddr *ifaddr_byindex(u_short idx); VNET_DECLARE(struct ifnethead, ifnet); VNET_DECLARE(struct ifgrouphead, ifg_head); VNET_DECLARE(int, if_index); VNET_DECLARE(struct ifnet *, loif); /* first loopback interface */ #define V_ifnet VNET(ifnet) #define V_ifg_head VNET(ifg_head) #define V_if_index VNET(if_index) #define V_loif VNET(loif) int if_addgroup(struct ifnet *, const char *); int if_delgroup(struct ifnet *, const char *); int if_addmulti(struct ifnet *, struct sockaddr *, struct ifmultiaddr **); int if_allmulti(struct ifnet *, int); struct ifnet* if_alloc(u_char); void if_attach(struct ifnet *); void if_dead(struct ifnet *); int if_delmulti(struct ifnet *, struct sockaddr *); void if_delmulti_ifma(struct ifmultiaddr *); void if_detach(struct ifnet *); void if_purgeaddrs(struct ifnet *); void if_delallmulti(struct ifnet *); void if_down(struct ifnet *); struct ifmultiaddr * if_findmulti(struct ifnet *, const struct sockaddr *); void if_free(struct ifnet *); void if_initname(struct ifnet *, const char *, int); void if_link_state_change(struct ifnet *, int); int if_printf(struct ifnet *, const char *, ...) 
__printflike(2, 3); void if_ref(struct ifnet *); void if_rele(struct ifnet *); int if_setlladdr(struct ifnet *, const u_char *, int); void if_up(struct ifnet *); int ifioctl(struct socket *, u_long, caddr_t, struct thread *); int ifpromisc(struct ifnet *, int); struct ifnet *ifunit(const char *); struct ifnet *ifunit_ref(const char *); int ifa_add_loopback_route(struct ifaddr *, struct sockaddr *); int ifa_del_loopback_route(struct ifaddr *, struct sockaddr *); int ifa_switch_loopback_route(struct ifaddr *, struct sockaddr *); struct ifaddr *ifa_ifwithaddr(const struct sockaddr *); int ifa_ifwithaddr_check(const struct sockaddr *); struct ifaddr *ifa_ifwithbroadaddr(const struct sockaddr *, int); struct ifaddr *ifa_ifwithdstaddr(const struct sockaddr *, int); struct ifaddr *ifa_ifwithnet(const struct sockaddr *, int, int); struct ifaddr *ifa_ifwithroute(int, const struct sockaddr *, struct sockaddr *, u_int); struct ifaddr *ifaof_ifpforaddr(const struct sockaddr *, struct ifnet *); int ifa_preferred(struct ifaddr *, struct ifaddr *); int if_simloop(struct ifnet *ifp, struct mbuf *m, int af, int hlen); typedef void *if_com_alloc_t(u_char type, struct ifnet *ifp); typedef void if_com_free_t(void *com, u_char type); void if_register_com_alloc(u_char type, if_com_alloc_t *a, if_com_free_t *f); void if_deregister_com_alloc(u_char type); void if_data_copy(struct ifnet *, struct if_data *); uint64_t if_get_counter_default(struct ifnet *, ift_counter); void if_inc_counter(struct ifnet *, ift_counter, int64_t); #define IF_LLADDR(ifp) \ LLADDR((struct sockaddr_dl *)((ifp)->if_addr->ifa_addr)) uint64_t if_setbaudrate(if_t ifp, uint64_t baudrate); uint64_t if_getbaudrate(if_t ifp); int if_setcapabilities(if_t ifp, int capabilities); int if_setcapabilitiesbit(if_t ifp, int setbit, int clearbit); int if_getcapabilities(if_t ifp); int if_togglecapenable(if_t ifp, int togglecap); int if_setcapenable(if_t ifp, int capenable); int if_setcapenablebit(if_t ifp, int setcap, int clearcap); int if_getcapenable(if_t ifp); const char *if_getdname(if_t ifp); int if_setdev(if_t ifp, void *dev); int if_setdrvflagbits(if_t ifp, int if_setflags, int clear_flags); int if_getdrvflags(if_t ifp); int if_setdrvflags(if_t ifp, int flags); int if_clearhwassist(if_t ifp); int if_sethwassistbits(if_t ifp, int toset, int toclear); int if_sethwassist(if_t ifp, int hwassist_bit); int if_gethwassist(if_t ifp); int if_setsoftc(if_t ifp, void *softc); void *if_getsoftc(if_t ifp); int if_setflags(if_t ifp, int flags); int if_gethwaddr(if_t ifp, struct ifreq *); int if_setmtu(if_t ifp, int mtu); int if_getmtu(if_t ifp); int if_getmtu_family(if_t ifp, int family); int if_setflagbits(if_t ifp, int set, int clear); int if_getflags(if_t ifp); int if_sendq_empty(if_t ifp); int if_setsendqready(if_t ifp); int if_setsendqlen(if_t ifp, int tx_desc_count); int if_input(if_t ifp, struct mbuf* sendmp); int if_sendq_prepend(if_t ifp, struct mbuf *m); struct mbuf *if_dequeue(if_t ifp); int if_setifheaderlen(if_t ifp, int len); void if_setrcvif(struct mbuf *m, if_t ifp); void if_setvtag(struct mbuf *m, u_int16_t tag); u_int16_t if_getvtag(struct mbuf *m); int if_vlantrunkinuse(if_t ifp); caddr_t if_getlladdr(if_t ifp); void *if_gethandle(u_char); void if_bpfmtap(if_t ifp, struct mbuf *m); void if_etherbpfmtap(if_t ifp, struct mbuf *m); void if_vlancap(if_t ifp); int if_setupmultiaddr(if_t ifp, void *mta, int *cnt, int max); int if_multiaddr_array(if_t ifp, void *mta, int *cnt, int max); int if_multiaddr_count(if_t ifp, int max); int 
if_multi_apply(struct ifnet *ifp, int (*filter)(void *, struct ifmultiaddr *, int), void *arg); int if_getamcount(if_t ifp); struct ifaddr * if_getifaddr(if_t ifp); /* Functions */ void if_setinitfn(if_t ifp, void (*)(void *)); void if_setioctlfn(if_t ifp, int (*)(if_t, u_long, caddr_t)); void if_setstartfn(if_t ifp, void (*)(if_t)); void if_settransmitfn(if_t ifp, if_transmit_fn_t); void if_setqflushfn(if_t ifp, if_qflush_fn_t); void if_setgetcounterfn(if_t ifp, if_get_counter_t); /* Revisit the below. These are inline functions originally */ int drbr_inuse_drv(if_t ifp, struct buf_ring *br); struct mbuf* drbr_dequeue_drv(if_t ifp, struct buf_ring *br); int drbr_needs_enqueue_drv(if_t ifp, struct buf_ring *br); int drbr_enqueue_drv(if_t ifp, struct buf_ring *br, struct mbuf *m); /* TSO */ void if_hw_tsomax_common(if_t ifp, struct ifnet_hw_tsomax *); int if_hw_tsomax_update(if_t ifp, struct ifnet_hw_tsomax *); +/* accessors for struct ifreq */ +void *ifr_data_get_ptr(void *ifrp); + #ifdef DEVICE_POLLING enum poll_cmd { POLL_ONLY, POLL_AND_CHECK_STATUS }; typedef int poll_handler_t(if_t ifp, enum poll_cmd cmd, int count); int ether_poll_register(poll_handler_t *h, if_t ifp); int ether_poll_deregister(if_t ifp); #endif /* DEVICE_POLLING */ #endif /* _KERNEL */ #include /* XXXAO: temporary unconditional include */ #endif /* !_NET_IF_VAR_H_ */ Index: stable/11/sys/net/if_vlan.c =================================================================== --- stable/11/sys/net/if_vlan.c (revision 332287) +++ stable/11/sys/net/if_vlan.c (revision 332288) @@ -1,2022 +1,2022 @@ /*- * Copyright 1998 Massachusetts Institute of Technology * Copyright 2012 ADARA Networks, Inc. * Copyright 2017 Dell EMC Isilon * * Portions of this software were developed by Robert N. M. Watson under * contract to ADARA Networks, Inc. * * Permission to use, copy, modify, and distribute this software and * its documentation for any purpose and without fee is hereby * granted, provided that both the above copyright notice and this * permission notice appear in all copies, that both the above * copyright notice and this permission notice appear in all * supporting documentation, and that the name of M.I.T. not be used * in advertising or publicity pertaining to distribution of the * software without specific, written prior permission. M.I.T. makes * no representations about the suitability of this software for any * purpose. It is provided "as is" without express or implied * warranty. * * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * if_vlan.c - pseudo-device driver for IEEE 802.1Q virtual LANs. * This is sort of sneaky in the implementation, since * we need to pretend to be enough of an Ethernet implementation * to make arp work. 
The way we do this is by telling everyone * that we are an Ethernet, and then catch the packets that * ether_output() sends to us via if_transmit(), rewrite them for * use by the real outgoing interface, and ask it to send them. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_vlan.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #endif #define VLAN_DEF_HWIDTH 4 #define VLAN_IFFLAGS (IFF_BROADCAST | IFF_MULTICAST) #define UP_AND_RUNNING(ifp) \ ((ifp)->if_flags & IFF_UP && (ifp)->if_drv_flags & IFF_DRV_RUNNING) LIST_HEAD(ifvlanhead, ifvlan); struct ifvlantrunk { struct ifnet *parent; /* parent interface of this trunk */ struct rmlock lock; #ifdef VLAN_ARRAY #define VLAN_ARRAY_SIZE (EVL_VLID_MASK + 1) struct ifvlan *vlans[VLAN_ARRAY_SIZE]; /* static table */ #else struct ifvlanhead *hash; /* dynamic hash-list table */ uint16_t hmask; uint16_t hwidth; #endif int refcnt; }; /* * This macro provides a facility to iterate over every vlan on a trunk with * the assumption that none will be added/removed during iteration. */ #ifdef VLAN_ARRAY #define VLAN_FOREACH(_ifv, _trunk) \ size_t _i; \ for (_i = 0; _i < VLAN_ARRAY_SIZE; _i++) \ if (((_ifv) = (_trunk)->vlans[_i]) != NULL) #else /* VLAN_ARRAY */ #define VLAN_FOREACH(_ifv, _trunk) \ struct ifvlan *_next; \ size_t _i; \ for (_i = 0; _i < (1 << (_trunk)->hwidth); _i++) \ LIST_FOREACH_SAFE((_ifv), &(_trunk)->hash[_i], ifv_list, _next) #endif /* VLAN_ARRAY */ /* * This macro provides a facility to iterate over every vlan on a trunk while * also modifying the number of vlans on the trunk. The iteration continues * until some condition is met or there are no more vlans on the trunk. */ #ifdef VLAN_ARRAY /* The VLAN_ARRAY case is simple -- just a for loop using the condition. */ #define VLAN_FOREACH_UNTIL_SAFE(_ifv, _trunk, _cond) \ size_t _i; \ for (_i = 0; !(_cond) && _i < VLAN_ARRAY_SIZE; _i++) \ if (((_ifv) = (_trunk)->vlans[_i])) #else /* VLAN_ARRAY */ /* * The hash table case is more complicated. We allow for the hash table to be * modified (i.e. vlans removed) while we are iterating over it. To allow for * this we must restart the iteration every time we "touch" something during * the iteration, since removal will resize the hash table and invalidate our * current position. If acting on the touched element causes the trunk to be * emptied, then iteration also stops. */ #define VLAN_FOREACH_UNTIL_SAFE(_ifv, _trunk, _cond) \ size_t _i; \ bool _touch = false; \ for (_i = 0; \ !(_cond) && _i < (1 << (_trunk)->hwidth); \ _i = (_touch && ((_trunk) != NULL) ? 
0 : _i + 1), _touch = false) \ if (((_ifv) = LIST_FIRST(&(_trunk)->hash[_i])) != NULL && \ (_touch = true)) #endif /* VLAN_ARRAY */ struct vlan_mc_entry { struct sockaddr_dl mc_addr; SLIST_ENTRY(vlan_mc_entry) mc_entries; }; struct ifvlan { struct ifvlantrunk *ifv_trunk; struct ifnet *ifv_ifp; #define TRUNK(ifv) ((ifv)->ifv_trunk) #define PARENT(ifv) ((ifv)->ifv_trunk->parent) void *ifv_cookie; int ifv_pflags; /* special flags we have set on parent */ int ifv_capenable; struct ifv_linkmib { int ifvm_encaplen; /* encapsulation length */ int ifvm_mtufudge; /* MTU fudged by this much */ int ifvm_mintu; /* min transmission unit */ uint16_t ifvm_proto; /* encapsulation ethertype */ uint16_t ifvm_tag; /* tag to apply on packets leaving if */ uint16_t ifvm_vid; /* VLAN ID */ uint8_t ifvm_pcp; /* Priority Code Point (PCP). */ } ifv_mib; struct task lladdr_task; SLIST_HEAD(, vlan_mc_entry) vlan_mc_listhead; #ifndef VLAN_ARRAY LIST_ENTRY(ifvlan) ifv_list; #endif }; #define ifv_proto ifv_mib.ifvm_proto #define ifv_tag ifv_mib.ifvm_tag #define ifv_vid ifv_mib.ifvm_vid #define ifv_pcp ifv_mib.ifvm_pcp #define ifv_encaplen ifv_mib.ifvm_encaplen #define ifv_mtufudge ifv_mib.ifvm_mtufudge #define ifv_mintu ifv_mib.ifvm_mintu /* Special flags we should propagate to parent. */ static struct { int flag; int (*func)(struct ifnet *, int); } vlan_pflags[] = { {IFF_PROMISC, ifpromisc}, {IFF_ALLMULTI, if_allmulti}, {0, NULL} }; SYSCTL_DECL(_net_link); static SYSCTL_NODE(_net_link, IFT_L2VLAN, vlan, CTLFLAG_RW, 0, "IEEE 802.1Q VLAN"); static SYSCTL_NODE(_net_link_vlan, PF_LINK, link, CTLFLAG_RW, 0, "for consistency"); static VNET_DEFINE(int, soft_pad); #define V_soft_pad VNET(soft_pad) SYSCTL_INT(_net_link_vlan, OID_AUTO, soft_pad, CTLFLAG_RW | CTLFLAG_VNET, &VNET_NAME(soft_pad), 0, "pad short frames before tagging"); /* * For now, make preserving PCP via an mbuf tag optional, as it increases * per-packet memory allocations and frees. In the future, it would be * preferable to reuse ether_vtag for this, or similar. */ static int vlan_mtag_pcp = 0; SYSCTL_INT(_net_link_vlan, OID_AUTO, mtag_pcp, CTLFLAG_RW, &vlan_mtag_pcp, 0, "Retain VLAN PCP information as packets are passed up the stack"); static const char vlanname[] = "vlan"; static MALLOC_DEFINE(M_VLAN, vlanname, "802.1Q Virtual LAN Interface"); static eventhandler_tag ifdetach_tag; static eventhandler_tag iflladdr_tag; /* * if_vlan uses two module-level locks to allow concurrent modification of vlan * interfaces and (mostly) allow for vlans to be destroyed while they are being * used for tx/rx. To accomplish this in a way that has acceptable performance * and cooperation with other parts of the network stack there is a * non-sleepable rmlock(9) and an sx(9). Both locks are exclusively acquired * when destroying a vlan interface, i.e. when the if_vlantrunk field of struct * ifnet is de-allocated and NULL'd. Thus a reader holding either lock has a * guarantee that the struct ifvlantrunk references a valid vlan trunk. * * The performance-sensitive paths that warrant using the rmlock(9) are * vlan_transmit and vlan_input. Both have to check for the vlan interface's * existence using if_vlantrunk, and being in the network tx/rx paths the use * of an rmlock(9) gives a measureable improvement in performance. * * The reason for having an sx(9) is mostly because there are still areas that * must be sleepable and also have safe concurrent access to a vlan interface. 
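 *
 * As a rough sketch (not a real function, just the acquisition pattern
 * used by the code below), a hot-path reader and a configuration-time
 * writer look like:
 *
 *	VLAN_LOCK_READER;		   declares the rm_priotracker
 *	VLAN_RLOCK();
 *	...inspect ifp->if_vlantrunk, transmit or input a packet...
 *	VLAN_RUNLOCK();
 *
 *	VLAN_XLOCK();			   sleepable, exclusive
 *	...configure or destroy a vlan interface...
 *	VLAN_XUNLOCK();
 *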
* Since the sx(9) exists, it is used by default in most paths unless sleeping * is not permitted, or if it is not clear whether sleeping is permitted. * * Note that despite these protections, there is still an inherent race in the * destruction of vlans since there's no guarantee that the ifnet hasn't been * freed/reused when the tx/rx functions are called by the stack. This can only * be fixed by addressing ifnet's lifetime issues. */ #define _VLAN_RM_ID ifv_rm_lock #define _VLAN_SX_ID ifv_sx static struct rmlock _VLAN_RM_ID; static struct sx _VLAN_SX_ID; #define VLAN_LOCKING_INIT() \ rm_init(&_VLAN_RM_ID, "vlan_rm"); \ sx_init(&_VLAN_SX_ID, "vlan_sx") #define VLAN_LOCKING_DESTROY() \ rm_destroy(&_VLAN_RM_ID); \ sx_destroy(&_VLAN_SX_ID) #define _VLAN_RM_TRACKER _vlan_rm_tracker #define VLAN_RLOCK() rm_rlock(&_VLAN_RM_ID, \ &_VLAN_RM_TRACKER) #define VLAN_RUNLOCK() rm_runlock(&_VLAN_RM_ID, \ &_VLAN_RM_TRACKER) #define VLAN_WLOCK() rm_wlock(&_VLAN_RM_ID) #define VLAN_WUNLOCK() rm_wunlock(&_VLAN_RM_ID) #define VLAN_RLOCK_ASSERT() rm_assert(&_VLAN_RM_ID, RA_RLOCKED) #define VLAN_WLOCK_ASSERT() rm_assert(&_VLAN_RM_ID, RA_WLOCKED) #define VLAN_RWLOCK_ASSERT() rm_assert(&_VLAN_RM_ID, RA_LOCKED) #define VLAN_LOCK_READER struct rm_priotracker _VLAN_RM_TRACKER #define VLAN_SLOCK() sx_slock(&_VLAN_SX_ID) #define VLAN_SUNLOCK() sx_sunlock(&_VLAN_SX_ID) #define VLAN_XLOCK() sx_xlock(&_VLAN_SX_ID) #define VLAN_XUNLOCK() sx_xunlock(&_VLAN_SX_ID) #define VLAN_SLOCK_ASSERT() sx_assert(&_VLAN_SX_ID, SA_SLOCKED) #define VLAN_XLOCK_ASSERT() sx_assert(&_VLAN_SX_ID, SA_XLOCKED) #define VLAN_SXLOCK_ASSERT() sx_assert(&_VLAN_SX_ID, SA_LOCKED) /* * We also have a per-trunk rmlock(9), that is locked shared on packet * processing and exclusive when configuration is changed. Note: This should * only be acquired while there is a shared lock on either of the global locks * via VLAN_SLOCK or VLAN_RLOCK. Thus, an exclusive lock on the global locks * makes a call to TRUNK_RLOCK/TRUNK_WLOCK technically superfluous. */ #define _TRUNK_RM_TRACKER _trunk_rm_tracker #define TRUNK_LOCK_INIT(trunk) rm_init(&(trunk)->lock, vlanname) #define TRUNK_LOCK_DESTROY(trunk) rm_destroy(&(trunk)->lock) #define TRUNK_RLOCK(trunk) rm_rlock(&(trunk)->lock, \ &_TRUNK_RM_TRACKER) #define TRUNK_WLOCK(trunk) rm_wlock(&(trunk)->lock) #define TRUNK_RUNLOCK(trunk) rm_runlock(&(trunk)->lock, \ &_TRUNK_RM_TRACKER) #define TRUNK_WUNLOCK(trunk) rm_wunlock(&(trunk)->lock) #define TRUNK_RLOCK_ASSERT(trunk) rm_assert(&(trunk)->lock, RA_RLOCKED) #define TRUNK_LOCK_ASSERT(trunk) rm_assert(&(trunk)->lock, RA_LOCKED) #define TRUNK_WLOCK_ASSERT(trunk) rm_assert(&(trunk)->lock, RA_WLOCKED) #define TRUNK_LOCK_READER struct rm_priotracker _TRUNK_RM_TRACKER /* * The VLAN_ARRAY substitutes the dynamic hash with a static array * with 4096 entries. In theory this can give a boost in processing, * however in practice it does not. Probably this is because the array * is too big to fit into CPU cache. 
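 *
 * For scale: 4096 pointers are 32 KB on LP64, roughly an entire L1
 * data cache on many CPUs, while the hash starts at only
 * 1 << VLAN_DEF_HWIDTH == 16 list heads and is grown on demand.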
*/ #ifndef VLAN_ARRAY static void vlan_inithash(struct ifvlantrunk *trunk); static void vlan_freehash(struct ifvlantrunk *trunk); static int vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv); static int vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv); static void vlan_growhash(struct ifvlantrunk *trunk, int howmuch); static __inline struct ifvlan * vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid); #endif static void trunk_destroy(struct ifvlantrunk *trunk); static void vlan_init(void *foo); static void vlan_input(struct ifnet *ifp, struct mbuf *m); static int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr); static void vlan_qflush(struct ifnet *ifp); static int vlan_setflag(struct ifnet *ifp, int flag, int status, int (*func)(struct ifnet *, int)); static int vlan_setflags(struct ifnet *ifp, int status); static int vlan_setmulti(struct ifnet *ifp); static int vlan_transmit(struct ifnet *ifp, struct mbuf *m); static void vlan_unconfig(struct ifnet *ifp); static void vlan_unconfig_locked(struct ifnet *ifp, int departing); static int vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t tag); static void vlan_link_state(struct ifnet *ifp); static void vlan_capabilities(struct ifvlan *ifv); static void vlan_trunk_capabilities(struct ifnet *ifp); static struct ifnet *vlan_clone_match_ethervid(const char *, int *); static int vlan_clone_match(struct if_clone *, const char *); static int vlan_clone_create(struct if_clone *, char *, size_t, caddr_t); static int vlan_clone_destroy(struct if_clone *, struct ifnet *); static void vlan_ifdetach(void *arg, struct ifnet *ifp); static void vlan_iflladdr(void *arg, struct ifnet *ifp); static void vlan_lladdr_fn(void *arg, int pending); static struct if_clone *vlan_cloner; #ifdef VIMAGE static VNET_DEFINE(struct if_clone *, vlan_cloner); #define V_vlan_cloner VNET(vlan_cloner) #endif #ifndef VLAN_ARRAY #define HASH(n, m) ((((n) >> 8) ^ ((n) >> 4) ^ (n)) & (m)) static void vlan_inithash(struct ifvlantrunk *trunk) { int i, n; /* * The trunk must not be locked here since we call malloc(M_WAITOK). * It is OK in case this function is called before the trunk struct * gets hooked up and becomes visible from other threads. */ KASSERT(trunk->hwidth == 0 && trunk->hash == NULL, ("%s: hash already initialized", __func__)); trunk->hwidth = VLAN_DEF_HWIDTH; n = 1 << trunk->hwidth; trunk->hmask = n - 1; trunk->hash = malloc(sizeof(struct ifvlanhead) * n, M_VLAN, M_WAITOK); for (i = 0; i < n; i++) LIST_INIT(&trunk->hash[i]); } static void vlan_freehash(struct ifvlantrunk *trunk) { #ifdef INVARIANTS int i; KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); for (i = 0; i < (1 << trunk->hwidth); i++) KASSERT(LIST_EMPTY(&trunk->hash[i]), ("%s: hash table not empty", __func__)); #endif free(trunk->hash, M_VLAN); trunk->hash = NULL; trunk->hwidth = trunk->hmask = 0; } static int vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv) { int i, b; struct ifvlan *ifv2; TRUNK_WLOCK_ASSERT(trunk); KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); b = 1 << trunk->hwidth; i = HASH(ifv->ifv_vid, trunk->hmask); LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list) if (ifv->ifv_vid == ifv2->ifv_vid) return (EEXIST); /* * Grow the hash when the number of vlans exceeds half of the number of * hash buckets squared. This will make the average linked-list length * buckets/2. 
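 *
 * Worked example: with the default hwidth of 4 there are b = 16
 * buckets, so the table is grown once more than (16 * 16) / 2 = 128
 * vlans are present, i.e. before the average chain exceeds b / 2 = 8.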
*/ if (trunk->refcnt > (b * b) / 2) { vlan_growhash(trunk, 1); i = HASH(ifv->ifv_vid, trunk->hmask); } LIST_INSERT_HEAD(&trunk->hash[i], ifv, ifv_list); trunk->refcnt++; return (0); } static int vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv) { int i, b; struct ifvlan *ifv2; TRUNK_WLOCK_ASSERT(trunk); KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); b = 1 << trunk->hwidth; i = HASH(ifv->ifv_vid, trunk->hmask); LIST_FOREACH(ifv2, &trunk->hash[i], ifv_list) if (ifv2 == ifv) { trunk->refcnt--; LIST_REMOVE(ifv2, ifv_list); if (trunk->refcnt < (b * b) / 2) vlan_growhash(trunk, -1); return (0); } panic("%s: vlan not found\n", __func__); return (ENOENT); /*NOTREACHED*/ } /* * Grow the hash larger or smaller if memory permits. */ static void vlan_growhash(struct ifvlantrunk *trunk, int howmuch) { struct ifvlan *ifv; struct ifvlanhead *hash2; int hwidth2, i, j, n, n2; TRUNK_WLOCK_ASSERT(trunk); KASSERT(trunk->hwidth > 0, ("%s: hwidth not positive", __func__)); if (howmuch == 0) { /* Harmless yet obvious coding error */ printf("%s: howmuch is 0\n", __func__); return; } hwidth2 = trunk->hwidth + howmuch; n = 1 << trunk->hwidth; n2 = 1 << hwidth2; /* Do not shrink the table below the default */ if (hwidth2 < VLAN_DEF_HWIDTH) return; /* M_NOWAIT because we're called with trunk mutex held */ hash2 = malloc(sizeof(struct ifvlanhead) * n2, M_VLAN, M_NOWAIT); if (hash2 == NULL) { printf("%s: out of memory -- hash size not changed\n", __func__); return; /* We can live with the old hash table */ } for (j = 0; j < n2; j++) LIST_INIT(&hash2[j]); for (i = 0; i < n; i++) while ((ifv = LIST_FIRST(&trunk->hash[i])) != NULL) { LIST_REMOVE(ifv, ifv_list); j = HASH(ifv->ifv_vid, n2 - 1); LIST_INSERT_HEAD(&hash2[j], ifv, ifv_list); } free(trunk->hash, M_VLAN); trunk->hash = hash2; trunk->hwidth = hwidth2; trunk->hmask = n2 - 1; if (bootverbose) if_printf(trunk->parent, "VLAN hash table resized from %d to %d buckets\n", n, n2); } static __inline struct ifvlan * vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid) { struct ifvlan *ifv; TRUNK_RLOCK_ASSERT(trunk); LIST_FOREACH(ifv, &trunk->hash[HASH(vid, trunk->hmask)], ifv_list) if (ifv->ifv_vid == vid) return (ifv); return (NULL); } #if 0 /* Debugging code to view the hashtables. */ static void vlan_dumphash(struct ifvlantrunk *trunk) { int i; struct ifvlan *ifv; for (i = 0; i < (1 << trunk->hwidth); i++) { printf("%d: ", i); LIST_FOREACH(ifv, &trunk->hash[i], ifv_list) printf("%s ", ifv->ifv_ifp->if_xname); printf("\n"); } } #endif /* 0 */ #else static __inline struct ifvlan * vlan_gethash(struct ifvlantrunk *trunk, uint16_t vid) { return trunk->vlans[vid]; } static __inline int vlan_inshash(struct ifvlantrunk *trunk, struct ifvlan *ifv) { if (trunk->vlans[ifv->ifv_vid] != NULL) return EEXIST; trunk->vlans[ifv->ifv_vid] = ifv; trunk->refcnt++; return (0); } static __inline int vlan_remhash(struct ifvlantrunk *trunk, struct ifvlan *ifv) { trunk->vlans[ifv->ifv_vid] = NULL; trunk->refcnt--; return (0); } static __inline void vlan_freehash(struct ifvlantrunk *trunk) { } static __inline void vlan_inithash(struct ifvlantrunk *trunk) { } #endif /* !VLAN_ARRAY */ static void trunk_destroy(struct ifvlantrunk *trunk) { VLAN_XLOCK_ASSERT(); VLAN_WLOCK_ASSERT(); vlan_freehash(trunk); trunk->parent->if_vlantrunk = NULL; TRUNK_LOCK_DESTROY(trunk); if_rele(trunk->parent); free(trunk, M_VLAN); } /* * Program our multicast filter. What we're actually doing is * programming the multicast filter of the parent. 
This has the * side effect of causing the parent interface to receive multicast * traffic that it doesn't really want, which ends up being discarded * later by the upper protocol layers. Unfortunately, there's no way * to avoid this: there really is only one physical interface. */ static int vlan_setmulti(struct ifnet *ifp) { struct ifnet *ifp_p; struct ifmultiaddr *ifma; struct ifvlan *sc; struct vlan_mc_entry *mc; int error; /* * XXX This stupidly needs the rmlock to avoid sleeping while holding * the in6_multi_mtx (see in6_mc_join_locked). */ VLAN_RWLOCK_ASSERT(); /* Find the parent. */ sc = ifp->if_softc; TRUNK_WLOCK_ASSERT(TRUNK(sc)); ifp_p = PARENT(sc); CURVNET_SET_QUIET(ifp_p->if_vnet); /* First, remove any existing filter entries. */ while ((mc = SLIST_FIRST(&sc->vlan_mc_listhead)) != NULL) { SLIST_REMOVE_HEAD(&sc->vlan_mc_listhead, mc_entries); (void)if_delmulti(ifp_p, (struct sockaddr *)&mc->mc_addr); free(mc, M_VLAN); } /* Now program new ones. */ IF_ADDR_WLOCK(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; mc = malloc(sizeof(struct vlan_mc_entry), M_VLAN, M_NOWAIT); if (mc == NULL) { IF_ADDR_WUNLOCK(ifp); return (ENOMEM); } bcopy(ifma->ifma_addr, &mc->mc_addr, ifma->ifma_addr->sa_len); mc->mc_addr.sdl_index = ifp_p->if_index; SLIST_INSERT_HEAD(&sc->vlan_mc_listhead, mc, mc_entries); } IF_ADDR_WUNLOCK(ifp); SLIST_FOREACH (mc, &sc->vlan_mc_listhead, mc_entries) { error = if_addmulti(ifp_p, (struct sockaddr *)&mc->mc_addr, NULL); if (error) return (error); } CURVNET_RESTORE(); return (0); } /* * A handler for parent interface link layer address changes. * If the parent interface link layer address is changed we * should also change it on all children vlans. */ static void vlan_iflladdr(void *arg __unused, struct ifnet *ifp) { struct ifvlan *ifv; struct ifnet *ifv_ifp; struct ifvlantrunk *trunk; struct sockaddr_dl *sdl; VLAN_LOCK_READER; /* Need the rmlock since this is run on taskqueue_swi. */ VLAN_RLOCK(); trunk = ifp->if_vlantrunk; if (trunk == NULL) { VLAN_RUNLOCK(); return; } /* * OK, it's a trunk. Loop over and change all vlan's lladdrs on it. * We need an exclusive lock here to prevent concurrent SIOCSIFLLADDR * ioctl calls on the parent garbling the lladdr of the child vlan. */ TRUNK_WLOCK(trunk); VLAN_FOREACH(ifv, trunk) { /* * Copy new new lladdr into the ifv_ifp, enqueue a task * to actually call if_setlladdr. if_setlladdr needs to * be deferred to a taskqueue because it will call into * the if_vlan ioctl path and try to acquire the global * lock. */ ifv_ifp = ifv->ifv_ifp; bcopy(IF_LLADDR(ifp), IF_LLADDR(ifv_ifp), ifp->if_addrlen); sdl = (struct sockaddr_dl *)ifv_ifp->if_addr->ifa_addr; sdl->sdl_alen = ifp->if_addrlen; taskqueue_enqueue(taskqueue_thread, &ifv->lladdr_task); } TRUNK_WUNLOCK(trunk); VLAN_RUNLOCK(); } /* * A handler for network interface departure events. * Track departure of trunks here so that we don't access invalid * pointers or whatever if a trunk is ripped from under us, e.g., * by ejecting its hot-plug card. However, if an ifnet is simply * being renamed, then there's no need to tear down the state. */ static void vlan_ifdetach(void *arg __unused, struct ifnet *ifp) { struct ifvlan *ifv; struct ifvlantrunk *trunk; /* If the ifnet is just being renamed, don't do anything. */ if (ifp->if_flags & IFF_RENAMING) return; VLAN_XLOCK(); trunk = ifp->if_vlantrunk; if (trunk == NULL) { VLAN_XUNLOCK(); return; } /* * OK, it's a trunk. Loop over and detach all vlan's on it. 
* Check trunk pointer after each vlan_unconfig() as it will * free it and set to NULL after the last vlan was detached. */ VLAN_FOREACH_UNTIL_SAFE(ifv, ifp->if_vlantrunk, ifp->if_vlantrunk == NULL) vlan_unconfig_locked(ifv->ifv_ifp, 1); /* Trunk should have been destroyed in vlan_unconfig(). */ KASSERT(ifp->if_vlantrunk == NULL, ("%s: purge failed", __func__)); VLAN_XUNLOCK(); } /* * Return the trunk device for a virtual interface. */ static struct ifnet * vlan_trunkdev(struct ifnet *ifp) { struct ifvlan *ifv; VLAN_LOCK_READER; if (ifp->if_type != IFT_L2VLAN) return (NULL); /* Not clear if callers are sleepable, so acquire the rmlock. */ VLAN_RLOCK(); ifv = ifp->if_softc; ifp = NULL; if (ifv->ifv_trunk) ifp = PARENT(ifv); VLAN_RUNLOCK(); return (ifp); } /* * Return the 12-bit VLAN VID for this interface, for use by external * components such as Infiniband. * * XXXRW: Note that the function name here is historical; it should be named * vlan_vid(). */ static int vlan_tag(struct ifnet *ifp, uint16_t *vidp) { struct ifvlan *ifv; if (ifp->if_type != IFT_L2VLAN) return (EINVAL); ifv = ifp->if_softc; *vidp = ifv->ifv_vid; return (0); } /* * Return a driver specific cookie for this interface. Synchronization * with setcookie must be provided by the driver. */ static void * vlan_cookie(struct ifnet *ifp) { struct ifvlan *ifv; if (ifp->if_type != IFT_L2VLAN) return (NULL); ifv = ifp->if_softc; return (ifv->ifv_cookie); } /* * Store a cookie in our softc that drivers can use to store driver * private per-instance data in. */ static int vlan_setcookie(struct ifnet *ifp, void *cookie) { struct ifvlan *ifv; if (ifp->if_type != IFT_L2VLAN) return (EINVAL); ifv = ifp->if_softc; ifv->ifv_cookie = cookie; return (0); } /* * Return the vlan device present at the specific VID. */ static struct ifnet * vlan_devat(struct ifnet *ifp, uint16_t vid) { struct ifvlantrunk *trunk; struct ifvlan *ifv; VLAN_LOCK_READER; TRUNK_LOCK_READER; /* Not clear if callers are sleepable, so acquire the rmlock. */ VLAN_RLOCK(); trunk = ifp->if_vlantrunk; if (trunk == NULL) { VLAN_RUNLOCK(); return (NULL); } ifp = NULL; TRUNK_RLOCK(trunk); ifv = vlan_gethash(trunk, vid); if (ifv) ifp = ifv->ifv_ifp; TRUNK_RUNLOCK(trunk); VLAN_RUNLOCK(); return (ifp); } /* * Recalculate the cached VLAN tag exposed via the MIB. */ static void vlan_tag_recalculate(struct ifvlan *ifv) { ifv->ifv_tag = EVL_MAKETAG(ifv->ifv_vid, ifv->ifv_pcp, 0); } /* * VLAN support can be loaded as a module. The only place in the * system that's intimately aware of this is ether_input. We hook * into this code through vlan_input_p which is defined there and * set here. No one else in the system should be aware of this so * we use an explicit reference here. */ extern void (*vlan_input_p)(struct ifnet *, struct mbuf *); /* For if_link_state_change() eyes only... 
*/ extern void (*vlan_link_state_p)(struct ifnet *); static int vlan_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: ifdetach_tag = EVENTHANDLER_REGISTER(ifnet_departure_event, vlan_ifdetach, NULL, EVENTHANDLER_PRI_ANY); if (ifdetach_tag == NULL) return (ENOMEM); iflladdr_tag = EVENTHANDLER_REGISTER(iflladdr_event, vlan_iflladdr, NULL, EVENTHANDLER_PRI_ANY); if (iflladdr_tag == NULL) return (ENOMEM); VLAN_LOCKING_INIT(); vlan_input_p = vlan_input; vlan_link_state_p = vlan_link_state; vlan_trunk_cap_p = vlan_trunk_capabilities; vlan_trunkdev_p = vlan_trunkdev; vlan_cookie_p = vlan_cookie; vlan_setcookie_p = vlan_setcookie; vlan_tag_p = vlan_tag; vlan_devat_p = vlan_devat; #ifndef VIMAGE vlan_cloner = if_clone_advanced(vlanname, 0, vlan_clone_match, vlan_clone_create, vlan_clone_destroy); #endif if (bootverbose) printf("vlan: initialized, using " #ifdef VLAN_ARRAY "full-size arrays" #else "hash tables with chaining" #endif "\n"); break; case MOD_UNLOAD: #ifndef VIMAGE if_clone_detach(vlan_cloner); #endif EVENTHANDLER_DEREGISTER(ifnet_departure_event, ifdetach_tag); EVENTHANDLER_DEREGISTER(iflladdr_event, iflladdr_tag); vlan_input_p = NULL; vlan_link_state_p = NULL; vlan_trunk_cap_p = NULL; vlan_trunkdev_p = NULL; vlan_tag_p = NULL; vlan_cookie_p = NULL; vlan_setcookie_p = NULL; vlan_devat_p = NULL; VLAN_LOCKING_DESTROY(); if (bootverbose) printf("vlan: unloaded\n"); break; default: return (EOPNOTSUPP); } return (0); } static moduledata_t vlan_mod = { "if_vlan", vlan_modevent, 0 }; DECLARE_MODULE(if_vlan, vlan_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(if_vlan, 3); #ifdef VIMAGE static void vnet_vlan_init(const void *unused __unused) { vlan_cloner = if_clone_advanced(vlanname, 0, vlan_clone_match, vlan_clone_create, vlan_clone_destroy); V_vlan_cloner = vlan_cloner; } VNET_SYSINIT(vnet_vlan_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, vnet_vlan_init, NULL); static void vnet_vlan_uninit(const void *unused __unused) { if_clone_detach(V_vlan_cloner); } VNET_SYSUNINIT(vnet_vlan_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST, vnet_vlan_uninit, NULL); #endif /* * Check for . style interface names. */ static struct ifnet * vlan_clone_match_ethervid(const char *name, int *vidp) { char ifname[IFNAMSIZ]; char *cp; struct ifnet *ifp; int vid; strlcpy(ifname, name, IFNAMSIZ); if ((cp = strchr(ifname, '.')) == NULL) return (NULL); *cp = '\0'; if ((ifp = ifunit_ref(ifname)) == NULL) return (NULL); /* Parse VID. */ if (*++cp == '\0') { if_rele(ifp); return (NULL); } vid = 0; for(; *cp >= '0' && *cp <= '9'; cp++) vid = (vid * 10) + (*cp - '0'); if (*cp != '\0') { if_rele(ifp); return (NULL); } if (vidp != NULL) *vidp = vid; return (ifp); } static int vlan_clone_match(struct if_clone *ifc, const char *name) { const char *cp; if (vlan_clone_match_ethervid(name, NULL) != NULL) return (1); if (strncmp(vlanname, name, strlen(vlanname)) != 0) return (0); for (cp = name + 4; *cp != '\0'; cp++) { if (*cp < '0' || *cp > '9') return (0); } return (1); } static int vlan_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) { char *dp; int wildcard; int unit; int error; int vid; struct ifvlan *ifv; struct ifnet *ifp; struct ifnet *p; struct ifaddr *ifa; struct sockaddr_dl *sdl; struct vlanreq vlr; static const u_char eaddr[ETHER_ADDR_LEN]; /* 00:00:00:00:00:00 */ /* * There are 3 (ugh) ways to specify the cloned device: * o pass a parameter block with the clone request. 
* o specify parameters in the text of the clone device name * o specify no parameters and get an unattached device that * must be configured separately. * The first technique is preferred; the latter two are * supported for backwards compatibility. * * XXXRW: Note historic use of the word "tag" here. New ioctls may be * called for. */ if (params) { error = copyin(params, &vlr, sizeof(vlr)); if (error) return error; p = ifunit_ref(vlr.vlr_parent); if (p == NULL) return (ENXIO); error = ifc_name2unit(name, &unit); if (error != 0) { if_rele(p); return (error); } vid = vlr.vlr_tag; wildcard = (unit < 0); } else if ((p = vlan_clone_match_ethervid(name, &vid)) != NULL) { unit = -1; wildcard = 0; } else { p = NULL; error = ifc_name2unit(name, &unit); if (error != 0) return (error); wildcard = (unit < 0); } error = ifc_alloc_unit(ifc, &unit); if (error != 0) { if (p != NULL) if_rele(p); return (error); } /* In the wildcard case, we need to update the name. */ if (wildcard) { for (dp = name; *dp != '\0'; dp++); if (snprintf(dp, len - (dp-name), "%d", unit) > len - (dp-name) - 1) { panic("%s: interface name too long", __func__); } } ifv = malloc(sizeof(struct ifvlan), M_VLAN, M_WAITOK | M_ZERO); ifp = ifv->ifv_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { ifc_free_unit(ifc, unit); free(ifv, M_VLAN); if (p != NULL) if_rele(p); return (ENOSPC); } SLIST_INIT(&ifv->vlan_mc_listhead); ifp->if_softc = ifv; /* * Set the name manually rather than using if_initname because * we don't conform to the default naming convention for interfaces. */ strlcpy(ifp->if_xname, name, IFNAMSIZ); ifp->if_dname = vlanname; ifp->if_dunit = unit; /* NB: flags are not set here */ ifp->if_linkmib = &ifv->ifv_mib; ifp->if_linkmiblen = sizeof(ifv->ifv_mib); /* NB: mtu is not set here */ ifp->if_init = vlan_init; ifp->if_transmit = vlan_transmit; ifp->if_qflush = vlan_qflush; ifp->if_ioctl = vlan_ioctl; ifp->if_flags = VLAN_IFFLAGS; ether_ifattach(ifp, eaddr); /* Now undo some of the damage... */ ifp->if_baudrate = 0; ifp->if_type = IFT_L2VLAN; ifp->if_hdrlen = ETHER_VLAN_ENCAP_LEN; ifa = ifp->if_addr; sdl = (struct sockaddr_dl *)ifa->ifa_addr; sdl->sdl_type = IFT_L2VLAN; if (p != NULL) { error = vlan_config(ifv, p, vid); if_rele(p); if (error != 0) { /* * Since we've partially failed, we need to back * out all the way, otherwise userland could get * confused. Thus, we destroy the interface. */ ether_ifdetach(ifp); vlan_unconfig(ifp); if_free(ifp); ifc_free_unit(ifc, unit); free(ifv, M_VLAN); return (error); } } return (0); } static int vlan_clone_destroy(struct if_clone *ifc, struct ifnet *ifp) { struct ifvlan *ifv = ifp->if_softc; int unit = ifp->if_dunit; ether_ifdetach(ifp); /* first, remove it from system-wide lists */ vlan_unconfig(ifp); /* now it can be unconfigured and freed */ /* * We should have the only reference to the ifv now, so we can now * drain any remaining lladdr task before freeing the ifnet and the * ifvlan. */ taskqueue_drain(taskqueue_thread, &ifv->lladdr_task); if_free(ifp); free(ifv, M_VLAN); ifc_free_unit(ifc, unit); return (0); } /* * The ifp->if_init entry point for vlan(4) is a no-op. */ static void vlan_init(void *foo __unused) { } /* * The if_transmit method for vlan(4) interface. 
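 *
 * In outline: drop the frame if the trunk or parent is not up, pad it
 * to the Ethernet minimum when net.link.vlan.soft_pad is set, then
 * either mark it M_VLANTAG with the tag in m_pkthdr.ether_vtag (parent
 * has IFCAP_VLAN_HWTAGGING) or encapsulate in software with
 * ether_vlanencap(), and finally hand it to the parent's if_transmit.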
*/ static int vlan_transmit(struct ifnet *ifp, struct mbuf *m) { struct ifvlan *ifv; struct ifnet *p; struct m_tag *mtag; uint16_t tag; int error, len, mcast; VLAN_LOCK_READER; VLAN_RLOCK(); ifv = ifp->if_softc; if (TRUNK(ifv) == NULL) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); VLAN_RUNLOCK(); m_freem(m); return (ENETDOWN); } p = PARENT(ifv); len = m->m_pkthdr.len; mcast = (m->m_flags & (M_MCAST | M_BCAST)) ? 1 : 0; BPF_MTAP(ifp, m); /* * Do not run parent's if_transmit() if the parent is not up, * or parent's driver will cause a system crash. */ if (!UP_AND_RUNNING(p)) { if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); VLAN_RUNLOCK(); m_freem(m); return (ENETDOWN); } /* * Pad the frame to the minimum size allowed if told to. * This option is in accord with IEEE Std 802.1Q, 2003 Ed., * paragraph C.4.4.3.b. It can help to work around buggy * bridges that violate paragraph C.4.4.3.a from the same * document, i.e., fail to pad short frames after untagging. * E.g., a tagged frame 66 bytes long (incl. FCS) is OK, but * untagging it will produce a 62-byte frame, which is a runt * and requires padding. There are VLAN-enabled network * devices that just discard such runts instead or mishandle * them somehow. */ if (V_soft_pad && p->if_type == IFT_ETHER) { static char pad[8]; /* just zeros */ int n; for (n = ETHERMIN + ETHER_HDR_LEN - m->m_pkthdr.len; n > 0; n -= sizeof(pad)) if (!m_append(m, min(n, sizeof(pad)), pad)) break; if (n > 0) { if_printf(ifp, "cannot pad short frame\n"); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); VLAN_RUNLOCK(); m_freem(m); return (0); } } /* * If underlying interface can do VLAN tag insertion itself, * just pass the packet along. However, we need some way to * tell the interface where the packet came from so that it * knows how to find the VLAN tag to use, so we attach a * packet tag that holds it. */ if (vlan_mtag_pcp && (mtag = m_tag_locate(m, MTAG_8021Q, MTAG_8021Q_PCP_OUT, NULL)) != NULL) tag = EVL_MAKETAG(ifv->ifv_vid, *(uint8_t *)(mtag + 1), 0); else tag = ifv->ifv_tag; if (p->if_capenable & IFCAP_VLAN_HWTAGGING) { m->m_pkthdr.ether_vtag = tag; m->m_flags |= M_VLANTAG; } else { m = ether_vlanencap(m, tag); if (m == NULL) { if_printf(ifp, "unable to prepend VLAN header\n"); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); VLAN_RUNLOCK(); return (0); } } /* * Send it, precisely as ether_output() would have. */ error = (p->if_transmit)(p, m); if (error == 0) { if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(ifp, IFCOUNTER_OBYTES, len); if_inc_counter(ifp, IFCOUNTER_OMCASTS, mcast); } else if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); VLAN_RUNLOCK(); return (error); } /* * The ifp->if_qflush entry point for vlan(4) is a no-op. */ static void vlan_qflush(struct ifnet *ifp __unused) { } static void vlan_input(struct ifnet *ifp, struct mbuf *m) { struct ifvlantrunk *trunk; struct ifvlan *ifv; VLAN_LOCK_READER; TRUNK_LOCK_READER; struct m_tag *mtag; uint16_t vid, tag; VLAN_RLOCK(); trunk = ifp->if_vlantrunk; if (trunk == NULL) { VLAN_RUNLOCK(); m_freem(m); return; } if (m->m_flags & M_VLANTAG) { /* * Packet is tagged, but m contains a normal * Ethernet frame; the tag is stored out-of-band. */ tag = m->m_pkthdr.ether_vtag; m->m_flags &= ~M_VLANTAG; } else { struct ether_vlan_header *evl; /* * Packet is tagged in-band as specified by 802.1q. 
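 *
 * On the wire the tag sits between the source address and the original
 * ethertype (field sizes in bytes):
 *
 *	dst(6) src(6) TPID 0x8100(2) TCI(2) type(2) payload ...
 *
 * The TCI holds the PCP in its top three bits and the 12-bit VID in its
 * low bits; EVL_PRIOFTAG() and EVL_VLANOFTAG() below pull them apart.
 * Untagging copies the two MAC addresses forward by
 * ETHER_VLAN_ENCAP_LEN (4) bytes and trims that much off the front of
 * the mbuf, leaving an ordinary Ethernet frame.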
*/ switch (ifp->if_type) { case IFT_ETHER: if (m->m_len < sizeof(*evl) && (m = m_pullup(m, sizeof(*evl))) == NULL) { if_printf(ifp, "cannot pullup VLAN header\n"); VLAN_RUNLOCK(); return; } evl = mtod(m, struct ether_vlan_header *); tag = ntohs(evl->evl_tag); /* * Remove the 802.1q header by copying the Ethernet * addresses over it and adjusting the beginning of * the data in the mbuf. The encapsulated Ethernet * type field is already in place. */ bcopy((char *)evl, (char *)evl + ETHER_VLAN_ENCAP_LEN, ETHER_HDR_LEN - ETHER_TYPE_LEN); m_adj(m, ETHER_VLAN_ENCAP_LEN); break; default: #ifdef INVARIANTS panic("%s: %s has unsupported if_type %u", __func__, ifp->if_xname, ifp->if_type); #endif if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); VLAN_RUNLOCK(); m_freem(m); return; } } vid = EVL_VLANOFTAG(tag); TRUNK_RLOCK(trunk); ifv = vlan_gethash(trunk, vid); if (ifv == NULL || !UP_AND_RUNNING(ifv->ifv_ifp)) { TRUNK_RUNLOCK(trunk); if_inc_counter(ifp, IFCOUNTER_NOPROTO, 1); VLAN_RUNLOCK(); m_freem(m); return; } TRUNK_RUNLOCK(trunk); if (vlan_mtag_pcp) { /* * While uncommon, it is possible that we will find a 802.1q * packet encapsulated inside another packet that also had an * 802.1q header. For example, ethernet tunneled over IPSEC * arriving over ethernet. In that case, we replace the * existing 802.1q PCP m_tag value. */ mtag = m_tag_locate(m, MTAG_8021Q, MTAG_8021Q_PCP_IN, NULL); if (mtag == NULL) { mtag = m_tag_alloc(MTAG_8021Q, MTAG_8021Q_PCP_IN, sizeof(uint8_t), M_NOWAIT); if (mtag == NULL) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); VLAN_RUNLOCK(); m_freem(m); return; } m_tag_prepend(m, mtag); } *(uint8_t *)(mtag + 1) = EVL_PRIOFTAG(tag); } m->m_pkthdr.rcvif = ifv->ifv_ifp; if_inc_counter(ifv->ifv_ifp, IFCOUNTER_IPACKETS, 1); VLAN_RUNLOCK(); /* Pass it back through the parent's input routine. */ (*ifv->ifv_ifp->if_input)(ifv->ifv_ifp, m); } static void vlan_lladdr_fn(void *arg, int pending __unused) { struct ifvlan *ifv; struct ifnet *ifp; ifv = (struct ifvlan *)arg; ifp = ifv->ifv_ifp; /* The ifv_ifp already has the lladdr copied in. */ if_setlladdr(ifp, IF_LLADDR(ifp), ifp->if_addrlen); } static int vlan_config(struct ifvlan *ifv, struct ifnet *p, uint16_t vid) { struct ifvlantrunk *trunk; struct ifnet *ifp; int error = 0; /* * We can handle non-ethernet hardware types as long as * they handle the tagging and headers themselves. */ if (p->if_type != IFT_ETHER && (p->if_capenable & IFCAP_VLAN_HWTAGGING) == 0) return (EPROTONOSUPPORT); if ((p->if_flags & VLAN_IFFLAGS) != VLAN_IFFLAGS) return (EPROTONOSUPPORT); /* * Don't let the caller set up a VLAN VID with * anything except VLID bits. * VID numbers 0x0 and 0xFFF are reserved. */ if (vid == 0 || vid == 0xFFF || (vid & ~EVL_VLID_MASK)) return (EINVAL); if (ifv->ifv_trunk) return (EBUSY); /* Acquire rmlock after the branch so we can M_WAITOK. */ VLAN_XLOCK(); if (p->if_vlantrunk == NULL) { trunk = malloc(sizeof(struct ifvlantrunk), M_VLAN, M_WAITOK | M_ZERO); vlan_inithash(trunk); TRUNK_LOCK_INIT(trunk); VLAN_WLOCK(); TRUNK_WLOCK(trunk); p->if_vlantrunk = trunk; trunk->parent = p; if_ref(trunk->parent); } else { VLAN_WLOCK(); trunk = p->if_vlantrunk; TRUNK_WLOCK(trunk); } ifv->ifv_vid = vid; /* must set this before vlan_inshash() */ ifv->ifv_pcp = 0; /* Default: best effort delivery. 
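 * In 802.1p terms PCP 0 is Best Effort; a different priority can be
 * set later through SIOCSVLANPCP (privileged, values 0..7).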
*/ vlan_tag_recalculate(ifv); error = vlan_inshash(trunk, ifv); if (error) goto done; ifv->ifv_proto = ETHERTYPE_VLAN; ifv->ifv_encaplen = ETHER_VLAN_ENCAP_LEN; ifv->ifv_mintu = ETHERMIN; ifv->ifv_pflags = 0; ifv->ifv_capenable = -1; /* * If the parent supports the VLAN_MTU capability, * i.e. can Tx/Rx larger than ETHER_MAX_LEN frames, * use it. */ if (p->if_capenable & IFCAP_VLAN_MTU) { /* * No need to fudge the MTU since the parent can * handle extended frames. */ ifv->ifv_mtufudge = 0; } else { /* * Fudge the MTU by the encapsulation size. This * makes us incompatible with strictly compliant * 802.1Q implementations, but allows us to use * the feature with other NetBSD implementations, * which might still be useful. */ ifv->ifv_mtufudge = ifv->ifv_encaplen; } ifv->ifv_trunk = trunk; ifp = ifv->ifv_ifp; /* * Initialize fields from our parent. This duplicates some * work with ether_ifattach() but allows for non-ethernet * interfaces to also work. */ ifp->if_mtu = p->if_mtu - ifv->ifv_mtufudge; ifp->if_baudrate = p->if_baudrate; ifp->if_output = p->if_output; ifp->if_input = p->if_input; ifp->if_resolvemulti = p->if_resolvemulti; ifp->if_addrlen = p->if_addrlen; ifp->if_broadcastaddr = p->if_broadcastaddr; /* * Copy only a selected subset of flags from the parent. * Other flags are none of our business. */ #define VLAN_COPY_FLAGS (IFF_SIMPLEX) ifp->if_flags &= ~VLAN_COPY_FLAGS; ifp->if_flags |= p->if_flags & VLAN_COPY_FLAGS; #undef VLAN_COPY_FLAGS ifp->if_link_state = p->if_link_state; vlan_capabilities(ifv); /* * Set up our interface address to reflect the underlying * physical interface's. */ bcopy(IF_LLADDR(p), IF_LLADDR(ifp), p->if_addrlen); ((struct sockaddr_dl *)ifp->if_addr->ifa_addr)->sdl_alen = p->if_addrlen; /* * Configure multicast addresses that may already be * joined on the vlan device. */ (void)vlan_setmulti(ifp); TASK_INIT(&ifv->lladdr_task, 0, vlan_lladdr_fn, ifv); /* We are ready for operation now. */ ifp->if_drv_flags |= IFF_DRV_RUNNING; /* Update flags on the parent, if necessary. */ vlan_setflags(ifp, 1); done: /* * We need to drop the non-sleepable rmlock so that the underlying * devices can sleep in their vlan_config hooks. */ TRUNK_WUNLOCK(trunk); VLAN_WUNLOCK(); if (error == 0) EVENTHANDLER_INVOKE(vlan_config, p, ifv->ifv_vid); VLAN_XUNLOCK(); return (error); } static void vlan_unconfig(struct ifnet *ifp) { VLAN_XLOCK(); vlan_unconfig_locked(ifp, 0); VLAN_XUNLOCK(); } static void vlan_unconfig_locked(struct ifnet *ifp, int departing) { struct ifvlantrunk *trunk; struct vlan_mc_entry *mc; struct ifvlan *ifv; struct ifnet *parent; int error; VLAN_XLOCK_ASSERT(); ifv = ifp->if_softc; trunk = ifv->ifv_trunk; parent = NULL; if (trunk != NULL) { /* * Both vlan_transmit and vlan_input rely on the trunk fields * being NULL to determine whether to bail, so we need to get * an exclusive lock here to prevent them from using bad * ifvlans. */ VLAN_WLOCK(); parent = trunk->parent; /* * Since the interface is being unconfigured, we need to * empty the list of multicast groups that we may have joined * while we were alive from the parent's list. */ while ((mc = SLIST_FIRST(&ifv->vlan_mc_listhead)) != NULL) { /* * If the parent interface is being detached, * all its multicast addresses have already * been removed. Warn about errors if * if_delmulti() does fail, but don't abort as * all callers expect vlan destruction to * succeed. 
*/ if (!departing) { error = if_delmulti(parent, (struct sockaddr *)&mc->mc_addr); if (error) if_printf(ifp, "Failed to delete multicast address from parent: %d\n", error); } SLIST_REMOVE_HEAD(&ifv->vlan_mc_listhead, mc_entries); free(mc, M_VLAN); } vlan_setflags(ifp, 0); /* clear special flags on parent */ /* * The trunk lock isn't actually required here, but * vlan_remhash expects it. */ TRUNK_WLOCK(trunk); vlan_remhash(trunk, ifv); TRUNK_WUNLOCK(trunk); ifv->ifv_trunk = NULL; /* * Check if we were the last. */ if (trunk->refcnt == 0) { parent->if_vlantrunk = NULL; trunk_destroy(trunk); } VLAN_WUNLOCK(); } /* Disconnect from parent. */ if (ifv->ifv_pflags) if_printf(ifp, "%s: ifv_pflags unclean\n", __func__); ifp->if_mtu = ETHERMTU; ifp->if_link_state = LINK_STATE_UNKNOWN; ifp->if_drv_flags &= ~IFF_DRV_RUNNING; /* * Only dispatch an event if vlan was * attached, otherwise there is nothing * to cleanup anyway. */ if (parent != NULL) EVENTHANDLER_INVOKE(vlan_unconfig, parent, ifv->ifv_vid); } /* Handle a reference counted flag that should be set on the parent as well */ static int vlan_setflag(struct ifnet *ifp, int flag, int status, int (*func)(struct ifnet *, int)) { struct ifvlan *ifv; int error; VLAN_SXLOCK_ASSERT(); ifv = ifp->if_softc; status = status ? (ifp->if_flags & flag) : 0; /* Now "status" contains the flag value or 0 */ /* * See if recorded parent's status is different from what * we want it to be. If it is, flip it. We record parent's * status in ifv_pflags so that we won't clear parent's flag * we haven't set. In fact, we don't clear or set parent's * flags directly, but get or release references to them. * That's why we can be sure that recorded flags still are * in accord with actual parent's flags. */ if (status != (ifv->ifv_pflags & flag)) { error = (*func)(PARENT(ifv), status); if (error) return (error); ifv->ifv_pflags &= ~flag; ifv->ifv_pflags |= status; } return (0); } /* * Handle IFF_* flags that require certain changes on the parent: * if "status" is true, update parent's flags respective to our if_flags; * if "status" is false, forcedly clear the flags set on parent. */ static int vlan_setflags(struct ifnet *ifp, int status) { int error, i; for (i = 0; vlan_pflags[i].flag; i++) { error = vlan_setflag(ifp, vlan_pflags[i].flag, status, vlan_pflags[i].func); if (error) return (error); } return (0); } /* Inform all vlans that their parent has changed link state */ static void vlan_link_state(struct ifnet *ifp) { struct ifvlantrunk *trunk; struct ifvlan *ifv; VLAN_LOCK_READER; /* Called from a taskqueue_swi task, so we cannot sleep. */ VLAN_RLOCK(); trunk = ifp->if_vlantrunk; if (trunk == NULL) { VLAN_RUNLOCK(); return; } TRUNK_WLOCK(trunk); VLAN_FOREACH(ifv, trunk) { ifv->ifv_ifp->if_baudrate = trunk->parent->if_baudrate; if_link_state_change(ifv->ifv_ifp, trunk->parent->if_link_state); } TRUNK_WUNLOCK(trunk); VLAN_RUNLOCK(); } static void vlan_capabilities(struct ifvlan *ifv) { struct ifnet *p; struct ifnet *ifp; struct ifnet_hw_tsomax hw_tsomax; int cap = 0, ena = 0, mena; u_long hwa = 0; VLAN_SXLOCK_ASSERT(); TRUNK_WLOCK_ASSERT(TRUNK(ifv)); p = PARENT(ifv); ifp = ifv->ifv_ifp; /* Mask parent interface enabled capabilities disabled by user. */ mena = p->if_capenable & ifv->ifv_capenable; /* * If the parent interface can do checksum offloading * on VLANs, then propagate its hardware-assisted * checksumming flags. Also assert that checksum * offloading requires hardware VLAN tagging. 
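 *
 * For example, a parent with IFCAP_VLAN_HWCSUM and IFCAP_VLAN_HWTAGGING
 * enabled lets the vlan turn on IFCAP_HWCSUM / IFCAP_HWCSUM_IPV6
 * (filtered by the user's ifv_capenable mask), and the matching CSUM_IP,
 * CSUM_TCP, CSUM_UDP, CSUM_SCTP and IPv6 bits are copied from the
 * parent's if_hwassist.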
*/ if (p->if_capabilities & IFCAP_VLAN_HWCSUM) cap |= p->if_capabilities & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6); if (p->if_capenable & IFCAP_VLAN_HWCSUM && p->if_capenable & IFCAP_VLAN_HWTAGGING) { ena |= mena & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6); if (ena & IFCAP_TXCSUM) hwa |= p->if_hwassist & (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP); if (ena & IFCAP_TXCSUM_IPV6) hwa |= p->if_hwassist & (CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_SCTP_IPV6); } /* * If the parent interface can do TSO on VLANs then * propagate the hardware-assisted flag. TSO on VLANs * does not necessarily require hardware VLAN tagging. */ memset(&hw_tsomax, 0, sizeof(hw_tsomax)); if_hw_tsomax_common(p, &hw_tsomax); if_hw_tsomax_update(ifp, &hw_tsomax); if (p->if_capabilities & IFCAP_VLAN_HWTSO) cap |= p->if_capabilities & IFCAP_TSO; if (p->if_capenable & IFCAP_VLAN_HWTSO) { ena |= mena & IFCAP_TSO; if (ena & IFCAP_TSO) hwa |= p->if_hwassist & CSUM_TSO; } /* * If the parent interface can do LRO and checksum offloading on * VLANs, then guess it may do LRO on VLANs. False positive here * cost nothing, while false negative may lead to some confusions. */ if (p->if_capabilities & IFCAP_VLAN_HWCSUM) cap |= p->if_capabilities & IFCAP_LRO; if (p->if_capenable & IFCAP_VLAN_HWCSUM) ena |= p->if_capenable & IFCAP_LRO; /* * If the parent interface can offload TCP connections over VLANs then * propagate its TOE capability to the VLAN interface. * * All TOE drivers in the tree today can deal with VLANs. If this * changes then IFCAP_VLAN_TOE should be promoted to a full capability * with its own bit. */ #define IFCAP_VLAN_TOE IFCAP_TOE if (p->if_capabilities & IFCAP_VLAN_TOE) cap |= p->if_capabilities & IFCAP_TOE; if (p->if_capenable & IFCAP_VLAN_TOE) { TOEDEV(ifp) = TOEDEV(p); ena |= mena & IFCAP_TOE; } /* * If the parent interface supports dynamic link state, so does the * VLAN interface. */ cap |= (p->if_capabilities & IFCAP_LINKSTATE); ena |= (mena & IFCAP_LINKSTATE); ifp->if_capabilities = cap; ifp->if_capenable = ena; ifp->if_hwassist = hwa; } static void vlan_trunk_capabilities(struct ifnet *ifp) { struct ifvlantrunk *trunk; struct ifvlan *ifv; VLAN_SLOCK(); trunk = ifp->if_vlantrunk; if (trunk == NULL) { VLAN_SUNLOCK(); return; } TRUNK_WLOCK(trunk); VLAN_FOREACH(ifv, trunk) { vlan_capabilities(ifv); } TRUNK_WUNLOCK(trunk); VLAN_SUNLOCK(); } static int vlan_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct ifnet *p; struct ifreq *ifr; struct ifaddr *ifa; struct ifvlan *ifv; struct ifvlantrunk *trunk; struct vlanreq vlr; int error = 0; VLAN_LOCK_READER; ifr = (struct ifreq *)data; ifa = (struct ifaddr *) data; ifv = ifp->if_softc; switch (cmd) { case SIOCSIFADDR: ifp->if_flags |= IFF_UP; #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) arp_ifinit(ifp, ifa); #endif break; case SIOCGIFADDR: bcopy(IF_LLADDR(ifp), &ifr->ifr_addr.sa_data[0], ifp->if_addrlen); break; case SIOCGIFMEDIA: VLAN_SLOCK(); if (TRUNK(ifv) != NULL) { p = PARENT(ifv); if_ref(p); error = (*p->if_ioctl)(p, SIOCGIFMEDIA, data); if_rele(p); /* Limit the result to the parent's current config. */ if (error == 0) { struct ifmediareq *ifmr; ifmr = (struct ifmediareq *)data; if (ifmr->ifm_count >= 1 && ifmr->ifm_ulist) { ifmr->ifm_count = 1; error = copyout(&ifmr->ifm_current, ifmr->ifm_ulist, sizeof(int)); } } } else { error = EINVAL; } VLAN_SUNLOCK(); break; case SIOCSIFMEDIA: error = EINVAL; break; case SIOCSIFMTU: /* * Set the interface MTU. 
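 *
 * The new MTU must lie between ifv_mintu - ifv_mtufudge and the parent
 * MTU minus ifv_mtufudge. For example, a parent without IFCAP_VLAN_MTU
 * and a 1500-byte MTU gives ifv_mtufudge == ETHER_VLAN_ENCAP_LEN (4),
 * capping the vlan MTU at 1496.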
*/ VLAN_SLOCK(); trunk = TRUNK(ifv); if (trunk != NULL) { TRUNK_WLOCK(trunk); if (ifr->ifr_mtu > (PARENT(ifv)->if_mtu - ifv->ifv_mtufudge) || ifr->ifr_mtu < (ifv->ifv_mintu - ifv->ifv_mtufudge)) error = EINVAL; else ifp->if_mtu = ifr->ifr_mtu; TRUNK_WUNLOCK(trunk); } else error = EINVAL; VLAN_SUNLOCK(); break; case SIOCSETVLAN: #ifdef VIMAGE /* * XXXRW/XXXBZ: The goal in these checks is to allow a VLAN * interface to be delegated to a jail without allowing the * jail to change what underlying interface/VID it is * associated with. We are not entirely convinced that this * is the right way to accomplish that policy goal. */ if (ifp->if_vnet != ifp->if_home_vnet) { error = EPERM; break; } #endif - error = copyin(ifr->ifr_data, &vlr, sizeof(vlr)); + error = copyin(ifr_data_get_ptr(ifr), &vlr, sizeof(vlr)); if (error) break; if (vlr.vlr_parent[0] == '\0') { vlan_unconfig(ifp); break; } p = ifunit_ref(vlr.vlr_parent); if (p == NULL) { error = ENOENT; break; } error = vlan_config(ifv, p, vlr.vlr_tag); if_rele(p); break; case SIOCGETVLAN: #ifdef VIMAGE if (ifp->if_vnet != ifp->if_home_vnet) { error = EPERM; break; } #endif bzero(&vlr, sizeof(vlr)); VLAN_SLOCK(); if (TRUNK(ifv) != NULL) { strlcpy(vlr.vlr_parent, PARENT(ifv)->if_xname, sizeof(vlr.vlr_parent)); vlr.vlr_tag = ifv->ifv_vid; } VLAN_SUNLOCK(); - error = copyout(&vlr, ifr->ifr_data, sizeof(vlr)); + error = copyout(&vlr, ifr_data_get_ptr(ifr), sizeof(vlr)); break; case SIOCSIFFLAGS: /* * We should propagate selected flags to the parent, * e.g., promiscuous mode. */ VLAN_XLOCK(); if (TRUNK(ifv) != NULL) error = vlan_setflags(ifp, 1); VLAN_XUNLOCK(); break; case SIOCADDMULTI: case SIOCDELMULTI: /* * If we don't have a parent, just remember the membership for * when we do. * * XXX We need the rmlock here to avoid sleeping while * holding in6_multi_mtx. */ VLAN_RLOCK(); trunk = TRUNK(ifv); if (trunk != NULL) { TRUNK_WLOCK(trunk); error = vlan_setmulti(ifp); TRUNK_WUNLOCK(trunk); } VLAN_RUNLOCK(); break; case SIOCGVLANPCP: #ifdef VIMAGE if (ifp->if_vnet != ifp->if_home_vnet) { error = EPERM; break; } #endif ifr->ifr_vlan_pcp = ifv->ifv_pcp; break; case SIOCSVLANPCP: #ifdef VIMAGE if (ifp->if_vnet != ifp->if_home_vnet) { error = EPERM; break; } #endif error = priv_check(curthread, PRIV_NET_SETVLANPCP); if (error) break; if (ifr->ifr_vlan_pcp > 7) { error = EINVAL; break; } ifv->ifv_pcp = ifr->ifr_vlan_pcp; vlan_tag_recalculate(ifv); break; case SIOCSIFCAP: VLAN_SLOCK(); ifv->ifv_capenable = ifr->ifr_reqcap; trunk = TRUNK(ifv); if (trunk != NULL) { TRUNK_WLOCK(trunk); vlan_capabilities(ifv); TRUNK_WUNLOCK(trunk); } VLAN_SUNLOCK(); break; default: error = EINVAL; break; } return (error); } Index: stable/11/sys/net/iflib.c =================================================================== --- stable/11/sys/net/iflib.c (revision 332287) +++ stable/11/sys/net/iflib.c (revision 332288) @@ -1,5011 +1,5012 @@ /*- * Copyright (c) 2014-2016, Matthew Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Neither the name of Matthew Macy nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_acpi.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "ifdi_if.h" #if defined(__i386__) || defined(__amd64__) #include #include #include #include #include #include #endif /* * enable accounting of every mbuf as it comes in to and goes out of iflib's software descriptor references */ #define MEMORY_LOGGING 0 /* * Enable mbuf vectors for compressing long mbuf chains */ /* * NB: * - Prefetching in tx cleaning should perhaps be a tunable. The distance ahead * we prefetch needs to be determined by the time spent in m_free vis a vis * the cost of a prefetch. This will of course vary based on the workload: * - NFLX's m_free path is dominated by vm-based M_EXT manipulation which * is quite expensive, thus suggesting very little prefetch. * - small packet forwarding which is just returning a single mbuf to * UMA will typically be very fast vis a vis the cost of a memory * access. 
*/ /* * File organization: * - private structures * - iflib private utility functions * - ifnet functions * - vlan registry and other exported functions * - iflib public core functions * * */ static MALLOC_DEFINE(M_IFLIB, "iflib", "ifnet library"); struct iflib_txq; typedef struct iflib_txq *iflib_txq_t; struct iflib_rxq; typedef struct iflib_rxq *iflib_rxq_t; struct iflib_fl; typedef struct iflib_fl *iflib_fl_t; typedef struct iflib_filter_info { driver_filter_t *ifi_filter; void *ifi_filter_arg; struct grouptask *ifi_task; } *iflib_filter_info_t; struct iflib_ctx { KOBJ_FIELDS; /* * Pointer to hardware driver's softc */ void *ifc_softc; device_t ifc_dev; if_t ifc_ifp; cpuset_t ifc_cpus; if_shared_ctx_t ifc_sctx; struct if_softc_ctx ifc_softc_ctx; struct mtx ifc_mtx; uint16_t ifc_nhwtxqs; uint16_t ifc_nhwrxqs; iflib_txq_t ifc_txqs; iflib_rxq_t ifc_rxqs; uint32_t ifc_if_flags; uint32_t ifc_flags; uint32_t ifc_max_fl_buf_size; int ifc_in_detach; int ifc_link_state; int ifc_link_irq; int ifc_pause_frames; int ifc_watchdog_events; struct cdev *ifc_led_dev; struct resource *ifc_msix_mem; struct if_irq ifc_legacy_irq; struct grouptask ifc_admin_task; struct grouptask ifc_vflr_task; struct iflib_filter_info ifc_filter_info; struct ifmedia ifc_media; struct sysctl_oid *ifc_sysctl_node; uint16_t ifc_sysctl_ntxqs; uint16_t ifc_sysctl_nrxqs; uint16_t ifc_sysctl_qs_eq_override; uint16_t ifc_sysctl_ntxds[8]; uint16_t ifc_sysctl_nrxds[8]; struct if_txrx ifc_txrx; #define isc_txd_encap ifc_txrx.ift_txd_encap #define isc_txd_flush ifc_txrx.ift_txd_flush #define isc_txd_credits_update ifc_txrx.ift_txd_credits_update #define isc_rxd_available ifc_txrx.ift_rxd_available #define isc_rxd_pkt_get ifc_txrx.ift_rxd_pkt_get #define isc_rxd_refill ifc_txrx.ift_rxd_refill #define isc_rxd_flush ifc_txrx.ift_rxd_flush #define isc_rxd_refill ifc_txrx.ift_rxd_refill #define isc_rxd_refill ifc_txrx.ift_rxd_refill #define isc_legacy_intr ifc_txrx.ift_legacy_intr eventhandler_tag ifc_vlan_attach_event; eventhandler_tag ifc_vlan_detach_event; uint8_t ifc_mac[ETHER_ADDR_LEN]; char ifc_mtx_name[16]; }; void * iflib_get_softc(if_ctx_t ctx) { return (ctx->ifc_softc); } device_t iflib_get_dev(if_ctx_t ctx) { return (ctx->ifc_dev); } if_t iflib_get_ifp(if_ctx_t ctx) { return (ctx->ifc_ifp); } struct ifmedia * iflib_get_media(if_ctx_t ctx) { return (&ctx->ifc_media); } void iflib_set_mac(if_ctx_t ctx, uint8_t mac[ETHER_ADDR_LEN]) { bcopy(mac, ctx->ifc_mac, ETHER_ADDR_LEN); } if_softc_ctx_t iflib_get_softc_ctx(if_ctx_t ctx) { return (&ctx->ifc_softc_ctx); } if_shared_ctx_t iflib_get_sctx(if_ctx_t ctx) { return (ctx->ifc_sctx); } #define CACHE_PTR_INCREMENT (CACHE_LINE_SIZE/sizeof(void*)) #define LINK_ACTIVE(ctx) ((ctx)->ifc_link_state == LINK_STATE_UP) #define CTX_IS_VF(ctx) ((ctx)->ifc_sctx->isc_flags & IFLIB_IS_VF) #define RX_SW_DESC_MAP_CREATED (1 << 0) #define TX_SW_DESC_MAP_CREATED (1 << 1) #define RX_SW_DESC_INUSE (1 << 3) #define TX_SW_DESC_MAPPED (1 << 4) typedef struct iflib_sw_rx_desc { bus_dmamap_t ifsd_map; /* bus_dma map for packet */ struct mbuf *ifsd_m; /* rx: uninitialized mbuf */ caddr_t ifsd_cl; /* direct cluster pointer for rx */ uint16_t ifsd_flags; } *iflib_rxsd_t; typedef struct iflib_sw_tx_desc_val { bus_dmamap_t ifsd_map; /* bus_dma map for packet */ struct mbuf *ifsd_m; /* pkthdr mbuf */ uint8_t ifsd_flags; } *iflib_txsd_val_t; typedef struct iflib_sw_tx_desc_array { bus_dmamap_t *ifsd_map; /* bus_dma maps for packet */ struct mbuf **ifsd_m; /* pkthdr mbufs */ uint8_t *ifsd_flags; } iflib_txsd_array_t; /* 
magic number that should be high enough for any hardware */ #define IFLIB_MAX_TX_SEGS 128 #define IFLIB_MAX_RX_SEGS 32 #define IFLIB_RX_COPY_THRESH 128 #define IFLIB_MAX_RX_REFRESH 32 #define IFLIB_QUEUE_IDLE 0 #define IFLIB_QUEUE_HUNG 1 #define IFLIB_QUEUE_WORKING 2 /* this should really scale with ring size - 32 is a fairly arbitrary value for this */ #define TX_BATCH_SIZE 16 #define IFLIB_RESTART_BUDGET 8 #define IFC_LEGACY 0x01 #define IFC_QFLUSH 0x02 #define IFC_MULTISEG 0x04 #define IFC_DMAR 0x08 #define IFC_SC_ALLOCATED 0x10 #define CSUM_OFFLOAD (CSUM_IP_TSO|CSUM_IP6_TSO|CSUM_IP| \ CSUM_IP_UDP|CSUM_IP_TCP|CSUM_IP_SCTP| \ CSUM_IP6_UDP|CSUM_IP6_TCP|CSUM_IP6_SCTP) struct iflib_txq { uint16_t ift_in_use; uint16_t ift_cidx; uint16_t ift_cidx_processed; uint16_t ift_pidx; uint8_t ift_gen; uint8_t ift_db_pending; uint8_t ift_db_pending_queued; uint8_t ift_npending; uint8_t ift_br_offset; /* implicit pad */ uint64_t ift_processed; uint64_t ift_cleaned; #if MEMORY_LOGGING uint64_t ift_enqueued; uint64_t ift_dequeued; #endif uint64_t ift_no_tx_dma_setup; uint64_t ift_no_desc_avail; uint64_t ift_mbuf_defrag_failed; uint64_t ift_mbuf_defrag; uint64_t ift_map_failed; uint64_t ift_txd_encap_efbig; uint64_t ift_pullups; struct mtx ift_mtx; struct mtx ift_db_mtx; /* constant values */ if_ctx_t ift_ctx; struct ifmp_ring **ift_br; struct grouptask ift_task; uint16_t ift_size; uint16_t ift_id; struct callout ift_timer; struct callout ift_db_check; iflib_txsd_array_t ift_sds; uint8_t ift_nbr; uint8_t ift_qstatus; uint8_t ift_active; uint8_t ift_closed; int ift_watchdog_time; struct iflib_filter_info ift_filter_info; bus_dma_tag_t ift_desc_tag; bus_dma_tag_t ift_tso_desc_tag; iflib_dma_info_t ift_ifdi; #define MTX_NAME_LEN 16 char ift_mtx_name[MTX_NAME_LEN]; char ift_db_mtx_name[MTX_NAME_LEN]; bus_dma_segment_t ift_segs[IFLIB_MAX_TX_SEGS] __aligned(CACHE_LINE_SIZE); } __aligned(CACHE_LINE_SIZE); struct iflib_fl { uint16_t ifl_cidx; uint16_t ifl_pidx; uint16_t ifl_credits; uint8_t ifl_gen; #if MEMORY_LOGGING uint64_t ifl_m_enqueued; uint64_t ifl_m_dequeued; uint64_t ifl_cl_enqueued; uint64_t ifl_cl_dequeued; #endif /* implicit pad */ /* constant */ uint16_t ifl_size; uint16_t ifl_buf_size; uint16_t ifl_cltype; uma_zone_t ifl_zone; iflib_rxsd_t ifl_sds; iflib_rxq_t ifl_rxq; uint8_t ifl_id; bus_dma_tag_t ifl_desc_tag; iflib_dma_info_t ifl_ifdi; uint64_t ifl_bus_addrs[IFLIB_MAX_RX_REFRESH] __aligned(CACHE_LINE_SIZE); caddr_t ifl_vm_addrs[IFLIB_MAX_RX_REFRESH]; } __aligned(CACHE_LINE_SIZE); static inline int get_inuse(int size, int cidx, int pidx, int gen) { int used; if (pidx > cidx) used = pidx - cidx; else if (pidx < cidx) used = size - cidx + pidx; else if (gen == 0 && pidx == cidx) used = 0; else if (gen == 1 && pidx == cidx) used = size; else panic("bad state"); return (used); } #define TXQ_AVAIL(txq) (txq->ift_size - get_inuse(txq->ift_size, txq->ift_cidx, txq->ift_pidx, txq->ift_gen)) #define IDXDIFF(head, tail, wrap) \ ((head) >= (tail) ? (head) - (tail) : (wrap) - (tail) + (head)) struct iflib_rxq { /* If there is a separate completion queue - * these are the cq cidx and pidx. Otherwise * these are unused. 
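 * These fields are only meaningful when the driver sets IFLIB_HAS_RXCQ
 * in its shared-context flags; iflib_rxeof() below then consumes from
 * the completion queue index instead of the first free list, roughly:
 *
 *	cidxp = (sctx->isc_flags & IFLIB_HAS_RXCQ) ?
 *	    &rxq->ifr_cq_cidx : &rxq->ifr_fl[0].ifl_cidx;
 *
 * (a sketch of the selection made in iflib_rxeof(), shown here for
 * orientation only).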
*/ uint16_t ifr_size; uint16_t ifr_cq_cidx; uint16_t ifr_cq_pidx; uint8_t ifr_cq_gen; uint8_t ifr_fl_offset; if_ctx_t ifr_ctx; iflib_fl_t ifr_fl; uint64_t ifr_rx_irq; uint16_t ifr_id; uint8_t ifr_lro_enabled; uint8_t ifr_nfl; struct lro_ctrl ifr_lc; struct grouptask ifr_task; struct iflib_filter_info ifr_filter_info; iflib_dma_info_t ifr_ifdi; /* dynamically allocate if any drivers need a value substantially larger than this */ struct if_rxd_frag ifr_frags[IFLIB_MAX_RX_SEGS] __aligned(CACHE_LINE_SIZE); } __aligned(CACHE_LINE_SIZE); /* * Only allow a single packet to take up at most 1/nth of the tx ring */ #define MAX_SINGLE_PACKET_FRACTION 12 #define IF_BAD_DMA (bus_addr_t)-1 static int enable_msix = 1; #define mtx_held(m) (((m)->mtx_lock & ~MTX_FLAGMASK) != (uintptr_t)0) #define CTX_ACTIVE(ctx) ((if_getdrvflags((ctx)->ifc_ifp) & IFF_DRV_RUNNING)) #define CTX_LOCK_INIT(_sc, _name) mtx_init(&(_sc)->ifc_mtx, _name, "iflib ctx lock", MTX_DEF) #define CTX_LOCK(ctx) mtx_lock(&(ctx)->ifc_mtx) #define CTX_UNLOCK(ctx) mtx_unlock(&(ctx)->ifc_mtx) #define CTX_LOCK_DESTROY(ctx) mtx_destroy(&(ctx)->ifc_mtx) #define TXDB_LOCK_INIT(txq) mtx_init(&(txq)->ift_db_mtx, (txq)->ift_db_mtx_name, NULL, MTX_DEF) #define TXDB_TRYLOCK(txq) mtx_trylock(&(txq)->ift_db_mtx) #define TXDB_LOCK(txq) mtx_lock(&(txq)->ift_db_mtx) #define TXDB_UNLOCK(txq) mtx_unlock(&(txq)->ift_db_mtx) #define TXDB_LOCK_DESTROY(txq) mtx_destroy(&(txq)->ift_db_mtx) #define CALLOUT_LOCK(txq) mtx_lock(&txq->ift_mtx) #define CALLOUT_UNLOCK(txq) mtx_unlock(&txq->ift_mtx) /* Our boot-time initialization hook */ static int iflib_module_event_handler(module_t, int, void *); static moduledata_t iflib_moduledata = { "iflib", iflib_module_event_handler, NULL }; DECLARE_MODULE(iflib, iflib_moduledata, SI_SUB_INIT_IF, SI_ORDER_ANY); MODULE_VERSION(iflib, 1); MODULE_DEPEND(iflib, pci, 1, 1, 1); MODULE_DEPEND(iflib, ether, 1, 1, 1); TASKQGROUP_DEFINE(if_config_tqg, 1, 1); #ifndef IFLIB_DEBUG_COUNTERS #ifdef INVARIANTS #define IFLIB_DEBUG_COUNTERS 1 #else #define IFLIB_DEBUG_COUNTERS 0 #endif /* !INVARIANTS */ #endif static SYSCTL_NODE(_net, OID_AUTO, iflib, CTLFLAG_RD, 0, "iflib driver parameters"); /* * XXX need to ensure that this can't accidentally cause the head to be moved backwards */ static int iflib_min_tx_latency = 0; SYSCTL_INT(_net_iflib, OID_AUTO, min_tx_latency, CTLFLAG_RW, &iflib_min_tx_latency, 0, "minimize transmit latency at the possible expense of throughput"); #if IFLIB_DEBUG_COUNTERS static int iflib_tx_seen; static int iflib_tx_sent; static int iflib_tx_encap; static int iflib_rx_allocs; static int iflib_fl_refills; static int iflib_fl_refills_large; static int iflib_tx_frees; SYSCTL_INT(_net_iflib, OID_AUTO, tx_seen, CTLFLAG_RD, &iflib_tx_seen, 0, "# tx mbufs seen"); SYSCTL_INT(_net_iflib, OID_AUTO, tx_sent, CTLFLAG_RD, &iflib_tx_sent, 0, "# tx mbufs sent"); SYSCTL_INT(_net_iflib, OID_AUTO, tx_encap, CTLFLAG_RD, &iflib_tx_encap, 0, "# tx mbufs encapped"); SYSCTL_INT(_net_iflib, OID_AUTO, tx_frees, CTLFLAG_RD, &iflib_tx_frees, 0, "# tx frees"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_allocs, CTLFLAG_RD, &iflib_rx_allocs, 0, "# rx allocations"); SYSCTL_INT(_net_iflib, OID_AUTO, fl_refills, CTLFLAG_RD, &iflib_fl_refills, 0, "# refills"); SYSCTL_INT(_net_iflib, OID_AUTO, fl_refills_large, CTLFLAG_RD, &iflib_fl_refills_large, 0, "# large refills"); static int iflib_txq_drain_flushing; static int iflib_txq_drain_oactive; static int iflib_txq_drain_notready; static int iflib_txq_drain_encapfail; SYSCTL_INT(_net_iflib, OID_AUTO,
txq_drain_flushing, CTLFLAG_RD, &iflib_txq_drain_flushing, 0, "# drain flushes"); SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_oactive, CTLFLAG_RD, &iflib_txq_drain_oactive, 0, "# drain oactives"); SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_notready, CTLFLAG_RD, &iflib_txq_drain_notready, 0, "# drain notready"); SYSCTL_INT(_net_iflib, OID_AUTO, txq_drain_encapfail, CTLFLAG_RD, &iflib_txq_drain_encapfail, 0, "# drain encap fails"); static int iflib_encap_load_mbuf_fail; static int iflib_encap_txq_avail_fail; static int iflib_encap_txd_encap_fail; SYSCTL_INT(_net_iflib, OID_AUTO, encap_load_mbuf_fail, CTLFLAG_RD, &iflib_encap_load_mbuf_fail, 0, "# busdma load failures"); SYSCTL_INT(_net_iflib, OID_AUTO, encap_txq_avail_fail, CTLFLAG_RD, &iflib_encap_txq_avail_fail, 0, "# txq avail failures"); SYSCTL_INT(_net_iflib, OID_AUTO, encap_txd_encap_fail, CTLFLAG_RD, &iflib_encap_txd_encap_fail, 0, "# driver encap failures"); static int iflib_task_fn_rxs; static int iflib_rx_intr_enables; static int iflib_fast_intrs; static int iflib_intr_link; static int iflib_intr_msix; static int iflib_rx_unavail; static int iflib_rx_ctx_inactive; static int iflib_rx_zero_len; static int iflib_rx_if_input; static int iflib_rx_mbuf_null; static int iflib_rxd_flush; static int iflib_verbose_debug; SYSCTL_INT(_net_iflib, OID_AUTO, intr_link, CTLFLAG_RD, &iflib_intr_link, 0, "# intr link calls"); SYSCTL_INT(_net_iflib, OID_AUTO, intr_msix, CTLFLAG_RD, &iflib_intr_msix, 0, "# intr msix calls"); SYSCTL_INT(_net_iflib, OID_AUTO, task_fn_rx, CTLFLAG_RD, &iflib_task_fn_rxs, 0, "# task_fn_rx calls"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_intr_enables, CTLFLAG_RD, &iflib_rx_intr_enables, 0, "# rx intr enables"); SYSCTL_INT(_net_iflib, OID_AUTO, fast_intrs, CTLFLAG_RD, &iflib_fast_intrs, 0, "# fast_intr calls"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_unavail, CTLFLAG_RD, &iflib_rx_unavail, 0, "# times rxeof called with no available data"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_ctx_inactive, CTLFLAG_RD, &iflib_rx_ctx_inactive, 0, "# times rxeof called with inactive context"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_zero_len, CTLFLAG_RD, &iflib_rx_zero_len, 0, "# times rxeof saw zero len mbuf"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_if_input, CTLFLAG_RD, &iflib_rx_if_input, 0, "# times rxeof called if_input"); SYSCTL_INT(_net_iflib, OID_AUTO, rx_mbuf_null, CTLFLAG_RD, &iflib_rx_mbuf_null, 0, "# times rxeof got null mbuf"); SYSCTL_INT(_net_iflib, OID_AUTO, rxd_flush, CTLFLAG_RD, &iflib_rxd_flush, 0, "# times rxd_flush called"); SYSCTL_INT(_net_iflib, OID_AUTO, verbose_debug, CTLFLAG_RW, &iflib_verbose_debug, 0, "enable verbose debugging"); #define DBG_COUNTER_INC(name) atomic_add_int(&(iflib_ ## name), 1) #else #define DBG_COUNTER_INC(name) #endif #define IFLIB_DEBUG 0 static void iflib_tx_structures_free(if_ctx_t ctx); static void iflib_rx_structures_free(if_ctx_t ctx); static int iflib_queues_alloc(if_ctx_t ctx); static int iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq); static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, int cidx, int budget); static int iflib_qset_structures_setup(if_ctx_t ctx); static int iflib_msix_init(if_ctx_t ctx); static int iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filterarg, int *rid, char *str); static void iflib_txq_check_drain(iflib_txq_t txq, int budget); static uint32_t iflib_txq_can_drain(struct ifmp_ring *); static int iflib_register(if_ctx_t); static void iflib_init_locked(if_ctx_t ctx); static void iflib_add_device_sysctl_pre(if_ctx_t ctx); static void 
iflib_add_device_sysctl_post(if_ctx_t ctx); #ifdef DEV_NETMAP #include #include #include MODULE_DEPEND(iflib, netmap, 1, 1, 1); /* * device-specific sysctl variables: * * iflib_crcstrip: 0: keep CRC in rx frames (default), 1: strip it. * During regular operations the CRC is stripped, but on some * hardware reception of frames not multiple of 64 is slower, * so using crcstrip=0 helps in benchmarks. * * iflib_rx_miss, iflib_rx_miss_bufs: * count packets that might be missed due to lost interrupts. */ SYSCTL_DECL(_dev_netmap); /* * The xl driver by default strips CRCs and we do not override it. */ int iflib_crcstrip = 1; SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_crcstrip, CTLFLAG_RW, &iflib_crcstrip, 1, "strip CRC on rx frames"); int iflib_rx_miss, iflib_rx_miss_bufs; SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_rx_miss, CTLFLAG_RW, &iflib_rx_miss, 0, "potentially missed rx intr"); SYSCTL_INT(_dev_netmap, OID_AUTO, iflib_rx_miss_bufs, CTLFLAG_RW, &iflib_rx_miss_bufs, 0, "potentially missed rx intr bufs"); /* * Register/unregister. We are already under netmap lock. * Only called on the first register or the last unregister. */ static int iflib_netmap_register(struct netmap_adapter *na, int onoff) { struct ifnet *ifp = na->ifp; if_ctx_t ctx = ifp->if_softc; CTX_LOCK(ctx); IFDI_INTR_DISABLE(ctx); /* Tell the stack that the interface is no longer active */ ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); if (!CTX_IS_VF(ctx)) IFDI_CRCSTRIP_SET(ctx, onoff); /* enable or disable flags and callbacks in na and ifp */ if (onoff) { nm_set_native_flags(na); } else { nm_clear_native_flags(na); } IFDI_INIT(ctx); IFDI_CRCSTRIP_SET(ctx, onoff); // XXX why twice ? CTX_UNLOCK(ctx); return (ifp->if_drv_flags & IFF_DRV_RUNNING ? 0 : 1); } /* * Reconcile kernel and user view of the transmit ring. * * All information is in the kring. * Userspace wants to send packets up to the one before kring->rhead, * kernel knows kring->nr_hwcur is the first unsent packet. * * Here we push packets out (as many as possible), and possibly * reclaim buffers from previously completed transmission. * * The caller (netmap) guarantees that there is only one instance * running at any time. Any interference with other driver * methods should be handled by the individual drivers. */ static int iflib_netmap_txsync(struct netmap_kring *kring, int flags) { struct netmap_adapter *na = kring->na; struct ifnet *ifp = na->ifp; struct netmap_ring *ring = kring->ring; u_int nm_i; /* index into the netmap ring */ u_int nic_i; /* index into the NIC ring */ u_int n; u_int const lim = kring->nkr_num_slots - 1; u_int const head = kring->rhead; struct if_pkt_info pi; /* * interrupts on every tx packet are expensive so request * them every half ring, or where NS_REPORT is set */ u_int report_frequency = kring->nkr_num_slots >> 1; /* device-specific */ if_ctx_t ctx = ifp->if_softc; iflib_txq_t txq = &ctx->ifc_txqs[kring->ring_id]; pi.ipi_segs = txq->ift_segs; pi.ipi_qsidx = kring->ring_id; pi.ipi_ndescs = 0; bus_dmamap_sync(txq->ift_desc_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); /* * First part: process new packets to send. * nm_i is the current index in the netmap ring, * nic_i is the corresponding index in the NIC ring. * * If we have packets to send (nm_i != head) * iterate over the netmap ring, fetch length and update * the corresponding slot in the NIC ring. Some drivers also * need to update the buffer's physical address in the NIC slot * even NS_BUF_CHANGED is not set (PNMB computes the addresses). 
* * The netmap_reload_map() call is especially expensive, * even when (as in this case) the tag is 0, so do it only * when the buffer has actually changed. * * If possible do not set the report/intr bit on all slots, * but only a few times per ring or when NS_REPORT is set. * * Finally, on 10G and faster drivers, it might be useful * to prefetch the next slot and txr entry. */ nm_i = kring->nr_hwcur; if (nm_i != head) { /* we have new packets to send */ nic_i = netmap_idx_k2n(kring, nm_i); __builtin_prefetch(&ring->slot[nm_i]); __builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i]); __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i]); for (n = 0; nm_i != head; n++) { struct netmap_slot *slot = &ring->slot[nm_i]; u_int len = slot->len; uint64_t paddr; void *addr = PNMB(na, slot, &paddr); int flags = (slot->flags & NS_REPORT || nic_i == 0 || nic_i == report_frequency) ? IPI_TX_INTR : 0; /* device-specific */ pi.ipi_pidx = nic_i; pi.ipi_flags = flags; /* Fill the slot in the NIC ring. */ ctx->isc_txd_encap(ctx->ifc_softc, &pi); /* prefetch for next round */ __builtin_prefetch(&ring->slot[nm_i + 1]); __builtin_prefetch(&txq->ift_sds.ifsd_m[nic_i + 1]); __builtin_prefetch(&txq->ift_sds.ifsd_map[nic_i + 1]); NM_CHECK_ADDR_LEN(na, addr, len); if (slot->flags & NS_BUF_CHANGED) { /* buffer has changed, reload map */ netmap_reload_map(na, txq->ift_desc_tag, txq->ift_sds.ifsd_map[nic_i], addr); } slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED); /* make sure changes to the buffer are synced */ bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_sds.ifsd_map[nic_i], BUS_DMASYNC_PREWRITE); nm_i = nm_next(nm_i, lim); nic_i = nm_next(nic_i, lim); } kring->nr_hwcur = head; /* synchronize the NIC ring */ bus_dmamap_sync(txq->ift_desc_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* (re)start the tx unit up to slot nic_i (excluded) */ ctx->isc_txd_flush(ctx->ifc_softc, txq->ift_id, nic_i); } /* * Second part: reclaim buffers for completed transmissions. */ if (iflib_tx_credits_update(ctx, txq)) { /* some tx completed, increment avail */ nic_i = txq->ift_cidx_processed; kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim); } return (0); } /* * Reconcile kernel and user view of the receive ring. * Same as for the txsync, this routine must be efficient. * The caller guarantees a single invocation, but races against * the rest of the driver should be handled here. * * On call, kring->rhead is the first packet that userspace wants * to keep, and kring->rcur is the wakeup point. * The kernel has previously reported packets up to kring->rtail. * * If (flags & NAF_FORCE_READ) also check for incoming packets irrespective * of whether or not we received an interrupt.
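 * As a worked example of the index relationship used below
 * (nm_i == (nic_i + kring->nkr_hwofs) % ring_size): with a
 * hypothetical ring of 512 slots and nkr_hwofs == 3, NIC index 510
 * maps to netmap slot (510 + 3) % 512 == 1.  netmap_idx_n2k() and
 * netmap_idx_k2n() perform these conversions.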
*/ static int iflib_netmap_rxsync(struct netmap_kring *kring, int flags) { struct netmap_adapter *na = kring->na; struct ifnet *ifp = na->ifp; struct netmap_ring *ring = kring->ring; u_int nm_i; /* index into the netmap ring */ u_int nic_i; /* index into the NIC ring */ u_int i, n; u_int const lim = kring->nkr_num_slots - 1; u_int const head = kring->rhead; int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR; struct if_rxd_info ri; /* device-specific */ if_ctx_t ctx = ifp->if_softc; iflib_rxq_t rxq = &ctx->ifc_rxqs[kring->ring_id]; iflib_fl_t fl = rxq->ifr_fl; if (head > lim) return netmap_ring_reinit(kring); bzero(&ri, sizeof(ri)); ri.iri_qsidx = kring->ring_id; ri.iri_ifp = ctx->ifc_ifp; /* XXX check sync modes */ for (i = 0, fl = rxq->ifr_fl; i < rxq->ifr_nfl; i++, fl++) bus_dmamap_sync(rxq->ifr_fl[i].ifl_desc_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); /* * First part: import newly received packets. * * nm_i is the index of the next free slot in the netmap ring, * nic_i is the index of the next received packet in the NIC ring, * and they may differ in case if_init() has been called while * in netmap mode. For the receive ring we have * * nic_i = rxr->next_check; * nm_i = kring->nr_hwtail (previous) * and * nm_i == (nic_i + kring->nkr_hwofs) % ring_size * * rxr->next_check is set to 0 on a ring reinit */ if (netmap_no_pendintr || force_update) { int crclen = iflib_crcstrip ? 0 : 4; int error, avail; uint16_t slot_flags = kring->nkr_slot_flags; for (fl = rxq->ifr_fl, i = 0; i < rxq->ifr_nfl; i++, fl++) { nic_i = fl->ifl_cidx; nm_i = netmap_idx_n2k(kring, nic_i); avail = ctx->isc_rxd_available(ctx->ifc_softc, kring->ring_id, nic_i, INT_MAX); for (n = 0; avail > 0; n++, avail--) { error = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri); if (error) ring->slot[nm_i].len = 0; else ring->slot[nm_i].len = ri.iri_len - crclen; ring->slot[nm_i].flags = slot_flags; bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_sds[nic_i].ifsd_map, BUS_DMASYNC_POSTREAD); nm_i = nm_next(nm_i, lim); nic_i = nm_next(nic_i, lim); } if (n) { /* update the state variables */ if (netmap_no_pendintr && !force_update) { /* diagnostics */ iflib_rx_miss ++; iflib_rx_miss_bufs += n; } fl->ifl_cidx = nic_i; kring->nr_hwtail = nm_i; } kring->nr_kflags &= ~NKR_PENDINTR; } } /* * Second part: skip past packets that userspace has released. * (kring->nr_hwcur to head excluded), * and make the buffers available for reception. 
* As usual nm_i is the index in the netmap ring, * nic_i is the index in the NIC ring, and * nm_i == (nic_i + kring->nkr_hwofs) % ring_size */ /* XXX not sure how this will work with multiple free lists */ nm_i = kring->nr_hwcur; if (nm_i != head) { nic_i = netmap_idx_k2n(kring, nm_i); for (n = 0; nm_i != head; n++) { struct netmap_slot *slot = &ring->slot[nm_i]; uint64_t paddr; caddr_t vaddr; void *addr = PNMB(na, slot, &paddr); if (addr == NETMAP_BUF_BASE(na)) /* bad buf */ goto ring_reset; vaddr = addr; if (slot->flags & NS_BUF_CHANGED) { /* buffer has changed, reload map */ netmap_reload_map(na, fl->ifl_ifdi->idi_tag, fl->ifl_sds[nic_i].ifsd_map, addr); slot->flags &= ~NS_BUF_CHANGED; } /* * XXX we should be batching this operation - TODO */ ctx->isc_rxd_refill(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, nic_i, &paddr, &vaddr, 1, fl->ifl_buf_size); bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_sds[nic_i].ifsd_map, BUS_DMASYNC_PREREAD); nm_i = nm_next(nm_i, lim); nic_i = nm_next(nic_i, lim); } kring->nr_hwcur = head; bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); /* * IMPORTANT: we must leave one free slot in the ring, * so move nic_i back by one unit */ nic_i = nm_prev(nic_i, lim); ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, fl->ifl_id, nic_i); } return 0; ring_reset: return netmap_ring_reinit(kring); } static int iflib_netmap_attach(if_ctx_t ctx) { struct netmap_adapter na; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; bzero(&na, sizeof(na)); na.ifp = ctx->ifc_ifp; na.na_flags = NAF_BDG_MAYSLEEP; MPASS(ctx->ifc_softc_ctx.isc_ntxqsets); MPASS(ctx->ifc_softc_ctx.isc_nrxqsets); na.num_tx_desc = scctx->isc_ntxd[0]; na.num_rx_desc = scctx->isc_nrxd[0]; na.nm_txsync = iflib_netmap_txsync; na.nm_rxsync = iflib_netmap_rxsync; na.nm_register = iflib_netmap_register; na.num_tx_rings = ctx->ifc_softc_ctx.isc_ntxqsets; na.num_rx_rings = ctx->ifc_softc_ctx.isc_nrxqsets; return (netmap_attach(&na)); } static void iflib_netmap_txq_init(if_ctx_t ctx, iflib_txq_t txq) { struct netmap_adapter *na = NA(ctx->ifc_ifp); struct netmap_slot *slot; slot = netmap_reset(na, NR_TX, txq->ift_id, 0); if (slot == NULL) return; for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxd[0]; i++) { /* * In netmap mode, set the map for the packet buffer. * NOTE: Some drivers (not this one) also need to set * the physical buffer address in the NIC ring. 
* netmap_idx_n2k() maps a nic index, i, into the corresponding * netmap slot index, si */ int si = netmap_idx_n2k(&na->tx_rings[txq->ift_id], i); netmap_load_map(na, txq->ift_desc_tag, txq->ift_sds.ifsd_map[i], NMB(na, slot + si)); } } static void iflib_netmap_rxq_init(if_ctx_t ctx, iflib_rxq_t rxq) { struct netmap_adapter *na = NA(ctx->ifc_ifp); struct netmap_slot *slot; iflib_rxsd_t sd; int nrxd; slot = netmap_reset(na, NR_RX, rxq->ifr_id, 0); if (slot == NULL) return; sd = rxq->ifr_fl[0].ifl_sds; nrxd = ctx->ifc_softc_ctx.isc_nrxd[0]; for (int i = 0; i < nrxd; i++, sd++) { int sj = netmap_idx_n2k(&na->rx_rings[rxq->ifr_id], i); uint64_t paddr; void *addr; caddr_t vaddr; vaddr = addr = PNMB(na, slot + sj, &paddr); netmap_load_map(na, rxq->ifr_fl[0].ifl_ifdi->idi_tag, sd->ifsd_map, addr); /* Update descriptor and the cached value */ ctx->isc_rxd_refill(ctx->ifc_softc, rxq->ifr_id, 0 /* fl_id */, i, &paddr, &vaddr, 1, rxq->ifr_fl[0].ifl_buf_size); } /* preserve queue */ if (ctx->ifc_ifp->if_capenable & IFCAP_NETMAP) { struct netmap_kring *kring = &na->rx_rings[rxq->ifr_id]; int t = na->num_rx_desc - 1 - nm_kr_rxspace(kring); ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, 0 /* fl_id */, t); } else ctx->isc_rxd_flush(ctx->ifc_softc, rxq->ifr_id, 0 /* fl_id */, nrxd-1); } #define iflib_netmap_detach(ifp) netmap_detach(ifp) #else #define iflib_netmap_txq_init(ctx, txq) #define iflib_netmap_rxq_init(ctx, rxq) #define iflib_netmap_detach(ifp) #define iflib_netmap_attach(ctx) (0) #define netmap_rx_irq(ifp, qid, budget) (0) #endif #if defined(__i386__) || defined(__amd64__) static __inline void prefetch(void *x) { __asm volatile("prefetcht0 %0" :: "m" (*(unsigned long *)x)); } #else #define prefetch(x) #endif static void _iflib_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int err) { if (err) return; *(bus_addr_t *) arg = segs[0].ds_addr; } int iflib_dma_alloc(if_ctx_t ctx, int size, iflib_dma_info_t dma, int mapflags) { int err; if_shared_ctx_t sctx = ctx->ifc_sctx; device_t dev = ctx->ifc_dev; KASSERT(sctx->isc_q_align != 0, ("alignment value not initialized")); err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ sctx->isc_q_align, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ size, /* maxsize */ 1, /* nsegments */ size, /* maxsegsize */ BUS_DMA_ALLOCNOW, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &dma->idi_tag); if (err) { device_printf(dev, "%s: bus_dma_tag_create failed: %d\n", __func__, err); goto fail_0; } err = bus_dmamem_alloc(dma->idi_tag, (void**) &dma->idi_vaddr, BUS_DMA_NOWAIT | BUS_DMA_COHERENT | BUS_DMA_ZERO, &dma->idi_map); if (err) { device_printf(dev, "%s: bus_dmamem_alloc(%ju) failed: %d\n", __func__, (uintmax_t)size, err); goto fail_1; } dma->idi_paddr = IF_BAD_DMA; err = bus_dmamap_load(dma->idi_tag, dma->idi_map, dma->idi_vaddr, size, _iflib_dmamap_cb, &dma->idi_paddr, mapflags | BUS_DMA_NOWAIT); if (err || dma->idi_paddr == IF_BAD_DMA) { device_printf(dev, "%s: bus_dmamap_load failed: %d\n", __func__, err); goto fail_2; } dma->idi_size = size; return (0); fail_2: bus_dmamem_free(dma->idi_tag, dma->idi_vaddr, dma->idi_map); fail_1: bus_dma_tag_destroy(dma->idi_tag); fail_0: dma->idi_tag = NULL; return (err); } int iflib_dma_alloc_multi(if_ctx_t ctx, int *sizes, iflib_dma_info_t *dmalist, int mapflags, int count) { int i, err; iflib_dma_info_t *dmaiter; dmaiter = dmalist; for (i = 0; i < count; i++, dmaiter++) { if ((err = iflib_dma_alloc(ctx, sizes[i], *dmaiter, 
mapflags)) != 0) break; } if (err) iflib_dma_free_multi(dmalist, i); return (err); } void iflib_dma_free(iflib_dma_info_t dma) { if (dma->idi_tag == NULL) return; if (dma->idi_paddr != IF_BAD_DMA) { bus_dmamap_sync(dma->idi_tag, dma->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(dma->idi_tag, dma->idi_map); dma->idi_paddr = IF_BAD_DMA; } if (dma->idi_vaddr != NULL) { bus_dmamem_free(dma->idi_tag, dma->idi_vaddr, dma->idi_map); dma->idi_vaddr = NULL; } bus_dma_tag_destroy(dma->idi_tag); dma->idi_tag = NULL; } void iflib_dma_free_multi(iflib_dma_info_t *dmalist, int count) { int i; iflib_dma_info_t *dmaiter = dmalist; for (i = 0; i < count; i++, dmaiter++) iflib_dma_free(*dmaiter); } static int iflib_fast_intr(void *arg) { iflib_filter_info_t info = arg; struct grouptask *gtask = info->ifi_task; DBG_COUNTER_INC(fast_intrs); if (info->ifi_filter != NULL && info->ifi_filter(info->ifi_filter_arg) == FILTER_HANDLED) return (FILTER_HANDLED); GROUPTASK_ENQUEUE(gtask); return (FILTER_HANDLED); } static int _iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid, driver_filter_t filter, driver_intr_t handler, void *arg, char *name) { int rc; struct resource *res; void *tag; device_t dev = ctx->ifc_dev; MPASS(rid < 512); irq->ii_rid = rid; res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &irq->ii_rid, RF_SHAREABLE | RF_ACTIVE); if (res == NULL) { device_printf(dev, "failed to allocate IRQ for rid %d, name %s.\n", rid, name); return (ENOMEM); } irq->ii_res = res; KASSERT(filter == NULL || handler == NULL, ("filter and handler can't both be non-NULL")); rc = bus_setup_intr(dev, res, INTR_MPSAFE | INTR_TYPE_NET, filter, handler, arg, &tag); if (rc != 0) { device_printf(dev, "failed to setup interrupt for rid %d, name %s: %d\n", rid, name ? name : "unknown", rc); return (rc); } else if (name) bus_describe_intr(dev, res, tag, "%s", name); irq->ii_tag = tag; return (0); } /********************************************************************* * * Allocate memory for tx_buffer structures. The tx_buffer stores all * the information needed to transmit a packet on the wire. This is * called only once at attach, setup is done every reset. * **********************************************************************/ static int iflib_txsd_alloc(iflib_txq_t txq) { if_ctx_t ctx = txq->ift_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = ctx->ifc_dev; int err, nsegments, ntsosegments; nsegments = scctx->isc_tx_nsegments; ntsosegments = scctx->isc_tx_tso_segments_max; MPASS(scctx->isc_ntxd[0] > 0); MPASS(scctx->isc_ntxd[txq->ift_br_offset] > 0); MPASS(nsegments > 0); MPASS(ntsosegments > 0); /* * Setup DMA descriptor areas. 
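 * Two tags are created below: one bounded by sctx->isc_tx_maxsize and
 * isc_tx_maxsegsize for ordinary frames, and one bounded by
 * scctx->isc_tx_tso_size_max and isc_tx_tso_segsize_max for TSO
 * bursts.  Purely as a hypothetical illustration of the values a
 * driver might supply (not taken from any particular driver):
 *
 *	sctx->isc_tx_maxsize = 9216;		/* one jumbo frame */
 *	sctx->isc_tx_maxsegsize = 9216;
 *	scctx->isc_tx_nsegments = 8;
 *	scctx->isc_tx_tso_size_max = 65535;	/* one TSO burst */
 *	scctx->isc_tx_tso_segsize_max = 4096;
 *	scctx->isc_tx_tso_segments_max = 32;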
*/ if ((err = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ sctx->isc_tx_maxsize, /* maxsize */ nsegments, /* nsegments */ sctx->isc_tx_maxsegsize, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txq->ift_desc_tag))) { device_printf(dev,"Unable to allocate TX DMA tag: %d\n", err); device_printf(dev,"maxsize: %ju nsegments: %d maxsegsize: %ju\n", (uintmax_t)sctx->isc_tx_maxsize, nsegments, (uintmax_t)sctx->isc_tx_maxsegsize); goto fail; } #ifdef IFLIB_DIAGNOSTICS device_printf(dev,"maxsize: %zd nsegments: %d maxsegsize: %zd\n", sctx->isc_tx_maxsize, nsegments, sctx->isc_tx_maxsegsize); #endif if ((err = bus_dma_tag_create(bus_get_dma_tag(dev), 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ scctx->isc_tx_tso_size_max, /* maxsize */ ntsosegments, /* nsegments */ scctx->isc_tx_tso_segsize_max, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txq->ift_tso_desc_tag))) { device_printf(dev,"Unable to allocate TX TSO DMA tag: %d\n", err); goto fail; } #ifdef IFLIB_DIAGNOSTICS device_printf(dev,"TSO maxsize: %d ntsosegments: %d maxsegsize: %d\n", scctx->isc_tx_tso_size_max, ntsosegments, scctx->isc_tx_tso_segsize_max); #endif if (!(txq->ift_sds.ifsd_flags = (uint8_t *) malloc(sizeof(uint8_t) * scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); err = ENOMEM; goto fail; } if (!(txq->ift_sds.ifsd_m = (struct mbuf **) malloc(sizeof(struct mbuf *) * scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer memory\n"); err = ENOMEM; goto fail; } /* Create the descriptor buffer dma maps */ #if defined(ACPI_DMAR) || (!(defined(__i386__) && !defined(__amd64__))) if ((ctx->ifc_flags & IFC_DMAR) == 0) return (0); if (!(txq->ift_sds.ifsd_map = (bus_dmamap_t *) malloc(sizeof(bus_dmamap_t) * scctx->isc_ntxd[txq->ift_br_offset], M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate tx_buffer map memory\n"); err = ENOMEM; goto fail; } for (int i = 0; i < scctx->isc_ntxd[txq->ift_br_offset]; i++) { err = bus_dmamap_create(txq->ift_desc_tag, 0, &txq->ift_sds.ifsd_map[i]); if (err != 0) { device_printf(dev, "Unable to create TX DMA map\n"); goto fail; } } #endif return (0); fail: /* We free all, it handles case where we are in the middle */ iflib_tx_structures_free(ctx); return (err); } static void iflib_txsd_destroy(if_ctx_t ctx, iflib_txq_t txq, int i) { bus_dmamap_t map; map = NULL; if (txq->ift_sds.ifsd_map != NULL) map = txq->ift_sds.ifsd_map[i]; if (map != NULL) { bus_dmamap_unload(txq->ift_desc_tag, map); bus_dmamap_destroy(txq->ift_desc_tag, map); txq->ift_sds.ifsd_map[i] = NULL; } } static void iflib_txq_destroy(iflib_txq_t txq) { if_ctx_t ctx = txq->ift_ctx; for (int i = 0; i < txq->ift_size; i++) iflib_txsd_destroy(ctx, txq, i); if (txq->ift_sds.ifsd_map != NULL) { free(txq->ift_sds.ifsd_map, M_IFLIB); txq->ift_sds.ifsd_map = NULL; } if (txq->ift_sds.ifsd_m != NULL) { free(txq->ift_sds.ifsd_m, M_IFLIB); txq->ift_sds.ifsd_m = NULL; } if (txq->ift_sds.ifsd_flags != NULL) { free(txq->ift_sds.ifsd_flags, M_IFLIB); txq->ift_sds.ifsd_flags = NULL; } if (txq->ift_desc_tag != NULL) { bus_dma_tag_destroy(txq->ift_desc_tag); txq->ift_desc_tag = NULL; } if (txq->ift_tso_desc_tag != NULL) { 
bus_dma_tag_destroy(txq->ift_tso_desc_tag); txq->ift_tso_desc_tag = NULL; } } static void iflib_txsd_free(if_ctx_t ctx, iflib_txq_t txq, int i) { struct mbuf **mp; mp = &txq->ift_sds.ifsd_m[i]; if (*mp == NULL) return; if (txq->ift_sds.ifsd_map != NULL) { bus_dmamap_sync(txq->ift_desc_tag, txq->ift_sds.ifsd_map[i], BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txq->ift_desc_tag, txq->ift_sds.ifsd_map[i]); } m_free(*mp); DBG_COUNTER_INC(tx_frees); *mp = NULL; } static int iflib_txq_setup(iflib_txq_t txq) { if_ctx_t ctx = txq->ift_ctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; iflib_dma_info_t di; int i; /* Set number of descriptors available */ txq->ift_qstatus = IFLIB_QUEUE_IDLE; /* Reset indices */ txq->ift_cidx_processed = txq->ift_pidx = txq->ift_cidx = txq->ift_npending = 0; txq->ift_size = scctx->isc_ntxd[txq->ift_br_offset]; for (i = 0, di = txq->ift_ifdi; i < ctx->ifc_nhwtxqs; i++, di++) bzero((void *)di->idi_vaddr, di->idi_size); IFDI_TXQ_SETUP(ctx, txq->ift_id); for (i = 0, di = txq->ift_ifdi; i < ctx->ifc_nhwtxqs; i++, di++) bus_dmamap_sync(di->idi_tag, di->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return (0); } /********************************************************************* * * Allocate memory for rx_buffer structures. Since we use one * rx_buffer per received packet, the maximum number of rx_buffer's * that we'll need is equal to the number of receive descriptors * that we've allocated. * **********************************************************************/ static int iflib_rxsd_alloc(iflib_rxq_t rxq) { if_ctx_t ctx = rxq->ifr_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = ctx->ifc_dev; iflib_fl_t fl; iflib_rxsd_t rxsd; int err; MPASS(scctx->isc_nrxd[0] > 0); MPASS(scctx->isc_nrxd[rxq->ifr_fl_offset] > 0); fl = rxq->ifr_fl; for (int i = 0; i < rxq->ifr_nfl; i++, fl++) { fl->ifl_sds = malloc(sizeof(struct iflib_sw_rx_desc) * scctx->isc_nrxd[rxq->ifr_fl_offset], M_IFLIB, M_WAITOK | M_ZERO); if (fl->ifl_sds == NULL) { device_printf(dev, "Unable to allocate rx sw desc memory\n"); return (ENOMEM); } fl->ifl_size = scctx->isc_nrxd[rxq->ifr_fl_offset]; /* this isn't necessarily the same */ err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */ 1, 0, /* alignment, bounds */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ sctx->isc_rx_maxsize, /* maxsize */ sctx->isc_rx_nsegments, /* nsegments */ sctx->isc_rx_maxsegsize, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockarg */ &fl->ifl_desc_tag); if (err) { device_printf(dev, "%s: bus_dma_tag_create failed %d\n", __func__, err); goto fail; } rxsd = fl->ifl_sds; for (int i = 0; i < scctx->isc_nrxd[rxq->ifr_fl_offset]; i++, rxsd++) { err = bus_dmamap_create(fl->ifl_desc_tag, 0, &rxsd->ifsd_map); if (err) { device_printf(dev, "%s: bus_dmamap_create failed: %d\n", __func__, err); goto fail; } } } return (0); fail: iflib_rx_structures_free(ctx); return (err); } /* * Internal service routines */ struct rxq_refill_cb_arg { int error; bus_dma_segment_t seg; int nseg; }; static void _rxq_refill_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error) { struct rxq_refill_cb_arg *cb_arg = arg; cb_arg->error = error; cb_arg->seg = segs[0]; cb_arg->nseg = nseg; } #ifdef ACPI_DMAR #define IS_DMAR(ctx) (ctx->ifc_flags & IFC_DMAR) #else #define IS_DMAR(ctx) (0) #endif /** * rxq_refill - refill an rxq free-buffer list * @ctx: the iflib context * @rxq: the free-list to refill * @n: the number of new 
buffers to allocate * * (Re)populate an rxq free-buffer list with up to @n new packet buffers. * The caller must assure that @n does not exceed the queue's capacity. */ static void _iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count) { struct mbuf *m; int pidx = fl->ifl_pidx; iflib_rxsd_t rxsd = &fl->ifl_sds[pidx]; caddr_t cl; int n, i = 0; uint64_t bus_addr; int err; n = count; MPASS(n > 0); MPASS(fl->ifl_credits + n <= fl->ifl_size); if (pidx < fl->ifl_cidx) MPASS(pidx + n <= fl->ifl_cidx); if (pidx == fl->ifl_cidx && (fl->ifl_credits < fl->ifl_size)) MPASS(fl->ifl_gen == 0); if (pidx > fl->ifl_cidx) MPASS(n <= fl->ifl_size - pidx + fl->ifl_cidx); DBG_COUNTER_INC(fl_refills); if (n > 8) DBG_COUNTER_INC(fl_refills_large); while (n--) { /* * We allocate an uninitialized mbuf + cluster, mbuf is * initialized after rx. * * If the cluster is still set then we know a minimum sized packet was received */ if ((cl = rxsd->ifsd_cl) == NULL) { if ((cl = rxsd->ifsd_cl = m_cljget(NULL, M_NOWAIT, fl->ifl_buf_size)) == NULL) break; #if MEMORY_LOGGING fl->ifl_cl_enqueued++; #endif } if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) { break; } #if MEMORY_LOGGING fl->ifl_m_enqueued++; #endif DBG_COUNTER_INC(rx_allocs); #ifdef notyet if ((rxsd->ifsd_flags & RX_SW_DESC_MAP_CREATED) == 0) { int err; if ((err = bus_dmamap_create(fl->ifl_ifdi->idi_tag, 0, &rxsd->ifsd_map))) { log(LOG_WARNING, "bus_dmamap_create failed %d\n", err); uma_zfree(fl->ifl_zone, cl); n = 0; goto done; } rxsd->ifsd_flags |= RX_SW_DESC_MAP_CREATED; } #endif #if defined(__i386__) || defined(__amd64__) if (!IS_DMAR(ctx)) { bus_addr = pmap_kextract((vm_offset_t)cl); } else #endif { struct rxq_refill_cb_arg cb_arg; iflib_rxq_t q; cb_arg.error = 0; q = fl->ifl_rxq; err = bus_dmamap_load(fl->ifl_desc_tag, rxsd->ifsd_map, cl, fl->ifl_buf_size, _rxq_refill_cb, &cb_arg, 0); if (err != 0 || cb_arg.error) { /* * !zone_pack ? 
*/ if (fl->ifl_zone == zone_pack) uma_zfree(fl->ifl_zone, cl); m_free(m); n = 0; goto done; } bus_addr = cb_arg.seg.ds_addr; } rxsd->ifsd_flags |= RX_SW_DESC_INUSE; MPASS(rxsd->ifsd_m == NULL); rxsd->ifsd_cl = cl; rxsd->ifsd_m = m; fl->ifl_bus_addrs[i] = bus_addr; fl->ifl_vm_addrs[i] = cl; rxsd++; fl->ifl_credits++; i++; MPASS(fl->ifl_credits <= fl->ifl_size); if (++fl->ifl_pidx == fl->ifl_size) { fl->ifl_pidx = 0; fl->ifl_gen = 1; rxsd = fl->ifl_sds; } if (n == 0 || i == IFLIB_MAX_RX_REFRESH) { ctx->isc_rxd_refill(ctx->ifc_softc, fl->ifl_rxq->ifr_id, fl->ifl_id, pidx, fl->ifl_bus_addrs, fl->ifl_vm_addrs, i, fl->ifl_buf_size); i = 0; pidx = fl->ifl_pidx; } } done: DBG_COUNTER_INC(rxd_flush); if (fl->ifl_pidx == 0) pidx = fl->ifl_size - 1; else pidx = fl->ifl_pidx - 1; ctx->isc_rxd_flush(ctx->ifc_softc, fl->ifl_rxq->ifr_id, fl->ifl_id, pidx); } static __inline void __iflib_fl_refill_lt(if_ctx_t ctx, iflib_fl_t fl, int max) { /* we avoid allowing pidx to catch up with cidx as it confuses ixl */ int32_t reclaimable = fl->ifl_size - fl->ifl_credits - 1; #ifdef INVARIANTS int32_t delta = fl->ifl_size - get_inuse(fl->ifl_size, fl->ifl_cidx, fl->ifl_pidx, fl->ifl_gen) - 1; #endif MPASS(fl->ifl_credits <= fl->ifl_size); MPASS(reclaimable == delta); if (reclaimable > 0) _iflib_fl_refill(ctx, fl, min(max, reclaimable)); } static void iflib_fl_bufs_free(iflib_fl_t fl) { iflib_dma_info_t idi = fl->ifl_ifdi; uint32_t i; for (i = 0; i < fl->ifl_size; i++) { iflib_rxsd_t d = &fl->ifl_sds[i]; if (d->ifsd_flags & RX_SW_DESC_INUSE) { bus_dmamap_unload(fl->ifl_desc_tag, d->ifsd_map); bus_dmamap_destroy(fl->ifl_desc_tag, d->ifsd_map); if (d->ifsd_m != NULL) { m_init(d->ifsd_m, M_NOWAIT, MT_DATA, 0); uma_zfree(zone_mbuf, d->ifsd_m); } if (d->ifsd_cl != NULL) uma_zfree(fl->ifl_zone, d->ifsd_cl); d->ifsd_flags = 0; } else { MPASS(d->ifsd_cl == NULL); MPASS(d->ifsd_m == NULL); } #if MEMORY_LOGGING fl->ifl_m_dequeued++; fl->ifl_cl_dequeued++; #endif d->ifsd_cl = NULL; d->ifsd_m = NULL; } /* * Reset free list values */ fl->ifl_credits = fl->ifl_cidx = fl->ifl_pidx = fl->ifl_gen = 0;; bzero(idi->idi_vaddr, idi->idi_size); } /********************************************************************* * * Initialize a receive ring and its buffers. 
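 * The cluster size is chosen from the softc context's
 * isc_max_frame_size; for example (values illustrative), a standard
 * 1500-byte MTU (frame size around 1518) selects MCLBYTES (2K)
 * clusters, a 9000-byte jumbo MTU selects MJUM9BYTES clusters, and
 * anything larger than 9216 falls through to MJUM16BYTES.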
* **********************************************************************/ static int iflib_fl_setup(iflib_fl_t fl) { iflib_rxq_t rxq = fl->ifl_rxq; if_ctx_t ctx = rxq->ifr_ctx; if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; /* ** Free current RX buffer structs and their mbufs */ iflib_fl_bufs_free(fl); /* Now replenish the mbufs */ MPASS(fl->ifl_credits == 0); /* * XXX don't set the max_frame_size to larger * than the hardware can handle */ if (sctx->isc_max_frame_size <= 2048) fl->ifl_buf_size = MCLBYTES; else if (sctx->isc_max_frame_size <= 4096) fl->ifl_buf_size = MJUMPAGESIZE; else if (sctx->isc_max_frame_size <= 9216) fl->ifl_buf_size = MJUM9BYTES; else fl->ifl_buf_size = MJUM16BYTES; if (fl->ifl_buf_size > ctx->ifc_max_fl_buf_size) ctx->ifc_max_fl_buf_size = fl->ifl_buf_size; fl->ifl_cltype = m_gettype(fl->ifl_buf_size); fl->ifl_zone = m_getzone(fl->ifl_buf_size); /* avoid pre-allocating zillions of clusters to an idle card * potentially speeding up attach */ _iflib_fl_refill(ctx, fl, min(128, fl->ifl_size)); MPASS(min(128, fl->ifl_size) == fl->ifl_credits); if (min(128, fl->ifl_size) != fl->ifl_credits) return (ENOBUFS); /* * handle failure */ MPASS(rxq != NULL); MPASS(fl->ifl_ifdi != NULL); bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); return (0); } /********************************************************************* * * Free receive ring data structures * **********************************************************************/ static void iflib_rx_sds_free(iflib_rxq_t rxq) { iflib_fl_t fl; int i; if (rxq->ifr_fl != NULL) { for (i = 0; i < rxq->ifr_nfl; i++) { fl = &rxq->ifr_fl[i]; if (fl->ifl_desc_tag != NULL) { bus_dma_tag_destroy(fl->ifl_desc_tag); fl->ifl_desc_tag = NULL; } } if (rxq->ifr_fl->ifl_sds != NULL) free(rxq->ifr_fl->ifl_sds, M_IFLIB); free(rxq->ifr_fl, M_IFLIB); rxq->ifr_fl = NULL; rxq->ifr_cq_gen = rxq->ifr_cq_cidx = rxq->ifr_cq_pidx = 0; } } /* * MI independent logic * */ static void iflib_timer(void *arg) { iflib_txq_t txq = arg; if_ctx_t ctx = txq->ift_ctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) return; /* ** Check on the state of the TX queue(s), this ** can be done without the lock because its RO ** and the HUNG state will be static if set. 
*/ IFDI_TIMER(ctx, txq->ift_id); if ((txq->ift_qstatus == IFLIB_QUEUE_HUNG) && (ctx->ifc_pause_frames == 0)) goto hung; if (TXQ_AVAIL(txq) <= 2*scctx->isc_tx_nsegments || ifmp_ring_is_stalled(txq->ift_br[0])) GROUPTASK_ENQUEUE(&txq->ift_task); ctx->ifc_pause_frames = 0; if (if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING) callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, txq->ift_timer.c_cpu); return; hung: CTX_LOCK(ctx); if_setdrvflagbits(ctx->ifc_ifp, 0, IFF_DRV_RUNNING); device_printf(ctx->ifc_dev, "TX(%d) desc avail = %d, pidx = %d\n", txq->ift_id, TXQ_AVAIL(txq), txq->ift_pidx); IFDI_WATCHDOG_RESET(ctx); ctx->ifc_watchdog_events++; ctx->ifc_pause_frames = 0; iflib_init_locked(ctx); CTX_UNLOCK(ctx); } static void iflib_init_locked(if_ctx_t ctx) { if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; if_t ifp = ctx->ifc_ifp; iflib_fl_t fl; iflib_txq_t txq; iflib_rxq_t rxq; int i, j; if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); IFDI_INTR_DISABLE(ctx); /* Set hardware offload abilities */ if_clearhwassist(ifp); if (if_getcapenable(ifp) & IFCAP_TXCSUM) if_sethwassistbits(ifp, CSUM_IP | CSUM_TCP | CSUM_UDP, 0); if (if_getcapenable(ifp) & IFCAP_TXCSUM_IPV6) if_sethwassistbits(ifp, (CSUM_TCP_IPV6 | CSUM_UDP_IPV6), 0); if (if_getcapenable(ifp) & IFCAP_TSO4) if_sethwassistbits(ifp, CSUM_IP_TSO, 0); if (if_getcapenable(ifp) & IFCAP_TSO6) if_sethwassistbits(ifp, CSUM_IP6_TSO, 0); for (i = 0, txq = ctx->ifc_txqs; i < sctx->isc_ntxqsets; i++, txq++) { CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); callout_stop(&txq->ift_db_check); CALLOUT_UNLOCK(txq); iflib_netmap_txq_init(ctx, txq); } for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) { iflib_netmap_rxq_init(ctx, rxq); } #ifdef INVARIANTS i = if_getdrvflags(ifp); #endif IFDI_INIT(ctx); MPASS(if_getdrvflags(ifp) == i); for (i = 0, rxq = ctx->ifc_rxqs; i < sctx->isc_nrxqsets; i++, rxq++) { for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) { if (iflib_fl_setup(fl)) { device_printf(ctx->ifc_dev, "freelist setup failed - check cluster settings\n"); goto done; } } } done: if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE); IFDI_INTR_ENABLE(ctx); txq = ctx->ifc_txqs; for (i = 0; i < sctx->isc_ntxqsets; i++, txq++) callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, txq->ift_timer.c_cpu); } static int iflib_media_change(if_t ifp) { if_ctx_t ctx = if_getsoftc(ifp); int err; CTX_LOCK(ctx); if ((err = IFDI_MEDIA_CHANGE(ctx)) == 0) iflib_init_locked(ctx); CTX_UNLOCK(ctx); return (err); } static void iflib_media_status(if_t ifp, struct ifmediareq *ifmr) { if_ctx_t ctx = if_getsoftc(ifp); CTX_LOCK(ctx); IFDI_UPDATE_ADMIN_STATUS(ctx); IFDI_MEDIA_STATUS(ctx, ifmr); CTX_UNLOCK(ctx); } static void iflib_stop(if_ctx_t ctx) { iflib_txq_t txq = ctx->ifc_txqs; iflib_rxq_t rxq = ctx->ifc_rxqs; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; iflib_dma_info_t di; iflib_fl_t fl; int i, j; /* Tell the stack that the interface is no longer active */ if_setdrvflagbits(ctx->ifc_ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING); IFDI_INTR_DISABLE(ctx); msleep(ctx, &ctx->ifc_mtx, PUSER, "iflib_init", hz); /* Wait for current tx queue users to exit to disarm watchdog timer. */ for (i = 0; i < scctx->isc_ntxqsets; i++, txq++) { /* make sure all transmitters have completed before proceeding XXX */ /* clean any enqueued buffers */ iflib_txq_check_drain(txq, 0); /* Free any existing tx buffers. 
*/ for (j = 0; j < txq->ift_size; j++) { iflib_txsd_free(ctx, txq, j); } txq->ift_processed = txq->ift_cleaned = txq->ift_cidx_processed = 0; txq->ift_in_use = txq->ift_cidx = txq->ift_pidx = txq->ift_no_desc_avail = 0; txq->ift_closed = txq->ift_mbuf_defrag = txq->ift_mbuf_defrag_failed = 0; txq->ift_no_tx_dma_setup = txq->ift_txd_encap_efbig = txq->ift_map_failed = 0; txq->ift_pullups = 0; ifmp_ring_reset_stats(txq->ift_br[0]); for (j = 0, di = txq->ift_ifdi; j < ctx->ifc_nhwtxqs; j++, di++) bzero((void *)di->idi_vaddr, di->idi_size); } for (i = 0; i < scctx->isc_nrxqsets; i++, rxq++) { /* make sure all transmitters have completed before proceeding XXX */ for (j = 0, di = txq->ift_ifdi; j < ctx->ifc_nhwrxqs; j++, di++) bzero((void *)di->idi_vaddr, di->idi_size); /* also resets the free lists pidx/cidx */ for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) iflib_fl_bufs_free(fl); } IFDI_STOP(ctx); } static iflib_rxsd_t rxd_frag_to_sd(iflib_rxq_t rxq, if_rxd_frag_t irf, int *cltype, int unload) { int flid, cidx; iflib_rxsd_t sd; iflib_fl_t fl; iflib_dma_info_t di; flid = irf->irf_flid; cidx = irf->irf_idx; fl = &rxq->ifr_fl[flid]; fl->ifl_credits--; #if MEMORY_LOGGING fl->ifl_m_dequeued++; if (cltype) fl->ifl_cl_dequeued++; #endif sd = &fl->ifl_sds[cidx]; di = fl->ifl_ifdi; bus_dmamap_sync(di->idi_tag, di->idi_map, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); /* not valid assert if bxe really does SGE from non-contiguous elements */ MPASS(fl->ifl_cidx == cidx); if (unload) bus_dmamap_unload(fl->ifl_desc_tag, sd->ifsd_map); if (__predict_false(++fl->ifl_cidx == fl->ifl_size)) { fl->ifl_cidx = 0; fl->ifl_gen = 0; } /* YES ick */ if (cltype) *cltype = fl->ifl_cltype; return (sd); } static struct mbuf * assemble_segments(iflib_rxq_t rxq, if_rxd_info_t ri) { int i, padlen , flags, cltype; struct mbuf *m, *mh, *mt; iflib_rxsd_t sd; caddr_t cl; i = 0; mh = NULL; do { sd = rxd_frag_to_sd(rxq, &ri->iri_frags[i], &cltype, TRUE); MPASS(sd->ifsd_cl != NULL); MPASS(sd->ifsd_m != NULL); /* Don't include zero-length frags */ if (ri->iri_frags[i].irf_len == 0) { /* XXX we can save the cluster here, but not the mbuf */ m_init(sd->ifsd_m, M_NOWAIT, MT_DATA, 0); m_free(sd->ifsd_m); sd->ifsd_m = NULL; continue; } m = sd->ifsd_m; if (mh == NULL) { flags = M_PKTHDR|M_EXT; mh = mt = m; padlen = ri->iri_pad; } else { flags = M_EXT; mt->m_next = m; mt = m; /* assuming padding is only on the first fragment */ padlen = 0; } sd->ifsd_m = NULL; cl = sd->ifsd_cl; sd->ifsd_cl = NULL; /* Can these two be made one ? */ m_init(m, M_NOWAIT, MT_DATA, flags); m_cljset(m, cl, cltype); /* * These must follow m_init and m_cljset */ m->m_data += padlen; ri->iri_len -= padlen; m->m_len = ri->iri_frags[i].irf_len; } while (++i < ri->iri_nfrags); return (mh); } /* * Process one software descriptor */ static struct mbuf * iflib_rxd_pkt_get(iflib_rxq_t rxq, if_rxd_info_t ri) { struct mbuf *m; iflib_rxsd_t sd; /* should I merge this back in now that the two paths are basically duplicated? 
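 * The split below is copy versus attach: a packet that arrived as a
 * single fragment no larger than MIN(IFLIB_RX_COPY_THRESH, MHLEN) is
 * memcpy'd into the mbuf's own storage and the cluster stays on the
 * free list for reuse by the refill path, while anything larger goes
 * to assemble_segments(), which attaches the cluster(s) zero-copy.
 * For example, a 64-byte TCP ACK takes the copy path; a full
 * 1500-byte frame takes the cluster path.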
*/ if (ri->iri_nfrags == 1 && ri->iri_frags[0].irf_len <= MIN(IFLIB_RX_COPY_THRESH, MHLEN)) { sd = rxd_frag_to_sd(rxq, &ri->iri_frags[0], NULL, FALSE); m = sd->ifsd_m; sd->ifsd_m = NULL; m_init(m, M_NOWAIT, MT_DATA, M_PKTHDR); memcpy(m->m_data, sd->ifsd_cl, ri->iri_len); m->m_len = ri->iri_frags[0].irf_len; } else { m = assemble_segments(rxq, ri); } m->m_pkthdr.len = ri->iri_len; m->m_pkthdr.rcvif = ri->iri_ifp; m->m_flags |= ri->iri_flags; m->m_pkthdr.ether_vtag = ri->iri_vtag; m->m_pkthdr.flowid = ri->iri_flowid; M_HASHTYPE_SET(m, ri->iri_rsstype); m->m_pkthdr.csum_flags = ri->iri_csum_flags; m->m_pkthdr.csum_data = ri->iri_csum_data; return (m); } static bool iflib_rxeof(iflib_rxq_t rxq, int budget) { if_ctx_t ctx = rxq->ifr_ctx; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; int avail, i; uint16_t *cidxp; struct if_rxd_info ri; int err, budget_left, rx_bytes, rx_pkts; iflib_fl_t fl; struct ifnet *ifp; int lro_enabled; /* * XXX early demux data packets so that if_input processing only handles * acks in interrupt context */ struct mbuf *m, *mh, *mt; if (netmap_rx_irq(ctx->ifc_ifp, rxq->ifr_id, &budget)) { return (FALSE); } mh = mt = NULL; MPASS(budget > 0); rx_pkts = rx_bytes = 0; if (sctx->isc_flags & IFLIB_HAS_RXCQ) cidxp = &rxq->ifr_cq_cidx; else cidxp = &rxq->ifr_fl[0].ifl_cidx; if ((avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget)) == 0) { for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++) __iflib_fl_refill_lt(ctx, fl, budget + 8); DBG_COUNTER_INC(rx_unavail); return (false); } for (budget_left = budget; (budget_left > 0) && (avail > 0); budget_left--, avail--) { if (__predict_false(!CTX_ACTIVE(ctx))) { DBG_COUNTER_INC(rx_ctx_inactive); break; } /* * Reset client set fields to their default values */ bzero(&ri, sizeof(ri)); ri.iri_qsidx = rxq->ifr_id; ri.iri_cidx = *cidxp; ri.iri_ifp = ctx->ifc_ifp; ri.iri_frags = rxq->ifr_frags; err = ctx->isc_rxd_pkt_get(ctx->ifc_softc, &ri); /* in lieu of handling correctly - make sure it isn't being unhandled */ MPASS(err == 0); if (sctx->isc_flags & IFLIB_HAS_RXCQ) { *cidxp = ri.iri_cidx; /* Update our consumer index */ while (rxq->ifr_cq_cidx >= scctx->isc_nrxd[0]) { rxq->ifr_cq_cidx -= scctx->isc_nrxd[0]; rxq->ifr_cq_gen = 0; } /* was this only a completion queue message? 
*/ if (__predict_false(ri.iri_nfrags == 0)) continue; } MPASS(ri.iri_nfrags != 0); MPASS(ri.iri_len != 0); /* will advance the cidx on the corresponding free lists */ m = iflib_rxd_pkt_get(rxq, &ri); if (avail == 0 && budget_left) avail = iflib_rxd_avail(ctx, rxq, *cidxp, budget_left); if (__predict_false(m == NULL)) { DBG_COUNTER_INC(rx_mbuf_null); continue; } /* imm_pkt: -- cxgb */ if (mh == NULL) mh = mt = m; else { mt->m_nextpkt = m; mt = m; } } /* make sure that we can refill faster than drain */ for (i = 0, fl = &rxq->ifr_fl[0]; i < sctx->isc_nfl; i++, fl++) __iflib_fl_refill_lt(ctx, fl, budget + 8); ifp = ctx->ifc_ifp; lro_enabled = (if_getcapenable(ifp) & IFCAP_LRO); while (mh != NULL) { m = mh; mh = mh->m_nextpkt; m->m_nextpkt = NULL; rx_bytes += m->m_pkthdr.len; rx_pkts++; #if defined(INET6) || defined(INET) if (lro_enabled && tcp_lro_rx(&rxq->ifr_lc, m, 0) == 0) continue; #endif DBG_COUNTER_INC(rx_if_input); ifp->if_input(ifp, m); } if_inc_counter(ifp, IFCOUNTER_IBYTES, rx_bytes); if_inc_counter(ifp, IFCOUNTER_IPACKETS, rx_pkts); /* * Flush any outstanding LRO work */ #if defined(INET6) || defined(INET) tcp_lro_flush_all(&rxq->ifr_lc); #endif if (avail) return true; return (iflib_rxd_avail(ctx, rxq, *cidxp, 1)); } #define M_CSUM_FLAGS(m) ((m)->m_pkthdr.csum_flags) #define M_HAS_VLANTAG(m) (m->m_flags & M_VLANTAG) #define TXQ_MAX_DB_DEFERRED(size) (size >> 5) #define TXQ_MAX_DB_CONSUMED(size) (size >> 4) static __inline void iflib_txd_db_check(if_ctx_t ctx, iflib_txq_t txq, int ring) { uint32_t dbval; if (ring || txq->ift_db_pending >= TXQ_MAX_DB_DEFERRED(txq->ift_size)) { /* the lock will only ever be contended in the !min_latency case */ if (!TXDB_TRYLOCK(txq)) return; dbval = txq->ift_npending ? txq->ift_npending : txq->ift_pidx; ctx->isc_txd_flush(ctx->ifc_softc, txq->ift_id, dbval); txq->ift_db_pending = txq->ift_npending = 0; TXDB_UNLOCK(txq); } } static void iflib_txd_deferred_db_check(void * arg) { iflib_txq_t txq = arg; /* simple non-zero boolean so use bitwise OR */ if ((txq->ift_db_pending | txq->ift_npending) && txq->ift_db_pending >= txq->ift_db_pending_queued) iflib_txd_db_check(txq->ift_ctx, txq, TRUE); txq->ift_db_pending_queued = 0; if (ifmp_ring_is_stalled(txq->ift_br[0])) iflib_txq_check_drain(txq, 4); } #ifdef PKT_DEBUG static void print_pkt(if_pkt_info_t pi) { printf("pi len: %d qsidx: %d nsegs: %d ndescs: %d flags: %x pidx: %d\n", pi->ipi_len, pi->ipi_qsidx, pi->ipi_nsegs, pi->ipi_ndescs, pi->ipi_flags, pi->ipi_pidx); printf("pi new_pidx: %d csum_flags: %lx tso_segsz: %d mflags: %x vtag: %d\n", pi->ipi_new_pidx, pi->ipi_csum_flags, pi->ipi_tso_segsz, pi->ipi_mflags, pi->ipi_vtag); printf("pi etype: %d ehdrlen: %d ip_hlen: %d ipproto: %d\n", pi->ipi_etype, pi->ipi_ehdrlen, pi->ipi_ip_hlen, pi->ipi_ipproto); } #endif #define IS_TSO4(pi) ((pi)->ipi_csum_flags & CSUM_IP_TSO) #define IS_TSO6(pi) ((pi)->ipi_csum_flags & CSUM_IP6_TSO) static int iflib_parse_header(iflib_txq_t txq, if_pkt_info_t pi, struct mbuf **mp) { struct ether_vlan_header *eh; struct mbuf *m, *n; n = m = *mp; /* * Determine where frame payload starts. * Jump over vlan headers if already present, * helpful for QinQ too. 
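 * For example, an untagged frame yields ipi_ehdrlen == ETHER_HDR_LEN
 * (14 bytes) with the EtherType taken from evl_encap_proto, while an
 * 802.1Q-tagged frame yields ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN
 * (14 + 4 = 18 bytes) with the EtherType taken from evl_proto, as the
 * checks below distinguish.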
*/ if (__predict_false(m->m_len < sizeof(*eh))) { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, sizeof(*eh))) == NULL)) return (ENOMEM); } eh = mtod(m, struct ether_vlan_header *); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { pi->ipi_etype = ntohs(eh->evl_proto); pi->ipi_ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; } else { pi->ipi_etype = ntohs(eh->evl_encap_proto); pi->ipi_ehdrlen = ETHER_HDR_LEN; } switch (pi->ipi_etype) { #ifdef INET case ETHERTYPE_IP: { struct ip *ip = NULL; struct tcphdr *th = NULL; int minthlen; minthlen = min(m->m_pkthdr.len, pi->ipi_ehdrlen + sizeof(*ip) + sizeof(*th)); if (__predict_false(m->m_len < minthlen)) { /* * if this code bloat is causing too much of a hit * move it to a separate function and mark it noinline */ if (m->m_len == pi->ipi_ehdrlen) { n = m->m_next; MPASS(n); if (n->m_len >= sizeof(*ip)) { ip = (struct ip *)n->m_data; if (n->m_len >= (ip->ip_hl << 2) + sizeof(*th)) th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); } else { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, minthlen)) == NULL)) return (ENOMEM); ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); } } else { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, minthlen)) == NULL)) return (ENOMEM); ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th)) th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); } } else { ip = (struct ip *)(m->m_data + pi->ipi_ehdrlen); if (m->m_len >= (ip->ip_hl << 2) + sizeof(*th)) th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2)); } pi->ipi_ip_hlen = ip->ip_hl << 2; pi->ipi_ipproto = ip->ip_p; pi->ipi_flags |= IPI_TX_IPV4; if (pi->ipi_csum_flags & CSUM_IP) ip->ip_sum = 0; if (pi->ipi_ipproto == IPPROTO_TCP) { if (__predict_false(th == NULL)) { txq->ift_pullups++; if (__predict_false((m = m_pullup(m, (ip->ip_hl << 2) + sizeof(*th))) == NULL)) return (ENOMEM); th = (struct tcphdr *)((caddr_t)ip + pi->ipi_ip_hlen); } pi->ipi_tcp_hflags = th->th_flags; pi->ipi_tcp_hlen = th->th_off << 2; pi->ipi_tcp_seq = th->th_seq; } if (IS_TSO4(pi)) { if (__predict_false(ip->ip_p != IPPROTO_TCP)) return (ENXIO); th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz; } break; } #endif #ifdef INET6 case ETHERTYPE_IPV6: { struct ip6_hdr *ip6 = (struct ip6_hdr *)(m->m_data + pi->ipi_ehdrlen); struct tcphdr *th; pi->ipi_ip_hlen = sizeof(struct ip6_hdr); if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) { if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr))) == NULL)) return (ENOMEM); } th = (struct tcphdr *)((caddr_t)ip6 + pi->ipi_ip_hlen); /* XXX-BZ this will go badly in case of ext hdrs. */ pi->ipi_ipproto = ip6->ip6_nxt; pi->ipi_flags |= IPI_TX_IPV6; if (pi->ipi_ipproto == IPPROTO_TCP) { if (__predict_false(m->m_len < pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) { if (__predict_false((m = m_pullup(m, pi->ipi_ehdrlen + sizeof(struct ip6_hdr) + sizeof(struct tcphdr))) == NULL)) return (ENOMEM); } pi->ipi_tcp_hflags = th->th_flags; pi->ipi_tcp_hlen = th->th_off << 2; } if (IS_TSO6(pi)) { if (__predict_false(ip6->ip6_nxt != IPPROTO_TCP)) return (ENXIO); /* * The corresponding flag is set by the stack in the IPv4 * TSO case, but not in IPv6 (at least in FreeBSD 10.2). * So, set it here because the rest of the flow requires it. 
*/ pi->ipi_csum_flags |= CSUM_TCP_IPV6; th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); pi->ipi_tso_segsz = m->m_pkthdr.tso_segsz; } break; } #endif default: pi->ipi_csum_flags &= ~CSUM_OFFLOAD; pi->ipi_ip_hlen = 0; break; } *mp = m; return (0); } static __noinline struct mbuf * collapse_pkthdr(struct mbuf *m0) { struct mbuf *m, *m_next, *tmp; m = m0; m_next = m->m_next; while (m_next != NULL && m_next->m_len == 0) { m = m_next; m->m_next = NULL; m_free(m); m_next = m_next->m_next; } m = m0; m->m_next = m_next; if ((m_next->m_flags & M_EXT) == 0) { m = m_defrag(m, M_NOWAIT); } else { tmp = m_next->m_next; memcpy(m_next, m, MPKTHSIZE); m = m_next; m->m_next = tmp; } return (m); } /* * If dodgy hardware rejects the scatter gather chain we've handed it * we'll need to remove the mbuf chain from ifsg_m[] before we can add the * m_defrag'd mbufs */ static __noinline struct mbuf * iflib_remove_mbuf(iflib_txq_t txq) { int ntxd, i, pidx; struct mbuf *m, *mh, **ifsd_m; pidx = txq->ift_pidx; ifsd_m = txq->ift_sds.ifsd_m; ntxd = txq->ift_size; mh = m = ifsd_m[pidx]; ifsd_m[pidx] = NULL; #if MEMORY_LOGGING txq->ift_dequeued++; #endif i = 1; while (m) { ifsd_m[(pidx + i) & (ntxd -1)] = NULL; #if MEMORY_LOGGING txq->ift_dequeued++; #endif m = m->m_next; i++; } return (mh); } static int iflib_busdma_load_mbuf_sg(iflib_txq_t txq, bus_dma_tag_t tag, bus_dmamap_t map, struct mbuf **m0, bus_dma_segment_t *segs, int *nsegs, int max_segs, int flags) { if_ctx_t ctx; if_shared_ctx_t sctx; if_softc_ctx_t scctx; int i, next, pidx, mask, err, maxsegsz, ntxd, count; struct mbuf *m, *tmp, **ifsd_m, **mp; m = *m0; /* * Please don't ever do this */ if (__predict_false(m->m_len == 0)) *m0 = m = collapse_pkthdr(m); ctx = txq->ift_ctx; sctx = ctx->ifc_sctx; scctx = &ctx->ifc_softc_ctx; ifsd_m = txq->ift_sds.ifsd_m; ntxd = txq->ift_size; pidx = txq->ift_pidx; if (map != NULL) { uint8_t *ifsd_flags = txq->ift_sds.ifsd_flags; err = bus_dmamap_load_mbuf_sg(tag, map, *m0, segs, nsegs, BUS_DMA_NOWAIT); if (err) return (err); ifsd_flags[pidx] |= TX_SW_DESC_MAPPED; i = 0; next = pidx; mask = (txq->ift_size-1); m = *m0; do { mp = &ifsd_m[next]; *mp = m; m = m->m_next; if (__predict_false((*mp)->m_len == 0)) { m_free(*mp); *mp = NULL; } else next = (pidx + i) & (ntxd-1); } while (m != NULL); } else { int buflen, sgsize, max_sgsize; vm_offset_t vaddr; vm_paddr_t curaddr; count = i = 0; maxsegsz = sctx->isc_tx_maxsize; m = *m0; do { if (__predict_false(m->m_len <= 0)) { tmp = m; m = m->m_next; tmp->m_next = NULL; m_free(tmp); continue; } buflen = m->m_len; vaddr = (vm_offset_t)m->m_data; /* * see if we can't be smarter about physically * contiguous mappings */ next = (pidx + count) & (ntxd-1); MPASS(ifsd_m[next] == NULL); #if MEMORY_LOGGING txq->ift_enqueued++; #endif ifsd_m[next] = m; while (buflen > 0) { max_sgsize = MIN(buflen, maxsegsz); curaddr = pmap_kextract(vaddr); sgsize = PAGE_SIZE - (curaddr & PAGE_MASK); sgsize = MIN(sgsize, max_sgsize); segs[i].ds_addr = curaddr; segs[i].ds_len = sgsize; vaddr += sgsize; buflen -= sgsize; i++; if (i >= max_segs) goto err; } count++; tmp = m; m = m->m_next; } while (m != NULL); *nsegs = i; } return (0); err: *m0 = iflib_remove_mbuf(txq); return (EFBIG); } static int iflib_encap(iflib_txq_t txq, struct mbuf **m_headp) { if_ctx_t ctx; if_shared_ctx_t sctx; if_softc_ctx_t scctx; bus_dma_segment_t *segs; struct mbuf *m_head; bus_dmamap_t map; struct if_pkt_info pi; int remap = 0; int err, nsegs, ndesc, max_segs, pidx, cidx, next, ntxd; bus_dma_tag_t desc_tag; segs = txq->ift_segs; ctx 
= txq->ift_ctx; sctx = ctx->ifc_sctx; scctx = &ctx->ifc_softc_ctx; segs = txq->ift_segs; ntxd = txq->ift_size; m_head = *m_headp; map = NULL; /* * If we're doing TSO the next descriptor to clean may be quite far ahead */ cidx = txq->ift_cidx; pidx = txq->ift_pidx; next = (cidx + CACHE_PTR_INCREMENT) & (ntxd-1); /* prefetch the next cache line of mbuf pointers and flags */ prefetch(&txq->ift_sds.ifsd_m[next]); if (txq->ift_sds.ifsd_map != NULL) { prefetch(&txq->ift_sds.ifsd_map[next]); map = txq->ift_sds.ifsd_map[pidx]; next = (cidx + CACHE_LINE_SIZE) & (ntxd-1); prefetch(&txq->ift_sds.ifsd_flags[next]); } if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { desc_tag = txq->ift_tso_desc_tag; max_segs = scctx->isc_tx_tso_segments_max; } else { desc_tag = txq->ift_desc_tag; max_segs = scctx->isc_tx_nsegments; } m_head = *m_headp; bzero(&pi, sizeof(pi)); pi.ipi_len = m_head->m_pkthdr.len; pi.ipi_mflags = (m_head->m_flags & (M_VLANTAG|M_BCAST|M_MCAST)); pi.ipi_csum_flags = m_head->m_pkthdr.csum_flags; pi.ipi_vtag = (m_head->m_flags & M_VLANTAG) ? m_head->m_pkthdr.ether_vtag : 0; pi.ipi_pidx = pidx; pi.ipi_qsidx = txq->ift_id; /* deliberate bitwise OR to make one condition */ if (__predict_true((pi.ipi_csum_flags | pi.ipi_vtag))) { if (__predict_false((err = iflib_parse_header(txq, &pi, m_headp)) != 0)) return (err); m_head = *m_headp; } retry: err = iflib_busdma_load_mbuf_sg(txq, desc_tag, map, m_headp, segs, &nsegs, max_segs, BUS_DMA_NOWAIT); defrag: if (__predict_false(err)) { switch (err) { case EFBIG: /* try collapse once and defrag once */ if (remap == 0) m_head = m_collapse(*m_headp, M_NOWAIT, max_segs); if (remap == 1) m_head = m_defrag(*m_headp, M_NOWAIT); remap++; if (__predict_false(m_head == NULL)) goto defrag_failed; txq->ift_mbuf_defrag++; *m_headp = m_head; goto retry; break; case ENOMEM: txq->ift_no_tx_dma_setup++; break; default: txq->ift_no_tx_dma_setup++; m_freem(*m_headp); DBG_COUNTER_INC(tx_frees); *m_headp = NULL; break; } txq->ift_map_failed++; DBG_COUNTER_INC(encap_load_mbuf_fail); return (err); } /* * XXX assumes a 1 to 1 relationship between segments and * descriptors - this does not hold true on all drivers, e.g. 
* cxgb */ if (__predict_false(nsegs + 2 > TXQ_AVAIL(txq))) { txq->ift_no_desc_avail++; if (map != NULL) bus_dmamap_unload(desc_tag, map); DBG_COUNTER_INC(encap_txq_avail_fail); if ((txq->ift_task.gt_task.ta_flags & TASK_ENQUEUED) == 0) GROUPTASK_ENQUEUE(&txq->ift_task); return (ENOBUFS); } pi.ipi_segs = segs; pi.ipi_nsegs = nsegs; MPASS(pidx >= 0 && pidx < txq->ift_size); #ifdef PKT_DEBUG print_pkt(&pi); #endif if ((err = ctx->isc_txd_encap(ctx->ifc_softc, &pi)) == 0) { bus_dmamap_sync(txq->ift_ifdi->idi_tag, txq->ift_ifdi->idi_map, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); DBG_COUNTER_INC(tx_encap); MPASS(pi.ipi_new_pidx >= 0 && pi.ipi_new_pidx < txq->ift_size); ndesc = pi.ipi_new_pidx - pi.ipi_pidx; if (pi.ipi_new_pidx < pi.ipi_pidx) { ndesc += txq->ift_size; txq->ift_gen = 1; } MPASS(pi.ipi_new_pidx != pidx); MPASS(ndesc > 0); txq->ift_in_use += ndesc; /* * We update the last software descriptor again here because there may * be a sentinel and/or there may be more mbufs than segments */ txq->ift_pidx = pi.ipi_new_pidx; txq->ift_npending += pi.ipi_ndescs; } else if (__predict_false(err == EFBIG && remap < 2)) { *m_headp = m_head = iflib_remove_mbuf(txq); remap = 1; txq->ift_txd_encap_efbig++; goto defrag; } else DBG_COUNTER_INC(encap_txd_encap_fail); return (err); defrag_failed: txq->ift_mbuf_defrag_failed++; txq->ift_map_failed++; m_freem(*m_headp); DBG_COUNTER_INC(tx_frees); *m_headp = NULL; return (ENOMEM); } /* forward compatibility for cxgb */ #define FIRST_QSET(ctx) 0 #define NTXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_ntxqsets) #define NRXQSETS(ctx) ((ctx)->ifc_softc_ctx.isc_nrxqsets) #define QIDX(ctx, m) ((((m)->m_pkthdr.flowid & ctx->ifc_softc_ctx.isc_rss_table_mask) % NTXQSETS(ctx)) + FIRST_QSET(ctx)) #define DESC_RECLAIMABLE(q) ((int)((q)->ift_processed - (q)->ift_cleaned - (q)->ift_ctx->ifc_softc_ctx.isc_tx_nsegments)) #define RECLAIM_THRESH(ctx) ((ctx)->ifc_sctx->isc_tx_reclaim_thresh) #define MAX_TX_DESC(ctx) ((ctx)->ifc_softc_ctx.isc_tx_tso_segments_max) /* if there are more than TXQ_MIN_OCCUPANCY packets pending we consider deferring * doorbell writes * * ORing with 2 assures that min occupancy is never less than 2 without any conditional logic */ #define TXQ_MIN_OCCUPANCY(size) ((size >> 6)| 0x2) static inline int iflib_txq_min_occupancy(iflib_txq_t txq) { if_ctx_t ctx; ctx = txq->ift_ctx; return (get_inuse(txq->ift_size, txq->ift_cidx, txq->ift_pidx, txq->ift_gen) < TXQ_MIN_OCCUPANCY(txq->ift_size) + MAX_TX_DESC(ctx)); } static void iflib_tx_desc_free(iflib_txq_t txq, int n) { int hasmap; uint32_t qsize, cidx, mask, gen; struct mbuf *m, **ifsd_m; uint8_t *ifsd_flags; bus_dmamap_t *ifsd_map; cidx = txq->ift_cidx; gen = txq->ift_gen; qsize = txq->ift_size; mask = qsize-1; hasmap = txq->ift_sds.ifsd_map != NULL; ifsd_flags = txq->ift_sds.ifsd_flags; ifsd_m = txq->ift_sds.ifsd_m; ifsd_map = txq->ift_sds.ifsd_map; while (n--) { prefetch(ifsd_m[(cidx + 3) & mask]); prefetch(ifsd_m[(cidx + 4) & mask]); if (ifsd_m[cidx] != NULL) { prefetch(&ifsd_m[(cidx + CACHE_PTR_INCREMENT) & mask]); prefetch(&ifsd_flags[(cidx + CACHE_PTR_INCREMENT) & mask]); if (hasmap && (ifsd_flags[cidx] & TX_SW_DESC_MAPPED)) { /* * does it matter if it's not the TSO tag? 
If so we'll * have to add the type to flags */ bus_dmamap_unload(txq->ift_desc_tag, ifsd_map[cidx]); ifsd_flags[cidx] &= ~TX_SW_DESC_MAPPED; } if ((m = ifsd_m[cidx]) != NULL) { /* XXX we don't support any drivers that batch packets yet */ MPASS(m->m_nextpkt == NULL); m_free(m); ifsd_m[cidx] = NULL; #if MEMORY_LOGGING txq->ift_dequeued++; #endif DBG_COUNTER_INC(tx_frees); } } if (__predict_false(++cidx == qsize)) { cidx = 0; gen = 0; } } txq->ift_cidx = cidx; txq->ift_gen = gen; } static __inline int iflib_completed_tx_reclaim(iflib_txq_t txq, int thresh) { int reclaim; if_ctx_t ctx = txq->ift_ctx; KASSERT(thresh >= 0, ("invalid threshold to reclaim")); MPASS(thresh /*+ MAX_TX_DESC(txq->ift_ctx) */ < txq->ift_size); /* * Need a rate-limiting check so that this isn't called every time */ iflib_tx_credits_update(ctx, txq); reclaim = DESC_RECLAIMABLE(txq); if (reclaim <= thresh /* + MAX_TX_DESC(txq->ift_ctx) */) { #ifdef INVARIANTS if (iflib_verbose_debug) { printf("%s processed=%ju cleaned=%ju tx_nsegments=%d reclaim=%d thresh=%d\n", __FUNCTION__, txq->ift_processed, txq->ift_cleaned, txq->ift_ctx->ifc_softc_ctx.isc_tx_nsegments, reclaim, thresh); } #endif return (0); } iflib_tx_desc_free(txq, reclaim); txq->ift_cleaned += reclaim; txq->ift_in_use -= reclaim; if (txq->ift_active == FALSE) txq->ift_active = TRUE; return (reclaim); } static struct mbuf ** _ring_peek_one(struct ifmp_ring *r, int cidx, int offset) { return (__DEVOLATILE(struct mbuf **, &r->items[(cidx + offset) & (r->size-1)])); } static void iflib_txq_check_drain(iflib_txq_t txq, int budget) { ifmp_ring_check_drainage(txq->ift_br[0], budget); } static uint32_t iflib_txq_can_drain(struct ifmp_ring *r) { iflib_txq_t txq = r->cookie; if_ctx_t ctx = txq->ift_ctx; return ((TXQ_AVAIL(txq) >= MAX_TX_DESC(ctx)) || ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, txq->ift_cidx_processed, false)); } static uint32_t iflib_txq_drain(struct ifmp_ring *r, uint32_t cidx, uint32_t pidx) { iflib_txq_t txq = r->cookie; if_ctx_t ctx = txq->ift_ctx; if_t ifp = ctx->ifc_ifp; struct mbuf **mp, *m; int i, count, consumed, pkt_sent, bytes_sent, mcast_sent, avail, err, in_use_prev, desc_used; if (__predict_false(!(if_getdrvflags(ifp) & IFF_DRV_RUNNING) || !LINK_ACTIVE(ctx))) { DBG_COUNTER_INC(txq_drain_notready); return (0); } avail = IDXDIFF(pidx, cidx, r->size); if (__predict_false(ctx->ifc_flags & IFC_QFLUSH)) { DBG_COUNTER_INC(txq_drain_flushing); for (i = 0; i < avail; i++) { m_free(r->items[(cidx + i) & (r->size-1)]); r->items[(cidx + i) & (r->size-1)] = NULL; } return (avail); } iflib_completed_tx_reclaim(txq, RECLAIM_THRESH(ctx)); if (__predict_false(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_OACTIVE)) { txq->ift_qstatus = IFLIB_QUEUE_IDLE; CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); callout_stop(&txq->ift_db_check); CALLOUT_UNLOCK(txq); DBG_COUNTER_INC(txq_drain_oactive); return (0); } consumed = mcast_sent = bytes_sent = pkt_sent = 0; count = MIN(avail, TX_BATCH_SIZE); for (desc_used = i = 0; i < count && TXQ_AVAIL(txq) > MAX_TX_DESC(ctx) + 2; i++) { mp = _ring_peek_one(r, cidx, i); in_use_prev = txq->ift_in_use; err = iflib_encap(txq, mp); /* * What other errors should we bail out for? 
*/ if (err == ENOBUFS) { DBG_COUNTER_INC(txq_drain_encapfail); break; } consumed++; if (err) continue; pkt_sent++; m = *mp; DBG_COUNTER_INC(tx_sent); bytes_sent += m->m_pkthdr.len; if (m->m_flags & M_MCAST) mcast_sent++; txq->ift_db_pending += (txq->ift_in_use - in_use_prev); desc_used += (txq->ift_in_use - in_use_prev); iflib_txd_db_check(ctx, txq, FALSE); ETHER_BPF_MTAP(ifp, m); if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) break; if (desc_used > TXQ_MAX_DB_CONSUMED(txq->ift_size)) break; } if ((iflib_min_tx_latency || iflib_txq_min_occupancy(txq)) && txq->ift_db_pending) iflib_txd_db_check(ctx, txq, TRUE); else if ((txq->ift_db_pending || TXQ_AVAIL(txq) < MAX_TX_DESC(ctx)) && (callout_pending(&txq->ift_db_check) == 0)) { txq->ift_db_pending_queued = txq->ift_db_pending; callout_reset_on(&txq->ift_db_check, 1, iflib_txd_deferred_db_check, txq, txq->ift_db_check.c_cpu); } if_inc_counter(ifp, IFCOUNTER_OBYTES, bytes_sent); if_inc_counter(ifp, IFCOUNTER_OPACKETS, pkt_sent); if (mcast_sent) if_inc_counter(ifp, IFCOUNTER_OMCASTS, mcast_sent); return (consumed); } static void _task_fn_tx(void *context) { iflib_txq_t txq = context; if_ctx_t ctx = txq->ift_ctx; if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) return; ifmp_ring_check_drainage(txq->ift_br[0], TX_BATCH_SIZE); } static void _task_fn_rx(void *context) { iflib_rxq_t rxq = context; if_ctx_t ctx = rxq->ifr_ctx; bool more; int rc; DBG_COUNTER_INC(task_fn_rxs); if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) return; if ((more = iflib_rxeof(rxq, 16 /* XXX */)) == false) { if (ctx->ifc_flags & IFC_LEGACY) IFDI_INTR_ENABLE(ctx); else { DBG_COUNTER_INC(rx_intr_enables); rc = IFDI_QUEUE_INTR_ENABLE(ctx, rxq->ifr_id); KASSERT(rc != ENOTSUP, ("MSI-X support requires queue_intr_enable, but not implemented in driver")); } } if (__predict_false(!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING))) return; if (more) GROUPTASK_ENQUEUE(&rxq->ifr_task); } static void _task_fn_admin(void *context) { if_ctx_t ctx = context; if_softc_ctx_t sctx = &ctx->ifc_softc_ctx; iflib_txq_t txq; int i; if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) return; CTX_LOCK(ctx); for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) { CALLOUT_LOCK(txq); callout_stop(&txq->ift_timer); CALLOUT_UNLOCK(txq); } IFDI_UPDATE_ADMIN_STATUS(ctx); for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) callout_reset_on(&txq->ift_timer, hz/2, iflib_timer, txq, txq->ift_timer.c_cpu); IFDI_LINK_INTR_ENABLE(ctx); CTX_UNLOCK(ctx); if (LINK_ACTIVE(ctx) == 0) return; for (txq = ctx->ifc_txqs, i = 0; i < sctx->isc_ntxqsets; i++, txq++) iflib_txq_check_drain(txq, IFLIB_RESTART_BUDGET); } static void _task_fn_iov(void *context) { if_ctx_t ctx = context; if (!(if_getdrvflags(ctx->ifc_ifp) & IFF_DRV_RUNNING)) return; CTX_LOCK(ctx); IFDI_VFLR_HANDLE(ctx); CTX_UNLOCK(ctx); } static int iflib_sysctl_int_delay(SYSCTL_HANDLER_ARGS) { int err; if_int_delay_info_t info; if_ctx_t ctx; info = (if_int_delay_info_t)arg1; ctx = info->iidi_ctx; info->iidi_req = req; info->iidi_oidp = oidp; CTX_LOCK(ctx); err = IFDI_SYSCTL_INT_DELAY(ctx, info); CTX_UNLOCK(ctx); return (err); } /********************************************************************* * * IFNET FUNCTIONS * **********************************************************************/ static void iflib_if_init_locked(if_ctx_t ctx) { iflib_stop(ctx); iflib_init_locked(ctx); } static void iflib_if_init(void *arg) { if_ctx_t ctx = arg; CTX_LOCK(ctx); 
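/* if_init callback: run the full stop/re-init cycle with the ctx lock held */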
iflib_if_init_locked(ctx); CTX_UNLOCK(ctx); } static int iflib_if_transmit(if_t ifp, struct mbuf *m) { if_ctx_t ctx = if_getsoftc(ifp); iflib_txq_t txq; int err, qidx; if (__predict_false((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || !LINK_ACTIVE(ctx))) { DBG_COUNTER_INC(tx_frees); m_freem(m); return (0); } MPASS(m->m_nextpkt == NULL); qidx = 0; if ((NTXQSETS(ctx) > 1) && M_HASHTYPE_GET(m)) qidx = QIDX(ctx, m); /* * XXX calculate buf_ring based on flowid (divvy up bits?) */ txq = &ctx->ifc_txqs[qidx]; #ifdef DRIVER_BACKPRESSURE if (txq->ift_closed) { while (m != NULL) { next = m->m_nextpkt; m->m_nextpkt = NULL; m_freem(m); m = next; } return (ENOBUFS); } #endif #ifdef notyet qidx = count = 0; mp = marr; next = m; do { count++; next = next->m_nextpkt; } while (next != NULL); if (count > nitems(marr)) if ((mp = malloc(count*sizeof(struct mbuf *), M_IFLIB, M_NOWAIT)) == NULL) { /* XXX check nextpkt */ m_freem(m); /* XXX simplify for now */ DBG_COUNTER_INC(tx_frees); return (ENOBUFS); } for (next = m, i = 0; next != NULL; i++) { mp[i] = next; next = next->m_nextpkt; mp[i]->m_nextpkt = NULL; } #endif DBG_COUNTER_INC(tx_seen); err = ifmp_ring_enqueue(txq->ift_br[0], (void **)&m, 1, TX_BATCH_SIZE); if (err) { GROUPTASK_ENQUEUE(&txq->ift_task); /* support forthcoming later */ #ifdef DRIVER_BACKPRESSURE txq->ift_closed = TRUE; #endif ifmp_ring_check_drainage(txq->ift_br[0], TX_BATCH_SIZE); m_freem(m); } else if (TXQ_AVAIL(txq) < (txq->ift_size >> 1)) { GROUPTASK_ENQUEUE(&txq->ift_task); } return (err); } static void iflib_if_qflush(if_t ifp) { if_ctx_t ctx = if_getsoftc(ifp); iflib_txq_t txq = ctx->ifc_txqs; int i; CTX_LOCK(ctx); ctx->ifc_flags |= IFC_QFLUSH; CTX_UNLOCK(ctx); for (i = 0; i < NTXQSETS(ctx); i++, txq++) while (!(ifmp_ring_is_idle(txq->ift_br[0]) || ifmp_ring_is_stalled(txq->ift_br[0]))) iflib_txq_check_drain(txq, 0); CTX_LOCK(ctx); ctx->ifc_flags &= ~IFC_QFLUSH; CTX_UNLOCK(ctx); if_qflush(ifp); } #define IFCAP_FLAGS (IFCAP_TXCSUM_IPV6 | IFCAP_RXCSUM_IPV6 | IFCAP_HWCSUM | IFCAP_LRO | \ IFCAP_TSO4 | IFCAP_TSO6 | IFCAP_VLAN_HWTAGGING | \ IFCAP_VLAN_MTU | IFCAP_VLAN_HWFILTER | IFCAP_VLAN_HWTSO) #define IFCAP_REINIT IFCAP_FLAGS static int iflib_if_ioctl(if_t ifp, u_long command, caddr_t data) { if_ctx_t ctx = if_getsoftc(ifp); struct ifreq *ifr = (struct ifreq *)data; #if defined(INET) || defined(INET6) struct ifaddr *ifa = (struct ifaddr *)data; #endif bool avoid_reset = FALSE; int err = 0, reinit = 0, bits; switch (command) { case SIOCSIFADDR: #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) avoid_reset = TRUE; #endif #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) avoid_reset = TRUE; #endif /* ** Calling init results in link renegotiation, ** so we avoid doing it when possible. 
*/ if (avoid_reset) { if_setflagbits(ifp, IFF_UP,0); if (!(if_getdrvflags(ifp)& IFF_DRV_RUNNING)) reinit = 1; #ifdef INET if (!(if_getflags(ifp) & IFF_NOARP)) arp_ifinit(ifp, ifa); #endif } else err = ether_ioctl(ifp, command, data); break; case SIOCSIFMTU: CTX_LOCK(ctx); if (ifr->ifr_mtu == if_getmtu(ifp)) { CTX_UNLOCK(ctx); break; } bits = if_getdrvflags(ifp); /* stop the driver and free any clusters before proceeding */ iflib_stop(ctx); if ((err = IFDI_MTU_SET(ctx, ifr->ifr_mtu)) == 0) { if (ifr->ifr_mtu > ctx->ifc_max_fl_buf_size) ctx->ifc_flags |= IFC_MULTISEG; else ctx->ifc_flags &= ~IFC_MULTISEG; err = if_setmtu(ifp, ifr->ifr_mtu); } iflib_init_locked(ctx); if_setdrvflags(ifp, bits); CTX_UNLOCK(ctx); break; case SIOCSIFFLAGS: CTX_LOCK(ctx); if (if_getflags(ifp) & IFF_UP) { if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { if ((if_getflags(ifp) ^ ctx->ifc_if_flags) & (IFF_PROMISC | IFF_ALLMULTI)) { err = IFDI_PROMISC_SET(ctx, if_getflags(ifp)); } } else reinit = 1; } else if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { iflib_stop(ctx); } ctx->ifc_if_flags = if_getflags(ifp); CTX_UNLOCK(ctx); break; break; case SIOCADDMULTI: case SIOCDELMULTI: if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { CTX_LOCK(ctx); IFDI_INTR_DISABLE(ctx); IFDI_MULTI_SET(ctx); IFDI_INTR_ENABLE(ctx); CTX_UNLOCK(ctx); } break; case SIOCSIFMEDIA: CTX_LOCK(ctx); IFDI_MEDIA_SET(ctx); CTX_UNLOCK(ctx); /* falls thru */ case SIOCGIFMEDIA: err = ifmedia_ioctl(ifp, ifr, &ctx->ifc_media, command); break; case SIOCGI2C: { struct ifi2creq i2c; - err = copyin(ifr->ifr_data, &i2c, sizeof(i2c)); + err = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c)); if (err != 0) break; if (i2c.dev_addr != 0xA0 && i2c.dev_addr != 0xA2) { err = EINVAL; break; } if (i2c.len > sizeof(i2c.data)) { err = EINVAL; break; } if ((err = IFDI_I2C_REQ(ctx, &i2c)) == 0) - err = copyout(&i2c, ifr->ifr_data, sizeof(i2c)); + err = copyout(&i2c, ifr_data_get_ptr(ifr), + sizeof(i2c)); break; } case SIOCSIFCAP: { int mask, setmask; mask = ifr->ifr_reqcap ^ if_getcapenable(ifp); setmask = 0; #ifdef TCP_OFFLOAD setmask |= mask & (IFCAP_TOE4|IFCAP_TOE6); #endif setmask |= (mask & IFCAP_FLAGS); if ((mask & IFCAP_WOL) && (if_getcapabilities(ifp) & IFCAP_WOL) != 0) setmask |= (mask & (IFCAP_WOL_MCAST|IFCAP_WOL_MAGIC)); if_vlancap(ifp); /* * want to ensure that traffic has stopped before we change any of the flags */ if (setmask) { CTX_LOCK(ctx); bits = if_getdrvflags(ifp); if (setmask & IFCAP_REINIT) iflib_stop(ctx); if_togglecapenable(ifp, setmask); if (setmask & IFCAP_REINIT) iflib_init_locked(ctx); if_setdrvflags(ifp, bits); CTX_UNLOCK(ctx); } break; } case SIOCGPRIVATE_0: case SIOCSDRVSPEC: case SIOCGDRVSPEC: CTX_LOCK(ctx); err = IFDI_PRIV_IOCTL(ctx, command, data); CTX_UNLOCK(ctx); break; default: err = ether_ioctl(ifp, command, data); break; } if (reinit) iflib_if_init(ctx); return (err); } static uint64_t iflib_if_get_counter(if_t ifp, ift_counter cnt) { if_ctx_t ctx = if_getsoftc(ifp); return (IFDI_GET_COUNTER(ctx, cnt)); } /********************************************************************* * * OTHER FUNCTIONS EXPORTED TO THE STACK * **********************************************************************/ static void iflib_vlan_register(void *arg, if_t ifp, uint16_t vtag) { if_ctx_t ctx = if_getsoftc(ifp); if ((void *)ctx != arg) return; if ((vtag == 0) || (vtag > 4095)) return; CTX_LOCK(ctx); IFDI_VLAN_REGISTER(ctx, vtag); /* Re-init to load the changes */ if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) iflib_init_locked(ctx); CTX_UNLOCK(ctx); } static void 
iflib_vlan_unregister(void *arg, if_t ifp, uint16_t vtag) { if_ctx_t ctx = if_getsoftc(ifp); if ((void *)ctx != arg) return; if ((vtag == 0) || (vtag > 4095)) return; CTX_LOCK(ctx); IFDI_VLAN_UNREGISTER(ctx, vtag); /* Re-init to load the changes */ if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) iflib_init_locked(ctx); CTX_UNLOCK(ctx); } static void iflib_led_func(void *arg, int onoff) { if_ctx_t ctx = arg; CTX_LOCK(ctx); IFDI_LED_FUNC(ctx, onoff); CTX_UNLOCK(ctx); } /********************************************************************* * * BUS FUNCTION DEFINITIONS * **********************************************************************/ int iflib_device_probe(device_t dev) { pci_vendor_info_t *ent; uint16_t pci_vendor_id, pci_device_id; uint16_t pci_subvendor_id, pci_subdevice_id; uint16_t pci_rev_id; if_shared_ctx_t sctx; if ((sctx = DEVICE_REGISTER(dev)) == NULL || sctx->isc_magic != IFLIB_MAGIC) return (ENOTSUP); pci_vendor_id = pci_get_vendor(dev); pci_device_id = pci_get_device(dev); pci_subvendor_id = pci_get_subvendor(dev); pci_subdevice_id = pci_get_subdevice(dev); pci_rev_id = pci_get_revid(dev); if (sctx->isc_parse_devinfo != NULL) sctx->isc_parse_devinfo(&pci_device_id, &pci_subvendor_id, &pci_subdevice_id, &pci_rev_id); ent = sctx->isc_vendor_info; while (ent->pvi_vendor_id != 0) { if (pci_vendor_id != ent->pvi_vendor_id) { ent++; continue; } if ((pci_device_id == ent->pvi_device_id) && ((pci_subvendor_id == ent->pvi_subvendor_id) || (ent->pvi_subvendor_id == 0)) && ((pci_subdevice_id == ent->pvi_subdevice_id) || (ent->pvi_subdevice_id == 0)) && ((pci_rev_id == ent->pvi_rev_id) || (ent->pvi_rev_id == 0))) { device_set_desc_copy(dev, ent->pvi_name); /* this needs to be changed to zero if the bus probing code * ever stops re-probing on best match because the sctx * may have its values over written by register calls * in subsequent probes */ return (BUS_PROBE_DEFAULT); } ent++; } return (ENXIO); } int iflib_device_register(device_t dev, void *sc, if_shared_ctx_t sctx, if_ctx_t *ctxp) { int err, rid, msix, msix_bar; if_ctx_t ctx; if_t ifp; if_softc_ctx_t scctx; int i; uint16_t main_txq; uint16_t main_rxq; ctx = malloc(sizeof(* ctx), M_IFLIB, M_WAITOK|M_ZERO); if (sc == NULL) { sc = malloc(sctx->isc_driver->size, M_IFLIB, M_WAITOK|M_ZERO); device_set_softc(dev, ctx); ctx->ifc_flags |= IFC_SC_ALLOCATED; } ctx->ifc_sctx = sctx; ctx->ifc_dev = dev; ctx->ifc_txrx = *sctx->isc_txrx; ctx->ifc_softc = sc; if ((err = iflib_register(ctx)) != 0) { device_printf(dev, "iflib_register failed %d\n", err); return (err); } iflib_add_device_sysctl_pre(ctx); scctx = &ctx->ifc_softc_ctx; /* * XXX sanity check that ntxd & nrxd are a power of 2 */ if (ctx->ifc_sysctl_ntxqs != 0) scctx->isc_ntxqsets = ctx->ifc_sysctl_ntxqs; if (ctx->ifc_sysctl_nrxqs != 0) scctx->isc_nrxqsets = ctx->ifc_sysctl_nrxqs; for (i = 0; i < sctx->isc_ntxqs; i++) { if (ctx->ifc_sysctl_ntxds[i] != 0) scctx->isc_ntxd[i] = ctx->ifc_sysctl_ntxds[i]; else scctx->isc_ntxd[i] = sctx->isc_ntxd_default[i]; } for (i = 0; i < sctx->isc_nrxqs; i++) { if (ctx->ifc_sysctl_nrxds[i] != 0) scctx->isc_nrxd[i] = ctx->ifc_sysctl_nrxds[i]; else scctx->isc_nrxd[i] = sctx->isc_nrxd_default[i]; } for (i = 0; i < sctx->isc_nrxqs; i++) { if (scctx->isc_nrxd[i] < sctx->isc_nrxd_min[i]) { device_printf(dev, "nrxd%d: %d less than nrxd_min %d - resetting to min\n", i, scctx->isc_nrxd[i], sctx->isc_nrxd_min[i]); scctx->isc_nrxd[i] = sctx->isc_nrxd_min[i]; } if (scctx->isc_nrxd[i] > sctx->isc_nrxd_max[i]) { device_printf(dev, "nrxd%d: %d greater than nrxd_max 
%d - resetting to max\n", i, scctx->isc_nrxd[i], sctx->isc_nrxd_max[i]); scctx->isc_nrxd[i] = sctx->isc_nrxd_max[i]; } } for (i = 0; i < sctx->isc_ntxqs; i++) { if (scctx->isc_ntxd[i] < sctx->isc_ntxd_min[i]) { device_printf(dev, "ntxd%d: %d less than ntxd_min %d - resetting to min\n", i, scctx->isc_ntxd[i], sctx->isc_ntxd_min[i]); scctx->isc_ntxd[i] = sctx->isc_ntxd_min[i]; } if (scctx->isc_ntxd[i] > sctx->isc_ntxd_max[i]) { device_printf(dev, "ntxd%d: %d greater than ntxd_max %d - resetting to max\n", i, scctx->isc_ntxd[i], sctx->isc_ntxd_max[i]); scctx->isc_ntxd[i] = sctx->isc_ntxd_max[i]; } } if ((err = IFDI_ATTACH_PRE(ctx)) != 0) { device_printf(dev, "IFDI_ATTACH_PRE failed %d\n", err); return (err); } if (scctx->isc_ntxqsets_max) scctx->isc_ntxqsets = min(scctx->isc_ntxqsets, scctx->isc_ntxqsets_max); if (scctx->isc_nrxqsets_max) scctx->isc_nrxqsets = min(scctx->isc_nrxqsets, scctx->isc_nrxqsets_max); #ifdef ACPI_DMAR if (dmar_get_dma_tag(device_get_parent(dev), dev) != NULL) ctx->ifc_flags |= IFC_DMAR; #endif msix_bar = scctx->isc_msix_bar; ifp = ctx->ifc_ifp; if(sctx->isc_flags & IFLIB_HAS_TXCQ) main_txq = 1; else main_txq = 0; if(sctx->isc_flags & IFLIB_HAS_RXCQ) main_rxq = 1; else main_rxq = 0; /* XXX change for per-queue sizes */ device_printf(dev, "using %d tx descriptors and %d rx descriptors\n", scctx->isc_ntxd[main_txq], scctx->isc_nrxd[main_rxq]); for (i = 0; i < sctx->isc_nrxqs; i++) { if (!powerof2(scctx->isc_nrxd[i])) { /* round down instead? */ device_printf(dev, "# rx descriptors must be a power of 2\n"); err = EINVAL; goto fail; } } for (i = 0; i < sctx->isc_ntxqs; i++) { if (!powerof2(scctx->isc_ntxd[i])) { device_printf(dev, "# tx descriptors must be a power of 2"); err = EINVAL; goto fail; } } if (scctx->isc_tx_nsegments > scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION) scctx->isc_tx_nsegments = max(1, scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION); if (scctx->isc_tx_tso_segments_max > scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION) scctx->isc_tx_tso_segments_max = max(1, scctx->isc_ntxd[main_txq] / MAX_SINGLE_PACKET_FRACTION); /* * Protect the stack against modern hardware */ if (scctx->isc_tx_tso_size_max > FREEBSD_TSO_SIZE_MAX) scctx->isc_tx_tso_size_max = FREEBSD_TSO_SIZE_MAX; /* TSO parameters - dig these out of the data sheet - simply correspond to tag setup */ ifp->if_hw_tsomaxsegcount = scctx->isc_tx_tso_segments_max; ifp->if_hw_tsomax = scctx->isc_tx_tso_size_max; ifp->if_hw_tsomaxsegsize = scctx->isc_tx_tso_segsize_max; if (scctx->isc_rss_table_size == 0) scctx->isc_rss_table_size = 64; scctx->isc_rss_table_mask = scctx->isc_rss_table_size-1; /* ** Now setup MSI or MSI/X, should ** return us the number of supported ** vectors. 
(Will be 1 for MSI) */ if (sctx->isc_flags & IFLIB_SKIP_MSIX) { msix = scctx->isc_vectors; } else if (scctx->isc_msix_bar != 0) msix = iflib_msix_init(ctx); else { scctx->isc_vectors = 1; scctx->isc_ntxqsets = 1; scctx->isc_nrxqsets = 1; scctx->isc_intr = IFLIB_INTR_LEGACY; msix = 0; } /* Get memory for the station queues */ if ((err = iflib_queues_alloc(ctx))) { device_printf(dev, "Unable to allocate queue memory\n"); goto fail; } if ((err = iflib_qset_structures_setup(ctx))) { device_printf(dev, "qset structure setup failed %d\n", err); goto fail_queues; } if (msix > 1 && (err = IFDI_MSIX_INTR_ASSIGN(ctx, msix)) != 0) { device_printf(dev, "IFDI_MSIX_INTR_ASSIGN failed %d\n", err); goto fail_intr_free; } if (msix <= 1) { rid = 0; if (scctx->isc_intr == IFLIB_INTR_MSI) { MPASS(msix == 1); rid = 1; } if ((err = iflib_legacy_setup(ctx, ctx->isc_legacy_intr, ctx->ifc_softc, &rid, "irq0")) != 0) { device_printf(dev, "iflib_legacy_setup failed %d\n", err); goto fail_intr_free; } } ether_ifattach(ctx->ifc_ifp, ctx->ifc_mac); if ((err = IFDI_ATTACH_POST(ctx)) != 0) { device_printf(dev, "IFDI_ATTACH_POST failed %d\n", err); goto fail_detach; } if ((err = iflib_netmap_attach(ctx))) { device_printf(ctx->ifc_dev, "netmap attach failed: %d\n", err); goto fail_detach; } *ctxp = ctx; if_setgetcounterfn(ctx->ifc_ifp, iflib_if_get_counter); iflib_add_device_sysctl_post(ctx); return (0); fail_detach: ether_ifdetach(ctx->ifc_ifp); fail_intr_free: if (scctx->isc_intr == IFLIB_INTR_MSIX || scctx->isc_intr == IFLIB_INTR_MSI) pci_release_msi(ctx->ifc_dev); fail_queues: /* XXX free queues */ fail: IFDI_DETACH(ctx); return (err); } int iflib_device_attach(device_t dev) { if_ctx_t ctx; if_shared_ctx_t sctx; if ((sctx = DEVICE_REGISTER(dev)) == NULL || sctx->isc_magic != IFLIB_MAGIC) return (ENOTSUP); pci_enable_busmaster(dev); return (iflib_device_register(dev, NULL, sctx, &ctx)); } int iflib_device_deregister(if_ctx_t ctx) { if_t ifp = ctx->ifc_ifp; iflib_txq_t txq; iflib_rxq_t rxq; device_t dev = ctx->ifc_dev; int i; struct taskqgroup *tqg; /* Make sure VLANS are not using driver */ if (if_vlantrunkinuse(ifp)) { device_printf(dev,"Vlan in use, detach first\n"); return (EBUSY); } CTX_LOCK(ctx); ctx->ifc_in_detach = 1; iflib_stop(ctx); CTX_UNLOCK(ctx); /* Unregister VLAN events */ if (ctx->ifc_vlan_attach_event != NULL) EVENTHANDLER_DEREGISTER(vlan_config, ctx->ifc_vlan_attach_event); if (ctx->ifc_vlan_detach_event != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, ctx->ifc_vlan_detach_event); iflib_netmap_detach(ifp); ether_ifdetach(ifp); /* ether_ifdetach calls if_qflush - lock must be destroy afterwards*/ CTX_LOCK_DESTROY(ctx); if (ctx->ifc_led_dev != NULL) led_destroy(ctx->ifc_led_dev); /* XXX drain any dependent tasks */ tqg = qgroup_softirq; for (txq = ctx->ifc_txqs, i = 0; i < NTXQSETS(ctx); i++, txq++) { callout_drain(&txq->ift_timer); callout_drain(&txq->ift_db_check); if (txq->ift_task.gt_uniq != NULL) taskqgroup_detach(tqg, &txq->ift_task); } for (i = 0, rxq = ctx->ifc_rxqs; i < NRXQSETS(ctx); i++, rxq++) { if (rxq->ifr_task.gt_uniq != NULL) taskqgroup_detach(tqg, &rxq->ifr_task); } tqg = qgroup_if_config_tqg; if (ctx->ifc_admin_task.gt_uniq != NULL) taskqgroup_detach(tqg, &ctx->ifc_admin_task); if (ctx->ifc_vflr_task.gt_uniq != NULL) taskqgroup_detach(tqg, &ctx->ifc_vflr_task); IFDI_DETACH(ctx); device_set_softc(ctx->ifc_dev, NULL); if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_LEGACY) { pci_release_msi(dev); } if (ctx->ifc_softc_ctx.isc_intr != IFLIB_INTR_MSIX) { iflib_irq_free(ctx, 
&ctx->ifc_legacy_irq); } if (ctx->ifc_msix_mem != NULL) { bus_release_resource(ctx->ifc_dev, SYS_RES_MEMORY, ctx->ifc_softc_ctx.isc_msix_bar, ctx->ifc_msix_mem); ctx->ifc_msix_mem = NULL; } bus_generic_detach(dev); if_free(ifp); iflib_tx_structures_free(ctx); iflib_rx_structures_free(ctx); if (ctx->ifc_flags & IFC_SC_ALLOCATED) free(ctx->ifc_softc, M_IFLIB); free(ctx, M_IFLIB); return (0); } int iflib_device_detach(device_t dev) { if_ctx_t ctx = device_get_softc(dev); return (iflib_device_deregister(ctx)); } int iflib_device_suspend(device_t dev) { if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); IFDI_SUSPEND(ctx); CTX_UNLOCK(ctx); return bus_generic_suspend(dev); } int iflib_device_shutdown(device_t dev) { if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); IFDI_SHUTDOWN(ctx); CTX_UNLOCK(ctx); return bus_generic_suspend(dev); } int iflib_device_resume(device_t dev) { if_ctx_t ctx = device_get_softc(dev); iflib_txq_t txq = ctx->ifc_txqs; CTX_LOCK(ctx); IFDI_RESUME(ctx); iflib_init_locked(ctx); CTX_UNLOCK(ctx); for (int i = 0; i < NTXQSETS(ctx); i++, txq++) iflib_txq_check_drain(txq, IFLIB_RESTART_BUDGET); return (bus_generic_resume(dev)); } int iflib_device_iov_init(device_t dev, uint16_t num_vfs, const nvlist_t *params) { int error; if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); error = IFDI_IOV_INIT(ctx, num_vfs, params); CTX_UNLOCK(ctx); return (error); } void iflib_device_iov_uninit(device_t dev) { if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); IFDI_IOV_UNINIT(ctx); CTX_UNLOCK(ctx); } int iflib_device_iov_add_vf(device_t dev, uint16_t vfnum, const nvlist_t *params) { int error; if_ctx_t ctx = device_get_softc(dev); CTX_LOCK(ctx); error = IFDI_IOV_VF_ADD(ctx, vfnum, params); CTX_UNLOCK(ctx); return (error); } /********************************************************************* * * MODULE FUNCTION DEFINITIONS * **********************************************************************/ /* * - Start a fast taskqueue thread for each core * - Start a taskqueue for control operations */ static int iflib_module_init(void) { return (0); } static int iflib_module_event_handler(module_t mod, int what, void *arg) { int err; switch (what) { case MOD_LOAD: if ((err = iflib_module_init()) != 0) return (err); break; case MOD_UNLOAD: return (EBUSY); default: return (EOPNOTSUPP); } return (0); } /********************************************************************* * * PUBLIC FUNCTION DEFINITIONS * ordered as in iflib.h * **********************************************************************/ static void _iflib_assert(if_shared_ctx_t sctx) { MPASS(sctx->isc_tx_maxsize); MPASS(sctx->isc_tx_maxsegsize); MPASS(sctx->isc_rx_maxsize); MPASS(sctx->isc_rx_nsegments); MPASS(sctx->isc_rx_maxsegsize); MPASS(sctx->isc_txrx->ift_txd_encap); MPASS(sctx->isc_txrx->ift_txd_flush); MPASS(sctx->isc_txrx->ift_txd_credits_update); MPASS(sctx->isc_txrx->ift_rxd_available); MPASS(sctx->isc_txrx->ift_rxd_pkt_get); MPASS(sctx->isc_txrx->ift_rxd_refill); MPASS(sctx->isc_txrx->ift_rxd_flush); MPASS(sctx->isc_nrxd_min[0]); MPASS(sctx->isc_nrxd_max[0]); MPASS(sctx->isc_nrxd_default[0]); MPASS(sctx->isc_ntxd_min[0]); MPASS(sctx->isc_ntxd_max[0]); MPASS(sctx->isc_ntxd_default[0]); } static int iflib_register(if_ctx_t ctx) { if_shared_ctx_t sctx = ctx->ifc_sctx; driver_t *driver = sctx->isc_driver; device_t dev = ctx->ifc_dev; if_t ifp; _iflib_assert(sctx); CTX_LOCK_INIT(ctx, device_get_nameunit(ctx->ifc_dev)); ifp = ctx->ifc_ifp = if_gethandle(IFT_ETHER); if (ifp == NULL) { device_printf(dev, "can not allocate ifnet 
structure\n"); return (ENOMEM); } /* * Initialize our context's device specific methods */ kobj_init((kobj_t) ctx, (kobj_class_t) driver); kobj_class_compile((kobj_class_t) driver); driver->refs++; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); if_setsoftc(ifp, ctx); if_setdev(ifp, dev); if_setinitfn(ifp, iflib_if_init); if_setioctlfn(ifp, iflib_if_ioctl); if_settransmitfn(ifp, iflib_if_transmit); if_setqflushfn(ifp, iflib_if_qflush); if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); if_setcapabilities(ifp, 0); if_setcapenable(ifp, 0); ctx->ifc_vlan_attach_event = EVENTHANDLER_REGISTER(vlan_config, iflib_vlan_register, ctx, EVENTHANDLER_PRI_FIRST); ctx->ifc_vlan_detach_event = EVENTHANDLER_REGISTER(vlan_unconfig, iflib_vlan_unregister, ctx, EVENTHANDLER_PRI_FIRST); ifmedia_init(&ctx->ifc_media, IFM_IMASK, iflib_media_change, iflib_media_status); return (0); } static int iflib_queues_alloc(if_ctx_t ctx) { if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = ctx->ifc_dev; int nrxqsets = scctx->isc_nrxqsets; int ntxqsets = scctx->isc_ntxqsets; iflib_txq_t txq; iflib_rxq_t rxq; iflib_fl_t fl = NULL; int i, j, cpu, err, txconf, rxconf; iflib_dma_info_t ifdip; uint32_t *rxqsizes = scctx->isc_rxqsizes; uint32_t *txqsizes = scctx->isc_txqsizes; uint8_t nrxqs = sctx->isc_nrxqs; uint8_t ntxqs = sctx->isc_ntxqs; int nfree_lists = sctx->isc_nfl ? sctx->isc_nfl : 1; caddr_t *vaddrs; uint64_t *paddrs; struct ifmp_ring **brscp; int nbuf_rings = 1; /* XXX determine dynamically */ KASSERT(ntxqs > 0, ("number of queues per qset must be at least 1")); KASSERT(nrxqs > 0, ("number of queues per qset must be at least 1")); brscp = NULL; txq = NULL; rxq = NULL; /* Allocate the TX ring struct memory */ if (!(txq = (iflib_txq_t) malloc(sizeof(struct iflib_txq) * ntxqsets, M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate TX ring memory\n"); err = ENOMEM; goto fail; } /* Now allocate the RX */ if (!(rxq = (iflib_rxq_t) malloc(sizeof(struct iflib_rxq) * nrxqsets, M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate RX ring memory\n"); err = ENOMEM; goto rx_fail; } if (!(brscp = malloc(sizeof(void *) * nbuf_rings * nrxqsets, M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to buf_ring_sc * memory\n"); err = ENOMEM; goto rx_fail; } ctx->ifc_txqs = txq; ctx->ifc_rxqs = rxq; /* * XXX handle allocation failure */ for (txconf = i = 0, cpu = CPU_FIRST(); i < ntxqsets; i++, txconf++, txq++, cpu = CPU_NEXT(cpu)) { /* Set up some basics */ if ((ifdip = malloc(sizeof(struct iflib_dma_info) * ntxqs, M_IFLIB, M_WAITOK|M_ZERO)) == NULL) { device_printf(dev, "failed to allocate iflib_dma_info\n"); err = ENOMEM; goto err_tx_desc; } txq->ift_ifdi = ifdip; for (j = 0; j < ntxqs; j++, ifdip++) { if (iflib_dma_alloc(ctx, txqsizes[j], ifdip, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate Descriptor memory\n"); err = ENOMEM; goto err_tx_desc; } bzero((void *)ifdip->idi_vaddr, txqsizes[j]); } txq->ift_ctx = ctx; txq->ift_id = i; if (sctx->isc_flags & IFLIB_HAS_TXCQ) { txq->ift_br_offset = 1; } else { txq->ift_br_offset = 0; } /* XXX fix this */ txq->ift_timer.c_cpu = cpu; txq->ift_db_check.c_cpu = cpu; txq->ift_nbr = nbuf_rings; if (iflib_txsd_alloc(txq)) { device_printf(dev, "Critical Failure setting up TX buffers\n"); err = ENOMEM; goto err_tx_desc; } /* Initialize the TX lock */ snprintf(txq->ift_mtx_name, MTX_NAME_LEN, "%s:tx(%d):callout", device_get_nameunit(dev), txq->ift_id); mtx_init(&txq->ift_mtx, 
txq->ift_mtx_name, NULL, MTX_DEF); callout_init_mtx(&txq->ift_timer, &txq->ift_mtx, 0); callout_init_mtx(&txq->ift_db_check, &txq->ift_mtx, 0); snprintf(txq->ift_db_mtx_name, MTX_NAME_LEN, "%s:tx(%d):db", device_get_nameunit(dev), txq->ift_id); TXDB_LOCK_INIT(txq); txq->ift_br = brscp + i*nbuf_rings; for (j = 0; j < nbuf_rings; j++) { err = ifmp_ring_alloc(&txq->ift_br[j], 2048, txq, iflib_txq_drain, iflib_txq_can_drain, M_IFLIB, M_WAITOK); if (err) { /* XXX free any allocated rings */ device_printf(dev, "Unable to allocate buf_ring\n"); goto err_tx_desc; } } } for (rxconf = i = 0; i < nrxqsets; i++, rxconf++, rxq++) { /* Set up some basics */ if ((ifdip = malloc(sizeof(struct iflib_dma_info) * nrxqs, M_IFLIB, M_WAITOK|M_ZERO)) == NULL) { device_printf(dev, "failed to allocate iflib_dma_info\n"); err = ENOMEM; goto err_tx_desc; } rxq->ifr_ifdi = ifdip; for (j = 0; j < nrxqs; j++, ifdip++) { if (iflib_dma_alloc(ctx, rxqsizes[j], ifdip, BUS_DMA_NOWAIT)) { device_printf(dev, "Unable to allocate Descriptor memory\n"); err = ENOMEM; goto err_tx_desc; } bzero((void *)ifdip->idi_vaddr, rxqsizes[j]); } rxq->ifr_ctx = ctx; rxq->ifr_id = i; if (sctx->isc_flags & IFLIB_HAS_RXCQ) { rxq->ifr_fl_offset = 1; } else { rxq->ifr_fl_offset = 0; } rxq->ifr_nfl = nfree_lists; if (!(fl = (iflib_fl_t) malloc(sizeof(struct iflib_fl) * nfree_lists, M_IFLIB, M_NOWAIT | M_ZERO))) { device_printf(dev, "Unable to allocate free list memory\n"); err = ENOMEM; goto err_tx_desc; } rxq->ifr_fl = fl; for (j = 0; j < nfree_lists; j++) { rxq->ifr_fl[j].ifl_rxq = rxq; rxq->ifr_fl[j].ifl_id = j; rxq->ifr_fl[j].ifl_ifdi = &rxq->ifr_ifdi[j + rxq->ifr_fl_offset]; } /* Allocate receive buffers for the ring*/ if (iflib_rxsd_alloc(rxq)) { device_printf(dev, "Critical Failure setting up receive buffers\n"); err = ENOMEM; goto err_rx_desc; } } /* TXQs */ vaddrs = malloc(sizeof(caddr_t)*ntxqsets*ntxqs, M_IFLIB, M_WAITOK); paddrs = malloc(sizeof(uint64_t)*ntxqsets*ntxqs, M_IFLIB, M_WAITOK); for (i = 0; i < ntxqsets; i++) { iflib_dma_info_t di = ctx->ifc_txqs[i].ift_ifdi; for (j = 0; j < ntxqs; j++, di++) { vaddrs[i*ntxqs + j] = di->idi_vaddr; paddrs[i*ntxqs + j] = di->idi_paddr; } } if ((err = IFDI_TX_QUEUES_ALLOC(ctx, vaddrs, paddrs, ntxqs, ntxqsets)) != 0) { device_printf(ctx->ifc_dev, "device queue allocation failed\n"); iflib_tx_structures_free(ctx); free(vaddrs, M_IFLIB); free(paddrs, M_IFLIB); goto err_rx_desc; } free(vaddrs, M_IFLIB); free(paddrs, M_IFLIB); /* RXQs */ vaddrs = malloc(sizeof(caddr_t)*nrxqsets*nrxqs, M_IFLIB, M_WAITOK); paddrs = malloc(sizeof(uint64_t)*nrxqsets*nrxqs, M_IFLIB, M_WAITOK); for (i = 0; i < nrxqsets; i++) { iflib_dma_info_t di = ctx->ifc_rxqs[i].ifr_ifdi; for (j = 0; j < nrxqs; j++, di++) { vaddrs[i*nrxqs + j] = di->idi_vaddr; paddrs[i*nrxqs + j] = di->idi_paddr; } } if ((err = IFDI_RX_QUEUES_ALLOC(ctx, vaddrs, paddrs, nrxqs, nrxqsets)) != 0) { device_printf(ctx->ifc_dev, "device queue allocation failed\n"); iflib_tx_structures_free(ctx); free(vaddrs, M_IFLIB); free(paddrs, M_IFLIB); goto err_rx_desc; } free(vaddrs, M_IFLIB); free(paddrs, M_IFLIB); return (0); /* XXX handle allocation failure changes */ err_rx_desc: err_tx_desc: if (ctx->ifc_rxqs != NULL) free(ctx->ifc_rxqs, M_IFLIB); ctx->ifc_rxqs = NULL; if (ctx->ifc_txqs != NULL) free(ctx->ifc_txqs, M_IFLIB); ctx->ifc_txqs = NULL; rx_fail: if (brscp != NULL) free(brscp, M_IFLIB); if (rxq != NULL) free(rxq, M_IFLIB); if (txq != NULL) free(txq, M_IFLIB); fail: return (err); } static int iflib_tx_structures_setup(if_ctx_t ctx) { iflib_txq_t txq = 
ctx->ifc_txqs; int i; for (i = 0; i < NTXQSETS(ctx); i++, txq++) iflib_txq_setup(txq); return (0); } static void iflib_tx_structures_free(if_ctx_t ctx) { iflib_txq_t txq = ctx->ifc_txqs; int i, j; for (i = 0; i < NTXQSETS(ctx); i++, txq++) { iflib_txq_destroy(txq); for (j = 0; j < ctx->ifc_nhwtxqs; j++) iflib_dma_free(&txq->ift_ifdi[j]); } free(ctx->ifc_txqs, M_IFLIB); ctx->ifc_txqs = NULL; IFDI_QUEUES_FREE(ctx); } /********************************************************************* * * Initialize all receive rings. * **********************************************************************/ static int iflib_rx_structures_setup(if_ctx_t ctx) { iflib_rxq_t rxq = ctx->ifc_rxqs; int q; #if defined(INET6) || defined(INET) int i, err; #endif for (q = 0; q < ctx->ifc_softc_ctx.isc_nrxqsets; q++, rxq++) { #if defined(INET6) || defined(INET) tcp_lro_free(&rxq->ifr_lc); if ((err = tcp_lro_init_args(&rxq->ifr_lc, ctx->ifc_ifp, TCP_LRO_ENTRIES, min(1024, ctx->ifc_softc_ctx.isc_nrxd[rxq->ifr_fl_offset]))) != 0) { device_printf(ctx->ifc_dev, "LRO Initialization failed!\n"); goto fail; } rxq->ifr_lro_enabled = TRUE; #endif IFDI_RXQ_SETUP(ctx, rxq->ifr_id); } return (0); #if defined(INET6) || defined(INET) fail: /* * Free RX software descriptors allocated so far, we will only handle * the rings that completed, the failing case will have * cleaned up for itself. 'q' failed, so its the terminus. */ rxq = ctx->ifc_rxqs; for (i = 0; i < q; ++i, rxq++) { iflib_rx_sds_free(rxq); rxq->ifr_cq_gen = rxq->ifr_cq_cidx = rxq->ifr_cq_pidx = 0; } return (err); #endif } /********************************************************************* * * Free all receive rings. * **********************************************************************/ static void iflib_rx_structures_free(if_ctx_t ctx) { iflib_rxq_t rxq = ctx->ifc_rxqs; for (int i = 0; i < ctx->ifc_softc_ctx.isc_nrxqsets; i++, rxq++) { iflib_rx_sds_free(rxq); } } static int iflib_qset_structures_setup(if_ctx_t ctx) { int err; if ((err = iflib_tx_structures_setup(ctx)) != 0) return (err); if ((err = iflib_rx_structures_setup(ctx)) != 0) { device_printf(ctx->ifc_dev, "iflib_rx_structures_setup failed: %d\n", err); iflib_tx_structures_free(ctx); iflib_rx_structures_free(ctx); } return (err); } int iflib_irq_alloc(if_ctx_t ctx, if_irq_t irq, int rid, driver_filter_t filter, void *filter_arg, driver_intr_t handler, void *arg, char *name) { return (_iflib_irq_alloc(ctx, irq, rid, filter, handler, arg, name)); } static void find_nth(if_ctx_t ctx, cpuset_t *cpus, int qid) { int i, cpuid; CPU_COPY(&ctx->ifc_cpus, cpus); /* clear up to the qid'th bit */ for (i = 0; i < qid; i++) { cpuid = CPU_FFS(cpus); CPU_CLR(cpuid, cpus); } } int iflib_irq_alloc_generic(if_ctx_t ctx, if_irq_t irq, int rid, iflib_intr_type_t type, driver_filter_t *filter, void *filter_arg, int qid, char *name) { struct grouptask *gtask; struct taskqgroup *tqg; iflib_filter_info_t info; cpuset_t cpus; gtask_fn_t *fn; int tqrid, err; void *q; info = &ctx->ifc_filter_info; switch (type) { /* XXX merge tx/rx for netmap? 
*/ case IFLIB_INTR_TX: q = &ctx->ifc_txqs[qid]; info = &ctx->ifc_txqs[qid].ift_filter_info; gtask = &ctx->ifc_txqs[qid].ift_task; tqg = qgroup_softirq; tqrid = irq->ii_rid; fn = _task_fn_tx; break; case IFLIB_INTR_RX: q = &ctx->ifc_rxqs[qid]; info = &ctx->ifc_rxqs[qid].ifr_filter_info; gtask = &ctx->ifc_rxqs[qid].ifr_task; tqg = qgroup_softirq; tqrid = irq->ii_rid; fn = _task_fn_rx; break; case IFLIB_INTR_ADMIN: q = ctx; info = &ctx->ifc_filter_info; gtask = &ctx->ifc_admin_task; tqg = qgroup_if_config_tqg; tqrid = -1; fn = _task_fn_admin; break; default: panic("unknown net intr type"); } GROUPTASK_INIT(gtask, 0, fn, q); info->ifi_filter = filter; info->ifi_filter_arg = filter_arg; info->ifi_task = gtask; /* XXX query cpu that rid belongs to */ err = _iflib_irq_alloc(ctx, irq, rid, iflib_fast_intr, NULL, info, name); if (err != 0) return (err); if (tqrid != -1) { find_nth(ctx, &cpus, qid); taskqgroup_attach_cpu(tqg, gtask, q, CPU_FFS(&cpus), irq->ii_rid, name); } else taskqgroup_attach(tqg, gtask, q, tqrid, name); return (0); } void iflib_softirq_alloc_generic(if_ctx_t ctx, int rid, iflib_intr_type_t type, void *arg, int qid, char *name) { struct grouptask *gtask; struct taskqgroup *tqg; gtask_fn_t *fn; void *q; switch (type) { case IFLIB_INTR_TX: q = &ctx->ifc_txqs[qid]; gtask = &ctx->ifc_txqs[qid].ift_task; tqg = qgroup_softirq; fn = _task_fn_tx; break; case IFLIB_INTR_RX: q = &ctx->ifc_rxqs[qid]; gtask = &ctx->ifc_rxqs[qid].ifr_task; tqg = qgroup_softirq; fn = _task_fn_rx; break; case IFLIB_INTR_ADMIN: q = ctx; gtask = &ctx->ifc_admin_task; tqg = qgroup_if_config_tqg; rid = -1; fn = _task_fn_admin; break; case IFLIB_INTR_IOV: q = ctx; gtask = &ctx->ifc_vflr_task; tqg = qgroup_if_config_tqg; rid = -1; fn = _task_fn_iov; break; default: panic("unknown net intr type"); } GROUPTASK_INIT(gtask, 0, fn, q); taskqgroup_attach(tqg, gtask, q, rid, name); } void iflib_irq_free(if_ctx_t ctx, if_irq_t irq) { if (irq->ii_tag) bus_teardown_intr(ctx->ifc_dev, irq->ii_res, irq->ii_tag); if (irq->ii_res) bus_release_resource(ctx->ifc_dev, SYS_RES_IRQ, irq->ii_rid, irq->ii_res); } static int iflib_legacy_setup(if_ctx_t ctx, driver_filter_t filter, void *filter_arg, int *rid, char *name) { iflib_txq_t txq = ctx->ifc_txqs; iflib_rxq_t rxq = ctx->ifc_rxqs; if_irq_t irq = &ctx->ifc_legacy_irq; iflib_filter_info_t info; struct grouptask *gtask; struct taskqgroup *tqg; gtask_fn_t *fn; int tqrid; void *q; int err; q = &ctx->ifc_rxqs[0]; info = &rxq[0].ifr_filter_info; gtask = &rxq[0].ifr_task; tqg = qgroup_softirq; tqrid = irq->ii_rid = *rid; fn = _task_fn_rx; ctx->ifc_flags |= IFC_LEGACY; info->ifi_filter = filter; info->ifi_filter_arg = filter_arg; info->ifi_task = gtask; /* We allocate a single interrupt resource */ if ((err = _iflib_irq_alloc(ctx, irq, tqrid, iflib_fast_intr, NULL, info, name)) != 0) return (err); GROUPTASK_INIT(gtask, 0, fn, q); taskqgroup_attach(tqg, gtask, q, tqrid, name); GROUPTASK_INIT(&txq->ift_task, 0, _task_fn_tx, txq); taskqgroup_attach(qgroup_softirq, &txq->ift_task, txq, tqrid, "tx"); GROUPTASK_INIT(&ctx->ifc_admin_task, 0, _task_fn_admin, ctx); taskqgroup_attach(qgroup_if_config_tqg, &ctx->ifc_admin_task, ctx, -1, "admin/link"); return (0); } void iflib_led_create(if_ctx_t ctx) { ctx->ifc_led_dev = led_create(iflib_led_func, ctx, device_get_nameunit(ctx->ifc_dev)); } void iflib_tx_intr_deferred(if_ctx_t ctx, int txqid) { GROUPTASK_ENQUEUE(&ctx->ifc_txqs[txqid].ift_task); } void iflib_rx_intr_deferred(if_ctx_t ctx, int rxqid) { GROUPTASK_ENQUEUE(&ctx->ifc_rxqs[rxqid].ifr_task); } 
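/*
 * Illustrative sketch (hypothetical driver code, not part of iflib): an
 * MSI-X capable driver would typically bind one vector per RX queue with
 * iflib_irq_alloc_generic() and queue its TX work as software interrupts
 * via iflib_softirq_alloc_generic(), so that the grouptasks enqueued by
 * the *_intr_deferred() helpers above have been initialized.  Names such
 * as "rx_que", "tx_que", "que_irq" and "my_msix_que" below are
 * placeholders, not identifiers defined by this file.
 *
 *	for (i = 0; i < scctx->isc_nrxqsets; i++, rx_que++) {
 *		snprintf(buf, sizeof(buf), "rxq%d", i);
 *		err = iflib_irq_alloc_generic(ctx, &rx_que->que_irq, i + 1,
 *		    IFLIB_INTR_RX, my_msix_que, rx_que, i, buf);
 *		if (err)
 *			return (err);
 *	}
 *	for (i = 0; i < scctx->isc_ntxqsets; i++, tx_que++) {
 *		snprintf(buf, sizeof(buf), "txq%d", i);
 *		iflib_softirq_alloc_generic(ctx, i + 1, IFLIB_INTR_TX,
 *		    tx_que, i, buf);
 *	}
 */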
void iflib_admin_intr_deferred(if_ctx_t ctx) { GROUPTASK_ENQUEUE(&ctx->ifc_admin_task); } void iflib_iov_intr_deferred(if_ctx_t ctx) { GROUPTASK_ENQUEUE(&ctx->ifc_vflr_task); } void iflib_io_tqg_attach(struct grouptask *gt, void *uniq, int cpu, char *name) { taskqgroup_attach_cpu(qgroup_softirq, gt, uniq, cpu, -1, name); } void iflib_config_gtask_init(if_ctx_t ctx, struct grouptask *gtask, gtask_fn_t *fn, char *name) { GROUPTASK_INIT(gtask, 0, fn, ctx); taskqgroup_attach(qgroup_if_config_tqg, gtask, gtask, -1, name); } void iflib_config_gtask_deinit(struct grouptask *gtask) { taskqgroup_detach(qgroup_if_config_tqg, gtask); } void iflib_link_state_change(if_ctx_t ctx, int link_state, uint64_t baudrate) { if_t ifp = ctx->ifc_ifp; iflib_txq_t txq = ctx->ifc_txqs; if_setbaudrate(ifp, baudrate); /* If link down, disable watchdog */ if ((ctx->ifc_link_state == LINK_STATE_UP) && (link_state == LINK_STATE_DOWN)) { for (int i = 0; i < ctx->ifc_softc_ctx.isc_ntxqsets; i++, txq++) txq->ift_qstatus = IFLIB_QUEUE_IDLE; } ctx->ifc_link_state = link_state; if_link_state_change(ifp, link_state); } static int iflib_tx_credits_update(if_ctx_t ctx, iflib_txq_t txq) { int credits; if (ctx->isc_txd_credits_update == NULL) return (0); if ((credits = ctx->isc_txd_credits_update(ctx->ifc_softc, txq->ift_id, txq->ift_cidx_processed, true)) == 0) return (0); txq->ift_processed += credits; txq->ift_cidx_processed += credits; if (txq->ift_cidx_processed >= txq->ift_size) txq->ift_cidx_processed -= txq->ift_size; return (credits); } static int iflib_rxd_avail(if_ctx_t ctx, iflib_rxq_t rxq, int cidx, int budget) { return (ctx->isc_rxd_available(ctx->ifc_softc, rxq->ifr_id, cidx, budget)); } void iflib_add_int_delay_sysctl(if_ctx_t ctx, const char *name, const char *description, if_int_delay_info_t info, int offset, int value) { info->iidi_ctx = ctx; info->iidi_offset = offset; info->iidi_value = value; SYSCTL_ADD_PROC(device_get_sysctl_ctx(ctx->ifc_dev), SYSCTL_CHILDREN(device_get_sysctl_tree(ctx->ifc_dev)), OID_AUTO, name, CTLTYPE_INT|CTLFLAG_RW, info, 0, iflib_sysctl_int_delay, "I", description); } struct mtx * iflib_ctx_lock_get(if_ctx_t ctx) { return (&ctx->ifc_mtx); } static int iflib_msix_init(if_ctx_t ctx) { device_t dev = ctx->ifc_dev; if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; int vectors, queues, rx_queues, tx_queues, queuemsgs, msgs; int iflib_num_tx_queues, iflib_num_rx_queues; int err, admincnt, bar; iflib_num_tx_queues = scctx->isc_ntxqsets; iflib_num_rx_queues = scctx->isc_nrxqsets; bar = ctx->ifc_softc_ctx.isc_msix_bar; admincnt = sctx->isc_admin_intrcnt; /* Override by tuneable */ if (enable_msix == 0) goto msi; /* ** When used in a virtualized environment ** PCI BUSMASTER capability may not be set ** so explicity set it here and rewrite ** the ENABLE in the MSIX control register ** at this point to cause the host to ** successfully initialize us. */ { uint16_t pci_cmd_word; int msix_ctrl, rid; rid = 0; pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2); pci_cmd_word |= PCIM_CMD_BUSMASTEREN; pci_write_config(dev, PCIR_COMMAND, pci_cmd_word, 2); pci_find_cap(dev, PCIY_MSIX, &rid); rid += PCIR_MSIX_CTRL; msix_ctrl = pci_read_config(dev, rid, 2); msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE; pci_write_config(dev, rid, msix_ctrl, 2); } /* * bar == -1 => "trust me I know what I'm doing" * Some drivers are for hardware that is so shoddily * documented that no one knows which bars are which * so the developer has to map all bars. 
This hack * allows shoddy garbage to use msix in this framework. */ if (bar != -1) { ctx->ifc_msix_mem = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &bar, RF_ACTIVE); if (ctx->ifc_msix_mem == NULL) { /* May not be enabled */ device_printf(dev, "Unable to map MSIX table \n"); goto msi; } } /* First try MSI/X */ if ((msgs = pci_msix_count(dev)) == 0) { /* system has msix disabled */ device_printf(dev, "System has MSIX disabled \n"); bus_release_resource(dev, SYS_RES_MEMORY, bar, ctx->ifc_msix_mem); ctx->ifc_msix_mem = NULL; goto msi; } #if IFLIB_DEBUG /* use only 1 qset in debug mode */ queuemsgs = min(msgs - admincnt, 1); #else queuemsgs = msgs - admincnt; #endif if (bus_get_cpus(dev, INTR_CPUS, sizeof(ctx->ifc_cpus), &ctx->ifc_cpus) == 0) { #ifdef RSS queues = imin(queuemsgs, rss_getnumbuckets()); #else queues = queuemsgs; #endif queues = imin(CPU_COUNT(&ctx->ifc_cpus), queues); device_printf(dev, "pxm cpus: %d queue msgs: %d admincnt: %d\n", CPU_COUNT(&ctx->ifc_cpus), queuemsgs, admincnt); } else { device_printf(dev, "Unable to fetch CPU list\n"); /* Figure out a reasonable auto config value */ queues = min(queuemsgs, mp_ncpus); } #ifdef RSS /* If we're doing RSS, clamp at the number of RSS buckets */ if (queues > rss_getnumbuckets()) queues = rss_getnumbuckets(); #endif if (iflib_num_rx_queues > 0 && iflib_num_rx_queues < queuemsgs - admincnt) rx_queues = iflib_num_rx_queues; else rx_queues = queues; /* * We want this to be all logical CPUs by default */ if (iflib_num_tx_queues > 0 && iflib_num_tx_queues < queues) tx_queues = iflib_num_tx_queues; else tx_queues = mp_ncpus; if (ctx->ifc_sysctl_qs_eq_override == 0) { #ifdef INVARIANTS if (tx_queues != rx_queues) device_printf(dev, "queue equality override not set, capping rx_queues at %d and tx_queues at %d\n", min(rx_queues, tx_queues), min(rx_queues, tx_queues)); #endif tx_queues = min(rx_queues, tx_queues); rx_queues = min(rx_queues, tx_queues); } device_printf(dev, "using %d rx queues %d tx queues \n", rx_queues, tx_queues); vectors = rx_queues + admincnt; if ((err = pci_alloc_msix(dev, &vectors)) == 0) { device_printf(dev, "Using MSIX interrupts with %d vectors\n", vectors); scctx->isc_vectors = vectors; scctx->isc_nrxqsets = rx_queues; scctx->isc_ntxqsets = tx_queues; scctx->isc_intr = IFLIB_INTR_MSIX; return (vectors); } else { device_printf(dev, "failed to allocate %d msix vectors, err: %d - using MSI\n", vectors, err); } msi: vectors = pci_msi_count(dev); scctx->isc_nrxqsets = 1; scctx->isc_ntxqsets = 1; scctx->isc_vectors = vectors; if (vectors == 1 && pci_alloc_msi(dev, &vectors) == 0) { device_printf(dev,"Using an MSI interrupt\n"); scctx->isc_intr = IFLIB_INTR_MSI; } else { device_printf(dev,"Using a Legacy interrupt\n"); scctx->isc_intr = IFLIB_INTR_LEGACY; } return (vectors); } char * ring_states[] = { "IDLE", "BUSY", "STALLED", "ABDICATED" }; static int mp_ring_state_handler(SYSCTL_HANDLER_ARGS) { int rc; uint16_t *state = ((uint16_t *)oidp->oid_arg1); struct sbuf *sb; char *ring_state = "UNKNOWN"; /* XXX needed ? 
*/ rc = sysctl_wire_old_buffer(req, 0); MPASS(rc == 0); if (rc != 0) return (rc); sb = sbuf_new_for_sysctl(NULL, NULL, 80, req); MPASS(sb != NULL); if (sb == NULL) return (ENOMEM); if (state[3] <= 3) ring_state = ring_states[state[3]]; sbuf_printf(sb, "pidx_head: %04hd pidx_tail: %04hd cidx: %04hd state: %s", state[0], state[1], state[2], ring_state); rc = sbuf_finish(sb); sbuf_delete(sb); return(rc); } enum iflib_ndesc_handler { IFLIB_NTXD_HANDLER, IFLIB_NRXD_HANDLER, }; static int mp_ndesc_handler(SYSCTL_HANDLER_ARGS) { if_ctx_t ctx = (void *)arg1; enum iflib_ndesc_handler type = arg2; char buf[256] = {0}; uint16_t *ndesc; char *p, *next; int nqs, rc, i; MPASS(type == IFLIB_NTXD_HANDLER || type == IFLIB_NRXD_HANDLER); nqs = 8; switch(type) { case IFLIB_NTXD_HANDLER: ndesc = ctx->ifc_sysctl_ntxds; if (ctx->ifc_sctx) nqs = ctx->ifc_sctx->isc_ntxqs; break; case IFLIB_NRXD_HANDLER: ndesc = ctx->ifc_sysctl_nrxds; if (ctx->ifc_sctx) nqs = ctx->ifc_sctx->isc_nrxqs; break; } if (nqs == 0) nqs = 8; for (i=0; i<8; i++) { if (i >= nqs) break; if (i) strcat(buf, ","); sprintf(strchr(buf, 0), "%d", ndesc[i]); } rc = sysctl_handle_string(oidp, buf, sizeof(buf), req); if (rc || req->newptr == NULL) return rc; for (i = 0, next = buf, p = strsep(&next, " ,"); i < 8 && p; i++, p = strsep(&next, " ,")) { ndesc[i] = strtoul(p, NULL, 10); } return(rc); } #define NAME_BUFLEN 32 static void iflib_add_device_sysctl_pre(if_ctx_t ctx) { device_t dev = iflib_get_dev(ctx); struct sysctl_oid_list *child, *oid_list; struct sysctl_ctx_list *ctx_list; struct sysctl_oid *node; ctx_list = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); ctx->ifc_sysctl_node = node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, "iflib", CTLFLAG_RD, NULL, "IFLIB fields"); oid_list = SYSCTL_CHILDREN(node); SYSCTL_ADD_STRING(ctx_list, oid_list, OID_AUTO, "driver_version", CTLFLAG_RD, ctx->ifc_sctx->isc_driver_version, 0, "driver version"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_ntxqs", CTLFLAG_RWTUN, &ctx->ifc_sysctl_ntxqs, 0, "# of txqs to use, 0 => use default #"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_nrxqs", CTLFLAG_RWTUN, &ctx->ifc_sysctl_nrxqs, 0, "# of rxqs to use, 0 => use default #"); SYSCTL_ADD_U16(ctx_list, oid_list, OID_AUTO, "override_qs_enable", CTLFLAG_RWTUN, &ctx->ifc_sysctl_qs_eq_override, 0, "permit #txq != #rxq"); /* XXX change for per-queue sizes */ SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_ntxds", CTLTYPE_STRING|CTLFLAG_RWTUN, ctx, IFLIB_NTXD_HANDLER, mp_ndesc_handler, "A", "list of # of tx descriptors to use, 0 = use default #"); SYSCTL_ADD_PROC(ctx_list, oid_list, OID_AUTO, "override_nrxds", CTLTYPE_STRING|CTLFLAG_RWTUN, ctx, IFLIB_NRXD_HANDLER, mp_ndesc_handler, "A", "list of # of rx descriptors to use, 0 = use default #"); } static void iflib_add_device_sysctl_post(if_ctx_t ctx) { if_shared_ctx_t sctx = ctx->ifc_sctx; if_softc_ctx_t scctx = &ctx->ifc_softc_ctx; device_t dev = iflib_get_dev(ctx); struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx_list; iflib_fl_t fl; iflib_txq_t txq; iflib_rxq_t rxq; int i, j; char namebuf[NAME_BUFLEN]; char *qfmt; struct sysctl_oid *queue_node, *fl_node, *node; struct sysctl_oid_list *queue_list, *fl_list; ctx_list = device_get_sysctl_ctx(dev); node = ctx->ifc_sysctl_node; child = SYSCTL_CHILDREN(node); if (scctx->isc_ntxqsets > 100) qfmt = "txq%03d"; else if (scctx->isc_ntxqsets > 10) qfmt = "txq%02d"; else qfmt = "txq%d"; for (i = 0, txq = ctx->ifc_txqs; i < scctx->isc_ntxqsets; i++, txq++) { 
		snprintf(namebuf, NAME_BUFLEN, qfmt, i);
		queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf,
		    CTLFLAG_RD, NULL, "Queue Name");
		queue_list = SYSCTL_CHILDREN(queue_node);
#if MEMORY_LOGGING
		SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_dequeued",
		    CTLFLAG_RD, &txq->ift_dequeued, "total mbufs freed");
		SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_enqueued",
		    CTLFLAG_RD, &txq->ift_enqueued, "total mbufs enqueued");
#endif
		SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "mbuf_defrag",
		    CTLFLAG_RD, &txq->ift_mbuf_defrag, "# of times m_defrag was called");
		SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "m_pullups",
		    CTLFLAG_RD, &txq->ift_pullups, "# of times m_pullup was called");
		SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "mbuf_defrag_failed",
		    CTLFLAG_RD, &txq->ift_mbuf_defrag_failed, "# of times m_defrag failed");
		SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "no_desc_avail",
		    CTLFLAG_RD, &txq->ift_no_desc_avail, "# of times no descriptors were available");
		SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "tx_map_failed",
		    CTLFLAG_RD, &txq->ift_map_failed, "# of times dma map failed");
		SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txd_encap_efbig",
		    CTLFLAG_RD, &txq->ift_txd_encap_efbig, "# of times txd_encap returned EFBIG");
		SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "no_tx_dma_setup",
		    CTLFLAG_RD, &txq->ift_no_tx_dma_setup, "# of times map failed for other than EFBIG");
		SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_pidx",
		    CTLFLAG_RD, &txq->ift_pidx, 1, "Producer Index");
		SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_cidx",
		    CTLFLAG_RD, &txq->ift_cidx, 1, "Consumer Index");
		SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_cidx_processed",
		    CTLFLAG_RD, &txq->ift_cidx_processed, 1, "Consumer Index seen by credit update");
		SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "txq_in_use",
		    CTLFLAG_RD, &txq->ift_in_use, 1, "descriptors in use");
		SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_processed",
		    CTLFLAG_RD, &txq->ift_processed, "descriptors processed for clean");
		SYSCTL_ADD_QUAD(ctx_list, queue_list, OID_AUTO, "txq_cleaned",
		    CTLFLAG_RD, &txq->ift_cleaned, "total cleaned");
		SYSCTL_ADD_PROC(ctx_list, queue_list, OID_AUTO, "ring_state",
		    CTLTYPE_STRING | CTLFLAG_RD,
		    __DEVOLATILE(uint64_t *, &txq->ift_br[0]->state),
		    0, mp_ring_state_handler, "A", "soft ring state");
		SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_enqueues",
		    CTLFLAG_RD, &txq->ift_br[0]->enqueues,
		    "# of enqueues to the mp_ring for this queue");
		SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_drops",
		    CTLFLAG_RD, &txq->ift_br[0]->drops,
		    "# of drops in the mp_ring for this queue");
		SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_starts",
		    CTLFLAG_RD, &txq->ift_br[0]->starts,
		    "# of normal consumer starts in the mp_ring for this queue");
		SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_stalls",
		    CTLFLAG_RD, &txq->ift_br[0]->stalls,
		    "# of consumer stalls in the mp_ring for this queue");
		SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_restarts",
		    CTLFLAG_RD, &txq->ift_br[0]->restarts,
		    "# of consumer restarts in the mp_ring for this queue");
		SYSCTL_ADD_COUNTER_U64(ctx_list, queue_list, OID_AUTO, "r_abdications",
		    CTLFLAG_RD, &txq->ift_br[0]->abdications,
		    "# of consumer abdications in the mp_ring for this queue");
	}
	if (scctx->isc_nrxqsets > 100)
		qfmt = "rxq%03d";
	else if (scctx->isc_nrxqsets > 10)
		qfmt = "rxq%02d";
	else
		qfmt = "rxq%d";
	for (i = 0, rxq = ctx->ifc_rxqs; i < scctx->isc_nrxqsets; i++, rxq++) {
		snprintf(namebuf, NAME_BUFLEN, qfmt, i);
queue_node = SYSCTL_ADD_NODE(ctx_list, child, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "Queue Name"); queue_list = SYSCTL_CHILDREN(queue_node); if (sctx->isc_flags & IFLIB_HAS_RXCQ) { SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "rxq_cq_pidx", CTLFLAG_RD, &rxq->ifr_cq_pidx, 1, "Producer Index"); SYSCTL_ADD_U16(ctx_list, queue_list, OID_AUTO, "rxq_cq_cidx", CTLFLAG_RD, &rxq->ifr_cq_cidx, 1, "Consumer Index"); } for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++) { snprintf(namebuf, NAME_BUFLEN, "rxq_fl%d", j); fl_node = SYSCTL_ADD_NODE(ctx_list, queue_list, OID_AUTO, namebuf, CTLFLAG_RD, NULL, "freelist Name"); fl_list = SYSCTL_CHILDREN(fl_node); SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "pidx", CTLFLAG_RD, &fl->ifl_pidx, 1, "Producer Index"); SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "cidx", CTLFLAG_RD, &fl->ifl_cidx, 1, "Consumer Index"); SYSCTL_ADD_U16(ctx_list, fl_list, OID_AUTO, "credits", CTLFLAG_RD, &fl->ifl_credits, 1, "credits available"); #if MEMORY_LOGGING SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_m_enqueued", CTLFLAG_RD, &fl->ifl_m_enqueued, "mbufs allocated"); SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_m_dequeued", CTLFLAG_RD, &fl->ifl_m_dequeued, "mbufs freed"); SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_cl_enqueued", CTLFLAG_RD, &fl->ifl_cl_enqueued, "clusters allocated"); SYSCTL_ADD_QUAD(ctx_list, fl_list, OID_AUTO, "fl_cl_dequeued", CTLFLAG_RD, &fl->ifl_cl_dequeued, "clusters freed"); #endif } } } Index: stable/11/sys/net80211/ieee80211_ioctl.c =================================================================== --- stable/11/sys/net80211/ieee80211_ioctl.c (revision 332287) +++ stable/11/sys/net80211/ieee80211_ioctl.c (revision 332288) @@ -1,3476 +1,3477 @@ /*- * Copyright (c) 2001 Atsushi Onoe * Copyright (c) 2002-2009 Sam Leffler, Errno Consulting * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); /* * IEEE 802.11 ioctl support (FreeBSD-specific) */ #include "opt_inet.h" #include "opt_wlan.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #include #endif #include #include #include #include #define IS_UP_AUTO(_vap) \ (IFNET_IS_UP_RUNNING((_vap)->iv_ifp) && \ (_vap)->iv_roaming == IEEE80211_ROAMING_AUTO) static const uint8_t zerobssid[IEEE80211_ADDR_LEN]; static struct ieee80211_channel *findchannel(struct ieee80211com *, int ieee, int mode); static int ieee80211_scanreq(struct ieee80211vap *, struct ieee80211_scan_req *); static int ieee80211_ioctl_getkey(struct ieee80211vap *vap, struct ieee80211req *ireq) { struct ieee80211com *ic = vap->iv_ic; struct ieee80211_node *ni; struct ieee80211req_key ik; struct ieee80211_key *wk; const struct ieee80211_cipher *cip; u_int kid; int error; if (ireq->i_len != sizeof(ik)) return EINVAL; error = copyin(ireq->i_data, &ik, sizeof(ik)); if (error) return error; kid = ik.ik_keyix; if (kid == IEEE80211_KEYIX_NONE) { ni = ieee80211_find_vap_node(&ic->ic_sta, vap, ik.ik_macaddr); if (ni == NULL) return ENOENT; wk = &ni->ni_ucastkey; } else { if (kid >= IEEE80211_WEP_NKID) return EINVAL; wk = &vap->iv_nw_keys[kid]; IEEE80211_ADDR_COPY(&ik.ik_macaddr, vap->iv_bss->ni_macaddr); ni = NULL; } cip = wk->wk_cipher; ik.ik_type = cip->ic_cipher; ik.ik_keylen = wk->wk_keylen; ik.ik_flags = wk->wk_flags & (IEEE80211_KEY_XMIT | IEEE80211_KEY_RECV); if (wk->wk_keyix == vap->iv_def_txkey) ik.ik_flags |= IEEE80211_KEY_DEFAULT; if (priv_check(curthread, PRIV_NET80211_GETKEY) == 0) { /* NB: only root can read key data */ ik.ik_keyrsc = wk->wk_keyrsc[IEEE80211_NONQOS_TID]; ik.ik_keytsc = wk->wk_keytsc; memcpy(ik.ik_keydata, wk->wk_key, wk->wk_keylen); if (cip->ic_cipher == IEEE80211_CIPHER_TKIP) { memcpy(ik.ik_keydata+wk->wk_keylen, wk->wk_key + IEEE80211_KEYBUF_SIZE, IEEE80211_MICBUF_SIZE); ik.ik_keylen += IEEE80211_MICBUF_SIZE; } } else { ik.ik_keyrsc = 0; ik.ik_keytsc = 0; memset(ik.ik_keydata, 0, sizeof(ik.ik_keydata)); } if (ni != NULL) ieee80211_free_node(ni); return copyout(&ik, ireq->i_data, sizeof(ik)); } static int ieee80211_ioctl_getchanlist(struct ieee80211vap *vap, struct ieee80211req *ireq) { struct ieee80211com *ic = vap->iv_ic; if (sizeof(ic->ic_chan_active) < ireq->i_len) ireq->i_len = sizeof(ic->ic_chan_active); return copyout(&ic->ic_chan_active, ireq->i_data, ireq->i_len); } static int ieee80211_ioctl_getchaninfo(struct ieee80211vap *vap, struct ieee80211req *ireq) { struct ieee80211com *ic = vap->iv_ic; uint32_t space; space = __offsetof(struct ieee80211req_chaninfo, ic_chans[ic->ic_nchans]); if (space > ireq->i_len) space = ireq->i_len; /* XXX assumes compatible layout */ return copyout(&ic->ic_nchans, ireq->i_data, space); } static int ieee80211_ioctl_getwpaie(struct ieee80211vap *vap, struct ieee80211req *ireq, int req) { struct ieee80211_node *ni; struct ieee80211req_wpaie2 *wpaie; int error; if (ireq->i_len < IEEE80211_ADDR_LEN) return EINVAL; wpaie = IEEE80211_MALLOC(sizeof(*wpaie), M_TEMP, IEEE80211_M_NOWAIT | IEEE80211_M_ZERO); if (wpaie == NULL) return ENOMEM; error = copyin(ireq->i_data, wpaie->wpa_macaddr, IEEE80211_ADDR_LEN); if (error != 0) goto bad; ni = ieee80211_find_vap_node(&vap->iv_ic->ic_sta, vap, wpaie->wpa_macaddr); if (ni == NULL) { error = ENOENT; goto bad; } if (ni->ni_ies.wpa_ie != NULL) { int ielen = ni->ni_ies.wpa_ie[1] + 2; if (ielen > sizeof(wpaie->wpa_ie)) ielen = sizeof(wpaie->wpa_ie); 
memcpy(wpaie->wpa_ie, ni->ni_ies.wpa_ie, ielen); } if (req == IEEE80211_IOC_WPAIE2) { if (ni->ni_ies.rsn_ie != NULL) { int ielen = ni->ni_ies.rsn_ie[1] + 2; if (ielen > sizeof(wpaie->rsn_ie)) ielen = sizeof(wpaie->rsn_ie); memcpy(wpaie->rsn_ie, ni->ni_ies.rsn_ie, ielen); } if (ireq->i_len > sizeof(struct ieee80211req_wpaie2)) ireq->i_len = sizeof(struct ieee80211req_wpaie2); } else { /* compatibility op, may overwrite wpa ie */ /* XXX check ic_flags? */ if (ni->ni_ies.rsn_ie != NULL) { int ielen = ni->ni_ies.rsn_ie[1] + 2; if (ielen > sizeof(wpaie->wpa_ie)) ielen = sizeof(wpaie->wpa_ie); memcpy(wpaie->wpa_ie, ni->ni_ies.rsn_ie, ielen); } if (ireq->i_len > sizeof(struct ieee80211req_wpaie)) ireq->i_len = sizeof(struct ieee80211req_wpaie); } ieee80211_free_node(ni); error = copyout(wpaie, ireq->i_data, ireq->i_len); bad: IEEE80211_FREE(wpaie, M_TEMP); return error; } static int ieee80211_ioctl_getstastats(struct ieee80211vap *vap, struct ieee80211req *ireq) { struct ieee80211_node *ni; uint8_t macaddr[IEEE80211_ADDR_LEN]; const size_t off = __offsetof(struct ieee80211req_sta_stats, is_stats); int error; if (ireq->i_len < off) return EINVAL; error = copyin(ireq->i_data, macaddr, IEEE80211_ADDR_LEN); if (error != 0) return error; ni = ieee80211_find_vap_node(&vap->iv_ic->ic_sta, vap, macaddr); if (ni == NULL) return ENOENT; if (ireq->i_len > sizeof(struct ieee80211req_sta_stats)) ireq->i_len = sizeof(struct ieee80211req_sta_stats); /* NB: copy out only the statistics */ error = copyout(&ni->ni_stats, (uint8_t *) ireq->i_data + off, ireq->i_len - off); ieee80211_free_node(ni); return error; } struct scanreq { struct ieee80211req_scan_result *sr; size_t space; }; static size_t scan_space(const struct ieee80211_scan_entry *se, int *ielen) { size_t len; *ielen = se->se_ies.len; /* * NB: ie's can be no more than 255 bytes and the max 802.11 * packet is <3Kbytes so we are sure this doesn't overflow * 16-bits; if this is a concern we can drop the ie's. 
*/ len = sizeof(struct ieee80211req_scan_result) + se->se_ssid[1] + se->se_meshid[1] + *ielen; return roundup(len, sizeof(uint32_t)); } static void get_scan_space(void *arg, const struct ieee80211_scan_entry *se) { struct scanreq *req = arg; int ielen; req->space += scan_space(se, &ielen); } static void get_scan_result(void *arg, const struct ieee80211_scan_entry *se) { struct scanreq *req = arg; struct ieee80211req_scan_result *sr; int ielen, len, nr, nxr; uint8_t *cp; len = scan_space(se, &ielen); if (len > req->space) return; sr = req->sr; KASSERT(len <= 65535 && ielen <= 65535, ("len %u ssid %u ie %u", len, se->se_ssid[1], ielen)); sr->isr_len = len; sr->isr_ie_off = sizeof(struct ieee80211req_scan_result); sr->isr_ie_len = ielen; sr->isr_freq = se->se_chan->ic_freq; sr->isr_flags = se->se_chan->ic_flags; sr->isr_rssi = se->se_rssi; sr->isr_noise = se->se_noise; sr->isr_intval = se->se_intval; sr->isr_capinfo = se->se_capinfo; sr->isr_erp = se->se_erp; IEEE80211_ADDR_COPY(sr->isr_bssid, se->se_bssid); nr = min(se->se_rates[1], IEEE80211_RATE_MAXSIZE); memcpy(sr->isr_rates, se->se_rates+2, nr); nxr = min(se->se_xrates[1], IEEE80211_RATE_MAXSIZE - nr); memcpy(sr->isr_rates+nr, se->se_xrates+2, nxr); sr->isr_nrates = nr + nxr; /* copy SSID */ sr->isr_ssid_len = se->se_ssid[1]; cp = ((uint8_t *)sr) + sr->isr_ie_off; memcpy(cp, se->se_ssid+2, sr->isr_ssid_len); /* copy mesh id */ cp += sr->isr_ssid_len; sr->isr_meshid_len = se->se_meshid[1]; memcpy(cp, se->se_meshid+2, sr->isr_meshid_len); cp += sr->isr_meshid_len; if (ielen) memcpy(cp, se->se_ies.data, ielen); req->space -= len; req->sr = (struct ieee80211req_scan_result *)(((uint8_t *)sr) + len); } static int ieee80211_ioctl_getscanresults(struct ieee80211vap *vap, struct ieee80211req *ireq) { struct scanreq req; int error; if (ireq->i_len < sizeof(struct scanreq)) return EFAULT; error = 0; req.space = 0; ieee80211_scan_iterate(vap, get_scan_space, &req); if (req.space > ireq->i_len) req.space = ireq->i_len; if (req.space > 0) { uint32_t space; void *p; space = req.space; /* XXX M_WAITOK after driver lock released */ p = IEEE80211_MALLOC(space, M_TEMP, IEEE80211_M_NOWAIT | IEEE80211_M_ZERO); if (p == NULL) return ENOMEM; req.sr = p; ieee80211_scan_iterate(vap, get_scan_result, &req); ireq->i_len = space - req.space; error = copyout(p, ireq->i_data, ireq->i_len); IEEE80211_FREE(p, M_TEMP); } else ireq->i_len = 0; return error; } struct stainforeq { struct ieee80211vap *vap; struct ieee80211req_sta_info *si; size_t space; }; static size_t sta_space(const struct ieee80211_node *ni, size_t *ielen) { *ielen = ni->ni_ies.len; return roundup(sizeof(struct ieee80211req_sta_info) + *ielen, sizeof(uint32_t)); } static void get_sta_space(void *arg, struct ieee80211_node *ni) { struct stainforeq *req = arg; size_t ielen; if (req->vap != ni->ni_vap) return; if (ni->ni_vap->iv_opmode == IEEE80211_M_HOSTAP && ni->ni_associd == 0) /* only associated stations */ return; req->space += sta_space(ni, &ielen); } static void get_sta_info(void *arg, struct ieee80211_node *ni) { struct stainforeq *req = arg; struct ieee80211vap *vap = ni->ni_vap; struct ieee80211req_sta_info *si; size_t ielen, len; uint8_t *cp; if (req->vap != ni->ni_vap) return; if (vap->iv_opmode == IEEE80211_M_HOSTAP && ni->ni_associd == 0) /* only associated stations */ return; if (ni->ni_chan == IEEE80211_CHAN_ANYC) /* XXX bogus entry */ return; len = sta_space(ni, &ielen); if (len > req->space) return; si = req->si; si->isi_len = len; si->isi_ie_off = sizeof(struct ieee80211req_sta_info); 
si->isi_ie_len = ielen; si->isi_freq = ni->ni_chan->ic_freq; si->isi_flags = ni->ni_chan->ic_flags; si->isi_state = ni->ni_flags; si->isi_authmode = ni->ni_authmode; vap->iv_ic->ic_node_getsignal(ni, &si->isi_rssi, &si->isi_noise); vap->iv_ic->ic_node_getmimoinfo(ni, &si->isi_mimo); si->isi_capinfo = ni->ni_capinfo; si->isi_erp = ni->ni_erp; IEEE80211_ADDR_COPY(si->isi_macaddr, ni->ni_macaddr); si->isi_nrates = ni->ni_rates.rs_nrates; if (si->isi_nrates > 15) si->isi_nrates = 15; memcpy(si->isi_rates, ni->ni_rates.rs_rates, si->isi_nrates); si->isi_txrate = ni->ni_txrate; if (si->isi_txrate & IEEE80211_RATE_MCS) { const struct ieee80211_mcs_rates *mcs = &ieee80211_htrates[ni->ni_txrate &~ IEEE80211_RATE_MCS]; if (IEEE80211_IS_CHAN_HT40(ni->ni_chan)) { if (ni->ni_flags & IEEE80211_NODE_SGI40) si->isi_txmbps = mcs->ht40_rate_800ns; else si->isi_txmbps = mcs->ht40_rate_400ns; } else { if (ni->ni_flags & IEEE80211_NODE_SGI20) si->isi_txmbps = mcs->ht20_rate_800ns; else si->isi_txmbps = mcs->ht20_rate_400ns; } } else si->isi_txmbps = si->isi_txrate; si->isi_associd = ni->ni_associd; si->isi_txpower = ni->ni_txpower; si->isi_vlan = ni->ni_vlan; if (ni->ni_flags & IEEE80211_NODE_QOS) { memcpy(si->isi_txseqs, ni->ni_txseqs, sizeof(ni->ni_txseqs)); memcpy(si->isi_rxseqs, ni->ni_rxseqs, sizeof(ni->ni_rxseqs)); } else { si->isi_txseqs[0] = ni->ni_txseqs[IEEE80211_NONQOS_TID]; si->isi_rxseqs[0] = ni->ni_rxseqs[IEEE80211_NONQOS_TID]; } /* NB: leave all cases in case we relax ni_associd == 0 check */ if (ieee80211_node_is_authorized(ni)) si->isi_inact = vap->iv_inact_run; else if (ni->ni_associd != 0 || (vap->iv_opmode == IEEE80211_M_WDS && (vap->iv_flags_ext & IEEE80211_FEXT_WDSLEGACY))) si->isi_inact = vap->iv_inact_auth; else si->isi_inact = vap->iv_inact_init; si->isi_inact = (si->isi_inact - ni->ni_inact) * IEEE80211_INACT_WAIT; si->isi_localid = ni->ni_mllid; si->isi_peerid = ni->ni_mlpid; si->isi_peerstate = ni->ni_mlstate; if (ielen) { cp = ((uint8_t *)si) + si->isi_ie_off; memcpy(cp, ni->ni_ies.data, ielen); } req->si = (struct ieee80211req_sta_info *)(((uint8_t *)si) + len); req->space -= len; } static int getstainfo_common(struct ieee80211vap *vap, struct ieee80211req *ireq, struct ieee80211_node *ni, size_t off) { struct ieee80211com *ic = vap->iv_ic; struct stainforeq req; size_t space; void *p; int error; error = 0; req.space = 0; req.vap = vap; if (ni == NULL) ieee80211_iterate_nodes(&ic->ic_sta, get_sta_space, &req); else get_sta_space(&req, ni); if (req.space > ireq->i_len) req.space = ireq->i_len; if (req.space > 0) { space = req.space; /* XXX M_WAITOK after driver lock released */ p = IEEE80211_MALLOC(space, M_TEMP, IEEE80211_M_NOWAIT | IEEE80211_M_ZERO); if (p == NULL) { error = ENOMEM; goto bad; } req.si = p; if (ni == NULL) ieee80211_iterate_nodes(&ic->ic_sta, get_sta_info, &req); else get_sta_info(&req, ni); ireq->i_len = space - req.space; error = copyout(p, (uint8_t *) ireq->i_data+off, ireq->i_len); IEEE80211_FREE(p, M_TEMP); } else ireq->i_len = 0; bad: if (ni != NULL) ieee80211_free_node(ni); return error; } static int ieee80211_ioctl_getstainfo(struct ieee80211vap *vap, struct ieee80211req *ireq) { uint8_t macaddr[IEEE80211_ADDR_LEN]; const size_t off = __offsetof(struct ieee80211req_sta_req, info); struct ieee80211_node *ni; int error; if (ireq->i_len < sizeof(struct ieee80211req_sta_req)) return EFAULT; error = copyin(ireq->i_data, macaddr, IEEE80211_ADDR_LEN); if (error != 0) return error; if (IEEE80211_ADDR_EQ(macaddr, vap->iv_ifp->if_broadcastaddr)) { ni = NULL; } else 
{ ni = ieee80211_find_vap_node(&vap->iv_ic->ic_sta, vap, macaddr); if (ni == NULL) return ENOENT; } return getstainfo_common(vap, ireq, ni, off); } static int ieee80211_ioctl_getstatxpow(struct ieee80211vap *vap, struct ieee80211req *ireq) { struct ieee80211_node *ni; struct ieee80211req_sta_txpow txpow; int error; if (ireq->i_len != sizeof(txpow)) return EINVAL; error = copyin(ireq->i_data, &txpow, sizeof(txpow)); if (error != 0) return error; ni = ieee80211_find_vap_node(&vap->iv_ic->ic_sta, vap, txpow.it_macaddr); if (ni == NULL) return ENOENT; txpow.it_txpow = ni->ni_txpower; error = copyout(&txpow, ireq->i_data, sizeof(txpow)); ieee80211_free_node(ni); return error; } static int ieee80211_ioctl_getwmeparam(struct ieee80211vap *vap, struct ieee80211req *ireq) { struct ieee80211com *ic = vap->iv_ic; struct ieee80211_wme_state *wme = &ic->ic_wme; struct wmeParams *wmep; int ac; if ((ic->ic_caps & IEEE80211_C_WME) == 0) return EINVAL; ac = (ireq->i_len & IEEE80211_WMEPARAM_VAL); if (ac >= WME_NUM_AC) ac = WME_AC_BE; if (ireq->i_len & IEEE80211_WMEPARAM_BSS) wmep = &wme->wme_wmeBssChanParams.cap_wmeParams[ac]; else wmep = &wme->wme_wmeChanParams.cap_wmeParams[ac]; switch (ireq->i_type) { case IEEE80211_IOC_WME_CWMIN: /* WME: CWmin */ ireq->i_val = wmep->wmep_logcwmin; break; case IEEE80211_IOC_WME_CWMAX: /* WME: CWmax */ ireq->i_val = wmep->wmep_logcwmax; break; case IEEE80211_IOC_WME_AIFS: /* WME: AIFS */ ireq->i_val = wmep->wmep_aifsn; break; case IEEE80211_IOC_WME_TXOPLIMIT: /* WME: txops limit */ ireq->i_val = wmep->wmep_txopLimit; break; case IEEE80211_IOC_WME_ACM: /* WME: ACM (bss only) */ wmep = &wme->wme_wmeBssChanParams.cap_wmeParams[ac]; ireq->i_val = wmep->wmep_acm; break; case IEEE80211_IOC_WME_ACKPOLICY: /* WME: ACK policy (!bss only)*/ wmep = &wme->wme_wmeChanParams.cap_wmeParams[ac]; ireq->i_val = !wmep->wmep_noackPolicy; break; } return 0; } static int ieee80211_ioctl_getmaccmd(struct ieee80211vap *vap, struct ieee80211req *ireq) { const struct ieee80211_aclator *acl = vap->iv_acl; return (acl == NULL ? EINVAL : acl->iac_getioctl(vap, ireq)); } static int ieee80211_ioctl_getcurchan(struct ieee80211vap *vap, struct ieee80211req *ireq) { struct ieee80211com *ic = vap->iv_ic; struct ieee80211_channel *c; if (ireq->i_len != sizeof(struct ieee80211_channel)) return EINVAL; /* * vap's may have different operating channels when HT is * in use. When in RUN state report the vap-specific channel. * Otherwise return curchan. 
*/ if (vap->iv_state == IEEE80211_S_RUN || vap->iv_state == IEEE80211_S_SLEEP) c = vap->iv_bss->ni_chan; else c = ic->ic_curchan; return copyout(c, ireq->i_data, sizeof(*c)); } static int getappie(const struct ieee80211_appie *aie, struct ieee80211req *ireq) { if (aie == NULL) return EINVAL; /* NB: truncate, caller can check length */ if (ireq->i_len > aie->ie_len) ireq->i_len = aie->ie_len; return copyout(aie->ie_data, ireq->i_data, ireq->i_len); } static int ieee80211_ioctl_getappie(struct ieee80211vap *vap, struct ieee80211req *ireq) { uint8_t fc0; fc0 = ireq->i_val & 0xff; if ((fc0 & IEEE80211_FC0_TYPE_MASK) != IEEE80211_FC0_TYPE_MGT) return EINVAL; /* NB: could check iv_opmode and reject but hardly worth the effort */ switch (fc0 & IEEE80211_FC0_SUBTYPE_MASK) { case IEEE80211_FC0_SUBTYPE_BEACON: return getappie(vap->iv_appie_beacon, ireq); case IEEE80211_FC0_SUBTYPE_PROBE_RESP: return getappie(vap->iv_appie_proberesp, ireq); case IEEE80211_FC0_SUBTYPE_ASSOC_RESP: return getappie(vap->iv_appie_assocresp, ireq); case IEEE80211_FC0_SUBTYPE_PROBE_REQ: return getappie(vap->iv_appie_probereq, ireq); case IEEE80211_FC0_SUBTYPE_ASSOC_REQ: return getappie(vap->iv_appie_assocreq, ireq); case IEEE80211_FC0_SUBTYPE_BEACON|IEEE80211_FC0_SUBTYPE_PROBE_RESP: return getappie(vap->iv_appie_wpa, ireq); } return EINVAL; } static int ieee80211_ioctl_getregdomain(struct ieee80211vap *vap, const struct ieee80211req *ireq) { struct ieee80211com *ic = vap->iv_ic; if (ireq->i_len != sizeof(ic->ic_regdomain)) return EINVAL; return copyout(&ic->ic_regdomain, ireq->i_data, sizeof(ic->ic_regdomain)); } static int ieee80211_ioctl_getroam(struct ieee80211vap *vap, const struct ieee80211req *ireq) { size_t len = ireq->i_len; /* NB: accept short requests for backwards compat */ if (len > sizeof(vap->iv_roamparms)) len = sizeof(vap->iv_roamparms); return copyout(vap->iv_roamparms, ireq->i_data, len); } static int ieee80211_ioctl_gettxparams(struct ieee80211vap *vap, const struct ieee80211req *ireq) { size_t len = ireq->i_len; /* NB: accept short requests for backwards compat */ if (len > sizeof(vap->iv_txparms)) len = sizeof(vap->iv_txparms); return copyout(vap->iv_txparms, ireq->i_data, len); } static int ieee80211_ioctl_getdevcaps(struct ieee80211com *ic, const struct ieee80211req *ireq) { struct ieee80211_devcaps_req *dc; struct ieee80211req_chaninfo *ci; int maxchans, error; maxchans = 1 + ((ireq->i_len - sizeof(struct ieee80211_devcaps_req)) / sizeof(struct ieee80211_channel)); /* NB: require 1 so we know ic_nchans is accessible */ if (maxchans < 1) return EINVAL; /* constrain max request size, 2K channels is ~24Kbytes */ if (maxchans > 2048) maxchans = 2048; dc = (struct ieee80211_devcaps_req *) IEEE80211_MALLOC(IEEE80211_DEVCAPS_SIZE(maxchans), M_TEMP, IEEE80211_M_NOWAIT | IEEE80211_M_ZERO); if (dc == NULL) return ENOMEM; dc->dc_drivercaps = ic->ic_caps; dc->dc_cryptocaps = ic->ic_cryptocaps; dc->dc_htcaps = ic->ic_htcaps; ci = &dc->dc_chaninfo; ic->ic_getradiocaps(ic, maxchans, &ci->ic_nchans, ci->ic_chans); KASSERT(ci->ic_nchans <= maxchans, ("nchans %d maxchans %d", ci->ic_nchans, maxchans)); ieee80211_sort_channels(ci->ic_chans, ci->ic_nchans); error = copyout(dc, ireq->i_data, IEEE80211_DEVCAPS_SPACE(dc)); IEEE80211_FREE(dc, M_TEMP); return error; } static int ieee80211_ioctl_getstavlan(struct ieee80211vap *vap, struct ieee80211req *ireq) { struct ieee80211_node *ni; struct ieee80211req_sta_vlan vlan; int error; if (ireq->i_len != sizeof(vlan)) return EINVAL; error = copyin(ireq->i_data, &vlan, 
sizeof(vlan)); if (error != 0) return error; if (!IEEE80211_ADDR_EQ(vlan.sv_macaddr, zerobssid)) { ni = ieee80211_find_vap_node(&vap->iv_ic->ic_sta, vap, vlan.sv_macaddr); if (ni == NULL) return ENOENT; } else ni = ieee80211_ref_node(vap->iv_bss); vlan.sv_vlan = ni->ni_vlan; error = copyout(&vlan, ireq->i_data, sizeof(vlan)); ieee80211_free_node(ni); return error; } /* * Dummy ioctl get handler so the linker set is defined. */ static int dummy_ioctl_get(struct ieee80211vap *vap, struct ieee80211req *ireq) { return ENOSYS; } IEEE80211_IOCTL_GET(dummy, dummy_ioctl_get); static int ieee80211_ioctl_getdefault(struct ieee80211vap *vap, struct ieee80211req *ireq) { ieee80211_ioctl_getfunc * const *get; int error; SET_FOREACH(get, ieee80211_ioctl_getset) { error = (*get)(vap, ireq); if (error != ENOSYS) return error; } return EINVAL; } static int ieee80211_ioctl_get80211(struct ieee80211vap *vap, u_long cmd, struct ieee80211req *ireq) { #define MS(_v, _f) (((_v) & _f) >> _f##_S) struct ieee80211com *ic = vap->iv_ic; u_int kid, len; uint8_t tmpkey[IEEE80211_KEYBUF_SIZE]; char tmpssid[IEEE80211_NWID_LEN]; int error = 0; switch (ireq->i_type) { case IEEE80211_IOC_SSID: switch (vap->iv_state) { case IEEE80211_S_INIT: case IEEE80211_S_SCAN: ireq->i_len = vap->iv_des_ssid[0].len; memcpy(tmpssid, vap->iv_des_ssid[0].ssid, ireq->i_len); break; default: ireq->i_len = vap->iv_bss->ni_esslen; memcpy(tmpssid, vap->iv_bss->ni_essid, ireq->i_len); break; } error = copyout(tmpssid, ireq->i_data, ireq->i_len); break; case IEEE80211_IOC_NUMSSIDS: ireq->i_val = 1; break; case IEEE80211_IOC_WEP: if ((vap->iv_flags & IEEE80211_F_PRIVACY) == 0) ireq->i_val = IEEE80211_WEP_OFF; else if (vap->iv_flags & IEEE80211_F_DROPUNENC) ireq->i_val = IEEE80211_WEP_ON; else ireq->i_val = IEEE80211_WEP_MIXED; break; case IEEE80211_IOC_WEPKEY: kid = (u_int) ireq->i_val; if (kid >= IEEE80211_WEP_NKID) return EINVAL; len = (u_int) vap->iv_nw_keys[kid].wk_keylen; /* NB: only root can read WEP keys */ if (priv_check(curthread, PRIV_NET80211_GETKEY) == 0) { bcopy(vap->iv_nw_keys[kid].wk_key, tmpkey, len); } else { bzero(tmpkey, len); } ireq->i_len = len; error = copyout(tmpkey, ireq->i_data, len); break; case IEEE80211_IOC_NUMWEPKEYS: ireq->i_val = IEEE80211_WEP_NKID; break; case IEEE80211_IOC_WEPTXKEY: ireq->i_val = vap->iv_def_txkey; break; case IEEE80211_IOC_AUTHMODE: if (vap->iv_flags & IEEE80211_F_WPA) ireq->i_val = IEEE80211_AUTH_WPA; else ireq->i_val = vap->iv_bss->ni_authmode; break; case IEEE80211_IOC_CHANNEL: ireq->i_val = ieee80211_chan2ieee(ic, ic->ic_curchan); break; case IEEE80211_IOC_POWERSAVE: if (vap->iv_flags & IEEE80211_F_PMGTON) ireq->i_val = IEEE80211_POWERSAVE_ON; else ireq->i_val = IEEE80211_POWERSAVE_OFF; break; case IEEE80211_IOC_POWERSAVESLEEP: ireq->i_val = ic->ic_lintval; break; case IEEE80211_IOC_RTSTHRESHOLD: ireq->i_val = vap->iv_rtsthreshold; break; case IEEE80211_IOC_PROTMODE: ireq->i_val = ic->ic_protmode; break; case IEEE80211_IOC_TXPOWER: /* * Tx power limit is the min of max regulatory * power, any user-set limit, and the max the * radio can do. 
*/ ireq->i_val = 2*ic->ic_curchan->ic_maxregpower; if (ireq->i_val > ic->ic_txpowlimit) ireq->i_val = ic->ic_txpowlimit; if (ireq->i_val > ic->ic_curchan->ic_maxpower) ireq->i_val = ic->ic_curchan->ic_maxpower; break; case IEEE80211_IOC_WPA: switch (vap->iv_flags & IEEE80211_F_WPA) { case IEEE80211_F_WPA1: ireq->i_val = 1; break; case IEEE80211_F_WPA2: ireq->i_val = 2; break; case IEEE80211_F_WPA1 | IEEE80211_F_WPA2: ireq->i_val = 3; break; default: ireq->i_val = 0; break; } break; case IEEE80211_IOC_CHANLIST: error = ieee80211_ioctl_getchanlist(vap, ireq); break; case IEEE80211_IOC_ROAMING: ireq->i_val = vap->iv_roaming; break; case IEEE80211_IOC_PRIVACY: ireq->i_val = (vap->iv_flags & IEEE80211_F_PRIVACY) != 0; break; case IEEE80211_IOC_DROPUNENCRYPTED: ireq->i_val = (vap->iv_flags & IEEE80211_F_DROPUNENC) != 0; break; case IEEE80211_IOC_COUNTERMEASURES: ireq->i_val = (vap->iv_flags & IEEE80211_F_COUNTERM) != 0; break; case IEEE80211_IOC_WME: ireq->i_val = (vap->iv_flags & IEEE80211_F_WME) != 0; break; case IEEE80211_IOC_HIDESSID: ireq->i_val = (vap->iv_flags & IEEE80211_F_HIDESSID) != 0; break; case IEEE80211_IOC_APBRIDGE: ireq->i_val = (vap->iv_flags & IEEE80211_F_NOBRIDGE) == 0; break; case IEEE80211_IOC_WPAKEY: error = ieee80211_ioctl_getkey(vap, ireq); break; case IEEE80211_IOC_CHANINFO: error = ieee80211_ioctl_getchaninfo(vap, ireq); break; case IEEE80211_IOC_BSSID: if (ireq->i_len != IEEE80211_ADDR_LEN) return EINVAL; if (vap->iv_state == IEEE80211_S_RUN || vap->iv_state == IEEE80211_S_SLEEP) { error = copyout(vap->iv_opmode == IEEE80211_M_WDS ? vap->iv_bss->ni_macaddr : vap->iv_bss->ni_bssid, ireq->i_data, ireq->i_len); } else error = copyout(vap->iv_des_bssid, ireq->i_data, ireq->i_len); break; case IEEE80211_IOC_WPAIE: case IEEE80211_IOC_WPAIE2: error = ieee80211_ioctl_getwpaie(vap, ireq, ireq->i_type); break; case IEEE80211_IOC_SCAN_RESULTS: error = ieee80211_ioctl_getscanresults(vap, ireq); break; case IEEE80211_IOC_STA_STATS: error = ieee80211_ioctl_getstastats(vap, ireq); break; case IEEE80211_IOC_TXPOWMAX: ireq->i_val = vap->iv_bss->ni_txpower; break; case IEEE80211_IOC_STA_TXPOW: error = ieee80211_ioctl_getstatxpow(vap, ireq); break; case IEEE80211_IOC_STA_INFO: error = ieee80211_ioctl_getstainfo(vap, ireq); break; case IEEE80211_IOC_WME_CWMIN: /* WME: CWmin */ case IEEE80211_IOC_WME_CWMAX: /* WME: CWmax */ case IEEE80211_IOC_WME_AIFS: /* WME: AIFS */ case IEEE80211_IOC_WME_TXOPLIMIT: /* WME: txops limit */ case IEEE80211_IOC_WME_ACM: /* WME: ACM (bss only) */ case IEEE80211_IOC_WME_ACKPOLICY: /* WME: ACK policy (!bss only) */ error = ieee80211_ioctl_getwmeparam(vap, ireq); break; case IEEE80211_IOC_DTIM_PERIOD: ireq->i_val = vap->iv_dtim_period; break; case IEEE80211_IOC_BEACON_INTERVAL: /* NB: get from ic_bss for station mode */ ireq->i_val = vap->iv_bss->ni_intval; break; case IEEE80211_IOC_PUREG: ireq->i_val = (vap->iv_flags & IEEE80211_F_PUREG) != 0; break; case IEEE80211_IOC_QUIET: ireq->i_val = vap->iv_quiet; break; case IEEE80211_IOC_QUIET_COUNT: ireq->i_val = vap->iv_quiet_count; break; case IEEE80211_IOC_QUIET_PERIOD: ireq->i_val = vap->iv_quiet_period; break; case IEEE80211_IOC_QUIET_DUR: ireq->i_val = vap->iv_quiet_duration; break; case IEEE80211_IOC_QUIET_OFFSET: ireq->i_val = vap->iv_quiet_offset; break; case IEEE80211_IOC_BGSCAN: ireq->i_val = (vap->iv_flags & IEEE80211_F_BGSCAN) != 0; break; case IEEE80211_IOC_BGSCAN_IDLE: ireq->i_val = vap->iv_bgscanidle*hz/1000; /* ms */ break; case IEEE80211_IOC_BGSCAN_INTERVAL: ireq->i_val = vap->iv_bgscanintvl/hz; /* 
seconds */ break; case IEEE80211_IOC_SCANVALID: ireq->i_val = vap->iv_scanvalid/hz; /* seconds */ break; case IEEE80211_IOC_FRAGTHRESHOLD: ireq->i_val = vap->iv_fragthreshold; break; case IEEE80211_IOC_MACCMD: error = ieee80211_ioctl_getmaccmd(vap, ireq); break; case IEEE80211_IOC_BURST: ireq->i_val = (vap->iv_flags & IEEE80211_F_BURST) != 0; break; case IEEE80211_IOC_BMISSTHRESHOLD: ireq->i_val = vap->iv_bmissthreshold; break; case IEEE80211_IOC_CURCHAN: error = ieee80211_ioctl_getcurchan(vap, ireq); break; case IEEE80211_IOC_SHORTGI: ireq->i_val = 0; if (vap->iv_flags_ht & IEEE80211_FHT_SHORTGI20) ireq->i_val |= IEEE80211_HTCAP_SHORTGI20; if (vap->iv_flags_ht & IEEE80211_FHT_SHORTGI40) ireq->i_val |= IEEE80211_HTCAP_SHORTGI40; break; case IEEE80211_IOC_AMPDU: ireq->i_val = 0; if (vap->iv_flags_ht & IEEE80211_FHT_AMPDU_TX) ireq->i_val |= 1; if (vap->iv_flags_ht & IEEE80211_FHT_AMPDU_RX) ireq->i_val |= 2; break; case IEEE80211_IOC_AMPDU_LIMIT: if (vap->iv_opmode == IEEE80211_M_HOSTAP) ireq->i_val = vap->iv_ampdu_rxmax; else if (vap->iv_state == IEEE80211_S_RUN || vap->iv_state == IEEE80211_S_SLEEP) /* * XXX TODO: this isn't completely correct, as we've * negotiated the higher of the two. */ ireq->i_val = MS(vap->iv_bss->ni_htparam, IEEE80211_HTCAP_MAXRXAMPDU); else ireq->i_val = vap->iv_ampdu_limit; break; case IEEE80211_IOC_AMPDU_DENSITY: if (vap->iv_opmode == IEEE80211_M_STA && (vap->iv_state == IEEE80211_S_RUN || vap->iv_state == IEEE80211_S_SLEEP)) /* * XXX TODO: this isn't completely correct, as we've * negotiated the higher of the two. */ ireq->i_val = MS(vap->iv_bss->ni_htparam, IEEE80211_HTCAP_MPDUDENSITY); else ireq->i_val = vap->iv_ampdu_density; break; case IEEE80211_IOC_AMSDU: ireq->i_val = 0; if (vap->iv_flags_ht & IEEE80211_FHT_AMSDU_TX) ireq->i_val |= 1; if (vap->iv_flags_ht & IEEE80211_FHT_AMSDU_RX) ireq->i_val |= 2; break; case IEEE80211_IOC_AMSDU_LIMIT: ireq->i_val = vap->iv_amsdu_limit; /* XXX truncation? 
*/ break; case IEEE80211_IOC_PUREN: ireq->i_val = (vap->iv_flags_ht & IEEE80211_FHT_PUREN) != 0; break; case IEEE80211_IOC_DOTH: ireq->i_val = (vap->iv_flags & IEEE80211_F_DOTH) != 0; break; case IEEE80211_IOC_REGDOMAIN: error = ieee80211_ioctl_getregdomain(vap, ireq); break; case IEEE80211_IOC_ROAM: error = ieee80211_ioctl_getroam(vap, ireq); break; case IEEE80211_IOC_TXPARAMS: error = ieee80211_ioctl_gettxparams(vap, ireq); break; case IEEE80211_IOC_HTCOMPAT: ireq->i_val = (vap->iv_flags_ht & IEEE80211_FHT_HTCOMPAT) != 0; break; case IEEE80211_IOC_DWDS: ireq->i_val = (vap->iv_flags & IEEE80211_F_DWDS) != 0; break; case IEEE80211_IOC_INACTIVITY: ireq->i_val = (vap->iv_flags_ext & IEEE80211_FEXT_INACT) != 0; break; case IEEE80211_IOC_APPIE: error = ieee80211_ioctl_getappie(vap, ireq); break; case IEEE80211_IOC_WPS: ireq->i_val = (vap->iv_flags_ext & IEEE80211_FEXT_WPS) != 0; break; case IEEE80211_IOC_TSN: ireq->i_val = (vap->iv_flags_ext & IEEE80211_FEXT_TSN) != 0; break; case IEEE80211_IOC_DFS: ireq->i_val = (vap->iv_flags_ext & IEEE80211_FEXT_DFS) != 0; break; case IEEE80211_IOC_DOTD: ireq->i_val = (vap->iv_flags_ext & IEEE80211_FEXT_DOTD) != 0; break; case IEEE80211_IOC_DEVCAPS: error = ieee80211_ioctl_getdevcaps(ic, ireq); break; case IEEE80211_IOC_HTPROTMODE: ireq->i_val = ic->ic_htprotmode; break; case IEEE80211_IOC_HTCONF: if (vap->iv_flags_ht & IEEE80211_FHT_HT) { ireq->i_val = 1; if (vap->iv_flags_ht & IEEE80211_FHT_USEHT40) ireq->i_val |= 2; } else ireq->i_val = 0; break; case IEEE80211_IOC_STA_VLAN: error = ieee80211_ioctl_getstavlan(vap, ireq); break; case IEEE80211_IOC_SMPS: if (vap->iv_opmode == IEEE80211_M_STA && (vap->iv_state == IEEE80211_S_RUN || vap->iv_state == IEEE80211_S_SLEEP)) { if (vap->iv_bss->ni_flags & IEEE80211_NODE_MIMO_RTS) ireq->i_val = IEEE80211_HTCAP_SMPS_DYNAMIC; else if (vap->iv_bss->ni_flags & IEEE80211_NODE_MIMO_PS) ireq->i_val = IEEE80211_HTCAP_SMPS_ENA; else ireq->i_val = IEEE80211_HTCAP_SMPS_OFF; } else ireq->i_val = vap->iv_htcaps & IEEE80211_HTCAP_SMPS; break; case IEEE80211_IOC_RIFS: if (vap->iv_opmode == IEEE80211_M_STA && (vap->iv_state == IEEE80211_S_RUN || vap->iv_state == IEEE80211_S_SLEEP)) ireq->i_val = (vap->iv_bss->ni_flags & IEEE80211_NODE_RIFS) != 0; else ireq->i_val = (vap->iv_flags_ht & IEEE80211_FHT_RIFS) != 0; break; case IEEE80211_IOC_STBC: ireq->i_val = 0; if (vap->iv_flags_ht & IEEE80211_FHT_STBC_TX) ireq->i_val |= 1; if (vap->iv_flags_ht & IEEE80211_FHT_STBC_RX) ireq->i_val |= 2; break; default: error = ieee80211_ioctl_getdefault(vap, ireq); break; } return error; #undef MS } static int ieee80211_ioctl_setkey(struct ieee80211vap *vap, struct ieee80211req *ireq) { struct ieee80211req_key ik; struct ieee80211_node *ni; struct ieee80211_key *wk; uint16_t kid; int error, i; if (ireq->i_len != sizeof(ik)) return EINVAL; error = copyin(ireq->i_data, &ik, sizeof(ik)); if (error) return error; /* NB: cipher support is verified by ieee80211_crypt_newkey */ /* NB: this also checks ik->ik_keylen > sizeof(wk->wk_key) */ if (ik.ik_keylen > sizeof(ik.ik_keydata)) return E2BIG; kid = ik.ik_keyix; if (kid == IEEE80211_KEYIX_NONE) { /* XXX unicast keys currently must be tx/rx */ if (ik.ik_flags != (IEEE80211_KEY_XMIT | IEEE80211_KEY_RECV)) return EINVAL; if (vap->iv_opmode == IEEE80211_M_STA) { ni = ieee80211_ref_node(vap->iv_bss); if (!IEEE80211_ADDR_EQ(ik.ik_macaddr, ni->ni_bssid)) { ieee80211_free_node(ni); return EADDRNOTAVAIL; } } else { ni = ieee80211_find_vap_node(&vap->iv_ic->ic_sta, vap, ik.ik_macaddr); if (ni == NULL) return ENOENT; } 
wk = &ni->ni_ucastkey; } else { if (kid >= IEEE80211_WEP_NKID) return EINVAL; wk = &vap->iv_nw_keys[kid]; /* * Global slots start off w/o any assigned key index. * Force one here for consistency with IEEE80211_IOC_WEPKEY. */ if (wk->wk_keyix == IEEE80211_KEYIX_NONE) wk->wk_keyix = kid; ni = NULL; } error = 0; ieee80211_key_update_begin(vap); if (ieee80211_crypto_newkey(vap, ik.ik_type, ik.ik_flags, wk)) { wk->wk_keylen = ik.ik_keylen; /* NB: MIC presence is implied by cipher type */ if (wk->wk_keylen > IEEE80211_KEYBUF_SIZE) wk->wk_keylen = IEEE80211_KEYBUF_SIZE; for (i = 0; i < IEEE80211_TID_SIZE; i++) wk->wk_keyrsc[i] = ik.ik_keyrsc; wk->wk_keytsc = 0; /* new key, reset */ memset(wk->wk_key, 0, sizeof(wk->wk_key)); memcpy(wk->wk_key, ik.ik_keydata, ik.ik_keylen); IEEE80211_ADDR_COPY(wk->wk_macaddr, ni != NULL ? ni->ni_macaddr : ik.ik_macaddr); if (!ieee80211_crypto_setkey(vap, wk)) error = EIO; else if ((ik.ik_flags & IEEE80211_KEY_DEFAULT)) vap->iv_def_txkey = kid; } else error = ENXIO; ieee80211_key_update_end(vap); if (ni != NULL) ieee80211_free_node(ni); return error; } static int ieee80211_ioctl_delkey(struct ieee80211vap *vap, struct ieee80211req *ireq) { struct ieee80211req_del_key dk; int kid, error; if (ireq->i_len != sizeof(dk)) return EINVAL; error = copyin(ireq->i_data, &dk, sizeof(dk)); if (error) return error; kid = dk.idk_keyix; /* XXX uint8_t -> uint16_t */ if (dk.idk_keyix == (uint8_t) IEEE80211_KEYIX_NONE) { struct ieee80211_node *ni; if (vap->iv_opmode == IEEE80211_M_STA) { ni = ieee80211_ref_node(vap->iv_bss); if (!IEEE80211_ADDR_EQ(dk.idk_macaddr, ni->ni_bssid)) { ieee80211_free_node(ni); return EADDRNOTAVAIL; } } else { ni = ieee80211_find_vap_node(&vap->iv_ic->ic_sta, vap, dk.idk_macaddr); if (ni == NULL) return ENOENT; } /* XXX error return */ ieee80211_node_delucastkey(ni); ieee80211_free_node(ni); } else { if (kid >= IEEE80211_WEP_NKID) return EINVAL; /* XXX error return */ ieee80211_crypto_delkey(vap, &vap->iv_nw_keys[kid]); } return 0; } struct mlmeop { struct ieee80211vap *vap; int op; int reason; }; static void mlmedebug(struct ieee80211vap *vap, const uint8_t mac[IEEE80211_ADDR_LEN], int op, int reason) { #ifdef IEEE80211_DEBUG static const struct { int mask; const char *opstr; } ops[] = { { 0, "op#0" }, { IEEE80211_MSG_IOCTL | IEEE80211_MSG_STATE | IEEE80211_MSG_ASSOC, "assoc" }, { IEEE80211_MSG_IOCTL | IEEE80211_MSG_STATE | IEEE80211_MSG_ASSOC, "disassoc" }, { IEEE80211_MSG_IOCTL | IEEE80211_MSG_STATE | IEEE80211_MSG_AUTH, "deauth" }, { IEEE80211_MSG_IOCTL | IEEE80211_MSG_STATE | IEEE80211_MSG_AUTH, "authorize" }, { IEEE80211_MSG_IOCTL | IEEE80211_MSG_STATE | IEEE80211_MSG_AUTH, "unauthorize" }, }; if (op == IEEE80211_MLME_AUTH) { IEEE80211_NOTE_MAC(vap, IEEE80211_MSG_IOCTL | IEEE80211_MSG_STATE | IEEE80211_MSG_AUTH, mac, "station authenticate %s via MLME (reason: %d (%s))", reason == IEEE80211_STATUS_SUCCESS ? 
"ACCEPT" : "REJECT", reason, ieee80211_reason_to_string(reason)); } else if (!(IEEE80211_MLME_ASSOC <= op && op <= IEEE80211_MLME_AUTH)) { IEEE80211_NOTE_MAC(vap, IEEE80211_MSG_ANY, mac, "unknown MLME request %d (reason: %d (%s))", op, reason, ieee80211_reason_to_string(reason)); } else if (reason == IEEE80211_STATUS_SUCCESS) { IEEE80211_NOTE_MAC(vap, ops[op].mask, mac, "station %s via MLME", ops[op].opstr); } else { IEEE80211_NOTE_MAC(vap, ops[op].mask, mac, "station %s via MLME (reason: %d (%s))", ops[op].opstr, reason, ieee80211_reason_to_string(reason)); } #endif /* IEEE80211_DEBUG */ } static void domlme(void *arg, struct ieee80211_node *ni) { struct mlmeop *mop = arg; struct ieee80211vap *vap = ni->ni_vap; if (vap != mop->vap) return; /* * NB: if ni_associd is zero then the node is already cleaned * up and we don't need to do this (we're safely holding a * reference but should otherwise not modify it's state). */ if (ni->ni_associd == 0) return; mlmedebug(vap, ni->ni_macaddr, mop->op, mop->reason); if (mop->op == IEEE80211_MLME_DEAUTH) { IEEE80211_SEND_MGMT(ni, IEEE80211_FC0_SUBTYPE_DEAUTH, mop->reason); } else { IEEE80211_SEND_MGMT(ni, IEEE80211_FC0_SUBTYPE_DISASSOC, mop->reason); } ieee80211_node_leave(ni); } static int setmlme_dropsta(struct ieee80211vap *vap, const uint8_t mac[IEEE80211_ADDR_LEN], struct mlmeop *mlmeop) { struct ieee80211_node_table *nt = &vap->iv_ic->ic_sta; struct ieee80211_node *ni; int error = 0; /* NB: the broadcast address means do 'em all */ if (!IEEE80211_ADDR_EQ(mac, vap->iv_ifp->if_broadcastaddr)) { IEEE80211_NODE_LOCK(nt); ni = ieee80211_find_node_locked(nt, mac); IEEE80211_NODE_UNLOCK(nt); /* * Don't do the node update inside the node * table lock. This unfortunately causes LORs * with drivers and their TX paths. */ if (ni != NULL) { domlme(mlmeop, ni); ieee80211_free_node(ni); } else error = ENOENT; } else { ieee80211_iterate_nodes(nt, domlme, mlmeop); } return error; } static int setmlme_common(struct ieee80211vap *vap, int op, const uint8_t mac[IEEE80211_ADDR_LEN], int reason) { struct ieee80211com *ic = vap->iv_ic; struct ieee80211_node_table *nt = &ic->ic_sta; struct ieee80211_node *ni; struct mlmeop mlmeop; int error; error = 0; switch (op) { case IEEE80211_MLME_DISASSOC: case IEEE80211_MLME_DEAUTH: switch (vap->iv_opmode) { case IEEE80211_M_STA: mlmedebug(vap, vap->iv_bss->ni_macaddr, op, reason); /* XXX not quite right */ ieee80211_new_state(vap, IEEE80211_S_INIT, reason); break; case IEEE80211_M_HOSTAP: mlmeop.vap = vap; mlmeop.op = op; mlmeop.reason = reason; error = setmlme_dropsta(vap, mac, &mlmeop); break; case IEEE80211_M_WDS: /* XXX user app should send raw frame? */ if (op != IEEE80211_MLME_DEAUTH) { error = EINVAL; break; } #if 0 /* XXX accept any address, simplifies user code */ if (!IEEE80211_ADDR_EQ(mac, vap->iv_bss->ni_macaddr)) { error = EINVAL; break; } #endif mlmedebug(vap, vap->iv_bss->ni_macaddr, op, reason); ni = ieee80211_ref_node(vap->iv_bss); IEEE80211_SEND_MGMT(ni, IEEE80211_FC0_SUBTYPE_DEAUTH, reason); ieee80211_free_node(ni); break; case IEEE80211_M_MBSS: IEEE80211_NODE_LOCK(nt); ni = ieee80211_find_node_locked(nt, mac); /* * Don't do the node update inside the node * table lock. This unfortunately causes LORs * with drivers and their TX paths. 
*/ IEEE80211_NODE_UNLOCK(nt); if (ni != NULL) { ieee80211_node_leave(ni); ieee80211_free_node(ni); } else { error = ENOENT; } break; default: error = EINVAL; break; } break; case IEEE80211_MLME_AUTHORIZE: case IEEE80211_MLME_UNAUTHORIZE: if (vap->iv_opmode != IEEE80211_M_HOSTAP && vap->iv_opmode != IEEE80211_M_WDS) { error = EINVAL; break; } IEEE80211_NODE_LOCK(nt); ni = ieee80211_find_vap_node_locked(nt, vap, mac); /* * Don't do the node update inside the node * table lock. This unfortunately causes LORs * with drivers and their TX paths. */ IEEE80211_NODE_UNLOCK(nt); if (ni != NULL) { mlmedebug(vap, mac, op, reason); if (op == IEEE80211_MLME_AUTHORIZE) ieee80211_node_authorize(ni); else ieee80211_node_unauthorize(ni); ieee80211_free_node(ni); } else error = ENOENT; break; case IEEE80211_MLME_AUTH: if (vap->iv_opmode != IEEE80211_M_HOSTAP) { error = EINVAL; break; } IEEE80211_NODE_LOCK(nt); ni = ieee80211_find_vap_node_locked(nt, vap, mac); /* * Don't do the node update inside the node * table lock. This unfortunately causes LORs * with drivers and their TX paths. */ IEEE80211_NODE_UNLOCK(nt); if (ni != NULL) { mlmedebug(vap, mac, op, reason); if (reason == IEEE80211_STATUS_SUCCESS) { IEEE80211_SEND_MGMT(ni, IEEE80211_FC0_SUBTYPE_AUTH, 2); /* * For shared key auth, just continue the * exchange. Otherwise when 802.1x is not in * use mark the port authorized at this point * so traffic can flow. */ if (ni->ni_authmode != IEEE80211_AUTH_8021X && ni->ni_challenge == NULL) ieee80211_node_authorize(ni); } else { vap->iv_stats.is_rx_acl++; ieee80211_send_error(ni, ni->ni_macaddr, IEEE80211_FC0_SUBTYPE_AUTH, 2|(reason<<16)); ieee80211_node_leave(ni); } ieee80211_free_node(ni); } else error = ENOENT; break; default: error = EINVAL; break; } return error; } struct scanlookup { const uint8_t *mac; int esslen; const uint8_t *essid; const struct ieee80211_scan_entry *se; }; /* * Match mac address and any ssid. */ static void mlmelookup(void *arg, const struct ieee80211_scan_entry *se) { struct scanlookup *look = arg; if (!IEEE80211_ADDR_EQ(look->mac, se->se_macaddr)) return; if (look->esslen != 0) { if (se->se_ssid[1] != look->esslen) return; if (memcmp(look->essid, se->se_ssid+2, look->esslen)) return; } look->se = se; } static int setmlme_assoc_sta(struct ieee80211vap *vap, const uint8_t mac[IEEE80211_ADDR_LEN], int ssid_len, const uint8_t ssid[IEEE80211_NWID_LEN]) { struct scanlookup lookup; KASSERT(vap->iv_opmode == IEEE80211_M_STA, ("expected opmode STA not %s", ieee80211_opmode_name[vap->iv_opmode])); /* NB: this is racey if roaming is !manual */ lookup.se = NULL; lookup.mac = mac; lookup.esslen = ssid_len; lookup.essid = ssid; ieee80211_scan_iterate(vap, mlmelookup, &lookup); if (lookup.se == NULL) return ENOENT; mlmedebug(vap, mac, IEEE80211_MLME_ASSOC, 0); if (!ieee80211_sta_join(vap, lookup.se->se_chan, lookup.se)) return EIO; /* XXX unique but could be better */ return 0; } static int setmlme_assoc_adhoc(struct ieee80211vap *vap, const uint8_t mac[IEEE80211_ADDR_LEN], int ssid_len, const uint8_t ssid[IEEE80211_NWID_LEN]) { struct ieee80211_scan_req *sr; int error; KASSERT(vap->iv_opmode == IEEE80211_M_IBSS || vap->iv_opmode == IEEE80211_M_AHDEMO, ("expected opmode IBSS or AHDEMO not %s", ieee80211_opmode_name[vap->iv_opmode])); if (ssid_len == 0) return EINVAL; sr = IEEE80211_MALLOC(sizeof(*sr), M_TEMP, IEEE80211_M_NOWAIT | IEEE80211_M_ZERO); if (sr == NULL) return ENOMEM; /* NB: IEEE80211_IOC_SSID call missing for ap_scan=2. 
*/ memset(vap->iv_des_ssid[0].ssid, 0, IEEE80211_NWID_LEN); vap->iv_des_ssid[0].len = ssid_len; memcpy(vap->iv_des_ssid[0].ssid, ssid, ssid_len); vap->iv_des_nssid = 1; sr->sr_flags = IEEE80211_IOC_SCAN_ACTIVE | IEEE80211_IOC_SCAN_ONCE; sr->sr_duration = IEEE80211_IOC_SCAN_FOREVER; memcpy(sr->sr_ssid[0].ssid, ssid, ssid_len); sr->sr_ssid[0].len = ssid_len; sr->sr_nssid = 1; error = ieee80211_scanreq(vap, sr); IEEE80211_FREE(sr, M_TEMP); return error; } static int ieee80211_ioctl_setmlme(struct ieee80211vap *vap, struct ieee80211req *ireq) { struct ieee80211req_mlme mlme; int error; if (ireq->i_len != sizeof(mlme)) return EINVAL; error = copyin(ireq->i_data, &mlme, sizeof(mlme)); if (error) return error; if (vap->iv_opmode == IEEE80211_M_STA && mlme.im_op == IEEE80211_MLME_ASSOC) return setmlme_assoc_sta(vap, mlme.im_macaddr, vap->iv_des_ssid[0].len, vap->iv_des_ssid[0].ssid); else if ((vap->iv_opmode == IEEE80211_M_IBSS || vap->iv_opmode == IEEE80211_M_AHDEMO) && mlme.im_op == IEEE80211_MLME_ASSOC) return setmlme_assoc_adhoc(vap, mlme.im_macaddr, mlme.im_ssid_len, mlme.im_ssid); else return setmlme_common(vap, mlme.im_op, mlme.im_macaddr, mlme.im_reason); } static int ieee80211_ioctl_macmac(struct ieee80211vap *vap, struct ieee80211req *ireq) { uint8_t mac[IEEE80211_ADDR_LEN]; const struct ieee80211_aclator *acl = vap->iv_acl; int error; if (ireq->i_len != sizeof(mac)) return EINVAL; error = copyin(ireq->i_data, mac, ireq->i_len); if (error) return error; if (acl == NULL) { acl = ieee80211_aclator_get("mac"); if (acl == NULL || !acl->iac_attach(vap)) return EINVAL; vap->iv_acl = acl; } if (ireq->i_type == IEEE80211_IOC_ADDMAC) acl->iac_add(vap, mac); else acl->iac_remove(vap, mac); return 0; } static int ieee80211_ioctl_setmaccmd(struct ieee80211vap *vap, struct ieee80211req *ireq) { const struct ieee80211_aclator *acl = vap->iv_acl; switch (ireq->i_val) { case IEEE80211_MACCMD_POLICY_OPEN: case IEEE80211_MACCMD_POLICY_ALLOW: case IEEE80211_MACCMD_POLICY_DENY: case IEEE80211_MACCMD_POLICY_RADIUS: if (acl == NULL) { acl = ieee80211_aclator_get("mac"); if (acl == NULL || !acl->iac_attach(vap)) return EINVAL; vap->iv_acl = acl; } acl->iac_setpolicy(vap, ireq->i_val); break; case IEEE80211_MACCMD_FLUSH: if (acl != NULL) acl->iac_flush(vap); /* NB: silently ignore when not in use */ break; case IEEE80211_MACCMD_DETACH: if (acl != NULL) { vap->iv_acl = NULL; acl->iac_detach(vap); } break; default: if (acl == NULL) return EINVAL; else return acl->iac_setioctl(vap, ireq); } return 0; } static int ieee80211_ioctl_setchanlist(struct ieee80211vap *vap, struct ieee80211req *ireq) { struct ieee80211com *ic = vap->iv_ic; uint8_t *chanlist, *list; int i, nchan, maxchan, error; if (ireq->i_len > sizeof(ic->ic_chan_active)) ireq->i_len = sizeof(ic->ic_chan_active); list = IEEE80211_MALLOC(ireq->i_len + IEEE80211_CHAN_BYTES, M_TEMP, IEEE80211_M_NOWAIT | IEEE80211_M_ZERO); if (list == NULL) return ENOMEM; error = copyin(ireq->i_data, list, ireq->i_len); if (error) { IEEE80211_FREE(list, M_TEMP); return error; } nchan = 0; chanlist = list + ireq->i_len; /* NB: zero'd already */ maxchan = ireq->i_len * NBBY; for (i = 0; i < ic->ic_nchans; i++) { const struct ieee80211_channel *c = &ic->ic_channels[i]; /* * Calculate the intersection of the user list and the * available channels so users can do things like specify * 1-255 to get all available channels. 
*/ if (c->ic_ieee < maxchan && isset(list, c->ic_ieee)) { setbit(chanlist, c->ic_ieee); nchan++; } } if (nchan == 0) { IEEE80211_FREE(list, M_TEMP); return EINVAL; } if (ic->ic_bsschan != IEEE80211_CHAN_ANYC && /* XXX */ isclr(chanlist, ic->ic_bsschan->ic_ieee)) ic->ic_bsschan = IEEE80211_CHAN_ANYC; memcpy(ic->ic_chan_active, chanlist, IEEE80211_CHAN_BYTES); ieee80211_scan_flush(vap); IEEE80211_FREE(list, M_TEMP); return ENETRESET; } static int ieee80211_ioctl_setstastats(struct ieee80211vap *vap, struct ieee80211req *ireq) { struct ieee80211_node *ni; uint8_t macaddr[IEEE80211_ADDR_LEN]; int error; /* * NB: we could copyin ieee80211req_sta_stats so apps * could make selective changes but that's overkill; * just clear all stats for now. */ if (ireq->i_len < IEEE80211_ADDR_LEN) return EINVAL; error = copyin(ireq->i_data, macaddr, IEEE80211_ADDR_LEN); if (error != 0) return error; ni = ieee80211_find_vap_node(&vap->iv_ic->ic_sta, vap, macaddr); if (ni == NULL) return ENOENT; /* XXX require ni_vap == vap? */ memset(&ni->ni_stats, 0, sizeof(ni->ni_stats)); ieee80211_free_node(ni); return 0; } static int ieee80211_ioctl_setstatxpow(struct ieee80211vap *vap, struct ieee80211req *ireq) { struct ieee80211_node *ni; struct ieee80211req_sta_txpow txpow; int error; if (ireq->i_len != sizeof(txpow)) return EINVAL; error = copyin(ireq->i_data, &txpow, sizeof(txpow)); if (error != 0) return error; ni = ieee80211_find_vap_node(&vap->iv_ic->ic_sta, vap, txpow.it_macaddr); if (ni == NULL) return ENOENT; ni->ni_txpower = txpow.it_txpow; ieee80211_free_node(ni); return error; } static int ieee80211_ioctl_setwmeparam(struct ieee80211vap *vap, struct ieee80211req *ireq) { struct ieee80211com *ic = vap->iv_ic; struct ieee80211_wme_state *wme = &ic->ic_wme; struct wmeParams *wmep, *chanp; int isbss, ac, aggrmode; if ((ic->ic_caps & IEEE80211_C_WME) == 0) return EOPNOTSUPP; isbss = (ireq->i_len & IEEE80211_WMEPARAM_BSS); ac = (ireq->i_len & IEEE80211_WMEPARAM_VAL); aggrmode = (wme->wme_flags & WME_F_AGGRMODE); if (ac >= WME_NUM_AC) ac = WME_AC_BE; if (isbss) { chanp = &wme->wme_bssChanParams.cap_wmeParams[ac]; wmep = &wme->wme_wmeBssChanParams.cap_wmeParams[ac]; } else { chanp = &wme->wme_chanParams.cap_wmeParams[ac]; wmep = &wme->wme_wmeChanParams.cap_wmeParams[ac]; } switch (ireq->i_type) { case IEEE80211_IOC_WME_CWMIN: /* WME: CWmin */ wmep->wmep_logcwmin = ireq->i_val; if (!isbss || !aggrmode) chanp->wmep_logcwmin = ireq->i_val; break; case IEEE80211_IOC_WME_CWMAX: /* WME: CWmax */ wmep->wmep_logcwmax = ireq->i_val; if (!isbss || !aggrmode) chanp->wmep_logcwmax = ireq->i_val; break; case IEEE80211_IOC_WME_AIFS: /* WME: AIFS */ wmep->wmep_aifsn = ireq->i_val; if (!isbss || !aggrmode) chanp->wmep_aifsn = ireq->i_val; break; case IEEE80211_IOC_WME_TXOPLIMIT: /* WME: txops limit */ wmep->wmep_txopLimit = ireq->i_val; if (!isbss || !aggrmode) chanp->wmep_txopLimit = ireq->i_val; break; case IEEE80211_IOC_WME_ACM: /* WME: ACM (bss only) */ wmep->wmep_acm = ireq->i_val; if (!aggrmode) chanp->wmep_acm = ireq->i_val; break; case IEEE80211_IOC_WME_ACKPOLICY: /* WME: ACK policy (!bss only)*/ wmep->wmep_noackPolicy = chanp->wmep_noackPolicy = (ireq->i_val) == 0; break; } ieee80211_wme_updateparams(vap); return 0; } static int find11gchannel(struct ieee80211com *ic, int start, int freq) { const struct ieee80211_channel *c; int i; for (i = start+1; i < ic->ic_nchans; i++) { c = &ic->ic_channels[i]; if (c->ic_freq == freq && IEEE80211_IS_CHAN_ANYG(c)) return 1; } /* NB: should not be needed but in case things are mis-sorted 
*/ for (i = 0; i < start; i++) { c = &ic->ic_channels[i]; if (c->ic_freq == freq && IEEE80211_IS_CHAN_ANYG(c)) return 1; } return 0; } static struct ieee80211_channel * findchannel(struct ieee80211com *ic, int ieee, int mode) { static const u_int chanflags[IEEE80211_MODE_MAX] = { [IEEE80211_MODE_AUTO] = 0, [IEEE80211_MODE_11A] = IEEE80211_CHAN_A, [IEEE80211_MODE_11B] = IEEE80211_CHAN_B, [IEEE80211_MODE_11G] = IEEE80211_CHAN_G, [IEEE80211_MODE_FH] = IEEE80211_CHAN_FHSS, [IEEE80211_MODE_TURBO_A] = IEEE80211_CHAN_108A, [IEEE80211_MODE_TURBO_G] = IEEE80211_CHAN_108G, [IEEE80211_MODE_STURBO_A] = IEEE80211_CHAN_STURBO, [IEEE80211_MODE_HALF] = IEEE80211_CHAN_HALF, [IEEE80211_MODE_QUARTER] = IEEE80211_CHAN_QUARTER, /* NB: handled specially below */ [IEEE80211_MODE_11NA] = IEEE80211_CHAN_A, [IEEE80211_MODE_11NG] = IEEE80211_CHAN_G, }; u_int modeflags; int i; modeflags = chanflags[mode]; for (i = 0; i < ic->ic_nchans; i++) { struct ieee80211_channel *c = &ic->ic_channels[i]; if (c->ic_ieee != ieee) continue; if (mode == IEEE80211_MODE_AUTO) { /* ignore turbo channels for autoselect */ if (IEEE80211_IS_CHAN_TURBO(c)) continue; /* * XXX special-case 11b/g channels so we * always select the g channel if both * are present. * XXX prefer HT to non-HT? */ if (!IEEE80211_IS_CHAN_B(c) || !find11gchannel(ic, i, c->ic_freq)) return c; } else { /* must check HT specially */ if ((mode == IEEE80211_MODE_11NA || mode == IEEE80211_MODE_11NG) && !IEEE80211_IS_CHAN_HT(c)) continue; if ((c->ic_flags & modeflags) == modeflags) return c; } } return NULL; } /* * Check the specified against any desired mode (aka netband). * This is only used (presently) when operating in hostap mode * to enforce consistency. */ static int check_mode_consistency(const struct ieee80211_channel *c, int mode) { KASSERT(c != IEEE80211_CHAN_ANYC, ("oops, no channel")); switch (mode) { case IEEE80211_MODE_11B: return (IEEE80211_IS_CHAN_B(c)); case IEEE80211_MODE_11G: return (IEEE80211_IS_CHAN_ANYG(c) && !IEEE80211_IS_CHAN_HT(c)); case IEEE80211_MODE_11A: return (IEEE80211_IS_CHAN_A(c) && !IEEE80211_IS_CHAN_HT(c)); case IEEE80211_MODE_STURBO_A: return (IEEE80211_IS_CHAN_STURBO(c)); case IEEE80211_MODE_11NA: return (IEEE80211_IS_CHAN_HTA(c)); case IEEE80211_MODE_11NG: return (IEEE80211_IS_CHAN_HTG(c)); } return 1; } /* * Common code to set the current channel. If the device * is up and running this may result in an immediate channel * change or a kick of the state machine. */ static int setcurchan(struct ieee80211vap *vap, struct ieee80211_channel *c) { struct ieee80211com *ic = vap->iv_ic; int error; if (c != IEEE80211_CHAN_ANYC) { if (IEEE80211_IS_CHAN_RADAR(c)) return EBUSY; /* XXX better code? */ if (vap->iv_opmode == IEEE80211_M_HOSTAP) { if (IEEE80211_IS_CHAN_NOHOSTAP(c)) return EINVAL; if (!check_mode_consistency(c, vap->iv_des_mode)) return EINVAL; } else if (vap->iv_opmode == IEEE80211_M_IBSS) { if (IEEE80211_IS_CHAN_NOADHOC(c)) return EINVAL; } if ((vap->iv_state == IEEE80211_S_RUN || vap->iv_state == IEEE80211_S_SLEEP) && vap->iv_bss->ni_chan == c) return 0; /* NB: nothing to do */ } vap->iv_des_chan = c; error = 0; if (vap->iv_opmode == IEEE80211_M_MONITOR && vap->iv_des_chan != IEEE80211_CHAN_ANYC) { /* * Monitor mode can switch directly. 
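 *
 * [Illustrative aside, not part of this revision: the mode test used by
 *  findchannel() above reduces to "every flag bit the mode requires must be
 *  present in the channel's ic_flags".  A minimal restatement:
 *
 *	// does channel c satisfy the flag mask required by a phy mode?
 *	static int
 *	chan_matches_mode(const struct ieee80211_channel *c, uint32_t modeflags)
 *	{
 *		return ((c->ic_flags & modeflags) == modeflags);
 *	}
 *
 *  Because the HT entries in ic_channels[] also carry the legacy 11a/11g
 *  flag bits, they satisfy the plain-mode masks too; that is why the
 *  chanflags table above maps 11NA/11NG onto CHAN_A/CHAN_G and then adds
 *  the explicit IEEE80211_IS_CHAN_HT() check.]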
*/ if (IFNET_IS_UP_RUNNING(vap->iv_ifp)) { /* XXX need state machine for other vap's to follow */ ieee80211_setcurchan(ic, vap->iv_des_chan); vap->iv_bss->ni_chan = ic->ic_curchan; } else { ic->ic_curchan = vap->iv_des_chan; ic->ic_rt = ieee80211_get_ratetable(ic->ic_curchan); } } else { /* * Need to go through the state machine in case we * need to reassociate or the like. The state machine * will pickup the desired channel and avoid scanning. */ if (IS_UP_AUTO(vap)) ieee80211_new_state(vap, IEEE80211_S_SCAN, 0); else if (vap->iv_des_chan != IEEE80211_CHAN_ANYC) { /* * When not up+running and a real channel has * been specified fix the current channel so * there is immediate feedback; e.g. via ifconfig. */ ic->ic_curchan = vap->iv_des_chan; ic->ic_rt = ieee80211_get_ratetable(ic->ic_curchan); } } return error; } /* * Old api for setting the current channel; this is * deprecated because channel numbers are ambiguous. */ static int ieee80211_ioctl_setchannel(struct ieee80211vap *vap, const struct ieee80211req *ireq) { struct ieee80211com *ic = vap->iv_ic; struct ieee80211_channel *c; /* XXX 0xffff overflows 16-bit signed */ if (ireq->i_val == 0 || ireq->i_val == (int16_t) IEEE80211_CHAN_ANY) { c = IEEE80211_CHAN_ANYC; } else { struct ieee80211_channel *c2; c = findchannel(ic, ireq->i_val, vap->iv_des_mode); if (c == NULL) { c = findchannel(ic, ireq->i_val, IEEE80211_MODE_AUTO); if (c == NULL) return EINVAL; } /* * Fine tune channel selection based on desired mode: * if 11b is requested, find the 11b version of any * 11g channel returned, * if static turbo, find the turbo version of any * 11a channel return, * if 11na is requested, find the ht version of any * 11a channel returned, * if 11ng is requested, find the ht version of any * 11g channel returned, * otherwise we should be ok with what we've got. */ switch (vap->iv_des_mode) { case IEEE80211_MODE_11B: if (IEEE80211_IS_CHAN_ANYG(c)) { c2 = findchannel(ic, ireq->i_val, IEEE80211_MODE_11B); /* NB: should not happen, =>'s 11g w/o 11b */ if (c2 != NULL) c = c2; } break; case IEEE80211_MODE_TURBO_A: if (IEEE80211_IS_CHAN_A(c)) { c2 = findchannel(ic, ireq->i_val, IEEE80211_MODE_TURBO_A); if (c2 != NULL) c = c2; } break; case IEEE80211_MODE_11NA: if (IEEE80211_IS_CHAN_A(c)) { c2 = findchannel(ic, ireq->i_val, IEEE80211_MODE_11NA); if (c2 != NULL) c = c2; } break; case IEEE80211_MODE_11NG: if (IEEE80211_IS_CHAN_ANYG(c)) { c2 = findchannel(ic, ireq->i_val, IEEE80211_MODE_11NG); if (c2 != NULL) c = c2; } break; default: /* NB: no static turboG */ break; } } return setcurchan(vap, c); } /* * New/current api for setting the current channel; a complete * channel description is provide so there is no ambiguity in * identifying the channel. 
*/ static int ieee80211_ioctl_setcurchan(struct ieee80211vap *vap, const struct ieee80211req *ireq) { struct ieee80211com *ic = vap->iv_ic; struct ieee80211_channel chan, *c; int error; if (ireq->i_len != sizeof(chan)) return EINVAL; error = copyin(ireq->i_data, &chan, sizeof(chan)); if (error != 0) return error; /* XXX 0xffff overflows 16-bit signed */ if (chan.ic_freq == 0 || chan.ic_freq == IEEE80211_CHAN_ANY) { c = IEEE80211_CHAN_ANYC; } else { c = ieee80211_find_channel(ic, chan.ic_freq, chan.ic_flags); if (c == NULL) return EINVAL; } return setcurchan(vap, c); } static int ieee80211_ioctl_setregdomain(struct ieee80211vap *vap, const struct ieee80211req *ireq) { struct ieee80211_regdomain_req *reg; int nchans, error; nchans = 1 + ((ireq->i_len - sizeof(struct ieee80211_regdomain_req)) / sizeof(struct ieee80211_channel)); if (!(1 <= nchans && nchans <= IEEE80211_CHAN_MAX)) { IEEE80211_DPRINTF(vap, IEEE80211_MSG_IOCTL, "%s: bad # chans, i_len %d nchans %d\n", __func__, ireq->i_len, nchans); return EINVAL; } reg = (struct ieee80211_regdomain_req *) IEEE80211_MALLOC(IEEE80211_REGDOMAIN_SIZE(nchans), M_TEMP, IEEE80211_M_NOWAIT | IEEE80211_M_ZERO); if (reg == NULL) { IEEE80211_DPRINTF(vap, IEEE80211_MSG_IOCTL, "%s: no memory, nchans %d\n", __func__, nchans); return ENOMEM; } error = copyin(ireq->i_data, reg, IEEE80211_REGDOMAIN_SIZE(nchans)); if (error == 0) { /* NB: validate inline channel count against storage size */ if (reg->chaninfo.ic_nchans != nchans) { IEEE80211_DPRINTF(vap, IEEE80211_MSG_IOCTL, "%s: chan cnt mismatch, %d != %d\n", __func__, reg->chaninfo.ic_nchans, nchans); error = EINVAL; } else error = ieee80211_setregdomain(vap, reg); } IEEE80211_FREE(reg, M_TEMP); return (error == 0 ? ENETRESET : error); } static int ieee80211_ioctl_setroam(struct ieee80211vap *vap, const struct ieee80211req *ireq) { if (ireq->i_len != sizeof(vap->iv_roamparms)) return EINVAL; /* XXX validate params */ /* XXX? ENETRESET to push to device? */ return copyin(ireq->i_data, vap->iv_roamparms, sizeof(vap->iv_roamparms)); } static int checkrate(const struct ieee80211_rateset *rs, int rate) { int i; if (rate == IEEE80211_FIXED_RATE_NONE) return 1; for (i = 0; i < rs->rs_nrates; i++) if ((rs->rs_rates[i] & IEEE80211_RATE_VAL) == rate) return 1; return 0; } static int checkmcs(int mcs) { if (mcs == IEEE80211_FIXED_RATE_NONE) return 1; if ((mcs & IEEE80211_RATE_MCS) == 0) /* MCS always have 0x80 set */ return 0; return (mcs & 0x7f) <= 15; /* XXX could search ht rate set */ } static int ieee80211_ioctl_settxparams(struct ieee80211vap *vap, const struct ieee80211req *ireq) { struct ieee80211com *ic = vap->iv_ic; struct ieee80211_txparams_req parms; /* XXX stack use? 
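 *
 * [Illustrative aside, not part of this revision: the fixed-rate values
 *  validated by checkrate()/checkmcs() above are either a legacy rate in
 *  units of 500 kb/s (so 54 Mb/s is 108) or an HT MCS index with the
 *  IEEE80211_RATE_MCS high bit (0x80) set.  A tiny sketch of that encoding;
 *  the ex_* names are invented for illustration:
 *
 *	#define EX_RATE_MCS	0x80		// mirrors IEEE80211_RATE_MCS
 *
 *	static uint8_t
 *	ex_encode_mcs(int mcs)			// ex_encode_mcs(7) == 0x87
 *	{
 *		return (EX_RATE_MCS | (mcs & 0x7f));
 *	}
 *	// a fixed legacy rate is just the rate in 500 kb/s units: 54 Mb/s -> 108
 *
 *  checkmcs() additionally caps the index at 15, i.e. the first two spatial
 *  streams, as its XXX comment notes.]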
*/ struct ieee80211_txparam *src, *dst; const struct ieee80211_rateset *rs; int error, mode, changed, is11n, nmodes; /* NB: accept short requests for backwards compat */ if (ireq->i_len > sizeof(parms)) return EINVAL; error = copyin(ireq->i_data, &parms, ireq->i_len); if (error != 0) return error; nmodes = ireq->i_len / sizeof(struct ieee80211_txparam); changed = 0; /* validate parameters and check if anything changed */ for (mode = IEEE80211_MODE_11A; mode < nmodes; mode++) { if (isclr(ic->ic_modecaps, mode)) continue; src = &parms.params[mode]; dst = &vap->iv_txparms[mode]; rs = &ic->ic_sup_rates[mode]; /* NB: 11n maps to legacy */ is11n = (mode == IEEE80211_MODE_11NA || mode == IEEE80211_MODE_11NG); if (src->ucastrate != dst->ucastrate) { if (!checkrate(rs, src->ucastrate) && (!is11n || !checkmcs(src->ucastrate))) return EINVAL; changed++; } if (src->mcastrate != dst->mcastrate) { if (!checkrate(rs, src->mcastrate) && (!is11n || !checkmcs(src->mcastrate))) return EINVAL; changed++; } if (src->mgmtrate != dst->mgmtrate) { if (!checkrate(rs, src->mgmtrate) && (!is11n || !checkmcs(src->mgmtrate))) return EINVAL; changed++; } if (src->maxretry != dst->maxretry) /* NB: no bounds */ changed++; } if (changed) { /* * Copy new parameters in place and notify the * driver so it can push state to the device. */ for (mode = IEEE80211_MODE_11A; mode < nmodes; mode++) { if (isset(ic->ic_modecaps, mode)) vap->iv_txparms[mode] = parms.params[mode]; } /* XXX could be more intelligent, e.g. don't reset if setting not being used */ return ENETRESET; } return 0; } /* * Application Information Element support. */ static int setappie(struct ieee80211_appie **aie, const struct ieee80211req *ireq) { struct ieee80211_appie *app = *aie; struct ieee80211_appie *napp; int error; if (ireq->i_len == 0) { /* delete any existing ie */ if (app != NULL) { *aie = NULL; /* XXX racey */ IEEE80211_FREE(app, M_80211_NODE_IE); } return 0; } if (!(2 <= ireq->i_len && ireq->i_len <= IEEE80211_MAX_APPIE)) return EINVAL; /* * Allocate a new appie structure and copy in the user data. * When done swap in the new structure. Note that we do not * guard against users holding a ref to the old structure; * this must be handled outside this code. 
* * XXX bad bad bad */ napp = (struct ieee80211_appie *) IEEE80211_MALLOC( sizeof(struct ieee80211_appie) + ireq->i_len, M_80211_NODE_IE, IEEE80211_M_NOWAIT); if (napp == NULL) return ENOMEM; /* XXX holding ic lock */ error = copyin(ireq->i_data, napp->ie_data, ireq->i_len); if (error) { IEEE80211_FREE(napp, M_80211_NODE_IE); return error; } napp->ie_len = ireq->i_len; *aie = napp; if (app != NULL) IEEE80211_FREE(app, M_80211_NODE_IE); return 0; } static void setwparsnie(struct ieee80211vap *vap, uint8_t *ie, int space) { /* validate data is present as best we can */ if (space == 0 || 2+ie[1] > space) return; if (ie[0] == IEEE80211_ELEMID_VENDOR) vap->iv_wpa_ie = ie; else if (ie[0] == IEEE80211_ELEMID_RSN) vap->iv_rsn_ie = ie; } static int ieee80211_ioctl_setappie_locked(struct ieee80211vap *vap, const struct ieee80211req *ireq, int fc0) { int error; IEEE80211_LOCK_ASSERT(vap->iv_ic); switch (fc0 & IEEE80211_FC0_SUBTYPE_MASK) { case IEEE80211_FC0_SUBTYPE_BEACON: if (vap->iv_opmode != IEEE80211_M_HOSTAP && vap->iv_opmode != IEEE80211_M_IBSS) { error = EINVAL; break; } error = setappie(&vap->iv_appie_beacon, ireq); if (error == 0) ieee80211_beacon_notify(vap, IEEE80211_BEACON_APPIE); break; case IEEE80211_FC0_SUBTYPE_PROBE_RESP: error = setappie(&vap->iv_appie_proberesp, ireq); break; case IEEE80211_FC0_SUBTYPE_ASSOC_RESP: if (vap->iv_opmode == IEEE80211_M_HOSTAP) error = setappie(&vap->iv_appie_assocresp, ireq); else error = EINVAL; break; case IEEE80211_FC0_SUBTYPE_PROBE_REQ: error = setappie(&vap->iv_appie_probereq, ireq); break; case IEEE80211_FC0_SUBTYPE_ASSOC_REQ: if (vap->iv_opmode == IEEE80211_M_STA) error = setappie(&vap->iv_appie_assocreq, ireq); else error = EINVAL; break; case (IEEE80211_APPIE_WPA & IEEE80211_FC0_SUBTYPE_MASK): error = setappie(&vap->iv_appie_wpa, ireq); if (error == 0) { /* * Must split single blob of data into separate * WPA and RSN ie's because they go in different * locations in the mgt frames. * XXX use IEEE80211_IOC_WPA2 so user code does split */ vap->iv_wpa_ie = NULL; vap->iv_rsn_ie = NULL; if (vap->iv_appie_wpa != NULL) { struct ieee80211_appie *appie = vap->iv_appie_wpa; uint8_t *data = appie->ie_data; /* XXX ie length validate is painful, cheat */ setwparsnie(vap, data, appie->ie_len); setwparsnie(vap, data + 2 + data[1], appie->ie_len - (2 + data[1])); } if (vap->iv_opmode == IEEE80211_M_HOSTAP || vap->iv_opmode == IEEE80211_M_IBSS) { /* * Must rebuild beacon frame as the update * mechanism doesn't handle WPA/RSN ie's. * Could extend it but it doesn't normally * change; this is just to deal with hostapd * plumbing the ie after the interface is up. 
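 *
 * [Illustrative aside, not part of this revision: setwparsnie() and the
 *  WPA/RSN split above rely on the standard 802.11 information-element
 *  layout: ie[0] is the element id, ie[1] the payload length, so each
 *  element occupies 2 + ie[1] bytes and the next one starts right after it.
 *  A minimal walker over a blob of concatenated elements:
 *
 *	static void
 *	ex_foreach_ie(const uint8_t *buf, int space,
 *	    void (*cb)(const uint8_t *ie, int ielen))
 *	{
 *		// stop once the remaining space cannot hold a complete element
 *		while (space >= 2 && 2 + buf[1] <= space) {
 *			cb(buf, 2 + buf[1]);
 *			space -= 2 + buf[1];
 *			buf += 2 + buf[1];
 *		}
 *	}
 *
 *  The two setwparsnie() calls above are exactly the first two iterations of
 *  such a walk over the user-supplied WPA+RSN blob.]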
*/ error = ENETRESET; } } break; default: error = EINVAL; break; } return error; } static int ieee80211_ioctl_setappie(struct ieee80211vap *vap, const struct ieee80211req *ireq) { struct ieee80211com *ic = vap->iv_ic; int error; uint8_t fc0; fc0 = ireq->i_val & 0xff; if ((fc0 & IEEE80211_FC0_TYPE_MASK) != IEEE80211_FC0_TYPE_MGT) return EINVAL; /* NB: could check iv_opmode and reject but hardly worth the effort */ IEEE80211_LOCK(ic); error = ieee80211_ioctl_setappie_locked(vap, ireq, fc0); IEEE80211_UNLOCK(ic); return error; } static int ieee80211_ioctl_chanswitch(struct ieee80211vap *vap, struct ieee80211req *ireq) { struct ieee80211com *ic = vap->iv_ic; struct ieee80211_chanswitch_req csr; struct ieee80211_channel *c; int error; if (ireq->i_len != sizeof(csr)) return EINVAL; error = copyin(ireq->i_data, &csr, sizeof(csr)); if (error != 0) return error; /* XXX adhoc mode not supported */ if (vap->iv_opmode != IEEE80211_M_HOSTAP || (vap->iv_flags & IEEE80211_F_DOTH) == 0) return EOPNOTSUPP; c = ieee80211_find_channel(ic, csr.csa_chan.ic_freq, csr.csa_chan.ic_flags); if (c == NULL) return ENOENT; IEEE80211_LOCK(ic); if ((ic->ic_flags & IEEE80211_F_CSAPENDING) == 0) ieee80211_csa_startswitch(ic, c, csr.csa_mode, csr.csa_count); else if (csr.csa_count == 0) ieee80211_csa_cancelswitch(ic); else error = EBUSY; IEEE80211_UNLOCK(ic); return error; } static int ieee80211_scanreq(struct ieee80211vap *vap, struct ieee80211_scan_req *sr) { #define IEEE80211_IOC_SCAN_FLAGS \ (IEEE80211_IOC_SCAN_NOPICK | IEEE80211_IOC_SCAN_ACTIVE | \ IEEE80211_IOC_SCAN_PICK1ST | IEEE80211_IOC_SCAN_BGSCAN | \ IEEE80211_IOC_SCAN_ONCE | IEEE80211_IOC_SCAN_NOBCAST | \ IEEE80211_IOC_SCAN_NOJOIN | IEEE80211_IOC_SCAN_FLUSH | \ IEEE80211_IOC_SCAN_CHECK) struct ieee80211com *ic = vap->iv_ic; int error, i; /* convert duration */ if (sr->sr_duration == IEEE80211_IOC_SCAN_FOREVER) sr->sr_duration = IEEE80211_SCAN_FOREVER; else { if (sr->sr_duration < IEEE80211_IOC_SCAN_DURATION_MIN || sr->sr_duration > IEEE80211_IOC_SCAN_DURATION_MAX) return EINVAL; sr->sr_duration = msecs_to_ticks(sr->sr_duration); if (sr->sr_duration < 1) sr->sr_duration = 1; } /* convert min/max channel dwell */ if (sr->sr_mindwell != 0) { sr->sr_mindwell = msecs_to_ticks(sr->sr_mindwell); if (sr->sr_mindwell < 1) sr->sr_mindwell = 1; } if (sr->sr_maxdwell != 0) { sr->sr_maxdwell = msecs_to_ticks(sr->sr_maxdwell); if (sr->sr_maxdwell < 1) sr->sr_maxdwell = 1; } /* NB: silently reduce ssid count to what is supported */ if (sr->sr_nssid > IEEE80211_SCAN_MAX_SSID) sr->sr_nssid = IEEE80211_SCAN_MAX_SSID; for (i = 0; i < sr->sr_nssid; i++) if (sr->sr_ssid[i].len > IEEE80211_NWID_LEN) return EINVAL; /* cleanse flags just in case, could reject if invalid flags */ sr->sr_flags &= IEEE80211_IOC_SCAN_FLAGS; /* * Add an implicit NOPICK if the vap is not marked UP. This * allows applications to scan without joining a bss (or picking * a channel and setting up a bss) and without forcing manual * roaming mode--you just need to mark the parent device UP. */ if ((vap->iv_ifp->if_flags & IFF_UP) == 0) sr->sr_flags |= IEEE80211_IOC_SCAN_NOPICK; IEEE80211_DPRINTF(vap, IEEE80211_MSG_SCAN, "%s: flags 0x%x%s duration 0x%x mindwell %u maxdwell %u nssid %d\n", __func__, sr->sr_flags, (vap->iv_ifp->if_flags & IFF_UP) == 0 ? 
" (!IFF_UP)" : "", sr->sr_duration, sr->sr_mindwell, sr->sr_maxdwell, sr->sr_nssid); /* * If we are in INIT state then the driver has never had a chance * to setup hardware state to do a scan; we must use the state * machine to get us up to the SCAN state but once we reach SCAN * state we then want to use the supplied params. Stash the * parameters in the vap and mark IEEE80211_FEXT_SCANREQ; the * state machines will recognize this and use the stashed params * to issue the scan request. * * Otherwise just invoke the scan machinery directly. */ IEEE80211_LOCK(ic); if (ic->ic_nrunning == 0) { IEEE80211_UNLOCK(ic); return ENXIO; } if (vap->iv_state == IEEE80211_S_INIT) { /* NB: clobbers previous settings */ vap->iv_scanreq_flags = sr->sr_flags; vap->iv_scanreq_duration = sr->sr_duration; vap->iv_scanreq_nssid = sr->sr_nssid; for (i = 0; i < sr->sr_nssid; i++) { vap->iv_scanreq_ssid[i].len = sr->sr_ssid[i].len; memcpy(vap->iv_scanreq_ssid[i].ssid, sr->sr_ssid[i].ssid, sr->sr_ssid[i].len); } vap->iv_flags_ext |= IEEE80211_FEXT_SCANREQ; IEEE80211_UNLOCK(ic); ieee80211_new_state(vap, IEEE80211_S_SCAN, 0); } else { vap->iv_flags_ext &= ~IEEE80211_FEXT_SCANREQ; IEEE80211_UNLOCK(ic); if (sr->sr_flags & IEEE80211_IOC_SCAN_CHECK) { error = ieee80211_check_scan(vap, sr->sr_flags, sr->sr_duration, sr->sr_mindwell, sr->sr_maxdwell, sr->sr_nssid, /* NB: cheat, we assume structures are compatible */ (const struct ieee80211_scan_ssid *) &sr->sr_ssid[0]); } else { error = ieee80211_start_scan(vap, sr->sr_flags, sr->sr_duration, sr->sr_mindwell, sr->sr_maxdwell, sr->sr_nssid, /* NB: cheat, we assume structures are compatible */ (const struct ieee80211_scan_ssid *) &sr->sr_ssid[0]); } if (error == 0) return EINPROGRESS; } return 0; #undef IEEE80211_IOC_SCAN_FLAGS } static int ieee80211_ioctl_scanreq(struct ieee80211vap *vap, struct ieee80211req *ireq) { struct ieee80211_scan_req *sr; int error; if (ireq->i_len != sizeof(*sr)) return EINVAL; sr = IEEE80211_MALLOC(sizeof(*sr), M_TEMP, IEEE80211_M_NOWAIT | IEEE80211_M_ZERO); if (sr == NULL) return ENOMEM; error = copyin(ireq->i_data, sr, sizeof(*sr)); if (error != 0) goto bad; error = ieee80211_scanreq(vap, sr); bad: IEEE80211_FREE(sr, M_TEMP); return error; } static int ieee80211_ioctl_setstavlan(struct ieee80211vap *vap, struct ieee80211req *ireq) { struct ieee80211_node *ni; struct ieee80211req_sta_vlan vlan; int error; if (ireq->i_len != sizeof(vlan)) return EINVAL; error = copyin(ireq->i_data, &vlan, sizeof(vlan)); if (error != 0) return error; if (!IEEE80211_ADDR_EQ(vlan.sv_macaddr, zerobssid)) { ni = ieee80211_find_vap_node(&vap->iv_ic->ic_sta, vap, vlan.sv_macaddr); if (ni == NULL) return ENOENT; } else ni = ieee80211_ref_node(vap->iv_bss); ni->ni_vlan = vlan.sv_vlan; ieee80211_free_node(ni); return error; } static int isvap11g(const struct ieee80211vap *vap) { const struct ieee80211_node *bss = vap->iv_bss; return bss->ni_chan != IEEE80211_CHAN_ANYC && IEEE80211_IS_CHAN_ANYG(bss->ni_chan); } static int isvapht(const struct ieee80211vap *vap) { const struct ieee80211_node *bss = vap->iv_bss; return bss->ni_chan != IEEE80211_CHAN_ANYC && IEEE80211_IS_CHAN_HT(bss->ni_chan); } /* * Dummy ioctl set handler so the linker set is defined. 
*/ static int dummy_ioctl_set(struct ieee80211vap *vap, struct ieee80211req *ireq) { return ENOSYS; } IEEE80211_IOCTL_SET(dummy, dummy_ioctl_set); static int ieee80211_ioctl_setdefault(struct ieee80211vap *vap, struct ieee80211req *ireq) { ieee80211_ioctl_setfunc * const *set; int error; SET_FOREACH(set, ieee80211_ioctl_setset) { error = (*set)(vap, ireq); if (error != ENOSYS) return error; } return EINVAL; } static int ieee80211_ioctl_set80211(struct ieee80211vap *vap, u_long cmd, struct ieee80211req *ireq) { struct ieee80211com *ic = vap->iv_ic; int error; const struct ieee80211_authenticator *auth; uint8_t tmpkey[IEEE80211_KEYBUF_SIZE]; char tmpssid[IEEE80211_NWID_LEN]; uint8_t tmpbssid[IEEE80211_ADDR_LEN]; struct ieee80211_key *k; u_int kid; uint32_t flags; error = 0; switch (ireq->i_type) { case IEEE80211_IOC_SSID: if (ireq->i_val != 0 || ireq->i_len > IEEE80211_NWID_LEN) return EINVAL; error = copyin(ireq->i_data, tmpssid, ireq->i_len); if (error) break; memset(vap->iv_des_ssid[0].ssid, 0, IEEE80211_NWID_LEN); vap->iv_des_ssid[0].len = ireq->i_len; memcpy(vap->iv_des_ssid[0].ssid, tmpssid, ireq->i_len); vap->iv_des_nssid = (ireq->i_len > 0); error = ENETRESET; break; case IEEE80211_IOC_WEP: switch (ireq->i_val) { case IEEE80211_WEP_OFF: vap->iv_flags &= ~IEEE80211_F_PRIVACY; vap->iv_flags &= ~IEEE80211_F_DROPUNENC; break; case IEEE80211_WEP_ON: vap->iv_flags |= IEEE80211_F_PRIVACY; vap->iv_flags |= IEEE80211_F_DROPUNENC; break; case IEEE80211_WEP_MIXED: vap->iv_flags |= IEEE80211_F_PRIVACY; vap->iv_flags &= ~IEEE80211_F_DROPUNENC; break; } error = ENETRESET; break; case IEEE80211_IOC_WEPKEY: kid = (u_int) ireq->i_val; if (kid >= IEEE80211_WEP_NKID) return EINVAL; k = &vap->iv_nw_keys[kid]; if (ireq->i_len == 0) { /* zero-len =>'s delete any existing key */ (void) ieee80211_crypto_delkey(vap, k); break; } if (ireq->i_len > sizeof(tmpkey)) return EINVAL; memset(tmpkey, 0, sizeof(tmpkey)); error = copyin(ireq->i_data, tmpkey, ireq->i_len); if (error) break; ieee80211_key_update_begin(vap); k->wk_keyix = kid; /* NB: force fixed key id */ if (ieee80211_crypto_newkey(vap, IEEE80211_CIPHER_WEP, IEEE80211_KEY_XMIT | IEEE80211_KEY_RECV, k)) { k->wk_keylen = ireq->i_len; memcpy(k->wk_key, tmpkey, sizeof(tmpkey)); IEEE80211_ADDR_COPY(k->wk_macaddr, vap->iv_myaddr); if (!ieee80211_crypto_setkey(vap, k)) error = EINVAL; } else error = EINVAL; ieee80211_key_update_end(vap); break; case IEEE80211_IOC_WEPTXKEY: kid = (u_int) ireq->i_val; if (kid >= IEEE80211_WEP_NKID && (uint16_t) kid != IEEE80211_KEYIX_NONE) return EINVAL; vap->iv_def_txkey = kid; break; case IEEE80211_IOC_AUTHMODE: switch (ireq->i_val) { case IEEE80211_AUTH_WPA: case IEEE80211_AUTH_8021X: /* 802.1x */ case IEEE80211_AUTH_OPEN: /* open */ case IEEE80211_AUTH_SHARED: /* shared-key */ case IEEE80211_AUTH_AUTO: /* auto */ auth = ieee80211_authenticator_get(ireq->i_val); if (auth == NULL) return EINVAL; break; default: return EINVAL; } switch (ireq->i_val) { case IEEE80211_AUTH_WPA: /* WPA w/ 802.1x */ vap->iv_flags |= IEEE80211_F_PRIVACY; ireq->i_val = IEEE80211_AUTH_8021X; break; case IEEE80211_AUTH_OPEN: /* open */ vap->iv_flags &= ~(IEEE80211_F_WPA|IEEE80211_F_PRIVACY); break; case IEEE80211_AUTH_SHARED: /* shared-key */ case IEEE80211_AUTH_8021X: /* 802.1x */ vap->iv_flags &= ~IEEE80211_F_WPA; /* both require a key so mark the PRIVACY capability */ vap->iv_flags |= IEEE80211_F_PRIVACY; break; case IEEE80211_AUTH_AUTO: /* auto */ vap->iv_flags &= ~IEEE80211_F_WPA; /* XXX PRIVACY handling? */ /* XXX what's the right way to do this? 
*/ break; } /* NB: authenticator attach/detach happens on state change */ vap->iv_bss->ni_authmode = ireq->i_val; /* XXX mixed/mode/usage? */ vap->iv_auth = auth; error = ENETRESET; break; case IEEE80211_IOC_CHANNEL: error = ieee80211_ioctl_setchannel(vap, ireq); break; case IEEE80211_IOC_POWERSAVE: switch (ireq->i_val) { case IEEE80211_POWERSAVE_OFF: if (vap->iv_flags & IEEE80211_F_PMGTON) { ieee80211_syncflag(vap, -IEEE80211_F_PMGTON); error = ERESTART; } break; case IEEE80211_POWERSAVE_ON: if ((vap->iv_caps & IEEE80211_C_PMGT) == 0) error = EOPNOTSUPP; else if ((vap->iv_flags & IEEE80211_F_PMGTON) == 0) { ieee80211_syncflag(vap, IEEE80211_F_PMGTON); error = ERESTART; } break; default: error = EINVAL; break; } break; case IEEE80211_IOC_POWERSAVESLEEP: if (ireq->i_val < 0) return EINVAL; ic->ic_lintval = ireq->i_val; error = ERESTART; break; case IEEE80211_IOC_RTSTHRESHOLD: if (!(IEEE80211_RTS_MIN <= ireq->i_val && ireq->i_val <= IEEE80211_RTS_MAX)) return EINVAL; vap->iv_rtsthreshold = ireq->i_val; error = ERESTART; break; case IEEE80211_IOC_PROTMODE: if (ireq->i_val > IEEE80211_PROT_RTSCTS) return EINVAL; ic->ic_protmode = (enum ieee80211_protmode)ireq->i_val; /* NB: if not operating in 11g this can wait */ if (ic->ic_bsschan != IEEE80211_CHAN_ANYC && IEEE80211_IS_CHAN_ANYG(ic->ic_bsschan)) error = ERESTART; break; case IEEE80211_IOC_TXPOWER: if ((ic->ic_caps & IEEE80211_C_TXPMGT) == 0) return EOPNOTSUPP; if (!(IEEE80211_TXPOWER_MIN <= ireq->i_val && ireq->i_val <= IEEE80211_TXPOWER_MAX)) return EINVAL; ic->ic_txpowlimit = ireq->i_val; error = ERESTART; break; case IEEE80211_IOC_ROAMING: if (!(IEEE80211_ROAMING_DEVICE <= ireq->i_val && ireq->i_val <= IEEE80211_ROAMING_MANUAL)) return EINVAL; vap->iv_roaming = (enum ieee80211_roamingmode)ireq->i_val; /* XXXX reset? */ break; case IEEE80211_IOC_PRIVACY: if (ireq->i_val) { /* XXX check for key state? */ vap->iv_flags |= IEEE80211_F_PRIVACY; } else vap->iv_flags &= ~IEEE80211_F_PRIVACY; /* XXX ERESTART? */ break; case IEEE80211_IOC_DROPUNENCRYPTED: if (ireq->i_val) vap->iv_flags |= IEEE80211_F_DROPUNENC; else vap->iv_flags &= ~IEEE80211_F_DROPUNENC; /* XXX ERESTART? */ break; case IEEE80211_IOC_WPAKEY: error = ieee80211_ioctl_setkey(vap, ireq); break; case IEEE80211_IOC_DELKEY: error = ieee80211_ioctl_delkey(vap, ireq); break; case IEEE80211_IOC_MLME: error = ieee80211_ioctl_setmlme(vap, ireq); break; case IEEE80211_IOC_COUNTERMEASURES: if (ireq->i_val) { if ((vap->iv_flags & IEEE80211_F_WPA) == 0) return EOPNOTSUPP; vap->iv_flags |= IEEE80211_F_COUNTERM; } else vap->iv_flags &= ~IEEE80211_F_COUNTERM; /* XXX ERESTART? 
*/ break; case IEEE80211_IOC_WPA: if (ireq->i_val > 3) return EINVAL; /* XXX verify ciphers available */ flags = vap->iv_flags & ~IEEE80211_F_WPA; switch (ireq->i_val) { case 0: /* wpa_supplicant calls this to clear the WPA config */ break; case 1: if (!(vap->iv_caps & IEEE80211_C_WPA1)) return EOPNOTSUPP; flags |= IEEE80211_F_WPA1; break; case 2: if (!(vap->iv_caps & IEEE80211_C_WPA2)) return EOPNOTSUPP; flags |= IEEE80211_F_WPA2; break; case 3: if ((vap->iv_caps & IEEE80211_C_WPA) != IEEE80211_C_WPA) return EOPNOTSUPP; flags |= IEEE80211_F_WPA1 | IEEE80211_F_WPA2; break; default: /* Can't set any -> error */ return EOPNOTSUPP; } vap->iv_flags = flags; error = ERESTART; /* NB: can change beacon frame */ break; case IEEE80211_IOC_WME: if (ireq->i_val) { if ((vap->iv_caps & IEEE80211_C_WME) == 0) return EOPNOTSUPP; ieee80211_syncflag(vap, IEEE80211_F_WME); } else ieee80211_syncflag(vap, -IEEE80211_F_WME); error = ERESTART; /* NB: can change beacon frame */ break; case IEEE80211_IOC_HIDESSID: if (ireq->i_val) vap->iv_flags |= IEEE80211_F_HIDESSID; else vap->iv_flags &= ~IEEE80211_F_HIDESSID; error = ERESTART; /* XXX ENETRESET? */ break; case IEEE80211_IOC_APBRIDGE: if (ireq->i_val == 0) vap->iv_flags |= IEEE80211_F_NOBRIDGE; else vap->iv_flags &= ~IEEE80211_F_NOBRIDGE; break; case IEEE80211_IOC_BSSID: if (ireq->i_len != sizeof(tmpbssid)) return EINVAL; error = copyin(ireq->i_data, tmpbssid, ireq->i_len); if (error) break; IEEE80211_ADDR_COPY(vap->iv_des_bssid, tmpbssid); if (IEEE80211_ADDR_EQ(vap->iv_des_bssid, zerobssid)) vap->iv_flags &= ~IEEE80211_F_DESBSSID; else vap->iv_flags |= IEEE80211_F_DESBSSID; error = ENETRESET; break; case IEEE80211_IOC_CHANLIST: error = ieee80211_ioctl_setchanlist(vap, ireq); break; #define OLD_IEEE80211_IOC_SCAN_REQ 23 #ifdef OLD_IEEE80211_IOC_SCAN_REQ case OLD_IEEE80211_IOC_SCAN_REQ: IEEE80211_DPRINTF(vap, IEEE80211_MSG_SCAN, "%s: active scan request\n", __func__); /* * If we are in INIT state then the driver has never * had a chance to setup hardware state to do a scan; * use the state machine to get us up the SCAN state. * Otherwise just invoke the scan machinery to start * a one-time scan. 
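 *
 * [Illustrative aside, not part of this revision: the modern
 *  IEEE80211_IOC_SCAN_REQ path handled just below takes a full
 *  struct ieee80211_scan_req.  A minimal userland sketch, reusing the
 *  socket/SIOCS80211 plumbing assumed in the earlier station-stats example;
 *  only the flag names, the duration sentinel and the "i_len must equal
 *  sizeof(sr)" rule are taken from the handlers in this file.
 *
 *	// one-shot active scan, flushing the scan cache first
 *	static int
 *	request_scan(int s, const char *ifname)
 *	{
 *		struct ieee80211_scan_req sr;
 *		struct ieee80211req ireq;
 *
 *		memset(&sr, 0, sizeof(sr));
 *		sr.sr_flags = IEEE80211_IOC_SCAN_ACTIVE |
 *		    IEEE80211_IOC_SCAN_ONCE | IEEE80211_IOC_SCAN_NOPICK |
 *		    IEEE80211_IOC_SCAN_FLUSH;
 *		sr.sr_duration = IEEE80211_IOC_SCAN_FOREVER;
 *		sr.sr_nssid = 0;		// broadcast probe requests only
 *
 *		memset(&ireq, 0, sizeof(ireq));
 *		strlcpy(ireq.i_name, ifname, sizeof(ireq.i_name));
 *		ireq.i_type = IEEE80211_IOC_SCAN_REQ;
 *		ireq.i_data = &sr;
 *		ireq.i_len = sizeof(sr);	// ieee80211_ioctl_scanreq() wants an exact size
 *		return (ioctl(s, SIOCS80211, &ireq));
 *	}
 *
 *  Note that once the scan has actually been started the kernel answers
 *  EINPROGRESS (see ieee80211_scanreq() above), so callers should treat
 *  that value as success.]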
*/ if (vap->iv_state == IEEE80211_S_INIT) ieee80211_new_state(vap, IEEE80211_S_SCAN, 0); else (void) ieee80211_start_scan(vap, IEEE80211_SCAN_ACTIVE | IEEE80211_SCAN_NOPICK | IEEE80211_SCAN_ONCE, IEEE80211_SCAN_FOREVER, 0, 0, /* XXX use ioctl params */ vap->iv_des_nssid, vap->iv_des_ssid); break; #endif /* OLD_IEEE80211_IOC_SCAN_REQ */ case IEEE80211_IOC_SCAN_REQ: error = ieee80211_ioctl_scanreq(vap, ireq); break; case IEEE80211_IOC_SCAN_CANCEL: IEEE80211_DPRINTF(vap, IEEE80211_MSG_SCAN, "%s: cancel scan\n", __func__); ieee80211_cancel_scan(vap); break; case IEEE80211_IOC_HTCONF: if (ireq->i_val & 1) ieee80211_syncflag_ht(vap, IEEE80211_FHT_HT); else ieee80211_syncflag_ht(vap, -IEEE80211_FHT_HT); if (ireq->i_val & 2) ieee80211_syncflag_ht(vap, IEEE80211_FHT_USEHT40); else ieee80211_syncflag_ht(vap, -IEEE80211_FHT_USEHT40); error = ENETRESET; break; case IEEE80211_IOC_ADDMAC: case IEEE80211_IOC_DELMAC: error = ieee80211_ioctl_macmac(vap, ireq); break; case IEEE80211_IOC_MACCMD: error = ieee80211_ioctl_setmaccmd(vap, ireq); break; case IEEE80211_IOC_STA_STATS: error = ieee80211_ioctl_setstastats(vap, ireq); break; case IEEE80211_IOC_STA_TXPOW: error = ieee80211_ioctl_setstatxpow(vap, ireq); break; case IEEE80211_IOC_WME_CWMIN: /* WME: CWmin */ case IEEE80211_IOC_WME_CWMAX: /* WME: CWmax */ case IEEE80211_IOC_WME_AIFS: /* WME: AIFS */ case IEEE80211_IOC_WME_TXOPLIMIT: /* WME: txops limit */ case IEEE80211_IOC_WME_ACM: /* WME: ACM (bss only) */ case IEEE80211_IOC_WME_ACKPOLICY: /* WME: ACK policy (!bss only) */ error = ieee80211_ioctl_setwmeparam(vap, ireq); break; case IEEE80211_IOC_DTIM_PERIOD: if (vap->iv_opmode != IEEE80211_M_HOSTAP && vap->iv_opmode != IEEE80211_M_MBSS && vap->iv_opmode != IEEE80211_M_IBSS) return EINVAL; if (IEEE80211_DTIM_MIN <= ireq->i_val && ireq->i_val <= IEEE80211_DTIM_MAX) { vap->iv_dtim_period = ireq->i_val; error = ENETRESET; /* requires restart */ } else error = EINVAL; break; case IEEE80211_IOC_BEACON_INTERVAL: if (vap->iv_opmode != IEEE80211_M_HOSTAP && vap->iv_opmode != IEEE80211_M_MBSS && vap->iv_opmode != IEEE80211_M_IBSS) return EINVAL; if (IEEE80211_BINTVAL_MIN <= ireq->i_val && ireq->i_val <= IEEE80211_BINTVAL_MAX) { ic->ic_bintval = ireq->i_val; error = ENETRESET; /* requires restart */ } else error = EINVAL; break; case IEEE80211_IOC_PUREG: if (ireq->i_val) vap->iv_flags |= IEEE80211_F_PUREG; else vap->iv_flags &= ~IEEE80211_F_PUREG; /* NB: reset only if we're operating on an 11g channel */ if (isvap11g(vap)) error = ENETRESET; break; case IEEE80211_IOC_QUIET: vap->iv_quiet= ireq->i_val; break; case IEEE80211_IOC_QUIET_COUNT: vap->iv_quiet_count=ireq->i_val; break; case IEEE80211_IOC_QUIET_PERIOD: vap->iv_quiet_period=ireq->i_val; break; case IEEE80211_IOC_QUIET_OFFSET: vap->iv_quiet_offset=ireq->i_val; break; case IEEE80211_IOC_QUIET_DUR: if(ireq->i_val < vap->iv_bss->ni_intval) vap->iv_quiet_duration = ireq->i_val; else error = EINVAL; break; case IEEE80211_IOC_BGSCAN: if (ireq->i_val) { if ((vap->iv_caps & IEEE80211_C_BGSCAN) == 0) return EOPNOTSUPP; vap->iv_flags |= IEEE80211_F_BGSCAN; } else vap->iv_flags &= ~IEEE80211_F_BGSCAN; break; case IEEE80211_IOC_BGSCAN_IDLE: if (ireq->i_val >= IEEE80211_BGSCAN_IDLE_MIN) vap->iv_bgscanidle = ireq->i_val*hz/1000; else error = EINVAL; break; case IEEE80211_IOC_BGSCAN_INTERVAL: if (ireq->i_val >= IEEE80211_BGSCAN_INTVAL_MIN) vap->iv_bgscanintvl = ireq->i_val*hz; else error = EINVAL; break; case IEEE80211_IOC_SCANVALID: if (ireq->i_val >= IEEE80211_SCAN_VALID_MIN) vap->iv_scanvalid = ireq->i_val*hz; else error 
= EINVAL; break; case IEEE80211_IOC_FRAGTHRESHOLD: if ((vap->iv_caps & IEEE80211_C_TXFRAG) == 0 && ireq->i_val != IEEE80211_FRAG_MAX) return EOPNOTSUPP; if (!(IEEE80211_FRAG_MIN <= ireq->i_val && ireq->i_val <= IEEE80211_FRAG_MAX)) return EINVAL; vap->iv_fragthreshold = ireq->i_val; error = ERESTART; break; case IEEE80211_IOC_BURST: if (ireq->i_val) { if ((vap->iv_caps & IEEE80211_C_BURST) == 0) return EOPNOTSUPP; ieee80211_syncflag(vap, IEEE80211_F_BURST); } else ieee80211_syncflag(vap, -IEEE80211_F_BURST); error = ERESTART; break; case IEEE80211_IOC_BMISSTHRESHOLD: if (!(IEEE80211_HWBMISS_MIN <= ireq->i_val && ireq->i_val <= IEEE80211_HWBMISS_MAX)) return EINVAL; vap->iv_bmissthreshold = ireq->i_val; error = ERESTART; break; case IEEE80211_IOC_CURCHAN: error = ieee80211_ioctl_setcurchan(vap, ireq); break; case IEEE80211_IOC_SHORTGI: if (ireq->i_val) { #define IEEE80211_HTCAP_SHORTGI \ (IEEE80211_HTCAP_SHORTGI20 | IEEE80211_HTCAP_SHORTGI40) if (((ireq->i_val ^ vap->iv_htcaps) & IEEE80211_HTCAP_SHORTGI) != 0) return EINVAL; if (ireq->i_val & IEEE80211_HTCAP_SHORTGI20) vap->iv_flags_ht |= IEEE80211_FHT_SHORTGI20; if (ireq->i_val & IEEE80211_HTCAP_SHORTGI40) vap->iv_flags_ht |= IEEE80211_FHT_SHORTGI40; #undef IEEE80211_HTCAP_SHORTGI } else vap->iv_flags_ht &= ~(IEEE80211_FHT_SHORTGI20 | IEEE80211_FHT_SHORTGI40); error = ERESTART; break; case IEEE80211_IOC_AMPDU: if (ireq->i_val && (vap->iv_htcaps & IEEE80211_HTC_AMPDU) == 0) return EINVAL; if (ireq->i_val & 1) vap->iv_flags_ht |= IEEE80211_FHT_AMPDU_TX; else vap->iv_flags_ht &= ~IEEE80211_FHT_AMPDU_TX; if (ireq->i_val & 2) vap->iv_flags_ht |= IEEE80211_FHT_AMPDU_RX; else vap->iv_flags_ht &= ~IEEE80211_FHT_AMPDU_RX; /* NB: reset only if we're operating on an 11n channel */ if (isvapht(vap)) error = ERESTART; break; case IEEE80211_IOC_AMPDU_LIMIT: if (!(IEEE80211_HTCAP_MAXRXAMPDU_8K <= ireq->i_val && ireq->i_val <= IEEE80211_HTCAP_MAXRXAMPDU_64K)) return EINVAL; if (vap->iv_opmode == IEEE80211_M_HOSTAP) vap->iv_ampdu_rxmax = ireq->i_val; else vap->iv_ampdu_limit = ireq->i_val; error = ERESTART; break; case IEEE80211_IOC_AMPDU_DENSITY: if (!(IEEE80211_HTCAP_MPDUDENSITY_NA <= ireq->i_val && ireq->i_val <= IEEE80211_HTCAP_MPDUDENSITY_16)) return EINVAL; vap->iv_ampdu_density = ireq->i_val; error = ERESTART; break; case IEEE80211_IOC_AMSDU: if (ireq->i_val && (vap->iv_htcaps & IEEE80211_HTC_AMSDU) == 0) return EINVAL; if (ireq->i_val & 1) vap->iv_flags_ht |= IEEE80211_FHT_AMSDU_TX; else vap->iv_flags_ht &= ~IEEE80211_FHT_AMSDU_TX; if (ireq->i_val & 2) vap->iv_flags_ht |= IEEE80211_FHT_AMSDU_RX; else vap->iv_flags_ht &= ~IEEE80211_FHT_AMSDU_RX; /* NB: reset only if we're operating on an 11n channel */ if (isvapht(vap)) error = ERESTART; break; case IEEE80211_IOC_AMSDU_LIMIT: /* XXX validate */ vap->iv_amsdu_limit = ireq->i_val; /* XXX truncation? 
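 *
 * [Illustrative aside, not part of this revision: IEEE80211_IOC_AMPDU and
 *  IEEE80211_IOC_AMSDU (handled above) take a small bit mask in i_val:
 *  bit 0 enables the transmit side, bit 1 the receive side.  Reusing the
 *  request plumbing assumed in the earlier sketches:
 *
 *	static int
 *	set_ampdu(int s, const char *ifname, int txrx_mask)	// 3 = TX and RX, 0 = off
 *	{
 *		struct ieee80211req ireq;
 *
 *		memset(&ireq, 0, sizeof(ireq));
 *		strlcpy(ireq.i_name, ifname, sizeof(ireq.i_name));
 *		ireq.i_type = IEEE80211_IOC_AMPDU;
 *		ireq.i_val = txrx_mask;
 *		return (ioctl(s, SIOCS80211, &ireq));
 *	}
 *
 *  The handler only forces a restart when the vap is actually operating on
 *  an 11n channel (isvapht() above).]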
*/ break; case IEEE80211_IOC_PUREN: if (ireq->i_val) { if ((vap->iv_flags_ht & IEEE80211_FHT_HT) == 0) return EINVAL; vap->iv_flags_ht |= IEEE80211_FHT_PUREN; } else vap->iv_flags_ht &= ~IEEE80211_FHT_PUREN; /* NB: reset only if we're operating on an 11n channel */ if (isvapht(vap)) error = ERESTART; break; case IEEE80211_IOC_DOTH: if (ireq->i_val) { #if 0 /* XXX no capability */ if ((vap->iv_caps & IEEE80211_C_DOTH) == 0) return EOPNOTSUPP; #endif vap->iv_flags |= IEEE80211_F_DOTH; } else vap->iv_flags &= ~IEEE80211_F_DOTH; error = ENETRESET; break; case IEEE80211_IOC_REGDOMAIN: error = ieee80211_ioctl_setregdomain(vap, ireq); break; case IEEE80211_IOC_ROAM: error = ieee80211_ioctl_setroam(vap, ireq); break; case IEEE80211_IOC_TXPARAMS: error = ieee80211_ioctl_settxparams(vap, ireq); break; case IEEE80211_IOC_HTCOMPAT: if (ireq->i_val) { if ((vap->iv_flags_ht & IEEE80211_FHT_HT) == 0) return EOPNOTSUPP; vap->iv_flags_ht |= IEEE80211_FHT_HTCOMPAT; } else vap->iv_flags_ht &= ~IEEE80211_FHT_HTCOMPAT; /* NB: reset only if we're operating on an 11n channel */ if (isvapht(vap)) error = ERESTART; break; case IEEE80211_IOC_DWDS: if (ireq->i_val) { /* NB: DWDS only makes sense for WDS-capable devices */ if ((ic->ic_caps & IEEE80211_C_WDS) == 0) return EOPNOTSUPP; /* NB: DWDS is used only with ap+sta vaps */ if (vap->iv_opmode != IEEE80211_M_HOSTAP && vap->iv_opmode != IEEE80211_M_STA) return EINVAL; vap->iv_flags |= IEEE80211_F_DWDS; if (vap->iv_opmode == IEEE80211_M_STA) vap->iv_flags_ext |= IEEE80211_FEXT_4ADDR; } else { vap->iv_flags &= ~IEEE80211_F_DWDS; if (vap->iv_opmode == IEEE80211_M_STA) vap->iv_flags_ext &= ~IEEE80211_FEXT_4ADDR; } break; case IEEE80211_IOC_INACTIVITY: if (ireq->i_val) vap->iv_flags_ext |= IEEE80211_FEXT_INACT; else vap->iv_flags_ext &= ~IEEE80211_FEXT_INACT; break; case IEEE80211_IOC_APPIE: error = ieee80211_ioctl_setappie(vap, ireq); break; case IEEE80211_IOC_WPS: if (ireq->i_val) { if ((vap->iv_caps & IEEE80211_C_WPA) == 0) return EOPNOTSUPP; vap->iv_flags_ext |= IEEE80211_FEXT_WPS; } else vap->iv_flags_ext &= ~IEEE80211_FEXT_WPS; break; case IEEE80211_IOC_TSN: if (ireq->i_val) { if ((vap->iv_caps & IEEE80211_C_WPA) == 0) return EOPNOTSUPP; vap->iv_flags_ext |= IEEE80211_FEXT_TSN; } else vap->iv_flags_ext &= ~IEEE80211_FEXT_TSN; break; case IEEE80211_IOC_CHANSWITCH: error = ieee80211_ioctl_chanswitch(vap, ireq); break; case IEEE80211_IOC_DFS: if (ireq->i_val) { if ((vap->iv_caps & IEEE80211_C_DFS) == 0) return EOPNOTSUPP; /* NB: DFS requires 11h support */ if ((vap->iv_flags & IEEE80211_F_DOTH) == 0) return EINVAL; vap->iv_flags_ext |= IEEE80211_FEXT_DFS; } else vap->iv_flags_ext &= ~IEEE80211_FEXT_DFS; break; case IEEE80211_IOC_DOTD: if (ireq->i_val) vap->iv_flags_ext |= IEEE80211_FEXT_DOTD; else vap->iv_flags_ext &= ~IEEE80211_FEXT_DOTD; if (vap->iv_opmode == IEEE80211_M_STA) error = ENETRESET; break; case IEEE80211_IOC_HTPROTMODE: if (ireq->i_val > IEEE80211_PROT_RTSCTS) return EINVAL; ic->ic_htprotmode = ireq->i_val ? 
IEEE80211_PROT_RTSCTS : IEEE80211_PROT_NONE; /* NB: if not operating in 11n this can wait */ if (isvapht(vap)) error = ERESTART; break; case IEEE80211_IOC_STA_VLAN: error = ieee80211_ioctl_setstavlan(vap, ireq); break; case IEEE80211_IOC_SMPS: if ((ireq->i_val &~ IEEE80211_HTCAP_SMPS) != 0 || ireq->i_val == 0x0008) /* value of 2 is reserved */ return EINVAL; if (ireq->i_val != IEEE80211_HTCAP_SMPS_OFF && (vap->iv_htcaps & IEEE80211_HTC_SMPS) == 0) return EOPNOTSUPP; vap->iv_htcaps = (vap->iv_htcaps &~ IEEE80211_HTCAP_SMPS) | ireq->i_val; /* NB: if not operating in 11n this can wait */ if (isvapht(vap)) error = ERESTART; break; case IEEE80211_IOC_RIFS: if (ireq->i_val != 0) { if ((vap->iv_htcaps & IEEE80211_HTC_RIFS) == 0) return EOPNOTSUPP; vap->iv_flags_ht |= IEEE80211_FHT_RIFS; } else vap->iv_flags_ht &= ~IEEE80211_FHT_RIFS; /* NB: if not operating in 11n this can wait */ if (isvapht(vap)) error = ERESTART; break; case IEEE80211_IOC_STBC: /* Check if we can do STBC TX/RX before changing the setting */ if ((ireq->i_val & 1) && ((vap->iv_htcaps & IEEE80211_HTCAP_TXSTBC) == 0)) return EOPNOTSUPP; if ((ireq->i_val & 2) && ((vap->iv_htcaps & IEEE80211_HTCAP_RXSTBC) == 0)) return EOPNOTSUPP; /* TX */ if (ireq->i_val & 1) vap->iv_flags_ht |= IEEE80211_FHT_STBC_TX; else vap->iv_flags_ht &= ~IEEE80211_FHT_STBC_TX; /* RX */ if (ireq->i_val & 2) vap->iv_flags_ht |= IEEE80211_FHT_STBC_RX; else vap->iv_flags_ht &= ~IEEE80211_FHT_STBC_RX; /* NB: reset only if we're operating on an 11n channel */ if (isvapht(vap)) error = ERESTART; break; default: error = ieee80211_ioctl_setdefault(vap, ireq); break; } /* * The convention is that ENETRESET means an operation * requires a complete re-initialization of the device (e.g. * changing something that affects the association state). * ERESTART means the request may be handled with only a * reload of the hardware state. We hand ERESTART requests * to the iv_reset callback so the driver can decide. If * a device does not fillin iv_reset then it defaults to one * that returns ENETRESET. Otherwise a driver may return * ENETRESET (in which case a full reset will be done) or * 0 to mean there's no need to do anything (e.g. when the * change has no effect on the driver/device). */ if (error == ERESTART) error = IFNET_IS_UP_RUNNING(vap->iv_ifp) ? vap->iv_reset(vap, ireq->i_type) : 0; if (error == ENETRESET) { /* XXX need to re-think AUTO handling */ if (IS_UP_AUTO(vap)) ieee80211_init(vap); error = 0; } return error; } int ieee80211_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct ieee80211vap *vap = ifp->if_softc; struct ieee80211com *ic = vap->iv_ic; int error = 0, wait = 0; struct ifreq *ifr; struct ifaddr *ifa; /* XXX */ switch (cmd) { case SIOCSIFFLAGS: IEEE80211_LOCK(ic); if ((ifp->if_flags ^ vap->iv_ifflags) & IFF_PROMISC) { /* * Enable promiscuous mode when: * 1. Interface is not a member of bridge, or * 2. Requested by user, or * 3. In monitor (or adhoc-demo) mode. */ if (ifp->if_bridge == NULL || (ifp->if_flags & IFF_PPROMISC) != 0 || vap->iv_opmode == IEEE80211_M_MONITOR || (vap->iv_opmode == IEEE80211_M_AHDEMO && (vap->iv_caps & IEEE80211_C_TDMA) == 0)) { ieee80211_promisc(vap, ifp->if_flags & IFF_PROMISC); vap->iv_ifflags ^= IFF_PROMISC; } } if ((ifp->if_flags ^ vap->iv_ifflags) & IFF_ALLMULTI) { ieee80211_allmulti(vap, ifp->if_flags & IFF_ALLMULTI); vap->iv_ifflags ^= IFF_ALLMULTI; } if (ifp->if_flags & IFF_UP) { /* * Bring ourself up unless we're already operational. 
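 *
 * [Illustrative aside, not part of this revision, on the ERESTART/ENETRESET
 *  convention spelled out just before ieee80211_ioctl_set80211() returns
 *  above: a driver that fills in iv_reset gets a chance to absorb
 *  ERESTART-class changes without a full re-initialization.  Hypothetical
 *  driver callback (every example_* name is invented):
 *
 *	static int
 *	example_reset(struct ieee80211vap *vap, u_long cmd)
 *	{
 *		switch (cmd) {
 *		case IEEE80211_IOC_RTSTHRESHOLD:
 *		case IEEE80211_IOC_FRAGTHRESHOLD:
 *			// thresholds are consulted per frame; nothing to push
 *			return (0);
 *		default:
 *			// be conservative: ask net80211 for a full re-init
 *			return (ENETRESET);
 *		}
 *	}
 *	// typically installed at vap creation: vap->iv_reset = example_reset;
 *
 *  An ENETRESET result from the callback is handled by the same
 *  ieee80211_init() path as an ENETRESET from the ioctl handler itself.
 *  Returning to the SIOCSIFFLAGS handling:]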
* If we're the first vap and the parent is not up * then it will automatically be brought up as a * side-effect of bringing ourself up. */ if (vap->iv_state == IEEE80211_S_INIT) { if (ic->ic_nrunning == 0) wait = 1; ieee80211_start_locked(vap); } } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) { /* * Stop ourself. If we are the last vap to be * marked down the parent will also be taken down. */ if (ic->ic_nrunning == 1) wait = 1; ieee80211_stop_locked(vap); } IEEE80211_UNLOCK(ic); /* Wait for parent ioctl handler if it was queued */ if (wait) { ieee80211_waitfor_parent(ic); /* * Check if the MAC address was changed * via SIOCSIFLLADDR ioctl. */ if_addr_rlock(ifp); if ((ifp->if_flags & IFF_UP) == 0 && !IEEE80211_ADDR_EQ(vap->iv_myaddr, IF_LLADDR(ifp))) IEEE80211_ADDR_COPY(vap->iv_myaddr, IF_LLADDR(ifp)); if_addr_runlock(ifp); } break; case SIOCADDMULTI: case SIOCDELMULTI: ieee80211_runtask(ic, &ic->ic_mcast_task); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: ifr = (struct ifreq *)data; error = ifmedia_ioctl(ifp, ifr, &vap->iv_media, cmd); break; case SIOCG80211: error = ieee80211_ioctl_get80211(vap, cmd, (struct ieee80211req *) data); break; case SIOCS80211: error = priv_check(curthread, PRIV_NET80211_MANAGE); if (error == 0) error = ieee80211_ioctl_set80211(vap, cmd, (struct ieee80211req *) data); break; case SIOCG80211STATS: ifr = (struct ifreq *)data; - copyout(&vap->iv_stats, ifr->ifr_data, sizeof (vap->iv_stats)); + copyout(&vap->iv_stats, ifr_data_get_ptr(ifr), + sizeof (vap->iv_stats)); break; case SIOCSIFMTU: ifr = (struct ifreq *)data; if (!(IEEE80211_MTU_MIN <= ifr->ifr_mtu && ifr->ifr_mtu <= IEEE80211_MTU_MAX)) error = EINVAL; else ifp->if_mtu = ifr->ifr_mtu; break; case SIOCSIFADDR: /* * XXX Handle this directly so we can suppress if_init calls. * XXX This should be done in ether_ioctl but for the moment * XXX there are too many other parts of the system that * XXX set IFF_UP and so suppress if_init being called when * XXX it should be. */ ifa = (struct ifaddr *) data; switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: if ((ifp->if_flags & IFF_UP) == 0) { ifp->if_flags |= IFF_UP; ifp->if_init(ifp->if_softc); } arp_ifinit(ifp, ifa); break; #endif default: if ((ifp->if_flags & IFF_UP) == 0) { ifp->if_flags |= IFF_UP; ifp->if_init(ifp->if_softc); } break; } break; default: /* * Pass unknown ioctls first to the driver, and if it * returns ENOTTY, then to the generic Ethernet handler. */ if (ic->ic_ioctl != NULL && (error = ic->ic_ioctl(ic, cmd, data)) != ENOTTY) break; error = ether_ioctl(ifp, cmd, data); break; } return (error); } Index: stable/11/sys/netinet/ip_carp.c =================================================================== --- stable/11/sys/netinet/ip_carp.c (revision 332287) +++ stable/11/sys/netinet/ip_carp.c (revision 332288) @@ -1,2165 +1,2167 @@ /*- * Copyright (c) 2002 Michael Shalayeff. * Copyright (c) 2003 Ryan McBride. * Copyright (c) 2011 Gleb Smirnoff * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_bpf.h" #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #include #include #include #include #endif #ifdef INET #include #include #endif #ifdef INET6 #include #include #include #include #include #include #endif #include static MALLOC_DEFINE(M_CARP, "CARP", "CARP addresses"); struct carp_softc { struct ifnet *sc_carpdev; /* Pointer to parent ifnet. */ struct ifaddr **sc_ifas; /* Our ifaddrs. */ struct sockaddr_dl sc_addr; /* Our link level address. */ struct callout sc_ad_tmo; /* Advertising timeout. */ #ifdef INET struct callout sc_md_tmo; /* Master down timeout. */ #endif #ifdef INET6 struct callout sc_md6_tmo; /* XXX: Master down timeout. */ #endif struct mtx sc_mtx; int sc_vhid; int sc_advskew; int sc_advbase; int sc_naddrs; int sc_naddrs6; int sc_ifasiz; enum { INIT = 0, BACKUP, MASTER } sc_state; int sc_suppress; int sc_sendad_errors; #define CARP_SENDAD_MAX_ERRORS 3 int sc_sendad_success; #define CARP_SENDAD_MIN_SUCCESS 3 int sc_init_counter; uint64_t sc_counter; /* authentication */ #define CARP_HMAC_PAD 64 unsigned char sc_key[CARP_KEY_LEN]; unsigned char sc_pad[CARP_HMAC_PAD]; SHA1_CTX sc_sha1; TAILQ_ENTRY(carp_softc) sc_list; /* On the carp_if list. */ LIST_ENTRY(carp_softc) sc_next; /* On the global list. */ }; struct carp_if { #ifdef INET int cif_naddrs; #endif #ifdef INET6 int cif_naddrs6; #endif TAILQ_HEAD(, carp_softc) cif_vrs; #ifdef INET struct ip_moptions cif_imo; #endif #ifdef INET6 struct ip6_moptions cif_im6o; #endif struct ifnet *cif_ifp; struct mtx cif_mtx; uint32_t cif_flags; #define CIF_PROMISC 0x00000001 }; #define CARP_INET 0 #define CARP_INET6 1 static int proto_reg[] = {-1, -1}; /* * Brief design of carp(4). * * Any carp-capable ifnet may have a list of carp softcs hanging off * its ifp->if_carp pointer. Each softc represents one unique virtual * host id, or vhid. The softc has a back pointer to the ifnet. All * softcs are joined in a global list, which has quite limited use. * * Any interface address that takes part in CARP negotiation has a * pointer to the softc of its vhid, ifa->ifa_carp. That could be either * AF_INET or AF_INET6 address. * * Although, one can get the softc's backpointer to ifnet and traverse * through its ifp->if_addrhead queue to find all interface addresses * involved in CARP, we keep a growable array of ifaddr pointers. This * allows us to avoid grabbing the IF_ADDR_LOCK() in many traversals that * do calls into the network stack, thus avoiding LORs. 
* * Locking: * * Each softc has a lock sc_mtx. It is used to synchronise carp_input_c(), * callout-driven events and ioctl()s. * * To traverse the list of softcs on an ifnet we use CIF_LOCK() or carp_sx. * To traverse the global list we use the mutex carp_mtx. * * Known issues with locking: * * - Sending ad, we put the pointer to the softc in an mtag, and no reference * counting is done on the softc. * - On module unload we may race (?) with packet processing thread * dereferencing our function pointers. */ /* Accept incoming CARP packets. */ static VNET_DEFINE(int, carp_allow) = 1; #define V_carp_allow VNET(carp_allow) /* Preempt slower nodes. */ static VNET_DEFINE(int, carp_preempt) = 0; #define V_carp_preempt VNET(carp_preempt) /* Log level. */ static VNET_DEFINE(int, carp_log) = 1; #define V_carp_log VNET(carp_log) /* Global advskew demotion. */ static VNET_DEFINE(int, carp_demotion) = 0; #define V_carp_demotion VNET(carp_demotion) /* Send error demotion factor. */ static VNET_DEFINE(int, carp_senderr_adj) = CARP_MAXSKEW; #define V_carp_senderr_adj VNET(carp_senderr_adj) /* Iface down demotion factor. */ static VNET_DEFINE(int, carp_ifdown_adj) = CARP_MAXSKEW; #define V_carp_ifdown_adj VNET(carp_ifdown_adj) static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS); SYSCTL_NODE(_net_inet, IPPROTO_CARP, carp, CTLFLAG_RW, 0, "CARP"); SYSCTL_INT(_net_inet_carp, OID_AUTO, allow, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(carp_allow), 0, "Accept incoming CARP packets"); SYSCTL_INT(_net_inet_carp, OID_AUTO, preempt, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(carp_preempt), 0, "High-priority backup preemption mode"); SYSCTL_INT(_net_inet_carp, OID_AUTO, log, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(carp_log), 0, "CARP log level"); SYSCTL_PROC(_net_inet_carp, OID_AUTO, demotion, CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW, 0, 0, carp_demote_adj_sysctl, "I", "Adjust demotion factor (skew of advskew)"); SYSCTL_INT(_net_inet_carp, OID_AUTO, senderr_demotion_factor, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(carp_senderr_adj), 0, "Send error demotion factor adjustment"); SYSCTL_INT(_net_inet_carp, OID_AUTO, ifdown_demotion_factor, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(carp_ifdown_adj), 0, "Interface down demotion factor adjustment"); VNET_PCPUSTAT_DEFINE(struct carpstats, carpstats); VNET_PCPUSTAT_SYSINIT(carpstats); VNET_PCPUSTAT_SYSUNINIT(carpstats); #define CARPSTATS_ADD(name, val) \ counter_u64_add(VNET(carpstats)[offsetof(struct carpstats, name) / \ sizeof(uint64_t)], (val)) #define CARPSTATS_INC(name) CARPSTATS_ADD(name, 1) SYSCTL_VNET_PCPUSTAT(_net_inet_carp, OID_AUTO, stats, struct carpstats, carpstats, "CARP statistics (struct carpstats, netinet/ip_carp.h)"); #define CARP_LOCK_INIT(sc) mtx_init(&(sc)->sc_mtx, "carp_softc", \ NULL, MTX_DEF) #define CARP_LOCK_DESTROY(sc) mtx_destroy(&(sc)->sc_mtx) #define CARP_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) #define CARP_LOCK(sc) mtx_lock(&(sc)->sc_mtx) #define CARP_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) #define CIF_LOCK_INIT(cif) mtx_init(&(cif)->cif_mtx, "carp_if", \ NULL, MTX_DEF) #define CIF_LOCK_DESTROY(cif) mtx_destroy(&(cif)->cif_mtx) #define CIF_LOCK_ASSERT(cif) mtx_assert(&(cif)->cif_mtx, MA_OWNED) #define CIF_LOCK(cif) mtx_lock(&(cif)->cif_mtx) #define CIF_UNLOCK(cif) mtx_unlock(&(cif)->cif_mtx) #define CIF_FREE(cif) do { \ CIF_LOCK(cif); \ if (TAILQ_EMPTY(&(cif)->cif_vrs)) \ carp_free_if(cif); \ else \ CIF_UNLOCK(cif); \ } while (0) #define CARP_LOG(...) 
do { \ if (V_carp_log > 0) \ log(LOG_INFO, "carp: " __VA_ARGS__); \ } while (0) #define CARP_DEBUG(...) do { \ if (V_carp_log > 1) \ log(LOG_DEBUG, __VA_ARGS__); \ } while (0) #define IFNET_FOREACH_IFA(ifp, ifa) \ IF_ADDR_LOCK_ASSERT(ifp); \ TAILQ_FOREACH((ifa), &(ifp)->if_addrhead, ifa_link) \ if ((ifa)->ifa_carp != NULL) #define CARP_FOREACH_IFA(sc, ifa) \ CARP_LOCK_ASSERT(sc); \ for (int _i = 0; \ _i < (sc)->sc_naddrs + (sc)->sc_naddrs6 && \ ((ifa) = sc->sc_ifas[_i]) != NULL; \ ++_i) #define IFNET_FOREACH_CARP(ifp, sc) \ KASSERT(mtx_owned(&ifp->if_carp->cif_mtx) || \ sx_xlocked(&carp_sx), ("cif_vrs not locked")); \ TAILQ_FOREACH((sc), &(ifp)->if_carp->cif_vrs, sc_list) #define DEMOTE_ADVSKEW(sc) \ (((sc)->sc_advskew + V_carp_demotion > CARP_MAXSKEW) ? \ CARP_MAXSKEW : ((sc)->sc_advskew + V_carp_demotion)) static void carp_input_c(struct mbuf *, struct carp_header *, sa_family_t); static struct carp_softc *carp_alloc(struct ifnet *); static void carp_destroy(struct carp_softc *); static struct carp_if *carp_alloc_if(struct ifnet *); static void carp_free_if(struct carp_if *); static void carp_set_state(struct carp_softc *, int, const char* reason); static void carp_sc_state(struct carp_softc *); static void carp_setrun(struct carp_softc *, sa_family_t); static void carp_master_down(void *); static void carp_master_down_locked(struct carp_softc *, const char* reason); static void carp_send_ad(void *); static void carp_send_ad_locked(struct carp_softc *); static void carp_addroute(struct carp_softc *); static void carp_ifa_addroute(struct ifaddr *); static void carp_delroute(struct carp_softc *); static void carp_ifa_delroute(struct ifaddr *); static void carp_send_ad_all(void *, int); static void carp_demote_adj(int, char *); static LIST_HEAD(, carp_softc) carp_list; static struct mtx carp_mtx; static struct sx carp_sx; static struct task carp_sendall_task = TASK_INITIALIZER(0, carp_send_ad_all, NULL); static void carp_hmac_prepare(struct carp_softc *sc) { uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT; uint8_t vhid = sc->sc_vhid & 0xff; struct ifaddr *ifa; int i, found; #ifdef INET struct in_addr last, cur, in; #endif #ifdef INET6 struct in6_addr last6, cur6, in6; #endif CARP_LOCK_ASSERT(sc); /* Compute ipad from key. */ bzero(sc->sc_pad, sizeof(sc->sc_pad)); bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key)); for (i = 0; i < sizeof(sc->sc_pad); i++) sc->sc_pad[i] ^= 0x36; /* Precompute first part of inner hash. 
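 *
 * [Illustrative aside, not part of this revision: this is the standard
 *  HMAC-SHA1 construction specialized to CARP,
 *
 *	HMAC(K, m) = SHA1((K ^ opad) || SHA1((K ^ ipad) || m))
 *
 *  where ipad is the byte 0x36 repeated and opad the byte 0x5c repeated
 *  over the 64-byte block.  sc_pad starts out as the zero-padded key XORed
 *  with 0x36 (K ^ ipad), and sc_sha1 caches the inner hash state over
 *  (K ^ ipad) plus the fields that never change between advertisements
 *  (version, type, vhid and the sorted address lists).  The later
 *
 *	sc_pad[i] ^= 0x36 ^ 0x5c;
 *
 *  line converts the stored K ^ ipad into K ^ opad in place, because
 *  (k ^ 0x36) ^ (0x36 ^ 0x5c) == k ^ 0x5c.  carp_hmac_generate() below then
 *  only has to append the 64-bit counter to the cached inner state and run
 *  the outer hash over sc_pad and the inner digest.]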
*/ SHA1Init(&sc->sc_sha1); SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad)); SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version)); SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type)); SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid)); #ifdef INET cur.s_addr = 0; do { found = 0; last = cur; cur.s_addr = 0xffffffff; CARP_FOREACH_IFA(sc, ifa) { in.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; if (ifa->ifa_addr->sa_family == AF_INET && ntohl(in.s_addr) > ntohl(last.s_addr) && ntohl(in.s_addr) < ntohl(cur.s_addr)) { cur.s_addr = in.s_addr; found++; } } if (found) SHA1Update(&sc->sc_sha1, (void *)&cur, sizeof(cur)); } while (found); #endif /* INET */ #ifdef INET6 memset(&cur6, 0, sizeof(cur6)); do { found = 0; last6 = cur6; memset(&cur6, 0xff, sizeof(cur6)); CARP_FOREACH_IFA(sc, ifa) { in6 = ifatoia6(ifa)->ia_addr.sin6_addr; if (IN6_IS_SCOPE_EMBED(&in6)) in6.s6_addr16[1] = 0; if (ifa->ifa_addr->sa_family == AF_INET6 && memcmp(&in6, &last6, sizeof(in6)) > 0 && memcmp(&in6, &cur6, sizeof(in6)) < 0) { cur6 = in6; found++; } } if (found) SHA1Update(&sc->sc_sha1, (void *)&cur6, sizeof(cur6)); } while (found); #endif /* INET6 */ /* convert ipad to opad */ for (i = 0; i < sizeof(sc->sc_pad); i++) sc->sc_pad[i] ^= 0x36 ^ 0x5c; } static void carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2], unsigned char md[20]) { SHA1_CTX sha1ctx; CARP_LOCK_ASSERT(sc); /* fetch first half of inner hash */ bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx)); SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter)); SHA1Final(md, &sha1ctx); /* outer hash */ SHA1Init(&sha1ctx); SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad)); SHA1Update(&sha1ctx, md, 20); SHA1Final(md, &sha1ctx); } static int carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2], unsigned char md[20]) { unsigned char md2[20]; CARP_LOCK_ASSERT(sc); carp_hmac_generate(sc, counter, md2); return (bcmp(md, md2, sizeof(md2))); } /* * process input packet. * we have rearranged checks order compared to the rfc, * but it seems more efficient this way or not possible otherwise. */ #ifdef INET int carp_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m = *mp; struct ip *ip = mtod(m, struct ip *); struct carp_header *ch; int iplen, len; iplen = *offp; *mp = NULL; CARPSTATS_INC(carps_ipackets); if (!V_carp_allow) { m_freem(m); return (IPPROTO_DONE); } /* verify that the IP TTL is 255. 
*/ if (ip->ip_ttl != CARP_DFLTTL) { CARPSTATS_INC(carps_badttl); CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__, ip->ip_ttl, m->m_pkthdr.rcvif->if_xname); m_freem(m); return (IPPROTO_DONE); } iplen = ip->ip_hl << 2; if (m->m_pkthdr.len < iplen + sizeof(*ch)) { CARPSTATS_INC(carps_badlen); CARP_DEBUG("%s: received len %zd < sizeof(struct carp_header) " "on %s\n", __func__, m->m_len - sizeof(struct ip), m->m_pkthdr.rcvif->if_xname); m_freem(m); return (IPPROTO_DONE); } if (iplen + sizeof(*ch) < m->m_len) { if ((m = m_pullup(m, iplen + sizeof(*ch))) == NULL) { CARPSTATS_INC(carps_hdrops); CARP_DEBUG("%s: pullup failed\n", __func__); return (IPPROTO_DONE); } ip = mtod(m, struct ip *); } ch = (struct carp_header *)((char *)ip + iplen); /* * verify that the received packet length is * equal to the CARP header */ len = iplen + sizeof(*ch); if (len > m->m_pkthdr.len) { CARPSTATS_INC(carps_badlen); CARP_DEBUG("%s: packet too short %d on %s\n", __func__, m->m_pkthdr.len, m->m_pkthdr.rcvif->if_xname); m_freem(m); return (IPPROTO_DONE); } if ((m = m_pullup(m, len)) == NULL) { CARPSTATS_INC(carps_hdrops); return (IPPROTO_DONE); } ip = mtod(m, struct ip *); ch = (struct carp_header *)((char *)ip + iplen); /* verify the CARP checksum */ m->m_data += iplen; if (in_cksum(m, len - iplen)) { CARPSTATS_INC(carps_badsum); CARP_DEBUG("%s: checksum failed on %s\n", __func__, m->m_pkthdr.rcvif->if_xname); m_freem(m); return (IPPROTO_DONE); } m->m_data -= iplen; carp_input_c(m, ch, AF_INET); return (IPPROTO_DONE); } #endif #ifdef INET6 int carp6_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m = *mp; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct carp_header *ch; u_int len; CARPSTATS_INC(carps_ipackets6); if (!V_carp_allow) { m_freem(m); return (IPPROTO_DONE); } /* check if received on a valid carp interface */ if (m->m_pkthdr.rcvif->if_carp == NULL) { CARPSTATS_INC(carps_badif); CARP_DEBUG("%s: packet received on non-carp interface: %s\n", __func__, m->m_pkthdr.rcvif->if_xname); m_freem(m); return (IPPROTO_DONE); } /* verify that the IP TTL is 255 */ if (ip6->ip6_hlim != CARP_DFLTTL) { CARPSTATS_INC(carps_badttl); CARP_DEBUG("%s: received ttl %d != 255 on %s\n", __func__, ip6->ip6_hlim, m->m_pkthdr.rcvif->if_xname); m_freem(m); return (IPPROTO_DONE); } /* verify that we have a complete carp packet */ len = m->m_len; IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch)); if (ch == NULL) { CARPSTATS_INC(carps_badlen); CARP_DEBUG("%s: packet size %u too small\n", __func__, len); return (IPPROTO_DONE); } /* verify the CARP checksum */ m->m_data += *offp; if (in_cksum(m, sizeof(*ch))) { CARPSTATS_INC(carps_badsum); CARP_DEBUG("%s: checksum failed, on %s\n", __func__, m->m_pkthdr.rcvif->if_xname); m_freem(m); return (IPPROTO_DONE); } m->m_data -= *offp; carp_input_c(m, ch, AF_INET6); return (IPPROTO_DONE); } #endif /* INET6 */ static void carp_input_c(struct mbuf *m, struct carp_header *ch, sa_family_t af) { struct ifnet *ifp = m->m_pkthdr.rcvif; struct ifaddr *ifa; struct carp_softc *sc; uint64_t tmp_counter; struct timeval sc_tv, ch_tv; /* verify that the VHID is valid on the receiving interface */ IF_ADDR_RLOCK(ifp); IFNET_FOREACH_IFA(ifp, ifa) if (ifa->ifa_addr->sa_family == af && ifa->ifa_carp->sc_vhid == ch->carp_vhid) { ifa_ref(ifa); break; } IF_ADDR_RUNLOCK(ifp); if (ifa == NULL) { CARPSTATS_INC(carps_badvhid); m_freem(m); return; } /* verify the CARP version. 
*/ if (ch->carp_version != CARP_VERSION) { CARPSTATS_INC(carps_badver); CARP_DEBUG("%s: invalid version %d\n", ifp->if_xname, ch->carp_version); ifa_free(ifa); m_freem(m); return; } sc = ifa->ifa_carp; CARP_LOCK(sc); ifa_free(ifa); if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) { CARPSTATS_INC(carps_badauth); CARP_DEBUG("%s: incorrect hash for VHID %u@%s\n", __func__, sc->sc_vhid, ifp->if_xname); goto out; } tmp_counter = ntohl(ch->carp_counter[0]); tmp_counter = tmp_counter<<32; tmp_counter += ntohl(ch->carp_counter[1]); /* XXX Replay protection goes here */ sc->sc_init_counter = 0; sc->sc_counter = tmp_counter; sc_tv.tv_sec = sc->sc_advbase; sc_tv.tv_usec = DEMOTE_ADVSKEW(sc) * 1000000 / 256; ch_tv.tv_sec = ch->carp_advbase; ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256; switch (sc->sc_state) { case INIT: break; case MASTER: /* * If we receive an advertisement from a master who's going to * be more frequent than us, go into BACKUP state. */ if (timevalcmp(&sc_tv, &ch_tv, >) || timevalcmp(&sc_tv, &ch_tv, ==)) { callout_stop(&sc->sc_ad_tmo); carp_set_state(sc, BACKUP, "more frequent advertisement received"); carp_setrun(sc, 0); carp_delroute(sc); } break; case BACKUP: /* * If we're pre-empting masters who advertise slower than us, * and this one claims to be slower, treat him as down. */ if (V_carp_preempt && timevalcmp(&sc_tv, &ch_tv, <)) { carp_master_down_locked(sc, "preempting a slower master"); break; } /* * If the master is going to advertise at such a low frequency * that he's guaranteed to time out, we'd might as well just * treat him as timed out now. */ sc_tv.tv_sec = sc->sc_advbase * 3; if (timevalcmp(&sc_tv, &ch_tv, <)) { carp_master_down_locked(sc, "master will time out"); break; } /* * Otherwise, we reset the counter and wait for the next * advertisement. */ carp_setrun(sc, af); break; } out: CARP_UNLOCK(sc); m_freem(m); } static int carp_prepare_ad(struct mbuf *m, struct carp_softc *sc, struct carp_header *ch) { struct m_tag *mtag; if (sc->sc_init_counter) { /* this could also be seconds since unix epoch */ sc->sc_counter = arc4random(); sc->sc_counter = sc->sc_counter << 32; sc->sc_counter += arc4random(); } else sc->sc_counter++; ch->carp_counter[0] = htonl((sc->sc_counter>>32)&0xffffffff); ch->carp_counter[1] = htonl(sc->sc_counter&0xffffffff); carp_hmac_generate(sc, ch->carp_counter, ch->carp_md); /* Tag packet for carp_output */ if ((mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *), M_NOWAIT)) == NULL) { m_freem(m); CARPSTATS_INC(carps_onomem); return (ENOMEM); } bcopy(&sc, mtag + 1, sizeof(sc)); m_tag_prepend(m, mtag); return (0); } /* * To avoid LORs and possible recursions this function shouldn't * be called directly, but scheduled via taskqueue. */ static void carp_send_ad_all(void *ctx __unused, int pending __unused) { struct carp_softc *sc; mtx_lock(&carp_mtx); LIST_FOREACH(sc, &carp_list, sc_next) if (sc->sc_state == MASTER) { CARP_LOCK(sc); CURVNET_SET(sc->sc_carpdev->if_vnet); carp_send_ad_locked(sc); CURVNET_RESTORE(); CARP_UNLOCK(sc); } mtx_unlock(&carp_mtx); } /* Send a periodic advertisement, executed in callout context. 
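 * The interval between advertisements is sc_advbase seconds plus a
 * fraction derived from the (possibly demotion-adjusted) skew:
 * tv_usec = advskew * 1000000 / 256, so a skew of 0..254 adds up to
 * roughly one extra second.  The vhid with the smaller
 * (advbase, advskew) pair therefore advertises more often and wins the
 * election performed in carp_input_c() above.
 *
 * Minimal sketch of that conversion (not from the committed source; the
 * helper name is made up):
 */
#if 0
static long
example_adv_interval_usec(int advbase, int advskew)
{
	/* e.g. advbase = 1, advskew = 100 -> 1390625 us per advertisement */
	return ((long)advbase * 1000000 + (long)advskew * 1000000 / 256);
}
#endif
/*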
*/ static void carp_send_ad(void *v) { struct carp_softc *sc = v; CARP_LOCK_ASSERT(sc); CURVNET_SET(sc->sc_carpdev->if_vnet); carp_send_ad_locked(sc); CURVNET_RESTORE(); CARP_UNLOCK(sc); } static void carp_send_ad_error(struct carp_softc *sc, int error) { if (error) { if (sc->sc_sendad_errors < INT_MAX) sc->sc_sendad_errors++; if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) { static const char fmt[] = "send error %d on %s"; char msg[sizeof(fmt) + IFNAMSIZ]; sprintf(msg, fmt, error, sc->sc_carpdev->if_xname); carp_demote_adj(V_carp_senderr_adj, msg); } sc->sc_sendad_success = 0; } else { if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS && ++sc->sc_sendad_success >= CARP_SENDAD_MIN_SUCCESS) { static const char fmt[] = "send ok on %s"; char msg[sizeof(fmt) + IFNAMSIZ]; sprintf(msg, fmt, sc->sc_carpdev->if_xname); carp_demote_adj(-V_carp_senderr_adj, msg); sc->sc_sendad_errors = 0; } else sc->sc_sendad_errors = 0; } } static void carp_send_ad_locked(struct carp_softc *sc) { struct carp_header ch; struct timeval tv; struct sockaddr sa; struct ifaddr *ifa; struct carp_header *ch_ptr; struct mbuf *m; int len, advskew; CARP_LOCK_ASSERT(sc); advskew = DEMOTE_ADVSKEW(sc); tv.tv_sec = sc->sc_advbase; tv.tv_usec = advskew * 1000000 / 256; ch.carp_version = CARP_VERSION; ch.carp_type = CARP_ADVERTISEMENT; ch.carp_vhid = sc->sc_vhid; ch.carp_advbase = sc->sc_advbase; ch.carp_advskew = advskew; ch.carp_authlen = 7; /* XXX DEFINE */ ch.carp_pad1 = 0; /* must be zero */ ch.carp_cksum = 0; /* XXXGL: OpenBSD picks first ifaddr with needed family. */ #ifdef INET if (sc->sc_naddrs) { struct ip *ip; m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { CARPSTATS_INC(carps_onomem); goto resched; } len = sizeof(*ip) + sizeof(ch); m->m_pkthdr.len = len; m->m_pkthdr.rcvif = NULL; m->m_len = len; M_ALIGN(m, m->m_len); m->m_flags |= M_MCAST; ip = mtod(m, struct ip *); ip->ip_v = IPVERSION; ip->ip_hl = sizeof(*ip) >> 2; ip->ip_tos = IPTOS_LOWDELAY; ip->ip_len = htons(len); ip->ip_off = htons(IP_DF); ip->ip_ttl = CARP_DFLTTL; ip->ip_p = IPPROTO_CARP; ip->ip_sum = 0; ip_fillid(ip); bzero(&sa, sizeof(sa)); sa.sa_family = AF_INET; ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev); if (ifa != NULL) { ip->ip_src.s_addr = ifatoia(ifa)->ia_addr.sin_addr.s_addr; ifa_free(ifa); } else ip->ip_src.s_addr = 0; ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP); ch_ptr = (struct carp_header *)(&ip[1]); bcopy(&ch, ch_ptr, sizeof(ch)); if (carp_prepare_ad(m, sc, ch_ptr)) goto resched; m->m_data += sizeof(*ip); ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip)); m->m_data -= sizeof(*ip); CARPSTATS_INC(carps_opackets); carp_send_ad_error(sc, ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_carpdev->if_carp->cif_imo, NULL)); } #endif /* INET */ #ifdef INET6 if (sc->sc_naddrs6) { struct ip6_hdr *ip6; m = m_gethdr(M_NOWAIT, MT_DATA); if (m == NULL) { CARPSTATS_INC(carps_onomem); goto resched; } len = sizeof(*ip6) + sizeof(ch); m->m_pkthdr.len = len; m->m_pkthdr.rcvif = NULL; m->m_len = len; M_ALIGN(m, m->m_len); m->m_flags |= M_MCAST; ip6 = mtod(m, struct ip6_hdr *); bzero(ip6, sizeof(*ip6)); ip6->ip6_vfc |= IPV6_VERSION; ip6->ip6_hlim = CARP_DFLTTL; ip6->ip6_nxt = IPPROTO_CARP; bzero(&sa, sizeof(sa)); /* set the source address */ sa.sa_family = AF_INET6; ifa = ifaof_ifpforaddr(&sa, sc->sc_carpdev); if (ifa != NULL) { bcopy(IFA_IN6(ifa), &ip6->ip6_src, sizeof(struct in6_addr)); ifa_free(ifa); } else /* This should never happen with IPv6. */ bzero(&ip6->ip6_src, sizeof(struct in6_addr)); /* Set the multicast destination. 
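 * The byte-wise construction below yields the link-local CARP group
 * ff02::12, the IPv6 counterpart of INADDR_CARP_GROUP (224.0.0.18) used
 * for the IPv4 advertisement above; in6_setscope() then embeds the zone
 * of the parent interface so the packet stays on that link.
 *
 * Userland illustration only (not from the committed source;
 * inet_pton(3) is not available in the kernel):
 */
#if 0
		struct in6_addr example_dst;

		inet_pton(AF_INET6, "ff02::12", &example_dst);
#endif
/*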
*/ ip6->ip6_dst.s6_addr16[0] = htons(0xff02); ip6->ip6_dst.s6_addr8[15] = 0x12; if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) { m_freem(m); CARP_DEBUG("%s: in6_setscope failed\n", __func__); goto resched; } ch_ptr = (struct carp_header *)(&ip6[1]); bcopy(&ch, ch_ptr, sizeof(ch)); if (carp_prepare_ad(m, sc, ch_ptr)) goto resched; m->m_data += sizeof(*ip6); ch_ptr->carp_cksum = in_cksum(m, len - sizeof(*ip6)); m->m_data -= sizeof(*ip6); CARPSTATS_INC(carps_opackets6); carp_send_ad_error(sc, ip6_output(m, NULL, NULL, 0, &sc->sc_carpdev->if_carp->cif_im6o, NULL, NULL)); } #endif /* INET6 */ resched: callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_send_ad, sc); } static void carp_addroute(struct carp_softc *sc) { struct ifaddr *ifa; CARP_FOREACH_IFA(sc, ifa) carp_ifa_addroute(ifa); } static void carp_ifa_addroute(struct ifaddr *ifa) { switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: in_addprefix(ifatoia(ifa), RTF_UP); ifa_add_loopback_route(ifa, (struct sockaddr *)&ifatoia(ifa)->ia_addr); break; #endif #ifdef INET6 case AF_INET6: ifa_add_loopback_route(ifa, (struct sockaddr *)&ifatoia6(ifa)->ia_addr); nd6_add_ifa_lle(ifatoia6(ifa)); break; #endif } } static void carp_delroute(struct carp_softc *sc) { struct ifaddr *ifa; CARP_FOREACH_IFA(sc, ifa) carp_ifa_delroute(ifa); } static void carp_ifa_delroute(struct ifaddr *ifa) { switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: ifa_del_loopback_route(ifa, (struct sockaddr *)&ifatoia(ifa)->ia_addr); in_scrubprefix(ifatoia(ifa), LLE_STATIC); break; #endif #ifdef INET6 case AF_INET6: ifa_del_loopback_route(ifa, (struct sockaddr *)&ifatoia6(ifa)->ia_addr); nd6_rem_ifa_lle(ifatoia6(ifa), 1); break; #endif } } int carp_master(struct ifaddr *ifa) { struct carp_softc *sc = ifa->ifa_carp; return (sc->sc_state == MASTER); } #ifdef INET /* * Broadcast a gratuitous ARP request containing * the virtual router MAC address for each IP address * associated with the virtual router. */ static void carp_send_arp(struct carp_softc *sc) { struct ifaddr *ifa; struct in_addr addr; CARP_FOREACH_IFA(sc, ifa) { if (ifa->ifa_addr->sa_family != AF_INET) continue; addr = ((struct sockaddr_in *)ifa->ifa_addr)->sin_addr; arp_announce_ifaddr(sc->sc_carpdev, addr, LLADDR(&sc->sc_addr)); } } int carp_iamatch(struct ifaddr *ifa, uint8_t **enaddr) { struct carp_softc *sc = ifa->ifa_carp; if (sc->sc_state == MASTER) { *enaddr = LLADDR(&sc->sc_addr); return (1); } return (0); } #endif #ifdef INET6 static void carp_send_na(struct carp_softc *sc) { static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT; struct ifaddr *ifa; struct in6_addr *in6; CARP_FOREACH_IFA(sc, ifa) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; in6 = IFA_IN6(ifa); nd6_na_output(sc->sc_carpdev, &mcast, in6, ND_NA_FLAG_OVERRIDE, 1, NULL); DELAY(1000); /* XXX */ } } /* * Returns ifa in case it's a carp address and it is MASTER, or if the address * matches and is not a carp address. Returns NULL otherwise. 
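 *
 * The gratuitous ARP announcements above (carp_send_arp()) and the ARP
 * reply path (carp_iamatch()) hand out the virtual router MAC address
 * stored in sc_addr rather than the hardware address, so peers re-learn
 * the shared L2 address after a failover.  That address comes from the
 * IANA block 00:00:5e:00:01:xx with the VHID in the last octet (see the
 * LLADDR() assignments in carp_ioctl() and the match in carp_forus()
 * below).
 *
 * Minimal sketch (not from the committed source; the helper name is made
 * up):
 */
#if 0
static void
example_carp_lladdr(uint8_t vhid, uint8_t lladdr[6])
{
	lladdr[0] = 0x00;
	lladdr[1] = 0x00;
	lladdr[2] = 0x5e;
	lladdr[3] = 0x00;
	lladdr[4] = 0x01;
	lladdr[5] = vhid;	/* e.g. vhid 1 -> 00:00:5e:00:01:01 */
}
#endif
/*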
*/ struct ifaddr * carp_iamatch6(struct ifnet *ifp, struct in6_addr *taddr) { struct ifaddr *ifa; ifa = NULL; IF_ADDR_RLOCK(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; if (!IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) continue; if (ifa->ifa_carp && ifa->ifa_carp->sc_state != MASTER) ifa = NULL; else ifa_ref(ifa); break; } IF_ADDR_RUNLOCK(ifp); return (ifa); } caddr_t carp_macmatch6(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr) { struct ifaddr *ifa; IF_ADDR_RLOCK(ifp); IFNET_FOREACH_IFA(ifp, ifa) if (ifa->ifa_addr->sa_family == AF_INET6 && IN6_ARE_ADDR_EQUAL(taddr, IFA_IN6(ifa))) { struct carp_softc *sc = ifa->ifa_carp; struct m_tag *mtag; IF_ADDR_RUNLOCK(ifp); mtag = m_tag_get(PACKET_TAG_CARP, sizeof(struct carp_softc *), M_NOWAIT); if (mtag == NULL) /* Better a bit than nothing. */ return (LLADDR(&sc->sc_addr)); bcopy(&sc, mtag + 1, sizeof(sc)); m_tag_prepend(m, mtag); return (LLADDR(&sc->sc_addr)); } IF_ADDR_RUNLOCK(ifp); return (NULL); } #endif /* INET6 */ int carp_forus(struct ifnet *ifp, u_char *dhost) { struct carp_softc *sc; uint8_t *ena = dhost; if (ena[0] || ena[1] || ena[2] != 0x5e || ena[3] || ena[4] != 1) return (0); CIF_LOCK(ifp->if_carp); IFNET_FOREACH_CARP(ifp, sc) { CARP_LOCK(sc); if (sc->sc_state == MASTER && !bcmp(dhost, LLADDR(&sc->sc_addr), ETHER_ADDR_LEN)) { CARP_UNLOCK(sc); CIF_UNLOCK(ifp->if_carp); return (1); } CARP_UNLOCK(sc); } CIF_UNLOCK(ifp->if_carp); return (0); } /* Master down timeout event, executed in callout context. */ static void carp_master_down(void *v) { struct carp_softc *sc = v; CARP_LOCK_ASSERT(sc); CURVNET_SET(sc->sc_carpdev->if_vnet); if (sc->sc_state == BACKUP) { carp_master_down_locked(sc, "master timed out"); } CURVNET_RESTORE(); CARP_UNLOCK(sc); } static void carp_master_down_locked(struct carp_softc *sc, const char *reason) { CARP_LOCK_ASSERT(sc); switch (sc->sc_state) { case BACKUP: carp_set_state(sc, MASTER, reason); carp_send_ad_locked(sc); #ifdef INET carp_send_arp(sc); #endif #ifdef INET6 carp_send_na(sc); #endif carp_setrun(sc, 0); carp_addroute(sc); break; case INIT: case MASTER: #ifdef INVARIANTS panic("carp: VHID %u@%s: master_down event in %s state\n", sc->sc_vhid, sc->sc_carpdev->if_xname, sc->sc_state ? "MASTER" : "INIT"); #endif break; } } /* * When in backup state, af indicates whether to reset the master down timer * for v4 or v6. If it's set to zero, reset the ones which are already pending. 
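 *
 * The timeout armed here while in BACKUP state is three advertisement
 * intervals, i.e. the backup only takes over after three consecutive
 * advertisements from the master have been missed (the same factor of
 * three appears in the "master will time out" check in carp_input_c()).
 *
 * Minimal sketch of that timeout (not from the committed source; the
 * helper name is made up):
 */
#if 0
static struct timeval
example_master_down_timeout(int advbase, int advskew)
{
	struct timeval tv;

	/* advbase = 1, advskew = 0 -> 3.0 s; advbase = 1, advskew = 100 -> 3.390625 s */
	tv.tv_sec = 3 * advbase;
	tv.tv_usec = advskew * 1000000 / 256;
	return (tv);
}
#endif
/*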
*/ static void carp_setrun(struct carp_softc *sc, sa_family_t af) { struct timeval tv; CARP_LOCK_ASSERT(sc); if ((sc->sc_carpdev->if_flags & IFF_UP) == 0 || sc->sc_carpdev->if_link_state != LINK_STATE_UP || (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0)) return; switch (sc->sc_state) { case INIT: carp_set_state(sc, BACKUP, "initialization complete"); carp_setrun(sc, 0); break; case BACKUP: callout_stop(&sc->sc_ad_tmo); tv.tv_sec = 3 * sc->sc_advbase; tv.tv_usec = sc->sc_advskew * 1000000 / 256; switch (af) { #ifdef INET case AF_INET: callout_reset(&sc->sc_md_tmo, tvtohz(&tv), carp_master_down, sc); break; #endif #ifdef INET6 case AF_INET6: callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), carp_master_down, sc); break; #endif default: #ifdef INET if (sc->sc_naddrs) callout_reset(&sc->sc_md_tmo, tvtohz(&tv), carp_master_down, sc); #endif #ifdef INET6 if (sc->sc_naddrs6) callout_reset(&sc->sc_md6_tmo, tvtohz(&tv), carp_master_down, sc); #endif break; } break; case MASTER: tv.tv_sec = sc->sc_advbase; tv.tv_usec = sc->sc_advskew * 1000000 / 256; callout_reset(&sc->sc_ad_tmo, tvtohz(&tv), carp_send_ad, sc); break; } } /* * Setup multicast structures. */ static int carp_multicast_setup(struct carp_if *cif, sa_family_t sa) { struct ifnet *ifp = cif->cif_ifp; int error = 0; switch (sa) { #ifdef INET case AF_INET: { struct ip_moptions *imo = &cif->cif_imo; struct in_addr addr; if (imo->imo_membership) return (0); imo->imo_membership = (struct in_multi **)malloc( (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_CARP, M_WAITOK); imo->imo_mfilters = NULL; imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; imo->imo_multicast_vif = -1; addr.s_addr = htonl(INADDR_CARP_GROUP); if ((error = in_joingroup(ifp, &addr, NULL, &imo->imo_membership[0])) != 0) { free(imo->imo_membership, M_CARP); break; } imo->imo_num_memberships++; imo->imo_multicast_ifp = ifp; imo->imo_multicast_ttl = CARP_DFLTTL; imo->imo_multicast_loop = 0; break; } #endif #ifdef INET6 case AF_INET6: { struct ip6_moptions *im6o = &cif->cif_im6o; struct in6_addr in6; struct in6_multi *in6m; if (im6o->im6o_membership) return (0); im6o->im6o_membership = (struct in6_multi **)malloc( (sizeof(struct in6_multi *) * IPV6_MIN_MEMBERSHIPS), M_CARP, M_ZERO | M_WAITOK); im6o->im6o_mfilters = NULL; im6o->im6o_max_memberships = IPV6_MIN_MEMBERSHIPS; im6o->im6o_multicast_hlim = CARP_DFLTTL; im6o->im6o_multicast_ifp = ifp; /* Join IPv6 CARP multicast group. */ bzero(&in6, sizeof(in6)); in6.s6_addr16[0] = htons(0xff02); in6.s6_addr8[15] = 0x12; if ((error = in6_setscope(&in6, ifp, NULL)) != 0) { free(im6o->im6o_membership, M_CARP); break; } in6m = NULL; if ((error = in6_mc_join(ifp, &in6, NULL, &in6m, 0)) != 0) { free(im6o->im6o_membership, M_CARP); break; } im6o->im6o_membership[0] = in6m; im6o->im6o_num_memberships++; /* Join solicited multicast address. */ bzero(&in6, sizeof(in6)); in6.s6_addr16[0] = htons(0xff02); in6.s6_addr32[1] = 0; in6.s6_addr32[2] = htonl(1); in6.s6_addr32[3] = 0; in6.s6_addr8[12] = 0xff; if ((error = in6_setscope(&in6, ifp, NULL)) != 0) { in6_mc_leave(im6o->im6o_membership[0], NULL); free(im6o->im6o_membership, M_CARP); break; } in6m = NULL; if ((error = in6_mc_join(ifp, &in6, NULL, &in6m, 0)) != 0) { in6_mc_leave(im6o->im6o_membership[0], NULL); free(im6o->im6o_membership, M_CARP); break; } im6o->im6o_membership[1] = in6m; im6o->im6o_num_memberships++; break; } #endif } return (error); } /* * Free multicast structures. 
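 * carp_multicast_setup() above joins the all-CARP group and, for IPv6, a
 * second group built from the solicited-node prefix; the cleanup below
 * drops those memberships once the last address of that family is gone
 * (cif_naddrs/cif_naddrs6 reach zero).
 *
 * Userland illustration of the two IPv6 groups constructed byte-wise
 * above (not from the committed source; inet_pton(3) is not available in
 * the kernel):
 */
#if 0
static void
example_carp_in6_groups(void)
{
	struct in6_addr carp_grp, sol_grp;

	inet_pton(AF_INET6, "ff02::12", &carp_grp);	/* all-CARP group */
	inet_pton(AF_INET6, "ff02::1:ff00:0", &sol_grp);	/* solicited-node base */
}
#endif
/*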
*/ static void carp_multicast_cleanup(struct carp_if *cif, sa_family_t sa) { sx_assert(&carp_sx, SA_XLOCKED); switch (sa) { #ifdef INET case AF_INET: if (cif->cif_naddrs == 0) { struct ip_moptions *imo = &cif->cif_imo; in_leavegroup(imo->imo_membership[0], NULL); KASSERT(imo->imo_mfilters == NULL, ("%s: imo_mfilters != NULL", __func__)); free(imo->imo_membership, M_CARP); imo->imo_membership = NULL; } break; #endif #ifdef INET6 case AF_INET6: if (cif->cif_naddrs6 == 0) { struct ip6_moptions *im6o = &cif->cif_im6o; in6_mc_leave(im6o->im6o_membership[0], NULL); in6_mc_leave(im6o->im6o_membership[1], NULL); KASSERT(im6o->im6o_mfilters == NULL, ("%s: im6o_mfilters != NULL", __func__)); free(im6o->im6o_membership, M_CARP); im6o->im6o_membership = NULL; } break; #endif } } int carp_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa) { struct m_tag *mtag; struct carp_softc *sc; if (!sa) return (0); switch (sa->sa_family) { #ifdef INET case AF_INET: break; #endif #ifdef INET6 case AF_INET6: break; #endif default: return (0); } mtag = m_tag_find(m, PACKET_TAG_CARP, NULL); if (mtag == NULL) return (0); bcopy(mtag + 1, &sc, sizeof(sc)); /* Set the source MAC address to the Virtual Router MAC Address. */ switch (ifp->if_type) { case IFT_ETHER: case IFT_BRIDGE: case IFT_L2VLAN: { struct ether_header *eh; eh = mtod(m, struct ether_header *); eh->ether_shost[0] = 0; eh->ether_shost[1] = 0; eh->ether_shost[2] = 0x5e; eh->ether_shost[3] = 0; eh->ether_shost[4] = 1; eh->ether_shost[5] = sc->sc_vhid; } break; case IFT_FDDI: { struct fddi_header *fh; fh = mtod(m, struct fddi_header *); fh->fddi_shost[0] = 0; fh->fddi_shost[1] = 0; fh->fddi_shost[2] = 0x5e; fh->fddi_shost[3] = 0; fh->fddi_shost[4] = 1; fh->fddi_shost[5] = sc->sc_vhid; } break; case IFT_ISO88025: { struct iso88025_header *th; th = mtod(m, struct iso88025_header *); th->iso88025_shost[0] = 3; th->iso88025_shost[1] = 0; th->iso88025_shost[2] = 0x40 >> (sc->sc_vhid - 1); th->iso88025_shost[3] = 0x40000 >> (sc->sc_vhid - 1); th->iso88025_shost[4] = 0; th->iso88025_shost[5] = 0; } break; default: printf("%s: carp is not supported for the %d interface type\n", ifp->if_xname, ifp->if_type); return (EOPNOTSUPP); } return (0); } static struct carp_softc* carp_alloc(struct ifnet *ifp) { struct carp_softc *sc; struct carp_if *cif; sx_assert(&carp_sx, SA_XLOCKED); if ((cif = ifp->if_carp) == NULL) cif = carp_alloc_if(ifp); sc = malloc(sizeof(*sc), M_CARP, M_WAITOK|M_ZERO); sc->sc_advbase = CARP_DFLTINTV; sc->sc_vhid = -1; /* required setting */ sc->sc_init_counter = 1; sc->sc_state = INIT; sc->sc_ifasiz = sizeof(struct ifaddr *); sc->sc_ifas = malloc(sc->sc_ifasiz, M_CARP, M_WAITOK|M_ZERO); sc->sc_carpdev = ifp; CARP_LOCK_INIT(sc); #ifdef INET callout_init_mtx(&sc->sc_md_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); #endif #ifdef INET6 callout_init_mtx(&sc->sc_md6_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); #endif callout_init_mtx(&sc->sc_ad_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); CIF_LOCK(cif); TAILQ_INSERT_TAIL(&cif->cif_vrs, sc, sc_list); CIF_UNLOCK(cif); mtx_lock(&carp_mtx); LIST_INSERT_HEAD(&carp_list, sc, sc_next); mtx_unlock(&carp_mtx); return (sc); } static void carp_grow_ifas(struct carp_softc *sc) { struct ifaddr **new; new = malloc(sc->sc_ifasiz * 2, M_CARP, M_WAITOK | M_ZERO); CARP_LOCK(sc); bcopy(sc->sc_ifas, new, sc->sc_ifasiz); free(sc->sc_ifas, M_CARP); sc->sc_ifas = new; sc->sc_ifasiz *= 2; CARP_UNLOCK(sc); } static void carp_destroy(struct carp_softc *sc) { struct ifnet *ifp = sc->sc_carpdev; struct carp_if *cif = 
ifp->if_carp; sx_assert(&carp_sx, SA_XLOCKED); if (sc->sc_suppress) carp_demote_adj(-V_carp_ifdown_adj, "vhid removed"); CARP_UNLOCK(sc); CIF_LOCK(cif); TAILQ_REMOVE(&cif->cif_vrs, sc, sc_list); CIF_UNLOCK(cif); mtx_lock(&carp_mtx); LIST_REMOVE(sc, sc_next); mtx_unlock(&carp_mtx); callout_drain(&sc->sc_ad_tmo); #ifdef INET callout_drain(&sc->sc_md_tmo); #endif #ifdef INET6 callout_drain(&sc->sc_md6_tmo); #endif CARP_LOCK_DESTROY(sc); free(sc->sc_ifas, M_CARP); free(sc, M_CARP); } static struct carp_if* carp_alloc_if(struct ifnet *ifp) { struct carp_if *cif; int error; cif = malloc(sizeof(*cif), M_CARP, M_WAITOK|M_ZERO); if ((error = ifpromisc(ifp, 1)) != 0) printf("%s: ifpromisc(%s) failed: %d\n", __func__, ifp->if_xname, error); else cif->cif_flags |= CIF_PROMISC; CIF_LOCK_INIT(cif); cif->cif_ifp = ifp; TAILQ_INIT(&cif->cif_vrs); IF_ADDR_WLOCK(ifp); ifp->if_carp = cif; if_ref(ifp); IF_ADDR_WUNLOCK(ifp); return (cif); } static void carp_free_if(struct carp_if *cif) { struct ifnet *ifp = cif->cif_ifp; CIF_LOCK_ASSERT(cif); KASSERT(TAILQ_EMPTY(&cif->cif_vrs), ("%s: softc list not empty", __func__)); IF_ADDR_WLOCK(ifp); ifp->if_carp = NULL; IF_ADDR_WUNLOCK(ifp); CIF_LOCK_DESTROY(cif); if (cif->cif_flags & CIF_PROMISC) ifpromisc(ifp, 0); if_rele(ifp); free(cif, M_CARP); } static void carp_carprcp(struct carpreq *carpr, struct carp_softc *sc, int priv) { CARP_LOCK(sc); carpr->carpr_state = sc->sc_state; carpr->carpr_vhid = sc->sc_vhid; carpr->carpr_advbase = sc->sc_advbase; carpr->carpr_advskew = sc->sc_advskew; if (priv) bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key)); else bzero(carpr->carpr_key, sizeof(carpr->carpr_key)); CARP_UNLOCK(sc); } int carp_ioctl(struct ifreq *ifr, u_long cmd, struct thread *td) { struct carpreq carpr; struct ifnet *ifp; struct carp_softc *sc = NULL; int error = 0, locked = 0; - if ((error = copyin(ifr->ifr_data, &carpr, sizeof carpr))) + if ((error = copyin(ifr_data_get_ptr(ifr), &carpr, sizeof carpr))) return (error); ifp = ifunit_ref(ifr->ifr_name); if (ifp == NULL) return (ENXIO); switch (ifp->if_type) { case IFT_ETHER: case IFT_L2VLAN: case IFT_BRIDGE: case IFT_FDDI: case IFT_ISO88025: break; default: error = EOPNOTSUPP; goto out; } if ((ifp->if_flags & IFF_MULTICAST) == 0) { error = EADDRNOTAVAIL; goto out; } sx_xlock(&carp_sx); switch (cmd) { case SIOCSVH: if ((error = priv_check(td, PRIV_NETINET_CARP))) break; if (carpr.carpr_vhid <= 0 || carpr.carpr_vhid > CARP_MAXVHID || carpr.carpr_advbase < 0 || carpr.carpr_advskew < 0) { error = EINVAL; break; } if (ifp->if_carp) { IFNET_FOREACH_CARP(ifp, sc) if (sc->sc_vhid == carpr.carpr_vhid) break; } if (sc == NULL) { sc = carp_alloc(ifp); CARP_LOCK(sc); sc->sc_vhid = carpr.carpr_vhid; LLADDR(&sc->sc_addr)[0] = 0; LLADDR(&sc->sc_addr)[1] = 0; LLADDR(&sc->sc_addr)[2] = 0x5e; LLADDR(&sc->sc_addr)[3] = 0; LLADDR(&sc->sc_addr)[4] = 1; LLADDR(&sc->sc_addr)[5] = sc->sc_vhid; } else CARP_LOCK(sc); locked = 1; if (carpr.carpr_advbase > 0) { if (carpr.carpr_advbase > 255 || carpr.carpr_advbase < CARP_DFLTINTV) { error = EINVAL; break; } sc->sc_advbase = carpr.carpr_advbase; } if (carpr.carpr_advskew >= 255) { error = EINVAL; break; } sc->sc_advskew = carpr.carpr_advskew; if (carpr.carpr_key[0] != '\0') { bcopy(carpr.carpr_key, sc->sc_key, sizeof(sc->sc_key)); carp_hmac_prepare(sc); } if (sc->sc_state != INIT && carpr.carpr_state != sc->sc_state) { switch (carpr.carpr_state) { case BACKUP: callout_stop(&sc->sc_ad_tmo); carp_set_state(sc, BACKUP, "user requested via ifconfig"); carp_setrun(sc, 0); 
carp_delroute(sc); break; case MASTER: carp_master_down_locked(sc, "user requested via ifconfig"); break; default: break; } } break; case SIOCGVH: { int priveleged; if (carpr.carpr_vhid < 0 || carpr.carpr_vhid > CARP_MAXVHID) { error = EINVAL; break; } if (carpr.carpr_count < 1) { error = EMSGSIZE; break; } if (ifp->if_carp == NULL) { error = ENOENT; break; } priveleged = (priv_check(td, PRIV_NETINET_CARP) == 0); if (carpr.carpr_vhid != 0) { IFNET_FOREACH_CARP(ifp, sc) if (sc->sc_vhid == carpr.carpr_vhid) break; if (sc == NULL) { error = ENOENT; break; } carp_carprcp(&carpr, sc, priveleged); - error = copyout(&carpr, ifr->ifr_data, sizeof(carpr)); + error = copyout(&carpr, ifr_data_get_ptr(ifr), + sizeof(carpr)); } else { int i, count; count = 0; IFNET_FOREACH_CARP(ifp, sc) count++; if (count > carpr.carpr_count) { CIF_UNLOCK(ifp->if_carp); error = EMSGSIZE; break; } i = 0; IFNET_FOREACH_CARP(ifp, sc) { carp_carprcp(&carpr, sc, priveleged); carpr.carpr_count = count; - error = copyout(&carpr, ifr->ifr_data + + error = copyout(&carpr, + (caddr_t)ifr_data_get_ptr(ifr) + (i * sizeof(carpr)), sizeof(carpr)); if (error) { CIF_UNLOCK(ifp->if_carp); break; } i++; } } break; } default: error = EINVAL; } sx_xunlock(&carp_sx); out: if (locked) CARP_UNLOCK(sc); if_rele(ifp); return (error); } static int carp_get_vhid(struct ifaddr *ifa) { if (ifa == NULL || ifa->ifa_carp == NULL) return (0); return (ifa->ifa_carp->sc_vhid); } int carp_attach(struct ifaddr *ifa, int vhid) { struct ifnet *ifp = ifa->ifa_ifp; struct carp_if *cif = ifp->if_carp; struct carp_softc *sc; int index, error; KASSERT(ifa->ifa_carp == NULL, ("%s: ifa %p attached", __func__, ifa)); switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: #endif #ifdef INET6 case AF_INET6: #endif break; default: return (EPROTOTYPE); } sx_xlock(&carp_sx); if (ifp->if_carp == NULL) { sx_xunlock(&carp_sx); return (ENOPROTOOPT); } IFNET_FOREACH_CARP(ifp, sc) if (sc->sc_vhid == vhid) break; if (sc == NULL) { sx_xunlock(&carp_sx); return (ENOENT); } error = carp_multicast_setup(cif, ifa->ifa_addr->sa_family); if (error) { CIF_FREE(cif); sx_xunlock(&carp_sx); return (error); } index = sc->sc_naddrs + sc->sc_naddrs6 + 1; if (index > sc->sc_ifasiz / sizeof(struct ifaddr *)) carp_grow_ifas(sc); switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: cif->cif_naddrs++; sc->sc_naddrs++; break; #endif #ifdef INET6 case AF_INET6: cif->cif_naddrs6++; sc->sc_naddrs6++; break; #endif } ifa_ref(ifa); CARP_LOCK(sc); sc->sc_ifas[index - 1] = ifa; ifa->ifa_carp = sc; carp_hmac_prepare(sc); carp_sc_state(sc); CARP_UNLOCK(sc); sx_xunlock(&carp_sx); return (0); } void carp_detach(struct ifaddr *ifa) { struct ifnet *ifp = ifa->ifa_ifp; struct carp_if *cif = ifp->if_carp; struct carp_softc *sc = ifa->ifa_carp; int i, index; KASSERT(sc != NULL, ("%s: %p not attached", __func__, ifa)); sx_xlock(&carp_sx); CARP_LOCK(sc); /* Shift array. 
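 * sc_ifas is kept as a dense pointer array sized in bytes (sc_ifasiz):
 * it starts with room for a single entry and carp_attach() doubles it
 * whenever the next index would not fit (1, 2, 4, 8, ... slots), while
 * removal here shifts the tail down one slot so the array never has
 * holes.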
*/ index = sc->sc_naddrs + sc->sc_naddrs6; for (i = 0; i < index; i++) if (sc->sc_ifas[i] == ifa) break; KASSERT(i < index, ("%s: %p no backref", __func__, ifa)); for (; i < index - 1; i++) sc->sc_ifas[i] = sc->sc_ifas[i+1]; sc->sc_ifas[index - 1] = NULL; switch (ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: cif->cif_naddrs--; sc->sc_naddrs--; break; #endif #ifdef INET6 case AF_INET6: cif->cif_naddrs6--; sc->sc_naddrs6--; break; #endif } carp_ifa_delroute(ifa); carp_multicast_cleanup(cif, ifa->ifa_addr->sa_family); ifa->ifa_carp = NULL; ifa_free(ifa); carp_hmac_prepare(sc); carp_sc_state(sc); if (sc->sc_naddrs == 0 && sc->sc_naddrs6 == 0) carp_destroy(sc); else CARP_UNLOCK(sc); CIF_FREE(cif); sx_xunlock(&carp_sx); } static void carp_set_state(struct carp_softc *sc, int state, const char *reason) { CARP_LOCK_ASSERT(sc); if (sc->sc_state != state) { const char *carp_states[] = { CARP_STATES }; char subsys[IFNAMSIZ+5]; snprintf(subsys, IFNAMSIZ+5, "%u@%s", sc->sc_vhid, sc->sc_carpdev->if_xname); CARP_LOG("%s: %s -> %s (%s)\n", subsys, carp_states[sc->sc_state], carp_states[state], reason); sc->sc_state = state; devctl_notify("CARP", subsys, carp_states[state], NULL); } } static void carp_linkstate(struct ifnet *ifp) { struct carp_softc *sc; CIF_LOCK(ifp->if_carp); IFNET_FOREACH_CARP(ifp, sc) { CARP_LOCK(sc); carp_sc_state(sc); CARP_UNLOCK(sc); } CIF_UNLOCK(ifp->if_carp); } static void carp_sc_state(struct carp_softc *sc) { CARP_LOCK_ASSERT(sc); if (sc->sc_carpdev->if_link_state != LINK_STATE_UP || !(sc->sc_carpdev->if_flags & IFF_UP)) { callout_stop(&sc->sc_ad_tmo); #ifdef INET callout_stop(&sc->sc_md_tmo); #endif #ifdef INET6 callout_stop(&sc->sc_md6_tmo); #endif carp_set_state(sc, INIT, "hardware interface down"); carp_setrun(sc, 0); if (!sc->sc_suppress) carp_demote_adj(V_carp_ifdown_adj, "interface down"); sc->sc_suppress = 1; } else { carp_set_state(sc, INIT, "hardware interface up"); carp_setrun(sc, 0); if (sc->sc_suppress) carp_demote_adj(-V_carp_ifdown_adj, "interface up"); sc->sc_suppress = 0; } } static void carp_demote_adj(int adj, char *reason) { atomic_add_int(&V_carp_demotion, adj); CARP_LOG("demoted by %d to %d (%s)\n", adj, V_carp_demotion, reason); taskqueue_enqueue(taskqueue_swi, &carp_sendall_task); } static int carp_demote_adj_sysctl(SYSCTL_HANDLER_ARGS) { int new, error; new = V_carp_demotion; error = sysctl_handle_int(oidp, &new, 0, req); if (error || !req->newptr) return (error); carp_demote_adj(new, "sysctl"); return (0); } #ifdef INET extern struct domain inetdomain; static struct protosw in_carp_protosw = { .pr_type = SOCK_RAW, .pr_domain = &inetdomain, .pr_protocol = IPPROTO_CARP, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_input = carp_input, .pr_output = rip_output, .pr_ctloutput = rip_ctloutput, .pr_usrreqs = &rip_usrreqs }; #endif #ifdef INET6 extern struct domain inet6domain; static struct protosw in6_carp_protosw = { .pr_type = SOCK_RAW, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_CARP, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_input = carp6_input, .pr_output = rip6_output, .pr_ctloutput = rip6_ctloutput, .pr_usrreqs = &rip6_usrreqs }; #endif static void carp_mod_cleanup(void) { #ifdef INET if (proto_reg[CARP_INET] == 0) { (void)ipproto_unregister(IPPROTO_CARP); pf_proto_unregister(PF_INET, IPPROTO_CARP, SOCK_RAW); proto_reg[CARP_INET] = -1; } carp_iamatch_p = NULL; #endif #ifdef INET6 if (proto_reg[CARP_INET6] == 0) { (void)ip6proto_unregister(IPPROTO_CARP); pf_proto_unregister(PF_INET6, IPPROTO_CARP, SOCK_RAW); proto_reg[CARP_INET6] = -1; } carp_iamatch6_p = 
NULL; carp_macmatch6_p = NULL; #endif carp_ioctl_p = NULL; carp_attach_p = NULL; carp_detach_p = NULL; carp_get_vhid_p = NULL; carp_linkstate_p = NULL; carp_forus_p = NULL; carp_output_p = NULL; carp_demote_adj_p = NULL; carp_master_p = NULL; mtx_unlock(&carp_mtx); taskqueue_drain(taskqueue_swi, &carp_sendall_task); mtx_destroy(&carp_mtx); sx_destroy(&carp_sx); } static int carp_mod_load(void) { int err; mtx_init(&carp_mtx, "carp_mtx", NULL, MTX_DEF); sx_init(&carp_sx, "carp_sx"); LIST_INIT(&carp_list); carp_get_vhid_p = carp_get_vhid; carp_forus_p = carp_forus; carp_output_p = carp_output; carp_linkstate_p = carp_linkstate; carp_ioctl_p = carp_ioctl; carp_attach_p = carp_attach; carp_detach_p = carp_detach; carp_demote_adj_p = carp_demote_adj; carp_master_p = carp_master; #ifdef INET6 carp_iamatch6_p = carp_iamatch6; carp_macmatch6_p = carp_macmatch6; proto_reg[CARP_INET6] = pf_proto_register(PF_INET6, (struct protosw *)&in6_carp_protosw); if (proto_reg[CARP_INET6]) { printf("carp: error %d attaching to PF_INET6\n", proto_reg[CARP_INET6]); carp_mod_cleanup(); return (proto_reg[CARP_INET6]); } err = ip6proto_register(IPPROTO_CARP); if (err) { printf("carp: error %d registering with INET6\n", err); carp_mod_cleanup(); return (err); } #endif #ifdef INET carp_iamatch_p = carp_iamatch; proto_reg[CARP_INET] = pf_proto_register(PF_INET, &in_carp_protosw); if (proto_reg[CARP_INET]) { printf("carp: error %d attaching to PF_INET\n", proto_reg[CARP_INET]); carp_mod_cleanup(); return (proto_reg[CARP_INET]); } err = ipproto_register(IPPROTO_CARP); if (err) { printf("carp: error %d registering with INET\n", err); carp_mod_cleanup(); return (err); } #endif return (0); } static int carp_modevent(module_t mod, int type, void *data) { switch (type) { case MOD_LOAD: return carp_mod_load(); /* NOTREACHED */ case MOD_UNLOAD: mtx_lock(&carp_mtx); if (LIST_EMPTY(&carp_list)) carp_mod_cleanup(); else { mtx_unlock(&carp_mtx); return (EBUSY); } break; default: return (EINVAL); } return (0); } static moduledata_t carp_mod = { "carp", carp_modevent, 0 }; DECLARE_MODULE(carp, carp_mod, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); Index: stable/11/sys/netpfil/pf/if_pfsync.c =================================================================== --- stable/11/sys/netpfil/pf/if_pfsync.c (revision 332287) +++ stable/11/sys/netpfil/pf/if_pfsync.c (revision 332288) @@ -1,2431 +1,2433 @@ /*- * Copyright (c) 2002 Michael Shalayeff * Copyright (c) 2012 Gleb Smirnoff * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 2009 David Gwynne * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ /* * $OpenBSD: if_pfsync.c,v 1.110 2009/02/24 05:39:19 dlg Exp $ * * Revisions picked from OpenBSD after revision 1.110 import: * 1.119 - don't m_copydata() beyond the len of mbuf in pfsync_input() * 1.118, 1.124, 1.148, 1.149, 1.151, 1.171 - fixes to bulk updates * 1.120, 1.175 - use monotonic time_uptime * 1.122 - reduce number of updates for non-TCP sessions * 1.125, 1.127 - rewrite merge or stale processing * 1.128 - cleanups * 1.146 - bzero() mbuf before sparsely filling it with data * 1.170 - SIOCSIFMTU checks * 1.126, 1.142 - deferred packets processing * 1.173 - correct expire time processing */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_pf.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define PFSYNC_MINPKT ( \ sizeof(struct ip) + \ sizeof(struct pfsync_header) + \ sizeof(struct pfsync_subheader) ) struct pfsync_pkt { struct ip *ip; struct in_addr src; u_int8_t flags; }; static int pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *, struct pfsync_state_peer *); static int pfsync_in_clr(struct pfsync_pkt *, struct mbuf *, int, int); static int pfsync_in_ins(struct pfsync_pkt *, struct mbuf *, int, int); static int pfsync_in_iack(struct pfsync_pkt *, struct mbuf *, int, int); static int pfsync_in_upd(struct pfsync_pkt *, struct mbuf *, int, int); static int pfsync_in_upd_c(struct pfsync_pkt *, struct mbuf *, int, int); static int pfsync_in_ureq(struct pfsync_pkt *, struct mbuf *, int, int); static int pfsync_in_del(struct pfsync_pkt *, struct mbuf *, int, int); static int pfsync_in_del_c(struct pfsync_pkt *, struct mbuf *, int, int); static int pfsync_in_bus(struct pfsync_pkt *, struct mbuf *, int, int); static int pfsync_in_tdb(struct pfsync_pkt *, struct mbuf *, int, int); static int pfsync_in_eof(struct pfsync_pkt *, struct mbuf *, int, int); static int pfsync_in_error(struct pfsync_pkt *, struct mbuf *, int, int); static int (*pfsync_acts[])(struct pfsync_pkt *, struct mbuf *, int, int) = { pfsync_in_clr, /* 
PFSYNC_ACT_CLR */ pfsync_in_ins, /* PFSYNC_ACT_INS */ pfsync_in_iack, /* PFSYNC_ACT_INS_ACK */ pfsync_in_upd, /* PFSYNC_ACT_UPD */ pfsync_in_upd_c, /* PFSYNC_ACT_UPD_C */ pfsync_in_ureq, /* PFSYNC_ACT_UPD_REQ */ pfsync_in_del, /* PFSYNC_ACT_DEL */ pfsync_in_del_c, /* PFSYNC_ACT_DEL_C */ pfsync_in_error, /* PFSYNC_ACT_INS_F */ pfsync_in_error, /* PFSYNC_ACT_DEL_F */ pfsync_in_bus, /* PFSYNC_ACT_BUS */ pfsync_in_tdb, /* PFSYNC_ACT_TDB */ pfsync_in_eof /* PFSYNC_ACT_EOF */ }; struct pfsync_q { void (*write)(struct pf_state *, void *); size_t len; u_int8_t action; }; /* we have one of these for every PFSYNC_S_ */ static void pfsync_out_state(struct pf_state *, void *); static void pfsync_out_iack(struct pf_state *, void *); static void pfsync_out_upd_c(struct pf_state *, void *); static void pfsync_out_del(struct pf_state *, void *); static struct pfsync_q pfsync_qs[] = { { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_INS }, { pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK }, { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_UPD }, { pfsync_out_upd_c, sizeof(struct pfsync_upd_c), PFSYNC_ACT_UPD_C }, { pfsync_out_del, sizeof(struct pfsync_del_c), PFSYNC_ACT_DEL_C } }; static void pfsync_q_ins(struct pf_state *, int, bool); static void pfsync_q_del(struct pf_state *, bool); static void pfsync_update_state(struct pf_state *); struct pfsync_upd_req_item { TAILQ_ENTRY(pfsync_upd_req_item) ur_entry; struct pfsync_upd_req ur_msg; }; struct pfsync_deferral { struct pfsync_softc *pd_sc; TAILQ_ENTRY(pfsync_deferral) pd_entry; u_int pd_refs; struct callout pd_tmo; struct pf_state *pd_st; struct mbuf *pd_m; }; struct pfsync_softc { /* Configuration */ struct ifnet *sc_ifp; struct ifnet *sc_sync_if; struct ip_moptions sc_imo; struct in_addr sc_sync_peer; uint32_t sc_flags; #define PFSYNCF_OK 0x00000001 #define PFSYNCF_DEFER 0x00000002 #define PFSYNCF_PUSH 0x00000004 uint8_t sc_maxupdates; struct ip sc_template; struct callout sc_tmo; struct mtx sc_mtx; /* Queued data */ size_t sc_len; TAILQ_HEAD(, pf_state) sc_qs[PFSYNC_S_COUNT]; TAILQ_HEAD(, pfsync_upd_req_item) sc_upd_req_list; TAILQ_HEAD(, pfsync_deferral) sc_deferrals; u_int sc_deferred; void *sc_plus; size_t sc_pluslen; /* Bulk update info */ struct mtx sc_bulk_mtx; uint32_t sc_ureq_sent; int sc_bulk_tries; uint32_t sc_ureq_received; int sc_bulk_hashid; uint64_t sc_bulk_stateid; uint32_t sc_bulk_creatorid; struct callout sc_bulk_tmo; struct callout sc_bulkfail_tmo; }; #define PFSYNC_LOCK(sc) mtx_lock(&(sc)->sc_mtx) #define PFSYNC_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) #define PFSYNC_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) #define PFSYNC_BLOCK(sc) mtx_lock(&(sc)->sc_bulk_mtx) #define PFSYNC_BUNLOCK(sc) mtx_unlock(&(sc)->sc_bulk_mtx) #define PFSYNC_BLOCK_ASSERT(sc) mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED) static const char pfsyncname[] = "pfsync"; static MALLOC_DEFINE(M_PFSYNC, pfsyncname, "pfsync(4) data"); static VNET_DEFINE(struct pfsync_softc *, pfsyncif) = NULL; #define V_pfsyncif VNET(pfsyncif) static VNET_DEFINE(void *, pfsync_swi_cookie) = NULL; #define V_pfsync_swi_cookie VNET(pfsync_swi_cookie) static VNET_DEFINE(struct pfsyncstats, pfsyncstats); #define V_pfsyncstats VNET(pfsyncstats) static VNET_DEFINE(int, pfsync_carp_adj) = CARP_MAXSKEW; #define V_pfsync_carp_adj VNET(pfsync_carp_adj) static void pfsync_timeout(void *); static void pfsync_push(struct pfsync_softc *); static void pfsyncintr(void *); static int pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *, void *); static 
void pfsync_multicast_cleanup(struct pfsync_softc *); static void pfsync_pointers_init(void); static void pfsync_pointers_uninit(void); static int pfsync_init(void); static void pfsync_uninit(void); SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW, 0, "PFSYNC"); SYSCTL_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(pfsyncstats), pfsyncstats, "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)"); SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_RW, &VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment"); static int pfsync_clone_create(struct if_clone *, int, caddr_t); static void pfsync_clone_destroy(struct ifnet *); static int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, struct pf_state_peer *); static int pfsyncoutput(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); static int pfsyncioctl(struct ifnet *, u_long, caddr_t); static int pfsync_defer(struct pf_state *, struct mbuf *); static void pfsync_undefer(struct pfsync_deferral *, int); static void pfsync_undefer_state(struct pf_state *, int); static void pfsync_defer_tmo(void *); static void pfsync_request_update(u_int32_t, u_int64_t); static void pfsync_update_state_req(struct pf_state *); static void pfsync_drop(struct pfsync_softc *); static void pfsync_sendout(int); static void pfsync_send_plus(void *, size_t); static void pfsync_bulk_start(void); static void pfsync_bulk_status(u_int8_t); static void pfsync_bulk_update(void *); static void pfsync_bulk_fail(void *); #ifdef IPSEC static void pfsync_update_net_tdb(struct pfsync_tdb *); #endif #define PFSYNC_MAX_BULKTRIES 12 VNET_DEFINE(struct if_clone *, pfsync_cloner); #define V_pfsync_cloner VNET(pfsync_cloner) static int pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param) { struct pfsync_softc *sc; struct ifnet *ifp; int q; if (unit != 0) return (EINVAL); sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO); sc->sc_flags |= PFSYNCF_OK; for (q = 0; q < PFSYNC_S_COUNT; q++) TAILQ_INIT(&sc->sc_qs[q]); TAILQ_INIT(&sc->sc_upd_req_list); TAILQ_INIT(&sc->sc_deferrals); sc->sc_len = PFSYNC_MINPKT; sc->sc_maxupdates = 128; ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC); if (ifp == NULL) { free(sc, M_PFSYNC); return (ENOSPC); } if_initname(ifp, pfsyncname, unit); ifp->if_softc = sc; ifp->if_ioctl = pfsyncioctl; ifp->if_output = pfsyncoutput; ifp->if_type = IFT_PFSYNC; ifp->if_snd.ifq_maxlen = ifqmaxlen; ifp->if_hdrlen = sizeof(struct pfsync_header); ifp->if_mtu = ETHERMTU; mtx_init(&sc->sc_mtx, pfsyncname, NULL, MTX_DEF); mtx_init(&sc->sc_bulk_mtx, "pfsync bulk", NULL, MTX_DEF); callout_init(&sc->sc_tmo, 1); callout_init_mtx(&sc->sc_bulk_tmo, &sc->sc_bulk_mtx, 0); callout_init_mtx(&sc->sc_bulkfail_tmo, &sc->sc_bulk_mtx, 0); if_attach(ifp); bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN); V_pfsyncif = sc; return (0); } static void pfsync_clone_destroy(struct ifnet *ifp) { struct pfsync_softc *sc = ifp->if_softc; /* * At this stage, everything should have already been * cleared by pfsync_uninit(), and we have only to * drain callouts. 
*/ while (sc->sc_deferred > 0) { struct pfsync_deferral *pd = TAILQ_FIRST(&sc->sc_deferrals); TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); sc->sc_deferred--; if (callout_stop(&pd->pd_tmo) > 0) { pf_release_state(pd->pd_st); m_freem(pd->pd_m); free(pd, M_PFSYNC); } else { pd->pd_refs++; callout_drain(&pd->pd_tmo); free(pd, M_PFSYNC); } } callout_drain(&sc->sc_tmo); callout_drain(&sc->sc_bulkfail_tmo); callout_drain(&sc->sc_bulk_tmo); if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy"); bpfdetach(ifp); if_detach(ifp); pfsync_drop(sc); if_free(ifp); if (sc->sc_imo.imo_membership) pfsync_multicast_cleanup(sc); mtx_destroy(&sc->sc_mtx); mtx_destroy(&sc->sc_bulk_mtx); free(sc, M_PFSYNC); V_pfsyncif = NULL; } static int pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, struct pf_state_peer *d) { if (s->scrub.scrub_flag && d->scrub == NULL) { d->scrub = uma_zalloc(V_pf_state_scrub_z, M_NOWAIT | M_ZERO); if (d->scrub == NULL) return (ENOMEM); } return (0); } static int pfsync_state_import(struct pfsync_state *sp, u_int8_t flags) { struct pfsync_softc *sc = V_pfsyncif; #ifndef __NO_STRICT_ALIGNMENT struct pfsync_state_key key[2]; #endif struct pfsync_state_key *kw, *ks; struct pf_state *st = NULL; struct pf_state_key *skw = NULL, *sks = NULL; struct pf_rule *r = NULL; struct pfi_kif *kif; int error; PF_RULES_RASSERT(); if (sp->creatorid == 0) { if (V_pf_status.debug >= PF_DEBUG_MISC) printf("%s: invalid creator id: %08x\n", __func__, ntohl(sp->creatorid)); return (EINVAL); } if ((kif = pfi_kif_find(sp->ifname)) == NULL) { if (V_pf_status.debug >= PF_DEBUG_MISC) printf("%s: unknown interface: %s\n", __func__, sp->ifname); if (flags & PFSYNC_SI_IOCTL) return (EINVAL); return (0); /* skip this state */ } /* * If the ruleset checksums match or the state is coming from the ioctl, * it's safe to associate the state with the rule of that number. */ if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) < pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount) r = pf_main_ruleset.rules[ PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)]; else r = &V_pf_default_rule; if ((r->max_states && counter_u64_fetch(r->states_cur) >= r->max_states)) goto cleanup; /* * XXXGL: consider M_WAITOK in ioctl path after. */ if ((st = uma_zalloc(V_pf_state_z, M_NOWAIT | M_ZERO)) == NULL) goto cleanup; if ((skw = uma_zalloc(V_pf_state_key_z, M_NOWAIT)) == NULL) goto cleanup; #ifndef __NO_STRICT_ALIGNMENT bcopy(&sp->key, key, sizeof(struct pfsync_state_key) * 2); kw = &key[PF_SK_WIRE]; ks = &key[PF_SK_STACK]; #else kw = &sp->key[PF_SK_WIRE]; ks = &sp->key[PF_SK_STACK]; #endif if (PF_ANEQ(&kw->addr[0], &ks->addr[0], sp->af) || PF_ANEQ(&kw->addr[1], &ks->addr[1], sp->af) || kw->port[0] != ks->port[0] || kw->port[1] != ks->port[1]) { sks = uma_zalloc(V_pf_state_key_z, M_NOWAIT); if (sks == NULL) goto cleanup; } else sks = skw; /* allocate memory for scrub info */ if (pfsync_alloc_scrub_memory(&sp->src, &st->src) || pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) goto cleanup; /* Copy to state key(s). 
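 * The wire key (kw) describes the flow as it appears on the wire and the
 * stack key (ks) as the local stack sees it; the comparison above only
 * allocates a separate stack key when the two differ, i.e. when the
 * state was translated, so in the common untranslated case sks == skw
 * and a single pf_state_key is shared by both sides.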
*/ skw->addr[0] = kw->addr[0]; skw->addr[1] = kw->addr[1]; skw->port[0] = kw->port[0]; skw->port[1] = kw->port[1]; skw->proto = sp->proto; skw->af = sp->af; if (sks != skw) { sks->addr[0] = ks->addr[0]; sks->addr[1] = ks->addr[1]; sks->port[0] = ks->port[0]; sks->port[1] = ks->port[1]; sks->proto = sp->proto; sks->af = sp->af; } /* copy to state */ bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr)); st->creation = time_uptime - ntohl(sp->creation); st->expire = time_uptime; if (sp->expire) { uint32_t timeout; timeout = r->timeout[sp->timeout]; if (!timeout) timeout = V_pf_default_rule.timeout[sp->timeout]; /* sp->expire may have been adaptively scaled by export. */ st->expire -= timeout - ntohl(sp->expire); } st->direction = sp->direction; st->log = sp->log; st->timeout = sp->timeout; st->state_flags = sp->state_flags; st->id = sp->id; st->creatorid = sp->creatorid; pf_state_peer_ntoh(&sp->src, &st->src); pf_state_peer_ntoh(&sp->dst, &st->dst); st->rule.ptr = r; st->nat_rule.ptr = NULL; st->anchor.ptr = NULL; st->rt_kif = NULL; st->pfsync_time = time_uptime; st->sync_state = PFSYNC_S_NONE; if (!(flags & PFSYNC_SI_IOCTL)) st->state_flags |= PFSTATE_NOSYNC; if ((error = pf_state_insert(kif, skw, sks, st)) != 0) goto cleanup_state; /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ counter_u64_add(r->states_cur, 1); counter_u64_add(r->states_tot, 1); if (!(flags & PFSYNC_SI_IOCTL)) { st->state_flags &= ~PFSTATE_NOSYNC; if (st->state_flags & PFSTATE_ACK) { pfsync_q_ins(st, PFSYNC_S_IACK, true); pfsync_push(sc); } } st->state_flags &= ~PFSTATE_ACK; PF_STATE_UNLOCK(st); return (0); cleanup: error = ENOMEM; if (skw == sks) sks = NULL; if (skw != NULL) uma_zfree(V_pf_state_key_z, skw); if (sks != NULL) uma_zfree(V_pf_state_key_z, sks); cleanup_state: /* pf_state_insert() frees the state keys. */ if (st) { if (st->dst.scrub) uma_zfree(V_pf_state_scrub_z, st->dst.scrub); if (st->src.scrub) uma_zfree(V_pf_state_scrub_z, st->src.scrub); uma_zfree(V_pf_state_z, st); } return (error); } static int pfsync_input(struct mbuf **mp, int *offp __unused, int proto __unused) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_pkt pkt; struct mbuf *m = *mp; struct ip *ip = mtod(m, struct ip *); struct pfsync_header *ph; struct pfsync_subheader subh; int offset, len; int rv; uint16_t count; *mp = NULL; V_pfsyncstats.pfsyncs_ipackets++; /* Verify that we have a sync interface configured. */ if (!sc || !sc->sc_sync_if || !V_pf_status.running || (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) goto done; /* verify that the packet came in on the right interface */ if (sc->sc_sync_if != m->m_pkthdr.rcvif) { V_pfsyncstats.pfsyncs_badif++; goto done; } if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1); if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); /* verify that the IP TTL is 255. 
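 * As in carp_input(), requiring the initial TTL of 255 (PFSYNC_DFLTTL)
 * ensures the update came from a directly attached peer.
 *
 * Side note on pfsync_state_import() above: the peer exports how much
 * lifetime a state has left, so the local expiry stamp is back-dated to
 * keep the same amount of time on the clock.  Worked example restating
 * the arithmetic done there (illustrative only):
 */
#if 0
	/*
	 * Rule timeout 60 s, peer reports 45 s remaining:
	 * st->expire = time_uptime - (60 - 45), so the state still times
	 * out 45 s from now, since expiry fires at expire + timeout.
	 */
	st->expire = time_uptime - (timeout - ntohl(sp->expire));
#endif
/*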
*/ if (ip->ip_ttl != PFSYNC_DFLTTL) { V_pfsyncstats.pfsyncs_badttl++; goto done; } offset = ip->ip_hl << 2; if (m->m_pkthdr.len < offset + sizeof(*ph)) { V_pfsyncstats.pfsyncs_hdrops++; goto done; } if (offset + sizeof(*ph) > m->m_len) { if (m_pullup(m, offset + sizeof(*ph)) == NULL) { V_pfsyncstats.pfsyncs_hdrops++; return (IPPROTO_DONE); } ip = mtod(m, struct ip *); } ph = (struct pfsync_header *)((char *)ip + offset); /* verify the version */ if (ph->version != PFSYNC_VERSION) { V_pfsyncstats.pfsyncs_badver++; goto done; } len = ntohs(ph->len) + offset; if (m->m_pkthdr.len < len) { V_pfsyncstats.pfsyncs_badlen++; goto done; } /* Cheaper to grab this now than having to mess with mbufs later */ pkt.ip = ip; pkt.src = ip->ip_src; pkt.flags = 0; /* * Trusting pf_chksum during packet processing, as well as seeking * in interface name tree, require holding PF_RULES_RLOCK(). */ PF_RULES_RLOCK(); if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) pkt.flags |= PFSYNC_SI_CKSUM; offset += sizeof(*ph); while (offset <= len - sizeof(subh)) { m_copydata(m, offset, sizeof(subh), (caddr_t)&subh); offset += sizeof(subh); if (subh.action >= PFSYNC_ACT_MAX) { V_pfsyncstats.pfsyncs_badact++; PF_RULES_RUNLOCK(); goto done; } count = ntohs(subh.count); V_pfsyncstats.pfsyncs_iacts[subh.action] += count; rv = (*pfsync_acts[subh.action])(&pkt, m, offset, count); if (rv == -1) { PF_RULES_RUNLOCK(); return (IPPROTO_DONE); } offset += rv; } PF_RULES_RUNLOCK(); done: m_freem(m); return (IPPROTO_DONE); } static int pfsync_in_clr(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { struct pfsync_clr *clr; struct mbuf *mp; int len = sizeof(*clr) * count; int i, offp; u_int32_t creatorid; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } clr = (struct pfsync_clr *)(mp->m_data + offp); for (i = 0; i < count; i++) { creatorid = clr[i].creatorid; if (clr[i].ifname[0] != '\0' && pfi_kif_find(clr[i].ifname) == NULL) continue; for (int i = 0; i <= pf_hashmask; i++) { struct pf_idhash *ih = &V_pf_idhash[i]; struct pf_state *s; relock: PF_HASHROW_LOCK(ih); LIST_FOREACH(s, &ih->states, entry) { if (s->creatorid == creatorid) { s->state_flags |= PFSTATE_NOSYNC; pf_unlink_state(s, PF_ENTER_LOCKED); goto relock; } } PF_HASHROW_UNLOCK(ih); } } return (len); } static int pfsync_in_ins(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { struct mbuf *mp; struct pfsync_state *sa, *sp; int len = sizeof(*sp) * count; int i, offp; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } sa = (struct pfsync_state *)(mp->m_data + offp); for (i = 0; i < count; i++) { sp = &sa[i]; /* Check for invalid values. */ if (sp->timeout >= PFTM_MAX || sp->src.state > PF_TCPS_PROXY_DST || sp->dst.state > PF_TCPS_PROXY_DST || sp->direction > PF_OUT || (sp->af != AF_INET && sp->af != AF_INET6)) { if (V_pf_status.debug >= PF_DEBUG_MISC) printf("%s: invalid value\n", __func__); V_pfsyncstats.pfsyncs_badval++; continue; } if (pfsync_state_import(sp, pkt->flags) == ENOMEM) /* Drop out, but process the rest of the actions. 
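 * Only an allocation failure aborts this loop; the full length is still
 * returned, so pfsync_input() skips past this sub-header and the
 * remaining actions in the frame are still processed.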
*/ break; } return (len); } static int pfsync_in_iack(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { struct pfsync_ins_ack *ia, *iaa; struct pf_state *st; struct mbuf *mp; int len = count * sizeof(*ia); int offp, i; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } iaa = (struct pfsync_ins_ack *)(mp->m_data + offp); for (i = 0; i < count; i++) { ia = &iaa[i]; st = pf_find_state_byid(ia->id, ia->creatorid); if (st == NULL) continue; if (st->state_flags & PFSTATE_ACK) { PFSYNC_LOCK(V_pfsyncif); pfsync_undefer_state(st, 0); PFSYNC_UNLOCK(V_pfsyncif); } PF_STATE_UNLOCK(st); } /* * XXX this is not yet implemented, but we know the size of the * message so we can skip it. */ return (count * sizeof(struct pfsync_ins_ack)); } static int pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src, struct pfsync_state_peer *dst) { int sync = 0; PF_STATE_LOCK_ASSERT(st); /* * The state should never go backwards except * for syn-proxy states. Neither should the * sequence window slide backwards. */ if ((st->src.state > src->state && (st->src.state < PF_TCPS_PROXY_SRC || src->state >= PF_TCPS_PROXY_SRC)) || (st->src.state == src->state && SEQ_GT(st->src.seqlo, ntohl(src->seqlo)))) sync++; else pf_state_peer_ntoh(src, &st->src); if ((st->dst.state > dst->state) || (st->dst.state >= TCPS_SYN_SENT && SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo)))) sync++; else pf_state_peer_ntoh(dst, &st->dst); return (sync); } static int pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_state *sa, *sp; struct pf_state *st; int sync; struct mbuf *mp; int len = count * sizeof(*sp); int offp, i; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } sa = (struct pfsync_state *)(mp->m_data + offp); for (i = 0; i < count; i++) { sp = &sa[i]; /* check for invalid values */ if (sp->timeout >= PFTM_MAX || sp->src.state > PF_TCPS_PROXY_DST || sp->dst.state > PF_TCPS_PROXY_DST) { if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("pfsync_input: PFSYNC_ACT_UPD: " "invalid value\n"); } V_pfsyncstats.pfsyncs_badval++; continue; } st = pf_find_state_byid(sp->id, sp->creatorid); if (st == NULL) { /* insert the update */ if (pfsync_state_import(sp, 0)) V_pfsyncstats.pfsyncs_badstate++; continue; } if (st->state_flags & PFSTATE_ACK) { PFSYNC_LOCK(sc); pfsync_undefer_state(st, 1); PFSYNC_UNLOCK(sc); } if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) sync = pfsync_upd_tcp(st, &sp->src, &sp->dst); else { sync = 0; /* * Non-TCP protocol state machine always go * forwards */ if (st->src.state > sp->src.state) sync++; else pf_state_peer_ntoh(&sp->src, &st->src); if (st->dst.state > sp->dst.state) sync++; else pf_state_peer_ntoh(&sp->dst, &st->dst); } if (sync < 2) { pfsync_alloc_scrub_memory(&sp->dst, &st->dst); pf_state_peer_ntoh(&sp->dst, &st->dst); st->expire = time_uptime; st->timeout = sp->timeout; } st->pfsync_time = time_uptime; if (sync) { V_pfsyncstats.pfsyncs_stale++; pfsync_update_state(st); PF_STATE_UNLOCK(st); PFSYNC_LOCK(sc); pfsync_push(sc); PFSYNC_UNLOCK(sc); continue; } PF_STATE_UNLOCK(st); } return (len); } static int pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_upd_c *ua, *up; struct pf_state *st; int len = count * sizeof(*up); int sync; struct mbuf *mp; int offp, i; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { 
V_pfsyncstats.pfsyncs_badlen++; return (-1); } ua = (struct pfsync_upd_c *)(mp->m_data + offp); for (i = 0; i < count; i++) { up = &ua[i]; /* check for invalid values */ if (up->timeout >= PFTM_MAX || up->src.state > PF_TCPS_PROXY_DST || up->dst.state > PF_TCPS_PROXY_DST) { if (V_pf_status.debug >= PF_DEBUG_MISC) { printf("pfsync_input: " "PFSYNC_ACT_UPD_C: " "invalid value\n"); } V_pfsyncstats.pfsyncs_badval++; continue; } st = pf_find_state_byid(up->id, up->creatorid); if (st == NULL) { /* We don't have this state. Ask for it. */ PFSYNC_LOCK(sc); pfsync_request_update(up->creatorid, up->id); PFSYNC_UNLOCK(sc); continue; } if (st->state_flags & PFSTATE_ACK) { PFSYNC_LOCK(sc); pfsync_undefer_state(st, 1); PFSYNC_UNLOCK(sc); } if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) sync = pfsync_upd_tcp(st, &up->src, &up->dst); else { sync = 0; /* * Non-TCP protocol state machine always go * forwards */ if (st->src.state > up->src.state) sync++; else pf_state_peer_ntoh(&up->src, &st->src); if (st->dst.state > up->dst.state) sync++; else pf_state_peer_ntoh(&up->dst, &st->dst); } if (sync < 2) { pfsync_alloc_scrub_memory(&up->dst, &st->dst); pf_state_peer_ntoh(&up->dst, &st->dst); st->expire = time_uptime; st->timeout = up->timeout; } st->pfsync_time = time_uptime; if (sync) { V_pfsyncstats.pfsyncs_stale++; pfsync_update_state(st); PF_STATE_UNLOCK(st); PFSYNC_LOCK(sc); pfsync_push(sc); PFSYNC_UNLOCK(sc); continue; } PF_STATE_UNLOCK(st); } return (len); } static int pfsync_in_ureq(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { struct pfsync_upd_req *ur, *ura; struct mbuf *mp; int len = count * sizeof(*ur); int i, offp; struct pf_state *st; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } ura = (struct pfsync_upd_req *)(mp->m_data + offp); for (i = 0; i < count; i++) { ur = &ura[i]; if (ur->id == 0 && ur->creatorid == 0) pfsync_bulk_start(); else { st = pf_find_state_byid(ur->id, ur->creatorid); if (st == NULL) { V_pfsyncstats.pfsyncs_badstate++; continue; } if (st->state_flags & PFSTATE_NOSYNC) { PF_STATE_UNLOCK(st); continue; } pfsync_update_state_req(st); PF_STATE_UNLOCK(st); } } return (len); } static int pfsync_in_del(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { struct mbuf *mp; struct pfsync_state *sa, *sp; struct pf_state *st; int len = count * sizeof(*sp); int offp, i; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } sa = (struct pfsync_state *)(mp->m_data + offp); for (i = 0; i < count; i++) { sp = &sa[i]; st = pf_find_state_byid(sp->id, sp->creatorid); if (st == NULL) { V_pfsyncstats.pfsyncs_badstate++; continue; } st->state_flags |= PFSTATE_NOSYNC; pf_unlink_state(st, PF_ENTER_LOCKED); } return (len); } static int pfsync_in_del_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { struct mbuf *mp; struct pfsync_del_c *sa, *sp; struct pf_state *st; int len = count * sizeof(*sp); int offp, i; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } sa = (struct pfsync_del_c *)(mp->m_data + offp); for (i = 0; i < count; i++) { sp = &sa[i]; st = pf_find_state_byid(sp->id, sp->creatorid); if (st == NULL) { V_pfsyncstats.pfsyncs_badstate++; continue; } st->state_flags |= PFSTATE_NOSYNC; pf_unlink_state(st, PF_ENTER_LOCKED); } return (len); } static int pfsync_in_bus(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_bus *bus; 
struct mbuf *mp; int len = count * sizeof(*bus); int offp; PFSYNC_BLOCK(sc); /* If we're not waiting for a bulk update, who cares. */ if (sc->sc_ureq_sent == 0) { PFSYNC_BUNLOCK(sc); return (len); } mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { PFSYNC_BUNLOCK(sc); V_pfsyncstats.pfsyncs_badlen++; return (-1); } bus = (struct pfsync_bus *)(mp->m_data + offp); switch (bus->status) { case PFSYNC_BUS_START: callout_reset(&sc->sc_bulkfail_tmo, 4 * hz + V_pf_limits[PF_LIMIT_STATES].limit / ((sc->sc_ifp->if_mtu - PFSYNC_MINPKT) / sizeof(struct pfsync_state)), pfsync_bulk_fail, sc); if (V_pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: received bulk update start\n"); break; case PFSYNC_BUS_END: if (time_uptime - ntohl(bus->endtime) >= sc->sc_ureq_sent) { /* that's it, we're happy */ sc->sc_ureq_sent = 0; sc->sc_bulk_tries = 0; callout_stop(&sc->sc_bulkfail_tmo); if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync bulk done"); sc->sc_flags |= PFSYNCF_OK; if (V_pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: received valid " "bulk update end\n"); } else { if (V_pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: received invalid " "bulk update end: bad timestamp\n"); } break; } PFSYNC_BUNLOCK(sc); return (len); } static int pfsync_in_tdb(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { int len = count * sizeof(struct pfsync_tdb); #if defined(IPSEC) struct pfsync_tdb *tp; struct mbuf *mp; int offp; int i; int s; mp = m_pulldown(m, offset, len, &offp); if (mp == NULL) { V_pfsyncstats.pfsyncs_badlen++; return (-1); } tp = (struct pfsync_tdb *)(mp->m_data + offp); for (i = 0; i < count; i++) pfsync_update_net_tdb(&tp[i]); #endif return (len); } #if defined(IPSEC) /* Update an in-kernel tdb. Silently fail if no tdb is found. */ static void pfsync_update_net_tdb(struct pfsync_tdb *pt) { struct tdb *tdb; int s; /* check for invalid values */ if (ntohl(pt->spi) <= SPI_RESERVED_MAX || (pt->dst.sa.sa_family != AF_INET && pt->dst.sa.sa_family != AF_INET6)) goto bad; tdb = gettdb(pt->spi, &pt->dst, pt->sproto); if (tdb) { pt->rpl = ntohl(pt->rpl); pt->cur_bytes = (unsigned long long)be64toh(pt->cur_bytes); /* Neither replay nor byte counter should ever decrease. */ if (pt->rpl < tdb->tdb_rpl || pt->cur_bytes < tdb->tdb_cur_bytes) { goto bad; } tdb->tdb_rpl = pt->rpl; tdb->tdb_cur_bytes = pt->cur_bytes; } return; bad: if (V_pf_status.debug >= PF_DEBUG_MISC) printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: " "invalid value\n"); V_pfsyncstats.pfsyncs_badstate++; return; } #endif static int pfsync_in_eof(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { /* check if we are at the right place in the packet */ if (offset != m->m_pkthdr.len) V_pfsyncstats.pfsyncs_badlen++; /* we're done. 
free and let the caller return */ m_freem(m); return (-1); } static int pfsync_in_error(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) { V_pfsyncstats.pfsyncs_badact++; m_freem(m); return (-1); } static int pfsyncoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, struct route *rt) { m_freem(m); return (0); } /* ARGSUSED */ static int pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct pfsync_softc *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; struct pfsyncreq pfsyncr; int error; switch (cmd) { case SIOCSIFFLAGS: PFSYNC_LOCK(sc); if (ifp->if_flags & IFF_UP) { ifp->if_drv_flags |= IFF_DRV_RUNNING; PFSYNC_UNLOCK(sc); pfsync_pointers_init(); } else { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; PFSYNC_UNLOCK(sc); pfsync_pointers_uninit(); } break; case SIOCSIFMTU: if (!sc->sc_sync_if || ifr->ifr_mtu <= PFSYNC_MINPKT || ifr->ifr_mtu > sc->sc_sync_if->if_mtu) return (EINVAL); if (ifr->ifr_mtu < ifp->if_mtu) { PFSYNC_LOCK(sc); if (sc->sc_len > PFSYNC_MINPKT) pfsync_sendout(1); PFSYNC_UNLOCK(sc); } ifp->if_mtu = ifr->ifr_mtu; break; case SIOCGETPFSYNC: bzero(&pfsyncr, sizeof(pfsyncr)); PFSYNC_LOCK(sc); if (sc->sc_sync_if) { strlcpy(pfsyncr.pfsyncr_syncdev, sc->sc_sync_if->if_xname, IFNAMSIZ); } pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer; pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; pfsyncr.pfsyncr_defer = (PFSYNCF_DEFER == (sc->sc_flags & PFSYNCF_DEFER)); PFSYNC_UNLOCK(sc); - return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr))); + return (copyout(&pfsyncr, ifr_data_get_ptr(ifr), + sizeof(pfsyncr))); case SIOCSETPFSYNC: { struct ip_moptions *imo = &sc->sc_imo; struct ifnet *sifp; struct ip *ip; void *mship = NULL; if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0) return (error); - if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr)))) + if ((error = copyin(ifr_data_get_ptr(ifr), &pfsyncr, + sizeof(pfsyncr)))) return (error); if (pfsyncr.pfsyncr_maxupdates > 255) return (EINVAL); if (pfsyncr.pfsyncr_syncdev[0] == 0) sifp = NULL; else if ((sifp = ifunit_ref(pfsyncr.pfsyncr_syncdev)) == NULL) return (EINVAL); if (sifp != NULL && ( pfsyncr.pfsyncr_syncpeer.s_addr == 0 || pfsyncr.pfsyncr_syncpeer.s_addr == htonl(INADDR_PFSYNC_GROUP))) mship = malloc((sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_PFSYNC, M_WAITOK | M_ZERO); PFSYNC_LOCK(sc); if (pfsyncr.pfsyncr_syncpeer.s_addr == 0) sc->sc_sync_peer.s_addr = htonl(INADDR_PFSYNC_GROUP); else sc->sc_sync_peer.s_addr = pfsyncr.pfsyncr_syncpeer.s_addr; sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates; if (pfsyncr.pfsyncr_defer) { sc->sc_flags |= PFSYNCF_DEFER; pfsync_defer_ptr = pfsync_defer; } else { sc->sc_flags &= ~PFSYNCF_DEFER; pfsync_defer_ptr = NULL; } if (sifp == NULL) { if (sc->sc_sync_if) if_rele(sc->sc_sync_if); sc->sc_sync_if = NULL; if (imo->imo_membership) pfsync_multicast_cleanup(sc); PFSYNC_UNLOCK(sc); break; } if (sc->sc_len > PFSYNC_MINPKT && (sifp->if_mtu < sc->sc_ifp->if_mtu || (sc->sc_sync_if != NULL && sifp->if_mtu < sc->sc_sync_if->if_mtu) || sifp->if_mtu < MCLBYTES - sizeof(struct ip))) pfsync_sendout(1); if (imo->imo_membership) pfsync_multicast_cleanup(sc); if (sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) { error = pfsync_multicast_setup(sc, sifp, mship); if (error) { if_rele(sifp); free(mship, M_PFSYNC); return (error); } } if (sc->sc_sync_if) if_rele(sc->sc_sync_if); sc->sc_sync_if = sifp; ip = &sc->sc_template; bzero(ip, sizeof(*ip)); ip->ip_v = IPVERSION; ip->ip_hl = sizeof(sc->sc_template) >> 2; ip->ip_tos = IPTOS_LOWDELAY; /* 
len and id are set later. */ ip->ip_off = htons(IP_DF); ip->ip_ttl = PFSYNC_DFLTTL; ip->ip_p = IPPROTO_PFSYNC; ip->ip_src.s_addr = INADDR_ANY; ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr; /* Request a full state table update. */ if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) (*carp_demote_adj_p)(V_pfsync_carp_adj, "pfsync bulk start"); sc->sc_flags &= ~PFSYNCF_OK; if (V_pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: requesting bulk update\n"); pfsync_request_update(0, 0); PFSYNC_UNLOCK(sc); PFSYNC_BLOCK(sc); sc->sc_ureq_sent = time_uptime; callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail, sc); PFSYNC_BUNLOCK(sc); break; } default: return (ENOTTY); } return (0); } static void pfsync_out_state(struct pf_state *st, void *buf) { struct pfsync_state *sp = buf; pfsync_state_export(sp, st); } static void pfsync_out_iack(struct pf_state *st, void *buf) { struct pfsync_ins_ack *iack = buf; iack->id = st->id; iack->creatorid = st->creatorid; } static void pfsync_out_upd_c(struct pf_state *st, void *buf) { struct pfsync_upd_c *up = buf; bzero(up, sizeof(*up)); up->id = st->id; pf_state_peer_hton(&st->src, &up->src); pf_state_peer_hton(&st->dst, &up->dst); up->creatorid = st->creatorid; up->timeout = st->timeout; } static void pfsync_out_del(struct pf_state *st, void *buf) { struct pfsync_del_c *dp = buf; dp->id = st->id; dp->creatorid = st->creatorid; st->state_flags |= PFSTATE_NOSYNC; } static void pfsync_drop(struct pfsync_softc *sc) { struct pf_state *st, *next; struct pfsync_upd_req_item *ur; int q; for (q = 0; q < PFSYNC_S_COUNT; q++) { if (TAILQ_EMPTY(&sc->sc_qs[q])) continue; TAILQ_FOREACH_SAFE(st, &sc->sc_qs[q], sync_list, next) { KASSERT(st->sync_state == q, ("%s: st->sync_state == q", __func__)); st->sync_state = PFSYNC_S_NONE; pf_release_state(st); } TAILQ_INIT(&sc->sc_qs[q]); } while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) { TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry); free(ur, M_PFSYNC); } sc->sc_plus = NULL; sc->sc_len = PFSYNC_MINPKT; } static void pfsync_sendout(int schedswi) { struct pfsync_softc *sc = V_pfsyncif; struct ifnet *ifp = sc->sc_ifp; struct mbuf *m; struct ip *ip; struct pfsync_header *ph; struct pfsync_subheader *subh; struct pf_state *st, *st_next; struct pfsync_upd_req_item *ur; int offset; int q, count = 0; KASSERT(sc != NULL, ("%s: null sc", __func__)); KASSERT(sc->sc_len > PFSYNC_MINPKT, ("%s: sc_len %zu", __func__, sc->sc_len)); PFSYNC_LOCK_ASSERT(sc); if (ifp->if_bpf == NULL && sc->sc_sync_if == NULL) { pfsync_drop(sc); return; } m = m_get2(max_linkhdr + sc->sc_len, M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) { if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); V_pfsyncstats.pfsyncs_onomem++; return; } m->m_data += max_linkhdr; m->m_len = m->m_pkthdr.len = sc->sc_len; /* build the ip header */ ip = (struct ip *)m->m_data; bcopy(&sc->sc_template, ip, sizeof(*ip)); offset = sizeof(*ip); ip->ip_len = htons(m->m_pkthdr.len); ip_fillid(ip); /* build the pfsync header */ ph = (struct pfsync_header *)(m->m_data + offset); bzero(ph, sizeof(*ph)); offset += sizeof(*ph); ph->version = PFSYNC_VERSION; ph->len = htons(sc->sc_len - sizeof(*ip)); bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH); /* walk the queues */ for (q = 0; q < PFSYNC_S_COUNT; q++) { if (TAILQ_EMPTY(&sc->sc_qs[q])) continue; subh = (struct pfsync_subheader *)(m->m_data + offset); offset += sizeof(*subh); count = 0; TAILQ_FOREACH_SAFE(st, &sc->sc_qs[q], sync_list, st_next) { KASSERT(st->sync_state == q, ("%s: st->sync_state == q", __func__)); /* * XXXGL: 
some of write methods do unlocked reads * of state data :( */ pfsync_qs[q].write(st, m->m_data + offset); offset += pfsync_qs[q].len; st->sync_state = PFSYNC_S_NONE; pf_release_state(st); count++; } TAILQ_INIT(&sc->sc_qs[q]); bzero(subh, sizeof(*subh)); subh->action = pfsync_qs[q].action; subh->count = htons(count); V_pfsyncstats.pfsyncs_oacts[pfsync_qs[q].action] += count; } if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) { subh = (struct pfsync_subheader *)(m->m_data + offset); offset += sizeof(*subh); count = 0; while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) { TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry); bcopy(&ur->ur_msg, m->m_data + offset, sizeof(ur->ur_msg)); offset += sizeof(ur->ur_msg); free(ur, M_PFSYNC); count++; } bzero(subh, sizeof(*subh)); subh->action = PFSYNC_ACT_UPD_REQ; subh->count = htons(count); V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_UPD_REQ] += count; } /* has someone built a custom region for us to add? */ if (sc->sc_plus != NULL) { bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen); offset += sc->sc_pluslen; sc->sc_plus = NULL; } subh = (struct pfsync_subheader *)(m->m_data + offset); offset += sizeof(*subh); bzero(subh, sizeof(*subh)); subh->action = PFSYNC_ACT_EOF; subh->count = htons(1); V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_EOF]++; /* we're done, let's put it on the wire */ if (ifp->if_bpf) { m->m_data += sizeof(*ip); m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip); BPF_MTAP(ifp, m); m->m_data -= sizeof(*ip); m->m_len = m->m_pkthdr.len = sc->sc_len; } if (sc->sc_sync_if == NULL) { sc->sc_len = PFSYNC_MINPKT; m_freem(m); return; } if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); sc->sc_len = PFSYNC_MINPKT; if (!_IF_QFULL(&sc->sc_ifp->if_snd)) _IF_ENQUEUE(&sc->sc_ifp->if_snd, m); else { m_freem(m); if_inc_counter(sc->sc_ifp, IFCOUNTER_OQDROPS, 1); } if (schedswi) swi_sched(V_pfsync_swi_cookie, 0); } static void pfsync_insert_state(struct pf_state *st) { struct pfsync_softc *sc = V_pfsyncif; if (st->state_flags & PFSTATE_NOSYNC) return; if ((st->rule.ptr->rule_flag & PFRULE_NOSYNC) || st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) { st->state_flags |= PFSTATE_NOSYNC; return; } KASSERT(st->sync_state == PFSYNC_S_NONE, ("%s: st->sync_state %u", __func__, st->sync_state)); PFSYNC_LOCK(sc); if (sc->sc_len == PFSYNC_MINPKT) callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif); pfsync_q_ins(st, PFSYNC_S_INS, true); PFSYNC_UNLOCK(sc); st->sync_updates = 0; } static int pfsync_defer(struct pf_state *st, struct mbuf *m) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_deferral *pd; if (m->m_flags & (M_BCAST|M_MCAST)) return (0); PFSYNC_LOCK(sc); if (sc == NULL || !(sc->sc_ifp->if_flags & IFF_DRV_RUNNING) || !(sc->sc_flags & PFSYNCF_DEFER)) { PFSYNC_UNLOCK(sc); return (0); } if (sc->sc_deferred >= 128) pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0); pd = malloc(sizeof(*pd), M_PFSYNC, M_NOWAIT); if (pd == NULL) return (0); sc->sc_deferred++; m->m_flags |= M_SKIP_FIREWALL; st->state_flags |= PFSTATE_ACK; pd->pd_sc = sc; pd->pd_refs = 0; pd->pd_st = st; pf_ref_state(st); pd->pd_m = m; TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry); callout_init_mtx(&pd->pd_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); callout_reset(&pd->pd_tmo, 10, pfsync_defer_tmo, pd); pfsync_push(sc); return (1); } static void pfsync_undefer(struct pfsync_deferral *pd, int drop) { struct pfsync_softc *sc = pd->pd_sc; struct mbuf *m = pd->pd_m; struct pf_state *st = pd->pd_st; PFSYNC_LOCK_ASSERT(sc); 
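	/*
	 * Unlink the deferral and drop its state reference; the held
	 * packet is either freed or re-queued on the pfsync interface
	 * for transmission, depending on 'drop'.
	 */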
TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); sc->sc_deferred--; pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ free(pd, M_PFSYNC); pf_release_state(st); if (drop) m_freem(m); else { _IF_ENQUEUE(&sc->sc_ifp->if_snd, m); pfsync_push(sc); } } static void pfsync_defer_tmo(void *arg) { struct pfsync_deferral *pd = arg; struct pfsync_softc *sc = pd->pd_sc; struct mbuf *m = pd->pd_m; struct pf_state *st = pd->pd_st; PFSYNC_LOCK_ASSERT(sc); CURVNET_SET(m->m_pkthdr.rcvif->if_vnet); TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); sc->sc_deferred--; pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ if (pd->pd_refs == 0) free(pd, M_PFSYNC); PFSYNC_UNLOCK(sc); ip_output(m, NULL, NULL, 0, NULL, NULL); pf_release_state(st); CURVNET_RESTORE(); } static void pfsync_undefer_state(struct pf_state *st, int drop) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_deferral *pd; PFSYNC_LOCK_ASSERT(sc); TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) { if (pd->pd_st == st) { if (callout_stop(&pd->pd_tmo) > 0) pfsync_undefer(pd, drop); return; } } panic("%s: unable to find deferred state", __func__); } static void pfsync_update_state(struct pf_state *st) { struct pfsync_softc *sc = V_pfsyncif; bool sync = false, ref = true; PF_STATE_LOCK_ASSERT(st); PFSYNC_LOCK(sc); if (st->state_flags & PFSTATE_ACK) pfsync_undefer_state(st, 0); if (st->state_flags & PFSTATE_NOSYNC) { if (st->sync_state != PFSYNC_S_NONE) pfsync_q_del(st, true); PFSYNC_UNLOCK(sc); return; } if (sc->sc_len == PFSYNC_MINPKT) callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif); switch (st->sync_state) { case PFSYNC_S_UPD_C: case PFSYNC_S_UPD: case PFSYNC_S_INS: /* we're already handling it */ if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) { st->sync_updates++; if (st->sync_updates >= sc->sc_maxupdates) sync = true; } break; case PFSYNC_S_IACK: pfsync_q_del(st, false); ref = false; /* FALLTHROUGH */ case PFSYNC_S_NONE: pfsync_q_ins(st, PFSYNC_S_UPD_C, ref); st->sync_updates = 0; break; default: panic("%s: unexpected sync state %d", __func__, st->sync_state); } if (sync || (time_uptime - st->pfsync_time) < 2) pfsync_push(sc); PFSYNC_UNLOCK(sc); } static void pfsync_request_update(u_int32_t creatorid, u_int64_t id) { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_upd_req_item *item; size_t nlen = sizeof(struct pfsync_upd_req); PFSYNC_LOCK_ASSERT(sc); /* * This code does a bit to prevent multiple update requests for the * same state being generated. It searches current subheader queue, * but it doesn't lookup into queue of already packed datagrams. 
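 * A duplicate request can therefore still be sent if the earlier one
 * was already packed into an outgoing datagram.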
*/ TAILQ_FOREACH(item, &sc->sc_upd_req_list, ur_entry) if (item->ur_msg.id == id && item->ur_msg.creatorid == creatorid) return; item = malloc(sizeof(*item), M_PFSYNC, M_NOWAIT); if (item == NULL) return; /* XXX stats */ item->ur_msg.id = id; item->ur_msg.creatorid = creatorid; if (TAILQ_EMPTY(&sc->sc_upd_req_list)) nlen += sizeof(struct pfsync_subheader); if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) { pfsync_sendout(1); nlen = sizeof(struct pfsync_subheader) + sizeof(struct pfsync_upd_req); } TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry); sc->sc_len += nlen; } static void pfsync_update_state_req(struct pf_state *st) { struct pfsync_softc *sc = V_pfsyncif; bool ref = true; PF_STATE_LOCK_ASSERT(st); PFSYNC_LOCK(sc); if (st->state_flags & PFSTATE_NOSYNC) { if (st->sync_state != PFSYNC_S_NONE) pfsync_q_del(st, true); PFSYNC_UNLOCK(sc); return; } switch (st->sync_state) { case PFSYNC_S_UPD_C: case PFSYNC_S_IACK: pfsync_q_del(st, false); ref = false; /* FALLTHROUGH */ case PFSYNC_S_NONE: pfsync_q_ins(st, PFSYNC_S_UPD, ref); pfsync_push(sc); break; case PFSYNC_S_INS: case PFSYNC_S_UPD: case PFSYNC_S_DEL: /* we're already handling it */ break; default: panic("%s: unexpected sync state %d", __func__, st->sync_state); } PFSYNC_UNLOCK(sc); } static void pfsync_delete_state(struct pf_state *st) { struct pfsync_softc *sc = V_pfsyncif; bool ref = true; PFSYNC_LOCK(sc); if (st->state_flags & PFSTATE_ACK) pfsync_undefer_state(st, 1); if (st->state_flags & PFSTATE_NOSYNC) { if (st->sync_state != PFSYNC_S_NONE) pfsync_q_del(st, true); PFSYNC_UNLOCK(sc); return; } if (sc->sc_len == PFSYNC_MINPKT) callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif); switch (st->sync_state) { case PFSYNC_S_INS: /* We never got to tell the world so just forget about it. 
*/ pfsync_q_del(st, true); break; case PFSYNC_S_UPD_C: case PFSYNC_S_UPD: case PFSYNC_S_IACK: pfsync_q_del(st, false); ref = false; /* FALLTHROUGH */ case PFSYNC_S_NONE: pfsync_q_ins(st, PFSYNC_S_DEL, ref); break; default: panic("%s: unexpected sync state %d", __func__, st->sync_state); } PFSYNC_UNLOCK(sc); } static void pfsync_clear_states(u_int32_t creatorid, const char *ifname) { struct pfsync_softc *sc = V_pfsyncif; struct { struct pfsync_subheader subh; struct pfsync_clr clr; } __packed r; bzero(&r, sizeof(r)); r.subh.action = PFSYNC_ACT_CLR; r.subh.count = htons(1); V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_CLR]++; strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname)); r.clr.creatorid = creatorid; PFSYNC_LOCK(sc); pfsync_send_plus(&r, sizeof(r)); PFSYNC_UNLOCK(sc); } static void pfsync_q_ins(struct pf_state *st, int q, bool ref) { struct pfsync_softc *sc = V_pfsyncif; size_t nlen = pfsync_qs[q].len; PFSYNC_LOCK_ASSERT(sc); KASSERT(st->sync_state == PFSYNC_S_NONE, ("%s: st->sync_state %u", __func__, st->sync_state)); KASSERT(sc->sc_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu", sc->sc_len)); if (TAILQ_EMPTY(&sc->sc_qs[q])) nlen += sizeof(struct pfsync_subheader); if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) { pfsync_sendout(1); nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len; } sc->sc_len += nlen; TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list); st->sync_state = q; if (ref) pf_ref_state(st); } static void pfsync_q_del(struct pf_state *st, bool unref) { struct pfsync_softc *sc = V_pfsyncif; int q = st->sync_state; PFSYNC_LOCK_ASSERT(sc); KASSERT(st->sync_state != PFSYNC_S_NONE, ("%s: st->sync_state != PFSYNC_S_NONE", __func__)); sc->sc_len -= pfsync_qs[q].len; TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list); st->sync_state = PFSYNC_S_NONE; if (unref) pf_release_state(st); if (TAILQ_EMPTY(&sc->sc_qs[q])) sc->sc_len -= sizeof(struct pfsync_subheader); } static void pfsync_bulk_start(void) { struct pfsync_softc *sc = V_pfsyncif; if (V_pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: received bulk update request\n"); PFSYNC_BLOCK(sc); sc->sc_ureq_received = time_uptime; sc->sc_bulk_hashid = 0; sc->sc_bulk_stateid = 0; pfsync_bulk_status(PFSYNC_BUS_START); callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc); PFSYNC_BUNLOCK(sc); } static void pfsync_bulk_update(void *arg) { struct pfsync_softc *sc = arg; struct pf_state *s; int i, sent = 0; PFSYNC_BLOCK_ASSERT(sc); CURVNET_SET(sc->sc_ifp->if_vnet); /* * Start with last state from previous invocation. * It may had gone, in this case start from the * hash slot. */ s = pf_find_state_byid(sc->sc_bulk_stateid, sc->sc_bulk_creatorid); if (s != NULL) i = PF_IDHASH(s); else i = sc->sc_bulk_hashid; for (; i <= pf_hashmask; i++) { struct pf_idhash *ih = &V_pf_idhash[i]; if (s != NULL) PF_HASHROW_ASSERT(ih); else { PF_HASHROW_LOCK(ih); s = LIST_FIRST(&ih->states); } for (; s; s = LIST_NEXT(s, entry)) { if (sent > 1 && (sc->sc_ifp->if_mtu - sc->sc_len) < sizeof(struct pfsync_state)) { /* We've filled a packet. */ sc->sc_bulk_hashid = i; sc->sc_bulk_stateid = s->id; sc->sc_bulk_creatorid = s->creatorid; PF_HASHROW_UNLOCK(ih); callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc); goto full; } if (s->sync_state == PFSYNC_S_NONE && s->timeout < PFTM_MAX && s->pfsync_time <= sc->sc_ureq_received) { pfsync_update_state_req(s); sent++; } } PF_HASHROW_UNLOCK(ih); } /* We're done. 
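Report the end of the bulk transfer to the peer.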
*/ pfsync_bulk_status(PFSYNC_BUS_END); full: CURVNET_RESTORE(); } static void pfsync_bulk_status(u_int8_t status) { struct { struct pfsync_subheader subh; struct pfsync_bus bus; } __packed r; struct pfsync_softc *sc = V_pfsyncif; bzero(&r, sizeof(r)); r.subh.action = PFSYNC_ACT_BUS; r.subh.count = htons(1); V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_BUS]++; r.bus.creatorid = V_pf_status.hostid; r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received); r.bus.status = status; PFSYNC_LOCK(sc); pfsync_send_plus(&r, sizeof(r)); PFSYNC_UNLOCK(sc); } static void pfsync_bulk_fail(void *arg) { struct pfsync_softc *sc = arg; CURVNET_SET(sc->sc_ifp->if_vnet); PFSYNC_BLOCK_ASSERT(sc); if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) { /* Try again */ callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail, V_pfsyncif); PFSYNC_LOCK(sc); pfsync_request_update(0, 0); PFSYNC_UNLOCK(sc); } else { /* Pretend like the transfer was ok. */ sc->sc_ureq_sent = 0; sc->sc_bulk_tries = 0; PFSYNC_LOCK(sc); if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync bulk fail"); sc->sc_flags |= PFSYNCF_OK; PFSYNC_UNLOCK(sc); if (V_pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: failed to receive bulk update\n"); } CURVNET_RESTORE(); } static void pfsync_send_plus(void *plus, size_t pluslen) { struct pfsync_softc *sc = V_pfsyncif; PFSYNC_LOCK_ASSERT(sc); if (sc->sc_len + pluslen > sc->sc_ifp->if_mtu) pfsync_sendout(1); sc->sc_plus = plus; sc->sc_len += (sc->sc_pluslen = pluslen); pfsync_sendout(1); } static void pfsync_timeout(void *arg) { struct pfsync_softc *sc = arg; CURVNET_SET(sc->sc_ifp->if_vnet); PFSYNC_LOCK(sc); pfsync_push(sc); PFSYNC_UNLOCK(sc); CURVNET_RESTORE(); } static void pfsync_push(struct pfsync_softc *sc) { PFSYNC_LOCK_ASSERT(sc); sc->sc_flags |= PFSYNCF_PUSH; swi_sched(V_pfsync_swi_cookie, 0); } static void pfsyncintr(void *arg) { struct pfsync_softc *sc = arg; struct mbuf *m, *n; CURVNET_SET(sc->sc_ifp->if_vnet); PFSYNC_LOCK(sc); if ((sc->sc_flags & PFSYNCF_PUSH) && sc->sc_len > PFSYNC_MINPKT) { pfsync_sendout(0); sc->sc_flags &= ~PFSYNCF_PUSH; } _IF_DEQUEUE_ALL(&sc->sc_ifp->if_snd, m); PFSYNC_UNLOCK(sc); for (; m != NULL; m = n) { n = m->m_nextpkt; m->m_nextpkt = NULL; /* * We distinguish between a deferral packet and our * own pfsync packet based on M_SKIP_FIREWALL * flag. This is XXX. 
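 * Deferred packets are re-injected as-is (pf skips them), while our
 * own pfsync packets are sent raw using the interface's multicast
 * options.
 */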
*/ if (m->m_flags & M_SKIP_FIREWALL) ip_output(m, NULL, NULL, 0, NULL, NULL); else if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL) == 0) V_pfsyncstats.pfsyncs_opackets++; else V_pfsyncstats.pfsyncs_oerrors++; } CURVNET_RESTORE(); } static int pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp, void *mship) { struct ip_moptions *imo = &sc->sc_imo; int error; if (!(ifp->if_flags & IFF_MULTICAST)) return (EADDRNOTAVAIL); imo->imo_membership = (struct in_multi **)mship; imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; imo->imo_multicast_vif = -1; if ((error = in_joingroup(ifp, &sc->sc_sync_peer, NULL, &imo->imo_membership[0])) != 0) { imo->imo_membership = NULL; return (error); } imo->imo_num_memberships++; imo->imo_multicast_ifp = ifp; imo->imo_multicast_ttl = PFSYNC_DFLTTL; imo->imo_multicast_loop = 0; return (0); } static void pfsync_multicast_cleanup(struct pfsync_softc *sc) { struct ip_moptions *imo = &sc->sc_imo; in_leavegroup(imo->imo_membership[0], NULL); free(imo->imo_membership, M_PFSYNC); imo->imo_membership = NULL; imo->imo_multicast_ifp = NULL; } #ifdef INET extern struct domain inetdomain; static struct protosw in_pfsync_protosw = { .pr_type = SOCK_RAW, .pr_domain = &inetdomain, .pr_protocol = IPPROTO_PFSYNC, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_input = pfsync_input, .pr_output = rip_output, .pr_ctloutput = rip_ctloutput, .pr_usrreqs = &rip_usrreqs }; #endif static void pfsync_pointers_init() { PF_RULES_WLOCK(); pfsync_state_import_ptr = pfsync_state_import; pfsync_insert_state_ptr = pfsync_insert_state; pfsync_update_state_ptr = pfsync_update_state; pfsync_delete_state_ptr = pfsync_delete_state; pfsync_clear_states_ptr = pfsync_clear_states; pfsync_defer_ptr = pfsync_defer; PF_RULES_WUNLOCK(); } static void pfsync_pointers_uninit() { PF_RULES_WLOCK(); pfsync_state_import_ptr = NULL; pfsync_insert_state_ptr = NULL; pfsync_update_state_ptr = NULL; pfsync_delete_state_ptr = NULL; pfsync_clear_states_ptr = NULL; pfsync_defer_ptr = NULL; PF_RULES_WUNLOCK(); } static void vnet_pfsync_init(const void *unused __unused) { int error; V_pfsync_cloner = if_clone_simple(pfsyncname, pfsync_clone_create, pfsync_clone_destroy, 1); error = swi_add(NULL, pfsyncname, pfsyncintr, V_pfsyncif, SWI_NET, INTR_MPSAFE, &V_pfsync_swi_cookie); if (error) { if_clone_detach(V_pfsync_cloner); log(LOG_INFO, "swi_add() failed in %s\n", __func__); } } VNET_SYSINIT(vnet_pfsync_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY, vnet_pfsync_init, NULL); static void vnet_pfsync_uninit(const void *unused __unused) { if_clone_detach(V_pfsync_cloner); swi_remove(V_pfsync_swi_cookie); } /* * Detach after pf is gone; otherwise we might touch pfsync memory * from within pf after freeing pfsync. */ VNET_SYSUNINIT(vnet_pfsync_uninit, SI_SUB_INIT_IF, SI_ORDER_SECOND, vnet_pfsync_uninit, NULL); static int pfsync_init() { #ifdef INET int error; error = pf_proto_register(PF_INET, &in_pfsync_protosw); if (error) return (error); error = ipproto_register(IPPROTO_PFSYNC); if (error) { pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW); return (error); } #endif pfsync_pointers_init(); return (0); } static void pfsync_uninit() { pfsync_pointers_uninit(); #ifdef INET ipproto_unregister(IPPROTO_PFSYNC); pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW); #endif } static int pfsync_modevent(module_t mod, int type, void *data) { int error = 0; switch (type) { case MOD_LOAD: error = pfsync_init(); break; case MOD_QUIESCE: /* * Module should not be unloaded due to race conditions. 
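 * Returning EBUSY from MOD_QUIESCE makes a plain kldunload(8) fail,
 * so the module can only be removed with a forced unload.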
*/ error = EBUSY; break; case MOD_UNLOAD: pfsync_uninit(); break; default: error = EINVAL; break; } return (error); } static moduledata_t pfsync_mod = { pfsyncname, pfsync_modevent, 0 }; #define PFSYNC_MODVER 1 /* Stay on FIREWALL as we depend on pf being initialized and on inetdomain. */ DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY); MODULE_VERSION(pfsync, PFSYNC_MODVER); MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER); Index: stable/11/sys/security/mac/mac_net.c =================================================================== --- stable/11/sys/security/mac/mac_net.c (revision 332287) +++ stable/11/sys/security/mac/mac_net.c (revision 332288) @@ -1,501 +1,501 @@ /*- * Copyright (c) 1999-2002, 2009 Robert N. M. Watson * Copyright (c) 2001 Ilmar S. Habibulin * Copyright (c) 2001-2004 Networks Associates Technology, Inc. * Copyright (c) 2006 SPARTA, Inc. * Copyright (c) 2008 Apple Inc. * All rights reserved. * * This software was developed by Robert Watson and Ilmar Habibulin for the * TrustedBSD Project. * * This software was enhanced by SPARTA ISSO under SPAWAR contract * N66001-04-C-6019 ("SEFOS"). * * This software was developed for the FreeBSD Project in part by Network * Associates Laboratories, the Security Research Division of Network * Associates, Inc. under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), * as part of the DARPA CHATS research program. * * This software was developed at the University of Cambridge Computer * Laboratory with support from a grant from Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_mac.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * XXXRW: struct ifnet locking is incomplete in the network code, so we use * our own global mutex for struct ifnet. Non-ideal, but should help in the * SMP environment. */ struct mtx mac_ifnet_mtx; MTX_SYSINIT(mac_ifnet_mtx, &mac_ifnet_mtx, "mac_ifnet", MTX_DEF); /* * Retrieve the label associated with an mbuf by searching for the tag. * Depending on the value of mac_labelmbufs, it's possible that a label will * not be present, in which case NULL is returned. 
Policies must handle the * possibility of an mbuf not having label storage if they do not enforce * early loading. */ struct label * mac_mbuf_to_label(struct mbuf *m) { struct m_tag *tag; struct label *label; if (m == NULL) return (NULL); tag = m_tag_find(m, PACKET_TAG_MACLABEL, NULL); if (tag == NULL) return (NULL); label = (struct label *)(tag+1); return (label); } static struct label * mac_bpfdesc_label_alloc(void) { struct label *label; label = mac_labelzone_alloc(M_WAITOK); MAC_POLICY_PERFORM(bpfdesc_init_label, label); return (label); } void mac_bpfdesc_init(struct bpf_d *d) { if (mac_labeled & MPC_OBJECT_BPFDESC) d->bd_label = mac_bpfdesc_label_alloc(); else d->bd_label = NULL; } static struct label * mac_ifnet_label_alloc(void) { struct label *label; label = mac_labelzone_alloc(M_WAITOK); MAC_POLICY_PERFORM(ifnet_init_label, label); return (label); } void mac_ifnet_init(struct ifnet *ifp) { if (mac_labeled & MPC_OBJECT_IFNET) ifp->if_label = mac_ifnet_label_alloc(); else ifp->if_label = NULL; } int mac_mbuf_tag_init(struct m_tag *tag, int flag) { struct label *label; int error; label = (struct label *) (tag + 1); mac_init_label(label); if (flag & M_WAITOK) MAC_POLICY_CHECK(mbuf_init_label, label, flag); else MAC_POLICY_CHECK_NOSLEEP(mbuf_init_label, label, flag); if (error) { MAC_POLICY_PERFORM_NOSLEEP(mbuf_destroy_label, label); mac_destroy_label(label); } return (error); } int mac_mbuf_init(struct mbuf *m, int flag) { struct m_tag *tag; int error; M_ASSERTPKTHDR(m); if (mac_labeled & MPC_OBJECT_MBUF) { tag = m_tag_get(PACKET_TAG_MACLABEL, sizeof(struct label), flag); if (tag == NULL) return (ENOMEM); error = mac_mbuf_tag_init(tag, flag); if (error) { m_tag_free(tag); return (error); } m_tag_prepend(m, tag); } return (0); } static void mac_bpfdesc_label_free(struct label *label) { MAC_POLICY_PERFORM_NOSLEEP(bpfdesc_destroy_label, label); mac_labelzone_free(label); } void mac_bpfdesc_destroy(struct bpf_d *d) { if (d->bd_label != NULL) { mac_bpfdesc_label_free(d->bd_label); d->bd_label = NULL; } } static void mac_ifnet_label_free(struct label *label) { MAC_POLICY_PERFORM_NOSLEEP(ifnet_destroy_label, label); mac_labelzone_free(label); } void mac_ifnet_destroy(struct ifnet *ifp) { if (ifp->if_label != NULL) { mac_ifnet_label_free(ifp->if_label); ifp->if_label = NULL; } } void mac_mbuf_tag_destroy(struct m_tag *tag) { struct label *label; label = (struct label *)(tag+1); MAC_POLICY_PERFORM_NOSLEEP(mbuf_destroy_label, label); mac_destroy_label(label); } /* * mac_mbuf_tag_copy is called when an mbuf header is duplicated, in which * case the labels must also be duplicated. */ void mac_mbuf_tag_copy(struct m_tag *src, struct m_tag *dest) { struct label *src_label, *dest_label; src_label = (struct label *)(src+1); dest_label = (struct label *)(dest+1); /* * mac_mbuf_tag_init() is called on the target tag in m_tag_copy(), * so we don't need to call it here. 
*/ MAC_POLICY_PERFORM_NOSLEEP(mbuf_copy_label, src_label, dest_label); } void mac_mbuf_copy(struct mbuf *m_from, struct mbuf *m_to) { struct label *src_label, *dest_label; if (mac_policy_count == 0) return; src_label = mac_mbuf_to_label(m_from); dest_label = mac_mbuf_to_label(m_to); MAC_POLICY_PERFORM_NOSLEEP(mbuf_copy_label, src_label, dest_label); } static void mac_ifnet_copy_label(struct label *src, struct label *dest) { MAC_POLICY_PERFORM_NOSLEEP(ifnet_copy_label, src, dest); } static int mac_ifnet_externalize_label(struct label *label, char *elements, char *outbuf, size_t outbuflen) { int error; MAC_POLICY_EXTERNALIZE(ifnet, label, elements, outbuf, outbuflen); return (error); } static int mac_ifnet_internalize_label(struct label *label, char *string) { int error; MAC_POLICY_INTERNALIZE(ifnet, label, string); return (error); } void mac_ifnet_create(struct ifnet *ifp) { if (mac_policy_count == 0) return; MAC_IFNET_LOCK(ifp); MAC_POLICY_PERFORM_NOSLEEP(ifnet_create, ifp, ifp->if_label); MAC_IFNET_UNLOCK(ifp); } void mac_bpfdesc_create(struct ucred *cred, struct bpf_d *d) { MAC_POLICY_PERFORM_NOSLEEP(bpfdesc_create, cred, d, d->bd_label); } void mac_bpfdesc_create_mbuf(struct bpf_d *d, struct mbuf *m) { struct label *label; /* Assume reader lock is enough. */ BPFD_LOCK_ASSERT(d); if (mac_policy_count == 0) return; label = mac_mbuf_to_label(m); MAC_POLICY_PERFORM_NOSLEEP(bpfdesc_create_mbuf, d, d->bd_label, m, label); } void mac_ifnet_create_mbuf(struct ifnet *ifp, struct mbuf *m) { struct label *label; if (mac_policy_count == 0) return; label = mac_mbuf_to_label(m); MAC_IFNET_LOCK(ifp); MAC_POLICY_PERFORM_NOSLEEP(ifnet_create_mbuf, ifp, ifp->if_label, m, label); MAC_IFNET_UNLOCK(ifp); } MAC_CHECK_PROBE_DEFINE2(bpfdesc_check_receive, "struct bpf_d *", "struct ifnet *"); int mac_bpfdesc_check_receive(struct bpf_d *d, struct ifnet *ifp) { int error; /* Assume reader lock is enough. 
*/ BPFD_LOCK_ASSERT(d); if (mac_policy_count == 0) return (0); MAC_IFNET_LOCK(ifp); MAC_POLICY_CHECK_NOSLEEP(bpfdesc_check_receive, d, d->bd_label, ifp, ifp->if_label); MAC_CHECK_PROBE2(bpfdesc_check_receive, error, d, ifp); MAC_IFNET_UNLOCK(ifp); return (error); } MAC_CHECK_PROBE_DEFINE2(ifnet_check_transmit, "struct ifnet *", "struct mbuf *"); int mac_ifnet_check_transmit(struct ifnet *ifp, struct mbuf *m) { struct label *label; int error; M_ASSERTPKTHDR(m); if (mac_policy_count == 0) return (0); label = mac_mbuf_to_label(m); MAC_IFNET_LOCK(ifp); MAC_POLICY_CHECK_NOSLEEP(ifnet_check_transmit, ifp, ifp->if_label, m, label); MAC_CHECK_PROBE2(ifnet_check_transmit, error, ifp, m); MAC_IFNET_UNLOCK(ifp); return (error); } int mac_ifnet_ioctl_get(struct ucred *cred, struct ifreq *ifr, struct ifnet *ifp) { char *elements, *buffer; struct label *intlabel; struct mac mac; int error; if (!(mac_labeled & MPC_OBJECT_IFNET)) return (EINVAL); - error = copyin(ifr->ifr_ifru.ifru_data, &mac, sizeof(mac)); + error = copyin(ifr_data_get_ptr(ifr), &mac, sizeof(mac)); if (error) return (error); error = mac_check_structmac_consistent(&mac); if (error) return (error); elements = malloc(mac.m_buflen, M_MACTEMP, M_WAITOK); error = copyinstr(mac.m_string, elements, mac.m_buflen, NULL); if (error) { free(elements, M_MACTEMP); return (error); } buffer = malloc(mac.m_buflen, M_MACTEMP, M_WAITOK | M_ZERO); intlabel = mac_ifnet_label_alloc(); MAC_IFNET_LOCK(ifp); mac_ifnet_copy_label(ifp->if_label, intlabel); MAC_IFNET_UNLOCK(ifp); error = mac_ifnet_externalize_label(intlabel, elements, buffer, mac.m_buflen); mac_ifnet_label_free(intlabel); if (error == 0) error = copyout(buffer, mac.m_string, strlen(buffer)+1); free(buffer, M_MACTEMP); free(elements, M_MACTEMP); return (error); } int mac_ifnet_ioctl_set(struct ucred *cred, struct ifreq *ifr, struct ifnet *ifp) { struct label *intlabel; struct mac mac; char *buffer; int error; if (!(mac_labeled & MPC_OBJECT_IFNET)) return (EINVAL); - error = copyin(ifr->ifr_ifru.ifru_data, &mac, sizeof(mac)); + error = copyin(ifr_data_get_ptr(ifr), &mac, sizeof(mac)); if (error) return (error); error = mac_check_structmac_consistent(&mac); if (error) return (error); buffer = malloc(mac.m_buflen, M_MACTEMP, M_WAITOK); error = copyinstr(mac.m_string, buffer, mac.m_buflen, NULL); if (error) { free(buffer, M_MACTEMP); return (error); } intlabel = mac_ifnet_label_alloc(); error = mac_ifnet_internalize_label(intlabel, buffer); free(buffer, M_MACTEMP); if (error) { mac_ifnet_label_free(intlabel); return (error); } /* * XXX: Note that this is a redundant privilege check, since policies * impose this check themselves if required by the policy * Eventually, this should go away. */ error = priv_check_cred(cred, PRIV_NET_SETIFMAC, 0); if (error) { mac_ifnet_label_free(intlabel); return (error); } MAC_IFNET_LOCK(ifp); MAC_POLICY_CHECK_NOSLEEP(ifnet_check_relabel, cred, ifp, ifp->if_label, intlabel); if (error) { MAC_IFNET_UNLOCK(ifp); mac_ifnet_label_free(intlabel); return (error); } MAC_POLICY_PERFORM_NOSLEEP(ifnet_relabel, cred, ifp, ifp->if_label, intlabel); MAC_IFNET_UNLOCK(ifp); mac_ifnet_label_free(intlabel); return (0); } Index: stable/11 =================================================================== --- stable/11 (revision 332287) +++ stable/11 (revision 332288) Property changes on: stable/11 ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r331797
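For reference, every user/kernel copy touched by this merge (pfsyncioctl() and the MAC ifnet ioctls above) now fetches the userland argument pointer with ifr_data_get_ptr() instead of dereferencing ifr->ifr_data directly; the accessor resolves the pointer in an ABI-aware way (e.g. for 32-bit compat callers). A minimal sketch of the pattern follows; the handler name, the SIOCGEXAMPLE/SIOCSEXAMPLE request codes and struct example_req are invented for illustration and are not part of this change, and the usual <sys/param.h>, <sys/systm.h>, <sys/sockio.h>, <net/if.h> and <net/if_var.h> driver includes are assumed.

/* Hypothetical per-driver ioctl payload. */
struct example_req {
	int	er_value;
};

static int
example_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct ifreq *ifr = (struct ifreq *)data;
	struct example_req req;
	int error;

	switch (cmd) {
	case SIOCGEXAMPLE:
		bzero(&req, sizeof(req));
		req.er_value = 42;	/* fill in from driver state */
		/* Copy the reply out through the accessor, not ifr_data. */
		return (copyout(&req, ifr_data_get_ptr(ifr), sizeof(req)));
	case SIOCSEXAMPLE:
		/* Copy the request in through the accessor. */
		error = copyin(ifr_data_get_ptr(ifr), &req, sizeof(req));
		if (error)
			return (error);
		/* ... apply req.er_value to driver state ... */
		return (0);
	default:
		return (ENOTTY);
	}
}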