Index: head/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c =================================================================== --- head/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c (revision 305524) +++ head/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c (revision 305525) @@ -1,3095 +1,3078 @@ /*- * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2012 NetApp Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 2004-2006 Kip Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_inet6.h" #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "vmbus_if.h" /* Short for Hyper-V network interface */ #define NETVSC_DEVNAME "hn" /* * It looks like offset 0 of buf is reserved to hold the softc pointer. * The sc pointer evidently not needed, and is not presently populated. * The packet offset is where the netvsc_packet starts in the buffer. */ #define HV_NV_SC_PTR_OFFSET_IN_BUF 0 #define HV_NV_PACKET_OFFSET_IN_BUF 16 /* YYY should get it from the underlying channel */ #define HN_TX_DESC_CNT 512 #define HN_LROENT_CNT_DEF 128 #define HN_RING_CNT_DEF_MAX 8 -#define HN_RNDIS_PKT_LEN \ - (sizeof(struct rndis_packet_msg) + \ - RNDIS_HASHVAL_PPI_SIZE + \ - RNDIS_VLAN_PPI_SIZE + \ - RNDIS_TSO_PPI_SIZE + \ - RNDIS_CSUM_PPI_SIZE) +#define HN_RNDIS_PKT_LEN \ + (sizeof(struct rndis_packet_msg) + \ + HN_RNDIS_PKTINFO_SIZE(HN_NDIS_HASH_VALUE_SIZE) + \ + HN_RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) + \ + HN_RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) + \ + HN_RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE)) #define HN_RNDIS_PKT_BOUNDARY PAGE_SIZE #define HN_RNDIS_PKT_ALIGN CACHE_LINE_SIZE #define HN_TX_DATA_BOUNDARY PAGE_SIZE #define HN_TX_DATA_MAXSIZE IP_MAXPACKET #define HN_TX_DATA_SEGSIZE PAGE_SIZE /* -1 for RNDIS packet message */ #define HN_TX_DATA_SEGCNT_MAX (NETVSC_PACKET_MAXPAGE - 1) #define HN_DIRECT_TX_SIZE_DEF 128 #define HN_EARLY_TXEOF_THRESH 8 struct hn_txdesc { #ifndef HN_USE_TXDESC_BUFRING SLIST_ENTRY(hn_txdesc) link; #endif struct mbuf *m; struct hn_tx_ring *txr; int refs; uint32_t flags; /* HN_TXD_FLAG_ */ struct hn_send_ctx send_ctx; bus_dmamap_t data_dmap; bus_addr_t rndis_pkt_paddr; struct rndis_packet_msg *rndis_pkt; bus_dmamap_t rndis_pkt_dmap; }; #define HN_TXD_FLAG_ONLIST 0x1 #define HN_TXD_FLAG_DMAMAP 0x2 /* * Only enable UDP checksum offloading when it is on 2012R2 or * later. UDP checksum offloading doesn't work on earlier * Windows releases. */ #define HN_CSUM_ASSIST_WIN8 (CSUM_IP | CSUM_TCP) #define HN_CSUM_ASSIST (CSUM_IP | CSUM_UDP | CSUM_TCP) #define HN_LRO_LENLIM_MULTIRX_DEF (12 * ETHERMTU) #define HN_LRO_LENLIM_DEF (25 * ETHERMTU) /* YYY 2*MTU is a bit rough, but should be good enough. */ #define HN_LRO_LENLIM_MIN(ifp) (2 * (ifp)->if_mtu) #define HN_LRO_ACKCNT_DEF 1 /* * Be aware that this sleepable mutex will exhibit WITNESS errors when * certain TCP and ARP code paths are taken. This appears to be a * well-known condition, as all other drivers checked use a sleeping * mutex to protect their transmit paths. * Also Be aware that mutexes do not play well with semaphores, and there * is a conflicting semaphore in a certain channel code path. */ #define NV_LOCK_INIT(_sc, _name) \ mtx_init(&(_sc)->hn_lock, _name, MTX_NETWORK_LOCK, MTX_DEF) #define NV_LOCK(_sc) mtx_lock(&(_sc)->hn_lock) #define NV_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->hn_lock, MA_OWNED) #define NV_UNLOCK(_sc) mtx_unlock(&(_sc)->hn_lock) #define NV_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->hn_lock) /* * Globals */ int hv_promisc_mode = 0; /* normal mode by default */ SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Hyper-V network interface"); /* Trust tcp segements verification on host side. */ static int hn_trust_hosttcp = 1; SYSCTL_INT(_hw_hn, OID_AUTO, trust_hosttcp, CTLFLAG_RDTUN, &hn_trust_hosttcp, 0, "Trust tcp segement verification on host side, " "when csum info is missing (global setting)"); /* Trust udp datagrams verification on host side. */ static int hn_trust_hostudp = 1; SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostudp, CTLFLAG_RDTUN, &hn_trust_hostudp, 0, "Trust udp datagram verification on host side, " "when csum info is missing (global setting)"); /* Trust ip packets verification on host side. */ static int hn_trust_hostip = 1; SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostip, CTLFLAG_RDTUN, &hn_trust_hostip, 0, "Trust ip packet verification on host side, " "when csum info is missing (global setting)"); #if __FreeBSD_version >= 1100045 /* Limit TSO burst size */ static int hn_tso_maxlen = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN, &hn_tso_maxlen, 0, "TSO burst limit"); #endif /* Limit chimney send size */ static int hn_tx_chimney_size = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tx_chimney_size, CTLFLAG_RDTUN, &hn_tx_chimney_size, 0, "Chimney send packet size limit"); /* Limit the size of packet for direct transmission */ static int hn_direct_tx_size = HN_DIRECT_TX_SIZE_DEF; SYSCTL_INT(_hw_hn, OID_AUTO, direct_tx_size, CTLFLAG_RDTUN, &hn_direct_tx_size, 0, "Size of the packet for direct transmission"); #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 static int hn_lro_entry_count = HN_LROENT_CNT_DEF; SYSCTL_INT(_hw_hn, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN, &hn_lro_entry_count, 0, "LRO entry count"); #endif #endif static int hn_share_tx_taskq = 0; SYSCTL_INT(_hw_hn, OID_AUTO, share_tx_taskq, CTLFLAG_RDTUN, &hn_share_tx_taskq, 0, "Enable shared TX taskqueue"); static struct taskqueue *hn_tx_taskq; #ifndef HN_USE_TXDESC_BUFRING static int hn_use_txdesc_bufring = 0; #else static int hn_use_txdesc_bufring = 1; #endif SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD, &hn_use_txdesc_bufring, 0, "Use buf_ring for TX descriptors"); static int hn_bind_tx_taskq = -1; SYSCTL_INT(_hw_hn, OID_AUTO, bind_tx_taskq, CTLFLAG_RDTUN, &hn_bind_tx_taskq, 0, "Bind TX taskqueue to the specified cpu"); static int hn_use_if_start = 0; SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN, &hn_use_if_start, 0, "Use if_start TX method"); static int hn_chan_cnt = 0; SYSCTL_INT(_hw_hn, OID_AUTO, chan_cnt, CTLFLAG_RDTUN, &hn_chan_cnt, 0, "# of channels to use; each channel has one RX ring and one TX ring"); static int hn_tx_ring_cnt = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tx_ring_cnt, CTLFLAG_RDTUN, &hn_tx_ring_cnt, 0, "# of TX rings to use"); static int hn_tx_swq_depth = 0; SYSCTL_INT(_hw_hn, OID_AUTO, tx_swq_depth, CTLFLAG_RDTUN, &hn_tx_swq_depth, 0, "Depth of IFQ or BUFRING"); #if __FreeBSD_version >= 1100095 static u_int hn_lro_mbufq_depth = 0; SYSCTL_UINT(_hw_hn, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN, &hn_lro_mbufq_depth, 0, "Depth of LRO mbuf queue"); #endif static u_int hn_cpu_index; /* * Forward declarations */ static void hn_stop(hn_softc_t *sc); static void hn_ifinit_locked(hn_softc_t *sc); static void hn_ifinit(void *xsc); static int hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); static int hn_start_locked(struct hn_tx_ring *txr, int len); static void hn_start(struct ifnet *ifp); static void hn_start_txeof(struct hn_tx_ring *); static int hn_ifmedia_upd(struct ifnet *ifp); static void hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr); #if __FreeBSD_version >= 1100099 static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS); static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS); #endif static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS); static int hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS); static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS); static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS); static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS); static int hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS); static int hn_check_iplen(const struct mbuf *, int); static int hn_create_tx_ring(struct hn_softc *, int); static void hn_destroy_tx_ring(struct hn_tx_ring *); static int hn_create_tx_data(struct hn_softc *, int); static void hn_destroy_tx_data(struct hn_softc *); static void hn_start_taskfunc(void *, int); static void hn_start_txeof_taskfunc(void *, int); static void hn_stop_tx_tasks(struct hn_softc *); static int hn_encap(struct hn_tx_ring *, struct hn_txdesc *, struct mbuf **); static int hn_create_rx_data(struct hn_softc *sc, int); static void hn_destroy_rx_data(struct hn_softc *sc); static void hn_set_chim_size(struct hn_softc *, int); static void hn_channel_attach(struct hn_softc *, struct vmbus_channel *); static void hn_subchan_attach(struct hn_softc *, struct vmbus_channel *); static void hn_subchan_setup(struct hn_softc *); static int hn_transmit(struct ifnet *, struct mbuf *); static void hn_xmit_qflush(struct ifnet *); static int hn_xmit(struct hn_tx_ring *, int); static void hn_xmit_txeof(struct hn_tx_ring *); static void hn_xmit_taskfunc(void *, int); static void hn_xmit_txeof_taskfunc(void *, int); #if __FreeBSD_version >= 1100099 static void hn_set_lro_lenlim(struct hn_softc *sc, int lenlim) { int i; for (i = 0; i < sc->hn_rx_ring_inuse; ++i) sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim; } #endif static int hn_get_txswq_depth(const struct hn_tx_ring *txr) { KASSERT(txr->hn_txdesc_cnt > 0, ("tx ring is not setup yet")); if (hn_tx_swq_depth < txr->hn_txdesc_cnt) return txr->hn_txdesc_cnt; return hn_tx_swq_depth; } static int hn_ifmedia_upd(struct ifnet *ifp __unused) { return EOPNOTSUPP; } static void hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) { struct hn_softc *sc = ifp->if_softc; ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (!sc->hn_carrier) { ifmr->ifm_active |= IFM_NONE; return; } ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= IFM_10G_T | IFM_FDX; } /* {F8615163-DF3E-46c5-913F-F2D2F965ED0E} */ static const struct hyperv_guid g_net_vsc_device_type = { .hv_guid = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46, 0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E} }; /* * Standard probe entry point. * */ static int netvsc_probe(device_t dev) { if (VMBUS_PROBE_GUID(device_get_parent(dev), dev, &g_net_vsc_device_type) == 0) { device_set_desc(dev, "Hyper-V Network Interface"); return BUS_PROBE_DEFAULT; } return ENXIO; } /* * Standard attach entry point. * * Called when the driver is loaded. It allocates needed resources, * and initializes the "hardware" and software. */ static int netvsc_attach(device_t dev) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; netvsc_device_info device_info; hn_softc_t *sc; int unit = device_get_unit(dev); struct ifnet *ifp = NULL; int error, ring_cnt, tx_ring_cnt; #if __FreeBSD_version >= 1100045 int tso_maxlen; #endif sc = device_get_softc(dev); sc->hn_unit = unit; sc->hn_dev = dev; sc->hn_prichan = vmbus_get_channel(dev); if (hn_tx_taskq == NULL) { sc->hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK, taskqueue_thread_enqueue, &sc->hn_tx_taskq); if (hn_bind_tx_taskq >= 0) { int cpu = hn_bind_tx_taskq; cpuset_t cpu_set; if (cpu > mp_ncpus - 1) cpu = mp_ncpus - 1; CPU_SETOF(cpu, &cpu_set); taskqueue_start_threads_cpuset(&sc->hn_tx_taskq, 1, PI_NET, &cpu_set, "%s tx", device_get_nameunit(dev)); } else { taskqueue_start_threads(&sc->hn_tx_taskq, 1, PI_NET, "%s tx", device_get_nameunit(dev)); } } else { sc->hn_tx_taskq = hn_tx_taskq; } NV_LOCK_INIT(sc, "NetVSCLock"); ifp = sc->hn_ifp = if_alloc(IFT_ETHER); ifp->if_softc = sc; if_initname(ifp, device_get_name(dev), device_get_unit(dev)); /* * Figure out the # of RX rings (ring_cnt) and the # of TX rings * to use (tx_ring_cnt). * * NOTE: * The # of RX rings to use is same as the # of channels to use. */ ring_cnt = hn_chan_cnt; if (ring_cnt <= 0) { /* Default */ ring_cnt = mp_ncpus; if (ring_cnt > HN_RING_CNT_DEF_MAX) ring_cnt = HN_RING_CNT_DEF_MAX; } else if (ring_cnt > mp_ncpus) { ring_cnt = mp_ncpus; } tx_ring_cnt = hn_tx_ring_cnt; if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt) tx_ring_cnt = ring_cnt; if (hn_use_if_start) { /* ifnet.if_start only needs one TX ring. */ tx_ring_cnt = 1; } /* * Set the leader CPU for channels. */ sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus; error = hn_create_tx_data(sc, tx_ring_cnt); if (error) goto failed; error = hn_create_rx_data(sc, ring_cnt); if (error) goto failed; /* * Associate the first TX/RX ring w/ the primary channel. */ hn_channel_attach(sc, sc->hn_prichan); ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = hn_ioctl; ifp->if_init = hn_ifinit; /* needed by hv_rf_on_device_add() code */ ifp->if_mtu = ETHERMTU; if (hn_use_if_start) { int qdepth = hn_get_txswq_depth(&sc->hn_tx_ring[0]); ifp->if_start = hn_start; IFQ_SET_MAXLEN(&ifp->if_snd, qdepth); ifp->if_snd.ifq_drv_maxlen = qdepth - 1; IFQ_SET_READY(&ifp->if_snd); } else { ifp->if_transmit = hn_transmit; ifp->if_qflush = hn_xmit_qflush; } ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts); ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL); ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO); /* XXX ifmedia_set really should do this for us */ sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media; /* * Tell upper layers that we support full VLAN capability. */ ifp->if_hdrlen = sizeof(struct ether_vlan_header); ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO | IFCAP_LRO; ifp->if_capenable |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO | IFCAP_LRO; ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist | CSUM_TSO; sc->hn_xact = vmbus_xact_ctx_create(bus_get_dma_tag(dev), HN_XACT_REQ_SIZE, HN_XACT_RESP_SIZE, 0); if (sc->hn_xact == NULL) goto failed; error = hv_rf_on_device_add(sc, &device_info, &ring_cnt, &sc->hn_rx_ring[0]); if (error) goto failed; KASSERT(ring_cnt > 0 && ring_cnt <= sc->hn_rx_ring_inuse, ("invalid channel count %d, should be less than %d", ring_cnt, sc->hn_rx_ring_inuse)); /* * Set the # of TX/RX rings that could be used according to * the # of channels that host offered. */ if (sc->hn_tx_ring_inuse > ring_cnt) sc->hn_tx_ring_inuse = ring_cnt; sc->hn_rx_ring_inuse = ring_cnt; device_printf(dev, "%d TX ring, %d RX ring\n", sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse); if (sc->hn_rx_ring_inuse > 1) hn_subchan_setup(sc); #if __FreeBSD_version >= 1100099 if (sc->hn_rx_ring_inuse > 1) { /* * Reduce TCP segment aggregation limit for multiple * RX rings to increase ACK timeliness. */ hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MULTIRX_DEF); } #endif if (device_info.link_state == NDIS_MEDIA_STATE_CONNECTED) { sc->hn_carrier = 1; } #if __FreeBSD_version >= 1100045 tso_maxlen = hn_tso_maxlen; if (tso_maxlen <= 0 || tso_maxlen > IP_MAXPACKET) tso_maxlen = IP_MAXPACKET; ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX; ifp->if_hw_tsomaxsegsize = PAGE_SIZE; ifp->if_hw_tsomax = tso_maxlen - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); #endif ether_ifattach(ifp, device_info.mac_addr); #if __FreeBSD_version >= 1100045 if_printf(ifp, "TSO: %u/%u/%u\n", ifp->if_hw_tsomax, ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize); #endif hn_set_chim_size(sc, sc->hn_chim_szmax); if (hn_tx_chimney_size > 0 && hn_tx_chimney_size < sc->hn_chim_szmax) hn_set_chim_size(sc, hn_tx_chimney_size); ctx = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "nvs_version", CTLFLAG_RD, &sc->hn_nvs_ver, 0, "NVS version"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "ndis_version", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_ndis_version_sysctl, "A", "NDIS version"); return (0); failed: hn_destroy_tx_data(sc); if (ifp != NULL) if_free(ifp); return (error); } /* * Standard detach entry point */ static int netvsc_detach(device_t dev) { struct hn_softc *sc = device_get_softc(dev); if (bootverbose) printf("netvsc_detach\n"); /* * XXXKYS: Need to clean up all our * driver state; this is the driver * unloading. */ /* * XXXKYS: Need to stop outgoing traffic and unregister * the netdevice. */ hv_rf_on_device_remove(sc); hn_stop_tx_tasks(sc); ifmedia_removeall(&sc->hn_media); hn_destroy_rx_data(sc); hn_destroy_tx_data(sc); if (sc->hn_tx_taskq != hn_tx_taskq) taskqueue_free(sc->hn_tx_taskq); vmbus_xact_ctx_destroy(sc->hn_xact); return (0); } /* * Standard shutdown entry point */ static int netvsc_shutdown(device_t dev) { return (0); } static __inline int hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs) { struct mbuf *m = *m_head; int error; error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT); if (error == EFBIG) { struct mbuf *m_new; m_new = m_collapse(m, M_NOWAIT, HN_TX_DATA_SEGCNT_MAX); if (m_new == NULL) return ENOBUFS; else *m_head = m = m_new; txr->hn_tx_collapsed++; error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT); } if (!error) { bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap, BUS_DMASYNC_PREWRITE); txd->flags |= HN_TXD_FLAG_DMAMAP; } return error; } static __inline void hn_txdesc_dmamap_unload(struct hn_tx_ring *txr, struct hn_txdesc *txd) { if (txd->flags & HN_TXD_FLAG_DMAMAP) { bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(txr->hn_tx_data_dtag, txd->data_dmap); txd->flags &= ~HN_TXD_FLAG_DMAMAP; } } static __inline int hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd) { KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0, ("put an onlist txd %#x", txd->flags)); KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs)); if (atomic_fetchadd_int(&txd->refs, -1) != 1) return 0; hn_txdesc_dmamap_unload(txr, txd); if (txd->m != NULL) { m_freem(txd->m); txd->m = NULL; } txd->flags |= HN_TXD_FLAG_ONLIST; #ifndef HN_USE_TXDESC_BUFRING mtx_lock_spin(&txr->hn_txlist_spin); KASSERT(txr->hn_txdesc_avail >= 0 && txr->hn_txdesc_avail < txr->hn_txdesc_cnt, ("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail)); txr->hn_txdesc_avail++; SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); mtx_unlock_spin(&txr->hn_txlist_spin); #else atomic_add_int(&txr->hn_txdesc_avail, 1); buf_ring_enqueue(txr->hn_txdesc_br, txd); #endif return 1; } static __inline struct hn_txdesc * hn_txdesc_get(struct hn_tx_ring *txr) { struct hn_txdesc *txd; #ifndef HN_USE_TXDESC_BUFRING mtx_lock_spin(&txr->hn_txlist_spin); txd = SLIST_FIRST(&txr->hn_txlist); if (txd != NULL) { KASSERT(txr->hn_txdesc_avail > 0, ("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail)); txr->hn_txdesc_avail--; SLIST_REMOVE_HEAD(&txr->hn_txlist, link); } mtx_unlock_spin(&txr->hn_txlist_spin); #else txd = buf_ring_dequeue_sc(txr->hn_txdesc_br); #endif if (txd != NULL) { #ifdef HN_USE_TXDESC_BUFRING atomic_subtract_int(&txr->hn_txdesc_avail, 1); #endif KASSERT(txd->m == NULL && txd->refs == 0 && (txd->flags & HN_TXD_FLAG_ONLIST), ("invalid txd")); txd->flags &= ~HN_TXD_FLAG_ONLIST; txd->refs = 1; } return txd; } static __inline void hn_txdesc_hold(struct hn_txdesc *txd) { /* 0->1 transition will never work */ KASSERT(txd->refs > 0, ("invalid refs %d", txd->refs)); atomic_add_int(&txd->refs, 1); } static __inline void hn_txeof(struct hn_tx_ring *txr) { txr->hn_has_txeof = 0; txr->hn_txeof(txr); } static void hn_tx_done(struct hn_send_ctx *sndc, struct hn_softc *sc, struct vmbus_channel *chan, const void *data __unused, int dlen __unused) { struct hn_txdesc *txd = sndc->hn_cbarg; struct hn_tx_ring *txr; if (sndc->hn_chim_idx != HN_NVS_CHIM_IDX_INVALID) hn_chim_free(sc, sndc->hn_chim_idx); txr = txd->txr; KASSERT(txr->hn_chan == chan, ("channel mismatch, on chan%u, should be chan%u", vmbus_chan_subidx(chan), vmbus_chan_subidx(txr->hn_chan))); txr->hn_has_txeof = 1; hn_txdesc_put(txr, txd); ++txr->hn_txdone_cnt; if (txr->hn_txdone_cnt >= HN_EARLY_TXEOF_THRESH) { txr->hn_txdone_cnt = 0; if (txr->hn_oactive) hn_txeof(txr); } } void netvsc_channel_rollup(struct hn_rx_ring *rxr, struct hn_tx_ring *txr) { #if defined(INET) || defined(INET6) tcp_lro_flush_all(&rxr->hn_lro); #endif /* * NOTE: * 'txr' could be NULL, if multiple channels and * ifnet.if_start method are enabled. */ if (txr == NULL || !txr->hn_has_txeof) return; txr->hn_txdone_cnt = 0; hn_txeof(txr); } static __inline uint32_t hn_rndis_pktmsg_offset(uint32_t ofs) { KASSERT(ofs >= sizeof(struct rndis_packet_msg), ("invalid RNDIS packet msg offset %u", ofs)); return (ofs - __offsetof(struct rndis_packet_msg, rm_dataoffset)); } /* * NOTE: * If this function fails, then both txd and m_head0 will be freed. */ static int hn_encap(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0) { bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX]; int error, nsegs, i; struct mbuf *m_head = *m_head0; struct rndis_packet_msg *pkt; - rndis_per_packet_info *rppi; - struct rndis_hash_value *hash_value; uint32_t send_buf_section_idx; int send_buf_section_size, pktlen; + uint32_t *pi_data; /* * extension points to the area reserved for the * rndis_filter_packet, which is placed just after * the netvsc_packet (and rppi struct, if present; * length is updated later). */ pkt = txd->rndis_pkt; pkt->rm_type = REMOTE_NDIS_PACKET_MSG; pkt->rm_len = sizeof(*pkt) + m_head->m_pkthdr.len; pkt->rm_dataoffset = sizeof(*pkt); pkt->rm_datalen = m_head->m_pkthdr.len; pkt->rm_pktinfooffset = sizeof(*pkt); pkt->rm_pktinfolen = 0; /* * Set the hash value for this packet, so that the host could * dispatch the TX done event for this packet back to this TX * ring's channel. */ - rppi = hv_set_rppi_data(pkt, RNDIS_HASHVAL_PPI_SIZE, - nbl_hash_value); - hash_value = (struct rndis_hash_value *)((uint8_t *)rppi + - rppi->per_packet_info_offset); - hash_value->hash_value = txr->hn_tx_idx; + pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, + HN_NDIS_HASH_VALUE_SIZE, HN_NDIS_PKTINFO_TYPE_HASHVAL); + *pi_data = txr->hn_tx_idx; if (m_head->m_flags & M_VLANTAG) { - ndis_8021q_info *rppi_vlan_info; - - rppi = hv_set_rppi_data(pkt, RNDIS_VLAN_PPI_SIZE, - ieee_8021q_info); - - rppi_vlan_info = (ndis_8021q_info *)((uint8_t *)rppi + - rppi->per_packet_info_offset); - rppi_vlan_info->u1.value = NDIS_VLAN_INFO_MAKE( + pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, + NDIS_VLAN_INFO_SIZE, NDIS_PKTINFO_TYPE_VLAN); + *pi_data = NDIS_VLAN_INFO_MAKE( EVL_VLANOFTAG(m_head->m_pkthdr.ether_vtag), EVL_PRIOFTAG(m_head->m_pkthdr.ether_vtag), EVL_CFIOFTAG(m_head->m_pkthdr.ether_vtag)); } if (m_head->m_pkthdr.csum_flags & CSUM_TSO) { #if defined(INET6) || defined(INET) - rndis_tcp_tso_info *tso_info; struct ether_vlan_header *eh; int ether_len; /* * XXX need m_pullup and use mtodo */ eh = mtod(m_head, struct ether_vlan_header*); if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) ether_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN; else ether_len = ETHER_HDR_LEN; - rppi = hv_set_rppi_data(pkt, RNDIS_TSO_PPI_SIZE, - tcp_large_send_info); - - tso_info = (rndis_tcp_tso_info *)((uint8_t *)rppi + - rppi->per_packet_info_offset); - + pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, + NDIS_LSO2_INFO_SIZE, NDIS_PKTINFO_TYPE_LSO); #ifdef INET if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) { struct ip *ip = (struct ip *)(m_head->m_data + ether_len); unsigned long iph_len = ip->ip_hl << 2; struct tcphdr *th = (struct tcphdr *)((caddr_t)ip + iph_len); ip->ip_len = 0; ip->ip_sum = 0; th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(IPPROTO_TCP)); - tso_info->value = NDIS_LSO2_INFO_MAKEIPV4(0, + *pi_data = NDIS_LSO2_INFO_MAKEIPV4(0, m_head->m_pkthdr.tso_segsz); } #endif #if defined(INET6) && defined(INET) else #endif #ifdef INET6 { struct ip6_hdr *ip6 = (struct ip6_hdr *) (m_head->m_data + ether_len); struct tcphdr *th = (struct tcphdr *)(ip6 + 1); ip6->ip6_plen = 0; th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0); - tso_info->value = NDIS_LSO2_INFO_MAKEIPV6(0, + *pi_data = NDIS_LSO2_INFO_MAKEIPV6(0, m_head->m_pkthdr.tso_segsz); } #endif #endif /* INET6 || INET */ } else if (m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) { - rndis_tcp_ip_csum_info *csum_info; + pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN, + NDIS_TXCSUM_INFO_SIZE, NDIS_PKTINFO_TYPE_CSUM); + *pi_data = NDIS_TXCSUM_INFO_IPV4; - rppi = hv_set_rppi_data(pkt, RNDIS_CSUM_PPI_SIZE, - tcpip_chksum_info); - csum_info = (rndis_tcp_ip_csum_info *)((uint8_t *)rppi + - rppi->per_packet_info_offset); - - csum_info->value = NDIS_TXCSUM_INFO_IPV4; if (m_head->m_pkthdr.csum_flags & CSUM_IP) - csum_info->value |= NDIS_TXCSUM_INFO_IPCS; + *pi_data |= NDIS_TXCSUM_INFO_IPCS; if (m_head->m_pkthdr.csum_flags & CSUM_TCP) - csum_info->value |= NDIS_TXCSUM_INFO_TCPCS; + *pi_data |= NDIS_TXCSUM_INFO_TCPCS; else if (m_head->m_pkthdr.csum_flags & CSUM_UDP) - csum_info->value |= NDIS_TXCSUM_INFO_UDPCS; + *pi_data |= NDIS_TXCSUM_INFO_UDPCS; } pktlen = pkt->rm_pktinfooffset + pkt->rm_pktinfolen; /* Convert RNDIS packet message offsets */ pkt->rm_dataoffset = hn_rndis_pktmsg_offset(pkt->rm_dataoffset); pkt->rm_pktinfooffset = hn_rndis_pktmsg_offset(pkt->rm_pktinfooffset); /* * Chimney send, if the packet could fit into one chimney buffer. */ if (pkt->rm_len < txr->hn_chim_size) { txr->hn_tx_chimney_tried++; send_buf_section_idx = hn_chim_alloc(txr->hn_sc); if (send_buf_section_idx != HN_NVS_CHIM_IDX_INVALID) { uint8_t *dest = txr->hn_sc->hn_chim + (send_buf_section_idx * txr->hn_sc->hn_chim_szmax); memcpy(dest, pkt, pktlen); dest += pktlen; m_copydata(m_head, 0, m_head->m_pkthdr.len, dest); send_buf_section_size = pkt->rm_len; txr->hn_gpa_cnt = 0; txr->hn_tx_chimney++; goto done; } } error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs); if (error) { int freed; /* * This mbuf is not linked w/ the txd yet, so free it now. */ m_freem(m_head); *m_head0 = NULL; freed = hn_txdesc_put(txr, txd); KASSERT(freed != 0, ("fail to free txd upon txdma error")); txr->hn_txdma_failed++; if_inc_counter(txr->hn_sc->hn_ifp, IFCOUNTER_OERRORS, 1); return error; } *m_head0 = m_head; /* +1 RNDIS packet message */ txr->hn_gpa_cnt = nsegs + 1; /* send packet with page buffer */ txr->hn_gpa[0].gpa_page = atop(txd->rndis_pkt_paddr); txr->hn_gpa[0].gpa_ofs = txd->rndis_pkt_paddr & PAGE_MASK; txr->hn_gpa[0].gpa_len = pktlen; /* * Fill the page buffers with mbuf info after the page * buffer for RNDIS packet message. */ for (i = 0; i < nsegs; ++i) { struct vmbus_gpa *gpa = &txr->hn_gpa[i + 1]; gpa->gpa_page = atop(segs[i].ds_addr); gpa->gpa_ofs = segs[i].ds_addr & PAGE_MASK; gpa->gpa_len = segs[i].ds_len; } send_buf_section_idx = HN_NVS_CHIM_IDX_INVALID; send_buf_section_size = 0; done: txd->m = m_head; /* Set the completion routine */ hn_send_ctx_init(&txd->send_ctx, hn_tx_done, txd, send_buf_section_idx, send_buf_section_size); return 0; } /* * NOTE: * If this function fails, then txd will be freed, but the mbuf * associated w/ the txd will _not_ be freed. */ static int hn_send_pkt(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd) { int error, send_failed = 0; again: /* * Make sure that txd is not freed before ETHER_BPF_MTAP. */ hn_txdesc_hold(txd); error = hv_nv_on_send(txr->hn_chan, HN_NVS_RNDIS_MTYPE_DATA, &txd->send_ctx, txr->hn_gpa, txr->hn_gpa_cnt); if (!error) { ETHER_BPF_MTAP(ifp, txd->m); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); if (!hn_use_if_start) { if_inc_counter(ifp, IFCOUNTER_OBYTES, txd->m->m_pkthdr.len); if (txd->m->m_flags & M_MCAST) if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); } txr->hn_pkts++; } hn_txdesc_put(txr, txd); if (__predict_false(error)) { int freed; /* * This should "really rarely" happen. * * XXX Too many RX to be acked or too many sideband * commands to run? Ask netvsc_channel_rollup() * to kick start later. */ txr->hn_has_txeof = 1; if (!send_failed) { txr->hn_send_failed++; send_failed = 1; /* * Try sending again after set hn_has_txeof; * in case that we missed the last * netvsc_channel_rollup(). */ goto again; } if_printf(ifp, "send failed\n"); /* * Caller will perform further processing on the * associated mbuf, so don't free it in hn_txdesc_put(); * only unload it from the DMA map in hn_txdesc_put(), * if it was loaded. */ txd->m = NULL; freed = hn_txdesc_put(txr, txd); KASSERT(freed != 0, ("fail to free txd upon send error")); txr->hn_send_failed++; } return error; } /* * Start a transmit of one or more packets */ static int hn_start_locked(struct hn_tx_ring *txr, int len) { struct hn_softc *sc = txr->hn_sc; struct ifnet *ifp = sc->hn_ifp; KASSERT(hn_use_if_start, ("hn_start_locked is called, when if_start is disabled")); KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); mtx_assert(&txr->hn_tx_lock, MA_OWNED); if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != IFF_DRV_RUNNING) return 0; while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { struct hn_txdesc *txd; struct mbuf *m_head; int error; IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head); if (m_head == NULL) break; if (len > 0 && m_head->m_pkthdr.len > len) { /* * This sending could be time consuming; let callers * dispatch this packet sending (and sending of any * following up packets) to tx taskqueue. */ IFQ_DRV_PREPEND(&ifp->if_snd, m_head); return 1; } txd = hn_txdesc_get(txr); if (txd == NULL) { txr->hn_no_txdescs++; IFQ_DRV_PREPEND(&ifp->if_snd, m_head); atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); break; } error = hn_encap(txr, txd, &m_head); if (error) { /* Both txd and m_head are freed */ continue; } error = hn_send_pkt(ifp, txr, txd); if (__predict_false(error)) { /* txd is freed, but m_head is not */ IFQ_DRV_PREPEND(&ifp->if_snd, m_head); atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); break; } } return 0; } /* * Link up/down notification */ void netvsc_linkstatus_callback(struct hn_softc *sc, uint32_t status) { if (status == 1) { sc->hn_carrier = 1; } else { sc->hn_carrier = 0; } } /* * Append the specified data to the indicated mbuf chain, * Extend the mbuf chain if the new data does not fit in * existing space. * * This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c. * There should be an equivalent in the kernel mbuf code, * but there does not appear to be one yet. * * Differs from m_append() in that additional mbufs are * allocated with cluster size MJUMPAGESIZE, and filled * accordingly. * * Return 1 if able to complete the job; otherwise 0. */ static int hv_m_append(struct mbuf *m0, int len, c_caddr_t cp) { struct mbuf *m, *n; int remainder, space; for (m = m0; m->m_next != NULL; m = m->m_next) ; remainder = len; space = M_TRAILINGSPACE(m); if (space > 0) { /* * Copy into available space. */ if (space > remainder) space = remainder; bcopy(cp, mtod(m, caddr_t) + m->m_len, space); m->m_len += space; cp += space; remainder -= space; } while (remainder > 0) { /* * Allocate a new mbuf; could check space * and allocate a cluster instead. */ n = m_getjcl(M_NOWAIT, m->m_type, 0, MJUMPAGESIZE); if (n == NULL) break; n->m_len = min(MJUMPAGESIZE, remainder); bcopy(cp, mtod(n, caddr_t), n->m_len); cp += n->m_len; remainder -= n->m_len; m->m_next = n; m = n; } if (m0->m_flags & M_PKTHDR) m0->m_pkthdr.len += len - remainder; return (remainder == 0); } #if defined(INET) || defined(INET6) static __inline int hn_lro_rx(struct lro_ctrl *lc, struct mbuf *m) { #if __FreeBSD_version >= 1100095 if (hn_lro_mbufq_depth) { tcp_lro_queue_mbuf(lc, m); return 0; } #endif return tcp_lro_rx(lc, m, 0); } #endif /* * Called when we receive a data packet from the "wire" on the * specified device * * Note: This is no longer used as a callback */ int netvsc_recv(struct hn_rx_ring *rxr, const void *data, int dlen, const struct hn_recvinfo *info) { struct ifnet *ifp = rxr->hn_ifp; struct mbuf *m_new; int size, do_lro = 0, do_csum = 1; int hash_type; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) return (0); /* * Bail out if packet contains more data than configured MTU. */ if (dlen > (ifp->if_mtu + ETHER_HDR_LEN)) { return (0); } else if (dlen <= MHLEN) { m_new = m_gethdr(M_NOWAIT, MT_DATA); if (m_new == NULL) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); return (0); } memcpy(mtod(m_new, void *), data, dlen); m_new->m_pkthdr.len = m_new->m_len = dlen; rxr->hn_small_pkts++; } else { /* * Get an mbuf with a cluster. For packets 2K or less, * get a standard 2K cluster. For anything larger, get a * 4K cluster. Any buffers larger than 4K can cause problems * if looped around to the Hyper-V TX channel, so avoid them. */ size = MCLBYTES; if (dlen > MCLBYTES) { /* 4096 */ size = MJUMPAGESIZE; } m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size); if (m_new == NULL) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); return (0); } hv_m_append(m_new, dlen, data); } m_new->m_pkthdr.rcvif = ifp; if (__predict_false((ifp->if_capenable & IFCAP_RXCSUM) == 0)) do_csum = 0; /* receive side checksum offload */ if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) { /* IP csum offload */ if ((info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK) && do_csum) { m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID); rxr->hn_csum_ip++; } /* TCP/UDP csum offload */ if ((info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK | NDIS_RXCSUM_INFO_TCPCS_OK)) && do_csum) { m_new->m_pkthdr.csum_flags |= (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m_new->m_pkthdr.csum_data = 0xffff; if (info->csum_info & NDIS_RXCSUM_INFO_TCPCS_OK) rxr->hn_csum_tcp++; else rxr->hn_csum_udp++; } if ((info->csum_info & (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) == (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) do_lro = 1; } else { const struct ether_header *eh; uint16_t etype; int hoff; hoff = sizeof(*eh); if (m_new->m_len < hoff) goto skip; eh = mtod(m_new, struct ether_header *); etype = ntohs(eh->ether_type); if (etype == ETHERTYPE_VLAN) { const struct ether_vlan_header *evl; hoff = sizeof(*evl); if (m_new->m_len < hoff) goto skip; evl = mtod(m_new, struct ether_vlan_header *); etype = ntohs(evl->evl_proto); } if (etype == ETHERTYPE_IP) { int pr; pr = hn_check_iplen(m_new, hoff); if (pr == IPPROTO_TCP) { if (do_csum && (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_TCP)) { rxr->hn_csum_trusted++; m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m_new->m_pkthdr.csum_data = 0xffff; } do_lro = 1; } else if (pr == IPPROTO_UDP) { if (do_csum && (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_UDP)) { rxr->hn_csum_trusted++; m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); m_new->m_pkthdr.csum_data = 0xffff; } } else if (pr != IPPROTO_DONE && do_csum && (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_IP)) { rxr->hn_csum_trusted++; m_new->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID); } } } skip: if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) { m_new->m_pkthdr.ether_vtag = EVL_MAKETAG( NDIS_VLAN_INFO_ID(info->vlan_info), NDIS_VLAN_INFO_PRI(info->vlan_info), NDIS_VLAN_INFO_CFI(info->vlan_info)); m_new->m_flags |= M_VLANTAG; } if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) { rxr->hn_rss_pkts++; m_new->m_pkthdr.flowid = info->hash_value; hash_type = M_HASHTYPE_OPAQUE_HASH; if ((info->hash_info & NDIS_HASH_FUNCTION_MASK) == NDIS_HASH_FUNCTION_TOEPLITZ) { uint32_t type = (info->hash_info & NDIS_HASH_TYPE_MASK); switch (type) { case NDIS_HASH_IPV4: hash_type = M_HASHTYPE_RSS_IPV4; break; case NDIS_HASH_TCP_IPV4: hash_type = M_HASHTYPE_RSS_TCP_IPV4; break; case NDIS_HASH_IPV6: hash_type = M_HASHTYPE_RSS_IPV6; break; case NDIS_HASH_IPV6_EX: hash_type = M_HASHTYPE_RSS_IPV6_EX; break; case NDIS_HASH_TCP_IPV6: hash_type = M_HASHTYPE_RSS_TCP_IPV6; break; case NDIS_HASH_TCP_IPV6_EX: hash_type = M_HASHTYPE_RSS_TCP_IPV6_EX; break; } } } else { m_new->m_pkthdr.flowid = rxr->hn_rx_idx; hash_type = M_HASHTYPE_OPAQUE; } M_HASHTYPE_SET(m_new, hash_type); /* * Note: Moved RX completion back to hv_nv_on_receive() so all * messages (not just data messages) will trigger a response. */ if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); rxr->hn_pkts++; if ((ifp->if_capenable & IFCAP_LRO) && do_lro) { #if defined(INET) || defined(INET6) struct lro_ctrl *lro = &rxr->hn_lro; if (lro->lro_cnt) { rxr->hn_lro_tried++; if (hn_lro_rx(lro, m_new) == 0) { /* DONE! */ return 0; } } #endif } /* We're not holding the lock here, so don't release it */ (*ifp->if_input)(ifp, m_new); return (0); } /* * Rules for using sc->temp_unusable: * 1. sc->temp_unusable can only be read or written while holding NV_LOCK() * 2. code reading sc->temp_unusable under NV_LOCK(), and finding * sc->temp_unusable set, must release NV_LOCK() and exit * 3. to retain exclusive control of the interface, * sc->temp_unusable must be set by code before releasing NV_LOCK() * 4. only code setting sc->temp_unusable can clear sc->temp_unusable * 5. code setting sc->temp_unusable must eventually clear sc->temp_unusable */ /* * Standard ioctl entry point. Called when the user wants to configure * the interface. */ static int hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { hn_softc_t *sc = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; #ifdef INET struct ifaddr *ifa = (struct ifaddr *)data; #endif netvsc_device_info device_info; int mask, error = 0, ring_cnt; int retry_cnt = 500; switch(cmd) { case SIOCSIFADDR: #ifdef INET if (ifa->ifa_addr->sa_family == AF_INET) { ifp->if_flags |= IFF_UP; if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) hn_ifinit(sc); arp_ifinit(ifp, ifa); } else #endif error = ether_ioctl(ifp, cmd, data); break; case SIOCSIFMTU: /* Check MTU value change */ if (ifp->if_mtu == ifr->ifr_mtu) break; if (ifr->ifr_mtu > NETVSC_MAX_CONFIGURABLE_MTU) { error = EINVAL; break; } /* Obtain and record requested MTU */ ifp->if_mtu = ifr->ifr_mtu; #if __FreeBSD_version >= 1100099 /* * Make sure that LRO aggregation length limit is still * valid, after the MTU change. */ NV_LOCK(sc); if (sc->hn_rx_ring[0].hn_lro.lro_length_lim < HN_LRO_LENLIM_MIN(ifp)) hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp)); NV_UNLOCK(sc); #endif do { NV_LOCK(sc); if (!sc->temp_unusable) { sc->temp_unusable = TRUE; retry_cnt = -1; } NV_UNLOCK(sc); if (retry_cnt > 0) { retry_cnt--; DELAY(5 * 1000); } } while (retry_cnt > 0); if (retry_cnt == 0) { error = EINVAL; break; } /* We must remove and add back the device to cause the new * MTU to take effect. This includes tearing down, but not * deleting the channel, then bringing it back up. */ error = hv_rf_on_device_remove(sc); if (error) { NV_LOCK(sc); sc->temp_unusable = FALSE; NV_UNLOCK(sc); break; } /* Wait for subchannels to be destroyed */ vmbus_subchan_drain(sc->hn_prichan); ring_cnt = sc->hn_rx_ring_inuse; error = hv_rf_on_device_add(sc, &device_info, &ring_cnt, &sc->hn_rx_ring[0]); if (error) { NV_LOCK(sc); sc->temp_unusable = FALSE; NV_UNLOCK(sc); break; } /* # of channels can _not_ be changed */ KASSERT(sc->hn_rx_ring_inuse == ring_cnt, ("RX ring count %d and channel count %u mismatch", sc->hn_rx_ring_cnt, ring_cnt)); if (sc->hn_rx_ring_inuse > 1) { int r; /* * Skip the rings on primary channel; they are * handled by the hv_rf_on_device_add() above. */ for (r = 1; r < sc->hn_rx_ring_cnt; ++r) { sc->hn_rx_ring[r].hn_rx_flags &= ~HN_RX_FLAG_ATTACHED; } for (r = 1; r < sc->hn_tx_ring_cnt; ++r) { sc->hn_tx_ring[r].hn_tx_flags &= ~HN_TX_FLAG_ATTACHED; } hn_subchan_setup(sc); } if (sc->hn_tx_ring[0].hn_chim_size > sc->hn_chim_szmax) hn_set_chim_size(sc, sc->hn_chim_szmax); hn_ifinit_locked(sc); NV_LOCK(sc); sc->temp_unusable = FALSE; NV_UNLOCK(sc); break; case SIOCSIFFLAGS: do { NV_LOCK(sc); if (!sc->temp_unusable) { sc->temp_unusable = TRUE; retry_cnt = -1; } NV_UNLOCK(sc); if (retry_cnt > 0) { retry_cnt--; DELAY(5 * 1000); } } while (retry_cnt > 0); if (retry_cnt == 0) { error = EINVAL; break; } if (ifp->if_flags & IFF_UP) { /* * If only the state of the PROMISC flag changed, * then just use the 'set promisc mode' command * instead of reinitializing the entire NIC. Doing * a full re-init means reloading the firmware and * waiting for it to start up, which may take a * second or two. */ #ifdef notyet /* Fixme: Promiscuous mode? */ if (ifp->if_drv_flags & IFF_DRV_RUNNING && ifp->if_flags & IFF_PROMISC && !(sc->hn_if_flags & IFF_PROMISC)) { /* do something here for Hyper-V */ } else if (ifp->if_drv_flags & IFF_DRV_RUNNING && !(ifp->if_flags & IFF_PROMISC) && sc->hn_if_flags & IFF_PROMISC) { /* do something here for Hyper-V */ } else #endif hn_ifinit_locked(sc); } else { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { hn_stop(sc); } } NV_LOCK(sc); sc->temp_unusable = FALSE; NV_UNLOCK(sc); sc->hn_if_flags = ifp->if_flags; error = 0; break; case SIOCSIFCAP: NV_LOCK(sc); mask = ifr->ifr_reqcap ^ ifp->if_capenable; if (mask & IFCAP_TXCSUM) { ifp->if_capenable ^= IFCAP_TXCSUM; if (ifp->if_capenable & IFCAP_TXCSUM) { ifp->if_hwassist |= sc->hn_tx_ring[0].hn_csum_assist; } else { ifp->if_hwassist &= ~sc->hn_tx_ring[0].hn_csum_assist; } } if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_LRO) ifp->if_capenable ^= IFCAP_LRO; if (mask & IFCAP_TSO4) { ifp->if_capenable ^= IFCAP_TSO4; if (ifp->if_capenable & IFCAP_TSO4) ifp->if_hwassist |= CSUM_IP_TSO; else ifp->if_hwassist &= ~CSUM_IP_TSO; } if (mask & IFCAP_TSO6) { ifp->if_capenable ^= IFCAP_TSO6; if (ifp->if_capenable & IFCAP_TSO6) ifp->if_hwassist |= CSUM_IP6_TSO; else ifp->if_hwassist &= ~CSUM_IP6_TSO; } NV_UNLOCK(sc); error = 0; break; case SIOCADDMULTI: case SIOCDELMULTI: #ifdef notyet /* Fixme: Multicast mode? */ if (ifp->if_drv_flags & IFF_DRV_RUNNING) { NV_LOCK(sc); netvsc_setmulti(sc); NV_UNLOCK(sc); error = 0; } #endif error = EINVAL; break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd); break; default: error = ether_ioctl(ifp, cmd, data); break; } return (error); } /* * */ static void hn_stop(hn_softc_t *sc) { struct ifnet *ifp; int ret, i; ifp = sc->hn_ifp; if (bootverbose) printf(" Closing Device ...\n"); atomic_clear_int(&ifp->if_drv_flags, (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)); for (i = 0; i < sc->hn_tx_ring_inuse; ++i) sc->hn_tx_ring[i].hn_oactive = 0; if_link_state_change(ifp, LINK_STATE_DOWN); sc->hn_initdone = 0; ret = hv_rf_on_close(sc); } /* * FreeBSD transmit entry point */ static void hn_start(struct ifnet *ifp) { struct hn_softc *sc = ifp->if_softc; struct hn_tx_ring *txr = &sc->hn_tx_ring[0]; if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; sched = hn_start_locked(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (!sched) return; } do_sched: taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); } static void hn_start_txeof(struct hn_tx_ring *txr) { struct hn_softc *sc = txr->hn_sc; struct ifnet *ifp = sc->hn_ifp; KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring")); if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); sched = hn_start_locked(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (sched) { taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); } } else { do_sched: /* * Release the OACTIVE earlier, with the hope, that * others could catch up. The task will clear the * flag again with the hn_tx_lock to avoid possible * races. */ atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); } } /* * */ static void hn_ifinit_locked(hn_softc_t *sc) { struct ifnet *ifp; int ret, i; ifp = sc->hn_ifp; if (ifp->if_drv_flags & IFF_DRV_RUNNING) { return; } hv_promisc_mode = 1; ret = hv_rf_on_open(sc); if (ret != 0) { return; } else { sc->hn_initdone = 1; } atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE); for (i = 0; i < sc->hn_tx_ring_inuse; ++i) sc->hn_tx_ring[i].hn_oactive = 0; atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); if_link_state_change(ifp, LINK_STATE_UP); } /* * */ static void hn_ifinit(void *xsc) { hn_softc_t *sc = xsc; NV_LOCK(sc); if (sc->temp_unusable) { NV_UNLOCK(sc); return; } sc->temp_unusable = TRUE; NV_UNLOCK(sc); hn_ifinit_locked(sc); NV_LOCK(sc); sc->temp_unusable = FALSE; NV_UNLOCK(sc); } #ifdef LATER /* * */ static void hn_watchdog(struct ifnet *ifp) { hn_softc_t *sc; sc = ifp->if_softc; printf("hn%d: watchdog timeout -- resetting\n", sc->hn_unit); hn_ifinit(sc); /*???*/ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } #endif #if __FreeBSD_version >= 1100099 static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; unsigned int lenlim; int error; lenlim = sc->hn_rx_ring[0].hn_lro.lro_length_lim; error = sysctl_handle_int(oidp, &lenlim, 0, req); if (error || req->newptr == NULL) return error; if (lenlim < HN_LRO_LENLIM_MIN(sc->hn_ifp) || lenlim > TCP_LRO_LENGTH_MAX) return EINVAL; NV_LOCK(sc); hn_set_lro_lenlim(sc, lenlim); NV_UNLOCK(sc); return 0; } static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ackcnt, error, i; /* * lro_ackcnt_lim is append count limit, * +1 to turn it into aggregation limit. */ ackcnt = sc->hn_rx_ring[0].hn_lro.lro_ackcnt_lim + 1; error = sysctl_handle_int(oidp, &ackcnt, 0, req); if (error || req->newptr == NULL) return error; if (ackcnt < 2 || ackcnt > (TCP_LRO_ACKCNT_MAX + 1)) return EINVAL; /* * Convert aggregation limit back to append * count limit. */ --ackcnt; NV_LOCK(sc); for (i = 0; i < sc->hn_rx_ring_inuse; ++i) sc->hn_rx_ring[i].hn_lro.lro_ackcnt_lim = ackcnt; NV_UNLOCK(sc); return 0; } #endif static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int hcsum = arg2; int on, error, i; on = 0; if (sc->hn_rx_ring[0].hn_trust_hcsum & hcsum) on = 1; error = sysctl_handle_int(oidp, &on, 0, req); if (error || req->newptr == NULL) return error; NV_LOCK(sc); for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; if (on) rxr->hn_trust_hcsum |= hcsum; else rxr->hn_trust_hcsum &= ~hcsum; } NV_UNLOCK(sc); return 0; } static int hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int chim_size, error; chim_size = sc->hn_tx_ring[0].hn_chim_size; error = sysctl_handle_int(oidp, &chim_size, 0, req); if (error || req->newptr == NULL) return error; if (chim_size > sc->hn_chim_szmax || chim_size <= 0) return EINVAL; hn_set_chim_size(sc, chim_size); return 0; } static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error; struct hn_rx_ring *rxr; u_long stat; stat = 0; for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { rxr = &sc->hn_rx_ring[i]; stat += *((u_long *)((uint8_t *)rxr + ofs)); } error = sysctl_handle_long(oidp, &stat, 0, req); if (error || req->newptr == NULL) return error; /* Zero out this stat. */ for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { rxr = &sc->hn_rx_ring[i]; *((u_long *)((uint8_t *)rxr + ofs)) = 0; } return 0; } static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error; struct hn_rx_ring *rxr; uint64_t stat; stat = 0; for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { rxr = &sc->hn_rx_ring[i]; stat += *((uint64_t *)((uint8_t *)rxr + ofs)); } error = sysctl_handle_64(oidp, &stat, 0, req); if (error || req->newptr == NULL) return error; /* Zero out this stat. */ for (i = 0; i < sc->hn_rx_ring_inuse; ++i) { rxr = &sc->hn_rx_ring[i]; *((uint64_t *)((uint8_t *)rxr + ofs)) = 0; } return 0; } static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error; struct hn_tx_ring *txr; u_long stat; stat = 0; for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { txr = &sc->hn_tx_ring[i]; stat += *((u_long *)((uint8_t *)txr + ofs)); } error = sysctl_handle_long(oidp, &stat, 0, req); if (error || req->newptr == NULL) return error; /* Zero out this stat. */ for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { txr = &sc->hn_tx_ring[i]; *((u_long *)((uint8_t *)txr + ofs)) = 0; } return 0; } static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; int ofs = arg2, i, error, conf; struct hn_tx_ring *txr; txr = &sc->hn_tx_ring[0]; conf = *((int *)((uint8_t *)txr + ofs)); error = sysctl_handle_int(oidp, &conf, 0, req); if (error || req->newptr == NULL) return error; NV_LOCK(sc); for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { txr = &sc->hn_tx_ring[i]; *((int *)((uint8_t *)txr + ofs)) = conf; } NV_UNLOCK(sc); return 0; } static int hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS) { struct hn_softc *sc = arg1; char verstr[16]; snprintf(verstr, sizeof(verstr), "%u.%u", NDIS_VERSION_MAJOR(sc->hn_ndis_ver), NDIS_VERSION_MINOR(sc->hn_ndis_ver)); return sysctl_handle_string(oidp, verstr, sizeof(verstr), req); } static int hn_check_iplen(const struct mbuf *m, int hoff) { const struct ip *ip; int len, iphlen, iplen; const struct tcphdr *th; int thoff; /* TCP data offset */ len = hoff + sizeof(struct ip); /* The packet must be at least the size of an IP header. */ if (m->m_pkthdr.len < len) return IPPROTO_DONE; /* The fixed IP header must reside completely in the first mbuf. */ if (m->m_len < len) return IPPROTO_DONE; ip = mtodo(m, hoff); /* Bound check the packet's stated IP header length. */ iphlen = ip->ip_hl << 2; if (iphlen < sizeof(struct ip)) /* minimum header length */ return IPPROTO_DONE; /* The full IP header must reside completely in the one mbuf. */ if (m->m_len < hoff + iphlen) return IPPROTO_DONE; iplen = ntohs(ip->ip_len); /* * Check that the amount of data in the buffers is as * at least much as the IP header would have us expect. */ if (m->m_pkthdr.len < hoff + iplen) return IPPROTO_DONE; /* * Ignore IP fragments. */ if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF)) return IPPROTO_DONE; /* * The TCP/IP or UDP/IP header must be entirely contained within * the first fragment of a packet. */ switch (ip->ip_p) { case IPPROTO_TCP: if (iplen < iphlen + sizeof(struct tcphdr)) return IPPROTO_DONE; if (m->m_len < hoff + iphlen + sizeof(struct tcphdr)) return IPPROTO_DONE; th = (const struct tcphdr *)((const uint8_t *)ip + iphlen); thoff = th->th_off << 2; if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen) return IPPROTO_DONE; if (m->m_len < hoff + iphlen + thoff) return IPPROTO_DONE; break; case IPPROTO_UDP: if (iplen < iphlen + sizeof(struct udphdr)) return IPPROTO_DONE; if (m->m_len < hoff + iphlen + sizeof(struct udphdr)) return IPPROTO_DONE; break; default: if (iplen < iphlen) return IPPROTO_DONE; break; } return ip->ip_p; } static int hn_create_rx_data(struct hn_softc *sc, int ring_cnt) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; device_t dev = sc->hn_dev; #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 int lroent_cnt; #endif #endif int i; /* * Create RXBUF for reception. * * NOTE: * - It is shared by all channels. * - A large enough buffer is allocated, certain version of NVSes * may further limit the usable space. */ sc->hn_rxbuf = hyperv_dmamem_alloc(bus_get_dma_tag(dev), PAGE_SIZE, 0, NETVSC_RECEIVE_BUFFER_SIZE, &sc->hn_rxbuf_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (sc->hn_rxbuf == NULL) { device_printf(sc->hn_dev, "allocate rxbuf failed\n"); return (ENOMEM); } sc->hn_rx_ring_cnt = ring_cnt; sc->hn_rx_ring_inuse = sc->hn_rx_ring_cnt; sc->hn_rx_ring = malloc(sizeof(struct hn_rx_ring) * sc->hn_rx_ring_cnt, M_NETVSC, M_WAITOK | M_ZERO); #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 lroent_cnt = hn_lro_entry_count; if (lroent_cnt < TCP_LRO_ENTRIES) lroent_cnt = TCP_LRO_ENTRIES; device_printf(dev, "LRO: entry count %d\n", lroent_cnt); #endif #endif /* INET || INET6 */ ctx = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); /* Create dev.hn.UNIT.rx sysctl tree */ sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; if (hn_trust_hosttcp) rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP; if (hn_trust_hostudp) rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_UDP; if (hn_trust_hostip) rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_IP; rxr->hn_ifp = sc->hn_ifp; if (i < sc->hn_tx_ring_cnt) rxr->hn_txr = &sc->hn_tx_ring[i]; rxr->hn_rdbuf = malloc(NETVSC_PACKET_SIZE, M_NETVSC, M_WAITOK); rxr->hn_rx_idx = i; rxr->hn_rxbuf = sc->hn_rxbuf; /* * Initialize LRO. */ #if defined(INET) || defined(INET6) #if __FreeBSD_version >= 1100095 tcp_lro_init_args(&rxr->hn_lro, sc->hn_ifp, lroent_cnt, hn_lro_mbufq_depth); #else tcp_lro_init(&rxr->hn_lro); rxr->hn_lro.ifp = sc->hn_ifp; #endif #if __FreeBSD_version >= 1100099 rxr->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF; rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF; #endif #endif /* INET || INET6 */ if (sc->hn_rx_sysctl_tree != NULL) { char name[16]; /* * Create per RX ring sysctl tree: * dev.hn.UNIT.rx.RINGID */ snprintf(name, sizeof(name), "%d", i); rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree), OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (rxr->hn_rx_sysctl_tree != NULL) { SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), OID_AUTO, "packets", CTLFLAG_RW, &rxr->hn_pkts, "# of packets received"); SYSCTL_ADD_ULONG(ctx, SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree), OID_AUTO, "rss_pkts", CTLFLAG_RW, &rxr->hn_rss_pkts, "# of packets w/ RSS info received"); } } } SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued", CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_lro.lro_queued), hn_rx_stat_u64_sysctl, "LU", "LRO queued"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_flushed", CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_lro.lro_flushed), hn_rx_stat_u64_sysctl, "LU", "LRO flushed"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_tried", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_lro_tried), hn_rx_stat_ulong_sysctl, "LU", "# of LRO tries"); #if __FreeBSD_version >= 1100099 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim", CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_lro_lenlim_sysctl, "IU", "Max # of data bytes to be aggregated by LRO"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_lro_ackcnt_sysctl, "I", "Max # of ACKs to be aggregated by LRO"); #endif SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_TCP, hn_trust_hcsum_sysctl, "I", "Trust tcp segement verification on host side, " "when csum info is missing"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostudp", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_UDP, hn_trust_hcsum_sysctl, "I", "Trust udp datagram verification on host side, " "when csum info is missing"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostip", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_IP, hn_trust_hcsum_sysctl, "I", "Trust ip packet verification on host side, " "when csum info is missing"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_ip", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_ip), hn_rx_stat_ulong_sysctl, "LU", "RXCSUM IP"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_tcp", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_tcp), hn_rx_stat_ulong_sysctl, "LU", "RXCSUM TCP"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_udp", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_udp), hn_rx_stat_ulong_sysctl, "LU", "RXCSUM UDP"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_trusted", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_csum_trusted), hn_rx_stat_ulong_sysctl, "LU", "# of packets that we trust host's csum verification"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "small_pkts", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_rx_ring, hn_small_pkts), hn_rx_stat_ulong_sysctl, "LU", "# of small packets received"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_cnt", CTLFLAG_RD, &sc->hn_rx_ring_cnt, 0, "# created RX rings"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_inuse", CTLFLAG_RD, &sc->hn_rx_ring_inuse, 0, "# used RX rings"); return (0); } static void hn_destroy_rx_data(struct hn_softc *sc) { int i; if (sc->hn_rxbuf != NULL) { hyperv_dmamem_free(&sc->hn_rxbuf_dma, sc->hn_rxbuf); sc->hn_rxbuf = NULL; } if (sc->hn_rx_ring_cnt == 0) return; for (i = 0; i < sc->hn_rx_ring_cnt; ++i) { struct hn_rx_ring *rxr = &sc->hn_rx_ring[i]; #if defined(INET) || defined(INET6) tcp_lro_free(&rxr->hn_lro); #endif free(rxr->hn_rdbuf, M_NETVSC); } free(sc->hn_rx_ring, M_NETVSC); sc->hn_rx_ring = NULL; sc->hn_rx_ring_cnt = 0; sc->hn_rx_ring_inuse = 0; } static int hn_create_tx_ring(struct hn_softc *sc, int id) { struct hn_tx_ring *txr = &sc->hn_tx_ring[id]; device_t dev = sc->hn_dev; bus_dma_tag_t parent_dtag; int error, i; uint32_t version; txr->hn_sc = sc; txr->hn_tx_idx = id; #ifndef HN_USE_TXDESC_BUFRING mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN); #endif mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF); txr->hn_txdesc_cnt = HN_TX_DESC_CNT; txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt, M_NETVSC, M_WAITOK | M_ZERO); #ifndef HN_USE_TXDESC_BUFRING SLIST_INIT(&txr->hn_txlist); #else txr->hn_txdesc_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_NETVSC, M_WAITOK, &txr->hn_tx_lock); #endif txr->hn_tx_taskq = sc->hn_tx_taskq; if (hn_use_if_start) { txr->hn_txeof = hn_start_txeof; TASK_INIT(&txr->hn_tx_task, 0, hn_start_taskfunc, txr); TASK_INIT(&txr->hn_txeof_task, 0, hn_start_txeof_taskfunc, txr); } else { int br_depth; txr->hn_txeof = hn_xmit_txeof; TASK_INIT(&txr->hn_tx_task, 0, hn_xmit_taskfunc, txr); TASK_INIT(&txr->hn_txeof_task, 0, hn_xmit_txeof_taskfunc, txr); br_depth = hn_get_txswq_depth(txr); txr->hn_mbuf_br = buf_ring_alloc(br_depth, M_NETVSC, M_WAITOK, &txr->hn_tx_lock); } txr->hn_direct_tx_size = hn_direct_tx_size; version = VMBUS_GET_VERSION(device_get_parent(dev), dev); if (version >= VMBUS_VERSION_WIN8_1) { txr->hn_csum_assist = HN_CSUM_ASSIST; } else { txr->hn_csum_assist = HN_CSUM_ASSIST_WIN8; if (id == 0) { device_printf(dev, "bus version %u.%u, " "no UDP checksum offloading\n", VMBUS_VERSION_MAJOR(version), VMBUS_VERSION_MINOR(version)); } } /* * Always schedule transmission instead of trying to do direct * transmission. This one gives the best performance so far. */ txr->hn_sched_tx = 1; parent_dtag = bus_get_dma_tag(dev); /* DMA tag for RNDIS packet messages. */ error = bus_dma_tag_create(parent_dtag, /* parent */ HN_RNDIS_PKT_ALIGN, /* alignment */ HN_RNDIS_PKT_BOUNDARY, /* boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ HN_RNDIS_PKT_LEN, /* maxsize */ 1, /* nsegments */ HN_RNDIS_PKT_LEN, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txr->hn_tx_rndis_dtag); if (error) { device_printf(dev, "failed to create rndis dmatag\n"); return error; } /* DMA tag for data. */ error = bus_dma_tag_create(parent_dtag, /* parent */ 1, /* alignment */ HN_TX_DATA_BOUNDARY, /* boundary */ BUS_SPACE_MAXADDR, /* lowaddr */ BUS_SPACE_MAXADDR, /* highaddr */ NULL, NULL, /* filter, filterarg */ HN_TX_DATA_MAXSIZE, /* maxsize */ HN_TX_DATA_SEGCNT_MAX, /* nsegments */ HN_TX_DATA_SEGSIZE, /* maxsegsize */ 0, /* flags */ NULL, /* lockfunc */ NULL, /* lockfuncarg */ &txr->hn_tx_data_dtag); if (error) { device_printf(dev, "failed to create data dmatag\n"); return error; } for (i = 0; i < txr->hn_txdesc_cnt; ++i) { struct hn_txdesc *txd = &txr->hn_txdesc[i]; txd->txr = txr; /* * Allocate and load RNDIS packet message. */ error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag, (void **)&txd->rndis_pkt, BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, &txd->rndis_pkt_dmap); if (error) { device_printf(dev, "failed to allocate rndis_packet_msg, %d\n", i); return error; } error = bus_dmamap_load(txr->hn_tx_rndis_dtag, txd->rndis_pkt_dmap, txd->rndis_pkt, HN_RNDIS_PKT_LEN, hyperv_dma_map_paddr, &txd->rndis_pkt_paddr, BUS_DMA_NOWAIT); if (error) { device_printf(dev, "failed to load rndis_packet_msg, %d\n", i); bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_pkt, txd->rndis_pkt_dmap); return error; } /* DMA map for TX data. */ error = bus_dmamap_create(txr->hn_tx_data_dtag, 0, &txd->data_dmap); if (error) { device_printf(dev, "failed to allocate tx data dmamap\n"); bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_pkt_dmap); bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_pkt, txd->rndis_pkt_dmap); return error; } /* All set, put it to list */ txd->flags |= HN_TXD_FLAG_ONLIST; #ifndef HN_USE_TXDESC_BUFRING SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link); #else buf_ring_enqueue(txr->hn_txdesc_br, txd); #endif } txr->hn_txdesc_avail = txr->hn_txdesc_cnt; if (sc->hn_tx_sysctl_tree != NULL) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; char name[16]; /* * Create per TX ring sysctl tree: * dev.hn.UNIT.tx.RINGID */ ctx = device_get_sysctl_ctx(dev); child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree); snprintf(name, sizeof(name), "%d", id); txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); if (txr->hn_tx_sysctl_tree != NULL) { child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail", CTLFLAG_RD, &txr->hn_txdesc_avail, 0, "# of available TX descs"); if (!hn_use_if_start) { SYSCTL_ADD_INT(ctx, child, OID_AUTO, "oactive", CTLFLAG_RD, &txr->hn_oactive, 0, "over active"); } SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets", CTLFLAG_RW, &txr->hn_pkts, "# of packets transmitted"); } } return 0; } static void hn_txdesc_dmamap_destroy(struct hn_txdesc *txd) { struct hn_tx_ring *txr = txd->txr; KASSERT(txd->m == NULL, ("still has mbuf installed")); KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("still dma mapped")); bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_pkt_dmap); bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_pkt, txd->rndis_pkt_dmap); bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap); } static void hn_destroy_tx_ring(struct hn_tx_ring *txr) { struct hn_txdesc *txd; if (txr->hn_txdesc == NULL) return; #ifndef HN_USE_TXDESC_BUFRING while ((txd = SLIST_FIRST(&txr->hn_txlist)) != NULL) { SLIST_REMOVE_HEAD(&txr->hn_txlist, link); hn_txdesc_dmamap_destroy(txd); } #else mtx_lock(&txr->hn_tx_lock); while ((txd = buf_ring_dequeue_sc(txr->hn_txdesc_br)) != NULL) hn_txdesc_dmamap_destroy(txd); mtx_unlock(&txr->hn_tx_lock); #endif if (txr->hn_tx_data_dtag != NULL) bus_dma_tag_destroy(txr->hn_tx_data_dtag); if (txr->hn_tx_rndis_dtag != NULL) bus_dma_tag_destroy(txr->hn_tx_rndis_dtag); #ifdef HN_USE_TXDESC_BUFRING buf_ring_free(txr->hn_txdesc_br, M_NETVSC); #endif free(txr->hn_txdesc, M_NETVSC); txr->hn_txdesc = NULL; if (txr->hn_mbuf_br != NULL) buf_ring_free(txr->hn_mbuf_br, M_NETVSC); #ifndef HN_USE_TXDESC_BUFRING mtx_destroy(&txr->hn_txlist_spin); #endif mtx_destroy(&txr->hn_tx_lock); } static int hn_create_tx_data(struct hn_softc *sc, int ring_cnt) { struct sysctl_oid_list *child; struct sysctl_ctx_list *ctx; int i; /* * Create TXBUF for chimney sending. * * NOTE: It is shared by all channels. */ sc->hn_chim = hyperv_dmamem_alloc(bus_get_dma_tag(sc->hn_dev), PAGE_SIZE, 0, NETVSC_SEND_BUFFER_SIZE, &sc->hn_chim_dma, BUS_DMA_WAITOK | BUS_DMA_ZERO); if (sc->hn_chim == NULL) { device_printf(sc->hn_dev, "allocate txbuf failed\n"); return (ENOMEM); } sc->hn_tx_ring_cnt = ring_cnt; sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt; sc->hn_tx_ring = malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt, M_NETVSC, M_WAITOK | M_ZERO); ctx = device_get_sysctl_ctx(sc->hn_dev); child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev)); /* Create dev.hn.UNIT.tx sysctl tree */ sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx", CTLFLAG_RD | CTLFLAG_MPSAFE, 0, ""); for (i = 0; i < sc->hn_tx_ring_cnt; ++i) { int error; error = hn_create_tx_ring(sc, i); if (error) return error; } SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_no_txdescs), hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_send_failed), hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failure"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_txdma_failed), hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failure"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_tx_collapsed), hn_tx_stat_ulong_sysctl, "LU", "# of TX mbuf collapsed"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_tx_chimney), hn_tx_stat_ulong_sysctl, "LU", "# of chimney send"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_tried", CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_tx_chimney_tried), hn_tx_stat_ulong_sysctl, "LU", "# of chimney send tries"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt", CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0, "# of total TX descs"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max", CTLFLAG_RD, &sc->hn_chim_szmax, 0, "Chimney send packet size upper boundary"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, hn_chim_size_sysctl, "I", "Chimney send packet size limit"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_direct_tx_size), hn_tx_conf_int_sysctl, "I", "Size of the packet for direct transmission"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, __offsetof(struct hn_tx_ring, hn_sched_tx), hn_tx_conf_int_sysctl, "I", "Always schedule transmission " "instead of doing direct transmission"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_cnt", CTLFLAG_RD, &sc->hn_tx_ring_cnt, 0, "# created TX rings"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_inuse", CTLFLAG_RD, &sc->hn_tx_ring_inuse, 0, "# used TX rings"); return 0; } static void hn_set_chim_size(struct hn_softc *sc, int chim_size) { int i; NV_LOCK(sc); for (i = 0; i < sc->hn_tx_ring_inuse; ++i) sc->hn_tx_ring[i].hn_chim_size = chim_size; NV_UNLOCK(sc); } static void hn_destroy_tx_data(struct hn_softc *sc) { int i; if (sc->hn_chim != NULL) { hyperv_dmamem_free(&sc->hn_chim_dma, sc->hn_chim); sc->hn_chim = NULL; } if (sc->hn_tx_ring_cnt == 0) return; for (i = 0; i < sc->hn_tx_ring_cnt; ++i) hn_destroy_tx_ring(&sc->hn_tx_ring[i]); free(sc->hn_tx_ring, M_NETVSC); sc->hn_tx_ring = NULL; sc->hn_tx_ring_cnt = 0; sc->hn_tx_ring_inuse = 0; } static void hn_start_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); hn_start_locked(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static void hn_start_txeof_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); atomic_clear_int(&txr->hn_sc->hn_ifp->if_drv_flags, IFF_DRV_OACTIVE); hn_start_locked(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static void hn_stop_tx_tasks(struct hn_softc *sc) { int i; for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task); taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task); } } static int hn_xmit(struct hn_tx_ring *txr, int len) { struct hn_softc *sc = txr->hn_sc; struct ifnet *ifp = sc->hn_ifp; struct mbuf *m_head; mtx_assert(&txr->hn_tx_lock, MA_OWNED); KASSERT(hn_use_if_start == 0, ("hn_xmit is called, when if_start is enabled")); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || txr->hn_oactive) return 0; while ((m_head = drbr_peek(ifp, txr->hn_mbuf_br)) != NULL) { struct hn_txdesc *txd; int error; if (len > 0 && m_head->m_pkthdr.len > len) { /* * This sending could be time consuming; let callers * dispatch this packet sending (and sending of any * following up packets) to tx taskqueue. */ drbr_putback(ifp, txr->hn_mbuf_br, m_head); return 1; } txd = hn_txdesc_get(txr); if (txd == NULL) { txr->hn_no_txdescs++; drbr_putback(ifp, txr->hn_mbuf_br, m_head); txr->hn_oactive = 1; break; } error = hn_encap(txr, txd, &m_head); if (error) { /* Both txd and m_head are freed; discard */ drbr_advance(ifp, txr->hn_mbuf_br); continue; } error = hn_send_pkt(ifp, txr, txd); if (__predict_false(error)) { /* txd is freed, but m_head is not */ drbr_putback(ifp, txr->hn_mbuf_br, m_head); txr->hn_oactive = 1; break; } /* Sent */ drbr_advance(ifp, txr->hn_mbuf_br); } return 0; } static int hn_transmit(struct ifnet *ifp, struct mbuf *m) { struct hn_softc *sc = ifp->if_softc; struct hn_tx_ring *txr; int error, idx = 0; /* * Select the TX ring based on flowid */ if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse; txr = &sc->hn_tx_ring[idx]; error = drbr_enqueue(ifp, txr->hn_mbuf_br, m); if (error) { if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); return error; } if (txr->hn_oactive) return 0; if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; sched = hn_xmit(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (!sched) return 0; } do_sched: taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); return 0; } static void hn_xmit_qflush(struct ifnet *ifp) { struct hn_softc *sc = ifp->if_softc; int i; for (i = 0; i < sc->hn_tx_ring_inuse; ++i) { struct hn_tx_ring *txr = &sc->hn_tx_ring[i]; struct mbuf *m; mtx_lock(&txr->hn_tx_lock); while ((m = buf_ring_dequeue_sc(txr->hn_mbuf_br)) != NULL) m_freem(m); mtx_unlock(&txr->hn_tx_lock); } if_qflush(ifp); } static void hn_xmit_txeof(struct hn_tx_ring *txr) { if (txr->hn_sched_tx) goto do_sched; if (mtx_trylock(&txr->hn_tx_lock)) { int sched; txr->hn_oactive = 0; sched = hn_xmit(txr, txr->hn_direct_tx_size); mtx_unlock(&txr->hn_tx_lock); if (sched) { taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task); } } else { do_sched: /* * Release the oactive earlier, with the hope, that * others could catch up. The task will clear the * oactive again with the hn_tx_lock to avoid possible * races. */ txr->hn_oactive = 0; taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task); } } static void hn_xmit_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); hn_xmit(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static void hn_xmit_txeof_taskfunc(void *xtxr, int pending __unused) { struct hn_tx_ring *txr = xtxr; mtx_lock(&txr->hn_tx_lock); txr->hn_oactive = 0; hn_xmit(txr, 0); mtx_unlock(&txr->hn_tx_lock); } static void hn_channel_attach(struct hn_softc *sc, struct vmbus_channel *chan) { struct hn_rx_ring *rxr; int idx; idx = vmbus_chan_subidx(chan); KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse, ("invalid channel index %d, should > 0 && < %d", idx, sc->hn_rx_ring_inuse)); rxr = &sc->hn_rx_ring[idx]; KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0, ("RX ring %d already attached", idx)); rxr->hn_rx_flags |= HN_RX_FLAG_ATTACHED; if (bootverbose) { if_printf(sc->hn_ifp, "link RX ring %d to channel%u\n", idx, vmbus_chan_id(chan)); } if (idx < sc->hn_tx_ring_inuse) { struct hn_tx_ring *txr = &sc->hn_tx_ring[idx]; KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0, ("TX ring %d already attached", idx)); txr->hn_tx_flags |= HN_TX_FLAG_ATTACHED; txr->hn_chan = chan; if (bootverbose) { if_printf(sc->hn_ifp, "link TX ring %d to channel%u\n", idx, vmbus_chan_id(chan)); } } /* Bind channel to a proper CPU */ vmbus_chan_cpu_set(chan, (sc->hn_cpu + idx) % mp_ncpus); } static void hn_subchan_attach(struct hn_softc *sc, struct vmbus_channel *chan) { KASSERT(!vmbus_chan_is_primary(chan), ("subchannel callback on primary channel")); hn_channel_attach(sc, chan); } static void hn_subchan_setup(struct hn_softc *sc) { struct vmbus_channel **subchans; int subchan_cnt = sc->hn_rx_ring_inuse - 1; int i; /* Wait for sub-channels setup to complete. */ subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt); /* Attach the sub-channels. */ for (i = 0; i < subchan_cnt; ++i) { struct vmbus_channel *subchan = subchans[i]; /* NOTE: Calling order is critical. */ hn_subchan_attach(sc, subchan); hv_nv_subchan_attach(subchan, &sc->hn_rx_ring[vmbus_chan_subidx(subchan)]); } /* Release the sub-channels */ vmbus_subchan_rel(subchans, subchan_cnt); if_printf(sc->hn_ifp, "%d sub-channels setup done\n", subchan_cnt); } static void hn_tx_taskq_create(void *arg __unused) { if (!hn_share_tx_taskq) return; hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK, taskqueue_thread_enqueue, &hn_tx_taskq); if (hn_bind_tx_taskq >= 0) { int cpu = hn_bind_tx_taskq; cpuset_t cpu_set; if (cpu > mp_ncpus - 1) cpu = mp_ncpus - 1; CPU_SETOF(cpu, &cpu_set); taskqueue_start_threads_cpuset(&hn_tx_taskq, 1, PI_NET, &cpu_set, "hn tx"); } else { taskqueue_start_threads(&hn_tx_taskq, 1, PI_NET, "hn tx"); } } SYSINIT(hn_txtq_create, SI_SUB_DRIVERS, SI_ORDER_FIRST, hn_tx_taskq_create, NULL); static void hn_tx_taskq_destroy(void *arg __unused) { if (hn_tx_taskq != NULL) taskqueue_free(hn_tx_taskq); } SYSUNINIT(hn_txtq_destroy, SI_SUB_DRIVERS, SI_ORDER_FIRST, hn_tx_taskq_destroy, NULL); static device_method_t netvsc_methods[] = { /* Device interface */ DEVMETHOD(device_probe, netvsc_probe), DEVMETHOD(device_attach, netvsc_attach), DEVMETHOD(device_detach, netvsc_detach), DEVMETHOD(device_shutdown, netvsc_shutdown), { 0, 0 } }; static driver_t netvsc_driver = { NETVSC_DEVNAME, netvsc_methods, sizeof(hn_softc_t) }; static devclass_t netvsc_devclass; DRIVER_MODULE(hn, vmbus, netvsc_driver, netvsc_devclass, 0, 0); MODULE_VERSION(hn, 1); MODULE_DEPEND(hn, vmbus, 1, 1, 1); Index: head/sys/dev/hyperv/netvsc/hv_rndis.h =================================================================== --- head/sys/dev/hyperv/netvsc/hv_rndis.h (revision 305524) +++ head/sys/dev/hyperv/netvsc/hv_rndis.h (revision 305525) @@ -1,907 +1,892 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2012 NetApp Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef __HV_RNDIS_H__ #define __HV_RNDIS_H__ #include /* * NDIS protocol version numbers */ #define NDIS_VERSION_5_0 0x00050000 #define NDIS_VERSION_5_1 0x00050001 #define NDIS_VERSION_6_0 0x00060000 #define NDIS_VERSION_6_1 0x00060001 #define NDIS_VERSION_6_30 0x0006001e #define NDIS_VERSION_MAJOR(ver) (((ver) & 0xffff0000) >> 16) #define NDIS_VERSION_MINOR(ver) ((ver) & 0xffff) /* * Object Identifiers used by NdisRequest Query/Set Information */ /* * General Objects */ #define RNDIS_OID_GEN_SUPPORTED_LIST 0x00010101 #define RNDIS_OID_GEN_HARDWARE_STATUS 0x00010102 #define RNDIS_OID_GEN_MEDIA_SUPPORTED 0x00010103 #define RNDIS_OID_GEN_MEDIA_IN_USE 0x00010104 #define RNDIS_OID_GEN_MAXIMUM_LOOKAHEAD 0x00010105 #define RNDIS_OID_GEN_MAXIMUM_FRAME_SIZE 0x00010106 #define RNDIS_OID_GEN_LINK_SPEED 0x00010107 #define RNDIS_OID_GEN_TRANSMIT_BUFFER_SPACE 0x00010108 #define RNDIS_OID_GEN_RECEIVE_BUFFER_SPACE 0x00010109 #define RNDIS_OID_GEN_TRANSMIT_BLOCK_SIZE 0x0001010A #define RNDIS_OID_GEN_RECEIVE_BLOCK_SIZE 0x0001010B #define RNDIS_OID_GEN_VENDOR_ID 0x0001010C #define RNDIS_OID_GEN_VENDOR_DESCRIPTION 0x0001010D #define RNDIS_OID_GEN_CURRENT_PACKET_FILTER 0x0001010E #define RNDIS_OID_GEN_CURRENT_LOOKAHEAD 0x0001010F #define RNDIS_OID_GEN_DRIVER_VERSION 0x00010110 #define RNDIS_OID_GEN_MAXIMUM_TOTAL_SIZE 0x00010111 #define RNDIS_OID_GEN_PROTOCOL_OPTIONS 0x00010112 #define RNDIS_OID_GEN_MAC_OPTIONS 0x00010113 #define RNDIS_OID_GEN_MEDIA_CONNECT_STATUS 0x00010114 #define RNDIS_OID_GEN_MAXIMUM_SEND_PACKETS 0x00010115 #define RNDIS_OID_GEN_VENDOR_DRIVER_VERSION 0x00010116 #define RNDIS_OID_GEN_NETWORK_LAYER_ADDRESSES 0x00010118 #define RNDIS_OID_GEN_TRANSPORT_HEADER_OFFSET 0x00010119 #define RNDIS_OID_GEN_MACHINE_NAME 0x0001021A #define RNDIS_OID_GEN_RNDIS_CONFIG_PARAMETER 0x0001021B /* * For receive side scale */ /* Query only */ #define RNDIS_OID_GEN_RSS_CAPABILITIES 0x00010203 /* Query and set */ #define RNDIS_OID_GEN_RSS_PARAMETERS 0x00010204 #define RNDIS_OID_GEN_XMIT_OK 0x00020101 #define RNDIS_OID_GEN_RCV_OK 0x00020102 #define RNDIS_OID_GEN_XMIT_ERROR 0x00020103 #define RNDIS_OID_GEN_RCV_ERROR 0x00020104 #define RNDIS_OID_GEN_RCV_NO_BUFFER 0x00020105 #define RNDIS_OID_GEN_DIRECTED_BYTES_XMIT 0x00020201 #define RNDIS_OID_GEN_DIRECTED_FRAMES_XMIT 0x00020202 #define RNDIS_OID_GEN_MULTICAST_BYTES_XMIT 0x00020203 #define RNDIS_OID_GEN_MULTICAST_FRAMES_XMIT 0x00020204 #define RNDIS_OID_GEN_BROADCAST_BYTES_XMIT 0x00020205 #define RNDIS_OID_GEN_BROADCAST_FRAMES_XMIT 0x00020206 #define RNDIS_OID_GEN_DIRECTED_BYTES_RCV 0x00020207 #define RNDIS_OID_GEN_DIRECTED_FRAMES_RCV 0x00020208 #define RNDIS_OID_GEN_MULTICAST_BYTES_RCV 0x00020209 #define RNDIS_OID_GEN_MULTICAST_FRAMES_RCV 0x0002020A #define RNDIS_OID_GEN_BROADCAST_BYTES_RCV 0x0002020B #define RNDIS_OID_GEN_BROADCAST_FRAMES_RCV 0x0002020C #define RNDIS_OID_GEN_RCV_CRC_ERROR 0x0002020D #define RNDIS_OID_GEN_TRANSMIT_QUEUE_LENGTH 0x0002020E #define RNDIS_OID_GEN_GET_TIME_CAPS 0x0002020F #define RNDIS_OID_GEN_GET_NETCARD_TIME 0x00020210 /* * These are connection-oriented general OIDs. * These replace the above OIDs for connection-oriented media. */ #define RNDIS_OID_GEN_CO_SUPPORTED_LIST 0x00010101 #define RNDIS_OID_GEN_CO_HARDWARE_STATUS 0x00010102 #define RNDIS_OID_GEN_CO_MEDIA_SUPPORTED 0x00010103 #define RNDIS_OID_GEN_CO_MEDIA_IN_USE 0x00010104 #define RNDIS_OID_GEN_CO_LINK_SPEED 0x00010105 #define RNDIS_OID_GEN_CO_VENDOR_ID 0x00010106 #define RNDIS_OID_GEN_CO_VENDOR_DESCRIPTION 0x00010107 #define RNDIS_OID_GEN_CO_DRIVER_VERSION 0x00010108 #define RNDIS_OID_GEN_CO_PROTOCOL_OPTIONS 0x00010109 #define RNDIS_OID_GEN_CO_MAC_OPTIONS 0x0001010A #define RNDIS_OID_GEN_CO_MEDIA_CONNECT_STATUS 0x0001010B #define RNDIS_OID_GEN_CO_VENDOR_DRIVER_VERSION 0x0001010C #define RNDIS_OID_GEN_CO_MINIMUM_LINK_SPEED 0x0001010D #define RNDIS_OID_GEN_CO_GET_TIME_CAPS 0x00010201 #define RNDIS_OID_GEN_CO_GET_NETCARD_TIME 0x00010202 /* * These are connection-oriented statistics OIDs. */ #define RNDIS_OID_GEN_CO_XMIT_PDUS_OK 0x00020101 #define RNDIS_OID_GEN_CO_RCV_PDUS_OK 0x00020102 #define RNDIS_OID_GEN_CO_XMIT_PDUS_ERROR 0x00020103 #define RNDIS_OID_GEN_CO_RCV_PDUS_ERROR 0x00020104 #define RNDIS_OID_GEN_CO_RCV_PDUS_NO_BUFFER 0x00020105 #define RNDIS_OID_GEN_CO_RCV_CRC_ERROR 0x00020201 #define RNDIS_OID_GEN_CO_TRANSMIT_QUEUE_LENGTH 0x00020202 #define RNDIS_OID_GEN_CO_BYTES_XMIT 0x00020203 #define RNDIS_OID_GEN_CO_BYTES_RCV 0x00020204 #define RNDIS_OID_GEN_CO_BYTES_XMIT_OUTSTANDING 0x00020205 #define RNDIS_OID_GEN_CO_NETCARD_LOAD 0x00020206 /* * These are objects for Connection-oriented media call-managers. */ #define RNDIS_OID_CO_ADD_PVC 0xFF000001 #define RNDIS_OID_CO_DELETE_PVC 0xFF000002 #define RNDIS_OID_CO_GET_CALL_INFORMATION 0xFF000003 #define RNDIS_OID_CO_ADD_ADDRESS 0xFF000004 #define RNDIS_OID_CO_DELETE_ADDRESS 0xFF000005 #define RNDIS_OID_CO_GET_ADDRESSES 0xFF000006 #define RNDIS_OID_CO_ADDRESS_CHANGE 0xFF000007 #define RNDIS_OID_CO_SIGNALING_ENABLED 0xFF000008 #define RNDIS_OID_CO_SIGNALING_DISABLED 0xFF000009 /* * 802.3 Objects (Ethernet) */ #define RNDIS_OID_802_3_PERMANENT_ADDRESS 0x01010101 #define RNDIS_OID_802_3_CURRENT_ADDRESS 0x01010102 #define RNDIS_OID_802_3_MULTICAST_LIST 0x01010103 #define RNDIS_OID_802_3_MAXIMUM_LIST_SIZE 0x01010104 #define RNDIS_OID_802_3_MAC_OPTIONS 0x01010105 /* * */ #define NDIS_802_3_MAC_OPTION_PRIORITY 0x00000001 #define RNDIS_OID_802_3_RCV_ERROR_ALIGNMENT 0x01020101 #define RNDIS_OID_802_3_XMIT_ONE_COLLISION 0x01020102 #define RNDIS_OID_802_3_XMIT_MORE_COLLISIONS 0x01020103 #define RNDIS_OID_802_3_XMIT_DEFERRED 0x01020201 #define RNDIS_OID_802_3_XMIT_MAX_COLLISIONS 0x01020202 #define RNDIS_OID_802_3_RCV_OVERRUN 0x01020203 #define RNDIS_OID_802_3_XMIT_UNDERRUN 0x01020204 #define RNDIS_OID_802_3_XMIT_HEARTBEAT_FAILURE 0x01020205 #define RNDIS_OID_802_3_XMIT_TIMES_CRS_LOST 0x01020206 #define RNDIS_OID_802_3_XMIT_LATE_COLLISIONS 0x01020207 /* * RNDIS MP custom OID for test */ #define OID_RNDISMP_GET_RECEIVE_BUFFERS 0xFFA0C90D // Query only /* * Remote NDIS offload parameters */ #define RNDIS_OBJECT_TYPE_DEFAULT 0x80 #define RNDIS_OFFLOAD_PARAMETERS_REVISION_3 3 #define RNDIS_OFFLOAD_PARAMETERS_NO_CHANGE 0 #define RNDIS_OFFLOAD_PARAMETERS_LSOV2_DISABLED 1 #define RNDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED 2 #define RNDIS_OFFLOAD_PARAMETERS_LSOV1_ENABLED 2 #define RNDIS_OFFLOAD_PARAMETERS_RSC_DISABLED 1 #define RNDIS_OFFLOAD_PARAMETERS_RSC_ENABLED 2 #define RNDIS_OFFLOAD_PARAMETERS_TX_RX_DISABLED 1 #define RNDIS_OFFLOAD_PARAMETERS_TX_ENABLED_RX_DISABLED 2 #define RNDIS_OFFLOAD_PARAMETERS_RX_ENABLED_TX_DISABLED 3 #define RNDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED 4 #define RNDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE 1 #define RNDIS_TCP_LARGE_SEND_OFFLOAD_IPV4 0 #define RNDIS_TCP_LARGE_SEND_OFFLOAD_IPV6 1 #define RNDIS_OID_TCP_OFFLOAD_CURRENT_CONFIG 0xFC01020B /* query only */ #define RNDIS_OID_TCP_OFFLOAD_PARAMETERS 0xFC01020C /* set only */ #define RNDIS_OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES 0xFC01020D/* query only */ #define RNDIS_OID_TCP_CONNECTION_OFFLOAD_CURRENT_CONFIG 0xFC01020E /* query only */ #define RNDIS_OID_TCP_CONNECTION_OFFLOAD_HARDWARE_CAPABILITIES 0xFC01020F /* query */ #define RNDIS_OID_OFFLOAD_ENCAPSULATION 0x0101010A /* set/query */ /* * NdisInitialize message */ typedef struct rndis_initialize_request_ { /* RNDIS request ID */ uint32_t request_id; uint32_t major_version; uint32_t minor_version; uint32_t max_xfer_size; } rndis_initialize_request; /* * Response to NdisInitialize */ typedef struct rndis_initialize_complete_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS status */ uint32_t status; uint32_t major_version; uint32_t minor_version; uint32_t device_flags; /* RNDIS medium */ uint32_t medium; uint32_t max_pkts_per_msg; uint32_t max_xfer_size; uint32_t pkt_align_factor; uint32_t af_list_offset; uint32_t af_list_size; } rndis_initialize_complete; /* * Call manager devices only: Information about an address family * supported by the device is appended to the response to NdisInitialize. */ typedef struct rndis_co_address_family_ { /* RNDIS AF */ uint32_t address_family; uint32_t major_version; uint32_t minor_version; } rndis_co_address_family; /* * NdisHalt message */ typedef struct rndis_halt_request_ { /* RNDIS request ID */ uint32_t request_id; } rndis_halt_request; /* * NdisQueryRequest message */ typedef struct rndis_query_request_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS OID */ uint32_t oid; uint32_t info_buffer_length; uint32_t info_buffer_offset; /* RNDIS handle */ uint32_t device_vc_handle; } rndis_query_request; /* * Response to NdisQueryRequest */ typedef struct rndis_query_complete_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS status */ uint32_t status; uint32_t info_buffer_length; uint32_t info_buffer_offset; } rndis_query_complete; /* * NdisSetRequest message */ typedef struct rndis_set_request_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS OID */ uint32_t oid; uint32_t info_buffer_length; uint32_t info_buffer_offset; /* RNDIS handle */ uint32_t device_vc_handle; } rndis_set_request; /* * Response to NdisSetRequest */ typedef struct rndis_set_complete_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS status */ uint32_t status; } rndis_set_complete; /* * NdisReset message */ typedef struct rndis_reset_request_ { uint32_t reserved; } rndis_reset_request; /* * Response to NdisReset */ typedef struct rndis_reset_complete_ { /* RNDIS status */ uint32_t status; uint32_t addressing_reset; } rndis_reset_complete; /* * NdisMIndicateStatus message */ typedef struct rndis_indicate_status_ { /* RNDIS status */ uint32_t status; uint32_t status_buf_length; uint32_t status_buf_offset; } rndis_indicate_status; /* * Diagnostic information passed as the status buffer in * rndis_indicate_status messages signifying error conditions. */ typedef struct rndis_diagnostic_info_ { /* RNDIS status */ uint32_t diag_status; uint32_t error_offset; } rndis_diagnostic_info; /* * NdisKeepAlive message */ typedef struct rndis_keepalive_request_ { /* RNDIS request ID */ uint32_t request_id; } rndis_keepalive_request; /* * Response to NdisKeepAlive */ typedef struct rndis_keepalive_complete_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS status */ uint32_t status; } rndis_keepalive_complete; /* * Data message. All offset fields contain byte offsets from the beginning * of the rndis_packet structure. All length fields are in bytes. * VcHandle is set to 0 for connectionless data, otherwise it * contains the VC handle. */ typedef struct rndis_packet_ { uint32_t data_offset; uint32_t data_length; uint32_t oob_data_offset; uint32_t oob_data_length; uint32_t num_oob_data_elements; uint32_t per_pkt_info_offset; uint32_t per_pkt_info_length; /* RNDIS handle */ uint32_t vc_handle; uint32_t reserved; } rndis_packet; typedef struct rndis_packet_ex_ { uint32_t data_offset; uint32_t data_length; uint32_t oob_data_offset; uint32_t oob_data_length; uint32_t num_oob_data_elements; uint32_t per_pkt_info_offset; uint32_t per_pkt_info_length; /* RNDIS handle */ uint32_t vc_handle; uint32_t reserved; uint64_t data_buf_id; uint32_t data_buf_offset; uint64_t next_header_buf_id; uint32_t next_header_byte_offset; uint32_t next_header_byte_count; } rndis_packet_ex; /* * Optional Out of Band data associated with a Data message. */ typedef struct rndis_oobd_ { uint32_t size; /* RNDIS class ID */ uint32_t type; uint32_t class_info_offset; } rndis_oobd; /* * Packet extension field contents associated with a Data message. */ typedef struct rndis_per_packet_info_ { uint32_t size; uint32_t type; uint32_t per_packet_info_offset; } rndis_per_packet_info; typedef enum ndis_per_pkt_infotype_ { tcpip_chksum_info, ipsec_info, tcp_large_send_info, classification_handle_info, ndis_reserved, sgl_info, ieee_8021q_info, original_pkt_info, pkt_cancel_id, original_netbuf_list, cached_netbuf_list, short_pkt_padding_info, max_perpkt_info } ndis_per_pkt_infotype; #define nbl_hash_value pkt_cancel_id #define nbl_hash_info original_netbuf_list typedef struct ndis_8021q_info_ { union { struct { uint32_t user_pri : 3; /* User Priority */ uint32_t cfi : 1; /* Canonical Format ID */ uint32_t vlan_id : 12; uint32_t reserved : 16; } s1; uint32_t value; } u1; } ndis_8021q_info; struct rndis_object_header { uint8_t type; uint8_t revision; uint16_t size; }; typedef struct rndis_offload_params_ { struct rndis_object_header header; uint8_t ipv4_csum; uint8_t tcp_ipv4_csum; uint8_t udp_ipv4_csum; uint8_t tcp_ipv6_csum; uint8_t udp_ipv6_csum; uint8_t lso_v1; uint8_t ip_sec_v1; uint8_t lso_v2_ipv4; uint8_t lso_v2_ipv6; uint8_t tcp_connection_ipv4; uint8_t tcp_connection_ipv6; uint32_t flags; uint8_t ip_sec_v2; uint8_t ip_sec_v2_ipv4; struct { uint8_t rsc_ipv4; uint8_t rsc_ipv6; }; struct { uint8_t encapsulated_packet_task_offload; uint8_t encapsulation_types; }; } rndis_offload_params; typedef struct rndis_tcp_ip_csum_info_ { union { struct { uint32_t is_ipv4:1; uint32_t is_ipv6:1; uint32_t tcp_csum:1; uint32_t udp_csum:1; uint32_t ip_header_csum:1; uint32_t reserved:11; uint32_t tcp_header_offset:10; } xmit; struct { uint32_t tcp_csum_failed:1; uint32_t udp_csum_failed:1; uint32_t ip_csum_failed:1; uint32_t tcp_csum_succeeded:1; uint32_t udp_csum_succeeded:1; uint32_t ip_csum_succeeded:1; uint32_t loopback:1; uint32_t tcp_csum_value_invalid:1; uint32_t ip_csum_value_invalid:1; } receive; uint32_t value; }; } rndis_tcp_ip_csum_info; struct rndis_hash_value { uint32_t hash_value; } __packed; struct rndis_hash_info { uint32_t hash_info; } __packed; typedef struct rndis_tcp_tso_info_ { union { struct { uint32_t unused:30; uint32_t type:1; uint32_t reserved2:1; } xmit; struct { uint32_t mss:20; uint32_t tcp_header_offset:10; uint32_t type:1; uint32_t reserved2:1; } lso_v1_xmit; struct { uint32_t tcp_payload:30; uint32_t type:1; uint32_t reserved2:1; } lso_v1_xmit_complete; struct { uint32_t mss:20; uint32_t tcp_header_offset:10; uint32_t type:1; uint32_t ip_version:1; } lso_v2_xmit; struct { uint32_t reserved:30; uint32_t type:1; uint32_t reserved2:1; } lso_v2_xmit_complete; uint32_t value; }; } rndis_tcp_tso_info; -#define RNDIS_HASHVAL_PPI_SIZE (sizeof(rndis_per_packet_info) + \ - sizeof(struct rndis_hash_value)) - -#define RNDIS_VLAN_PPI_SIZE (sizeof(rndis_per_packet_info) + \ - sizeof(ndis_8021q_info)) - -#define RNDIS_CSUM_PPI_SIZE (sizeof(rndis_per_packet_info) + \ - sizeof(rndis_tcp_ip_csum_info)) - -#define RNDIS_TSO_PPI_SIZE (sizeof(rndis_per_packet_info) + \ - sizeof(rndis_tcp_tso_info)) - /* * Format of Information buffer passed in a SetRequest for the OID * OID_GEN_RNDIS_CONFIG_PARAMETER. */ typedef struct rndis_config_parameter_info_ { uint32_t parameter_name_offset; uint32_t parameter_name_length; uint32_t parameter_type; uint32_t parameter_value_offset; uint32_t parameter_value_length; } rndis_config_parameter_info; /* * Values for ParameterType in rndis_config_parameter_info */ #define RNDIS_CONFIG_PARAM_TYPE_INTEGER 0 #define RNDIS_CONFIG_PARAM_TYPE_STRING 2 /* * CONDIS Miniport messages for connection oriented devices * that do not implement a call manager. */ /* * CoNdisMiniportCreateVc message */ typedef struct rcondis_mp_create_vc_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS handle */ uint32_t ndis_vc_handle; } rcondis_mp_create_vc; /* * Response to CoNdisMiniportCreateVc */ typedef struct rcondis_mp_create_vc_complete_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS handle */ uint32_t device_vc_handle; /* RNDIS status */ uint32_t status; } rcondis_mp_create_vc_complete; /* * CoNdisMiniportDeleteVc message */ typedef struct rcondis_mp_delete_vc_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS handle */ uint32_t device_vc_handle; } rcondis_mp_delete_vc; /* * Response to CoNdisMiniportDeleteVc */ typedef struct rcondis_mp_delete_vc_complete_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS status */ uint32_t status; } rcondis_mp_delete_vc_complete; /* * CoNdisMiniportQueryRequest message */ typedef struct rcondis_mp_query_request_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS request type */ uint32_t request_type; /* RNDIS OID */ uint32_t oid; /* RNDIS handle */ uint32_t device_vc_handle; uint32_t info_buf_length; uint32_t info_buf_offset; } rcondis_mp_query_request; /* * CoNdisMiniportSetRequest message */ typedef struct rcondis_mp_set_request_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS request type */ uint32_t request_type; /* RNDIS OID */ uint32_t oid; /* RNDIS handle */ uint32_t device_vc_handle; uint32_t info_buf_length; uint32_t info_buf_offset; } rcondis_mp_set_request; /* * CoNdisIndicateStatus message */ typedef struct rcondis_indicate_status_ { /* RNDIS handle */ uint32_t ndis_vc_handle; /* RNDIS status */ uint32_t status; uint32_t status_buf_length; uint32_t status_buf_offset; } rcondis_indicate_status; /* * CONDIS Call/VC parameters */ typedef struct rcondis_specific_parameters_ { uint32_t parameter_type; uint32_t parameter_length; uint32_t parameter_offset; } rcondis_specific_parameters; typedef struct rcondis_media_parameters_ { uint32_t flags; uint32_t reserved1; uint32_t reserved2; rcondis_specific_parameters media_specific; } rcondis_media_parameters; typedef struct rndis_flowspec_ { uint32_t token_rate; uint32_t token_bucket_size; uint32_t peak_bandwidth; uint32_t latency; uint32_t delay_variation; uint32_t service_type; uint32_t max_sdu_size; uint32_t minimum_policed_size; } rndis_flowspec; typedef struct rcondis_call_manager_parameters_ { rndis_flowspec transmit; rndis_flowspec receive; rcondis_specific_parameters call_mgr_specific; } rcondis_call_manager_parameters; /* * CoNdisMiniportActivateVc message */ typedef struct rcondis_mp_activate_vc_request_ { /* RNDIS request ID */ uint32_t request_id; uint32_t flags; /* RNDIS handle */ uint32_t device_vc_handle; uint32_t media_params_offset; uint32_t media_params_length; uint32_t call_mgr_params_offset; uint32_t call_mgr_params_length; } rcondis_mp_activate_vc_request; /* * Response to CoNdisMiniportActivateVc */ typedef struct rcondis_mp_activate_vc_complete_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS status */ uint32_t status; } rcondis_mp_activate_vc_complete; /* * CoNdisMiniportDeactivateVc message */ typedef struct rcondis_mp_deactivate_vc_request_ { /* RNDIS request ID */ uint32_t request_id; uint32_t flags; /* RNDIS handle */ uint32_t device_vc_handle; } rcondis_mp_deactivate_vc_request; /* * Response to CoNdisMiniportDeactivateVc */ typedef struct rcondis_mp_deactivate_vc_complete_ { /* RNDIS request ID */ uint32_t request_id; /* RNDIS status */ uint32_t status; } rcondis_mp_deactivate_vc_complete; /* * union with all of the RNDIS messages */ typedef union rndis_msg_container_ { rndis_packet packet; rndis_initialize_request init_request; rndis_halt_request halt_request; rndis_query_request query_request; rndis_set_request set_request; rndis_reset_request reset_request; rndis_keepalive_request keepalive_request; rndis_indicate_status indicate_status; rndis_initialize_complete init_complete; rndis_query_complete query_complete; rndis_set_complete set_complete; rndis_reset_complete reset_complete; rndis_keepalive_complete keepalive_complete; rcondis_mp_create_vc co_miniport_create_vc; rcondis_mp_delete_vc co_miniport_delete_vc; rcondis_indicate_status co_miniport_status; rcondis_mp_activate_vc_request co_miniport_activate_vc; rcondis_mp_deactivate_vc_request co_miniport_deactivate_vc; rcondis_mp_create_vc_complete co_miniport_create_vc_complete; rcondis_mp_delete_vc_complete co_miniport_delete_vc_complete; rcondis_mp_activate_vc_complete co_miniport_activate_vc_complete; rcondis_mp_deactivate_vc_complete co_miniport_deactivate_vc_complete; rndis_packet_ex packet_ex; } rndis_msg_container; /* * Remote NDIS message format */ typedef struct rndis_msg_ { uint32_t ndis_msg_type; /* * Total length of this message, from the beginning * of the rndis_msg struct, in bytes. */ uint32_t msg_len; /* Actual message */ rndis_msg_container msg; } rndis_msg; /* * Handy macros */ /* * get the size of an RNDIS message. Pass in the message type, * rndis_set_request, rndis_packet for example */ #define RNDIS_MESSAGE_SIZE(message) \ (sizeof(message) + (sizeof(rndis_msg) - sizeof(rndis_msg_container))) /* * get pointer to info buffer with message pointer */ #define MESSAGE_TO_INFO_BUFFER(message) \ (((PUCHAR)(message)) + message->InformationBufferOffset) /* * get pointer to status buffer with message pointer */ #define MESSAGE_TO_STATUS_BUFFER(message) \ (((PUCHAR)(message)) + message->StatusBufferOffset) /* * get pointer to OOBD buffer with message pointer */ #define MESSAGE_TO_OOBD_BUFFER(message) \ (((PUCHAR)(message)) + message->OOBDataOffset) /* * get pointer to data buffer with message pointer */ #define MESSAGE_TO_DATA_BUFFER(message) \ (((PUCHAR)(message)) + message->PerPacketInfoOffset) /* * get pointer to contained message from NDIS_MESSAGE pointer */ #define RNDIS_MESSAGE_PTR_TO_MESSAGE_PTR(rndis_message) \ ((void *) &rndis_message->Message) /* * get pointer to contained message from NDIS_MESSAGE pointer */ #define RNDIS_MESSAGE_RAW_PTR_TO_MESSAGE_PTR(rndis_message) \ ((void *) rndis_message) /* * Structures used in OID_RNDISMP_GET_RECEIVE_BUFFERS */ #define RNDISMP_RECEIVE_BUFFER_ELEM_FLAG_VMQ_RECEIVE_BUFFER 0x00000001 typedef struct rndismp_rx_buf_elem_ { uint32_t flags; uint32_t length; uint64_t rx_buf_id; uint32_t gpadl_handle; void *rx_buf; } rndismp_rx_buf_elem; typedef struct rndismp_rx_bufs_info_ { uint32_t num_rx_bufs; rndismp_rx_buf_elem rx_buf_elems[1]; } rndismp_rx_bufs_info; #define RNDIS_HEADER_SIZE (sizeof(rndis_msg) - sizeof(rndis_msg_container)) /* * Externs */ struct hn_rx_ring; struct hn_tx_ring; struct hn_recvinfo; int netvsc_recv(struct hn_rx_ring *rxr, const void *data, int dlen, const struct hn_recvinfo *info); void netvsc_channel_rollup(struct hn_rx_ring *rxr, struct hn_tx_ring *txr); - -void* hv_set_rppi_data(struct rndis_packet_msg *pkt, uint32_t rppi_size, - int pkt_type); #endif /* __HV_RNDIS_H__ */ Index: head/sys/dev/hyperv/netvsc/hv_rndis_filter.c =================================================================== --- head/sys/dev/hyperv/netvsc/hv_rndis_filter.c (revision 305524) +++ head/sys/dev/hyperv/netvsc/hv_rndis_filter.c (revision 305525) @@ -1,1189 +1,1193 @@ /*- * Copyright (c) 2009-2012,2016 Microsoft Corp. * Copyright (c) 2010-2012 Citrix Inc. * Copyright (c) 2012 NetApp Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define HV_RF_RECVINFO_VLAN 0x1 #define HV_RF_RECVINFO_CSUM 0x2 #define HV_RF_RECVINFO_HASHINF 0x4 #define HV_RF_RECVINFO_HASHVAL 0x8 #define HV_RF_RECVINFO_ALL \ (HV_RF_RECVINFO_VLAN | \ HV_RF_RECVINFO_CSUM | \ HV_RF_RECVINFO_HASHINF | \ HV_RF_RECVINFO_HASHVAL) #define HN_RNDIS_RID_COMPAT_MASK 0xffff #define HN_RNDIS_RID_COMPAT_MAX HN_RNDIS_RID_COMPAT_MASK #define HN_RNDIS_XFER_SIZE 2048 /* * Forward declarations */ static void hv_rf_receive_indicate_status(struct hn_softc *sc, const void *data, int dlen); static void hv_rf_receive_data(struct hn_rx_ring *rxr, const void *data, int dlen); static int hv_rf_query_device_mac(struct hn_softc *sc, uint8_t *eaddr); static int hv_rf_query_device_link_status(struct hn_softc *sc, uint32_t *link_status); static int hv_rf_init_device(struct hn_softc *sc); static int hn_rndis_query(struct hn_softc *sc, uint32_t oid, const void *idata, size_t idlen, void *odata, size_t *odlen0); static int hn_rndis_set(struct hn_softc *sc, uint32_t oid, const void *data, size_t dlen); static int hn_rndis_conf_offload(struct hn_softc *sc); static int hn_rndis_get_rsscaps(struct hn_softc *sc, int *rxr_cnt); static int hn_rndis_conf_rss(struct hn_softc *sc, int nchan); static __inline uint32_t hn_rndis_rid(struct hn_softc *sc) { uint32_t rid; again: rid = atomic_fetchadd_int(&sc->hn_rndis_rid, 1); if (rid == 0) goto again; /* Use upper 16 bits for non-compat RNDIS messages. */ return ((rid & 0xffff) << 16); } -/* - * Set the Per-Packet-Info with the specified type - */ void * -hv_set_rppi_data(struct rndis_packet_msg *pkt, uint32_t rppi_size, int pkt_type) +hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt, size_t pktsize, + size_t pi_dlen, uint32_t pi_type) { - rndis_per_packet_info *rppi; + const size_t pi_size = HN_RNDIS_PKTINFO_SIZE(pi_dlen); + struct rndis_pktinfo *pi; - /* Data immediately follow per-packet-info. */ - pkt->rm_dataoffset += rppi_size; + KASSERT((pi_size & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK) == 0, + ("unaligned pktinfo size %zu, pktinfo dlen %zu", pi_size, pi_dlen)); - /* Update RNDIS packet msg length */ - pkt->rm_len += rppi_size; - /* * Per-packet-info does not move; it only grows. * * NOTE: * rm_pktinfooffset in this phase counts from the beginning * of rndis_packet_msg. */ - rppi = (rndis_per_packet_info *)((uint8_t *)pkt + - pkt->rm_pktinfooffset + pkt->rm_pktinfolen); - pkt->rm_pktinfolen += rppi_size; + KASSERT(pkt->rm_pktinfooffset + pkt->rm_pktinfolen + pi_size <= pktsize, + ("%u pktinfo overflows RNDIS packet msg", pi_type)); + pi = (struct rndis_pktinfo *)((uint8_t *)pkt + pkt->rm_pktinfooffset + + pkt->rm_pktinfolen); + pkt->rm_pktinfolen += pi_size; - rppi->size = rppi_size; - rppi->type = pkt_type; - rppi->per_packet_info_offset = sizeof(rndis_per_packet_info); + pi->rm_size = pi_size; + pi->rm_type = pi_type; + pi->rm_pktinfooffset = RNDIS_PKTINFO_OFFSET; - return (rppi); + /* Data immediately follow per-packet-info. */ + pkt->rm_dataoffset += pi_size; + + /* Update RNDIS packet msg length */ + pkt->rm_len += pi_size; + + return (pi->rm_data); } /* * RNDIS filter receive indicate status */ static void hv_rf_receive_indicate_status(struct hn_softc *sc, const void *data, int dlen) { const struct rndis_status_msg *msg; if (dlen < sizeof(*msg)) { if_printf(sc->hn_ifp, "invalid RNDIS status\n"); return; } msg = data; switch (msg->rm_status) { case RNDIS_STATUS_MEDIA_CONNECT: netvsc_linkstatus_callback(sc, 1); break; case RNDIS_STATUS_MEDIA_DISCONNECT: netvsc_linkstatus_callback(sc, 0); break; default: /* TODO: */ if_printf(sc->hn_ifp, "unknown RNDIS status 0x%08x\n", msg->rm_status); break; } } static int hn_rndis_rxinfo(const void *info_data, int info_dlen, struct hn_recvinfo *info) { const struct rndis_pktinfo *pi = info_data; uint32_t mask = 0; while (info_dlen != 0) { const void *data; uint32_t dlen; if (__predict_false(info_dlen < sizeof(*pi))) return (EINVAL); if (__predict_false(info_dlen < pi->rm_size)) return (EINVAL); info_dlen -= pi->rm_size; if (__predict_false(pi->rm_size & RNDIS_PKTINFO_SIZE_ALIGNMASK)) return (EINVAL); if (__predict_false(pi->rm_size < pi->rm_pktinfooffset)) return (EINVAL); dlen = pi->rm_size - pi->rm_pktinfooffset; data = pi->rm_data; switch (pi->rm_type) { case NDIS_PKTINFO_TYPE_VLAN: if (__predict_false(dlen < NDIS_VLAN_INFO_SIZE)) return (EINVAL); info->vlan_info = *((const uint32_t *)data); mask |= HV_RF_RECVINFO_VLAN; break; case NDIS_PKTINFO_TYPE_CSUM: if (__predict_false(dlen < NDIS_RXCSUM_INFO_SIZE)) return (EINVAL); info->csum_info = *((const uint32_t *)data); mask |= HV_RF_RECVINFO_CSUM; break; case HN_NDIS_PKTINFO_TYPE_HASHVAL: if (__predict_false(dlen < HN_NDIS_HASH_VALUE_SIZE)) return (EINVAL); info->hash_value = *((const uint32_t *)data); mask |= HV_RF_RECVINFO_HASHVAL; break; case HN_NDIS_PKTINFO_TYPE_HASHINF: if (__predict_false(dlen < HN_NDIS_HASH_INFO_SIZE)) return (EINVAL); info->hash_info = *((const uint32_t *)data); mask |= HV_RF_RECVINFO_HASHINF; break; default: goto next; } if (mask == HV_RF_RECVINFO_ALL) { /* All found; done */ break; } next: pi = (const struct rndis_pktinfo *) ((const uint8_t *)pi + pi->rm_size); } /* * Final fixup. * - If there is no hash value, invalidate the hash info. */ if ((mask & HV_RF_RECVINFO_HASHVAL) == 0) info->hash_info = HN_NDIS_HASH_INFO_INVALID; return (0); } static __inline bool hn_rndis_check_overlap(int off, int len, int check_off, int check_len) { if (off < check_off) { if (__predict_true(off + len <= check_off)) return (false); } else if (off > check_off) { if (__predict_true(check_off + check_len <= off)) return (false); } return (true); } /* * RNDIS filter receive data */ static void hv_rf_receive_data(struct hn_rx_ring *rxr, const void *data, int dlen) { const struct rndis_packet_msg *pkt; struct hn_recvinfo info; int data_off, pktinfo_off, data_len, pktinfo_len; /* * Check length. */ if (__predict_false(dlen < sizeof(*pkt))) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg\n"); return; } pkt = data; if (__predict_false(dlen < pkt->rm_len)) { if_printf(rxr->hn_ifp, "truncated RNDIS packet msg, " "dlen %d, msglen %u\n", dlen, pkt->rm_len); return; } if (__predict_false(pkt->rm_len < pkt->rm_datalen + pkt->rm_oobdatalen + pkt->rm_pktinfolen)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msglen, " "msglen %u, data %u, oob %u, pktinfo %u\n", pkt->rm_len, pkt->rm_datalen, pkt->rm_oobdatalen, pkt->rm_pktinfolen); return; } if (__predict_false(pkt->rm_datalen == 0)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, no data\n"); return; } /* * Check offests. */ #define IS_OFFSET_INVALID(ofs) \ ((ofs) < RNDIS_PACKET_MSG_OFFSET_MIN || \ ((ofs) & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK)) /* XXX Hyper-V does not meet data offset alignment requirement */ if (__predict_false(pkt->rm_dataoffset < RNDIS_PACKET_MSG_OFFSET_MIN)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "data offset %u\n", pkt->rm_dataoffset); return; } if (__predict_false(pkt->rm_oobdataoffset > 0 && IS_OFFSET_INVALID(pkt->rm_oobdataoffset))) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "oob offset %u\n", pkt->rm_oobdataoffset); return; } if (__predict_true(pkt->rm_pktinfooffset > 0) && __predict_false(IS_OFFSET_INVALID(pkt->rm_pktinfooffset))) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "pktinfo offset %u\n", pkt->rm_pktinfooffset); return; } #undef IS_OFFSET_INVALID data_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_dataoffset); data_len = pkt->rm_datalen; pktinfo_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_pktinfooffset); pktinfo_len = pkt->rm_pktinfolen; /* * Check OOB coverage. */ if (__predict_false(pkt->rm_oobdatalen != 0)) { int oob_off, oob_len; if_printf(rxr->hn_ifp, "got oobdata\n"); oob_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_oobdataoffset); oob_len = pkt->rm_oobdatalen; if (__predict_false(oob_off + oob_len > pkt->rm_len)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "oob overflow, msglen %u, oob abs %d len %d\n", pkt->rm_len, oob_off, oob_len); return; } /* * Check against data. */ if (hn_rndis_check_overlap(oob_off, oob_len, data_off, data_len)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "oob overlaps data, oob abs %d len %d, " "data abs %d len %d\n", oob_off, oob_len, data_off, data_len); return; } /* * Check against pktinfo. */ if (pktinfo_len != 0 && hn_rndis_check_overlap(oob_off, oob_len, pktinfo_off, pktinfo_len)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "oob overlaps pktinfo, oob abs %d len %d, " "pktinfo abs %d len %d\n", oob_off, oob_len, pktinfo_off, pktinfo_len); return; } } /* * Check per-packet-info coverage and find useful per-packet-info. */ info.vlan_info = HN_NDIS_VLAN_INFO_INVALID; info.csum_info = HN_NDIS_RXCSUM_INFO_INVALID; info.hash_info = HN_NDIS_HASH_INFO_INVALID; if (__predict_true(pktinfo_len != 0)) { bool overlap; int error; if (__predict_false(pktinfo_off + pktinfo_len > pkt->rm_len)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "pktinfo overflow, msglen %u, " "pktinfo abs %d len %d\n", pkt->rm_len, pktinfo_off, pktinfo_len); return; } /* * Check packet info coverage. */ overlap = hn_rndis_check_overlap(pktinfo_off, pktinfo_len, data_off, data_len); if (__predict_false(overlap)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "pktinfo overlap data, pktinfo abs %d len %d, " "data abs %d len %d\n", pktinfo_off, pktinfo_len, data_off, data_len); return; } /* * Find useful per-packet-info. */ error = hn_rndis_rxinfo(((const uint8_t *)pkt) + pktinfo_off, pktinfo_len, &info); if (__predict_false(error)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg " "pktinfo\n"); return; } } if (__predict_false(data_off + data_len > pkt->rm_len)) { if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, " "data overflow, msglen %u, data abs %d len %d\n", pkt->rm_len, data_off, data_len); return; } netvsc_recv(rxr, ((const uint8_t *)pkt) + data_off, data_len, &info); } /* * RNDIS filter on receive */ void hv_rf_on_receive(struct hn_softc *sc, struct hn_rx_ring *rxr, const void *data, int dlen) { const struct rndis_comp_hdr *comp; const struct rndis_msghdr *hdr; if (__predict_false(dlen < sizeof(*hdr))) { if_printf(rxr->hn_ifp, "invalid RNDIS msg\n"); return; } hdr = data; switch (hdr->rm_type) { case REMOTE_NDIS_PACKET_MSG: hv_rf_receive_data(rxr, data, dlen); break; case REMOTE_NDIS_INITIALIZE_CMPLT: case REMOTE_NDIS_QUERY_CMPLT: case REMOTE_NDIS_SET_CMPLT: case REMOTE_NDIS_KEEPALIVE_CMPLT: /* unused */ if (dlen < sizeof(*comp)) { if_printf(rxr->hn_ifp, "invalid RNDIS cmplt\n"); return; } comp = data; KASSERT(comp->rm_rid > HN_RNDIS_RID_COMPAT_MAX, ("invalid RNDIS rid 0x%08x\n", comp->rm_rid)); vmbus_xact_ctx_wakeup(sc->hn_xact, comp, dlen); break; case REMOTE_NDIS_INDICATE_STATUS_MSG: hv_rf_receive_indicate_status(sc, data, dlen); break; case REMOTE_NDIS_RESET_CMPLT: /* * Reset completed, no rid. * * NOTE: * RESET is not issued by hn(4), so this message should * _not_ be observed. */ if_printf(rxr->hn_ifp, "RESET cmplt received\n"); break; default: if_printf(rxr->hn_ifp, "unknown RNDIS msg 0x%x\n", hdr->rm_type); break; } } /* * RNDIS filter query device MAC address */ static int hv_rf_query_device_mac(struct hn_softc *sc, uint8_t *eaddr) { size_t eaddr_len; int error; eaddr_len = ETHER_ADDR_LEN; error = hn_rndis_query(sc, OID_802_3_PERMANENT_ADDRESS, NULL, 0, eaddr, &eaddr_len); if (error) return (error); if (eaddr_len != ETHER_ADDR_LEN) { if_printf(sc->hn_ifp, "invalid eaddr len %zu\n", eaddr_len); return (EINVAL); } return (0); } /* * RNDIS filter query device link status */ static int hv_rf_query_device_link_status(struct hn_softc *sc, uint32_t *link_status) { size_t size; int error; size = sizeof(*link_status); error = hn_rndis_query(sc, OID_GEN_MEDIA_CONNECT_STATUS, NULL, 0, link_status, &size); if (error) return (error); if (size != sizeof(uint32_t)) { if_printf(sc->hn_ifp, "invalid link status len %zu\n", size); return (EINVAL); } return (0); } static uint8_t netvsc_hash_key[NDIS_HASH_KEYSIZE_TOEPLITZ] = { 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa }; static const void * hn_rndis_xact_exec1(struct hn_softc *sc, struct vmbus_xact *xact, size_t reqlen, struct hn_send_ctx *sndc, size_t *comp_len) { struct vmbus_gpa gpa[HN_XACT_REQ_PGCNT]; int gpa_cnt, error; bus_addr_t paddr; KASSERT(reqlen <= HN_XACT_REQ_SIZE && reqlen > 0, ("invalid request length %zu", reqlen)); /* * Setup the SG list. */ paddr = vmbus_xact_req_paddr(xact); KASSERT((paddr & PAGE_MASK) == 0, ("vmbus xact request is not page aligned 0x%jx", (uintmax_t)paddr)); for (gpa_cnt = 0; gpa_cnt < HN_XACT_REQ_PGCNT; ++gpa_cnt) { int len = PAGE_SIZE; if (reqlen == 0) break; if (reqlen < len) len = reqlen; gpa[gpa_cnt].gpa_page = atop(paddr) + gpa_cnt; gpa[gpa_cnt].gpa_len = len; gpa[gpa_cnt].gpa_ofs = 0; reqlen -= len; } KASSERT(reqlen == 0, ("still have %zu request data left", reqlen)); /* * Send this RNDIS control message and wait for its completion * message. */ vmbus_xact_activate(xact); error = hv_nv_on_send(sc->hn_prichan, HN_NVS_RNDIS_MTYPE_CTRL, sndc, gpa, gpa_cnt); if (error) { vmbus_xact_deactivate(xact); if_printf(sc->hn_ifp, "RNDIS ctrl send failed: %d\n", error); return (NULL); } return (vmbus_xact_wait(xact, comp_len)); } static const void * hn_rndis_xact_execute(struct hn_softc *sc, struct vmbus_xact *xact, uint32_t rid, size_t reqlen, size_t *comp_len0, uint32_t comp_type) { const struct rndis_comp_hdr *comp; size_t comp_len, min_complen = *comp_len0; KASSERT(rid > HN_RNDIS_RID_COMPAT_MAX, ("invalid rid %u\n", rid)); KASSERT(min_complen >= sizeof(*comp), ("invalid minimum complete len %zu", min_complen)); /* * Execute the xact setup by the caller. */ comp = hn_rndis_xact_exec1(sc, xact, reqlen, &hn_send_ctx_none, &comp_len); if (comp == NULL) return (NULL); /* * Check this RNDIS complete message. */ if (comp_len < min_complen) { if (comp_len >= sizeof(*comp)) { /* rm_status field is valid */ if_printf(sc->hn_ifp, "invalid RNDIS comp len %zu, " "status 0x%08x\n", comp_len, comp->rm_status); } else { if_printf(sc->hn_ifp, "invalid RNDIS comp len %zu\n", comp_len); } return (NULL); } if (comp->rm_len < min_complen) { if_printf(sc->hn_ifp, "invalid RNDIS comp msglen %u\n", comp->rm_len); return (NULL); } if (comp->rm_type != comp_type) { if_printf(sc->hn_ifp, "unexpected RNDIS comp 0x%08x, " "expect 0x%08x\n", comp->rm_type, comp_type); return (NULL); } if (comp->rm_rid != rid) { if_printf(sc->hn_ifp, "RNDIS comp rid mismatch %u, " "expect %u\n", comp->rm_rid, rid); return (NULL); } /* All pass! */ *comp_len0 = comp_len; return (comp); } static int hn_rndis_query(struct hn_softc *sc, uint32_t oid, const void *idata, size_t idlen, void *odata, size_t *odlen0) { struct rndis_query_req *req; const struct rndis_query_comp *comp; struct vmbus_xact *xact; size_t reqlen, odlen = *odlen0, comp_len; int error, ofs; uint32_t rid; reqlen = sizeof(*req) + idlen; xact = vmbus_xact_get(sc->hn_xact, reqlen); if (xact == NULL) { if_printf(sc->hn_ifp, "no xact for RNDIS query 0x%08x\n", oid); return (ENXIO); } rid = hn_rndis_rid(sc); req = vmbus_xact_req_data(xact); req->rm_type = REMOTE_NDIS_QUERY_MSG; req->rm_len = reqlen; req->rm_rid = rid; req->rm_oid = oid; /* * XXX * This is _not_ RNDIS Spec conforming: * "This MUST be set to 0 when there is no input data * associated with the OID." * * If this field was set to 0 according to the RNDIS Spec, * Hyper-V would set non-SUCCESS status in the query * completion. */ req->rm_infobufoffset = RNDIS_QUERY_REQ_INFOBUFOFFSET; if (idlen > 0) { req->rm_infobuflen = idlen; /* Input data immediately follows RNDIS query. */ memcpy(req + 1, idata, idlen); } comp_len = sizeof(*comp) + odlen; comp = hn_rndis_xact_execute(sc, xact, rid, reqlen, &comp_len, REMOTE_NDIS_QUERY_CMPLT); if (comp == NULL) { if_printf(sc->hn_ifp, "exec RNDIS query 0x%08x failed\n", oid); error = EIO; goto done; } if (comp->rm_status != RNDIS_STATUS_SUCCESS) { if_printf(sc->hn_ifp, "RNDIS query 0x%08x failed: " "status 0x%08x\n", oid, comp->rm_status); error = EIO; goto done; } if (comp->rm_infobuflen == 0 || comp->rm_infobufoffset == 0) { /* No output data! */ if_printf(sc->hn_ifp, "RNDIS query 0x%08x, no data\n", oid); *odlen0 = 0; error = 0; goto done; } /* * Check output data length and offset. */ /* ofs is the offset from the beginning of comp. */ ofs = RNDIS_QUERY_COMP_INFOBUFOFFSET_ABS(comp->rm_infobufoffset); if (ofs < sizeof(*comp) || ofs + comp->rm_infobuflen > comp_len) { if_printf(sc->hn_ifp, "RNDIS query invalid comp ib off/len, " "%u/%u\n", comp->rm_infobufoffset, comp->rm_infobuflen); error = EINVAL; goto done; } /* * Save output data. */ if (comp->rm_infobuflen < odlen) odlen = comp->rm_infobuflen; memcpy(odata, ((const uint8_t *)comp) + ofs, odlen); *odlen0 = odlen; error = 0; done: vmbus_xact_put(xact); return (error); } static int hn_rndis_get_rsscaps(struct hn_softc *sc, int *rxr_cnt) { struct ndis_rss_caps in, caps; size_t caps_len; int error; /* * Only NDIS 6.30+ is supported. */ KASSERT(sc->hn_ndis_ver >= NDIS_VERSION_6_30, ("NDIS 6.30+ is required, NDIS version 0x%08x", sc->hn_ndis_ver)); *rxr_cnt = 0; memset(&in, 0, sizeof(in)); in.ndis_hdr.ndis_type = NDIS_OBJTYPE_RSS_CAPS; in.ndis_hdr.ndis_rev = NDIS_RSS_CAPS_REV_2; in.ndis_hdr.ndis_size = NDIS_RSS_CAPS_SIZE; caps_len = NDIS_RSS_CAPS_SIZE; error = hn_rndis_query(sc, OID_GEN_RECEIVE_SCALE_CAPABILITIES, &in, NDIS_RSS_CAPS_SIZE, &caps, &caps_len); if (error) return (error); if (caps_len < NDIS_RSS_CAPS_SIZE_6_0) { if_printf(sc->hn_ifp, "invalid NDIS RSS caps len %zu", caps_len); return (EINVAL); } if (caps.ndis_nrxr == 0) { if_printf(sc->hn_ifp, "0 RX rings!?\n"); return (EINVAL); } *rxr_cnt = caps.ndis_nrxr; if (caps_len == NDIS_RSS_CAPS_SIZE) { if (bootverbose) { if_printf(sc->hn_ifp, "RSS indirect table size %u\n", caps.ndis_nind); } } return (0); } static int hn_rndis_set(struct hn_softc *sc, uint32_t oid, const void *data, size_t dlen) { struct rndis_set_req *req; const struct rndis_set_comp *comp; struct vmbus_xact *xact; size_t reqlen, comp_len; uint32_t rid; int error; KASSERT(dlen > 0, ("invalid dlen %zu", dlen)); reqlen = sizeof(*req) + dlen; xact = vmbus_xact_get(sc->hn_xact, reqlen); if (xact == NULL) { if_printf(sc->hn_ifp, "no xact for RNDIS set 0x%08x\n", oid); return (ENXIO); } rid = hn_rndis_rid(sc); req = vmbus_xact_req_data(xact); req->rm_type = REMOTE_NDIS_SET_MSG; req->rm_len = reqlen; req->rm_rid = rid; req->rm_oid = oid; req->rm_infobuflen = dlen; req->rm_infobufoffset = RNDIS_SET_REQ_INFOBUFOFFSET; /* Data immediately follows RNDIS set. */ memcpy(req + 1, data, dlen); comp_len = sizeof(*comp); comp = hn_rndis_xact_execute(sc, xact, rid, reqlen, &comp_len, REMOTE_NDIS_SET_CMPLT); if (comp == NULL) { if_printf(sc->hn_ifp, "exec RNDIS set 0x%08x failed\n", oid); error = EIO; goto done; } if (comp->rm_status != RNDIS_STATUS_SUCCESS) { if_printf(sc->hn_ifp, "RNDIS set 0x%08x failed: " "status 0x%08x\n", oid, comp->rm_status); error = EIO; goto done; } error = 0; done: vmbus_xact_put(xact); return (error); } static int hn_rndis_conf_offload(struct hn_softc *sc) { struct ndis_offload_params params; size_t paramsz; int error; /* NOTE: 0 means "no change" */ memset(¶ms, 0, sizeof(params)); params.ndis_hdr.ndis_type = NDIS_OBJTYPE_DEFAULT; if (sc->hn_ndis_ver < NDIS_VERSION_6_30) { params.ndis_hdr.ndis_rev = NDIS_OFFLOAD_PARAMS_REV_2; paramsz = NDIS_OFFLOAD_PARAMS_SIZE_6_1; } else { params.ndis_hdr.ndis_rev = NDIS_OFFLOAD_PARAMS_REV_3; paramsz = NDIS_OFFLOAD_PARAMS_SIZE; } params.ndis_hdr.ndis_size = paramsz; params.ndis_ip4csum = NDIS_OFFLOAD_PARAM_TXRX; params.ndis_tcp4csum = NDIS_OFFLOAD_PARAM_TXRX; params.ndis_tcp6csum = NDIS_OFFLOAD_PARAM_TXRX; if (sc->hn_ndis_ver >= NDIS_VERSION_6_30) { params.ndis_udp4csum = NDIS_OFFLOAD_PARAM_TXRX; params.ndis_udp6csum = NDIS_OFFLOAD_PARAM_TXRX; } params.ndis_lsov2_ip4 = NDIS_OFFLOAD_LSOV2_ON; /* XXX ndis_lsov2_ip6 = NDIS_OFFLOAD_LSOV2_ON */ error = hn_rndis_set(sc, OID_TCP_OFFLOAD_PARAMETERS, ¶ms, paramsz); if (error) { if_printf(sc->hn_ifp, "offload config failed: %d\n", error); } else { if (bootverbose) if_printf(sc->hn_ifp, "offload config done\n"); } return (error); } static int hn_rndis_conf_rss(struct hn_softc *sc, int nchan) { struct ndis_rssprm_toeplitz *rss = &sc->hn_rss; struct ndis_rss_params *prm = &rss->rss_params; int i, error; /* * Only NDIS 6.30+ is supported. */ KASSERT(sc->hn_ndis_ver >= NDIS_VERSION_6_30, ("NDIS 6.30+ is required, NDIS version 0x%08x", sc->hn_ndis_ver)); memset(rss, 0, sizeof(*rss)); prm->ndis_hdr.ndis_type = NDIS_OBJTYPE_RSS_PARAMS; prm->ndis_hdr.ndis_rev = NDIS_RSS_PARAMS_REV_2; prm->ndis_hdr.ndis_size = sizeof(*rss); prm->ndis_hash = NDIS_HASH_FUNCTION_TOEPLITZ | NDIS_HASH_IPV4 | NDIS_HASH_TCP_IPV4 | NDIS_HASH_IPV6 | NDIS_HASH_TCP_IPV6; /* TODO: Take ndis_rss_caps.ndis_nind into account */ prm->ndis_indsize = sizeof(rss->rss_ind); prm->ndis_indoffset = __offsetof(struct ndis_rssprm_toeplitz, rss_ind[0]); prm->ndis_keysize = sizeof(rss->rss_key); prm->ndis_keyoffset = __offsetof(struct ndis_rssprm_toeplitz, rss_key[0]); /* Setup RSS key */ memcpy(rss->rss_key, netvsc_hash_key, sizeof(rss->rss_key)); /* Setup RSS indirect table */ /* TODO: Take ndis_rss_caps.ndis_nind into account */ for (i = 0; i < NDIS_HASH_INDCNT; ++i) rss->rss_ind[i] = i % nchan; error = hn_rndis_set(sc, OID_GEN_RECEIVE_SCALE_PARAMETERS, rss, sizeof(*rss)); if (error) { if_printf(sc->hn_ifp, "RSS config failed: %d\n", error); } else { if (bootverbose) if_printf(sc->hn_ifp, "RSS config done\n"); } return (error); } static int hn_rndis_set_rxfilter(struct hn_softc *sc, uint32_t filter) { int error; error = hn_rndis_set(sc, OID_GEN_CURRENT_PACKET_FILTER, &filter, sizeof(filter)); if (error) { if_printf(sc->hn_ifp, "set RX filter 0x%08x failed: %d\n", filter, error); } else { if (bootverbose) { if_printf(sc->hn_ifp, "set RX filter 0x%08x done\n", filter); } } return (error); } /* * RNDIS filter init device */ static int hv_rf_init_device(struct hn_softc *sc) { struct rndis_init_req *req; const struct rndis_init_comp *comp; struct vmbus_xact *xact; size_t comp_len; uint32_t rid; int error; xact = vmbus_xact_get(sc->hn_xact, sizeof(*req)); if (xact == NULL) { if_printf(sc->hn_ifp, "no xact for RNDIS init\n"); return (ENXIO); } rid = hn_rndis_rid(sc); req = vmbus_xact_req_data(xact); req->rm_type = REMOTE_NDIS_INITIALIZE_MSG; req->rm_len = sizeof(*req); req->rm_rid = rid; req->rm_ver_major = RNDIS_VERSION_MAJOR; req->rm_ver_minor = RNDIS_VERSION_MINOR; req->rm_max_xfersz = HN_RNDIS_XFER_SIZE; comp_len = RNDIS_INIT_COMP_SIZE_MIN; comp = hn_rndis_xact_execute(sc, xact, rid, sizeof(*req), &comp_len, REMOTE_NDIS_INITIALIZE_CMPLT); if (comp == NULL) { if_printf(sc->hn_ifp, "exec RNDIS init failed\n"); error = EIO; goto done; } if (comp->rm_status != RNDIS_STATUS_SUCCESS) { if_printf(sc->hn_ifp, "RNDIS init failed: status 0x%08x\n", comp->rm_status); error = EIO; goto done; } if (bootverbose) { if_printf(sc->hn_ifp, "RNDIS ver %u.%u, pktsz %u, pktcnt %u, " "align %u\n", comp->rm_ver_major, comp->rm_ver_minor, comp->rm_pktmaxsz, comp->rm_pktmaxcnt, 1U << comp->rm_align); } error = 0; done: vmbus_xact_put(xact); return (error); } /* * RNDIS filter halt device */ static int hv_rf_halt_device(struct hn_softc *sc) { struct vmbus_xact *xact; struct rndis_halt_req *halt; struct hn_send_ctx sndc; size_t comp_len; xact = vmbus_xact_get(sc->hn_xact, sizeof(*halt)); if (xact == NULL) { if_printf(sc->hn_ifp, "no xact for RNDIS halt\n"); return (ENXIO); } halt = vmbus_xact_req_data(xact); halt->rm_type = REMOTE_NDIS_HALT_MSG; halt->rm_len = sizeof(*halt); halt->rm_rid = hn_rndis_rid(sc); /* No RNDIS completion; rely on NVS message send completion */ hn_send_ctx_init_simple(&sndc, hn_nvs_sent_xact, xact); hn_rndis_xact_exec1(sc, xact, sizeof(*halt), &sndc, &comp_len); vmbus_xact_put(xact); if (bootverbose) if_printf(sc->hn_ifp, "RNDIS halt done\n"); return (0); } /* * RNDIS filter on device add */ int hv_rf_on_device_add(struct hn_softc *sc, void *additl_info, int *nchan0, struct hn_rx_ring *rxr) { int ret; netvsc_device_info *dev_info = (netvsc_device_info *)additl_info; device_t dev = sc->hn_dev; struct hn_nvs_subch_req *req; const struct hn_nvs_subch_resp *resp; size_t resp_len; struct vmbus_xact *xact = NULL; uint32_t status, nsubch; int nchan = *nchan0; int rxr_cnt; /* * Let the inner driver handle this first to create the netvsc channel * NOTE! Once the channel is created, we may get a receive callback * (hv_rf_on_receive()) before this call is completed. * Note: Earlier code used a function pointer here. */ ret = hv_nv_on_device_add(sc, rxr); if (ret != 0) return (ret); /* * Initialize the rndis device */ /* Send the rndis initialization message */ ret = hv_rf_init_device(sc); if (ret != 0) { /* * TODO: If rndis init failed, we will need to shut down * the channel */ } /* Get the mac address */ ret = hv_rf_query_device_mac(sc, dev_info->mac_addr); if (ret != 0) { /* TODO: shut down rndis device and the channel */ } /* Configure NDIS offload settings */ hn_rndis_conf_offload(sc); hv_rf_query_device_link_status(sc, &dev_info->link_state); if (sc->hn_ndis_ver < NDIS_VERSION_6_30 || nchan == 1) { /* * Either RSS is not supported, or multiple RX/TX rings * are not requested. */ *nchan0 = 1; return (0); } /* * Get RSS capabilities, e.g. # of RX rings, and # of indirect * table entries. */ ret = hn_rndis_get_rsscaps(sc, &rxr_cnt); if (ret) { /* No RSS; this is benign. */ *nchan0 = 1; return (0); } if (nchan > rxr_cnt) nchan = rxr_cnt; if_printf(sc->hn_ifp, "RX rings offered %u, requested %d\n", rxr_cnt, nchan); if (nchan == 1) { device_printf(dev, "only 1 channel is supported, no vRSS\n"); goto out; } /* * Ask NVS to allocate sub-channels. */ xact = vmbus_xact_get(sc->hn_xact, sizeof(*req)); if (xact == NULL) { if_printf(sc->hn_ifp, "no xact for nvs subch req\n"); ret = ENXIO; goto out; } req = vmbus_xact_req_data(xact); req->nvs_type = HN_NVS_TYPE_SUBCH_REQ; req->nvs_op = HN_NVS_SUBCH_OP_ALLOC; req->nvs_nsubch = nchan - 1; resp_len = sizeof(*resp); resp = hn_nvs_xact_execute(sc, xact, req, sizeof(*req), &resp_len, HN_NVS_TYPE_SUBCH_RESP); if (resp == NULL) { if_printf(sc->hn_ifp, "exec subch failed\n"); ret = EIO; goto out; } status = resp->nvs_status; nsubch = resp->nvs_nsubch; vmbus_xact_put(xact); xact = NULL; if (status != HN_NVS_STATUS_OK) { if_printf(sc->hn_ifp, "subch req failed: %x\n", status); ret = EIO; goto out; } if (nsubch > nchan - 1) { if_printf(sc->hn_ifp, "%u subchans are allocated, requested %u\n", nsubch, nchan - 1); nsubch = nchan - 1; } nchan = nsubch + 1; ret = hn_rndis_conf_rss(sc, nchan); if (ret != 0) *nchan0 = 1; else *nchan0 = nchan; out: if (xact != NULL) vmbus_xact_put(xact); return (ret); } /* * RNDIS filter on device remove */ int hv_rf_on_device_remove(struct hn_softc *sc) { int ret; /* Halt and release the rndis device */ ret = hv_rf_halt_device(sc); /* Pass control to inner driver to remove the device */ ret |= hv_nv_on_device_remove(sc); return (ret); } /* * RNDIS filter on open */ int hv_rf_on_open(struct hn_softc *sc) { uint32_t filter; /* XXX */ if (hv_promisc_mode != 1) { filter = NDIS_PACKET_TYPE_BROADCAST | NDIS_PACKET_TYPE_ALL_MULTICAST | NDIS_PACKET_TYPE_DIRECTED; } else { filter = NDIS_PACKET_TYPE_PROMISCUOUS; } return (hn_rndis_set_rxfilter(sc, filter)); } /* * RNDIS filter on close */ int hv_rf_on_close(struct hn_softc *sc) { return (hn_rndis_set_rxfilter(sc, 0)); } void hv_rf_channel_rollup(struct hn_rx_ring *rxr, struct hn_tx_ring *txr) { netvsc_channel_rollup(rxr, txr); } Index: head/sys/dev/hyperv/netvsc/if_hnreg.h =================================================================== --- head/sys/dev/hyperv/netvsc/if_hnreg.h (revision 305524) +++ head/sys/dev/hyperv/netvsc/if_hnreg.h (revision 305525) @@ -1,224 +1,228 @@ /*- * Copyright (c) 2016 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _IF_HNREG_H_ #define _IF_HNREG_H_ #include #include #define HN_NVS_RXBUF_SIG 0xcafe #define HN_NVS_CHIM_SIG 0xface #define HN_NVS_CHIM_IDX_INVALID 0xffffffff #define HN_NVS_RNDIS_MTYPE_DATA 0 #define HN_NVS_RNDIS_MTYPE_CTRL 1 /* * NVS message transacion status codes. */ #define HN_NVS_STATUS_OK 1 #define HN_NVS_STATUS_FAILED 2 /* * NVS request/response message types. */ #define HN_NVS_TYPE_INIT 1 #define HN_NVS_TYPE_INIT_RESP 2 #define HN_NVS_TYPE_NDIS_INIT 100 #define HN_NVS_TYPE_RXBUF_CONN 101 #define HN_NVS_TYPE_RXBUF_CONNRESP 102 #define HN_NVS_TYPE_RXBUF_DISCONN 103 #define HN_NVS_TYPE_CHIM_CONN 104 #define HN_NVS_TYPE_CHIM_CONNRESP 105 #define HN_NVS_TYPE_CHIM_DISCONN 106 #define HN_NVS_TYPE_RNDIS 107 #define HN_NVS_TYPE_RNDIS_ACK 108 #define HN_NVS_TYPE_NDIS_CONF 125 #define HN_NVS_TYPE_VFASSOC_NOTE 128 /* notification */ #define HN_NVS_TYPE_SET_DATAPATH 129 #define HN_NVS_TYPE_SUBCH_REQ 133 #define HN_NVS_TYPE_SUBCH_RESP 133 /* same as SUBCH_REQ */ #define HN_NVS_TYPE_TXTBL_NOTE 134 /* notification */ /* * Any size less than this one will _not_ work, e.g. hn_nvs_init * only has 12B valid data, however, if only 12B data were sent, * Hypervisor would never reply. */ #define HN_NVS_REQSIZE_MIN 32 /* NVS message common header */ struct hn_nvs_hdr { uint32_t nvs_type; } __packed; struct hn_nvs_init { uint32_t nvs_type; /* HN_NVS_TYPE_INIT */ uint32_t nvs_ver_min; uint32_t nvs_ver_max; uint8_t nvs_rsvd[20]; } __packed; CTASSERT(sizeof(struct hn_nvs_init) >= HN_NVS_REQSIZE_MIN); struct hn_nvs_init_resp { uint32_t nvs_type; /* HN_NVS_TYPE_INIT_RESP */ uint32_t nvs_ver; /* deprecated */ uint32_t nvs_rsvd; uint32_t nvs_status; /* HN_NVS_STATUS_ */ } __packed; /* No reponse */ struct hn_nvs_ndis_conf { uint32_t nvs_type; /* HN_NVS_TYPE_NDIS_CONF */ uint32_t nvs_mtu; uint32_t nvs_rsvd; uint64_t nvs_caps; /* HN_NVS_NDIS_CONF_ */ uint8_t nvs_rsvd1[12]; } __packed; CTASSERT(sizeof(struct hn_nvs_ndis_conf) >= HN_NVS_REQSIZE_MIN); #define HN_NVS_NDIS_CONF_SRIOV 0x0004 #define HN_NVS_NDIS_CONF_VLAN 0x0008 /* No response */ struct hn_nvs_ndis_init { uint32_t nvs_type; /* HN_NVS_TYPE_NDIS_INIT */ uint32_t nvs_ndis_major; /* NDIS_VERSION_MAJOR_ */ uint32_t nvs_ndis_minor; /* NDIS_VERSION_MINOR_ */ uint8_t nvs_rsvd[20]; } __packed; CTASSERT(sizeof(struct hn_nvs_ndis_init) >= HN_NVS_REQSIZE_MIN); struct hn_nvs_rxbuf_conn { uint32_t nvs_type; /* HN_NVS_TYPE_RXBUF_CONN */ uint32_t nvs_gpadl; /* RXBUF vmbus GPADL */ uint16_t nvs_sig; /* HN_NVS_RXBUF_SIG */ uint8_t nvs_rsvd[22]; } __packed; CTASSERT(sizeof(struct hn_nvs_rxbuf_conn) >= HN_NVS_REQSIZE_MIN); struct hn_nvs_rxbuf_sect { uint32_t nvs_start; uint32_t nvs_slotsz; uint32_t nvs_slotcnt; uint32_t nvs_end; } __packed; struct hn_nvs_rxbuf_connresp { uint32_t nvs_type; /* HN_NVS_TYPE_RXBUF_CONNRESP */ uint32_t nvs_status; /* HN_NVS_STATUS_ */ uint32_t nvs_nsect; /* # of elem in nvs_sect */ struct hn_nvs_rxbuf_sect nvs_sect[]; } __packed; /* No response */ struct hn_nvs_rxbuf_disconn { uint32_t nvs_type; /* HN_NVS_TYPE_RXBUF_DISCONN */ uint16_t nvs_sig; /* HN_NVS_RXBUF_SIG */ uint8_t nvs_rsvd[26]; } __packed; CTASSERT(sizeof(struct hn_nvs_rxbuf_disconn) >= HN_NVS_REQSIZE_MIN); struct hn_nvs_chim_conn { uint32_t nvs_type; /* HN_NVS_TYPE_CHIM_CONN */ uint32_t nvs_gpadl; /* chimney buf vmbus GPADL */ uint16_t nvs_sig; /* NDIS_NVS_CHIM_SIG */ uint8_t nvs_rsvd[22]; } __packed; CTASSERT(sizeof(struct hn_nvs_chim_conn) >= HN_NVS_REQSIZE_MIN); struct hn_nvs_chim_connresp { uint32_t nvs_type; /* HN_NVS_TYPE_CHIM_CONNRESP */ uint32_t nvs_status; /* HN_NVS_STATUS_ */ uint32_t nvs_sectsz; /* section size */ } __packed; /* No response */ struct hn_nvs_chim_disconn { uint32_t nvs_type; /* HN_NVS_TYPE_CHIM_DISCONN */ uint16_t nvs_sig; /* HN_NVS_CHIM_SIG */ uint8_t nvs_rsvd[26]; } __packed; CTASSERT(sizeof(struct hn_nvs_chim_disconn) >= HN_NVS_REQSIZE_MIN); #define HN_NVS_SUBCH_OP_ALLOC 1 struct hn_nvs_subch_req { uint32_t nvs_type; /* HN_NVS_TYPE_SUBCH_REQ */ uint32_t nvs_op; /* HN_NVS_SUBCH_OP_ */ uint32_t nvs_nsubch; uint8_t nvs_rsvd[20]; } __packed; CTASSERT(sizeof(struct hn_nvs_subch_req) >= HN_NVS_REQSIZE_MIN); struct hn_nvs_subch_resp { uint32_t nvs_type; /* HN_NVS_TYPE_SUBCH_RESP */ uint32_t nvs_status; /* HN_NVS_STATUS_ */ uint32_t nvs_nsubch; } __packed; struct hn_nvs_rndis { uint32_t nvs_type; /* HN_NVS_TYPE_RNDIS */ uint32_t nvs_rndis_mtype;/* HN_NVS_RNDIS_MTYPE_ */ /* * Chimney sending buffer index and size. * * NOTE: * If nvs_chim_idx is set to HN_NVS_CHIM_IDX_INVALID * and nvs_chim_sz is set to 0, then chimney sending * buffer is _not_ used by this RNDIS message. */ uint32_t nvs_chim_idx; uint32_t nvs_chim_sz; uint8_t nvs_rsvd[16]; } __packed; CTASSERT(sizeof(struct hn_nvs_rndis) >= HN_NVS_REQSIZE_MIN); struct hn_nvs_rndis_ack { uint32_t nvs_type; /* HN_NVS_TYPE_RNDIS_ACK */ uint32_t nvs_status; /* HN_NVS_STATUS_ */ uint8_t nvs_rsvd[24]; } __packed; CTASSERT(sizeof(struct hn_nvs_rndis_ack) >= HN_NVS_REQSIZE_MIN); /* * RNDIS extension */ /* Per-packet hash info */ #define HN_NDIS_HASH_INFO_SIZE sizeof(uint32_t) #define HN_NDIS_PKTINFO_TYPE_HASHINF NDIS_PKTINFO_TYPE_ORIG_NBLIST /* NDIS_HASH_ */ /* Per-packet hash value */ #define HN_NDIS_HASH_VALUE_SIZE sizeof(uint32_t) #define HN_NDIS_PKTINFO_TYPE_HASHVAL NDIS_PKTINFO_TYPE_PKT_CANCELID +/* Per-packet-info size */ +#define HN_RNDIS_PKTINFO_SIZE(dlen) \ + __offsetof(struct rndis_pktinfo, rm_data[dlen]) + #endif /* !_IF_HNREG_H_ */ Index: head/sys/dev/hyperv/netvsc/if_hnvar.h =================================================================== --- head/sys/dev/hyperv/netvsc/if_hnvar.h (revision 305524) +++ head/sys/dev/hyperv/netvsc/if_hnvar.h (revision 305525) @@ -1,126 +1,129 @@ /*- * Copyright (c) 2016 Microsoft Corp. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _IF_HNVAR_H_ #define _IF_HNVAR_H_ #include #include #include struct hn_softc; struct vmbus_channel; struct hn_send_ctx; typedef void (*hn_sent_callback_t) (struct hn_send_ctx *, struct hn_softc *, struct vmbus_channel *, const void *, int); struct hn_send_ctx { hn_sent_callback_t hn_cb; void *hn_cbarg; uint32_t hn_chim_idx; int hn_chim_sz; }; struct rndis_hash_info; struct rndix_hash_value; struct ndis_8021q_info_; struct rndis_tcp_ip_csum_info_; #define HN_NDIS_VLAN_INFO_INVALID 0xffffffff #define HN_NDIS_RXCSUM_INFO_INVALID 0 #define HN_NDIS_HASH_INFO_INVALID 0 struct hn_recvinfo { uint32_t vlan_info; uint32_t csum_info; uint32_t hash_info; uint32_t hash_value; }; #define HN_SEND_CTX_INITIALIZER(cb, cbarg) \ { \ .hn_cb = cb, \ .hn_cbarg = cbarg, \ .hn_chim_idx = HN_NVS_CHIM_IDX_INVALID, \ .hn_chim_sz = 0 \ } static __inline void hn_send_ctx_init(struct hn_send_ctx *sndc, hn_sent_callback_t cb, void *cbarg, uint32_t chim_idx, int chim_sz) { sndc->hn_cb = cb; sndc->hn_cbarg = cbarg; sndc->hn_chim_idx = chim_idx; sndc->hn_chim_sz = chim_sz; } static __inline void hn_send_ctx_init_simple(struct hn_send_ctx *sndc, hn_sent_callback_t cb, void *cbarg) { hn_send_ctx_init(sndc, cb, cbarg, HN_NVS_CHIM_IDX_INVALID, 0); } static __inline int hn_nvs_send(struct vmbus_channel *chan, uint16_t flags, void *nvs_msg, int nvs_msglen, struct hn_send_ctx *sndc) { return (vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_INBAND, flags, nvs_msg, nvs_msglen, (uint64_t)(uintptr_t)sndc)); } static __inline int hn_nvs_send_sglist(struct vmbus_channel *chan, struct vmbus_gpa sg[], int sglen, void *nvs_msg, int nvs_msglen, struct hn_send_ctx *sndc) { return (vmbus_chan_send_sglist(chan, sg, sglen, nvs_msg, nvs_msglen, (uint64_t)(uintptr_t)sndc)); } struct vmbus_xact; const void *hn_nvs_xact_execute(struct hn_softc *sc, struct vmbus_xact *xact, void *req, int reqlen, size_t *resp_len, uint32_t type); void hn_nvs_sent_xact(struct hn_send_ctx *sndc, struct hn_softc *sc, struct vmbus_channel *chan, const void *data, int dlen); uint32_t hn_chim_alloc(struct hn_softc *sc); void hn_chim_free(struct hn_softc *sc, uint32_t chim_idx); +void *hn_rndis_pktinfo_append(struct rndis_packet_msg *, + size_t pktsize, size_t pi_dlen, uint32_t pi_type); + extern struct hn_send_ctx hn_send_ctx_none; #endif /* !_IF_HNVAR_H_ */