Index: head/sys/dev/hyperv/netvsc/hv_net_vsc.h
===================================================================
--- head/sys/dev/hyperv/netvsc/hv_net_vsc.h
+++ head/sys/dev/hyperv/netvsc/hv_net_vsc.h
@@ -1011,6 +1011,38 @@
 #define HN_TRUST_HCSUM_TCP	0x0002
 #define HN_TRUST_HCSUM_UDP	0x0004
 
+struct hn_tx_ring {
+	struct mtx	hn_txlist_spin;
+	struct hn_txdesc_list hn_txlist;
+	int		hn_txdesc_cnt;
+	int		hn_txdesc_avail;
+	int		hn_txeof;
+
+	int		hn_sched_tx;
+	struct taskqueue *hn_tx_taskq;
+	struct task	hn_start_task;
+	struct task	hn_txeof_task;
+
+	struct mtx	hn_tx_lock;
+	struct hn_softc *hn_sc;
+
+	int		hn_direct_tx_size;
+	int		hn_tx_chimney_size;
+	bus_dma_tag_t	hn_tx_data_dtag;
+	uint64_t	hn_csum_assist;
+
+	u_long		hn_no_txdescs;
+	u_long		hn_send_failed;
+	u_long		hn_txdma_failed;
+	u_long		hn_tx_collapsed;
+	u_long		hn_tx_chimney;
+
+	/* Rarely used stuffs */
+	struct hn_txdesc *hn_txdesc;
+	bus_dma_tag_t	hn_tx_rndis_dtag;
+	struct sysctl_oid *hn_tx_sysctl_tree;
+} __aligned(CACHE_LINE_SIZE);
+
 /*
  * Device-specific softc structure
  */
@@ -1028,33 +1060,14 @@
 	struct hv_device *hn_dev_obj;
 	netvsc_dev	*net_dev;
 
-	struct hn_txdesc *hn_txdesc;
-	bus_dma_tag_t	hn_tx_data_dtag;
-	bus_dma_tag_t	hn_tx_rndis_dtag;
-	int		hn_tx_chimney_size;
-	int		hn_tx_chimney_max;
-	uint64_t	hn_csum_assist;
-
-	struct mtx	hn_txlist_spin;
-	struct hn_txdesc_list hn_txlist;
-	int		hn_txdesc_cnt;
-	int		hn_txdesc_avail;
-	int		hn_txeof;
-
-	int		hn_sched_tx;
-	int		hn_direct_tx_size;
-	struct taskqueue *hn_tx_taskq;
-	struct task	hn_start_task;
-	struct task	hn_txeof_task;
-
 	int		hn_rx_ring_cnt;
 	struct hn_rx_ring *hn_rx_ring;
 
-	u_long		hn_no_txdescs;
-	u_long		hn_send_failed;
-	u_long		hn_txdma_failed;
-	u_long		hn_tx_collapsed;
-	u_long		hn_tx_chimney;
+	int		hn_tx_ring_cnt;
+	struct hn_tx_ring *hn_tx_ring;
+	int		hn_tx_chimney_max;
+	struct taskqueue *hn_tx_taskq;
+	struct sysctl_oid *hn_tx_sysctl_tree;
 } hn_softc_t;
 
 /*
Index: head/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c
===================================================================
--- head/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c
+++ head/sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c
@@ -153,7 +153,7 @@
 struct hn_txdesc {
 	SLIST_ENTRY(hn_txdesc) link;
 	struct mbuf	*m;
-	struct hn_softc	*sc;
+	struct hn_tx_ring *txr;
 	int		refs;
 	uint32_t	flags;		/* HN_TXD_FLAG_ */
 	netvsc_packet	netvsc_pkt;	/* XXX to be removed */
@@ -193,7 +193,6 @@
 #define NV_LOCK_INIT(_sc, _name) \
 	mtx_init(&(_sc)->hn_lock, _name, MTX_NETWORK_LOCK, MTX_DEF)
 #define NV_LOCK(_sc)		mtx_lock(&(_sc)->hn_lock)
-#define NV_TRYLOCK(_sc)		mtx_trylock(&(_sc)->hn_lock)
 #define NV_LOCK_ASSERT(_sc)	mtx_assert(&(_sc)->hn_lock, MA_OWNED)
 #define NV_UNLOCK(_sc)		mtx_unlock(&(_sc)->hn_lock)
 #define NV_LOCK_DESTROY(_sc)	mtx_destroy(&(_sc)->hn_lock)
@@ -266,9 +265,9 @@
 static void hn_ifinit_locked(hn_softc_t *sc);
 static void hn_ifinit(void *xsc);
 static int  hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data);
-static int hn_start_locked(struct ifnet *ifp, int len);
+static int hn_start_locked(struct hn_tx_ring *txr, int len);
 static void hn_start(struct ifnet *ifp);
-static void hn_start_txeof(struct ifnet *ifp);
+static void hn_start_txeof(struct hn_tx_ring *);
 static int hn_ifmedia_upd(struct ifnet *ifp);
 static void hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr);
 static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS);
@@ -277,14 +276,20 @@
 static int hn_tx_chimney_size_sysctl(SYSCTL_HANDLER_ARGS);
 static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS);
 static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS);
+static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS);
+static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS);
 static int hn_check_iplen(const struct mbuf *, int);
-static int hn_create_tx_ring(struct hn_softc *sc);
-static void hn_destroy_tx_ring(struct hn_softc *sc);
+static int hn_create_tx_ring(struct hn_softc *, int);
+static void hn_destroy_tx_ring(struct hn_tx_ring *);
+static int hn_create_tx_data(struct hn_softc *);
+static void hn_destroy_tx_data(struct hn_softc *);
 static void hn_start_taskfunc(void *xsc, int pending);
 static void hn_txeof_taskfunc(void *xsc, int pending);
-static int hn_encap(struct hn_softc *, struct hn_txdesc *, struct mbuf **);
+static void hn_stop_tx_tasks(struct hn_softc *);
+static int hn_encap(struct hn_tx_ring *, struct hn_txdesc *, struct mbuf **);
 static void hn_create_rx_data(struct hn_softc *sc);
 static void hn_destroy_rx_data(struct hn_softc *sc);
+static void hn_set_tx_chimney_size(struct hn_softc *, int);
 
 static int
 hn_ifmedia_upd(struct ifnet *ifp __unused)
@@ -350,8 +355,6 @@
 	hn_softc_t *sc;
 	int unit = device_get_unit(dev);
 	struct ifnet *ifp = NULL;
-	struct sysctl_oid_list *child;
-	struct sysctl_ctx_list *ctx;
 	int error;
 #if __FreeBSD_version >= 1100045
 	int tso_maxlen;
@@ -365,7 +368,6 @@
 	bzero(sc, sizeof(hn_softc_t));
 	sc->hn_unit = unit;
 	sc->hn_dev = dev;
-	sc->hn_direct_tx_size = hn_direct_tx_size;
 
 	if (hn_tx_taskq == NULL) {
 		sc->hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK,
@@ -375,13 +377,6 @@
 	} else {
 		sc->hn_tx_taskq = hn_tx_taskq;
 	}
-	TASK_INIT(&sc->hn_start_task, 0, hn_start_taskfunc, sc);
-	TASK_INIT(&sc->hn_txeof_task, 0, hn_txeof_taskfunc, sc);
-
-	error = hn_create_tx_ring(sc);
-	if (error)
-		goto failed;
-
 	NV_LOCK_INIT(sc, "NetVSCLock");
 
 	sc->hn_dev_obj = device_ctx;
@@ -389,6 +384,10 @@
 	ifp = sc->hn_ifp = if_alloc(IFT_ETHER);
 	ifp->if_softc = sc;
 
+	error = hn_create_tx_data(sc);
+	if (error)
+		goto failed;
+
 	hn_create_rx_data(sc);
 
 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
@@ -421,12 +420,7 @@
 	ifp->if_capenable |=
 	    IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO |
 	    IFCAP_LRO;
-
-	if (hv_vmbus_protocal_version >= HV_VMBUS_VERSION_WIN8_1)
-		sc->hn_csum_assist = HN_CSUM_ASSIST;
-	else
-		sc->hn_csum_assist = HN_CSUM_ASSIST_WIN8;
-	ifp->if_hwassist = sc->hn_csum_assist | CSUM_TSO;
+	ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist | CSUM_TSO;
 
 	error = hv_rf_on_device_add(device_ctx, &device_info);
 	if (error)
@@ -455,52 +449,14 @@
 #endif
 
 	sc->hn_tx_chimney_max = sc->net_dev->send_section_size;
-	sc->hn_tx_chimney_size = sc->hn_tx_chimney_max;
+	hn_set_tx_chimney_size(sc, sc->hn_tx_chimney_max);
 	if (hn_tx_chimney_size > 0 &&
 	    hn_tx_chimney_size < sc->hn_tx_chimney_max)
-		sc->hn_tx_chimney_size = hn_tx_chimney_size;
-
-	/*
-	 * Always schedule transmission instead of trying
-	 * to do direct transmission.  This one gives the
-	 * best performance so far.
-	 */
-	sc->hn_sched_tx = 1;
-
-	ctx = device_get_sysctl_ctx(dev);
-	child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
-
-	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "no_txdescs",
-	    CTLFLAG_RW, &sc->hn_no_txdescs, "# of times short of TX descs");
-	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "send_failed",
-	    CTLFLAG_RW, &sc->hn_send_failed, "# of hyper-v sending failure");
-	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "txdma_failed",
-	    CTLFLAG_RW, &sc->hn_txdma_failed, "# of TX DMA failure");
-	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_collapsed",
-	    CTLFLAG_RW, &sc->hn_tx_collapsed, "# of TX mbuf collapsed");
-	SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_chimney",
-	    CTLFLAG_RW, &sc->hn_tx_chimney, "# of chimney send");
-	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt",
-	    CTLFLAG_RD, &sc->hn_txdesc_cnt, 0, "# of total TX descs");
-	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail",
-	    CTLFLAG_RD, &sc->hn_txdesc_avail, 0, "# of available TX descs");
-	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max",
-	    CTLFLAG_RD, &sc->hn_tx_chimney_max, 0,
-	    "Chimney send packet size upper boundary");
-	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size",
-	    CTLTYPE_INT | CTLFLAG_RW, sc, 0, hn_tx_chimney_size_sysctl,
-	    "I", "Chimney send packet size limit");
-	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "direct_tx_size",
-	    CTLFLAG_RW, &sc->hn_direct_tx_size, 0,
-	    "Size of the packet for direct transmission");
-	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "sched_tx",
-	    CTLFLAG_RW, &sc->hn_sched_tx, 0,
-	    "Always schedule transmission "
-	    "instead of doing direct transmission");
+		hn_set_tx_chimney_size(sc, hn_tx_chimney_size);
 
 	return (0);
 failed:
-	hn_destroy_tx_ring(sc);
+	hn_destroy_tx_data(sc);
 	if (ifp != NULL)
 		if_free(ifp);
 	return (error);
@@ -531,14 +487,14 @@
 
 	hv_rf_on_device_remove(hv_device, HV_RF_NV_DESTROY_CHANNEL);
 
-	taskqueue_drain(sc->hn_tx_taskq, &sc->hn_start_task);
-	taskqueue_drain(sc->hn_tx_taskq, &sc->hn_txeof_task);
-	if (sc->hn_tx_taskq != hn_tx_taskq)
-		taskqueue_free(sc->hn_tx_taskq);
+	hn_stop_tx_tasks(sc);
 
 	ifmedia_removeall(&sc->hn_media);
 	hn_destroy_rx_data(sc);
-	hn_destroy_tx_ring(sc);
+	hn_destroy_tx_data(sc);
+
+	if (sc->hn_tx_taskq != hn_tx_taskq)
+		taskqueue_free(sc->hn_tx_taskq);
 
 	return (0);
 }
@@ -553,13 +509,13 @@
 }
 
 static __inline int
-hn_txdesc_dmamap_load(struct hn_softc *sc, struct hn_txdesc *txd,
+hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd,
     struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs)
 {
 	struct mbuf *m = *m_head;
 	int error;
 
-	error = bus_dmamap_load_mbuf_sg(sc->hn_tx_data_dtag, txd->data_dmap,
+	error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap,
 	    m, segs, nsegs, BUS_DMA_NOWAIT);
 	if (error == EFBIG) {
 		struct mbuf *m_new;
@@ -569,13 +525,13 @@
 			return ENOBUFS;
 		else
 			*m_head = m = m_new;
-		sc->hn_tx_collapsed++;
+		txr->hn_tx_collapsed++;
 
-		error = bus_dmamap_load_mbuf_sg(sc->hn_tx_data_dtag,
+		error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag,
 		    txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT);
 	}
 	if (!error) {
-		bus_dmamap_sync(sc->hn_tx_data_dtag, txd->data_dmap,
+		bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap,
 		    BUS_DMASYNC_PREWRITE);
 		txd->flags |= HN_TXD_FLAG_DMAMAP;
 	}
@@ -583,20 +539,20 @@
 }
 
 static __inline void
-hn_txdesc_dmamap_unload(struct hn_softc *sc, struct hn_txdesc *txd)
+hn_txdesc_dmamap_unload(struct hn_tx_ring *txr, struct hn_txdesc *txd)
 {
 	if (txd->flags & HN_TXD_FLAG_DMAMAP) {
-		bus_dmamap_sync(sc->hn_tx_data_dtag,
+		bus_dmamap_sync(txr->hn_tx_data_dtag,
 		    txd->data_dmap, BUS_DMASYNC_POSTWRITE);
-		bus_dmamap_unload(sc->hn_tx_data_dtag,
+		bus_dmamap_unload(txr->hn_tx_data_dtag,
 		    txd->data_dmap);
 		txd->flags &= ~HN_TXD_FLAG_DMAMAP;
 	}
 }
 
 static __inline int
-hn_txdesc_put(struct hn_softc *sc, struct hn_txdesc *txd)
+hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd)
 {
 
 	KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0,
@@ -606,7 +562,7 @@
 	if (atomic_fetchadd_int(&txd->refs, -1) != 1)
 		return 0;
 
-	hn_txdesc_dmamap_unload(sc, txd);
+	hn_txdesc_dmamap_unload(txr, txd);
 	if (txd->m != NULL) {
 		m_freem(txd->m);
 		txd->m = NULL;
@@ -614,31 +570,31 @@
 
 	txd->flags |= HN_TXD_FLAG_ONLIST;
 
-	mtx_lock_spin(&sc->hn_txlist_spin);
-	KASSERT(sc->hn_txdesc_avail >= 0 &&
-	    sc->hn_txdesc_avail < sc->hn_txdesc_cnt,
-	    ("txdesc_put: invalid txd avail %d", sc->hn_txdesc_avail));
-	sc->hn_txdesc_avail++;
-	SLIST_INSERT_HEAD(&sc->hn_txlist, txd, link);
-	mtx_unlock_spin(&sc->hn_txlist_spin);
+	mtx_lock_spin(&txr->hn_txlist_spin);
+	KASSERT(txr->hn_txdesc_avail >= 0 &&
+	    txr->hn_txdesc_avail < txr->hn_txdesc_cnt,
+	    ("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail));
+	txr->hn_txdesc_avail++;
+	SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link);
+	mtx_unlock_spin(&txr->hn_txlist_spin);
 
 	return 1;
 }
 
 static __inline struct hn_txdesc *
-hn_txdesc_get(struct hn_softc *sc)
+hn_txdesc_get(struct hn_tx_ring *txr)
 {
 	struct hn_txdesc *txd;
 
-	mtx_lock_spin(&sc->hn_txlist_spin);
-	txd = SLIST_FIRST(&sc->hn_txlist);
+	mtx_lock_spin(&txr->hn_txlist_spin);
+	txd = SLIST_FIRST(&txr->hn_txlist);
 	if (txd != NULL) {
-		KASSERT(sc->hn_txdesc_avail > 0,
-		    ("txdesc_get: invalid txd avail %d", sc->hn_txdesc_avail));
-		sc->hn_txdesc_avail--;
-		SLIST_REMOVE_HEAD(&sc->hn_txlist, link);
+		KASSERT(txr->hn_txdesc_avail > 0,
+		    ("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail));
+		txr->hn_txdesc_avail--;
+		SLIST_REMOVE_HEAD(&txr->hn_txlist, link);
 	}
-	mtx_unlock_spin(&sc->hn_txlist_spin);
+	mtx_unlock_spin(&txr->hn_txlist_spin);
 
 	if (txd != NULL) {
 		KASSERT(txd->m == NULL && txd->refs == 0 &&
@@ -670,20 +626,21 @@
 {
 	netvsc_packet *packet = context;
 	struct hn_txdesc *txd;
-	struct hn_softc *sc;
+	struct hn_tx_ring *txr;
 
 	txd = (struct hn_txdesc *)(uintptr_t)
 	    packet->compl.send.send_completion_tid;
 
-	sc = txd->sc;
-	sc->hn_txeof = 1;
-	hn_txdesc_put(sc, txd);
+	txr = txd->txr;
+	txr->hn_txeof = 1;
+	hn_txdesc_put(txr, txd);
 }
 
 void
 netvsc_channel_rollup(struct hv_device *device_ctx)
 {
 	struct hn_softc *sc = device_get_softc(device_ctx->device);
+	struct hn_tx_ring *txr = &sc->hn_tx_ring[0]; /* TODO: vRSS */
 #if defined(INET) || defined(INET6)
 	struct hn_rx_ring *rxr = &sc->hn_rx_ring[0]; /* TODO: vRSS */
 	struct lro_ctrl *lro = &rxr->hn_lro;
@@ -695,11 +652,11 @@
 	}
 #endif
 
-	if (!sc->hn_txeof)
+	if (!txr->hn_txeof)
 		return;
 
-	sc->hn_txeof = 0;
-	hn_start_txeof(sc->hn_ifp);
+	txr->hn_txeof = 0;
+	hn_start_txeof(txr);
 }
 
 /*
@@ -707,7 +664,7 @@
  * If this function fails, then both txd and m_head0 will be freed.
  */
 static int
-hn_encap(struct hn_softc *sc, struct hn_txdesc *txd, struct mbuf **m_head0)
+hn_encap(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0)
 {
 	bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX];
 	int error, nsegs, i;
@@ -810,7 +767,7 @@
 #endif
 		tso_info->lso_v2_xmit.tcp_header_offset = 0;
 		tso_info->lso_v2_xmit.mss = m_head->m_pkthdr.tso_segsz;
-	} else if (m_head->m_pkthdr.csum_flags & sc->hn_csum_assist) {
+	} else if (m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) {
 		rndis_tcp_ip_csum_info *csum_info;
 
 		rndis_msg_size += RNDIS_CSUM_PPI_SIZE;
@@ -837,8 +794,8 @@
 	/*
 	 * Chimney send, if the packet could fit into one chimney buffer.
 	 */
-	if (packet->tot_data_buf_len < sc->hn_tx_chimney_size) {
-		netvsc_dev *net_dev = sc->net_dev;
+	if (packet->tot_data_buf_len < txr->hn_tx_chimney_size) {
+		netvsc_dev *net_dev = txr->hn_sc->net_dev;
 		uint32_t send_buf_section_idx;
 
 		send_buf_section_idx =
@@ -857,12 +814,12 @@
 			packet->send_buf_section_size =
 			    packet->tot_data_buf_len;
 			packet->page_buf_count = 0;
-			sc->hn_tx_chimney++;
+			txr->hn_tx_chimney++;
 			goto done;
 		}
 	}
 
-	error = hn_txdesc_dmamap_load(sc, txd, &m_head, segs, &nsegs);
+	error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs);
 	if (error) {
 		int freed;
 
@@ -872,12 +829,12 @@
 		m_freem(m_head);
 		*m_head0 = NULL;
 
-		freed = hn_txdesc_put(sc, txd);
+		freed = hn_txdesc_put(txr, txd);
 		KASSERT(freed != 0,
 		    ("fail to free txd upon txdma error"));
 
-		sc->hn_txdma_failed++;
-		if_inc_counter(sc->hn_ifp, IFCOUNTER_OERRORS, 1);
+		txr->hn_txdma_failed++;
+		if_inc_counter(txr->hn_sc->hn_ifp, IFCOUNTER_OERRORS, 1);
 		return error;
 	}
 	*m_head0 = m_head;
@@ -920,11 +877,15 @@
  * Start a transmit of one or more packets
  */
 static int
-hn_start_locked(struct ifnet *ifp, int len)
+hn_start_locked(struct hn_tx_ring *txr, int len)
 {
-	struct hn_softc *sc = ifp->if_softc;
+	struct hn_softc *sc = txr->hn_sc;
+	struct ifnet *ifp = sc->hn_ifp;
 	struct hv_device *device_ctx = vmbus_get_devctx(sc->hn_dev);
 
+	KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring"));
+	mtx_assert(&txr->hn_tx_lock, MA_OWNED);
+
 	if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
 	    IFF_DRV_RUNNING)
 		return 0;
@@ -948,15 +909,15 @@
 			return 1;
 		}
 
-		txd = hn_txdesc_get(sc);
+		txd = hn_txdesc_get(txr);
 		if (txd == NULL) {
-			sc->hn_no_txdescs++;
+			txr->hn_no_txdescs++;
 			IF_PREPEND(&ifp->if_snd, m_head);
 			atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
 			break;
 		}
 
-		error = hn_encap(sc, txd, &m_head);
+		error = hn_encap(txr, txd, &m_head);
 		if (error) {
 			/* Both txd and m_head are freed */
 			continue;
@@ -971,7 +932,7 @@
 			ETHER_BPF_MTAP(ifp, m_head);
 			if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
 		}
-		hn_txdesc_put(sc, txd);
+		hn_txdesc_put(txr, txd);
 
 		if (__predict_false(error)) {
 			int freed;
@@ -983,9 +944,9 @@
 			 * commands to run?  Ask netvsc_channel_rollup()
 			 * to kick start later.
 			 */
-			sc->hn_txeof = 1;
+			txr->hn_txeof = 1;
 			if (!send_failed) {
-				sc->hn_send_failed++;
+				txr->hn_send_failed++;
 				send_failed = 1;
 				/*
 				 * Try sending again after set hn_txeof;
@@ -1002,11 +963,11 @@
 			 * DMA map in hn_txdesc_put(), if it was loaded.
 			 */
 			txd->m = NULL;
-			freed = hn_txdesc_put(sc, txd);
+			freed = hn_txdesc_put(txr, txd);
 			KASSERT(freed != 0,
 			    ("fail to free txd upon send error"));
 
-			sc->hn_send_failed++;
+			txr->hn_send_failed++;
 			IF_PREPEND(&ifp->if_snd, m_head);
 			atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
 			break;
@@ -1384,8 +1345,10 @@
 	}
 
 	sc->hn_tx_chimney_max = sc->net_dev->send_section_size;
-	if (sc->hn_tx_chimney_size > sc->hn_tx_chimney_max)
-		sc->hn_tx_chimney_size = sc->hn_tx_chimney_max;
+	if (sc->hn_tx_ring[0].hn_tx_chimney_size >
+	    sc->hn_tx_chimney_max)
+		hn_set_tx_chimney_size(sc, sc->hn_tx_chimney_max);
+
 	hn_ifinit_locked(sc);
 
 	NV_LOCK(sc);
@@ -1450,10 +1413,13 @@
 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
 		if (mask & IFCAP_TXCSUM) {
 			ifp->if_capenable ^= IFCAP_TXCSUM;
-			if (ifp->if_capenable & IFCAP_TXCSUM)
-				ifp->if_hwassist |= sc->hn_csum_assist;
-			else
-				ifp->if_hwassist &= ~sc->hn_csum_assist;
+			if (ifp->if_capenable & IFCAP_TXCSUM) {
+				ifp->if_hwassist |=
+				    sc->hn_tx_ring[0].hn_csum_assist;
+			} else {
+				ifp->if_hwassist &=
+				    ~sc->hn_tx_ring[0].hn_csum_assist;
+			}
 		}
 
 		if (mask & IFCAP_RXCSUM)
@@ -1536,48 +1502,54 @@
 hn_start(struct ifnet *ifp)
 {
 	struct hn_softc *sc = ifp->if_softc;
+	struct hn_tx_ring *txr = &sc->hn_tx_ring[0];
 
-	if (sc->hn_sched_tx)
+	if (txr->hn_sched_tx)
 		goto do_sched;
 
-	if (NV_TRYLOCK(sc)) {
+	if (mtx_trylock(&txr->hn_tx_lock)) {
 		int sched;
 
-		sched = hn_start_locked(ifp, sc->hn_direct_tx_size);
-		NV_UNLOCK(sc);
+		sched = hn_start_locked(txr, txr->hn_direct_tx_size);
+		mtx_unlock(&txr->hn_tx_lock);
 		if (!sched)
 			return;
 	}
 do_sched:
-	taskqueue_enqueue(sc->hn_tx_taskq, &sc->hn_start_task);
+	taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_start_task);
 }
 
 static void
-hn_start_txeof(struct ifnet *ifp)
+hn_start_txeof(struct hn_tx_ring *txr)
 {
-	struct hn_softc *sc = ifp->if_softc;
+	struct hn_softc *sc = txr->hn_sc;
+	struct ifnet *ifp = sc->hn_ifp;
 
-	if (sc->hn_sched_tx)
+	KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring"));
+
+	if (txr->hn_sched_tx)
 		goto do_sched;
 
-	if (NV_TRYLOCK(sc)) {
+	if (mtx_trylock(&txr->hn_tx_lock)) {
 		int sched;
 
 		atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
-		sched = hn_start_locked(ifp, sc->hn_direct_tx_size);
-		NV_UNLOCK(sc);
-		if (sched)
-			taskqueue_enqueue(sc->hn_tx_taskq, &sc->hn_start_task);
+		sched = hn_start_locked(txr, txr->hn_direct_tx_size);
+		mtx_unlock(&txr->hn_tx_lock);
+		if (sched) {
+			taskqueue_enqueue(txr->hn_tx_taskq,
+			    &txr->hn_start_task);
+		}
 	} else {
 do_sched:
 		/*
 		 * Release the OACTIVE earlier, with the hope, that
 		 * others could catch up.  The task will clear the
-		 * flag again with the NV_LOCK to avoid possible
+		 * flag again with the hn_tx_lock to avoid possible
 		 * races.
 		 */
 		atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
-		taskqueue_enqueue(sc->hn_tx_taskq, &sc->hn_txeof_task);
+		taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
 	}
 }
 
@@ -1736,7 +1708,7 @@
 	struct hn_softc *sc = arg1;
 	int chimney_size, error;
 
-	chimney_size = sc->hn_tx_chimney_size;
+	chimney_size = sc->hn_tx_ring[0].hn_tx_chimney_size;
 	error = sysctl_handle_int(oidp, &chimney_size, 0, req);
 	if (error || req->newptr == NULL)
 		return error;
@@ -1744,8 +1716,7 @@
 	if (chimney_size > sc->hn_tx_chimney_max || chimney_size <= 0)
 		return EINVAL;
 
-	if (sc->hn_tx_chimney_size != chimney_size)
-		sc->hn_tx_chimney_size = chimney_size;
+	hn_set_tx_chimney_size(sc, chimney_size);
 
 	return 0;
 }
@@ -1802,6 +1773,56 @@
 }
 
 static int
+hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct hn_softc *sc = arg1;
+	int ofs = arg2, i, error;
+	struct hn_tx_ring *txr;
+	u_long stat;
+
+	stat = 0;
+	for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
+		txr = &sc->hn_tx_ring[i];
+		stat += *((u_long *)((uint8_t *)txr + ofs));
+	}
+
+	error = sysctl_handle_long(oidp, &stat, 0, req);
+	if (error || req->newptr == NULL)
+		return error;
+
+	/* Zero out this stat. */
+	for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
+		txr = &sc->hn_tx_ring[i];
+		*((u_long *)((uint8_t *)txr + ofs)) = 0;
+	}
+	return 0;
+}
+
+static int
+hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS)
+{
+	struct hn_softc *sc = arg1;
+	int ofs = arg2, i, error, conf;
+	struct hn_tx_ring *txr;
+
+	txr = &sc->hn_tx_ring[0];
+	conf = *((int *)((uint8_t *)txr + ofs));
+
+	error = sysctl_handle_int(oidp, &conf, 0, req);
+	if (error || req->newptr == NULL)
+		return error;
+
+	NV_LOCK(sc);
+	for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
+		txr = &sc->hn_tx_ring[i];
+		*((int *)((uint8_t *)txr + ofs)) = conf;
+	}
+	NV_UNLOCK(sc);
+
+	return 0;
+}
+
+static int
 hn_check_iplen(const struct mbuf *m, int hoff)
 {
 	const struct ip *ip;
@@ -2019,16 +2040,37 @@
 }
 
 static int
-hn_create_tx_ring(struct hn_softc *sc)
+hn_create_tx_ring(struct hn_softc *sc, int id)
 {
+	struct hn_tx_ring *txr = &sc->hn_tx_ring[id];
 	bus_dma_tag_t parent_dtag;
 	int error, i;
 
-	sc->hn_txdesc_cnt = HN_TX_DESC_CNT;
-	sc->hn_txdesc = malloc(sizeof(struct hn_txdesc) * sc->hn_txdesc_cnt,
+	txr->hn_sc = sc;
+
+	mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN);
+	mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF);
+
+	txr->hn_txdesc_cnt = HN_TX_DESC_CNT;
+	txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt,
 	    M_NETVSC, M_WAITOK | M_ZERO);
-	SLIST_INIT(&sc->hn_txlist);
-	mtx_init(&sc->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN);
+	SLIST_INIT(&txr->hn_txlist);
+
+	txr->hn_tx_taskq = sc->hn_tx_taskq;
+	TASK_INIT(&txr->hn_start_task, 0, hn_start_taskfunc, txr);
+	TASK_INIT(&txr->hn_txeof_task, 0, hn_txeof_taskfunc, txr);
+
+	txr->hn_direct_tx_size = hn_direct_tx_size;
+	if (hv_vmbus_protocal_version >= HV_VMBUS_VERSION_WIN8_1)
+		txr->hn_csum_assist = HN_CSUM_ASSIST;
+	else
+		txr->hn_csum_assist = HN_CSUM_ASSIST_WIN8;
+
+	/*
+	 * Always schedule transmission instead of trying to do direct
+	 * transmission.  This one gives the best performance so far.
+	 */
+	txr->hn_sched_tx = 1;
 
 	parent_dtag = bus_get_dma_tag(sc->hn_dev);
 
@@ -2045,7 +2087,7 @@
 	    0,				/* flags */
 	    NULL,			/* lockfunc */
 	    NULL,			/* lockfuncarg */
-	    &sc->hn_tx_rndis_dtag);
+	    &txr->hn_tx_rndis_dtag);
 	if (error) {
 		device_printf(sc->hn_dev, "failed to create rndis dmatag\n");
 		return error;
@@ -2064,21 +2106,21 @@
 	    0,				/* flags */
 	    NULL,			/* lockfunc */
 	    NULL,			/* lockfuncarg */
-	    &sc->hn_tx_data_dtag);
+	    &txr->hn_tx_data_dtag);
 	if (error) {
 		device_printf(sc->hn_dev, "failed to create data dmatag\n");
 		return error;
 	}
 
-	for (i = 0; i < sc->hn_txdesc_cnt; ++i) {
-		struct hn_txdesc *txd = &sc->hn_txdesc[i];
+	for (i = 0; i < txr->hn_txdesc_cnt; ++i) {
+		struct hn_txdesc *txd = &txr->hn_txdesc[i];
 
-		txd->sc = sc;
+		txd->txr = txr;
 
 		/*
		 * Allocate and load RNDIS messages.
		 */
-		error = bus_dmamem_alloc(sc->hn_tx_rndis_dtag,
+		error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag,
 		    (void **)&txd->rndis_msg,
 		    BUS_DMA_WAITOK | BUS_DMA_COHERENT,
 		    &txd->rndis_msg_dmap);
@@ -2088,7 +2130,7 @@
 			return error;
 		}
 
-		error = bus_dmamap_load(sc->hn_tx_rndis_dtag,
+		error = bus_dmamap_load(txr->hn_tx_rndis_dtag,
 		    txd->rndis_msg_dmap,
 		    txd->rndis_msg, HN_RNDIS_MSG_LEN,
 		    hn_dma_map_paddr, &txd->rndis_msg_paddr,
@@ -2096,80 +2138,221 @@
 		if (error) {
 			device_printf(sc->hn_dev,
 			    "failed to load rndis_msg, %d\n", i);
-			bus_dmamem_free(sc->hn_tx_rndis_dtag,
+			bus_dmamem_free(txr->hn_tx_rndis_dtag,
 			    txd->rndis_msg, txd->rndis_msg_dmap);
 			return error;
 		}
 
 		/* DMA map for TX data. */
-		error = bus_dmamap_create(sc->hn_tx_data_dtag, 0,
+		error = bus_dmamap_create(txr->hn_tx_data_dtag, 0,
 		    &txd->data_dmap);
 		if (error) {
 			device_printf(sc->hn_dev,
 			    "failed to allocate tx data dmamap\n");
-			bus_dmamap_unload(sc->hn_tx_rndis_dtag,
+			bus_dmamap_unload(txr->hn_tx_rndis_dtag,
 			    txd->rndis_msg_dmap);
-			bus_dmamem_free(sc->hn_tx_rndis_dtag,
+			bus_dmamem_free(txr->hn_tx_rndis_dtag,
 			    txd->rndis_msg, txd->rndis_msg_dmap);
 			return error;
 		}
 
 		/* All set, put it to list */
 		txd->flags |= HN_TXD_FLAG_ONLIST;
-		SLIST_INSERT_HEAD(&sc->hn_txlist, txd, link);
+		SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link);
+	}
+	txr->hn_txdesc_avail = txr->hn_txdesc_cnt;
+
+	if (sc->hn_tx_sysctl_tree != NULL) {
+		struct sysctl_oid_list *child;
+		struct sysctl_ctx_list *ctx;
+		char name[16];
+
+		/*
+		 * Create per TX ring sysctl tree:
+		 * dev.hn.UNIT.tx.RINGID
+		 */
+		ctx = device_get_sysctl_ctx(sc->hn_dev);
+		child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree);
+
+		snprintf(name, sizeof(name), "%d", id);
+		txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO,
+		    name, CTLFLAG_RD, 0, "");
+
+		if (txr->hn_tx_sysctl_tree != NULL) {
+			child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree);
+
+			SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail",
+			    CTLFLAG_RD, &txr->hn_txdesc_avail, 0,
+			    "# of available TX descs");
+		}
 	}
-	sc->hn_txdesc_avail = sc->hn_txdesc_cnt;
 
 	return 0;
 }
 
 static void
-hn_destroy_tx_ring(struct hn_softc *sc)
+hn_destroy_tx_ring(struct hn_tx_ring *txr)
 {
 	struct hn_txdesc *txd;
 
-	while ((txd = SLIST_FIRST(&sc->hn_txlist)) != NULL) {
+	if (txr->hn_txdesc == NULL)
+		return;
+
+	while ((txd = SLIST_FIRST(&txr->hn_txlist)) != NULL) {
 		KASSERT(txd->m == NULL, ("still has mbuf installed"));
 		KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0,
 		    ("still dma mapped"));
-		SLIST_REMOVE_HEAD(&sc->hn_txlist, link);
+		SLIST_REMOVE_HEAD(&txr->hn_txlist, link);
 
-		bus_dmamap_unload(sc->hn_tx_rndis_dtag,
+		bus_dmamap_unload(txr->hn_tx_rndis_dtag,
 		    txd->rndis_msg_dmap);
-		bus_dmamem_free(sc->hn_tx_rndis_dtag,
+		bus_dmamem_free(txr->hn_tx_rndis_dtag,
 		    txd->rndis_msg, txd->rndis_msg_dmap);
-		bus_dmamap_destroy(sc->hn_tx_data_dtag, txd->data_dmap);
+		bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap);
 	}
 
-	if (sc->hn_tx_data_dtag != NULL)
-		bus_dma_tag_destroy(sc->hn_tx_data_dtag);
-	if (sc->hn_tx_rndis_dtag != NULL)
-		bus_dma_tag_destroy(sc->hn_tx_rndis_dtag);
-	free(sc->hn_txdesc, M_NETVSC);
-	mtx_destroy(&sc->hn_txlist_spin);
+	if (txr->hn_tx_data_dtag != NULL)
+		bus_dma_tag_destroy(txr->hn_tx_data_dtag);
+	if (txr->hn_tx_rndis_dtag != NULL)
+		bus_dma_tag_destroy(txr->hn_tx_rndis_dtag);
+	free(txr->hn_txdesc, M_NETVSC);
+	txr->hn_txdesc = NULL;
+
+	mtx_destroy(&txr->hn_txlist_spin);
+	mtx_destroy(&txr->hn_tx_lock);
+}
+
+static int
+hn_create_tx_data(struct hn_softc *sc)
+{
+	struct sysctl_oid_list *child;
+	struct sysctl_ctx_list *ctx;
+	int i;
+
+	sc->hn_tx_ring_cnt = 1; /* TODO: vRSS */
+	sc->hn_tx_ring = malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt,
+	    M_NETVSC, M_WAITOK | M_ZERO);
+
+	ctx = device_get_sysctl_ctx(sc->hn_dev);
+	child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev));
+
+	/* Create dev.hn.UNIT.tx sysctl tree */
+	sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx",
+	    CTLFLAG_RD, 0, "");
+
+	for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
+		int error;
+
+		error = hn_create_tx_ring(sc, i);
+		if (error)
+			return error;
+	}
+
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs",
+	    CTLTYPE_ULONG | CTLFLAG_RW, sc,
+	    __offsetof(struct hn_tx_ring, hn_no_txdescs),
+	    hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed",
+	    CTLTYPE_ULONG | CTLFLAG_RW, sc,
+	    __offsetof(struct hn_tx_ring, hn_send_failed),
+	    hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failure");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed",
+	    CTLTYPE_ULONG | CTLFLAG_RW, sc,
+	    __offsetof(struct hn_tx_ring, hn_txdma_failed),
+	    hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failure");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed",
+	    CTLTYPE_ULONG | CTLFLAG_RW, sc,
+	    __offsetof(struct hn_tx_ring, hn_tx_collapsed),
+	    hn_tx_stat_ulong_sysctl, "LU", "# of TX mbuf collapsed");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney",
+	    CTLTYPE_ULONG | CTLFLAG_RW, sc,
+	    __offsetof(struct hn_tx_ring, hn_tx_chimney),
+	    hn_tx_stat_ulong_sysctl, "LU", "# of chimney send");
+	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt",
+	    CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0,
+	    "# of total TX descs");
+	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max",
+	    CTLFLAG_RD, &sc->hn_tx_chimney_max, 0,
+	    "Chimney send packet size upper boundary");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size",
+	    CTLTYPE_INT | CTLFLAG_RW, sc, 0, hn_tx_chimney_size_sysctl,
+	    "I", "Chimney send packet size limit");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size",
+	    CTLTYPE_INT | CTLFLAG_RW, sc,
+	    __offsetof(struct hn_tx_ring, hn_direct_tx_size),
+	    hn_tx_conf_int_sysctl, "I",
+	    "Size of the packet for direct transmission");
+	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx",
+	    CTLTYPE_INT | CTLFLAG_RW, sc,
+	    __offsetof(struct hn_tx_ring, hn_sched_tx),
+	    hn_tx_conf_int_sysctl, "I",
+	    "Always schedule transmission "
+	    "instead of doing direct transmission");
+
+	return 0;
 }
 
 static void
-hn_start_taskfunc(void *xsc, int pending __unused)
+hn_set_tx_chimney_size(struct hn_softc *sc, int chimney_size)
 {
-	struct hn_softc *sc = xsc;
+	int i;
 
 	NV_LOCK(sc);
-	hn_start_locked(sc->hn_ifp, 0);
+	for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
+		sc->hn_tx_ring[i].hn_tx_chimney_size = chimney_size;
 	NV_UNLOCK(sc);
 }
 
 static void
-hn_txeof_taskfunc(void *xsc, int pending __unused)
+hn_destroy_tx_data(struct hn_softc *sc)
 {
-	struct hn_softc *sc = xsc;
-	struct ifnet *ifp = sc->hn_ifp;
+	int i;
 
-	NV_LOCK(sc);
-	atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
-	hn_start_locked(ifp, 0);
-	NV_UNLOCK(sc);
+	if (sc->hn_tx_ring_cnt == 0)
+		return;
+
+	for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
+		hn_destroy_tx_ring(&sc->hn_tx_ring[i]);
+
+	free(sc->hn_tx_ring, M_NETVSC);
+	sc->hn_tx_ring = NULL;
+
+	sc->hn_tx_ring_cnt = 0;
+}
+
+static void
+hn_start_taskfunc(void *xtxr, int pending __unused)
+{
+	struct hn_tx_ring *txr = xtxr;
+
+	mtx_lock(&txr->hn_tx_lock);
+	hn_start_locked(txr, 0);
+	mtx_unlock(&txr->hn_tx_lock);
+}
+
+static void
+hn_txeof_taskfunc(void *xtxr, int pending __unused)
+{
+	struct hn_tx_ring *txr = xtxr;
+
+	mtx_lock(&txr->hn_tx_lock);
+	atomic_clear_int(&txr->hn_sc->hn_ifp->if_drv_flags, IFF_DRV_OACTIVE);
+	hn_start_locked(txr, 0);
+	mtx_unlock(&txr->hn_tx_lock);
+}
+
+static void
+hn_stop_tx_tasks(struct hn_softc *sc)
+{
+	int i;
+
+	for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
+		struct hn_tx_ring *txr = &sc->hn_tx_ring[i];
+
+		taskqueue_drain(txr->hn_tx_taskq, &txr->hn_start_task);
+		taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task);
+	}
 }
 
 static void