Index: sys/dev/hyperv/netvsc/if_hn.c =================================================================== --- sys/dev/hyperv/netvsc/if_hn.c +++ sys/dev/hyperv/netvsc/if_hn.c @@ -123,6 +123,8 @@ #define HN_VFMAP_SIZE_DEF 8 +#define HN_XPNT_VF_ATTWAIT_MIN 2 /* seconds */ + /* YYY should get it from the underlying channel */ #define HN_TX_DESC_CNT 512 @@ -263,6 +265,7 @@ static void hn_ifaddr_event(void *, struct ifnet *); static void hn_ifnet_attevent(void *, struct ifnet *); static void hn_ifnet_detevent(void *, struct ifnet *); +static void hn_ifnet_lnkevent(void *, struct ifnet *, int); static bool hn_ismyvf(const struct hn_softc *, const struct ifnet *); @@ -270,6 +273,15 @@ struct ifnet *, bool); static void hn_rxvf_set(struct hn_softc *, struct ifnet *); static void hn_rxvf_set_task(void *, int); +static void hn_xpnt_vf_input(struct ifnet *, struct mbuf *); +static int hn_xpnt_vf_iocsetflags(struct hn_softc *); +static int hn_xpnt_vf_iocsetcaps(struct hn_softc *, + struct ifreq *); +static void hn_xpnt_vf_saveifflags(struct hn_softc *); +static bool hn_xpnt_vf_isready(struct hn_softc *); +static void hn_xpnt_vf_setready(struct hn_softc *); +static void hn_xpnt_vf_init_taskfunc(void *, int); +static void hn_xpnt_vf_init(struct hn_softc *); static int hn_rndis_rxinfo(const void *, int, struct hn_rxinfo *); @@ -322,6 +334,8 @@ static int hn_rxvf_sysctl(SYSCTL_HANDLER_ARGS); static int hn_vflist_sysctl(SYSCTL_HANDLER_ARGS); static int hn_vfmap_sysctl(SYSCTL_HANDLER_ARGS); +static int hn_xpnt_vf_accbpf_sysctl(SYSCTL_HANDLER_ARGS); +static int hn_xpnt_vf_enabled_sysctl(SYSCTL_HANDLER_ARGS); static void hn_stop(struct hn_softc *, bool); static void hn_init_locked(struct hn_softc *); @@ -352,6 +366,7 @@ static void hn_drain_rxtx(struct hn_softc *, int); static void hn_polling(struct hn_softc *, u_int); static void hn_chan_polling(struct vmbus_channel *, u_int); +static void hn_mtu_change_fixup(struct hn_softc *); static void hn_update_link_status(struct hn_softc *); static void hn_change_network(struct hn_softc *); @@ -529,6 +544,22 @@ SYSCTL_PROC(_hw_hn, OID_AUTO, vfmap, CTLFLAG_RD | CTLTYPE_STRING, 0, 0, hn_vfmap_sysctl, "A", "VF mapping"); +/* Transparent VF */ +static int hn_xpnt_vf = 0; +SYSCTL_INT(_hw_hn, OID_AUTO, vf_transparent, CTLFLAG_RDTUN, + &hn_xpnt_vf, 0, "Transparent VF mod"); + +/* Accurate BPF support for Transparent VF */ +static int hn_xpnt_vf_accbpf = 0; +SYSCTL_INT(_hw_hn, OID_AUTO, vf_xpnt_accbpf, CTLFLAG_RDTUN, + &hn_xpnt_vf_accbpf, 0, "Accurate BPF for transparent VF"); + +/* Extra wait for transparent VF attach routing; unit seconds. */ +static int hn_xpnt_vf_attwait = HN_XPNT_VF_ATTWAIT_MIN; +SYSCTL_INT(_hw_hn, OID_AUTO, vf_xpnt_attwait, CTLFLAG_RWTUN, + &hn_xpnt_vf_attwait, 0, + "Extra wait for transparent VF attach routing; unit: seconds"); + static u_int hn_cpu_index; /* next CPU for channel */ static struct taskqueue **hn_tx_taskque;/* shared TX taskqueues */ @@ -807,8 +838,12 @@ HN_LOCK_ASSERT(sc); - if ((ifp->if_flags & IFF_PROMISC) || - (sc->hn_flags & HN_FLAG_RXVF)) { + /* + * If the non-transparent mode VF is activated, we don't know how + * its RX filter is configured, so stick the synthetic device in + * the promiscous mode. + */ + if ((ifp->if_flags & IFF_PROMISC) || (sc->hn_flags & HN_FLAG_RXVF)) { filter = NDIS_PACKET_TYPE_PROMISCUOUS; } else { filter = NDIS_PACKET_TYPE_DIRECTED; @@ -1086,7 +1121,7 @@ } hn_nvs_set_datapath(sc, - rxvf ? HN_NVS_DATAPATH_VF : HN_NVS_DATAPATH_SYNTHETIC); + rxvf ? HN_NVS_DATAPATH_VF : HN_NVS_DATAPATH_SYNTH); hn_rxvf_set(sc, rxvf ? ifp : NULL); @@ -1126,6 +1161,322 @@ hn_rxvf_change(arg, ifp, ifp->if_flags & IFF_UP); } +static int +hn_xpnt_vf_iocsetcaps(struct hn_softc *sc, struct ifreq *ifr) +{ + struct ifnet *ifp, *vf_ifp; + uint64_t tmp; + int error; + + HN_LOCK_ASSERT(sc); + ifp = sc->hn_ifp; + vf_ifp = sc->hn_vf_ifp; + + /* + * Fix up requested capabilities w/ supported capabilities, + * since the supported capabilities could have been changed. + */ + ifr->ifr_reqcap &= ifp->if_capabilities; + /* Pass SIOCSIFCAP to VF. */ + error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFCAP, (caddr_t)ifr); + + /* + * NOTE: + * The error will be propagated to the callers, however, it + * is _not_ useful here. + */ + + /* + * Merge VF's enabled capabilities. + */ + ifp->if_capenable = vf_ifp->if_capenable & ifp->if_capabilities; + + tmp = vf_ifp->if_hwassist & HN_CSUM_IP_HWASSIST(sc); + if (ifp->if_capenable & IFCAP_TXCSUM) + ifp->if_hwassist |= tmp; + else + ifp->if_hwassist &= ~tmp; + + tmp = vf_ifp->if_hwassist & HN_CSUM_IP6_HWASSIST(sc); + if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) + ifp->if_hwassist |= tmp; + else + ifp->if_hwassist &= ~tmp; + + tmp = vf_ifp->if_hwassist & CSUM_IP_TSO; + if (ifp->if_capenable & IFCAP_TSO4) + ifp->if_hwassist |= tmp; + else + ifp->if_hwassist &= ~tmp; + + tmp = vf_ifp->if_hwassist & CSUM_IP6_TSO; + if (ifp->if_capenable & IFCAP_TSO6) + ifp->if_hwassist |= tmp; + else + ifp->if_hwassist &= ~tmp; + + return (error); +} + +static int +hn_xpnt_vf_iocsetflags(struct hn_softc *sc) +{ + struct ifnet *vf_ifp; + struct ifreq ifr; + + HN_LOCK_ASSERT(sc); + vf_ifp = sc->hn_vf_ifp; + + memset(&ifr, 0, sizeof(ifr)); + strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name)); + ifr.ifr_flags = vf_ifp->if_flags & 0xffff; + ifr.ifr_flagshigh = vf_ifp->if_flags >> 16; + return (vf_ifp->if_ioctl(vf_ifp, SIOCSIFFLAGS, (caddr_t)&ifr)); +} + +static void +hn_xpnt_vf_saveifflags(struct hn_softc *sc) +{ + struct ifnet *ifp = sc->hn_ifp; + int allmulti = 0; + + HN_LOCK_ASSERT(sc); + + /* XXX vlan(4) style mcast addr maintenance */ + if (!TAILQ_EMPTY(&ifp->if_multiaddrs)) + allmulti = IFF_ALLMULTI; + + /* Always set the VF's if_flags */ + sc->hn_vf_ifp->if_flags = ifp->if_flags | allmulti; +} + +static void +hn_xpnt_vf_input(struct ifnet *vf_ifp, struct mbuf *m) +{ + struct rm_priotracker pt; + struct ifnet *hn_ifp = NULL; + struct mbuf *mn; + + /* + * XXX racy, if hn(4) ever detached. + */ + rm_rlock(&hn_vfmap_lock, &pt); + if (vf_ifp->if_index < hn_vfmap_size) + hn_ifp = hn_vfmap[vf_ifp->if_index]; + rm_runlock(&hn_vfmap_lock, &pt); + + if (hn_ifp != NULL) { + /* + * Fix up rcvif and go through hn(4)'s if_input and + * increase ipackets. + */ + for (mn = m; mn != NULL; mn = mn->m_nextpkt) { + /* Allow tapping on the VF. */ + ETHER_BPF_MTAP(vf_ifp, mn); + mn->m_pkthdr.rcvif = hn_ifp; + if_inc_counter(hn_ifp, IFCOUNTER_IPACKETS, 1); + } + hn_ifp->if_input(hn_ifp, m); + } else { + /* + * In the middle of the transition; free this + * mbuf chain. + */ + while (m != NULL) { + mn = m->m_nextpkt; + m->m_nextpkt = NULL; + m_freem(m); + m = mn; + } + } +} + +static void +hn_mtu_change_fixup(struct hn_softc *sc) +{ + struct ifnet *ifp; + + HN_LOCK_ASSERT(sc); + ifp = sc->hn_ifp; + + hn_set_tso_maxsize(sc, hn_tso_maxlen, ifp->if_mtu); +#if __FreeBSD_version >= 1100099 + if (sc->hn_rx_ring[0].hn_lro.lro_length_lim < HN_LRO_LENLIM_MIN(ifp)) + hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp)); +#endif +} + +static void +hn_xpnt_vf_setready(struct hn_softc *sc) +{ + struct ifnet *ifp, *vf_ifp; + struct ifreq ifr; + + HN_LOCK_ASSERT(sc); + ifp = sc->hn_ifp; + vf_ifp = sc->hn_vf_ifp; + + /* + * Mark the VF ready. + */ + sc->hn_vf_rdytick = 0; + + /* + * Save information for restoration. + */ + sc->hn_saved_caps = ifp->if_capabilities; + sc->hn_saved_tsomax = ifp->if_hw_tsomax; + sc->hn_saved_tsosegcnt = ifp->if_hw_tsomaxsegcount; + sc->hn_saved_tsosegsz = ifp->if_hw_tsomaxsegsize; + + /* + * Intersect supported/enabled capabilities. + * + * NOTE: + * if_hwassist is not changed here. + */ + ifp->if_capabilities &= vf_ifp->if_capabilities; + ifp->if_capenable &= ifp->if_capabilities; + + /* + * Fix TSO settings. + */ + if (ifp->if_hw_tsomax > vf_ifp->if_hw_tsomax) + ifp->if_hw_tsomax = vf_ifp->if_hw_tsomax; + if (ifp->if_hw_tsomaxsegcount > vf_ifp->if_hw_tsomaxsegcount) + ifp->if_hw_tsomaxsegcount = vf_ifp->if_hw_tsomaxsegcount; + if (ifp->if_hw_tsomaxsegsize > vf_ifp->if_hw_tsomaxsegsize) + ifp->if_hw_tsomaxsegsize = vf_ifp->if_hw_tsomaxsegsize; + + /* + * Change VF's enabled capabilities. + */ + memset(&ifr, 0, sizeof(ifr)); + strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name)); + ifr.ifr_reqcap = ifp->if_capenable; + hn_xpnt_vf_iocsetcaps(sc, &ifr); + + if (ifp->if_mtu != ETHERMTU) { + int error; + + /* + * Change VF's MTU. + */ + memset(&ifr, 0, sizeof(ifr)); + strlcpy(ifr.ifr_name, vf_ifp->if_xname, sizeof(ifr.ifr_name)); + ifr.ifr_mtu = ifp->if_mtu; + error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFMTU, (caddr_t)&ifr); + if (error) { + if_printf(ifp, "%s SIOCSIFMTU %u failed\n", + vf_ifp->if_xname, ifp->if_mtu); + if (ifp->if_mtu > ETHERMTU) { + if_printf(ifp, "change MTU to %d\n", ETHERMTU); + + /* + * XXX + * No need to adjust the synthetic parts' MTU; + * failure of the adjustment will cause us + * infinite headache. + */ + ifp->if_mtu = ETHERMTU; + hn_mtu_change_fixup(sc); + } + } + } +} + +static bool +hn_xpnt_vf_isready(struct hn_softc *sc) +{ + + HN_LOCK_ASSERT(sc); + + if (!hn_xpnt_vf || sc->hn_vf_ifp == NULL) + return (false); + + if (sc->hn_vf_rdytick == 0) + return (true); + + if (sc->hn_vf_rdytick > ticks) + return (false); + + /* Mark VF as ready. */ + hn_xpnt_vf_setready(sc); + return (true); +} + +static void +hn_xpnt_vf_init(struct hn_softc *sc) +{ + int error; + + HN_LOCK_ASSERT(sc); + + KASSERT((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) == 0, + ("%s: transparent VF was enabled", sc->hn_ifp->if_xname)); + + if (bootverbose) { + if_printf(sc->hn_ifp, "try bringing up %s\n", + sc->hn_vf_ifp->if_xname); + } + + /* + * Bring the VF up. + */ + hn_xpnt_vf_saveifflags(sc); + sc->hn_vf_ifp->if_flags |= IFF_UP; + error = hn_xpnt_vf_iocsetflags(sc); + if (error) { + if_printf(sc->hn_ifp, "bringing up %s failed: %d\n", + sc->hn_vf_ifp->if_xname, error); + return; + } + + /* + * NOTE: + * Datapath setting must happen _after_ bringing the VF up. + */ + hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_VF); + + /* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */ + rm_wlock(&sc->hn_vf_lock); + sc->hn_xvf_flags |= HN_XVFFLAG_ENABLED; + rm_wunlock(&sc->hn_vf_lock); +} + +static void +hn_xpnt_vf_init_taskfunc(void *xsc, int pending __unused) +{ + struct hn_softc *sc = xsc; + + HN_LOCK(sc); + + if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) + goto done; + if (sc->hn_vf_ifp == NULL) + goto done; + if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) + goto done; + + if (sc->hn_vf_rdytick != 0) { + /* Mark VF as ready. */ + hn_xpnt_vf_setready(sc); + } + + if (sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) { + /* + * Delayed VF initialization. + */ + if (bootverbose) { + if_printf(sc->hn_ifp, "delayed initialize %s\n", + sc->hn_vf_ifp->if_xname); + } + hn_xpnt_vf_init(sc); + } +done: + HN_UNLOCK(sc); +} + static void hn_ifnet_attevent(void *xsc, struct ifnet *ifp) { @@ -1145,6 +1496,16 @@ goto done; } + if (hn_xpnt_vf && ifp->if_start != NULL) { + /* + * ifnet.if_start is _not_ supported by transparent + * mode VF; mainly due to the IFF_DRV_OACTIVE flag. + */ + if_printf(sc->hn_ifp, "%s uses if_start, which is unsupported " + "in transparent VF mode.\n", ifp->if_xname); + goto done; + } + rm_wlock(&hn_vfmap_lock); if (ifp->if_index >= hn_vfmap_size) { @@ -1168,7 +1529,37 @@ rm_wunlock(&hn_vfmap_lock); + /* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */ + rm_wlock(&sc->hn_vf_lock); + KASSERT((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) == 0, + ("%s: transparent VF was enabled", sc->hn_ifp->if_xname)); sc->hn_vf_ifp = ifp; + rm_wunlock(&sc->hn_vf_lock); + + if (hn_xpnt_vf) { + int wait_ticks; + + /* + * Install if_input for vf_ifp, which does vf_ifp -> hn_ifp. + * Save vf_ifp's current if_input for later restoration. + */ + sc->hn_vf_input = ifp->if_input; + ifp->if_input = hn_xpnt_vf_input; + + /* + * Stop link status management; use the VF's. + */ + hn_suspend_mgmt(sc); + + /* + * Give VF sometime to complete its attach routing. + */ + wait_ticks = hn_xpnt_vf_attwait * hz; + sc->hn_vf_rdytick = ticks + wait_ticks; + + taskqueue_enqueue_timeout(sc->hn_vf_taskq, &sc->hn_vf_init, + wait_ticks); + } done: HN_UNLOCK(sc); } @@ -1186,7 +1577,61 @@ if (!hn_ismyvf(sc, ifp)) goto done; + if (hn_xpnt_vf) { + /* + * Make sure that the delayed initialization is not running. + * + * NOTE: + * - This lock _must_ be released, since the hn_vf_init task + * will try holding this lock. + * - It is safe to release this lock here, since the + * hn_ifnet_attevent() is interlocked by the hn_vf_ifp. + * + * XXX racy, if hn(4) ever detached. + */ + HN_UNLOCK(sc); + taskqueue_drain_timeout(sc->hn_vf_taskq, &sc->hn_vf_init); + HN_LOCK(sc); + + KASSERT(sc->hn_vf_input != NULL, ("%s VF input is not saved", + sc->hn_ifp->if_xname)); + ifp->if_input = sc->hn_vf_input; + sc->hn_vf_input = NULL; + + if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) + hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_SYNTH); + + if (sc->hn_vf_rdytick == 0) { + /* + * The VF was ready; restore some settings. + */ + sc->hn_ifp->if_capabilities = sc->hn_saved_caps; + /* + * NOTE: + * There is _no_ need to fixup if_capenable and + * if_hwassist, since the if_capabilities before + * restoration was an intersection of the VF's + * if_capabilites and the synthetic device's + * if_capabilites. + */ + sc->hn_ifp->if_hw_tsomax = sc->hn_saved_tsomax; + sc->hn_ifp->if_hw_tsomaxsegcount = + sc->hn_saved_tsosegcnt; + sc->hn_ifp->if_hw_tsomaxsegsize = sc->hn_saved_tsosegsz; + } + + /* + * Resume link status management, which was suspended + * by hn_ifnet_attevent(). + */ + hn_resume_mgmt(sc); + } + + /* NOTE: hn_vf_lock for hn_transmit()/hn_qflush() */ + rm_wlock(&sc->hn_vf_lock); + sc->hn_xvf_flags &= ~HN_XVFFLAG_ENABLED; sc->hn_vf_ifp = NULL; + rm_wunlock(&sc->hn_vf_lock); rm_wlock(&hn_vfmap_lock); @@ -1205,6 +1650,15 @@ HN_UNLOCK(sc); } +static void +hn_ifnet_lnkevent(void *xsc, struct ifnet *ifp, int link_state) +{ + struct hn_softc *sc = xsc; + + if (sc->hn_vf_ifp == ifp) + if_link_state_change(sc->hn_ifp, link_state); +} + /* {F8615163-DF3E-46c5-913F-F2D2F965ED0E} */ static const struct hyperv_guid g_net_vsc_device_type = { .hv_guid = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46, @@ -1236,6 +1690,9 @@ sc->hn_dev = dev; sc->hn_prichan = vmbus_get_channel(dev); HN_LOCK_INIT(sc); + rm_init(&sc->hn_vf_lock, "hnvf"); + if (hn_xpnt_vf && hn_xpnt_vf_accbpf) + sc->hn_xvf_flags |= HN_XVFFLAG_ACCBPF; /* * Initialize these tunables once. @@ -1275,6 +1732,18 @@ TIMEOUT_TASK_INIT(sc->hn_mgmt_taskq0, &sc->hn_netchg_status, 0, hn_netchg_status_taskfunc, sc); + if (hn_xpnt_vf) { + /* + * Setup taskqueue for VF tasks, e.g. delayed VF bringing up. + */ + sc->hn_vf_taskq = taskqueue_create("hn_vf", M_WAITOK, + taskqueue_thread_enqueue, &sc->hn_vf_taskq); + taskqueue_start_threads(&sc->hn_vf_taskq, 1, PI_NET, "%s vf", + device_get_nameunit(dev)); + TIMEOUT_TASK_INIT(sc->hn_vf_taskq, &sc->hn_vf_init, 0, + hn_xpnt_vf_init_taskfunc, sc); + } + /* * Allocate ifnet and setup its name earlier, so that if_printf * can be used by functions, which will be called after @@ -1401,6 +1870,14 @@ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "hwassist", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_hwassist_sysctl, "A", "hwassist"); + SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_max", + CTLFLAG_RD, &ifp->if_hw_tsomax, 0, "max TSO size"); + SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_maxsegcnt", + CTLFLAG_RD, &ifp->if_hw_tsomaxsegcount, 0, + "max # of TSO segments"); + SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "tso_maxsegsz", + CTLFLAG_RD, &ifp->if_hw_tsomaxsegsize, 0, + "max size of TSO segment"); SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxfilter", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_rxfilter_sysctl, "A", "rxfilter"); @@ -1445,9 +1922,20 @@ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf", CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, hn_vf_sysctl, "A", "Virtual Function's name"); - SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxvf", - CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, - hn_rxvf_sysctl, "A", "activated Virtual Function's name"); + if (!hn_xpnt_vf) { + SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxvf", + CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, + hn_rxvf_sysctl, "A", "activated Virtual Function's name"); + } else { + SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf_xpnt_enabled", + CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, + hn_xpnt_vf_enabled_sysctl, "I", + "Transparent VF enabled"); + SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf_xpnt_accbpf", + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0, + hn_xpnt_vf_accbpf_sysctl, "I", + "Accurate BPF for transparent VF"); + } /* * Setup the ifmedia, which has been initialized earlier. @@ -1480,7 +1968,7 @@ ifp->if_qflush = hn_xmit_qflush; } - ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_LRO; + ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_LRO | IFCAP_LINKSTATE; #ifdef foo /* We can't diff IPv6 packets from IPv4 packets on RX path. */ ifp->if_capabilities |= IFCAP_RXCSUM_IPV6; @@ -1515,7 +2003,13 @@ ifp->if_hwassist &= ~(HN_CSUM_IP6_MASK | CSUM_IP6_TSO); if (ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) { + /* + * Lock hn_set_tso_maxsize() to simplify its + * internal logic. + */ + HN_LOCK(sc); hn_set_tso_maxsize(sc, hn_tso_maxlen, ETHERMTU); + HN_UNLOCK(sc); ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX; ifp->if_hw_tsomaxsegsize = PAGE_SIZE; } @@ -1536,10 +2030,15 @@ sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0; hn_update_link_status(sc); - sc->hn_ifnet_evthand = EVENTHANDLER_REGISTER(ifnet_event, - hn_ifnet_event, sc, EVENTHANDLER_PRI_ANY); - sc->hn_ifaddr_evthand = EVENTHANDLER_REGISTER(ifaddr_event, - hn_ifaddr_event, sc, EVENTHANDLER_PRI_ANY); + if (!hn_xpnt_vf) { + sc->hn_ifnet_evthand = EVENTHANDLER_REGISTER(ifnet_event, + hn_ifnet_event, sc, EVENTHANDLER_PRI_ANY); + sc->hn_ifaddr_evthand = EVENTHANDLER_REGISTER(ifaddr_event, + hn_ifaddr_event, sc, EVENTHANDLER_PRI_ANY); + } else { + sc->hn_ifnet_lnkhand = EVENTHANDLER_REGISTER(ifnet_link_event, + hn_ifnet_lnkevent, sc, EVENTHANDLER_PRI_ANY); + } /* * NOTE: @@ -1566,6 +2065,14 @@ struct hn_softc *sc = device_get_softc(dev); struct ifnet *ifp = sc->hn_ifp, *vf_ifp; + if (sc->hn_xact != NULL && vmbus_chan_is_revoked(sc->hn_prichan)) { + /* + * In case that the vmbus missed the orphan handler + * installation. + */ + vmbus_xact_ctx_orphan(sc->hn_xact); + } + if (sc->hn_ifaddr_evthand != NULL) EVENTHANDLER_DEREGISTER(ifaddr_event, sc->hn_ifaddr_evthand); if (sc->hn_ifnet_evthand != NULL) @@ -1578,20 +2085,14 @@ EVENTHANDLER_DEREGISTER(ifnet_departure_event, sc->hn_ifnet_dethand); } + if (sc->hn_ifnet_lnkhand != NULL) + EVENTHANDLER_DEREGISTER(ifnet_link_event, sc->hn_ifnet_lnkhand); vf_ifp = sc->hn_vf_ifp; __compiler_membar(); if (vf_ifp != NULL) hn_ifnet_detevent(sc, vf_ifp); - if (sc->hn_xact != NULL && vmbus_chan_is_revoked(sc->hn_prichan)) { - /* - * In case that the vmbus missed the orphan handler - * installation. - */ - vmbus_xact_ctx_orphan(sc->hn_xact); - } - if (device_is_attached(dev)) { HN_LOCK(sc); if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) { @@ -1621,6 +2122,8 @@ free(sc->hn_tx_taskqs, M_DEVBUF); } taskqueue_free(sc->hn_mgmt_taskq0); + if (sc->hn_vf_taskq != NULL) + taskqueue_free(sc->hn_vf_taskq); if (sc->hn_xact != NULL) { /* @@ -1634,6 +2137,7 @@ if_free(ifp); HN_LOCK_DESTROY(sc); + rm_destroy(&sc->hn_vf_lock); return (0); } @@ -2699,7 +3203,8 @@ hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct hn_softc *sc = ifp->if_softc; - struct ifreq *ifr = (struct ifreq *)data; + struct ifreq *ifr = (struct ifreq *)data, ifr_vf; + struct ifnet *vf_ifp; int mask, error = 0; switch (cmd) { @@ -2728,6 +3233,21 @@ break; } + if (hn_xpnt_vf_isready(sc)) { + vf_ifp = sc->hn_vf_ifp; + ifr_vf = *ifr; + strlcpy(ifr_vf.ifr_name, vf_ifp->if_xname, + sizeof(ifr_vf.ifr_name)); + error = vf_ifp->if_ioctl(vf_ifp, SIOCSIFMTU, + (caddr_t)&ifr_vf); + if (error) { + HN_UNLOCK(sc); + if_printf(ifp, "%s SIOCSIFMTU %d failed: %d\n", + vf_ifp->if_xname, ifr->ifr_mtu, error); + break; + } + } + /* * Suspend this interface before the synthetic parts * are ripped. @@ -2756,23 +3276,32 @@ ifp->if_mtu = ifr->ifr_mtu; /* - * Make sure that various parameters based on MTU are - * still valid, after the MTU change. + * Synthetic parts' reattach may change the chimney + * sending size; update it. */ if (sc->hn_tx_ring[0].hn_chim_size > sc->hn_chim_szmax) hn_set_chim_size(sc, sc->hn_chim_szmax); - hn_set_tso_maxsize(sc, hn_tso_maxlen, ifp->if_mtu); -#if __FreeBSD_version >= 1100099 - if (sc->hn_rx_ring[0].hn_lro.lro_length_lim < - HN_LRO_LENLIM_MIN(ifp)) - hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp)); -#endif + + /* + * Make sure that various parameters based on MTU are + * still valid, after the MTU change. + */ + hn_mtu_change_fixup(sc); /* * All done! Resume the interface now. */ hn_resume(sc); + if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) { + /* + * Since we have reattached the NVS part, + * change the datapath to VF again; in case + * that it is lost, after the NVS was detached. + */ + hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_VF); + } + HN_UNLOCK(sc); break; @@ -2784,6 +3313,9 @@ break; } + if (hn_xpnt_vf_isready(sc)) + hn_xpnt_vf_saveifflags(sc); + if (ifp->if_flags & IFF_UP) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) { /* @@ -2794,6 +3326,9 @@ HN_NO_SLEEPING(sc); hn_rxfilter_config(sc); HN_SLEEPING_OK(sc); + + if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) + error = hn_xpnt_vf_iocsetflags(sc); } else { hn_init_locked(sc); } @@ -2808,7 +3343,22 @@ case SIOCSIFCAP: HN_LOCK(sc); - mask = ifr->ifr_reqcap ^ ifp->if_capenable; + + if (hn_xpnt_vf_isready(sc)) { + ifr_vf = *ifr; + strlcpy(ifr_vf.ifr_name, sc->hn_vf_ifp->if_xname, + sizeof(ifr_vf.ifr_name)); + error = hn_xpnt_vf_iocsetcaps(sc, &ifr_vf); + HN_UNLOCK(sc); + break; + } + + /* + * Fix up requested capabilities w/ supported capabilities, + * since the supported capabilities could have been changed. + */ + mask = (ifr->ifr_reqcap & ifp->if_capabilities) ^ + ifp->if_capenable; if (mask & IFCAP_TXCSUM) { ifp->if_capenable ^= IFCAP_TXCSUM; @@ -2873,11 +3423,42 @@ HN_SLEEPING_OK(sc); } + /* XXX vlan(4) style mcast addr maintenance */ + if (hn_xpnt_vf_isready(sc)) { + int old_if_flags; + + old_if_flags = sc->hn_vf_ifp->if_flags; + hn_xpnt_vf_saveifflags(sc); + + if ((sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) && + ((old_if_flags ^ sc->hn_vf_ifp->if_flags) & + IFF_ALLMULTI)) + error = hn_xpnt_vf_iocsetflags(sc); + } + HN_UNLOCK(sc); break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: + HN_LOCK(sc); + if (hn_xpnt_vf_isready(sc)) { + /* + * SIOCGIFMEDIA expects ifmediareq, so don't + * create and pass ifr_vf to the VF here; just + * replace the ifr_name. + */ + vf_ifp = sc->hn_vf_ifp; + strlcpy(ifr->ifr_name, vf_ifp->if_xname, + sizeof(ifr->ifr_name)); + error = vf_ifp->if_ioctl(vf_ifp, cmd, data); + /* Restore the ifr_name. */ + strlcpy(ifr->ifr_name, ifp->if_xname, + sizeof(ifr->ifr_name)); + HN_UNLOCK(sc); + break; + } + HN_UNLOCK(sc); error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd); break; @@ -2899,11 +3480,37 @@ KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED, ("synthetic parts were not attached")); + /* Clear RUNNING bit ASAP. */ + atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); + /* Disable polling. */ hn_polling(sc, 0); - /* Clear RUNNING bit _before_ hn_suspend_data() */ - atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); + if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) { + KASSERT(sc->hn_vf_ifp != NULL, + ("%s: VF is not attached", ifp->if_xname)); + + /* NOTE: hn_vf_lock for hn_transmit() */ + rm_wlock(&sc->hn_vf_lock); + sc->hn_xvf_flags &= ~HN_XVFFLAG_ENABLED; + rm_wunlock(&sc->hn_vf_lock); + + /* + * NOTE: + * Datapath setting must happen _before_ bringing + * the VF down. + */ + hn_nvs_set_datapath(sc, HN_NVS_DATAPATH_SYNTH); + + /* + * Bring the VF down. + */ + hn_xpnt_vf_saveifflags(sc); + sc->hn_vf_ifp->if_flags &= ~IFF_UP; + hn_xpnt_vf_iocsetflags(sc); + } + + /* Suspend data transfers. */ hn_suspend_data(sc); /* Clear OACTIVE bit. */ @@ -2912,8 +3519,8 @@ sc->hn_tx_ring[i].hn_oactive = 0; /* - * If the VF is active, make sure the filter is not 0, even if - * the synthetic NIC is down. + * If the non-transparent mode VF is active, make sure + * that the RX filter still allows packet reception. */ if (!detaching && (sc->hn_flags & HN_FLAG_RXVF)) hn_rxfilter_config(sc); @@ -2944,6 +3551,11 @@ /* Clear TX 'suspended' bit. */ hn_resume_tx(sc, sc->hn_tx_ring_inuse); + if (hn_xpnt_vf_isready(sc)) { + /* Initialize transparent VF. */ + hn_xpnt_vf_init(sc); + } + /* Everything is ready; unleash! */ atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING); @@ -3567,6 +4179,42 @@ } static int +hn_xpnt_vf_accbpf_sysctl(SYSCTL_HANDLER_ARGS) +{ + struct hn_softc *sc = arg1; + int error, onoff = 0; + + if (sc->hn_xvf_flags & HN_XVFFLAG_ACCBPF) + onoff = 1; + error = sysctl_handle_int(oidp, &onoff, 0, req); + if (error || req->newptr == NULL) + return (error); + + HN_LOCK(sc); + /* NOTE: hn_vf_lock for hn_transmit() */ + rm_wlock(&sc->hn_vf_lock); + if (onoff) + sc->hn_xvf_flags |= HN_XVFFLAG_ACCBPF; + else + sc->hn_xvf_flags &= ~HN_XVFFLAG_ACCBPF; + rm_wunlock(&sc->hn_vf_lock); + HN_UNLOCK(sc); + + return (0); +} + +static int +hn_xpnt_vf_enabled_sysctl(SYSCTL_HANDLER_ARGS) +{ + struct hn_softc *sc = arg1; + int enabled = 0; + + if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) + enabled = 1; + return (sysctl_handle_int(oidp, &enabled, 0, req)); +} + +static int hn_check_iplen(const struct mbuf *m, int hoff) { const struct ip *ip; @@ -4282,8 +4930,11 @@ hn_set_tso_maxsize(struct hn_softc *sc, int tso_maxlen, int mtu) { struct ifnet *ifp = sc->hn_ifp; + u_int hw_tsomax; int tso_minlen; + HN_LOCK_ASSERT(sc); + if ((ifp->if_capabilities & (IFCAP_TSO4 | IFCAP_TSO6)) == 0) return; @@ -4301,7 +4952,13 @@ tso_maxlen = IP_MAXPACKET; if (tso_maxlen > sc->hn_ndis_tso_szmax) tso_maxlen = sc->hn_ndis_tso_szmax; - ifp->if_hw_tsomax = tso_maxlen - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); + hw_tsomax = tso_maxlen - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); + + if (hn_xpnt_vf_isready(sc)) { + if (hw_tsomax > sc->hn_vf_ifp->if_hw_tsomax) + hw_tsomax = sc->hn_vf_ifp->if_hw_tsomax; + } + ifp->if_hw_tsomax = hw_tsomax; if (bootverbose) if_printf(ifp, "TSO size max %u\n", ifp->if_hw_tsomax); } @@ -4654,6 +5311,59 @@ struct hn_tx_ring *txr; int error, idx = 0; + if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) { + struct rm_priotracker pt; + + rm_rlock(&sc->hn_vf_lock, &pt); + if (__predict_true(sc->hn_xvf_flags & HN_XVFFLAG_ENABLED)) { + struct mbuf *m_bpf = NULL; + int obytes, omcast; + + obytes = m->m_pkthdr.len; + if (m->m_flags & M_MCAST) + omcast = 1; + + if (sc->hn_xvf_flags & HN_XVFFLAG_ACCBPF) { + if (bpf_peers_present(ifp->if_bpf)) { + m_bpf = m_copypacket(m, M_NOWAIT); + if (m_bpf == NULL) { + /* + * Failed to grab a shallow + * copy; tap now. + */ + ETHER_BPF_MTAP(ifp, m); + } + } + } else { + ETHER_BPF_MTAP(ifp, m); + } + + error = sc->hn_vf_ifp->if_transmit(sc->hn_vf_ifp, m); + rm_runlock(&sc->hn_vf_lock, &pt); + + if (m_bpf != NULL) { + if (!error) + ETHER_BPF_MTAP(ifp, m_bpf); + m_freem(m_bpf); + } + + if (error == ENOBUFS) { + if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); + } else if (error) { + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); + } else { + if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); + if_inc_counter(ifp, IFCOUNTER_OBYTES, obytes); + if (omcast) { + if_inc_counter(ifp, IFCOUNTER_OMCASTS, + omcast); + } + } + return (error); + } + rm_runlock(&sc->hn_vf_lock, &pt); + } + #if defined(INET6) || defined(INET) /* * Perform TSO packet header fixup now, since the TSO @@ -4746,11 +5456,17 @@ hn_xmit_qflush(struct ifnet *ifp) { struct hn_softc *sc = ifp->if_softc; + struct rm_priotracker pt; int i; for (i = 0; i < sc->hn_tx_ring_inuse; ++i) hn_tx_ring_qflush(&sc->hn_tx_ring[i]); if_qflush(ifp); + + rm_rlock(&sc->hn_vf_lock, &pt); + if (sc->hn_xvf_flags & HN_XVFFLAG_ENABLED) + sc->hn_vf_ifp->if_qflush(sc->hn_vf_ifp); + rm_runlock(&sc->hn_vf_lock, &pt); } static void @@ -5457,6 +6173,11 @@ /* Disable polling. */ hn_polling(sc, 0); + /* + * If the non-transparent mode VF is activated, the synthetic + * device is receiving packets, so the data path of the + * synthetic device must be suspended. + */ if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) || (sc->hn_flags & HN_FLAG_RXVF)) hn_suspend_data(sc); @@ -5547,17 +6268,24 @@ hn_resume(struct hn_softc *sc) { + /* + * If the non-transparent mode VF is activated, the synthetic + * device have to receive packets, so the data path of the + * synthetic device must be resumed. + */ if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) || (sc->hn_flags & HN_FLAG_RXVF)) hn_resume_data(sc); /* - * When the VF is activated, the synthetic interface is changed - * to DOWN in hn_rxvf_change(). Here, if the VF is still active, - * we don't call hn_resume_mgmt() until the VF is deactivated in - * hn_rxvf_change(). + * Don't resume link status change if VF is attached/activated. + * - In the non-transparent VF mode, the synthetic device marks + * link down until the VF is deactivated; i.e. VF is down. + * - In transparent VF mode, VF's media status is used until + * the VF is detached. */ - if (!(sc->hn_flags & HN_FLAG_RXVF)) + if ((sc->hn_flags & HN_FLAG_RXVF) == 0 && + !(hn_xpnt_vf && sc->hn_vf_ifp != NULL)) hn_resume_mgmt(sc); /* @@ -6087,6 +6815,24 @@ { int i; +#ifdef HN_IFSTART_SUPPORT + /* + * Don't use ifnet.if_start if transparent VF mode is requested; + * mainly due to the IFF_DRV_OACTIVE flag. + */ + if (hn_xpnt_vf && hn_use_if_start) { + hn_use_if_start = 0; + printf("hn: tranparent VF mode, if_transmit will be used, " + "instead of if_start\n"); + } +#endif + if (hn_xpnt_vf_attwait < HN_XPNT_VF_ATTWAIT_MIN) { + printf("hn: invalid transparent VF attach routing " + "wait timeout %d, reset to %d\n", + hn_xpnt_vf_attwait, HN_XPNT_VF_ATTWAIT_MIN); + hn_xpnt_vf_attwait = HN_XPNT_VF_ATTWAIT_MIN; + } + /* * Initialize VF map. */ Index: sys/dev/hyperv/netvsc/if_hnreg.h =================================================================== --- sys/dev/hyperv/netvsc/if_hnreg.h +++ sys/dev/hyperv/netvsc/if_hnreg.h @@ -133,7 +133,7 @@ } __packed; CTASSERT(sizeof(struct hn_nvs_ndis_init) >= HN_NVS_REQSIZE_MIN); -#define HN_NVS_DATAPATH_SYNTHETIC 0 +#define HN_NVS_DATAPATH_SYNTH 0 #define HN_NVS_DATAPATH_VF 1 /* No response */ Index: sys/dev/hyperv/netvsc/if_hnvar.h =================================================================== --- sys/dev/hyperv/netvsc/if_hnvar.h +++ sys/dev/hyperv/netvsc/if_hnvar.h @@ -174,7 +174,6 @@ */ struct hn_softc { struct ifnet *hn_ifp; - struct ifnet *hn_vf_ifp; /* SR-IOV VF */ struct ifmedia hn_media; device_t hn_dev; int hn_if_flags; @@ -185,6 +184,10 @@ int hn_rx_ring_inuse; struct hn_rx_ring *hn_rx_ring; + struct rmlock hn_vf_lock; + struct ifnet *hn_vf_ifp; /* SR-IOV VF */ + uint32_t hn_xvf_flags; /* transparent VF flags */ + int hn_tx_ring_cnt; int hn_tx_ring_inuse; struct hn_tx_ring *hn_tx_ring; @@ -241,6 +244,24 @@ eventhandler_tag hn_ifnet_evthand; eventhandler_tag hn_ifnet_atthand; eventhandler_tag hn_ifnet_dethand; + eventhandler_tag hn_ifnet_lnkhand; + + /* + * Transparent VF delayed initialization. + */ + int hn_vf_rdytick; /* ticks, 0 == ready */ + struct taskqueue *hn_vf_taskq; + struct timeout_task hn_vf_init; + + /* + * Saved information for VF under transparent mode. + */ + void (*hn_vf_input) + (struct ifnet *, struct mbuf *); + int hn_saved_caps; + u_int hn_saved_tsomax; + u_int hn_saved_tsosegcnt; + u_int hn_saved_tsosegsz; }; #define HN_FLAG_RXBUF_CONNECTED 0x0001 @@ -255,6 +276,9 @@ #define HN_FLAG_ERRORS (HN_FLAG_RXBUF_REF | HN_FLAG_CHIM_REF) +#define HN_XVFFLAG_ENABLED 0x0001 +#define HN_XVFFLAG_ACCBPF 0x0002 + #define HN_NO_SLEEPING(sc) \ do { \ (sc)->hn_flags |= HN_FLAG_NO_SLEEPING; \