diff --git a/sys/dev/virtio/network/if_vtnet.c b/sys/dev/virtio/network/if_vtnet.c --- a/sys/dev/virtio/network/if_vtnet.c +++ b/sys/dev/virtio/network/if_vtnet.c @@ -71,6 +71,7 @@ #include #include #include +#include #include #include @@ -110,6 +111,7 @@ static int vtnet_alloc_rx_filters(struct vtnet_softc *); static void vtnet_free_rx_filters(struct vtnet_softc *); static int vtnet_alloc_virtqueues(struct vtnet_softc *); +static int vtnet_alloc_interface(struct vtnet_softc *); static int vtnet_setup_interface(struct vtnet_softc *); static int vtnet_ioctl_mtu(struct vtnet_softc *, int); static int vtnet_ioctl_ifflags(struct vtnet_softc *); @@ -231,6 +233,7 @@ static void vtnet_setup_txq_sysctl(struct sysctl_ctx_list *, struct sysctl_oid_list *, struct vtnet_txq *); static void vtnet_setup_queue_sysctl(struct vtnet_softc *); +static void vtnet_load_tunables(struct vtnet_softc *); static void vtnet_setup_sysctl(struct vtnet_softc *); static int vtnet_rxq_enable_intr(struct vtnet_rxq *); @@ -299,6 +302,15 @@ SYSCTL_INT(_hw_vtnet, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN, &vtnet_rx_process_limit, 0, "Limits RX segments processed in a single pass"); +static int vtnet_lro_entry_count = 128; +SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN, + &vtnet_lro_entry_count, 0, "Software LRO entry count"); + +/* Enable sorted LRO, and the depth of the mbuf queue. */ +static int vtnet_lro_mbufq_depth = 0; +SYSCTL_UINT(_hw_vtnet, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN, + &vtnet_lro_mbufq_depth, 0, "Depth of software LRO mbuf queue"); + static uma_zone_t vtnet_tx_header_zone; static struct virtio_feature_desc vtnet_feature_desc[] = { @@ -433,6 +445,13 @@ VTNET_CORE_LOCK_INIT(sc); callout_init_mtx(&sc->vtnet_tick_ch, VTNET_CORE_MTX(sc), 0); + vtnet_load_tunables(sc); + + error = vtnet_alloc_interface(sc); + if (error) { + device_printf(dev, "cannot allocate interface\n"); + goto fail; + } vtnet_setup_sysctl(sc); vtnet_setup_features(sc); @@ -651,8 +670,8 @@ */ if (!virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) { device_printf(dev, - "LRO disabled since both mergeable buffers and " - "indirect descriptors were not negotiated\n"); + "Host LRO disabled since both mergeable buffers " + "and indirect descriptors were not negotiated\n"); features &= ~VTNET_LRO_FEATURES; negotiated_features = virtio_negotiate_features(dev, features); @@ -709,6 +728,14 @@ else sc->vtnet_rx_nsegs = VTNET_RX_SEGS_HDR_SEPARATE; + /* + * Favor "hardware" LRO if negotiated, but support software LRO as + * a fallback; there is usually little benefit (or worse) with both. + */ + if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) == 0 && + virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6) == 0) + sc->vtnet_flags |= VTNET_FLAG_SW_LRO; + if (virtio_with_feature(dev, VIRTIO_NET_F_GSO) || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4) || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6)) @@ -775,6 +802,14 @@ if (rxq->vtnrx_sg == NULL) return (ENOMEM); +#if defined(INET) || defined(INET6) + if (vtnet_software_lro(sc)) { + if (tcp_lro_init_args(&rxq->vtnrx_lro, sc->vtnet_ifp, + sc->vtnet_lro_entry_count, sc->vtnet_lro_mbufq_depth) != 0) + return (ENOMEM); + } +#endif + NET_TASK_INIT(&rxq->vtnrx_intrtask, 0, vtnet_rxq_tq_intr, rxq); rxq->vtnrx_tq = taskqueue_create(rxq->vtnrx_name, M_NOWAIT, taskqueue_thread_enqueue, &rxq->vtnrx_tq); @@ -853,6 +888,10 @@ rxq->vtnrx_sc = NULL; rxq->vtnrx_id = -1; +#if defined(INET) || defined(INET6) + tcp_lro_free(&rxq->vtnrx_lro); +#endif + if (rxq->vtnrx_sg != NULL) { sglist_free(rxq->vtnrx_sg); rxq->vtnrx_sg = NULL; @@ -992,22 +1031,34 @@ } static int -vtnet_setup_interface(struct vtnet_softc *sc) +vtnet_alloc_interface(struct vtnet_softc *sc) { device_t dev; - struct pfil_head_args pa; struct ifnet *ifp; dev = sc->vtnet_dev; - ifp = sc->vtnet_ifp = if_alloc(IFT_ETHER); - if (ifp == NULL) { - device_printf(dev, "cannot allocate ifnet structure\n"); - return (ENOSPC); - } + ifp = if_alloc(IFT_ETHER); + if (ifp == NULL) + return (ENOMEM); - if_initname(ifp, device_get_name(dev), device_get_unit(dev)); + sc->vtnet_ifp = ifp; ifp->if_softc = sc; + if_initname(ifp, device_get_name(dev), device_get_unit(dev)); + + return (0); +} + +static int +vtnet_setup_interface(struct vtnet_softc *sc) +{ + device_t dev; + struct pfil_head_args pa; + struct ifnet *ifp; + + dev = sc->vtnet_dev; + ifp = sc->vtnet_ifp; + ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST | IFF_KNOWSEPOCH; ifp->if_baudrate = IF_Gbps(10); @@ -1072,10 +1123,8 @@ vtnet_fixup_needs_csum) != 0) sc->vtnet_flags |= VTNET_FLAG_FIXUP_NEEDS_CSUM; - if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) || - virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6) || - virtio_with_feature(dev, VIRTIO_NET_F_GUEST_ECN)) - ifp->if_capabilities |= IFCAP_LRO; + /* Support either "hardware" or software LRO. */ + ifp->if_capabilities |= IFCAP_LRO; } if (ifp->if_capabilities & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6)) { @@ -1263,6 +1312,11 @@ else reinit = 1; + /* BMV: Avoid needless renegotiation for just software LRO. */ + if ((mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO)) == + IFCAP_LRO && vtnet_software_lro(sc)) + reinit = update = 0; + if (mask & IFCAP_RXCSUM) ifp->if_capenable ^= IFCAP_RXCSUM; if (mask & IFCAP_RXCSUM_IPV6) @@ -1869,6 +1923,23 @@ return (1); } +#if defined(INET) || defined(INET6) +static int +vtnet_lro_rx(struct vtnet_rxq *rxq, struct mbuf *m) +{ + struct lro_ctrl *lro; + + lro = &rxq->vtnrx_lro; + + if (lro->lro_mbuf_max != 0) { + tcp_lro_queue_mbuf(lro, m); + return (0); + } + + return (tcp_lro_rx(lro, m, 0)); +} +#endif + static void vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m, struct virtio_net_hdr *hdr) @@ -1906,9 +1977,14 @@ rxq->vtnrx_stats.vrxs_ipackets++; rxq->vtnrx_stats.vrxs_ibytes += m->m_pkthdr.len; - VTNET_RXQ_UNLOCK(rxq); +#if defined(INET) || defined(INET6) + if (vtnet_software_lro(sc) && ifp->if_capenable & IFCAP_LRO) { + if (vtnet_lro_rx(rxq, m) == 0) + return; + } +#endif + (*ifp->if_input)(ifp, m); - VTNET_RXQ_LOCK(rxq); } static int @@ -2013,14 +2089,14 @@ } vtnet_rxq_input(rxq, m, &lhdr); - - /* Must recheck after dropping the Rx lock. */ - if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) - break; } - if (deq > 0) + if (deq > 0) { +#if defined(INET) || defined(INET6) + tcp_lro_flush_all(&rxq->vtnrx_lro); +#endif virtqueue_notify(vq); + } return (count > 0 ? 0 : EAGAIN); } @@ -3226,7 +3302,7 @@ features &= ~VIRTIO_NET_F_GUEST_CSUM; } - if (ifp->if_capabilities & IFCAP_LRO) { + if (ifp->if_capabilities & IFCAP_LRO && !vtnet_software_lro(sc)) { if (ifp->if_capenable & IFCAP_LRO) features |= VTNET_LRO_FEATURES; else @@ -4106,6 +4182,19 @@ vtnet_setup_stat_sysctl(ctx, child, sc); } +static void +vtnet_load_tunables(struct vtnet_softc *sc) +{ + + sc->vtnet_lro_entry_count = vtnet_tunable_int(sc, + "lro_entry_count", vtnet_lro_entry_count); + if (sc->vtnet_lro_entry_count < TCP_LRO_ENTRIES) + sc->vtnet_lro_entry_count = TCP_LRO_ENTRIES; + + sc->vtnet_lro_mbufq_depth = vtnet_tunable_int(sc, + "lro_mbufq_depth", vtnet_lro_mbufq_depth); +} + static int vtnet_rxq_enable_intr(struct vtnet_rxq *rxq) { diff --git a/sys/dev/virtio/network/if_vtnetvar.h b/sys/dev/virtio/network/if_vtnetvar.h --- a/sys/dev/virtio/network/if_vtnetvar.h +++ b/sys/dev/virtio/network/if_vtnetvar.h @@ -79,6 +79,7 @@ struct vtnet_rxq_stats vtnrx_stats; struct taskqueue *vtnrx_tq; struct task vtnrx_intrtask; + struct lro_ctrl vtnrx_lro; #ifdef DEV_NETMAP uint32_t vtnrx_nm_refill; struct virtio_net_hdr_mrg_rxbuf vtnrx_shrhdr; @@ -155,6 +156,7 @@ #define VTNET_FLAG_EVENT_IDX 0x0800 #define VTNET_FLAG_SUSPENDED 0x1000 #define VTNET_FLAG_FIXUP_NEEDS_CSUM 0x2000 +#define VTNET_FLAG_SW_LRO 0x4000 int vtnet_link_active; int vtnet_hdr_size; @@ -168,6 +170,8 @@ int vtnet_act_vq_pairs; int vtnet_max_vq_pairs; int vtnet_requested_vq_pairs; + int vtnet_lro_entry_count; + int vtnet_lro_mbufq_depth; struct virtqueue *vtnet_ctrl_vq; struct vtnet_mac_filter *vtnet_mac_filter; @@ -192,6 +196,12 @@ return ((sc->vtnet_flags & VTNET_FLAG_MODERN) != 0); } +static bool +vtnet_software_lro(struct vtnet_softc *sc) +{ + return ((sc->vtnet_flags & VTNET_FLAG_SW_LRO) != 0); +} + /* * Maximum number of queue pairs we will autoconfigure to. */ @@ -325,8 +335,7 @@ /* * The VIRTIO_NET_F_GUEST_TSO[46] features permit the host to send us - * frames larger than 1514 bytes. We do not yet support software LRO - * via tcp_lro_rx(). + * frames larger than 1514 bytes. */ #define VTNET_LRO_FEATURES (VIRTIO_NET_F_GUEST_TSO4 | \ VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_ECN)