Index: sys/dev/vmware/vmxnet3/if_vmx.c
===================================================================
--- sys/dev/vmware/vmxnet3/if_vmx.c
+++ sys/dev/vmware/vmxnet3/if_vmx.c
@@ -239,6 +239,10 @@
 
 static void	vmxnet3_barrier(struct vmxnet3_softc *, vmxnet3_barrier_t);
 
+#ifdef DEV_NETMAP
+#include "vmx_netmap.h"
+#endif
+
 /* Tunables. */
 static int vmxnet3_mq_disable = 0;
 TUNABLE_INT("hw.vmx.mq_disable", &vmxnet3_mq_disable);
@@ -270,6 +274,9 @@
 MODULE_DEPEND(vmx, pci, 1, 1, 1);
 MODULE_DEPEND(vmx, ether, 1, 1, 1);
+#ifdef DEV_NETMAP
+MODULE_DEPEND(vmx, netmap, 1, 1, 1);
+#endif
 
 #define VMXNET3_VMWARE_VENDOR_ID	0x15AD
 #define VMXNET3_VMWARE_DEVICE_ID	0x07B0
 
@@ -347,6 +354,10 @@
 	vmxnet3_start_taskqueue(sc);
 #endif
 
+#ifdef DEV_NETMAP
+	vmxnet3_netmap_attach(sc);
+#endif
+
 fail:
 	if (error)
 		vmxnet3_detach(dev);
@@ -390,6 +401,10 @@
 #endif
 	vmxnet3_free_interrupts(sc);
 
+#ifdef DEV_NETMAP
+	netmap_detach(ifp);
+#endif
+
 	if (ifp != NULL) {
 		if_free(ifp);
 		sc->vmx_ifp = NULL;
@@ -1846,6 +1861,11 @@
 	txr = &txq->vxtxq_cmd_ring;
 	txc = &txq->vxtxq_comp_ring;
 
+#ifdef DEV_NETMAP
+	if (netmap_tx_irq(sc->vmx_ifp, txq - sc->vmx_txq) != NM_IRQ_PASS)
+		return;
+#endif
+
 	VMXNET3_TXQ_LOCK_ASSERT(txq);
 
 	for (;;) {
@@ -2111,6 +2131,15 @@
 	ifp = sc->vmx_ifp;
 	rxc = &rxq->vxrxq_comp_ring;
 
+#ifdef DEV_NETMAP
+	{
+		int dummy;
+		if (netmap_rx_irq(ifp, rxq - sc->vmx_rxq, &dummy) !=
+		    NM_IRQ_PASS)
+			return;
+	}
+#endif
+
 	VMXNET3_RXQ_LOCK_ASSERT(rxq);
 
 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
@@ -2401,6 +2430,10 @@
 	struct vmxnet3_txqueue *txq;
 	int i;
 
+#ifdef DEV_NETMAP
+	netmap_disable_all_rings(sc->vmx_ifp);
+#endif
+
 	for (i = 0; i < sc->vmx_nrxqueues; i++) {
 		rxq = &sc->vmx_rxq[i];
 		VMXNET3_RXQ_LOCK(rxq);
@@ -2454,6 +2487,10 @@
 	bzero(txr->vxtxr_txd,
 	    txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc));
 
+#ifdef DEV_NETMAP
+	vmxnet3_netmap_txq_init(sc, txq);
+#endif
+
 	txc = &txq->vxtxq_comp_ring;
 	txc->vxcr_next = 0;
 	txc->vxcr_gen = VMXNET3_INIT_GEN;
@@ -2468,6 +2505,10 @@
 	struct vmxnet3_rxring *rxr;
 	struct vmxnet3_comp_ring *rxc;
 	int i, populate, idx, frame_size, error;
+#ifdef DEV_NETMAP
+	struct netmap_adapter *na;
+	struct netmap_slot *slot;
+#endif
 
 	ifp = sc->vmx_ifp;
 	frame_size = ETHER_ALIGN + sizeof(struct ether_vlan_header) +
@@ -2498,12 +2539,24 @@
 	else
 		populate = VMXNET3_RXRINGS_PERQ;
 
+#ifdef DEV_NETMAP
+	na = NA(ifp);
+	slot = netmap_reset(na, NR_RX, rxq - sc->vmx_rxq, 0);
+#endif
+
 	for (i = 0; i < populate; i++) {
 		rxr = &rxq->vxrxq_cmd_ring[i];
 		rxr->vxrxr_fill = 0;
 		rxr->vxrxr_gen = VMXNET3_INIT_GEN;
 		bzero(rxr->vxrxr_rxd,
 		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
+#ifdef DEV_NETMAP
+		if (slot != NULL) {
+			vmxnet3_netmap_rxq_init(sc, rxq, rxr, slot);
+			i = populate;
+			break;
+		}
+#endif
 
 		for (idx = 0; idx < rxr->vxrxr_ndesc; idx++) {
 			error = vmxnet3_newbuf(sc, rxr);
@@ -2626,6 +2679,10 @@
 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
 	vmxnet3_link_status(sc);
 
+#ifdef DEV_NETMAP
+	netmap_enable_all_rings(ifp);
+#endif
+
 	vmxnet3_enable_all_intrs(sc);
 	callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc);
 }
Index: sys/dev/vmware/vmxnet3/vmx_netmap.h
===================================================================
--- /dev/null
+++ sys/dev/vmware/vmxnet3/vmx_netmap.h
@@ -0,0 +1,348 @@
+#include <net/netmap.h>
+#include <dev/netmap/netmap_kern.h>
+
+static int
+vmxnet3_netmap_reg(struct netmap_adapter *na, int onoff)
+{
+	struct ifnet *ifp = na->ifp;
+	struct vmxnet3_softc *sc = ifp->if_softc;
+
+	VMXNET3_CORE_LOCK(sc);
+	vmxnet3_stop(sc);
+	if (onoff) {
+		nm_set_native_flags(na);
+	} else {
+		nm_clear_native_flags(na);
+	}
+	vmxnet3_init_locked(sc);
+	VMXNET3_CORE_UNLOCK(sc);
+	return 0;
+}
+
+static void
+vmxnet3_netmap_rxq_init(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq,
+    struct vmxnet3_rxring *rxr, struct netmap_slot *slot)
+{
+	struct ifnet *ifp = sc->vmx_ifp;
+	struct netmap_adapter *na = NA(ifp);
+	struct vmxnet3_rxdesc *rxd;
+	int q, i;
+
+	q = rxq - sc->vmx_rxq;
+
+	for (i = 0; ; i++) {
+		int idx = rxr->vxrxr_fill;
+		int si = netmap_idx_n2k(na->rx_rings[q], idx);
+		struct vmxnet3_rxbuf *rxb = &rxr->vxrxr_rxbuf[idx];
+		uint64_t paddr;
+		void *addr;
+
+		addr = PNMB(na, slot + si, &paddr);
+		netmap_load_map(na, rxr->vxrxr_rxtag, rxb->vrxb_dmamap, addr);
+
+		rxd = &rxr->vxrxr_rxd[idx];
+		rxd->addr = paddr;
+		rxd->len = NETMAP_BUF_SIZE(na);
+		rxd->gen = rxr->vxrxr_gen ^ 1;
+		rxd->btype = VMXNET3_BTYPE_HEAD;
+		nm_prdis("%d: addr %lx len %u btype %u gen %u",
+		    idx, rxd->addr, rxd->len, rxd->btype, rxd->gen);
+
+		if (i == rxr->vxrxr_ndesc - 1)
+			break;
+
+		rxd->gen ^= 1;
+		vmxnet3_rxr_increment_fill(rxr);
+	}
+}
+
+static void
+vmxnet3_netmap_txq_init(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
+{
+	struct ifnet *ifp = sc->vmx_ifp;
+	struct netmap_adapter *na;
+	struct netmap_slot *slot;
+	struct vmxnet3_txring *txr;
+	int i, gen, q;
+
+	q = txq - sc->vmx_txq;
+
+	na = NA(ifp);
+
+	slot = netmap_reset(na, NR_TX, q, 0);
+	if (slot == NULL)
+		return;
+
+	txr = &txq->vxtxq_cmd_ring;
+	gen = txr->vxtxr_gen ^ 1;
+
+	for (i = 0; i < txr->vxtxr_ndesc; i++) {
+		int si = netmap_idx_n2k(na->tx_rings[q], i);
+		struct vmxnet3_txdesc *txd = &txr->vxtxr_txd[i];
+		uint64_t paddr;
+		void *addr;
+
+		addr = PNMB(na, slot + si, &paddr);
+
+		txd->addr = paddr;
+		txd->len = 0;
+		txd->gen = gen;
+		txd->dtype = 0;
+		txd->offload_mode = VMXNET3_OM_NONE;
+		txd->offload_pos = 0;
+		txd->hlen = 0;
+		txd->eop = 0;
+		txd->compreq = 0;
+		txd->vtag_mode = 0;
+		txd->vtag = 0;
+
+		netmap_load_map(na, txr->vxtxr_txtag,
+		    txr->vxtxr_txbuf[i].vtxb_dmamap, addr);
+	}
+}
+
+static int
+vmxnet3_netmap_txsync(struct netmap_kring *kring, int flags)
+{
+	struct netmap_adapter *na = kring->na;
+	struct ifnet *ifp = na->ifp;
+	struct netmap_ring *ring = kring->ring;
+	u_int nm_i;
+	u_int nic_i;
+	u_int n;
+	u_int const lim = kring->nkr_num_slots - 1;
+	u_int const head = kring->rhead;
+
+	/*
+	 * interrupts on every tx packet are expensive so request
+	 * them every half ring, or where NS_REPORT is set
+	 */
+	u_int report_frequency = kring->nkr_num_slots >> 1;
+	/* device specific */
+	struct vmxnet3_softc *sc = ifp->if_softc;
+	struct vmxnet3_txqueue *txq = &sc->vmx_txq[kring->ring_id];
+	struct vmxnet3_txring *txr = &txq->vxtxq_cmd_ring;
+	struct vmxnet3_comp_ring *txc = &txq->vxtxq_comp_ring;
+	struct vmxnet3_txcompdesc *txcd = txc->vxcr_u.txcd;
+	int gen = txr->vxtxr_gen;
+
+	/* no need to dma-sync the ring; memory barriers are sufficient */
+
+	nm_i = kring->nr_hwcur;
+	if (nm_i != head) {
+		nic_i = netmap_idx_k2n(kring, nm_i);
+		for (n = 0; nm_i != head; n++) {
+			struct netmap_slot *slot = &ring->slot[nm_i];
+			u_int len = slot->len;
+			uint64_t paddr;
+			void *addr = PNMB(na, slot, &paddr);
+			int compreq = !!(slot->flags & NS_REPORT ||
+			    nic_i == 0 || nic_i == report_frequency);
+
+			/* device specific */
+			struct vmxnet3_txdesc *curr = &txr->vxtxr_txd[nic_i];
+			struct vmxnet3_txbuf *txbuf = &txr->vxtxr_txbuf[nic_i];
+
+			NM_CHECK_ADDR_LEN(na, addr, len);
+
+			/* fill the slot in the NIC ring */
+			curr->len = len;
+			curr->eop = 1; /* NS_MOREFRAG not supported */
+			curr->compreq = compreq;
+
+			if (slot->flags & NS_BUF_CHANGED) {
+				curr->addr = paddr;
+				netmap_reload_map(na, txr->vxtxr_txtag,
+				    txbuf->vtxb_dmamap, addr);
+			}
+			slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
+
+			/* make sure changes to the buffer are synced */
+			bus_dmamap_sync(txr->vxtxr_txtag, txbuf->vtxb_dmamap,
+			    BUS_DMASYNC_PREWRITE);
+
+			/* pass ownership */
+			vmxnet3_barrier(sc, VMXNET3_BARRIER_WR);
+			curr->gen = gen;
+
+			nm_i = nm_next(nm_i, lim);
+			nic_i++;
+			if (unlikely(nic_i == lim + 1)) {
+				nic_i = 0;
+				gen = txr->vxtxr_gen ^= 1;
+			}
+		}
+
+		vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(txq->vxtxq_id), nic_i);
+	}
+	kring->nr_hwcur = nm_i;
+
+	/* reclaim completed packets */
+	for (;;) {
+		u_int sop;
+		struct vmxnet3_txbuf *txb;
+
+		txcd = &txc->vxcr_u.txcd[txc->vxcr_next];
+		if (txcd->gen != txc->vxcr_gen)
+			break;
+
+		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
+
+		if (++txc->vxcr_next == txc->vxcr_ndesc) {
+			txc->vxcr_next = 0;
+			txc->vxcr_gen ^= 1;
+		}
+
+		sop = txr->vxtxr_next;
+		txb = &txr->vxtxr_txbuf[sop];
+
+		bus_dmamap_sync(txr->vxtxr_txtag, txb->vtxb_dmamap,
+		    BUS_DMASYNC_POSTWRITE);
+
+		txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc;
+	}
+	kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, txr->vxtxr_next), lim);
+
+	return 0;
+}
+
+static int
+vmxnet3_netmap_rxsync(struct netmap_kring *kring, int flags)
+{
+	struct netmap_adapter *na = kring->na;
+	struct netmap_ring *ring = kring->ring;
+	u_int nm_i;
+	u_int nic_i;
+	u_int const lim = kring->nkr_num_slots - 1;
+	u_int const head = kring->rhead;
+	int force_update = (flags & NAF_FORCE_READ);
+
+	struct ifnet *ifp = na->ifp;
+	struct vmxnet3_softc *sc = ifp->if_softc;
+	struct vmxnet3_rxqueue *rxq = &sc->vmx_rxq[kring->ring_id];
+	struct vmxnet3_rxring *rxr;
+	struct vmxnet3_comp_ring *rxc;
+
+	if (head > lim)
+		return netmap_ring_reinit(kring);
+
+	rxr = &rxq->vxrxq_cmd_ring[0];
+
+	/* no need to dma-sync the ring; memory barriers are sufficient */
+
+	/* first part: import newly received packets */
+	if (netmap_no_pendintr || force_update) {
+		rxc = &rxq->vxrxq_comp_ring;
+		nm_i = kring->nr_hwtail;
+		nic_i = netmap_idx_k2n(kring, nm_i);
+		for (;;) {
+			struct vmxnet3_rxcompdesc *rxcd;
+			struct vmxnet3_rxbuf *rxb;
+
+			rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next];
+
+			if (rxcd->gen != rxc->vxcr_gen)
+				break;
+			vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
+
+			while (__predict_false(rxcd->rxd_idx != nic_i)) {
+				nm_prlim(1, "%u skipped! idx %u", nic_i, rxcd->rxd_idx);
+				/* to shelter the application from this we
+				 * would need to rotate the kernel-owned
+				 * portion of the netmap and nic rings. We
+				 * return len=0 for now and hope for the best.
+				 */
+				ring->slot[nm_i].len = 0;
+				nic_i = nm_next(nic_i, lim);
+				nm_i = nm_next(nm_i, lim);
+			}
+
+			rxb = &rxr->vxrxr_rxbuf[nic_i];
+
+			ring->slot[nm_i].len = rxcd->len;
+			ring->slot[nm_i].flags = 0;
+
+			bus_dmamap_sync(rxr->vxrxr_rxtag, rxb->vrxb_dmamap,
+			    BUS_DMASYNC_POSTREAD);
+
+			nic_i = nm_next(nic_i, lim);
+			nm_i = nm_next(nm_i, lim);
+
+			rxc->vxcr_next++;
+			if (__predict_false(rxc->vxcr_next == rxc->vxcr_ndesc)) {
+				rxc->vxcr_next = 0;
+				rxc->vxcr_gen ^= 1;
+			}
+		}
+		kring->nr_hwtail = nm_i;
+	}
+	/* second part: skip past packets that userspace has released */
+	nm_i = kring->nr_hwcur;
+	if (nm_i != head) {
+		nic_i = netmap_idx_k2n(kring, nm_i);
+		while (nm_i != head) {
+			struct netmap_slot *slot = &ring->slot[nm_i];
+			struct vmxnet3_rxdesc *rxd_fill;
+			struct vmxnet3_rxbuf *rxbuf;
+
+			if (slot->flags & NS_BUF_CHANGED) {
+				uint64_t paddr;
+				void *addr = PNMB(na, slot, &paddr);
+				struct vmxnet3_rxdesc *rxd = &rxr->vxrxr_rxd[nic_i];
+
+				if (addr == NETMAP_BUF_BASE(na))
+					return netmap_ring_reinit(kring);
+
+				rxd->addr = paddr;
+				rxbuf = &rxr->vxrxr_rxbuf[nic_i];
+				netmap_reload_map(na, rxr->vxrxr_rxtag,
+				    rxbuf->vrxb_dmamap, addr);
+				slot->flags &= ~NS_BUF_CHANGED;
+				vmxnet3_barrier(sc, VMXNET3_BARRIER_WR);
+			}
+
+			rxd_fill = &rxr->vxrxr_rxd[rxr->vxrxr_fill];
+			rxbuf = &rxr->vxrxr_rxbuf[rxr->vxrxr_fill];
+
+			bus_dmamap_sync(rxr->vxrxr_rxtag, rxbuf->vrxb_dmamap,
+			    BUS_DMASYNC_PREREAD);
+
+			rxd_fill->gen = rxr->vxrxr_gen;
+			vmxnet3_rxr_increment_fill(rxr);
+
+			nm_i = nm_next(nm_i, lim);
+			nic_i = nm_next(nic_i, lim);
+		}
+		kring->nr_hwcur = head;
+		if (__predict_false(rxq->vxrxq_rs->update_rxhead)) {
+			vmxnet3_write_bar0(sc,
+			    VMXNET3_BAR0_RXH1(kring->ring_id), rxr->vxrxr_fill);
+		}
+	}
+	return 0;
+}
+
+static void
+vmxnet3_netmap_attach(struct vmxnet3_softc *sc)
+{
+	struct netmap_adapter na;
+	int enable = 0;
+
+	if (getenv_int("vmxnet3.netmap_native", &enable) < 0 || !enable) {
+		return;
+	}
+
+	bzero(&na, sizeof(na));
+
+	na.ifp = sc->vmx_ifp;
+	na.na_flags = NAF_BDG_MAYSLEEP;
+	na.num_tx_desc = sc->vmx_ntxdescs;
+	na.num_rx_desc = sc->vmx_nrxdescs;
+	na.num_tx_rings = sc->vmx_ntxqueues;
+	na.num_rx_rings = sc->vmx_nrxqueues;
+	na.nm_register = vmxnet3_netmap_reg;
+	na.nm_txsync = vmxnet3_netmap_txsync;
+	na.nm_rxsync = vmxnet3_netmap_rxsync;
+	netmap_attach(&na);
+}
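
Usage sketch (not part of the patch above): with a kernel built with DEV_NETMAP and the loader tunable vmxnet3.netmap_native=1 set (vmxnet3_netmap_attach() only registers with netmap when that tunable is enabled), the interface can be opened in native netmap mode from userspace. The program below is a minimal receive loop, assuming the stock netmap_user.h wrappers (nm_open()/nm_nextpkt()/nm_close()) and an interface named vmx0; these names are illustrative, not taken from the patch. Transmit can be exercised the same way with nm_inject() or with the netmap pkt-gen tool.

/*
 * Hypothetical smoke test: open vmx0 in native netmap mode and print
 * the length of each received frame.  Assumes the NETMAP_WITH_LIBS
 * helpers from net/netmap_user.h and an interface named vmx0.
 */
#include <sys/types.h>
#include <poll.h>
#include <stdio.h>

#define NETMAP_WITH_LIBS
#include <net/netmap_user.h>

int
main(void)
{
	struct nm_desc *d;
	struct nm_pkthdr h;
	struct pollfd pfd;
	const u_char *buf;

	/* Bind all hardware rings of vmx0; this invokes the nm_register
	 * callback (vmxnet3_netmap_reg) in the driver. */
	d = nm_open("netmap:vmx0", NULL, 0, NULL);
	if (d == NULL) {
		perror("nm_open");
		return (1);
	}

	pfd.fd = NETMAP_FD(d);
	pfd.events = POLLIN;
	for (;;) {
		/* Wait for the driver's rxsync path to report new frames. */
		if (poll(&pfd, 1, -1) < 0)
			break;
		while ((buf = nm_nextpkt(d, &h)) != NULL)
			printf("rx %u bytes at %p\n", h.len,
			    (const void *)buf);
	}

	nm_close(d);
	return (0);
}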