Index: sys/conf/files =================================================================== --- sys/conf/files +++ sys/conf/files @@ -2512,17 +2512,19 @@ dev/nand/nfc_if.m optional nand dev/netmap/if_ptnet.c optional netmap inet dev/netmap/netmap.c optional netmap +dev/netmap/netmap_bdg.c optional netmap dev/netmap/netmap_freebsd.c optional netmap dev/netmap/netmap_generic.c optional netmap +dev/netmap/netmap_kloop.c optional netmap +dev/netmap/netmap_legacy.c optional netmap dev/netmap/netmap_mbq.c optional netmap dev/netmap/netmap_mem2.c optional netmap dev/netmap/netmap_monitor.c optional netmap +dev/netmap/netmap_null.c optional netmap dev/netmap/netmap_offloadings.c optional netmap dev/netmap/netmap_pipe.c optional netmap dev/netmap/netmap_pt.c optional netmap dev/netmap/netmap_vale.c optional netmap -dev/netmap/netmap_legacy.c optional netmap -dev/netmap/netmap_bdg.c optional netmap # compile-with "${NORMAL_C} -Wconversion -Wextra" dev/nfsmb/nfsmb.c optional nfsmb pci dev/nge/if_nge.c optional nge Index: sys/dev/netmap/if_ptnet.c =================================================================== --- sys/dev/netmap/if_ptnet.c +++ sys/dev/netmap/if_ptnet.c @@ -128,8 +128,8 @@ struct resource *irq; void *cookie; int kring_id; - struct ptnet_csb_gh *ptgh; - struct ptnet_csb_hg *pthg; + struct nm_csb_atok *atok; + struct nm_csb_ktoa *ktoa; unsigned int kick; struct mtx lock; struct buf_ring *bufring; /* for TX queues */ @@ -166,8 +166,8 @@ unsigned int num_tx_rings; struct ptnet_queue *queues; struct ptnet_queue *rxqueues; - struct ptnet_csb_gh *csb_gh; - struct ptnet_csb_hg *csb_hg; + struct nm_csb_atok *csb_gh; + struct nm_csb_ktoa *csb_hg; unsigned int min_tx_space; @@ -209,7 +209,7 @@ static int ptnet_irqs_init(struct ptnet_softc *sc); static void ptnet_irqs_fini(struct ptnet_softc *sc); -static uint32_t ptnet_nm_ptctl(if_t ifp, uint32_t cmd); +static uint32_t ptnet_nm_ptctl(struct ptnet_softc *sc, uint32_t cmd); static int ptnet_nm_config(struct netmap_adapter 
*na, struct nm_config_info *info); static void ptnet_update_vnet_hdr(struct ptnet_softc *sc); @@ -327,7 +327,7 @@ sc->num_rings = num_tx_rings + num_rx_rings; sc->num_tx_rings = num_tx_rings; - if (sc->num_rings * sizeof(struct ptnet_csb_gh) > PAGE_SIZE) { + if (sc->num_rings * sizeof(struct nm_csb_atok) > PAGE_SIZE) { device_printf(dev, "CSB cannot handle that many rings (%u)\n", sc->num_rings); err = ENOMEM; @@ -342,7 +342,7 @@ err = ENOMEM; goto err_path; } - sc->csb_hg = (struct ptnet_csb_hg *)(((char *)sc->csb_gh) + PAGE_SIZE); + sc->csb_hg = (struct nm_csb_ktoa *)(((char *)sc->csb_gh) + PAGE_SIZE); { /* @@ -379,8 +379,8 @@ pq->sc = sc; pq->kring_id = i; pq->kick = PTNET_IO_KICK_BASE + 4 * i; - pq->ptgh = sc->csb_gh + i; - pq->pthg = sc->csb_hg + i; + pq->atok = sc->csb_gh + i; + pq->ktoa = sc->csb_hg + i; snprintf(pq->lock_name, sizeof(pq->lock_name), "%s-%d", device_get_nameunit(dev), i); mtx_init(&pq->lock, pq->lock_name, NULL, MTX_DEF); @@ -505,6 +505,17 @@ return err; } +/* Stop host sync-kloop if it was running. 
*/ +static void +ptnet_device_shutdown(struct ptnet_softc *sc) +{ + ptnet_nm_ptctl(sc, PTNETMAP_PTCTL_DELETE); + bus_write_4(sc->iomem, PTNET_IO_CSB_GH_BAH, 0); + bus_write_4(sc->iomem, PTNET_IO_CSB_GH_BAL, 0); + bus_write_4(sc->iomem, PTNET_IO_CSB_HG_BAH, 0); + bus_write_4(sc->iomem, PTNET_IO_CSB_HG_BAL, 0); +} + static int ptnet_detach(device_t dev) { @@ -511,6 +522,8 @@ struct ptnet_softc *sc = device_get_softc(dev); int i; + ptnet_device_shutdown(sc); + #ifdef DEVICE_POLLING if (sc->ifp->if_capenable & IFCAP_POLLING) { ether_poll_deregister(sc->ifp); @@ -543,10 +556,6 @@ ptnet_irqs_fini(sc); if (sc->csb_gh) { - bus_write_4(sc->iomem, PTNET_IO_CSB_GH_BAH, 0); - bus_write_4(sc->iomem, PTNET_IO_CSB_GH_BAL, 0); - bus_write_4(sc->iomem, PTNET_IO_CSB_HG_BAH, 0); - bus_write_4(sc->iomem, PTNET_IO_CSB_HG_BAL, 0); contigfree(sc->csb_gh, 2*PAGE_SIZE, M_DEVBUF); sc->csb_gh = NULL; sc->csb_hg = NULL; @@ -583,9 +592,8 @@ static int ptnet_suspend(device_t dev) { - struct ptnet_softc *sc; + struct ptnet_softc *sc = device_get_softc(dev); - sc = device_get_softc(dev); (void)sc; return (0); @@ -594,9 +602,8 @@ static int ptnet_resume(device_t dev) { - struct ptnet_softc *sc; + struct ptnet_softc *sc = device_get_softc(dev); - sc = device_get_softc(dev); (void)sc; return (0); @@ -605,11 +612,11 @@ static int ptnet_shutdown(device_t dev) { - /* - * Suspend already does all of what we need to - * do here; we just never expect to be resumed. - */ - return (ptnet_suspend(dev)); + struct ptnet_softc *sc = device_get_softc(dev); + + ptnet_device_shutdown(sc); + + return (0); } static int @@ -796,7 +803,7 @@ /* Make sure the worker sees the * IFF_DRV_RUNNING down. */ PTNET_Q_LOCK(pq); - pq->ptgh->guest_need_kick = 0; + pq->atok->appl_need_kick = 0; PTNET_Q_UNLOCK(pq); /* Wait for rescheduling to finish. 
*/ if (pq->taskq) { @@ -810,7 +817,7 @@ for (i = 0; i < sc->num_rings; i++) { pq = sc-> queues + i; PTNET_Q_LOCK(pq); - pq->ptgh->guest_need_kick = 1; + pq->atok->appl_need_kick = 1; PTNET_Q_UNLOCK(pq); } } @@ -881,7 +888,7 @@ return ret; } - if (sc->ptna->backend_regifs == 0) { + if (sc->ptna->backend_users == 0) { ret = ptnet_nm_krings_create(na_nm); if (ret) { device_printf(sc->dev, "ptnet_nm_krings_create() " @@ -962,7 +969,7 @@ ptnet_nm_register(na_dr, 0 /* off */); - if (sc->ptna->backend_regifs == 0) { + if (sc->ptna->backend_users == 0) { netmap_mem_rings_delete(na_dr); ptnet_nm_krings_delete(na_nm); } @@ -1092,9 +1099,8 @@ } static uint32_t -ptnet_nm_ptctl(if_t ifp, uint32_t cmd) +ptnet_nm_ptctl(struct ptnet_softc *sc, uint32_t cmd) { - struct ptnet_softc *sc = if_getsoftc(ifp); /* * Write a command and read back error status, * with zero meaning success. @@ -1130,8 +1136,8 @@ /* Sync krings from the host, reading from * CSB. */ for (i = 0; i < sc->num_rings; i++) { - struct ptnet_csb_gh *ptgh = sc->queues[i].ptgh; - struct ptnet_csb_hg *pthg = sc->queues[i].pthg; + struct nm_csb_atok *atok = sc->queues[i].atok; + struct nm_csb_ktoa *ktoa = sc->queues[i].ktoa; struct netmap_kring *kring; if (i < na->num_tx_rings) { @@ -1139,15 +1145,15 @@ } else { kring = na->rx_rings[i - na->num_tx_rings]; } - kring->rhead = kring->ring->head = ptgh->head; - kring->rcur = kring->ring->cur = ptgh->cur; - kring->nr_hwcur = pthg->hwcur; + kring->rhead = kring->ring->head = atok->head; + kring->rcur = kring->ring->cur = atok->cur; + kring->nr_hwcur = ktoa->hwcur; kring->nr_hwtail = kring->rtail = - kring->ring->tail = pthg->hwtail; + kring->ring->tail = ktoa->hwtail; ND("%d,%d: csb {hc %u h %u c %u ht %u}", t, i, - pthg->hwcur, ptgh->head, ptgh->cur, - pthg->hwtail); + ktoa->hwcur, atok->head, atok->cur, + ktoa->hwtail); ND("%d,%d: kring {hc %u rh %u rc %u h %u c %u ht %u rt %u t %u}", t, i, kring->nr_hwcur, kring->rhead, kring->rcur, kring->ring->head, kring->ring->cur, 
kring->nr_hwtail, @@ -1178,7 +1184,7 @@ int i; if (!onoff) { - sc->ptna->backend_regifs--; + sc->ptna->backend_users--; } /* If this is the last netmap client, guest interrupt enable flags may @@ -1191,17 +1197,17 @@ D("Exit netmap mode, re-enable interrupts"); for (i = 0; i < sc->num_rings; i++) { pq = sc->queues + i; - pq->ptgh->guest_need_kick = 1; + pq->atok->appl_need_kick = 1; } } if (onoff) { - if (sc->ptna->backend_regifs == 0) { + if (sc->ptna->backend_users == 0) { /* Initialize notification enable fields in the CSB. */ for (i = 0; i < sc->num_rings; i++) { pq = sc->queues + i; - pq->pthg->host_need_kick = 1; - pq->ptgh->guest_need_kick = + pq->ktoa->kern_need_kick = 1; + pq->atok->appl_need_kick = (!(ifp->if_capenable & IFCAP_POLLING) && i >= sc->num_tx_rings); } @@ -1211,17 +1217,13 @@ /* Make sure the host adapter passed through is ready * for txsync/rxsync. */ - ret = ptnet_nm_ptctl(ifp, PTNETMAP_PTCTL_CREATE); + ret = ptnet_nm_ptctl(sc, PTNETMAP_PTCTL_CREATE); if (ret) { return ret; } - } - /* Sync from CSB must be done after REGIF PTCTL. Skip this - * step only if this is a netmap client and it is not the - * first one. */ - if ((!native && sc->ptna->backend_regifs == 0) || - (native && na->active_fds == 0)) { + /* Align the guest krings and rings to the state stored + * in the CSB. */ ptnet_sync_from_csb(sc, na); } @@ -1254,19 +1256,13 @@ } } - /* Sync from CSB must be done before UNREGIF PTCTL, on the last - * netmap client. 
*/ - if (native && na->active_fds == 0) { - ptnet_sync_from_csb(sc, na); + if (sc->ptna->backend_users == 0) { + ret = ptnet_nm_ptctl(sc, PTNETMAP_PTCTL_DELETE); } - - if (sc->ptna->backend_regifs == 0) { - ret = ptnet_nm_ptctl(ifp, PTNETMAP_PTCTL_DELETE); - } } if (onoff) { - sc->ptna->backend_regifs++; + sc->ptna->backend_users++; } return ret; @@ -1279,7 +1275,7 @@ struct ptnet_queue *pq = sc->queues + kring->ring_id; bool notify; - notify = netmap_pt_guest_txsync(pq->ptgh, pq->pthg, kring, flags); + notify = netmap_pt_guest_txsync(pq->atok, pq->ktoa, kring, flags); if (notify) { ptnet_kick(pq); } @@ -1294,7 +1290,7 @@ struct ptnet_queue *pq = sc->rxqueues + kring->ring_id; bool notify; - notify = netmap_pt_guest_rxsync(pq->ptgh, pq->pthg, kring, flags); + notify = netmap_pt_guest_rxsync(pq->atok, pq->ktoa, kring, flags); if (notify) { ptnet_kick(pq); } @@ -1310,7 +1306,7 @@ for (i = 0; i < sc->num_rings; i++) { struct ptnet_queue *pq = sc->queues + i; - pq->ptgh->guest_need_kick = onoff; + pq->atok->appl_need_kick = onoff; } } @@ -1676,25 +1672,13 @@ } /* End of offloading-related functions to be shared with vtnet. */ -static inline void -ptnet_sync_tail(struct ptnet_csb_hg *pthg, struct netmap_kring *kring) -{ - struct netmap_ring *ring = kring->ring; - - /* Update hwcur and hwtail as known by the host. */ - ptnetmap_guest_read_kring_csb(pthg, kring); - - /* nm_sync_finalize */ - ring->tail = kring->rtail = kring->nr_hwtail; -} - static void ptnet_ring_update(struct ptnet_queue *pq, struct netmap_kring *kring, unsigned int head, unsigned int sync_flags) { struct netmap_ring *ring = kring->ring; - struct ptnet_csb_gh *ptgh = pq->ptgh; - struct ptnet_csb_hg *pthg = pq->pthg; + struct nm_csb_atok *atok = pq->atok; + struct nm_csb_ktoa *ktoa = pq->ktoa; /* Some packets have been pushed to the netmap ring. We have * to tell the host to process the new packets, updating cur @@ -1704,11 +1688,11 @@ /* Mimic nm_txsync_prologue/nm_rxsync_prologue. 
*/ kring->rcur = kring->rhead = head; - ptnetmap_guest_write_kring_csb(ptgh, kring->rcur, kring->rhead); + ptnetmap_guest_write_kring_csb(atok, kring->rcur, kring->rhead); /* Kick the host if needed. */ - if (NM_ACCESS_ONCE(pthg->host_need_kick)) { - ptgh->sync_flags = sync_flags; + if (NM_ACCESS_ONCE(ktoa->kern_need_kick)) { + atok->sync_flags = sync_flags; ptnet_kick(pq); } } @@ -1728,8 +1712,8 @@ struct netmap_adapter *na = &sc->ptna->dr.up; if_t ifp = sc->ifp; unsigned int batch_count = 0; - struct ptnet_csb_gh *ptgh; - struct ptnet_csb_hg *pthg; + struct nm_csb_atok *atok; + struct nm_csb_ktoa *ktoa; struct netmap_kring *kring; struct netmap_ring *ring; struct netmap_slot *slot; @@ -1758,8 +1742,8 @@ return ENETDOWN; } - ptgh = pq->ptgh; - pthg = pq->pthg; + atok = pq->atok; + ktoa = pq->ktoa; kring = na->tx_rings[pq->kring_id]; ring = kring->ring; lim = kring->nkr_num_slots - 1; @@ -1771,7 +1755,7 @@ /* We ran out of slot, let's see if the host has * freed up some, by reading hwcur and hwtail from * the CSB. */ - ptnet_sync_tail(pthg, kring); + ptnet_sync_tail(ktoa, kring); if (PTNET_TX_NOSPACE(head, kring, minspace)) { /* Still no slots available. Reactivate the @@ -1778,10 +1762,10 @@ * interrupts so that we can be notified * when some free slots are made available by * the host. */ - ptgh->guest_need_kick = 1; + atok->appl_need_kick = 1; /* Double-check. */ - ptnet_sync_tail(pthg, kring); + ptnet_sync_tail(ktoa, kring); if (likely(PTNET_TX_NOSPACE(head, kring, minspace))) { break; @@ -1790,7 +1774,7 @@ RD(1, "Found more slots by doublecheck"); /* More slots were freed before reactivating * the interrupts. 
*/ - ptgh->guest_need_kick = 0; + atok->appl_need_kick = 0; } } @@ -2020,8 +2004,8 @@ { struct ptnet_softc *sc = pq->sc; bool have_vnet_hdr = sc->vnet_hdr_len; - struct ptnet_csb_gh *ptgh = pq->ptgh; - struct ptnet_csb_hg *pthg = pq->pthg; + struct nm_csb_atok *atok = pq->atok; + struct nm_csb_ktoa *ktoa = pq->ktoa; struct netmap_adapter *na = &sc->ptna->dr.up; struct netmap_kring *kring = na->rx_rings[pq->kring_id]; struct netmap_ring *ring = kring->ring; @@ -2053,7 +2037,7 @@ /* We ran out of slot, let's see if the host has * added some, by reading hwcur and hwtail from * the CSB. */ - ptnet_sync_tail(pthg, kring); + ptnet_sync_tail(ktoa, kring); if (head == ring->tail) { /* Still no slots available. Reactivate @@ -2060,14 +2044,14 @@ * interrupts as they were disabled by the * host thread right before issuing the * last interrupt. */ - ptgh->guest_need_kick = 1; + atok->appl_need_kick = 1; /* Double-check. */ - ptnet_sync_tail(pthg, kring); + ptnet_sync_tail(ktoa, kring); if (likely(head == ring->tail)) { break; } - ptgh->guest_need_kick = 0; + atok->appl_need_kick = 0; } } Index: sys/dev/netmap/netmap.c =================================================================== --- sys/dev/netmap/netmap.c +++ sys/dev/netmap/netmap.c @@ -527,9 +527,6 @@ /* Non-zero if ptnet devices are allowed to use virtio-net headers. 
*/ int ptnet_vnet_hdr = 1; -/* 0 if ptnetmap should not use worker threads for TX processing */ -int ptnetmap_tx_workers = 1; - /* * SYSCTL calls are grouped between SYSBEGIN and SYSEND to be emulated * in some other operating systems @@ -569,8 +566,6 @@ #endif SYSCTL_INT(_dev_netmap, OID_AUTO, ptnet_vnet_hdr, CTLFLAG_RW, &ptnet_vnet_hdr, 0, "Allow ptnet devices to use virtio-net headers"); -SYSCTL_INT(_dev_netmap, OID_AUTO, ptnetmap_tx_workers, CTLFLAG_RW, - &ptnetmap_tx_workers, 0, "Use worker threads for pnetmap TX processing"); SYSEND; @@ -1033,14 +1028,6 @@ priv->np_nifp = NULL; } -/* call with NMG_LOCK held */ -static __inline int -nm_si_user(struct netmap_priv_d *priv, enum txrx t) -{ - return (priv->np_na != NULL && - (priv->np_qlast[t] - priv->np_qfirst[t] > 1)); -} - struct netmap_priv_d* netmap_priv_new(void) { @@ -1527,9 +1514,7 @@ * 0 !NULL type matches and na created/found * !0 !NULL impossible */ - - /* try to see if this is a ptnetmap port */ - error = netmap_get_pt_host_na(hdr, na, nmd, create); + error = netmap_get_null_na(hdr, na, nmd, create); if (error || *na != NULL) goto out; @@ -1739,7 +1724,7 @@ /* * Error routine called when txsync/rxsync detects an error. - * Can't do much more than resetting head =cur = hwcur, tail = hwtail + * Can't do much more than resetting head = cur = hwcur, tail = hwtail * Return 1 on reinit. * * This routine is only called by the upper half of the kernel. 
@@ -1810,12 +1795,6 @@ enum txrx t; u_int j; - if ((nr_flags & NR_PTNETMAP_HOST) && ((nr_mode != NR_REG_ALL_NIC) || - nr_flags & (NR_RX_RINGS_ONLY|NR_TX_RINGS_ONLY))) { - D("Error: only NR_REG_ALL_NIC supported with netmap passthrough"); - return EINVAL; - } - for_rx_tx(t) { if (nr_flags & excluded_direction[t]) { priv->np_qfirst[t] = priv->np_qlast[t] = 0; @@ -1823,6 +1802,7 @@ } switch (nr_mode) { case NR_REG_ALL_NIC: + case NR_REG_NULL: priv->np_qfirst[t] = 0; priv->np_qlast[t] = nma_get_nrings(na, t); ND("ALL/PIPE: %s %d %d", nm_txrx2str(t), @@ -1861,7 +1841,7 @@ return EINVAL; } } - priv->np_flags = nr_flags | nr_mode; // TODO + priv->np_flags = nr_flags; /* Allow transparent forwarding mode in the host --> nic * direction only if all the TX hw rings have been opened. */ @@ -1927,6 +1907,7 @@ } priv->np_flags = 0; priv->np_txpoll = 0; + priv->np_kloop_state = 0; } @@ -2021,6 +2002,110 @@ return (priv->np_qfirst[NR_RX] != priv->np_qlast[NR_RX]); } +/* Validate the CSB entries for both directions (atok and ktoa). + * To be called under NMG_LOCK(). 
*/ +static int +netmap_csb_validate(struct netmap_priv_d *priv, struct nmreq_opt_csb *csbo) +{ + struct nm_csb_atok *csb_atok_base = + (struct nm_csb_atok *)(uintptr_t)csbo->csb_atok; + struct nm_csb_ktoa *csb_ktoa_base = + (struct nm_csb_ktoa *)(uintptr_t)csbo->csb_ktoa; + enum txrx t; + int num_rings[NR_TXRX], tot_rings; + size_t entry_size[2]; + void *csb_start[2]; + int i; + + if (priv->np_kloop_state & NM_SYNC_KLOOP_RUNNING) { + nm_prerr("Cannot update CSB while kloop is running\n"); + return EBUSY; + } + + tot_rings = 0; + for_rx_tx(t) { + num_rings[t] = priv->np_qlast[t] - priv->np_qfirst[t]; + tot_rings += num_rings[t]; + } + if (tot_rings <= 0) + return 0; + + if (!(priv->np_flags & NR_EXCLUSIVE)) { + nm_prerr("CSB mode requires NR_EXCLUSIVE\n"); + return EINVAL; + } + + entry_size[0] = sizeof(*csb_atok_base); + entry_size[1] = sizeof(*csb_ktoa_base); + csb_start[0] = (void *)csb_atok_base; + csb_start[1] = (void *)csb_ktoa_base; + + for (i = 0; i < 2; i++) { + /* On Linux we could use access_ok() to simplify + * the validation. However, the advantage of + * this approach is that it works also on + * FreeBSD. */ + size_t csb_size = tot_rings * entry_size[i]; + void *tmp; + int err; + + if ((uintptr_t)csb_start[i] & (entry_size[i]-1)) { + nm_prerr("Unaligned CSB address\n"); + return EINVAL; + } + + tmp = nm_os_malloc(csb_size); + if (!tmp) + return ENOMEM; + if (i == 0) { + /* Application --> kernel direction. */ + err = copyin(csb_start[i], tmp, csb_size); + } else { + /* Kernel --> application direction. */ + memset(tmp, 0, csb_size); + err = copyout(tmp, csb_start[i], csb_size); + } + nm_os_free(tmp); + if (err) { + nm_prerr("Invalid CSB address\n"); + return err; + } + } + + priv->np_csb_atok_base = csb_atok_base; + priv->np_csb_ktoa_base = csb_ktoa_base; + + /* Initialize the CSB. 
*/ + for_rx_tx(t) { + for (i = 0; i < num_rings[t]; i++) { + struct netmap_kring *kring = + NMR(priv->np_na, t)[i + priv->np_qfirst[t]]; + struct nm_csb_atok *csb_atok = csb_atok_base + i; + struct nm_csb_ktoa *csb_ktoa = csb_ktoa_base + i; + + if (t == NR_RX) { + csb_atok += num_rings[NR_TX]; + csb_ktoa += num_rings[NR_TX]; + } + + CSB_WRITE(csb_atok, head, kring->rhead); + CSB_WRITE(csb_atok, cur, kring->rcur); + CSB_WRITE(csb_atok, appl_need_kick, 1); + CSB_WRITE(csb_atok, sync_flags, 1); + CSB_WRITE(csb_ktoa, hwcur, kring->nr_hwcur); + CSB_WRITE(csb_ktoa, hwtail, kring->nr_hwtail); + CSB_WRITE(csb_ktoa, kern_need_kick, 1); + + nm_prinf("csb_init for kring %s: head %u, cur %u, " + "hwcur %u, hwtail %u\n", kring->name, + kring->rhead, kring->rcur, kring->nr_hwcur, + kring->nr_hwtail); + } + } + + return 0; +} + /* * possibly move the interface to netmap-mode. * If success it returns a pointer to netmap_if, otherwise NULL. @@ -2307,7 +2392,6 @@ struct ifnet *ifp = NULL; int error = 0; u_int i, qfirst, qlast; - struct netmap_if *nifp; struct netmap_kring **krings; int sync_flags; enum txrx t; @@ -2316,14 +2400,10 @@ case NIOCCTRL: { struct nmreq_header *hdr = (struct nmreq_header *)data; - if (hdr->nr_version != NETMAP_API) { - D("API mismatch for reqtype %d: got %d need %d", - hdr->nr_version, - hdr->nr_version, NETMAP_API); - hdr->nr_version = NETMAP_API; - } if (hdr->nr_version < NETMAP_MIN_API || hdr->nr_version > NETMAP_MAX_API) { + D("API mismatch: got %d need %d", + hdr->nr_version, NETMAP_API); return EINVAL; } @@ -2345,12 +2425,14 @@ case NETMAP_REQ_REGISTER: { struct nmreq_register *req = (struct nmreq_register *)(uintptr_t)hdr->nr_body; + struct netmap_if *nifp; + /* Protect access to priv from concurrent requests. 
*/ NMG_LOCK(); do { + struct nmreq_option *opt; u_int memflags; #ifdef WITH_EXTMEM - struct nmreq_option *opt; #endif /* WITH_EXTMEM */ if (priv->np_nifp != NULL) { /* thread already registered */ @@ -2397,6 +2479,8 @@ } if (na->virt_hdr_len && !(req->nr_flags & NR_ACCEPT_VNET_HDR)) { + D("virt_hdr_len=%d, but application does " + "not accept it", na->virt_hdr_len); error = EIO; break; } @@ -2406,6 +2490,23 @@ if (error) { /* reg. failed, release priv and ref */ break; } + + opt = nmreq_findoption((struct nmreq_option *)(uintptr_t)hdr->nr_options, + NETMAP_REQ_OPT_CSB); + if (opt != NULL) { + struct nmreq_opt_csb *csbo = + (struct nmreq_opt_csb *)opt; + error = nmreq_checkduplicate(opt); + if (!error) { + error = netmap_csb_validate(priv, csbo); + } + opt->nro_status = error; + if (error) { + netmap_do_unregif(priv); + break; + } + } + nifp = priv->np_nifp; priv->np_td = td; /* for debugging purposes */ @@ -2473,6 +2574,7 @@ * so that we can call netmap_get_na(). */ struct nmreq_register regreq; bzero(&regreq, sizeof(regreq)); + regreq.nr_mode = NR_REG_ALL_NIC; regreq.nr_tx_slots = req->nr_tx_slots; regreq.nr_rx_slots = req->nr_rx_slots; regreq.nr_tx_rings = req->nr_tx_rings; @@ -2505,8 +2607,6 @@ break; if (na == NULL) /* only memory info */ break; - req->nr_offset = 0; - req->nr_rx_slots = req->nr_tx_slots = 0; netmap_update_config(na); req->nr_rx_rings = na->num_rx_rings; req->nr_tx_rings = na->num_tx_rings; @@ -2519,17 +2619,17 @@ } #ifdef WITH_VALE case NETMAP_REQ_VALE_ATTACH: { - error = nm_bdg_ctl_attach(hdr, NULL /* userspace request */); + error = netmap_vale_attach(hdr, NULL /* userspace request */); break; } case NETMAP_REQ_VALE_DETACH: { - error = nm_bdg_ctl_detach(hdr, NULL /* userspace request */); + error = netmap_vale_detach(hdr, NULL /* userspace request */); break; } case NETMAP_REQ_VALE_LIST: { - error = netmap_bdg_list(hdr); + error = netmap_vale_list(hdr); break; } @@ -2540,6 +2640,8 @@ * so that we can call netmap_get_bdg_na().
*/ struct nmreq_register regreq; bzero(&regreq, sizeof(regreq)); + regreq.nr_mode = NR_REG_ALL_NIC; + /* For now we only support virtio-net headers, and only for * VALE ports, but this may change in future. Valid lengths * for the virtio-net header are 0 (no header), 10 and 12. */ @@ -2581,6 +2683,7 @@ struct ifnet *ifp; bzero(&regreq, sizeof(regreq)); + regreq.nr_mode = NR_REG_ALL_NIC; NMG_LOCK(); hdr->nr_reqtype = NETMAP_REQ_REGISTER; hdr->nr_body = (uintptr_t)&regreq; @@ -2612,22 +2715,80 @@ } #endif /* WITH_VALE */ case NETMAP_REQ_POOLS_INFO_GET: { + /* Get information from the memory allocator used for + * hdr->nr_name. */ struct nmreq_pools_info *req = (struct nmreq_pools_info *)(uintptr_t)hdr->nr_body; - /* Get information from the memory allocator. This - * netmap device must already be bound to a port. - * Note that hdr->nr_name is ignored. */ NMG_LOCK(); - if (priv->np_na && priv->np_na->nm_mem) { - struct netmap_mem_d *nmd = priv->np_na->nm_mem; + do { + /* Build a nmreq_register out of the nmreq_pools_info, + * so that we can call netmap_get_na(). */ + struct nmreq_register regreq; + bzero(&regreq, sizeof(regreq)); + regreq.nr_mem_id = req->nr_mem_id; + regreq.nr_mode = NR_REG_ALL_NIC; + + hdr->nr_reqtype = NETMAP_REQ_REGISTER; + hdr->nr_body = (uintptr_t)&regreq; + error = netmap_get_na(hdr, &na, &ifp, NULL, 1 /* create */); + hdr->nr_reqtype = NETMAP_REQ_POOLS_INFO_GET; /* reset type */ + hdr->nr_body = (uintptr_t)req; /* reset nr_body */ + if (error) { + na = NULL; + ifp = NULL; + break; + } + nmd = na->nm_mem; /* grab the memory allocator */ + if (nmd == NULL) { + error = EINVAL; + break; + } + + /* Finalize the memory allocator, get the pools + * information and release the allocator.
*/ + error = netmap_mem_finalize(nmd, na); + if (error) { + break; + } error = netmap_mem_pools_info_get(req, nmd); + netmap_mem_drop(na); + } while (0); + netmap_unget_na(na, ifp); + NMG_UNLOCK(); + break; + } + + case NETMAP_REQ_CSB_ENABLE: { + struct nmreq_option *opt; + + opt = nmreq_findoption((struct nmreq_option *)(uintptr_t)hdr->nr_options, + NETMAP_REQ_OPT_CSB); + if (opt == NULL) { + error = EINVAL; } else { - error = EINVAL; + struct nmreq_opt_csb *csbo = + (struct nmreq_opt_csb *)opt; + error = nmreq_checkduplicate(opt); + if (!error) { + NMG_LOCK(); + error = netmap_csb_validate(priv, csbo); + NMG_UNLOCK(); + } + opt->nro_status = error; } - NMG_UNLOCK(); break; } + case NETMAP_REQ_SYNC_KLOOP_START: { + error = netmap_sync_kloop(priv, hdr); + break; + } + + case NETMAP_REQ_SYNC_KLOOP_STOP: { + error = netmap_sync_kloop_stop(priv); + break; + } + default: { error = EINVAL; break; @@ -2641,22 +2802,20 @@ case NIOCTXSYNC: case NIOCRXSYNC: { - nifp = priv->np_nifp; - - if (nifp == NULL) { + if (unlikely(priv->np_nifp == NULL)) { error = ENXIO; break; } mb(); /* make sure following reads are not from cache */ - na = priv->np_na; /* we have a reference */ - - if (na == NULL) { - D("Internal error: nifp != NULL && na == NULL"); - error = ENXIO; + if (unlikely(priv->np_csb_atok_base)) { + nm_prerr("Invalid sync in CSB mode\n"); + error = EBUSY; break; } + na = priv->np_na; /* we have a reference */ + mbq_init(&q); t = (cmd == NIOCTXSYNC ? 
NR_TX : NR_RX); krings = NMR(na, t); @@ -2739,6 +2898,8 @@ case NETMAP_REQ_VALE_NEWIF: return sizeof(struct nmreq_vale_newif); case NETMAP_REQ_VALE_DELIF: + case NETMAP_REQ_SYNC_KLOOP_STOP: + case NETMAP_REQ_CSB_ENABLE: return 0; case NETMAP_REQ_VALE_POLLING_ENABLE: case NETMAP_REQ_VALE_POLLING_DISABLE: @@ -2745,12 +2906,14 @@ return sizeof(struct nmreq_vale_polling); case NETMAP_REQ_POOLS_INFO_GET: return sizeof(struct nmreq_pools_info); + case NETMAP_REQ_SYNC_KLOOP_START: + return sizeof(struct nmreq_sync_kloop_start); } return 0; } static size_t -nmreq_opt_size_by_type(uint16_t nro_reqtype) +nmreq_opt_size_by_type(uint32_t nro_reqtype, uint64_t nro_size) { size_t rv = sizeof(struct nmreq_option); #ifdef NETMAP_REQ_OPT_DEBUG @@ -2763,6 +2926,13 @@ rv = sizeof(struct nmreq_opt_extmem); break; #endif /* WITH_EXTMEM */ + case NETMAP_REQ_OPT_SYNC_KLOOP_EVENTFDS: + if (nro_size >= rv) + rv = nro_size; + break; + case NETMAP_REQ_OPT_CSB: + rv = sizeof(struct nmreq_opt_csb); + break; } /* subtract the common header */ return rv - sizeof(struct nmreq_option); @@ -2809,7 +2979,7 @@ if (error) goto out_err; optsz += sizeof(*src); - optsz += nmreq_opt_size_by_type(buf.nro_reqtype); + optsz += nmreq_opt_size_by_type(buf.nro_reqtype, buf.nro_size); if (rqsz + optsz > NETMAP_REQ_MAXSIZE) { error = EMSGSIZE; goto out_err; @@ -2863,7 +3033,8 @@ p = (char *)(opt + 1); /* copy the option body */ - optsz = nmreq_opt_size_by_type(opt->nro_reqtype); + optsz = nmreq_opt_size_by_type(opt->nro_reqtype, + opt->nro_size); if (optsz) { /* the option body follows the option header */ error = copyin(src + 1, p, optsz); @@ -2937,7 +3108,8 @@ /* copy the option body only if there was no error */ if (!rerror && !src->nro_status) { - optsz = nmreq_opt_size_by_type(src->nro_reqtype); + optsz = nmreq_opt_size_by_type(src->nro_reqtype, + src->nro_size); if (optsz) { error = copyout(src + 1, dst + 1, optsz); if (error) { @@ -3015,7 +3187,8 @@ struct netmap_adapter *na; struct netmap_kring *kring; 
struct netmap_ring *ring; - u_int i, check_all_tx, check_all_rx, want[NR_TXRX], revents = 0; + u_int i, want[NR_TXRX], revents = 0; + NM_SELINFO_T *si[NR_TXRX]; #define want_tx want[NR_TX] #define want_rx want[NR_RX] struct mbq q; /* packets from RX hw queues to host stack */ @@ -3038,8 +3211,7 @@ mbq_init(&q); - if (priv->np_nifp == NULL) { - D("No if registered"); + if (unlikely(priv->np_nifp == NULL)) { return POLLERR; } mb(); /* make sure following reads are not from cache */ @@ -3046,9 +3218,14 @@ na = priv->np_na; - if (!nm_netmap_on(na)) + if (unlikely(!nm_netmap_on(na))) return POLLERR; + if (unlikely(priv->np_csb_atok_base)) { + nm_prerr("Invalid poll in CSB mode\n"); + return POLLERR; + } + if (netmap_verbose & 0x8000) D("device %s events 0x%x", na->name, events); want_tx = events & (POLLOUT | POLLWRNORM); @@ -3055,10 +3232,10 @@ want_rx = events & (POLLIN | POLLRDNORM); /* - * check_all_{tx|rx} are set if the card has more than one queue AND - * the file descriptor is bound to all of them. If so, we sleep on - * the "global" selinfo, otherwise we sleep on individual selinfo - * (FreeBSD only allows two selinfo's per file descriptor). + * If the card has more than one queue AND the file descriptor is + * bound to all of them, we sleep on the "global" selinfo, otherwise + * we sleep on individual selinfo (FreeBSD only allows two selinfo's + * per file descriptor). * The interrupt routine in the driver wake one or the other * (or both) depending on which clients are active. * @@ -3067,8 +3244,10 @@ * there are pending packets to send. The latter can be disabled * passing NETMAP_NO_TX_POLL in the NIOCREG call. */ - check_all_tx = nm_si_user(priv, NR_TX); - check_all_rx = nm_si_user(priv, NR_RX); + si[NR_RX] = nm_si_user(priv, NR_RX) ? &na->si[NR_RX] : + &na->rx_rings[priv->np_qfirst[NR_RX]]->si; + si[NR_TX] = nm_si_user(priv, NR_TX) ? 
&na->si[NR_TX] : + &na->tx_rings[priv->np_qfirst[NR_TX]]->si; #ifdef __FreeBSD__ /* @@ -3105,10 +3284,8 @@ #ifdef linux /* The selrecord must be unconditional on linux. */ - nm_os_selrecord(sr, check_all_tx ? - &na->si[NR_TX] : &na->tx_rings[priv->np_qfirst[NR_TX]]->si); - nm_os_selrecord(sr, check_all_rx ? - &na->si[NR_RX] : &na->rx_rings[priv->np_qfirst[NR_RX]]->si); + nm_os_selrecord(sr, si[NR_RX]); + nm_os_selrecord(sr, si[NR_TX]); #endif /* linux */ /* @@ -3173,8 +3350,7 @@ send_down = 0; if (want_tx && retry_tx && sr) { #ifndef linux - nm_os_selrecord(sr, check_all_tx ? - &na->si[NR_TX] : &na->tx_rings[priv->np_qfirst[NR_TX]]->si); + nm_os_selrecord(sr, si[NR_TX]); #endif /* !linux */ retry_tx = 0; goto flush_tx; @@ -3234,8 +3410,7 @@ #ifndef linux if (retry_rx && sr) { - nm_os_selrecord(sr, check_all_rx ? - &na->si[NR_RX] : &na->rx_rings[priv->np_qfirst[NR_RX]]->si); + nm_os_selrecord(sr, si[NR_RX]); } #endif /* !linux */ if (send_down || retry_rx) { @@ -3328,12 +3503,6 @@ int netmap_attach_common(struct netmap_adapter *na) { - if (na->num_tx_rings == 0 || na->num_rx_rings == 0) { - D("%s: invalid rings tx %d rx %d", - na->name, na->num_tx_rings, na->num_rx_rings); - return EINVAL; - } - if (!na->rx_buf_maxsize) { /* Set a conservative default (larger is safer). 
*/ na->rx_buf_maxsize = PAGE_SIZE; @@ -3443,6 +3612,12 @@ if (arg == NULL || arg->ifp == NULL) return EINVAL; + if (arg->num_tx_rings == 0 || arg->num_rx_rings == 0) { + D("%s: invalid rings tx %d rx %d", + arg->name, arg->num_tx_rings, arg->num_rx_rings); + return EINVAL; + } + ifp = arg->ifp; if (NM_NA_CLASH(ifp)) { /* If NA(ifp) is not null but there is no valid netmap @@ -3458,7 +3633,7 @@ goto fail; hwna->up = *arg; hwna->up.na_flags |= NAF_HOST_RINGS | NAF_NATIVE; - strncpy(hwna->up.name, ifp->if_xname, sizeof(hwna->up.name)); + strlcpy(hwna->up.name, ifp->if_xname, sizeof(hwna->up.name)); if (override_reg) { hwna->nm_hw_register = hwna->up.nm_register; hwna->up.nm_register = netmap_hw_reg; @@ -3879,7 +4054,7 @@ struct ifnet *ifp = na->ifp; /* We undo the setup for intercepting packets only if we are the - * last user of this adapapter. */ + * last user of this adapter. */ if (na->active_fds > 0) { return; } @@ -3890,7 +4065,6 @@ na->na_flags &= ~NAF_NETMAP_ON; } - /* * Module loader and unloader * Index: sys/dev/netmap/netmap_bdg.h =================================================================== --- sys/dev/netmap/netmap_bdg.h +++ sys/dev/netmap/netmap_bdg.h @@ -44,6 +44,40 @@ #endif /* __FreeBSD__ */ +/* + * The following bridge-related functions are used by other + * kernel modules. + * + * VALE only supports unicast or broadcast. The lookup + * function can return 0 .. NM_BDG_MAXPORTS-1 for regular ports, + * NM_BDG_MAXPORTS for broadcast, NM_BDG_MAXPORTS+1 to indicate + * drop. 
+ */ +typedef uint32_t (*bdg_lookup_fn_t)(struct nm_bdg_fwd *ft, uint8_t *ring_nr, + struct netmap_vp_adapter *, void *private_data); +typedef int (*bdg_config_fn_t)(struct nm_ifreq *); +typedef void (*bdg_dtor_fn_t)(const struct netmap_vp_adapter *); +typedef void *(*bdg_update_private_data_fn_t)(void *private_data, void *callback_data, int *error); +typedef int (*bdg_vp_create_fn_t)(struct nmreq_header *hdr, + struct ifnet *ifp, struct netmap_mem_d *nmd, + struct netmap_vp_adapter **ret); +typedef int (*bdg_bwrap_attach_fn_t)(const char *nr_name, struct netmap_adapter *hwna); +struct netmap_bdg_ops { + bdg_lookup_fn_t lookup; + bdg_config_fn_t config; + bdg_dtor_fn_t dtor; + bdg_vp_create_fn_t vp_create; + bdg_bwrap_attach_fn_t bwrap_attach; + char name[IFNAMSIZ]; +}; +int netmap_bwrap_attach(const char *name, struct netmap_adapter *, struct netmap_bdg_ops *); +int netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *private_data, void *auth_token); + +#define NM_BRIDGES 8 /* number of bridges */ +#define NM_BDG_MAXPORTS 254 /* up to 254 */ +#define NM_BDG_BROADCAST NM_BDG_MAXPORTS +#define NM_BDG_NOPORT (NM_BDG_MAXPORTS+1) + /* XXX Should go away after fixing find_bridge() - Michio */ #define NM_BDG_HASH 1024 /* forwarding table entries */ @@ -95,7 +129,8 @@ * different ring index. * The function is set by netmap_bdg_regops(). */ - struct netmap_bdg_ops *bdg_ops; + struct netmap_bdg_ops bdg_ops; + struct netmap_bdg_ops bdg_saved_ops; /* * Contains the data structure used by the bdg_ops.lookup function. 
@@ -111,6 +146,7 @@ */ #define NM_BDG_ACTIVE 1 #define NM_BDG_EXCLUSIVE 2 +#define NM_BDG_NEED_BWRAP 4 uint8_t bdg_flags; @@ -149,6 +185,13 @@ struct netmap_adapter *hwna); int netmap_bwrap_krings_create_common(struct netmap_adapter *na); void netmap_bwrap_krings_delete_common(struct netmap_adapter *na); +struct nm_bridge *netmap_init_bridges2(u_int); +void netmap_uninit_bridges2(struct nm_bridge *, u_int); +int netmap_bdg_update_private_data(const char *name, bdg_update_private_data_fn_t callback, + void *callback_data, void *auth_token); +int netmap_bdg_config(struct nm_ifreq *nifr); +int nm_is_bwrap(struct netmap_adapter *); + #define NM_NEED_BWRAP (-2) #endif /* _NET_NETMAP_BDG_H_ */ Index: sys/dev/netmap/netmap_bdg.c =================================================================== --- sys/dev/netmap/netmap_bdg.c +++ sys/dev/netmap/netmap_bdg.c @@ -126,7 +126,7 @@ * Right now we have a static array and deletions are protected * by an exclusive lock. */ -static struct nm_bridge *nm_bridges; +struct nm_bridge *nm_bridges; #endif /* !CONFIG_NET_NS */ @@ -139,15 +139,15 @@ (c == '_'); } -/* Validate the name of a VALE bridge port and return the +/* Validate the name of a bdg port and return the * position of the ":" character. */ static int -nm_vale_name_validate(const char *name) +nm_bdg_name_validate(const char *name, size_t prefixlen) { int colon_pos = -1; int i; - if (!name || strlen(name) < strlen(NM_BDG_NAME)) { + if (!name || strlen(name) < prefixlen) { return -1; } @@ -186,7 +186,8 @@ netmap_bns_getbridges(&bridges, &num_bridges); - namelen = nm_vale_name_validate(name); + namelen = nm_bdg_name_validate(name, + (ops != NULL ? strlen(ops->name) : 0)); if (namelen < 0) { D("invalid bridge name %s", name ? 
name : NULL); return NULL; @@ -222,7 +223,7 @@ for (i = 0; i < NM_BDG_MAXPORTS; i++) b->bdg_port_index[i] = i; /* set the default function */ - b->bdg_ops = ops; + b->bdg_ops = b->bdg_saved_ops = *ops; b->private_data = b->ht; b->bdg_flags = 0; NM_BNS_GET(b); @@ -240,13 +241,49 @@ ND("marking bridge %s as free", b->bdg_basename); nm_os_free(b->ht); - b->bdg_ops = NULL; + memset(&b->bdg_ops, 0, sizeof(b->bdg_ops)); + memset(&b->bdg_saved_ops, 0, sizeof(b->bdg_saved_ops)); b->bdg_flags = 0; NM_BNS_PUT(b); return 0; } +/* Called by external kernel modules (e.g., Openvswitch). + * to modify the private data previously given to regops(). + * 'name' may be just bridge's name (including ':' if it + * is not just NM_BDG_NAME). + * Called without NMG_LOCK. + */ +int +netmap_bdg_update_private_data(const char *name, bdg_update_private_data_fn_t callback, + void *callback_data, void *auth_token) +{ + void *private_data = NULL; + struct nm_bridge *b; + int error = 0; + NMG_LOCK(); + b = nm_find_bridge(name, 0 /* don't create */, NULL); + if (!b) { + error = EINVAL; + goto unlock_update_priv; + } + if (!nm_bdg_valid_auth_token(b, auth_token)) { + error = EACCES; + goto unlock_update_priv; + } + BDG_WLOCK(b); + private_data = callback(b->private_data, callback_data, &error); + b->private_data = private_data; + BDG_WUNLOCK(b); + +unlock_update_priv: + NMG_UNLOCK(); + return error; +} + + + /* remove from bridge b the ports in slots hw and sw * (sw can be -1 if not needed) */ @@ -295,8 +332,8 @@ } BDG_WLOCK(b); - if (b->bdg_ops->dtor) - b->bdg_ops->dtor(b->bdg_ports[s_hw]); + if (b->bdg_ops.dtor) + b->bdg_ops.dtor(b->bdg_ports[s_hw]); b->bdg_ports[s_hw] = NULL; if (s_sw >= 0) { b->bdg_ports[s_sw] = NULL; @@ -428,7 +465,7 @@ } /* bdg_netmap_attach creates a struct netmap_adapter */ - error = b->bdg_ops->vp_create(hdr, NULL, nmd, &vpna); + error = b->bdg_ops.vp_create(hdr, NULL, nmd, &vpna); if (error) { D("error %d", error); goto out; @@ -459,7 +496,7 @@ /* host adapter might not 
be created */ error = hw->nm_bdg_attach(nr_name, hw, b); if (error == NM_NEED_BWRAP) { - error = b->bdg_ops->bwrap_attach(nr_name, hw); + error = b->bdg_ops.bwrap_attach(nr_name, hw); } if (error) goto out; @@ -502,143 +539,14 @@ return error; } -/* Process NETMAP_REQ_VALE_ATTACH. - */ + int -nm_bdg_ctl_attach(struct nmreq_header *hdr, void *auth_token) -{ - struct nmreq_vale_attach *req = - (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body; - struct netmap_vp_adapter * vpna; - struct netmap_adapter *na = NULL; - struct netmap_mem_d *nmd = NULL; - struct nm_bridge *b = NULL; - int error; - - NMG_LOCK(); - /* permission check for modified bridges */ - b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL); - if (b && !nm_bdg_valid_auth_token(b, auth_token)) { - error = EACCES; - goto unlock_exit; - } - - if (req->reg.nr_mem_id) { - nmd = netmap_mem_find(req->reg.nr_mem_id); - if (nmd == NULL) { - error = EINVAL; - goto unlock_exit; - } - } - - /* check for existing one */ - error = netmap_get_vale_na(hdr, &na, nmd, 0); - if (na) { - error = EBUSY; - goto unref_exit; - } - error = netmap_get_vale_na(hdr, &na, - nmd, 1 /* create if not exists */); - if (error) { /* no device */ - goto unlock_exit; - } - - if (na == NULL) { /* VALE prefix missing */ - error = EINVAL; - goto unlock_exit; - } - - if (NETMAP_OWNED_BY_ANY(na)) { - error = EBUSY; - goto unref_exit; - } - - if (na->nm_bdg_ctl) { - /* nop for VALE ports. The bwrap needs to put the hwna - * in netmap mode (see netmap_bwrap_bdg_ctl) - */ - error = na->nm_bdg_ctl(hdr, na); - if (error) - goto unref_exit; - ND("registered %s to netmap-mode", na->name); - } - vpna = (struct netmap_vp_adapter *)na; - req->port_index = vpna->bdg_port; - NMG_UNLOCK(); - return 0; - -unref_exit: - netmap_adapter_put(na); -unlock_exit: - NMG_UNLOCK(); - return error; -} - -static inline int nm_is_bwrap(struct netmap_adapter *na) { return na->nm_register == netmap_bwrap_reg; } -/* Process NETMAP_REQ_VALE_DETACH. 
- */ -int -nm_bdg_ctl_detach(struct nmreq_header *hdr, void *auth_token) -{ - struct nmreq_vale_detach *nmreq_det = (void *)(uintptr_t)hdr->nr_body; - struct netmap_vp_adapter *vpna; - struct netmap_adapter *na; - struct nm_bridge *b = NULL; - int error; - NMG_LOCK(); - /* permission check for modified bridges */ - b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL); - if (b && !nm_bdg_valid_auth_token(b, auth_token)) { - error = EACCES; - goto unlock_exit; - } - - error = netmap_get_vale_na(hdr, &na, NULL, 0 /* don't create */); - if (error) { /* no device, or another bridge or user owns the device */ - goto unlock_exit; - } - - if (na == NULL) { /* VALE prefix missing */ - error = EINVAL; - goto unlock_exit; - } else if (nm_is_bwrap(na) && - ((struct netmap_bwrap_adapter *)na)->na_polling_state) { - /* Don't detach a NIC with polling */ - error = EBUSY; - goto unref_exit; - } - - vpna = (struct netmap_vp_adapter *)na; - if (na->na_vp != vpna) { - /* trying to detach first attach of VALE persistent port attached - * to 2 bridges - */ - error = EBUSY; - goto unref_exit; - } - nmreq_det->port_index = vpna->bdg_port; - - if (na->nm_bdg_ctl) { - /* remove the port from bridge. The bwrap - * also needs to put the hwna in normal mode - */ - error = na->nm_bdg_ctl(hdr, na); - } - -unref_exit: - netmap_adapter_put(na); -unlock_exit: - NMG_UNLOCK(); - return error; - -} - struct nm_bdg_polling_state; struct nm_bdg_kthread { @@ -661,7 +569,7 @@ }; static void -netmap_bwrap_polling(void *data, int is_kthread) +netmap_bwrap_polling(void *data) { struct nm_bdg_kthread *nbk = data; struct netmap_bwrap_adapter *bna; @@ -693,7 +601,6 @@ bzero(&kcfg, sizeof(kcfg)); kcfg.worker_fn = netmap_bwrap_polling; - kcfg.use_kthread = 1; for (i = 0; i < bps->ncpus; i++) { struct nm_bdg_kthread *t = bps->kthreads + i; int all = (bps->ncpus == 1 && @@ -932,86 +839,6 @@ return error; } -/* Process NETMAP_REQ_VALE_LIST. 
*/ -int -netmap_bdg_list(struct nmreq_header *hdr) -{ - struct nmreq_vale_list *req = - (struct nmreq_vale_list *)(uintptr_t)hdr->nr_body; - int namelen = strlen(hdr->nr_name); - struct nm_bridge *b, *bridges; - struct netmap_vp_adapter *vpna; - int error = 0, i, j; - u_int num_bridges; - - netmap_bns_getbridges(&bridges, &num_bridges); - - /* this is used to enumerate bridges and ports */ - if (namelen) { /* look up indexes of bridge and port */ - if (strncmp(hdr->nr_name, NM_BDG_NAME, - strlen(NM_BDG_NAME))) { - return EINVAL; - } - NMG_LOCK(); - b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL); - if (!b) { - NMG_UNLOCK(); - return ENOENT; - } - - req->nr_bridge_idx = b - bridges; /* bridge index */ - req->nr_port_idx = NM_BDG_NOPORT; - for (j = 0; j < b->bdg_active_ports; j++) { - i = b->bdg_port_index[j]; - vpna = b->bdg_ports[i]; - if (vpna == NULL) { - D("This should not happen"); - continue; - } - /* the former and the latter identify a - * virtual port and a NIC, respectively - */ - if (!strcmp(vpna->up.name, hdr->nr_name)) { - req->nr_port_idx = i; /* port index */ - break; - } - } - NMG_UNLOCK(); - } else { - /* return the first non-empty entry starting from - * bridge nr_arg1 and port nr_arg2. - * - * Users can detect the end of the same bridge by - * seeing the new and old value of nr_arg1, and can - * detect the end of all the bridge by error != 0 - */ - i = req->nr_bridge_idx; - j = req->nr_port_idx; - - NMG_LOCK(); - for (error = ENOENT; i < NM_BRIDGES; i++) { - b = bridges + i; - for ( ; j < NM_BDG_MAXPORTS; j++) { - if (b->bdg_ports[j] == NULL) - continue; - vpna = b->bdg_ports[j]; - /* write back the VALE switch name */ - strncpy(hdr->nr_name, vpna->up.name, - (size_t)IFNAMSIZ); - error = 0; - goto out; - } - j = 0; /* following bridges scan from 0 */ - } - out: - req->nr_bridge_idx = i; - req->nr_port_idx = j; - NMG_UNLOCK(); - } - - return error; -} - /* Called by external kernel modules (e.g., Openvswitch). 
* to set configure/lookup/dtor functions of a VALE instance. * Register callbacks to the given bridge. 'name' may be just @@ -1041,12 +868,19 @@ if (!bdg_ops) { /* resetting the bridge */ bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH); - b->bdg_ops = NULL; + b->bdg_ops = b->bdg_saved_ops; b->private_data = b->ht; } else { /* modifying the bridge */ b->private_data = private_data; - b->bdg_ops = bdg_ops; +#define nm_bdg_override(m) if (bdg_ops->m) b->bdg_ops.m = bdg_ops->m + nm_bdg_override(lookup); + nm_bdg_override(config); + nm_bdg_override(dtor); + nm_bdg_override(vp_create); + nm_bdg_override(bwrap_attach); +#undef nm_bdg_override + } BDG_WUNLOCK(b); @@ -1071,8 +905,8 @@ NMG_UNLOCK(); /* Don't call config() with NMG_LOCK() held */ BDG_RLOCK(b); - if (b->bdg_ops->config != NULL) - error = b->bdg_ops->config(nr); + if (b->bdg_ops.config != NULL) + error = b->bdg_ops.config(nr); BDG_RUNLOCK(b); return error; } @@ -1593,8 +1427,8 @@ ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)", na->name, ring_n, kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease, - ring->head, ring->cur, ring->tail, - hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_ring->rtail); + kring->rhead, kring->rcur, kring->rtail, + hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail); /* second step: the new packets are sent on the tx ring * (which is actually the same ring) */ @@ -1612,7 +1446,7 @@ ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)", na->name, ring_n, kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease, - ring->head, ring->cur, ring->tail, + kring->rhead, kring->rcur, kring->rtail, hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail); put_out: nm_kr_put(hw_kring); @@ -1756,6 +1590,8 @@ hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */ hostna->rx_buf_maxsize = hwna->rx_buf_maxsize; } + if (hwna->na_flags & NAF_MOREFRAG) + na->na_flags |= NAF_MOREFRAG; ND("%s<->%s txr %d txd %d rxr %d rxd %d", na->name, 
ifp->if_xname, Index: sys/dev/netmap/netmap_freebsd.c =================================================================== --- sys/dev/netmap/netmap_freebsd.c +++ sys/dev/netmap/netmap_freebsd.c @@ -735,9 +735,9 @@ } #endif /* WITH_EXTMEM */ -/* ======================== PTNETMAP SUPPORT ========================== */ +/* ================== PTNETMAP GUEST SUPPORT ==================== */ -#ifdef WITH_PTNETMAP_GUEST +#ifdef WITH_PTNETMAP #include #include #include /* bus_dmamap_* */ @@ -932,7 +932,7 @@ return bus_generic_shutdown(dev); } -#endif /* WITH_PTNETMAP_GUEST */ +#endif /* WITH_PTNETMAP */ /* * In order to track whether pages are still mapped, we hook into @@ -1145,8 +1145,8 @@ } struct nm_kctx_ctx { - struct thread *user_td; /* thread user-space (kthread creator) to send ioctl */ - struct ptnetmap_cfgentry_bhyve cfg; + /* Userspace thread (kthread creator). */ + struct thread *user_td; /* worker function and parameter */ nm_kctx_worker_fn_t worker_fn; @@ -1161,7 +1161,6 @@ struct nm_kctx { struct thread *worker; struct mtx worker_lock; - uint64_t scheduled; /* pending wake_up request */ struct nm_kctx_ctx worker_ctx; int run; /* used to stop kthread */ int attach_user; /* kthread attached to user_process */ @@ -1168,49 +1167,11 @@ int affinity; }; -void inline -nm_os_kctx_worker_wakeup(struct nm_kctx *nmk) -{ - /* - * There may be a race between FE and BE, - * which call both this function, and worker kthread, - * that reads nmk->scheduled. - * - * For us it is not important the counter value, - * but simply that it has changed since the last - * time the kthread saw it. 
- */ - mtx_lock(&nmk->worker_lock); - nmk->scheduled++; - if (nmk->worker_ctx.cfg.wchan) { - wakeup((void *)(uintptr_t)nmk->worker_ctx.cfg.wchan); - } - mtx_unlock(&nmk->worker_lock); -} - -void inline -nm_os_kctx_send_irq(struct nm_kctx *nmk) -{ - struct nm_kctx_ctx *ctx = &nmk->worker_ctx; - int err; - - if (ctx->user_td && ctx->cfg.ioctl_fd > 0) { - err = kern_ioctl(ctx->user_td, ctx->cfg.ioctl_fd, ctx->cfg.ioctl_cmd, - (caddr_t)&ctx->cfg.ioctl_data); - if (err) { - D("kern_ioctl error: %d ioctl parameters: fd %d com %lu data %p", - err, ctx->cfg.ioctl_fd, (unsigned long)ctx->cfg.ioctl_cmd, - &ctx->cfg.ioctl_data); - } - } -} - static void nm_kctx_worker(void *data) { struct nm_kctx *nmk = data; struct nm_kctx_ctx *ctx = &nmk->worker_ctx; - uint64_t old_scheduled = nmk->scheduled; if (nmk->affinity >= 0) { thread_lock(curthread); @@ -1231,30 +1192,8 @@ kthread_suspend_check(); } - /* - * if wchan is not defined, we don't have notification - * mechanism and we continually execute worker_fn() - */ - if (!ctx->cfg.wchan) { - ctx->worker_fn(ctx->worker_private, 1); /* worker body */ - } else { - /* checks if there is a pending notification */ - mtx_lock(&nmk->worker_lock); - if (likely(nmk->scheduled != old_scheduled)) { - old_scheduled = nmk->scheduled; - mtx_unlock(&nmk->worker_lock); - - ctx->worker_fn(ctx->worker_private, 1); /* worker body */ - - continue; - } else if (nmk->run) { - /* wait on event with one second timeout */ - msleep((void *)(uintptr_t)ctx->cfg.wchan, &nmk->worker_lock, - 0, "nmk_ev", hz); - nmk->scheduled++; - } - mtx_unlock(&nmk->worker_lock); - } + /* Continuously execute worker process. 
*/ + ctx->worker_fn(ctx->worker_private); /* worker body */ } kthread_exit(); @@ -1284,11 +1223,6 @@ /* attach kthread to user process (ptnetmap) */ nmk->attach_user = cfg->attach_user; - /* store kick/interrupt configuration */ - if (opaque) { - nmk->worker_ctx.cfg = *((struct ptnetmap_cfgentry_bhyve *)opaque); - } - return nmk; } @@ -1298,9 +1232,13 @@ struct proc *p = NULL; int error = 0; - if (nmk->worker) { + /* Temporarily disable this function as it is currently broken + * and causes kernel crashes. The failure can be triggered by + * the "vale_polling_enable_disable" test in ctrl-api-test.c. */ + return EOPNOTSUPP; + + if (nmk->worker) return EBUSY; - } /* check if we want to attach kthread to user process */ if (nmk->attach_user) { @@ -1329,15 +1267,14 @@ void nm_os_kctx_worker_stop(struct nm_kctx *nmk) { - if (!nmk->worker) { + if (!nmk->worker) return; - } + /* tell to kthread to exit from main loop */ nmk->run = 0; /* wake up kthread if it sleeps */ kthread_resume(nmk->worker); - nm_os_kctx_worker_wakeup(nmk); nmk->worker = NULL; } @@ -1347,12 +1284,10 @@ { if (!nmk) return; - if (nmk->worker) { + + if (nmk->worker) nm_os_kctx_worker_stop(nmk); - } - memset(&nmk->worker_ctx.cfg, 0, sizeof(nmk->worker_ctx.cfg)); - free(nmk, M_DEVBUF); } @@ -1549,6 +1484,7 @@ void nm_os_onattach(struct ifnet *ifp) { + ifp->if_capabilities |= IFCAP_NETMAP; } void Index: sys/dev/netmap/netmap_generic.c =================================================================== --- sys/dev/netmap/netmap_generic.c +++ sys/dev/netmap/netmap_generic.c @@ -1115,7 +1115,7 @@ return ENOMEM; } na = (struct netmap_adapter *)gna; - strncpy(na->name, ifp->if_xname, sizeof(na->name)); + strlcpy(na->name, ifp->if_xname, sizeof(na->name)); na->ifp = ifp; na->num_tx_desc = num_tx_desc; na->num_rx_desc = num_rx_desc; Index: sys/dev/netmap/netmap_kern.h =================================================================== --- sys/dev/netmap/netmap_kern.h +++ sys/dev/netmap/netmap_kern.h @@ -54,15 
+54,15 @@ #if defined(CONFIG_NETMAP_GENERIC) #define WITH_GENERIC #endif -#if defined(CONFIG_NETMAP_PTNETMAP_GUEST) -#define WITH_PTNETMAP_GUEST +#if defined(CONFIG_NETMAP_PTNETMAP) +#define WITH_PTNETMAP #endif -#if defined(CONFIG_NETMAP_PTNETMAP_HOST) -#define WITH_PTNETMAP_HOST -#endif #if defined(CONFIG_NETMAP_SINK) #define WITH_SINK #endif +#if defined(CONFIG_NETMAP_NULL) +#define WITH_NMNULL +#endif #elif defined (_WIN32) #define WITH_VALE // comment out to disable VALE support @@ -69,6 +69,7 @@ #define WITH_PIPES #define WITH_MONITOR #define WITH_GENERIC +#define WITH_NMNULL #else /* neither linux nor windows */ #define WITH_VALE // comment out to disable VALE support @@ -75,9 +76,9 @@ #define WITH_PIPES #define WITH_MONITOR #define WITH_GENERIC -#define WITH_PTNETMAP_HOST /* ptnetmap host support */ -#define WITH_PTNETMAP_GUEST /* ptnetmap guest support */ +#define WITH_PTNETMAP /* ptnetmap guest support */ #define WITH_EXTMEM +#define WITH_NMNULL #endif #if defined(__FreeBSD__) @@ -700,7 +701,7 @@ */ #define NAF_HOST_RINGS 64 /* the adapter supports the host rings */ #define NAF_FORCE_NATIVE 128 /* the adapter is always NATIVE */ -#define NAF_PTNETMAP_HOST 256 /* the adapter supports ptnetmap in the host */ +/* free */ #define NAF_MOREFRAG 512 /* the adapter supports NS_MOREFRAG */ #define NAF_ZOMBIE (1U<<30) /* the nic driver has been unloaded */ #define NAF_BUSY (1U<<31) /* the adapter is used internally and @@ -718,9 +719,9 @@ u_int num_tx_desc; /* number of descriptor in each queue */ u_int num_rx_desc; - /* tx_rings and rx_rings are private but allocated - * as a contiguous chunk of memory. Each array has - * N+1 entries, for the adapter queues and for the host queue. + /* tx_rings and rx_rings are private but allocated as a + * contiguous chunk of memory. Each array has N+K entries, + * N for the hardware rings and K for the host rings. */ struct netmap_kring **tx_rings; /* array of TX rings. */ struct netmap_kring **rx_rings; /* array of RX rings. 
*/ @@ -1080,12 +1081,12 @@ */ struct netmap_vp_adapter *saved_na_vp; }; -int nm_bdg_ctl_attach(struct nmreq_header *hdr, void *auth_token); -int nm_bdg_ctl_detach(struct nmreq_header *hdr, void *auth_token); int nm_bdg_polling(struct nmreq_header *hdr); -int netmap_bdg_list(struct nmreq_header *hdr); #ifdef WITH_VALE +int netmap_vale_attach(struct nmreq_header *hdr, void *auth_token); +int netmap_vale_detach(struct nmreq_header *hdr, void *auth_token); +int netmap_vale_list(struct nmreq_header *hdr); int netmap_vi_create(struct nmreq_header *hdr, int); int nm_vi_create(struct nmreq_header *); int nm_vi_destroy(const char *name); @@ -1115,7 +1116,13 @@ #endif /* WITH_PIPES */ +#ifdef WITH_NMNULL +struct netmap_null_adapter { + struct netmap_adapter up; +}; +#endif /* WITH_NMNULL */ + /* return slots reserved to rx clients; used in drivers */ static inline uint32_t nm_kr_rxspace(struct netmap_kring *k) @@ -1442,51 +1449,8 @@ int netmap_get_hw_na(struct ifnet *ifp, struct netmap_mem_d *nmd, struct netmap_adapter **na); - -/* - * The following bridge-related functions are used by other - * kernel modules. - * - * VALE only supports unicast or broadcast. The lookup - * function can return 0 .. NM_BDG_MAXPORTS-1 for regular ports, - * NM_BDG_MAXPORTS for broadcast, NM_BDG_MAXPORTS+1 to indicate - * drop. 
- */ -typedef uint32_t (*bdg_lookup_fn_t)(struct nm_bdg_fwd *ft, uint8_t *ring_nr, - struct netmap_vp_adapter *, void *private_data); -typedef int (*bdg_config_fn_t)(struct nm_ifreq *); -typedef void (*bdg_dtor_fn_t)(const struct netmap_vp_adapter *); -typedef void *(*bdg_update_private_data_fn_t)(void *private_data, void *callback_data, int *error); -typedef int (*bdg_vp_create_fn_t)(struct nmreq_header *hdr, - struct ifnet *ifp, struct netmap_mem_d *nmd, - struct netmap_vp_adapter **ret); -typedef int (*bdg_bwrap_attach_fn_t)(const char *nr_name, struct netmap_adapter *hwna); -struct netmap_bdg_ops { - bdg_lookup_fn_t lookup; - bdg_config_fn_t config; - bdg_dtor_fn_t dtor; - bdg_vp_create_fn_t vp_create; - bdg_bwrap_attach_fn_t bwrap_attach; - char name[IFNAMSIZ]; -}; -int netmap_bwrap_attach(const char *name, struct netmap_adapter *, struct netmap_bdg_ops *); -int netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *private_data, void *auth_token); - -#define NM_BRIDGES 8 /* number of bridges */ -#define NM_BDG_MAXPORTS 254 /* up to 254 */ -#define NM_BDG_BROADCAST NM_BDG_MAXPORTS -#define NM_BDG_NOPORT (NM_BDG_MAXPORTS+1) - -struct nm_bridge *netmap_init_bridges2(u_int); -void netmap_uninit_bridges2(struct nm_bridge *, u_int); -int netmap_init_bridges(void); -void netmap_uninit_bridges(void); -int nm_bdg_update_private_data(const char *name, bdg_update_private_data_fn_t callback, - void *callback_data, void *auth_token); -int netmap_bdg_config(struct nm_ifreq *nifr); - #ifdef WITH_VALE -uint32_t netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring, +uint32_t netmap_vale_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring, struct netmap_vp_adapter *, void *private_data); /* these are redefined in case of no VALE support */ @@ -1525,11 +1489,20 @@ (((struct nmreq_register *)(uintptr_t)hdr->nr_body)->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX) ? 
EOPNOTSUPP : 0) #endif +#ifdef WITH_NMNULL +int netmap_get_null_na(struct nmreq_header *hdr, struct netmap_adapter **na, + struct netmap_mem_d *nmd, int create); +#else /* !WITH_NMNULL */ +#define netmap_get_null_na(hdr, _2, _3, _4) \ + (((struct nmreq_register *)(uintptr_t)hdr->nr_body)->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX) ? EOPNOTSUPP : 0) +#endif /* WITH_NMNULL */ + #ifdef CONFIG_NET_NS struct net *netmap_bns_get(void); void netmap_bns_put(struct net *); void netmap_bns_getbridges(struct nm_bridge **, u_int *); #else +extern struct nm_bridge *nm_bridges; #define netmap_bns_get() #define netmap_bns_put(_1) #define netmap_bns_getbridges(b, n) \ @@ -1612,7 +1585,6 @@ #ifdef linux extern int netmap_generic_txqdisc; #endif -extern int ptnetmap_tx_workers; /* * NA returns a pointer to the struct netmap adapter from the ifp. @@ -1911,6 +1883,9 @@ u_int np_qfirst[NR_TXRX], np_qlast[NR_TXRX]; /* range of tx/rx rings to scan */ uint16_t np_txpoll; + uint16_t np_kloop_state; /* use with NMG_LOCK held */ +#define NM_SYNC_KLOOP_RUNNING (1 << 0) +#define NM_SYNC_KLOOP_STOPPING (1 << 1) int np_sync_flags; /* to be passed to nm_sync */ int np_refs; /* use with NMG_LOCK held */ @@ -1920,7 +1895,26 @@ * number of rings. */ NM_SELINFO_T *np_si[NR_TXRX]; + + /* In the optional CSB mode, the user must specify the start address + * of two arrays of Communication Status Block (CSB) entries, for the + * two directions (kernel read application write, and kernel write + * application read). + * The number of entries must agree with the number of rings bound to + * the netmap file descriptor. The entries corresponding to the TX + * rings are laid out before the ones corresponding to the RX rings. + * + * Array of CSB entries for application --> kernel communication + * (N entries). */ + struct nm_csb_atok *np_csb_atok_base; + /* Array of CSB entries for kernel --> application communication + * (N entries). 
*/ + struct nm_csb_ktoa *np_csb_ktoa_base; + struct thread *np_td; /* kqueue, just debugging */ +#ifdef linux + struct file *np_filp; /* used by sync kloop */ +#endif /* linux */ }; struct netmap_priv_d *netmap_priv_new(void); @@ -1943,6 +1937,14 @@ return 0; } +/* call with NMG_LOCK held */ +static __inline int +nm_si_user(struct netmap_priv_d *priv, enum txrx t) +{ + return (priv->np_na != NULL && + (priv->np_qlast[t] - priv->np_qfirst[t] > 1)); +} + #ifdef WITH_PIPES int netmap_pipe_txsync(struct netmap_kring *txkring, int flags); int netmap_pipe_rxsync(struct netmap_kring *rxkring, int flags); @@ -2143,8 +2145,7 @@ * kernel thread routines */ struct nm_kctx; /* OS-specific kernel context - opaque */ -typedef void (*nm_kctx_worker_fn_t)(void *data, int is_kthread); -typedef void (*nm_kctx_notify_fn_t)(void *data); +typedef void (*nm_kctx_worker_fn_t)(void *data); /* kthread configuration */ struct nm_kctx_cfg { @@ -2151,9 +2152,7 @@ long type; /* kthread type/identifier */ nm_kctx_worker_fn_t worker_fn; /* worker function */ void *worker_private;/* worker parameter */ - nm_kctx_notify_fn_t notify_fn; /* notify function */ int attach_user; /* attach kthread to user process */ - int use_kthread; /* use a kthread for the context */ }; /* kthread configuration */ struct nm_kctx *nm_os_kctx_create(struct nm_kctx_cfg *cfg, @@ -2161,48 +2160,25 @@ int nm_os_kctx_worker_start(struct nm_kctx *); void nm_os_kctx_worker_stop(struct nm_kctx *); void nm_os_kctx_destroy(struct nm_kctx *); -void nm_os_kctx_worker_wakeup(struct nm_kctx *nmk); -void nm_os_kctx_send_irq(struct nm_kctx *); void nm_os_kctx_worker_setaff(struct nm_kctx *, int); u_int nm_os_ncpus(void); -#ifdef WITH_PTNETMAP_HOST +int netmap_sync_kloop(struct netmap_priv_d *priv, + struct nmreq_header *hdr); +int netmap_sync_kloop_stop(struct netmap_priv_d *priv); + +#ifdef WITH_PTNETMAP +/* ptnetmap guest routines */ + /* - * netmap adapter for host ptnetmap ports + * ptnetmap_memdev routines used to talk with 
ptnetmap_memdev device driver */ -struct netmap_pt_host_adapter { - struct netmap_adapter up; +struct ptnetmap_memdev; +int nm_os_pt_memdev_iomap(struct ptnetmap_memdev *, vm_paddr_t *, void **, + uint64_t *); +void nm_os_pt_memdev_iounmap(struct ptnetmap_memdev *); +uint32_t nm_os_pt_memdev_ioread(struct ptnetmap_memdev *, unsigned int); - /* the passed-through adapter */ - struct netmap_adapter *parent; - /* parent->na_flags, saved at NETMAP_PT_HOST_CREATE time, - * and restored at NETMAP_PT_HOST_DELETE time */ - uint32_t parent_na_flags; - - int (*parent_nm_notify)(struct netmap_kring *kring, int flags); - void *ptns; -}; - -/* ptnetmap host-side routines */ -int netmap_get_pt_host_na(struct nmreq_header *hdr, struct netmap_adapter **na, - struct netmap_mem_d * nmd, int create); -int ptnetmap_ctl(const char *nr_name, int create, struct netmap_adapter *na); - -static inline int -nm_ptnetmap_host_on(struct netmap_adapter *na) -{ - return na && na->na_flags & NAF_PTNETMAP_HOST; -} -#else /* !WITH_PTNETMAP_HOST */ -#define netmap_get_pt_host_na(hdr, _2, _3, _4) \ - (((struct nmreq_register *)(uintptr_t)hdr->nr_body)->nr_flags & (NR_PTNETMAP_HOST) ? EOPNOTSUPP : 0) -#define ptnetmap_ctl(_1, _2, _3) EINVAL -#define nm_ptnetmap_host_on(_1) EINVAL -#endif /* !WITH_PTNETMAP_HOST */ - -#ifdef WITH_PTNETMAP_GUEST -/* ptnetmap GUEST routines */ - /* * netmap adapter for guest ptnetmap ports */ @@ -2218,7 +2194,7 @@ * network stack and netmap clients. * Used to decide when we need (de)allocate krings/rings and * start (stop) ptnetmap kthreads. 
*/ - int backend_regifs; + int backend_users; }; @@ -2225,20 +2201,77 @@ int netmap_pt_guest_attach(struct netmap_adapter *na, unsigned int nifp_offset, unsigned int memid); -struct ptnet_csb_gh; -struct ptnet_csb_hg; -bool netmap_pt_guest_txsync(struct ptnet_csb_gh *ptgh, - struct ptnet_csb_hg *pthg, - struct netmap_kring *kring, - int flags); -bool netmap_pt_guest_rxsync(struct ptnet_csb_gh *ptgh, - struct ptnet_csb_hg *pthg, +bool netmap_pt_guest_txsync(struct nm_csb_atok *atok, + struct nm_csb_ktoa *ktoa, struct netmap_kring *kring, int flags); +bool netmap_pt_guest_rxsync(struct nm_csb_atok *atok, + struct nm_csb_ktoa *ktoa, + struct netmap_kring *kring, int flags); int ptnet_nm_krings_create(struct netmap_adapter *na); void ptnet_nm_krings_delete(struct netmap_adapter *na); void ptnet_nm_dtor(struct netmap_adapter *na); -#endif /* WITH_PTNETMAP_GUEST */ +/* Guest driver: Write kring pointers (cur, head) to the CSB. + * This routine is coupled with ptnetmap_host_read_kring_csb(). */ +static inline void +ptnetmap_guest_write_kring_csb(struct nm_csb_atok *atok, uint32_t cur, + uint32_t head) +{ + /* + * We need to write cur and head to the CSB but we cannot do it atomically. + * There is no way we can prevent the host from reading the updated value + * of one of the two and the old value of the other. However, if we make + * sure that the host never reads a value of head more recent than the + * value of cur we are safe. We can allow the host to read a value of cur + * more recent than the value of head, since in the netmap ring cur can be + * ahead of head and cur cannot wrap around head because it must be behind + * tail. Inverting the order of writes below could instead result into the + * host to think head went ahead of cur, which would cause the sync + * prologue to fail. 
+ * + * The following memory barrier scheme is used to make this happen: + * + * Guest Host + * + * STORE(cur) LOAD(head) + * mb() <-----------> mb() + * STORE(head) LOAD(cur) + */ + atok->cur = cur; + nm_stst_barrier(); + atok->head = head; +} + +/* Guest driver: Read kring pointers (hwcur, hwtail) from the CSB. + * This routine is coupled with ptnetmap_host_write_kring_csb(). */ +static inline void +ptnetmap_guest_read_kring_csb(struct nm_csb_ktoa *ktoa, + struct netmap_kring *kring) +{ + /* + * We place a memory barrier to make sure that the update of hwtail never + * overtakes the update of hwcur. + * (see explanation in ptnetmap_host_write_kring_csb). + */ + kring->nr_hwtail = ktoa->hwtail; + nm_stst_barrier(); + kring->nr_hwcur = ktoa->hwcur; +} + +/* Helper function wrapping ptnetmap_guest_read_kring_csb(). */ +static inline void +ptnet_sync_tail(struct nm_csb_ktoa *ktoa, struct netmap_kring *kring) +{ + struct netmap_ring *ring = kring->ring; + + /* Update hwcur and hwtail as known by the host. */ + ptnetmap_guest_read_kring_csb(ktoa, kring); + + /* nm_sync_finalize */ + ring->tail = kring->rtail = kring->nr_hwtail; +} +#endif /* WITH_PTNETMAP */ + #ifdef __FreeBSD__ /* * FreeBSD mbuf allocator/deallocator in emulation mode: @@ -2355,4 +2388,16 @@ struct nmreq_option * nmreq_findoption(struct nmreq_option *, uint16_t); int nmreq_checkduplicate(struct nmreq_option *); +int netmap_init_bridges(void); +void netmap_uninit_bridges(void); + +/* Functions to read and write CSB fields from the kernel. */ +#if defined (linux) +#define CSB_READ(csb, field, r) (get_user(r, &csb->field)) +#define CSB_WRITE(csb, field, v) (put_user(v, &csb->field)) +#else /* ! linux */ +#define CSB_READ(csb, field, r) (r = fuword32(&csb->field)) +#define CSB_WRITE(csb, field, v) (suword32(&csb->field, v)) +#endif /* ! 
linux */ + #endif /* _NET_NETMAP_KERN_H_ */ Index: sys/dev/netmap/netmap_kloop.c =================================================================== --- sys/dev/netmap/netmap_kloop.c +++ sys/dev/netmap/netmap_kloop.c @@ -0,0 +1,916 @@ +/* + * Copyright (C) 2016-2018 Vincenzo Maffione + * Copyright (C) 2015 Stefano Garzarella + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * common headers
+ *
+ * NOTE(review): the header names that should follow each #include below
+ * are missing from this copy of the patch; they have to be restored from
+ * the original change before the file can be built.
+ */
+#if defined(__FreeBSD__)
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+/*
+ * FreeBSD stand-in for usleep_range(): calls pause_sbt() with a time
+ * argument of SBT_1US * _1 and a precision of one microsecond. The upper
+ * bound _2 is ignored.
+ * NOTE(review): the macro arguments are not parenthesized, and C_ABSOLUTE
+ * is passed together with what looks like a relative duration -- confirm
+ * against pause_sbt(9).
+ */
+#define usleep_range(_1, _2) \
+	pause_sbt("sync-kloop-sleep", SBT_1US * _1, SBT_1US * 1, C_ABSOLUTE)
+
+#elif defined(linux)
+#include
+#include
+#include
+#endif
+
+#include
+#include
+#include
+#include
+
+/* Support for eventfd-based notifications. */
+#if defined(linux)
+#define SYNC_KLOOP_POLL
+#endif
+
+/* Write kring pointers (hwcur, hwtail) to the CSB.
+ * This routine is coupled with ptnetmap_guest_read_kring_csb().
+ *
+ * ptr is the kernel-to-application CSB entry of the ring; both fields are
+ * stored through CSB_WRITE(), hwcur first and hwtail second, with a
+ * barrier in between. */
+static inline void
+sync_kloop_kernel_write(struct nm_csb_ktoa __user *ptr, uint32_t hwcur,
+			   uint32_t hwtail)
+{
+	/*
+	 * The same scheme used in ptnetmap_guest_write_kring_csb() applies here.
+	 * We allow the application to read a value of hwcur more recent than the value
+	 * of hwtail, since this would anyway result in a consistent view of the
+	 * ring state (and hwcur can never wraparound hwtail, since hwcur must be
+	 * behind head).
+	 *
+	 * The following memory barrier scheme is used to make this happen:
+	 *
+	 *          Application          Kernel
+	 *
+	 *          STORE(hwcur)         LOAD(hwtail)
+	 *          mb() <-------------> mb()
+	 *          STORE(hwtail)        LOAD(hwcur)
+	 */
+	CSB_WRITE(ptr, hwcur, hwcur);
+	nm_stst_barrier();
+	CSB_WRITE(ptr, hwtail, hwtail);
+}
+
+/* Read kring pointers (head, cur, sync_flags) from the CSB.
+ * This routine is coupled with ptnetmap_guest_write_kring_csb().
+ *
+ * ptr is the application-to-kernel CSB entry of the ring. The values are
+ * stored into shadow_ring->head, shadow_ring->cur and shadow_ring->flags.
+ * num_slots is accepted but not used by this routine. */
+static inline void
+sync_kloop_kernel_read(struct nm_csb_atok __user *ptr,
+			  struct netmap_ring *shadow_ring,
+			  uint32_t num_slots)
+{
+	/*
+	 * We place a memory barrier to make sure that the update of head never
+	 * overtakes the update of cur.
+	 * (see explanation in ptnetmap_guest_write_kring_csb).
+	 *
+	 * NOTE(review): the barrier below sits between two loads, but its
+	 * name suggests store-store ordering -- confirm that it also orders
+	 * loads on every supported architecture.
+	 */
+	CSB_READ(ptr, head, shadow_ring->head);
+	nm_stst_barrier();
+	CSB_READ(ptr, cur, shadow_ring->cur);
+	CSB_READ(ptr, sync_flags, shadow_ring->flags);
+}
+
+/* Enable or disable application --> kernel kicks. 
*/ +static inline void +csb_ktoa_kick_enable(struct nm_csb_ktoa __user *csb_ktoa, uint32_t val) +{ + CSB_WRITE(csb_ktoa, kern_need_kick, val); +} + +/* Are application interrupt enabled or disabled? */ +static inline uint32_t +csb_atok_intr_enabled(struct nm_csb_atok __user *csb_atok) +{ + uint32_t v; + + CSB_READ(csb_atok, appl_need_kick, v); + + return v; +} + +static inline void +sync_kloop_kring_dump(const char *title, const struct netmap_kring *kring) +{ + nm_prinf("sync_kloop: %s - name: %s hwcur: %d hwtail: %d " + "rhead: %d rcur: %d rtail: %d\n", + title, kring->name, kring->nr_hwcur, kring->nr_hwtail, + kring->rhead, kring->rcur, kring->rtail); +} + +struct sync_kloop_ring_args { + struct netmap_kring *kring; + struct nm_csb_atok *csb_atok; + struct nm_csb_ktoa *csb_ktoa; +#ifdef SYNC_KLOOP_POLL + struct eventfd_ctx *irq_ctx; +#endif /* SYNC_KLOOP_POLL */ +}; + +static void +netmap_sync_kloop_tx_ring(const struct sync_kloop_ring_args *a) +{ + struct netmap_kring *kring = a->kring; + struct nm_csb_atok *csb_atok = a->csb_atok; + struct nm_csb_ktoa *csb_ktoa = a->csb_ktoa; + struct netmap_ring shadow_ring; /* shadow copy of the netmap_ring */ + bool more_txspace = false; + uint32_t num_slots; + int batch; + + num_slots = kring->nkr_num_slots; + + /* Disable application --> kernel notifications. */ + csb_ktoa_kick_enable(csb_ktoa, 0); + /* Copy the application kring pointers from the CSB */ + sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots); + + for (;;) { + batch = shadow_ring.head - kring->nr_hwcur; + if (batch < 0) + batch += num_slots; + +#ifdef PTN_TX_BATCH_LIM + if (batch > PTN_TX_BATCH_LIM(num_slots)) { + /* If application moves ahead too fast, let's cut the move so + * that we don't exceed our batch limit. 
*/ + uint32_t head_lim = kring->nr_hwcur + PTN_TX_BATCH_LIM(num_slots); + + if (head_lim >= num_slots) + head_lim -= num_slots; + ND(1, "batch: %d head: %d head_lim: %d", batch, shadow_ring.head, + head_lim); + shadow_ring.head = head_lim; + batch = PTN_TX_BATCH_LIM(num_slots); + } +#endif /* PTN_TX_BATCH_LIM */ + + if (nm_kr_txspace(kring) <= (num_slots >> 1)) { + shadow_ring.flags |= NAF_FORCE_RECLAIM; + } + + /* Netmap prologue */ + shadow_ring.tail = kring->rtail; + if (unlikely(nm_txsync_prologue(kring, &shadow_ring) >= num_slots)) { + /* Reinit ring and enable notifications. */ + netmap_ring_reinit(kring); + csb_ktoa_kick_enable(csb_ktoa, 1); + break; + } + + if (unlikely(netmap_verbose & NM_VERB_TXSYNC)) { + sync_kloop_kring_dump("pre txsync", kring); + } + + if (unlikely(kring->nm_sync(kring, shadow_ring.flags))) { + /* Reenable notifications. */ + csb_ktoa_kick_enable(csb_ktoa, 1); + nm_prerr("sync_kloop: txsync() failed\n"); + break; + } + + /* + * Finalize + * Copy kernel hwcur and hwtail into the CSB for the application sync(), and + * do the nm_sync_finalize. + */ + sync_kloop_kernel_write(csb_ktoa, kring->nr_hwcur, + kring->nr_hwtail); + if (kring->rtail != kring->nr_hwtail) { + /* Some more room available in the parent adapter. */ + kring->rtail = kring->nr_hwtail; + more_txspace = true; + } + + if (unlikely(netmap_verbose & NM_VERB_TXSYNC)) { + sync_kloop_kring_dump("post txsync", kring); + } + + /* Interrupt the application if needed. */ +#ifdef SYNC_KLOOP_POLL + if (a->irq_ctx && more_txspace && csb_atok_intr_enabled(csb_atok)) { + /* Disable application kick to avoid sending unnecessary kicks */ + eventfd_signal(a->irq_ctx, 1); + more_txspace = false; + } +#endif /* SYNC_KLOOP_POLL */ + + /* Read CSB to see if there is more work to do. */ + sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots); + if (shadow_ring.head == kring->rhead) { + /* + * No more packets to transmit. 
We enable notifications and + * go to sleep, waiting for a kick from the application when new + * new slots are ready for transmission. + */ + /* Reenable notifications. */ + csb_ktoa_kick_enable(csb_ktoa, 1); + /* Doublecheck. */ + sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots); + if (shadow_ring.head != kring->rhead) { + /* We won the race condition, there are more packets to + * transmit. Disable notifications and do another cycle */ + csb_ktoa_kick_enable(csb_ktoa, 0); + continue; + } + break; + } + + if (nm_kr_txempty(kring)) { + /* No more available TX slots. We stop waiting for a notification + * from the backend (netmap_tx_irq). */ + ND(1, "TX ring"); + break; + } + } + +#ifdef SYNC_KLOOP_POLL + if (a->irq_ctx && more_txspace && csb_atok_intr_enabled(csb_atok)) { + eventfd_signal(a->irq_ctx, 1); + } +#endif /* SYNC_KLOOP_POLL */ +} + +/* RX cycle without receive any packets */ +#define SYNC_LOOP_RX_DRY_CYCLES_MAX 2 + +static inline int +sync_kloop_norxslots(struct netmap_kring *kring, uint32_t g_head) +{ + return (NM_ACCESS_ONCE(kring->nr_hwtail) == nm_prev(g_head, + kring->nkr_num_slots - 1)); +} + +static void +netmap_sync_kloop_rx_ring(const struct sync_kloop_ring_args *a) +{ + + struct netmap_kring *kring = a->kring; + struct nm_csb_atok *csb_atok = a->csb_atok; + struct nm_csb_ktoa *csb_ktoa = a->csb_ktoa; + struct netmap_ring shadow_ring; /* shadow copy of the netmap_ring */ + int dry_cycles = 0; + bool some_recvd = false; + uint32_t num_slots; + + num_slots = kring->nkr_num_slots; + + /* Get RX csb_atok and csb_ktoa pointers from the CSB. */ + num_slots = kring->nkr_num_slots; + + /* Disable notifications. 
*/ + csb_ktoa_kick_enable(csb_ktoa, 0); + /* Copy the application kring pointers from the CSB */ + sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots); + + for (;;) { + uint32_t hwtail; + + /* Netmap prologue */ + shadow_ring.tail = kring->rtail; + if (unlikely(nm_rxsync_prologue(kring, &shadow_ring) >= num_slots)) { + /* Reinit ring and enable notifications. */ + netmap_ring_reinit(kring); + csb_ktoa_kick_enable(csb_ktoa, 1); + break; + } + + if (unlikely(netmap_verbose & NM_VERB_RXSYNC)) { + sync_kloop_kring_dump("pre rxsync", kring); + } + + if (unlikely(kring->nm_sync(kring, shadow_ring.flags))) { + /* Reenable notifications. */ + csb_ktoa_kick_enable(csb_ktoa, 1); + nm_prerr("sync_kloop: rxsync() failed\n"); + break; + } + + /* + * Finalize + * Copy kernel hwcur and hwtail into the CSB for the application sync() + */ + hwtail = NM_ACCESS_ONCE(kring->nr_hwtail); + sync_kloop_kernel_write(csb_ktoa, kring->nr_hwcur, hwtail); + if (kring->rtail != hwtail) { + kring->rtail = hwtail; + some_recvd = true; + dry_cycles = 0; + } else { + dry_cycles++; + } + + if (unlikely(netmap_verbose & NM_VERB_RXSYNC)) { + sync_kloop_kring_dump("post rxsync", kring); + } + +#ifdef SYNC_KLOOP_POLL + /* Interrupt the application if needed. */ + if (a->irq_ctx && some_recvd && csb_atok_intr_enabled(csb_atok)) { + /* Disable application kick to avoid sending unnecessary kicks */ + eventfd_signal(a->irq_ctx, 1); + some_recvd = false; + } +#endif /* SYNC_KLOOP_POLL */ + + /* Read CSB to see if there is more work to do. */ + sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots); + if (sync_kloop_norxslots(kring, shadow_ring.head)) { + /* + * No more slots available for reception. We enable notification and + * go to sleep, waiting for a kick from the application when new receive + * slots are available. + */ + /* Reenable notifications. */ + csb_ktoa_kick_enable(csb_ktoa, 1); + /* Doublecheck. 
*/ + sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots); + if (!sync_kloop_norxslots(kring, shadow_ring.head)) { + /* We won the race condition, more slots are available. Disable + * notifications and do another cycle. */ + csb_ktoa_kick_enable(csb_ktoa, 0); + continue; + } + break; + } + + hwtail = NM_ACCESS_ONCE(kring->nr_hwtail); + if (unlikely(hwtail == kring->rhead || + dry_cycles >= SYNC_LOOP_RX_DRY_CYCLES_MAX)) { + /* No more packets to be read from the backend. We stop and + * wait for a notification from the backend (netmap_rx_irq). */ + ND(1, "nr_hwtail: %d rhead: %d dry_cycles: %d", + hwtail, kring->rhead, dry_cycles); + break; + } + } + + nm_kr_put(kring); + +#ifdef SYNC_KLOOP_POLL + /* Interrupt the application if needed. */ + if (a->irq_ctx && some_recvd && csb_atok_intr_enabled(csb_atok)) { + eventfd_signal(a->irq_ctx, 1); + } +#endif /* SYNC_KLOOP_POLL */ +} + +#ifdef SYNC_KLOOP_POLL +struct sync_kloop_poll_entry { + /* Support for receiving notifications from + * a netmap ring or from the application. */ + struct file *filp; + wait_queue_t wait; + wait_queue_head_t *wqh; + + /* Support for sending notifications to the application. */ + struct eventfd_ctx *irq_ctx; + struct file *irq_filp; +}; + +struct sync_kloop_poll_ctx { + poll_table wait_table; + unsigned int next_entry; + unsigned int num_entries; + struct sync_kloop_poll_entry entries[0]; +}; + +static void +sync_kloop_poll_table_queue_proc(struct file *file, wait_queue_head_t *wqh, + poll_table *pt) +{ + struct sync_kloop_poll_ctx *poll_ctx = + container_of(pt, struct sync_kloop_poll_ctx, wait_table); + struct sync_kloop_poll_entry *entry = poll_ctx->entries + + poll_ctx->next_entry; + + BUG_ON(poll_ctx->next_entry >= poll_ctx->num_entries); + entry->wqh = wqh; + entry->filp = file; + /* Use the default wake up function. 
*/ + init_waitqueue_entry(&entry->wait, current); + add_wait_queue(wqh, &entry->wait); + poll_ctx->next_entry++; +} +#endif /* SYNC_KLOOP_POLL */ + +int +netmap_sync_kloop(struct netmap_priv_d *priv, struct nmreq_header *hdr) +{ + struct nmreq_sync_kloop_start *req = + (struct nmreq_sync_kloop_start *)(uintptr_t)hdr->nr_body; + struct nmreq_opt_sync_kloop_eventfds *eventfds_opt = NULL; +#ifdef SYNC_KLOOP_POLL + struct sync_kloop_poll_ctx *poll_ctx = NULL; +#endif /* SYNC_KLOOP_POLL */ + int num_rx_rings, num_tx_rings, num_rings; + uint32_t sleep_us = req->sleep_us; + struct nm_csb_atok* csb_atok_base; + struct nm_csb_ktoa* csb_ktoa_base; + struct netmap_adapter *na; + struct nmreq_option *opt; + int err = 0; + int i; + + if (sleep_us > 1000000) { + /* We do not accept sleeping for more than a second. */ + return EINVAL; + } + + if (priv->np_nifp == NULL) { + return ENXIO; + } + mb(); /* make sure following reads are not from cache */ + + na = priv->np_na; + if (!nm_netmap_on(na)) { + return ENXIO; + } + + NMG_LOCK(); + /* Make sure the application is working in CSB mode. */ + if (!priv->np_csb_atok_base || !priv->np_csb_ktoa_base) { + NMG_UNLOCK(); + nm_prerr("sync-kloop on %s requires " + "NETMAP_REQ_OPT_CSB option\n", na->name); + return EINVAL; + } + + csb_atok_base = priv->np_csb_atok_base; + csb_ktoa_base = priv->np_csb_ktoa_base; + + /* Make sure that no kloop is currently running. */ + if (priv->np_kloop_state & NM_SYNC_KLOOP_RUNNING) { + err = EBUSY; + } + priv->np_kloop_state |= NM_SYNC_KLOOP_RUNNING; + NMG_UNLOCK(); + if (err) { + return err; + } + + num_rx_rings = priv->np_qlast[NR_RX] - priv->np_qfirst[NR_RX]; + num_tx_rings = priv->np_qlast[NR_TX] - priv->np_qfirst[NR_TX]; + num_rings = num_tx_rings + num_rx_rings; + + /* Validate notification options. 
*/ + opt = nmreq_findoption((struct nmreq_option *)(uintptr_t)hdr->nr_options, + NETMAP_REQ_OPT_SYNC_KLOOP_EVENTFDS); + if (opt != NULL) { + err = nmreq_checkduplicate(opt); + if (err) { + opt->nro_status = err; + goto out; + } + if (opt->nro_size != sizeof(*eventfds_opt) + + sizeof(eventfds_opt->eventfds[0]) * num_rings) { + /* Option size not consistent with the number of + * entries. */ + opt->nro_status = err = EINVAL; + goto out; + } +#ifdef SYNC_KLOOP_POLL + eventfds_opt = (struct nmreq_opt_sync_kloop_eventfds *)opt; + opt->nro_status = 0; + /* We need 2 poll entries for TX and RX notifications coming + * from the netmap adapter, plus one entries per ring for the + * notifications coming from the application. */ + poll_ctx = nm_os_malloc(sizeof(*poll_ctx) + + (2 + num_rings) * sizeof(poll_ctx->entries[0])); + init_poll_funcptr(&poll_ctx->wait_table, + sync_kloop_poll_table_queue_proc); + poll_ctx->num_entries = 2 + num_rings; + poll_ctx->next_entry = 0; + /* Poll for notifications coming from the applications through + * eventfds . */ + for (i = 0; i < num_rings; i++) { + struct eventfd_ctx *irq; + struct file *filp; + unsigned long mask; + + filp = eventfd_fget(eventfds_opt->eventfds[i].ioeventfd); + if (IS_ERR(filp)) { + err = PTR_ERR(filp); + goto out; + } + mask = filp->f_op->poll(filp, &poll_ctx->wait_table); + if (mask & POLLERR) { + err = EINVAL; + goto out; + } + + filp = eventfd_fget(eventfds_opt->eventfds[i].irqfd); + if (IS_ERR(filp)) { + err = PTR_ERR(filp); + goto out; + } + poll_ctx->entries[i].irq_filp = filp; + irq = eventfd_ctx_fileget(filp); + if (IS_ERR(irq)) { + err = PTR_ERR(irq); + goto out; + } + poll_ctx->entries[i].irq_ctx = irq; + } + /* Poll for notifications coming from the netmap rings bound to + * this file descriptor. */ + { + NM_SELINFO_T *si[NR_TXRX]; + + NMG_LOCK(); + si[NR_RX] = nm_si_user(priv, NR_RX) ? &na->si[NR_RX] : + &na->rx_rings[priv->np_qfirst[NR_RX]]->si; + si[NR_TX] = nm_si_user(priv, NR_TX) ? 
&na->si[NR_TX] : + &na->tx_rings[priv->np_qfirst[NR_TX]]->si; + NMG_UNLOCK(); + poll_wait(priv->np_filp, si[NR_RX], &poll_ctx->wait_table); + poll_wait(priv->np_filp, si[NR_TX], &poll_ctx->wait_table); + } +#else /* SYNC_KLOOP_POLL */ + opt->nro_status = EOPNOTSUPP; + goto out; +#endif /* SYNC_KLOOP_POLL */ + } + + /* Main loop. */ + for (;;) { + if (unlikely(NM_ACCESS_ONCE(priv->np_kloop_state) & NM_SYNC_KLOOP_STOPPING)) { + break; + } + +#ifdef SYNC_KLOOP_POLL + if (poll_ctx) + __set_current_state(TASK_INTERRUPTIBLE); +#endif /* SYNC_KLOOP_POLL */ + + /* Process all the TX rings bound to this file descriptor. */ + for (i = 0; i < num_tx_rings; i++) { + struct sync_kloop_ring_args a = { + .kring = NMR(na, NR_TX)[i + priv->np_qfirst[NR_TX]], + .csb_atok = csb_atok_base + i, + .csb_ktoa = csb_ktoa_base + i, + }; + +#ifdef SYNC_KLOOP_POLL + if (poll_ctx) + a.irq_ctx = poll_ctx->entries[i].irq_ctx; +#endif /* SYNC_KLOOP_POLL */ + if (unlikely(nm_kr_tryget(a.kring, 1, NULL))) { + continue; + } + netmap_sync_kloop_tx_ring(&a); + nm_kr_put(a.kring); + } + + /* Process all the RX rings bound to this file descriptor. */ + for (i = 0; i < num_rx_rings; i++) { + struct sync_kloop_ring_args a = { + .kring = NMR(na, NR_RX)[i + priv->np_qfirst[NR_RX]], + .csb_atok = csb_atok_base + num_tx_rings + i, + .csb_ktoa = csb_ktoa_base + num_tx_rings + i, + }; + +#ifdef SYNC_KLOOP_POLL + if (poll_ctx) + a.irq_ctx = poll_ctx->entries[num_tx_rings + i].irq_ctx; +#endif /* SYNC_KLOOP_POLL */ + + if (unlikely(nm_kr_tryget(a.kring, 1, NULL))) { + continue; + } + netmap_sync_kloop_rx_ring(&a); + nm_kr_put(a.kring); + } + +#ifdef SYNC_KLOOP_POLL + if (poll_ctx) { + /* If a poll context is present, yield to the scheduler + * waiting for a notification to come either from + * netmap or the application. */ + schedule_timeout_interruptible(msecs_to_jiffies(1000)); + } else +#endif /* SYNC_KLOOP_POLL */ + { + /* Default synchronization method: sleep for a while. 
*/ + usleep_range(sleep_us, sleep_us); + } + } +out: +#ifdef SYNC_KLOOP_POLL + if (poll_ctx) { + /* Stop polling from netmap and the eventfds, and deallocate + * the poll context. */ + __set_current_state(TASK_RUNNING); + for (i = 0; i < poll_ctx->next_entry; i++) { + struct sync_kloop_poll_entry *entry = + poll_ctx->entries + i; + + if (entry->wqh) + remove_wait_queue(entry->wqh, &entry->wait); + /* We did not get a reference to the eventfds, but + * don't do that on netmap file descriptors (since + * a reference was not taken. */ + if (entry->filp && entry->filp != priv->np_filp) + fput(entry->filp); + if (entry->irq_ctx) + eventfd_ctx_put(entry->irq_ctx); + if (entry->irq_filp) + fput(entry->irq_filp); + } + nm_os_free(poll_ctx); + poll_ctx = NULL; + } +#endif /* SYNC_KLOOP_POLL */ + + /* Reset the kloop state. */ + NMG_LOCK(); + priv->np_kloop_state = 0; + NMG_UNLOCK(); + + return err; +} + +int +netmap_sync_kloop_stop(struct netmap_priv_d *priv) +{ + bool running = true; + int err = 0; + + NMG_LOCK(); + priv->np_kloop_state |= NM_SYNC_KLOOP_STOPPING; + NMG_UNLOCK(); + while (running) { + usleep_range(1000, 1500); + NMG_LOCK(); + running = (NM_ACCESS_ONCE(priv->np_kloop_state) + & NM_SYNC_KLOOP_RUNNING); + NMG_UNLOCK(); + } + + return err; +} + +#ifdef WITH_PTNETMAP +/* + * Guest ptnetmap txsync()/rxsync() routines, used in ptnet device drivers. + * These routines are reused across the different operating systems supported + * by netmap. + */ + +/* + * Reconcile host and guest views of the transmit ring. + * + * Guest user wants to transmit packets up to the one before ring->head, + * and guest kernel knows tx_ring->hwcur is the first packet unsent + * by the host kernel. + * + * We push out as many packets as possible, and possibly + * reclaim buffers from previously completed transmission. + * + * Notifications from the host are enabled only if the user guest would + * block (no space in the ring). 
+ */
+bool
+netmap_pt_guest_txsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa,
+			struct netmap_kring *kring, int flags)
+{
+	bool notify = false;
+	/* notify is the return value: true when the caller has to kick
+	 * the host after this call, false otherwise. */
+	/* Disable notifications: clear appl_need_kick in the CSB while the
+	 * ring is being processed. */
+	atok->appl_need_kick = 0;
+
+	/*
+	 * First part: tell the host (updating the CSB) to process the new
+	 * packets.
+	 * nr_hwcur is refreshed from the CSB first; then rcur and rhead are
+	 * published through ptnetmap_guest_write_kring_csb().
+	 */
+	kring->nr_hwcur = ktoa->hwcur;
+	ptnetmap_guest_write_kring_csb(atok, kring->rcur, kring->rhead);
+
+	/* Ask for a kick from a guest to the host if needed: either the
+	 * host requested kicks (kern_need_kick) and there are packets to
+	 * send or the ring is full, or the caller forces a reclaim. The
+	 * sync flags are passed to the host through the CSB. */
+	if (((kring->rhead != kring->nr_hwcur || nm_kr_txempty(kring))
+		&& NM_ACCESS_ONCE(ktoa->kern_need_kick)) ||
+			(flags & NAF_FORCE_RECLAIM)) {
+		atok->sync_flags = flags;
+		notify = true;
+	}
+
+	/*
+	 * Second part: reclaim buffers for completed transmissions.
+	 * The CSB is read back only when the ring is full or the caller
+	 * forces a reclaim.
+	 */
+	if (nm_kr_txempty(kring) || (flags & NAF_FORCE_RECLAIM)) {
+		ptnetmap_guest_read_kring_csb(ktoa, kring);
+	}
+
+	/*
+	 * No more room in the ring for new transmissions. The user thread will
+	 * go to sleep and we need to be notified by the host when more free
+	 * space is available.
+	 */
+	if (nm_kr_txempty(kring) && !(kring->nr_kflags & NKR_NOINTR)) {
+		/* Reenable notifications. */
+		atok->appl_need_kick = 1;
+		/* Double check: the host may have freed slots before it
+		 * could see the notification request. */
+		ptnetmap_guest_read_kring_csb(ktoa, kring);
+		/* If there is new free space, disable notifications */
+		if (unlikely(!nm_kr_txempty(kring))) {
+			atok->appl_need_kick = 0;
+		}
+	}
+
+	ND(1, "%s CSB(head:%u cur:%u hwtail:%u) KRING(head:%u cur:%u tail:%u)",
+		kring->name, atok->head, atok->cur, ktoa->hwtail,
+		kring->rhead, kring->rcur, kring->nr_hwtail);
+
+	return notify;
+}
+
+/*
+ * Reconcile host and guest view of the receive ring.
+ *
+ * Update hwcur/hwtail from host (reading from CSB).
+ *
+ * If guest user has released buffers up to the one before ring->head, we
+ * also give them to the host.
+ *
+ * Notifications from the host are enabled only if the user guest would
+ * block (no more completed slots in the ring). 
+ */ +bool +netmap_pt_guest_rxsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa, + struct netmap_kring *kring, int flags) +{ + bool notify = false; + + /* Disable notifications */ + atok->appl_need_kick = 0; + + /* + * First part: import newly received packets, by updating the kring + * hwtail to the hwtail known from the host (read from the CSB). + * This also updates the kring hwcur. + */ + ptnetmap_guest_read_kring_csb(ktoa, kring); + kring->nr_kflags &= ~NKR_PENDINTR; + + /* + * Second part: tell the host about the slots that guest user has + * released, by updating cur and head in the CSB. + */ + if (kring->rhead != kring->nr_hwcur) { + ptnetmap_guest_write_kring_csb(atok, kring->rcur, + kring->rhead); + /* Ask for a kick from the guest to the host if needed. */ + if (NM_ACCESS_ONCE(ktoa->kern_need_kick)) { + atok->sync_flags = flags; + notify = true; + } + } + + /* + * No more completed RX slots. The user thread will go to sleep and + * we need to be notified by the host when more RX slots have been + * completed. + */ + if (nm_kr_rxempty(kring) && !(kring->nr_kflags & NKR_NOINTR)) { + /* Reenable notifications. */ + atok->appl_need_kick = 1; + /* Double check */ + ptnetmap_guest_read_kring_csb(ktoa, kring); + /* If there are new slots, disable notifications. */ + if (!nm_kr_rxempty(kring)) { + atok->appl_need_kick = 0; + } + } + + ND(1, "%s CSB(head:%u cur:%u hwtail:%u) KRING(head:%u cur:%u tail:%u)", + kring->name, atok->head, atok->cur, ktoa->hwtail, + kring->rhead, kring->rcur, kring->nr_hwtail); + + return notify; +} + +/* + * Callbacks for ptnet drivers: nm_krings_create, nm_krings_delete, nm_dtor. + */ +int +ptnet_nm_krings_create(struct netmap_adapter *na) +{ + struct netmap_pt_guest_adapter *ptna = + (struct netmap_pt_guest_adapter *)na; /* Upcast. 
*/ + struct netmap_adapter *na_nm = &ptna->hwup.up; + struct netmap_adapter *na_dr = &ptna->dr.up; + int ret; + + if (ptna->backend_users) { + return 0; + } + + /* Create krings on the public netmap adapter. */ + ret = netmap_hw_krings_create(na_nm); + if (ret) { + return ret; + } + + /* Copy krings into the netmap adapter private to the driver. */ + na_dr->tx_rings = na_nm->tx_rings; + na_dr->rx_rings = na_nm->rx_rings; + + return 0; +} + +void +ptnet_nm_krings_delete(struct netmap_adapter *na) +{ + struct netmap_pt_guest_adapter *ptna = + (struct netmap_pt_guest_adapter *)na; /* Upcast. */ + struct netmap_adapter *na_nm = &ptna->hwup.up; + struct netmap_adapter *na_dr = &ptna->dr.up; + + if (ptna->backend_users) { + return; + } + + na_dr->tx_rings = NULL; + na_dr->rx_rings = NULL; + + netmap_hw_krings_delete(na_nm); +} + +void +ptnet_nm_dtor(struct netmap_adapter *na) +{ + struct netmap_pt_guest_adapter *ptna = + (struct netmap_pt_guest_adapter *)na; + + netmap_mem_put(ptna->dr.up.nm_mem); + memset(&ptna->dr, 0, sizeof(ptna->dr)); + netmap_mem_pt_guest_ifp_del(na->nm_mem, na->ifp); +} + +int +netmap_pt_guest_attach(struct netmap_adapter *arg, + unsigned int nifp_offset, unsigned int memid) +{ + struct netmap_pt_guest_adapter *ptna; + struct ifnet *ifp = arg ? arg->ifp : NULL; + int error; + + /* get allocator */ + arg->nm_mem = netmap_mem_pt_guest_new(ifp, nifp_offset, memid); + if (arg->nm_mem == NULL) + return ENOMEM; + arg->na_flags |= NAF_MEM_OWNER; + error = netmap_attach_ext(arg, sizeof(struct netmap_pt_guest_adapter), 1); + if (error) + return error; + + /* get the netmap_pt_guest_adapter */ + ptna = (struct netmap_pt_guest_adapter *) NA(ifp); + + /* Initialize a separate pass-through netmap adapter that is going to + * be used by the ptnet driver only, and so never exposed to netmap + * applications. We only need a subset of the available fields. 
*/ + memset(&ptna->dr, 0, sizeof(ptna->dr)); + ptna->dr.up.ifp = ifp; + ptna->dr.up.nm_mem = netmap_mem_get(ptna->hwup.up.nm_mem); + ptna->dr.up.nm_config = ptna->hwup.up.nm_config; + + ptna->backend_users = 0; + + return 0; +} + +#endif /* WITH_PTNETMAP */ Index: sys/dev/netmap/netmap_legacy.c =================================================================== --- sys/dev/netmap/netmap_legacy.c +++ sys/dev/netmap/netmap_legacy.c @@ -56,6 +56,7 @@ */ #include #include +#include static int nmreq_register_from_legacy(struct nmreq *nmr, struct nmreq_header *hdr, @@ -80,10 +81,11 @@ } else { regmode = NR_REG_ALL_NIC; } - nmr->nr_flags = regmode | - (nmr->nr_flags & (~NR_REG_MASK)); + req->nr_mode = regmode; + } else { + req->nr_mode = nmr->nr_flags & NR_REG_MASK; } - req->nr_mode = nmr->nr_flags & NR_REG_MASK; + /* Fix nr_name, nr_mode and nr_ringid to handle pipe requests. */ if (req->nr_mode == NR_REG_PIPE_MASTER || req->nr_mode == NR_REG_PIPE_SLAVE) { @@ -131,7 +133,7 @@ /* First prepare the request header. 
*/ hdr->nr_version = NETMAP_API; /* new API */ - strncpy(hdr->nr_name, nmr->nr_name, sizeof(nmr->nr_name)); + strlcpy(hdr->nr_name, nmr->nr_name, sizeof(nmr->nr_name)); hdr->nr_options = (uintptr_t)NULL; hdr->nr_body = (uintptr_t)NULL; @@ -242,7 +244,6 @@ if (!req) { goto oom; } hdr->nr_body = (uintptr_t)req; hdr->nr_reqtype = NETMAP_REQ_PORT_INFO_GET; - req->nr_offset = nmr->nr_offset; req->nr_memsize = nmr->nr_memsize; req->nr_tx_slots = nmr->nr_tx_slots; req->nr_rx_slots = nmr->nr_rx_slots; @@ -300,7 +301,6 @@ case NETMAP_REQ_PORT_INFO_GET: { struct nmreq_port_info_get *req = (struct nmreq_port_info_get *)(uintptr_t)hdr->nr_body; - nmr->nr_offset = req->nr_offset; nmr->nr_memsize = req->nr_memsize; nmr->nr_tx_slots = req->nr_tx_slots; nmr->nr_rx_slots = req->nr_rx_slots; @@ -321,7 +321,7 @@ case NETMAP_REQ_VALE_LIST: { struct nmreq_vale_list *req = (struct nmreq_vale_list *)(uintptr_t)hdr->nr_body; - strncpy(nmr->nr_name, hdr->nr_name, sizeof(nmr->nr_name)); + strlcpy(nmr->nr_name, hdr->nr_name, sizeof(nmr->nr_name)); nmr->nr_arg1 = req->nr_bridge_idx; nmr->nr_arg2 = req->nr_port_idx; break; Index: sys/dev/netmap/netmap_mem2.h =================================================================== --- sys/dev/netmap/netmap_mem2.h +++ sys/dev/netmap/netmap_mem2.h @@ -158,7 +158,7 @@ ({ int *perr = _perr; if (perr) *(perr) = EOPNOTSUPP; NULL; }) #endif /* WITH_EXTMEM */ -#ifdef WITH_PTNETMAP_GUEST +#ifdef WITH_PTNETMAP struct netmap_mem_d* netmap_mem_pt_guest_new(struct ifnet *, unsigned int nifp_offset, unsigned int memid); @@ -165,7 +165,7 @@ struct ptnetmap_memdev; struct netmap_mem_d* netmap_mem_pt_guest_attach(struct ptnetmap_memdev *, uint16_t); int netmap_mem_pt_guest_ifp_del(struct netmap_mem_d *, struct ifnet *); -#endif /* WITH_PTNETMAP_GUEST */ +#endif /* WITH_PTNETMAP */ int netmap_mem_pools_info_get(struct nmreq_pools_info *, struct netmap_mem_d *); Index: sys/dev/netmap/netmap_mem2.c =================================================================== --- 
sys/dev/netmap/netmap_mem2.c +++ sys/dev/netmap/netmap_mem2.c @@ -397,7 +397,7 @@ if (p->bitmap == NULL) { /* Allocate the bitmap */ n = (p->objtotal + 31) / 32; - p->bitmap = nm_os_malloc(sizeof(uint32_t) * n); + p->bitmap = nm_os_malloc(sizeof(p->bitmap[0]) * n); if (p->bitmap == NULL) { D("Unable to create bitmap (%d entries) for allocator '%s'", (int)n, p->name); @@ -405,7 +405,7 @@ } p->bitmap_slots = n; } else { - memset(p->bitmap, 0, p->bitmap_slots); + memset(p->bitmap, 0, p->bitmap_slots * sizeof(p->bitmap[0])); } p->objfree = 0; @@ -480,8 +480,10 @@ nmd->ops->nmd_deref(nmd); nmd->active--; - if (!nmd->active) + if (last_user) { nmd->nm_grp = -1; + nmd->lasterr = 0; + } NMA_UNLOCK(nmd); return last_user; @@ -1998,7 +2000,7 @@ /* initialize base fields -- override const */ *(u_int *)(uintptr_t)&nifp->ni_tx_rings = na->num_tx_rings; *(u_int *)(uintptr_t)&nifp->ni_rx_rings = na->num_rx_rings; - strncpy(nifp->ni_name, na->name, (size_t)IFNAMSIZ); + strlcpy(nifp->ni_name, na->name, sizeof(nifp->ni_name)); /* * fill the slots for the rx and tx rings. 
They contain the offset @@ -2343,7 +2345,7 @@ #endif /* WITH_EXTMEM */ -#ifdef WITH_PTNETMAP_GUEST +#ifdef WITH_PTNETMAP struct mem_pt_if { struct mem_pt_if *next; struct ifnet *ifp; @@ -2386,7 +2388,8 @@ NMA_UNLOCK(nmd); - D("added (ifp=%p,nifp_offset=%u)", ptif->ifp, ptif->nifp_offset); + nm_prinf("ptnet if added (ifp=%s,nifp_offset=%u)\n", + ptif->ifp->if_xname, ptif->nifp_offset); return 0; } @@ -2667,7 +2670,7 @@ continue; kring->ring = (struct netmap_ring *) ((char *)nifp + - nifp->ring_ofs[i + na->num_tx_rings + 1]); + nifp->ring_ofs[netmap_all_rings(na, NR_TX) + i]); } error = 0; @@ -2832,4 +2835,4 @@ return nmd; } -#endif /* WITH_PTNETMAP_GUEST */ +#endif /* WITH_PTNETMAP */ Index: sys/dev/netmap/netmap_null.c =================================================================== --- sys/dev/netmap/netmap_null.c +++ sys/dev/netmap/netmap_null.c @@ -0,0 +1,183 @@ +/* + * Copyright (C) 2018 Giuseppe Lettieri + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +/* $FreeBSD$ */ + +#if defined(__FreeBSD__) +#include /* prerequisite */ + +#include +#include +#include /* defines used in kernel.h */ +#include /* types used in module initialization */ +#include +#include +#include +#include +#include +#include +#include /* sockaddrs */ +#include +#include +#include /* bus_dmamap_* */ +#include + + +#elif defined(linux) + +#include "bsd_glue.h" + +#elif defined(__APPLE__) + +#warning OSX support is only partial +#include "osx_glue.h" + +#elif defined(_WIN32) +#include "win_glue.h" + +#else + +#error Unsupported platform + +#endif /* unsupported */ + +/* + * common headers + */ + +#include +#include +#include + +#ifdef WITH_NMNULL + +static int +netmap_null_txsync(struct netmap_kring *kring, int flags) +{ + (void)kring; + (void)flags; + return 0; +} + +static int +netmap_null_rxsync(struct netmap_kring *kring, int flags) +{ + (void)kring; + (void)flags; + return 0; +} + +static int +netmap_null_krings_create(struct netmap_adapter *na) +{ + return netmap_krings_create(na, 0); +} + +static void +netmap_null_krings_delete(struct netmap_adapter *na) +{ + netmap_krings_delete(na); +} + +static int +netmap_null_reg(struct netmap_adapter *na, int onoff) +{ + if (na->active_fds == 0) { + if (onoff) + na->na_flags |= NAF_NETMAP_ON; + else + na->na_flags &= ~NAF_NETMAP_ON; + } + return 0; +} + +static int +netmap_null_bdg_attach(const char *name, struct netmap_adapter *na, + struct nm_bridge *b) +{ + 
(void)name; + (void)na; + (void)b; + return EINVAL; +} + +int +netmap_get_null_na(struct nmreq_header *hdr, struct netmap_adapter **na, + struct netmap_mem_d *nmd, int create) +{ + struct nmreq_register *req = (struct nmreq_register *)(uintptr_t)hdr->nr_body; + struct netmap_null_adapter *nna; + int error; + + if (req->nr_mode != NR_REG_NULL) { + ND("not a null port"); + return 0; + } + + if (!create) { + D("null ports cannot be re-opened"); + return EINVAL; + } + + if (nmd == NULL) { + D("null ports must use an existing allocator"); + return EINVAL; + } + + nna = nm_os_malloc(sizeof(*nna)); + if (nna == NULL) { + error = ENOMEM; + goto err; + } + snprintf(nna->up.name, sizeof(nna->up.name), "null:%s", hdr->nr_name); + + nna->up.nm_txsync = netmap_null_txsync; + nna->up.nm_rxsync = netmap_null_rxsync; + nna->up.nm_register = netmap_null_reg; + nna->up.nm_krings_create = netmap_null_krings_create; + nna->up.nm_krings_delete = netmap_null_krings_delete; + nna->up.nm_bdg_attach = netmap_null_bdg_attach; + nna->up.nm_mem = netmap_mem_get(nmd); + + nna->up.num_tx_rings = req->nr_tx_rings; + nna->up.num_rx_rings = req->nr_rx_rings; + nna->up.num_tx_desc = req->nr_tx_slots; + nna->up.num_rx_desc = req->nr_rx_slots; + error = netmap_attach_common(&nna->up); + if (error) + goto free_nna; + *na = &nna->up; + D("created null %s", nna->up.name); + + return 0; + +free_nna: + nm_os_free(nna); +err: + return error; +} + + +#endif /* WITH_NMNULL */ Index: sys/dev/netmap/netmap_pipe.c =================================================================== --- sys/dev/netmap/netmap_pipe.c +++ sys/dev/netmap/netmap_pipe.c @@ -443,7 +443,7 @@ /* In case of no error we put our rings in netmap mode */ for_rx_tx(t) { - for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { + for (i = 0; i < nma_get_nrings(na, t); i++) { struct netmap_kring *kring = NMR(na, t)[i]; if (nm_kring_pending_on(kring)) { struct netmap_kring *sring, *dring; @@ -490,7 +490,7 @@ if (na->active_fds == 0) na->na_flags &= 
~NAF_NETMAP_ON; for_rx_tx(t) { - for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { + for (i = 0; i < nma_get_nrings(na, t); i++) { struct netmap_kring *kring = NMR(na, t)[i]; if (nm_kring_pending_off(kring)) { @@ -567,7 +567,7 @@ sna = na; cleanup: for_rx_tx(t) { - for (i = 0; i < nma_get_nrings(sna, t) + 1; i++) { + for (i = 0; i < nma_get_nrings(sna, t); i++) { struct netmap_kring *kring = NMR(sna, t)[i]; struct netmap_ring *ring = kring->ring; uint32_t j, lim = kring->nkr_num_slots - 1; @@ -674,11 +674,11 @@ int create_error; /* Temporarily remove the pipe suffix. */ - strncpy(nr_name_orig, hdr->nr_name, sizeof(nr_name_orig)); + strlcpy(nr_name_orig, hdr->nr_name, sizeof(nr_name_orig)); *cbra = '\0'; error = netmap_get_na(hdr, &pna, &ifp, nmd, create); /* Restore the pipe suffix. */ - strncpy(hdr->nr_name, nr_name_orig, sizeof(hdr->nr_name)); + strlcpy(hdr->nr_name, nr_name_orig, sizeof(hdr->nr_name)); if (!error) break; if (error != ENXIO || retries++) { @@ -691,7 +691,7 @@ NMG_UNLOCK(); create_error = netmap_vi_create(hdr, 1 /* autodelete */); NMG_LOCK(); - strncpy(hdr->nr_name, nr_name_orig, sizeof(hdr->nr_name)); + strlcpy(hdr->nr_name, nr_name_orig, sizeof(hdr->nr_name)); if (create_error && create_error != EEXIST) { if (create_error != EOPNOTSUPP) { D("failed to create a persistent vale port: %d", create_error); Index: sys/dev/netmap/netmap_vale.c =================================================================== --- sys/dev/netmap/netmap_vale.c +++ sys/dev/netmap/netmap_vale.c @@ -121,18 +121,18 @@ "Max batch size to be used in the bridge"); SYSEND; -static int netmap_vp_create(struct nmreq_header *hdr, struct ifnet *, +static int netmap_vale_vp_create(struct nmreq_header *hdr, struct ifnet *, struct netmap_mem_d *nmd, struct netmap_vp_adapter **); -static int netmap_vp_bdg_attach(const char *, struct netmap_adapter *, +static int netmap_vale_vp_bdg_attach(const char *, struct netmap_adapter *, struct nm_bridge *); static int 
netmap_vale_bwrap_attach(const char *, struct netmap_adapter *); /* - * For each output interface, nm_bdg_q is used to construct a list. + * For each output interface, nm_vale_q is used to construct a list. * bq_len is the number of output buffers (we can have coalescing * during the copy). */ -struct nm_bdg_q { +struct nm_vale_q { uint16_t bq_head; uint16_t bq_tail; uint32_t bq_len; /* number of buffers */ @@ -140,10 +140,10 @@ /* Holds the default callbacks */ struct netmap_bdg_ops vale_bdg_ops = { - .lookup = netmap_bdg_learning, + .lookup = netmap_vale_learning, .config = NULL, .dtor = NULL, - .vp_create = netmap_vp_create, + .vp_create = netmap_vale_vp_create, .bwrap_attach = netmap_vale_bwrap_attach, .name = NM_BDG_NAME, }; @@ -212,7 +212,7 @@ /* all port:rings + broadcast */ num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1; l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX; - l += sizeof(struct nm_bdg_q) * num_dstq; + l += sizeof(struct nm_vale_q) * num_dstq; l += sizeof(uint16_t) * NM_BDG_BATCH_MAX; nrings = netmap_real_rings(na, NR_TX); @@ -219,7 +219,7 @@ kring = na->tx_rings; for (i = 0; i < nrings; i++) { struct nm_bdg_fwd *ft; - struct nm_bdg_q *dstq; + struct nm_vale_q *dstq; int j; ft = nm_os_malloc(l); @@ -227,7 +227,7 @@ nm_free_bdgfwd(na); return ENOMEM; } - dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX); + dstq = (struct nm_vale_q *)(ft + NM_BDG_BATCH_MAX); for (j = 0; j < num_dstq; j++) { dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL; dstq[j].bq_len = 0; @@ -307,11 +307,228 @@ return ret; } +/* Process NETMAP_REQ_VALE_LIST. 
*/ +int +netmap_vale_list(struct nmreq_header *hdr) +{ + struct nmreq_vale_list *req = + (struct nmreq_vale_list *)(uintptr_t)hdr->nr_body; + int namelen = strlen(hdr->nr_name); + struct nm_bridge *b, *bridges; + struct netmap_vp_adapter *vpna; + int error = 0, i, j; + u_int num_bridges; + netmap_bns_getbridges(&bridges, &num_bridges); + /* this is used to enumerate bridges and ports */ + if (namelen) { /* look up indexes of bridge and port */ + if (strncmp(hdr->nr_name, NM_BDG_NAME, + strlen(NM_BDG_NAME))) { + return EINVAL; + } + NMG_LOCK(); + b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL); + if (!b) { + NMG_UNLOCK(); + return ENOENT; + } + + req->nr_bridge_idx = b - bridges; /* bridge index */ + req->nr_port_idx = NM_BDG_NOPORT; + for (j = 0; j < b->bdg_active_ports; j++) { + i = b->bdg_port_index[j]; + vpna = b->bdg_ports[i]; + if (vpna == NULL) { + D("This should not happen"); + continue; + } + /* the former and the latter identify a + * virtual port and a NIC, respectively + */ + if (!strcmp(vpna->up.name, hdr->nr_name)) { + req->nr_port_idx = i; /* port index */ + break; + } + } + NMG_UNLOCK(); + } else { + /* return the first non-empty entry starting from + * bridge nr_arg1 and port nr_arg2. + * + * Users can detect the end of the same bridge by + * seeing the new and old value of nr_arg1, and can + * detect the end of all the bridge by error != 0 + */ + i = req->nr_bridge_idx; + j = req->nr_port_idx; + + NMG_LOCK(); + for (error = ENOENT; i < NM_BRIDGES; i++) { + b = bridges + i; + for ( ; j < NM_BDG_MAXPORTS; j++) { + if (b->bdg_ports[j] == NULL) + continue; + vpna = b->bdg_ports[j]; + /* write back the VALE switch name */ + strlcpy(hdr->nr_name, vpna->up.name, + sizeof(hdr->nr_name)); + error = 0; + goto out; + } + j = 0; /* following bridges scan from 0 */ + } + out: + req->nr_bridge_idx = i; + req->nr_port_idx = j; + NMG_UNLOCK(); + } + + return error; +} + +/* Process NETMAP_REQ_VALE_ATTACH. 
+ */ +int +netmap_vale_attach(struct nmreq_header *hdr, void *auth_token) +{ + struct nmreq_vale_attach *req = + (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body; + struct netmap_vp_adapter * vpna; + struct netmap_adapter *na = NULL; + struct netmap_mem_d *nmd = NULL; + struct nm_bridge *b = NULL; + int error; + + NMG_LOCK(); + /* permission check for modified bridges */ + b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL); + if (b && !nm_bdg_valid_auth_token(b, auth_token)) { + error = EACCES; + goto unlock_exit; + } + + if (req->reg.nr_mem_id) { + nmd = netmap_mem_find(req->reg.nr_mem_id); + if (nmd == NULL) { + error = EINVAL; + goto unlock_exit; + } + } + + /* check for existing one */ + error = netmap_get_vale_na(hdr, &na, nmd, 0); + if (na) { + error = EBUSY; + goto unref_exit; + } + error = netmap_get_vale_na(hdr, &na, + nmd, 1 /* create if not exists */); + if (error) { /* no device */ + goto unlock_exit; + } + + if (na == NULL) { /* VALE prefix missing */ + error = EINVAL; + goto unlock_exit; + } + + if (NETMAP_OWNED_BY_ANY(na)) { + error = EBUSY; + goto unref_exit; + } + + if (na->nm_bdg_ctl) { + /* nop for VALE ports. The bwrap needs to put the hwna + * in netmap mode (see netmap_bwrap_bdg_ctl) + */ + error = na->nm_bdg_ctl(hdr, na); + if (error) + goto unref_exit; + ND("registered %s to netmap-mode", na->name); + } + vpna = (struct netmap_vp_adapter *)na; + req->port_index = vpna->bdg_port; + + if (nmd) + netmap_mem_put(nmd); + + NMG_UNLOCK(); + return 0; + +unref_exit: + netmap_adapter_put(na); +unlock_exit: + if (nmd) + netmap_mem_put(nmd); + + NMG_UNLOCK(); + return error; +} + +/* Process NETMAP_REQ_VALE_DETACH. 
+ */ +int +netmap_vale_detach(struct nmreq_header *hdr, void *auth_token) +{ + struct nmreq_vale_detach *nmreq_det = (void *)(uintptr_t)hdr->nr_body; + struct netmap_vp_adapter *vpna; + struct netmap_adapter *na; + struct nm_bridge *b = NULL; + int error; + + NMG_LOCK(); + /* permission check for modified bridges */ + b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL); + if (b && !nm_bdg_valid_auth_token(b, auth_token)) { + error = EACCES; + goto unlock_exit; + } + + error = netmap_get_vale_na(hdr, &na, NULL, 0 /* don't create */); + if (error) { /* no device, or another bridge or user owns the device */ + goto unlock_exit; + } + + if (na == NULL) { /* VALE prefix missing */ + error = EINVAL; + goto unlock_exit; + } else if (nm_is_bwrap(na) && + ((struct netmap_bwrap_adapter *)na)->na_polling_state) { + /* Don't detach a NIC with polling */ + error = EBUSY; + goto unref_exit; + } + + vpna = (struct netmap_vp_adapter *)na; + if (na->na_vp != vpna) { + /* trying to detach first attach of VALE persistent port attached + * to 2 bridges + */ + error = EBUSY; + goto unref_exit; + } + nmreq_det->port_index = vpna->bdg_port; + + if (na->nm_bdg_ctl) { + /* remove the port from bridge. The bwrap + * also needs to put the hwna in normal mode + */ + error = na->nm_bdg_ctl(hdr, na); + } + +unref_exit: + netmap_adapter_put(na); +unlock_exit: + NMG_UNLOCK(); + return error; + +} + + /* nm_dtor callback for ephemeral VALE ports */ static void -netmap_vp_dtor(struct netmap_adapter *na) +netmap_vale_vp_dtor(struct netmap_adapter *na) { struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na; struct nm_bridge *b = vpna->na_bdg; @@ -334,47 +551,13 @@ } -/* Called by external kernel modules (e.g., Openvswitch). - * to modify the private data previously given to regops(). - * 'name' may be just bridge's name (including ':' if it - * is not just NM_BDG_NAME). - * Called without NMG_LOCK. 
- */ -int -nm_bdg_update_private_data(const char *name, bdg_update_private_data_fn_t callback, - void *callback_data, void *auth_token) -{ - void *private_data = NULL; - struct nm_bridge *b; - int error = 0; - NMG_LOCK(); - b = nm_find_bridge(name, 0 /* don't create */, NULL); - if (!b) { - error = EINVAL; - goto unlock_update_priv; - } - if (!nm_bdg_valid_auth_token(b, auth_token)) { - error = EACCES; - goto unlock_update_priv; - } - BDG_WLOCK(b); - private_data = callback(b->private_data, callback_data, &error); - b->private_data = private_data; - BDG_WUNLOCK(b); - -unlock_update_priv: - NMG_UNLOCK(); - return error; -} - - /* nm_krings_create callback for VALE ports. * Calls the standard netmap_krings_create, then adds leases on rx * rings and bdgfwd on tx rings. */ static int -netmap_vp_krings_create(struct netmap_adapter *na) +netmap_vale_vp_krings_create(struct netmap_adapter *na) { u_int tailroom; int error, i; @@ -409,7 +592,7 @@ /* nm_krings_delete callback for VALE ports. */ static void -netmap_vp_krings_delete(struct netmap_adapter *na) +netmap_vale_vp_krings_delete(struct netmap_adapter *na) { nm_free_bdgfwd(na); netmap_krings_delete(na); @@ -417,7 +600,7 @@ static int -nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, +nm_vale_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na, u_int ring_nr); @@ -429,7 +612,7 @@ * Returns the next position in the ring. 
*/ static int -nm_bdg_preflush(struct netmap_kring *kring, u_int end) +nm_vale_preflush(struct netmap_kring *kring, u_int end) { struct netmap_vp_adapter *na = (struct netmap_vp_adapter*)kring->na; @@ -488,7 +671,7 @@ ft[ft_i - frags].ft_frags = frags; frags = 1; if (unlikely((int)ft_i >= bridge_batch)) - ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr); + ft_i = nm_vale_flush(ft, ft_i, na, ring_nr); } if (frags > 1) { /* Here ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG, and we @@ -499,7 +682,7 @@ D("Truncate incomplete fragment at %d (%d frags)", ft_i, frags); } if (ft_i) - ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr); + ft_i = nm_vale_flush(ft, ft_i, na, ring_nr); BDG_RUNLOCK(b); return j; } @@ -528,7 +711,7 @@ static __inline uint32_t -nm_bridge_rthash(const uint8_t *addr) +nm_vale_rthash(const uint8_t *addr) { uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hask key @@ -554,7 +737,7 @@ * ring in *dst_ring (at the moment, always use ring 0) */ uint32_t -netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring, +netmap_vale_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring, struct netmap_vp_adapter *na, void *private_data) { uint8_t *buf = ((uint8_t *)ft->ft_buf) + ft->ft_offset; @@ -586,7 +769,7 @@ */ if (((buf[6] & 1) == 0) && (na->last_smac != smac)) { /* valid src */ uint8_t *s = buf+6; - sh = nm_bridge_rthash(s); /* hash of source */ + sh = nm_vale_rthash(s); /* hash of source */ /* update source port forwarding entry */ na->last_smac = ht[sh].mac = smac; /* XXX expire ? */ ht[sh].ports = mysrc; @@ -596,7 +779,7 @@ } dst = NM_BDG_BROADCAST; if ((buf[0] & 1) == 0) { /* unicast */ - dh = nm_bridge_rthash(buf); /* hash of dst */ + dh = nm_vale_rthash(buf); /* hash of dst */ if (ht[dh].mac == dmac) { /* found dst */ dst = ht[dh].ports; } @@ -682,10 +865,10 @@ * number of ports, and lets us replace the learn and dispatch functions. 
*/ int -nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na, +nm_vale_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na, u_int ring_nr) { - struct nm_bdg_q *dst_ents, *brddst; + struct nm_vale_q *dst_ents, *brddst; uint16_t num_dsts = 0, *dsts; struct nm_bridge *b = na->na_bdg; u_int i, me = na->bdg_port; @@ -696,7 +879,7 @@ * queues per port plus one for the broadcast traffic. * Then we have an array of destination indexes. */ - dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX); + dst_ents = (struct nm_vale_q *)(ft + NM_BDG_BATCH_MAX); dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1); /* first pass: find a destination for each packet in the batch */ @@ -703,7 +886,7 @@ for (i = 0; likely(i < n); i += ft[i].ft_frags) { uint8_t dst_ring = ring_nr; /* default, same ring as origin */ uint16_t dst_port, d_i; - struct nm_bdg_q *d; + struct nm_vale_q *d; struct nm_bdg_fwd *start_ft = NULL; ND("slot %d frags %d", i, ft[i].ft_frags); @@ -720,7 +903,7 @@ */ continue; } - dst_port = b->bdg_ops->lookup(start_ft, &dst_ring, na, b->private_data); + dst_port = b->bdg_ops.lookup(start_ft, &dst_ring, na, b->private_data); if (netmap_verbose > 255) RD(5, "slot %d port %d -> %d", i, me, dst_port); if (dst_port >= NM_BDG_NOPORT) @@ -778,7 +961,7 @@ u_int dst_nr, lim, j, d_i, next, brd_next; u_int needed, howmany; int retry = netmap_txsync_retry; - struct nm_bdg_q *d; + struct nm_vale_q *d; uint32_t my_start = 0, lease_idx = 0; int nrings; int virt_hdr_mismatch = 0; @@ -862,7 +1045,7 @@ if (dst_na->retry && retry) { /* try to get some free slot from the previous run */ - kring->nm_notify(kring, 0); + kring->nm_notify(kring, NAF_FORCE_RECLAIM); /* actually useful only for bwraps, since there * the notify will trigger a txsync on the hwna. 
VALE ports * have dst_na->retry == 0 @@ -1030,7 +1213,7 @@ /* nm_txsync callback for VALE ports */ static int -netmap_vp_txsync(struct netmap_kring *kring, int flags) +netmap_vale_vp_txsync(struct netmap_kring *kring, int flags) { struct netmap_vp_adapter *na = (struct netmap_vp_adapter *)kring->na; @@ -1049,7 +1232,7 @@ if (bridge_batch > NM_BDG_BATCH) bridge_batch = NM_BDG_BATCH; - done = nm_bdg_preflush(kring, head); + done = nm_vale_preflush(kring, head); done: if (done != head) D("early break at %d/ %d, tail %d", done, head, kring->nr_hwtail); @@ -1068,7 +1251,7 @@ * Only persistent VALE ports have a non-null ifp. */ static int -netmap_vp_create(struct nmreq_header *hdr, struct ifnet *ifp, +netmap_vale_vp_create(struct nmreq_header *hdr, struct ifnet *ifp, struct netmap_mem_d *nmd, struct netmap_vp_adapter **ret) { struct nmreq_register *req = (struct nmreq_register *)(uintptr_t)hdr->nr_body; @@ -1089,7 +1272,7 @@ na = &vpna->up; na->ifp = ifp; - strncpy(na->name, hdr->nr_name, sizeof(na->name)); + strlcpy(na->name, hdr->nr_name, sizeof(na->name)); /* bound checking */ na->num_tx_rings = req->nr_tx_rings; @@ -1129,12 +1312,12 @@ */ if (ifp) na->na_flags |= NAF_NATIVE; - na->nm_txsync = netmap_vp_txsync; - na->nm_rxsync = netmap_vp_rxsync; - na->nm_register = netmap_vp_reg; - na->nm_krings_create = netmap_vp_krings_create; - na->nm_krings_delete = netmap_vp_krings_delete; - na->nm_dtor = netmap_vp_dtor; + na->nm_txsync = netmap_vale_vp_txsync; + na->nm_rxsync = netmap_vp_rxsync; /* use the one provided by bdg */ + na->nm_register = netmap_vp_reg; /* use the one provided by bdg */ + na->nm_krings_create = netmap_vale_vp_krings_create; + na->nm_krings_delete = netmap_vale_vp_krings_delete; + na->nm_dtor = netmap_vale_vp_dtor; ND("nr_mem_id %d", req->nr_mem_id); na->nm_mem = nmd ? 
netmap_mem_get(nmd): @@ -1144,7 +1327,7 @@ req->nr_extra_bufs, npipes, &error); if (na->nm_mem == NULL) goto err; - na->nm_bdg_attach = netmap_vp_bdg_attach; + na->nm_bdg_attach = netmap_vale_vp_bdg_attach; /* other nmd fields are set in the common routine */ error = netmap_attach_common(na); if (error) @@ -1163,19 +1346,16 @@ * The na_vp port is this same netmap_adapter. There is no host port. */ static int -netmap_vp_bdg_attach(const char *name, struct netmap_adapter *na, +netmap_vale_vp_bdg_attach(const char *name, struct netmap_adapter *na, struct nm_bridge *b) { struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na; - if (b->bdg_ops != &vale_bdg_ops) { + if ((b->bdg_flags & NM_BDG_NEED_BWRAP) || vpna->na_bdg) { return NM_NEED_BWRAP; } - if (vpna->na_bdg) { - return NM_NEED_BWRAP; - } na->na_vp = vpna; - strncpy(na->name, name, sizeof(na->name)); + strlcpy(na->name, name, sizeof(na->name)); na->na_hostvp = NULL; return 0; } @@ -1186,12 +1366,12 @@ int error; /* impersonate a netmap_vp_adapter */ - error = netmap_vp_krings_create(na); + error = netmap_vale_vp_krings_create(na); if (error) return error; error = netmap_bwrap_krings_create_common(na); if (error) { - netmap_vp_krings_delete(na); + netmap_vale_vp_krings_delete(na); } return error; } @@ -1200,7 +1380,7 @@ netmap_vale_bwrap_krings_delete(struct netmap_adapter *na) { netmap_bwrap_krings_delete_common(na); - netmap_vp_krings_delete(na); + netmap_vale_vp_krings_delete(na); } static int @@ -1216,9 +1396,9 @@ return ENOMEM; } na = &bna->up.up; - strncpy(na->name, nr_name, sizeof(na->name)); + strlcpy(na->name, nr_name, sizeof(na->name)); na->nm_register = netmap_bwrap_reg; - na->nm_txsync = netmap_vp_txsync; + na->nm_txsync = netmap_vale_vp_txsync; // na->nm_rxsync = netmap_bwrap_rxsync; na->nm_krings_create = netmap_vale_bwrap_krings_create; na->nm_krings_delete = netmap_vale_bwrap_krings_delete; @@ -1389,7 +1569,7 @@ } } /* netmap_vp_create creates a struct netmap_vp_adapter */ - error = 
netmap_vp_create(hdr, ifp, nmd, &vpna); + error = netmap_vale_vp_create(hdr, ifp, nmd, &vpna); if (error) { D("error %d", error); goto err_1; Index: sys/modules/netmap/Makefile =================================================================== --- sys/modules/netmap/Makefile +++ sys/modules/netmap/Makefile @@ -3,12 +3,12 @@ # Compile netmap as a module, useful if you want a netmap bridge # or loadable drivers. -SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" +.include # FreeBSD 10 and earlier +# .include "${SYSDIR}/conf/kern.opts.mk" -.PATH: ${SYSDIR}/dev/netmap -.PATH.h: ${SYSDIR}/net -CFLAGS += -I${SYSDIR}/ -D INET +.PATH: ${.CURDIR}/../../dev/netmap +.PATH.h: ${.CURDIR}/../../net +CFLAGS += -I${.CURDIR}/../../ -D INET -D VIMAGE KMOD = netmap SRCS = device_if.h bus_if.h pci_if.h opt_netmap.h SRCS += netmap.c netmap.h netmap_kern.h @@ -20,8 +20,10 @@ SRCS += netmap_offloadings.c SRCS += netmap_pipe.c SRCS += netmap_monitor.c -SRCS += netmap_pt.c +SRCS += netmap_kloop.c SRCS += netmap_legacy.c +SRCS += netmap_bdg.c +SRCS += netmap_null.c SRCS += if_ptnet.c SRCS += opt_inet.h opt_inet6.h Index: sys/net/netmap.h =================================================================== --- sys/net/netmap.h +++ sys/net/netmap.h @@ -41,9 +41,9 @@ #ifndef _NET_NETMAP_H_ #define _NET_NETMAP_H_ -#define NETMAP_API 12 /* current API version */ +#define NETMAP_API 13 /* current API version */ -#define NETMAP_MIN_API 11 /* min and max versions accepted */ +#define NETMAP_MIN_API 13 /* min and max versions accepted */ #define NETMAP_MAX_API 15 /* * Some fields should be cache-aligned to reduce contention. @@ -479,6 +479,10 @@ * !=0: errno value */ uint32_t nro_status; + /* Option size, used only for options that can have variable size + * (e.g. because they contain arrays). For fixed-size options this + * field should be set to zero. */ + uint64_t nro_size; }; /* Header common to all requests. 
Do not reorder these fields, as we need @@ -518,6 +522,15 @@ NETMAP_REQ_VALE_POLLING_DISABLE, /* Get info about the pools of a memory allocator. */ NETMAP_REQ_POOLS_INFO_GET, + /* Start an in-kernel loop that syncs the rings periodically or + * on notifications. The loop runs in the context of the ioctl + * syscall, and only stops on NETMAP_REQ_SYNC_KLOOP_STOP. */ + NETMAP_REQ_SYNC_KLOOP_START, + /* Stops the thread executing the in-kernel loop. The thread + * returns from the ioctl syscall. */ + NETMAP_REQ_SYNC_KLOOP_STOP, + /* Enable CSB mode on a registered netmap control device. */ + NETMAP_REQ_CSB_ENABLE, }; enum { @@ -524,6 +537,17 @@ /* On NETMAP_REQ_REGISTER, ask netmap to use memory allocated * from user-space allocated memory pools (e.g. hugepages). */ NETMAP_REQ_OPT_EXTMEM = 1, + + /* ON NETMAP_REQ_SYNC_KLOOP_START, ask netmap to use eventfd-based + * notifications to synchronize the kernel loop with the application. + */ + NETMAP_REQ_OPT_SYNC_KLOOP_EVENTFDS, + + /* On NETMAP_REQ_REGISTER, ask netmap to work in CSB mode, where + * head, cur and tail pointers are not exchanged through the + * struct netmap_ring header, but rather using an user-provided + * memory area (see struct nm_csb_atok and struct nm_csb_ktoa). 
*/ + NETMAP_REQ_OPT_CSB, }; /* @@ -541,6 +565,7 @@ uint16_t nr_mem_id; /* id of the memory allocator */ uint16_t nr_ringid; /* ring(s) we care about */ uint32_t nr_mode; /* specify NR_REG_* modes */ + uint32_t nr_extra_bufs; /* number of requested extra buffers */ uint64_t nr_flags; /* additional flags (see below) */ /* monitors use nr_ringid and nr_mode to select the rings to monitor */ @@ -549,9 +574,7 @@ #define NR_ZCOPY_MON 0x400 /* request exclusive access to the selected rings */ #define NR_EXCLUSIVE 0x800 -/* request ptnetmap host support */ -#define NR_PASSTHROUGH_HOST NR_PTNETMAP_HOST /* deprecated */ -#define NR_PTNETMAP_HOST 0x1000 +/* 0x1000 unused */ #define NR_RX_RINGS_ONLY 0x2000 #define NR_TX_RINGS_ONLY 0x4000 /* Applications set this flag if they are able to deal with virtio-net headers, @@ -564,8 +587,6 @@ * NETMAP_DO_RX_POLL. */ #define NR_DO_RX_POLL 0x10000 #define NR_NO_TX_POLL 0x20000 - - uint32_t nr_extra_bufs; /* number of requested extra buffers */ }; /* Valid values for nmreq_register.nr_mode (see above). */ @@ -576,10 +597,11 @@ NR_REG_ONE_NIC = 4, NR_REG_PIPE_MASTER = 5, /* deprecated, use "x{y" port name syntax */ NR_REG_PIPE_SLAVE = 6, /* deprecated, use "x}y" port name syntax */ + NR_REG_NULL = 7, }; /* A single ioctl number is shared by all the new API command. - * Demultiplexing is done using the nr_hdr.nr_reqtype field. + * Demultiplexing is done using the hdr.nr_reqtype field. * FreeBSD uses the size value embedded in the _IOWR to determine * how much to copy in/out, so we define the ioctl() command * specifying only nmreq_header, and copyin/copyout the rest. */ @@ -595,16 +617,18 @@ /* * nr_reqtype: NETMAP_REQ_PORT_INFO_GET * Get information about a netmap port, including number of rings. - * slots per ring, id of the memory allocator, etc. + * slots per ring, id of the memory allocator, etc. The netmap + * control device used for this operation does not need to be bound + * to a netmap port. 
*/ struct nmreq_port_info_get { - uint64_t nr_offset; /* nifp offset in the shared region */ uint64_t nr_memsize; /* size of the shared region */ uint32_t nr_tx_slots; /* slots in tx rings */ uint32_t nr_rx_slots; /* slots in rx rings */ uint16_t nr_tx_rings; /* number of tx rings */ uint16_t nr_rx_rings; /* number of rx rings */ - uint16_t nr_mem_id; /* id of the memory allocator */ + uint16_t nr_mem_id; /* memory allocator id (in/out) */ + uint16_t pad1; }; #define NM_BDG_NAME "vale" /* prefix for bridge port name */ @@ -620,6 +644,7 @@ struct nmreq_vale_attach { struct nmreq_register reg; uint32_t port_index; + uint32_t pad1; }; /* @@ -630,6 +655,7 @@ */ struct nmreq_vale_detach { uint32_t port_index; + uint32_t pad1; }; /* @@ -639,15 +665,18 @@ struct nmreq_vale_list { /* Name of the VALE port (valeXXX:YYY) or empty. */ uint16_t nr_bridge_idx; + uint16_t pad1; uint32_t nr_port_idx; }; /* * nr_reqtype: NETMAP_REQ_PORT_HDR_SET or NETMAP_REQ_PORT_HDR_GET - * Set the port header length. + * Set or get the port header length of the port identified by hdr.nr_name. + * The control device does not need to be bound to a netmap port. */ struct nmreq_port_hdr { uint32_t nr_hdr_len; + uint32_t pad1; }; /* @@ -660,6 +689,7 @@ uint16_t nr_tx_rings; /* number of tx rings */ uint16_t nr_rx_rings; /* number of rx rings */ uint16_t nr_mem_id; /* id of the memory allocator */ + uint16_t pad1; }; /* @@ -672,17 +702,20 @@ #define NETMAP_POLLING_MODE_MULTI_CPU 2 uint32_t nr_first_cpu_id; uint32_t nr_num_polling_cpus; + uint32_t pad1; }; /* * nr_reqtype: NETMAP_REQ_POOLS_INFO_GET - * Get info about the pools of the memory allocator of the port bound - * to a given netmap control device (used i.e. by a ptnetmap-enabled - * hypervisor). The nr_hdr.nr_name field is ignored. + * Get info about the pools of the memory allocator of the netmap + * port specified by hdr.nr_name and nr_mem_id. 
The netmap control + * device used for this operation does not need to be bound to a netmap + * port. */ struct nmreq_pools_info { uint64_t nr_memsize; - uint16_t nr_mem_id; + uint16_t nr_mem_id; /* in/out argument */ + uint16_t pad1[3]; uint64_t nr_if_pool_offset; uint32_t nr_if_pool_objtotal; uint32_t nr_if_pool_objsize; @@ -695,9 +728,135 @@ }; /* + * nr_reqtype: NETMAP_REQ_SYNC_KLOOP_START + * Start an in-kernel loop that syncs the rings periodically or on + * notifications. The loop runs in the context of the ioctl syscall, + * and only stops on NETMAP_REQ_SYNC_KLOOP_STOP. + * The registered netmap port must be open in CSB mode. + */ +struct nmreq_sync_kloop_start { + /* Sleeping is the default synchronization method for the kloop. + * The 'sleep_us' field specifies how many microseconds to sleep for + * when there is no work to do, before doing another kloop iteration. + */ + uint32_t sleep_us; + uint32_t pad1; +}; + +/* A CSB entry for the application --> kernel direction. */ +struct nm_csb_atok { + uint32_t head; /* AW+ KR+ the head of the appl netmap_ring */ + uint32_t cur; /* AW+ KR+ the cur of the appl netmap_ring */ + uint32_t appl_need_kick; /* AW+ KR+ kern --> appl notification enable */ + uint32_t sync_flags; /* AW+ KR+ the flags of the appl [tx|rx]sync() */ + uint32_t pad[12]; /* pad to a 64 bytes cacheline */ +}; + +/* A CSB entry for the application <-- kernel direction. */ +struct nm_csb_ktoa { + uint32_t hwcur; /* AR+ KW+ the hwcur of the kern netmap_kring */ + uint32_t hwtail; /* AR+ KW+ the hwtail of the kern netmap_kring */ + uint32_t kern_need_kick; /* AR+ KW+ appl-->kern notification enable */ + uint32_t pad[13]; +}; + +#ifdef __linux__ + +#ifdef __KERNEL__ +#define nm_stst_barrier smp_wmb +#else /* !__KERNEL__ */ +static inline void nm_stst_barrier(void) +{ + /* A memory barrier with release semantic has the combined + * effect of a store-store barrier and a load-store barrier, + * which is fine for us.
*/ + __atomic_thread_fence(__ATOMIC_RELEASE); +} +#endif /* !__KERNEL__ */ + +#elif defined(__FreeBSD__) + +#ifdef _KERNEL +#define nm_stst_barrier atomic_thread_fence_rel +#else /* !_KERNEL */ +static inline void nm_stst_barrier(void) +{ + __atomic_thread_fence(__ATOMIC_RELEASE); +} +#endif /* !_KERNEL */ + +#else /* !__linux__ && !__FreeBSD__ */ +#error "OS not supported" +#endif /* !__linux__ && !__FreeBSD__ */ + +/* Application side of sync-kloop: Write ring pointers (cur, head) to the CSB. + * This routine is coupled with sync_kloop_kernel_read(). */ +static inline void +nm_sync_kloop_appl_write(struct nm_csb_atok *atok, uint32_t cur, + uint32_t head) +{ + /* + * We need to write cur and head to the CSB but we cannot do it atomically. + * There is no way we can prevent the host from reading the updated value + * of one of the two and the old value of the other. However, if we make + * sure that the host never reads a value of head more recent than the + * value of cur we are safe. We can allow the host to read a value of cur + * more recent than the value of head, since in the netmap ring cur can be + * ahead of head and cur cannot wrap around head because it must be behind + * tail. Inverting the order of writes below could instead result into the + * host to think head went ahead of cur, which would cause the sync + * prologue to fail. + * + * The following memory barrier scheme is used to make this happen: + * + * Guest Host + * + * STORE(cur) LOAD(head) + * mb() <-----------> mb() + * STORE(head) LOAD(cur) + * + */ + atok->cur = cur; + nm_stst_barrier(); + atok->head = head; +} + +/* Application side of sync-kloop: Read kring pointers (hwcur, hwtail) from + * the CSB. This routine is coupled with sync_kloop_kernel_write(). */ +static inline void +nm_sync_kloop_appl_read(struct nm_csb_ktoa *ktoa, uint32_t *hwtail, + uint32_t *hwcur) +{ + /* + * We place a memory barrier to make sure that the update of hwtail never + * overtakes the update of hwcur. 
+ * (see explanation in sync_kloop_kernel_write). + */ + *hwtail = ktoa->hwtail; + nm_stst_barrier(); + *hwcur = ktoa->hwcur; +} + +/* * data for NETMAP_REQ_OPT_* options */ +struct nmreq_opt_sync_kloop_eventfds { + struct nmreq_option nro_opt; /* common header */ + /* An array of N entries for bidirectional notifications between + * the kernel loop and the application. The number of entries and + * their order must agree with the CSB arrays passed in the + * NETMAP_REQ_OPT_CSB option. Each entry contains a file descriptor + * backed by an eventfd. + */ + struct { + /* Notifier for the application --> kernel loop direction. */ + int32_t ioeventfd; + /* Notifier for the kernel loop --> application direction. */ + int32_t irqfd; + } eventfds[0]; +}; + struct nmreq_opt_extmem { struct nmreq_option nro_opt; /* common header */ uint64_t nro_usrptr; /* (in) ptr to usr memory */ @@ -704,4 +863,16 @@ struct nmreq_pools_info nro_info; /* (in/out) */ }; +struct nmreq_opt_csb { + struct nmreq_option nro_opt; + + /* Array of CSB entries for application --> kernel communication + * (N entries). */ + uint64_t csb_atok; + + /* Array of CSB entries for kernel --> application communication + * (N entries). 
*/ + uint64_t csb_ktoa; +}; + #endif /* _NET_NETMAP_H_ */ Index: sys/net/netmap_user.h =================================================================== --- sys/net/netmap_user.h +++ sys/net/netmap_user.h @@ -1091,18 +1091,36 @@ ring = NETMAP_RXRING(d->nifp, ri); for ( ; !nm_ring_empty(ring) && cnt != got; got++) { u_int idx, i; + u_char *oldbuf; + struct netmap_slot *slot; if (d->hdr.buf) { /* from previous round */ cb(arg, &d->hdr, d->hdr.buf); } i = ring->cur; - idx = ring->slot[i].buf_idx; + slot = &ring->slot[i]; + idx = slot->buf_idx; /* d->cur_rx_ring doesn't change inside this loop, but * set it here, so it reflects d->hdr.buf's ring */ d->cur_rx_ring = ri; - d->hdr.slot = &ring->slot[i]; - d->hdr.buf = (u_char *)NETMAP_BUF(ring, idx); + d->hdr.slot = slot; + oldbuf = d->hdr.buf = (u_char *)NETMAP_BUF(ring, idx); // __builtin_prefetch(buf); - d->hdr.len = d->hdr.caplen = ring->slot[i].len; + d->hdr.len = d->hdr.caplen = slot->len; + while (slot->flags & NS_MOREFRAG) { + u_char *nbuf; + u_int oldlen = slot->len; + i = nm_ring_next(ring, i); + slot = &ring->slot[i]; + d->hdr.len += slot->len; + nbuf = (u_char *)NETMAP_BUF(ring, slot->buf_idx); + if (oldbuf != NULL && nbuf - oldbuf == ring->nr_buf_size && + oldlen == ring->nr_buf_size) { + d->hdr.caplen += slot->len; + oldbuf = nbuf; + } else { + oldbuf = NULL; + } + } d->hdr.ts = ring->ts; ring->head = ring->cur = nm_ring_next(ring, i); } Index: sys/net/netmap_virt.h =================================================================== --- sys/net/netmap_virt.h +++ sys/net/netmap_virt.h @@ -1,7 +1,7 @@ /* * Copyright (C) 2013-2016 Luigi Rizzo * Copyright (C) 2013-2016 Giuseppe Lettieri - * Copyright (C) 2013-2016 Vincenzo Maffione + * Copyright (C) 2013-2018 Vincenzo Maffione * Copyright (C) 2015 Stefano Garzarella * All rights reserved. 
* @@ -33,14 +33,15 @@ #define NETMAP_VIRT_H /* - * ptnetmap_memdev: device used to expose memory into the guest VM + * Register offsets and other macros for the ptnetmap paravirtual devices: + * ptnetmap-memdev: device used to expose memory into the guest + * ptnet: paravirtualized NIC exposing a netmap port in the guest * * These macros are used in the hypervisor frontend (QEMU, bhyve) and in the * guest device driver. */ -/* PCI identifiers and PCI BARs for the ptnetmap memdev - * and ptnetmap network interface. */ +/* PCI identifiers and PCI BARs for ptnetmap-memdev and ptnet. */ #define PTNETMAP_MEMDEV_NAME "ptnetmap-memdev" #define PTNETMAP_PCI_VENDOR_ID 0x1b36 /* QEMU virtual devices */ #define PTNETMAP_PCI_DEVICE_ID 0x000c /* memory device */ @@ -49,7 +50,7 @@ #define PTNETMAP_MEM_PCI_BAR 1 #define PTNETMAP_MSIX_PCI_BAR 2 -/* Registers for the ptnetmap memdev */ +/* Device registers for ptnetmap-memdev */ #define PTNET_MDEV_IO_MEMSIZE_LO 0 /* netmap memory size (low) */ #define PTNET_MDEV_IO_MEMSIZE_HI 4 /* netmap_memory_size (high) */ #define PTNET_MDEV_IO_MEMID 8 /* memory allocator ID in the host */ @@ -64,74 +65,10 @@ #define PTNET_MDEV_IO_BUF_POOL_OBJSZ 96 #define PTNET_MDEV_IO_END 100 -/* - * ptnetmap configuration - * - * The ptnet kthreads (running in host kernel-space) need to be configured - * in order to know how to intercept guest kicks (I/O register writes) and - * how to inject MSI-X interrupts to the guest. The configuration may vary - * depending on the hypervisor. Currently, we support QEMU/KVM on Linux and - * and bhyve on FreeBSD. - * The configuration is passed by the hypervisor to the host netmap module - * by means of an ioctl() with nr_cmd=NETMAP_PT_HOST_CREATE, and it is - * specified by the ptnetmap_cfg struct. This struct contains an header - * with general informations and an array of entries whose size depends - * on the hypervisor. The NETMAP_PT_HOST_CREATE command is issued every - * time the kthreads are started. 
- */ -struct ptnetmap_cfg { -#define PTNETMAP_CFGTYPE_QEMU 0x1 -#define PTNETMAP_CFGTYPE_BHYVE 0x2 - uint16_t cfgtype; /* how to interpret the cfg entries */ - uint16_t entry_size; /* size of a config entry */ - uint32_t num_rings; /* number of config entries */ - void *csb_gh; /* CSB for guest --> host communication */ - void *csb_hg; /* CSB for host --> guest communication */ - /* Configuration entries are allocated right after the struct. */ -}; - -/* Configuration of a ptnetmap ring for QEMU. */ -struct ptnetmap_cfgentry_qemu { - uint32_t ioeventfd; /* to intercept guest register access */ - uint32_t irqfd; /* to inject guest interrupts */ -}; - -/* Configuration of a ptnetmap ring for bhyve. */ -struct ptnetmap_cfgentry_bhyve { - uint64_t wchan; /* tsleep() parameter, to wake up kthread */ - uint32_t ioctl_fd; /* ioctl fd */ - /* ioctl parameters to send irq */ - uint32_t ioctl_cmd; - /* vmm.ko MSIX parameters for IOCTL */ - struct { - uint64_t msg_data; - uint64_t addr; - } ioctl_data; -}; - -/* - * Pass a pointer to a userspace buffer to be passed to kernelspace for write - * or read. Used by NETMAP_PT_HOST_CREATE. - * XXX deprecated - */ -static inline void -nmreq_pointer_put(struct nmreq *nmr, void *userptr) -{ - uintptr_t *pp = (uintptr_t *)&nmr->nr_arg1; - *pp = (uintptr_t)userptr; -} - -static inline void * -nmreq_pointer_get(const struct nmreq *nmr) -{ - const uintptr_t *pp = (const uintptr_t *)&nmr->nr_arg1; - return (void *)*pp; -} - /* ptnetmap features */ #define PTNETMAP_F_VNET_HDR 1 -/* I/O registers for the ptnet device. */ +/* Device registers for the ptnet network device. 
*/ #define PTNET_IO_PTFEAT 0 #define PTNET_IO_PTCTL 4 #define PTNET_IO_MAC_LO 8 @@ -153,140 +90,11 @@ #define PTNET_IO_KICK_BASE 128 #define PTNET_IO_MASK 0xff -/* ptnetmap control commands (values for PTCTL register) */ +/* ptnet control commands (values for PTCTL register): + * - CREATE starts the host sync-kloop + * - DELETE stops the host sync-kloop + */ #define PTNETMAP_PTCTL_CREATE 1 #define PTNETMAP_PTCTL_DELETE 2 -/* ptnetmap synchronization variables shared between guest and host */ -struct ptnet_csb_gh { - uint32_t head; /* GW+ HR+ the head of the guest netmap_ring */ - uint32_t cur; /* GW+ HR+ the cur of the guest netmap_ring */ - uint32_t guest_need_kick; /* GW+ HR+ host-->guest notification enable */ - uint32_t sync_flags; /* GW+ HR+ the flags of the guest [tx|rx]sync() */ - char pad[48]; /* pad to a 64 bytes cacheline */ -}; -struct ptnet_csb_hg { - uint32_t hwcur; /* GR+ HW+ the hwcur of the host netmap_kring */ - uint32_t hwtail; /* GR+ HW+ the hwtail of the host netmap_kring */ - uint32_t host_need_kick; /* GR+ HW+ guest-->host notification enable */ - char pad[4+48]; -}; - -#ifdef WITH_PTNETMAP_GUEST - -/* ptnetmap_memdev routines used to talk with ptnetmap_memdev device driver */ -struct ptnetmap_memdev; -int nm_os_pt_memdev_iomap(struct ptnetmap_memdev *, vm_paddr_t *, void **, - uint64_t *); -void nm_os_pt_memdev_iounmap(struct ptnetmap_memdev *); -uint32_t nm_os_pt_memdev_ioread(struct ptnetmap_memdev *, unsigned int); - -/* Guest driver: Write kring pointers (cur, head) to the CSB. - * This routine is coupled with ptnetmap_host_read_kring_csb(). */ -static inline void -ptnetmap_guest_write_kring_csb(struct ptnet_csb_gh *ptr, uint32_t cur, - uint32_t head) -{ - /* - * We need to write cur and head to the CSB but we cannot do it atomically. - * There is no way we can prevent the host from reading the updated value - * of one of the two and the old value of the other. 
However, if we make - * sure that the host never reads a value of head more recent than the - * value of cur we are safe. We can allow the host to read a value of cur - * more recent than the value of head, since in the netmap ring cur can be - * ahead of head and cur cannot wrap around head because it must be behind - * tail. Inverting the order of writes below could instead result into the - * host to think head went ahead of cur, which would cause the sync - * prologue to fail. - * - * The following memory barrier scheme is used to make this happen: - * - * Guest Host - * - * STORE(cur) LOAD(head) - * mb() <-----------> mb() - * STORE(head) LOAD(cur) - */ - ptr->cur = cur; - mb(); - ptr->head = head; -} - -/* Guest driver: Read kring pointers (hwcur, hwtail) from the CSB. - * This routine is coupled with ptnetmap_host_write_kring_csb(). */ -static inline void -ptnetmap_guest_read_kring_csb(struct ptnet_csb_hg *pthg, struct netmap_kring *kring) -{ - /* - * We place a memory barrier to make sure that the update of hwtail never - * overtakes the update of hwcur. - * (see explanation in ptnetmap_host_write_kring_csb). - */ - kring->nr_hwtail = pthg->hwtail; - mb(); - kring->nr_hwcur = pthg->hwcur; -} - -#endif /* WITH_PTNETMAP_GUEST */ - -#ifdef WITH_PTNETMAP_HOST -/* - * ptnetmap kernel thread routines - * */ - -/* Functions to read and write CSB fields in the host */ -#if defined (linux) -#define CSB_READ(csb, field, r) (get_user(r, &csb->field)) -#define CSB_WRITE(csb, field, v) (put_user(v, &csb->field)) -#else /* ! linux */ -#define CSB_READ(csb, field, r) (r = fuword32(&csb->field)) -#define CSB_WRITE(csb, field, v) (suword32(&csb->field, v)) -#endif /* ! linux */ - -/* Host netmap: Write kring pointers (hwcur, hwtail) to the CSB. - * This routine is coupled with ptnetmap_guest_read_kring_csb(). 
*/ -static inline void -ptnetmap_host_write_kring_csb(struct ptnet_csb_hg __user *ptr, uint32_t hwcur, - uint32_t hwtail) -{ - /* - * The same scheme used in ptnetmap_guest_write_kring_csb() applies here. - * We allow the guest to read a value of hwcur more recent than the value - * of hwtail, since this would anyway result in a consistent view of the - * ring state (and hwcur can never wraparound hwtail, since hwcur must be - * behind head). - * - * The following memory barrier scheme is used to make this happen: - * - * Guest Host - * - * STORE(hwcur) LOAD(hwtail) - * mb() <-------------> mb() - * STORE(hwtail) LOAD(hwcur) - */ - CSB_WRITE(ptr, hwcur, hwcur); - mb(); - CSB_WRITE(ptr, hwtail, hwtail); -} - -/* Host netmap: Read kring pointers (head, cur, sync_flags) from the CSB. - * This routine is coupled with ptnetmap_guest_write_kring_csb(). */ -static inline void -ptnetmap_host_read_kring_csb(struct ptnet_csb_gh __user *ptr, - struct netmap_ring *shadow_ring, - uint32_t num_slots) -{ - /* - * We place a memory barrier to make sure that the update of head never - * overtakes the update of cur. - * (see explanation in ptnetmap_guest_write_kring_csb). - */ - CSB_READ(ptr, head, shadow_ring->head); - mb(); - CSB_READ(ptr, cur, shadow_ring->cur); - CSB_READ(ptr, sync_flags, shadow_ring->flags); -} - -#endif /* WITH_PTNETMAP_HOST */ - #endif /* NETMAP_VIRT_H */