Index: head/sys/conf/files =================================================================== --- head/sys/conf/files +++ head/sys/conf/files @@ -2517,17 +2517,19 @@ dev/nand/nfc_if.m optional nand dev/netmap/if_ptnet.c optional netmap inet dev/netmap/netmap.c optional netmap +dev/netmap/netmap_bdg.c optional netmap dev/netmap/netmap_freebsd.c optional netmap dev/netmap/netmap_generic.c optional netmap +dev/netmap/netmap_kloop.c optional netmap +dev/netmap/netmap_legacy.c optional netmap dev/netmap/netmap_mbq.c optional netmap dev/netmap/netmap_mem2.c optional netmap dev/netmap/netmap_monitor.c optional netmap +dev/netmap/netmap_null.c optional netmap dev/netmap/netmap_offloadings.c optional netmap dev/netmap/netmap_pipe.c optional netmap dev/netmap/netmap_pt.c optional netmap dev/netmap/netmap_vale.c optional netmap -dev/netmap/netmap_legacy.c optional netmap -dev/netmap/netmap_bdg.c optional netmap # compile-with "${NORMAL_C} -Wconversion -Wextra" dev/nfsmb/nfsmb.c optional nfsmb pci dev/nge/if_nge.c optional nge Index: head/sys/dev/netmap/if_ixl_netmap.h =================================================================== --- head/sys/dev/netmap/if_ixl_netmap.h +++ head/sys/dev/netmap/if_ixl_netmap.h @@ -129,7 +129,7 @@ na.ifp = vsi->ifp; na.na_flags = NAF_BDG_MAYSLEEP; // XXX check that queues is set. - nm_prinf("queues is %p\n", vsi->queues); + nm_prinf("queues is %p", vsi->queues); if (vsi->queues) { na.num_tx_desc = vsi->queues[0].num_desc; na.num_rx_desc = vsi->queues[0].num_desc; Index: head/sys/dev/netmap/if_ptnet.c =================================================================== --- head/sys/dev/netmap/if_ptnet.c +++ head/sys/dev/netmap/if_ptnet.c @@ -128,8 +128,8 @@ struct resource *irq; void *cookie; int kring_id; - struct ptnet_csb_gh *ptgh; - struct ptnet_csb_hg *pthg; + struct nm_csb_atok *atok; + struct nm_csb_ktoa *ktoa; unsigned int kick; struct mtx lock; struct buf_ring *bufring; /* for TX queues */ @@ -166,8 +166,8 @@ unsigned int num_tx_rings; struct ptnet_queue *queues; struct ptnet_queue *rxqueues; - struct ptnet_csb_gh *csb_gh; - struct ptnet_csb_hg *csb_hg; + struct nm_csb_atok *csb_gh; + struct nm_csb_ktoa *csb_hg; unsigned int min_tx_space; @@ -209,7 +209,7 @@ static int ptnet_irqs_init(struct ptnet_softc *sc); static void ptnet_irqs_fini(struct ptnet_softc *sc); -static uint32_t ptnet_nm_ptctl(if_t ifp, uint32_t cmd); +static uint32_t ptnet_nm_ptctl(struct ptnet_softc *sc, uint32_t cmd); static int ptnet_nm_config(struct netmap_adapter *na, struct nm_config_info *info); static void ptnet_update_vnet_hdr(struct ptnet_softc *sc); @@ -327,7 +327,7 @@ sc->num_rings = num_tx_rings + num_rx_rings; sc->num_tx_rings = num_tx_rings; - if (sc->num_rings * sizeof(struct ptnet_csb_gh) > PAGE_SIZE) { + if (sc->num_rings * sizeof(struct nm_csb_atok) > PAGE_SIZE) { device_printf(dev, "CSB cannot handle that many rings (%u)\n", sc->num_rings); err = ENOMEM; @@ -342,7 +342,7 @@ err = ENOMEM; goto err_path; } - sc->csb_hg = (struct ptnet_csb_hg *)(((char *)sc->csb_gh) + PAGE_SIZE); + sc->csb_hg = (struct nm_csb_ktoa *)(((char *)sc->csb_gh) + PAGE_SIZE); { /* @@ -379,8 +379,8 @@ pq->sc = sc; pq->kring_id = i; pq->kick = PTNET_IO_KICK_BASE + 4 * i; - pq->ptgh = sc->csb_gh + i; - pq->pthg = sc->csb_hg + i; + pq->atok = sc->csb_gh + i; + pq->ktoa = sc->csb_hg + i; snprintf(pq->lock_name, sizeof(pq->lock_name), "%s-%d", device_get_nameunit(dev), i); mtx_init(&pq->lock, pq->lock_name, NULL, MTX_DEF); @@ -505,12 +505,25 @@ return err; } +/* Stop host sync-kloop if it was running. 
*/ +static void +ptnet_device_shutdown(struct ptnet_softc *sc) +{ + ptnet_nm_ptctl(sc, PTNETMAP_PTCTL_DELETE); + bus_write_4(sc->iomem, PTNET_IO_CSB_GH_BAH, 0); + bus_write_4(sc->iomem, PTNET_IO_CSB_GH_BAL, 0); + bus_write_4(sc->iomem, PTNET_IO_CSB_HG_BAH, 0); + bus_write_4(sc->iomem, PTNET_IO_CSB_HG_BAL, 0); +} + static int ptnet_detach(device_t dev) { struct ptnet_softc *sc = device_get_softc(dev); int i; + ptnet_device_shutdown(sc); + #ifdef DEVICE_POLLING if (sc->ifp->if_capenable & IFCAP_POLLING) { ether_poll_deregister(sc->ifp); @@ -543,10 +556,6 @@ ptnet_irqs_fini(sc); if (sc->csb_gh) { - bus_write_4(sc->iomem, PTNET_IO_CSB_GH_BAH, 0); - bus_write_4(sc->iomem, PTNET_IO_CSB_GH_BAL, 0); - bus_write_4(sc->iomem, PTNET_IO_CSB_HG_BAH, 0); - bus_write_4(sc->iomem, PTNET_IO_CSB_HG_BAL, 0); contigfree(sc->csb_gh, 2*PAGE_SIZE, M_DEVBUF); sc->csb_gh = NULL; sc->csb_hg = NULL; @@ -583,9 +592,8 @@ static int ptnet_suspend(device_t dev) { - struct ptnet_softc *sc; + struct ptnet_softc *sc = device_get_softc(dev); - sc = device_get_softc(dev); (void)sc; return (0); @@ -594,9 +602,8 @@ static int ptnet_resume(device_t dev) { - struct ptnet_softc *sc; + struct ptnet_softc *sc = device_get_softc(dev); - sc = device_get_softc(dev); (void)sc; return (0); @@ -605,11 +612,11 @@ static int ptnet_shutdown(device_t dev) { - /* - * Suspend already does all of what we need to - * do here; we just never expect to be resumed. - */ - return (ptnet_suspend(dev)); + struct ptnet_softc *sc = device_get_softc(dev); + + ptnet_device_shutdown(sc); + + return (0); } static int @@ -796,7 +803,7 @@ /* Make sure the worker sees the * IFF_DRV_RUNNING down. */ PTNET_Q_LOCK(pq); - pq->ptgh->guest_need_kick = 0; + pq->atok->appl_need_kick = 0; PTNET_Q_UNLOCK(pq); /* Wait for rescheduling to finish. */ if (pq->taskq) { @@ -810,7 +817,7 @@ for (i = 0; i < sc->num_rings; i++) { pq = sc-> queues + i; PTNET_Q_LOCK(pq); - pq->ptgh->guest_need_kick = 1; + pq->atok->appl_need_kick = 1; PTNET_Q_UNLOCK(pq); } } @@ -881,7 +888,7 @@ return ret; } - if (sc->ptna->backend_regifs == 0) { + if (sc->ptna->backend_users == 0) { ret = ptnet_nm_krings_create(na_nm); if (ret) { device_printf(sc->dev, "ptnet_nm_krings_create() " @@ -962,7 +969,7 @@ ptnet_nm_register(na_dr, 0 /* off */); - if (sc->ptna->backend_regifs == 0) { + if (sc->ptna->backend_users == 0) { netmap_mem_rings_delete(na_dr); ptnet_nm_krings_delete(na_nm); } @@ -1092,9 +1099,8 @@ } static uint32_t -ptnet_nm_ptctl(if_t ifp, uint32_t cmd) +ptnet_nm_ptctl(struct ptnet_softc *sc, uint32_t cmd) { - struct ptnet_softc *sc = if_getsoftc(ifp); /* * Write a command and read back error status, * with zero meaning success. @@ -1130,8 +1136,8 @@ /* Sync krings from the host, reading from * CSB. 
*/ for (i = 0; i < sc->num_rings; i++) { - struct ptnet_csb_gh *ptgh = sc->queues[i].ptgh; - struct ptnet_csb_hg *pthg = sc->queues[i].pthg; + struct nm_csb_atok *atok = sc->queues[i].atok; + struct nm_csb_ktoa *ktoa = sc->queues[i].ktoa; struct netmap_kring *kring; if (i < na->num_tx_rings) { @@ -1139,15 +1145,15 @@ } else { kring = na->rx_rings[i - na->num_tx_rings]; } - kring->rhead = kring->ring->head = ptgh->head; - kring->rcur = kring->ring->cur = ptgh->cur; - kring->nr_hwcur = pthg->hwcur; + kring->rhead = kring->ring->head = atok->head; + kring->rcur = kring->ring->cur = atok->cur; + kring->nr_hwcur = ktoa->hwcur; kring->nr_hwtail = kring->rtail = - kring->ring->tail = pthg->hwtail; + kring->ring->tail = ktoa->hwtail; ND("%d,%d: csb {hc %u h %u c %u ht %u}", t, i, - pthg->hwcur, ptgh->head, ptgh->cur, - pthg->hwtail); + ktoa->hwcur, atok->head, atok->cur, + ktoa->hwtail); ND("%d,%d: kring {hc %u rh %u rc %u h %u c %u ht %u rt %u t %u}", t, i, kring->nr_hwcur, kring->rhead, kring->rcur, kring->ring->head, kring->ring->cur, kring->nr_hwtail, @@ -1178,7 +1184,7 @@ int i; if (!onoff) { - sc->ptna->backend_regifs--; + sc->ptna->backend_users--; } /* If this is the last netmap client, guest interrupt enable flags may @@ -1191,17 +1197,17 @@ D("Exit netmap mode, re-enable interrupts"); for (i = 0; i < sc->num_rings; i++) { pq = sc->queues + i; - pq->ptgh->guest_need_kick = 1; + pq->atok->appl_need_kick = 1; } } if (onoff) { - if (sc->ptna->backend_regifs == 0) { + if (sc->ptna->backend_users == 0) { /* Initialize notification enable fields in the CSB. */ for (i = 0; i < sc->num_rings; i++) { pq = sc->queues + i; - pq->pthg->host_need_kick = 1; - pq->ptgh->guest_need_kick = + pq->ktoa->kern_need_kick = 1; + pq->atok->appl_need_kick = (!(ifp->if_capenable & IFCAP_POLLING) && i >= sc->num_tx_rings); } @@ -1211,17 +1217,13 @@ /* Make sure the host adapter passed through is ready * for txsync/rxsync. */ - ret = ptnet_nm_ptctl(ifp, PTNETMAP_PTCTL_CREATE); + ret = ptnet_nm_ptctl(sc, PTNETMAP_PTCTL_CREATE); if (ret) { return ret; } - } - /* Sync from CSB must be done after REGIF PTCTL. Skip this - * step only if this is a netmap client and it is not the - * first one. */ - if ((!native && sc->ptna->backend_regifs == 0) || - (native && na->active_fds == 0)) { + /* Align the guest krings and rings to the state stored + * in the CSB. */ ptnet_sync_from_csb(sc, na); } @@ -1254,19 +1256,13 @@ } } - /* Sync from CSB must be done before UNREGIF PTCTL, on the last - * netmap client. 
*/ - if (native && na->active_fds == 0) { - ptnet_sync_from_csb(sc, na); + if (sc->ptna->backend_users == 0) { + ret = ptnet_nm_ptctl(sc, PTNETMAP_PTCTL_DELETE); } - - if (sc->ptna->backend_regifs == 0) { - ret = ptnet_nm_ptctl(ifp, PTNETMAP_PTCTL_DELETE); - } } if (onoff) { - sc->ptna->backend_regifs++; + sc->ptna->backend_users++; } return ret; @@ -1279,7 +1275,7 @@ struct ptnet_queue *pq = sc->queues + kring->ring_id; bool notify; - notify = netmap_pt_guest_txsync(pq->ptgh, pq->pthg, kring, flags); + notify = netmap_pt_guest_txsync(pq->atok, pq->ktoa, kring, flags); if (notify) { ptnet_kick(pq); } @@ -1294,7 +1290,7 @@ struct ptnet_queue *pq = sc->rxqueues + kring->ring_id; bool notify; - notify = netmap_pt_guest_rxsync(pq->ptgh, pq->pthg, kring, flags); + notify = netmap_pt_guest_rxsync(pq->atok, pq->ktoa, kring, flags); if (notify) { ptnet_kick(pq); } @@ -1310,7 +1306,7 @@ for (i = 0; i < sc->num_rings; i++) { struct ptnet_queue *pq = sc->queues + i; - pq->ptgh->guest_need_kick = onoff; + pq->atok->appl_need_kick = onoff; } } @@ -1676,25 +1672,13 @@ } /* End of offloading-related functions to be shared with vtnet. */ -static inline void -ptnet_sync_tail(struct ptnet_csb_hg *pthg, struct netmap_kring *kring) -{ - struct netmap_ring *ring = kring->ring; - - /* Update hwcur and hwtail as known by the host. */ - ptnetmap_guest_read_kring_csb(pthg, kring); - - /* nm_sync_finalize */ - ring->tail = kring->rtail = kring->nr_hwtail; -} - static void ptnet_ring_update(struct ptnet_queue *pq, struct netmap_kring *kring, unsigned int head, unsigned int sync_flags) { struct netmap_ring *ring = kring->ring; - struct ptnet_csb_gh *ptgh = pq->ptgh; - struct ptnet_csb_hg *pthg = pq->pthg; + struct nm_csb_atok *atok = pq->atok; + struct nm_csb_ktoa *ktoa = pq->ktoa; /* Some packets have been pushed to the netmap ring. We have * to tell the host to process the new packets, updating cur @@ -1704,11 +1688,11 @@ /* Mimic nm_txsync_prologue/nm_rxsync_prologue. */ kring->rcur = kring->rhead = head; - ptnetmap_guest_write_kring_csb(ptgh, kring->rcur, kring->rhead); + ptnetmap_guest_write_kring_csb(atok, kring->rcur, kring->rhead); /* Kick the host if needed. */ - if (NM_ACCESS_ONCE(pthg->host_need_kick)) { - ptgh->sync_flags = sync_flags; + if (NM_ACCESS_ONCE(ktoa->kern_need_kick)) { + atok->sync_flags = sync_flags; ptnet_kick(pq); } } @@ -1728,8 +1712,8 @@ struct netmap_adapter *na = &sc->ptna->dr.up; if_t ifp = sc->ifp; unsigned int batch_count = 0; - struct ptnet_csb_gh *ptgh; - struct ptnet_csb_hg *pthg; + struct nm_csb_atok *atok; + struct nm_csb_ktoa *ktoa; struct netmap_kring *kring; struct netmap_ring *ring; struct netmap_slot *slot; @@ -1758,8 +1742,8 @@ return ENETDOWN; } - ptgh = pq->ptgh; - pthg = pq->pthg; + atok = pq->atok; + ktoa = pq->ktoa; kring = na->tx_rings[pq->kring_id]; ring = kring->ring; lim = kring->nkr_num_slots - 1; @@ -1771,17 +1755,17 @@ /* We ran out of slot, let's see if the host has * freed up some, by reading hwcur and hwtail from * the CSB. */ - ptnet_sync_tail(pthg, kring); + ptnet_sync_tail(ktoa, kring); if (PTNET_TX_NOSPACE(head, kring, minspace)) { /* Still no slots available. Reactivate the * interrupts so that we can be notified * when some free slots are made available by * the host. */ - ptgh->guest_need_kick = 1; + atok->appl_need_kick = 1; /* Double-check. 
*/ - ptnet_sync_tail(pthg, kring); + ptnet_sync_tail(ktoa, kring); if (likely(PTNET_TX_NOSPACE(head, kring, minspace))) { break; @@ -1790,7 +1774,7 @@ RD(1, "Found more slots by doublecheck"); /* More slots were freed before reactivating * the interrupts. */ - ptgh->guest_need_kick = 0; + atok->appl_need_kick = 0; } } @@ -2020,8 +2004,8 @@ { struct ptnet_softc *sc = pq->sc; bool have_vnet_hdr = sc->vnet_hdr_len; - struct ptnet_csb_gh *ptgh = pq->ptgh; - struct ptnet_csb_hg *pthg = pq->pthg; + struct nm_csb_atok *atok = pq->atok; + struct nm_csb_ktoa *ktoa = pq->ktoa; struct netmap_adapter *na = &sc->ptna->dr.up; struct netmap_kring *kring = na->rx_rings[pq->kring_id]; struct netmap_ring *ring = kring->ring; @@ -2053,21 +2037,21 @@ /* We ran out of slot, let's see if the host has * added some, by reading hwcur and hwtail from * the CSB. */ - ptnet_sync_tail(pthg, kring); + ptnet_sync_tail(ktoa, kring); if (head == ring->tail) { /* Still no slots available. Reactivate * interrupts as they were disabled by the * host thread right before issuing the * last interrupt. */ - ptgh->guest_need_kick = 1; + atok->appl_need_kick = 1; /* Double-check. */ - ptnet_sync_tail(pthg, kring); + ptnet_sync_tail(ktoa, kring); if (likely(head == ring->tail)) { break; } - ptgh->guest_need_kick = 0; + atok->appl_need_kick = 0; } } Index: head/sys/dev/netmap/if_vtnet_netmap.h =================================================================== --- head/sys/dev/netmap/if_vtnet_netmap.h +++ head/sys/dev/netmap/if_vtnet_netmap.h @@ -79,7 +79,7 @@ } if (deq) - nm_prinf("%d sgs dequeued from %s-%d (netmap=%d)\n", + nm_prinf("%d sgs dequeued from %s-%d (netmap=%d)", deq, nm_txrx2str(t), idx, netmap_bufs); } @@ -230,7 +230,7 @@ /*writeable=*/0); if (unlikely(err)) { if (err != ENOSPC) - nm_prerr("virtqueue_enqueue(%s) failed: %d\n", + nm_prerr("virtqueue_enqueue(%s) failed: %d", kring->name, err); break; } @@ -251,7 +251,7 @@ if (token == NULL) break; if (unlikely(token != (void *)txq)) - nm_prerr("BUG: TX token mismatch\n"); + nm_prerr("BUG: TX token mismatch"); else n++; } @@ -307,7 +307,7 @@ /*readable=*/0, /*writeable=*/sg.sg_nseg); if (unlikely(err)) { if (err != ENOSPC) - nm_prerr("virtqueue_enqueue(%s) failed: %d\n", + nm_prerr("virtqueue_enqueue(%s) failed: %d", kring->name, err); break; } @@ -391,7 +391,7 @@ break; } if (unlikely(token != (void *)rxq)) { - nm_prerr("BUG: RX token mismatch\n"); + nm_prerr("BUG: RX token mismatch"); } else { /* Skip the virtio-net header. */ len -= sc->vtnet_hdr_size; @@ -533,7 +533,7 @@ netmap_attach(&na); - nm_prinf("vtnet attached txq=%d, txd=%d rxq=%d, rxd=%d\n", + nm_prinf("vtnet attached txq=%d, txd=%d rxq=%d, rxd=%d", na.num_tx_rings, na.num_tx_desc, na.num_tx_rings, na.num_rx_desc); } Index: head/sys/dev/netmap/netmap.c =================================================================== --- head/sys/dev/netmap/netmap.c +++ head/sys/dev/netmap/netmap.c @@ -480,6 +480,9 @@ /* user-controlled variables */ int netmap_verbose; +#ifdef CONFIG_NETMAP_DEBUG +int netmap_debug; +#endif /* CONFIG_NETMAP_DEBUG */ static int netmap_no_timestamp; /* don't timestamp on rxsync */ int netmap_no_pendintr = 1; @@ -527,9 +530,6 @@ /* Non-zero if ptnet devices are allowed to use virtio-net headers. 
*/ int ptnet_vnet_hdr = 1; -/* 0 if ptnetmap should not use worker threads for TX processing */ -int ptnetmap_tx_workers = 1; - /* * SYSCTL calls are grouped between SYSBEGIN and SYSEND to be emulated * in some other operating systems @@ -540,6 +540,10 @@ SYSCTL_NODE(_dev, OID_AUTO, netmap, CTLFLAG_RW, 0, "Netmap args"); SYSCTL_INT(_dev_netmap, OID_AUTO, verbose, CTLFLAG_RW, &netmap_verbose, 0, "Verbose mode"); +#ifdef CONFIG_NETMAP_DEBUG +SYSCTL_INT(_dev_netmap, OID_AUTO, debug, + CTLFLAG_RW, &netmap_debug, 0, "Debug messages"); +#endif /* CONFIG_NETMAP_DEBUG */ SYSCTL_INT(_dev_netmap, OID_AUTO, no_timestamp, CTLFLAG_RW, &netmap_no_timestamp, 0, "no_timestamp"); SYSCTL_INT(_dev_netmap, OID_AUTO, no_pendintr, CTLFLAG_RW, &netmap_no_pendintr, @@ -569,8 +573,6 @@ #endif SYSCTL_INT(_dev_netmap, OID_AUTO, ptnet_vnet_hdr, CTLFLAG_RW, &ptnet_vnet_hdr, 0, "Allow ptnet devices to use virtio-net headers"); -SYSCTL_INT(_dev_netmap, OID_AUTO, ptnetmap_tx_workers, CTLFLAG_RW, - &ptnetmap_tx_workers, 0, "Use worker threads for pnetmap TX processing"); SYSEND; @@ -692,7 +694,7 @@ op = "Clamp"; } if (op && msg) - nm_prinf("%s %s to %d (was %d)\n", op, msg, *v, oldv); + nm_prinf("%s %s to %d (was %d)", op, msg, *v, oldv); return *v; } @@ -776,13 +778,14 @@ na->num_rx_rings = info.num_rx_rings; na->num_rx_desc = info.num_rx_descs; na->rx_buf_maxsize = info.rx_buf_maxsize; - D("configuration changed for %s: txring %d x %d, " - "rxring %d x %d, rxbufsz %d", - na->name, na->num_tx_rings, na->num_tx_desc, - na->num_rx_rings, na->num_rx_desc, na->rx_buf_maxsize); + if (netmap_verbose) + nm_prinf("configuration changed for %s: txring %d x %d, " + "rxring %d x %d, rxbufsz %d", + na->name, na->num_tx_rings, na->num_tx_desc, + na->num_rx_rings, na->num_rx_desc, na->rx_buf_maxsize); return 0; } - D("WARNING: configuration changed for %s while active: " + nm_prerr("WARNING: configuration changed for %s while active: " "txring %d x %d, rxring %d x %d, rxbufsz %d", na->name, info.num_tx_rings, info.num_tx_descs, info.num_rx_rings, info.num_rx_descs, @@ -828,7 +831,8 @@ enum txrx t; if (na->tx_rings != NULL) { - D("warning: krings were already created"); + if (netmap_debug & NM_DEBUG_ON) + nm_prerr("warning: krings were already created"); return 0; } @@ -842,7 +846,7 @@ na->tx_rings = nm_os_malloc((size_t)len); if (na->tx_rings == NULL) { - D("Cannot allocate krings"); + nm_prerr("Cannot allocate krings"); return ENOMEM; } na->rx_rings = na->tx_rings + n[NR_TX]; @@ -910,7 +914,8 @@ enum txrx t; if (na->tx_rings == NULL) { - D("warning: krings were already deleted"); + if (netmap_debug & NM_DEBUG_ON) + nm_prerr("warning: krings were already deleted"); return; } @@ -1012,11 +1017,11 @@ * happens if the close() occurs while a concurrent * syscall is running. */ - if (netmap_verbose) - D("deleting last instance for %s", na->name); + if (netmap_debug & NM_DEBUG_ON) + nm_prinf("deleting last instance for %s", na->name); if (nm_netmap_on(na)) { - D("BUG: netmap on while going to delete the krings"); + nm_prerr("BUG: netmap on while going to delete the krings"); } na->nm_krings_delete(na); @@ -1033,14 +1038,6 @@ priv->np_nifp = NULL; } -/* call with NMG_LOCK held */ -static __inline int -nm_si_user(struct netmap_priv_d *priv, enum txrx t) -{ - return (priv->np_na != NULL && - (priv->np_qlast[t] - priv->np_qfirst[t] > 1)); -} - struct netmap_priv_d* netmap_priv_new(void) { @@ -1136,8 +1133,8 @@ /* Send packets up, outside the lock; head/prev machinery * is only useful for Windows. 
*/ while ((m = mbq_dequeue(q)) != NULL) { - if (netmap_verbose & NM_VERB_HOST) - D("sending up pkt %p size %d", m, MBUF_LEN(m)); + if (netmap_debug & NM_DEBUG_HOST) + nm_prinf("sending up pkt %p size %d", m, MBUF_LEN(m)); prev = nm_os_send_up(dst, m, prev); if (head == NULL) head = prev; @@ -1332,8 +1329,8 @@ m_copydata(m, 0, len, NMB(na, slot)); ND("nm %d len %d", nm_i, len); - if (netmap_verbose) - D("%s", nm_dump_buf(NMB(na, slot),len, 128, NULL)); + if (netmap_debug & NM_DEBUG_HOST) + nm_prinf("%s", nm_dump_buf(NMB(na, slot),len, 128, NULL)); slot->len = len; slot->flags = 0; @@ -1500,7 +1497,7 @@ if (req->nr_mode == NR_REG_PIPE_MASTER || req->nr_mode == NR_REG_PIPE_SLAVE) { /* Do not accept deprecated pipe modes. */ - D("Deprecated pipe nr_mode, use xx{yy or xx}yy syntax"); + nm_prerr("Deprecated pipe nr_mode, use xx{yy or xx}yy syntax"); return EINVAL; } @@ -1527,9 +1524,7 @@ * 0 !NULL type matches and na created/found * !0 !NULL impossible */ - - /* try to see if this is a ptnetmap port */ - error = netmap_get_pt_host_na(hdr, na, nmd, create); + error = netmap_get_null_na(hdr, na, nmd, create); if (error || *na != NULL) goto out; @@ -1739,7 +1734,7 @@ /* * Error routine called when txsync/rxsync detects an error. - * Can't do much more than resetting head =cur = hwcur, tail = hwtail + * Can't do much more than resetting head = cur = hwcur, tail = hwtail * Return 1 on reinit. * * This routine is only called by the upper half of the kernel. @@ -1810,12 +1805,6 @@ enum txrx t; u_int j; - if ((nr_flags & NR_PTNETMAP_HOST) && ((nr_mode != NR_REG_ALL_NIC) || - nr_flags & (NR_RX_RINGS_ONLY|NR_TX_RINGS_ONLY))) { - D("Error: only NR_REG_ALL_NIC supported with netmap passthrough"); - return EINVAL; - } - for_rx_tx(t) { if (nr_flags & excluded_direction[t]) { priv->np_qfirst[t] = priv->np_qlast[t] = 0; @@ -1823,6 +1812,7 @@ } switch (nr_mode) { case NR_REG_ALL_NIC: + case NR_REG_NULL: priv->np_qfirst[t] = 0; priv->np_qlast[t] = nma_get_nrings(na, t); ND("ALL/PIPE: %s %d %d", nm_txrx2str(t), @@ -1831,7 +1821,7 @@ case NR_REG_SW: case NR_REG_NIC_SW: if (!(na->na_flags & NAF_HOST_RINGS)) { - D("host rings not supported"); + nm_prerr("host rings not supported"); return EINVAL; } priv->np_qfirst[t] = (nr_mode == NR_REG_SW ? @@ -1844,7 +1834,7 @@ case NR_REG_ONE_NIC: if (nr_ringid >= na->num_tx_rings && nr_ringid >= na->num_rx_rings) { - D("invalid ring id %d", nr_ringid); + nm_prerr("invalid ring id %d", nr_ringid); return EINVAL; } /* if not enough rings, use the first one */ @@ -1857,11 +1847,11 @@ priv->np_qfirst[t], priv->np_qlast[t]); break; default: - D("invalid regif type %d", nr_mode); + nm_prerr("invalid regif type %d", nr_mode); return EINVAL; } } - priv->np_flags = nr_flags | nr_mode; // TODO + priv->np_flags = nr_flags; /* Allow transparent forwarding mode in the host --> nic * direction only if all the TX hw rings have been opened. 
*/ @@ -1871,7 +1861,7 @@ } if (netmap_verbose) { - D("%s: tx [%d,%d) rx [%d,%d) id %d", + nm_prinf("%s: tx [%d,%d) rx [%d,%d) id %d", na->name, priv->np_qfirst[NR_TX], priv->np_qlast[NR_TX], @@ -1927,6 +1917,7 @@ } priv->np_flags = 0; priv->np_txpoll = 0; + priv->np_kloop_state = 0; } @@ -1943,8 +1934,8 @@ int excl = (priv->np_flags & NR_EXCLUSIVE); enum txrx t; - if (netmap_verbose) - D("%s: grabbing tx [%d, %d) rx [%d, %d)", + if (netmap_debug & NM_DEBUG_ON) + nm_prinf("%s: grabbing tx [%d, %d) rx [%d, %d)", na->name, priv->np_qfirst[NR_TX], priv->np_qlast[NR_TX], @@ -2021,6 +2012,110 @@ return (priv->np_qfirst[NR_RX] != priv->np_qlast[NR_RX]); } +/* Validate the CSB entries for both directions (atok and ktoa). + * To be called under NMG_LOCK(). */ +static int +netmap_csb_validate(struct netmap_priv_d *priv, struct nmreq_opt_csb *csbo) +{ + struct nm_csb_atok *csb_atok_base = + (struct nm_csb_atok *)(uintptr_t)csbo->csb_atok; + struct nm_csb_ktoa *csb_ktoa_base = + (struct nm_csb_ktoa *)(uintptr_t)csbo->csb_ktoa; + enum txrx t; + int num_rings[NR_TXRX], tot_rings; + size_t entry_size[2]; + void *csb_start[2]; + int i; + + if (priv->np_kloop_state & NM_SYNC_KLOOP_RUNNING) { + nm_prerr("Cannot update CSB while kloop is running"); + return EBUSY; + } + + tot_rings = 0; + for_rx_tx(t) { + num_rings[t] = priv->np_qlast[t] - priv->np_qfirst[t]; + tot_rings += num_rings[t]; + } + if (tot_rings <= 0) + return 0; + + if (!(priv->np_flags & NR_EXCLUSIVE)) { + nm_prerr("CSB mode requires NR_EXCLUSIVE"); + return EINVAL; + } + + entry_size[0] = sizeof(*csb_atok_base); + entry_size[1] = sizeof(*csb_ktoa_base); + csb_start[0] = (void *)csb_atok_base; + csb_start[1] = (void *)csb_ktoa_base; + + for (i = 0; i < 2; i++) { + /* On Linux we could use access_ok() to simplify + * the validation. However, the advantage of + * this approach is that it works also on + * FreeBSD. */ + size_t csb_size = tot_rings * entry_size[i]; + void *tmp; + int err; + + if ((uintptr_t)csb_start[i] & (entry_size[i]-1)) { + nm_prerr("Unaligned CSB address"); + return EINVAL; + } + + tmp = nm_os_malloc(csb_size); + if (!tmp) + return ENOMEM; + if (i == 0) { + /* Application --> kernel direction. */ + err = copyin(csb_start[i], tmp, csb_size); + } else { + /* Kernel --> application direction. */ + memset(tmp, 0, csb_size); + err = copyout(tmp, csb_start[i], csb_size); + } + nm_os_free(tmp); + if (err) { + nm_prerr("Invalid CSB address"); + return err; + } + } + + priv->np_csb_atok_base = csb_atok_base; + priv->np_csb_ktoa_base = csb_ktoa_base; + + /* Initialize the CSB. */ + for_rx_tx(t) { + for (i = 0; i < num_rings[t]; i++) { + struct netmap_kring *kring = + NMR(priv->np_na, t)[i + priv->np_qfirst[t]]; + struct nm_csb_atok *csb_atok = csb_atok_base + i; + struct nm_csb_ktoa *csb_ktoa = csb_ktoa_base + i; + + if (t == NR_RX) { + csb_atok += num_rings[NR_TX]; + csb_ktoa += num_rings[NR_TX]; + } + + CSB_WRITE(csb_atok, head, kring->rhead); + CSB_WRITE(csb_atok, cur, kring->rcur); + CSB_WRITE(csb_atok, appl_need_kick, 1); + CSB_WRITE(csb_atok, sync_flags, 1); + CSB_WRITE(csb_ktoa, hwcur, kring->nr_hwcur); + CSB_WRITE(csb_ktoa, hwtail, kring->nr_hwtail); + CSB_WRITE(csb_ktoa, kern_need_kick, 1); + + nm_prinf("csb_init for kring %s: head %u, cur %u, " + "hwcur %u, hwtail %u", kring->name, + kring->rhead, kring->rcur, kring->nr_hwcur, + kring->nr_hwtail); + } + } + + return 0; +} + /* * possibly move the interface to netmap-mode. * If success it returns a pointer to netmap_if, otherwise NULL. 
@@ -2137,7 +2232,7 @@ na->name, mtu, na->rx_buf_maxsize, nbs); if (na->rx_buf_maxsize == 0) { - D("%s: error: rx_buf_maxsize == 0", na->name); + nm_prerr("%s: error: rx_buf_maxsize == 0", na->name); error = EIO; goto err_drop_mem; } @@ -2149,7 +2244,7 @@ * cannot be used in this case. */ if (nbs < mtu) { nm_prerr("error: netmap buf size (%u) " - "< device MTU (%u)\n", nbs, mtu); + "< device MTU (%u)", nbs, mtu); error = EINVAL; goto err_drop_mem; } @@ -2162,14 +2257,14 @@ if (!(na->na_flags & NAF_MOREFRAG)) { nm_prerr("error: large MTU (%d) needed " "but %s does not support " - "NS_MOREFRAG\n", mtu, + "NS_MOREFRAG", mtu, na->ifp->if_xname); error = EINVAL; goto err_drop_mem; } else if (nbs < na->rx_buf_maxsize) { nm_prerr("error: using NS_MOREFRAG on " "%s requires netmap buf size " - ">= %u\n", na->ifp->if_xname, + ">= %u", na->ifp->if_xname, na->rx_buf_maxsize); error = EINVAL; goto err_drop_mem; @@ -2177,7 +2272,7 @@ nm_prinf("info: netmap application on " "%s needs to support " "NS_MOREFRAG " - "(MTU=%u,netmap_buf_size=%u)\n", + "(MTU=%u,netmap_buf_size=%u)", na->ifp->if_xname, mtu, nbs); } } @@ -2307,7 +2402,6 @@ struct ifnet *ifp = NULL; int error = 0; u_int i, qfirst, qlast; - struct netmap_if *nifp; struct netmap_kring **krings; int sync_flags; enum txrx t; @@ -2316,14 +2410,10 @@ case NIOCCTRL: { struct nmreq_header *hdr = (struct nmreq_header *)data; - if (hdr->nr_version != NETMAP_API) { - D("API mismatch for reqtype %d: got %d need %d", - hdr->nr_version, - hdr->nr_version, NETMAP_API); - hdr->nr_version = NETMAP_API; - } if (hdr->nr_version < NETMAP_MIN_API || hdr->nr_version > NETMAP_MAX_API) { + nm_prerr("API mismatch: got %d need %d", + hdr->nr_version, NETMAP_API); return EINVAL; } @@ -2345,13 +2435,13 @@ case NETMAP_REQ_REGISTER: { struct nmreq_register *req = (struct nmreq_register *)(uintptr_t)hdr->nr_body; + struct netmap_if *nifp; + /* Protect access to priv from concurrent requests. */ NMG_LOCK(); do { - u_int memflags; -#ifdef WITH_EXTMEM struct nmreq_option *opt; -#endif /* WITH_EXTMEM */ + u_int memflags; if (priv->np_nifp != NULL) { /* thread already registered */ error = EBUSY; @@ -2382,6 +2472,10 @@ /* find the allocator and get a reference */ nmd = netmap_mem_find(req->nr_mem_id); if (nmd == NULL) { + if (netmap_verbose) { + nm_prerr("%s: failed to find mem_id %u", + hdr->nr_name, req->nr_mem_id); + } error = EINVAL; break; } @@ -2397,6 +2491,8 @@ } if (na->virt_hdr_len && !(req->nr_flags & NR_ACCEPT_VNET_HDR)) { + nm_prerr("virt_hdr_len=%d, but application does " + "not accept it", na->virt_hdr_len); error = EIO; break; } @@ -2406,6 +2502,23 @@ if (error) { /* reg. 
failed, release priv and ref */ break; } + + opt = nmreq_findoption((struct nmreq_option *)(uintptr_t)hdr->nr_options, + NETMAP_REQ_OPT_CSB); + if (opt != NULL) { + struct nmreq_opt_csb *csbo = + (struct nmreq_opt_csb *)opt; + error = nmreq_checkduplicate(opt); + if (!error) { + error = netmap_csb_validate(priv, csbo); + } + opt->nro_status = error; + if (error) { + netmap_do_unregif(priv); + break; + } + } + nifp = priv->np_nifp; priv->np_td = td; /* for debugging purposes */ @@ -2430,12 +2543,12 @@ if (req->nr_extra_bufs) { if (netmap_verbose) - D("requested %d extra buffers", + nm_prinf("requested %d extra buffers", req->nr_extra_bufs); req->nr_extra_bufs = netmap_extra_alloc(na, &nifp->ni_bufs_head, req->nr_extra_bufs); if (netmap_verbose) - D("got %d extra buffers", req->nr_extra_bufs); + nm_prinf("got %d extra buffers", req->nr_extra_bufs); } req->nr_offset = netmap_mem_if_offset(na->nm_mem, nifp); @@ -2473,6 +2586,7 @@ * so that we can call netmap_get_na(). */ struct nmreq_register regreq; bzero(®req, sizeof(regreq)); + regreq.nr_mode = NR_REG_ALL_NIC; regreq.nr_tx_slots = req->nr_tx_slots; regreq.nr_rx_slots = req->nr_rx_slots; regreq.nr_tx_rings = req->nr_tx_rings; @@ -2494,6 +2608,10 @@ } else { nmd = netmap_mem_find(req->nr_mem_id ? req->nr_mem_id : 1); if (nmd == NULL) { + if (netmap_verbose) + nm_prerr("%s: failed to find mem_id %u", + hdr->nr_name, + req->nr_mem_id ? req->nr_mem_id : 1); error = EINVAL; break; } @@ -2505,8 +2623,6 @@ break; if (na == NULL) /* only memory info */ break; - req->nr_offset = 0; - req->nr_rx_slots = req->nr_tx_slots = 0; netmap_update_config(na); req->nr_rx_rings = na->num_rx_rings; req->nr_tx_rings = na->num_tx_rings; @@ -2519,17 +2635,17 @@ } #ifdef WITH_VALE case NETMAP_REQ_VALE_ATTACH: { - error = nm_bdg_ctl_attach(hdr, NULL /* userspace request */); + error = netmap_vale_attach(hdr, NULL /* userspace request */); break; } case NETMAP_REQ_VALE_DETACH: { - error = nm_bdg_ctl_detach(hdr, NULL /* userspace request */); + error = netmap_vale_detach(hdr, NULL /* userspace request */); break; } case NETMAP_REQ_VALE_LIST: { - error = netmap_bdg_list(hdr); + error = netmap_vale_list(hdr); break; } @@ -2540,12 +2656,16 @@ * so that we can call netmap_get_bdg_na(). */ struct nmreq_register regreq; bzero(®req, sizeof(regreq)); + regreq.nr_mode = NR_REG_ALL_NIC; + /* For now we only support virtio-net headers, and only for * VALE ports, but this may change in future. Valid lengths * for the virtio-net header are 0 (no header), 10 and 12. */ if (req->nr_hdr_len != 0 && req->nr_hdr_len != sizeof(struct nm_vnet_hdr) && req->nr_hdr_len != 12) { + if (netmap_verbose) + nm_prerr("invalid hdr_len %u", req->nr_hdr_len); error = EINVAL; break; } @@ -2562,7 +2682,8 @@ if (na->virt_hdr_len) { vpna->mfs = NETMAP_BUF_SIZE(na); } - D("Using vnet_hdr_len %d for %p", na->virt_hdr_len, na); + if (netmap_verbose) + nm_prinf("Using vnet_hdr_len %d for %p", na->virt_hdr_len, na); netmap_adapter_put(na); } else if (!na) { error = ENXIO; @@ -2581,6 +2702,7 @@ struct ifnet *ifp; bzero(®req, sizeof(regreq)); + regreq.nr_mode = NR_REG_ALL_NIC; NMG_LOCK(); hdr->nr_reqtype = NETMAP_REQ_REGISTER; hdr->nr_body = (uintptr_t)®req; @@ -2612,22 +2734,80 @@ } #endif /* WITH_VALE */ case NETMAP_REQ_POOLS_INFO_GET: { + /* Get information from the memory allocator used for + * hdr->nr_name. */ struct nmreq_pools_info *req = (struct nmreq_pools_info *)(uintptr_t)hdr->nr_body; - /* Get information from the memory allocator. This - * netmap device must already be bound to a port. 
- * Note that hdr->nr_name is ignored. */ NMG_LOCK(); - if (priv->np_na && priv->np_na->nm_mem) { - struct netmap_mem_d *nmd = priv->np_na->nm_mem; + do { + /* Build a nmreq_register out of the nmreq_pools_info, + * so that we can call netmap_get_na(). */ + struct nmreq_register regreq; + bzero(®req, sizeof(regreq)); + regreq.nr_mem_id = req->nr_mem_id; + regreq.nr_mode = NR_REG_ALL_NIC; + + hdr->nr_reqtype = NETMAP_REQ_REGISTER; + hdr->nr_body = (uintptr_t)®req; + error = netmap_get_na(hdr, &na, &ifp, NULL, 1 /* create */); + hdr->nr_reqtype = NETMAP_REQ_POOLS_INFO_GET; /* reset type */ + hdr->nr_body = (uintptr_t)req; /* reset nr_body */ + if (error) { + na = NULL; + ifp = NULL; + break; + } + nmd = na->nm_mem; /* grab the memory allocator */ + if (nmd == NULL) { + error = EINVAL; + break; + } + + /* Finalize the memory allocator, get the pools + * information and release the allocator. */ + error = netmap_mem_finalize(nmd, na); + if (error) { + break; + } error = netmap_mem_pools_info_get(req, nmd); - } else { + netmap_mem_drop(na); + } while (0); + netmap_unget_na(na, ifp); + NMG_UNLOCK(); + break; + } + + case NETMAP_REQ_CSB_ENABLE: { + struct nmreq_option *opt; + + opt = nmreq_findoption((struct nmreq_option *)(uintptr_t)hdr->nr_options, + NETMAP_REQ_OPT_CSB); + if (opt == NULL) { error = EINVAL; + } else { + struct nmreq_opt_csb *csbo = + (struct nmreq_opt_csb *)opt; + error = nmreq_checkduplicate(opt); + if (!error) { + NMG_LOCK(); + error = netmap_csb_validate(priv, csbo); + NMG_UNLOCK(); + } + opt->nro_status = error; } - NMG_UNLOCK(); break; } + case NETMAP_REQ_SYNC_KLOOP_START: { + error = netmap_sync_kloop(priv, hdr); + break; + } + + case NETMAP_REQ_SYNC_KLOOP_STOP: { + error = netmap_sync_kloop_stop(priv); + break; + } + default: { error = EINVAL; break; @@ -2641,22 +2821,20 @@ case NIOCTXSYNC: case NIOCRXSYNC: { - nifp = priv->np_nifp; - - if (nifp == NULL) { + if (unlikely(priv->np_nifp == NULL)) { error = ENXIO; break; } mb(); /* make sure following reads are not from cache */ - na = priv->np_na; /* we have a reference */ - - if (na == NULL) { - D("Internal error: nifp != NULL && na == NULL"); - error = ENXIO; + if (unlikely(priv->np_csb_atok_base)) { + nm_prerr("Invalid sync in CSB mode"); + error = EBUSY; break; } + na = priv->np_na; /* we have a reference */ + mbq_init(&q); t = (cmd == NIOCTXSYNC ? 
NR_TX : NR_RX); krings = NMR(na, t); @@ -2674,8 +2852,8 @@ } if (cmd == NIOCTXSYNC) { - if (netmap_verbose & NM_VERB_TXSYNC) - D("pre txsync ring %d cur %d hwcur %d", + if (netmap_debug & NM_DEBUG_TXSYNC) + nm_prinf("pre txsync ring %d cur %d hwcur %d", i, ring->cur, kring->nr_hwcur); if (nm_txsync_prologue(kring, ring) >= kring->nkr_num_slots) { @@ -2683,8 +2861,8 @@ } else if (kring->nm_sync(kring, sync_flags | NAF_FORCE_RECLAIM) == 0) { nm_sync_finalize(kring); } - if (netmap_verbose & NM_VERB_TXSYNC) - D("post txsync ring %d cur %d hwcur %d", + if (netmap_debug & NM_DEBUG_TXSYNC) + nm_prinf("post txsync ring %d cur %d hwcur %d", i, ring->cur, kring->nr_hwcur); } else { @@ -2739,18 +2917,22 @@ case NETMAP_REQ_VALE_NEWIF: return sizeof(struct nmreq_vale_newif); case NETMAP_REQ_VALE_DELIF: + case NETMAP_REQ_SYNC_KLOOP_STOP: + case NETMAP_REQ_CSB_ENABLE: return 0; case NETMAP_REQ_VALE_POLLING_ENABLE: case NETMAP_REQ_VALE_POLLING_DISABLE: return sizeof(struct nmreq_vale_polling); case NETMAP_REQ_POOLS_INFO_GET: return sizeof(struct nmreq_pools_info); + case NETMAP_REQ_SYNC_KLOOP_START: + return sizeof(struct nmreq_sync_kloop_start); } return 0; } static size_t -nmreq_opt_size_by_type(uint16_t nro_reqtype) +nmreq_opt_size_by_type(uint32_t nro_reqtype, uint64_t nro_size) { size_t rv = sizeof(struct nmreq_option); #ifdef NETMAP_REQ_OPT_DEBUG @@ -2763,6 +2945,13 @@ rv = sizeof(struct nmreq_opt_extmem); break; #endif /* WITH_EXTMEM */ + case NETMAP_REQ_OPT_SYNC_KLOOP_EVENTFDS: + if (nro_size >= rv) + rv = nro_size; + break; + case NETMAP_REQ_OPT_CSB: + rv = sizeof(struct nmreq_opt_csb); + break; } /* subtract the common header */ return rv - sizeof(struct nmreq_option); @@ -2778,8 +2967,11 @@ struct nmreq_option buf; uint64_t *ptrs; - if (hdr->nr_reserved) + if (hdr->nr_reserved) { + if (netmap_verbose) + nm_prerr("nr_reserved must be zero"); return EINVAL; + } if (!nr_body_is_user) return 0; @@ -2796,6 +2988,8 @@ (!rqsz && hdr->nr_body != (uintptr_t)NULL)) { /* Request body expected, but not found; or * request body found but unexpected. 
*/ + if (netmap_verbose) + nm_prerr("nr_body expected but not found, or vice versa"); error = EINVAL; goto out_err; } @@ -2809,7 +3003,7 @@ if (error) goto out_err; optsz += sizeof(*src); - optsz += nmreq_opt_size_by_type(buf.nro_reqtype); + optsz += nmreq_opt_size_by_type(buf.nro_reqtype, buf.nro_size); if (rqsz + optsz > NETMAP_REQ_MAXSIZE) { error = EMSGSIZE; goto out_err; @@ -2863,7 +3057,8 @@ p = (char *)(opt + 1); /* copy the option body */ - optsz = nmreq_opt_size_by_type(opt->nro_reqtype); + optsz = nmreq_opt_size_by_type(opt->nro_reqtype, + opt->nro_size); if (optsz) { /* the option body follows the option header */ error = copyin(src + 1, p, optsz); @@ -2937,7 +3132,8 @@ /* copy the option body only if there was no error */ if (!rerror && !src->nro_status) { - optsz = nmreq_opt_size_by_type(src->nro_reqtype); + optsz = nmreq_opt_size_by_type(src->nro_reqtype, + src->nro_size); if (optsz) { error = copyout(src + 1, dst + 1, optsz); if (error) { @@ -3015,7 +3211,8 @@ struct netmap_adapter *na; struct netmap_kring *kring; struct netmap_ring *ring; - u_int i, check_all_tx, check_all_rx, want[NR_TXRX], revents = 0; + u_int i, want[NR_TXRX], revents = 0; + NM_SELINFO_T *si[NR_TXRX]; #define want_tx want[NR_TX] #define want_rx want[NR_RX] struct mbq q; /* packets from RX hw queues to host stack */ @@ -3038,27 +3235,31 @@ mbq_init(&q); - if (priv->np_nifp == NULL) { - D("No if registered"); + if (unlikely(priv->np_nifp == NULL)) { return POLLERR; } mb(); /* make sure following reads are not from cache */ na = priv->np_na; - if (!nm_netmap_on(na)) + if (unlikely(!nm_netmap_on(na))) return POLLERR; - if (netmap_verbose & 0x8000) - D("device %s events 0x%x", na->name, events); + if (unlikely(priv->np_csb_atok_base)) { + nm_prerr("Invalid poll in CSB mode"); + return POLLERR; + } + + if (netmap_debug & NM_DEBUG_ON) + nm_prinf("device %s events 0x%x", na->name, events); want_tx = events & (POLLOUT | POLLWRNORM); want_rx = events & (POLLIN | POLLRDNORM); /* - * check_all_{tx|rx} are set if the card has more than one queue AND - * the file descriptor is bound to all of them. If so, we sleep on - * the "global" selinfo, otherwise we sleep on individual selinfo - * (FreeBSD only allows two selinfo's per file descriptor). + * If the card has more than one queue AND the file descriptor is + * bound to all of them, we sleep on the "global" selinfo, otherwise + * we sleep on individual selinfo (FreeBSD only allows two selinfo's + * per file descriptor). * The interrupt routine in the driver wake one or the other * (or both) depending on which clients are active. * @@ -3067,8 +3268,10 @@ * there are pending packets to send. The latter can be disabled * passing NETMAP_NO_TX_POLL in the NIOCREG call. */ - check_all_tx = nm_si_user(priv, NR_TX); - check_all_rx = nm_si_user(priv, NR_RX); + si[NR_RX] = nm_si_user(priv, NR_RX) ? &na->si[NR_RX] : + &na->rx_rings[priv->np_qfirst[NR_RX]]->si; + si[NR_TX] = nm_si_user(priv, NR_TX) ? &na->si[NR_TX] : + &na->tx_rings[priv->np_qfirst[NR_TX]]->si; #ifdef __FreeBSD__ /* @@ -3105,10 +3308,8 @@ #ifdef linux /* The selrecord must be unconditional on linux. */ - nm_os_selrecord(sr, check_all_tx ? - &na->si[NR_TX] : &na->tx_rings[priv->np_qfirst[NR_TX]]->si); - nm_os_selrecord(sr, check_all_rx ? - &na->si[NR_RX] : &na->rx_rings[priv->np_qfirst[NR_RX]]->si); + nm_os_selrecord(sr, si[NR_RX]); + nm_os_selrecord(sr, si[NR_TX]); #endif /* linux */ /* @@ -3173,8 +3374,7 @@ send_down = 0; if (want_tx && retry_tx && sr) { #ifndef linux - nm_os_selrecord(sr, check_all_tx ? 
- &na->si[NR_TX] : &na->tx_rings[priv->np_qfirst[NR_TX]]->si); + nm_os_selrecord(sr, si[NR_TX]); #endif /* !linux */ retry_tx = 0; goto flush_tx; @@ -3234,8 +3434,7 @@ #ifndef linux if (retry_rx && sr) { - nm_os_selrecord(sr, check_all_rx ? - &na->si[NR_RX] : &na->rx_rings[priv->np_qfirst[NR_RX]]->si); + nm_os_selrecord(sr, si[NR_RX]); } #endif /* !linux */ if (send_down || retry_rx) { @@ -3290,7 +3489,7 @@ } if (!na->nm_intr) { - D("Cannot %s interrupts for %s", onoff ? "enable" : "disable", + nm_prerr("Cannot %s interrupts for %s", onoff ? "enable" : "disable", na->name); return -1; } @@ -3328,12 +3527,6 @@ int netmap_attach_common(struct netmap_adapter *na) { - if (na->num_tx_rings == 0 || na->num_rx_rings == 0) { - D("%s: invalid rings tx %d rx %d", - na->name, na->num_tx_rings, na->num_rx_rings); - return EINVAL; - } - if (!na->rx_buf_maxsize) { /* Set a conservative default (larger is safer). */ na->rx_buf_maxsize = PAGE_SIZE; @@ -3436,20 +3629,31 @@ struct ifnet *ifp = NULL; if (size < sizeof(struct netmap_hw_adapter)) { - D("Invalid netmap adapter size %d", (int)size); + if (netmap_debug & NM_DEBUG_ON) + nm_prerr("Invalid netmap adapter size %d", (int)size); return EINVAL; } - if (arg == NULL || arg->ifp == NULL) + if (arg == NULL || arg->ifp == NULL) { + if (netmap_debug & NM_DEBUG_ON) + nm_prerr("either arg or arg->ifp is NULL"); return EINVAL; + } + if (arg->num_tx_rings == 0 || arg->num_rx_rings == 0) { + if (netmap_debug & NM_DEBUG_ON) + nm_prerr("%s: invalid rings tx %d rx %d", + arg->name, arg->num_tx_rings, arg->num_rx_rings); + return EINVAL; + } + ifp = arg->ifp; if (NM_NA_CLASH(ifp)) { /* If NA(ifp) is not null but there is no valid netmap * adapter it means that someone else is using the same * pointer (e.g. ax25_ptr on linux). This happens for * instance when also PF_RING is in use. */ - D("Error: netmap adapter hook is busy"); + nm_prerr("Error: netmap adapter hook is busy"); return EBUSY; } @@ -3458,7 +3662,7 @@ goto fail; hwna->up = *arg; hwna->up.na_flags |= NAF_HOST_RINGS | NAF_NATIVE; - strncpy(hwna->up.name, ifp->if_xname, sizeof(hwna->up.name)); + strlcpy(hwna->up.name, ifp->if_xname, sizeof(hwna->up.name)); if (override_reg) { hwna->nm_hw_register = hwna->up.nm_register; hwna->up.nm_register = netmap_hw_reg; @@ -3483,7 +3687,7 @@ return 0; fail: - D("fail, arg %p ifp %p na %p", arg, ifp, hwna); + nm_prerr("fail, arg %p ifp %p na %p", arg, ifp, hwna); return (hwna ? EINVAL : ENOMEM); } @@ -3521,7 +3725,8 @@ na->nm_dtor(na); if (na->tx_rings) { /* XXX should not happen */ - D("freeing leftover tx_rings"); + if (netmap_debug & NM_DEBUG_ON) + nm_prerr("freeing leftover tx_rings"); na->nm_krings_delete(na); } netmap_pipe_dealloc(na); @@ -3619,7 +3824,7 @@ // mtx_lock(&na->core_lock); if (!nm_netmap_on(na)) { - D("%s not in netmap mode anymore", na->name); + nm_prerr("%s not in netmap mode anymore", na->name); error = ENXIO; goto done; } @@ -3638,7 +3843,7 @@ // XXX reconsider long packets if we handle fragments if (len > NETMAP_BUF_SIZE(na)) { /* too long for us */ - D("%s from_host, drop packet size %d > %d", na->name, + nm_prerr("%s from_host, drop packet size %d > %d", na->name, len, NETMAP_BUF_SIZE(na)); goto done; } @@ -3749,8 +3954,8 @@ new_hwofs -= lim + 1; /* Always set the new offset value and realign the ring. */ - if (netmap_verbose) - D("%s %s%d hwofs %d -> %d, hwtail %d -> %d", + if (netmap_debug & NM_DEBUG_ON) + nm_prinf("%s %s%d hwofs %d -> %d, hwtail %d -> %d", na->name, tx == NR_TX ? 
"TX" : "RX", n, kring->nkr_hwofs, new_hwofs, @@ -3796,8 +4001,8 @@ q &= NETMAP_RING_MASK; - if (netmap_verbose) { - RD(5, "received %s queue %d", work_done ? "RX" : "TX" , q); + if (netmap_debug & (NM_DEBUG_RXINTR|NM_DEBUG_TXINTR)) { + nm_prlim(5, "received %s queue %d", work_done ? "RX" : "TX" , q); } if (q >= nma_get_nrings(na, t)) @@ -3879,7 +4084,7 @@ struct ifnet *ifp = na->ifp; /* We undo the setup for intercepting packets only if we are the - * last user of this adapapter. */ + * last user of this adapter. */ if (na->active_fds > 0) { return; } @@ -3890,7 +4095,6 @@ na->na_flags &= ~NAF_NETMAP_ON; } - /* * Module loader and unloader * @@ -3915,7 +4119,7 @@ netmap_uninit_bridges(); netmap_mem_fini(); NMG_LOCK_DESTROY(); - nm_prinf("netmap: unloaded module.\n"); + nm_prinf("netmap: unloaded module."); } @@ -3952,7 +4156,7 @@ if (error) goto fail; - nm_prinf("netmap: loaded module\n"); + nm_prinf("netmap: loaded module"); return (0); fail: netmap_fini(); Index: head/sys/dev/netmap/netmap_bdg.h =================================================================== --- head/sys/dev/netmap/netmap_bdg.h +++ head/sys/dev/netmap/netmap_bdg.h @@ -44,6 +44,40 @@ #endif /* __FreeBSD__ */ +/* + * The following bridge-related functions are used by other + * kernel modules. + * + * VALE only supports unicast or broadcast. The lookup + * function can return 0 .. NM_BDG_MAXPORTS-1 for regular ports, + * NM_BDG_MAXPORTS for broadcast, NM_BDG_MAXPORTS+1 to indicate + * drop. + */ +typedef uint32_t (*bdg_lookup_fn_t)(struct nm_bdg_fwd *ft, uint8_t *ring_nr, + struct netmap_vp_adapter *, void *private_data); +typedef int (*bdg_config_fn_t)(struct nm_ifreq *); +typedef void (*bdg_dtor_fn_t)(const struct netmap_vp_adapter *); +typedef void *(*bdg_update_private_data_fn_t)(void *private_data, void *callback_data, int *error); +typedef int (*bdg_vp_create_fn_t)(struct nmreq_header *hdr, + struct ifnet *ifp, struct netmap_mem_d *nmd, + struct netmap_vp_adapter **ret); +typedef int (*bdg_bwrap_attach_fn_t)(const char *nr_name, struct netmap_adapter *hwna); +struct netmap_bdg_ops { + bdg_lookup_fn_t lookup; + bdg_config_fn_t config; + bdg_dtor_fn_t dtor; + bdg_vp_create_fn_t vp_create; + bdg_bwrap_attach_fn_t bwrap_attach; + char name[IFNAMSIZ]; +}; +int netmap_bwrap_attach(const char *name, struct netmap_adapter *, struct netmap_bdg_ops *); +int netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *private_data, void *auth_token); + +#define NM_BRIDGES 8 /* number of bridges */ +#define NM_BDG_MAXPORTS 254 /* up to 254 */ +#define NM_BDG_BROADCAST NM_BDG_MAXPORTS +#define NM_BDG_NOPORT (NM_BDG_MAXPORTS+1) + /* XXX Should go away after fixing find_bridge() - Michio */ #define NM_BDG_HASH 1024 /* forwarding table entries */ @@ -95,7 +129,8 @@ * different ring index. * The function is set by netmap_bdg_regops(). */ - struct netmap_bdg_ops *bdg_ops; + struct netmap_bdg_ops bdg_ops; + struct netmap_bdg_ops bdg_saved_ops; /* * Contains the data structure used by the bdg_ops.lookup function. 
@@ -111,6 +146,7 @@ */ #define NM_BDG_ACTIVE 1 #define NM_BDG_EXCLUSIVE 2 +#define NM_BDG_NEED_BWRAP 4 uint8_t bdg_flags; @@ -149,6 +185,13 @@ struct netmap_adapter *hwna); int netmap_bwrap_krings_create_common(struct netmap_adapter *na); void netmap_bwrap_krings_delete_common(struct netmap_adapter *na); +struct nm_bridge *netmap_init_bridges2(u_int); +void netmap_uninit_bridges2(struct nm_bridge *, u_int); +int netmap_bdg_update_private_data(const char *name, bdg_update_private_data_fn_t callback, + void *callback_data, void *auth_token); +int netmap_bdg_config(struct nm_ifreq *nifr); +int nm_is_bwrap(struct netmap_adapter *); + #define NM_NEED_BWRAP (-2) #endif /* _NET_NETMAP_BDG_H_ */ Index: head/sys/dev/netmap/netmap_bdg.c =================================================================== --- head/sys/dev/netmap/netmap_bdg.c +++ head/sys/dev/netmap/netmap_bdg.c @@ -126,7 +126,7 @@ * Right now we have a static array and deletions are protected * by an exclusive lock. */ -static struct nm_bridge *nm_bridges; +struct nm_bridge *nm_bridges; #endif /* !CONFIG_NET_NS */ @@ -139,15 +139,15 @@ (c == '_'); } -/* Validate the name of a VALE bridge port and return the +/* Validate the name of a bdg port and return the * position of the ":" character. */ static int -nm_vale_name_validate(const char *name) +nm_bdg_name_validate(const char *name, size_t prefixlen) { int colon_pos = -1; int i; - if (!name || strlen(name) < strlen(NM_BDG_NAME)) { + if (!name || strlen(name) < prefixlen) { return -1; } @@ -186,9 +186,10 @@ netmap_bns_getbridges(&bridges, &num_bridges); - namelen = nm_vale_name_validate(name); + namelen = nm_bdg_name_validate(name, + (ops != NULL ? strlen(ops->name) : 0)); if (namelen < 0) { - D("invalid bridge name %s", name ? name : NULL); + nm_prerr("invalid bridge name %s", name ? name : NULL); return NULL; } @@ -213,7 +214,7 @@ b->bdg_active_ports); b->ht = nm_os_malloc(sizeof(struct nm_hash_ent) * NM_BDG_HASH); if (b->ht == NULL) { - D("failed to allocate hash table"); + nm_prerr("failed to allocate hash table"); return NULL; } strncpy(b->bdg_basename, name, namelen); @@ -222,7 +223,7 @@ for (i = 0; i < NM_BDG_MAXPORTS; i++) b->bdg_port_index[i] = i; /* set the default function */ - b->bdg_ops = ops; + b->bdg_ops = b->bdg_saved_ops = *ops; b->private_data = b->ht; b->bdg_flags = 0; NM_BNS_GET(b); @@ -240,13 +241,49 @@ ND("marking bridge %s as free", b->bdg_basename); nm_os_free(b->ht); - b->bdg_ops = NULL; + memset(&b->bdg_ops, 0, sizeof(b->bdg_ops)); + memset(&b->bdg_saved_ops, 0, sizeof(b->bdg_saved_ops)); b->bdg_flags = 0; NM_BNS_PUT(b); return 0; } +/* Called by external kernel modules (e.g., Openvswitch). + * to modify the private data previously given to regops(). + * 'name' may be just bridge's name (including ':' if it + * is not just NM_BDG_NAME). + * Called without NMG_LOCK. 
+ */ +int +netmap_bdg_update_private_data(const char *name, bdg_update_private_data_fn_t callback, + void *callback_data, void *auth_token) +{ + void *private_data = NULL; + struct nm_bridge *b; + int error = 0; + NMG_LOCK(); + b = nm_find_bridge(name, 0 /* don't create */, NULL); + if (!b) { + error = EINVAL; + goto unlock_update_priv; + } + if (!nm_bdg_valid_auth_token(b, auth_token)) { + error = EACCES; + goto unlock_update_priv; + } + BDG_WLOCK(b); + private_data = callback(b->private_data, callback_data, &error); + b->private_data = private_data; + BDG_WUNLOCK(b); + +unlock_update_priv: + NMG_UNLOCK(); + return error; +} + + + /* remove from bridge b the ports in slots hw and sw * (sw can be -1 if not needed) */ @@ -267,8 +304,8 @@ acquire BDG_WLOCK() and copy back the array. */ - if (netmap_verbose) - D("detach %d and %d (lim %d)", hw, sw, lim); + if (netmap_debug & NM_DEBUG_BDG) + nm_prinf("detach %d and %d (lim %d)", hw, sw, lim); /* make a copy of the list of active ports, update it, * and then copy back within BDG_WLOCK(). */ @@ -291,12 +328,12 @@ } } if (hw >= 0 || sw >= 0) { - D("XXX delete failed hw %d sw %d, should panic...", hw, sw); + nm_prerr("delete failed hw %d sw %d, should panic...", hw, sw); } BDG_WLOCK(b); - if (b->bdg_ops->dtor) - b->bdg_ops->dtor(b->bdg_ports[s_hw]); + if (b->bdg_ops.dtor) + b->bdg_ops.dtor(b->bdg_ports[s_hw]); b->bdg_ports[s_hw] = NULL; if (s_sw >= 0) { b->bdg_ports[s_sw] = NULL; @@ -402,7 +439,7 @@ /* yes we should, see if we have space to attach entries */ needed = 2; /* in some cases we only need 1 */ if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) { - D("bridge full %d, cannot create new port", b->bdg_active_ports); + nm_prerr("bridge full %d, cannot create new port", b->bdg_active_ports); return ENOMEM; } /* record the next two ports available, but do not allocate yet */ @@ -428,9 +465,10 @@ } /* bdg_netmap_attach creates a struct netmap_adapter */ - error = b->bdg_ops->vp_create(hdr, NULL, nmd, &vpna); + error = b->bdg_ops.vp_create(hdr, NULL, nmd, &vpna); if (error) { - D("error %d", error); + if (netmap_debug & NM_DEBUG_BDG) + nm_prerr("error %d", error); goto out; } /* shortcut - we can skip get_hw_na(), @@ -459,7 +497,7 @@ /* host adapter might not be created */ error = hw->nm_bdg_attach(nr_name, hw, b); if (error == NM_NEED_BWRAP) { - error = b->bdg_ops->bwrap_attach(nr_name, hw); + error = b->bdg_ops.bwrap_attach(nr_name, hw); } if (error) goto out; @@ -502,143 +540,14 @@ return error; } -/* Process NETMAP_REQ_VALE_ATTACH. 
- */ -int -nm_bdg_ctl_attach(struct nmreq_header *hdr, void *auth_token) -{ - struct nmreq_vale_attach *req = - (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body; - struct netmap_vp_adapter * vpna; - struct netmap_adapter *na = NULL; - struct netmap_mem_d *nmd = NULL; - struct nm_bridge *b = NULL; - int error; - NMG_LOCK(); - /* permission check for modified bridges */ - b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL); - if (b && !nm_bdg_valid_auth_token(b, auth_token)) { - error = EACCES; - goto unlock_exit; - } - - if (req->reg.nr_mem_id) { - nmd = netmap_mem_find(req->reg.nr_mem_id); - if (nmd == NULL) { - error = EINVAL; - goto unlock_exit; - } - } - - /* check for existing one */ - error = netmap_get_vale_na(hdr, &na, nmd, 0); - if (na) { - error = EBUSY; - goto unref_exit; - } - error = netmap_get_vale_na(hdr, &na, - nmd, 1 /* create if not exists */); - if (error) { /* no device */ - goto unlock_exit; - } - - if (na == NULL) { /* VALE prefix missing */ - error = EINVAL; - goto unlock_exit; - } - - if (NETMAP_OWNED_BY_ANY(na)) { - error = EBUSY; - goto unref_exit; - } - - if (na->nm_bdg_ctl) { - /* nop for VALE ports. The bwrap needs to put the hwna - * in netmap mode (see netmap_bwrap_bdg_ctl) - */ - error = na->nm_bdg_ctl(hdr, na); - if (error) - goto unref_exit; - ND("registered %s to netmap-mode", na->name); - } - vpna = (struct netmap_vp_adapter *)na; - req->port_index = vpna->bdg_port; - NMG_UNLOCK(); - return 0; - -unref_exit: - netmap_adapter_put(na); -unlock_exit: - NMG_UNLOCK(); - return error; -} - -static inline int +int nm_is_bwrap(struct netmap_adapter *na) { return na->nm_register == netmap_bwrap_reg; } -/* Process NETMAP_REQ_VALE_DETACH. - */ -int -nm_bdg_ctl_detach(struct nmreq_header *hdr, void *auth_token) -{ - struct nmreq_vale_detach *nmreq_det = (void *)(uintptr_t)hdr->nr_body; - struct netmap_vp_adapter *vpna; - struct netmap_adapter *na; - struct nm_bridge *b = NULL; - int error; - NMG_LOCK(); - /* permission check for modified bridges */ - b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL); - if (b && !nm_bdg_valid_auth_token(b, auth_token)) { - error = EACCES; - goto unlock_exit; - } - - error = netmap_get_vale_na(hdr, &na, NULL, 0 /* don't create */); - if (error) { /* no device, or another bridge or user owns the device */ - goto unlock_exit; - } - - if (na == NULL) { /* VALE prefix missing */ - error = EINVAL; - goto unlock_exit; - } else if (nm_is_bwrap(na) && - ((struct netmap_bwrap_adapter *)na)->na_polling_state) { - /* Don't detach a NIC with polling */ - error = EBUSY; - goto unref_exit; - } - - vpna = (struct netmap_vp_adapter *)na; - if (na->na_vp != vpna) { - /* trying to detach first attach of VALE persistent port attached - * to 2 bridges - */ - error = EBUSY; - goto unref_exit; - } - nmreq_det->port_index = vpna->bdg_port; - - if (na->nm_bdg_ctl) { - /* remove the port from bridge. 
The bwrap - * also needs to put the hwna in normal mode - */ - error = na->nm_bdg_ctl(hdr, na); - } - -unref_exit: - netmap_adapter_put(na); -unlock_exit: - NMG_UNLOCK(); - return error; - -} - struct nm_bdg_polling_state; struct nm_bdg_kthread { @@ -661,7 +570,7 @@ }; static void -netmap_bwrap_polling(void *data, int is_kthread) +netmap_bwrap_polling(void *data) { struct nm_bdg_kthread *nbk = data; struct netmap_bwrap_adapter *bna; @@ -693,7 +602,6 @@ bzero(&kcfg, sizeof(kcfg)); kcfg.worker_fn = netmap_bwrap_polling; - kcfg.use_kthread = 1; for (i = 0; i < bps->ncpus; i++) { struct nm_bdg_kthread *t = bps->kthreads + i; int all = (bps->ncpus == 1 && @@ -703,8 +611,9 @@ t->bps = bps; t->qfirst = all ? bps->qfirst /* must be 0 */: affinity; t->qlast = all ? bps->qlast : t->qfirst + 1; - D("kthread %d a:%u qf:%u ql:%u", i, affinity, t->qfirst, - t->qlast); + if (netmap_verbose) + nm_prinf("kthread %d a:%u qf:%u ql:%u", i, affinity, t->qfirst, + t->qlast); kcfg.type = i; kcfg.worker_private = t; @@ -732,7 +641,7 @@ int error, i, j; if (!bps) { - D("polling is not configured"); + nm_prerr("polling is not configured"); return EFAULT; } bps->stopped = false; @@ -741,7 +650,7 @@ struct nm_bdg_kthread *t = bps->kthreads + i; error = nm_os_kctx_worker_start(t->nmk); if (error) { - D("error in nm_kthread_start()"); + nm_prerr("error in nm_kthread_start(): %d", error); goto cleanup; } } @@ -784,10 +693,10 @@ avail_cpus = nm_os_ncpus(); if (req_cpus == 0) { - D("req_cpus must be > 0"); + nm_prerr("req_cpus must be > 0"); return EINVAL; } else if (req_cpus >= avail_cpus) { - D("Cannot use all the CPUs in the system"); + nm_prerr("Cannot use all the CPUs in the system"); return EINVAL; } @@ -797,7 +706,7 @@ * For example, if nr_first_cpu_id=2 and nr_num_polling_cpus=2, * ring 2 and 3 are polled by core 2 and 3, respectively. */ if (i + req_cpus > nma_get_nrings(na, NR_RX)) { - D("Rings %u-%u not in range (have %d rings)", + nm_prerr("Rings %u-%u not in range (have %d rings)", i, i + req_cpus, nma_get_nrings(na, NR_RX)); return EINVAL; } @@ -809,7 +718,7 @@ /* Poll all the rings using a core specified by nr_first_cpu_id. * the number of cores must be 1. */ if (req_cpus != 1) { - D("ncpus must be 1 for NETMAP_POLLING_MODE_SINGLE_CPU " + nm_prerr("ncpus must be 1 for NETMAP_POLLING_MODE_SINGLE_CPU " "(was %d)", req_cpus); return EINVAL; } @@ -817,7 +726,7 @@ qlast = nma_get_nrings(na, NR_RX); core_from = i; } else { - D("Invalid polling mode"); + nm_prerr("Invalid polling mode"); return EINVAL; } @@ -826,7 +735,7 @@ bps->qlast = qlast; bps->cpu_from = core_from; bps->ncpus = req_cpus; - D("%s qfirst %u qlast %u cpu_from %u ncpus %u", + nm_prinf("%s qfirst %u qlast %u cpu_from %u ncpus %u", req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU ? 
"MULTI" : "SINGLE", qfirst, qlast, core_from, req_cpus); @@ -842,7 +751,7 @@ bna = (struct netmap_bwrap_adapter *)na; if (bna->na_polling_state) { - D("ERROR adapter already in polling mode"); + nm_prerr("ERROR adapter already in polling mode"); return EFAULT; } @@ -871,7 +780,7 @@ /* start kthread now */ error = nm_bdg_polling_start_kthreads(bps); if (error) { - D("ERROR nm_bdg_polling_start_kthread()"); + nm_prerr("ERROR nm_bdg_polling_start_kthread()"); nm_os_free(bps->kthreads); nm_os_free(bps); bna->na_polling_state = NULL; @@ -887,7 +796,7 @@ struct nm_bdg_polling_state *bps; if (!bna->na_polling_state) { - D("ERROR adapter is not in polling mode"); + nm_prerr("ERROR adapter is not in polling mode"); return EFAULT; } bps = bna->na_polling_state; @@ -932,86 +841,6 @@ return error; } -/* Process NETMAP_REQ_VALE_LIST. */ -int -netmap_bdg_list(struct nmreq_header *hdr) -{ - struct nmreq_vale_list *req = - (struct nmreq_vale_list *)(uintptr_t)hdr->nr_body; - int namelen = strlen(hdr->nr_name); - struct nm_bridge *b, *bridges; - struct netmap_vp_adapter *vpna; - int error = 0, i, j; - u_int num_bridges; - - netmap_bns_getbridges(&bridges, &num_bridges); - - /* this is used to enumerate bridges and ports */ - if (namelen) { /* look up indexes of bridge and port */ - if (strncmp(hdr->nr_name, NM_BDG_NAME, - strlen(NM_BDG_NAME))) { - return EINVAL; - } - NMG_LOCK(); - b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL); - if (!b) { - NMG_UNLOCK(); - return ENOENT; - } - - req->nr_bridge_idx = b - bridges; /* bridge index */ - req->nr_port_idx = NM_BDG_NOPORT; - for (j = 0; j < b->bdg_active_ports; j++) { - i = b->bdg_port_index[j]; - vpna = b->bdg_ports[i]; - if (vpna == NULL) { - D("This should not happen"); - continue; - } - /* the former and the latter identify a - * virtual port and a NIC, respectively - */ - if (!strcmp(vpna->up.name, hdr->nr_name)) { - req->nr_port_idx = i; /* port index */ - break; - } - } - NMG_UNLOCK(); - } else { - /* return the first non-empty entry starting from - * bridge nr_arg1 and port nr_arg2. - * - * Users can detect the end of the same bridge by - * seeing the new and old value of nr_arg1, and can - * detect the end of all the bridge by error != 0 - */ - i = req->nr_bridge_idx; - j = req->nr_port_idx; - - NMG_LOCK(); - for (error = ENOENT; i < NM_BRIDGES; i++) { - b = bridges + i; - for ( ; j < NM_BDG_MAXPORTS; j++) { - if (b->bdg_ports[j] == NULL) - continue; - vpna = b->bdg_ports[j]; - /* write back the VALE switch name */ - strncpy(hdr->nr_name, vpna->up.name, - (size_t)IFNAMSIZ); - error = 0; - goto out; - } - j = 0; /* following bridges scan from 0 */ - } - out: - req->nr_bridge_idx = i; - req->nr_port_idx = j; - NMG_UNLOCK(); - } - - return error; -} - /* Called by external kernel modules (e.g., Openvswitch). * to set configure/lookup/dtor functions of a VALE instance. * Register callbacks to the given bridge. 
'name' may be just @@ -1041,12 +870,19 @@ if (!bdg_ops) { /* resetting the bridge */ bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH); - b->bdg_ops = NULL; + b->bdg_ops = b->bdg_saved_ops; b->private_data = b->ht; } else { /* modifying the bridge */ b->private_data = private_data; - b->bdg_ops = bdg_ops; +#define nm_bdg_override(m) if (bdg_ops->m) b->bdg_ops.m = bdg_ops->m + nm_bdg_override(lookup); + nm_bdg_override(config); + nm_bdg_override(dtor); + nm_bdg_override(vp_create); + nm_bdg_override(bwrap_attach); +#undef nm_bdg_override + } BDG_WUNLOCK(b); @@ -1071,8 +907,8 @@ NMG_UNLOCK(); /* Don't call config() with NMG_LOCK() held */ BDG_RLOCK(b); - if (b->bdg_ops->config != NULL) - error = b->bdg_ops->config(nr); + if (b->bdg_ops.config != NULL) + error = b->bdg_ops.config(nr); BDG_RUNLOCK(b); return error; } @@ -1137,7 +973,7 @@ int n; if (head > lim) { - D("ouch dangerous reset!!!"); + nm_prerr("ouch dangerous reset!!!"); n = netmap_ring_reinit(kring); goto done; } @@ -1154,7 +990,7 @@ void *addr = NMB(na, slot); if (addr == NETMAP_BUF_BASE(kring->na)) { /* bad buf */ - D("bad buffer index %d, ignore ?", + nm_prerr("bad buffer index %d, ignore ?", slot->buf_idx); } slot->flags &= ~NS_BUF_CHANGED; @@ -1283,8 +1119,8 @@ int ret = NM_IRQ_COMPLETED; int error; - if (netmap_verbose) - D("%s %s 0x%x", na->name, kring->name, flags); + if (netmap_debug & NM_DEBUG_RXINTR) + nm_prinf("%s %s 0x%x", na->name, kring->name, flags); bkring = vpna->up.tx_rings[ring_nr]; @@ -1293,8 +1129,8 @@ return EIO; } - if (netmap_verbose) - D("%s head %d cur %d tail %d", na->name, + if (netmap_debug & NM_DEBUG_RXINTR) + nm_prinf("%s head %d cur %d tail %d", na->name, kring->rhead, kring->rcur, kring->rtail); /* simulate a user wakeup on the rx ring @@ -1305,7 +1141,7 @@ goto put_out; if (kring->nr_hwcur == kring->nr_hwtail) { if (netmap_verbose) - D("how strange, interrupt with no packets on %s", + nm_prerr("how strange, interrupt with no packets on %s", na->name); goto put_out; } @@ -1593,8 +1429,8 @@ ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)", na->name, ring_n, kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease, - ring->head, ring->cur, ring->tail, - hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_ring->rtail); + kring->rhead, kring->rcur, kring->rtail, + hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail); /* second step: the new packets are sent on the tx ring * (which is actually the same ring) */ @@ -1612,7 +1448,7 @@ ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)", na->name, ring_n, kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease, - ring->head, ring->cur, ring->tail, + kring->rhead, kring->rcur, kring->rtail, hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail); put_out: nm_kr_put(hw_kring); @@ -1688,7 +1524,7 @@ /* make sure the NIC is not already in use */ if (NETMAP_OWNED_BY_ANY(hwna)) { - D("NIC %s busy, cannot attach to bridge", hwna->name); + nm_prerr("NIC %s busy, cannot attach to bridge", hwna->name); return EBUSY; } @@ -1756,6 +1592,8 @@ hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */ hostna->rx_buf_maxsize = hwna->rx_buf_maxsize; } + if (hwna->na_flags & NAF_MOREFRAG) + na->na_flags |= NAF_MOREFRAG; ND("%s<->%s txr %d txd %d rxr %d rxd %d", na->name, ifp->if_xname, Index: head/sys/dev/netmap/netmap_freebsd.c =================================================================== --- head/sys/dev/netmap/netmap_freebsd.c +++ head/sys/dev/netmap/netmap_freebsd.c @@ -735,9 +735,9 @@ } #endif /* WITH_EXTMEM */ -/* 
======================== PTNETMAP SUPPORT ========================== */ +/* ================== PTNETMAP GUEST SUPPORT ==================== */ -#ifdef WITH_PTNETMAP_GUEST +#ifdef WITH_PTNETMAP #include #include #include /* bus_dmamap_* */ @@ -932,7 +932,7 @@ return bus_generic_shutdown(dev); } -#endif /* WITH_PTNETMAP_GUEST */ +#endif /* WITH_PTNETMAP */ /* * In order to track whether pages are still mapped, we hook into @@ -1145,8 +1145,8 @@ } struct nm_kctx_ctx { - struct thread *user_td; /* thread user-space (kthread creator) to send ioctl */ - struct ptnetmap_cfgentry_bhyve cfg; + /* Userspace thread (kthread creator). */ + struct thread *user_td; /* worker function and parameter */ nm_kctx_worker_fn_t worker_fn; @@ -1161,56 +1161,17 @@ struct nm_kctx { struct thread *worker; struct mtx worker_lock; - uint64_t scheduled; /* pending wake_up request */ struct nm_kctx_ctx worker_ctx; int run; /* used to stop kthread */ int attach_user; /* kthread attached to user_process */ int affinity; }; -void inline -nm_os_kctx_worker_wakeup(struct nm_kctx *nmk) -{ - /* - * There may be a race between FE and BE, - * which call both this function, and worker kthread, - * that reads nmk->scheduled. - * - * For us it is not important the counter value, - * but simply that it has changed since the last - * time the kthread saw it. - */ - mtx_lock(&nmk->worker_lock); - nmk->scheduled++; - if (nmk->worker_ctx.cfg.wchan) { - wakeup((void *)(uintptr_t)nmk->worker_ctx.cfg.wchan); - } - mtx_unlock(&nmk->worker_lock); -} - -void inline -nm_os_kctx_send_irq(struct nm_kctx *nmk) -{ - struct nm_kctx_ctx *ctx = &nmk->worker_ctx; - int err; - - if (ctx->user_td && ctx->cfg.ioctl_fd > 0) { - err = kern_ioctl(ctx->user_td, ctx->cfg.ioctl_fd, ctx->cfg.ioctl_cmd, - (caddr_t)&ctx->cfg.ioctl_data); - if (err) { - D("kern_ioctl error: %d ioctl parameters: fd %d com %lu data %p", - err, ctx->cfg.ioctl_fd, (unsigned long)ctx->cfg.ioctl_cmd, - &ctx->cfg.ioctl_data); - } - } -} - static void nm_kctx_worker(void *data) { struct nm_kctx *nmk = data; struct nm_kctx_ctx *ctx = &nmk->worker_ctx; - uint64_t old_scheduled = nmk->scheduled; if (nmk->affinity >= 0) { thread_lock(curthread); @@ -1231,30 +1192,8 @@ kthread_suspend_check(); } - /* - * if wchan is not defined, we don't have notification - * mechanism and we continually execute worker_fn() - */ - if (!ctx->cfg.wchan) { - ctx->worker_fn(ctx->worker_private, 1); /* worker body */ - } else { - /* checks if there is a pending notification */ - mtx_lock(&nmk->worker_lock); - if (likely(nmk->scheduled != old_scheduled)) { - old_scheduled = nmk->scheduled; - mtx_unlock(&nmk->worker_lock); - - ctx->worker_fn(ctx->worker_private, 1); /* worker body */ - - continue; - } else if (nmk->run) { - /* wait on event with one second timeout */ - msleep((void *)(uintptr_t)ctx->cfg.wchan, &nmk->worker_lock, - 0, "nmk_ev", hz); - nmk->scheduled++; - } - mtx_unlock(&nmk->worker_lock); - } + /* Continuously execute worker process. */ + ctx->worker_fn(ctx->worker_private); /* worker body */ } kthread_exit(); @@ -1284,11 +1223,6 @@ /* attach kthread to user process (ptnetmap) */ nmk->attach_user = cfg->attach_user; - /* store kick/interrupt configuration */ - if (opaque) { - nmk->worker_ctx.cfg = *((struct ptnetmap_cfgentry_bhyve *)opaque); - } - return nmk; } @@ -1298,9 +1232,13 @@ struct proc *p = NULL; int error = 0; - if (nmk->worker) { + /* Temporarily disable this function as it is currently broken + * and causes kernel crashes. 
The failure can be triggered by + * the "vale_polling_enable_disable" test in ctrl-api-test.c. */ + return EOPNOTSUPP; + + if (nmk->worker) return EBUSY; - } /* check if we want to attach kthread to user process */ if (nmk->attach_user) { @@ -1329,15 +1267,14 @@ void nm_os_kctx_worker_stop(struct nm_kctx *nmk) { - if (!nmk->worker) { + if (!nmk->worker) return; - } + /* tell to kthread to exit from main loop */ nmk->run = 0; /* wake up kthread if it sleeps */ kthread_resume(nmk->worker); - nm_os_kctx_worker_wakeup(nmk); nmk->worker = NULL; } @@ -1347,11 +1284,9 @@ { if (!nmk) return; - if (nmk->worker) { - nm_os_kctx_worker_stop(nmk); - } - memset(&nmk->worker_ctx.cfg, 0, sizeof(nmk->worker_ctx.cfg)); + if (nmk->worker) + nm_os_kctx_worker_stop(nmk); free(nmk, M_DEVBUF); } Index: head/sys/dev/netmap/netmap_generic.c =================================================================== --- head/sys/dev/netmap/netmap_generic.c +++ head/sys/dev/netmap/netmap_generic.c @@ -81,7 +81,6 @@ #include #include /* bus_dmamap_* in netmap_kern.h */ -// XXX temporary - D() defined here #include #include #include @@ -179,7 +178,7 @@ r = mod_timer(&ctx->timer, jiffies + msecs_to_jiffies(RATE_PERIOD * 1000)); if (unlikely(r)) - D("[v1000] Error: mod_timer()"); + nm_prerr("mod_timer() failed"); } static struct rate_context rate_ctx; @@ -240,14 +239,14 @@ for_each_rx_kring_h(r, kring, na) { if (nm_kring_pending_off(kring)) { - D("Emulated adapter: ring '%s' deactivated", kring->name); + nm_prinf("Emulated adapter: ring '%s' deactivated", kring->name); kring->nr_mode = NKR_NETMAP_OFF; } } for_each_tx_kring_h(r, kring, na) { if (nm_kring_pending_off(kring)) { kring->nr_mode = NKR_NETMAP_OFF; - D("Emulated adapter: ring '%s' deactivated", kring->name); + nm_prinf("Emulated adapter: ring '%s' deactivated", kring->name); } } @@ -300,11 +299,11 @@ #ifdef RATE_GENERIC if (--rate_ctx.refcount == 0) { - D("del_timer()"); + nm_prinf("del_timer()"); del_timer(&rate_ctx.timer); } #endif - D("Emulated adapter for %s deactivated", na->name); + nm_prinf("Emulated adapter for %s deactivated", na->name); } return 0; @@ -329,14 +328,14 @@ } if (na->active_fds == 0) { - D("Emulated adapter for %s activated", na->name); + nm_prinf("Emulated adapter for %s activated", na->name); /* Do all memory allocations when (na->active_fds == 0), to * simplify error management. */ /* Allocate memory for mitigation support on all the rx queues. */ gna->mit = nm_os_malloc(na->num_rx_rings * sizeof(struct nm_generic_mit)); if (!gna->mit) { - D("mitigation allocation failed"); + nm_prerr("mitigation allocation failed"); error = ENOMEM; goto out; } @@ -363,7 +362,7 @@ kring->tx_pool = nm_os_malloc(na->num_tx_desc * sizeof(struct mbuf *)); if (!kring->tx_pool) { - D("tx_pool allocation failed"); + nm_prerr("tx_pool allocation failed"); error = ENOMEM; goto free_tx_pools; } @@ -374,14 +373,14 @@ for_each_rx_kring_h(r, kring, na) { if (nm_kring_pending_on(kring)) { - D("Emulated adapter: ring '%s' activated", kring->name); + nm_prinf("Emulated adapter: ring '%s' activated", kring->name); kring->nr_mode = NKR_NETMAP_ON; } } for_each_tx_kring_h(r, kring, na) { if (nm_kring_pending_on(kring)) { - D("Emulated adapter: ring '%s' activated", kring->name); + nm_prinf("Emulated adapter: ring '%s' activated", kring->name); kring->nr_mode = NKR_NETMAP_ON; } } @@ -399,14 +398,14 @@ /* Prepare to intercept incoming traffic. 
*/ error = nm_os_catch_rx(gna, 1); if (error) { - D("nm_os_catch_rx(1) failed (%d)", error); + nm_prerr("nm_os_catch_rx(1) failed (%d)", error); goto free_tx_pools; } /* Let netmap control the packet steering. */ error = nm_os_catch_tx(gna, 1); if (error) { - D("nm_os_catch_tx(1) failed (%d)", error); + nm_prerr("nm_os_catch_tx(1) failed (%d)", error); goto catch_rx; } @@ -414,11 +413,11 @@ #ifdef RATE_GENERIC if (rate_ctx.refcount == 0) { - D("setup_timer()"); + nm_prinf("setup_timer()"); memset(&rate_ctx, 0, sizeof(rate_ctx)); setup_timer(&rate_ctx.timer, &rate_callback, (unsigned long)&rate_ctx); if (mod_timer(&rate_ctx.timer, jiffies + msecs_to_jiffies(1500))) { - D("Error: mod_timer()"); + nm_prerr("Error: mod_timer()"); } } rate_ctx.refcount++; @@ -462,7 +461,7 @@ unsigned int r_orig = r; if (unlikely(!nm_netmap_on(na) || r >= na->num_tx_rings)) { - D("Error: no netmap adapter on device %p", + nm_prerr("Error: no netmap adapter on device %p", GEN_TX_MBUF_IFP(m)); return; } @@ -488,7 +487,7 @@ if (match) { if (r != r_orig) { - RD(1, "event %p migrated: ring %u --> %u", + nm_prlim(1, "event %p migrated: ring %u --> %u", m, r_orig, r); } break; @@ -497,7 +496,7 @@ if (++r == na->num_tx_rings) r = 0; if (r == r_orig) { - RD(1, "Cannot match event %p", m); + nm_prlim(1, "Cannot match event %p", m); return; } } @@ -528,7 +527,7 @@ u_int n = 0; struct mbuf **tx_pool = kring->tx_pool; - ND("hwcur = %d, hwtail = %d", kring->nr_hwcur, kring->nr_hwtail); + nm_prdis("hwcur = %d, hwtail = %d", kring->nr_hwcur, kring->nr_hwtail); while (nm_i != hwcur) { /* buffers not completed */ struct mbuf *m = tx_pool[nm_i]; @@ -537,7 +536,7 @@ if (m == NULL) { /* Nothing to do, this is going * to be replenished. */ - RD(3, "Is this happening?"); + nm_prlim(3, "Is this happening?"); } else if (MBUF_QUEUED(m)) { break; /* Not dequeued yet. */ @@ -576,7 +575,7 @@ nm_i = nm_next(nm_i, lim); } kring->nr_hwtail = nm_prev(nm_i, lim); - ND("tx completed [%d] -> hwtail %d", n, kring->nr_hwtail); + nm_prdis("tx completed [%d] -> hwtail %d", n, kring->nr_hwtail); return n; } @@ -598,7 +597,7 @@ } if (unlikely(e >= n)) { - D("This cannot happen"); + nm_prerr("This cannot happen"); e = 0; } @@ -654,7 +653,7 @@ kring->tx_pool[e] = NULL; - ND(5, "Request Event at %d mbuf %p refcnt %d", e, m, m ? MBUF_REFCNT(m) : -2 ); + nm_prdis("Request Event at %d mbuf %p refcnt %d", e, m, m ? MBUF_REFCNT(m) : -2 ); /* Decrement the refcount. This will free it if we lose the race * with the driver. */ @@ -699,7 +698,7 @@ * but only when cur == hwtail, which means that the * client is going to block. */ event = ring_middle(nm_i, head, lim); - ND(3, "Place txqdisc event (hwcur=%u,event=%u," + nm_prdis("Place txqdisc event (hwcur=%u,event=%u," "head=%u,hwtail=%u)", nm_i, event, head, kring->nr_hwtail); } @@ -725,7 +724,7 @@ kring->tx_pool[nm_i] = m = nm_os_get_mbuf(ifp, NETMAP_BUF_SIZE(na)); if (m == NULL) { - RD(2, "Failed to replenish mbuf"); + nm_prlim(2, "Failed to replenish mbuf"); /* Here we could schedule a timer which * retries to replenish after a while, * and notifies the client when it @@ -854,7 +853,7 @@ /* This may happen when GRO/LRO features are enabled for * the NIC driver when the generic adapter does not * support RX scatter-gather. 
*/ - RD(2, "Warning: driver pushed up big packet " + nm_prlim(2, "Warning: driver pushed up big packet " "(size=%d)", (int)MBUF_LEN(m)); m_freem(m); } else if (unlikely(mbq_len(&kring->rx_queue) > 1024)) { @@ -1048,7 +1047,7 @@ */ netmap_adapter_put(prev_na); } - D("Native netmap adapter %p restored", prev_na); + nm_prinf("Native netmap adapter %p restored", prev_na); } NM_RESTORE_NA(ifp, prev_na); /* @@ -1056,7 +1055,7 @@ * overrides WNA(ifp) if na->ifp is not NULL. */ na->ifp = NULL; - D("Emulated netmap adapter for %s destroyed", na->name); + nm_prinf("Emulated netmap adapter for %s destroyed", na->name); } int @@ -1086,7 +1085,7 @@ #ifdef __FreeBSD__ if (ifp->if_type == IFT_LOOP) { - D("if_loop is not supported by %s", __func__); + nm_prerr("if_loop is not supported by %s", __func__); return EINVAL; } #endif @@ -1096,26 +1095,25 @@ * adapter it means that someone else is using the same * pointer (e.g. ax25_ptr on linux). This happens for * instance when also PF_RING is in use. */ - D("Error: netmap adapter hook is busy"); + nm_prerr("Error: netmap adapter hook is busy"); return EBUSY; } num_tx_desc = num_rx_desc = netmap_generic_ringsize; /* starting point */ nm_os_generic_find_num_desc(ifp, &num_tx_desc, &num_rx_desc); /* ignore errors */ - ND("Netmap ring size: TX = %d, RX = %d", num_tx_desc, num_rx_desc); if (num_tx_desc == 0 || num_rx_desc == 0) { - D("Device has no hw slots (tx %u, rx %u)", num_tx_desc, num_rx_desc); + nm_prerr("Device has no hw slots (tx %u, rx %u)", num_tx_desc, num_rx_desc); return EINVAL; } gna = nm_os_malloc(sizeof(*gna)); if (gna == NULL) { - D("no memory on attach, give up"); + nm_prerr("no memory on attach, give up"); return ENOMEM; } na = (struct netmap_adapter *)gna; - strncpy(na->name, ifp->if_xname, sizeof(na->name)); + strlcpy(na->name, ifp->if_xname, sizeof(na->name)); na->ifp = ifp; na->num_tx_desc = num_tx_desc; na->num_rx_desc = num_rx_desc; @@ -1129,10 +1127,10 @@ */ na->na_flags = NAF_SKIP_INTR | NAF_HOST_RINGS; - ND("[GNA] num_tx_queues(%d), real_num_tx_queues(%d), len(%lu)", + nm_prdis("[GNA] num_tx_queues(%d), real_num_tx_queues(%d), len(%lu)", ifp->num_tx_queues, ifp->real_num_tx_queues, ifp->tx_queue_len); - ND("[GNA] num_rx_queues(%d), real_num_rx_queues(%d)", + nm_prdis("[GNA] num_rx_queues(%d), real_num_rx_queues(%d)", ifp->num_rx_queues, ifp->real_num_rx_queues); nm_os_generic_find_num_queues(ifp, &na->num_tx_rings, &na->num_rx_rings); @@ -1151,7 +1149,7 @@ nm_os_generic_set_features(gna); - D("Emulated adapter for %s created (prev was %p)", na->name, gna->prev); + nm_prinf("Emulated adapter for %s created (prev was %p)", na->name, gna->prev); return retval; } Index: head/sys/dev/netmap/netmap_kern.h =================================================================== --- head/sys/dev/netmap/netmap_kern.h +++ head/sys/dev/netmap/netmap_kern.h @@ -54,30 +54,31 @@ #if defined(CONFIG_NETMAP_GENERIC) #define WITH_GENERIC #endif -#if defined(CONFIG_NETMAP_PTNETMAP_GUEST) -#define WITH_PTNETMAP_GUEST +#if defined(CONFIG_NETMAP_PTNETMAP) +#define WITH_PTNETMAP #endif -#if defined(CONFIG_NETMAP_PTNETMAP_HOST) -#define WITH_PTNETMAP_HOST -#endif #if defined(CONFIG_NETMAP_SINK) #define WITH_SINK #endif +#if defined(CONFIG_NETMAP_NULL) +#define WITH_NMNULL +#endif #elif defined (_WIN32) #define WITH_VALE // comment out to disable VALE support #define WITH_PIPES #define WITH_MONITOR #define WITH_GENERIC +#define WITH_NMNULL #else /* neither linux nor windows */ #define WITH_VALE // comment out to disable VALE support #define WITH_PIPES #define 
WITH_MONITOR #define WITH_GENERIC -#define WITH_PTNETMAP_HOST /* ptnetmap host support */ -#define WITH_PTNETMAP_GUEST /* ptnetmap guest support */ +#define WITH_PTNETMAP /* ptnetmap guest support */ #define WITH_EXTMEM +#define WITH_NMNULL #endif #if defined(__FreeBSD__) @@ -239,38 +240,54 @@ #define NMG_LOCK_ASSERT() NM_MTX_ASSERT(netmap_global_lock) #if defined(__FreeBSD__) -#define nm_prerr printf -#define nm_prinf printf +#define nm_prerr_int printf +#define nm_prinf_int printf #elif defined (_WIN32) -#define nm_prerr DbgPrint -#define nm_prinf DbgPrint +#define nm_prerr_int DbgPrint +#define nm_prinf_int DbgPrint #elif defined(linux) -#define nm_prerr(fmt, arg...) printk(KERN_ERR fmt, ##arg) -#define nm_prinf(fmt, arg...) printk(KERN_INFO fmt, ##arg) +#define nm_prerr_int(fmt, arg...) printk(KERN_ERR fmt, ##arg) +#define nm_prinf_int(fmt, arg...) printk(KERN_INFO fmt, ##arg) #endif -#define ND(format, ...) -#define D(format, ...) \ +#define nm_prinf(format, ...) \ do { \ struct timeval __xxts; \ microtime(&__xxts); \ - nm_prerr("%03d.%06d [%4d] %-25s " format "\n", \ + nm_prinf_int("%03d.%06d [%4d] %-25s " format "\n",\ (int)__xxts.tv_sec % 1000, (int)__xxts.tv_usec, \ __LINE__, __FUNCTION__, ##__VA_ARGS__); \ } while (0) -/* rate limited, lps indicates how many per second */ -#define RD(lps, format, ...) \ +#define nm_prerr(format, ...) \ do { \ + struct timeval __xxts; \ + microtime(&__xxts); \ + nm_prerr_int("%03d.%06d [%4d] %-25s " format "\n",\ + (int)__xxts.tv_sec % 1000, (int)__xxts.tv_usec, \ + __LINE__, __FUNCTION__, ##__VA_ARGS__); \ + } while (0) + +/* Disabled printf (used to be ND). */ +#define nm_prdis(format, ...) + +/* Rate limited, lps indicates how many per second. */ +#define nm_prlim(lps, format, ...) \ + do { \ static int t0, __cnt; \ if (t0 != time_second) { \ t0 = time_second; \ __cnt = 0; \ } \ if (__cnt++ < lps) \ - D(format, ##__VA_ARGS__); \ + nm_prinf(format, ##__VA_ARGS__); \ } while (0) +/* Old macros. */ +#define ND nm_prdis +#define D nm_prerr +#define RD nm_prlim + struct netmap_adapter; struct nm_bdg_fwd; struct nm_bridge; @@ -700,7 +717,7 @@ */ #define NAF_HOST_RINGS 64 /* the adapter supports the host rings */ #define NAF_FORCE_NATIVE 128 /* the adapter is always NATIVE */ -#define NAF_PTNETMAP_HOST 256 /* the adapter supports ptnetmap in the host */ +/* free */ #define NAF_MOREFRAG 512 /* the adapter supports NS_MOREFRAG */ #define NAF_ZOMBIE (1U<<30) /* the nic driver has been unloaded */ #define NAF_BUSY (1U<<31) /* the adapter is used internally and @@ -718,9 +735,9 @@ u_int num_tx_desc; /* number of descriptor in each queue */ u_int num_rx_desc; - /* tx_rings and rx_rings are private but allocated - * as a contiguous chunk of memory. Each array has - * N+1 entries, for the adapter queues and for the host queue. + /* tx_rings and rx_rings are private but allocated as a + * contiguous chunk of memory. Each array has N+K entries, + * N for the hardware rings and K for the host rings. */ struct netmap_kring **tx_rings; /* array of TX rings. */ struct netmap_kring **rx_rings; /* array of RX rings. 
*/ @@ -1080,12 +1097,12 @@ */ struct netmap_vp_adapter *saved_na_vp; }; -int nm_bdg_ctl_attach(struct nmreq_header *hdr, void *auth_token); -int nm_bdg_ctl_detach(struct nmreq_header *hdr, void *auth_token); int nm_bdg_polling(struct nmreq_header *hdr); -int netmap_bdg_list(struct nmreq_header *hdr); #ifdef WITH_VALE +int netmap_vale_attach(struct nmreq_header *hdr, void *auth_token); +int netmap_vale_detach(struct nmreq_header *hdr, void *auth_token); +int netmap_vale_list(struct nmreq_header *hdr); int netmap_vi_create(struct nmreq_header *hdr, int); int nm_vi_create(struct nmreq_header *); int nm_vi_destroy(const char *name); @@ -1115,7 +1132,13 @@ #endif /* WITH_PIPES */ +#ifdef WITH_NMNULL +struct netmap_null_adapter { + struct netmap_adapter up; +}; +#endif /* WITH_NMNULL */ + /* return slots reserved to rx clients; used in drivers */ static inline uint32_t nm_kr_rxspace(struct netmap_kring *k) @@ -1442,51 +1465,8 @@ int netmap_get_hw_na(struct ifnet *ifp, struct netmap_mem_d *nmd, struct netmap_adapter **na); - -/* - * The following bridge-related functions are used by other - * kernel modules. - * - * VALE only supports unicast or broadcast. The lookup - * function can return 0 .. NM_BDG_MAXPORTS-1 for regular ports, - * NM_BDG_MAXPORTS for broadcast, NM_BDG_MAXPORTS+1 to indicate - * drop. - */ -typedef uint32_t (*bdg_lookup_fn_t)(struct nm_bdg_fwd *ft, uint8_t *ring_nr, - struct netmap_vp_adapter *, void *private_data); -typedef int (*bdg_config_fn_t)(struct nm_ifreq *); -typedef void (*bdg_dtor_fn_t)(const struct netmap_vp_adapter *); -typedef void *(*bdg_update_private_data_fn_t)(void *private_data, void *callback_data, int *error); -typedef int (*bdg_vp_create_fn_t)(struct nmreq_header *hdr, - struct ifnet *ifp, struct netmap_mem_d *nmd, - struct netmap_vp_adapter **ret); -typedef int (*bdg_bwrap_attach_fn_t)(const char *nr_name, struct netmap_adapter *hwna); -struct netmap_bdg_ops { - bdg_lookup_fn_t lookup; - bdg_config_fn_t config; - bdg_dtor_fn_t dtor; - bdg_vp_create_fn_t vp_create; - bdg_bwrap_attach_fn_t bwrap_attach; - char name[IFNAMSIZ]; -}; -int netmap_bwrap_attach(const char *name, struct netmap_adapter *, struct netmap_bdg_ops *); -int netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *private_data, void *auth_token); - -#define NM_BRIDGES 8 /* number of bridges */ -#define NM_BDG_MAXPORTS 254 /* up to 254 */ -#define NM_BDG_BROADCAST NM_BDG_MAXPORTS -#define NM_BDG_NOPORT (NM_BDG_MAXPORTS+1) - -struct nm_bridge *netmap_init_bridges2(u_int); -void netmap_uninit_bridges2(struct nm_bridge *, u_int); -int netmap_init_bridges(void); -void netmap_uninit_bridges(void); -int nm_bdg_update_private_data(const char *name, bdg_update_private_data_fn_t callback, - void *callback_data, void *auth_token); -int netmap_bdg_config(struct nm_ifreq *nifr); - #ifdef WITH_VALE -uint32_t netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring, +uint32_t netmap_vale_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring, struct netmap_vp_adapter *, void *private_data); /* these are redefined in case of no VALE support */ @@ -1525,11 +1505,20 @@ (((struct nmreq_register *)(uintptr_t)hdr->nr_body)->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX) ? 
EOPNOTSUPP : 0) #endif +#ifdef WITH_NMNULL +int netmap_get_null_na(struct nmreq_header *hdr, struct netmap_adapter **na, + struct netmap_mem_d *nmd, int create); +#else /* !WITH_NMNULL */ +#define netmap_get_null_na(hdr, _2, _3, _4) \ + (((struct nmreq_register *)(uintptr_t)hdr->nr_body)->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX) ? EOPNOTSUPP : 0) +#endif /* WITH_NMNULL */ + #ifdef CONFIG_NET_NS struct net *netmap_bns_get(void); void netmap_bns_put(struct net *); void netmap_bns_getbridges(struct nm_bridge **, u_int *); #else +extern struct nm_bridge *nm_bridges; #define netmap_bns_get() #define netmap_bns_put(_1) #define netmap_bns_getbridges(b, n) \ @@ -1591,16 +1580,24 @@ #define NETMAP_BUF_SIZE(_na) ((_na)->na_lut.objsize) extern int netmap_no_pendintr; extern int netmap_mitigate; -extern int netmap_verbose; /* for debugging */ -enum { /* verbose flags */ - NM_VERB_ON = 1, /* generic verbose */ - NM_VERB_HOST = 0x2, /* verbose host stack */ - NM_VERB_RXSYNC = 0x10, /* verbose on rxsync/txsync */ - NM_VERB_TXSYNC = 0x20, - NM_VERB_RXINTR = 0x100, /* verbose on rx/tx intr (driver) */ - NM_VERB_TXINTR = 0x200, - NM_VERB_NIC_RXSYNC = 0x1000, /* verbose on rx/tx intr (driver) */ - NM_VERB_NIC_TXSYNC = 0x2000, +extern int netmap_verbose; +#ifdef CONFIG_NETMAP_DEBUG +extern int netmap_debug; /* for debugging */ +#else /* !CONFIG_NETMAP_DEBUG */ +#define netmap_debug (0) +#endif /* !CONFIG_NETMAP_DEBUG */ +enum { /* debug flags */ + NM_DEBUG_ON = 1, /* generic debug messsages */ + NM_DEBUG_HOST = 0x2, /* debug host stack */ + NM_DEBUG_RXSYNC = 0x10, /* debug on rxsync/txsync */ + NM_DEBUG_TXSYNC = 0x20, + NM_DEBUG_RXINTR = 0x100, /* debug on rx/tx intr (driver) */ + NM_DEBUG_TXINTR = 0x200, + NM_DEBUG_NIC_RXSYNC = 0x1000, /* debug on rx/tx intr (driver) */ + NM_DEBUG_NIC_TXSYNC = 0x2000, + NM_DEBUG_MEM = 0x4000, /* verbose memory allocations/deallocations */ + NM_DEBUG_VALE = 0x8000, /* debug messages from memory allocators */ + NM_DEBUG_BDG = NM_DEBUG_VALE, }; extern int netmap_txsync_retry; @@ -1612,7 +1609,6 @@ #ifdef linux extern int netmap_generic_txqdisc; #endif -extern int ptnetmap_tx_workers; /* * NA returns a pointer to the struct netmap adapter from the ifp. @@ -1809,6 +1805,11 @@ netmap_idx_n2k(struct netmap_kring *kr, int idx) { int n = kr->nkr_num_slots; + + if (likely(kr->nkr_hwofs == 0)) { + return idx; + } + idx += kr->nkr_hwofs; if (idx < 0) return idx + n; @@ -1823,6 +1824,11 @@ netmap_idx_k2n(struct netmap_kring *kr, int idx) { int n = kr->nkr_num_slots; + + if (likely(kr->nkr_hwofs == 0)) { + return idx; + } + idx -= kr->nkr_hwofs; if (idx < 0) return idx + n; @@ -1911,6 +1917,9 @@ u_int np_qfirst[NR_TXRX], np_qlast[NR_TXRX]; /* range of tx/rx rings to scan */ uint16_t np_txpoll; + uint16_t np_kloop_state; /* use with NMG_LOCK held */ +#define NM_SYNC_KLOOP_RUNNING (1 << 0) +#define NM_SYNC_KLOOP_STOPPING (1 << 1) int np_sync_flags; /* to be passed to nm_sync */ int np_refs; /* use with NMG_LOCK held */ @@ -1920,7 +1929,26 @@ * number of rings. */ NM_SELINFO_T *np_si[NR_TXRX]; + + /* In the optional CSB mode, the user must specify the start address + * of two arrays of Communication Status Block (CSB) entries, for the + * two directions (kernel read application write, and kernel write + * application read). + * The number of entries must agree with the number of rings bound to + * the netmap file descriptor. The entries corresponding to the TX + * rings are laid out before the ones corresponding to the RX rings. 
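+	 * The sync kloop (netmap_sync_kloop()) refuses to start if these
+	 * pointers have not been set up through the NETMAP_REQ_OPT_CSB option.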
+ * + * Array of CSB entries for application --> kernel communication + * (N entries). */ + struct nm_csb_atok *np_csb_atok_base; + /* Array of CSB entries for kernel --> application communication + * (N entries). */ + struct nm_csb_ktoa *np_csb_ktoa_base; + struct thread *np_td; /* kqueue, just debugging */ +#ifdef linux + struct file *np_filp; /* used by sync kloop */ +#endif /* linux */ }; struct netmap_priv_d *netmap_priv_new(void); @@ -1943,6 +1971,14 @@ return 0; } +/* call with NMG_LOCK held */ +static __inline int +nm_si_user(struct netmap_priv_d *priv, enum txrx t) +{ + return (priv->np_na != NULL && + (priv->np_qlast[t] - priv->np_qfirst[t] > 1)); +} + #ifdef WITH_PIPES int netmap_pipe_txsync(struct netmap_kring *txkring, int flags); int netmap_pipe_rxsync(struct netmap_kring *rxkring, int flags); @@ -2143,17 +2179,14 @@ * kernel thread routines */ struct nm_kctx; /* OS-specific kernel context - opaque */ -typedef void (*nm_kctx_worker_fn_t)(void *data, int is_kthread); -typedef void (*nm_kctx_notify_fn_t)(void *data); +typedef void (*nm_kctx_worker_fn_t)(void *data); /* kthread configuration */ struct nm_kctx_cfg { long type; /* kthread type/identifier */ nm_kctx_worker_fn_t worker_fn; /* worker function */ void *worker_private;/* worker parameter */ - nm_kctx_notify_fn_t notify_fn; /* notify function */ int attach_user; /* attach kthread to user process */ - int use_kthread; /* use a kthread for the context */ }; /* kthread configuration */ struct nm_kctx *nm_os_kctx_create(struct nm_kctx_cfg *cfg, @@ -2161,48 +2194,25 @@ int nm_os_kctx_worker_start(struct nm_kctx *); void nm_os_kctx_worker_stop(struct nm_kctx *); void nm_os_kctx_destroy(struct nm_kctx *); -void nm_os_kctx_worker_wakeup(struct nm_kctx *nmk); -void nm_os_kctx_send_irq(struct nm_kctx *); void nm_os_kctx_worker_setaff(struct nm_kctx *, int); u_int nm_os_ncpus(void); -#ifdef WITH_PTNETMAP_HOST +int netmap_sync_kloop(struct netmap_priv_d *priv, + struct nmreq_header *hdr); +int netmap_sync_kloop_stop(struct netmap_priv_d *priv); + +#ifdef WITH_PTNETMAP +/* ptnetmap guest routines */ + /* - * netmap adapter for host ptnetmap ports + * ptnetmap_memdev routines used to talk with ptnetmap_memdev device driver */ -struct netmap_pt_host_adapter { - struct netmap_adapter up; +struct ptnetmap_memdev; +int nm_os_pt_memdev_iomap(struct ptnetmap_memdev *, vm_paddr_t *, void **, + uint64_t *); +void nm_os_pt_memdev_iounmap(struct ptnetmap_memdev *); +uint32_t nm_os_pt_memdev_ioread(struct ptnetmap_memdev *, unsigned int); - /* the passed-through adapter */ - struct netmap_adapter *parent; - /* parent->na_flags, saved at NETMAP_PT_HOST_CREATE time, - * and restored at NETMAP_PT_HOST_DELETE time */ - uint32_t parent_na_flags; - - int (*parent_nm_notify)(struct netmap_kring *kring, int flags); - void *ptns; -}; - -/* ptnetmap host-side routines */ -int netmap_get_pt_host_na(struct nmreq_header *hdr, struct netmap_adapter **na, - struct netmap_mem_d * nmd, int create); -int ptnetmap_ctl(const char *nr_name, int create, struct netmap_adapter *na); - -static inline int -nm_ptnetmap_host_on(struct netmap_adapter *na) -{ - return na && na->na_flags & NAF_PTNETMAP_HOST; -} -#else /* !WITH_PTNETMAP_HOST */ -#define netmap_get_pt_host_na(hdr, _2, _3, _4) \ - (((struct nmreq_register *)(uintptr_t)hdr->nr_body)->nr_flags & (NR_PTNETMAP_HOST) ? 
EOPNOTSUPP : 0) -#define ptnetmap_ctl(_1, _2, _3) EINVAL -#define nm_ptnetmap_host_on(_1) EINVAL -#endif /* !WITH_PTNETMAP_HOST */ - -#ifdef WITH_PTNETMAP_GUEST -/* ptnetmap GUEST routines */ - /* * netmap adapter for guest ptnetmap ports */ @@ -2218,27 +2228,84 @@ * network stack and netmap clients. * Used to decide when we need (de)allocate krings/rings and * start (stop) ptnetmap kthreads. */ - int backend_regifs; + int backend_users; }; int netmap_pt_guest_attach(struct netmap_adapter *na, unsigned int nifp_offset, unsigned int memid); -struct ptnet_csb_gh; -struct ptnet_csb_hg; -bool netmap_pt_guest_txsync(struct ptnet_csb_gh *ptgh, - struct ptnet_csb_hg *pthg, - struct netmap_kring *kring, - int flags); -bool netmap_pt_guest_rxsync(struct ptnet_csb_gh *ptgh, - struct ptnet_csb_hg *pthg, +bool netmap_pt_guest_txsync(struct nm_csb_atok *atok, + struct nm_csb_ktoa *ktoa, struct netmap_kring *kring, int flags); +bool netmap_pt_guest_rxsync(struct nm_csb_atok *atok, + struct nm_csb_ktoa *ktoa, + struct netmap_kring *kring, int flags); int ptnet_nm_krings_create(struct netmap_adapter *na); void ptnet_nm_krings_delete(struct netmap_adapter *na); void ptnet_nm_dtor(struct netmap_adapter *na); -#endif /* WITH_PTNETMAP_GUEST */ +/* Guest driver: Write kring pointers (cur, head) to the CSB. + * This routine is coupled with ptnetmap_host_read_kring_csb(). */ +static inline void +ptnetmap_guest_write_kring_csb(struct nm_csb_atok *atok, uint32_t cur, + uint32_t head) +{ + /* + * We need to write cur and head to the CSB but we cannot do it atomically. + * There is no way we can prevent the host from reading the updated value + * of one of the two and the old value of the other. However, if we make + * sure that the host never reads a value of head more recent than the + * value of cur we are safe. We can allow the host to read a value of cur + * more recent than the value of head, since in the netmap ring cur can be + * ahead of head and cur cannot wrap around head because it must be behind + * tail. Inverting the order of writes below could instead result into the + * host to think head went ahead of cur, which would cause the sync + * prologue to fail. + * + * The following memory barrier scheme is used to make this happen: + * + * Guest Host + * + * STORE(cur) LOAD(head) + * mb() <-----------> mb() + * STORE(head) LOAD(cur) + */ + atok->cur = cur; + nm_stst_barrier(); + atok->head = head; +} + +/* Guest driver: Read kring pointers (hwcur, hwtail) from the CSB. + * This routine is coupled with ptnetmap_host_write_kring_csb(). */ +static inline void +ptnetmap_guest_read_kring_csb(struct nm_csb_ktoa *ktoa, + struct netmap_kring *kring) +{ + /* + * We place a memory barrier to make sure that the update of hwtail never + * overtakes the update of hwcur. + * (see explanation in ptnetmap_host_write_kring_csb). + */ + kring->nr_hwtail = ktoa->hwtail; + nm_stst_barrier(); + kring->nr_hwcur = ktoa->hwcur; +} + +/* Helper function wrapping ptnetmap_guest_read_kring_csb(). */ +static inline void +ptnet_sync_tail(struct nm_csb_ktoa *ktoa, struct netmap_kring *kring) +{ + struct netmap_ring *ring = kring->ring; + + /* Update hwcur and hwtail as known by the host. 
*/ + ptnetmap_guest_read_kring_csb(ktoa, kring); + + /* nm_sync_finalize */ + ring->tail = kring->rtail = kring->nr_hwtail; +} +#endif /* WITH_PTNETMAP */ + #ifdef __FreeBSD__ /* * FreeBSD mbuf allocator/deallocator in emulation mode: @@ -2354,5 +2421,17 @@ struct nmreq_option * nmreq_findoption(struct nmreq_option *, uint16_t); int nmreq_checkduplicate(struct nmreq_option *); + +int netmap_init_bridges(void); +void netmap_uninit_bridges(void); + +/* Functions to read and write CSB fields from the kernel. */ +#if defined (linux) +#define CSB_READ(csb, field, r) (get_user(r, &csb->field)) +#define CSB_WRITE(csb, field, v) (put_user(v, &csb->field)) +#else /* ! linux */ +#define CSB_READ(csb, field, r) (r = fuword32(&csb->field)) +#define CSB_WRITE(csb, field, v) (suword32(&csb->field, v)) +#endif /* ! linux */ #endif /* _NET_NETMAP_KERN_H_ */ Index: head/sys/dev/netmap/netmap_kloop.c =================================================================== --- head/sys/dev/netmap/netmap_kloop.c +++ head/sys/dev/netmap/netmap_kloop.c @@ -0,0 +1,916 @@ +/* + * Copyright (C) 2016-2018 Vincenzo Maffione + * Copyright (C) 2015 Stefano Garzarella + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +/* + * common headers + */ +#if defined(__FreeBSD__) +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define usleep_range(_1, _2) \ + pause_sbt("sync-kloop-sleep", SBT_1US * _1, SBT_1US * 1, C_ABSOLUTE) + +#elif defined(linux) +#include +#include +#include +#endif + +#include +#include +#include +#include + +/* Support for eventfd-based notifications. */ +#if defined(linux) +#define SYNC_KLOOP_POLL +#endif + +/* Write kring pointers (hwcur, hwtail) to the CSB. + * This routine is coupled with ptnetmap_guest_read_kring_csb(). */ +static inline void +sync_kloop_kernel_write(struct nm_csb_ktoa __user *ptr, uint32_t hwcur, + uint32_t hwtail) +{ + /* + * The same scheme used in ptnetmap_guest_write_kring_csb() applies here. + * We allow the application to read a value of hwcur more recent than the value + * of hwtail, since this would anyway result in a consistent view of the + * ring state (and hwcur can never wraparound hwtail, since hwcur must be + * behind head). 
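+	 * Symmetrically, the application side is expected to load hwtail before
+	 * hwcur, which is what ptnetmap_guest_read_kring_csb() does.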
+ * + * The following memory barrier scheme is used to make this happen: + * + * Application Kernel + * + * STORE(hwcur) LOAD(hwtail) + * mb() <-------------> mb() + * STORE(hwtail) LOAD(hwcur) + */ + CSB_WRITE(ptr, hwcur, hwcur); + nm_stst_barrier(); + CSB_WRITE(ptr, hwtail, hwtail); +} + +/* Read kring pointers (head, cur, sync_flags) from the CSB. + * This routine is coupled with ptnetmap_guest_write_kring_csb(). */ +static inline void +sync_kloop_kernel_read(struct nm_csb_atok __user *ptr, + struct netmap_ring *shadow_ring, + uint32_t num_slots) +{ + /* + * We place a memory barrier to make sure that the update of head never + * overtakes the update of cur. + * (see explanation in ptnetmap_guest_write_kring_csb). + */ + CSB_READ(ptr, head, shadow_ring->head); + nm_stst_barrier(); + CSB_READ(ptr, cur, shadow_ring->cur); + CSB_READ(ptr, sync_flags, shadow_ring->flags); +} + +/* Enable or disable application --> kernel kicks. */ +static inline void +csb_ktoa_kick_enable(struct nm_csb_ktoa __user *csb_ktoa, uint32_t val) +{ + CSB_WRITE(csb_ktoa, kern_need_kick, val); +} + +/* Are application interrupt enabled or disabled? */ +static inline uint32_t +csb_atok_intr_enabled(struct nm_csb_atok __user *csb_atok) +{ + uint32_t v; + + CSB_READ(csb_atok, appl_need_kick, v); + + return v; +} + +static inline void +sync_kloop_kring_dump(const char *title, const struct netmap_kring *kring) +{ + nm_prinf("%s - name: %s hwcur: %d hwtail: %d " + "rhead: %d rcur: %d rtail: %d", + title, kring->name, kring->nr_hwcur, kring->nr_hwtail, + kring->rhead, kring->rcur, kring->rtail); +} + +struct sync_kloop_ring_args { + struct netmap_kring *kring; + struct nm_csb_atok *csb_atok; + struct nm_csb_ktoa *csb_ktoa; +#ifdef SYNC_KLOOP_POLL + struct eventfd_ctx *irq_ctx; +#endif /* SYNC_KLOOP_POLL */ +}; + +static void +netmap_sync_kloop_tx_ring(const struct sync_kloop_ring_args *a) +{ + struct netmap_kring *kring = a->kring; + struct nm_csb_atok *csb_atok = a->csb_atok; + struct nm_csb_ktoa *csb_ktoa = a->csb_ktoa; + struct netmap_ring shadow_ring; /* shadow copy of the netmap_ring */ + bool more_txspace = false; + uint32_t num_slots; + int batch; + + num_slots = kring->nkr_num_slots; + + /* Disable application --> kernel notifications. */ + csb_ktoa_kick_enable(csb_ktoa, 0); + /* Copy the application kring pointers from the CSB */ + sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots); + + for (;;) { + batch = shadow_ring.head - kring->nr_hwcur; + if (batch < 0) + batch += num_slots; + +#ifdef PTN_TX_BATCH_LIM + if (batch > PTN_TX_BATCH_LIM(num_slots)) { + /* If application moves ahead too fast, let's cut the move so + * that we don't exceed our batch limit. */ + uint32_t head_lim = kring->nr_hwcur + PTN_TX_BATCH_LIM(num_slots); + + if (head_lim >= num_slots) + head_lim -= num_slots; + nm_prdis(1, "batch: %d head: %d head_lim: %d", batch, shadow_ring.head, + head_lim); + shadow_ring.head = head_lim; + batch = PTN_TX_BATCH_LIM(num_slots); + } +#endif /* PTN_TX_BATCH_LIM */ + + if (nm_kr_txspace(kring) <= (num_slots >> 1)) { + shadow_ring.flags |= NAF_FORCE_RECLAIM; + } + + /* Netmap prologue */ + shadow_ring.tail = kring->rtail; + if (unlikely(nm_txsync_prologue(kring, &shadow_ring) >= num_slots)) { + /* Reinit ring and enable notifications. */ + netmap_ring_reinit(kring); + csb_ktoa_kick_enable(csb_ktoa, 1); + break; + } + + if (unlikely(netmap_debug & NM_DEBUG_TXSYNC)) { + sync_kloop_kring_dump("pre txsync", kring); + } + + if (unlikely(kring->nm_sync(kring, shadow_ring.flags))) { + /* Reenable notifications. 
*/ + csb_ktoa_kick_enable(csb_ktoa, 1); + nm_prerr("txsync() failed"); + break; + } + + /* + * Finalize + * Copy kernel hwcur and hwtail into the CSB for the application sync(), and + * do the nm_sync_finalize. + */ + sync_kloop_kernel_write(csb_ktoa, kring->nr_hwcur, + kring->nr_hwtail); + if (kring->rtail != kring->nr_hwtail) { + /* Some more room available in the parent adapter. */ + kring->rtail = kring->nr_hwtail; + more_txspace = true; + } + + if (unlikely(netmap_debug & NM_DEBUG_TXSYNC)) { + sync_kloop_kring_dump("post txsync", kring); + } + + /* Interrupt the application if needed. */ +#ifdef SYNC_KLOOP_POLL + if (a->irq_ctx && more_txspace && csb_atok_intr_enabled(csb_atok)) { + /* Disable application kick to avoid sending unnecessary kicks */ + eventfd_signal(a->irq_ctx, 1); + more_txspace = false; + } +#endif /* SYNC_KLOOP_POLL */ + + /* Read CSB to see if there is more work to do. */ + sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots); + if (shadow_ring.head == kring->rhead) { + /* + * No more packets to transmit. We enable notifications and + * go to sleep, waiting for a kick from the application when new + * new slots are ready for transmission. + */ + /* Reenable notifications. */ + csb_ktoa_kick_enable(csb_ktoa, 1); + /* Doublecheck. */ + sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots); + if (shadow_ring.head != kring->rhead) { + /* We won the race condition, there are more packets to + * transmit. Disable notifications and do another cycle */ + csb_ktoa_kick_enable(csb_ktoa, 0); + continue; + } + break; + } + + if (nm_kr_txempty(kring)) { + /* No more available TX slots. We stop waiting for a notification + * from the backend (netmap_tx_irq). */ + nm_prdis(1, "TX ring"); + break; + } + } + +#ifdef SYNC_KLOOP_POLL + if (a->irq_ctx && more_txspace && csb_atok_intr_enabled(csb_atok)) { + eventfd_signal(a->irq_ctx, 1); + } +#endif /* SYNC_KLOOP_POLL */ +} + +/* RX cycle without receive any packets */ +#define SYNC_LOOP_RX_DRY_CYCLES_MAX 2 + +static inline int +sync_kloop_norxslots(struct netmap_kring *kring, uint32_t g_head) +{ + return (NM_ACCESS_ONCE(kring->nr_hwtail) == nm_prev(g_head, + kring->nkr_num_slots - 1)); +} + +static void +netmap_sync_kloop_rx_ring(const struct sync_kloop_ring_args *a) +{ + + struct netmap_kring *kring = a->kring; + struct nm_csb_atok *csb_atok = a->csb_atok; + struct nm_csb_ktoa *csb_ktoa = a->csb_ktoa; + struct netmap_ring shadow_ring; /* shadow copy of the netmap_ring */ + int dry_cycles = 0; + bool some_recvd = false; + uint32_t num_slots; + + num_slots = kring->nkr_num_slots; + + /* Get RX csb_atok and csb_ktoa pointers from the CSB. */ + num_slots = kring->nkr_num_slots; + + /* Disable notifications. */ + csb_ktoa_kick_enable(csb_ktoa, 0); + /* Copy the application kring pointers from the CSB */ + sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots); + + for (;;) { + uint32_t hwtail; + + /* Netmap prologue */ + shadow_ring.tail = kring->rtail; + if (unlikely(nm_rxsync_prologue(kring, &shadow_ring) >= num_slots)) { + /* Reinit ring and enable notifications. */ + netmap_ring_reinit(kring); + csb_ktoa_kick_enable(csb_ktoa, 1); + break; + } + + if (unlikely(netmap_debug & NM_DEBUG_RXSYNC)) { + sync_kloop_kring_dump("pre rxsync", kring); + } + + if (unlikely(kring->nm_sync(kring, shadow_ring.flags))) { + /* Reenable notifications. 
*/ + csb_ktoa_kick_enable(csb_ktoa, 1); + nm_prerr("rxsync() failed"); + break; + } + + /* + * Finalize + * Copy kernel hwcur and hwtail into the CSB for the application sync() + */ + hwtail = NM_ACCESS_ONCE(kring->nr_hwtail); + sync_kloop_kernel_write(csb_ktoa, kring->nr_hwcur, hwtail); + if (kring->rtail != hwtail) { + kring->rtail = hwtail; + some_recvd = true; + dry_cycles = 0; + } else { + dry_cycles++; + } + + if (unlikely(netmap_debug & NM_DEBUG_RXSYNC)) { + sync_kloop_kring_dump("post rxsync", kring); + } + +#ifdef SYNC_KLOOP_POLL + /* Interrupt the application if needed. */ + if (a->irq_ctx && some_recvd && csb_atok_intr_enabled(csb_atok)) { + /* Disable application kick to avoid sending unnecessary kicks */ + eventfd_signal(a->irq_ctx, 1); + some_recvd = false; + } +#endif /* SYNC_KLOOP_POLL */ + + /* Read CSB to see if there is more work to do. */ + sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots); + if (sync_kloop_norxslots(kring, shadow_ring.head)) { + /* + * No more slots available for reception. We enable notification and + * go to sleep, waiting for a kick from the application when new receive + * slots are available. + */ + /* Reenable notifications. */ + csb_ktoa_kick_enable(csb_ktoa, 1); + /* Doublecheck. */ + sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots); + if (!sync_kloop_norxslots(kring, shadow_ring.head)) { + /* We won the race condition, more slots are available. Disable + * notifications and do another cycle. */ + csb_ktoa_kick_enable(csb_ktoa, 0); + continue; + } + break; + } + + hwtail = NM_ACCESS_ONCE(kring->nr_hwtail); + if (unlikely(hwtail == kring->rhead || + dry_cycles >= SYNC_LOOP_RX_DRY_CYCLES_MAX)) { + /* No more packets to be read from the backend. We stop and + * wait for a notification from the backend (netmap_rx_irq). */ + nm_prdis(1, "nr_hwtail: %d rhead: %d dry_cycles: %d", + hwtail, kring->rhead, dry_cycles); + break; + } + } + + nm_kr_put(kring); + +#ifdef SYNC_KLOOP_POLL + /* Interrupt the application if needed. */ + if (a->irq_ctx && some_recvd && csb_atok_intr_enabled(csb_atok)) { + eventfd_signal(a->irq_ctx, 1); + } +#endif /* SYNC_KLOOP_POLL */ +} + +#ifdef SYNC_KLOOP_POLL +struct sync_kloop_poll_entry { + /* Support for receiving notifications from + * a netmap ring or from the application. */ + struct file *filp; + wait_queue_t wait; + wait_queue_head_t *wqh; + + /* Support for sending notifications to the application. */ + struct eventfd_ctx *irq_ctx; + struct file *irq_filp; +}; + +struct sync_kloop_poll_ctx { + poll_table wait_table; + unsigned int next_entry; + unsigned int num_entries; + struct sync_kloop_poll_entry entries[0]; +}; + +static void +sync_kloop_poll_table_queue_proc(struct file *file, wait_queue_head_t *wqh, + poll_table *pt) +{ + struct sync_kloop_poll_ctx *poll_ctx = + container_of(pt, struct sync_kloop_poll_ctx, wait_table); + struct sync_kloop_poll_entry *entry = poll_ctx->entries + + poll_ctx->next_entry; + + BUG_ON(poll_ctx->next_entry >= poll_ctx->num_entries); + entry->wqh = wqh; + entry->filp = file; + /* Use the default wake up function. 
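+	 * It simply wakes up the kloop thread, which then rescans all the
+	 * rings bound to this file descriptor.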
*/ + init_waitqueue_entry(&entry->wait, current); + add_wait_queue(wqh, &entry->wait); + poll_ctx->next_entry++; +} +#endif /* SYNC_KLOOP_POLL */ + +int +netmap_sync_kloop(struct netmap_priv_d *priv, struct nmreq_header *hdr) +{ + struct nmreq_sync_kloop_start *req = + (struct nmreq_sync_kloop_start *)(uintptr_t)hdr->nr_body; + struct nmreq_opt_sync_kloop_eventfds *eventfds_opt = NULL; +#ifdef SYNC_KLOOP_POLL + struct sync_kloop_poll_ctx *poll_ctx = NULL; +#endif /* SYNC_KLOOP_POLL */ + int num_rx_rings, num_tx_rings, num_rings; + uint32_t sleep_us = req->sleep_us; + struct nm_csb_atok* csb_atok_base; + struct nm_csb_ktoa* csb_ktoa_base; + struct netmap_adapter *na; + struct nmreq_option *opt; + int err = 0; + int i; + + if (sleep_us > 1000000) { + /* We do not accept sleeping for more than a second. */ + return EINVAL; + } + + if (priv->np_nifp == NULL) { + return ENXIO; + } + mb(); /* make sure following reads are not from cache */ + + na = priv->np_na; + if (!nm_netmap_on(na)) { + return ENXIO; + } + + NMG_LOCK(); + /* Make sure the application is working in CSB mode. */ + if (!priv->np_csb_atok_base || !priv->np_csb_ktoa_base) { + NMG_UNLOCK(); + nm_prerr("sync-kloop on %s requires " + "NETMAP_REQ_OPT_CSB option", na->name); + return EINVAL; + } + + csb_atok_base = priv->np_csb_atok_base; + csb_ktoa_base = priv->np_csb_ktoa_base; + + /* Make sure that no kloop is currently running. */ + if (priv->np_kloop_state & NM_SYNC_KLOOP_RUNNING) { + err = EBUSY; + } + priv->np_kloop_state |= NM_SYNC_KLOOP_RUNNING; + NMG_UNLOCK(); + if (err) { + return err; + } + + num_rx_rings = priv->np_qlast[NR_RX] - priv->np_qfirst[NR_RX]; + num_tx_rings = priv->np_qlast[NR_TX] - priv->np_qfirst[NR_TX]; + num_rings = num_tx_rings + num_rx_rings; + + /* Validate notification options. */ + opt = nmreq_findoption((struct nmreq_option *)(uintptr_t)hdr->nr_options, + NETMAP_REQ_OPT_SYNC_KLOOP_EVENTFDS); + if (opt != NULL) { + err = nmreq_checkduplicate(opt); + if (err) { + opt->nro_status = err; + goto out; + } + if (opt->nro_size != sizeof(*eventfds_opt) + + sizeof(eventfds_opt->eventfds[0]) * num_rings) { + /* Option size not consistent with the number of + * entries. */ + opt->nro_status = err = EINVAL; + goto out; + } +#ifdef SYNC_KLOOP_POLL + eventfds_opt = (struct nmreq_opt_sync_kloop_eventfds *)opt; + opt->nro_status = 0; + /* We need 2 poll entries for TX and RX notifications coming + * from the netmap adapter, plus one entries per ring for the + * notifications coming from the application. */ + poll_ctx = nm_os_malloc(sizeof(*poll_ctx) + + (2 + num_rings) * sizeof(poll_ctx->entries[0])); + init_poll_funcptr(&poll_ctx->wait_table, + sync_kloop_poll_table_queue_proc); + poll_ctx->num_entries = 2 + num_rings; + poll_ctx->next_entry = 0; + /* Poll for notifications coming from the applications through + * eventfds . 
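+	 * Each ring provides an ioeventfd (application kicks the kernel) and
+	 * an irqfd (kernel notifies the application), TX rings first.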
*/ + for (i = 0; i < num_rings; i++) { + struct eventfd_ctx *irq; + struct file *filp; + unsigned long mask; + + filp = eventfd_fget(eventfds_opt->eventfds[i].ioeventfd); + if (IS_ERR(filp)) { + err = PTR_ERR(filp); + goto out; + } + mask = filp->f_op->poll(filp, &poll_ctx->wait_table); + if (mask & POLLERR) { + err = EINVAL; + goto out; + } + + filp = eventfd_fget(eventfds_opt->eventfds[i].irqfd); + if (IS_ERR(filp)) { + err = PTR_ERR(filp); + goto out; + } + poll_ctx->entries[i].irq_filp = filp; + irq = eventfd_ctx_fileget(filp); + if (IS_ERR(irq)) { + err = PTR_ERR(irq); + goto out; + } + poll_ctx->entries[i].irq_ctx = irq; + } + /* Poll for notifications coming from the netmap rings bound to + * this file descriptor. */ + { + NM_SELINFO_T *si[NR_TXRX]; + + NMG_LOCK(); + si[NR_RX] = nm_si_user(priv, NR_RX) ? &na->si[NR_RX] : + &na->rx_rings[priv->np_qfirst[NR_RX]]->si; + si[NR_TX] = nm_si_user(priv, NR_TX) ? &na->si[NR_TX] : + &na->tx_rings[priv->np_qfirst[NR_TX]]->si; + NMG_UNLOCK(); + poll_wait(priv->np_filp, si[NR_RX], &poll_ctx->wait_table); + poll_wait(priv->np_filp, si[NR_TX], &poll_ctx->wait_table); + } +#else /* SYNC_KLOOP_POLL */ + opt->nro_status = EOPNOTSUPP; + goto out; +#endif /* SYNC_KLOOP_POLL */ + } + + /* Main loop. */ + for (;;) { + if (unlikely(NM_ACCESS_ONCE(priv->np_kloop_state) & NM_SYNC_KLOOP_STOPPING)) { + break; + } + +#ifdef SYNC_KLOOP_POLL + if (poll_ctx) + __set_current_state(TASK_INTERRUPTIBLE); +#endif /* SYNC_KLOOP_POLL */ + + /* Process all the TX rings bound to this file descriptor. */ + for (i = 0; i < num_tx_rings; i++) { + struct sync_kloop_ring_args a = { + .kring = NMR(na, NR_TX)[i + priv->np_qfirst[NR_TX]], + .csb_atok = csb_atok_base + i, + .csb_ktoa = csb_ktoa_base + i, + }; + +#ifdef SYNC_KLOOP_POLL + if (poll_ctx) + a.irq_ctx = poll_ctx->entries[i].irq_ctx; +#endif /* SYNC_KLOOP_POLL */ + if (unlikely(nm_kr_tryget(a.kring, 1, NULL))) { + continue; + } + netmap_sync_kloop_tx_ring(&a); + nm_kr_put(a.kring); + } + + /* Process all the RX rings bound to this file descriptor. */ + for (i = 0; i < num_rx_rings; i++) { + struct sync_kloop_ring_args a = { + .kring = NMR(na, NR_RX)[i + priv->np_qfirst[NR_RX]], + .csb_atok = csb_atok_base + num_tx_rings + i, + .csb_ktoa = csb_ktoa_base + num_tx_rings + i, + }; + +#ifdef SYNC_KLOOP_POLL + if (poll_ctx) + a.irq_ctx = poll_ctx->entries[num_tx_rings + i].irq_ctx; +#endif /* SYNC_KLOOP_POLL */ + + if (unlikely(nm_kr_tryget(a.kring, 1, NULL))) { + continue; + } + netmap_sync_kloop_rx_ring(&a); + nm_kr_put(a.kring); + } + +#ifdef SYNC_KLOOP_POLL + if (poll_ctx) { + /* If a poll context is present, yield to the scheduler + * waiting for a notification to come either from + * netmap or the application. */ + schedule_timeout_interruptible(msecs_to_jiffies(1000)); + } else +#endif /* SYNC_KLOOP_POLL */ + { + /* Default synchronization method: sleep for a while. */ + usleep_range(sleep_us, sleep_us); + } + } +out: +#ifdef SYNC_KLOOP_POLL + if (poll_ctx) { + /* Stop polling from netmap and the eventfds, and deallocate + * the poll context. */ + __set_current_state(TASK_RUNNING); + for (i = 0; i < poll_ctx->next_entry; i++) { + struct sync_kloop_poll_entry *entry = + poll_ctx->entries + i; + + if (entry->wqh) + remove_wait_queue(entry->wqh, &entry->wait); + /* We did not get a reference to the eventfds, but + * don't do that on netmap file descriptors (since + * a reference was not taken. 
*/ + if (entry->filp && entry->filp != priv->np_filp) + fput(entry->filp); + if (entry->irq_ctx) + eventfd_ctx_put(entry->irq_ctx); + if (entry->irq_filp) + fput(entry->irq_filp); + } + nm_os_free(poll_ctx); + poll_ctx = NULL; + } +#endif /* SYNC_KLOOP_POLL */ + + /* Reset the kloop state. */ + NMG_LOCK(); + priv->np_kloop_state = 0; + NMG_UNLOCK(); + + return err; +} + +int +netmap_sync_kloop_stop(struct netmap_priv_d *priv) +{ + bool running = true; + int err = 0; + + NMG_LOCK(); + priv->np_kloop_state |= NM_SYNC_KLOOP_STOPPING; + NMG_UNLOCK(); + while (running) { + usleep_range(1000, 1500); + NMG_LOCK(); + running = (NM_ACCESS_ONCE(priv->np_kloop_state) + & NM_SYNC_KLOOP_RUNNING); + NMG_UNLOCK(); + } + + return err; +} + +#ifdef WITH_PTNETMAP +/* + * Guest ptnetmap txsync()/rxsync() routines, used in ptnet device drivers. + * These routines are reused across the different operating systems supported + * by netmap. + */ + +/* + * Reconcile host and guest views of the transmit ring. + * + * Guest user wants to transmit packets up to the one before ring->head, + * and guest kernel knows tx_ring->hwcur is the first packet unsent + * by the host kernel. + * + * We push out as many packets as possible, and possibly + * reclaim buffers from previously completed transmission. + * + * Notifications from the host are enabled only if the user guest would + * block (no space in the ring). + */ +bool +netmap_pt_guest_txsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa, + struct netmap_kring *kring, int flags) +{ + bool notify = false; + + /* Disable notifications */ + atok->appl_need_kick = 0; + + /* + * First part: tell the host (updating the CSB) to process the new + * packets. + */ + kring->nr_hwcur = ktoa->hwcur; + ptnetmap_guest_write_kring_csb(atok, kring->rcur, kring->rhead); + + /* Ask for a kick from a guest to the host if needed. */ + if (((kring->rhead != kring->nr_hwcur || nm_kr_txempty(kring)) + && NM_ACCESS_ONCE(ktoa->kern_need_kick)) || + (flags & NAF_FORCE_RECLAIM)) { + atok->sync_flags = flags; + notify = true; + } + + /* + * Second part: reclaim buffers for completed transmissions. + */ + if (nm_kr_txempty(kring) || (flags & NAF_FORCE_RECLAIM)) { + ptnetmap_guest_read_kring_csb(ktoa, kring); + } + + /* + * No more room in the ring for new transmissions. The user thread will + * go to sleep and we need to be notified by the host when more free + * space is available. + */ + if (nm_kr_txempty(kring) && !(kring->nr_kflags & NKR_NOINTR)) { + /* Reenable notifications. */ + atok->appl_need_kick = 1; + /* Double check */ + ptnetmap_guest_read_kring_csb(ktoa, kring); + /* If there is new free space, disable notifications */ + if (unlikely(!nm_kr_txempty(kring))) { + atok->appl_need_kick = 0; + } + } + + nm_prdis(1, "%s CSB(head:%u cur:%u hwtail:%u) KRING(head:%u cur:%u tail:%u)", + kring->name, atok->head, atok->cur, ktoa->hwtail, + kring->rhead, kring->rcur, kring->nr_hwtail); + + return notify; +} + +/* + * Reconcile host and guest view of the receive ring. + * + * Update hwcur/hwtail from host (reading from CSB). + * + * If guest user has released buffers up to the one before ring->head, we + * also give them to the host. + * + * Notifications from the host are enabled only if the user guest would + * block (no more completed slots in the ring). 
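+ *
+ * Returns true if the caller should notify (kick) the host, false otherwise.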
+ */ +bool +netmap_pt_guest_rxsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa, + struct netmap_kring *kring, int flags) +{ + bool notify = false; + + /* Disable notifications */ + atok->appl_need_kick = 0; + + /* + * First part: import newly received packets, by updating the kring + * hwtail to the hwtail known from the host (read from the CSB). + * This also updates the kring hwcur. + */ + ptnetmap_guest_read_kring_csb(ktoa, kring); + kring->nr_kflags &= ~NKR_PENDINTR; + + /* + * Second part: tell the host about the slots that guest user has + * released, by updating cur and head in the CSB. + */ + if (kring->rhead != kring->nr_hwcur) { + ptnetmap_guest_write_kring_csb(atok, kring->rcur, + kring->rhead); + /* Ask for a kick from the guest to the host if needed. */ + if (NM_ACCESS_ONCE(ktoa->kern_need_kick)) { + atok->sync_flags = flags; + notify = true; + } + } + + /* + * No more completed RX slots. The user thread will go to sleep and + * we need to be notified by the host when more RX slots have been + * completed. + */ + if (nm_kr_rxempty(kring) && !(kring->nr_kflags & NKR_NOINTR)) { + /* Reenable notifications. */ + atok->appl_need_kick = 1; + /* Double check */ + ptnetmap_guest_read_kring_csb(ktoa, kring); + /* If there are new slots, disable notifications. */ + if (!nm_kr_rxempty(kring)) { + atok->appl_need_kick = 0; + } + } + + nm_prdis(1, "%s CSB(head:%u cur:%u hwtail:%u) KRING(head:%u cur:%u tail:%u)", + kring->name, atok->head, atok->cur, ktoa->hwtail, + kring->rhead, kring->rcur, kring->nr_hwtail); + + return notify; +} + +/* + * Callbacks for ptnet drivers: nm_krings_create, nm_krings_delete, nm_dtor. + */ +int +ptnet_nm_krings_create(struct netmap_adapter *na) +{ + struct netmap_pt_guest_adapter *ptna = + (struct netmap_pt_guest_adapter *)na; /* Upcast. */ + struct netmap_adapter *na_nm = &ptna->hwup.up; + struct netmap_adapter *na_dr = &ptna->dr.up; + int ret; + + if (ptna->backend_users) { + return 0; + } + + /* Create krings on the public netmap adapter. */ + ret = netmap_hw_krings_create(na_nm); + if (ret) { + return ret; + } + + /* Copy krings into the netmap adapter private to the driver. */ + na_dr->tx_rings = na_nm->tx_rings; + na_dr->rx_rings = na_nm->rx_rings; + + return 0; +} + +void +ptnet_nm_krings_delete(struct netmap_adapter *na) +{ + struct netmap_pt_guest_adapter *ptna = + (struct netmap_pt_guest_adapter *)na; /* Upcast. */ + struct netmap_adapter *na_nm = &ptna->hwup.up; + struct netmap_adapter *na_dr = &ptna->dr.up; + + if (ptna->backend_users) { + return; + } + + na_dr->tx_rings = NULL; + na_dr->rx_rings = NULL; + + netmap_hw_krings_delete(na_nm); +} + +void +ptnet_nm_dtor(struct netmap_adapter *na) +{ + struct netmap_pt_guest_adapter *ptna = + (struct netmap_pt_guest_adapter *)na; + + netmap_mem_put(ptna->dr.up.nm_mem); + memset(&ptna->dr, 0, sizeof(ptna->dr)); + netmap_mem_pt_guest_ifp_del(na->nm_mem, na->ifp); +} + +int +netmap_pt_guest_attach(struct netmap_adapter *arg, + unsigned int nifp_offset, unsigned int memid) +{ + struct netmap_pt_guest_adapter *ptna; + struct ifnet *ifp = arg ? 
arg->ifp : NULL; + int error; + + /* get allocator */ + arg->nm_mem = netmap_mem_pt_guest_new(ifp, nifp_offset, memid); + if (arg->nm_mem == NULL) + return ENOMEM; + arg->na_flags |= NAF_MEM_OWNER; + error = netmap_attach_ext(arg, sizeof(struct netmap_pt_guest_adapter), 1); + if (error) + return error; + + /* get the netmap_pt_guest_adapter */ + ptna = (struct netmap_pt_guest_adapter *) NA(ifp); + + /* Initialize a separate pass-through netmap adapter that is going to + * be used by the ptnet driver only, and so never exposed to netmap + * applications. We only need a subset of the available fields. */ + memset(&ptna->dr, 0, sizeof(ptna->dr)); + ptna->dr.up.ifp = ifp; + ptna->dr.up.nm_mem = netmap_mem_get(ptna->hwup.up.nm_mem); + ptna->dr.up.nm_config = ptna->hwup.up.nm_config; + + ptna->backend_users = 0; + + return 0; +} + +#endif /* WITH_PTNETMAP */ Index: head/sys/dev/netmap/netmap_legacy.c =================================================================== --- head/sys/dev/netmap/netmap_legacy.c +++ head/sys/dev/netmap/netmap_legacy.c @@ -56,6 +56,7 @@ */ #include #include +#include static int nmreq_register_from_legacy(struct nmreq *nmr, struct nmreq_header *hdr, @@ -80,10 +81,11 @@ } else { regmode = NR_REG_ALL_NIC; } - nmr->nr_flags = regmode | - (nmr->nr_flags & (~NR_REG_MASK)); + req->nr_mode = regmode; + } else { + req->nr_mode = nmr->nr_flags & NR_REG_MASK; } - req->nr_mode = nmr->nr_flags & NR_REG_MASK; + /* Fix nr_name, nr_mode and nr_ringid to handle pipe requests. */ if (req->nr_mode == NR_REG_PIPE_MASTER || req->nr_mode == NR_REG_PIPE_SLAVE) { @@ -131,7 +133,7 @@ /* First prepare the request header. */ hdr->nr_version = NETMAP_API; /* new API */ - strncpy(hdr->nr_name, nmr->nr_name, sizeof(nmr->nr_name)); + strlcpy(hdr->nr_name, nmr->nr_name, sizeof(nmr->nr_name)); hdr->nr_options = (uintptr_t)NULL; hdr->nr_body = (uintptr_t)NULL; @@ -221,7 +223,7 @@ } case NETMAP_PT_HOST_CREATE: case NETMAP_PT_HOST_DELETE: { - D("Netmap passthrough not supported yet"); + nm_prerr("Netmap passthrough not supported yet"); return NULL; break; } @@ -242,7 +244,6 @@ if (!req) { goto oom; } hdr->nr_body = (uintptr_t)req; hdr->nr_reqtype = NETMAP_REQ_PORT_INFO_GET; - req->nr_offset = nmr->nr_offset; req->nr_memsize = nmr->nr_memsize; req->nr_tx_slots = nmr->nr_tx_slots; req->nr_rx_slots = nmr->nr_rx_slots; @@ -262,7 +263,7 @@ } nm_os_free(hdr); } - D("Failed to allocate memory for nmreq_xyz struct"); + nm_prerr("Failed to allocate memory for nmreq_xyz struct"); return NULL; } @@ -300,7 +301,6 @@ case NETMAP_REQ_PORT_INFO_GET: { struct nmreq_port_info_get *req = (struct nmreq_port_info_get *)(uintptr_t)hdr->nr_body; - nmr->nr_offset = req->nr_offset; nmr->nr_memsize = req->nr_memsize; nmr->nr_tx_slots = req->nr_tx_slots; nmr->nr_rx_slots = req->nr_rx_slots; @@ -321,7 +321,7 @@ case NETMAP_REQ_VALE_LIST: { struct nmreq_vale_list *req = (struct nmreq_vale_list *)(uintptr_t)hdr->nr_body; - strncpy(nmr->nr_name, hdr->nr_name, sizeof(nmr->nr_name)); + strlcpy(nmr->nr_name, hdr->nr_name, sizeof(nmr->nr_name)); nmr->nr_arg1 = req->nr_bridge_idx; nmr->nr_arg2 = req->nr_port_idx; break; Index: head/sys/dev/netmap/netmap_mem2.h =================================================================== --- head/sys/dev/netmap/netmap_mem2.h +++ head/sys/dev/netmap/netmap_mem2.h @@ -158,14 +158,14 @@ ({ int *perr = _perr; if (perr) *(perr) = EOPNOTSUPP; NULL; }) #endif /* WITH_EXTMEM */ -#ifdef WITH_PTNETMAP_GUEST +#ifdef WITH_PTNETMAP struct netmap_mem_d* netmap_mem_pt_guest_new(struct ifnet *, unsigned int 
nifp_offset, unsigned int memid); struct ptnetmap_memdev; struct netmap_mem_d* netmap_mem_pt_guest_attach(struct ptnetmap_memdev *, uint16_t); int netmap_mem_pt_guest_ifp_del(struct netmap_mem_d *, struct ifnet *); -#endif /* WITH_PTNETMAP_GUEST */ +#endif /* WITH_PTNETMAP */ int netmap_mem_pools_info_get(struct nmreq_pools_info *, struct netmap_mem_d *); Index: head/sys/dev/netmap/netmap_mem2.c =================================================================== --- head/sys/dev/netmap/netmap_mem2.c +++ head/sys/dev/netmap/netmap_mem2.c @@ -318,7 +318,7 @@ #ifdef NM_DEBUG_MEM_PUTGET #define NM_DBG_REFC(nmd, func, line) \ - nm_prinf("%s:%d mem[%d] -> %d\n", func, line, (nmd)->nm_id, (nmd)->refcount); + nm_prinf("%d mem[%d] -> %d", line, (nmd)->nm_id, (nmd)->refcount); #else #define NM_DBG_REFC(nmd, func, line) #endif @@ -397,15 +397,15 @@ if (p->bitmap == NULL) { /* Allocate the bitmap */ n = (p->objtotal + 31) / 32; - p->bitmap = nm_os_malloc(sizeof(uint32_t) * n); + p->bitmap = nm_os_malloc(sizeof(p->bitmap[0]) * n); if (p->bitmap == NULL) { - D("Unable to create bitmap (%d entries) for allocator '%s'", (int)n, + nm_prerr("Unable to create bitmap (%d entries) for allocator '%s'", (int)n, p->name); return ENOMEM; } p->bitmap_slots = n; } else { - memset(p->bitmap, 0, p->bitmap_slots); + memset(p->bitmap, 0, p->bitmap_slots * sizeof(p->bitmap[0])); } p->objfree = 0; @@ -416,16 +416,21 @@ */ for (j = 0; j < p->objtotal; j++) { if (p->invalid_bitmap && nm_isset(p->invalid_bitmap, j)) { - D("skipping %s %d", p->name, j); + if (netmap_debug & NM_DEBUG_MEM) + nm_prinf("skipping %s %d", p->name, j); continue; } p->bitmap[ (j>>5) ] |= ( 1U << (j & 31U) ); p->objfree++; } - ND("%s free %u", p->name, p->objfree); - if (p->objfree == 0) + if (netmap_verbose) + nm_prinf("%s free %u", p->name, p->objfree); + if (p->objfree == 0) { + if (netmap_verbose) + nm_prerr("%s: no objects available", p->name); return ENOMEM; + } return 0; } @@ -447,6 +452,7 @@ * buffers 0 and 1 are reserved */ if (nmd->pools[NETMAP_BUF_POOL].objfree < 2) { + nm_prerr("%s: not enough buffers", nmd->pools[NETMAP_BUF_POOL].name); return ENOMEM; } @@ -480,8 +486,10 @@ nmd->ops->nmd_deref(nmd); nmd->active--; - if (!nmd->active) + if (last_user) { nmd->nm_grp = -1; + nmd->lasterr = 0; + } NMA_UNLOCK(nmd); return last_user; @@ -720,16 +728,20 @@ { int err = 0, id; id = nm_iommu_group_id(dev); - if (netmap_verbose) - D("iommu_group %d", id); + if (netmap_debug & NM_DEBUG_MEM) + nm_prinf("iommu_group %d", id); NMA_LOCK(nmd); if (nmd->nm_grp < 0) nmd->nm_grp = id; - if (nmd->nm_grp != id) + if (nmd->nm_grp != id) { + if (netmap_verbose) + nm_prerr("iommu group mismatch: %u vs %u", + nmd->nm_grp, id); nmd->lasterr = err = ENOMEM; + } NMA_UNLOCK(nmd); return err; @@ -805,7 +817,7 @@ return pa; } /* this is only in case of errors */ - D("invalid ofs 0x%x out of 0x%x 0x%x 0x%x", (u_int)o, + nm_prerr("invalid ofs 0x%x out of 0x%x 0x%x 0x%x", (u_int)o, p[NETMAP_IF_POOL].memtotal, p[NETMAP_IF_POOL].memtotal + p[NETMAP_RING_POOL].memtotal, @@ -854,13 +866,13 @@ int i, j; if (netmap_mem_get_info(nmd, &memsize, &memflags, NULL)) { - D("memory not finalised yet"); + nm_prerr("memory not finalised yet"); return NULL; } mainMdl = IoAllocateMdl(NULL, memsize, FALSE, FALSE, NULL); if (mainMdl == NULL) { - D("failed to allocate mdl"); + nm_prerr("failed to allocate mdl"); return NULL; } @@ -876,7 +888,7 @@ tempMdl = IoAllocateMdl(p->lut[0].vaddr, clsz, FALSE, FALSE, NULL); if (tempMdl == NULL) { NMA_UNLOCK(nmd); - D("fail to allocate tempMdl"); + 
nm_prerr("fail to allocate tempMdl"); IoFreeMdl(mainMdl); return NULL; } @@ -971,7 +983,7 @@ p->name, ofs, i, vaddr); return ofs; } - D("address %p is not contained inside any cluster (%s)", + nm_prerr("address %p is not contained inside any cluster (%s)", vaddr, p->name); return 0; /* An error occurred */ } @@ -1002,12 +1014,12 @@ void *vaddr = NULL; if (len > p->_objsize) { - D("%s request size %d too large", p->name, len); + nm_prerr("%s request size %d too large", p->name, len); return NULL; } if (p->objfree == 0) { - D("no more %s objects", p->name); + nm_prerr("no more %s objects", p->name); return NULL; } if (start) @@ -1049,13 +1061,13 @@ uint32_t *ptr, mask; if (j >= p->objtotal) { - D("invalid index %u, max %u", j, p->objtotal); + nm_prerr("invalid index %u, max %u", j, p->objtotal); return 1; } ptr = &p->bitmap[j / 32]; mask = (1 << (j % 32)); if (*ptr & mask) { - D("ouch, double free on buffer %d", j); + nm_prerr("ouch, double free on buffer %d", j); return 1; } else { *ptr |= mask; @@ -1086,7 +1098,7 @@ netmap_obj_free(p, j); return; } - D("address %p is not contained inside any cluster (%s)", + nm_prerr("address %p is not contained inside any cluster (%s)", vaddr, p->name); } @@ -1127,7 +1139,7 @@ uint32_t cur = *head; /* save current head */ uint32_t *p = netmap_buf_malloc(nmd, &pos, head); if (p == NULL) { - D("no more buffers after %d of %d", i, n); + nm_prerr("no more buffers after %d of %d", i, n); *head = cur; /* restore */ break; } @@ -1158,9 +1170,9 @@ break; } if (head != 0) - D("breaking with head %d", head); - if (netmap_verbose) - D("freed %d buffers", i); + nm_prerr("breaking with head %d", head); + if (netmap_debug & NM_DEBUG_MEM) + nm_prinf("freed %d buffers", i); } @@ -1176,7 +1188,7 @@ for (i = 0; i < n; i++) { void *vaddr = netmap_buf_malloc(nmd, &pos, &index); if (vaddr == NULL) { - D("no more buffers after %d of %d", i, n); + nm_prerr("no more buffers after %d of %d", i, n); goto cleanup; } slot[i].buf_idx = index; @@ -1217,7 +1229,7 @@ struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL]; if (i < 2 || i >= p->objtotal) { - D("Cannot free buf#%d: should be in [2, %d[", i, p->objtotal); + nm_prerr("Cannot free buf#%d: should be in [2, %d[", i, p->objtotal); return; } netmap_obj_free(p, i); @@ -1317,22 +1329,22 @@ #define LINE_ROUND NM_CACHE_ALIGN // 64 if (objsize >= MAX_CLUSTSIZE) { /* we could do it but there is no point */ - D("unsupported allocation for %d bytes", objsize); + nm_prerr("unsupported allocation for %d bytes", objsize); return EINVAL; } /* make sure objsize is a multiple of LINE_ROUND */ i = (objsize & (LINE_ROUND - 1)); if (i) { - D("XXX aligning object by %d bytes", LINE_ROUND - i); + nm_prinf("aligning object by %d bytes", LINE_ROUND - i); objsize += LINE_ROUND - i; } if (objsize < p->objminsize || objsize > p->objmaxsize) { - D("requested objsize %d out of range [%d, %d]", + nm_prerr("requested objsize %d out of range [%d, %d]", objsize, p->objminsize, p->objmaxsize); return EINVAL; } if (objtotal < p->nummin || objtotal > p->nummax) { - D("requested objtotal %d out of range [%d, %d]", + nm_prerr("requested objtotal %d out of range [%d, %d]", objtotal, p->nummin, p->nummax); return EINVAL; } @@ -1354,13 +1366,13 @@ } /* exact solution not found */ if (clustentries == 0) { - D("unsupported allocation for %d bytes", objsize); + nm_prerr("unsupported allocation for %d bytes", objsize); return EINVAL; } /* compute clustsize */ clustsize = clustentries * objsize; - if (netmap_verbose) - D("objsize %d clustsize %d objects %d", + if 
(netmap_debug & NM_DEBUG_MEM) + nm_prinf("objsize %d clustsize %d objects %d", objsize, clustsize, clustentries); /* @@ -1403,7 +1415,7 @@ p->lut = nm_alloc_lut(p->objtotal); if (p->lut == NULL) { - D("Unable to create lookup table for '%s'", p->name); + nm_prerr("Unable to create lookup table for '%s'", p->name); goto clean; } @@ -1430,7 +1442,7 @@ * If we get here, there is a severe memory shortage, * so halve the allocated memory to reclaim some. */ - D("Unable to create cluster at %d for '%s' allocator", + nm_prerr("Unable to create cluster at %d for '%s' allocator", i, p->name); if (i < 2) /* nothing to halve */ goto out; @@ -1466,7 +1478,7 @@ } p->memtotal = p->numclusters * p->_clustsize; if (netmap_verbose) - D("Pre-allocated %d clusters (%d/%dKB) for '%s'", + nm_prinf("Pre-allocated %d clusters (%d/%dKB) for '%s'", p->numclusters, p->_clustsize >> 10, p->memtotal >> 10, p->name); @@ -1498,8 +1510,8 @@ { int i; - if (netmap_verbose) - D("resetting %p", nmd); + if (netmap_debug & NM_DEBUG_MEM) + nm_prinf("resetting %p", nmd); for (i = 0; i < NETMAP_POOLS_NR; i++) { netmap_reset_obj_allocator(&nmd->pools[i]); } @@ -1525,7 +1537,7 @@ (void)i; (void)lim; (void)lut; - D("unsupported on Windows"); + nm_prerr("unsupported on Windows"); #else /* linux */ ND("unmapping and freeing plut for %s", na->name); if (lut->plut == NULL) @@ -1561,7 +1573,7 @@ (void)i; (void)lim; (void)lut; - D("unsupported on Windows"); + nm_prerr("unsupported on Windows"); #else /* linux */ if (lut->plut != NULL) { @@ -1572,7 +1584,7 @@ ND("allocating physical lut for %s", na->name); lut->plut = nm_alloc_plut(lim); if (lut->plut == NULL) { - D("Failed to allocate physical lut for %s", na->name); + nm_prerr("Failed to allocate physical lut for %s", na->name); return ENOMEM; } @@ -1589,7 +1601,7 @@ error = netmap_load_map(na, (bus_dma_tag_t) na->pdev, &lut->plut[i].paddr, p->lut[i].vaddr, p->_clustsize); if (error) { - D("Failed to map cluster #%d from the %s pool", i, p->name); + nm_prerr("Failed to map cluster #%d from the %s pool", i, p->name); break; } @@ -1627,13 +1639,13 @@ nmd->flags |= NETMAP_MEM_FINALIZED; if (netmap_verbose) - D("interfaces %d KB, rings %d KB, buffers %d MB", + nm_prinf("interfaces %d KB, rings %d KB, buffers %d MB", nmd->pools[NETMAP_IF_POOL].memtotal >> 10, nmd->pools[NETMAP_RING_POOL].memtotal >> 10, nmd->pools[NETMAP_BUF_POOL].memtotal >> 20); if (netmap_verbose) - D("Free buffers: %d", nmd->pools[NETMAP_BUF_POOL].objfree); + nm_prinf("Free buffers: %d", nmd->pools[NETMAP_BUF_POOL].objfree); return 0; @@ -1740,7 +1752,7 @@ p[NETMAP_BUF_POOL].num = v; if (netmap_verbose) - D("req if %d*%d ring %d*%d buf %d*%d", + nm_prinf("req if %d*%d ring %d*%d buf %d*%d", p[NETMAP_IF_POOL].num, p[NETMAP_IF_POOL].size, p[NETMAP_RING_POOL].num, @@ -1850,13 +1862,13 @@ struct netmap_ring *ring = kring->ring; if (ring == NULL || kring->users > 0 || (kring->nr_kflags & NKR_NEEDRING)) { - if (netmap_verbose) - D("NOT deleting ring %s (ring %p, users %d neekring %d)", + if (netmap_debug & NM_DEBUG_MEM) + nm_prinf("NOT deleting ring %s (ring %p, users %d neekring %d)", kring->name, ring, kring->users, kring->nr_kflags & NKR_NEEDRING); continue; } - if (netmap_verbose) - D("deleting ring %s", kring->name); + if (netmap_debug & NM_DEBUG_MEM) + nm_prinf("deleting ring %s", kring->name); if (!(kring->nr_kflags & NKR_FAKERING)) { ND("freeing bufs for %s", kring->name); netmap_free_bufs(na->nm_mem, ring->slot, kring->nkr_num_slots); @@ -1891,19 +1903,19 @@ if (ring || (!kring->users && !(kring->nr_kflags & 
NKR_NEEDRING))) { /* uneeded, or already created by somebody else */ - if (netmap_verbose) - D("NOT creating ring %s (ring %p, users %d neekring %d)", + if (netmap_debug & NM_DEBUG_MEM) + nm_prinf("NOT creating ring %s (ring %p, users %d neekring %d)", kring->name, ring, kring->users, kring->nr_kflags & NKR_NEEDRING); continue; } - if (netmap_verbose) - D("creating %s", kring->name); + if (netmap_debug & NM_DEBUG_MEM) + nm_prinf("creating %s", kring->name); ndesc = kring->nkr_num_slots; len = sizeof(struct netmap_ring) + ndesc * sizeof(struct netmap_slot); ring = netmap_ring_malloc(na->nm_mem, len); if (ring == NULL) { - D("Cannot allocate %s_ring", nm_txrx2str(t)); + nm_prerr("Cannot allocate %s_ring", nm_txrx2str(t)); goto cleanup; } ND("txring at %p", ring); @@ -1925,14 +1937,16 @@ ND("initializing slots for %s_ring", nm_txrx2str(t)); if (!(kring->nr_kflags & NKR_FAKERING)) { /* this is a real ring */ - ND("allocating buffers for %s", kring->name); + if (netmap_debug & NM_DEBUG_MEM) + nm_prinf("allocating buffers for %s", kring->name); if (netmap_new_bufs(na->nm_mem, ring->slot, ndesc)) { - D("Cannot allocate buffers for %s_ring", nm_txrx2str(t)); + nm_prerr("Cannot allocate buffers for %s_ring", nm_txrx2str(t)); goto cleanup; } } else { /* this is a fake ring, set all indices to 0 */ - ND("NOT allocating buffers for %s", kring->name); + if (netmap_debug & NM_DEBUG_MEM) + nm_prinf("NOT allocating buffers for %s", kring->name); netmap_mem_set_ring(na->nm_mem, ring->slot, ndesc, 0); } /* ring info */ @@ -1998,7 +2012,7 @@ /* initialize base fields -- override const */ *(u_int *)(uintptr_t)&nifp->ni_tx_rings = na->num_tx_rings; *(u_int *)(uintptr_t)&nifp->ni_rx_rings = na->num_rx_rings; - strncpy(nifp->ni_name, na->name, (size_t)IFNAMSIZ); + strlcpy(nifp->ni_name, na->name, sizeof(nifp->ni_name)); /* * fill the slots for the rx and tx rings. 
They contain the offset @@ -2049,8 +2063,8 @@ netmap_mem2_deref(struct netmap_mem_d *nmd) { - if (netmap_verbose) - D("active = %d", nmd->active); + if (netmap_debug & NM_DEBUG_MEM) + nm_prinf("active = %d", nmd->active); } @@ -2217,14 +2231,15 @@ pi->nr_buf_pool_objtotal = netmap_min_priv_params[NETMAP_BUF_POOL].num; if (pi->nr_buf_pool_objsize == 0) pi->nr_buf_pool_objsize = netmap_min_priv_params[NETMAP_BUF_POOL].size; - D("if %d %d ring %d %d buf %d %d", + if (netmap_verbose & NM_DEBUG_MEM) + nm_prinf("if %d %d ring %d %d buf %d %d", pi->nr_if_pool_objtotal, pi->nr_if_pool_objsize, pi->nr_ring_pool_objtotal, pi->nr_ring_pool_objsize, pi->nr_buf_pool_objtotal, pi->nr_buf_pool_objsize); os = nm_os_extmem_create(usrptr, pi, &error); if (os == NULL) { - D("os extmem creation failed"); + nm_prerr("os extmem creation failed"); goto out; } @@ -2233,7 +2248,8 @@ nm_os_extmem_delete(os); return &nme->up; } - D("not found, creating new"); + if (netmap_verbose & NM_DEBUG_MEM) + nm_prinf("not found, creating new"); nme = _netmap_mem_private_new(sizeof(*nme), (struct netmap_obj_params[]){ @@ -2343,7 +2359,7 @@ #endif /* WITH_EXTMEM */ -#ifdef WITH_PTNETMAP_GUEST +#ifdef WITH_PTNETMAP struct mem_pt_if { struct mem_pt_if *next; struct ifnet *ifp; @@ -2386,7 +2402,8 @@ NMA_UNLOCK(nmd); - D("added (ifp=%p,nifp_offset=%u)", ptif->ifp, ptif->nifp_offset); + nm_prinf("ifp=%s,nifp_offset=%u", + ptif->ifp->if_xname, ptif->nifp_offset); return 0; } @@ -2667,7 +2684,7 @@ continue; kring->ring = (struct netmap_ring *) ((char *)nifp + - nifp->ring_ofs[i + na->num_tx_rings + 1]); + nifp->ring_ofs[netmap_all_rings(na, NR_TX) + i]); } error = 0; @@ -2832,4 +2849,4 @@ return nmd; } -#endif /* WITH_PTNETMAP_GUEST */ +#endif /* WITH_PTNETMAP */ Index: head/sys/dev/netmap/netmap_null.c =================================================================== --- head/sys/dev/netmap/netmap_null.c +++ head/sys/dev/netmap/netmap_null.c @@ -0,0 +1,184 @@ +/* + * Copyright (C) 2018 Giuseppe Lettieri + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ +/* $FreeBSD$ */ + +#if defined(__FreeBSD__) +#include /* prerequisite */ + +#include +#include +#include /* defines used in kernel.h */ +#include /* types used in module initialization */ +#include +#include +#include +#include +#include +#include +#include /* sockaddrs */ +#include +#include +#include /* bus_dmamap_* */ +#include + + +#elif defined(linux) + +#include "bsd_glue.h" + +#elif defined(__APPLE__) + +#warning OSX support is only partial +#include "osx_glue.h" + +#elif defined(_WIN32) +#include "win_glue.h" + +#else + +#error Unsupported platform + +#endif /* unsupported */ + +/* + * common headers + */ + +#include +#include +#include + +#ifdef WITH_NMNULL + +static int +netmap_null_txsync(struct netmap_kring *kring, int flags) +{ + (void)kring; + (void)flags; + return 0; +} + +static int +netmap_null_rxsync(struct netmap_kring *kring, int flags) +{ + (void)kring; + (void)flags; + return 0; +} + +static int +netmap_null_krings_create(struct netmap_adapter *na) +{ + return netmap_krings_create(na, 0); +} + +static void +netmap_null_krings_delete(struct netmap_adapter *na) +{ + netmap_krings_delete(na); +} + +static int +netmap_null_reg(struct netmap_adapter *na, int onoff) +{ + if (na->active_fds == 0) { + if (onoff) + na->na_flags |= NAF_NETMAP_ON; + else + na->na_flags &= ~NAF_NETMAP_ON; + } + return 0; +} + +static int +netmap_null_bdg_attach(const char *name, struct netmap_adapter *na, + struct nm_bridge *b) +{ + (void)name; + (void)na; + (void)b; + return EINVAL; +} + +int +netmap_get_null_na(struct nmreq_header *hdr, struct netmap_adapter **na, + struct netmap_mem_d *nmd, int create) +{ + struct nmreq_register *req = (struct nmreq_register *)(uintptr_t)hdr->nr_body; + struct netmap_null_adapter *nna; + int error; + + if (req->nr_mode != NR_REG_NULL) { + nm_prdis("not a null port"); + return 0; + } + + if (!create) { + nm_prerr("null ports cannot be re-opened"); + return EINVAL; + } + + if (nmd == NULL) { + nm_prerr("null ports must use an existing allocator"); + return EINVAL; + } + + nna = nm_os_malloc(sizeof(*nna)); + if (nna == NULL) { + error = ENOMEM; + goto err; + } + snprintf(nna->up.name, sizeof(nna->up.name), "null:%s", hdr->nr_name); + + nna->up.nm_txsync = netmap_null_txsync; + nna->up.nm_rxsync = netmap_null_rxsync; + nna->up.nm_register = netmap_null_reg; + nna->up.nm_krings_create = netmap_null_krings_create; + nna->up.nm_krings_delete = netmap_null_krings_delete; + nna->up.nm_bdg_attach = netmap_null_bdg_attach; + nna->up.nm_mem = netmap_mem_get(nmd); + + nna->up.num_tx_rings = req->nr_tx_rings; + nna->up.num_rx_rings = req->nr_rx_rings; + nna->up.num_tx_desc = req->nr_tx_slots; + nna->up.num_rx_desc = req->nr_rx_slots; + error = netmap_attach_common(&nna->up); + if (error) + goto free_nna; + *na = &nna->up; + netmap_adapter_get(*na); + nm_prdis("created null %s", nna->up.name); + + return 0; + +free_nna: + nm_os_free(nna); +err: + return error; +} + + +#endif /* WITH_NMNULL */ Index: head/sys/dev/netmap/netmap_pipe.c =================================================================== --- head/sys/dev/netmap/netmap_pipe.c +++ head/sys/dev/netmap/netmap_pipe.c @@ -443,7 +443,7 @@ /* In case of no error we put our rings in netmap mode */ for_rx_tx(t) { - for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { + for (i = 0; i < nma_get_nrings(na, t); i++) { struct netmap_kring *kring = NMR(na, t)[i]; if (nm_kring_pending_on(kring)) { struct netmap_kring *sring, *dring; @@ -490,7 +490,7 @@ if (na->active_fds == 0) na->na_flags &= ~NAF_NETMAP_ON; for_rx_tx(t) { - 
for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { + for (i = 0; i < nma_get_nrings(na, t); i++) { struct netmap_kring *kring = NMR(na, t)[i]; if (nm_kring_pending_off(kring)) { @@ -567,7 +567,7 @@ sna = na; cleanup: for_rx_tx(t) { - for (i = 0; i < nma_get_nrings(sna, t) + 1; i++) { + for (i = 0; i < nma_get_nrings(sna, t); i++) { struct netmap_kring *kring = NMR(sna, t)[i]; struct netmap_ring *ring = kring->ring; uint32_t j, lim = kring->nkr_num_slots - 1; @@ -674,11 +674,11 @@ int create_error; /* Temporarily remove the pipe suffix. */ - strncpy(nr_name_orig, hdr->nr_name, sizeof(nr_name_orig)); + strlcpy(nr_name_orig, hdr->nr_name, sizeof(nr_name_orig)); *cbra = '\0'; error = netmap_get_na(hdr, &pna, &ifp, nmd, create); /* Restore the pipe suffix. */ - strncpy(hdr->nr_name, nr_name_orig, sizeof(hdr->nr_name)); + strlcpy(hdr->nr_name, nr_name_orig, sizeof(hdr->nr_name)); if (!error) break; if (error != ENXIO || retries++) { @@ -691,7 +691,7 @@ NMG_UNLOCK(); create_error = netmap_vi_create(hdr, 1 /* autodelete */); NMG_LOCK(); - strncpy(hdr->nr_name, nr_name_orig, sizeof(hdr->nr_name)); + strlcpy(hdr->nr_name, nr_name_orig, sizeof(hdr->nr_name)); if (create_error && create_error != EEXIST) { if (create_error != EOPNOTSUPP) { D("failed to create a persistent vale port: %d", create_error); Index: head/sys/dev/netmap/netmap_vale.c =================================================================== --- head/sys/dev/netmap/netmap_vale.c +++ head/sys/dev/netmap/netmap_vale.c @@ -121,18 +121,18 @@ "Max batch size to be used in the bridge"); SYSEND; -static int netmap_vp_create(struct nmreq_header *hdr, struct ifnet *, +static int netmap_vale_vp_create(struct nmreq_header *hdr, struct ifnet *, struct netmap_mem_d *nmd, struct netmap_vp_adapter **); -static int netmap_vp_bdg_attach(const char *, struct netmap_adapter *, +static int netmap_vale_vp_bdg_attach(const char *, struct netmap_adapter *, struct nm_bridge *); static int netmap_vale_bwrap_attach(const char *, struct netmap_adapter *); /* - * For each output interface, nm_bdg_q is used to construct a list. + * For each output interface, nm_vale_q is used to construct a list. * bq_len is the number of output buffers (we can have coalescing * during the copy). */ -struct nm_bdg_q { +struct nm_vale_q { uint16_t bq_head; uint16_t bq_tail; uint32_t bq_len; /* number of buffers */ @@ -140,10 +140,10 @@ /* Holds the default callbacks */ struct netmap_bdg_ops vale_bdg_ops = { - .lookup = netmap_bdg_learning, + .lookup = netmap_vale_learning, .config = NULL, .dtor = NULL, - .vp_create = netmap_vp_create, + .vp_create = netmap_vale_vp_create, .bwrap_attach = netmap_vale_bwrap_attach, .name = NM_BDG_NAME, }; @@ -212,14 +212,14 @@ /* all port:rings + broadcast */ num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1; l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX; - l += sizeof(struct nm_bdg_q) * num_dstq; + l += sizeof(struct nm_vale_q) * num_dstq; l += sizeof(uint16_t) * NM_BDG_BATCH_MAX; nrings = netmap_real_rings(na, NR_TX); kring = na->tx_rings; for (i = 0; i < nrings; i++) { struct nm_bdg_fwd *ft; - struct nm_bdg_q *dstq; + struct nm_vale_q *dstq; int j; ft = nm_os_malloc(l); @@ -227,7 +227,7 @@ nm_free_bdgfwd(na); return ENOMEM; } - dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX); + dstq = (struct nm_vale_q *)(ft + NM_BDG_BATCH_MAX); for (j = 0; j < num_dstq; j++) { dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL; dstq[j].bq_len = 0; @@ -307,11 +307,228 @@ return ret; } +/* Process NETMAP_REQ_VALE_LIST. 
*/ +int +netmap_vale_list(struct nmreq_header *hdr) +{ + struct nmreq_vale_list *req = + (struct nmreq_vale_list *)(uintptr_t)hdr->nr_body; + int namelen = strlen(hdr->nr_name); + struct nm_bridge *b, *bridges; + struct netmap_vp_adapter *vpna; + int error = 0, i, j; + u_int num_bridges; + netmap_bns_getbridges(&bridges, &num_bridges); + /* this is used to enumerate bridges and ports */ + if (namelen) { /* look up indexes of bridge and port */ + if (strncmp(hdr->nr_name, NM_BDG_NAME, + strlen(NM_BDG_NAME))) { + return EINVAL; + } + NMG_LOCK(); + b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL); + if (!b) { + NMG_UNLOCK(); + return ENOENT; + } + + req->nr_bridge_idx = b - bridges; /* bridge index */ + req->nr_port_idx = NM_BDG_NOPORT; + for (j = 0; j < b->bdg_active_ports; j++) { + i = b->bdg_port_index[j]; + vpna = b->bdg_ports[i]; + if (vpna == NULL) { + nm_prerr("This should not happen"); + continue; + } + /* the former and the latter identify a + * virtual port and a NIC, respectively + */ + if (!strcmp(vpna->up.name, hdr->nr_name)) { + req->nr_port_idx = i; /* port index */ + break; + } + } + NMG_UNLOCK(); + } else { + /* return the first non-empty entry starting from + * bridge nr_arg1 and port nr_arg2. + * + * Users can detect the end of the same bridge by + * seeing the new and old value of nr_arg1, and can + * detect the end of all the bridge by error != 0 + */ + i = req->nr_bridge_idx; + j = req->nr_port_idx; + + NMG_LOCK(); + for (error = ENOENT; i < NM_BRIDGES; i++) { + b = bridges + i; + for ( ; j < NM_BDG_MAXPORTS; j++) { + if (b->bdg_ports[j] == NULL) + continue; + vpna = b->bdg_ports[j]; + /* write back the VALE switch name */ + strlcpy(hdr->nr_name, vpna->up.name, + sizeof(hdr->nr_name)); + error = 0; + goto out; + } + j = 0; /* following bridges scan from 0 */ + } + out: + req->nr_bridge_idx = i; + req->nr_port_idx = j; + NMG_UNLOCK(); + } + + return error; +} + +/* Process NETMAP_REQ_VALE_ATTACH. + */ +int +netmap_vale_attach(struct nmreq_header *hdr, void *auth_token) +{ + struct nmreq_vale_attach *req = + (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body; + struct netmap_vp_adapter * vpna; + struct netmap_adapter *na = NULL; + struct netmap_mem_d *nmd = NULL; + struct nm_bridge *b = NULL; + int error; + + NMG_LOCK(); + /* permission check for modified bridges */ + b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL); + if (b && !nm_bdg_valid_auth_token(b, auth_token)) { + error = EACCES; + goto unlock_exit; + } + + if (req->reg.nr_mem_id) { + nmd = netmap_mem_find(req->reg.nr_mem_id); + if (nmd == NULL) { + error = EINVAL; + goto unlock_exit; + } + } + + /* check for existing one */ + error = netmap_get_vale_na(hdr, &na, nmd, 0); + if (na) { + error = EBUSY; + goto unref_exit; + } + error = netmap_get_vale_na(hdr, &na, + nmd, 1 /* create if not exists */); + if (error) { /* no device */ + goto unlock_exit; + } + + if (na == NULL) { /* VALE prefix missing */ + error = EINVAL; + goto unlock_exit; + } + + if (NETMAP_OWNED_BY_ANY(na)) { + error = EBUSY; + goto unref_exit; + } + + if (na->nm_bdg_ctl) { + /* nop for VALE ports. 
The bwrap needs to put the hwna + * in netmap mode (see netmap_bwrap_bdg_ctl) + */ + error = na->nm_bdg_ctl(hdr, na); + if (error) + goto unref_exit; + ND("registered %s to netmap-mode", na->name); + } + vpna = (struct netmap_vp_adapter *)na; + req->port_index = vpna->bdg_port; + + if (nmd) + netmap_mem_put(nmd); + + NMG_UNLOCK(); + return 0; + +unref_exit: + netmap_adapter_put(na); +unlock_exit: + if (nmd) + netmap_mem_put(nmd); + + NMG_UNLOCK(); + return error; +} + +/* Process NETMAP_REQ_VALE_DETACH. + */ +int +netmap_vale_detach(struct nmreq_header *hdr, void *auth_token) +{ + struct nmreq_vale_detach *nmreq_det = (void *)(uintptr_t)hdr->nr_body; + struct netmap_vp_adapter *vpna; + struct netmap_adapter *na; + struct nm_bridge *b = NULL; + int error; + + NMG_LOCK(); + /* permission check for modified bridges */ + b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL); + if (b && !nm_bdg_valid_auth_token(b, auth_token)) { + error = EACCES; + goto unlock_exit; + } + + error = netmap_get_vale_na(hdr, &na, NULL, 0 /* don't create */); + if (error) { /* no device, or another bridge or user owns the device */ + goto unlock_exit; + } + + if (na == NULL) { /* VALE prefix missing */ + error = EINVAL; + goto unlock_exit; + } else if (nm_is_bwrap(na) && + ((struct netmap_bwrap_adapter *)na)->na_polling_state) { + /* Don't detach a NIC with polling */ + error = EBUSY; + goto unref_exit; + } + + vpna = (struct netmap_vp_adapter *)na; + if (na->na_vp != vpna) { + /* trying to detach first attach of VALE persistent port attached + * to 2 bridges + */ + error = EBUSY; + goto unref_exit; + } + nmreq_det->port_index = vpna->bdg_port; + + if (na->nm_bdg_ctl) { + /* remove the port from bridge. The bwrap + * also needs to put the hwna in normal mode + */ + error = na->nm_bdg_ctl(hdr, na); + } + +unref_exit: + netmap_adapter_put(na); +unlock_exit: + NMG_UNLOCK(); + return error; + +} + + /* nm_dtor callback for ephemeral VALE ports */ static void -netmap_vp_dtor(struct netmap_adapter *na) +netmap_vale_vp_dtor(struct netmap_adapter *na) { struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na; struct nm_bridge *b = vpna->na_bdg; @@ -334,47 +551,13 @@ } -/* Called by external kernel modules (e.g., Openvswitch). - * to modify the private data previously given to regops(). - * 'name' may be just bridge's name (including ':' if it - * is not just NM_BDG_NAME). - * Called without NMG_LOCK. - */ -int -nm_bdg_update_private_data(const char *name, bdg_update_private_data_fn_t callback, - void *callback_data, void *auth_token) -{ - void *private_data = NULL; - struct nm_bridge *b; - int error = 0; - NMG_LOCK(); - b = nm_find_bridge(name, 0 /* don't create */, NULL); - if (!b) { - error = EINVAL; - goto unlock_update_priv; - } - if (!nm_bdg_valid_auth_token(b, auth_token)) { - error = EACCES; - goto unlock_update_priv; - } - BDG_WLOCK(b); - private_data = callback(b->private_data, callback_data, &error); - b->private_data = private_data; - BDG_WUNLOCK(b); - -unlock_update_priv: - NMG_UNLOCK(); - return error; -} - - /* nm_krings_create callback for VALE ports. * Calls the standard netmap_krings_create, then adds leases on rx * rings and bdgfwd on tx rings. */ static int -netmap_vp_krings_create(struct netmap_adapter *na) +netmap_vale_vp_krings_create(struct netmap_adapter *na) { u_int tailroom; int error, i; @@ -409,7 +592,7 @@ /* nm_krings_delete callback for VALE ports. 
*/ static void -netmap_vp_krings_delete(struct netmap_adapter *na) +netmap_vale_vp_krings_delete(struct netmap_adapter *na) { nm_free_bdgfwd(na); netmap_krings_delete(na); @@ -417,7 +600,7 @@ static int -nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, +nm_vale_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na, u_int ring_nr); @@ -429,7 +612,7 @@ * Returns the next position in the ring. */ static int -nm_bdg_preflush(struct netmap_kring *kring, u_int end) +nm_vale_preflush(struct netmap_kring *kring, u_int end) { struct netmap_vp_adapter *na = (struct netmap_vp_adapter*)kring->na; @@ -470,7 +653,7 @@ buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ? (void *)(uintptr_t)slot->ptr : NMB(&na->up, slot); if (unlikely(buf == NULL)) { - RD(5, "NULL %s buffer pointer from %s slot %d len %d", + nm_prlim(5, "NULL %s buffer pointer from %s slot %d len %d", (slot->flags & NS_INDIRECT) ? "INDIRECT" : "DIRECT", kring->name, j, ft[ft_i].ft_len); buf = ft[ft_i].ft_buf = NETMAP_BUF_BASE(&na->up); @@ -488,7 +671,7 @@ ft[ft_i - frags].ft_frags = frags; frags = 1; if (unlikely((int)ft_i >= bridge_batch)) - ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr); + ft_i = nm_vale_flush(ft, ft_i, na, ring_nr); } if (frags > 1) { /* Here ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG, and we @@ -496,10 +679,10 @@ frags--; ft[ft_i - 1].ft_flags &= ~NS_MOREFRAG; ft[ft_i - frags].ft_frags = frags; - D("Truncate incomplete fragment at %d (%d frags)", ft_i, frags); + nm_prlim(5, "Truncate incomplete fragment at %d (%d frags)", ft_i, frags); } if (ft_i) - ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr); + ft_i = nm_vale_flush(ft, ft_i, na, ring_nr); BDG_RUNLOCK(b); return j; } @@ -528,7 +711,7 @@ static __inline uint32_t -nm_bridge_rthash(const uint8_t *addr) +nm_vale_rthash(const uint8_t *addr) { uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hask key @@ -554,7 +737,7 @@ * ring in *dst_ring (at the moment, always use ring 0) */ uint32_t -netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring, +netmap_vale_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring, struct netmap_vp_adapter *na, void *private_data) { uint8_t *buf = ((uint8_t *)ft->ft_buf) + ft->ft_offset; @@ -586,17 +769,17 @@ */ if (((buf[6] & 1) == 0) && (na->last_smac != smac)) { /* valid src */ uint8_t *s = buf+6; - sh = nm_bridge_rthash(s); /* hash of source */ + sh = nm_vale_rthash(s); /* hash of source */ /* update source port forwarding entry */ na->last_smac = ht[sh].mac = smac; /* XXX expire ? 
*/ ht[sh].ports = mysrc; - if (netmap_verbose) - D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d", + if (netmap_debug & NM_DEBUG_VALE) + nm_prinf("src %02x:%02x:%02x:%02x:%02x:%02x on port %d", s[0], s[1], s[2], s[3], s[4], s[5], mysrc); } dst = NM_BDG_BROADCAST; if ((buf[0] & 1) == 0) { /* unicast */ - dh = nm_bridge_rthash(buf); /* hash of dst */ + dh = nm_vale_rthash(buf); /* hash of dst */ if (ht[dh].mac == dmac) { /* found dst */ dst = ht[dh].ports; } @@ -655,24 +838,28 @@ k->nkr_leases[lease_idx] = NR_NOSLOT; k->nkr_lease_idx = nm_next(lease_idx, lim); +#ifdef CONFIG_NETMAP_DEBUG if (n > nm_kr_space(k, is_rx)) { - D("invalid request for %d slots", n); + nm_prerr("invalid request for %d slots", n); panic("x"); } +#endif /* CONFIG NETMAP_DEBUG */ /* XXX verify that there are n slots */ k->nkr_hwlease += n; if (k->nkr_hwlease > lim) k->nkr_hwlease -= lim + 1; +#ifdef CONFIG_NETMAP_DEBUG if (k->nkr_hwlease >= k->nkr_num_slots || k->nr_hwcur >= k->nkr_num_slots || k->nr_hwtail >= k->nkr_num_slots || k->nkr_lease_idx >= k->nkr_num_slots) { - D("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d", + nm_prerr("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d", k->na->name, k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease, k->nkr_lease_idx, k->nkr_num_slots); } +#endif /* CONFIG_NETMAP_DEBUG */ return lease_idx; } @@ -682,10 +869,10 @@ * number of ports, and lets us replace the learn and dispatch functions. */ int -nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na, +nm_vale_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na, u_int ring_nr) { - struct nm_bdg_q *dst_ents, *brddst; + struct nm_vale_q *dst_ents, *brddst; uint16_t num_dsts = 0, *dsts; struct nm_bridge *b = na->na_bdg; u_int i, me = na->bdg_port; @@ -696,14 +883,14 @@ * queues per port plus one for the broadcast traffic. * Then we have an array of destination indexes. */ - dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX); + dst_ents = (struct nm_vale_q *)(ft + NM_BDG_BATCH_MAX); dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1); /* first pass: find a destination for each packet in the batch */ for (i = 0; likely(i < n); i += ft[i].ft_frags) { uint8_t dst_ring = ring_nr; /* default, same ring as origin */ uint16_t dst_port, d_i; - struct nm_bdg_q *d; + struct nm_vale_q *d; struct nm_bdg_fwd *start_ft = NULL; ND("slot %d frags %d", i, ft[i].ft_frags); @@ -720,7 +907,7 @@ */ continue; } - dst_port = b->bdg_ops->lookup(start_ft, &dst_ring, na, b->private_data); + dst_port = b->bdg_ops.lookup(start_ft, &dst_ring, na, b->private_data); if (netmap_verbose > 255) RD(5, "slot %d port %d -> %d", i, me, dst_port); if (dst_port >= NM_BDG_NOPORT) @@ -778,7 +965,7 @@ u_int dst_nr, lim, j, d_i, next, brd_next; u_int needed, howmany; int retry = netmap_txsync_retry; - struct nm_bdg_q *d; + struct nm_vale_q *d; uint32_t my_start = 0, lease_idx = 0; int nrings; int virt_hdr_mismatch = 0; @@ -862,7 +1049,7 @@ if (dst_na->retry && retry) { /* try to get some free slot from the previous run */ - kring->nm_notify(kring, 0); + kring->nm_notify(kring, NAF_FORCE_RECLAIM); /* actually useful only for bwraps, since there * the notify will trigger a txsync on the hwna. 
VALE ports * have dst_na->retry == 0 @@ -1030,7 +1217,7 @@ /* nm_txsync callback for VALE ports */ static int -netmap_vp_txsync(struct netmap_kring *kring, int flags) +netmap_vale_vp_txsync(struct netmap_kring *kring, int flags) { struct netmap_vp_adapter *na = (struct netmap_vp_adapter *)kring->na; @@ -1049,17 +1236,17 @@ if (bridge_batch > NM_BDG_BATCH) bridge_batch = NM_BDG_BATCH; - done = nm_bdg_preflush(kring, head); + done = nm_vale_preflush(kring, head); done: if (done != head) - D("early break at %d/ %d, tail %d", done, head, kring->nr_hwtail); + nm_prerr("early break at %d/ %d, tail %d", done, head, kring->nr_hwtail); /* * packets between 'done' and 'cur' are left unsent. */ kring->nr_hwcur = done; kring->nr_hwtail = nm_prev(done, lim); - if (netmap_verbose) - D("%s ring %d flags %d", na->up.name, kring->ring_id, flags); + if (netmap_debug & NM_DEBUG_TXSYNC) + nm_prinf("%s ring %d flags %d", na->up.name, kring->ring_id, flags); return 0; } @@ -1068,7 +1255,7 @@ * Only persistent VALE ports have a non-null ifp. */ static int -netmap_vp_create(struct nmreq_header *hdr, struct ifnet *ifp, +netmap_vale_vp_create(struct nmreq_header *hdr, struct ifnet *ifp, struct netmap_mem_d *nmd, struct netmap_vp_adapter **ret) { struct nmreq_register *req = (struct nmreq_register *)(uintptr_t)hdr->nr_body; @@ -1089,7 +1276,7 @@ na = &vpna->up; na->ifp = ifp; - strncpy(na->name, hdr->nr_name, sizeof(na->name)); + strlcpy(na->name, hdr->nr_name, sizeof(na->name)); /* bound checking */ na->num_tx_rings = req->nr_tx_rings; @@ -1109,6 +1296,7 @@ */ nm_bound_var(&npipes, 2, 1, NM_MAXPIPES, NULL); /* validate extra bufs */ + extrabufs = req->nr_extra_bufs; nm_bound_var(&extrabufs, 0, 0, 128*NM_BDG_MAXSLOTS, NULL); req->nr_extra_bufs = extrabufs; /* write back */ @@ -1121,7 +1309,7 @@ /*if (vpna->mfs > netmap_buf_size) TODO netmap_buf_size is zero?? vpna->mfs = netmap_buf_size; */ if (netmap_verbose) - D("max frame size %u", vpna->mfs); + nm_prinf("max frame size %u", vpna->mfs); na->na_flags |= NAF_BDG_MAYSLEEP; /* persistent VALE ports look like hw devices @@ -1129,12 +1317,12 @@ */ if (ifp) na->na_flags |= NAF_NATIVE; - na->nm_txsync = netmap_vp_txsync; - na->nm_rxsync = netmap_vp_rxsync; - na->nm_register = netmap_vp_reg; - na->nm_krings_create = netmap_vp_krings_create; - na->nm_krings_delete = netmap_vp_krings_delete; - na->nm_dtor = netmap_vp_dtor; + na->nm_txsync = netmap_vale_vp_txsync; + na->nm_rxsync = netmap_vp_rxsync; /* use the one provided by bdg */ + na->nm_register = netmap_vp_reg; /* use the one provided by bdg */ + na->nm_krings_create = netmap_vale_vp_krings_create; + na->nm_krings_delete = netmap_vale_vp_krings_delete; + na->nm_dtor = netmap_vale_vp_dtor; ND("nr_mem_id %d", req->nr_mem_id); na->nm_mem = nmd ? netmap_mem_get(nmd): @@ -1144,7 +1332,7 @@ req->nr_extra_bufs, npipes, &error); if (na->nm_mem == NULL) goto err; - na->nm_bdg_attach = netmap_vp_bdg_attach; + na->nm_bdg_attach = netmap_vale_vp_bdg_attach; /* other nmd fields are set in the common routine */ error = netmap_attach_common(na); if (error) @@ -1163,19 +1351,16 @@ * The na_vp port is this same netmap_adapter. There is no host port. 
*/ static int -netmap_vp_bdg_attach(const char *name, struct netmap_adapter *na, +netmap_vale_vp_bdg_attach(const char *name, struct netmap_adapter *na, struct nm_bridge *b) { struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na; - if (b->bdg_ops != &vale_bdg_ops) { + if ((b->bdg_flags & NM_BDG_NEED_BWRAP) || vpna->na_bdg) { return NM_NEED_BWRAP; } - if (vpna->na_bdg) { - return NM_NEED_BWRAP; - } na->na_vp = vpna; - strncpy(na->name, name, sizeof(na->name)); + strlcpy(na->name, name, sizeof(na->name)); na->na_hostvp = NULL; return 0; } @@ -1186,12 +1371,12 @@ int error; /* impersonate a netmap_vp_adapter */ - error = netmap_vp_krings_create(na); + error = netmap_vale_vp_krings_create(na); if (error) return error; error = netmap_bwrap_krings_create_common(na); if (error) { - netmap_vp_krings_delete(na); + netmap_vale_vp_krings_delete(na); } return error; } @@ -1200,7 +1385,7 @@ netmap_vale_bwrap_krings_delete(struct netmap_adapter *na) { netmap_bwrap_krings_delete_common(na); - netmap_vp_krings_delete(na); + netmap_vale_vp_krings_delete(na); } static int @@ -1216,9 +1401,9 @@ return ENOMEM; } na = &bna->up.up; - strncpy(na->name, nr_name, sizeof(na->name)); + strlcpy(na->name, nr_name, sizeof(na->name)); na->nm_register = netmap_bwrap_reg; - na->nm_txsync = netmap_vp_txsync; + na->nm_txsync = netmap_vale_vp_txsync; // na->nm_rxsync = netmap_bwrap_rxsync; na->nm_krings_create = netmap_vale_bwrap_krings_create; na->nm_krings_delete = netmap_vale_bwrap_krings_delete; @@ -1313,7 +1498,8 @@ NMG_UNLOCK(); - D("destroying a persistent vale interface %s", ifp->if_xname); + if (netmap_verbose) + nm_prinf("destroying a persistent vale interface %s", ifp->if_xname); /* Linux requires all the references are released * before unregister */ @@ -1389,9 +1575,10 @@ } } /* netmap_vp_create creates a struct netmap_vp_adapter */ - error = netmap_vp_create(hdr, ifp, nmd, &vpna); + error = netmap_vale_vp_create(hdr, ifp, nmd, &vpna); if (error) { - D("error %d", error); + if (netmap_debug & NM_DEBUG_VALE) + nm_prerr("error %d", error); goto err_1; } /* persist-specific routines */ Index: head/sys/modules/netmap/Makefile =================================================================== --- head/sys/modules/netmap/Makefile +++ head/sys/modules/netmap/Makefile @@ -3,12 +3,12 @@ # Compile netmap as a module, useful if you want a netmap bridge # or loadable drivers. 
-SYSDIR?=${SRCTOP}/sys -.include "${SYSDIR}/conf/kern.opts.mk" +.include # FreeBSD 10 and earlier +# .include "${SYSDIR}/conf/kern.opts.mk" -.PATH: ${SYSDIR}/dev/netmap -.PATH.h: ${SYSDIR}/net -CFLAGS += -I${SYSDIR}/ -D INET +.PATH: ${.CURDIR}/../../dev/netmap +.PATH.h: ${.CURDIR}/../../net +CFLAGS += -I${.CURDIR}/../../ -D INET -D VIMAGE KMOD = netmap SRCS = device_if.h bus_if.h pci_if.h opt_netmap.h SRCS += netmap.c netmap.h netmap_kern.h @@ -20,8 +20,10 @@ SRCS += netmap_offloadings.c SRCS += netmap_pipe.c SRCS += netmap_monitor.c -SRCS += netmap_pt.c +SRCS += netmap_kloop.c SRCS += netmap_legacy.c +SRCS += netmap_bdg.c +SRCS += netmap_null.c SRCS += if_ptnet.c SRCS += opt_inet.h opt_inet6.h Index: head/sys/net/netmap.h =================================================================== --- head/sys/net/netmap.h +++ head/sys/net/netmap.h @@ -41,9 +41,9 @@ #ifndef _NET_NETMAP_H_ #define _NET_NETMAP_H_ -#define NETMAP_API 12 /* current API version */ +#define NETMAP_API 13 /* current API version */ -#define NETMAP_MIN_API 11 /* min and max versions accepted */ +#define NETMAP_MIN_API 13 /* min and max versions accepted */ #define NETMAP_MAX_API 15 /* * Some fields should be cache-aligned to reduce contention. @@ -333,12 +333,17 @@ */ /* - * check if space is available in the ring. + * Check if space is available in the ring. We use ring->head, which + * points to the next netmap slot to be published to netmap. It is + * possible that the applications moves ring->cur ahead of ring->tail + * (e.g., by setting ring->cur <== ring->tail), if it wants more slots + * than the ones currently available, and it wants to be notified when + * more arrive. See netmap(4) for more details and examples. */ static inline int nm_ring_empty(struct netmap_ring *ring) { - return (ring->cur == ring->tail); + return (ring->head == ring->tail); } /* @@ -479,6 +484,10 @@ * !=0: errno value */ uint32_t nro_status; + /* Option size, used only for options that can have variable size + * (e.g. because they contain arrays). For fixed-size options this + * field should be set to zero. */ + uint64_t nro_size; }; /* Header common to all requests. Do not reorder these fields, as we need @@ -518,12 +527,32 @@ NETMAP_REQ_VALE_POLLING_DISABLE, /* Get info about the pools of a memory allocator. */ NETMAP_REQ_POOLS_INFO_GET, + /* Start an in-kernel loop that syncs the rings periodically or + * on notifications. The loop runs in the context of the ioctl + * syscall, and only stops on NETMAP_REQ_SYNC_KLOOP_STOP. */ + NETMAP_REQ_SYNC_KLOOP_START, + /* Stops the thread executing the in-kernel loop. The thread + * returns from the ioctl syscall. */ + NETMAP_REQ_SYNC_KLOOP_STOP, + /* Enable CSB mode on a registered netmap control device. */ + NETMAP_REQ_CSB_ENABLE, }; enum { /* On NETMAP_REQ_REGISTER, ask netmap to use memory allocated * from user-space allocated memory pools (e.g. hugepages). */ NETMAP_REQ_OPT_EXTMEM = 1, + + /* ON NETMAP_REQ_SYNC_KLOOP_START, ask netmap to use eventfd-based + * notifications to synchronize the kernel loop with the application. + */ + NETMAP_REQ_OPT_SYNC_KLOOP_EVENTFDS, + + /* On NETMAP_REQ_REGISTER, ask netmap to work in CSB mode, where + * head, cur and tail pointers are not exchanged through the + * struct netmap_ring header, but rather using an user-provided + * memory area (see struct nm_csb_atok and struct nm_csb_ktoa). 
*/ + NETMAP_REQ_OPT_CSB, }; /* @@ -541,6 +570,7 @@ uint16_t nr_mem_id; /* id of the memory allocator */ uint16_t nr_ringid; /* ring(s) we care about */ uint32_t nr_mode; /* specify NR_REG_* modes */ + uint32_t nr_extra_bufs; /* number of requested extra buffers */ uint64_t nr_flags; /* additional flags (see below) */ /* monitors use nr_ringid and nr_mode to select the rings to monitor */ @@ -549,9 +579,7 @@ #define NR_ZCOPY_MON 0x400 /* request exclusive access to the selected rings */ #define NR_EXCLUSIVE 0x800 -/* request ptnetmap host support */ -#define NR_PASSTHROUGH_HOST NR_PTNETMAP_HOST /* deprecated */ -#define NR_PTNETMAP_HOST 0x1000 +/* 0x1000 unused */ #define NR_RX_RINGS_ONLY 0x2000 #define NR_TX_RINGS_ONLY 0x4000 /* Applications set this flag if they are able to deal with virtio-net headers, @@ -564,8 +592,6 @@ * NETMAP_DO_RX_POLL. */ #define NR_DO_RX_POLL 0x10000 #define NR_NO_TX_POLL 0x20000 - - uint32_t nr_extra_bufs; /* number of requested extra buffers */ }; /* Valid values for nmreq_register.nr_mode (see above). */ @@ -576,10 +602,11 @@ NR_REG_ONE_NIC = 4, NR_REG_PIPE_MASTER = 5, /* deprecated, use "x{y" port name syntax */ NR_REG_PIPE_SLAVE = 6, /* deprecated, use "x}y" port name syntax */ + NR_REG_NULL = 7, }; /* A single ioctl number is shared by all the new API command. - * Demultiplexing is done using the nr_hdr.nr_reqtype field. + * Demultiplexing is done using the hdr.nr_reqtype field. * FreeBSD uses the size value embedded in the _IOWR to determine * how much to copy in/out, so we define the ioctl() command * specifying only nmreq_header, and copyin/copyout the rest. */ @@ -595,16 +622,18 @@ /* * nr_reqtype: NETMAP_REQ_PORT_INFO_GET * Get information about a netmap port, including number of rings. - * slots per ring, id of the memory allocator, etc. + * slots per ring, id of the memory allocator, etc. The netmap + * control device used for this operation does not need to be bound + * to a netmap port. */ struct nmreq_port_info_get { - uint64_t nr_offset; /* nifp offset in the shared region */ uint64_t nr_memsize; /* size of the shared region */ uint32_t nr_tx_slots; /* slots in tx rings */ uint32_t nr_rx_slots; /* slots in rx rings */ uint16_t nr_tx_rings; /* number of tx rings */ uint16_t nr_rx_rings; /* number of rx rings */ - uint16_t nr_mem_id; /* id of the memory allocator */ + uint16_t nr_mem_id; /* memory allocator id (in/out) */ + uint16_t pad1; }; #define NM_BDG_NAME "vale" /* prefix for bridge port name */ @@ -620,6 +649,7 @@ struct nmreq_vale_attach { struct nmreq_register reg; uint32_t port_index; + uint32_t pad1; }; /* @@ -630,6 +660,7 @@ */ struct nmreq_vale_detach { uint32_t port_index; + uint32_t pad1; }; /* @@ -639,15 +670,18 @@ struct nmreq_vale_list { /* Name of the VALE port (valeXXX:YYY) or empty. */ uint16_t nr_bridge_idx; + uint16_t pad1; uint32_t nr_port_idx; }; /* * nr_reqtype: NETMAP_REQ_PORT_HDR_SET or NETMAP_REQ_PORT_HDR_GET - * Set the port header length. + * Set or get the port header length of the port identified by hdr.nr_name. + * The control device does not need to be bound to a netmap port. 
*/ struct nmreq_port_hdr { uint32_t nr_hdr_len; + uint32_t pad1; }; /* @@ -660,6 +694,7 @@ uint16_t nr_tx_rings; /* number of tx rings */ uint16_t nr_rx_rings; /* number of rx rings */ uint16_t nr_mem_id; /* id of the memory allocator */ + uint16_t pad1; }; /* @@ -672,17 +707,20 @@ #define NETMAP_POLLING_MODE_MULTI_CPU 2 uint32_t nr_first_cpu_id; uint32_t nr_num_polling_cpus; + uint32_t pad1; }; /* * nr_reqtype: NETMAP_REQ_POOLS_INFO_GET - * Get info about the pools of the memory allocator of the port bound - * to a given netmap control device (used i.e. by a ptnetmap-enabled - * hypervisor). The nr_hdr.nr_name field is ignored. + * Get info about the pools of the memory allocator of the netmap + * port specified by hdr.nr_name and nr_mem_id. The netmap control + * device used for this operation does not need to be bound to a netmap + * port. */ struct nmreq_pools_info { uint64_t nr_memsize; - uint16_t nr_mem_id; + uint16_t nr_mem_id; /* in/out argument */ + uint16_t pad1[3]; uint64_t nr_if_pool_offset; uint32_t nr_if_pool_objtotal; uint32_t nr_if_pool_objsize; @@ -695,13 +733,151 @@ }; /* + * nr_reqtype: NETMAP_REQ_SYNC_KLOOP_START + * Start an in-kernel loop that syncs the rings periodically or on + * notifications. The loop runs in the context of the ioctl syscall, + * and only stops on NETMAP_REQ_SYNC_KLOOP_STOP. + * The registered netmap port must be open in CSB mode. + */ +struct nmreq_sync_kloop_start { + /* Sleeping is the default synchronization method for the kloop. + * The 'sleep_us' field specifies how many microsconds to sleep for + * when there is no work to do, before doing another kloop iteration. + */ + uint32_t sleep_us; + uint32_t pad1; +}; + +/* A CSB entry for the application --> kernel direction. */ +struct nm_csb_atok { + uint32_t head; /* AW+ KR+ the head of the appl netmap_ring */ + uint32_t cur; /* AW+ KR+ the cur of the appl netmap_ring */ + uint32_t appl_need_kick; /* AW+ KR+ kern --> appl notification enable */ + uint32_t sync_flags; /* AW+ KR+ the flags of the appl [tx|rx]sync() */ + uint32_t pad[12]; /* pad to a 64 bytes cacheline */ +}; + +/* A CSB entry for the application <-- kernel direction. */ +struct nm_csb_ktoa { + uint32_t hwcur; /* AR+ KW+ the hwcur of the kern netmap_kring */ + uint32_t hwtail; /* AR+ KW+ the hwtail of the kern netmap_kring */ + uint32_t kern_need_kick; /* AR+ KW+ appl-->kern notification enable */ + uint32_t pad[13]; +}; + +#ifdef __linux__ + +#ifdef __KERNEL__ +#define nm_stst_barrier smp_wmb +#else /* !__KERNEL__ */ +static inline void nm_stst_barrier(void) +{ + /* A memory barrier with release semantic has the combined + * effect of a store-store barrier and a load-store barrier, + * which is fine for us. */ + __atomic_thread_fence(__ATOMIC_RELEASE); +} +#endif /* !__KERNEL__ */ + +#elif defined(__FreeBSD__) + +#ifdef _KERNEL +#define nm_stst_barrier atomic_thread_fence_rel +#else /* !_KERNEL */ +static inline void nm_stst_barrier(void) +{ + __atomic_thread_fence(__ATOMIC_RELEASE); +} +#endif /* !_KERNEL */ + +#else /* !__linux__ && !__FreeBSD__ */ +#error "OS not supported" +#endif /* !__linux__ && !__FreeBSD__ */ + +/* Application side of sync-kloop: Write ring pointers (cur, head) to the CSB. + * This routine is coupled with sync_kloop_kernel_read(). */ +static inline void +nm_sync_kloop_appl_write(struct nm_csb_atok *atok, uint32_t cur, + uint32_t head) +{ + /* + * We need to write cur and head to the CSB but we cannot do it atomically. 
+ * There is no way we can prevent the host from reading the updated value + * of one of the two and the old value of the other. However, if we make + * sure that the host never reads a value of head more recent than the + * value of cur we are safe. We can allow the host to read a value of cur + * more recent than the value of head, since in the netmap ring cur can be + * ahead of head and cur cannot wrap around head because it must be behind + * tail. Inverting the order of writes below could instead result into the + * host to think head went ahead of cur, which would cause the sync + * prologue to fail. + * + * The following memory barrier scheme is used to make this happen: + * + * Guest Host + * + * STORE(cur) LOAD(head) + * mb() <-----------> mb() + * STORE(head) LOAD(cur) + * + */ + atok->cur = cur; + nm_stst_barrier(); + atok->head = head; +} + +/* Application side of sync-kloop: Read kring pointers (hwcur, hwtail) from + * the CSB. This routine is coupled with sync_kloop_kernel_write(). */ +static inline void +nm_sync_kloop_appl_read(struct nm_csb_ktoa *ktoa, uint32_t *hwtail, + uint32_t *hwcur) +{ + /* + * We place a memory barrier to make sure that the update of hwtail never + * overtakes the update of hwcur. + * (see explanation in sync_kloop_kernel_write). + */ + *hwtail = ktoa->hwtail; + nm_stst_barrier(); + *hwcur = ktoa->hwcur; +} + +/* * data for NETMAP_REQ_OPT_* options */ +struct nmreq_opt_sync_kloop_eventfds { + struct nmreq_option nro_opt; /* common header */ + /* An array of N entries for bidirectional notifications between + * the kernel loop and the application. The number of entries and + * their order must agree with the CSB arrays passed in the + * NETMAP_REQ_OPT_CSB option. Each entry contains a file descriptor + * backed by an eventfd. + */ + struct { + /* Notifier for the application --> kernel loop direction. */ + int32_t ioeventfd; + /* Notifier for the kernel loop --> application direction. */ + int32_t irqfd; + } eventfds[0]; +}; + struct nmreq_opt_extmem { struct nmreq_option nro_opt; /* common header */ uint64_t nro_usrptr; /* (in) ptr to usr memory */ struct nmreq_pools_info nro_info; /* (in/out) */ +}; + +struct nmreq_opt_csb { + struct nmreq_option nro_opt; + + /* Array of CSB entries for application --> kernel communication + * (N entries). */ + uint64_t csb_atok; + + /* Array of CSB entries for kernel --> application communication + * (N entries). */ + uint64_t csb_ktoa; }; #endif /* _NET_NETMAP_H_ */ Index: head/sys/net/netmap_user.h =================================================================== --- head/sys/net/netmap_user.h +++ head/sys/net/netmap_user.h @@ -138,11 +138,12 @@ return nm_ring_next(r, r->tail) != r->head; } - +/* Compute the number of slots available in the netmap ring. We use + * ring->head as explained in the comment above nm_ring_empty(). 
*/ static inline uint32_t nm_ring_space(struct netmap_ring *ring) { - int ret = ring->tail - ring->cur; + int ret = ring->tail - ring->head; if (ret < 0) ret += ring->num_slots; return ret; @@ -1091,18 +1092,36 @@ ring = NETMAP_RXRING(d->nifp, ri); for ( ; !nm_ring_empty(ring) && cnt != got; got++) { u_int idx, i; + u_char *oldbuf; + struct netmap_slot *slot; if (d->hdr.buf) { /* from previous round */ cb(arg, &d->hdr, d->hdr.buf); } i = ring->cur; - idx = ring->slot[i].buf_idx; + slot = &ring->slot[i]; + idx = slot->buf_idx; /* d->cur_rx_ring doesn't change inside this loop, but * set it here, so it reflects d->hdr.buf's ring */ d->cur_rx_ring = ri; - d->hdr.slot = &ring->slot[i]; - d->hdr.buf = (u_char *)NETMAP_BUF(ring, idx); + d->hdr.slot = slot; + oldbuf = d->hdr.buf = (u_char *)NETMAP_BUF(ring, idx); // __builtin_prefetch(buf); - d->hdr.len = d->hdr.caplen = ring->slot[i].len; + d->hdr.len = d->hdr.caplen = slot->len; + while (slot->flags & NS_MOREFRAG) { + u_char *nbuf; + u_int oldlen = slot->len; + i = nm_ring_next(ring, i); + slot = &ring->slot[i]; + d->hdr.len += slot->len; + nbuf = (u_char *)NETMAP_BUF(ring, slot->buf_idx); + if (oldbuf != NULL && nbuf - oldbuf == ring->nr_buf_size && + oldlen == ring->nr_buf_size) { + d->hdr.caplen += slot->len; + oldbuf = nbuf; + } else { + oldbuf = NULL; + } + } d->hdr.ts = ring->ts; ring->head = ring->cur = nm_ring_next(ring, i); } Index: head/sys/net/netmap_virt.h =================================================================== --- head/sys/net/netmap_virt.h +++ head/sys/net/netmap_virt.h @@ -1,7 +1,7 @@ /* * Copyright (C) 2013-2016 Luigi Rizzo * Copyright (C) 2013-2016 Giuseppe Lettieri - * Copyright (C) 2013-2016 Vincenzo Maffione + * Copyright (C) 2013-2018 Vincenzo Maffione * Copyright (C) 2015 Stefano Garzarella * All rights reserved. * @@ -33,14 +33,15 @@ #define NETMAP_VIRT_H /* - * ptnetmap_memdev: device used to expose memory into the guest VM + * Register offsets and other macros for the ptnetmap paravirtual devices: + * ptnetmap-memdev: device used to expose memory into the guest + * ptnet: paravirtualized NIC exposing a netmap port in the guest * * These macros are used in the hypervisor frontend (QEMU, bhyve) and in the * guest device driver. */ -/* PCI identifiers and PCI BARs for the ptnetmap memdev - * and ptnetmap network interface. */ +/* PCI identifiers and PCI BARs for ptnetmap-memdev and ptnet. */ #define PTNETMAP_MEMDEV_NAME "ptnetmap-memdev" #define PTNETMAP_PCI_VENDOR_ID 0x1b36 /* QEMU virtual devices */ #define PTNETMAP_PCI_DEVICE_ID 0x000c /* memory device */ @@ -49,7 +50,7 @@ #define PTNETMAP_MEM_PCI_BAR 1 #define PTNETMAP_MSIX_PCI_BAR 2 -/* Registers for the ptnetmap memdev */ +/* Device registers for ptnetmap-memdev */ #define PTNET_MDEV_IO_MEMSIZE_LO 0 /* netmap memory size (low) */ #define PTNET_MDEV_IO_MEMSIZE_HI 4 /* netmap_memory_size (high) */ #define PTNET_MDEV_IO_MEMID 8 /* memory allocator ID in the host */ @@ -64,74 +65,10 @@ #define PTNET_MDEV_IO_BUF_POOL_OBJSZ 96 #define PTNET_MDEV_IO_END 100 -/* - * ptnetmap configuration - * - * The ptnet kthreads (running in host kernel-space) need to be configured - * in order to know how to intercept guest kicks (I/O register writes) and - * how to inject MSI-X interrupts to the guest. The configuration may vary - * depending on the hypervisor. Currently, we support QEMU/KVM on Linux and - * and bhyve on FreeBSD. 
- * The configuration is passed by the hypervisor to the host netmap module - * by means of an ioctl() with nr_cmd=NETMAP_PT_HOST_CREATE, and it is - * specified by the ptnetmap_cfg struct. This struct contains an header - * with general informations and an array of entries whose size depends - * on the hypervisor. The NETMAP_PT_HOST_CREATE command is issued every - * time the kthreads are started. - */ -struct ptnetmap_cfg { -#define PTNETMAP_CFGTYPE_QEMU 0x1 -#define PTNETMAP_CFGTYPE_BHYVE 0x2 - uint16_t cfgtype; /* how to interpret the cfg entries */ - uint16_t entry_size; /* size of a config entry */ - uint32_t num_rings; /* number of config entries */ - void *csb_gh; /* CSB for guest --> host communication */ - void *csb_hg; /* CSB for host --> guest communication */ - /* Configuration entries are allocated right after the struct. */ -}; - -/* Configuration of a ptnetmap ring for QEMU. */ -struct ptnetmap_cfgentry_qemu { - uint32_t ioeventfd; /* to intercept guest register access */ - uint32_t irqfd; /* to inject guest interrupts */ -}; - -/* Configuration of a ptnetmap ring for bhyve. */ -struct ptnetmap_cfgentry_bhyve { - uint64_t wchan; /* tsleep() parameter, to wake up kthread */ - uint32_t ioctl_fd; /* ioctl fd */ - /* ioctl parameters to send irq */ - uint32_t ioctl_cmd; - /* vmm.ko MSIX parameters for IOCTL */ - struct { - uint64_t msg_data; - uint64_t addr; - } ioctl_data; -}; - -/* - * Pass a pointer to a userspace buffer to be passed to kernelspace for write - * or read. Used by NETMAP_PT_HOST_CREATE. - * XXX deprecated - */ -static inline void -nmreq_pointer_put(struct nmreq *nmr, void *userptr) -{ - uintptr_t *pp = (uintptr_t *)&nmr->nr_arg1; - *pp = (uintptr_t)userptr; -} - -static inline void * -nmreq_pointer_get(const struct nmreq *nmr) -{ - const uintptr_t *pp = (const uintptr_t *)&nmr->nr_arg1; - return (void *)*pp; -} - /* ptnetmap features */ #define PTNETMAP_F_VNET_HDR 1 -/* I/O registers for the ptnet device. */ +/* Device registers for the ptnet network device. */ #define PTNET_IO_PTFEAT 0 #define PTNET_IO_PTCTL 4 #define PTNET_IO_MAC_LO 8 @@ -153,140 +90,11 @@ #define PTNET_IO_KICK_BASE 128 #define PTNET_IO_MASK 0xff -/* ptnetmap control commands (values for PTCTL register) */ +/* ptnet control commands (values for PTCTL register): + * - CREATE starts the host sync-kloop + * - DELETE stops the host sync-kloop + */ #define PTNETMAP_PTCTL_CREATE 1 #define PTNETMAP_PTCTL_DELETE 2 - -/* ptnetmap synchronization variables shared between guest and host */ -struct ptnet_csb_gh { - uint32_t head; /* GW+ HR+ the head of the guest netmap_ring */ - uint32_t cur; /* GW+ HR+ the cur of the guest netmap_ring */ - uint32_t guest_need_kick; /* GW+ HR+ host-->guest notification enable */ - uint32_t sync_flags; /* GW+ HR+ the flags of the guest [tx|rx]sync() */ - char pad[48]; /* pad to a 64 bytes cacheline */ -}; -struct ptnet_csb_hg { - uint32_t hwcur; /* GR+ HW+ the hwcur of the host netmap_kring */ - uint32_t hwtail; /* GR+ HW+ the hwtail of the host netmap_kring */ - uint32_t host_need_kick; /* GR+ HW+ guest-->host notification enable */ - char pad[4+48]; -}; - -#ifdef WITH_PTNETMAP_GUEST - -/* ptnetmap_memdev routines used to talk with ptnetmap_memdev device driver */ -struct ptnetmap_memdev; -int nm_os_pt_memdev_iomap(struct ptnetmap_memdev *, vm_paddr_t *, void **, - uint64_t *); -void nm_os_pt_memdev_iounmap(struct ptnetmap_memdev *); -uint32_t nm_os_pt_memdev_ioread(struct ptnetmap_memdev *, unsigned int); - -/* Guest driver: Write kring pointers (cur, head) to the CSB. 
- * This routine is coupled with ptnetmap_host_read_kring_csb(). */ -static inline void -ptnetmap_guest_write_kring_csb(struct ptnet_csb_gh *ptr, uint32_t cur, - uint32_t head) -{ - /* - * We need to write cur and head to the CSB but we cannot do it atomically. - * There is no way we can prevent the host from reading the updated value - * of one of the two and the old value of the other. However, if we make - * sure that the host never reads a value of head more recent than the - * value of cur we are safe. We can allow the host to read a value of cur - * more recent than the value of head, since in the netmap ring cur can be - * ahead of head and cur cannot wrap around head because it must be behind - * tail. Inverting the order of writes below could instead result into the - * host to think head went ahead of cur, which would cause the sync - * prologue to fail. - * - * The following memory barrier scheme is used to make this happen: - * - * Guest Host - * - * STORE(cur) LOAD(head) - * mb() <-----------> mb() - * STORE(head) LOAD(cur) - */ - ptr->cur = cur; - mb(); - ptr->head = head; -} - -/* Guest driver: Read kring pointers (hwcur, hwtail) from the CSB. - * This routine is coupled with ptnetmap_host_write_kring_csb(). */ -static inline void -ptnetmap_guest_read_kring_csb(struct ptnet_csb_hg *pthg, struct netmap_kring *kring) -{ - /* - * We place a memory barrier to make sure that the update of hwtail never - * overtakes the update of hwcur. - * (see explanation in ptnetmap_host_write_kring_csb). - */ - kring->nr_hwtail = pthg->hwtail; - mb(); - kring->nr_hwcur = pthg->hwcur; -} - -#endif /* WITH_PTNETMAP_GUEST */ - -#ifdef WITH_PTNETMAP_HOST -/* - * ptnetmap kernel thread routines - * */ - -/* Functions to read and write CSB fields in the host */ -#if defined (linux) -#define CSB_READ(csb, field, r) (get_user(r, &csb->field)) -#define CSB_WRITE(csb, field, v) (put_user(v, &csb->field)) -#else /* ! linux */ -#define CSB_READ(csb, field, r) (r = fuword32(&csb->field)) -#define CSB_WRITE(csb, field, v) (suword32(&csb->field, v)) -#endif /* ! linux */ - -/* Host netmap: Write kring pointers (hwcur, hwtail) to the CSB. - * This routine is coupled with ptnetmap_guest_read_kring_csb(). */ -static inline void -ptnetmap_host_write_kring_csb(struct ptnet_csb_hg __user *ptr, uint32_t hwcur, - uint32_t hwtail) -{ - /* - * The same scheme used in ptnetmap_guest_write_kring_csb() applies here. - * We allow the guest to read a value of hwcur more recent than the value - * of hwtail, since this would anyway result in a consistent view of the - * ring state (and hwcur can never wraparound hwtail, since hwcur must be - * behind head). - * - * The following memory barrier scheme is used to make this happen: - * - * Guest Host - * - * STORE(hwcur) LOAD(hwtail) - * mb() <-------------> mb() - * STORE(hwtail) LOAD(hwcur) - */ - CSB_WRITE(ptr, hwcur, hwcur); - mb(); - CSB_WRITE(ptr, hwtail, hwtail); -} - -/* Host netmap: Read kring pointers (head, cur, sync_flags) from the CSB. - * This routine is coupled with ptnetmap_guest_write_kring_csb(). */ -static inline void -ptnetmap_host_read_kring_csb(struct ptnet_csb_gh __user *ptr, - struct netmap_ring *shadow_ring, - uint32_t num_slots) -{ - /* - * We place a memory barrier to make sure that the update of head never - * overtakes the update of cur. - * (see explanation in ptnetmap_guest_write_kring_csb). 
- */ - CSB_READ(ptr, head, shadow_ring->head); - mb(); - CSB_READ(ptr, cur, shadow_ring->cur); - CSB_READ(ptr, sync_flags, shadow_ring->flags); -} - -#endif /* WITH_PTNETMAP_HOST */ #endif /* NETMAP_VIRT_H */
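
The sketches below are illustrative usage notes for the interfaces touched by this change; they are not part of the patch. They assume the nmreq_header and nmreq_option definitions that appear earlier in net/netmap.h (field names such as nr_version, nr_body, nr_options and nro_reqtype) and the NIOCCTRL ioctl that carries all nmreq_* requests; helper names are made up and error handling is minimal.

First, NETMAP_REQ_PORT_INFO_GET can now be issued on a control device that is not bound to any port:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <net/netmap.h>

/* Hypothetical helper: query ring/slot geometry of a netmap port. */
static int
print_port_info(const char *portname)
{
	struct nmreq_header hdr;
	struct nmreq_port_info_get info;
	int fd = open("/dev/netmap", O_RDWR);

	if (fd < 0)
		return -1;
	memset(&hdr, 0, sizeof(hdr));
	memset(&info, 0, sizeof(info));
	hdr.nr_version = NETMAP_API;
	hdr.nr_reqtype = NETMAP_REQ_PORT_INFO_GET;
	strncpy(hdr.nr_name, portname, sizeof(hdr.nr_name) - 1);
	hdr.nr_body = (uintptr_t)&info;	/* body copied in/out by the kernel */
	if (ioctl(fd, NIOCCTRL, &hdr) < 0) {
		close(fd);
		return -1;
	}
	printf("%s: %u tx rings x %u slots, %u rx rings x %u slots, mem_id %u\n",
	    portname, info.nr_tx_rings, info.nr_tx_slots,
	    info.nr_rx_rings, info.nr_rx_slots, info.nr_mem_id);
	close(fd);
	return 0;
}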
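
The new CSB mode is driven through the same request mechanism. One plausible flow, assuming the NETMAP_REQ_REGISTER request and NR_REG_ALL_NIC mode defined earlier in the header, is to attach the two CSB arrays with a NETMAP_REQ_OPT_CSB option at register time and then start the in-kernel sync loop. The NETMAP_REQ_SYNC_KLOOP_START ioctl blocks until NETMAP_REQ_SYNC_KLOOP_STOP is issued on the same control device, so it normally runs in a dedicated thread; a nmreq_opt_sync_kloop_eventfds option, with one {ioeventfd, irqfd} pair per ring in the same order as the CSB entries, can be linked to the kloop request in the same way.

#include <fcntl.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <net/netmap.h>

/* Hypothetical sketch: bind 'portname' in CSB mode and run the sync-kloop.
 * 'num_entries' is the number of rings covered by the binding (tx + rx);
 * one nm_csb_atok/nm_csb_ktoa pair is needed per ring. */
static int
csb_bind_and_kloop(const char *portname, unsigned num_entries)
{
	struct nm_csb_atok *atok = calloc(num_entries, sizeof(*atok));
	struct nm_csb_ktoa *ktoa = calloc(num_entries, sizeof(*ktoa));
	struct nmreq_header hdr;
	struct nmreq_register reg;
	struct nmreq_opt_csb csbo;
	struct nmreq_sync_kloop_start kloop;
	int fd = open("/dev/netmap", O_RDWR);

	if (fd < 0 || atok == NULL || ktoa == NULL)
		return -1;

	memset(&hdr, 0, sizeof(hdr));
	hdr.nr_version = NETMAP_API;
	strncpy(hdr.nr_name, portname, sizeof(hdr.nr_name) - 1);

	/* Register the port, passing the CSB arrays as an option. */
	memset(&reg, 0, sizeof(reg));
	reg.nr_mode = NR_REG_ALL_NIC;
	memset(&csbo, 0, sizeof(csbo));
	csbo.nro_opt.nro_reqtype = NETMAP_REQ_OPT_CSB;
	csbo.csb_atok = (uintptr_t)atok;
	csbo.csb_ktoa = (uintptr_t)ktoa;
	hdr.nr_reqtype = NETMAP_REQ_REGISTER;
	hdr.nr_body = (uintptr_t)&reg;
	hdr.nr_options = (uintptr_t)&csbo;
	if (ioctl(fd, NIOCCTRL, &hdr) < 0)
		return -1;

	/* Start the kloop: this call blocks until SYNC_KLOOP_STOP. */
	memset(&kloop, 0, sizeof(kloop));
	kloop.sleep_us = 100;		/* idle poll period */
	hdr.nr_reqtype = NETMAP_REQ_SYNC_KLOOP_START;
	hdr.nr_body = (uintptr_t)&kloop;
	hdr.nr_options = 0;
	return ioctl(fd, NIOCCTRL, &hdr);
}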
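
On the application side of a CSB binding, nm_sync_kloop_appl_write() and nm_sync_kloop_appl_read() above are the only primitives needed to talk to the kloop; they enforce exactly the cur-before-head and hwtail-before-hwcur ordering described in the comments. A minimal per-ring update step might look as follows, where kick_fn() stands for whatever notifier the application registered (for example a write to the ioeventfd passed via nmreq_opt_sync_kloop_eventfds):

#include <stdint.h>
#include <net/netmap_user.h>

/* Illustrative only: publish new slots of one ring to the sync-kloop
 * and mirror back the kernel state.  'ring' is the usual netmap_ring,
 * 'atok'/'ktoa' are the CSB entries for this ring. */
static void
csb_ring_sync(struct netmap_ring *ring, struct nm_csb_atok *atok,
    struct nm_csb_ktoa *ktoa, void (*kick_fn)(void))
{
	uint32_t hwcur, hwtail;

	/* Expose the new cur/head values; cur is written before head,
	 * with a store barrier in between, so the kloop can never see
	 * head ahead of cur. */
	nm_sync_kloop_appl_write(atok, ring->cur, ring->head);

	/* Notify the kernel only if it asked for notifications. */
	if (ktoa->kern_need_kick && kick_fn != NULL)
		kick_fn();

	/* Pick up completions; hwtail is read before hwcur. */
	nm_sync_kloop_appl_read(ktoa, &hwtail, &hwcur);
	ring->tail = hwtail;	/* local mirror of the kernel view */
}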
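
Finally, the nm_ring_space() fix in netmap_user.h measures free slots from ring->head, the pointer the application actually owns. A sender loop that relies on it, using only the standard netmap_user.h helpers, could be sketched as:

#include <string.h>
#include <net/netmap_user.h>

/* Illustrative sender: fill at most nm_ring_space() slots starting at
 * ring->head with copies of 'payload', then publish them by advancing
 * head and cur together. */
static unsigned
tx_fill(struct netmap_ring *ring, const char *payload, unsigned plen,
    unsigned npkts)
{
	unsigned space = nm_ring_space(ring);
	unsigned n, i = ring->head;

	if (plen > ring->nr_buf_size)
		return 0;
	if (npkts > space)
		npkts = space;
	for (n = 0; n < npkts; n++) {
		struct netmap_slot *slot = &ring->slot[i];

		memcpy(NETMAP_BUF(ring, slot->buf_idx), payload, plen);
		slot->len = plen;
		i = nm_ring_next(ring, i);
	}
	ring->head = ring->cur = i;	/* slots before head are now owned by the kernel */
	return npkts;
}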