Page MenuHomeFreeBSD

D18015.diff
No OneTemporary

D18015.diff

Index: head/sys/conf/files
===================================================================
--- head/sys/conf/files
+++ head/sys/conf/files
@@ -2517,17 +2517,19 @@
dev/nand/nfc_if.m optional nand
dev/netmap/if_ptnet.c optional netmap inet
dev/netmap/netmap.c optional netmap
+dev/netmap/netmap_bdg.c optional netmap
dev/netmap/netmap_freebsd.c optional netmap
dev/netmap/netmap_generic.c optional netmap
+dev/netmap/netmap_kloop.c optional netmap
+dev/netmap/netmap_legacy.c optional netmap
dev/netmap/netmap_mbq.c optional netmap
dev/netmap/netmap_mem2.c optional netmap
dev/netmap/netmap_monitor.c optional netmap
+dev/netmap/netmap_null.c optional netmap
dev/netmap/netmap_offloadings.c optional netmap
dev/netmap/netmap_pipe.c optional netmap
dev/netmap/netmap_pt.c optional netmap
dev/netmap/netmap_vale.c optional netmap
-dev/netmap/netmap_legacy.c optional netmap
-dev/netmap/netmap_bdg.c optional netmap
# compile-with "${NORMAL_C} -Wconversion -Wextra"
dev/nfsmb/nfsmb.c optional nfsmb pci
dev/nge/if_nge.c optional nge
Index: head/sys/dev/netmap/if_ixl_netmap.h
===================================================================
--- head/sys/dev/netmap/if_ixl_netmap.h
+++ head/sys/dev/netmap/if_ixl_netmap.h
@@ -129,7 +129,7 @@
na.ifp = vsi->ifp;
na.na_flags = NAF_BDG_MAYSLEEP;
// XXX check that queues is set.
- nm_prinf("queues is %p\n", vsi->queues);
+ nm_prinf("queues is %p", vsi->queues);
if (vsi->queues) {
na.num_tx_desc = vsi->queues[0].num_desc;
na.num_rx_desc = vsi->queues[0].num_desc;
Index: head/sys/dev/netmap/if_ptnet.c
===================================================================
--- head/sys/dev/netmap/if_ptnet.c
+++ head/sys/dev/netmap/if_ptnet.c
@@ -128,8 +128,8 @@
struct resource *irq;
void *cookie;
int kring_id;
- struct ptnet_csb_gh *ptgh;
- struct ptnet_csb_hg *pthg;
+ struct nm_csb_atok *atok;
+ struct nm_csb_ktoa *ktoa;
unsigned int kick;
struct mtx lock;
struct buf_ring *bufring; /* for TX queues */
@@ -166,8 +166,8 @@
unsigned int num_tx_rings;
struct ptnet_queue *queues;
struct ptnet_queue *rxqueues;
- struct ptnet_csb_gh *csb_gh;
- struct ptnet_csb_hg *csb_hg;
+ struct nm_csb_atok *csb_gh;
+ struct nm_csb_ktoa *csb_hg;
unsigned int min_tx_space;
@@ -209,7 +209,7 @@
static int ptnet_irqs_init(struct ptnet_softc *sc);
static void ptnet_irqs_fini(struct ptnet_softc *sc);
-static uint32_t ptnet_nm_ptctl(if_t ifp, uint32_t cmd);
+static uint32_t ptnet_nm_ptctl(struct ptnet_softc *sc, uint32_t cmd);
static int ptnet_nm_config(struct netmap_adapter *na,
struct nm_config_info *info);
static void ptnet_update_vnet_hdr(struct ptnet_softc *sc);
@@ -327,7 +327,7 @@
sc->num_rings = num_tx_rings + num_rx_rings;
sc->num_tx_rings = num_tx_rings;
- if (sc->num_rings * sizeof(struct ptnet_csb_gh) > PAGE_SIZE) {
+ if (sc->num_rings * sizeof(struct nm_csb_atok) > PAGE_SIZE) {
device_printf(dev, "CSB cannot handle that many rings (%u)\n",
sc->num_rings);
err = ENOMEM;
@@ -342,7 +342,7 @@
err = ENOMEM;
goto err_path;
}
- sc->csb_hg = (struct ptnet_csb_hg *)(((char *)sc->csb_gh) + PAGE_SIZE);
+ sc->csb_hg = (struct nm_csb_ktoa *)(((char *)sc->csb_gh) + PAGE_SIZE);
{
/*
@@ -379,8 +379,8 @@
pq->sc = sc;
pq->kring_id = i;
pq->kick = PTNET_IO_KICK_BASE + 4 * i;
- pq->ptgh = sc->csb_gh + i;
- pq->pthg = sc->csb_hg + i;
+ pq->atok = sc->csb_gh + i;
+ pq->ktoa = sc->csb_hg + i;
snprintf(pq->lock_name, sizeof(pq->lock_name), "%s-%d",
device_get_nameunit(dev), i);
mtx_init(&pq->lock, pq->lock_name, NULL, MTX_DEF);
@@ -505,12 +505,25 @@
return err;
}
+/* Stop host sync-kloop if it was running: send PTCTL_DELETE, then zero the four CSB base address registers. */
+static void
+ptnet_device_shutdown(struct ptnet_softc *sc)
+{
+ ptnet_nm_ptctl(sc, PTNETMAP_PTCTL_DELETE); /* returned status is not checked here */
+ bus_write_4(sc->iomem, PTNET_IO_CSB_GH_BAH, 0); /* GH base address, high register */
+ bus_write_4(sc->iomem, PTNET_IO_CSB_GH_BAL, 0); /* GH base address, low register */
+ bus_write_4(sc->iomem, PTNET_IO_CSB_HG_BAH, 0); /* HG base address, high register */
+ bus_write_4(sc->iomem, PTNET_IO_CSB_HG_BAL, 0); /* HG base address, low register */
+}
+
static int
ptnet_detach(device_t dev)
{
struct ptnet_softc *sc = device_get_softc(dev);
int i;
+ ptnet_device_shutdown(sc);
+
#ifdef DEVICE_POLLING
if (sc->ifp->if_capenable & IFCAP_POLLING) {
ether_poll_deregister(sc->ifp);
@@ -543,10 +556,6 @@
ptnet_irqs_fini(sc);
if (sc->csb_gh) {
- bus_write_4(sc->iomem, PTNET_IO_CSB_GH_BAH, 0);
- bus_write_4(sc->iomem, PTNET_IO_CSB_GH_BAL, 0);
- bus_write_4(sc->iomem, PTNET_IO_CSB_HG_BAH, 0);
- bus_write_4(sc->iomem, PTNET_IO_CSB_HG_BAL, 0);
contigfree(sc->csb_gh, 2*PAGE_SIZE, M_DEVBUF);
sc->csb_gh = NULL;
sc->csb_hg = NULL;
@@ -583,9 +592,8 @@
static int
ptnet_suspend(device_t dev)
{
- struct ptnet_softc *sc;
+ struct ptnet_softc *sc = device_get_softc(dev);
- sc = device_get_softc(dev);
(void)sc;
return (0);
@@ -594,9 +602,8 @@
static int
ptnet_resume(device_t dev)
{
- struct ptnet_softc *sc;
+ struct ptnet_softc *sc = device_get_softc(dev);
- sc = device_get_softc(dev);
(void)sc;
return (0);
@@ -605,11 +612,11 @@
static int
ptnet_shutdown(device_t dev)
{
- /*
- * Suspend already does all of what we need to
- * do here; we just never expect to be resumed.
- */
- return (ptnet_suspend(dev));
+ struct ptnet_softc *sc = device_get_softc(dev);
+
+ ptnet_device_shutdown(sc);
+
+ return (0);
}
static int
@@ -796,7 +803,7 @@
/* Make sure the worker sees the
* IFF_DRV_RUNNING down. */
PTNET_Q_LOCK(pq);
- pq->ptgh->guest_need_kick = 0;
+ pq->atok->appl_need_kick = 0;
PTNET_Q_UNLOCK(pq);
/* Wait for rescheduling to finish. */
if (pq->taskq) {
@@ -810,7 +817,7 @@
for (i = 0; i < sc->num_rings; i++) {
pq = sc-> queues + i;
PTNET_Q_LOCK(pq);
- pq->ptgh->guest_need_kick = 1;
+ pq->atok->appl_need_kick = 1;
PTNET_Q_UNLOCK(pq);
}
}
@@ -881,7 +888,7 @@
return ret;
}
- if (sc->ptna->backend_regifs == 0) {
+ if (sc->ptna->backend_users == 0) {
ret = ptnet_nm_krings_create(na_nm);
if (ret) {
device_printf(sc->dev, "ptnet_nm_krings_create() "
@@ -962,7 +969,7 @@
ptnet_nm_register(na_dr, 0 /* off */);
- if (sc->ptna->backend_regifs == 0) {
+ if (sc->ptna->backend_users == 0) {
netmap_mem_rings_delete(na_dr);
ptnet_nm_krings_delete(na_nm);
}
@@ -1092,9 +1099,8 @@
}
static uint32_t
-ptnet_nm_ptctl(if_t ifp, uint32_t cmd)
+ptnet_nm_ptctl(struct ptnet_softc *sc, uint32_t cmd)
{
- struct ptnet_softc *sc = if_getsoftc(ifp);
/*
* Write a command and read back error status,
* with zero meaning success.
@@ -1130,8 +1136,8 @@
/* Sync krings from the host, reading from
* CSB. */
for (i = 0; i < sc->num_rings; i++) {
- struct ptnet_csb_gh *ptgh = sc->queues[i].ptgh;
- struct ptnet_csb_hg *pthg = sc->queues[i].pthg;
+ struct nm_csb_atok *atok = sc->queues[i].atok;
+ struct nm_csb_ktoa *ktoa = sc->queues[i].ktoa;
struct netmap_kring *kring;
if (i < na->num_tx_rings) {
@@ -1139,15 +1145,15 @@
} else {
kring = na->rx_rings[i - na->num_tx_rings];
}
- kring->rhead = kring->ring->head = ptgh->head;
- kring->rcur = kring->ring->cur = ptgh->cur;
- kring->nr_hwcur = pthg->hwcur;
+ kring->rhead = kring->ring->head = atok->head;
+ kring->rcur = kring->ring->cur = atok->cur;
+ kring->nr_hwcur = ktoa->hwcur;
kring->nr_hwtail = kring->rtail =
- kring->ring->tail = pthg->hwtail;
+ kring->ring->tail = ktoa->hwtail;
ND("%d,%d: csb {hc %u h %u c %u ht %u}", t, i,
- pthg->hwcur, ptgh->head, ptgh->cur,
- pthg->hwtail);
+ ktoa->hwcur, atok->head, atok->cur,
+ ktoa->hwtail);
ND("%d,%d: kring {hc %u rh %u rc %u h %u c %u ht %u rt %u t %u}",
t, i, kring->nr_hwcur, kring->rhead, kring->rcur,
kring->ring->head, kring->ring->cur, kring->nr_hwtail,
@@ -1178,7 +1184,7 @@
int i;
if (!onoff) {
- sc->ptna->backend_regifs--;
+ sc->ptna->backend_users--;
}
/* If this is the last netmap client, guest interrupt enable flags may
@@ -1191,17 +1197,17 @@
D("Exit netmap mode, re-enable interrupts");
for (i = 0; i < sc->num_rings; i++) {
pq = sc->queues + i;
- pq->ptgh->guest_need_kick = 1;
+ pq->atok->appl_need_kick = 1;
}
}
if (onoff) {
- if (sc->ptna->backend_regifs == 0) {
+ if (sc->ptna->backend_users == 0) {
/* Initialize notification enable fields in the CSB. */
for (i = 0; i < sc->num_rings; i++) {
pq = sc->queues + i;
- pq->pthg->host_need_kick = 1;
- pq->ptgh->guest_need_kick =
+ pq->ktoa->kern_need_kick = 1;
+ pq->atok->appl_need_kick =
(!(ifp->if_capenable & IFCAP_POLLING)
&& i >= sc->num_tx_rings);
}
@@ -1211,17 +1217,13 @@
/* Make sure the host adapter passed through is ready
* for txsync/rxsync. */
- ret = ptnet_nm_ptctl(ifp, PTNETMAP_PTCTL_CREATE);
+ ret = ptnet_nm_ptctl(sc, PTNETMAP_PTCTL_CREATE);
if (ret) {
return ret;
}
- }
- /* Sync from CSB must be done after REGIF PTCTL. Skip this
- * step only if this is a netmap client and it is not the
- * first one. */
- if ((!native && sc->ptna->backend_regifs == 0) ||
- (native && na->active_fds == 0)) {
+ /* Align the guest krings and rings to the state stored
+ * in the CSB. */
ptnet_sync_from_csb(sc, na);
}
@@ -1254,19 +1256,13 @@
}
}
- /* Sync from CSB must be done before UNREGIF PTCTL, on the last
- * netmap client. */
- if (native && na->active_fds == 0) {
- ptnet_sync_from_csb(sc, na);
+ if (sc->ptna->backend_users == 0) {
+ ret = ptnet_nm_ptctl(sc, PTNETMAP_PTCTL_DELETE);
}
-
- if (sc->ptna->backend_regifs == 0) {
- ret = ptnet_nm_ptctl(ifp, PTNETMAP_PTCTL_DELETE);
- }
}
if (onoff) {
- sc->ptna->backend_regifs++;
+ sc->ptna->backend_users++;
}
return ret;
@@ -1279,7 +1275,7 @@
struct ptnet_queue *pq = sc->queues + kring->ring_id;
bool notify;
- notify = netmap_pt_guest_txsync(pq->ptgh, pq->pthg, kring, flags);
+ notify = netmap_pt_guest_txsync(pq->atok, pq->ktoa, kring, flags);
if (notify) {
ptnet_kick(pq);
}
@@ -1294,7 +1290,7 @@
struct ptnet_queue *pq = sc->rxqueues + kring->ring_id;
bool notify;
- notify = netmap_pt_guest_rxsync(pq->ptgh, pq->pthg, kring, flags);
+ notify = netmap_pt_guest_rxsync(pq->atok, pq->ktoa, kring, flags);
if (notify) {
ptnet_kick(pq);
}
@@ -1310,7 +1306,7 @@
for (i = 0; i < sc->num_rings; i++) {
struct ptnet_queue *pq = sc->queues + i;
- pq->ptgh->guest_need_kick = onoff;
+ pq->atok->appl_need_kick = onoff;
}
}
@@ -1676,25 +1672,13 @@
}
/* End of offloading-related functions to be shared with vtnet. */
-static inline void
-ptnet_sync_tail(struct ptnet_csb_hg *pthg, struct netmap_kring *kring)
-{
- struct netmap_ring *ring = kring->ring;
-
- /* Update hwcur and hwtail as known by the host. */
- ptnetmap_guest_read_kring_csb(pthg, kring);
-
- /* nm_sync_finalize */
- ring->tail = kring->rtail = kring->nr_hwtail;
-}
-
static void
ptnet_ring_update(struct ptnet_queue *pq, struct netmap_kring *kring,
unsigned int head, unsigned int sync_flags)
{
struct netmap_ring *ring = kring->ring;
- struct ptnet_csb_gh *ptgh = pq->ptgh;
- struct ptnet_csb_hg *pthg = pq->pthg;
+ struct nm_csb_atok *atok = pq->atok;
+ struct nm_csb_ktoa *ktoa = pq->ktoa;
/* Some packets have been pushed to the netmap ring. We have
* to tell the host to process the new packets, updating cur
@@ -1704,11 +1688,11 @@
/* Mimic nm_txsync_prologue/nm_rxsync_prologue. */
kring->rcur = kring->rhead = head;
- ptnetmap_guest_write_kring_csb(ptgh, kring->rcur, kring->rhead);
+ ptnetmap_guest_write_kring_csb(atok, kring->rcur, kring->rhead);
/* Kick the host if needed. */
- if (NM_ACCESS_ONCE(pthg->host_need_kick)) {
- ptgh->sync_flags = sync_flags;
+ if (NM_ACCESS_ONCE(ktoa->kern_need_kick)) {
+ atok->sync_flags = sync_flags;
ptnet_kick(pq);
}
}
@@ -1728,8 +1712,8 @@
struct netmap_adapter *na = &sc->ptna->dr.up;
if_t ifp = sc->ifp;
unsigned int batch_count = 0;
- struct ptnet_csb_gh *ptgh;
- struct ptnet_csb_hg *pthg;
+ struct nm_csb_atok *atok;
+ struct nm_csb_ktoa *ktoa;
struct netmap_kring *kring;
struct netmap_ring *ring;
struct netmap_slot *slot;
@@ -1758,8 +1742,8 @@
return ENETDOWN;
}
- ptgh = pq->ptgh;
- pthg = pq->pthg;
+ atok = pq->atok;
+ ktoa = pq->ktoa;
kring = na->tx_rings[pq->kring_id];
ring = kring->ring;
lim = kring->nkr_num_slots - 1;
@@ -1771,17 +1755,17 @@
/* We ran out of slot, let's see if the host has
* freed up some, by reading hwcur and hwtail from
* the CSB. */
- ptnet_sync_tail(pthg, kring);
+ ptnet_sync_tail(ktoa, kring);
if (PTNET_TX_NOSPACE(head, kring, minspace)) {
/* Still no slots available. Reactivate the
* interrupts so that we can be notified
* when some free slots are made available by
* the host. */
- ptgh->guest_need_kick = 1;
+ atok->appl_need_kick = 1;
/* Double-check. */
- ptnet_sync_tail(pthg, kring);
+ ptnet_sync_tail(ktoa, kring);
if (likely(PTNET_TX_NOSPACE(head, kring,
minspace))) {
break;
@@ -1790,7 +1774,7 @@
RD(1, "Found more slots by doublecheck");
/* More slots were freed before reactivating
* the interrupts. */
- ptgh->guest_need_kick = 0;
+ atok->appl_need_kick = 0;
}
}
@@ -2020,8 +2004,8 @@
{
struct ptnet_softc *sc = pq->sc;
bool have_vnet_hdr = sc->vnet_hdr_len;
- struct ptnet_csb_gh *ptgh = pq->ptgh;
- struct ptnet_csb_hg *pthg = pq->pthg;
+ struct nm_csb_atok *atok = pq->atok;
+ struct nm_csb_ktoa *ktoa = pq->ktoa;
struct netmap_adapter *na = &sc->ptna->dr.up;
struct netmap_kring *kring = na->rx_rings[pq->kring_id];
struct netmap_ring *ring = kring->ring;
@@ -2053,21 +2037,21 @@
/* We ran out of slot, let's see if the host has
* added some, by reading hwcur and hwtail from
* the CSB. */
- ptnet_sync_tail(pthg, kring);
+ ptnet_sync_tail(ktoa, kring);
if (head == ring->tail) {
/* Still no slots available. Reactivate
* interrupts as they were disabled by the
* host thread right before issuing the
* last interrupt. */
- ptgh->guest_need_kick = 1;
+ atok->appl_need_kick = 1;
/* Double-check. */
- ptnet_sync_tail(pthg, kring);
+ ptnet_sync_tail(ktoa, kring);
if (likely(head == ring->tail)) {
break;
}
- ptgh->guest_need_kick = 0;
+ atok->appl_need_kick = 0;
}
}
Index: head/sys/dev/netmap/if_vtnet_netmap.h
===================================================================
--- head/sys/dev/netmap/if_vtnet_netmap.h
+++ head/sys/dev/netmap/if_vtnet_netmap.h
@@ -79,7 +79,7 @@
}
if (deq)
- nm_prinf("%d sgs dequeued from %s-%d (netmap=%d)\n",
+ nm_prinf("%d sgs dequeued from %s-%d (netmap=%d)",
deq, nm_txrx2str(t), idx, netmap_bufs);
}
@@ -230,7 +230,7 @@
/*writeable=*/0);
if (unlikely(err)) {
if (err != ENOSPC)
- nm_prerr("virtqueue_enqueue(%s) failed: %d\n",
+ nm_prerr("virtqueue_enqueue(%s) failed: %d",
kring->name, err);
break;
}
@@ -251,7 +251,7 @@
if (token == NULL)
break;
if (unlikely(token != (void *)txq))
- nm_prerr("BUG: TX token mismatch\n");
+ nm_prerr("BUG: TX token mismatch");
else
n++;
}
@@ -307,7 +307,7 @@
/*readable=*/0, /*writeable=*/sg.sg_nseg);
if (unlikely(err)) {
if (err != ENOSPC)
- nm_prerr("virtqueue_enqueue(%s) failed: %d\n",
+ nm_prerr("virtqueue_enqueue(%s) failed: %d",
kring->name, err);
break;
}
@@ -391,7 +391,7 @@
break;
}
if (unlikely(token != (void *)rxq)) {
- nm_prerr("BUG: RX token mismatch\n");
+ nm_prerr("BUG: RX token mismatch");
} else {
/* Skip the virtio-net header. */
len -= sc->vtnet_hdr_size;
@@ -533,7 +533,7 @@
netmap_attach(&na);
- nm_prinf("vtnet attached txq=%d, txd=%d rxq=%d, rxd=%d\n",
+ nm_prinf("vtnet attached txq=%d, txd=%d rxq=%d, rxd=%d",
na.num_tx_rings, na.num_tx_desc,
na.num_tx_rings, na.num_rx_desc);
}
Index: head/sys/dev/netmap/netmap.c
===================================================================
--- head/sys/dev/netmap/netmap.c
+++ head/sys/dev/netmap/netmap.c
@@ -480,6 +480,9 @@
/* user-controlled variables */
int netmap_verbose;
+#ifdef CONFIG_NETMAP_DEBUG
+int netmap_debug;
+#endif /* CONFIG_NETMAP_DEBUG */
static int netmap_no_timestamp; /* don't timestamp on rxsync */
int netmap_no_pendintr = 1;
@@ -527,9 +530,6 @@
/* Non-zero if ptnet devices are allowed to use virtio-net headers. */
int ptnet_vnet_hdr = 1;
-/* 0 if ptnetmap should not use worker threads for TX processing */
-int ptnetmap_tx_workers = 1;
-
/*
* SYSCTL calls are grouped between SYSBEGIN and SYSEND to be emulated
* in some other operating systems
@@ -540,6 +540,10 @@
SYSCTL_NODE(_dev, OID_AUTO, netmap, CTLFLAG_RW, 0, "Netmap args");
SYSCTL_INT(_dev_netmap, OID_AUTO, verbose,
CTLFLAG_RW, &netmap_verbose, 0, "Verbose mode");
+#ifdef CONFIG_NETMAP_DEBUG
+SYSCTL_INT(_dev_netmap, OID_AUTO, debug,
+ CTLFLAG_RW, &netmap_debug, 0, "Debug messages");
+#endif /* CONFIG_NETMAP_DEBUG */
SYSCTL_INT(_dev_netmap, OID_AUTO, no_timestamp,
CTLFLAG_RW, &netmap_no_timestamp, 0, "no_timestamp");
SYSCTL_INT(_dev_netmap, OID_AUTO, no_pendintr, CTLFLAG_RW, &netmap_no_pendintr,
@@ -569,8 +573,6 @@
#endif
SYSCTL_INT(_dev_netmap, OID_AUTO, ptnet_vnet_hdr, CTLFLAG_RW, &ptnet_vnet_hdr,
0, "Allow ptnet devices to use virtio-net headers");
-SYSCTL_INT(_dev_netmap, OID_AUTO, ptnetmap_tx_workers, CTLFLAG_RW,
- &ptnetmap_tx_workers, 0, "Use worker threads for pnetmap TX processing");
SYSEND;
@@ -692,7 +694,7 @@
op = "Clamp";
}
if (op && msg)
- nm_prinf("%s %s to %d (was %d)\n", op, msg, *v, oldv);
+ nm_prinf("%s %s to %d (was %d)", op, msg, *v, oldv);
return *v;
}
@@ -776,13 +778,14 @@
na->num_rx_rings = info.num_rx_rings;
na->num_rx_desc = info.num_rx_descs;
na->rx_buf_maxsize = info.rx_buf_maxsize;
- D("configuration changed for %s: txring %d x %d, "
- "rxring %d x %d, rxbufsz %d",
- na->name, na->num_tx_rings, na->num_tx_desc,
- na->num_rx_rings, na->num_rx_desc, na->rx_buf_maxsize);
+ if (netmap_verbose)
+ nm_prinf("configuration changed for %s: txring %d x %d, "
+ "rxring %d x %d, rxbufsz %d",
+ na->name, na->num_tx_rings, na->num_tx_desc,
+ na->num_rx_rings, na->num_rx_desc, na->rx_buf_maxsize);
return 0;
}
- D("WARNING: configuration changed for %s while active: "
+ nm_prerr("WARNING: configuration changed for %s while active: "
"txring %d x %d, rxring %d x %d, rxbufsz %d",
na->name, info.num_tx_rings, info.num_tx_descs,
info.num_rx_rings, info.num_rx_descs,
@@ -828,7 +831,8 @@
enum txrx t;
if (na->tx_rings != NULL) {
- D("warning: krings were already created");
+ if (netmap_debug & NM_DEBUG_ON)
+ nm_prerr("warning: krings were already created");
return 0;
}
@@ -842,7 +846,7 @@
na->tx_rings = nm_os_malloc((size_t)len);
if (na->tx_rings == NULL) {
- D("Cannot allocate krings");
+ nm_prerr("Cannot allocate krings");
return ENOMEM;
}
na->rx_rings = na->tx_rings + n[NR_TX];
@@ -910,7 +914,8 @@
enum txrx t;
if (na->tx_rings == NULL) {
- D("warning: krings were already deleted");
+ if (netmap_debug & NM_DEBUG_ON)
+ nm_prerr("warning: krings were already deleted");
return;
}
@@ -1012,11 +1017,11 @@
* happens if the close() occurs while a concurrent
* syscall is running.
*/
- if (netmap_verbose)
- D("deleting last instance for %s", na->name);
+ if (netmap_debug & NM_DEBUG_ON)
+ nm_prinf("deleting last instance for %s", na->name);
if (nm_netmap_on(na)) {
- D("BUG: netmap on while going to delete the krings");
+ nm_prerr("BUG: netmap on while going to delete the krings");
}
na->nm_krings_delete(na);
@@ -1033,14 +1038,6 @@
priv->np_nifp = NULL;
}
-/* call with NMG_LOCK held */
-static __inline int
-nm_si_user(struct netmap_priv_d *priv, enum txrx t)
-{
- return (priv->np_na != NULL &&
- (priv->np_qlast[t] - priv->np_qfirst[t] > 1));
-}
-
struct netmap_priv_d*
netmap_priv_new(void)
{
@@ -1136,8 +1133,8 @@
/* Send packets up, outside the lock; head/prev machinery
* is only useful for Windows. */
while ((m = mbq_dequeue(q)) != NULL) {
- if (netmap_verbose & NM_VERB_HOST)
- D("sending up pkt %p size %d", m, MBUF_LEN(m));
+ if (netmap_debug & NM_DEBUG_HOST)
+ nm_prinf("sending up pkt %p size %d", m, MBUF_LEN(m));
prev = nm_os_send_up(dst, m, prev);
if (head == NULL)
head = prev;
@@ -1332,8 +1329,8 @@
m_copydata(m, 0, len, NMB(na, slot));
ND("nm %d len %d", nm_i, len);
- if (netmap_verbose)
- D("%s", nm_dump_buf(NMB(na, slot),len, 128, NULL));
+ if (netmap_debug & NM_DEBUG_HOST)
+ nm_prinf("%s", nm_dump_buf(NMB(na, slot),len, 128, NULL));
slot->len = len;
slot->flags = 0;
@@ -1500,7 +1497,7 @@
if (req->nr_mode == NR_REG_PIPE_MASTER ||
req->nr_mode == NR_REG_PIPE_SLAVE) {
/* Do not accept deprecated pipe modes. */
- D("Deprecated pipe nr_mode, use xx{yy or xx}yy syntax");
+ nm_prerr("Deprecated pipe nr_mode, use xx{yy or xx}yy syntax");
return EINVAL;
}
@@ -1527,9 +1524,7 @@
* 0 !NULL type matches and na created/found
* !0 !NULL impossible
*/
-
- /* try to see if this is a ptnetmap port */
- error = netmap_get_pt_host_na(hdr, na, nmd, create);
+ error = netmap_get_null_na(hdr, na, nmd, create);
if (error || *na != NULL)
goto out;
@@ -1739,7 +1734,7 @@
/*
* Error routine called when txsync/rxsync detects an error.
- * Can't do much more than resetting head =cur = hwcur, tail = hwtail
+ * Can't do much more than resetting head = cur = hwcur, tail = hwtail
* Return 1 on reinit.
*
* This routine is only called by the upper half of the kernel.
@@ -1810,12 +1805,6 @@
enum txrx t;
u_int j;
- if ((nr_flags & NR_PTNETMAP_HOST) && ((nr_mode != NR_REG_ALL_NIC) ||
- nr_flags & (NR_RX_RINGS_ONLY|NR_TX_RINGS_ONLY))) {
- D("Error: only NR_REG_ALL_NIC supported with netmap passthrough");
- return EINVAL;
- }
-
for_rx_tx(t) {
if (nr_flags & excluded_direction[t]) {
priv->np_qfirst[t] = priv->np_qlast[t] = 0;
@@ -1823,6 +1812,7 @@
}
switch (nr_mode) {
case NR_REG_ALL_NIC:
+ case NR_REG_NULL:
priv->np_qfirst[t] = 0;
priv->np_qlast[t] = nma_get_nrings(na, t);
ND("ALL/PIPE: %s %d %d", nm_txrx2str(t),
@@ -1831,7 +1821,7 @@
case NR_REG_SW:
case NR_REG_NIC_SW:
if (!(na->na_flags & NAF_HOST_RINGS)) {
- D("host rings not supported");
+ nm_prerr("host rings not supported");
return EINVAL;
}
priv->np_qfirst[t] = (nr_mode == NR_REG_SW ?
@@ -1844,7 +1834,7 @@
case NR_REG_ONE_NIC:
if (nr_ringid >= na->num_tx_rings &&
nr_ringid >= na->num_rx_rings) {
- D("invalid ring id %d", nr_ringid);
+ nm_prerr("invalid ring id %d", nr_ringid);
return EINVAL;
}
/* if not enough rings, use the first one */
@@ -1857,11 +1847,11 @@
priv->np_qfirst[t], priv->np_qlast[t]);
break;
default:
- D("invalid regif type %d", nr_mode);
+ nm_prerr("invalid regif type %d", nr_mode);
return EINVAL;
}
}
- priv->np_flags = nr_flags | nr_mode; // TODO
+ priv->np_flags = nr_flags;
/* Allow transparent forwarding mode in the host --> nic
* direction only if all the TX hw rings have been opened. */
@@ -1871,7 +1861,7 @@
}
if (netmap_verbose) {
- D("%s: tx [%d,%d) rx [%d,%d) id %d",
+ nm_prinf("%s: tx [%d,%d) rx [%d,%d) id %d",
na->name,
priv->np_qfirst[NR_TX],
priv->np_qlast[NR_TX],
@@ -1927,6 +1917,7 @@
}
priv->np_flags = 0;
priv->np_txpoll = 0;
+ priv->np_kloop_state = 0;
}
@@ -1943,8 +1934,8 @@
int excl = (priv->np_flags & NR_EXCLUSIVE);
enum txrx t;
- if (netmap_verbose)
- D("%s: grabbing tx [%d, %d) rx [%d, %d)",
+ if (netmap_debug & NM_DEBUG_ON)
+ nm_prinf("%s: grabbing tx [%d, %d) rx [%d, %d)",
na->name,
priv->np_qfirst[NR_TX],
priv->np_qlast[NR_TX],
@@ -2021,6 +2012,110 @@
return (priv->np_qfirst[NR_RX] != priv->np_qlast[NR_RX]);
}
+/* Validate the user-supplied CSB arrays for both directions (atok and ktoa), store their
+ * base pointers in priv and initialize one entry per bound ring. Returns 0 or an errno value. To be called under NMG_LOCK(). */
+static int
+netmap_csb_validate(struct netmap_priv_d *priv, struct nmreq_opt_csb *csbo)
+{
+ struct nm_csb_atok *csb_atok_base =
+ (struct nm_csb_atok *)(uintptr_t)csbo->csb_atok;
+ struct nm_csb_ktoa *csb_ktoa_base =
+ (struct nm_csb_ktoa *)(uintptr_t)csbo->csb_ktoa;
+ enum txrx t;
+ int num_rings[NR_TXRX], tot_rings; /* rings bound to priv: per direction and in total */
+ size_t entry_size[2]; /* [0] = atok entry size, [1] = ktoa entry size */
+ void *csb_start[2]; /* [0] = atok base, [1] = ktoa base */
+ int i;
+
+ if (priv->np_kloop_state & NM_SYNC_KLOOP_RUNNING) {
+ nm_prerr("Cannot update CSB while kloop is running");
+ return EBUSY;
+ }
+
+ tot_rings = 0;
+ for_rx_tx(t) {
+ num_rings[t] = priv->np_qlast[t] - priv->np_qfirst[t];
+ tot_rings += num_rings[t];
+ }
+ if (tot_rings <= 0)
+ return 0; /* no rings bound: succeed without recording the CSB pointers in priv */
+
+ if (!(priv->np_flags & NR_EXCLUSIVE)) {
+ nm_prerr("CSB mode requires NR_EXCLUSIVE");
+ return EINVAL;
+ }
+
+ entry_size[0] = sizeof(*csb_atok_base);
+ entry_size[1] = sizeof(*csb_ktoa_base);
+ csb_start[0] = (void *)csb_atok_base;
+ csb_start[1] = (void *)csb_ktoa_base;
+
+ for (i = 0; i < 2; i++) {
+ /* On Linux we could use access_ok() to simplify
+ * the validation. However, the advantage of
+ * this approach is that it works also on
+ * FreeBSD. */
+ size_t csb_size = tot_rings * entry_size[i];
+ void *tmp;
+ int err;
+
+ if ((uintptr_t)csb_start[i] & (entry_size[i]-1)) { /* mask test; presumably entry sizes are powers of two -- TODO confirm */
+ nm_prerr("Unaligned CSB address");
+ return EINVAL;
+ }
+
+ tmp = nm_os_malloc(csb_size);
+ if (!tmp)
+ return ENOMEM;
+ if (i == 0) {
+ /* Application --> kernel direction: copy the whole array in to check it is readable. */
+ err = copyin(csb_start[i], tmp, csb_size);
+ } else {
+ /* Kernel --> application direction: copy zeros out to check it is writable. */
+ memset(tmp, 0, csb_size);
+ err = copyout(tmp, csb_start[i], csb_size);
+ }
+ nm_os_free(tmp); /* the scratch copy is only a probe; its contents are not used */
+ if (err) {
+ nm_prerr("Invalid CSB address");
+ return err;
+ }
+ }
+
+ priv->np_csb_atok_base = csb_atok_base;
+ priv->np_csb_ktoa_base = csb_ktoa_base;
+
+ /* Initialize the CSB: entries for TX rings come first, RX entries follow them. */
+ for_rx_tx(t) {
+ for (i = 0; i < num_rings[t]; i++) {
+ struct netmap_kring *kring =
+ NMR(priv->np_na, t)[i + priv->np_qfirst[t]];
+ struct nm_csb_atok *csb_atok = csb_atok_base + i;
+ struct nm_csb_ktoa *csb_ktoa = csb_ktoa_base + i;
+
+ if (t == NR_RX) {
+ csb_atok += num_rings[NR_TX]; /* skip past the TX entries */
+ csb_ktoa += num_rings[NR_TX];
+ }
+
+ CSB_WRITE(csb_atok, head, kring->rhead);
+ CSB_WRITE(csb_atok, cur, kring->rcur);
+ CSB_WRITE(csb_atok, appl_need_kick, 1);
+ CSB_WRITE(csb_atok, sync_flags, 1); /* NOTE(review): meaning of the value 1 is not visible here -- confirm */
+ CSB_WRITE(csb_ktoa, hwcur, kring->nr_hwcur);
+ CSB_WRITE(csb_ktoa, hwtail, kring->nr_hwtail);
+ CSB_WRITE(csb_ktoa, kern_need_kick, 1);
+
+ nm_prinf("csb_init for kring %s: head %u, cur %u, "
+ "hwcur %u, hwtail %u", kring->name,
+ kring->rhead, kring->rcur, kring->nr_hwcur,
+ kring->nr_hwtail); /* NOTE(review): printed for every ring, not gated on netmap_verbose/netmap_debug */
+ }
+ }
+
+ return 0;
+}
+
/*
* possibly move the interface to netmap-mode.
* If success it returns a pointer to netmap_if, otherwise NULL.
@@ -2137,7 +2232,7 @@
na->name, mtu, na->rx_buf_maxsize, nbs);
if (na->rx_buf_maxsize == 0) {
- D("%s: error: rx_buf_maxsize == 0", na->name);
+ nm_prerr("%s: error: rx_buf_maxsize == 0", na->name);
error = EIO;
goto err_drop_mem;
}
@@ -2149,7 +2244,7 @@
* cannot be used in this case. */
if (nbs < mtu) {
nm_prerr("error: netmap buf size (%u) "
- "< device MTU (%u)\n", nbs, mtu);
+ "< device MTU (%u)", nbs, mtu);
error = EINVAL;
goto err_drop_mem;
}
@@ -2162,14 +2257,14 @@
if (!(na->na_flags & NAF_MOREFRAG)) {
nm_prerr("error: large MTU (%d) needed "
"but %s does not support "
- "NS_MOREFRAG\n", mtu,
+ "NS_MOREFRAG", mtu,
na->ifp->if_xname);
error = EINVAL;
goto err_drop_mem;
} else if (nbs < na->rx_buf_maxsize) {
nm_prerr("error: using NS_MOREFRAG on "
"%s requires netmap buf size "
- ">= %u\n", na->ifp->if_xname,
+ ">= %u", na->ifp->if_xname,
na->rx_buf_maxsize);
error = EINVAL;
goto err_drop_mem;
@@ -2177,7 +2272,7 @@
nm_prinf("info: netmap application on "
"%s needs to support "
"NS_MOREFRAG "
- "(MTU=%u,netmap_buf_size=%u)\n",
+ "(MTU=%u,netmap_buf_size=%u)",
na->ifp->if_xname, mtu, nbs);
}
}
@@ -2307,7 +2402,6 @@
struct ifnet *ifp = NULL;
int error = 0;
u_int i, qfirst, qlast;
- struct netmap_if *nifp;
struct netmap_kring **krings;
int sync_flags;
enum txrx t;
@@ -2316,14 +2410,10 @@
case NIOCCTRL: {
struct nmreq_header *hdr = (struct nmreq_header *)data;
- if (hdr->nr_version != NETMAP_API) {
- D("API mismatch for reqtype %d: got %d need %d",
- hdr->nr_version,
- hdr->nr_version, NETMAP_API);
- hdr->nr_version = NETMAP_API;
- }
if (hdr->nr_version < NETMAP_MIN_API ||
hdr->nr_version > NETMAP_MAX_API) {
+ nm_prerr("API mismatch: got %d need %d",
+ hdr->nr_version, NETMAP_API);
return EINVAL;
}
@@ -2345,13 +2435,13 @@
case NETMAP_REQ_REGISTER: {
struct nmreq_register *req =
(struct nmreq_register *)(uintptr_t)hdr->nr_body;
+ struct netmap_if *nifp;
+
/* Protect access to priv from concurrent requests. */
NMG_LOCK();
do {
- u_int memflags;
-#ifdef WITH_EXTMEM
struct nmreq_option *opt;
-#endif /* WITH_EXTMEM */
+ u_int memflags;
if (priv->np_nifp != NULL) { /* thread already registered */
error = EBUSY;
@@ -2382,6 +2472,10 @@
/* find the allocator and get a reference */
nmd = netmap_mem_find(req->nr_mem_id);
if (nmd == NULL) {
+ if (netmap_verbose) {
+ nm_prerr("%s: failed to find mem_id %u",
+ hdr->nr_name, req->nr_mem_id);
+ }
error = EINVAL;
break;
}
@@ -2397,6 +2491,8 @@
}
if (na->virt_hdr_len && !(req->nr_flags & NR_ACCEPT_VNET_HDR)) {
+ nm_prerr("virt_hdr_len=%d, but application does "
+ "not accept it", na->virt_hdr_len);
error = EIO;
break;
}
@@ -2406,6 +2502,23 @@
if (error) { /* reg. failed, release priv and ref */
break;
}
+
+ opt = nmreq_findoption((struct nmreq_option *)(uintptr_t)hdr->nr_options,
+ NETMAP_REQ_OPT_CSB);
+ if (opt != NULL) {
+ struct nmreq_opt_csb *csbo =
+ (struct nmreq_opt_csb *)opt;
+ error = nmreq_checkduplicate(opt);
+ if (!error) {
+ error = netmap_csb_validate(priv, csbo);
+ }
+ opt->nro_status = error;
+ if (error) {
+ netmap_do_unregif(priv);
+ break;
+ }
+ }
+
nifp = priv->np_nifp;
priv->np_td = td; /* for debugging purposes */
@@ -2430,12 +2543,12 @@
if (req->nr_extra_bufs) {
if (netmap_verbose)
- D("requested %d extra buffers",
+ nm_prinf("requested %d extra buffers",
req->nr_extra_bufs);
req->nr_extra_bufs = netmap_extra_alloc(na,
&nifp->ni_bufs_head, req->nr_extra_bufs);
if (netmap_verbose)
- D("got %d extra buffers", req->nr_extra_bufs);
+ nm_prinf("got %d extra buffers", req->nr_extra_bufs);
}
req->nr_offset = netmap_mem_if_offset(na->nm_mem, nifp);
@@ -2473,6 +2586,7 @@
* so that we can call netmap_get_na(). */
struct nmreq_register regreq;
bzero(&regreq, sizeof(regreq));
+ regreq.nr_mode = NR_REG_ALL_NIC;
regreq.nr_tx_slots = req->nr_tx_slots;
regreq.nr_rx_slots = req->nr_rx_slots;
regreq.nr_tx_rings = req->nr_tx_rings;
@@ -2494,6 +2608,10 @@
} else {
nmd = netmap_mem_find(req->nr_mem_id ? req->nr_mem_id : 1);
if (nmd == NULL) {
+ if (netmap_verbose)
+ nm_prerr("%s: failed to find mem_id %u",
+ hdr->nr_name,
+ req->nr_mem_id ? req->nr_mem_id : 1);
error = EINVAL;
break;
}
@@ -2505,8 +2623,6 @@
break;
if (na == NULL) /* only memory info */
break;
- req->nr_offset = 0;
- req->nr_rx_slots = req->nr_tx_slots = 0;
netmap_update_config(na);
req->nr_rx_rings = na->num_rx_rings;
req->nr_tx_rings = na->num_tx_rings;
@@ -2519,17 +2635,17 @@
}
#ifdef WITH_VALE
case NETMAP_REQ_VALE_ATTACH: {
- error = nm_bdg_ctl_attach(hdr, NULL /* userspace request */);
+ error = netmap_vale_attach(hdr, NULL /* userspace request */);
break;
}
case NETMAP_REQ_VALE_DETACH: {
- error = nm_bdg_ctl_detach(hdr, NULL /* userspace request */);
+ error = netmap_vale_detach(hdr, NULL /* userspace request */);
break;
}
case NETMAP_REQ_VALE_LIST: {
- error = netmap_bdg_list(hdr);
+ error = netmap_vale_list(hdr);
break;
}
@@ -2540,12 +2656,16 @@
* so that we can call netmap_get_bdg_na(). */
struct nmreq_register regreq;
bzero(&regreq, sizeof(regreq));
+ regreq.nr_mode = NR_REG_ALL_NIC;
+
/* For now we only support virtio-net headers, and only for
* VALE ports, but this may change in future. Valid lengths
* for the virtio-net header are 0 (no header), 10 and 12. */
if (req->nr_hdr_len != 0 &&
req->nr_hdr_len != sizeof(struct nm_vnet_hdr) &&
req->nr_hdr_len != 12) {
+ if (netmap_verbose)
+ nm_prerr("invalid hdr_len %u", req->nr_hdr_len);
error = EINVAL;
break;
}
@@ -2562,7 +2682,8 @@
if (na->virt_hdr_len) {
vpna->mfs = NETMAP_BUF_SIZE(na);
}
- D("Using vnet_hdr_len %d for %p", na->virt_hdr_len, na);
+ if (netmap_verbose)
+ nm_prinf("Using vnet_hdr_len %d for %p", na->virt_hdr_len, na);
netmap_adapter_put(na);
} else if (!na) {
error = ENXIO;
@@ -2581,6 +2702,7 @@
struct ifnet *ifp;
bzero(&regreq, sizeof(regreq));
+ regreq.nr_mode = NR_REG_ALL_NIC;
NMG_LOCK();
hdr->nr_reqtype = NETMAP_REQ_REGISTER;
hdr->nr_body = (uintptr_t)&regreq;
@@ -2612,22 +2734,80 @@
}
#endif /* WITH_VALE */
case NETMAP_REQ_POOLS_INFO_GET: {
+ /* Get information from the memory allocator used for
+ * hdr->nr_name. */
struct nmreq_pools_info *req =
(struct nmreq_pools_info *)(uintptr_t)hdr->nr_body;
- /* Get information from the memory allocator. This
- * netmap device must already be bound to a port.
- * Note that hdr->nr_name is ignored. */
NMG_LOCK();
- if (priv->np_na && priv->np_na->nm_mem) {
- struct netmap_mem_d *nmd = priv->np_na->nm_mem;
+ do {
+ /* Build a nmreq_register out of the nmreq_pools_info,
+ * so that we can call netmap_get_na(). */
+ struct nmreq_register regreq;
+ bzero(&regreq, sizeof(regreq));
+ regreq.nr_mem_id = req->nr_mem_id;
+ regreq.nr_mode = NR_REG_ALL_NIC;
+
+ hdr->nr_reqtype = NETMAP_REQ_REGISTER;
+ hdr->nr_body = (uintptr_t)&regreq;
+ error = netmap_get_na(hdr, &na, &ifp, NULL, 1 /* create */);
+ hdr->nr_reqtype = NETMAP_REQ_POOLS_INFO_GET; /* reset type */
+ hdr->nr_body = (uintptr_t)req; /* reset nr_body */
+ if (error) {
+ na = NULL;
+ ifp = NULL;
+ break;
+ }
+ nmd = na->nm_mem; /* grab the memory allocator */
+ if (nmd == NULL) {
+ error = EINVAL;
+ break;
+ }
+
+ /* Finalize the memory allocator, get the pools
+ * information and release the allocator. */
+ error = netmap_mem_finalize(nmd, na);
+ if (error) {
+ break;
+ }
error = netmap_mem_pools_info_get(req, nmd);
- } else {
+ netmap_mem_drop(na);
+ } while (0);
+ netmap_unget_na(na, ifp);
+ NMG_UNLOCK();
+ break;
+ }
+
+ case NETMAP_REQ_CSB_ENABLE: {
+ struct nmreq_option *opt;
+
+ opt = nmreq_findoption((struct nmreq_option *)(uintptr_t)hdr->nr_options,
+ NETMAP_REQ_OPT_CSB);
+ if (opt == NULL) {
error = EINVAL;
+ } else {
+ struct nmreq_opt_csb *csbo =
+ (struct nmreq_opt_csb *)opt;
+ error = nmreq_checkduplicate(opt);
+ if (!error) {
+ NMG_LOCK();
+ error = netmap_csb_validate(priv, csbo);
+ NMG_UNLOCK();
+ }
+ opt->nro_status = error;
}
- NMG_UNLOCK();
break;
}
+ case NETMAP_REQ_SYNC_KLOOP_START: {
+ error = netmap_sync_kloop(priv, hdr);
+ break;
+ }
+
+ case NETMAP_REQ_SYNC_KLOOP_STOP: {
+ error = netmap_sync_kloop_stop(priv);
+ break;
+ }
+
default: {
error = EINVAL;
break;
@@ -2641,22 +2821,20 @@
case NIOCTXSYNC:
case NIOCRXSYNC: {
- nifp = priv->np_nifp;
-
- if (nifp == NULL) {
+ if (unlikely(priv->np_nifp == NULL)) {
error = ENXIO;
break;
}
mb(); /* make sure following reads are not from cache */
- na = priv->np_na; /* we have a reference */
-
- if (na == NULL) {
- D("Internal error: nifp != NULL && na == NULL");
- error = ENXIO;
+ if (unlikely(priv->np_csb_atok_base)) {
+ nm_prerr("Invalid sync in CSB mode");
+ error = EBUSY;
break;
}
+ na = priv->np_na; /* we have a reference */
+
mbq_init(&q);
t = (cmd == NIOCTXSYNC ? NR_TX : NR_RX);
krings = NMR(na, t);
@@ -2674,8 +2852,8 @@
}
if (cmd == NIOCTXSYNC) {
- if (netmap_verbose & NM_VERB_TXSYNC)
- D("pre txsync ring %d cur %d hwcur %d",
+ if (netmap_debug & NM_DEBUG_TXSYNC)
+ nm_prinf("pre txsync ring %d cur %d hwcur %d",
i, ring->cur,
kring->nr_hwcur);
if (nm_txsync_prologue(kring, ring) >= kring->nkr_num_slots) {
@@ -2683,8 +2861,8 @@
} else if (kring->nm_sync(kring, sync_flags | NAF_FORCE_RECLAIM) == 0) {
nm_sync_finalize(kring);
}
- if (netmap_verbose & NM_VERB_TXSYNC)
- D("post txsync ring %d cur %d hwcur %d",
+ if (netmap_debug & NM_DEBUG_TXSYNC)
+ nm_prinf("post txsync ring %d cur %d hwcur %d",
i, ring->cur,
kring->nr_hwcur);
} else {
@@ -2739,18 +2917,22 @@
case NETMAP_REQ_VALE_NEWIF:
return sizeof(struct nmreq_vale_newif);
case NETMAP_REQ_VALE_DELIF:
+ case NETMAP_REQ_SYNC_KLOOP_STOP:
+ case NETMAP_REQ_CSB_ENABLE:
return 0;
case NETMAP_REQ_VALE_POLLING_ENABLE:
case NETMAP_REQ_VALE_POLLING_DISABLE:
return sizeof(struct nmreq_vale_polling);
case NETMAP_REQ_POOLS_INFO_GET:
return sizeof(struct nmreq_pools_info);
+ case NETMAP_REQ_SYNC_KLOOP_START:
+ return sizeof(struct nmreq_sync_kloop_start);
}
return 0;
}
static size_t
-nmreq_opt_size_by_type(uint16_t nro_reqtype)
+nmreq_opt_size_by_type(uint32_t nro_reqtype, uint64_t nro_size)
{
size_t rv = sizeof(struct nmreq_option);
#ifdef NETMAP_REQ_OPT_DEBUG
@@ -2763,6 +2945,13 @@
rv = sizeof(struct nmreq_opt_extmem);
break;
#endif /* WITH_EXTMEM */
+ case NETMAP_REQ_OPT_SYNC_KLOOP_EVENTFDS:
+ if (nro_size >= rv)
+ rv = nro_size;
+ break;
+ case NETMAP_REQ_OPT_CSB:
+ rv = sizeof(struct nmreq_opt_csb);
+ break;
}
/* subtract the common header */
return rv - sizeof(struct nmreq_option);
@@ -2778,8 +2967,11 @@
struct nmreq_option buf;
uint64_t *ptrs;
- if (hdr->nr_reserved)
+ if (hdr->nr_reserved) {
+ if (netmap_verbose)
+ nm_prerr("nr_reserved must be zero");
return EINVAL;
+ }
if (!nr_body_is_user)
return 0;
@@ -2796,6 +2988,8 @@
(!rqsz && hdr->nr_body != (uintptr_t)NULL)) {
/* Request body expected, but not found; or
* request body found but unexpected. */
+ if (netmap_verbose)
+ nm_prerr("nr_body expected but not found, or vice versa");
error = EINVAL;
goto out_err;
}
@@ -2809,7 +3003,7 @@
if (error)
goto out_err;
optsz += sizeof(*src);
- optsz += nmreq_opt_size_by_type(buf.nro_reqtype);
+ optsz += nmreq_opt_size_by_type(buf.nro_reqtype, buf.nro_size);
if (rqsz + optsz > NETMAP_REQ_MAXSIZE) {
error = EMSGSIZE;
goto out_err;
@@ -2863,7 +3057,8 @@
p = (char *)(opt + 1);
/* copy the option body */
- optsz = nmreq_opt_size_by_type(opt->nro_reqtype);
+ optsz = nmreq_opt_size_by_type(opt->nro_reqtype,
+ opt->nro_size);
if (optsz) {
/* the option body follows the option header */
error = copyin(src + 1, p, optsz);
@@ -2937,7 +3132,8 @@
/* copy the option body only if there was no error */
if (!rerror && !src->nro_status) {
- optsz = nmreq_opt_size_by_type(src->nro_reqtype);
+ optsz = nmreq_opt_size_by_type(src->nro_reqtype,
+ src->nro_size);
if (optsz) {
error = copyout(src + 1, dst + 1, optsz);
if (error) {
@@ -3015,7 +3211,8 @@
struct netmap_adapter *na;
struct netmap_kring *kring;
struct netmap_ring *ring;
- u_int i, check_all_tx, check_all_rx, want[NR_TXRX], revents = 0;
+ u_int i, want[NR_TXRX], revents = 0;
+ NM_SELINFO_T *si[NR_TXRX];
#define want_tx want[NR_TX]
#define want_rx want[NR_RX]
struct mbq q; /* packets from RX hw queues to host stack */
@@ -3038,27 +3235,31 @@
mbq_init(&q);
- if (priv->np_nifp == NULL) {
- D("No if registered");
+ if (unlikely(priv->np_nifp == NULL)) {
return POLLERR;
}
mb(); /* make sure following reads are not from cache */
na = priv->np_na;
- if (!nm_netmap_on(na))
+ if (unlikely(!nm_netmap_on(na)))
return POLLERR;
- if (netmap_verbose & 0x8000)
- D("device %s events 0x%x", na->name, events);
+ if (unlikely(priv->np_csb_atok_base)) {
+ nm_prerr("Invalid poll in CSB mode");
+ return POLLERR;
+ }
+
+ if (netmap_debug & NM_DEBUG_ON)
+ nm_prinf("device %s events 0x%x", na->name, events);
want_tx = events & (POLLOUT | POLLWRNORM);
want_rx = events & (POLLIN | POLLRDNORM);
/*
- * check_all_{tx|rx} are set if the card has more than one queue AND
- * the file descriptor is bound to all of them. If so, we sleep on
- * the "global" selinfo, otherwise we sleep on individual selinfo
- * (FreeBSD only allows two selinfo's per file descriptor).
+ * If the card has more than one queue AND the file descriptor is
+ * bound to all of them, we sleep on the "global" selinfo, otherwise
+ * we sleep on individual selinfo (FreeBSD only allows two selinfo's
+ * per file descriptor).
* The interrupt routine in the driver wake one or the other
* (or both) depending on which clients are active.
*
@@ -3067,8 +3268,10 @@
* there are pending packets to send. The latter can be disabled
* passing NETMAP_NO_TX_POLL in the NIOCREG call.
*/
- check_all_tx = nm_si_user(priv, NR_TX);
- check_all_rx = nm_si_user(priv, NR_RX);
+ si[NR_RX] = nm_si_user(priv, NR_RX) ? &na->si[NR_RX] :
+ &na->rx_rings[priv->np_qfirst[NR_RX]]->si;
+ si[NR_TX] = nm_si_user(priv, NR_TX) ? &na->si[NR_TX] :
+ &na->tx_rings[priv->np_qfirst[NR_TX]]->si;
#ifdef __FreeBSD__
/*
@@ -3105,10 +3308,8 @@
#ifdef linux
/* The selrecord must be unconditional on linux. */
- nm_os_selrecord(sr, check_all_tx ?
- &na->si[NR_TX] : &na->tx_rings[priv->np_qfirst[NR_TX]]->si);
- nm_os_selrecord(sr, check_all_rx ?
- &na->si[NR_RX] : &na->rx_rings[priv->np_qfirst[NR_RX]]->si);
+ nm_os_selrecord(sr, si[NR_RX]);
+ nm_os_selrecord(sr, si[NR_TX]);
#endif /* linux */
/*
@@ -3173,8 +3374,7 @@
send_down = 0;
if (want_tx && retry_tx && sr) {
#ifndef linux
- nm_os_selrecord(sr, check_all_tx ?
- &na->si[NR_TX] : &na->tx_rings[priv->np_qfirst[NR_TX]]->si);
+ nm_os_selrecord(sr, si[NR_TX]);
#endif /* !linux */
retry_tx = 0;
goto flush_tx;
@@ -3234,8 +3434,7 @@
#ifndef linux
if (retry_rx && sr) {
- nm_os_selrecord(sr, check_all_rx ?
- &na->si[NR_RX] : &na->rx_rings[priv->np_qfirst[NR_RX]]->si);
+ nm_os_selrecord(sr, si[NR_RX]);
}
#endif /* !linux */
if (send_down || retry_rx) {
@@ -3290,7 +3489,7 @@
}
if (!na->nm_intr) {
- D("Cannot %s interrupts for %s", onoff ? "enable" : "disable",
+ nm_prerr("Cannot %s interrupts for %s", onoff ? "enable" : "disable",
na->name);
return -1;
}
@@ -3328,12 +3527,6 @@
int
netmap_attach_common(struct netmap_adapter *na)
{
- if (na->num_tx_rings == 0 || na->num_rx_rings == 0) {
- D("%s: invalid rings tx %d rx %d",
- na->name, na->num_tx_rings, na->num_rx_rings);
- return EINVAL;
- }
-
if (!na->rx_buf_maxsize) {
/* Set a conservative default (larger is safer). */
na->rx_buf_maxsize = PAGE_SIZE;
@@ -3436,20 +3629,31 @@
struct ifnet *ifp = NULL;
if (size < sizeof(struct netmap_hw_adapter)) {
- D("Invalid netmap adapter size %d", (int)size);
+ if (netmap_debug & NM_DEBUG_ON)
+ nm_prerr("Invalid netmap adapter size %d", (int)size);
return EINVAL;
}
- if (arg == NULL || arg->ifp == NULL)
+ if (arg == NULL || arg->ifp == NULL) {
+ if (netmap_debug & NM_DEBUG_ON)
+ nm_prerr("either arg or arg->ifp is NULL");
return EINVAL;
+ }
+ if (arg->num_tx_rings == 0 || arg->num_rx_rings == 0) {
+ if (netmap_debug & NM_DEBUG_ON)
+ nm_prerr("%s: invalid rings tx %d rx %d",
+ arg->name, arg->num_tx_rings, arg->num_rx_rings);
+ return EINVAL;
+ }
+
ifp = arg->ifp;
if (NM_NA_CLASH(ifp)) {
/* If NA(ifp) is not null but there is no valid netmap
* adapter it means that someone else is using the same
* pointer (e.g. ax25_ptr on linux). This happens for
* instance when also PF_RING is in use. */
- D("Error: netmap adapter hook is busy");
+ nm_prerr("Error: netmap adapter hook is busy");
return EBUSY;
}
@@ -3458,7 +3662,7 @@
goto fail;
hwna->up = *arg;
hwna->up.na_flags |= NAF_HOST_RINGS | NAF_NATIVE;
- strncpy(hwna->up.name, ifp->if_xname, sizeof(hwna->up.name));
+ strlcpy(hwna->up.name, ifp->if_xname, sizeof(hwna->up.name));
if (override_reg) {
hwna->nm_hw_register = hwna->up.nm_register;
hwna->up.nm_register = netmap_hw_reg;
@@ -3483,7 +3687,7 @@
return 0;
fail:
- D("fail, arg %p ifp %p na %p", arg, ifp, hwna);
+ nm_prerr("fail, arg %p ifp %p na %p", arg, ifp, hwna);
return (hwna ? EINVAL : ENOMEM);
}
@@ -3521,7 +3725,8 @@
na->nm_dtor(na);
if (na->tx_rings) { /* XXX should not happen */
- D("freeing leftover tx_rings");
+ if (netmap_debug & NM_DEBUG_ON)
+ nm_prerr("freeing leftover tx_rings");
na->nm_krings_delete(na);
}
netmap_pipe_dealloc(na);
@@ -3619,7 +3824,7 @@
// mtx_lock(&na->core_lock);
if (!nm_netmap_on(na)) {
- D("%s not in netmap mode anymore", na->name);
+ nm_prerr("%s not in netmap mode anymore", na->name);
error = ENXIO;
goto done;
}
@@ -3638,7 +3843,7 @@
// XXX reconsider long packets if we handle fragments
if (len > NETMAP_BUF_SIZE(na)) { /* too long for us */
- D("%s from_host, drop packet size %d > %d", na->name,
+ nm_prerr("%s from_host, drop packet size %d > %d", na->name,
len, NETMAP_BUF_SIZE(na));
goto done;
}
@@ -3749,8 +3954,8 @@
new_hwofs -= lim + 1;
/* Always set the new offset value and realign the ring. */
- if (netmap_verbose)
- D("%s %s%d hwofs %d -> %d, hwtail %d -> %d",
+ if (netmap_debug & NM_DEBUG_ON)
+ nm_prinf("%s %s%d hwofs %d -> %d, hwtail %d -> %d",
na->name,
tx == NR_TX ? "TX" : "RX", n,
kring->nkr_hwofs, new_hwofs,
@@ -3796,8 +4001,8 @@
q &= NETMAP_RING_MASK;
- if (netmap_verbose) {
- RD(5, "received %s queue %d", work_done ? "RX" : "TX" , q);
+ if (netmap_debug & (NM_DEBUG_RXINTR|NM_DEBUG_TXINTR)) {
+ nm_prlim(5, "received %s queue %d", work_done ? "RX" : "TX" , q);
}
if (q >= nma_get_nrings(na, t))
@@ -3879,7 +4084,7 @@
struct ifnet *ifp = na->ifp;
/* We undo the setup for intercepting packets only if we are the
- * last user of this adapapter. */
+ * last user of this adapter. */
if (na->active_fds > 0) {
return;
}
@@ -3890,7 +4095,6 @@
na->na_flags &= ~NAF_NETMAP_ON;
}
-
/*
* Module loader and unloader
*
@@ -3915,7 +4119,7 @@
netmap_uninit_bridges();
netmap_mem_fini();
NMG_LOCK_DESTROY();
- nm_prinf("netmap: unloaded module.\n");
+ nm_prinf("netmap: unloaded module.");
}
@@ -3952,7 +4156,7 @@
if (error)
goto fail;
- nm_prinf("netmap: loaded module\n");
+ nm_prinf("netmap: loaded module");
return (0);
fail:
netmap_fini();
Index: head/sys/dev/netmap/netmap_bdg.h
===================================================================
--- head/sys/dev/netmap/netmap_bdg.h
+++ head/sys/dev/netmap/netmap_bdg.h
@@ -44,6 +44,40 @@
#endif /* __FreeBSD__ */
+/*
+ * The following bridge-related functions are used by other
+ * kernel modules.
+ *
+ * VALE only supports unicast or broadcast. The lookup
+ * function can return 0 .. NM_BDG_MAXPORTS-1 for regular ports,
+ * NM_BDG_MAXPORTS for broadcast, NM_BDG_MAXPORTS+1 to indicate
+ * drop.
+ */
+typedef uint32_t (*bdg_lookup_fn_t)(struct nm_bdg_fwd *ft, uint8_t *ring_nr,
+ struct netmap_vp_adapter *, void *private_data);
+typedef int (*bdg_config_fn_t)(struct nm_ifreq *);
+typedef void (*bdg_dtor_fn_t)(const struct netmap_vp_adapter *);
+typedef void *(*bdg_update_private_data_fn_t)(void *private_data, void *callback_data, int *error);
+typedef int (*bdg_vp_create_fn_t)(struct nmreq_header *hdr,
+ struct ifnet *ifp, struct netmap_mem_d *nmd,
+ struct netmap_vp_adapter **ret);
+typedef int (*bdg_bwrap_attach_fn_t)(const char *nr_name, struct netmap_adapter *hwna);
+struct netmap_bdg_ops {
+ bdg_lookup_fn_t lookup; /* yields a port index, NM_BDG_BROADCAST or NM_BDG_NOPORT */
+ bdg_config_fn_t config; /* optional: callers check for NULL before invoking it */
+ bdg_dtor_fn_t dtor; /* optional: NULL-checked, invoked on the port being detached */
+ bdg_vp_create_fn_t vp_create; /* builds a virtual port adapter, handed back through 'ret' */
+ bdg_bwrap_attach_fn_t bwrap_attach; /* used when nm_bdg_attach() returns NM_NEED_BWRAP */
+ char name[IFNAMSIZ]; /* strlen(name) is the minimum length accepted for a bridge name */
+};
+int netmap_bwrap_attach(const char *name, struct netmap_adapter *, struct netmap_bdg_ops *);
+int netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *private_data, void *auth_token);
+
+#define NM_BRIDGES 8 /* number of bridges */
+#define NM_BDG_MAXPORTS 254 /* up to 254 */
+#define NM_BDG_BROADCAST NM_BDG_MAXPORTS
+#define NM_BDG_NOPORT (NM_BDG_MAXPORTS+1)
+
/* XXX Should go away after fixing find_bridge() - Michio */
#define NM_BDG_HASH 1024 /* forwarding table entries */
@@ -95,7 +129,8 @@
* different ring index.
* The function is set by netmap_bdg_regops().
*/
- struct netmap_bdg_ops *bdg_ops;
+ struct netmap_bdg_ops bdg_ops;
+ struct netmap_bdg_ops bdg_saved_ops;
/*
* Contains the data structure used by the bdg_ops.lookup function.
@@ -111,6 +146,7 @@
*/
#define NM_BDG_ACTIVE 1
#define NM_BDG_EXCLUSIVE 2
+#define NM_BDG_NEED_BWRAP 4
uint8_t bdg_flags;
@@ -149,6 +185,13 @@
struct netmap_adapter *hwna);
int netmap_bwrap_krings_create_common(struct netmap_adapter *na);
void netmap_bwrap_krings_delete_common(struct netmap_adapter *na);
+struct nm_bridge *netmap_init_bridges2(u_int);
+void netmap_uninit_bridges2(struct nm_bridge *, u_int);
+int netmap_bdg_update_private_data(const char *name, bdg_update_private_data_fn_t callback,
+ void *callback_data, void *auth_token);
+int netmap_bdg_config(struct nm_ifreq *nifr);
+int nm_is_bwrap(struct netmap_adapter *);
+
#define NM_NEED_BWRAP (-2)
#endif /* _NET_NETMAP_BDG_H_ */
Index: head/sys/dev/netmap/netmap_bdg.c
===================================================================
--- head/sys/dev/netmap/netmap_bdg.c
+++ head/sys/dev/netmap/netmap_bdg.c
@@ -126,7 +126,7 @@
* Right now we have a static array and deletions are protected
* by an exclusive lock.
*/
-static struct nm_bridge *nm_bridges;
+struct nm_bridge *nm_bridges;
#endif /* !CONFIG_NET_NS */
@@ -139,15 +139,15 @@
(c == '_');
}
-/* Validate the name of a VALE bridge port and return the
+/* Validate the name of a bdg port and return the
* position of the ":" character. */
static int
-nm_vale_name_validate(const char *name)
+nm_bdg_name_validate(const char *name, size_t prefixlen)
{
int colon_pos = -1;
int i;
- if (!name || strlen(name) < strlen(NM_BDG_NAME)) {
+ if (!name || strlen(name) < prefixlen) {
return -1;
}
@@ -186,9 +186,10 @@
netmap_bns_getbridges(&bridges, &num_bridges);
- namelen = nm_vale_name_validate(name);
+ namelen = nm_bdg_name_validate(name,
+ (ops != NULL ? strlen(ops->name) : 0));
if (namelen < 0) {
- D("invalid bridge name %s", name ? name : NULL);
+ nm_prerr("invalid bridge name %s", name ? name : NULL);
return NULL;
}
@@ -213,7 +214,7 @@
b->bdg_active_ports);
b->ht = nm_os_malloc(sizeof(struct nm_hash_ent) * NM_BDG_HASH);
if (b->ht == NULL) {
- D("failed to allocate hash table");
+ nm_prerr("failed to allocate hash table");
return NULL;
}
strncpy(b->bdg_basename, name, namelen);
@@ -222,7 +223,7 @@
for (i = 0; i < NM_BDG_MAXPORTS; i++)
b->bdg_port_index[i] = i;
/* set the default function */
- b->bdg_ops = ops;
+ b->bdg_ops = b->bdg_saved_ops = *ops;
b->private_data = b->ht;
b->bdg_flags = 0;
NM_BNS_GET(b);
@@ -240,13 +241,49 @@
ND("marking bridge %s as free", b->bdg_basename);
nm_os_free(b->ht);
- b->bdg_ops = NULL;
+ memset(&b->bdg_ops, 0, sizeof(b->bdg_ops));
+ memset(&b->bdg_saved_ops, 0, sizeof(b->bdg_saved_ops));
b->bdg_flags = 0;
NM_BNS_PUT(b);
return 0;
}
+/* Called by external kernel modules (e.g., Openvswitch)
+ * to modify the private data previously given to regops().
+ * 'name' may be just the bridge's name (including ':' if it
+ * is not just NM_BDG_NAME). Called without NMG_LOCK (taken here).
+ * Returns EINVAL if no bridge is found, EACCES on a bad auth_token.
+ */
+int
+netmap_bdg_update_private_data(const char *name, bdg_update_private_data_fn_t callback,
+ void *callback_data, void *auth_token)
+{
+ void *private_data = NULL;
+ struct nm_bridge *b;
+ int error = 0; /* stays 0 unless a check below fails or the callback sets it */
+ NMG_LOCK();
+ b = nm_find_bridge(name, 0 /* don't create */, NULL);
+ if (!b) {
+ error = EINVAL;
+ goto unlock_update_priv;
+ }
+ if (!nm_bdg_valid_auth_token(b, auth_token)) {
+ error = EACCES;
+ goto unlock_update_priv;
+ }
+ BDG_WLOCK(b); /* the swap below happens under the bridge write lock */
+ private_data = callback(b->private_data, callback_data, &error);
+ b->private_data = private_data; /* stored unconditionally, even if the callback set error */
+ BDG_WUNLOCK(b);
+
+unlock_update_priv:
+ NMG_UNLOCK();
+ return error;
+}
+
+
+
/* remove from bridge b the ports in slots hw and sw
* (sw can be -1 if not needed)
*/
@@ -267,8 +304,8 @@
acquire BDG_WLOCK() and copy back the array.
*/
- if (netmap_verbose)
- D("detach %d and %d (lim %d)", hw, sw, lim);
+ if (netmap_debug & NM_DEBUG_BDG)
+ nm_prinf("detach %d and %d (lim %d)", hw, sw, lim);
/* make a copy of the list of active ports, update it,
* and then copy back within BDG_WLOCK().
*/
@@ -291,12 +328,12 @@
}
}
if (hw >= 0 || sw >= 0) {
- D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
+ nm_prerr("delete failed hw %d sw %d, should panic...", hw, sw);
}
BDG_WLOCK(b);
- if (b->bdg_ops->dtor)
- b->bdg_ops->dtor(b->bdg_ports[s_hw]);
+ if (b->bdg_ops.dtor)
+ b->bdg_ops.dtor(b->bdg_ports[s_hw]);
b->bdg_ports[s_hw] = NULL;
if (s_sw >= 0) {
b->bdg_ports[s_sw] = NULL;
@@ -402,7 +439,7 @@
/* yes we should, see if we have space to attach entries */
needed = 2; /* in some cases we only need 1 */
if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
- D("bridge full %d, cannot create new port", b->bdg_active_ports);
+ nm_prerr("bridge full %d, cannot create new port", b->bdg_active_ports);
return ENOMEM;
}
/* record the next two ports available, but do not allocate yet */
@@ -428,9 +465,10 @@
}
/* bdg_netmap_attach creates a struct netmap_adapter */
- error = b->bdg_ops->vp_create(hdr, NULL, nmd, &vpna);
+ error = b->bdg_ops.vp_create(hdr, NULL, nmd, &vpna);
if (error) {
- D("error %d", error);
+ if (netmap_debug & NM_DEBUG_BDG)
+ nm_prerr("error %d", error);
goto out;
}
/* shortcut - we can skip get_hw_na(),
@@ -459,7 +497,7 @@
/* host adapter might not be created */
error = hw->nm_bdg_attach(nr_name, hw, b);
if (error == NM_NEED_BWRAP) {
- error = b->bdg_ops->bwrap_attach(nr_name, hw);
+ error = b->bdg_ops.bwrap_attach(nr_name, hw);
}
if (error)
goto out;
@@ -502,143 +540,14 @@
return error;
}
-/* Process NETMAP_REQ_VALE_ATTACH.
- */
-int
-nm_bdg_ctl_attach(struct nmreq_header *hdr, void *auth_token)
-{
- struct nmreq_vale_attach *req =
- (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
- struct netmap_vp_adapter * vpna;
- struct netmap_adapter *na = NULL;
- struct netmap_mem_d *nmd = NULL;
- struct nm_bridge *b = NULL;
- int error;
- NMG_LOCK();
- /* permission check for modified bridges */
- b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
- if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
- error = EACCES;
- goto unlock_exit;
- }
-
- if (req->reg.nr_mem_id) {
- nmd = netmap_mem_find(req->reg.nr_mem_id);
- if (nmd == NULL) {
- error = EINVAL;
- goto unlock_exit;
- }
- }
-
- /* check for existing one */
- error = netmap_get_vale_na(hdr, &na, nmd, 0);
- if (na) {
- error = EBUSY;
- goto unref_exit;
- }
- error = netmap_get_vale_na(hdr, &na,
- nmd, 1 /* create if not exists */);
- if (error) { /* no device */
- goto unlock_exit;
- }
-
- if (na == NULL) { /* VALE prefix missing */
- error = EINVAL;
- goto unlock_exit;
- }
-
- if (NETMAP_OWNED_BY_ANY(na)) {
- error = EBUSY;
- goto unref_exit;
- }
-
- if (na->nm_bdg_ctl) {
- /* nop for VALE ports. The bwrap needs to put the hwna
- * in netmap mode (see netmap_bwrap_bdg_ctl)
- */
- error = na->nm_bdg_ctl(hdr, na);
- if (error)
- goto unref_exit;
- ND("registered %s to netmap-mode", na->name);
- }
- vpna = (struct netmap_vp_adapter *)na;
- req->port_index = vpna->bdg_port;
- NMG_UNLOCK();
- return 0;
-
-unref_exit:
- netmap_adapter_put(na);
-unlock_exit:
- NMG_UNLOCK();
- return error;
-}
-
-static inline int
+int
nm_is_bwrap(struct netmap_adapter *na)
{
return na->nm_register == netmap_bwrap_reg;
}
-/* Process NETMAP_REQ_VALE_DETACH.
- */
-int
-nm_bdg_ctl_detach(struct nmreq_header *hdr, void *auth_token)
-{
- struct nmreq_vale_detach *nmreq_det = (void *)(uintptr_t)hdr->nr_body;
- struct netmap_vp_adapter *vpna;
- struct netmap_adapter *na;
- struct nm_bridge *b = NULL;
- int error;
- NMG_LOCK();
- /* permission check for modified bridges */
- b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
- if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
- error = EACCES;
- goto unlock_exit;
- }
-
- error = netmap_get_vale_na(hdr, &na, NULL, 0 /* don't create */);
- if (error) { /* no device, or another bridge or user owns the device */
- goto unlock_exit;
- }
-
- if (na == NULL) { /* VALE prefix missing */
- error = EINVAL;
- goto unlock_exit;
- } else if (nm_is_bwrap(na) &&
- ((struct netmap_bwrap_adapter *)na)->na_polling_state) {
- /* Don't detach a NIC with polling */
- error = EBUSY;
- goto unref_exit;
- }
-
- vpna = (struct netmap_vp_adapter *)na;
- if (na->na_vp != vpna) {
- /* trying to detach first attach of VALE persistent port attached
- * to 2 bridges
- */
- error = EBUSY;
- goto unref_exit;
- }
- nmreq_det->port_index = vpna->bdg_port;
-
- if (na->nm_bdg_ctl) {
- /* remove the port from bridge. The bwrap
- * also needs to put the hwna in normal mode
- */
- error = na->nm_bdg_ctl(hdr, na);
- }
-
-unref_exit:
- netmap_adapter_put(na);
-unlock_exit:
- NMG_UNLOCK();
- return error;
-
-}
-
struct nm_bdg_polling_state;
struct
nm_bdg_kthread {
@@ -661,7 +570,7 @@
};
static void
-netmap_bwrap_polling(void *data, int is_kthread)
+netmap_bwrap_polling(void *data)
{
struct nm_bdg_kthread *nbk = data;
struct netmap_bwrap_adapter *bna;
@@ -693,7 +602,6 @@
bzero(&kcfg, sizeof(kcfg));
kcfg.worker_fn = netmap_bwrap_polling;
- kcfg.use_kthread = 1;
for (i = 0; i < bps->ncpus; i++) {
struct nm_bdg_kthread *t = bps->kthreads + i;
int all = (bps->ncpus == 1 &&
@@ -703,8 +611,9 @@
t->bps = bps;
t->qfirst = all ? bps->qfirst /* must be 0 */: affinity;
t->qlast = all ? bps->qlast : t->qfirst + 1;
- D("kthread %d a:%u qf:%u ql:%u", i, affinity, t->qfirst,
- t->qlast);
+ if (netmap_verbose)
+ nm_prinf("kthread %d a:%u qf:%u ql:%u", i, affinity, t->qfirst,
+ t->qlast);
kcfg.type = i;
kcfg.worker_private = t;
@@ -732,7 +641,7 @@
int error, i, j;
if (!bps) {
- D("polling is not configured");
+ nm_prerr("polling is not configured");
return EFAULT;
}
bps->stopped = false;
@@ -741,7 +650,7 @@
struct nm_bdg_kthread *t = bps->kthreads + i;
error = nm_os_kctx_worker_start(t->nmk);
if (error) {
- D("error in nm_kthread_start()");
+ nm_prerr("error in nm_kthread_start(): %d", error);
goto cleanup;
}
}
@@ -784,10 +693,10 @@
avail_cpus = nm_os_ncpus();
if (req_cpus == 0) {
- D("req_cpus must be > 0");
+ nm_prerr("req_cpus must be > 0");
return EINVAL;
} else if (req_cpus >= avail_cpus) {
- D("Cannot use all the CPUs in the system");
+ nm_prerr("Cannot use all the CPUs in the system");
return EINVAL;
}
@@ -797,7 +706,7 @@
* For example, if nr_first_cpu_id=2 and nr_num_polling_cpus=2,
* ring 2 and 3 are polled by core 2 and 3, respectively. */
if (i + req_cpus > nma_get_nrings(na, NR_RX)) {
- D("Rings %u-%u not in range (have %d rings)",
+ nm_prerr("Rings %u-%u not in range (have %d rings)",
i, i + req_cpus, nma_get_nrings(na, NR_RX));
return EINVAL;
}
@@ -809,7 +718,7 @@
/* Poll all the rings using a core specified by nr_first_cpu_id.
* the number of cores must be 1. */
if (req_cpus != 1) {
- D("ncpus must be 1 for NETMAP_POLLING_MODE_SINGLE_CPU "
+ nm_prerr("ncpus must be 1 for NETMAP_POLLING_MODE_SINGLE_CPU "
"(was %d)", req_cpus);
return EINVAL;
}
@@ -817,7 +726,7 @@
qlast = nma_get_nrings(na, NR_RX);
core_from = i;
} else {
- D("Invalid polling mode");
+ nm_prerr("Invalid polling mode");
return EINVAL;
}
@@ -826,7 +735,7 @@
bps->qlast = qlast;
bps->cpu_from = core_from;
bps->ncpus = req_cpus;
- D("%s qfirst %u qlast %u cpu_from %u ncpus %u",
+ nm_prinf("%s qfirst %u qlast %u cpu_from %u ncpus %u",
req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU ?
"MULTI" : "SINGLE",
qfirst, qlast, core_from, req_cpus);
@@ -842,7 +751,7 @@
bna = (struct netmap_bwrap_adapter *)na;
if (bna->na_polling_state) {
- D("ERROR adapter already in polling mode");
+ nm_prerr("ERROR adapter already in polling mode");
return EFAULT;
}
@@ -871,7 +780,7 @@
/* start kthread now */
error = nm_bdg_polling_start_kthreads(bps);
if (error) {
- D("ERROR nm_bdg_polling_start_kthread()");
+ nm_prerr("ERROR nm_bdg_polling_start_kthread()");
nm_os_free(bps->kthreads);
nm_os_free(bps);
bna->na_polling_state = NULL;
@@ -887,7 +796,7 @@
struct nm_bdg_polling_state *bps;
if (!bna->na_polling_state) {
- D("ERROR adapter is not in polling mode");
+ nm_prerr("ERROR adapter is not in polling mode");
return EFAULT;
}
bps = bna->na_polling_state;
@@ -932,86 +841,6 @@
return error;
}
-/* Process NETMAP_REQ_VALE_LIST. */
-int
-netmap_bdg_list(struct nmreq_header *hdr)
-{
- struct nmreq_vale_list *req =
- (struct nmreq_vale_list *)(uintptr_t)hdr->nr_body;
- int namelen = strlen(hdr->nr_name);
- struct nm_bridge *b, *bridges;
- struct netmap_vp_adapter *vpna;
- int error = 0, i, j;
- u_int num_bridges;
-
- netmap_bns_getbridges(&bridges, &num_bridges);
-
- /* this is used to enumerate bridges and ports */
- if (namelen) { /* look up indexes of bridge and port */
- if (strncmp(hdr->nr_name, NM_BDG_NAME,
- strlen(NM_BDG_NAME))) {
- return EINVAL;
- }
- NMG_LOCK();
- b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
- if (!b) {
- NMG_UNLOCK();
- return ENOENT;
- }
-
- req->nr_bridge_idx = b - bridges; /* bridge index */
- req->nr_port_idx = NM_BDG_NOPORT;
- for (j = 0; j < b->bdg_active_ports; j++) {
- i = b->bdg_port_index[j];
- vpna = b->bdg_ports[i];
- if (vpna == NULL) {
- D("This should not happen");
- continue;
- }
- /* the former and the latter identify a
- * virtual port and a NIC, respectively
- */
- if (!strcmp(vpna->up.name, hdr->nr_name)) {
- req->nr_port_idx = i; /* port index */
- break;
- }
- }
- NMG_UNLOCK();
- } else {
- /* return the first non-empty entry starting from
- * bridge nr_arg1 and port nr_arg2.
- *
- * Users can detect the end of the same bridge by
- * seeing the new and old value of nr_arg1, and can
- * detect the end of all the bridge by error != 0
- */
- i = req->nr_bridge_idx;
- j = req->nr_port_idx;
-
- NMG_LOCK();
- for (error = ENOENT; i < NM_BRIDGES; i++) {
- b = bridges + i;
- for ( ; j < NM_BDG_MAXPORTS; j++) {
- if (b->bdg_ports[j] == NULL)
- continue;
- vpna = b->bdg_ports[j];
- /* write back the VALE switch name */
- strncpy(hdr->nr_name, vpna->up.name,
- (size_t)IFNAMSIZ);
- error = 0;
- goto out;
- }
- j = 0; /* following bridges scan from 0 */
- }
- out:
- req->nr_bridge_idx = i;
- req->nr_port_idx = j;
- NMG_UNLOCK();
- }
-
- return error;
-}
-
/* Called by external kernel modules (e.g., Openvswitch).
* to set configure/lookup/dtor functions of a VALE instance.
* Register callbacks to the given bridge. 'name' may be just
@@ -1041,12 +870,19 @@
if (!bdg_ops) {
/* resetting the bridge */
bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
- b->bdg_ops = NULL;
+ b->bdg_ops = b->bdg_saved_ops;
b->private_data = b->ht;
} else {
/* modifying the bridge */
b->private_data = private_data;
- b->bdg_ops = bdg_ops;
+#define nm_bdg_override(m) if (bdg_ops->m) b->bdg_ops.m = bdg_ops->m
+ nm_bdg_override(lookup);
+ nm_bdg_override(config);
+ nm_bdg_override(dtor);
+ nm_bdg_override(vp_create);
+ nm_bdg_override(bwrap_attach);
+#undef nm_bdg_override
+
}
BDG_WUNLOCK(b);
@@ -1071,8 +907,8 @@
NMG_UNLOCK();
/* Don't call config() with NMG_LOCK() held */
BDG_RLOCK(b);
- if (b->bdg_ops->config != NULL)
- error = b->bdg_ops->config(nr);
+ if (b->bdg_ops.config != NULL)
+ error = b->bdg_ops.config(nr);
BDG_RUNLOCK(b);
return error;
}
@@ -1137,7 +973,7 @@
int n;
if (head > lim) {
- D("ouch dangerous reset!!!");
+ nm_prerr("ouch dangerous reset!!!");
n = netmap_ring_reinit(kring);
goto done;
}
@@ -1154,7 +990,7 @@
void *addr = NMB(na, slot);
if (addr == NETMAP_BUF_BASE(kring->na)) { /* bad buf */
- D("bad buffer index %d, ignore ?",
+ nm_prerr("bad buffer index %d, ignore ?",
slot->buf_idx);
}
slot->flags &= ~NS_BUF_CHANGED;
@@ -1283,8 +1119,8 @@
int ret = NM_IRQ_COMPLETED;
int error;
- if (netmap_verbose)
- D("%s %s 0x%x", na->name, kring->name, flags);
+ if (netmap_debug & NM_DEBUG_RXINTR)
+ nm_prinf("%s %s 0x%x", na->name, kring->name, flags);
bkring = vpna->up.tx_rings[ring_nr];
@@ -1293,8 +1129,8 @@
return EIO;
}
- if (netmap_verbose)
- D("%s head %d cur %d tail %d", na->name,
+ if (netmap_debug & NM_DEBUG_RXINTR)
+ nm_prinf("%s head %d cur %d tail %d", na->name,
kring->rhead, kring->rcur, kring->rtail);
/* simulate a user wakeup on the rx ring
@@ -1305,7 +1141,7 @@
goto put_out;
if (kring->nr_hwcur == kring->nr_hwtail) {
if (netmap_verbose)
- D("how strange, interrupt with no packets on %s",
+ nm_prerr("how strange, interrupt with no packets on %s",
na->name);
goto put_out;
}
@@ -1593,8 +1429,8 @@
ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
na->name, ring_n,
kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
- ring->head, ring->cur, ring->tail,
- hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_ring->rtail);
+ kring->rhead, kring->rcur, kring->rtail,
+ hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
/* second step: the new packets are sent on the tx ring
* (which is actually the same ring)
*/
@@ -1612,7 +1448,7 @@
ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
na->name, ring_n,
kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
- ring->head, ring->cur, ring->tail,
+ kring->rhead, kring->rcur, kring->rtail,
hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
put_out:
nm_kr_put(hw_kring);
@@ -1688,7 +1524,7 @@
/* make sure the NIC is not already in use */
if (NETMAP_OWNED_BY_ANY(hwna)) {
- D("NIC %s busy, cannot attach to bridge", hwna->name);
+ nm_prerr("NIC %s busy, cannot attach to bridge", hwna->name);
return EBUSY;
}
@@ -1756,6 +1592,8 @@
hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */
hostna->rx_buf_maxsize = hwna->rx_buf_maxsize;
}
+ if (hwna->na_flags & NAF_MOREFRAG)
+ na->na_flags |= NAF_MOREFRAG;
ND("%s<->%s txr %d txd %d rxr %d rxd %d",
na->name, ifp->if_xname,
Index: head/sys/dev/netmap/netmap_freebsd.c
===================================================================
--- head/sys/dev/netmap/netmap_freebsd.c
+++ head/sys/dev/netmap/netmap_freebsd.c
@@ -735,9 +735,9 @@
}
#endif /* WITH_EXTMEM */
-/* ======================== PTNETMAP SUPPORT ========================== */
+/* ================== PTNETMAP GUEST SUPPORT ==================== */
-#ifdef WITH_PTNETMAP_GUEST
+#ifdef WITH_PTNETMAP
#include <sys/bus.h>
#include <sys/rman.h>
#include <machine/bus.h> /* bus_dmamap_* */
@@ -932,7 +932,7 @@
return bus_generic_shutdown(dev);
}
-#endif /* WITH_PTNETMAP_GUEST */
+#endif /* WITH_PTNETMAP */
/*
* In order to track whether pages are still mapped, we hook into
@@ -1145,8 +1145,8 @@
}
struct nm_kctx_ctx {
- struct thread *user_td; /* thread user-space (kthread creator) to send ioctl */
- struct ptnetmap_cfgentry_bhyve cfg;
+ /* Userspace thread (kthread creator). */
+ struct thread *user_td;
/* worker function and parameter */
nm_kctx_worker_fn_t worker_fn;
@@ -1161,56 +1161,17 @@
struct nm_kctx {
struct thread *worker;
struct mtx worker_lock;
- uint64_t scheduled; /* pending wake_up request */
struct nm_kctx_ctx worker_ctx;
int run; /* used to stop kthread */
int attach_user; /* kthread attached to user_process */
int affinity;
};
-void inline
-nm_os_kctx_worker_wakeup(struct nm_kctx *nmk)
-{
- /*
- * There may be a race between FE and BE,
- * which call both this function, and worker kthread,
- * that reads nmk->scheduled.
- *
- * For us it is not important the counter value,
- * but simply that it has changed since the last
- * time the kthread saw it.
- */
- mtx_lock(&nmk->worker_lock);
- nmk->scheduled++;
- if (nmk->worker_ctx.cfg.wchan) {
- wakeup((void *)(uintptr_t)nmk->worker_ctx.cfg.wchan);
- }
- mtx_unlock(&nmk->worker_lock);
-}
-
-void inline
-nm_os_kctx_send_irq(struct nm_kctx *nmk)
-{
- struct nm_kctx_ctx *ctx = &nmk->worker_ctx;
- int err;
-
- if (ctx->user_td && ctx->cfg.ioctl_fd > 0) {
- err = kern_ioctl(ctx->user_td, ctx->cfg.ioctl_fd, ctx->cfg.ioctl_cmd,
- (caddr_t)&ctx->cfg.ioctl_data);
- if (err) {
- D("kern_ioctl error: %d ioctl parameters: fd %d com %lu data %p",
- err, ctx->cfg.ioctl_fd, (unsigned long)ctx->cfg.ioctl_cmd,
- &ctx->cfg.ioctl_data);
- }
- }
-}
-
static void
nm_kctx_worker(void *data)
{
struct nm_kctx *nmk = data;
struct nm_kctx_ctx *ctx = &nmk->worker_ctx;
- uint64_t old_scheduled = nmk->scheduled;
if (nmk->affinity >= 0) {
thread_lock(curthread);
@@ -1231,30 +1192,8 @@
kthread_suspend_check();
}
- /*
- * if wchan is not defined, we don't have notification
- * mechanism and we continually execute worker_fn()
- */
- if (!ctx->cfg.wchan) {
- ctx->worker_fn(ctx->worker_private, 1); /* worker body */
- } else {
- /* checks if there is a pending notification */
- mtx_lock(&nmk->worker_lock);
- if (likely(nmk->scheduled != old_scheduled)) {
- old_scheduled = nmk->scheduled;
- mtx_unlock(&nmk->worker_lock);
-
- ctx->worker_fn(ctx->worker_private, 1); /* worker body */
-
- continue;
- } else if (nmk->run) {
- /* wait on event with one second timeout */
- msleep((void *)(uintptr_t)ctx->cfg.wchan, &nmk->worker_lock,
- 0, "nmk_ev", hz);
- nmk->scheduled++;
- }
- mtx_unlock(&nmk->worker_lock);
- }
+ /* Continuously execute worker process. */
+ ctx->worker_fn(ctx->worker_private); /* worker body */
}
kthread_exit();
@@ -1284,11 +1223,6 @@
/* attach kthread to user process (ptnetmap) */
nmk->attach_user = cfg->attach_user;
- /* store kick/interrupt configuration */
- if (opaque) {
- nmk->worker_ctx.cfg = *((struct ptnetmap_cfgentry_bhyve *)opaque);
- }
-
return nmk;
}
@@ -1298,9 +1232,13 @@
struct proc *p = NULL;
int error = 0;
- if (nmk->worker) {
+ /* Temporarily disable this function as it is currently broken
+ * and causes kernel crashes. The failure can be triggered by
+ * the "vale_polling_enable_disable" test in ctrl-api-test.c. */
+ return EOPNOTSUPP;
+
+ if (nmk->worker)
return EBUSY;
- }
/* check if we want to attach kthread to user process */
if (nmk->attach_user) {
@@ -1329,15 +1267,14 @@
void
nm_os_kctx_worker_stop(struct nm_kctx *nmk)
{
- if (!nmk->worker) {
+ if (!nmk->worker)
return;
- }
+
/* tell to kthread to exit from main loop */
nmk->run = 0;
/* wake up kthread if it sleeps */
kthread_resume(nmk->worker);
- nm_os_kctx_worker_wakeup(nmk);
nmk->worker = NULL;
}
@@ -1347,11 +1284,9 @@
{
if (!nmk)
return;
- if (nmk->worker) {
- nm_os_kctx_worker_stop(nmk);
- }
- memset(&nmk->worker_ctx.cfg, 0, sizeof(nmk->worker_ctx.cfg));
+ if (nmk->worker)
+ nm_os_kctx_worker_stop(nmk);
free(nmk, M_DEVBUF);
}
Index: head/sys/dev/netmap/netmap_generic.c
===================================================================
--- head/sys/dev/netmap/netmap_generic.c
+++ head/sys/dev/netmap/netmap_generic.c
@@ -81,7 +81,6 @@
#include <net/if_var.h>
#include <machine/bus.h> /* bus_dmamap_* in netmap_kern.h */
-// XXX temporary - D() defined here
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>
@@ -179,7 +178,7 @@
r = mod_timer(&ctx->timer, jiffies +
msecs_to_jiffies(RATE_PERIOD * 1000));
if (unlikely(r))
- D("[v1000] Error: mod_timer()");
+ nm_prerr("mod_timer() failed");
}
static struct rate_context rate_ctx;
@@ -240,14 +239,14 @@
for_each_rx_kring_h(r, kring, na) {
if (nm_kring_pending_off(kring)) {
- D("Emulated adapter: ring '%s' deactivated", kring->name);
+ nm_prinf("Emulated adapter: ring '%s' deactivated", kring->name);
kring->nr_mode = NKR_NETMAP_OFF;
}
}
for_each_tx_kring_h(r, kring, na) {
if (nm_kring_pending_off(kring)) {
kring->nr_mode = NKR_NETMAP_OFF;
- D("Emulated adapter: ring '%s' deactivated", kring->name);
+ nm_prinf("Emulated adapter: ring '%s' deactivated", kring->name);
}
}
@@ -300,11 +299,11 @@
#ifdef RATE_GENERIC
if (--rate_ctx.refcount == 0) {
- D("del_timer()");
+ nm_prinf("del_timer()");
del_timer(&rate_ctx.timer);
}
#endif
- D("Emulated adapter for %s deactivated", na->name);
+ nm_prinf("Emulated adapter for %s deactivated", na->name);
}
return 0;
@@ -329,14 +328,14 @@
}
if (na->active_fds == 0) {
- D("Emulated adapter for %s activated", na->name);
+ nm_prinf("Emulated adapter for %s activated", na->name);
/* Do all memory allocations when (na->active_fds == 0), to
* simplify error management. */
/* Allocate memory for mitigation support on all the rx queues. */
gna->mit = nm_os_malloc(na->num_rx_rings * sizeof(struct nm_generic_mit));
if (!gna->mit) {
- D("mitigation allocation failed");
+ nm_prerr("mitigation allocation failed");
error = ENOMEM;
goto out;
}
@@ -363,7 +362,7 @@
kring->tx_pool =
nm_os_malloc(na->num_tx_desc * sizeof(struct mbuf *));
if (!kring->tx_pool) {
- D("tx_pool allocation failed");
+ nm_prerr("tx_pool allocation failed");
error = ENOMEM;
goto free_tx_pools;
}
@@ -374,14 +373,14 @@
for_each_rx_kring_h(r, kring, na) {
if (nm_kring_pending_on(kring)) {
- D("Emulated adapter: ring '%s' activated", kring->name);
+ nm_prinf("Emulated adapter: ring '%s' activated", kring->name);
kring->nr_mode = NKR_NETMAP_ON;
}
}
for_each_tx_kring_h(r, kring, na) {
if (nm_kring_pending_on(kring)) {
- D("Emulated adapter: ring '%s' activated", kring->name);
+ nm_prinf("Emulated adapter: ring '%s' activated", kring->name);
kring->nr_mode = NKR_NETMAP_ON;
}
}
@@ -399,14 +398,14 @@
/* Prepare to intercept incoming traffic. */
error = nm_os_catch_rx(gna, 1);
if (error) {
- D("nm_os_catch_rx(1) failed (%d)", error);
+ nm_prerr("nm_os_catch_rx(1) failed (%d)", error);
goto free_tx_pools;
}
/* Let netmap control the packet steering. */
error = nm_os_catch_tx(gna, 1);
if (error) {
- D("nm_os_catch_tx(1) failed (%d)", error);
+ nm_prerr("nm_os_catch_tx(1) failed (%d)", error);
goto catch_rx;
}
@@ -414,11 +413,11 @@
#ifdef RATE_GENERIC
if (rate_ctx.refcount == 0) {
- D("setup_timer()");
+ nm_prinf("setup_timer()");
memset(&rate_ctx, 0, sizeof(rate_ctx));
setup_timer(&rate_ctx.timer, &rate_callback, (unsigned long)&rate_ctx);
if (mod_timer(&rate_ctx.timer, jiffies + msecs_to_jiffies(1500))) {
- D("Error: mod_timer()");
+ nm_prerr("Error: mod_timer()");
}
}
rate_ctx.refcount++;
@@ -462,7 +461,7 @@
unsigned int r_orig = r;
if (unlikely(!nm_netmap_on(na) || r >= na->num_tx_rings)) {
- D("Error: no netmap adapter on device %p",
+ nm_prerr("Error: no netmap adapter on device %p",
GEN_TX_MBUF_IFP(m));
return;
}
@@ -488,7 +487,7 @@
if (match) {
if (r != r_orig) {
- RD(1, "event %p migrated: ring %u --> %u",
+ nm_prlim(1, "event %p migrated: ring %u --> %u",
m, r_orig, r);
}
break;
@@ -497,7 +496,7 @@
if (++r == na->num_tx_rings) r = 0;
if (r == r_orig) {
- RD(1, "Cannot match event %p", m);
+ nm_prlim(1, "Cannot match event %p", m);
return;
}
}
@@ -528,7 +527,7 @@
u_int n = 0;
struct mbuf **tx_pool = kring->tx_pool;
- ND("hwcur = %d, hwtail = %d", kring->nr_hwcur, kring->nr_hwtail);
+ nm_prdis("hwcur = %d, hwtail = %d", kring->nr_hwcur, kring->nr_hwtail);
while (nm_i != hwcur) { /* buffers not completed */
struct mbuf *m = tx_pool[nm_i];
@@ -537,7 +536,7 @@
if (m == NULL) {
/* Nothing to do, this is going
* to be replenished. */
- RD(3, "Is this happening?");
+ nm_prlim(3, "Is this happening?");
} else if (MBUF_QUEUED(m)) {
break; /* Not dequeued yet. */
@@ -576,7 +575,7 @@
nm_i = nm_next(nm_i, lim);
}
kring->nr_hwtail = nm_prev(nm_i, lim);
- ND("tx completed [%d] -> hwtail %d", n, kring->nr_hwtail);
+ nm_prdis("tx completed [%d] -> hwtail %d", n, kring->nr_hwtail);
return n;
}
@@ -598,7 +597,7 @@
}
if (unlikely(e >= n)) {
- D("This cannot happen");
+ nm_prerr("This cannot happen");
e = 0;
}
@@ -654,7 +653,7 @@
kring->tx_pool[e] = NULL;
- ND(5, "Request Event at %d mbuf %p refcnt %d", e, m, m ? MBUF_REFCNT(m) : -2 );
+ nm_prdis("Request Event at %d mbuf %p refcnt %d", e, m, m ? MBUF_REFCNT(m) : -2 );
/* Decrement the refcount. This will free it if we lose the race
* with the driver. */
@@ -699,7 +698,7 @@
* but only when cur == hwtail, which means that the
* client is going to block. */
event = ring_middle(nm_i, head, lim);
- ND(3, "Place txqdisc event (hwcur=%u,event=%u,"
+ nm_prdis("Place txqdisc event (hwcur=%u,event=%u,"
"head=%u,hwtail=%u)", nm_i, event, head,
kring->nr_hwtail);
}
@@ -725,7 +724,7 @@
kring->tx_pool[nm_i] = m =
nm_os_get_mbuf(ifp, NETMAP_BUF_SIZE(na));
if (m == NULL) {
- RD(2, "Failed to replenish mbuf");
+ nm_prlim(2, "Failed to replenish mbuf");
/* Here we could schedule a timer which
* retries to replenish after a while,
* and notifies the client when it
@@ -854,7 +853,7 @@
/* This may happen when GRO/LRO features are enabled for
* the NIC driver when the generic adapter does not
* support RX scatter-gather. */
- RD(2, "Warning: driver pushed up big packet "
+ nm_prlim(2, "Warning: driver pushed up big packet "
"(size=%d)", (int)MBUF_LEN(m));
m_freem(m);
} else if (unlikely(mbq_len(&kring->rx_queue) > 1024)) {
@@ -1048,7 +1047,7 @@
*/
netmap_adapter_put(prev_na);
}
- D("Native netmap adapter %p restored", prev_na);
+ nm_prinf("Native netmap adapter %p restored", prev_na);
}
NM_RESTORE_NA(ifp, prev_na);
/*
@@ -1056,7 +1055,7 @@
* overrides WNA(ifp) if na->ifp is not NULL.
*/
na->ifp = NULL;
- D("Emulated netmap adapter for %s destroyed", na->name);
+ nm_prinf("Emulated netmap adapter for %s destroyed", na->name);
}
int
@@ -1086,7 +1085,7 @@
#ifdef __FreeBSD__
if (ifp->if_type == IFT_LOOP) {
- D("if_loop is not supported by %s", __func__);
+ nm_prerr("if_loop is not supported by %s", __func__);
return EINVAL;
}
#endif
@@ -1096,26 +1095,25 @@
* adapter it means that someone else is using the same
* pointer (e.g. ax25_ptr on linux). This happens for
* instance when also PF_RING is in use. */
- D("Error: netmap adapter hook is busy");
+ nm_prerr("Error: netmap adapter hook is busy");
return EBUSY;
}
num_tx_desc = num_rx_desc = netmap_generic_ringsize; /* starting point */
nm_os_generic_find_num_desc(ifp, &num_tx_desc, &num_rx_desc); /* ignore errors */
- ND("Netmap ring size: TX = %d, RX = %d", num_tx_desc, num_rx_desc);
if (num_tx_desc == 0 || num_rx_desc == 0) {
- D("Device has no hw slots (tx %u, rx %u)", num_tx_desc, num_rx_desc);
+ nm_prerr("Device has no hw slots (tx %u, rx %u)", num_tx_desc, num_rx_desc);
return EINVAL;
}
gna = nm_os_malloc(sizeof(*gna));
if (gna == NULL) {
- D("no memory on attach, give up");
+ nm_prerr("no memory on attach, give up");
return ENOMEM;
}
na = (struct netmap_adapter *)gna;
- strncpy(na->name, ifp->if_xname, sizeof(na->name));
+ strlcpy(na->name, ifp->if_xname, sizeof(na->name));
na->ifp = ifp;
na->num_tx_desc = num_tx_desc;
na->num_rx_desc = num_rx_desc;
@@ -1129,10 +1127,10 @@
*/
na->na_flags = NAF_SKIP_INTR | NAF_HOST_RINGS;
- ND("[GNA] num_tx_queues(%d), real_num_tx_queues(%d), len(%lu)",
+ nm_prdis("[GNA] num_tx_queues(%d), real_num_tx_queues(%d), len(%lu)",
ifp->num_tx_queues, ifp->real_num_tx_queues,
ifp->tx_queue_len);
- ND("[GNA] num_rx_queues(%d), real_num_rx_queues(%d)",
+ nm_prdis("[GNA] num_rx_queues(%d), real_num_rx_queues(%d)",
ifp->num_rx_queues, ifp->real_num_rx_queues);
nm_os_generic_find_num_queues(ifp, &na->num_tx_rings, &na->num_rx_rings);
@@ -1151,7 +1149,7 @@
nm_os_generic_set_features(gna);
- D("Emulated adapter for %s created (prev was %p)", na->name, gna->prev);
+ nm_prinf("Emulated adapter for %s created (prev was %p)", na->name, gna->prev);
return retval;
}
Index: head/sys/dev/netmap/netmap_kern.h
===================================================================
--- head/sys/dev/netmap/netmap_kern.h
+++ head/sys/dev/netmap/netmap_kern.h
@@ -54,30 +54,31 @@
#if defined(CONFIG_NETMAP_GENERIC)
#define WITH_GENERIC
#endif
-#if defined(CONFIG_NETMAP_PTNETMAP_GUEST)
-#define WITH_PTNETMAP_GUEST
+#if defined(CONFIG_NETMAP_PTNETMAP)
+#define WITH_PTNETMAP
#endif
-#if defined(CONFIG_NETMAP_PTNETMAP_HOST)
-#define WITH_PTNETMAP_HOST
-#endif
#if defined(CONFIG_NETMAP_SINK)
#define WITH_SINK
#endif
+#if defined(CONFIG_NETMAP_NULL)
+#define WITH_NMNULL
+#endif
#elif defined (_WIN32)
#define WITH_VALE // comment out to disable VALE support
#define WITH_PIPES
#define WITH_MONITOR
#define WITH_GENERIC
+#define WITH_NMNULL
#else /* neither linux nor windows */
#define WITH_VALE // comment out to disable VALE support
#define WITH_PIPES
#define WITH_MONITOR
#define WITH_GENERIC
-#define WITH_PTNETMAP_HOST /* ptnetmap host support */
-#define WITH_PTNETMAP_GUEST /* ptnetmap guest support */
+#define WITH_PTNETMAP /* ptnetmap guest support */
#define WITH_EXTMEM
+#define WITH_NMNULL
#endif
#if defined(__FreeBSD__)
@@ -239,38 +240,54 @@
#define NMG_LOCK_ASSERT() NM_MTX_ASSERT(netmap_global_lock)
#if defined(__FreeBSD__)
-#define nm_prerr printf
-#define nm_prinf printf
+#define nm_prerr_int printf
+#define nm_prinf_int printf
#elif defined (_WIN32)
-#define nm_prerr DbgPrint
-#define nm_prinf DbgPrint
+#define nm_prerr_int DbgPrint
+#define nm_prinf_int DbgPrint
#elif defined(linux)
-#define nm_prerr(fmt, arg...) printk(KERN_ERR fmt, ##arg)
-#define nm_prinf(fmt, arg...) printk(KERN_INFO fmt, ##arg)
+#define nm_prerr_int(fmt, arg...) printk(KERN_ERR fmt, ##arg)
+#define nm_prinf_int(fmt, arg...) printk(KERN_INFO fmt, ##arg)
#endif
-#define ND(format, ...)
-#define D(format, ...) \
+#define nm_prinf(format, ...) \
do { \
struct timeval __xxts; \
microtime(&__xxts); \
- nm_prerr("%03d.%06d [%4d] %-25s " format "\n", \
+ nm_prinf_int("%03d.%06d [%4d] %-25s " format "\n",\
(int)__xxts.tv_sec % 1000, (int)__xxts.tv_usec, \
__LINE__, __FUNCTION__, ##__VA_ARGS__); \
} while (0)
-/* rate limited, lps indicates how many per second */
-#define RD(lps, format, ...) \
+#define nm_prerr(format, ...) \
do { \
+ struct timeval __xxts; \
+ microtime(&__xxts); \
+ nm_prerr_int("%03d.%06d [%4d] %-25s " format "\n",\
+ (int)__xxts.tv_sec % 1000, (int)__xxts.tv_usec, \
+ __LINE__, __FUNCTION__, ##__VA_ARGS__); \
+ } while (0)
+
+/* Disabled printf (used to be ND). */
+#define nm_prdis(format, ...)
+
+/* Rate limited, lps indicates how many per second. */
+#define nm_prlim(lps, format, ...) \
+ do { \
static int t0, __cnt; \
if (t0 != time_second) { \
t0 = time_second; \
__cnt = 0; \
} \
if (__cnt++ < lps) \
- D(format, ##__VA_ARGS__); \
+ nm_prinf(format, ##__VA_ARGS__); \
} while (0)
+/* Old macros. */
+#define ND nm_prdis
+#define D nm_prerr
+#define RD nm_prlim
+
struct netmap_adapter;
struct nm_bdg_fwd;
struct nm_bridge;
@@ -700,7 +717,7 @@
*/
#define NAF_HOST_RINGS 64 /* the adapter supports the host rings */
#define NAF_FORCE_NATIVE 128 /* the adapter is always NATIVE */
-#define NAF_PTNETMAP_HOST 256 /* the adapter supports ptnetmap in the host */
+/* free */
#define NAF_MOREFRAG 512 /* the adapter supports NS_MOREFRAG */
#define NAF_ZOMBIE (1U<<30) /* the nic driver has been unloaded */
#define NAF_BUSY (1U<<31) /* the adapter is used internally and
@@ -718,9 +735,9 @@
u_int num_tx_desc; /* number of descriptor in each queue */
u_int num_rx_desc;
- /* tx_rings and rx_rings are private but allocated
- * as a contiguous chunk of memory. Each array has
- * N+1 entries, for the adapter queues and for the host queue.
+ /* tx_rings and rx_rings are private but allocated as a
+ * contiguous chunk of memory. Each array has N+K entries,
+ * N for the hardware rings and K for the host rings.
*/
struct netmap_kring **tx_rings; /* array of TX rings. */
struct netmap_kring **rx_rings; /* array of RX rings. */
@@ -1080,12 +1097,12 @@
*/
struct netmap_vp_adapter *saved_na_vp;
};
-int nm_bdg_ctl_attach(struct nmreq_header *hdr, void *auth_token);
-int nm_bdg_ctl_detach(struct nmreq_header *hdr, void *auth_token);
int nm_bdg_polling(struct nmreq_header *hdr);
-int netmap_bdg_list(struct nmreq_header *hdr);
#ifdef WITH_VALE
+int netmap_vale_attach(struct nmreq_header *hdr, void *auth_token);
+int netmap_vale_detach(struct nmreq_header *hdr, void *auth_token);
+int netmap_vale_list(struct nmreq_header *hdr);
int netmap_vi_create(struct nmreq_header *hdr, int);
int nm_vi_create(struct nmreq_header *);
int nm_vi_destroy(const char *name);
@@ -1115,7 +1132,13 @@
#endif /* WITH_PIPES */
+#ifdef WITH_NMNULL
+struct netmap_null_adapter {
+ struct netmap_adapter up;
+};
+#endif /* WITH_NMNULL */
+
/* return slots reserved to rx clients; used in drivers */
static inline uint32_t
nm_kr_rxspace(struct netmap_kring *k)
@@ -1442,51 +1465,8 @@
int netmap_get_hw_na(struct ifnet *ifp,
struct netmap_mem_d *nmd, struct netmap_adapter **na);
-
-/*
- * The following bridge-related functions are used by other
- * kernel modules.
- *
- * VALE only supports unicast or broadcast. The lookup
- * function can return 0 .. NM_BDG_MAXPORTS-1 for regular ports,
- * NM_BDG_MAXPORTS for broadcast, NM_BDG_MAXPORTS+1 to indicate
- * drop.
- */
-typedef uint32_t (*bdg_lookup_fn_t)(struct nm_bdg_fwd *ft, uint8_t *ring_nr,
- struct netmap_vp_adapter *, void *private_data);
-typedef int (*bdg_config_fn_t)(struct nm_ifreq *);
-typedef void (*bdg_dtor_fn_t)(const struct netmap_vp_adapter *);
-typedef void *(*bdg_update_private_data_fn_t)(void *private_data, void *callback_data, int *error);
-typedef int (*bdg_vp_create_fn_t)(struct nmreq_header *hdr,
- struct ifnet *ifp, struct netmap_mem_d *nmd,
- struct netmap_vp_adapter **ret);
-typedef int (*bdg_bwrap_attach_fn_t)(const char *nr_name, struct netmap_adapter *hwna);
-struct netmap_bdg_ops {
- bdg_lookup_fn_t lookup;
- bdg_config_fn_t config;
- bdg_dtor_fn_t dtor;
- bdg_vp_create_fn_t vp_create;
- bdg_bwrap_attach_fn_t bwrap_attach;
- char name[IFNAMSIZ];
-};
-int netmap_bwrap_attach(const char *name, struct netmap_adapter *, struct netmap_bdg_ops *);
-int netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *private_data, void *auth_token);
-
-#define NM_BRIDGES 8 /* number of bridges */
-#define NM_BDG_MAXPORTS 254 /* up to 254 */
-#define NM_BDG_BROADCAST NM_BDG_MAXPORTS
-#define NM_BDG_NOPORT (NM_BDG_MAXPORTS+1)
-
-struct nm_bridge *netmap_init_bridges2(u_int);
-void netmap_uninit_bridges2(struct nm_bridge *, u_int);
-int netmap_init_bridges(void);
-void netmap_uninit_bridges(void);
-int nm_bdg_update_private_data(const char *name, bdg_update_private_data_fn_t callback,
- void *callback_data, void *auth_token);
-int netmap_bdg_config(struct nm_ifreq *nifr);
-
#ifdef WITH_VALE
-uint32_t netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
+uint32_t netmap_vale_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
struct netmap_vp_adapter *, void *private_data);
/* these are redefined in case of no VALE support */
@@ -1525,11 +1505,20 @@
(((struct nmreq_register *)(uintptr_t)hdr->nr_body)->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX) ? EOPNOTSUPP : 0)
#endif
+#ifdef WITH_NMNULL
+int netmap_get_null_na(struct nmreq_header *hdr, struct netmap_adapter **na,
+ struct netmap_mem_d *nmd, int create);
+#else /* !WITH_NMNULL */
+#define netmap_get_null_na(hdr, _2, _3, _4) \
+ (((struct nmreq_register *)(uintptr_t)hdr->nr_body)->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX) ? EOPNOTSUPP : 0)
+#endif /* WITH_NMNULL */
+
#ifdef CONFIG_NET_NS
struct net *netmap_bns_get(void);
void netmap_bns_put(struct net *);
void netmap_bns_getbridges(struct nm_bridge **, u_int *);
#else
+extern struct nm_bridge *nm_bridges;
#define netmap_bns_get()
#define netmap_bns_put(_1)
#define netmap_bns_getbridges(b, n) \
@@ -1591,16 +1580,24 @@
#define NETMAP_BUF_SIZE(_na) ((_na)->na_lut.objsize)
extern int netmap_no_pendintr;
extern int netmap_mitigate;
-extern int netmap_verbose; /* for debugging */
-enum { /* verbose flags */
- NM_VERB_ON = 1, /* generic verbose */
- NM_VERB_HOST = 0x2, /* verbose host stack */
- NM_VERB_RXSYNC = 0x10, /* verbose on rxsync/txsync */
- NM_VERB_TXSYNC = 0x20,
- NM_VERB_RXINTR = 0x100, /* verbose on rx/tx intr (driver) */
- NM_VERB_TXINTR = 0x200,
- NM_VERB_NIC_RXSYNC = 0x1000, /* verbose on rx/tx intr (driver) */
- NM_VERB_NIC_TXSYNC = 0x2000,
+extern int netmap_verbose;
+#ifdef CONFIG_NETMAP_DEBUG
+extern int netmap_debug; /* for debugging */
+#else /* !CONFIG_NETMAP_DEBUG */
+#define netmap_debug (0)
+#endif /* !CONFIG_NETMAP_DEBUG */
+enum { /* debug flags */
+ NM_DEBUG_ON = 1, /* generic debug messages */
+ NM_DEBUG_HOST = 0x2, /* debug host stack */
+ NM_DEBUG_RXSYNC = 0x10, /* debug on rxsync/txsync */
+ NM_DEBUG_TXSYNC = 0x20,
+ NM_DEBUG_RXINTR = 0x100, /* debug on rx/tx intr (driver) */
+ NM_DEBUG_TXINTR = 0x200,
+ NM_DEBUG_NIC_RXSYNC = 0x1000, /* presumably NIC rxsync/txsync (driver); TODO confirm */
+ NM_DEBUG_NIC_TXSYNC = 0x2000,
+ NM_DEBUG_MEM = 0x4000, /* verbose memory allocations/deallocations */
+ NM_DEBUG_VALE = 0x8000, /* presumably VALE/bridge debug messages; TODO confirm */
+ NM_DEBUG_BDG = NM_DEBUG_VALE,
+};
extern int netmap_txsync_retry;
@@ -1612,7 +1609,6 @@
#ifdef linux
extern int netmap_generic_txqdisc;
#endif
-extern int ptnetmap_tx_workers;
/*
* NA returns a pointer to the struct netmap adapter from the ifp.
@@ -1809,6 +1805,11 @@
netmap_idx_n2k(struct netmap_kring *kr, int idx)
{
int n = kr->nkr_num_slots;
+
+ if (likely(kr->nkr_hwofs == 0)) {
+ return idx;
+ }
+
idx += kr->nkr_hwofs;
if (idx < 0)
return idx + n;
@@ -1823,6 +1824,11 @@
netmap_idx_k2n(struct netmap_kring *kr, int idx)
{
int n = kr->nkr_num_slots;
+
+ if (likely(kr->nkr_hwofs == 0)) {
+ return idx;
+ }
+
idx -= kr->nkr_hwofs;
if (idx < 0)
return idx + n;
@@ -1911,6 +1917,9 @@
u_int np_qfirst[NR_TXRX],
np_qlast[NR_TXRX]; /* range of tx/rx rings to scan */
uint16_t np_txpoll;
+ uint16_t np_kloop_state; /* use with NMG_LOCK held */
+#define NM_SYNC_KLOOP_RUNNING (1 << 0)
+#define NM_SYNC_KLOOP_STOPPING (1 << 1)
int np_sync_flags; /* to be passed to nm_sync */
int np_refs; /* use with NMG_LOCK held */
@@ -1920,7 +1929,26 @@
* number of rings.
*/
NM_SELINFO_T *np_si[NR_TXRX];
+
+ /* In the optional CSB mode, the user must specify the start address
+ * of two arrays of Communication Status Block (CSB) entries, for the
+ * two directions (kernel read application write, and kernel write
+ * application read).
+ * The number of entries must agree with the number of rings bound to
+ * the netmap file descriptor. The entries corresponding to the TX
+ * rings are laid out before the ones corresponding to the RX rings.
+ *
+ * Array of CSB entries for application --> kernel communication
+ * (N entries). */
+ struct nm_csb_atok *np_csb_atok_base;
+ /* Array of CSB entries for kernel --> application communication
+ * (N entries). */
+ struct nm_csb_ktoa *np_csb_ktoa_base;
+
struct thread *np_td; /* kqueue, just debugging */
+#ifdef linux
+ struct file *np_filp; /* used by sync kloop */
+#endif /* linux */
};
struct netmap_priv_d *netmap_priv_new(void);
@@ -1943,6 +1971,14 @@
return 0;
}
+/* Nonzero if priv has an adapter and np_qlast[t] - np_qfirst[t] > 1; call with NMG_LOCK held */
+static __inline int
+nm_si_user(struct netmap_priv_d *priv, enum txrx t)
+{
+ return (priv->np_na != NULL &&
+ (priv->np_qlast[t] - priv->np_qfirst[t] > 1));
+}
+
#ifdef WITH_PIPES
int netmap_pipe_txsync(struct netmap_kring *txkring, int flags);
int netmap_pipe_rxsync(struct netmap_kring *rxkring, int flags);
@@ -2143,17 +2179,14 @@
* kernel thread routines
*/
struct nm_kctx; /* OS-specific kernel context - opaque */
-typedef void (*nm_kctx_worker_fn_t)(void *data, int is_kthread);
-typedef void (*nm_kctx_notify_fn_t)(void *data);
+typedef void (*nm_kctx_worker_fn_t)(void *data);
/* kthread configuration */
struct nm_kctx_cfg {
long type; /* kthread type/identifier */
nm_kctx_worker_fn_t worker_fn; /* worker function */
void *worker_private;/* worker parameter */
- nm_kctx_notify_fn_t notify_fn; /* notify function */
int attach_user; /* attach kthread to user process */
- int use_kthread; /* use a kthread for the context */
};
/* kthread configuration */
struct nm_kctx *nm_os_kctx_create(struct nm_kctx_cfg *cfg,
@@ -2161,48 +2194,25 @@
int nm_os_kctx_worker_start(struct nm_kctx *);
void nm_os_kctx_worker_stop(struct nm_kctx *);
void nm_os_kctx_destroy(struct nm_kctx *);
-void nm_os_kctx_worker_wakeup(struct nm_kctx *nmk);
-void nm_os_kctx_send_irq(struct nm_kctx *);
void nm_os_kctx_worker_setaff(struct nm_kctx *, int);
u_int nm_os_ncpus(void);
-#ifdef WITH_PTNETMAP_HOST
+int netmap_sync_kloop(struct netmap_priv_d *priv,
+ struct nmreq_header *hdr);
+int netmap_sync_kloop_stop(struct netmap_priv_d *priv);
+
+#ifdef WITH_PTNETMAP
+/* ptnetmap guest routines */
+
/*
- * netmap adapter for host ptnetmap ports
+ * ptnetmap_memdev routines used to talk with ptnetmap_memdev device driver
*/
-struct netmap_pt_host_adapter {
- struct netmap_adapter up;
+struct ptnetmap_memdev;
+int nm_os_pt_memdev_iomap(struct ptnetmap_memdev *, vm_paddr_t *, void **,
+ uint64_t *);
+void nm_os_pt_memdev_iounmap(struct ptnetmap_memdev *);
+uint32_t nm_os_pt_memdev_ioread(struct ptnetmap_memdev *, unsigned int);
- /* the passed-through adapter */
- struct netmap_adapter *parent;
- /* parent->na_flags, saved at NETMAP_PT_HOST_CREATE time,
- * and restored at NETMAP_PT_HOST_DELETE time */
- uint32_t parent_na_flags;
-
- int (*parent_nm_notify)(struct netmap_kring *kring, int flags);
- void *ptns;
-};
-
-/* ptnetmap host-side routines */
-int netmap_get_pt_host_na(struct nmreq_header *hdr, struct netmap_adapter **na,
- struct netmap_mem_d * nmd, int create);
-int ptnetmap_ctl(const char *nr_name, int create, struct netmap_adapter *na);
-
-static inline int
-nm_ptnetmap_host_on(struct netmap_adapter *na)
-{
- return na && na->na_flags & NAF_PTNETMAP_HOST;
-}
-#else /* !WITH_PTNETMAP_HOST */
-#define netmap_get_pt_host_na(hdr, _2, _3, _4) \
- (((struct nmreq_register *)(uintptr_t)hdr->nr_body)->nr_flags & (NR_PTNETMAP_HOST) ? EOPNOTSUPP : 0)
-#define ptnetmap_ctl(_1, _2, _3) EINVAL
-#define nm_ptnetmap_host_on(_1) EINVAL
-#endif /* !WITH_PTNETMAP_HOST */
-
-#ifdef WITH_PTNETMAP_GUEST
-/* ptnetmap GUEST routines */
-
/*
* netmap adapter for guest ptnetmap ports
*/
@@ -2218,27 +2228,84 @@
* network stack and netmap clients.
* Used to decide when we need (de)allocate krings/rings and
* start (stop) ptnetmap kthreads. */
- int backend_regifs;
+ int backend_users;
};
int netmap_pt_guest_attach(struct netmap_adapter *na,
unsigned int nifp_offset,
unsigned int memid);
-struct ptnet_csb_gh;
-struct ptnet_csb_hg;
-bool netmap_pt_guest_txsync(struct ptnet_csb_gh *ptgh,
- struct ptnet_csb_hg *pthg,
- struct netmap_kring *kring,
- int flags);
-bool netmap_pt_guest_rxsync(struct ptnet_csb_gh *ptgh,
- struct ptnet_csb_hg *pthg,
+bool netmap_pt_guest_txsync(struct nm_csb_atok *atok,
+ struct nm_csb_ktoa *ktoa,
struct netmap_kring *kring, int flags);
+bool netmap_pt_guest_rxsync(struct nm_csb_atok *atok,
+ struct nm_csb_ktoa *ktoa,
+ struct netmap_kring *kring, int flags);
int ptnet_nm_krings_create(struct netmap_adapter *na);
void ptnet_nm_krings_delete(struct netmap_adapter *na);
void ptnet_nm_dtor(struct netmap_adapter *na);
-#endif /* WITH_PTNETMAP_GUEST */
+/* Guest driver: Write kring pointers (cur, head) to the CSB.
+ * This routine is coupled with ptnetmap_host_read_kring_csb(). */
+static inline void
+ptnetmap_guest_write_kring_csb(struct nm_csb_atok *atok, uint32_t cur,
+ uint32_t head)
+{
+ /*
+ * We need to write cur and head to the CSB but we cannot do it atomically.
+ * There is no way we can prevent the host from reading the updated value
+ * of one of the two and the old value of the other. However, if we make
+ * sure that the host never reads a value of head more recent than the
+ * value of cur we are safe. We can allow the host to read a value of cur
+ * more recent than the value of head, since in the netmap ring cur can be
+ * ahead of head and cur cannot wrap around head because it must be behind
+ * tail. Inverting the order of the writes below could instead lead the
+ * host to think that head went ahead of cur, which would cause the sync
+ * prologue to fail.
+ *
+ * The following memory barrier scheme is used to make this happen:
+ *
+ * Guest Host
+ *
+ * STORE(cur) LOAD(head)
+ * mb() <-----------> mb()
+ * STORE(head) LOAD(cur)
+ */
+ atok->cur = cur;
+ nm_stst_barrier();
+ atok->head = head;
+}
+
+/* Guest driver: Read kring pointers (hwcur, hwtail) from the CSB.
+ * This routine is coupled with ptnetmap_host_write_kring_csb(). */
+static inline void
+ptnetmap_guest_read_kring_csb(struct nm_csb_ktoa *ktoa,
+ struct netmap_kring *kring)
+{
+ /*
+ * Read hwtail first, then hwcur, separated by a barrier, so that hwtail
+ * never appears more recent than hwcur (see ptnetmap_host_write_kring_csb).
+ * NOTE(review): both are loads, yet the barrier is named store-store; confirm.
+ */
+ kring->nr_hwtail = ktoa->hwtail;
+ nm_stst_barrier();
+ kring->nr_hwcur = ktoa->hwcur;
+}
+
+/* Helper function wrapping ptnetmap_guest_read_kring_csb(). */
+static inline void
+ptnet_sync_tail(struct nm_csb_ktoa *ktoa, struct netmap_kring *kring)
+{
+ struct netmap_ring *ring = kring->ring;
+
+ /* Update hwcur and hwtail as known by the host. */
+ ptnetmap_guest_read_kring_csb(ktoa, kring);
+
+ /* nm_sync_finalize */
+ ring->tail = kring->rtail = kring->nr_hwtail;
+}
+#endif /* WITH_PTNETMAP */
+
#ifdef __FreeBSD__
/*
* FreeBSD mbuf allocator/deallocator in emulation mode:
@@ -2354,5 +2421,17 @@
struct nmreq_option * nmreq_findoption(struct nmreq_option *, uint16_t);
int nmreq_checkduplicate(struct nmreq_option *);
+
+int netmap_init_bridges(void);
+void netmap_uninit_bridges(void);
+
+/* Functions to read and write CSB fields from the kernel. */
+#if defined (linux)
+#define CSB_READ(csb, field, r) (get_user(r, &csb->field))
+#define CSB_WRITE(csb, field, v) (put_user(v, &csb->field))
+#else /* ! linux */
+#define CSB_READ(csb, field, r) (r = fuword32(&csb->field))
+#define CSB_WRITE(csb, field, v) (suword32(&csb->field, v))
+#endif /* ! linux */
#endif /* _NET_NETMAP_KERN_H_ */
Index: head/sys/dev/netmap/netmap_kloop.c
===================================================================
--- head/sys/dev/netmap/netmap_kloop.c
+++ head/sys/dev/netmap/netmap_kloop.c
@@ -0,0 +1,916 @@
+/*
+ * Copyright (C) 2016-2018 Vincenzo Maffione
+ * Copyright (C) 2015 Stefano Garzarella
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * common headers
+ */
+#if defined(__FreeBSD__)
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/types.h>
+#include <sys/selinfo.h>
+#include <sys/socket.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <machine/bus.h>
+
+#define usleep_range(_1, _2) /* sleep ~_1 us; sbt is relative, not C_ABSOLUTE */ \
+	pause_sbt("sync-kloop-sleep", SBT_1US * (_1), SBT_1US * 1, 0)
+
+#elif defined(linux)
+#include <bsd_glue.h>
+#include <linux/file.h>
+#include <linux/eventfd.h>
+#endif
+
+#include <net/netmap.h>
+#include <dev/netmap/netmap_kern.h>
+#include <net/netmap_virt.h>
+#include <dev/netmap/netmap_mem2.h>
+
+/* Support for eventfd-based notifications. */
+#if defined(linux)
+#define SYNC_KLOOP_POLL
+#endif
+
+/* Write kring pointers (hwcur, hwtail) to the CSB.
+ * This routine is coupled with ptnetmap_guest_read_kring_csb(). */
+static inline void
+sync_kloop_kernel_write(struct nm_csb_ktoa __user *ptr, uint32_t hwcur,
+ uint32_t hwtail)
+{
+ /*
+ * The same scheme used in ptnetmap_guest_write_kring_csb() applies here.
+ * We allow the application to read a value of hwcur more recent than the value
+ * of hwtail, since this would anyway result in a consistent view of the
+ * ring state (and hwcur can never wraparound hwtail, since hwcur must be
+ * behind head).
+ *
+ * The following memory barrier scheme is used to make this happen:
+ *
+ *          Kernel (this function)     Application (reader)
+ *
+ *          STORE(hwcur)               LOAD(hwtail)
+ *          mb() <-------------------> mb()
+ *          STORE(hwtail)              LOAD(hwcur)
+ */
+ CSB_WRITE(ptr, hwcur, hwcur);
+ nm_stst_barrier();
+ CSB_WRITE(ptr, hwtail, hwtail);
+}
+
+/* Read kring pointers (head, cur, sync_flags) from the CSB into shadow_ring.
+ * Coupled with ptnetmap_guest_write_kring_csb(); num_slots is unused. */
+static inline void
+sync_kloop_kernel_read(struct nm_csb_atok __user *ptr,
+		       struct netmap_ring *shadow_ring,
+		       uint32_t num_slots)
+{
+	/*
+	 * Load head before cur, the reverse of the order used by the writer,
+	 * so that the cur we get is never older than head. Two loads are
+	 * being ordered here, hence mb() and not a store-store barrier.
+	 */
+	CSB_READ(ptr, head, shadow_ring->head);
+	mb();
+	CSB_READ(ptr, cur, shadow_ring->cur);
+	CSB_READ(ptr, sync_flags, shadow_ring->flags);
+}
+
+/* Enable or disable application --> kernel kicks. */
+static inline void
+csb_ktoa_kick_enable(struct nm_csb_ktoa __user *csb_ktoa, uint32_t val)
+{
+ CSB_WRITE(csb_ktoa, kern_need_kick, val);
+}
+
+/* Return the appl_need_kick CSB field: non-zero if interrupts are enabled. */
+static inline uint32_t
+csb_atok_intr_enabled(struct nm_csb_atok __user *csb_atok)
+{
+ uint32_t v;
+
+ CSB_READ(csb_atok, appl_need_kick, v);
+
+ return v;
+}
+
+static inline void
+sync_kloop_kring_dump(const char *title, const struct netmap_kring *kring)
+{
+ nm_prinf("%s - name: %s hwcur: %d hwtail: %d "
+ "rhead: %d rcur: %d rtail: %d",
+ title, kring->name, kring->nr_hwcur, kring->nr_hwtail,
+ kring->rhead, kring->rcur, kring->rtail);
+}
+
+struct sync_kloop_ring_args {
+ struct netmap_kring *kring;
+ struct nm_csb_atok *csb_atok;
+ struct nm_csb_ktoa *csb_ktoa;
+#ifdef SYNC_KLOOP_POLL
+ struct eventfd_ctx *irq_ctx;
+#endif /* SYNC_KLOOP_POLL */
+};
+
+/* Sync a TX kring on behalf of the application until the CSB shows no more
+ * work. Called with the kring held by the caller (see nm_kr_tryget()). */
+static void
+netmap_sync_kloop_tx_ring(const struct sync_kloop_ring_args *a)
+{
+	struct netmap_kring *kring = a->kring;
+	struct nm_csb_atok *csb_atok = a->csb_atok;
+	struct nm_csb_ktoa *csb_ktoa = a->csb_ktoa;
+	struct netmap_ring shadow_ring; /* shadow copy of the netmap_ring */
+	uint32_t num_slots = kring->nkr_num_slots;
+	bool more_txspace = false;
+	int batch;
+
+	/* Disable application --> kernel notifications. */
+	csb_ktoa_kick_enable(csb_ktoa, 0);
+	/* Copy the application kring pointers from the CSB */
+	sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
+
+	for (;;) {
+		batch = shadow_ring.head - kring->nr_hwcur;
+		if (batch < 0)
+			batch += num_slots;
+
+#ifdef PTN_TX_BATCH_LIM
+		if (batch > PTN_TX_BATCH_LIM(num_slots)) {
+			/* If application moves ahead too fast, let's cut the move so
+			 * that we don't exceed our batch limit. */
+			uint32_t head_lim = kring->nr_hwcur + PTN_TX_BATCH_LIM(num_slots);
+
+			if (head_lim >= num_slots)
+				head_lim -= num_slots;
+			nm_prdis(1, "batch: %d head: %d head_lim: %d", batch, shadow_ring.head,
+					head_lim);
+			shadow_ring.head = head_lim;
+			batch = PTN_TX_BATCH_LIM(num_slots);
+		}
+#endif /* PTN_TX_BATCH_LIM */
+
+		if (nm_kr_txspace(kring) <= (num_slots >> 1)) {
+			shadow_ring.flags |= NAF_FORCE_RECLAIM;
+		}
+
+		/* Netmap prologue */
+		shadow_ring.tail = kring->rtail;
+		if (unlikely(nm_txsync_prologue(kring, &shadow_ring) >= num_slots)) {
+			/* Reinit ring and enable notifications. */
+			netmap_ring_reinit(kring);
+			csb_ktoa_kick_enable(csb_ktoa, 1);
+			break;
+		}
+
+		if (unlikely(netmap_debug & NM_DEBUG_TXSYNC)) {
+			sync_kloop_kring_dump("pre txsync", kring);
+		}
+
+		if (unlikely(kring->nm_sync(kring, shadow_ring.flags))) {
+			/* Reenable notifications. */
+			csb_ktoa_kick_enable(csb_ktoa, 1);
+			nm_prerr("txsync() failed");
+			break;
+		}
+
+		/*
+		 * Finalize
+		 * Copy kernel hwcur and hwtail into the CSB for the application sync(), and
+		 * do the nm_sync_finalize.
+		 */
+		sync_kloop_kernel_write(csb_ktoa, kring->nr_hwcur,
+				kring->nr_hwtail);
+		if (kring->rtail != kring->nr_hwtail) {
+			/* Some more room available in the parent adapter. */
+			kring->rtail = kring->nr_hwtail;
+			more_txspace = true;
+		}
+
+		if (unlikely(netmap_debug & NM_DEBUG_TXSYNC)) {
+			sync_kloop_kring_dump("post txsync", kring);
+		}
+
+		/* Interrupt the application if needed. */
+#ifdef SYNC_KLOOP_POLL
+		if (a->irq_ctx && more_txspace && csb_atok_intr_enabled(csb_atok)) {
+			/* Kick sent: signal again only when more space shows up. */
+			eventfd_signal(a->irq_ctx, 1);
+			more_txspace = false;
+		}
+#endif /* SYNC_KLOOP_POLL */
+
+		/* Read CSB to see if there is more work to do. */
+		sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
+		if (shadow_ring.head == kring->rhead) {
+			/*
+			 * No more packets to transmit. We enable notifications and
+			 * go to sleep, waiting for a kick from the application when
+			 * new slots are ready for transmission.
+			 */
+			csb_ktoa_kick_enable(csb_ktoa, 1);
+			/* Doublecheck, after a full (store-load) barrier. */
+			mb();
+			sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
+			if (shadow_ring.head != kring->rhead) {
+				/* We won the race condition, there are more packets to
+				 * transmit. Disable notifications and do another cycle */
+				csb_ktoa_kick_enable(csb_ktoa, 0);
+				continue;
+			}
+			break;
+		}
+
+		if (nm_kr_txempty(kring)) {
+			/* No more available TX slots. We stop and wait for a
+			 * notification from the backend (netmap_tx_irq). */
+			nm_prdis(1, "TX ring");
+			break;
+		}
+	}
+
+#ifdef SYNC_KLOOP_POLL
+	if (a->irq_ctx && more_txspace && csb_atok_intr_enabled(csb_atok)) {
+		eventfd_signal(a->irq_ctx, 1);
+	}
+#endif /* SYNC_KLOOP_POLL */
+}
+
+/* RX cycle without receive any packets */
+#define SYNC_LOOP_RX_DRY_CYCLES_MAX 2
+
+static inline int
+sync_kloop_norxslots(struct netmap_kring *kring, uint32_t g_head)
+{
+ return (NM_ACCESS_ONCE(kring->nr_hwtail) == nm_prev(g_head,
+ kring->nkr_num_slots - 1));
+}
+
+/*
+ * Sync an RX kring on behalf of the application: run rxsync, publish the new
+ * hwcur/hwtail in the CSB and repeat while the application keeps returning
+ * slots and the backend keeps delivering packets.
+ * The kring is acquired and released by the caller (nm_kr_tryget() and
+ * nm_kr_put()), so it must not be released here.
+ */
+static void
+netmap_sync_kloop_rx_ring(const struct sync_kloop_ring_args *a)
+{
+	struct netmap_kring *kring = a->kring;
+	struct nm_csb_atok *csb_atok = a->csb_atok;
+	struct nm_csb_ktoa *csb_ktoa = a->csb_ktoa;
+	struct netmap_ring shadow_ring; /* shadow copy of the netmap_ring */
+	uint32_t num_slots = kring->nkr_num_slots;
+	int dry_cycles = 0;
+	bool some_recvd = false;
+
+	/* Disable notifications. */
+	csb_ktoa_kick_enable(csb_ktoa, 0);
+	/* Copy the application kring pointers from the CSB */
+	sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
+
+	for (;;) {
+		uint32_t hwtail;
+
+		/* Netmap prologue */
+		shadow_ring.tail = kring->rtail;
+		if (unlikely(nm_rxsync_prologue(kring, &shadow_ring) >= num_slots)) {
+			/* Reinit ring and enable notifications. */
+			netmap_ring_reinit(kring);
+			csb_ktoa_kick_enable(csb_ktoa, 1);
+			break;
+		}
+
+		if (unlikely(netmap_debug & NM_DEBUG_RXSYNC)) {
+			sync_kloop_kring_dump("pre rxsync", kring);
+		}
+
+		if (unlikely(kring->nm_sync(kring, shadow_ring.flags))) {
+			/* Reenable notifications. */
+			csb_ktoa_kick_enable(csb_ktoa, 1);
+			nm_prerr("rxsync() failed");
+			break;
+		}
+
+		/*
+		 * Finalize
+		 * Copy kernel hwcur and hwtail into the CSB for the application sync()
+		 */
+		hwtail = NM_ACCESS_ONCE(kring->nr_hwtail);
+		sync_kloop_kernel_write(csb_ktoa, kring->nr_hwcur, hwtail);
+		if (kring->rtail != hwtail) {
+			kring->rtail = hwtail;
+			some_recvd = true;
+			dry_cycles = 0;
+		} else {
+			dry_cycles++;
+		}
+
+		if (unlikely(netmap_debug & NM_DEBUG_RXSYNC)) {
+			sync_kloop_kring_dump("post rxsync", kring);
+		}
+
+#ifdef SYNC_KLOOP_POLL
+		/* Interrupt the application if needed. */
+		if (a->irq_ctx && some_recvd && csb_atok_intr_enabled(csb_atok)) {
+			/* Kick sent: signal again only when more packets show up. */
+			eventfd_signal(a->irq_ctx, 1);
+			some_recvd = false;
+		}
+#endif /* SYNC_KLOOP_POLL */
+
+		/* Read CSB to see if there is more work to do. */
+		sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
+		if (sync_kloop_norxslots(kring, shadow_ring.head)) {
+			/*
+			 * No more slots available for reception. We enable notification and
+			 * go to sleep, waiting for a kick from the application when new receive
+			 * slots are available.
+			 */
+			/* Reenable notifications. */
+			csb_ktoa_kick_enable(csb_ktoa, 1);
+			/* Doublecheck, after a full (store-load) barrier. */
+			mb();
+			sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
+			if (!sync_kloop_norxslots(kring, shadow_ring.head)) {
+				/* We won the race condition, more slots are available. Disable
+				 * notifications and do another cycle. */
+				csb_ktoa_kick_enable(csb_ktoa, 0);
+				continue;
+			}
+			break;
+		}
+
+		hwtail = NM_ACCESS_ONCE(kring->nr_hwtail);
+		if (unlikely(hwtail == kring->rhead ||
+					dry_cycles >= SYNC_LOOP_RX_DRY_CYCLES_MAX)) {
+			/* No more packets to be read from the backend. We stop and
+			 * wait for a notification from the backend (netmap_rx_irq). */
+			nm_prdis(1, "nr_hwtail: %d rhead: %d dry_cycles: %d",
+					hwtail, kring->rhead, dry_cycles);
+			break;
+		}
+	}
+
+#ifdef SYNC_KLOOP_POLL
+	/* Interrupt the application if needed. */
+	if (a->irq_ctx && some_recvd && csb_atok_intr_enabled(csb_atok)) {
+		eventfd_signal(a->irq_ctx, 1);
+	}
+#endif /* SYNC_KLOOP_POLL */
+}
+
+#ifdef SYNC_KLOOP_POLL
+struct sync_kloop_poll_entry {
+ /* Support for receiving notifications from
+ * a netmap ring or from the application. */
+ struct file *filp;
+ wait_queue_t wait;
+ wait_queue_head_t *wqh;
+
+ /* Support for sending notifications to the application. */
+ struct eventfd_ctx *irq_ctx;
+ struct file *irq_filp;
+};
+
+struct sync_kloop_poll_ctx {
+ poll_table wait_table;
+ unsigned int next_entry;
+ unsigned int num_entries;
+ struct sync_kloop_poll_entry entries[0];
+};
+
+static void
+sync_kloop_poll_table_queue_proc(struct file *file, wait_queue_head_t *wqh,
+ poll_table *pt)
+{
+ struct sync_kloop_poll_ctx *poll_ctx =
+ container_of(pt, struct sync_kloop_poll_ctx, wait_table);
+ struct sync_kloop_poll_entry *entry = poll_ctx->entries +
+ poll_ctx->next_entry;
+
+ BUG_ON(poll_ctx->next_entry >= poll_ctx->num_entries);
+ entry->wqh = wqh;
+ entry->filp = file;
+ /* Use the default wake up function. */
+ init_waitqueue_entry(&entry->wait, current);
+ add_wait_queue(wqh, &entry->wait);
+ poll_ctx->next_entry++;
+}
+#endif /* SYNC_KLOOP_POLL */
+
+/*
+ * Run the sync kloop on the rings bound to priv until a stop is requested
+ * (see netmap_sync_kloop_stop()). Returns 0 or an error code.
+ */
+int
+netmap_sync_kloop(struct netmap_priv_d *priv, struct nmreq_header *hdr)
+{
+	struct nmreq_sync_kloop_start *req =
+		(struct nmreq_sync_kloop_start *)(uintptr_t)hdr->nr_body;
+	struct nmreq_opt_sync_kloop_eventfds *eventfds_opt = NULL;
+#ifdef SYNC_KLOOP_POLL
+	struct sync_kloop_poll_ctx *poll_ctx = NULL;
+#endif /* SYNC_KLOOP_POLL */
+	int num_rx_rings, num_tx_rings, num_rings;
+	uint32_t sleep_us = req->sleep_us;
+	struct nm_csb_atok* csb_atok_base;
+	struct nm_csb_ktoa* csb_ktoa_base;
+	struct netmap_adapter *na;
+	struct nmreq_option *opt;
+	int err = 0;
+	int i;
+
+	if (sleep_us > 1000000) {
+		/* We do not accept sleeping for more than a second. */
+		return EINVAL;
+	}
+
+	if (priv->np_nifp == NULL) {
+		return ENXIO;
+	}
+	mb(); /* make sure following reads are not from cache */
+
+	na = priv->np_na;
+	if (!nm_netmap_on(na)) {
+		return ENXIO;
+	}
+
+	NMG_LOCK();
+	/* Make sure the application is working in CSB mode. */
+	if (!priv->np_csb_atok_base || !priv->np_csb_ktoa_base) {
+		NMG_UNLOCK();
+		nm_prerr("sync-kloop on %s requires "
+				"NETMAP_REQ_OPT_CSB option", na->name);
+		return EINVAL;
+	}
+
+	csb_atok_base = priv->np_csb_atok_base;
+	csb_ktoa_base = priv->np_csb_ktoa_base;
+
+	/* Make sure that no kloop is currently running. */
+	if (priv->np_kloop_state & NM_SYNC_KLOOP_RUNNING) {
+		NMG_UNLOCK();
+		return EBUSY;
+	}
+	priv->np_kloop_state |= NM_SYNC_KLOOP_RUNNING;
+	NMG_UNLOCK();
+
+	num_rx_rings = priv->np_qlast[NR_RX] - priv->np_qfirst[NR_RX];
+	num_tx_rings = priv->np_qlast[NR_TX] - priv->np_qfirst[NR_TX];
+	num_rings = num_tx_rings + num_rx_rings;
+
+	/* Validate notification options. */
+	opt = nmreq_findoption((struct nmreq_option *)(uintptr_t)hdr->nr_options,
+				NETMAP_REQ_OPT_SYNC_KLOOP_EVENTFDS);
+	if (opt != NULL) {
+		err = nmreq_checkduplicate(opt);
+		if (err) {
+			opt->nro_status = err;
+			goto out;
+		}
+		if (opt->nro_size != sizeof(*eventfds_opt) +
+			sizeof(eventfds_opt->eventfds[0]) * num_rings) {
+			/* Option size not consistent with the number of entries. */
+			opt->nro_status = err = EINVAL;
+			goto out;
+		}
+#ifdef SYNC_KLOOP_POLL
+		eventfds_opt = (struct nmreq_opt_sync_kloop_eventfds *)opt;
+		opt->nro_status = 0;
+		/* We need 2 poll entries for the TX and RX notifications of the
+		 * netmap adapter, plus one entry per ring for the application. */
+		poll_ctx = nm_os_malloc(sizeof(*poll_ctx) +
+				(2 + num_rings) * sizeof(poll_ctx->entries[0]));
+		if (poll_ctx == NULL) {
+			opt->nro_status = err = ENOMEM;
+			goto out;
+		}
+		init_poll_funcptr(&poll_ctx->wait_table,
+					sync_kloop_poll_table_queue_proc);
+		poll_ctx->num_entries = 2 + num_rings;
+		poll_ctx->next_entry = 0;
+		/* Poll the eventfds the application uses for its notifications. */
+		for (i = 0; i < num_rings; i++) {
+			struct eventfd_ctx *irq;
+			struct file *filp;
+			unsigned long mask;
+
+			filp = eventfd_fget(eventfds_opt->eventfds[i].ioeventfd);
+			if (IS_ERR(filp)) {
+				err = PTR_ERR(filp);
+				goto out;
+			}
+			mask = filp->f_op->poll(filp, &poll_ctx->wait_table);
+			if (mask & POLLERR) {
+				err = EINVAL;
+				goto out;
+			}
+
+			filp = eventfd_fget(eventfds_opt->eventfds[i].irqfd);
+			if (IS_ERR(filp)) {
+				err = PTR_ERR(filp);
+				goto out;
+			}
+			poll_ctx->entries[i].irq_filp = filp;
+			irq = eventfd_ctx_fileget(filp);
+			if (IS_ERR(irq)) {
+				err = PTR_ERR(irq);
+				goto out;
+			}
+			poll_ctx->entries[i].irq_ctx = irq;
+		}
+		/* Poll the netmap rings bound to this file descriptor too. */
+		{
+			NM_SELINFO_T *si[NR_TXRX];
+
+			NMG_LOCK();
+			si[NR_RX] = nm_si_user(priv, NR_RX) ? &na->si[NR_RX] :
+				&na->rx_rings[priv->np_qfirst[NR_RX]]->si;
+			si[NR_TX] = nm_si_user(priv, NR_TX) ? &na->si[NR_TX] :
+				&na->tx_rings[priv->np_qfirst[NR_TX]]->si;
+			NMG_UNLOCK();
+			poll_wait(priv->np_filp, si[NR_RX], &poll_ctx->wait_table);
+			poll_wait(priv->np_filp, si[NR_TX], &poll_ctx->wait_table);
+		}
+#else /* SYNC_KLOOP_POLL */
+		opt->nro_status = EOPNOTSUPP;
+		goto out;
+#endif /* SYNC_KLOOP_POLL */
+	}
+
+	/* Main loop. */
+	for (;;) {
+		if (unlikely(NM_ACCESS_ONCE(priv->np_kloop_state) & NM_SYNC_KLOOP_STOPPING)) {
+			break;
+		}
+
+#ifdef SYNC_KLOOP_POLL
+		if (poll_ctx)
+			__set_current_state(TASK_INTERRUPTIBLE);
+#endif /* SYNC_KLOOP_POLL */
+
+		/* Process all the TX rings bound to this file descriptor. */
+		for (i = 0; i < num_tx_rings; i++) {
+			struct sync_kloop_ring_args a = {
+				.kring = NMR(na, NR_TX)[i + priv->np_qfirst[NR_TX]],
+				.csb_atok = csb_atok_base + i,
+				.csb_ktoa = csb_ktoa_base + i,
+			};
+
+#ifdef SYNC_KLOOP_POLL
+			if (poll_ctx)
+				a.irq_ctx = poll_ctx->entries[i].irq_ctx;
+#endif /* SYNC_KLOOP_POLL */
+			if (unlikely(nm_kr_tryget(a.kring, 1, NULL))) {
+				continue;
+			}
+			netmap_sync_kloop_tx_ring(&a);
+			nm_kr_put(a.kring);
+		}
+
+		/* Process all the RX rings bound to this file descriptor. */
+		for (i = 0; i < num_rx_rings; i++) {
+			struct sync_kloop_ring_args a = {
+				.kring = NMR(na, NR_RX)[i + priv->np_qfirst[NR_RX]],
+				.csb_atok = csb_atok_base + num_tx_rings + i,
+				.csb_ktoa = csb_ktoa_base + num_tx_rings + i,
+			};
+
+#ifdef SYNC_KLOOP_POLL
+			if (poll_ctx)
+				a.irq_ctx = poll_ctx->entries[num_tx_rings + i].irq_ctx;
+#endif /* SYNC_KLOOP_POLL */
+			if (unlikely(nm_kr_tryget(a.kring, 1, NULL))) {
+				continue;
+			}
+			netmap_sync_kloop_rx_ring(&a);
+			nm_kr_put(a.kring);
+		}
+
+#ifdef SYNC_KLOOP_POLL
+		if (poll_ctx) {
+			/* If a poll context is present, yield to the scheduler
+			 * waiting for a notification to come either from
+			 * netmap or the application. */
+			schedule_timeout_interruptible(msecs_to_jiffies(1000));
+		} else
+#endif /* SYNC_KLOOP_POLL */
+		{
+			/* Default synchronization method: sleep for a while. */
+			usleep_range(sleep_us, sleep_us);
+		}
+	}
+out:
+#ifdef SYNC_KLOOP_POLL
+	if (poll_ctx) {
+		/* Stop polling from netmap and the eventfds, and deallocate
+		 * the poll context. */
+		__set_current_state(TASK_RUNNING);
+		for (i = 0; i < poll_ctx->next_entry; i++) {
+			struct sync_kloop_poll_entry *entry =
+						poll_ctx->entries + i;
+
+			if (entry->wqh)
+				remove_wait_queue(entry->wqh, &entry->wait);
+			/* Drop the reference we got on each eventfd; no reference
+			 * was taken on the netmap file descriptor. */
+			if (entry->filp && entry->filp != priv->np_filp)
+				fput(entry->filp);
+			if (entry->irq_ctx)
+				eventfd_ctx_put(entry->irq_ctx);
+			if (entry->irq_filp)
+				fput(entry->irq_filp);
+		}
+		nm_os_free(poll_ctx);
+		poll_ctx = NULL;
+	}
+#endif /* SYNC_KLOOP_POLL */
+
+	/* Reset the kloop state. */
+	NMG_LOCK();
+	priv->np_kloop_state = 0;
+	NMG_UNLOCK();
+
+	return err;
+}
+
+/* Ask the kloop of priv to stop and wait until it is no longer RUNNING. */
+int
+netmap_sync_kloop_stop(struct netmap_priv_d *priv)
+{
+	bool running;
+
+	/* Raise the flag that the kloop main loop tests on each iteration. */
+	NMG_LOCK();
+	priv->np_kloop_state |= NM_SYNC_KLOOP_STOPPING;
+	NMG_UNLOCK();
+	do {
+		/* Sleep first, then sample the state under the lock. */
+		usleep_range(1000, 1500);
+		NMG_LOCK();
+		running = NM_ACCESS_ONCE(priv->np_kloop_state) & NM_SYNC_KLOOP_RUNNING;
+		NMG_UNLOCK();
+	} while (running);
+	return 0;
+}
+
+#ifdef WITH_PTNETMAP
+/*
+ * Guest ptnetmap txsync()/rxsync() routines, used in ptnet device drivers.
+ * These routines are reused across the different operating systems supported
+ * by netmap.
+ */
+
+/*
+ * Reconcile host and guest views of the transmit ring.
+ *
+ * Guest user wants to transmit packets up to the one before ring->head,
+ * and guest kernel knows tx_ring->hwcur is the first packet unsent
+ * by the host kernel.
+ *
+ * We push out as many packets as possible, and possibly
+ * reclaim buffers from previously completed transmission.
+ *
+ * Notifications from the host are enabled only if the user guest would
+ * block (no space in the ring).
+ */
+bool
+netmap_pt_guest_txsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa,
+			struct netmap_kring *kring, int flags)
+{
+	bool notify = false; /* returned: set when a kick to the host is needed */
+
+	/* Disable notifications */
+	atok->appl_need_kick = 0;
+
+	/*
+	 * First part: tell the host (updating the CSB) to process the new packets.
+	 */
+	kring->nr_hwcur = ktoa->hwcur;
+	ptnetmap_guest_write_kring_csb(atok, kring->rcur, kring->rhead);
+
+	/* Ask for a kick from a guest to the host if needed. */
+	if (((kring->rhead != kring->nr_hwcur || nm_kr_txempty(kring))
+		&& NM_ACCESS_ONCE(ktoa->kern_need_kick)) ||
+			(flags & NAF_FORCE_RECLAIM)) {
+		atok->sync_flags = flags;
+		notify = true;
+	}
+
+	/*
+	 * Second part: reclaim buffers for completed transmissions.
+	 */
+	if (nm_kr_txempty(kring) || (flags & NAF_FORCE_RECLAIM)) {
+		ptnetmap_guest_read_kring_csb(ktoa, kring);
+	}
+
+	/*
+	 * No more room in the ring for new transmissions. The user thread will
+	 * go to sleep and we need to be notified by the host when more free
+	 * space is available.
+	 */
+	if (nm_kr_txempty(kring) && !(kring->nr_kflags & NKR_NOINTR)) {
+		/* Reenable notifications. */
+		atok->appl_need_kick = 1;
+		/* Double check, after a full (store-load) barrier. */
+		mb();
+		ptnetmap_guest_read_kring_csb(ktoa, kring);
+		/* If there is new free space, disable notifications */
+		if (unlikely(!nm_kr_txempty(kring))) {
+			atok->appl_need_kick = 0;
+		}
+	}
+
+	nm_prdis(1, "%s CSB(head:%u cur:%u hwtail:%u) KRING(head:%u cur:%u tail:%u)",
+		kring->name, atok->head, atok->cur, ktoa->hwtail,
+		kring->rhead, kring->rcur, kring->nr_hwtail);
+
+	return notify;
+}
+
+/*
+ * Reconcile host and guest view of the receive ring.
+ *
+ * Update hwcur/hwtail from host (reading from CSB).
+ *
+ * If guest user has released buffers up to the one before ring->head, we
+ * also give them to the host.
+ *
+ * Notifications from the host are enabled only if the user guest would
+ * block (no more completed slots in the ring).
+ */
+bool
+netmap_pt_guest_rxsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa,
+			struct netmap_kring *kring, int flags)
+{
+	bool notify = false; /* returned: set when a kick to the host is needed */
+
+	/* Disable notifications */
+	atok->appl_need_kick = 0;
+
+	/*
+	 * First part: import newly received packets, by updating the kring
+	 * hwtail to the hwtail known from the host (read from the CSB).
+	 * This also updates the kring hwcur.
+	 */
+	ptnetmap_guest_read_kring_csb(ktoa, kring);
+	kring->nr_kflags &= ~NKR_PENDINTR;
+
+	/*
+	 * Second part: tell the host about the slots that guest user has
+	 * released, by updating cur and head in the CSB.
+	 */
+	if (kring->rhead != kring->nr_hwcur) {
+		ptnetmap_guest_write_kring_csb(atok, kring->rcur, kring->rhead);
+		/* Ask for a kick from the guest to the host if needed. */
+		if (NM_ACCESS_ONCE(ktoa->kern_need_kick)) {
+			atok->sync_flags = flags;
+			notify = true;
+		}
+	}
+
+	/*
+	 * No more completed RX slots. The user thread will go to sleep and
+	 * we need to be notified by the host when more RX slots have been
+	 * completed.
+	 */
+	if (nm_kr_rxempty(kring) && !(kring->nr_kflags & NKR_NOINTR)) {
+		/* Reenable notifications. */
+		atok->appl_need_kick = 1;
+		/* Double check, after a full (store-load) barrier. */
+		mb();
+		ptnetmap_guest_read_kring_csb(ktoa, kring);
+		/* If there are new slots, disable notifications. */
+		if (!nm_kr_rxempty(kring)) {
+			atok->appl_need_kick = 0;
+		}
+	}
+
+	nm_prdis(1, "%s CSB(head:%u cur:%u hwtail:%u) KRING(head:%u cur:%u tail:%u)",
+		kring->name, atok->head, atok->cur, ktoa->hwtail,
+		kring->rhead, kring->rcur, kring->nr_hwtail);
+
+	return notify;
+}
+
+/*
+ * Callbacks for ptnet drivers: nm_krings_create, nm_krings_delete, nm_dtor.
+ */
+/* nm_krings_create callback: create the krings on the public adapter and make
+ * them reachable from the driver one. Returns 0 or the creation error. */
+int
+ptnet_nm_krings_create(struct netmap_adapter *na)
+{
+	struct netmap_pt_guest_adapter *ptna =
+	    (struct netmap_pt_guest_adapter *)na; /* Upcast. */
+	struct netmap_adapter *pub = &ptna->hwup.up;	/* public adapter */
+	struct netmap_adapter *drv = &ptna->dr.up;	/* private to the driver */
+	int error = 0;
+
+	/* Nothing to do while backend_users is non-zero. */
+	if (ptna->backend_users == 0) {
+		/* Create krings on the public netmap adapter ... */
+		error = netmap_hw_krings_create(pub);
+		if (error == 0) {
+			/* ... and copy them into the driver adapter. */
+			drv->tx_rings = pub->tx_rings;
+			drv->rx_rings = pub->rx_rings;
+		}
+	}
+
+	return error;
+}
+
+/* nm_krings_delete callback: undo ptnet_nm_krings_create(). */
+void
+ptnet_nm_krings_delete(struct netmap_adapter *na)
+{
+	struct netmap_pt_guest_adapter *ptna =
+	    (struct netmap_pt_guest_adapter *)na; /* Upcast. */
+
+	/* Krings are left alone while backend_users is non-zero. */
+	if (ptna->backend_users != 0) {
+		return;
+	}
+
+	/* Detach the krings from the driver adapter, then delete them
+	 * through the public adapter. */
+	ptna->dr.up.tx_rings = NULL;
+	ptna->dr.up.rx_rings = NULL;
+	netmap_hw_krings_delete(&ptna->hwup.up);
+}
+
+void
+ptnet_nm_dtor(struct netmap_adapter *na)
+{
+ struct netmap_pt_guest_adapter *ptna =
+ (struct netmap_pt_guest_adapter *)na;
+ /* Put the memory reference held by the driver adapter, then wipe it. */
+ netmap_mem_put(ptna->dr.up.nm_mem);
+ memset(&ptna->dr, 0, sizeof(ptna->dr));
+ netmap_mem_pt_guest_ifp_del(na->nm_mem, na->ifp);
+}
+
+/* Attach arg as a pass-through guest adapter. Returns 0 or an error code. */
+int
+netmap_pt_guest_attach(struct netmap_adapter *arg,
+		       unsigned int nifp_offset, unsigned int memid)
+{
+	struct netmap_pt_guest_adapter *ptna;
+	struct ifnet *ifp;
+	int error;
+
+	if (arg == NULL) /* arg is dereferenced below */
+		return EINVAL;
+	ifp = arg->ifp;
+	/* get allocator */
+	arg->nm_mem = netmap_mem_pt_guest_new(ifp, nifp_offset, memid);
+	if (arg->nm_mem == NULL)
+		return ENOMEM;
+	arg->na_flags |= NAF_MEM_OWNER;
+	error = netmap_attach_ext(arg, sizeof(struct netmap_pt_guest_adapter), 1);
+	if (error)
+		return error;
+
+	ptna = (struct netmap_pt_guest_adapter *) NA(ifp);
+	/* Initialize the separate adapter used by the ptnet driver only and
+	 * never exposed to netmap applications: a subset of fields suffices. */
+	memset(&ptna->dr, 0, sizeof(ptna->dr));
+	ptna->dr.up.ifp = ifp;
+	ptna->dr.up.nm_mem = netmap_mem_get(ptna->hwup.up.nm_mem);
+	ptna->dr.up.nm_config = ptna->hwup.up.nm_config;
+	ptna->backend_users = 0;
+
+	return 0;
+}
+
+#endif /* WITH_PTNETMAP */
Index: head/sys/dev/netmap/netmap_legacy.c
===================================================================
--- head/sys/dev/netmap/netmap_legacy.c
+++ head/sys/dev/netmap/netmap_legacy.c
@@ -56,6 +56,7 @@
*/
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
+#include <dev/netmap/netmap_bdg.h>
static int
nmreq_register_from_legacy(struct nmreq *nmr, struct nmreq_header *hdr,
@@ -80,10 +81,11 @@
} else {
regmode = NR_REG_ALL_NIC;
}
- nmr->nr_flags = regmode |
- (nmr->nr_flags & (~NR_REG_MASK));
+ req->nr_mode = regmode;
+ } else {
+ req->nr_mode = nmr->nr_flags & NR_REG_MASK;
}
- req->nr_mode = nmr->nr_flags & NR_REG_MASK;
+
/* Fix nr_name, nr_mode and nr_ringid to handle pipe requests. */
if (req->nr_mode == NR_REG_PIPE_MASTER ||
req->nr_mode == NR_REG_PIPE_SLAVE) {
@@ -131,7 +133,7 @@
/* First prepare the request header. */
hdr->nr_version = NETMAP_API; /* new API */
- strncpy(hdr->nr_name, nmr->nr_name, sizeof(nmr->nr_name));
+ strlcpy(hdr->nr_name, nmr->nr_name, sizeof(nmr->nr_name));
hdr->nr_options = (uintptr_t)NULL;
hdr->nr_body = (uintptr_t)NULL;
@@ -221,7 +223,7 @@
}
case NETMAP_PT_HOST_CREATE:
case NETMAP_PT_HOST_DELETE: {
- D("Netmap passthrough not supported yet");
+ nm_prerr("Netmap passthrough not supported yet");
return NULL;
break;
}
@@ -242,7 +244,6 @@
if (!req) { goto oom; }
hdr->nr_body = (uintptr_t)req;
hdr->nr_reqtype = NETMAP_REQ_PORT_INFO_GET;
- req->nr_offset = nmr->nr_offset;
req->nr_memsize = nmr->nr_memsize;
req->nr_tx_slots = nmr->nr_tx_slots;
req->nr_rx_slots = nmr->nr_rx_slots;
@@ -262,7 +263,7 @@
}
nm_os_free(hdr);
}
- D("Failed to allocate memory for nmreq_xyz struct");
+ nm_prerr("Failed to allocate memory for nmreq_xyz struct");
return NULL;
}
@@ -300,7 +301,6 @@
case NETMAP_REQ_PORT_INFO_GET: {
struct nmreq_port_info_get *req =
(struct nmreq_port_info_get *)(uintptr_t)hdr->nr_body;
- nmr->nr_offset = req->nr_offset;
nmr->nr_memsize = req->nr_memsize;
nmr->nr_tx_slots = req->nr_tx_slots;
nmr->nr_rx_slots = req->nr_rx_slots;
@@ -321,7 +321,7 @@
case NETMAP_REQ_VALE_LIST: {
struct nmreq_vale_list *req =
(struct nmreq_vale_list *)(uintptr_t)hdr->nr_body;
- strncpy(nmr->nr_name, hdr->nr_name, sizeof(nmr->nr_name));
+ strlcpy(nmr->nr_name, hdr->nr_name, sizeof(nmr->nr_name));
nmr->nr_arg1 = req->nr_bridge_idx;
nmr->nr_arg2 = req->nr_port_idx;
break;
Index: head/sys/dev/netmap/netmap_mem2.h
===================================================================
--- head/sys/dev/netmap/netmap_mem2.h
+++ head/sys/dev/netmap/netmap_mem2.h
@@ -158,14 +158,14 @@
({ int *perr = _perr; if (perr) *(perr) = EOPNOTSUPP; NULL; })
#endif /* WITH_EXTMEM */
-#ifdef WITH_PTNETMAP_GUEST
+#ifdef WITH_PTNETMAP
struct netmap_mem_d* netmap_mem_pt_guest_new(struct ifnet *,
unsigned int nifp_offset,
unsigned int memid);
struct ptnetmap_memdev;
struct netmap_mem_d* netmap_mem_pt_guest_attach(struct ptnetmap_memdev *, uint16_t);
int netmap_mem_pt_guest_ifp_del(struct netmap_mem_d *, struct ifnet *);
-#endif /* WITH_PTNETMAP_GUEST */
+#endif /* WITH_PTNETMAP */
int netmap_mem_pools_info_get(struct nmreq_pools_info *,
struct netmap_mem_d *);
Index: head/sys/dev/netmap/netmap_mem2.c
===================================================================
--- head/sys/dev/netmap/netmap_mem2.c
+++ head/sys/dev/netmap/netmap_mem2.c
@@ -318,7 +318,7 @@
#ifdef NM_DEBUG_MEM_PUTGET
#define NM_DBG_REFC(nmd, func, line) \
- nm_prinf("%s:%d mem[%d] -> %d\n", func, line, (nmd)->nm_id, (nmd)->refcount);
+ nm_prinf("%d mem[%d] -> %d", line, (nmd)->nm_id, (nmd)->refcount);
#else
#define NM_DBG_REFC(nmd, func, line)
#endif
@@ -397,15 +397,15 @@
if (p->bitmap == NULL) {
/* Allocate the bitmap */
n = (p->objtotal + 31) / 32;
- p->bitmap = nm_os_malloc(sizeof(uint32_t) * n);
+ p->bitmap = nm_os_malloc(sizeof(p->bitmap[0]) * n);
if (p->bitmap == NULL) {
- D("Unable to create bitmap (%d entries) for allocator '%s'", (int)n,
+ nm_prerr("Unable to create bitmap (%d entries) for allocator '%s'", (int)n,
p->name);
return ENOMEM;
}
p->bitmap_slots = n;
} else {
- memset(p->bitmap, 0, p->bitmap_slots);
+ memset(p->bitmap, 0, p->bitmap_slots * sizeof(p->bitmap[0]));
}
p->objfree = 0;
@@ -416,16 +416,21 @@
*/
for (j = 0; j < p->objtotal; j++) {
if (p->invalid_bitmap && nm_isset(p->invalid_bitmap, j)) {
- D("skipping %s %d", p->name, j);
+ if (netmap_debug & NM_DEBUG_MEM)
+ nm_prinf("skipping %s %d", p->name, j);
continue;
}
p->bitmap[ (j>>5) ] |= ( 1U << (j & 31U) );
p->objfree++;
}
- ND("%s free %u", p->name, p->objfree);
- if (p->objfree == 0)
+ if (netmap_verbose)
+ nm_prinf("%s free %u", p->name, p->objfree);
+ if (p->objfree == 0) {
+ if (netmap_verbose)
+ nm_prerr("%s: no objects available", p->name);
return ENOMEM;
+ }
return 0;
}
@@ -447,6 +452,7 @@
* buffers 0 and 1 are reserved
*/
if (nmd->pools[NETMAP_BUF_POOL].objfree < 2) {
+ nm_prerr("%s: not enough buffers", nmd->pools[NETMAP_BUF_POOL].name);
return ENOMEM;
}
@@ -480,8 +486,10 @@
nmd->ops->nmd_deref(nmd);
nmd->active--;
- if (!nmd->active)
+ if (last_user) {
nmd->nm_grp = -1;
+ nmd->lasterr = 0;
+ }
NMA_UNLOCK(nmd);
return last_user;
@@ -720,16 +728,20 @@
{
int err = 0, id;
id = nm_iommu_group_id(dev);
- if (netmap_verbose)
- D("iommu_group %d", id);
+ if (netmap_debug & NM_DEBUG_MEM)
+ nm_prinf("iommu_group %d", id);
NMA_LOCK(nmd);
if (nmd->nm_grp < 0)
nmd->nm_grp = id;
- if (nmd->nm_grp != id)
+ if (nmd->nm_grp != id) {
+ if (netmap_verbose)
+ nm_prerr("iommu group mismatch: %u vs %u",
+ nmd->nm_grp, id);
nmd->lasterr = err = ENOMEM;
+ }
NMA_UNLOCK(nmd);
return err;
@@ -805,7 +817,7 @@
return pa;
}
/* this is only in case of errors */
- D("invalid ofs 0x%x out of 0x%x 0x%x 0x%x", (u_int)o,
+ nm_prerr("invalid ofs 0x%x out of 0x%x 0x%x 0x%x", (u_int)o,
p[NETMAP_IF_POOL].memtotal,
p[NETMAP_IF_POOL].memtotal
+ p[NETMAP_RING_POOL].memtotal,
@@ -854,13 +866,13 @@
int i, j;
if (netmap_mem_get_info(nmd, &memsize, &memflags, NULL)) {
- D("memory not finalised yet");
+ nm_prerr("memory not finalised yet");
return NULL;
}
mainMdl = IoAllocateMdl(NULL, memsize, FALSE, FALSE, NULL);
if (mainMdl == NULL) {
- D("failed to allocate mdl");
+ nm_prerr("failed to allocate mdl");
return NULL;
}
@@ -876,7 +888,7 @@
tempMdl = IoAllocateMdl(p->lut[0].vaddr, clsz, FALSE, FALSE, NULL);
if (tempMdl == NULL) {
NMA_UNLOCK(nmd);
- D("fail to allocate tempMdl");
+ nm_prerr("fail to allocate tempMdl");
IoFreeMdl(mainMdl);
return NULL;
}
@@ -971,7 +983,7 @@
p->name, ofs, i, vaddr);
return ofs;
}
- D("address %p is not contained inside any cluster (%s)",
+ nm_prerr("address %p is not contained inside any cluster (%s)",
vaddr, p->name);
return 0; /* An error occurred */
}
@@ -1002,12 +1014,12 @@
void *vaddr = NULL;
if (len > p->_objsize) {
- D("%s request size %d too large", p->name, len);
+ nm_prerr("%s request size %d too large", p->name, len);
return NULL;
}
if (p->objfree == 0) {
- D("no more %s objects", p->name);
+ nm_prerr("no more %s objects", p->name);
return NULL;
}
if (start)
@@ -1049,13 +1061,13 @@
uint32_t *ptr, mask;
if (j >= p->objtotal) {
- D("invalid index %u, max %u", j, p->objtotal);
+ nm_prerr("invalid index %u, max %u", j, p->objtotal);
return 1;
}
ptr = &p->bitmap[j / 32];
mask = (1 << (j % 32));
if (*ptr & mask) {
- D("ouch, double free on buffer %d", j);
+ nm_prerr("ouch, double free on buffer %d", j);
return 1;
} else {
*ptr |= mask;
@@ -1086,7 +1098,7 @@
netmap_obj_free(p, j);
return;
}
- D("address %p is not contained inside any cluster (%s)",
+ nm_prerr("address %p is not contained inside any cluster (%s)",
vaddr, p->name);
}
@@ -1127,7 +1139,7 @@
uint32_t cur = *head; /* save current head */
uint32_t *p = netmap_buf_malloc(nmd, &pos, head);
if (p == NULL) {
- D("no more buffers after %d of %d", i, n);
+ nm_prerr("no more buffers after %d of %d", i, n);
*head = cur; /* restore */
break;
}
@@ -1158,9 +1170,9 @@
break;
}
if (head != 0)
- D("breaking with head %d", head);
- if (netmap_verbose)
- D("freed %d buffers", i);
+ nm_prerr("breaking with head %d", head);
+ if (netmap_debug & NM_DEBUG_MEM)
+ nm_prinf("freed %d buffers", i);
}
@@ -1176,7 +1188,7 @@
for (i = 0; i < n; i++) {
void *vaddr = netmap_buf_malloc(nmd, &pos, &index);
if (vaddr == NULL) {
- D("no more buffers after %d of %d", i, n);
+ nm_prerr("no more buffers after %d of %d", i, n);
goto cleanup;
}
slot[i].buf_idx = index;
@@ -1217,7 +1229,7 @@
struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL];
if (i < 2 || i >= p->objtotal) {
- D("Cannot free buf#%d: should be in [2, %d[", i, p->objtotal);
+ nm_prerr("Cannot free buf#%d: should be in [2, %d[", i, p->objtotal);
return;
}
netmap_obj_free(p, i);
@@ -1317,22 +1329,22 @@
#define LINE_ROUND NM_CACHE_ALIGN // 64
if (objsize >= MAX_CLUSTSIZE) {
/* we could do it but there is no point */
- D("unsupported allocation for %d bytes", objsize);
+ nm_prerr("unsupported allocation for %d bytes", objsize);
return EINVAL;
}
/* make sure objsize is a multiple of LINE_ROUND */
i = (objsize & (LINE_ROUND - 1));
if (i) {
- D("XXX aligning object by %d bytes", LINE_ROUND - i);
+ nm_prinf("aligning object by %d bytes", LINE_ROUND - i);
objsize += LINE_ROUND - i;
}
if (objsize < p->objminsize || objsize > p->objmaxsize) {
- D("requested objsize %d out of range [%d, %d]",
+ nm_prerr("requested objsize %d out of range [%d, %d]",
objsize, p->objminsize, p->objmaxsize);
return EINVAL;
}
if (objtotal < p->nummin || objtotal > p->nummax) {
- D("requested objtotal %d out of range [%d, %d]",
+ nm_prerr("requested objtotal %d out of range [%d, %d]",
objtotal, p->nummin, p->nummax);
return EINVAL;
}
@@ -1354,13 +1366,13 @@
}
/* exact solution not found */
if (clustentries == 0) {
- D("unsupported allocation for %d bytes", objsize);
+ nm_prerr("unsupported allocation for %d bytes", objsize);
return EINVAL;
}
/* compute clustsize */
clustsize = clustentries * objsize;
- if (netmap_verbose)
- D("objsize %d clustsize %d objects %d",
+ if (netmap_debug & NM_DEBUG_MEM)
+ nm_prinf("objsize %d clustsize %d objects %d",
objsize, clustsize, clustentries);
/*
@@ -1403,7 +1415,7 @@
p->lut = nm_alloc_lut(p->objtotal);
if (p->lut == NULL) {
- D("Unable to create lookup table for '%s'", p->name);
+ nm_prerr("Unable to create lookup table for '%s'", p->name);
goto clean;
}
@@ -1430,7 +1442,7 @@
* If we get here, there is a severe memory shortage,
* so halve the allocated memory to reclaim some.
*/
- D("Unable to create cluster at %d for '%s' allocator",
+ nm_prerr("Unable to create cluster at %d for '%s' allocator",
i, p->name);
if (i < 2) /* nothing to halve */
goto out;
@@ -1466,7 +1478,7 @@
}
p->memtotal = p->numclusters * p->_clustsize;
if (netmap_verbose)
- D("Pre-allocated %d clusters (%d/%dKB) for '%s'",
+ nm_prinf("Pre-allocated %d clusters (%d/%dKB) for '%s'",
p->numclusters, p->_clustsize >> 10,
p->memtotal >> 10, p->name);
@@ -1498,8 +1510,8 @@
{
int i;
- if (netmap_verbose)
- D("resetting %p", nmd);
+ if (netmap_debug & NM_DEBUG_MEM)
+ nm_prinf("resetting %p", nmd);
for (i = 0; i < NETMAP_POOLS_NR; i++) {
netmap_reset_obj_allocator(&nmd->pools[i]);
}
@@ -1525,7 +1537,7 @@
(void)i;
(void)lim;
(void)lut;
- D("unsupported on Windows");
+ nm_prerr("unsupported on Windows");
#else /* linux */
ND("unmapping and freeing plut for %s", na->name);
if (lut->plut == NULL)
@@ -1561,7 +1573,7 @@
(void)i;
(void)lim;
(void)lut;
- D("unsupported on Windows");
+ nm_prerr("unsupported on Windows");
#else /* linux */
if (lut->plut != NULL) {
@@ -1572,7 +1584,7 @@
ND("allocating physical lut for %s", na->name);
lut->plut = nm_alloc_plut(lim);
if (lut->plut == NULL) {
- D("Failed to allocate physical lut for %s", na->name);
+ nm_prerr("Failed to allocate physical lut for %s", na->name);
return ENOMEM;
}
@@ -1589,7 +1601,7 @@
error = netmap_load_map(na, (bus_dma_tag_t) na->pdev, &lut->plut[i].paddr,
p->lut[i].vaddr, p->_clustsize);
if (error) {
- D("Failed to map cluster #%d from the %s pool", i, p->name);
+ nm_prerr("Failed to map cluster #%d from the %s pool", i, p->name);
break;
}
@@ -1627,13 +1639,13 @@
nmd->flags |= NETMAP_MEM_FINALIZED;
if (netmap_verbose)
- D("interfaces %d KB, rings %d KB, buffers %d MB",
+ nm_prinf("interfaces %d KB, rings %d KB, buffers %d MB",
nmd->pools[NETMAP_IF_POOL].memtotal >> 10,
nmd->pools[NETMAP_RING_POOL].memtotal >> 10,
nmd->pools[NETMAP_BUF_POOL].memtotal >> 20);
if (netmap_verbose)
- D("Free buffers: %d", nmd->pools[NETMAP_BUF_POOL].objfree);
+ nm_prinf("Free buffers: %d", nmd->pools[NETMAP_BUF_POOL].objfree);
return 0;
@@ -1740,7 +1752,7 @@
p[NETMAP_BUF_POOL].num = v;
if (netmap_verbose)
- D("req if %d*%d ring %d*%d buf %d*%d",
+ nm_prinf("req if %d*%d ring %d*%d buf %d*%d",
p[NETMAP_IF_POOL].num,
p[NETMAP_IF_POOL].size,
p[NETMAP_RING_POOL].num,
@@ -1850,13 +1862,13 @@
struct netmap_ring *ring = kring->ring;
if (ring == NULL || kring->users > 0 || (kring->nr_kflags & NKR_NEEDRING)) {
- if (netmap_verbose)
- D("NOT deleting ring %s (ring %p, users %d neekring %d)",
+ if (netmap_debug & NM_DEBUG_MEM)
+ nm_prinf("NOT deleting ring %s (ring %p, users %d neekring %d)",
kring->name, ring, kring->users, kring->nr_kflags & NKR_NEEDRING);
continue;
}
- if (netmap_verbose)
- D("deleting ring %s", kring->name);
+ if (netmap_debug & NM_DEBUG_MEM)
+ nm_prinf("deleting ring %s", kring->name);
if (!(kring->nr_kflags & NKR_FAKERING)) {
ND("freeing bufs for %s", kring->name);
netmap_free_bufs(na->nm_mem, ring->slot, kring->nkr_num_slots);
@@ -1891,19 +1903,19 @@
if (ring || (!kring->users && !(kring->nr_kflags & NKR_NEEDRING))) {
/* uneeded, or already created by somebody else */
- if (netmap_verbose)
- D("NOT creating ring %s (ring %p, users %d neekring %d)",
+ if (netmap_debug & NM_DEBUG_MEM)
+ nm_prinf("NOT creating ring %s (ring %p, users %d neekring %d)",
kring->name, ring, kring->users, kring->nr_kflags & NKR_NEEDRING);
continue;
}
- if (netmap_verbose)
- D("creating %s", kring->name);
+ if (netmap_debug & NM_DEBUG_MEM)
+ nm_prinf("creating %s", kring->name);
ndesc = kring->nkr_num_slots;
len = sizeof(struct netmap_ring) +
ndesc * sizeof(struct netmap_slot);
ring = netmap_ring_malloc(na->nm_mem, len);
if (ring == NULL) {
- D("Cannot allocate %s_ring", nm_txrx2str(t));
+ nm_prerr("Cannot allocate %s_ring", nm_txrx2str(t));
goto cleanup;
}
ND("txring at %p", ring);
@@ -1925,14 +1937,16 @@
ND("initializing slots for %s_ring", nm_txrx2str(t));
if (!(kring->nr_kflags & NKR_FAKERING)) {
/* this is a real ring */
- ND("allocating buffers for %s", kring->name);
+ if (netmap_debug & NM_DEBUG_MEM)
+ nm_prinf("allocating buffers for %s", kring->name);
if (netmap_new_bufs(na->nm_mem, ring->slot, ndesc)) {
- D("Cannot allocate buffers for %s_ring", nm_txrx2str(t));
+ nm_prerr("Cannot allocate buffers for %s_ring", nm_txrx2str(t));
goto cleanup;
}
} else {
/* this is a fake ring, set all indices to 0 */
- ND("NOT allocating buffers for %s", kring->name);
+ if (netmap_debug & NM_DEBUG_MEM)
+ nm_prinf("NOT allocating buffers for %s", kring->name);
netmap_mem_set_ring(na->nm_mem, ring->slot, ndesc, 0);
}
/* ring info */
@@ -1998,7 +2012,7 @@
/* initialize base fields -- override const */
*(u_int *)(uintptr_t)&nifp->ni_tx_rings = na->num_tx_rings;
*(u_int *)(uintptr_t)&nifp->ni_rx_rings = na->num_rx_rings;
- strncpy(nifp->ni_name, na->name, (size_t)IFNAMSIZ);
+ strlcpy(nifp->ni_name, na->name, sizeof(nifp->ni_name));
/*
* fill the slots for the rx and tx rings. They contain the offset
@@ -2049,8 +2063,8 @@
netmap_mem2_deref(struct netmap_mem_d *nmd)
{
- if (netmap_verbose)
- D("active = %d", nmd->active);
+ if (netmap_debug & NM_DEBUG_MEM)
+ nm_prinf("active = %d", nmd->active);
}
@@ -2217,14 +2231,15 @@
pi->nr_buf_pool_objtotal = netmap_min_priv_params[NETMAP_BUF_POOL].num;
if (pi->nr_buf_pool_objsize == 0)
pi->nr_buf_pool_objsize = netmap_min_priv_params[NETMAP_BUF_POOL].size;
- D("if %d %d ring %d %d buf %d %d",
+	/*
+	 * NM_DEBUG_MEM is a bit of netmap_debug (as in the other allocator
+	 * messages of this file), not of the netmap_verbose level.
+	 */
+	if (netmap_debug & NM_DEBUG_MEM)
+		nm_prinf("if %d %d ring %d %d buf %d %d",
pi->nr_if_pool_objtotal, pi->nr_if_pool_objsize,
pi->nr_ring_pool_objtotal, pi->nr_ring_pool_objsize,
pi->nr_buf_pool_objtotal, pi->nr_buf_pool_objsize);
os = nm_os_extmem_create(usrptr, pi, &error);
if (os == NULL) {
- D("os extmem creation failed");
+ nm_prerr("os extmem creation failed");
goto out;
}
@@ -2233,7 +2248,8 @@
nm_os_extmem_delete(os);
return &nme->up;
}
- D("not found, creating new");
+	/* NM_DEBUG_MEM selects a netmap_debug bit, not a netmap_verbose one */
+	if (netmap_debug & NM_DEBUG_MEM)
+		nm_prinf("not found, creating new");
nme = _netmap_mem_private_new(sizeof(*nme),
(struct netmap_obj_params[]){
@@ -2343,7 +2359,7 @@
#endif /* WITH_EXTMEM */
-#ifdef WITH_PTNETMAP_GUEST
+#ifdef WITH_PTNETMAP
struct mem_pt_if {
struct mem_pt_if *next;
struct ifnet *ifp;
@@ -2386,7 +2402,8 @@
NMA_UNLOCK(nmd);
- D("added (ifp=%p,nifp_offset=%u)", ptif->ifp, ptif->nifp_offset);
+ nm_prinf("ifp=%s,nifp_offset=%u",
+ ptif->ifp->if_xname, ptif->nifp_offset);
return 0;
}
@@ -2667,7 +2684,7 @@
continue;
kring->ring = (struct netmap_ring *)
((char *)nifp +
- nifp->ring_ofs[i + na->num_tx_rings + 1]);
+ nifp->ring_ofs[netmap_all_rings(na, NR_TX) + i]);
}
error = 0;
@@ -2832,4 +2849,4 @@
return nmd;
}
-#endif /* WITH_PTNETMAP_GUEST */
+#endif /* WITH_PTNETMAP */
Index: head/sys/dev/netmap/netmap_null.c
===================================================================
--- head/sys/dev/netmap/netmap_null.c
+++ head/sys/dev/netmap/netmap_null.c
@@ -0,0 +1,184 @@
+/*
+ * Copyright (C) 2018 Giuseppe Lettieri
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/* $FreeBSD$ */
+
+#if defined(__FreeBSD__)
+#include <sys/cdefs.h> /* prerequisite */
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/param.h> /* defines used in kernel.h */
+#include <sys/kernel.h> /* types used in module initialization */
+#include <sys/malloc.h>
+#include <sys/poll.h>
+#include <sys/lock.h>
+#include <sys/rwlock.h>
+#include <sys/selinfo.h>
+#include <sys/sysctl.h>
+#include <sys/socket.h> /* sockaddrs */
+#include <net/if.h>
+#include <net/if_var.h>
+#include <machine/bus.h> /* bus_dmamap_* */
+#include <sys/refcount.h>
+
+
+#elif defined(linux)
+
+#include "bsd_glue.h"
+
+#elif defined(__APPLE__)
+
+#warning OSX support is only partial
+#include "osx_glue.h"
+
+#elif defined(_WIN32)
+#include "win_glue.h"
+
+#else
+
+#error Unsupported platform
+
+#endif /* unsupported */
+
+/*
+ * common headers
+ */
+
+#include <net/netmap.h>
+#include <dev/netmap/netmap_kern.h>
+#include <dev/netmap/netmap_mem2.h>
+
+#ifdef WITH_NMNULL
+
+/*
+ * txsync handler installed on null ports (see netmap_get_null_na()).
+ * It performs no work: both arguments are ignored and 0 is returned.
+ */
+static int
+netmap_null_txsync(struct netmap_kring *kring, int flags)
+{
+	/* silence unused-parameter warnings */
+	(void)flags;
+	(void)kring;
+
+	return 0;
+}
+
+/*
+ * rxsync handler installed on null ports (see netmap_get_null_na()).
+ * It performs no work: both arguments are ignored and 0 is returned.
+ */
+static int
+netmap_null_rxsync(struct netmap_kring *kring, int flags)
+{
+	/* silence unused-parameter warnings */
+	(void)flags;
+	(void)kring;
+
+	return 0;
+}
+
+/*
+ * nm_krings_create handler for null ports: forwards to the generic
+ * netmap_krings_create(), passing 0 as its second argument, and
+ * returns whatever that call returns.
+ */
+static int
+netmap_null_krings_create(struct netmap_adapter *na)
+{
+	int error;
+
+	error = netmap_krings_create(na, 0);
+	return error;
+}
+
+/*
+ * nm_krings_delete handler for null ports: forwards to the generic
+ * netmap_krings_delete().
+ */
+static void
+netmap_null_krings_delete(struct netmap_adapter *na)
+{
+	netmap_krings_delete(na);
+	return;
+}
+
+/*
+ * nm_register handler for null ports.
+ *
+ * NAF_NETMAP_ON is set (onoff != 0) or cleared (onoff == 0) in
+ * na->na_flags, but only while na->active_fds is zero; otherwise the
+ * flags are left untouched.  Always returns 0.
+ */
+static int
+netmap_null_reg(struct netmap_adapter *na, int onoff)
+{
+	/* other file descriptors are still active: nothing to change */
+	if (na->active_fds != 0)
+		return 0;
+
+	if (!onoff)
+		na->na_flags &= ~NAF_NETMAP_ON;
+	else
+		na->na_flags |= NAF_NETMAP_ON;
+
+	return 0;
+}
+
+/*
+ * nm_bdg_attach handler for null ports.  The request is always
+ * rejected: every argument is ignored and EINVAL is returned.
+ */
+static int
+netmap_null_bdg_attach(const char *name, struct netmap_adapter *na,
+		struct nm_bridge *b)
+{
+	/* silence unused-parameter warnings */
+	(void)b;
+	(void)na;
+	(void)name;
+
+	return EINVAL;
+}
+
+/*
+ * Create the adapter for a null port, if the register request in
+ * hdr->nr_body asks for one (nr_mode == NR_REG_NULL).
+ *
+ * hdr:    request header; nr_body holds the struct nmreq_register and
+ *         nr_name is used to build the adapter name ("null:<name>").
+ * na:     on success, receives the new adapter with one reference
+ *         taken through netmap_adapter_get().
+ * nmd:    memory allocator the port must use; may not be NULL.
+ * create: must be non-zero, null ports cannot be looked up again.
+ *
+ * Returns 0 without touching *na when the request is not for a null
+ * port, 0 with *na set on success, EINVAL when create is zero or nmd
+ * is NULL, ENOMEM when the adapter cannot be allocated, or the error
+ * reported by netmap_attach_common().
+ */
+int
+netmap_get_null_na(struct nmreq_header *hdr, struct netmap_adapter **na,
+		struct netmap_mem_d *nmd, int create)
+{
+	struct nmreq_register *req = (struct nmreq_register *)(uintptr_t)hdr->nr_body;
+	struct netmap_null_adapter *nna;
+	int error;
+
+	if (req->nr_mode != NR_REG_NULL) {
+		nm_prdis("not a null port");
+		return 0;
+	}
+
+	if (!create) {
+		nm_prerr("null ports cannot be re-opened");
+		return EINVAL;
+	}
+
+	if (nmd == NULL) {
+		nm_prerr("null ports must use an existing allocator");
+		return EINVAL;
+	}
+
+	nna = nm_os_malloc(sizeof(*nna));
+	if (nna == NULL) {
+		error = ENOMEM;
+		goto err;
+	}
+	snprintf(nna->up.name, sizeof(nna->up.name), "null:%s", hdr->nr_name);
+
+	nna->up.nm_txsync = netmap_null_txsync;
+	nna->up.nm_rxsync = netmap_null_rxsync;
+	nna->up.nm_register = netmap_null_reg;
+	nna->up.nm_krings_create = netmap_null_krings_create;
+	nna->up.nm_krings_delete = netmap_null_krings_delete;
+	nna->up.nm_bdg_attach = netmap_null_bdg_attach;
+	/* take our own reference on the allocator */
+	nna->up.nm_mem = netmap_mem_get(nmd);
+
+	/* ring geometry comes straight from the request */
+	nna->up.num_tx_rings = req->nr_tx_rings;
+	nna->up.num_rx_rings = req->nr_rx_rings;
+	nna->up.num_tx_desc = req->nr_tx_slots;
+	nna->up.num_rx_desc = req->nr_rx_slots;
+	error = netmap_attach_common(&nna->up);
+	if (error)
+		goto put_mem;
+	*na = &nna->up;
+	netmap_adapter_get(*na);
+	nm_prdis("created null %s", nna->up.name);
+
+	return 0;
+
+put_mem:
+	/*
+	 * Release the allocator reference taken above; freeing the
+	 * adapter alone would leave it held indefinitely.
+	 */
+	netmap_mem_put(nna->up.nm_mem);
+	nm_os_free(nna);
+err:
+	return error;
+}
+
+
+#endif /* WITH_NMNULL */
Index: head/sys/dev/netmap/netmap_pipe.c
===================================================================
--- head/sys/dev/netmap/netmap_pipe.c
+++ head/sys/dev/netmap/netmap_pipe.c
@@ -443,7 +443,7 @@
/* In case of no error we put our rings in netmap mode */
for_rx_tx(t) {
- for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
+ for (i = 0; i < nma_get_nrings(na, t); i++) {
struct netmap_kring *kring = NMR(na, t)[i];
if (nm_kring_pending_on(kring)) {
struct netmap_kring *sring, *dring;
@@ -490,7 +490,7 @@
if (na->active_fds == 0)
na->na_flags &= ~NAF_NETMAP_ON;
for_rx_tx(t) {
- for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
+ for (i = 0; i < nma_get_nrings(na, t); i++) {
struct netmap_kring *kring = NMR(na, t)[i];
if (nm_kring_pending_off(kring)) {
@@ -567,7 +567,7 @@
sna = na;
cleanup:
for_rx_tx(t) {
- for (i = 0; i < nma_get_nrings(sna, t) + 1; i++) {
+ for (i = 0; i < nma_get_nrings(sna, t); i++) {
struct netmap_kring *kring = NMR(sna, t)[i];
struct netmap_ring *ring = kring->ring;
uint32_t j, lim = kring->nkr_num_slots - 1;
@@ -674,11 +674,11 @@
int create_error;
/* Temporarily remove the pipe suffix. */
- strncpy(nr_name_orig, hdr->nr_name, sizeof(nr_name_orig));
+ strlcpy(nr_name_orig, hdr->nr_name, sizeof(nr_name_orig));
*cbra = '\0';
error = netmap_get_na(hdr, &pna, &ifp, nmd, create);
/* Restore the pipe suffix. */
- strncpy(hdr->nr_name, nr_name_orig, sizeof(hdr->nr_name));
+ strlcpy(hdr->nr_name, nr_name_orig, sizeof(hdr->nr_name));
if (!error)
break;
if (error != ENXIO || retries++) {
@@ -691,7 +691,7 @@
NMG_UNLOCK();
create_error = netmap_vi_create(hdr, 1 /* autodelete */);
NMG_LOCK();
- strncpy(hdr->nr_name, nr_name_orig, sizeof(hdr->nr_name));
+ strlcpy(hdr->nr_name, nr_name_orig, sizeof(hdr->nr_name));
if (create_error && create_error != EEXIST) {
if (create_error != EOPNOTSUPP) {
D("failed to create a persistent vale port: %d", create_error);
Index: head/sys/dev/netmap/netmap_vale.c
===================================================================
--- head/sys/dev/netmap/netmap_vale.c
+++ head/sys/dev/netmap/netmap_vale.c
@@ -121,18 +121,18 @@
"Max batch size to be used in the bridge");
SYSEND;
-static int netmap_vp_create(struct nmreq_header *hdr, struct ifnet *,
+static int netmap_vale_vp_create(struct nmreq_header *hdr, struct ifnet *,
struct netmap_mem_d *nmd, struct netmap_vp_adapter **);
-static int netmap_vp_bdg_attach(const char *, struct netmap_adapter *,
+static int netmap_vale_vp_bdg_attach(const char *, struct netmap_adapter *,
struct nm_bridge *);
static int netmap_vale_bwrap_attach(const char *, struct netmap_adapter *);
/*
- * For each output interface, nm_bdg_q is used to construct a list.
+ * For each output interface, nm_vale_q is used to construct a list.
* bq_len is the number of output buffers (we can have coalescing
* during the copy).
*/
-struct nm_bdg_q {
+struct nm_vale_q {
uint16_t bq_head;
uint16_t bq_tail;
uint32_t bq_len; /* number of buffers */
@@ -140,10 +140,10 @@
/* Holds the default callbacks */
struct netmap_bdg_ops vale_bdg_ops = {
- .lookup = netmap_bdg_learning,
+ .lookup = netmap_vale_learning,
.config = NULL,
.dtor = NULL,
- .vp_create = netmap_vp_create,
+ .vp_create = netmap_vale_vp_create,
.bwrap_attach = netmap_vale_bwrap_attach,
.name = NM_BDG_NAME,
};
@@ -212,14 +212,14 @@
/* all port:rings + broadcast */
num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX;
- l += sizeof(struct nm_bdg_q) * num_dstq;
+ l += sizeof(struct nm_vale_q) * num_dstq;
l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;
nrings = netmap_real_rings(na, NR_TX);
kring = na->tx_rings;
for (i = 0; i < nrings; i++) {
struct nm_bdg_fwd *ft;
- struct nm_bdg_q *dstq;
+ struct nm_vale_q *dstq;
int j;
ft = nm_os_malloc(l);
@@ -227,7 +227,7 @@
nm_free_bdgfwd(na);
return ENOMEM;
}
- dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
+ dstq = (struct nm_vale_q *)(ft + NM_BDG_BATCH_MAX);
for (j = 0; j < num_dstq; j++) {
dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
dstq[j].bq_len = 0;
@@ -307,11 +307,228 @@
return ret;
}
+/*
+ * Process NETMAP_REQ_VALE_LIST.
+ *
+ * With a non-empty hdr->nr_name, look the name up and report the index
+ * of its bridge in req->nr_bridge_idx and the index of the port with
+ * that name in req->nr_port_idx (NM_BDG_NOPORT if no port matches).
+ * With an empty name, report the first non-empty port found starting
+ * from bridge req->nr_bridge_idx, port req->nr_port_idx, and copy the
+ * name of that port back into hdr->nr_name.
+ *
+ * Returns 0 on success, EINVAL if the name does not start with
+ * NM_BDG_NAME or a start index is negative once stored in an int,
+ * ENOENT if the bridge does not exist or the enumeration is over.
+ */
+int
+netmap_vale_list(struct nmreq_header *hdr)
+{
+	struct nmreq_vale_list *req =
+		(struct nmreq_vale_list *)(uintptr_t)hdr->nr_body;
+	int namelen = strlen(hdr->nr_name);
+	struct nm_bridge *b, *bridges;
+	struct netmap_vp_adapter *vpna;
+	int error = 0, i, j;
+	u_int num_bridges;
netmap_bns_getbridges(&bridges, &num_bridges);
+	/* this is used to enumerate bridges and ports */
+	if (namelen) { /* look up indexes of bridge and port */
+		if (strncmp(hdr->nr_name, NM_BDG_NAME,
+					strlen(NM_BDG_NAME))) {
+			return EINVAL;
+		}
+		NMG_LOCK();
+		b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
+		if (!b) {
+			NMG_UNLOCK();
+			return ENOENT;
+		}
+
+		req->nr_bridge_idx = b - bridges; /* bridge index */
+		req->nr_port_idx = NM_BDG_NOPORT;
+		for (j = 0; j < b->bdg_active_ports; j++) {
+			i = b->bdg_port_index[j];
+			vpna = b->bdg_ports[i];
+			if (vpna == NULL) {
+				nm_prerr("This should not happen");
+				continue;
+			}
+			/* the former and the latter identify a
+			 * virtual port and a NIC, respectively
+			 */
+			if (!strcmp(vpna->up.name, hdr->nr_name)) {
+				req->nr_port_idx = i; /* port index */
+				break;
+			}
+		}
+		NMG_UNLOCK();
+	} else {
+		/* return the first non-empty entry starting from
+		 * bridge nr_bridge_idx and port nr_port_idx.
+		 *
+		 * Users can detect the end of the same bridge by
+		 * comparing the new and old value of nr_bridge_idx,
+		 * and can detect the end of all the bridges by
+		 * error != 0
+		 */
+		i = req->nr_bridge_idx;
+		j = req->nr_port_idx;
+
+		/*
+		 * The start indexes come from the request and are only
+		 * checked against an upper bound below.  NOTE(review):
+		 * presumably the request fields are unsigned, so a large
+		 * value can become negative once stored in an int and
+		 * would then index before the start of the arrays.
+		 */
+		if (i < 0 || j < 0)
+			return EINVAL;
+
+		NMG_LOCK();
+		for (error = ENOENT; i < NM_BRIDGES; i++) {
+			b = bridges + i;
+			for ( ; j < NM_BDG_MAXPORTS; j++) {
+				if (b->bdg_ports[j] == NULL)
+					continue;
+				vpna = b->bdg_ports[j];
+				/* write back the VALE switch name */
+				strlcpy(hdr->nr_name, vpna->up.name,
+					sizeof(hdr->nr_name));
+				error = 0;
+				goto out;
+			}
+			j = 0; /* following bridges scan from 0 */
+		}
+	out:
+		req->nr_bridge_idx = i;
+		req->nr_port_idx = j;
+		NMG_UNLOCK();
+	}
+
+	return error;
+}
+
+/*
+ * Process NETMAP_REQ_VALE_ATTACH.
+ *
+ * Runs entirely under NMG_LOCK.  On success the index of the attached
+ * port is stored in req->port_index and 0 is returned.
+ *
+ * Errors: EACCES if the bridge already exists and auth_token is not
+ * valid for it; EINVAL if req->reg.nr_mem_id names no allocator or the
+ * name yields no adapter; EBUSY if the port already exists or the
+ * adapter is already owned; otherwise whatever netmap_get_vale_na()
+ * or the adapter's nm_bdg_ctl callback reports.
+ */
+int
+netmap_vale_attach(struct nmreq_header *hdr, void *auth_token)
+{
+	struct nmreq_vale_attach *req =
+		(struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
+	struct netmap_adapter *na = NULL;
+	struct netmap_mem_d *nmd = NULL;
+	struct netmap_vp_adapter *vpna;
+	struct nm_bridge *b;
+	int error;
+
+	NMG_LOCK();
+
+	/* permission check for modified bridges */
+	b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
+	if (b != NULL && !nm_bdg_valid_auth_token(b, auth_token)) {
+		error = EACCES;
+		goto unlock_exit;
+	}
+
+	/* resolve the allocator, if the request names one */
+	if (req->reg.nr_mem_id != 0) {
+		nmd = netmap_mem_find(req->reg.nr_mem_id);
+		if (nmd == NULL) {
+			error = EINVAL;
+			goto unlock_exit;
+		}
+	}
+
+	/* the port must not exist yet */
+	error = netmap_get_vale_na(hdr, &na, nmd, 0);
+	if (na != NULL) {
+		error = EBUSY;
+		goto unref_exit;
+	}
+
+	/* now create it */
+	error = netmap_get_vale_na(hdr, &na,
+			nmd, 1 /* create if not exists */);
+	if (error != 0)		/* no device */
+		goto unlock_exit;
+
+	if (na == NULL) {	/* VALE prefix missing */
+		error = EINVAL;
+		goto unlock_exit;
+	}
+
+	if (NETMAP_OWNED_BY_ANY(na)) {
+		error = EBUSY;
+		goto unref_exit;
+	}
+
+	if (na->nm_bdg_ctl != NULL) {
+		/* nop for VALE ports. The bwrap needs to put the hwna
+		 * in netmap mode (see netmap_bwrap_bdg_ctl)
+		 */
+		error = na->nm_bdg_ctl(hdr, na);
+		if (error != 0)
+			goto unref_exit;
+		ND("registered %s to netmap-mode", na->name);
+	}
+
+	vpna = (struct netmap_vp_adapter *)na;
+	req->port_index = vpna->bdg_port;
+
+	/* success: keep the adapter reference, share the common exit */
+	error = 0;
+	goto unlock_exit;
+
+unref_exit:
+	netmap_adapter_put(na);
+unlock_exit:
+	if (nmd != NULL)
+		netmap_mem_put(nmd);
+
+	NMG_UNLOCK();
+	return error;
+}
+
+/*
+ * Process NETMAP_REQ_VALE_DETACH.
+ *
+ * Runs entirely under NMG_LOCK.  The index of the port being detached
+ * is stored in the request's port_index field before the adapter's
+ * nm_bdg_ctl callback (if any) is invoked.
+ *
+ * Errors: EACCES if the bridge exists and auth_token is not valid for
+ * it; EINVAL if the name yields no adapter; EBUSY if the adapter is a
+ * bwrap in polling state or na->na_vp is not this adapter; otherwise
+ * whatever netmap_get_vale_na() or nm_bdg_ctl reports.
+ */
+int
+netmap_vale_detach(struct nmreq_header *hdr, void *auth_token)
+{
+	struct nmreq_vale_detach *req = (void *)(uintptr_t)hdr->nr_body;
+	struct netmap_vp_adapter *vpna;
+	struct netmap_adapter *na;
+	struct nm_bridge *b;
+	int error;
+
+	NMG_LOCK();
+
+	/* permission check for modified bridges */
+	b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
+	if (b != NULL && !nm_bdg_valid_auth_token(b, auth_token)) {
+		error = EACCES;
+		goto unlock_exit;
+	}
+
+	error = netmap_get_vale_na(hdr, &na, NULL, 0 /* don't create */);
+	if (error != 0) {
+		/* no device, or another bridge or user owns the device */
+		goto unlock_exit;
+	}
+
+	if (na == NULL) {	/* VALE prefix missing */
+		error = EINVAL;
+		goto unlock_exit;
+	}
+
+	if (nm_is_bwrap(na) &&
+	    ((struct netmap_bwrap_adapter *)na)->na_polling_state) {
+		/* Don't detach a NIC with polling */
+		error = EBUSY;
+		goto unref_exit;
+	}
+
+	vpna = (struct netmap_vp_adapter *)na;
+	if (na->na_vp != vpna) {
+		/* trying to detach first attach of VALE persistent port attached
+		 * to 2 bridges
+		 */
+		error = EBUSY;
+		goto unref_exit;
+	}
+	req->port_index = vpna->bdg_port;
+
+	if (na->nm_bdg_ctl != NULL) {
+		/* remove the port from bridge. The bwrap
+		 * also needs to put the hwna in normal mode
+		 */
+		error = na->nm_bdg_ctl(hdr, na);
+	}
+
+unref_exit:
+	netmap_adapter_put(na);
+unlock_exit:
+	NMG_UNLOCK();
+	return error;
+
+}
+
+
/* nm_dtor callback for ephemeral VALE ports */
static void
-netmap_vp_dtor(struct netmap_adapter *na)
+netmap_vale_vp_dtor(struct netmap_adapter *na)
{
struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
struct nm_bridge *b = vpna->na_bdg;
@@ -334,47 +551,13 @@
}
-/* Called by external kernel modules (e.g., Openvswitch).
- * to modify the private data previously given to regops().
- * 'name' may be just bridge's name (including ':' if it
- * is not just NM_BDG_NAME).
- * Called without NMG_LOCK.
- */
-int
-nm_bdg_update_private_data(const char *name, bdg_update_private_data_fn_t callback,
- void *callback_data, void *auth_token)
-{
- void *private_data = NULL;
- struct nm_bridge *b;
- int error = 0;
- NMG_LOCK();
- b = nm_find_bridge(name, 0 /* don't create */, NULL);
- if (!b) {
- error = EINVAL;
- goto unlock_update_priv;
- }
- if (!nm_bdg_valid_auth_token(b, auth_token)) {
- error = EACCES;
- goto unlock_update_priv;
- }
- BDG_WLOCK(b);
- private_data = callback(b->private_data, callback_data, &error);
- b->private_data = private_data;
- BDG_WUNLOCK(b);
-
-unlock_update_priv:
- NMG_UNLOCK();
- return error;
-}
-
-
/* nm_krings_create callback for VALE ports.
* Calls the standard netmap_krings_create, then adds leases on rx
* rings and bdgfwd on tx rings.
*/
static int
-netmap_vp_krings_create(struct netmap_adapter *na)
+netmap_vale_vp_krings_create(struct netmap_adapter *na)
{
u_int tailroom;
int error, i;
@@ -409,7 +592,7 @@
/* nm_krings_delete callback for VALE ports. */
static void
-netmap_vp_krings_delete(struct netmap_adapter *na)
+netmap_vale_vp_krings_delete(struct netmap_adapter *na)
{
nm_free_bdgfwd(na);
netmap_krings_delete(na);
@@ -417,7 +600,7 @@
static int
-nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n,
+nm_vale_flush(struct nm_bdg_fwd *ft, u_int n,
struct netmap_vp_adapter *na, u_int ring_nr);
@@ -429,7 +612,7 @@
* Returns the next position in the ring.
*/
static int
-nm_bdg_preflush(struct netmap_kring *kring, u_int end)
+nm_vale_preflush(struct netmap_kring *kring, u_int end)
{
struct netmap_vp_adapter *na =
(struct netmap_vp_adapter*)kring->na;
@@ -470,7 +653,7 @@
buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
(void *)(uintptr_t)slot->ptr : NMB(&na->up, slot);
if (unlikely(buf == NULL)) {
- RD(5, "NULL %s buffer pointer from %s slot %d len %d",
+ nm_prlim(5, "NULL %s buffer pointer from %s slot %d len %d",
(slot->flags & NS_INDIRECT) ? "INDIRECT" : "DIRECT",
kring->name, j, ft[ft_i].ft_len);
buf = ft[ft_i].ft_buf = NETMAP_BUF_BASE(&na->up);
@@ -488,7 +671,7 @@
ft[ft_i - frags].ft_frags = frags;
frags = 1;
if (unlikely((int)ft_i >= bridge_batch))
- ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
+ ft_i = nm_vale_flush(ft, ft_i, na, ring_nr);
}
if (frags > 1) {
/* Here ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG, and we
@@ -496,10 +679,10 @@
frags--;
ft[ft_i - 1].ft_flags &= ~NS_MOREFRAG;
ft[ft_i - frags].ft_frags = frags;
- D("Truncate incomplete fragment at %d (%d frags)", ft_i, frags);
+ nm_prlim(5, "Truncate incomplete fragment at %d (%d frags)", ft_i, frags);
}
if (ft_i)
- ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
+ ft_i = nm_vale_flush(ft, ft_i, na, ring_nr);
BDG_RUNLOCK(b);
return j;
}
@@ -528,7 +711,7 @@
static __inline uint32_t
-nm_bridge_rthash(const uint8_t *addr)
+nm_vale_rthash(const uint8_t *addr)
{
uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hask key
@@ -554,7 +737,7 @@
* ring in *dst_ring (at the moment, always use ring 0)
*/
uint32_t
-netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
+netmap_vale_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
struct netmap_vp_adapter *na, void *private_data)
{
uint8_t *buf = ((uint8_t *)ft->ft_buf) + ft->ft_offset;
@@ -586,17 +769,17 @@
*/
if (((buf[6] & 1) == 0) && (na->last_smac != smac)) { /* valid src */
uint8_t *s = buf+6;
- sh = nm_bridge_rthash(s); /* hash of source */
+ sh = nm_vale_rthash(s); /* hash of source */
/* update source port forwarding entry */
na->last_smac = ht[sh].mac = smac; /* XXX expire ? */
ht[sh].ports = mysrc;
- if (netmap_verbose)
- D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
+ if (netmap_debug & NM_DEBUG_VALE)
+ nm_prinf("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
}
dst = NM_BDG_BROADCAST;
if ((buf[0] & 1) == 0) { /* unicast */
- dh = nm_bridge_rthash(buf); /* hash of dst */
+ dh = nm_vale_rthash(buf); /* hash of dst */
if (ht[dh].mac == dmac) { /* found dst */
dst = ht[dh].ports;
}
@@ -655,24 +838,28 @@
k->nkr_leases[lease_idx] = NR_NOSLOT;
k->nkr_lease_idx = nm_next(lease_idx, lim);
+#ifdef CONFIG_NETMAP_DEBUG
if (n > nm_kr_space(k, is_rx)) {
- D("invalid request for %d slots", n);
+ nm_prerr("invalid request for %d slots", n);
panic("x");
}
+#endif /* CONFIG_NETMAP_DEBUG */
/* XXX verify that there are n slots */
k->nkr_hwlease += n;
if (k->nkr_hwlease > lim)
k->nkr_hwlease -= lim + 1;
+#ifdef CONFIG_NETMAP_DEBUG
if (k->nkr_hwlease >= k->nkr_num_slots ||
k->nr_hwcur >= k->nkr_num_slots ||
k->nr_hwtail >= k->nkr_num_slots ||
k->nkr_lease_idx >= k->nkr_num_slots) {
- D("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d",
+ nm_prerr("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d",
k->na->name,
k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
k->nkr_lease_idx, k->nkr_num_slots);
}
+#endif /* CONFIG_NETMAP_DEBUG */
return lease_idx;
}
@@ -682,10 +869,10 @@
* number of ports, and lets us replace the learn and dispatch functions.
*/
int
-nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
+nm_vale_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
u_int ring_nr)
{
- struct nm_bdg_q *dst_ents, *brddst;
+ struct nm_vale_q *dst_ents, *brddst;
uint16_t num_dsts = 0, *dsts;
struct nm_bridge *b = na->na_bdg;
u_int i, me = na->bdg_port;
@@ -696,14 +883,14 @@
* queues per port plus one for the broadcast traffic.
* Then we have an array of destination indexes.
*/
- dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
+ dst_ents = (struct nm_vale_q *)(ft + NM_BDG_BATCH_MAX);
dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);
/* first pass: find a destination for each packet in the batch */
for (i = 0; likely(i < n); i += ft[i].ft_frags) {
uint8_t dst_ring = ring_nr; /* default, same ring as origin */
uint16_t dst_port, d_i;
- struct nm_bdg_q *d;
+ struct nm_vale_q *d;
struct nm_bdg_fwd *start_ft = NULL;
ND("slot %d frags %d", i, ft[i].ft_frags);
@@ -720,7 +907,7 @@
*/
continue;
}
- dst_port = b->bdg_ops->lookup(start_ft, &dst_ring, na, b->private_data);
+ dst_port = b->bdg_ops.lookup(start_ft, &dst_ring, na, b->private_data);
if (netmap_verbose > 255)
RD(5, "slot %d port %d -> %d", i, me, dst_port);
if (dst_port >= NM_BDG_NOPORT)
@@ -778,7 +965,7 @@
u_int dst_nr, lim, j, d_i, next, brd_next;
u_int needed, howmany;
int retry = netmap_txsync_retry;
- struct nm_bdg_q *d;
+ struct nm_vale_q *d;
uint32_t my_start = 0, lease_idx = 0;
int nrings;
int virt_hdr_mismatch = 0;
@@ -862,7 +1049,7 @@
if (dst_na->retry && retry) {
/* try to get some free slot from the previous run */
- kring->nm_notify(kring, 0);
+ kring->nm_notify(kring, NAF_FORCE_RECLAIM);
/* actually useful only for bwraps, since there
* the notify will trigger a txsync on the hwna. VALE ports
* have dst_na->retry == 0
@@ -1030,7 +1217,7 @@
/* nm_txsync callback for VALE ports */
static int
-netmap_vp_txsync(struct netmap_kring *kring, int flags)
+netmap_vale_vp_txsync(struct netmap_kring *kring, int flags)
{
struct netmap_vp_adapter *na =
(struct netmap_vp_adapter *)kring->na;
@@ -1049,17 +1236,17 @@
if (bridge_batch > NM_BDG_BATCH)
bridge_batch = NM_BDG_BATCH;
- done = nm_bdg_preflush(kring, head);
+ done = nm_vale_preflush(kring, head);
done:
if (done != head)
- D("early break at %d/ %d, tail %d", done, head, kring->nr_hwtail);
+ nm_prerr("early break at %d/ %d, tail %d", done, head, kring->nr_hwtail);
/*
* packets between 'done' and 'cur' are left unsent.
*/
kring->nr_hwcur = done;
kring->nr_hwtail = nm_prev(done, lim);
- if (netmap_verbose)
- D("%s ring %d flags %d", na->up.name, kring->ring_id, flags);
+ if (netmap_debug & NM_DEBUG_TXSYNC)
+ nm_prinf("%s ring %d flags %d", na->up.name, kring->ring_id, flags);
return 0;
}
@@ -1068,7 +1255,7 @@
* Only persistent VALE ports have a non-null ifp.
*/
static int
-netmap_vp_create(struct nmreq_header *hdr, struct ifnet *ifp,
+netmap_vale_vp_create(struct nmreq_header *hdr, struct ifnet *ifp,
struct netmap_mem_d *nmd, struct netmap_vp_adapter **ret)
{
struct nmreq_register *req = (struct nmreq_register *)(uintptr_t)hdr->nr_body;
@@ -1089,7 +1276,7 @@
na = &vpna->up;
na->ifp = ifp;
- strncpy(na->name, hdr->nr_name, sizeof(na->name));
+ strlcpy(na->name, hdr->nr_name, sizeof(na->name));
/* bound checking */
na->num_tx_rings = req->nr_tx_rings;
@@ -1109,6 +1296,7 @@
*/
nm_bound_var(&npipes, 2, 1, NM_MAXPIPES, NULL);
/* validate extra bufs */
+ extrabufs = req->nr_extra_bufs;
nm_bound_var(&extrabufs, 0, 0,
128*NM_BDG_MAXSLOTS, NULL);
req->nr_extra_bufs = extrabufs; /* write back */
@@ -1121,7 +1309,7 @@
/*if (vpna->mfs > netmap_buf_size) TODO netmap_buf_size is zero??
vpna->mfs = netmap_buf_size; */
if (netmap_verbose)
- D("max frame size %u", vpna->mfs);
+ nm_prinf("max frame size %u", vpna->mfs);
na->na_flags |= NAF_BDG_MAYSLEEP;
/* persistent VALE ports look like hw devices
@@ -1129,12 +1317,12 @@
*/
if (ifp)
na->na_flags |= NAF_NATIVE;
- na->nm_txsync = netmap_vp_txsync;
- na->nm_rxsync = netmap_vp_rxsync;
- na->nm_register = netmap_vp_reg;
- na->nm_krings_create = netmap_vp_krings_create;
- na->nm_krings_delete = netmap_vp_krings_delete;
- na->nm_dtor = netmap_vp_dtor;
+ na->nm_txsync = netmap_vale_vp_txsync;
+ na->nm_rxsync = netmap_vp_rxsync; /* use the one provided by bdg */
+ na->nm_register = netmap_vp_reg; /* use the one provided by bdg */
+ na->nm_krings_create = netmap_vale_vp_krings_create;
+ na->nm_krings_delete = netmap_vale_vp_krings_delete;
+ na->nm_dtor = netmap_vale_vp_dtor;
ND("nr_mem_id %d", req->nr_mem_id);
na->nm_mem = nmd ?
netmap_mem_get(nmd):
@@ -1144,7 +1332,7 @@
req->nr_extra_bufs, npipes, &error);
if (na->nm_mem == NULL)
goto err;
- na->nm_bdg_attach = netmap_vp_bdg_attach;
+ na->nm_bdg_attach = netmap_vale_vp_bdg_attach;
/* other nmd fields are set in the common routine */
error = netmap_attach_common(na);
if (error)
@@ -1163,19 +1351,16 @@
* The na_vp port is this same netmap_adapter. There is no host port.
*/
static int
-netmap_vp_bdg_attach(const char *name, struct netmap_adapter *na,
+netmap_vale_vp_bdg_attach(const char *name, struct netmap_adapter *na,
struct nm_bridge *b)
{
struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
- if (b->bdg_ops != &vale_bdg_ops) {
+ if ((b->bdg_flags & NM_BDG_NEED_BWRAP) || vpna->na_bdg) {
return NM_NEED_BWRAP;
}
- if (vpna->na_bdg) {
- return NM_NEED_BWRAP;
- }
na->na_vp = vpna;
- strncpy(na->name, name, sizeof(na->name));
+ strlcpy(na->name, name, sizeof(na->name));
na->na_hostvp = NULL;
return 0;
}
@@ -1186,12 +1371,12 @@
int error;
/* impersonate a netmap_vp_adapter */
- error = netmap_vp_krings_create(na);
+ error = netmap_vale_vp_krings_create(na);
if (error)
return error;
error = netmap_bwrap_krings_create_common(na);
if (error) {
- netmap_vp_krings_delete(na);
+ netmap_vale_vp_krings_delete(na);
}
return error;
}
@@ -1200,7 +1385,7 @@
netmap_vale_bwrap_krings_delete(struct netmap_adapter *na)
{
netmap_bwrap_krings_delete_common(na);
- netmap_vp_krings_delete(na);
+ netmap_vale_vp_krings_delete(na);
}
static int
@@ -1216,9 +1401,9 @@
return ENOMEM;
}
na = &bna->up.up;
- strncpy(na->name, nr_name, sizeof(na->name));
+ strlcpy(na->name, nr_name, sizeof(na->name));
na->nm_register = netmap_bwrap_reg;
- na->nm_txsync = netmap_vp_txsync;
+ na->nm_txsync = netmap_vale_vp_txsync;
// na->nm_rxsync = netmap_bwrap_rxsync;
na->nm_krings_create = netmap_vale_bwrap_krings_create;
na->nm_krings_delete = netmap_vale_bwrap_krings_delete;
@@ -1313,7 +1498,8 @@
NMG_UNLOCK();
- D("destroying a persistent vale interface %s", ifp->if_xname);
+ if (netmap_verbose)
+ nm_prinf("destroying a persistent vale interface %s", ifp->if_xname);
/* Linux requires all the references are released
* before unregister
*/
@@ -1389,9 +1575,10 @@
}
}
/* netmap_vp_create creates a struct netmap_vp_adapter */
- error = netmap_vp_create(hdr, ifp, nmd, &vpna);
+ error = netmap_vale_vp_create(hdr, ifp, nmd, &vpna);
if (error) {
- D("error %d", error);
+ if (netmap_debug & NM_DEBUG_VALE)
+ nm_prerr("error %d", error);
goto err_1;
}
/* persist-specific routines */
Index: head/sys/modules/netmap/Makefile
===================================================================
--- head/sys/modules/netmap/Makefile
+++ head/sys/modules/netmap/Makefile
@@ -3,12 +3,12 @@
# Compile netmap as a module, useful if you want a netmap bridge
# or loadable drivers.
-SYSDIR?=${SRCTOP}/sys
-.include "${SYSDIR}/conf/kern.opts.mk"
+.include <bsd.own.mk> # FreeBSD 10 and earlier
+# .include "${SYSDIR}/conf/kern.opts.mk"
-.PATH: ${SYSDIR}/dev/netmap
-.PATH.h: ${SYSDIR}/net
-CFLAGS += -I${SYSDIR}/ -D INET
+.PATH: ${.CURDIR}/../../dev/netmap
+.PATH.h: ${.CURDIR}/../../net
+CFLAGS += -I${.CURDIR}/../../ -D INET -D VIMAGE
KMOD = netmap
SRCS = device_if.h bus_if.h pci_if.h opt_netmap.h
SRCS += netmap.c netmap.h netmap_kern.h
@@ -20,8 +20,10 @@
SRCS += netmap_offloadings.c
SRCS += netmap_pipe.c
SRCS += netmap_monitor.c
-SRCS += netmap_pt.c
+SRCS += netmap_kloop.c
SRCS += netmap_legacy.c
+SRCS += netmap_bdg.c
+SRCS += netmap_null.c
SRCS += if_ptnet.c
SRCS += opt_inet.h opt_inet6.h
Index: head/sys/net/netmap.h
===================================================================
--- head/sys/net/netmap.h
+++ head/sys/net/netmap.h
@@ -41,9 +41,9 @@
#ifndef _NET_NETMAP_H_
#define _NET_NETMAP_H_
-#define NETMAP_API 12 /* current API version */
+#define NETMAP_API 13 /* current API version */
-#define NETMAP_MIN_API 11 /* min and max versions accepted */
+#define NETMAP_MIN_API 13 /* min and max versions accepted */
#define NETMAP_MAX_API 15
/*
* Some fields should be cache-aligned to reduce contention.
@@ -333,12 +333,17 @@
*/
/*
- * check if space is available in the ring.
+ * Check if space is available in the ring. We use ring->head, which
+ * points to the next netmap slot to be published to netmap. It is
+ * possible that the application moves ring->cur ahead of ring->tail
+ * (e.g., by setting ring->cur <== ring->tail), if it wants more slots
+ * than the ones currently available, and it wants to be notified when
+ * more arrive. See netmap(4) for more details and examples.
*/
static inline int
nm_ring_empty(struct netmap_ring *ring)
{
- return (ring->cur == ring->tail);
+ return (ring->head == ring->tail);
}
/*
@@ -479,6 +484,10 @@
* !=0: errno value
*/
uint32_t nro_status;
+ /* Option size, used only for options that can have variable size
+ * (e.g. because they contain arrays). For fixed-size options this
+ * field should be set to zero. */
+ uint64_t nro_size;
};
/* Header common to all requests. Do not reorder these fields, as we need
@@ -518,12 +527,32 @@
NETMAP_REQ_VALE_POLLING_DISABLE,
/* Get info about the pools of a memory allocator. */
NETMAP_REQ_POOLS_INFO_GET,
+ /* Start an in-kernel loop that syncs the rings periodically or
+ * on notifications. The loop runs in the context of the ioctl
+ * syscall, and only stops on NETMAP_REQ_SYNC_KLOOP_STOP. */
+ NETMAP_REQ_SYNC_KLOOP_START,
+ /* Stops the thread executing the in-kernel loop. The thread
+ * returns from the ioctl syscall. */
+ NETMAP_REQ_SYNC_KLOOP_STOP,
+ /* Enable CSB mode on a registered netmap control device. */
+ NETMAP_REQ_CSB_ENABLE,
};
enum {
/* On NETMAP_REQ_REGISTER, ask netmap to use memory allocated
* from user-space allocated memory pools (e.g. hugepages). */
NETMAP_REQ_OPT_EXTMEM = 1,
+
+ /* On NETMAP_REQ_SYNC_KLOOP_START, ask netmap to use eventfd-based
+ * notifications to synchronize the kernel loop with the application.
+ */
+ NETMAP_REQ_OPT_SYNC_KLOOP_EVENTFDS,
+
+ /* On NETMAP_REQ_REGISTER, ask netmap to work in CSB mode, where
+ * head, cur and tail pointers are not exchanged through the
+ * struct netmap_ring header, but rather using a user-provided
+ * memory area (see struct nm_csb_atok and struct nm_csb_ktoa). */
+ NETMAP_REQ_OPT_CSB,
};
/*
@@ -541,6 +570,7 @@
uint16_t nr_mem_id; /* id of the memory allocator */
uint16_t nr_ringid; /* ring(s) we care about */
uint32_t nr_mode; /* specify NR_REG_* modes */
+ uint32_t nr_extra_bufs; /* number of requested extra buffers */
uint64_t nr_flags; /* additional flags (see below) */
/* monitors use nr_ringid and nr_mode to select the rings to monitor */
@@ -549,9 +579,7 @@
#define NR_ZCOPY_MON 0x400
/* request exclusive access to the selected rings */
#define NR_EXCLUSIVE 0x800
-/* request ptnetmap host support */
-#define NR_PASSTHROUGH_HOST NR_PTNETMAP_HOST /* deprecated */
-#define NR_PTNETMAP_HOST 0x1000
+/* 0x1000 unused */
#define NR_RX_RINGS_ONLY 0x2000
#define NR_TX_RINGS_ONLY 0x4000
/* Applications set this flag if they are able to deal with virtio-net headers,
@@ -564,8 +592,6 @@
* NETMAP_DO_RX_POLL. */
#define NR_DO_RX_POLL 0x10000
#define NR_NO_TX_POLL 0x20000
-
- uint32_t nr_extra_bufs; /* number of requested extra buffers */
};
/* Valid values for nmreq_register.nr_mode (see above). */
@@ -576,10 +602,11 @@
NR_REG_ONE_NIC = 4,
NR_REG_PIPE_MASTER = 5, /* deprecated, use "x{y" port name syntax */
NR_REG_PIPE_SLAVE = 6, /* deprecated, use "x}y" port name syntax */
+ NR_REG_NULL = 7,
};
/* A single ioctl number is shared by all the new API command.
- * Demultiplexing is done using the nr_hdr.nr_reqtype field.
+ * Demultiplexing is done using the hdr.nr_reqtype field.
* FreeBSD uses the size value embedded in the _IOWR to determine
* how much to copy in/out, so we define the ioctl() command
* specifying only nmreq_header, and copyin/copyout the rest. */
@@ -595,16 +622,18 @@
/*
* nr_reqtype: NETMAP_REQ_PORT_INFO_GET
* Get information about a netmap port, including number of rings.
- * slots per ring, id of the memory allocator, etc.
+ * slots per ring, id of the memory allocator, etc. The netmap
+ * control device used for this operation does not need to be bound
+ * to a netmap port.
*/
struct nmreq_port_info_get {
- uint64_t nr_offset; /* nifp offset in the shared region */
uint64_t nr_memsize; /* size of the shared region */
uint32_t nr_tx_slots; /* slots in tx rings */
uint32_t nr_rx_slots; /* slots in rx rings */
uint16_t nr_tx_rings; /* number of tx rings */
uint16_t nr_rx_rings; /* number of rx rings */
- uint16_t nr_mem_id; /* id of the memory allocator */
+ uint16_t nr_mem_id; /* memory allocator id (in/out) */
+ uint16_t pad1;
};
#define NM_BDG_NAME "vale" /* prefix for bridge port name */
@@ -620,6 +649,7 @@
struct nmreq_vale_attach {
struct nmreq_register reg;
uint32_t port_index;
+ uint32_t pad1;
};
/*
@@ -630,6 +660,7 @@
*/
struct nmreq_vale_detach {
uint32_t port_index;
+ uint32_t pad1;
};
/*
@@ -639,15 +670,18 @@
struct nmreq_vale_list {
/* Name of the VALE port (valeXXX:YYY) or empty. */
uint16_t nr_bridge_idx;
+ uint16_t pad1;
uint32_t nr_port_idx;
};
/*
* nr_reqtype: NETMAP_REQ_PORT_HDR_SET or NETMAP_REQ_PORT_HDR_GET
- * Set the port header length.
+ * Set or get the port header length of the port identified by hdr.nr_name.
+ * The control device does not need to be bound to a netmap port.
*/
struct nmreq_port_hdr {
uint32_t nr_hdr_len;
+ uint32_t pad1;
};
/*
@@ -660,6 +694,7 @@
uint16_t nr_tx_rings; /* number of tx rings */
uint16_t nr_rx_rings; /* number of rx rings */
uint16_t nr_mem_id; /* id of the memory allocator */
+ uint16_t pad1;
};
/*
@@ -672,17 +707,20 @@
#define NETMAP_POLLING_MODE_MULTI_CPU 2
uint32_t nr_first_cpu_id;
uint32_t nr_num_polling_cpus;
+ uint32_t pad1;
};
/*
* nr_reqtype: NETMAP_REQ_POOLS_INFO_GET
- * Get info about the pools of the memory allocator of the port bound
- * to a given netmap control device (used i.e. by a ptnetmap-enabled
- * hypervisor). The nr_hdr.nr_name field is ignored.
+ * Get info about the pools of the memory allocator of the netmap
+ * port specified by hdr.nr_name and nr_mem_id. The netmap control
+ * device used for this operation does not need to be bound to a netmap
+ * port.
*/
struct nmreq_pools_info {
uint64_t nr_memsize;
- uint16_t nr_mem_id;
+ uint16_t nr_mem_id; /* in/out argument */
+ uint16_t pad1[3];
uint64_t nr_if_pool_offset;
uint32_t nr_if_pool_objtotal;
uint32_t nr_if_pool_objsize;
@@ -695,13 +733,151 @@
};
/*
+ * nr_reqtype: NETMAP_REQ_SYNC_KLOOP_START
+ * Start an in-kernel loop that syncs the rings periodically or on
+ * notifications. The loop runs in the context of the ioctl syscall,
+ * and only stops on NETMAP_REQ_SYNC_KLOOP_STOP.
+ * The registered netmap port must be open in CSB mode.
+ */
+struct nmreq_sync_kloop_start {
+ /* Sleeping is the default synchronization method for the kloop.
+ * The 'sleep_us' field specifies how many microseconds to sleep for
+ * when there is no work to do, before doing another kloop iteration.
+ */
+ uint32_t sleep_us;
+ uint32_t pad1;
+};
+
+/* A CSB entry for the application --> kernel direction. */
+struct nm_csb_atok {
+ uint32_t head; /* AW+ KR+ the head of the appl netmap_ring */
+ uint32_t cur; /* AW+ KR+ the cur of the appl netmap_ring */
+ uint32_t appl_need_kick; /* AW+ KR+ kern --> appl notification enable */
+ uint32_t sync_flags; /* AW+ KR+ the flags of the appl [tx|rx]sync() */
+ uint32_t pad[12]; /* pad to a 64 bytes cacheline */
+};
+
+/* A CSB entry for the application <-- kernel direction. */
+struct nm_csb_ktoa {
+ uint32_t hwcur; /* AR+ KW+ the hwcur of the kern netmap_kring */
+ uint32_t hwtail; /* AR+ KW+ the hwtail of the kern netmap_kring */
+ uint32_t kern_need_kick; /* AR+ KW+ appl-->kern notification enable */
+ uint32_t pad[13];
+};
+
+#ifdef __linux__
+
+#ifdef __KERNEL__
+#define nm_stst_barrier smp_wmb
+#else /* !__KERNEL__ */
+static inline void nm_stst_barrier(void)
+{
+ /* A memory barrier with release semantic has the combined
+ * effect of a store-store barrier and a load-store barrier,
+ * which is fine for us. */
+ __atomic_thread_fence(__ATOMIC_RELEASE);
+}
+#endif /* !__KERNEL__ */
+
+#elif defined(__FreeBSD__)
+
+#ifdef _KERNEL
+#define nm_stst_barrier atomic_thread_fence_rel
+#else /* !_KERNEL */
+static inline void nm_stst_barrier(void)
+{
+ __atomic_thread_fence(__ATOMIC_RELEASE);
+}
+#endif /* !_KERNEL */
+
+#else /* !__linux__ && !__FreeBSD__ */
+#error "OS not supported"
+#endif /* !__linux__ && !__FreeBSD__ */
+
+/* Application side of sync-kloop: Write ring pointers (cur, head) to the CSB.
+ * This routine is coupled with sync_kloop_kernel_read(). */
+static inline void
+nm_sync_kloop_appl_write(struct nm_csb_atok *atok, uint32_t cur,
+ uint32_t head)
+{
+ /*
+ * We need to write cur and head to the CSB but we cannot do it atomically.
+ * There is no way we can prevent the host from reading the updated value
+ * of one of the two and the old value of the other. However, if we make
+ * sure that the host never reads a value of head more recent than the
+ * value of cur we are safe. We can allow the host to read a value of cur
+ * more recent than the value of head, since in the netmap ring cur can be
+ * ahead of head and cur cannot wrap around head because it must be behind
+ * tail. Inverting the order of writes below could instead result in the
+ * host thinking head went ahead of cur, which would cause the sync
+ * prologue to fail.
+ *
+ * The following memory barrier scheme is used to make this happen:
+ *
+ * Guest Host
+ *
+ * STORE(cur) LOAD(head)
+ * mb() <-----------> mb()
+ * STORE(head) LOAD(cur)
+ *
+ */
+ atok->cur = cur;
+ nm_stst_barrier();
+ atok->head = head;
+}
+
+/* Application side of sync-kloop: Read kring pointers (hwcur, hwtail) from
+ * the CSB. This routine is coupled with sync_kloop_kernel_write(). */
+static inline void
+nm_sync_kloop_appl_read(struct nm_csb_ktoa *ktoa, uint32_t *hwtail,
+ uint32_t *hwcur)
+{
+ /*
+ * We place a memory barrier to make sure that the update of hwtail never
+ * overtakes the update of hwcur.
+ * (see explanation in sync_kloop_kernel_write).
+ */
+ *hwtail = ktoa->hwtail;
+ nm_stst_barrier();
+ *hwcur = ktoa->hwcur;
+}
+
+/*
* data for NETMAP_REQ_OPT_* options
*/
+struct nmreq_opt_sync_kloop_eventfds {
+ struct nmreq_option nro_opt; /* common header */
+ /* An array of N entries for bidirectional notifications between
+ * the kernel loop and the application. The number of entries and
+ * their order must agree with the CSB arrays passed in the
+ * NETMAP_REQ_OPT_CSB option. Each entry contains a file descriptor
+ * backed by an eventfd.
+ */
+ struct {
+ /* Notifier for the application --> kernel loop direction. */
+ int32_t ioeventfd;
+ /* Notifier for the kernel loop --> application direction. */
+ int32_t irqfd;
+ } eventfds[0];
+};
+
struct nmreq_opt_extmem {
struct nmreq_option nro_opt; /* common header */
uint64_t nro_usrptr; /* (in) ptr to usr memory */
struct nmreq_pools_info nro_info; /* (in/out) */
+};
+
+struct nmreq_opt_csb {
+ struct nmreq_option nro_opt;
+
+ /* Array of CSB entries for application --> kernel communication
+ * (N entries). */
+ uint64_t csb_atok;
+
+ /* Array of CSB entries for kernel --> application communication
+ * (N entries). */
+ uint64_t csb_ktoa;
};
#endif /* _NET_NETMAP_H_ */
Index: head/sys/net/netmap_user.h
===================================================================
--- head/sys/net/netmap_user.h
+++ head/sys/net/netmap_user.h
@@ -138,11 +138,12 @@
return nm_ring_next(r, r->tail) != r->head;
}
-
+/* Compute the number of slots available in the netmap ring. We use
+ * ring->head as explained in the comment above nm_ring_empty(). */
static inline uint32_t
nm_ring_space(struct netmap_ring *ring)
{
- int ret = ring->tail - ring->cur;
+ int ret = ring->tail - ring->head;
if (ret < 0)
ret += ring->num_slots;
return ret;
@@ -1091,18 +1092,36 @@
ring = NETMAP_RXRING(d->nifp, ri);
for ( ; !nm_ring_empty(ring) && cnt != got; got++) {
u_int idx, i;
+ u_char *oldbuf;
+ struct netmap_slot *slot;
if (d->hdr.buf) { /* from previous round */
cb(arg, &d->hdr, d->hdr.buf);
}
i = ring->cur;
- idx = ring->slot[i].buf_idx;
+ slot = &ring->slot[i];
+ idx = slot->buf_idx;
/* d->cur_rx_ring doesn't change inside this loop, but
* set it here, so it reflects d->hdr.buf's ring */
d->cur_rx_ring = ri;
- d->hdr.slot = &ring->slot[i];
- d->hdr.buf = (u_char *)NETMAP_BUF(ring, idx);
+ d->hdr.slot = slot;
+ oldbuf = d->hdr.buf = (u_char *)NETMAP_BUF(ring, idx);
// __builtin_prefetch(buf);
- d->hdr.len = d->hdr.caplen = ring->slot[i].len;
+ d->hdr.len = d->hdr.caplen = slot->len;
+ while (slot->flags & NS_MOREFRAG) {
+ u_char *nbuf;
+ u_int oldlen = slot->len;
+ i = nm_ring_next(ring, i);
+ slot = &ring->slot[i];
+ d->hdr.len += slot->len;
+ nbuf = (u_char *)NETMAP_BUF(ring, slot->buf_idx);
+ if (oldbuf != NULL && nbuf - oldbuf == ring->nr_buf_size &&
+ oldlen == ring->nr_buf_size) {
+ d->hdr.caplen += slot->len;
+ oldbuf = nbuf;
+ } else {
+ oldbuf = NULL;
+ }
+ }
d->hdr.ts = ring->ts;
ring->head = ring->cur = nm_ring_next(ring, i);
}
Index: head/sys/net/netmap_virt.h
===================================================================
--- head/sys/net/netmap_virt.h
+++ head/sys/net/netmap_virt.h
@@ -1,7 +1,7 @@
/*
* Copyright (C) 2013-2016 Luigi Rizzo
* Copyright (C) 2013-2016 Giuseppe Lettieri
- * Copyright (C) 2013-2016 Vincenzo Maffione
+ * Copyright (C) 2013-2018 Vincenzo Maffione
* Copyright (C) 2015 Stefano Garzarella
* All rights reserved.
*
@@ -33,14 +33,15 @@
#define NETMAP_VIRT_H
/*
- * ptnetmap_memdev: device used to expose memory into the guest VM
+ * Register offsets and other macros for the ptnetmap paravirtual devices:
+ * ptnetmap-memdev: device used to expose memory into the guest
+ * ptnet: paravirtualized NIC exposing a netmap port in the guest
*
* These macros are used in the hypervisor frontend (QEMU, bhyve) and in the
* guest device driver.
*/
-/* PCI identifiers and PCI BARs for the ptnetmap memdev
- * and ptnetmap network interface. */
+/* PCI identifiers and PCI BARs for ptnetmap-memdev and ptnet. */
#define PTNETMAP_MEMDEV_NAME "ptnetmap-memdev"
#define PTNETMAP_PCI_VENDOR_ID 0x1b36 /* QEMU virtual devices */
#define PTNETMAP_PCI_DEVICE_ID 0x000c /* memory device */
@@ -49,7 +50,7 @@
#define PTNETMAP_MEM_PCI_BAR 1
#define PTNETMAP_MSIX_PCI_BAR 2
-/* Registers for the ptnetmap memdev */
+/* Device registers for ptnetmap-memdev */
#define PTNET_MDEV_IO_MEMSIZE_LO 0 /* netmap memory size (low) */
#define PTNET_MDEV_IO_MEMSIZE_HI 4 /* netmap_memory_size (high) */
#define PTNET_MDEV_IO_MEMID 8 /* memory allocator ID in the host */
@@ -64,74 +65,10 @@
#define PTNET_MDEV_IO_BUF_POOL_OBJSZ 96
#define PTNET_MDEV_IO_END 100
-/*
- * ptnetmap configuration
- *
- * The ptnet kthreads (running in host kernel-space) need to be configured
- * in order to know how to intercept guest kicks (I/O register writes) and
- * how to inject MSI-X interrupts to the guest. The configuration may vary
- * depending on the hypervisor. Currently, we support QEMU/KVM on Linux and
- * and bhyve on FreeBSD.
- * The configuration is passed by the hypervisor to the host netmap module
- * by means of an ioctl() with nr_cmd=NETMAP_PT_HOST_CREATE, and it is
- * specified by the ptnetmap_cfg struct. This struct contains an header
- * with general informations and an array of entries whose size depends
- * on the hypervisor. The NETMAP_PT_HOST_CREATE command is issued every
- * time the kthreads are started.
- */
-struct ptnetmap_cfg {
-#define PTNETMAP_CFGTYPE_QEMU 0x1
-#define PTNETMAP_CFGTYPE_BHYVE 0x2
- uint16_t cfgtype; /* how to interpret the cfg entries */
- uint16_t entry_size; /* size of a config entry */
- uint32_t num_rings; /* number of config entries */
- void *csb_gh; /* CSB for guest --> host communication */
- void *csb_hg; /* CSB for host --> guest communication */
- /* Configuration entries are allocated right after the struct. */
-};
-
-/* Configuration of a ptnetmap ring for QEMU. */
-struct ptnetmap_cfgentry_qemu {
- uint32_t ioeventfd; /* to intercept guest register access */
- uint32_t irqfd; /* to inject guest interrupts */
-};
-
-/* Configuration of a ptnetmap ring for bhyve. */
-struct ptnetmap_cfgentry_bhyve {
- uint64_t wchan; /* tsleep() parameter, to wake up kthread */
- uint32_t ioctl_fd; /* ioctl fd */
- /* ioctl parameters to send irq */
- uint32_t ioctl_cmd;
- /* vmm.ko MSIX parameters for IOCTL */
- struct {
- uint64_t msg_data;
- uint64_t addr;
- } ioctl_data;
-};
-
-/*
- * Pass a pointer to a userspace buffer to be passed to kernelspace for write
- * or read. Used by NETMAP_PT_HOST_CREATE.
- * XXX deprecated
- */
-static inline void
-nmreq_pointer_put(struct nmreq *nmr, void *userptr)
-{
- uintptr_t *pp = (uintptr_t *)&nmr->nr_arg1;
- *pp = (uintptr_t)userptr;
-}
-
-static inline void *
-nmreq_pointer_get(const struct nmreq *nmr)
-{
- const uintptr_t *pp = (const uintptr_t *)&nmr->nr_arg1;
- return (void *)*pp;
-}
-
/* ptnetmap features */
#define PTNETMAP_F_VNET_HDR 1
-/* I/O registers for the ptnet device. */
+/* Device registers for the ptnet network device. */
#define PTNET_IO_PTFEAT 0
#define PTNET_IO_PTCTL 4
#define PTNET_IO_MAC_LO 8
@@ -153,140 +90,11 @@
#define PTNET_IO_KICK_BASE 128
#define PTNET_IO_MASK 0xff
-/* ptnetmap control commands (values for PTCTL register) */
+/* ptnet control commands (values for PTCTL register):
+ * - CREATE starts the host sync-kloop
+ * - DELETE stops the host sync-kloop
+ */
#define PTNETMAP_PTCTL_CREATE 1
#define PTNETMAP_PTCTL_DELETE 2
-
-/* ptnetmap synchronization variables shared between guest and host */
-struct ptnet_csb_gh {
- uint32_t head; /* GW+ HR+ the head of the guest netmap_ring */
- uint32_t cur; /* GW+ HR+ the cur of the guest netmap_ring */
- uint32_t guest_need_kick; /* GW+ HR+ host-->guest notification enable */
- uint32_t sync_flags; /* GW+ HR+ the flags of the guest [tx|rx]sync() */
- char pad[48]; /* pad to a 64 bytes cacheline */
-};
-struct ptnet_csb_hg {
- uint32_t hwcur; /* GR+ HW+ the hwcur of the host netmap_kring */
- uint32_t hwtail; /* GR+ HW+ the hwtail of the host netmap_kring */
- uint32_t host_need_kick; /* GR+ HW+ guest-->host notification enable */
- char pad[4+48];
-};
-
-#ifdef WITH_PTNETMAP_GUEST
-
-/* ptnetmap_memdev routines used to talk with ptnetmap_memdev device driver */
-struct ptnetmap_memdev;
-int nm_os_pt_memdev_iomap(struct ptnetmap_memdev *, vm_paddr_t *, void **,
- uint64_t *);
-void nm_os_pt_memdev_iounmap(struct ptnetmap_memdev *);
-uint32_t nm_os_pt_memdev_ioread(struct ptnetmap_memdev *, unsigned int);
-
-/* Guest driver: Write kring pointers (cur, head) to the CSB.
- * This routine is coupled with ptnetmap_host_read_kring_csb(). */
-static inline void
-ptnetmap_guest_write_kring_csb(struct ptnet_csb_gh *ptr, uint32_t cur,
- uint32_t head)
-{
- /*
- * We need to write cur and head to the CSB but we cannot do it atomically.
- * There is no way we can prevent the host from reading the updated value
- * of one of the two and the old value of the other. However, if we make
- * sure that the host never reads a value of head more recent than the
- * value of cur we are safe. We can allow the host to read a value of cur
- * more recent than the value of head, since in the netmap ring cur can be
- * ahead of head and cur cannot wrap around head because it must be behind
- * tail. Inverting the order of writes below could instead result into the
- * host to think head went ahead of cur, which would cause the sync
- * prologue to fail.
- *
- * The following memory barrier scheme is used to make this happen:
- *
- * Guest Host
- *
- * STORE(cur) LOAD(head)
- * mb() <-----------> mb()
- * STORE(head) LOAD(cur)
- */
- ptr->cur = cur;
- mb();
- ptr->head = head;
-}
-
-/* Guest driver: Read kring pointers (hwcur, hwtail) from the CSB.
- * This routine is coupled with ptnetmap_host_write_kring_csb(). */
-static inline void
-ptnetmap_guest_read_kring_csb(struct ptnet_csb_hg *pthg, struct netmap_kring *kring)
-{
- /*
- * We place a memory barrier to make sure that the update of hwtail never
- * overtakes the update of hwcur.
- * (see explanation in ptnetmap_host_write_kring_csb).
- */
- kring->nr_hwtail = pthg->hwtail;
- mb();
- kring->nr_hwcur = pthg->hwcur;
-}
-
-#endif /* WITH_PTNETMAP_GUEST */
-
-#ifdef WITH_PTNETMAP_HOST
-/*
- * ptnetmap kernel thread routines
- * */
-
-/* Functions to read and write CSB fields in the host */
-#if defined (linux)
-#define CSB_READ(csb, field, r) (get_user(r, &csb->field))
-#define CSB_WRITE(csb, field, v) (put_user(v, &csb->field))
-#else /* ! linux */
-#define CSB_READ(csb, field, r) (r = fuword32(&csb->field))
-#define CSB_WRITE(csb, field, v) (suword32(&csb->field, v))
-#endif /* ! linux */
-
-/* Host netmap: Write kring pointers (hwcur, hwtail) to the CSB.
- * This routine is coupled with ptnetmap_guest_read_kring_csb(). */
-static inline void
-ptnetmap_host_write_kring_csb(struct ptnet_csb_hg __user *ptr, uint32_t hwcur,
- uint32_t hwtail)
-{
- /*
- * The same scheme used in ptnetmap_guest_write_kring_csb() applies here.
- * We allow the guest to read a value of hwcur more recent than the value
- * of hwtail, since this would anyway result in a consistent view of the
- * ring state (and hwcur can never wraparound hwtail, since hwcur must be
- * behind head).
- *
- * The following memory barrier scheme is used to make this happen:
- *
- * Guest Host
- *
- * STORE(hwcur) LOAD(hwtail)
- * mb() <-------------> mb()
- * STORE(hwtail) LOAD(hwcur)
- */
- CSB_WRITE(ptr, hwcur, hwcur);
- mb();
- CSB_WRITE(ptr, hwtail, hwtail);
-}
-
-/* Host netmap: Read kring pointers (head, cur, sync_flags) from the CSB.
- * This routine is coupled with ptnetmap_guest_write_kring_csb(). */
-static inline void
-ptnetmap_host_read_kring_csb(struct ptnet_csb_gh __user *ptr,
- struct netmap_ring *shadow_ring,
- uint32_t num_slots)
-{
- /*
- * We place a memory barrier to make sure that the update of head never
- * overtakes the update of cur.
- * (see explanation in ptnetmap_guest_write_kring_csb).
- */
- CSB_READ(ptr, head, shadow_ring->head);
- mb();
- CSB_READ(ptr, cur, shadow_ring->cur);
- CSB_READ(ptr, sync_flags, shadow_ring->flags);
-}
-
-#endif /* WITH_PTNETMAP_HOST */
#endif /* NETMAP_VIRT_H */

File Metadata

Mime Type
text/plain
Expires
Wed, Nov 27, 11:19 AM (20 h, 3 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
14884039
Default Alt Text
D18015.diff (194 KB)

Event Timeline