D18015.diff
Index: head/sys/conf/files
===================================================================
--- head/sys/conf/files
+++ head/sys/conf/files
@@ -2517,17 +2517,19 @@
dev/nand/nfc_if.m optional nand
dev/netmap/if_ptnet.c optional netmap inet
dev/netmap/netmap.c optional netmap
+dev/netmap/netmap_bdg.c optional netmap
dev/netmap/netmap_freebsd.c optional netmap
dev/netmap/netmap_generic.c optional netmap
+dev/netmap/netmap_kloop.c optional netmap
+dev/netmap/netmap_legacy.c optional netmap
dev/netmap/netmap_mbq.c optional netmap
dev/netmap/netmap_mem2.c optional netmap
dev/netmap/netmap_monitor.c optional netmap
+dev/netmap/netmap_null.c optional netmap
dev/netmap/netmap_offloadings.c optional netmap
dev/netmap/netmap_pipe.c optional netmap
dev/netmap/netmap_pt.c optional netmap
dev/netmap/netmap_vale.c optional netmap
-dev/netmap/netmap_legacy.c optional netmap
-dev/netmap/netmap_bdg.c optional netmap
# compile-with "${NORMAL_C} -Wconversion -Wextra"
dev/nfsmb/nfsmb.c optional nfsmb pci
dev/nge/if_nge.c optional nge
Index: head/sys/dev/netmap/if_ixl_netmap.h
===================================================================
--- head/sys/dev/netmap/if_ixl_netmap.h
+++ head/sys/dev/netmap/if_ixl_netmap.h
@@ -129,7 +129,7 @@
na.ifp = vsi->ifp;
na.na_flags = NAF_BDG_MAYSLEEP;
// XXX check that queues is set.
- nm_prinf("queues is %p\n", vsi->queues);
+ nm_prinf("queues is %p", vsi->queues);
if (vsi->queues) {
na.num_tx_desc = vsi->queues[0].num_desc;
na.num_rx_desc = vsi->queues[0].num_desc;
Index: head/sys/dev/netmap/if_ptnet.c
===================================================================
--- head/sys/dev/netmap/if_ptnet.c
+++ head/sys/dev/netmap/if_ptnet.c
@@ -128,8 +128,8 @@
struct resource *irq;
void *cookie;
int kring_id;
- struct ptnet_csb_gh *ptgh;
- struct ptnet_csb_hg *pthg;
+ struct nm_csb_atok *atok;
+ struct nm_csb_ktoa *ktoa;
unsigned int kick;
struct mtx lock;
struct buf_ring *bufring; /* for TX queues */
@@ -166,8 +166,8 @@
unsigned int num_tx_rings;
struct ptnet_queue *queues;
struct ptnet_queue *rxqueues;
- struct ptnet_csb_gh *csb_gh;
- struct ptnet_csb_hg *csb_hg;
+ struct nm_csb_atok *csb_gh;
+ struct nm_csb_ktoa *csb_hg;
unsigned int min_tx_space;
@@ -209,7 +209,7 @@
static int ptnet_irqs_init(struct ptnet_softc *sc);
static void ptnet_irqs_fini(struct ptnet_softc *sc);
-static uint32_t ptnet_nm_ptctl(if_t ifp, uint32_t cmd);
+static uint32_t ptnet_nm_ptctl(struct ptnet_softc *sc, uint32_t cmd);
static int ptnet_nm_config(struct netmap_adapter *na,
struct nm_config_info *info);
static void ptnet_update_vnet_hdr(struct ptnet_softc *sc);
@@ -327,7 +327,7 @@
sc->num_rings = num_tx_rings + num_rx_rings;
sc->num_tx_rings = num_tx_rings;
- if (sc->num_rings * sizeof(struct ptnet_csb_gh) > PAGE_SIZE) {
+ if (sc->num_rings * sizeof(struct nm_csb_atok) > PAGE_SIZE) {
device_printf(dev, "CSB cannot handle that many rings (%u)\n",
sc->num_rings);
err = ENOMEM;
@@ -342,7 +342,7 @@
err = ENOMEM;
goto err_path;
}
- sc->csb_hg = (struct ptnet_csb_hg *)(((char *)sc->csb_gh) + PAGE_SIZE);
+ sc->csb_hg = (struct nm_csb_ktoa *)(((char *)sc->csb_gh) + PAGE_SIZE);
{
/*
@@ -379,8 +379,8 @@
pq->sc = sc;
pq->kring_id = i;
pq->kick = PTNET_IO_KICK_BASE + 4 * i;
- pq->ptgh = sc->csb_gh + i;
- pq->pthg = sc->csb_hg + i;
+ pq->atok = sc->csb_gh + i;
+ pq->ktoa = sc->csb_hg + i;
snprintf(pq->lock_name, sizeof(pq->lock_name), "%s-%d",
device_get_nameunit(dev), i);
mtx_init(&pq->lock, pq->lock_name, NULL, MTX_DEF);
@@ -505,12 +505,25 @@
return err;
}
+/* Stop host sync-kloop if it was running. */
+static void
+ptnet_device_shutdown(struct ptnet_softc *sc)
+{
+ ptnet_nm_ptctl(sc, PTNETMAP_PTCTL_DELETE);
+ bus_write_4(sc->iomem, PTNET_IO_CSB_GH_BAH, 0);
+ bus_write_4(sc->iomem, PTNET_IO_CSB_GH_BAL, 0);
+ bus_write_4(sc->iomem, PTNET_IO_CSB_HG_BAH, 0);
+ bus_write_4(sc->iomem, PTNET_IO_CSB_HG_BAL, 0);
+}
+
static int
ptnet_detach(device_t dev)
{
struct ptnet_softc *sc = device_get_softc(dev);
int i;
+ ptnet_device_shutdown(sc);
+
#ifdef DEVICE_POLLING
if (sc->ifp->if_capenable & IFCAP_POLLING) {
ether_poll_deregister(sc->ifp);
@@ -543,10 +556,6 @@
ptnet_irqs_fini(sc);
if (sc->csb_gh) {
- bus_write_4(sc->iomem, PTNET_IO_CSB_GH_BAH, 0);
- bus_write_4(sc->iomem, PTNET_IO_CSB_GH_BAL, 0);
- bus_write_4(sc->iomem, PTNET_IO_CSB_HG_BAH, 0);
- bus_write_4(sc->iomem, PTNET_IO_CSB_HG_BAL, 0);
contigfree(sc->csb_gh, 2*PAGE_SIZE, M_DEVBUF);
sc->csb_gh = NULL;
sc->csb_hg = NULL;
@@ -583,9 +592,8 @@
static int
ptnet_suspend(device_t dev)
{
- struct ptnet_softc *sc;
+ struct ptnet_softc *sc = device_get_softc(dev);
- sc = device_get_softc(dev);
(void)sc;
return (0);
@@ -594,9 +602,8 @@
static int
ptnet_resume(device_t dev)
{
- struct ptnet_softc *sc;
+ struct ptnet_softc *sc = device_get_softc(dev);
- sc = device_get_softc(dev);
(void)sc;
return (0);
@@ -605,11 +612,11 @@
static int
ptnet_shutdown(device_t dev)
{
- /*
- * Suspend already does all of what we need to
- * do here; we just never expect to be resumed.
- */
- return (ptnet_suspend(dev));
+ struct ptnet_softc *sc = device_get_softc(dev);
+
+ ptnet_device_shutdown(sc);
+
+ return (0);
}
static int
@@ -796,7 +803,7 @@
/* Make sure the worker sees the
* IFF_DRV_RUNNING down. */
PTNET_Q_LOCK(pq);
- pq->ptgh->guest_need_kick = 0;
+ pq->atok->appl_need_kick = 0;
PTNET_Q_UNLOCK(pq);
/* Wait for rescheduling to finish. */
if (pq->taskq) {
@@ -810,7 +817,7 @@
for (i = 0; i < sc->num_rings; i++) {
pq = sc-> queues + i;
PTNET_Q_LOCK(pq);
- pq->ptgh->guest_need_kick = 1;
+ pq->atok->appl_need_kick = 1;
PTNET_Q_UNLOCK(pq);
}
}
@@ -881,7 +888,7 @@
return ret;
}
- if (sc->ptna->backend_regifs == 0) {
+ if (sc->ptna->backend_users == 0) {
ret = ptnet_nm_krings_create(na_nm);
if (ret) {
device_printf(sc->dev, "ptnet_nm_krings_create() "
@@ -962,7 +969,7 @@
ptnet_nm_register(na_dr, 0 /* off */);
- if (sc->ptna->backend_regifs == 0) {
+ if (sc->ptna->backend_users == 0) {
netmap_mem_rings_delete(na_dr);
ptnet_nm_krings_delete(na_nm);
}
@@ -1092,9 +1099,8 @@
}
static uint32_t
-ptnet_nm_ptctl(if_t ifp, uint32_t cmd)
+ptnet_nm_ptctl(struct ptnet_softc *sc, uint32_t cmd)
{
- struct ptnet_softc *sc = if_getsoftc(ifp);
/*
* Write a command and read back error status,
* with zero meaning success.
@@ -1130,8 +1136,8 @@
/* Sync krings from the host, reading from
* CSB. */
for (i = 0; i < sc->num_rings; i++) {
- struct ptnet_csb_gh *ptgh = sc->queues[i].ptgh;
- struct ptnet_csb_hg *pthg = sc->queues[i].pthg;
+ struct nm_csb_atok *atok = sc->queues[i].atok;
+ struct nm_csb_ktoa *ktoa = sc->queues[i].ktoa;
struct netmap_kring *kring;
if (i < na->num_tx_rings) {
@@ -1139,15 +1145,15 @@
} else {
kring = na->rx_rings[i - na->num_tx_rings];
}
- kring->rhead = kring->ring->head = ptgh->head;
- kring->rcur = kring->ring->cur = ptgh->cur;
- kring->nr_hwcur = pthg->hwcur;
+ kring->rhead = kring->ring->head = atok->head;
+ kring->rcur = kring->ring->cur = atok->cur;
+ kring->nr_hwcur = ktoa->hwcur;
kring->nr_hwtail = kring->rtail =
- kring->ring->tail = pthg->hwtail;
+ kring->ring->tail = ktoa->hwtail;
ND("%d,%d: csb {hc %u h %u c %u ht %u}", t, i,
- pthg->hwcur, ptgh->head, ptgh->cur,
- pthg->hwtail);
+ ktoa->hwcur, atok->head, atok->cur,
+ ktoa->hwtail);
ND("%d,%d: kring {hc %u rh %u rc %u h %u c %u ht %u rt %u t %u}",
t, i, kring->nr_hwcur, kring->rhead, kring->rcur,
kring->ring->head, kring->ring->cur, kring->nr_hwtail,
@@ -1178,7 +1184,7 @@
int i;
if (!onoff) {
- sc->ptna->backend_regifs--;
+ sc->ptna->backend_users--;
}
/* If this is the last netmap client, guest interrupt enable flags may
@@ -1191,17 +1197,17 @@
D("Exit netmap mode, re-enable interrupts");
for (i = 0; i < sc->num_rings; i++) {
pq = sc->queues + i;
- pq->ptgh->guest_need_kick = 1;
+ pq->atok->appl_need_kick = 1;
}
}
if (onoff) {
- if (sc->ptna->backend_regifs == 0) {
+ if (sc->ptna->backend_users == 0) {
/* Initialize notification enable fields in the CSB. */
for (i = 0; i < sc->num_rings; i++) {
pq = sc->queues + i;
- pq->pthg->host_need_kick = 1;
- pq->ptgh->guest_need_kick =
+ pq->ktoa->kern_need_kick = 1;
+ pq->atok->appl_need_kick =
(!(ifp->if_capenable & IFCAP_POLLING)
&& i >= sc->num_tx_rings);
}
@@ -1211,17 +1217,13 @@
/* Make sure the host adapter passed through is ready
* for txsync/rxsync. */
- ret = ptnet_nm_ptctl(ifp, PTNETMAP_PTCTL_CREATE);
+ ret = ptnet_nm_ptctl(sc, PTNETMAP_PTCTL_CREATE);
if (ret) {
return ret;
}
- }
- /* Sync from CSB must be done after REGIF PTCTL. Skip this
- * step only if this is a netmap client and it is not the
- * first one. */
- if ((!native && sc->ptna->backend_regifs == 0) ||
- (native && na->active_fds == 0)) {
+ /* Align the guest krings and rings to the state stored
+ * in the CSB. */
ptnet_sync_from_csb(sc, na);
}
@@ -1254,19 +1256,13 @@
}
}
- /* Sync from CSB must be done before UNREGIF PTCTL, on the last
- * netmap client. */
- if (native && na->active_fds == 0) {
- ptnet_sync_from_csb(sc, na);
+ if (sc->ptna->backend_users == 0) {
+ ret = ptnet_nm_ptctl(sc, PTNETMAP_PTCTL_DELETE);
}
-
- if (sc->ptna->backend_regifs == 0) {
- ret = ptnet_nm_ptctl(ifp, PTNETMAP_PTCTL_DELETE);
- }
}
if (onoff) {
- sc->ptna->backend_regifs++;
+ sc->ptna->backend_users++;
}
return ret;
@@ -1279,7 +1275,7 @@
struct ptnet_queue *pq = sc->queues + kring->ring_id;
bool notify;
- notify = netmap_pt_guest_txsync(pq->ptgh, pq->pthg, kring, flags);
+ notify = netmap_pt_guest_txsync(pq->atok, pq->ktoa, kring, flags);
if (notify) {
ptnet_kick(pq);
}
@@ -1294,7 +1290,7 @@
struct ptnet_queue *pq = sc->rxqueues + kring->ring_id;
bool notify;
- notify = netmap_pt_guest_rxsync(pq->ptgh, pq->pthg, kring, flags);
+ notify = netmap_pt_guest_rxsync(pq->atok, pq->ktoa, kring, flags);
if (notify) {
ptnet_kick(pq);
}
@@ -1310,7 +1306,7 @@
for (i = 0; i < sc->num_rings; i++) {
struct ptnet_queue *pq = sc->queues + i;
- pq->ptgh->guest_need_kick = onoff;
+ pq->atok->appl_need_kick = onoff;
}
}
@@ -1676,25 +1672,13 @@
}
/* End of offloading-related functions to be shared with vtnet. */
-static inline void
-ptnet_sync_tail(struct ptnet_csb_hg *pthg, struct netmap_kring *kring)
-{
- struct netmap_ring *ring = kring->ring;
-
- /* Update hwcur and hwtail as known by the host. */
- ptnetmap_guest_read_kring_csb(pthg, kring);
-
- /* nm_sync_finalize */
- ring->tail = kring->rtail = kring->nr_hwtail;
-}
-
static void
ptnet_ring_update(struct ptnet_queue *pq, struct netmap_kring *kring,
unsigned int head, unsigned int sync_flags)
{
struct netmap_ring *ring = kring->ring;
- struct ptnet_csb_gh *ptgh = pq->ptgh;
- struct ptnet_csb_hg *pthg = pq->pthg;
+ struct nm_csb_atok *atok = pq->atok;
+ struct nm_csb_ktoa *ktoa = pq->ktoa;
/* Some packets have been pushed to the netmap ring. We have
* to tell the host to process the new packets, updating cur
@@ -1704,11 +1688,11 @@
/* Mimic nm_txsync_prologue/nm_rxsync_prologue. */
kring->rcur = kring->rhead = head;
- ptnetmap_guest_write_kring_csb(ptgh, kring->rcur, kring->rhead);
+ ptnetmap_guest_write_kring_csb(atok, kring->rcur, kring->rhead);
/* Kick the host if needed. */
- if (NM_ACCESS_ONCE(pthg->host_need_kick)) {
- ptgh->sync_flags = sync_flags;
+ if (NM_ACCESS_ONCE(ktoa->kern_need_kick)) {
+ atok->sync_flags = sync_flags;
ptnet_kick(pq);
}
}
@@ -1728,8 +1712,8 @@
struct netmap_adapter *na = &sc->ptna->dr.up;
if_t ifp = sc->ifp;
unsigned int batch_count = 0;
- struct ptnet_csb_gh *ptgh;
- struct ptnet_csb_hg *pthg;
+ struct nm_csb_atok *atok;
+ struct nm_csb_ktoa *ktoa;
struct netmap_kring *kring;
struct netmap_ring *ring;
struct netmap_slot *slot;
@@ -1758,8 +1742,8 @@
return ENETDOWN;
}
- ptgh = pq->ptgh;
- pthg = pq->pthg;
+ atok = pq->atok;
+ ktoa = pq->ktoa;
kring = na->tx_rings[pq->kring_id];
ring = kring->ring;
lim = kring->nkr_num_slots - 1;
@@ -1771,17 +1755,17 @@
/* We ran out of slot, let's see if the host has
* freed up some, by reading hwcur and hwtail from
* the CSB. */
- ptnet_sync_tail(pthg, kring);
+ ptnet_sync_tail(ktoa, kring);
if (PTNET_TX_NOSPACE(head, kring, minspace)) {
/* Still no slots available. Reactivate the
* interrupts so that we can be notified
* when some free slots are made available by
* the host. */
- ptgh->guest_need_kick = 1;
+ atok->appl_need_kick = 1;
/* Double-check. */
- ptnet_sync_tail(pthg, kring);
+ ptnet_sync_tail(ktoa, kring);
if (likely(PTNET_TX_NOSPACE(head, kring,
minspace))) {
break;
@@ -1790,7 +1774,7 @@
RD(1, "Found more slots by doublecheck");
/* More slots were freed before reactivating
* the interrupts. */
- ptgh->guest_need_kick = 0;
+ atok->appl_need_kick = 0;
}
}
@@ -2020,8 +2004,8 @@
{
struct ptnet_softc *sc = pq->sc;
bool have_vnet_hdr = sc->vnet_hdr_len;
- struct ptnet_csb_gh *ptgh = pq->ptgh;
- struct ptnet_csb_hg *pthg = pq->pthg;
+ struct nm_csb_atok *atok = pq->atok;
+ struct nm_csb_ktoa *ktoa = pq->ktoa;
struct netmap_adapter *na = &sc->ptna->dr.up;
struct netmap_kring *kring = na->rx_rings[pq->kring_id];
struct netmap_ring *ring = kring->ring;
@@ -2053,21 +2037,21 @@
/* We ran out of slot, let's see if the host has
* added some, by reading hwcur and hwtail from
* the CSB. */
- ptnet_sync_tail(pthg, kring);
+ ptnet_sync_tail(ktoa, kring);
if (head == ring->tail) {
/* Still no slots available. Reactivate
* interrupts as they were disabled by the
* host thread right before issuing the
* last interrupt. */
- ptgh->guest_need_kick = 1;
+ atok->appl_need_kick = 1;
/* Double-check. */
- ptnet_sync_tail(pthg, kring);
+ ptnet_sync_tail(ktoa, kring);
if (likely(head == ring->tail)) {
break;
}
- ptgh->guest_need_kick = 0;
+ atok->appl_need_kick = 0;
}
}
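
The if_ptnet.c hunks above are largely a rename of the shared CSB blocks from the old guest/host naming (ptnet_csb_gh/ptnet_csb_hg, ptgh/pthg) to direction-based naming (nm_csb_atok for application-to-kernel, nm_csb_ktoa for kernel-to-application), plus consolidating the CSB/kloop teardown into ptnet_device_shutdown(). The TX and RX paths keep the same notification-avoidance logic; the following is a condensed, illustrative sketch of that pattern (not a function added by this patch), using only names that appear in this file:

/*
 * Condensed sketch of the "double-check" pattern used by
 * ptnet_drain_transmit_queue() and ptnet_rx_eof() above.
 */
static int
ptnet_tx_wait_for_space(struct ptnet_queue *pq, struct netmap_kring *kring,
	unsigned int head, unsigned int minspace)
{
	/* The host may already have freed slots: re-read hwcur/hwtail. */
	ptnet_sync_tail(pq->ktoa, kring);
	if (!PTNET_TX_NOSPACE(head, kring, minspace))
		return (0);

	/* Still full: ask the host to kick us when slots free up... */
	pq->atok->appl_need_kick = 1;
	/* ...and double-check, to close the race with slots freed just
	 * before appl_need_kick became visible to the host. */
	ptnet_sync_tail(pq->ktoa, kring);
	if (!PTNET_TX_NOSPACE(head, kring, minspace)) {
		pq->atok->appl_need_kick = 0;	/* no interrupt needed */
		return (0);
	}
	return (1);	/* really out of space, give up for now */
}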
Index: head/sys/dev/netmap/if_vtnet_netmap.h
===================================================================
--- head/sys/dev/netmap/if_vtnet_netmap.h
+++ head/sys/dev/netmap/if_vtnet_netmap.h
@@ -79,7 +79,7 @@
}
if (deq)
- nm_prinf("%d sgs dequeued from %s-%d (netmap=%d)\n",
+ nm_prinf("%d sgs dequeued from %s-%d (netmap=%d)",
deq, nm_txrx2str(t), idx, netmap_bufs);
}
@@ -230,7 +230,7 @@
/*writeable=*/0);
if (unlikely(err)) {
if (err != ENOSPC)
- nm_prerr("virtqueue_enqueue(%s) failed: %d\n",
+ nm_prerr("virtqueue_enqueue(%s) failed: %d",
kring->name, err);
break;
}
@@ -251,7 +251,7 @@
if (token == NULL)
break;
if (unlikely(token != (void *)txq))
- nm_prerr("BUG: TX token mismatch\n");
+ nm_prerr("BUG: TX token mismatch");
else
n++;
}
@@ -307,7 +307,7 @@
/*readable=*/0, /*writeable=*/sg.sg_nseg);
if (unlikely(err)) {
if (err != ENOSPC)
- nm_prerr("virtqueue_enqueue(%s) failed: %d\n",
+ nm_prerr("virtqueue_enqueue(%s) failed: %d",
kring->name, err);
break;
}
@@ -391,7 +391,7 @@
break;
}
if (unlikely(token != (void *)rxq)) {
- nm_prerr("BUG: RX token mismatch\n");
+ nm_prerr("BUG: RX token mismatch");
} else {
/* Skip the virtio-net header. */
len -= sc->vtnet_hdr_size;
@@ -533,7 +533,7 @@
netmap_attach(&na);
- nm_prinf("vtnet attached txq=%d, txd=%d rxq=%d, rxd=%d\n",
+ nm_prinf("vtnet attached txq=%d, txd=%d rxq=%d, rxd=%d",
na.num_tx_rings, na.num_tx_desc,
na.num_tx_rings, na.num_rx_desc);
}
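
The if_vtnet_netmap.h hunks only drop the trailing "\n" from nm_prinf()/nm_prerr() calls: after this change the logging helpers append the newline themselves. Together with the netmap_debug sysctl introduced in netmap.c below, the resulting convention looks like this (a minimal sketch reusing calls from this diff, for a function that needs the netmap kernel headers):

/* Sketch of the post-change logging convention. */
static void
example_log(struct netmap_adapter *na)
{
	nm_prerr("BUG: TX token mismatch");	/* no trailing "\n" */
	if (netmap_debug & NM_DEBUG_ON)		/* debug gated on netmap_debug */
		nm_prinf("deleting last instance for %s", na->name);
}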
Index: head/sys/dev/netmap/netmap.c
===================================================================
--- head/sys/dev/netmap/netmap.c
+++ head/sys/dev/netmap/netmap.c
@@ -480,6 +480,9 @@
/* user-controlled variables */
int netmap_verbose;
+#ifdef CONFIG_NETMAP_DEBUG
+int netmap_debug;
+#endif /* CONFIG_NETMAP_DEBUG */
static int netmap_no_timestamp; /* don't timestamp on rxsync */
int netmap_no_pendintr = 1;
@@ -527,9 +530,6 @@
/* Non-zero if ptnet devices are allowed to use virtio-net headers. */
int ptnet_vnet_hdr = 1;
-/* 0 if ptnetmap should not use worker threads for TX processing */
-int ptnetmap_tx_workers = 1;
-
/*
* SYSCTL calls are grouped between SYSBEGIN and SYSEND to be emulated
* in some other operating systems
@@ -540,6 +540,10 @@
SYSCTL_NODE(_dev, OID_AUTO, netmap, CTLFLAG_RW, 0, "Netmap args");
SYSCTL_INT(_dev_netmap, OID_AUTO, verbose,
CTLFLAG_RW, &netmap_verbose, 0, "Verbose mode");
+#ifdef CONFIG_NETMAP_DEBUG
+SYSCTL_INT(_dev_netmap, OID_AUTO, debug,
+ CTLFLAG_RW, &netmap_debug, 0, "Debug messages");
+#endif /* CONFIG_NETMAP_DEBUG */
SYSCTL_INT(_dev_netmap, OID_AUTO, no_timestamp,
CTLFLAG_RW, &netmap_no_timestamp, 0, "no_timestamp");
SYSCTL_INT(_dev_netmap, OID_AUTO, no_pendintr, CTLFLAG_RW, &netmap_no_pendintr,
@@ -569,8 +573,6 @@
#endif
SYSCTL_INT(_dev_netmap, OID_AUTO, ptnet_vnet_hdr, CTLFLAG_RW, &ptnet_vnet_hdr,
0, "Allow ptnet devices to use virtio-net headers");
-SYSCTL_INT(_dev_netmap, OID_AUTO, ptnetmap_tx_workers, CTLFLAG_RW,
- &ptnetmap_tx_workers, 0, "Use worker threads for pnetmap TX processing");
SYSEND;
@@ -692,7 +694,7 @@
op = "Clamp";
}
if (op && msg)
- nm_prinf("%s %s to %d (was %d)\n", op, msg, *v, oldv);
+ nm_prinf("%s %s to %d (was %d)", op, msg, *v, oldv);
return *v;
}
@@ -776,13 +778,14 @@
na->num_rx_rings = info.num_rx_rings;
na->num_rx_desc = info.num_rx_descs;
na->rx_buf_maxsize = info.rx_buf_maxsize;
- D("configuration changed for %s: txring %d x %d, "
- "rxring %d x %d, rxbufsz %d",
- na->name, na->num_tx_rings, na->num_tx_desc,
- na->num_rx_rings, na->num_rx_desc, na->rx_buf_maxsize);
+ if (netmap_verbose)
+ nm_prinf("configuration changed for %s: txring %d x %d, "
+ "rxring %d x %d, rxbufsz %d",
+ na->name, na->num_tx_rings, na->num_tx_desc,
+ na->num_rx_rings, na->num_rx_desc, na->rx_buf_maxsize);
return 0;
}
- D("WARNING: configuration changed for %s while active: "
+ nm_prerr("WARNING: configuration changed for %s while active: "
"txring %d x %d, rxring %d x %d, rxbufsz %d",
na->name, info.num_tx_rings, info.num_tx_descs,
info.num_rx_rings, info.num_rx_descs,
@@ -828,7 +831,8 @@
enum txrx t;
if (na->tx_rings != NULL) {
- D("warning: krings were already created");
+ if (netmap_debug & NM_DEBUG_ON)
+ nm_prerr("warning: krings were already created");
return 0;
}
@@ -842,7 +846,7 @@
na->tx_rings = nm_os_malloc((size_t)len);
if (na->tx_rings == NULL) {
- D("Cannot allocate krings");
+ nm_prerr("Cannot allocate krings");
return ENOMEM;
}
na->rx_rings = na->tx_rings + n[NR_TX];
@@ -910,7 +914,8 @@
enum txrx t;
if (na->tx_rings == NULL) {
- D("warning: krings were already deleted");
+ if (netmap_debug & NM_DEBUG_ON)
+ nm_prerr("warning: krings were already deleted");
return;
}
@@ -1012,11 +1017,11 @@
* happens if the close() occurs while a concurrent
* syscall is running.
*/
- if (netmap_verbose)
- D("deleting last instance for %s", na->name);
+ if (netmap_debug & NM_DEBUG_ON)
+ nm_prinf("deleting last instance for %s", na->name);
if (nm_netmap_on(na)) {
- D("BUG: netmap on while going to delete the krings");
+ nm_prerr("BUG: netmap on while going to delete the krings");
}
na->nm_krings_delete(na);
@@ -1033,14 +1038,6 @@
priv->np_nifp = NULL;
}
-/* call with NMG_LOCK held */
-static __inline int
-nm_si_user(struct netmap_priv_d *priv, enum txrx t)
-{
- return (priv->np_na != NULL &&
- (priv->np_qlast[t] - priv->np_qfirst[t] > 1));
-}
-
struct netmap_priv_d*
netmap_priv_new(void)
{
@@ -1136,8 +1133,8 @@
/* Send packets up, outside the lock; head/prev machinery
* is only useful for Windows. */
while ((m = mbq_dequeue(q)) != NULL) {
- if (netmap_verbose & NM_VERB_HOST)
- D("sending up pkt %p size %d", m, MBUF_LEN(m));
+ if (netmap_debug & NM_DEBUG_HOST)
+ nm_prinf("sending up pkt %p size %d", m, MBUF_LEN(m));
prev = nm_os_send_up(dst, m, prev);
if (head == NULL)
head = prev;
@@ -1332,8 +1329,8 @@
m_copydata(m, 0, len, NMB(na, slot));
ND("nm %d len %d", nm_i, len);
- if (netmap_verbose)
- D("%s", nm_dump_buf(NMB(na, slot),len, 128, NULL));
+ if (netmap_debug & NM_DEBUG_HOST)
+ nm_prinf("%s", nm_dump_buf(NMB(na, slot),len, 128, NULL));
slot->len = len;
slot->flags = 0;
@@ -1500,7 +1497,7 @@
if (req->nr_mode == NR_REG_PIPE_MASTER ||
req->nr_mode == NR_REG_PIPE_SLAVE) {
/* Do not accept deprecated pipe modes. */
- D("Deprecated pipe nr_mode, use xx{yy or xx}yy syntax");
+ nm_prerr("Deprecated pipe nr_mode, use xx{yy or xx}yy syntax");
return EINVAL;
}
@@ -1527,9 +1524,7 @@
* 0 !NULL type matches and na created/found
* !0 !NULL impossible
*/
-
- /* try to see if this is a ptnetmap port */
- error = netmap_get_pt_host_na(hdr, na, nmd, create);
+ error = netmap_get_null_na(hdr, na, nmd, create);
if (error || *na != NULL)
goto out;
@@ -1739,7 +1734,7 @@
/*
* Error routine called when txsync/rxsync detects an error.
- * Can't do much more than resetting head =cur = hwcur, tail = hwtail
+ * Can't do much more than resetting head = cur = hwcur, tail = hwtail
* Return 1 on reinit.
*
* This routine is only called by the upper half of the kernel.
@@ -1810,12 +1805,6 @@
enum txrx t;
u_int j;
- if ((nr_flags & NR_PTNETMAP_HOST) && ((nr_mode != NR_REG_ALL_NIC) ||
- nr_flags & (NR_RX_RINGS_ONLY|NR_TX_RINGS_ONLY))) {
- D("Error: only NR_REG_ALL_NIC supported with netmap passthrough");
- return EINVAL;
- }
-
for_rx_tx(t) {
if (nr_flags & excluded_direction[t]) {
priv->np_qfirst[t] = priv->np_qlast[t] = 0;
@@ -1823,6 +1812,7 @@
}
switch (nr_mode) {
case NR_REG_ALL_NIC:
+ case NR_REG_NULL:
priv->np_qfirst[t] = 0;
priv->np_qlast[t] = nma_get_nrings(na, t);
ND("ALL/PIPE: %s %d %d", nm_txrx2str(t),
@@ -1831,7 +1821,7 @@
case NR_REG_SW:
case NR_REG_NIC_SW:
if (!(na->na_flags & NAF_HOST_RINGS)) {
- D("host rings not supported");
+ nm_prerr("host rings not supported");
return EINVAL;
}
priv->np_qfirst[t] = (nr_mode == NR_REG_SW ?
@@ -1844,7 +1834,7 @@
case NR_REG_ONE_NIC:
if (nr_ringid >= na->num_tx_rings &&
nr_ringid >= na->num_rx_rings) {
- D("invalid ring id %d", nr_ringid);
+ nm_prerr("invalid ring id %d", nr_ringid);
return EINVAL;
}
/* if not enough rings, use the first one */
@@ -1857,11 +1847,11 @@
priv->np_qfirst[t], priv->np_qlast[t]);
break;
default:
- D("invalid regif type %d", nr_mode);
+ nm_prerr("invalid regif type %d", nr_mode);
return EINVAL;
}
}
- priv->np_flags = nr_flags | nr_mode; // TODO
+ priv->np_flags = nr_flags;
/* Allow transparent forwarding mode in the host --> nic
* direction only if all the TX hw rings have been opened. */
@@ -1871,7 +1861,7 @@
}
if (netmap_verbose) {
- D("%s: tx [%d,%d) rx [%d,%d) id %d",
+ nm_prinf("%s: tx [%d,%d) rx [%d,%d) id %d",
na->name,
priv->np_qfirst[NR_TX],
priv->np_qlast[NR_TX],
@@ -1927,6 +1917,7 @@
}
priv->np_flags = 0;
priv->np_txpoll = 0;
+ priv->np_kloop_state = 0;
}
@@ -1943,8 +1934,8 @@
int excl = (priv->np_flags & NR_EXCLUSIVE);
enum txrx t;
- if (netmap_verbose)
- D("%s: grabbing tx [%d, %d) rx [%d, %d)",
+ if (netmap_debug & NM_DEBUG_ON)
+ nm_prinf("%s: grabbing tx [%d, %d) rx [%d, %d)",
na->name,
priv->np_qfirst[NR_TX],
priv->np_qlast[NR_TX],
@@ -2021,6 +2012,110 @@
return (priv->np_qfirst[NR_RX] != priv->np_qlast[NR_RX]);
}
+/* Validate the CSB entries for both directions (atok and ktoa).
+ * To be called under NMG_LOCK(). */
+static int
+netmap_csb_validate(struct netmap_priv_d *priv, struct nmreq_opt_csb *csbo)
+{
+ struct nm_csb_atok *csb_atok_base =
+ (struct nm_csb_atok *)(uintptr_t)csbo->csb_atok;
+ struct nm_csb_ktoa *csb_ktoa_base =
+ (struct nm_csb_ktoa *)(uintptr_t)csbo->csb_ktoa;
+ enum txrx t;
+ int num_rings[NR_TXRX], tot_rings;
+ size_t entry_size[2];
+ void *csb_start[2];
+ int i;
+
+ if (priv->np_kloop_state & NM_SYNC_KLOOP_RUNNING) {
+ nm_prerr("Cannot update CSB while kloop is running");
+ return EBUSY;
+ }
+
+ tot_rings = 0;
+ for_rx_tx(t) {
+ num_rings[t] = priv->np_qlast[t] - priv->np_qfirst[t];
+ tot_rings += num_rings[t];
+ }
+ if (tot_rings <= 0)
+ return 0;
+
+ if (!(priv->np_flags & NR_EXCLUSIVE)) {
+ nm_prerr("CSB mode requires NR_EXCLUSIVE");
+ return EINVAL;
+ }
+
+ entry_size[0] = sizeof(*csb_atok_base);
+ entry_size[1] = sizeof(*csb_ktoa_base);
+ csb_start[0] = (void *)csb_atok_base;
+ csb_start[1] = (void *)csb_ktoa_base;
+
+ for (i = 0; i < 2; i++) {
+ /* On Linux we could use access_ok() to simplify
+ * the validation. However, the advantage of
+ * this approach is that it works also on
+ * FreeBSD. */
+ size_t csb_size = tot_rings * entry_size[i];
+ void *tmp;
+ int err;
+
+ if ((uintptr_t)csb_start[i] & (entry_size[i]-1)) {
+ nm_prerr("Unaligned CSB address");
+ return EINVAL;
+ }
+
+ tmp = nm_os_malloc(csb_size);
+ if (!tmp)
+ return ENOMEM;
+ if (i == 0) {
+ /* Application --> kernel direction. */
+ err = copyin(csb_start[i], tmp, csb_size);
+ } else {
+ /* Kernel --> application direction. */
+ memset(tmp, 0, csb_size);
+ err = copyout(tmp, csb_start[i], csb_size);
+ }
+ nm_os_free(tmp);
+ if (err) {
+ nm_prerr("Invalid CSB address");
+ return err;
+ }
+ }
+
+ priv->np_csb_atok_base = csb_atok_base;
+ priv->np_csb_ktoa_base = csb_ktoa_base;
+
+ /* Initialize the CSB. */
+ for_rx_tx(t) {
+ for (i = 0; i < num_rings[t]; i++) {
+ struct netmap_kring *kring =
+ NMR(priv->np_na, t)[i + priv->np_qfirst[t]];
+ struct nm_csb_atok *csb_atok = csb_atok_base + i;
+ struct nm_csb_ktoa *csb_ktoa = csb_ktoa_base + i;
+
+ if (t == NR_RX) {
+ csb_atok += num_rings[NR_TX];
+ csb_ktoa += num_rings[NR_TX];
+ }
+
+ CSB_WRITE(csb_atok, head, kring->rhead);
+ CSB_WRITE(csb_atok, cur, kring->rcur);
+ CSB_WRITE(csb_atok, appl_need_kick, 1);
+ CSB_WRITE(csb_atok, sync_flags, 1);
+ CSB_WRITE(csb_ktoa, hwcur, kring->nr_hwcur);
+ CSB_WRITE(csb_ktoa, hwtail, kring->nr_hwtail);
+ CSB_WRITE(csb_ktoa, kern_need_kick, 1);
+
+ nm_prinf("csb_init for kring %s: head %u, cur %u, "
+ "hwcur %u, hwtail %u", kring->name,
+ kring->rhead, kring->rcur, kring->nr_hwcur,
+ kring->nr_hwtail);
+ }
+ }
+
+ return 0;
+}
+
/*
* possibly move the interface to netmap-mode.
* If success it returns a pointer to netmap_if, otherwise NULL.
@@ -2137,7 +2232,7 @@
na->name, mtu, na->rx_buf_maxsize, nbs);
if (na->rx_buf_maxsize == 0) {
- D("%s: error: rx_buf_maxsize == 0", na->name);
+ nm_prerr("%s: error: rx_buf_maxsize == 0", na->name);
error = EIO;
goto err_drop_mem;
}
@@ -2149,7 +2244,7 @@
* cannot be used in this case. */
if (nbs < mtu) {
nm_prerr("error: netmap buf size (%u) "
- "< device MTU (%u)\n", nbs, mtu);
+ "< device MTU (%u)", nbs, mtu);
error = EINVAL;
goto err_drop_mem;
}
@@ -2162,14 +2257,14 @@
if (!(na->na_flags & NAF_MOREFRAG)) {
nm_prerr("error: large MTU (%d) needed "
"but %s does not support "
- "NS_MOREFRAG\n", mtu,
+ "NS_MOREFRAG", mtu,
na->ifp->if_xname);
error = EINVAL;
goto err_drop_mem;
} else if (nbs < na->rx_buf_maxsize) {
nm_prerr("error: using NS_MOREFRAG on "
"%s requires netmap buf size "
- ">= %u\n", na->ifp->if_xname,
+ ">= %u", na->ifp->if_xname,
na->rx_buf_maxsize);
error = EINVAL;
goto err_drop_mem;
@@ -2177,7 +2272,7 @@
nm_prinf("info: netmap application on "
"%s needs to support "
"NS_MOREFRAG "
- "(MTU=%u,netmap_buf_size=%u)\n",
+ "(MTU=%u,netmap_buf_size=%u)",
na->ifp->if_xname, mtu, nbs);
}
}
@@ -2307,7 +2402,6 @@
struct ifnet *ifp = NULL;
int error = 0;
u_int i, qfirst, qlast;
- struct netmap_if *nifp;
struct netmap_kring **krings;
int sync_flags;
enum txrx t;
@@ -2316,14 +2410,10 @@
case NIOCCTRL: {
struct nmreq_header *hdr = (struct nmreq_header *)data;
- if (hdr->nr_version != NETMAP_API) {
- D("API mismatch for reqtype %d: got %d need %d",
- hdr->nr_version,
- hdr->nr_version, NETMAP_API);
- hdr->nr_version = NETMAP_API;
- }
if (hdr->nr_version < NETMAP_MIN_API ||
hdr->nr_version > NETMAP_MAX_API) {
+ nm_prerr("API mismatch: got %d need %d",
+ hdr->nr_version, NETMAP_API);
return EINVAL;
}
@@ -2345,13 +2435,13 @@
case NETMAP_REQ_REGISTER: {
struct nmreq_register *req =
(struct nmreq_register *)(uintptr_t)hdr->nr_body;
+ struct netmap_if *nifp;
+
/* Protect access to priv from concurrent requests. */
NMG_LOCK();
do {
- u_int memflags;
-#ifdef WITH_EXTMEM
struct nmreq_option *opt;
-#endif /* WITH_EXTMEM */
+ u_int memflags;
if (priv->np_nifp != NULL) { /* thread already registered */
error = EBUSY;
@@ -2382,6 +2472,10 @@
/* find the allocator and get a reference */
nmd = netmap_mem_find(req->nr_mem_id);
if (nmd == NULL) {
+ if (netmap_verbose) {
+ nm_prerr("%s: failed to find mem_id %u",
+ hdr->nr_name, req->nr_mem_id);
+ }
error = EINVAL;
break;
}
@@ -2397,6 +2491,8 @@
}
if (na->virt_hdr_len && !(req->nr_flags & NR_ACCEPT_VNET_HDR)) {
+ nm_prerr("virt_hdr_len=%d, but application does "
+ "not accept it", na->virt_hdr_len);
error = EIO;
break;
}
@@ -2406,6 +2502,23 @@
if (error) { /* reg. failed, release priv and ref */
break;
}
+
+ opt = nmreq_findoption((struct nmreq_option *)(uintptr_t)hdr->nr_options,
+ NETMAP_REQ_OPT_CSB);
+ if (opt != NULL) {
+ struct nmreq_opt_csb *csbo =
+ (struct nmreq_opt_csb *)opt;
+ error = nmreq_checkduplicate(opt);
+ if (!error) {
+ error = netmap_csb_validate(priv, csbo);
+ }
+ opt->nro_status = error;
+ if (error) {
+ netmap_do_unregif(priv);
+ break;
+ }
+ }
+
nifp = priv->np_nifp;
priv->np_td = td; /* for debugging purposes */
@@ -2430,12 +2543,12 @@
if (req->nr_extra_bufs) {
if (netmap_verbose)
- D("requested %d extra buffers",
+ nm_prinf("requested %d extra buffers",
req->nr_extra_bufs);
req->nr_extra_bufs = netmap_extra_alloc(na,
&nifp->ni_bufs_head, req->nr_extra_bufs);
if (netmap_verbose)
- D("got %d extra buffers", req->nr_extra_bufs);
+ nm_prinf("got %d extra buffers", req->nr_extra_bufs);
}
req->nr_offset = netmap_mem_if_offset(na->nm_mem, nifp);
@@ -2473,6 +2586,7 @@
* so that we can call netmap_get_na(). */
struct nmreq_register regreq;
bzero(&regreq, sizeof(regreq));
+ regreq.nr_mode = NR_REG_ALL_NIC;
regreq.nr_tx_slots = req->nr_tx_slots;
regreq.nr_rx_slots = req->nr_rx_slots;
regreq.nr_tx_rings = req->nr_tx_rings;
@@ -2494,6 +2608,10 @@
} else {
nmd = netmap_mem_find(req->nr_mem_id ? req->nr_mem_id : 1);
if (nmd == NULL) {
+ if (netmap_verbose)
+ nm_prerr("%s: failed to find mem_id %u",
+ hdr->nr_name,
+ req->nr_mem_id ? req->nr_mem_id : 1);
error = EINVAL;
break;
}
@@ -2505,8 +2623,6 @@
break;
if (na == NULL) /* only memory info */
break;
- req->nr_offset = 0;
- req->nr_rx_slots = req->nr_tx_slots = 0;
netmap_update_config(na);
req->nr_rx_rings = na->num_rx_rings;
req->nr_tx_rings = na->num_tx_rings;
@@ -2519,17 +2635,17 @@
}
#ifdef WITH_VALE
case NETMAP_REQ_VALE_ATTACH: {
- error = nm_bdg_ctl_attach(hdr, NULL /* userspace request */);
+ error = netmap_vale_attach(hdr, NULL /* userspace request */);
break;
}
case NETMAP_REQ_VALE_DETACH: {
- error = nm_bdg_ctl_detach(hdr, NULL /* userspace request */);
+ error = netmap_vale_detach(hdr, NULL /* userspace request */);
break;
}
case NETMAP_REQ_VALE_LIST: {
- error = netmap_bdg_list(hdr);
+ error = netmap_vale_list(hdr);
break;
}
@@ -2540,12 +2656,16 @@
* so that we can call netmap_get_bdg_na(). */
struct nmreq_register regreq;
bzero(&regreq, sizeof(regreq));
+ regreq.nr_mode = NR_REG_ALL_NIC;
+
/* For now we only support virtio-net headers, and only for
* VALE ports, but this may change in future. Valid lengths
* for the virtio-net header are 0 (no header), 10 and 12. */
if (req->nr_hdr_len != 0 &&
req->nr_hdr_len != sizeof(struct nm_vnet_hdr) &&
req->nr_hdr_len != 12) {
+ if (netmap_verbose)
+ nm_prerr("invalid hdr_len %u", req->nr_hdr_len);
error = EINVAL;
break;
}
@@ -2562,7 +2682,8 @@
if (na->virt_hdr_len) {
vpna->mfs = NETMAP_BUF_SIZE(na);
}
- D("Using vnet_hdr_len %d for %p", na->virt_hdr_len, na);
+ if (netmap_verbose)
+ nm_prinf("Using vnet_hdr_len %d for %p", na->virt_hdr_len, na);
netmap_adapter_put(na);
} else if (!na) {
error = ENXIO;
@@ -2581,6 +2702,7 @@
struct ifnet *ifp;
bzero(&regreq, sizeof(regreq));
+ regreq.nr_mode = NR_REG_ALL_NIC;
NMG_LOCK();
hdr->nr_reqtype = NETMAP_REQ_REGISTER;
hdr->nr_body = (uintptr_t)&regreq;
@@ -2612,22 +2734,80 @@
}
#endif /* WITH_VALE */
case NETMAP_REQ_POOLS_INFO_GET: {
+ /* Get information from the memory allocator used for
+ * hdr->nr_name. */
struct nmreq_pools_info *req =
(struct nmreq_pools_info *)(uintptr_t)hdr->nr_body;
- /* Get information from the memory allocator. This
- * netmap device must already be bound to a port.
- * Note that hdr->nr_name is ignored. */
NMG_LOCK();
- if (priv->np_na && priv->np_na->nm_mem) {
- struct netmap_mem_d *nmd = priv->np_na->nm_mem;
+ do {
+ /* Build a nmreq_register out of the nmreq_pools_info,
+ * so that we can call netmap_get_na(). */
+ struct nmreq_register regreq;
+ bzero(&regreq, sizeof(regreq));
+ regreq.nr_mem_id = req->nr_mem_id;
+ regreq.nr_mode = NR_REG_ALL_NIC;
+
+ hdr->nr_reqtype = NETMAP_REQ_REGISTER;
+ hdr->nr_body = (uintptr_t)&regreq;
+ error = netmap_get_na(hdr, &na, &ifp, NULL, 1 /* create */);
+ hdr->nr_reqtype = NETMAP_REQ_POOLS_INFO_GET; /* reset type */
+ hdr->nr_body = (uintptr_t)req; /* reset nr_body */
+ if (error) {
+ na = NULL;
+ ifp = NULL;
+ break;
+ }
+ nmd = na->nm_mem; /* grab the memory allocator */
+ if (nmd == NULL) {
+ error = EINVAL;
+ break;
+ }
+
+ /* Finalize the memory allocator, get the pools
+ * information and release the allocator. */
+ error = netmap_mem_finalize(nmd, na);
+ if (error) {
+ break;
+ }
error = netmap_mem_pools_info_get(req, nmd);
- } else {
+ netmap_mem_drop(na);
+ } while (0);
+ netmap_unget_na(na, ifp);
+ NMG_UNLOCK();
+ break;
+ }
+
+ case NETMAP_REQ_CSB_ENABLE: {
+ struct nmreq_option *opt;
+
+ opt = nmreq_findoption((struct nmreq_option *)(uintptr_t)hdr->nr_options,
+ NETMAP_REQ_OPT_CSB);
+ if (opt == NULL) {
error = EINVAL;
+ } else {
+ struct nmreq_opt_csb *csbo =
+ (struct nmreq_opt_csb *)opt;
+ error = nmreq_checkduplicate(opt);
+ if (!error) {
+ NMG_LOCK();
+ error = netmap_csb_validate(priv, csbo);
+ NMG_UNLOCK();
+ }
+ opt->nro_status = error;
}
- NMG_UNLOCK();
break;
}
+ case NETMAP_REQ_SYNC_KLOOP_START: {
+ error = netmap_sync_kloop(priv, hdr);
+ break;
+ }
+
+ case NETMAP_REQ_SYNC_KLOOP_STOP: {
+ error = netmap_sync_kloop_stop(priv);
+ break;
+ }
+
default: {
error = EINVAL;
break;
@@ -2641,22 +2821,20 @@
case NIOCTXSYNC:
case NIOCRXSYNC: {
- nifp = priv->np_nifp;
-
- if (nifp == NULL) {
+ if (unlikely(priv->np_nifp == NULL)) {
error = ENXIO;
break;
}
mb(); /* make sure following reads are not from cache */
- na = priv->np_na; /* we have a reference */
-
- if (na == NULL) {
- D("Internal error: nifp != NULL && na == NULL");
- error = ENXIO;
+ if (unlikely(priv->np_csb_atok_base)) {
+ nm_prerr("Invalid sync in CSB mode");
+ error = EBUSY;
break;
}
+ na = priv->np_na; /* we have a reference */
+
mbq_init(&q);
t = (cmd == NIOCTXSYNC ? NR_TX : NR_RX);
krings = NMR(na, t);
@@ -2674,8 +2852,8 @@
}
if (cmd == NIOCTXSYNC) {
- if (netmap_verbose & NM_VERB_TXSYNC)
- D("pre txsync ring %d cur %d hwcur %d",
+ if (netmap_debug & NM_DEBUG_TXSYNC)
+ nm_prinf("pre txsync ring %d cur %d hwcur %d",
i, ring->cur,
kring->nr_hwcur);
if (nm_txsync_prologue(kring, ring) >= kring->nkr_num_slots) {
@@ -2683,8 +2861,8 @@
} else if (kring->nm_sync(kring, sync_flags | NAF_FORCE_RECLAIM) == 0) {
nm_sync_finalize(kring);
}
- if (netmap_verbose & NM_VERB_TXSYNC)
- D("post txsync ring %d cur %d hwcur %d",
+ if (netmap_debug & NM_DEBUG_TXSYNC)
+ nm_prinf("post txsync ring %d cur %d hwcur %d",
i, ring->cur,
kring->nr_hwcur);
} else {
@@ -2739,18 +2917,22 @@
case NETMAP_REQ_VALE_NEWIF:
return sizeof(struct nmreq_vale_newif);
case NETMAP_REQ_VALE_DELIF:
+ case NETMAP_REQ_SYNC_KLOOP_STOP:
+ case NETMAP_REQ_CSB_ENABLE:
return 0;
case NETMAP_REQ_VALE_POLLING_ENABLE:
case NETMAP_REQ_VALE_POLLING_DISABLE:
return sizeof(struct nmreq_vale_polling);
case NETMAP_REQ_POOLS_INFO_GET:
return sizeof(struct nmreq_pools_info);
+ case NETMAP_REQ_SYNC_KLOOP_START:
+ return sizeof(struct nmreq_sync_kloop_start);
}
return 0;
}
static size_t
-nmreq_opt_size_by_type(uint16_t nro_reqtype)
+nmreq_opt_size_by_type(uint32_t nro_reqtype, uint64_t nro_size)
{
size_t rv = sizeof(struct nmreq_option);
#ifdef NETMAP_REQ_OPT_DEBUG
@@ -2763,6 +2945,13 @@
rv = sizeof(struct nmreq_opt_extmem);
break;
#endif /* WITH_EXTMEM */
+ case NETMAP_REQ_OPT_SYNC_KLOOP_EVENTFDS:
+ if (nro_size >= rv)
+ rv = nro_size;
+ break;
+ case NETMAP_REQ_OPT_CSB:
+ rv = sizeof(struct nmreq_opt_csb);
+ break;
}
/* subtract the common header */
return rv - sizeof(struct nmreq_option);
@@ -2778,8 +2967,11 @@
struct nmreq_option buf;
uint64_t *ptrs;
- if (hdr->nr_reserved)
+ if (hdr->nr_reserved) {
+ if (netmap_verbose)
+ nm_prerr("nr_reserved must be zero");
return EINVAL;
+ }
if (!nr_body_is_user)
return 0;
@@ -2796,6 +2988,8 @@
(!rqsz && hdr->nr_body != (uintptr_t)NULL)) {
/* Request body expected, but not found; or
* request body found but unexpected. */
+ if (netmap_verbose)
+ nm_prerr("nr_body expected but not found, or vice versa");
error = EINVAL;
goto out_err;
}
@@ -2809,7 +3003,7 @@
if (error)
goto out_err;
optsz += sizeof(*src);
- optsz += nmreq_opt_size_by_type(buf.nro_reqtype);
+ optsz += nmreq_opt_size_by_type(buf.nro_reqtype, buf.nro_size);
if (rqsz + optsz > NETMAP_REQ_MAXSIZE) {
error = EMSGSIZE;
goto out_err;
@@ -2863,7 +3057,8 @@
p = (char *)(opt + 1);
/* copy the option body */
- optsz = nmreq_opt_size_by_type(opt->nro_reqtype);
+ optsz = nmreq_opt_size_by_type(opt->nro_reqtype,
+ opt->nro_size);
if (optsz) {
/* the option body follows the option header */
error = copyin(src + 1, p, optsz);
@@ -2937,7 +3132,8 @@
/* copy the option body only if there was no error */
if (!rerror && !src->nro_status) {
- optsz = nmreq_opt_size_by_type(src->nro_reqtype);
+ optsz = nmreq_opt_size_by_type(src->nro_reqtype,
+ src->nro_size);
if (optsz) {
error = copyout(src + 1, dst + 1, optsz);
if (error) {
@@ -3015,7 +3211,8 @@
struct netmap_adapter *na;
struct netmap_kring *kring;
struct netmap_ring *ring;
- u_int i, check_all_tx, check_all_rx, want[NR_TXRX], revents = 0;
+ u_int i, want[NR_TXRX], revents = 0;
+ NM_SELINFO_T *si[NR_TXRX];
#define want_tx want[NR_TX]
#define want_rx want[NR_RX]
struct mbq q; /* packets from RX hw queues to host stack */
@@ -3038,27 +3235,31 @@
mbq_init(&q);
- if (priv->np_nifp == NULL) {
- D("No if registered");
+ if (unlikely(priv->np_nifp == NULL)) {
return POLLERR;
}
mb(); /* make sure following reads are not from cache */
na = priv->np_na;
- if (!nm_netmap_on(na))
+ if (unlikely(!nm_netmap_on(na)))
return POLLERR;
- if (netmap_verbose & 0x8000)
- D("device %s events 0x%x", na->name, events);
+ if (unlikely(priv->np_csb_atok_base)) {
+ nm_prerr("Invalid poll in CSB mode");
+ return POLLERR;
+ }
+
+ if (netmap_debug & NM_DEBUG_ON)
+ nm_prinf("device %s events 0x%x", na->name, events);
want_tx = events & (POLLOUT | POLLWRNORM);
want_rx = events & (POLLIN | POLLRDNORM);
/*
- * check_all_{tx|rx} are set if the card has more than one queue AND
- * the file descriptor is bound to all of them. If so, we sleep on
- * the "global" selinfo, otherwise we sleep on individual selinfo
- * (FreeBSD only allows two selinfo's per file descriptor).
+ * If the card has more than one queue AND the file descriptor is
+ * bound to all of them, we sleep on the "global" selinfo, otherwise
+ * we sleep on individual selinfo (FreeBSD only allows two selinfo's
+ * per file descriptor).
* The interrupt routine in the driver wake one or the other
* (or both) depending on which clients are active.
*
@@ -3067,8 +3268,10 @@
* there are pending packets to send. The latter can be disabled
* passing NETMAP_NO_TX_POLL in the NIOCREG call.
*/
- check_all_tx = nm_si_user(priv, NR_TX);
- check_all_rx = nm_si_user(priv, NR_RX);
+ si[NR_RX] = nm_si_user(priv, NR_RX) ? &na->si[NR_RX] :
+ &na->rx_rings[priv->np_qfirst[NR_RX]]->si;
+ si[NR_TX] = nm_si_user(priv, NR_TX) ? &na->si[NR_TX] :
+ &na->tx_rings[priv->np_qfirst[NR_TX]]->si;
#ifdef __FreeBSD__
/*
@@ -3105,10 +3308,8 @@
#ifdef linux
/* The selrecord must be unconditional on linux. */
- nm_os_selrecord(sr, check_all_tx ?
- &na->si[NR_TX] : &na->tx_rings[priv->np_qfirst[NR_TX]]->si);
- nm_os_selrecord(sr, check_all_rx ?
- &na->si[NR_RX] : &na->rx_rings[priv->np_qfirst[NR_RX]]->si);
+ nm_os_selrecord(sr, si[NR_RX]);
+ nm_os_selrecord(sr, si[NR_TX]);
#endif /* linux */
/*
@@ -3173,8 +3374,7 @@
send_down = 0;
if (want_tx && retry_tx && sr) {
#ifndef linux
- nm_os_selrecord(sr, check_all_tx ?
- &na->si[NR_TX] : &na->tx_rings[priv->np_qfirst[NR_TX]]->si);
+ nm_os_selrecord(sr, si[NR_TX]);
#endif /* !linux */
retry_tx = 0;
goto flush_tx;
@@ -3234,8 +3434,7 @@
#ifndef linux
if (retry_rx && sr) {
- nm_os_selrecord(sr, check_all_rx ?
- &na->si[NR_RX] : &na->rx_rings[priv->np_qfirst[NR_RX]]->si);
+ nm_os_selrecord(sr, si[NR_RX]);
}
#endif /* !linux */
if (send_down || retry_rx) {
@@ -3290,7 +3489,7 @@
}
if (!na->nm_intr) {
- D("Cannot %s interrupts for %s", onoff ? "enable" : "disable",
+ nm_prerr("Cannot %s interrupts for %s", onoff ? "enable" : "disable",
na->name);
return -1;
}
@@ -3328,12 +3527,6 @@
int
netmap_attach_common(struct netmap_adapter *na)
{
- if (na->num_tx_rings == 0 || na->num_rx_rings == 0) {
- D("%s: invalid rings tx %d rx %d",
- na->name, na->num_tx_rings, na->num_rx_rings);
- return EINVAL;
- }
-
if (!na->rx_buf_maxsize) {
/* Set a conservative default (larger is safer). */
na->rx_buf_maxsize = PAGE_SIZE;
@@ -3436,20 +3629,31 @@
struct ifnet *ifp = NULL;
if (size < sizeof(struct netmap_hw_adapter)) {
- D("Invalid netmap adapter size %d", (int)size);
+ if (netmap_debug & NM_DEBUG_ON)
+ nm_prerr("Invalid netmap adapter size %d", (int)size);
return EINVAL;
}
- if (arg == NULL || arg->ifp == NULL)
+ if (arg == NULL || arg->ifp == NULL) {
+ if (netmap_debug & NM_DEBUG_ON)
+ nm_prerr("either arg or arg->ifp is NULL");
return EINVAL;
+ }
+ if (arg->num_tx_rings == 0 || arg->num_rx_rings == 0) {
+ if (netmap_debug & NM_DEBUG_ON)
+ nm_prerr("%s: invalid rings tx %d rx %d",
+ arg->name, arg->num_tx_rings, arg->num_rx_rings);
+ return EINVAL;
+ }
+
ifp = arg->ifp;
if (NM_NA_CLASH(ifp)) {
/* If NA(ifp) is not null but there is no valid netmap
* adapter it means that someone else is using the same
* pointer (e.g. ax25_ptr on linux). This happens for
* instance when also PF_RING is in use. */
- D("Error: netmap adapter hook is busy");
+ nm_prerr("Error: netmap adapter hook is busy");
return EBUSY;
}
@@ -3458,7 +3662,7 @@
goto fail;
hwna->up = *arg;
hwna->up.na_flags |= NAF_HOST_RINGS | NAF_NATIVE;
- strncpy(hwna->up.name, ifp->if_xname, sizeof(hwna->up.name));
+ strlcpy(hwna->up.name, ifp->if_xname, sizeof(hwna->up.name));
if (override_reg) {
hwna->nm_hw_register = hwna->up.nm_register;
hwna->up.nm_register = netmap_hw_reg;
@@ -3483,7 +3687,7 @@
return 0;
fail:
- D("fail, arg %p ifp %p na %p", arg, ifp, hwna);
+ nm_prerr("fail, arg %p ifp %p na %p", arg, ifp, hwna);
return (hwna ? EINVAL : ENOMEM);
}
@@ -3521,7 +3725,8 @@
na->nm_dtor(na);
if (na->tx_rings) { /* XXX should not happen */
- D("freeing leftover tx_rings");
+ if (netmap_debug & NM_DEBUG_ON)
+ nm_prerr("freeing leftover tx_rings");
na->nm_krings_delete(na);
}
netmap_pipe_dealloc(na);
@@ -3619,7 +3824,7 @@
// mtx_lock(&na->core_lock);
if (!nm_netmap_on(na)) {
- D("%s not in netmap mode anymore", na->name);
+ nm_prerr("%s not in netmap mode anymore", na->name);
error = ENXIO;
goto done;
}
@@ -3638,7 +3843,7 @@
// XXX reconsider long packets if we handle fragments
if (len > NETMAP_BUF_SIZE(na)) { /* too long for us */
- D("%s from_host, drop packet size %d > %d", na->name,
+ nm_prerr("%s from_host, drop packet size %d > %d", na->name,
len, NETMAP_BUF_SIZE(na));
goto done;
}
@@ -3749,8 +3954,8 @@
new_hwofs -= lim + 1;
/* Always set the new offset value and realign the ring. */
- if (netmap_verbose)
- D("%s %s%d hwofs %d -> %d, hwtail %d -> %d",
+ if (netmap_debug & NM_DEBUG_ON)
+ nm_prinf("%s %s%d hwofs %d -> %d, hwtail %d -> %d",
na->name,
tx == NR_TX ? "TX" : "RX", n,
kring->nkr_hwofs, new_hwofs,
@@ -3796,8 +4001,8 @@
q &= NETMAP_RING_MASK;
- if (netmap_verbose) {
- RD(5, "received %s queue %d", work_done ? "RX" : "TX" , q);
+ if (netmap_debug & (NM_DEBUG_RXINTR|NM_DEBUG_TXINTR)) {
+ nm_prlim(5, "received %s queue %d", work_done ? "RX" : "TX" , q);
}
if (q >= nma_get_nrings(na, t))
@@ -3879,7 +4084,7 @@
struct ifnet *ifp = na->ifp;
/* We undo the setup for intercepting packets only if we are the
- * last user of this adapapter. */
+ * last user of this adapter. */
if (na->active_fds > 0) {
return;
}
@@ -3890,7 +4095,6 @@
na->na_flags &= ~NAF_NETMAP_ON;
}
-
/*
* Module loader and unloader
*
@@ -3915,7 +4119,7 @@
netmap_uninit_bridges();
netmap_mem_fini();
NMG_LOCK_DESTROY();
- nm_prinf("netmap: unloaded module.\n");
+ nm_prinf("netmap: unloaded module.");
}
@@ -3952,7 +4156,7 @@
if (error)
goto fail;
- nm_prinf("netmap: loaded module\n");
+ nm_prinf("netmap: loaded module");
return (0);
fail:
netmap_fini();
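
Among other things, the netmap.c changes above add the NETMAP_REQ_CSB_ENABLE request and the NETMAP_REQ_OPT_CSB option, validated by netmap_csb_validate(): the file descriptor must be bound with NR_EXCLUSIVE, and the application supplies one nm_csb_atok plus one nm_csb_ktoa entry per bound ring, TX rings first, with each array aligned to its entry size. A userspace sketch of enabling CSB mode on an already-registered port follows; the nmreq_opt_csb members other than csb_atok/csb_ktoa (in particular the embedded option header, written here as nro_opt) are assumptions based on net/netmap.h, not something this diff shows.

#include <sys/types.h>
#include <sys/ioctl.h>
#include <stdint.h>
#include <string.h>
#include <net/netmap.h>
#include <net/netmap_user.h>

/* Illustrative sketch: enable CSB mode on a netmap fd that was already
 * registered (NETMAP_REQ_REGISTER) with NR_EXCLUSIVE set in nr_flags. */
static int
enable_csb(int fd, struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa)
{
	struct nmreq_opt_csb opt;
	struct nmreq_header hdr;

	memset(&opt, 0, sizeof(opt));
	opt.nro_opt.nro_reqtype = NETMAP_REQ_OPT_CSB;	/* nro_opt: assumed field name */
	opt.csb_atok = (uintptr_t)atok;	/* application -> kernel entries */
	opt.csb_ktoa = (uintptr_t)ktoa;	/* kernel -> application entries */

	memset(&hdr, 0, sizeof(hdr));
	hdr.nr_version = NETMAP_API;
	hdr.nr_reqtype = NETMAP_REQ_CSB_ENABLE;	/* no request body, only the option */
	hdr.nr_options = (uintptr_t)&opt;

	return (ioctl(fd, NIOCCTRL, &hdr));	/* 0 on success */
}

The arrays passed in must hold one entry per bound ring (TX first), be aligned to their entry size (e.g. allocated with posix_memalign()), and stay valid for as long as CSB mode or a sync kloop (NETMAP_REQ_SYNC_KLOOP_START) is in use, since the kernel reads and writes them through the addresses recorded here.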
Index: head/sys/dev/netmap/netmap_bdg.h
===================================================================
--- head/sys/dev/netmap/netmap_bdg.h
+++ head/sys/dev/netmap/netmap_bdg.h
@@ -44,6 +44,40 @@
#endif /* __FreeBSD__ */
+/*
+ * The following bridge-related functions are used by other
+ * kernel modules.
+ *
+ * VALE only supports unicast or broadcast. The lookup
+ * function can return 0 .. NM_BDG_MAXPORTS-1 for regular ports,
+ * NM_BDG_MAXPORTS for broadcast, NM_BDG_MAXPORTS+1 to indicate
+ * drop.
+ */
+typedef uint32_t (*bdg_lookup_fn_t)(struct nm_bdg_fwd *ft, uint8_t *ring_nr,
+ struct netmap_vp_adapter *, void *private_data);
+typedef int (*bdg_config_fn_t)(struct nm_ifreq *);
+typedef void (*bdg_dtor_fn_t)(const struct netmap_vp_adapter *);
+typedef void *(*bdg_update_private_data_fn_t)(void *private_data, void *callback_data, int *error);
+typedef int (*bdg_vp_create_fn_t)(struct nmreq_header *hdr,
+ struct ifnet *ifp, struct netmap_mem_d *nmd,
+ struct netmap_vp_adapter **ret);
+typedef int (*bdg_bwrap_attach_fn_t)(const char *nr_name, struct netmap_adapter *hwna);
+struct netmap_bdg_ops {
+ bdg_lookup_fn_t lookup;
+ bdg_config_fn_t config;
+ bdg_dtor_fn_t dtor;
+ bdg_vp_create_fn_t vp_create;
+ bdg_bwrap_attach_fn_t bwrap_attach;
+ char name[IFNAMSIZ];
+};
+int netmap_bwrap_attach(const char *name, struct netmap_adapter *, struct netmap_bdg_ops *);
+int netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *private_data, void *auth_token);
+
+#define NM_BRIDGES 8 /* number of bridges */
+#define NM_BDG_MAXPORTS 254 /* up to 254 */
+#define NM_BDG_BROADCAST NM_BDG_MAXPORTS
+#define NM_BDG_NOPORT (NM_BDG_MAXPORTS+1)
+
/* XXX Should go away after fixing find_bridge() - Michio */
#define NM_BDG_HASH 1024 /* forwarding table entries */
@@ -95,7 +129,8 @@
* different ring index.
* The function is set by netmap_bdg_regops().
*/
- struct netmap_bdg_ops *bdg_ops;
+ struct netmap_bdg_ops bdg_ops;
+ struct netmap_bdg_ops bdg_saved_ops;
/*
* Contains the data structure used by the bdg_ops.lookup function.
@@ -111,6 +146,7 @@
*/
#define NM_BDG_ACTIVE 1
#define NM_BDG_EXCLUSIVE 2
+#define NM_BDG_NEED_BWRAP 4
uint8_t bdg_flags;
@@ -149,6 +185,13 @@
struct netmap_adapter *hwna);
int netmap_bwrap_krings_create_common(struct netmap_adapter *na);
void netmap_bwrap_krings_delete_common(struct netmap_adapter *na);
+struct nm_bridge *netmap_init_bridges2(u_int);
+void netmap_uninit_bridges2(struct nm_bridge *, u_int);
+int netmap_bdg_update_private_data(const char *name, bdg_update_private_data_fn_t callback,
+ void *callback_data, void *auth_token);
+int netmap_bdg_config(struct nm_ifreq *nifr);
+int nm_is_bwrap(struct netmap_adapter *);
+
#define NM_NEED_BWRAP (-2)
#endif /* _NET_NETMAP_BDG_H_ */
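
netmap_bdg.h now exposes struct netmap_bdg_ops and its callback typedefs so that external kernel modules can plug their own forwarding logic into a bridge via netmap_bdg_regops() and later adjust its state with netmap_bdg_update_private_data(). A minimal sketch of such a client follows, using only the declarations above; the my_* names and the chosen bridge name are hypothetical, and the bridge (vale0:) is assumed to exist already.

/* Hypothetical external module registering a custom lookup function.
 * Assumes the usual netmap kernel headers (netmap_kern.h, netmap_bdg.h). */
static uint32_t
my_lookup(struct nm_bdg_fwd *ft, uint8_t *ring_nr,
	struct netmap_vp_adapter *vpna, void *private_data)
{
	/* Flood every frame; return NM_BDG_NOPORT instead to drop it,
	 * or a port index in 0..NM_BDG_MAXPORTS-1 for unicast. */
	return (NM_BDG_BROADCAST);
}

static struct netmap_bdg_ops my_bdg_ops = {
	.lookup = my_lookup,
	/* .config, .dtor, .vp_create and .bwrap_attach left NULL here */
	.name = "vale",		/* bridge name prefix (hypothetical choice) */
};

static int
my_module_load(void)
{
	/* Attach the ops to bridge vale0:; private_data and auth_token
	 * are optional and omitted in this sketch. */
	return (netmap_bdg_regops("vale0:", &my_bdg_ops, NULL, NULL));
}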
Index: head/sys/dev/netmap/netmap_bdg.c
===================================================================
--- head/sys/dev/netmap/netmap_bdg.c
+++ head/sys/dev/netmap/netmap_bdg.c
@@ -126,7 +126,7 @@
* Right now we have a static array and deletions are protected
* by an exclusive lock.
*/
-static struct nm_bridge *nm_bridges;
+struct nm_bridge *nm_bridges;
#endif /* !CONFIG_NET_NS */
@@ -139,15 +139,15 @@
(c == '_');
}
-/* Validate the name of a VALE bridge port and return the
+/* Validate the name of a bdg port and return the
* position of the ":" character. */
static int
-nm_vale_name_validate(const char *name)
+nm_bdg_name_validate(const char *name, size_t prefixlen)
{
int colon_pos = -1;
int i;
- if (!name || strlen(name) < strlen(NM_BDG_NAME)) {
+ if (!name || strlen(name) < prefixlen) {
return -1;
}
@@ -186,9 +186,10 @@
netmap_bns_getbridges(&bridges, &num_bridges);
- namelen = nm_vale_name_validate(name);
+ namelen = nm_bdg_name_validate(name,
+ (ops != NULL ? strlen(ops->name) : 0));
if (namelen < 0) {
- D("invalid bridge name %s", name ? name : NULL);
+ nm_prerr("invalid bridge name %s", name ? name : NULL);
return NULL;
}
@@ -213,7 +214,7 @@
b->bdg_active_ports);
b->ht = nm_os_malloc(sizeof(struct nm_hash_ent) * NM_BDG_HASH);
if (b->ht == NULL) {
- D("failed to allocate hash table");
+ nm_prerr("failed to allocate hash table");
return NULL;
}
strncpy(b->bdg_basename, name, namelen);
@@ -222,7 +223,7 @@
for (i = 0; i < NM_BDG_MAXPORTS; i++)
b->bdg_port_index[i] = i;
/* set the default function */
- b->bdg_ops = ops;
+ b->bdg_ops = b->bdg_saved_ops = *ops;
b->private_data = b->ht;
b->bdg_flags = 0;
NM_BNS_GET(b);
@@ -240,13 +241,49 @@
ND("marking bridge %s as free", b->bdg_basename);
nm_os_free(b->ht);
- b->bdg_ops = NULL;
+ memset(&b->bdg_ops, 0, sizeof(b->bdg_ops));
+ memset(&b->bdg_saved_ops, 0, sizeof(b->bdg_saved_ops));
b->bdg_flags = 0;
NM_BNS_PUT(b);
return 0;
}
+/* Called by external kernel modules (e.g., Openvswitch).
+ * to modify the private data previously given to regops().
+ * 'name' may be just bridge's name (including ':' if it
+ * is not just NM_BDG_NAME).
+ * Called without NMG_LOCK.
+ */
+int
+netmap_bdg_update_private_data(const char *name, bdg_update_private_data_fn_t callback,
+ void *callback_data, void *auth_token)
+{
+ void *private_data = NULL;
+ struct nm_bridge *b;
+ int error = 0;
+ NMG_LOCK();
+ b = nm_find_bridge(name, 0 /* don't create */, NULL);
+ if (!b) {
+ error = EINVAL;
+ goto unlock_update_priv;
+ }
+ if (!nm_bdg_valid_auth_token(b, auth_token)) {
+ error = EACCES;
+ goto unlock_update_priv;
+ }
+ BDG_WLOCK(b);
+ private_data = callback(b->private_data, callback_data, &error);
+ b->private_data = private_data;
+ BDG_WUNLOCK(b);
+
+unlock_update_priv:
+ NMG_UNLOCK();
+ return error;
+}
+
+
+
/* remove from bridge b the ports in slots hw and sw
* (sw can be -1 if not needed)
*/
@@ -267,8 +304,8 @@
acquire BDG_WLOCK() and copy back the array.
*/
- if (netmap_verbose)
- D("detach %d and %d (lim %d)", hw, sw, lim);
+ if (netmap_debug & NM_DEBUG_BDG)
+ nm_prinf("detach %d and %d (lim %d)", hw, sw, lim);
/* make a copy of the list of active ports, update it,
* and then copy back within BDG_WLOCK().
*/
@@ -291,12 +328,12 @@
}
}
if (hw >= 0 || sw >= 0) {
- D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
+ nm_prerr("delete failed hw %d sw %d, should panic...", hw, sw);
}
BDG_WLOCK(b);
- if (b->bdg_ops->dtor)
- b->bdg_ops->dtor(b->bdg_ports[s_hw]);
+ if (b->bdg_ops.dtor)
+ b->bdg_ops.dtor(b->bdg_ports[s_hw]);
b->bdg_ports[s_hw] = NULL;
if (s_sw >= 0) {
b->bdg_ports[s_sw] = NULL;
@@ -402,7 +439,7 @@
/* yes we should, see if we have space to attach entries */
needed = 2; /* in some cases we only need 1 */
if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
- D("bridge full %d, cannot create new port", b->bdg_active_ports);
+ nm_prerr("bridge full %d, cannot create new port", b->bdg_active_ports);
return ENOMEM;
}
/* record the next two ports available, but do not allocate yet */
@@ -428,9 +465,10 @@
}
/* bdg_netmap_attach creates a struct netmap_adapter */
- error = b->bdg_ops->vp_create(hdr, NULL, nmd, &vpna);
+ error = b->bdg_ops.vp_create(hdr, NULL, nmd, &vpna);
if (error) {
- D("error %d", error);
+ if (netmap_debug & NM_DEBUG_BDG)
+ nm_prerr("error %d", error);
goto out;
}
/* shortcut - we can skip get_hw_na(),
@@ -459,7 +497,7 @@
/* host adapter might not be created */
error = hw->nm_bdg_attach(nr_name, hw, b);
if (error == NM_NEED_BWRAP) {
- error = b->bdg_ops->bwrap_attach(nr_name, hw);
+ error = b->bdg_ops.bwrap_attach(nr_name, hw);
}
if (error)
goto out;
@@ -502,143 +540,14 @@
return error;
}
-/* Process NETMAP_REQ_VALE_ATTACH.
- */
-int
-nm_bdg_ctl_attach(struct nmreq_header *hdr, void *auth_token)
-{
- struct nmreq_vale_attach *req =
- (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
- struct netmap_vp_adapter * vpna;
- struct netmap_adapter *na = NULL;
- struct netmap_mem_d *nmd = NULL;
- struct nm_bridge *b = NULL;
- int error;
- NMG_LOCK();
- /* permission check for modified bridges */
- b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
- if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
- error = EACCES;
- goto unlock_exit;
- }
-
- if (req->reg.nr_mem_id) {
- nmd = netmap_mem_find(req->reg.nr_mem_id);
- if (nmd == NULL) {
- error = EINVAL;
- goto unlock_exit;
- }
- }
-
- /* check for existing one */
- error = netmap_get_vale_na(hdr, &na, nmd, 0);
- if (na) {
- error = EBUSY;
- goto unref_exit;
- }
- error = netmap_get_vale_na(hdr, &na,
- nmd, 1 /* create if not exists */);
- if (error) { /* no device */
- goto unlock_exit;
- }
-
- if (na == NULL) { /* VALE prefix missing */
- error = EINVAL;
- goto unlock_exit;
- }
-
- if (NETMAP_OWNED_BY_ANY(na)) {
- error = EBUSY;
- goto unref_exit;
- }
-
- if (na->nm_bdg_ctl) {
- /* nop for VALE ports. The bwrap needs to put the hwna
- * in netmap mode (see netmap_bwrap_bdg_ctl)
- */
- error = na->nm_bdg_ctl(hdr, na);
- if (error)
- goto unref_exit;
- ND("registered %s to netmap-mode", na->name);
- }
- vpna = (struct netmap_vp_adapter *)na;
- req->port_index = vpna->bdg_port;
- NMG_UNLOCK();
- return 0;
-
-unref_exit:
- netmap_adapter_put(na);
-unlock_exit:
- NMG_UNLOCK();
- return error;
-}
-
-static inline int
+int
nm_is_bwrap(struct netmap_adapter *na)
{
return na->nm_register == netmap_bwrap_reg;
}
-/* Process NETMAP_REQ_VALE_DETACH.
- */
-int
-nm_bdg_ctl_detach(struct nmreq_header *hdr, void *auth_token)
-{
- struct nmreq_vale_detach *nmreq_det = (void *)(uintptr_t)hdr->nr_body;
- struct netmap_vp_adapter *vpna;
- struct netmap_adapter *na;
- struct nm_bridge *b = NULL;
- int error;
- NMG_LOCK();
- /* permission check for modified bridges */
- b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
- if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
- error = EACCES;
- goto unlock_exit;
- }
-
- error = netmap_get_vale_na(hdr, &na, NULL, 0 /* don't create */);
- if (error) { /* no device, or another bridge or user owns the device */
- goto unlock_exit;
- }
-
- if (na == NULL) { /* VALE prefix missing */
- error = EINVAL;
- goto unlock_exit;
- } else if (nm_is_bwrap(na) &&
- ((struct netmap_bwrap_adapter *)na)->na_polling_state) {
- /* Don't detach a NIC with polling */
- error = EBUSY;
- goto unref_exit;
- }
-
- vpna = (struct netmap_vp_adapter *)na;
- if (na->na_vp != vpna) {
- /* trying to detach first attach of VALE persistent port attached
- * to 2 bridges
- */
- error = EBUSY;
- goto unref_exit;
- }
- nmreq_det->port_index = vpna->bdg_port;
-
- if (na->nm_bdg_ctl) {
- /* remove the port from bridge. The bwrap
- * also needs to put the hwna in normal mode
- */
- error = na->nm_bdg_ctl(hdr, na);
- }
-
-unref_exit:
- netmap_adapter_put(na);
-unlock_exit:
- NMG_UNLOCK();
- return error;
-
-}
-
struct nm_bdg_polling_state;
struct
nm_bdg_kthread {
@@ -661,7 +570,7 @@
};
static void
-netmap_bwrap_polling(void *data, int is_kthread)
+netmap_bwrap_polling(void *data)
{
struct nm_bdg_kthread *nbk = data;
struct netmap_bwrap_adapter *bna;
@@ -693,7 +602,6 @@
bzero(&kcfg, sizeof(kcfg));
kcfg.worker_fn = netmap_bwrap_polling;
- kcfg.use_kthread = 1;
for (i = 0; i < bps->ncpus; i++) {
struct nm_bdg_kthread *t = bps->kthreads + i;
int all = (bps->ncpus == 1 &&
@@ -703,8 +611,9 @@
t->bps = bps;
t->qfirst = all ? bps->qfirst /* must be 0 */: affinity;
t->qlast = all ? bps->qlast : t->qfirst + 1;
- D("kthread %d a:%u qf:%u ql:%u", i, affinity, t->qfirst,
- t->qlast);
+ if (netmap_verbose)
+ nm_prinf("kthread %d a:%u qf:%u ql:%u", i, affinity, t->qfirst,
+ t->qlast);
kcfg.type = i;
kcfg.worker_private = t;
@@ -732,7 +641,7 @@
int error, i, j;
if (!bps) {
- D("polling is not configured");
+ nm_prerr("polling is not configured");
return EFAULT;
}
bps->stopped = false;
@@ -741,7 +650,7 @@
struct nm_bdg_kthread *t = bps->kthreads + i;
error = nm_os_kctx_worker_start(t->nmk);
if (error) {
- D("error in nm_kthread_start()");
+ nm_prerr("error in nm_kthread_start(): %d", error);
goto cleanup;
}
}
@@ -784,10 +693,10 @@
avail_cpus = nm_os_ncpus();
if (req_cpus == 0) {
- D("req_cpus must be > 0");
+ nm_prerr("req_cpus must be > 0");
return EINVAL;
} else if (req_cpus >= avail_cpus) {
- D("Cannot use all the CPUs in the system");
+ nm_prerr("Cannot use all the CPUs in the system");
return EINVAL;
}
@@ -797,7 +706,7 @@
* For example, if nr_first_cpu_id=2 and nr_num_polling_cpus=2,
* ring 2 and 3 are polled by core 2 and 3, respectively. */
if (i + req_cpus > nma_get_nrings(na, NR_RX)) {
- D("Rings %u-%u not in range (have %d rings)",
+ nm_prerr("Rings %u-%u not in range (have %d rings)",
i, i + req_cpus, nma_get_nrings(na, NR_RX));
return EINVAL;
}
@@ -809,7 +718,7 @@
/* Poll all the rings using a core specified by nr_first_cpu_id.
* the number of cores must be 1. */
if (req_cpus != 1) {
- D("ncpus must be 1 for NETMAP_POLLING_MODE_SINGLE_CPU "
+ nm_prerr("ncpus must be 1 for NETMAP_POLLING_MODE_SINGLE_CPU "
"(was %d)", req_cpus);
return EINVAL;
}
@@ -817,7 +726,7 @@
qlast = nma_get_nrings(na, NR_RX);
core_from = i;
} else {
- D("Invalid polling mode");
+ nm_prerr("Invalid polling mode");
return EINVAL;
}
@@ -826,7 +735,7 @@
bps->qlast = qlast;
bps->cpu_from = core_from;
bps->ncpus = req_cpus;
- D("%s qfirst %u qlast %u cpu_from %u ncpus %u",
+ nm_prinf("%s qfirst %u qlast %u cpu_from %u ncpus %u",
req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU ?
"MULTI" : "SINGLE",
qfirst, qlast, core_from, req_cpus);
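
For reference only, a small standalone sketch (not part of the patch) of the ring-to-CPU layout that the checks above enforce: MULTI_CPU polls one ring per core starting at nr_first_cpu_id, while SINGLE_CPU polls every ring from a single core. The example_ names are hypothetical, the NETMAP_POLLING_MODE_* constants come from net/netmap.h, and the SINGLE_CPU branch assumes qfirst starts at 0 as in the truncated hunk.

#include <stdint.h>
#include <net/netmap.h>	/* NETMAP_POLLING_MODE_* */

static void
example_polling_layout(uint32_t mode, uint32_t first_cpu, uint32_t ncpus,
	uint32_t nrings, uint32_t *qfirst, uint32_t *qlast, uint32_t *core_from)
{
	if (mode == NETMAP_POLLING_MODE_MULTI_CPU) {
		/* e.g. first_cpu=2, ncpus=2: rings 2-3 polled by cores 2-3 */
		*qfirst = first_cpu;
		*qlast = first_cpu + ncpus;
		*core_from = first_cpu;
	} else {
		/* NETMAP_POLLING_MODE_SINGLE_CPU: one core (first_cpu) polls
		 * all the rings, so ncpus must be 1 */
		*qfirst = 0;
		*qlast = nrings;
		*core_from = first_cpu;
	}
}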
@@ -842,7 +751,7 @@
bna = (struct netmap_bwrap_adapter *)na;
if (bna->na_polling_state) {
- D("ERROR adapter already in polling mode");
+ nm_prerr("ERROR adapter already in polling mode");
return EFAULT;
}
@@ -871,7 +780,7 @@
/* start kthread now */
error = nm_bdg_polling_start_kthreads(bps);
if (error) {
- D("ERROR nm_bdg_polling_start_kthread()");
+ nm_prerr("ERROR nm_bdg_polling_start_kthread()");
nm_os_free(bps->kthreads);
nm_os_free(bps);
bna->na_polling_state = NULL;
@@ -887,7 +796,7 @@
struct nm_bdg_polling_state *bps;
if (!bna->na_polling_state) {
- D("ERROR adapter is not in polling mode");
+ nm_prerr("ERROR adapter is not in polling mode");
return EFAULT;
}
bps = bna->na_polling_state;
@@ -932,86 +841,6 @@
return error;
}
-/* Process NETMAP_REQ_VALE_LIST. */
-int
-netmap_bdg_list(struct nmreq_header *hdr)
-{
- struct nmreq_vale_list *req =
- (struct nmreq_vale_list *)(uintptr_t)hdr->nr_body;
- int namelen = strlen(hdr->nr_name);
- struct nm_bridge *b, *bridges;
- struct netmap_vp_adapter *vpna;
- int error = 0, i, j;
- u_int num_bridges;
-
- netmap_bns_getbridges(&bridges, &num_bridges);
-
- /* this is used to enumerate bridges and ports */
- if (namelen) { /* look up indexes of bridge and port */
- if (strncmp(hdr->nr_name, NM_BDG_NAME,
- strlen(NM_BDG_NAME))) {
- return EINVAL;
- }
- NMG_LOCK();
- b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
- if (!b) {
- NMG_UNLOCK();
- return ENOENT;
- }
-
- req->nr_bridge_idx = b - bridges; /* bridge index */
- req->nr_port_idx = NM_BDG_NOPORT;
- for (j = 0; j < b->bdg_active_ports; j++) {
- i = b->bdg_port_index[j];
- vpna = b->bdg_ports[i];
- if (vpna == NULL) {
- D("This should not happen");
- continue;
- }
- /* the former and the latter identify a
- * virtual port and a NIC, respectively
- */
- if (!strcmp(vpna->up.name, hdr->nr_name)) {
- req->nr_port_idx = i; /* port index */
- break;
- }
- }
- NMG_UNLOCK();
- } else {
- /* return the first non-empty entry starting from
- * bridge nr_arg1 and port nr_arg2.
- *
- * Users can detect the end of the same bridge by
- * seeing the new and old value of nr_arg1, and can
- * detect the end of all the bridge by error != 0
- */
- i = req->nr_bridge_idx;
- j = req->nr_port_idx;
-
- NMG_LOCK();
- for (error = ENOENT; i < NM_BRIDGES; i++) {
- b = bridges + i;
- for ( ; j < NM_BDG_MAXPORTS; j++) {
- if (b->bdg_ports[j] == NULL)
- continue;
- vpna = b->bdg_ports[j];
- /* write back the VALE switch name */
- strncpy(hdr->nr_name, vpna->up.name,
- (size_t)IFNAMSIZ);
- error = 0;
- goto out;
- }
- j = 0; /* following bridges scan from 0 */
- }
- out:
- req->nr_bridge_idx = i;
- req->nr_port_idx = j;
- NMG_UNLOCK();
- }
-
- return error;
-}
-
/* Called by external kernel modules (e.g., Openvswitch)
* to set configure/lookup/dtor functions of a VALE instance.
* Register callbacks to the given bridge. 'name' may be just
@@ -1041,12 +870,19 @@
if (!bdg_ops) {
/* resetting the bridge */
bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
- b->bdg_ops = NULL;
+ b->bdg_ops = b->bdg_saved_ops;
b->private_data = b->ht;
} else {
/* modifying the bridge */
b->private_data = private_data;
- b->bdg_ops = bdg_ops;
+#define nm_bdg_override(m) if (bdg_ops->m) b->bdg_ops.m = bdg_ops->m
+ nm_bdg_override(lookup);
+ nm_bdg_override(config);
+ nm_bdg_override(dtor);
+ nm_bdg_override(vp_create);
+ nm_bdg_override(bwrap_attach);
+#undef nm_bdg_override
+
}
BDG_WUNLOCK(b);
@@ -1071,8 +907,8 @@
NMG_UNLOCK();
/* Don't call config() with NMG_LOCK() held */
BDG_RLOCK(b);
- if (b->bdg_ops->config != NULL)
- error = b->bdg_ops->config(nr);
+ if (b->bdg_ops.config != NULL)
+ error = b->bdg_ops.config(nr);
BDG_RUNLOCK(b);
return error;
}
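
A hedged usage sketch, not part of this change: with the nm_bdg_override() semantics introduced above, an external module (e.g. Openvswitch) that supplies only a lookup callback leaves the other bridge callbacks at their current values, and a later call with a NULL ops pointer restores the saved defaults. The example_ names and the "vale0:" bridge name are hypothetical; the types, NM_BDG_BROADCAST and the netmap_bdg_regops() signature are the ones visible in the netmap_kern.h hunks of this diff.

/* Flood every frame, purely for illustration. */
static uint32_t
example_lookup(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
	struct netmap_vp_adapter *vpna, void *private_data)
{
	(void)ft; (void)dst_ring; (void)vpna; (void)private_data;
	return NM_BDG_BROADCAST;
}

static int
example_regops(void *auth_token)
{
	static struct netmap_bdg_ops ops = { .lookup = example_lookup };
	int error;

	/* Override lookup only; config/dtor/vp_create/bwrap_attach keep
	 * their current values. */
	error = netmap_bdg_regops("vale0:", &ops, NULL /* private_data */,
	    auth_token);
	if (error)
		return error;
	/* ... later: restore the saved callbacks. */
	return netmap_bdg_regops("vale0:", NULL, NULL, auth_token);
}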
@@ -1137,7 +973,7 @@
int n;
if (head > lim) {
- D("ouch dangerous reset!!!");
+ nm_prerr("ouch dangerous reset!!!");
n = netmap_ring_reinit(kring);
goto done;
}
@@ -1154,7 +990,7 @@
void *addr = NMB(na, slot);
if (addr == NETMAP_BUF_BASE(kring->na)) { /* bad buf */
- D("bad buffer index %d, ignore ?",
+ nm_prerr("bad buffer index %d, ignore ?",
slot->buf_idx);
}
slot->flags &= ~NS_BUF_CHANGED;
@@ -1283,8 +1119,8 @@
int ret = NM_IRQ_COMPLETED;
int error;
- if (netmap_verbose)
- D("%s %s 0x%x", na->name, kring->name, flags);
+ if (netmap_debug & NM_DEBUG_RXINTR)
+ nm_prinf("%s %s 0x%x", na->name, kring->name, flags);
bkring = vpna->up.tx_rings[ring_nr];
@@ -1293,8 +1129,8 @@
return EIO;
}
- if (netmap_verbose)
- D("%s head %d cur %d tail %d", na->name,
+ if (netmap_debug & NM_DEBUG_RXINTR)
+ nm_prinf("%s head %d cur %d tail %d", na->name,
kring->rhead, kring->rcur, kring->rtail);
/* simulate a user wakeup on the rx ring
@@ -1305,7 +1141,7 @@
goto put_out;
if (kring->nr_hwcur == kring->nr_hwtail) {
if (netmap_verbose)
- D("how strange, interrupt with no packets on %s",
+ nm_prerr("how strange, interrupt with no packets on %s",
na->name);
goto put_out;
}
@@ -1593,8 +1429,8 @@
ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
na->name, ring_n,
kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
- ring->head, ring->cur, ring->tail,
- hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_ring->rtail);
+ kring->rhead, kring->rcur, kring->rtail,
+ hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
/* second step: the new packets are sent on the tx ring
* (which is actually the same ring)
*/
@@ -1612,7 +1448,7 @@
ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
na->name, ring_n,
kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
- ring->head, ring->cur, ring->tail,
+ kring->rhead, kring->rcur, kring->rtail,
hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
put_out:
nm_kr_put(hw_kring);
@@ -1688,7 +1524,7 @@
/* make sure the NIC is not already in use */
if (NETMAP_OWNED_BY_ANY(hwna)) {
- D("NIC %s busy, cannot attach to bridge", hwna->name);
+ nm_prerr("NIC %s busy, cannot attach to bridge", hwna->name);
return EBUSY;
}
@@ -1756,6 +1592,8 @@
hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */
hostna->rx_buf_maxsize = hwna->rx_buf_maxsize;
}
+ if (hwna->na_flags & NAF_MOREFRAG)
+ na->na_flags |= NAF_MOREFRAG;
ND("%s<->%s txr %d txd %d rxr %d rxd %d",
na->name, ifp->if_xname,
Index: head/sys/dev/netmap/netmap_freebsd.c
===================================================================
--- head/sys/dev/netmap/netmap_freebsd.c
+++ head/sys/dev/netmap/netmap_freebsd.c
@@ -735,9 +735,9 @@
}
#endif /* WITH_EXTMEM */
-/* ======================== PTNETMAP SUPPORT ========================== */
+/* ================== PTNETMAP GUEST SUPPORT ==================== */
-#ifdef WITH_PTNETMAP_GUEST
+#ifdef WITH_PTNETMAP
#include <sys/bus.h>
#include <sys/rman.h>
#include <machine/bus.h> /* bus_dmamap_* */
@@ -932,7 +932,7 @@
return bus_generic_shutdown(dev);
}
-#endif /* WITH_PTNETMAP_GUEST */
+#endif /* WITH_PTNETMAP */
/*
* In order to track whether pages are still mapped, we hook into
@@ -1145,8 +1145,8 @@
}
struct nm_kctx_ctx {
- struct thread *user_td; /* thread user-space (kthread creator) to send ioctl */
- struct ptnetmap_cfgentry_bhyve cfg;
+ /* Userspace thread (kthread creator). */
+ struct thread *user_td;
/* worker function and parameter */
nm_kctx_worker_fn_t worker_fn;
@@ -1161,56 +1161,17 @@
struct nm_kctx {
struct thread *worker;
struct mtx worker_lock;
- uint64_t scheduled; /* pending wake_up request */
struct nm_kctx_ctx worker_ctx;
int run; /* used to stop kthread */
int attach_user; /* kthread attached to user_process */
int affinity;
};
-void inline
-nm_os_kctx_worker_wakeup(struct nm_kctx *nmk)
-{
- /*
- * There may be a race between FE and BE,
- * which call both this function, and worker kthread,
- * that reads nmk->scheduled.
- *
- * For us it is not important the counter value,
- * but simply that it has changed since the last
- * time the kthread saw it.
- */
- mtx_lock(&nmk->worker_lock);
- nmk->scheduled++;
- if (nmk->worker_ctx.cfg.wchan) {
- wakeup((void *)(uintptr_t)nmk->worker_ctx.cfg.wchan);
- }
- mtx_unlock(&nmk->worker_lock);
-}
-
-void inline
-nm_os_kctx_send_irq(struct nm_kctx *nmk)
-{
- struct nm_kctx_ctx *ctx = &nmk->worker_ctx;
- int err;
-
- if (ctx->user_td && ctx->cfg.ioctl_fd > 0) {
- err = kern_ioctl(ctx->user_td, ctx->cfg.ioctl_fd, ctx->cfg.ioctl_cmd,
- (caddr_t)&ctx->cfg.ioctl_data);
- if (err) {
- D("kern_ioctl error: %d ioctl parameters: fd %d com %lu data %p",
- err, ctx->cfg.ioctl_fd, (unsigned long)ctx->cfg.ioctl_cmd,
- &ctx->cfg.ioctl_data);
- }
- }
-}
-
static void
nm_kctx_worker(void *data)
{
struct nm_kctx *nmk = data;
struct nm_kctx_ctx *ctx = &nmk->worker_ctx;
- uint64_t old_scheduled = nmk->scheduled;
if (nmk->affinity >= 0) {
thread_lock(curthread);
@@ -1231,30 +1192,8 @@
kthread_suspend_check();
}
- /*
- * if wchan is not defined, we don't have notification
- * mechanism and we continually execute worker_fn()
- */
- if (!ctx->cfg.wchan) {
- ctx->worker_fn(ctx->worker_private, 1); /* worker body */
- } else {
- /* checks if there is a pending notification */
- mtx_lock(&nmk->worker_lock);
- if (likely(nmk->scheduled != old_scheduled)) {
- old_scheduled = nmk->scheduled;
- mtx_unlock(&nmk->worker_lock);
-
- ctx->worker_fn(ctx->worker_private, 1); /* worker body */
-
- continue;
- } else if (nmk->run) {
- /* wait on event with one second timeout */
- msleep((void *)(uintptr_t)ctx->cfg.wchan, &nmk->worker_lock,
- 0, "nmk_ev", hz);
- nmk->scheduled++;
- }
- mtx_unlock(&nmk->worker_lock);
- }
+ /* Continuously execute worker process. */
+ ctx->worker_fn(ctx->worker_private); /* worker body */
}
kthread_exit();
@@ -1284,11 +1223,6 @@
/* attach kthread to user process (ptnetmap) */
nmk->attach_user = cfg->attach_user;
- /* store kick/interrupt configuration */
- if (opaque) {
- nmk->worker_ctx.cfg = *((struct ptnetmap_cfgentry_bhyve *)opaque);
- }
-
return nmk;
}
@@ -1298,9 +1232,13 @@
struct proc *p = NULL;
int error = 0;
- if (nmk->worker) {
+ /* Temporarily disable this function as it is currently broken
+ * and causes kernel crashes. The failure can be triggered by
+ * the "vale_polling_enable_disable" test in ctrl-api-test.c. */
+ return EOPNOTSUPP;
+
+ if (nmk->worker)
return EBUSY;
- }
/* check if we want to attach kthread to user process */
if (nmk->attach_user) {
@@ -1329,15 +1267,14 @@
void
nm_os_kctx_worker_stop(struct nm_kctx *nmk)
{
- if (!nmk->worker) {
+ if (!nmk->worker)
return;
- }
+
/* tell to kthread to exit from main loop */
nmk->run = 0;
/* wake up kthread if it sleeps */
kthread_resume(nmk->worker);
- nm_os_kctx_worker_wakeup(nmk);
nmk->worker = NULL;
}
@@ -1347,11 +1284,9 @@
{
if (!nmk)
return;
- if (nmk->worker) {
- nm_os_kctx_worker_stop(nmk);
- }
- memset(&nmk->worker_ctx.cfg, 0, sizeof(nmk->worker_ctx.cfg));
+ if (nmk->worker)
+ nm_os_kctx_worker_stop(nmk);
free(nmk, M_DEVBUF);
}
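
For context, a minimal sketch (not part of the patch) of how a caller uses the simplified kernel-context API after this change: the worker now takes a single void * argument and runs continuously, and the kick/irq plumbing (nm_os_kctx_worker_wakeup(), nm_os_kctx_send_irq()) is gone. The example_ names are hypothetical; note that nm_os_kctx_worker_start() currently returns EOPNOTSUPP on FreeBSD, per the workaround above.

static void
example_worker(void *data)
{
	/* Poll whatever state 'data' points to; called in a loop by
	 * nm_kctx_worker() until nm_os_kctx_worker_stop() clears 'run'. */
	(void)data;
}

static struct nm_kctx *
example_kctx_start(void *arg)
{
	struct nm_kctx_cfg cfg;
	struct nm_kctx *nmk;

	bzero(&cfg, sizeof(cfg));
	cfg.type = 0;
	cfg.worker_fn = example_worker;
	cfg.worker_private = arg;
	cfg.attach_user = 0;

	nmk = nm_os_kctx_create(&cfg, NULL /* opaque, unused here */);
	if (nmk == NULL || nm_os_kctx_worker_start(nmk) != 0) {
		if (nmk != NULL)
			nm_os_kctx_destroy(nmk);
		return NULL;
	}
	return nmk;
}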
Index: head/sys/dev/netmap/netmap_generic.c
===================================================================
--- head/sys/dev/netmap/netmap_generic.c
+++ head/sys/dev/netmap/netmap_generic.c
@@ -81,7 +81,6 @@
#include <net/if_var.h>
#include <machine/bus.h> /* bus_dmamap_* in netmap_kern.h */
-// XXX temporary - D() defined here
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>
@@ -179,7 +178,7 @@
r = mod_timer(&ctx->timer, jiffies +
msecs_to_jiffies(RATE_PERIOD * 1000));
if (unlikely(r))
- D("[v1000] Error: mod_timer()");
+ nm_prerr("mod_timer() failed");
}
static struct rate_context rate_ctx;
@@ -240,14 +239,14 @@
for_each_rx_kring_h(r, kring, na) {
if (nm_kring_pending_off(kring)) {
- D("Emulated adapter: ring '%s' deactivated", kring->name);
+ nm_prinf("Emulated adapter: ring '%s' deactivated", kring->name);
kring->nr_mode = NKR_NETMAP_OFF;
}
}
for_each_tx_kring_h(r, kring, na) {
if (nm_kring_pending_off(kring)) {
kring->nr_mode = NKR_NETMAP_OFF;
- D("Emulated adapter: ring '%s' deactivated", kring->name);
+ nm_prinf("Emulated adapter: ring '%s' deactivated", kring->name);
}
}
@@ -300,11 +299,11 @@
#ifdef RATE_GENERIC
if (--rate_ctx.refcount == 0) {
- D("del_timer()");
+ nm_prinf("del_timer()");
del_timer(&rate_ctx.timer);
}
#endif
- D("Emulated adapter for %s deactivated", na->name);
+ nm_prinf("Emulated adapter for %s deactivated", na->name);
}
return 0;
@@ -329,14 +328,14 @@
}
if (na->active_fds == 0) {
- D("Emulated adapter for %s activated", na->name);
+ nm_prinf("Emulated adapter for %s activated", na->name);
/* Do all memory allocations when (na->active_fds == 0), to
* simplify error management. */
/* Allocate memory for mitigation support on all the rx queues. */
gna->mit = nm_os_malloc(na->num_rx_rings * sizeof(struct nm_generic_mit));
if (!gna->mit) {
- D("mitigation allocation failed");
+ nm_prerr("mitigation allocation failed");
error = ENOMEM;
goto out;
}
@@ -363,7 +362,7 @@
kring->tx_pool =
nm_os_malloc(na->num_tx_desc * sizeof(struct mbuf *));
if (!kring->tx_pool) {
- D("tx_pool allocation failed");
+ nm_prerr("tx_pool allocation failed");
error = ENOMEM;
goto free_tx_pools;
}
@@ -374,14 +373,14 @@
for_each_rx_kring_h(r, kring, na) {
if (nm_kring_pending_on(kring)) {
- D("Emulated adapter: ring '%s' activated", kring->name);
+ nm_prinf("Emulated adapter: ring '%s' activated", kring->name);
kring->nr_mode = NKR_NETMAP_ON;
}
}
for_each_tx_kring_h(r, kring, na) {
if (nm_kring_pending_on(kring)) {
- D("Emulated adapter: ring '%s' activated", kring->name);
+ nm_prinf("Emulated adapter: ring '%s' activated", kring->name);
kring->nr_mode = NKR_NETMAP_ON;
}
}
@@ -399,14 +398,14 @@
/* Prepare to intercept incoming traffic. */
error = nm_os_catch_rx(gna, 1);
if (error) {
- D("nm_os_catch_rx(1) failed (%d)", error);
+ nm_prerr("nm_os_catch_rx(1) failed (%d)", error);
goto free_tx_pools;
}
/* Let netmap control the packet steering. */
error = nm_os_catch_tx(gna, 1);
if (error) {
- D("nm_os_catch_tx(1) failed (%d)", error);
+ nm_prerr("nm_os_catch_tx(1) failed (%d)", error);
goto catch_rx;
}
@@ -414,11 +413,11 @@
#ifdef RATE_GENERIC
if (rate_ctx.refcount == 0) {
- D("setup_timer()");
+ nm_prinf("setup_timer()");
memset(&rate_ctx, 0, sizeof(rate_ctx));
setup_timer(&rate_ctx.timer, &rate_callback, (unsigned long)&rate_ctx);
if (mod_timer(&rate_ctx.timer, jiffies + msecs_to_jiffies(1500))) {
- D("Error: mod_timer()");
+ nm_prerr("Error: mod_timer()");
}
}
rate_ctx.refcount++;
@@ -462,7 +461,7 @@
unsigned int r_orig = r;
if (unlikely(!nm_netmap_on(na) || r >= na->num_tx_rings)) {
- D("Error: no netmap adapter on device %p",
+ nm_prerr("Error: no netmap adapter on device %p",
GEN_TX_MBUF_IFP(m));
return;
}
@@ -488,7 +487,7 @@
if (match) {
if (r != r_orig) {
- RD(1, "event %p migrated: ring %u --> %u",
+ nm_prlim(1, "event %p migrated: ring %u --> %u",
m, r_orig, r);
}
break;
@@ -497,7 +496,7 @@
if (++r == na->num_tx_rings) r = 0;
if (r == r_orig) {
- RD(1, "Cannot match event %p", m);
+ nm_prlim(1, "Cannot match event %p", m);
return;
}
}
@@ -528,7 +527,7 @@
u_int n = 0;
struct mbuf **tx_pool = kring->tx_pool;
- ND("hwcur = %d, hwtail = %d", kring->nr_hwcur, kring->nr_hwtail);
+ nm_prdis("hwcur = %d, hwtail = %d", kring->nr_hwcur, kring->nr_hwtail);
while (nm_i != hwcur) { /* buffers not completed */
struct mbuf *m = tx_pool[nm_i];
@@ -537,7 +536,7 @@
if (m == NULL) {
/* Nothing to do, this is going
* to be replenished. */
- RD(3, "Is this happening?");
+ nm_prlim(3, "Is this happening?");
} else if (MBUF_QUEUED(m)) {
break; /* Not dequeued yet. */
@@ -576,7 +575,7 @@
nm_i = nm_next(nm_i, lim);
}
kring->nr_hwtail = nm_prev(nm_i, lim);
- ND("tx completed [%d] -> hwtail %d", n, kring->nr_hwtail);
+ nm_prdis("tx completed [%d] -> hwtail %d", n, kring->nr_hwtail);
return n;
}
@@ -598,7 +597,7 @@
}
if (unlikely(e >= n)) {
- D("This cannot happen");
+ nm_prerr("This cannot happen");
e = 0;
}
@@ -654,7 +653,7 @@
kring->tx_pool[e] = NULL;
- ND(5, "Request Event at %d mbuf %p refcnt %d", e, m, m ? MBUF_REFCNT(m) : -2 );
+ nm_prdis("Request Event at %d mbuf %p refcnt %d", e, m, m ? MBUF_REFCNT(m) : -2 );
/* Decrement the refcount. This will free it if we lose the race
* with the driver. */
@@ -699,7 +698,7 @@
* but only when cur == hwtail, which means that the
* client is going to block. */
event = ring_middle(nm_i, head, lim);
- ND(3, "Place txqdisc event (hwcur=%u,event=%u,"
+ nm_prdis("Place txqdisc event (hwcur=%u,event=%u,"
"head=%u,hwtail=%u)", nm_i, event, head,
kring->nr_hwtail);
}
@@ -725,7 +724,7 @@
kring->tx_pool[nm_i] = m =
nm_os_get_mbuf(ifp, NETMAP_BUF_SIZE(na));
if (m == NULL) {
- RD(2, "Failed to replenish mbuf");
+ nm_prlim(2, "Failed to replenish mbuf");
/* Here we could schedule a timer which
* retries to replenish after a while,
* and notifies the client when it
@@ -854,7 +853,7 @@
/* This may happen when GRO/LRO features are enabled for
* the NIC driver when the generic adapter does not
* support RX scatter-gather. */
- RD(2, "Warning: driver pushed up big packet "
+ nm_prlim(2, "Warning: driver pushed up big packet "
"(size=%d)", (int)MBUF_LEN(m));
m_freem(m);
} else if (unlikely(mbq_len(&kring->rx_queue) > 1024)) {
@@ -1048,7 +1047,7 @@
*/
netmap_adapter_put(prev_na);
}
- D("Native netmap adapter %p restored", prev_na);
+ nm_prinf("Native netmap adapter %p restored", prev_na);
}
NM_RESTORE_NA(ifp, prev_na);
/*
@@ -1056,7 +1055,7 @@
* overrides WNA(ifp) if na->ifp is not NULL.
*/
na->ifp = NULL;
- D("Emulated netmap adapter for %s destroyed", na->name);
+ nm_prinf("Emulated netmap adapter for %s destroyed", na->name);
}
int
@@ -1086,7 +1085,7 @@
#ifdef __FreeBSD__
if (ifp->if_type == IFT_LOOP) {
- D("if_loop is not supported by %s", __func__);
+ nm_prerr("if_loop is not supported by %s", __func__);
return EINVAL;
}
#endif
@@ -1096,26 +1095,25 @@
* adapter it means that someone else is using the same
* pointer (e.g. ax25_ptr on linux). This happens for
* instance when also PF_RING is in use. */
- D("Error: netmap adapter hook is busy");
+ nm_prerr("Error: netmap adapter hook is busy");
return EBUSY;
}
num_tx_desc = num_rx_desc = netmap_generic_ringsize; /* starting point */
nm_os_generic_find_num_desc(ifp, &num_tx_desc, &num_rx_desc); /* ignore errors */
- ND("Netmap ring size: TX = %d, RX = %d", num_tx_desc, num_rx_desc);
if (num_tx_desc == 0 || num_rx_desc == 0) {
- D("Device has no hw slots (tx %u, rx %u)", num_tx_desc, num_rx_desc);
+ nm_prerr("Device has no hw slots (tx %u, rx %u)", num_tx_desc, num_rx_desc);
return EINVAL;
}
gna = nm_os_malloc(sizeof(*gna));
if (gna == NULL) {
- D("no memory on attach, give up");
+ nm_prerr("no memory on attach, give up");
return ENOMEM;
}
na = (struct netmap_adapter *)gna;
- strncpy(na->name, ifp->if_xname, sizeof(na->name));
+ strlcpy(na->name, ifp->if_xname, sizeof(na->name));
na->ifp = ifp;
na->num_tx_desc = num_tx_desc;
na->num_rx_desc = num_rx_desc;
@@ -1129,10 +1127,10 @@
*/
na->na_flags = NAF_SKIP_INTR | NAF_HOST_RINGS;
- ND("[GNA] num_tx_queues(%d), real_num_tx_queues(%d), len(%lu)",
+ nm_prdis("[GNA] num_tx_queues(%d), real_num_tx_queues(%d), len(%lu)",
ifp->num_tx_queues, ifp->real_num_tx_queues,
ifp->tx_queue_len);
- ND("[GNA] num_rx_queues(%d), real_num_rx_queues(%d)",
+ nm_prdis("[GNA] num_rx_queues(%d), real_num_rx_queues(%d)",
ifp->num_rx_queues, ifp->real_num_rx_queues);
nm_os_generic_find_num_queues(ifp, &na->num_tx_rings, &na->num_rx_rings);
@@ -1151,7 +1149,7 @@
nm_os_generic_set_features(gna);
- D("Emulated adapter for %s created (prev was %p)", na->name, gna->prev);
+ nm_prinf("Emulated adapter for %s created (prev was %p)", na->name, gna->prev);
return retval;
}
Index: head/sys/dev/netmap/netmap_kern.h
===================================================================
--- head/sys/dev/netmap/netmap_kern.h
+++ head/sys/dev/netmap/netmap_kern.h
@@ -54,30 +54,31 @@
#if defined(CONFIG_NETMAP_GENERIC)
#define WITH_GENERIC
#endif
-#if defined(CONFIG_NETMAP_PTNETMAP_GUEST)
-#define WITH_PTNETMAP_GUEST
+#if defined(CONFIG_NETMAP_PTNETMAP)
+#define WITH_PTNETMAP
#endif
-#if defined(CONFIG_NETMAP_PTNETMAP_HOST)
-#define WITH_PTNETMAP_HOST
-#endif
#if defined(CONFIG_NETMAP_SINK)
#define WITH_SINK
#endif
+#if defined(CONFIG_NETMAP_NULL)
+#define WITH_NMNULL
+#endif
#elif defined (_WIN32)
#define WITH_VALE // comment out to disable VALE support
#define WITH_PIPES
#define WITH_MONITOR
#define WITH_GENERIC
+#define WITH_NMNULL
#else /* neither linux nor windows */
#define WITH_VALE // comment out to disable VALE support
#define WITH_PIPES
#define WITH_MONITOR
#define WITH_GENERIC
-#define WITH_PTNETMAP_HOST /* ptnetmap host support */
-#define WITH_PTNETMAP_GUEST /* ptnetmap guest support */
+#define WITH_PTNETMAP /* ptnetmap guest support */
#define WITH_EXTMEM
+#define WITH_NMNULL
#endif
#if defined(__FreeBSD__)
@@ -239,38 +240,54 @@
#define NMG_LOCK_ASSERT() NM_MTX_ASSERT(netmap_global_lock)
#if defined(__FreeBSD__)
-#define nm_prerr printf
-#define nm_prinf printf
+#define nm_prerr_int printf
+#define nm_prinf_int printf
#elif defined (_WIN32)
-#define nm_prerr DbgPrint
-#define nm_prinf DbgPrint
+#define nm_prerr_int DbgPrint
+#define nm_prinf_int DbgPrint
#elif defined(linux)
-#define nm_prerr(fmt, arg...) printk(KERN_ERR fmt, ##arg)
-#define nm_prinf(fmt, arg...) printk(KERN_INFO fmt, ##arg)
+#define nm_prerr_int(fmt, arg...) printk(KERN_ERR fmt, ##arg)
+#define nm_prinf_int(fmt, arg...) printk(KERN_INFO fmt, ##arg)
#endif
-#define ND(format, ...)
-#define D(format, ...) \
+#define nm_prinf(format, ...) \
do { \
struct timeval __xxts; \
microtime(&__xxts); \
- nm_prerr("%03d.%06d [%4d] %-25s " format "\n", \
+ nm_prinf_int("%03d.%06d [%4d] %-25s " format "\n",\
(int)__xxts.tv_sec % 1000, (int)__xxts.tv_usec, \
__LINE__, __FUNCTION__, ##__VA_ARGS__); \
} while (0)
-/* rate limited, lps indicates how many per second */
-#define RD(lps, format, ...) \
+#define nm_prerr(format, ...) \
do { \
+ struct timeval __xxts; \
+ microtime(&__xxts); \
+ nm_prerr_int("%03d.%06d [%4d] %-25s " format "\n",\
+ (int)__xxts.tv_sec % 1000, (int)__xxts.tv_usec, \
+ __LINE__, __FUNCTION__, ##__VA_ARGS__); \
+ } while (0)
+
+/* Disabled printf (used to be ND). */
+#define nm_prdis(format, ...)
+
+/* Rate limited, lps indicates how many per second. */
+#define nm_prlim(lps, format, ...) \
+ do { \
static int t0, __cnt; \
if (t0 != time_second) { \
t0 = time_second; \
__cnt = 0; \
} \
if (__cnt++ < lps) \
- D(format, ##__VA_ARGS__); \
+ nm_prinf(format, ##__VA_ARGS__); \
} while (0)
+/* Old macros. */
+#define ND nm_prdis
+#define D nm_prerr
+#define RD nm_prlim
+
struct netmap_adapter;
struct nm_bdg_fwd;
struct nm_bridge;
@@ -700,7 +717,7 @@
*/
#define NAF_HOST_RINGS 64 /* the adapter supports the host rings */
#define NAF_FORCE_NATIVE 128 /* the adapter is always NATIVE */
-#define NAF_PTNETMAP_HOST 256 /* the adapter supports ptnetmap in the host */
+/* free */
#define NAF_MOREFRAG 512 /* the adapter supports NS_MOREFRAG */
#define NAF_ZOMBIE (1U<<30) /* the nic driver has been unloaded */
#define NAF_BUSY (1U<<31) /* the adapter is used internally and
@@ -718,9 +735,9 @@
u_int num_tx_desc; /* number of descriptor in each queue */
u_int num_rx_desc;
- /* tx_rings and rx_rings are private but allocated
- * as a contiguous chunk of memory. Each array has
- * N+1 entries, for the adapter queues and for the host queue.
+ /* tx_rings and rx_rings are private but allocated as a
+ * contiguous chunk of memory. Each array has N+K entries,
+ * N for the hardware rings and K for the host rings.
*/
struct netmap_kring **tx_rings; /* array of TX rings. */
struct netmap_kring **rx_rings; /* array of RX rings. */
@@ -1080,12 +1097,12 @@
*/
struct netmap_vp_adapter *saved_na_vp;
};
-int nm_bdg_ctl_attach(struct nmreq_header *hdr, void *auth_token);
-int nm_bdg_ctl_detach(struct nmreq_header *hdr, void *auth_token);
int nm_bdg_polling(struct nmreq_header *hdr);
-int netmap_bdg_list(struct nmreq_header *hdr);
#ifdef WITH_VALE
+int netmap_vale_attach(struct nmreq_header *hdr, void *auth_token);
+int netmap_vale_detach(struct nmreq_header *hdr, void *auth_token);
+int netmap_vale_list(struct nmreq_header *hdr);
int netmap_vi_create(struct nmreq_header *hdr, int);
int nm_vi_create(struct nmreq_header *);
int nm_vi_destroy(const char *name);
@@ -1115,7 +1132,13 @@
#endif /* WITH_PIPES */
+#ifdef WITH_NMNULL
+struct netmap_null_adapter {
+ struct netmap_adapter up;
+};
+#endif /* WITH_NMNULL */
+
/* return slots reserved to rx clients; used in drivers */
static inline uint32_t
nm_kr_rxspace(struct netmap_kring *k)
@@ -1442,51 +1465,8 @@
int netmap_get_hw_na(struct ifnet *ifp,
struct netmap_mem_d *nmd, struct netmap_adapter **na);
-
-/*
- * The following bridge-related functions are used by other
- * kernel modules.
- *
- * VALE only supports unicast or broadcast. The lookup
- * function can return 0 .. NM_BDG_MAXPORTS-1 for regular ports,
- * NM_BDG_MAXPORTS for broadcast, NM_BDG_MAXPORTS+1 to indicate
- * drop.
- */
-typedef uint32_t (*bdg_lookup_fn_t)(struct nm_bdg_fwd *ft, uint8_t *ring_nr,
- struct netmap_vp_adapter *, void *private_data);
-typedef int (*bdg_config_fn_t)(struct nm_ifreq *);
-typedef void (*bdg_dtor_fn_t)(const struct netmap_vp_adapter *);
-typedef void *(*bdg_update_private_data_fn_t)(void *private_data, void *callback_data, int *error);
-typedef int (*bdg_vp_create_fn_t)(struct nmreq_header *hdr,
- struct ifnet *ifp, struct netmap_mem_d *nmd,
- struct netmap_vp_adapter **ret);
-typedef int (*bdg_bwrap_attach_fn_t)(const char *nr_name, struct netmap_adapter *hwna);
-struct netmap_bdg_ops {
- bdg_lookup_fn_t lookup;
- bdg_config_fn_t config;
- bdg_dtor_fn_t dtor;
- bdg_vp_create_fn_t vp_create;
- bdg_bwrap_attach_fn_t bwrap_attach;
- char name[IFNAMSIZ];
-};
-int netmap_bwrap_attach(const char *name, struct netmap_adapter *, struct netmap_bdg_ops *);
-int netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *private_data, void *auth_token);
-
-#define NM_BRIDGES 8 /* number of bridges */
-#define NM_BDG_MAXPORTS 254 /* up to 254 */
-#define NM_BDG_BROADCAST NM_BDG_MAXPORTS
-#define NM_BDG_NOPORT (NM_BDG_MAXPORTS+1)
-
-struct nm_bridge *netmap_init_bridges2(u_int);
-void netmap_uninit_bridges2(struct nm_bridge *, u_int);
-int netmap_init_bridges(void);
-void netmap_uninit_bridges(void);
-int nm_bdg_update_private_data(const char *name, bdg_update_private_data_fn_t callback,
- void *callback_data, void *auth_token);
-int netmap_bdg_config(struct nm_ifreq *nifr);
-
#ifdef WITH_VALE
-uint32_t netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
+uint32_t netmap_vale_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
struct netmap_vp_adapter *, void *private_data);
/* these are redefined in case of no VALE support */
@@ -1525,11 +1505,20 @@
(((struct nmreq_register *)(uintptr_t)hdr->nr_body)->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX) ? EOPNOTSUPP : 0)
#endif
+#ifdef WITH_NMNULL
+int netmap_get_null_na(struct nmreq_header *hdr, struct netmap_adapter **na,
+ struct netmap_mem_d *nmd, int create);
+#else /* !WITH_NMNULL */
+#define netmap_get_null_na(hdr, _2, _3, _4) \
+ (((struct nmreq_register *)(uintptr_t)hdr->nr_body)->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX) ? EOPNOTSUPP : 0)
+#endif /* WITH_NMNULL */
+
#ifdef CONFIG_NET_NS
struct net *netmap_bns_get(void);
void netmap_bns_put(struct net *);
void netmap_bns_getbridges(struct nm_bridge **, u_int *);
#else
+extern struct nm_bridge *nm_bridges;
#define netmap_bns_get()
#define netmap_bns_put(_1)
#define netmap_bns_getbridges(b, n) \
@@ -1591,16 +1580,24 @@
#define NETMAP_BUF_SIZE(_na) ((_na)->na_lut.objsize)
extern int netmap_no_pendintr;
extern int netmap_mitigate;
-extern int netmap_verbose; /* for debugging */
-enum { /* verbose flags */
- NM_VERB_ON = 1, /* generic verbose */
- NM_VERB_HOST = 0x2, /* verbose host stack */
- NM_VERB_RXSYNC = 0x10, /* verbose on rxsync/txsync */
- NM_VERB_TXSYNC = 0x20,
- NM_VERB_RXINTR = 0x100, /* verbose on rx/tx intr (driver) */
- NM_VERB_TXINTR = 0x200,
- NM_VERB_NIC_RXSYNC = 0x1000, /* verbose on rx/tx intr (driver) */
- NM_VERB_NIC_TXSYNC = 0x2000,
+extern int netmap_verbose;
+#ifdef CONFIG_NETMAP_DEBUG
+extern int netmap_debug; /* for debugging */
+#else /* !CONFIG_NETMAP_DEBUG */
+#define netmap_debug (0)
+#endif /* !CONFIG_NETMAP_DEBUG */
+enum { /* debug flags */
+ NM_DEBUG_ON = 1, /* generic debug messages */
+ NM_DEBUG_HOST = 0x2, /* debug host stack */
+ NM_DEBUG_RXSYNC = 0x10, /* debug on rxsync/txsync */
+ NM_DEBUG_TXSYNC = 0x20,
+ NM_DEBUG_RXINTR = 0x100, /* debug on rx/tx intr (driver) */
+ NM_DEBUG_TXINTR = 0x200,
+ NM_DEBUG_NIC_RXSYNC = 0x1000, /* debug on driver rx/tx sync */
+ NM_DEBUG_NIC_TXSYNC = 0x2000,
+ NM_DEBUG_MEM = 0x4000, /* verbose memory allocations/deallocations */
+ NM_DEBUG_VALE = 0x8000, /* debug messages from the VALE switch */
+ NM_DEBUG_BDG = NM_DEBUG_VALE,
};
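
A small sketch (not in the patch) of how the new knobs are meant to be combined, matching the conversions elsewhere in this diff: nm_prerr() for unconditional errors, netmap_verbose for informational messages, the netmap_debug bitmask for per-subsystem debug output (compiled away unless CONFIG_NETMAP_DEBUG is set), and nm_prlim() for rate-limited messages. The example_ function is hypothetical and assumes netmap_kern.h context.

static void
example_debug_logging(struct netmap_kring *kring)
{
	/* Unconditional error reporting. */
	if (kring == NULL) {
		nm_prerr("kring is NULL");
		return;
	}
	/* Informational message, only when verbosity is requested. */
	if (netmap_verbose)
		nm_prinf("kring %s bound", kring->name);
	/* Per-subsystem debug message, gated by a netmap_debug flag. */
	if (netmap_debug & NM_DEBUG_RXINTR)
		nm_prinf("%s rx interrupt", kring->name);
	/* Rate-limited message: at most one per second. */
	nm_prlim(1, "ring %s state dump", kring->name);
}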
extern int netmap_txsync_retry;
@@ -1612,7 +1609,6 @@
#ifdef linux
extern int netmap_generic_txqdisc;
#endif
-extern int ptnetmap_tx_workers;
/*
* NA returns a pointer to the struct netmap adapter from the ifp.
@@ -1809,6 +1805,11 @@
netmap_idx_n2k(struct netmap_kring *kr, int idx)
{
int n = kr->nkr_num_slots;
+
+ if (likely(kr->nkr_hwofs == 0)) {
+ return idx;
+ }
+
idx += kr->nkr_hwofs;
if (idx < 0)
return idx + n;
@@ -1823,6 +1824,11 @@
netmap_idx_k2n(struct netmap_kring *kr, int idx)
{
int n = kr->nkr_num_slots;
+
+ if (likely(kr->nkr_hwofs == 0)) {
+ return idx;
+ }
+
idx -= kr->nkr_hwofs;
if (idx < 0)
return idx + n;
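
The hunk above only shows the new fast path. For readability, here is a sketch of the complete helper, assuming the remainder of the function (truncated by the hunk) keeps the existing wrap-around handling on both sides; the _sketch suffix marks it as illustrative, not the patched source.

/* kring index -> NIC ring index, with the new nkr_hwofs == 0 fast path. */
static inline int
netmap_idx_k2n_sketch(struct netmap_kring *kr, int idx)
{
	int n = kr->nkr_num_slots;

	if (likely(kr->nkr_hwofs == 0))
		return idx;

	idx -= kr->nkr_hwofs;
	if (idx < 0)
		return idx + n;		/* wrapped below 0 */
	else if (idx >= n)
		return idx - n;		/* wrapped above n */
	else
		return idx;
}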
@@ -1911,6 +1917,9 @@
u_int np_qfirst[NR_TXRX],
np_qlast[NR_TXRX]; /* range of tx/rx rings to scan */
uint16_t np_txpoll;
+ uint16_t np_kloop_state; /* use with NMG_LOCK held */
+#define NM_SYNC_KLOOP_RUNNING (1 << 0)
+#define NM_SYNC_KLOOP_STOPPING (1 << 1)
int np_sync_flags; /* to be passed to nm_sync */
int np_refs; /* use with NMG_LOCK held */
@@ -1920,7 +1929,26 @@
* number of rings.
*/
NM_SELINFO_T *np_si[NR_TXRX];
+
+ /* In the optional CSB mode, the user must specify the start address
+ * of two arrays of Communication Status Block (CSB) entries, for the
+ * two directions (application-write/kernel-read, and
+ * kernel-write/application-read).
+ * The number of entries must agree with the number of rings bound to
+ * the netmap file descriptor. The entries corresponding to the TX
+ * rings are laid out before the ones corresponding to the RX rings.
+ *
+ * Array of CSB entries for application --> kernel communication
+ * (N entries). */
+ struct nm_csb_atok *np_csb_atok_base;
+ /* Array of CSB entries for kernel --> application communication
+ * (N entries). */
+ struct nm_csb_ktoa *np_csb_ktoa_base;
+
struct thread *np_td; /* kqueue, just debugging */
+#ifdef linux
+ struct file *np_filp; /* used by sync kloop */
+#endif /* linux */
};
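
To make the layout described in the comment above concrete, a hedged helper sketch (not in the patch, example_ name hypothetical, assuming netmap_kern.h context): the CSB entry for the i-th bound TX ring sits at offset i, while the entry for the i-th bound RX ring follows all the TX entries, exactly as the sync kloop added later in this diff indexes them.

/* Locate the application->kernel CSB entry of a bound ring. */
static inline struct nm_csb_atok *
example_csb_atok_entry(struct netmap_priv_d *priv, enum txrx t, u_int i)
{
	u_int num_tx = priv->np_qlast[NR_TX] - priv->np_qfirst[NR_TX];

	return priv->np_csb_atok_base + ((t == NR_TX) ? i : num_tx + i);
}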
struct netmap_priv_d *netmap_priv_new(void);
@@ -1943,6 +1971,14 @@
return 0;
}
+/* call with NMG_LOCK held */
+static __inline int
+nm_si_user(struct netmap_priv_d *priv, enum txrx t)
+{
+ return (priv->np_na != NULL &&
+ (priv->np_qlast[t] - priv->np_qfirst[t] > 1));
+}
+
#ifdef WITH_PIPES
int netmap_pipe_txsync(struct netmap_kring *txkring, int flags);
int netmap_pipe_rxsync(struct netmap_kring *rxkring, int flags);
@@ -2143,17 +2179,14 @@
* kernel thread routines
*/
struct nm_kctx; /* OS-specific kernel context - opaque */
-typedef void (*nm_kctx_worker_fn_t)(void *data, int is_kthread);
-typedef void (*nm_kctx_notify_fn_t)(void *data);
+typedef void (*nm_kctx_worker_fn_t)(void *data);
/* kthread configuration */
struct nm_kctx_cfg {
long type; /* kthread type/identifier */
nm_kctx_worker_fn_t worker_fn; /* worker function */
void *worker_private;/* worker parameter */
- nm_kctx_notify_fn_t notify_fn; /* notify function */
int attach_user; /* attach kthread to user process */
- int use_kthread; /* use a kthread for the context */
};
/* kthread configuration */
struct nm_kctx *nm_os_kctx_create(struct nm_kctx_cfg *cfg,
@@ -2161,48 +2194,25 @@
int nm_os_kctx_worker_start(struct nm_kctx *);
void nm_os_kctx_worker_stop(struct nm_kctx *);
void nm_os_kctx_destroy(struct nm_kctx *);
-void nm_os_kctx_worker_wakeup(struct nm_kctx *nmk);
-void nm_os_kctx_send_irq(struct nm_kctx *);
void nm_os_kctx_worker_setaff(struct nm_kctx *, int);
u_int nm_os_ncpus(void);
-#ifdef WITH_PTNETMAP_HOST
+int netmap_sync_kloop(struct netmap_priv_d *priv,
+ struct nmreq_header *hdr);
+int netmap_sync_kloop_stop(struct netmap_priv_d *priv);
+
+#ifdef WITH_PTNETMAP
+/* ptnetmap guest routines */
+
/*
- * netmap adapter for host ptnetmap ports
+ * ptnetmap_memdev routines used to talk with ptnetmap_memdev device driver
*/
-struct netmap_pt_host_adapter {
- struct netmap_adapter up;
+struct ptnetmap_memdev;
+int nm_os_pt_memdev_iomap(struct ptnetmap_memdev *, vm_paddr_t *, void **,
+ uint64_t *);
+void nm_os_pt_memdev_iounmap(struct ptnetmap_memdev *);
+uint32_t nm_os_pt_memdev_ioread(struct ptnetmap_memdev *, unsigned int);
- /* the passed-through adapter */
- struct netmap_adapter *parent;
- /* parent->na_flags, saved at NETMAP_PT_HOST_CREATE time,
- * and restored at NETMAP_PT_HOST_DELETE time */
- uint32_t parent_na_flags;
-
- int (*parent_nm_notify)(struct netmap_kring *kring, int flags);
- void *ptns;
-};
-
-/* ptnetmap host-side routines */
-int netmap_get_pt_host_na(struct nmreq_header *hdr, struct netmap_adapter **na,
- struct netmap_mem_d * nmd, int create);
-int ptnetmap_ctl(const char *nr_name, int create, struct netmap_adapter *na);
-
-static inline int
-nm_ptnetmap_host_on(struct netmap_adapter *na)
-{
- return na && na->na_flags & NAF_PTNETMAP_HOST;
-}
-#else /* !WITH_PTNETMAP_HOST */
-#define netmap_get_pt_host_na(hdr, _2, _3, _4) \
- (((struct nmreq_register *)(uintptr_t)hdr->nr_body)->nr_flags & (NR_PTNETMAP_HOST) ? EOPNOTSUPP : 0)
-#define ptnetmap_ctl(_1, _2, _3) EINVAL
-#define nm_ptnetmap_host_on(_1) EINVAL
-#endif /* !WITH_PTNETMAP_HOST */
-
-#ifdef WITH_PTNETMAP_GUEST
-/* ptnetmap GUEST routines */
-
/*
* netmap adapter for guest ptnetmap ports
*/
@@ -2218,27 +2228,84 @@
* network stack and netmap clients.
* Used to decide when we need (de)allocate krings/rings and
* start (stop) ptnetmap kthreads. */
- int backend_regifs;
+ int backend_users;
};
int netmap_pt_guest_attach(struct netmap_adapter *na,
unsigned int nifp_offset,
unsigned int memid);
-struct ptnet_csb_gh;
-struct ptnet_csb_hg;
-bool netmap_pt_guest_txsync(struct ptnet_csb_gh *ptgh,
- struct ptnet_csb_hg *pthg,
- struct netmap_kring *kring,
- int flags);
-bool netmap_pt_guest_rxsync(struct ptnet_csb_gh *ptgh,
- struct ptnet_csb_hg *pthg,
+bool netmap_pt_guest_txsync(struct nm_csb_atok *atok,
+ struct nm_csb_ktoa *ktoa,
struct netmap_kring *kring, int flags);
+bool netmap_pt_guest_rxsync(struct nm_csb_atok *atok,
+ struct nm_csb_ktoa *ktoa,
+ struct netmap_kring *kring, int flags);
int ptnet_nm_krings_create(struct netmap_adapter *na);
void ptnet_nm_krings_delete(struct netmap_adapter *na);
void ptnet_nm_dtor(struct netmap_adapter *na);
-#endif /* WITH_PTNETMAP_GUEST */
+/* Guest driver: Write kring pointers (cur, head) to the CSB.
+ * This routine is coupled with ptnetmap_host_read_kring_csb(). */
+static inline void
+ptnetmap_guest_write_kring_csb(struct nm_csb_atok *atok, uint32_t cur,
+ uint32_t head)
+{
+ /*
+ * We need to write cur and head to the CSB but we cannot do it atomically.
+ * There is no way we can prevent the host from reading the updated value
+ * of one of the two and the old value of the other. However, if we make
+ * sure that the host never reads a value of head more recent than the
+ * value of cur we are safe. We can allow the host to read a value of cur
+ * more recent than the value of head, since in the netmap ring cur can be
+ * ahead of head and cur cannot wrap around head because it must be behind
+ * tail. Inverting the order of writes below could instead result into the
+ * host to think head went ahead of cur, which would cause the sync
+ * prologue to fail.
+ *
+ * The following memory barrier scheme is used to make this happen:
+ *
+ * Guest Host
+ *
+ * STORE(cur) LOAD(head)
+ * mb() <-----------> mb()
+ * STORE(head) LOAD(cur)
+ */
+ atok->cur = cur;
+ nm_stst_barrier();
+ atok->head = head;
+}
+
+/* Guest driver: Read kring pointers (hwcur, hwtail) from the CSB.
+ * This routine is coupled with ptnetmap_host_write_kring_csb(). */
+static inline void
+ptnetmap_guest_read_kring_csb(struct nm_csb_ktoa *ktoa,
+ struct netmap_kring *kring)
+{
+ /*
+ * We place a memory barrier to make sure that the update of hwtail never
+ * overtakes the update of hwcur.
+ * (see explanation in ptnetmap_host_write_kring_csb).
+ */
+ kring->nr_hwtail = ktoa->hwtail;
+ nm_stst_barrier();
+ kring->nr_hwcur = ktoa->hwcur;
+}
+
+/* Helper function wrapping ptnetmap_guest_read_kring_csb(). */
+static inline void
+ptnet_sync_tail(struct nm_csb_ktoa *ktoa, struct netmap_kring *kring)
+{
+ struct netmap_ring *ring = kring->ring;
+
+ /* Update hwcur and hwtail as known by the host. */
+ ptnetmap_guest_read_kring_csb(ktoa, kring);
+
+ /* nm_sync_finalize */
+ ring->tail = kring->rtail = kring->nr_hwtail;
+}
+#endif /* WITH_PTNETMAP */
+
#ifdef __FreeBSD__
/*
* FreeBSD mbuf allocator/deallocator in emulation mode:
@@ -2354,5 +2421,17 @@
struct nmreq_option * nmreq_findoption(struct nmreq_option *, uint16_t);
int nmreq_checkduplicate(struct nmreq_option *);
+
+int netmap_init_bridges(void);
+void netmap_uninit_bridges(void);
+
+/* Functions to read and write CSB fields from the kernel. */
+#if defined (linux)
+#define CSB_READ(csb, field, r) (get_user(r, &csb->field))
+#define CSB_WRITE(csb, field, v) (put_user(v, &csb->field))
+#else /* ! linux */
+#define CSB_READ(csb, field, r) (r = fuword32(&csb->field))
+#define CSB_WRITE(csb, field, v) (suword32(&csb->field, v))
+#endif /* ! linux */
#endif /* _NET_NETMAP_KERN_H_ */
Index: head/sys/dev/netmap/netmap_kloop.c
===================================================================
--- head/sys/dev/netmap/netmap_kloop.c
+++ head/sys/dev/netmap/netmap_kloop.c
@@ -0,0 +1,916 @@
+/*
+ * Copyright (C) 2016-2018 Vincenzo Maffione
+ * Copyright (C) 2015 Stefano Garzarella
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/*
+ * common headers
+ */
+#if defined(__FreeBSD__)
+#include <sys/cdefs.h>
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/types.h>
+#include <sys/selinfo.h>
+#include <sys/socket.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <machine/bus.h>
+
+#define usleep_range(_1, _2) \
+ pause_sbt("sync-kloop-sleep", SBT_1US * _1, SBT_1US * 1, C_ABSOLUTE)
+
+#elif defined(linux)
+#include <bsd_glue.h>
+#include <linux/file.h>
+#include <linux/eventfd.h>
+#endif
+
+#include <net/netmap.h>
+#include <dev/netmap/netmap_kern.h>
+#include <net/netmap_virt.h>
+#include <dev/netmap/netmap_mem2.h>
+
+/* Support for eventfd-based notifications. */
+#if defined(linux)
+#define SYNC_KLOOP_POLL
+#endif
+
+/* Write kring pointers (hwcur, hwtail) to the CSB.
+ * This routine is coupled with ptnetmap_guest_read_kring_csb(). */
+static inline void
+sync_kloop_kernel_write(struct nm_csb_ktoa __user *ptr, uint32_t hwcur,
+ uint32_t hwtail)
+{
+ /*
+ * The same scheme used in ptnetmap_guest_write_kring_csb() applies here.
+ * We allow the application to read a value of hwcur more recent than the value
+ * of hwtail, since this would anyway result in a consistent view of the
+ * ring state (and hwcur can never wraparound hwtail, since hwcur must be
+ * behind head).
+ *
+ * The following memory barrier scheme is used to make this happen:
+ *
+ * Application Kernel
+ *
+ * STORE(hwcur) LOAD(hwtail)
+ * mb() <-------------> mb()
+ * STORE(hwtail) LOAD(hwcur)
+ */
+ CSB_WRITE(ptr, hwcur, hwcur);
+ nm_stst_barrier();
+ CSB_WRITE(ptr, hwtail, hwtail);
+}
+
+/* Read kring pointers (head, cur, sync_flags) from the CSB.
+ * This routine is coupled with ptnetmap_guest_write_kring_csb(). */
+static inline void
+sync_kloop_kernel_read(struct nm_csb_atok __user *ptr,
+ struct netmap_ring *shadow_ring,
+ uint32_t num_slots)
+{
+ /*
+ * We place a memory barrier to make sure that the update of head never
+ * overtakes the update of cur.
+ * (see explanation in ptnetmap_guest_write_kring_csb).
+ */
+ CSB_READ(ptr, head, shadow_ring->head);
+ nm_stst_barrier();
+ CSB_READ(ptr, cur, shadow_ring->cur);
+ CSB_READ(ptr, sync_flags, shadow_ring->flags);
+}
+
+/* Enable or disable application --> kernel kicks. */
+static inline void
+csb_ktoa_kick_enable(struct nm_csb_ktoa __user *csb_ktoa, uint32_t val)
+{
+ CSB_WRITE(csb_ktoa, kern_need_kick, val);
+}
+
+/* Are application interrupts enabled or disabled? */
+static inline uint32_t
+csb_atok_intr_enabled(struct nm_csb_atok __user *csb_atok)
+{
+ uint32_t v;
+
+ CSB_READ(csb_atok, appl_need_kick, v);
+
+ return v;
+}
+
+static inline void
+sync_kloop_kring_dump(const char *title, const struct netmap_kring *kring)
+{
+ nm_prinf("%s - name: %s hwcur: %d hwtail: %d "
+ "rhead: %d rcur: %d rtail: %d",
+ title, kring->name, kring->nr_hwcur, kring->nr_hwtail,
+ kring->rhead, kring->rcur, kring->rtail);
+}
+
+struct sync_kloop_ring_args {
+ struct netmap_kring *kring;
+ struct nm_csb_atok *csb_atok;
+ struct nm_csb_ktoa *csb_ktoa;
+#ifdef SYNC_KLOOP_POLL
+ struct eventfd_ctx *irq_ctx;
+#endif /* SYNC_KLOOP_POLL */
+};
+
+static void
+netmap_sync_kloop_tx_ring(const struct sync_kloop_ring_args *a)
+{
+ struct netmap_kring *kring = a->kring;
+ struct nm_csb_atok *csb_atok = a->csb_atok;
+ struct nm_csb_ktoa *csb_ktoa = a->csb_ktoa;
+ struct netmap_ring shadow_ring; /* shadow copy of the netmap_ring */
+ bool more_txspace = false;
+ uint32_t num_slots;
+ int batch;
+
+ num_slots = kring->nkr_num_slots;
+
+ /* Disable application --> kernel notifications. */
+ csb_ktoa_kick_enable(csb_ktoa, 0);
+ /* Copy the application kring pointers from the CSB */
+ sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
+
+ for (;;) {
+ batch = shadow_ring.head - kring->nr_hwcur;
+ if (batch < 0)
+ batch += num_slots;
+
+#ifdef PTN_TX_BATCH_LIM
+ if (batch > PTN_TX_BATCH_LIM(num_slots)) {
+ /* If application moves ahead too fast, let's cut the move so
+ * that we don't exceed our batch limit. */
+ uint32_t head_lim = kring->nr_hwcur + PTN_TX_BATCH_LIM(num_slots);
+
+ if (head_lim >= num_slots)
+ head_lim -= num_slots;
+ nm_prdis(1, "batch: %d head: %d head_lim: %d", batch, shadow_ring.head,
+ head_lim);
+ shadow_ring.head = head_lim;
+ batch = PTN_TX_BATCH_LIM(num_slots);
+ }
+#endif /* PTN_TX_BATCH_LIM */
+
+ if (nm_kr_txspace(kring) <= (num_slots >> 1)) {
+ shadow_ring.flags |= NAF_FORCE_RECLAIM;
+ }
+
+ /* Netmap prologue */
+ shadow_ring.tail = kring->rtail;
+ if (unlikely(nm_txsync_prologue(kring, &shadow_ring) >= num_slots)) {
+ /* Reinit ring and enable notifications. */
+ netmap_ring_reinit(kring);
+ csb_ktoa_kick_enable(csb_ktoa, 1);
+ break;
+ }
+
+ if (unlikely(netmap_debug & NM_DEBUG_TXSYNC)) {
+ sync_kloop_kring_dump("pre txsync", kring);
+ }
+
+ if (unlikely(kring->nm_sync(kring, shadow_ring.flags))) {
+ /* Reenable notifications. */
+ csb_ktoa_kick_enable(csb_ktoa, 1);
+ nm_prerr("txsync() failed");
+ break;
+ }
+
+ /*
+ * Finalize
+ * Copy kernel hwcur and hwtail into the CSB for the application sync(), and
+ * do the nm_sync_finalize.
+ */
+ sync_kloop_kernel_write(csb_ktoa, kring->nr_hwcur,
+ kring->nr_hwtail);
+ if (kring->rtail != kring->nr_hwtail) {
+ /* Some more room available in the parent adapter. */
+ kring->rtail = kring->nr_hwtail;
+ more_txspace = true;
+ }
+
+ if (unlikely(netmap_debug & NM_DEBUG_TXSYNC)) {
+ sync_kloop_kring_dump("post txsync", kring);
+ }
+
+ /* Interrupt the application if needed. */
+#ifdef SYNC_KLOOP_POLL
+ if (a->irq_ctx && more_txspace && csb_atok_intr_enabled(csb_atok)) {
+ /* Disable application kick to avoid sending unnecessary kicks */
+ eventfd_signal(a->irq_ctx, 1);
+ more_txspace = false;
+ }
+#endif /* SYNC_KLOOP_POLL */
+
+ /* Read CSB to see if there is more work to do. */
+ sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
+ if (shadow_ring.head == kring->rhead) {
+ /*
+ * No more packets to transmit. We enable notifications and
+ * go to sleep, waiting for a kick from the application when new
+ * slots are ready for transmission.
+ */
+ /* Reenable notifications. */
+ csb_ktoa_kick_enable(csb_ktoa, 1);
+ /* Doublecheck. */
+ sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
+ if (shadow_ring.head != kring->rhead) {
+ /* We won the race condition, there are more packets to
+ * transmit. Disable notifications and do another cycle */
+ csb_ktoa_kick_enable(csb_ktoa, 0);
+ continue;
+ }
+ break;
+ }
+
+ if (nm_kr_txempty(kring)) {
+ /* No more available TX slots. We stop waiting for a notification
+ * from the backend (netmap_tx_irq). */
+ nm_prdis(1, "TX ring");
+ break;
+ }
+ }
+
+#ifdef SYNC_KLOOP_POLL
+ if (a->irq_ctx && more_txspace && csb_atok_intr_enabled(csb_atok)) {
+ eventfd_signal(a->irq_ctx, 1);
+ }
+#endif /* SYNC_KLOOP_POLL */
+}
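
Both the TX loop above and the RX loop below rely on the same race-avoidance idiom: work with application kicks disabled, re-enable kicks only when the ring looks idle, then doublecheck before going to sleep. A distilled, standalone sketch of that idiom (hypothetical example_ helpers, not part of the patch):

#include <stdbool.h>

struct example_ring {
	bool kicks_enabled;	/* stands in for kern_need_kick in the CSB */
	int pending;		/* work published by the application */
};

static bool example_more_work(struct example_ring *r) { return r->pending > 0; }
static void example_process(struct example_ring *r) { if (r->pending > 0) r->pending--; }

static void
example_kloop_body(struct example_ring *r)
{
	r->kicks_enabled = false;		/* kicks off while we work */
	for (;;) {
		example_process(r);
		if (example_more_work(r))
			continue;
		r->kicks_enabled = true;	/* ask to be notified again */
		if (example_more_work(r)) {	/* doublecheck: work arrived
						 * after the last pass */
			r->kicks_enabled = false;
			continue;		/* consume it before sleeping */
		}
		break;				/* now it is safe to sleep */
	}
}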
+
+/* RX cycles without receiving any packets */
+#define SYNC_LOOP_RX_DRY_CYCLES_MAX 2
+
+static inline int
+sync_kloop_norxslots(struct netmap_kring *kring, uint32_t g_head)
+{
+ return (NM_ACCESS_ONCE(kring->nr_hwtail) == nm_prev(g_head,
+ kring->nkr_num_slots - 1));
+}
+
+static void
+netmap_sync_kloop_rx_ring(const struct sync_kloop_ring_args *a)
+{
+
+ struct netmap_kring *kring = a->kring;
+ struct nm_csb_atok *csb_atok = a->csb_atok;
+ struct nm_csb_ktoa *csb_ktoa = a->csb_ktoa;
+ struct netmap_ring shadow_ring; /* shadow copy of the netmap_ring */
+ int dry_cycles = 0;
+ bool some_recvd = false;
+ uint32_t num_slots;
+
+ num_slots = kring->nkr_num_slots;
+
+
+ /* Disable notifications. */
+ csb_ktoa_kick_enable(csb_ktoa, 0);
+ /* Copy the application kring pointers from the CSB */
+ sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
+
+ for (;;) {
+ uint32_t hwtail;
+
+ /* Netmap prologue */
+ shadow_ring.tail = kring->rtail;
+ if (unlikely(nm_rxsync_prologue(kring, &shadow_ring) >= num_slots)) {
+ /* Reinit ring and enable notifications. */
+ netmap_ring_reinit(kring);
+ csb_ktoa_kick_enable(csb_ktoa, 1);
+ break;
+ }
+
+ if (unlikely(netmap_debug & NM_DEBUG_RXSYNC)) {
+ sync_kloop_kring_dump("pre rxsync", kring);
+ }
+
+ if (unlikely(kring->nm_sync(kring, shadow_ring.flags))) {
+ /* Reenable notifications. */
+ csb_ktoa_kick_enable(csb_ktoa, 1);
+ nm_prerr("rxsync() failed");
+ break;
+ }
+
+ /*
+ * Finalize
+ * Copy kernel hwcur and hwtail into the CSB for the application sync()
+ */
+ hwtail = NM_ACCESS_ONCE(kring->nr_hwtail);
+ sync_kloop_kernel_write(csb_ktoa, kring->nr_hwcur, hwtail);
+ if (kring->rtail != hwtail) {
+ kring->rtail = hwtail;
+ some_recvd = true;
+ dry_cycles = 0;
+ } else {
+ dry_cycles++;
+ }
+
+ if (unlikely(netmap_debug & NM_DEBUG_RXSYNC)) {
+ sync_kloop_kring_dump("post rxsync", kring);
+ }
+
+#ifdef SYNC_KLOOP_POLL
+ /* Interrupt the application if needed. */
+ if (a->irq_ctx && some_recvd && csb_atok_intr_enabled(csb_atok)) {
+ /* Disable application kick to avoid sending unnecessary kicks */
+ eventfd_signal(a->irq_ctx, 1);
+ some_recvd = false;
+ }
+#endif /* SYNC_KLOOP_POLL */
+
+ /* Read CSB to see if there is more work to do. */
+ sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
+ if (sync_kloop_norxslots(kring, shadow_ring.head)) {
+ /*
+ * No more slots available for reception. We enable notifications and
+ * go to sleep, waiting for a kick from the application when new receive
+ * slots are available.
+ */
+ /* Reenable notifications. */
+ csb_ktoa_kick_enable(csb_ktoa, 1);
+ /* Doublecheck. */
+ sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots);
+ if (!sync_kloop_norxslots(kring, shadow_ring.head)) {
+ /* We won the race condition, more slots are available. Disable
+ * notifications and do another cycle. */
+ csb_ktoa_kick_enable(csb_ktoa, 0);
+ continue;
+ }
+ break;
+ }
+
+ hwtail = NM_ACCESS_ONCE(kring->nr_hwtail);
+ if (unlikely(hwtail == kring->rhead ||
+ dry_cycles >= SYNC_LOOP_RX_DRY_CYCLES_MAX)) {
+ /* No more packets to be read from the backend. We stop and
+ * wait for a notification from the backend (netmap_rx_irq). */
+ nm_prdis(1, "nr_hwtail: %d rhead: %d dry_cycles: %d",
+ hwtail, kring->rhead, dry_cycles);
+ break;
+ }
+ }
+
+ nm_kr_put(kring);
+
+#ifdef SYNC_KLOOP_POLL
+ /* Interrupt the application if needed. */
+ if (a->irq_ctx && some_recvd && csb_atok_intr_enabled(csb_atok)) {
+ eventfd_signal(a->irq_ctx, 1);
+ }
+#endif /* SYNC_KLOOP_POLL */
+}
+
+#ifdef SYNC_KLOOP_POLL
+struct sync_kloop_poll_entry {
+ /* Support for receiving notifications from
+ * a netmap ring or from the application. */
+ struct file *filp;
+ wait_queue_t wait;
+ wait_queue_head_t *wqh;
+
+ /* Support for sending notifications to the application. */
+ struct eventfd_ctx *irq_ctx;
+ struct file *irq_filp;
+};
+
+struct sync_kloop_poll_ctx {
+ poll_table wait_table;
+ unsigned int next_entry;
+ unsigned int num_entries;
+ struct sync_kloop_poll_entry entries[0];
+};
+
+static void
+sync_kloop_poll_table_queue_proc(struct file *file, wait_queue_head_t *wqh,
+ poll_table *pt)
+{
+ struct sync_kloop_poll_ctx *poll_ctx =
+ container_of(pt, struct sync_kloop_poll_ctx, wait_table);
+ struct sync_kloop_poll_entry *entry = poll_ctx->entries +
+ poll_ctx->next_entry;
+
+ BUG_ON(poll_ctx->next_entry >= poll_ctx->num_entries);
+ entry->wqh = wqh;
+ entry->filp = file;
+ /* Use the default wake up function. */
+ init_waitqueue_entry(&entry->wait, current);
+ add_wait_queue(wqh, &entry->wait);
+ poll_ctx->next_entry++;
+}
+#endif /* SYNC_KLOOP_POLL */
+
+int
+netmap_sync_kloop(struct netmap_priv_d *priv, struct nmreq_header *hdr)
+{
+ struct nmreq_sync_kloop_start *req =
+ (struct nmreq_sync_kloop_start *)(uintptr_t)hdr->nr_body;
+ struct nmreq_opt_sync_kloop_eventfds *eventfds_opt = NULL;
+#ifdef SYNC_KLOOP_POLL
+ struct sync_kloop_poll_ctx *poll_ctx = NULL;
+#endif /* SYNC_KLOOP_POLL */
+ int num_rx_rings, num_tx_rings, num_rings;
+ uint32_t sleep_us = req->sleep_us;
+ struct nm_csb_atok* csb_atok_base;
+ struct nm_csb_ktoa* csb_ktoa_base;
+ struct netmap_adapter *na;
+ struct nmreq_option *opt;
+ int err = 0;
+ int i;
+
+ if (sleep_us > 1000000) {
+ /* We do not accept sleeping for more than a second. */
+ return EINVAL;
+ }
+
+ if (priv->np_nifp == NULL) {
+ return ENXIO;
+ }
+ mb(); /* make sure following reads are not from cache */
+
+ na = priv->np_na;
+ if (!nm_netmap_on(na)) {
+ return ENXIO;
+ }
+
+ NMG_LOCK();
+ /* Make sure the application is working in CSB mode. */
+ if (!priv->np_csb_atok_base || !priv->np_csb_ktoa_base) {
+ NMG_UNLOCK();
+ nm_prerr("sync-kloop on %s requires "
+ "NETMAP_REQ_OPT_CSB option", na->name);
+ return EINVAL;
+ }
+
+ csb_atok_base = priv->np_csb_atok_base;
+ csb_ktoa_base = priv->np_csb_ktoa_base;
+
+ /* Make sure that no kloop is currently running. */
+ if (priv->np_kloop_state & NM_SYNC_KLOOP_RUNNING) {
+ err = EBUSY;
+ }
+ priv->np_kloop_state |= NM_SYNC_KLOOP_RUNNING;
+ NMG_UNLOCK();
+ if (err) {
+ return err;
+ }
+
+ num_rx_rings = priv->np_qlast[NR_RX] - priv->np_qfirst[NR_RX];
+ num_tx_rings = priv->np_qlast[NR_TX] - priv->np_qfirst[NR_TX];
+ num_rings = num_tx_rings + num_rx_rings;
+
+ /* Validate notification options. */
+ opt = nmreq_findoption((struct nmreq_option *)(uintptr_t)hdr->nr_options,
+ NETMAP_REQ_OPT_SYNC_KLOOP_EVENTFDS);
+ if (opt != NULL) {
+ err = nmreq_checkduplicate(opt);
+ if (err) {
+ opt->nro_status = err;
+ goto out;
+ }
+ if (opt->nro_size != sizeof(*eventfds_opt) +
+ sizeof(eventfds_opt->eventfds[0]) * num_rings) {
+ /* Option size not consistent with the number of
+ * entries. */
+ opt->nro_status = err = EINVAL;
+ goto out;
+ }
+#ifdef SYNC_KLOOP_POLL
+ eventfds_opt = (struct nmreq_opt_sync_kloop_eventfds *)opt;
+ opt->nro_status = 0;
+ /* We need 2 poll entries for TX and RX notifications coming
+ * from the netmap adapter, plus one entry per ring for the
+ * notifications coming from the application. */
+ poll_ctx = nm_os_malloc(sizeof(*poll_ctx) +
+ (2 + num_rings) * sizeof(poll_ctx->entries[0]));
+ init_poll_funcptr(&poll_ctx->wait_table,
+ sync_kloop_poll_table_queue_proc);
+ poll_ctx->num_entries = 2 + num_rings;
+ poll_ctx->next_entry = 0;
+ /* Poll for notifications coming from the applications through
+ * eventfds. */
+ for (i = 0; i < num_rings; i++) {
+ struct eventfd_ctx *irq;
+ struct file *filp;
+ unsigned long mask;
+
+ filp = eventfd_fget(eventfds_opt->eventfds[i].ioeventfd);
+ if (IS_ERR(filp)) {
+ err = PTR_ERR(filp);
+ goto out;
+ }
+ mask = filp->f_op->poll(filp, &poll_ctx->wait_table);
+ if (mask & POLLERR) {
+ err = EINVAL;
+ goto out;
+ }
+
+ filp = eventfd_fget(eventfds_opt->eventfds[i].irqfd);
+ if (IS_ERR(filp)) {
+ err = PTR_ERR(filp);
+ goto out;
+ }
+ poll_ctx->entries[i].irq_filp = filp;
+ irq = eventfd_ctx_fileget(filp);
+ if (IS_ERR(irq)) {
+ err = PTR_ERR(irq);
+ goto out;
+ }
+ poll_ctx->entries[i].irq_ctx = irq;
+ }
+ /* Poll for notifications coming from the netmap rings bound to
+ * this file descriptor. */
+ {
+ NM_SELINFO_T *si[NR_TXRX];
+
+ NMG_LOCK();
+ si[NR_RX] = nm_si_user(priv, NR_RX) ? &na->si[NR_RX] :
+ &na->rx_rings[priv->np_qfirst[NR_RX]]->si;
+ si[NR_TX] = nm_si_user(priv, NR_TX) ? &na->si[NR_TX] :
+ &na->tx_rings[priv->np_qfirst[NR_TX]]->si;
+ NMG_UNLOCK();
+ poll_wait(priv->np_filp, si[NR_RX], &poll_ctx->wait_table);
+ poll_wait(priv->np_filp, si[NR_TX], &poll_ctx->wait_table);
+ }
+#else /* SYNC_KLOOP_POLL */
+ opt->nro_status = EOPNOTSUPP;
+ goto out;
+#endif /* SYNC_KLOOP_POLL */
+ }
+
+ /* Main loop. */
+ for (;;) {
+ if (unlikely(NM_ACCESS_ONCE(priv->np_kloop_state) & NM_SYNC_KLOOP_STOPPING)) {
+ break;
+ }
+
+#ifdef SYNC_KLOOP_POLL
+ if (poll_ctx)
+ __set_current_state(TASK_INTERRUPTIBLE);
+#endif /* SYNC_KLOOP_POLL */
+
+ /* Process all the TX rings bound to this file descriptor. */
+ for (i = 0; i < num_tx_rings; i++) {
+ struct sync_kloop_ring_args a = {
+ .kring = NMR(na, NR_TX)[i + priv->np_qfirst[NR_TX]],
+ .csb_atok = csb_atok_base + i,
+ .csb_ktoa = csb_ktoa_base + i,
+ };
+
+#ifdef SYNC_KLOOP_POLL
+ if (poll_ctx)
+ a.irq_ctx = poll_ctx->entries[i].irq_ctx;
+#endif /* SYNC_KLOOP_POLL */
+ if (unlikely(nm_kr_tryget(a.kring, 1, NULL))) {
+ continue;
+ }
+ netmap_sync_kloop_tx_ring(&a);
+ nm_kr_put(a.kring);
+ }
+
+ /* Process all the RX rings bound to this file descriptor. */
+ for (i = 0; i < num_rx_rings; i++) {
+ struct sync_kloop_ring_args a = {
+ .kring = NMR(na, NR_RX)[i + priv->np_qfirst[NR_RX]],
+ .csb_atok = csb_atok_base + num_tx_rings + i,
+ .csb_ktoa = csb_ktoa_base + num_tx_rings + i,
+ };
+
+#ifdef SYNC_KLOOP_POLL
+ if (poll_ctx)
+ a.irq_ctx = poll_ctx->entries[num_tx_rings + i].irq_ctx;
+#endif /* SYNC_KLOOP_POLL */
+
+ if (unlikely(nm_kr_tryget(a.kring, 1, NULL))) {
+ continue;
+ }
+ netmap_sync_kloop_rx_ring(&a);
+ nm_kr_put(a.kring);
+ }
+
+#ifdef SYNC_KLOOP_POLL
+ if (poll_ctx) {
+ /* If a poll context is present, yield to the scheduler
+ * waiting for a notification to come either from
+ * netmap or the application. */
+ schedule_timeout_interruptible(msecs_to_jiffies(1000));
+ } else
+#endif /* SYNC_KLOOP_POLL */
+ {
+ /* Default synchronization method: sleep for a while. */
+ usleep_range(sleep_us, sleep_us);
+ }
+ }
+out:
+#ifdef SYNC_KLOOP_POLL
+ if (poll_ctx) {
+ /* Stop polling from netmap and the eventfds, and deallocate
+ * the poll context. */
+ __set_current_state(TASK_RUNNING);
+ for (i = 0; i < poll_ctx->next_entry; i++) {
+ struct sync_kloop_poll_entry *entry =
+ poll_ctx->entries + i;
+
+ if (entry->wqh)
+ remove_wait_queue(entry->wqh, &entry->wait);
+ /* Release the references taken on the eventfd files (by
+ * eventfd_fget() above), but not on the netmap file
+ * descriptor, since no reference was taken on that one. */
+ if (entry->filp && entry->filp != priv->np_filp)
+ fput(entry->filp);
+ if (entry->irq_ctx)
+ eventfd_ctx_put(entry->irq_ctx);
+ if (entry->irq_filp)
+ fput(entry->irq_filp);
+ }
+ nm_os_free(poll_ctx);
+ poll_ctx = NULL;
+ }
+#endif /* SYNC_KLOOP_POLL */
+
+ /* Reset the kloop state. */
+ NMG_LOCK();
+ priv->np_kloop_state = 0;
+ NMG_UNLOCK();
+
+ return err;
+}
+
+int
+netmap_sync_kloop_stop(struct netmap_priv_d *priv)
+{
+ bool running = true;
+ int err = 0;
+
+ NMG_LOCK();
+ priv->np_kloop_state |= NM_SYNC_KLOOP_STOPPING;
+ NMG_UNLOCK();
+ while (running) {
+ usleep_range(1000, 1500);
+ NMG_LOCK();
+ running = (NM_ACCESS_ONCE(priv->np_kloop_state)
+ & NM_SYNC_KLOOP_RUNNING);
+ NMG_UNLOCK();
+ }
+
+ return err;
+}
+
+#ifdef WITH_PTNETMAP
+/*
+ * Guest ptnetmap txsync()/rxsync() routines, used in ptnet device drivers.
+ * These routines are reused across the different operating systems supported
+ * by netmap.
+ */
+
+/*
+ * Reconcile host and guest views of the transmit ring.
+ *
+ * Guest user wants to transmit packets up to the one before ring->head,
+ * and guest kernel knows tx_ring->hwcur is the first packet unsent
+ * by the host kernel.
+ *
+ * We push out as many packets as possible, and possibly
+ * reclaim buffers from previously completed transmission.
+ *
+ * Notifications from the host are enabled only if the guest user would
+ * block (no space in the ring).
+ */
+bool
+netmap_pt_guest_txsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa,
+ struct netmap_kring *kring, int flags)
+{
+ bool notify = false;
+
+ /* Disable notifications */
+ atok->appl_need_kick = 0;
+
+ /*
+ * First part: tell the host (updating the CSB) to process the new
+ * packets.
+ */
+ kring->nr_hwcur = ktoa->hwcur;
+ ptnetmap_guest_write_kring_csb(atok, kring->rcur, kring->rhead);
+
+ /* Ask for a kick from a guest to the host if needed. */
+ if (((kring->rhead != kring->nr_hwcur || nm_kr_txempty(kring))
+ && NM_ACCESS_ONCE(ktoa->kern_need_kick)) ||
+ (flags & NAF_FORCE_RECLAIM)) {
+ atok->sync_flags = flags;
+ notify = true;
+ }
+
+ /*
+ * Second part: reclaim buffers for completed transmissions.
+ */
+ if (nm_kr_txempty(kring) || (flags & NAF_FORCE_RECLAIM)) {
+ ptnetmap_guest_read_kring_csb(ktoa, kring);
+ }
+
+ /*
+ * No more room in the ring for new transmissions. The user thread will
+ * go to sleep and we need to be notified by the host when more free
+ * space is available.
+ */
+ if (nm_kr_txempty(kring) && !(kring->nr_kflags & NKR_NOINTR)) {
+ /* Reenable notifications. */
+ atok->appl_need_kick = 1;
+ /* Double check */
+ ptnetmap_guest_read_kring_csb(ktoa, kring);
+ /* If there is new free space, disable notifications */
+ if (unlikely(!nm_kr_txempty(kring))) {
+ atok->appl_need_kick = 0;
+ }
+ }
+
+ nm_prdis(1, "%s CSB(head:%u cur:%u hwtail:%u) KRING(head:%u cur:%u tail:%u)",
+ kring->name, atok->head, atok->cur, ktoa->hwtail,
+ kring->rhead, kring->rcur, kring->nr_hwtail);
+
+ return notify;
+}
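For context, a guest driver's nm_txsync callback is expected to be little more than a wrapper around the helper above, kicking the host only when the return value asks for it. The sketch below is illustrative only: the example_queue layout, example_kick() and the use of na_private to reach the per-ring state are assumptions for this example, not part of this patch.

struct example_queue {
	struct nm_csb_atok *atok;	/* guest --> host CSB entry */
	struct nm_csb_ktoa *ktoa;	/* host --> guest CSB entry */
};

static void example_kick(struct example_queue *q);	/* ring a doorbell register */

static int
example_ptnet_txsync(struct netmap_kring *kring, int flags)
{
	/* Hypothetical per-ring lookup; a real driver keeps this in its softc. */
	struct example_queue *q = kring->na->na_private;

	if (netmap_pt_guest_txsync(q->atok, q->ktoa, kring, flags))
		example_kick(q);	/* the host asked to be notified */
	return 0;
}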
+
+/*
+ * Reconcile host and guest view of the receive ring.
+ *
+ * Update hwcur/hwtail from host (reading from CSB).
+ *
+ * If guest user has released buffers up to the one before ring->head, we
+ * also give them to the host.
+ *
+ * Notifications from the host are enabled only if the guest user would
+ * block (no more completed slots in the ring).
+ */
+bool
+netmap_pt_guest_rxsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa,
+ struct netmap_kring *kring, int flags)
+{
+ bool notify = false;
+
+ /* Disable notifications */
+ atok->appl_need_kick = 0;
+
+ /*
+ * First part: import newly received packets, by updating the kring
+ * hwtail to the hwtail known from the host (read from the CSB).
+ * This also updates the kring hwcur.
+ */
+ ptnetmap_guest_read_kring_csb(ktoa, kring);
+ kring->nr_kflags &= ~NKR_PENDINTR;
+
+ /*
+ * Second part: tell the host about the slots that guest user has
+ * released, by updating cur and head in the CSB.
+ */
+ if (kring->rhead != kring->nr_hwcur) {
+ ptnetmap_guest_write_kring_csb(atok, kring->rcur,
+ kring->rhead);
+ /* Ask for a kick from the guest to the host if needed. */
+ if (NM_ACCESS_ONCE(ktoa->kern_need_kick)) {
+ atok->sync_flags = flags;
+ notify = true;
+ }
+ }
+
+ /*
+ * No more completed RX slots. The user thread will go to sleep and
+ * we need to be notified by the host when more RX slots have been
+ * completed.
+ */
+ if (nm_kr_rxempty(kring) && !(kring->nr_kflags & NKR_NOINTR)) {
+ /* Reenable notifications. */
+ atok->appl_need_kick = 1;
+ /* Double check */
+ ptnetmap_guest_read_kring_csb(ktoa, kring);
+ /* If there are new slots, disable notifications. */
+ if (!nm_kr_rxempty(kring)) {
+ atok->appl_need_kick = 0;
+ }
+ }
+
+ nm_prdis(1, "%s CSB(head:%u cur:%u hwtail:%u) KRING(head:%u cur:%u tail:%u)",
+ kring->name, atok->head, atok->cur, ktoa->hwtail,
+ kring->rhead, kring->rcur, kring->nr_hwtail);
+
+ return notify;
+}
+
+/*
+ * Callbacks for ptnet drivers: nm_krings_create, nm_krings_delete, nm_dtor.
+ */
+int
+ptnet_nm_krings_create(struct netmap_adapter *na)
+{
+ struct netmap_pt_guest_adapter *ptna =
+ (struct netmap_pt_guest_adapter *)na; /* Upcast. */
+ struct netmap_adapter *na_nm = &ptna->hwup.up;
+ struct netmap_adapter *na_dr = &ptna->dr.up;
+ int ret;
+
+ if (ptna->backend_users) {
+ return 0;
+ }
+
+ /* Create krings on the public netmap adapter. */
+ ret = netmap_hw_krings_create(na_nm);
+ if (ret) {
+ return ret;
+ }
+
+ /* Copy krings into the netmap adapter private to the driver. */
+ na_dr->tx_rings = na_nm->tx_rings;
+ na_dr->rx_rings = na_nm->rx_rings;
+
+ return 0;
+}
+
+void
+ptnet_nm_krings_delete(struct netmap_adapter *na)
+{
+ struct netmap_pt_guest_adapter *ptna =
+ (struct netmap_pt_guest_adapter *)na; /* Upcast. */
+ struct netmap_adapter *na_nm = &ptna->hwup.up;
+ struct netmap_adapter *na_dr = &ptna->dr.up;
+
+ if (ptna->backend_users) {
+ return;
+ }
+
+ na_dr->tx_rings = NULL;
+ na_dr->rx_rings = NULL;
+
+ netmap_hw_krings_delete(na_nm);
+}
+
+void
+ptnet_nm_dtor(struct netmap_adapter *na)
+{
+ struct netmap_pt_guest_adapter *ptna =
+ (struct netmap_pt_guest_adapter *)na;
+
+ netmap_mem_put(ptna->dr.up.nm_mem);
+ memset(&ptna->dr, 0, sizeof(ptna->dr));
+ netmap_mem_pt_guest_ifp_del(na->nm_mem, na->ifp);
+}
+
+int
+netmap_pt_guest_attach(struct netmap_adapter *arg,
+ unsigned int nifp_offset, unsigned int memid)
+{
+ struct netmap_pt_guest_adapter *ptna;
+ struct ifnet *ifp = arg ? arg->ifp : NULL;
+ int error;
+
+ /* get allocator */
+ arg->nm_mem = netmap_mem_pt_guest_new(ifp, nifp_offset, memid);
+ if (arg->nm_mem == NULL)
+ return ENOMEM;
+ arg->na_flags |= NAF_MEM_OWNER;
+ error = netmap_attach_ext(arg, sizeof(struct netmap_pt_guest_adapter), 1);
+ if (error)
+ return error;
+
+ /* get the netmap_pt_guest_adapter */
+ ptna = (struct netmap_pt_guest_adapter *) NA(ifp);
+
+ /* Initialize a separate pass-through netmap adapter that is going to
+ * be used by the ptnet driver only, and so never exposed to netmap
+ * applications. We only need a subset of the available fields. */
+ memset(&ptna->dr, 0, sizeof(ptna->dr));
+ ptna->dr.up.ifp = ifp;
+ ptna->dr.up.nm_mem = netmap_mem_get(ptna->hwup.up.nm_mem);
+ ptna->dr.up.nm_config = ptna->hwup.up.nm_config;
+
+ ptna->backend_users = 0;
+
+ return 0;
+}
+
+#endif /* WITH_PTNETMAP */
Index: head/sys/dev/netmap/netmap_legacy.c
===================================================================
--- head/sys/dev/netmap/netmap_legacy.c
+++ head/sys/dev/netmap/netmap_legacy.c
@@ -56,6 +56,7 @@
*/
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
+#include <dev/netmap/netmap_bdg.h>
static int
nmreq_register_from_legacy(struct nmreq *nmr, struct nmreq_header *hdr,
@@ -80,10 +81,11 @@
} else {
regmode = NR_REG_ALL_NIC;
}
- nmr->nr_flags = regmode |
- (nmr->nr_flags & (~NR_REG_MASK));
+ req->nr_mode = regmode;
+ } else {
+ req->nr_mode = nmr->nr_flags & NR_REG_MASK;
}
- req->nr_mode = nmr->nr_flags & NR_REG_MASK;
+
/* Fix nr_name, nr_mode and nr_ringid to handle pipe requests. */
if (req->nr_mode == NR_REG_PIPE_MASTER ||
req->nr_mode == NR_REG_PIPE_SLAVE) {
@@ -131,7 +133,7 @@
/* First prepare the request header. */
hdr->nr_version = NETMAP_API; /* new API */
- strncpy(hdr->nr_name, nmr->nr_name, sizeof(nmr->nr_name));
+ strlcpy(hdr->nr_name, nmr->nr_name, sizeof(nmr->nr_name));
hdr->nr_options = (uintptr_t)NULL;
hdr->nr_body = (uintptr_t)NULL;
@@ -221,7 +223,7 @@
}
case NETMAP_PT_HOST_CREATE:
case NETMAP_PT_HOST_DELETE: {
- D("Netmap passthrough not supported yet");
+ nm_prerr("Netmap passthrough not supported yet");
return NULL;
break;
}
@@ -242,7 +244,6 @@
if (!req) { goto oom; }
hdr->nr_body = (uintptr_t)req;
hdr->nr_reqtype = NETMAP_REQ_PORT_INFO_GET;
- req->nr_offset = nmr->nr_offset;
req->nr_memsize = nmr->nr_memsize;
req->nr_tx_slots = nmr->nr_tx_slots;
req->nr_rx_slots = nmr->nr_rx_slots;
@@ -262,7 +263,7 @@
}
nm_os_free(hdr);
}
- D("Failed to allocate memory for nmreq_xyz struct");
+ nm_prerr("Failed to allocate memory for nmreq_xyz struct");
return NULL;
}
@@ -300,7 +301,6 @@
case NETMAP_REQ_PORT_INFO_GET: {
struct nmreq_port_info_get *req =
(struct nmreq_port_info_get *)(uintptr_t)hdr->nr_body;
- nmr->nr_offset = req->nr_offset;
nmr->nr_memsize = req->nr_memsize;
nmr->nr_tx_slots = req->nr_tx_slots;
nmr->nr_rx_slots = req->nr_rx_slots;
@@ -321,7 +321,7 @@
case NETMAP_REQ_VALE_LIST: {
struct nmreq_vale_list *req =
(struct nmreq_vale_list *)(uintptr_t)hdr->nr_body;
- strncpy(nmr->nr_name, hdr->nr_name, sizeof(nmr->nr_name));
+ strlcpy(nmr->nr_name, hdr->nr_name, sizeof(nmr->nr_name));
nmr->nr_arg1 = req->nr_bridge_idx;
nmr->nr_arg2 = req->nr_port_idx;
break;
Index: head/sys/dev/netmap/netmap_mem2.h
===================================================================
--- head/sys/dev/netmap/netmap_mem2.h
+++ head/sys/dev/netmap/netmap_mem2.h
@@ -158,14 +158,14 @@
({ int *perr = _perr; if (perr) *(perr) = EOPNOTSUPP; NULL; })
#endif /* WITH_EXTMEM */
-#ifdef WITH_PTNETMAP_GUEST
+#ifdef WITH_PTNETMAP
struct netmap_mem_d* netmap_mem_pt_guest_new(struct ifnet *,
unsigned int nifp_offset,
unsigned int memid);
struct ptnetmap_memdev;
struct netmap_mem_d* netmap_mem_pt_guest_attach(struct ptnetmap_memdev *, uint16_t);
int netmap_mem_pt_guest_ifp_del(struct netmap_mem_d *, struct ifnet *);
-#endif /* WITH_PTNETMAP_GUEST */
+#endif /* WITH_PTNETMAP */
int netmap_mem_pools_info_get(struct nmreq_pools_info *,
struct netmap_mem_d *);
Index: head/sys/dev/netmap/netmap_mem2.c
===================================================================
--- head/sys/dev/netmap/netmap_mem2.c
+++ head/sys/dev/netmap/netmap_mem2.c
@@ -318,7 +318,7 @@
#ifdef NM_DEBUG_MEM_PUTGET
#define NM_DBG_REFC(nmd, func, line) \
- nm_prinf("%s:%d mem[%d] -> %d\n", func, line, (nmd)->nm_id, (nmd)->refcount);
+ nm_prinf("%d mem[%d] -> %d", line, (nmd)->nm_id, (nmd)->refcount);
#else
#define NM_DBG_REFC(nmd, func, line)
#endif
@@ -397,15 +397,15 @@
if (p->bitmap == NULL) {
/* Allocate the bitmap */
n = (p->objtotal + 31) / 32;
- p->bitmap = nm_os_malloc(sizeof(uint32_t) * n);
+ p->bitmap = nm_os_malloc(sizeof(p->bitmap[0]) * n);
if (p->bitmap == NULL) {
- D("Unable to create bitmap (%d entries) for allocator '%s'", (int)n,
+ nm_prerr("Unable to create bitmap (%d entries) for allocator '%s'", (int)n,
p->name);
return ENOMEM;
}
p->bitmap_slots = n;
} else {
- memset(p->bitmap, 0, p->bitmap_slots);
+ memset(p->bitmap, 0, p->bitmap_slots * sizeof(p->bitmap[0]));
}
p->objfree = 0;
@@ -416,16 +416,21 @@
*/
for (j = 0; j < p->objtotal; j++) {
if (p->invalid_bitmap && nm_isset(p->invalid_bitmap, j)) {
- D("skipping %s %d", p->name, j);
+ if (netmap_debug & NM_DEBUG_MEM)
+ nm_prinf("skipping %s %d", p->name, j);
continue;
}
p->bitmap[ (j>>5) ] |= ( 1U << (j & 31U) );
p->objfree++;
}
- ND("%s free %u", p->name, p->objfree);
- if (p->objfree == 0)
+ if (netmap_verbose)
+ nm_prinf("%s free %u", p->name, p->objfree);
+ if (p->objfree == 0) {
+ if (netmap_verbose)
+ nm_prerr("%s: no objects available", p->name);
return ENOMEM;
+ }
return 0;
}
@@ -447,6 +452,7 @@
* buffers 0 and 1 are reserved
*/
if (nmd->pools[NETMAP_BUF_POOL].objfree < 2) {
+ nm_prerr("%s: not enough buffers", nmd->pools[NETMAP_BUF_POOL].name);
return ENOMEM;
}
@@ -480,8 +486,10 @@
nmd->ops->nmd_deref(nmd);
nmd->active--;
- if (!nmd->active)
+ if (last_user) {
nmd->nm_grp = -1;
+ nmd->lasterr = 0;
+ }
NMA_UNLOCK(nmd);
return last_user;
@@ -720,16 +728,20 @@
{
int err = 0, id;
id = nm_iommu_group_id(dev);
- if (netmap_verbose)
- D("iommu_group %d", id);
+ if (netmap_debug & NM_DEBUG_MEM)
+ nm_prinf("iommu_group %d", id);
NMA_LOCK(nmd);
if (nmd->nm_grp < 0)
nmd->nm_grp = id;
- if (nmd->nm_grp != id)
+ if (nmd->nm_grp != id) {
+ if (netmap_verbose)
+ nm_prerr("iommu group mismatch: %u vs %u",
+ nmd->nm_grp, id);
nmd->lasterr = err = ENOMEM;
+ }
NMA_UNLOCK(nmd);
return err;
@@ -805,7 +817,7 @@
return pa;
}
/* this is only in case of errors */
- D("invalid ofs 0x%x out of 0x%x 0x%x 0x%x", (u_int)o,
+ nm_prerr("invalid ofs 0x%x out of 0x%x 0x%x 0x%x", (u_int)o,
p[NETMAP_IF_POOL].memtotal,
p[NETMAP_IF_POOL].memtotal
+ p[NETMAP_RING_POOL].memtotal,
@@ -854,13 +866,13 @@
int i, j;
if (netmap_mem_get_info(nmd, &memsize, &memflags, NULL)) {
- D("memory not finalised yet");
+ nm_prerr("memory not finalised yet");
return NULL;
}
mainMdl = IoAllocateMdl(NULL, memsize, FALSE, FALSE, NULL);
if (mainMdl == NULL) {
- D("failed to allocate mdl");
+ nm_prerr("failed to allocate mdl");
return NULL;
}
@@ -876,7 +888,7 @@
tempMdl = IoAllocateMdl(p->lut[0].vaddr, clsz, FALSE, FALSE, NULL);
if (tempMdl == NULL) {
NMA_UNLOCK(nmd);
- D("fail to allocate tempMdl");
+ nm_prerr("fail to allocate tempMdl");
IoFreeMdl(mainMdl);
return NULL;
}
@@ -971,7 +983,7 @@
p->name, ofs, i, vaddr);
return ofs;
}
- D("address %p is not contained inside any cluster (%s)",
+ nm_prerr("address %p is not contained inside any cluster (%s)",
vaddr, p->name);
return 0; /* An error occurred */
}
@@ -1002,12 +1014,12 @@
void *vaddr = NULL;
if (len > p->_objsize) {
- D("%s request size %d too large", p->name, len);
+ nm_prerr("%s request size %d too large", p->name, len);
return NULL;
}
if (p->objfree == 0) {
- D("no more %s objects", p->name);
+ nm_prerr("no more %s objects", p->name);
return NULL;
}
if (start)
@@ -1049,13 +1061,13 @@
uint32_t *ptr, mask;
if (j >= p->objtotal) {
- D("invalid index %u, max %u", j, p->objtotal);
+ nm_prerr("invalid index %u, max %u", j, p->objtotal);
return 1;
}
ptr = &p->bitmap[j / 32];
mask = (1 << (j % 32));
if (*ptr & mask) {
- D("ouch, double free on buffer %d", j);
+ nm_prerr("ouch, double free on buffer %d", j);
return 1;
} else {
*ptr |= mask;
@@ -1086,7 +1098,7 @@
netmap_obj_free(p, j);
return;
}
- D("address %p is not contained inside any cluster (%s)",
+ nm_prerr("address %p is not contained inside any cluster (%s)",
vaddr, p->name);
}
@@ -1127,7 +1139,7 @@
uint32_t cur = *head; /* save current head */
uint32_t *p = netmap_buf_malloc(nmd, &pos, head);
if (p == NULL) {
- D("no more buffers after %d of %d", i, n);
+ nm_prerr("no more buffers after %d of %d", i, n);
*head = cur; /* restore */
break;
}
@@ -1158,9 +1170,9 @@
break;
}
if (head != 0)
- D("breaking with head %d", head);
- if (netmap_verbose)
- D("freed %d buffers", i);
+ nm_prerr("breaking with head %d", head);
+ if (netmap_debug & NM_DEBUG_MEM)
+ nm_prinf("freed %d buffers", i);
}
@@ -1176,7 +1188,7 @@
for (i = 0; i < n; i++) {
void *vaddr = netmap_buf_malloc(nmd, &pos, &index);
if (vaddr == NULL) {
- D("no more buffers after %d of %d", i, n);
+ nm_prerr("no more buffers after %d of %d", i, n);
goto cleanup;
}
slot[i].buf_idx = index;
@@ -1217,7 +1229,7 @@
struct netmap_obj_pool *p = &nmd->pools[NETMAP_BUF_POOL];
if (i < 2 || i >= p->objtotal) {
- D("Cannot free buf#%d: should be in [2, %d[", i, p->objtotal);
+ nm_prerr("Cannot free buf#%d: should be in [2, %d[", i, p->objtotal);
return;
}
netmap_obj_free(p, i);
@@ -1317,22 +1329,22 @@
#define LINE_ROUND NM_CACHE_ALIGN // 64
if (objsize >= MAX_CLUSTSIZE) {
/* we could do it but there is no point */
- D("unsupported allocation for %d bytes", objsize);
+ nm_prerr("unsupported allocation for %d bytes", objsize);
return EINVAL;
}
/* make sure objsize is a multiple of LINE_ROUND */
i = (objsize & (LINE_ROUND - 1));
if (i) {
- D("XXX aligning object by %d bytes", LINE_ROUND - i);
+ nm_prinf("aligning object by %d bytes", LINE_ROUND - i);
objsize += LINE_ROUND - i;
}
if (objsize < p->objminsize || objsize > p->objmaxsize) {
- D("requested objsize %d out of range [%d, %d]",
+ nm_prerr("requested objsize %d out of range [%d, %d]",
objsize, p->objminsize, p->objmaxsize);
return EINVAL;
}
if (objtotal < p->nummin || objtotal > p->nummax) {
- D("requested objtotal %d out of range [%d, %d]",
+ nm_prerr("requested objtotal %d out of range [%d, %d]",
objtotal, p->nummin, p->nummax);
return EINVAL;
}
@@ -1354,13 +1366,13 @@
}
/* exact solution not found */
if (clustentries == 0) {
- D("unsupported allocation for %d bytes", objsize);
+ nm_prerr("unsupported allocation for %d bytes", objsize);
return EINVAL;
}
/* compute clustsize */
clustsize = clustentries * objsize;
- if (netmap_verbose)
- D("objsize %d clustsize %d objects %d",
+ if (netmap_debug & NM_DEBUG_MEM)
+ nm_prinf("objsize %d clustsize %d objects %d",
objsize, clustsize, clustentries);
/*
@@ -1403,7 +1415,7 @@
p->lut = nm_alloc_lut(p->objtotal);
if (p->lut == NULL) {
- D("Unable to create lookup table for '%s'", p->name);
+ nm_prerr("Unable to create lookup table for '%s'", p->name);
goto clean;
}
@@ -1430,7 +1442,7 @@
* If we get here, there is a severe memory shortage,
* so halve the allocated memory to reclaim some.
*/
- D("Unable to create cluster at %d for '%s' allocator",
+ nm_prerr("Unable to create cluster at %d for '%s' allocator",
i, p->name);
if (i < 2) /* nothing to halve */
goto out;
@@ -1466,7 +1478,7 @@
}
p->memtotal = p->numclusters * p->_clustsize;
if (netmap_verbose)
- D("Pre-allocated %d clusters (%d/%dKB) for '%s'",
+ nm_prinf("Pre-allocated %d clusters (%d/%dKB) for '%s'",
p->numclusters, p->_clustsize >> 10,
p->memtotal >> 10, p->name);
@@ -1498,8 +1510,8 @@
{
int i;
- if (netmap_verbose)
- D("resetting %p", nmd);
+ if (netmap_debug & NM_DEBUG_MEM)
+ nm_prinf("resetting %p", nmd);
for (i = 0; i < NETMAP_POOLS_NR; i++) {
netmap_reset_obj_allocator(&nmd->pools[i]);
}
@@ -1525,7 +1537,7 @@
(void)i;
(void)lim;
(void)lut;
- D("unsupported on Windows");
+ nm_prerr("unsupported on Windows");
#else /* linux */
ND("unmapping and freeing plut for %s", na->name);
if (lut->plut == NULL)
@@ -1561,7 +1573,7 @@
(void)i;
(void)lim;
(void)lut;
- D("unsupported on Windows");
+ nm_prerr("unsupported on Windows");
#else /* linux */
if (lut->plut != NULL) {
@@ -1572,7 +1584,7 @@
ND("allocating physical lut for %s", na->name);
lut->plut = nm_alloc_plut(lim);
if (lut->plut == NULL) {
- D("Failed to allocate physical lut for %s", na->name);
+ nm_prerr("Failed to allocate physical lut for %s", na->name);
return ENOMEM;
}
@@ -1589,7 +1601,7 @@
error = netmap_load_map(na, (bus_dma_tag_t) na->pdev, &lut->plut[i].paddr,
p->lut[i].vaddr, p->_clustsize);
if (error) {
- D("Failed to map cluster #%d from the %s pool", i, p->name);
+ nm_prerr("Failed to map cluster #%d from the %s pool", i, p->name);
break;
}
@@ -1627,13 +1639,13 @@
nmd->flags |= NETMAP_MEM_FINALIZED;
if (netmap_verbose)
- D("interfaces %d KB, rings %d KB, buffers %d MB",
+ nm_prinf("interfaces %d KB, rings %d KB, buffers %d MB",
nmd->pools[NETMAP_IF_POOL].memtotal >> 10,
nmd->pools[NETMAP_RING_POOL].memtotal >> 10,
nmd->pools[NETMAP_BUF_POOL].memtotal >> 20);
if (netmap_verbose)
- D("Free buffers: %d", nmd->pools[NETMAP_BUF_POOL].objfree);
+ nm_prinf("Free buffers: %d", nmd->pools[NETMAP_BUF_POOL].objfree);
return 0;
@@ -1740,7 +1752,7 @@
p[NETMAP_BUF_POOL].num = v;
if (netmap_verbose)
- D("req if %d*%d ring %d*%d buf %d*%d",
+ nm_prinf("req if %d*%d ring %d*%d buf %d*%d",
p[NETMAP_IF_POOL].num,
p[NETMAP_IF_POOL].size,
p[NETMAP_RING_POOL].num,
@@ -1850,13 +1862,13 @@
struct netmap_ring *ring = kring->ring;
if (ring == NULL || kring->users > 0 || (kring->nr_kflags & NKR_NEEDRING)) {
- if (netmap_verbose)
- D("NOT deleting ring %s (ring %p, users %d neekring %d)",
+ if (netmap_debug & NM_DEBUG_MEM)
+ nm_prinf("NOT deleting ring %s (ring %p, users %d neekring %d)",
kring->name, ring, kring->users, kring->nr_kflags & NKR_NEEDRING);
continue;
}
- if (netmap_verbose)
- D("deleting ring %s", kring->name);
+ if (netmap_debug & NM_DEBUG_MEM)
+ nm_prinf("deleting ring %s", kring->name);
if (!(kring->nr_kflags & NKR_FAKERING)) {
ND("freeing bufs for %s", kring->name);
netmap_free_bufs(na->nm_mem, ring->slot, kring->nkr_num_slots);
@@ -1891,19 +1903,19 @@
if (ring || (!kring->users && !(kring->nr_kflags & NKR_NEEDRING))) {
/* unneeded, or already created by somebody else */
- if (netmap_verbose)
- D("NOT creating ring %s (ring %p, users %d neekring %d)",
+ if (netmap_debug & NM_DEBUG_MEM)
+ nm_prinf("NOT creating ring %s (ring %p, users %d neekring %d)",
kring->name, ring, kring->users, kring->nr_kflags & NKR_NEEDRING);
continue;
}
- if (netmap_verbose)
- D("creating %s", kring->name);
+ if (netmap_debug & NM_DEBUG_MEM)
+ nm_prinf("creating %s", kring->name);
ndesc = kring->nkr_num_slots;
len = sizeof(struct netmap_ring) +
ndesc * sizeof(struct netmap_slot);
ring = netmap_ring_malloc(na->nm_mem, len);
if (ring == NULL) {
- D("Cannot allocate %s_ring", nm_txrx2str(t));
+ nm_prerr("Cannot allocate %s_ring", nm_txrx2str(t));
goto cleanup;
}
ND("txring at %p", ring);
@@ -1925,14 +1937,16 @@
ND("initializing slots for %s_ring", nm_txrx2str(t));
if (!(kring->nr_kflags & NKR_FAKERING)) {
/* this is a real ring */
- ND("allocating buffers for %s", kring->name);
+ if (netmap_debug & NM_DEBUG_MEM)
+ nm_prinf("allocating buffers for %s", kring->name);
if (netmap_new_bufs(na->nm_mem, ring->slot, ndesc)) {
- D("Cannot allocate buffers for %s_ring", nm_txrx2str(t));
+ nm_prerr("Cannot allocate buffers for %s_ring", nm_txrx2str(t));
goto cleanup;
}
} else {
/* this is a fake ring, set all indices to 0 */
- ND("NOT allocating buffers for %s", kring->name);
+ if (netmap_debug & NM_DEBUG_MEM)
+ nm_prinf("NOT allocating buffers for %s", kring->name);
netmap_mem_set_ring(na->nm_mem, ring->slot, ndesc, 0);
}
/* ring info */
@@ -1998,7 +2012,7 @@
/* initialize base fields -- override const */
*(u_int *)(uintptr_t)&nifp->ni_tx_rings = na->num_tx_rings;
*(u_int *)(uintptr_t)&nifp->ni_rx_rings = na->num_rx_rings;
- strncpy(nifp->ni_name, na->name, (size_t)IFNAMSIZ);
+ strlcpy(nifp->ni_name, na->name, sizeof(nifp->ni_name));
/*
* fill the slots for the rx and tx rings. They contain the offset
@@ -2049,8 +2063,8 @@
netmap_mem2_deref(struct netmap_mem_d *nmd)
{
- if (netmap_verbose)
- D("active = %d", nmd->active);
+ if (netmap_debug & NM_DEBUG_MEM)
+ nm_prinf("active = %d", nmd->active);
}
@@ -2217,14 +2231,15 @@
pi->nr_buf_pool_objtotal = netmap_min_priv_params[NETMAP_BUF_POOL].num;
if (pi->nr_buf_pool_objsize == 0)
pi->nr_buf_pool_objsize = netmap_min_priv_params[NETMAP_BUF_POOL].size;
- D("if %d %d ring %d %d buf %d %d",
+ if (netmap_verbose & NM_DEBUG_MEM)
+ nm_prinf("if %d %d ring %d %d buf %d %d",
pi->nr_if_pool_objtotal, pi->nr_if_pool_objsize,
pi->nr_ring_pool_objtotal, pi->nr_ring_pool_objsize,
pi->nr_buf_pool_objtotal, pi->nr_buf_pool_objsize);
os = nm_os_extmem_create(usrptr, pi, &error);
if (os == NULL) {
- D("os extmem creation failed");
+ nm_prerr("os extmem creation failed");
goto out;
}
@@ -2233,7 +2248,8 @@
nm_os_extmem_delete(os);
return &nme->up;
}
- D("not found, creating new");
+ if (netmap_verbose & NM_DEBUG_MEM)
+ nm_prinf("not found, creating new");
nme = _netmap_mem_private_new(sizeof(*nme),
(struct netmap_obj_params[]){
@@ -2343,7 +2359,7 @@
#endif /* WITH_EXTMEM */
-#ifdef WITH_PTNETMAP_GUEST
+#ifdef WITH_PTNETMAP
struct mem_pt_if {
struct mem_pt_if *next;
struct ifnet *ifp;
@@ -2386,7 +2402,8 @@
NMA_UNLOCK(nmd);
- D("added (ifp=%p,nifp_offset=%u)", ptif->ifp, ptif->nifp_offset);
+ nm_prinf("ifp=%s,nifp_offset=%u",
+ ptif->ifp->if_xname, ptif->nifp_offset);
return 0;
}
@@ -2667,7 +2684,7 @@
continue;
kring->ring = (struct netmap_ring *)
((char *)nifp +
- nifp->ring_ofs[i + na->num_tx_rings + 1]);
+ nifp->ring_ofs[netmap_all_rings(na, NR_TX) + i]);
}
error = 0;
@@ -2832,4 +2849,4 @@
return nmd;
}
-#endif /* WITH_PTNETMAP_GUEST */
+#endif /* WITH_PTNETMAP */
Index: head/sys/dev/netmap/netmap_null.c
===================================================================
--- head/sys/dev/netmap/netmap_null.c
+++ head/sys/dev/netmap/netmap_null.c
@@ -0,0 +1,184 @@
+/*
+ * Copyright (C) 2018 Giuseppe Lettieri
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/* $FreeBSD$ */
+
+#if defined(__FreeBSD__)
+#include <sys/cdefs.h> /* prerequisite */
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/param.h> /* defines used in kernel.h */
+#include <sys/kernel.h> /* types used in module initialization */
+#include <sys/malloc.h>
+#include <sys/poll.h>
+#include <sys/lock.h>
+#include <sys/rwlock.h>
+#include <sys/selinfo.h>
+#include <sys/sysctl.h>
+#include <sys/socket.h> /* sockaddrs */
+#include <net/if.h>
+#include <net/if_var.h>
+#include <machine/bus.h> /* bus_dmamap_* */
+#include <sys/refcount.h>
+
+
+#elif defined(linux)
+
+#include "bsd_glue.h"
+
+#elif defined(__APPLE__)
+
+#warning OSX support is only partial
+#include "osx_glue.h"
+
+#elif defined(_WIN32)
+#include "win_glue.h"
+
+#else
+
+#error Unsupported platform
+
+#endif /* unsupported */
+
+/*
+ * common headers
+ */
+
+#include <net/netmap.h>
+#include <dev/netmap/netmap_kern.h>
+#include <dev/netmap/netmap_mem2.h>
+
+#ifdef WITH_NMNULL
+
+static int
+netmap_null_txsync(struct netmap_kring *kring, int flags)
+{
+ (void)kring;
+ (void)flags;
+ return 0;
+}
+
+static int
+netmap_null_rxsync(struct netmap_kring *kring, int flags)
+{
+ (void)kring;
+ (void)flags;
+ return 0;
+}
+
+static int
+netmap_null_krings_create(struct netmap_adapter *na)
+{
+ return netmap_krings_create(na, 0);
+}
+
+static void
+netmap_null_krings_delete(struct netmap_adapter *na)
+{
+ netmap_krings_delete(na);
+}
+
+static int
+netmap_null_reg(struct netmap_adapter *na, int onoff)
+{
+ if (na->active_fds == 0) {
+ if (onoff)
+ na->na_flags |= NAF_NETMAP_ON;
+ else
+ na->na_flags &= ~NAF_NETMAP_ON;
+ }
+ return 0;
+}
+
+static int
+netmap_null_bdg_attach(const char *name, struct netmap_adapter *na,
+ struct nm_bridge *b)
+{
+ (void)name;
+ (void)na;
+ (void)b;
+ return EINVAL;
+}
+
+int
+netmap_get_null_na(struct nmreq_header *hdr, struct netmap_adapter **na,
+ struct netmap_mem_d *nmd, int create)
+{
+ struct nmreq_register *req = (struct nmreq_register *)(uintptr_t)hdr->nr_body;
+ struct netmap_null_adapter *nna;
+ int error;
+
+ if (req->nr_mode != NR_REG_NULL) {
+ nm_prdis("not a null port");
+ return 0;
+ }
+
+ if (!create) {
+ nm_prerr("null ports cannot be re-opened");
+ return EINVAL;
+ }
+
+ if (nmd == NULL) {
+ nm_prerr("null ports must use an existing allocator");
+ return EINVAL;
+ }
+
+ nna = nm_os_malloc(sizeof(*nna));
+ if (nna == NULL) {
+ error = ENOMEM;
+ goto err;
+ }
+ snprintf(nna->up.name, sizeof(nna->up.name), "null:%s", hdr->nr_name);
+
+ nna->up.nm_txsync = netmap_null_txsync;
+ nna->up.nm_rxsync = netmap_null_rxsync;
+ nna->up.nm_register = netmap_null_reg;
+ nna->up.nm_krings_create = netmap_null_krings_create;
+ nna->up.nm_krings_delete = netmap_null_krings_delete;
+ nna->up.nm_bdg_attach = netmap_null_bdg_attach;
+ nna->up.nm_mem = netmap_mem_get(nmd);
+
+ nna->up.num_tx_rings = req->nr_tx_rings;
+ nna->up.num_rx_rings = req->nr_rx_rings;
+ nna->up.num_tx_desc = req->nr_tx_slots;
+ nna->up.num_rx_desc = req->nr_rx_slots;
+ error = netmap_attach_common(&nna->up);
+ if (error)
+ goto free_nna;
+ *na = &nna->up;
+ netmap_adapter_get(*na);
+ nm_prdis("created null %s", nna->up.name);
+
+ return 0;
+
+free_nna:
+ nm_os_free(nna);
+err:
+ return error;
+}
+
+
+#endif /* WITH_NMNULL */
Index: head/sys/dev/netmap/netmap_pipe.c
===================================================================
--- head/sys/dev/netmap/netmap_pipe.c
+++ head/sys/dev/netmap/netmap_pipe.c
@@ -443,7 +443,7 @@
/* In case of no error we put our rings in netmap mode */
for_rx_tx(t) {
- for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
+ for (i = 0; i < nma_get_nrings(na, t); i++) {
struct netmap_kring *kring = NMR(na, t)[i];
if (nm_kring_pending_on(kring)) {
struct netmap_kring *sring, *dring;
@@ -490,7 +490,7 @@
if (na->active_fds == 0)
na->na_flags &= ~NAF_NETMAP_ON;
for_rx_tx(t) {
- for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
+ for (i = 0; i < nma_get_nrings(na, t); i++) {
struct netmap_kring *kring = NMR(na, t)[i];
if (nm_kring_pending_off(kring)) {
@@ -567,7 +567,7 @@
sna = na;
cleanup:
for_rx_tx(t) {
- for (i = 0; i < nma_get_nrings(sna, t) + 1; i++) {
+ for (i = 0; i < nma_get_nrings(sna, t); i++) {
struct netmap_kring *kring = NMR(sna, t)[i];
struct netmap_ring *ring = kring->ring;
uint32_t j, lim = kring->nkr_num_slots - 1;
@@ -674,11 +674,11 @@
int create_error;
/* Temporarily remove the pipe suffix. */
- strncpy(nr_name_orig, hdr->nr_name, sizeof(nr_name_orig));
+ strlcpy(nr_name_orig, hdr->nr_name, sizeof(nr_name_orig));
*cbra = '\0';
error = netmap_get_na(hdr, &pna, &ifp, nmd, create);
/* Restore the pipe suffix. */
- strncpy(hdr->nr_name, nr_name_orig, sizeof(hdr->nr_name));
+ strlcpy(hdr->nr_name, nr_name_orig, sizeof(hdr->nr_name));
if (!error)
break;
if (error != ENXIO || retries++) {
@@ -691,7 +691,7 @@
NMG_UNLOCK();
create_error = netmap_vi_create(hdr, 1 /* autodelete */);
NMG_LOCK();
- strncpy(hdr->nr_name, nr_name_orig, sizeof(hdr->nr_name));
+ strlcpy(hdr->nr_name, nr_name_orig, sizeof(hdr->nr_name));
if (create_error && create_error != EEXIST) {
if (create_error != EOPNOTSUPP) {
D("failed to create a persistent vale port: %d", create_error);
Index: head/sys/dev/netmap/netmap_vale.c
===================================================================
--- head/sys/dev/netmap/netmap_vale.c
+++ head/sys/dev/netmap/netmap_vale.c
@@ -121,18 +121,18 @@
"Max batch size to be used in the bridge");
SYSEND;
-static int netmap_vp_create(struct nmreq_header *hdr, struct ifnet *,
+static int netmap_vale_vp_create(struct nmreq_header *hdr, struct ifnet *,
struct netmap_mem_d *nmd, struct netmap_vp_adapter **);
-static int netmap_vp_bdg_attach(const char *, struct netmap_adapter *,
+static int netmap_vale_vp_bdg_attach(const char *, struct netmap_adapter *,
struct nm_bridge *);
static int netmap_vale_bwrap_attach(const char *, struct netmap_adapter *);
/*
- * For each output interface, nm_bdg_q is used to construct a list.
+ * For each output interface, nm_vale_q is used to construct a list.
* bq_len is the number of output buffers (we can have coalescing
* during the copy).
*/
-struct nm_bdg_q {
+struct nm_vale_q {
uint16_t bq_head;
uint16_t bq_tail;
uint32_t bq_len; /* number of buffers */
@@ -140,10 +140,10 @@
/* Holds the default callbacks */
struct netmap_bdg_ops vale_bdg_ops = {
- .lookup = netmap_bdg_learning,
+ .lookup = netmap_vale_learning,
.config = NULL,
.dtor = NULL,
- .vp_create = netmap_vp_create,
+ .vp_create = netmap_vale_vp_create,
.bwrap_attach = netmap_vale_bwrap_attach,
.name = NM_BDG_NAME,
};
@@ -212,14 +212,14 @@
/* all port:rings + broadcast */
num_dstq = NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1;
l = sizeof(struct nm_bdg_fwd) * NM_BDG_BATCH_MAX;
- l += sizeof(struct nm_bdg_q) * num_dstq;
+ l += sizeof(struct nm_vale_q) * num_dstq;
l += sizeof(uint16_t) * NM_BDG_BATCH_MAX;
nrings = netmap_real_rings(na, NR_TX);
kring = na->tx_rings;
for (i = 0; i < nrings; i++) {
struct nm_bdg_fwd *ft;
- struct nm_bdg_q *dstq;
+ struct nm_vale_q *dstq;
int j;
ft = nm_os_malloc(l);
@@ -227,7 +227,7 @@
nm_free_bdgfwd(na);
return ENOMEM;
}
- dstq = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
+ dstq = (struct nm_vale_q *)(ft + NM_BDG_BATCH_MAX);
for (j = 0; j < num_dstq; j++) {
dstq[j].bq_head = dstq[j].bq_tail = NM_FT_NULL;
dstq[j].bq_len = 0;
@@ -307,11 +307,228 @@
return ret;
}
+/* Process NETMAP_REQ_VALE_LIST. */
+int
+netmap_vale_list(struct nmreq_header *hdr)
+{
+ struct nmreq_vale_list *req =
+ (struct nmreq_vale_list *)(uintptr_t)hdr->nr_body;
+ int namelen = strlen(hdr->nr_name);
+ struct nm_bridge *b, *bridges;
+ struct netmap_vp_adapter *vpna;
+ int error = 0, i, j;
+ u_int num_bridges;
+ netmap_bns_getbridges(&bridges, &num_bridges);
+ /* this is used to enumerate bridges and ports */
+ if (namelen) { /* look up indexes of bridge and port */
+ if (strncmp(hdr->nr_name, NM_BDG_NAME,
+ strlen(NM_BDG_NAME))) {
+ return EINVAL;
+ }
+ NMG_LOCK();
+ b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
+ if (!b) {
+ NMG_UNLOCK();
+ return ENOENT;
+ }
+
+ req->nr_bridge_idx = b - bridges; /* bridge index */
+ req->nr_port_idx = NM_BDG_NOPORT;
+ for (j = 0; j < b->bdg_active_ports; j++) {
+ i = b->bdg_port_index[j];
+ vpna = b->bdg_ports[i];
+ if (vpna == NULL) {
+ nm_prerr("This should not happen");
+ continue;
+ }
+ /* the former and the latter identify a
+ * virtual port and a NIC, respectively
+ */
+ if (!strcmp(vpna->up.name, hdr->nr_name)) {
+ req->nr_port_idx = i; /* port index */
+ break;
+ }
+ }
+ NMG_UNLOCK();
+ } else {
+ /* Return the first non-empty entry starting from
+ * bridge nr_bridge_idx and port nr_port_idx.
+ *
+ * Users can detect the end of the same bridge by
+ * comparing the new and old value of nr_bridge_idx, and
+ * can detect the end of all the bridges by error != 0
+ */
+ i = req->nr_bridge_idx;
+ j = req->nr_port_idx;
+
+ NMG_LOCK();
+ for (error = ENOENT; i < NM_BRIDGES; i++) {
+ b = bridges + i;
+ for ( ; j < NM_BDG_MAXPORTS; j++) {
+ if (b->bdg_ports[j] == NULL)
+ continue;
+ vpna = b->bdg_ports[j];
+ /* write back the VALE switch name */
+ strlcpy(hdr->nr_name, vpna->up.name,
+ sizeof(hdr->nr_name));
+ error = 0;
+ goto out;
+ }
+ j = 0; /* following bridges scan from 0 */
+ }
+ out:
+ req->nr_bridge_idx = i;
+ req->nr_port_idx = j;
+ NMG_UNLOCK();
+ }
+
+ return error;
+}
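For reference, the enumeration protocol implemented above can be driven from user space with the new control API (struct nmreq_header plus the NIOCCTRL ioctl, both declared in net/netmap.h). A hedged sketch with minimal error handling; 'fd' is assumed to be an open /dev/netmap control descriptor:

#include <sys/ioctl.h>
#include <net/netmap.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void
list_vale_ports(int fd)
{
	struct nmreq_vale_list req;

	memset(&req, 0, sizeof(req));
	for (;;) {
		struct nmreq_header hdr;

		memset(&hdr, 0, sizeof(hdr));
		hdr.nr_version = NETMAP_API;
		hdr.nr_reqtype = NETMAP_REQ_VALE_LIST;
		hdr.nr_body = (uintptr_t)&req;
		/* An empty hdr.nr_name selects the enumeration mode. */
		if (ioctl(fd, NIOCCTRL, &hdr) < 0)
			break;	/* ENOENT: no more ports */
		printf("bridge %u port %u: %s\n", req.nr_bridge_idx,
		    req.nr_port_idx, hdr.nr_name);
		req.nr_port_idx++;	/* resume the scan after this port */
	}
}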
+
+/* Process NETMAP_REQ_VALE_ATTACH.
+ */
+int
+netmap_vale_attach(struct nmreq_header *hdr, void *auth_token)
+{
+ struct nmreq_vale_attach *req =
+ (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
+ struct netmap_vp_adapter * vpna;
+ struct netmap_adapter *na = NULL;
+ struct netmap_mem_d *nmd = NULL;
+ struct nm_bridge *b = NULL;
+ int error;
+
+ NMG_LOCK();
+ /* permission check for modified bridges */
+ b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
+ if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
+ error = EACCES;
+ goto unlock_exit;
+ }
+
+ if (req->reg.nr_mem_id) {
+ nmd = netmap_mem_find(req->reg.nr_mem_id);
+ if (nmd == NULL) {
+ error = EINVAL;
+ goto unlock_exit;
+ }
+ }
+
+ /* check for existing one */
+ error = netmap_get_vale_na(hdr, &na, nmd, 0);
+ if (na) {
+ error = EBUSY;
+ goto unref_exit;
+ }
+ error = netmap_get_vale_na(hdr, &na,
+ nmd, 1 /* create if not exists */);
+ if (error) { /* no device */
+ goto unlock_exit;
+ }
+
+ if (na == NULL) { /* VALE prefix missing */
+ error = EINVAL;
+ goto unlock_exit;
+ }
+
+ if (NETMAP_OWNED_BY_ANY(na)) {
+ error = EBUSY;
+ goto unref_exit;
+ }
+
+ if (na->nm_bdg_ctl) {
+ /* nop for VALE ports. The bwrap needs to put the hwna
+ * in netmap mode (see netmap_bwrap_bdg_ctl)
+ */
+ error = na->nm_bdg_ctl(hdr, na);
+ if (error)
+ goto unref_exit;
+ ND("registered %s to netmap-mode", na->name);
+ }
+ vpna = (struct netmap_vp_adapter *)na;
+ req->port_index = vpna->bdg_port;
+
+ if (nmd)
+ netmap_mem_put(nmd);
+
+ NMG_UNLOCK();
+ return 0;
+
+unref_exit:
+ netmap_adapter_put(na);
+unlock_exit:
+ if (nmd)
+ netmap_mem_put(nmd);
+
+ NMG_UNLOCK();
+ return error;
+}
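The attach path above is driven from user space in much the same way. A hedged sketch using struct nmreq_vale_attach as defined later in this patch; the switch and interface names are examples only. Detaching is symmetric, using NETMAP_REQ_VALE_DETACH with a struct nmreq_vale_detach body.

static int
attach_to_vale(int fd)	/* fd: open /dev/netmap control descriptor */
{
	struct nmreq_header hdr;
	struct nmreq_vale_attach req;

	memset(&hdr, 0, sizeof(hdr));
	memset(&req, 0, sizeof(req));
	hdr.nr_version = NETMAP_API;
	hdr.nr_reqtype = NETMAP_REQ_VALE_ATTACH;
	strlcpy(hdr.nr_name, "vale0:em0", sizeof(hdr.nr_name));
	req.reg.nr_mode = NR_REG_ALL_NIC;	/* expose all hardware rings */
	hdr.nr_body = (uintptr_t)&req;

	return ioctl(fd, NIOCCTRL, &hdr);	/* 0 on success, -1 + errno otherwise */
}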
+
+/* Process NETMAP_REQ_VALE_DETACH.
+ */
+int
+netmap_vale_detach(struct nmreq_header *hdr, void *auth_token)
+{
+ struct nmreq_vale_detach *nmreq_det = (void *)(uintptr_t)hdr->nr_body;
+ struct netmap_vp_adapter *vpna;
+ struct netmap_adapter *na;
+ struct nm_bridge *b = NULL;
+ int error;
+
+ NMG_LOCK();
+ /* permission check for modified bridges */
+ b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
+ if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
+ error = EACCES;
+ goto unlock_exit;
+ }
+
+ error = netmap_get_vale_na(hdr, &na, NULL, 0 /* don't create */);
+ if (error) { /* no device, or another bridge or user owns the device */
+ goto unlock_exit;
+ }
+
+ if (na == NULL) { /* VALE prefix missing */
+ error = EINVAL;
+ goto unlock_exit;
+ } else if (nm_is_bwrap(na) &&
+ ((struct netmap_bwrap_adapter *)na)->na_polling_state) {
+ /* Don't detach a NIC with polling */
+ error = EBUSY;
+ goto unref_exit;
+ }
+
+ vpna = (struct netmap_vp_adapter *)na;
+ if (na->na_vp != vpna) {
+ /* trying to detach the first attach of a VALE persistent port
+ * that is attached to 2 bridges
+ */
+ error = EBUSY;
+ goto unref_exit;
+ }
+ nmreq_det->port_index = vpna->bdg_port;
+
+ if (na->nm_bdg_ctl) {
+ /* remove the port from bridge. The bwrap
+ * also needs to put the hwna in normal mode
+ */
+ error = na->nm_bdg_ctl(hdr, na);
+ }
+
+unref_exit:
+ netmap_adapter_put(na);
+unlock_exit:
+ NMG_UNLOCK();
+ return error;
+
+}
+
+
/* nm_dtor callback for ephemeral VALE ports */
static void
-netmap_vp_dtor(struct netmap_adapter *na)
+netmap_vale_vp_dtor(struct netmap_adapter *na)
{
struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na;
struct nm_bridge *b = vpna->na_bdg;
@@ -334,47 +551,13 @@
}
-/* Called by external kernel modules (e.g., Openvswitch).
- * to modify the private data previously given to regops().
- * 'name' may be just bridge's name (including ':' if it
- * is not just NM_BDG_NAME).
- * Called without NMG_LOCK.
- */
-int
-nm_bdg_update_private_data(const char *name, bdg_update_private_data_fn_t callback,
- void *callback_data, void *auth_token)
-{
- void *private_data = NULL;
- struct nm_bridge *b;
- int error = 0;
- NMG_LOCK();
- b = nm_find_bridge(name, 0 /* don't create */, NULL);
- if (!b) {
- error = EINVAL;
- goto unlock_update_priv;
- }
- if (!nm_bdg_valid_auth_token(b, auth_token)) {
- error = EACCES;
- goto unlock_update_priv;
- }
- BDG_WLOCK(b);
- private_data = callback(b->private_data, callback_data, &error);
- b->private_data = private_data;
- BDG_WUNLOCK(b);
-
-unlock_update_priv:
- NMG_UNLOCK();
- return error;
-}
-
-
/* nm_krings_create callback for VALE ports.
* Calls the standard netmap_krings_create, then adds leases on rx
* rings and bdgfwd on tx rings.
*/
static int
-netmap_vp_krings_create(struct netmap_adapter *na)
+netmap_vale_vp_krings_create(struct netmap_adapter *na)
{
u_int tailroom;
int error, i;
@@ -409,7 +592,7 @@
/* nm_krings_delete callback for VALE ports. */
static void
-netmap_vp_krings_delete(struct netmap_adapter *na)
+netmap_vale_vp_krings_delete(struct netmap_adapter *na)
{
nm_free_bdgfwd(na);
netmap_krings_delete(na);
@@ -417,7 +600,7 @@
static int
-nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n,
+nm_vale_flush(struct nm_bdg_fwd *ft, u_int n,
struct netmap_vp_adapter *na, u_int ring_nr);
@@ -429,7 +612,7 @@
* Returns the next position in the ring.
*/
static int
-nm_bdg_preflush(struct netmap_kring *kring, u_int end)
+nm_vale_preflush(struct netmap_kring *kring, u_int end)
{
struct netmap_vp_adapter *na =
(struct netmap_vp_adapter*)kring->na;
@@ -470,7 +653,7 @@
buf = ft[ft_i].ft_buf = (slot->flags & NS_INDIRECT) ?
(void *)(uintptr_t)slot->ptr : NMB(&na->up, slot);
if (unlikely(buf == NULL)) {
- RD(5, "NULL %s buffer pointer from %s slot %d len %d",
+ nm_prlim(5, "NULL %s buffer pointer from %s slot %d len %d",
(slot->flags & NS_INDIRECT) ? "INDIRECT" : "DIRECT",
kring->name, j, ft[ft_i].ft_len);
buf = ft[ft_i].ft_buf = NETMAP_BUF_BASE(&na->up);
@@ -488,7 +671,7 @@
ft[ft_i - frags].ft_frags = frags;
frags = 1;
if (unlikely((int)ft_i >= bridge_batch))
- ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
+ ft_i = nm_vale_flush(ft, ft_i, na, ring_nr);
}
if (frags > 1) {
/* Here ft_i > 0, ft[ft_i-1].flags has NS_MOREFRAG, and we
@@ -496,10 +679,10 @@
frags--;
ft[ft_i - 1].ft_flags &= ~NS_MOREFRAG;
ft[ft_i - frags].ft_frags = frags;
- D("Truncate incomplete fragment at %d (%d frags)", ft_i, frags);
+ nm_prlim(5, "Truncate incomplete fragment at %d (%d frags)", ft_i, frags);
}
if (ft_i)
- ft_i = nm_bdg_flush(ft, ft_i, na, ring_nr);
+ ft_i = nm_vale_flush(ft, ft_i, na, ring_nr);
BDG_RUNLOCK(b);
return j;
}
@@ -528,7 +711,7 @@
static __inline uint32_t
-nm_bridge_rthash(const uint8_t *addr)
+nm_vale_rthash(const uint8_t *addr)
{
uint32_t a = 0x9e3779b9, b = 0x9e3779b9, c = 0; // hash key
@@ -554,7 +737,7 @@
* ring in *dst_ring (at the moment, always use ring 0)
*/
uint32_t
-netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
+netmap_vale_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
struct netmap_vp_adapter *na, void *private_data)
{
uint8_t *buf = ((uint8_t *)ft->ft_buf) + ft->ft_offset;
@@ -586,17 +769,17 @@
*/
if (((buf[6] & 1) == 0) && (na->last_smac != smac)) { /* valid src */
uint8_t *s = buf+6;
- sh = nm_bridge_rthash(s); /* hash of source */
+ sh = nm_vale_rthash(s); /* hash of source */
/* update source port forwarding entry */
na->last_smac = ht[sh].mac = smac; /* XXX expire ? */
ht[sh].ports = mysrc;
- if (netmap_verbose)
- D("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
+ if (netmap_debug & NM_DEBUG_VALE)
+ nm_prinf("src %02x:%02x:%02x:%02x:%02x:%02x on port %d",
s[0], s[1], s[2], s[3], s[4], s[5], mysrc);
}
dst = NM_BDG_BROADCAST;
if ((buf[0] & 1) == 0) { /* unicast */
- dh = nm_bridge_rthash(buf); /* hash of dst */
+ dh = nm_vale_rthash(buf); /* hash of dst */
if (ht[dh].mac == dmac) { /* found dst */
dst = ht[dh].ports;
}
@@ -655,24 +838,28 @@
k->nkr_leases[lease_idx] = NR_NOSLOT;
k->nkr_lease_idx = nm_next(lease_idx, lim);
+#ifdef CONFIG_NETMAP_DEBUG
if (n > nm_kr_space(k, is_rx)) {
- D("invalid request for %d slots", n);
+ nm_prerr("invalid request for %d slots", n);
panic("x");
}
+#endif /* CONFIG NETMAP_DEBUG */
/* XXX verify that there are n slots */
k->nkr_hwlease += n;
if (k->nkr_hwlease > lim)
k->nkr_hwlease -= lim + 1;
+#ifdef CONFIG_NETMAP_DEBUG
if (k->nkr_hwlease >= k->nkr_num_slots ||
k->nr_hwcur >= k->nkr_num_slots ||
k->nr_hwtail >= k->nkr_num_slots ||
k->nkr_lease_idx >= k->nkr_num_slots) {
- D("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d",
+ nm_prerr("invalid kring %s, cur %d tail %d lease %d lease_idx %d lim %d",
k->na->name,
k->nr_hwcur, k->nr_hwtail, k->nkr_hwlease,
k->nkr_lease_idx, k->nkr_num_slots);
}
+#endif /* CONFIG_NETMAP_DEBUG */
return lease_idx;
}
@@ -682,10 +869,10 @@
* number of ports, and lets us replace the learn and dispatch functions.
*/
int
-nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
+nm_vale_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
u_int ring_nr)
{
- struct nm_bdg_q *dst_ents, *brddst;
+ struct nm_vale_q *dst_ents, *brddst;
uint16_t num_dsts = 0, *dsts;
struct nm_bridge *b = na->na_bdg;
u_int i, me = na->bdg_port;
@@ -696,14 +883,14 @@
* queues per port plus one for the broadcast traffic.
* Then we have an array of destination indexes.
*/
- dst_ents = (struct nm_bdg_q *)(ft + NM_BDG_BATCH_MAX);
+ dst_ents = (struct nm_vale_q *)(ft + NM_BDG_BATCH_MAX);
dsts = (uint16_t *)(dst_ents + NM_BDG_MAXPORTS * NM_BDG_MAXRINGS + 1);
/* first pass: find a destination for each packet in the batch */
for (i = 0; likely(i < n); i += ft[i].ft_frags) {
uint8_t dst_ring = ring_nr; /* default, same ring as origin */
uint16_t dst_port, d_i;
- struct nm_bdg_q *d;
+ struct nm_vale_q *d;
struct nm_bdg_fwd *start_ft = NULL;
ND("slot %d frags %d", i, ft[i].ft_frags);
@@ -720,7 +907,7 @@
*/
continue;
}
- dst_port = b->bdg_ops->lookup(start_ft, &dst_ring, na, b->private_data);
+ dst_port = b->bdg_ops.lookup(start_ft, &dst_ring, na, b->private_data);
if (netmap_verbose > 255)
RD(5, "slot %d port %d -> %d", i, me, dst_port);
if (dst_port >= NM_BDG_NOPORT)
@@ -778,7 +965,7 @@
u_int dst_nr, lim, j, d_i, next, brd_next;
u_int needed, howmany;
int retry = netmap_txsync_retry;
- struct nm_bdg_q *d;
+ struct nm_vale_q *d;
uint32_t my_start = 0, lease_idx = 0;
int nrings;
int virt_hdr_mismatch = 0;
@@ -862,7 +1049,7 @@
if (dst_na->retry && retry) {
/* try to get some free slot from the previous run */
- kring->nm_notify(kring, 0);
+ kring->nm_notify(kring, NAF_FORCE_RECLAIM);
/* actually useful only for bwraps, since there
* the notify will trigger a txsync on the hwna. VALE ports
* have dst_na->retry == 0
@@ -1030,7 +1217,7 @@
/* nm_txsync callback for VALE ports */
static int
-netmap_vp_txsync(struct netmap_kring *kring, int flags)
+netmap_vale_vp_txsync(struct netmap_kring *kring, int flags)
{
struct netmap_vp_adapter *na =
(struct netmap_vp_adapter *)kring->na;
@@ -1049,17 +1236,17 @@
if (bridge_batch > NM_BDG_BATCH)
bridge_batch = NM_BDG_BATCH;
- done = nm_bdg_preflush(kring, head);
+ done = nm_vale_preflush(kring, head);
done:
if (done != head)
- D("early break at %d/ %d, tail %d", done, head, kring->nr_hwtail);
+ nm_prerr("early break at %d/ %d, tail %d", done, head, kring->nr_hwtail);
/*
* packets between 'done' and 'cur' are left unsent.
*/
kring->nr_hwcur = done;
kring->nr_hwtail = nm_prev(done, lim);
- if (netmap_verbose)
- D("%s ring %d flags %d", na->up.name, kring->ring_id, flags);
+ if (netmap_debug & NM_DEBUG_TXSYNC)
+ nm_prinf("%s ring %d flags %d", na->up.name, kring->ring_id, flags);
return 0;
}
@@ -1068,7 +1255,7 @@
* Only persistent VALE ports have a non-null ifp.
*/
static int
-netmap_vp_create(struct nmreq_header *hdr, struct ifnet *ifp,
+netmap_vale_vp_create(struct nmreq_header *hdr, struct ifnet *ifp,
struct netmap_mem_d *nmd, struct netmap_vp_adapter **ret)
{
struct nmreq_register *req = (struct nmreq_register *)(uintptr_t)hdr->nr_body;
@@ -1089,7 +1276,7 @@
na = &vpna->up;
na->ifp = ifp;
- strncpy(na->name, hdr->nr_name, sizeof(na->name));
+ strlcpy(na->name, hdr->nr_name, sizeof(na->name));
/* bound checking */
na->num_tx_rings = req->nr_tx_rings;
@@ -1109,6 +1296,7 @@
*/
nm_bound_var(&npipes, 2, 1, NM_MAXPIPES, NULL);
/* validate extra bufs */
+ extrabufs = req->nr_extra_bufs;
nm_bound_var(&extrabufs, 0, 0,
128*NM_BDG_MAXSLOTS, NULL);
req->nr_extra_bufs = extrabufs; /* write back */
@@ -1121,7 +1309,7 @@
/*if (vpna->mfs > netmap_buf_size) TODO netmap_buf_size is zero??
vpna->mfs = netmap_buf_size; */
if (netmap_verbose)
- D("max frame size %u", vpna->mfs);
+ nm_prinf("max frame size %u", vpna->mfs);
na->na_flags |= NAF_BDG_MAYSLEEP;
/* persistent VALE ports look like hw devices
@@ -1129,12 +1317,12 @@
*/
if (ifp)
na->na_flags |= NAF_NATIVE;
- na->nm_txsync = netmap_vp_txsync;
- na->nm_rxsync = netmap_vp_rxsync;
- na->nm_register = netmap_vp_reg;
- na->nm_krings_create = netmap_vp_krings_create;
- na->nm_krings_delete = netmap_vp_krings_delete;
- na->nm_dtor = netmap_vp_dtor;
+ na->nm_txsync = netmap_vale_vp_txsync;
+ na->nm_rxsync = netmap_vp_rxsync; /* use the one provided by bdg */
+ na->nm_register = netmap_vp_reg; /* use the one provided by bdg */
+ na->nm_krings_create = netmap_vale_vp_krings_create;
+ na->nm_krings_delete = netmap_vale_vp_krings_delete;
+ na->nm_dtor = netmap_vale_vp_dtor;
ND("nr_mem_id %d", req->nr_mem_id);
na->nm_mem = nmd ?
netmap_mem_get(nmd):
@@ -1144,7 +1332,7 @@
req->nr_extra_bufs, npipes, &error);
if (na->nm_mem == NULL)
goto err;
- na->nm_bdg_attach = netmap_vp_bdg_attach;
+ na->nm_bdg_attach = netmap_vale_vp_bdg_attach;
/* other nmd fields are set in the common routine */
error = netmap_attach_common(na);
if (error)
@@ -1163,19 +1351,16 @@
* The na_vp port is this same netmap_adapter. There is no host port.
*/
static int
-netmap_vp_bdg_attach(const char *name, struct netmap_adapter *na,
+netmap_vale_vp_bdg_attach(const char *name, struct netmap_adapter *na,
struct nm_bridge *b)
{
struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
- if (b->bdg_ops != &vale_bdg_ops) {
+ if ((b->bdg_flags & NM_BDG_NEED_BWRAP) || vpna->na_bdg) {
return NM_NEED_BWRAP;
}
- if (vpna->na_bdg) {
- return NM_NEED_BWRAP;
- }
na->na_vp = vpna;
- strncpy(na->name, name, sizeof(na->name));
+ strlcpy(na->name, name, sizeof(na->name));
na->na_hostvp = NULL;
return 0;
}
@@ -1186,12 +1371,12 @@
int error;
/* impersonate a netmap_vp_adapter */
- error = netmap_vp_krings_create(na);
+ error = netmap_vale_vp_krings_create(na);
if (error)
return error;
error = netmap_bwrap_krings_create_common(na);
if (error) {
- netmap_vp_krings_delete(na);
+ netmap_vale_vp_krings_delete(na);
}
return error;
}
@@ -1200,7 +1385,7 @@
netmap_vale_bwrap_krings_delete(struct netmap_adapter *na)
{
netmap_bwrap_krings_delete_common(na);
- netmap_vp_krings_delete(na);
+ netmap_vale_vp_krings_delete(na);
}
static int
@@ -1216,9 +1401,9 @@
return ENOMEM;
}
na = &bna->up.up;
- strncpy(na->name, nr_name, sizeof(na->name));
+ strlcpy(na->name, nr_name, sizeof(na->name));
na->nm_register = netmap_bwrap_reg;
- na->nm_txsync = netmap_vp_txsync;
+ na->nm_txsync = netmap_vale_vp_txsync;
// na->nm_rxsync = netmap_bwrap_rxsync;
na->nm_krings_create = netmap_vale_bwrap_krings_create;
na->nm_krings_delete = netmap_vale_bwrap_krings_delete;
@@ -1313,7 +1498,8 @@
NMG_UNLOCK();
- D("destroying a persistent vale interface %s", ifp->if_xname);
+ if (netmap_verbose)
+ nm_prinf("destroying a persistent vale interface %s", ifp->if_xname);
/* Linux requires all the references are released
* before unregister
*/
@@ -1389,9 +1575,10 @@
}
}
/* netmap_vp_create creates a struct netmap_vp_adapter */
- error = netmap_vp_create(hdr, ifp, nmd, &vpna);
+ error = netmap_vale_vp_create(hdr, ifp, nmd, &vpna);
if (error) {
- D("error %d", error);
+ if (netmap_debug & NM_DEBUG_VALE)
+ nm_prerr("error %d", error);
goto err_1;
}
/* persist-specific routines */
Index: head/sys/modules/netmap/Makefile
===================================================================
--- head/sys/modules/netmap/Makefile
+++ head/sys/modules/netmap/Makefile
@@ -3,12 +3,12 @@
# Compile netmap as a module, useful if you want a netmap bridge
# or loadable drivers.
-SYSDIR?=${SRCTOP}/sys
-.include "${SYSDIR}/conf/kern.opts.mk"
+.include <bsd.own.mk> # FreeBSD 10 and earlier
+# .include "${SYSDIR}/conf/kern.opts.mk"
-.PATH: ${SYSDIR}/dev/netmap
-.PATH.h: ${SYSDIR}/net
-CFLAGS += -I${SYSDIR}/ -D INET
+.PATH: ${.CURDIR}/../../dev/netmap
+.PATH.h: ${.CURDIR}/../../net
+CFLAGS += -I${.CURDIR}/../../ -D INET -D VIMAGE
KMOD = netmap
SRCS = device_if.h bus_if.h pci_if.h opt_netmap.h
SRCS += netmap.c netmap.h netmap_kern.h
@@ -20,8 +20,10 @@
SRCS += netmap_offloadings.c
SRCS += netmap_pipe.c
SRCS += netmap_monitor.c
-SRCS += netmap_pt.c
+SRCS += netmap_kloop.c
SRCS += netmap_legacy.c
+SRCS += netmap_bdg.c
+SRCS += netmap_null.c
SRCS += if_ptnet.c
SRCS += opt_inet.h opt_inet6.h
Index: head/sys/net/netmap.h
===================================================================
--- head/sys/net/netmap.h
+++ head/sys/net/netmap.h
@@ -41,9 +41,9 @@
#ifndef _NET_NETMAP_H_
#define _NET_NETMAP_H_
-#define NETMAP_API 12 /* current API version */
+#define NETMAP_API 13 /* current API version */
-#define NETMAP_MIN_API 11 /* min and max versions accepted */
+#define NETMAP_MIN_API 13 /* min and max versions accepted */
#define NETMAP_MAX_API 15
/*
* Some fields should be cache-aligned to reduce contention.
@@ -333,12 +333,17 @@
*/
/*
- * check if space is available in the ring.
+ * Check if space is available in the ring. We use ring->head, which
+ * points to the next netmap slot to be published to netmap. It is
+ * possible that the applications moves ring->cur ahead of ring->tail
+ * (e.g., by setting ring->cur <== ring->tail), if it wants more slots
+ * than the ones currently available, and it wants to be notified when
+ * more arrive. See netmap(4) for more details and examples.
*/
static inline int
nm_ring_empty(struct netmap_ring *ring)
{
- return (ring->cur == ring->tail);
+ return (ring->head == ring->tail);
}
/*
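The head/tail convention described in the comment above is easiest to see in a minimal transmit path. A hedged sketch, assuming the usual helpers from net/netmap_user.h (NETMAP_TXRING(), NETMAP_BUF(), nm_ring_next()) and <string.h>, none of which are defined in this header:

static int
send_one(struct netmap_if *nifp, const void *pkt, uint16_t len)
{
	struct netmap_ring *ring = NETMAP_TXRING(nifp, 0);
	uint32_t i;

	if (nm_ring_empty(ring))
		return -1;	/* no space: poll()/NIOCTXSYNC and retry */
	i = ring->head;
	memcpy(NETMAP_BUF(ring, ring->slot[i].buf_idx), pkt, len);
	ring->slot[i].len = len;
	/* Publish the slot: advance head (and cur, since we do not ask
	 * for more slots than are currently available). */
	ring->head = ring->cur = nm_ring_next(ring, i);
	return 0;
}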
@@ -479,6 +484,10 @@
* !=0: errno value
*/
uint32_t nro_status;
+ /* Option size, used only for options that can have variable size
+ * (e.g. because they contain arrays). For fixed-size options this
+ * field should be set to zero. */
+ uint64_t nro_size;
};
/* Header common to all requests. Do not reorder these fields, as we need
@@ -518,12 +527,32 @@
NETMAP_REQ_VALE_POLLING_DISABLE,
/* Get info about the pools of a memory allocator. */
NETMAP_REQ_POOLS_INFO_GET,
+ /* Start an in-kernel loop that syncs the rings periodically or
+ * on notifications. The loop runs in the context of the ioctl
+ * syscall, and only stops on NETMAP_REQ_SYNC_KLOOP_STOP. */
+ NETMAP_REQ_SYNC_KLOOP_START,
+ /* Stops the thread executing the in-kernel loop. The thread
+ * returns from the ioctl syscall. */
+ NETMAP_REQ_SYNC_KLOOP_STOP,
+ /* Enable CSB mode on a registered netmap control device. */
+ NETMAP_REQ_CSB_ENABLE,
};
enum {
/* On NETMAP_REQ_REGISTER, ask netmap to use memory allocated
* from user-space allocated memory pools (e.g. hugepages). */
NETMAP_REQ_OPT_EXTMEM = 1,
+
+ /* On NETMAP_REQ_SYNC_KLOOP_START, ask netmap to use eventfd-based
+ * notifications to synchronize the kernel loop with the application.
+ */
+ NETMAP_REQ_OPT_SYNC_KLOOP_EVENTFDS,
+
+ /* On NETMAP_REQ_REGISTER, ask netmap to work in CSB mode, where
+ * head, cur and tail pointers are not exchanged through the
+ * struct netmap_ring header, but rather using a user-provided
+ * memory area (see struct nm_csb_atok and struct nm_csb_ktoa). */
+ NETMAP_REQ_OPT_CSB,
};
/*
@@ -541,6 +570,7 @@
uint16_t nr_mem_id; /* id of the memory allocator */
uint16_t nr_ringid; /* ring(s) we care about */
uint32_t nr_mode; /* specify NR_REG_* modes */
+ uint32_t nr_extra_bufs; /* number of requested extra buffers */
uint64_t nr_flags; /* additional flags (see below) */
/* monitors use nr_ringid and nr_mode to select the rings to monitor */
@@ -549,9 +579,7 @@
#define NR_ZCOPY_MON 0x400
/* request exclusive access to the selected rings */
#define NR_EXCLUSIVE 0x800
-/* request ptnetmap host support */
-#define NR_PASSTHROUGH_HOST NR_PTNETMAP_HOST /* deprecated */
-#define NR_PTNETMAP_HOST 0x1000
+/* 0x1000 unused */
#define NR_RX_RINGS_ONLY 0x2000
#define NR_TX_RINGS_ONLY 0x4000
/* Applications set this flag if they are able to deal with virtio-net headers,
@@ -564,8 +592,6 @@
* NETMAP_DO_RX_POLL. */
#define NR_DO_RX_POLL 0x10000
#define NR_NO_TX_POLL 0x20000
-
- uint32_t nr_extra_bufs; /* number of requested extra buffers */
};
/* Valid values for nmreq_register.nr_mode (see above). */
@@ -576,10 +602,11 @@
NR_REG_ONE_NIC = 4,
NR_REG_PIPE_MASTER = 5, /* deprecated, use "x{y" port name syntax */
NR_REG_PIPE_SLAVE = 6, /* deprecated, use "x}y" port name syntax */
+ NR_REG_NULL = 7,
};
/* A single ioctl number is shared by all the new API commands.
- * Demultiplexing is done using the nr_hdr.nr_reqtype field.
+ * Demultiplexing is done using the hdr.nr_reqtype field.
* FreeBSD uses the size value embedded in the _IOWR to determine
* how much to copy in/out, so we define the ioctl() command
* specifying only nmreq_header, and copyin/copyout the rest. */
@@ -595,16 +622,18 @@
/*
* nr_reqtype: NETMAP_REQ_PORT_INFO_GET
* Get information about a netmap port, including number of rings,
- * slots per ring, id of the memory allocator, etc.
+ * slots per ring, id of the memory allocator, etc. The netmap
+ * control device used for this operation does not need to be bound
+ * to a netmap port.
*/
struct nmreq_port_info_get {
- uint64_t nr_offset; /* nifp offset in the shared region */
uint64_t nr_memsize; /* size of the shared region */
uint32_t nr_tx_slots; /* slots in tx rings */
uint32_t nr_rx_slots; /* slots in rx rings */
uint16_t nr_tx_rings; /* number of tx rings */
uint16_t nr_rx_rings; /* number of rx rings */
- uint16_t nr_mem_id; /* id of the memory allocator */
+ uint16_t nr_mem_id; /* memory allocator id (in/out) */
+ uint16_t pad1;
};
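As a usage sketch (the interface name, headers and error handling below are illustrative and not part of the diff): the port geometry can be queried through an unbound control device.

/* Sketch: ask netmap about "em0" without binding the control fd. */
#include <sys/ioctl.h>
#include <net/netmap.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static void
print_port_info(const char *ifname)
{
        struct nmreq_header hdr;
        struct nmreq_port_info_get pi;
        int fd = open("/dev/netmap", O_RDWR);

        memset(&hdr, 0, sizeof(hdr));
        memset(&pi, 0, sizeof(pi));
        hdr.nr_version = NETMAP_API;
        hdr.nr_reqtype = NETMAP_REQ_PORT_INFO_GET;
        strncpy(hdr.nr_name, ifname, sizeof(hdr.nr_name) - 1);
        hdr.nr_body = (uintptr_t)&pi;
        pi.nr_mem_id = 0;       /* 0: report the allocator actually used by the port */

        if (fd >= 0 && ioctl(fd, NIOCCTRL, &hdr) == 0)
                printf("%u tx rings x %u slots, %u rx rings x %u slots, mem_id %u\n",
                    pi.nr_tx_rings, pi.nr_tx_slots,
                    pi.nr_rx_rings, pi.nr_rx_slots, pi.nr_mem_id);
        if (fd >= 0)
                close(fd);
}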
#define NM_BDG_NAME "vale" /* prefix for bridge port name */
@@ -620,6 +649,7 @@
struct nmreq_vale_attach {
struct nmreq_register reg;
uint32_t port_index;
+ uint32_t pad1;
};
/*
@@ -630,6 +660,7 @@
*/
struct nmreq_vale_detach {
uint32_t port_index;
+ uint32_t pad1;
};
/*
@@ -639,15 +670,18 @@
struct nmreq_vale_list {
/* Name of the VALE port (valeXXX:YYY) or empty. */
uint16_t nr_bridge_idx;
+ uint16_t pad1;
uint32_t nr_port_idx;
};
/*
* nr_reqtype: NETMAP_REQ_PORT_HDR_SET or NETMAP_REQ_PORT_HDR_GET
- * Set the port header length.
+ * Set or get the port header length of the port identified by hdr.nr_name.
+ * The control device does not need to be bound to a netmap port.
*/
struct nmreq_port_hdr {
uint32_t nr_hdr_len;
+ uint32_t pad1;
};
/*
@@ -660,6 +694,7 @@
uint16_t nr_tx_rings; /* number of tx rings */
uint16_t nr_rx_rings; /* number of rx rings */
uint16_t nr_mem_id; /* id of the memory allocator */
+ uint16_t pad1;
};
/*
@@ -672,17 +707,20 @@
#define NETMAP_POLLING_MODE_MULTI_CPU 2
uint32_t nr_first_cpu_id;
uint32_t nr_num_polling_cpus;
+ uint32_t pad1;
};
/*
* nr_reqtype: NETMAP_REQ_POOLS_INFO_GET
- * Get info about the pools of the memory allocator of the port bound
- * to a given netmap control device (used i.e. by a ptnetmap-enabled
- * hypervisor). The nr_hdr.nr_name field is ignored.
+ * Get info about the pools of the memory allocator of the netmap
+ * port specified by hdr.nr_name and nr_mem_id. The netmap control
+ * device used for this operation does not need to be bound to a netmap
+ * port.
*/
struct nmreq_pools_info {
uint64_t nr_memsize;
- uint16_t nr_mem_id;
+ uint16_t nr_mem_id; /* in/out argument */
+ uint16_t pad1[3];
uint64_t nr_if_pool_offset;
uint32_t nr_if_pool_objtotal;
uint32_t nr_if_pool_objsize;
@@ -695,13 +733,151 @@
};
/*
+ * nr_reqtype: NETMAP_REQ_SYNC_KLOOP_START
+ * Start an in-kernel loop that syncs the rings periodically or on
+ * notifications. The loop runs in the context of the ioctl syscall,
+ * and only stops on NETMAP_REQ_SYNC_KLOOP_STOP.
+ * The registered netmap port must be open in CSB mode.
+ */
+struct nmreq_sync_kloop_start {
+ /* Sleeping is the default synchronization method for the kloop.
+ * The 'sleep_us' field specifies how many microsconds to sleep for
+ * when there is no work to do, before doing another kloop iteration.
+ */
+ uint32_t sleep_us;
+ uint32_t pad1;
+};
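To show the intended flow (a sketch under the assumption that the control fd is already registered in CSB mode; thread creation and error handling are omitted): the START request blocks inside the ioctl for the lifetime of the loop, so it is issued from a dedicated thread, while another thread later issues STOP on the same fd.

/* Sketch: thread body that runs the in-kernel sync loop, and a helper
 * that stops it from another thread. Assumes <net/netmap.h>,
 * <sys/ioctl.h>, <string.h> and a CSB-mode registration. */
static void *
kloop_worker(void *arg)
{
        int fd = *(int *)arg;
        struct nmreq_header hdr;
        struct nmreq_sync_kloop_start body;

        memset(&hdr, 0, sizeof(hdr));
        memset(&body, 0, sizeof(body));
        hdr.nr_version = NETMAP_API;
        hdr.nr_reqtype = NETMAP_REQ_SYNC_KLOOP_START;
        hdr.nr_body = (uintptr_t)&body;
        body.sleep_us = 100;    /* sleep 100us when there is nothing to do */

        /* Blocks here until NETMAP_REQ_SYNC_KLOOP_STOP arrives. */
        ioctl(fd, NIOCCTRL, &hdr);
        return NULL;
}

static void
kloop_stop(int fd)
{
        struct nmreq_header hdr;

        memset(&hdr, 0, sizeof(hdr));
        hdr.nr_version = NETMAP_API;
        hdr.nr_reqtype = NETMAP_REQ_SYNC_KLOOP_STOP;
        ioctl(fd, NIOCCTRL, &hdr);      /* unblocks kloop_worker() */
}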
+
+/* A CSB entry for the application --> kernel direction. */
+struct nm_csb_atok {
+ uint32_t head; /* AW+ KR+ the head of the appl netmap_ring */
+ uint32_t cur; /* AW+ KR+ the cur of the appl netmap_ring */
+ uint32_t appl_need_kick; /* AW+ KR+ kern --> appl notification enable */
+ uint32_t sync_flags; /* AW+ KR+ the flags of the appl [tx|rx]sync() */
+ uint32_t pad[12]; /* pad to a 64 bytes cacheline */
+};
+
+/* A CSB entry for the application <-- kernel direction. */
+struct nm_csb_ktoa {
+ uint32_t hwcur; /* AR+ KW+ the hwcur of the kern netmap_kring */
+ uint32_t hwtail; /* AR+ KW+ the hwtail of the kern netmap_kring */
+ uint32_t kern_need_kick; /* AR+ KW+ appl-->kern notification enable */
+ uint32_t pad[13];
+};
+
+#ifdef __linux__
+
+#ifdef __KERNEL__
+#define nm_stst_barrier smp_wmb
+#else /* !__KERNEL__ */
+static inline void nm_stst_barrier(void)
+{
+ /* A memory barrier with release semantics has the combined
+ * effect of a store-store barrier and a load-store barrier,
+ * which is fine for us. */
+ __atomic_thread_fence(__ATOMIC_RELEASE);
+}
+#endif /* !__KERNEL__ */
+
+#elif defined(__FreeBSD__)
+
+#ifdef _KERNEL
+#define nm_stst_barrier atomic_thread_fence_rel
+#else /* !_KERNEL */
+static inline void nm_stst_barrier(void)
+{
+ __atomic_thread_fence(__ATOMIC_RELEASE);
+}
+#endif /* !_KERNEL */
+
+#else /* !__linux__ && !__FreeBSD__ */
+#error "OS not supported"
+#endif /* !__linux__ && !__FreeBSD__ */
+
+/* Application side of sync-kloop: Write ring pointers (cur, head) to the CSB.
+ * This routine is coupled with sync_kloop_kernel_read(). */
+static inline void
+nm_sync_kloop_appl_write(struct nm_csb_atok *atok, uint32_t cur,
+ uint32_t head)
+{
+ /*
+ * We need to write cur and head to the CSB but we cannot do it atomically.
+ * There is no way we can prevent the host from reading the updated value
+ * of one of the two and the old value of the other. However, if we make
+ * sure that the host never reads a value of head more recent than the
+ * value of cur we are safe. We can allow the host to read a value of cur
+ * more recent than the value of head, since in the netmap ring cur can be
+ * ahead of head and cur cannot wrap around head because it must be behind
+ * tail. Inverting the order of the writes below could instead lead the
+ * host to think that head went ahead of cur, which would cause the sync
+ * prologue to fail.
+ *
+ * The following memory barrier scheme is used to make this happen:
+ *
+ * Guest Host
+ *
+ * STORE(cur) LOAD(head)
+ * mb() <-----------> mb()
+ * STORE(head) LOAD(cur)
+ *
+ */
+ atok->cur = cur;
+ nm_stst_barrier();
+ atok->head = head;
+}
+
+/* Application side of sync-kloop: Read kring pointers (hwcur, hwtail) from
+ * the CSB. This routine is coupled with sync_kloop_kernel_write(). */
+static inline void
+nm_sync_kloop_appl_read(struct nm_csb_ktoa *ktoa, uint32_t *hwtail,
+ uint32_t *hwcur)
+{
+ /*
+ * We place a memory barrier to make sure that the update of hwtail never
+ * overtakes the update of hwcur.
+ * (see explanation in sync_kloop_kernel_write).
+ */
+ *hwtail = ktoa->hwtail;
+ nm_stst_barrier();
+ *hwcur = ktoa->hwcur;
+}
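For context, a sketch of how an application might publish TX work to the kernel loop with these helpers; the ring/atok/ktoa/ioeventfd arguments are assumed to come from a CSB-mode registration with eventfds, and a complete implementation would also add a store-load barrier and re-check kern_need_kick to avoid missed notifications.

/* Sketch: application side of a CSB TX ring (assumes <net/netmap_user.h>
 * and <unistd.h>). Slots between the old head and the new one have
 * already been filled by the caller. */
static void
csb_tx_publish(struct netmap_ring *ring, struct nm_csb_atok *atok,
    struct nm_csb_ktoa *ktoa, int ioeventfd)
{
        uint32_t hwcur, hwtail;
        uint64_t one = 1;

        /* Expose the new pointers to the kloop: cur before head. */
        nm_sync_kloop_appl_write(atok, ring->cur, ring->head);

        /* Kick the kloop only if it asked to be notified. */
        if (ktoa->kern_need_kick)
                write(ioeventfd, &one, sizeof(one));

        /* Pick up the kernel's progress: hwtail before hwcur. */
        nm_sync_kloop_appl_read(ktoa, &hwtail, &hwcur);
        ring->tail = hwtail;
}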
+
+/*
* data for NETMAP_REQ_OPT_* options
*/
+struct nmreq_opt_sync_kloop_eventfds {
+ struct nmreq_option nro_opt; /* common header */
+ /* An array of N entries for bidirectional notifications between
+ * the kernel loop and the application. The number of entries and
+ * their order must agree with the CSB arrays passed in the
+ * NETMAP_REQ_OPT_CSB option. Each entry contains a file descriptor
+ * backed by an eventfd.
+ */
+ struct {
+ /* Notifier for the application --> kernel loop direction. */
+ int32_t ioeventfd;
+ /* Notifier for the kernel loop --> application direction. */
+ int32_t irqfd;
+ } eventfds[0];
+};
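A sketch of how this variable-size option might be built (the use of eventfd(2) and the chaining step are assumptions for illustration): one descriptor pair is created per CSB entry, and nro_size is set to the full option size since the option carries a flexible array.

/* Sketch: allocate and fill the eventfds option for 'n' CSB entries.
 * Assumes <sys/eventfd.h> and <stdlib.h>; the caller chains the
 * result into the SYNC_KLOOP_START header (see nmreq_push_option()
 * above) and keeps it alive for the duration of the ioctl. */
static struct nmreq_opt_sync_kloop_eventfds *
build_kloop_eventfds(unsigned int n)
{
        size_t entry_sz = 2 * sizeof(int32_t);  /* one {ioeventfd, irqfd} pair */
        size_t sz = sizeof(struct nmreq_opt_sync_kloop_eventfds) + n * entry_sz;
        struct nmreq_opt_sync_kloop_eventfds *evopt = calloc(1, sz);
        unsigned int i;

        if (evopt == NULL)
                return NULL;
        evopt->nro_opt.nro_reqtype = NETMAP_REQ_OPT_SYNC_KLOOP_EVENTFDS;
        evopt->nro_opt.nro_size = sz;   /* variable-size option */
        for (i = 0; i < n; i++) {
                evopt->eventfds[i].ioeventfd = eventfd(0, 0);
                evopt->eventfds[i].irqfd = eventfd(0, 0);
        }
        return evopt;
}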
+
struct nmreq_opt_extmem {
struct nmreq_option nro_opt; /* common header */
uint64_t nro_usrptr; /* (in) ptr to usr memory */
struct nmreq_pools_info nro_info; /* (in/out) */
+};
+
+struct nmreq_opt_csb {
+ struct nmreq_option nro_opt;
+
+ /* Array of CSB entries for application --> kernel communication
+ * (N entries). */
+ uint64_t csb_atok;
+
+ /* Array of CSB entries for kernel --> application communication
+ * (N entries). */
+ uint64_t csb_ktoa;
};
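A sketch of how the two CSB arrays might be set up and described to netmap (allocation strategy and entry count are the application's choice; cache-line alignment, e.g. via posix_memalign(), is desirable but not shown):

/* Sketch: one nm_csb_atok/nm_csb_ktoa pair per ring, passed by
 * user-space address in a NETMAP_REQ_OPT_CSB option and chained
 * into the REGISTER (or CSB_ENABLE) request header. */
static int
attach_csb_option(struct nmreq_header *hdr, struct nmreq_opt_csb *csb_opt,
    unsigned int num_entries)
{
        struct nm_csb_atok *atok = calloc(num_entries, sizeof(*atok));
        struct nm_csb_ktoa *ktoa = calloc(num_entries, sizeof(*ktoa));

        if (atok == NULL || ktoa == NULL)
                return -1;
        memset(csb_opt, 0, sizeof(*csb_opt));
        csb_opt->nro_opt.nro_reqtype = NETMAP_REQ_OPT_CSB;
        csb_opt->csb_atok = (uintptr_t)atok;    /* num_entries atok entries */
        csb_opt->csb_ktoa = (uintptr_t)ktoa;    /* num_entries ktoa entries */

        /* Prepend to the header's option list (see nmreq_push_option()). */
        csb_opt->nro_opt.nro_next = hdr->nr_options;
        hdr->nr_options = (uintptr_t)csb_opt;
        return 0;
}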
#endif /* _NET_NETMAP_H_ */
Index: head/sys/net/netmap_user.h
===================================================================
--- head/sys/net/netmap_user.h
+++ head/sys/net/netmap_user.h
@@ -138,11 +138,12 @@
return nm_ring_next(r, r->tail) != r->head;
}
-
+/* Compute the number of slots available in the netmap ring. We use
+ * ring->head as explained in the comment above nm_ring_empty(). */
static inline uint32_t
nm_ring_space(struct netmap_ring *ring)
{
- int ret = ring->tail - ring->cur;
+ int ret = ring->tail - ring->head;
if (ret < 0)
ret += ring->num_slots;
return ret;
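A small usage sketch (illustrative, not from the patch): using nm_ring_space() to bound a transmit batch before syncing.

/* Sketch: fill at most 'n' TX slots, limited by the free space
 * between head and tail (assumes <net/netmap_user.h>). */
static u_int
tx_batch(struct netmap_ring *ring, u_int n)
{
        u_int budget = nm_ring_space(ring);
        u_int sent;

        if (n > budget)
                n = budget;
        for (sent = 0; sent < n; sent++) {
                struct netmap_slot *slot = &ring->slot[ring->head];

                /* ... fill NETMAP_BUF(ring, slot->buf_idx), set slot->len ... */
                ring->head = ring->cur = nm_ring_next(ring, ring->head);
        }
        return sent;    /* caller then issues NIOCTXSYNC or poll() */
}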
@@ -1091,18 +1092,36 @@
ring = NETMAP_RXRING(d->nifp, ri);
for ( ; !nm_ring_empty(ring) && cnt != got; got++) {
u_int idx, i;
+ u_char *oldbuf;
+ struct netmap_slot *slot;
if (d->hdr.buf) { /* from previous round */
cb(arg, &d->hdr, d->hdr.buf);
}
i = ring->cur;
- idx = ring->slot[i].buf_idx;
+ slot = &ring->slot[i];
+ idx = slot->buf_idx;
/* d->cur_rx_ring doesn't change inside this loop, but
* set it here, so it reflects d->hdr.buf's ring */
d->cur_rx_ring = ri;
- d->hdr.slot = &ring->slot[i];
- d->hdr.buf = (u_char *)NETMAP_BUF(ring, idx);
+ d->hdr.slot = slot;
+ oldbuf = d->hdr.buf = (u_char *)NETMAP_BUF(ring, idx);
// __builtin_prefetch(buf);
- d->hdr.len = d->hdr.caplen = ring->slot[i].len;
+ d->hdr.len = d->hdr.caplen = slot->len;
+ while (slot->flags & NS_MOREFRAG) {
+ u_char *nbuf;
+ u_int oldlen = slot->len;
+ i = nm_ring_next(ring, i);
+ slot = &ring->slot[i];
+ d->hdr.len += slot->len;
+ nbuf = (u_char *)NETMAP_BUF(ring, slot->buf_idx);
+ if (oldbuf != NULL && nbuf - oldbuf == ring->nr_buf_size &&
+ oldlen == ring->nr_buf_size) {
+ d->hdr.caplen += slot->len;
+ oldbuf = nbuf;
+ } else {
+ oldbuf = NULL;
+ }
+ }
d->hdr.ts = ring->ts;
ring->head = ring->cur = nm_ring_next(ring, i);
}
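For completeness, a sketch of the transmit-side counterpart of the NS_MOREFRAG handling added above (not part of this diff): a frame larger than one netmap buffer is spread over consecutive slots, with NS_MOREFRAG set on every slot except the last.

/* Sketch: multi-slot transmit (assumes <net/netmap_user.h> and <string.h>). */
static int
tx_multifrag(struct netmap_ring *ring, const char *frame, u_int len)
{
        u_int needed = (len + ring->nr_buf_size - 1) / ring->nr_buf_size;

        if (nm_ring_space(ring) < needed)
                return -1;              /* not enough free slots */
        while (len > 0) {
                struct netmap_slot *slot = &ring->slot[ring->head];
                u_int chunk = len > ring->nr_buf_size ? ring->nr_buf_size : len;

                memcpy(NETMAP_BUF(ring, slot->buf_idx), frame, chunk);
                slot->len = chunk;
                frame += chunk;
                len -= chunk;
                slot->flags = (len > 0) ? NS_MOREFRAG : 0;
                ring->head = ring->cur = nm_ring_next(ring, ring->head);
        }
        return 0;
}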
Index: head/sys/net/netmap_virt.h
===================================================================
--- head/sys/net/netmap_virt.h
+++ head/sys/net/netmap_virt.h
@@ -1,7 +1,7 @@
/*
* Copyright (C) 2013-2016 Luigi Rizzo
* Copyright (C) 2013-2016 Giuseppe Lettieri
- * Copyright (C) 2013-2016 Vincenzo Maffione
+ * Copyright (C) 2013-2018 Vincenzo Maffione
* Copyright (C) 2015 Stefano Garzarella
* All rights reserved.
*
@@ -33,14 +33,15 @@
#define NETMAP_VIRT_H
/*
- * ptnetmap_memdev: device used to expose memory into the guest VM
+ * Register offsets and other macros for the ptnetmap paravirtual devices:
+ * ptnetmap-memdev: device used to expose memory into the guest
+ * ptnet: paravirtualized NIC exposing a netmap port in the guest
*
* These macros are used in the hypervisor frontend (QEMU, bhyve) and in the
* guest device driver.
*/
-/* PCI identifiers and PCI BARs for the ptnetmap memdev
- * and ptnetmap network interface. */
+/* PCI identifiers and PCI BARs for ptnetmap-memdev and ptnet. */
#define PTNETMAP_MEMDEV_NAME "ptnetmap-memdev"
#define PTNETMAP_PCI_VENDOR_ID 0x1b36 /* QEMU virtual devices */
#define PTNETMAP_PCI_DEVICE_ID 0x000c /* memory device */
@@ -49,7 +50,7 @@
#define PTNETMAP_MEM_PCI_BAR 1
#define PTNETMAP_MSIX_PCI_BAR 2
-/* Registers for the ptnetmap memdev */
+/* Device registers for ptnetmap-memdev */
#define PTNET_MDEV_IO_MEMSIZE_LO 0 /* netmap memory size (low) */
#define PTNET_MDEV_IO_MEMSIZE_HI 4 /* netmap_memory_size (high) */
#define PTNET_MDEV_IO_MEMID 8 /* memory allocator ID in the host */
@@ -64,74 +65,10 @@
#define PTNET_MDEV_IO_BUF_POOL_OBJSZ 96
#define PTNET_MDEV_IO_END 100
-/*
- * ptnetmap configuration
- *
- * The ptnet kthreads (running in host kernel-space) need to be configured
- * in order to know how to intercept guest kicks (I/O register writes) and
- * how to inject MSI-X interrupts to the guest. The configuration may vary
- * depending on the hypervisor. Currently, we support QEMU/KVM on Linux and
- * and bhyve on FreeBSD.
- * The configuration is passed by the hypervisor to the host netmap module
- * by means of an ioctl() with nr_cmd=NETMAP_PT_HOST_CREATE, and it is
- * specified by the ptnetmap_cfg struct. This struct contains an header
- * with general informations and an array of entries whose size depends
- * on the hypervisor. The NETMAP_PT_HOST_CREATE command is issued every
- * time the kthreads are started.
- */
-struct ptnetmap_cfg {
-#define PTNETMAP_CFGTYPE_QEMU 0x1
-#define PTNETMAP_CFGTYPE_BHYVE 0x2
- uint16_t cfgtype; /* how to interpret the cfg entries */
- uint16_t entry_size; /* size of a config entry */
- uint32_t num_rings; /* number of config entries */
- void *csb_gh; /* CSB for guest --> host communication */
- void *csb_hg; /* CSB for host --> guest communication */
- /* Configuration entries are allocated right after the struct. */
-};
-
-/* Configuration of a ptnetmap ring for QEMU. */
-struct ptnetmap_cfgentry_qemu {
- uint32_t ioeventfd; /* to intercept guest register access */
- uint32_t irqfd; /* to inject guest interrupts */
-};
-
-/* Configuration of a ptnetmap ring for bhyve. */
-struct ptnetmap_cfgentry_bhyve {
- uint64_t wchan; /* tsleep() parameter, to wake up kthread */
- uint32_t ioctl_fd; /* ioctl fd */
- /* ioctl parameters to send irq */
- uint32_t ioctl_cmd;
- /* vmm.ko MSIX parameters for IOCTL */
- struct {
- uint64_t msg_data;
- uint64_t addr;
- } ioctl_data;
-};
-
-/*
- * Pass a pointer to a userspace buffer to be passed to kernelspace for write
- * or read. Used by NETMAP_PT_HOST_CREATE.
- * XXX deprecated
- */
-static inline void
-nmreq_pointer_put(struct nmreq *nmr, void *userptr)
-{
- uintptr_t *pp = (uintptr_t *)&nmr->nr_arg1;
- *pp = (uintptr_t)userptr;
-}
-
-static inline void *
-nmreq_pointer_get(const struct nmreq *nmr)
-{
- const uintptr_t *pp = (const uintptr_t *)&nmr->nr_arg1;
- return (void *)*pp;
-}
-
/* ptnetmap features */
#define PTNETMAP_F_VNET_HDR 1
-/* I/O registers for the ptnet device. */
+/* Device registers for the ptnet network device. */
#define PTNET_IO_PTFEAT 0
#define PTNET_IO_PTCTL 4
#define PTNET_IO_MAC_LO 8
@@ -153,140 +90,11 @@
#define PTNET_IO_KICK_BASE 128
#define PTNET_IO_MASK 0xff
-/* ptnetmap control commands (values for PTCTL register) */
+/* ptnet control commands (values for PTCTL register):
+ * - CREATE starts the host sync-kloop
+ * - DELETE stops the host sync-kloop
+ */
#define PTNETMAP_PTCTL_CREATE 1
#define PTNETMAP_PTCTL_DELETE 2
-
-/* ptnetmap synchronization variables shared between guest and host */
-struct ptnet_csb_gh {
- uint32_t head; /* GW+ HR+ the head of the guest netmap_ring */
- uint32_t cur; /* GW+ HR+ the cur of the guest netmap_ring */
- uint32_t guest_need_kick; /* GW+ HR+ host-->guest notification enable */
- uint32_t sync_flags; /* GW+ HR+ the flags of the guest [tx|rx]sync() */
- char pad[48]; /* pad to a 64 bytes cacheline */
-};
-struct ptnet_csb_hg {
- uint32_t hwcur; /* GR+ HW+ the hwcur of the host netmap_kring */
- uint32_t hwtail; /* GR+ HW+ the hwtail of the host netmap_kring */
- uint32_t host_need_kick; /* GR+ HW+ guest-->host notification enable */
- char pad[4+48];
-};
-
-#ifdef WITH_PTNETMAP_GUEST
-
-/* ptnetmap_memdev routines used to talk with ptnetmap_memdev device driver */
-struct ptnetmap_memdev;
-int nm_os_pt_memdev_iomap(struct ptnetmap_memdev *, vm_paddr_t *, void **,
- uint64_t *);
-void nm_os_pt_memdev_iounmap(struct ptnetmap_memdev *);
-uint32_t nm_os_pt_memdev_ioread(struct ptnetmap_memdev *, unsigned int);
-
-/* Guest driver: Write kring pointers (cur, head) to the CSB.
- * This routine is coupled with ptnetmap_host_read_kring_csb(). */
-static inline void
-ptnetmap_guest_write_kring_csb(struct ptnet_csb_gh *ptr, uint32_t cur,
- uint32_t head)
-{
- /*
- * We need to write cur and head to the CSB but we cannot do it atomically.
- * There is no way we can prevent the host from reading the updated value
- * of one of the two and the old value of the other. However, if we make
- * sure that the host never reads a value of head more recent than the
- * value of cur we are safe. We can allow the host to read a value of cur
- * more recent than the value of head, since in the netmap ring cur can be
- * ahead of head and cur cannot wrap around head because it must be behind
- * tail. Inverting the order of writes below could instead result into the
- * host to think head went ahead of cur, which would cause the sync
- * prologue to fail.
- *
- * The following memory barrier scheme is used to make this happen:
- *
- * Guest Host
- *
- * STORE(cur) LOAD(head)
- * mb() <-----------> mb()
- * STORE(head) LOAD(cur)
- */
- ptr->cur = cur;
- mb();
- ptr->head = head;
-}
-
-/* Guest driver: Read kring pointers (hwcur, hwtail) from the CSB.
- * This routine is coupled with ptnetmap_host_write_kring_csb(). */
-static inline void
-ptnetmap_guest_read_kring_csb(struct ptnet_csb_hg *pthg, struct netmap_kring *kring)
-{
- /*
- * We place a memory barrier to make sure that the update of hwtail never
- * overtakes the update of hwcur.
- * (see explanation in ptnetmap_host_write_kring_csb).
- */
- kring->nr_hwtail = pthg->hwtail;
- mb();
- kring->nr_hwcur = pthg->hwcur;
-}
-
-#endif /* WITH_PTNETMAP_GUEST */
-
-#ifdef WITH_PTNETMAP_HOST
-/*
- * ptnetmap kernel thread routines
- * */
-
-/* Functions to read and write CSB fields in the host */
-#if defined (linux)
-#define CSB_READ(csb, field, r) (get_user(r, &csb->field))
-#define CSB_WRITE(csb, field, v) (put_user(v, &csb->field))
-#else /* ! linux */
-#define CSB_READ(csb, field, r) (r = fuword32(&csb->field))
-#define CSB_WRITE(csb, field, v) (suword32(&csb->field, v))
-#endif /* ! linux */
-
-/* Host netmap: Write kring pointers (hwcur, hwtail) to the CSB.
- * This routine is coupled with ptnetmap_guest_read_kring_csb(). */
-static inline void
-ptnetmap_host_write_kring_csb(struct ptnet_csb_hg __user *ptr, uint32_t hwcur,
- uint32_t hwtail)
-{
- /*
- * The same scheme used in ptnetmap_guest_write_kring_csb() applies here.
- * We allow the guest to read a value of hwcur more recent than the value
- * of hwtail, since this would anyway result in a consistent view of the
- * ring state (and hwcur can never wraparound hwtail, since hwcur must be
- * behind head).
- *
- * The following memory barrier scheme is used to make this happen:
- *
- * Guest Host
- *
- * STORE(hwcur) LOAD(hwtail)
- * mb() <-------------> mb()
- * STORE(hwtail) LOAD(hwcur)
- */
- CSB_WRITE(ptr, hwcur, hwcur);
- mb();
- CSB_WRITE(ptr, hwtail, hwtail);
-}
-
-/* Host netmap: Read kring pointers (head, cur, sync_flags) from the CSB.
- * This routine is coupled with ptnetmap_guest_write_kring_csb(). */
-static inline void
-ptnetmap_host_read_kring_csb(struct ptnet_csb_gh __user *ptr,
- struct netmap_ring *shadow_ring,
- uint32_t num_slots)
-{
- /*
- * We place a memory barrier to make sure that the update of head never
- * overtakes the update of cur.
- * (see explanation in ptnetmap_guest_write_kring_csb).
- */
- CSB_READ(ptr, head, shadow_ring->head);
- mb();
- CSB_READ(ptr, cur, shadow_ring->cur);
- CSB_READ(ptr, sync_flags, shadow_ring->flags);
-}
-
-#endif /* WITH_PTNETMAP_HOST */
#endif /* NETMAP_VIRT_H */