Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F161240853
D17364.id49494.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
166 KB
Referenced Files
None
Subscribers
None
D17364.id49494.diff
View Options
Index: head/sys/conf/files
===================================================================
--- head/sys/conf/files
+++ head/sys/conf/files
@@ -2522,6 +2522,7 @@
dev/netmap/netmap_pt.c optional netmap
dev/netmap/netmap_vale.c optional netmap
dev/netmap/netmap_legacy.c optional netmap
+dev/netmap/netmap_bdg.c optional netmap
# compile-with "${NORMAL_C} -Wconversion -Wextra"
dev/nfsmb/nfsmb.c optional nfsmb pci
dev/nge/if_nge.c optional nge
Index: head/sys/dev/netmap/netmap.c
===================================================================
--- head/sys/dev/netmap/netmap.c
+++ head/sys/dev/netmap/netmap.c
@@ -521,6 +521,9 @@
int netmap_generic_ringsize = 1024;
int netmap_generic_rings = 1;
+/* Non-zero to enable checksum offloading in NIC drivers */
+int netmap_generic_hwcsum = 0;
+
/* Non-zero if ptnet devices are allowed to use virtio-net headers. */
int ptnet_vnet_hdr = 1;
@@ -549,6 +552,9 @@
SYSCTL_INT(_dev_netmap, OID_AUTO, admode, CTLFLAG_RW, &netmap_admode, 0,
"Adapter mode. 0 selects the best option available,"
"1 forces native adapter, 2 forces emulated adapter");
+SYSCTL_INT(_dev_netmap, OID_AUTO, generic_hwcsum, CTLFLAG_RW, &netmap_generic_hwcsum,
+ 0, "Hardware checksums. 0 to disable checksum generation by the NIC (default),"
+ "1 to enable checksum generation by the NIC");
SYSCTL_INT(_dev_netmap, OID_AUTO, generic_mit, CTLFLAG_RW, &netmap_generic_mit,
0, "RX notification interval in nanoseconds");
SYSCTL_INT(_dev_netmap, OID_AUTO, generic_ringsize, CTLFLAG_RW,
@@ -827,8 +833,8 @@
}
/* account for the (possibly fake) host rings */
- n[NR_TX] = na->num_tx_rings + 1;
- n[NR_RX] = na->num_rx_rings + 1;
+ n[NR_TX] = netmap_all_rings(na, NR_TX);
+ n[NR_RX] = netmap_all_rings(na, NR_RX);
len = (n[NR_TX] + n[NR_RX]) *
(sizeof(struct netmap_kring) + sizeof(struct netmap_kring *))
@@ -930,11 +936,14 @@
void
netmap_hw_krings_delete(struct netmap_adapter *na)
{
- struct mbq *q = &na->rx_rings[na->num_rx_rings]->rx_queue;
+ u_int lim = netmap_real_rings(na, NR_RX), i;
- ND("destroy sw mbq with len %d", mbq_len(q));
- mbq_purge(q);
- mbq_safe_fini(q);
+ for (i = nma_get_nrings(na, NR_RX); i < lim; i++) {
+ struct mbq *q = &NMR(na, NR_RX)[i]->rx_queue;
+ ND("destroy sw mbq with len %d", mbq_len(q));
+ mbq_purge(q);
+ mbq_safe_fini(q);
+ }
netmap_krings_delete(na);
}
@@ -1535,7 +1544,7 @@
goto out;
/* try to see if this is a bridge port */
- error = netmap_get_bdg_na(hdr, na, nmd, create);
+ error = netmap_get_vale_na(hdr, na, nmd, create);
if (error)
goto out;
@@ -1827,7 +1836,7 @@
}
priv->np_qfirst[t] = (nr_mode == NR_REG_SW ?
nma_get_nrings(na, t) : 0);
- priv->np_qlast[t] = nma_get_nrings(na, t) + 1;
+ priv->np_qlast[t] = netmap_all_rings(na, t);
ND("%s: %s %d %d", nr_mode == NR_REG_SW ? "SW" : "NIC+SW",
nm_txrx2str(t),
priv->np_qfirst[t], priv->np_qlast[t]);
@@ -2543,7 +2552,7 @@
NMG_LOCK();
hdr->nr_reqtype = NETMAP_REQ_REGISTER;
hdr->nr_body = (uintptr_t)&regreq;
- error = netmap_get_bdg_na(hdr, &na, NULL, 0);
+ error = netmap_get_vale_na(hdr, &na, NULL, 0);
hdr->nr_reqtype = NETMAP_REQ_PORT_HDR_SET;
hdr->nr_body = (uintptr_t)req;
if (na && !error) {
@@ -3336,6 +3345,12 @@
}
na->pdev = na; /* make sure netmap_mem_map() is called */
#endif /* __FreeBSD__ */
+ if (na->na_flags & NAF_HOST_RINGS) {
+ if (na->num_host_rx_rings == 0)
+ na->num_host_rx_rings = 1;
+ if (na->num_host_tx_rings == 0)
+ na->num_host_tx_rings = 1;
+ }
if (na->nm_krings_create == NULL) {
/* we assume that we have been called by a driver,
* since other port types all provide their own
@@ -3357,7 +3372,7 @@
/* no special nm_bdg_attach callback. On VALE
* attach, we need to interpose a bwrap
*/
- na->nm_bdg_attach = netmap_bwrap_attach;
+ na->nm_bdg_attach = netmap_default_bdg_attach;
#endif
return 0;
@@ -3399,10 +3414,10 @@
static void
netmap_hw_dtor(struct netmap_adapter *na)
{
- if (nm_iszombie(na) || na->ifp == NULL)
+ if (na->ifp == NULL)
return;
- WNA(na->ifp) = NULL;
+ NM_DETACH_NA(na->ifp);
}
@@ -3426,10 +3441,10 @@
}
if (arg == NULL || arg->ifp == NULL)
- goto fail;
+ return EINVAL;
ifp = arg->ifp;
- if (NA(ifp) && !NM_NA_VALID(ifp)) {
+ if (NM_NA_CLASH(ifp)) {
/* If NA(ifp) is not null but there is no valid netmap
* adapter it means that someone else is using the same
* pointer (e.g. ax25_ptr on linux). This happens for
@@ -3456,28 +3471,8 @@
NM_ATTACH_NA(ifp, &hwna->up);
-#ifdef linux
- if (ifp->netdev_ops) {
- /* prepare a clone of the netdev ops */
-#ifndef NETMAP_LINUX_HAVE_NETDEV_OPS
- hwna->nm_ndo.ndo_start_xmit = ifp->netdev_ops;
-#else
- hwna->nm_ndo = *ifp->netdev_ops;
-#endif /* NETMAP_LINUX_HAVE_NETDEV_OPS */
- }
- hwna->nm_ndo.ndo_start_xmit = linux_netmap_start_xmit;
- hwna->nm_ndo.ndo_change_mtu = linux_netmap_change_mtu;
- if (ifp->ethtool_ops) {
- hwna->nm_eto = *ifp->ethtool_ops;
- }
- hwna->nm_eto.set_ringparam = linux_netmap_set_ringparam;
-#ifdef NETMAP_LINUX_HAVE_SET_CHANNELS
- hwna->nm_eto.set_channels = linux_netmap_set_channels;
-#endif /* NETMAP_LINUX_HAVE_SET_CHANNELS */
- if (arg->nm_config == NULL) {
- hwna->up.nm_config = netmap_linux_config;
- }
-#endif /* linux */
+ nm_os_onattach(ifp);
+
if (arg->nm_dtor == NULL) {
hwna->up.nm_dtor = netmap_hw_dtor;
}
@@ -3545,7 +3540,10 @@
int ret = netmap_krings_create(na, 0);
if (ret == 0) {
/* initialize the mbq for the sw rx ring */
- mbq_safe_init(&na->rx_rings[na->num_rx_rings]->rx_queue);
+ u_int lim = netmap_real_rings(na, NR_RX), i;
+ for (i = na->num_rx_rings; i < lim; i++) {
+ mbq_safe_init(&NMR(na, NR_RX)[i]->rx_queue);
+ }
ND("initialized sw rx queue %d", na->num_rx_rings);
}
return ret;
@@ -3608,8 +3606,14 @@
unsigned int txr;
struct mbq *q;
int busy;
+ u_int i;
- kring = na->rx_rings[na->num_rx_rings];
+ i = MBUF_TXQ(m);
+ if (i >= na->num_host_rx_rings) {
+ i = i % na->num_host_rx_rings;
+ }
+ kring = NMR(na, NR_RX)[nma_get_nrings(na, NR_RX) + i];
+
// XXX [Linux] we do not need this lock
// if we follow the down/configure/up protocol -gl
// mtx_lock(&na->core_lock);
@@ -3639,8 +3643,15 @@
goto done;
}
- if (nm_os_mbuf_has_offld(m)) {
- RD(1, "%s drop mbuf that needs offloadings", na->name);
+ if (!netmap_generic_hwcsum) {
+ if (nm_os_mbuf_has_csum_offld(m)) {
+ RD(1, "%s drop mbuf that needs checksum offload", na->name);
+ goto done;
+ }
+ }
+
+ if (nm_os_mbuf_has_seg_offld(m)) {
+ RD(1, "%s drop mbuf that needs generic segmentation offload", na->name);
goto done;
}
@@ -3843,6 +3854,40 @@
}
return netmap_common_irq(na, q, work_done);
+}
+
+/* set/clear native flags and if_transmit/netdev_ops (via nm_os_onenter/nm_os_onexit) */
+void
+nm_set_native_flags(struct netmap_adapter *na)
+{
+ struct ifnet *ifp = na->ifp;
+
+ /* We do the setup for intercepting packets only if we are the
+ * first user of this adapter (no active file descriptors yet). */
+ if (na->active_fds > 0) {
+ return;
+ }
+
+ na->na_flags |= NAF_NETMAP_ON; /* flag is set before the OS hook runs */
+ nm_os_onenter(ifp);
+ nm_update_hostrings_mode(na);
+}
+
+void
+nm_clear_native_flags(struct netmap_adapter *na)
+{
+ struct ifnet *ifp = na->ifp;
+
+ /* We undo the setup for intercepting packets only if we are the
+ * last user of this adapter (no active file descriptors left). */
+ if (na->active_fds > 0) {
+ return;
+ }
+
+ nm_update_hostrings_mode(na);
+ nm_os_onexit(ifp);
+
+ na->na_flags &= ~NAF_NETMAP_ON; /* flag is cleared only after the OS hook ran */
}
Index: head/sys/dev/netmap/netmap_bdg.h
===================================================================
--- head/sys/dev/netmap/netmap_bdg.h
+++ head/sys/dev/netmap/netmap_bdg.h
@@ -0,0 +1,155 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (C) 2013-2018 Universita` di Pisa
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+#ifndef _NET_NETMAP_BDG_H_
+#define _NET_NETMAP_BDG_H_
+
+#if defined(__FreeBSD__)
+#define BDG_RWLOCK_T struct rwlock // struct rwlock
+
+#define BDG_RWINIT(b) \
+ rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS)
+#define BDG_WLOCK(b) rw_wlock(&(b)->bdg_lock)
+#define BDG_WUNLOCK(b) rw_wunlock(&(b)->bdg_lock)
+#define BDG_RLOCK(b) rw_rlock(&(b)->bdg_lock)
+#define BDG_RTRYLOCK(b) rw_try_rlock(&(b)->bdg_lock)
+#define BDG_RUNLOCK(b) rw_runlock(&(b)->bdg_lock)
+#define BDG_RWDESTROY(b) rw_destroy(&(b)->bdg_lock)
+
+#endif /* __FreeBSD__ */
+
+/* XXX Should go away after fixing find_bridge() - Michio */
+#define NM_BDG_HASH 1024 /* forwarding table entries */
+
+/* XXX revise this */
+struct nm_hash_ent {
+ uint64_t mac; /* the top 2 bytes are the epoch */
+ uint64_t ports;
+};
+
+/* Default size for the Maximum Frame Size. */
+#define NM_BDG_MFS_DEFAULT 1514
+
+/*
+ * nm_bridge is a descriptor for a VALE switch.
+ * Interfaces for a bridge are all in bdg_ports[].
+ * The array has a fixed size; an empty entry does not terminate
+ * the search, but lookups only occur on attach/detach so we
+ * don't mind if they are slow.
+ *
+ * The bridge is non-blocking on the transmit ports: excess
+ * packets are dropped if there is no room on the output port.
+ *
+ * bdg_lock protects accesses to the bdg_ports array.
+ * This is a rw lock (or equivalent).
+ */
+#define NM_BDG_IFNAMSIZ IFNAMSIZ
+struct nm_bridge {
+ /* XXX what is the proper alignment/layout ? */
+ BDG_RWLOCK_T bdg_lock; /* protects bdg_ports */
+ int bdg_namelen; /* number of valid characters in bdg_basename */
+ uint32_t bdg_active_ports; /* number of ports currently bound */
+ char bdg_basename[NM_BDG_IFNAMSIZ];
+
+ /* Indexes of active ports (the first bdg_active_ports entries)
+ * followed by the indexes of all the remaining, free ports.
+ */
+ uint32_t bdg_port_index[NM_BDG_MAXPORTS];
+ /* scratch copy of bdg_port_index used by netmap_bdg_detach_common() */
+ uint32_t tmp_bdg_port_index[NM_BDG_MAXPORTS];
+
+ struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS];
+
+ /*
+ * Programmable lookup function to figure out the destination port.
+ * It returns either the index of the destination port,
+ * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to
+ * forward this packet. ring_nr is the source ring index, and the
+ * function may overwrite this value to forward this packet to a
+ * different ring index.
+ * The function is set by netmap_bdg_regops().
+ */
+ struct netmap_bdg_ops *bdg_ops;
+
+ /*
+ * Contains the data structure used by the bdg_ops.lookup function.
+ * By default it points to ht, which is allocated when the bridge is created;
+ * otherwise it points to the data structure received by netmap_bdg_regops().
+ */
+ void *private_data;
+ struct nm_hash_ent *ht; /* also serves as the authentication token */
+
+ /* Currently used to specify if the bridge is still in use while empty and
+ * if it has been put in exclusive mode by an external module; see netmap_bdg_regops()
+ * and netmap_bdg_create().
+ */
+#define NM_BDG_ACTIVE 1
+#define NM_BDG_EXCLUSIVE 2
+ uint8_t bdg_flags;
+
+
+#ifdef CONFIG_NET_NS
+ struct net *ns;
+#endif /* CONFIG_NET_NS */
+};
+
+static inline void *
+nm_bdg_get_auth_token(struct nm_bridge *b)
+{
+ return b->ht; /* the bridge's hash table pointer doubles as its token */
+}
+
+/* Token check for bridge requests: a bridge that is not in exclusive mode
+ * accepts any token, an exclusive one only the token equal to its b->ht.
+ */
+static inline int
+nm_bdg_valid_auth_token(struct nm_bridge *b, void *auth_token)
+{
+	return (b->bdg_flags & NM_BDG_EXCLUSIVE) ? (b->ht == auth_token) : 1;
+}
+
+int netmap_get_bdg_na(struct nmreq_header *hdr, struct netmap_adapter **na,
+ struct netmap_mem_d *nmd, int create, struct netmap_bdg_ops *ops);
+
+struct nm_bridge *nm_find_bridge(const char *name, int create,
+ struct netmap_bdg_ops *ops);
+int netmap_bdg_free(struct nm_bridge *b);
+void netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw);
+int netmap_vp_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na);
+int netmap_vp_reg(struct netmap_adapter *na, int onoff); /* declared once */
+int netmap_bwrap_reg(struct netmap_adapter *na, int onoff);
+int netmap_vp_rxsync(struct netmap_kring *kring, int flags);
+int netmap_bwrap_notify(struct netmap_kring *kring, int flags);
+int netmap_bwrap_attach_common(struct netmap_adapter *na,
+ struct netmap_adapter *hwna);
+int netmap_bwrap_krings_create_common(struct netmap_adapter *na);
+void netmap_bwrap_krings_delete_common(struct netmap_adapter *na);
+#define NM_NEED_BWRAP (-2)
+#endif /* _NET_NETMAP_BDG_H_ */
+
Index: head/sys/dev/netmap/netmap_bdg.c
===================================================================
--- head/sys/dev/netmap/netmap_bdg.c
+++ head/sys/dev/netmap/netmap_bdg.c
@@ -0,0 +1,1827 @@
+/*
+ * Copyright (C) 2013-2016 Universita` di Pisa
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+
+/*
+ * This module implements the VALE switch for netmap
+
+--- VALE SWITCH ---
+
+NMG_LOCK() serializes all modifications to switches and ports.
+A switch cannot be deleted until all ports are gone.
+
+For each switch, an SX lock (RWlock on linux) protects
+deletion of ports. When configuring or deleting a new port, the
+lock is acquired in exclusive mode (after holding NMG_LOCK).
+When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
+The lock is held throughout the entire forwarding cycle,
+during which the thread may incur a page fault.
+Hence it is important that sleepable shared locks are used.
+
+On the rx ring, the per-port lock is grabbed initially to reserve
+a number of slots in the ring, then the lock is released,
+packets are copied from source to destination, and then
+the lock is acquired again and the receive ring is updated.
+(A similar thing is done on the tx ring for NIC and host stack
+ports attached to the switch)
+
+ */
+
+/*
+ * OS-specific code that is used only within this file.
+ * Other OS-specific code that must be accessed by drivers
+ * is present in netmap_kern.h
+ */
+
+#if defined(__FreeBSD__)
+#include <sys/cdefs.h> /* prerequisite */
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/param.h> /* defines used in kernel.h */
+#include <sys/kernel.h> /* types used in module initialization */
+#include <sys/conf.h> /* cdevsw struct, UID, GID */
+#include <sys/sockio.h>
+#include <sys/socketvar.h> /* struct socket */
+#include <sys/malloc.h>
+#include <sys/poll.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h> /* sockaddrs */
+#include <sys/selinfo.h>
+#include <sys/sysctl.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/bpf.h> /* BIOCIMMEDIATE */
+#include <machine/bus.h> /* bus_dmamap_* */
+#include <sys/endian.h>
+#include <sys/refcount.h>
+#include <sys/smp.h>
+
+
+#elif defined(linux)
+
+#include "bsd_glue.h"
+
+#elif defined(__APPLE__)
+
+#warning OSX support is only partial
+#include "osx_glue.h"
+
+#elif defined(_WIN32)
+#include "win_glue.h"
+
+#else
+
+#error Unsupported platform
+
+#endif /* unsupported */
+
+/*
+ * common headers
+ */
+
+#include <net/netmap.h>
+#include <dev/netmap/netmap_kern.h>
+#include <dev/netmap/netmap_mem2.h>
+
+#include <dev/netmap/netmap_bdg.h>
+
+const char *
+netmap_bdg_name(struct netmap_vp_adapter *vp)
+{
+	/* Name of the bridge the port is bound to; NULL if it has none. */
+	const struct nm_bridge *bridge = vp->na_bdg;
+
+	return (bridge != NULL) ? bridge->bdg_basename : NULL;
+}
+
+
+#ifndef CONFIG_NET_NS
+/*
+ * XXX in principle nm_bridges could be created dynamically
+ * Right now we have a static array and deletions are protected
+ * by an exclusive lock.
+ */
+static struct nm_bridge *nm_bridges;
+#endif /* !CONFIG_NET_NS */
+
+
+static int
+nm_is_id_char(const char c)
+{
+	/* Accept only ASCII letters, decimal digits and the underscore. */
+	const int is_alpha = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
+
+	return is_alpha || (c >= '0' && c <= '9') || c == '_';
+}
+
+/* Validate the name of a VALE bridge port. Return the position of the ':'
+ * character, or -1 if the name is invalid or contains no ':' at all. */
+static int
+nm_vale_name_validate(const char *name)
+{
+ int colon_pos = -1; /* stays -1 until a ':' is found */
+ int i;
+
+ if (!name || strlen(name) < strlen(NM_BDG_NAME)) {
+ return -1;
+ }
+
+ for (i = 0; i < NM_BDG_IFNAMSIZ && name[i]; i++) {
+ if (name[i] == ':') {
+ colon_pos = i;
+ break;
+ } else if (!nm_is_id_char(name[i])) {
+ return -1;
+ }
+ }
+
+ if (strlen(name) - colon_pos > IFNAMSIZ) {
+ /* the part starting at the ':' is too long for an interface name */
+ return -1;
+ }
+
+ return colon_pos; /* NOTE(review): still -1 here when no ':' was seen */
+}
+
+/*
+ * Locate a bridge among the existing ones, optionally creating it.
+ * MUST BE CALLED WITH NMG_LOCK()
+ *
+ * The bridge name is the part of 'name' that precedes the ':' (see
+ * nm_vale_name_validate()). Returns NULL if the name is invalid, if no
+ * bridge matches and none can be created, or if allocation fails.
+ */
+struct nm_bridge *
+nm_find_bridge(const char *name, int create, struct netmap_bdg_ops *ops)
+{
+ int i, namelen;
+ struct nm_bridge *b = NULL, *bridges;
+ u_int num_bridges;
+
+ NMG_LOCK_ASSERT();
+
+ netmap_bns_getbridges(&bridges, &num_bridges);
+
+ namelen = nm_vale_name_validate(name);
+ if (namelen < 0) {
+ D("invalid bridge name %s", name ? name : "(null)");
+ return NULL;
+ }
+
+ /* lookup the name, remember empty slot if there is one */
+ for (i = 0; i < num_bridges; i++) {
+ struct nm_bridge *x = bridges + i;
+
+ if ((x->bdg_flags & NM_BDG_ACTIVE) + x->bdg_active_ports == 0) {
+ if (create && b == NULL)
+ b = x; /* record empty slot */
+ } else if (x->bdg_namelen != namelen) {
+ continue;
+ } else if (strncmp(name, x->bdg_basename, namelen) == 0) {
+ ND("found '%.*s' at %d", namelen, name, i);
+ b = x;
+ break;
+ }
+ }
+ if (i == num_bridges && b) { /* name not found, can create entry */
+ /* initialize the bridge in the empty slot recorded above */
+ ND("create new bridge '%.*s'", namelen, name);
+ b->ht = nm_os_malloc(sizeof(struct nm_hash_ent) * NM_BDG_HASH);
+ if (b->ht == NULL) {
+ D("failed to allocate hash table");
+ return NULL;
+ }
+ strncpy(b->bdg_basename, name, namelen);
+ b->bdg_basename[namelen] = '\0'; /* a reused slot may hold a longer old name */
+ b->bdg_namelen = namelen;
+ b->bdg_active_ports = 0;
+ for (i = 0; i < NM_BDG_MAXPORTS; i++)
+ b->bdg_port_index[i] = i;
+ /* set the default function */
+ b->bdg_ops = ops;
+ b->private_data = b->ht;
+ b->bdg_flags = 0;
+ NM_BNS_GET(b);
+ }
+ return b;
+}
+
+
+int
+netmap_bdg_free(struct nm_bridge *b)
+{
+	/* In use while flagged active or while any port is still attached. */
+	uint32_t busy = (b->bdg_flags & NM_BDG_ACTIVE) + b->bdg_active_ports;
+	if (busy != 0)
+		return EBUSY;
+	ND("marking bridge %s as free", b->bdg_basename);
+	nm_os_free(b->ht);
+	b->bdg_ops = NULL;
+	b->bdg_flags = 0;
+	NM_BNS_PUT(b);
+	return 0;
+}
+
+
+/* Remove from bridge b the ports in slots hw and sw (sw can be -1 if
+ * not needed), then try to release the bridge with netmap_bdg_free().
+ */
+void
+netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
+{
+ int s_hw = hw, s_sw = sw; /* saved: hw and sw are reset to -1 by the scan */
+ int i, lim =b->bdg_active_ports;
+ uint32_t *tmp = b->tmp_bdg_port_index;
+
+ /*
+ New algorithm:
+ make a copy of bdg_port_index;
+ lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
+ in the array of bdg_port_index, replacing them with
+ entries from the bottom of the array;
+ decrement bdg_active_ports;
+ acquire BDG_WLOCK() and copy back the array.
+ */
+
+ if (netmap_verbose)
+ D("detach %d and %d (lim %d)", hw, sw, lim);
+ /* make a copy of the list of active ports, update it,
+ * and then copy back within BDG_WLOCK().
+ */
+ memcpy(b->tmp_bdg_port_index, b->bdg_port_index, sizeof(b->tmp_bdg_port_index));
+ for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
+ if (hw >= 0 && tmp[i] == hw) {
+ ND("detach hw %d at %d", hw, i);
+ lim--; /* point to last active port */
+ tmp[i] = tmp[lim]; /* move the last active port into slot i */
+ tmp[lim] = hw; /* now this is inactive */
+ hw = -1;
+ } else if (sw >= 0 && tmp[i] == sw) {
+ ND("detach sw %d at %d", sw, i);
+ lim--;
+ tmp[i] = tmp[lim];
+ tmp[lim] = sw;
+ sw = -1;
+ } else {
+ i++; /* advance only when slot i was not replaced */
+ }
+ }
+ if (hw >= 0 || sw >= 0) {
+ D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
+ }
+
+ BDG_WLOCK(b);
+ if (b->bdg_ops->dtor)
+ b->bdg_ops->dtor(b->bdg_ports[s_hw]);
+ b->bdg_ports[s_hw] = NULL;
+ if (s_sw >= 0) {
+ b->bdg_ports[s_sw] = NULL;
+ }
+ memcpy(b->bdg_port_index, b->tmp_bdg_port_index, sizeof(b->tmp_bdg_port_index));
+ b->bdg_active_ports = lim;
+ BDG_WUNLOCK(b);
+
+ ND("now %d active ports", lim);
+ netmap_bdg_free(b); /* returns EBUSY, ignored here, if the bridge is still in use */
+}
+
+
+/* nm_bdg_ctl callback for VALE ports: attach is a no-op, any other request detaches */
+int
+netmap_vp_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na)
+{
+ struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
+ struct nm_bridge *b = vpna->na_bdg;
+
+ if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
+ return 0; /* nothing to do */
+ }
+ if (b) {
+ netmap_set_all_rings(na, 0 /* disable */);
+ netmap_bdg_detach_common(b, vpna->bdg_port, -1);
+ vpna->na_bdg = NULL;
+ netmap_set_all_rings(na, 1 /* enable */);
+ }
+ /* drop the reference that was taken just for the attach */
+ netmap_adapter_put(na);
+ return 0;
+}
+
+int
+netmap_default_bdg_attach(const char *name, struct netmap_adapter *na,
+ struct nm_bridge *b)
+{
+ return NM_NEED_BWRAP; /* netmap_get_bdg_na() reacts by calling bdg_ops->bwrap_attach() */
+}
+
+/* Try to get a reference to a netmap adapter attached to a VALE switch.
+ * If the adapter is found (or is created), this function returns 0, a
+ * non NULL pointer is returned into *na, and the caller holds a
+ * reference to the adapter.
+ * If an adapter is not found, then no reference is grabbed and the
+ * function returns an error code, or 0 if there is just a VALE prefix
+ * mismatch. Therefore the caller holds a reference when
+ * (*na != NULL && return == 0).
+ */
+int
+netmap_get_bdg_na(struct nmreq_header *hdr, struct netmap_adapter **na,
+ struct netmap_mem_d *nmd, int create, struct netmap_bdg_ops *ops)
+{
+ char *nr_name = hdr->nr_name;
+ const char *ifname;
+ struct ifnet *ifp = NULL;
+ int error = 0;
+ struct netmap_vp_adapter *vpna, *hostna = NULL;
+ struct nm_bridge *b;
+ uint32_t i, j;
+ uint32_t cand = NM_BDG_NOPORT, cand2 = NM_BDG_NOPORT;
+ int needed;
+
+ *na = NULL; /* default return value */
+
+ /* first try to see if this is a bridge port. */
+ NMG_LOCK_ASSERT();
+ if (strncmp(nr_name, ops->name, strlen(ops->name) - 1)) {
+ return 0; /* no error, but no VALE prefix */
+ }
+
+ b = nm_find_bridge(nr_name, create, ops);
+ if (b == NULL) {
+ ND("no bridges available for '%s'", nr_name);
+ return (create ? ENOMEM : ENXIO);
+ }
+ if (strlen(nr_name) < b->bdg_namelen) /* impossible */
+ panic("x");
+
+ /* Now we are sure that name starts with the bridge's name,
+ * lookup the port in the bridge. We need to scan the entire
+ * list. It is not important to hold a WLOCK on the bridge
+ * during the search because NMG_LOCK already guarantees
+ * that there are no other possible writers.
+ */
+
+ /* lookup in the local list of ports */
+ for (j = 0; j < b->bdg_active_ports; j++) {
+ i = b->bdg_port_index[j];
+ vpna = b->bdg_ports[i];
+ ND("checking %s", vpna->up.name);
+ if (!strcmp(vpna->up.name, nr_name)) {
+ netmap_adapter_get(&vpna->up);
+ ND("found existing if %s refs %d", nr_name, vpna->up.na_refcount);
+ *na = &vpna->up;
+ return 0;
+ }
+ }
+ /* not found, should we create it? */
+ if (!create)
+ return ENXIO;
+ /* yes we should, see if we have space to attach entries */
+ needed = 2; /* in some cases we only need 1 */
+ if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
+ D("bridge full %d, cannot create new port", b->bdg_active_ports);
+ return ENOMEM;
+ }
+ /* record the next two ports available, but do not allocate yet */
+ cand = b->bdg_port_index[b->bdg_active_ports];
+ cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
+ /* the port name follows the bridge's name and the ':' separator */
+ ifname = nr_name + b->bdg_namelen + 1;
+ ND("+++ bridge %s port %s used %d avail %d %d",
+ b->bdg_basename, ifname, b->bdg_active_ports, cand, cand2);
+
+ /*
+ * try see if there is a matching NIC with this name
+ */
+ ifp = ifunit_ref(ifname);
+ if (!ifp) {
+ /* Create an ephemeral virtual port.
+ * This block contains all the ephemeral-specific logic.
+ */
+
+ if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) {
+ error = EINVAL;
+ goto out;
+ }
+
+ /* bdg_netmap_attach creates a struct netmap_adapter */
+ error = b->bdg_ops->vp_create(hdr, NULL, nmd, &vpna);
+ if (error) {
+ D("error %d", error);
+ goto out;
+ }
+ /* shortcut - we can skip get_hw_na(),
+ * ownership check and nm_bdg_attach()
+ */
+
+ } else {
+ struct netmap_adapter *hw;
+
+ /* the vale:nic syntax is only valid for some commands */
+ switch (hdr->nr_reqtype) {
+ case NETMAP_REQ_VALE_ATTACH:
+ case NETMAP_REQ_VALE_DETACH:
+ case NETMAP_REQ_VALE_POLLING_ENABLE:
+ case NETMAP_REQ_VALE_POLLING_DISABLE:
+ break; /* ok */
+ default:
+ error = EINVAL;
+ goto out;
+ }
+
+ error = netmap_get_hw_na(ifp, nmd, &hw);
+ if (error || hw == NULL)
+ goto out;
+
+ /* host adapter might not be created */
+ error = hw->nm_bdg_attach(nr_name, hw, b);
+ if (error == NM_NEED_BWRAP) {
+ error = b->bdg_ops->bwrap_attach(nr_name, hw);
+ }
+ if (error)
+ goto out;
+ vpna = hw->na_vp;
+ hostna = hw->na_hostvp;
+ if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
+ /* Check if we need to skip the host rings. */
+ struct nmreq_vale_attach *areq =
+ (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
+ if (areq->reg.nr_mode != NR_REG_NIC_SW) {
+ hostna = NULL;
+ }
+ }
+ }
+
+ BDG_WLOCK(b);
+ vpna->bdg_port = cand;
+ ND("NIC %p to bridge port %d", vpna, cand);
+ /* bind the port to the bridge (virtual ports are not active) */
+ b->bdg_ports[cand] = vpna;
+ vpna->na_bdg = b;
+ b->bdg_active_ports++;
+ if (hostna != NULL) {
+ /* also bind the host stack to the bridge */
+ b->bdg_ports[cand2] = hostna;
+ hostna->bdg_port = cand2;
+ hostna->na_bdg = b;
+ b->bdg_active_ports++;
+ ND("host %p to bridge port %d", hostna, cand2);
+ }
+ ND("if %s refs %d", ifname, vpna->up.na_refcount);
+ BDG_WUNLOCK(b);
+ *na = &vpna->up;
+ netmap_adapter_get(*na);
+
+out:
+ if (ifp)
+ if_rele(ifp);
+
+ return error;
+}
+
+/* Process NETMAP_REQ_VALE_ATTACH: attach (creating it if needed) the port
+ * named in hdr to its VALE bridge. Returns 0 on success or an errno value. */
+int
+nm_bdg_ctl_attach(struct nmreq_header *hdr, void *auth_token)
+{
+ struct nmreq_vale_attach *req =
+ (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
+ struct netmap_vp_adapter * vpna;
+ struct netmap_adapter *na = NULL;
+ struct netmap_mem_d *nmd = NULL;
+ struct nm_bridge *b = NULL;
+ int error;
+
+ NMG_LOCK();
+ /* permission check for modified bridges */
+ b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
+ if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
+ error = EACCES;
+ goto unlock_exit;
+ }
+
+ if (req->reg.nr_mem_id) {
+ nmd = netmap_mem_find(req->reg.nr_mem_id); /* NOTE(review): if this takes a reference, no path below drops it -- confirm */
+ if (nmd == NULL) {
+ error = EINVAL;
+ goto unlock_exit;
+ }
+ }
+
+ /* check for existing one: an already attached port makes the request fail */
+ error = netmap_get_vale_na(hdr, &na, nmd, 0);
+ if (na) {
+ error = EBUSY;
+ goto unref_exit;
+ }
+ error = netmap_get_vale_na(hdr, &na,
+ nmd, 1 /* create if not exists */);
+ if (error) { /* no device */
+ goto unlock_exit;
+ }
+
+ if (na == NULL) { /* VALE prefix missing */
+ error = EINVAL;
+ goto unlock_exit;
+ }
+
+ if (NETMAP_OWNED_BY_ANY(na)) {
+ error = EBUSY;
+ goto unref_exit;
+ }
+
+ if (na->nm_bdg_ctl) {
+ /* nop for VALE ports. The bwrap needs to put the hwna
+ * in netmap mode (see netmap_bwrap_bdg_ctl)
+ */
+ error = na->nm_bdg_ctl(hdr, na);
+ if (error)
+ goto unref_exit;
+ ND("registered %s to netmap-mode", na->name);
+ }
+ vpna = (struct netmap_vp_adapter *)na;
+ req->port_index = vpna->bdg_port; /* report the assigned bridge port to the caller */
+ NMG_UNLOCK();
+ return 0; /* success: the adapter reference is kept, not dropped */
+
+unref_exit:
+ netmap_adapter_put(na);
+unlock_exit:
+ NMG_UNLOCK();
+ return error;
+}
+
+static inline int
+nm_is_bwrap(struct netmap_adapter *na)
+{
+ return na->nm_register == netmap_bwrap_reg; /* identified by its register callback */
+}
+
+/* Process NETMAP_REQ_VALE_DETACH: detach the port named in hdr from its
+ * VALE bridge. Returns 0 on success or an errno value. */
+int
+nm_bdg_ctl_detach(struct nmreq_header *hdr, void *auth_token)
+{
+ struct nmreq_vale_detach *nmreq_det = (void *)(uintptr_t)hdr->nr_body;
+ struct netmap_vp_adapter *vpna;
+ struct netmap_adapter *na;
+ struct nm_bridge *b = NULL;
+ int error;
+
+ NMG_LOCK();
+ /* permission check for modified bridges */
+ b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
+ if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
+ error = EACCES;
+ goto unlock_exit;
+ }
+
+ error = netmap_get_vale_na(hdr, &na, NULL, 0 /* don't create */);
+ if (error) { /* no device, or another bridge or user owns the device */
+ goto unlock_exit;
+ }
+
+ if (na == NULL) { /* VALE prefix missing */
+ error = EINVAL;
+ goto unlock_exit;
+ } else if (nm_is_bwrap(na) &&
+ ((struct netmap_bwrap_adapter *)na)->na_polling_state) {
+ /* Don't detach a NIC with polling */
+ error = EBUSY;
+ goto unref_exit;
+ }
+
+ vpna = (struct netmap_vp_adapter *)na;
+ if (na->na_vp != vpna) {
+ /* trying to detach the first attachment of a VALE persistent port
+ * that is attached to 2 bridges
+ */
+ error = EBUSY;
+ goto unref_exit;
+ }
+ nmreq_det->port_index = vpna->bdg_port; /* report which bridge port is being detached */
+
+ if (na->nm_bdg_ctl) {
+ /* remove the port from bridge. The bwrap
+ * also needs to put the hwna in normal mode
+ */
+ error = na->nm_bdg_ctl(hdr, na);
+ }
+
+unref_exit:
+ netmap_adapter_put(na); /* drop the reference taken by netmap_get_vale_na() above */
+unlock_exit:
+ NMG_UNLOCK();
+ return error;
+
+}
+
+struct nm_bdg_polling_state;
+struct
+nm_bdg_kthread {
+ struct nm_kctx *nmk; /* context returned by nm_os_kctx_create() */
+ u_int qfirst; /* first RX ring polled by this worker */
+ u_int qlast; /* one past the last RX ring polled by this worker */
+ struct nm_bdg_polling_state *bps; /* back pointer to the shared polling state */
+};
+
+struct nm_bdg_polling_state {
+ bool configured;
+ bool stopped; /* false once the workers are started, true after stop or a failed start */
+ struct netmap_bwrap_adapter *bna; /* workers poll the RX rings of bna->hwna */
+ uint32_t mode; /* compared against NETMAP_POLLING_MODE_SINGLE_CPU */
+ u_int qfirst; /* ring range given to the single worker in single-CPU mode */
+ u_int qlast;
+ u_int cpu_from; /* worker i is given affinity cpu_from + i */
+ u_int ncpus; /* number of entries in kthreads[] */
+ struct nm_bdg_kthread *kthreads; /* array allocated by nm_bdg_create_kthreads() */
+};
+
+static void
+netmap_bwrap_polling(void *data, int is_kthread)
+{
+	struct nm_bdg_kthread *worker = data;
+	struct netmap_kring **rx_krings;
+	u_int ring, ring_end;
+
+	/* Nothing to poll without a worker descriptor. */
+	if (worker == NULL)
+		return;
+	ring = worker->qfirst;
+	ring_end = worker->qlast;
+	rx_krings = NMR(worker->bps->bna->hwna, NR_RX);
+
+	/* Notify each RX kring of bna->hwna in [qfirst, qlast). */
+	for (; ring < ring_end; ring++) {
+		struct netmap_kring *kring = rx_krings[ring];
+		kring->nm_notify(kring, 0);
+	}
+}
+
+static int
+nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps)
+{
+ struct nm_kctx_cfg kcfg;
+ int i, j;
+ /* Create one polling context per CPU; returns 0, ENOMEM or EFAULT. */
+ bps->kthreads = nm_os_malloc(sizeof(struct nm_bdg_kthread) * bps->ncpus);
+ if (bps->kthreads == NULL)
+ return ENOMEM;
+
+ bzero(&kcfg, sizeof(kcfg));
+ kcfg.worker_fn = netmap_bwrap_polling;
+ kcfg.use_kthread = 1;
+ for (i = 0; i < bps->ncpus; i++) {
+ struct nm_bdg_kthread *t = bps->kthreads + i;
+ int all = (bps->ncpus == 1 &&
+ bps->mode == NETMAP_POLLING_MODE_SINGLE_CPU);
+ int affinity = bps->cpu_from + i;
+
+ t->bps = bps;
+ t->qfirst = all ? bps->qfirst /* must be 0 */: affinity;
+ t->qlast = all ? bps->qlast : t->qfirst + 1;
+ D("kthread %d a:%u qf:%u ql:%u", i, affinity, t->qfirst, t->qlast);
+
+ kcfg.type = i;
+ kcfg.worker_private = t;
+ t->nmk = nm_os_kctx_create(&kcfg, NULL);
+ if (t->nmk == NULL) {
+ goto cleanup;
+ }
+ nm_os_kctx_worker_setaff(t->nmk, affinity);
+ }
+ return 0;
+
+cleanup:
+ for (j = 0; j < i; j++) { /* destroy only the i contexts already created */
+ struct nm_bdg_kthread *t = bps->kthreads + j;
+ nm_os_kctx_destroy(t->nmk);
+ }
+ nm_os_free(bps->kthreads);
+ bps->kthreads = NULL; /* do not leave a dangling pointer behind */
+ return EFAULT;
+}
+
+/* A variant of ptnetmap_start_kthreads().
+ *
+ * Start every worker in bps->kthreads. If one fails to start, the
+ * workers started before it are stopped again and the error returned
+ * by nm_os_kctx_worker_start() is propagated.
+ * Returns 0 on success, EFAULT if bps is NULL.
+ */
+static int
+nm_bdg_polling_start_kthreads(struct nm_bdg_polling_state *bps)
+{
+	int error, i, j;
+
+	if (!bps) {
+		D("polling is not configured");
+		return EFAULT;
+	}
+	bps->stopped = false;
+
+	for (i = 0; i < bps->ncpus; i++) {
+		struct nm_bdg_kthread *t = bps->kthreads + i;
+		error = nm_os_kctx_worker_start(t->nmk);
+		if (error) {
+			D("error in nm_kthread_start()");
+			goto cleanup;
+		}
+	}
+	return 0;
+
+cleanup:
+	/* stop the workers 0..i-1 that were started before the failure */
+	for (j = 0; j < i; j++) {
+		struct nm_bdg_kthread *t = bps->kthreads + j;
+		nm_os_kctx_worker_stop(t->nmk);
+	}
+	bps->stopped = true;
+	return error;
+}
+
+/* Stop and destroy every polling worker of 'bps', then mark the
+ * polling state as stopped. A NULL 'bps' is ignored.
+ * The kthreads array itself is not released here.
+ */
+static void
+nm_bdg_polling_stop_delete_kthreads(struct nm_bdg_polling_state *bps)
+{
+	u_int idx;
+
+	if (bps == NULL)
+		return;
+
+	for (idx = 0; idx < bps->ncpus; idx++) {
+		struct nm_kctx *ctx = bps->kthreads[idx].nmk;
+
+		nm_os_kctx_worker_stop(ctx);
+		nm_os_kctx_destroy(ctx);
+	}
+	bps->stopped = true;
+}
+
+/* Validate a polling request and translate it into the ring range,
+ * first CPU and number of CPUs stored in 'bps'.
+ *
+ * req: polling request (nr_mode, nr_first_cpu_id, nr_num_polling_cpus)
+ * na:  adapter whose rx rings are going to be polled
+ * bps: polling state to fill in; left untouched on error
+ *
+ * Returns 0 on success, EINVAL if the request is not acceptable.
+ */
+static int
+get_polling_cfg(struct nmreq_vale_polling *req, struct netmap_adapter *na,
+		struct nm_bdg_polling_state *bps)
+{
+	unsigned int avail_cpus, core_from;
+	unsigned int qfirst, qlast;
+	uint32_t i = req->nr_first_cpu_id;
+	uint32_t req_cpus = req->nr_num_polling_cpus;
+
+	avail_cpus = nm_os_ncpus();
+
+	if (req_cpus == 0) {
+		D("req_cpus must be > 0");
+		return EINVAL;
+	} else if (req_cpus >= avail_cpus) {
+		D("Cannot use all the CPUs in the system");
+		return EINVAL;
+	}
+
+	if (req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU) {
+		unsigned int nrings = nma_get_nrings(na, NR_RX);
+
+		/* Use a separate core for each ring. If nr_num_polling_cpus>1
+		 * more consecutive rings are polled.
+		 * For example, if nr_first_cpu_id=2 and nr_num_polling_cpus=2,
+		 * ring 2 and 3 are polled by core 2 and 3, respectively. */
+		/* Compare without computing i + req_cpus, which may wrap
+		 * around and let an out-of-range first ring through.
+		 */
+		if (i >= nrings || req_cpus > nrings - i) {
+			D("Rings %u-%u not in range (have %d rings)",
+				i, i + req_cpus, nrings);
+			return EINVAL;
+		}
+		qfirst = i;
+		qlast = qfirst + req_cpus;
+		core_from = qfirst;
+
+	} else if (req->nr_mode == NETMAP_POLLING_MODE_SINGLE_CPU) {
+		/* Poll all the rings using a core specified by nr_first_cpu_id.
+		 * the number of cores must be 1. */
+		if (req_cpus != 1) {
+			D("ncpus must be 1 for NETMAP_POLLING_MODE_SINGLE_CPU "
+				"(was %d)", req_cpus);
+			return EINVAL;
+		}
+		qfirst = 0;
+		qlast = nma_get_nrings(na, NR_RX);
+		core_from = i;
+	} else {
+		D("Invalid polling mode");
+		return EINVAL;
+	}
+
+	bps->mode = req->nr_mode;
+	bps->qfirst = qfirst;
+	bps->qlast = qlast;
+	bps->cpu_from = core_from;
+	bps->ncpus = req_cpus;
+	D("%s qfirst %u qlast %u cpu_from %u ncpus %u",
+		req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU ?
+		"MULTI" : "SINGLE",
+		qfirst, qlast, core_from, req_cpus);
+	return 0;
+}
+
+/* Put the bwrap adapter 'na' in polling mode as described by 'req':
+ * build the polling state, create the workers, disable the hwna
+ * interrupts and start the workers.
+ *
+ * Returns 0 on success; EFAULT if the adapter is already polling,
+ * ENOMEM if the state cannot be allocated, otherwise the error
+ * reported by the failing helper. On failure nothing stays allocated
+ * and interrupts are re-enabled.
+ */
+static int
+nm_bdg_ctl_polling_start(struct nmreq_vale_polling *req, struct netmap_adapter *na)
+{
+	struct nm_bdg_polling_state *bps;
+	struct netmap_bwrap_adapter *bna;
+	int error;
+	u_int i;
+
+	bna = (struct netmap_bwrap_adapter *)na;
+	if (bna->na_polling_state) {
+		D("ERROR adapter already in polling mode");
+		return EFAULT;
+	}
+
+	bps = nm_os_malloc(sizeof(*bps));
+	if (!bps)
+		return ENOMEM;
+	bps->configured = false;
+	bps->stopped = true;
+
+	/* propagate the helpers' own error codes instead of masking them */
+	error = get_polling_cfg(req, na, bps);
+	if (error) {
+		nm_os_free(bps);
+		return error;
+	}
+
+	error = nm_bdg_create_kthreads(bps);
+	if (error) {
+		nm_os_free(bps);
+		return error;
+	}
+
+	bps->configured = true;
+	/* the workers reach the adapter through bps->bna, so link the
+	 * two before starting them
+	 */
+	bna->na_polling_state = bps;
+	bps->bna = bna;
+
+	/* disable interrupts if possible */
+	nma_intr_enable(bna->hwna, 0);
+	/* start kthread now */
+	error = nm_bdg_polling_start_kthreads(bps);
+	if (error) {
+		D("ERROR nm_bdg_polling_start_kthread()");
+		/* release the contexts too, not only the array holding them */
+		for (i = 0; i < bps->ncpus; i++)
+			nm_os_kctx_destroy(bps->kthreads[i].nmk);
+		nm_os_free(bps->kthreads);
+		nm_os_free(bps);
+		bna->na_polling_state = NULL;
+		nma_intr_enable(bna->hwna, 1);
+	}
+	return error;
+}
+
+/* Take the bwrap adapter 'na' out of polling mode: stop and destroy
+ * the workers, release the polling state and re-enable the hwna
+ * interrupts.
+ * Returns 0 on success, EFAULT if the adapter is not in polling mode.
+ */
+static int
+nm_bdg_ctl_polling_stop(struct netmap_adapter *na)
+{
+	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na;
+	struct nm_bdg_polling_state *bps;
+
+	if (!bna->na_polling_state) {
+		D("ERROR adapter is not in polling mode");
+		return EFAULT;
+	}
+	bps = bna->na_polling_state;
+	nm_bdg_polling_stop_delete_kthreads(bna->na_polling_state);
+	bps->configured = false;
+	/* the worker array was allocated separately from bps */
+	nm_os_free(bps->kthreads);
+	nm_os_free(bps);
+	bna->na_polling_state = NULL;
+	/* reenable interrupts */
+	nma_intr_enable(bna->hwna, 1);
+	return 0;
+}
+
+/* Enable or disable polling on the port named in hdr->nr_name:
+ * polling is started when hdr->nr_reqtype is NETMAP_BDG_POLLING_ON and
+ * stopped for any other request type. The request body is read from
+ * hdr->nr_body. NMG_LOCK is taken and released internally.
+ *
+ * Returns 0 on success, EOPNOTSUPP if the port is not a bwrap, EINVAL
+ * if the lookup yields no adapter, or the error of the failing step.
+ */
+int
+nm_bdg_polling(struct nmreq_header *hdr)
+{
+ struct nmreq_vale_polling *req =
+ (struct nmreq_vale_polling *)(uintptr_t)hdr->nr_body;
+ struct netmap_adapter *na = NULL;
+ int error = 0;
+
+ NMG_LOCK();
+ error = netmap_get_vale_na(hdr, &na, NULL, /*create=*/0);
+ if (na && !error) {
+ if (!nm_is_bwrap(na)) {
+ error = EOPNOTSUPP;
+ } else if (hdr->nr_reqtype == NETMAP_BDG_POLLING_ON) {
+ error = nm_bdg_ctl_polling_start(req, na);
+ /* hold an extra reference for as long as polling is on */
+ if (!error)
+ netmap_adapter_get(na);
+ } else {
+ error = nm_bdg_ctl_polling_stop(na);
+ /* drop the reference taken when polling was started */
+ if (!error)
+ netmap_adapter_put(na);
+ }
+ /* presumably balances the reference obtained by the lookup
+ * above -- TODO confirm against netmap_get_vale_na()
+ */
+ netmap_adapter_put(na);
+ } else if (!na && !error) {
+ /* Not VALE port. */
+ error = EINVAL;
+ }
+ NMG_UNLOCK();
+
+ return error;
+}
+
+/* Process NETMAP_REQ_VALE_LIST.
+ *
+ * If hdr->nr_name is not empty, look up the bridge and the port with
+ * that name and report their indexes in the request body (the port
+ * index is NM_BDG_NOPORT when no port matches).
+ * Otherwise, starting from (nr_bridge_idx, nr_port_idx), find the
+ * first in-use port, copy its name to hdr->nr_name and report its
+ * indexes.
+ *
+ * Returns 0 on success, EINVAL if the name lacks the NM_BDG_NAME
+ * prefix, ENOENT if the bridge does not exist or no port is found.
+ */
+int
+netmap_bdg_list(struct nmreq_header *hdr)
+{
+	struct nmreq_vale_list *req =
+		(struct nmreq_vale_list *)(uintptr_t)hdr->nr_body;
+	int namelen = strlen(hdr->nr_name);
+	struct nm_bridge *b, *bridges;
+	struct netmap_vp_adapter *vpna;
+	int error = 0, i, j;
+	u_int num_bridges;
+
+	netmap_bns_getbridges(&bridges, &num_bridges);
+
+	/* this is used to enumerate bridges and ports */
+	if (namelen) { /* look up indexes of bridge and port */
+		if (strncmp(hdr->nr_name, NM_BDG_NAME,
+					strlen(NM_BDG_NAME))) {
+			return EINVAL;
+		}
+		NMG_LOCK();
+		b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
+		if (!b) {
+			NMG_UNLOCK();
+			return ENOENT;
+		}
+
+		req->nr_bridge_idx = b - bridges; /* bridge index */
+		req->nr_port_idx = NM_BDG_NOPORT;
+		for (j = 0; j < b->bdg_active_ports; j++) {
+			i = b->bdg_port_index[j];
+			vpna = b->bdg_ports[i];
+			if (vpna == NULL) {
+				D("This should not happen");
+				continue;
+			}
+			/* the former and the latter identify a
+			 * virtual port and a NIC, respectively
+			 */
+			if (!strcmp(vpna->up.name, hdr->nr_name)) {
+				req->nr_port_idx = i; /* port index */
+				break;
+			}
+		}
+		NMG_UNLOCK();
+	} else {
+		/* return the first non-empty entry starting from
+		 * bridge nr_bridge_idx and port nr_port_idx.
+		 *
+		 * Users can detect the end of the same bridge by
+		 * comparing the new and old value of nr_bridge_idx,
+		 * and can detect the end of all the bridges by
+		 * error != 0
+		 */
+		i = req->nr_bridge_idx;
+		j = req->nr_port_idx;
+		/* The indexes come from the request: one that converts to
+		 * a negative int must not be used as an array subscript.
+		 * Treat it as past the last port of the current bridge.
+		 */
+		if (j < 0)
+			j = NM_BDG_MAXPORTS;
+
+		NMG_LOCK();
+		for (error = ENOENT; i < NM_BRIDGES; i++) {
+			b = bridges + i;
+			for ( ; j < NM_BDG_MAXPORTS; j++) {
+				if (b->bdg_ports[j] == NULL)
+					continue;
+				vpna = b->bdg_ports[j];
+				/* write back the VALE switch name; strlcpy()
+				 * always NUL-terminates the destination
+				 */
+				strlcpy(hdr->nr_name, vpna->up.name,
+					sizeof(hdr->nr_name));
+				error = 0;
+				goto out;
+			}
+			j = 0; /* following bridges scan from 0 */
+		}
+	out:
+		req->nr_bridge_idx = i;
+		req->nr_port_idx = j;
+		NMG_UNLOCK();
+	}
+
+	return error;
+}
+
+/* Called by external kernel modules (e.g., Openvswitch)
+ * to set configure/lookup/dtor functions of a VALE instance.
+ * Register callbacks to the given bridge. 'name' may be just
+ * bridge's name (including ':' if it is not just NM_BDG_NAME).
+ *
+ * A NULL 'bdg_ops' resets the bridge: its hash table is cleared,
+ * bdg_ops is set to NULL and private_data points to the hash table.
+ *
+ * Returns 0 on success, ENXIO if the bridge does not exist, EACCES
+ * if 'auth_token' is not valid for the bridge.
+ *
+ * Called without NMG_LOCK.
+ */
+
+int
+netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *private_data, void *auth_token)
+{
+ struct nm_bridge *b;
+ int error = 0;
+
+ NMG_LOCK();
+ b = nm_find_bridge(name, 0 /* don't create */, NULL);
+ if (!b) {
+ error = ENXIO;
+ goto unlock_regops;
+ }
+ if (!nm_bdg_valid_auth_token(b, auth_token)) {
+ error = EACCES;
+ goto unlock_regops;
+ }
+
+ /* the ops and private data are replaced under the bridge write lock */
+ BDG_WLOCK(b);
+ if (!bdg_ops) {
+ /* resetting the bridge */
+ bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
+ b->bdg_ops = NULL;
+ b->private_data = b->ht;
+ } else {
+ /* modifying the bridge */
+ b->private_data = private_data;
+ b->bdg_ops = bdg_ops;
+ }
+ BDG_WUNLOCK(b);
+
+unlock_regops:
+ NMG_UNLOCK();
+ return error;
+}
+
+
+/* Forward a configuration request to the config callback registered
+ * on the bridge named in nr->nifr_name.
+ * Returns the callback's result, or EINVAL if the bridge does not
+ * exist or has no config callback.
+ */
+int
+netmap_bdg_config(struct nm_ifreq *nr)
+{
+	struct nm_bridge *b;
+	int error = EINVAL;
+
+	NMG_LOCK();
+	b = nm_find_bridge(nr->nifr_name, 0, NULL);
+	if (!b) {
+		NMG_UNLOCK();
+		return error;
+	}
+	NMG_UNLOCK();
+	/* Don't call config() with NMG_LOCK() held */
+	BDG_RLOCK(b);
+	/* bdg_ops is NULL after netmap_bdg_regops() has reset the bridge */
+	if (b->bdg_ops != NULL && b->bdg_ops->config != NULL)
+		error = b->bdg_ops->config(nr);
+	BDG_RUNLOCK(b);
+	return error;
+}
+
+
+/* nm_register callback for VALE ports.
+ * Switches to NKR_NETMAP_ON (onoff != 0) or NKR_NETMAP_OFF (onoff == 0)
+ * the krings that have the corresponding transition pending, and
+ * updates NAF_NETMAP_ON when the adapter has no active file
+ * descriptors. Always returns 0.
+ */
+int
+netmap_vp_reg(struct netmap_adapter *na, int onoff)
+{
+ struct netmap_vp_adapter *vpna =
+ (struct netmap_vp_adapter*)na;
+ enum txrx t;
+ int i;
+
+ /* persistent ports may be put in netmap mode
+ * before being attached to a bridge
+ */
+ if (vpna->na_bdg)
+ BDG_WLOCK(vpna->na_bdg);
+ if (onoff) {
+ /* turn on the krings first, then flag the adapter */
+ for_rx_tx(t) {
+ for (i = 0; i < netmap_real_rings(na, t); i++) {
+ struct netmap_kring *kring = NMR(na, t)[i];
+
+ if (nm_kring_pending_on(kring))
+ kring->nr_mode = NKR_NETMAP_ON;
+ }
+ }
+ if (na->active_fds == 0)
+ na->na_flags |= NAF_NETMAP_ON;
+ /* XXX on FreeBSD, persistent VALE ports should also
+ * toggle IFCAP_NETMAP in na->ifp (2014-03-16)
+ */
+ } else {
+ /* reverse order: clear the adapter flag, then the krings */
+ if (na->active_fds == 0)
+ na->na_flags &= ~NAF_NETMAP_ON;
+ for_rx_tx(t) {
+ for (i = 0; i < netmap_real_rings(na, t); i++) {
+ struct netmap_kring *kring = NMR(na, t)[i];
+
+ if (nm_kring_pending_off(kring))
+ kring->nr_mode = NKR_NETMAP_OFF;
+ }
+ }
+ }
+ if (vpna->na_bdg)
+ BDG_WUNLOCK(vpna->na_bdg);
+ return 0;
+}
+
+
+/* rxsync code used by VALE ports nm_rxsync callback and also
+ * internally by the bwrap.
+ * Walks the slots from nr_hwcur to rhead, clearing NS_BUF_CHANGED on
+ * each, then sets nr_hwcur to rhead. 'flags' is not used.
+ * Returns 0, or the result of netmap_ring_reinit() when rhead is
+ * outside the ring.
+ */
+static int
+netmap_vp_rxsync_locked(struct netmap_kring *kring, int flags)
+{
+ struct netmap_adapter *na = kring->na;
+ struct netmap_ring *ring = kring->ring;
+ u_int nm_i, lim = kring->nkr_num_slots - 1;
+ u_int head = kring->rhead;
+ int n;
+
+ /* a head past the last slot index cannot be trusted */
+ if (head > lim) {
+ D("ouch dangerous reset!!!");
+ n = netmap_ring_reinit(kring);
+ goto done;
+ }
+
+ /* First part, import newly received packets. */
+ /* actually nothing to do here, they are already in the kring */
+
+ /* Second part, skip past packets that userspace has released. */
+ nm_i = kring->nr_hwcur;
+ if (nm_i != head) {
+ /* consistency check, but nothing really important here */
+ for (n = 0; likely(nm_i != head); n++) {
+ struct netmap_slot *slot = &ring->slot[nm_i];
+ void *addr = NMB(na, slot);
+
+ /* a bad buffer is only reported, the slot is released anyway */
+ if (addr == NETMAP_BUF_BASE(kring->na)) { /* bad buf */
+ D("bad buffer index %d, ignore ?",
+ slot->buf_idx);
+ }
+ slot->flags &= ~NS_BUF_CHANGED;
+ nm_i = nm_next(nm_i, lim);
+ }
+ kring->nr_hwcur = head;
+ }
+
+ /* the slot count accumulated in n is discarded: success is 0 */
+ n = 0;
+done:
+ return n;
+}
+
+/*
+ * nm_rxsync callback for VALE ports: a user process reading from a
+ * VALE switch.
+ * Concurrent calls from userspace are already excluded, but the
+ * queue lock is still needed to protect against writers on the
+ * same queue.
+ */
+int
+netmap_vp_rxsync(struct netmap_kring *kring, int flags)
+{
+	int ret;
+
+	mtx_lock(&kring->q_lock);
+	ret = netmap_vp_rxsync_locked(kring, flags);
+	mtx_unlock(&kring->q_lock);
+
+	return ret;
+}
+
+/* Attach a bwrap named 'nr_name' on top of 'hwna' by delegating to
+ * the bwrap_attach method of the given bridge ops; returns whatever
+ * that method returns.
+ */
+int
+netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna,
+		struct netmap_bdg_ops *ops)
+{
+	int ret;
+
+	ret = ops->bwrap_attach(nr_name, hwna);
+	return ret;
+}
+
+
+/* Bridge wrapper code (bwrap).
+ * This is used to connect a non-VALE-port netmap_adapter (hwna) to a
+ * VALE switch.
+ * The main task is to swap the meaning of tx and rx rings to match the
+ * expectations of the VALE switch code (see nm_bdg_flush).
+ *
+ * The bwrap works by interposing a netmap_bwrap_adapter between the
+ * rest of the system and the hwna. The netmap_bwrap_adapter looks like
+ * a netmap_vp_adapter to the rest of the system, but, internally, it
+ * translates all callbacks to what the hwna expects.
+ *
+ * Note that we have to intercept callbacks coming from two sides:
+ *
+ * - callbacks coming from the netmap module are intercepted by
+ * passing around the netmap_bwrap_adapter instead of the hwna
+ *
+ * - callbacks coming from outside of the netmap module only know
+ * about the hwna. This, however, only happens in interrupt
+ * handlers, where only the hwna->nm_notify callback is called.
+ * What the bwrap does is to overwrite the hwna->nm_notify callback
+ * with its own netmap_bwrap_intr_notify.
+ * XXX This assumes that the hwna->nm_notify callback was the
+ * standard netmap_notify(), as it is the case for nic adapters.
+ * Any additional action performed by hwna->nm_notify will not be
+ * performed by netmap_bwrap_intr_notify.
+ *
+ * Additionally, the bwrap can optionally attach the host rings pair
+ * of the wrapped adapter to a different port of the switch.
+ */
+
+
+/* Destructor of a bwrap adapter: release the host adapter's memory
+ * allocator reference, detach the bwrap ports from the bridge (if
+ * attached) and undo the links set up on the hwna, finally dropping
+ * the reference held on it.
+ */
+static void
+netmap_bwrap_dtor(struct netmap_adapter *na)
+{
+ struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
+ struct netmap_adapter *hwna = bna->hwna;
+ struct nm_bridge *b = bna->up.na_bdg,
+ *bh = bna->host.na_bdg;
+
+ if (bna->host.up.nm_mem)
+ netmap_mem_put(bna->host.up.nm_mem);
+
+ if (b) {
+ /* -1 is passed as second port when the host port is not attached */
+ netmap_bdg_detach_common(b, bna->up.bdg_port,
+ (bh ? bna->host.bdg_port : -1));
+ }
+
+ ND("na %p", na);
+ na->ifp = NULL;
+ bna->host.up.ifp = NULL;
+ /* give the hwna back the na_vp it had before the bwrap was attached */
+ hwna->na_vp = bna->saved_na_vp;
+ hwna->na_hostvp = NULL;
+ hwna->na_private = NULL;
+ hwna->na_flags &= ~NAF_BUSY;
+ netmap_adapter_put(hwna);
+
+}
+
+
+/*
+ * Intr callback for NICs connected to a bridge.
+ * Simply ignore tx interrupts (maybe we could try to recover space ?)
+ * and pass received packets from nic to the bridge.
+ *
+ * XXX TODO check locking: this is called from the interrupt
+ * handler so we should make sure that the interface is not
+ * disconnected while passing down an interrupt.
+ *
+ * Note, no user process can access this NIC or the host stack.
+ * The only part of the ring that is significant are the slots,
+ * and head/cur/tail are set from the kring as needed
+ * (part as a receive ring, part as a transmit ring).
+ *
+ * callback that overwrites the hwna notify callback.
+ * Packets come from the outside or from the host stack and are put on an
+ * hwna rx ring.
+ * The bridge wrapper then sends the packets through the bridge.
+ */
+static int
+netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags)
+{
+ struct netmap_adapter *na = kring->na;
+ struct netmap_bwrap_adapter *bna = na->na_private;
+ struct netmap_kring *bkring;
+ struct netmap_vp_adapter *vpna = &bna->up;
+ u_int ring_nr = kring->ring_id;
+ int ret = NM_IRQ_COMPLETED;
+ int error;
+
+ if (netmap_verbose)
+ D("%s %s 0x%x", na->name, kring->name, flags);
+
+ /* the bwrap tx kring with the same index as the hwna rx kring */
+ bkring = vpna->up.tx_rings[ring_nr];
+
+ /* make sure the ring is not disabled */
+ if (nm_kr_tryget(kring, 0 /* can't sleep */, NULL)) {
+ return EIO;
+ }
+
+ if (netmap_verbose)
+ D("%s head %d cur %d tail %d", na->name,
+ kring->rhead, kring->rcur, kring->rtail);
+
+ /* simulate a user wakeup on the rx ring
+ * fetch packets that have arrived.
+ */
+ error = kring->nm_sync(kring, 0);
+ if (error)
+ goto put_out;
+ /* nothing received: error is 0 here, so NM_IRQ_COMPLETED is returned */
+ if (kring->nr_hwcur == kring->nr_hwtail) {
+ if (netmap_verbose)
+ D("how strange, interrupt with no packets on %s",
+ na->name);
+ goto put_out;
+ }
+
+ /* new packets are kring->rcur to kring->nr_hwtail, and the bkring
+ * had hwcur == bkring->rhead. So advance bkring->rhead to kring->nr_hwtail
+ * to push all packets out.
+ */
+ bkring->rhead = bkring->rcur = kring->nr_hwtail;
+
+ /* the result of this sync is not checked */
+ bkring->nm_sync(bkring, flags);
+
+ /* mark all buffers as released on this ring */
+ kring->rhead = kring->rcur = kring->rtail = kring->nr_hwtail;
+ /* another call to actually release the buffers */
+ error = kring->nm_sync(kring, 0);
+
+ /* The second rxsync may have further advanced hwtail. If this happens,
+ * return NM_IRQ_RESCHED, otherwise just return NM_IRQ_COMPLETED. */
+ if (kring->rcur != kring->nr_hwtail) {
+ ret = NM_IRQ_RESCHED;
+ }
+put_out:
+ nm_kr_put(kring);
+
+ /* a sync error takes precedence over the NM_IRQ_* result */
+ return error ? error : ret;
+}
+
+
+/* nm_register callback for bwrap.
+ * Propagates the register/unregister request to the wrapped hwna,
+ * mirrors the resulting kring modes onto the bwrap krings (tx and rx
+ * swapped) and installs or removes the interception of the hwna rx
+ * notify callbacks.
+ * Returns 0 on success or the error returned by hwna->nm_register().
+ */
+int
+netmap_bwrap_reg(struct netmap_adapter *na, int onoff)
+{
+ struct netmap_bwrap_adapter *bna =
+ (struct netmap_bwrap_adapter *)na;
+ struct netmap_adapter *hwna = bna->hwna;
+ struct netmap_vp_adapter *hostna = &bna->host;
+ int error, i;
+ enum txrx t;
+
+ ND("%s %s", na->name, onoff ? "on" : "off");
+
+ if (onoff) {
+ /* netmap_do_regif has been called on the bwrap na.
+ * We need to pass the information about the
+ * memory allocator down to the hwna before
+ * putting it in netmap mode
+ */
+ hwna->na_lut = na->na_lut;
+
+ if (hostna->na_bdg) {
+ /* if the host rings have been attached to switch,
+ * we need to copy the memory allocator information
+ * in the hostna also
+ */
+ hostna->up.na_lut = na->na_lut;
+ }
+
+ }
+
+ /* pass down the pending ring state information */
+ for_rx_tx(t) {
+ for (i = 0; i < netmap_all_rings(na, t); i++) {
+ NMR(hwna, nm_txrx_swap(t))[i]->nr_pending_mode =
+ NMR(na, t)[i]->nr_pending_mode;
+ }
+ }
+
+ /* forward the request to the hwna */
+ error = hwna->nm_register(hwna, onoff);
+ if (error)
+ return error;
+
+ /* copy up the current ring state information */
+ for_rx_tx(t) {
+ for (i = 0; i < netmap_all_rings(na, t); i++) {
+ struct netmap_kring *kring = NMR(hwna, nm_txrx_swap(t))[i];
+ NMR(na, t)[i]->nr_mode = kring->nr_mode;
+ }
+ }
+
+ /* impersonate a netmap_vp_adapter */
+ netmap_vp_reg(na, onoff);
+ if (hostna->na_bdg)
+ netmap_vp_reg(&hostna->up, onoff);
+
+ if (onoff) {
+ /* NOTE: this u_int i shadows the int i declared at function scope */
+ u_int i;
+ /* intercept the hwna nm_notify callback on the hw rings */
+ for (i = 0; i < hwna->num_rx_rings; i++) {
+ hwna->rx_rings[i]->save_notify = hwna->rx_rings[i]->nm_notify;
+ hwna->rx_rings[i]->nm_notify = netmap_bwrap_intr_notify;
+ }
+ i = hwna->num_rx_rings; /* for safety */
+ /* save the host ring notify unconditionally */
+ for (; i < netmap_real_rings(hwna, NR_RX); i++) {
+ hwna->rx_rings[i]->save_notify =
+ hwna->rx_rings[i]->nm_notify;
+ if (hostna->na_bdg) {
+ /* also intercept the host ring notify */
+ hwna->rx_rings[i]->nm_notify =
+ netmap_bwrap_intr_notify;
+ na->tx_rings[i]->nm_sync = na->nm_txsync;
+ }
+ }
+ if (na->active_fds == 0)
+ na->na_flags |= NAF_NETMAP_ON;
+ } else {
+ /* NOTE: this u_int i shadows the int i declared at function scope */
+ u_int i;
+
+ if (na->active_fds == 0)
+ na->na_flags &= ~NAF_NETMAP_ON;
+
+ /* reset all notify callbacks (including host ring) */
+ for (i = 0; i < netmap_all_rings(hwna, NR_RX); i++) {
+ hwna->rx_rings[i]->nm_notify =
+ hwna->rx_rings[i]->save_notify;
+ hwna->rx_rings[i]->save_notify = NULL;
+ }
+ /* clear the lookup table information copied at register time */
+ hwna->na_lut.lut = NULL;
+ hwna->na_lut.plut = NULL;
+ hwna->na_lut.objtotal = 0;
+ hwna->na_lut.objsize = 0;
+
+ /* pass ownership of the netmap rings to the hwna */
+ for_rx_tx(t) {
+ for (i = 0; i < netmap_all_rings(na, t); i++) {
+ NMR(na, t)[i]->ring = NULL;
+ }
+ }
+ /* reset the number of host rings to default */
+ for_rx_tx(t) {
+ nma_set_host_nrings(hwna, t, 1);
+ }
+
+ }
+
+ return 0;
+}
+
+/* nm_config callback for bwrap.
+ * Refreshes the configuration of the wrapped hwna and reports it in
+ * 'info' with the tx and rx values swapped.
+ * Returns 0 on success or the error from netmap_mem_get_lut().
+ */
+static int
+netmap_bwrap_config(struct netmap_adapter *na, struct nm_config_info *info)
+{
+ struct netmap_bwrap_adapter *bna =
+ (struct netmap_bwrap_adapter *)na;
+ struct netmap_adapter *hwna = bna->hwna;
+ int error;
+
+ /* Forward the request to the hwna. It may happen that nobody
+ * registered hwna yet, so netmap_mem_get_lut() may have not
+ * been called yet. */
+ error = netmap_mem_get_lut(hwna->nm_mem, &hwna->na_lut);
+ if (error)
+ return error;
+ /* the return value of netmap_update_config() is not checked */
+ netmap_update_config(hwna);
+ /* swap the results and propagate */
+ info->num_tx_rings = hwna->num_rx_rings;
+ info->num_tx_descs = hwna->num_rx_desc;
+ info->num_rx_rings = hwna->num_tx_rings;
+ info->num_rx_descs = hwna->num_tx_desc;
+ info->rx_buf_maxsize = hwna->rx_buf_maxsize;
+
+ return 0;
+}
+
+
+/* nm_krings_create callback for bwrap.
+ * Creates the krings and the netmap rings of the wrapped hwna, then
+ * makes each bwrap kring share the ring of the hwna kring of opposite
+ * direction. With NAF_HOST_RINGS set, the host adapter krings are
+ * pointed at the bwrap host krings.
+ * Returns 0 on success, or the error of the failing step after
+ * undoing the previous ones.
+ */
+int
+netmap_bwrap_krings_create_common(struct netmap_adapter *na)
+{
+ struct netmap_bwrap_adapter *bna =
+ (struct netmap_bwrap_adapter *)na;
+ struct netmap_adapter *hwna = bna->hwna;
+ struct netmap_adapter *hostna = &bna->host.up;
+ int i, error = 0;
+ enum txrx t;
+
+ /* also create the hwna krings */
+ error = hwna->nm_krings_create(hwna);
+ if (error) {
+ return error;
+ }
+
+ /* increment the usage counter for all the hwna krings */
+ for_rx_tx(t) {
+ for (i = 0; i < netmap_all_rings(hwna, t); i++) {
+ NMR(hwna, t)[i]->users++;
+ }
+ }
+
+ /* now create the actual rings */
+ error = netmap_mem_rings_create(hwna);
+ if (error) {
+ goto err_dec_users;
+ }
+
+ /* cross-link the netmap rings
+ * The original number of rings comes from hwna,
+ * rx rings on one side equals tx rings on the other.
+ */
+ for_rx_tx(t) {
+ enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
+ for (i = 0; i < netmap_all_rings(hwna, r); i++) {
+ NMR(na, t)[i]->nkr_num_slots = NMR(hwna, r)[i]->nkr_num_slots;
+ NMR(na, t)[i]->ring = NMR(hwna, r)[i]->ring;
+ }
+ }
+
+ if (na->na_flags & NAF_HOST_RINGS) {
+ /* the hostna rings are the host rings of the bwrap.
+ * The corresponding krings must point back to the
+ * hostna
+ */
+ hostna->tx_rings = &na->tx_rings[na->num_tx_rings];
+ hostna->rx_rings = &na->rx_rings[na->num_rx_rings];
+ for_rx_tx(t) {
+ for (i = 0; i < nma_get_nrings(hostna, t); i++) {
+ NMR(hostna, t)[i]->na = hostna;
+ }
+ }
+ }
+
+ return 0;
+
+err_dec_users:
+ /* undo the usage counter increments before deleting the krings */
+ for_rx_tx(t) {
+ for (i = 0; i < netmap_all_rings(hwna, t); i++) {
+ NMR(hwna, t)[i]->users--;
+ }
+ }
+ hwna->nm_krings_delete(hwna);
+ return error;
+}
+
+
+/* Counterpart of netmap_bwrap_krings_create_common(): drop the usage
+ * count taken on every hwna kring, then delete the hwna netmap rings
+ * and krings.
+ */
+void
+netmap_bwrap_krings_delete_common(struct netmap_adapter *na)
+{
+	struct netmap_adapter *hwna = ((struct netmap_bwrap_adapter *)na)->hwna;
+	enum txrx dir;
+	int ring;
+
+	ND("%s", na->name);
+
+	/* release our use of each hwna kring, in both directions */
+	for_rx_tx(dir) {
+		for (ring = 0; ring < netmap_all_rings(hwna, dir); ring++)
+			NMR(hwna, dir)[ring]->users--;
+	}
+
+	/* netmap rings that are no longer needed go away first */
+	netmap_mem_rings_delete(hwna);
+	hwna->nm_krings_delete(hwna);
+}
+
+
+/* notify method for the bridge-->hwna direction.
+ * Pushes the packets found on the bwrap rx kring out through the hwna
+ * tx kring with the same index.
+ * Returns NM_IRQ_COMPLETED on success, ENXIO if the hwna tx kring
+ * cannot be acquired, or the error returned by the hwna txsync.
+ */
+int
+netmap_bwrap_notify(struct netmap_kring *kring, int flags)
+{
+ struct netmap_adapter *na = kring->na;
+ struct netmap_bwrap_adapter *bna = na->na_private;
+ struct netmap_adapter *hwna = bna->hwna;
+ u_int ring_n = kring->ring_id;
+ u_int lim = kring->nkr_num_slots - 1;
+ struct netmap_kring *hw_kring;
+ int error;
+
+ ND("%s: na %s hwna %s",
+ (kring ? kring->name : "NULL!"),
+ (na ? na->name : "NULL!"),
+ (hwna ? hwna->name : "NULL!"));
+ hw_kring = hwna->tx_rings[ring_n];
+
+ if (nm_kr_tryget(hw_kring, 0, NULL)) {
+ return ENXIO;
+ }
+
+ /* first step: simulate a user wakeup on the rx ring */
+ netmap_vp_rxsync(kring, flags);
+ /* NOTE(review): the ND() calls below name 'ring' and 'hw_ring',
+ * which are not declared in this function; presumably this only
+ * builds because ND() discards its arguments -- confirm.
+ */
+ ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
+ na->name, ring_n,
+ kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
+ ring->head, ring->cur, ring->tail,
+ hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_ring->rtail);
+ /* second step: the new packets are sent on the tx ring
+ * (which is actually the same ring)
+ */
+ hw_kring->rhead = hw_kring->rcur = kring->nr_hwtail;
+ error = hw_kring->nm_sync(hw_kring, flags);
+ if (error)
+ goto put_out;
+
+ /* third step: now we are back the rx ring */
+ /* claim ownership on all hw owned bufs */
+ kring->rhead = kring->rcur = nm_next(hw_kring->nr_hwtail, lim); /* skip past reserved slot */
+
+ /* fourth step: the user goes to sleep again, causing another rxsync */
+ netmap_vp_rxsync(kring, flags);
+ ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
+ na->name, ring_n,
+ kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
+ ring->head, ring->cur, ring->tail,
+ hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
+put_out:
+ nm_kr_put(hw_kring);
+
+ return error ? error : NM_IRQ_COMPLETED;
+}
+
+
+/* nm_bdg_ctl callback for the bwrap.
+ * Called on bridge-attach and detach, as an effect of vale-ctl -[ahd].
+ * On attach, it needs to provide a fake netmap_priv_d structure and
+ * perform a netmap_do_regif() on the bwrap. This will put both the
+ * bwrap and the hwna in netmap mode, with the netmap rings shared
+ * and cross linked. Moreover, it will start intercepting interrupts
+ * directed to hwna.
+ * On detach, the fake netmap_priv_d is deleted.
+ *
+ * Returns 0 on success; EINVAL for an unsupported ring selection or
+ * a detach on an adapter that is not registered, EBUSY if the adapter
+ * is already owned, ENOMEM if the priv cannot be allocated, or the
+ * error returned by netmap_do_regif().
+ */
+static int
+netmap_bwrap_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na)
+{
+ struct netmap_priv_d *npriv;
+ struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
+ int error = 0;
+
+ if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
+ struct nmreq_vale_attach *req =
+ (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
+ if (req->reg.nr_ringid != 0 ||
+ (req->reg.nr_mode != NR_REG_ALL_NIC &&
+ req->reg.nr_mode != NR_REG_NIC_SW)) {
+ /* We only support attaching all the NIC rings
+ * and/or the host stack. */
+ return EINVAL;
+ }
+ if (NETMAP_OWNED_BY_ANY(na)) {
+ return EBUSY;
+ }
+ if (bna->na_kpriv) {
+ /* nothing to do */
+ return 0;
+ }
+ npriv = netmap_priv_new();
+ if (npriv == NULL)
+ return ENOMEM;
+ npriv->np_ifp = na->ifp; /* let the priv destructor release the ref */
+ error = netmap_do_regif(npriv, na, req->reg.nr_mode,
+ req->reg.nr_ringid, req->reg.nr_flags);
+ if (error) {
+ netmap_priv_delete(npriv);
+ return error;
+ }
+ /* keep the priv around: it is deleted on detach */
+ bna->na_kpriv = npriv;
+ na->na_flags |= NAF_BUSY;
+ } else {
+ /* any request type other than attach is handled as a detach */
+ if (na->active_fds == 0) /* not registered */
+ return EINVAL;
+ netmap_priv_delete(bna->na_kpriv);
+ bna->na_kpriv = NULL;
+ na->na_flags &= ~NAF_BUSY;
+ }
+
+ return error;
+}
+
+/* attach a bridge wrapper to the 'real' device.
+ * 'na' is the bwrap adapter being set up (it is cast to a struct
+ * netmap_bwrap_adapter), 'hwna' the adapter to wrap.
+ * Returns 0 on success, EBUSY if hwna is already in use, or the error
+ * returned by netmap_attach_common().
+ */
+int
+netmap_bwrap_attach_common(struct netmap_adapter *na,
+ struct netmap_adapter *hwna)
+{
+ struct netmap_bwrap_adapter *bna;
+ struct netmap_adapter *hostna = NULL;
+ int error = 0;
+ enum txrx t;
+
+ /* make sure the NIC is not already in use */
+ if (NETMAP_OWNED_BY_ANY(hwna)) {
+ D("NIC %s busy, cannot attach to bridge", hwna->name);
+ return EBUSY;
+ }
+
+ bna = (struct netmap_bwrap_adapter *)na;
+ /* make bwrap ifp point to the real ifp */
+ na->ifp = hwna->ifp;
+ if_ref(na->ifp);
+ na->na_private = bna;
+ /* fill the ring data for the bwrap adapter with rx/tx meanings
+ * swapped. The real cross-linking will be done during register,
+ * when all the krings will have been created.
+ */
+ for_rx_tx(t) {
+ enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
+ nma_set_nrings(na, t, nma_get_nrings(hwna, r));
+ nma_set_ndesc(na, t, nma_get_ndesc(hwna, r));
+ }
+ na->nm_dtor = netmap_bwrap_dtor;
+ na->nm_config = netmap_bwrap_config;
+ na->nm_bdg_ctl = netmap_bwrap_bdg_ctl;
+ na->pdev = hwna->pdev;
+ na->nm_mem = netmap_mem_get(hwna->nm_mem);
+ na->virt_hdr_len = hwna->virt_hdr_len;
+ na->rx_buf_maxsize = hwna->rx_buf_maxsize;
+
+ bna->hwna = hwna;
+ netmap_adapter_get(hwna);
+ hwna->na_private = bna; /* weak reference */
+ /* remember the previous na_vp: netmap_bwrap_dtor() restores it */
+ bna->saved_na_vp = hwna->na_vp;
+ hwna->na_vp = &bna->up;
+ bna->up.up.na_vp = &(bna->up);
+
+ if (hwna->na_flags & NAF_HOST_RINGS) {
+ if (hwna->na_flags & NAF_SW_ONLY)
+ na->na_flags |= NAF_SW_ONLY;
+ na->na_flags |= NAF_HOST_RINGS;
+ hostna = &bna->host.up;
+
+ /* limit the number of host rings to that of hw */
+ nm_bound_var(&hostna->num_tx_rings, 1, 1,
+ nma_get_nrings(hwna, NR_TX), NULL);
+ nm_bound_var(&hostna->num_rx_rings, 1, 1,
+ nma_get_nrings(hwna, NR_RX), NULL);
+
+ snprintf(hostna->name, sizeof(hostna->name), "%s^", na->name);
+ hostna->ifp = hwna->ifp;
+ for_rx_tx(t) {
+ enum txrx r = nm_txrx_swap(t);
+ u_int nr = nma_get_nrings(hostna, t);
+
+ nma_set_nrings(hostna, t, nr);
+ nma_set_host_nrings(na, t, nr);
+ /* the hwna host ring count is only ever raised here */
+ if (nma_get_host_nrings(hwna, t) < nr) {
+ nma_set_host_nrings(hwna, t, nr);
+ }
+ nma_set_ndesc(hostna, t, nma_get_ndesc(hwna, r));
+ }
+ // hostna->nm_txsync = netmap_bwrap_host_txsync;
+ // hostna->nm_rxsync = netmap_bwrap_host_rxsync;
+ hostna->nm_mem = netmap_mem_get(na->nm_mem);
+ hostna->na_private = bna;
+ hostna->na_vp = &bna->up;
+ na->na_hostvp = hwna->na_hostvp =
+ hostna->na_hostvp = &bna->host;
+ hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */
+ hostna->rx_buf_maxsize = hwna->rx_buf_maxsize;
+ }
+
+ ND("%s<->%s txr %d txd %d rxr %d rxd %d",
+ na->name, ifp->if_xname,
+ na->num_tx_rings, na->num_tx_desc,
+ na->num_rx_rings, na->num_rx_desc);
+
+ error = netmap_attach_common(na);
+ if (error) {
+ goto err_put;
+ }
+ hwna->na_flags |= NAF_BUSY;
+ return 0;
+
+err_put:
+ /* NOTE(review): this clears hwna->na_vp instead of restoring
+ * bna->saved_na_vp as netmap_bwrap_dtor() does, and the references
+ * taken above with if_ref() and netmap_mem_get() are not visibly
+ * released on this path -- confirm whether a caller handles them.
+ */
+ hwna->na_vp = hwna->na_hostvp = NULL;
+ netmap_adapter_put(hwna);
+ return error;
+
+}
+
+/* Allocate an array of 'n' bridges and initialize the lock of each.
+ * Returns the array, or NULL if the allocation fails.
+ */
+struct nm_bridge *
+netmap_init_bridges2(u_int n)
+{
+	u_int i;	/* same type as n: no signed/unsigned comparison */
+	struct nm_bridge *b;
+
+	b = nm_os_malloc(sizeof(struct nm_bridge) * n);
+	if (b == NULL)
+		return NULL;
+	for (i = 0; i < n; i++)
+		BDG_RWINIT(&b[i]);
+	return b;
+}
+
+/* Destroy the locks of the 'n' bridges in array 'b' and free the
+ * array. A NULL 'b' is ignored.
+ */
+void
+netmap_uninit_bridges2(struct nm_bridge *b, u_int n)
+{
+	u_int i;	/* same type as n: no signed/unsigned comparison */
+
+	if (b == NULL)
+		return;
+
+	for (i = 0; i < n; i++)
+		BDG_RWDESTROY(&b[i]);
+	nm_os_free(b);
+}
+
+/* Set up the bridges at initialization time.
+ * With CONFIG_NET_NS the work is delegated to netmap_bns_register();
+ * otherwise a single global array of NM_BRIDGES bridges is allocated.
+ * Returns 0 on success, ENOMEM if that array cannot be allocated, or
+ * the result of netmap_bns_register().
+ */
+int
+netmap_init_bridges(void)
+{
+#ifdef CONFIG_NET_NS
+ return netmap_bns_register();
+#else
+ nm_bridges = netmap_init_bridges2(NM_BRIDGES);
+ if (nm_bridges == NULL)
+ return ENOMEM;
+ return 0;
+#endif
+}
+
+/* Counterpart of netmap_init_bridges(): calls netmap_bns_unregister()
+ * with CONFIG_NET_NS, otherwise releases the global bridge array.
+ */
+void
+netmap_uninit_bridges(void)
+{
+#ifdef CONFIG_NET_NS
+ netmap_bns_unregister();
+#else
+ netmap_uninit_bridges2(nm_bridges, NM_BRIDGES);
+#endif
+}
Index: head/sys/dev/netmap/netmap_freebsd.c
===================================================================
--- head/sys/dev/netmap/netmap_freebsd.c
+++ head/sys/dev/netmap/netmap_freebsd.c
@@ -270,13 +270,19 @@
}
int
-nm_os_mbuf_has_offld(struct mbuf *m)
+nm_os_mbuf_has_csum_offld(struct mbuf *m)
{
return m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_SCTP |
CSUM_TCP_IPV6 | CSUM_UDP_IPV6 |
- CSUM_SCTP_IPV6 | CSUM_TSO);
+ CSUM_SCTP_IPV6);
}
+int
+nm_os_mbuf_has_seg_offld(struct mbuf *m)
+{
+ return m->m_pkthdr.csum_flags & CSUM_TSO;
+}
+
static void
freebsd_generic_rx_handler(struct ifnet *ifp, struct mbuf *m)
{
@@ -632,7 +638,7 @@
void
nm_os_extmem_delete(struct nm_os_extmem *e)
{
- D("freeing %jx bytes", (uintmax_t)e->size);
+ D("freeing %zx bytes", (size_t)e->size);
vm_map_remove(kernel_map, e->kva, e->kva + e->size);
nm_os_free(e);
}
@@ -701,7 +707,7 @@
VMFS_OPTIMAL_SPACE, VM_PROT_READ | VM_PROT_WRITE,
VM_PROT_READ | VM_PROT_WRITE, 0);
if (rv != KERN_SUCCESS) {
- D("vm_map_find(%jx) failed", (uintmax_t)e->size);
+ D("vm_map_find(%zx) failed", (size_t)e->size);
goto out_rel;
}
rv = vm_map_wire(kernel_map, e->kva, e->kva + e->size,
@@ -1538,6 +1544,30 @@
CURVNET_RESTORE();
return error;
+}
+
+void
+nm_os_onattach(struct ifnet *ifp)
+{
+}
+
+void
+nm_os_onenter(struct ifnet *ifp)
+{
+ struct netmap_adapter *na = NA(ifp);
+
+ na->if_transmit = ifp->if_transmit;
+ ifp->if_transmit = netmap_transmit;
+ ifp->if_capenable |= IFCAP_NETMAP;
+}
+
+void
+nm_os_onexit(struct ifnet *ifp)
+{
+ struct netmap_adapter *na = NA(ifp);
+
+ ifp->if_transmit = na->if_transmit;
+ ifp->if_capenable &= ~IFCAP_NETMAP;
}
extern struct cdevsw netmap_cdevsw; /* XXX used in netmap.c, should go elsewhere */
Index: head/sys/dev/netmap/netmap_generic.c
===================================================================
--- head/sys/dev/netmap/netmap_generic.c
+++ head/sys/dev/netmap/netmap_generic.c
@@ -89,117 +89,6 @@
#define MBUF_RXQ(m) ((m)->m_pkthdr.flowid)
#define smp_mb()
-/*
- * FreeBSD mbuf allocator/deallocator in emulation mode:
- */
-#if __FreeBSD_version < 1100000
-
-/*
- * For older versions of FreeBSD:
- *
- * We allocate EXT_PACKET mbuf+clusters, but need to set M_NOFREE
- * so that the destructor, if invoked, will not free the packet.
- * In principle we should set the destructor only on demand,
- * but since there might be a race we better do it on allocation.
- * As a consequence, we also need to set the destructor or we
- * would leak buffers.
- */
-
-/* mbuf destructor, also need to change the type to EXT_EXTREF,
- * add an M_NOFREE flag, and then clear the flag and
- * chain into uma_zfree(zone_pack, mf)
- * (or reinstall the buffer ?)
- */
-#define SET_MBUF_DESTRUCTOR(m, fn) do { \
- (m)->m_ext.ext_free = (void *)fn; \
- (m)->m_ext.ext_type = EXT_EXTREF; \
-} while (0)
-
-static int
-void_mbuf_dtor(struct mbuf *m, void *arg1, void *arg2)
-{
- /* restore original mbuf */
- m->m_ext.ext_buf = m->m_data = m->m_ext.ext_arg1;
- m->m_ext.ext_arg1 = NULL;
- m->m_ext.ext_type = EXT_PACKET;
- m->m_ext.ext_free = NULL;
- if (MBUF_REFCNT(m) == 0)
- SET_MBUF_REFCNT(m, 1);
- uma_zfree(zone_pack, m);
-
- return 0;
-}
-
-static inline struct mbuf *
-nm_os_get_mbuf(struct ifnet *ifp, int len)
-{
- struct mbuf *m;
-
- (void)ifp;
- m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
- if (m) {
- /* m_getcl() (mb_ctor_mbuf) has an assert that checks that
- * M_NOFREE flag is not specified as third argument,
- * so we have to set M_NOFREE after m_getcl(). */
- m->m_flags |= M_NOFREE;
- m->m_ext.ext_arg1 = m->m_ext.ext_buf; // XXX save
- m->m_ext.ext_free = (void *)void_mbuf_dtor;
- m->m_ext.ext_type = EXT_EXTREF;
- ND(5, "create m %p refcnt %d", m, MBUF_REFCNT(m));
- }
- return m;
-}
-
-#else /* __FreeBSD_version >= 1100000 */
-
-/*
- * Newer versions of FreeBSD, using a straightforward scheme.
- *
- * We allocate mbufs with m_gethdr(), since the mbuf header is needed
- * by the driver. We also attach a customly-provided external storage,
- * which in this case is a netmap buffer. When calling m_extadd(), however
- * we pass a NULL address, since the real address (and length) will be
- * filled in by nm_os_generic_xmit_frame() right before calling
- * if_transmit().
- *
- * The dtor function does nothing, however we need it since mb_free_ext()
- * has a KASSERT(), checking that the mbuf dtor function is not NULL.
- */
-
-#if __FreeBSD_version <= 1200050
-static void void_mbuf_dtor(struct mbuf *m, void *arg1, void *arg2) { }
-#else /* __FreeBSD_version >= 1200051 */
-/* The arg1 and arg2 pointers argument were removed by r324446, which
- * in included since version 1200051. */
-static void void_mbuf_dtor(struct mbuf *m) { }
-#endif /* __FreeBSD_version >= 1200051 */
-
-#define SET_MBUF_DESTRUCTOR(m, fn) do { \
- (m)->m_ext.ext_free = (fn != NULL) ? \
- (void *)fn : (void *)void_mbuf_dtor; \
-} while (0)
-
-static inline struct mbuf *
-nm_os_get_mbuf(struct ifnet *ifp, int len)
-{
- struct mbuf *m;
-
- (void)ifp;
- (void)len;
-
- m = m_gethdr(M_NOWAIT, MT_DATA);
- if (m == NULL) {
- return m;
- }
-
- m_extadd(m, NULL /* buf */, 0 /* size */, void_mbuf_dtor,
- NULL, NULL, 0, EXT_NET_DRV);
-
- return m;
-}
-
-#endif /* __FreeBSD_version >= 1100000 */
-
#elif defined _WIN32
#include "win_glue.h"
@@ -1161,7 +1050,7 @@
}
D("Native netmap adapter %p restored", prev_na);
}
- NM_ATTACH_NA(ifp, prev_na);
+ NM_RESTORE_NA(ifp, prev_na);
/*
* netmap_detach_common(), that it's called after this function,
* overrides WNA(ifp) if na->ifp is not NULL.
@@ -1202,7 +1091,7 @@
}
#endif
- if (NA(ifp) && !NM_NA_VALID(ifp)) {
+ if (NM_NA_CLASH(ifp)) {
/* If NA(ifp) is not null but there is no valid netmap
* adapter it means that someone else is using the same
* pointer (e.g. ax25_ptr on linux). This happens for
@@ -1230,6 +1119,7 @@
na->ifp = ifp;
na->num_tx_desc = num_tx_desc;
na->num_rx_desc = num_rx_desc;
+ na->rx_buf_maxsize = 32768;
na->nm_register = &generic_netmap_register;
na->nm_txsync = &generic_netmap_txsync;
na->nm_rxsync = &generic_netmap_rxsync;
@@ -1253,8 +1143,8 @@
return retval;
}
- gna->prev = NA(ifp); /* save old na */
- if (gna->prev != NULL) {
+ if (NM_NA_VALID(ifp)) {
+ gna->prev = NA(ifp); /* save old na */
netmap_adapter_get(gna->prev);
}
NM_ATTACH_NA(ifp, na);
Index: head/sys/dev/netmap/netmap_kern.h
===================================================================
--- head/sys/dev/netmap/netmap_kern.h
+++ head/sys/dev/netmap/netmap_kern.h
@@ -275,6 +275,7 @@
struct nm_bdg_fwd;
struct nm_bridge;
struct netmap_priv_d;
+struct nm_bdg_args;
/* os-specific NM_SELINFO_T initialzation/destruction functions */
void nm_os_selinfo_init(NM_SELINFO_T *);
@@ -305,6 +306,12 @@
void nm_os_free(void *);
void nm_os_vfree(void *);
+/* os specific attach/detach enter/exit-netmap-mode routines */
+void nm_os_onattach(struct ifnet *);
+void nm_os_ondetach(struct ifnet *);
+void nm_os_onenter(struct ifnet *);
+void nm_os_onexit(struct ifnet *);
+
/* passes a packet up to the host stack.
* If the packet is sent (or dropped) immediately it returns NULL,
* otherwise it links the packet to prev and returns m.
@@ -313,7 +320,8 @@
*/
void *nm_os_send_up(struct ifnet *, struct mbuf *m, struct mbuf *prev);
-int nm_os_mbuf_has_offld(struct mbuf *m);
+int nm_os_mbuf_has_seg_offld(struct mbuf *m);
+int nm_os_mbuf_has_csum_offld(struct mbuf *m);
#include "netmap_mbq.h"
@@ -507,11 +515,10 @@
struct netmap_kring *pipe; /* if this is a pipe ring,
* pointer to the other end
*/
+ uint32_t pipe_tail; /* hwtail updated by the other end */
#endif /* WITH_PIPES */
-#ifdef WITH_VALE
int (*save_notify)(struct netmap_kring *kring, int flags);
-#endif
#ifdef WITH_MONITOR
/* array of krings that are monitoring this kring */
@@ -634,6 +641,7 @@
};
struct netmap_vp_adapter; // forward
+struct nm_bridge;
/* Struct to be filled by nm_config callbacks. */
struct nm_config_info {
@@ -645,6 +653,14 @@
};
/*
+ * default type for the magic field.
+ * May be overridden in glue code.
+ */
+#ifndef NM_OS_MAGIC
+#define NM_OS_MAGIC uint32_t
+#endif /* !NM_OS_MAGIC */
+
+/*
* The "struct netmap_adapter" extends the "struct adapter"
* (or equivalent) device descriptor.
* It contains all base fields needed to support netmap operation.
@@ -660,7 +676,7 @@
* always exists and is at least 32 bits) contains a magic
* value which we can use to detect that the interface is good.
*/
- uint32_t magic;
+ NM_OS_MAGIC magic;
uint32_t na_flags; /* enabled, and other flags */
#define NAF_SKIP_INTR 1 /* use the regular interrupt handler.
* useful during initialization
@@ -696,6 +712,8 @@
u_int num_rx_rings; /* number of adapter receive rings */
u_int num_tx_rings; /* number of adapter transmit rings */
+ u_int num_host_rx_rings; /* number of host receive rings */
+ u_int num_host_tx_rings; /* number of host transmit rings */
u_int num_tx_desc; /* number of descriptor in each queue */
u_int num_rx_desc;
@@ -783,7 +801,6 @@
int (*nm_config)(struct netmap_adapter *, struct nm_config_info *info);
int (*nm_krings_create)(struct netmap_adapter *);
void (*nm_krings_delete)(struct netmap_adapter *);
-#ifdef WITH_VALE
/*
* nm_bdg_attach() initializes the na_vp field to point
* to an adapter that can be attached to a VALE switch. If the
@@ -799,7 +816,8 @@
* initializations
* Called with NMG_LOCK held.
*/
- int (*nm_bdg_attach)(const char *bdg_name, struct netmap_adapter *);
+ int (*nm_bdg_attach)(const char *bdg_name, struct netmap_adapter *,
+ struct nm_bridge *);
int (*nm_bdg_ctl)(struct nmreq_header *, struct netmap_adapter *);
/* adapter used to attach this adapter to a VALE switch (if any) */
@@ -807,7 +825,6 @@
/* adapter used to attach the host rings of this adapter
* to a VALE switch (if any) */
struct netmap_vp_adapter *na_hostvp;
-#endif
/* standard refcount to control the lifetime of the adapter
* (it should be equal to the lifetime of the corresponding ifp)
@@ -843,6 +860,10 @@
unsigned rx_buf_maxsize;
char name[NETMAP_REQ_IFNAMSIZ]; /* used at least by pipes */
+
+#ifdef WITH_MONITOR
+ unsigned long monitor_id; /* debugging */
+#endif
};
static __inline u_int
@@ -866,6 +887,12 @@
return (t == NR_TX ? na->num_tx_rings : na->num_rx_rings);
}
+static __inline u_int
+nma_get_host_nrings(struct netmap_adapter *na, enum txrx t)
+{
+ return (t == NR_TX ? na->num_host_tx_rings : na->num_host_rx_rings);
+}
+
static __inline void
nma_set_nrings(struct netmap_adapter *na, enum txrx t, u_int v)
{
@@ -875,6 +902,15 @@
na->num_rx_rings = v;
}
+static __inline void
+nma_set_host_nrings(struct netmap_adapter *na, enum txrx t, u_int v)
+{
+ if (t == NR_TX)
+ na->num_host_tx_rings = v;
+ else
+ na->num_host_rx_rings = v;
+}
+
static __inline struct netmap_kring**
NMR(struct netmap_adapter *na, enum txrx t)
{
@@ -964,13 +1000,22 @@
};
#endif /* WITH_GENERIC */
-static __inline int
+static __inline u_int
netmap_real_rings(struct netmap_adapter *na, enum txrx t)
{
- return nma_get_nrings(na, t) + !!(na->na_flags & NAF_HOST_RINGS);
+ return nma_get_nrings(na, t) +
+ !!(na->na_flags & NAF_HOST_RINGS) * nma_get_host_nrings(na, t);
}
-#ifdef WITH_VALE
+/* account for fake rings */
+static __inline u_int
+netmap_all_rings(struct netmap_adapter *na, enum txrx t)
+{
+ return max(nma_get_nrings(na, t) + 1, netmap_real_rings(na, t));
+}
+
+int netmap_default_bdg_attach(const char *name, struct netmap_adapter *na,
+ struct nm_bridge *);
struct nm_bdg_polling_state;
/*
* Bridge wrapper for non VALE ports attached to a VALE switch.
@@ -1038,12 +1083,12 @@
int nm_bdg_ctl_attach(struct nmreq_header *hdr, void *auth_token);
int nm_bdg_ctl_detach(struct nmreq_header *hdr, void *auth_token);
int nm_bdg_polling(struct nmreq_header *hdr);
-int netmap_bwrap_attach(const char *name, struct netmap_adapter *);
+int netmap_bdg_list(struct nmreq_header *hdr);
+
+#ifdef WITH_VALE
int netmap_vi_create(struct nmreq_header *hdr, int);
int nm_vi_create(struct nmreq_header *);
int nm_vi_destroy(const char *name);
-int netmap_bdg_list(struct nmreq_header *hdr);
-
#else /* !WITH_VALE */
#define netmap_vi_create(hdr, a) (EOPNOTSUPP)
#endif /* WITH_VALE */
@@ -1262,7 +1307,6 @@
#define netmap_ifp_to_vp(_ifp) NULL
#define netmap_ifp_to_host_vp(_ifp) NULL
#define netmap_bdg_idx(_vp) -1
-#define netmap_bdg_name(_vp) NULL
#endif /* WITH_VALE */
static inline int
@@ -1293,68 +1337,9 @@
na->rx_rings[na->num_rx_rings]->nr_pending_mode;
}
-/* set/clear native flags and if_transmit/netdev_ops */
-static inline void
-nm_set_native_flags(struct netmap_adapter *na)
-{
- struct ifnet *ifp = na->ifp;
+void nm_set_native_flags(struct netmap_adapter *);
+void nm_clear_native_flags(struct netmap_adapter *);
- /* We do the setup for intercepting packets only if we are the
- * first user of this adapapter. */
- if (na->active_fds > 0) {
- return;
- }
-
- na->na_flags |= NAF_NETMAP_ON;
-#ifdef IFCAP_NETMAP /* or FreeBSD ? */
- ifp->if_capenable |= IFCAP_NETMAP;
-#endif
-#if defined (__FreeBSD__)
- na->if_transmit = ifp->if_transmit;
- ifp->if_transmit = netmap_transmit;
-#elif defined (_WIN32)
- (void)ifp; /* prevent a warning */
-#elif defined (linux)
- na->if_transmit = (void *)ifp->netdev_ops;
- ifp->netdev_ops = &((struct netmap_hw_adapter *)na)->nm_ndo;
- ((struct netmap_hw_adapter *)na)->save_ethtool = ifp->ethtool_ops;
- ifp->ethtool_ops = &((struct netmap_hw_adapter*)na)->nm_eto;
-#endif /* linux */
- nm_update_hostrings_mode(na);
-}
-
-static inline void
-nm_clear_native_flags(struct netmap_adapter *na)
-{
- struct ifnet *ifp = na->ifp;
-
- /* We undo the setup for intercepting packets only if we are the
- * last user of this adapapter. */
- if (na->active_fds > 0) {
- return;
- }
-
- nm_update_hostrings_mode(na);
-
-#if defined(__FreeBSD__)
- ifp->if_transmit = na->if_transmit;
-#elif defined(_WIN32)
- (void)ifp; /* prevent a warning */
-#else
- ifp->netdev_ops = (void *)na->if_transmit;
- ifp->ethtool_ops = ((struct netmap_hw_adapter*)na)->save_ethtool;
-#endif
- na->na_flags &= ~NAF_NETMAP_ON;
-#ifdef IFCAP_NETMAP /* or FreeBSD ? */
- ifp->if_capenable &= ~IFCAP_NETMAP;
-#endif
-}
-
-#ifdef linux
-int netmap_linux_config(struct netmap_adapter *na,
- struct nm_config_info *info);
-#endif /* linux */
-
/*
* nm_*sync_prologue() functions are used in ioctl/poll and ptnetmap
* kthreads.
@@ -1458,7 +1443,6 @@
struct netmap_mem_d *nmd, struct netmap_adapter **na);
-#ifdef WITH_VALE
/*
* The following bridge-related functions are used by other
* kernel modules.
@@ -1473,39 +1457,49 @@
typedef int (*bdg_config_fn_t)(struct nm_ifreq *);
typedef void (*bdg_dtor_fn_t)(const struct netmap_vp_adapter *);
typedef void *(*bdg_update_private_data_fn_t)(void *private_data, void *callback_data, int *error);
+typedef int (*bdg_vp_create_fn_t)(struct nmreq_header *hdr,
+ struct ifnet *ifp, struct netmap_mem_d *nmd,
+ struct netmap_vp_adapter **ret);
+typedef int (*bdg_bwrap_attach_fn_t)(const char *nr_name, struct netmap_adapter *hwna);
struct netmap_bdg_ops {
bdg_lookup_fn_t lookup;
bdg_config_fn_t config;
bdg_dtor_fn_t dtor;
+ bdg_vp_create_fn_t vp_create;
+ bdg_bwrap_attach_fn_t bwrap_attach;
+ char name[IFNAMSIZ];
};
+int netmap_bwrap_attach(const char *name, struct netmap_adapter *, struct netmap_bdg_ops *);
+int netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *private_data, void *auth_token);
-uint32_t netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
- struct netmap_vp_adapter *, void *private_data);
-
#define NM_BRIDGES 8 /* number of bridges */
#define NM_BDG_MAXPORTS 254 /* up to 254 */
#define NM_BDG_BROADCAST NM_BDG_MAXPORTS
#define NM_BDG_NOPORT (NM_BDG_MAXPORTS+1)
-/* these are redefined in case of no VALE support */
-int netmap_get_bdg_na(struct nmreq_header *hdr, struct netmap_adapter **na,
- struct netmap_mem_d *nmd, int create);
struct nm_bridge *netmap_init_bridges2(u_int);
void netmap_uninit_bridges2(struct nm_bridge *, u_int);
int netmap_init_bridges(void);
void netmap_uninit_bridges(void);
-int netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *private_data, void *auth_token);
int nm_bdg_update_private_data(const char *name, bdg_update_private_data_fn_t callback,
void *callback_data, void *auth_token);
int netmap_bdg_config(struct nm_ifreq *nifr);
-void *netmap_bdg_create(const char *bdg_name, int *return_status);
-int netmap_bdg_destroy(const char *bdg_name, void *auth_token);
+#ifdef WITH_VALE
+uint32_t netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
+ struct netmap_vp_adapter *, void *private_data);
+
+/* these are redefined in case of no VALE support */
+int netmap_get_vale_na(struct nmreq_header *hdr, struct netmap_adapter **na,
+ struct netmap_mem_d *nmd, int create);
+void *netmap_vale_create(const char *bdg_name, int *return_status);
+int netmap_vale_destroy(const char *bdg_name, void *auth_token);
+
#else /* !WITH_VALE */
-#define netmap_get_bdg_na(_1, _2, _3, _4) 0
-#define netmap_init_bridges(_1) 0
-#define netmap_uninit_bridges()
-#define netmap_bdg_regops(_1, _2) EINVAL
+#define netmap_bdg_learning(_1, _2, _3, _4) 0
+#define netmap_get_vale_na(_1, _2, _3, _4) 0
+#define netmap_bdg_create(_1, _2) NULL
+#define netmap_bdg_destroy(_1, _2) 0
#endif /* !WITH_VALE */
#ifdef WITH_PIPES
@@ -1611,6 +1605,7 @@
extern int netmap_txsync_retry;
extern int netmap_flags;
+extern int netmap_generic_hwcsum;
extern int netmap_generic_mit;
extern int netmap_generic_ringsize;
extern int netmap_generic_rings;
@@ -1620,12 +1615,18 @@
extern int ptnetmap_tx_workers;
/*
- * NA returns a pointer to the struct netmap adapter from the ifp,
- * WNA is used to write it.
+ * NA returns a pointer to the struct netmap adapter from the ifp.
+ * WNA is os-specific and must be defined in glue code.
*/
#define NA(_ifp) ((struct netmap_adapter *)WNA(_ifp))
/*
+ * we provide a default implementation of NM_ATTACH_NA/NM_DETACH_NA
+ * based on the WNA field.
+ * Glue code may override this by defining its own NM_ATTACH_NA
+ */
+#ifndef NM_ATTACH_NA
+/*
* On old versions of FreeBSD, NA(ifp) is a pspare. On linux we
* overload another pointer in the netdev.
*
@@ -1643,7 +1644,13 @@
NA(ifp)->magic = \
((uint32_t)(uintptr_t)NA(ifp)) ^ NETMAP_MAGIC; \
} while(0)
+#define NM_RESTORE_NA(ifp, na) do { WNA(ifp) = (na); } while (0) /* write na back into WNA(ifp); single statement, like NM_DETACH_NA */
+#define NM_DETACH_NA(ifp) do { WNA(ifp) = NULL; } while (0)
+#define NM_NA_CLASH(ifp) (NA(ifp) && !NM_NA_VALID(ifp))
+#endif /* !NM_ATTACH_NA */
+
+
#define NM_IS_NATIVE(ifp) (NM_NA_VALID(ifp) && NA(ifp)->nm_dtor == netmap_hw_dtor)
#if defined(__FreeBSD__)
@@ -1752,21 +1759,28 @@
}
}
+#ifdef NETMAP_LINUX_HAVE_DMASYNC
static inline void
-netmap_sync_map(struct netmap_adapter *na,
+netmap_sync_map_cpu(struct netmap_adapter *na,
bus_dma_tag_t tag, bus_dmamap_t map, u_int sz, enum txrx t)
{
if (*map) {
- if (t == NR_RX)
- dma_sync_single_for_cpu(na->pdev, *map, sz,
- DMA_FROM_DEVICE);
- else
- dma_sync_single_for_device(na->pdev, *map, sz,
- DMA_TO_DEVICE);
+ dma_sync_single_for_cpu(na->pdev, *map, sz,
+ (t == NR_TX ? DMA_TO_DEVICE : DMA_FROM_DEVICE));
}
}
static inline void
+netmap_sync_map_dev(struct netmap_adapter *na,
+ bus_dma_tag_t tag, bus_dmamap_t map, u_int sz, enum txrx t)
+{
+ if (*map) {
+ dma_sync_single_for_device(na->pdev, *map, sz,
+ (t == NR_TX ? DMA_TO_DEVICE : DMA_FROM_DEVICE));
+ }
+}
+
+static inline void
netmap_reload_map(struct netmap_adapter *na,
bus_dma_tag_t tag, bus_dmamap_t map, void *buf)
{
@@ -1780,6 +1794,10 @@
*map = dma_map_single(na->pdev, buf, sz,
DMA_BIDIRECTIONAL);
}
+#else /* !NETMAP_LINUX_HAVE_DMASYNC */
+#define netmap_sync_map_cpu(na, tag, map, sz, t)
+#define netmap_sync_map_dev(na, tag, map, sz, t)
+#endif /* NETMAP_LINUX_HAVE_DMASYNC */
#endif /* linux */
@@ -2220,6 +2238,119 @@
void ptnet_nm_krings_delete(struct netmap_adapter *na);
void ptnet_nm_dtor(struct netmap_adapter *na);
#endif /* WITH_PTNETMAP_GUEST */
+
+#ifdef __FreeBSD__
+/*
+ * FreeBSD mbuf allocator/deallocator in emulation mode:
+ */
+#if __FreeBSD_version < 1100000
+
+/*
+ * For older versions of FreeBSD:
+ *
+ * We allocate EXT_PACKET mbuf+clusters, but need to set M_NOFREE
+ * so that the destructor, if invoked, will not free the packet.
+ * In principle we should set the destructor only on demand,
+ * but since there might be a race we better do it on allocation.
+ * As a consequence, we also need to set the destructor or we
+ * would leak buffers.
+ */
+
+/* mbuf destructor, also need to change the type to EXT_EXTREF,
+ * add an M_NOFREE flag, and then clear the flag and
+ * chain into uma_zfree(zone_pack, mf)
+ * (or reinstall the buffer ?)
+ */
+#define SET_MBUF_DESTRUCTOR(m, fn) do { \
+ (m)->m_ext.ext_free = (void *)fn; \
+ (m)->m_ext.ext_type = EXT_EXTREF; \
+} while (0)
+
+static int
+void_mbuf_dtor(struct mbuf *m, void *arg1, void *arg2)
+{
+ /* restore original mbuf */
+ m->m_ext.ext_buf = m->m_data = m->m_ext.ext_arg1;
+ m->m_ext.ext_arg1 = NULL;
+ m->m_ext.ext_type = EXT_PACKET;
+ m->m_ext.ext_free = NULL;
+ if (MBUF_REFCNT(m) == 0)
+ SET_MBUF_REFCNT(m, 1);
+ uma_zfree(zone_pack, m);
+
+ return 0;
+}
+
+static inline struct mbuf *
+nm_os_get_mbuf(struct ifnet *ifp, int len)
+{
+ struct mbuf *m;
+
+ (void)ifp;
+ m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
+ if (m) {
+ /* m_getcl() (mb_ctor_mbuf) has an assert that checks that
+ * M_NOFREE flag is not specified as third argument,
+ * so we have to set M_NOFREE after m_getcl(). */
+ m->m_flags |= M_NOFREE;
+ m->m_ext.ext_arg1 = m->m_ext.ext_buf; // XXX save
+ m->m_ext.ext_free = (void *)void_mbuf_dtor;
+ m->m_ext.ext_type = EXT_EXTREF;
+ ND(5, "create m %p refcnt %d", m, MBUF_REFCNT(m));
+ }
+ return m;
+}
+
+#else /* __FreeBSD_version >= 1100000 */
+
+/*
+ * Newer versions of FreeBSD, using a straightforward scheme.
+ *
+ * We allocate mbufs with m_gethdr(), since the mbuf header is needed
+ * by the driver. We also attach a customly-provided external storage,
+ * which in this case is a netmap buffer. When calling m_extadd(), however
+ * we pass a NULL address, since the real address (and length) will be
+ * filled in by nm_os_generic_xmit_frame() right before calling
+ * if_transmit().
+ *
+ * The dtor function does nothing, however we need it since mb_free_ext()
+ * has a KASSERT(), checking that the mbuf dtor function is not NULL.
+ */
+
+#if __FreeBSD_version <= 1200050
+static void void_mbuf_dtor(struct mbuf *m, void *arg1, void *arg2) { }
+#else /* __FreeBSD_version >= 1200051 */
+/* The arg1 and arg2 pointer arguments were removed by r324446, which
+ * is included since version 1200051. */
+static void void_mbuf_dtor(struct mbuf *m) { }
+#endif /* __FreeBSD_version >= 1200051 */
+
+#define SET_MBUF_DESTRUCTOR(m, fn) do { \
+ (m)->m_ext.ext_free = (fn != NULL) ? \
+ (void *)fn : (void *)void_mbuf_dtor; \
+} while (0)
+
+static inline struct mbuf *
+nm_os_get_mbuf(struct ifnet *ifp, int len)
+{
+ struct mbuf *m;
+
+ (void)ifp;
+ (void)len;
+
+ m = m_gethdr(M_NOWAIT, MT_DATA);
+ if (m == NULL) {
+ return m;
+ }
+
+ m_extadd(m, NULL /* buf */, 0 /* size */, void_mbuf_dtor,
+ NULL, NULL, 0, EXT_NET_DRV);
+
+ return m;
+}
+
+#endif /* __FreeBSD_version >= 1100000 */
+#endif /* __FreeBSD__ */
struct nmreq_option * nmreq_findoption(struct nmreq_option *, uint16_t);
int nmreq_checkduplicate(struct nmreq_option *);
Index: head/sys/dev/netmap/netmap_mem2.c
===================================================================
--- head/sys/dev/netmap/netmap_mem2.c
+++ head/sys/dev/netmap/netmap_mem2.c
@@ -1845,7 +1845,7 @@
for_rx_tx(t) {
u_int i;
- for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
+ for (i = 0; i < netmap_all_rings(na, t); i++) {
struct netmap_kring *kring = NMR(na, t)[i];
struct netmap_ring *ring = kring->ring;
@@ -1884,7 +1884,7 @@
for_rx_tx(t) {
u_int i;
- for (i = 0; i <= nma_get_nrings(na, t); i++) {
+ for (i = 0; i < netmap_all_rings(na, t); i++) {
struct netmap_kring *kring = NMR(na, t)[i];
struct netmap_ring *ring = kring->ring;
u_int len, ndesc;
@@ -1922,7 +1922,7 @@
netmap_mem_bufsize(na->nm_mem);
ND("%s h %d c %d t %d", kring->name,
ring->head, ring->cur, ring->tail);
- ND("initializing slots for %s_ring", nm_txrx2str(txrx));
+ ND("initializing slots for %s_ring", nm_txrx2str(t));
if (!(kring->nr_kflags & NKR_FAKERING)) {
/* this is a real ring */
ND("allocating buffers for %s", kring->name);
@@ -1980,7 +1980,7 @@
ntot = 0;
for_rx_tx(t) {
/* account for the (eventually fake) host rings */
- n[t] = nma_get_nrings(na, t) + 1;
+ n[t] = netmap_all_rings(na, t);
ntot += n[t];
}
/*
@@ -2654,14 +2654,14 @@
/* point each kring to the corresponding backend ring */
nifp = (struct netmap_if *)((char *)ptnmd->nm_addr + ptif->nifp_offset);
- for (i = 0; i <= na->num_tx_rings; i++) {
+ for (i = 0; i < netmap_all_rings(na, NR_TX); i++) {
struct netmap_kring *kring = na->tx_rings[i];
if (kring->ring)
continue;
kring->ring = (struct netmap_ring *)
((char *)nifp + nifp->ring_ofs[i]);
}
- for (i = 0; i <= na->num_rx_rings; i++) {
+ for (i = 0; i < netmap_all_rings(na, NR_RX); i++) {
struct netmap_kring *kring = na->rx_rings[i];
if (kring->ring)
continue;
Index: head/sys/dev/netmap/netmap_monitor.c
===================================================================
--- head/sys/dev/netmap/netmap_monitor.c
+++ head/sys/dev/netmap/netmap_monitor.c
@@ -152,6 +152,12 @@
static int
netmap_monitor_rxsync(struct netmap_kring *kring, int flags)
{
+ struct netmap_monitor_adapter *mna =
+ (struct netmap_monitor_adapter *)kring->na;
+ if (unlikely(mna->priv.np_na == NULL)) {
+ /* parent left netmap mode */
+ return EIO;
+ }
ND("%s %x", kring->name, flags);
kring->nr_hwcur = kring->rhead;
mb();
@@ -164,11 +170,20 @@
netmap_monitor_krings_create(struct netmap_adapter *na)
{
int error = netmap_krings_create(na, 0);
+ enum txrx t;
+
if (error)
return error;
/* override the host rings callbacks */
- na->tx_rings[na->num_tx_rings]->nm_sync = netmap_monitor_txsync;
- na->rx_rings[na->num_rx_rings]->nm_sync = netmap_monitor_rxsync;
+ for_rx_tx(t) {
+ int i;
+ u_int first = nma_get_nrings(na, t);
+ for (i = 0; i < nma_get_host_nrings(na, t); i++) {
+ struct netmap_kring *kring = NMR(na, t)[first + i];
+ kring->nm_sync = t == NR_TX ? netmap_monitor_txsync :
+ netmap_monitor_rxsync;
+ }
+ }
return 0;
}
@@ -244,6 +259,48 @@
static int netmap_monitor_parent_rxsync(struct netmap_kring *, int);
static int netmap_monitor_parent_notify(struct netmap_kring *, int);
+static void
+nm_monitor_intercept_callbacks(struct netmap_kring *kring)
+{
+ ND("intercept callbacks on %s", kring->name);
+ kring->mon_sync = kring->nm_sync;
+ kring->mon_notify = kring->nm_notify;
+ if (kring->tx == NR_TX) {
+ kring->nm_sync = netmap_monitor_parent_txsync;
+ } else {
+ kring->nm_sync = netmap_monitor_parent_rxsync;
+ kring->nm_notify = netmap_monitor_parent_notify;
+ kring->mon_tail = kring->nr_hwtail;
+ }
+}
+
+static void
+nm_monitor_restore_callbacks(struct netmap_kring *kring)
+{
+ ND("restoring callbacks on %s", kring->name);
+ kring->nm_sync = kring->mon_sync;
+ kring->mon_sync = NULL;
+ if (kring->tx == NR_RX) {
+ kring->nm_notify = kring->mon_notify;
+ }
+ kring->mon_notify = NULL;
+}
+
+static struct netmap_kring *
+nm_zmon_list_head(struct netmap_kring *mkring, enum txrx t)
+{
+ struct netmap_adapter *na = mkring->na;
+ struct netmap_kring *kring = mkring;
+ struct netmap_zmon_list *z = &kring->zmon_list[t];
+ /* reach the head of the list */
+ while (nm_is_zmon(na) && z->prev != NULL) {
+ kring = z->prev;
+ na = kring->na;
+ z = &kring->zmon_list[t];
+ }
+ return nm_is_zmon(na) ? NULL : kring;
+}
+
/* add the monitor mkring to the list of monitors of kring.
* If this is the first monitor, intercept the callbacks
*/
@@ -254,51 +311,34 @@
enum txrx t = kring->tx;
struct netmap_zmon_list *z = &kring->zmon_list[t];
struct netmap_zmon_list *mz = &mkring->zmon_list[t];
+ struct netmap_kring *ikring = kring;
/* a zero-copy monitor which is not the first in the list
* must monitor the previous monitor
*/
if (zmon && z->prev != NULL)
- kring = z->prev;
+ ikring = z->prev; /* tail of the list */
/* synchronize with concurrently running nm_sync()s */
nm_kr_stop(kring, NM_KR_LOCKED);
- if (nm_monitor_none(kring)) {
- /* this is the first monitor, intercept callbacks */
- ND("intercept callbacks on %s", kring->name);
- kring->mon_sync = kring->nm_sync;
- kring->mon_notify = kring->nm_notify;
- if (kring->tx == NR_TX) {
- kring->nm_sync = netmap_monitor_parent_txsync;
- } else {
- kring->nm_sync = netmap_monitor_parent_rxsync;
- kring->nm_notify = netmap_monitor_parent_notify;
- kring->mon_tail = kring->nr_hwtail;
- }
+ if (nm_monitor_none(ikring)) {
+ /* this is the first monitor, intercept the callbacks */
+ ND("%s: intercept callbacks on %s", mkring->name, ikring->name);
+ nm_monitor_intercept_callbacks(ikring);
}
if (zmon) {
/* append the zmon to the list */
- struct netmap_monitor_adapter *mna =
- (struct netmap_monitor_adapter *)mkring->na;
- struct netmap_adapter *pna;
-
- if (z->prev != NULL)
- z->prev->zmon_list[t].next = mkring;
- mz->prev = z->prev;
- z->prev = mkring;
- if (z->next == NULL)
- z->next = mkring;
-
- /* grap a reference to the previous netmap adapter
+ ikring->zmon_list[t].next = mkring;
+ z->prev = mkring; /* new tail */
+ mz->prev = ikring;
+ mz->next = NULL;
+ /* grab a reference to the previous netmap adapter
* in the chain (this may be the monitored port
* or another zero-copy monitor)
*/
- pna = kring->na;
- netmap_adapter_get(pna);
- netmap_adapter_put(mna->priv.np_na);
- mna->priv.np_na = pna;
+ netmap_adapter_get(ikring->na);
} else {
/* make sure the monitor array exists and is big enough */
error = nm_monitor_alloc(kring, kring->n_monitors + 1);
@@ -318,29 +358,50 @@
* If this is the last monitor, restore the original callbacks
*/
static void
-netmap_monitor_del(struct netmap_kring *mkring, struct netmap_kring *kring)
+netmap_monitor_del(struct netmap_kring *mkring, struct netmap_kring *kring, enum txrx t)
{
- struct netmap_zmon_list *mz = &mkring->zmon_list[kring->tx];
int zmon = nm_is_zmon(mkring->na);
+ struct netmap_zmon_list *mz = &mkring->zmon_list[t];
+ struct netmap_kring *ikring = kring;
- if (zmon && mz->prev != NULL)
- kring = mz->prev;
+ if (zmon) {
+ /* get to the head of the list */
+ kring = nm_zmon_list_head(mkring, t);
+ ikring = mz->prev;
+ }
- /* synchronize with concurrently running nm_sync()s */
- nm_kr_stop(kring, NM_KR_LOCKED);
+ /* synchronize with concurrently running nm_sync()s
+ * if kring is NULL (orphaned list) the monitored port
+ * has exited netmap mode, so there is nothing to stop
+ */
+ if (kring != NULL)
+ nm_kr_stop(kring, NM_KR_LOCKED);
if (zmon) {
/* remove the monitor from the list */
- if (mz->prev != NULL)
- mz->prev->zmon_list[kring->tx].next = mz->next;
- else
- kring->zmon_list[kring->tx].next = mz->next;
if (mz->next != NULL) {
- mz->next->zmon_list[kring->tx].prev = mz->prev;
- } else {
- kring->zmon_list[kring->tx].prev = mz->prev;
+ mz->next->zmon_list[t].prev = mz->prev;
+ /* we also need to let the next monitor drop the
+ * reference to us and grab the reference to the
+ * previous ring owner, instead
+ */
+ if (mz->prev != NULL)
+ netmap_adapter_get(mz->prev->na);
+ netmap_adapter_put(mkring->na);
+ } else if (kring != NULL) {
+ /* in the monitored kring, prev is actually the
+ * pointer to the tail of the list
+ */
+ kring->zmon_list[t].prev =
+ (mz->prev != kring ? mz->prev : NULL);
}
+ if (mz->prev != NULL) {
+ netmap_adapter_put(mz->prev->na);
+ mz->prev->zmon_list[t].next = mz->next;
+ }
+ mz->prev = NULL;
+ mz->next = NULL;
} else {
/* this is a copy monitor */
uint32_t mon_pos = mkring->mon_pos[kring->tx];
@@ -356,21 +417,13 @@
}
}
- if (nm_monitor_none(kring)) {
+ if (ikring != NULL && nm_monitor_none(ikring)) {
/* this was the last monitor, restore the callbacks */
- ND("%s: restoring sync on %s: %p", mkring->name, kring->name,
- kring->mon_sync);
- kring->nm_sync = kring->mon_sync;
- kring->mon_sync = NULL;
- if (kring->tx == NR_RX) {
- ND("%s: restoring notify on %s: %p",
- mkring->name, kring->name, kring->mon_notify);
- kring->nm_notify = kring->mon_notify;
- kring->mon_notify = NULL;
- }
+ nm_monitor_restore_callbacks(ikring);
}
- nm_kr_start(kring);
+ if (kring != NULL)
+ nm_kr_start(kring);
}
@@ -389,9 +442,9 @@
for_rx_tx(t) {
u_int i;
- for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
+ for (i = 0; i < netmap_all_rings(na, t); i++) {
struct netmap_kring *kring = NMR(na, t)[i];
- struct netmap_kring *zkring;
+ struct netmap_zmon_list *z = &kring->zmon_list[t];
u_int j;
for (j = 0; j < kring->n_monitors; j++) {
@@ -404,30 +457,34 @@
netmap_adapter_put(mna->priv.np_na);
mna->priv.np_na = NULL;
}
+ kring->monitors[j] = NULL;
}
- zkring = kring->zmon_list[kring->tx].next;
- if (zkring != NULL) {
- struct netmap_monitor_adapter *next =
- (struct netmap_monitor_adapter *)zkring->na;
- struct netmap_monitor_adapter *this =
- (struct netmap_monitor_adapter *)na;
- struct netmap_adapter *pna = this->priv.np_na;
- /* let the next monitor forget about us */
- if (next->priv.np_na != NULL) {
- netmap_adapter_put(next->priv.np_na);
+ if (!nm_is_zmon(na)) {
+ /* we are the head of at most one list */
+ struct netmap_kring *zkring;
+ for (zkring = z->next; zkring != NULL;
+ zkring = zkring->zmon_list[t].next)
+ {
+ struct netmap_monitor_adapter *next =
+ (struct netmap_monitor_adapter *)zkring->na;
+ /* let the monitor forget about us */
+ netmap_adapter_put(next->priv.np_na); /* nop if null */
+ next->priv.np_na = NULL;
}
- if (pna != NULL && nm_is_zmon(na)) {
- /* we are a monitor ourselves and we may
- * need to pass down the reference to
- * the previous adapter in the chain
- */
- netmap_adapter_get(pna);
- next->priv.np_na = pna;
- continue;
- }
- next->priv.np_na = NULL;
+ /* orphan the zmon list */
+ if (z->next != NULL)
+ z->next->zmon_list[t].prev = NULL;
+ z->next = NULL;
+ z->prev = NULL;
}
+
+ if (!nm_monitor_none(kring)) {
+
+ kring->n_monitors = 0;
+ nm_monitor_dealloc(kring);
+ nm_monitor_restore_callbacks(kring);
+ }
}
}
}
@@ -455,7 +512,7 @@
return ENXIO;
}
for_rx_tx(t) {
- for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
+ for (i = 0; i < netmap_all_rings(na, t); i++) {
mkring = NMR(na, t)[i];
if (!nm_kring_pending_on(mkring))
continue;
@@ -477,7 +534,7 @@
if (na->active_fds == 0)
na->na_flags &= ~NAF_NETMAP_ON;
for_rx_tx(t) {
- for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
+ for (i = 0; i < netmap_all_rings(na, t); i++) {
mkring = NMR(na, t)[i];
if (!nm_kring_pending_off(mkring))
continue;
@@ -495,7 +552,7 @@
continue;
if (mna->flags & nm_txrx2flag(s)) {
kring = NMR(pna, s)[i];
- netmap_monitor_del(mkring, kring);
+ netmap_monitor_del(mkring, kring, s);
}
}
}
@@ -593,6 +650,7 @@
ms->len = s->len;
s->len = tmp;
+ ms->flags = s->flags;
s->flags |= NS_BUF_CHANGED;
beg = nm_next(beg, lim);
@@ -710,6 +768,7 @@
memcpy(dst, src, copy_len);
ms->len = copy_len;
+ ms->flags = s->flags;
sent++;
beg = nm_next(beg, lim);
@@ -836,7 +895,6 @@
struct ifnet *ifp = NULL;
int error;
int zcopy = (req->nr_flags & NR_ZCOPY_MON);
- char monsuff[10] = "";
if (zcopy) {
req->nr_flags |= (NR_MONITOR_TX | NR_MONITOR_RX);
@@ -890,14 +948,11 @@
D("ringid error");
goto free_out;
}
- if (mna->priv.np_qlast[NR_TX] - mna->priv.np_qfirst[NR_TX] == 1) {
- snprintf(monsuff, 10, "-%d", mna->priv.np_qfirst[NR_TX]);
- }
- snprintf(mna->up.name, sizeof(mna->up.name), "%s%s/%s%s%s", pna->name,
- monsuff,
+ snprintf(mna->up.name, sizeof(mna->up.name), "%s/%s%s%s#%lu", pna->name,
zcopy ? "z" : "",
(req->nr_flags & NR_MONITOR_RX) ? "r" : "",
- (req->nr_flags & NR_MONITOR_TX) ? "t" : "");
+ (req->nr_flags & NR_MONITOR_TX) ? "t" : "",
+ pna->monitor_id++);
/* the monitor supports the host rings iff the parent does */
mna->up.na_flags |= (pna->na_flags & NAF_HOST_RINGS);
Index: head/sys/dev/netmap/netmap_pipe.c
===================================================================
--- head/sys/dev/netmap/netmap_pipe.c
+++ head/sys/dev/netmap/netmap_pipe.c
@@ -1,7 +1,7 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
- * Copyright (C) 2014-2016 Giuseppe Lettieri
+ * Copyright (C) 2014-2018 Giuseppe Lettieri
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -185,8 +185,9 @@
netmap_pipe_txsync(struct netmap_kring *txkring, int flags)
{
struct netmap_kring *rxkring = txkring->pipe;
- u_int k, lim = txkring->nkr_num_slots - 1;
+ u_int k, lim = txkring->nkr_num_slots - 1, nk;
int m; /* slots to transfer */
+ int complete; /* did we see a complete packet ? */
struct netmap_ring *txring = txkring->ring, *rxring = rxkring->ring;
ND("%p: %s %x -> %s", txkring, txkring->name, flags, rxkring->name);
@@ -194,6 +195,9 @@
txkring->nr_hwcur, txkring->nr_hwtail,
txkring->rcur, txkring->rhead, txkring->rtail);
+ /* update the hwtail */
+ txkring->nr_hwtail = txkring->pipe_tail;
+
m = txkring->rhead - txkring->nr_hwcur; /* new slots */
if (m < 0)
m += txkring->nkr_num_slots;
@@ -203,29 +207,29 @@
return 0;
}
- for (k = txkring->nr_hwcur; m; m--, k = nm_next(k, lim)) {
+ for (k = txkring->nr_hwcur, nk = lim + 1, complete = 0; m;
+ m--, k = nm_next(k, lim), nk = (complete ? k : nk)) {
struct netmap_slot *rs = &rxring->slot[k];
struct netmap_slot *ts = &txring->slot[k];
- rs->len = ts->len;
- rs->ptr = ts->ptr;
-
+ *rs = *ts;
if (ts->flags & NS_BUF_CHANGED) {
- rs->buf_idx = ts->buf_idx;
- rs->flags |= NS_BUF_CHANGED;
ts->flags &= ~NS_BUF_CHANGED;
}
+ complete = !(ts->flags & NS_MOREFRAG);
}
- mb(); /* make sure the slots are updated before publishing them */
- rxkring->nr_hwtail = k;
txkring->nr_hwcur = k;
ND(20, "TX after : hwcur %d hwtail %d cur %d head %d tail %d k %d",
txkring->nr_hwcur, txkring->nr_hwtail,
txkring->rcur, txkring->rhead, txkring->rtail, k);
- rxkring->nm_notify(rxkring, 0);
+ if (likely(nk <= lim)) {
+ mb(); /* make sure the slots are updated before publishing them */
+ rxkring->pipe_tail = nk; /* only publish complete packets */
+ rxkring->nm_notify(rxkring, 0);
+ }
return 0;
}
@@ -243,6 +247,9 @@
rxkring->nr_hwcur, rxkring->nr_hwtail,
rxkring->rcur, rxkring->rhead, rxkring->rtail);
+ /* update the hwtail */
+ rxkring->nr_hwtail = rxkring->pipe_tail;
+
m = rxkring->rhead - rxkring->nr_hwcur; /* released slots */
if (m < 0)
m += rxkring->nkr_num_slots;
@@ -264,7 +271,7 @@
}
mb(); /* make sure the slots are updated before publishing them */
- txkring->nr_hwtail = nm_prev(k, lim);
+ txkring->pipe_tail = nm_prev(k, lim);
rxkring->nr_hwcur = k;
ND(20, "RX after : hwcur %d hwtail %d cur %d head %d tail %d k %d",
@@ -346,14 +353,19 @@
if (error)
goto del_krings1;
- /* cross link the krings */
+ /* cross link the krings and initialize the pipe_tails */
for_rx_tx(t) {
enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
for (i = 0; i < nma_get_nrings(na, t); i++) {
- NMR(na, t)[i]->pipe = NMR(ona, r)[i];
- NMR(ona, r)[i]->pipe = NMR(na, t)[i];
+ struct netmap_kring *k1 = NMR(na, t)[i],
+ *k2 = NMR(ona, r)[i];
+ k1->pipe = k2;
+ k2->pipe = k1;
/* mark all peer-adapter rings as fake */
- NMR(ona, r)[i]->nr_kflags |= NKR_FAKERING;
+ k2->nr_kflags |= NKR_FAKERING;
+ /* init tails */
+ k1->pipe_tail = k1->nr_hwtail;
+ k2->pipe_tail = k2->nr_hwtail;
}
}
@@ -436,6 +448,16 @@
if (nm_kring_pending_on(kring)) {
struct netmap_kring *sring, *dring;
+ kring->nr_mode = NKR_NETMAP_ON;
+ if ((kring->nr_kflags & NKR_FAKERING) &&
+ (kring->pipe->nr_kflags & NKR_FAKERING)) {
+ /* this is a re-open of a pipe
+ * end-point kept alive by the other end.
+ * We need to leave everything as it is
+ */
+ continue;
+ }
+
/* copy the buffers from the non-fake ring */
if (kring->nr_kflags & NKR_FAKERING) {
sring = kring->pipe;
@@ -556,10 +578,10 @@
if (ring == NULL)
continue;
- if (kring->nr_hwtail == kring->nr_hwcur)
- ring->slot[kring->nr_hwtail].buf_idx = 0;
+ if (kring->tx == NR_RX)
+ ring->slot[kring->pipe_tail].buf_idx = 0;
- for (j = nm_next(kring->nr_hwtail, lim);
+ for (j = nm_next(kring->pipe_tail, lim);
j != kring->nr_hwcur;
j = nm_next(j, lim))
{
Index: head/sys/dev/netmap/netmap_vale.c
===================================================================
--- head/sys/dev/netmap/netmap_vale.c
+++ head/sys/dev/netmap/netmap_vale.c
@@ -27,37 +27,6 @@
*/
-/*
- * This module implements the VALE switch for netmap
-
---- VALE SWITCH ---
-
-NMG_LOCK() serializes all modifications to switches and ports.
-A switch cannot be deleted until all ports are gone.
-
-For each switch, an SX lock (RWlock on linux) protects
-deletion of ports. When configuring or deleting a new port, the
-lock is acquired in exclusive mode (after holding NMG_LOCK).
-When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
-The lock is held throughout the entire forwarding cycle,
-during which the thread may incur in a page fault.
-Hence it is important that sleepable shared locks are used.
-
-On the rx ring, the per-port lock is grabbed initially to reserve
-a number of slot in the ring, then the lock is released,
-packets are copied from source to destination, and then
-the lock is acquired again and the receive ring is updated.
-(A similar thing is done on the tx ring for NIC and host stack
-ports attached to the switch)
-
- */
-
-/*
- * OS-specific code that is used only within this file.
- * Other OS-specific code that must be accessed by drivers
- * is present in netmap_kern.h
- */
-
#if defined(__FreeBSD__)
#include <sys/cdefs.h> /* prerequisite */
__FBSDID("$FreeBSD$");
@@ -81,20 +50,9 @@
#include <machine/bus.h> /* bus_dmamap_* */
#include <sys/endian.h>
#include <sys/refcount.h>
+#include <sys/smp.h>
-#define BDG_RWLOCK_T struct rwlock // struct rwlock
-
-#define BDG_RWINIT(b) \
- rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS)
-#define BDG_WLOCK(b) rw_wlock(&(b)->bdg_lock)
-#define BDG_WUNLOCK(b) rw_wunlock(&(b)->bdg_lock)
-#define BDG_RLOCK(b) rw_rlock(&(b)->bdg_lock)
-#define BDG_RTRYLOCK(b) rw_try_rlock(&(b)->bdg_lock)
-#define BDG_RUNLOCK(b) rw_runlock(&(b)->bdg_lock)
-#define BDG_RWDESTROY(b) rw_destroy(&(b)->bdg_lock)
-
-
#elif defined(linux)
#include "bsd_glue.h"
@@ -120,6 +78,7 @@
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>
+#include <dev/netmap/netmap_bdg.h>
#ifdef WITH_VALE
@@ -143,15 +102,11 @@
#define NM_BDG_MAXRINGS 16 /* XXX unclear how many. */
#define NM_BDG_MAXSLOTS 4096 /* XXX same as above */
#define NM_BRIDGE_RINGSIZE 1024 /* in the device */
-#define NM_BDG_HASH 1024 /* forwarding table entries */
#define NM_BDG_BATCH 1024 /* entries in the forwarding buffer */
-#define NM_MULTISEG 64 /* max size of a chain of bufs */
/* actual size of the tables */
-#define NM_BDG_BATCH_MAX (NM_BDG_BATCH + NM_MULTISEG)
+#define NM_BDG_BATCH_MAX (NM_BDG_BATCH + NETMAP_MAX_FRAGS)
/* NM_FT_NULL terminates a list of slots in the ft */
#define NM_FT_NULL NM_BDG_BATCH_MAX
-/* Default size for the Maximum Frame Size. */
-#define NM_BDG_MFS_DEFAULT 1514
/*
@@ -168,8 +123,9 @@
static int netmap_vp_create(struct nmreq_header *hdr, struct ifnet *,
struct netmap_mem_d *nmd, struct netmap_vp_adapter **);
-static int netmap_vp_reg(struct netmap_adapter *na, int onoff);
-static int netmap_bwrap_reg(struct netmap_adapter *, int onoff);
+static int netmap_vp_bdg_attach(const char *, struct netmap_adapter *,
+ struct nm_bridge *);
+static int netmap_vale_bwrap_attach(const char *, struct netmap_adapter *);
/*
* For each output interface, nm_bdg_q is used to construct a list.
@@ -182,99 +138,17 @@
uint32_t bq_len; /* number of buffers */
};
-/* XXX revise this */
-struct nm_hash_ent {
- uint64_t mac; /* the top 2 bytes are the epoch */
- uint64_t ports;
-};
-
/* Holds the default callbacks */
-static struct netmap_bdg_ops default_bdg_ops = {netmap_bdg_learning, NULL, NULL};
-
-/*
- * nm_bridge is a descriptor for a VALE switch.
- * Interfaces for a bridge are all in bdg_ports[].
- * The array has fixed size, an empty entry does not terminate
- * the search, but lookups only occur on attach/detach so we
- * don't mind if they are slow.
- *
- * The bridge is non blocking on the transmit ports: excess
- * packets are dropped if there is no room on the output port.
- *
- * bdg_lock protects accesses to the bdg_ports array.
- * This is a rw lock (or equivalent).
- */
-#define NM_BDG_IFNAMSIZ IFNAMSIZ
-struct nm_bridge {
- /* XXX what is the proper alignment/layout ? */
- BDG_RWLOCK_T bdg_lock; /* protects bdg_ports */
- int bdg_namelen;
- uint32_t bdg_active_ports;
- char bdg_basename[NM_BDG_IFNAMSIZ];
-
- /* Indexes of active ports (up to active_ports)
- * and all other remaining ports.
- */
- uint32_t bdg_port_index[NM_BDG_MAXPORTS];
- /* used by netmap_bdg_detach_common() */
- uint32_t tmp_bdg_port_index[NM_BDG_MAXPORTS];
-
- struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS];
-
- /*
- * Programmable lookup functions to figure out the destination port.
- * It returns either of an index of the destination port,
- * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to
- * forward this packet. ring_nr is the source ring index, and the
- * function may overwrite this value to forward this packet to a
- * different ring index.
- * The function is set by netmap_bdg_regops().
- */
- struct netmap_bdg_ops *bdg_ops;
-
- /*
- * Contains the data structure used by the bdg_ops.lookup function.
- * By default points to *ht which is allocated on attach and used by the default lookup
- * otherwise will point to the data structure received by netmap_bdg_regops().
- */
- void *private_data;
- struct nm_hash_ent *ht;
-
- /* Currently used to specify if the bridge is still in use while empty and
- * if it has been put in exclusive mode by an external module, see netmap_bdg_regops()
- * and netmap_bdg_create().
- */
-#define NM_BDG_ACTIVE 1
-#define NM_BDG_EXCLUSIVE 2
- uint8_t bdg_flags;
-
-
-#ifdef CONFIG_NET_NS
- struct net *ns;
-#endif /* CONFIG_NET_NS */
+struct netmap_bdg_ops vale_bdg_ops = {
+ .lookup = netmap_bdg_learning,
+ .config = NULL,
+ .dtor = NULL,
+ .vp_create = netmap_vp_create,
+ .bwrap_attach = netmap_vale_bwrap_attach,
+ .name = NM_BDG_NAME,
};
-const char*
-netmap_bdg_name(struct netmap_vp_adapter *vp)
-{
- struct nm_bridge *b = vp->na_bdg;
- if (b == NULL)
- return NULL;
- return b->bdg_basename;
-}
-
-
-#ifndef CONFIG_NET_NS
/*
- * XXX in principle nm_bridges could be created dynamically
- * Right now we have a static array and deletions are protected
- * by an exclusive lock.
- */
-static struct nm_bridge *nm_bridges;
-#endif /* !CONFIG_NET_NS */
-
-
-/*
* this is a slightly optimized copy routine which rounds
* to multiple of 64 bytes and is often faster than dealing
* with other odd sizes. We assume there is enough room
@@ -304,108 +178,7 @@
}
-static int
-nm_is_id_char(const char c)
-{
- return (c >= 'a' && c <= 'z') ||
- (c >= 'A' && c <= 'Z') ||
- (c >= '0' && c <= '9') ||
- (c == '_');
-}
-
-/* Validate the name of a VALE bridge port and return the
- * position of the ":" character. */
-static int
-nm_vale_name_validate(const char *name)
-{
- int colon_pos = -1;
- int i;
-
- if (!name || strlen(name) < strlen(NM_BDG_NAME)) {
- return -1;
- }
-
- for (i = 0; i < NM_BDG_IFNAMSIZ && name[i]; i++) {
- if (name[i] == ':') {
- colon_pos = i;
- break;
- } else if (!nm_is_id_char(name[i])) {
- return -1;
- }
- }
-
- if (strlen(name) - colon_pos > IFNAMSIZ) {
- /* interface name too long */
- return -1;
- }
-
- return colon_pos;
-}
-
/*
- * locate a bridge among the existing ones.
- * MUST BE CALLED WITH NMG_LOCK()
- *
- * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
- * We assume that this is called with a name of at least NM_NAME chars.
- */
-static struct nm_bridge *
-nm_find_bridge(const char *name, int create)
-{
- int i, namelen;
- struct nm_bridge *b = NULL, *bridges;
- u_int num_bridges;
-
- NMG_LOCK_ASSERT();
-
- netmap_bns_getbridges(&bridges, &num_bridges);
-
- namelen = nm_vale_name_validate(name);
- if (namelen < 0) {
- D("invalid bridge name %s", name ? name : NULL);
- return NULL;
- }
-
- /* lookup the name, remember empty slot if there is one */
- for (i = 0; i < num_bridges; i++) {
- struct nm_bridge *x = bridges + i;
-
- if ((x->bdg_flags & NM_BDG_ACTIVE) + x->bdg_active_ports == 0) {
- if (create && b == NULL)
- b = x; /* record empty slot */
- } else if (x->bdg_namelen != namelen) {
- continue;
- } else if (strncmp(name, x->bdg_basename, namelen) == 0) {
- ND("found '%.*s' at %d", namelen, name, i);
- b = x;
- break;
- }
- }
- if (i == num_bridges && b) { /* name not found, can create entry */
- /* initialize the bridge */
- ND("create new bridge %s with ports %d", b->bdg_basename,
- b->bdg_active_ports);
- b->ht = nm_os_malloc(sizeof(struct nm_hash_ent) * NM_BDG_HASH);
- if (b->ht == NULL) {
- D("failed to allocate hash table");
- return NULL;
- }
- strncpy(b->bdg_basename, name, namelen);
- b->bdg_namelen = namelen;
- b->bdg_active_ports = 0;
- for (i = 0; i < NM_BDG_MAXPORTS; i++)
- b->bdg_port_index[i] = i;
- /* set the default function */
- b->bdg_ops = &default_bdg_ops;
- b->private_data = b->ht;
- b->bdg_flags = 0;
- NM_BNS_GET(b);
- }
- return b;
-}
-
-
-/*
* Free the forwarding tables for rings attached to switch ports.
*/
static void
@@ -464,99 +237,6 @@
return 0;
}
-static int
-netmap_bdg_free(struct nm_bridge *b)
-{
- if ((b->bdg_flags & NM_BDG_ACTIVE) + b->bdg_active_ports != 0) {
- return EBUSY;
- }
-
- ND("marking bridge %s as free", b->bdg_basename);
- nm_os_free(b->ht);
- b->bdg_ops = NULL;
- b->bdg_flags = 0;
- NM_BNS_PUT(b);
- return 0;
-}
-
-
-/* remove from bridge b the ports in slots hw and sw
- * (sw can be -1 if not needed)
- */
-static void
-netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
-{
- int s_hw = hw, s_sw = sw;
- int i, lim =b->bdg_active_ports;
- uint32_t *tmp = b->tmp_bdg_port_index;
-
- /*
- New algorithm:
- make a copy of bdg_port_index;
- lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
- in the array of bdg_port_index, replacing them with
- entries from the bottom of the array;
- decrement bdg_active_ports;
- acquire BDG_WLOCK() and copy back the array.
- */
-
- if (netmap_verbose)
- D("detach %d and %d (lim %d)", hw, sw, lim);
- /* make a copy of the list of active ports, update it,
- * and then copy back within BDG_WLOCK().
- */
- memcpy(b->tmp_bdg_port_index, b->bdg_port_index, sizeof(b->tmp_bdg_port_index));
- for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
- if (hw >= 0 && tmp[i] == hw) {
- ND("detach hw %d at %d", hw, i);
- lim--; /* point to last active port */
- tmp[i] = tmp[lim]; /* swap with i */
- tmp[lim] = hw; /* now this is inactive */
- hw = -1;
- } else if (sw >= 0 && tmp[i] == sw) {
- ND("detach sw %d at %d", sw, i);
- lim--;
- tmp[i] = tmp[lim];
- tmp[lim] = sw;
- sw = -1;
- } else {
- i++;
- }
- }
- if (hw >= 0 || sw >= 0) {
- D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
- }
-
- BDG_WLOCK(b);
- if (b->bdg_ops->dtor)
- b->bdg_ops->dtor(b->bdg_ports[s_hw]);
- b->bdg_ports[s_hw] = NULL;
- if (s_sw >= 0) {
- b->bdg_ports[s_sw] = NULL;
- }
- memcpy(b->bdg_port_index, b->tmp_bdg_port_index, sizeof(b->tmp_bdg_port_index));
- b->bdg_active_ports = lim;
- BDG_WUNLOCK(b);
-
- ND("now %d active ports", lim);
- netmap_bdg_free(b);
-}
-
-static inline void *
-nm_bdg_get_auth_token(struct nm_bridge *b)
-{
- return b->ht;
-}
-
-/* bridge not in exclusive mode ==> always valid
- * bridge in exclusive mode (created through netmap_bdg_create()) ==> check authentication token
- */
-static inline int
-nm_bdg_valid_auth_token(struct nm_bridge *b, void *auth_token)
-{
- return !(b->bdg_flags & NM_BDG_EXCLUSIVE) || b->ht == auth_token;
-}
-
/* Allows external modules to create bridges in exclusive mode,
* returns an authentication token that the external module will need
* to provide during nm_bdg_ctl_{attach, detach}(), netmap_bdg_regops(),
@@ -564,19 +244,19 @@
* Successfully executed if ret != NULL and *return_status == 0.
*/
void *
-netmap_bdg_create(const char *bdg_name, int *return_status)
+netmap_vale_create(const char *bdg_name, int *return_status)
{
struct nm_bridge *b = NULL;
void *ret = NULL;
NMG_LOCK();
- b = nm_find_bridge(bdg_name, 0 /* don't create */);
+ b = nm_find_bridge(bdg_name, 0 /* don't create */, NULL);
if (b) {
*return_status = EEXIST;
goto unlock_bdg_create;
}
- b = nm_find_bridge(bdg_name, 1 /* create */);
+ b = nm_find_bridge(bdg_name, 1 /* create */, &vale_bdg_ops);
if (!b) {
*return_status = ENOMEM;
goto unlock_bdg_create;
@@ -595,13 +275,13 @@
* netmap_bdg_create(), the bridge must be empty.
*/
int
-netmap_bdg_destroy(const char *bdg_name, void *auth_token)
+netmap_vale_destroy(const char *bdg_name, void *auth_token)
{
struct nm_bridge *b = NULL;
int ret = 0;
NMG_LOCK();
- b = nm_find_bridge(bdg_name, 0 /* don't create */);
+ b = nm_find_bridge(bdg_name, 0 /* don't create */, NULL);
if (!b) {
ret = ENXIO;
goto unlock_bdg_free;
@@ -629,27 +309,6 @@
-/* nm_bdg_ctl callback for VALE ports */
-static int
-netmap_vp_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na)
-{
- struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
- struct nm_bridge *b = vpna->na_bdg;
-
- if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
- return 0; /* nothing to do */
- }
- if (b) {
- netmap_set_all_rings(na, 0 /* disable */);
- netmap_bdg_detach_common(b, vpna->bdg_port, -1);
- vpna->na_bdg = NULL;
- netmap_set_all_rings(na, 1 /* enable */);
- }
- /* I have took reference just for attach */
- netmap_adapter_put(na);
- return 0;
-}
-
/* nm_dtor callback for ephemeral VALE ports */
static void
netmap_vp_dtor(struct netmap_adapter *na)
@@ -664,7 +323,7 @@
}
if (na->ifp != NULL && !nm_iszombie(na)) {
- WNA(na->ifp) = NULL;
+ NM_DETACH_NA(na->ifp);
if (vpna->autodelete) {
ND("releasing %s", na->ifp->if_xname);
NMG_UNLOCK();
@@ -674,897 +333,8 @@
}
}
-/* creates a persistent VALE port */
-int
-nm_vi_create(struct nmreq_header *hdr)
-{
- struct nmreq_vale_newif *req =
- (struct nmreq_vale_newif *)(uintptr_t)hdr->nr_body;
- int error = 0;
- /* Build a nmreq_register out of the nmreq_vale_newif,
- * so that we can call netmap_get_bdg_na(). */
- struct nmreq_register regreq;
- bzero(®req, sizeof(regreq));
- regreq.nr_tx_slots = req->nr_tx_slots;
- regreq.nr_rx_slots = req->nr_rx_slots;
- regreq.nr_tx_rings = req->nr_tx_rings;
- regreq.nr_rx_rings = req->nr_rx_rings;
- regreq.nr_mem_id = req->nr_mem_id;
- hdr->nr_reqtype = NETMAP_REQ_REGISTER;
- hdr->nr_body = (uintptr_t)®req;
- error = netmap_vi_create(hdr, 0 /* no autodelete */);
- hdr->nr_reqtype = NETMAP_REQ_VALE_NEWIF;
- hdr->nr_body = (uintptr_t)req;
- /* Write back to the original struct. */
- req->nr_tx_slots = regreq.nr_tx_slots;
- req->nr_rx_slots = regreq.nr_rx_slots;
- req->nr_tx_rings = regreq.nr_tx_rings;
- req->nr_rx_rings = regreq.nr_rx_rings;
- req->nr_mem_id = regreq.nr_mem_id;
- return error;
-}
-/* remove a persistent VALE port from the system */
-int
-nm_vi_destroy(const char *name)
-{
- struct ifnet *ifp;
- struct netmap_vp_adapter *vpna;
- int error;
-
- ifp = ifunit_ref(name);
- if (!ifp)
- return ENXIO;
- NMG_LOCK();
- /* make sure this is actually a VALE port */
- if (!NM_NA_VALID(ifp) || NA(ifp)->nm_register != netmap_vp_reg) {
- error = EINVAL;
- goto err;
- }
-
- vpna = (struct netmap_vp_adapter *)NA(ifp);
-
- /* we can only destroy ports that were created via NETMAP_BDG_NEWIF */
- if (vpna->autodelete) {
- error = EINVAL;
- goto err;
- }
-
- /* also make sure that nobody is using the inferface */
- if (NETMAP_OWNED_BY_ANY(&vpna->up) ||
- vpna->up.na_refcount > 1 /* any ref besides the one in nm_vi_create()? */) {
- error = EBUSY;
- goto err;
- }
-
- NMG_UNLOCK();
-
- D("destroying a persistent vale interface %s", ifp->if_xname);
- /* Linux requires all the references are released
- * before unregister
- */
- netmap_detach(ifp);
- if_rele(ifp);
- nm_os_vi_detach(ifp);
- return 0;
-
-err:
- NMG_UNLOCK();
- if_rele(ifp);
- return error;
-}
-
-static int
-nm_update_info(struct nmreq_register *req, struct netmap_adapter *na)
-{
- req->nr_rx_rings = na->num_rx_rings;
- req->nr_tx_rings = na->num_tx_rings;
- req->nr_rx_slots = na->num_rx_desc;
- req->nr_tx_slots = na->num_tx_desc;
- return netmap_mem_get_info(na->nm_mem, &req->nr_memsize, NULL,
- &req->nr_mem_id);
-}
-
-/*
- * Create a virtual interface registered to the system.
- * The interface will be attached to a bridge later.
- */
-int
-netmap_vi_create(struct nmreq_header *hdr, int autodelete)
-{
- struct nmreq_register *req = (struct nmreq_register *)(uintptr_t)hdr->nr_body;
- struct ifnet *ifp;
- struct netmap_vp_adapter *vpna;
- struct netmap_mem_d *nmd = NULL;
- int error;
-
- if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) {
- return EINVAL;
- }
-
- /* don't include VALE prefix */
- if (!strncmp(hdr->nr_name, NM_BDG_NAME, strlen(NM_BDG_NAME)))
- return EINVAL;
- if (strlen(hdr->nr_name) >= IFNAMSIZ) {
- return EINVAL;
- }
- ifp = ifunit_ref(hdr->nr_name);
- if (ifp) { /* already exist, cannot create new one */
- error = EEXIST;
- NMG_LOCK();
- if (NM_NA_VALID(ifp)) {
- int update_err = nm_update_info(req, NA(ifp));
- if (update_err)
- error = update_err;
- }
- NMG_UNLOCK();
- if_rele(ifp);
- return error;
- }
- error = nm_os_vi_persist(hdr->nr_name, &ifp);
- if (error)
- return error;
-
- NMG_LOCK();
- if (req->nr_mem_id) {
- nmd = netmap_mem_find(req->nr_mem_id);
- if (nmd == NULL) {
- error = EINVAL;
- goto err_1;
- }
- }
- /* netmap_vp_create creates a struct netmap_vp_adapter */
- error = netmap_vp_create(hdr, ifp, nmd, &vpna);
- if (error) {
- D("error %d", error);
- goto err_1;
- }
- /* persist-specific routines */
- vpna->up.nm_bdg_ctl = netmap_vp_bdg_ctl;
- if (!autodelete) {
- netmap_adapter_get(&vpna->up);
- } else {
- vpna->autodelete = 1;
- }
- NM_ATTACH_NA(ifp, &vpna->up);
- /* return the updated info */
- error = nm_update_info(req, &vpna->up);
- if (error) {
- goto err_2;
- }
- ND("returning nr_mem_id %d", req->nr_mem_id);
- if (nmd)
- netmap_mem_put(nmd);
- NMG_UNLOCK();
- ND("created %s", ifp->if_xname);
- return 0;
-
-err_2:
- netmap_detach(ifp);
-err_1:
- if (nmd)
- netmap_mem_put(nmd);
- NMG_UNLOCK();
- nm_os_vi_detach(ifp);
-
- return error;
-}
-
-/* Try to get a reference to a netmap adapter attached to a VALE switch.
- * If the adapter is found (or is created), this function returns 0, a
- * non NULL pointer is returned into *na, and the caller holds a
- * reference to the adapter.
- * If an adapter is not found, then no reference is grabbed and the
- * function returns an error code, or 0 if there is just a VALE prefix
- * mismatch. Therefore the caller holds a reference when
- * (*na != NULL && return == 0).
- */
-int
-netmap_get_bdg_na(struct nmreq_header *hdr, struct netmap_adapter **na,
- struct netmap_mem_d *nmd, int create)
-{
- char *nr_name = hdr->nr_name;
- const char *ifname;
- struct ifnet *ifp = NULL;
- int error = 0;
- struct netmap_vp_adapter *vpna, *hostna = NULL;
- struct nm_bridge *b;
- uint32_t i, j;
- uint32_t cand = NM_BDG_NOPORT, cand2 = NM_BDG_NOPORT;
- int needed;
-
- *na = NULL; /* default return value */
-
- /* first try to see if this is a bridge port. */
- NMG_LOCK_ASSERT();
- if (strncmp(nr_name, NM_BDG_NAME, sizeof(NM_BDG_NAME) - 1)) {
- return 0; /* no error, but no VALE prefix */
- }
-
- b = nm_find_bridge(nr_name, create);
- if (b == NULL) {
- ND("no bridges available for '%s'", nr_name);
- return (create ? ENOMEM : ENXIO);
- }
- if (strlen(nr_name) < b->bdg_namelen) /* impossible */
- panic("x");
-
- /* Now we are sure that name starts with the bridge's name,
- * lookup the port in the bridge. We need to scan the entire
- * list. It is not important to hold a WLOCK on the bridge
- * during the search because NMG_LOCK already guarantees
- * that there are no other possible writers.
- */
-
- /* lookup in the local list of ports */
- for (j = 0; j < b->bdg_active_ports; j++) {
- i = b->bdg_port_index[j];
- vpna = b->bdg_ports[i];
- ND("checking %s", vpna->up.name);
- if (!strcmp(vpna->up.name, nr_name)) {
- netmap_adapter_get(&vpna->up);
- ND("found existing if %s refs %d", nr_name)
- *na = &vpna->up;
- return 0;
- }
- }
- /* not found, should we create it? */
- if (!create)
- return ENXIO;
- /* yes we should, see if we have space to attach entries */
- needed = 2; /* in some cases we only need 1 */
- if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
- D("bridge full %d, cannot create new port", b->bdg_active_ports);
- return ENOMEM;
- }
- /* record the next two ports available, but do not allocate yet */
- cand = b->bdg_port_index[b->bdg_active_ports];
- cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
- ND("+++ bridge %s port %s used %d avail %d %d",
- b->bdg_basename, ifname, b->bdg_active_ports, cand, cand2);
-
- /*
- * try see if there is a matching NIC with this name
- * (after the bridge's name)
- */
- ifname = nr_name + b->bdg_namelen + 1;
- ifp = ifunit_ref(ifname);
- if (!ifp) {
- /* Create an ephemeral virtual port.
- * This block contains all the ephemeral-specific logic.
- */
-
- if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) {
- error = EINVAL;
- goto out;
- }
-
- /* bdg_netmap_attach creates a struct netmap_adapter */
- error = netmap_vp_create(hdr, NULL, nmd, &vpna);
- if (error) {
- D("error %d", error);
- goto out;
- }
- /* shortcut - we can skip get_hw_na(),
- * ownership check and nm_bdg_attach()
- */
-
- } else {
- struct netmap_adapter *hw;
-
- /* the vale:nic syntax is only valid for some commands */
- switch (hdr->nr_reqtype) {
- case NETMAP_REQ_VALE_ATTACH:
- case NETMAP_REQ_VALE_DETACH:
- case NETMAP_REQ_VALE_POLLING_ENABLE:
- case NETMAP_REQ_VALE_POLLING_DISABLE:
- break; /* ok */
- default:
- error = EINVAL;
- goto out;
- }
-
- error = netmap_get_hw_na(ifp, nmd, &hw);
- if (error || hw == NULL)
- goto out;
-
- /* host adapter might not be created */
- error = hw->nm_bdg_attach(nr_name, hw);
- if (error)
- goto out;
- vpna = hw->na_vp;
- hostna = hw->na_hostvp;
- if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
- /* Check if we need to skip the host rings. */
- struct nmreq_vale_attach *areq =
- (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
- if (areq->reg.nr_mode != NR_REG_NIC_SW) {
- hostna = NULL;
- }
- }
- }
-
- BDG_WLOCK(b);
- vpna->bdg_port = cand;
- ND("NIC %p to bridge port %d", vpna, cand);
- /* bind the port to the bridge (virtual ports are not active) */
- b->bdg_ports[cand] = vpna;
- vpna->na_bdg = b;
- b->bdg_active_ports++;
- if (hostna != NULL) {
- /* also bind the host stack to the bridge */
- b->bdg_ports[cand2] = hostna;
- hostna->bdg_port = cand2;
- hostna->na_bdg = b;
- b->bdg_active_ports++;
- ND("host %p to bridge port %d", hostna, cand2);
- }
- ND("if %s refs %d", ifname, vpna->up.na_refcount);
- BDG_WUNLOCK(b);
- *na = &vpna->up;
- netmap_adapter_get(*na);
-
-out:
- if (ifp)
- if_rele(ifp);
-
- return error;
-}
-
-/* Process NETMAP_REQ_VALE_ATTACH.
- */
-int
-nm_bdg_ctl_attach(struct nmreq_header *hdr, void *auth_token)
-{
- struct nmreq_vale_attach *req =
- (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
- struct netmap_vp_adapter * vpna;
- struct netmap_adapter *na;
- struct netmap_mem_d *nmd = NULL;
- struct nm_bridge *b = NULL;
- int error;
-
- NMG_LOCK();
- /* permission check for modified bridges */
- b = nm_find_bridge(hdr->nr_name, 0 /* don't create */);
- if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
- error = EACCES;
- goto unlock_exit;
- }
-
- if (req->reg.nr_mem_id) {
- nmd = netmap_mem_find(req->reg.nr_mem_id);
- if (nmd == NULL) {
- error = EINVAL;
- goto unlock_exit;
- }
- }
-
- /* check for existing one */
- error = netmap_get_bdg_na(hdr, &na, nmd, 0);
- if (!error) {
- error = EBUSY;
- goto unref_exit;
- }
- error = netmap_get_bdg_na(hdr, &na,
- nmd, 1 /* create if not exists */);
- if (error) { /* no device */
- goto unlock_exit;
- }
-
- if (na == NULL) { /* VALE prefix missing */
- error = EINVAL;
- goto unlock_exit;
- }
-
- if (NETMAP_OWNED_BY_ANY(na)) {
- error = EBUSY;
- goto unref_exit;
- }
-
- if (na->nm_bdg_ctl) {
- /* nop for VALE ports. The bwrap needs to put the hwna
- * in netmap mode (see netmap_bwrap_bdg_ctl)
- */
- error = na->nm_bdg_ctl(hdr, na);
- if (error)
- goto unref_exit;
- ND("registered %s to netmap-mode", na->name);
- }
- vpna = (struct netmap_vp_adapter *)na;
- req->port_index = vpna->bdg_port;
- NMG_UNLOCK();
- return 0;
-
-unref_exit:
- netmap_adapter_put(na);
-unlock_exit:
- NMG_UNLOCK();
- return error;
-}
-
-static inline int
-nm_is_bwrap(struct netmap_adapter *na)
-{
- return na->nm_register == netmap_bwrap_reg;
-}
-
-/* Process NETMAP_REQ_VALE_DETACH.
- */
-int
-nm_bdg_ctl_detach(struct nmreq_header *hdr, void *auth_token)
-{
- struct nmreq_vale_detach *nmreq_det = (void *)(uintptr_t)hdr->nr_body;
- struct netmap_vp_adapter *vpna;
- struct netmap_adapter *na;
- struct nm_bridge *b = NULL;
- int error;
-
- NMG_LOCK();
- /* permission check for modified bridges */
- b = nm_find_bridge(hdr->nr_name, 0 /* don't create */);
- if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
- error = EACCES;
- goto unlock_exit;
- }
-
- error = netmap_get_bdg_na(hdr, &na, NULL, 0 /* don't create */);
- if (error) { /* no device, or another bridge or user owns the device */
- goto unlock_exit;
- }
-
- if (na == NULL) { /* VALE prefix missing */
- error = EINVAL;
- goto unlock_exit;
- } else if (nm_is_bwrap(na) &&
- ((struct netmap_bwrap_adapter *)na)->na_polling_state) {
- /* Don't detach a NIC with polling */
- error = EBUSY;
- goto unref_exit;
- }
-
- vpna = (struct netmap_vp_adapter *)na;
- if (na->na_vp != vpna) {
- /* trying to detach first attach of VALE persistent port attached
- * to 2 bridges
- */
- error = EBUSY;
- goto unref_exit;
- }
- nmreq_det->port_index = vpna->bdg_port;
-
- if (na->nm_bdg_ctl) {
- /* remove the port from bridge. The bwrap
- * also needs to put the hwna in normal mode
- */
- error = na->nm_bdg_ctl(hdr, na);
- }
-
-unref_exit:
- netmap_adapter_put(na);
-unlock_exit:
- NMG_UNLOCK();
- return error;
-
-}
-
-struct nm_bdg_polling_state;
-struct
-nm_bdg_kthread {
- struct nm_kctx *nmk;
- u_int qfirst;
- u_int qlast;
- struct nm_bdg_polling_state *bps;
-};
-
-struct nm_bdg_polling_state {
- bool configured;
- bool stopped;
- struct netmap_bwrap_adapter *bna;
- uint32_t mode;
- u_int qfirst;
- u_int qlast;
- u_int cpu_from;
- u_int ncpus;
- struct nm_bdg_kthread *kthreads;
-};
-
-static void
-netmap_bwrap_polling(void *data, int is_kthread)
-{
- struct nm_bdg_kthread *nbk = data;
- struct netmap_bwrap_adapter *bna;
- u_int qfirst, qlast, i;
- struct netmap_kring **kring0, *kring;
-
- if (!nbk)
- return;
- qfirst = nbk->qfirst;
- qlast = nbk->qlast;
- bna = nbk->bps->bna;
- kring0 = NMR(bna->hwna, NR_RX);
-
- for (i = qfirst; i < qlast; i++) {
- kring = kring0[i];
- kring->nm_notify(kring, 0);
- }
-}
-
-static int
-nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps)
-{
- struct nm_kctx_cfg kcfg;
- int i, j;
-
- bps->kthreads = nm_os_malloc(sizeof(struct nm_bdg_kthread) * bps->ncpus);
- if (bps->kthreads == NULL)
- return ENOMEM;
-
- bzero(&kcfg, sizeof(kcfg));
- kcfg.worker_fn = netmap_bwrap_polling;
- kcfg.use_kthread = 1;
- for (i = 0; i < bps->ncpus; i++) {
- struct nm_bdg_kthread *t = bps->kthreads + i;
- int all = (bps->ncpus == 1 &&
- bps->mode == NETMAP_POLLING_MODE_SINGLE_CPU);
- int affinity = bps->cpu_from + i;
-
- t->bps = bps;
- t->qfirst = all ? bps->qfirst /* must be 0 */: affinity;
- t->qlast = all ? bps->qlast : t->qfirst + 1;
- D("kthread %d a:%u qf:%u ql:%u", i, affinity, t->qfirst,
- t->qlast);
-
- kcfg.type = i;
- kcfg.worker_private = t;
- t->nmk = nm_os_kctx_create(&kcfg, NULL);
- if (t->nmk == NULL) {
- goto cleanup;
- }
- nm_os_kctx_worker_setaff(t->nmk, affinity);
- }
- return 0;
-
-cleanup:
- for (j = 0; j < i; j++) {
- struct nm_bdg_kthread *t = bps->kthreads + i;
- nm_os_kctx_destroy(t->nmk);
- }
- nm_os_free(bps->kthreads);
- return EFAULT;
-}
-
-/* A variant of ptnetmap_start_kthreads() */
-static int
-nm_bdg_polling_start_kthreads(struct nm_bdg_polling_state *bps)
-{
- int error, i, j;
-
- if (!bps) {
- D("polling is not configured");
- return EFAULT;
- }
- bps->stopped = false;
-
- for (i = 0; i < bps->ncpus; i++) {
- struct nm_bdg_kthread *t = bps->kthreads + i;
- error = nm_os_kctx_worker_start(t->nmk);
- if (error) {
- D("error in nm_kthread_start()");
- goto cleanup;
- }
- }
- return 0;
-
-cleanup:
- for (j = 0; j < i; j++) {
- struct nm_bdg_kthread *t = bps->kthreads + i;
- nm_os_kctx_worker_stop(t->nmk);
- }
- bps->stopped = true;
- return error;
-}
-
-static void
-nm_bdg_polling_stop_delete_kthreads(struct nm_bdg_polling_state *bps)
-{
- int i;
-
- if (!bps)
- return;
-
- for (i = 0; i < bps->ncpus; i++) {
- struct nm_bdg_kthread *t = bps->kthreads + i;
- nm_os_kctx_worker_stop(t->nmk);
- nm_os_kctx_destroy(t->nmk);
- }
- bps->stopped = true;
-}
-
-static int
-get_polling_cfg(struct nmreq_vale_polling *req, struct netmap_adapter *na,
- struct nm_bdg_polling_state *bps)
-{
- unsigned int avail_cpus, core_from;
- unsigned int qfirst, qlast;
- uint32_t i = req->nr_first_cpu_id;
- uint32_t req_cpus = req->nr_num_polling_cpus;
-
- avail_cpus = nm_os_ncpus();
-
- if (req_cpus == 0) {
- D("req_cpus must be > 0");
- return EINVAL;
- } else if (req_cpus >= avail_cpus) {
- D("Cannot use all the CPUs in the system");
- return EINVAL;
- }
-
- if (req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU) {
- /* Use a separate core for each ring. If nr_num_polling_cpus>1
- * more consecutive rings are polled.
- * For example, if nr_first_cpu_id=2 and nr_num_polling_cpus=2,
- * ring 2 and 3 are polled by core 2 and 3, respectively. */
- if (i + req_cpus > nma_get_nrings(na, NR_RX)) {
- D("Rings %u-%u not in range (have %d rings)",
- i, i + req_cpus, nma_get_nrings(na, NR_RX));
- return EINVAL;
- }
- qfirst = i;
- qlast = qfirst + req_cpus;
- core_from = qfirst;
-
- } else if (req->nr_mode == NETMAP_POLLING_MODE_SINGLE_CPU) {
- /* Poll all the rings using a core specified by nr_first_cpu_id.
- * the number of cores must be 1. */
- if (req_cpus != 1) {
- D("ncpus must be 1 for NETMAP_POLLING_MODE_SINGLE_CPU "
- "(was %d)", req_cpus);
- return EINVAL;
- }
- qfirst = 0;
- qlast = nma_get_nrings(na, NR_RX);
- core_from = i;
- } else {
- D("Invalid polling mode");
- return EINVAL;
- }
-
- bps->mode = req->nr_mode;
- bps->qfirst = qfirst;
- bps->qlast = qlast;
- bps->cpu_from = core_from;
- bps->ncpus = req_cpus;
- D("%s qfirst %u qlast %u cpu_from %u ncpus %u",
- req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU ?
- "MULTI" : "SINGLE",
- qfirst, qlast, core_from, req_cpus);
- return 0;
-}
-
-static int
-nm_bdg_ctl_polling_start(struct nmreq_vale_polling *req, struct netmap_adapter *na)
-{
- struct nm_bdg_polling_state *bps;
- struct netmap_bwrap_adapter *bna;
- int error;
-
- bna = (struct netmap_bwrap_adapter *)na;
- if (bna->na_polling_state) {
- D("ERROR adapter already in polling mode");
- return EFAULT;
- }
-
- bps = nm_os_malloc(sizeof(*bps));
- if (!bps)
- return ENOMEM;
- bps->configured = false;
- bps->stopped = true;
-
- if (get_polling_cfg(req, na, bps)) {
- nm_os_free(bps);
- return EINVAL;
- }
-
- if (nm_bdg_create_kthreads(bps)) {
- nm_os_free(bps);
- return EFAULT;
- }
-
- bps->configured = true;
- bna->na_polling_state = bps;
- bps->bna = bna;
-
- /* disable interrupts if possible */
- nma_intr_enable(bna->hwna, 0);
- /* start kthread now */
- error = nm_bdg_polling_start_kthreads(bps);
- if (error) {
- D("ERROR nm_bdg_polling_start_kthread()");
- nm_os_free(bps->kthreads);
- nm_os_free(bps);
- bna->na_polling_state = NULL;
- nma_intr_enable(bna->hwna, 1);
- }
- return error;
-}
-
-static int
-nm_bdg_ctl_polling_stop(struct netmap_adapter *na)
-{
- struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na;
- struct nm_bdg_polling_state *bps;
-
- if (!bna->na_polling_state) {
- D("ERROR adapter is not in polling mode");
- return EFAULT;
- }
- bps = bna->na_polling_state;
- nm_bdg_polling_stop_delete_kthreads(bna->na_polling_state);
- bps->configured = false;
- nm_os_free(bps);
- bna->na_polling_state = NULL;
- /* reenable interrupts */
- nma_intr_enable(bna->hwna, 1);
- return 0;
-}
-
-int
-nm_bdg_polling(struct nmreq_header *hdr)
-{
- struct nmreq_vale_polling *req =
- (struct nmreq_vale_polling *)(uintptr_t)hdr->nr_body;
- struct netmap_adapter *na = NULL;
- int error = 0;
-
- NMG_LOCK();
- error = netmap_get_bdg_na(hdr, &na, NULL, /*create=*/0);
- if (na && !error) {
- if (!nm_is_bwrap(na)) {
- error = EOPNOTSUPP;
- } else if (hdr->nr_reqtype == NETMAP_BDG_POLLING_ON) {
- error = nm_bdg_ctl_polling_start(req, na);
- if (!error)
- netmap_adapter_get(na);
- } else {
- error = nm_bdg_ctl_polling_stop(na);
- if (!error)
- netmap_adapter_put(na);
- }
- netmap_adapter_put(na);
- } else if (!na && !error) {
- /* Not VALE port. */
- error = EINVAL;
- }
- NMG_UNLOCK();
-
- return error;
-}
-
-/* Process NETMAP_REQ_VALE_LIST. */
-int
-netmap_bdg_list(struct nmreq_header *hdr)
-{
- struct nmreq_vale_list *req =
- (struct nmreq_vale_list *)(uintptr_t)hdr->nr_body;
- int namelen = strlen(hdr->nr_name);
- struct nm_bridge *b, *bridges;
- struct netmap_vp_adapter *vpna;
- int error = 0, i, j;
- u_int num_bridges;
-
- netmap_bns_getbridges(&bridges, &num_bridges);
-
- /* this is used to enumerate bridges and ports */
- if (namelen) { /* look up indexes of bridge and port */
- if (strncmp(hdr->nr_name, NM_BDG_NAME,
- strlen(NM_BDG_NAME))) {
- return EINVAL;
- }
- NMG_LOCK();
- b = nm_find_bridge(hdr->nr_name, 0 /* don't create */);
- if (!b) {
- NMG_UNLOCK();
- return ENOENT;
- }
-
- req->nr_bridge_idx = b - bridges; /* bridge index */
- req->nr_port_idx = NM_BDG_NOPORT;
- for (j = 0; j < b->bdg_active_ports; j++) {
- i = b->bdg_port_index[j];
- vpna = b->bdg_ports[i];
- if (vpna == NULL) {
- D("This should not happen");
- continue;
- }
- /* the former and the latter identify a
- * virtual port and a NIC, respectively
- */
- if (!strcmp(vpna->up.name, hdr->nr_name)) {
- req->nr_port_idx = i; /* port index */
- break;
- }
- }
- NMG_UNLOCK();
- } else {
- /* return the first non-empty entry starting from
- * bridge nr_arg1 and port nr_arg2.
- *
- * Users can detect the end of the same bridge by
- * seeing the new and old value of nr_arg1, and can
- * detect the end of all the bridge by error != 0
- */
- i = req->nr_bridge_idx;
- j = req->nr_port_idx;
-
- NMG_LOCK();
- for (error = ENOENT; i < NM_BRIDGES; i++) {
- b = bridges + i;
- for ( ; j < NM_BDG_MAXPORTS; j++) {
- if (b->bdg_ports[j] == NULL)
- continue;
- vpna = b->bdg_ports[j];
- /* write back the VALE switch name */
- strncpy(hdr->nr_name, vpna->up.name,
- (size_t)IFNAMSIZ);
- error = 0;
- goto out;
- }
- j = 0; /* following bridges scan from 0 */
- }
- out:
- req->nr_bridge_idx = i;
- req->nr_port_idx = j;
- NMG_UNLOCK();
- }
-
- return error;
-}
-
/* Called by external kernel modules (e.g., Openvswitch).
- * to set configure/lookup/dtor functions of a VALE instance.
- * Register callbacks to the given bridge. 'name' may be just
- * bridge's name (including ':' if it is not just NM_BDG_NAME).
- *
- * Called without NMG_LOCK.
- */
-
-int
-netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *private_data, void *auth_token)
-{
- struct nm_bridge *b;
- int error = 0;
-
- NMG_LOCK();
- b = nm_find_bridge(name, 0 /* don't create */);
- if (!b) {
- error = ENXIO;
- goto unlock_regops;
- }
- if (!nm_bdg_valid_auth_token(b, auth_token)) {
- error = EACCES;
- goto unlock_regops;
- }
-
- BDG_WLOCK(b);
- if (!bdg_ops) {
- /* resetting the bridge */
- bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
- b->bdg_ops = &default_bdg_ops;
- b->private_data = b->ht;
- } else {
- /* modifying the bridge */
- b->private_data = private_data;
- b->bdg_ops = bdg_ops;
- }
- BDG_WUNLOCK(b);
-
-unlock_regops:
- NMG_UNLOCK();
- return error;
-}
-
-/* Called by external kernel modules (e.g., Openvswitch).
* to modify the private data previously given to regops().
* 'name' may be just bridge's name (including ':' if it
* is not just NM_BDG_NAME).
@@ -1579,7 +349,7 @@
int error = 0;
NMG_LOCK();
- b = nm_find_bridge(name, 0 /* don't create */);
+ b = nm_find_bridge(name, 0 /* don't create */, NULL);
if (!b) {
error = EINVAL;
goto unlock_update_priv;
@@ -1598,28 +368,7 @@
return error;
}
-int
-netmap_bdg_config(struct nm_ifreq *nr)
-{
- struct nm_bridge *b;
- int error = EINVAL;
- NMG_LOCK();
- b = nm_find_bridge(nr->nifr_name, 0);
- if (!b) {
- NMG_UNLOCK();
- return error;
- }
- NMG_UNLOCK();
- /* Don't call config() with NMG_LOCK() held */
- BDG_RLOCK(b);
- if (b->bdg_ops->config != NULL)
- error = b->bdg_ops->config(nr);
- BDG_RUNLOCK(b);
- return error;
-}
-
-
/* nm_krings_create callback for VALE ports.
* Calls the standard netmap_krings_create, then adds leases on rx
* rings and bdgfwd on tx rings.
@@ -1798,52 +547,6 @@
#undef mix
-/* nm_register callback for VALE ports */
-static int
-netmap_vp_reg(struct netmap_adapter *na, int onoff)
-{
- struct netmap_vp_adapter *vpna =
- (struct netmap_vp_adapter*)na;
- enum txrx t;
- int i;
-
- /* persistent ports may be put in netmap mode
- * before being attached to a bridge
- */
- if (vpna->na_bdg)
- BDG_WLOCK(vpna->na_bdg);
- if (onoff) {
- for_rx_tx(t) {
- for (i = 0; i < netmap_real_rings(na, t); i++) {
- struct netmap_kring *kring = NMR(na, t)[i];
-
- if (nm_kring_pending_on(kring))
- kring->nr_mode = NKR_NETMAP_ON;
- }
- }
- if (na->active_fds == 0)
- na->na_flags |= NAF_NETMAP_ON;
- /* XXX on FreeBSD, persistent VALE ports should also
- * toggle IFCAP_NETMAP in na->ifp (2014-03-16)
- */
- } else {
- if (na->active_fds == 0)
- na->na_flags &= ~NAF_NETMAP_ON;
- for_rx_tx(t) {
- for (i = 0; i < netmap_real_rings(na, t); i++) {
- struct netmap_kring *kring = NMR(na, t)[i];
-
- if (nm_kring_pending_off(kring))
- kring->nr_mode = NKR_NETMAP_OFF;
- }
- }
- }
- if (vpna->na_bdg)
- BDG_WUNLOCK(vpna->na_bdg);
- return 0;
-}
-
-
/*
* Lookup function for a learning bridge.
* Update the hash table with the source address,
@@ -2361,86 +1064,6 @@
}
-/* rxsync code used by VALE ports nm_rxsync callback and also
- * internally by the brwap
- */
-static int
-netmap_vp_rxsync_locked(struct netmap_kring *kring, int flags)
-{
- struct netmap_adapter *na = kring->na;
- struct netmap_ring *ring = kring->ring;
- u_int nm_i, lim = kring->nkr_num_slots - 1;
- u_int head = kring->rhead;
- int n;
-
- if (head > lim) {
- D("ouch dangerous reset!!!");
- n = netmap_ring_reinit(kring);
- goto done;
- }
-
- /* First part, import newly received packets. */
- /* actually nothing to do here, they are already in the kring */
-
- /* Second part, skip past packets that userspace has released. */
- nm_i = kring->nr_hwcur;
- if (nm_i != head) {
- /* consistency check, but nothing really important here */
- for (n = 0; likely(nm_i != head); n++) {
- struct netmap_slot *slot = &ring->slot[nm_i];
- void *addr = NMB(na, slot);
-
- if (addr == NETMAP_BUF_BASE(kring->na)) { /* bad buf */
- D("bad buffer index %d, ignore ?",
- slot->buf_idx);
- }
- slot->flags &= ~NS_BUF_CHANGED;
- nm_i = nm_next(nm_i, lim);
- }
- kring->nr_hwcur = head;
- }
-
- n = 0;
-done:
- return n;
-}
-
-/*
- * nm_rxsync callback for VALE ports
- * user process reading from a VALE switch.
- * Already protected against concurrent calls from userspace,
- * but we must acquire the queue's lock to protect against
- * writers on the same queue.
- */
-static int
-netmap_vp_rxsync(struct netmap_kring *kring, int flags)
-{
- int n;
-
- mtx_lock(&kring->q_lock);
- n = netmap_vp_rxsync_locked(kring, flags);
- mtx_unlock(&kring->q_lock);
- return n;
-}
-
-
-/* nm_bdg_attach callback for VALE ports
- * The na_vp port is this same netmap_adapter. There is no host port.
- */
-static int
-netmap_vp_bdg_attach(const char *name, struct netmap_adapter *na)
-{
- struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
-
- if (vpna->na_bdg) {
- return netmap_bwrap_attach(name, na);
- }
- na->na_vp = vpna;
- strncpy(na->name, name, sizeof(na->name));
- na->na_hostvp = NULL;
- return 0;
-}
-
/* create a netmap_vp_adapter that describes a VALE port.
* Only persistent VALE ports have a non-null ifp.
*/
@@ -2536,635 +1159,270 @@
return error;
}
-/* Bridge wrapper code (bwrap).
- * This is used to connect a non-VALE-port netmap_adapter (hwna) to a
- * VALE switch.
- * The main task is to swap the meaning of tx and rx rings to match the
- * expectations of the VALE switch code (see nm_bdg_flush).
- *
- * The bwrap works by interposing a netmap_bwrap_adapter between the
- * rest of the system and the hwna. The netmap_bwrap_adapter looks like
- * a netmap_vp_adapter to the rest the system, but, internally, it
- * translates all callbacks to what the hwna expects.
- *
- * Note that we have to intercept callbacks coming from two sides:
- *
- * - callbacks coming from the netmap module are intercepted by
- * passing around the netmap_bwrap_adapter instead of the hwna
- *
- * - callbacks coming from outside of the netmap module only know
- * about the hwna. This, however, only happens in interrupt
- * handlers, where only the hwna->nm_notify callback is called.
- * What the bwrap does is to overwrite the hwna->nm_notify callback
- * with its own netmap_bwrap_intr_notify.
- * XXX This assumes that the hwna->nm_notify callback was the
- * standard netmap_notify(), as it is the case for nic adapters.
- * Any additional action performed by hwna->nm_notify will not be
- * performed by netmap_bwrap_intr_notify.
- *
- * Additionally, the bwrap can optionally attach the host rings pair
- * of the wrapped adapter to a different port of the switch.
+/* nm_bdg_attach callback for VALE ports
+ * The na_vp port is this same netmap_adapter. There is no host port.
*/
-
-
-static void
-netmap_bwrap_dtor(struct netmap_adapter *na)
-{
- struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
- struct netmap_adapter *hwna = bna->hwna;
- struct nm_bridge *b = bna->up.na_bdg,
- *bh = bna->host.na_bdg;
-
- if (bna->host.up.nm_mem)
- netmap_mem_put(bna->host.up.nm_mem);
-
- if (b) {
- netmap_bdg_detach_common(b, bna->up.bdg_port,
- (bh ? bna->host.bdg_port : -1));
- }
-
- ND("na %p", na);
- na->ifp = NULL;
- bna->host.up.ifp = NULL;
- hwna->na_vp = bna->saved_na_vp;
- hwna->na_hostvp = NULL;
- hwna->na_private = NULL;
- hwna->na_flags &= ~NAF_BUSY;
- netmap_adapter_put(hwna);
-
-}
-
-
-/*
- * Intr callback for NICs connected to a bridge.
- * Simply ignore tx interrupts (maybe we could try to recover space ?)
- * and pass received packets from nic to the bridge.
- *
- * XXX TODO check locking: this is called from the interrupt
- * handler so we should make sure that the interface is not
- * disconnected while passing down an interrupt.
- *
- * Note, no user process can access this NIC or the host stack.
- * The only part of the ring that is significant are the slots,
- * and head/cur/tail are set from the kring as needed
- * (part as a receive ring, part as a transmit ring).
- *
- * callback that overwrites the hwna notify callback.
- * Packets come from the outside or from the host stack and are put on an
- * hwna rx ring.
- * The bridge wrapper then sends the packets through the bridge.
- */
static int
-netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags)
+netmap_vp_bdg_attach(const char *name, struct netmap_adapter *na,
+ struct nm_bridge *b)
{
- struct netmap_adapter *na = kring->na;
- struct netmap_bwrap_adapter *bna = na->na_private;
- struct netmap_kring *bkring;
- struct netmap_vp_adapter *vpna = &bna->up;
- u_int ring_nr = kring->ring_id;
- int ret = NM_IRQ_COMPLETED;
- int error;
+ struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
- if (netmap_verbose)
- D("%s %s 0x%x", na->name, kring->name, flags);
-
- bkring = vpna->up.tx_rings[ring_nr];
-
- /* make sure the ring is not disabled */
- if (nm_kr_tryget(kring, 0 /* can't sleep */, NULL)) {
- return EIO;
+ if (b->bdg_ops != &vale_bdg_ops) {
+ return NM_NEED_BWRAP;
}
-
- if (netmap_verbose)
- D("%s head %d cur %d tail %d", na->name,
- kring->rhead, kring->rcur, kring->rtail);
-
- /* simulate a user wakeup on the rx ring
- * fetch packets that have arrived.
- */
- error = kring->nm_sync(kring, 0);
- if (error)
- goto put_out;
- if (kring->nr_hwcur == kring->nr_hwtail) {
- if (netmap_verbose)
- D("how strange, interrupt with no packets on %s",
- na->name);
- goto put_out;
+ if (vpna->na_bdg) {
+ return NM_NEED_BWRAP;
}
-
- /* new packets are kring->rcur to kring->nr_hwtail, and the bkring
- * had hwcur == bkring->rhead. So advance bkring->rhead to kring->nr_hwtail
- * to push all packets out.
- */
- bkring->rhead = bkring->rcur = kring->nr_hwtail;
-
- netmap_vp_txsync(bkring, flags);
-
- /* mark all buffers as released on this ring */
- kring->rhead = kring->rcur = kring->rtail = kring->nr_hwtail;
- /* another call to actually release the buffers */
- error = kring->nm_sync(kring, 0);
-
- /* The second rxsync may have further advanced hwtail. If this happens,
- * return NM_IRQ_RESCHED, otherwise just return NM_IRQ_COMPLETED. */
- if (kring->rcur != kring->nr_hwtail) {
- ret = NM_IRQ_RESCHED;
- }
-put_out:
- nm_kr_put(kring);
-
- return error ? error : ret;
-}
-
-
-/* nm_register callback for bwrap */
-static int
-netmap_bwrap_reg(struct netmap_adapter *na, int onoff)
-{
- struct netmap_bwrap_adapter *bna =
- (struct netmap_bwrap_adapter *)na;
- struct netmap_adapter *hwna = bna->hwna;
- struct netmap_vp_adapter *hostna = &bna->host;
- int error, i;
- enum txrx t;
-
- ND("%s %s", na->name, onoff ? "on" : "off");
-
- if (onoff) {
- /* netmap_do_regif has been called on the bwrap na.
- * We need to pass the information about the
- * memory allocator down to the hwna before
- * putting it in netmap mode
- */
- hwna->na_lut = na->na_lut;
-
- if (hostna->na_bdg) {
- /* if the host rings have been attached to switch,
- * we need to copy the memory allocator information
- * in the hostna also
- */
- hostna->up.na_lut = na->na_lut;
- }
-
- }
-
- /* pass down the pending ring state information */
- for_rx_tx(t) {
- for (i = 0; i < nma_get_nrings(na, t) + 1; i++)
- NMR(hwna, t)[i]->nr_pending_mode =
- NMR(na, t)[i]->nr_pending_mode;
- }
-
- /* forward the request to the hwna */
- error = hwna->nm_register(hwna, onoff);
- if (error)
- return error;
-
- /* copy up the current ring state information */
- for_rx_tx(t) {
- for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
- struct netmap_kring *kring = NMR(hwna, t)[i];
- NMR(na, t)[i]->nr_mode = kring->nr_mode;
- }
- }
-
- /* impersonate a netmap_vp_adapter */
- netmap_vp_reg(na, onoff);
- if (hostna->na_bdg)
- netmap_vp_reg(&hostna->up, onoff);
-
- if (onoff) {
- u_int i;
- /* intercept the hwna nm_nofify callback on the hw rings */
- for (i = 0; i < hwna->num_rx_rings; i++) {
- hwna->rx_rings[i]->save_notify = hwna->rx_rings[i]->nm_notify;
- hwna->rx_rings[i]->nm_notify = netmap_bwrap_intr_notify;
- }
- i = hwna->num_rx_rings; /* for safety */
- /* save the host ring notify unconditionally */
- hwna->rx_rings[i]->save_notify = hwna->rx_rings[i]->nm_notify;
- if (hostna->na_bdg) {
- /* also intercept the host ring notify */
- hwna->rx_rings[i]->nm_notify = netmap_bwrap_intr_notify;
- }
- if (na->active_fds == 0)
- na->na_flags |= NAF_NETMAP_ON;
- } else {
- u_int i;
-
- if (na->active_fds == 0)
- na->na_flags &= ~NAF_NETMAP_ON;
-
- /* reset all notify callbacks (including host ring) */
- for (i = 0; i <= hwna->num_rx_rings; i++) {
- hwna->rx_rings[i]->nm_notify = hwna->rx_rings[i]->save_notify;
- hwna->rx_rings[i]->save_notify = NULL;
- }
- hwna->na_lut.lut = NULL;
- hwna->na_lut.plut = NULL;
- hwna->na_lut.objtotal = 0;
- hwna->na_lut.objsize = 0;
-
- /* pass ownership of the netmap rings to the hwna */
- for_rx_tx(t) {
- for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
- NMR(na, t)[i]->ring = NULL;
- }
- }
-
- }
-
+ na->na_vp = vpna;
+ strlcpy(na->name, name, sizeof(na->name)); /* unlike strncpy, always NUL-terminates na->name */
+ na->na_hostvp = NULL;
return 0;
}
-/* nm_config callback for bwrap */
static int
-netmap_bwrap_config(struct netmap_adapter *na, struct nm_config_info *info)
+netmap_vale_bwrap_krings_create(struct netmap_adapter *na)
{
- struct netmap_bwrap_adapter *bna =
- (struct netmap_bwrap_adapter *)na;
- struct netmap_adapter *hwna = bna->hwna;
int error;
- /* Forward the request to the hwna. It may happen that nobody
- * registered hwna yet, so netmap_mem_get_lut() may have not
- * been called yet. */
- error = netmap_mem_get_lut(hwna->nm_mem, &hwna->na_lut);
- if (error)
- return error;
- netmap_update_config(hwna);
- /* swap the results and propagate */
- info->num_tx_rings = hwna->num_rx_rings;
- info->num_tx_descs = hwna->num_rx_desc;
- info->num_rx_rings = hwna->num_tx_rings;
- info->num_rx_descs = hwna->num_tx_desc;
- info->rx_buf_maxsize = hwna->rx_buf_maxsize;
-
- return 0;
-}
-
-
-/* nm_krings_create callback for bwrap */
-static int
-netmap_bwrap_krings_create(struct netmap_adapter *na)
-{
- struct netmap_bwrap_adapter *bna =
- (struct netmap_bwrap_adapter *)na;
- struct netmap_adapter *hwna = bna->hwna;
- struct netmap_adapter *hostna = &bna->host.up;
- int i, error = 0;
- enum txrx t;
-
- ND("%s", na->name);
-
/* impersonate a netmap_vp_adapter */
error = netmap_vp_krings_create(na);
if (error)
return error;
-
- /* also create the hwna krings */
- error = hwna->nm_krings_create(hwna);
+ error = netmap_bwrap_krings_create_common(na);
if (error) {
- goto err_del_vp_rings;
+ netmap_vp_krings_delete(na);
}
-
- /* increment the usage counter for all the hwna krings */
- for_rx_tx(t) {
- for (i = 0; i < nma_get_nrings(hwna, t) + 1; i++) {
- NMR(hwna, t)[i]->users++;
- }
- }
-
- /* now create the actual rings */
- error = netmap_mem_rings_create(hwna);
- if (error) {
- goto err_dec_users;
- }
-
- /* cross-link the netmap rings
- * The original number of rings comes from hwna,
- * rx rings on one side equals tx rings on the other.
- */
- for_rx_tx(t) {
- enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
- for (i = 0; i < nma_get_nrings(hwna, r) + 1; i++) {
- NMR(na, t)[i]->nkr_num_slots = NMR(hwna, r)[i]->nkr_num_slots;
- NMR(na, t)[i]->ring = NMR(hwna, r)[i]->ring;
- }
- }
-
- if (na->na_flags & NAF_HOST_RINGS) {
- /* the hostna rings are the host rings of the bwrap.
- * The corresponding krings must point back to the
- * hostna
- */
- hostna->tx_rings = &na->tx_rings[na->num_tx_rings];
- hostna->tx_rings[0]->na = hostna;
- hostna->rx_rings = &na->rx_rings[na->num_rx_rings];
- hostna->rx_rings[0]->na = hostna;
- }
-
- return 0;
-
-err_dec_users:
- for_rx_tx(t) {
- NMR(hwna, t)[i]->users--;
- }
- hwna->nm_krings_delete(hwna);
-err_del_vp_rings:
- netmap_vp_krings_delete(na);
-
return error;
}
-
static void
-netmap_bwrap_krings_delete(struct netmap_adapter *na)
+netmap_vale_bwrap_krings_delete(struct netmap_adapter *na)
{
- struct netmap_bwrap_adapter *bna =
- (struct netmap_bwrap_adapter *)na;
- struct netmap_adapter *hwna = bna->hwna;
- enum txrx t;
- int i;
-
- ND("%s", na->name);
-
- /* decrement the usage counter for all the hwna krings */
- for_rx_tx(t) {
- for (i = 0; i < nma_get_nrings(hwna, t) + 1; i++) {
- NMR(hwna, t)[i]->users--;
- }
- }
-
- /* delete any netmap rings that are no longer needed */
- netmap_mem_rings_delete(hwna);
- hwna->nm_krings_delete(hwna);
+ netmap_bwrap_krings_delete_common(na);
netmap_vp_krings_delete(na);
}
-
-/* notify method for the bridge-->hwna direction */
static int
-netmap_bwrap_notify(struct netmap_kring *kring, int flags)
+netmap_vale_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
{
- struct netmap_adapter *na = kring->na;
- struct netmap_bwrap_adapter *bna = na->na_private;
- struct netmap_adapter *hwna = bna->hwna;
- u_int ring_n = kring->ring_id;
- u_int lim = kring->nkr_num_slots - 1;
- struct netmap_kring *hw_kring;
- int error;
-
- ND("%s: na %s hwna %s",
- (kring ? kring->name : "NULL!"),
- (na ? na->name : "NULL!"),
- (hwna ? hwna->name : "NULL!"));
- hw_kring = hwna->tx_rings[ring_n];
-
- if (nm_kr_tryget(hw_kring, 0, NULL)) {
- return ENXIO;
- }
-
- /* first step: simulate a user wakeup on the rx ring */
- netmap_vp_rxsync(kring, flags);
- ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
- na->name, ring_n,
- kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
- ring->head, ring->cur, ring->tail,
- hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_ring->rtail);
- /* second step: the new packets are sent on the tx ring
- * (which is actually the same ring)
- */
- hw_kring->rhead = hw_kring->rcur = kring->nr_hwtail;
- error = hw_kring->nm_sync(hw_kring, flags);
- if (error)
- goto put_out;
-
- /* third step: now we are back the rx ring */
- /* claim ownership on all hw owned bufs */
- kring->rhead = kring->rcur = nm_next(hw_kring->nr_hwtail, lim); /* skip past reserved slot */
-
- /* fourth step: the user goes to sleep again, causing another rxsync */
- netmap_vp_rxsync(kring, flags);
- ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
- na->name, ring_n,
- kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
- ring->head, ring->cur, ring->tail,
- hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
-put_out:
- nm_kr_put(hw_kring);
-
- return error ? error : NM_IRQ_COMPLETED;
-}
-
-
-/* nm_bdg_ctl callback for the bwrap.
- * Called on bridge-attach and detach, as an effect of vale-ctl -[ahd].
- * On attach, it needs to provide a fake netmap_priv_d structure and
- * perform a netmap_do_regif() on the bwrap. This will put both the
- * bwrap and the hwna in netmap mode, with the netmap rings shared
- * and cross linked. Moroever, it will start intercepting interrupts
- * directed to hwna.
- */
-static int
-netmap_bwrap_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na)
-{
- struct netmap_priv_d *npriv;
- struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
- int error = 0;
-
- if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
- struct nmreq_vale_attach *req =
- (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
- if (req->reg.nr_ringid != 0 ||
- (req->reg.nr_mode != NR_REG_ALL_NIC &&
- req->reg.nr_mode != NR_REG_NIC_SW)) {
- /* We only support attaching all the NIC rings
- * and/or the host stack. */
- return EINVAL;
- }
- if (NETMAP_OWNED_BY_ANY(na)) {
- return EBUSY;
- }
- if (bna->na_kpriv) {
- /* nothing to do */
- return 0;
- }
- npriv = netmap_priv_new();
- if (npriv == NULL)
- return ENOMEM;
- npriv->np_ifp = na->ifp; /* let the priv destructor release the ref */
- error = netmap_do_regif(npriv, na, req->reg.nr_mode,
- req->reg.nr_ringid, req->reg.nr_flags);
- if (error) {
- netmap_priv_delete(npriv);
- return error;
- }
- bna->na_kpriv = npriv;
- na->na_flags |= NAF_BUSY;
- } else {
- if (na->active_fds == 0) /* not registered */
- return EINVAL;
- netmap_priv_delete(bna->na_kpriv);
- bna->na_kpriv = NULL;
- na->na_flags &= ~NAF_BUSY;
- }
-
- return error;
-}
-
-/* attach a bridge wrapper to the 'real' device */
-int
-netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
-{
struct netmap_bwrap_adapter *bna;
struct netmap_adapter *na = NULL;
struct netmap_adapter *hostna = NULL;
- int error = 0;
- enum txrx t;
+ int error;
- /* make sure the NIC is not already in use */
- if (NETMAP_OWNED_BY_ANY(hwna)) {
- D("NIC %s busy, cannot attach to bridge", hwna->name);
- return EBUSY;
- }
-
bna = nm_os_malloc(sizeof(*bna));
if (bna == NULL) {
return ENOMEM;
}
-
na = &bna->up.up;
- /* make bwrap ifp point to the real ifp */
- na->ifp = hwna->ifp;
- if_ref(na->ifp);
- na->na_private = bna;
strncpy(na->name, nr_name, sizeof(na->name));
- /* fill the ring data for the bwrap adapter with rx/tx meanings
- * swapped. The real cross-linking will be done during register,
- * when all the krings will have been created.
- */
- for_rx_tx(t) {
- enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
- nma_set_nrings(na, t, nma_get_nrings(hwna, r));
- nma_set_ndesc(na, t, nma_get_ndesc(hwna, r));
- }
- na->nm_dtor = netmap_bwrap_dtor;
na->nm_register = netmap_bwrap_reg;
- // na->nm_txsync = netmap_bwrap_txsync;
+ na->nm_txsync = netmap_vp_txsync;
// na->nm_rxsync = netmap_bwrap_rxsync;
- na->nm_config = netmap_bwrap_config;
- na->nm_krings_create = netmap_bwrap_krings_create;
- na->nm_krings_delete = netmap_bwrap_krings_delete;
+ na->nm_krings_create = netmap_vale_bwrap_krings_create;
+ na->nm_krings_delete = netmap_vale_bwrap_krings_delete;
na->nm_notify = netmap_bwrap_notify;
- na->nm_bdg_ctl = netmap_bwrap_bdg_ctl;
- na->pdev = hwna->pdev;
- na->nm_mem = netmap_mem_get(hwna->nm_mem);
- na->virt_hdr_len = hwna->virt_hdr_len;
- na->rx_buf_maxsize = hwna->rx_buf_maxsize;
bna->up.retry = 1; /* XXX maybe this should depend on the hwna */
/* Set the mfs, needed on the VALE mismatch datapath. */
bna->up.mfs = NM_BDG_MFS_DEFAULT;
- bna->hwna = hwna;
- netmap_adapter_get(hwna);
- hwna->na_private = bna; /* weak reference */
- bna->saved_na_vp = hwna->na_vp;
- hwna->na_vp = &bna->up;
- bna->up.up.na_vp = &(bna->up);
-
if (hwna->na_flags & NAF_HOST_RINGS) {
- if (hwna->na_flags & NAF_SW_ONLY)
- na->na_flags |= NAF_SW_ONLY;
- na->na_flags |= NAF_HOST_RINGS;
hostna = &bna->host.up;
- snprintf(hostna->name, sizeof(hostna->name), "%s^", nr_name);
- hostna->ifp = hwna->ifp;
- for_rx_tx(t) {
- enum txrx r = nm_txrx_swap(t);
- nma_set_nrings(hostna, t, 1);
- nma_set_ndesc(hostna, t, nma_get_ndesc(hwna, r));
- }
- // hostna->nm_txsync = netmap_bwrap_host_txsync;
- // hostna->nm_rxsync = netmap_bwrap_host_rxsync;
hostna->nm_notify = netmap_bwrap_notify;
- hostna->nm_mem = netmap_mem_get(na->nm_mem);
- hostna->na_private = bna;
- hostna->na_vp = &bna->up;
- na->na_hostvp = hwna->na_hostvp =
- hostna->na_hostvp = &bna->host;
- hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */
- hostna->rx_buf_maxsize = hwna->rx_buf_maxsize;
bna->host.mfs = NM_BDG_MFS_DEFAULT;
}
- ND("%s<->%s txr %d txd %d rxr %d rxd %d",
- na->name, ifp->if_xname,
- na->num_tx_rings, na->num_tx_desc,
- na->num_rx_rings, na->num_rx_desc);
-
- error = netmap_attach_common(na);
+ error = netmap_bwrap_attach_common(na, hwna);
if (error) {
- goto err_free;
+ nm_os_free(bna);
}
- hwna->na_flags |= NAF_BUSY;
- return 0;
-
-err_free:
- hwna->na_vp = hwna->na_hostvp = NULL;
- netmap_adapter_put(hwna);
- nm_os_free(bna);
return error;
-
}
-struct nm_bridge *
-netmap_init_bridges2(u_int n)
+int
+netmap_get_vale_na(struct nmreq_header *hdr, struct netmap_adapter **na,
+ struct netmap_mem_d *nmd, int create)
{
- int i;
- struct nm_bridge *b;
+ return netmap_get_bdg_na(hdr, na, nmd, create, &vale_bdg_ops);
+}
- b = nm_os_malloc(sizeof(struct nm_bridge) * n);
- if (b == NULL)
- return NULL;
- for (i = 0; i < n; i++)
- BDG_RWINIT(&b[i]);
- return b;
+
+/* creates a persistent VALE port */
+int
+nm_vi_create(struct nmreq_header *hdr)
+{
+ struct nmreq_vale_newif *req =
+ (struct nmreq_vale_newif *)(uintptr_t)hdr->nr_body;
+ int error = 0;
+ /* Build a nmreq_register out of the nmreq_vale_newif,
+ * so that we can call netmap_get_bdg_na(). */
+ struct nmreq_register regreq;
+ bzero(&regreq, sizeof(regreq));
+ regreq.nr_tx_slots = req->nr_tx_slots;
+ regreq.nr_rx_slots = req->nr_rx_slots;
+ regreq.nr_tx_rings = req->nr_tx_rings;
+ regreq.nr_rx_rings = req->nr_rx_rings;
+ regreq.nr_mem_id = req->nr_mem_id;
+ hdr->nr_reqtype = NETMAP_REQ_REGISTER;
+ hdr->nr_body = (uintptr_t)&regreq;
+ error = netmap_vi_create(hdr, 0 /* no autodelete */);
+ hdr->nr_reqtype = NETMAP_REQ_VALE_NEWIF;
+ hdr->nr_body = (uintptr_t)req;
+ /* Write back to the original struct. */
+ req->nr_tx_slots = regreq.nr_tx_slots;
+ req->nr_rx_slots = regreq.nr_rx_slots;
+ req->nr_tx_rings = regreq.nr_tx_rings;
+ req->nr_rx_rings = regreq.nr_rx_rings;
+ req->nr_mem_id = regreq.nr_mem_id;
+ return error;
}
-void
-netmap_uninit_bridges2(struct nm_bridge *b, u_int n)
+/* remove a persistent VALE port from the system */
+int
+nm_vi_destroy(const char *name)
{
- int i;
+ struct ifnet *ifp;
+ struct netmap_vp_adapter *vpna;
+ int error;
- if (b == NULL)
- return;
+ ifp = ifunit_ref(name);
+ if (!ifp)
+ return ENXIO;
+ NMG_LOCK();
+ /* make sure this is actually a VALE port */
+ if (!NM_NA_VALID(ifp) || NA(ifp)->nm_register != netmap_vp_reg) {
+ error = EINVAL;
+ goto err;
+ }
- for (i = 0; i < n; i++)
- BDG_RWDESTROY(&b[i]);
- nm_os_free(b);
+ vpna = (struct netmap_vp_adapter *)NA(ifp);
+
+ /* we can only destroy ports that were created via NETMAP_BDG_NEWIF */
+ if (vpna->autodelete) {
+ error = EINVAL;
+ goto err;
+ }
+
+ /* also make sure that nobody is using the interface */
+ if (NETMAP_OWNED_BY_ANY(&vpna->up) ||
+ vpna->up.na_refcount > 1 /* any ref besides the one in nm_vi_create()? */) {
+ error = EBUSY;
+ goto err;
+ }
+
+ NMG_UNLOCK();
+
+ D("destroying a persistent vale interface %s", ifp->if_xname);
+ /* Linux requires that all references be released
+ * before the interface is unregistered.
+ */
+ netmap_detach(ifp);
+ if_rele(ifp);
+ nm_os_vi_detach(ifp);
+ return 0;
+
+err:
+ NMG_UNLOCK();
+ if_rele(ifp);
+ return error;
}
-int
-netmap_init_bridges(void)
+static int
+nm_update_info(struct nmreq_register *req, struct netmap_adapter *na)
{
-#ifdef CONFIG_NET_NS
- return netmap_bns_register();
-#else
- nm_bridges = netmap_init_bridges2(NM_BRIDGES);
- if (nm_bridges == NULL)
- return ENOMEM;
- return 0;
-#endif
+ req->nr_rx_rings = na->num_rx_rings;
+ req->nr_tx_rings = na->num_tx_rings;
+ req->nr_rx_slots = na->num_rx_desc;
+ req->nr_tx_slots = na->num_tx_desc;
+ return netmap_mem_get_info(na->nm_mem, &req->nr_memsize, NULL,
+ &req->nr_mem_id);
}
-void
-netmap_uninit_bridges(void)
+
+/*
+ * Create a virtual interface registered to the system.
+ * The interface will be attached to a bridge later.
+ */
+int
+netmap_vi_create(struct nmreq_header *hdr, int autodelete)
{
-#ifdef CONFIG_NET_NS
- netmap_bns_unregister();
-#else
- netmap_uninit_bridges2(nm_bridges, NM_BRIDGES);
-#endif
+ struct nmreq_register *req = (struct nmreq_register *)(uintptr_t)hdr->nr_body;
+ struct ifnet *ifp;
+ struct netmap_vp_adapter *vpna;
+ struct netmap_mem_d *nmd = NULL;
+ int error;
+
+ if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) {
+ return EINVAL;
+ }
+
+ /* don't include VALE prefix */
+ if (!strncmp(hdr->nr_name, NM_BDG_NAME, strlen(NM_BDG_NAME)))
+ return EINVAL;
+ if (strlen(hdr->nr_name) >= IFNAMSIZ) {
+ return EINVAL;
+ }
+ ifp = ifunit_ref(hdr->nr_name);
+ if (ifp) { /* already exists, cannot create a new one */
+ error = EEXIST;
+ NMG_LOCK();
+ if (NM_NA_VALID(ifp)) {
+ int update_err = nm_update_info(req, NA(ifp));
+ if (update_err)
+ error = update_err;
+ }
+ NMG_UNLOCK();
+ if_rele(ifp);
+ return error;
+ }
+ error = nm_os_vi_persist(hdr->nr_name, &ifp);
+ if (error)
+ return error;
+
+ NMG_LOCK();
+ if (req->nr_mem_id) {
+ nmd = netmap_mem_find(req->nr_mem_id);
+ if (nmd == NULL) {
+ error = EINVAL;
+ goto err_1;
+ }
+ }
+ /* netmap_vp_create creates a struct netmap_vp_adapter */
+ error = netmap_vp_create(hdr, ifp, nmd, &vpna);
+ if (error) {
+ D("error %d", error);
+ goto err_1;
+ }
+ /* persist-specific routines */
+ vpna->up.nm_bdg_ctl = netmap_vp_bdg_ctl;
+ if (!autodelete) {
+ netmap_adapter_get(&vpna->up);
+ } else {
+ vpna->autodelete = 1;
+ }
+ NM_ATTACH_NA(ifp, &vpna->up);
+ /* return the updated info */
+ error = nm_update_info(req, &vpna->up);
+ if (error) {
+ goto err_2;
+ }
+ ND("returning nr_mem_id %d", req->nr_mem_id);
+ if (nmd)
+ netmap_mem_put(nmd);
+ NMG_UNLOCK();
+ ND("created %s", ifp->if_xname);
+ return 0;
+
+err_2:
+ netmap_detach(ifp);
+err_1:
+ if (nmd)
+ netmap_mem_put(nmd);
+ NMG_UNLOCK();
+ nm_os_vi_detach(ifp);
+
+ return error;
}
+
#endif /* WITH_VALE */
Index: head/sys/net/netmap.h
===================================================================
--- head/sys/net/netmap.h
+++ head/sys/net/netmap.h
@@ -237,6 +237,8 @@
* are the number of fragments.
*/
+#define NETMAP_MAX_FRAGS 64 /* max number of fragments */
+
/*
* struct netmap_ring
Index: head/sys/net/netmap_user.h
===================================================================
--- head/sys/net/netmap_user.h
+++ head/sys/net/netmap_user.h
@@ -1029,20 +1029,35 @@
for (c = 0; c < n ; c++, ri++) {
/* compute current ring to use */
struct netmap_ring *ring;
- uint32_t i, idx;
+ uint32_t i, j, idx;
+ size_t rem;
if (ri > d->last_tx_ring)
ri = d->first_tx_ring;
ring = NETMAP_TXRING(d->nifp, ri);
- if (nm_ring_empty(ring)) {
- continue;
+ rem = size;
+ j = ring->cur;
+ while (rem > ring->nr_buf_size && j != ring->tail) {
+ rem -= ring->nr_buf_size;
+ j = nm_ring_next(ring, j);
}
+ if (j == ring->tail && rem > 0)
+ continue;
i = ring->cur;
+ while (i != j) {
+ idx = ring->slot[i].buf_idx;
+ ring->slot[i].len = ring->nr_buf_size;
+ ring->slot[i].flags = NS_MOREFRAG;
+ nm_pkt_copy(buf, NETMAP_BUF(ring, idx), ring->nr_buf_size);
+ i = nm_ring_next(ring, i);
+ buf = (char *)buf + ring->nr_buf_size;
+ }
idx = ring->slot[i].buf_idx;
- ring->slot[i].len = size;
- nm_pkt_copy(buf, NETMAP_BUF(ring, idx), size);
- d->cur_tx_ring = ri;
+ ring->slot[i].len = rem;
+ ring->slot[i].flags = 0;
+ nm_pkt_copy(buf, NETMAP_BUF(ring, idx), rem);
ring->head = ring->cur = nm_ring_next(ring, i);
+ d->cur_tx_ring = ri;
return size;
}
return 0; /* fail */
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Fri, Jul 3, 1:16 AM (20 h, 8 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
34608013
Default Alt Text
D17364.id49494.diff (166 KB)
Attached To
Mode
D17364: netmap: align codebase to the current upstream (commit id 8374e1a7e6941)
Attached
Detach File
Event Timeline
Log In to Comment