Index: head/sys/conf/files
===================================================================
--- head/sys/conf/files
+++ head/sys/conf/files
@@ -2522,6 +2522,7 @@
 dev/netmap/netmap_pt.c		optional netmap
 dev/netmap/netmap_vale.c	optional netmap
 dev/netmap/netmap_legacy.c	optional netmap
+dev/netmap/netmap_bdg.c		optional netmap
 # compile-with "${NORMAL_C} -Wconversion -Wextra"
 dev/nfsmb/nfsmb.c		optional nfsmb pci
 dev/nge/if_nge.c		optional nge
Index: head/sys/dev/netmap/netmap.c
===================================================================
--- head/sys/dev/netmap/netmap.c
+++ head/sys/dev/netmap/netmap.c
@@ -521,6 +521,9 @@
 int netmap_generic_ringsize = 1024;
 int netmap_generic_rings = 1;

+/* Non-zero to enable checksum offloading in NIC drivers */
+int netmap_generic_hwcsum = 0;
+
 /* Non-zero if ptnet devices are allowed to use virtio-net headers. */
 int ptnet_vnet_hdr = 1;

@@ -549,6 +552,9 @@
 SYSCTL_INT(_dev_netmap, OID_AUTO, admode, CTLFLAG_RW, &netmap_admode,
     0, "Adapter mode. 0 selects the best option available,"
     "1 forces native adapter, 2 forces emulated adapter");
+SYSCTL_INT(_dev_netmap, OID_AUTO, generic_hwcsum, CTLFLAG_RW, &netmap_generic_hwcsum,
+    0, "Hardware checksums. 0 to disable checksum generation by the NIC (default), "
+    "1 to enable checksum generation by the NIC");
 SYSCTL_INT(_dev_netmap, OID_AUTO, generic_mit, CTLFLAG_RW,
     &netmap_generic_mit, 0, "RX notification interval in nanoseconds");
 SYSCTL_INT(_dev_netmap, OID_AUTO, generic_ringsize, CTLFLAG_RW,
@@ -827,8 +833,8 @@
 	}

 	/* account for the (possibly fake) host rings */
-	n[NR_TX] = na->num_tx_rings + 1;
-	n[NR_RX] = na->num_rx_rings + 1;
+	n[NR_TX] = netmap_all_rings(na, NR_TX);
+	n[NR_RX] = netmap_all_rings(na, NR_RX);
 	len = (n[NR_TX] + n[NR_RX]) *
 		(sizeof(struct netmap_kring) + sizeof(struct netmap_kring *))
@@ -930,11 +936,14 @@
 void
 netmap_hw_krings_delete(struct netmap_adapter *na)
 {
-	struct mbq *q = &na->rx_rings[na->num_rx_rings]->rx_queue;
+	u_int lim = netmap_real_rings(na, NR_RX), i;

-	ND("destroy sw mbq with len %d", mbq_len(q));
-	mbq_purge(q);
-	mbq_safe_fini(q);
+	for (i = nma_get_nrings(na, NR_RX); i < lim; i++) {
+		struct mbq *q = &NMR(na, NR_RX)[i]->rx_queue;
+
+		ND("destroy sw mbq with len %d", mbq_len(q));
+		mbq_purge(q);
+		mbq_safe_fini(q);
+	}
 	netmap_krings_delete(na);
 }
@@ -1535,7 +1544,7 @@
 		goto out;

 	/* try to see if this is a bridge port */
-	error = netmap_get_bdg_na(hdr, na, nmd, create);
+	error = netmap_get_vale_na(hdr, na, nmd, create);
 	if (error)
 		goto out;

@@ -1827,7 +1836,7 @@
 	}
 	priv->np_qfirst[t] = (nr_mode == NR_REG_SW ?
 		nma_get_nrings(na, t) : 0);
-	priv->np_qlast[t] = nma_get_nrings(na, t) + 1;
+	priv->np_qlast[t] = netmap_all_rings(na, t);
 	ND("%s: %s %d %d", nr_mode == NR_REG_SW ? "SW" : "NIC+SW",
 	    nm_txrx2str(t), priv->np_qfirst[t], priv->np_qlast[t]);
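[Review note] The `num_rx_rings + 1` arithmetic is replaced throughout by netmap_all_rings()/netmap_real_rings() because an adapter may now carry more than one host ring. The helpers are defined in netmap_kern.h, outside this diff; the sketch below paraphrases what they compute and is not the authoritative definition.

```c
/* Sketch only; see netmap_kern.h for the real definitions. */
static inline u_int
netmap_real_rings(struct netmap_adapter *na, enum txrx t)
{
	/* hardware rings plus however many host rings the adapter has */
	return nma_get_nrings(na, t) +
	       ((na->na_flags & NAF_HOST_RINGS) ?
	        nma_get_host_nrings(na, t) : 0);
}

static inline u_int
netmap_all_rings(struct netmap_adapter *na, enum txrx t)
{
	/* never smaller than the legacy "hw rings + 1 fake host ring" */
	u_int real = netmap_real_rings(na, t);
	u_int legacy = nma_get_nrings(na, t) + 1;

	return (real > legacy) ? real : legacy;
}
```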
"SW" : "NIC+SW", nm_txrx2str(t), priv->np_qfirst[t], priv->np_qlast[t]); @@ -2543,7 +2552,7 @@ NMG_LOCK(); hdr->nr_reqtype = NETMAP_REQ_REGISTER; hdr->nr_body = (uintptr_t)®req; - error = netmap_get_bdg_na(hdr, &na, NULL, 0); + error = netmap_get_vale_na(hdr, &na, NULL, 0); hdr->nr_reqtype = NETMAP_REQ_PORT_HDR_SET; hdr->nr_body = (uintptr_t)req; if (na && !error) { @@ -3336,6 +3345,12 @@ } na->pdev = na; /* make sure netmap_mem_map() is called */ #endif /* __FreeBSD__ */ + if (na->na_flags & NAF_HOST_RINGS) { + if (na->num_host_rx_rings == 0) + na->num_host_rx_rings = 1; + if (na->num_host_tx_rings == 0) + na->num_host_tx_rings = 1; + } if (na->nm_krings_create == NULL) { /* we assume that we have been called by a driver, * since other port types all provide their own @@ -3357,7 +3372,7 @@ /* no special nm_bdg_attach callback. On VALE * attach, we need to interpose a bwrap */ - na->nm_bdg_attach = netmap_bwrap_attach; + na->nm_bdg_attach = netmap_default_bdg_attach; #endif return 0; @@ -3399,10 +3414,10 @@ static void netmap_hw_dtor(struct netmap_adapter *na) { - if (nm_iszombie(na) || na->ifp == NULL) + if (na->ifp == NULL) return; - WNA(na->ifp) = NULL; + NM_DETACH_NA(na->ifp); } @@ -3426,10 +3441,10 @@ } if (arg == NULL || arg->ifp == NULL) - goto fail; + return EINVAL; ifp = arg->ifp; - if (NA(ifp) && !NM_NA_VALID(ifp)) { + if (NM_NA_CLASH(ifp)) { /* If NA(ifp) is not null but there is no valid netmap * adapter it means that someone else is using the same * pointer (e.g. ax25_ptr on linux). This happens for @@ -3456,28 +3471,8 @@ NM_ATTACH_NA(ifp, &hwna->up); -#ifdef linux - if (ifp->netdev_ops) { - /* prepare a clone of the netdev ops */ -#ifndef NETMAP_LINUX_HAVE_NETDEV_OPS - hwna->nm_ndo.ndo_start_xmit = ifp->netdev_ops; -#else - hwna->nm_ndo = *ifp->netdev_ops; -#endif /* NETMAP_LINUX_HAVE_NETDEV_OPS */ - } - hwna->nm_ndo.ndo_start_xmit = linux_netmap_start_xmit; - hwna->nm_ndo.ndo_change_mtu = linux_netmap_change_mtu; - if (ifp->ethtool_ops) { - hwna->nm_eto = *ifp->ethtool_ops; - } - hwna->nm_eto.set_ringparam = linux_netmap_set_ringparam; -#ifdef NETMAP_LINUX_HAVE_SET_CHANNELS - hwna->nm_eto.set_channels = linux_netmap_set_channels; -#endif /* NETMAP_LINUX_HAVE_SET_CHANNELS */ - if (arg->nm_config == NULL) { - hwna->up.nm_config = netmap_linux_config; - } -#endif /* linux */ + nm_os_onattach(ifp); + if (arg->nm_dtor == NULL) { hwna->up.nm_dtor = netmap_hw_dtor; } @@ -3545,7 +3540,10 @@ int ret = netmap_krings_create(na, 0); if (ret == 0) { /* initialize the mbq for the sw rx ring */ - mbq_safe_init(&na->rx_rings[na->num_rx_rings]->rx_queue); + u_int lim = netmap_real_rings(na, NR_RX), i; + for (i = na->num_rx_rings; i < lim; i++) { + mbq_safe_init(&NMR(na, NR_RX)[i]->rx_queue); + } ND("initialized sw rx queue %d", na->num_rx_rings); } return ret; @@ -3608,8 +3606,14 @@ unsigned int txr; struct mbq *q; int busy; + u_int i; - kring = na->rx_rings[na->num_rx_rings]; + i = MBUF_TXQ(m); + if (i >= na->num_host_rx_rings) { + i = i % na->num_host_rx_rings; + } + kring = NMR(na, NR_RX)[nma_get_nrings(na, NR_RX) + i]; + // XXX [Linux] we do not need this lock // if we follow the down/configure/up protocol -gl // mtx_lock(&na->core_lock); @@ -3639,8 +3643,15 @@ goto done; } - if (nm_os_mbuf_has_offld(m)) { - RD(1, "%s drop mbuf that needs offloadings", na->name); + if (!netmap_generic_hwcsum) { + if (nm_os_mbuf_has_csum_offld(m)) { + RD(1, "%s drop mbuf that needs checksum offload", na->name); + goto done; + } + } + + if (nm_os_mbuf_has_seg_offld(m)) { + RD(1, "%s drop mbuf that needs 
generic segmentation offload", na->name); goto done; } @@ -3843,6 +3854,40 @@ } return netmap_common_irq(na, q, work_done); +} + +/* set/clear native flags and if_transmit/netdev_ops */ +void +nm_set_native_flags(struct netmap_adapter *na) +{ + struct ifnet *ifp = na->ifp; + + /* We do the setup for intercepting packets only if we are the + * first user of this adapapter. */ + if (na->active_fds > 0) { + return; + } + + na->na_flags |= NAF_NETMAP_ON; + nm_os_onenter(ifp); + nm_update_hostrings_mode(na); +} + +void +nm_clear_native_flags(struct netmap_adapter *na) +{ + struct ifnet *ifp = na->ifp; + + /* We undo the setup for intercepting packets only if we are the + * last user of this adapapter. */ + if (na->active_fds > 0) { + return; + } + + nm_update_hostrings_mode(na); + nm_os_onexit(ifp); + + na->na_flags &= ~NAF_NETMAP_ON; } Index: head/sys/dev/netmap/netmap_bdg.h =================================================================== --- head/sys/dev/netmap/netmap_bdg.h +++ head/sys/dev/netmap/netmap_bdg.h @@ -0,0 +1,155 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (C) 2013-2018 Universita` di Pisa + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ +#ifndef _NET_NETMAP_BDG_H_ +#define _NET_NETMAP_BDG_H_ + +#if defined(__FreeBSD__) +#define BDG_RWLOCK_T struct rwlock // struct rwlock + +#define BDG_RWINIT(b) \ + rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS) +#define BDG_WLOCK(b) rw_wlock(&(b)->bdg_lock) +#define BDG_WUNLOCK(b) rw_wunlock(&(b)->bdg_lock) +#define BDG_RLOCK(b) rw_rlock(&(b)->bdg_lock) +#define BDG_RTRYLOCK(b) rw_try_rlock(&(b)->bdg_lock) +#define BDG_RUNLOCK(b) rw_runlock(&(b)->bdg_lock) +#define BDG_RWDESTROY(b) rw_destroy(&(b)->bdg_lock) + +#endif /* __FreeBSD__ */ + +/* XXX Should go away after fixing find_bridge() - Michio */ +#define NM_BDG_HASH 1024 /* forwarding table entries */ + +/* XXX revise this */ +struct nm_hash_ent { + uint64_t mac; /* the top 2 bytes are the epoch */ + uint64_t ports; +}; + +/* Default size for the Maximum Frame Size. */ +#define NM_BDG_MFS_DEFAULT 1514 + +/* + * nm_bridge is a descriptor for a VALE switch. + * Interfaces for a bridge are all in bdg_ports[]. 
+ * The array has a fixed size; an empty entry does not terminate
+ * the search, but lookups only occur on attach/detach so we
+ * don't mind if they are slow.
+ *
+ * The bridge is non blocking on the transmit ports: excess
+ * packets are dropped if there is no room on the output port.
+ *
+ * bdg_lock protects accesses to the bdg_ports array.
+ * This is a rw lock (or equivalent).
+ */
+#define NM_BDG_IFNAMSIZ IFNAMSIZ
+struct nm_bridge {
+	/* XXX what is the proper alignment/layout ? */
+	BDG_RWLOCK_T	bdg_lock;	/* protects bdg_ports */
+	int		bdg_namelen;
+	uint32_t	bdg_active_ports;
+	char		bdg_basename[NM_BDG_IFNAMSIZ];
+
+	/* Indexes of active ports (up to active_ports)
+	 * and all other remaining ports.
+	 */
+	uint32_t	bdg_port_index[NM_BDG_MAXPORTS];
+	/* used by netmap_bdg_detach_common() */
+	uint32_t	tmp_bdg_port_index[NM_BDG_MAXPORTS];
+
+	struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS];
+
+	/*
+	 * Programmable lookup function to figure out the destination port.
+	 * It returns the index of the destination port,
+	 * NM_BDG_BROADCAST to broadcast the packet, or NM_BDG_NOPORT
+	 * not to forward it. ring_nr is the source ring index, and the
+	 * function may overwrite this value to forward this packet to a
+	 * different ring index.
+	 * The function is set by netmap_bdg_regops().
+	 */
+	struct netmap_bdg_ops *bdg_ops;
+
+	/*
+	 * Contains the data structure used by the bdg_ops.lookup function.
+	 * By default it points to *ht, which is allocated on attach and
+	 * used by the default lookup; otherwise it points to the data
+	 * structure received through netmap_bdg_regops().
+	 */
+	void *private_data;
+	struct nm_hash_ent *ht;
+
+	/* Currently used to flag a bridge that stays allocated while
+	 * empty, and one that has been put in exclusive mode by an
+	 * external module; see netmap_bdg_regops()
+	 * and netmap_bdg_create().
+	 */
+#define NM_BDG_ACTIVE		1
+#define NM_BDG_EXCLUSIVE	2
+	uint8_t	bdg_flags;
+
+
+#ifdef CONFIG_NET_NS
+	struct net *ns;
+#endif /* CONFIG_NET_NS */
+};
+
+static inline void *
+nm_bdg_get_auth_token(struct nm_bridge *b)
+{
+	return b->ht;
+}
+
+/* bridge not in exclusive mode ==> always valid
+ * bridge in exclusive mode (created through netmap_bdg_create()) ==> check authentication token
+ */
+static inline int
+nm_bdg_valid_auth_token(struct nm_bridge *b, void *auth_token)
+{
+	return !(b->bdg_flags & NM_BDG_EXCLUSIVE) || b->ht == auth_token;
+}
+
+int netmap_get_bdg_na(struct nmreq_header *hdr, struct netmap_adapter **na,
+	struct netmap_mem_d *nmd, int create, struct netmap_bdg_ops *ops);
+
+struct nm_bridge *nm_find_bridge(const char *name, int create, struct netmap_bdg_ops *ops);
+int netmap_bdg_free(struct nm_bridge *b);
+void netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw);
+int netmap_vp_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na);
+int netmap_vp_reg(struct netmap_adapter *na, int onoff);
+int netmap_bwrap_reg(struct netmap_adapter *, int onoff);
+int netmap_vp_rxsync(struct netmap_kring *kring, int flags);
+int netmap_bwrap_notify(struct netmap_kring *kring, int flags);
+int netmap_bwrap_attach_common(struct netmap_adapter *na,
+	struct netmap_adapter *hwna);
+int netmap_bwrap_krings_create_common(struct netmap_adapter *na);
+void netmap_bwrap_krings_delete_common(struct netmap_adapter *na);
+#define NM_NEED_BWRAP (-2)
+#endif /* _NET_NETMAP_BDG_H_ */
+
Index: head/sys/dev/netmap/netmap_bdg.c
===================================================================
--- head/sys/dev/netmap/netmap_bdg.c
+++ head/sys/dev/netmap/netmap_bdg.c
@@ -0,0 +1,1827 @@
+/*
+ * Copyright (C) 2013-2016 Universita` di Pisa
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+
+/*
+ * This module implements the VALE switch for netmap
+
+--- VALE SWITCH ---
+
+NMG_LOCK() serializes all modifications to switches and ports.
+A switch cannot be deleted until all ports are gone.
+
+For each switch, an SX lock (RWlock on linux) protects
+deletion of ports. When configuring or deleting a new port, the
+lock is acquired in exclusive mode (after holding NMG_LOCK).
+When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
+The lock is held throughout the entire forwarding cycle,
+during which the thread may incur a page fault.
+Hence it is important that sleepable shared locks are used.
+
+On the rx ring, the per-port lock is grabbed initially to reserve
+a number of slots in the ring, then the lock is released,
+packets are copied from source to destination, and then
+the lock is acquired again and the receive ring is updated.
+(A similar thing is done on the tx ring for NIC and host stack
+ports attached to the switch)
+
+ */
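[Review note] The locking rules above, compressed into one hypothetical function (sketch only, using the BDG_* macros from netmap_bdg.h):

```c
/* Sketch of the locking discipline; not part of the patch. */
static void
bdg_locking_sketch(struct nm_bridge *b)
{
	/* control path: modifying the port array */
	NMG_LOCK();		/* global serialization */
	BDG_WLOCK(b);		/* exclusive per-bridge lock */
	/* ... attach or detach a port ... */
	BDG_WUNLOCK(b);
	NMG_UNLOCK();

	/* data path: forwarding */
	BDG_RLOCK(b);		/* shared mode, no NMG_LOCK */
	/* ... look up destinations and copy packets; the copy may
	 * take a page fault, hence the need for a sleepable lock ... */
	BDG_RUNLOCK(b);
}
```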
+/*
+ * OS-specific code that is used only within this file.
+ * Other OS-specific code that must be accessed by drivers
+ * is present in netmap_kern.h
+ */
+
+#if defined(__FreeBSD__)
+#include <sys/cdefs.h> /* prerequisite */
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/param.h>	/* defines used in kernel.h */
+#include <sys/kernel.h>	/* types used in module initialization */
+#include <sys/conf.h>	/* cdevsw struct, UID, GID */
+#include <sys/sockio.h>
+#include <sys/socketvar.h>	/* struct socket */
+#include <sys/malloc.h>
+#include <sys/poll.h>
+#include <sys/rwlock.h>
+#include <sys/socket.h>	/* sockaddrs */
+#include <sys/selinfo.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/bpf.h>	/* BIOCIMMEDIATE */
+#include <machine/bus.h>	/* bus_dmamap_* */
+#include <sys/endian.h>
+#include <sys/refcount.h>
+#include <sys/smp.h>
+
+
+#elif defined(linux)
+
+#include "bsd_glue.h"
+
+#elif defined(__APPLE__)
+
+#warning OSX support is only partial
+#include "osx_glue.h"
+
+#elif defined(_WIN32)
+#include "win_glue.h"
+
+#else
+
+#error	Unsupported platform
+
+#endif /* unsupported */
+
+/*
+ * common headers
+ */
+
+#include <net/netmap.h>
+#include <dev/netmap/netmap_kern.h>
+#include <dev/netmap/netmap_mem2.h>
+
+#include <dev/netmap/netmap_bdg.h>
+
+const char*
+netmap_bdg_name(struct netmap_vp_adapter *vp)
+{
+	struct nm_bridge *b = vp->na_bdg;
+
+	if (b == NULL)
+		return NULL;
+	return b->bdg_basename;
+}
+
+
+#ifndef CONFIG_NET_NS
+/*
+ * XXX in principle nm_bridges could be created dynamically
+ * Right now we have a static array and deletions are protected
+ * by an exclusive lock.
+ */
+static struct nm_bridge *nm_bridges;
+#endif /* !CONFIG_NET_NS */
+
+
+static int
+nm_is_id_char(const char c)
+{
+	return (c >= 'a' && c <= 'z') ||
+	       (c >= 'A' && c <= 'Z') ||
+	       (c >= '0' && c <= '9') ||
+	       (c == '_');
+}
+
+/* Validate the name of a VALE bridge port and return the
+ * position of the ":" character. */
+static int
+nm_vale_name_validate(const char *name)
+{
+	int colon_pos = -1;
+	int i;
+
+	if (!name || strlen(name) < strlen(NM_BDG_NAME)) {
+		return -1;
+	}
+
+	for (i = 0; i < NM_BDG_IFNAMSIZ && name[i]; i++) {
+		if (name[i] == ':') {
+			colon_pos = i;
+			break;
+		} else if (!nm_is_id_char(name[i])) {
+			return -1;
+		}
+	}
+
+	if (strlen(name) - colon_pos > IFNAMSIZ) {
+		/* interface name too long */
+		return -1;
+	}
+
+	return colon_pos;
+}
+
+/*
+ * locate a bridge among the existing ones.
+ * MUST BE CALLED WITH NMG_LOCK()
+ *
+ * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME.
+ * We assume that this is called with a name of at least NM_NAME chars.
+ */
+struct nm_bridge *
+nm_find_bridge(const char *name, int create, struct netmap_bdg_ops *ops)
+{
+	int i, namelen;
+	struct nm_bridge *b = NULL, *bridges;
+	u_int num_bridges;
+
+	NMG_LOCK_ASSERT();
+
+	netmap_bns_getbridges(&bridges, &num_bridges);
+
+	namelen = nm_vale_name_validate(name);
+	if (namelen < 0) {
+		D("invalid bridge name %s", name ? name : NULL);
+		return NULL;
+	}
+
+	/* lookup the name, remember empty slot if there is one */
+	for (i = 0; i < num_bridges; i++) {
+		struct nm_bridge *x = bridges + i;
+
+		if ((x->bdg_flags & NM_BDG_ACTIVE) + x->bdg_active_ports == 0) {
+			if (create && b == NULL)
+				b = x;	/* record empty slot */
+		} else if (x->bdg_namelen != namelen) {
+			continue;
+		} else if (strncmp(name, x->bdg_basename, namelen) == 0) {
+			ND("found '%.*s' at %d", namelen, name, i);
+			b = x;
+			break;
+		}
+	}
+	if (i == num_bridges && b) { /* name not found, can create entry */
+		/* initialize the bridge */
+		ND("create new bridge %s with ports %d", b->bdg_basename,
+			b->bdg_active_ports);
+		b->ht = nm_os_malloc(sizeof(struct nm_hash_ent) * NM_BDG_HASH);
+		if (b->ht == NULL) {
+			D("failed to allocate hash table");
+			return NULL;
+		}
+		strncpy(b->bdg_basename, name, namelen);
+		b->bdg_namelen = namelen;
+		b->bdg_active_ports = 0;
+		for (i = 0; i < NM_BDG_MAXPORTS; i++)
+			b->bdg_port_index[i] = i;
+		/* set the default function */
+		b->bdg_ops = ops;
+		b->private_data = b->ht;
+		b->bdg_flags = 0;
+		NM_BNS_GET(b);
+	}
+	return b;
+}
+
+
+int
+netmap_bdg_free(struct nm_bridge *b)
+{
+	if ((b->bdg_flags & NM_BDG_ACTIVE) + b->bdg_active_ports != 0) {
+		return EBUSY;
+	}
+
+	ND("marking bridge %s as free", b->bdg_basename);
+	nm_os_free(b->ht);
+	b->bdg_ops = NULL;
+	b->bdg_flags = 0;
+	NM_BNS_PUT(b);
+	return 0;
+}
+
+
+/* remove from bridge b the ports in slots hw and sw
+ * (sw can be -1 if not needed)
+ */
+void
+netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw)
+{
+	int s_hw = hw, s_sw = sw;
+	int i, lim = b->bdg_active_ports;
+	uint32_t *tmp = b->tmp_bdg_port_index;
+
+	/*
+	New algorithm:
+	make a copy of bdg_port_index;
+	lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port
+	in the array of bdg_port_index, replacing them with
+	entries from the bottom of the array;
+	decrement bdg_active_ports;
+	acquire BDG_WLOCK() and copy back the array.
+	 */
+
+	if (netmap_verbose)
+		D("detach %d and %d (lim %d)", hw, sw, lim);
+	/* make a copy of the list of active ports, update it,
+	 * and then copy back within BDG_WLOCK().
+	 */
+	memcpy(b->tmp_bdg_port_index, b->bdg_port_index, sizeof(b->tmp_bdg_port_index));
+	for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) {
+		if (hw >= 0 && tmp[i] == hw) {
+			ND("detach hw %d at %d", hw, i);
+			lim--; /* point to last active port */
+			tmp[i] = tmp[lim]; /* swap with i */
+			tmp[lim] = hw;	/* now this is inactive */
+			hw = -1;
+		} else if (sw >= 0 && tmp[i] == sw) {
+			ND("detach sw %d at %d", sw, i);
+			lim--;
+			tmp[i] = tmp[lim];
+			tmp[lim] = sw;
+			sw = -1;
+		} else {
+			i++;
+		}
+	}
+	if (hw >= 0 || sw >= 0) {
+		D("XXX delete failed hw %d sw %d, should panic...", hw, sw);
+	}
+
+	BDG_WLOCK(b);
+	if (b->bdg_ops->dtor)
+		b->bdg_ops->dtor(b->bdg_ports[s_hw]);
+	b->bdg_ports[s_hw] = NULL;
+	if (s_sw >= 0) {
+		b->bdg_ports[s_sw] = NULL;
+	}
+	memcpy(b->bdg_port_index, b->tmp_bdg_port_index, sizeof(b->tmp_bdg_port_index));
+	b->bdg_active_ports = lim;
+	BDG_WUNLOCK(b);
+
+	ND("now %d active ports", lim);
+	netmap_bdg_free(b);
+}
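[Review note] The swap-with-last scheme used by netmap_bdg_detach_common() in isolation (hypothetical stand-alone example, not part of the patch):

```c
#include <stdint.h>

/* Remove `victim` from the first `nactive` entries of a dense index
 * array by swapping it with the last active entry, as the "New
 * algorithm" comment above describes. Returns the new active count. */
static int
dense_index_remove(uint32_t *idx, int nactive, uint32_t victim)
{
	int i;

	for (i = 0; i < nactive; i++) {
		if (idx[i] == victim) {
			nactive--;		/* shrink the active region */
			idx[i] = idx[nactive];	/* last active entry moves up */
			idx[nactive] = victim;	/* victim parked as inactive */
			break;
		}
	}
	return nactive;
}
```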
+
+
+/* nm_bdg_ctl callback for VALE ports */
+int
+netmap_vp_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na)
+{
+	struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na;
+	struct nm_bridge *b = vpna->na_bdg;
+
+	if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
+		return 0; /* nothing to do */
+	}
+	if (b) {
+		netmap_set_all_rings(na, 0 /* disable */);
+		netmap_bdg_detach_common(b, vpna->bdg_port, -1);
+		vpna->na_bdg = NULL;
+		netmap_set_all_rings(na, 1 /* enable */);
+	}
+	/* we took a reference just for the attach */
+	netmap_adapter_put(na);
+	return 0;
+}
+
+int
+netmap_default_bdg_attach(const char *name, struct netmap_adapter *na,
+		struct nm_bridge *b)
+{
+	return NM_NEED_BWRAP;
+}
+
+/* Try to get a reference to a netmap adapter attached to a VALE switch.
+ * If the adapter is found (or is created), this function returns 0, a
+ * non NULL pointer is returned into *na, and the caller holds a
+ * reference to the adapter.
+ * If an adapter is not found, then no reference is grabbed and the
+ * function returns an error code, or 0 if there is just a VALE prefix
+ * mismatch. Therefore the caller holds a reference when
+ * (*na != NULL && return == 0).
+ */
+int
+netmap_get_bdg_na(struct nmreq_header *hdr, struct netmap_adapter **na,
+	struct netmap_mem_d *nmd, int create, struct netmap_bdg_ops *ops)
+{
+	char *nr_name = hdr->nr_name;
+	const char *ifname;
+	struct ifnet *ifp = NULL;
+	int error = 0;
+	struct netmap_vp_adapter *vpna, *hostna = NULL;
+	struct nm_bridge *b;
+	uint32_t i, j;
+	uint32_t cand = NM_BDG_NOPORT, cand2 = NM_BDG_NOPORT;
+	int needed;
+
+	*na = NULL;     /* default return value */
+
+	/* first try to see if this is a bridge port. */
+	NMG_LOCK_ASSERT();
+	if (strncmp(nr_name, ops->name, strlen(ops->name) - 1)) {
+		return 0;  /* no error, but no VALE prefix */
+	}
+
+	b = nm_find_bridge(nr_name, create, ops);
+	if (b == NULL) {
+		ND("no bridges available for '%s'", nr_name);
+		return (create ? ENOMEM : ENXIO);
+	}
+	if (strlen(nr_name) < b->bdg_namelen) /* impossible */
+		panic("x");
+
+	/* Now we are sure that name starts with the bridge's name,
+	 * lookup the port in the bridge. We need to scan the entire
+	 * list. It is not important to hold a WLOCK on the bridge
+	 * during the search because NMG_LOCK already guarantees
+	 * that there are no other possible writers.
+	 */
+
+	/* lookup in the local list of ports */
+	for (j = 0; j < b->bdg_active_ports; j++) {
+		i = b->bdg_port_index[j];
+		vpna = b->bdg_ports[i];
+		ND("checking %s", vpna->up.name);
+		if (!strcmp(vpna->up.name, nr_name)) {
+			netmap_adapter_get(&vpna->up);
+			ND("found existing if %s refs %d", nr_name,
+				vpna->up.na_refcount);
+			*na = &vpna->up;
+			return 0;
+		}
+	}
+	/* not found, should we create it? */
+	if (!create)
+		return ENXIO;
+	/* yes we should, see if we have space to attach entries */
+	needed = 2; /* in some cases we only need 1 */
+	if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) {
+		D("bridge full %d, cannot create new port", b->bdg_active_ports);
+		return ENOMEM;
+	}
+	/* record the next two ports available, but do not allocate yet */
+	cand = b->bdg_port_index[b->bdg_active_ports];
+	cand2 = b->bdg_port_index[b->bdg_active_ports + 1];
+	ND("+++ bridge %s port %s used %d avail %d %d",
+		b->bdg_basename, nr_name, b->bdg_active_ports, cand, cand2);
+
+	/*
+	 * try to see if there is a matching NIC with this name
+	 * (after the bridge's name)
+	 */
+	ifname = nr_name + b->bdg_namelen + 1;
+	ifp = ifunit_ref(ifname);
+	if (!ifp) {
+		/* Create an ephemeral virtual port.
+		 * This block contains all the ephemeral-specific logic.
+		 */
+
+		if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) {
+			error = EINVAL;
+			goto out;
+		}
+
+		/* bdg_netmap_attach creates a struct netmap_adapter */
+		error = b->bdg_ops->vp_create(hdr, NULL, nmd, &vpna);
+		if (error) {
+			D("error %d", error);
+			goto out;
+		}
+		/* shortcut - we can skip get_hw_na(),
+		 * ownership check and nm_bdg_attach()
+		 */
+	} else {
+		struct netmap_adapter *hw;
+
+		/* the vale:nic syntax is only valid for some commands */
+		switch (hdr->nr_reqtype) {
+		case NETMAP_REQ_VALE_ATTACH:
+		case NETMAP_REQ_VALE_DETACH:
+		case NETMAP_REQ_VALE_POLLING_ENABLE:
+		case NETMAP_REQ_VALE_POLLING_DISABLE:
+			break; /* ok */
+		default:
+			error = EINVAL;
+			goto out;
+		}
+
+		error = netmap_get_hw_na(ifp, nmd, &hw);
+		if (error || hw == NULL)
+			goto out;
+
+		/* host adapter might not be created */
+		error = hw->nm_bdg_attach(nr_name, hw, b);
+		if (error == NM_NEED_BWRAP) {
+			error = b->bdg_ops->bwrap_attach(nr_name, hw);
+		}
+		if (error)
+			goto out;
+		vpna = hw->na_vp;
+		hostna = hw->na_hostvp;
+		if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
+			/* Check if we need to skip the host rings. */
+			struct nmreq_vale_attach *areq =
+				(struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
+			if (areq->reg.nr_mode != NR_REG_NIC_SW) {
+				hostna = NULL;
+			}
+		}
+	}
+
+	BDG_WLOCK(b);
+	vpna->bdg_port = cand;
+	ND("NIC  %p to bridge port %d", vpna, cand);
+	/* bind the port to the bridge (virtual ports are not active) */
+	b->bdg_ports[cand] = vpna;
+	vpna->na_bdg = b;
+	b->bdg_active_ports++;
+	if (hostna != NULL) {
+		/* also bind the host stack to the bridge */
+		b->bdg_ports[cand2] = hostna;
+		hostna->bdg_port = cand2;
+		hostna->na_bdg = b;
+		b->bdg_active_ports++;
+		ND("host %p to bridge port %d", hostna, cand2);
+	}
+	ND("if %s refs %d", ifname, vpna->up.na_refcount);
+	BDG_WUNLOCK(b);
+	*na = &vpna->up;
+	netmap_adapter_get(*na);
+
+out:
+	if (ifp)
+		if_rele(ifp);
+
+	return error;
+}
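[Review note] For context, this is roughly how userspace reaches the attach path above through the v14 control API (sketch; struct layouts assumed to match net/netmap.h, error handling omitted):

```c
#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <net/netmap.h>
#include <net/netmap_user.h>

/* Attach NIC em0 to VALE switch vale0, host rings included. */
static int
vale_attach_example(void)
{
	struct nmreq_header hdr;
	struct nmreq_vale_attach req;
	int fd = open("/dev/netmap", O_RDWR);

	memset(&hdr, 0, sizeof(hdr));
	memset(&req, 0, sizeof(req));
	hdr.nr_version = NETMAP_API;
	hdr.nr_reqtype = NETMAP_REQ_VALE_ATTACH;
	strlcpy(hdr.nr_name, "vale0:em0", sizeof(hdr.nr_name));
	hdr.nr_body = (uintptr_t)&req;
	req.reg.nr_mode = NR_REG_NIC_SW;	/* NIC plus host rings */

	/* the kernel ends up in nm_bdg_ctl_attach() below */
	return ioctl(fd, NIOCCTRL, &hdr);
}
```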
+
+/* Process NETMAP_REQ_VALE_ATTACH.
+ */
+int
+nm_bdg_ctl_attach(struct nmreq_header *hdr, void *auth_token)
+{
+	struct nmreq_vale_attach *req =
+		(struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
+	struct netmap_vp_adapter *vpna;
+	struct netmap_adapter *na = NULL;
+	struct netmap_mem_d *nmd = NULL;
+	struct nm_bridge *b = NULL;
+	int error;
+
+	NMG_LOCK();
+	/* permission check for modified bridges */
+	b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
+	if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
+		error = EACCES;
+		goto unlock_exit;
+	}
+
+	if (req->reg.nr_mem_id) {
+		nmd = netmap_mem_find(req->reg.nr_mem_id);
+		if (nmd == NULL) {
+			error = EINVAL;
+			goto unlock_exit;
+		}
+	}
+
+	/* check for an existing port */
+	error = netmap_get_vale_na(hdr, &na, nmd, 0);
+	if (na) {
+		error = EBUSY;
+		goto unref_exit;
+	}
+	error = netmap_get_vale_na(hdr, &na,
+		nmd, 1 /* create if not exists */);
+	if (error) { /* no device */
+		goto unlock_exit;
+	}
+
+	if (na == NULL) { /* VALE prefix missing */
+		error = EINVAL;
+		goto unlock_exit;
+	}
+
+	if (NETMAP_OWNED_BY_ANY(na)) {
+		error = EBUSY;
+		goto unref_exit;
+	}
+
+	if (na->nm_bdg_ctl) {
+		/* nop for VALE ports. The bwrap needs to put the hwna
+		 * in netmap mode (see netmap_bwrap_bdg_ctl)
+		 */
+		error = na->nm_bdg_ctl(hdr, na);
+		if (error)
+			goto unref_exit;
+		ND("registered %s to netmap-mode", na->name);
+	}
+	vpna = (struct netmap_vp_adapter *)na;
+	req->port_index = vpna->bdg_port;
+	NMG_UNLOCK();
+	return 0;
+
+unref_exit:
+	netmap_adapter_put(na);
+unlock_exit:
+	NMG_UNLOCK();
+	return error;
+}
+
+static inline int
+nm_is_bwrap(struct netmap_adapter *na)
+{
+	return na->nm_register == netmap_bwrap_reg;
+}
+
+/* Process NETMAP_REQ_VALE_DETACH.
+ */
+int
+nm_bdg_ctl_detach(struct nmreq_header *hdr, void *auth_token)
+{
+	struct nmreq_vale_detach *nmreq_det = (void *)(uintptr_t)hdr->nr_body;
+	struct netmap_vp_adapter *vpna;
+	struct netmap_adapter *na;
+	struct nm_bridge *b = NULL;
+	int error;
+
+	NMG_LOCK();
+	/* permission check for modified bridges */
+	b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
+	if (b && !nm_bdg_valid_auth_token(b, auth_token)) {
+		error = EACCES;
+		goto unlock_exit;
+	}
+
+	error = netmap_get_vale_na(hdr, &na, NULL, 0 /* don't create */);
+	if (error) { /* no device, or another bridge or user owns the device */
+		goto unlock_exit;
+	}
+
+	if (na == NULL) { /* VALE prefix missing */
+		error = EINVAL;
+		goto unlock_exit;
+	} else if (nm_is_bwrap(na) &&
+		   ((struct netmap_bwrap_adapter *)na)->na_polling_state) {
+		/* Don't detach a NIC with polling */
+		error = EBUSY;
+		goto unref_exit;
+	}
+
+	vpna = (struct netmap_vp_adapter *)na;
+	if (na->na_vp != vpna) {
+		/* trying to detach the first attachment of a persistent
+		 * VALE port that is attached to two bridges
+		 */
+		error = EBUSY;
+		goto unref_exit;
+	}
+	nmreq_det->port_index = vpna->bdg_port;
+
+	if (na->nm_bdg_ctl) {
+		/* remove the port from the bridge. The bwrap
+		 * also needs to put the hwna in normal mode
+		 */
+		error = na->nm_bdg_ctl(hdr, na);
+	}
+
+unref_exit:
+	netmap_adapter_put(na);
+unlock_exit:
+	NMG_UNLOCK();
+	return error;
+
+}
+
+struct nm_bdg_polling_state;
+struct nm_bdg_kthread {
+	struct nm_kctx *nmk;
+	u_int qfirst;
+	u_int qlast;
+	struct nm_bdg_polling_state *bps;
+};
+
+struct nm_bdg_polling_state {
+	bool configured;
+	bool stopped;
+	struct netmap_bwrap_adapter *bna;
+	uint32_t mode;
+	u_int qfirst;
+	u_int qlast;
+	u_int cpu_from;
+	u_int ncpus;
+	struct nm_bdg_kthread *kthreads;
+};
+
+static void
+netmap_bwrap_polling(void *data, int is_kthread)
+{
+	struct nm_bdg_kthread *nbk = data;
+	struct netmap_bwrap_adapter *bna;
+	u_int qfirst, qlast, i;
+	struct netmap_kring **kring0, *kring;
+
+	if (!nbk)
+		return;
+	qfirst = nbk->qfirst;
+	qlast = nbk->qlast;
+	bna = nbk->bps->bna;
+	kring0 = NMR(bna->hwna, NR_RX);
+
+	for (i = qfirst; i < qlast; i++) {
+		kring = kring0[i];
+		kring->nm_notify(kring, 0);
+	}
+}
+
+static int
+nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps)
+{
+	struct nm_kctx_cfg kcfg;
+	int i, j;
+
+	bps->kthreads = nm_os_malloc(sizeof(struct nm_bdg_kthread) * bps->ncpus);
+	if (bps->kthreads == NULL)
+		return ENOMEM;
+
+	bzero(&kcfg, sizeof(kcfg));
+	kcfg.worker_fn = netmap_bwrap_polling;
+	kcfg.use_kthread = 1;
+	for (i = 0; i < bps->ncpus; i++) {
+		struct nm_bdg_kthread *t = bps->kthreads + i;
+		int all = (bps->ncpus == 1 &&
+			bps->mode == NETMAP_POLLING_MODE_SINGLE_CPU);
+		int affinity = bps->cpu_from + i;
+
+		t->bps = bps;
+		t->qfirst = all ? bps->qfirst /* must be 0 */: affinity;
+		t->qlast = all ? bps->qlast : t->qfirst + 1;
+		D("kthread %d a:%u qf:%u ql:%u", i, affinity, t->qfirst,
+			t->qlast);
+
+		kcfg.type = i;
+		kcfg.worker_private = t;
+		t->nmk = nm_os_kctx_create(&kcfg, NULL);
+		if (t->nmk == NULL) {
+			goto cleanup;
+		}
+		nm_os_kctx_worker_setaff(t->nmk, affinity);
+	}
+	return 0;
+
+cleanup:
+	for (j = 0; j < i; j++) {
+		struct nm_bdg_kthread *t = bps->kthreads + j;
+		nm_os_kctx_destroy(t->nmk);
+	}
+	nm_os_free(bps->kthreads);
+	return EFAULT;
+}
+
+/* A variant of ptnetmap_start_kthreads() */
+static int
+nm_bdg_polling_start_kthreads(struct nm_bdg_polling_state *bps)
+{
+	int error, i, j;
+
+	if (!bps) {
+		D("polling is not configured");
+		return EFAULT;
+	}
+	bps->stopped = false;
+
+	for (i = 0; i < bps->ncpus; i++) {
+		struct nm_bdg_kthread *t = bps->kthreads + i;
+		error = nm_os_kctx_worker_start(t->nmk);
+		if (error) {
+			D("error in nm_os_kctx_worker_start()");
+			goto cleanup;
+		}
+	}
+	return 0;
+
+cleanup:
+	for (j = 0; j < i; j++) {
+		struct nm_bdg_kthread *t = bps->kthreads + j;
+		nm_os_kctx_worker_stop(t->nmk);
+	}
+	bps->stopped = true;
+	return error;
+}
+
+static void
+nm_bdg_polling_stop_delete_kthreads(struct nm_bdg_polling_state *bps)
+{
+	int i;
+
+	if (!bps)
+		return;
+
+	for (i = 0; i < bps->ncpus; i++) {
+		struct nm_bdg_kthread *t = bps->kthreads + i;
+		nm_os_kctx_worker_stop(t->nmk);
+		nm_os_kctx_destroy(t->nmk);
+	}
+	bps->stopped = true;
+}
+
+static int
+get_polling_cfg(struct nmreq_vale_polling *req, struct netmap_adapter *na,
+		struct nm_bdg_polling_state *bps)
+{
+	unsigned int avail_cpus, core_from;
+	unsigned int qfirst, qlast;
+	uint32_t i = req->nr_first_cpu_id;
+	uint32_t req_cpus = req->nr_num_polling_cpus;
+
+	avail_cpus = nm_os_ncpus();
+
+	if (req_cpus == 0) {
+		D("req_cpus must be > 0");
+		return EINVAL;
+	} else if (req_cpus >= avail_cpus) {
+		D("Cannot use all the CPUs in the system");
+		return EINVAL;
+	}
+
+	if (req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU) {
+		/* Use a separate core for each ring. If nr_num_polling_cpus > 1,
+		 * that many consecutive rings are polled.
+		 * For example, if nr_first_cpu_id=2 and nr_num_polling_cpus=2,
+		 * rings 2 and 3 are polled by cores 2 and 3, respectively. */
+		if (i + req_cpus > nma_get_nrings(na, NR_RX)) {
+			D("Rings %u-%u not in range (have %d rings)",
+				i, i + req_cpus, nma_get_nrings(na, NR_RX));
+			return EINVAL;
+		}
+		qfirst = i;
+		qlast = qfirst + req_cpus;
+		core_from = qfirst;
+
+	} else if (req->nr_mode == NETMAP_POLLING_MODE_SINGLE_CPU) {
+		/* Poll all the rings using a core specified by nr_first_cpu_id.
+		 * The number of cores must be 1. */
+		if (req_cpus != 1) {
+			D("ncpus must be 1 for NETMAP_POLLING_MODE_SINGLE_CPU "
+				"(was %d)", req_cpus);
+			return EINVAL;
+		}
+		qfirst = 0;
+		qlast = nma_get_nrings(na, NR_RX);
+		core_from = i;
+	} else {
+		D("Invalid polling mode");
+		return EINVAL;
+	}
+
+	bps->mode = req->nr_mode;
+	bps->qfirst = qfirst;
+	bps->qlast = qlast;
+	bps->cpu_from = core_from;
+	bps->ncpus = req_cpus;
+	D("%s qfirst %u qlast %u cpu_from %u ncpus %u",
+		req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU ?
+		"MULTI" : "SINGLE",
+		qfirst, qlast, core_from, req_cpus);
+	return 0;
+}
+
+static int
+nm_bdg_ctl_polling_start(struct nmreq_vale_polling *req, struct netmap_adapter *na)
+{
+	struct nm_bdg_polling_state *bps;
+	struct netmap_bwrap_adapter *bna;
+	int error;
+
+	bna = (struct netmap_bwrap_adapter *)na;
+	if (bna->na_polling_state) {
+		D("ERROR adapter already in polling mode");
+		return EFAULT;
+	}
+
+	bps = nm_os_malloc(sizeof(*bps));
+	if (!bps)
+		return ENOMEM;
+	bps->configured = false;
+	bps->stopped = true;
+
+	if (get_polling_cfg(req, na, bps)) {
+		nm_os_free(bps);
+		return EINVAL;
+	}
+
+	if (nm_bdg_create_kthreads(bps)) {
+		nm_os_free(bps);
+		return EFAULT;
+	}
+
+	bps->configured = true;
+	bna->na_polling_state = bps;
+	bps->bna = bna;
+
+	/* disable interrupts if possible */
+	nma_intr_enable(bna->hwna, 0);
+	/* start kthreads now */
+	error = nm_bdg_polling_start_kthreads(bps);
+	if (error) {
+		D("ERROR nm_bdg_polling_start_kthreads()");
+		nm_os_free(bps->kthreads);
+		nm_os_free(bps);
+		bna->na_polling_state = NULL;
+		nma_intr_enable(bna->hwna, 1);
+	}
+	return error;
+}
+
+static int
+nm_bdg_ctl_polling_stop(struct netmap_adapter *na)
+{
+	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na;
+	struct nm_bdg_polling_state *bps;
+
+	if (!bna->na_polling_state) {
+		D("ERROR adapter is not in polling mode");
+		return EFAULT;
+	}
+	bps = bna->na_polling_state;
+	nm_bdg_polling_stop_delete_kthreads(bna->na_polling_state);
+	bps->configured = false;
+	nm_os_free(bps);
+	bna->na_polling_state = NULL;
+	/* re-enable interrupts */
+	nma_intr_enable(bna->hwna, 1);
+	return 0;
+}
+
+int
+nm_bdg_polling(struct nmreq_header *hdr)
+{
+	struct nmreq_vale_polling *req =
+		(struct nmreq_vale_polling *)(uintptr_t)hdr->nr_body;
+	struct netmap_adapter *na = NULL;
+	int error = 0;
+
+	NMG_LOCK();
+	error = netmap_get_vale_na(hdr, &na, NULL, /*create=*/0);
+	if (na && !error) {
+		if (!nm_is_bwrap(na)) {
+			error = EOPNOTSUPP;
+		} else if (hdr->nr_reqtype == NETMAP_REQ_VALE_POLLING_ENABLE) {
+			error = nm_bdg_ctl_polling_start(req, na);
+			if (!error)
+				netmap_adapter_get(na);
+		} else {
+			error = nm_bdg_ctl_polling_stop(na);
+			if (!error)
+				netmap_adapter_put(na);
+		}
+		netmap_adapter_put(na);
+	} else if (!na && !error) {
+		/* Not a VALE port. */
+		error = EINVAL;
+	}
+	NMG_UNLOCK();
+
+	return error;
+}
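[Review note] The matching userspace side for the polling entry point above (sketch, same assumptions as the attach example):

```c
/* Poll rings 2 and 3 with dedicated cores 2 and 3, mirroring the
 * MULTI_CPU example in the get_polling_cfg() comment. */
static int
vale_polling_example(int fd)
{
	struct nmreq_header hdr;
	struct nmreq_vale_polling req;

	memset(&hdr, 0, sizeof(hdr));
	memset(&req, 0, sizeof(req));
	hdr.nr_version = NETMAP_API;
	hdr.nr_reqtype = NETMAP_REQ_VALE_POLLING_ENABLE;
	strlcpy(hdr.nr_name, "vale0:em0", sizeof(hdr.nr_name));
	hdr.nr_body = (uintptr_t)&req;
	req.nr_mode = NETMAP_POLLING_MODE_MULTI_CPU;
	req.nr_first_cpu_id = 2;
	req.nr_num_polling_cpus = 2;

	return ioctl(fd, NIOCCTRL, &hdr);
}
```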
+
+/* Process NETMAP_REQ_VALE_LIST. */
+int
+netmap_bdg_list(struct nmreq_header *hdr)
+{
+	struct nmreq_vale_list *req =
+		(struct nmreq_vale_list *)(uintptr_t)hdr->nr_body;
+	int namelen = strlen(hdr->nr_name);
+	struct nm_bridge *b, *bridges;
+	struct netmap_vp_adapter *vpna;
+	int error = 0, i, j;
+	u_int num_bridges;
+
+	netmap_bns_getbridges(&bridges, &num_bridges);
+
+	/* this is used to enumerate bridges and ports */
+	if (namelen) { /* look up indexes of bridge and port */
+		if (strncmp(hdr->nr_name, NM_BDG_NAME,
+			strlen(NM_BDG_NAME))) {
+			return EINVAL;
+		}
+		NMG_LOCK();
+		b = nm_find_bridge(hdr->nr_name, 0 /* don't create */, NULL);
+		if (!b) {
+			NMG_UNLOCK();
+			return ENOENT;
+		}
+
+		req->nr_bridge_idx = b - bridges; /* bridge index */
+		req->nr_port_idx = NM_BDG_NOPORT;
+		for (j = 0; j < b->bdg_active_ports; j++) {
+			i = b->bdg_port_index[j];
+			vpna = b->bdg_ports[i];
+			if (vpna == NULL) {
+				D("This should not happen");
+				continue;
+			}
+			/* the former and the latter identify a
+			 * virtual port and a NIC, respectively
+			 */
+			if (!strcmp(vpna->up.name, hdr->nr_name)) {
+				req->nr_port_idx = i; /* port index */
+				break;
+			}
+		}
+		NMG_UNLOCK();
+	} else {
+		/* return the first non-empty entry starting from
+		 * bridge nr_arg1 and port nr_arg2.
+		 *
+		 * Users can detect the end of the same bridge by
+		 * seeing the new and old value of nr_arg1, and can
+		 * detect the end of all the bridges by error != 0
+		 */
+		i = req->nr_bridge_idx;
+		j = req->nr_port_idx;
+
+		NMG_LOCK();
+		for (error = ENOENT; i < NM_BRIDGES; i++) {
+			b = bridges + i;
+			for ( ; j < NM_BDG_MAXPORTS; j++) {
+				if (b->bdg_ports[j] == NULL)
+					continue;
+				vpna = b->bdg_ports[j];
+				/* write back the VALE switch name */
+				strncpy(hdr->nr_name, vpna->up.name,
+					(size_t)IFNAMSIZ);
+				error = 0;
+				goto out;
+			}
+			j = 0; /* following bridges scan from 0 */
+		}
+	out:
+		req->nr_bridge_idx = i;
+		req->nr_port_idx = j;
+		NMG_UNLOCK();
+	}
+
+	return error;
+}
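[Review note] A sketch of how an external module (the Openvswitch use case mentioned below) might take over a switch instance. struct netmap_bdg_ops is defined in netmap_kern.h; only the callbacks this file invokes (lookup/config/dtor/vp_create/bwrap_attach) are assumed, and the lookup signature is paraphrased:

```c
/* Hypothetical lookup callback: flood every frame. */
static uint32_t
my_lookup(struct nm_bdg_fwd *ft, uint8_t *ring_nr,
	struct netmap_vp_adapter *vpna, void *private_data)
{
	return NM_BDG_BROADCAST;
}

static struct netmap_bdg_ops my_bdg_ops = {
	.lookup = my_lookup,
	/* .config and .dtor left NULL: nothing extra to do */
};

/* auth_token comes from netmap_bdg_create() for an exclusive bridge;
 * a NULL private_data keeps the default hash table. */
static int
takeover_bridge(void *auth_token)
{
	return netmap_bdg_regops("vale0:", &my_bdg_ops, NULL, auth_token);
}
```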
+
+/* Called by external kernel modules (e.g., Openvswitch)
+ * to set the configure/lookup/dtor functions of a VALE instance.
+ * Register callbacks to the given bridge. 'name' may be just
+ * bridge's name (including ':' if it is not just NM_BDG_NAME).
+ *
+ * Called without NMG_LOCK.
+ */
+
+int
+netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *private_data, void *auth_token)
+{
+	struct nm_bridge *b;
+	int error = 0;
+
+	NMG_LOCK();
+	b = nm_find_bridge(name, 0 /* don't create */, NULL);
+	if (!b) {
+		error = ENXIO;
+		goto unlock_regops;
+	}
+	if (!nm_bdg_valid_auth_token(b, auth_token)) {
+		error = EACCES;
+		goto unlock_regops;
+	}
+
+	BDG_WLOCK(b);
+	if (!bdg_ops) {
+		/* resetting the bridge */
+		bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH);
+		b->bdg_ops = NULL;
+		b->private_data = b->ht;
+	} else {
+		/* modifying the bridge */
+		b->private_data = private_data;
+		b->bdg_ops = bdg_ops;
+	}
+	BDG_WUNLOCK(b);
+
+unlock_regops:
+	NMG_UNLOCK();
+	return error;
+}
+
+
+int
+netmap_bdg_config(struct nm_ifreq *nr)
+{
+	struct nm_bridge *b;
+	int error = EINVAL;
+
+	NMG_LOCK();
+	b = nm_find_bridge(nr->nifr_name, 0, NULL);
+	if (!b) {
+		NMG_UNLOCK();
+		return error;
+	}
+	NMG_UNLOCK();
+	/* Don't call config() with NMG_LOCK() held */
+	BDG_RLOCK(b);
+	if (b->bdg_ops->config != NULL)
+		error = b->bdg_ops->config(nr);
+	BDG_RUNLOCK(b);
+	return error;
+}
+
+
+/* nm_register callback for VALE ports */
+int
+netmap_vp_reg(struct netmap_adapter *na, int onoff)
+{
+	struct netmap_vp_adapter *vpna =
+		(struct netmap_vp_adapter*)na;
+	enum txrx t;
+	int i;
+
+	/* persistent ports may be put in netmap mode
+	 * before being attached to a bridge
+	 */
+	if (vpna->na_bdg)
+		BDG_WLOCK(vpna->na_bdg);
+	if (onoff) {
+		for_rx_tx(t) {
+			for (i = 0; i < netmap_real_rings(na, t); i++) {
+				struct netmap_kring *kring = NMR(na, t)[i];
+
+				if (nm_kring_pending_on(kring))
+					kring->nr_mode = NKR_NETMAP_ON;
+			}
+		}
+		if (na->active_fds == 0)
+			na->na_flags |= NAF_NETMAP_ON;
+		/* XXX on FreeBSD, persistent VALE ports should also
+		 * toggle IFCAP_NETMAP in na->ifp (2014-03-16)
+		 */
+	} else {
+		if (na->active_fds == 0)
+			na->na_flags &= ~NAF_NETMAP_ON;
+		for_rx_tx(t) {
+			for (i = 0; i < netmap_real_rings(na, t); i++) {
+				struct netmap_kring *kring = NMR(na, t)[i];
+
+				if (nm_kring_pending_off(kring))
+					kring->nr_mode = NKR_NETMAP_OFF;
+			}
+		}
+	}
+	if (vpna->na_bdg)
+		BDG_WUNLOCK(vpna->na_bdg);
+	return 0;
+}
+
+
+/* rxsync code used by the VALE ports' nm_rxsync callback and also
+ * internally by the bwrap
+ */
+static int
+netmap_vp_rxsync_locked(struct netmap_kring *kring, int flags)
+{
+	struct netmap_adapter *na = kring->na;
+	struct netmap_ring *ring = kring->ring;
+	u_int nm_i, lim = kring->nkr_num_slots - 1;
+	u_int head = kring->rhead;
+	int n;
+
+	if (head > lim) {
+		D("ouch dangerous reset!!!");
+		n = netmap_ring_reinit(kring);
+		goto done;
+	}
+
+	/* First part, import newly received packets. */
+	/* actually nothing to do here, they are already in the kring */
+
+	/* Second part, skip past packets that userspace has released. */
+	nm_i = kring->nr_hwcur;
+	if (nm_i != head) {
+		/* consistency check, but nothing really important here */
+		for (n = 0; likely(nm_i != head); n++) {
+			struct netmap_slot *slot = &ring->slot[nm_i];
+			void *addr = NMB(na, slot);
+
+			if (addr == NETMAP_BUF_BASE(kring->na)) { /* bad buf */
+				D("bad buffer index %d, ignore ?",
+					slot->buf_idx);
+			}
+			slot->flags &= ~NS_BUF_CHANGED;
+			nm_i = nm_next(nm_i, lim);
+		}
+		kring->nr_hwcur = head;
+	}
+
+	n = 0;
+done:
+	return n;
+}
+
+/*
+ * nm_rxsync callback for VALE ports:
+ * a user process reading from a VALE switch.
+ * Already protected against concurrent calls from userspace,
+ * but we must acquire the queue's lock to protect against
+ * writers on the same queue.
+ */
+int
+netmap_vp_rxsync(struct netmap_kring *kring, int flags)
+{
+	int n;
+
+	mtx_lock(&kring->q_lock);
+	n = netmap_vp_rxsync_locked(kring, flags);
+	mtx_unlock(&kring->q_lock);
+	return n;
+}
+
+int
+netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna,
+		struct netmap_bdg_ops *ops)
+{
+	return ops->bwrap_attach(nr_name, hwna);
+}
+
+
+/* Bridge wrapper code (bwrap).
+ * This is used to connect a non-VALE-port netmap_adapter (hwna) to a
+ * VALE switch.
+ * The main task is to swap the meaning of tx and rx rings to match the
+ * expectations of the VALE switch code (see nm_bdg_flush).
+ *
+ * The bwrap works by interposing a netmap_bwrap_adapter between the
+ * rest of the system and the hwna. The netmap_bwrap_adapter looks like
+ * a netmap_vp_adapter to the rest of the system, but, internally, it
+ * translates all callbacks to what the hwna expects.
+ *
+ * Note that we have to intercept callbacks coming from two sides:
+ *
+ *  - callbacks coming from the netmap module are intercepted by
+ *    passing around the netmap_bwrap_adapter instead of the hwna
+ *
+ *  - callbacks coming from outside of the netmap module only know
+ *    about the hwna. This, however, only happens in interrupt
+ *    handlers, where only the hwna->nm_notify callback is called.
+ *    What the bwrap does is to overwrite the hwna->nm_notify callback
+ *    with its own netmap_bwrap_intr_notify.
+ *    XXX This assumes that the hwna->nm_notify callback was the
+ *    standard netmap_notify(), as is the case for NIC adapters.
+ *    Any additional action performed by hwna->nm_notify will not be
+ *    performed by netmap_bwrap_intr_notify.
+ *
+ * Additionally, the bwrap can optionally attach the host rings pair
+ * of the wrapped adapter to a different port of the switch.
+ */
+
+
+static void
+netmap_bwrap_dtor(struct netmap_adapter *na)
+{
+	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
+	struct netmap_adapter *hwna = bna->hwna;
+	struct nm_bridge *b = bna->up.na_bdg,
+		*bh = bna->host.na_bdg;
+
+	if (bna->host.up.nm_mem)
+		netmap_mem_put(bna->host.up.nm_mem);
+
+	if (b) {
+		netmap_bdg_detach_common(b, bna->up.bdg_port,
+			(bh ? bna->host.bdg_port : -1));
+	}
+
+	ND("na %p", na);
+	na->ifp = NULL;
+	bna->host.up.ifp = NULL;
+	hwna->na_vp = bna->saved_na_vp;
+	hwna->na_hostvp = NULL;
+	hwna->na_private = NULL;
+	hwna->na_flags &= ~NAF_BUSY;
+	netmap_adapter_put(hwna);
+
+}
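[Review note] The interception described above, reduced to its core (sketch; the real save/restore is done per-ring in netmap_bwrap_reg() further down):

```c
/* On register, divert the hw adapter's rx notifications into the
 * bridge; on unregister, put the original callback back. */
static void
bwrap_intercept_sketch(struct netmap_kring *kring, int onoff)
{
	if (onoff) {
		kring->save_notify = kring->nm_notify;	/* remember original */
		kring->nm_notify = netmap_bwrap_intr_notify;
	} else {
		kring->nm_notify = kring->save_notify;	/* restore */
		kring->save_notify = NULL;
	}
}
```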
+
+
+/*
+ * Intr callback for NICs connected to a bridge.
+ * Simply ignore tx interrupts (maybe we could try to recover space ?)
+ * and pass received packets from nic to the bridge.
+ *
+ * XXX TODO check locking: this is called from the interrupt
+ * handler so we should make sure that the interface is not
+ * disconnected while passing down an interrupt.
+ *
+ * Note, no user process can access this NIC or the host stack.
+ * The only part of the ring that is significant are the slots,
+ * and head/cur/tail are set from the kring as needed
+ * (part as a receive ring, part as a transmit ring).
+ *
+ * callback that overwrites the hwna notify callback.
+ * Packets come from the outside or from the host stack and are put on an
+ * hwna rx ring.
+ * The bridge wrapper then sends the packets through the bridge.
+ */
+static int
+netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags)
+{
+	struct netmap_adapter *na = kring->na;
+	struct netmap_bwrap_adapter *bna = na->na_private;
+	struct netmap_kring *bkring;
+	struct netmap_vp_adapter *vpna = &bna->up;
+	u_int ring_nr = kring->ring_id;
+	int ret = NM_IRQ_COMPLETED;
+	int error;
+
+	if (netmap_verbose)
+		D("%s %s 0x%x", na->name, kring->name, flags);
+
+	bkring = vpna->up.tx_rings[ring_nr];
+
+	/* make sure the ring is not disabled */
+	if (nm_kr_tryget(kring, 0 /* can't sleep */, NULL)) {
+		return EIO;
+	}
+
+	if (netmap_verbose)
+		D("%s head %d cur %d tail %d", na->name,
+			kring->rhead, kring->rcur, kring->rtail);
+
+	/* simulate a user wakeup on the rx ring, to
+	 * fetch packets that have arrived.
+	 */
+	error = kring->nm_sync(kring, 0);
+	if (error)
+		goto put_out;
+	if (kring->nr_hwcur == kring->nr_hwtail) {
+		if (netmap_verbose)
+			D("how strange, interrupt with no packets on %s",
+				na->name);
+		goto put_out;
+	}
+
+	/* new packets are kring->rcur to kring->nr_hwtail, and the bkring
+	 * had hwcur == bkring->rhead. So advance bkring->rhead to kring->nr_hwtail
+	 * to push all packets out.
+	 */
+	bkring->rhead = bkring->rcur = kring->nr_hwtail;
+
+	bkring->nm_sync(bkring, flags);
+
+	/* mark all buffers as released on this ring */
+	kring->rhead = kring->rcur = kring->rtail = kring->nr_hwtail;
+	/* another call to actually release the buffers */
+	error = kring->nm_sync(kring, 0);
+
+	/* The second rxsync may have further advanced hwtail. If this happens,
+	 *  return NM_IRQ_RESCHED, otherwise just return NM_IRQ_COMPLETED. */
+	if (kring->rcur != kring->nr_hwtail) {
+		ret = NM_IRQ_RESCHED;
+	}
+put_out:
+	nm_kr_put(kring);
+
+	return error ? error : ret;
+}
+
+
+/* nm_register callback for bwrap */
+int
+netmap_bwrap_reg(struct netmap_adapter *na, int onoff)
+{
+	struct netmap_bwrap_adapter *bna =
+		(struct netmap_bwrap_adapter *)na;
+	struct netmap_adapter *hwna = bna->hwna;
+	struct netmap_vp_adapter *hostna = &bna->host;
+	int error, i;
+	enum txrx t;
+
+	ND("%s %s", na->name, onoff ? "on" : "off");
+
+	if (onoff) {
+		/* netmap_do_regif has been called on the bwrap na.
+		 * We need to pass the information about the
+		 * memory allocator down to the hwna before
+		 * putting it in netmap mode
+		 */
+		hwna->na_lut = na->na_lut;
+
+		if (hostna->na_bdg) {
+			/* if the host rings have been attached to switch,
+			 * we need to copy the memory allocator information
+			 * in the hostna also
+			 */
+			hostna->up.na_lut = na->na_lut;
+		}
+
+	}
+
+	/* pass down the pending ring state information */
+	for_rx_tx(t) {
+		for (i = 0; i < netmap_all_rings(na, t); i++) {
+			NMR(hwna, nm_txrx_swap(t))[i]->nr_pending_mode =
+				NMR(na, t)[i]->nr_pending_mode;
+		}
+	}
+
+	/* forward the request to the hwna */
+	error = hwna->nm_register(hwna, onoff);
+	if (error)
+		return error;
+
+	/* copy up the current ring state information */
+	for_rx_tx(t) {
+		for (i = 0; i < netmap_all_rings(na, t); i++) {
+			struct netmap_kring *kring = NMR(hwna,
+				nm_txrx_swap(t))[i];
+			NMR(na, t)[i]->nr_mode = kring->nr_mode;
+		}
+	}
+
+	/* impersonate a netmap_vp_adapter */
+	netmap_vp_reg(na, onoff);
+	if (hostna->na_bdg)
+		netmap_vp_reg(&hostna->up, onoff);
+
+	if (onoff) {
+		u_int i;
+		/* intercept the hwna nm_notify callback on the hw rings */
+		for (i = 0; i < hwna->num_rx_rings; i++) {
+			hwna->rx_rings[i]->save_notify = hwna->rx_rings[i]->nm_notify;
+			hwna->rx_rings[i]->nm_notify = netmap_bwrap_intr_notify;
+		}
+		i = hwna->num_rx_rings; /* for safety */
+		/* save the host ring notify unconditionally */
+		for (; i < netmap_real_rings(hwna, NR_RX); i++) {
+			hwna->rx_rings[i]->save_notify =
+				hwna->rx_rings[i]->nm_notify;
+			if (hostna->na_bdg) {
+				/* also intercept the host ring notify */
+				hwna->rx_rings[i]->nm_notify =
+					netmap_bwrap_intr_notify;
+				na->tx_rings[i]->nm_sync = na->nm_txsync;
+			}
+		}
+		if (na->active_fds == 0)
+			na->na_flags |= NAF_NETMAP_ON;
+	} else {
+		u_int i;
+
+		if (na->active_fds == 0)
+			na->na_flags &= ~NAF_NETMAP_ON;
+
+		/* reset all notify callbacks (including host ring) */
+		for (i = 0; i < netmap_all_rings(hwna, NR_RX); i++) {
+			hwna->rx_rings[i]->nm_notify =
+				hwna->rx_rings[i]->save_notify;
+			hwna->rx_rings[i]->save_notify = NULL;
+		}
+		hwna->na_lut.lut = NULL;
+		hwna->na_lut.plut = NULL;
+		hwna->na_lut.objtotal = 0;
+		hwna->na_lut.objsize = 0;
+
+		/* pass ownership of the netmap rings to the hwna */
+		for_rx_tx(t) {
+			for (i = 0; i < netmap_all_rings(na, t); i++) {
+				NMR(na, t)[i]->ring = NULL;
+			}
+		}
+		/* reset the number of host rings to default */
+		for_rx_tx(t) {
+			nma_set_host_nrings(hwna, t, 1);
+		}
+
+	}
+
+	return 0;
+}
+
+/* nm_config callback for bwrap */
+static int
+netmap_bwrap_config(struct netmap_adapter *na, struct nm_config_info *info)
+{
+	struct netmap_bwrap_adapter *bna =
+		(struct netmap_bwrap_adapter *)na;
+	struct netmap_adapter *hwna = bna->hwna;
+	int error;
+
+	/* Forward the request to the hwna. It may happen that nobody
+	 * registered hwna yet, so netmap_mem_get_lut() may not have
+	 * been called yet.
*/ + error = netmap_mem_get_lut(hwna->nm_mem, &hwna->na_lut); + if (error) + return error; + netmap_update_config(hwna); + /* swap the results and propagate */ + info->num_tx_rings = hwna->num_rx_rings; + info->num_tx_descs = hwna->num_rx_desc; + info->num_rx_rings = hwna->num_tx_rings; + info->num_rx_descs = hwna->num_tx_desc; + info->rx_buf_maxsize = hwna->rx_buf_maxsize; + + return 0; +} + + +/* nm_krings_create callback for bwrap */ +int +netmap_bwrap_krings_create_common(struct netmap_adapter *na) +{ + struct netmap_bwrap_adapter *bna = + (struct netmap_bwrap_adapter *)na; + struct netmap_adapter *hwna = bna->hwna; + struct netmap_adapter *hostna = &bna->host.up; + int i, error = 0; + enum txrx t; + + /* also create the hwna krings */ + error = hwna->nm_krings_create(hwna); + if (error) { + return error; + } + + /* increment the usage counter for all the hwna krings */ + for_rx_tx(t) { + for (i = 0; i < netmap_all_rings(hwna, t); i++) { + NMR(hwna, t)[i]->users++; + } + } + + /* now create the actual rings */ + error = netmap_mem_rings_create(hwna); + if (error) { + goto err_dec_users; + } + + /* cross-link the netmap rings + * The original number of rings comes from hwna, + * rx rings on one side equals tx rings on the other. + */ + for_rx_tx(t) { + enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */ + for (i = 0; i < netmap_all_rings(hwna, r); i++) { + NMR(na, t)[i]->nkr_num_slots = NMR(hwna, r)[i]->nkr_num_slots; + NMR(na, t)[i]->ring = NMR(hwna, r)[i]->ring; + } + } + + if (na->na_flags & NAF_HOST_RINGS) { + /* the hostna rings are the host rings of the bwrap. + * The corresponding krings must point back to the + * hostna + */ + hostna->tx_rings = &na->tx_rings[na->num_tx_rings]; + hostna->rx_rings = &na->rx_rings[na->num_rx_rings]; + for_rx_tx(t) { + for (i = 0; i < nma_get_nrings(hostna, t); i++) { + NMR(hostna, t)[i]->na = hostna; + } + } + } + + return 0; + +err_dec_users: + for_rx_tx(t) { + for (i = 0; i < netmap_all_rings(hwna, t); i++) { + NMR(hwna, t)[i]->users--; + } + } + hwna->nm_krings_delete(hwna); + return error; +} + + +void +netmap_bwrap_krings_delete_common(struct netmap_adapter *na) +{ + struct netmap_bwrap_adapter *bna = + (struct netmap_bwrap_adapter *)na; + struct netmap_adapter *hwna = bna->hwna; + enum txrx t; + int i; + + ND("%s", na->name); + + /* decrement the usage counter for all the hwna krings */ + for_rx_tx(t) { + for (i = 0; i < netmap_all_rings(hwna, t); i++) { + NMR(hwna, t)[i]->users--; + } + } + + /* delete any netmap rings that are no longer needed */ + netmap_mem_rings_delete(hwna); + hwna->nm_krings_delete(hwna); +} + + +/* notify method for the bridge-->hwna direction */ +int +netmap_bwrap_notify(struct netmap_kring *kring, int flags) +{ + struct netmap_adapter *na = kring->na; + struct netmap_bwrap_adapter *bna = na->na_private; + struct netmap_adapter *hwna = bna->hwna; + u_int ring_n = kring->ring_id; + u_int lim = kring->nkr_num_slots - 1; + struct netmap_kring *hw_kring; + int error; + + ND("%s: na %s hwna %s", + (kring ? kring->name : "NULL!"), + (na ? na->name : "NULL!"), + (hwna ? 
hwna->name : "NULL!"));
+	hw_kring = hwna->tx_rings[ring_n];
+
+	if (nm_kr_tryget(hw_kring, 0, NULL)) {
+		return ENXIO;
+	}
+
+	/* first step: simulate a user wakeup on the rx ring */
+	netmap_vp_rxsync(kring, flags);
+	ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
+		na->name, ring_n,
+		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
+		kring->rhead, kring->rcur, kring->rtail,
+		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
+	/* second step: the new packets are sent on the tx ring
+	 * (which is actually the same ring)
+	 */
+	hw_kring->rhead = hw_kring->rcur = kring->nr_hwtail;
+	error = hw_kring->nm_sync(hw_kring, flags);
+	if (error)
+		goto put_out;
+
+	/* third step: now we are back on the rx ring */
+	/* claim ownership on all hw owned bufs */
+	kring->rhead = kring->rcur = nm_next(hw_kring->nr_hwtail, lim); /* skip past reserved slot */
+
+	/* fourth step: the user goes to sleep again, causing another rxsync */
+	netmap_vp_rxsync(kring, flags);
+	ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)",
+		na->name, ring_n,
+		kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease,
+		kring->rhead, kring->rcur, kring->rtail,
+		hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail);
+put_out:
+	nm_kr_put(hw_kring);
+
+	return error ? error : NM_IRQ_COMPLETED;
+}
+
+
+/* nm_bdg_ctl callback for the bwrap.
+ * Called on bridge-attach and detach, as an effect of vale-ctl -[ahd].
+ * On attach, it needs to provide a fake netmap_priv_d structure and
+ * perform a netmap_do_regif() on the bwrap. This will put both the
+ * bwrap and the hwna in netmap mode, with the netmap rings shared
+ * and cross linked. Moreover, it will start intercepting interrupts
+ * directed to hwna.
+ */
+static int
+netmap_bwrap_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na)
+{
+	struct netmap_priv_d *npriv;
+	struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na;
+	int error = 0;
+
+	if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) {
+		struct nmreq_vale_attach *req =
+			(struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body;
+		if (req->reg.nr_ringid != 0 ||
+			(req->reg.nr_mode != NR_REG_ALL_NIC &&
+			 req->reg.nr_mode != NR_REG_NIC_SW)) {
+			/* We only support attaching all the NIC rings
+			 * and/or the host stack.
+			 */
+			return EINVAL;
+		}
+		if (NETMAP_OWNED_BY_ANY(na)) {
+			return EBUSY;
+		}
+		if (bna->na_kpriv) {
+			/* nothing to do */
+			return 0;
+		}
+		npriv = netmap_priv_new();
+		if (npriv == NULL)
+			return ENOMEM;
+		npriv->np_ifp = na->ifp; /* let the priv destructor release the ref */
+		error = netmap_do_regif(npriv, na, req->reg.nr_mode,
+					req->reg.nr_ringid, req->reg.nr_flags);
+		if (error) {
+			netmap_priv_delete(npriv);
+			return error;
+		}
+		bna->na_kpriv = npriv;
+		na->na_flags |= NAF_BUSY;
+	} else {
+		if (na->active_fds == 0) /* not registered */
+			return EINVAL;
+		netmap_priv_delete(bna->na_kpriv);
+		bna->na_kpriv = NULL;
+		na->na_flags &= ~NAF_BUSY;
+	}
+
+	return error;
+}
+
+/* attach a bridge wrapper to the 'real' device */
+int
+netmap_bwrap_attach_common(struct netmap_adapter *na,
+		struct netmap_adapter *hwna)
+{
+	struct netmap_bwrap_adapter *bna;
+	struct netmap_adapter *hostna = NULL;
+	int error = 0;
+	enum txrx t;
+
+	/* make sure the NIC is not already in use */
+	if (NETMAP_OWNED_BY_ANY(hwna)) {
+		D("NIC %s busy, cannot attach to bridge", hwna->name);
+		return EBUSY;
+	}
+
+	bna = (struct netmap_bwrap_adapter *)na;
+	/* make bwrap ifp point to the real ifp */
+	na->ifp = hwna->ifp;
+	if_ref(na->ifp);
+	na->na_private = bna;
+	/* fill the ring data for the bwrap adapter with rx/tx meanings
+	 * swapped. The real cross-linking will be done during register,
+	 * when all the krings will have been created.
+	 */
+	for_rx_tx(t) {
+		enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
+		nma_set_nrings(na, t, nma_get_nrings(hwna, r));
+		nma_set_ndesc(na, t, nma_get_ndesc(hwna, r));
+	}
+	na->nm_dtor = netmap_bwrap_dtor;
+	na->nm_config = netmap_bwrap_config;
+	na->nm_bdg_ctl = netmap_bwrap_bdg_ctl;
+	na->pdev = hwna->pdev;
+	na->nm_mem = netmap_mem_get(hwna->nm_mem);
+	na->virt_hdr_len = hwna->virt_hdr_len;
+	na->rx_buf_maxsize = hwna->rx_buf_maxsize;
+
+	bna->hwna = hwna;
+	netmap_adapter_get(hwna);
+	hwna->na_private = bna; /* weak reference */
+	bna->saved_na_vp = hwna->na_vp;
+	hwna->na_vp = &bna->up;
+	bna->up.up.na_vp = &(bna->up);
+
+	if (hwna->na_flags & NAF_HOST_RINGS) {
+		if (hwna->na_flags & NAF_SW_ONLY)
+			na->na_flags |= NAF_SW_ONLY;
+		na->na_flags |= NAF_HOST_RINGS;
+		hostna = &bna->host.up;
+
+		/* limit the number of host rings to that of hw */
+		nm_bound_var(&hostna->num_tx_rings, 1, 1,
+			nma_get_nrings(hwna, NR_TX), NULL);
+		nm_bound_var(&hostna->num_rx_rings, 1, 1,
+			nma_get_nrings(hwna, NR_RX), NULL);
+
+		snprintf(hostna->name, sizeof(hostna->name), "%s^", na->name);
+		hostna->ifp = hwna->ifp;
+		for_rx_tx(t) {
+			enum txrx r = nm_txrx_swap(t);
+			u_int nr = nma_get_nrings(hostna, t);
+
+			nma_set_nrings(hostna, t, nr);
+			nma_set_host_nrings(na, t, nr);
+			if (nma_get_host_nrings(hwna, t) < nr) {
+				nma_set_host_nrings(hwna, t, nr);
+			}
+			nma_set_ndesc(hostna, t, nma_get_ndesc(hwna, r));
+		}
+		// hostna->nm_txsync = netmap_bwrap_host_txsync;
+		// hostna->nm_rxsync = netmap_bwrap_host_rxsync;
+		hostna->nm_mem = netmap_mem_get(na->nm_mem);
+		hostna->na_private = bna;
+		hostna->na_vp = &bna->up;
+		na->na_hostvp = hwna->na_hostvp =
+			hostna->na_hostvp = &bna->host;
+		hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */
+		hostna->rx_buf_maxsize = hwna->rx_buf_maxsize;
+	}
+
+	ND("%s<->%s txr %d txd %d rxr %d rxd %d",
+		na->name, na->ifp->if_xname,
+		na->num_tx_rings, na->num_tx_desc,
+		na->num_rx_rings, na->num_rx_desc);
+
+	error = netmap_attach_common(na);
+	if (error) {
+		goto err_put;
+	}
+	hwna->na_flags |= NAF_BUSY;
+	return 0;
+
+err_put:
+	hwna->na_vp = hwna->na_hostvp =
NULL; + netmap_adapter_put(hwna); + return error; + +} + +struct nm_bridge * +netmap_init_bridges2(u_int n) +{ + int i; + struct nm_bridge *b; + + b = nm_os_malloc(sizeof(struct nm_bridge) * n); + if (b == NULL) + return NULL; + for (i = 0; i < n; i++) + BDG_RWINIT(&b[i]); + return b; +} + +void +netmap_uninit_bridges2(struct nm_bridge *b, u_int n) +{ + int i; + + if (b == NULL) + return; + + for (i = 0; i < n; i++) + BDG_RWDESTROY(&b[i]); + nm_os_free(b); +} + +int +netmap_init_bridges(void) +{ +#ifdef CONFIG_NET_NS + return netmap_bns_register(); +#else + nm_bridges = netmap_init_bridges2(NM_BRIDGES); + if (nm_bridges == NULL) + return ENOMEM; + return 0; +#endif +} + +void +netmap_uninit_bridges(void) +{ +#ifdef CONFIG_NET_NS + netmap_bns_unregister(); +#else + netmap_uninit_bridges2(nm_bridges, NM_BRIDGES); +#endif +} Index: head/sys/dev/netmap/netmap_freebsd.c =================================================================== --- head/sys/dev/netmap/netmap_freebsd.c +++ head/sys/dev/netmap/netmap_freebsd.c @@ -270,13 +270,19 @@ } int -nm_os_mbuf_has_offld(struct mbuf *m) +nm_os_mbuf_has_csum_offld(struct mbuf *m) { return m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_SCTP | CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | - CSUM_SCTP_IPV6 | CSUM_TSO); + CSUM_SCTP_IPV6); } +int +nm_os_mbuf_has_seg_offld(struct mbuf *m) +{ + return m->m_pkthdr.csum_flags & CSUM_TSO; +} + static void freebsd_generic_rx_handler(struct ifnet *ifp, struct mbuf *m) { @@ -632,7 +638,7 @@ void nm_os_extmem_delete(struct nm_os_extmem *e) { - D("freeing %jx bytes", (uintmax_t)e->size); + D("freeing %zx bytes", (size_t)e->size); vm_map_remove(kernel_map, e->kva, e->kva + e->size); nm_os_free(e); } @@ -701,7 +707,7 @@ VMFS_OPTIMAL_SPACE, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ | VM_PROT_WRITE, 0); if (rv != KERN_SUCCESS) { - D("vm_map_find(%jx) failed", (uintmax_t)e->size); + D("vm_map_find(%zx) failed", (size_t)e->size); goto out_rel; } rv = vm_map_wire(kernel_map, e->kva, e->kva + e->size, @@ -1538,6 +1544,30 @@ CURVNET_RESTORE(); return error; +} + +void +nm_os_onattach(struct ifnet *ifp) +{ +} + +void +nm_os_onenter(struct ifnet *ifp) +{ + struct netmap_adapter *na = NA(ifp); + + na->if_transmit = ifp->if_transmit; + ifp->if_transmit = netmap_transmit; + ifp->if_capenable |= IFCAP_NETMAP; +} + +void +nm_os_onexit(struct ifnet *ifp) +{ + struct netmap_adapter *na = NA(ifp); + + ifp->if_transmit = na->if_transmit; + ifp->if_capenable &= ~IFCAP_NETMAP; } extern struct cdevsw netmap_cdevsw; /* XXX used in netmap.c, should go elsewhere */ Index: head/sys/dev/netmap/netmap_generic.c =================================================================== --- head/sys/dev/netmap/netmap_generic.c +++ head/sys/dev/netmap/netmap_generic.c @@ -89,117 +89,6 @@ #define MBUF_RXQ(m) ((m)->m_pkthdr.flowid) #define smp_mb() -/* - * FreeBSD mbuf allocator/deallocator in emulation mode: - */ -#if __FreeBSD_version < 1100000 - -/* - * For older versions of FreeBSD: - * - * We allocate EXT_PACKET mbuf+clusters, but need to set M_NOFREE - * so that the destructor, if invoked, will not free the packet. - * In principle we should set the destructor only on demand, - * but since there might be a race we better do it on allocation. - * As a consequence, we also need to set the destructor or we - * would leak buffers. - */ - -/* mbuf destructor, also need to change the type to EXT_EXTREF, - * add an M_NOFREE flag, and then clear the flag and - * chain into uma_zfree(zone_pack, mf) - * (or reinstall the buffer ?) 
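The split of nm_os_mbuf_has_offld() into a checksum predicate and a segmentation predicate is what makes the new dev.netmap.generic_hwcsum sysctl possible: checksum requests can be tolerated when the NIC is allowed to compute them, while TSO mbufs must always be dropped in emulated mode. A minimal runnable sketch of the resulting drop policy (the flag values are illustrative, not FreeBSD's):

#include <assert.h>

#define DEMO_CSUM_TCP 0x01  /* "needs checksum offload" */
#define DEMO_CSUM_TSO 0x02  /* "needs segmentation offload" */

static int netmap_generic_hwcsum;   /* 0 by default, as in the patch */

static int has_csum_offld(int f) { return f & DEMO_CSUM_TCP; }
static int has_seg_offld(int f) { return f & DEMO_CSUM_TSO; }

static int
must_drop(int f)
{
    if (!netmap_generic_hwcsum && has_csum_offld(f))
        return 1;   /* checksum offload not allowed */
    if (has_seg_offld(f))
        return 1;   /* segmentation can never be emulated */
    return 0;
}

int
main(void)
{
    assert(must_drop(DEMO_CSUM_TCP));   /* dropped by default */
    netmap_generic_hwcsum = 1;
    assert(!must_drop(DEMO_CSUM_TCP));  /* now let through */
    assert(must_drop(DEMO_CSUM_TSO));   /* TSO is still dropped */
    return 0;
}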
- */ -#define SET_MBUF_DESTRUCTOR(m, fn) do { \ - (m)->m_ext.ext_free = (void *)fn; \ - (m)->m_ext.ext_type = EXT_EXTREF; \ -} while (0) - -static int -void_mbuf_dtor(struct mbuf *m, void *arg1, void *arg2) -{ - /* restore original mbuf */ - m->m_ext.ext_buf = m->m_data = m->m_ext.ext_arg1; - m->m_ext.ext_arg1 = NULL; - m->m_ext.ext_type = EXT_PACKET; - m->m_ext.ext_free = NULL; - if (MBUF_REFCNT(m) == 0) - SET_MBUF_REFCNT(m, 1); - uma_zfree(zone_pack, m); - - return 0; -} - -static inline struct mbuf * -nm_os_get_mbuf(struct ifnet *ifp, int len) -{ - struct mbuf *m; - - (void)ifp; - m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); - if (m) { - /* m_getcl() (mb_ctor_mbuf) has an assert that checks that - * M_NOFREE flag is not specified as third argument, - * so we have to set M_NOFREE after m_getcl(). */ - m->m_flags |= M_NOFREE; - m->m_ext.ext_arg1 = m->m_ext.ext_buf; // XXX save - m->m_ext.ext_free = (void *)void_mbuf_dtor; - m->m_ext.ext_type = EXT_EXTREF; - ND(5, "create m %p refcnt %d", m, MBUF_REFCNT(m)); - } - return m; -} - -#else /* __FreeBSD_version >= 1100000 */ - -/* - * Newer versions of FreeBSD, using a straightforward scheme. - * - * We allocate mbufs with m_gethdr(), since the mbuf header is needed - * by the driver. We also attach a customly-provided external storage, - * which in this case is a netmap buffer. When calling m_extadd(), however - * we pass a NULL address, since the real address (and length) will be - * filled in by nm_os_generic_xmit_frame() right before calling - * if_transmit(). - * - * The dtor function does nothing, however we need it since mb_free_ext() - * has a KASSERT(), checking that the mbuf dtor function is not NULL. - */ - -#if __FreeBSD_version <= 1200050 -static void void_mbuf_dtor(struct mbuf *m, void *arg1, void *arg2) { } -#else /* __FreeBSD_version >= 1200051 */ -/* The arg1 and arg2 pointers argument were removed by r324446, which - * in included since version 1200051. */ -static void void_mbuf_dtor(struct mbuf *m) { } -#endif /* __FreeBSD_version >= 1200051 */ - -#define SET_MBUF_DESTRUCTOR(m, fn) do { \ - (m)->m_ext.ext_free = (fn != NULL) ? \ - (void *)fn : (void *)void_mbuf_dtor; \ -} while (0) - -static inline struct mbuf * -nm_os_get_mbuf(struct ifnet *ifp, int len) -{ - struct mbuf *m; - - (void)ifp; - (void)len; - - m = m_gethdr(M_NOWAIT, MT_DATA); - if (m == NULL) { - return m; - } - - m_extadd(m, NULL /* buf */, 0 /* size */, void_mbuf_dtor, - NULL, NULL, 0, EXT_NET_DRV); - - return m; -} - -#endif /* __FreeBSD_version >= 1100000 */ - #elif defined _WIN32 #include "win_glue.h" @@ -1161,7 +1050,7 @@ } D("Native netmap adapter %p restored", prev_na); } - NM_ATTACH_NA(ifp, prev_na); + NM_RESTORE_NA(ifp, prev_na); /* * netmap_detach_common(), that it's called after this function, * overrides WNA(ifp) if na->ifp is not NULL. @@ -1202,7 +1091,7 @@ } #endif - if (NA(ifp) && !NM_NA_VALID(ifp)) { + if (NM_NA_CLASH(ifp)) { /* If NA(ifp) is not null but there is no valid netmap * adapter it means that someone else is using the same * pointer (e.g. ax25_ptr on linux). 
This happens for @@ -1230,6 +1119,7 @@ na->ifp = ifp; na->num_tx_desc = num_tx_desc; na->num_rx_desc = num_rx_desc; + na->rx_buf_maxsize = 32768; na->nm_register = &generic_netmap_register; na->nm_txsync = &generic_netmap_txsync; na->nm_rxsync = &generic_netmap_rxsync; @@ -1253,8 +1143,8 @@ return retval; } - gna->prev = NA(ifp); /* save old na */ - if (gna->prev != NULL) { + if (NM_NA_VALID(ifp)) { + gna->prev = NA(ifp); /* save old na */ netmap_adapter_get(gna->prev); } NM_ATTACH_NA(ifp, na); Index: head/sys/dev/netmap/netmap_kern.h =================================================================== --- head/sys/dev/netmap/netmap_kern.h +++ head/sys/dev/netmap/netmap_kern.h @@ -275,6 +275,7 @@ struct nm_bdg_fwd; struct nm_bridge; struct netmap_priv_d; +struct nm_bdg_args; /* os-specific NM_SELINFO_T initialzation/destruction functions */ void nm_os_selinfo_init(NM_SELINFO_T *); @@ -305,6 +306,12 @@ void nm_os_free(void *); void nm_os_vfree(void *); +/* os specific attach/detach enter/exit-netmap-mode routines */ +void nm_os_onattach(struct ifnet *); +void nm_os_ondetach(struct ifnet *); +void nm_os_onenter(struct ifnet *); +void nm_os_onexit(struct ifnet *); + /* passes a packet up to the host stack. * If the packet is sent (or dropped) immediately it returns NULL, * otherwise it links the packet to prev and returns m. @@ -313,7 +320,8 @@ */ void *nm_os_send_up(struct ifnet *, struct mbuf *m, struct mbuf *prev); -int nm_os_mbuf_has_offld(struct mbuf *m); +int nm_os_mbuf_has_seg_offld(struct mbuf *m); +int nm_os_mbuf_has_csum_offld(struct mbuf *m); #include "netmap_mbq.h" @@ -507,11 +515,10 @@ struct netmap_kring *pipe; /* if this is a pipe ring, * pointer to the other end */ + uint32_t pipe_tail; /* hwtail updated by the other end */ #endif /* WITH_PIPES */ -#ifdef WITH_VALE int (*save_notify)(struct netmap_kring *kring, int flags); -#endif #ifdef WITH_MONITOR /* array of krings that are monitoring this kring */ @@ -634,6 +641,7 @@ }; struct netmap_vp_adapter; // forward +struct nm_bridge; /* Struct to be filled by nm_config callbacks. */ struct nm_config_info { @@ -645,6 +653,14 @@ }; /* + * default type for the magic field. + * May be overriden in glue code. + */ +#ifndef NM_OS_MAGIC +#define NM_OS_MAGIC uint32_t +#endif /* !NM_OS_MAGIC */ + +/* * The "struct netmap_adapter" extends the "struct adapter" * (or equivalent) device descriptor. * It contains all base fields needed to support netmap operation. @@ -660,7 +676,7 @@ * always exists and is at least 32 bits) contains a magic * value which we can use to detect that the interface is good. */ - uint32_t magic; + NM_OS_MAGIC magic; uint32_t na_flags; /* enabled, and other flags */ #define NAF_SKIP_INTR 1 /* use the regular interrupt handler. * useful during initialization @@ -696,6 +712,8 @@ u_int num_rx_rings; /* number of adapter receive rings */ u_int num_tx_rings; /* number of adapter transmit rings */ + u_int num_host_rx_rings; /* number of host receive rings */ + u_int num_host_tx_rings; /* number of host transmit rings */ u_int num_tx_desc; /* number of descriptor in each queue */ u_int num_rx_desc; @@ -783,7 +801,6 @@ int (*nm_config)(struct netmap_adapter *, struct nm_config_info *info); int (*nm_krings_create)(struct netmap_adapter *); void (*nm_krings_delete)(struct netmap_adapter *); -#ifdef WITH_VALE /* * nm_bdg_attach() initializes the na_vp field to point * to an adapter that can be attached to a VALE switch. If the @@ -799,7 +816,8 @@ * initializations * Called with NMG_LOCK held. 
*/ - int (*nm_bdg_attach)(const char *bdg_name, struct netmap_adapter *); + int (*nm_bdg_attach)(const char *bdg_name, struct netmap_adapter *, + struct nm_bridge *); int (*nm_bdg_ctl)(struct nmreq_header *, struct netmap_adapter *); /* adapter used to attach this adapter to a VALE switch (if any) */ @@ -807,7 +825,6 @@ /* adapter used to attach the host rings of this adapter * to a VALE switch (if any) */ struct netmap_vp_adapter *na_hostvp; -#endif /* standard refcount to control the lifetime of the adapter * (it should be equal to the lifetime of the corresponding ifp) @@ -843,6 +860,10 @@ unsigned rx_buf_maxsize; char name[NETMAP_REQ_IFNAMSIZ]; /* used at least by pipes */ + +#ifdef WITH_MONITOR + unsigned long monitor_id; /* debugging */ +#endif }; static __inline u_int @@ -866,6 +887,12 @@ return (t == NR_TX ? na->num_tx_rings : na->num_rx_rings); } +static __inline u_int +nma_get_host_nrings(struct netmap_adapter *na, enum txrx t) +{ + return (t == NR_TX ? na->num_host_tx_rings : na->num_host_rx_rings); +} + static __inline void nma_set_nrings(struct netmap_adapter *na, enum txrx t, u_int v) { @@ -875,6 +902,15 @@ na->num_rx_rings = v; } +static __inline void +nma_set_host_nrings(struct netmap_adapter *na, enum txrx t, u_int v) +{ + if (t == NR_TX) + na->num_host_tx_rings = v; + else + na->num_host_rx_rings = v; +} + static __inline struct netmap_kring** NMR(struct netmap_adapter *na, enum txrx t) { @@ -964,13 +1000,22 @@ }; #endif /* WITH_GENERIC */ -static __inline int +static __inline u_int netmap_real_rings(struct netmap_adapter *na, enum txrx t) { - return nma_get_nrings(na, t) + !!(na->na_flags & NAF_HOST_RINGS); + return nma_get_nrings(na, t) + + !!(na->na_flags & NAF_HOST_RINGS) * nma_get_host_nrings(na, t); } -#ifdef WITH_VALE +/* account for fake rings */ +static __inline u_int +netmap_all_rings(struct netmap_adapter *na, enum txrx t) +{ + return max(nma_get_nrings(na, t) + 1, netmap_real_rings(na, t)); +} + +int netmap_default_bdg_attach(const char *name, struct netmap_adapter *na, + struct nm_bridge *); struct nm_bdg_polling_state; /* * Bridge wrapper for non VALE ports attached to a VALE switch. @@ -1038,12 +1083,12 @@ int nm_bdg_ctl_attach(struct nmreq_header *hdr, void *auth_token); int nm_bdg_ctl_detach(struct nmreq_header *hdr, void *auth_token); int nm_bdg_polling(struct nmreq_header *hdr); -int netmap_bwrap_attach(const char *name, struct netmap_adapter *); +int netmap_bdg_list(struct nmreq_header *hdr); + +#ifdef WITH_VALE int netmap_vi_create(struct nmreq_header *hdr, int); int nm_vi_create(struct nmreq_header *); int nm_vi_destroy(const char *name); -int netmap_bdg_list(struct nmreq_header *hdr); - #else /* !WITH_VALE */ #define netmap_vi_create(hdr, a) (EOPNOTSUPP) #endif /* WITH_VALE */ @@ -1262,7 +1307,6 @@ #define netmap_ifp_to_vp(_ifp) NULL #define netmap_ifp_to_host_vp(_ifp) NULL #define netmap_bdg_idx(_vp) -1 -#define netmap_bdg_name(_vp) NULL #endif /* WITH_VALE */ static inline int @@ -1293,68 +1337,9 @@ na->rx_rings[na->num_rx_rings]->nr_pending_mode; } -/* set/clear native flags and if_transmit/netdev_ops */ -static inline void -nm_set_native_flags(struct netmap_adapter *na) -{ - struct ifnet *ifp = na->ifp; +void nm_set_native_flags(struct netmap_adapter *); +void nm_clear_native_flags(struct netmap_adapter *); - /* We do the setup for intercepting packets only if we are the - * first user of this adapapter. */ - if (na->active_fds > 0) { - return; - } - - na->na_flags |= NAF_NETMAP_ON; -#ifdef IFCAP_NETMAP /* or FreeBSD ? 
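The new accounting helpers are subtle enough to deserve a worked example. netmap_real_rings() counts hardware rings plus the configured host rings (when NAF_HOST_RINGS is set), while netmap_all_rings() additionally reserves room for one fake host ring even on adapters without host rings. A standalone check of both cases (simplified types, flag value illustrative):

#include <assert.h>

#define DEMO_NAF_HOST_RINGS 32  /* illustrative flag value */
#define MAX(a, b) ((a) > (b) ? (a) : (b))

struct demo_na { unsigned flags, nrings, host_nrings; };

static unsigned
real_rings(struct demo_na *na)
{
    return na->nrings +
        !!(na->flags & DEMO_NAF_HOST_RINGS) * na->host_nrings;
}

static unsigned
all_rings(struct demo_na *na)
{
    return MAX(na->nrings + 1, real_rings(na));
}

int
main(void)
{
    struct demo_na a = { DEMO_NAF_HOST_RINGS, 4, 2 }; /* 4 hw + 2 host */
    struct demo_na b = { 0, 4, 2 };     /* host rings disabled */

    assert(real_rings(&a) == 6 && all_rings(&a) == 6);
    assert(real_rings(&b) == 4 && all_rings(&b) == 5); /* 1 fake ring */
    return 0;
}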
*/ - ifp->if_capenable |= IFCAP_NETMAP; -#endif -#if defined (__FreeBSD__) - na->if_transmit = ifp->if_transmit; - ifp->if_transmit = netmap_transmit; -#elif defined (_WIN32) - (void)ifp; /* prevent a warning */ -#elif defined (linux) - na->if_transmit = (void *)ifp->netdev_ops; - ifp->netdev_ops = &((struct netmap_hw_adapter *)na)->nm_ndo; - ((struct netmap_hw_adapter *)na)->save_ethtool = ifp->ethtool_ops; - ifp->ethtool_ops = &((struct netmap_hw_adapter*)na)->nm_eto; -#endif /* linux */ - nm_update_hostrings_mode(na); -} - -static inline void -nm_clear_native_flags(struct netmap_adapter *na) -{ - struct ifnet *ifp = na->ifp; - - /* We undo the setup for intercepting packets only if we are the - * last user of this adapapter. */ - if (na->active_fds > 0) { - return; - } - - nm_update_hostrings_mode(na); - -#if defined(__FreeBSD__) - ifp->if_transmit = na->if_transmit; -#elif defined(_WIN32) - (void)ifp; /* prevent a warning */ -#else - ifp->netdev_ops = (void *)na->if_transmit; - ifp->ethtool_ops = ((struct netmap_hw_adapter*)na)->save_ethtool; -#endif - na->na_flags &= ~NAF_NETMAP_ON; -#ifdef IFCAP_NETMAP /* or FreeBSD ? */ - ifp->if_capenable &= ~IFCAP_NETMAP; -#endif -} - -#ifdef linux -int netmap_linux_config(struct netmap_adapter *na, - struct nm_config_info *info); -#endif /* linux */ - /* * nm_*sync_prologue() functions are used in ioctl/poll and ptnetmap * kthreads. @@ -1458,7 +1443,6 @@ struct netmap_mem_d *nmd, struct netmap_adapter **na); -#ifdef WITH_VALE /* * The following bridge-related functions are used by other * kernel modules. @@ -1473,39 +1457,49 @@ typedef int (*bdg_config_fn_t)(struct nm_ifreq *); typedef void (*bdg_dtor_fn_t)(const struct netmap_vp_adapter *); typedef void *(*bdg_update_private_data_fn_t)(void *private_data, void *callback_data, int *error); +typedef int (*bdg_vp_create_fn_t)(struct nmreq_header *hdr, + struct ifnet *ifp, struct netmap_mem_d *nmd, + struct netmap_vp_adapter **ret); +typedef int (*bdg_bwrap_attach_fn_t)(const char *nr_name, struct netmap_adapter *hwna); struct netmap_bdg_ops { bdg_lookup_fn_t lookup; bdg_config_fn_t config; bdg_dtor_fn_t dtor; + bdg_vp_create_fn_t vp_create; + bdg_bwrap_attach_fn_t bwrap_attach; + char name[IFNAMSIZ]; }; +int netmap_bwrap_attach(const char *name, struct netmap_adapter *, struct netmap_bdg_ops *); +int netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *private_data, void *auth_token); -uint32_t netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring, - struct netmap_vp_adapter *, void *private_data); - #define NM_BRIDGES 8 /* number of bridges */ #define NM_BDG_MAXPORTS 254 /* up to 254 */ #define NM_BDG_BROADCAST NM_BDG_MAXPORTS #define NM_BDG_NOPORT (NM_BDG_MAXPORTS+1) -/* these are redefined in case of no VALE support */ -int netmap_get_bdg_na(struct nmreq_header *hdr, struct netmap_adapter **na, - struct netmap_mem_d *nmd, int create); struct nm_bridge *netmap_init_bridges2(u_int); void netmap_uninit_bridges2(struct nm_bridge *, u_int); int netmap_init_bridges(void); void netmap_uninit_bridges(void); -int netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *private_data, void *auth_token); int nm_bdg_update_private_data(const char *name, bdg_update_private_data_fn_t callback, void *callback_data, void *auth_token); int netmap_bdg_config(struct nm_ifreq *nifr); -void *netmap_bdg_create(const char *bdg_name, int *return_status); -int netmap_bdg_destroy(const char *bdg_name, void *auth_token); +#ifdef WITH_VALE +uint32_t netmap_bdg_learning(struct 
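The inline bodies deleted above move out of line; the replacements live in netmap.c and are not part of this hunk. A plausible sketch of the new set function, assuming it simply combines the OS-independent bookkeeping with the nm_os_onenter() hook whose FreeBSD implementation appears earlier in this patch:

void
nm_set_native_flags(struct netmap_adapter *na)
{
    /* only the first user of the adapter intercepts traffic */
    if (na->active_fds > 0)
        return;

    na->na_flags |= NAF_NETMAP_ON;
    nm_os_onenter(na->ifp);     /* OS-specific interception */
    nm_update_hostrings_mode(na);
}

nm_clear_native_flags() would be the symmetric teardown built on nm_os_onexit().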
nm_bdg_fwd *ft, uint8_t *dst_ring, + struct netmap_vp_adapter *, void *private_data); + +/* these are redefined in case of no VALE support */ +int netmap_get_vale_na(struct nmreq_header *hdr, struct netmap_adapter **na, + struct netmap_mem_d *nmd, int create); +void *netmap_vale_create(const char *bdg_name, int *return_status); +int netmap_vale_destroy(const char *bdg_name, void *auth_token); + #else /* !WITH_VALE */ -#define netmap_get_bdg_na(_1, _2, _3, _4) 0 -#define netmap_init_bridges(_1) 0 -#define netmap_uninit_bridges() -#define netmap_bdg_regops(_1, _2) EINVAL +#define netmap_bdg_learning(_1, _2, _3, _4) 0 +#define netmap_get_vale_na(_1, _2, _3, _4) 0 +#define netmap_bdg_create(_1, _2) NULL +#define netmap_bdg_destroy(_1, _2) 0 #endif /* !WITH_VALE */ #ifdef WITH_PIPES @@ -1611,6 +1605,7 @@ extern int netmap_txsync_retry; extern int netmap_flags; +extern int netmap_generic_hwcsum; extern int netmap_generic_mit; extern int netmap_generic_ringsize; extern int netmap_generic_rings; @@ -1620,12 +1615,18 @@ extern int ptnetmap_tx_workers; /* - * NA returns a pointer to the struct netmap adapter from the ifp, - * WNA is used to write it. + * NA returns a pointer to the struct netmap adapter from the ifp. + * WNA is os-specific and must be defined in glue code. */ #define NA(_ifp) ((struct netmap_adapter *)WNA(_ifp)) /* + * we provide a default implementation of NM_ATTACH_NA/NM_DETACH_NA + * based on the WNA field. + * Glue code may override this by defining its own NM_ATTACH_NA + */ +#ifndef NM_ATTACH_NA +/* * On old versions of FreeBSD, NA(ifp) is a pspare. On linux we * overload another pointer in the netdev. * @@ -1643,7 +1644,13 @@ NA(ifp)->magic = \ ((uint32_t)(uintptr_t)NA(ifp)) ^ NETMAP_MAGIC; \ } while(0) +#define NM_RESTORE_NA(ifp, na) WNA(ifp) = na; +#define NM_DETACH_NA(ifp) do { WNA(ifp) = NULL; } while (0) +#define NM_NA_CLASH(ifp) (NA(ifp) && !NM_NA_VALID(ifp)) +#endif /* !NM_ATTACH_NA */ + + #define NM_IS_NATIVE(ifp) (NM_NA_VALID(ifp) && NA(ifp)->nm_dtor == netmap_hw_dtor) #if defined(__FreeBSD__) @@ -1752,21 +1759,28 @@ } } +#ifdef NETMAP_LINUX_HAVE_DMASYNC static inline void -netmap_sync_map(struct netmap_adapter *na, +netmap_sync_map_cpu(struct netmap_adapter *na, bus_dma_tag_t tag, bus_dmamap_t map, u_int sz, enum txrx t) { if (*map) { - if (t == NR_RX) - dma_sync_single_for_cpu(na->pdev, *map, sz, - DMA_FROM_DEVICE); - else - dma_sync_single_for_device(na->pdev, *map, sz, - DMA_TO_DEVICE); + dma_sync_single_for_cpu(na->pdev, *map, sz, + (t == NR_TX ? DMA_TO_DEVICE : DMA_FROM_DEVICE)); } } static inline void +netmap_sync_map_dev(struct netmap_adapter *na, + bus_dma_tag_t tag, bus_dmamap_t map, u_int sz, enum txrx t) +{ + if (*map) { + dma_sync_single_for_device(na->pdev, *map, sz, + (t == NR_TX ? 
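NM_NA_CLASH() builds on the magic-field scheme used by NM_ATTACH_NA() just above: the pointer stored in the ifnet is trusted only if the adapter's magic equals the pointer XOR-ed with NETMAP_MAGIC. A standalone demonstration (the constant and types here are illustrative):

#include <assert.h>
#include <stdint.h>

#define DEMO_NETMAP_MAGIC 0x52697a7a    /* illustrative constant */

struct demo_na { uint32_t magic; };

static int
na_valid(struct demo_na *na)    /* plays the role of NM_NA_VALID() */
{
    return na != NULL &&
        na->magic == ((uint32_t)(uintptr_t)na ^ DEMO_NETMAP_MAGIC);
}

static int
na_clash(struct demo_na *na)    /* plays the role of NM_NA_CLASH() */
{
    return na != NULL && !na_valid(na);
}

int
main(void)
{
    struct demo_na na;
    struct demo_na *p = &na;

    na.magic = (uint32_t)(uintptr_t)p ^ DEMO_NETMAP_MAGIC;
    assert(na_valid(p) && !na_clash(p));    /* properly attached */
    na.magic = 0;   /* the field was reused by someone else */
    assert(!na_valid(p) && na_clash(p));
    return 0;
}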
DMA_TO_DEVICE : DMA_FROM_DEVICE)); + } +} + +static inline void netmap_reload_map(struct netmap_adapter *na, bus_dma_tag_t tag, bus_dmamap_t map, void *buf) { @@ -1780,6 +1794,10 @@ *map = dma_map_single(na->pdev, buf, sz, DMA_BIDIRECTIONAL); } +#else /* !NETMAP_LINUX_HAVE_DMASYNC */ +#define netmap_sync_map_cpu(na, tag, map, sz, t) +#define netmap_sync_map_dev(na, tag, map, sz, t) +#endif /* NETMAP_LINUX_HAVE_DMASYNC */ #endif /* linux */ @@ -2220,6 +2238,119 @@ void ptnet_nm_krings_delete(struct netmap_adapter *na); void ptnet_nm_dtor(struct netmap_adapter *na); #endif /* WITH_PTNETMAP_GUEST */ + +#ifdef __FreeBSD__ +/* + * FreeBSD mbuf allocator/deallocator in emulation mode: + */ +#if __FreeBSD_version < 1100000 + +/* + * For older versions of FreeBSD: + * + * We allocate EXT_PACKET mbuf+clusters, but need to set M_NOFREE + * so that the destructor, if invoked, will not free the packet. + * In principle we should set the destructor only on demand, + * but since there might be a race we better do it on allocation. + * As a consequence, we also need to set the destructor or we + * would leak buffers. + */ + +/* mbuf destructor, also need to change the type to EXT_EXTREF, + * add an M_NOFREE flag, and then clear the flag and + * chain into uma_zfree(zone_pack, mf) + * (or reinstall the buffer ?) + */ +#define SET_MBUF_DESTRUCTOR(m, fn) do { \ + (m)->m_ext.ext_free = (void *)fn; \ + (m)->m_ext.ext_type = EXT_EXTREF; \ +} while (0) + +static int +void_mbuf_dtor(struct mbuf *m, void *arg1, void *arg2) +{ + /* restore original mbuf */ + m->m_ext.ext_buf = m->m_data = m->m_ext.ext_arg1; + m->m_ext.ext_arg1 = NULL; + m->m_ext.ext_type = EXT_PACKET; + m->m_ext.ext_free = NULL; + if (MBUF_REFCNT(m) == 0) + SET_MBUF_REFCNT(m, 1); + uma_zfree(zone_pack, m); + + return 0; +} + +static inline struct mbuf * +nm_os_get_mbuf(struct ifnet *ifp, int len) +{ + struct mbuf *m; + + (void)ifp; + m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); + if (m) { + /* m_getcl() (mb_ctor_mbuf) has an assert that checks that + * M_NOFREE flag is not specified as third argument, + * so we have to set M_NOFREE after m_getcl(). */ + m->m_flags |= M_NOFREE; + m->m_ext.ext_arg1 = m->m_ext.ext_buf; // XXX save + m->m_ext.ext_free = (void *)void_mbuf_dtor; + m->m_ext.ext_type = EXT_EXTREF; + ND(5, "create m %p refcnt %d", m, MBUF_REFCNT(m)); + } + return m; +} + +#else /* __FreeBSD_version >= 1100000 */ + +/* + * Newer versions of FreeBSD, using a straightforward scheme. + * + * We allocate mbufs with m_gethdr(), since the mbuf header is needed + * by the driver. We also attach a custom-provided external storage, + * which in this case is a netmap buffer. When calling m_extadd(), however + * we pass a NULL address, since the real address (and length) will be + * filled in by nm_os_generic_xmit_frame() right before calling + * if_transmit(). + * + * The dtor function does nothing, however we need it since mb_free_ext() + * has a KASSERT(), checking that the mbuf dtor function is not NULL. + */ + +#if __FreeBSD_version <= 1200050 +static void void_mbuf_dtor(struct mbuf *m, void *arg1, void *arg2) { } +#else /* __FreeBSD_version >= 1200051 */ +/* The arg1 and arg2 pointer arguments were removed by r324446, which + * is included since version 1200051. */ +static void void_mbuf_dtor(struct mbuf *m) { } +#endif /* __FreeBSD_version >= 1200051 */ + +#define SET_MBUF_DESTRUCTOR(m, fn) do { \ + (m)->m_ext.ext_free = (fn != NULL) ?
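Splitting netmap_sync_map() into _cpu and _dev variants lets a driver express both halves of a DMA handoff instead of inferring the direction from the ring type alone. A hedged usage sketch, assuming the netmap_kern.h context above (demo_rx_slot_path is hypothetical and not taken from any driver):

static void
demo_rx_slot_path(struct netmap_adapter *na, bus_dma_tag_t tag,
    bus_dmamap_t map, u_int len)
{
    /* buffer just filled by the NIC: hand it to the CPU */
    netmap_sync_map_cpu(na, tag, map, len, NR_RX);

    /* ... the netmap consumer reads the payload here ... */

    /* slot recycled: hand the buffer back to the device */
    netmap_sync_map_dev(na, tag, map, len, NR_RX);
}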
\ + (void *)fn : (void *)void_mbuf_dtor; \ +} while (0) + +static inline struct mbuf * +nm_os_get_mbuf(struct ifnet *ifp, int len) +{ + struct mbuf *m; + + (void)ifp; + (void)len; + + m = m_gethdr(M_NOWAIT, MT_DATA); + if (m == NULL) { + return m; + } + + m_extadd(m, NULL /* buf */, 0 /* size */, void_mbuf_dtor, + NULL, NULL, 0, EXT_NET_DRV); + + return m; +} + +#endif /* __FreeBSD_version >= 1100000 */ +#endif /* __FreeBSD__ */ struct nmreq_option * nmreq_findoption(struct nmreq_option *, uint16_t); int nmreq_checkduplicate(struct nmreq_option *); Index: head/sys/dev/netmap/netmap_mem2.c =================================================================== --- head/sys/dev/netmap/netmap_mem2.c +++ head/sys/dev/netmap/netmap_mem2.c @@ -1845,7 +1845,7 @@ for_rx_tx(t) { u_int i; - for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { + for (i = 0; i < netmap_all_rings(na, t); i++) { struct netmap_kring *kring = NMR(na, t)[i]; struct netmap_ring *ring = kring->ring; @@ -1884,7 +1884,7 @@ for_rx_tx(t) { u_int i; - for (i = 0; i <= nma_get_nrings(na, t); i++) { + for (i = 0; i < netmap_all_rings(na, t); i++) { struct netmap_kring *kring = NMR(na, t)[i]; struct netmap_ring *ring = kring->ring; u_int len, ndesc; @@ -1922,7 +1922,7 @@ netmap_mem_bufsize(na->nm_mem); ND("%s h %d c %d t %d", kring->name, ring->head, ring->cur, ring->tail); - ND("initializing slots for %s_ring", nm_txrx2str(txrx)); + ND("initializing slots for %s_ring", nm_txrx2str(t)); if (!(kring->nr_kflags & NKR_FAKERING)) { /* this is a real ring */ ND("allocating buffers for %s", kring->name); @@ -1980,7 +1980,7 @@ ntot = 0; for_rx_tx(t) { /* account for the (eventually fake) host rings */ - n[t] = nma_get_nrings(na, t) + 1; + n[t] = netmap_all_rings(na, t); ntot += n[t]; } /* @@ -2654,14 +2654,14 @@ /* point each kring to the corresponding backend ring */ nifp = (struct netmap_if *)((char *)ptnmd->nm_addr + ptif->nifp_offset); - for (i = 0; i <= na->num_tx_rings; i++) { + for (i = 0; i < netmap_all_rings(na, NR_TX); i++) { struct netmap_kring *kring = na->tx_rings[i]; if (kring->ring) continue; kring->ring = (struct netmap_ring *) ((char *)nifp + nifp->ring_ofs[i]); } - for (i = 0; i <= na->num_rx_rings; i++) { + for (i = 0; i < netmap_all_rings(na, NR_RX); i++) { struct netmap_kring *kring = na->rx_rings[i]; if (kring->ring) continue; Index: head/sys/dev/netmap/netmap_monitor.c =================================================================== --- head/sys/dev/netmap/netmap_monitor.c +++ head/sys/dev/netmap/netmap_monitor.c @@ -152,6 +152,12 @@ static int netmap_monitor_rxsync(struct netmap_kring *kring, int flags) { + struct netmap_monitor_adapter *mna = + (struct netmap_monitor_adapter *)kring->na; + if (unlikely(mna->priv.np_na == NULL)) { + /* parent left netmap mode */ + return EIO; + } ND("%s %x", kring->name, flags); kring->nr_hwcur = kring->rhead; mb(); @@ -164,11 +170,20 @@ netmap_monitor_krings_create(struct netmap_adapter *na) { int error = netmap_krings_create(na, 0); + enum txrx t; + if (error) return error; /* override the host rings callbacks */ - na->tx_rings[na->num_tx_rings]->nm_sync = netmap_monitor_txsync; - na->rx_rings[na->num_rx_rings]->nm_sync = netmap_monitor_rxsync; + for_rx_tx(t) { + int i; + u_int first = nma_get_nrings(na, t); + for (i = 0; i < nma_get_host_nrings(na, t); i++) { + struct netmap_kring *kring = NMR(na, t)[first + i]; + kring->nm_sync = t == NR_TX ? 
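The relocated allocator is meant to be paired with SET_MBUF_DESTRUCTOR() by the generic (emulated) transmit path, which is not part of this diff. A hedged sketch of that pairing (demo_tx_dtor and demo_prepare_tx_mbuf are hypothetical; the destructor signature matches the FreeBSD >= 1200051 branch above):

static void
demo_tx_dtor(struct mbuf *m)
{
    /* hypothetical: tell the generic txsync that the driver
     * has consumed this slot */
}

static struct mbuf *
demo_prepare_tx_mbuf(struct ifnet *ifp)
{
    struct mbuf *m = nm_os_get_mbuf(ifp, 0 /* len filled in later */);

    if (m != NULL) {
        /* mb_free_ext() in the driver will invoke our dtor,
         * acting as a tx completion notification */
        SET_MBUF_DESTRUCTOR(m, demo_tx_dtor);
    }
    return m;
}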
netmap_monitor_txsync : + netmap_monitor_rxsync; + } + } return 0; } @@ -244,6 +259,48 @@ static int netmap_monitor_parent_rxsync(struct netmap_kring *, int); static int netmap_monitor_parent_notify(struct netmap_kring *, int); +static void +nm_monitor_intercept_callbacks(struct netmap_kring *kring) +{ + ND("intercept callbacks on %s", kring->name); + kring->mon_sync = kring->nm_sync; + kring->mon_notify = kring->nm_notify; + if (kring->tx == NR_TX) { + kring->nm_sync = netmap_monitor_parent_txsync; + } else { + kring->nm_sync = netmap_monitor_parent_rxsync; + kring->nm_notify = netmap_monitor_parent_notify; + kring->mon_tail = kring->nr_hwtail; + } +} + +static void +nm_monitor_restore_callbacks(struct netmap_kring *kring) +{ + ND("restoring callbacks on %s", kring->name); + kring->nm_sync = kring->mon_sync; + kring->mon_sync = NULL; + if (kring->tx == NR_RX) { + kring->nm_notify = kring->mon_notify; + } + kring->mon_notify = NULL; +} + +static struct netmap_kring * +nm_zmon_list_head(struct netmap_kring *mkring, enum txrx t) +{ + struct netmap_adapter *na = mkring->na; + struct netmap_kring *kring = mkring; + struct netmap_zmon_list *z = &kring->zmon_list[t]; + /* reach the head of the list */ + while (nm_is_zmon(na) && z->prev != NULL) { + kring = z->prev; + na = kring->na; + z = &kring->zmon_list[t]; + } + return nm_is_zmon(na) ? NULL : kring; +} + /* add the monitor mkring to the list of monitors of kring. * If this is the first monitor, intercept the callbacks */ @@ -254,51 +311,34 @@ enum txrx t = kring->tx; struct netmap_zmon_list *z = &kring->zmon_list[t]; struct netmap_zmon_list *mz = &mkring->zmon_list[t]; + struct netmap_kring *ikring = kring; /* a zero-copy monitor which is not the first in the list * must monitor the previous monitor */ if (zmon && z->prev != NULL) - kring = z->prev; + ikring = z->prev; /* tail of the list */ /* synchronize with concurrently running nm_sync()s */ nm_kr_stop(kring, NM_KR_LOCKED); - if (nm_monitor_none(kring)) { - /* this is the first monitor, intercept callbacks */ - ND("intercept callbacks on %s", kring->name); - kring->mon_sync = kring->nm_sync; - kring->mon_notify = kring->nm_notify; - if (kring->tx == NR_TX) { - kring->nm_sync = netmap_monitor_parent_txsync; - } else { - kring->nm_sync = netmap_monitor_parent_rxsync; - kring->nm_notify = netmap_monitor_parent_notify; - kring->mon_tail = kring->nr_hwtail; - } + if (nm_monitor_none(ikring)) { + /* this is the first monitor, intercept the callbacks */ + ND("%s: intercept callbacks on %s", mkring->name, ikring->name); + nm_monitor_intercept_callbacks(ikring); } if (zmon) { /* append the zmon to the list */ - struct netmap_monitor_adapter *mna = - (struct netmap_monitor_adapter *)mkring->na; - struct netmap_adapter *pna; - - if (z->prev != NULL) - z->prev->zmon_list[t].next = mkring; - mz->prev = z->prev; - z->prev = mkring; - if (z->next == NULL) - z->next = mkring; - - /* grap a reference to the previous netmap adapter + ikring->zmon_list[t].next = mkring; + z->prev = mkring; /* new tail */ + mz->prev = ikring; + mz->next = NULL; + /* grab a reference to the previous netmap adapter * in the chain (this may be the monitored port * or another zero-copy monitor) */ - pna = kring->na; - netmap_adapter_get(pna); - netmap_adapter_put(mna->priv.np_na); - mna->priv.np_na = pna; + netmap_adapter_get(ikring->na); } else { /* make sure the monitor array exists and is big enough */ error = nm_monitor_alloc(kring, kring->n_monitors + 1); @@ -318,29 +358,50 @@ * If this is the last monitor, restore the 
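The interception helpers factored out above are plain callback swapping: the parent ring's nm_sync is parked in mon_sync and replaced with a monitor-aware wrapper, and the restore path undoes the exchange. A standalone illustration with simplified types:

#include <assert.h>
#include <stddef.h>

struct demo_kring {
    int (*nm_sync)(struct demo_kring *);
    int (*mon_sync)(struct demo_kring *);
};

static int
parent_sync(struct demo_kring *k)
{
    (void)k;
    return 1;   /* the ring's original work */
}

static int
monitor_sync(struct demo_kring *k)
{
    /* run the parent first, then (in the real code) copy the
     * new slots to every registered monitor */
    return k->mon_sync(k) + 100;
}

static void
intercept(struct demo_kring *k)     /* nm_monitor_intercept_callbacks() */
{
    k->mon_sync = k->nm_sync;
    k->nm_sync = monitor_sync;
}

static void
restore(struct demo_kring *k)       /* nm_monitor_restore_callbacks() */
{
    k->nm_sync = k->mon_sync;
    k->mon_sync = NULL;
}

int
main(void)
{
    struct demo_kring k = { parent_sync, NULL };

    intercept(&k);
    assert(k.nm_sync(&k) == 101);   /* wrapped */
    restore(&k);
    assert(k.nm_sync(&k) == 1);     /* original again */
    return 0;
}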
original callbacks */ static void -netmap_monitor_del(struct netmap_kring *mkring, struct netmap_kring *kring) +netmap_monitor_del(struct netmap_kring *mkring, struct netmap_kring *kring, enum txrx t) { - struct netmap_zmon_list *mz = &mkring->zmon_list[kring->tx]; int zmon = nm_is_zmon(mkring->na); + struct netmap_zmon_list *mz = &mkring->zmon_list[t]; + struct netmap_kring *ikring = kring; - if (zmon && mz->prev != NULL) - kring = mz->prev; + if (zmon) { + /* get to the head of the list */ + kring = nm_zmon_list_head(mkring, t); + ikring = mz->prev; + } - /* synchronize with concurrently running nm_sync()s */ - nm_kr_stop(kring, NM_KR_LOCKED); + /* synchronize with concurrently running nm_sync()s + * if kring is NULL (orphaned list) the monitored port + * has exited netmap mode, so there is nothing to stop + */ + if (kring != NULL) + nm_kr_stop(kring, NM_KR_LOCKED); if (zmon) { /* remove the monitor from the list */ - if (mz->prev != NULL) - mz->prev->zmon_list[kring->tx].next = mz->next; - else - kring->zmon_list[kring->tx].next = mz->next; if (mz->next != NULL) { - mz->next->zmon_list[kring->tx].prev = mz->prev; - } else { - kring->zmon_list[kring->tx].prev = mz->prev; + mz->next->zmon_list[t].prev = mz->prev; + /* we also need to let the next monitor drop the + * reference to us and grab the reference to the + * previous ring owner, instead + */ + if (mz->prev != NULL) + netmap_adapter_get(mz->prev->na); + netmap_adapter_put(mkring->na); + } else if (kring != NULL) { + /* in the monitored kring, prev is actually the + * pointer to the tail of the list + */ + kring->zmon_list[t].prev = + (mz->prev != kring ? mz->prev : NULL); } + if (mz->prev != NULL) { + netmap_adapter_put(mz->prev->na); + mz->prev->zmon_list[t].next = mz->next; + } + mz->prev = NULL; + mz->next = NULL; } else { /* this is a copy monitor */ uint32_t mon_pos = mkring->mon_pos[kring->tx]; @@ -356,21 +417,13 @@ } } - if (nm_monitor_none(kring)) { + if (ikring != NULL && nm_monitor_none(ikring)) { /* this was the last monitor, restore the callbacks */ - ND("%s: restoring sync on %s: %p", mkring->name, kring->name, - kring->mon_sync); - kring->nm_sync = kring->mon_sync; - kring->mon_sync = NULL; - if (kring->tx == NR_RX) { - ND("%s: restoring notify on %s: %p", - mkring->name, kring->name, kring->mon_notify); - kring->nm_notify = kring->mon_notify; - kring->mon_notify = NULL; - } + nm_monitor_restore_callbacks(ikring); } - nm_kr_start(kring); + if (kring != NULL) + nm_kr_start(kring); } @@ -389,9 +442,9 @@ for_rx_tx(t) { u_int i; - for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { + for (i = 0; i < netmap_all_rings(na, t); i++) { struct netmap_kring *kring = NMR(na, t)[i]; - struct netmap_kring *zkring; + struct netmap_zmon_list *z = &kring->zmon_list[t]; u_int j; for (j = 0; j < kring->n_monitors; j++) { @@ -404,30 +457,34 @@ netmap_adapter_put(mna->priv.np_na); mna->priv.np_na = NULL; } + kring->monitors[j] = NULL; } - zkring = kring->zmon_list[kring->tx].next; - if (zkring != NULL) { - struct netmap_monitor_adapter *next = - (struct netmap_monitor_adapter *)zkring->na; - struct netmap_monitor_adapter *this = - (struct netmap_monitor_adapter *)na; - struct netmap_adapter *pna = this->priv.np_na; - /* let the next monitor forget about us */ - if (next->priv.np_na != NULL) { - netmap_adapter_put(next->priv.np_na); + if (!nm_is_zmon(na)) { + /* we are the head of at most one list */ + struct netmap_kring *zkring; + for (zkring = z->next; zkring != NULL; + zkring = zkring->zmon_list[t].next) + { + struct 
netmap_monitor_adapter *next = + (struct netmap_monitor_adapter *)zkring->na; + /* let the monitor forget about us */ + netmap_adapter_put(next->priv.np_na); /* nop if null */ + next->priv.np_na = NULL; } - if (pna != NULL && nm_is_zmon(na)) { - /* we are a monitor ourselves and we may - * need to pass down the reference to - * the previous adapter in the chain - */ - netmap_adapter_get(pna); - next->priv.np_na = pna; - continue; - } - next->priv.np_na = NULL; + /* orphan the zmon list */ + if (z->next != NULL) + z->next->zmon_list[t].prev = NULL; + z->next = NULL; + z->prev = NULL; } + + if (!nm_monitor_none(kring)) { + + kring->n_monitors = 0; + nm_monitor_dealloc(kring); + nm_monitor_restore_callbacks(kring); + } } } } @@ -455,7 +512,7 @@ return ENXIO; } for_rx_tx(t) { - for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { + for (i = 0; i < netmap_all_rings(na, t); i++) { mkring = NMR(na, t)[i]; if (!nm_kring_pending_on(mkring)) continue; @@ -477,7 +534,7 @@ if (na->active_fds == 0) na->na_flags &= ~NAF_NETMAP_ON; for_rx_tx(t) { - for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { + for (i = 0; i < netmap_all_rings(na, t); i++) { mkring = NMR(na, t)[i]; if (!nm_kring_pending_off(mkring)) continue; @@ -495,7 +552,7 @@ continue; if (mna->flags & nm_txrx2flag(s)) { kring = NMR(pna, s)[i]; - netmap_monitor_del(mkring, kring); + netmap_monitor_del(mkring, kring, s); } } } @@ -593,6 +650,7 @@ ms->len = s->len; s->len = tmp; + ms->flags = s->flags; s->flags |= NS_BUF_CHANGED; beg = nm_next(beg, lim); @@ -710,6 +768,7 @@ memcpy(dst, src, copy_len); ms->len = copy_len; + ms->flags = s->flags; sent++; beg = nm_next(beg, lim); @@ -836,7 +895,6 @@ struct ifnet *ifp = NULL; int error; int zcopy = (req->nr_flags & NR_ZCOPY_MON); - char monsuff[10] = ""; if (zcopy) { req->nr_flags |= (NR_MONITOR_TX | NR_MONITOR_RX); @@ -890,14 +948,11 @@ D("ringid error"); goto free_out; } - if (mna->priv.np_qlast[NR_TX] - mna->priv.np_qfirst[NR_TX] == 1) { - snprintf(monsuff, 10, "-%d", mna->priv.np_qfirst[NR_TX]); - } - snprintf(mna->up.name, sizeof(mna->up.name), "%s%s/%s%s%s", pna->name, - monsuff, + snprintf(mna->up.name, sizeof(mna->up.name), "%s/%s%s%s#%lu", pna->name, zcopy ? "z" : "", (req->nr_flags & NR_MONITOR_RX) ? "r" : "", - (req->nr_flags & NR_MONITOR_TX) ? "t" : ""); + (req->nr_flags & NR_MONITOR_TX) ? "t" : "", + pna->monitor_id++); /* the monitor supports the host rings iff the parent does */ mna->up.na_flags |= (pna->na_flags & NAF_HOST_RINGS); Index: head/sys/dev/netmap/netmap_pipe.c =================================================================== --- head/sys/dev/netmap/netmap_pipe.c +++ head/sys/dev/netmap/netmap_pipe.c @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * - * Copyright (C) 2014-2016 Giuseppe Lettieri + * Copyright (C) 2014-2018 Giuseppe Lettieri * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -185,8 +185,9 @@ netmap_pipe_txsync(struct netmap_kring *txkring, int flags) { struct netmap_kring *rxkring = txkring->pipe; - u_int k, lim = txkring->nkr_num_slots - 1; + u_int k, lim = txkring->nkr_num_slots - 1, nk; int m; /* slots to transfer */ + int complete; /* did we see a complete packet ?
*/ struct netmap_ring *txring = txkring->ring, *rxring = rxkring->ring; ND("%p: %s %x -> %s", txkring, txkring->name, flags, rxkring->name); @@ -194,6 +195,9 @@ txkring->nr_hwcur, txkring->nr_hwtail, txkring->rcur, txkring->rhead, txkring->rtail); + /* update the hwtail */ + txkring->nr_hwtail = txkring->pipe_tail; + m = txkring->rhead - txkring->nr_hwcur; /* new slots */ if (m < 0) m += txkring->nkr_num_slots; @@ -203,29 +207,29 @@ return 0; } - for (k = txkring->nr_hwcur; m; m--, k = nm_next(k, lim)) { + for (k = txkring->nr_hwcur, nk = lim + 1, complete = 0; m; + m--, k = nm_next(k, lim), nk = (complete ? k : nk)) { struct netmap_slot *rs = &rxring->slot[k]; struct netmap_slot *ts = &txring->slot[k]; - rs->len = ts->len; - rs->ptr = ts->ptr; - + *rs = *ts; if (ts->flags & NS_BUF_CHANGED) { - rs->buf_idx = ts->buf_idx; - rs->flags |= NS_BUF_CHANGED; ts->flags &= ~NS_BUF_CHANGED; } + complete = !(ts->flags & NS_MOREFRAG); } - mb(); /* make sure the slots are updated before publishing them */ - rxkring->nr_hwtail = k; txkring->nr_hwcur = k; ND(20, "TX after : hwcur %d hwtail %d cur %d head %d tail %d k %d", txkring->nr_hwcur, txkring->nr_hwtail, txkring->rcur, txkring->rhead, txkring->rtail, k); - rxkring->nm_notify(rxkring, 0); + if (likely(nk <= lim)) { + mb(); /* make sure the slots are updated before publishing them */ + rxkring->pipe_tail = nk; /* only publish complete packets */ + rxkring->nm_notify(rxkring, 0); + } return 0; } @@ -243,6 +247,9 @@ rxkring->nr_hwcur, rxkring->nr_hwtail, rxkring->rcur, rxkring->rhead, rxkring->rtail); + /* update the hwtail */ + rxkring->nr_hwtail = rxkring->pipe_tail; + m = rxkring->rhead - rxkring->nr_hwcur; /* released slots */ if (m < 0) m += rxkring->nkr_num_slots; @@ -264,7 +271,7 @@ } mb(); /* make sure the slots are updated before publishing them */ - txkring->nr_hwtail = nm_prev(k, lim); + txkring->pipe_tail = nm_prev(k, lim); rxkring->nr_hwcur = k; ND(20, "RX after : hwcur %d hwtail %d cur %d head %d tail %d k %d", @@ -346,14 +353,19 @@ if (error) goto del_krings1; - /* cross link the krings */ + /* cross link the krings and initialize the pipe_tails */ for_rx_tx(t) { enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */ for (i = 0; i < nma_get_nrings(na, t); i++) { - NMR(na, t)[i]->pipe = NMR(ona, r)[i]; - NMR(ona, r)[i]->pipe = NMR(na, t)[i]; + struct netmap_kring *k1 = NMR(na, t)[i], + *k2 = NMR(ona, r)[i]; + k1->pipe = k2; + k2->pipe = k1; /* mark all peer-adapter rings as fake */ - NMR(ona, r)[i]->nr_kflags |= NKR_FAKERING; + k2->nr_kflags |= NKR_FAKERING; + /* init tails */ + k1->pipe_tail = k1->nr_hwtail; + k2->pipe_tail = k2->nr_hwtail; } } @@ -436,6 +448,16 @@ if (nm_kring_pending_on(kring)) { struct netmap_kring *sring, *dring; + kring->nr_mode = NKR_NETMAP_ON; + if ((kring->nr_kflags & NKR_FAKERING) && + (kring->pipe->nr_kflags & NKR_FAKERING)) { + /* this is a re-open of a pipe + * end-point kept alive by the other end. 
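The reworked pipe txsync publishes only complete packets: nk trails the copy loop and advances past a slot only after a slot without NS_MOREFRAG has been seen, so the peer never observes a half-copied multi-fragment frame. A standalone walk-through of that pointer arithmetic (values illustrative):

#include <assert.h>

#define DEMO_NS_MOREFRAG 1
#define LIM 7           /* 8-slot ring, lim == 7 */

static unsigned nm_next_slot(unsigned i) { return i == LIM ? 0 : i + 1; }

int
main(void)
{
    /* a 3-fragment packet followed by two dangling fragments */
    int flags[5] = { DEMO_NS_MOREFRAG, DEMO_NS_MOREFRAG, 0,
        DEMO_NS_MOREFRAG, DEMO_NS_MOREFRAG };
    unsigned k = 0, nk = LIM + 1;   /* LIM + 1 means nothing to publish */
    int m, complete = 0;

    /* same update order as the loop in netmap_pipe_txsync() */
    for (m = 5; m; m--, k = nm_next_slot(k), nk = (complete ? k : nk))
        complete = !(flags[k] & DEMO_NS_MOREFRAG);

    /* only slots 0..2 are published; nk <= LIM triggers the notify */
    assert(nk == 3);
    return 0;
}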
+ * We need to leave everything as it is + */ + continue; + } + /* copy the buffers from the non-fake ring */ if (kring->nr_kflags & NKR_FAKERING) { sring = kring->pipe; @@ -556,10 +578,10 @@ if (ring == NULL) continue; - if (kring->nr_hwtail == kring->nr_hwcur) - ring->slot[kring->nr_hwtail].buf_idx = 0; + if (kring->tx == NR_RX) + ring->slot[kring->pipe_tail].buf_idx = 0; - for (j = nm_next(kring->nr_hwtail, lim); + for (j = nm_next(kring->pipe_tail, lim); j != kring->nr_hwcur; j = nm_next(j, lim)) { Index: head/sys/dev/netmap/netmap_vale.c =================================================================== --- head/sys/dev/netmap/netmap_vale.c +++ head/sys/dev/netmap/netmap_vale.c @@ -27,37 +27,6 @@ */ -/* - * This module implements the VALE switch for netmap - ---- VALE SWITCH --- - -NMG_LOCK() serializes all modifications to switches and ports. -A switch cannot be deleted until all ports are gone. - -For each switch, an SX lock (RWlock on linux) protects -deletion of ports. When configuring or deleting a new port, the -lock is acquired in exclusive mode (after holding NMG_LOCK). -When forwarding, the lock is acquired in shared mode (without NMG_LOCK). -The lock is held throughout the entire forwarding cycle, -during which the thread may incur in a page fault. -Hence it is important that sleepable shared locks are used. - -On the rx ring, the per-port lock is grabbed initially to reserve -a number of slot in the ring, then the lock is released, -packets are copied from source to destination, and then -the lock is acquired again and the receive ring is updated. -(A similar thing is done on the tx ring for NIC and host stack -ports attached to the switch) - - */ - -/* - * OS-specific code that is used only within this file. - * Other OS-specific code that must be accessed by drivers - * is present in netmap_kern.h - */ - #if defined(__FreeBSD__) #include /* prerequisite */ __FBSDID("$FreeBSD$"); @@ -81,20 +50,9 @@ #include /* bus_dmamap_* */ #include #include +#include -#define BDG_RWLOCK_T struct rwlock // struct rwlock - -#define BDG_RWINIT(b) \ - rw_init_flags(&(b)->bdg_lock, "bdg lock", RW_NOWITNESS) -#define BDG_WLOCK(b) rw_wlock(&(b)->bdg_lock) -#define BDG_WUNLOCK(b) rw_wunlock(&(b)->bdg_lock) -#define BDG_RLOCK(b) rw_rlock(&(b)->bdg_lock) -#define BDG_RTRYLOCK(b) rw_try_rlock(&(b)->bdg_lock) -#define BDG_RUNLOCK(b) rw_runlock(&(b)->bdg_lock) -#define BDG_RWDESTROY(b) rw_destroy(&(b)->bdg_lock) - - #elif defined(linux) #include "bsd_glue.h" @@ -120,6 +78,7 @@ #include #include #include +#include #ifdef WITH_VALE @@ -143,15 +102,11 @@ #define NM_BDG_MAXRINGS 16 /* XXX unclear how many. */ #define NM_BDG_MAXSLOTS 4096 /* XXX same as above */ #define NM_BRIDGE_RINGSIZE 1024 /* in the device */ -#define NM_BDG_HASH 1024 /* forwarding table entries */ #define NM_BDG_BATCH 1024 /* entries in the forwarding buffer */ -#define NM_MULTISEG 64 /* max size of a chain of bufs */ /* actual size of the tables */ -#define NM_BDG_BATCH_MAX (NM_BDG_BATCH + NM_MULTISEG) +#define NM_BDG_BATCH_MAX (NM_BDG_BATCH + NETMAP_MAX_FRAGS) /* NM_FT_NULL terminates a list of slots in the ft */ #define NM_FT_NULL NM_BDG_BATCH_MAX -/* Default size for the Maximum Frame Size. 
*/ -#define NM_BDG_MFS_DEFAULT 1514 /* @@ -168,8 +123,9 @@ static int netmap_vp_create(struct nmreq_header *hdr, struct ifnet *, struct netmap_mem_d *nmd, struct netmap_vp_adapter **); -static int netmap_vp_reg(struct netmap_adapter *na, int onoff); -static int netmap_bwrap_reg(struct netmap_adapter *, int onoff); +static int netmap_vp_bdg_attach(const char *, struct netmap_adapter *, + struct nm_bridge *); +static int netmap_vale_bwrap_attach(const char *, struct netmap_adapter *); /* * For each output interface, nm_bdg_q is used to construct a list. @@ -182,99 +138,17 @@ uint32_t bq_len; /* number of buffers */ }; -/* XXX revise this */ -struct nm_hash_ent { - uint64_t mac; /* the top 2 bytes are the epoch */ - uint64_t ports; -}; - /* Holds the default callbacks */ -static struct netmap_bdg_ops default_bdg_ops = {netmap_bdg_learning, NULL, NULL}; - -/* - * nm_bridge is a descriptor for a VALE switch. - * Interfaces for a bridge are all in bdg_ports[]. - * The array has fixed size, an empty entry does not terminate - * the search, but lookups only occur on attach/detach so we - * don't mind if they are slow. - * - * The bridge is non blocking on the transmit ports: excess - * packets are dropped if there is no room on the output port. - * - * bdg_lock protects accesses to the bdg_ports array. - * This is a rw lock (or equivalent). - */ -#define NM_BDG_IFNAMSIZ IFNAMSIZ -struct nm_bridge { - /* XXX what is the proper alignment/layout ? */ - BDG_RWLOCK_T bdg_lock; /* protects bdg_ports */ - int bdg_namelen; - uint32_t bdg_active_ports; - char bdg_basename[NM_BDG_IFNAMSIZ]; - - /* Indexes of active ports (up to active_ports) - * and all other remaining ports. - */ - uint32_t bdg_port_index[NM_BDG_MAXPORTS]; - /* used by netmap_bdg_detach_common() */ - uint32_t tmp_bdg_port_index[NM_BDG_MAXPORTS]; - - struct netmap_vp_adapter *bdg_ports[NM_BDG_MAXPORTS]; - - /* - * Programmable lookup functions to figure out the destination port. - * It returns either of an index of the destination port, - * NM_BDG_BROADCAST to broadcast this packet, or NM_BDG_NOPORT not to - * forward this packet. ring_nr is the source ring index, and the - * function may overwrite this value to forward this packet to a - * different ring index. - * The function is set by netmap_bdg_regops(). - */ - struct netmap_bdg_ops *bdg_ops; - - /* - * Contains the data structure used by the bdg_ops.lookup function. - * By default points to *ht which is allocated on attach and used by the default lookup - * otherwise will point to the data structure received by netmap_bdg_regops(). - */ - void *private_data; - struct nm_hash_ent *ht; - - /* Currently used to specify if the bridge is still in use while empty and - * if it has been put in exclusive mode by an external module, see netmap_bdg_regops() - * and netmap_bdg_create(). - */ -#define NM_BDG_ACTIVE 1 -#define NM_BDG_EXCLUSIVE 2 - uint8_t bdg_flags; - - -#ifdef CONFIG_NET_NS - struct net *ns; -#endif /* CONFIG_NET_NS */ +struct netmap_bdg_ops vale_bdg_ops = { + .lookup = netmap_bdg_learning, + .config = NULL, + .dtor = NULL, + .vp_create = netmap_vp_create, + .bwrap_attach = netmap_vale_bwrap_attach, + .name = NM_BDG_NAME, }; -const char* -netmap_bdg_name(struct netmap_vp_adapter *vp) -{ - struct nm_bridge *b = vp->na_bdg; - if (b == NULL) - return NULL; - return b->bdg_basename; -} - - -#ifndef CONFIG_NET_NS /* - * XXX in principle nm_bridges could be created dynamically - * Right now we have a static array and deletions are protected - * by an exclusive lock. 
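With vp_create and bwrap_attach folded into struct netmap_bdg_ops, a whole switch flavor is now described by a single ops table, as vale_bdg_ops above shows. A hedged sketch of what an alternative flavor could hand to the common bridge code (my_lookup, my_vp_create and my_bwrap_attach are hypothetical, with signatures matching the typedefs earlier in this patch):

static uint32_t my_lookup(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
    struct netmap_vp_adapter *vpna, void *private_data);
static int my_vp_create(struct nmreq_header *hdr, struct ifnet *ifp,
    struct netmap_mem_d *nmd, struct netmap_vp_adapter **ret);
static int my_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna);

static struct netmap_bdg_ops my_bdg_ops = {
    .lookup = my_lookup,            /* destination-port selection */
    .config = NULL,
    .dtor = NULL,
    .vp_create = my_vp_create,      /* how ports are created */
    .bwrap_attach = my_bwrap_attach, /* how NICs are wrapped */
    .name = "mybdg",                /* hypothetical name prefix */
};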
- */ -static struct nm_bridge *nm_bridges; -#endif /* !CONFIG_NET_NS */ - - -/* * this is a slightly optimized copy routine which rounds * to multiple of 64 bytes and is often faster than dealing * with other odd sizes. We assume there is enough room @@ -304,108 +178,7 @@ } -static int -nm_is_id_char(const char c) -{ - return (c >= 'a' && c <= 'z') || - (c >= 'A' && c <= 'Z') || - (c >= '0' && c <= '9') || - (c == '_'); -} - -/* Validate the name of a VALE bridge port and return the - * position of the ":" character. */ -static int -nm_vale_name_validate(const char *name) -{ - int colon_pos = -1; - int i; - - if (!name || strlen(name) < strlen(NM_BDG_NAME)) { - return -1; - } - - for (i = 0; i < NM_BDG_IFNAMSIZ && name[i]; i++) { - if (name[i] == ':') { - colon_pos = i; - break; - } else if (!nm_is_id_char(name[i])) { - return -1; - } - } - - if (strlen(name) - colon_pos > IFNAMSIZ) { - /* interface name too long */ - return -1; - } - - return colon_pos; -} - /* - * locate a bridge among the existing ones. - * MUST BE CALLED WITH NMG_LOCK() - * - * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME. - * We assume that this is called with a name of at least NM_NAME chars. - */ -static struct nm_bridge * -nm_find_bridge(const char *name, int create) -{ - int i, namelen; - struct nm_bridge *b = NULL, *bridges; - u_int num_bridges; - - NMG_LOCK_ASSERT(); - - netmap_bns_getbridges(&bridges, &num_bridges); - - namelen = nm_vale_name_validate(name); - if (namelen < 0) { - D("invalid bridge name %s", name ? name : NULL); - return NULL; - } - - /* lookup the name, remember empty slot if there is one */ - for (i = 0; i < num_bridges; i++) { - struct nm_bridge *x = bridges + i; - - if ((x->bdg_flags & NM_BDG_ACTIVE) + x->bdg_active_ports == 0) { - if (create && b == NULL) - b = x; /* record empty slot */ - } else if (x->bdg_namelen != namelen) { - continue; - } else if (strncmp(name, x->bdg_basename, namelen) == 0) { - ND("found '%.*s' at %d", namelen, name, i); - b = x; - break; - } - } - if (i == num_bridges && b) { /* name not found, can create entry */ - /* initialize the bridge */ - ND("create new bridge %s with ports %d", b->bdg_basename, - b->bdg_active_ports); - b->ht = nm_os_malloc(sizeof(struct nm_hash_ent) * NM_BDG_HASH); - if (b->ht == NULL) { - D("failed to allocate hash table"); - return NULL; - } - strncpy(b->bdg_basename, name, namelen); - b->bdg_namelen = namelen; - b->bdg_active_ports = 0; - for (i = 0; i < NM_BDG_MAXPORTS; i++) - b->bdg_port_index[i] = i; - /* set the default function */ - b->bdg_ops = &default_bdg_ops; - b->private_data = b->ht; - b->bdg_flags = 0; - NM_BNS_GET(b); - } - return b; -} - - -/* * Free the forwarding tables for rings attached to switch ports. 
*/ static void @@ -464,99 +237,6 @@ return 0; } -static int -netmap_bdg_free(struct nm_bridge *b) -{ - if ((b->bdg_flags & NM_BDG_ACTIVE) + b->bdg_active_ports != 0) { - return EBUSY; - } - - ND("marking bridge %s as free", b->bdg_basename); - nm_os_free(b->ht); - b->bdg_ops = NULL; - b->bdg_flags = 0; - NM_BNS_PUT(b); - return 0; -} - - -/* remove from bridge b the ports in slots hw and sw - * (sw can be -1 if not needed) - */ -static void -netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw) -{ - int s_hw = hw, s_sw = sw; - int i, lim =b->bdg_active_ports; - uint32_t *tmp = b->tmp_bdg_port_index; - - /* - New algorithm: - make a copy of bdg_port_index; - lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port - in the array of bdg_port_index, replacing them with - entries from the bottom of the array; - decrement bdg_active_ports; - acquire BDG_WLOCK() and copy back the array. - */ - - if (netmap_verbose) - D("detach %d and %d (lim %d)", hw, sw, lim); - /* make a copy of the list of active ports, update it, - * and then copy back within BDG_WLOCK(). - */ - memcpy(b->tmp_bdg_port_index, b->bdg_port_index, sizeof(b->tmp_bdg_port_index)); - for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) { - if (hw >= 0 && tmp[i] == hw) { - ND("detach hw %d at %d", hw, i); - lim--; /* point to last active port */ - tmp[i] = tmp[lim]; /* swap with i */ - tmp[lim] = hw; /* now this is inactive */ - hw = -1; - } else if (sw >= 0 && tmp[i] == sw) { - ND("detach sw %d at %d", sw, i); - lim--; - tmp[i] = tmp[lim]; - tmp[lim] = sw; - sw = -1; - } else { - i++; - } - } - if (hw >= 0 || sw >= 0) { - D("XXX delete failed hw %d sw %d, should panic...", hw, sw); - } - - BDG_WLOCK(b); - if (b->bdg_ops->dtor) - b->bdg_ops->dtor(b->bdg_ports[s_hw]); - b->bdg_ports[s_hw] = NULL; - if (s_sw >= 0) { - b->bdg_ports[s_sw] = NULL; - } - memcpy(b->bdg_port_index, b->tmp_bdg_port_index, sizeof(b->tmp_bdg_port_index)); - b->bdg_active_ports = lim; - BDG_WUNLOCK(b); - - ND("now %d active ports", lim); - netmap_bdg_free(b); -} - -static inline void * -nm_bdg_get_auth_token(struct nm_bridge *b) -{ - return b->ht; -} - -/* bridge not in exclusive mode ==> always valid - * bridge in exclusive mode (created through netmap_bdg_create()) ==> check authentication token - */ -static inline int -nm_bdg_valid_auth_token(struct nm_bridge *b, void *auth_token) -{ - return !(b->bdg_flags & NM_BDG_EXCLUSIVE) || b->ht == auth_token; -} - /* Allows external modules to create bridges in exclusive mode, * returns an authentication token that the external module will need * to provide during nm_bdg_ctl_{attach, detach}(), netmap_bdg_regops(), @@ -564,19 +244,19 @@ * Successfully executed if ret != NULL and *return_status == 0. */ void * -netmap_bdg_create(const char *bdg_name, int *return_status) +netmap_vale_create(const char *bdg_name, int *return_status) { struct nm_bridge *b = NULL; void *ret = NULL; NMG_LOCK(); - b = nm_find_bridge(bdg_name, 0 /* don't create */); + b = nm_find_bridge(bdg_name, 0 /* don't create */, NULL); if (b) { *return_status = EEXIST; goto unlock_bdg_create; } - b = nm_find_bridge(bdg_name, 1 /* create */); + b = nm_find_bridge(bdg_name, 1 /* create */, &vale_bdg_ops); if (!b) { *return_status = ENOMEM; goto unlock_bdg_create; @@ -595,13 +275,13 @@ * netmap_bdg_create(), the bridge must be empty. 
*/ int -netmap_bdg_destroy(const char *bdg_name, void *auth_token) +netmap_vale_destroy(const char *bdg_name, void *auth_token) { struct nm_bridge *b = NULL; int ret = 0; NMG_LOCK(); - b = nm_find_bridge(bdg_name, 0 /* don't create */); + b = nm_find_bridge(bdg_name, 0 /* don't create */, NULL); if (!b) { ret = ENXIO; goto unlock_bdg_free; @@ -629,27 +309,6 @@ -/* nm_bdg_ctl callback for VALE ports */ -static int -netmap_vp_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na) -{ - struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na; - struct nm_bridge *b = vpna->na_bdg; - - if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) { - return 0; /* nothing to do */ - } - if (b) { - netmap_set_all_rings(na, 0 /* disable */); - netmap_bdg_detach_common(b, vpna->bdg_port, -1); - vpna->na_bdg = NULL; - netmap_set_all_rings(na, 1 /* enable */); - } - /* I have took reference just for attach */ - netmap_adapter_put(na); - return 0; -} - /* nm_dtor callback for ephemeral VALE ports */ static void netmap_vp_dtor(struct netmap_adapter *na) @@ -664,7 +323,7 @@ } if (na->ifp != NULL && !nm_iszombie(na)) { - WNA(na->ifp) = NULL; + NM_DETACH_NA(na->ifp); if (vpna->autodelete) { ND("releasing %s", na->ifp->if_xname); NMG_UNLOCK(); @@ -674,897 +333,8 @@ } } -/* creates a persistent VALE port */ -int -nm_vi_create(struct nmreq_header *hdr) -{ - struct nmreq_vale_newif *req = - (struct nmreq_vale_newif *)(uintptr_t)hdr->nr_body; - int error = 0; - /* Build a nmreq_register out of the nmreq_vale_newif, - * so that we can call netmap_get_bdg_na(). */ - struct nmreq_register regreq; - bzero(®req, sizeof(regreq)); - regreq.nr_tx_slots = req->nr_tx_slots; - regreq.nr_rx_slots = req->nr_rx_slots; - regreq.nr_tx_rings = req->nr_tx_rings; - regreq.nr_rx_rings = req->nr_rx_rings; - regreq.nr_mem_id = req->nr_mem_id; - hdr->nr_reqtype = NETMAP_REQ_REGISTER; - hdr->nr_body = (uintptr_t)®req; - error = netmap_vi_create(hdr, 0 /* no autodelete */); - hdr->nr_reqtype = NETMAP_REQ_VALE_NEWIF; - hdr->nr_body = (uintptr_t)req; - /* Write back to the original struct. */ - req->nr_tx_slots = regreq.nr_tx_slots; - req->nr_rx_slots = regreq.nr_rx_slots; - req->nr_tx_rings = regreq.nr_tx_rings; - req->nr_rx_rings = regreq.nr_rx_rings; - req->nr_mem_id = regreq.nr_mem_id; - return error; -} -/* remove a persistent VALE port from the system */ -int -nm_vi_destroy(const char *name) -{ - struct ifnet *ifp; - struct netmap_vp_adapter *vpna; - int error; - - ifp = ifunit_ref(name); - if (!ifp) - return ENXIO; - NMG_LOCK(); - /* make sure this is actually a VALE port */ - if (!NM_NA_VALID(ifp) || NA(ifp)->nm_register != netmap_vp_reg) { - error = EINVAL; - goto err; - } - - vpna = (struct netmap_vp_adapter *)NA(ifp); - - /* we can only destroy ports that were created via NETMAP_BDG_NEWIF */ - if (vpna->autodelete) { - error = EINVAL; - goto err; - } - - /* also make sure that nobody is using the inferface */ - if (NETMAP_OWNED_BY_ANY(&vpna->up) || - vpna->up.na_refcount > 1 /* any ref besides the one in nm_vi_create()? 
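The renamed entry points keep the authentication-token protocol: a bridge created through netmap_vale_create() is exclusive, and later control operations on it, including destruction, must present the returned token. A hedged usage sketch from the point of view of an external module (the demo_* names are hypothetical):

static void *demo_token;    /* returned by netmap_vale_create() */

static int
demo_module_attach(void)
{
    int status = 0;

    demo_token = netmap_vale_create("vale1:", &status);
    /* success iff demo_token != NULL and status == 0 */
    return (demo_token == NULL) ? status : 0;
}

static void
demo_module_detach(void)
{
    /* must pass the token obtained at creation time */
    (void)netmap_vale_destroy("vale1:", demo_token);
}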
*/) { - error = EBUSY; - goto err; - } - - NMG_UNLOCK(); - - D("destroying a persistent vale interface %s", ifp->if_xname); - /* Linux requires all the references are released - * before unregister - */ - netmap_detach(ifp); - if_rele(ifp); - nm_os_vi_detach(ifp); - return 0; - -err: - NMG_UNLOCK(); - if_rele(ifp); - return error; -} - -static int -nm_update_info(struct nmreq_register *req, struct netmap_adapter *na) -{ - req->nr_rx_rings = na->num_rx_rings; - req->nr_tx_rings = na->num_tx_rings; - req->nr_rx_slots = na->num_rx_desc; - req->nr_tx_slots = na->num_tx_desc; - return netmap_mem_get_info(na->nm_mem, &req->nr_memsize, NULL, - &req->nr_mem_id); -} - -/* - * Create a virtual interface registered to the system. - * The interface will be attached to a bridge later. - */ -int -netmap_vi_create(struct nmreq_header *hdr, int autodelete) -{ - struct nmreq_register *req = (struct nmreq_register *)(uintptr_t)hdr->nr_body; - struct ifnet *ifp; - struct netmap_vp_adapter *vpna; - struct netmap_mem_d *nmd = NULL; - int error; - - if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) { - return EINVAL; - } - - /* don't include VALE prefix */ - if (!strncmp(hdr->nr_name, NM_BDG_NAME, strlen(NM_BDG_NAME))) - return EINVAL; - if (strlen(hdr->nr_name) >= IFNAMSIZ) { - return EINVAL; - } - ifp = ifunit_ref(hdr->nr_name); - if (ifp) { /* already exist, cannot create new one */ - error = EEXIST; - NMG_LOCK(); - if (NM_NA_VALID(ifp)) { - int update_err = nm_update_info(req, NA(ifp)); - if (update_err) - error = update_err; - } - NMG_UNLOCK(); - if_rele(ifp); - return error; - } - error = nm_os_vi_persist(hdr->nr_name, &ifp); - if (error) - return error; - - NMG_LOCK(); - if (req->nr_mem_id) { - nmd = netmap_mem_find(req->nr_mem_id); - if (nmd == NULL) { - error = EINVAL; - goto err_1; - } - } - /* netmap_vp_create creates a struct netmap_vp_adapter */ - error = netmap_vp_create(hdr, ifp, nmd, &vpna); - if (error) { - D("error %d", error); - goto err_1; - } - /* persist-specific routines */ - vpna->up.nm_bdg_ctl = netmap_vp_bdg_ctl; - if (!autodelete) { - netmap_adapter_get(&vpna->up); - } else { - vpna->autodelete = 1; - } - NM_ATTACH_NA(ifp, &vpna->up); - /* return the updated info */ - error = nm_update_info(req, &vpna->up); - if (error) { - goto err_2; - } - ND("returning nr_mem_id %d", req->nr_mem_id); - if (nmd) - netmap_mem_put(nmd); - NMG_UNLOCK(); - ND("created %s", ifp->if_xname); - return 0; - -err_2: - netmap_detach(ifp); -err_1: - if (nmd) - netmap_mem_put(nmd); - NMG_UNLOCK(); - nm_os_vi_detach(ifp); - - return error; -} - -/* Try to get a reference to a netmap adapter attached to a VALE switch. - * If the adapter is found (or is created), this function returns 0, a - * non NULL pointer is returned into *na, and the caller holds a - * reference to the adapter. - * If an adapter is not found, then no reference is grabbed and the - * function returns an error code, or 0 if there is just a VALE prefix - * mismatch. Therefore the caller holds a reference when - * (*na != NULL && return == 0). - */ -int -netmap_get_bdg_na(struct nmreq_header *hdr, struct netmap_adapter **na, - struct netmap_mem_d *nmd, int create) -{ - char *nr_name = hdr->nr_name; - const char *ifname; - struct ifnet *ifp = NULL; - int error = 0; - struct netmap_vp_adapter *vpna, *hostna = NULL; - struct nm_bridge *b; - uint32_t i, j; - uint32_t cand = NM_BDG_NOPORT, cand2 = NM_BDG_NOPORT; - int needed; - - *na = NULL; /* default return value */ - - /* first try to see if this is a bridge port. 
*/ - NMG_LOCK_ASSERT(); - if (strncmp(nr_name, NM_BDG_NAME, sizeof(NM_BDG_NAME) - 1)) { - return 0; /* no error, but no VALE prefix */ - } - - b = nm_find_bridge(nr_name, create); - if (b == NULL) { - ND("no bridges available for '%s'", nr_name); - return (create ? ENOMEM : ENXIO); - } - if (strlen(nr_name) < b->bdg_namelen) /* impossible */ - panic("x"); - - /* Now we are sure that name starts with the bridge's name, - * lookup the port in the bridge. We need to scan the entire - * list. It is not important to hold a WLOCK on the bridge - * during the search because NMG_LOCK already guarantees - * that there are no other possible writers. - */ - - /* lookup in the local list of ports */ - for (j = 0; j < b->bdg_active_ports; j++) { - i = b->bdg_port_index[j]; - vpna = b->bdg_ports[i]; - ND("checking %s", vpna->up.name); - if (!strcmp(vpna->up.name, nr_name)) { - netmap_adapter_get(&vpna->up); - ND("found existing if %s refs %d", nr_name) - *na = &vpna->up; - return 0; - } - } - /* not found, should we create it? */ - if (!create) - return ENXIO; - /* yes we should, see if we have space to attach entries */ - needed = 2; /* in some cases we only need 1 */ - if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) { - D("bridge full %d, cannot create new port", b->bdg_active_ports); - return ENOMEM; - } - /* record the next two ports available, but do not allocate yet */ - cand = b->bdg_port_index[b->bdg_active_ports]; - cand2 = b->bdg_port_index[b->bdg_active_ports + 1]; - ND("+++ bridge %s port %s used %d avail %d %d", - b->bdg_basename, ifname, b->bdg_active_ports, cand, cand2); - - /* - * try to see if there is a matching NIC with this name - * (after the bridge's name) - */ - ifname = nr_name + b->bdg_namelen + 1; - ifp = ifunit_ref(ifname); - if (!ifp) { - /* Create an ephemeral virtual port. - * This block contains all the ephemeral-specific logic. - */ - - if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) { - error = EINVAL; - goto out; - } - - /* bdg_netmap_attach creates a struct netmap_adapter */ - error = netmap_vp_create(hdr, NULL, nmd, &vpna); - if (error) { - D("error %d", error); - goto out; - } - /* shortcut - we can skip get_hw_na(), - * ownership check and nm_bdg_attach() - */ - - } else { - struct netmap_adapter *hw; - - /* the vale:nic syntax is only valid for some commands */ - switch (hdr->nr_reqtype) { - case NETMAP_REQ_VALE_ATTACH: - case NETMAP_REQ_VALE_DETACH: - case NETMAP_REQ_VALE_POLLING_ENABLE: - case NETMAP_REQ_VALE_POLLING_DISABLE: - break; /* ok */ - default: - error = EINVAL; - goto out; - } - - error = netmap_get_hw_na(ifp, nmd, &hw); - if (error || hw == NULL) - goto out; - - /* host adapter might not be created */ - error = hw->nm_bdg_attach(nr_name, hw); - if (error) - goto out; - vpna = hw->na_vp; - hostna = hw->na_hostvp; - if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) { - /* Check if we need to skip the host rings.
*/ - struct nmreq_vale_attach *areq = - (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body; - if (areq->reg.nr_mode != NR_REG_NIC_SW) { - hostna = NULL; - } - } - } - - BDG_WLOCK(b); - vpna->bdg_port = cand; - ND("NIC %p to bridge port %d", vpna, cand); - /* bind the port to the bridge (virtual ports are not active) */ - b->bdg_ports[cand] = vpna; - vpna->na_bdg = b; - b->bdg_active_ports++; - if (hostna != NULL) { - /* also bind the host stack to the bridge */ - b->bdg_ports[cand2] = hostna; - hostna->bdg_port = cand2; - hostna->na_bdg = b; - b->bdg_active_ports++; - ND("host %p to bridge port %d", hostna, cand2); - } - ND("if %s refs %d", ifname, vpna->up.na_refcount); - BDG_WUNLOCK(b); - *na = &vpna->up; - netmap_adapter_get(*na); - -out: - if (ifp) - if_rele(ifp); - - return error; -} - -/* Process NETMAP_REQ_VALE_ATTACH. - */ -int -nm_bdg_ctl_attach(struct nmreq_header *hdr, void *auth_token) -{ - struct nmreq_vale_attach *req = - (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body; - struct netmap_vp_adapter * vpna; - struct netmap_adapter *na; - struct netmap_mem_d *nmd = NULL; - struct nm_bridge *b = NULL; - int error; - - NMG_LOCK(); - /* permission check for modified bridges */ - b = nm_find_bridge(hdr->nr_name, 0 /* don't create */); - if (b && !nm_bdg_valid_auth_token(b, auth_token)) { - error = EACCES; - goto unlock_exit; - } - - if (req->reg.nr_mem_id) { - nmd = netmap_mem_find(req->reg.nr_mem_id); - if (nmd == NULL) { - error = EINVAL; - goto unlock_exit; - } - } - - /* check for existing one */ - error = netmap_get_bdg_na(hdr, &na, nmd, 0); - if (!error) { - error = EBUSY; - goto unref_exit; - } - error = netmap_get_bdg_na(hdr, &na, - nmd, 1 /* create if not exists */); - if (error) { /* no device */ - goto unlock_exit; - } - - if (na == NULL) { /* VALE prefix missing */ - error = EINVAL; - goto unlock_exit; - } - - if (NETMAP_OWNED_BY_ANY(na)) { - error = EBUSY; - goto unref_exit; - } - - if (na->nm_bdg_ctl) { - /* nop for VALE ports. The bwrap needs to put the hwna - * in netmap mode (see netmap_bwrap_bdg_ctl) - */ - error = na->nm_bdg_ctl(hdr, na); - if (error) - goto unref_exit; - ND("registered %s to netmap-mode", na->name); - } - vpna = (struct netmap_vp_adapter *)na; - req->port_index = vpna->bdg_port; - NMG_UNLOCK(); - return 0; - -unref_exit: - netmap_adapter_put(na); -unlock_exit: - NMG_UNLOCK(); - return error; -} - -static inline int -nm_is_bwrap(struct netmap_adapter *na) -{ - return na->nm_register == netmap_bwrap_reg; -} - -/* Process NETMAP_REQ_VALE_DETACH. 
- */ -int -nm_bdg_ctl_detach(struct nmreq_header *hdr, void *auth_token) -{ - struct nmreq_vale_detach *nmreq_det = (void *)(uintptr_t)hdr->nr_body; - struct netmap_vp_adapter *vpna; - struct netmap_adapter *na; - struct nm_bridge *b = NULL; - int error; - - NMG_LOCK(); - /* permission check for modified bridges */ - b = nm_find_bridge(hdr->nr_name, 0 /* don't create */); - if (b && !nm_bdg_valid_auth_token(b, auth_token)) { - error = EACCES; - goto unlock_exit; - } - - error = netmap_get_bdg_na(hdr, &na, NULL, 0 /* don't create */); - if (error) { /* no device, or another bridge or user owns the device */ - goto unlock_exit; - } - - if (na == NULL) { /* VALE prefix missing */ - error = EINVAL; - goto unlock_exit; - } else if (nm_is_bwrap(na) && - ((struct netmap_bwrap_adapter *)na)->na_polling_state) { - /* Don't detach a NIC with polling */ - error = EBUSY; - goto unref_exit; - } - - vpna = (struct netmap_vp_adapter *)na; - if (na->na_vp != vpna) { - /* trying to detach first attach of VALE persistent port attached - * to 2 bridges - */ - error = EBUSY; - goto unref_exit; - } - nmreq_det->port_index = vpna->bdg_port; - - if (na->nm_bdg_ctl) { - /* remove the port from bridge. The bwrap - * also needs to put the hwna in normal mode - */ - error = na->nm_bdg_ctl(hdr, na); - } - -unref_exit: - netmap_adapter_put(na); -unlock_exit: - NMG_UNLOCK(); - return error; - -} - -struct nm_bdg_polling_state; -struct -nm_bdg_kthread { - struct nm_kctx *nmk; - u_int qfirst; - u_int qlast; - struct nm_bdg_polling_state *bps; -}; - -struct nm_bdg_polling_state { - bool configured; - bool stopped; - struct netmap_bwrap_adapter *bna; - uint32_t mode; - u_int qfirst; - u_int qlast; - u_int cpu_from; - u_int ncpus; - struct nm_bdg_kthread *kthreads; -}; - -static void -netmap_bwrap_polling(void *data, int is_kthread) -{ - struct nm_bdg_kthread *nbk = data; - struct netmap_bwrap_adapter *bna; - u_int qfirst, qlast, i; - struct netmap_kring **kring0, *kring; - - if (!nbk) - return; - qfirst = nbk->qfirst; - qlast = nbk->qlast; - bna = nbk->bps->bna; - kring0 = NMR(bna->hwna, NR_RX); - - for (i = qfirst; i < qlast; i++) { - kring = kring0[i]; - kring->nm_notify(kring, 0); - } -} - -static int -nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps) -{ - struct nm_kctx_cfg kcfg; - int i, j; - - bps->kthreads = nm_os_malloc(sizeof(struct nm_bdg_kthread) * bps->ncpus); - if (bps->kthreads == NULL) - return ENOMEM; - - bzero(&kcfg, sizeof(kcfg)); - kcfg.worker_fn = netmap_bwrap_polling; - kcfg.use_kthread = 1; - for (i = 0; i < bps->ncpus; i++) { - struct nm_bdg_kthread *t = bps->kthreads + i; - int all = (bps->ncpus == 1 && - bps->mode == NETMAP_POLLING_MODE_SINGLE_CPU); - int affinity = bps->cpu_from + i; - - t->bps = bps; - t->qfirst = all ? bps->qfirst /* must be 0 */: affinity; - t->qlast = all ? 
bps->qlast : t->qfirst + 1; - D("kthread %d a:%u qf:%u ql:%u", i, affinity, t->qfirst, - t->qlast); - - kcfg.type = i; - kcfg.worker_private = t; - t->nmk = nm_os_kctx_create(&kcfg, NULL); - if (t->nmk == NULL) { - goto cleanup; - } - nm_os_kctx_worker_setaff(t->nmk, affinity); - } - return 0; - -cleanup: - for (j = 0; j < i; j++) { - struct nm_bdg_kthread *t = bps->kthreads + i; - nm_os_kctx_destroy(t->nmk); - } - nm_os_free(bps->kthreads); - return EFAULT; -} - -/* A variant of ptnetmap_start_kthreads() */ -static int -nm_bdg_polling_start_kthreads(struct nm_bdg_polling_state *bps) -{ - int error, i, j; - - if (!bps) { - D("polling is not configured"); - return EFAULT; - } - bps->stopped = false; - - for (i = 0; i < bps->ncpus; i++) { - struct nm_bdg_kthread *t = bps->kthreads + i; - error = nm_os_kctx_worker_start(t->nmk); - if (error) { - D("error in nm_kthread_start()"); - goto cleanup; - } - } - return 0; - -cleanup: - for (j = 0; j < i; j++) { - struct nm_bdg_kthread *t = bps->kthreads + i; - nm_os_kctx_worker_stop(t->nmk); - } - bps->stopped = true; - return error; -} - -static void -nm_bdg_polling_stop_delete_kthreads(struct nm_bdg_polling_state *bps) -{ - int i; - - if (!bps) - return; - - for (i = 0; i < bps->ncpus; i++) { - struct nm_bdg_kthread *t = bps->kthreads + i; - nm_os_kctx_worker_stop(t->nmk); - nm_os_kctx_destroy(t->nmk); - } - bps->stopped = true; -} - -static int -get_polling_cfg(struct nmreq_vale_polling *req, struct netmap_adapter *na, - struct nm_bdg_polling_state *bps) -{ - unsigned int avail_cpus, core_from; - unsigned int qfirst, qlast; - uint32_t i = req->nr_first_cpu_id; - uint32_t req_cpus = req->nr_num_polling_cpus; - - avail_cpus = nm_os_ncpus(); - - if (req_cpus == 0) { - D("req_cpus must be > 0"); - return EINVAL; - } else if (req_cpus >= avail_cpus) { - D("Cannot use all the CPUs in the system"); - return EINVAL; - } - - if (req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU) { - /* Use a separate core for each ring. If nr_num_polling_cpus>1 - * more consecutive rings are polled. - * For example, if nr_first_cpu_id=2 and nr_num_polling_cpus=2, - * ring 2 and 3 are polled by core 2 and 3, respectively. */ - if (i + req_cpus > nma_get_nrings(na, NR_RX)) { - D("Rings %u-%u not in range (have %d rings)", - i, i + req_cpus, nma_get_nrings(na, NR_RX)); - return EINVAL; - } - qfirst = i; - qlast = qfirst + req_cpus; - core_from = qfirst; - - } else if (req->nr_mode == NETMAP_POLLING_MODE_SINGLE_CPU) { - /* Poll all the rings using a core specified by nr_first_cpu_id. - * the number of cores must be 1. */ - if (req_cpus != 1) { - D("ncpus must be 1 for NETMAP_POLLING_MODE_SINGLE_CPU " - "(was %d)", req_cpus); - return EINVAL; - } - qfirst = 0; - qlast = nma_get_nrings(na, NR_RX); - core_from = i; - } else { - D("Invalid polling mode"); - return EINVAL; - } - - bps->mode = req->nr_mode; - bps->qfirst = qfirst; - bps->qlast = qlast; - bps->cpu_from = core_from; - bps->ncpus = req_cpus; - D("%s qfirst %u qlast %u cpu_from %u ncpus %u", - req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU ? 
- "MULTI" : "SINGLE", - qfirst, qlast, core_from, req_cpus); - return 0; -} - -static int -nm_bdg_ctl_polling_start(struct nmreq_vale_polling *req, struct netmap_adapter *na) -{ - struct nm_bdg_polling_state *bps; - struct netmap_bwrap_adapter *bna; - int error; - - bna = (struct netmap_bwrap_adapter *)na; - if (bna->na_polling_state) { - D("ERROR adapter already in polling mode"); - return EFAULT; - } - - bps = nm_os_malloc(sizeof(*bps)); - if (!bps) - return ENOMEM; - bps->configured = false; - bps->stopped = true; - - if (get_polling_cfg(req, na, bps)) { - nm_os_free(bps); - return EINVAL; - } - - if (nm_bdg_create_kthreads(bps)) { - nm_os_free(bps); - return EFAULT; - } - - bps->configured = true; - bna->na_polling_state = bps; - bps->bna = bna; - - /* disable interrupts if possible */ - nma_intr_enable(bna->hwna, 0); - /* start kthread now */ - error = nm_bdg_polling_start_kthreads(bps); - if (error) { - D("ERROR nm_bdg_polling_start_kthread()"); - nm_os_free(bps->kthreads); - nm_os_free(bps); - bna->na_polling_state = NULL; - nma_intr_enable(bna->hwna, 1); - } - return error; -} - -static int -nm_bdg_ctl_polling_stop(struct netmap_adapter *na) -{ - struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na; - struct nm_bdg_polling_state *bps; - - if (!bna->na_polling_state) { - D("ERROR adapter is not in polling mode"); - return EFAULT; - } - bps = bna->na_polling_state; - nm_bdg_polling_stop_delete_kthreads(bna->na_polling_state); - bps->configured = false; - nm_os_free(bps); - bna->na_polling_state = NULL; - /* reenable interrupts */ - nma_intr_enable(bna->hwna, 1); - return 0; -} - -int -nm_bdg_polling(struct nmreq_header *hdr) -{ - struct nmreq_vale_polling *req = - (struct nmreq_vale_polling *)(uintptr_t)hdr->nr_body; - struct netmap_adapter *na = NULL; - int error = 0; - - NMG_LOCK(); - error = netmap_get_bdg_na(hdr, &na, NULL, /*create=*/0); - if (na && !error) { - if (!nm_is_bwrap(na)) { - error = EOPNOTSUPP; - } else if (hdr->nr_reqtype == NETMAP_BDG_POLLING_ON) { - error = nm_bdg_ctl_polling_start(req, na); - if (!error) - netmap_adapter_get(na); - } else { - error = nm_bdg_ctl_polling_stop(na); - if (!error) - netmap_adapter_put(na); - } - netmap_adapter_put(na); - } else if (!na && !error) { - /* Not VALE port. */ - error = EINVAL; - } - NMG_UNLOCK(); - - return error; -} - -/* Process NETMAP_REQ_VALE_LIST. 
*/ -int -netmap_bdg_list(struct nmreq_header *hdr) -{ - struct nmreq_vale_list *req = - (struct nmreq_vale_list *)(uintptr_t)hdr->nr_body; - int namelen = strlen(hdr->nr_name); - struct nm_bridge *b, *bridges; - struct netmap_vp_adapter *vpna; - int error = 0, i, j; - u_int num_bridges; - - netmap_bns_getbridges(&bridges, &num_bridges); - - /* this is used to enumerate bridges and ports */ - if (namelen) { /* look up indexes of bridge and port */ - if (strncmp(hdr->nr_name, NM_BDG_NAME, - strlen(NM_BDG_NAME))) { - return EINVAL; - } - NMG_LOCK(); - b = nm_find_bridge(hdr->nr_name, 0 /* don't create */); - if (!b) { - NMG_UNLOCK(); - return ENOENT; - } - - req->nr_bridge_idx = b - bridges; /* bridge index */ - req->nr_port_idx = NM_BDG_NOPORT; - for (j = 0; j < b->bdg_active_ports; j++) { - i = b->bdg_port_index[j]; - vpna = b->bdg_ports[i]; - if (vpna == NULL) { - D("This should not happen"); - continue; - } - /* the former and the latter identify a - * virtual port and a NIC, respectively - */ - if (!strcmp(vpna->up.name, hdr->nr_name)) { - req->nr_port_idx = i; /* port index */ - break; - } - } - NMG_UNLOCK(); - } else { - /* return the first non-empty entry starting from - * bridge nr_arg1 and port nr_arg2. - * - * Users can detect the end of the same bridge by - * seeing the new and old value of nr_arg1, and can - * detect the end of all the bridge by error != 0 - */ - i = req->nr_bridge_idx; - j = req->nr_port_idx; - - NMG_LOCK(); - for (error = ENOENT; i < NM_BRIDGES; i++) { - b = bridges + i; - for ( ; j < NM_BDG_MAXPORTS; j++) { - if (b->bdg_ports[j] == NULL) - continue; - vpna = b->bdg_ports[j]; - /* write back the VALE switch name */ - strncpy(hdr->nr_name, vpna->up.name, - (size_t)IFNAMSIZ); - error = 0; - goto out; - } - j = 0; /* following bridges scan from 0 */ - } - out: - req->nr_bridge_idx = i; - req->nr_port_idx = j; - NMG_UNLOCK(); - } - - return error; -} - /* Called by external kernel modules (e.g., Openvswitch). - * to set configure/lookup/dtor functions of a VALE instance. - * Register callbacks to the given bridge. 'name' may be just - * bridge's name (including ':' if it is not just NM_BDG_NAME). - * - * Called without NMG_LOCK. - */ - -int -netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *private_data, void *auth_token) -{ - struct nm_bridge *b; - int error = 0; - - NMG_LOCK(); - b = nm_find_bridge(name, 0 /* don't create */); - if (!b) { - error = ENXIO; - goto unlock_regops; - } - if (!nm_bdg_valid_auth_token(b, auth_token)) { - error = EACCES; - goto unlock_regops; - } - - BDG_WLOCK(b); - if (!bdg_ops) { - /* resetting the bridge */ - bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH); - b->bdg_ops = &default_bdg_ops; - b->private_data = b->ht; - } else { - /* modifying the bridge */ - b->private_data = private_data; - b->bdg_ops = bdg_ops; - } - BDG_WUNLOCK(b); - -unlock_regops: - NMG_UNLOCK(); - return error; -} - -/* Called by external kernel modules (e.g., Openvswitch). * to modify the private data previously given to regops(). * 'name' may be just bridge's name (including ':' if it * is not just NM_BDG_NAME). 
@@ -1579,7 +349,7 @@ int error = 0; NMG_LOCK(); - b = nm_find_bridge(name, 0 /* don't create */); + b = nm_find_bridge(name, 0 /* don't create */, NULL); if (!b) { error = EINVAL; goto unlock_update_priv; @@ -1598,28 +368,7 @@ return error; } -int -netmap_bdg_config(struct nm_ifreq *nr) -{ - struct nm_bridge *b; - int error = EINVAL; - NMG_LOCK(); - b = nm_find_bridge(nr->nifr_name, 0); - if (!b) { - NMG_UNLOCK(); - return error; - } - NMG_UNLOCK(); - /* Don't call config() with NMG_LOCK() held */ - BDG_RLOCK(b); - if (b->bdg_ops->config != NULL) - error = b->bdg_ops->config(nr); - BDG_RUNLOCK(b); - return error; -} - - /* nm_krings_create callback for VALE ports. * Calls the standard netmap_krings_create, then adds leases on rx * rings and bdgfwd on tx rings. @@ -1798,52 +547,6 @@ #undef mix -/* nm_register callback for VALE ports */ -static int -netmap_vp_reg(struct netmap_adapter *na, int onoff) -{ - struct netmap_vp_adapter *vpna = - (struct netmap_vp_adapter*)na; - enum txrx t; - int i; - - /* persistent ports may be put in netmap mode - * before being attached to a bridge - */ - if (vpna->na_bdg) - BDG_WLOCK(vpna->na_bdg); - if (onoff) { - for_rx_tx(t) { - for (i = 0; i < netmap_real_rings(na, t); i++) { - struct netmap_kring *kring = NMR(na, t)[i]; - - if (nm_kring_pending_on(kring)) - kring->nr_mode = NKR_NETMAP_ON; - } - } - if (na->active_fds == 0) - na->na_flags |= NAF_NETMAP_ON; - /* XXX on FreeBSD, persistent VALE ports should also - * toggle IFCAP_NETMAP in na->ifp (2014-03-16) - */ - } else { - if (na->active_fds == 0) - na->na_flags &= ~NAF_NETMAP_ON; - for_rx_tx(t) { - for (i = 0; i < netmap_real_rings(na, t); i++) { - struct netmap_kring *kring = NMR(na, t)[i]; - - if (nm_kring_pending_off(kring)) - kring->nr_mode = NKR_NETMAP_OFF; - } - } - } - if (vpna->na_bdg) - BDG_WUNLOCK(vpna->na_bdg); - return 0; -} - - /* * Lookup function for a learning bridge. * Update the hash table with the source address, @@ -2361,86 +1064,6 @@ } -/* rxsync code used by the VALE ports' nm_rxsync callback and also - * internally by the bwrap - */ -static int -netmap_vp_rxsync_locked(struct netmap_kring *kring, int flags) -{ - struct netmap_adapter *na = kring->na; - struct netmap_ring *ring = kring->ring; - u_int nm_i, lim = kring->nkr_num_slots - 1; - u_int head = kring->rhead; - int n; - - if (head > lim) { - D("ouch dangerous reset!!!"); - n = netmap_ring_reinit(kring); - goto done; - } - - /* First part, import newly received packets. */ - /* actually nothing to do here, they are already in the kring */ - - /* Second part, skip past packets that userspace has released. */ - nm_i = kring->nr_hwcur; - if (nm_i != head) { - /* consistency check, but nothing really important here */ - for (n = 0; likely(nm_i != head); n++) { - struct netmap_slot *slot = &ring->slot[nm_i]; - void *addr = NMB(na, slot); - - if (addr == NETMAP_BUF_BASE(kring->na)) { /* bad buf */ - D("bad buffer index %d, ignore ?", - slot->buf_idx); - } - slot->flags &= ~NS_BUF_CHANGED; - nm_i = nm_next(nm_i, lim); - } - kring->nr_hwcur = head; - } - - n = 0; -done: - return n; -} - -/* - * nm_rxsync callback for VALE ports - * user process reading from a VALE switch. - * Already protected against concurrent calls from userspace, - * but we must acquire the queue's lock to protect against - * writers on the same queue.
- */ -static int -netmap_vp_rxsync(struct netmap_kring *kring, int flags) -{ - int n; - - mtx_lock(&kring->q_lock); - n = netmap_vp_rxsync_locked(kring, flags); - mtx_unlock(&kring->q_lock); - return n; -} - - -/* nm_bdg_attach callback for VALE ports - * The na_vp port is this same netmap_adapter. There is no host port. - */ -static int -netmap_vp_bdg_attach(const char *name, struct netmap_adapter *na) -{ - struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na; - - if (vpna->na_bdg) { - return netmap_bwrap_attach(name, na); - } - na->na_vp = vpna; - strncpy(na->name, name, sizeof(na->name)); - na->na_hostvp = NULL; - return 0; -} - /* create a netmap_vp_adapter that describes a VALE port. * Only persistent VALE ports have a non-null ifp. */ @@ -2536,635 +1159,270 @@ return error; } -/* Bridge wrapper code (bwrap). - * This is used to connect a non-VALE-port netmap_adapter (hwna) to a - * VALE switch. - * The main task is to swap the meaning of tx and rx rings to match the - * expectations of the VALE switch code (see nm_bdg_flush). - * - * The bwrap works by interposing a netmap_bwrap_adapter between the - * rest of the system and the hwna. The netmap_bwrap_adapter looks like - * a netmap_vp_adapter to the rest of the system, but, internally, it - * translates all callbacks to what the hwna expects. - * - * Note that we have to intercept callbacks coming from two sides: - * - * - callbacks coming from the netmap module are intercepted by - * passing around the netmap_bwrap_adapter instead of the hwna - * - * - callbacks coming from outside of the netmap module only know - * about the hwna. This, however, only happens in interrupt - * handlers, where only the hwna->nm_notify callback is called. - * What the bwrap does is to overwrite the hwna->nm_notify callback - * with its own netmap_bwrap_intr_notify. - * XXX This assumes that the hwna->nm_notify callback was the - * standard netmap_notify(), as is the case for nic adapters. - * Any additional action performed by hwna->nm_notify will not be - * performed by netmap_bwrap_intr_notify. - * - * Additionally, the bwrap can optionally attach the host ring pair - * of the wrapped adapter to a different port of the switch. +/* nm_bdg_attach callback for VALE ports + * The na_vp port is this same netmap_adapter. There is no host port. */ - - -static void -netmap_bwrap_dtor(struct netmap_adapter *na) -{ - struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na; - struct netmap_adapter *hwna = bna->hwna; - struct nm_bridge *b = bna->up.na_bdg, - *bh = bna->host.na_bdg; - - if (bna->host.up.nm_mem) - netmap_mem_put(bna->host.up.nm_mem); - - if (b) { - netmap_bdg_detach_common(b, bna->up.bdg_port, - (bh ? bna->host.bdg_port : -1)); - } - - ND("na %p", na); - na->ifp = NULL; - bna->host.up.ifp = NULL; - hwna->na_vp = bna->saved_na_vp; - hwna->na_hostvp = NULL; - hwna->na_private = NULL; - hwna->na_flags &= ~NAF_BUSY; - netmap_adapter_put(hwna); - -} - - -/* - * Intr callback for NICs connected to a bridge. - * Simply ignore tx interrupts (maybe we could try to recover space ?) - * and pass received packets from nic to the bridge. - * - * XXX TODO check locking: this is called from the interrupt - * handler so we should make sure that the interface is not - * disconnected while passing down an interrupt. - * - * Note, no user process can access this NIC or the host stack.
- * The only part of the ring that is significant are the slots, - * and head/cur/tail are set from the kring as needed - * (part as a receive ring, part as a transmit ring). - * - * callback that overwrites the hwna notify callback. - * Packets come from the outside or from the host stack and are put on an - * hwna rx ring. - * The bridge wrapper then sends the packets through the bridge. - */ static int -netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags) +netmap_vp_bdg_attach(const char *name, struct netmap_adapter *na, + struct nm_bridge *b) { - struct netmap_adapter *na = kring->na; - struct netmap_bwrap_adapter *bna = na->na_private; - struct netmap_kring *bkring; - struct netmap_vp_adapter *vpna = &bna->up; - u_int ring_nr = kring->ring_id; - int ret = NM_IRQ_COMPLETED; - int error; + struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na; - if (netmap_verbose) - D("%s %s 0x%x", na->name, kring->name, flags); - - bkring = vpna->up.tx_rings[ring_nr]; - - /* make sure the ring is not disabled */ - if (nm_kr_tryget(kring, 0 /* can't sleep */, NULL)) { - return EIO; + if (b->bdg_ops != &vale_bdg_ops) { + return NM_NEED_BWRAP; } - - if (netmap_verbose) - D("%s head %d cur %d tail %d", na->name, - kring->rhead, kring->rcur, kring->rtail); - - /* simulate a user wakeup on the rx ring - * fetch packets that have arrived. - */ - error = kring->nm_sync(kring, 0); - if (error) - goto put_out; - if (kring->nr_hwcur == kring->nr_hwtail) { - if (netmap_verbose) - D("how strange, interrupt with no packets on %s", - na->name); - goto put_out; + if (vpna->na_bdg) { + return NM_NEED_BWRAP; } - - /* new packets are kring->rcur to kring->nr_hwtail, and the bkring - * had hwcur == bkring->rhead. So advance bkring->rhead to kring->nr_hwtail - * to push all packets out. - */ - bkring->rhead = bkring->rcur = kring->nr_hwtail; - - netmap_vp_txsync(bkring, flags); - - /* mark all buffers as released on this ring */ - kring->rhead = kring->rcur = kring->rtail = kring->nr_hwtail; - /* another call to actually release the buffers */ - error = kring->nm_sync(kring, 0); - - /* The second rxsync may have further advanced hwtail. If this happens, - * return NM_IRQ_RESCHED, otherwise just return NM_IRQ_COMPLETED. */ - if (kring->rcur != kring->nr_hwtail) { - ret = NM_IRQ_RESCHED; - } -put_out: - nm_kr_put(kring); - - return error ? error : ret; -} - - -/* nm_register callback for bwrap */ -static int -netmap_bwrap_reg(struct netmap_adapter *na, int onoff) -{ - struct netmap_bwrap_adapter *bna = - (struct netmap_bwrap_adapter *)na; - struct netmap_adapter *hwna = bna->hwna; - struct netmap_vp_adapter *hostna = &bna->host; - int error, i; - enum txrx t; - - ND("%s %s", na->name, onoff ? "on" : "off"); - - if (onoff) { - /* netmap_do_regif has been called on the bwrap na. 
- * We need to pass the information about the - * memory allocator down to the hwna before - * putting it in netmap mode - */ - hwna->na_lut = na->na_lut; - - if (hostna->na_bdg) { - /* if the host rings have been attached to the switch, - * we need to copy the memory allocator information - * in the hostna also - */ - hostna->up.na_lut = na->na_lut; - } - - } - - /* pass down the pending ring state information */ - for_rx_tx(t) { - for (i = 0; i < nma_get_nrings(na, t) + 1; i++) - NMR(hwna, t)[i]->nr_pending_mode = - NMR(na, t)[i]->nr_pending_mode; - } - - /* forward the request to the hwna */ - error = hwna->nm_register(hwna, onoff); - if (error) - return error; - - /* copy up the current ring state information */ - for_rx_tx(t) { - for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { - struct netmap_kring *kring = NMR(hwna, t)[i]; - NMR(na, t)[i]->nr_mode = kring->nr_mode; - } - } - - /* impersonate a netmap_vp_adapter */ - netmap_vp_reg(na, onoff); - if (hostna->na_bdg) - netmap_vp_reg(&hostna->up, onoff); - - if (onoff) { - u_int i; - /* intercept the hwna nm_notify callback on the hw rings */ - for (i = 0; i < hwna->num_rx_rings; i++) { - hwna->rx_rings[i]->save_notify = hwna->rx_rings[i]->nm_notify; - hwna->rx_rings[i]->nm_notify = netmap_bwrap_intr_notify; - } - i = hwna->num_rx_rings; /* for safety */ - /* save the host ring notify unconditionally */ - hwna->rx_rings[i]->save_notify = hwna->rx_rings[i]->nm_notify; - if (hostna->na_bdg) { - /* also intercept the host ring notify */ - hwna->rx_rings[i]->nm_notify = netmap_bwrap_intr_notify; - } - if (na->active_fds == 0) - na->na_flags |= NAF_NETMAP_ON; - } else { - u_int i; - - if (na->active_fds == 0) - na->na_flags &= ~NAF_NETMAP_ON; - - /* reset all notify callbacks (including host ring) */ - for (i = 0; i <= hwna->num_rx_rings; i++) { - hwna->rx_rings[i]->nm_notify = hwna->rx_rings[i]->save_notify; - hwna->rx_rings[i]->save_notify = NULL; - } - hwna->na_lut.lut = NULL; - hwna->na_lut.plut = NULL; - hwna->na_lut.objtotal = 0; - hwna->na_lut.objsize = 0; - - /* pass ownership of the netmap rings to the hwna */ - for_rx_tx(t) { - for (i = 0; i < nma_get_nrings(na, t) + 1; i++) { - NMR(na, t)[i]->ring = NULL; - } - } - - } - + na->na_vp = vpna; + strncpy(na->name, name, sizeof(na->name)); + na->na_hostvp = NULL; return 0; } -/* nm_config callback for bwrap */ static int -netmap_bwrap_config(struct netmap_adapter *na, struct nm_config_info *info) +netmap_vale_bwrap_krings_create(struct netmap_adapter *na) { - struct netmap_bwrap_adapter *bna = - (struct netmap_bwrap_adapter *)na; - struct netmap_adapter *hwna = bna->hwna; int error; - /* Forward the request to the hwna. It may happen that nobody - * registered hwna yet, so netmap_mem_get_lut() may have not - * been called yet.
*/ - error = netmap_mem_get_lut(hwna->nm_mem, &hwna->na_lut); - if (error) - return error; - netmap_update_config(hwna); - /* swap the results and propagate */ - info->num_tx_rings = hwna->num_rx_rings; - info->num_tx_descs = hwna->num_rx_desc; - info->num_rx_rings = hwna->num_tx_rings; - info->num_rx_descs = hwna->num_tx_desc; - info->rx_buf_maxsize = hwna->rx_buf_maxsize; - - return 0; -} - - -/* nm_krings_create callback for bwrap */ -static int -netmap_bwrap_krings_create(struct netmap_adapter *na) -{ - struct netmap_bwrap_adapter *bna = - (struct netmap_bwrap_adapter *)na; - struct netmap_adapter *hwna = bna->hwna; - struct netmap_adapter *hostna = &bna->host.up; - int i, error = 0; - enum txrx t; - - ND("%s", na->name); - /* impersonate a netmap_vp_adapter */ error = netmap_vp_krings_create(na); if (error) return error; - - /* also create the hwna krings */ - error = hwna->nm_krings_create(hwna); + error = netmap_bwrap_krings_create_common(na); if (error) { - goto err_del_vp_rings; + netmap_vp_krings_delete(na); } - - /* increment the usage counter for all the hwna krings */ - for_rx_tx(t) { - for (i = 0; i < nma_get_nrings(hwna, t) + 1; i++) { - NMR(hwna, t)[i]->users++; - } - } - - /* now create the actual rings */ - error = netmap_mem_rings_create(hwna); - if (error) { - goto err_dec_users; - } - - /* cross-link the netmap rings - * The original number of rings comes from hwna, - * rx rings on one side equals tx rings on the other. - */ - for_rx_tx(t) { - enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */ - for (i = 0; i < nma_get_nrings(hwna, r) + 1; i++) { - NMR(na, t)[i]->nkr_num_slots = NMR(hwna, r)[i]->nkr_num_slots; - NMR(na, t)[i]->ring = NMR(hwna, r)[i]->ring; - } - } - - if (na->na_flags & NAF_HOST_RINGS) { - /* the hostna rings are the host rings of the bwrap. - * The corresponding krings must point back to the - * hostna - */ - hostna->tx_rings = &na->tx_rings[na->num_tx_rings]; - hostna->tx_rings[0]->na = hostna; - hostna->rx_rings = &na->rx_rings[na->num_rx_rings]; - hostna->rx_rings[0]->na = hostna; - } - - return 0; - -err_dec_users: - for_rx_tx(t) { - NMR(hwna, t)[i]->users--; - } - hwna->nm_krings_delete(hwna); -err_del_vp_rings: - netmap_vp_krings_delete(na); - return error; } - static void -netmap_bwrap_krings_delete(struct netmap_adapter *na) +netmap_vale_bwrap_krings_delete(struct netmap_adapter *na) { - struct netmap_bwrap_adapter *bna = - (struct netmap_bwrap_adapter *)na; - struct netmap_adapter *hwna = bna->hwna; - enum txrx t; - int i; - - ND("%s", na->name); - - /* decrement the usage counter for all the hwna krings */ - for_rx_tx(t) { - for (i = 0; i < nma_get_nrings(hwna, t) + 1; i++) { - NMR(hwna, t)[i]->users--; - } - } - - /* delete any netmap rings that are no longer needed */ - netmap_mem_rings_delete(hwna); - hwna->nm_krings_delete(hwna); + netmap_bwrap_krings_delete_common(na); netmap_vp_krings_delete(na); } - -/* notify method for the bridge-->hwna direction */ static int -netmap_bwrap_notify(struct netmap_kring *kring, int flags) +netmap_vale_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna) { - struct netmap_adapter *na = kring->na; - struct netmap_bwrap_adapter *bna = na->na_private; - struct netmap_adapter *hwna = bna->hwna; - u_int ring_n = kring->ring_id; - u_int lim = kring->nkr_num_slots - 1; - struct netmap_kring *hw_kring; - int error; - - ND("%s: na %s hwna %s", - (kring ? kring->name : "NULL!"), - (na ? na->name : "NULL!"), - (hwna ? 
hwna->name : "NULL!")); - hw_kring = hwna->tx_rings[ring_n]; - - if (nm_kr_tryget(hw_kring, 0, NULL)) { - return ENXIO; - } - - /* first step: simulate a user wakeup on the rx ring */ - netmap_vp_rxsync(kring, flags); - ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)", - na->name, ring_n, - kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease, - ring->head, ring->cur, ring->tail, - hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail); - /* second step: the new packets are sent on the tx ring - * (which is actually the same ring) - */ - hw_kring->rhead = hw_kring->rcur = kring->nr_hwtail; - error = hw_kring->nm_sync(hw_kring, flags); - if (error) - goto put_out; - - /* third step: now we are back on the rx ring */ - /* claim ownership on all hw owned bufs */ - kring->rhead = kring->rcur = nm_next(hw_kring->nr_hwtail, lim); /* skip past reserved slot */ - - /* fourth step: the user goes to sleep again, causing another rxsync */ - netmap_vp_rxsync(kring, flags); - ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)", - na->name, ring_n, - kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease, - ring->head, ring->cur, ring->tail, - hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail); -put_out: - nm_kr_put(hw_kring); - - return error ? error : NM_IRQ_COMPLETED; -} - - -/* nm_bdg_ctl callback for the bwrap. - * Called on bridge-attach and detach, as an effect of vale-ctl -[ahd]. - * On attach, it needs to provide a fake netmap_priv_d structure and - * perform a netmap_do_regif() on the bwrap. This will put both the - * bwrap and the hwna in netmap mode, with the netmap rings shared - * and cross-linked. Moreover, it will start intercepting interrupts - * directed to hwna. - */ -static int -netmap_bwrap_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na) -{ - struct netmap_priv_d *npriv; - struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na; - int error = 0; - - if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) { - struct nmreq_vale_attach *req = - (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body; - if (req->reg.nr_ringid != 0 || - (req->reg.nr_mode != NR_REG_ALL_NIC && - req->reg.nr_mode != NR_REG_NIC_SW)) { - /* We only support attaching all the NIC rings - * and/or the host stack.
*/ - return EINVAL; - } - if (NETMAP_OWNED_BY_ANY(na)) { - return EBUSY; - } - if (bna->na_kpriv) { - /* nothing to do */ - return 0; - } - npriv = netmap_priv_new(); - if (npriv == NULL) - return ENOMEM; - npriv->np_ifp = na->ifp; /* let the priv destructor release the ref */ - error = netmap_do_regif(npriv, na, req->reg.nr_mode, - req->reg.nr_ringid, req->reg.nr_flags); - if (error) { - netmap_priv_delete(npriv); - return error; - } - bna->na_kpriv = npriv; - na->na_flags |= NAF_BUSY; - } else { - if (na->active_fds == 0) /* not registered */ - return EINVAL; - netmap_priv_delete(bna->na_kpriv); - bna->na_kpriv = NULL; - na->na_flags &= ~NAF_BUSY; - } - - return error; -} - -/* attach a bridge wrapper to the 'real' device */ -int -netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna) -{ struct netmap_bwrap_adapter *bna; struct netmap_adapter *na = NULL; struct netmap_adapter *hostna = NULL; - int error = 0; - enum txrx t; + int error; - /* make sure the NIC is not already in use */ - if (NETMAP_OWNED_BY_ANY(hwna)) { - D("NIC %s busy, cannot attach to bridge", hwna->name); - return EBUSY; - } - bna = nm_os_malloc(sizeof(*bna)); if (bna == NULL) { return ENOMEM; } - na = &bna->up.up; - /* make bwrap ifp point to the real ifp */ - na->ifp = hwna->ifp; - if_ref(na->ifp); - na->na_private = bna; strncpy(na->name, nr_name, sizeof(na->name)); - /* fill the ring data for the bwrap adapter with rx/tx meanings - * swapped. The real cross-linking will be done during register, - * when all the krings will have been created. - */ - for_rx_tx(t) { - enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */ - nma_set_nrings(na, t, nma_get_nrings(hwna, r)); - nma_set_ndesc(na, t, nma_get_ndesc(hwna, r)); - } - na->nm_dtor = netmap_bwrap_dtor; na->nm_register = netmap_bwrap_reg; - // na->nm_txsync = netmap_bwrap_txsync; + na->nm_txsync = netmap_vp_txsync; // na->nm_rxsync = netmap_bwrap_rxsync; - na->nm_config = netmap_bwrap_config; - na->nm_krings_create = netmap_bwrap_krings_create; - na->nm_krings_delete = netmap_bwrap_krings_delete; + na->nm_krings_create = netmap_vale_bwrap_krings_create; + na->nm_krings_delete = netmap_vale_bwrap_krings_delete; na->nm_notify = netmap_bwrap_notify; - na->nm_bdg_ctl = netmap_bwrap_bdg_ctl; - na->pdev = hwna->pdev; - na->nm_mem = netmap_mem_get(hwna->nm_mem); - na->virt_hdr_len = hwna->virt_hdr_len; - na->rx_buf_maxsize = hwna->rx_buf_maxsize; bna->up.retry = 1; /* XXX maybe this should depend on the hwna */ /* Set the mfs, needed on the VALE mismatch datapath. 
*/ bna->up.mfs = NM_BDG_MFS_DEFAULT; - bna->hwna = hwna; - netmap_adapter_get(hwna); - hwna->na_private = bna; /* weak reference */ - bna->saved_na_vp = hwna->na_vp; - hwna->na_vp = &bna->up; - bna->up.up.na_vp = &(bna->up); - if (hwna->na_flags & NAF_HOST_RINGS) { - if (hwna->na_flags & NAF_SW_ONLY) - na->na_flags |= NAF_SW_ONLY; - na->na_flags |= NAF_HOST_RINGS; hostna = &bna->host.up; - snprintf(hostna->name, sizeof(hostna->name), "%s^", nr_name); - hostna->ifp = hwna->ifp; - for_rx_tx(t) { - enum txrx r = nm_txrx_swap(t); - nma_set_nrings(hostna, t, 1); - nma_set_ndesc(hostna, t, nma_get_ndesc(hwna, r)); - } - // hostna->nm_txsync = netmap_bwrap_host_txsync; - // hostna->nm_rxsync = netmap_bwrap_host_rxsync; hostna->nm_notify = netmap_bwrap_notify; - hostna->nm_mem = netmap_mem_get(na->nm_mem); - hostna->na_private = bna; - hostna->na_vp = &bna->up; - na->na_hostvp = hwna->na_hostvp = - hostna->na_hostvp = &bna->host; - hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */ - hostna->rx_buf_maxsize = hwna->rx_buf_maxsize; bna->host.mfs = NM_BDG_MFS_DEFAULT; } - ND("%s<->%s txr %d txd %d rxr %d rxd %d", - na->name, ifp->if_xname, - na->num_tx_rings, na->num_tx_desc, - na->num_rx_rings, na->num_rx_desc); - - error = netmap_attach_common(na); + error = netmap_bwrap_attach_common(na, hwna); if (error) { - goto err_free; + nm_os_free(bna); } - hwna->na_flags |= NAF_BUSY; - return 0; - -err_free: - hwna->na_vp = hwna->na_hostvp = NULL; - netmap_adapter_put(hwna); - nm_os_free(bna); return error; - } -struct nm_bridge * -netmap_init_bridges2(u_int n) +int +netmap_get_vale_na(struct nmreq_header *hdr, struct netmap_adapter **na, + struct netmap_mem_d *nmd, int create) { - int i; - struct nm_bridge *b; + return netmap_get_bdg_na(hdr, na, nmd, create, &vale_bdg_ops); +} - b = nm_os_malloc(sizeof(struct nm_bridge) * n); - if (b == NULL) - return NULL; - for (i = 0; i < n; i++) - BDG_RWINIT(&b[i]); - return b; + +/* creates a persistent VALE port */ +int +nm_vi_create(struct nmreq_header *hdr) +{ + struct nmreq_vale_newif *req = + (struct nmreq_vale_newif *)(uintptr_t)hdr->nr_body; + int error = 0; + /* Build a nmreq_register out of the nmreq_vale_newif, + * so that we can call netmap_get_bdg_na(). */ + struct nmreq_register regreq; + bzero(&regreq, sizeof(regreq)); + regreq.nr_tx_slots = req->nr_tx_slots; + regreq.nr_rx_slots = req->nr_rx_slots; + regreq.nr_tx_rings = req->nr_tx_rings; + regreq.nr_rx_rings = req->nr_rx_rings; + regreq.nr_mem_id = req->nr_mem_id; + hdr->nr_reqtype = NETMAP_REQ_REGISTER; + hdr->nr_body = (uintptr_t)&regreq; + error = netmap_vi_create(hdr, 0 /* no autodelete */); + hdr->nr_reqtype = NETMAP_REQ_VALE_NEWIF; + hdr->nr_body = (uintptr_t)req; + /* Write back to the original struct.
*/ + req->nr_tx_slots = regreq.nr_tx_slots; + req->nr_rx_slots = regreq.nr_rx_slots; + req->nr_tx_rings = regreq.nr_tx_rings; + req->nr_rx_rings = regreq.nr_rx_rings; + req->nr_mem_id = regreq.nr_mem_id; + return error; } /* remove a persistent VALE port from the system */ int nm_vi_destroy(const char *name) { struct ifnet *ifp; struct netmap_vp_adapter *vpna; int error; ifp = ifunit_ref(name); if (!ifp) return ENXIO; NMG_LOCK(); /* make sure this is actually a VALE port */ if (!NM_NA_VALID(ifp) || NA(ifp)->nm_register != netmap_vp_reg) { error = EINVAL; goto err; } vpna = (struct netmap_vp_adapter *)NA(ifp); /* we can only destroy ports that were created via NETMAP_BDG_NEWIF */ if (vpna->autodelete) { error = EINVAL; goto err; } /* also make sure that nobody is using the interface */ if (NETMAP_OWNED_BY_ANY(&vpna->up) || vpna->up.na_refcount > 1 /* any ref besides the one in nm_vi_create()? */) { error = EBUSY; goto err; } NMG_UNLOCK(); D("destroying a persistent vale interface %s", ifp->if_xname); /* Linux requires that all references are released * before unregister */ netmap_detach(ifp); if_rele(ifp); nm_os_vi_detach(ifp); return 0; err: NMG_UNLOCK(); if_rele(ifp); return error; } static int nm_update_info(struct nmreq_register *req, struct netmap_adapter *na) { req->nr_rx_rings = na->num_rx_rings; req->nr_tx_rings = na->num_tx_rings; req->nr_rx_slots = na->num_rx_desc; req->nr_tx_slots = na->num_tx_desc; return netmap_mem_get_info(na->nm_mem, &req->nr_memsize, NULL, &req->nr_mem_id); } /* * Create a virtual interface registered to the system. * The interface will be attached to a bridge later.
+ */ +int +netmap_vi_create(struct nmreq_header *hdr, int autodelete) { -#ifdef CONFIG_NET_NS - netmap_bns_unregister(); -#else - netmap_uninit_bridges2(nm_bridges, NM_BRIDGES); -#endif + struct nmreq_register *req = (struct nmreq_register *)(uintptr_t)hdr->nr_body; + struct ifnet *ifp; + struct netmap_vp_adapter *vpna; + struct netmap_mem_d *nmd = NULL; + int error; + + if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) { + return EINVAL; + } + + /* don't include VALE prefix */ + if (!strncmp(hdr->nr_name, NM_BDG_NAME, strlen(NM_BDG_NAME))) + return EINVAL; + if (strlen(hdr->nr_name) >= IFNAMSIZ) { + return EINVAL; + } + ifp = ifunit_ref(hdr->nr_name); + if (ifp) { /* already exist, cannot create new one */ + error = EEXIST; + NMG_LOCK(); + if (NM_NA_VALID(ifp)) { + int update_err = nm_update_info(req, NA(ifp)); + if (update_err) + error = update_err; + } + NMG_UNLOCK(); + if_rele(ifp); + return error; + } + error = nm_os_vi_persist(hdr->nr_name, &ifp); + if (error) + return error; + + NMG_LOCK(); + if (req->nr_mem_id) { + nmd = netmap_mem_find(req->nr_mem_id); + if (nmd == NULL) { + error = EINVAL; + goto err_1; + } + } + /* netmap_vp_create creates a struct netmap_vp_adapter */ + error = netmap_vp_create(hdr, ifp, nmd, &vpna); + if (error) { + D("error %d", error); + goto err_1; + } + /* persist-specific routines */ + vpna->up.nm_bdg_ctl = netmap_vp_bdg_ctl; + if (!autodelete) { + netmap_adapter_get(&vpna->up); + } else { + vpna->autodelete = 1; + } + NM_ATTACH_NA(ifp, &vpna->up); + /* return the updated info */ + error = nm_update_info(req, &vpna->up); + if (error) { + goto err_2; + } + ND("returning nr_mem_id %d", req->nr_mem_id); + if (nmd) + netmap_mem_put(nmd); + NMG_UNLOCK(); + ND("created %s", ifp->if_xname); + return 0; + +err_2: + netmap_detach(ifp); +err_1: + if (nmd) + netmap_mem_put(nmd); + NMG_UNLOCK(); + nm_os_vi_detach(ifp); + + return error; } + #endif /* WITH_VALE */ Index: head/sys/net/netmap.h =================================================================== --- head/sys/net/netmap.h +++ head/sys/net/netmap.h @@ -237,6 +237,8 @@ * are the number of fragments. */ +#define NETMAP_MAX_FRAGS 64 /* max number of fragments */ + /* * struct netmap_ring Index: head/sys/net/netmap_user.h =================================================================== --- head/sys/net/netmap_user.h +++ head/sys/net/netmap_user.h @@ -1029,20 +1029,35 @@ for (c = 0; c < n ; c++, ri++) { /* compute current ring to use */ struct netmap_ring *ring; - uint32_t i, idx; + uint32_t i, j, idx; + size_t rem; if (ri > d->last_tx_ring) ri = d->first_tx_ring; ring = NETMAP_TXRING(d->nifp, ri); - if (nm_ring_empty(ring)) { - continue; + rem = size; + j = ring->cur; + while (rem > ring->nr_buf_size && j != ring->tail) { + rem -= ring->nr_buf_size; + j = nm_ring_next(ring, j); } + if (j == ring->tail && rem > 0) + continue; i = ring->cur; + while (i != j) { + idx = ring->slot[i].buf_idx; + ring->slot[i].len = ring->nr_buf_size; + ring->slot[i].flags = NS_MOREFRAG; + nm_pkt_copy(buf, NETMAP_BUF(ring, idx), ring->nr_buf_size); + i = nm_ring_next(ring, i); + buf = (char *)buf + ring->nr_buf_size; + } idx = ring->slot[i].buf_idx; - ring->slot[i].len = size; - nm_pkt_copy(buf, NETMAP_BUF(ring, idx), size); - d->cur_tx_ring = ri; + ring->slot[i].len = rem; + ring->slot[i].flags = 0; + nm_pkt_copy(buf, NETMAP_BUF(ring, idx), rem); ring->head = ring->cur = nm_ring_next(ring, i); + d->cur_tx_ring = ri; return size; } return 0; /* fail */