Index: head/sys/dev/netmap/netmap_bdg.c
===================================================================
--- head/sys/dev/netmap/netmap_bdg.c	(revision 343548)
+++ head/sys/dev/netmap/netmap_bdg.c	(revision 343549)
@@ -1,1665 +1,1665 @@
/*
 * Copyright (C) 2013-2016 Universita` di Pisa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * This module implements the VALE switch for netmap

--- VALE SWITCH ---

NMG_LOCK() serializes all modifications to switches and ports.
A switch cannot be deleted until all ports are gone.

For each switch, an SX lock (RWlock on linux) protects
deletion of ports. When configuring or deleting a new port, the
lock is acquired in exclusive mode (after holding NMG_LOCK).
When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
The lock is held throughout the entire forwarding cycle,
during which the thread may incur in a page fault.
Hence it is important that sleepable shared locks are used.

On the rx ring, the per-port lock is grabbed initially to reserve
a number of slot in the ring, then the lock is released,
packets are copied from source to destination, and then
the lock is acquired again and the receive ring is updated.
(A similar thing is done on the tx ring for NIC and host stack
ports attached to the switch)
 */

/*
 * OS-specific code that is used only within this file.
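The locking discipline described in the VALE SWITCH comment above can be modelled in ordinary userspace C. Below is a minimal sketch, assuming hypothetical toy_bridge/toy_ring/toy_forward names and pthread locks standing in for NMG_LOCK, the per-bridge sx/rwlock and the per-ring mtx; it keeps only the reserve/copy/publish shape of the rx path and ignores the extra bookkeeping the real switch needs when several senders run concurrently.

#include <pthread.h>
#include <stdio.h>
#include <string.h>

#define TOY_SLOTS	64
#define TOY_BUFSZ	2048

struct toy_ring {
	pthread_mutex_t q_lock;		/* per-ring lock (mtx in the kernel) */
	unsigned head, tail;		/* published / reserved indices */
	char slots[TOY_SLOTS][TOY_BUFSZ];
};

struct toy_bridge {
	pthread_rwlock_t bdg_lock;	/* BDG_RLOCK/BDG_WLOCK stand-in */
	struct toy_ring rx;
};

static struct toy_bridge the_bridge = {
	.bdg_lock = PTHREAD_RWLOCK_INITIALIZER,
	.rx = { .q_lock = PTHREAD_MUTEX_INITIALIZER },
};

/* forwarding path: the shared bridge lock is held for the whole cycle */
static void
toy_forward(struct toy_bridge *b, const char *pkt, size_t len)
{
	unsigned slot;

	pthread_rwlock_rdlock(&b->bdg_lock);		/* BDG_RLOCK(b) */

	/* 1. reserve a slot under the ring lock, then drop the lock */
	pthread_mutex_lock(&b->rx.q_lock);
	slot = b->rx.tail;
	b->rx.tail = (b->rx.tail + 1) % TOY_SLOTS;
	pthread_mutex_unlock(&b->rx.q_lock);

	/* 2. copy without holding the ring lock; this copy may fault,
	 *    which is why the bridge lock must be a sleepable shared lock */
	memcpy(b->rx.slots[slot], pkt, len < TOY_BUFSZ ? len : TOY_BUFSZ);

	/* 3. take the ring lock again and publish the update */
	pthread_mutex_lock(&b->rx.q_lock);
	b->rx.head = (b->rx.head + 1) % TOY_SLOTS;
	pthread_mutex_unlock(&b->rx.q_lock);

	pthread_rwlock_unlock(&b->bdg_lock);		/* BDG_RUNLOCK(b) */
}

int
main(void)
{
	toy_forward(&the_bridge, "hello", 6);
	printf("rx head %u tail %u\n", the_bridge.rx.head, the_bridge.rx.tail);
	return 0;
}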
* Other OS-specific code that must be accessed by drivers * is present in netmap_kern.h */ #if defined(__FreeBSD__) #include /* prerequisite */ __FBSDID("$FreeBSD$"); #include #include #include /* defines used in kernel.h */ #include /* types used in module initialization */ #include /* cdevsw struct, UID, GID */ #include #include /* struct socket */ #include #include #include #include /* sockaddrs */ #include #include #include #include #include /* BIOCIMMEDIATE */ #include /* bus_dmamap_* */ #include #include #include #elif defined(linux) #include "bsd_glue.h" #elif defined(__APPLE__) #warning OSX support is only partial #include "osx_glue.h" #elif defined(_WIN32) #include "win_glue.h" #else #error Unsupported platform #endif /* unsupported */ /* * common headers */ #include #include #include #include const char* netmap_bdg_name(struct netmap_vp_adapter *vp) { struct nm_bridge *b = vp->na_bdg; if (b == NULL) return NULL; return b->bdg_basename; } #ifndef CONFIG_NET_NS /* * XXX in principle nm_bridges could be created dynamically * Right now we have a static array and deletions are protected * by an exclusive lock. */ struct nm_bridge *nm_bridges; #endif /* !CONFIG_NET_NS */ static int nm_is_id_char(const char c) { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || (c == '_'); } /* Validate the name of a bdg port and return the * position of the ":" character. */ static int nm_bdg_name_validate(const char *name, size_t prefixlen) { int colon_pos = -1; int i; if (!name || strlen(name) < prefixlen) { return -1; } for (i = 0; i < NM_BDG_IFNAMSIZ && name[i]; i++) { if (name[i] == ':') { colon_pos = i; break; } else if (!nm_is_id_char(name[i])) { return -1; } } if (strlen(name) - colon_pos > IFNAMSIZ) { /* interface name too long */ return -1; } return colon_pos; } /* * locate a bridge among the existing ones. * MUST BE CALLED WITH NMG_LOCK() * * a ':' in the name terminates the bridge name. Otherwise, just NM_NAME. * We assume that this is called with a name of at least NM_NAME chars. */ struct nm_bridge * nm_find_bridge(const char *name, int create, struct netmap_bdg_ops *ops) { int i, namelen; struct nm_bridge *b = NULL, *bridges; u_int num_bridges; NMG_LOCK_ASSERT(); netmap_bns_getbridges(&bridges, &num_bridges); namelen = nm_bdg_name_validate(name, (ops != NULL ? strlen(ops->name) : 0)); if (namelen < 0) { nm_prerr("invalid bridge name %s", name ? 
name : NULL); return NULL; } /* lookup the name, remember empty slot if there is one */ for (i = 0; i < num_bridges; i++) { struct nm_bridge *x = bridges + i; if ((x->bdg_flags & NM_BDG_ACTIVE) + x->bdg_active_ports == 0) { if (create && b == NULL) b = x; /* record empty slot */ } else if (x->bdg_namelen != namelen) { continue; } else if (strncmp(name, x->bdg_basename, namelen) == 0) { ND("found '%.*s' at %d", namelen, name, i); b = x; break; } } if (i == num_bridges && b) { /* name not found, can create entry */ /* initialize the bridge */ ND("create new bridge %s with ports %d", b->bdg_basename, b->bdg_active_ports); b->ht = nm_os_malloc(sizeof(struct nm_hash_ent) * NM_BDG_HASH); if (b->ht == NULL) { nm_prerr("failed to allocate hash table"); return NULL; } strncpy(b->bdg_basename, name, namelen); b->bdg_namelen = namelen; b->bdg_active_ports = 0; for (i = 0; i < NM_BDG_MAXPORTS; i++) b->bdg_port_index[i] = i; /* set the default function */ b->bdg_ops = b->bdg_saved_ops = *ops; b->private_data = b->ht; b->bdg_flags = 0; NM_BNS_GET(b); } return b; } int netmap_bdg_free(struct nm_bridge *b) { if ((b->bdg_flags & NM_BDG_ACTIVE) + b->bdg_active_ports != 0) { return EBUSY; } ND("marking bridge %s as free", b->bdg_basename); nm_os_free(b->ht); memset(&b->bdg_ops, 0, sizeof(b->bdg_ops)); memset(&b->bdg_saved_ops, 0, sizeof(b->bdg_saved_ops)); b->bdg_flags = 0; NM_BNS_PUT(b); return 0; } /* Called by external kernel modules (e.g., Openvswitch). * to modify the private data previously given to regops(). * 'name' may be just bridge's name (including ':' if it * is not just NM_BDG_NAME). * Called without NMG_LOCK. */ int netmap_bdg_update_private_data(const char *name, bdg_update_private_data_fn_t callback, void *callback_data, void *auth_token) { void *private_data = NULL; struct nm_bridge *b; int error = 0; NMG_LOCK(); b = nm_find_bridge(name, 0 /* don't create */, NULL); if (!b) { error = EINVAL; goto unlock_update_priv; } if (!nm_bdg_valid_auth_token(b, auth_token)) { error = EACCES; goto unlock_update_priv; } BDG_WLOCK(b); private_data = callback(b->private_data, callback_data, &error); b->private_data = private_data; BDG_WUNLOCK(b); unlock_update_priv: NMG_UNLOCK(); return error; } /* remove from bridge b the ports in slots hw and sw * (sw can be -1 if not needed) */ void netmap_bdg_detach_common(struct nm_bridge *b, int hw, int sw) { int s_hw = hw, s_sw = sw; int i, lim =b->bdg_active_ports; uint32_t *tmp = b->tmp_bdg_port_index; /* New algorithm: make a copy of bdg_port_index; lookup NA(ifp)->bdg_port and SWNA(ifp)->bdg_port in the array of bdg_port_index, replacing them with entries from the bottom of the array; decrement bdg_active_ports; acquire BDG_WLOCK() and copy back the array. */ if (netmap_debug & NM_DEBUG_BDG) nm_prinf("detach %d and %d (lim %d)", hw, sw, lim); /* make a copy of the list of active ports, update it, * and then copy back within BDG_WLOCK(). 
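The swap-with-last removal sketched in the comment above is easy to see in isolation. A minimal standalone version, with hypothetical names (index_remove, idx), not the kernel code itself:

#include <stdio.h>

/* remove 'val' from the first 'nactive' entries of idx[], keeping the
 * active region dense; returns the new active count */
static int
index_remove(unsigned idx[], int nactive, unsigned val)
{
	int i;

	for (i = 0; i < nactive; i++) {
		if (idx[i] == val) {
			nactive--;
			idx[i] = idx[nactive];	/* swap with last active */
			idx[nactive] = val;	/* now parked as inactive */
			break;
		}
	}
	return nactive;
}

int
main(void)
{
	unsigned idx[] = { 0, 1, 2, 3 };
	int n = index_remove(idx, 4, 1);

	printf("active %d: %u %u %u | parked %u\n",
	    n, idx[0], idx[1], idx[2], idx[3]);	/* active 3: 0 3 2 | parked 1 */
	return 0;
}

The same dense-array-plus-parked-tail layout is what keeps scans of bdg_port_index bounded by bdg_active_ports.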
*/ memcpy(b->tmp_bdg_port_index, b->bdg_port_index, sizeof(b->tmp_bdg_port_index)); for (i = 0; (hw >= 0 || sw >= 0) && i < lim; ) { if (hw >= 0 && tmp[i] == hw) { ND("detach hw %d at %d", hw, i); lim--; /* point to last active port */ tmp[i] = tmp[lim]; /* swap with i */ tmp[lim] = hw; /* now this is inactive */ hw = -1; } else if (sw >= 0 && tmp[i] == sw) { ND("detach sw %d at %d", sw, i); lim--; tmp[i] = tmp[lim]; tmp[lim] = sw; sw = -1; } else { i++; } } if (hw >= 0 || sw >= 0) { nm_prerr("delete failed hw %d sw %d, should panic...", hw, sw); } BDG_WLOCK(b); if (b->bdg_ops.dtor) b->bdg_ops.dtor(b->bdg_ports[s_hw]); b->bdg_ports[s_hw] = NULL; if (s_sw >= 0) { b->bdg_ports[s_sw] = NULL; } memcpy(b->bdg_port_index, b->tmp_bdg_port_index, sizeof(b->tmp_bdg_port_index)); b->bdg_active_ports = lim; BDG_WUNLOCK(b); ND("now %d active ports", lim); netmap_bdg_free(b); } /* nm_bdg_ctl callback for VALE ports */ int netmap_vp_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na) { struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter *)na; struct nm_bridge *b = vpna->na_bdg; if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) { return 0; /* nothing to do */ } if (b) { netmap_set_all_rings(na, 0 /* disable */); netmap_bdg_detach_common(b, vpna->bdg_port, -1); vpna->na_bdg = NULL; netmap_set_all_rings(na, 1 /* enable */); } /* I have took reference just for attach */ netmap_adapter_put(na); return 0; } int netmap_default_bdg_attach(const char *name, struct netmap_adapter *na, struct nm_bridge *b) { return NM_NEED_BWRAP; } /* Try to get a reference to a netmap adapter attached to a VALE switch. * If the adapter is found (or is created), this function returns 0, a * non NULL pointer is returned into *na, and the caller holds a * reference to the adapter. * If an adapter is not found, then no reference is grabbed and the * function returns an error code, or 0 if there is just a VALE prefix * mismatch. Therefore the caller holds a reference when * (*na != NULL && return == 0). */ int netmap_get_bdg_na(struct nmreq_header *hdr, struct netmap_adapter **na, struct netmap_mem_d *nmd, int create, struct netmap_bdg_ops *ops) { char *nr_name = hdr->nr_name; const char *ifname; struct ifnet *ifp = NULL; int error = 0; struct netmap_vp_adapter *vpna, *hostna = NULL; struct nm_bridge *b; uint32_t i, j; uint32_t cand = NM_BDG_NOPORT, cand2 = NM_BDG_NOPORT; int needed; *na = NULL; /* default return value */ /* first try to see if this is a bridge port. */ NMG_LOCK_ASSERT(); if (strncmp(nr_name, ops->name, strlen(ops->name) - 1)) { return 0; /* no error, but no VALE prefix */ } b = nm_find_bridge(nr_name, create, ops); if (b == NULL) { ND("no bridges available for '%s'", nr_name); return (create ? ENOMEM : ENXIO); } if (strlen(nr_name) < b->bdg_namelen) /* impossible */ panic("x"); /* Now we are sure that name starts with the bridge's name, * lookup the port in the bridge. We need to scan the entire * list. It is not important to hold a WLOCK on the bridge * during the search because NMG_LOCK already guarantees * that there are no other possible writers. */ /* lookup in the local list of ports */ for (j = 0; j < b->bdg_active_ports; j++) { i = b->bdg_port_index[j]; vpna = b->bdg_ports[i]; ND("checking %s", vpna->up.name); if (!strcmp(vpna->up.name, nr_name)) { netmap_adapter_get(&vpna->up); ND("found existing if %s refs %d", nr_name) *na = &vpna->up; return 0; } } /* not found, should we create it? 
*/ if (!create) return ENXIO; /* yes we should, see if we have space to attach entries */ needed = 2; /* in some cases we only need 1 */ if (b->bdg_active_ports + needed >= NM_BDG_MAXPORTS) { nm_prerr("bridge full %d, cannot create new port", b->bdg_active_ports); return ENOMEM; } /* record the next two ports available, but do not allocate yet */ cand = b->bdg_port_index[b->bdg_active_ports]; cand2 = b->bdg_port_index[b->bdg_active_ports + 1]; ND("+++ bridge %s port %s used %d avail %d %d", b->bdg_basename, ifname, b->bdg_active_ports, cand, cand2); /* * try see if there is a matching NIC with this name * (after the bridge's name) */ ifname = nr_name + b->bdg_namelen + 1; ifp = ifunit_ref(ifname); if (!ifp) { /* Create an ephemeral virtual port. * This block contains all the ephemeral-specific logic. */ if (hdr->nr_reqtype != NETMAP_REQ_REGISTER) { error = EINVAL; goto out; } /* bdg_netmap_attach creates a struct netmap_adapter */ error = b->bdg_ops.vp_create(hdr, NULL, nmd, &vpna); if (error) { if (netmap_debug & NM_DEBUG_BDG) nm_prerr("error %d", error); goto out; } /* shortcut - we can skip get_hw_na(), * ownership check and nm_bdg_attach() */ } else { struct netmap_adapter *hw; /* the vale:nic syntax is only valid for some commands */ switch (hdr->nr_reqtype) { case NETMAP_REQ_VALE_ATTACH: case NETMAP_REQ_VALE_DETACH: case NETMAP_REQ_VALE_POLLING_ENABLE: case NETMAP_REQ_VALE_POLLING_DISABLE: break; /* ok */ default: error = EINVAL; goto out; } error = netmap_get_hw_na(ifp, nmd, &hw); if (error || hw == NULL) goto out; /* host adapter might not be created */ error = hw->nm_bdg_attach(nr_name, hw, b); if (error == NM_NEED_BWRAP) { error = b->bdg_ops.bwrap_attach(nr_name, hw); } if (error) goto out; vpna = hw->na_vp; hostna = hw->na_hostvp; if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) { /* Check if we need to skip the host rings. 
*/ struct nmreq_vale_attach *areq = (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body; if (areq->reg.nr_mode != NR_REG_NIC_SW) { hostna = NULL; } } } BDG_WLOCK(b); vpna->bdg_port = cand; ND("NIC %p to bridge port %d", vpna, cand); /* bind the port to the bridge (virtual ports are not active) */ b->bdg_ports[cand] = vpna; vpna->na_bdg = b; b->bdg_active_ports++; if (hostna != NULL) { /* also bind the host stack to the bridge */ b->bdg_ports[cand2] = hostna; hostna->bdg_port = cand2; hostna->na_bdg = b; b->bdg_active_ports++; ND("host %p to bridge port %d", hostna, cand2); } ND("if %s refs %d", ifname, vpna->up.na_refcount); BDG_WUNLOCK(b); *na = &vpna->up; netmap_adapter_get(*na); out: if (ifp) if_rele(ifp); return error; } int nm_is_bwrap(struct netmap_adapter *na) { return na->nm_register == netmap_bwrap_reg; } struct nm_bdg_polling_state; struct nm_bdg_kthread { struct nm_kctx *nmk; u_int qfirst; u_int qlast; struct nm_bdg_polling_state *bps; }; struct nm_bdg_polling_state { bool configured; bool stopped; struct netmap_bwrap_adapter *bna; uint32_t mode; u_int qfirst; u_int qlast; u_int cpu_from; u_int ncpus; struct nm_bdg_kthread *kthreads; }; static void netmap_bwrap_polling(void *data) { struct nm_bdg_kthread *nbk = data; struct netmap_bwrap_adapter *bna; u_int qfirst, qlast, i; struct netmap_kring **kring0, *kring; if (!nbk) return; qfirst = nbk->qfirst; qlast = nbk->qlast; bna = nbk->bps->bna; kring0 = NMR(bna->hwna, NR_RX); for (i = qfirst; i < qlast; i++) { kring = kring0[i]; kring->nm_notify(kring, 0); } } static int nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps) { struct nm_kctx_cfg kcfg; int i, j; bps->kthreads = nm_os_malloc(sizeof(struct nm_bdg_kthread) * bps->ncpus); if (bps->kthreads == NULL) return ENOMEM; bzero(&kcfg, sizeof(kcfg)); kcfg.worker_fn = netmap_bwrap_polling; for (i = 0; i < bps->ncpus; i++) { struct nm_bdg_kthread *t = bps->kthreads + i; int all = (bps->ncpus == 1 && bps->mode == NETMAP_POLLING_MODE_SINGLE_CPU); int affinity = bps->cpu_from + i; t->bps = bps; t->qfirst = all ? bps->qfirst /* must be 0 */: affinity; t->qlast = all ? 
bps->qlast : t->qfirst + 1; if (netmap_verbose) nm_prinf("kthread %d a:%u qf:%u ql:%u", i, affinity, t->qfirst, t->qlast); kcfg.type = i; kcfg.worker_private = t; t->nmk = nm_os_kctx_create(&kcfg, NULL); if (t->nmk == NULL) { goto cleanup; } nm_os_kctx_worker_setaff(t->nmk, affinity); } return 0; cleanup: for (j = 0; j < i; j++) { struct nm_bdg_kthread *t = bps->kthreads + i; nm_os_kctx_destroy(t->nmk); } nm_os_free(bps->kthreads); return EFAULT; } /* A variant of ptnetmap_start_kthreads() */ static int nm_bdg_polling_start_kthreads(struct nm_bdg_polling_state *bps) { int error, i, j; if (!bps) { nm_prerr("polling is not configured"); return EFAULT; } bps->stopped = false; for (i = 0; i < bps->ncpus; i++) { struct nm_bdg_kthread *t = bps->kthreads + i; error = nm_os_kctx_worker_start(t->nmk); if (error) { nm_prerr("error in nm_kthread_start(): %d", error); goto cleanup; } } return 0; cleanup: for (j = 0; j < i; j++) { struct nm_bdg_kthread *t = bps->kthreads + i; nm_os_kctx_worker_stop(t->nmk); } bps->stopped = true; return error; } static void nm_bdg_polling_stop_delete_kthreads(struct nm_bdg_polling_state *bps) { int i; if (!bps) return; for (i = 0; i < bps->ncpus; i++) { struct nm_bdg_kthread *t = bps->kthreads + i; nm_os_kctx_worker_stop(t->nmk); nm_os_kctx_destroy(t->nmk); } bps->stopped = true; } static int get_polling_cfg(struct nmreq_vale_polling *req, struct netmap_adapter *na, struct nm_bdg_polling_state *bps) { unsigned int avail_cpus, core_from; unsigned int qfirst, qlast; uint32_t i = req->nr_first_cpu_id; uint32_t req_cpus = req->nr_num_polling_cpus; avail_cpus = nm_os_ncpus(); if (req_cpus == 0) { nm_prerr("req_cpus must be > 0"); return EINVAL; } else if (req_cpus >= avail_cpus) { nm_prerr("Cannot use all the CPUs in the system"); return EINVAL; } if (req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU) { /* Use a separate core for each ring. If nr_num_polling_cpus>1 * more consecutive rings are polled. * For example, if nr_first_cpu_id=2 and nr_num_polling_cpus=2, * ring 2 and 3 are polled by core 2 and 3, respectively. */ if (i + req_cpus > nma_get_nrings(na, NR_RX)) { nm_prerr("Rings %u-%u not in range (have %d rings)", i, i + req_cpus, nma_get_nrings(na, NR_RX)); return EINVAL; } qfirst = i; qlast = qfirst + req_cpus; core_from = qfirst; } else if (req->nr_mode == NETMAP_POLLING_MODE_SINGLE_CPU) { /* Poll all the rings using a core specified by nr_first_cpu_id. * the number of cores must be 1. */ if (req_cpus != 1) { nm_prerr("ncpus must be 1 for NETMAP_POLLING_MODE_SINGLE_CPU " "(was %d)", req_cpus); return EINVAL; } qfirst = 0; qlast = nma_get_nrings(na, NR_RX); core_from = i; } else { nm_prerr("Invalid polling mode"); return EINVAL; } bps->mode = req->nr_mode; bps->qfirst = qfirst; bps->qlast = qlast; bps->cpu_from = core_from; bps->ncpus = req_cpus; nm_prinf("%s qfirst %u qlast %u cpu_from %u ncpus %u", req->nr_mode == NETMAP_POLLING_MODE_MULTI_CPU ? 
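For reference, the ring-to-core mapping computed by get_polling_cfg() for the two polling modes can be restated as a small standalone program; toy_poll_cfg, map_multi and map_single are hypothetical names used only for illustration.

#include <stdio.h>

struct toy_poll_cfg {
	unsigned qfirst, qlast, core_from;
};

/* MULTI_CPU: one ring per core, starting at first_cpu */
static void
map_multi(unsigned first_cpu, unsigned ncpus, struct toy_poll_cfg *c)
{
	c->qfirst = first_cpu;
	c->qlast = first_cpu + ncpus;
	c->core_from = first_cpu;
}

/* SINGLE_CPU: every ring polled from one core */
static void
map_single(unsigned first_cpu, unsigned nrings, struct toy_poll_cfg *c)
{
	c->qfirst = 0;
	c->qlast = nrings;
	c->core_from = first_cpu;
}

int
main(void)
{
	struct toy_poll_cfg c;

	map_multi(2, 2, &c);	/* rings 2..3 on cores 2..3 */
	printf("MULTI:  rings [%u,%u) from core %u\n", c.qfirst, c.qlast, c.core_from);
	map_single(1, 4, &c);	/* all 4 rings polled by core 1 */
	printf("SINGLE: rings [%u,%u) from core %u\n", c.qfirst, c.qlast, c.core_from);
	return 0;
}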
"MULTI" : "SINGLE", qfirst, qlast, core_from, req_cpus); return 0; } static int nm_bdg_ctl_polling_start(struct nmreq_vale_polling *req, struct netmap_adapter *na) { struct nm_bdg_polling_state *bps; struct netmap_bwrap_adapter *bna; int error; bna = (struct netmap_bwrap_adapter *)na; if (bna->na_polling_state) { nm_prerr("ERROR adapter already in polling mode"); return EFAULT; } bps = nm_os_malloc(sizeof(*bps)); if (!bps) return ENOMEM; bps->configured = false; bps->stopped = true; if (get_polling_cfg(req, na, bps)) { nm_os_free(bps); return EINVAL; } if (nm_bdg_create_kthreads(bps)) { nm_os_free(bps); return EFAULT; } bps->configured = true; bna->na_polling_state = bps; bps->bna = bna; /* disable interrupts if possible */ nma_intr_enable(bna->hwna, 0); /* start kthread now */ error = nm_bdg_polling_start_kthreads(bps); if (error) { nm_prerr("ERROR nm_bdg_polling_start_kthread()"); nm_os_free(bps->kthreads); nm_os_free(bps); bna->na_polling_state = NULL; nma_intr_enable(bna->hwna, 1); } return error; } static int nm_bdg_ctl_polling_stop(struct netmap_adapter *na) { struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na; struct nm_bdg_polling_state *bps; if (!bna->na_polling_state) { nm_prerr("ERROR adapter is not in polling mode"); return EFAULT; } bps = bna->na_polling_state; nm_bdg_polling_stop_delete_kthreads(bna->na_polling_state); bps->configured = false; nm_os_free(bps); bna->na_polling_state = NULL; /* reenable interrupts */ nma_intr_enable(bna->hwna, 1); return 0; } int nm_bdg_polling(struct nmreq_header *hdr) { struct nmreq_vale_polling *req = (struct nmreq_vale_polling *)(uintptr_t)hdr->nr_body; struct netmap_adapter *na = NULL; int error = 0; NMG_LOCK(); error = netmap_get_vale_na(hdr, &na, NULL, /*create=*/0); if (na && !error) { if (!nm_is_bwrap(na)) { error = EOPNOTSUPP; } else if (hdr->nr_reqtype == NETMAP_BDG_POLLING_ON) { error = nm_bdg_ctl_polling_start(req, na); if (!error) netmap_adapter_get(na); } else { error = nm_bdg_ctl_polling_stop(na); if (!error) netmap_adapter_put(na); } netmap_adapter_put(na); } else if (!na && !error) { /* Not VALE port. */ error = EINVAL; } NMG_UNLOCK(); return error; } /* Called by external kernel modules (e.g., Openvswitch). * to set configure/lookup/dtor functions of a VALE instance. * Register callbacks to the given bridge. 'name' may be just * bridge's name (including ':' if it is not just NM_BDG_NAME). * * Called without NMG_LOCK. 
*/ int netmap_bdg_regops(const char *name, struct netmap_bdg_ops *bdg_ops, void *private_data, void *auth_token) { struct nm_bridge *b; int error = 0; NMG_LOCK(); b = nm_find_bridge(name, 0 /* don't create */, NULL); if (!b) { error = ENXIO; goto unlock_regops; } if (!nm_bdg_valid_auth_token(b, auth_token)) { error = EACCES; goto unlock_regops; } BDG_WLOCK(b); if (!bdg_ops) { /* resetting the bridge */ bzero(b->ht, sizeof(struct nm_hash_ent) * NM_BDG_HASH); b->bdg_ops = b->bdg_saved_ops; b->private_data = b->ht; } else { /* modifying the bridge */ b->private_data = private_data; #define nm_bdg_override(m) if (bdg_ops->m) b->bdg_ops.m = bdg_ops->m nm_bdg_override(lookup); nm_bdg_override(config); nm_bdg_override(dtor); nm_bdg_override(vp_create); nm_bdg_override(bwrap_attach); #undef nm_bdg_override } BDG_WUNLOCK(b); unlock_regops: NMG_UNLOCK(); return error; } int netmap_bdg_config(struct nm_ifreq *nr) { struct nm_bridge *b; int error = EINVAL; NMG_LOCK(); b = nm_find_bridge(nr->nifr_name, 0, NULL); if (!b) { NMG_UNLOCK(); return error; } NMG_UNLOCK(); /* Don't call config() with NMG_LOCK() held */ BDG_RLOCK(b); if (b->bdg_ops.config != NULL) error = b->bdg_ops.config(nr); BDG_RUNLOCK(b); return error; } /* nm_register callback for VALE ports */ int netmap_vp_reg(struct netmap_adapter *na, int onoff) { struct netmap_vp_adapter *vpna = (struct netmap_vp_adapter*)na; enum txrx t; int i; /* persistent ports may be put in netmap mode * before being attached to a bridge */ if (vpna->na_bdg) BDG_WLOCK(vpna->na_bdg); if (onoff) { for_rx_tx(t) { for (i = 0; i < netmap_real_rings(na, t); i++) { struct netmap_kring *kring = NMR(na, t)[i]; if (nm_kring_pending_on(kring)) kring->nr_mode = NKR_NETMAP_ON; } } if (na->active_fds == 0) na->na_flags |= NAF_NETMAP_ON; /* XXX on FreeBSD, persistent VALE ports should also * toggle IFCAP_NETMAP in na->ifp (2014-03-16) */ } else { if (na->active_fds == 0) na->na_flags &= ~NAF_NETMAP_ON; for_rx_tx(t) { for (i = 0; i < netmap_real_rings(na, t); i++) { struct netmap_kring *kring = NMR(na, t)[i]; if (nm_kring_pending_off(kring)) kring->nr_mode = NKR_NETMAP_OFF; } } } if (vpna->na_bdg) BDG_WUNLOCK(vpna->na_bdg); return 0; } /* rxsync code used by VALE ports nm_rxsync callback and also * internally by the brwap */ static int netmap_vp_rxsync_locked(struct netmap_kring *kring, int flags) { struct netmap_adapter *na = kring->na; struct netmap_ring *ring = kring->ring; u_int nm_i, lim = kring->nkr_num_slots - 1; u_int head = kring->rhead; int n; if (head > lim) { nm_prerr("ouch dangerous reset!!!"); n = netmap_ring_reinit(kring); goto done; } /* First part, import newly received packets. */ /* actually nothing to do here, they are already in the kring */ /* Second part, skip past packets that userspace has released. */ nm_i = kring->nr_hwcur; if (nm_i != head) { /* consistency check, but nothing really important here */ for (n = 0; likely(nm_i != head); n++) { struct netmap_slot *slot = &ring->slot[nm_i]; void *addr = NMB(na, slot); if (addr == NETMAP_BUF_BASE(kring->na)) { /* bad buf */ nm_prerr("bad buffer index %d, ignore ?", slot->buf_idx); } slot->flags &= ~NS_BUF_CHANGED; nm_i = nm_next(nm_i, lim); } kring->nr_hwcur = head; } n = 0; done: return n; } /* * nm_rxsync callback for VALE ports * user process reading from a VALE switch. * Already protected against concurrent calls from userspace, * but we must acquire the queue's lock to protect against * writers on the same queue. 
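The loop above advances through the ring with nm_next(), which is just a wrap-around increment. A tiny standalone illustration (ring_next is a hypothetical stand-in):

#include <stdio.h>

/* circular ring index advance; lim is nkr_num_slots - 1 */
static inline unsigned
ring_next(unsigned i, unsigned lim)
{
	return (i == lim) ? 0 : i + 1;
}

int
main(void)
{
	unsigned lim = 7;	/* 8-slot ring */
	unsigned i, hwcur = 6, head = 2;

	/* walk from hwcur to head, wrapping at the end of the ring */
	for (i = hwcur; i != head; i = ring_next(i, lim))
		printf("release slot %u\n", i);	/* 6 7 0 1 */
	return 0;
}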
*/ int netmap_vp_rxsync(struct netmap_kring *kring, int flags) { int n; mtx_lock(&kring->q_lock); n = netmap_vp_rxsync_locked(kring, flags); mtx_unlock(&kring->q_lock); return n; } int netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna, struct netmap_bdg_ops *ops) { return ops->bwrap_attach(nr_name, hwna); } /* Bridge wrapper code (bwrap). * This is used to connect a non-VALE-port netmap_adapter (hwna) to a * VALE switch. * The main task is to swap the meaning of tx and rx rings to match the * expectations of the VALE switch code (see nm_bdg_flush). * * The bwrap works by interposing a netmap_bwrap_adapter between the * rest of the system and the hwna. The netmap_bwrap_adapter looks like * a netmap_vp_adapter to the rest the system, but, internally, it * translates all callbacks to what the hwna expects. * * Note that we have to intercept callbacks coming from two sides: * * - callbacks coming from the netmap module are intercepted by * passing around the netmap_bwrap_adapter instead of the hwna * * - callbacks coming from outside of the netmap module only know * about the hwna. This, however, only happens in interrupt * handlers, where only the hwna->nm_notify callback is called. * What the bwrap does is to overwrite the hwna->nm_notify callback * with its own netmap_bwrap_intr_notify. * XXX This assumes that the hwna->nm_notify callback was the * standard netmap_notify(), as it is the case for nic adapters. * Any additional action performed by hwna->nm_notify will not be * performed by netmap_bwrap_intr_notify. * * Additionally, the bwrap can optionally attach the host rings pair * of the wrapped adapter to a different port of the switch. */ static void netmap_bwrap_dtor(struct netmap_adapter *na) { struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na; struct netmap_adapter *hwna = bna->hwna; struct nm_bridge *b = bna->up.na_bdg, *bh = bna->host.na_bdg; if (bna->host.up.nm_mem) netmap_mem_put(bna->host.up.nm_mem); if (b) { netmap_bdg_detach_common(b, bna->up.bdg_port, (bh ? bna->host.bdg_port : -1)); } ND("na %p", na); na->ifp = NULL; bna->host.up.ifp = NULL; hwna->na_vp = bna->saved_na_vp; hwna->na_hostvp = NULL; hwna->na_private = NULL; hwna->na_flags &= ~NAF_BUSY; netmap_adapter_put(hwna); } /* * Intr callback for NICs connected to a bridge. * Simply ignore tx interrupts (maybe we could try to recover space ?) * and pass received packets from nic to the bridge. * * XXX TODO check locking: this is called from the interrupt * handler so we should make sure that the interface is not * disconnected while passing down an interrupt. * * Note, no user process can access this NIC or the host stack. * The only part of the ring that is significant are the slots, * and head/cur/tail are set from the kring as needed * (part as a receive ring, part as a transmit ring). * * callback that overwrites the hwna notify callback. * Packets come from the outside or from the host stack and are put on an * hwna rx ring. * The bridge wrapper then sends the packets through the bridge. 
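The interception described above is a plain save-and-override of a function pointer. A minimal userspace sketch of that pattern, with hypothetical toy_kring, orig_notify and intr_notify names standing in for the kring, the stock notify callback and netmap_bwrap_intr_notify:

#include <stdio.h>

struct toy_kring;
typedef int (*notify_fn)(struct toy_kring *, int);

struct toy_kring {
	notify_fn nm_notify;	/* active callback */
	notify_fn save_notify;	/* original callback, kept for restore */
};

static int
orig_notify(struct toy_kring *k, int flags)
{
	(void)k; (void)flags;
	printf("original notify\n");
	return 0;
}

static int
intr_notify(struct toy_kring *k, int flags)
{
	/* the real netmap_bwrap_intr_notify() does the rxsync/txsync
	 * dance here instead of waking a user process */
	(void)k; (void)flags;
	printf("intercepted: push packets into the switch\n");
	return 0;
}

int
main(void)
{
	struct toy_kring k = { .nm_notify = orig_notify, .save_notify = NULL };

	/* intercept (register time) */
	k.save_notify = k.nm_notify;
	k.nm_notify = intr_notify;
	k.nm_notify(&k, 0);

	/* restore (unregister time) */
	k.nm_notify = k.save_notify;
	k.save_notify = NULL;
	k.nm_notify(&k, 0);
	return 0;
}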
*/ static int netmap_bwrap_intr_notify(struct netmap_kring *kring, int flags) { struct netmap_adapter *na = kring->na; struct netmap_bwrap_adapter *bna = na->na_private; struct netmap_kring *bkring; struct netmap_vp_adapter *vpna = &bna->up; u_int ring_nr = kring->ring_id; int ret = NM_IRQ_COMPLETED; int error; if (netmap_debug & NM_DEBUG_RXINTR) nm_prinf("%s %s 0x%x", na->name, kring->name, flags); bkring = vpna->up.tx_rings[ring_nr]; /* make sure the ring is not disabled */ if (nm_kr_tryget(kring, 0 /* can't sleep */, NULL)) { return EIO; } if (netmap_debug & NM_DEBUG_RXINTR) nm_prinf("%s head %d cur %d tail %d", na->name, kring->rhead, kring->rcur, kring->rtail); /* simulate a user wakeup on the rx ring * fetch packets that have arrived. */ error = kring->nm_sync(kring, 0); if (error) goto put_out; if (kring->nr_hwcur == kring->nr_hwtail) { if (netmap_verbose) - nm_prerr("how strange, interrupt with no packets on %s", - na->name); + nm_prlim(1, "interrupt with no packets on %s", + kring->name); goto put_out; } /* new packets are kring->rcur to kring->nr_hwtail, and the bkring * had hwcur == bkring->rhead. So advance bkring->rhead to kring->nr_hwtail * to push all packets out. */ bkring->rhead = bkring->rcur = kring->nr_hwtail; bkring->nm_sync(bkring, flags); /* mark all buffers as released on this ring */ kring->rhead = kring->rcur = kring->rtail = kring->nr_hwtail; /* another call to actually release the buffers */ error = kring->nm_sync(kring, 0); /* The second rxsync may have further advanced hwtail. If this happens, * return NM_IRQ_RESCHED, otherwise just return NM_IRQ_COMPLETED. */ if (kring->rcur != kring->nr_hwtail) { ret = NM_IRQ_RESCHED; } put_out: nm_kr_put(kring); return error ? error : ret; } /* nm_register callback for bwrap */ int netmap_bwrap_reg(struct netmap_adapter *na, int onoff) { struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na; struct netmap_adapter *hwna = bna->hwna; struct netmap_vp_adapter *hostna = &bna->host; int error, i; enum txrx t; ND("%s %s", na->name, onoff ? "on" : "off"); if (onoff) { /* netmap_do_regif has been called on the bwrap na. 
* We need to pass the information about the * memory allocator down to the hwna before * putting it in netmap mode */ hwna->na_lut = na->na_lut; if (hostna->na_bdg) { /* if the host rings have been attached to switch, * we need to copy the memory allocator information * in the hostna also */ hostna->up.na_lut = na->na_lut; } } /* pass down the pending ring state information */ for_rx_tx(t) { for (i = 0; i < netmap_all_rings(na, t); i++) { NMR(hwna, nm_txrx_swap(t))[i]->nr_pending_mode = NMR(na, t)[i]->nr_pending_mode; } } /* forward the request to the hwna */ error = hwna->nm_register(hwna, onoff); if (error) return error; /* copy up the current ring state information */ for_rx_tx(t) { for (i = 0; i < netmap_all_rings(na, t); i++) { struct netmap_kring *kring = NMR(hwna, nm_txrx_swap(t))[i]; NMR(na, t)[i]->nr_mode = kring->nr_mode; } } /* impersonate a netmap_vp_adapter */ netmap_vp_reg(na, onoff); if (hostna->na_bdg) netmap_vp_reg(&hostna->up, onoff); if (onoff) { u_int i; /* intercept the hwna nm_nofify callback on the hw rings */ for (i = 0; i < hwna->num_rx_rings; i++) { hwna->rx_rings[i]->save_notify = hwna->rx_rings[i]->nm_notify; hwna->rx_rings[i]->nm_notify = netmap_bwrap_intr_notify; } i = hwna->num_rx_rings; /* for safety */ /* save the host ring notify unconditionally */ for (; i < netmap_real_rings(hwna, NR_RX); i++) { hwna->rx_rings[i]->save_notify = hwna->rx_rings[i]->nm_notify; if (hostna->na_bdg) { /* also intercept the host ring notify */ hwna->rx_rings[i]->nm_notify = netmap_bwrap_intr_notify; na->tx_rings[i]->nm_sync = na->nm_txsync; } } if (na->active_fds == 0) na->na_flags |= NAF_NETMAP_ON; } else { u_int i; if (na->active_fds == 0) na->na_flags &= ~NAF_NETMAP_ON; /* reset all notify callbacks (including host ring) */ for (i = 0; i < netmap_all_rings(hwna, NR_RX); i++) { hwna->rx_rings[i]->nm_notify = hwna->rx_rings[i]->save_notify; hwna->rx_rings[i]->save_notify = NULL; } hwna->na_lut.lut = NULL; hwna->na_lut.plut = NULL; hwna->na_lut.objtotal = 0; hwna->na_lut.objsize = 0; /* pass ownership of the netmap rings to the hwna */ for_rx_tx(t) { for (i = 0; i < netmap_all_rings(na, t); i++) { NMR(na, t)[i]->ring = NULL; } } /* reset the number of host rings to default */ for_rx_tx(t) { nma_set_host_nrings(hwna, t, 1); } } return 0; } /* nm_config callback for bwrap */ static int netmap_bwrap_config(struct netmap_adapter *na, struct nm_config_info *info) { struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na; struct netmap_adapter *hwna = bna->hwna; int error; /* Forward the request to the hwna. It may happen that nobody * registered hwna yet, so netmap_mem_get_lut() may have not * been called yet. 
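The tx/rx swap the bwrap applies when propagating configuration (visible in the code just below) can be shown with plain integers; toy_info and bwrap_swap_config are hypothetical names:

#include <stdio.h>

/* what the hardware calls an rx ring is a tx ring from the switch's
 * point of view, and vice versa */
struct toy_info {
	unsigned num_tx_rings, num_rx_rings;
	unsigned num_tx_descs, num_rx_descs;
};

static void
bwrap_swap_config(const struct toy_info *hw, struct toy_info *bwrap)
{
	bwrap->num_tx_rings = hw->num_rx_rings;
	bwrap->num_tx_descs = hw->num_rx_descs;
	bwrap->num_rx_rings = hw->num_tx_rings;
	bwrap->num_rx_descs = hw->num_tx_descs;
}

int
main(void)
{
	struct toy_info hw = { 4, 2, 1024, 512 }, bw;

	bwrap_swap_config(&hw, &bw);
	printf("bwrap: %u tx rings, %u rx rings\n",
	    bw.num_tx_rings, bw.num_rx_rings);	/* 2 tx, 4 rx */
	return 0;
}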
*/ error = netmap_mem_get_lut(hwna->nm_mem, &hwna->na_lut); if (error) return error; netmap_update_config(hwna); /* swap the results and propagate */ info->num_tx_rings = hwna->num_rx_rings; info->num_tx_descs = hwna->num_rx_desc; info->num_rx_rings = hwna->num_tx_rings; info->num_rx_descs = hwna->num_tx_desc; info->rx_buf_maxsize = hwna->rx_buf_maxsize; return 0; } /* nm_krings_create callback for bwrap */ int netmap_bwrap_krings_create_common(struct netmap_adapter *na) { struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na; struct netmap_adapter *hwna = bna->hwna; struct netmap_adapter *hostna = &bna->host.up; int i, error = 0; enum txrx t; /* also create the hwna krings */ error = hwna->nm_krings_create(hwna); if (error) { return error; } /* increment the usage counter for all the hwna krings */ for_rx_tx(t) { for (i = 0; i < netmap_all_rings(hwna, t); i++) { NMR(hwna, t)[i]->users++; } } /* now create the actual rings */ error = netmap_mem_rings_create(hwna); if (error) { goto err_dec_users; } /* cross-link the netmap rings * The original number of rings comes from hwna, * rx rings on one side equals tx rings on the other. */ for_rx_tx(t) { enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */ for (i = 0; i < netmap_all_rings(hwna, r); i++) { NMR(na, t)[i]->nkr_num_slots = NMR(hwna, r)[i]->nkr_num_slots; NMR(na, t)[i]->ring = NMR(hwna, r)[i]->ring; } } if (na->na_flags & NAF_HOST_RINGS) { /* the hostna rings are the host rings of the bwrap. * The corresponding krings must point back to the * hostna */ hostna->tx_rings = &na->tx_rings[na->num_tx_rings]; hostna->rx_rings = &na->rx_rings[na->num_rx_rings]; for_rx_tx(t) { for (i = 0; i < nma_get_nrings(hostna, t); i++) { NMR(hostna, t)[i]->na = hostna; } } } return 0; err_dec_users: for_rx_tx(t) { for (i = 0; i < netmap_all_rings(hwna, t); i++) { NMR(hwna, t)[i]->users--; } } hwna->nm_krings_delete(hwna); return error; } void netmap_bwrap_krings_delete_common(struct netmap_adapter *na) { struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter *)na; struct netmap_adapter *hwna = bna->hwna; enum txrx t; int i; ND("%s", na->name); /* decrement the usage counter for all the hwna krings */ for_rx_tx(t) { for (i = 0; i < netmap_all_rings(hwna, t); i++) { NMR(hwna, t)[i]->users--; } } /* delete any netmap rings that are no longer needed */ netmap_mem_rings_delete(hwna); hwna->nm_krings_delete(hwna); } /* notify method for the bridge-->hwna direction */ int netmap_bwrap_notify(struct netmap_kring *kring, int flags) { struct netmap_adapter *na = kring->na; struct netmap_bwrap_adapter *bna = na->na_private; struct netmap_adapter *hwna = bna->hwna; u_int ring_n = kring->ring_id; u_int lim = kring->nkr_num_slots - 1; struct netmap_kring *hw_kring; int error; ND("%s: na %s hwna %s", (kring ? kring->name : "NULL!"), (na ? na->name : "NULL!"), (hwna ? 
hwna->name : "NULL!")); hw_kring = hwna->tx_rings[ring_n]; if (nm_kr_tryget(hw_kring, 0, NULL)) { return ENXIO; } /* first step: simulate a user wakeup on the rx ring */ netmap_vp_rxsync(kring, flags); ND("%s[%d] PRE rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)", na->name, ring_n, kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease, kring->rhead, kring->rcur, kring->rtail, hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail); /* second step: the new packets are sent on the tx ring * (which is actually the same ring) */ hw_kring->rhead = hw_kring->rcur = kring->nr_hwtail; error = hw_kring->nm_sync(hw_kring, flags); if (error) goto put_out; /* third step: now we are back the rx ring */ /* claim ownership on all hw owned bufs */ kring->rhead = kring->rcur = nm_next(hw_kring->nr_hwtail, lim); /* skip past reserved slot */ /* fourth step: the user goes to sleep again, causing another rxsync */ netmap_vp_rxsync(kring, flags); ND("%s[%d] PST rx(c%3d t%3d l%3d) ring(h%3d c%3d t%3d) tx(c%3d ht%3d t%3d)", na->name, ring_n, kring->nr_hwcur, kring->nr_hwtail, kring->nkr_hwlease, kring->rhead, kring->rcur, kring->rtail, hw_kring->nr_hwcur, hw_kring->nr_hwtail, hw_kring->rtail); put_out: nm_kr_put(hw_kring); return error ? error : NM_IRQ_COMPLETED; } /* nm_bdg_ctl callback for the bwrap. * Called on bridge-attach and detach, as an effect of vale-ctl -[ahd]. * On attach, it needs to provide a fake netmap_priv_d structure and * perform a netmap_do_regif() on the bwrap. This will put both the * bwrap and the hwna in netmap mode, with the netmap rings shared * and cross linked. Moroever, it will start intercepting interrupts * directed to hwna. */ static int netmap_bwrap_bdg_ctl(struct nmreq_header *hdr, struct netmap_adapter *na) { struct netmap_priv_d *npriv; struct netmap_bwrap_adapter *bna = (struct netmap_bwrap_adapter*)na; int error = 0; if (hdr->nr_reqtype == NETMAP_REQ_VALE_ATTACH) { struct nmreq_vale_attach *req = (struct nmreq_vale_attach *)(uintptr_t)hdr->nr_body; if (req->reg.nr_ringid != 0 || (req->reg.nr_mode != NR_REG_ALL_NIC && req->reg.nr_mode != NR_REG_NIC_SW)) { /* We only support attaching all the NIC rings * and/or the host stack. */ return EINVAL; } if (NETMAP_OWNED_BY_ANY(na)) { return EBUSY; } if (bna->na_kpriv) { /* nothing to do */ return 0; } npriv = netmap_priv_new(); if (npriv == NULL) return ENOMEM; npriv->np_ifp = na->ifp; /* let the priv destructor release the ref */ error = netmap_do_regif(npriv, na, req->reg.nr_mode, req->reg.nr_ringid, req->reg.nr_flags); if (error) { netmap_priv_delete(npriv); return error; } bna->na_kpriv = npriv; na->na_flags |= NAF_BUSY; } else { if (na->active_fds == 0) /* not registered */ return EINVAL; netmap_priv_delete(bna->na_kpriv); bna->na_kpriv = NULL; na->na_flags &= ~NAF_BUSY; } return error; } /* attach a bridge wrapper to the 'real' device */ int netmap_bwrap_attach_common(struct netmap_adapter *na, struct netmap_adapter *hwna) { struct netmap_bwrap_adapter *bna; struct netmap_adapter *hostna = NULL; int error = 0; enum txrx t; /* make sure the NIC is not already in use */ if (NETMAP_OWNED_BY_ANY(hwna)) { nm_prerr("NIC %s busy, cannot attach to bridge", hwna->name); return EBUSY; } bna = (struct netmap_bwrap_adapter *)na; /* make bwrap ifp point to the real ifp */ na->ifp = hwna->ifp; if_ref(na->ifp); na->na_private = bna; /* fill the ring data for the bwrap adapter with rx/tx meanings * swapped. The real cross-linking will be done during register, * when all the krings will have been created. 
*/ for_rx_tx(t) { enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */ nma_set_nrings(na, t, nma_get_nrings(hwna, r)); nma_set_ndesc(na, t, nma_get_ndesc(hwna, r)); } na->nm_dtor = netmap_bwrap_dtor; na->nm_config = netmap_bwrap_config; na->nm_bdg_ctl = netmap_bwrap_bdg_ctl; na->pdev = hwna->pdev; na->nm_mem = netmap_mem_get(hwna->nm_mem); na->virt_hdr_len = hwna->virt_hdr_len; na->rx_buf_maxsize = hwna->rx_buf_maxsize; bna->hwna = hwna; netmap_adapter_get(hwna); hwna->na_private = bna; /* weak reference */ bna->saved_na_vp = hwna->na_vp; hwna->na_vp = &bna->up; bna->up.up.na_vp = &(bna->up); if (hwna->na_flags & NAF_HOST_RINGS) { if (hwna->na_flags & NAF_SW_ONLY) na->na_flags |= NAF_SW_ONLY; na->na_flags |= NAF_HOST_RINGS; hostna = &bna->host.up; /* limit the number of host rings to that of hw */ nm_bound_var(&hostna->num_tx_rings, 1, 1, nma_get_nrings(hwna, NR_TX), NULL); nm_bound_var(&hostna->num_rx_rings, 1, 1, nma_get_nrings(hwna, NR_RX), NULL); snprintf(hostna->name, sizeof(hostna->name), "%s^", na->name); hostna->ifp = hwna->ifp; for_rx_tx(t) { enum txrx r = nm_txrx_swap(t); u_int nr = nma_get_nrings(hostna, t); nma_set_nrings(hostna, t, nr); nma_set_host_nrings(na, t, nr); if (nma_get_host_nrings(hwna, t) < nr) { nma_set_host_nrings(hwna, t, nr); } nma_set_ndesc(hostna, t, nma_get_ndesc(hwna, r)); } // hostna->nm_txsync = netmap_bwrap_host_txsync; // hostna->nm_rxsync = netmap_bwrap_host_rxsync; hostna->nm_mem = netmap_mem_get(na->nm_mem); hostna->na_private = bna; hostna->na_vp = &bna->up; na->na_hostvp = hwna->na_hostvp = hostna->na_hostvp = &bna->host; hostna->na_flags = NAF_BUSY; /* prevent NIOCREGIF */ hostna->rx_buf_maxsize = hwna->rx_buf_maxsize; } if (hwna->na_flags & NAF_MOREFRAG) na->na_flags |= NAF_MOREFRAG; ND("%s<->%s txr %d txd %d rxr %d rxd %d", na->name, ifp->if_xname, na->num_tx_rings, na->num_tx_desc, na->num_rx_rings, na->num_rx_desc); error = netmap_attach_common(na); if (error) { goto err_put; } hwna->na_flags |= NAF_BUSY; return 0; err_put: hwna->na_vp = hwna->na_hostvp = NULL; netmap_adapter_put(hwna); return error; } struct nm_bridge * netmap_init_bridges2(u_int n) { int i; struct nm_bridge *b; b = nm_os_malloc(sizeof(struct nm_bridge) * n); if (b == NULL) return NULL; for (i = 0; i < n; i++) BDG_RWINIT(&b[i]); return b; } void netmap_uninit_bridges2(struct nm_bridge *b, u_int n) { int i; if (b == NULL) return; for (i = 0; i < n; i++) BDG_RWDESTROY(&b[i]); nm_os_free(b); } int netmap_init_bridges(void) { #ifdef CONFIG_NET_NS return netmap_bns_register(); #else nm_bridges = netmap_init_bridges2(NM_BRIDGES); if (nm_bridges == NULL) return ENOMEM; return 0; #endif } void netmap_uninit_bridges(void) { #ifdef CONFIG_NET_NS netmap_bns_unregister(); #else netmap_uninit_bridges2(nm_bridges, NM_BRIDGES); #endif } Index: head/sys/dev/netmap/netmap_freebsd.c =================================================================== --- head/sys/dev/netmap/netmap_freebsd.c (revision 343548) +++ head/sys/dev/netmap/netmap_freebsd.c (revision 343549) @@ -1,1574 +1,1571 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* $FreeBSD$ */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include /* POLLIN, POLLOUT */ #include /* types used in module initialization */ #include /* DEV_MODULE_ORDERED */ #include #include /* kern_ioctl() */ #include #include /* vtophys */ #include /* vtophys */ #include #include #include #include #include #include #include /* sockaddrs */ #include #include /* kthread_add() */ #include /* PROC_LOCK() */ #include /* RFNOWAIT */ #include /* sched_bind() */ #include /* mp_maxid */ #include #include #include /* IFT_ETHER */ #include /* ether_ifdetach */ #include /* LLADDR */ #include /* bus_dmamap_* */ #include /* in6_cksum_pseudo() */ #include /* in_pseudo(), in_cksum_hdr() */ #include #include #include #include /* ======================== FREEBSD-SPECIFIC ROUTINES ================== */ void nm_os_selinfo_init(NM_SELINFO_T *si) { struct mtx *m = &si->m; mtx_init(m, "nm_kn_lock", NULL, MTX_DEF); knlist_init_mtx(&si->si.si_note, m); } void nm_os_selinfo_uninit(NM_SELINFO_T *si) { /* XXX kqueue(9) needed; these will mirror knlist_init. 
*/ knlist_delete(&si->si.si_note, curthread, /*islocked=*/0); knlist_destroy(&si->si.si_note); /* now we don't need the mutex anymore */ mtx_destroy(&si->m); } void * nm_os_malloc(size_t size) { return malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO); } void * nm_os_realloc(void *addr, size_t new_size, size_t old_size __unused) { return realloc(addr, new_size, M_DEVBUF, M_NOWAIT | M_ZERO); } void nm_os_free(void *addr) { free(addr, M_DEVBUF); } void nm_os_ifnet_lock(void) { IFNET_RLOCK(); } void nm_os_ifnet_unlock(void) { IFNET_RUNLOCK(); } static int netmap_use_count = 0; void nm_os_get_module(void) { netmap_use_count++; } void nm_os_put_module(void) { netmap_use_count--; } static void netmap_ifnet_arrival_handler(void *arg __unused, struct ifnet *ifp) { netmap_undo_zombie(ifp); } static void netmap_ifnet_departure_handler(void *arg __unused, struct ifnet *ifp) { netmap_make_zombie(ifp); } static eventhandler_tag nm_ifnet_ah_tag; static eventhandler_tag nm_ifnet_dh_tag; int nm_os_ifnet_init(void) { nm_ifnet_ah_tag = EVENTHANDLER_REGISTER(ifnet_arrival_event, netmap_ifnet_arrival_handler, NULL, EVENTHANDLER_PRI_ANY); nm_ifnet_dh_tag = EVENTHANDLER_REGISTER(ifnet_departure_event, netmap_ifnet_departure_handler, NULL, EVENTHANDLER_PRI_ANY); return 0; } void nm_os_ifnet_fini(void) { EVENTHANDLER_DEREGISTER(ifnet_arrival_event, nm_ifnet_ah_tag); EVENTHANDLER_DEREGISTER(ifnet_departure_event, nm_ifnet_dh_tag); } unsigned nm_os_ifnet_mtu(struct ifnet *ifp) { #if __FreeBSD_version < 1100030 return ifp->if_data.ifi_mtu; #else /* __FreeBSD_version >= 1100030 */ return ifp->if_mtu; #endif } rawsum_t nm_os_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum) { /* TODO XXX please use the FreeBSD implementation for this. */ uint16_t *words = (uint16_t *)data; int nw = len / 2; int i; for (i = 0; i < nw; i++) cur_sum += be16toh(words[i]); if (len & 1) cur_sum += (data[len-1] << 8); return cur_sum; } /* Fold a raw checksum: 'cur_sum' is in host byte order, while the * return value is in network byte order. */ uint16_t nm_os_csum_fold(rawsum_t cur_sum) { /* TODO XXX please use the FreeBSD implementation for this. */ while (cur_sum >> 16) cur_sum = (cur_sum & 0xFFFF) + (cur_sum >> 16); return htobe16((~cur_sum) & 0xFFFF); } uint16_t nm_os_csum_ipv4(struct nm_iphdr *iph) { #if 0 return in_cksum_hdr((void *)iph); #else return nm_os_csum_fold(nm_os_csum_raw((uint8_t*)iph, sizeof(struct nm_iphdr), 0)); #endif } void nm_os_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data, size_t datalen, uint16_t *check) { #ifdef INET uint16_t pseudolen = datalen + iph->protocol; /* Compute and insert the pseudo-header cheksum. */ *check = in_pseudo(iph->saddr, iph->daddr, htobe16(pseudolen)); /* Compute the checksum on TCP/UDP header + payload * (includes the pseudo-header). 
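The checksum helpers above follow the usual raw-sum-then-fold scheme. A simplified standalone version, in host byte order and with hypothetical csum_raw/csum_fold names rather than the be16toh()/htobe16() kernel variant:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* accumulate 16-bit words into a wide sum (an odd trailing byte is
 * treated as the high half of a final word) */
static uint32_t
csum_raw(const uint8_t *data, size_t len, uint32_t sum)
{
	size_t i;

	for (i = 0; i + 1 < len; i += 2)
		sum += (uint32_t)(data[i] << 8 | data[i + 1]);
	if (len & 1)
		sum += (uint32_t)(data[len - 1] << 8);
	return sum;
}

/* fold the carries back in and take the one's complement */
static uint16_t
csum_fold(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xFFFF) + (sum >> 16);
	return (uint16_t)~sum;
}

int
main(void)
{
	uint8_t pkt[] = { 0x45, 0x00, 0x00, 0x3c, 0x1c, 0x46 };

	printf("checksum 0x%04x\n", csum_fold(csum_raw(pkt, sizeof(pkt), 0)));
	return 0;
}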
*/ *check = nm_os_csum_fold(nm_os_csum_raw(data, datalen, 0)); #else static int notsupported = 0; if (!notsupported) { notsupported = 1; - D("inet4 segmentation not supported"); + nm_prerr("inet4 segmentation not supported"); } #endif } void nm_os_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data, size_t datalen, uint16_t *check) { #ifdef INET6 *check = in6_cksum_pseudo((void*)ip6h, datalen, ip6h->nexthdr, 0); *check = nm_os_csum_fold(nm_os_csum_raw(data, datalen, 0)); #else static int notsupported = 0; if (!notsupported) { notsupported = 1; - D("inet6 segmentation not supported"); + nm_prerr("inet6 segmentation not supported"); } #endif } /* on FreeBSD we send up one packet at a time */ void * nm_os_send_up(struct ifnet *ifp, struct mbuf *m, struct mbuf *prev) { NA(ifp)->if_input(ifp, m); return NULL; } int nm_os_mbuf_has_csum_offld(struct mbuf *m) { return m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_SCTP | CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_SCTP_IPV6); } int nm_os_mbuf_has_seg_offld(struct mbuf *m) { return m->m_pkthdr.csum_flags & CSUM_TSO; } static void freebsd_generic_rx_handler(struct ifnet *ifp, struct mbuf *m) { int stolen; - if (!NM_NA_VALID(ifp)) { - RD(1, "Warning: got RX packet for invalid emulated adapter"); + if (unlikely(!NM_NA_VALID(ifp))) { + nm_prlim(1, "Warning: RX packet intercepted, but no" + " emulated adapter"); return; } stolen = generic_rx_handler(ifp, m); if (!stolen) { struct netmap_generic_adapter *gna = (struct netmap_generic_adapter *)NA(ifp); gna->save_if_input(ifp, m); } } /* * Intercept the rx routine in the standard device driver. * Second argument is non-zero to intercept, 0 to restore */ int nm_os_catch_rx(struct netmap_generic_adapter *gna, int intercept) { struct netmap_adapter *na = &gna->up.up; struct ifnet *ifp = na->ifp; int ret = 0; nm_os_ifnet_lock(); if (intercept) { if (gna->save_if_input) { - D("cannot intercept again"); - ret = EINVAL; /* already set */ + nm_prerr("RX on %s already intercepted", na->name); + ret = EBUSY; /* already set */ goto out; } gna->save_if_input = ifp->if_input; ifp->if_input = freebsd_generic_rx_handler; } else { - if (!gna->save_if_input){ - D("cannot restore"); + if (!gna->save_if_input) { + nm_prerr("Failed to undo RX intercept on %s", + na->name); ret = EINVAL; /* not saved */ goto out; } ifp->if_input = gna->save_if_input; gna->save_if_input = NULL; } out: nm_os_ifnet_unlock(); return ret; } /* * Intercept the packet steering routine in the tx path, * so that we can decide which queue is used for an mbuf. * Second argument is non-zero to intercept, 0 to restore. * On freebsd we just intercept if_transmit. */ int nm_os_catch_tx(struct netmap_generic_adapter *gna, int intercept) { struct netmap_adapter *na = &gna->up.up; struct ifnet *ifp = netmap_generic_getifp(gna); nm_os_ifnet_lock(); if (intercept) { na->if_transmit = ifp->if_transmit; ifp->if_transmit = netmap_transmit; } else { ifp->if_transmit = na->if_transmit; } nm_os_ifnet_unlock(); return 0; } /* * Transmit routine used by generic_netmap_txsync(). Returns 0 on success * and non-zero on error (which may be packet drops or other errors). * addr and len identify the netmap buffer, m is the (preallocated) * mbuf to use for transmissions. * * We should add a reference to the mbuf so the m_freem() at the end * of the transmission does not consume resources. 
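The extra mbuf reference mentioned above (SET_MBUF_REFCNT(m, 2) in the code below) can be sketched with an ordinary counted buffer; toy_buf and toy_buf_free are hypothetical:

#include <stdio.h>
#include <stdlib.h>

/* the caller keeps one reference, so the "driver free" only drops the
 * count and the buffer survives for the next transmission */
struct toy_buf {
	int refcnt;
	char data[2048];
};

static void
toy_buf_free(struct toy_buf *b)		/* stand-in for m_freem() */
{
	if (--b->refcnt == 0) {
		printf("buffer really freed\n");
		free(b);
	} else {
		printf("reference dropped, buffer kept (refcnt %d)\n", b->refcnt);
	}
}

int
main(void)
{
	struct toy_buf *b = calloc(1, sizeof(*b));

	if (b == NULL)
		return 1;
	b->refcnt = 2;		/* one for us, one for the "driver" */
	toy_buf_free(b);	/* driver path: buffer survives */
	toy_buf_free(b);	/* owner path: buffer is released */
	return 0;
}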
* * On FreeBSD, and on multiqueue cards, we can force the queue using * if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) * i = m->m_pkthdr.flowid % adapter->num_queues; * else * i = curcpu % adapter->num_queues; * */ int nm_os_generic_xmit_frame(struct nm_os_gen_arg *a) { int ret; u_int len = a->len; struct ifnet *ifp = a->ifp; struct mbuf *m = a->m; #if __FreeBSD_version < 1100000 /* * Old FreeBSD versions. The mbuf has a cluster attached, * we need to copy from the cluster to the netmap buffer. */ if (MBUF_REFCNT(m) != 1) { - D("invalid refcnt %d for %p", MBUF_REFCNT(m), m); + nm_prerr("invalid refcnt %d for %p", MBUF_REFCNT(m), m); panic("in generic_xmit_frame"); } if (m->m_ext.ext_size < len) { - RD(5, "size %d < len %d", m->m_ext.ext_size, len); + nm_prlim(2, "size %d < len %d", m->m_ext.ext_size, len); len = m->m_ext.ext_size; } bcopy(a->addr, m->m_data, len); #else /* __FreeBSD_version >= 1100000 */ /* New FreeBSD versions. Link the external storage to * the netmap buffer, so that no copy is necessary. */ m->m_ext.ext_buf = m->m_data = a->addr; m->m_ext.ext_size = len; #endif /* __FreeBSD_version >= 1100000 */ m->m_len = m->m_pkthdr.len = len; /* mbuf refcnt is not contended, no need to use atomic * (a memory barrier is enough). */ SET_MBUF_REFCNT(m, 2); M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); m->m_pkthdr.flowid = a->ring_nr; m->m_pkthdr.rcvif = ifp; /* used for tx notification */ ret = NA(ifp)->if_transmit(ifp, m); return ret ? -1 : 0; } #if __FreeBSD_version >= 1100005 struct netmap_adapter * netmap_getna(if_t ifp) { return (NA((struct ifnet *)ifp)); } #endif /* __FreeBSD_version >= 1100005 */ /* * The following two functions are empty until we have a generic * way to extract the info from the ifp */ int nm_os_generic_find_num_desc(struct ifnet *ifp, unsigned int *tx, unsigned int *rx) { return 0; } void nm_os_generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq) { unsigned num_rings = netmap_generic_rings ? netmap_generic_rings : 1; *txq = num_rings; *rxq = num_rings; } void nm_os_generic_set_features(struct netmap_generic_adapter *gna) { gna->rxsg = 1; /* Supported through m_copydata. */ gna->txqdisc = 0; /* Not supported. */ } void nm_os_mitigation_init(struct nm_generic_mit *mit, int idx, struct netmap_adapter *na) { - ND("called"); mit->mit_pending = 0; mit->mit_ring_idx = idx; mit->mit_na = na; } void nm_os_mitigation_start(struct nm_generic_mit *mit) { - ND("called"); } void nm_os_mitigation_restart(struct nm_generic_mit *mit) { - ND("called"); } int nm_os_mitigation_active(struct nm_generic_mit *mit) { - ND("called"); + return 0; } void nm_os_mitigation_cleanup(struct nm_generic_mit *mit) { - ND("called"); } static int nm_vi_dummy(struct ifnet *ifp, u_long cmd, caddr_t addr) { + return EINVAL; } static void nm_vi_start(struct ifnet *ifp) { panic("nm_vi_start() must not be called"); } /* * Index manager of persistent virtual interfaces. * It is used to decide the lowest byte of the MAC address. * We use the same algorithm with management of bridge port index. */ #define NM_VI_MAX 255 static struct { uint8_t index[NM_VI_MAX]; /* XXX just for a reasonable number */ uint8_t active; struct mtx lock; } nm_vi_indices; void nm_os_vi_init_index(void) { int i; for (i = 0; i < NM_VI_MAX; i++) nm_vi_indices.index[i] = i; nm_vi_indices.active = 0; mtx_init(&nm_vi_indices.lock, "nm_vi_indices_lock", NULL, MTX_DEF); } /* return -1 if no index available */ static int nm_vi_get_index(void) { int ret; mtx_lock(&nm_vi_indices.lock); ret = nm_vi_indices.active == NM_VI_MAX ? 
-1 : nm_vi_indices.index[nm_vi_indices.active++]; mtx_unlock(&nm_vi_indices.lock); return ret; } static void nm_vi_free_index(uint8_t val) { int i, lim; mtx_lock(&nm_vi_indices.lock); lim = nm_vi_indices.active; for (i = 0; i < lim; i++) { if (nm_vi_indices.index[i] == val) { /* swap index[lim-1] and j */ int tmp = nm_vi_indices.index[lim-1]; nm_vi_indices.index[lim-1] = val; nm_vi_indices.index[i] = tmp; nm_vi_indices.active--; break; } } if (lim == nm_vi_indices.active) - D("funny, index %u didn't found", val); + nm_prerr("Index %u not found", val); mtx_unlock(&nm_vi_indices.lock); } #undef NM_VI_MAX /* * Implementation of a netmap-capable virtual interface that * registered to the system. * It is based on if_tap.c and ip_fw_log.c in FreeBSD 9. * * Note: Linux sets refcount to 0 on allocation of net_device, * then increments it on registration to the system. * FreeBSD sets refcount to 1 on if_alloc(), and does not * increment this refcount on if_attach(). */ int nm_os_vi_persist(const char *name, struct ifnet **ret) { struct ifnet *ifp; u_short macaddr_hi; uint32_t macaddr_mid; u_char eaddr[6]; int unit = nm_vi_get_index(); /* just to decide MAC address */ if (unit < 0) return EBUSY; /* * We use the same MAC address generation method with tap * except for the highest octet is 00:be instead of 00:bd */ macaddr_hi = htons(0x00be); /* XXX tap + 1 */ macaddr_mid = (uint32_t) ticks; bcopy(&macaddr_hi, eaddr, sizeof(short)); bcopy(&macaddr_mid, &eaddr[2], sizeof(uint32_t)); eaddr[5] = (uint8_t)unit; ifp = if_alloc(IFT_ETHER); if (ifp == NULL) { - D("if_alloc failed"); + nm_prerr("if_alloc failed"); return ENOMEM; } if_initname(ifp, name, IF_DUNIT_NONE); ifp->if_mtu = 65536; ifp->if_flags = IFF_UP | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_init = (void *)nm_vi_dummy; ifp->if_ioctl = nm_vi_dummy; ifp->if_start = nm_vi_start; ifp->if_mtu = ETHERMTU; IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); ifp->if_capabilities |= IFCAP_LINKSTATE; ifp->if_capenable |= IFCAP_LINKSTATE; ether_ifattach(ifp, eaddr); *ret = ifp; return 0; } /* unregister from the system and drop the final refcount */ void nm_os_vi_detach(struct ifnet *ifp) { nm_vi_free_index(((char *)IF_LLADDR(ifp))[5]); ether_ifdetach(ifp); if_free(ifp); } #ifdef WITH_EXTMEM #include #include struct nm_os_extmem { vm_object_t obj; vm_offset_t kva; vm_offset_t size; uintptr_t scan; }; void nm_os_extmem_delete(struct nm_os_extmem *e) { - D("freeing %zx bytes", (size_t)e->size); + nm_prinf("freeing %zx bytes", (size_t)e->size); vm_map_remove(kernel_map, e->kva, e->kva + e->size); nm_os_free(e); } char * nm_os_extmem_nextpage(struct nm_os_extmem *e) { char *rv = NULL; if (e->scan < e->kva + e->size) { rv = (char *)e->scan; e->scan += PAGE_SIZE; } return rv; } int nm_os_extmem_isequal(struct nm_os_extmem *e1, struct nm_os_extmem *e2) { return (e1->obj == e2->obj); } int nm_os_extmem_nr_pages(struct nm_os_extmem *e) { return e->size >> PAGE_SHIFT; } struct nm_os_extmem * nm_os_extmem_create(unsigned long p, struct nmreq_pools_info *pi, int *perror) { vm_map_t map; vm_map_entry_t entry; vm_object_t obj; vm_prot_t prot; vm_pindex_t index; boolean_t wired; struct nm_os_extmem *e = NULL; int rv, error = 0; e = nm_os_malloc(sizeof(*e)); if (e == NULL) { error = ENOMEM; goto out; } map = &curthread->td_proc->p_vmspace->vm_map; rv = vm_map_lookup(&map, p, VM_PROT_RW, &entry, &obj, &index, &prot, &wired); if (rv != KERN_SUCCESS) { - D("address %lx not found", p); + nm_prerr("address %lx not found", p); goto out_free; } /* check that we are given the whole vm_object ? 
*/ vm_map_lookup_done(map, entry); // XXX can we really use obj after releasing the map lock? e->obj = obj; vm_object_reference(obj); /* wire the memory and add the vm_object to the kernel map, * to make sure that it is not freed even if the processes that * are mmap()ing it all exit */ e->kva = vm_map_min(kernel_map); e->size = obj->size << PAGE_SHIFT; rv = vm_map_find(kernel_map, obj, 0, &e->kva, e->size, 0, VMFS_OPTIMAL_SPACE, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ | VM_PROT_WRITE, 0); if (rv != KERN_SUCCESS) { - D("vm_map_find(%zx) failed", (size_t)e->size); + nm_prerr("vm_map_find(%zx) failed", (size_t)e->size); goto out_rel; } rv = vm_map_wire(kernel_map, e->kva, e->kva + e->size, VM_MAP_WIRE_SYSTEM | VM_MAP_WIRE_NOHOLES); if (rv != KERN_SUCCESS) { - D("vm_map_wire failed"); + nm_prerr("vm_map_wire failed"); goto out_rem; } e->scan = e->kva; return e; out_rem: vm_map_remove(kernel_map, e->kva, e->kva + e->size); e->obj = NULL; out_rel: vm_object_deallocate(e->obj); out_free: nm_os_free(e); out: if (perror) *perror = error; return NULL; } #endif /* WITH_EXTMEM */ /* ================== PTNETMAP GUEST SUPPORT ==================== */ #ifdef WITH_PTNETMAP #include #include #include /* bus_dmamap_* */ #include #include #include /* * ptnetmap memory device (memdev) for freebsd guest, * used to expose host netmap memory to the guest through a PCI BAR. */ /* * ptnetmap memdev private data structure */ struct ptnetmap_memdev { device_t dev; struct resource *pci_io; struct resource *pci_mem; struct netmap_mem_d *nm_mem; }; static int ptn_memdev_probe(device_t); static int ptn_memdev_attach(device_t); static int ptn_memdev_detach(device_t); static int ptn_memdev_shutdown(device_t); static device_method_t ptn_memdev_methods[] = { DEVMETHOD(device_probe, ptn_memdev_probe), DEVMETHOD(device_attach, ptn_memdev_attach), DEVMETHOD(device_detach, ptn_memdev_detach), DEVMETHOD(device_shutdown, ptn_memdev_shutdown), DEVMETHOD_END }; static driver_t ptn_memdev_driver = { PTNETMAP_MEMDEV_NAME, ptn_memdev_methods, sizeof(struct ptnetmap_memdev), }; /* We use (SI_ORDER_MIDDLE+1) here, see DEV_MODULE_ORDERED() invocation * below. */ static devclass_t ptnetmap_devclass; DRIVER_MODULE_ORDERED(ptn_memdev, pci, ptn_memdev_driver, ptnetmap_devclass, NULL, NULL, SI_ORDER_MIDDLE + 1); /* * Map host netmap memory through PCI-BAR in the guest OS, * returning physical (nm_paddr) and virtual (nm_addr) addresses * of the netmap memory mapped in the guest.
*/ int nm_os_pt_memdev_iomap(struct ptnetmap_memdev *ptn_dev, vm_paddr_t *nm_paddr, void **nm_addr, uint64_t *mem_size) { int rid; - D("ptn_memdev_driver iomap"); + nm_prinf("ptn_memdev_driver iomap"); rid = PCIR_BAR(PTNETMAP_MEM_PCI_BAR); *mem_size = bus_read_4(ptn_dev->pci_io, PTNET_MDEV_IO_MEMSIZE_HI); *mem_size = bus_read_4(ptn_dev->pci_io, PTNET_MDEV_IO_MEMSIZE_LO) | (*mem_size << 32); /* map memory allocator */ ptn_dev->pci_mem = bus_alloc_resource(ptn_dev->dev, SYS_RES_MEMORY, &rid, 0, ~0, *mem_size, RF_ACTIVE); if (ptn_dev->pci_mem == NULL) { *nm_paddr = 0; *nm_addr = NULL; return ENOMEM; } *nm_paddr = rman_get_start(ptn_dev->pci_mem); *nm_addr = rman_get_virtual(ptn_dev->pci_mem); - D("=== BAR %d start %lx len %lx mem_size %lx ===", + nm_prinf("=== BAR %d start %lx len %lx mem_size %lx ===", PTNETMAP_MEM_PCI_BAR, (unsigned long)(*nm_paddr), (unsigned long)rman_get_size(ptn_dev->pci_mem), (unsigned long)*mem_size); return (0); } uint32_t nm_os_pt_memdev_ioread(struct ptnetmap_memdev *ptn_dev, unsigned int reg) { return bus_read_4(ptn_dev->pci_io, reg); } /* Unmap host netmap memory. */ void nm_os_pt_memdev_iounmap(struct ptnetmap_memdev *ptn_dev) { - D("ptn_memdev_driver iounmap"); + nm_prinf("ptn_memdev_driver iounmap"); if (ptn_dev->pci_mem) { bus_release_resource(ptn_dev->dev, SYS_RES_MEMORY, PCIR_BAR(PTNETMAP_MEM_PCI_BAR), ptn_dev->pci_mem); ptn_dev->pci_mem = NULL; } } /* Device identification routine, return BUS_PROBE_DEFAULT on success, * positive on failure */ static int ptn_memdev_probe(device_t dev) { char desc[256]; if (pci_get_vendor(dev) != PTNETMAP_PCI_VENDOR_ID) return (ENXIO); if (pci_get_device(dev) != PTNETMAP_PCI_DEVICE_ID) return (ENXIO); snprintf(desc, sizeof(desc), "%s PCI adapter", PTNETMAP_MEMDEV_NAME); device_set_desc_copy(dev, desc); return (BUS_PROBE_DEFAULT); } /* Device initialization routine. */ static int ptn_memdev_attach(device_t dev) { struct ptnetmap_memdev *ptn_dev; int rid; uint16_t mem_id; - D("ptn_memdev_driver attach"); - ptn_dev = device_get_softc(dev); ptn_dev->dev = dev; pci_enable_busmaster(dev); rid = PCIR_BAR(PTNETMAP_IO_PCI_BAR); ptn_dev->pci_io = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &rid, RF_ACTIVE); if (ptn_dev->pci_io == NULL) { device_printf(dev, "cannot map I/O space\n"); return (ENXIO); } mem_id = bus_read_4(ptn_dev->pci_io, PTNET_MDEV_IO_MEMID); /* create guest allocator */ ptn_dev->nm_mem = netmap_mem_pt_guest_attach(ptn_dev, mem_id); if (ptn_dev->nm_mem == NULL) { ptn_memdev_detach(dev); return (ENOMEM); } netmap_mem_get(ptn_dev->nm_mem); - D("ptn_memdev_driver probe OK - host_mem_id: %d", mem_id); + nm_prinf("ptnetmap memdev attached, host memid: %u", mem_id); return (0); } /* Device removal routine. 
*/ static int ptn_memdev_detach(device_t dev) { struct ptnetmap_memdev *ptn_dev; - D("ptn_memdev_driver detach"); ptn_dev = device_get_softc(dev); if (ptn_dev->nm_mem) { + nm_prinf("ptnetmap memdev detached, host memid %u", + netmap_mem_get_id(ptn_dev->nm_mem)); netmap_mem_put(ptn_dev->nm_mem); ptn_dev->nm_mem = NULL; } if (ptn_dev->pci_mem) { bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(PTNETMAP_MEM_PCI_BAR), ptn_dev->pci_mem); ptn_dev->pci_mem = NULL; } if (ptn_dev->pci_io) { bus_release_resource(dev, SYS_RES_IOPORT, PCIR_BAR(PTNETMAP_IO_PCI_BAR), ptn_dev->pci_io); ptn_dev->pci_io = NULL; } return (0); } static int ptn_memdev_shutdown(device_t dev) { - D("ptn_memdev_driver shutdown"); return bus_generic_shutdown(dev); } #endif /* WITH_PTNETMAP */ /* * In order to track whether pages are still mapped, we hook into * the standard cdev_pager and intercept the constructor and * destructor. */ struct netmap_vm_handle_t { struct cdev *dev; struct netmap_priv_d *priv; }; static int netmap_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t foff, struct ucred *cred, u_short *color) { struct netmap_vm_handle_t *vmh = handle; if (netmap_verbose) - D("handle %p size %jd prot %d foff %jd", + nm_prinf("handle %p size %jd prot %d foff %jd", handle, (intmax_t)size, prot, (intmax_t)foff); if (color) *color = 0; dev_ref(vmh->dev); return 0; } static void netmap_dev_pager_dtor(void *handle) { struct netmap_vm_handle_t *vmh = handle; struct cdev *dev = vmh->dev; struct netmap_priv_d *priv = vmh->priv; if (netmap_verbose) - D("handle %p", handle); + nm_prinf("handle %p", handle); netmap_dtor(priv); free(vmh, M_DEVBUF); dev_rel(dev); } static int netmap_dev_pager_fault(vm_object_t object, vm_ooffset_t offset, int prot, vm_page_t *mres) { struct netmap_vm_handle_t *vmh = object->handle; struct netmap_priv_d *priv = vmh->priv; struct netmap_adapter *na = priv->np_na; vm_paddr_t paddr; vm_page_t page; vm_memattr_t memattr; vm_pindex_t pidx; - ND("object %p offset %jd prot %d mres %p", + nm_prdis("object %p offset %jd prot %d mres %p", object, (intmax_t)offset, prot, mres); memattr = object->memattr; pidx = OFF_TO_IDX(offset); paddr = netmap_mem_ofstophys(na->nm_mem, offset); if (paddr == 0) return VM_PAGER_FAIL; if (((*mres)->flags & PG_FICTITIOUS) != 0) { /* * If the passed in result page is a fake page, update it with * the new physical address. */ page = *mres; vm_page_updatefake(page, paddr, memattr); } else { /* * Replace the passed in reqpage page with our own fake page and * free up the all of the original pages. 
*/ #ifndef VM_OBJECT_WUNLOCK /* FreeBSD < 10.x */ #define VM_OBJECT_WUNLOCK VM_OBJECT_UNLOCK #define VM_OBJECT_WLOCK VM_OBJECT_LOCK #endif /* VM_OBJECT_WUNLOCK */ VM_OBJECT_WUNLOCK(object); page = vm_page_getfake(paddr, memattr); VM_OBJECT_WLOCK(object); vm_page_lock(*mres); vm_page_free(*mres); vm_page_unlock(*mres); *mres = page; vm_page_insert(page, object, pidx); } page->valid = VM_PAGE_BITS_ALL; return (VM_PAGER_OK); } static struct cdev_pager_ops netmap_cdev_pager_ops = { .cdev_pg_ctor = netmap_dev_pager_ctor, .cdev_pg_dtor = netmap_dev_pager_dtor, .cdev_pg_fault = netmap_dev_pager_fault, }; static int netmap_mmap_single(struct cdev *cdev, vm_ooffset_t *foff, vm_size_t objsize, vm_object_t *objp, int prot) { int error; struct netmap_vm_handle_t *vmh; struct netmap_priv_d *priv; vm_object_t obj; if (netmap_verbose) - D("cdev %p foff %jd size %jd objp %p prot %d", cdev, + nm_prinf("cdev %p foff %jd size %jd objp %p prot %d", cdev, (intmax_t )*foff, (intmax_t )objsize, objp, prot); vmh = malloc(sizeof(struct netmap_vm_handle_t), M_DEVBUF, M_NOWAIT | M_ZERO); if (vmh == NULL) return ENOMEM; vmh->dev = cdev; NMG_LOCK(); error = devfs_get_cdevpriv((void**)&priv); if (error) goto err_unlock; if (priv->np_nifp == NULL) { error = EINVAL; goto err_unlock; } vmh->priv = priv; priv->np_refs++; NMG_UNLOCK(); obj = cdev_pager_allocate(vmh, OBJT_DEVICE, &netmap_cdev_pager_ops, objsize, prot, *foff, NULL); if (obj == NULL) { - D("cdev_pager_allocate failed"); + nm_prerr("cdev_pager_allocate failed"); error = EINVAL; goto err_deref; } *objp = obj; return 0; err_deref: NMG_LOCK(); priv->np_refs--; err_unlock: NMG_UNLOCK(); // err: free(vmh, M_DEVBUF); return error; } /* * On FreeBSD the close routine is only called on the last close on * the device (/dev/netmap) so we cannot do anything useful. * To track close() on individual file descriptors we pass netmap_dtor() to * devfs_set_cdevpriv() on open(). The FreeBSD kernel will call the destructor * when the last fd pointing to the device is closed. * * Note that FreeBSD does not even munmap() on close() so we also have * to track mmap() ourselves, and postpone the call to * netmap_dtor() until the process has no open fds and no active * memory maps on /dev/netmap, as in linux. */ static int netmap_close(struct cdev *dev, int fflag, int devtype, struct thread *td) { if (netmap_verbose) - D("dev %p fflag 0x%x devtype %d td %p", + nm_prinf("dev %p fflag 0x%x devtype %d td %p", dev, fflag, devtype, td); return 0; } static int netmap_open(struct cdev *dev, int oflags, int devtype, struct thread *td) { struct netmap_priv_d *priv; int error; (void)dev; (void)oflags; (void)devtype; (void)td; NMG_LOCK(); priv = netmap_priv_new(); if (priv == NULL) { error = ENOMEM; goto out; } error = devfs_set_cdevpriv(priv, netmap_dtor); if (error) { netmap_priv_delete(priv); } out: NMG_UNLOCK(); return error; } /******************** kthread wrapper ****************/ #include u_int nm_os_ncpus(void) { return mp_maxid + 1; } struct nm_kctx_ctx { /* Userspace thread (kthread creator).
*/ struct thread *user_td; /* worker function and parameter */ nm_kctx_worker_fn_t worker_fn; void *worker_private; struct nm_kctx *nmk; /* integer to manage multiple worker contexts (e.g., RX or TX on ptnetmap) */ long type; }; struct nm_kctx { struct thread *worker; struct mtx worker_lock; struct nm_kctx_ctx worker_ctx; int run; /* used to stop kthread */ int attach_user; /* kthread attached to user_process */ int affinity; }; static void nm_kctx_worker(void *data) { struct nm_kctx *nmk = data; struct nm_kctx_ctx *ctx = &nmk->worker_ctx; if (nmk->affinity >= 0) { thread_lock(curthread); sched_bind(curthread, nmk->affinity); thread_unlock(curthread); } while (nmk->run) { /* * check if the parent process dies * (when kthread is attached to user process) */ if (ctx->user_td) { PROC_LOCK(curproc); thread_suspend_check(0); PROC_UNLOCK(curproc); } else { kthread_suspend_check(); } /* Continuously execute worker process. */ ctx->worker_fn(ctx->worker_private); /* worker body */ } kthread_exit(); } void nm_os_kctx_worker_setaff(struct nm_kctx *nmk, int affinity) { nmk->affinity = affinity; } struct nm_kctx * nm_os_kctx_create(struct nm_kctx_cfg *cfg, void *opaque) { struct nm_kctx *nmk = NULL; nmk = malloc(sizeof(*nmk), M_DEVBUF, M_NOWAIT | M_ZERO); if (!nmk) return NULL; mtx_init(&nmk->worker_lock, "nm_kthread lock", NULL, MTX_DEF); nmk->worker_ctx.worker_fn = cfg->worker_fn; nmk->worker_ctx.worker_private = cfg->worker_private; nmk->worker_ctx.type = cfg->type; nmk->affinity = -1; /* attach kthread to user process (ptnetmap) */ nmk->attach_user = cfg->attach_user; return nmk; } int nm_os_kctx_worker_start(struct nm_kctx *nmk) { struct proc *p = NULL; int error = 0; /* Temporarily disable this function as it is currently broken * and causes kernel crashes. The failure can be triggered by * the "vale_polling_enable_disable" test in ctrl-api-test.c. */ return EOPNOTSUPP; if (nmk->worker) return EBUSY; /* check if we want to attach kthread to user process */ if (nmk->attach_user) { nmk->worker_ctx.user_td = curthread; p = curthread->td_proc; } /* enable kthread main loop */ nmk->run = 1; /* create kthread */ if((error = kthread_add(nm_kctx_worker, nmk, p, &nmk->worker, RFNOWAIT /* to be checked */, 0, "nm-kthread-%ld", nmk->worker_ctx.type))) { goto err; } - D("nm_kthread started td %p", nmk->worker); + nm_prinf("nm_kthread started td %p", nmk->worker); return 0; err: - D("nm_kthread start failed err %d", error); + nm_prerr("nm_kthread start failed err %d", error); nmk->worker = NULL; return error; } void nm_os_kctx_worker_stop(struct nm_kctx *nmk) { if (!nmk->worker) return; /* tell to kthread to exit from main loop */ nmk->run = 0; /* wake up kthread if it sleeps */ kthread_resume(nmk->worker); nmk->worker = NULL; } void nm_os_kctx_destroy(struct nm_kctx *nmk) { if (!nmk) return; if (nmk->worker) nm_os_kctx_worker_stop(nmk); free(nmk, M_DEVBUF); } /******************** kqueue support ****************/ /* * In addition to calling selwakeuppri(), nm_os_selwakeup() also * needs to call KNOTE to wake up kqueue listeners. * We use a non-zero 'hint' argument to inform the netmap_knrw() * function that it is being called from 'nm_os_selwakeup'; this * is necessary because when netmap_knrw() is called by the kevent * subsystem (i.e. kevent_scan()) we also need to call netmap_poll(). * The knote uses a private mutex associated to the 'si' (see struct * selinfo, struct nm_selinfo, and nm_os_selinfo_init). 
* * The netmap_kqfilter() function registers one or another f_event * depending on read or write mode. A pointer to the struct * 'netmap_priv_d' is stored into kn->kn_hook, so that it can later * be passed to netmap_poll(). We pass NULL as a third argument to * netmap_poll(), so that the latter only runs the txsync/rxsync * (if necessary), and skips the nm_os_selrecord() calls. */ void nm_os_selwakeup(struct nm_selinfo *si) { if (netmap_verbose) nm_prinf("on knote %p", &si->si.si_note); selwakeuppri(&si->si, PI_NET); /* We use a non-zero hint to distinguish this notification call * from the call done in kqueue_scan(), which uses hint=0. */ KNOTE(&si->si.si_note, /*hint=*/0x100, mtx_owned(&si->m) ? KNF_LISTLOCKED : 0); } void nm_os_selrecord(struct thread *td, struct nm_selinfo *si) { selrecord(td, &si->si); } static void netmap_knrdetach(struct knote *kn) { struct netmap_priv_d *priv = (struct netmap_priv_d *)kn->kn_hook; struct selinfo *si = &priv->np_si[NR_RX]->si; - D("remove selinfo %p", si); + nm_prinf("remove selinfo %p", si); knlist_remove(&si->si_note, kn, /*islocked=*/0); } static void netmap_knwdetach(struct knote *kn) { struct netmap_priv_d *priv = (struct netmap_priv_d *)kn->kn_hook; struct selinfo *si = &priv->np_si[NR_TX]->si; - D("remove selinfo %p", si); + nm_prinf("remove selinfo %p", si); knlist_remove(&si->si_note, kn, /*islocked=*/0); } /* * Callback triggered by netmap notifications (see netmap_notify()), * and by the application calling kevent(). In the former case we * just return 1 (events ready), since we are not able to do better. * In the latter case we use netmap_poll() to see which events are * ready. */ static int netmap_knrw(struct knote *kn, long hint, int events) { struct netmap_priv_d *priv; int revents; if (hint != 0) { /* Called from netmap_notify(), typically from a * thread different from the one issuing kevent(). * Assume we are ready. */ return 1; } /* Called from kevent(). */ priv = kn->kn_hook; revents = netmap_poll(priv, events, /*thread=*/NULL); return (events & revents) ? 1 : 0; } static int netmap_knread(struct knote *kn, long hint) { return netmap_knrw(kn, hint, POLLIN); } static int netmap_knwrite(struct knote *kn, long hint) { return netmap_knrw(kn, hint, POLLOUT); } static struct filterops netmap_rfiltops = { .f_isfd = 1, .f_detach = netmap_knrdetach, .f_event = netmap_knread, }; static struct filterops netmap_wfiltops = { .f_isfd = 1, .f_detach = netmap_knwdetach, .f_event = netmap_knwrite, }; /* * This is called when a thread invokes kevent() to record * a change in the configuration of the kqueue(). * The 'priv' is the one associated to the open netmap device. */ static int netmap_kqfilter(struct cdev *dev, struct knote *kn) { struct netmap_priv_d *priv; int error; struct netmap_adapter *na; struct nm_selinfo *si; int ev = kn->kn_filter; if (ev != EVFILT_READ && ev != EVFILT_WRITE) { - D("bad filter request %d", ev); + nm_prerr("bad filter request %d", ev); return 1; } error = devfs_get_cdevpriv((void**)&priv); if (error) { - D("device not yet setup"); + nm_prerr("device not yet setup"); return 1; } na = priv->np_na; if (na == NULL) { - D("no netmap adapter for this file descriptor"); + nm_prerr("no netmap adapter for this file descriptor"); return 1; } /* the si is indicated in the priv */ si = priv->np_si[(ev == EVFILT_WRITE) ? NR_TX : NR_RX]; kn->kn_fop = (ev == EVFILT_WRITE) ? 
&netmap_wfiltops : &netmap_rfiltops; kn->kn_hook = priv; knlist_add(&si->si.si_note, kn, /*islocked=*/0); return 0; } static int freebsd_netmap_poll(struct cdev *cdevi __unused, int events, struct thread *td) { struct netmap_priv_d *priv; if (devfs_get_cdevpriv((void **)&priv)) { return POLLERR; } return netmap_poll(priv, events, td); } static int freebsd_netmap_ioctl(struct cdev *dev __unused, u_long cmd, caddr_t data, int ffla __unused, struct thread *td) { int error; struct netmap_priv_d *priv; CURVNET_SET(TD_TO_VNET(td)); error = devfs_get_cdevpriv((void **)&priv); if (error) { /* XXX ENOENT should be impossible, since the priv * is now created in the open */ if (error == ENOENT) error = ENXIO; goto out; } error = netmap_ioctl(priv, cmd, data, td, /*nr_body_is_user=*/1); out: CURVNET_RESTORE(); return error; } void nm_os_onattach(struct ifnet *ifp) { ifp->if_capabilities |= IFCAP_NETMAP; } void nm_os_onenter(struct ifnet *ifp) { struct netmap_adapter *na = NA(ifp); na->if_transmit = ifp->if_transmit; ifp->if_transmit = netmap_transmit; ifp->if_capenable |= IFCAP_NETMAP; } void nm_os_onexit(struct ifnet *ifp) { struct netmap_adapter *na = NA(ifp); ifp->if_transmit = na->if_transmit; ifp->if_capenable &= ~IFCAP_NETMAP; } extern struct cdevsw netmap_cdevsw; /* XXX used in netmap.c, should go elsewhere */ struct cdevsw netmap_cdevsw = { .d_version = D_VERSION, .d_name = "netmap", .d_open = netmap_open, .d_mmap_single = netmap_mmap_single, .d_ioctl = freebsd_netmap_ioctl, .d_poll = freebsd_netmap_poll, .d_kqfilter = netmap_kqfilter, .d_close = netmap_close, }; /*--- end of kqueue support ----*/ /* * Kernel entry point. * * Initialize/finalize the module and return. * * Return 0 on success, errno on failure. */ static int netmap_loader(__unused struct module *module, int event, __unused void *arg) { int error = 0; switch (event) { case MOD_LOAD: error = netmap_init(); break; case MOD_UNLOAD: /* * if some one is still using netmap, * then the module can not be unloaded. */ if (netmap_use_count) { - D("netmap module can not be unloaded - netmap_use_count: %d", + nm_prerr("netmap module can not be unloaded - netmap_use_count: %d", netmap_use_count); error = EBUSY; break; } netmap_fini(); break; default: error = EOPNOTSUPP; break; } return (error); } #ifdef DEV_MODULE_ORDERED /* * The netmap module contains three drivers: (i) the netmap character device * driver; (ii) the ptnetmap memdev PCI device driver, (iii) the ptnet PCI * device driver. The attach() routines of both (ii) and (iii) need the * lock of the global allocator, and such lock is initialized in netmap_init(), * which is part of (i). * Therefore, we make sure that (i) is loaded before (ii) and (iii), using * the 'order' parameter of driver declaration macros. For (i), we specify * SI_ORDER_MIDDLE, while higher orders are used with the DRIVER_MODULE_ORDERED * macros for (ii) and (iii). 
*/ DEV_MODULE_ORDERED(netmap, netmap_loader, NULL, SI_ORDER_MIDDLE); #else /* !DEV_MODULE_ORDERED */ DEV_MODULE(netmap, netmap_loader, NULL); #endif /* DEV_MODULE_ORDERED */ MODULE_DEPEND(netmap, pci, 1, 1, 1); MODULE_VERSION(netmap, 1); /* reduce conditional code */ // linux API, use for the knlist in FreeBSD /* use a private mutex for the knlist */ Index: head/sys/dev/netmap/netmap_kloop.c =================================================================== --- head/sys/dev/netmap/netmap_kloop.c (revision 343548) +++ head/sys/dev/netmap/netmap_kloop.c (revision 343549) @@ -1,962 +1,977 @@ /* * Copyright (C) 2016-2018 Vincenzo Maffione * Copyright (C) 2015 Stefano Garzarella * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * common headers */ #if defined(__FreeBSD__) #include #include #include #include #include #include #include #include #include #define usleep_range(_1, _2) \ pause_sbt("sync-kloop-sleep", SBT_1US * _1, SBT_1US * 1, C_ABSOLUTE) #elif defined(linux) #include #include #include #endif #include #include #include #include /* Support for eventfd-based notifications. */ #if defined(linux) #define SYNC_KLOOP_POLL #endif /* Write kring pointers (hwcur, hwtail) to the CSB. * This routine is coupled with ptnetmap_guest_read_kring_csb(). */ static inline void sync_kloop_kernel_write(struct nm_csb_ktoa __user *ptr, uint32_t hwcur, uint32_t hwtail) { /* Issue a first store-store barrier to make sure writes to the * netmap ring do not overcome updates on ktoa->hwcur and ktoa->hwtail. */ nm_stst_barrier(); /* * The same scheme used in nm_sync_kloop_appl_write() applies here. * We allow the application to read a value of hwcur more recent than the value * of hwtail, since this would anyway result in a consistent view of the * ring state (and hwcur can never wraparound hwtail, since hwcur must be * behind head). * * The following memory barrier scheme is used to make this happen: * * Application Kernel * * STORE(hwcur) LOAD(hwtail) * wmb() <-------------> rmb() * STORE(hwtail) LOAD(hwcur) */ CSB_WRITE(ptr, hwcur, hwcur); nm_stst_barrier(); CSB_WRITE(ptr, hwtail, hwtail); } /* Read kring pointers (head, cur, sync_flags) from the CSB. * This routine is coupled with ptnetmap_guest_write_kring_csb(). 
*/ static inline void sync_kloop_kernel_read(struct nm_csb_atok __user *ptr, struct netmap_ring *shadow_ring, uint32_t num_slots) { /* * We place a memory barrier to make sure that the update of head never * overtakes the update of cur. * (see explanation in sync_kloop_kernel_write). */ CSB_READ(ptr, head, shadow_ring->head); nm_ldld_barrier(); CSB_READ(ptr, cur, shadow_ring->cur); CSB_READ(ptr, sync_flags, shadow_ring->flags); /* Make sure that loads from atok->head and atok->cur are not delayed * after the loads from the netmap ring. */ nm_ldld_barrier(); } /* Enable or disable application --> kernel kicks. */ static inline void csb_ktoa_kick_enable(struct nm_csb_ktoa __user *csb_ktoa, uint32_t val) { CSB_WRITE(csb_ktoa, kern_need_kick, val); } #ifdef SYNC_KLOOP_POLL /* Are application interrupt enabled or disabled? */ static inline uint32_t csb_atok_intr_enabled(struct nm_csb_atok __user *csb_atok) { uint32_t v; CSB_READ(csb_atok, appl_need_kick, v); return v; } #endif /* SYNC_KLOOP_POLL */ static inline void sync_kloop_kring_dump(const char *title, const struct netmap_kring *kring) { nm_prinf("%s, kring %s, hwcur %d, rhead %d, " "rcur %d, rtail %d, hwtail %d", title, kring->name, kring->nr_hwcur, kring->rhead, kring->rcur, kring->rtail, kring->nr_hwtail); } struct sync_kloop_ring_args { struct netmap_kring *kring; struct nm_csb_atok *csb_atok; struct nm_csb_ktoa *csb_ktoa; #ifdef SYNC_KLOOP_POLL struct eventfd_ctx *irq_ctx; #endif /* SYNC_KLOOP_POLL */ }; static void netmap_sync_kloop_tx_ring(const struct sync_kloop_ring_args *a) { struct netmap_kring *kring = a->kring; struct nm_csb_atok *csb_atok = a->csb_atok; struct nm_csb_ktoa *csb_ktoa = a->csb_ktoa; struct netmap_ring shadow_ring; /* shadow copy of the netmap_ring */ bool more_txspace = false; uint32_t num_slots; int batch; num_slots = kring->nkr_num_slots; /* Disable application --> kernel notifications. */ csb_ktoa_kick_enable(csb_ktoa, 0); /* Copy the application kring pointers from the CSB */ sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots); for (;;) { batch = shadow_ring.head - kring->nr_hwcur; if (batch < 0) batch += num_slots; #ifdef PTN_TX_BATCH_LIM if (batch > PTN_TX_BATCH_LIM(num_slots)) { /* If application moves ahead too fast, let's cut the move so * that we don't exceed our batch limit. */ uint32_t head_lim = kring->nr_hwcur + PTN_TX_BATCH_LIM(num_slots); if (head_lim >= num_slots) head_lim -= num_slots; nm_prdis(1, "batch: %d head: %d head_lim: %d", batch, shadow_ring.head, head_lim); shadow_ring.head = head_lim; batch = PTN_TX_BATCH_LIM(num_slots); } #endif /* PTN_TX_BATCH_LIM */ if (nm_kr_txspace(kring) <= (num_slots >> 1)) { shadow_ring.flags |= NAF_FORCE_RECLAIM; } /* Netmap prologue */ shadow_ring.tail = kring->rtail; if (unlikely(nm_txsync_prologue(kring, &shadow_ring) >= num_slots)) { /* Reinit ring and enable notifications. */ netmap_ring_reinit(kring); csb_ktoa_kick_enable(csb_ktoa, 1); break; } if (unlikely(netmap_debug & NM_DEBUG_TXSYNC)) { sync_kloop_kring_dump("pre txsync", kring); } if (unlikely(kring->nm_sync(kring, shadow_ring.flags))) { /* Reenable notifications. */ csb_ktoa_kick_enable(csb_ktoa, 1); nm_prerr("txsync() failed"); break; } /* * Finalize * Copy kernel hwcur and hwtail into the CSB for the application sync(), and * do the nm_sync_finalize. */ sync_kloop_kernel_write(csb_ktoa, kring->nr_hwcur, kring->nr_hwtail); if (kring->rtail != kring->nr_hwtail) { /* Some more room available in the parent adapter. 
*/ kring->rtail = kring->nr_hwtail; more_txspace = true; } if (unlikely(netmap_debug & NM_DEBUG_TXSYNC)) { sync_kloop_kring_dump("post txsync", kring); } /* Interrupt the application if needed. */ #ifdef SYNC_KLOOP_POLL if (a->irq_ctx && more_txspace && csb_atok_intr_enabled(csb_atok)) { /* Disable application kick to avoid sending unnecessary kicks */ eventfd_signal(a->irq_ctx, 1); more_txspace = false; } #endif /* SYNC_KLOOP_POLL */ /* Read CSB to see if there is more work to do. */ sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots); if (shadow_ring.head == kring->rhead) { /* * No more packets to transmit. We enable notifications and * go to sleep, waiting for a kick from the application when * new slots are ready for transmission. */ /* Reenable notifications. */ csb_ktoa_kick_enable(csb_ktoa, 1); /* Double check, with store-load memory barrier. */ nm_stld_barrier(); sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots); if (shadow_ring.head != kring->rhead) { /* We won the race condition, there are more packets to * transmit. Disable notifications and do another cycle */ csb_ktoa_kick_enable(csb_ktoa, 0); continue; } break; } if (nm_kr_txempty(kring)) { /* No more available TX slots. We stop waiting for a notification * from the backend (netmap_tx_irq). */ nm_prdis(1, "TX ring"); break; } } #ifdef SYNC_KLOOP_POLL if (a->irq_ctx && more_txspace && csb_atok_intr_enabled(csb_atok)) { eventfd_signal(a->irq_ctx, 1); } #endif /* SYNC_KLOOP_POLL */ } /* Max number of RX cycles without receiving any packets */ #define SYNC_LOOP_RX_DRY_CYCLES_MAX 2 static inline int sync_kloop_norxslots(struct netmap_kring *kring, uint32_t g_head) { return (NM_ACCESS_ONCE(kring->nr_hwtail) == nm_prev(g_head, kring->nkr_num_slots - 1)); } static void netmap_sync_kloop_rx_ring(const struct sync_kloop_ring_args *a) { struct netmap_kring *kring = a->kring; struct nm_csb_atok *csb_atok = a->csb_atok; struct nm_csb_ktoa *csb_ktoa = a->csb_ktoa; struct netmap_ring shadow_ring; /* shadow copy of the netmap_ring */ int dry_cycles = 0; bool some_recvd = false; uint32_t num_slots; num_slots = kring->nkr_num_slots; /* Disable notifications. */ csb_ktoa_kick_enable(csb_ktoa, 0); /* Copy the application kring pointers from the CSB */ sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots); for (;;) { uint32_t hwtail; /* Netmap prologue */ shadow_ring.tail = kring->rtail; if (unlikely(nm_rxsync_prologue(kring, &shadow_ring) >= num_slots)) { /* Reinit ring and enable notifications. */ netmap_ring_reinit(kring); csb_ktoa_kick_enable(csb_ktoa, 1); break; } if (unlikely(netmap_debug & NM_DEBUG_RXSYNC)) { sync_kloop_kring_dump("pre rxsync", kring); } if (unlikely(kring->nm_sync(kring, shadow_ring.flags))) { /* Reenable notifications. */ csb_ktoa_kick_enable(csb_ktoa, 1); nm_prerr("rxsync() failed"); break; } /* * Finalize * Copy kernel hwcur and hwtail into the CSB for the application sync() */ hwtail = NM_ACCESS_ONCE(kring->nr_hwtail); sync_kloop_kernel_write(csb_ktoa, kring->nr_hwcur, hwtail); if (kring->rtail != hwtail) { kring->rtail = hwtail; some_recvd = true; dry_cycles = 0; } else { dry_cycles++; } if (unlikely(netmap_debug & NM_DEBUG_RXSYNC)) { sync_kloop_kring_dump("post rxsync", kring); } #ifdef SYNC_KLOOP_POLL /* Interrupt the application if needed.
*/ if (a->irq_ctx && some_recvd && csb_atok_intr_enabled(csb_atok)) { /* Disable application kick to avoid sending unnecessary kicks */ eventfd_signal(a->irq_ctx, 1); some_recvd = false; } #endif /* SYNC_KLOOP_POLL */ /* Read CSB to see if there is more work to do. */ sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots); if (sync_kloop_norxslots(kring, shadow_ring.head)) { /* * No more slots available for reception. We enable notification and * go to sleep, waiting for a kick from the application when new receive * slots are available. */ /* Reenable notifications. */ csb_ktoa_kick_enable(csb_ktoa, 1); /* Double check, with store-load memory barrier. */ nm_stld_barrier(); sync_kloop_kernel_read(csb_atok, &shadow_ring, num_slots); if (!sync_kloop_norxslots(kring, shadow_ring.head)) { /* We won the race condition, more slots are available. Disable * notifications and do another cycle. */ csb_ktoa_kick_enable(csb_ktoa, 0); continue; } break; } hwtail = NM_ACCESS_ONCE(kring->nr_hwtail); if (unlikely(hwtail == kring->rhead || dry_cycles >= SYNC_LOOP_RX_DRY_CYCLES_MAX)) { /* No more packets to be read from the backend. We stop and * wait for a notification from the backend (netmap_rx_irq). */ nm_prdis(1, "nr_hwtail: %d rhead: %d dry_cycles: %d", hwtail, kring->rhead, dry_cycles); break; } } nm_kr_put(kring); #ifdef SYNC_KLOOP_POLL /* Interrupt the application if needed. */ if (a->irq_ctx && some_recvd && csb_atok_intr_enabled(csb_atok)) { eventfd_signal(a->irq_ctx, 1); } #endif /* SYNC_KLOOP_POLL */ } #ifdef SYNC_KLOOP_POLL struct sync_kloop_poll_entry { /* Support for receiving notifications from * a netmap ring or from the application. */ struct file *filp; wait_queue_t wait; wait_queue_head_t *wqh; /* Support for sending notifications to the application. */ struct eventfd_ctx *irq_ctx; struct file *irq_filp; }; struct sync_kloop_poll_ctx { poll_table wait_table; unsigned int next_entry; unsigned int num_entries; struct sync_kloop_poll_entry entries[0]; }; static void sync_kloop_poll_table_queue_proc(struct file *file, wait_queue_head_t *wqh, poll_table *pt) { struct sync_kloop_poll_ctx *poll_ctx = container_of(pt, struct sync_kloop_poll_ctx, wait_table); struct sync_kloop_poll_entry *entry = poll_ctx->entries + poll_ctx->next_entry; BUG_ON(poll_ctx->next_entry >= poll_ctx->num_entries); entry->wqh = wqh; entry->filp = file; /* Use the default wake up function. */ init_waitqueue_entry(&entry->wait, current); add_wait_queue(wqh, &entry->wait); poll_ctx->next_entry++; } #endif /* SYNC_KLOOP_POLL */ int netmap_sync_kloop(struct netmap_priv_d *priv, struct nmreq_header *hdr) { struct nmreq_sync_kloop_start *req = (struct nmreq_sync_kloop_start *)(uintptr_t)hdr->nr_body; struct nmreq_opt_sync_kloop_eventfds *eventfds_opt = NULL; #ifdef SYNC_KLOOP_POLL struct sync_kloop_poll_ctx *poll_ctx = NULL; #endif /* SYNC_KLOOP_POLL */ int num_rx_rings, num_tx_rings, num_rings; struct sync_kloop_ring_args *args = NULL; uint32_t sleep_us = req->sleep_us; struct nm_csb_atok* csb_atok_base; struct nm_csb_ktoa* csb_ktoa_base; struct netmap_adapter *na; struct nmreq_option *opt; int err = 0; int i; if (sleep_us > 1000000) { /* We do not accept sleeping for more than a second. */ return EINVAL; } if (priv->np_nifp == NULL) { return ENXIO; } mb(); /* make sure following reads are not from cache */ na = priv->np_na; if (!nm_netmap_on(na)) { return ENXIO; } NMG_LOCK(); /* Make sure the application is working in CSB mode. 
*/ if (!priv->np_csb_atok_base || !priv->np_csb_ktoa_base) { NMG_UNLOCK(); nm_prerr("sync-kloop on %s requires " "NETMAP_REQ_OPT_CSB option", na->name); return EINVAL; } csb_atok_base = priv->np_csb_atok_base; csb_ktoa_base = priv->np_csb_ktoa_base; /* Make sure that no kloop is currently running. */ if (priv->np_kloop_state & NM_SYNC_KLOOP_RUNNING) { err = EBUSY; } priv->np_kloop_state |= NM_SYNC_KLOOP_RUNNING; NMG_UNLOCK(); if (err) { return err; } num_rx_rings = priv->np_qlast[NR_RX] - priv->np_qfirst[NR_RX]; num_tx_rings = priv->np_qlast[NR_TX] - priv->np_qfirst[NR_TX]; num_rings = num_tx_rings + num_rx_rings; args = nm_os_malloc(num_rings * sizeof(args[0])); if (!args) { err = ENOMEM; goto out; } /* Validate notification options. */ opt = nmreq_findoption((struct nmreq_option *)(uintptr_t)hdr->nr_options, NETMAP_REQ_OPT_SYNC_KLOOP_EVENTFDS); if (opt != NULL) { err = nmreq_checkduplicate(opt); if (err) { opt->nro_status = err; goto out; } if (opt->nro_size != sizeof(*eventfds_opt) + sizeof(eventfds_opt->eventfds[0]) * num_rings) { /* Option size not consistent with the number of * entries. */ opt->nro_status = err = EINVAL; goto out; } #ifdef SYNC_KLOOP_POLL eventfds_opt = (struct nmreq_opt_sync_kloop_eventfds *)opt; opt->nro_status = 0; /* We need 2 poll entries for TX and RX notifications coming * from the netmap adapter, plus one entries per ring for the * notifications coming from the application. */ poll_ctx = nm_os_malloc(sizeof(*poll_ctx) + (2 + num_rings) * sizeof(poll_ctx->entries[0])); init_poll_funcptr(&poll_ctx->wait_table, sync_kloop_poll_table_queue_proc); poll_ctx->num_entries = 2 + num_rings; poll_ctx->next_entry = 0; /* Poll for notifications coming from the applications through * eventfds . */ for (i = 0; i < num_rings; i++) { struct eventfd_ctx *irq; struct file *filp; unsigned long mask; filp = eventfd_fget(eventfds_opt->eventfds[i].ioeventfd); if (IS_ERR(filp)) { err = PTR_ERR(filp); goto out; } mask = filp->f_op->poll(filp, &poll_ctx->wait_table); if (mask & POLLERR) { err = EINVAL; goto out; } filp = eventfd_fget(eventfds_opt->eventfds[i].irqfd); if (IS_ERR(filp)) { err = PTR_ERR(filp); goto out; } poll_ctx->entries[i].irq_filp = filp; irq = eventfd_ctx_fileget(filp); if (IS_ERR(irq)) { err = PTR_ERR(irq); goto out; } poll_ctx->entries[i].irq_ctx = irq; } /* Poll for notifications coming from the netmap rings bound to * this file descriptor. */ { - NM_SELINFO_T *si[NR_TXRX]; - NMG_LOCK(); - si[NR_RX] = nm_si_user(priv, NR_RX) ? &na->si[NR_RX] : - &na->rx_rings[priv->np_qfirst[NR_RX]]->si; - si[NR_TX] = nm_si_user(priv, NR_TX) ? &na->si[NR_TX] : - &na->tx_rings[priv->np_qfirst[NR_TX]]->si; + poll_wait(priv->np_filp, priv->np_si[NR_TX], + &poll_ctx->wait_table); + poll_wait(priv->np_filp, priv->np_si[NR_RX], + &poll_ctx->wait_table); NMG_UNLOCK(); - poll_wait(priv->np_filp, si[NR_TX], &poll_ctx->wait_table); - poll_wait(priv->np_filp, si[NR_RX], &poll_ctx->wait_table); } #else /* SYNC_KLOOP_POLL */ opt->nro_status = EOPNOTSUPP; goto out; #endif /* SYNC_KLOOP_POLL */ } /* Prepare the arguments for netmap_sync_kloop_tx_ring() * and netmap_sync_kloop_rx_ring(). 
*/ for (i = 0; i < num_tx_rings; i++) { struct sync_kloop_ring_args *a = args + i; a->kring = NMR(na, NR_TX)[i + priv->np_qfirst[NR_TX]]; a->csb_atok = csb_atok_base + i; a->csb_ktoa = csb_ktoa_base + i; #ifdef SYNC_KLOOP_POLL if (poll_ctx) a->irq_ctx = poll_ctx->entries[i].irq_ctx; #endif /* SYNC_KLOOP_POLL */ } for (i = 0; i < num_rx_rings; i++) { struct sync_kloop_ring_args *a = args + num_tx_rings + i; a->kring = NMR(na, NR_RX)[i + priv->np_qfirst[NR_RX]]; a->csb_atok = csb_atok_base + num_tx_rings + i; a->csb_ktoa = csb_ktoa_base + num_tx_rings + i; #ifdef SYNC_KLOOP_POLL if (poll_ctx) a->irq_ctx = poll_ctx->entries[num_tx_rings + i].irq_ctx; #endif /* SYNC_KLOOP_POLL */ } /* Main loop. */ for (;;) { if (unlikely(NM_ACCESS_ONCE(priv->np_kloop_state) & NM_SYNC_KLOOP_STOPPING)) { break; } #ifdef SYNC_KLOOP_POLL if (poll_ctx) { /* It is important to set the task state as * interruptible before processing any TX/RX ring, * so that if a notification on ring Y comes after * we have processed ring Y, but before we call * schedule(), we don't miss it. This is true because * the wake up function will change the task state, * and therefore the schedule_timeout() call below * will observe the change. */ set_current_state(TASK_INTERRUPTIBLE); } #endif /* SYNC_KLOOP_POLL */ /* Process all the TX rings bound to this file descriptor. */ for (i = 0; i < num_tx_rings; i++) { struct sync_kloop_ring_args *a = args + i; if (unlikely(nm_kr_tryget(a->kring, 1, NULL))) { continue; } netmap_sync_kloop_tx_ring(a); nm_kr_put(a->kring); } /* Process all the RX rings bound to this file descriptor. */ for (i = 0; i < num_rx_rings; i++) { struct sync_kloop_ring_args *a = args + num_tx_rings + i; if (unlikely(nm_kr_tryget(a->kring, 1, NULL))) { continue; } netmap_sync_kloop_rx_ring(a); nm_kr_put(a->kring); } #ifdef SYNC_KLOOP_POLL if (poll_ctx) { /* If a poll context is present, yield to the scheduler * waiting for a notification to come either from * netmap or the application. */ - schedule_timeout(msecs_to_jiffies(20000)); + schedule_timeout(msecs_to_jiffies(3000)); } else #endif /* SYNC_KLOOP_POLL */ { /* Default synchronization method: sleep for a while. */ usleep_range(sleep_us, sleep_us); } } out: #ifdef SYNC_KLOOP_POLL if (poll_ctx) { /* Stop polling from netmap and the eventfds, and deallocate * the poll context. */ __set_current_state(TASK_RUNNING); for (i = 0; i < poll_ctx->next_entry; i++) { struct sync_kloop_poll_entry *entry = poll_ctx->entries + i; if (entry->wqh) remove_wait_queue(entry->wqh, &entry->wait); /* Drop the reference taken on each eventfd file, but not on * the netmap file descriptor, since no reference was taken * on the latter. */ if (entry->filp && entry->filp != priv->np_filp) fput(entry->filp); if (entry->irq_ctx) eventfd_ctx_put(entry->irq_ctx); if (entry->irq_filp) fput(entry->irq_filp); } nm_os_free(poll_ctx); poll_ctx = NULL; } #endif /* SYNC_KLOOP_POLL */ if (args) { nm_os_free(args); args = NULL; } /* Reset the kloop state. */ NMG_LOCK(); priv->np_kloop_state = 0; NMG_UNLOCK(); return err; } int netmap_sync_kloop_stop(struct netmap_priv_d *priv) { + struct netmap_adapter *na; bool running = true; int err = 0; + if (priv->np_nifp == NULL) { + return ENXIO; + } + mb(); /* make sure following reads are not from cache */ + + na = priv->np_na; + if (!nm_netmap_on(na)) { + return ENXIO; + } + + /* Set the kloop stopping flag.
*/ NMG_LOCK(); priv->np_kloop_state |= NM_SYNC_KLOOP_STOPPING; NMG_UNLOCK(); + + /* Send a notification to the kloop, in case it is blocked in + * schedule_timeout(). We can use either RX or TX, because the + * kloop is waiting on both. */ + nm_os_selwakeup(priv->np_si[NR_RX]); + + /* Wait for the kloop to actually terminate. */ while (running) { usleep_range(1000, 1500); NMG_LOCK(); running = (NM_ACCESS_ONCE(priv->np_kloop_state) & NM_SYNC_KLOOP_RUNNING); NMG_UNLOCK(); } return err; } #ifdef WITH_PTNETMAP /* * Guest ptnetmap txsync()/rxsync() routines, used in ptnet device drivers. * These routines are reused across the different operating systems supported * by netmap. */ /* * Reconcile host and guest views of the transmit ring. * * Guest user wants to transmit packets up to the one before ring->head, * and guest kernel knows tx_ring->hwcur is the first packet unsent * by the host kernel. * * We push out as many packets as possible, and possibly * reclaim buffers from previously completed transmission. * * Notifications from the host are enabled only if the user guest would * block (no space in the ring). */ bool netmap_pt_guest_txsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa, struct netmap_kring *kring, int flags) { bool notify = false; /* Disable notifications */ atok->appl_need_kick = 0; /* * First part: tell the host (updating the CSB) to process the new * packets. */ kring->nr_hwcur = ktoa->hwcur; nm_sync_kloop_appl_write(atok, kring->rcur, kring->rhead); /* Ask for a kick from a guest to the host if needed. */ if (((kring->rhead != kring->nr_hwcur || nm_kr_txempty(kring)) && NM_ACCESS_ONCE(ktoa->kern_need_kick)) || (flags & NAF_FORCE_RECLAIM)) { atok->sync_flags = flags; notify = true; } /* * Second part: reclaim buffers for completed transmissions. */ if (nm_kr_txempty(kring) || (flags & NAF_FORCE_RECLAIM)) { nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail, &kring->nr_hwcur); } /* * No more room in the ring for new transmissions. The user thread will * go to sleep and we need to be notified by the host when more free * space is available. */ if (nm_kr_txempty(kring) && !(kring->nr_kflags & NKR_NOINTR)) { /* Reenable notifications. */ atok->appl_need_kick = 1; /* Double check, with store-load memory barrier. */ nm_stld_barrier(); nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail, &kring->nr_hwcur); /* If there is new free space, disable notifications */ if (unlikely(!nm_kr_txempty(kring))) { atok->appl_need_kick = 0; } } nm_prdis(1, "%s CSB(head:%u cur:%u hwtail:%u) KRING(head:%u cur:%u tail:%u)", kring->name, atok->head, atok->cur, ktoa->hwtail, kring->rhead, kring->rcur, kring->nr_hwtail); return notify; } /* * Reconcile host and guest view of the receive ring. * * Update hwcur/hwtail from host (reading from CSB). * * If guest user has released buffers up to the one before ring->head, we * also give them to the host. * * Notifications from the host are enabled only if the user guest would * block (no more completed slots in the ring). */ bool netmap_pt_guest_rxsync(struct nm_csb_atok *atok, struct nm_csb_ktoa *ktoa, struct netmap_kring *kring, int flags) { bool notify = false; /* Disable notifications */ atok->appl_need_kick = 0; /* * First part: import newly received packets, by updating the kring * hwtail to the hwtail known from the host (read from the CSB). * This also updates the kring hwcur. 
*/ nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail, &kring->nr_hwcur); kring->nr_kflags &= ~NKR_PENDINTR; /* * Second part: tell the host about the slots that guest user has * released, by updating cur and head in the CSB. */ if (kring->rhead != kring->nr_hwcur) { nm_sync_kloop_appl_write(atok, kring->rcur, kring->rhead); /* Ask for a kick from the guest to the host if needed. */ if (NM_ACCESS_ONCE(ktoa->kern_need_kick)) { atok->sync_flags = flags; notify = true; } } /* * No more completed RX slots. The user thread will go to sleep and * we need to be notified by the host when more RX slots have been * completed. */ if (nm_kr_rxempty(kring) && !(kring->nr_kflags & NKR_NOINTR)) { /* Reenable notifications. */ atok->appl_need_kick = 1; /* Double check, with store-load memory barrier. */ nm_stld_barrier(); nm_sync_kloop_appl_read(ktoa, &kring->nr_hwtail, &kring->nr_hwcur); /* If there are new slots, disable notifications. */ if (!nm_kr_rxempty(kring)) { atok->appl_need_kick = 0; } } nm_prdis(1, "%s CSB(head:%u cur:%u hwtail:%u) KRING(head:%u cur:%u tail:%u)", kring->name, atok->head, atok->cur, ktoa->hwtail, kring->rhead, kring->rcur, kring->nr_hwtail); return notify; } /* * Callbacks for ptnet drivers: nm_krings_create, nm_krings_delete, nm_dtor. */ int ptnet_nm_krings_create(struct netmap_adapter *na) { struct netmap_pt_guest_adapter *ptna = (struct netmap_pt_guest_adapter *)na; /* Upcast. */ struct netmap_adapter *na_nm = &ptna->hwup.up; struct netmap_adapter *na_dr = &ptna->dr.up; int ret; if (ptna->backend_users) { return 0; } /* Create krings on the public netmap adapter. */ ret = netmap_hw_krings_create(na_nm); if (ret) { return ret; } /* Copy krings into the netmap adapter private to the driver. */ na_dr->tx_rings = na_nm->tx_rings; na_dr->rx_rings = na_nm->rx_rings; return 0; } void ptnet_nm_krings_delete(struct netmap_adapter *na) { struct netmap_pt_guest_adapter *ptna = (struct netmap_pt_guest_adapter *)na; /* Upcast. */ struct netmap_adapter *na_nm = &ptna->hwup.up; struct netmap_adapter *na_dr = &ptna->dr.up; if (ptna->backend_users) { return; } na_dr->tx_rings = NULL; na_dr->rx_rings = NULL; netmap_hw_krings_delete(na_nm); } void ptnet_nm_dtor(struct netmap_adapter *na) { struct netmap_pt_guest_adapter *ptna = (struct netmap_pt_guest_adapter *)na; netmap_mem_put(ptna->dr.up.nm_mem); memset(&ptna->dr, 0, sizeof(ptna->dr)); netmap_mem_pt_guest_ifp_del(na->nm_mem, na->ifp); } int netmap_pt_guest_attach(struct netmap_adapter *arg, unsigned int nifp_offset, unsigned int memid) { struct netmap_pt_guest_adapter *ptna; struct ifnet *ifp = arg ? arg->ifp : NULL; int error; /* get allocator */ arg->nm_mem = netmap_mem_pt_guest_new(ifp, nifp_offset, memid); if (arg->nm_mem == NULL) return ENOMEM; arg->na_flags |= NAF_MEM_OWNER; error = netmap_attach_ext(arg, sizeof(struct netmap_pt_guest_adapter), 1); if (error) return error; /* get the netmap_pt_guest_adapter */ ptna = (struct netmap_pt_guest_adapter *) NA(ifp); /* Initialize a separate pass-through netmap adapter that is going to * be used by the ptnet driver only, and so never exposed to netmap * applications. We only need a subset of the available fields. */ memset(&ptna->dr, 0, sizeof(ptna->dr)); ptna->dr.up.ifp = ifp; ptna->dr.up.nm_mem = netmap_mem_get(ptna->hwup.up.nm_mem); ptna->dr.up.nm_config = ptna->hwup.up.nm_config; ptna->backend_users = 0; return 0; } #endif /* WITH_PTNETMAP */
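The index manager behind nm_vi_get_index() and nm_vi_free_index() above (the code notes it is the same algorithm used for bridge port indices) keeps every possible value in one fixed array and splits it with a single 'active' counter: positions before the boundary hold the values currently handed out, positions after it hold the free ones. Allocation pops the value sitting at the boundary; freeing swaps the released value back into the last in-use position and shrinks the boundary. The following minimal userspace sketch shows the same scheme; the names (POOL_MAX, pool_get, pool_put) are invented for illustration and the kernel mutex is omitted.

/* index_pool.c -- standalone sketch of the swap-with-last index pool.
 * Build with: cc -std=c11 index_pool.c
 */
#include <stdint.h>
#include <stdio.h>

#define POOL_MAX 255

static uint8_t pool[POOL_MAX];
static int pool_active;	/* pool[0..pool_active-1] are in use */

static void
pool_init(void)
{
	for (int i = 0; i < POOL_MAX; i++)
		pool[i] = i;
	pool_active = 0;
}

/* Return a free index, or -1 if the pool is exhausted. */
static int
pool_get(void)
{
	return pool_active == POOL_MAX ? -1 : pool[pool_active++];
}

/* Return 'val' to the pool by swapping it into the last in-use slot. */
static void
pool_put(uint8_t val)
{
	int lim = pool_active;

	for (int i = 0; i < lim; i++) {
		if (pool[i] == val) {
			pool[i] = pool[lim - 1];
			pool[lim - 1] = val;
			pool_active--;
			return;
		}
	}
	printf("index %u was not allocated\n", val);
}

int
main(void)
{
	pool_init();
	int a = pool_get();
	int b = pool_get();
	int c = pool_get();
	printf("got %d %d %d\n", a, b, c);	/* prints: got 0 1 2 */
	pool_put(a);				/* 0 becomes free again */
	printf("got %d\n", pool_get());		/* prints: got 0 */
	(void)b; (void)c;
	return 0;
}

Allocation is O(1); the only linear cost is the search in pool_put(), bounded by the number of in-use entries, exactly as in nm_vi_free_index().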
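sync_kloop_kernel_write() above publishes the kring pointers without locks by relying purely on ordering: hwcur is stored before hwtail across a store-store barrier, and the peer loads them in the opposite order across a load-load barrier, so the worst a reader can see is a hwcur more recent than the hwtail it read, which the comment in the code notes is the harmless direction. Below is a minimal userspace sketch of that discipline, using C11 fences where the kernel code uses nm_stst_barrier()/nm_ldld_barrier(); the names (toy_csb, toy_csb_write, toy_csb_read) are invented and none of the netmap ring semantics are modeled, only the ordering.

/* toy_csb.c -- ordering sketch. Build with: cc -std=c11 -pthread toy_csb.c */
#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct toy_csb {
	_Atomic uint32_t hwcur;
	_Atomic uint32_t hwtail;
};

static struct toy_csb csb;

/* Writer side, mirroring sync_kloop_kernel_write(): hwcur is published
 * before hwtail, separated by a store-store barrier. */
static void
toy_csb_write(uint32_t cur, uint32_t tail)
{
	atomic_store_explicit(&csb.hwcur, cur, memory_order_relaxed);
	atomic_thread_fence(memory_order_release);	/* store-store */
	atomic_store_explicit(&csb.hwtail, tail, memory_order_relaxed);
}

/* Reader side: loads in the opposite order, separated by a load-load
 * barrier, so it can never pair a fresh hwtail with a stale hwcur. */
static void
toy_csb_read(uint32_t *cur, uint32_t *tail)
{
	*tail = atomic_load_explicit(&csb.hwtail, memory_order_relaxed);
	atomic_thread_fence(memory_order_acquire);	/* load-load */
	*cur = atomic_load_explicit(&csb.hwcur, memory_order_relaxed);
}

static void *
writer(void *arg)
{
	(void)arg;
	for (uint32_t n = 1; n <= 1000000; n++)
		toy_csb_write(n, n);	/* both pointers advance together */
	return NULL;
}

int
main(void)
{
	pthread_t t;
	uint32_t cur, tail;

	pthread_create(&t, NULL, writer, NULL);
	for (int i = 0; i < 1000000; i++) {
		toy_csb_read(&cur, &tail);
		if (cur < tail)	/* a hwtail newer than hwcur: must not happen */
			printf("ordering violated: hwcur %u hwtail %u\n",
			    cur, tail);
	}
	pthread_join(t, NULL);
	printf("done\n");
	return 0;
}

With the two loads reversed (hwcur first), the check in main() could fire, because the reader could then pair a stale hwcur with a fresh hwtail.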
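netmap_sync_kloop_tx_ring(), netmap_sync_kloop_rx_ring(), netmap_pt_guest_txsync() and netmap_pt_guest_rxsync() all follow the same pattern before sleeping: re-enable the peer's kick, issue a store-load barrier (nm_stld_barrier()), and re-read the shared state, canceling the sleep if new work appeared in the meantime. The sketch below reproduces that pattern in userspace, with a semaphore standing in for the eventfd/KNOTE wakeup; all names (queued, need_kick, producer) are invented. It only illustrates why the double check is needed: the producer signals only when it sees the flag set, so a consumer that skipped the re-check could go to sleep on work that was queued just before it raised the flag.

/* kick.c -- "enable kick, then double check" sketch.
 * Build with: cc -std=c11 -pthread kick.c
 */
#include <pthread.h>
#include <semaphore.h>
#include <stdatomic.h>
#include <stdio.h>

#define TOTAL 100000

static _Atomic int queued;	/* work items produced so far */
static _Atomic int need_kick;	/* consumer wants a wakeup */
static sem_t kick;

static void *
producer(void *arg)
{
	(void)arg;
	for (int n = 1; n <= TOTAL; n++) {
		atomic_fetch_add(&queued, 1);		/* publish work */
		/* Kick the consumer only if it asked for it. */
		if (atomic_exchange(&need_kick, 0))
			sem_post(&kick);
	}
	/* One last kick; with a counting semaphore an extra post is harmless. */
	sem_post(&kick);
	return NULL;
}

int
main(void)
{
	pthread_t t;
	int done = 0;

	sem_init(&kick, 0, 0);
	pthread_create(&t, NULL, producer, NULL);
	while (done < TOTAL) {
		int avail = atomic_load(&queued) - done;
		if (avail > 0) {
			done += avail;			/* "process" the work */
			continue;
		}
		/* No work: ask for a kick, then double check before sleeping. */
		atomic_store(&need_kick, 1);
		atomic_thread_fence(memory_order_seq_cst);	/* store-load */
		if (atomic_load(&queued) - done > 0) {
			atomic_store(&need_kick, 0);	/* won the race */
			continue;
		}
		sem_wait(&kick);
	}
	pthread_join(t, NULL);
	printf("processed %d items\n", done);
	return 0;
}

Dropping the re-check, or the barrier that keeps the flag store from being reordered after the re-read, reintroduces the lost-wakeup window that the kloop code guards against.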