Index: head/sys/dev/netmap/netmap_freebsd.c
===================================================================
--- head/sys/dev/netmap/netmap_freebsd.c	(revision 348021)
+++ head/sys/dev/netmap/netmap_freebsd.c	(revision 348022)
@@ -1,1618 +1,1619 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (C) 2013-2014 Universita` di Pisa. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *   1. Redistributions of source code must retain the above copyright
  *      notice, this list of conditions and the following disclaimer.
  *   2. Redistributions in binary form must reproduce the above copyright
  *      notice, this list of conditions and the following disclaimer in the
  *      documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /* $FreeBSD$ */
 #include "opt_inet.h"
 #include "opt_inet6.h"
 
 #include <sys/param.h>
 #include <sys/module.h>
 #include <sys/errno.h>
+#include <sys/eventhandler.h>
 #include <sys/jail.h>
 #include <sys/poll.h>  /* POLLIN, POLLOUT */
 #include <sys/kernel.h> /* types used in module initialization */
 #include <sys/conf.h>	/* DEV_MODULE_ORDERED */
 #include <sys/endian.h>
 #include <sys/syscallsubr.h> /* kern_ioctl() */
 
 #include <sys/rwlock.h>
 
 #include <vm/vm.h>      /* vtophys */
 #include <vm/pmap.h>    /* vtophys */
 #include <vm/vm_param.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pager.h>
 #include <vm/uma.h>
 
 
 #include <sys/malloc.h>
 #include <sys/socket.h> /* sockaddrs */
 #include <sys/selinfo.h>
 #include <sys/kthread.h> /* kthread_add() */
 #include <sys/proc.h> /* PROC_LOCK() */
 #include <sys/unistd.h> /* RFNOWAIT */
 #include <sys/sched.h> /* sched_bind() */
 #include <sys/smp.h> /* mp_maxid */
 #include <sys/taskqueue.h> /* taskqueue_enqueue(), taskqueue_create(), ... */
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_types.h> /* IFT_ETHER */
 #include <net/ethernet.h> /* ether_ifdetach */
 #include <net/if_dl.h> /* LLADDR */
 #include <machine/bus.h>        /* bus_dmamap_* */
 #include <netinet/in.h>		/* in6_cksum_pseudo() */
 #include <machine/in_cksum.h>  /* in_pseudo(), in_cksum_hdr() */
 
 #include <net/netmap.h>
 #include <dev/netmap/netmap_kern.h>
 #include <net/netmap_virt.h>
 #include <dev/netmap/netmap_mem2.h>
 
 
 /* ======================== FREEBSD-SPECIFIC ROUTINES ================== */
 
 /*
  * Taskqueue handler (installed by TASK_INIT() in nm_os_selinfo_init()):
  * posts a kqueue notification on the knote list of the nm_selinfo passed
  * as 'opaque'. The 'pending' argument is not used.
  */
 static void
 nm_kqueue_notify(void *opaque, int pending)
 {
 	struct nm_selinfo *si = opaque;
 
 	/* We use a non-zero hint to distinguish this notification call
 	 * from the call done in kqueue_scan(), which uses hint=0.
 	 */
 	KNOTE_UNLOCKED(&si->si.si_note, /*hint=*/0x100);
 }
 
 /*
  * Initialize the OS-specific state of a netmap selinfo: the notification
  * task and its private taskqueue (with one thread), the mutex and the
  * knote list protected by that mutex.
  *
  * Returns 0 on success, or a positive errno value on failure (ENOMEM if
  * the taskqueue cannot be created, or the error reported by
  * taskqueue_start_threads()). On failure si->ntfytq is NULL, which is
  * what nm_os_selinfo_uninit() checks to detect an uninitialized 'si'.
  */
 int nm_os_selinfo_init(NM_SELINFO_T *si, const char *name) {
 	int err;
 
 	TASK_INIT(&si->ntfytask, 0, nm_kqueue_notify, si);
 	si->ntfytq = taskqueue_create(name, M_NOWAIT,
 	    taskqueue_thread_enqueue, &si->ntfytq);
 	if (si->ntfytq == NULL)
 		return ENOMEM;	/* positive errno, like every other path */
 	err = taskqueue_start_threads(&si->ntfytq, 1, PI_NET, "tq %s", name);
 	if (err) {
 		taskqueue_free(si->ntfytq);
 		si->ntfytq = NULL;
 		return err;
 	}
 
 	/* The mutex name must outlive the mutex, so it is stored in 'si'. */
 	snprintf(si->mtxname, sizeof(si->mtxname), "nmkl%s", name);
 	mtx_init(&si->m, si->mtxname, NULL, MTX_DEF);
 	knlist_init_mtx(&si->si.si_note, &si->m);
 	si->kqueue_users = 0;
 
 	return (0);
 }
 
 /*
  * Release what nm_os_selinfo_init() set up for 'si'. A NULL si->ntfytq
  * is taken to mean that 'si' was never initialized, and nothing is done.
  */
 void
 nm_os_selinfo_uninit(NM_SELINFO_T *si)
 {
 	if (si->ntfytq == NULL) {
 		return;	/* si was not initialized */
 	}
 	/* Drain the notification task before freeing its taskqueue. */
 	taskqueue_drain(si->ntfytq, &si->ntfytask);
 	taskqueue_free(si->ntfytq);
 	si->ntfytq = NULL;
 	knlist_delete(&si->si.si_note, curthread, /*islocked=*/0);
 	knlist_destroy(&si->si.si_note);
 	/* now we don't need the mutex anymore */
 	mtx_destroy(&si->m);
 }
 
 /*
  * Allocate 'size' bytes of type M_DEVBUF with M_NOWAIT | M_ZERO.
  * The result of malloc() is returned as-is.
  */
 void *
 nm_os_malloc(size_t size)
 {
 	return malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
 }
 
 /*
  * Resize the M_DEVBUF allocation at 'addr' to 'new_size' bytes, with
  * M_NOWAIT | M_ZERO. 'old_size' is not used on FreeBSD.
  */
 void *
 nm_os_realloc(void *addr, size_t new_size, size_t old_size __unused)
 {
 	return realloc(addr, new_size, M_DEVBUF, M_NOWAIT | M_ZERO);
 }
 
 /* Free memory of type M_DEVBUF, e.g. obtained from nm_os_malloc(). */
 void
 nm_os_free(void *addr)
 {
 	free(addr, M_DEVBUF);
 }
 
 /* Take the ifnet lock in read mode (IFNET_RLOCK). */
 void
 nm_os_ifnet_lock(void)
 {
 	IFNET_RLOCK();
 }
 
 /* Release the read lock taken by nm_os_ifnet_lock(). */
 void
 nm_os_ifnet_unlock(void)
 {
 	IFNET_RUNLOCK();
 }
 
 /* Number of outstanding nm_os_get_module() calls not yet put back. */
 static int netmap_use_count = 0;
 
 /*
  * Increment the module use count.
  * NOTE(review): plain (non-atomic) increment; presumably callers
  * serialize through a global lock -- confirm.
  */
 void
 nm_os_get_module(void)
 {
 	netmap_use_count++;
 }
 
 /* Decrement the module use count (counterpart of nm_os_get_module()). */
 void
 nm_os_put_module(void)
 {
 	netmap_use_count--;
 }
 
 /* ifnet_arrival_event handler: forwards 'ifp' to netmap_undo_zombie(). */
 static void
 netmap_ifnet_arrival_handler(void *arg __unused, struct ifnet *ifp)
 {
 	netmap_undo_zombie(ifp);
 }
 
 /* ifnet_departure_event handler: forwards 'ifp' to netmap_make_zombie(). */
 static void
 netmap_ifnet_departure_handler(void *arg __unused, struct ifnet *ifp)
 {
 	netmap_make_zombie(ifp);
 }
 
 /* Tags of the registered handlers, needed to deregister them later. */
 static eventhandler_tag nm_ifnet_ah_tag;
 static eventhandler_tag nm_ifnet_dh_tag;
 
 /*
  * Register the ifnet arrival and departure event handlers and remember
  * their tags. Always returns 0.
  * NOTE(review): the tags returned by EVENTHANDLER_REGISTER() are not
  * checked here.
  */
 int
 nm_os_ifnet_init(void)
 {
 	nm_ifnet_ah_tag =
 		EVENTHANDLER_REGISTER(ifnet_arrival_event,
 				netmap_ifnet_arrival_handler,
 				NULL, EVENTHANDLER_PRI_ANY);
 	nm_ifnet_dh_tag =
 		EVENTHANDLER_REGISTER(ifnet_departure_event,
 				netmap_ifnet_departure_handler,
 				NULL, EVENTHANDLER_PRI_ANY);
 	return 0;
 }
 
 /* Deregister the two event handlers installed by nm_os_ifnet_init(). */
 void
 nm_os_ifnet_fini(void)
 {
 	EVENTHANDLER_DEREGISTER(ifnet_arrival_event,
 			nm_ifnet_ah_tag);
 	EVENTHANDLER_DEREGISTER(ifnet_departure_event,
 			nm_ifnet_dh_tag);
 }
 
 /*
  * Return the MTU of 'ifp'. The field holding it is selected at compile
  * time according to __FreeBSD_version.
  */
 unsigned
 nm_os_ifnet_mtu(struct ifnet *ifp)
 {
 #if __FreeBSD_version < 1100030
 	return ifp->if_data.ifi_mtu;
 #else /* __FreeBSD_version >= 1100030 */
 	return ifp->if_mtu;
 #endif
 }
 
 /*
  * Add the 'len' bytes at 'data' to the running raw checksum 'cur_sum'
  * and return the new sum. The buffer is read as big-endian 16-bit words
  * converted to host order; no folding or complementing is done here.
  */
 rawsum_t
 nm_os_csum_raw(uint8_t *data, size_t len, rawsum_t cur_sum)
 {
 	/* TODO XXX please use the FreeBSD implementation for this. */
 	uint16_t *w16 = (uint16_t *)data;
 	int pairs = len / 2;
 	int k = 0;
 
 	/* Accumulate every complete 16-bit word. */
 	while (k < pairs) {
 		cur_sum += be16toh(w16[k]);
 		k++;
 	}
 
 	/* A trailing odd byte counts as the high half of a padded word. */
 	if ((len & 1) != 0)
 		cur_sum += (data[len-1] << 8);
 
 	return cur_sum;
 }
 
 /*
  * Fold a raw checksum down to 16 bits and complement it.
  * 'cur_sum' is in host byte order; the returned value is in network
  * byte order.
  */
 uint16_t
 nm_os_csum_fold(rawsum_t cur_sum)
 {
 	/* TODO XXX please use the FreeBSD implementation for this. */
 	/* Keep adding the carries back in until none are left. */
 	while ((cur_sum >> 16) != 0) {
 		rawsum_t carry = cur_sum >> 16;
 
 		cur_sum = (cur_sum & 0xFFFF) + carry;
 	}
 
 	return htobe16((~cur_sum) & 0xFFFF);
 }
 
 uint16_t nm_os_csum_ipv4(struct nm_iphdr *iph)
 {
 #if 0
 	return in_cksum_hdr((void *)iph);
 #else
 	return nm_os_csum_fold(nm_os_csum_raw((uint8_t*)iph, sizeof(struct nm_iphdr), 0));
 #endif
 }
 
 /*
  * Compute a TCP/UDP checksum over IPv4 and store it through 'check'.
  * 'data'/'datalen' describe the TCP/UDP header plus payload; 'iph' is
  * used for the pseudo-header fields. Without INET the function only
  * logs (once) that the operation is not supported.
  */
 void
 nm_os_csum_tcpudp_ipv4(struct nm_iphdr *iph, void *data,
 					size_t datalen, uint16_t *check)
 {
 #ifdef INET
 	uint16_t pseudolen = datalen + iph->protocol;
 
 	/* Compute and insert the pseudo-header checksum. */
 	*check = in_pseudo(iph->saddr, iph->daddr,
 				 htobe16(pseudolen));
 	/* Compute the checksum on TCP/UDP header + payload
 	 * (includes the pseudo-header).
 	 * NOTE(review): this relies on 'check' pointing inside 'data',
 	 * so that the value stored above is part of the sum -- confirm
 	 * against callers.
 	 */
 	*check = nm_os_csum_fold(nm_os_csum_raw(data, datalen, 0));
 #else
 	static int notsupported = 0;
 	if (!notsupported) {
 		notsupported = 1;
 		nm_prerr("inet4 segmentation not supported");
 	}
 #endif
 }
 
 /*
  * IPv6 counterpart of nm_os_csum_tcpudp_ipv4(): store the pseudo-header
  * checksum through 'check', then overwrite it with the folded sum over
  * 'data'. Without INET6 the function only logs (once) that the
  * operation is not supported.
  */
 void
 nm_os_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data,
 					size_t datalen, uint16_t *check)
 {
 #ifdef INET6
 	/* Pseudo-header sum first; presumably 'check' lies inside 'data'
 	 * so the second pass includes it -- confirm against callers. */
 	*check = in6_cksum_pseudo((void*)ip6h, datalen, ip6h->nexthdr, 0);
 	*check = nm_os_csum_fold(nm_os_csum_raw(data, datalen, 0));
 #else
 	static int notsupported = 0;
 	if (!notsupported) {
 		notsupported = 1;
 		nm_prerr("inet6 segmentation not supported");
 	}
 #endif
 }
 
 /* on FreeBSD we send up one packet at a time:
  * 'm' is handed to the if_input routine saved in the netmap adapter.
  * 'prev' is not used and the return value is always NULL.
  */
 void *
 nm_os_send_up(struct ifnet *ifp, struct mbuf *m, struct mbuf *prev)
 {
 	NA(ifp)->if_input(ifp, m);
 	return NULL;
 }
 
 /*
  * Return non-zero if the mbuf carries any of the TCP/UDP/SCTP checksum
  * offload flags (IPv4 or IPv6 variants) in its packet header.
  */
 int
 nm_os_mbuf_has_csum_offld(struct mbuf *m)
 {
 	return m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP | CSUM_SCTP |
 					 CSUM_TCP_IPV6 | CSUM_UDP_IPV6 |
 					 CSUM_SCTP_IPV6);
 }
 
 /* Return non-zero if the mbuf has a CSUM_TSO flag set. */
 int
 nm_os_mbuf_has_seg_offld(struct mbuf *m)
 {
 	return m->m_pkthdr.csum_flags & CSUM_TSO;
 }
 
 /*
  * if_input replacement installed by nm_os_catch_rx(). The mbuf is first
  * offered to generic_rx_handler(); if that does not take it, it is
  * passed on to the original if_input saved in the generic adapter.
  *
  * As an input handler this function owns 'm': every path must either
  * hand it over or free it.
  */
 static void
 freebsd_generic_rx_handler(struct ifnet *ifp, struct mbuf *m)
 {
 	int stolen;
 
 	if (unlikely(!NM_NA_VALID(ifp))) {
 		nm_prlim(1, "Warning: RX packet intercepted, but no"
 				" emulated adapter");
 		/* Nobody else will consume the packet: drop it here
 		 * instead of leaking the mbuf. */
 		m_freem(m);
 		return;
 	}
 
 	stolen = generic_rx_handler(ifp, m);
 	if (!stolen) {
 		struct netmap_generic_adapter *gna =
 				(struct netmap_generic_adapter *)NA(ifp);
 		gna->save_if_input(ifp, m);
 	}
 }
 
 /*
  * Install or remove the RX intercept on the interface behind 'gna'.
  * A non-zero 'intercept' saves the current if_input and replaces it with
  * freebsd_generic_rx_handler; zero restores the saved routine.
  * Returns 0 on success, EBUSY if an intercept is already in place,
  * EINVAL if there is nothing to restore. The ifnet lock is held for the
  * whole operation.
  */
 int
 nm_os_catch_rx(struct netmap_generic_adapter *gna, int intercept)
 {
 	struct netmap_adapter *na = &gna->up.up;
 	struct ifnet *ifp = na->ifp;
 	int ret = 0;
 
 	nm_os_ifnet_lock();
 	if (intercept && gna->save_if_input) {
 		/* already set */
 		nm_prerr("RX on %s already intercepted", na->name);
 		ret = EBUSY;
 	} else if (intercept) {
 		gna->save_if_input = ifp->if_input;
 		ifp->if_input = freebsd_generic_rx_handler;
 	} else if (!gna->save_if_input) {
 		/* not saved */
 		nm_prerr("Failed to undo RX intercept on %s",
 			na->name);
 		ret = EINVAL;
 	} else {
 		ifp->if_input = gna->save_if_input;
 		gna->save_if_input = NULL;
 	}
 	nm_os_ifnet_unlock();
 
 	return ret;
 }
 
 
 /*
  * Intercept the packet steering routine in the tx path,
  * so that we can decide which queue is used for an mbuf.
  * Second argument is non-zero to intercept, 0 to restore.
  * On freebsd we just intercept if_transmit.
  *
  * The original if_transmit is saved in the netmap adapter and replaced
  * by netmap_transmit; restoring copies the saved pointer back.
  * Always returns 0.
  */
 int
 nm_os_catch_tx(struct netmap_generic_adapter *gna, int intercept)
 {
 	struct netmap_adapter *na = &gna->up.up;
 	struct ifnet *ifp = netmap_generic_getifp(gna);
 
 	nm_os_ifnet_lock();
 	if (intercept) {
 		na->if_transmit = ifp->if_transmit;
 		ifp->if_transmit = netmap_transmit;
 	} else {
 		/* NOTE(review): no check that an intercept was installed. */
 		ifp->if_transmit = na->if_transmit;
 	}
 	nm_os_ifnet_unlock();
 
 	return 0;
 }
 
 
 /*
  * Transmit routine used by generic_netmap_txsync(). Returns 0 on success
  * and non-zero on error (which may be packet drops or other errors).
  * addr and len identify the netmap buffer, m is the (preallocated)
  * mbuf to use for transmissions.
  *
  * We should add a reference to the mbuf so the m_freem() at the end
  * of the transmission does not consume resources.
  *
  * On FreeBSD, and on multiqueue cards, we can force the queue using
  *      if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
  *              i = m->m_pkthdr.flowid % adapter->num_queues;
  *      else
  *              i = curcpu % adapter->num_queues;
  *
  * In this implementation the return value is 0 or -1: any non-zero
  * result of the saved if_transmit routine is reported as -1.
  */
 int
 nm_os_generic_xmit_frame(struct nm_os_gen_arg *a)
 {
 	int ret;
 	u_int len = a->len;
 	struct ifnet *ifp = a->ifp;
 	struct mbuf *m = a->m;
 
 #if __FreeBSD_version < 1100000
 	/*
 	 * Old FreeBSD versions. The mbuf has a cluster attached,
 	 * and the netmap buffer is copied into that cluster.
 	 */
 	if (MBUF_REFCNT(m) != 1) {
 		nm_prerr("invalid refcnt %d for %p", MBUF_REFCNT(m), m);
 		panic("in generic_xmit_frame");
 	}
 	/* Clamp the length to the cluster size rather than overflow it. */
 	if (m->m_ext.ext_size < len) {
 		nm_prlim(2, "size %d < len %d", m->m_ext.ext_size, len);
 		len = m->m_ext.ext_size;
 	}
 	bcopy(a->addr, m->m_data, len);
 #else  /* __FreeBSD_version >= 1100000 */
 	/* New FreeBSD versions. Link the external storage to
 	 * the netmap buffer, so that no copy is necessary. */
 	m->m_ext.ext_buf = m->m_data = a->addr;
 	m->m_ext.ext_size = len;
 #endif /* __FreeBSD_version >= 1100000 */
 
 	m->m_len = m->m_pkthdr.len = len;
 
 	/* mbuf refcnt is not contended, no need to use atomic
 	 * (a memory barrier is enough). */
 	SET_MBUF_REFCNT(m, 2);
 	/* The flowid carries the netmap ring number. */
 	M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
 	m->m_pkthdr.flowid = a->ring_nr;
 	m->m_pkthdr.rcvif = ifp; /* used for tx notification */
 	ret = NA(ifp)->if_transmit(ifp, m);
 	return ret ? -1 : 0;
 }
 
 
 #if __FreeBSD_version >= 1100005
 /* Return the netmap adapter attached to 'ifp' (an if_t handle). */
 struct netmap_adapter *
 netmap_getna(if_t ifp)
 {
 	return (NA((struct ifnet *)ifp));
 }
 #endif /* __FreeBSD_version >= 1100005 */
 
 /*
  * The following two functions are empty until we have a generic
  * way to extract the info from the ifp
  *
  * This one leaves *tx and *rx untouched and returns 0.
  */
 int
 nm_os_generic_find_num_desc(struct ifnet *ifp, unsigned int *tx, unsigned int *rx)
 {
 	return 0;
 }
 
 
 /*
  * Report the number of TX and RX queues to use for 'ifp'. Both are set
  * to netmap_generic_rings, or to 1 when that tunable is zero; the
  * interface itself is not inspected.
  */
 void
 nm_os_generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq)
 {
 	unsigned num_rings = 1;
 
 	if (netmap_generic_rings)
 		num_rings = netmap_generic_rings;
 
 	*rxq = num_rings;
 	*txq = num_rings;
 }
 
 /* Set the feature flags of a generic adapter for FreeBSD. */
 void
 nm_os_generic_set_features(struct netmap_generic_adapter *gna)
 {
 
 	gna->rxsg = 1; /* Supported through m_copydata. */
 	gna->txqdisc = 0; /* Not supported. */
 }
 
 /*
  * Initialize a mitigation descriptor: no pending event, and record the
  * ring index and the adapter it refers to.
  */
 void
 nm_os_mitigation_init(struct nm_generic_mit *mit, int idx, struct netmap_adapter *na)
 {
 	mit->mit_pending = 0;
 	mit->mit_ring_idx = idx;
 	mit->mit_na = na;
 }
 
 
 /* No-op in this file. */
 void
 nm_os_mitigation_start(struct nm_generic_mit *mit)
 {
 }
 
 
 /* No-op in this file. */
 void
 nm_os_mitigation_restart(struct nm_generic_mit *mit)
 {
 }
 
 
 /* Always reports the mitigation as inactive (returns 0). */
 int
 nm_os_mitigation_active(struct nm_generic_mit *mit)
 {
 
 	return 0;
 }
 
 
 /* No-op in this file. */
 void
 nm_os_mitigation_cleanup(struct nm_generic_mit *mit)
 {
 }
 
 /*
  * Placeholder installed as if_ioctl (and, through a cast, if_init) of
  * the persistent virtual interface: rejects everything with EINVAL.
  */
 static int
 nm_vi_dummy(struct ifnet *ifp, u_long cmd, caddr_t addr)
 {
 
 	return EINVAL;
 }
 
 /* if_start of the persistent virtual interface: panics if ever called. */
 static void
 nm_vi_start(struct ifnet *ifp)
 {
 	panic("nm_vi_start() must not be called");
 }
 
 /*
  * Index manager of persistent virtual interfaces.
  * It is used to decide the lowest byte of the MAC address.
  * We use the same algorithm with management of bridge port index.
  *
  * index[0 .. active-1] holds the indices currently in use and the rest
  * of the array the free ones; 'lock' protects the whole structure.
  */
 #define NM_VI_MAX	255
 static struct {
 	uint8_t index[NM_VI_MAX]; /* XXX just for a reasonable number */
 	uint8_t active;
 	struct mtx lock;
 } nm_vi_indices;
 
 /*
  * Reset the index manager: slot k holds index k, no index is in use,
  * and the protecting mutex is initialized.
  */
 void
 nm_os_vi_init_index(void)
 {
 	int slot = 0;
 
 	while (slot < NM_VI_MAX) {
 		nm_vi_indices.index[slot] = slot;
 		slot++;
 	}
 	nm_vi_indices.active = 0;
 	mtx_init(&nm_vi_indices.lock, "nm_vi_indices_lock", NULL, MTX_DEF);
 }
 
 /*
  * Take the next free index and mark it as in use.
  * Returns the index, or -1 if all NM_VI_MAX indices are taken.
  */
 static int
 nm_vi_get_index(void)
 {
 	int ret = -1;
 
 	mtx_lock(&nm_vi_indices.lock);
 	if (nm_vi_indices.active != NM_VI_MAX) {
 		ret = nm_vi_indices.index[nm_vi_indices.active];
 		nm_vi_indices.active++;
 	}
 	mtx_unlock(&nm_vi_indices.lock);
 	return ret;
 }
 
 /*
  * Give index 'val' back to the free pool. The slot holding 'val' is
  * swapped with the last in-use slot and the in-use count is decreased.
  * An error is logged if 'val' is not among the in-use indices.
  */
 static void
 nm_vi_free_index(uint8_t val)
 {
 	int pos, last, found = 0;
 
 	mtx_lock(&nm_vi_indices.lock);
 	last = nm_vi_indices.active - 1;
 	for (pos = 0; pos <= last && !found; pos++) {
 		if (nm_vi_indices.index[pos] != val)
 			continue;
 		/* swap the matching slot with the last in-use one */
 		nm_vi_indices.index[pos] = nm_vi_indices.index[last];
 		nm_vi_indices.index[last] = val;
 		nm_vi_indices.active--;
 		found = 1;
 	}
 	if (!found)
 		nm_prerr("Index %u not found", val);
 	mtx_unlock(&nm_vi_indices.lock);
 }
 #undef NM_VI_MAX
 
 /*
  * Implementation of a netmap-capable virtual interface that
  * registered to the system.
  * It is based on if_tap.c and ip_fw_log.c in FreeBSD 9.
  *
  * Note: Linux sets refcount to 0 on allocation of net_device,
  * then increments it on registration to the system.
  * FreeBSD sets refcount to 1 on if_alloc(), and does not
  * increment this refcount on if_attach().
  */
 int
 nm_os_vi_persist(const char *name, struct ifnet **ret)
 {
 	struct ifnet *ifp;
 	u_short macaddr_hi;
 	uint32_t macaddr_mid;
 	u_char eaddr[6];
 	int unit = nm_vi_get_index(); /* just to decide MAC address */
 
 	if (unit < 0)
 		return EBUSY;
 	/*
 	 * We use the same MAC address generation method with tap
 	 * except for the highest octet is 00:be instead of 00:bd
 	 */
 	macaddr_hi = htons(0x00be); /* XXX tap + 1 */
 	macaddr_mid = (uint32_t) ticks;
 	bcopy(&macaddr_hi, eaddr, sizeof(short));
 	bcopy(&macaddr_mid, &eaddr[2], sizeof(uint32_t));
 	eaddr[5] = (uint8_t)unit;
 
 	ifp = if_alloc(IFT_ETHER);
 	if (ifp == NULL) {
 		nm_prerr("if_alloc failed");
 		return ENOMEM;
 	}
 	if_initname(ifp, name, IF_DUNIT_NONE);
 	ifp->if_mtu = 65536;
 	ifp->if_flags = IFF_UP | IFF_SIMPLEX | IFF_MULTICAST;
 	ifp->if_init = (void *)nm_vi_dummy;
 	ifp->if_ioctl = nm_vi_dummy;
 	ifp->if_start = nm_vi_start;
 	ifp->if_mtu = ETHERMTU;
 	IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
 	ifp->if_capabilities |= IFCAP_LINKSTATE;
 	ifp->if_capenable |= IFCAP_LINKSTATE;
 
 	ether_ifattach(ifp, eaddr);
 	*ret = ifp;
 	return 0;
 }
 
 /* unregister from the system and drop the final refcount.
  * The MAC index, kept in the last byte of the link-level address,
  * is returned to the index manager first.
  */
 void
 nm_os_vi_detach(struct ifnet *ifp)
 {
 	nm_vi_free_index(((char *)IF_LLADDR(ifp))[5]);
 	ether_ifdetach(ifp);
 	if_free(ifp);
 }
 
 #ifdef WITH_EXTMEM
 #include <vm/vm_map.h>
 #include <vm/vm_kern.h>
 /* Descriptor of an external memory region mapped into the kernel. */
 struct nm_os_extmem {
 	vm_object_t obj;	/* VM object backing the region */
 	vm_offset_t kva;	/* start of the mapping in kernel_map */
 	vm_offset_t size;	/* size of the mapping, in bytes */
 	uintptr_t scan;		/* cursor used by nm_os_extmem_nextpage() */
 };
 
 /* Remove the kernel mapping described by 'e' and free the descriptor. */
 void
 nm_os_extmem_delete(struct nm_os_extmem *e)
 {
 	nm_prinf("freeing %zx bytes", (size_t)e->size);
 	vm_map_remove(kernel_map, e->kva, e->kva + e->size);
 	nm_os_free(e);
 }
 
 /*
  * Iterate over the pages of the mapping: return the kernel address at
  * the scan cursor and advance the cursor by PAGE_SIZE, or return NULL
  * once the cursor has reached the end of the mapping.
  */
 char *
 nm_os_extmem_nextpage(struct nm_os_extmem *e)
 {
 	char *page;
 
 	if (e->scan >= e->kva + e->size)
 		return NULL;
 
 	page = (char *)e->scan;
 	e->scan += PAGE_SIZE;
 	return page;
 }
 
 /* Two descriptors are equal when they refer to the same VM object. */
 int
 nm_os_extmem_isequal(struct nm_os_extmem *e1, struct nm_os_extmem *e2)
 {
 	return (e1->obj == e2->obj);
 }
 
 /* Number of pages in the mapping (size in bytes >> PAGE_SHIFT). */
 int
 nm_os_extmem_nr_pages(struct nm_os_extmem *e)
 {
 	return e->size >> PAGE_SHIFT;
 }
 
 struct nm_os_extmem *
 nm_os_extmem_create(unsigned long p, struct nmreq_pools_info *pi, int *perror)
 {
 	vm_map_t map;
 	vm_map_entry_t entry;
 	vm_object_t obj;
 	vm_prot_t prot;
 	vm_pindex_t index;
 	boolean_t wired;
 	struct nm_os_extmem *e = NULL;
 	int rv, error = 0;
 
 	e = nm_os_malloc(sizeof(*e));
 	if (e == NULL) {
 		error = ENOMEM;
 		goto out;
 	}
 
 	map = &curthread->td_proc->p_vmspace->vm_map;
 	rv = vm_map_lookup(&map, p, VM_PROT_RW, &entry,
 			&obj, &index, &prot, &wired);
 	if (rv != KERN_SUCCESS) {
 		nm_prerr("address %lx not found", p);
 		goto out_free;
 	}
 	/* check that we are given the whole vm_object ? */
 	vm_map_lookup_done(map, entry);
 
 	// XXX can we really use obj after releasing the map lock?
 	e->obj = obj;
 	vm_object_reference(obj);
 	/* wire the memory and add the vm_object to the kernel map,
 	 * to make sure that it is not fred even if the processes that
 	 * are mmap()ing it all exit
 	 */
 	e->kva = vm_map_min(kernel_map);
 	e->size = obj->size << PAGE_SHIFT;
 	rv = vm_map_find(kernel_map, obj, 0, &e->kva, e->size, 0,
 			VMFS_OPTIMAL_SPACE, VM_PROT_READ | VM_PROT_WRITE,
 			VM_PROT_READ | VM_PROT_WRITE, 0);
 	if (rv != KERN_SUCCESS) {
 		nm_prerr("vm_map_find(%zx) failed", (size_t)e->size);
 		goto out_rel;
 	}
 	rv = vm_map_wire(kernel_map, e->kva, e->kva + e->size,
 			VM_MAP_WIRE_SYSTEM | VM_MAP_WIRE_NOHOLES);
 	if (rv != KERN_SUCCESS) {
 		nm_prerr("vm_map_wire failed");
 		goto out_rem;
 	}
 
 	e->scan = e->kva;
 
 	return e;
 
 out_rem:
 	vm_map_remove(kernel_map, e->kva, e->kva + e->size);
 	e->obj = NULL;
 out_rel:
 	vm_object_deallocate(e->obj);
 out_free:
 	nm_os_free(e);
 out:
 	if (perror)
 		*perror = error;
 	return NULL;
 }
 #endif /* WITH_EXTMEM */
 
 /* ================== PTNETMAP GUEST SUPPORT ==================== */
 
 #ifdef WITH_PTNETMAP
 #include <sys/bus.h>
 #include <sys/rman.h>
 #include <machine/bus.h>        /* bus_dmamap_* */
 #include <machine/resource.h>
 #include <dev/pci/pcivar.h>
 #include <dev/pci/pcireg.h>
 /*
  * ptnetmap memory device (memdev) for freebsd guest,
  * used to expose host netmap memory to the guest through a PCI BAR.
  */
 
 /*
  * ptnetmap memdev private data structure
  */
 struct ptnetmap_memdev {
 	device_t dev;			/* the PCI device */
 	struct resource *pci_io;	/* I/O BAR with the device registers */
 	struct resource *pci_mem;	/* memory BAR, NULL until iomap */
 	struct netmap_mem_d *nm_mem;	/* guest allocator for this device */
 };
 
 /* Device methods implemented further down in this file. */
 static int	ptn_memdev_probe(device_t);
 static int	ptn_memdev_attach(device_t);
 static int	ptn_memdev_detach(device_t);
 static int	ptn_memdev_shutdown(device_t);
 
 /* Method table of the ptnetmap memdev driver. */
 static device_method_t ptn_memdev_methods[] = {
 	DEVMETHOD(device_probe, ptn_memdev_probe),
 	DEVMETHOD(device_attach, ptn_memdev_attach),
 	DEVMETHOD(device_detach, ptn_memdev_detach),
 	DEVMETHOD(device_shutdown, ptn_memdev_shutdown),
 	DEVMETHOD_END
 };
 
 /* Driver descriptor; the softc is a struct ptnetmap_memdev. */
 static driver_t ptn_memdev_driver = {
 	PTNETMAP_MEMDEV_NAME,
 	ptn_memdev_methods,
 	sizeof(struct ptnetmap_memdev),
 };
 
 /* We use (SI_ORDER_MIDDLE+1) here, see DEV_MODULE_ORDERED() invocation
  * below. */
 static devclass_t ptnetmap_devclass;
 DRIVER_MODULE_ORDERED(ptn_memdev, pci, ptn_memdev_driver, ptnetmap_devclass,
 		      NULL, NULL, SI_ORDER_MIDDLE + 1);
 
 /*
  * Map host netmap memory through PCI-BAR in the guest OS,
  * returning physical (nm_paddr) and virtual (nm_addr) addresses
  * of the netmap memory mapped in the guest.
  *
  * The size of the region is read from the device registers and stored
  * in '*mem_size'. Returns 0 on success; ENOMEM if the memory BAR cannot
  * be allocated, in which case '*nm_paddr' is 0 and '*nm_addr' is NULL.
  */
 int
 nm_os_pt_memdev_iomap(struct ptnetmap_memdev *ptn_dev, vm_paddr_t *nm_paddr,
 		      void **nm_addr, uint64_t *mem_size)
 {
 	int rid;
 
 	nm_prinf("ptn_memdev_driver iomap");
 
 	rid = PCIR_BAR(PTNETMAP_MEM_PCI_BAR);
 	/* The 64-bit size is exposed as two 32-bit registers: read the
 	 * high half first, then combine it with the low half. */
 	*mem_size = bus_read_4(ptn_dev->pci_io, PTNET_MDEV_IO_MEMSIZE_HI);
 	*mem_size = bus_read_4(ptn_dev->pci_io, PTNET_MDEV_IO_MEMSIZE_LO) |
 			(*mem_size << 32);
 
 	/* map memory allocator */
 	ptn_dev->pci_mem = bus_alloc_resource(ptn_dev->dev, SYS_RES_MEMORY,
 			&rid, 0, ~0, *mem_size, RF_ACTIVE);
 	if (ptn_dev->pci_mem == NULL) {
 		*nm_paddr = 0;
 		*nm_addr = NULL;
 		return ENOMEM;
 	}
 
 	*nm_paddr = rman_get_start(ptn_dev->pci_mem);
 	*nm_addr = rman_get_virtual(ptn_dev->pci_mem);
 
 	nm_prinf("=== BAR %d start %lx len %lx mem_size %lx ===",
 			PTNETMAP_MEM_PCI_BAR,
 			(unsigned long)(*nm_paddr),
 			(unsigned long)rman_get_size(ptn_dev->pci_mem),
 			(unsigned long)*mem_size);
 	return (0);
 }
 
 /* Read the 32-bit device register at offset 'reg' of the I/O BAR. */
 uint32_t
 nm_os_pt_memdev_ioread(struct ptnetmap_memdev *ptn_dev, unsigned int reg)
 {
 	return bus_read_4(ptn_dev->pci_io, reg);
 }
 
 /* Unmap host netmap memory: release the memory BAR, if it is mapped. */
 void
 nm_os_pt_memdev_iounmap(struct ptnetmap_memdev *ptn_dev)
 {
 	nm_prinf("ptn_memdev_driver iounmap");
 
 	if (ptn_dev->pci_mem) {
 		bus_release_resource(ptn_dev->dev, SYS_RES_MEMORY,
 			PCIR_BAR(PTNETMAP_MEM_PCI_BAR), ptn_dev->pci_mem);
 		/* Cleared so that a later detach does not release it again. */
 		ptn_dev->pci_mem = NULL;
 	}
 }
 
 /* Device identification routine, return BUS_PROBE_DEFAULT on success,
  * positive on failure (ENXIO when the PCI vendor or device ID does not
  * match the ptnetmap memdev). On a match the device description is set.
  */
 static int
 ptn_memdev_probe(device_t dev)
 {
 	char desc[256];
 
 	if (pci_get_vendor(dev) != PTNETMAP_PCI_VENDOR_ID)
 		return (ENXIO);
 	if (pci_get_device(dev) != PTNETMAP_PCI_DEVICE_ID)
 		return (ENXIO);
 
 	snprintf(desc, sizeof(desc), "%s PCI adapter",
 			PTNETMAP_MEMDEV_NAME);
 	device_set_desc_copy(dev, desc);
 
 	return (BUS_PROBE_DEFAULT);
 }
 
 /* Device initialization routine.
  * Maps the I/O BAR, reads the host memory id from it and attaches a
  * guest allocator for that id. Returns 0 on success, ENXIO if the I/O
  * BAR cannot be mapped, ENOMEM if the allocator cannot be attached.
  */
 static int
 ptn_memdev_attach(device_t dev)
 {
 	struct ptnetmap_memdev *ptn_dev;
 	int rid;
 	uint16_t mem_id;
 
 	ptn_dev = device_get_softc(dev);
 	ptn_dev->dev = dev;
 
 	pci_enable_busmaster(dev);
 
 	rid = PCIR_BAR(PTNETMAP_IO_PCI_BAR);
 	ptn_dev->pci_io = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &rid,
 						 RF_ACTIVE);
 	if (ptn_dev->pci_io == NULL) {
 	        device_printf(dev, "cannot map I/O space\n");
 	        return (ENXIO);
 	}
 
 	/* NOTE(review): the 32-bit register value is narrowed to the
 	 * 16-bit 'mem_id' -- confirm ids always fit. */
 	mem_id = bus_read_4(ptn_dev->pci_io, PTNET_MDEV_IO_MEMID);
 
 	/* create guest allocator */
 	ptn_dev->nm_mem = netmap_mem_pt_guest_attach(ptn_dev, mem_id);
 	if (ptn_dev->nm_mem == NULL) {
 		/* detach releases the I/O BAR mapped above */
 		ptn_memdev_detach(dev);
 	        return (ENOMEM);
 	}
 	netmap_mem_get(ptn_dev->nm_mem);
 
 	nm_prinf("ptnetmap memdev attached, host memid: %u", mem_id);
 
 	return (0);
 }
 
 /* Device removal routine.
  * Releases, in this order, the allocator reference, the memory BAR and
  * the I/O BAR. Each step is skipped when the corresponding field is
  * NULL, so this is also used to undo a partial attach. Always returns 0.
  */
 static int
 ptn_memdev_detach(device_t dev)
 {
 	struct ptnetmap_memdev *ptn_dev;
 
 	ptn_dev = device_get_softc(dev);
 
 	if (ptn_dev->nm_mem) {
 		nm_prinf("ptnetmap memdev detached, host memid %u",
 			netmap_mem_get_id(ptn_dev->nm_mem));
 		netmap_mem_put(ptn_dev->nm_mem);
 		ptn_dev->nm_mem = NULL;
 	}
 	if (ptn_dev->pci_mem) {
 		bus_release_resource(dev, SYS_RES_MEMORY,
 			PCIR_BAR(PTNETMAP_MEM_PCI_BAR), ptn_dev->pci_mem);
 		ptn_dev->pci_mem = NULL;
 	}
 	if (ptn_dev->pci_io) {
 		bus_release_resource(dev, SYS_RES_IOPORT,
 			PCIR_BAR(PTNETMAP_IO_PCI_BAR), ptn_dev->pci_io);
 		ptn_dev->pci_io = NULL;
 	}
 
 	return (0);
 }
 
 /* Device shutdown routine: defers to bus_generic_shutdown(). */
 static int
 ptn_memdev_shutdown(device_t dev)
 {
 	return bus_generic_shutdown(dev);
 }
 
 #endif /* WITH_PTNETMAP */
 
 /*
  * In order to track whether pages are still mapped, we hook into
  * the standard cdev_pager and intercept the constructor and
  * destructor.
  *
  * One handle is allocated per mmap() in netmap_mmap_single() and
  * freed by the pager destructor.
  */
 
 struct netmap_vm_handle_t {
 	struct cdev 		*dev;	/* device being mapped */
 	struct netmap_priv_d	*priv;	/* per-fd state the mapping refers to */
 };
 
 
 /*
  * cdev pager constructor: takes a reference on the device stored in the
  * handle and, if 'color' is not NULL, sets '*color' to 0.
  * Always returns 0.
  */
 static int
 netmap_dev_pager_ctor(void *handle, vm_ooffset_t size, vm_prot_t prot,
 		vm_ooffset_t foff, struct ucred *cred, u_short *color)
 {
 	struct netmap_vm_handle_t *vmh = handle;
 
 	if (netmap_verbose)
 		nm_prinf("handle %p size %jd prot %d foff %jd",
 			handle, (intmax_t)size, prot, (intmax_t)foff);
 	if (color)
 		*color = 0;
 	dev_ref(vmh->dev);
 	return 0;
 }
 
 
 /*
  * cdev pager destructor: calls netmap_dtor() on the priv recorded in
  * the handle, frees the handle and drops the device reference taken by
  * the constructor.
  */
 static void
 netmap_dev_pager_dtor(void *handle)
 {
 	struct netmap_vm_handle_t *vmh = handle;
 	/* Copied out because 'vmh' is freed before 'dev' is released. */
 	struct cdev *dev = vmh->dev;
 	struct netmap_priv_d *priv = vmh->priv;
 
 	if (netmap_verbose)
 		nm_prinf("handle %p", handle);
 	netmap_dtor(priv);
 	free(vmh, M_DEVBUF);
 	dev_rel(dev);
 }
 
 
 /*
  * cdev pager fault handler: resolve 'offset' inside the netmap memory
  * of the adapter bound to the mapping into a physical address, and make
  * '*mres' a fictitious page for that address.
  * Returns VM_PAGER_OK, or VM_PAGER_FAIL if the offset has no physical
  * address.
  */
 static int
 netmap_dev_pager_fault(vm_object_t object, vm_ooffset_t offset,
 	int prot, vm_page_t *mres)
 {
 	struct netmap_vm_handle_t *vmh = object->handle;
 	struct netmap_priv_d *priv = vmh->priv;
 	struct netmap_adapter *na = priv->np_na;
 	vm_paddr_t paddr;
 	vm_page_t page;
 	vm_memattr_t memattr;
 	vm_pindex_t pidx;
 
 	nm_prdis("object %p offset %jd prot %d mres %p",
 			object, (intmax_t)offset, prot, mres);
 	memattr = object->memattr;
 	pidx = OFF_TO_IDX(offset);
 	paddr = netmap_mem_ofstophys(na->nm_mem, offset);
 	if (paddr == 0)
 		return VM_PAGER_FAIL;
 
 	if (((*mres)->flags & PG_FICTITIOUS) != 0) {
 		/*
 		 * If the passed in result page is a fake page, update it with
 		 * the new physical address.
 		 */
 		page = *mres;
 		vm_page_updatefake(page, paddr, memattr);
 	} else {
 		/*
 		 * Replace the passed in reqpage page with our own fake page and
 		 * free up the all of the original pages.
 		 */
 #ifndef VM_OBJECT_WUNLOCK	/* FreeBSD < 10.x */
 #define VM_OBJECT_WUNLOCK VM_OBJECT_UNLOCK
 #define VM_OBJECT_WLOCK	VM_OBJECT_LOCK
 #endif /* VM_OBJECT_WUNLOCK */
 
 		/* The object lock is dropped around vm_page_getfake(). */
 		VM_OBJECT_WUNLOCK(object);
 		page = vm_page_getfake(paddr, memattr);
 		VM_OBJECT_WLOCK(object);
 		vm_page_lock(*mres);
 		vm_page_free(*mres);
 		vm_page_unlock(*mres);
 		*mres = page;
 		vm_page_insert(page, object, pidx);
 	}
 	page->valid = VM_PAGE_BITS_ALL;
 	return (VM_PAGER_OK);
 }
 
 
 /* Pager operations passed to cdev_pager_allocate() in netmap_mmap_single(). */
 static struct cdev_pager_ops netmap_cdev_pager_ops = {
 	.cdev_pg_ctor = netmap_dev_pager_ctor,
 	.cdev_pg_dtor = netmap_dev_pager_dtor,
 	.cdev_pg_fault = netmap_dev_pager_fault,
 };
 
 
 /*
  * mmap handler for the netmap device: allocate a handle that ties the
  * mapping to the per-fd private data, take a reference on that data and
  * create the device pager object returned through 'objp'.
  *
  * Returns 0 on success; ENOMEM if the handle cannot be allocated, the
  * error from devfs_get_cdevpriv(), or EINVAL if np_nifp is NULL or the
  * pager object cannot be allocated.
  */
 static int
 netmap_mmap_single(struct cdev *cdev, vm_ooffset_t *foff,
 	vm_size_t objsize,  vm_object_t *objp, int prot)
 {
 	int error;
 	struct netmap_vm_handle_t *vmh;
 	struct netmap_priv_d *priv;
 	vm_object_t obj;
 
 	if (netmap_verbose)
 		nm_prinf("cdev %p foff %jd size %jd objp %p prot %d", cdev,
 		    (intmax_t )*foff, (intmax_t )objsize, objp, prot);
 
 	vmh = malloc(sizeof(struct netmap_vm_handle_t), M_DEVBUF,
 			      M_NOWAIT | M_ZERO);
 	if (vmh == NULL)
 		return ENOMEM;
 	vmh->dev = cdev;
 
 	NMG_LOCK();
 	error = devfs_get_cdevpriv((void**)&priv);
 	if (error)
 		goto err_unlock;
 	if (priv->np_nifp == NULL) {
 		error = EINVAL;
 		goto err_unlock;
 	}
 	vmh->priv = priv;
 	/* The mapping holds a reference on priv until the pager dtor runs. */
 	priv->np_refs++;
 	NMG_UNLOCK();
 
 	obj = cdev_pager_allocate(vmh, OBJT_DEVICE,
 		&netmap_cdev_pager_ops, objsize, prot,
 		*foff, NULL);
 	if (obj == NULL) {
 		nm_prerr("cdev_pager_allocate failed");
 		error = EINVAL;
 		goto err_deref;
 	}
 
 	*objp = obj;
 	return 0;
 
 err_deref:
 	/* Undo the reference taken above. */
 	NMG_LOCK();
 	priv->np_refs--;
 err_unlock:
 	NMG_UNLOCK();
 // err:
 	free(vmh, M_DEVBUF);
 	return error;
 }
 
 /*
  * On FreeBSD the close routine is only called on the last close on
  * the device (/dev/netmap) so we cannot do anything useful.
  * To track close() on individual file descriptors we pass netmap_dtor() to
  * devfs_set_cdevpriv() on open(). The FreeBSD kernel will call the destructor
  * when the last fd pointing to the device is closed.
  *
  * Note that FreeBSD does not even munmap() on close() so we also have
  * to track mmap() ourselves, and postpone the actual work of
  * netmap_dtor() until the process has no open fds and no active
  * memory maps on /dev/netmap, as in linux.
  *
  * This routine therefore only logs its arguments (when netmap_verbose
  * is set) and returns 0.
  */
 static int
 netmap_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
 {
 	if (netmap_verbose)
 		nm_prinf("dev %p fflag 0x%x devtype %d td %p",
 			dev, fflag, devtype, td);
 	return 0;
 }
 
 
 /*
  * open() handler for the netmap device: create the per-fd private data
  * and register it as cdevpriv with netmap_dtor() as destructor.
  * Returns 0 on success, ENOMEM if the private data cannot be created,
  * or the error from devfs_set_cdevpriv() (the private data is deleted
  * in that case). None of the arguments is used.
  */
 static int
 netmap_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
 {
 	struct netmap_priv_d *priv;
 	int error;
 
 	(void)dev;
 	(void)oflags;
 	(void)devtype;
 	(void)td;
 
 	NMG_LOCK();
 	priv = netmap_priv_new();
 	if (priv == NULL) {
 		error = ENOMEM;
 	} else {
 		error = devfs_set_cdevpriv(priv, netmap_dtor);
 		if (error)
 			netmap_priv_delete(priv);
 	}
 	NMG_UNLOCK();
 
 	return error;
 }
 
 /******************** kthread wrapper ****************/
 #include <sys/sysproto.h>
 /* Number of CPUs as seen by netmap: mp_maxid + 1. */
 u_int
 nm_os_ncpus(void)
 {
 	return mp_maxid + 1;
 }
 
 /* Work description of a kernel context (see struct nm_kctx). */
 struct nm_kctx_ctx {
 	/* Userspace thread (kthread creator). */
 	struct thread *user_td;
 
 	/* worker function and parameter */
 	nm_kctx_worker_fn_t worker_fn;
 	void *worker_private;
 
 	/* NOTE(review): not assigned anywhere in the code visible here. */
 	struct nm_kctx *nmk;
 
 	/* integer to manage multiple worker contexts (e.g., RX or TX on ptnetmap) */
 	long type;
 };
 
 /* A kernel context: a worker kthread plus its configuration. */
 struct nm_kctx {
 	struct thread *worker;		/* worker thread, NULL if not running */
 	struct mtx worker_lock;		/* initialized in nm_os_kctx_create() */
 	struct nm_kctx_ctx worker_ctx;
 	int run;			/* used to stop kthread */
 	int attach_user;		/* kthread attached to user_process */
 	int affinity;			/* CPU to bind to, negative for none */
 };
 
 /*
  * Body of the worker kthread. Optionally binds itself to the configured
  * CPU, then keeps calling the worker function until 'run' is cleared,
  * checking for suspension requests at every iteration. Exits the
  * kthread when the loop ends.
  */
 static void
 nm_kctx_worker(void *data)
 {
 	struct nm_kctx *nmk = data;
 	struct nm_kctx_ctx *ctx = &nmk->worker_ctx;
 
 	if (nmk->affinity >= 0) {
 		thread_lock(curthread);
 		sched_bind(curthread, nmk->affinity);
 		thread_unlock(curthread);
 	}
 
 	while (nmk->run) {
 		/*
 		 * check if the parent process dies
 		 * (when kthread is attached to user process)
 		 */
 		if (ctx->user_td) {
 			PROC_LOCK(curproc);
 			thread_suspend_check(0);
 			PROC_UNLOCK(curproc);
 		} else {
 			kthread_suspend_check();
 		}
 
 		/* Continuously execute worker process. */
 		ctx->worker_fn(ctx->worker_private); /* worker body */
 	}
 
 	kthread_exit();
 }
 
 /*
  * Record the CPU the worker should bind to. The value is read by the
  * worker thread when it starts; a negative value means no binding.
  */
 void
 nm_os_kctx_worker_setaff(struct nm_kctx *nmk, int affinity)
 {
 	nmk->affinity = affinity;
 }
 
 /*
  * Allocate and configure a kernel context from 'cfg'. No thread is
  * created here. Returns the new context, or NULL if the allocation
  * fails. 'opaque' is not used.
  */
 struct nm_kctx *
 nm_os_kctx_create(struct nm_kctx_cfg *cfg, void *opaque)
 {
 	struct nm_kctx *nmk = NULL;
 
 	nmk = malloc(sizeof(*nmk),  M_DEVBUF, M_NOWAIT | M_ZERO);
 	if (!nmk)
 		return NULL;
 
 	mtx_init(&nmk->worker_lock, "nm_kthread lock", NULL, MTX_DEF);
 	nmk->worker_ctx.worker_fn = cfg->worker_fn;
 	nmk->worker_ctx.worker_private = cfg->worker_private;
 	nmk->worker_ctx.type = cfg->type;
 	/* no CPU binding unless nm_os_kctx_worker_setaff() is called */
 	nmk->affinity = -1;
 
 	/* attach kthread to user process (ptnetmap) */
 	nmk->attach_user = cfg->attach_user;
 
 	return nmk;
 }
 
 /*
  * Start the worker kthread of 'nmk'.
  *
  * Currently disabled: the function returns EOPNOTSUPP immediately and
  * everything after that return is unreachable. The code is kept for
  * when the underlying problem is fixed; it would return EBUSY if a
  * worker already exists, or the error from kthread_add().
  */
 int
 nm_os_kctx_worker_start(struct nm_kctx *nmk)
 {
 	struct proc *p = NULL;
 	int error = 0;
 
 	/* Temporarily disable this function as it is currently broken
 	 * and causes kernel crashes. The failure can be triggered by
 	 * the "vale_polling_enable_disable" test in ctrl-api-test.c. */
 	return EOPNOTSUPP;
 
 	if (nmk->worker)
 		return EBUSY;
 
 	/* check if we want to attach kthread to user process */
 	if (nmk->attach_user) {
 		nmk->worker_ctx.user_td = curthread;
 		p = curthread->td_proc;
 	}
 
 	/* enable kthread main loop */
 	nmk->run = 1;
 	/* create kthread */
 	if((error = kthread_add(nm_kctx_worker, nmk, p,
 			&nmk->worker, RFNOWAIT /* to be checked */, 0, "nm-kthread-%ld",
 			nmk->worker_ctx.type))) {
 		goto err;
 	}
 
 	nm_prinf("nm_kthread started td %p", nmk->worker);
 
 	return 0;
 err:
 	nm_prerr("nm_kthread start failed err %d", error);
 	nmk->worker = NULL;
 	return error;
 }
 
 /*
  * Ask the worker kthread to leave its main loop and drop our reference
  * to it. Does nothing when no worker is recorded. The function does not
  * wait for the thread to actually exit.
  */
 void
 nm_os_kctx_worker_stop(struct nm_kctx *nmk)
 {
 	struct thread *worker_td = nmk->worker;
 
 	if (worker_td == NULL)
 		return;
 
 	/* clear the flag tested by the kthread main loop */
 	nmk->run = 0;
 
 	/* resume the kthread in case it is suspended */
 	kthread_resume(worker_td);
 
 	nmk->worker = NULL;
 }
 
 /*
  * Release a kernel context created by nm_os_kctx_create().
  * Accepts NULL. A worker that is still recorded is asked to stop first;
  * then the worker lock is destroyed and the memory is freed.
  */
 void
 nm_os_kctx_destroy(struct nm_kctx *nmk)
 {
 	if (!nmk)
 		return;
 
 	if (nmk->worker)
 		nm_os_kctx_worker_stop(nmk);
 
 	/*
 	 * Pair with the mtx_init() done in nm_os_kctx_create(): the mutex
 	 * must be destroyed before the memory that contains it is released.
 	 */
 	mtx_destroy(&nmk->worker_lock);
 
 	free(nmk, M_DEVBUF);
 }
 
 /******************** kqueue support ****************/
 
 /*
  * In addition to calling selwakeuppri(), nm_os_selwakeup() also
  * needs to call knote() to wake up kqueue listeners.
  * This operation is deferred to a taskqueue in order to avoid possible
  * lock order reversals; these may happen because knote() grabs a
  * private lock associated to the 'si' (see struct selinfo,
  * struct nm_selinfo, and nm_os_selinfo_init), and nm_os_selwakeup()
  * can be called while holding the lock associated to a different
  * 'si'.
  * When calling knote() we use a non-zero 'hint' argument to inform
  * the netmap_knrw() function that it is being called from
  * 'nm_os_selwakeup'; this is necessary because when netmap_knrw() is
  * called by the kevent subsystem (i.e. kevent_scan()) we also need to
  * call netmap_poll().
  *
  * The netmap_kqfilter() function registers one or another f_event
  * depending on read or write mode. A pointer to the struct
  * 'netmap_priv_d' is stored into kn->kn_hook, so that it can later
  * be passed to netmap_poll(). We pass NULL as a third argument to
  * netmap_poll(), so that the latter only runs the txsync/rxsync
  * (if necessary), and skips the nm_os_selrecord() calls.
  */
 
 
 /*
  * Wake up select/poll waiters on 'si' and, only if there are kqueue
  * users, schedule the deferred knote notification task.
  */
 void
 nm_os_selwakeup(struct nm_selinfo *si)
 {
 	selwakeuppri(&si->si, PI_NET);
 
 	/* nobody registered through kqueue: nothing to defer */
 	if (si->kqueue_users <= 0)
 		return;
 
 	taskqueue_enqueue(si->ntfytq, &si->ntfytask);
 }
 
 /*
  * Thin wrapper: record thread 'td' on the selinfo embedded in 'si'
  * by calling selrecord().
  */
 void
 nm_os_selrecord(struct thread *td, struct nm_selinfo *si)
 {
 	selrecord(td, &si->si);
 }
 
 /*
  * f_detach for the read filter: unlink the knote from the RX selinfo
  * and drop one kqueue user, under the netmap global lock.
  */
 static void
 netmap_knrdetach(struct knote *kn)
 {
 	struct netmap_priv_d *np = kn->kn_hook;
 	struct nm_selinfo *rxsi = np->np_si[NR_RX];
 
 	knlist_remove(&rxsi->si.si_note, kn, /*islocked=*/0);
 
 	NMG_LOCK();
 	KASSERT(rxsi->kqueue_users > 0, ("kqueue_user underflow on %s",
 	    rxsi->mtxname));
 	rxsi->kqueue_users--;
 	nm_prinf("kqueue users for %s: %d", rxsi->mtxname,
 	    rxsi->kqueue_users);
 	NMG_UNLOCK();
 }
 
 /*
  * f_detach for the write filter: unlink the knote from the TX selinfo
  * and drop one kqueue user, under the netmap global lock.
  * Mirrors netmap_knrdetach(), including the underflow assertion.
  */
 static void
 netmap_knwdetach(struct knote *kn)
 {
 	struct netmap_priv_d *priv = (struct netmap_priv_d *)kn->kn_hook;
 	struct nm_selinfo *si = priv->np_si[NR_TX];
 
 	knlist_remove(&si->si.si_note, kn, /*islocked=*/0);
 	NMG_LOCK();
 	/* a detach without a matching attach would drive the count negative */
 	KASSERT(si->kqueue_users > 0, ("kqueue_user underflow on %s",
 	    si->mtxname));
 	si->kqueue_users--;
 	nm_prinf("kqueue users for %s: %d", si->mtxname, si->kqueue_users);
 	NMG_UNLOCK();
 }
 
 /*
  * Callback triggered by netmap notifications (see netmap_notify()),
  * and by the application calling kevent(). In the former case we
  * just return 1 (events ready), since we are not able to do better.
  * In the latter case we use netmap_poll() to see which events are
  * ready.
  */
 static int
 netmap_knrw(struct knote *kn, long hint, int events)
 {
 	int ready;
 
 	/*
 	 * A non-zero hint means we were invoked from netmap_notify(),
 	 * typically from a thread different from the one issuing
 	 * kevent(): just assume events are ready.
 	 */
 	if (hint != 0)
 		return 1;
 
 	/*
 	 * Called from kevent(): ask netmap_poll() which events are
 	 * ready. No thread is passed, as no selrecord is wanted here.
 	 */
 	ready = netmap_poll(kn->kn_hook, events, /*thread=*/NULL);
 	if (events & ready)
 		return 1;
 
 	return 0;
 }
 
 /* f_event for the read filter: netmap_knrw() restricted to POLLIN. */
 static int
 netmap_knread(struct knote *kn, long hint)
 {
 	return netmap_knrw(kn, hint, POLLIN);
 }
 
 /* f_event for the write filter: netmap_knrw() restricted to POLLOUT. */
 static int
 netmap_knwrite(struct knote *kn, long hint)
 {
 	return netmap_knrw(kn, hint, POLLOUT);
 }
 
 /*
  * Filter operations installed by netmap_kqfilter() for EVFILT_READ:
  * detach decrements the RX selinfo user count, event checks POLLIN.
  */
 static struct filterops netmap_rfiltops = {
 	.f_isfd = 1,
 	.f_detach = netmap_knrdetach,
 	.f_event = netmap_knread,
 };
 
 /*
  * Filter operations installed by netmap_kqfilter() for EVFILT_WRITE:
  * detach decrements the TX selinfo user count, event checks POLLOUT.
  */
 static struct filterops netmap_wfiltops = {
 	.f_isfd = 1,
 	.f_detach = netmap_knwdetach,
 	.f_event = netmap_knwrite,
 };
 
 
 /*
  * This is called when a thread invokes kevent() to record
  * a change in the configuration of the kqueue().
  * The 'priv' is the one associated to the open netmap device.
  */
 static int
 netmap_kqfilter(struct cdev *dev, struct knote *kn)
 {
 	struct netmap_priv_d *priv;
 	struct nm_selinfo *si;
 	int filter = kn->kn_filter;
 
 	/* only read and write filters are supported */
 	if (filter != EVFILT_READ && filter != EVFILT_WRITE) {
 		nm_prerr("bad filter request %d", filter);
 		return 1;
 	}
 	if (devfs_get_cdevpriv((void**)&priv) != 0) {
 		nm_prerr("device not yet setup");
 		return 1;
 	}
 	if (priv->np_na == NULL) {
 		nm_prerr("no netmap adapter for this file descriptor");
 		return 1;
 	}
 
 	/* the si and the filter ops both follow the direction */
 	if (filter == EVFILT_WRITE) {
 		si = priv->np_si[NR_TX];
 		kn->kn_fop = &netmap_wfiltops;
 	} else {
 		si = priv->np_si[NR_RX];
 		kn->kn_fop = &netmap_rfiltops;
 	}
 	/* the detach and event callbacks get the priv back from here */
 	kn->kn_hook = priv;
 
 	NMG_LOCK();
 	si->kqueue_users++;
 	nm_prinf("kqueue users for %s: %d", si->mtxname, si->kqueue_users);
 	NMG_UNLOCK();
 
 	knlist_add(&si->si.si_note, kn, /*islocked=*/0);
 
 	return 0;
 }
 
 /*
  * d_poll handler: look up the per-open private data and hand the request
  * to netmap_poll(). Reports POLLERR when no private data is available.
  */
 static int
 freebsd_netmap_poll(struct cdev *cdevi __unused, int events, struct thread *td)
 {
 	struct netmap_priv_d *np;
 
 	if (devfs_get_cdevpriv((void **)&np) != 0)
 		return POLLERR;
 
 	return netmap_poll(np, events, td);
 }
 
 /*
  * d_ioctl handler: run netmap_ioctl() on the per-open private data with
  * the vnet of the calling thread set as current. A missing private data
  * (ENOENT) is reported to the caller as ENXIO.
  */
 static int
 freebsd_netmap_ioctl(struct cdev *dev __unused, u_long cmd, caddr_t data,
 		int ffla __unused, struct thread *td)
 {
 	struct netmap_priv_d *np;
 	int rc;
 
 	CURVNET_SET(TD_TO_VNET(td));
 	rc = devfs_get_cdevpriv((void **)&np);
 	if (rc == 0) {
 		rc = netmap_ioctl(np, cmd, data, td, /*nr_body_is_user=*/1);
 	} else if (rc == ENOENT) {
 		/* XXX ENOENT should be impossible, since the priv
 		 * is now created in the open */
 		rc = ENXIO;
 	}
 	CURVNET_RESTORE();
 
 	return rc;
 }
 
 /* Advertise netmap support by setting IFCAP_NETMAP in if_capabilities. */
 void
 nm_os_onattach(struct ifnet *ifp)
 {
 	ifp->if_capabilities |= IFCAP_NETMAP;
 }
 
 /*
  * Switch the interface to netmap mode: remember the driver's
  * if_transmit in the adapter, install netmap_transmit in its place and
  * mark IFCAP_NETMAP as enabled.
  */
 void
 nm_os_onenter(struct ifnet *ifp)
 {
 	struct netmap_adapter *na = NA(ifp);
 
 	/* save first, so that nm_os_onexit() can restore the original */
 	na->if_transmit = ifp->if_transmit;
 	ifp->if_transmit = netmap_transmit;
 	ifp->if_capenable |= IFCAP_NETMAP;
 }
 
 /*
  * Undo nm_os_onenter(): put back the if_transmit saved in the adapter
  * and clear IFCAP_NETMAP from the enabled capabilities.
  */
 void
 nm_os_onexit(struct ifnet *ifp)
 {
 	struct netmap_adapter *na = NA(ifp);
 
 	ifp->if_transmit = na->if_transmit;
 	ifp->if_capenable &= ~IFCAP_NETMAP;
 }
 
 /*
  * Character device switch for the netmap device. The ioctl, poll and
  * kqfilter entries are the FreeBSD-specific handlers defined above.
  */
 extern struct cdevsw netmap_cdevsw; /* XXX used in netmap.c, should go elsewhere */
 struct cdevsw netmap_cdevsw = {
 	.d_version = D_VERSION,
 	.d_name = "netmap",
 	.d_open = netmap_open,
 	.d_mmap_single = netmap_mmap_single,
 	.d_ioctl = freebsd_netmap_ioctl,
 	.d_poll = freebsd_netmap_poll,
 	.d_kqfilter = netmap_kqfilter,
 	.d_close = netmap_close,
 };
 /*--- end of kqueue support ----*/
 
 /*
  * Kernel entry point.
  *
  * Initialize/finalize the module and return.
  *
  * Return 0 on success, errno on failure.
  */
 static int
 netmap_loader(__unused struct module *module, int event, __unused void *arg)
 {
 	switch (event) {
 	case MOD_LOAD:
 		return (netmap_init());
 
 	case MOD_UNLOAD:
 		/* refuse to unload while netmap is still in use */
 		if (netmap_use_count) {
 			nm_prerr("netmap module can not be unloaded - netmap_use_count: %d",
 					netmap_use_count);
 			return (EBUSY);
 		}
 		netmap_fini();
 		return (0);
 
 	default:
 		/* any other module event is not handled */
 		return (EOPNOTSUPP);
 	}
 }
 
 #ifdef DEV_MODULE_ORDERED
 /*
  * The netmap module contains three drivers: (i) the netmap character device
  * driver; (ii) the ptnetmap memdev PCI device driver, (iii) the ptnet PCI
  * device driver. The attach() routines of both (ii) and (iii) need the
  * lock of the global allocator, and such lock is initialized in netmap_init(),
  * which is part of (i).
  * Therefore, we make sure that (i) is loaded before (ii) and (iii), using
  * the 'order' parameter of driver declaration macros. For (i), we specify
  * SI_ORDER_MIDDLE, while higher orders are used with the DRIVER_MODULE_ORDERED
  * macros for (ii) and (iii).
  */
 DEV_MODULE_ORDERED(netmap, netmap_loader, NULL, SI_ORDER_MIDDLE);
 #else /* !DEV_MODULE_ORDERED */
 /* No ordered variant available: register netmap_loader without an order. */
 DEV_MODULE(netmap, netmap_loader, NULL);
 #endif /* DEV_MODULE_ORDERED  */
 /* The module depends on the pci module and declares itself as version 1. */
 MODULE_DEPEND(netmap, pci, 1, 1, 1);
 MODULE_VERSION(netmap, 1);
 /* reduce conditional code */
 // linux API, use for the knlist in FreeBSD
 /* use a private mutex for the knlist */
Index: head/sys/netinet/netdump/netdump_client.c
===================================================================
--- head/sys/netinet/netdump/netdump_client.c	(revision 348021)
+++ head/sys/netinet/netdump/netdump_client.c	(revision 348022)
@@ -1,1510 +1,1511 @@
 /*-
  * Copyright (c) 2005-2014 Sandvine Incorporated. All rights reserved.
  * Copyright (c) 2000 Darrell Anderson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * netdump_client.c
  * FreeBSD subsystem supporting netdump network dumps.
  * A dedicated server must be running to accept client dumps.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/conf.h>
 #include <sys/disk.h>
 #include <sys/endian.h>
+#include <sys/eventhandler.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/kerneldump.h>
 #include <sys/mbuf.h>
 #include <sys/module.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/protosw.h>
 #include <sys/socket.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 
 #include <net/ethernet.h>
 #include <net/if.h>
 #include <net/if_arp.h>
 #include <net/if_dl.h>
 #include <net/if_types.h>
 #include <net/if_var.h>
 
 #include <netinet/in.h>
 #include <netinet/in_systm.h>
 #include <netinet/in_var.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
 #include <netinet/ip_options.h>
 #include <netinet/udp.h>
 #include <netinet/udp_var.h>
 #include <netinet/netdump/netdump.h>
 
 #include <machine/in_cksum.h>
 #include <machine/pcb.h>
 
 /*
  * Debug printing helpers, gated on the nd_debug verbosity level.
  * NETDDEBUG and NETDDEBUG_IF print when nd_debug > 0; the "V" (verbose)
  * variants print only when nd_debug > 1. The _IF variants print through
  * if_printf() on interface 'i', the others through printf(). Every
  * message is prefixed with the name of the calling function.
  */
 #define	NETDDEBUG(f, ...) do {						\
 	if (nd_debug > 0)						\
 		printf(("%s: " f), __func__, ## __VA_ARGS__);		\
 } while (0)
 #define	NETDDEBUG_IF(i, f, ...) do {					\
 	if (nd_debug > 0)						\
 		if_printf((i), ("%s: " f), __func__, ## __VA_ARGS__);	\
 } while (0)
 #define	NETDDEBUGV(f, ...) do {						\
 	if (nd_debug > 1)						\
 		printf(("%s: " f), __func__, ## __VA_ARGS__);		\
 } while (0)
 #define	NETDDEBUGV_IF(i, f, ...) do {					\
 	if (nd_debug > 1)						\
 		if_printf((i), ("%s: " f), __func__, ## __VA_ARGS__);	\
 } while (0)
 
 /* Forward declarations of the file-local netdump functions. */
 static int	 netdump_arp_gw(void);
 static void	 netdump_cleanup(void);
 static int	 netdump_configure(struct diocskerneldump_arg *,
 		    struct thread *);
 static int	 netdump_dumper(void *priv __unused, void *virtual,
 		    vm_offset_t physical __unused, off_t offset, size_t length);
 static bool	 netdump_enabled(void);
 static int	 netdump_enabled_sysctl(SYSCTL_HANDLER_ARGS);
 static int	 netdump_ether_output(struct mbuf *m, struct ifnet *ifp,
 		    struct ether_addr dst, u_short etype);
 static void	 netdump_handle_arp(struct mbuf **mb);
 static void	 netdump_handle_ip(struct mbuf **mb);
 static int	 netdump_ioctl(struct cdev *dev __unused, u_long cmd,
 		    caddr_t addr, int flags __unused, struct thread *td);
 static int	 netdump_modevent(module_t mod, int type, void *priv);
 static void	 netdump_network_poll(void);
 static void	 netdump_pkt_in(struct ifnet *ifp, struct mbuf *m);
 static void	 netdump_reinit_internal(struct ifnet *ifp);
 static int	 netdump_send(uint32_t type, off_t offset, unsigned char *data,
 		    uint32_t datalen);
 static int	 netdump_send_arp(in_addr_t dst);
 static int	 netdump_start(struct dumperinfo *di);
 static int	 netdump_udp_output(struct mbuf *m);
 static void	 netdump_unconfigure(void);
 
 /* Must be at least as big as the chunks dumpsys() gives us. */
 static unsigned char nd_buf[MAXDUMPPGS * PAGE_SIZE];
 /*
  * Sequence number of the first packet of the batch being sent; advanced
  * by netdump_send() once every packet of the batch has been acknowledged.
  */
 static uint32_t nd_seqno;
 /* have_gw_mac becomes 1 once an ARP reply has filled in nd_gw_mac. */
 static int dump_failed, have_gw_mac;
 static void (*drv_if_input)(struct ifnet *, struct mbuf *);
 static int restore_gw_addr;
 
 /*
  * Bitmap of acknowledgements received for the batch in flight: bit k is
  * set when the ack for sequence number nd_seqno + k arrives. The assertion
  * checks that it has exactly one bit per in-flight packet.
  */
 static uint64_t rcvd_acks;
 CTASSERT(sizeof(rcvd_acks) * NBBY == NETDUMP_MAX_IN_FLIGHT);
 
 /* Configuration parameters. */
 static struct {
 	char		 ndc_iface[IFNAMSIZ];
 	union kd_ip	 ndc_server;
 	union kd_ip	 ndc_client;
 	union kd_ip	 ndc_gateway;
 	uint8_t		 ndc_af;
 } nd_conf;
 /* Shorthands for the IPv4 member of the configured addresses. */
 #define	nd_server	nd_conf.ndc_server.in4
 #define	nd_client	nd_conf.ndc_client.in4
 #define	nd_gateway	nd_conf.ndc_gateway.in4
 
 /* General dynamic settings. */
 static struct sx nd_conf_lk;
 SX_SYSINIT(nd_conf, &nd_conf_lk, "netdump configuration lock");
 /* Exclusive/shared wrappers and assertions for the configuration lock. */
 #define NETDUMP_WLOCK()			sx_xlock(&nd_conf_lk)
 #define NETDUMP_WUNLOCK()		sx_xunlock(&nd_conf_lk)
 #define NETDUMP_RLOCK()			sx_slock(&nd_conf_lk)
 #define NETDUMP_RUNLOCK()		sx_sunlock(&nd_conf_lk)
 #define NETDUMP_ASSERT_WLOCKED()	sx_assert(&nd_conf_lk, SA_XLOCKED)
 #define NETDUMP_ASSERT_LOCKED()		sx_assert(&nd_conf_lk, SA_LOCKED)
 /* Destination MAC used for outgoing dump packets; learned through ARP. */
 static struct ether_addr nd_gw_mac;
 /* Interface used for dumping; non-NULL means netdump is enabled. */
 static struct ifnet *nd_ifp;
 static eventhandler_tag nd_detach_cookie;
 /*
  * UDP port packets are sent to. Starts as NETDUMP_PORT and is replaced by
  * the source port of the first ack seen in netdump_handle_ip().
  */
 static uint16_t nd_server_port = NETDUMP_PORT;
 
 FEATURE(netdump, "Netdump client support");
 
 /* Root of the net.netdump sysctl tree; the knobs below hang off it. */
 static SYSCTL_NODE(_net, OID_AUTO, netdump, CTLFLAG_RD, NULL,
     "netdump parameters");
 
 /* Verbosity level tested by the NETDDEBUG* macros. */
 static int nd_debug;
 SYSCTL_INT(_net_netdump, OID_AUTO, debug, CTLFLAG_RWTUN,
     &nd_debug, 0,
     "Debug message verbosity");
 /* Read-only status, computed by netdump_enabled_sysctl(). */
 SYSCTL_PROC(_net_netdump, OID_AUTO, enabled, CTLFLAG_RD | CTLTYPE_INT,
     &nd_ifp, 0, netdump_enabled_sysctl, "I", "netdump configuration status");
 static char nd_path[MAXPATHLEN];
 SYSCTL_STRING(_net_netdump, OID_AUTO, path, CTLFLAG_RW,
     nd_path, sizeof(nd_path),
     "Server path for output files");
 /* Upper bound on the polling loops that wait for ARP replies and acks. */
 static int nd_polls = 2000;
 SYSCTL_INT(_net_netdump, OID_AUTO, polls, CTLFLAG_RWTUN,
     &nd_polls, 0,
     "Number of times to poll before assuming packet loss (0.5ms per poll)");
 /* Retransmission budget used by netdump_send(). */
 static int nd_retries = 10;
 SYSCTL_INT(_net_netdump, OID_AUTO, retries, CTLFLAG_RWTUN,
     &nd_retries, 0,
     "Number of retransmit attempts before giving up");
 /* ARP request budget per target used by netdump_arp_gw(). */
 static int nd_arp_retries = 3;
 SYSCTL_INT(_net_netdump, OID_AUTO, arp_retries, CTLFLAG_RWTUN,
     &nd_arp_retries, 0,
     "Number of ARP attempts before giving up");
 
 /*
  * Report whether netdump is configured, i.e. whether nd_ifp is set.
  * Asserts that the configuration lock is held.
  */
 static bool
 netdump_enabled(void)
 {
 
 	NETDUMP_ASSERT_LOCKED();
 	return (nd_ifp != NULL);
 }
 
 /*
  * Handler for the "enabled" sysctl: copies out the current enabled state
  * and rejects any attempt to write a new value with EPERM.
  */
 static int
 netdump_enabled_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	int enabled, rc;
 
 	/* sample the state under the shared configuration lock */
 	NETDUMP_RLOCK();
 	enabled = netdump_enabled();
 	NETDUMP_RUNLOCK();
 
 	rc = SYSCTL_OUT(req, &enabled, sizeof(enabled));
 	if (rc == 0 && req->newptr != NULL)
 		rc = EPERM;
 	return (rc);
 }
 
 /*
  * Checks for netdump support on a network interface
  *
  * Parameters:
  *	ifp	The network interface that is being tested for support
  *
  * Returns:
  *	bool	true if the interface provides netdump methods
  *		(if_netdump_methods is set), false if not
  */
 static bool
 netdump_supported_nic(struct ifnet *ifp)
 {
 
 	return (ifp->if_netdump_methods != NULL);
 }
 
 /*-
  * Network specific primitives.
  * In the code below they appear in the following order:
  * - Packet buffer primitives
  * - Output primitives
  * - Input primitives
  * - Polling primitives
  */
 
 /*
  * Handles creation of the ethernet header, then places outgoing packets into
  * the tx buffer for the NIC
  *
  * Parameters:
  *	m	The mbuf containing the packet to be sent (will be freed by
  *		this function or the NIC driver)
  *	ifp	The interface to send on
  *	dst	The destination ethernet address (source address will be looked
  *		up using ifp)
  *	etype	The ETHERTYPE_* value for the protocol that is being sent
  *
  * Returns:
  *	int	see errno.h, 0 for success
  */
 static int
 netdump_ether_output(struct mbuf *m, struct ifnet *ifp, struct ether_addr dst,
     u_short etype)
 {
 	struct ether_header *hdr;
 	bool if_up, drv_running;
 
 	/* IFF_UP must be set and IFF_MONITOR clear; the driver must run. */
 	if_up = (ifp->if_flags & (IFF_MONITOR | IFF_UP)) == IFF_UP;
 	drv_running =
 	    (ifp->if_drv_flags & IFF_DRV_RUNNING) == IFF_DRV_RUNNING;
 	if (!if_up || !drv_running) {
 		if_printf(ifp, "netdump_ether_output: interface isn't up\n");
 		m_freem(m);
 		return (ENETDOWN);
 	}
 
 	/* Make room for the ethernet header in front of the payload. */
 	M_PREPEND(m, ETHER_HDR_LEN, M_NOWAIT);
 	if (m == NULL) {
 		printf("%s: out of mbufs\n", __func__);
 		return (ENOBUFS);
 	}
 
 	/* Source is the interface's own address, destination is 'dst'. */
 	hdr = mtod(m, struct ether_header *);
 	memcpy(hdr->ether_shost, IF_LLADDR(ifp), ETHER_ADDR_LEN);
 	memcpy(hdr->ether_dhost, dst.octet, ETHER_ADDR_LEN);
 	hdr->ether_type = htons(etype);
 
 	/* Hand the frame to the driver's netdump transmit method. */
 	return ((ifp->if_netdump_methods->nd_transmit)(ifp, m));
 }
 
 /*
  * Unreliable transmission of an mbuf chain to the netdump server
  * Note: can't handle fragmentation; fails if the packet is larger than
  *	 nd_ifp->if_mtu after adding the UDP/IP headers
  *
  * Parameters:
  *	m	mbuf chain
  *
  * Returns:
  *	int	see errno.h, 0 for success
  */
 static int
 netdump_udp_output(struct mbuf *m)
 {
 	struct udpiphdr *ui;
 	struct ip *ip;
 
 	MPASS(netdump_enabled());
 
 	/* Make room for the combined IP + UDP header in front of the data. */
 	M_PREPEND(m, sizeof(struct udpiphdr), M_NOWAIT);
 	if (m == NULL) {
 		printf("%s: out of mbufs\n", __func__);
 		return (ENOBUFS);
 	}
 
 	/* No fragmentation support: the whole packet must fit in the MTU. */
 	if (m->m_pkthdr.len > nd_ifp->if_mtu) {
 		printf("netdump_udp_output: Packet is too big: %d > MTU %u\n",
 		    m->m_pkthdr.len, nd_ifp->if_mtu);
 		m_freem(m);
 		return (ENOBUFS);
 	}
 
 	/*
 	 * First pass over the header bytes, viewed as a struct udpiphdr:
 	 * fill in the UDP fields and compute the UDP checksum.
 	 */
 	ui = mtod(m, struct udpiphdr *);
 	bzero(ui->ui_x1, sizeof(ui->ui_x1));
 	ui->ui_pr = IPPROTO_UDP;
 	ui->ui_len = htons(m->m_pkthdr.len - sizeof(struct ip));
 	ui->ui_ulen = ui->ui_len;
 	ui->ui_src = nd_client;
 	ui->ui_dst = nd_server;
 	/* Use this src port so that the server can connect() the socket */
 	ui->ui_sport = htons(NETDUMP_ACKPORT);
 	ui->ui_dport = htons(nd_server_port);
 	ui->ui_sum = 0;
 	/* A computed checksum of zero is sent as 0xffff. */
 	if ((ui->ui_sum = in_cksum(m, m->m_pkthdr.len)) == 0)
 		ui->ui_sum = 0xffff;
 
 	/*
 	 * Second pass over the same bytes, now viewed as a struct ip: 'ip'
 	 * and 'ui' both point at the start of the mbuf data, so these stores
 	 * must come after the UDP checksum above has been computed.
 	 */
 	ip = mtod(m, struct ip *);
 	ip->ip_v = IPVERSION;
 	ip->ip_hl = sizeof(struct ip) >> 2;
 	ip->ip_tos = 0;
 	ip->ip_len = htons(m->m_pkthdr.len);
 	ip->ip_id = 0;
 	ip->ip_off = htons(IP_DF);
 	ip->ip_ttl = 255;
 	/* The header checksum is computed with its own field zeroed. */
 	ip->ip_sum = 0;
 	ip->ip_sum = in_cksum(m, sizeof(struct ip));
 
 	return (netdump_ether_output(m, nd_ifp, nd_gw_mac, ETHERTYPE_IP));
 }
 
 /*
  * Builds and sends a single ARP request to locate the server
  *
  * Return value:
  *	0 on success
  *	errno on error
  */
 static int
 netdump_send_arp(in_addr_t dst)
 {
 	struct ether_addr bcast_mac;
 	struct arphdr *arp;
 	struct mbuf *pkt;
 	int arp_len;
 
 	MPASS(netdump_enabled());
 
 	pkt = m_gethdr(M_NOWAIT, MT_DATA);
 	if (pkt == NULL) {
 		printf("netdump_send_arp: Out of mbufs\n");
 		return (ENOBUFS);
 	}
 
 	/* The packet is exactly one ethernet/IPv4 ARP message. */
 	arp_len = arphdr_len2(ETHER_ADDR_LEN, sizeof(struct in_addr));
 	pkt->m_pkthdr.len = arp_len;
 	pkt->m_len = arp_len;
 	MH_ALIGN(pkt, arp_len);
 
 	/* Fixed part of the ARP header. */
 	arp = mtod(pkt, struct arphdr *);
 	arp->ar_hrd = htons(ARPHRD_ETHER);
 	arp->ar_pro = htons(ETHERTYPE_IP);
 	arp->ar_hln = ETHER_ADDR_LEN;
 	arp->ar_pln = sizeof(struct in_addr);
 	arp->ar_op = htons(ARPOP_REQUEST);
 
 	/* Sender is us; the target hardware address is left zeroed. */
 	memcpy(ar_sha(arp), IF_LLADDR(nd_ifp), ETHER_ADDR_LEN);
 	((struct in_addr *)ar_spa(arp))->s_addr = nd_client.s_addr;
 	bzero(ar_tha(arp), ETHER_ADDR_LEN);
 	((struct in_addr *)ar_tpa(arp))->s_addr = dst;
 
 	/* Fill-up a broadcast address. */
 	memset(&bcast_mac, 0xFF, ETHER_ADDR_LEN);
 
 	return (netdump_ether_output(pkt, nd_ifp, bcast_mac, ETHERTYPE_ARP));
 }
 
 /*
  * Sends ARP requests to locate the server and waits for a response.
  * We first try to ARP the server itself, and fall back to the provided
  * gateway if the server appears to be off-link.
  *
  * Return value:
  *	0 on success
  *	errno on error
  */
 static int
 netdump_arp_gw(void)
 {
 	in_addr_t dst;
 	int error, polls, retries;
 
 	dst = nd_server.s_addr;
 restart:
 	for (retries = 0; retries < nd_arp_retries && have_gw_mac == 0;
 	    retries++) {
 		error = netdump_send_arp(dst);
 		if (error != 0)
 			return (error);
 		for (polls = 0; polls < nd_polls && have_gw_mac == 0; polls++) {
 			netdump_network_poll();
 			DELAY(500);
 		}
 		if (have_gw_mac == 0)
 			printf("(ARP retry)");
 	}
 	if (have_gw_mac != 0)
 		return (0);
 	if (dst == nd_server.s_addr && nd_server.s_addr != nd_gateway.s_addr) {
 		printf("Failed to ARP server, trying to reach gateway...\n");
 		dst = nd_gateway.s_addr;
 		goto restart;
 	}
 
 	printf("\nARP timed out.\n");
 	return (ETIMEDOUT);
 }
 
 /*
  * Dummy free function for netdump clusters.
  */
 /*
  * Intentionally empty: netdump_send() attaches caller-owned dump data to
  * mbufs with this as the external-storage free routine, so there is
  * nothing to release here.
  */
 static void
 netdump_mbuf_free(struct mbuf *m __unused)
 {
 }
 
 /*
  * Construct and reliably send a netdump packet.  May fail from a resource
  * shortage or extreme number of unacknowledged retransmissions.  Wait for
  * an acknowledgement before returning.  Splits packets into chunks small
  * enough to be sent without fragmentation (looks up the interface MTU)
  *
  * Parameters:
  *	type	netdump packet type (HERALD, FINISHED, or VMCORE)
  *	offset	vmcore data offset (bytes)
  *	data	vmcore data
  *	datalen	vmcore data size (bytes)
  *
  * Returns:
  *	int see errno.h, 0 for success
  */
 static int
 netdump_send(uint32_t type, off_t offset, unsigned char *data, uint32_t datalen)
 {
 	struct netdump_msg_hdr *nd_msg_hdr;
 	struct mbuf *m, *m2;
 	uint64_t want_acks;
 	uint32_t i, pktlen, sent_so_far;
 	int retries, polls, error;
 
 	/*
 	 * want_acks and rcvd_acks are 64-bit bitmaps indexed by the packet's
 	 * position in this batch. The bit for packet i is always built from
 	 * a 64-bit one: a plain (1 << i) is an int shift, which is undefined
 	 * once i reaches the width of int and cannot address the upper half
 	 * of the bitmap.
 	 */
 	want_acks = 0;
 	rcvd_acks = 0;
 	retries = 0;
 
 	MPASS(netdump_enabled());
 
 retransmit:
 	/* Chunks can be too big to fit in packets. */
 	for (i = sent_so_far = 0; sent_so_far < datalen ||
 	    (i == 0 && datalen == 0); i++) {
 		pktlen = datalen - sent_so_far;
 
 		/* First bound: the packet structure. */
 		pktlen = min(pktlen, NETDUMP_DATASIZE);
 
 		/* Second bound: the interface MTU (assume no IP options). */
 		pktlen = min(pktlen, nd_ifp->if_mtu - sizeof(struct udpiphdr) -
 		    sizeof(struct netdump_msg_hdr));
 
 		/*
 		 * Check if it is retransmitting and this has been ACKed
 		 * already.
 		 */
 		if ((rcvd_acks & ((uint64_t)1 << i)) != 0) {
 			sent_so_far += pktlen;
 			continue;
 		}
 
 		/*
 		 * Get and fill a header mbuf, then chain data as an extended
 		 * mbuf.
 		 */
 		m = m_gethdr(M_NOWAIT, MT_DATA);
 		if (m == NULL) {
 			printf("netdump_send: Out of mbufs\n");
 			return (ENOBUFS);
 		}
 		m->m_len = sizeof(struct netdump_msg_hdr);
 		m->m_pkthdr.len = sizeof(struct netdump_msg_hdr);
 		MH_ALIGN(m, sizeof(struct netdump_msg_hdr));
 		nd_msg_hdr = mtod(m, struct netdump_msg_hdr *);
 		nd_msg_hdr->mh_seqno = htonl(nd_seqno + i);
 		nd_msg_hdr->mh_type = htonl(type);
 		nd_msg_hdr->mh_offset = htobe64(offset + sent_so_far);
 		nd_msg_hdr->mh_len = htonl(pktlen);
 		nd_msg_hdr->mh__pad = 0;
 
 		if (pktlen != 0) {
 			m2 = m_get(M_NOWAIT, MT_DATA);
 			if (m2 == NULL) {
 				m_freem(m);
 				printf("netdump_send: Out of mbufs\n");
 				return (ENOBUFS);
 			}
 			/*
 			 * The payload is referenced in place, not copied;
 			 * netdump_mbuf_free() is a no-op because the caller
 			 * keeps ownership of 'data'.
 			 */
 			MEXTADD(m2, data + sent_so_far, pktlen,
 			    netdump_mbuf_free, NULL, NULL, 0, EXT_DISPOSABLE);
 			m2->m_len = pktlen;
 
 			m_cat(m, m2);
 			m->m_pkthdr.len += pktlen;
 		}
 		error = netdump_udp_output(m);
 		if (error != 0)
 			return (error);
 
 		/* Note that we're waiting for this packet in the bitfield. */
 		want_acks |= ((uint64_t)1 << i);
 		sent_so_far += pktlen;
 	}
 	/*
 	 * The bitmaps only have NETDUMP_MAX_IN_FLIGHT bits; a batch needing
 	 * more packets than that cannot be tracked. This is only reported.
 	 */
 	if (i >= NETDUMP_MAX_IN_FLIGHT)
 		printf("Warning: Sent more than %d packets (%d). "
 		    "Acknowledgements will fail unless the size of "
 		    "rcvd_acks/want_acks is increased.\n",
 		    NETDUMP_MAX_IN_FLIGHT, i);
 
 	/*
 	 * Wait for acks.  A *real* window would speed things up considerably.
 	 */
 	polls = 0;
 	while (rcvd_acks != want_acks) {
 		if (polls++ > nd_polls) {
 			if (retries++ > nd_retries)
 				return (ETIMEDOUT);
 			printf(". ");
 			goto retransmit;
 		}
 		netdump_network_poll();
 		DELAY(500);
 	}
 	/* Whole batch acknowledged: move the sequence window past it. */
 	nd_seqno += i;
 	return (0);
 }
 
 /*
  * Handler for IP packets: checks their sanity and then processes any netdump
  * ACK packets it finds.
  *
  * It needs to replicate partially the behaviour of ip_input() and
  * udp_input().
  *
  * Parameters:
  *	mb	a pointer to an mbuf * containing the packet received
  *		Updates *mb if m_pullup et al change the pointer
  *		Assumes the calling function will take care of freeing the mbuf
  */
 static void
 netdump_handle_ip(struct mbuf **mb)
 {
 	struct ip *ip;
 	struct udpiphdr *udp;
 	struct netdump_ack *nd_ack;
 	struct mbuf *m;
 	int rcv_ackno;
 	unsigned short hlen;
 
 	/* IP processing. */
 	m = *mb;
 	if (m->m_pkthdr.len < sizeof(struct ip)) {
 		NETDDEBUG("dropping packet too small for IP header\n");
 		return;
 	}
 	if (m->m_len < sizeof(struct ip)) {
 		m = m_pullup(m, sizeof(struct ip));
 		*mb = m;
 		if (m == NULL) {
 			NETDDEBUG("m_pullup failed\n");
 			return;
 		}
 	}
 	ip = mtod(m, struct ip *);
 
 	/* IP version. */
 	if (ip->ip_v != IPVERSION) {
 		NETDDEBUG("bad IP version %d\n", ip->ip_v);
 		return;
 	}
 
 	/* Header length. */
 	hlen = ip->ip_hl << 2;
 	if (hlen < sizeof(struct ip)) {
 		NETDDEBUG("bad IP header length (%hu)\n", hlen);
 		return;
 	}
 	if (hlen > m->m_len) {
 		m = m_pullup(m, hlen);
 		*mb = m;
 		if (m == NULL) {
 			NETDDEBUG("m_pullup failed\n");
 			return;
 		}
 		ip = mtod(m, struct ip *);
 	}
 	/* Ignore packets with IP options. */
 	if (hlen > sizeof(struct ip)) {
 		NETDDEBUG("drop packet with IP options\n");
 		return;
 	}
 
 #ifdef INVARIANTS
 	if ((IN_LOOPBACK(ntohl(ip->ip_dst.s_addr)) ||
 	    IN_LOOPBACK(ntohl(ip->ip_src.s_addr))) &&
 	    (m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) {
 		NETDDEBUG("Bad IP header (RFC1122)\n");
 		return;
 	}
 #endif
 
 	/* Checksum. */
 	if ((m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) != 0) {
 		if ((m->m_pkthdr.csum_flags & CSUM_IP_VALID) == 0) {
 			NETDDEBUG("bad IP checksum\n");
 			return;
 		}
 	} else {
 		/* XXX */ ;
 	}
 
 	/* Convert fields to host byte order. */
 	ip->ip_len = ntohs(ip->ip_len);
 	if (ip->ip_len < hlen) {
 		NETDDEBUG("IP packet smaller (%hu) than header (%hu)\n",
 		    ip->ip_len, hlen);
 		return;
 	}
 	if (m->m_pkthdr.len < ip->ip_len) {
 		NETDDEBUG("IP packet bigger (%hu) than ethernet packet (%d)\n",
 		    ip->ip_len, m->m_pkthdr.len);
 		return;
 	}
 	if (m->m_pkthdr.len > ip->ip_len) {
 
 		/* Truncate the packet to the IP length. */
 		if (m->m_len == m->m_pkthdr.len) {
 			m->m_len = ip->ip_len;
 			m->m_pkthdr.len = ip->ip_len;
 		} else
 			m_adj(m, ip->ip_len - m->m_pkthdr.len);
 	}
 
 	ip->ip_off = ntohs(ip->ip_off);
 
 	/* Check that the source is the server's IP. */
 	if (ip->ip_src.s_addr != nd_server.s_addr) {
 		NETDDEBUG("drop packet not from server (from 0x%x)\n",
 		    ip->ip_src.s_addr);
 		return;
 	}
 
 	/* Check if the destination IP is ours. */
 	if (ip->ip_dst.s_addr != nd_client.s_addr) {
 		NETDDEBUGV("drop packet not to our IP\n");
 		return;
 	}
 
 	if (ip->ip_p != IPPROTO_UDP) {
 		NETDDEBUG("drop non-UDP packet\n");
 		return;
 	}
 
 	/* Do not deal with fragments. */
 	if ((ip->ip_off & (IP_MF | IP_OFFMASK)) != 0) {
 		NETDDEBUG("drop fragmented packet\n");
 		return;
 	}
 
 	/* UDP custom is to have packet length not include IP header. */
 	ip->ip_len -= hlen;
 
 	/* UDP processing. */
 
 	/* Get IP and UDP headers together, along with the netdump packet. */
 	if (m->m_pkthdr.len <
 	    sizeof(struct udpiphdr) + sizeof(struct netdump_ack)) {
 		NETDDEBUG("ignoring small packet\n");
 		return;
 	}
 	if (m->m_len < sizeof(struct udpiphdr) + sizeof(struct netdump_ack)) {
 		m = m_pullup(m, sizeof(struct udpiphdr) +
 		    sizeof(struct netdump_ack));
 		*mb = m;
 		if (m == NULL) {
 			NETDDEBUG("m_pullup failed\n");
 			return;
 		}
 	}
 	udp = mtod(m, struct udpiphdr *);
 
 	if (ntohs(udp->ui_u.uh_dport) != NETDUMP_ACKPORT) {
 		NETDDEBUG("not on the netdump port.\n");
 		return;
 	}
 
 	/* Netdump processing. */
 
 	/*
 	 * Packet is meant for us.  Extract the ack sequence number and the
 	 * port number if necessary.
 	 */
 	nd_ack = (struct netdump_ack *)(mtod(m, caddr_t) +
 	    sizeof(struct udpiphdr));
 	rcv_ackno = ntohl(nd_ack->na_seqno);
 	/* While still on the default port, adopt the port the acks come from. */
 	if (nd_server_port == NETDUMP_PORT)
 		nd_server_port = ntohs(udp->ui_u.uh_sport);
 	if (rcv_ackno >= nd_seqno + NETDUMP_MAX_IN_FLIGHT)
 		printf("%s: ACK %d too far in future!\n", __func__, rcv_ackno);
 	else if (rcv_ackno >= nd_seqno) {
 		/*
 		 * We're interested in this ack. Record it.
 		 * The bit is built from a 64-bit one to match the width of
 		 * rcvd_acks: the offset can be as large as
 		 * NETDUMP_MAX_IN_FLIGHT - 1, and shifting a plain int that
 		 * far is undefined.
 		 */
 		rcvd_acks |= (uint64_t)1 << (rcv_ackno - nd_seqno);
 	}
 }
 
 /*
  * Handler for ARP packets: checks their sanity and then
  * 1. If the ARP is a request for our IP, respond with our MAC address
  * 2. If the ARP is a response from our server, record its MAC address
  *
  * It needs to replicate partially the behaviour of arpintr() and
  * in_arpinput().
  *
  * Parameters:
  *	mb	a pointer to an mbuf * containing the packet received
  *		Updates *mb if m_pullup et al change the pointer
  *		Assumes the calling function will take care of freeing the mbuf
  */
 static void
 netdump_handle_arp(struct mbuf **mb)
 {
 	char buf[INET_ADDRSTRLEN];
 	struct in_addr isaddr, itaddr, myaddr;
 	struct ether_addr dst;
 	struct mbuf *m;
 	struct arphdr *ah;
 	struct ifnet *ifp;
 	uint8_t *enaddr;
 	int req_len, op;
 
 	m = *mb;
 	ifp = m->m_pkthdr.rcvif;
 	/* Make the fixed part of the ARP header contiguous. */
 	if (m->m_len < sizeof(struct arphdr)) {
 		m = m_pullup(m, sizeof(struct arphdr));
 		*mb = m;
 		if (m == NULL) {
 			NETDDEBUG("runt packet: m_pullup failed\n");
 			return;
 		}
 	}
 
 	/* Only ethernet hardware addresses and IPv4 are handled. */
 	ah = mtod(m, struct arphdr *);
 	if (ntohs(ah->ar_hrd) != ARPHRD_ETHER) {
 		NETDDEBUG("unknown hardware address 0x%2D)\n",
 		    (unsigned char *)&ah->ar_hrd, "");
 		return;
 	}
 	if (ntohs(ah->ar_pro) != ETHERTYPE_IP) {
 		NETDDEBUG("drop ARP for unknown protocol %d\n",
 		    ntohs(ah->ar_pro));
 		return;
 	}
 	/*
 	 * Make the whole message contiguous. The length is computed from the
 	 * interface's address length, not from the header's own ar_hln.
 	 */
 	req_len = arphdr_len2(ifp->if_addrlen, sizeof(struct in_addr));
 	if (m->m_len < req_len) {
 		m = m_pullup(m, req_len);
 		*mb = m;
 		if (m == NULL) {
 			NETDDEBUG("runt packet: m_pullup failed\n");
 			return;
 		}
 	}
 	/* m_pullup() may have replaced the mbuf: reload the header pointer. */
 	ah = mtod(m, struct arphdr *);
 
 	/* isaddr: sender protocol address; itaddr: target protocol address. */
 	op = ntohs(ah->ar_op);
 	memcpy(&isaddr, ar_spa(ah), sizeof(isaddr));
 	memcpy(&itaddr, ar_tpa(ah), sizeof(itaddr));
 	enaddr = (uint8_t *)IF_LLADDR(ifp);
 	myaddr = nd_client;
 
 	if (memcmp(ar_sha(ah), enaddr, ifp->if_addrlen) == 0) {
 		NETDDEBUG("ignoring ARP from myself\n");
 		return;
 	}
 
 	if (isaddr.s_addr == nd_client.s_addr) {
 		printf("%s: %*D is using my IP address %s!\n", __func__,
 		    ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
 		    inet_ntoa_r(isaddr, buf));
 		return;
 	}
 
 	if (memcmp(ar_sha(ah), ifp->if_broadcastaddr, ifp->if_addrlen) == 0) {
 		NETDDEBUG("ignoring ARP from broadcast address\n");
 		return;
 	}
 
 	/* Case 2: a reply from the server or the gateway gives us its MAC. */
 	if (op == ARPOP_REPLY) {
 		if (isaddr.s_addr != nd_gateway.s_addr &&
 		    isaddr.s_addr != nd_server.s_addr) {
 			inet_ntoa_r(isaddr, buf);
 			NETDDEBUG(
 			    "ignoring ARP reply from %s (not netdump server)\n",
 			    buf);
 			return;
 		}
 		/* Copy at most ETHER_ADDR_LEN bytes into nd_gw_mac. */
 		memcpy(nd_gw_mac.octet, ar_sha(ah),
 		    min(ah->ar_hln, ETHER_ADDR_LEN));
 		have_gw_mac = 1;
 		NETDDEBUG("got server MAC address %6D\n", nd_gw_mac.octet, ":");
 		return;
 	}
 
 	if (op != ARPOP_REQUEST) {
 		NETDDEBUG("ignoring ARP non-request/reply\n");
 		return;
 	}
 
 	if (itaddr.s_addr != nd_client.s_addr) {
 		NETDDEBUG("ignoring ARP not to our IP\n");
 		return;
 	}
 
 	/*
 	 * Case 1: a request for our IP. Turn the received message into the
 	 * reply in place: the old sender becomes the target, and we become
 	 * the sender (our MAC, and the requested IP saved in itaddr).
 	 * NOTE(review): the copy lengths ar_hln/ar_pln are taken from the
 	 * received header and are not compared here against the req_len
 	 * that was pulled up -- confirm they cannot exceed it.
 	 */
 	memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);
 	memcpy(ar_sha(ah), enaddr, ah->ar_hln);
 	memcpy(ar_tpa(ah), ar_spa(ah), ah->ar_pln);
 	memcpy(ar_spa(ah), &itaddr, ah->ar_pln);
 	ah->ar_op = htons(ARPOP_REPLY);
 	ah->ar_pro = htons(ETHERTYPE_IP);
 	m->m_flags &= ~(M_BCAST|M_MCAST);
 	m->m_len = arphdr_len(ah);
 	m->m_pkthdr.len = m->m_len;
 
 	/* The reply goes to the requester's hardware address. */
 	memcpy(dst.octet, ar_tha(ah), ETHER_ADDR_LEN);
 	netdump_ether_output(m, ifp, dst, ETHERTYPE_ARP);
 	/* The mbuf was handed to the output path: the caller must not free it. */
 	*mb = NULL;
 }
 
 /*
  * Handler for incoming packets directly from the network adapter
  * Identifies the packet type (IP or ARP) and passes it along to one of the
  * helper functions netdump_handle_ip or netdump_handle_arp.
  *
  * It needs to replicate partially the behaviour of ether_input() and
  * ether_demux().
  *
  * Parameters:
  *	ifp	the interface the packet came from (should be nd_ifp)
  *	m	an mbuf containing the packet received
  */
 static void
 netdump_pkt_in(struct ifnet *ifp, struct mbuf *m)
 {
 	struct ifreq ifr;
 	struct ether_header *eh;
 	u_short etype;
 
 	/* Ethernet processing. */
 	if ((m->m_flags & M_PKTHDR) == 0) {
 		NETDDEBUG_IF(ifp, "discard frame without packet header\n");
 		goto done;
 	}
 	if (m->m_len < ETHER_HDR_LEN) {
 		NETDDEBUG_IF(ifp,
 	    "discard frame without leading eth header (len %u pktlen %u)\n",
 		    m->m_len, m->m_pkthdr.len);
 		goto done;
 	}
 	if ((m->m_flags & M_HASFCS) != 0) {
 		m_adj(m, -ETHER_CRC_LEN);
 		m->m_flags &= ~M_HASFCS;
 	}
 	eh = mtod(m, struct ether_header *);
 	etype = ntohs(eh->ether_type);
 	if ((m->m_flags & M_VLANTAG) != 0 || etype == ETHERTYPE_VLAN) {
 		NETDDEBUG_IF(ifp, "ignoring vlan packets\n");
 		goto done;
 	}
 	if (if_gethwaddr(ifp, &ifr) != 0) {
 		NETDDEBUG_IF(ifp, "failed to get hw addr for interface\n");
 		goto done;
 	}
 	if (memcmp(ifr.ifr_addr.sa_data, eh->ether_dhost,
 	    ETHER_ADDR_LEN) != 0) {
 		NETDDEBUG_IF(ifp,
 		    "discard frame with incorrect destination addr\n");
 		goto done;
 	}
 
 	/* Done ethernet processing. Strip off the ethernet header. */
 	m_adj(m, ETHER_HDR_LEN);
 	switch (etype) {
 	case ETHERTYPE_ARP:
 		netdump_handle_arp(&m);
 		break;
 	case ETHERTYPE_IP:
 		netdump_handle_ip(&m);
 		break;
 	default:
 		NETDDEBUG_IF(ifp, "dropping unknown ethertype %hu\n", etype);
 		break;
 	}
 done:
 	if (m != NULL)
 		m_freem(m);
 }
 
 /*
  * After trapping, instead of assuming that most of the network stack is sane,
  * we just poll the driver directly for packets.
  */
 static void
 netdump_network_poll(void)
 {
 
 	MPASS(netdump_enabled());
 
 	nd_ifp->if_netdump_methods->nd_poll(nd_ifp, 1000);
 }
 
 /*-
  * Dumping specific primitives.
  */
 
 /*
  * Callback from dumpsys() to dump a chunk of memory.
  * The chunk is staged in nd_buf and transmitted as a NETDUMP_VMCORE
  * message.  A NULL virtual address marks the end of the dump: the outcome
  * is reported on the console, a NETDUMP_FINISHED message is sent unless an
  * earlier chunk failed, and netdump_cleanup() is run.
  *
  * Parameters:
  *	priv	 Unused. Optional private pointer.
  *	virtual  Virtual address (where to read the data from), or NULL to
  *		 finish the dump
  *	physical Unused. Physical memory address.
  *	offset	 Offset from start of core file
  *	length	 Data length
  *
  * Return value:
  *	0 on success (always 0 for the end-of-dump call)
  *	ENOSPC if length exceeds the size of nd_buf
  *	the netdump_send() error otherwise
  */
 static int
 netdump_dumper(void *priv __unused, void *virtual,
     vm_offset_t physical __unused, off_t offset, size_t length)
 {
 	int rc;
 
 	NETDDEBUGV("netdump_dumper(NULL, %p, NULL, %ju, %zu)\n",
 	    virtual, (uintmax_t)offset, length);
 
 	if (virtual != NULL) {
 		/* Regular data chunk: it has to fit in the staging buffer. */
 		if (length > sizeof(nd_buf))
 			return (ENOSPC);
 
 		memmove(nd_buf, virtual, length);
 		rc = netdump_send(NETDUMP_VMCORE, offset, nd_buf, length);
 		if (rc != 0) {
 			/* Remembered so the final call reports the failure. */
 			dump_failed = 1;
 		}
 		return (rc);
 	}
 
 	/* End of dump: report the result and undo the netdump_start() setup. */
 	if (dump_failed != 0)
 		printf("failed to dump the kernel core\n");
 	else if (netdump_send(NETDUMP_FINISHED, 0, NULL, 0) != 0)
 		printf("failed to close the transaction\n");
 	else
 		printf("\nnetdump finished.\n");
 	netdump_cleanup();
 	return (0);
 }
 
 /*
  * Perform any initalization needed prior to transmitting the kernel core.
  */
 static int
 netdump_start(struct dumperinfo *di)
 {
 	char *path;
 	char buf[INET_ADDRSTRLEN];
 	uint32_t len;
 	int error;
 
 	error = 0;
 
 	/* Check if the dumping is allowed to continue. */
 	if (!netdump_enabled())
 		return (EINVAL);
 
 	if (panicstr == NULL) {
 		printf(
 		    "netdump_start: netdump may only be used after a panic\n");
 		return (EINVAL);
 	}
 
 	if (nd_server.s_addr == INADDR_ANY) {
 		printf("netdump_start: can't netdump; no server IP given\n");
 		return (EINVAL);
 	}
 	if (nd_client.s_addr == INADDR_ANY) {
 		printf("netdump_start: can't netdump; no client IP given\n");
 		return (EINVAL);
 	}
 
 	/* We start dumping at offset 0. */
 	di->dumpoff = 0;
 
 	nd_seqno = 1;
 
 	/*
 	 * nd_server_port could have switched after the first ack the
 	 * first time it gets called.  Adjust it accordingly.
 	 */
 	nd_server_port = NETDUMP_PORT;
 
 	/* Switch to the netdump mbuf zones. */
 	netdump_mbuf_dump();
 
 	nd_ifp->if_netdump_methods->nd_event(nd_ifp, NETDUMP_START);
 
 	/* Make the card use *our* receive callback. */
 	drv_if_input = nd_ifp->if_input;
 	nd_ifp->if_input = netdump_pkt_in;
 
 	if (nd_gateway.s_addr == INADDR_ANY) {
 		restore_gw_addr = 1;
 		nd_gateway.s_addr = nd_server.s_addr;
 	}
 
 	printf("netdump in progress. searching for server...\n");
 	if (netdump_arp_gw()) {
 		printf("failed to locate server MAC address\n");
 		error = EINVAL;
 		goto trig_abort;
 	}
 
 	if (nd_path[0] != '\0') {
 		path = nd_path;
 		len = strlen(path) + 1;
 	} else {
 		path = NULL;
 		len = 0;
 	}
 	if (netdump_send(NETDUMP_HERALD, 0, path, len) != 0) {
 		printf("failed to contact netdump server\n");
 		error = EINVAL;
 		goto trig_abort;
 	}
 	printf("netdumping to %s (%6D)\n", inet_ntoa_r(nd_server, buf),
 	    nd_gw_mac.octet, ":");
 	return (0);
 
 trig_abort:
 	netdump_cleanup();
 	return (error);
 }
 
 /*
  * Send the kernel dump header as a NETDUMP_KDH message and, when a key is
  * supplied (keysize > 0), follow it with a NETDUMP_EKCD_KEY message
  * carrying the key.  Both payloads are staged in nd_buf.
  *
  * Returns 0 on success, EINVAL if the key does not fit in nd_buf, or the
  * error reported by netdump_send().
  */
 static int
 netdump_write_headers(struct dumperinfo *di, struct kerneldumpheader *kdh,
     void *key, uint32_t keysize)
 {
 	int rc;
 
 	memcpy(nd_buf, kdh, sizeof(*kdh));
 	rc = netdump_send(NETDUMP_KDH, 0, nd_buf, sizeof(*kdh));
 
 	/* Stop here if the header failed or there is no key to send. */
 	if (rc != 0 || keysize == 0)
 		return (rc);
 
 	if (keysize > sizeof(nd_buf))
 		return (EINVAL);
 
 	memcpy(nd_buf, key, keysize);
 	return (netdump_send(NETDUMP_EKCD_KEY, 0, nd_buf, keysize));
 }
 
 /*
  * Cleanup routine for a possibly failed netdump.
  */
 static void
 netdump_cleanup(void)
 {
 
 	if (restore_gw_addr != 0) {
 		nd_gateway.s_addr = INADDR_ANY;
 		restore_gw_addr = 0;
 	}
 	if (drv_if_input != NULL) {
 		nd_ifp->if_input = drv_if_input;
 		drv_if_input = NULL;
 	}
 	nd_ifp->if_netdump_methods->nd_event(nd_ifp, NETDUMP_END);
 }
 
 /*-
  * KLD specific code.
  */
 
 /* Character device switch for the netdump device; only ioctl is provided. */
 static struct cdevsw netdump_cdevsw = {
 	.d_name =	"netdump",
 	.d_version =	D_VERSION,
 	.d_ioctl =	netdump_ioctl,
 };
 
 /* Device node created on MOD_LOAD and destroyed on MOD_UNLOAD. */
 static struct cdev *netdump_cdev;
 
 static void
 netdump_unconfigure(void)
 {
 	struct diocskerneldump_arg kda;
 
 	NETDUMP_ASSERT_WLOCKED();
 	KASSERT(netdump_enabled(), ("%s: nd_ifp NULL", __func__));
 
 	bzero(&kda, sizeof(kda));
 	kda.kda_index = KDA_REMOVE_DEV;
 	(void)dumper_remove(nd_conf.ndc_iface, &kda);
 
 	netdump_mbuf_drain();
 
 	if_rele(nd_ifp);
 	nd_ifp = NULL;
 
 	bzero(&nd_conf, sizeof(nd_conf));
 }
 
 /*
  * ifnet_departure_event handler (registered in netdump_modevent()).  If the
  * departing interface is the one netdump is bound to, the netdump
  * configuration is discarded.
  */
 static void
 netdump_ifdetach(void *arg __unused, struct ifnet *ifp)
 {
 
 	NETDUMP_WLOCK();
 	if (nd_ifp == ifp) {
 		netdump_unconfigure();
 	}
 	NETDUMP_WUNLOCK();
 }
 
 static int
 netdump_configure(struct diocskerneldump_arg *conf, struct thread *td)
 {
 	struct ifnet *ifp;
 
 	NETDUMP_ASSERT_WLOCKED();
 
 	CURVNET_SET(TD_TO_VNET(td));
 	if (!IS_DEFAULT_VNET(curvnet)) {
 		CURVNET_RESTORE();
 		return (EINVAL);
 	}
 	ifp = ifunit_ref(conf->kda_iface);
 	CURVNET_RESTORE();
 
 	if (ifp == NULL)
 		return (ENOENT);
 	if ((if_getflags(ifp) & IFF_UP) == 0) {
 		if_rele(ifp);
 		return (ENXIO);
 	}
 	if (!netdump_supported_nic(ifp) || ifp->if_type != IFT_ETHER) {
 		if_rele(ifp);
 		return (ENODEV);
 	}
 
 	if (netdump_enabled())
 		if_rele(nd_ifp);
 	nd_ifp = ifp;
 
 	netdump_reinit_internal(ifp);
 
 #define COPY_SIZED(elm) do {	\
 	_Static_assert(sizeof(nd_conf.ndc_ ## elm) ==			\
 	    sizeof(conf->kda_ ## elm), "elm " __XSTRING(elm) " mismatch"); \
 	memcpy(&nd_conf.ndc_ ## elm, &conf->kda_ ## elm,		\
 	    sizeof(nd_conf.ndc_ ## elm));				\
 } while (0)
 	COPY_SIZED(iface);
 	COPY_SIZED(server);
 	COPY_SIZED(client);
 	COPY_SIZED(gateway);
 	COPY_SIZED(af);
 #undef COPY_SIZED
 
 	return (0);
 }
 
 /*
  * Reinitialize the mbuf pool used by drivers while dumping. This is called
  * from the generic ioctl handler for SIOCSIFMTU after any NIC driver has
  * reconfigured itself, so ifp is not necessarily the configured netdump
  * interface; nothing is done unless it is.
  */
 void
 netdump_reinit(struct ifnet *ifp)
 {
 
 	NETDUMP_WLOCK();
 	if (ifp == nd_ifp)
 		netdump_reinit_internal(ifp);
 	NETDUMP_WUNLOCK();
 }
 
 /*
  * Query the driver's nd_init method for its receive ring count, cluster
  * count and cluster size, and resize the netdump mbuf pool accordingly.
  *
  * Must be called with the configuration lock write-held.
  */
 static void
 netdump_reinit_internal(struct ifnet *ifp)
 {
 	int clsize, ncl, nrxr;
 
 	NETDUMP_ASSERT_WLOCKED();
 
 	ifp->if_netdump_methods->nd_init(ifp, &nrxr, &ncl, &clsize);
 	KASSERT(nrxr > 0, ("invalid receive ring count %d", nrxr));
 
 	/*
 	 * Clusters are provisioned per receive ring (ncl * nrxr).  The mbuf
 	 * count is ncl * (4 + nrxr): on top of one mbuf per cluster, the
 	 * transmit side needs two headers per message, multiplied by four
 	 * for some breathing room.
 	 */
 	netdump_mbuf_reinit(ncl * (4 + nrxr), ncl * nrxr, clsize);
 }
 
 /*
  * ioctl(2) handler for the netdump device.  It is used to register netdump
  * as a dump device, to remove that registration, and to read back the
  * current configuration.  The whole handler runs with the netdump
  * configuration lock write-held.
  *
  * Commands handled:
  *     DIOCSKERNELDUMP  configure netdump and register the dumper, or, for
  *                      the KDA_REMOVE* indices, unconfigure it.
  *     DIOCGKERNELDUMP  return the active configuration.
  *     The FreeBSD 11.x/12.x compat commands, when compiled in.
  *
  * Parameters:
  *     dev, Unused.
  *     cmd, The ioctl to be handled.
  *     addr, The parameter for the ioctl.
  *     flags, Unused.
  *     td, The thread invoking this ioctl.
  *
  * Returns:
  *     0 on success, and an errno value on failure.  A failed configuration
  *     request never leaves netdump enabled.
  */
 static int
 netdump_ioctl(struct cdev *dev __unused, u_long cmd, caddr_t addr,
     int flags __unused, struct thread *td)
 {
 	struct diocskerneldump_arg kda_copy, *conf;
 	struct dumperinfo dumper;
 	uint8_t *encryptedkey;
 	int error;
 #ifdef COMPAT_FREEBSD11
 	u_int u;
 #endif
 #ifdef COMPAT_FREEBSD12
 	struct diocskerneldump_arg_freebsd12 *kda12;
 	struct netdump_conf_freebsd12 *conf12;
 #endif
 
 	/*
 	 * conf doubles as the "scrub on exit" pointer: whatever it points to
 	 * when the switch is left gets zeroed below.
 	 */
 	conf = NULL;
 	error = 0;
 	NETDUMP_WLOCK();
 
 	switch (cmd) {
 #ifdef COMPAT_FREEBSD11
 	case DIOCSKERNELDUMP_FREEBSD11:
 		gone_in(13, "11.x ABI compatibility");
 		/* Only "disable" (zero) is accepted through this command. */
 		u = *(u_int *)addr;
 		if (u != 0) {
 			error = ENXIO;
 			break;
 		}
 		if (netdump_enabled())
 			netdump_unconfigure();
 		break;
 #endif
 #ifdef COMPAT_FREEBSD12
 		/*
 		 * Used by dumpon(8) in 12.x for clearing previous
 		 * configuration -- then NETDUMPSCONF_FREEBSD12 is used to
 		 * actually configure netdump.
 		 */
 	case DIOCSKERNELDUMP_FREEBSD12:
 		gone_in(14, "12.x ABI compatibility");
 
 		kda12 = (void *)addr;
 		if (kda12->kda12_enable) {
 			error = ENXIO;
 			break;
 		}
 		if (netdump_enabled())
 			netdump_unconfigure();
 		break;
 
 	case NETDUMPGCONF_FREEBSD12:
 		gone_in(14, "FreeBSD 12.x ABI compat");
 		conf12 = (void *)addr;
 
 		if (!netdump_enabled()) {
 			error = ENXIO;
 			break;
 		}
 		if (nd_conf.ndc_af != AF_INET) {
 			error = EOPNOTSUPP;
 			break;
 		}
 
 		strlcpy(conf12->ndc12_iface, nd_ifp->if_xname,
 		    sizeof(conf12->ndc12_iface));
 		memcpy(&conf12->ndc12_server, &nd_server,
 		    sizeof(conf12->ndc12_server));
 		memcpy(&conf12->ndc12_client, &nd_client,
 		    sizeof(conf12->ndc12_client));
 		memcpy(&conf12->ndc12_gateway, &nd_gateway,
 		    sizeof(conf12->ndc12_gateway));
 		break;
 #endif
 	case DIOCGKERNELDUMP:
 		conf = (void *)addr;
 		/*
 		 * For now, index is ignored; netdump doesn't support multiple
 		 * configurations (yet).
 		 */
 		if (!netdump_enabled()) {
 			error = ENXIO;
 			conf = NULL;
 			break;
 		}
 
 		strlcpy(conf->kda_iface, nd_ifp->if_xname,
 		    sizeof(conf->kda_iface));
 		memcpy(&conf->kda_server, &nd_server, sizeof(nd_server));
 		memcpy(&conf->kda_client, &nd_client, sizeof(nd_client));
 		memcpy(&conf->kda_gateway, &nd_gateway, sizeof(nd_gateway));
 		conf->kda_af = nd_conf.ndc_af;
 		/* The reply lives in *conf; keep the exit path from wiping it. */
 		conf = NULL;
 		break;
 
 #ifdef COMPAT_FREEBSD12
 	case NETDUMPSCONF_FREEBSD12:
 		gone_in(14, "FreeBSD 12.x ABI compat");
 
 		conf12 = (struct netdump_conf_freebsd12 *)addr;
 
 		_Static_assert(offsetof(struct diocskerneldump_arg, kda_server)
 		    == offsetof(struct netdump_conf_freebsd12, ndc12_server),
 		    "simplifying assumption");
 
 		/* Translate the 12.x request into the current layout. */
 		memset(&kda_copy, 0, sizeof(kda_copy));
 		memcpy(&kda_copy, conf12,
 		    offsetof(struct diocskerneldump_arg, kda_server));
 
 		/* 12.x ABI could only configure IPv4 (INET) netdump. */
 		kda_copy.kda_af = AF_INET;
 		memcpy(&kda_copy.kda_server.in4, &conf12->ndc12_server,
 		    sizeof(struct in_addr));
 		memcpy(&kda_copy.kda_client.in4, &conf12->ndc12_client,
 		    sizeof(struct in_addr));
 		memcpy(&kda_copy.kda_gateway.in4, &conf12->ndc12_gateway,
 		    sizeof(struct in_addr));
 
 		kda_copy.kda_index =
 		    (conf12->ndc12_kda.kda12_enable ? 0 : KDA_REMOVE_ALL);
 
 		conf = &kda_copy;
 		explicit_bzero(conf12, sizeof(*conf12));
 		/* FALLTHROUGH */
 #endif
 	case DIOCSKERNELDUMP:
 		encryptedkey = NULL;
 		if (cmd == DIOCSKERNELDUMP) {
 			conf = (void *)addr;
 			memcpy(&kda_copy, conf, sizeof(kda_copy));
 		}
 		/* Netdump only supports IP4 at this time. */
 		if (conf->kda_af != AF_INET) {
 			error = EPROTONOSUPPORT;
 			break;
 		}
 
 		/* Guarantee NUL termination of the interface name. */
 		conf->kda_iface[sizeof(conf->kda_iface) - 1] = '\0';
 		if (conf->kda_index == KDA_REMOVE ||
 		    conf->kda_index == KDA_REMOVE_DEV ||
 		    conf->kda_index == KDA_REMOVE_ALL) {
 			if (netdump_enabled())
 				netdump_unconfigure();
 			if (conf->kda_index == KDA_REMOVE_ALL)
 				error = dumper_remove(NULL, conf);
 			break;
 		}
 
 		error = netdump_configure(conf, td);
 		if (error != 0)
 			break;
 
 		/*
 		 * From here on netdump is configured and holds an interface
 		 * reference, so every failure below must undo that with
 		 * netdump_unconfigure(); otherwise netdump would stay
 		 * "enabled" with no dumper registered.
 		 */
 		if (conf->kda_encryption != KERNELDUMP_ENC_NONE) {
 			if (conf->kda_encryptedkeysize <= 0 ||
 			    conf->kda_encryptedkeysize >
 			    KERNELDUMP_ENCKEY_MAX_SIZE) {
 				netdump_unconfigure();
 				error = EINVAL;
 				break;
 			}
 			encryptedkey = malloc(conf->kda_encryptedkeysize,
 			    M_TEMP, M_WAITOK);
 			error = copyin(conf->kda_encryptedkey, encryptedkey,
 			    conf->kda_encryptedkeysize);
 			if (error != 0) {
 				/* Scrub whatever part of the key was copied. */
 				explicit_bzero(encryptedkey,
 				    conf->kda_encryptedkeysize);
 				free(encryptedkey, M_TEMP);
 				netdump_unconfigure();
 				break;
 			}
 
 			/* Hand dumper_insert() the kernel copy of the key. */
 			conf->kda_encryptedkey = encryptedkey;
 		}
 
 		memset(&dumper, 0, sizeof(dumper));
 		dumper.dumper_start = netdump_start;
 		dumper.dumper_hdr = netdump_write_headers;
 		dumper.dumper = netdump_dumper;
 		dumper.priv = NULL;
 		dumper.blocksize = NETDUMP_DATASIZE;
 		dumper.maxiosize = MAXDUMPPGS * PAGE_SIZE;
 		dumper.mediaoffset = 0;
 		dumper.mediasize = 0;
 
 		error = dumper_insert(&dumper, conf->kda_iface, conf);
 		if (encryptedkey != NULL) {
 			explicit_bzero(encryptedkey,
 			    conf->kda_encryptedkeysize);
 			free(encryptedkey, M_TEMP);
 		}
 		if (error != 0)
 			netdump_unconfigure();
 		break;
 	default:
 		error = ENOTTY;
 		break;
 	}
 	/* Don't leave configuration or key material lying around. */
 	explicit_bzero(&kda_copy, sizeof(kda_copy));
 	if (conf != NULL)
 		explicit_bzero(conf, sizeof(*conf));
 	NETDUMP_WUNLOCK();
 	return (error);
 }
 
 /*
  * Module event handler, called upon system init or kld load/unload.
  *
  * On MOD_LOAD it creates the netdump device node, registers the
  * ifnet_departure_event handler and, if the net.dump.iface kernel
  * environment variable is set, configures netdump from net.dump.iface,
  * net.dump.server, net.dump.client and net.dump.gateway.  Addresses whose
  * variable is absent are left as zero (unconfigured).
  *
  * On MOD_UNLOAD it drops any active configuration, destroys the device
  * node and deregisters the event handler.
  *
  * Parameters:
  *	mod, Unused.
  *	what, The module event type.
  *	priv, Unused.
  *
  * Returns:
  *	int, An errno value if an error occurred, 0 otherwise.
  *	Unhandled event types yield EOPNOTSUPP.
  */
 static int
 netdump_modevent(module_t mod __unused, int what, void *priv __unused)
 {
 	struct diocskerneldump_arg conf;
 	char *arg;
 	int error;
 
 	error = 0;
 	switch (what) {
 	case MOD_LOAD:
 		error = make_dev_p(MAKEDEV_WAITOK, &netdump_cdev,
 		    &netdump_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "netdump");
 		if (error != 0)
 			return (error);
 
 		nd_detach_cookie = EVENTHANDLER_REGISTER(ifnet_departure_event,
 		    netdump_ifdetach, NULL, EVENTHANDLER_PRI_ANY);
 
 		if ((arg = kern_getenv("net.dump.iface")) != NULL) {
 			/*
 			 * Start from an all-zero request.  Only some fields
 			 * are filled in below, but netdump_configure() copies
 			 * the server, client and gateway fields wholesale, so
 			 * anything left unset must not be stack garbage.
 			 */
 			memset(&conf, 0, sizeof(conf));
 
 			strlcpy(conf.kda_iface, arg, sizeof(conf.kda_iface));
 			freeenv(arg);
 
 			if ((arg = kern_getenv("net.dump.server")) != NULL) {
 				inet_aton(arg, &conf.kda_server.in4);
 				freeenv(arg);
 			}
 			if ((arg = kern_getenv("net.dump.client")) != NULL) {
 				inet_aton(arg, &conf.kda_client.in4);
 				freeenv(arg);
 			}
 			if ((arg = kern_getenv("net.dump.gateway")) != NULL) {
 				inet_aton(arg, &conf.kda_gateway.in4);
 				freeenv(arg);
 			}
 			conf.kda_af = AF_INET;
 
 			/*
 			 * Errors are deliberately ignored: a bad tunable must
 			 * not make the module load fail.
 			 */
 			NETDUMP_WLOCK();
 			(void)netdump_configure(&conf, curthread);
 			NETDUMP_WUNLOCK();
 		}
 		break;
 	case MOD_UNLOAD:
 		NETDUMP_WLOCK();
 		if (netdump_enabled()) {
 			printf("netdump: disabling dump device for unload\n");
 			netdump_unconfigure();
 		}
 		NETDUMP_WUNLOCK();
 		destroy_dev(netdump_cdev);
 		EVENTHANDLER_DEREGISTER(ifnet_departure_event,
 		    nd_detach_cookie);
 		break;
 	default:
 		error = EOPNOTSUPP;
 		break;
 	}
 	return (error);
 }
 
 static moduledata_t netdump_mod = {
 	"netdump",
 	netdump_modevent,
 	NULL,
 };
 
 MODULE_VERSION(netdump, 1);
 DECLARE_MODULE(netdump, netdump_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);