Index: stable/12/usr.sbin/bhyve/mevent.c
===================================================================
--- stable/12/usr.sbin/bhyve/mevent.c	(revision 353120)
+++ stable/12/usr.sbin/bhyve/mevent.c	(revision 353121)
@@ -1,483 +1,496 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2011 NetApp, Inc.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 /*
  * Micro event library for FreeBSD, designed for a single i/o thread 
  * using kqueue, and having events be persistent by default.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <assert.h>
 #ifndef WITHOUT_CAPSICUM
 #include <capsicum_helpers.h>
 #endif
 #include <err.h>
 #include <errno.h>
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
 #include <sysexits.h>
 #include <unistd.h>
 
 #include <sys/types.h>
 #ifndef WITHOUT_CAPSICUM
 #include <sys/capsicum.h>
 #endif
 #include <sys/event.h>
 #include <sys/time.h>
 
 #include <pthread.h>
 #include <pthread_np.h>
 
 #include "mevent.h"
 
 #define	MEVENT_MAX	64
 
-#define	MEV_ADD		1
-#define	MEV_ENABLE	2
-#define	MEV_DISABLE	3
-#define	MEV_DEL_PENDING	4
+#define	MEV_ADD			1
+#define	MEV_ENABLE		2
+#define	MEV_DISABLE		3
+#define	MEV_DEL_PENDING		4
+#define	MEV_ADD_DISABLED	5
 
 extern char *vmname;
 
 static pthread_t mevent_tid;
 static int mevent_timid = 43;
 static int mevent_pipefd[2];
 static pthread_mutex_t mevent_lmutex = PTHREAD_MUTEX_INITIALIZER;
 
 struct mevent {
 	void	(*me_func)(int, enum ev_type, void *);
 #define me_msecs me_fd
 	int	me_fd;
 	int	me_timid;
 	enum ev_type me_type;
 	void    *me_param;
 	int	me_cq;
 	int	me_state;
 	int	me_closefd;
 	LIST_ENTRY(mevent) me_list;
 };
 
 static LIST_HEAD(listhead, mevent) global_head, change_head;
 
 static void
 mevent_qlock(void)
 {
 	pthread_mutex_lock(&mevent_lmutex);
 }
 
 static void
 mevent_qunlock(void)
 {
 	pthread_mutex_unlock(&mevent_lmutex);
 }
 
 static void
 mevent_pipe_read(int fd, enum ev_type type, void *param)
 {
 	char buf[MEVENT_MAX];
 	int status;
 
 	/*
 	 * Drain the pipe read side. The fd is non-blocking so this is
 	 * safe to do.
 	 */
 	do {
 		status = read(fd, buf, sizeof(buf));
 	} while (status == MEVENT_MAX);
 }
 
 static void
 mevent_notify(void)
 {
 	char c;
 	
 	/*
 	 * If calling from outside the i/o thread, write a byte on the
 	 * pipe to force the i/o thread to exit the blocking kevent call.
 	 */
 	if (mevent_pipefd[1] != 0 && pthread_self() != mevent_tid) {
 		write(mevent_pipefd[1], &c, 1);
 	}
 }
 
 static int
 mevent_kq_filter(struct mevent *mevp)
 {
 	int retval;
 
 	retval = 0;
 
 	if (mevp->me_type == EVF_READ)
 		retval = EVFILT_READ;
 
 	if (mevp->me_type == EVF_WRITE)
 		retval = EVFILT_WRITE;
 
 	if (mevp->me_type == EVF_TIMER)
 		retval = EVFILT_TIMER;
 
 	if (mevp->me_type == EVF_SIGNAL)
 		retval = EVFILT_SIGNAL;
 
 	return (retval);
 }
 
 static int
 mevent_kq_flags(struct mevent *mevp)
 {
 	int ret;
 
 	switch (mevp->me_state) {
 	case MEV_ADD:
 		ret = EV_ADD;		/* implicitly enabled */
 		break;
+	case MEV_ADD_DISABLED:
+		ret = EV_ADD | EV_DISABLE;
+		break;
 	case MEV_ENABLE:
 		ret = EV_ENABLE;
 		break;
 	case MEV_DISABLE:
 		ret = EV_DISABLE;
 		break;
 	case MEV_DEL_PENDING:
 		ret = EV_DELETE;
 		break;
 	default:
 		assert(0);
 		break;
 	}
 
 	return (ret);
 }
 
 static int
 mevent_kq_fflags(struct mevent *mevp)
 {
 	/* XXX nothing yet, perhaps EV_EOF for reads ? */
 	return (0);
 }
 
 static int
 mevent_build(int mfd, struct kevent *kev)
 {
 	struct mevent *mevp, *tmpp;
 	int i;
 
 	i = 0;
 
 	mevent_qlock();
 
 	LIST_FOREACH_SAFE(mevp, &change_head, me_list, tmpp) {
 		if (mevp->me_closefd) {
 			/*
 			 * A close of the file descriptor will remove the
 			 * event
 			 */
 			close(mevp->me_fd);
 		} else {
 			if (mevp->me_type == EVF_TIMER) {
 				kev[i].ident = mevp->me_timid;
 				kev[i].data = mevp->me_msecs;
 			} else {
 				kev[i].ident = mevp->me_fd;
 				kev[i].data = 0;
 			}
 			kev[i].filter = mevent_kq_filter(mevp);
 			kev[i].flags = mevent_kq_flags(mevp);
 			kev[i].fflags = mevent_kq_fflags(mevp);
 			kev[i].udata = mevp;
 			i++;
 		}
 
 		mevp->me_cq = 0;
 		LIST_REMOVE(mevp, me_list);
 
 		if (mevp->me_state == MEV_DEL_PENDING) {
 			free(mevp);
 		} else {
 			LIST_INSERT_HEAD(&global_head, mevp, me_list);
 		}
 
 		assert(i < MEVENT_MAX);
 	}
 
 	mevent_qunlock();
 
 	return (i);
 }
 
 static void
 mevent_handle(struct kevent *kev, int numev)
 {
 	struct mevent *mevp;
 	int i;
 
 	for (i = 0; i < numev; i++) {
 		mevp = kev[i].udata;
 
 		/* XXX check for EV_ERROR ? */
 
 		(*mevp->me_func)(mevp->me_fd, mevp->me_type, mevp->me_param);
 	}
 }
 
-struct mevent *
-mevent_add(int tfd, enum ev_type type,
-	   void (*func)(int, enum ev_type, void *), void *param)
+static struct mevent *
+mevent_add_state(int tfd, enum ev_type type,
+	   void (*func)(int, enum ev_type, void *), void *param,
+	   int state)
 {
 	struct mevent *lp, *mevp;
 
 	if (tfd < 0 || func == NULL) {
 		return (NULL);
 	}
 
 	mevp = NULL;
 
 	mevent_qlock();
 
 	/*
 	 * Verify that the fd/type tuple is not present in any list
 	 */
 	LIST_FOREACH(lp, &global_head, me_list) {
 		if (type != EVF_TIMER && lp->me_fd == tfd &&
 		    lp->me_type == type) {
 			goto exit;
 		}
 	}
 
 	LIST_FOREACH(lp, &change_head, me_list) {
 		if (type != EVF_TIMER && lp->me_fd == tfd &&
 		    lp->me_type == type) {
 			goto exit;
 		}
 	}
 
 	/*
 	 * Allocate an entry, populate it, and add it to the change list.
 	 */
 	mevp = calloc(1, sizeof(struct mevent));
 	if (mevp == NULL) {
 		goto exit;
 	}
 
 	if (type == EVF_TIMER) {
 		mevp->me_msecs = tfd;
 		mevp->me_timid = mevent_timid++;
 	} else
 		mevp->me_fd = tfd;
 	mevp->me_type = type;
 	mevp->me_func = func;
 	mevp->me_param = param;
 
 	LIST_INSERT_HEAD(&change_head, mevp, me_list);
 	mevp->me_cq = 1;
-	mevp->me_state = MEV_ADD;
+	mevp->me_state = state;
 	mevent_notify();
 
 exit:
 	mevent_qunlock();
 
 	return (mevp);
+}
+
+struct mevent *
+mevent_add(int tfd, enum ev_type type,
+	   void (*func)(int, enum ev_type, void *), void *param)
+{
+	/* MEV_ADD registers the event already enabled. */
+	return (mevent_add_state(tfd, type, func, param, MEV_ADD));
 }
 
 static int
 mevent_update(struct mevent *evp, int newstate)
 {
 	/*
 	 * It's not possible to enable/disable a deleted event
 	 */
 	if (evp->me_state == MEV_DEL_PENDING)
 		return (EINVAL);
 
 	/*
 	 * No update needed if state isn't changing
 	 */
 	if (evp->me_state == newstate)
 		return (0);
 	
 	mevent_qlock();
 
 	evp->me_state = newstate;
 
 	/*
 	 * Place the entry onto the changed list if not already there.
 	 */
 	if (evp->me_cq == 0) {
 		evp->me_cq = 1;
 		LIST_REMOVE(evp, me_list);
 		LIST_INSERT_HEAD(&change_head, evp, me_list);
 		mevent_notify();
 	}
 
 	mevent_qunlock();
 
 	return (0);
 }
 
 int
 mevent_enable(struct mevent *evp)
 {
 
 	return (mevent_update(evp, MEV_ENABLE));
 }
 
 int
 mevent_disable(struct mevent *evp)
 {
 
 	return (mevent_update(evp, MEV_DISABLE));
 }
 
 static int
 mevent_delete_event(struct mevent *evp, int closefd)
 {
 	mevent_qlock();
 
 	/*
          * Place the entry onto the changed list if not already there, and
 	 * mark as to be deleted.
          */
         if (evp->me_cq == 0) {
 		evp->me_cq = 1;
 		LIST_REMOVE(evp, me_list);
 		LIST_INSERT_HEAD(&change_head, evp, me_list);
 		mevent_notify();
         }
 	evp->me_state = MEV_DEL_PENDING;
 
 	if (closefd)
 		evp->me_closefd = 1;
 
 	mevent_qunlock();
 
 	return (0);
 }
 
 int
 mevent_delete(struct mevent *evp)
 {
 
 	return (mevent_delete_event(evp, 0));
 }
 
 int
 mevent_delete_close(struct mevent *evp)
 {
 
 	return (mevent_delete_event(evp, 1));
 }
 
 static void
 mevent_set_name(void)
 {
 
 	pthread_set_name_np(mevent_tid, "mevent");
 }
 
 void
 mevent_dispatch(void)
 {
 	struct kevent changelist[MEVENT_MAX];
 	struct kevent eventlist[MEVENT_MAX];
 	struct mevent *pipev;
 	int mfd;
 	int numev;
 	int ret;
 #ifndef WITHOUT_CAPSICUM
 	cap_rights_t rights;
 #endif
 
 	mevent_tid = pthread_self();
 	mevent_set_name();
 
 	mfd = kqueue();
 	assert(mfd > 0);
 
 #ifndef WITHOUT_CAPSICUM
 	cap_rights_init(&rights, CAP_KQUEUE);
 	if (caph_rights_limit(mfd, &rights) == -1)
 		errx(EX_OSERR, "Unable to apply rights for sandbox");
 #endif
 
 	/*
 	 * Open the pipe that will be used for other threads to force
 	 * the blocking kqueue call to exit by writing to it. Set the
 	 * descriptor to non-blocking.
 	 */
 	ret = pipe(mevent_pipefd);
 	if (ret < 0) {
 		perror("pipe");
 		exit(0);
 	}
 
 #ifndef WITHOUT_CAPSICUM
 	cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE);
 	if (caph_rights_limit(mevent_pipefd[0], &rights) == -1)
 		errx(EX_OSERR, "Unable to apply rights for sandbox");
 	if (caph_rights_limit(mevent_pipefd[1], &rights) == -1)
 		errx(EX_OSERR, "Unable to apply rights for sandbox");
 #endif
 
 	/*
 	 * Add internal event handler for the pipe write fd
 	 */
 	pipev = mevent_add(mevent_pipefd[0], EVF_READ, mevent_pipe_read, NULL);
 	assert(pipev != NULL);
 
 	for (;;) {
 		/*
 		 * Build changelist if required.
 		 * XXX the changelist can be put into the blocking call
 		 * to eliminate the extra syscall. Currently better for
 		 * debug.
 		 */
 		numev = mevent_build(mfd, changelist);
 		if (numev) {
 			ret = kevent(mfd, changelist, numev, NULL, 0, NULL);
 			if (ret == -1) {
 				perror("Error return from kevent change");
 			}
 		}
 
 		/*
 		 * Block awaiting events
 		 */
 		ret = kevent(mfd, NULL, 0, eventlist, MEVENT_MAX, NULL);
 		if (ret == -1 && errno != EINTR) {
 			perror("Error return from kevent monitor");
 		}
 		
 		/*
 		 * Handle reported events
 		 */
 		mevent_handle(eventlist, ret);
 	}			
 }
Index: stable/12/usr.sbin/bhyve/net_backends.c
===================================================================
--- stable/12/usr.sbin/bhyve/net_backends.c	(revision 353120)
+++ stable/12/usr.sbin/bhyve/net_backends.c	(revision 353121)
@@ -1,807 +1,871 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2019 Vincenzo Maffione <vmaffione@FreeBSD.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
  * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 /*
  * This file implements multiple network backends (tap, netmap, ...),
  * to be used by network frontends such as virtio-net and e1000.
  * The API to access the backend (e.g. send/receive packets, negotiate
  * features) is exported by net_backends.h.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/types.h>		/* u_short etc */
 #ifndef WITHOUT_CAPSICUM
 #include <sys/capsicum.h>
 #endif
 #include <sys/ioctl.h>
 #include <sys/mman.h>
 #include <sys/uio.h>
 
 #include <net/if.h>
 #include <net/netmap.h>
 #include <net/netmap_virt.h>
 #define NETMAP_WITH_LIBS
 #include <net/netmap_user.h>
 
 #ifndef WITHOUT_CAPSICUM
 #include <capsicum_helpers.h>
 #endif
 #include <err.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
 #include <string.h>
 #include <unistd.h>
 #include <sysexits.h>
 #include <assert.h>
 #include <pthread.h>
 #include <pthread_np.h>
 #include <poll.h>
 #include <assert.h>
 
 
 #include "iov.h"
 #include "mevent.h"
 #include "net_backends.h"
 
 #include <sys/linker_set.h>
 
 /*
  * Each network backend registers a set of function pointers that are
  * used to implement the net backends API.
  * This might need to be exposed if we implement backends in separate files.
  */
 struct net_backend {
 	const char *prefix;	/* prefix matching this backend */
 
 	/*
 	 * Routines used to initialize and cleanup the resources needed
 	 * by a backend. The cleanup function is used internally,
 	 * and should not be called by the frontend.
 	 */
 	int (*init)(struct net_backend *be, const char *devname,
 	    net_be_rxeof_t cb, void *param);
 	void (*cleanup)(struct net_backend *be);
 
 	/*
 	 * Called to serve a guest transmit request. The scatter-gather
 	 * vector provided by the caller has 'iovcnt' elements and contains
 	 * the packet to send.
 	 */
 	ssize_t (*send)(struct net_backend *be, struct iovec *iov, int iovcnt);
 
 	/*
 	 * Called to receive a packet from the backend. When the function
 	 * returns a positive value 'len', the scatter-gather vector
 	 * provided by the caller contains a packet with such length.
 	 * The function returns 0 if the backend doesn't have a new packet to
 	 * receive.
 	 */
 	ssize_t (*recv)(struct net_backend *be, struct iovec *iov, int iovcnt);
 
 	/*
+	 * Ask the backend to enable or disable receive operation. On
+	 * return from a disable operation, it is guaranteed that the
+	 * receive callback won't be called until receive is enabled
+	 * again. Note however that it is up to the caller to make sure
+	 * that netbe_recv() is not currently being executed by another
+	 * thread.
+	 */
+	void (*recv_enable)(struct net_backend *be);
+	void (*recv_disable)(struct net_backend *be);
+
+	/*
 	 * Ask the backend for the virtio-net features it is able to
 	 * support. Possible features are TSO, UFO and checksum offloading
 	 * in both rx and tx direction and for both IPv4 and IPv6.
 	 */
 	uint64_t (*get_cap)(struct net_backend *be);
 
 	/*
 	 * Tell the backend to enable/disable the specified virtio-net
 	 * features (capabilities).
 	 */
 	int (*set_cap)(struct net_backend *be, uint64_t features,
 	    unsigned int vnet_hdr_len);
 
 	struct pci_vtnet_softc *sc;
 	int fd;
 
 	/*
 	 * Length of the virtio-net header used by the backend and the
 	 * frontend, respectively. A zero value means that the header
 	 * is not used.
 	 */
 	unsigned int be_vnet_hdr_len;
 	unsigned int fe_vnet_hdr_len;
 
 	/* Size of backend-specific private data. */
 	size_t priv_size;
 
 	/* Room for backend-specific data. */
 	char opaque[0];
 };
 
 SET_DECLARE(net_backend_set, struct net_backend);
 
 #define VNET_HDR_LEN	sizeof(struct virtio_net_rxhdr)
 
 #define WPRINTF(params) printf params
 
 /*
  * The tap backend
  */
 
 struct tap_priv {
 	struct mevent *mevp;
 };
 
 static void
 tap_cleanup(struct net_backend *be)
 {
 	struct tap_priv *priv = (struct tap_priv *)be->opaque;
 
 	if (priv->mevp) {
 		mevent_delete(priv->mevp);
 	}
 	if (be->fd != -1) {
 		close(be->fd);
 		be->fd = -1;
 	}
 }
 
 static int
 tap_init(struct net_backend *be, const char *devname,
 	 net_be_rxeof_t cb, void *param)
 {
 	struct tap_priv *priv = (struct tap_priv *)be->opaque;
 	char tbuf[80];
 	int opt = 1;
 #ifndef WITHOUT_CAPSICUM
 	cap_rights_t rights;
 #endif
 
 	if (cb == NULL) {
 		WPRINTF(("TAP backend requires non-NULL callback\n"));
 		return (-1);
 	}
 
 	strcpy(tbuf, "/dev/");
 	strlcat(tbuf, devname, sizeof(tbuf));
 
 	be->fd = open(tbuf, O_RDWR);
 	if (be->fd == -1) {
 		WPRINTF(("open of tap device %s failed\n", tbuf));
 		goto error;
 	}
 
 	/*
 	 * Set non-blocking and register for read
 	 * notifications with the event loop
 	 */
 	if (ioctl(be->fd, FIONBIO, &opt) < 0) {
 		WPRINTF(("tap device O_NONBLOCK failed\n"));
 		goto error;
 	}
 
 #ifndef WITHOUT_CAPSICUM
 	cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE);
 	if (caph_rights_limit(be->fd, &rights) == -1)
 		errx(EX_OSERR, "Unable to apply rights for sandbox");
 #endif
 
 	priv->mevp = mevent_add(be->fd, EVF_READ, cb, param);
 	if (priv->mevp == NULL) {
 		WPRINTF(("Could not register event\n"));
 		goto error;
 	}
 
 	return (0);
 
 error:
 	tap_cleanup(be);
 	return (-1);
 }
 
 /*
  * Called to send a buffer chain out to the tap device
  */
 static ssize_t
 tap_send(struct net_backend *be, struct iovec *iov, int iovcnt)
 {
 	return (writev(be->fd, iov, iovcnt));
 }
 
 static ssize_t
 tap_recv(struct net_backend *be, struct iovec *iov, int iovcnt)
 {
 	ssize_t ret;
 
 	/* Should never be called without a valid tap fd */
 	assert(be->fd != -1);
 
 	ret = readv(be->fd, iov, iovcnt);
 
 	if (ret < 0 && errno == EWOULDBLOCK) {
 		return (0);
 	}
 
 	return (ret);
 }
 
+static void
+tap_recv_enable(struct net_backend *be)
+{
+
+	/* Re-enable the read event kept in the tap private area. */
+	mevent_enable(((struct tap_priv *)be->opaque)->mevp);
+}
+
+static void
+tap_recv_disable(struct net_backend *be)
+{
+
+	/* Disable the read event kept in the tap private area. */
+	mevent_disable(((struct tap_priv *)be->opaque)->mevp);
+}
+
 static uint64_t
 tap_get_cap(struct net_backend *be)
 {
 
 	return (0); /* no capabilities for now */
 }
 
 static int
 tap_set_cap(struct net_backend *be, uint64_t features,
 		unsigned vnet_hdr_len)
 {
 
 	return ((features || vnet_hdr_len) ? -1 : 0);
 }
 
 static struct net_backend tap_backend = {
 	.prefix = "tap",
 	.priv_size = sizeof(struct tap_priv),
 	.init = tap_init,
 	.cleanup = tap_cleanup,
 	.send = tap_send,
 	.recv = tap_recv,
+	.recv_enable = tap_recv_enable,
+	.recv_disable = tap_recv_disable,
 	.get_cap = tap_get_cap,
 	.set_cap = tap_set_cap,
 };
 
 /* A clone of the tap backend, with a different prefix. */
 static struct net_backend vmnet_backend = {
 	.prefix = "vmnet",
 	.priv_size = sizeof(struct tap_priv),
 	.init = tap_init,
 	.cleanup = tap_cleanup,
 	.send = tap_send,
 	.recv = tap_recv,
+	.recv_enable = tap_recv_enable,
+	.recv_disable = tap_recv_disable,
 	.get_cap = tap_get_cap,
 	.set_cap = tap_set_cap,
 };
 
 DATA_SET(net_backend_set, tap_backend);
 DATA_SET(net_backend_set, vmnet_backend);
 
 /*
  * The netmap backend
  */
 
 /* The virtio-net features supported by netmap. */
 #define NETMAP_FEATURES (VIRTIO_NET_F_CSUM | VIRTIO_NET_F_HOST_TSO4 | \
 		VIRTIO_NET_F_HOST_TSO6 | VIRTIO_NET_F_HOST_UFO | \
 		VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 | \
 		VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_UFO)
 
 struct netmap_priv {
 	char ifname[IFNAMSIZ];
 	struct nm_desc *nmd;
 	uint16_t memid;
 	struct netmap_ring *rx;
 	struct netmap_ring *tx;
 	struct mevent *mevp;
 	net_be_rxeof_t cb;
 	void *cb_param;
 };
 
 static void
 nmreq_init(struct nmreq *req, char *ifname)
 {
 
 	memset(req, 0, sizeof(*req));
 	strlcpy(req->nr_name, ifname, sizeof(req->nr_name));
 	req->nr_version = NETMAP_API;
 }
 
 static int
 netmap_set_vnet_hdr_len(struct net_backend *be, int vnet_hdr_len)
 {
 	int err;
 	struct nmreq req;
 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
 
 	nmreq_init(&req, priv->ifname);
 	req.nr_cmd = NETMAP_BDG_VNET_HDR;
 	req.nr_arg1 = vnet_hdr_len;
 	err = ioctl(be->fd, NIOCREGIF, &req);
 	if (err) {
 		WPRINTF(("Unable to set vnet header length %d\n",
 				vnet_hdr_len));
 		return (err);
 	}
 
 	be->be_vnet_hdr_len = vnet_hdr_len;
 
 	return (0);
 }
 
 static int
 netmap_has_vnet_hdr_len(struct net_backend *be, unsigned vnet_hdr_len)
 {
 	int prev_hdr_len = be->be_vnet_hdr_len;
 	int ret;
 
 	if (vnet_hdr_len == prev_hdr_len) {
 		return (1);
 	}
 
 	ret = netmap_set_vnet_hdr_len(be, vnet_hdr_len);
 	if (ret) {
 		return (0);
 	}
 
 	netmap_set_vnet_hdr_len(be, prev_hdr_len);
 
 	return (1);
 }
 
 static uint64_t
 netmap_get_cap(struct net_backend *be)
 {
 
 	return (netmap_has_vnet_hdr_len(be, VNET_HDR_LEN) ?
 	    NETMAP_FEATURES : 0);
 }
 
 static int
 netmap_set_cap(struct net_backend *be, uint64_t features,
 	       unsigned vnet_hdr_len)
 {
 
 	return (netmap_set_vnet_hdr_len(be, vnet_hdr_len));
 }
 
 static int
 netmap_init(struct net_backend *be, const char *devname,
 	    net_be_rxeof_t cb, void *param)
 {
 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
 
 	strlcpy(priv->ifname, devname, sizeof(priv->ifname));
 	priv->ifname[sizeof(priv->ifname) - 1] = '\0';
 
 	priv->nmd = nm_open(priv->ifname, NULL, NETMAP_NO_TX_POLL, NULL);
 	if (priv->nmd == NULL) {
 		WPRINTF(("Unable to nm_open(): interface '%s', errno (%s)\n",
 			devname, strerror(errno)));
 		free(priv);
 		return (-1);
 	}
 
 	priv->memid = priv->nmd->req.nr_arg2;
 	priv->tx = NETMAP_TXRING(priv->nmd->nifp, 0);
 	priv->rx = NETMAP_RXRING(priv->nmd->nifp, 0);
 	priv->cb = cb;
 	priv->cb_param = param;
 	be->fd = priv->nmd->fd;
 
 	priv->mevp = mevent_add(be->fd, EVF_READ, cb, param);
 	if (priv->mevp == NULL) {
 		WPRINTF(("Could not register event\n"));
 		return (-1);
 	}
 
 	return (0);
 }
 
 static void
 netmap_cleanup(struct net_backend *be)
 {
 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
 
 	if (priv->mevp) {
 		mevent_delete(priv->mevp);
 	}
 	if (priv->nmd) {
 		nm_close(priv->nmd);
 	}
 	be->fd = -1;
 }
 
 static ssize_t
 netmap_send(struct net_backend *be, struct iovec *iov,
 	    int iovcnt)
 {
 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
 	struct netmap_ring *ring;
 	ssize_t totlen = 0;
 	int nm_buf_size;
 	int nm_buf_len;
 	uint32_t head;
 	void *nm_buf;
 	int j;
 
 	ring = priv->tx;
 	head = ring->head;
 	if (head == ring->tail) {
 		WPRINTF(("No space, drop %zu bytes\n", count_iov(iov, iovcnt)));
 		goto txsync;
 	}
 	nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx);
 	nm_buf_size = ring->nr_buf_size;
 	nm_buf_len = 0;
 
 	for (j = 0; j < iovcnt; j++) {
 		int iov_frag_size = iov[j].iov_len;
 		void *iov_frag_buf = iov[j].iov_base;
 
 		totlen += iov_frag_size;
 
 		/*
 		 * Split each iovec fragment over more netmap slots, if
 		 * necessary.
 		 */
 		for (;;) {
 			int copylen;
 
 			copylen = iov_frag_size < nm_buf_size ? iov_frag_size : nm_buf_size;
 			memcpy(nm_buf, iov_frag_buf, copylen);
 
 			iov_frag_buf += copylen;
 			iov_frag_size -= copylen;
 			nm_buf += copylen;
 			nm_buf_size -= copylen;
 			nm_buf_len += copylen;
 
 			if (iov_frag_size == 0) {
 				break;
 			}
 
 			ring->slot[head].len = nm_buf_len;
 			ring->slot[head].flags = NS_MOREFRAG;
 			head = nm_ring_next(ring, head);
 			if (head == ring->tail) {
 				/*
 				 * We ran out of netmap slots while
 				 * splitting the iovec fragments.
 				 */
 				WPRINTF(("No space, drop %zu bytes\n",
 				   count_iov(iov, iovcnt)));
 				goto txsync;
 			}
 			nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx);
 			nm_buf_size = ring->nr_buf_size;
 			nm_buf_len = 0;
 		}
 	}
 
 	/* Complete the last slot, which must not have NS_MOREFRAG set. */
 	ring->slot[head].len = nm_buf_len;
 	ring->slot[head].flags = 0;
 	head = nm_ring_next(ring, head);
 
 	/* Now update ring->head and ring->cur. */
 	ring->head = ring->cur = head;
 txsync:
 	ioctl(be->fd, NIOCTXSYNC, NULL);
 
 	return (totlen);
 }
 
 static ssize_t
 netmap_recv(struct net_backend *be, struct iovec *iov, int iovcnt)
 {
 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
 	struct netmap_slot *slot = NULL;
 	struct netmap_ring *ring;
 	void *iov_frag_buf;
 	int iov_frag_size;
 	ssize_t totlen = 0;
 	uint32_t head;
 
 	assert(iovcnt);
 
 	ring = priv->rx;
 	head = ring->head;
 	iov_frag_buf = iov->iov_base;
 	iov_frag_size = iov->iov_len;
 
 	do {
 		int nm_buf_len;
 		void *nm_buf;
 
 		if (head == ring->tail) {
 			return (0);
 		}
 
 		slot = ring->slot + head;
 		nm_buf = NETMAP_BUF(ring, slot->buf_idx);
 		nm_buf_len = slot->len;
 
 		for (;;) {
 			int copylen = nm_buf_len < iov_frag_size ?
 			    nm_buf_len : iov_frag_size;
 
 			memcpy(iov_frag_buf, nm_buf, copylen);
 			nm_buf += copylen;
 			nm_buf_len -= copylen;
 			iov_frag_buf += copylen;
 			iov_frag_size -= copylen;
 			totlen += copylen;
 
 			if (nm_buf_len == 0) {
 				break;
 			}
 
 			iov++;
 			iovcnt--;
 			if (iovcnt == 0) {
 				/* No space to receive. */
 				WPRINTF(("Short iov, drop %zd bytes\n",
 				    totlen));
 				return (-ENOSPC);
 			}
 			iov_frag_buf = iov->iov_base;
 			iov_frag_size = iov->iov_len;
 		}
 
 		head = nm_ring_next(ring, head);
 
 	} while (slot->flags & NS_MOREFRAG);
 
 	/* Release slots to netmap. */
 	ring->head = ring->cur = head;
 
 	return (totlen);
 }
 
+static void
+netmap_recv_enable(struct net_backend *be)
+{
+
+	/* Re-enable the read event kept in the netmap private area. */
+	mevent_enable(((struct netmap_priv *)be->opaque)->mevp);
+}
+
+static void
+netmap_recv_disable(struct net_backend *be)
+{
+
+	/* Disable the read event kept in the netmap private area. */
+	mevent_disable(((struct netmap_priv *)be->opaque)->mevp);
+}
+
 static struct net_backend netmap_backend = {
 	.prefix = "netmap",
 	.priv_size = sizeof(struct netmap_priv),
 	.init = netmap_init,
 	.cleanup = netmap_cleanup,
 	.send = netmap_send,
 	.recv = netmap_recv,
+	.recv_enable = netmap_recv_enable,
+	.recv_disable = netmap_recv_disable,
 	.get_cap = netmap_get_cap,
 	.set_cap = netmap_set_cap,
 };
 
 /* A clone of the netmap backend, with a different prefix. */
 static struct net_backend vale_backend = {
 	.prefix = "vale",
 	.priv_size = sizeof(struct netmap_priv),
 	.init = netmap_init,
 	.cleanup = netmap_cleanup,
 	.send = netmap_send,
 	.recv = netmap_recv,
+	.recv_enable = netmap_recv_enable,
+	.recv_disable = netmap_recv_disable,
 	.get_cap = netmap_get_cap,
 	.set_cap = netmap_set_cap,
 };
 
 DATA_SET(net_backend_set, netmap_backend);
 DATA_SET(net_backend_set, vale_backend);
 
 /*
  * Initialize a backend and attach to the frontend.
  * This is called during frontend initialization.
  *  @pbe is a pointer to the backend to be initialized
  *  @devname is the backend-name as supplied on the command line,
  * 	e.g. -s 2:0,frontend-name,backend-name[,other-args]
  *  @cb is the receive callback supplied by the frontend,
  *	and it is invoked in the event loop when a receive
  *	event is generated in the hypervisor,
  *  @param is a pointer to the frontend, and normally used as
  *	the argument for the callback.
  */
 int
 netbe_init(struct net_backend **ret, const char *devname, net_be_rxeof_t cb,
     void *param)
 {
 	struct net_backend **pbe, *nbe, *tbe = NULL;
 	int err;
 
 	/*
 	 * Find the network backend that matches the user-provided
 	 * device name. net_backend_set is built using a linker set.
 	 */
 	SET_FOREACH(pbe, net_backend_set) {
 		if (strncmp(devname, (*pbe)->prefix,
 		    strlen((*pbe)->prefix)) == 0) {
 			tbe = *pbe;
 			assert(tbe->init != NULL);
 			assert(tbe->cleanup != NULL);
 			assert(tbe->send != NULL);
 			assert(tbe->recv != NULL);
 			assert(tbe->get_cap != NULL);
 			assert(tbe->set_cap != NULL);
 			break;
 		}
 	}
 
 	*ret = NULL;
 	if (tbe == NULL)
 		return (EINVAL);
 	nbe = calloc(1, sizeof(*nbe) + tbe->priv_size);
 	*nbe = *tbe;	/* copy the template */
 	nbe->fd = -1;
 	nbe->sc = param;
 	nbe->be_vnet_hdr_len = 0;
 	nbe->fe_vnet_hdr_len = 0;
 
 	/* Initialize the backend. */
 	err = nbe->init(nbe, devname, cb, param);
 	if (err) {
 		free(nbe);
 		return (err);
 	}
 
 	*ret = nbe;
 
 	return (0);
 }
 
 void
 netbe_cleanup(struct net_backend *be)
 {
 
 	if (be != NULL) {
 		be->cleanup(be);
 		free(be);
 	}
 }
 
 uint64_t
 netbe_get_cap(struct net_backend *be)
 {
 
 	assert(be != NULL);
 	return (be->get_cap(be));
 }
 
 int
 netbe_set_cap(struct net_backend *be, uint64_t features,
 	      unsigned vnet_hdr_len)
 {
 	int ret;
 
 	assert(be != NULL);
 
 	/* There are only three valid lengths, i.e., 0, 10 and 12. */
 	if (vnet_hdr_len && vnet_hdr_len != VNET_HDR_LEN
 		&& vnet_hdr_len != (VNET_HDR_LEN - sizeof(uint16_t)))
 		return (-1);
 
 	be->fe_vnet_hdr_len = vnet_hdr_len;
 
 	ret = be->set_cap(be, features, vnet_hdr_len);
 	assert(be->be_vnet_hdr_len == 0 ||
 	       be->be_vnet_hdr_len == be->fe_vnet_hdr_len);
 
 	return (ret);
 }
 
 static __inline struct iovec *
 iov_trim(struct iovec *iov, int *iovcnt, unsigned int tlen)
 {
 	struct iovec *riov;
 
 	/* XXX short-cut: assume first segment is >= tlen */
 	assert(iov[0].iov_len >= tlen);
 
 	iov[0].iov_len -= tlen;
 	if (iov[0].iov_len == 0) {
 		assert(*iovcnt > 1);
 		*iovcnt -= 1;
 		riov = &iov[1];
 	} else {
 		iov[0].iov_base = (void *)((uintptr_t)iov[0].iov_base + tlen);
 		riov = &iov[0];
 	}
 
 	return (riov);
 }
 
 ssize_t
 netbe_send(struct net_backend *be, struct iovec *iov, int iovcnt)
 {
 
 	assert(be != NULL);
 	if (be->be_vnet_hdr_len != be->fe_vnet_hdr_len) {
 		/*
 		 * The frontend uses a virtio-net header, but the backend
 		 * does not. We ignore it (as it must be all zeroes) and
 		 * strip it.
 		 */
 		assert(be->be_vnet_hdr_len == 0);
 		iov = iov_trim(iov, &iovcnt, be->fe_vnet_hdr_len);
 	}
 
 	return (be->send(be, iov, iovcnt));
 }
 
 /*
  * Try to read a packet from the backend, without blocking.
  * If no packets are available, return 0. In case of success, return
  * the length of the packet just read. Return -1 in case of errors.
  */
 ssize_t
 netbe_recv(struct net_backend *be, struct iovec *iov, int iovcnt)
 {
 	/* Length of prepended virtio-net header. */
 	unsigned int hlen = be->fe_vnet_hdr_len;
 	int ret;
 
 	assert(be != NULL);
 
 	if (hlen && hlen != be->be_vnet_hdr_len) {
 		/*
 		 * The frontend uses a virtio-net header, but the backend
 		 * does not. We need to prepend a zeroed header.
 		 */
 		struct virtio_net_rxhdr *vh;
 
 		assert(be->be_vnet_hdr_len == 0);
 
 		/*
 		 * Get a pointer to the rx header, and use the
 		 * data immediately following it for the packet buffer.
 		 */
 		vh = iov[0].iov_base;
 		iov = iov_trim(iov, &iovcnt, hlen);
 
 		/*
 		 * The only valid field in the rx packet header is the
 		 * number of buffers if merged rx bufs were negotiated.
 		 */
 		memset(vh, 0, hlen);
 		if (hlen == VNET_HDR_LEN) {
 			vh->vrh_bufs = 1;
 		}
 	}
 
 	ret = be->recv(be, iov, iovcnt);
 	if (ret > 0) {
 		ret += hlen;
 	}
 
 	return (ret);
 }
 
 /*
  * Read a packet from the backend and discard it.
  * Returns the size of the discarded packet or zero if no packet was available.
  * A negative error code is returned in case of read error.
  */
 ssize_t
 netbe_rx_discard(struct net_backend *be)
 {
 	/*
 	 * MP note: the dummybuf is only used to discard frames,
 	 * so there is no need for it to be per-vtnet or locked.
 	 * We only make it large enough for TSO-sized segment.
 	 */
 	static uint8_t dummybuf[65536 + 64];
 	struct iovec iov;
 
 	iov.iov_base = dummybuf;
 	iov.iov_len = sizeof(dummybuf);
 
 	return netbe_recv(be, &iov, 1);
 }
 
+void
+netbe_rx_disable(struct net_backend *be)
+{
+	/* Stop rx events; the rx callback stays quiet until re-enabled. */
+	be->recv_disable(be);
+}
+
+void
+netbe_rx_enable(struct net_backend *be)
+{
+	/* Resume rx events so the rx callback can run again. */
+	be->recv_enable(be);
+}
Index: stable/12/usr.sbin/bhyve/net_backends.h
===================================================================
--- stable/12/usr.sbin/bhyve/net_backends.h	(revision 353120)
+++ stable/12/usr.sbin/bhyve/net_backends.h	(revision 353121)
@@ -1,89 +1,91 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2019 Vincenzo Maffione <vmaffione@FreeBSD.org>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
  * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef __NET_BACKENDS_H__
 #define __NET_BACKENDS_H__
 
 #include <stdint.h>
 
 /* Opaque type representing a network backend. */
 typedef struct net_backend net_backend_t;
 
 /* Interface between network frontends and the network backends. */
 typedef void (*net_be_rxeof_t)(int, enum ev_type, void *param);
 int	netbe_init(net_backend_t **be, const char *devname, net_be_rxeof_t cb,
             void *param);
 void	netbe_cleanup(net_backend_t *be);
 uint64_t netbe_get_cap(net_backend_t *be);
 int	 netbe_set_cap(net_backend_t *be, uint64_t cap,
              unsigned vnet_hdr_len);
 ssize_t	netbe_send(net_backend_t *be, struct iovec *iov, int iovcnt);
 ssize_t	netbe_recv(net_backend_t *be, struct iovec *iov, int iovcnt);
 ssize_t	netbe_rx_discard(net_backend_t *be);
+void	netbe_rx_disable(net_backend_t *be);
+void	netbe_rx_enable(net_backend_t *be);
 
 
 /*
  * Network device capabilities taken from the VirtIO standard.
  * Despite the name, these capabilities can be used by different frontents
  * (virtio-net, ptnet) and supported by different backends (netmap, tap, ...).
  */
 #define	VIRTIO_NET_F_CSUM	(1 <<  0) /* host handles partial cksum */
 #define	VIRTIO_NET_F_GUEST_CSUM	(1 <<  1) /* guest handles partial cksum */
 #define	VIRTIO_NET_F_MAC	(1 <<  5) /* host supplies MAC */
 #define	VIRTIO_NET_F_GSO_DEPREC	(1 <<  6) /* deprecated: host handles GSO */
 #define	VIRTIO_NET_F_GUEST_TSO4	(1 <<  7) /* guest can rcv TSOv4 */
 #define	VIRTIO_NET_F_GUEST_TSO6	(1 <<  8) /* guest can rcv TSOv6 */
 #define	VIRTIO_NET_F_GUEST_ECN	(1 <<  9) /* guest can rcv TSO with ECN */
 #define	VIRTIO_NET_F_GUEST_UFO	(1 << 10) /* guest can rcv UFO */
 #define	VIRTIO_NET_F_HOST_TSO4	(1 << 11) /* host can rcv TSOv4 */
 #define	VIRTIO_NET_F_HOST_TSO6	(1 << 12) /* host can rcv TSOv6 */
 #define	VIRTIO_NET_F_HOST_ECN	(1 << 13) /* host can rcv TSO with ECN */
 #define	VIRTIO_NET_F_HOST_UFO	(1 << 14) /* host can rcv UFO */
 #define	VIRTIO_NET_F_MRG_RXBUF	(1 << 15) /* host can merge RX buffers */
 #define	VIRTIO_NET_F_STATUS	(1 << 16) /* config status field available */
 #define	VIRTIO_NET_F_CTRL_VQ	(1 << 17) /* control channel available */
 #define	VIRTIO_NET_F_CTRL_RX	(1 << 18) /* control channel RX mode support */
 #define	VIRTIO_NET_F_CTRL_VLAN	(1 << 19) /* control channel VLAN filtering */
 #define	VIRTIO_NET_F_GUEST_ANNOUNCE \
 				(1 << 21) /* guest can send gratuitous pkts */
 
 /*
  * Fixed network header size
  */
 struct virtio_net_rxhdr {
 	uint8_t		vrh_flags;
 	uint8_t		vrh_gso_type;
 	uint16_t	vrh_hdr_len;
 	uint16_t	vrh_gso_size;
 	uint16_t	vrh_csum_start;
 	uint16_t	vrh_csum_offset;
 	uint16_t	vrh_bufs;
 } __packed;
 
 #endif /* __NET_BACKENDS_H__ */
Index: stable/12
===================================================================
--- stable/12	(revision 353120)
+++ stable/12	(revision 353121)

Property changes on: stable/12
___________________________________________________________________
Modified: svn:mergeinfo
## -0,0 +0,1 ##
   Merged /head:r352841