Index: usr.sbin/bhyve/Makefile
===================================================================
--- usr.sbin/bhyve/Makefile
+++ usr.sbin/bhyve/Makefile
@@ -83,6 +83,10 @@
 .if ${MK_INET6_SUPPORT} != "no"
 CFLAGS+=-DINET6
 .endif
+.if ${MK_NETGRAPH_SUPPORT} != "no"
+CFLAGS+=-DNETGRAPH
+LIBADD+=	netgraph
+.endif
 .if ${MK_OPENSSL} == "no"
 CFLAGS+=-DNO_OPENSSL
 .else
Index: usr.sbin/bhyve/net_backends.c
===================================================================
--- usr.sbin/bhyve/net_backends.c
+++ usr.sbin/bhyve/net_backends.c
@@ -43,6 +43,7 @@
 #endif
 #include
 #include
+#include <sys/sysctl.h>
 #include
 
 #include
@@ -69,6 +70,9 @@
 
 #include
 #include
+#ifdef NETGRAPH
+#include <netgraph.h>
+#endif
 
 #include "debug.h"
 #include "iov.h"
@@ -732,6 +736,388 @@
 
 DATA_SET(net_backend_set, netmap_backend);
 DATA_SET(net_backend_set, vale_backend);
+
+
+#ifdef NETGRAPH
+
+/*
+ * Netgraph backend
+ *
+ * Guest traffic is exchanged over the data socket returned by
+ * NgMkSockNode(); the socket node's hook is connected to a peer chosen
+ * by the user.  The first colon-separated token of the backend string is
+ * skipped and the remaining tokens are key=value options:
+ *
+ *	socket=<name>	name for the socket node (optional)
+ *	path=<path>	peer node to connect to (required)
+ *	hook=<name>	hook on the socket node (default "vmlink")
+ *	peerhook=<name>	hook on the peer node (required)
+ */
+
+#define NG_SBUF_MAX_SIZE (4 * 1024 * 1024)
+
+struct ng_priv {
+	struct mevent *mevp;	/* RX event, NULL when not registered */
+	char bbuf[1 << 16];	/* bounce buffer for one peeked packet */
+	ssize_t bbuflen;	/* valid bytes in bbuf, 0 when empty */
+};
+
+/*
+ * Unregister the RX event and close the data socket.  Both handles are
+ * reset afterwards, so calling this again is harmless.
+ */
+static void
+ng_cleanup(struct net_backend *be)
+{
+	struct ng_priv *p = (struct ng_priv *)be->opaque;
+
+	if (p->mevp != NULL) {
+		mevent_delete(p->mevp);
+		p->mevp = NULL;
+	}
+	if (be->fd != -1) {
+		close(be->fd);
+		be->fd = -1;
+	}
+}
+
+/*
+ * Copy an option value into a fixed-size netgraph name buffer.
+ * Returns 0 on success, or -1 after logging when the value does not fit;
+ * dst is NUL-terminated either way.
+ */
+static int
+ng_copy_opt(char *dst, size_t dstsz, const char *key, const char *value)
+{
+
+	if (strlcpy(dst, value, dstsz) >= dstsz) {
+		WPRINTF(("netgraph option '%s' is too long", key));
+		return (-1);
+	}
+	return (0);
+}
+
+/*
+ * Parse the options in devname, create the netgraph socket node, connect
+ * it to the requested peer hook, make the data socket non-blocking with
+ * enlarged buffers, and register cb (initially disabled) for RX events.
+ *
+ * Returns 0 on success.  On failure returns -1 after releasing whatever
+ * was acquired.
+ */
+static int
+ng_init(struct net_backend *be, const char *devname,
+    net_be_rxeof_t cb, void *param)
+{
+	struct ng_priv *p = (struct ng_priv *)be->opaque;
+	struct ngm_connect ngc;
+	char *ngopts, *tofree;
+	char nodename[NG_NODESIZ];
+	int sbsz;
+	int ctrl_sock;
+	int flags;
+	int path_provided;
+	int peerhook_provided;
+	int socket_provided;
+	unsigned long maxsbsz;
+	size_t msbsz;
+#ifndef WITHOUT_CAPSICUM
+	cap_rights_t rights;
+#endif
+
+	if (cb == NULL) {
+		WPRINTF(("Netgraph backend requires non-NULL callback"));
+		return (-1);
+	}
+
+	/* Put everything the error path looks at into a known state. */
+	be->fd = -1;
+	p->mevp = NULL;
+	ctrl_sock = -1;
+
+	memset(&ngc, 0, sizeof(ngc));
+	strlcpy(ngc.ourhook, "vmlink", NG_HOOKSIZ);
+
+	tofree = ngopts = strdup(devname);
+	if (ngopts == NULL) {
+		WPRINTF(("strdup error"));
+		goto error;
+	}
+
+	socket_provided = 0;
+	path_provided = 0;
+	peerhook_provided = 0;
+
+	/* Skip the first token; the options follow it. */
+	(void)strsep(&ngopts, ":");
+
+	while (ngopts != NULL) {
+		char *value = ngopts;
+		char *key;
+
+		/* An entry without '=' ends option parsing. */
+		key = strsep(&value, "=");
+		if (value == NULL)
+			break;
+		/* Cut the value at the next ':' and step past it. */
+		ngopts = value;
+		(void)strsep(&ngopts, ":");
+
+		if (strcmp(key, "socket") == 0) {
+			if (ng_copy_opt(nodename, NG_NODESIZ, key,
+			    value) != 0)
+				goto error;
+			socket_provided = 1;
+		} else if (strcmp(key, "path") == 0) {
+			/* Reserve one byte for the ':' appended below. */
+			if (ng_copy_opt(ngc.path, NG_PATHSIZ - 1, key,
+			    value) != 0)
+				goto error;
+			strlcat(ngc.path, ":", NG_PATHSIZ);
+			path_provided = 1;
+		} else if (strcmp(key, "hook") == 0) {
+			if (ng_copy_opt(ngc.ourhook, NG_HOOKSIZ, key,
+			    value) != 0)
+				goto error;
+		} else if (strcmp(key, "peerhook") == 0) {
+			if (ng_copy_opt(ngc.peerhook, NG_HOOKSIZ, key,
+			    value) != 0)
+				goto error;
+			peerhook_provided = 1;
+		} else {
+			WPRINTF(("incorrect network backend options: %s",
+			    key));
+			goto error;
+		}
+	}
+
+	free(tofree);
+	tofree = NULL;
+
+	if (!path_provided) {
+		WPRINTF(("path must be provided"));
+		goto error;
+	}
+
+	if (!peerhook_provided) {
+		WPRINTF(("peer hook must be provided"));
+		goto error;
+	}
+
+	if (NgMkSockNode(socket_provided ? nodename : NULL,
+	    &ctrl_sock, &be->fd) < 0) {
+		WPRINTF(("can't get Netgraph sockets"));
+		goto error;
+	}
+
+	if (NgSendMsg(ctrl_sock, ".",
+	    NGM_GENERIC_COOKIE,
+	    NGM_CONNECT, &ngc, sizeof(ngc)) < 0) {
+		WPRINTF(("can't connect to node"));
+		goto error;
+	}
+
+	/* The control socket was only needed for the connect message. */
+	close(ctrl_sock);
+	ctrl_sock = -1;
+
+	flags = fcntl(be->fd, F_GETFL);
+	if (flags < 0) {
+		WPRINTF(("can't get socket flags"));
+		goto error;
+	}
+
+	if (fcntl(be->fd, F_SETFL, flags | O_NONBLOCK) < 0) {
+		WPRINTF(("can't set O_NONBLOCK flag"));
+		goto error;
+	}
+
+	/*
+	 * The default ng_socket(4) buffer size is too low.  Raise both
+	 * directions to NG_SBUF_MAX_SIZE, or to 75 percent of
+	 * kern.ipc.maxsockbuf when that is smaller.
+	 */
+	msbsz = sizeof(maxsbsz);
+	if (sysctlbyname("kern.ipc.maxsockbuf", &maxsbsz, &msbsz,
+	    NULL, 0) < 0) {
+		WPRINTF(("can't get 'kern.ipc.maxsockbuf' value"));
+		goto error;
+	}
+
+	/*
+	 * The full kern.ipc.maxsockbuf value can't be requested, as it
+	 * takes the mbuf(9) overhead into account; stay at 3/4 of it.
+	 */
+	maxsbsz = (maxsbsz >> 1) + (maxsbsz >> 2);
+	sbsz = MIN(NG_SBUF_MAX_SIZE, maxsbsz);
+
+	if (setsockopt(be->fd, SOL_SOCKET, SO_SNDBUF, &sbsz,
+	    sizeof(sbsz)) < 0) {
+		WPRINTF(("can't set TX buffer size"));
+		goto error;
+	}
+
+	if (setsockopt(be->fd, SOL_SOCKET, SO_RCVBUF, &sbsz,
+	    sizeof(sbsz)) < 0) {
+		WPRINTF(("can't set RX buffer size"));
+		goto error;
+	}
+
+#ifndef WITHOUT_CAPSICUM
+	cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE);
+	if (caph_rights_limit(be->fd, &rights) == -1)
+		errx(EX_OSERR, "Unable to apply rights for sandbox");
+#endif
+
+	memset(p->bbuf, 0, sizeof(p->bbuf));
+	p->bbuflen = 0;
+
+	p->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param);
+	if (p->mevp == NULL) {
+		WPRINTF(("Could not register event"));
+		goto error;
+	}
+
+	return (0);
+
+error:
+	free(tofree);
+	if (ctrl_sock != -1)
+		close(ctrl_sock);
+	ng_cleanup(be);
+	return (-1);
+}
+
+/*
+ * Transmit the packet described by iov on the data socket.
+ * Returns the result of writev().
+ */
+static ssize_t
+ng_send(struct net_backend *be, const struct iovec *iov, int iovcnt)
+{
+	return (writev(be->fd, iov, iovcnt));
+}
+
+/*
+ * Report the length of the next received packet without consuming it.
+ * Returns the packet length, 0 when the socket has nothing pending
+ * (EWOULDBLOCK), or the result of read() otherwise.
+ */
+static ssize_t
+ng_peek_recvlen(struct net_backend *be)
+{
+	struct ng_priv *priv = (struct ng_priv *)be->opaque;
+	ssize_t ret;
+
+	if (priv->bbuflen > 0) {
+		/*
+		 * We already have a packet in the bounce buffer.
+		 * Just return its length.
+		 */
+		return (priv->bbuflen);
+	}
+
+	/*
+	 * Read the next packet (if any) into the bounce buffer, so
+	 * that we get to know its length and we can return that
+	 * to the caller.
+	 */
+	ret = read(be->fd, priv->bbuf, sizeof(priv->bbuf));
+	if (ret < 0 && errno == EWOULDBLOCK) {
+		return (0);
+	}
+
+	if (ret > 0)
+		priv->bbuflen = ret;
+
+	return (ret);
+}
+
+/*
+ * Receive one packet into iov.  A packet left in the bounce buffer by
+ * ng_peek_recvlen() is handed out first; otherwise the socket is read
+ * directly, returning 0 when nothing is pending (EWOULDBLOCK).
+ */
+static ssize_t
+ng_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
+{
+	struct ng_priv *priv = (struct ng_priv *)be->opaque;
+	ssize_t ret;
+
+	if (priv->bbuflen > 0) {
+		/*
+		 * A packet is available in the bounce buffer, so
+		 * we read it from there.
+		 */
+		ret = buf_to_iov(priv->bbuf, priv->bbuflen,
+		    iov, iovcnt, 0);
+
+		/* Mark the bounce buffer as empty. */
+		priv->bbuflen = 0;
+
+		return (ret);
+	}
+
+	ret = readv(be->fd, iov, iovcnt);
+	if (ret < 0 && errno == EWOULDBLOCK) {
+		return (0);
+	}
+
+	return (ret);
+}
+
+/* Enable the RX event registered by ng_init(). */
+static void
+ng_recv_enable(struct net_backend *be)
+{
+	struct ng_priv *priv = (struct ng_priv *)be->opaque;
+
+	mevent_enable(priv->mevp);
+}
+
+/* Disable the RX event registered by ng_init(). */
+static void
+ng_recv_disable(struct net_backend *be)
+{
+	struct ng_priv *priv = (struct ng_priv *)be->opaque;
+
+	mevent_disable(priv->mevp);
+}
+
+/* Report an empty capability mask. */
+static uint64_t
+ng_get_cap(struct net_backend *be)
+{
+
+	return (0); /* no capabilities for now */
+}
+
+/* Accept any feature set and header length; nothing is configured. */
+static int
+ng_set_cap(struct net_backend *be, uint64_t features,
+    unsigned vnet_hdr_len)
+{
+
+	return (0);
+}
+
+static struct net_backend ng_backend = {
+	.prefix = "netgraph",
+	.priv_size = sizeof(struct ng_priv),
+	.init = ng_init,
+	.cleanup = ng_cleanup,
+	.send = ng_send,
+	.peek_recvlen = ng_peek_recvlen,
+	.recv = ng_recv,
+	.recv_enable = ng_recv_enable,
+	.recv_disable = ng_recv_disable,
+	.get_cap = ng_get_cap,
+	.set_cap = ng_set_cap,
+};
+
+DATA_SET(net_backend_set, ng_backend);
+
+#endif /* NETGRAPH */
 
 /*
  * Initialize a backend and attach to the frontend.