diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile --- a/usr.sbin/bhyve/Makefile +++ b/usr.sbin/bhyve/Makefile @@ -34,6 +34,7 @@ mem.c \ mevent.c \ net_backend_netmap.c \ + net_backend_slirp.c \ net_backends.c \ net_utils.c \ pci_emul.c \ diff --git a/usr.sbin/bhyve/bhyve.8 b/usr.sbin/bhyve/bhyve.8 --- a/usr.sbin/bhyve/bhyve.8 +++ b/usr.sbin/bhyve/bhyve.8 @@ -22,7 +22,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd October 12, 2023 +.Dd November 20, 2023 .Dt BHYVE 8 .Os .Sh NAME @@ -426,6 +426,10 @@ .Op Cm \&,mac= Ar xx:xx:xx:xx:xx:xx .Op Cm \&,mtu= Ar N .Xc +.It +.Xo +.Cm slirp,hostfwd= Ar proto : Ar hostaddr : Ar hostport - Ar guestaddr : Ar guestport +.Xc .El .Sm on .Pp @@ -469,6 +473,20 @@ .Xr netgraph 4 addressing rules. .Pp +The slirp backend can be used to provide a NATed network to the guest. +This backend has poor performance but does not require any network +configuration on the host system. +It depends on the +.Pa net/libslirp +port. +The +.Cm hostfwd +option takes a 5-tuple describing how connections from the host are to be +forwarded to the guest. +Multiple rules can be specified, separated by semicolons. +Note that semicolons must be escaped or quoted to prevent the shell from +interpreting them. +.Pp Block storage device backends: .Sm off .Bl -bullet diff --git a/usr.sbin/bhyve/bhyve_config.5 b/usr.sbin/bhyve/bhyve_config.5 --- a/usr.sbin/bhyve/bhyve_config.5 +++ b/usr.sbin/bhyve/bhyve_config.5 @@ -23,7 +23,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd August 19, 2022 +.Dd November 20, 2023 .Dt BHYVE_CONFIG 5 .Os .Sh NAME @@ -401,6 +401,26 @@ is passed to .Xr nm_open to connect to a netmap port. +.It slirp +Use the slirp backend to provide a userspace network stack. +The +.Va hostfwd +variable is used to configure how packets from the host are translated +before being sent to the guest. +.Bl -column "peerhook" "Format" "Default" +.It Sy Name Ta Sy Format Ta Sy Default Ta Sy Description +.It Va hostfwd Ta string Ta Ta +A semicolon-separated list of host forwarding rules, each of the form +.Ar proto:haddr:hport-gaddr:gport , +where +.Ar proto +is either +.Ql tcp +or +.Ql udp . +If the guest address is equal to the empty string, packets will be +forwarded to the first DHCP-assigned address in the guest. +.El .El .Pp If diff --git a/usr.sbin/bhyve/libslirp.h b/usr.sbin/bhyve/libslirp.h new file mode 100644 --- /dev/null +++ b/usr.sbin/bhyve/libslirp.h @@ -0,0 +1,365 @@ +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright (c) 1995,1996 Danny Gasparovski. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * DANNY GASPAROVSKI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef LIBSLIRP_H +#define LIBSLIRP_H + +#include +#include +#include + +#ifdef _WIN32 +#include +#include +#include +#include +typedef SSIZE_T slirp_ssize_t; +#ifdef BUILDING_LIBSLIRP +# define SLIRP_EXPORT __declspec(dllexport) +#else +# define SLIRP_EXPORT __declspec(dllimport) +#endif +#else +#include +typedef ssize_t slirp_ssize_t; +#include +#include +#define SLIRP_EXPORT +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* Opaque structure containing the slirp state */ +typedef struct Slirp Slirp; + +/* Flags passed to SlirpAddPollCb and to be returned by SlirpGetREventsCb. */ +enum { + SLIRP_POLL_IN = 1 << 0, + SLIRP_POLL_OUT = 1 << 1, + SLIRP_POLL_PRI = 1 << 2, + SLIRP_POLL_ERR = 1 << 3, + SLIRP_POLL_HUP = 1 << 4, +}; + +/* Callback for application to get data from the guest */ +typedef slirp_ssize_t (*SlirpReadCb)(void *buf, size_t len, void *opaque); +/* Callback for application to send data to the guest */ +typedef slirp_ssize_t (*SlirpWriteCb)(const void *buf, size_t len, void *opaque); +/* Timer callback */ +typedef void (*SlirpTimerCb)(void *opaque); +/* Callback for libslirp to register polling callbacks */ +typedef int (*SlirpAddPollCb)(int fd, int events, void *opaque); +/* Callback for libslirp to get polling result */ +typedef int (*SlirpGetREventsCb)(int idx, void *opaque); + +/* For now libslirp creates only a timer for the IPv6 RA */ +typedef enum SlirpTimerId { + SLIRP_TIMER_RA, + SLIRP_TIMER_NUM, +} SlirpTimerId; + +/* + * Callbacks from slirp, to be set by the application. + * + * The opaque parameter is set to the opaque pointer given in the slirp_new / + * slirp_init call. + */ +typedef struct SlirpCb { + /* + * Send an ethernet frame to the guest network. The opaque parameter is the + * one given to slirp_init(). If the guest is not ready to receive a frame, + * the function can just drop the data. TCP will then handle retransmissions + * at a lower pace. + * <0 reports an IO error. + */ + SlirpWriteCb send_packet; + /* Print a message for an error due to guest misbehavior. */ + void (*guest_error)(const char *msg, void *opaque); + /* Return the virtual clock value in nanoseconds */ + int64_t (*clock_get_ns)(void *opaque); + /* Create a new timer with the given callback and opaque data. Not + * needed if timer_new_opaque is provided. */ + void *(*timer_new)(SlirpTimerCb cb, void *cb_opaque, void *opaque); + /* Remove and free a timer */ + void (*timer_free)(void *timer, void *opaque); + /* Modify a timer to expire at @expire_time (ms) */ + void (*timer_mod)(void *timer, int64_t expire_time, void *opaque); + /* Register a fd for future polling */ + void (*register_poll_fd)(int fd, void *opaque); + /* Unregister a fd */ + void (*unregister_poll_fd)(int fd, void *opaque); + /* Kick the io-thread, to signal that new events may be processed because some TCP buffer + * can now receive more data, i.e. slirp_socket_can_recv will return 1. */ + void (*notify)(void *opaque); + + /* + * Fields introduced in SlirpConfig version 4 begin + */ + + /* Initialization has completed and a Slirp* has been created. */ + void (*init_completed)(Slirp *slirp, void *opaque); + /* Create a new timer. When the timer fires, the application passes + * the SlirpTimerId and cb_opaque to slirp_handle_timer. */ + void *(*timer_new_opaque)(SlirpTimerId id, void *cb_opaque, void *opaque); +} SlirpCb; + +#define SLIRP_CONFIG_VERSION_MIN 1 +#define SLIRP_CONFIG_VERSION_MAX 5 + +typedef struct SlirpConfig { + /* Version must be provided */ + uint32_t version; + /* + * Fields introduced in SlirpConfig version 1 begin + */ + /* Whether to prevent the guest from accessing the Internet */ + int restricted; + /* Whether IPv4 is enabled */ + bool in_enabled; + /* Virtual network for the guest */ + struct in_addr vnetwork; + /* Mask for the virtual network for the guest */ + struct in_addr vnetmask; + /* Virtual address for the host exposed to the guest */ + struct in_addr vhost; + /* Whether IPv6 is enabled */ + bool in6_enabled; + /* Virtual IPv6 network for the guest */ + struct in6_addr vprefix_addr6; + /* Len of the virtual IPv6 network for the guest */ + uint8_t vprefix_len; + /* Virtual address for the host exposed to the guest */ + struct in6_addr vhost6; + /* Hostname exposed to the guest in DHCP hostname option */ + const char *vhostname; + /* Hostname exposed to the guest in the DHCP TFTP server name option */ + const char *tftp_server_name; + /* Path of the files served by TFTP */ + const char *tftp_path; + /* Boot file name exposed to the guest via DHCP */ + const char *bootfile; + /* Start of the DHCP range */ + struct in_addr vdhcp_start; + /* Virtual address for the DNS server exposed to the guest */ + struct in_addr vnameserver; + /* Virtual IPv6 address for the DNS server exposed to the guest */ + struct in6_addr vnameserver6; + /* DNS search names exposed to the guest via DHCP */ + const char **vdnssearch; + /* Domain name exposed to the guest via DHCP */ + const char *vdomainname; + /* MTU when sending packets to the guest */ + /* Default: IF_MTU_DEFAULT */ + size_t if_mtu; + /* MRU when receiving packets from the guest */ + /* Default: IF_MRU_DEFAULT */ + size_t if_mru; + /* Prohibit connecting to 127.0.0.1:* */ + bool disable_host_loopback; + /* + * Enable emulation code (*warning*: this code isn't safe, it is not + * recommended to enable it) + */ + bool enable_emu; + + /* + * Fields introduced in SlirpConfig version 2 begin + */ + /* Address to be used when sending data to the Internet */ + struct sockaddr_in *outbound_addr; + /* IPv6 Address to be used when sending data to the Internet */ + struct sockaddr_in6 *outbound_addr6; + + /* + * Fields introduced in SlirpConfig version 3 begin + */ + /* slirp will not redirect/serve any DNS packet */ + bool disable_dns; + + /* + * Fields introduced in SlirpConfig version 4 begin + */ + /* slirp will not reply to any DHCP requests */ + bool disable_dhcp; + + /* + * Fields introduced in SlirpConfig version 5 begin + */ + /* Manufacturer ID (IANA Private Enterprise number) */ + uint32_t mfr_id; + /* + * MAC address allocated for an out-of-band management controller, to be + * retrieved through NC-SI. + */ + uint8_t oob_eth_addr[6]; +} SlirpConfig; + +/* Create a new instance of a slirp stack */ +SLIRP_EXPORT +Slirp *slirp_new(const SlirpConfig *cfg, const SlirpCb *callbacks, + void *opaque); +/* slirp_init is deprecated in favor of slirp_new */ +SLIRP_EXPORT +Slirp *slirp_init(int restricted, bool in_enabled, struct in_addr vnetwork, + struct in_addr vnetmask, struct in_addr vhost, + bool in6_enabled, struct in6_addr vprefix_addr6, + uint8_t vprefix_len, struct in6_addr vhost6, + const char *vhostname, const char *tftp_server_name, + const char *tftp_path, const char *bootfile, + struct in_addr vdhcp_start, struct in_addr vnameserver, + struct in6_addr vnameserver6, const char **vdnssearch, + const char *vdomainname, const SlirpCb *callbacks, + void *opaque); +/* Shut down an instance of a slirp stack */ +SLIRP_EXPORT +void slirp_cleanup(Slirp *slirp); + +/* This is called by the application when it is about to sleep through poll(). + * *timeout is set to the amount of virtual time (in ms) that the application intends to + * wait (UINT32_MAX if infinite). slirp_pollfds_fill updates it according to + * e.g. TCP timers, so the application knows it should sleep a smaller amount of + * time. slirp_pollfds_fill calls add_poll for each file descriptor + * that should be monitored along the sleep. The opaque pointer is passed as + * such to add_poll, and add_poll returns an index. */ +SLIRP_EXPORT +void slirp_pollfds_fill(Slirp *slirp, uint32_t *timeout, + SlirpAddPollCb add_poll, void *opaque); + +/* This is called by the application after sleeping, to report which file + * descriptors are available. slirp_pollfds_poll calls get_revents on each file + * descriptor, giving it the index that add_poll returned during the + * slirp_pollfds_fill call, to know whether the descriptor is available for + * read/write/etc. (SLIRP_POLL_*) + * select_error should be passed 1 if poll() returned an error. */ +SLIRP_EXPORT +void slirp_pollfds_poll(Slirp *slirp, int select_error, + SlirpGetREventsCb get_revents, void *opaque); + +/* This is called by the application when the guest emits a packet on the + * guest network, to be interpreted by slirp. */ +SLIRP_EXPORT +void slirp_input(Slirp *slirp, const uint8_t *pkt, int pkt_len); + +/* This is called by the application when a timer expires, if it provides + * the timer_new_opaque callback. It is not needed if the application only + * uses timer_new. */ +SLIRP_EXPORT +void slirp_handle_timer(Slirp *slirp, SlirpTimerId id, void *cb_opaque); + +/* These set up / remove port forwarding between a host port in the real world + * and the guest network. */ +SLIRP_EXPORT +int slirp_add_hostfwd(Slirp *slirp, int is_udp, struct in_addr host_addr, + int host_port, struct in_addr guest_addr, int guest_port); +SLIRP_EXPORT +int slirp_remove_hostfwd(Slirp *slirp, int is_udp, struct in_addr host_addr, + int host_port); + +#define SLIRP_HOSTFWD_UDP 1 +#define SLIRP_HOSTFWD_V6ONLY 2 +SLIRP_EXPORT +int slirp_add_hostxfwd(Slirp *slirp, + const struct sockaddr *haddr, socklen_t haddrlen, + const struct sockaddr *gaddr, socklen_t gaddrlen, + int flags); +SLIRP_EXPORT +int slirp_remove_hostxfwd(Slirp *slirp, + const struct sockaddr *haddr, socklen_t haddrlen, + int flags); + +/* Set up port forwarding between a port in the guest network and a + * command running on the host */ +SLIRP_EXPORT +int slirp_add_exec(Slirp *slirp, const char *cmdline, + struct in_addr *guest_addr, int guest_port); +/* Set up port forwarding between a port in the guest network and a + * Unix port on the host */ +SLIRP_EXPORT +int slirp_add_unix(Slirp *slirp, const char *unixsock, + struct in_addr *guest_addr, int guest_port); +/* Set up port forwarding between a port in the guest network and a + * callback that will receive the data coming from the port */ +SLIRP_EXPORT +int slirp_add_guestfwd(Slirp *slirp, SlirpWriteCb write_cb, void *opaque, + struct in_addr *guest_addr, int guest_port); + +/* TODO: rather identify a guestfwd through an opaque pointer instead of through + * the guest_addr */ + +/* This is called by the application for a guestfwd, to determine how much data + * can be received by the forwarded port through a call to slirp_socket_recv. */ +SLIRP_EXPORT +size_t slirp_socket_can_recv(Slirp *slirp, struct in_addr guest_addr, + int guest_port); +/* This is called by the application for a guestfwd, to provide the data to be + * sent on the forwarded port */ +SLIRP_EXPORT +void slirp_socket_recv(Slirp *slirp, struct in_addr guest_addr, int guest_port, + const uint8_t *buf, int size); + +/* Remove entries added by slirp_add_exec, slirp_add_unix or slirp_add_guestfwd */ +SLIRP_EXPORT +int slirp_remove_guestfwd(Slirp *slirp, struct in_addr guest_addr, + int guest_port); + +/* Return a human-readable state of the slirp stack */ +SLIRP_EXPORT +char *slirp_connection_info(Slirp *slirp); + +/* Return a human-readable state of the NDP/ARP tables */ +SLIRP_EXPORT +char *slirp_neighbor_info(Slirp *slirp); + +/* Save the slirp state through the write_cb. The opaque pointer is passed as + * such to the write_cb. */ +SLIRP_EXPORT +int slirp_state_save(Slirp *s, SlirpWriteCb write_cb, void *opaque); + +/* Returns the version of the slirp state, to be saved along the state */ +SLIRP_EXPORT +int slirp_state_version(void); + +/* Load the slirp state through the read_cb. The opaque pointer is passed as + * such to the read_cb. The version should be given as it was obtained from + * slirp_state_version when slirp_state_save was called. */ +SLIRP_EXPORT +int slirp_state_load(Slirp *s, int version_id, SlirpReadCb read_cb, + void *opaque); + +/* Return the version of the slirp implementation */ +SLIRP_EXPORT +const char *slirp_version_string(void); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* LIBSLIRP_H */ diff --git a/usr.sbin/bhyve/net_backend_slirp.c b/usr.sbin/bhyve/net_backend_slirp.c new file mode 100644 --- /dev/null +++ b/usr.sbin/bhyve/net_backend_slirp.c @@ -0,0 +1,662 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2023 Mark Johnston + * + * This software was developed by the University of Cambridge Computer + * Laboratory (Department of Computer Science and Technology) under Innovate + * UK project 105694, "Digital Security by Design (DSbD) Technology Platform + * Prototype". + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * The slirp backend enables unprivileged networking via libslirp, which must be + * installed on the host system via pkg or the ports tree. bhyve dlopen()s + * libslirp.so upon instantiating the slirp backend. Various network parameters + * are hard-coded in _slirp_init(). + * + * Packets received from the guest (i.e., transmitted by the frontend, such as a + * virtio NIC device model) are injected into the slirp backend via slirp_send(). + * Packets to be transmitted to the guest (i.e., inserted into the frontend's + * receive buffers) are buffered in a per-interface socket pair and read by the + * mevent loop. Sockets instantiated by libslirp are monitored by a thread + * which uses poll() and slirp_pollfds_poll() to drive libslirp events; this + * thread also handles timeout events from the libslirp context. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "config.h" +#include "debug.h" +#include "libslirp.h" +#include "mevent.h" +#include "net_backends.h" +#include "net_backends_priv.h" + +typedef int (*slirp_add_hostxfwd_p_t)(Slirp *, + const struct sockaddr *, socklen_t, const struct sockaddr *, socklen_t, + int); +typedef void (*slirp_cleanup_p_t)(Slirp *); +typedef void (*slirp_input_p_t)(Slirp *, const uint8_t *, int); +typedef Slirp *(*slirp_new_p_t)(const SlirpConfig *, const SlirpCb *, void *); +typedef void (*slirp_pollfds_fill_p_t)(Slirp *, uint32_t *timeout, + SlirpAddPollCb, void *); +typedef void (*slirp_pollfds_poll_p_t)(Slirp *, int, SlirpGetREventsCb, void *); + +/* Function pointer table, initialized by slirp_init_once(). */ +static slirp_add_hostxfwd_p_t slirp_add_hostxfwd_p; +static slirp_cleanup_p_t slirp_cleanup_p; +static slirp_input_p_t slirp_input_p; +static slirp_new_p_t slirp_new_p; +static slirp_pollfds_fill_p_t slirp_pollfds_fill_p; +static slirp_pollfds_poll_p_t slirp_pollfds_poll_p; + +static int +slirp_init_once(void) +{ + static void *handle = NULL; + + if (handle != NULL) + return (0); + handle = dlopen("libslirp.so.0", RTLD_LAZY); + if (handle == NULL) { + EPRINTLN("Unable to open libslirp.so.0: %s", dlerror()); + return (-1); + } + +#define IMPORT_SYM(sym) do { \ + sym##_p = (sym##_p_t)dlsym(handle, #sym); \ + if (sym##_p == NULL) { \ + EPRINTLN("failed to resolve %s", #sym); \ + goto err; \ + } \ +} while (0) + IMPORT_SYM(slirp_add_hostxfwd); + IMPORT_SYM(slirp_cleanup); + IMPORT_SYM(slirp_input); + IMPORT_SYM(slirp_new); + IMPORT_SYM(slirp_pollfds_fill); + IMPORT_SYM(slirp_pollfds_poll); +#undef IMPORT_SYM + + /* + * libslirp uses glib, which uses tzdata to format log messages. Help + * it out. + * + * XXX-MJ glib will also look for charset files, not sure what we can do + * about that... + */ + caph_cache_tzdata(); + + return (0); + +err: + dlclose(handle); + handle = NULL; + return (-1); +} + +struct slirp_priv { + Slirp *slirp; + +#define SLIRP_MTU 2048 + struct mevent *mevp; + int pipe[2]; + + pthread_t pollfd_td; + struct pollfd *pollfds; + size_t npollfds; + + /* Serializes libslirp calls. */ + pthread_mutex_t mtx; +}; + +static void +slirp_priv_init(struct slirp_priv *priv) +{ + int error; + + memset(priv, 0, sizeof(*priv)); + priv->pipe[0] = priv->pipe[1] = -1; + error = pthread_mutex_init(&priv->mtx, NULL); + assert(error == 0); +} + +static void +slirp_priv_cleanup(struct slirp_priv *priv) +{ + int error; + + if (priv->pipe[0] != -1) { + error = close(priv->pipe[0]); + assert(error == 0); + } + if (priv->pipe[1] != -1) { + error = close(priv->pipe[1]); + assert(error == 0); + } + if (priv->mevp) + mevent_delete(priv->mevp); + if (priv->slirp != NULL) + slirp_cleanup_p(priv->slirp); + error = pthread_mutex_destroy(&priv->mtx); + assert(error == 0); +} + +static int64_t +slirp_cb_clock_get_ns(void *param __unused) +{ + struct timespec ts; + int error; + + error = clock_gettime(CLOCK_MONOTONIC, &ts); + assert(error == 0); + return ((int64_t)(ts.tv_sec * 1000000000L + ts.tv_nsec)); +} + +static void +slirp_cb_notify(void *param __unused) +{ +} + +static void +slirp_cb_register_poll_fd(int fd, void *param __unused) +{ + const int one = 1; + + (void)setsockopt(fd, SOL_SOCKET, SO_NOSIGPIPE, &one, sizeof(int)); +} + +static ssize_t +slirp_cb_send_packet(const void *buf, size_t len, void *param) +{ + struct slirp_priv *priv; + ssize_t n; + + priv = param; + + assert(len <= SLIRP_MTU); + n = send(priv->pipe[1], buf, len, 0); + if (n < 0) { + EPRINTLN("slirp_cb_send_packet: send: %s", strerror(errno)); + return (n); + } + assert((size_t)n == len); + + return (n); +} + +static void +slirp_cb_unregister_poll_fd(int fd __unused, void *opaque __unused) +{ +} + +/* Callbacks invoked from within libslirp. */ +static const struct SlirpCb slirp_cbs = { + .clock_get_ns = slirp_cb_clock_get_ns, + .notify = slirp_cb_notify, + .register_poll_fd = slirp_cb_register_poll_fd, + .send_packet = slirp_cb_send_packet, + .unregister_poll_fd = slirp_cb_unregister_poll_fd, +}; + +static int +slirpev2pollev(int events) +{ + int ret; + + ret = 0; + if (events & SLIRP_POLL_IN) + ret |= POLLIN; + if (events & SLIRP_POLL_OUT) + ret |= POLLOUT; + if (events & SLIRP_POLL_PRI) + ret |= POLLPRI; + if (events & SLIRP_POLL_ERR) + ret |= POLLERR; + if (events & SLIRP_POLL_HUP) + ret |= POLLHUP; + return (ret); +} + +static int +pollev2slirpev(int events) +{ + int ret; + + ret = 0; + if (events & POLLIN) + ret |= SLIRP_POLL_IN; + if (events & POLLOUT) + ret |= SLIRP_POLL_OUT; + if (events & POLLPRI) + ret |= SLIRP_POLL_PRI; + if (events & POLLERR) + ret |= SLIRP_POLL_ERR; + if (events & POLLHUP) + ret |= SLIRP_POLL_HUP; + return (ret); +} + +static int +slirp_addpoll_cb(int fd, int events, void *param) +{ + struct slirp_priv *priv; + struct pollfd *pollfd, *pollfds; + size_t i; + + priv = param; + + for (i = 0; i < priv->npollfds; i++) + if (priv->pollfds[i].fd == -1) + break; + if (i == priv->npollfds) { + const size_t POLLFD_GROW = 4; + + priv->npollfds += POLLFD_GROW; + pollfds = realloc(priv->pollfds, + sizeof(*pollfds) * priv->npollfds); + if (pollfds == NULL) + return (-1); + for (i = priv->npollfds - POLLFD_GROW; i < priv->npollfds; i++) + pollfds[i].fd = -1; + priv->pollfds = pollfds; + + i = priv->npollfds - POLLFD_GROW; + } + pollfd = &priv->pollfds[i]; + pollfd->fd = fd; + pollfd->events = slirpev2pollev(events); + pollfd->revents = 0; + + return ((int)i); +} + +static int +slirp_poll_revents(int idx, void *param) +{ + struct slirp_priv *priv; + struct pollfd *pollfd; + + priv = param; + pollfd = &priv->pollfds[idx]; + assert(pollfd->fd != -1); + return (pollev2slirpev(pollfd->revents)); +} + +static void * +slirp_pollfd_td_loop(void *param) +{ + struct slirp_priv *priv; + struct pollfd *pollfds; + size_t npollfds; + uint32_t timeout; + int error; + + pthread_set_name_np(pthread_self(), "slirp pollfd"); + priv = param; + + pthread_mutex_lock(&priv->mtx); + for (;;) { + for (size_t i = 0; i < priv->npollfds; i++) + priv->pollfds[i].fd = -1; + + timeout = UINT32_MAX; + slirp_pollfds_fill_p(priv->slirp, &timeout, slirp_addpoll_cb, + priv); + + pollfds = priv->pollfds; + npollfds = priv->npollfds; + pthread_mutex_unlock(&priv->mtx); + for (;;) { + error = poll(pollfds, npollfds, timeout); + if (error == -1) { + if (errno != EINTR) { + EPRINTLN("poll: %s", strerror(errno)); + exit(1); + } + continue; + } + break; + } + pthread_mutex_lock(&priv->mtx); + slirp_pollfds_poll_p(priv->slirp, error == -1, + slirp_poll_revents, priv); + } +} + +static int +parse_addr(char *addr, struct sockaddr_in *sinp) +{ + char *port; + int error, porti; + + memset(sinp, 0, sizeof(*sinp)); + sinp->sin_family = AF_INET; + sinp->sin_len = sizeof(struct sockaddr_in); + + port = strchr(addr, ':'); + if (port == NULL) + return (EINVAL); + *port++ = '\0'; + + if (strlen(addr) > 0) { + error = inet_pton(AF_INET, addr, &sinp->sin_addr); + if (error != 1) + return (error == 0 ? EPFNOSUPPORT : errno); + } else { + sinp->sin_addr.s_addr = htonl(INADDR_ANY); + } + + porti = strlen(port) > 0 ? atoi(port) : 0; + if (porti < 0 || porti > UINT16_MAX) + return (EINVAL); + sinp->sin_port = htons(porti); + + return (0); +} + +static int +parse_hostfwd_rule(const char *descr, int *is_udp, struct sockaddr *hostaddr, + struct sockaddr *guestaddr) +{ + struct sockaddr_in *hostaddrp, *guestaddrp; + const char *proto; + char *p, *host, *guest; + int error; + + error = 0; + *is_udp = 0; + + p = strdup(descr); + if (p == NULL) + return (ENOMEM); + + host = strchr(p, ':'); + if (host == NULL) { + error = EINVAL; + goto out; + } + *host++ = '\0'; + + proto = p; + *is_udp = strcmp(proto, "udp") == 0; + + guest = strchr(host, '-'); + if (guest == NULL) { + error = EINVAL; + goto out; + } + *guest++ = '\0'; + + hostaddrp = (struct sockaddr_in *)hostaddr; + error = parse_addr(host, hostaddrp); + if (error != 0) + goto out; + + guestaddrp = (struct sockaddr_in *)guestaddr; + error = parse_addr(guest, guestaddrp); + if (error != 0) + goto out; + +out: + free(p); + return (error); +} + +static int +config_one_hostfwd(struct slirp_priv *priv, const char *rule) +{ + struct sockaddr hostaddr, guestaddr; + int error, is_udp; + + error = parse_hostfwd_rule(rule, &is_udp, &hostaddr, &guestaddr); + if (error != 0) { + EPRINTLN("Unable to parse hostfwd rule '%s': %s", + rule, strerror(error)); + return (error); + } + + error = slirp_add_hostxfwd_p(priv->slirp, &hostaddr, hostaddr.sa_len, + &guestaddr, guestaddr.sa_len, is_udp ? SLIRP_HOSTFWD_UDP : 0); + if (error != 0) { + EPRINTLN("Unable to add hostfwd rule '%s': %s", + rule, strerror(errno)); + return (error); + } + + return (0); +} + +static int +_slirp_init(struct net_backend *be, const char *devname __unused, + nvlist_t *nvl, net_be_rxeof_t cb, void *param) +{ + struct slirp_priv *priv = NET_BE_PRIV(be); + SlirpConfig config = { + .version = 4, + .if_mtu = SLIRP_MTU, + .restricted = true, + .in_enabled = true, + .vnetwork.s_addr = htonl(0x0a000200), /* 10.0.2.0/24 */ + .vnetmask.s_addr = htonl(0xffffff00), + .vdhcp_start.s_addr = htonl(0x0a00020f),/* 10.0.2.15 */ + .vhost.s_addr = htonl(0x0a000202), /* 10.0.2.2 */ + .enable_emu = false, + }; + const char *hostfwd; + int error, sndbuf; + + error = slirp_init_once(); + if (error != 0) + return (error); + + slirp_priv_init(priv); + + priv->slirp = slirp_new_p(&config, &slirp_cbs, priv); + if (priv->slirp == NULL) { + EPRINTLN("Unable to create slirp instance"); + goto err; + } + + hostfwd = get_config_value_node(nvl, "hostfwd"); + if (hostfwd != NULL) { + char *rules, *tofree; + const char *rule; + + tofree = rules = strdup(hostfwd); + if (rules == NULL) + goto err; + while ((rule = strsep(&rules, ";")) != NULL) { + error = config_one_hostfwd(priv, rule); + if (error != 0) + goto err; + } + free(tofree); + } + + error = socketpair(PF_LOCAL, SOCK_DGRAM, 0, priv->pipe); + if (error != 0) { + EPRINTLN("Unable to create pipe: %s", strerror(errno)); + goto err; + } + + /* + * Try to avoid dropping buffered packets in slirp_cb_send_packet(). + */ + sndbuf = 1024 * 1024; + error = setsockopt(priv->pipe[1], SOL_SOCKET, SO_SNDBUF, &sndbuf, + sizeof(sndbuf)); + if (error != 0) { + EPRINTLN("Could not set socket buffer size: %s", + strerror(errno)); + goto err; + } + + be->fd = priv->pipe[0]; + priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param); + if (priv->mevp == NULL) { + EPRINTLN("Could not register event"); + goto err; + } + + error = pthread_create(&priv->pollfd_td, NULL, slirp_pollfd_td_loop, + priv); + if (error != 0) { + EPRINTLN("Unable to create pollfd thread: %s", strerror(error)); + goto err; + } + + return (0); + +err: + slirp_priv_cleanup(priv); + return (-1); +} + +static ssize_t +slirp_send(struct net_backend *be, const struct iovec *iov, int iovcnt) +{ + struct slirp_priv *priv = NET_BE_PRIV(be); + + if (iovcnt == 1) { + /* We can avoid copying if there's a single segment. */ + pthread_mutex_lock(&priv->mtx); + slirp_input_p(priv->slirp, iov->iov_base, + (int)iov->iov_len); + pthread_mutex_unlock(&priv->mtx); + return (iov[0].iov_len); + } else { + uint8_t *pkt; + size_t pktlen; + + pktlen = 0; + for (int i = 0; i < iovcnt; i++) + pktlen += iov[i].iov_len; + pkt = malloc(pktlen); + if (pkt == NULL) + return (-1); + pktlen = 0; + for (int i = 0; i < iovcnt; i++) { + memcpy(pkt + pktlen, iov[i].iov_base, iov[i].iov_len); + pktlen += iov[i].iov_len; + } + pthread_mutex_lock(&priv->mtx); + slirp_input_p(priv->slirp, pkt, (int)pktlen); + pthread_mutex_unlock(&priv->mtx); + free(pkt); + return (pktlen); + } +} + +static void +_slirp_cleanup(struct net_backend *be) +{ + struct slirp_priv *priv = NET_BE_PRIV(be); + + slirp_priv_cleanup(priv); +} + +static ssize_t +slirp_peek_recvlen(struct net_backend *be) +{ + struct slirp_priv *priv = NET_BE_PRIV(be); + ssize_t n; + + n = recv(priv->pipe[0], NULL, 0, MSG_PEEK | MSG_DONTWAIT | MSG_TRUNC); + if (n < 0) + return (errno == EWOULDBLOCK ? 0 : -1); + assert((size_t)n <= SLIRP_MTU); + return (n); +} + +static ssize_t +slirp_recv(struct net_backend *be, const struct iovec *iov, int iovcnt) +{ + struct slirp_priv *priv = NET_BE_PRIV(be); + ssize_t n; + + n = readv(priv->pipe[0], iov, iovcnt); + if (n < 0) + return (-1); + assert(n <= SLIRP_MTU); + return (n); +} + +static void +slirp_recv_enable(struct net_backend *be) +{ + struct slirp_priv *priv = NET_BE_PRIV(be); + + mevent_enable(priv->mevp); +} + +static void +slirp_recv_disable(struct net_backend *be __unused) +{ + struct slirp_priv *priv = NET_BE_PRIV(be); + + mevent_enable(priv->mevp); +} + +static uint64_t +slirp_get_cap(struct net_backend *be __unused) +{ + return (0); +} + +static int +slirp_set_cap(struct net_backend *be __unused, uint64_t features __unused, + unsigned int vnet_hdr_len __unused) +{ + return ((features || vnet_hdr_len) ? -1 : 0); +} + +static struct net_backend slirp_backend = { + .prefix = "slirp", + .priv_size = sizeof(struct slirp_priv), + .init = _slirp_init, + .cleanup = _slirp_cleanup, + .send = slirp_send, + .peek_recvlen = slirp_peek_recvlen, + .recv = slirp_recv, + .recv_enable = slirp_recv_enable, + .recv_disable = slirp_recv_disable, + .get_cap = slirp_get_cap, + .set_cap = slirp_set_cap, +}; + +DATA_SET(net_backend_set, slirp_backend);