Index: share/man/man4/tap.4 =================================================================== --- share/man/man4/tap.4 +++ share/man/man4/tap.4 @@ -1,14 +1,14 @@ .\" $FreeBSD$ .\" Based on PR#2411 .\" -.Dd November 29, 2017 +.Dd April 24, 2019 .Dt TAP 4 .Os .Sh NAME .Nm tap .Nd Ethernet tunnel software network interface .Sh SYNOPSIS -.Cd device tap +.Cd device tun .Sh DESCRIPTION The .Nm @@ -51,7 +51,7 @@ .Dq Li tap1 , etc., one for each control device that has been opened. These Ethernet network interfaces persist until -.Pa if_tap.ko +.Pa if_tun.ko module is unloaded, or until removed with "ifconfig destroy" (see below). .Pp .Nm @@ -96,7 +96,7 @@ .Ef .Pp Control devices (once successfully opened) persist until -.Pa if_tap.ko +.Pa if_tun.ko is unloaded or the interface is destroyed. .Pp Each interface supports the usual Ethernet network interface @@ -296,27 +296,6 @@ for the old .Em VMnet device driver. -The driver uses the minor number -to select between -.Nm -and -.Nm vmnet -devices. -.Em VMnet -minor numbers begin at -.Va 0x800000 -+ -.Va N ; -where -.Va N -is a -.Em VMnet -unit number. -In this case the control device is expected to be -.Pa /dev/vmnet Ns Sy N , -and the network interface will be -.Sy vmnet Ns Ar N . -Additionally, .Em VMnet devices do not .Xr ifconfig 8 Index: sys/conf/NOTES =================================================================== --- sys/conf/NOTES +++ sys/conf/NOTES @@ -895,9 +895,6 @@ # which discards all packets sent and receives none. device edsc -# The `tap' device is a pty-like virtual Ethernet interface -device tap - # The `tun' device implements (user-)ppp and nos-tun(8) device tun Index: sys/conf/files =================================================================== --- sys/conf/files +++ sys/conf/files @@ -4080,7 +4080,6 @@ net/if_spppsubr.c optional sppp | netgraph_sppp net/if_stf.c optional stf inet inet6 net/if_tun.c optional tun -net/if_tap.c optional tap net/if_vlan.c optional vlan net/if_vxlan.c optional vxlan inet | vxlan inet6 net/ifdi_if.m optional ether pci iflib Index: sys/modules/Makefile =================================================================== --- sys/modules/Makefile +++ sys/modules/Makefile @@ -163,7 +163,6 @@ if_lagg \ ${_if_ndis} \ ${_if_stf} \ - if_tap \ if_tun \ if_vlan \ if_vxlan \ Index: sys/modules/if_tap/Makefile =================================================================== --- sys/modules/if_tap/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -# Copyright (C) 1999-2000 by Maksim Yevmenkin -# -# $FreeBSD$ - -.PATH: ${SRCTOP}/sys/net - -KMOD= if_tap -SRCS= if_tap.c opt_inet.h vnode_if.h - -.include Index: sys/net/if_tap.c =================================================================== --- sys/net/if_tap.c +++ /dev/null @@ -1,1144 +0,0 @@ -/*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD - * - * Copyright (C) 1999-2000 by Maksim Yevmenkin - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * BASED ON: - * ------------------------------------------------------------------------- - * - * Copyright (c) 1988, Julian Onions - * Nottingham University 1987. - */ - -/* - * $FreeBSD$ - * $Id: if_tap.c,v 0.21 2000/07/23 21:46:02 max Exp $ - */ - -#include "opt_inet.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include -#include - -#define CDEV_NAME "tap" -#define TAPDEBUG if (tapdebug) printf - -static const char tapname[] = "tap"; -static const char vmnetname[] = "vmnet"; -#define TAPMAXUNIT 0x7fff -#define VMNET_DEV_MASK CLONE_FLAG0 - -/* module */ -static int tapmodevent(module_t, int, void *); - -/* device */ -static void tapclone(void *, struct ucred *, char *, int, - struct cdev **); -static void tapcreate(struct cdev *); - -/* network interface */ -static void tapifstart(struct ifnet *); -static int tapifioctl(struct ifnet *, u_long, caddr_t); -static void tapifinit(void *); - -static int tap_clone_create(struct if_clone *, int, caddr_t); -static void tap_clone_destroy(struct ifnet *); -static struct if_clone *tap_cloner; -static int vmnet_clone_create(struct if_clone *, int, caddr_t); -static void vmnet_clone_destroy(struct ifnet *); -static struct if_clone *vmnet_cloner; - -/* character device */ -static d_open_t tapopen; -static d_close_t tapclose; -static d_read_t tapread; -static d_write_t tapwrite; -static d_ioctl_t tapioctl; -static d_poll_t tappoll; -static d_kqfilter_t tapkqfilter; - -/* kqueue(2) */ -static int tapkqread(struct knote *, long); -static int tapkqwrite(struct knote *, long); -static void tapkqdetach(struct knote *); - -static struct filterops tap_read_filterops = { - .f_isfd = 1, - .f_attach = NULL, - .f_detach = tapkqdetach, - .f_event = tapkqread, -}; - -static struct filterops tap_write_filterops = { - .f_isfd = 1, - .f_attach = NULL, - .f_detach = tapkqdetach, - .f_event = tapkqwrite, -}; - -static struct cdevsw tap_cdevsw = { - .d_version = D_VERSION, - .d_flags = D_NEEDMINOR, - .d_open = tapopen, - .d_close = tapclose, - .d_read = tapread, - .d_write = tapwrite, - .d_ioctl = tapioctl, - .d_poll = tappoll, - .d_name = CDEV_NAME, - .d_kqfilter = tapkqfilter, -}; - -/* - * All global variables in if_tap.c are locked with tapmtx, with the - * exception of tapdebug, which is accessed unlocked; tapclones is - * static at runtime. - */ -static struct mtx tapmtx; -static int tapdebug = 0; /* debug flag */ -static int tapuopen = 0; /* allow user open() */ -static int tapuponopen = 0; /* IFF_UP on open() */ -static int tapdclone = 1; /* enable devfs cloning */ -static SLIST_HEAD(, tap_softc) taphead; /* first device */ -static struct clonedevs *tapclones; - -MALLOC_DECLARE(M_TAP); -MALLOC_DEFINE(M_TAP, CDEV_NAME, "Ethernet tunnel interface"); -SYSCTL_INT(_debug, OID_AUTO, if_tap_debug, CTLFLAG_RW, &tapdebug, 0, ""); - -static struct sx tap_ioctl_sx; -SX_SYSINIT(tap_ioctl_sx, &tap_ioctl_sx, "tap_ioctl"); - -SYSCTL_DECL(_net_link); -static SYSCTL_NODE(_net_link, OID_AUTO, tap, CTLFLAG_RW, 0, - "Ethernet tunnel software network interface"); -SYSCTL_INT(_net_link_tap, OID_AUTO, user_open, CTLFLAG_RW, &tapuopen, 0, - "Allow user to open /dev/tap (based on node permissions)"); -SYSCTL_INT(_net_link_tap, OID_AUTO, up_on_open, CTLFLAG_RW, &tapuponopen, 0, - "Bring interface up when /dev/tap is opened"); -SYSCTL_INT(_net_link_tap, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tapdclone, 0, - "Enable legacy devfs interface creation"); -SYSCTL_INT(_net_link_tap, OID_AUTO, debug, CTLFLAG_RW, &tapdebug, 0, ""); - -DEV_MODULE(if_tap, tapmodevent, NULL); - -static int -tap_clone_create(struct if_clone *ifc, int unit, caddr_t params) -{ - struct cdev *dev; - int i; - - /* Find any existing device, or allocate new unit number. */ - i = clone_create(&tapclones, &tap_cdevsw, &unit, &dev, 0); - if (i) { - dev = make_dev(&tap_cdevsw, unit, UID_ROOT, GID_WHEEL, 0600, - "%s%d", tapname, unit); - } - - tapcreate(dev); - return (0); -} - -/* vmnet devices are tap devices in disguise */ -static int -vmnet_clone_create(struct if_clone *ifc, int unit, caddr_t params) -{ - struct cdev *dev; - int i; - - /* Find any existing device, or allocate new unit number. */ - i = clone_create(&tapclones, &tap_cdevsw, &unit, &dev, VMNET_DEV_MASK); - if (i) { - dev = make_dev(&tap_cdevsw, unit | VMNET_DEV_MASK, UID_ROOT, - GID_WHEEL, 0600, "%s%d", vmnetname, unit); - } - - tapcreate(dev); - return (0); -} - -static void -tap_destroy(struct tap_softc *tp) -{ - struct ifnet *ifp = tp->tap_ifp; - - CURVNET_SET(ifp->if_vnet); - sx_xlock(&tap_ioctl_sx); - ifp->if_softc = NULL; - sx_xunlock(&tap_ioctl_sx); - - destroy_dev(tp->tap_dev); - seldrain(&tp->tap_rsel); - knlist_clear(&tp->tap_rsel.si_note, 0); - knlist_destroy(&tp->tap_rsel.si_note); - ether_ifdetach(ifp); - if_free(ifp); - - mtx_destroy(&tp->tap_mtx); - free(tp, M_TAP); - CURVNET_RESTORE(); -} - -static void -tap_clone_destroy(struct ifnet *ifp) -{ - struct tap_softc *tp = ifp->if_softc; - - mtx_lock(&tapmtx); - SLIST_REMOVE(&taphead, tp, tap_softc, tap_next); - mtx_unlock(&tapmtx); - tap_destroy(tp); -} - -/* vmnet devices are tap devices in disguise */ -static void -vmnet_clone_destroy(struct ifnet *ifp) -{ - tap_clone_destroy(ifp); -} - -/* - * tapmodevent - * - * module event handler - */ -static int -tapmodevent(module_t mod, int type, void *data) -{ - static eventhandler_tag eh_tag = NULL; - struct tap_softc *tp = NULL; - struct ifnet *ifp = NULL; - - switch (type) { - case MOD_LOAD: - - /* intitialize device */ - - mtx_init(&tapmtx, "tapmtx", NULL, MTX_DEF); - SLIST_INIT(&taphead); - - clone_setup(&tapclones); - eh_tag = EVENTHANDLER_REGISTER(dev_clone, tapclone, 0, 1000); - if (eh_tag == NULL) { - clone_cleanup(&tapclones); - mtx_destroy(&tapmtx); - return (ENOMEM); - } - tap_cloner = if_clone_simple(tapname, tap_clone_create, - tap_clone_destroy, 0); - vmnet_cloner = if_clone_simple(vmnetname, vmnet_clone_create, - vmnet_clone_destroy, 0); - return (0); - - case MOD_UNLOAD: - /* - * The EBUSY algorithm here can't quite atomically - * guarantee that this is race-free since we have to - * release the tap mtx to deregister the clone handler. - */ - mtx_lock(&tapmtx); - SLIST_FOREACH(tp, &taphead, tap_next) { - mtx_lock(&tp->tap_mtx); - if (tp->tap_flags & TAP_OPEN) { - mtx_unlock(&tp->tap_mtx); - mtx_unlock(&tapmtx); - return (EBUSY); - } - mtx_unlock(&tp->tap_mtx); - } - mtx_unlock(&tapmtx); - - EVENTHANDLER_DEREGISTER(dev_clone, eh_tag); - if_clone_detach(tap_cloner); - if_clone_detach(vmnet_cloner); - drain_dev_clone_events(); - - mtx_lock(&tapmtx); - while ((tp = SLIST_FIRST(&taphead)) != NULL) { - SLIST_REMOVE_HEAD(&taphead, tap_next); - mtx_unlock(&tapmtx); - - ifp = tp->tap_ifp; - - TAPDEBUG("detaching %s\n", ifp->if_xname); - - tap_destroy(tp); - mtx_lock(&tapmtx); - } - mtx_unlock(&tapmtx); - clone_cleanup(&tapclones); - - mtx_destroy(&tapmtx); - - break; - - default: - return (EOPNOTSUPP); - } - - return (0); -} /* tapmodevent */ - - -/* - * DEVFS handler - * - * We need to support two kind of devices - tap and vmnet - */ -static void -tapclone(void *arg, struct ucred *cred, char *name, int namelen, struct cdev **dev) -{ - char devname[SPECNAMELEN + 1]; - int i, unit, append_unit; - int extra; - - if (*dev != NULL) - return; - - if (!tapdclone || - (!tapuopen && priv_check_cred(cred, PRIV_NET_IFCREATE) != 0)) - return; - - unit = 0; - append_unit = 0; - extra = 0; - - /* We're interested in only tap/vmnet devices. */ - if (strcmp(name, tapname) == 0) { - unit = -1; - } else if (strcmp(name, vmnetname) == 0) { - unit = -1; - extra = VMNET_DEV_MASK; - } else if (dev_stdclone(name, NULL, tapname, &unit) != 1) { - if (dev_stdclone(name, NULL, vmnetname, &unit) != 1) { - return; - } else { - extra = VMNET_DEV_MASK; - } - } - - if (unit == -1) - append_unit = 1; - - CURVNET_SET(CRED_TO_VNET(cred)); - /* find any existing device, or allocate new unit number */ - i = clone_create(&tapclones, &tap_cdevsw, &unit, dev, extra); - if (i) { - if (append_unit) { - /* - * We were passed 'tun' or 'tap', with no unit specified - * so we'll need to append it now. - */ - namelen = snprintf(devname, sizeof(devname), "%s%d", name, - unit); - name = devname; - } - - *dev = make_dev_credf(MAKEDEV_REF, &tap_cdevsw, unit | extra, - cred, UID_ROOT, GID_WHEEL, 0600, "%s", name); - } - - if_clone_create(name, namelen, NULL); - CURVNET_RESTORE(); -} /* tapclone */ - - -/* - * tapcreate - * - * to create interface - */ -static void -tapcreate(struct cdev *dev) -{ - struct ifnet *ifp = NULL; - struct tap_softc *tp = NULL; - unsigned short macaddr_hi; - uint32_t macaddr_mid; - int unit; - const char *name = NULL; - u_char eaddr[6]; - - /* allocate driver storage and create device */ - tp = malloc(sizeof(*tp), M_TAP, M_WAITOK | M_ZERO); - mtx_init(&tp->tap_mtx, "tap_mtx", NULL, MTX_DEF); - mtx_lock(&tapmtx); - SLIST_INSERT_HEAD(&taphead, tp, tap_next); - mtx_unlock(&tapmtx); - - unit = dev2unit(dev); - - /* select device: tap or vmnet */ - if (unit & VMNET_DEV_MASK) { - name = vmnetname; - tp->tap_flags |= TAP_VMNET; - } else - name = tapname; - - unit &= TAPMAXUNIT; - - TAPDEBUG("tapcreate(%s%d). minor = %#x\n", name, unit, dev2unit(dev)); - - /* generate fake MAC address: 00 bd xx xx xx unit_no */ - macaddr_hi = htons(0x00bd); - macaddr_mid = (uint32_t) ticks; - bcopy(&macaddr_hi, eaddr, sizeof(short)); - bcopy(&macaddr_mid, &eaddr[2], sizeof(uint32_t)); - eaddr[5] = (u_char)unit; - - /* fill the rest and attach interface */ - ifp = tp->tap_ifp = if_alloc(IFT_ETHER); - if (ifp == NULL) - panic("%s%d: can not if_alloc()", name, unit); - ifp->if_softc = tp; - if_initname(ifp, name, unit); - ifp->if_init = tapifinit; - ifp->if_start = tapifstart; - ifp->if_ioctl = tapifioctl; - ifp->if_mtu = ETHERMTU; - ifp->if_flags = (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST); - IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); - ifp->if_capabilities |= IFCAP_LINKSTATE; - ifp->if_capenable |= IFCAP_LINKSTATE; - - dev->si_drv1 = tp; - tp->tap_dev = dev; - - ether_ifattach(ifp, eaddr); - - mtx_lock(&tp->tap_mtx); - tp->tap_flags |= TAP_INITED; - mtx_unlock(&tp->tap_mtx); - - knlist_init_mtx(&tp->tap_rsel.si_note, &tp->tap_mtx); - - TAPDEBUG("interface %s is created. minor = %#x\n", - ifp->if_xname, dev2unit(dev)); -} /* tapcreate */ - - -/* - * tapopen - * - * to open tunnel. must be superuser - */ -static int -tapopen(struct cdev *dev, int flag, int mode, struct thread *td) -{ - struct tap_softc *tp = NULL; - struct ifnet *ifp = NULL; - int error; - - if (tapuopen == 0) { - error = priv_check(td, PRIV_NET_TAP); - if (error) - return (error); - } - - if ((dev2unit(dev) & CLONE_UNITMASK) > TAPMAXUNIT) - return (ENXIO); - - tp = dev->si_drv1; - - mtx_lock(&tp->tap_mtx); - if (tp->tap_flags & TAP_OPEN) { - mtx_unlock(&tp->tap_mtx); - return (EBUSY); - } - - bcopy(IF_LLADDR(tp->tap_ifp), tp->ether_addr, sizeof(tp->ether_addr)); - tp->tap_pid = td->td_proc->p_pid; - tp->tap_flags |= TAP_OPEN; - ifp = tp->tap_ifp; - - ifp->if_drv_flags |= IFF_DRV_RUNNING; - ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; - if (tapuponopen) - ifp->if_flags |= IFF_UP; - if_link_state_change(ifp, LINK_STATE_UP); - mtx_unlock(&tp->tap_mtx); - - TAPDEBUG("%s is open. minor = %#x\n", ifp->if_xname, dev2unit(dev)); - - return (0); -} /* tapopen */ - - -/* - * tapclose - * - * close the device - mark i/f down & delete routing info - */ -static int -tapclose(struct cdev *dev, int foo, int bar, struct thread *td) -{ - struct ifaddr *ifa; - struct tap_softc *tp = dev->si_drv1; - struct ifnet *ifp = tp->tap_ifp; - - /* junk all pending output */ - mtx_lock(&tp->tap_mtx); - CURVNET_SET(ifp->if_vnet); - IF_DRAIN(&ifp->if_snd); - - /* - * Do not bring the interface down, and do not anything with - * interface, if we are in VMnet mode. Just close the device. - */ - if (((tp->tap_flags & TAP_VMNET) == 0) && - (ifp->if_flags & (IFF_UP | IFF_LINK0)) == IFF_UP) { - mtx_unlock(&tp->tap_mtx); - if_down(ifp); - mtx_lock(&tp->tap_mtx); - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - ifp->if_drv_flags &= ~IFF_DRV_RUNNING; - mtx_unlock(&tp->tap_mtx); - CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { - rtinit(ifa, (int)RTM_DELETE, 0); - } - if_purgeaddrs(ifp); - mtx_lock(&tp->tap_mtx); - } - } - - if_link_state_change(ifp, LINK_STATE_DOWN); - CURVNET_RESTORE(); - - funsetown(&tp->tap_sigio); - selwakeuppri(&tp->tap_rsel, PZERO+1); - KNOTE_LOCKED(&tp->tap_rsel.si_note, 0); - - tp->tap_flags &= ~TAP_OPEN; - tp->tap_pid = 0; - mtx_unlock(&tp->tap_mtx); - - TAPDEBUG("%s is closed. minor = %#x\n", - ifp->if_xname, dev2unit(dev)); - - return (0); -} /* tapclose */ - - -/* - * tapifinit - * - * network interface initialization function - */ -static void -tapifinit(void *xtp) -{ - struct tap_softc *tp = (struct tap_softc *)xtp; - struct ifnet *ifp = tp->tap_ifp; - - TAPDEBUG("initializing %s\n", ifp->if_xname); - - mtx_lock(&tp->tap_mtx); - ifp->if_drv_flags |= IFF_DRV_RUNNING; - ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; - mtx_unlock(&tp->tap_mtx); - - /* attempt to start output */ - tapifstart(ifp); -} /* tapifinit */ - - -/* - * tapifioctl - * - * Process an ioctl request on network interface - */ -static int -tapifioctl(struct ifnet *ifp, u_long cmd, caddr_t data) -{ - struct tap_softc *tp; - struct ifreq *ifr = (struct ifreq *)data; - struct ifstat *ifs = NULL; - struct ifmediareq *ifmr = NULL; - int dummy, error = 0; - - sx_xlock(&tap_ioctl_sx); - tp = ifp->if_softc; - if (tp == NULL) { - error = ENXIO; - goto bad; - } - switch (cmd) { - case SIOCSIFFLAGS: /* XXX -- just like vmnet does */ - case SIOCADDMULTI: - case SIOCDELMULTI: - break; - - case SIOCGIFMEDIA: - ifmr = (struct ifmediareq *)data; - dummy = ifmr->ifm_count; - ifmr->ifm_count = 1; - ifmr->ifm_status = IFM_AVALID; - ifmr->ifm_active = IFM_ETHER; - if (tp->tap_flags & TAP_OPEN) - ifmr->ifm_status |= IFM_ACTIVE; - ifmr->ifm_current = ifmr->ifm_active; - if (dummy >= 1) { - int media = IFM_ETHER; - error = copyout(&media, ifmr->ifm_ulist, - sizeof(int)); - } - break; - - case SIOCSIFMTU: - ifp->if_mtu = ifr->ifr_mtu; - break; - - case SIOCGIFSTATUS: - ifs = (struct ifstat *)data; - mtx_lock(&tp->tap_mtx); - if (tp->tap_pid != 0) - snprintf(ifs->ascii, sizeof(ifs->ascii), - "\tOpened by PID %d\n", tp->tap_pid); - else - ifs->ascii[0] = '\0'; - mtx_unlock(&tp->tap_mtx); - break; - - default: - error = ether_ioctl(ifp, cmd, data); - break; - } - -bad: - sx_xunlock(&tap_ioctl_sx); - return (error); -} /* tapifioctl */ - - -/* - * tapifstart - * - * queue packets from higher level ready to put out - */ -static void -tapifstart(struct ifnet *ifp) -{ - struct tap_softc *tp = ifp->if_softc; - - TAPDEBUG("%s starting\n", ifp->if_xname); - - /* - * do not junk pending output if we are in VMnet mode. - * XXX: can this do any harm because of queue overflow? - */ - - mtx_lock(&tp->tap_mtx); - if (((tp->tap_flags & TAP_VMNET) == 0) && - ((tp->tap_flags & TAP_READY) != TAP_READY)) { - struct mbuf *m; - - /* Unlocked read. */ - TAPDEBUG("%s not ready, tap_flags = 0x%x\n", ifp->if_xname, - tp->tap_flags); - - for (;;) { - IF_DEQUEUE(&ifp->if_snd, m); - if (m != NULL) { - m_freem(m); - if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); - } else - break; - } - mtx_unlock(&tp->tap_mtx); - - return; - } - - ifp->if_drv_flags |= IFF_DRV_OACTIVE; - - if (!IFQ_IS_EMPTY(&ifp->if_snd)) { - if (tp->tap_flags & TAP_RWAIT) { - tp->tap_flags &= ~TAP_RWAIT; - wakeup(tp); - } - - if ((tp->tap_flags & TAP_ASYNC) && (tp->tap_sigio != NULL)) { - mtx_unlock(&tp->tap_mtx); - pgsigio(&tp->tap_sigio, SIGIO, 0); - mtx_lock(&tp->tap_mtx); - } - - selwakeuppri(&tp->tap_rsel, PZERO+1); - KNOTE_LOCKED(&tp->tap_rsel.si_note, 0); - if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); /* obytes are counted in ether_output */ - } - - ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; - mtx_unlock(&tp->tap_mtx); -} /* tapifstart */ - - -/* - * tapioctl - * - * the cdevsw interface is now pretty minimal - */ -static int -tapioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td) -{ - struct ifreq ifr; - struct tap_softc *tp = dev->si_drv1; - struct ifnet *ifp = tp->tap_ifp; - struct tapinfo *tapp = NULL; - int f; - int error; -#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ - defined(COMPAT_FREEBSD4) - int ival; -#endif - - switch (cmd) { - case TAPSIFINFO: - tapp = (struct tapinfo *)data; - if (ifp->if_type != tapp->type) - return (EPROTOTYPE); - mtx_lock(&tp->tap_mtx); - if (ifp->if_mtu != tapp->mtu) { - strlcpy(ifr.ifr_name, if_name(ifp), IFNAMSIZ); - ifr.ifr_mtu = tapp->mtu; - CURVNET_SET(ifp->if_vnet); - error = ifhwioctl(SIOCSIFMTU, ifp, - (caddr_t)&ifr, td); - CURVNET_RESTORE(); - if (error) { - mtx_unlock(&tp->tap_mtx); - return (error); - } - } - ifp->if_baudrate = tapp->baudrate; - mtx_unlock(&tp->tap_mtx); - break; - - case TAPGIFINFO: - tapp = (struct tapinfo *)data; - mtx_lock(&tp->tap_mtx); - tapp->mtu = ifp->if_mtu; - tapp->type = ifp->if_type; - tapp->baudrate = ifp->if_baudrate; - mtx_unlock(&tp->tap_mtx); - break; - - case TAPSDEBUG: - tapdebug = *(int *)data; - break; - - case TAPGDEBUG: - *(int *)data = tapdebug; - break; - - case TAPGIFNAME: { - struct ifreq *ifr = (struct ifreq *) data; - - strlcpy(ifr->ifr_name, ifp->if_xname, IFNAMSIZ); - } break; - - case FIONBIO: - break; - - case FIOASYNC: - mtx_lock(&tp->tap_mtx); - if (*(int *)data) - tp->tap_flags |= TAP_ASYNC; - else - tp->tap_flags &= ~TAP_ASYNC; - mtx_unlock(&tp->tap_mtx); - break; - - case FIONREAD: - if (!IFQ_IS_EMPTY(&ifp->if_snd)) { - struct mbuf *mb; - - IFQ_LOCK(&ifp->if_snd); - IFQ_POLL_NOLOCK(&ifp->if_snd, mb); - for (*(int *)data = 0; mb != NULL; - mb = mb->m_next) - *(int *)data += mb->m_len; - IFQ_UNLOCK(&ifp->if_snd); - } else - *(int *)data = 0; - break; - - case FIOSETOWN: - return (fsetown(*(int *)data, &tp->tap_sigio)); - - case FIOGETOWN: - *(int *)data = fgetown(&tp->tap_sigio); - return (0); - - /* this is deprecated, FIOSETOWN should be used instead */ - case TIOCSPGRP: - return (fsetown(-(*(int *)data), &tp->tap_sigio)); - - /* this is deprecated, FIOGETOWN should be used instead */ - case TIOCGPGRP: - *(int *)data = -fgetown(&tp->tap_sigio); - return (0); - - /* VMware/VMnet port ioctl's */ - -#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ - defined(COMPAT_FREEBSD4) - case _IO('V', 0): - ival = IOCPARM_IVAL(data); - data = (caddr_t)&ival; - /* FALLTHROUGH */ -#endif - case VMIO_SIOCSIFFLAGS: /* VMware/VMnet SIOCSIFFLAGS */ - f = *(int *)data; - f &= 0x0fff; - f &= ~IFF_CANTCHANGE; - f |= IFF_UP; - - mtx_lock(&tp->tap_mtx); - ifp->if_flags = f | (ifp->if_flags & IFF_CANTCHANGE); - mtx_unlock(&tp->tap_mtx); - break; - - case SIOCGIFADDR: /* get MAC address of the remote side */ - mtx_lock(&tp->tap_mtx); - bcopy(tp->ether_addr, data, sizeof(tp->ether_addr)); - mtx_unlock(&tp->tap_mtx); - break; - - case SIOCSIFADDR: /* set MAC address of the remote side */ - mtx_lock(&tp->tap_mtx); - bcopy(data, tp->ether_addr, sizeof(tp->ether_addr)); - mtx_unlock(&tp->tap_mtx); - break; - - default: - return (ENOTTY); - } - return (0); -} /* tapioctl */ - - -/* - * tapread - * - * the cdevsw read interface - reads a packet at a time, or at - * least as much of a packet as can be read - */ -static int -tapread(struct cdev *dev, struct uio *uio, int flag) -{ - struct tap_softc *tp = dev->si_drv1; - struct ifnet *ifp = tp->tap_ifp; - struct mbuf *m = NULL; - int error = 0, len; - - TAPDEBUG("%s reading, minor = %#x\n", ifp->if_xname, dev2unit(dev)); - - mtx_lock(&tp->tap_mtx); - if ((tp->tap_flags & TAP_READY) != TAP_READY) { - mtx_unlock(&tp->tap_mtx); - - /* Unlocked read. */ - TAPDEBUG("%s not ready. minor = %#x, tap_flags = 0x%x\n", - ifp->if_xname, dev2unit(dev), tp->tap_flags); - - return (EHOSTDOWN); - } - - tp->tap_flags &= ~TAP_RWAIT; - - /* sleep until we get a packet */ - do { - IF_DEQUEUE(&ifp->if_snd, m); - - if (m == NULL) { - if (flag & O_NONBLOCK) { - mtx_unlock(&tp->tap_mtx); - return (EWOULDBLOCK); - } - - tp->tap_flags |= TAP_RWAIT; - error = mtx_sleep(tp, &tp->tap_mtx, PCATCH | (PZERO + 1), - "taprd", 0); - if (error) { - mtx_unlock(&tp->tap_mtx); - return (error); - } - } - } while (m == NULL); - mtx_unlock(&tp->tap_mtx); - - /* feed packet to bpf */ - BPF_MTAP(ifp, m); - - /* xfer packet to user space */ - while ((m != NULL) && (uio->uio_resid > 0) && (error == 0)) { - len = min(uio->uio_resid, m->m_len); - if (len == 0) - break; - - error = uiomove(mtod(m, void *), len, uio); - m = m_free(m); - } - - if (m != NULL) { - TAPDEBUG("%s dropping mbuf, minor = %#x\n", ifp->if_xname, - dev2unit(dev)); - m_freem(m); - } - - return (error); -} /* tapread */ - - -/* - * tapwrite - * - * the cdevsw write interface - an atomic write is a packet - or else! - */ -static int -tapwrite(struct cdev *dev, struct uio *uio, int flag) -{ - struct ether_header *eh; - struct tap_softc *tp = dev->si_drv1; - struct ifnet *ifp = tp->tap_ifp; - struct mbuf *m; - - TAPDEBUG("%s writing, minor = %#x\n", - ifp->if_xname, dev2unit(dev)); - - if (uio->uio_resid == 0) - return (0); - - if ((uio->uio_resid < 0) || (uio->uio_resid > TAPMRU)) { - TAPDEBUG("%s invalid packet len = %zd, minor = %#x\n", - ifp->if_xname, uio->uio_resid, dev2unit(dev)); - - return (EIO); - } - - if ((m = m_uiotombuf(uio, M_NOWAIT, 0, ETHER_ALIGN, - M_PKTHDR)) == NULL) { - if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); - return (ENOBUFS); - } - - m->m_pkthdr.rcvif = ifp; - - /* - * Only pass a unicast frame to ether_input(), if it would actually - * have been received by non-virtual hardware. - */ - if (m->m_len < sizeof(struct ether_header)) { - m_freem(m); - return (0); - } - eh = mtod(m, struct ether_header *); - - if (eh && (ifp->if_flags & IFF_PROMISC) == 0 && - !ETHER_IS_MULTICAST(eh->ether_dhost) && - bcmp(eh->ether_dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN) != 0) { - m_freem(m); - return (0); - } - - /* Pass packet up to parent. */ - CURVNET_SET(ifp->if_vnet); - (*ifp->if_input)(ifp, m); - CURVNET_RESTORE(); - if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); /* ibytes are counted in parent */ - - return (0); -} /* tapwrite */ - - -/* - * tappoll - * - * the poll interface, this is only useful on reads - * really. the write detect always returns true, write never blocks - * anyway, it either accepts the packet or drops it - */ -static int -tappoll(struct cdev *dev, int events, struct thread *td) -{ - struct tap_softc *tp = dev->si_drv1; - struct ifnet *ifp = tp->tap_ifp; - int revents = 0; - - TAPDEBUG("%s polling, minor = %#x\n", - ifp->if_xname, dev2unit(dev)); - - if (events & (POLLIN | POLLRDNORM)) { - IFQ_LOCK(&ifp->if_snd); - if (!IFQ_IS_EMPTY(&ifp->if_snd)) { - TAPDEBUG("%s have data in queue. len = %d, " \ - "minor = %#x\n", ifp->if_xname, - ifp->if_snd.ifq_len, dev2unit(dev)); - - revents |= (events & (POLLIN | POLLRDNORM)); - } else { - TAPDEBUG("%s waiting for data, minor = %#x\n", - ifp->if_xname, dev2unit(dev)); - - selrecord(td, &tp->tap_rsel); - } - IFQ_UNLOCK(&ifp->if_snd); - } - - if (events & (POLLOUT | POLLWRNORM)) - revents |= (events & (POLLOUT | POLLWRNORM)); - - return (revents); -} /* tappoll */ - - -/* - * tap_kqfilter - * - * support for kevent() system call - */ -static int -tapkqfilter(struct cdev *dev, struct knote *kn) -{ - struct tap_softc *tp = dev->si_drv1; - struct ifnet *ifp = tp->tap_ifp; - - switch (kn->kn_filter) { - case EVFILT_READ: - TAPDEBUG("%s kqfilter: EVFILT_READ, minor = %#x\n", - ifp->if_xname, dev2unit(dev)); - kn->kn_fop = &tap_read_filterops; - break; - - case EVFILT_WRITE: - TAPDEBUG("%s kqfilter: EVFILT_WRITE, minor = %#x\n", - ifp->if_xname, dev2unit(dev)); - kn->kn_fop = &tap_write_filterops; - break; - - default: - TAPDEBUG("%s kqfilter: invalid filter, minor = %#x\n", - ifp->if_xname, dev2unit(dev)); - return (EINVAL); - /* NOT REACHED */ - } - - kn->kn_hook = tp; - knlist_add(&tp->tap_rsel.si_note, kn, 0); - - return (0); -} /* tapkqfilter */ - - -/* - * tap_kqread - * - * Return true if there is data in the interface queue - */ -static int -tapkqread(struct knote *kn, long hint) -{ - int ret; - struct tap_softc *tp = kn->kn_hook; - struct cdev *dev = tp->tap_dev; - struct ifnet *ifp = tp->tap_ifp; - - if ((kn->kn_data = ifp->if_snd.ifq_len) > 0) { - TAPDEBUG("%s have data in queue. len = %d, minor = %#x\n", - ifp->if_xname, ifp->if_snd.ifq_len, dev2unit(dev)); - ret = 1; - } else { - TAPDEBUG("%s waiting for data, minor = %#x\n", - ifp->if_xname, dev2unit(dev)); - ret = 0; - } - - return (ret); -} /* tapkqread */ - - -/* - * tap_kqwrite - * - * Always can write. Return the MTU in kn->data - */ -static int -tapkqwrite(struct knote *kn, long hint) -{ - struct tap_softc *tp = kn->kn_hook; - struct ifnet *ifp = tp->tap_ifp; - - kn->kn_data = ifp->if_mtu; - - return (1); -} /* tapkqwrite */ - - -static void -tapkqdetach(struct knote *kn) -{ - struct tap_softc *tp = kn->kn_hook; - - knlist_remove(&tp->tap_rsel.si_note, kn, 0); -} /* tapkqdetach */ - Index: sys/net/if_tapvar.h =================================================================== --- sys/net/if_tapvar.h +++ /dev/null @@ -1,71 +0,0 @@ -/*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD - * - * Copyright (C) 1999-2000 by Maksim Yevmenkin - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * BASED ON: - * ------------------------------------------------------------------------- - * - * Copyright (c) 1998 Brian Somers - * All rights reserved. - * - * Copyright (c) 1988, Julian Onions - * Nottingham University 1987. - */ - -/* - * $FreeBSD$ - * $Id: if_tapvar.h,v 0.6 2000/07/11 02:16:08 max Exp $ - */ - -#ifndef _NET_IF_TAPVAR_H_ -#define _NET_IF_TAPVAR_H_ - -/* - * tap_mtx locks tap_flags, tap_pid. tap_next locked with global tapmtx. - * Other fields locked by owning subsystems. - */ -struct tap_softc { - struct ifnet *tap_ifp; - u_short tap_flags; /* misc flags */ -#define TAP_OPEN (1 << 0) -#define TAP_INITED (1 << 1) -#define TAP_RWAIT (1 << 2) -#define TAP_ASYNC (1 << 3) -#define TAP_READY (TAP_OPEN|TAP_INITED) -#define TAP_VMNET (1 << 4) - - u_int8_t ether_addr[ETHER_ADDR_LEN]; /* ether addr of the remote side */ - - pid_t tap_pid; /* PID of process to open */ - struct sigio *tap_sigio; /* information for async I/O */ - struct selinfo tap_rsel; /* read select */ - - SLIST_ENTRY(tap_softc) tap_next; /* next device in chain */ - struct cdev *tap_dev; - struct mtx tap_mtx; /* per-softc mutex */ -}; - -#endif /* !_NET_IF_TAPVAR_H_ */ Index: sys/net/if_tun.c =================================================================== --- sys/net/if_tun.c +++ sys/net/if_tun.c @@ -1,4 +1,33 @@ /* $NetBSD: if_tun.c,v 1.14 1994/06/29 06:36:25 cgd Exp $ */ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (C) 1999-2000 by Maksim Yevmenkin + * All rights reserved. + * Copyright (c) 2019 Kyle Evans + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ /*- * Copyright (c) 1988, Julian Onions @@ -45,9 +74,12 @@ #include #include +#include #include #include #include +#include +#include #include #include #include @@ -56,13 +88,15 @@ #include #endif #include +#include #include #include #include - #include +struct tun_driver; + /* * tun_list is protected by global tunmtx. Other mutable fields are * protected by tun->tun_mtx, or by their owning subsystem. tun_dev is @@ -82,32 +116,44 @@ #define TUN_ASYNC 0x0080 #define TUN_IFHEAD 0x0100 #define TUN_DYING 0x0200 +#define TUN_L2 0x0400 +#define TUN_VMNET 0x0800 #define TUN_READY (TUN_OPEN | TUN_INITED) pid_t tun_pid; /* owning pid */ struct ifnet *tun_ifp; /* the interface */ struct sigio *tun_sigio; /* information for async I/O */ + struct tun_driver *tun_drv; /* appropriate driver */ struct selinfo tun_rsel; /* read select */ struct mtx tun_mtx; /* protect mutable softc fields */ struct cv tun_cv; /* protect against ref'd dev destroy */ + struct ether_addr tun_ether; /* remote address */ }; #define TUN2IFP(sc) ((sc)->tun_ifp) #define TUNDEBUG if (tundebug) if_printf +#define TUN_LOCK(tp) mtx_lock(&(tp)->tun_mtx) +#define TUN_UNLOCK(tp) mtx_unlock(&(tp)->tun_mtx) + /* * All mutable global variables in if_tun are locked using tunmtx, with - * the exception of tundebug, which is used unlocked, and tunclones, - * which is static after setup. + * the exception of tundebug, which is used unlocked, and the drivers' *clones, + * which are static after setup. */ static struct mtx tunmtx; static eventhandler_tag tag; static const char tunname[] = "tun"; +static const char tapname[] = "tap"; +static const char vmnetname[] = "vmnet"; static MALLOC_DEFINE(M_TUN, tunname, "Tunnel Interface"); static int tundebug = 0; static int tundclone = 1; -static struct clonedevs *tunclones; +static int tap_allow_uopen = 0; /* allow user open() */ +static int tapuponopen = 0; /* IFF_UP on open() */ +static int tapdclone = 1; /* enable devfs cloning */ + static TAILQ_HEAD(,tun_softc) tunhead = TAILQ_HEAD_INITIALIZER(tunhead); SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, ""); @@ -115,27 +161,49 @@ SX_SYSINIT(tun_ioctl_sx, &tun_ioctl_sx, "tun_ioctl"); SYSCTL_DECL(_net_link); +/* tun */ static SYSCTL_NODE(_net_link, OID_AUTO, tun, CTLFLAG_RW, 0, "IP tunnel software network interface."); SYSCTL_INT(_net_link_tun, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tundclone, 0, "Enable legacy devfs interface creation."); +/* tap */ +static SYSCTL_NODE(_net_link, OID_AUTO, tap, CTLFLAG_RW, 0, + "Ethernet tunnel software network interface"); +SYSCTL_INT(_net_link_tap, OID_AUTO, user_open, CTLFLAG_RW, &tap_allow_uopen, 0, + "Allow user to open /dev/tap (based on node permissions)"); +SYSCTL_INT(_net_link_tap, OID_AUTO, up_on_open, CTLFLAG_RW, &tapuponopen, 0, + "Bring interface up when /dev/tap is opened"); +SYSCTL_INT(_net_link_tap, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tapdclone, 0, + "Enable legacy devfs interface creation"); +SYSCTL_INT(_net_link_tap, OID_AUTO, debug, CTLFLAG_RW, &tundebug, 0, ""); + +static int tun_name2info(const char *name, int *unit, int *flags); static void tunclone(void *arg, struct ucred *cred, char *name, int namelen, struct cdev **dev); -static void tuncreate(const char *name, struct cdev *dev); +static void tuncreate(struct cdev *dev, struct tun_driver *); static int tunifioctl(struct ifnet *, u_long, caddr_t); static void tuninit(struct ifnet *); +static void tunifinit(void *xtp); static int tunmodevent(module_t, int, void *); static int tunoutput(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *ro); static void tunstart(struct ifnet *); +static void tunstart_l2(struct ifnet *); static int tun_clone_match(struct if_clone *ifc, const char *name); +static int tap_clone_match(struct if_clone *ifc, const char *name); +static int vmnet_clone_match(struct if_clone *ifc, const char *name); static int tun_clone_create(struct if_clone *, char *, size_t, caddr_t); static int tun_clone_destroy(struct if_clone *, struct ifnet *); -static struct unrhdr *tun_unrhdr; + VNET_DEFINE_STATIC(struct if_clone *, tun_cloner); +VNET_DEFINE_STATIC(struct if_clone *, tap_cloner); +VNET_DEFINE_STATIC(struct if_clone *, vmnet_cloner); + #define V_tun_cloner VNET(tun_cloner) +#define V_tap_cloner VNET(tap_cloner) +#define V_vmnet_cloner VNET(vmnet_cloner) static d_open_t tunopen; static d_close_t tunclose; @@ -163,57 +231,200 @@ .f_event = tunkqwrite, }; -static struct cdevsw tun_cdevsw = { - .d_version = D_VERSION, - .d_flags = D_NEEDMINOR, - .d_open = tunopen, - .d_close = tunclose, - .d_read = tunread, - .d_write = tunwrite, - .d_ioctl = tunioctl, - .d_poll = tunpoll, - .d_kqfilter = tunkqfilter, - .d_name = tunname, +#define TUN_DRIVER_IDENT_MASK (TUN_L2 | TUN_VMNET) + +static struct tun_driver { + int tun_flags; + struct unrhdr *unrhdr; + struct cdevsw cdevsw; + struct clonedevs *clones; +} tun_drivers[] = { + { + .tun_flags = 0, + .cdevsw = { + .d_version = D_VERSION, + .d_flags = D_NEEDMINOR, + .d_open = tunopen, + .d_close = tunclose, + .d_read = tunread, + .d_write = tunwrite, + .d_ioctl = tunioctl, + .d_poll = tunpoll, + .d_kqfilter = tunkqfilter, + .d_name = tunname, + }, + }, + { + .tun_flags = TUN_L2, + .cdevsw = { + .d_version = D_VERSION, + .d_flags = D_NEEDMINOR, + .d_open = tunopen, + .d_close = tunclose, + .d_read = tunread, + .d_write = tunwrite, + .d_ioctl = tunioctl, + .d_poll = tunpoll, + .d_kqfilter = tunkqfilter, + .d_name = tapname, + }, + }, + { + .tun_flags = TUN_L2 | TUN_VMNET, + .cdevsw = { + .d_version = D_VERSION, + .d_flags = D_NEEDMINOR, + .d_open = tunopen, + .d_close = tunclose, + .d_read = tunread, + .d_write = tunwrite, + .d_ioctl = tunioctl, + .d_poll = tunpoll, + .d_kqfilter = tunkqfilter, + .d_name = vmnetname, + }, + }, }; +/* + * Sets unit and/or flags given the device name. + */ +static int +tun_name2info(const char *name, int *outunit, int *outflags) +{ + struct tun_driver *drv; + char *dname; + int i, flags, unit; + bool found; + + if (name == NULL) + return (EINVAL); + + /* + * Needed for dev_stdclone, but dev_stdclone will not modify, it just + * wants to be able to pass back a char * through the second param. We + * will always set that as NULL here, so we'll fake it. + */ + dname = __DECONST(char *, name); + found = false; + for (i = 0; i < nitems(tun_drivers); ++i) { + drv = &tun_drivers[i]; + + if (strcmp(name, drv->cdevsw.d_name) == 0) { + found = true; + unit = -1; + flags = drv->tun_flags; + break; + } + + if (dev_stdclone(dname, NULL, drv->cdevsw.d_name, &unit) == 1) { + found = true; + flags = drv->tun_flags; + break; + } + } + + if (!found) + return (ENXIO); + + if (outunit != NULL) + *outunit = unit; + if (outflags != NULL) + *outflags = flags; + return (0); +} + static int tun_clone_match(struct if_clone *ifc, const char *name) { - if (strncmp(tunname, name, 3) == 0 && - (name[3] == '\0' || isdigit(name[3]))) - return (1); + int tunflags; + + if (tun_name2info(name, NULL, &tunflags) == 0) { + if ((tunflags & TUN_L2) == 0) + return (1); + } + + return (0); +} + +static int +tap_clone_match(struct if_clone *ifc, const char *name) +{ + int tunflags; + + if (tun_name2info(name, NULL, &tunflags) == 0) { + if ((tunflags & (TUN_L2 | TUN_VMNET)) == TUN_L2) + return (1); + } + + return (0); +} + +static int +vmnet_clone_match(struct if_clone *ifc, const char *name) +{ + int tunflags; + + if (tun_name2info(name, NULL, &tunflags) == 0) { + if ((tunflags & TUN_VMNET) != 0) + return (1); + } return (0); } +static struct tun_driver * +tun_driver_from_flags(int tun_flags) +{ + struct tun_driver *drv; + int i; + + for (i = 0; i < nitems(tun_drivers); ++i) { + drv = &tun_drivers[i]; + if ((tun_flags & TUN_DRIVER_IDENT_MASK) == drv->tun_flags) + return (drv); + } + + return (NULL); +} + + static int tun_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params) { + struct tun_driver *drv; struct cdev *dev; - int err, unit, i; + int err, i, tunflags, unit; - err = ifc_name2unit(name, &unit); + tunflags = 0; + /* The name here tells us exactly what we're creating */ + err = tun_name2info(name, &unit, &tunflags); if (err != 0) return (err); + drv = tun_driver_from_flags(tunflags); + if (drv == NULL) + return (ENXIO); + if (unit != -1) { /* If this unit number is still available that/s okay. */ - if (alloc_unr_specific(tun_unrhdr, unit) == -1) + if (alloc_unr_specific(drv->unrhdr, unit) == -1) return (EEXIST); } else { - unit = alloc_unr(tun_unrhdr); + unit = alloc_unr(drv->unrhdr); } - snprintf(name, IFNAMSIZ, "%s%d", tunname, unit); + snprintf(name, IFNAMSIZ, "%s%d", drv->cdevsw.d_name, unit); /* find any existing device, or allocate new unit number */ - i = clone_create(&tunclones, &tun_cdevsw, &unit, &dev, 0); + i = clone_create(&drv->clones, &drv->cdevsw, &unit, &dev, 0); if (i) { /* No preexisting struct cdev *, create one */ - dev = make_dev(&tun_cdevsw, unit, - UID_UUCP, GID_DIALER, 0600, "%s%d", tunname, unit); + dev = make_dev(&drv->cdevsw, unit, UID_UUCP, GID_DIALER, 0600, + "%s%d", drv->cdevsw.d_name, unit); } - tuncreate(tunname, dev); + + tuncreate(dev, drv); return (0); } @@ -223,33 +434,47 @@ struct cdev **dev) { char devname[SPECNAMELEN + 1]; - int u, i, append_unit; + struct tun_driver *drv; + int append_unit, i, u, tunflags; + bool mayclone; if (*dev != NULL) return; + tunflags = 0; + if (tun_name2info(name, &u, &tunflags) != 0) + return; /* Not recognized */ + + if (u != -1 && u > IF_MAXUNIT) + return; /* Unit number too high */ + + mayclone = priv_check_cred(cred, PRIV_NET_IFCREATE) == 0; + if ((tunflags & TUN_L2) != 0) { + /* tap/vmnet allow user open with a sysctl */ + mayclone = (mayclone || tap_allow_uopen) && tapdclone; + } else { + mayclone = mayclone && tundclone; + } + /* * If tun cloning is enabled, only the superuser can create an * interface. */ - if (!tundclone || priv_check_cred(cred, PRIV_NET_IFCREATE) != 0) + if (!mayclone) return; - if (strcmp(name, tunname) == 0) { - u = -1; - } else if (dev_stdclone(name, NULL, tunname, &u) != 1) - return; /* Don't recognise the name */ - if (u != -1 && u > IF_MAXUNIT) - return; /* Unit number too high */ - if (u == -1) append_unit = 1; else append_unit = 0; + drv = tun_driver_from_flags(tunflags); + if (drv == NULL) + return; + CURVNET_SET(CRED_TO_VNET(cred)); /* find any existing device, or allocate new unit number */ - i = clone_create(&tunclones, &tun_cdevsw, &u, dev, 0); + i = clone_create(&drv->clones, &drv->cdevsw, &u, dev, 0); if (i) { if (append_unit) { namelen = snprintf(devname, sizeof(devname), "%s%d", @@ -257,7 +482,7 @@ name = devname; } /* No preexisting struct cdev *, create one */ - *dev = make_dev_credf(MAKEDEV_REF, &tun_cdevsw, u, cred, + *dev = make_dev_credf(MAKEDEV_REF, &drv->cdevsw, u, cred, UID_UUCP, GID_DIALER, 0600, "%s", name); } @@ -270,12 +495,12 @@ { struct cdev *dev; - mtx_lock(&tp->tun_mtx); + TUN_LOCK(tp); tp->tun_flags |= TUN_DYING; if ((tp->tun_flags & TUN_OPEN) != 0) cv_wait_unlock(&tp->tun_cv, &tp->tun_mtx); else - mtx_unlock(&tp->tun_mtx); + TUN_UNLOCK(tp); CURVNET_SET(TUN2IFP(tp)->if_vnet); sx_xlock(&tun_ioctl_sx); @@ -285,7 +510,7 @@ dev = tp->tun_dev; bpfdetach(TUN2IFP(tp)); if_detach(TUN2IFP(tp)); - free_unr(tun_unrhdr, TUN2IFP(tp)->if_dunit); + free_unr(tp->tun_drv->unrhdr, TUN2IFP(tp)->if_dunit); if_free(TUN2IFP(tp)); destroy_dev(dev); seldrain(&tp->tun_rsel); @@ -298,7 +523,7 @@ } static int -tun_clone_destroy(struct if_clone *ifc, struct ifnet *ifp) +tun_clone_destroy(struct if_clone *ifc __unused, struct ifnet *ifp) { struct tun_softc *tp = ifp->if_softc; @@ -313,8 +538,13 @@ static void vnet_tun_init(const void *unused __unused) { + V_tun_cloner = if_clone_advanced(tunname, 0, tun_clone_match, tun_clone_create, tun_clone_destroy); + V_tap_cloner = if_clone_advanced(tapname, 0, tap_clone_match, + tun_clone_create, tun_clone_destroy); + V_vmnet_cloner = if_clone_advanced(vmnetname, 0, vmnet_clone_match, + tun_clone_create, tun_clone_destroy); } VNET_SYSINIT(vnet_tun_init, SI_SUB_PROTO_IF, SI_ORDER_ANY, vnet_tun_init, NULL); @@ -322,7 +552,10 @@ static void vnet_tun_uninit(const void *unused __unused) { + if_clone_detach(V_tun_cloner); + if_clone_detach(V_tap_cloner); + if_clone_detach(V_vmnet_cloner); } VNET_SYSUNINIT(vnet_tun_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY, vnet_tun_uninit, NULL); @@ -330,7 +563,9 @@ static void tun_uninit(const void *unused __unused) { + struct tun_driver *drv; struct tun_softc *tp; + int i; EVENTHANDLER_DEREGISTER(dev_clone, tag); drain_dev_clone_events(); @@ -343,8 +578,11 @@ mtx_lock(&tunmtx); } mtx_unlock(&tunmtx); - delete_unrhdr(tun_unrhdr); - clone_cleanup(&tunclones); + for (i = 0; i < nitems(tun_drivers); ++i) { + drv = &tun_drivers[i]; + delete_unrhdr(drv->unrhdr); + clone_cleanup(&drv->clones); + } mtx_destroy(&tunmtx); } SYSUNINIT(tun_uninit, SI_SUB_PROTO_IF, SI_ORDER_ANY, tun_uninit, NULL); @@ -352,12 +590,17 @@ static int tunmodevent(module_t mod, int type, void *data) { + struct tun_driver *drv; + int i; switch (type) { case MOD_LOAD: mtx_init(&tunmtx, "tunmtx", NULL, MTX_DEF); - clone_setup(&tunclones); - tun_unrhdr = new_unrhdr(0, IF_MAXUNIT, &tunmtx); + for (i = 0; i < nitems(tun_drivers); ++i) { + drv = &tun_drivers[i]; + clone_setup(&drv->clones); + drv->unrhdr = new_unrhdr(0, IF_MAXUNIT, &tunmtx); + } tag = EVENTHANDLER_REGISTER(dev_clone, tunclone, 0, 1000); if (tag == NULL) return (ENOMEM); @@ -386,7 +629,7 @@ struct tun_softc *tp = ifp->if_softc; struct mbuf *m; - TUNDEBUG(ifp,"%s starting\n", ifp->if_xname); + TUNDEBUG(ifp, "starting\n"); if (ALTQ_IS_ENABLED(&ifp->if_snd)) { IFQ_LOCK(&ifp->if_snd); IFQ_POLL_NOLOCK(&ifp->if_snd, m); @@ -397,7 +640,7 @@ IFQ_UNLOCK(&ifp->if_snd); } - mtx_lock(&tp->tun_mtx); + TUN_LOCK(tp); if (tp->tun_flags & TUN_RWAIT) { tp->tun_flags &= ~TUN_RWAIT; wakeup(tp); @@ -405,49 +648,139 @@ selwakeuppri(&tp->tun_rsel, PZERO + 1); KNOTE_LOCKED(&tp->tun_rsel.si_note, 0); if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio) { - mtx_unlock(&tp->tun_mtx); + TUN_UNLOCK(tp); pgsigio(&tp->tun_sigio, SIGIO, 0); } else - mtx_unlock(&tp->tun_mtx); + TUN_UNLOCK(tp); } +/* + * tunstart_l2 + * + * queue packets from higher level ready to put out + */ +static void +tunstart_l2(struct ifnet *ifp) +{ + struct tun_softc *tp = ifp->if_softc; + + TUNDEBUG(ifp, "starting\n"); + + /* + * do not junk pending output if we are in VMnet mode. + * XXX: can this do any harm because of queue overflow? + */ + + TUN_LOCK(tp); + if (((tp->tun_flags & TUN_VMNET) == 0) && + ((tp->tun_flags & TUN_READY) != TUN_READY)) { + struct mbuf *m; + + /* Unlocked read. */ + TUNDEBUG(ifp, "not ready, tun_flags = 0x%x\n", tp->tun_flags); + + for (;;) { + IF_DEQUEUE(&ifp->if_snd, m); + if (m != NULL) { + m_freem(m); + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); + } else + break; + } + TUN_UNLOCK(tp); + + return; + } + + ifp->if_drv_flags |= IFF_DRV_OACTIVE; + + if (!IFQ_IS_EMPTY(&ifp->if_snd)) { + if (tp->tun_flags & TUN_RWAIT) { + tp->tun_flags &= ~TUN_RWAIT; + wakeup(tp); + } + + if ((tp->tun_flags & TUN_ASYNC) && (tp->tun_sigio != NULL)) { + TUN_UNLOCK(tp); + pgsigio(&tp->tun_sigio, SIGIO, 0); + TUN_LOCK(tp); + } + + selwakeuppri(&tp->tun_rsel, PZERO+1); + KNOTE_LOCKED(&tp->tun_rsel.si_note, 0); + if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); /* obytes are counted in ether_output */ + } + + ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; + TUN_UNLOCK(tp); +} /* tunstart_l2 */ + + /* XXX: should return an error code so it can fail. */ static void -tuncreate(const char *name, struct cdev *dev) +tuncreate(struct cdev *dev, struct tun_driver *drv) { struct tun_softc *sc; struct ifnet *ifp; + struct ether_addr eaddr; + int iflags; + u_char type; sc = malloc(sizeof(*sc), M_TUN, M_WAITOK | M_ZERO); mtx_init(&sc->tun_mtx, "tun_mtx", NULL, MTX_DEF); cv_init(&sc->tun_cv, "tun_condvar"); - sc->tun_flags = TUN_INITED; + sc->tun_flags = drv->tun_flags; sc->tun_dev = dev; + sc->tun_drv = drv; mtx_lock(&tunmtx); TAILQ_INSERT_TAIL(&tunhead, sc, tun_list); mtx_unlock(&tunmtx); - ifp = sc->tun_ifp = if_alloc(IFT_PPP); + iflags = IFF_MULTICAST; + if ((sc->tun_flags & TUN_L2) != 0) { + type = IFT_ETHER; + iflags |= IFF_BROADCAST | IFF_SIMPLEX; + } else { + type = IFT_PPP; + iflags |= IFF_POINTOPOINT; + } + ifp = sc->tun_ifp = if_alloc(type); if (ifp == NULL) panic("%s%d: failed to if_alloc() interface.\n", - name, dev2unit(dev)); - if_initname(ifp, name, dev2unit(dev)); - ifp->if_mtu = TUNMTU; - ifp->if_ioctl = tunifioctl; - ifp->if_output = tunoutput; - ifp->if_start = tunstart; - ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST; + drv->cdevsw.d_name, dev2unit(dev)); ifp->if_softc = sc; + if_initname(ifp, drv->cdevsw.d_name, dev2unit(dev)); + ifp->if_ioctl = tunifioctl; + ifp->if_flags = iflags; IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); - ifp->if_snd.ifq_drv_maxlen = 0; - IFQ_SET_READY(&ifp->if_snd); knlist_init_mtx(&sc->tun_rsel.si_note, &sc->tun_mtx); ifp->if_capabilities |= IFCAP_LINKSTATE; ifp->if_capenable |= IFCAP_LINKSTATE; - if_attach(ifp); - bpfattach(ifp, DLT_NULL, sizeof(u_int32_t)); + if ((sc->tun_flags & TUN_L2) != 0) { + ifp->if_mtu = ETHERMTU; + ifp->if_init = tunifinit; + ifp->if_start = tunstart_l2; + + ether_gen_addr(ifp, &eaddr); + ether_ifattach(ifp, eaddr.octet); + } else { + ifp->if_mtu = TUNMTU; + ifp->if_start = tunstart; + ifp->if_output = tunoutput; + + ifp->if_snd.ifq_drv_maxlen = 0; + IFQ_SET_READY(&ifp->if_snd); + + if_attach(ifp); + bpfattach(ifp, DLT_NULL, sizeof(u_int32_t)); + } dev->si_drv1 = sc; + + TUN_LOCK(sc); + sc->tun_flags |= TUN_INITED; + TUN_UNLOCK(sc); + TUNDEBUG(ifp, "interface %s is created, minor = %#x\n", ifp->if_xname, dev2unit(dev)); } @@ -456,7 +789,23 @@ tunopen(struct cdev *dev, int flag, int mode, struct thread *td) { struct ifnet *ifp; + struct tun_driver *drv; struct tun_softc *tp; + int error, tunflags; + + tunflags = 0; + error = tun_name2info(dev->si_name, NULL, &tunflags); + if (error != 0) + return (error); /* Shouldn't happen */ + + if ((tunflags & TUN_L2) != 0) { + /* Restrict? */ + if (tap_allow_uopen == 0) { + error = priv_check(td, PRIV_NET_TAP); + if (error != 0) + return (error); + } + } /* * XXXRW: Non-atomic test and set of dev->si_drv1 requires @@ -464,22 +813,39 @@ */ tp = dev->si_drv1; if (!tp) { - tuncreate(tunname, dev); + drv = tun_driver_from_flags(tunflags); + if (drv == NULL) + return (ENXIO); + + tuncreate(dev, drv); tp = dev->si_drv1; } - mtx_lock(&tp->tun_mtx); + TUN_LOCK(tp); if ((tp->tun_flags & (TUN_OPEN | TUN_DYING)) != 0) { - mtx_unlock(&tp->tun_mtx); + TUN_UNLOCK(tp); return (EBUSY); } + ifp = TUN2IFP(tp); + + if ((tp->tun_flags & TUN_L2) != 0) { + bcopy(IF_LLADDR(ifp), tp->tun_ether.octet, + sizeof(tp->tun_ether.octet)); + + ifp->if_drv_flags |= IFF_DRV_RUNNING; + ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; + + if (tapuponopen) + ifp->if_flags |= IFF_UP; + } + tp->tun_pid = td->td_proc->p_pid; tp->tun_flags |= TUN_OPEN; - ifp = TUN2IFP(tp); + if_link_state_change(ifp, LINK_STATE_UP); TUNDEBUG(ifp, "open\n"); - mtx_unlock(&tp->tun_mtx); + TUN_UNLOCK(tp); return (0); } @@ -493,11 +859,12 @@ { struct tun_softc *tp; struct ifnet *ifp; + bool l2tun; tp = dev->si_drv1; ifp = TUN2IFP(tp); - mtx_lock(&tp->tun_mtx); + TUN_LOCK(tp); /* * Simply close the device if this isn't the controlling process. This * may happen if, for instance, the tunnel has been handed off to @@ -505,7 +872,7 @@ * without putting us into an inconsistent state. */ if (td->td_proc->p_pid != tp->tun_pid) { - mtx_unlock(&tp->tun_mtx); + TUN_UNLOCK(tp); return (0); } @@ -513,12 +880,23 @@ * junk all pending output */ CURVNET_SET(ifp->if_vnet); - IFQ_PURGE(&ifp->if_snd); + + l2tun = false; + if ((tp->tun_flags & TUN_L2) != 0) { + l2tun = true; + IF_DRAIN(&ifp->if_snd); + } else { + IFQ_PURGE(&ifp->if_snd); + } + + /* For vmnet, we won't do most of the address/route bits */ + if ((tp->tun_flags & TUN_VMNET) != 0) + goto out; if (ifp->if_flags & IFF_UP) { - mtx_unlock(&tp->tun_mtx); + TUN_UNLOCK(tp); if_down(ifp); - mtx_lock(&tp->tun_mtx); + TUN_LOCK(tp); } /* Delete all addresses and routes which reference this interface. */ @@ -526,10 +904,10 @@ struct ifaddr *ifa; ifp->if_drv_flags &= ~IFF_DRV_RUNNING; - mtx_unlock(&tp->tun_mtx); + TUN_UNLOCK(tp); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { /* deal w/IPv4 PtP destination; unlocked read */ - if (ifa->ifa_addr->sa_family == AF_INET) { + if (!l2tun && ifa->ifa_addr->sa_family == AF_INET) { rtinit(ifa, (int)RTM_DELETE, tp->tun_flags & TUN_DSTADDR ? RTF_HOST : 0); } else { @@ -537,8 +915,10 @@ } } if_purgeaddrs(ifp); - mtx_lock(&tp->tun_mtx); + TUN_LOCK(tp); } + +out: if_link_state_change(ifp, LINK_STATE_DOWN); CURVNET_RESTORE(); @@ -550,7 +930,7 @@ tp->tun_pid = 0; cv_broadcast(&tp->tun_cv); - mtx_unlock(&tp->tun_mtx); + TUN_UNLOCK(tp); return (0); } @@ -564,29 +944,48 @@ TUNDEBUG(ifp, "tuninit\n"); - mtx_lock(&tp->tun_mtx); - ifp->if_flags |= IFF_UP; + TUN_LOCK(tp); ifp->if_drv_flags |= IFF_DRV_RUNNING; - getmicrotime(&ifp->if_lastchange); - + if ((tp->tun_flags & TUN_L2) == 0) { + ifp->if_flags |= IFF_UP; + getmicrotime(&ifp->if_lastchange); #ifdef INET - if_addr_rlock(ifp); - CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { - if (ifa->ifa_addr->sa_family == AF_INET) { - struct sockaddr_in *si; - - si = (struct sockaddr_in *)ifa->ifa_addr; - if (si->sin_addr.s_addr) - tp->tun_flags |= TUN_IASET; - - si = (struct sockaddr_in *)ifa->ifa_dstaddr; - if (si && si->sin_addr.s_addr) - tp->tun_flags |= TUN_DSTADDR; + if_addr_rlock(ifp); + CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family == AF_INET) { + struct sockaddr_in *si; + + si = (struct sockaddr_in *)ifa->ifa_addr; + if (si->sin_addr.s_addr) + tp->tun_flags |= TUN_IASET; + + si = (struct sockaddr_in *)ifa->ifa_dstaddr; + if (si && si->sin_addr.s_addr) + tp->tun_flags |= TUN_DSTADDR; + } } - } - if_addr_runlock(ifp); + if_addr_runlock(ifp); #endif - mtx_unlock(&tp->tun_mtx); + TUN_UNLOCK(tp); + } else { + ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; + TUN_UNLOCK(tp); + /* attempt to start output */ + tunstart_l2(ifp); + } + +} + +/* + * Used only for l2 tunnel. + */ +static void +tunifinit(void *xtp) +{ + struct tun_softc *tp; + + tp = (struct tun_softc *)xtp; + tuninit(tp->tun_ifp); } /* @@ -598,24 +997,28 @@ struct ifreq *ifr = (struct ifreq *)data; struct tun_softc *tp; struct ifstat *ifs; - int error = 0; + struct ifmediareq *ifmr; + int dummy, error = 0; + bool l2tun; + ifmr = NULL; sx_xlock(&tun_ioctl_sx); tp = ifp->if_softc; if (tp == NULL) { error = ENXIO; goto bad; } + l2tun = (tp->tun_flags & TUN_L2) != 0; switch(cmd) { case SIOCGIFSTATUS: ifs = (struct ifstat *)data; - mtx_lock(&tp->tun_mtx); + TUN_LOCK(tp); if (tp->tun_pid) snprintf(ifs->ascii, sizeof(ifs->ascii), "\tOpened by PID %d\n", tp->tun_pid); else ifs->ascii[0] = '\0'; - mtx_unlock(&tp->tun_mtx); + TUN_UNLOCK(tp); break; case SIOCSIFADDR: tuninit(ifp); @@ -629,8 +1032,31 @@ case SIOCADDMULTI: case SIOCDELMULTI: break; + case SIOCGIFMEDIA: + if (!l2tun) { + error = EINVAL; + break; + } + + ifmr = (struct ifmediareq *)data; + dummy = ifmr->ifm_count; + ifmr->ifm_count = 1; + ifmr->ifm_status = IFM_AVALID; + ifmr->ifm_active = IFM_ETHER; + if (tp->tun_flags & TUN_OPEN) + ifmr->ifm_status |= IFM_ACTIVE; + ifmr->ifm_current = ifmr->ifm_active; + if (dummy >= 1) { + int media = IFM_ETHER; + error = copyout(&media, ifmr->ifm_ulist, sizeof(int)); + } + break; default: - error = EINVAL; + if (l2tun) { + error = ether_ioctl(ifp, cmd, data); + } else { + error = EINVAL; + } } bad: sx_xunlock(&tun_ioctl_sx); @@ -660,9 +1086,9 @@ #endif /* Could be unlocked read? */ - mtx_lock(&tp->tun_mtx); + TUN_LOCK(tp); cached_tun_flags = tp->tun_flags; - mtx_unlock(&tp->tun_mtx); + TUN_UNLOCK(tp); if ((cached_tun_flags & TUN_READY) != TUN_READY) { TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags); m_freem (m0); @@ -733,17 +1159,71 @@ tunioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td) { - struct ifreq ifr; + struct ifreq ifr, *ifrp; struct tun_softc *tp = dev->si_drv1; struct tuninfo *tunp; - int error; + int error, f; +#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ + defined(COMPAT_FREEBSD4) + int ival; +#endif + bool l2tun; + + l2tun = (tp->tun_flags & TUN_L2) != 0; + if (l2tun) { + /* tap specific ioctls */ + switch(cmd) { + case TAPGIFNAME: + ifrp = (struct ifreq *)data; + strlcpy(ifrp->ifr_name, TUN2IFP(tp)->if_xname, + IFNAMSIZ); + + return (0); + /* VMware/VMnet port ioctl's */ +#if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ + defined(COMPAT_FREEBSD4) + case _IO('V', 0): + ival = IOCPARM_IVAL(data); + data = (caddr_t)&ival; + /* FALLTHROUGH */ +#endif + case VMIO_SIOCSIFFLAGS: /* VMware/VMnet SIOCSIFFLAGS */ + f = *(int *)data; + f &= 0x0fff; + f &= ~IFF_CANTCHANGE; + f |= IFF_UP; + + TUN_LOCK(tp); + TUN2IFP(tp)->if_flags = f | + (TUN2IFP(tp)->if_flags & IFF_CANTCHANGE); + TUN_UNLOCK(tp); + + return (0); + case SIOCGIFADDR: /* get MAC address of the remote side */ + TUN_LOCK(tp); + bcopy(&tp->tun_ether.octet, data, + sizeof(tp->tun_ether.octet)); + TUN_UNLOCK(tp); + + return (0); + case SIOCSIFADDR: /* set MAC address of the remote side */ + TUN_LOCK(tp); + bcopy(data, &tp->tun_ether.octet, + sizeof(tp->tun_ether.octet)); + TUN_UNLOCK(tp); + + return (0); + } + + /* Fall through to the common ioctls if unhandled */ + } switch (cmd) { case TUNSIFINFO: tunp = (struct tuninfo *)data; if (TUN2IFP(tp)->if_type != tunp->type) return (EPROTOTYPE); - mtx_lock(&tp->tun_mtx); + TUN_LOCK(tp); if (TUN2IFP(tp)->if_mtu != tunp->mtu) { strlcpy(ifr.ifr_name, if_name(TUN2IFP(tp)), IFNAMSIZ); ifr.ifr_mtu = tunp->mtu; @@ -752,20 +1232,20 @@ (caddr_t)&ifr, td); CURVNET_RESTORE(); if (error) { - mtx_unlock(&tp->tun_mtx); + TUN_UNLOCK(tp); return (error); } } TUN2IFP(tp)->if_baudrate = tunp->baudrate; - mtx_unlock(&tp->tun_mtx); + TUN_UNLOCK(tp); break; case TUNGIFINFO: tunp = (struct tuninfo *)data; - mtx_lock(&tp->tun_mtx); + TUN_LOCK(tp); tunp->mtu = TUN2IFP(tp)->if_mtu; tunp->type = TUN2IFP(tp)->if_type; tunp->baudrate = TUN2IFP(tp)->if_baudrate; - mtx_unlock(&tp->tun_mtx); + TUN_UNLOCK(tp); break; case TUNSDEBUG: tundebug = *(int *)data; @@ -774,27 +1254,27 @@ *(int *)data = tundebug; break; case TUNSLMODE: - mtx_lock(&tp->tun_mtx); + TUN_LOCK(tp); if (*(int *)data) { tp->tun_flags |= TUN_LMODE; tp->tun_flags &= ~TUN_IFHEAD; } else tp->tun_flags &= ~TUN_LMODE; - mtx_unlock(&tp->tun_mtx); + TUN_UNLOCK(tp); break; case TUNSIFHEAD: - mtx_lock(&tp->tun_mtx); + TUN_LOCK(tp); if (*(int *)data) { tp->tun_flags |= TUN_IFHEAD; tp->tun_flags &= ~TUN_LMODE; } else tp->tun_flags &= ~TUN_IFHEAD; - mtx_unlock(&tp->tun_mtx); + TUN_UNLOCK(tp); break; case TUNGIFHEAD: - mtx_lock(&tp->tun_mtx); + TUN_LOCK(tp); *(int *)data = (tp->tun_flags & TUN_IFHEAD) ? 1 : 0; - mtx_unlock(&tp->tun_mtx); + TUN_UNLOCK(tp); break; case TUNSIFMODE: /* deny this if UP */ @@ -804,30 +1284,30 @@ switch (*(int *)data & ~IFF_MULTICAST) { case IFF_POINTOPOINT: case IFF_BROADCAST: - mtx_lock(&tp->tun_mtx); + TUN_LOCK(tp); TUN2IFP(tp)->if_flags &= ~(IFF_BROADCAST|IFF_POINTOPOINT|IFF_MULTICAST); TUN2IFP(tp)->if_flags |= *(int *)data; - mtx_unlock(&tp->tun_mtx); + TUN_UNLOCK(tp); break; default: return(EINVAL); } break; case TUNSIFPID: - mtx_lock(&tp->tun_mtx); + TUN_LOCK(tp); tp->tun_pid = curthread->td_proc->p_pid; - mtx_unlock(&tp->tun_mtx); + TUN_UNLOCK(tp); break; case FIONBIO: break; case FIOASYNC: - mtx_lock(&tp->tun_mtx); + TUN_LOCK(tp); if (*(int *)data) tp->tun_flags |= TUN_ASYNC; else tp->tun_flags &= ~TUN_ASYNC; - mtx_unlock(&tp->tun_mtx); + TUN_UNLOCK(tp); break; case FIONREAD: if (!IFQ_IS_EMPTY(&TUN2IFP(tp)->if_snd)) { @@ -875,9 +1355,9 @@ int error=0, len; TUNDEBUG (ifp, "read\n"); - mtx_lock(&tp->tun_mtx); + TUN_LOCK(tp); if ((tp->tun_flags & TUN_READY) != TUN_READY) { - mtx_unlock(&tp->tun_mtx); + TUN_UNLOCK(tp); TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags); return (EHOSTDOWN); } @@ -888,19 +1368,22 @@ IFQ_DEQUEUE(&ifp->if_snd, m); if (m == NULL) { if (flag & O_NONBLOCK) { - mtx_unlock(&tp->tun_mtx); + TUN_UNLOCK(tp); return (EWOULDBLOCK); } tp->tun_flags |= TUN_RWAIT; error = mtx_sleep(tp, &tp->tun_mtx, PCATCH | (PZERO + 1), "tunread", 0); if (error != 0) { - mtx_unlock(&tp->tun_mtx); + TUN_UNLOCK(tp); return (error); } } } while (m == NULL); - mtx_unlock(&tp->tun_mtx); + TUN_UNLOCK(tp); + + if ((tp->tun_flags & TUN_L2) != 0) + BPF_MTAP(ifp, m); while (m && uio->uio_resid > 0 && error == 0) { len = min(uio->uio_resid, m->m_len); @@ -916,56 +1399,59 @@ return (error); } -/* - * the cdevsw write interface - an atomic write is a packet - or else! - */ -static int -tunwrite(struct cdev *dev, struct uio *uio, int flag) +static int +tunwrite_l2(struct tun_softc *tp, struct mbuf *m) { - struct tun_softc *tp = dev->si_drv1; - struct ifnet *ifp = TUN2IFP(tp); - struct mbuf *m; - uint32_t family, mru; - int isr; + struct ether_header *eh; + struct ifnet *ifp; - TUNDEBUG(ifp, "tunwrite\n"); + ifp = TUN2IFP(tp); - if ((ifp->if_flags & IFF_UP) != IFF_UP) - /* ignore silently */ + /* + * Only pass a unicast frame to ether_input(), if it would + * actually have been received by non-virtual hardware. + */ + if (m->m_len < sizeof(struct ether_header)) { + m_freem(m); return (0); + } - if (uio->uio_resid == 0) - return (0); + eh = mtod(m, struct ether_header *); - mru = TUNMRU; - if (tp->tun_flags & TUN_IFHEAD) - mru += sizeof(family); - if (uio->uio_resid < 0 || uio->uio_resid > mru) { - TUNDEBUG(ifp, "len=%zd!\n", uio->uio_resid); - return (EIO); + if (eh && (ifp->if_flags & IFF_PROMISC) == 0 && + !ETHER_IS_MULTICAST(eh->ether_dhost) && + bcmp(eh->ether_dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN) != 0) { + m_freem(m); + return (0); } - if ((m = m_uiotombuf(uio, M_NOWAIT, 0, 0, M_PKTHDR)) == NULL) { - if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); - return (ENOBUFS); - } + /* Pass packet up to parent. */ + CURVNET_SET(ifp->if_vnet); + (*ifp->if_input)(ifp, m); + CURVNET_RESTORE(); + /* ibytes are counted in parent */ + if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); + return (0); +} - m->m_pkthdr.rcvif = ifp; -#ifdef MAC - mac_ifnet_create_mbuf(ifp, m); -#endif +static int +tunwrite_l3(struct tun_softc *tp, struct mbuf *m) +{ + struct ifnet *ifp; + int family, isr; + ifp = TUN2IFP(tp); /* Could be unlocked read? */ - mtx_lock(&tp->tun_mtx); + TUN_LOCK(tp); if (tp->tun_flags & TUN_IFHEAD) { - mtx_unlock(&tp->tun_mtx); + TUN_UNLOCK(tp); if (m->m_len < sizeof(family) && - (m = m_pullup(m, sizeof(family))) == NULL) + (m = m_pullup(m, sizeof(family))) == NULL) return (ENOBUFS); family = ntohl(*mtod(m, u_int32_t *)); m_adj(m, sizeof(family)); } else { - mtx_unlock(&tp->tun_mtx); + TUN_UNLOCK(tp); family = AF_INET; } @@ -996,6 +1482,57 @@ return (0); } +/* + * the cdevsw write interface - an atomic write is a packet - or else! + */ +static int +tunwrite(struct cdev *dev, struct uio *uio, int flag) +{ + struct tun_softc *tp; + struct ifnet *ifp; + struct mbuf *m; + uint32_t mru; + int align; + bool l2tun; + + tp = dev->si_drv1; + ifp = TUN2IFP(tp); + TUNDEBUG(ifp, "tunwrite\n"); + if ((ifp->if_flags & IFF_UP) != IFF_UP) + /* ignore silently */ + return (0); + + if (uio->uio_resid == 0) + return (0); + + l2tun = (tp->tun_flags & TUN_L2) != 0; + align = 0; + mru = l2tun ? TAPMRU : TUNMRU; + if (l2tun) + align = ETHER_ALIGN; + else if ((tp->tun_flags & TUN_IFHEAD) != 0) + mru += sizeof(uint32_t); /* family */ + if (uio->uio_resid < 0 || uio->uio_resid > mru) { + TUNDEBUG(ifp, "len=%zd!\n", uio->uio_resid); + return (EIO); + } + + if ((m = m_uiotombuf(uio, M_NOWAIT, 0, align, M_PKTHDR)) == NULL) { + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); + return (ENOBUFS); + } + + m->m_pkthdr.rcvif = ifp; +#ifdef MAC + mac_ifnet_create_mbuf(ifp, m); +#endif + + if (l2tun) + return (tunwrite_l2(tp, m)); + + return (tunwrite_l3(tp, m)); +} + /* * tunpoll - the poll interface, this is only useful on reads * really. The write detect always returns true, write never blocks @@ -1007,14 +1544,12 @@ struct tun_softc *tp = dev->si_drv1; struct ifnet *ifp = TUN2IFP(tp); int revents = 0; - struct mbuf *m; TUNDEBUG(ifp, "tunpoll\n"); if (events & (POLLIN | POLLRDNORM)) { IFQ_LOCK(&ifp->if_snd); - IFQ_POLL_NOLOCK(&ifp->if_snd, m); - if (m != NULL) { + if (!IFQ_IS_EMPTY(&ifp->if_snd)) { TUNDEBUG(ifp, "tunpoll q=%d\n", ifp->if_snd.ifq_len); revents |= events & (POLLIN | POLLRDNORM); } else {