diff --git a/etc/mtree/BSD.include.dist b/etc/mtree/BSD.include.dist index 0f85798815d5..e7784cbb0a47 100644 --- a/etc/mtree/BSD.include.dist +++ b/etc/mtree/BSD.include.dist @@ -1,318 +1,316 @@ # $FreeBSD$ # # Please see the file src/etc/mtree/README before making changes to this file. # /set type=dir uname=root gname=wheel mode=0755 . arpa .. atf-c .. atf-c++ .. bsm .. bsnmp .. c++ v1 experimental .. ext .. tr1 .. .. .. cam ata .. mmc .. nvme .. scsi .. .. casper .. crypto .. dev acpica .. agp .. an .. ciss .. evdev .. filemon .. firewire .. hid .. hwpmc .. hyperv .. ic .. iicbus .. - if_wg - .. io .. mfi .. mlx5 .. mmc .. mpt mpilib .. .. nvme .. ofw .. pbio .. pci .. powermac_nvram .. ppbus .. pwm .. smbus .. speaker .. tcp_log .. usb .. veriexec .. vkbd .. wi .. .. devdctl .. edit readline .. .. fs cuse .. devfs .. fdescfs .. msdosfs .. nfs .. nullfs .. procfs .. smbfs .. udf .. unionfs .. .. gcc 4.2 .. .. geom cache .. concat .. eli .. gate .. journal .. label .. mirror .. mountver .. multipath .. nop .. raid .. raid3 .. shsec .. stripe .. virstor .. .. gssapi .. infiniband complib .. iba .. opensm .. vendor .. .. isofs cd9660 .. .. kadm5 .. krb5 .. lib80211 .. lib9p .. libipt .. libmilter .. libxo .. lzma .. machine pc .. .. net altq .. route .. .. net80211 .. netgraph atm .. bluetooth include .. .. netflow .. .. netinet cc .. netdump .. tcp_stacks .. .. netinet6 .. netipsec .. netnatm api .. msg .. saal .. sig .. .. netpfil pf .. .. netsmb .. nfs .. nfsclient .. nfsserver .. opencsd c_api .. etmv3 .. etmv4 .. ptm .. stm .. .. openssl .. pcap .. protocols .. rdma .. rpc .. rpcsvc .. security audit .. mac_biba .. mac_bsdextended .. mac_lomac .. mac_mls .. mac_partition .. mac_veriexec .. .. sys disk .. .. teken .. ufs ffs .. ufs .. .. vm .. xlocale .. .. diff --git a/include/Makefile b/include/Makefile index d47879e11c93..eebc7f0e121f 100644 --- a/include/Makefile +++ b/include/Makefile @@ -1,484 +1,479 @@ # @(#)Makefile 8.2 (Berkeley) 1/4/94 # $FreeBSD$ # # Doing a "make install" builds /usr/include. 
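# For example, assuming a checkout of the FreeBSD source tree at /usr/src
# (the path is an assumption, not part of this change), the workflow the
# comment above refers to is:
#
#	cd /usr/src/include && make install
#
# which repopulates /usr/include, now without the dev/if_wg headers pruned
# from the mtree manifest above.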
.include PACKAGE=runtime CLEANFILES= osreldate.h version SUBDIR= arpa protocols rpcsvc rpc xlocale SUBDIR_PARALLEL= INCS= a.out.h ar.h assert.h bitstring.h complex.h cpio.h _ctype.h ctype.h \ db.h \ dirent.h dlfcn.h elf.h elf-hints.h err.h fmtmsg.h fnmatch.h fstab.h \ fts.h ftw.h getopt.h glob.h grp.h \ ieeefp.h ifaddrs.h \ inttypes.h iso646.h kenv.h langinfo.h libgen.h limits.h link.h \ locale.h malloc.h malloc_np.h memory.h monetary.h mpool.h mqueue.h \ ndbm.h netconfig.h \ netdb.h nl_types.h nlist.h nss.h nsswitch.h paths.h \ printf.h proc_service.h pthread.h \ pthread_np.h pwd.h ranlib.h readpassphrase.h regex.h \ res_update.h resolv.h runetype.h search.h semaphore.h setjmp.h \ signal.h spawn.h stab.h stdalign.h stdbool.h stddef.h \ stdnoreturn.h stdio.h stdlib.h string.h stringlist.h \ strings.h sysexits.h tar.h termios.h tgmath.h \ time.h timeconv.h timers.h ttyent.h \ uchar.h ulimit.h unistd.h utime.h utmpx.h uuid.h varargs.h \ wchar.h wctype.h wordexp.h xlocale.h .PATH: ${SRCTOP}/contrib/libc-vis INCS+= vis.h MHDRS= float.h floatingpoint.h stdarg.h PHDRS= sched.h _semaphore.h LHDRS= aio.h errno.h fcntl.h linker_set.h poll.h stdatomic.h stdint.h \ syslog.h ucontext.h LDIRS= geom net net80211 netgraph netinet netinet6 \ netipsec netsmb nfs nfsclient nfsserver sys vm LSUBDIRS= dev/acpica dev/agp dev/an dev/ciss dev/filemon dev/firewire \ dev/hwpmc dev/hyperv \ - dev/ic dev/iicbus dev/if_wg dev/io dev/mfi dev/mmc dev/nvme \ + dev/ic dev/iicbus dev/io dev/mfi dev/mmc dev/nvme \ dev/ofw dev/pbio dev/pci ${_dev_powermac_nvram} dev/ppbus dev/pwm \ dev/smbus dev/speaker dev/tcp_log dev/veriexec dev/vkbd \ fs/devfs fs/fdescfs fs/msdosfs fs/nfs fs/nullfs \ fs/procfs fs/smbfs fs/udf fs/unionfs \ geom/cache geom/concat geom/eli geom/gate geom/journal geom/label \ geom/mirror geom/mountver geom/multipath geom/nop \ geom/raid geom/raid3 geom/shsec geom/stripe geom/virstor \ net/altq \ net/route \ netgraph/atm netgraph/netflow \ netinet/cc \ netinet/netdump \ netinet/tcp_stacks \ security/mac_biba security/mac_bsdextended security/mac_lomac \ security/mac_mls security/mac_partition \ security/mac_veriexec \ sys/disk \ ufs/ffs ufs/ufs LSUBSUBDIRS= dev/mpt/mpilib .PATH: ${SRCTOP}/sys/dev/acpica ACPICA= acpi_hpet.h \ acpiio.h ACPICADIR= ${INCLUDEDIR}/dev/acpica .PATH: ${SRCTOP}/sys/dev/agp AGP= agpreg.h AGPDIR= ${INCLUDEDIR}/dev/agp .PATH: ${SRCTOP}/sys/bsm BSM= audit.h \ audit_errno.h \ audit_internal.h \ audit_record.h \ audit_domain.h \ audit_fcntl.h \ audit_kevents.h \ audit_socket_type.h BSMPACKAGE= libbsm BSMDIR= ${INCLUDEDIR}/bsm .PATH: ${SRCTOP}/sys/security/audit SECAUDIT= audit.h \ audit_ioctl.h \ audit_private.h SECAUDITPACKAGE= libbsm SECAUDITDIR= ${INCLUDEDIR}/security/audit .PATH: ${SRCTOP}/sys/cam CAM= cam.h \ cam_ccb.h \ cam_compat.h \ cam_debug.h \ cam_iosched.h \ cam_periph.h \ cam_queue.h \ cam_sim.h \ cam_xpt.h \ cam_xpt_internal.h \ cam_xpt_periph.h \ cam_xpt_sim.h CAMDIR= ${INCLUDEDIR}/cam .PATH: ${SRCTOP}/sys/cam/ata CAMATA= ata_all.h CAMATADIR= ${INCLUDEDIR}/cam/ata .PATH: ${SRCTOP}/sys/cam/mmc CAMMMC= mmc.h \ mmc_bus.h \ mmc_all.h CAMMMCDIR= ${INCLUDEDIR}/cam/mmc .PATH: ${SRCTOP}/sys/cam/nvme CAMNVME= nvme_all.h CAMNVMEDIR= ${INCLUDEDIR}/cam/nvme .PATH: ${SRCTOP}/sys/cam/scsi CAMSCSI= scsi_all.h \ scsi_cd.h \ scsi_ch.h \ scsi_da.h \ scsi_enc.h \ scsi_enc_internal.h \ scsi_iu.h \ scsi_message.h \ scsi_pass.h \ scsi_pt.h \ scsi_sa.h \ scsi_ses.h \ scsi_sg.h \ scsi_targetio.h \ smp_all.h CAMSCSIDIR= ${INCLUDEDIR}/cam/scsi .PATH: ${SRCTOP}/sys/fs/cd9660 FS9660= cd9660_mount.h \ 
cd9660_node.h \ cd9660_rrip.h \ iso.h \ iso_rrip.h FS9660DIR= ${INCLUDEDIR}/isofs/cd9660 .PATH: ${SRCTOP}/sys/dev/evdev EVDEV= input.h \ input-event-codes.h \ uinput.h EVDEVDIR= ${INCLUDEDIR}/dev/evdev .PATH: ${SRCTOP}/sys/dev/hid HID= hid.h \ hidraw.h HIDDIR= ${INCLUDEDIR}/dev/hid .PATH: ${SRCTOP}/sys/dev/hyperv/include ${SRCTOP}/sys/dev/hyperv/utilities HYPERV= hv_snapshot.h \ hyperv.h HYPERVDIR= ${INCLUDEDIR}/dev/hyperv .PATH: ${SRCTOP}/sys/opencrypto OPENCRYPTO= cryptodev.h OPENCRYPTODIR= ${INCLUDEDIR}/crypto .PATH: ${SRCTOP}/sys/dev/pci PCI= pcireg.h PCIDIR= ${INCLUDEDIR}/dev/pci .PATH: ${SRCTOP}/sys/dev/veriexec VERIEXEC= veriexec_ioctl.h VERIEXECDIR= ${INCLUDEDIR}/dev/veriexec .PATH: ${SRCTOP}/sys/contrib/ipfilter/netinet IPFILTER= ip_auth.h \ ip_compat.h \ ip_dstlist.h \ ip_fil.h \ ip_frag.h \ ip_htable.h \ ip_lookup.h \ ip_nat.h \ ip_pool.h \ ip_proxy.h \ ip_rules.h \ ip_scan.h \ ip_state.h \ ip_sync.h \ ipf_rb.h \ ipl.h \ radix_ipf.h IPFILTERDIR= ${INCLUDEDIR}/netinet .PATH: ${SRCTOP}/sys/netpfil/pf PF= pf.h \ pf_altq.h \ pf_mtag.h PFPACKAGE= pf PFDIR= ${INCLUDEDIR}/netpfil/pf .PATH: ${SRCTOP}/sys/rpc RPC= rpcsec_tls.h \ types.h RPCDIR= ${INCLUDEDIR}/rpc .PATH: ${SRCTOP}/sys/teken TEKEN= teken.h TEKENDIR= ${INCLUDEDIR}/teken .PATH: ${SRCTOP}/sys/contrib/openzfs/include/sys NVPAIR= nvpair.h NVPAIRDIR= ${INCLUDEDIR}/sys .PATH: ${SRCTOP}/sys/dev/mlx5 MLX5= mlx5io.h MLX5DIR= ${INCLUDEDIR}/dev/mlx5 -.PATH: ${SRCTOP}/sys/dev/if_wg -WG= if_wg.h -WGDIR= ${INCLUDEDIR}/dev/if_wg - INCSGROUPS= INCS \ ACPICA \ AGP \ CAM \ CAMATA \ CAMMMC \ CAMNVME \ CAMSCSI \ CRYPTO \ EVDEV \ FS9660 \ HID \ HYPERV \ OPENCRYPTO \ PCI \ RPC \ TEKEN \ - VERIEXEC \ - WG + VERIEXEC .if ${MK_AUDIT} != "no" INCSGROUPS+= BSM INCSGROUPS+= SECAUDIT .endif .if ${MK_IPFILTER} != "no" INCSGROUPS+= IPFILTER .endif .if ${MK_PF} != "no" INCSGROUPS+= PF .endif .if ${MK_CDDL} != "no" INCSGROUPS+= NVPAIR .endif .if ${MK_MLX5TOOL} != "no" INCSGROUPS+= MLX5 .endif .if ${MK_BLUETOOTH} != "no" LSUBSUBDIRS+= netgraph/bluetooth/include .endif .if ${MK_CUSE} != "no" LSUBDIRS+= fs/cuse .endif .if ${MK_GSSAPI} != "no" SUBDIR+= gssapi INCS+= gssapi.h .endif .if ${MK_HESIOD} != "no" INCS+= hesiod.h .endif # Handle the #define aliases for libiconv .if ${MK_ICONV} == "yes" INCS+= iconv.h .endif .if ${MK_USB} != "no" LSUBDIRS+= dev/usb .endif .if ${MACHINE_CPUARCH} == "powerpc" && ${MACHINE_ARCH} != "powerpcspe" _dev_powermac_nvram= dev/powermac_nvram .endif # Define SHARED to indicate whether you want symbolic links to the system # source (``symlinks''), or a separate copy (``copies''). ``symlinks'' is # probably only useful for developers and should be avoided if you do not # wish to tie your /usr/include and /usr/src together. 
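# For example (a sketch: the SHARED knob and the installincludes target are
# defined in this file, while the /usr/src path is an assumption):
#
#	make -C /usr/src/include SHARED=symlinks installincludes
#
# installs /usr/include as symbolic links pointing back into the source
# tree, rather than as standalone copies.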
#SHARED= symlinks SHARED?= copies INCS+= osreldate.h SYSDIR= ${SRCTOP}/sys NEWVERS_SH= ${SYSDIR}/conf/newvers.sh PARAM_H= ${SYSDIR}/sys/param.h MK_OSRELDATE_SH= ${.CURDIR}/mk-osreldate.sh SYMLINKS+= ${LIBDIR:C,[^/]+,..,g:C,^/,,}${INCLUDEDIR} ${LIBDIR}/include osreldate.h: ${NEWVERS_SH} ${PARAM_H} ${MK_OSRELDATE_SH} env NEWVERS_SH=${NEWVERS_SH} PARAMFILE=${PARAM_H} SYSDIR=${SYSDIR} \ sh ${MK_OSRELDATE_SH} .for i in ${LHDRS} INCSLINKS+= sys/$i ${INCLUDEDIR}/$i .endfor .for i in ${MHDRS} INCSLINKS+= machine/$i ${INCLUDEDIR}/$i .endfor .for i in ${PHDRS} INCSLINKS+= sys/$i ${INCLUDEDIR}/$i .endfor .if ${MACHINE} != ${MACHINE_CPUARCH} _MARCHS= ${MACHINE_CPUARCH} .endif .if ${MACHINE_CPUARCH} == "i386" || ${MACHINE_CPUARCH} == "amd64" _MARCHS+= x86 .endif META_TARGETS+= compat stage_includes: ${SHARED} SDESTDIR= ${SYSROOT:U${DESTDIR}} # Take care of stale directory-level symlinks. # Note: The "|| true" after find is needed in case one of the directories does # not exist (yet). compat: cd ${SDESTDIR}${INCLUDEDIR}; find ${LDIRS} ${LSUBDIRS} machine ${_MARCHS} \ crypto -maxdepth 0 -mindepth 0 -type l -print -delete || true mtree -deU ${NO_ROOT:D-W} ${MTREE_FOLLOWS_SYMLINKS} \ -f ${SRCTOP}/etc/mtree/BSD.include.dist \ -p ${SDESTDIR}${INCLUDEDIR} > /dev/null copies: .PHONY .META cd ${SDESTDIR}${INCLUDEDIR}; find ${LDIRS} ${LSUBDIRS} ${LSUBSUBDIRS} crypto \ machine machine/pc ${_MARCHS} -maxdepth 1 -mindepth 1 -type l \ -name "*.h" -print -delete || true .for i in ${LDIRS} ${LSUBDIRS:Ndev/agp:Ndev/acpica:Ndev/evdev:Ndev/hid:Ndev/hyperv:Ndev/pci:Ndev/veriexec} ${LSUBSUBDIRS} cd ${SRCTOP}/sys; \ ${INSTALL} -C ${TAG_ARGS} -o ${BINOWN} -g ${BINGRP} -m 444 $i/*.h \ ${SDESTDIR}${INCLUDEDIR}/$i .endfor cd ${SRCTOP}/sys/${MACHINE}/include; \ ${INSTALL} -C ${TAG_ARGS} -o ${BINOWN} -g ${BINGRP} -m 444 *.h \ ${SDESTDIR}${INCLUDEDIR}/machine .if exists(${SRCTOP}/sys/${MACHINE}/include/pc) cd ${SRCTOP}/sys/${MACHINE}/include/pc; \ ${INSTALL} -C ${TAG_ARGS} -o ${BINOWN} -g ${BINGRP} -m 444 *.h \ ${SDESTDIR}${INCLUDEDIR}/machine/pc .endif .for _MARCH in ${_MARCHS} .if exists(${SRCTOP}/sys/${_MARCH}/include) ${INSTALL} -d ${TAG_ARGS} -o ${BINOWN} -g ${BINGRP} -m 755 \ ${SDESTDIR}${INCLUDEDIR}/${_MARCH}; \ cd ${SRCTOP}/sys/${_MARCH}/include; \ ${INSTALL} -C ${TAG_ARGS} -o ${BINOWN} -g ${BINGRP} -m 444 *.h \ ${SDESTDIR}${INCLUDEDIR}/${_MARCH} .if exists(${SRCTOP}/sys/${_MARCH}/include/pc) ${INSTALL} -d ${TAG_ARGS} -o ${BINOWN} -g ${BINGRP} -m 755 \ ${SDESTDIR}${INCLUDEDIR}/${_MARCH}/pc; \ cd ${SRCTOP}/sys/${_MARCH}/include/pc; \ ${INSTALL} -C ${TAG_ARGS} -o ${BINOWN} -g ${BINGRP} -m 444 *.h \ ${SDESTDIR}${INCLUDEDIR}/${_MARCH}/pc .endif .endif .endfor symlinks: .PHONY .META @${ECHO} "Setting up symlinks to kernel source tree..." 
.for i in ${LDIRS} cd ${SRCTOP}; ${INSTALL_SYMLINK} ${TAG_ARGS:D${TAG_ARGS},dev} \ $$(printf '../../../%s ' sys/$i/*.h) ${SDESTDIR}${INCLUDEDIR}/$i .endfor .for i in ${LSUBDIRS:Ndev/agp:Ndev/acpica:Ndev/evdev:Ndev/hid:Ndev/hyperv:Ndev/pci:Ndev/veriexec} cd ${SRCTOP}; ${INSTALL_SYMLINK} ${TAG_ARGS:D${TAG_ARGS},dev} \ $$(printf '../../../../%s ' sys/$i/*.h) ${SDESTDIR}${INCLUDEDIR}/$i .endfor ${INSTALL_SYMLINK} ${TAG_ARGS:D${TAG_ARGS},dev} \ $$(printf '../../../../sys/dev/acpica/%s ' acpiio.h acpi_hpet.h) \ ${SDESTDIR}${INCLUDEDIR}/dev/acpica; \ ${INSTALL_SYMLINK} ${TAG_ARGS:D${TAG_ARGS},dev} ../../../../sys/dev/agp/agpreg.h \ ${SDESTDIR}${INCLUDEDIR}/dev/agp; \ ${INSTALL_SYMLINK} ${TAG_ARGS:D${TAG_ARGS},dev} \ $$(printf '../../../../sys/dev/evdev/%s ' input.h input-event-codes.h uinput.h) \ ${SDESTDIR}${INCLUDEDIR}/dev/evdev; ${INSTALL_SYMLINK} ${TAG_ARGS:D${TAG_ARGS},dev} \ $$(printf '../../../../sys/dev/hid/%s ' hid.h hidraw.h) \ ${SDESTDIR}${INCLUDEDIR}/dev/hid; \ ${INSTALL_SYMLINK} ${TAG_ARGS:D${TAG_ARGS},dev} ../../../../sys/dev/hyperv/include/hyperv.h \ ${SDESTDIR}${INCLUDEDIR}/dev/hyperv; \ ${INSTALL_SYMLINK} ${TAG_ARGS:D${TAG_ARGS},dev} ../../../../sys/dev/hyperv/utilities/hv_snapshot.h \ ${SDESTDIR}${INCLUDEDIR}/dev/hyperv; \ ${INSTALL_SYMLINK} ${TAG_ARGS:D${TAG_ARGS},dev} ../../../../sys/dev/pci/pcireg.h \ ${SDESTDIR}${INCLUDEDIR}/dev/pci; \ ${INSTALL_SYMLINK} ${TAG_ARGS:D${TAG_ARGS},dev} ../../../../sys/dev/veriexec/veriexec_ioctl.h \ ${SDESTDIR}${INCLUDEDIR}/dev/veriexec; .for i in ${LSUBSUBDIRS} cd ${SRCTOP}; ${INSTALL_SYMLINK} ${TAG_ARGS:D${TAG_ARGS},dev} \ $$(printf '../../../../../%s ' sys/$i/*.h) ${SDESTDIR}${INCLUDEDIR}/$i .endfor .if ${MK_IPFILTER} != "no" cd ${SRCTOP}; ${INSTALL_SYMLINK} ${TAG_ARGS:D${TAG_ARGS},dev} \ $$(printf '../../../%s ' sys/contrib/ipfilter/netinet/*.h) \ ${SDESTDIR}${INCLUDEDIR}/netinet; .endif .if ${MK_PF} != "no" cd ${SRCTOP}; ${INSTALL_SYMLINK} ${TAG_ARGS:D${TAG_ARGS},dev} \ $$(printf '../../../../%s ' sys/netpfil/pf/*.h) \ ${SDESTDIR}${INCLUDEDIR}/netpfil/pf; .endif ${INSTALL_SYMLINK} ${TAG_ARGS:D${TAG_ARGS},dev} ../../../sys/crypto/rijndael/rijndael.h \ ${SDESTDIR}${INCLUDEDIR}/crypto; \ cd ${SRCTOP}; ${INSTALL_SYMLINK} ${TAG_ARGS:D${TAG_ARGS},dev} \ $$(printf '../../../%s ' sys/opencrypto/*.h) \ ${SDESTDIR}${INCLUDEDIR}/crypto; \ cd ${SRCTOP}; ${INSTALL_SYMLINK} ${TAG_ARGS:D${TAG_ARGS},dev} \ $$(printf '../../../%s ' sys/${MACHINE}/include/*.h) \ ${SDESTDIR}${INCLUDEDIR}/machine; .if exists(${SRCTOP}/sys/${MACHINE}/include/pc) cd ${SRCTOP}; ${INSTALL_SYMLINK} ${TAG_ARGS:D${TAG_ARGS},dev} \ $$(printf '../../../../%s ' sys/${MACHINE}/include/pc/*.h) \ ${SDESTDIR}${INCLUDEDIR}/machine/pc; .endif .for _MARCH in ${_MARCHS} .if exists(${SRCTOP}/sys/${_MARCH}/include) ${INSTALL} -d ${TAG_ARGS:D${TAG_ARGS},dev} -o ${BINOWN} -g ${BINGRP} -m 755 \ ${SDESTDIR}${INCLUDEDIR}/${_MARCH}; \ cd ${SRCTOP}; ${INSTALL_SYMLINK} ${TAG_ARGS:D${TAG_ARGS},dev} \ $$(printf '../../../%s ' sys/${_MARCH}/include/*.h) \ ${SDESTDIR}${INCLUDEDIR}/${_MARCH}; .if exists(${SRCTOP}/sys/${_MARCH}/include/pc) ${INSTALL} -d ${TAG_ARGS:D${TAG_ARGS},dev} -o ${BINOWN} -g ${BINGRP} -m 755 \ ${SDESTDIR}${INCLUDEDIR}/${_MARCH}/pc; \ cd ${SRCTOP}; ${INSTALL_SYMLINK} ${TAG_ARGS:D${TAG_ARGS},dev} \ $$(printf '../../../../%s ' sys/${_MARCH}/include/pc/*.h) \ ${SDESTDIR}${INCLUDEDIR}/${_MARCH}/pc; .endif .endif .endfor cd ${SRCTOP}; ${INSTALL_SYMLINK} ${TAG_ARGS:D${TAG_ARGS},dev} \ $$(printf '../../../../%s ' sys/fs/cd9660/*.h) \ ${SDESTDIR}${INCLUDEDIR}/isofs/cd9660; \ cd ${SRCTOP}; 
${INSTALL_SYMLINK} ${TAG_ARGS:D${TAG_ARGS},dev} \ $$(printf '../../../sys/rpc/%s ' rpcsec_tls.h types.h) \ ${SDESTDIR}${INCLUDEDIR}/rpc; cd ${SRCTOP}/sys/rpc; .if ${MK_CDDL} != "no" ${INSTALL_SYMLINK} ${TAG_ARGS:D${TAG_ARGS},dev} \ ../../../sys/contrib/openzfs/include/sys/nvpair.h \ ${SDESTDIR}${INCLUDEDIR}/sys .endif .if ${MK_MLX5TOOL} != "no" ${INSTALL_SYMLINK} ${TAG_ARGS:D${TAG_ARGS},dev} ../../../../sys/dev/mlx5/mlx5io.h \ ${SDESTDIR}${INCLUDEDIR}/dev/mlx5 .endif .include installincludes: ${SHARED} ${SHARED}: compat .if ${MACHINE} == "host" && !defined(_SKIP_BUILD) # we're here because we are building a sysroot... # we need MACHINE et al set correctly HOST_MACHINE!= uname -m HOST_MACHINE_ARCH!= uname -p MACHINE:= ${HOST_MACHINE} MACHINE_ARCH:= ${HOST_MACHINE_ARCH} .endif diff --git a/sbin/ifconfig/Makefile b/sbin/ifconfig/Makefile index 61cb8ab933fd..b178dc0c7e6a 100644 --- a/sbin/ifconfig/Makefile +++ b/sbin/ifconfig/Makefile @@ -1,82 +1,81 @@ # From: @(#)Makefile 8.1 (Berkeley) 6/5/93 # $FreeBSD$ .include PACKAGE=runtime PROG= ifconfig SRCS= ifconfig.c # base support # # NB: The order here defines the order in which the constructors # are called. This in turn defines the default order in which # status is displayed. Probably should add a priority mechanism # to the registration process so we don't depend on this aspect # of the toolchain. # SRCS+= af_link.c # LLC support .if ${MK_INET_SUPPORT} != "no" SRCS+= af_inet.c # IPv4 support .endif .if ${MK_INET6_SUPPORT} != "no" SRCS+= af_inet6.c # IPv6 support .endif .if ${MK_INET6_SUPPORT} != "no" SRCS+= af_nd6.c # ND6 support .endif SRCS+= ifclone.c # clone device support SRCS+= ifmac.c # MAC support SRCS+= ifmedia.c # SIOC[GS]IFMEDIA support SRCS+= iffib.c # non-default FIB support SRCS+= ifvlan.c # SIOC[GS]ETVLAN support SRCS+= ifvxlan.c # VXLAN support SRCS+= ifgre.c # GRE keys etc SRCS+= ifgif.c # GIF reversed header workaround SRCS+= ifipsec.c # IPsec VTI -SRCS+= ifwg.c # Wireguard SRCS+= sfp.c # SFP/SFP+ information LIBADD+= ifconfig m util CFLAGS+= -I${SRCTOP}/lib/libifconfig -I${OBJTOP}/lib/libifconfig .if ${MK_WIRELESS_SUPPORT} != "no" SRCS+= ifieee80211.c # SIOC[GS]IEEE80211 support LIBADD+= 80211 .endif SRCS+= carp.c # SIOC[GS]VH support SRCS+= ifgroup.c # ... .if ${MK_PF} != "no" SRCS+= ifpfsync.c # pfsync(4) support .endif SRCS+= ifbridge.c # bridge support SRCS+= iflagg.c # lagg support .if ${MK_EXPERIMENTAL} != "no" CFLAGS+= -DDRAFT_IETF_6MAN_IPV6ONLY_FLAG CFLAGS+= -DEXPERIMENTAL .endif .if ${MK_INET6_SUPPORT} != "no" CFLAGS+= -DINET6 .endif .if ${MK_INET_SUPPORT} != "no" CFLAGS+= -DINET .endif .if ${MK_JAIL} != "no" && !defined(RESCUE) CFLAGS+= -DJAIL LIBADD+= jail .endif LIBADD+= nv MAN= ifconfig.8 CFLAGS+= -Wall -Wmissing-prototypes -Wcast-qual -Wwrite-strings -Wnested-externs WARNS?= 2 HAS_TESTS= SUBDIR.${MK_TESTS}+= tests .include diff --git a/sbin/ifconfig/ifwg.c b/sbin/ifconfig/ifwg.c deleted file mode 100644 index a102f392cf80..000000000000 --- a/sbin/ifconfig/ifwg.c +++ /dev/null @@ -1,731 +0,0 @@ -/*- - * SPDX-License-Identifier: BSD-2-Clause - * - * Copyright (c) 2020 Rubicon Communications, LLC (Netgate) - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -__FBSDID("$FreeBSD$"); - -#ifndef RESCUE -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include /* NB: for offsetof */ -#include -#include -#include - -#include "ifconfig.h" - -static void wgfinish(int s, void *arg); - -static bool wgfinish_registered; - -static int allowed_ips_count; -static int allowed_ips_max; -static nvlist_t **allowed_ips, *nvl_peer; - -#define ALLOWEDIPS_START 16 -#define WG_KEY_SIZE_BASE64 ((((WG_KEY_SIZE) + 2) / 3) * 4 + 1) -#define WG_KEY_SIZE_HEX (WG_KEY_SIZE * 2 + 1) -#define WG_MAX_STRLEN 64 - -struct allowedip { - union { - struct in_addr ip4; - struct in6_addr ip6; - }; -}; - -static void -register_wgfinish(void) -{ - - if (wgfinish_registered) - return; - callback_register(wgfinish, NULL); - wgfinish_registered = true; -} - -static nvlist_t * -nvl_device(void) -{ - static nvlist_t *_nvl_device; - - if (_nvl_device == NULL) - _nvl_device = nvlist_create(0); - register_wgfinish(); - return (_nvl_device); -} - -static bool -key_from_base64(uint8_t key[static WG_KEY_SIZE], const char *base64) -{ - - if (strlen(base64) != WG_KEY_SIZE_BASE64 - 1) { - warnx("bad key len - need %d got %zu\n", WG_KEY_SIZE_BASE64 - 1, strlen(base64)); - return false; - } - if (base64[WG_KEY_SIZE_BASE64 - 2] != '=') { - warnx("bad key terminator, expected '=' got '%c'", base64[WG_KEY_SIZE_BASE64 - 2]); - return false; - } - return (b64_pton(base64, key, WG_KEY_SIZE)); -} - -static void -parse_endpoint(const char *endpoint_) -{ - int err; - char *base, *endpoint, *port, *colon, *tmp; - struct addrinfo hints, *res; - - endpoint = base = strdup(endpoint_); - colon = rindex(endpoint, ':'); - if (colon == NULL) - errx(1, "bad endpoint format %s - no port delimiter found", endpoint); - *colon = '\0'; - port = colon + 1; - - /* [::]:<> */ - if (endpoint[0] == '[') { - endpoint++; - tmp = index(endpoint, ']'); - if (tmp == NULL) - errx(1, "bad endpoint format %s - '[' found with no matching ']'", endpoint); - *tmp = '\0'; - } - bzero(&hints, sizeof(hints)); - hints.ai_family = AF_UNSPEC; - err = getaddrinfo(endpoint, port, &hints, &res); - if (err) - errx(1, "%s", gai_strerror(err)); - nvlist_add_binary(nvl_peer, "endpoint", res->ai_addr, res->ai_addrlen); - freeaddrinfo(res); - free(base); -} - -static void -in_len2mask(struct in_addr *mask, u_int len) -{ - u_int i; - 
u_char *p; - - p = (u_char *)mask; - memset(mask, 0, sizeof(*mask)); - for (i = 0; i < len / NBBY; i++) - p[i] = 0xff; - if (len % NBBY) - p[i] = (0xff00 >> (len % NBBY)) & 0xff; -} - -static u_int -in_mask2len(struct in_addr *mask) -{ - u_int x, y; - u_char *p; - - p = (u_char *)mask; - for (x = 0; x < sizeof(*mask); x++) { - if (p[x] != 0xff) - break; - } - y = 0; - if (x < sizeof(*mask)) { - for (y = 0; y < NBBY; y++) { - if ((p[x] & (0x80 >> y)) == 0) - break; - } - } - return x * NBBY + y; -} - -static void -in6_prefixlen2mask(struct in6_addr *maskp, int len) -{ - static const u_char maskarray[NBBY] = {0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff}; - int bytelen, bitlen, i; - - /* sanity check */ - if (len < 0 || len > 128) { - errx(1, "in6_prefixlen2mask: invalid prefix length(%d)\n", - len); - return; - } - - memset(maskp, 0, sizeof(*maskp)); - bytelen = len / NBBY; - bitlen = len % NBBY; - for (i = 0; i < bytelen; i++) - maskp->s6_addr[i] = 0xff; - if (bitlen) - maskp->s6_addr[bytelen] = maskarray[bitlen - 1]; -} - -static int -in6_mask2len(struct in6_addr *mask, u_char *lim0) -{ - int x = 0, y; - u_char *lim = lim0, *p; - - /* ignore the scope_id part */ - if (lim0 == NULL || lim0 - (u_char *)mask > sizeof(*mask)) - lim = (u_char *)mask + sizeof(*mask); - for (p = (u_char *)mask; p < lim; x++, p++) { - if (*p != 0xff) - break; - } - y = 0; - if (p < lim) { - for (y = 0; y < NBBY; y++) { - if ((*p & (0x80 >> y)) == 0) - break; - } - } - - /* - * when the limit pointer is given, do a stricter check on the - * remaining bits. - */ - if (p < lim) { - if (y != 0 && (*p & (0x00ff >> y)) != 0) - return -1; - for (p = p + 1; p < lim; p++) - if (*p != 0) - return -1; - } - - return x * NBBY + y; -} - -static bool -parse_ip(struct allowedip *aip, uint16_t *family, const char *value) -{ - struct addrinfo hints, *res; - int err; - bool ret; - - ret = true; - bzero(aip, sizeof(*aip)); - bzero(&hints, sizeof(hints)); - hints.ai_family = AF_UNSPEC; - hints.ai_flags = AI_NUMERICHOST; - err = getaddrinfo(value, NULL, &hints, &res); - if (err) - errx(1, "%s", gai_strerror(err)); - - *family = res->ai_family; - if (res->ai_family == AF_INET) { - struct sockaddr_in *sin = (struct sockaddr_in *)res->ai_addr; - - aip->ip4 = sin->sin_addr; - } else if (res->ai_family == AF_INET6) { - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)res->ai_addr; - - aip->ip6 = sin6->sin6_addr; - } else { - ret = false; - } - - freeaddrinfo(res); - return (ret); -} - -static void -sa_ntop(const struct sockaddr *sa, char *buf, int *port) -{ - const struct sockaddr_in *sin; - const struct sockaddr_in6 *sin6; - int err; - - err = getnameinfo(sa, sa->sa_len, buf, INET6_ADDRSTRLEN, NULL, - 0, NI_NUMERICHOST); - - if (sa->sa_family == AF_INET) { - sin = (const struct sockaddr_in *)sa; - if (port) - *port = sin->sin_port; - } else if (sa->sa_family == AF_INET6) { - sin6 = (const struct sockaddr_in6 *)sa; - if (port) - *port = sin6->sin6_port; - } - - if (err) - errx(1, "%s", gai_strerror(err)); -} - -static void -dump_peer(const nvlist_t *nvl_peer_cfg) -{ - const void *key; - const struct sockaddr *endpoint; - char outbuf[WG_MAX_STRLEN]; - char addr_buf[INET6_ADDRSTRLEN]; - size_t aip_count, size; - int port; - uint16_t persistent_keepalive; - const nvlist_t * const *nvl_aips; - - printf("[Peer]\n"); - if (nvlist_exists_binary(nvl_peer_cfg, "public-key")) { - key = nvlist_get_binary(nvl_peer_cfg, "public-key", &size); - b64_ntop((const uint8_t *)key, size, outbuf, WG_MAX_STRLEN); - printf("PublicKey = %s\n", outbuf); - } - 
if (nvlist_exists_binary(nvl_peer_cfg, "preshared-key")) { - key = nvlist_get_binary(nvl_peer_cfg, "preshared-key", &size); - b64_ntop((const uint8_t *)key, size, outbuf, WG_MAX_STRLEN); - printf("PresharedKey = %s\n", outbuf); - } - if (nvlist_exists_binary(nvl_peer_cfg, "endpoint")) { - endpoint = nvlist_get_binary(nvl_peer_cfg, "endpoint", &size); - sa_ntop(endpoint, addr_buf, &port); - printf("Endpoint = %s:%d\n", addr_buf, ntohs(port)); - } - if (nvlist_exists_number(nvl_peer_cfg, - "persistent-keepalive-interval")) { - persistent_keepalive = nvlist_get_number(nvl_peer_cfg, - "persistent-keepalive-interval"); - printf("PersistentKeepalive = %d\n", persistent_keepalive); - } - if (!nvlist_exists_nvlist_array(nvl_peer_cfg, "allowed-ips")) - return; - - nvl_aips = nvlist_get_nvlist_array(nvl_peer_cfg, "allowed-ips", &aip_count); - if (nvl_aips == NULL || aip_count == 0) - return; - - printf("AllowedIPs = "); - for (size_t i = 0; i < aip_count; i++) { - uint8_t cidr; - struct sockaddr_storage ss; - sa_family_t family; - - if (!nvlist_exists_number(nvl_aips[i], "cidr")) - continue; - cidr = nvlist_get_number(nvl_aips[i], "cidr"); - if (nvlist_exists_binary(nvl_aips[i], "ipv4")) { - struct sockaddr_in *sin = (struct sockaddr_in *)&ss; - const struct in_addr *ip4; - - ip4 = nvlist_get_binary(nvl_aips[i], "ipv4", &size); - if (ip4 == NULL || cidr > 32) - continue; - sin->sin_len = sizeof(*sin); - sin->sin_family = AF_INET; - sin->sin_addr = *ip4; - } else if (nvlist_exists_binary(nvl_aips[i], "ipv6")) { - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&ss; - const struct in6_addr *ip6; - - ip6 = nvlist_get_binary(nvl_aips[i], "ipv6", &size); - if (ip6 == NULL || cidr > 128) - continue; - sin6->sin6_len = sizeof(*sin6); - sin6->sin6_family = AF_INET6; - sin6->sin6_addr = *ip6; - } else { - continue; - } - - family = ss.ss_family; - getnameinfo((struct sockaddr *)&ss, ss.ss_len, addr_buf, - INET6_ADDRSTRLEN, NULL, 0, NI_NUMERICHOST); - printf("%s/%d", addr_buf, cidr); - if (i < aip_count - 1) - printf(", "); - } - printf("\n"); -} - -static int -get_nvl_out_size(int sock, u_long op, size_t *size) -{ - struct wg_data_io wgd; - int err; - - memset(&wgd, 0, sizeof(wgd)); - - strlcpy(wgd.wgd_name, name, sizeof(wgd.wgd_name)); - wgd.wgd_size = 0; - wgd.wgd_data = NULL; - - err = ioctl(sock, op, &wgd); - if (err) - return (err); - *size = wgd.wgd_size; - return (0); -} - -static int -do_cmd(int sock, u_long op, void *arg, size_t argsize, int set) -{ - struct wg_data_io wgd; - - memset(&wgd, 0, sizeof(wgd)); - - strlcpy(wgd.wgd_name, name, sizeof(wgd.wgd_name)); - wgd.wgd_size = argsize; - wgd.wgd_data = arg; - - return (ioctl(sock, op, &wgd)); -} - -static -DECL_CMD_FUNC(peerlist, val, d) -{ - size_t size, peercount; - void *packed; - const nvlist_t *nvl; - const nvlist_t *const *nvl_peerlist; - - if (get_nvl_out_size(s, SIOCGWG, &size)) - errx(1, "can't get peer list size"); - if ((packed = malloc(size)) == NULL) - errx(1, "malloc failed for peer list"); - if (do_cmd(s, SIOCGWG, packed, size, 0)) - errx(1, "failed to obtain peer list"); - - nvl = nvlist_unpack(packed, size, 0); - if (!nvlist_exists_nvlist_array(nvl, "peers")) - return; - nvl_peerlist = nvlist_get_nvlist_array(nvl, "peers", &peercount); - - for (int i = 0; i < peercount; i++, nvl_peerlist++) { - dump_peer(*nvl_peerlist); - } -} - -static void -wgfinish(int s, void *arg) -{ - void *packed; - size_t size; - static nvlist_t *nvl_dev; - - nvl_dev = nvl_device(); - if (nvl_peer != NULL) { - if (!nvlist_exists_binary(nvl_peer, 
"public-key")) - errx(1, "must specify a public-key for adding peer"); - if (allowed_ips_count != 0) { - nvlist_add_nvlist_array(nvl_peer, "allowed-ips", - (const nvlist_t * const *)allowed_ips, - allowed_ips_count); - for (size_t i = 0; i < allowed_ips_count; i++) { - nvlist_destroy(allowed_ips[i]); - } - - free(allowed_ips); - } - - nvlist_add_nvlist_array(nvl_dev, "peers", - (const nvlist_t * const *)&nvl_peer, 1); - } - - packed = nvlist_pack(nvl_dev, &size); - - if (do_cmd(s, SIOCSWG, packed, size, true)) - errx(1, "failed to configure"); -} - -static -DECL_CMD_FUNC(peerstart, val, d) -{ - - if (nvl_peer != NULL) - errx(1, "cannot both add and remove a peer"); - register_wgfinish(); - nvl_peer = nvlist_create(0); - allowed_ips = calloc(ALLOWEDIPS_START, sizeof(*allowed_ips)); - allowed_ips_max = ALLOWEDIPS_START; - if (allowed_ips == NULL) - errx(1, "failed to allocate array for allowedips"); -} - -static -DECL_CMD_FUNC(peerdel, val, d) -{ - - if (nvl_peer != NULL) - errx(1, "cannot both add and remove a peer"); - register_wgfinish(); - nvl_peer = nvlist_create(0); - nvlist_add_bool(nvl_peer, "remove", true); -} - -static -DECL_CMD_FUNC(setwglistenport, val, d) -{ - struct addrinfo hints, *res; - const struct sockaddr_in *sin; - const struct sockaddr_in6 *sin6; - - u_long ul; - int err; - - bzero(&hints, sizeof(hints)); - hints.ai_family = AF_UNSPEC; - hints.ai_flags = AI_NUMERICHOST; - err = getaddrinfo(NULL, val, &hints, &res); - if (err) - errx(1, "%s", gai_strerror(err)); - - if (res->ai_family == AF_INET) { - sin = (struct sockaddr_in *)res->ai_addr; - ul = sin->sin_port; - } else if (res->ai_family == AF_INET6) { - sin6 = (struct sockaddr_in6 *)res->ai_addr; - ul = sin6->sin6_port; - } else { - errx(1, "unknown family"); - } - ul = ntohs((u_short)ul); - nvlist_add_number(nvl_device(), "listen-port", ul); -} - -static -DECL_CMD_FUNC(setwgprivkey, val, d) -{ - uint8_t key[WG_KEY_SIZE]; - - if (!key_from_base64(key, val)) - errx(1, "invalid key %s", val); - nvlist_add_binary(nvl_device(), "private-key", key, WG_KEY_SIZE); -} - -static -DECL_CMD_FUNC(setwgpubkey, val, d) -{ - uint8_t key[WG_KEY_SIZE]; - - if (nvl_peer == NULL) - errx(1, "setting public key only valid when adding peer"); - - if (!key_from_base64(key, val)) - errx(1, "invalid key %s", val); - nvlist_add_binary(nvl_peer, "public-key", key, WG_KEY_SIZE); -} - -static -DECL_CMD_FUNC(setwgpresharedkey, val, d) -{ - uint8_t key[WG_KEY_SIZE]; - - if (nvl_peer == NULL) - errx(1, "setting preshared-key only valid when adding peer"); - - if (!key_from_base64(key, val)) - errx(1, "invalid key %s", val); - nvlist_add_binary(nvl_peer, "preshared-key", key, WG_KEY_SIZE); -} - - -static -DECL_CMD_FUNC(setwgpersistentkeepalive, val, d) -{ - unsigned long persistent_keepalive; - char *endp; - - if (nvl_peer == NULL) - errx(1, "setting persistent keepalive only valid when adding peer"); - - errno = 0; - persistent_keepalive = strtoul(val, &endp, 0); - if (errno != 0 || *endp != '\0') - errx(1, "persistent-keepalive must be numeric (seconds)"); - if (persistent_keepalive > USHRT_MAX) - errx(1, "persistent-keepalive '%lu' too large", - persistent_keepalive); - nvlist_add_number(nvl_peer, "persistent-keepalive-interval", - persistent_keepalive); -} - -static -DECL_CMD_FUNC(setallowedips, val, d) -{ - char *base, *allowedip, *mask; - u_long ul; - char *endp; - struct allowedip aip; - nvlist_t *nvl_aip; - uint16_t family; - - if (nvl_peer == NULL) - errx(1, "setting allowed ip only valid when adding peer"); - if (allowed_ips_count == 
allowed_ips_max) { - allowed_ips_max *= 2; - allowed_ips = reallocarray(allowed_ips, allowed_ips_max, - sizeof(*allowed_ips)); - if (allowed_ips == NULL) - errx(1, "failed to grow allowed ip array"); - } - - allowed_ips[allowed_ips_count] = nvl_aip = nvlist_create(0); - if (nvl_aip == NULL) - errx(1, "failed to create new allowedip nvlist"); - - base = allowedip = strdup(val); - mask = index(allowedip, '/'); - if (mask == NULL) - errx(1, "mask separator not found in allowedip %s", val); - *mask = '\0'; - mask++; - - parse_ip(&aip, &family, allowedip); - ul = strtoul(mask, &endp, 0); - if (*endp != '\0') - errx(1, "invalid value for allowedip mask"); - - nvlist_add_number(nvl_aip, "cidr", ul); - if (family == AF_INET) { - nvlist_add_binary(nvl_aip, "ipv4", &aip.ip4, sizeof(aip.ip4)); - } else if (family == AF_INET6) { - nvlist_add_binary(nvl_aip, "ipv6", &aip.ip6, sizeof(aip.ip6)); - } else { - /* Shouldn't happen */ - nvlist_destroy(nvl_aip); - goto out; - } - - allowed_ips_count++; - -out: - free(base); -} - -static -DECL_CMD_FUNC(setendpoint, val, d) -{ - if (nvl_peer == NULL) - errx(1, "setting endpoint only valid when adding peer"); - parse_endpoint(val); -} - -static void -wireguard_status(int s) -{ - size_t size; - void *packed; - nvlist_t *nvl; - char buf[WG_KEY_SIZE_BASE64]; - const void *key; - uint16_t listen_port; - - if (get_nvl_out_size(s, SIOCGWG, &size)) - return; - if ((packed = malloc(size)) == NULL) - return; - if (do_cmd(s, SIOCGWG, packed, size, 0)) - return; - nvl = nvlist_unpack(packed, size, 0); - if (nvlist_exists_number(nvl, "listen-port")) { - listen_port = nvlist_get_number(nvl, "listen-port"); - printf("\tlisten-port: %d\n", listen_port); - } - if (nvlist_exists_binary(nvl, "private-key")) { - key = nvlist_get_binary(nvl, "private-key", &size); - b64_ntop((const uint8_t *)key, size, buf, WG_MAX_STRLEN); - printf("\tprivate-key: %s\n", buf); - } - if (nvlist_exists_binary(nvl, "public-key")) { - key = nvlist_get_binary(nvl, "public-key", &size); - b64_ntop((const uint8_t *)key, size, buf, WG_MAX_STRLEN); - printf("\tpublic-key: %s\n", buf); - } -} - -static struct cmd wireguard_cmds[] = { - DEF_CMD_ARG("listen-port", setwglistenport), - DEF_CMD_ARG("private-key", setwgprivkey), - /* XXX peer-list is deprecated. 
*/ - DEF_CMD("peer-list", 0, peerlist), - DEF_CMD("peers", 0, peerlist), - DEF_CMD("peer", 0, peerstart), - DEF_CMD("-peer", 0, peerdel), - DEF_CMD_ARG("preshared-key", setwgpresharedkey), - DEF_CMD_ARG("public-key", setwgpubkey), - DEF_CMD_ARG("persistent-keepalive", setwgpersistentkeepalive), - DEF_CMD_ARG("allowed-ips", setallowedips), - DEF_CMD_ARG("endpoint", setendpoint), -}; - -static struct afswtch af_wireguard = { - .af_name = "af_wireguard", - .af_af = AF_UNSPEC, - .af_other_status = wireguard_status, -}; - -static void -wg_create(int s, struct ifreq *ifr) -{ - - setproctitle("ifconfig %s create ...\n", name); - - ifr->ifr_data = NULL; - if (ioctl(s, SIOCIFCREATE, ifr) < 0) - err(1, "SIOCIFCREATE"); -} - -static __constructor void -wireguard_ctor(void) -{ - int i; - - for (i = 0; i < nitems(wireguard_cmds); i++) - cmd_register(&wireguard_cmds[i]); - af_register(&af_wireguard); - clone_setdefcallback_prefix("wg", wg_create); -} - -#endif diff --git a/share/man/man4/Makefile b/share/man/man4/Makefile index 54fd89fe7590..c29bb80c7a58 100644 --- a/share/man/man4/Makefile +++ b/share/man/man4/Makefile @@ -1,1044 +1,1042 @@ # @(#)Makefile 8.1 (Berkeley) 6/18/93 # $FreeBSD$ .include MAN= aac.4 \ aacraid.4 \ acpi.4 \ ${_acpi_asus.4} \ ${_acpi_asus_wmi.4} \ ${_acpi_dock.4} \ ${_acpi_fujitsu.4} \ ${_acpi_hp.4} \ ${_acpi_ibm.4} \ ${_acpi_panasonic.4} \ ${_acpi_rapidstart.4} \ ${_acpi_sony.4} \ acpi_thermal.4 \ acpi_battery.4 \ ${_acpi_toshiba.4} \ acpi_video.4 \ ${_acpi_wmi.4} \ ada.4 \ adm6996fc.4 \ ads111x.4 \ ae.4 \ ${_aesni.4} \ age.4 \ agp.4 \ ahc.4 \ ahci.4 \ ahd.4 \ ${_aibs.4} \ aio.4 \ alc.4 \ ale.4 \ alpm.4 \ altera_atse.4 \ altera_avgen.4 \ altera_jtag_uart.4 \ altera_sdcard.4 \ altq.4 \ amdpm.4 \ ${_amdsbwd.4} \ ${_amdsmb.4} \ ${_amdsmn.4} \ ${_amdtemp.4} \ ${_bxe.4} \ amr.4 \ an.4 \ ${_aout.4} \ ${_apic.4} \ arcmsr.4 \ ${_asmc.4} \ at45d.4 \ ata.4 \ ath.4 \ ath_ahb.4 \ ath_hal.4 \ ath_pci.4 \ atkbd.4 \ atkbdc.4 \ atp.4 \ ${_atf_test_case.4} \ ${_atrtc.4} \ ${_attimer.4} \ audit.4 \ auditpipe.4 \ aue.4 \ axe.4 \ axge.4 \ axp.4 \ bce.4 \ bcma.4 \ bfe.4 \ bge.4 \ ${_bhyve.4} \ bhnd.4 \ bhnd_chipc.4 \ bhnd_pmu.4 \ bhndb.4 \ bhndb_pci.4 \ blackhole.4 \ bnxt.4 \ bpf.4 \ bridge.4 \ bt.4 \ bwi.4 \ bwn.4 \ ${_bytgpio.4} \ capsicum.4 \ cardbus.4 \ carp.4 \ cas.4 \ cc_cdg.4 \ cc_chd.4 \ cc_cubic.4 \ cc_dctcp.4 \ cc_hd.4 \ cc_htcp.4 \ cc_newreno.4 \ cc_vegas.4 \ ${_ccd.4} \ ccr.4 \ cd.4 \ cdce.4 \ cdceem.4 \ cfi.4 \ cfumass.4 \ ${_cgem.4} \ ch.4 \ chromebook_platform.4 \ ${_chvgpio.4} \ ciss.4 \ cloudabi.4 \ ${_coretemp.4} \ cp2112.4 \ ${_cpuctl.4} \ cpufreq.4 \ crypto.4 \ ctl.4 \ cue.4 \ cxgb.4 \ cxgbe.4 \ cxgbev.4 \ cyapa.4 \ da.4 \ dc.4 \ dcons.4 \ dcons_crom.4 \ ddb.4 \ devctl.4 \ disc.4 \ disk.4 \ divert.4 \ ${_dpms.4} \ ds1307.4 \ ds3231.4 \ ${_dtrace_provs} \ dummynet.4 \ edsc.4 \ ehci.4 \ em.4 \ ena.4 \ enc.4 \ epair.4 \ esp.4 \ est.4 \ et.4 \ etherswitch.4 \ eventtimers.4 \ exca.4 \ e6060sw.4 \ fd.4 \ fdc.4 \ fdt.4 \ fdt_pinctrl.4 \ fdtbus.4 \ ffclock.4 \ filemon.4 \ firewire.4 \ ${_ftwd.4} \ full.4 \ fwe.4 \ fwip.4 \ fwohci.4 \ fxp.4 \ gbde.4 \ gdb.4 \ gem.4 \ geom.4 \ geom_linux_lvm.4 \ geom_map.4 \ geom_uzip.4 \ gif.4 \ gpio.4 \ gpioiic.4 \ gpiokeys.4 \ gpioled.4 \ gpioths.4 \ gre.4 \ h_ertt.4 \ hconf.4 \ hcons.4 \ hgame.4 \ hidbus.4 \ hidquirk.4 \ hidraw.4 \ hifn.4 \ hkbd.4 \ hms.4 \ hmt.4 \ hpen.4 \ hpet.4 \ ${_hpt27xx.4} \ ${_hptiop.4} \ ${_hptmv.4} \ ${_hptnr.4} \ ${_hptrr.4} \ hsctrl.4 \ htu21.4 \ ${_hv_kvp.4} \ ${_hv_netvsc.4} \ ${_hv_storvsc.4} \ ${_hv_utils.4} \ 
${_hv_vmbus.4} \ ${_hv_vss.4} \ hwpmc.4 \ ${_hwpstate_intel.4} \ iavf.4 \ ichsmb.4 \ ${_ichwd.4} \ icmp.4 \ icmp6.4 \ ida.4 \ if_ipsec.4 \ iflib.4 \ ifmib.4 \ ig4.4 \ igmp.4 \ iic.4 \ iic_gpiomux.4 \ iicbb.4 \ iicbus.4 \ iichid.4 \ iicmux.4 \ iicsmb.4 \ iir.4 \ ${_imcsmb.4} \ inet.4 \ inet6.4 \ intpm.4 \ intro.4 \ ${_io.4} \ ${_ioat.4} \ ip.4 \ ip6.4 \ ipfirewall.4 \ ipheth.4 \ ${_ipmi.4} \ ips.4 \ ipsec.4 \ ipw.4 \ ipwfw.4 \ isci.4 \ isl.4 \ ismt.4 \ isp.4 \ ispfw.4 \ ${_itwd.4} \ iwi.4 \ iwifw.4 \ iwm.4 \ iwmfw.4 \ iwn.4 \ iwnfw.4 \ ixgbe.4 \ ixl.4 \ jedec_dimm.4 \ jme.4 \ kbdmux.4 \ kcov.4 \ keyboard.4 \ kld.4 \ ksyms.4 \ ksz8995ma.4 \ ktls.4 \ ktr.4 \ kue.4 \ lagg.4 \ le.4 \ led.4 \ lge.4 \ ${_linux.4} \ liquidio.4 \ lm75.4 \ lo.4 \ lp.4 \ lpbb.4 \ lpt.4 \ ltc430x.4 \ mac.4 \ mac_biba.4 \ mac_bsdextended.4 \ mac_ifoff.4 \ mac_lomac.4 \ mac_mls.4 \ mac_none.4 \ mac_ntpd.4 \ mac_partition.4 \ mac_portacl.4 \ mac_seeotheruids.4 \ mac_stub.4 \ mac_test.4 \ malo.4 \ md.4 \ mdio.4 \ me.4 \ mem.4 \ meteor.4 \ mfi.4 \ miibus.4 \ mld.4 \ mlx.4 \ mlx4en.4 \ mlx5en.4 \ mly.4 \ mmc.4 \ mmcsd.4 \ mn.4 \ mod_cc.4 \ mos.4 \ mouse.4 \ mpr.4 \ mps.4 \ mpt.4 \ mrsas.4 \ msk.4 \ mtio.4 \ multicast.4 \ muge.4 \ mvs.4 \ mwl.4 \ mwlfw.4 \ mx25l.4 \ mxge.4 \ my.4 \ net80211.4 \ netdump.4 \ netfpga10g_nf10bmac.4 \ netgdb.4 \ netgraph.4 \ netintro.4 \ netmap.4 \ ${_nfe.4} \ ${_nfsmb.4} \ ng_async.4 \ ngatmbase.4 \ ng_atmllc.4 \ ng_bpf.4 \ ng_bridge.4 \ ng_btsocket.4 \ ng_car.4 \ ng_ccatm.4 \ ng_checksum.4 \ ng_cisco.4 \ ng_deflate.4 \ ng_device.4 \ nge.4 \ ng_echo.4 \ ng_eiface.4 \ ng_etf.4 \ ng_ether.4 \ ng_ether_echo.4 \ ng_frame_relay.4 \ ng_gif.4 \ ng_gif_demux.4 \ ng_h4.4 \ ng_hci.4 \ ng_hole.4 \ ng_hub.4 \ ng_iface.4 \ ng_ipfw.4 \ ng_ip_input.4 \ ng_ksocket.4 \ ng_l2cap.4 \ ng_l2tp.4 \ ng_lmi.4 \ ng_macfilter.4 \ ng_mppc.4 \ ng_nat.4 \ ng_netflow.4 \ ng_one2many.4 \ ng_patch.4 \ ng_pipe.4 \ ng_ppp.4 \ ng_pppoe.4 \ ng_pptpgre.4 \ ng_pred1.4 \ ng_rfc1490.4 \ ng_socket.4 \ ng_source.4 \ ng_split.4 \ ng_sppp.4 \ ng_sscfu.4 \ ng_sscop.4 \ ng_tag.4 \ ng_tcpmss.4 \ ng_tee.4 \ ng_tty.4 \ ng_ubt.4 \ ng_UI.4 \ ng_uni.4 \ ng_vjc.4 \ ng_vlan.4 \ ng_vlan_rotate.4 \ nmdm.4 \ ${_ntb.4} \ ${_ntb_hw_amd.4} \ ${_ntb_hw_intel.4} \ ${_ntb_hw_plx.4} \ ${_ntb_transport.4} \ ${_nda.4} \ ${_if_ntb.4} \ null.4 \ numa.4 \ ${_nvd.4} \ ${_nvdimm.4} \ ${_nvme.4} \ ${_nvram.4} \ ${_nvram2env.4} \ oce.4 \ ocs_fc.4\ ohci.4 \ openfirm.4 \ orm.4 \ ${_ossl.4} \ ow.4 \ ow_temp.4 \ owc.4 \ ${_padlock.4} \ pass.4 \ pccard.4 \ pccbb.4 \ pcf.4 \ ${_pchtherm.4} \ pci.4 \ pcib.4 \ pcic.4 \ pcm.4 \ ${_pf.4} \ ${_pflog.4} \ ${_pfsync.4} \ pim.4 \ pms.4 \ polling.4 \ ppbus.4 \ ppc.4 \ ppi.4 \ procdesc.4 \ proto.4 \ ps4dshock.4 \ psm.4 \ pst.4 \ pt.4 \ ptnet.4 \ pts.4 \ pty.4 \ puc.4 \ pwmc.4 \ ${_qat.4} \ ${_qlxge.4} \ ${_qlxgb.4} \ ${_qlxgbe.4} \ ${_qlnxe.4} \ ral.4 \ random.4 \ rctl.4 \ re.4 \ rgephy.4 \ rights.4 \ rl.4 \ rndtest.4 \ route.4 \ rtsx.4 \ rtwn.4 \ rtwnfw.4 \ rtwn_pci.4 \ rue.4 \ sa.4 \ safe.4 \ safexcel.4 \ sbp.4 \ sbp_targ.4 \ scc.4 \ sched_4bsd.4 \ sched_ule.4 \ screen.4 \ scsi.4 \ sctp.4 \ sdhci.4 \ sem.4 \ send.4 \ ses.4 \ ${_sfxge.4} \ sge.4 \ siba.4 \ siftr.4 \ siis.4 \ simplebus.4 \ sis.4 \ sk.4 \ ${_smartpqi.4} \ smb.4 \ smbios.4 \ smbus.4 \ smp.4 \ smsc.4 \ snd_ad1816.4 \ snd_als4000.4 \ snd_atiixp.4 \ snd_cmi.4 \ snd_cs4281.4 \ snd_csa.4 \ snd_ds1.4 \ snd_emu10k1.4 \ snd_emu10kx.4 \ snd_envy24.4 \ snd_envy24ht.4 \ snd_es137x.4 \ snd_ess.4 \ snd_fm801.4 \ snd_gusc.4 \ snd_hda.4 \ snd_hdspe.4 \ snd_ich.4 \ snd_maestro3.4 \ 
snd_maestro.4 \ snd_mss.4 \ snd_neomagic.4 \ snd_sbc.4 \ snd_solo.4 \ snd_spicds.4 \ snd_t4dwave.4 \ snd_uaudio.4 \ snd_via8233.4 \ snd_via82c686.4 \ snd_vibes.4 \ sndstat.4 \ snp.4 \ spigen.4 \ ${_spkr.4} \ splash.4 \ sppp.4 \ ste.4 \ stf.4 \ stge.4 \ ${_sume.4} \ ${_superio.4} \ sym.4 \ syncache.4 \ syncer.4 \ syscons.4 \ sysmouse.4 \ tap.4 \ targ.4 \ tcp.4 \ tcp_bbr.4 \ tdfx.4 \ terasic_mtl.4 \ termios.4 \ textdump.4 \ ti.4 \ timecounters.4 \ ${_tpm.4} \ tty.4 \ tun.4 \ twa.4 \ twe.4 \ tws.4 \ udp.4 \ udplite.4 \ ure.4 \ vale.4 \ vga.4 \ vge.4 \ viapm.4 \ ${_viawd.4} \ virtio.4 \ virtio_balloon.4 \ virtio_blk.4 \ virtio_console.4 \ virtio_random.4 \ virtio_scsi.4 \ ${_vmci.4} \ vkbd.4 \ vlan.4 \ vxlan.4 \ ${_vmd.4} \ ${_vmm.4} \ ${_vmx.4} \ vr.4 \ vt.4 \ vte.4 \ vtnet.4 \ watchdog.4 \ ${_wbwd.4} \ - wg.4 \ witness.4 \ wlan.4 \ wlan_acl.4 \ wlan_amrr.4 \ wlan_ccmp.4 \ wlan_tkip.4 \ wlan_wep.4 \ wlan_xauth.4 \ wmt.4 \ ${_wpi.4} \ wsp.4 \ xb360gp.4 \ ${_xen.4} \ xhci.4 \ xl.4 \ ${_xnb.4} \ xpt.4 \ zero.4 MLINKS= ads111x.4 ads1013.4 \ ads111x.4 ads1014.4 \ ads111x.4 ads1015.4 \ ads111x.4 ads1113.4 \ ads111x.4 ads1114.4 \ ads111x.4 ads1115.4 MLINKS+=ae.4 if_ae.4 MLINKS+=age.4 if_age.4 MLINKS+=agp.4 agpgart.4 MLINKS+=alc.4 if_alc.4 MLINKS+=ale.4 if_ale.4 MLINKS+=altera_atse.4 atse.4 MLINKS+=altera_sdcard.4 altera_sdcardc.4 MLINKS+=altq.4 ALTQ.4 MLINKS+=ath.4 if_ath.4 MLINKS+=ath_pci.4 if_ath_pci.4 MLINKS+=an.4 if_an.4 MLINKS+=aue.4 if_aue.4 MLINKS+=axe.4 if_axe.4 MLINKS+=bce.4 if_bce.4 MLINKS+=bfe.4 if_bfe.4 MLINKS+=bge.4 if_bge.4 MLINKS+=bnxt.4 if_bnxt.4 MLINKS+=bridge.4 if_bridge.4 MLINKS+=bwi.4 if_bwi.4 MLINKS+=bwn.4 if_bwn.4 MLINKS+=${_bxe.4} ${_if_bxe.4} MLINKS+=cas.4 if_cas.4 MLINKS+=cdce.4 if_cdce.4 MLINKS+=cfi.4 cfid.4 MLINKS+=cloudabi.4 cloudabi32.4 \ cloudabi.4 cloudabi64.4 MLINKS+=crypto.4 cryptodev.4 MLINKS+=cue.4 if_cue.4 MLINKS+=cxgb.4 if_cxgb.4 MLINKS+=cxgbe.4 if_cxgbe.4 \ cxgbe.4 vcxgbe.4 \ cxgbe.4 if_vcxgbe.4 \ cxgbe.4 cxl.4 \ cxgbe.4 if_cxl.4 \ cxgbe.4 vcxl.4 \ cxgbe.4 if_vcxl.4 \ cxgbe.4 cc.4 \ cxgbe.4 if_cc.4 \ cxgbe.4 vcc.4 \ cxgbe.4 if_vcc.4 MLINKS+=cxgbev.4 if_cxgbev.4 \ cxgbev.4 cxlv.4 \ cxgbev.4 if_cxlv.4 \ cxgbev.4 ccv.4 \ cxgbev.4 if_ccv.4 MLINKS+=dc.4 if_dc.4 MLINKS+=disc.4 if_disc.4 MLINKS+=edsc.4 if_edsc.4 MLINKS+=em.4 if_em.4 \ em.4 igb.4 \ em.4 if_igb.4 MLINKS+=enc.4 if_enc.4 MLINKS+=epair.4 if_epair.4 MLINKS+=et.4 if_et.4 MLINKS+=fd.4 stderr.4 \ fd.4 stdin.4 \ fd.4 stdout.4 MLINKS+=fdt.4 FDT.4 MLINKS+=firewire.4 ieee1394.4 MLINKS+=fwe.4 if_fwe.4 MLINKS+=fwip.4 if_fwip.4 MLINKS+=fxp.4 if_fxp.4 MLINKS+=gem.4 if_gem.4 MLINKS+=geom.4 GEOM.4 MLINKS+=gif.4 if_gif.4 MLINKS+=gpio.4 gpiobus.4 MLINKS+=gpioths.4 dht11.4 MLINKS+=gpioths.4 dht22.4 MLINKS+=gre.4 if_gre.4 MLINKS+=hpet.4 acpi_hpet.4 MLINKS+=${_hptrr.4} ${_rr232x.4} MLINKS+=${_attimer.4} ${_i8254.4} MLINKS+=ip.4 rawip.4 MLINKS+=ipfirewall.4 ipaccounting.4 \ ipfirewall.4 ipacct.4 \ ipfirewall.4 ipfw.4 MLINKS+=ipheth.4 if_ipheth.4 MLINKS+=ipw.4 if_ipw.4 MLINKS+=iwi.4 if_iwi.4 MLINKS+=iwm.4 if_iwm.4 MLINKS+=iwn.4 if_iwn.4 MLINKS+=ixgbe.4 ix.4 MLINKS+=ixgbe.4 if_ix.4 MLINKS+=ixgbe.4 if_ixgbe.4 MLINKS+=ixl.4 if_ixl.4 MLINKS+=iavf.4 if_iavf.4 MLINKS+=jme.4 if_jme.4 MLINKS+=kue.4 if_kue.4 MLINKS+=lagg.4 trunk.4 MLINKS+=lagg.4 if_lagg.4 MLINKS+=le.4 if_le.4 MLINKS+=lge.4 if_lge.4 MLINKS+=lo.4 loop.4 MLINKS+=lp.4 plip.4 MLINKS+=malo.4 if_malo.4 MLINKS+=md.4 vn.4 MLINKS+=mem.4 kmem.4 MLINKS+=mfi.4 mfi_linux.4 \ mfi.4 mfip.4 MLINKS+=mlx5en.4 mce.4 MLINKS+=mn.4 if_mn.4 MLINKS+=mos.4 if_mos.4 MLINKS+=msk.4 if_msk.4 
MLINKS+=mwl.4 if_mwl.4 MLINKS+=mxge.4 if_mxge.4 MLINKS+=my.4 if_my.4 MLINKS+=netfpga10g_nf10bmac.4 if_nf10bmac.4 MLINKS+=netintro.4 net.4 \ netintro.4 networking.4 MLINKS+=${_nfe.4} ${_if_nfe.4} MLINKS+=nge.4 if_nge.4 MLINKS+=openfirm.4 openfirmware.4 MLINKS+=ow.4 onewire.4 MLINKS+=pccbb.4 cbb.4 MLINKS+=pcm.4 snd.4 \ pcm.4 sound.4 MLINKS+=pms.4 pmspcv.4 MLINKS+=ptnet.4 if_ptnet.4 MLINKS+=ral.4 if_ral.4 MLINKS+=re.4 if_re.4 MLINKS+=rl.4 if_rl.4 MLINKS+=rtwn_pci.4 if_rtwn_pci.4 MLINKS+=rue.4 if_rue.4 MLINKS+=scsi.4 CAM.4 \ scsi.4 cam.4 \ scsi.4 scbus.4 \ scsi.4 SCSI.4 MLINKS+=sge.4 if_sge.4 MLINKS+=sis.4 if_sis.4 MLINKS+=sk.4 if_sk.4 MLINKS+=smp.4 SMP.4 MLINKS+=smsc.4 if_smsc.4 MLINKS+=snd_envy24.4 snd_ak452x.4 MLINKS+=snd_sbc.4 snd_sb16.4 \ snd_sbc.4 snd_sb8.4 MLINKS+=${_spkr.4} ${_speaker.4} MLINKS+=splash.4 screensaver.4 MLINKS+=ste.4 if_ste.4 MLINKS+=stf.4 if_stf.4 MLINKS+=stge.4 if_stge.4 MLINKS+=syncache.4 syncookies.4 MLINKS+=syscons.4 sc.4 MLINKS+=tap.4 if_tap.4 \ tap.4 vmnet.4 \ tap.4 if_vmnet.4 MLINKS+=tdfx.4 tdfx_linux.4 MLINKS+=ti.4 if_ti.4 MLINKS+=tun.4 if_tun.4 MLINKS+=ure.4 if_ure.4 MLINKS+=vge.4 if_vge.4 MLINKS+=vlan.4 if_vlan.4 MLINKS+=vxlan.4 if_vxlan.4 MLINKS+=${_vmx.4} ${_if_vmx.4} MLINKS+=vr.4 if_vr.4 MLINKS+=vte.4 if_vte.4 MLINKS+=vtnet.4 if_vtnet.4 MLINKS+=watchdog.4 SW_WATCHDOG.4 -MLINKS+=wg.4 if_wg.4 MLINKS+=${_wpi.4} ${_if_wpi.4} MLINKS+=xl.4 if_xl.4 .if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "i386" _acpi_asus.4= acpi_asus.4 _acpi_asus_wmi.4= acpi_asus_wmi.4 _acpi_dock.4= acpi_dock.4 _acpi_fujitsu.4=acpi_fujitsu.4 _acpi_hp.4= acpi_hp.4 _acpi_ibm.4= acpi_ibm.4 _acpi_panasonic.4=acpi_panasonic.4 _acpi_rapidstart.4=acpi_rapidstart.4 _acpi_sony.4= acpi_sony.4 _acpi_toshiba.4=acpi_toshiba.4 _acpi_wmi.4= acpi_wmi.4 _aesni.4= aesni.4 _aout.4= aout.4 _apic.4= apic.4 _atrtc.4= atrtc.4 _attimer.4= attimer.4 _aibs.4= aibs.4 _amdsbwd.4= amdsbwd.4 _amdsmb.4= amdsmb.4 _amdsmn.4= amdsmn.4 _amdtemp.4= amdtemp.4 _asmc.4= asmc.4 _bxe.4= bxe.4 _bytgpio.4= bytgpio.4 _chvgpio.4= chvgpio.4 _coretemp.4= coretemp.4 _cpuctl.4= cpuctl.4 _dpms.4= dpms.4 _ftwd.4= ftwd.4 _hpt27xx.4= hpt27xx.4 _hptiop.4= hptiop.4 _hptmv.4= hptmv.4 _hptnr.4= hptnr.4 _hptrr.4= hptrr.4 _hv_kvp.4= hv_kvp.4 _hv_netvsc.4= hv_netvsc.4 _hv_storvsc.4= hv_storvsc.4 _hv_utils.4= hv_utils.4 _hv_vmbus.4= hv_vmbus.4 _hv_vss.4= hv_vss.4 _hwpstate_intel.4= hwpstate_intel.4 _i8254.4= i8254.4 _ichwd.4= ichwd.4 _if_bxe.4= if_bxe.4 _if_nfe.4= if_nfe.4 _if_urtw.4= if_urtw.4 _if_vmx.4= if_vmx.4 _if_wpi.4= if_wpi.4 _imcsmb.4= imcsmb.4 _ipmi.4= ipmi.4 _io.4= io.4 _itwd.4= itwd.4 _linux.4= linux.4 _nda.4= nda.4 _nfe.4= nfe.4 _nfsmb.4= nfsmb.4 _if_ntb.4= if_ntb.4 _ntb.4= ntb.4 _ntb_hw_amd.4= ntb_hw_amd.4 _ntb_hw_intel.4= ntb_hw_intel.4 _ntb_hw_plx.4= ntb_hw_plx.4 _ntb_transport.4=ntb_transport.4 _nvd.4= nvd.4 _nvme.4= nvme.4 _nvram.4= nvram.4 _ossl.4= ossl.4 _padlock.4= padlock.4 _pchtherm.4= pchtherm.4 _qat.4= qat.4 _rr232x.4= rr232x.4 _speaker.4= speaker.4 _spkr.4= spkr.4 _superio.4= superio.4 _tpm.4= tpm.4 _urtw.4= urtw.4 _viawd.4= viawd.4 _vmci.4= vmci.4 _vmx.4= vmx.4 _wbwd.4= wbwd.4 _wpi.4= wpi.4 _xen.4= xen.4 _xnb.4= xnb.4 .endif .if ${MACHINE_CPUARCH} == "amd64" _ioat.4= ioat.4 _nvdimm.4= nvdimm.4 _qlxge.4= qlxge.4 _qlxgb.4= qlxgb.4 _qlxgbe.4= qlxgbe.4 _qlnxe.4= qlnxe.4 _sfxge.4= sfxge.4 _smartpqi.4= smartpqi.4 _sume.4= sume.4 _vmd.4= vmd.4 MLINKS+=qlxge.4 if_qlxge.4 MLINKS+=qlxgb.4 if_qlxgb.4 MLINKS+=qlxgbe.4 if_qlxgbe.4 MLINKS+=qlnxe.4 if_qlnxe.4 MLINKS+=sfxge.4 if_sfxge.4 MLINKS+=sume.4 if_sume.4 .if ${MK_BHYVE} 
!= "no" _bhyve.4= bhyve.4 _vmm.4= vmm.4 .endif .endif .if ${MACHINE_CPUARCH} == "mips" _nvram2env.4= nvram2env.4 .endif .if ${MACHINE_CPUARCH} == "powerpc" _nvd.4= nvd.4 _nvme.4= nvme.4 .endif .if ${MACHINE_CPUARCH} == "arm" || ${MACHINE_CPUARCH} == "aarch64" || \ ${MACHINE_CPUARCH} == "riscv" _cgem.4= cgem.4 MLINKS+=cgem.4 if_cgem.4 .endif .if empty(MAN_ARCH) __arches= ${MACHINE} ${MACHINE_ARCH} ${MACHINE_CPUARCH} .elif ${MAN_ARCH} == "all" __arches= ${:!/bin/sh -c "/bin/ls -d ${.CURDIR}/man4.*"!:E} .else __arches= ${MAN_ARCH} .endif .for __arch in ${__arches:O:u} .if exists(${.CURDIR}/man4.${__arch}) SUBDIR+= man4.${__arch} .endif .endfor .if ${MK_BLUETOOTH} != "no" MAN+= ng_bluetooth.4 .endif .if ${MK_CCD} != "no" _ccd.4= ccd.4 .endif .if ${MK_CDDL} != "no" _dtrace_provs= dtrace_audit.4 \ dtrace_io.4 \ dtrace_ip.4 \ dtrace_lockstat.4 \ dtrace_proc.4 \ dtrace_sched.4 \ dtrace_sctp.4 \ dtrace_tcp.4 \ dtrace_udp.4 \ dtrace_udplite.4 MLINKS+= dtrace_audit.4 dtaudit.4 .endif .if ${MK_EFI} != "no" MAN+= efidev.4 MLINKS+= efidev.4 efirtc.4 .endif .if ${MK_ISCSI} != "no" MAN+= cfiscsi.4 MAN+= iscsi.4 MAN+= iscsi_initiator.4 MAN+= iser.4 .endif .if ${MK_OFED} != "no" MAN+= mlx4ib.4 MAN+= mlx5ib.4 .endif .if ${MK_MLX5TOOL} != "no" MAN+= mlx5io.4 .endif .if ${MK_TESTS} != "no" ATF= ${SRCTOP}/contrib/atf .PATH: ${ATF}/doc _atf_test_case.4= atf-test-case.4 .endif .if ${MK_PF} != "no" _pf.4= pf.4 _pflog.4= pflog.4 _pfsync.4= pfsync.4 .endif .if ${MK_USB} != "no" MAN+= \ otus.4 \ otusfw.4 \ rsu.4 \ rsufw.4 \ rtwn_usb.4 \ rum.4 \ run.4 \ runfw.4 \ u3g.4 \ uark.4 \ uart.4 \ uath.4 \ ubsa.4 \ ubser.4 \ ubtbcmfw.4 \ uchcom.4 \ ucom.4 \ ucycom.4 \ udav.4 \ udbp.4 \ udl.4 \ uep.4 \ ufoma.4 \ uftdi.4 \ ugen.4 \ ugold.4 \ uhci.4 \ uhid.4 \ uhso.4 \ uipaq.4 \ ukbd.4 \ uled.4 \ ulpt.4 \ umass.4 \ umcs.4 \ umct.4 \ umodem.4 \ umoscom.4 \ ums.4 \ unix.4 \ upgt.4 \ uplcom.4 \ ural.4 \ urio.4 \ urndis.4 \ ${_urtw.4} \ usb.4 \ usb_quirk.4 \ usb_template.4 \ usbhid.4 \ usfs.4 \ uslcom.4 \ uvisor.4 \ uvscom.4 \ zyd.4 MLINKS+=otus.4 if_otus.4 MLINKS+=rsu.4 if_rsu.4 MLINKS+=rtwn_usb.4 if_rtwn_usb.4 MLINKS+=rum.4 if_rum.4 MLINKS+=run.4 if_run.4 MLINKS+=u3g.4 u3gstub.4 MLINKS+=uath.4 if_uath.4 MLINKS+=udav.4 if_udav.4 MLINKS+=upgt.4 if_upgt.4 MLINKS+=ural.4 if_ural.4 MLINKS+=urndis.4 if_urndis.4 MLINKS+=${_urtw.4} ${_if_urtw.4} MLINKS+=zyd.4 if_zyd.4 .endif .include diff --git a/share/man/man4/wg.4 b/share/man/man4/wg.4 deleted file mode 100644 index 29215bd438ff..000000000000 --- a/share/man/man4/wg.4 +++ /dev/null @@ -1,259 +0,0 @@ -.\" Copyright (c) 2020 Gordon Bergling -.\" -.\" Redistribution and use in source and binary forms, with or without -.\" modification, are permitted provided that the following conditions -.\" are met: -.\" 1. Redistributions of source code must retain the above copyright -.\" notice, this list of conditions and the following disclaimer. -.\" 2. Redistributions in binary form must reproduce the above copyright -.\" notice, this list of conditions and the following disclaimer in the -.\" documentation and/or other materials provided with the distribution. -.\" -.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND -.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -.\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE -.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -.\" SUCH DAMAGE. -.\" -.\" $FreeBSD$ -.\" -.Dd March 12, 2021 -.Dt WG 4 -.Os -.Sh NAME -.Nm wg -.Nd "WireGuard pseudo-device" -.Sh SYNOPSIS -To load the driver as a module at boot time, place the following line in -.Xr loader.conf 5 : -.Bd -literal -offset indent -if_wg_load="YES" -.Ed -.Sh DESCRIPTION -The -.Nm -driver provides Virtual Private Network (VPN) interfaces for the secure -exchange of layer 3 traffic with other WireGuard peers using the WireGuard -protocol. -.Pp -A -.Nm -interface recognises one or more peers, establishes a secure tunnel with -each on demand, and tracks each peer's UDP endpoint for exchanging encrypted -traffic. -.Pp -The interfaces can be created at runtime using the -.Ic ifconfig Cm wg Ns Ar N Cm create -command. -The interface itself can be configured with -.Xr ifconfig 8 . -.Pp -The following parameters are available: -.Bl -tag -width indent -.It Cm listen-port -The listening port of the -.Nm -interface. -.It Cm public-key -The public key of the -.Nm -interface. -.It Cm private-key -The private key of the -.Nm -interface. -.It Cm preshared-key -Defines a pre-shared key for the -.Nm -interface. -.It Cm allowed-ips -A list of allowed IP addresses. -.It Cm endpoint -The IP address and port of the WireGuard peer to connect to. -.It Cm peers -A list of peering IP addresses to connect to. -.It Cm persistent-keepalive-interval -Interval, in seconds, at which to send persistent keepalive packets. -.El -.Pp -The -.Nm -interfaces support the following -.Xr ioctl 2 Ns s : -.Bl -tag -width Ds -offset indent -.It Dv SIOCSWG Fa "struct wg_data_io *" -Set the device configuration. -.It Dv SIOCGWG Fa "struct wg_data_io *" -Get the device configuration. -.El -.Pp -The following glossary provides a brief overview of WireGuard -terminology: -.Bl -tag -width indent -offset 3n -.It Peer -Peers exchange IPv4 or IPv6 traffic over secure tunnels. -Each -.Nm -interface may be configured to recognise one or more peers. -.It Key -Each peer uses its private key and corresponding public key to -identify itself to others. -A peer configures a -.Nm -interface with its own private key and with the public keys of its peers. -.It Pre-shared key -In addition to the public keys, each peer pair may be configured with a -unique pre-shared symmetric key. -This is used in their handshake to guard against future compromise of the -peers' encrypted tunnel if a quantum-computational attack on their -Diffie-Hellman exchange becomes feasible. -It is optional, but recommended. -.It Allowed IPs -A single -.Nm -interface may maintain concurrent tunnels connecting diverse networks. -The interface therefore implements rudimentary routing and reverse-path -filtering functions for its tunneled traffic. -These functions reference a set of allowed IP ranges configured against -each peer. -.Pp -The interface will route outbound tunneled traffic to the peer configured -with the most specific matching allowed IP address range, or drop it -if no such match exists.
-.Pp -The interface will accept tunneled traffic only from the peer -configured with the most specific matching allowed IP address range -for the incoming traffic, or drop it if no such match exists. -That is, tunneled traffic routed to a given peer cannot return through -another peer of the same -.Nm -interface. -This ensures that peers cannot spoof one another's traffic. -.It Handshake -Two peers handshake to mutually authenticate each other and to -establish a shared series of secret ephemeral encryption keys. -Any peer may initiate a handshake. -Handshakes occur only when there is traffic to send, and recur every -two minutes during transfers. -.It Connectionless -Due to the handshake behavior, there is no connected or disconnected -state. -.El -.Ss Keys -Private keys for WireGuard can be generated from any sufficiently -secure random source. -The Curve25519 keys and the pre-shared keys are both 32 bytes -long and are commonly encoded in base64 for ease of use. -.Pp -Keys can be generated with -.Xr openssl 1 -as follows: -.Pp -.Dl $ openssl rand -base64 32 -.Pp -Although a valid Curve25519 key must have 5 bits set to -specific values, this is done by the interface and so it -will accept any random 32-byte base64 string. -.Pp -When an interface has a private key set with -.Cm private-key , -the corresponding -public key is shown in the status output of the interface: -.Bd -literal -offset indent -# ifconfig wg0 | grep public-key - public-key: 7lWtsDdqaGB3EY9WNxRN3hVaHMtu1zXw71+bOjNOVUw= -.Ed -.Sh EXAMPLES -Create a -.Nm -interface and set a random private key. -.Bd -literal -offset indent -# ifconfig wg0 create listen-port 54321 private-key `openssl rand -base64 32` -.Ed -.Pp -Retrieve the associated public key from a -.Nm -interface. -.Bd -literal -offset indent -$ ifconfig wg0 | awk '/public-key/ { print $2 }' -.Ed -.Pp -Connect to a specific endpoint using its public key and set the allowed IP address. -.Bd -literal -offset indent -# ifconfig wg0 peer public-key '7lWtsDdqaGB3EY9WNxRN3hVaHMtu1zXw71+bOjNOVUw=' endpoint 10.0.1.100:54321 allowed-ips 192.168.2.100/32 -.Ed -.Pp -Remove a peer. -.Bd -literal -offset indent -# ifconfig wg0 -peer public-key '7lWtsDdqaGB3EY9WNxRN3hVaHMtu1zXw71+bOjNOVUw=' -.Ed -.Sh DIAGNOSTICS -The -.Nm -interface supports runtime debugging, which can be enabled with: -.Pp -.D1 Ic ifconfig Cm wg Ns Ar N Cm debug -.Pp -Some common error messages include: -.Bl -diag -.It "Handshake for peer X did not complete after 5 seconds, retrying" -Peer X did not reply to our initiation packet, for example because: -.Bl -bullet -.It -The peer does not have the local interface configured as a peer. -Peers must be able to mutually authenticate each other. -.It -The peer endpoint IP address is incorrectly configured. -.It -There are firewall rules preventing communication between hosts. -.El -.It "Invalid handshake initiation" -The incoming handshake packet could not be processed. -This is likely due to the local interface not containing -the correct public key for the peer. -.It "Invalid initiation MAC" -The incoming handshake initiation packet had an invalid MAC. -This is likely because the initiation sender has the wrong public key -for the handshake receiver. -.It "Packet has unallowed src IP from peer X" -After decryption, an incoming data packet has a source IP address that -is not assigned to the allowed IPs of Peer X.
-.El -.Sh SEE ALSO -.Xr inet 4 , -.Xr ip 4 , -.Xr netintro 4 , -.Xr ipf 5 , -.Xr pf.conf 5 , -.Xr ifconfig 8 , -.Xr ipfw 8 -.Rs -.%T WireGuard whitepaper -.%U https://www.wireguard.com/papers/wireguard.pdf -.Re -.Sh HISTORY -The -.Nm -device driver first appeared in -.Fx 13.0 . -.Sh AUTHORS -The -.Nm -device driver was written by -.An Jason A. Donenfeld Aq Mt Jason@zx2c4.com , -.An Matt Dunwoodie Aq Mt ncon@nconroy.net , -and -.An Kyle Evans Aq Mt kevans@FreeBSD.org . -.Pp -This manual page was written by -.An Gordon Bergling Aq Mt gbe@FreeBSD.org -and is based on the -.Ox -manual page written by -.An David Gwynne Aq Mt dlg@openbsd.org . diff --git a/sys/dev/if_wg/crypto.c b/sys/dev/if_wg/crypto.c deleted file mode 100644 index f28585429272..000000000000 --- a/sys/dev/if_wg/crypto.c +++ /dev/null @@ -1,1705 +0,0 @@ -/* - * Copyright (C) 2015-2021 Jason A. Donenfeld . All Rights Reserved. - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#include -#include -#include - -#include "crypto.h" - -#ifndef ARRAY_SIZE -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -#endif -#ifndef noinline -#define noinline __attribute__((noinline)) -#endif -#ifndef __aligned -#define __aligned(x) __attribute__((aligned(x))) -#endif -#ifndef DIV_ROUND_UP -#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) -#endif - -#define le32_to_cpup(a) le32toh(*(a)) -#define le64_to_cpup(a) le64toh(*(a)) -#define cpu_to_le32(a) htole32(a) -#define cpu_to_le64(a) htole64(a) - -static inline uint32_t get_unaligned_le32(const uint8_t *a) -{ - uint32_t l; - __builtin_memcpy(&l, a, sizeof(l)); - return le32_to_cpup(&l); -} -static inline uint64_t get_unaligned_le64(const uint8_t *a) -{ - uint64_t l; - __builtin_memcpy(&l, a, sizeof(l)); - return le64_to_cpup(&l); -} -static inline void put_unaligned_le32(uint32_t s, uint8_t *d) -{ - uint32_t l = cpu_to_le32(s); - __builtin_memcpy(d, &l, sizeof(l)); -} -static inline void cpu_to_le32_array(uint32_t *buf, unsigned int words) -{ - while (words--) { - *buf = cpu_to_le32(*buf); - ++buf; - } -} -static inline void le32_to_cpu_array(uint32_t *buf, unsigned int words) -{ - while (words--) { - *buf = le32_to_cpup(buf); - ++buf; - } -} - -static inline uint32_t rol32(uint32_t word, unsigned int shift) -{ - return (word << (shift & 31)) | (word >> ((-shift) & 31)); -} -static inline uint32_t ror32(uint32_t word, unsigned int shift) -{ - return (word >> (shift & 31)) | (word << ((-shift) & 31)); -} - -static void xor_cpy(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, - size_t len) -{ - size_t i; - - for (i = 0; i < len; ++i) - dst[i] = src1[i] ^ src2[i]; -} - -#define QUARTER_ROUND(x, a, b, c, d) ( \ - x[a] += x[b], \ - x[d] = rol32((x[d] ^ x[a]), 16), \ - x[c] += x[d], \ - x[b] = rol32((x[b] ^ x[c]), 12), \ - x[a] += x[b], \ - x[d] = rol32((x[d] ^ x[a]), 8), \ - x[c] += x[d], \ - x[b] = rol32((x[b]
^ x[c]), 7) \ -) - -#define C(i, j) (i * 4 + j) - -#define DOUBLE_ROUND(x) ( \ - /* Column Round */ \ - QUARTER_ROUND(x, C(0, 0), C(1, 0), C(2, 0), C(3, 0)), \ - QUARTER_ROUND(x, C(0, 1), C(1, 1), C(2, 1), C(3, 1)), \ - QUARTER_ROUND(x, C(0, 2), C(1, 2), C(2, 2), C(3, 2)), \ - QUARTER_ROUND(x, C(0, 3), C(1, 3), C(2, 3), C(3, 3)), \ - /* Diagonal Round */ \ - QUARTER_ROUND(x, C(0, 0), C(1, 1), C(2, 2), C(3, 3)), \ - QUARTER_ROUND(x, C(0, 1), C(1, 2), C(2, 3), C(3, 0)), \ - QUARTER_ROUND(x, C(0, 2), C(1, 3), C(2, 0), C(3, 1)), \ - QUARTER_ROUND(x, C(0, 3), C(1, 0), C(2, 1), C(3, 2)) \ -) - -#define TWENTY_ROUNDS(x) ( \ - DOUBLE_ROUND(x), \ - DOUBLE_ROUND(x), \ - DOUBLE_ROUND(x), \ - DOUBLE_ROUND(x), \ - DOUBLE_ROUND(x), \ - DOUBLE_ROUND(x), \ - DOUBLE_ROUND(x), \ - DOUBLE_ROUND(x), \ - DOUBLE_ROUND(x), \ - DOUBLE_ROUND(x) \ -) - -enum chacha20_lengths { - CHACHA20_NONCE_SIZE = 16, - CHACHA20_KEY_SIZE = 32, - CHACHA20_KEY_WORDS = CHACHA20_KEY_SIZE / sizeof(uint32_t), - CHACHA20_BLOCK_SIZE = 64, - CHACHA20_BLOCK_WORDS = CHACHA20_BLOCK_SIZE / sizeof(uint32_t), - HCHACHA20_NONCE_SIZE = CHACHA20_NONCE_SIZE, - HCHACHA20_KEY_SIZE = CHACHA20_KEY_SIZE -}; - -enum chacha20_constants { /* expand 32-byte k */ - CHACHA20_CONSTANT_EXPA = 0x61707865U, - CHACHA20_CONSTANT_ND_3 = 0x3320646eU, - CHACHA20_CONSTANT_2_BY = 0x79622d32U, - CHACHA20_CONSTANT_TE_K = 0x6b206574U -}; - -struct chacha20_ctx { - union { - uint32_t state[16]; - struct { - uint32_t constant[4]; - uint32_t key[8]; - uint32_t counter[4]; - }; - }; -}; - -static void chacha20_init(struct chacha20_ctx *ctx, - const uint8_t key[CHACHA20_KEY_SIZE], - const uint64_t nonce) -{ - ctx->constant[0] = CHACHA20_CONSTANT_EXPA; - ctx->constant[1] = CHACHA20_CONSTANT_ND_3; - ctx->constant[2] = CHACHA20_CONSTANT_2_BY; - ctx->constant[3] = CHACHA20_CONSTANT_TE_K; - ctx->key[0] = get_unaligned_le32(key + 0); - ctx->key[1] = get_unaligned_le32(key + 4); - ctx->key[2] = get_unaligned_le32(key + 8); - ctx->key[3] = get_unaligned_le32(key + 12); - ctx->key[4] = get_unaligned_le32(key + 16); - ctx->key[5] = get_unaligned_le32(key + 20); - ctx->key[6] = get_unaligned_le32(key + 24); - ctx->key[7] = get_unaligned_le32(key + 28); - ctx->counter[0] = 0; - ctx->counter[1] = 0; - ctx->counter[2] = nonce & 0xffffffffU; - ctx->counter[3] = nonce >> 32; -} - -static void chacha20_block(struct chacha20_ctx *ctx, uint32_t *stream) -{ - uint32_t x[CHACHA20_BLOCK_WORDS]; - int i; - - for (i = 0; i < ARRAY_SIZE(x); ++i) - x[i] = ctx->state[i]; - - TWENTY_ROUNDS(x); - - for (i = 0; i < ARRAY_SIZE(x); ++i) - stream[i] = cpu_to_le32(x[i] + ctx->state[i]); - - ctx->counter[0] += 1; -} - -static void chacha20(struct chacha20_ctx *ctx, uint8_t *out, const uint8_t *in, - uint32_t len) -{ - uint32_t buf[CHACHA20_BLOCK_WORDS]; - - while (len >= CHACHA20_BLOCK_SIZE) { - chacha20_block(ctx, buf); - xor_cpy(out, in, (uint8_t *)buf, CHACHA20_BLOCK_SIZE); - len -= CHACHA20_BLOCK_SIZE; - out += CHACHA20_BLOCK_SIZE; - in += CHACHA20_BLOCK_SIZE; - } - if (len) { - chacha20_block(ctx, buf); - xor_cpy(out, in, (uint8_t *)buf, len); - } -} - -static void hchacha20(uint32_t derived_key[CHACHA20_KEY_WORDS], - const uint8_t nonce[HCHACHA20_NONCE_SIZE], - const uint8_t key[HCHACHA20_KEY_SIZE]) -{ - uint32_t x[] = { CHACHA20_CONSTANT_EXPA, - CHACHA20_CONSTANT_ND_3, - CHACHA20_CONSTANT_2_BY, - CHACHA20_CONSTANT_TE_K, - get_unaligned_le32(key + 0), - get_unaligned_le32(key + 4), - get_unaligned_le32(key + 8), - get_unaligned_le32(key + 12), - get_unaligned_le32(key + 16), - get_unaligned_le32(key + 
20), - get_unaligned_le32(key + 24), - get_unaligned_le32(key + 28), - get_unaligned_le32(nonce + 0), - get_unaligned_le32(nonce + 4), - get_unaligned_le32(nonce + 8), - get_unaligned_le32(nonce + 12) - }; - - TWENTY_ROUNDS(x); - - memcpy(derived_key + 0, x + 0, sizeof(uint32_t) * 4); - memcpy(derived_key + 4, x + 12, sizeof(uint32_t) * 4); -} - -enum poly1305_lengths { - POLY1305_BLOCK_SIZE = 16, - POLY1305_KEY_SIZE = 32, - POLY1305_MAC_SIZE = 16 -}; - -struct poly1305_internal { - uint32_t h[5]; - uint32_t r[5]; - uint32_t s[4]; -}; - -struct poly1305_ctx { - struct poly1305_internal state; - uint32_t nonce[4]; - uint8_t data[POLY1305_BLOCK_SIZE]; - size_t num; -}; - -static void poly1305_init_core(struct poly1305_internal *st, - const uint8_t key[16]) -{ - /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ - st->r[0] = (get_unaligned_le32(&key[0])) & 0x3ffffff; - st->r[1] = (get_unaligned_le32(&key[3]) >> 2) & 0x3ffff03; - st->r[2] = (get_unaligned_le32(&key[6]) >> 4) & 0x3ffc0ff; - st->r[3] = (get_unaligned_le32(&key[9]) >> 6) & 0x3f03fff; - st->r[4] = (get_unaligned_le32(&key[12]) >> 8) & 0x00fffff; - - /* s = 5*r */ - st->s[0] = st->r[1] * 5; - st->s[1] = st->r[2] * 5; - st->s[2] = st->r[3] * 5; - st->s[3] = st->r[4] * 5; - - /* h = 0 */ - st->h[0] = 0; - st->h[1] = 0; - st->h[2] = 0; - st->h[3] = 0; - st->h[4] = 0; -} - -static void poly1305_blocks_core(struct poly1305_internal *st, - const uint8_t *input, size_t len, - const uint32_t padbit) -{ - const uint32_t hibit = padbit << 24; - uint32_t r0, r1, r2, r3, r4; - uint32_t s1, s2, s3, s4; - uint32_t h0, h1, h2, h3, h4; - uint64_t d0, d1, d2, d3, d4; - uint32_t c; - - r0 = st->r[0]; - r1 = st->r[1]; - r2 = st->r[2]; - r3 = st->r[3]; - r4 = st->r[4]; - - s1 = st->s[0]; - s2 = st->s[1]; - s3 = st->s[2]; - s4 = st->s[3]; - - h0 = st->h[0]; - h1 = st->h[1]; - h2 = st->h[2]; - h3 = st->h[3]; - h4 = st->h[4]; - - while (len >= POLY1305_BLOCK_SIZE) { - /* h += m[i] */ - h0 += (get_unaligned_le32(&input[0])) & 0x3ffffff; - h1 += (get_unaligned_le32(&input[3]) >> 2) & 0x3ffffff; - h2 += (get_unaligned_le32(&input[6]) >> 4) & 0x3ffffff; - h3 += (get_unaligned_le32(&input[9]) >> 6) & 0x3ffffff; - h4 += (get_unaligned_le32(&input[12]) >> 8) | hibit; - - /* h *= r */ - d0 = ((uint64_t)h0 * r0) + ((uint64_t)h1 * s4) + - ((uint64_t)h2 * s3) + ((uint64_t)h3 * s2) + - ((uint64_t)h4 * s1); - d1 = ((uint64_t)h0 * r1) + ((uint64_t)h1 * r0) + - ((uint64_t)h2 * s4) + ((uint64_t)h3 * s3) + - ((uint64_t)h4 * s2); - d2 = ((uint64_t)h0 * r2) + ((uint64_t)h1 * r1) + - ((uint64_t)h2 * r0) + ((uint64_t)h3 * s4) + - ((uint64_t)h4 * s3); - d3 = ((uint64_t)h0 * r3) + ((uint64_t)h1 * r2) + - ((uint64_t)h2 * r1) + ((uint64_t)h3 * r0) + - ((uint64_t)h4 * s4); - d4 = ((uint64_t)h0 * r4) + ((uint64_t)h1 * r3) + - ((uint64_t)h2 * r2) + ((uint64_t)h3 * r1) + - ((uint64_t)h4 * r0); - - /* (partial) h %= p */ - c = (uint32_t)(d0 >> 26); - h0 = (uint32_t)d0 & 0x3ffffff; - d1 += c; - c = (uint32_t)(d1 >> 26); - h1 = (uint32_t)d1 & 0x3ffffff; - d2 += c; - c = (uint32_t)(d2 >> 26); - h2 = (uint32_t)d2 & 0x3ffffff; - d3 += c; - c = (uint32_t)(d3 >> 26); - h3 = (uint32_t)d3 & 0x3ffffff; - d4 += c; - c = (uint32_t)(d4 >> 26); - h4 = (uint32_t)d4 & 0x3ffffff; - h0 += c * 5; - c = (h0 >> 26); - h0 = h0 & 0x3ffffff; - h1 += c; - - input += POLY1305_BLOCK_SIZE; - len -= POLY1305_BLOCK_SIZE; - } - - st->h[0] = h0; - st->h[1] = h1; - st->h[2] = h2; - st->h[3] = h3; - st->h[4] = h4; -} - -static void poly1305_emit_core(struct poly1305_internal *st, uint8_t mac[16], - const uint32_t nonce[4]) 
-{ - uint32_t h0, h1, h2, h3, h4, c; - uint32_t g0, g1, g2, g3, g4; - uint64_t f; - uint32_t mask; - - /* fully carry h */ - h0 = st->h[0]; - h1 = st->h[1]; - h2 = st->h[2]; - h3 = st->h[3]; - h4 = st->h[4]; - - c = h1 >> 26; - h1 = h1 & 0x3ffffff; - h2 += c; - c = h2 >> 26; - h2 = h2 & 0x3ffffff; - h3 += c; - c = h3 >> 26; - h3 = h3 & 0x3ffffff; - h4 += c; - c = h4 >> 26; - h4 = h4 & 0x3ffffff; - h0 += c * 5; - c = h0 >> 26; - h0 = h0 & 0x3ffffff; - h1 += c; - - /* compute h + -p */ - g0 = h0 + 5; - c = g0 >> 26; - g0 &= 0x3ffffff; - g1 = h1 + c; - c = g1 >> 26; - g1 &= 0x3ffffff; - g2 = h2 + c; - c = g2 >> 26; - g2 &= 0x3ffffff; - g3 = h3 + c; - c = g3 >> 26; - g3 &= 0x3ffffff; - g4 = h4 + c - (1UL << 26); - - /* select h if h < p, or h + -p if h >= p */ - mask = (g4 >> ((sizeof(uint32_t) * 8) - 1)) - 1; - g0 &= mask; - g1 &= mask; - g2 &= mask; - g3 &= mask; - g4 &= mask; - mask = ~mask; - - h0 = (h0 & mask) | g0; - h1 = (h1 & mask) | g1; - h2 = (h2 & mask) | g2; - h3 = (h3 & mask) | g3; - h4 = (h4 & mask) | g4; - - /* h = h % (2^128) */ - h0 = ((h0) | (h1 << 26)) & 0xffffffff; - h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff; - h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff; - h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff; - - /* mac = (h + nonce) % (2^128) */ - f = (uint64_t)h0 + nonce[0]; - h0 = (uint32_t)f; - f = (uint64_t)h1 + nonce[1] + (f >> 32); - h1 = (uint32_t)f; - f = (uint64_t)h2 + nonce[2] + (f >> 32); - h2 = (uint32_t)f; - f = (uint64_t)h3 + nonce[3] + (f >> 32); - h3 = (uint32_t)f; - - put_unaligned_le32(h0, &mac[0]); - put_unaligned_le32(h1, &mac[4]); - put_unaligned_le32(h2, &mac[8]); - put_unaligned_le32(h3, &mac[12]); -} - -static void poly1305_init(struct poly1305_ctx *ctx, - const uint8_t key[POLY1305_KEY_SIZE]) -{ - ctx->nonce[0] = get_unaligned_le32(&key[16]); - ctx->nonce[1] = get_unaligned_le32(&key[20]); - ctx->nonce[2] = get_unaligned_le32(&key[24]); - ctx->nonce[3] = get_unaligned_le32(&key[28]); - - poly1305_init_core(&ctx->state, key); - - ctx->num = 0; -} - -static void poly1305_update(struct poly1305_ctx *ctx, const uint8_t *input, - size_t len) -{ - const size_t num = ctx->num; - size_t rem; - - if (num) { - rem = POLY1305_BLOCK_SIZE - num; - if (len < rem) { - memcpy(ctx->data + num, input, len); - ctx->num = num + len; - return; - } - memcpy(ctx->data + num, input, rem); - poly1305_blocks_core(&ctx->state, ctx->data, - POLY1305_BLOCK_SIZE, 1); - input += rem; - len -= rem; - } - - rem = len % POLY1305_BLOCK_SIZE; - len -= rem; - - if (len >= POLY1305_BLOCK_SIZE) { - poly1305_blocks_core(&ctx->state, input, len, 1); - input += len; - } - - if (rem) - memcpy(ctx->data, input, rem); - - ctx->num = rem; -} - -static void poly1305_final(struct poly1305_ctx *ctx, - uint8_t mac[POLY1305_MAC_SIZE]) -{ - size_t num = ctx->num; - - if (num) { - ctx->data[num++] = 1; - while (num < POLY1305_BLOCK_SIZE) - ctx->data[num++] = 0; - poly1305_blocks_core(&ctx->state, ctx->data, - POLY1305_BLOCK_SIZE, 0); - } - - poly1305_emit_core(&ctx->state, mac, ctx->nonce); - - explicit_bzero(ctx, sizeof(*ctx)); -} - - -static const uint8_t pad0[16] = { 0 }; - -void -chacha20poly1305_encrypt(uint8_t *dst, const uint8_t *src, const size_t src_len, - const uint8_t *ad, const size_t ad_len, - const uint64_t nonce, - const uint8_t key[CHACHA20POLY1305_KEY_SIZE]) -{ - struct poly1305_ctx poly1305_state; - struct chacha20_ctx chacha20_state; - union { - uint8_t block0[POLY1305_KEY_SIZE]; - uint64_t lens[2]; - } b = { { 0 } }; - - chacha20_init(&chacha20_state, key, nonce); - 
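/* The first keystream block (counter 0) is used as the one-time - * Poly1305 key; generating it advances the block counter, so the - * payload encryption below starts at counter 1, per the - * ChaCha20-Poly1305 construction. */ -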
chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0)); - poly1305_init(&poly1305_state, b.block0); - - poly1305_update(&poly1305_state, ad, ad_len); - poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf); - - chacha20(&chacha20_state, dst, src, src_len); - - poly1305_update(&poly1305_state, dst, src_len); - poly1305_update(&poly1305_state, pad0, (0x10 - src_len) & 0xf); - - b.lens[0] = cpu_to_le64(ad_len); - b.lens[1] = cpu_to_le64(src_len); - poly1305_update(&poly1305_state, (uint8_t *)b.lens, sizeof(b.lens)); - - poly1305_final(&poly1305_state, dst + src_len); - - explicit_bzero(&chacha20_state, sizeof(chacha20_state)); - explicit_bzero(&b, sizeof(b)); -} - -bool -chacha20poly1305_decrypt(uint8_t *dst, const uint8_t *src, const size_t src_len, - const uint8_t *ad, const size_t ad_len, - const uint64_t nonce, - const uint8_t key[CHACHA20POLY1305_KEY_SIZE]) -{ - struct poly1305_ctx poly1305_state; - struct chacha20_ctx chacha20_state; - bool ret; - size_t dst_len; - union { - uint8_t block0[POLY1305_KEY_SIZE]; - uint8_t mac[POLY1305_MAC_SIZE]; - uint64_t lens[2]; - } b = { { 0 } }; - - if (src_len < POLY1305_MAC_SIZE) - return false; - - chacha20_init(&chacha20_state, key, nonce); - chacha20(&chacha20_state, b.block0, b.block0, sizeof(b.block0)); - poly1305_init(&poly1305_state, b.block0); - - poly1305_update(&poly1305_state, ad, ad_len); - poly1305_update(&poly1305_state, pad0, (0x10 - ad_len) & 0xf); - - dst_len = src_len - POLY1305_MAC_SIZE; - poly1305_update(&poly1305_state, src, dst_len); - poly1305_update(&poly1305_state, pad0, (0x10 - dst_len) & 0xf); - - b.lens[0] = cpu_to_le64(ad_len); - b.lens[1] = cpu_to_le64(dst_len); - poly1305_update(&poly1305_state, (uint8_t *)b.lens, sizeof(b.lens)); - - poly1305_final(&poly1305_state, b.mac); - - ret = timingsafe_bcmp(b.mac, src + dst_len, POLY1305_MAC_SIZE) == 0; - if (ret) - chacha20(&chacha20_state, dst, src, dst_len); - - explicit_bzero(&chacha20_state, sizeof(chacha20_state)); - explicit_bzero(&b, sizeof(b)); - - return ret; -} - -void -xchacha20poly1305_encrypt(uint8_t *dst, const uint8_t *src, - const size_t src_len, const uint8_t *ad, - const size_t ad_len, - const uint8_t nonce[XCHACHA20POLY1305_NONCE_SIZE], - const uint8_t key[CHACHA20POLY1305_KEY_SIZE]) -{ - uint32_t derived_key[CHACHA20_KEY_WORDS]; - - hchacha20(derived_key, nonce, key); - cpu_to_le32_array(derived_key, ARRAY_SIZE(derived_key)); - chacha20poly1305_encrypt(dst, src, src_len, ad, ad_len, - get_unaligned_le64(nonce + 16), - (uint8_t *)derived_key); - explicit_bzero(derived_key, CHACHA20POLY1305_KEY_SIZE); -} - -bool -xchacha20poly1305_decrypt(uint8_t *dst, const uint8_t *src, - const size_t src_len, const uint8_t *ad, - const size_t ad_len, - const uint8_t nonce[XCHACHA20POLY1305_NONCE_SIZE], - const uint8_t key[CHACHA20POLY1305_KEY_SIZE]) -{ - bool ret; - uint32_t derived_key[CHACHA20_KEY_WORDS]; - - hchacha20(derived_key, nonce, key); - cpu_to_le32_array(derived_key, ARRAY_SIZE(derived_key)); - ret = chacha20poly1305_decrypt(dst, src, src_len, ad, ad_len, - get_unaligned_le64(nonce + 16), - (uint8_t *)derived_key); - explicit_bzero(derived_key, CHACHA20POLY1305_KEY_SIZE); - return ret; -} - - -static const uint32_t blake2s_iv[8] = { - 0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL, - 0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL -}; - -static const uint8_t blake2s_sigma[10][16] = { - { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, - { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, - { 11, 8, 12, 0, 5, 2, 15, 
13, 10, 14, 3, 6, 7, 1, 9, 4 }, - { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 }, - { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 }, - { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }, - { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 }, - { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 }, - { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 }, - { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 }, -}; - -static inline void blake2s_set_lastblock(struct blake2s_state *state) -{ - state->f[0] = -1; -} - -static inline void blake2s_increment_counter(struct blake2s_state *state, - const uint32_t inc) -{ - state->t[0] += inc; - state->t[1] += (state->t[0] < inc); -} - -static inline void blake2s_init_param(struct blake2s_state *state, - const uint32_t param) -{ - int i; - - memset(state, 0, sizeof(*state)); - for (i = 0; i < 8; ++i) - state->h[i] = blake2s_iv[i]; - state->h[0] ^= param; -} - -void blake2s_init(struct blake2s_state *state, const size_t outlen) -{ - blake2s_init_param(state, 0x01010000 | outlen); - state->outlen = outlen; -} - -void blake2s_init_key(struct blake2s_state *state, const size_t outlen, - const uint8_t *key, const size_t keylen) -{ - uint8_t block[BLAKE2S_BLOCK_SIZE] = { 0 }; - - blake2s_init_param(state, 0x01010000 | keylen << 8 | outlen); - state->outlen = outlen; - memcpy(block, key, keylen); - blake2s_update(state, block, BLAKE2S_BLOCK_SIZE); - explicit_bzero(block, BLAKE2S_BLOCK_SIZE); -} - -static inline void blake2s_compress(struct blake2s_state *state, - const uint8_t *block, size_t nblocks, - const uint32_t inc) -{ - uint32_t m[16]; - uint32_t v[16]; - int i; - - while (nblocks > 0) { - blake2s_increment_counter(state, inc); - memcpy(m, block, BLAKE2S_BLOCK_SIZE); - le32_to_cpu_array(m, ARRAY_SIZE(m)); - memcpy(v, state->h, 32); - v[ 8] = blake2s_iv[0]; - v[ 9] = blake2s_iv[1]; - v[10] = blake2s_iv[2]; - v[11] = blake2s_iv[3]; - v[12] = blake2s_iv[4] ^ state->t[0]; - v[13] = blake2s_iv[5] ^ state->t[1]; - v[14] = blake2s_iv[6] ^ state->f[0]; - v[15] = blake2s_iv[7] ^ state->f[1]; - -#define G(r, i, a, b, c, d) do { \ - a += b + m[blake2s_sigma[r][2 * i + 0]]; \ - d = ror32(d ^ a, 16); \ - c += d; \ - b = ror32(b ^ c, 12); \ - a += b + m[blake2s_sigma[r][2 * i + 1]]; \ - d = ror32(d ^ a, 8); \ - c += d; \ - b = ror32(b ^ c, 7); \ -} while (0) - -#define ROUND(r) do { \ - G(r, 0, v[0], v[ 4], v[ 8], v[12]); \ - G(r, 1, v[1], v[ 5], v[ 9], v[13]); \ - G(r, 2, v[2], v[ 6], v[10], v[14]); \ - G(r, 3, v[3], v[ 7], v[11], v[15]); \ - G(r, 4, v[0], v[ 5], v[10], v[15]); \ - G(r, 5, v[1], v[ 6], v[11], v[12]); \ - G(r, 6, v[2], v[ 7], v[ 8], v[13]); \ - G(r, 7, v[3], v[ 4], v[ 9], v[14]); \ -} while (0) - ROUND(0); - ROUND(1); - ROUND(2); - ROUND(3); - ROUND(4); - ROUND(5); - ROUND(6); - ROUND(7); - ROUND(8); - ROUND(9); - -#undef G -#undef ROUND - - for (i = 0; i < 8; ++i) - state->h[i] ^= v[i] ^ v[i + 8]; - - block += BLAKE2S_BLOCK_SIZE; - --nblocks; - } -} - -void blake2s_update(struct blake2s_state *state, const uint8_t *in, size_t inlen) -{ - const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen; - - if (!inlen) - return; - if (inlen > fill) { - memcpy(state->buf + state->buflen, in, fill); - blake2s_compress(state, state->buf, 1, BLAKE2S_BLOCK_SIZE); - state->buflen = 0; - in += fill; - inlen -= fill; - } - if (inlen > BLAKE2S_BLOCK_SIZE) { - const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE); - /* Hash one less (full) block than strictly possible */ - blake2s_compress(state, in, nblocks - 1, 
BLAKE2S_BLOCK_SIZE); - in += BLAKE2S_BLOCK_SIZE * (nblocks - 1); - inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1); - } - memcpy(state->buf + state->buflen, in, inlen); - state->buflen += inlen; -} - -void blake2s_final(struct blake2s_state *state, uint8_t *out) -{ - blake2s_set_lastblock(state); - memset(state->buf + state->buflen, 0, - BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */ - blake2s_compress(state, state->buf, 1, state->buflen); - cpu_to_le32_array(state->h, ARRAY_SIZE(state->h)); - memcpy(out, state->h, state->outlen); - explicit_bzero(state, sizeof(*state)); -} - -void blake2s(uint8_t *out, const uint8_t *in, const uint8_t *key, - const size_t outlen, const size_t inlen, const size_t keylen) -{ - struct blake2s_state state; - - if (keylen) - blake2s_init_key(&state, outlen, key, keylen); - else - blake2s_init(&state, outlen); - - blake2s_update(&state, in, inlen); - blake2s_final(&state, out); -} - -void blake2s_hmac(uint8_t *out, const uint8_t *in, const uint8_t *key, const size_t outlen, - const size_t inlen, const size_t keylen) -{ - struct blake2s_state state; - uint8_t x_key[BLAKE2S_BLOCK_SIZE] __aligned(sizeof(uint32_t)) = { 0 }; - uint8_t i_hash[BLAKE2S_HASH_SIZE] __aligned(sizeof(uint32_t)); - int i; - - if (keylen > BLAKE2S_BLOCK_SIZE) { - blake2s_init(&state, BLAKE2S_HASH_SIZE); - blake2s_update(&state, key, keylen); - blake2s_final(&state, x_key); - } else - memcpy(x_key, key, keylen); - - for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i) - x_key[i] ^= 0x36; - - blake2s_init(&state, BLAKE2S_HASH_SIZE); - blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE); - blake2s_update(&state, in, inlen); - blake2s_final(&state, i_hash); - - for (i = 0; i < BLAKE2S_BLOCK_SIZE; ++i) - x_key[i] ^= 0x5c ^ 0x36; - - blake2s_init(&state, BLAKE2S_HASH_SIZE); - blake2s_update(&state, x_key, BLAKE2S_BLOCK_SIZE); - blake2s_update(&state, i_hash, BLAKE2S_HASH_SIZE); - blake2s_final(&state, i_hash); - - memcpy(out, i_hash, outlen); - explicit_bzero(x_key, BLAKE2S_BLOCK_SIZE); - explicit_bzero(i_hash, BLAKE2S_HASH_SIZE); -} - - -/* Below here is fiat's implementation of x25519. - * - * Copyright (C) 2015-2016 The fiat-crypto Authors. - * Copyright (C) 2018-2021 Jason A. Donenfeld . All Rights Reserved. - * - * This is a machine-generated formally verified implementation of Curve25519 - * ECDH from: . Though originally - * machine generated, it has been tweaked to be suitable for use in the kernel. - * It is optimized for 32-bit machines and machines that cannot work efficiently - * with 128-bit integer types. - */ - -/* fe means field element. Here the field is \Z/(2^255-19). An element t, - * entries t[0]...t[9], represents the integer t[0]+2^26 t[1]+2^51 t[2]+2^77 - * t[3]+2^102 t[4]+...+2^230 t[9]. - * fe limbs are bounded by 1.125*2^26,1.125*2^25,1.125*2^26,1.125*2^25,etc. - * Multiplication and carrying produce fe from fe_loose. - */ -typedef struct fe { uint32_t v[10]; } fe; - -/* fe_loose limbs are bounded by 3.375*2^26,3.375*2^25,3.375*2^26,3.375*2^25,etc - * Addition and subtraction produce fe_loose from (fe, fe). - */ -typedef struct fe_loose { uint32_t v[10]; } fe_loose; - -static inline void fe_frombytes_impl(uint32_t h[10], const uint8_t *s) -{ - /* Ignores top bit of s. 
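The 255-bit little-endian value is unpacked into ten limbs - * of alternating 26 and 25 bits (radix 2^25.5), matching the fe - * representation described above.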
*/ - uint32_t a0 = get_unaligned_le32(s); - uint32_t a1 = get_unaligned_le32(s+4); - uint32_t a2 = get_unaligned_le32(s+8); - uint32_t a3 = get_unaligned_le32(s+12); - uint32_t a4 = get_unaligned_le32(s+16); - uint32_t a5 = get_unaligned_le32(s+20); - uint32_t a6 = get_unaligned_le32(s+24); - uint32_t a7 = get_unaligned_le32(s+28); - h[0] = a0&((1<<26)-1); /* 26 used, 32-26 left. 26 */ - h[1] = (a0>>26) | ((a1&((1<<19)-1))<< 6); /* (32-26) + 19 = 6+19 = 25 */ - h[2] = (a1>>19) | ((a2&((1<<13)-1))<<13); /* (32-19) + 13 = 13+13 = 26 */ - h[3] = (a2>>13) | ((a3&((1<< 6)-1))<<19); /* (32-13) + 6 = 19+ 6 = 25 */ - h[4] = (a3>> 6); /* (32- 6) = 26 */ - h[5] = a4&((1<<25)-1); /* 25 */ - h[6] = (a4>>25) | ((a5&((1<<19)-1))<< 7); /* (32-25) + 19 = 7+19 = 26 */ - h[7] = (a5>>19) | ((a6&((1<<12)-1))<<13); /* (32-19) + 12 = 13+12 = 25 */ - h[8] = (a6>>12) | ((a7&((1<< 6)-1))<<20); /* (32-12) + 6 = 20+ 6 = 26 */ - h[9] = (a7>> 6)&((1<<25)-1); /* 25 */ -} - -static inline void fe_frombytes(fe *h, const uint8_t *s) -{ - fe_frombytes_impl(h->v, s); -} - -static inline uint8_t /*bool*/ -addcarryx_u25(uint8_t /*bool*/ c, uint32_t a, uint32_t b, uint32_t *low) -{ - /* This function extracts 25 bits of result and 1 bit of carry - * (26 total), so a 32-bit intermediate is sufficient. - */ - uint32_t x = a + b + c; - *low = x & ((1 << 25) - 1); - return (x >> 25) & 1; -} - -static inline uint8_t /*bool*/ -addcarryx_u26(uint8_t /*bool*/ c, uint32_t a, uint32_t b, uint32_t *low) -{ - /* This function extracts 26 bits of result and 1 bit of carry - * (27 total), so a 32-bit intermediate is sufficient. - */ - uint32_t x = a + b + c; - *low = x & ((1 << 26) - 1); - return (x >> 26) & 1; -} - -static inline uint8_t /*bool*/ -subborrow_u25(uint8_t /*bool*/ c, uint32_t a, uint32_t b, uint32_t *low) -{ - /* This function extracts 25 bits of result and 1 bit of borrow - * (26 total), so a 32-bit intermediate is sufficient. - */ - uint32_t x = a - b - c; - *low = x & ((1 << 25) - 1); - return x >> 31; -} - -static inline uint8_t /*bool*/ -subborrow_u26(uint8_t /*bool*/ c, uint32_t a, uint32_t b, uint32_t *low) -{ - /* This function extracts 26 bits of result and 1 bit of borrow - *(27 total), so a 32-bit intermediate is sufficient. 
- */ - uint32_t x = a - b - c; - *low = x & ((1 << 26) - 1); - return x >> 31; -} - -static inline uint32_t cmovznz32(uint32_t t, uint32_t z, uint32_t nz) -{ - t = -!!t; /* all set if nonzero, 0 if 0 */ - return (t&nz) | ((~t)&z); -} - -static inline void fe_freeze(uint32_t out[10], const uint32_t in1[10]) -{ - const uint32_t x17 = in1[9]; - const uint32_t x18 = in1[8]; - const uint32_t x16 = in1[7]; - const uint32_t x14 = in1[6]; - const uint32_t x12 = in1[5]; - const uint32_t x10 = in1[4]; - const uint32_t x8 = in1[3]; - const uint32_t x6 = in1[2]; - const uint32_t x4 = in1[1]; - const uint32_t x2 = in1[0]; - uint32_t x20; uint8_t/*bool*/ x21 = subborrow_u26(0x0, x2, 0x3ffffed, &x20); - uint32_t x23; uint8_t/*bool*/ x24 = subborrow_u25(x21, x4, 0x1ffffff, &x23); - uint32_t x26; uint8_t/*bool*/ x27 = subborrow_u26(x24, x6, 0x3ffffff, &x26); - uint32_t x29; uint8_t/*bool*/ x30 = subborrow_u25(x27, x8, 0x1ffffff, &x29); - uint32_t x32; uint8_t/*bool*/ x33 = subborrow_u26(x30, x10, 0x3ffffff, &x32); - uint32_t x35; uint8_t/*bool*/ x36 = subborrow_u25(x33, x12, 0x1ffffff, &x35); - uint32_t x38; uint8_t/*bool*/ x39 = subborrow_u26(x36, x14, 0x3ffffff, &x38); - uint32_t x41; uint8_t/*bool*/ x42 = subborrow_u25(x39, x16, 0x1ffffff, &x41); - uint32_t x44; uint8_t/*bool*/ x45 = subborrow_u26(x42, x18, 0x3ffffff, &x44); - uint32_t x47; uint8_t/*bool*/ x48 = subborrow_u25(x45, x17, 0x1ffffff, &x47); - uint32_t x49 = cmovznz32(x48, 0x0, 0xffffffff); - uint32_t x50 = (x49 & 0x3ffffed); - uint32_t x52; uint8_t/*bool*/ x53 = addcarryx_u26(0x0, x20, x50, &x52); - uint32_t x54 = (x49 & 0x1ffffff); - uint32_t x56; uint8_t/*bool*/ x57 = addcarryx_u25(x53, x23, x54, &x56); - uint32_t x58 = (x49 & 0x3ffffff); - uint32_t x60; uint8_t/*bool*/ x61 = addcarryx_u26(x57, x26, x58, &x60); - uint32_t x62 = (x49 & 0x1ffffff); - uint32_t x64; uint8_t/*bool*/ x65 = addcarryx_u25(x61, x29, x62, &x64); - uint32_t x66 = (x49 & 0x3ffffff); - uint32_t x68; uint8_t/*bool*/ x69 = addcarryx_u26(x65, x32, x66, &x68); - uint32_t x70 = (x49 & 0x1ffffff); - uint32_t x72; uint8_t/*bool*/ x73 = addcarryx_u25(x69, x35, x70, &x72); - uint32_t x74 = (x49 & 0x3ffffff); - uint32_t x76; uint8_t/*bool*/ x77 = addcarryx_u26(x73, x38, x74, &x76); - uint32_t x78 = (x49 & 0x1ffffff); - uint32_t x80; uint8_t/*bool*/ x81 = addcarryx_u25(x77, x41, x78, &x80); - uint32_t x82 = (x49 & 0x3ffffff); - uint32_t x84; uint8_t/*bool*/ x85 = addcarryx_u26(x81, x44, x82, &x84); - uint32_t x86 = (x49 & 0x1ffffff); - uint32_t x88; addcarryx_u25(x85, x47, x86, &x88); - out[0] = x52; - out[1] = x56; - out[2] = x60; - out[3] = x64; - out[4] = x68; - out[5] = x72; - out[6] = x76; - out[7] = x80; - out[8] = x84; - out[9] = x88; -} - -static inline void fe_tobytes(uint8_t s[32], const fe *f) -{ - uint32_t h[10]; - fe_freeze(h, f->v); - s[0] = h[0] >> 0; - s[1] = h[0] >> 8; - s[2] = h[0] >> 16; - s[3] = (h[0] >> 24) | (h[1] << 2); - s[4] = h[1] >> 6; - s[5] = h[1] >> 14; - s[6] = (h[1] >> 22) | (h[2] << 3); - s[7] = h[2] >> 5; - s[8] = h[2] >> 13; - s[9] = (h[2] >> 21) | (h[3] << 5); - s[10] = h[3] >> 3; - s[11] = h[3] >> 11; - s[12] = (h[3] >> 19) | (h[4] << 6); - s[13] = h[4] >> 2; - s[14] = h[4] >> 10; - s[15] = h[4] >> 18; - s[16] = h[5] >> 0; - s[17] = h[5] >> 8; - s[18] = h[5] >> 16; - s[19] = (h[5] >> 24) | (h[6] << 1); - s[20] = h[6] >> 7; - s[21] = h[6] >> 15; - s[22] = (h[6] >> 23) | (h[7] << 3); - s[23] = h[7] >> 5; - s[24] = h[7] >> 13; - s[25] = (h[7] >> 21) | (h[8] << 4); - s[26] = h[8] >> 4; - s[27] = h[8] >> 12; - s[28] = (h[8] >> 20) | (h[9] << 6); - 
s[29] = h[9] >> 2; - s[30] = h[9] >> 10; - s[31] = h[9] >> 18; -} - -/* h = f */ -static inline void fe_copy(fe *h, const fe *f) -{ - memmove(h, f, sizeof(uint32_t) * 10); -} - -static inline void fe_copy_lt(fe_loose *h, const fe *f) -{ - memmove(h, f, sizeof(uint32_t) * 10); -} - -/* h = 0 */ -static inline void fe_0(fe *h) -{ - memset(h, 0, sizeof(uint32_t) * 10); -} - -/* h = 1 */ -static inline void fe_1(fe *h) -{ - memset(h, 0, sizeof(uint32_t) * 10); - h->v[0] = 1; -} - -static void fe_add_impl(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10]) -{ - const uint32_t x20 = in1[9]; - const uint32_t x21 = in1[8]; - const uint32_t x19 = in1[7]; - const uint32_t x17 = in1[6]; - const uint32_t x15 = in1[5]; - const uint32_t x13 = in1[4]; - const uint32_t x11 = in1[3]; - const uint32_t x9 = in1[2]; - const uint32_t x7 = in1[1]; - const uint32_t x5 = in1[0]; - const uint32_t x38 = in2[9]; - const uint32_t x39 = in2[8]; - const uint32_t x37 = in2[7]; - const uint32_t x35 = in2[6]; - const uint32_t x33 = in2[5]; - const uint32_t x31 = in2[4]; - const uint32_t x29 = in2[3]; - const uint32_t x27 = in2[2]; - const uint32_t x25 = in2[1]; - const uint32_t x23 = in2[0]; - out[0] = (x5 + x23); - out[1] = (x7 + x25); - out[2] = (x9 + x27); - out[3] = (x11 + x29); - out[4] = (x13 + x31); - out[5] = (x15 + x33); - out[6] = (x17 + x35); - out[7] = (x19 + x37); - out[8] = (x21 + x39); - out[9] = (x20 + x38); -} - -/* h = f + g - * Can overlap h with f or g. - */ -static inline void fe_add(fe_loose *h, const fe *f, const fe *g) -{ - fe_add_impl(h->v, f->v, g->v); -} - -static void fe_sub_impl(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10]) -{ - const uint32_t x20 = in1[9]; - const uint32_t x21 = in1[8]; - const uint32_t x19 = in1[7]; - const uint32_t x17 = in1[6]; - const uint32_t x15 = in1[5]; - const uint32_t x13 = in1[4]; - const uint32_t x11 = in1[3]; - const uint32_t x9 = in1[2]; - const uint32_t x7 = in1[1]; - const uint32_t x5 = in1[0]; - const uint32_t x38 = in2[9]; - const uint32_t x39 = in2[8]; - const uint32_t x37 = in2[7]; - const uint32_t x35 = in2[6]; - const uint32_t x33 = in2[5]; - const uint32_t x31 = in2[4]; - const uint32_t x29 = in2[3]; - const uint32_t x27 = in2[2]; - const uint32_t x25 = in2[1]; - const uint32_t x23 = in2[0]; - out[0] = ((0x7ffffda + x5) - x23); - out[1] = ((0x3fffffe + x7) - x25); - out[2] = ((0x7fffffe + x9) - x27); - out[3] = ((0x3fffffe + x11) - x29); - out[4] = ((0x7fffffe + x13) - x31); - out[5] = ((0x3fffffe + x15) - x33); - out[6] = ((0x7fffffe + x17) - x35); - out[7] = ((0x3fffffe + x19) - x37); - out[8] = ((0x7fffffe + x21) - x39); - out[9] = ((0x3fffffe + x20) - x38); -} - -/* h = f - g - * Can overlap h with f or g. 
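- * To avoid limb underflow, fe_sub_impl above adds 2*p limb-wise (the - * 0x7ffffda, 0x3fffffe, 0x7fffffe constants) before subtracting, so the - * result satisfies only the looser fe_loose bounds and is carried back - * down by the next multiplication or squaring.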
- */ -static inline void fe_sub(fe_loose *h, const fe *f, const fe *g) -{ - fe_sub_impl(h->v, f->v, g->v); -} - -static void fe_mul_impl(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10]) -{ - const uint32_t x20 = in1[9]; - const uint32_t x21 = in1[8]; - const uint32_t x19 = in1[7]; - const uint32_t x17 = in1[6]; - const uint32_t x15 = in1[5]; - const uint32_t x13 = in1[4]; - const uint32_t x11 = in1[3]; - const uint32_t x9 = in1[2]; - const uint32_t x7 = in1[1]; - const uint32_t x5 = in1[0]; - const uint32_t x38 = in2[9]; - const uint32_t x39 = in2[8]; - const uint32_t x37 = in2[7]; - const uint32_t x35 = in2[6]; - const uint32_t x33 = in2[5]; - const uint32_t x31 = in2[4]; - const uint32_t x29 = in2[3]; - const uint32_t x27 = in2[2]; - const uint32_t x25 = in2[1]; - const uint32_t x23 = in2[0]; - uint64_t x40 = ((uint64_t)x23 * x5); - uint64_t x41 = (((uint64_t)x23 * x7) + ((uint64_t)x25 * x5)); - uint64_t x42 = ((((uint64_t)(0x2 * x25) * x7) + ((uint64_t)x23 * x9)) + ((uint64_t)x27 * x5)); - uint64_t x43 = (((((uint64_t)x25 * x9) + ((uint64_t)x27 * x7)) + ((uint64_t)x23 * x11)) + ((uint64_t)x29 * x5)); - uint64_t x44 = (((((uint64_t)x27 * x9) + (0x2 * (((uint64_t)x25 * x11) + ((uint64_t)x29 * x7)))) + ((uint64_t)x23 * x13)) + ((uint64_t)x31 * x5)); - uint64_t x45 = (((((((uint64_t)x27 * x11) + ((uint64_t)x29 * x9)) + ((uint64_t)x25 * x13)) + ((uint64_t)x31 * x7)) + ((uint64_t)x23 * x15)) + ((uint64_t)x33 * x5)); - uint64_t x46 = (((((0x2 * ((((uint64_t)x29 * x11) + ((uint64_t)x25 * x15)) + ((uint64_t)x33 * x7))) + ((uint64_t)x27 * x13)) + ((uint64_t)x31 * x9)) + ((uint64_t)x23 * x17)) + ((uint64_t)x35 * x5)); - uint64_t x47 = (((((((((uint64_t)x29 * x13) + ((uint64_t)x31 * x11)) + ((uint64_t)x27 * x15)) + ((uint64_t)x33 * x9)) + ((uint64_t)x25 * x17)) + ((uint64_t)x35 * x7)) + ((uint64_t)x23 * x19)) + ((uint64_t)x37 * x5)); - uint64_t x48 = (((((((uint64_t)x31 * x13) + (0x2 * (((((uint64_t)x29 * x15) + ((uint64_t)x33 * x11)) + ((uint64_t)x25 * x19)) + ((uint64_t)x37 * x7)))) + ((uint64_t)x27 * x17)) + ((uint64_t)x35 * x9)) + ((uint64_t)x23 * x21)) + ((uint64_t)x39 * x5)); - uint64_t x49 = (((((((((((uint64_t)x31 * x15) + ((uint64_t)x33 * x13)) + ((uint64_t)x29 * x17)) + ((uint64_t)x35 * x11)) + ((uint64_t)x27 * x19)) + ((uint64_t)x37 * x9)) + ((uint64_t)x25 * x21)) + ((uint64_t)x39 * x7)) + ((uint64_t)x23 * x20)) + ((uint64_t)x38 * x5)); - uint64_t x50 = (((((0x2 * ((((((uint64_t)x33 * x15) + ((uint64_t)x29 * x19)) + ((uint64_t)x37 * x11)) + ((uint64_t)x25 * x20)) + ((uint64_t)x38 * x7))) + ((uint64_t)x31 * x17)) + ((uint64_t)x35 * x13)) + ((uint64_t)x27 * x21)) + ((uint64_t)x39 * x9)); - uint64_t x51 = (((((((((uint64_t)x33 * x17) + ((uint64_t)x35 * x15)) + ((uint64_t)x31 * x19)) + ((uint64_t)x37 * x13)) + ((uint64_t)x29 * x21)) + ((uint64_t)x39 * x11)) + ((uint64_t)x27 * x20)) + ((uint64_t)x38 * x9)); - uint64_t x52 = (((((uint64_t)x35 * x17) + (0x2 * (((((uint64_t)x33 * x19) + ((uint64_t)x37 * x15)) + ((uint64_t)x29 * x20)) + ((uint64_t)x38 * x11)))) + ((uint64_t)x31 * x21)) + ((uint64_t)x39 * x13)); - uint64_t x53 = (((((((uint64_t)x35 * x19) + ((uint64_t)x37 * x17)) + ((uint64_t)x33 * x21)) + ((uint64_t)x39 * x15)) + ((uint64_t)x31 * x20)) + ((uint64_t)x38 * x13)); - uint64_t x54 = (((0x2 * ((((uint64_t)x37 * x19) + ((uint64_t)x33 * x20)) + ((uint64_t)x38 * x15))) + ((uint64_t)x35 * x21)) + ((uint64_t)x39 * x17)); - uint64_t x55 = (((((uint64_t)x37 * x21) + ((uint64_t)x39 * x19)) + ((uint64_t)x35 * x20)) + ((uint64_t)x38 * x17)); - uint64_t x56 = (((uint64_t)x39 * 
x21) + (0x2 * (((uint64_t)x37 * x20) + ((uint64_t)x38 * x19)))); - uint64_t x57 = (((uint64_t)x39 * x20) + ((uint64_t)x38 * x21)); - uint64_t x58 = ((uint64_t)(0x2 * x38) * x20); - uint64_t x59 = (x48 + (x58 << 0x4)); - uint64_t x60 = (x59 + (x58 << 0x1)); - uint64_t x61 = (x60 + x58); - uint64_t x62 = (x47 + (x57 << 0x4)); - uint64_t x63 = (x62 + (x57 << 0x1)); - uint64_t x64 = (x63 + x57); - uint64_t x65 = (x46 + (x56 << 0x4)); - uint64_t x66 = (x65 + (x56 << 0x1)); - uint64_t x67 = (x66 + x56); - uint64_t x68 = (x45 + (x55 << 0x4)); - uint64_t x69 = (x68 + (x55 << 0x1)); - uint64_t x70 = (x69 + x55); - uint64_t x71 = (x44 + (x54 << 0x4)); - uint64_t x72 = (x71 + (x54 << 0x1)); - uint64_t x73 = (x72 + x54); - uint64_t x74 = (x43 + (x53 << 0x4)); - uint64_t x75 = (x74 + (x53 << 0x1)); - uint64_t x76 = (x75 + x53); - uint64_t x77 = (x42 + (x52 << 0x4)); - uint64_t x78 = (x77 + (x52 << 0x1)); - uint64_t x79 = (x78 + x52); - uint64_t x80 = (x41 + (x51 << 0x4)); - uint64_t x81 = (x80 + (x51 << 0x1)); - uint64_t x82 = (x81 + x51); - uint64_t x83 = (x40 + (x50 << 0x4)); - uint64_t x84 = (x83 + (x50 << 0x1)); - uint64_t x85 = (x84 + x50); - uint64_t x86 = (x85 >> 0x1a); - uint32_t x87 = ((uint32_t)x85 & 0x3ffffff); - uint64_t x88 = (x86 + x82); - uint64_t x89 = (x88 >> 0x19); - uint32_t x90 = ((uint32_t)x88 & 0x1ffffff); - uint64_t x91 = (x89 + x79); - uint64_t x92 = (x91 >> 0x1a); - uint32_t x93 = ((uint32_t)x91 & 0x3ffffff); - uint64_t x94 = (x92 + x76); - uint64_t x95 = (x94 >> 0x19); - uint32_t x96 = ((uint32_t)x94 & 0x1ffffff); - uint64_t x97 = (x95 + x73); - uint64_t x98 = (x97 >> 0x1a); - uint32_t x99 = ((uint32_t)x97 & 0x3ffffff); - uint64_t x100 = (x98 + x70); - uint64_t x101 = (x100 >> 0x19); - uint32_t x102 = ((uint32_t)x100 & 0x1ffffff); - uint64_t x103 = (x101 + x67); - uint64_t x104 = (x103 >> 0x1a); - uint32_t x105 = ((uint32_t)x103 & 0x3ffffff); - uint64_t x106 = (x104 + x64); - uint64_t x107 = (x106 >> 0x19); - uint32_t x108 = ((uint32_t)x106 & 0x1ffffff); - uint64_t x109 = (x107 + x61); - uint64_t x110 = (x109 >> 0x1a); - uint32_t x111 = ((uint32_t)x109 & 0x3ffffff); - uint64_t x112 = (x110 + x49); - uint64_t x113 = (x112 >> 0x19); - uint32_t x114 = ((uint32_t)x112 & 0x1ffffff); - uint64_t x115 = (x87 + (0x13 * x113)); - uint32_t x116 = (uint32_t) (x115 >> 0x1a); - uint32_t x117 = ((uint32_t)x115 & 0x3ffffff); - uint32_t x118 = (x116 + x90); - uint32_t x119 = (x118 >> 0x19); - uint32_t x120 = (x118 & 0x1ffffff); - out[0] = x117; - out[1] = x120; - out[2] = (x119 + x93); - out[3] = x96; - out[4] = x99; - out[5] = x102; - out[6] = x105; - out[7] = x108; - out[8] = x111; - out[9] = x114; -} - -static inline void fe_mul_ttt(fe *h, const fe *f, const fe *g) -{ - fe_mul_impl(h->v, f->v, g->v); -} - -static inline void fe_mul_tlt(fe *h, const fe_loose *f, const fe *g) -{ - fe_mul_impl(h->v, f->v, g->v); -} - -static inline void -fe_mul_tll(fe *h, const fe_loose *f, const fe_loose *g) -{ - fe_mul_impl(h->v, f->v, g->v); -} - -static void fe_sqr_impl(uint32_t out[10], const uint32_t in1[10]) -{ - const uint32_t x17 = in1[9]; - const uint32_t x18 = in1[8]; - const uint32_t x16 = in1[7]; - const uint32_t x14 = in1[6]; - const uint32_t x12 = in1[5]; - const uint32_t x10 = in1[4]; - const uint32_t x8 = in1[3]; - const uint32_t x6 = in1[2]; - const uint32_t x4 = in1[1]; - const uint32_t x2 = in1[0]; - uint64_t x19 = ((uint64_t)x2 * x2); - uint64_t x20 = ((uint64_t)(0x2 * x2) * x4); - uint64_t x21 = (0x2 * (((uint64_t)x4 * x4) + ((uint64_t)x2 * x6))); - uint64_t x22 = (0x2 * (((uint64_t)x4 * 
x6) + ((uint64_t)x2 * x8))); - uint64_t x23 = ((((uint64_t)x6 * x6) + ((uint64_t)(0x4 * x4) * x8)) + ((uint64_t)(0x2 * x2) * x10)); - uint64_t x24 = (0x2 * ((((uint64_t)x6 * x8) + ((uint64_t)x4 * x10)) + ((uint64_t)x2 * x12))); - uint64_t x25 = (0x2 * (((((uint64_t)x8 * x8) + ((uint64_t)x6 * x10)) + ((uint64_t)x2 * x14)) + ((uint64_t)(0x2 * x4) * x12))); - uint64_t x26 = (0x2 * (((((uint64_t)x8 * x10) + ((uint64_t)x6 * x12)) + ((uint64_t)x4 * x14)) + ((uint64_t)x2 * x16))); - uint64_t x27 = (((uint64_t)x10 * x10) + (0x2 * ((((uint64_t)x6 * x14) + ((uint64_t)x2 * x18)) + (0x2 * (((uint64_t)x4 * x16) + ((uint64_t)x8 * x12)))))); - uint64_t x28 = (0x2 * ((((((uint64_t)x10 * x12) + ((uint64_t)x8 * x14)) + ((uint64_t)x6 * x16)) + ((uint64_t)x4 * x18)) + ((uint64_t)x2 * x17))); - uint64_t x29 = (0x2 * (((((uint64_t)x12 * x12) + ((uint64_t)x10 * x14)) + ((uint64_t)x6 * x18)) + (0x2 * (((uint64_t)x8 * x16) + ((uint64_t)x4 * x17))))); - uint64_t x30 = (0x2 * (((((uint64_t)x12 * x14) + ((uint64_t)x10 * x16)) + ((uint64_t)x8 * x18)) + ((uint64_t)x6 * x17))); - uint64_t x31 = (((uint64_t)x14 * x14) + (0x2 * (((uint64_t)x10 * x18) + (0x2 * (((uint64_t)x12 * x16) + ((uint64_t)x8 * x17)))))); - uint64_t x32 = (0x2 * ((((uint64_t)x14 * x16) + ((uint64_t)x12 * x18)) + ((uint64_t)x10 * x17))); - uint64_t x33 = (0x2 * ((((uint64_t)x16 * x16) + ((uint64_t)x14 * x18)) + ((uint64_t)(0x2 * x12) * x17))); - uint64_t x34 = (0x2 * (((uint64_t)x16 * x18) + ((uint64_t)x14 * x17))); - uint64_t x35 = (((uint64_t)x18 * x18) + ((uint64_t)(0x4 * x16) * x17)); - uint64_t x36 = ((uint64_t)(0x2 * x18) * x17); - uint64_t x37 = ((uint64_t)(0x2 * x17) * x17); - uint64_t x38 = (x27 + (x37 << 0x4)); - uint64_t x39 = (x38 + (x37 << 0x1)); - uint64_t x40 = (x39 + x37); - uint64_t x41 = (x26 + (x36 << 0x4)); - uint64_t x42 = (x41 + (x36 << 0x1)); - uint64_t x43 = (x42 + x36); - uint64_t x44 = (x25 + (x35 << 0x4)); - uint64_t x45 = (x44 + (x35 << 0x1)); - uint64_t x46 = (x45 + x35); - uint64_t x47 = (x24 + (x34 << 0x4)); - uint64_t x48 = (x47 + (x34 << 0x1)); - uint64_t x49 = (x48 + x34); - uint64_t x50 = (x23 + (x33 << 0x4)); - uint64_t x51 = (x50 + (x33 << 0x1)); - uint64_t x52 = (x51 + x33); - uint64_t x53 = (x22 + (x32 << 0x4)); - uint64_t x54 = (x53 + (x32 << 0x1)); - uint64_t x55 = (x54 + x32); - uint64_t x56 = (x21 + (x31 << 0x4)); - uint64_t x57 = (x56 + (x31 << 0x1)); - uint64_t x58 = (x57 + x31); - uint64_t x59 = (x20 + (x30 << 0x4)); - uint64_t x60 = (x59 + (x30 << 0x1)); - uint64_t x61 = (x60 + x30); - uint64_t x62 = (x19 + (x29 << 0x4)); - uint64_t x63 = (x62 + (x29 << 0x1)); - uint64_t x64 = (x63 + x29); - uint64_t x65 = (x64 >> 0x1a); - uint32_t x66 = ((uint32_t)x64 & 0x3ffffff); - uint64_t x67 = (x65 + x61); - uint64_t x68 = (x67 >> 0x19); - uint32_t x69 = ((uint32_t)x67 & 0x1ffffff); - uint64_t x70 = (x68 + x58); - uint64_t x71 = (x70 >> 0x1a); - uint32_t x72 = ((uint32_t)x70 & 0x3ffffff); - uint64_t x73 = (x71 + x55); - uint64_t x74 = (x73 >> 0x19); - uint32_t x75 = ((uint32_t)x73 & 0x1ffffff); - uint64_t x76 = (x74 + x52); - uint64_t x77 = (x76 >> 0x1a); - uint32_t x78 = ((uint32_t)x76 & 0x3ffffff); - uint64_t x79 = (x77 + x49); - uint64_t x80 = (x79 >> 0x19); - uint32_t x81 = ((uint32_t)x79 & 0x1ffffff); - uint64_t x82 = (x80 + x46); - uint64_t x83 = (x82 >> 0x1a); - uint32_t x84 = ((uint32_t)x82 & 0x3ffffff); - uint64_t x85 = (x83 + x43); - uint64_t x86 = (x85 >> 0x19); - uint32_t x87 = ((uint32_t)x85 & 0x1ffffff); - uint64_t x88 = (x86 + x40); - uint64_t x89 = (x88 >> 0x1a); - uint32_t x90 = ((uint32_t)x88 & 
0x3ffffff); - uint64_t x91 = (x89 + x28); - uint64_t x92 = (x91 >> 0x19); - uint32_t x93 = ((uint32_t)x91 & 0x1ffffff); - uint64_t x94 = (x66 + (0x13 * x92)); - uint32_t x95 = (uint32_t) (x94 >> 0x1a); - uint32_t x96 = ((uint32_t)x94 & 0x3ffffff); - uint32_t x97 = (x95 + x69); - uint32_t x98 = (x97 >> 0x19); - uint32_t x99 = (x97 & 0x1ffffff); - out[0] = x96; - out[1] = x99; - out[2] = (x98 + x72); - out[3] = x75; - out[4] = x78; - out[5] = x81; - out[6] = x84; - out[7] = x87; - out[8] = x90; - out[9] = x93; -} - -static inline void fe_sq_tl(fe *h, const fe_loose *f) -{ - fe_sqr_impl(h->v, f->v); -} - -static inline void fe_sq_tt(fe *h, const fe *f) -{ - fe_sqr_impl(h->v, f->v); -} - -static inline void fe_loose_invert(fe *out, const fe_loose *z) -{ - fe t0; - fe t1; - fe t2; - fe t3; - int i; - - fe_sq_tl(&t0, z); - fe_sq_tt(&t1, &t0); - for (i = 1; i < 2; ++i) - fe_sq_tt(&t1, &t1); - fe_mul_tlt(&t1, z, &t1); - fe_mul_ttt(&t0, &t0, &t1); - fe_sq_tt(&t2, &t0); - fe_mul_ttt(&t1, &t1, &t2); - fe_sq_tt(&t2, &t1); - for (i = 1; i < 5; ++i) - fe_sq_tt(&t2, &t2); - fe_mul_ttt(&t1, &t2, &t1); - fe_sq_tt(&t2, &t1); - for (i = 1; i < 10; ++i) - fe_sq_tt(&t2, &t2); - fe_mul_ttt(&t2, &t2, &t1); - fe_sq_tt(&t3, &t2); - for (i = 1; i < 20; ++i) - fe_sq_tt(&t3, &t3); - fe_mul_ttt(&t2, &t3, &t2); - fe_sq_tt(&t2, &t2); - for (i = 1; i < 10; ++i) - fe_sq_tt(&t2, &t2); - fe_mul_ttt(&t1, &t2, &t1); - fe_sq_tt(&t2, &t1); - for (i = 1; i < 50; ++i) - fe_sq_tt(&t2, &t2); - fe_mul_ttt(&t2, &t2, &t1); - fe_sq_tt(&t3, &t2); - for (i = 1; i < 100; ++i) - fe_sq_tt(&t3, &t3); - fe_mul_ttt(&t2, &t3, &t2); - fe_sq_tt(&t2, &t2); - for (i = 1; i < 50; ++i) - fe_sq_tt(&t2, &t2); - fe_mul_ttt(&t1, &t2, &t1); - fe_sq_tt(&t1, &t1); - for (i = 1; i < 5; ++i) - fe_sq_tt(&t1, &t1); - fe_mul_ttt(out, &t1, &t0); -} - -static inline void fe_invert(fe *out, const fe *z) -{ - fe_loose l; - fe_copy_lt(&l, z); - fe_loose_invert(out, &l); -} - -/* Replace (f,g) with (g,f) if b == 1; - * replace (f,g) with (f,g) if b == 0. 
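- * The swap is branchless: b is stretched into an all-ones or all-zeros - * mask and applied with XOR, so secret data never selects a branch.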
- * - * Preconditions: b in {0,1} - */ -static inline void fe_cswap(fe *f, fe *g, unsigned int b) -{ - unsigned i; - b = 0 - b; - for (i = 0; i < 10; i++) { - uint32_t x = f->v[i] ^ g->v[i]; - x &= b; - f->v[i] ^= x; - g->v[i] ^= x; - } -} - -/* NOTE: based on fiat-crypto fe_mul, edited for in2=121666, 0, 0.*/ -static inline void fe_mul_121666_impl(uint32_t out[10], const uint32_t in1[10]) -{ - const uint32_t x20 = in1[9]; - const uint32_t x21 = in1[8]; - const uint32_t x19 = in1[7]; - const uint32_t x17 = in1[6]; - const uint32_t x15 = in1[5]; - const uint32_t x13 = in1[4]; - const uint32_t x11 = in1[3]; - const uint32_t x9 = in1[2]; - const uint32_t x7 = in1[1]; - const uint32_t x5 = in1[0]; - const uint32_t x38 = 0; - const uint32_t x39 = 0; - const uint32_t x37 = 0; - const uint32_t x35 = 0; - const uint32_t x33 = 0; - const uint32_t x31 = 0; - const uint32_t x29 = 0; - const uint32_t x27 = 0; - const uint32_t x25 = 0; - const uint32_t x23 = 121666; - uint64_t x40 = ((uint64_t)x23 * x5); - uint64_t x41 = (((uint64_t)x23 * x7) + ((uint64_t)x25 * x5)); - uint64_t x42 = ((((uint64_t)(0x2 * x25) * x7) + ((uint64_t)x23 * x9)) + ((uint64_t)x27 * x5)); - uint64_t x43 = (((((uint64_t)x25 * x9) + ((uint64_t)x27 * x7)) + ((uint64_t)x23 * x11)) + ((uint64_t)x29 * x5)); - uint64_t x44 = (((((uint64_t)x27 * x9) + (0x2 * (((uint64_t)x25 * x11) + ((uint64_t)x29 * x7)))) + ((uint64_t)x23 * x13)) + ((uint64_t)x31 * x5)); - uint64_t x45 = (((((((uint64_t)x27 * x11) + ((uint64_t)x29 * x9)) + ((uint64_t)x25 * x13)) + ((uint64_t)x31 * x7)) + ((uint64_t)x23 * x15)) + ((uint64_t)x33 * x5)); - uint64_t x46 = (((((0x2 * ((((uint64_t)x29 * x11) + ((uint64_t)x25 * x15)) + ((uint64_t)x33 * x7))) + ((uint64_t)x27 * x13)) + ((uint64_t)x31 * x9)) + ((uint64_t)x23 * x17)) + ((uint64_t)x35 * x5)); - uint64_t x47 = (((((((((uint64_t)x29 * x13) + ((uint64_t)x31 * x11)) + ((uint64_t)x27 * x15)) + ((uint64_t)x33 * x9)) + ((uint64_t)x25 * x17)) + ((uint64_t)x35 * x7)) + ((uint64_t)x23 * x19)) + ((uint64_t)x37 * x5)); - uint64_t x48 = (((((((uint64_t)x31 * x13) + (0x2 * (((((uint64_t)x29 * x15) + ((uint64_t)x33 * x11)) + ((uint64_t)x25 * x19)) + ((uint64_t)x37 * x7)))) + ((uint64_t)x27 * x17)) + ((uint64_t)x35 * x9)) + ((uint64_t)x23 * x21)) + ((uint64_t)x39 * x5)); - uint64_t x49 = (((((((((((uint64_t)x31 * x15) + ((uint64_t)x33 * x13)) + ((uint64_t)x29 * x17)) + ((uint64_t)x35 * x11)) + ((uint64_t)x27 * x19)) + ((uint64_t)x37 * x9)) + ((uint64_t)x25 * x21)) + ((uint64_t)x39 * x7)) + ((uint64_t)x23 * x20)) + ((uint64_t)x38 * x5)); - uint64_t x50 = (((((0x2 * ((((((uint64_t)x33 * x15) + ((uint64_t)x29 * x19)) + ((uint64_t)x37 * x11)) + ((uint64_t)x25 * x20)) + ((uint64_t)x38 * x7))) + ((uint64_t)x31 * x17)) + ((uint64_t)x35 * x13)) + ((uint64_t)x27 * x21)) + ((uint64_t)x39 * x9)); - uint64_t x51 = (((((((((uint64_t)x33 * x17) + ((uint64_t)x35 * x15)) + ((uint64_t)x31 * x19)) + ((uint64_t)x37 * x13)) + ((uint64_t)x29 * x21)) + ((uint64_t)x39 * x11)) + ((uint64_t)x27 * x20)) + ((uint64_t)x38 * x9)); - uint64_t x52 = (((((uint64_t)x35 * x17) + (0x2 * (((((uint64_t)x33 * x19) + ((uint64_t)x37 * x15)) + ((uint64_t)x29 * x20)) + ((uint64_t)x38 * x11)))) + ((uint64_t)x31 * x21)) + ((uint64_t)x39 * x13)); - uint64_t x53 = (((((((uint64_t)x35 * x19) + ((uint64_t)x37 * x17)) + ((uint64_t)x33 * x21)) + ((uint64_t)x39 * x15)) + ((uint64_t)x31 * x20)) + ((uint64_t)x38 * x13)); - uint64_t x54 = (((0x2 * ((((uint64_t)x37 * x19) + ((uint64_t)x33 * x20)) + ((uint64_t)x38 * x15))) + ((uint64_t)x35 * x21)) + ((uint64_t)x39 * x17)); - uint64_t 
x55 = (((((uint64_t)x37 * x21) + ((uint64_t)x39 * x19)) + ((uint64_t)x35 * x20)) + ((uint64_t)x38 * x17)); - uint64_t x56 = (((uint64_t)x39 * x21) + (0x2 * (((uint64_t)x37 * x20) + ((uint64_t)x38 * x19)))); - uint64_t x57 = (((uint64_t)x39 * x20) + ((uint64_t)x38 * x21)); - uint64_t x58 = ((uint64_t)(0x2 * x38) * x20); - uint64_t x59 = (x48 + (x58 << 0x4)); - uint64_t x60 = (x59 + (x58 << 0x1)); - uint64_t x61 = (x60 + x58); - uint64_t x62 = (x47 + (x57 << 0x4)); - uint64_t x63 = (x62 + (x57 << 0x1)); - uint64_t x64 = (x63 + x57); - uint64_t x65 = (x46 + (x56 << 0x4)); - uint64_t x66 = (x65 + (x56 << 0x1)); - uint64_t x67 = (x66 + x56); - uint64_t x68 = (x45 + (x55 << 0x4)); - uint64_t x69 = (x68 + (x55 << 0x1)); - uint64_t x70 = (x69 + x55); - uint64_t x71 = (x44 + (x54 << 0x4)); - uint64_t x72 = (x71 + (x54 << 0x1)); - uint64_t x73 = (x72 + x54); - uint64_t x74 = (x43 + (x53 << 0x4)); - uint64_t x75 = (x74 + (x53 << 0x1)); - uint64_t x76 = (x75 + x53); - uint64_t x77 = (x42 + (x52 << 0x4)); - uint64_t x78 = (x77 + (x52 << 0x1)); - uint64_t x79 = (x78 + x52); - uint64_t x80 = (x41 + (x51 << 0x4)); - uint64_t x81 = (x80 + (x51 << 0x1)); - uint64_t x82 = (x81 + x51); - uint64_t x83 = (x40 + (x50 << 0x4)); - uint64_t x84 = (x83 + (x50 << 0x1)); - uint64_t x85 = (x84 + x50); - uint64_t x86 = (x85 >> 0x1a); - uint32_t x87 = ((uint32_t)x85 & 0x3ffffff); - uint64_t x88 = (x86 + x82); - uint64_t x89 = (x88 >> 0x19); - uint32_t x90 = ((uint32_t)x88 & 0x1ffffff); - uint64_t x91 = (x89 + x79); - uint64_t x92 = (x91 >> 0x1a); - uint32_t x93 = ((uint32_t)x91 & 0x3ffffff); - uint64_t x94 = (x92 + x76); - uint64_t x95 = (x94 >> 0x19); - uint32_t x96 = ((uint32_t)x94 & 0x1ffffff); - uint64_t x97 = (x95 + x73); - uint64_t x98 = (x97 >> 0x1a); - uint32_t x99 = ((uint32_t)x97 & 0x3ffffff); - uint64_t x100 = (x98 + x70); - uint64_t x101 = (x100 >> 0x19); - uint32_t x102 = ((uint32_t)x100 & 0x1ffffff); - uint64_t x103 = (x101 + x67); - uint64_t x104 = (x103 >> 0x1a); - uint32_t x105 = ((uint32_t)x103 & 0x3ffffff); - uint64_t x106 = (x104 + x64); - uint64_t x107 = (x106 >> 0x19); - uint32_t x108 = ((uint32_t)x106 & 0x1ffffff); - uint64_t x109 = (x107 + x61); - uint64_t x110 = (x109 >> 0x1a); - uint32_t x111 = ((uint32_t)x109 & 0x3ffffff); - uint64_t x112 = (x110 + x49); - uint64_t x113 = (x112 >> 0x19); - uint32_t x114 = ((uint32_t)x112 & 0x1ffffff); - uint64_t x115 = (x87 + (0x13 * x113)); - uint32_t x116 = (uint32_t) (x115 >> 0x1a); - uint32_t x117 = ((uint32_t)x115 & 0x3ffffff); - uint32_t x118 = (x116 + x90); - uint32_t x119 = (x118 >> 0x19); - uint32_t x120 = (x118 & 0x1ffffff); - out[0] = x117; - out[1] = x120; - out[2] = (x119 + x93); - out[3] = x96; - out[4] = x99; - out[5] = x102; - out[6] = x105; - out[7] = x108; - out[8] = x111; - out[9] = x114; -} - -static inline void fe_mul121666(fe *h, const fe_loose *f) -{ - fe_mul_121666_impl(h->v, f->v); -} - -static const uint8_t curve25519_null_point[CURVE25519_KEY_SIZE]; - -bool curve25519(uint8_t out[CURVE25519_KEY_SIZE], - const uint8_t scalar[CURVE25519_KEY_SIZE], - const uint8_t point[CURVE25519_KEY_SIZE]) -{ - fe x1, x2, z2, x3, z3; - fe_loose x2l, z2l, x3l; - unsigned swap = 0; - int pos; - uint8_t e[32]; - - memcpy(e, scalar, 32); - curve25519_clamp_secret(e); - - /* The following implementation was transcribed to Coq and proven to - * correspond to unary scalar multiplication in affine coordinates given - * that x1 != 0 is the x coordinate of some point on the curve. 
It was - * also checked in Coq that doing a ladderstep with x1 = x3 = 0 gives - * z2' = z3' = 0, and z2 = z3 = 0 gives z2' = z3' = 0. The statement was - * quantified over the underlying field, so it applies to Curve25519 - * itself and the quadratic twist of Curve25519. It was not proven in - * Coq that prime-field arithmetic correctly simulates extension-field - * arithmetic on prime-field values. The decoding of the byte array - * representation of e was not considered. - * - * Specification of Montgomery curves in affine coordinates: - * - * - * Proof that these form a group that is isomorphic to a Weierstrass - * curve: - * - * - * Coq transcription and correctness proof of the loop - * (where scalarbits=255): - * - * - * preconditions: 0 <= e < 2^255 (not necessarily e < order), - * fe_invert(0) = 0 - */ - fe_frombytes(&x1, point); - fe_1(&x2); - fe_0(&z2); - fe_copy(&x3, &x1); - fe_1(&z3); - - for (pos = 254; pos >= 0; --pos) { - fe tmp0, tmp1; - fe_loose tmp0l, tmp1l; - /* loop invariant as of right before the test, for the case - * where x1 != 0: - * pos >= -1; if z2 = 0 then x2 is nonzero; if z3 = 0 then x3 - * is nonzero - * let r := e >> (pos+1) in the following equalities of - * projective points: - * to_xz (r*P) === if swap then (x3, z3) else (x2, z2) - * to_xz ((r+1)*P) === if swap then (x2, z2) else (x3, z3) - * x1 is the nonzero x coordinate of the nonzero - * point (r*P-(r+1)*P) - */ - unsigned b = 1 & (e[pos / 8] >> (pos & 7)); - swap ^= b; - fe_cswap(&x2, &x3, swap); - fe_cswap(&z2, &z3, swap); - swap = b; - /* Coq transcription of ladderstep formula (called from - * transcribed loop): - * - * - * x1 != 0 - * x1 = 0 - */ - fe_sub(&tmp0l, &x3, &z3); - fe_sub(&tmp1l, &x2, &z2); - fe_add(&x2l, &x2, &z2); - fe_add(&z2l, &x3, &z3); - fe_mul_tll(&z3, &tmp0l, &x2l); - fe_mul_tll(&z2, &z2l, &tmp1l); - fe_sq_tl(&tmp0, &tmp1l); - fe_sq_tl(&tmp1, &x2l); - fe_add(&x3l, &z3, &z2); - fe_sub(&z2l, &z3, &z2); - fe_mul_ttt(&x2, &tmp1, &tmp0); - fe_sub(&tmp1l, &tmp1, &tmp0); - fe_sq_tl(&z2, &z2l); - fe_mul121666(&z3, &tmp1l); - fe_sq_tl(&x3, &x3l); - fe_add(&tmp0l, &tmp0, &z3); - fe_mul_ttt(&z3, &x1, &z2); - fe_mul_tll(&z2, &tmp1l, &tmp0l); - } - /* here pos=-1, so r=e, so to_xz (e*P) === if swap then (x3, z3) - * else (x2, z2) - */ - fe_cswap(&x2, &x3, swap); - fe_cswap(&z2, &z3, swap); - - fe_invert(&z2, &z2); - fe_mul_ttt(&x2, &x2, &z2); - fe_tobytes(out, &x2); - - explicit_bzero(&x1, sizeof(x1)); - explicit_bzero(&x2, sizeof(x2)); - explicit_bzero(&z2, sizeof(z2)); - explicit_bzero(&x3, sizeof(x3)); - explicit_bzero(&z3, sizeof(z3)); - explicit_bzero(&x2l, sizeof(x2l)); - explicit_bzero(&z2l, sizeof(z2l)); - explicit_bzero(&x3l, sizeof(x3l)); - explicit_bzero(&e, sizeof(e)); - - return timingsafe_bcmp(out, curve25519_null_point, CURVE25519_KEY_SIZE) != 0; -} diff --git a/sys/dev/if_wg/crypto.h b/sys/dev/if_wg/crypto.h deleted file mode 100644 index 6e045c2fe0bf..000000000000 --- a/sys/dev/if_wg/crypto.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (C) 2015-2021 Jason A. Donenfeld . All Rights Reserved. - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#ifndef _WG_CRYPTO -#define _WG_CRYPTO - -#include - -enum chacha20poly1305_lengths { - XCHACHA20POLY1305_NONCE_SIZE = 24, - CHACHA20POLY1305_KEY_SIZE = 32, - CHACHA20POLY1305_AUTHTAG_SIZE = 16 -}; - -void -chacha20poly1305_encrypt(uint8_t *dst, const uint8_t *src, const size_t src_len, - const uint8_t *ad, const size_t ad_len, - const uint64_t nonce, - const uint8_t key[CHACHA20POLY1305_KEY_SIZE]); - -bool -chacha20poly1305_decrypt(uint8_t *dst, const uint8_t *src, const size_t src_len, - const uint8_t *ad, const size_t ad_len, - const uint64_t nonce, - const uint8_t key[CHACHA20POLY1305_KEY_SIZE]); - -void -xchacha20poly1305_encrypt(uint8_t *dst, const uint8_t *src, - const size_t src_len, const uint8_t *ad, - const size_t ad_len, - const uint8_t nonce[XCHACHA20POLY1305_NONCE_SIZE], - const uint8_t key[CHACHA20POLY1305_KEY_SIZE]); - -bool -xchacha20poly1305_decrypt(uint8_t *dst, const uint8_t *src, - const size_t src_len, const uint8_t *ad, - const size_t ad_len, - const uint8_t nonce[XCHACHA20POLY1305_NONCE_SIZE], - const uint8_t key[CHACHA20POLY1305_KEY_SIZE]); - - -enum blake2s_lengths { - BLAKE2S_BLOCK_SIZE = 64, - BLAKE2S_HASH_SIZE = 32, - BLAKE2S_KEY_SIZE = 32 -}; - -struct blake2s_state { - uint32_t h[8]; - uint32_t t[2]; - uint32_t f[2]; - uint8_t buf[BLAKE2S_BLOCK_SIZE]; - unsigned int buflen; - unsigned int outlen; -}; - -void blake2s_init(struct blake2s_state *state, const size_t outlen); - -void blake2s_init_key(struct blake2s_state *state, const size_t outlen, - const uint8_t *key, const size_t keylen); - -void blake2s_update(struct blake2s_state *state, const uint8_t *in, size_t inlen); - -void blake2s_final(struct blake2s_state *state, uint8_t *out); - -void blake2s(uint8_t *out, const uint8_t *in, const uint8_t *key, - const size_t outlen, const size_t inlen, const size_t keylen); - -void blake2s_hmac(uint8_t *out, const uint8_t *in, const uint8_t *key, - const size_t outlen, const size_t inlen, const size_t keylen); - -enum curve25519_lengths { - CURVE25519_KEY_SIZE = 32 -}; - -bool curve25519(uint8_t mypublic[static CURVE25519_KEY_SIZE], - const uint8_t secret[static CURVE25519_KEY_SIZE], - const uint8_t basepoint[static CURVE25519_KEY_SIZE]); - -static inline bool -curve25519_generate_public(uint8_t pub[static CURVE25519_KEY_SIZE], - const uint8_t secret[static CURVE25519_KEY_SIZE]) -{ - static const uint8_t basepoint[CURVE25519_KEY_SIZE] = { 9 }; - - return curve25519(pub, secret, basepoint); -} - -static inline void curve25519_clamp_secret(uint8_t secret[static CURVE25519_KEY_SIZE]) -{ - secret[0] &= 248; - secret[31] = (secret[31] & 127) | 64; -} - -static inline void curve25519_generate_secret(uint8_t secret[CURVE25519_KEY_SIZE]) -{ - arc4random_buf(secret, CURVE25519_KEY_SIZE); - curve25519_clamp_secret(secret); -} - -#endif diff --git a/sys/dev/if_wg/if_wg.c b/sys/dev/if_wg/if_wg.c deleted file mode 100644 index 8c11cc58a3bb..000000000000 --- a/sys/dev/if_wg/if_wg.c +++ /dev/null @@ -1,3462 +0,0 @@ -/* - * Copyright (C) 2015-2021 Jason A. Donenfeld . All Rights Reserved. 
- * Copyright (C) 2019-2021 Matt Dunwoodie - * Copyright (c) 2019-2020 Rubicon Communications, LLC (Netgate) - * Copyright (c) 2021 Kyle Evans - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -/* TODO audit imports */ -#include "opt_inet.h" -#include "opt_inet6.h" - -#include -__FBSDID("$FreeBSD$"); - -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "support.h" -#include "wg_noise.h" -#include "wg_cookie.h" -#include "if_wg.h" - -/* It'd be nice to use IF_MAXMTU, but that means more complicated mbuf allocations, - * so instead just do the biggest mbuf we can easily allocate minus the usual maximum - * IPv6 overhead of 80 bytes. If somebody wants bigger frames, we can revisit this. */ -#define MAX_MTU (MJUM16BYTES - 80) - -#define DEFAULT_MTU 1420 - -#define MAX_STAGED_PKT 128 -#define MAX_QUEUED_PKT 1024 -#define MAX_QUEUED_PKT_MASK (MAX_QUEUED_PKT - 1) - -#define MAX_QUEUED_HANDSHAKES 4096 - -#define HASHTABLE_PEER_SIZE (1 << 11) -#define HASHTABLE_INDEX_SIZE (1 << 13) -#define MAX_PEERS_PER_IFACE (1 << 20) - -#define REKEY_TIMEOUT 5 -#define REKEY_TIMEOUT_JITTER 334 /* 1/3 sec, round for arc4random_uniform */ -#define KEEPALIVE_TIMEOUT 10 -#define MAX_TIMER_HANDSHAKES (90 / REKEY_TIMEOUT) -#define NEW_HANDSHAKE_TIMEOUT (REKEY_TIMEOUT + KEEPALIVE_TIMEOUT) -#define UNDERLOAD_TIMEOUT 1 - -#define DPRINTF(sc, ...) 
if (wireguard_debug) if_printf(sc->sc_ifp, ##__VA_ARGS__) - -/* First byte indicating packet type on the wire */ -#define WG_PKT_INITIATION htole32(1) -#define WG_PKT_RESPONSE htole32(2) -#define WG_PKT_COOKIE htole32(3) -#define WG_PKT_DATA htole32(4) - -#define WG_PKT_WITH_PADDING(n) (((n) + (16-1)) & (~(16-1))) -#define WG_KEY_SIZE 32 - -struct wg_pkt_initiation { - uint32_t t; - uint32_t s_idx; - uint8_t ue[NOISE_PUBLIC_KEY_LEN]; - uint8_t es[NOISE_PUBLIC_KEY_LEN + NOISE_AUTHTAG_LEN]; - uint8_t ets[NOISE_TIMESTAMP_LEN + NOISE_AUTHTAG_LEN]; - struct cookie_macs m; -}; - -struct wg_pkt_response { - uint32_t t; - uint32_t s_idx; - uint32_t r_idx; - uint8_t ue[NOISE_PUBLIC_KEY_LEN]; - uint8_t en[0 + NOISE_AUTHTAG_LEN]; - struct cookie_macs m; -}; - -struct wg_pkt_cookie { - uint32_t t; - uint32_t r_idx; - uint8_t nonce[COOKIE_NONCE_SIZE]; - uint8_t ec[COOKIE_ENCRYPTED_SIZE]; -}; - -struct wg_pkt_data { - uint32_t t; - uint32_t r_idx; - uint8_t nonce[sizeof(uint64_t)]; - uint8_t buf[]; -}; - -struct wg_endpoint { - union { - struct sockaddr r_sa; - struct sockaddr_in r_sin; -#ifdef INET6 - struct sockaddr_in6 r_sin6; -#endif - } e_remote; - union { - struct in_addr l_in; -#ifdef INET6 - struct in6_pktinfo l_pktinfo6; -#define l_in6 l_pktinfo6.ipi6_addr -#endif - } e_local; -}; - -struct wg_tag { - struct m_tag t_tag; - struct wg_endpoint t_endpoint; - struct wg_peer *t_peer; - struct mbuf *t_mbuf; - int t_done; - int t_mtu; -}; - -struct wg_index { - LIST_ENTRY(wg_index) i_entry; - SLIST_ENTRY(wg_index) i_unused_entry; - uint32_t i_key; - struct noise_remote *i_value; -}; - -struct wg_timers { - /* t_lock is for blocking wg_timers_event_* when setting t_disabled. */ - struct rwlock t_lock; - - int t_disabled; - int t_need_another_keepalive; - uint16_t t_persistent_keepalive_interval; - struct callout t_new_handshake; - struct callout t_send_keepalive; - struct callout t_retry_handshake; - struct callout t_zero_key_material; - struct callout t_persistent_keepalive; - - struct mtx t_handshake_mtx; - struct timespec t_handshake_last_sent; - struct timespec t_handshake_complete; - volatile int t_handshake_retries; -}; - -struct wg_aip { - struct radix_node r_nodes[2]; - CK_LIST_ENTRY(wg_aip) r_entry; - struct sockaddr_storage r_addr; - struct sockaddr_storage r_mask; - struct wg_peer *r_peer; -}; - -struct wg_queue { - struct mtx q_mtx; - struct mbufq q; -}; - -struct wg_peer { - CK_LIST_ENTRY(wg_peer) p_hash_entry; - CK_LIST_ENTRY(wg_peer) p_entry; - uint64_t p_id; - struct wg_softc *p_sc; - - struct noise_remote p_remote; - struct cookie_maker p_cookie; - struct wg_timers p_timers; - - struct rwlock p_endpoint_lock; - struct wg_endpoint p_endpoint; - - SLIST_HEAD(,wg_index) p_unused_index; - struct wg_index p_index[3]; - - struct wg_queue p_stage_queue; - struct wg_queue p_encap_queue; - struct wg_queue p_decap_queue; - - struct grouptask p_clear_secrets; - struct grouptask p_send_initiation; - struct grouptask p_send_keepalive; - struct grouptask p_send; - struct grouptask p_recv; - - counter_u64_t p_tx_bytes; - counter_u64_t p_rx_bytes; - - CK_LIST_HEAD(, wg_aip) p_aips; - struct mtx p_lock; - struct epoch_context p_ctx; -}; - -enum route_direction { - /* TODO OpenBSD doesn't use IN/OUT, instead passes the address buffer - * directly to route_lookup. 
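WG_PKT_WITH_PADDING above rounds a payload length up to the next multiple of 16, which works with a single mask only because 16 is a power of two: adding 15 carries into the next block unless the length is already aligned, and the mask then clears the low four bits. A quick check of the edge cases, with the macro copied from the definition above:

#include <assert.h>

#define WG_PKT_WITH_PADDING(n) (((n) + (16-1)) & (~(16-1)))

int
main(void)
{
	assert(WG_PKT_WITH_PADDING(0) == 0);	/* keepalives stay empty */
	assert(WG_PKT_WITH_PADDING(1) == 16);
	assert(WG_PKT_WITH_PADDING(16) == 16);
	assert(WG_PKT_WITH_PADDING(1420) == 1424);
	return (0);
}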
*/ - IN, - OUT, -}; - -struct wg_aip_table { - size_t t_count; - struct radix_node_head *t_ip; - struct radix_node_head *t_ip6; -}; - -struct wg_allowedip { - uint16_t family; - union { - struct in_addr ip4; - struct in6_addr ip6; - }; - uint8_t cidr; -}; - -struct wg_hashtable { - struct mtx h_mtx; - SIPHASH_KEY h_secret; - CK_LIST_HEAD(, wg_peer) h_peers_list; - CK_LIST_HEAD(, wg_peer) *h_peers; - u_long h_peers_mask; - size_t h_num_peers; -}; - -struct wg_socket { - struct mtx so_mtx; - struct socket *so_so4; - struct socket *so_so6; - uint32_t so_user_cookie; - in_port_t so_port; -}; - -struct wg_softc { - LIST_ENTRY(wg_softc) sc_entry; - struct ifnet *sc_ifp; - int sc_flags; - - struct ucred *sc_ucred; - struct wg_socket sc_socket; - struct wg_hashtable sc_hashtable; - struct wg_aip_table sc_aips; - - struct mbufq sc_handshake_queue; - struct grouptask sc_handshake; - - struct noise_local sc_local; - struct cookie_checker sc_cookie; - - struct buf_ring *sc_encap_ring; - struct buf_ring *sc_decap_ring; - - struct grouptask *sc_encrypt; - struct grouptask *sc_decrypt; - - struct rwlock sc_index_lock; - LIST_HEAD(,wg_index) *sc_index; - u_long sc_index_mask; - - struct sx sc_lock; - volatile u_int sc_peer_count; -}; - -#define WGF_DYING 0x0001 - -/* TODO the following defines are freebsd specific, we should see what is - * necessary and cleanup from there (i suspect a lot can be junked). */ - -#ifndef ENOKEY -#define ENOKEY ENOTCAPABLE -#endif - -#if __FreeBSD_version > 1300000 -typedef void timeout_t (void *); -#endif - -#define GROUPTASK_DRAIN(gtask) \ - gtaskqueue_drain((gtask)->gt_taskqueue, &(gtask)->gt_task) - -#define MTAG_WIREGUARD 0xBEAD -#define M_ENQUEUED M_PROTO1 - -static int clone_count; -static uma_zone_t ratelimit_zone; -static int wireguard_debug; -static volatile unsigned long peer_counter = 0; -static const char wgname[] = "wg"; -static unsigned wg_osd_jail_slot; - -static struct sx wg_sx; -SX_SYSINIT(wg_sx, &wg_sx, "wg_sx"); - -static LIST_HEAD(, wg_softc) wg_list = LIST_HEAD_INITIALIZER(wg_list); - -SYSCTL_NODE(_net, OID_AUTO, wg, CTLFLAG_RW, 0, "WireGuard"); -SYSCTL_INT(_net_wg, OID_AUTO, debug, CTLFLAG_RWTUN, &wireguard_debug, 0, - "enable debug logging"); - -TASKQGROUP_DECLARE(if_io_tqg); - -MALLOC_DEFINE(M_WG, "WG", "wireguard"); -VNET_DEFINE_STATIC(struct if_clone *, wg_cloner); - - -#define V_wg_cloner VNET(wg_cloner) -#define WG_CAPS IFCAP_LINKSTATE -#define ph_family PH_loc.eight[5] - -struct wg_timespec64 { - uint64_t tv_sec; - uint64_t tv_nsec; -}; - -struct wg_peer_export { - struct sockaddr_storage endpoint; - struct timespec last_handshake; - uint8_t public_key[WG_KEY_SIZE]; - uint8_t preshared_key[NOISE_SYMMETRIC_KEY_LEN]; - size_t endpoint_sz; - struct wg_allowedip *aip; - uint64_t rx_bytes; - uint64_t tx_bytes; - int aip_count; - uint16_t persistent_keepalive; -}; - -static struct wg_tag *wg_tag_get(struct mbuf *); -static struct wg_endpoint *wg_mbuf_endpoint_get(struct mbuf *); -static int wg_socket_init(struct wg_softc *, in_port_t); -static int wg_socket_bind(struct socket *, struct socket *, in_port_t *); -static void wg_socket_set(struct wg_softc *, struct socket *, struct socket *); -static void wg_socket_uninit(struct wg_softc *); -static void wg_socket_set_cookie(struct wg_softc *, uint32_t); -static int wg_send(struct wg_softc *, struct wg_endpoint *, struct mbuf *); -static void wg_timers_event_data_sent(struct wg_timers *); -static void wg_timers_event_data_received(struct wg_timers *); -static void 
wg_timers_event_any_authenticated_packet_sent(struct wg_timers *); -static void wg_timers_event_any_authenticated_packet_received(struct wg_timers *); -static void wg_timers_event_any_authenticated_packet_traversal(struct wg_timers *); -static void wg_timers_event_handshake_initiated(struct wg_timers *); -static void wg_timers_event_handshake_responded(struct wg_timers *); -static void wg_timers_event_handshake_complete(struct wg_timers *); -static void wg_timers_event_session_derived(struct wg_timers *); -static void wg_timers_event_want_initiation(struct wg_timers *); -static void wg_timers_event_reset_handshake_last_sent(struct wg_timers *); -static void wg_timers_run_send_initiation(struct wg_timers *, int); -static void wg_timers_run_retry_handshake(struct wg_timers *); -static void wg_timers_run_send_keepalive(struct wg_timers *); -static void wg_timers_run_new_handshake(struct wg_timers *); -static void wg_timers_run_zero_key_material(struct wg_timers *); -static void wg_timers_run_persistent_keepalive(struct wg_timers *); -static void wg_timers_init(struct wg_timers *); -static void wg_timers_enable(struct wg_timers *); -static void wg_timers_disable(struct wg_timers *); -static void wg_timers_set_persistent_keepalive(struct wg_timers *, uint16_t); -static void wg_timers_get_last_handshake(struct wg_timers *, struct timespec *); -static int wg_timers_expired_handshake_last_sent(struct wg_timers *); -static int wg_timers_check_handshake_last_sent(struct wg_timers *); -static void wg_queue_init(struct wg_queue *, const char *); -static void wg_queue_deinit(struct wg_queue *); -static void wg_queue_purge(struct wg_queue *); -static struct mbuf *wg_queue_dequeue(struct wg_queue *, struct wg_tag **); -static int wg_queue_len(struct wg_queue *); -static int wg_queue_in(struct wg_peer *, struct mbuf *); -static void wg_queue_out(struct wg_peer *); -static void wg_queue_stage(struct wg_peer *, struct mbuf *); -static int wg_aip_init(struct wg_aip_table *); -static void wg_aip_destroy(struct wg_aip_table *); -static void wg_aip_populate_aip4(struct wg_aip *, const struct in_addr *, uint8_t); -static void wg_aip_populate_aip6(struct wg_aip *, const struct in6_addr *, uint8_t); -static int wg_aip_add(struct wg_aip_table *, struct wg_peer *, const struct wg_allowedip *); -static int wg_peer_remove(struct radix_node *, void *); -static void wg_peer_remove_all(struct wg_softc *); -static int wg_aip_delete(struct wg_aip_table *, struct wg_peer *); -static struct wg_peer *wg_aip_lookup(struct wg_aip_table *, struct mbuf *, enum route_direction); -static void wg_hashtable_init(struct wg_hashtable *); -static void wg_hashtable_destroy(struct wg_hashtable *); -static void wg_hashtable_peer_insert(struct wg_hashtable *, struct wg_peer *); -static struct wg_peer *wg_peer_lookup(struct wg_softc *, const uint8_t [32]); -static void wg_hashtable_peer_remove(struct wg_hashtable *, struct wg_peer *); -static int wg_cookie_validate_packet(struct cookie_checker *, struct mbuf *, int); -static struct wg_peer *wg_peer_alloc(struct wg_softc *); -static void wg_peer_free_deferred(epoch_context_t); -static void wg_peer_destroy(struct wg_peer *); -static void wg_peer_send_buf(struct wg_peer *, uint8_t *, size_t); -static void wg_send_initiation(struct wg_peer *); -static void wg_send_response(struct wg_peer *); -static void wg_send_cookie(struct wg_softc *, struct cookie_macs *, uint32_t, struct mbuf *); -static void wg_peer_set_endpoint_from_tag(struct wg_peer *, struct wg_tag *); -static void 
wg_peer_clear_src(struct wg_peer *); -static void wg_peer_get_endpoint(struct wg_peer *, struct wg_endpoint *); -static void wg_deliver_out(struct wg_peer *); -static void wg_deliver_in(struct wg_peer *); -static void wg_send_buf(struct wg_softc *, struct wg_endpoint *, uint8_t *, size_t); -static void wg_send_keepalive(struct wg_peer *); -static void wg_handshake(struct wg_softc *, struct mbuf *); -static void wg_encap(struct wg_softc *, struct mbuf *); -static void wg_decap(struct wg_softc *, struct mbuf *); -static void wg_softc_handshake_receive(struct wg_softc *); -static void wg_softc_decrypt(struct wg_softc *); -static void wg_softc_encrypt(struct wg_softc *); -static struct noise_remote *wg_remote_get(struct wg_softc *, uint8_t [NOISE_PUBLIC_KEY_LEN]); -static uint32_t wg_index_set(struct wg_softc *, struct noise_remote *); -static struct noise_remote *wg_index_get(struct wg_softc *, uint32_t); -static void wg_index_drop(struct wg_softc *, uint32_t); -static int wg_update_endpoint_addrs(struct wg_endpoint *, const struct sockaddr *, struct ifnet *); -static void wg_input(struct mbuf *, int, struct inpcb *, const struct sockaddr *, void *); -static void wg_encrypt_dispatch(struct wg_softc *); -static void wg_decrypt_dispatch(struct wg_softc *); -static void crypto_taskq_setup(struct wg_softc *); -static void crypto_taskq_destroy(struct wg_softc *); -static int wg_clone_create(struct if_clone *, int, caddr_t); -static void wg_qflush(struct ifnet *); -static int wg_transmit(struct ifnet *, struct mbuf *); -static int wg_output(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); -static void wg_clone_destroy(struct ifnet *); -static int wg_peer_to_export(struct wg_peer *, struct wg_peer_export *); -static bool wgc_privileged(struct wg_softc *); -static int wgc_get(struct wg_softc *, struct wg_data_io *); -static int wgc_set(struct wg_softc *, struct wg_data_io *); -static int wg_up(struct wg_softc *); -static void wg_down(struct wg_softc *); -static void wg_reassign(struct ifnet *, struct vnet *, char *unused); -static void wg_init(void *); -static int wg_ioctl(struct ifnet *, u_long, caddr_t); -static void vnet_wg_init(const void *); -static void vnet_wg_uninit(const void *); -static void wg_module_init(void); -static void wg_module_deinit(void); - -/* TODO Peer */ -static struct wg_peer * -wg_peer_alloc(struct wg_softc *sc) -{ - struct wg_peer *peer; - - sx_assert(&sc->sc_lock, SX_XLOCKED); - - peer = malloc(sizeof(*peer), M_WG, M_WAITOK|M_ZERO); - peer->p_sc = sc; - peer->p_id = peer_counter++; - CK_LIST_INIT(&peer->p_aips); - - rw_init(&peer->p_endpoint_lock, "wg_peer_endpoint"); - wg_queue_init(&peer->p_stage_queue, "stageq"); - wg_queue_init(&peer->p_encap_queue, "txq"); - wg_queue_init(&peer->p_decap_queue, "rxq"); - - GROUPTASK_INIT(&peer->p_send_initiation, 0, (gtask_fn_t *)wg_send_initiation, peer); - taskqgroup_attach(qgroup_if_io_tqg, &peer->p_send_initiation, peer, NULL, NULL, "wg initiation"); - GROUPTASK_INIT(&peer->p_send_keepalive, 0, (gtask_fn_t *)wg_send_keepalive, peer); - taskqgroup_attach(qgroup_if_io_tqg, &peer->p_send_keepalive, peer, NULL, NULL, "wg keepalive"); - GROUPTASK_INIT(&peer->p_clear_secrets, 0, (gtask_fn_t *)noise_remote_clear, &peer->p_remote); - taskqgroup_attach(qgroup_if_io_tqg, &peer->p_clear_secrets, - &peer->p_remote, NULL, NULL, "wg clear secrets"); - - GROUPTASK_INIT(&peer->p_send, 0, (gtask_fn_t *)wg_deliver_out, peer); - taskqgroup_attach(qgroup_if_io_tqg, &peer->p_send, peer, NULL, NULL, "wg send"); - 
GROUPTASK_INIT(&peer->p_recv, 0, (gtask_fn_t *)wg_deliver_in, peer); - taskqgroup_attach(qgroup_if_io_tqg, &peer->p_recv, peer, NULL, NULL, "wg recv"); - - wg_timers_init(&peer->p_timers); - - peer->p_tx_bytes = counter_u64_alloc(M_WAITOK); - peer->p_rx_bytes = counter_u64_alloc(M_WAITOK); - - SLIST_INIT(&peer->p_unused_index); - SLIST_INSERT_HEAD(&peer->p_unused_index, &peer->p_index[0], - i_unused_entry); - SLIST_INSERT_HEAD(&peer->p_unused_index, &peer->p_index[1], - i_unused_entry); - SLIST_INSERT_HEAD(&peer->p_unused_index, &peer->p_index[2], - i_unused_entry); - - return (peer); -} - -#define WG_HASHTABLE_PEER_FOREACH(peer, i, ht) \ - for (i = 0; i < HASHTABLE_PEER_SIZE; i++) \ - LIST_FOREACH(peer, &(ht)->h_peers[i], p_hash_entry) -#define WG_HASHTABLE_PEER_FOREACH_SAFE(peer, i, ht, tpeer) \ - for (i = 0; i < HASHTABLE_PEER_SIZE; i++) \ - CK_LIST_FOREACH_SAFE(peer, &(ht)->h_peers[i], p_hash_entry, tpeer) -static void -wg_hashtable_init(struct wg_hashtable *ht) -{ - mtx_init(&ht->h_mtx, "hash lock", NULL, MTX_DEF); - arc4random_buf(&ht->h_secret, sizeof(ht->h_secret)); - ht->h_num_peers = 0; - ht->h_peers = hashinit(HASHTABLE_PEER_SIZE, M_DEVBUF, - &ht->h_peers_mask); -} - -static void -wg_hashtable_destroy(struct wg_hashtable *ht) -{ - MPASS(ht->h_num_peers == 0); - mtx_destroy(&ht->h_mtx); - hashdestroy(ht->h_peers, M_DEVBUF, ht->h_peers_mask); -} - -static void -wg_hashtable_peer_insert(struct wg_hashtable *ht, struct wg_peer *peer) -{ - uint64_t key; - - key = siphash24(&ht->h_secret, peer->p_remote.r_public, - sizeof(peer->p_remote.r_public)); - - mtx_lock(&ht->h_mtx); - ht->h_num_peers++; - CK_LIST_INSERT_HEAD(&ht->h_peers[key & ht->h_peers_mask], peer, p_hash_entry); - CK_LIST_INSERT_HEAD(&ht->h_peers_list, peer, p_entry); - mtx_unlock(&ht->h_mtx); -} - -static struct wg_peer * -wg_peer_lookup(struct wg_softc *sc, - const uint8_t pubkey[WG_KEY_SIZE]) -{ - struct wg_hashtable *ht = &sc->sc_hashtable; - uint64_t key; - struct wg_peer *i = NULL; - - key = siphash24(&ht->h_secret, pubkey, WG_KEY_SIZE); - - mtx_lock(&ht->h_mtx); - CK_LIST_FOREACH(i, &ht->h_peers[key & ht->h_peers_mask], p_hash_entry) { - if (timingsafe_bcmp(i->p_remote.r_public, pubkey, - WG_KEY_SIZE) == 0) - break; - } - mtx_unlock(&ht->h_mtx); - - return i; -} - -static void -wg_hashtable_peer_remove(struct wg_hashtable *ht, struct wg_peer *peer) -{ - mtx_lock(&ht->h_mtx); - ht->h_num_peers--; - CK_LIST_REMOVE(peer, p_hash_entry); - CK_LIST_REMOVE(peer, p_entry); - mtx_unlock(&ht->h_mtx); -} - -static void -wg_peer_free_deferred(epoch_context_t ctx) -{ - struct wg_peer *peer = __containerof(ctx, struct wg_peer, p_ctx); - counter_u64_free(peer->p_tx_bytes); - counter_u64_free(peer->p_rx_bytes); - rw_destroy(&peer->p_timers.t_lock); - rw_destroy(&peer->p_endpoint_lock); - free(peer, M_WG); -} - -static void -wg_peer_destroy(struct wg_peer *peer) -{ - /* Callers should already have called: - * wg_hashtable_peer_remove(&sc->sc_hashtable, peer); - */ - wg_aip_delete(&peer->p_sc->sc_aips, peer); - MPASS(CK_LIST_EMPTY(&peer->p_aips)); - - /* We disable all timers, so we can't call the following tasks. 
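wg_peer_lookup above derives each bucket as siphash24(h_secret, pubkey) & h_peers_mask, so bucket placement is unpredictable to anyone without the per-interface secret, and the mask trick again requires a power-of-two table (HASHTABLE_PEER_SIZE is 1 << 11). A sketch of the same bucketing with a stand-in mixer; toy_hash is illustrative only, not SipHash, and offers none of its keyed-PRF guarantees:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define TABLE_SIZE	(1u << 11)	/* HASHTABLE_PEER_SIZE */
#define TABLE_MASK	(TABLE_SIZE - 1)

/* Stand-in for the keyed siphash24(); illustration only. */
static uint64_t
toy_hash(uint64_t secret, const uint8_t *key, size_t len)
{
	uint64_t h = secret ^ 0x9e3779b97f4a7c15ULL;

	for (size_t i = 0; i < len; i++) {
		h ^= key[i];
		h *= 0x100000001b3ULL;
	}
	return (h);
}

int
main(void)
{
	uint8_t pubkey[32] = { 9 };

	printf("bucket %llu of %u\n",
	    (unsigned long long)(toy_hash(0x1234, pubkey,
	    sizeof(pubkey)) & TABLE_MASK), TABLE_SIZE);
	return (0);
}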
*/ - wg_timers_disable(&peer->p_timers); - - /* Ensure the tasks have finished running */ - GROUPTASK_DRAIN(&peer->p_clear_secrets); - GROUPTASK_DRAIN(&peer->p_send_initiation); - GROUPTASK_DRAIN(&peer->p_send_keepalive); - GROUPTASK_DRAIN(&peer->p_recv); - GROUPTASK_DRAIN(&peer->p_send); - - taskqgroup_detach(qgroup_if_io_tqg, &peer->p_clear_secrets); - taskqgroup_detach(qgroup_if_io_tqg, &peer->p_send_initiation); - taskqgroup_detach(qgroup_if_io_tqg, &peer->p_send_keepalive); - taskqgroup_detach(qgroup_if_io_tqg, &peer->p_recv); - taskqgroup_detach(qgroup_if_io_tqg, &peer->p_send); - - wg_queue_deinit(&peer->p_decap_queue); - wg_queue_deinit(&peer->p_encap_queue); - wg_queue_deinit(&peer->p_stage_queue); - - /* Final cleanup */ - --peer->p_sc->sc_peer_count; - noise_remote_clear(&peer->p_remote); - DPRINTF(peer->p_sc, "Peer %llu destroyed\n", (unsigned long long)peer->p_id); - NET_EPOCH_CALL(wg_peer_free_deferred, &peer->p_ctx); -} - -static void -wg_peer_set_endpoint_from_tag(struct wg_peer *peer, struct wg_tag *t) -{ - struct wg_endpoint *e = &t->t_endpoint; - - MPASS(e->e_remote.r_sa.sa_family != 0); - if (memcmp(e, &peer->p_endpoint, sizeof(*e)) == 0) - return; - - peer->p_endpoint = *e; -} - -static void -wg_peer_clear_src(struct wg_peer *peer) -{ - rw_rlock(&peer->p_endpoint_lock); - bzero(&peer->p_endpoint.e_local, sizeof(peer->p_endpoint.e_local)); - rw_runlock(&peer->p_endpoint_lock); -} - -static void -wg_peer_get_endpoint(struct wg_peer *p, struct wg_endpoint *e) -{ - memcpy(e, &p->p_endpoint, sizeof(*e)); -} - -/* Allowed IP */ -static int -wg_aip_init(struct wg_aip_table *tbl) -{ - int rc; - - tbl->t_count = 0; - rc = rn_inithead((void **)&tbl->t_ip, - offsetof(struct sockaddr_in, sin_addr) * NBBY); - - if (rc == 0) - return (ENOMEM); - RADIX_NODE_HEAD_LOCK_INIT(tbl->t_ip); -#ifdef INET6 - rc = rn_inithead((void **)&tbl->t_ip6, - offsetof(struct sockaddr_in6, sin6_addr) * NBBY); - if (rc == 0) { - free(tbl->t_ip, M_RTABLE); - return (ENOMEM); - } - RADIX_NODE_HEAD_LOCK_INIT(tbl->t_ip6); -#endif - return (0); -} - -static void -wg_aip_destroy(struct wg_aip_table *tbl) -{ - RADIX_NODE_HEAD_DESTROY(tbl->t_ip); - free(tbl->t_ip, M_RTABLE); -#ifdef INET6 - RADIX_NODE_HEAD_DESTROY(tbl->t_ip6); - free(tbl->t_ip6, M_RTABLE); -#endif -} - -static void -wg_aip_populate_aip4(struct wg_aip *aip, const struct in_addr *addr, - uint8_t mask) -{ - struct sockaddr_in *raddr, *rmask; - uint8_t *p; - unsigned int i; - - raddr = (struct sockaddr_in *)&aip->r_addr; - rmask = (struct sockaddr_in *)&aip->r_mask; - - raddr->sin_len = sizeof(*raddr); - raddr->sin_family = AF_INET; - raddr->sin_addr = *addr; - - rmask->sin_len = sizeof(*rmask); - p = (uint8_t *)&rmask->sin_addr.s_addr; - for (i = 0; i < mask / NBBY; i++) - p[i] = 0xff; - if ((mask % NBBY) != 0) - p[i] = (0xff00 >> (mask % NBBY)) & 0xff; - raddr->sin_addr.s_addr &= rmask->sin_addr.s_addr; -} - -static void -wg_aip_populate_aip6(struct wg_aip *aip, const struct in6_addr *addr, - uint8_t mask) -{ - struct sockaddr_in6 *raddr, *rmask; - - raddr = (struct sockaddr_in6 *)&aip->r_addr; - rmask = (struct sockaddr_in6 *)&aip->r_mask; - - raddr->sin6_len = sizeof(*raddr); - raddr->sin6_family = AF_INET6; - raddr->sin6_addr = *addr; - - rmask->sin6_len = sizeof(*rmask); - in6_prefixlen2mask(&rmask->sin6_addr, mask); - for (int i = 0; i < 4; ++i) - raddr->sin6_addr.__u6_addr.__u6_addr32[i] &= rmask->sin6_addr.__u6_addr.__u6_addr32[i]; -} - -/* wg_aip_take assumes that the caller guarantees the allowed-ip exists. 
*/ -static void -wg_aip_take(struct radix_node_head *root, struct wg_peer *peer, - struct wg_aip *route) -{ - struct radix_node *node; - struct wg_peer *ppeer; - - RADIX_NODE_HEAD_LOCK_ASSERT(root); - - node = root->rnh_lookup(&route->r_addr, &route->r_mask, - &root->rh); - MPASS(node != NULL); - - route = (struct wg_aip *)node; - ppeer = route->r_peer; - if (ppeer != peer) { - route->r_peer = peer; - - CK_LIST_REMOVE(route, r_entry); - CK_LIST_INSERT_HEAD(&peer->p_aips, route, r_entry); - } -} - -static int -wg_aip_add(struct wg_aip_table *tbl, struct wg_peer *peer, - const struct wg_allowedip *aip) -{ - struct radix_node *node; - struct radix_node_head *root; - struct wg_aip *route; - sa_family_t family; - bool needfree = false; - - family = aip->family; - if (family != AF_INET && family != AF_INET6) { - return (EINVAL); - } - - route = malloc(sizeof(*route), M_WG, M_WAITOK|M_ZERO); - switch (family) { - case AF_INET: - root = tbl->t_ip; - - wg_aip_populate_aip4(route, &aip->ip4, aip->cidr); - break; - case AF_INET6: - root = tbl->t_ip6; - - wg_aip_populate_aip6(route, &aip->ip6, aip->cidr); - break; - } - - route->r_peer = peer; - - RADIX_NODE_HEAD_LOCK(root); - node = root->rnh_addaddr(&route->r_addr, &route->r_mask, &root->rh, - route->r_nodes); - if (node == route->r_nodes) { - tbl->t_count++; - CK_LIST_INSERT_HEAD(&peer->p_aips, route, r_entry); - } else { - needfree = true; - wg_aip_take(root, peer, route); - } - RADIX_NODE_HEAD_UNLOCK(root); - if (needfree) { - free(route, M_WG); - } - return (0); -} - -static struct wg_peer * -wg_aip_lookup(struct wg_aip_table *tbl, struct mbuf *m, - enum route_direction dir) -{ - RADIX_NODE_HEAD_RLOCK_TRACKER; - struct ip *iphdr; - struct ip6_hdr *ip6hdr; - struct radix_node_head *root; - struct radix_node *node; - struct wg_peer *peer = NULL; - struct sockaddr_in sin; - struct sockaddr_in6 sin6; - void *addr; - int version; - - NET_EPOCH_ASSERT(); - iphdr = mtod(m, struct ip *); - version = iphdr->ip_v; - - if (__predict_false(dir != IN && dir != OUT)) - return NULL; - - if (version == 4) { - root = tbl->t_ip; - memset(&sin, 0, sizeof(sin)); - sin.sin_len = sizeof(struct sockaddr_in); - if (dir == IN) - sin.sin_addr = iphdr->ip_src; - else - sin.sin_addr = iphdr->ip_dst; - addr = &sin; - } else if (version == 6) { - ip6hdr = mtod(m, struct ip6_hdr *); - memset(&sin6, 0, sizeof(sin6)); - sin6.sin6_len = sizeof(struct sockaddr_in6); - - root = tbl->t_ip6; - if (dir == IN) - addr = &ip6hdr->ip6_src; - else - addr = &ip6hdr->ip6_dst; - memcpy(&sin6.sin6_addr, addr, sizeof(sin6.sin6_addr)); - addr = &sin6; - } else { - return (NULL); - } - RADIX_NODE_HEAD_RLOCK(root); - if ((node = root->rnh_matchaddr(addr, &root->rh)) != NULL) { - peer = ((struct wg_aip *) node)->r_peer; - } - RADIX_NODE_HEAD_RUNLOCK(root); - return (peer); -} - -struct peer_del_arg { - struct radix_node_head * pda_head; - struct wg_peer *pda_peer; - struct wg_aip_table *pda_tbl; -}; - -static int -wg_peer_remove(struct radix_node *rn, void *arg) -{ - struct peer_del_arg *pda = arg; - struct wg_peer *peer = pda->pda_peer; - struct radix_node_head * rnh = pda->pda_head; - struct wg_aip_table *tbl = pda->pda_tbl; - struct wg_aip *route = (struct wg_aip *)rn; - struct radix_node *x; - - if (route->r_peer != peer) - return (0); - x = (struct radix_node *)rnh->rnh_deladdr(&route->r_addr, - &route->r_mask, &rnh->rh); - if (x != NULL) { - tbl->t_count--; - CK_LIST_REMOVE(route, r_entry); - free(route, M_WG); - } - return (0); -} - -static void -wg_peer_remove_all(struct wg_softc *sc) -{ - 
struct wg_peer *peer, *tpeer; - - sx_assert(&sc->sc_lock, SX_XLOCKED); - - CK_LIST_FOREACH_SAFE(peer, &sc->sc_hashtable.h_peers_list, - p_entry, tpeer) { - wg_hashtable_peer_remove(&sc->sc_hashtable, peer); - wg_peer_destroy(peer); - } -} - -static int -wg_aip_delete(struct wg_aip_table *tbl, struct wg_peer *peer) -{ - struct peer_del_arg pda; - - pda.pda_peer = peer; - pda.pda_tbl = tbl; - RADIX_NODE_HEAD_LOCK(tbl->t_ip); - pda.pda_head = tbl->t_ip; - rn_walktree(&tbl->t_ip->rh, wg_peer_remove, &pda); - RADIX_NODE_HEAD_UNLOCK(tbl->t_ip); - - RADIX_NODE_HEAD_LOCK(tbl->t_ip6); - pda.pda_head = tbl->t_ip6; - rn_walktree(&tbl->t_ip6->rh, wg_peer_remove, &pda); - RADIX_NODE_HEAD_UNLOCK(tbl->t_ip6); - return (0); -} - -static int -wg_socket_init(struct wg_softc *sc, in_port_t port) -{ - struct thread *td; - struct ucred *cred; - struct socket *so4, *so6; - int rc; - - sx_assert(&sc->sc_lock, SX_XLOCKED); - - so4 = so6 = NULL; - td = curthread; - if ((cred = sc->sc_ucred) == NULL) - return (EBUSY); - - /* - * For socket creation, we use the creds of the thread that created the - * tunnel rather than the current thread to maintain the semantics that - * WireGuard has on Linux with network namespaces -- that the sockets - * are created in their home vnet so that they can be configured and - * functionally attached to a foreign vnet as the jail's only interface - * to the network. - */ - rc = socreate(AF_INET, &so4, SOCK_DGRAM, IPPROTO_UDP, cred, td); - if (rc != 0) - goto out; - - rc = udp_set_kernel_tunneling(so4, wg_input, NULL, sc); - /* - * udp_set_kernel_tunneling can only fail if there is already a tunneling function set. - * This should never happen with a new socket. - */ - MPASS(rc == 0); - - rc = socreate(AF_INET6, &so6, SOCK_DGRAM, IPPROTO_UDP, cred, td); - if (rc != 0) - goto out; - rc = udp_set_kernel_tunneling(so6, wg_input, NULL, sc); - MPASS(rc == 0); - - so4->so_user_cookie = so6->so_user_cookie = sc->sc_socket.so_user_cookie; - - rc = wg_socket_bind(so4, so6, &port); - if (rc == 0) { - sc->sc_socket.so_port = port; - wg_socket_set(sc, so4, so6); - } -out: - if (rc != 0) { - if (so4 != NULL) - soclose(so4); - if (so6 != NULL) - soclose(so6); - } - return (rc); -} - -static void wg_socket_set_cookie(struct wg_softc *sc, uint32_t user_cookie) -{ - struct wg_socket *so = &sc->sc_socket; - - sx_assert(&sc->sc_lock, SX_XLOCKED); - - so->so_user_cookie = user_cookie; - if (so->so_so4) - so->so_so4->so_user_cookie = user_cookie; - if (so->so_so6) - so->so_so6->so_user_cookie = user_cookie; -} - -static void -wg_socket_uninit(struct wg_softc *sc) -{ - wg_socket_set(sc, NULL, NULL); -} - -static void -wg_socket_set(struct wg_softc *sc, struct socket *new_so4, struct socket *new_so6) -{ - struct wg_socket *so = &sc->sc_socket; - struct socket *so4, *so6; - - sx_assert(&sc->sc_lock, SX_XLOCKED); - - so4 = atomic_load_ptr(&so->so_so4); - so6 = atomic_load_ptr(&so->so_so6); - atomic_store_ptr(&so->so_so4, new_so4); - atomic_store_ptr(&so->so_so6, new_so6); - - if (!so4 && !so6) - return; - NET_EPOCH_WAIT(); - if (so4) - soclose(so4); - if (so6) - soclose(so6); -} - -union wg_sockaddr { - struct sockaddr sa; - struct sockaddr_in in4; - struct sockaddr_in6 in6; -}; - -static int -wg_socket_bind(struct socket *so4, struct socket *so6, in_port_t *requested_port) -{ - int rc; - struct thread *td; - union wg_sockaddr laddr; - struct sockaddr_in *sin; - struct sockaddr_in6 *sin6; - in_port_t port = *requested_port; - - td = curthread; - bzero(&laddr, sizeof(laddr)); - sin = &laddr.in4; - sin->sin_len 
= sizeof(laddr.in4); - sin->sin_family = AF_INET; - sin->sin_port = htons(port); - sin->sin_addr = (struct in_addr) { 0 }; - - if ((rc = sobind(so4, &laddr.sa, td)) != 0) - return (rc); - - if (port == 0) { - rc = sogetsockaddr(so4, (struct sockaddr **)&sin); - if (rc != 0) - return (rc); - port = ntohs(sin->sin_port); - free(sin, M_SONAME); - } - - sin6 = &laddr.in6; - sin6->sin6_len = sizeof(laddr.in6); - sin6->sin6_family = AF_INET6; - sin6->sin6_port = htons(port); - sin6->sin6_addr = (struct in6_addr) { .s6_addr = { 0 } }; - rc = sobind(so6, &laddr.sa, td); - if (rc != 0) - return (rc); - *requested_port = port; - return (0); -} - -static int -wg_send(struct wg_softc *sc, struct wg_endpoint *e, struct mbuf *m) -{ - struct epoch_tracker et; - struct sockaddr *sa; - struct wg_socket *so = &sc->sc_socket; - struct socket *so4, *so6; - struct mbuf *control = NULL; - int ret = 0; - size_t len = m->m_pkthdr.len; - - /* Get local control address before locking */ - if (e->e_remote.r_sa.sa_family == AF_INET) { - if (e->e_local.l_in.s_addr != INADDR_ANY) - control = sbcreatecontrol((caddr_t)&e->e_local.l_in, - sizeof(struct in_addr), IP_SENDSRCADDR, - IPPROTO_IP); -#ifdef INET6 - } else if (e->e_remote.r_sa.sa_family == AF_INET6) { - if (!IN6_IS_ADDR_UNSPECIFIED(&e->e_local.l_in6)) - control = sbcreatecontrol((caddr_t)&e->e_local.l_pktinfo6, - sizeof(struct in6_pktinfo), IPV6_PKTINFO, - IPPROTO_IPV6); -#endif - } else { - m_freem(m); - return (EAFNOSUPPORT); - } - - /* Get remote address */ - sa = &e->e_remote.r_sa; - - NET_EPOCH_ENTER(et); - so4 = atomic_load_ptr(&so->so_so4); - so6 = atomic_load_ptr(&so->so_so6); - if (e->e_remote.r_sa.sa_family == AF_INET && so4 != NULL) - ret = sosend(so4, sa, NULL, m, control, 0, curthread); - else if (e->e_remote.r_sa.sa_family == AF_INET6 && so6 != NULL) - ret = sosend(so6, sa, NULL, m, control, 0, curthread); - else { - ret = ENOTCONN; - m_freem(control); - m_freem(m); - } - NET_EPOCH_EXIT(et); - if (ret == 0) { - if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1); - if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, len); - } - return (ret); -} - -static void -wg_send_buf(struct wg_softc *sc, struct wg_endpoint *e, uint8_t *buf, - size_t len) -{ - struct mbuf *m; - int ret = 0; - -retry: - m = m_gethdr(M_WAITOK, MT_DATA); - m->m_len = 0; - m_copyback(m, 0, len, buf); - - if (ret == 0) { - ret = wg_send(sc, e, m); - /* Retry if we couldn't bind to e->e_local */ - if (ret == EADDRNOTAVAIL) { - bzero(&e->e_local, sizeof(e->e_local)); - goto retry; - } - } else { - ret = wg_send(sc, e, m); - } - if (ret) - DPRINTF(sc, "Unable to send packet: %d\n", ret); -} - -/* TODO Tag */ -static struct wg_tag * -wg_tag_get(struct mbuf *m) -{ - struct m_tag *tag; - - tag = m_tag_find(m, MTAG_WIREGUARD, NULL); - if (tag == NULL) { - tag = m_tag_get(MTAG_WIREGUARD, sizeof(struct wg_tag), M_NOWAIT|M_ZERO); - m_tag_prepend(m, tag); - MPASS(!SLIST_EMPTY(&m->m_pkthdr.tags)); - MPASS(m_tag_locate(m, MTAG_ABI_COMPAT, MTAG_WIREGUARD, NULL) == tag); - } - return (struct wg_tag *)tag; -} - -static struct wg_endpoint * -wg_mbuf_endpoint_get(struct mbuf *m) -{ - struct wg_tag *hdr; - - if ((hdr = wg_tag_get(m)) == NULL) - return (NULL); - - return (&hdr->t_endpoint); -} - -/* Timers */ -static void -wg_timers_init(struct wg_timers *t) -{ - bzero(t, sizeof(*t)); - - t->t_disabled = 1; - rw_init(&t->t_lock, "wg peer timers"); - callout_init(&t->t_retry_handshake, true); - callout_init(&t->t_send_keepalive, true); - callout_init(&t->t_new_handshake, true); - 
callout_init(&t->t_zero_key_material, true); - callout_init(&t->t_persistent_keepalive, true); -} - -static void -wg_timers_enable(struct wg_timers *t) -{ - rw_wlock(&t->t_lock); - t->t_disabled = 0; - rw_wunlock(&t->t_lock); - wg_timers_run_persistent_keepalive(t); -} - -static void -wg_timers_disable(struct wg_timers *t) -{ - rw_wlock(&t->t_lock); - t->t_disabled = 1; - t->t_need_another_keepalive = 0; - rw_wunlock(&t->t_lock); - - callout_stop(&t->t_retry_handshake); - callout_stop(&t->t_send_keepalive); - callout_stop(&t->t_new_handshake); - callout_stop(&t->t_zero_key_material); - callout_stop(&t->t_persistent_keepalive); -} - -static void -wg_timers_set_persistent_keepalive(struct wg_timers *t, uint16_t interval) -{ - rw_rlock(&t->t_lock); - if (!t->t_disabled) { - t->t_persistent_keepalive_interval = interval; - wg_timers_run_persistent_keepalive(t); - } - rw_runlock(&t->t_lock); -} - -static void -wg_timers_get_last_handshake(struct wg_timers *t, struct timespec *time) -{ - rw_rlock(&t->t_lock); - time->tv_sec = t->t_handshake_complete.tv_sec; - time->tv_nsec = t->t_handshake_complete.tv_nsec; - rw_runlock(&t->t_lock); -} - -static int -wg_timers_expired_handshake_last_sent(struct wg_timers *t) -{ - struct timespec uptime; - struct timespec expire = { .tv_sec = REKEY_TIMEOUT, .tv_nsec = 0 }; - - getnanouptime(&uptime); - timespecadd(&t->t_handshake_last_sent, &expire, &expire); - return timespeccmp(&uptime, &expire, >) ? ETIMEDOUT : 0; -} - -static int -wg_timers_check_handshake_last_sent(struct wg_timers *t) -{ - int ret; - - rw_wlock(&t->t_lock); - if ((ret = wg_timers_expired_handshake_last_sent(t)) == ETIMEDOUT) - getnanouptime(&t->t_handshake_last_sent); - rw_wunlock(&t->t_lock); - return (ret); -} - -/* Should be called after an authenticated data packet is sent. */ -static void -wg_timers_event_data_sent(struct wg_timers *t) -{ - rw_rlock(&t->t_lock); - if (!t->t_disabled && !callout_pending(&t->t_new_handshake)) - callout_reset(&t->t_new_handshake, MSEC_2_TICKS( - NEW_HANDSHAKE_TIMEOUT * 1000 + - arc4random_uniform(REKEY_TIMEOUT_JITTER)), - (timeout_t *)wg_timers_run_new_handshake, t); - rw_runlock(&t->t_lock); -} - -/* Should be called after an authenticated data packet is received. */ -static void -wg_timers_event_data_received(struct wg_timers *t) -{ - rw_rlock(&t->t_lock); - if (!t->t_disabled) { - if (!callout_pending(&t->t_send_keepalive)) { - callout_reset(&t->t_send_keepalive, - MSEC_2_TICKS(KEEPALIVE_TIMEOUT * 1000), - (timeout_t *)wg_timers_run_send_keepalive, t); - } else { - t->t_need_another_keepalive = 1; - } - } - rw_runlock(&t->t_lock); -} - -/* - * Should be called after any type of authenticated packet is sent, whether - * keepalive, data, or handshake. - */ -static void -wg_timers_event_any_authenticated_packet_sent(struct wg_timers *t) -{ - callout_stop(&t->t_send_keepalive); -} - -/* - * Should be called after any type of authenticated packet is received, whether - * keepalive, data, or handshake. - */ -static void -wg_timers_event_any_authenticated_packet_received(struct wg_timers *t) -{ - callout_stop(&t->t_new_handshake); -} - -/* - * Should be called before a packet with authentication, whether - * keepalive, data, or handshake is sent, or after one is received. 
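wg_timers_expired_handshake_last_sent above gates initiations on the REKEY_TIMEOUT window using the kernel's getnanouptime/timespecadd/timespeccmp helpers. A userspace analogue of the same check, assuming POSIX clock_gettime with CLOCK_MONOTONIC in place of the kernel uptime clock:

#include <time.h>

#define REKEY_TIMEOUT	5	/* seconds, as defined above */

static int
handshake_expired(const struct timespec *last_sent)
{
	struct timespec now, expire = *last_sent;

	clock_gettime(CLOCK_MONOTONIC, &now);
	expire.tv_sec += REKEY_TIMEOUT;		/* timespecadd(last, {5,0}) */
	if (now.tv_sec != expire.tv_sec)	/* timespeccmp(&now, &expire, >) */
		return (now.tv_sec > expire.tv_sec);
	return (now.tv_nsec > expire.tv_nsec);
}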
- */ -static void -wg_timers_event_any_authenticated_packet_traversal(struct wg_timers *t) -{ - rw_rlock(&t->t_lock); - if (!t->t_disabled && t->t_persistent_keepalive_interval > 0) - callout_reset(&t->t_persistent_keepalive, - MSEC_2_TICKS(t->t_persistent_keepalive_interval * 1000), - (timeout_t *)wg_timers_run_persistent_keepalive, t); - rw_runlock(&t->t_lock); -} - -/* Should be called after a handshake initiation message is sent. */ -static void -wg_timers_event_handshake_initiated(struct wg_timers *t) -{ - rw_rlock(&t->t_lock); - if (!t->t_disabled) - callout_reset(&t->t_retry_handshake, MSEC_2_TICKS( - REKEY_TIMEOUT * 1000 + - arc4random_uniform(REKEY_TIMEOUT_JITTER)), - (timeout_t *)wg_timers_run_retry_handshake, t); - rw_runlock(&t->t_lock); -} - -static void -wg_timers_event_handshake_responded(struct wg_timers *t) -{ - rw_wlock(&t->t_lock); - getnanouptime(&t->t_handshake_last_sent); - rw_wunlock(&t->t_lock); -} - -/* - * Should be called after a handshake response message is received and processed - * or when getting key confirmation via the first data message. - */ -static void -wg_timers_event_handshake_complete(struct wg_timers *t) -{ - rw_wlock(&t->t_lock); - if (!t->t_disabled) { - callout_stop(&t->t_retry_handshake); - t->t_handshake_retries = 0; - getnanotime(&t->t_handshake_complete); - wg_timers_run_send_keepalive(t); - } - rw_wunlock(&t->t_lock); -} - -/* - * Should be called after an ephemeral key is created, which is before sending a - * handshake response or after receiving a handshake response. - */ -static void -wg_timers_event_session_derived(struct wg_timers *t) -{ - rw_rlock(&t->t_lock); - if (!t->t_disabled) { - callout_reset(&t->t_zero_key_material, - MSEC_2_TICKS(REJECT_AFTER_TIME * 3 * 1000), - (timeout_t *)wg_timers_run_zero_key_material, t); - } - rw_runlock(&t->t_lock); -} - -static void -wg_timers_event_want_initiation(struct wg_timers *t) -{ - rw_rlock(&t->t_lock); - if (!t->t_disabled) - wg_timers_run_send_initiation(t, 0); - rw_runlock(&t->t_lock); -} - -static void -wg_timers_event_reset_handshake_last_sent(struct wg_timers *t) -{ - rw_wlock(&t->t_lock); - t->t_handshake_last_sent.tv_sec -= (REKEY_TIMEOUT + 1); - rw_wunlock(&t->t_lock); -} - -static void -wg_timers_run_send_initiation(struct wg_timers *t, int is_retry) -{ - struct wg_peer *peer = __containerof(t, struct wg_peer, p_timers); - if (!is_retry) - t->t_handshake_retries = 0; - if (wg_timers_expired_handshake_last_sent(t) == ETIMEDOUT) - GROUPTASK_ENQUEUE(&peer->p_send_initiation); -} - -static void -wg_timers_run_retry_handshake(struct wg_timers *t) -{ - struct wg_peer *peer = __containerof(t, struct wg_peer, p_timers); - - rw_wlock(&t->t_lock); - if (t->t_handshake_retries <= MAX_TIMER_HANDSHAKES) { - t->t_handshake_retries++; - rw_wunlock(&t->t_lock); - - DPRINTF(peer->p_sc, "Handshake for peer %llu did not complete " - "after %d seconds, retrying (try %d)\n", - (unsigned long long)peer->p_id, - REKEY_TIMEOUT, t->t_handshake_retries + 1); - wg_peer_clear_src(peer); - wg_timers_run_send_initiation(t, 1); - } else { - rw_wunlock(&t->t_lock); - - DPRINTF(peer->p_sc, "Handshake for peer %llu did not complete " - "after %d retries, giving up\n", - (unsigned long long) peer->p_id, MAX_TIMER_HANDSHAKES + 2); - - callout_stop(&t->t_send_keepalive); - wg_queue_purge(&peer->p_stage_queue); - if (!callout_pending(&t->t_zero_key_material)) - callout_reset(&t->t_zero_key_material, - MSEC_2_TICKS(REJECT_AFTER_TIME * 3 * 1000), - (timeout_t *)wg_timers_run_zero_key_material, t); - } -} - -static void 
-wg_timers_run_send_keepalive(struct wg_timers *t) -{ - struct wg_peer *peer = __containerof(t, struct wg_peer, p_timers); - - GROUPTASK_ENQUEUE(&peer->p_send_keepalive); - if (t->t_need_another_keepalive) { - t->t_need_another_keepalive = 0; - callout_reset(&t->t_send_keepalive, - MSEC_2_TICKS(KEEPALIVE_TIMEOUT * 1000), - (timeout_t *)wg_timers_run_send_keepalive, t); - } -} - -static void -wg_timers_run_new_handshake(struct wg_timers *t) -{ - struct wg_peer *peer = __containerof(t, struct wg_peer, p_timers); - - DPRINTF(peer->p_sc, "Retrying handshake with peer %llu because we " - "stopped hearing back after %d seconds\n", - (unsigned long long)peer->p_id, NEW_HANDSHAKE_TIMEOUT); - wg_peer_clear_src(peer); - - wg_timers_run_send_initiation(t, 0); -} - -static void -wg_timers_run_zero_key_material(struct wg_timers *t) -{ - struct wg_peer *peer = __containerof(t, struct wg_peer, p_timers); - - DPRINTF(peer->p_sc, "Zeroing out all keys for peer %llu, since we " - "haven't received a new one in %d seconds\n", - (unsigned long long)peer->p_id, REJECT_AFTER_TIME * 3); - GROUPTASK_ENQUEUE(&peer->p_clear_secrets); -} - -static void -wg_timers_run_persistent_keepalive(struct wg_timers *t) -{ - struct wg_peer *peer = __containerof(t, struct wg_peer, p_timers); - - if (t->t_persistent_keepalive_interval != 0) - GROUPTASK_ENQUEUE(&peer->p_send_keepalive); -} - -/* TODO Handshake */ -static void -wg_peer_send_buf(struct wg_peer *peer, uint8_t *buf, size_t len) -{ - struct wg_endpoint endpoint; - - counter_u64_add(peer->p_tx_bytes, len); - wg_timers_event_any_authenticated_packet_traversal(&peer->p_timers); - wg_timers_event_any_authenticated_packet_sent(&peer->p_timers); - wg_peer_get_endpoint(peer, &endpoint); - wg_send_buf(peer->p_sc, &endpoint, buf, len); -} - -static void -wg_send_initiation(struct wg_peer *peer) -{ - struct wg_pkt_initiation pkt; - struct epoch_tracker et; - - if (wg_timers_check_handshake_last_sent(&peer->p_timers) != ETIMEDOUT) - return; - DPRINTF(peer->p_sc, "Sending handshake initiation to peer %llu\n", - (unsigned long long)peer->p_id); - - NET_EPOCH_ENTER(et); - if (noise_create_initiation(&peer->p_remote, &pkt.s_idx, pkt.ue, - pkt.es, pkt.ets) != 0) - goto out; - pkt.t = WG_PKT_INITIATION; - cookie_maker_mac(&peer->p_cookie, &pkt.m, &pkt, - sizeof(pkt)-sizeof(pkt.m)); - wg_peer_send_buf(peer, (uint8_t *)&pkt, sizeof(pkt)); - wg_timers_event_handshake_initiated(&peer->p_timers); -out: - NET_EPOCH_EXIT(et); -} - -static void -wg_send_response(struct wg_peer *peer) -{ - struct wg_pkt_response pkt; - struct epoch_tracker et; - - NET_EPOCH_ENTER(et); - - DPRINTF(peer->p_sc, "Sending handshake response to peer %llu\n", - (unsigned long long)peer->p_id); - - if (noise_create_response(&peer->p_remote, &pkt.s_idx, &pkt.r_idx, - pkt.ue, pkt.en) != 0) - goto out; - if (noise_remote_begin_session(&peer->p_remote) != 0) - goto out; - - wg_timers_event_session_derived(&peer->p_timers); - pkt.t = WG_PKT_RESPONSE; - cookie_maker_mac(&peer->p_cookie, &pkt.m, &pkt, - sizeof(pkt)-sizeof(pkt.m)); - wg_timers_event_handshake_responded(&peer->p_timers); - wg_peer_send_buf(peer, (uint8_t*)&pkt, sizeof(pkt)); -out: - NET_EPOCH_EXIT(et); -} - -static void -wg_send_cookie(struct wg_softc *sc, struct cookie_macs *cm, uint32_t idx, - struct mbuf *m) -{ - struct wg_pkt_cookie pkt; - struct wg_endpoint *e; - - DPRINTF(sc, "Sending cookie response for denied handshake message\n"); - - pkt.t = WG_PKT_COOKIE; - pkt.r_idx = idx; - - e = wg_mbuf_endpoint_get(m); - 
cookie_checker_create_payload(&sc->sc_cookie, cm, pkt.nonce, - pkt.ec, &e->e_remote.r_sa); - wg_send_buf(sc, e, (uint8_t *)&pkt, sizeof(pkt)); -} - -static void -wg_send_keepalive(struct wg_peer *peer) -{ - struct mbuf *m = NULL; - struct wg_tag *t; - struct epoch_tracker et; - - if (wg_queue_len(&peer->p_stage_queue) != 0) { - NET_EPOCH_ENTER(et); - goto send; - } - if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) - return; - if ((t = wg_tag_get(m)) == NULL) { - m_freem(m); - return; - } - t->t_peer = peer; - t->t_mbuf = NULL; - t->t_done = 0; - t->t_mtu = 0; /* MTU == 0 OK for keepalive */ - - NET_EPOCH_ENTER(et); - wg_queue_stage(peer, m); -send: - wg_queue_out(peer); - NET_EPOCH_EXIT(et); -} - -static int -wg_cookie_validate_packet(struct cookie_checker *checker, struct mbuf *m, - int under_load) -{ - struct wg_pkt_initiation *init; - struct wg_pkt_response *resp; - struct cookie_macs *macs; - struct wg_endpoint *e; - int type, size; - void *data; - - type = *mtod(m, uint32_t *); - data = m->m_data; - e = wg_mbuf_endpoint_get(m); - if (type == WG_PKT_INITIATION) { - init = mtod(m, struct wg_pkt_initiation *); - macs = &init->m; - size = sizeof(*init) - sizeof(*macs); - } else if (type == WG_PKT_RESPONSE) { - resp = mtod(m, struct wg_pkt_response *); - macs = &resp->m; - size = sizeof(*resp) - sizeof(*macs); - } else - return 0; - - return (cookie_checker_validate_macs(checker, macs, data, size, - under_load, &e->e_remote.r_sa)); -} - - -static void -wg_handshake(struct wg_softc *sc, struct mbuf *m) -{ - struct wg_pkt_initiation *init; - struct wg_pkt_response *resp; - struct noise_remote *remote; - struct wg_pkt_cookie *cook; - struct wg_peer *peer; - struct wg_tag *t; - - /* This is global, so that our load calculation applies to the whole - * system. We don't care about races with it at all. 
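wg_cookie_validate_packet above hands cookie_checker_validate_macs everything up to, but not including, the trailing struct cookie_macs, selecting that span as sizeof(*msg) - sizeof(*macs). The arithmetic only works because the MACs are the last member and the layout carries no tail padding. A standalone check of that invariant, assuming the usual WireGuard/Noise sizes (32-byte keys, 16-byte tags and MACs, 12-byte timestamp), which are not all visible in this diff:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

struct macs { uint8_t mac1[16], mac2[16]; };

struct init_msg {			/* mirrors wg_pkt_initiation above */
	uint32_t	t;
	uint32_t	s_idx;
	uint8_t		ue[32];
	uint8_t		es[32 + 16];
	uint8_t		ets[12 + 16];
	struct macs	m;		/* MACs always come last */
};

int
main(void)
{
	/* the validated region ends exactly where the MACs begin */
	assert(sizeof(struct init_msg) - sizeof(struct macs) ==
	    offsetof(struct init_msg, m));
	return (0);
}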
- */ - static struct timeval wg_last_underload; - static const struct timeval underload_interval = { UNDERLOAD_TIMEOUT, 0 }; - bool packet_needs_cookie = false; - int underload, res; - - underload = mbufq_len(&sc->sc_handshake_queue) >= - MAX_QUEUED_HANDSHAKES / 8; - if (underload) - getmicrouptime(&wg_last_underload); - else if (wg_last_underload.tv_sec != 0) { - if (!ratecheck(&wg_last_underload, &underload_interval)) - underload = 1; - else - bzero(&wg_last_underload, sizeof(wg_last_underload)); - } - - res = wg_cookie_validate_packet(&sc->sc_cookie, m, underload); - - if (res && res != EAGAIN) { - printf("validate_packet got %d\n", res); - goto free; - } - if (res == EINVAL) { - DPRINTF(sc, "Invalid initiation MAC\n"); - goto free; - } else if (res == ECONNREFUSED) { - DPRINTF(sc, "Handshake ratelimited\n"); - goto free; - } else if (res == EAGAIN) { - packet_needs_cookie = true; - } else if (res != 0) { - DPRINTF(sc, "Unexpected handshake ratelimit response: %d\n", res); - goto free; - } - - t = wg_tag_get(m); - switch (*mtod(m, uint32_t *)) { - case WG_PKT_INITIATION: - init = mtod(m, struct wg_pkt_initiation *); - - if (packet_needs_cookie) { - wg_send_cookie(sc, &init->m, init->s_idx, m); - goto free; - } - if (noise_consume_initiation(&sc->sc_local, &remote, - init->s_idx, init->ue, init->es, init->ets) != 0) { - DPRINTF(sc, "Invalid handshake initiation"); - goto free; - } - - peer = __containerof(remote, struct wg_peer, p_remote); - DPRINTF(sc, "Receiving handshake initiation from peer %llu\n", - (unsigned long long)peer->p_id); - counter_u64_add(peer->p_rx_bytes, sizeof(*init)); - if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1); - if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, sizeof(*init)); - wg_peer_set_endpoint_from_tag(peer, t); - wg_send_response(peer); - break; - case WG_PKT_RESPONSE: - resp = mtod(m, struct wg_pkt_response *); - - if (packet_needs_cookie) { - wg_send_cookie(sc, &resp->m, resp->s_idx, m); - goto free; - } - - if ((remote = wg_index_get(sc, resp->r_idx)) == NULL) { - DPRINTF(sc, "Unknown handshake response\n"); - goto free; - } - peer = __containerof(remote, struct wg_peer, p_remote); - if (noise_consume_response(remote, resp->s_idx, resp->r_idx, - resp->ue, resp->en) != 0) { - DPRINTF(sc, "Invalid handshake response\n"); - goto free; - } - - DPRINTF(sc, "Receiving handshake response from peer %llu\n", - (unsigned long long)peer->p_id); - counter_u64_add(peer->p_rx_bytes, sizeof(*resp)); - if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1); - if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, sizeof(*resp)); - wg_peer_set_endpoint_from_tag(peer, t); - if (noise_remote_begin_session(&peer->p_remote) == 0) { - wg_timers_event_session_derived(&peer->p_timers); - wg_timers_event_handshake_complete(&peer->p_timers); - } - break; - case WG_PKT_COOKIE: - cook = mtod(m, struct wg_pkt_cookie *); - - if ((remote = wg_index_get(sc, cook->r_idx)) == NULL) { - DPRINTF(sc, "Unknown cookie index\n"); - goto free; - } - - peer = __containerof(remote, struct wg_peer, p_remote); - - if (cookie_maker_consume_payload(&peer->p_cookie, - cook->nonce, cook->ec) != 0) { - DPRINTF(sc, "Could not decrypt cookie response\n"); - goto free; - } - - DPRINTF(sc, "Receiving cookie response\n"); - goto free; - default: - goto free; - } - MPASS(peer != NULL); - wg_timers_event_any_authenticated_packet_received(&peer->p_timers); - wg_timers_event_any_authenticated_packet_traversal(&peer->p_timers); - -free: - m_freem(m); -} - -static void -wg_softc_handshake_receive(struct wg_softc *sc) -{ - 
struct mbuf *m; - - while ((m = mbufq_dequeue(&sc->sc_handshake_queue)) != NULL) - wg_handshake(sc, m); -} - -/* TODO Encrypt */ -static void -wg_encap(struct wg_softc *sc, struct mbuf *m) -{ - struct wg_pkt_data *data; - size_t padding_len, plaintext_len, out_len; - struct mbuf *mc; - struct wg_peer *peer; - struct wg_tag *t; - uint64_t nonce; - int res, allocation_order; - - NET_EPOCH_ASSERT(); - t = wg_tag_get(m); - peer = t->t_peer; - - plaintext_len = MIN(WG_PKT_WITH_PADDING(m->m_pkthdr.len), t->t_mtu); - padding_len = plaintext_len - m->m_pkthdr.len; - out_len = sizeof(struct wg_pkt_data) + plaintext_len + NOISE_AUTHTAG_LEN; - - if (out_len <= MCLBYTES) - allocation_order = MCLBYTES; - else if (out_len <= MJUMPAGESIZE) - allocation_order = MJUMPAGESIZE; - else if (out_len <= MJUM9BYTES) - allocation_order = MJUM9BYTES; - else if (out_len <= MJUM16BYTES) - allocation_order = MJUM16BYTES; - else - goto error; - - if ((mc = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, allocation_order)) == NULL) - goto error; - - data = mtod(mc, struct wg_pkt_data *); - m_copydata(m, 0, m->m_pkthdr.len, data->buf); - bzero(data->buf + m->m_pkthdr.len, padding_len); - - data->t = WG_PKT_DATA; - - res = noise_remote_encrypt(&peer->p_remote, &data->r_idx, &nonce, - data->buf, plaintext_len); - nonce = htole64(nonce); /* Wire format is little endian. */ - memcpy(data->nonce, &nonce, sizeof(data->nonce)); - - if (__predict_false(res)) { - if (res == EINVAL) { - wg_timers_event_want_initiation(&peer->p_timers); - m_freem(mc); - goto error; - } else if (res == ESTALE) { - wg_timers_event_want_initiation(&peer->p_timers); - } else { - m_freem(mc); - goto error; - } - } - - /* A packet with length 0 is a keepalive packet */ - if (m->m_pkthdr.len == 0) - DPRINTF(sc, "Sending keepalive packet to peer %llu\n", - (unsigned long long)peer->p_id); - /* - * Set the correct output value here since it will be copied - * when we move the pkthdr in send. - */ - mc->m_len = mc->m_pkthdr.len = out_len; - mc->m_flags &= ~(M_MCAST | M_BCAST); - - t->t_mbuf = mc; - error: - /* XXX membar ? */ - t->t_done = 1; - GROUPTASK_ENQUEUE(&peer->p_send); -} - -static void -wg_decap(struct wg_softc *sc, struct mbuf *m) -{ - struct wg_pkt_data *data; - struct wg_peer *peer, *routed_peer; - struct wg_tag *t; - size_t plaintext_len; - uint8_t version; - uint64_t nonce; - int res; - - NET_EPOCH_ASSERT(); - data = mtod(m, struct wg_pkt_data *); - plaintext_len = m->m_pkthdr.len - sizeof(struct wg_pkt_data); - - t = wg_tag_get(m); - peer = t->t_peer; - - memcpy(&nonce, data->nonce, sizeof(nonce)); - nonce = le64toh(nonce); /* Wire format is little endian. 
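wg_encap above sizes its output as data header + padded plaintext + NOISE_AUTHTAG_LEN, then grabs the smallest mbuf cluster that fits by falling through four fixed FreeBSD cluster sizes. The same selection in isolation; the constants are the typical amd64 values and are an assumption here, since the diff only names them:

#include <stddef.h>

#define MCLBYTES	2048		/* typical amd64 values */
#define MJUMPAGESIZE	4096
#define MJUM9BYTES	(9 * 1024)
#define MJUM16BYTES	(16 * 1024)

static int
cluster_for(size_t out_len)
{
	if (out_len <= MCLBYTES)
		return (MCLBYTES);
	if (out_len <= MJUMPAGESIZE)
		return (MJUMPAGESIZE);
	if (out_len <= MJUM9BYTES)
		return (MJUM9BYTES);
	if (out_len <= MJUM16BYTES)
		return (MJUM16BYTES);
	return (-1);		/* oversized: wg_encap drops the packet */
}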
*/ - - res = noise_remote_decrypt(&peer->p_remote, data->r_idx, nonce, - data->buf, plaintext_len); - - if (__predict_false(res)) { - if (res == EINVAL) { - goto error; - } else if (res == ECONNRESET) { - wg_timers_event_handshake_complete(&peer->p_timers); - } else if (res == ESTALE) { - wg_timers_event_want_initiation(&peer->p_timers); - } else { - panic("unexpected response: %d\n", res); - } - } - wg_peer_set_endpoint_from_tag(peer, t); - - /* Remove the data header, and crypto mac tail from the packet */ - m_adj(m, sizeof(struct wg_pkt_data)); - m_adj(m, -NOISE_AUTHTAG_LEN); - - /* A packet with length 0 is a keepalive packet */ - if (m->m_pkthdr.len == 0) { - DPRINTF(peer->p_sc, "Receiving keepalive packet from peer " - "%llu\n", (unsigned long long)peer->p_id); - goto done; - } - - version = mtod(m, struct ip *)->ip_v; - if (!((version == 4 && m->m_pkthdr.len >= sizeof(struct ip)) || - (version == 6 && m->m_pkthdr.len >= sizeof(struct ip6_hdr)))) { - DPRINTF(peer->p_sc, "Packet is neither ipv4 nor ipv6 from peer " - "%llu\n", (unsigned long long)peer->p_id); - goto error; - } - - routed_peer = wg_aip_lookup(&peer->p_sc->sc_aips, m, IN); - if (routed_peer != peer) { - DPRINTF(peer->p_sc, "Packet has unallowed src IP from peer " - "%llu\n", (unsigned long long)peer->p_id); - goto error; - } - -done: - t->t_mbuf = m; -error: - t->t_done = 1; - GROUPTASK_ENQUEUE(&peer->p_recv); -} - -static void -wg_softc_decrypt(struct wg_softc *sc) -{ - struct epoch_tracker et; - struct mbuf *m; - - NET_EPOCH_ENTER(et); - while ((m = buf_ring_dequeue_mc(sc->sc_decap_ring)) != NULL) - wg_decap(sc, m); - NET_EPOCH_EXIT(et); -} - -static void -wg_softc_encrypt(struct wg_softc *sc) -{ - struct mbuf *m; - struct epoch_tracker et; - - NET_EPOCH_ENTER(et); - while ((m = buf_ring_dequeue_mc(sc->sc_encap_ring)) != NULL) - wg_encap(sc, m); - NET_EPOCH_EXIT(et); -} - -static void -wg_encrypt_dispatch(struct wg_softc *sc) -{ - for (int i = 0; i < mp_ncpus; i++) { - if (sc->sc_encrypt[i].gt_task.ta_flags & TASK_ENQUEUED) - continue; - GROUPTASK_ENQUEUE(&sc->sc_encrypt[i]); - } -} - -static void -wg_decrypt_dispatch(struct wg_softc *sc) -{ - for (int i = 0; i < mp_ncpus; i++) { - if (sc->sc_decrypt[i].gt_task.ta_flags & TASK_ENQUEUED) - continue; - GROUPTASK_ENQUEUE(&sc->sc_decrypt[i]); - } -} - -static void -wg_deliver_out(struct wg_peer *peer) -{ - struct epoch_tracker et; - struct wg_tag *t; - struct mbuf *m; - struct wg_endpoint endpoint; - size_t len; - int ret; - - NET_EPOCH_ENTER(et); - if (peer->p_sc->sc_ifp->if_link_state == LINK_STATE_DOWN) - goto done; - - wg_peer_get_endpoint(peer, &endpoint); - - while ((m = wg_queue_dequeue(&peer->p_encap_queue, &t)) != NULL) { - /* t_mbuf will contain the encrypted packet */ - if (t->t_mbuf == NULL) { - if_inc_counter(peer->p_sc->sc_ifp, IFCOUNTER_OERRORS, 1); - m_freem(m); - continue; - } - len = t->t_mbuf->m_pkthdr.len; - ret = wg_send(peer->p_sc, &endpoint, t->t_mbuf); - - if (ret == 0) { - wg_timers_event_any_authenticated_packet_traversal( - &peer->p_timers); - wg_timers_event_any_authenticated_packet_sent( - &peer->p_timers); - - if (m->m_pkthdr.len != 0) - wg_timers_event_data_sent(&peer->p_timers); - counter_u64_add(peer->p_tx_bytes, len); - } else if (ret == EADDRNOTAVAIL) { - wg_peer_clear_src(peer); - wg_peer_get_endpoint(peer, &endpoint); - } - m_freem(m); - } -done: - NET_EPOCH_EXIT(et); -} - -static void -wg_deliver_in(struct wg_peer *peer) -{ - struct mbuf *m; - struct ifnet *ifp; - struct wg_softc *sc; - struct epoch_tracker et; - struct wg_tag *t; - 
uint32_t af; - int version; - - NET_EPOCH_ENTER(et); - sc = peer->p_sc; - ifp = sc->sc_ifp; - - while ((m = wg_queue_dequeue(&peer->p_decap_queue, &t)) != NULL) { - /* t_mbuf will contain the encrypted packet */ - if (t->t_mbuf == NULL) { - if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); - m_freem(m); - continue; - } - MPASS(m == t->t_mbuf); - - wg_timers_event_any_authenticated_packet_received( - &peer->p_timers); - wg_timers_event_any_authenticated_packet_traversal( - &peer->p_timers); - - counter_u64_add(peer->p_rx_bytes, m->m_pkthdr.len + sizeof(struct wg_pkt_data) + NOISE_AUTHTAG_LEN); - if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1); - if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len + sizeof(struct wg_pkt_data) + NOISE_AUTHTAG_LEN); - - if (m->m_pkthdr.len == 0) { - m_freem(m); - continue; - } - - m->m_flags &= ~(M_MCAST | M_BCAST); - m->m_pkthdr.rcvif = ifp; - version = mtod(m, struct ip *)->ip_v; - if (version == IPVERSION) { - af = AF_INET; - BPF_MTAP2(ifp, &af, sizeof(af), m); - CURVNET_SET(ifp->if_vnet); - ip_input(m); - CURVNET_RESTORE(); - } else if (version == 6) { - af = AF_INET6; - BPF_MTAP2(ifp, &af, sizeof(af), m); - CURVNET_SET(ifp->if_vnet); - ip6_input(m); - CURVNET_RESTORE(); - } else - m_freem(m); - - wg_timers_event_data_received(&peer->p_timers); - } - NET_EPOCH_EXIT(et); -} - -static int -wg_queue_in(struct wg_peer *peer, struct mbuf *m) -{ - struct buf_ring *parallel = peer->p_sc->sc_decap_ring; - struct wg_queue *serial = &peer->p_decap_queue; - struct wg_tag *t; - int rc; - - MPASS(wg_tag_get(m) != NULL); - - mtx_lock(&serial->q_mtx); - if ((rc = mbufq_enqueue(&serial->q, m)) == ENOBUFS) { - m_freem(m); - if_inc_counter(peer->p_sc->sc_ifp, IFCOUNTER_OQDROPS, 1); - } else { - m->m_flags |= M_ENQUEUED; - rc = buf_ring_enqueue(parallel, m); - if (rc == ENOBUFS) { - t = wg_tag_get(m); - t->t_done = 1; - } - } - mtx_unlock(&serial->q_mtx); - return (rc); -} - -static void -wg_queue_stage(struct wg_peer *peer, struct mbuf *m) -{ - struct wg_queue *q = &peer->p_stage_queue; - mtx_lock(&q->q_mtx); - STAILQ_INSERT_TAIL(&q->q.mq_head, m, m_stailqpkt); - q->q.mq_len++; - while (mbufq_full(&q->q)) { - m = mbufq_dequeue(&q->q); - if (m) { - m_freem(m); - if_inc_counter(peer->p_sc->sc_ifp, IFCOUNTER_OQDROPS, 1); - } - } - mtx_unlock(&q->q_mtx); -} - -static void -wg_queue_out(struct wg_peer *peer) -{ - struct buf_ring *parallel = peer->p_sc->sc_encap_ring; - struct wg_queue *serial = &peer->p_encap_queue; - struct wg_tag *t; - struct mbufq staged; - struct mbuf *m; - - if (noise_remote_ready(&peer->p_remote) != 0) { - if (wg_queue_len(&peer->p_stage_queue)) - wg_timers_event_want_initiation(&peer->p_timers); - return; - } - - /* We first "steal" the staged queue to a local queue, so that we can do these - * remaining operations without having to hold the staged queue mutex. 
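wg_queue_out above "steals" the whole staged queue into a local list while holding the mutex and only then iterates, so the per-packet work (tagging, ring enqueue) never runs under the stage lock. The same pattern reduced to a pthread sketch, with pkt and queue as stand-in types:

#include <pthread.h>
#include <stddef.h>

struct pkt {
	struct pkt	*next;
};

struct queue {
	pthread_mutex_t	 mtx;
	struct pkt	*head;
};

static struct pkt *
queue_steal(struct queue *q)
{
	struct pkt *stolen;

	pthread_mutex_lock(&q->mtx);
	stolen = q->head;	/* detach the entire list at once */
	q->head = NULL;
	pthread_mutex_unlock(&q->mtx);
	return (stolen);	/* caller drains it without the lock */
}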
*/ - STAILQ_INIT(&staged.mq_head); - mtx_lock(&peer->p_stage_queue.q_mtx); - STAILQ_SWAP(&staged.mq_head, &peer->p_stage_queue.q.mq_head, mbuf); - staged.mq_len = peer->p_stage_queue.q.mq_len; - peer->p_stage_queue.q.mq_len = 0; - staged.mq_maxlen = peer->p_stage_queue.q.mq_maxlen; - mtx_unlock(&peer->p_stage_queue.q_mtx); - - while ((m = mbufq_dequeue(&staged)) != NULL) { - if ((t = wg_tag_get(m)) == NULL) { - m_freem(m); - continue; - } - t->t_peer = peer; - mtx_lock(&serial->q_mtx); - if (mbufq_enqueue(&serial->q, m) != 0) { - m_freem(m); - if_inc_counter(peer->p_sc->sc_ifp, IFCOUNTER_OQDROPS, 1); - } else { - m->m_flags |= M_ENQUEUED; - if (buf_ring_enqueue(parallel, m)) { - t = wg_tag_get(m); - t->t_done = 1; - } - } - mtx_unlock(&serial->q_mtx); - } - wg_encrypt_dispatch(peer->p_sc); -} - -static struct mbuf * -wg_queue_dequeue(struct wg_queue *q, struct wg_tag **t) -{ - struct mbuf *m_, *m; - - m = NULL; - mtx_lock(&q->q_mtx); - m_ = mbufq_first(&q->q); - if (m_ != NULL && (*t = wg_tag_get(m_))->t_done) { - m = mbufq_dequeue(&q->q); - m->m_flags &= ~M_ENQUEUED; - } - mtx_unlock(&q->q_mtx); - return (m); -} - -static int -wg_queue_len(struct wg_queue *q) -{ - /* This access races. We might consider adding locking here. */ - return (mbufq_len(&q->q)); -} - -static void -wg_queue_init(struct wg_queue *q, const char *name) -{ - mtx_init(&q->q_mtx, name, NULL, MTX_DEF); - mbufq_init(&q->q, MAX_QUEUED_PKT); -} - -static void -wg_queue_deinit(struct wg_queue *q) -{ - wg_queue_purge(q); - mtx_destroy(&q->q_mtx); -} - -static void -wg_queue_purge(struct wg_queue *q) -{ - mtx_lock(&q->q_mtx); - mbufq_drain(&q->q); - mtx_unlock(&q->q_mtx); -} - -/* TODO Indexes */ -static struct noise_remote * -wg_remote_get(struct wg_softc *sc, uint8_t public[NOISE_PUBLIC_KEY_LEN]) -{ - struct wg_peer *peer; - - if ((peer = wg_peer_lookup(sc, public)) == NULL) - return (NULL); - return (&peer->p_remote); -} - -static uint32_t -wg_index_set(struct wg_softc *sc, struct noise_remote *remote) -{ - struct wg_index *index, *iter; - struct wg_peer *peer; - uint32_t key; - - /* We can modify this without a lock as wg_index_set, wg_index_drop are - * guaranteed to be serialised (per remote). */ - peer = __containerof(remote, struct wg_peer, p_remote); - index = SLIST_FIRST(&peer->p_unused_index); - MPASS(index != NULL); - SLIST_REMOVE_HEAD(&peer->p_unused_index, i_unused_entry); - - index->i_value = remote; - - rw_wlock(&sc->sc_index_lock); -assign_id: - key = index->i_key = arc4random(); - key &= sc->sc_index_mask; - LIST_FOREACH(iter, &sc->sc_index[key], i_entry) - if (iter->i_key == index->i_key) - goto assign_id; - - LIST_INSERT_HEAD(&sc->sc_index[key], index, i_entry); - - rw_wunlock(&sc->sc_index_lock); - - /* Likewise, no need to lock for index here. 
*/ - return index->i_key; -} - -static struct noise_remote * -wg_index_get(struct wg_softc *sc, uint32_t key0) -{ - struct wg_index *iter; - struct noise_remote *remote = NULL; - uint32_t key = key0 & sc->sc_index_mask; - - rw_enter_read(&sc->sc_index_lock); - LIST_FOREACH(iter, &sc->sc_index[key], i_entry) - if (iter->i_key == key0) { - remote = iter->i_value; - break; - } - rw_exit_read(&sc->sc_index_lock); - return remote; -} - -static void -wg_index_drop(struct wg_softc *sc, uint32_t key0) -{ - struct wg_index *iter; - struct wg_peer *peer = NULL; - uint32_t key = key0 & sc->sc_index_mask; - - rw_enter_write(&sc->sc_index_lock); - LIST_FOREACH(iter, &sc->sc_index[key], i_entry) - if (iter->i_key == key0) { - LIST_REMOVE(iter, i_entry); - break; - } - rw_exit_write(&sc->sc_index_lock); - - if (iter == NULL) - return; - - /* We expect a peer */ - peer = __containerof(iter->i_value, struct wg_peer, p_remote); - MPASS(peer != NULL); - SLIST_INSERT_HEAD(&peer->p_unused_index, iter, i_unused_entry); -} - -static int -wg_update_endpoint_addrs(struct wg_endpoint *e, const struct sockaddr *srcsa, - struct ifnet *rcvif) -{ - const struct sockaddr_in *sa4; -#ifdef INET6 - const struct sockaddr_in6 *sa6; -#endif - int ret = 0; - - /* - * UDP passes a 2-element sockaddr array: first element is the - * source addr/port, second the destination addr/port. - */ - if (srcsa->sa_family == AF_INET) { - sa4 = (const struct sockaddr_in *)srcsa; - e->e_remote.r_sin = sa4[0]; - e->e_local.l_in = sa4[1].sin_addr; -#ifdef INET6 - } else if (srcsa->sa_family == AF_INET6) { - sa6 = (const struct sockaddr_in6 *)srcsa; - e->e_remote.r_sin6 = sa6[0]; - e->e_local.l_in6 = sa6[1].sin6_addr; -#endif - } else { - ret = EAFNOSUPPORT; - } - - return (ret); -} - -static void -wg_input(struct mbuf *m0, int offset, struct inpcb *inpcb, - const struct sockaddr *srcsa, void *_sc) -{ - struct wg_pkt_data *pkt_data; - struct wg_endpoint *e; - struct wg_softc *sc = _sc; - struct mbuf *m; - int pktlen, pkttype; - struct noise_remote *remote; - struct wg_tag *t; - void *data; - - /* Caller provided us with srcsa, no need for this header. */ - m_adj(m0, offset + sizeof(struct udphdr)); - - /* - * Ensure mbuf has at least enough contiguous data to peel off our - * headers at the beginning. 
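One detail of wg_index_get() and wg_index_drop() above is worth spelling out: sc_index_mask is a power of two minus one, so the low bits of the receiver index select a hash chain and the full 32-bit value disambiguates within it. A toy illustration with hypothetical types and no locking:

	#include <sys/queue.h>
	#include <stdint.h>

	struct entry {
		LIST_ENTRY(entry)	 link;
		uint32_t		 key;	// full 32-bit receiver index
		void			*value;
	};
	LIST_HEAD(bucket, entry);

	static void *
	index_lookup(struct bucket *table, uint32_t mask, uint32_t key)
	{
		struct entry *it;

		LIST_FOREACH(it, &table[key & mask], link)	// masked bits pick the chain
			if (it->key == key)			// full compare within it
				return (it->value);
		return (NULL);
	}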
- */ - if ((m = m_defrag(m0, M_NOWAIT)) == NULL) { - m_freem(m0); - return; - } - data = mtod(m, void *); - pkttype = *(uint32_t*)data; - t = wg_tag_get(m); - if (t == NULL) { - goto free; - } - e = wg_mbuf_endpoint_get(m); - - if (wg_update_endpoint_addrs(e, srcsa, m->m_pkthdr.rcvif)) { - goto free; - } - - pktlen = m->m_pkthdr.len; - - if ((pktlen == sizeof(struct wg_pkt_initiation) && - pkttype == WG_PKT_INITIATION) || - (pktlen == sizeof(struct wg_pkt_response) && - pkttype == WG_PKT_RESPONSE) || - (pktlen == sizeof(struct wg_pkt_cookie) && - pkttype == WG_PKT_COOKIE)) { - if (mbufq_enqueue(&sc->sc_handshake_queue, m) == 0) { - GROUPTASK_ENQUEUE(&sc->sc_handshake); - } else { - DPRINTF(sc, "Dropping handshake packet\n"); - m_freem(m); - } - } else if (pktlen >= sizeof(struct wg_pkt_data) + NOISE_AUTHTAG_LEN - && pkttype == WG_PKT_DATA) { - - pkt_data = data; - remote = wg_index_get(sc, pkt_data->r_idx); - if (remote == NULL) { - if_inc_counter(sc->sc_ifp, IFCOUNTER_IERRORS, 1); - m_freem(m); - } else if (buf_ring_count(sc->sc_decap_ring) > MAX_QUEUED_PKT) { - if_inc_counter(sc->sc_ifp, IFCOUNTER_IQDROPS, 1); - m_freem(m); - } else { - t->t_peer = __containerof(remote, struct wg_peer, - p_remote); - t->t_mbuf = NULL; - t->t_done = 0; - - wg_queue_in(t->t_peer, m); - wg_decrypt_dispatch(sc); - } - } else { -free: - m_freem(m); - } -} - -static int -wg_transmit(struct ifnet *ifp, struct mbuf *m) -{ - struct wg_softc *sc; - sa_family_t family; - struct epoch_tracker et; - struct wg_peer *peer; - struct wg_tag *t; - uint32_t af; - int rc; - - /* - * Work around lifetime issue in the ipv6 mld code. - */ - if (__predict_false(ifp->if_flags & IFF_DYING)) - return (ENXIO); - - rc = 0; - sc = ifp->if_softc; - if ((t = wg_tag_get(m)) == NULL) { - rc = ENOBUFS; - goto early_out; - } - af = m->m_pkthdr.ph_family; - BPF_MTAP2(ifp, &af, sizeof(af), m); - - NET_EPOCH_ENTER(et); - peer = wg_aip_lookup(&sc->sc_aips, m, OUT); - if (__predict_false(peer == NULL)) { - rc = ENOKEY; - goto err; - } - - family = peer->p_endpoint.e_remote.r_sa.sa_family; - if (__predict_false(family != AF_INET && family != AF_INET6)) { - DPRINTF(sc, "No valid endpoint has been configured or " - "discovered for peer %llu\n", (unsigned long long)peer->p_id); - - rc = EHOSTUNREACH; - goto err; - } - t->t_peer = peer; - t->t_mbuf = NULL; - t->t_done = 0; - t->t_mtu = ifp->if_mtu; - - wg_queue_stage(peer, m); - wg_queue_out(peer); - NET_EPOCH_EXIT(et); - return (rc); -err: - NET_EPOCH_EXIT(et); -early_out: - if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); - /* TODO: send ICMP unreachable */ - m_free(m); - return (rc); -} - -static int -wg_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa, struct route *rt) -{ - m->m_pkthdr.ph_family = sa->sa_family; - return (wg_transmit(ifp, m)); -} - -static int -wg_peer_add(struct wg_softc *sc, const nvlist_t *nvl) -{ - uint8_t public[WG_KEY_SIZE]; - const void *pub_key; - const struct sockaddr *endpoint; - int err; - size_t size; - struct wg_peer *peer = NULL; - bool need_insert = false; - - sx_assert(&sc->sc_lock, SX_XLOCKED); - - if (!nvlist_exists_binary(nvl, "public-key")) { - return (EINVAL); - } - pub_key = nvlist_get_binary(nvl, "public-key", &size); - if (size != WG_KEY_SIZE) { - return (EINVAL); - } - if (noise_local_keys(&sc->sc_local, public, NULL) == 0 && - bcmp(public, pub_key, WG_KEY_SIZE) == 0) { - return (0); // Silently ignored; not actually a failure. 
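wg_input() above accepts a handshake message only when its leading 4-byte type and its exact length agree; anything else must look like a data packet or is dropped. The same gate, restated compactly and assuming the driver's WG_PKT_* constants and message structs:

	static bool
	wg_pkt_is_handshake(uint32_t type, int len)
	{
		switch (type) {
		case WG_PKT_INITIATION:
			return (len == sizeof(struct wg_pkt_initiation));
		case WG_PKT_RESPONSE:
			return (len == sizeof(struct wg_pkt_response));
		case WG_PKT_COOKIE:
			return (len == sizeof(struct wg_pkt_cookie));
		default:
			// data packets are variable-sized:
			// len >= sizeof(struct wg_pkt_data) + NOISE_AUTHTAG_LEN
			return (false);
		}
	}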
- } - peer = wg_peer_lookup(sc, pub_key); - if (nvlist_exists_bool(nvl, "remove") && - nvlist_get_bool(nvl, "remove")) { - if (peer != NULL) { - wg_hashtable_peer_remove(&sc->sc_hashtable, peer); - wg_peer_destroy(peer); - } - return (0); - } - if (nvlist_exists_bool(nvl, "replace-allowedips") && - nvlist_get_bool(nvl, "replace-allowedips") && - peer != NULL) { - - wg_aip_delete(&peer->p_sc->sc_aips, peer); - } - if (peer == NULL) { - if (sc->sc_peer_count >= MAX_PEERS_PER_IFACE) - return (E2BIG); - sc->sc_peer_count++; - - need_insert = true; - peer = wg_peer_alloc(sc); - MPASS(peer != NULL); - noise_remote_init(&peer->p_remote, pub_key, &sc->sc_local); - cookie_maker_init(&peer->p_cookie, pub_key); - } - if (nvlist_exists_binary(nvl, "endpoint")) { - endpoint = nvlist_get_binary(nvl, "endpoint", &size); - if (size > sizeof(peer->p_endpoint.e_remote)) { - err = EINVAL; - goto out; - } - memcpy(&peer->p_endpoint.e_remote, endpoint, size); - } - if (nvlist_exists_binary(nvl, "preshared-key")) { - const void *key; - - key = nvlist_get_binary(nvl, "preshared-key", &size); - if (size != WG_KEY_SIZE) { - err = EINVAL; - goto out; - } - noise_remote_set_psk(&peer->p_remote, key); - } - if (nvlist_exists_number(nvl, "persistent-keepalive-interval")) { - uint64_t pki = nvlist_get_number(nvl, "persistent-keepalive-interval"); - if (pki > UINT16_MAX) { - err = EINVAL; - goto out; - } - wg_timers_set_persistent_keepalive(&peer->p_timers, pki); - } - if (nvlist_exists_nvlist_array(nvl, "allowed-ips")) { - const void *binary; - uint64_t cidr; - const nvlist_t * const * aipl; - struct wg_allowedip aip; - size_t allowedip_count; - - aipl = nvlist_get_nvlist_array(nvl, "allowed-ips", - &allowedip_count); - for (size_t idx = 0; idx < allowedip_count; idx++) { - if (!nvlist_exists_number(aipl[idx], "cidr")) - continue; - cidr = nvlist_get_number(aipl[idx], "cidr"); - if (nvlist_exists_binary(aipl[idx], "ipv4")) { - binary = nvlist_get_binary(aipl[idx], "ipv4", &size); - if (binary == NULL || cidr > 32 || size != sizeof(aip.ip4)) { - err = EINVAL; - goto out; - } - aip.family = AF_INET; - memcpy(&aip.ip4, binary, sizeof(aip.ip4)); - } else if (nvlist_exists_binary(aipl[idx], "ipv6")) { - binary = nvlist_get_binary(aipl[idx], "ipv6", &size); - if (binary == NULL || cidr > 128 || size != sizeof(aip.ip6)) { - err = EINVAL; - goto out; - } - aip.family = AF_INET6; - memcpy(&aip.ip6, binary, sizeof(aip.ip6)); - } else { - continue; - } - aip.cidr = cidr; - - if ((err = wg_aip_add(&sc->sc_aips, peer, &aip)) != 0) { - goto out; - } - } - } - if (need_insert) { - wg_hashtable_peer_insert(&sc->sc_hashtable, peer); - if (sc->sc_ifp->if_link_state == LINK_STATE_UP) - wg_timers_enable(&peer->p_timers); - } - return (0); - -out: - if (need_insert) /* If we fail, only destroy if it was new. 
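Since wg_peer_add() above defines the nvlist schema a configurator must produce, a hedged userspace sketch of building one peer with a single IPv4 allowed-ip via libnv may help. The helper name is hypothetical; the key names are exactly those parsed above; error handling is omitted.

	#include <sys/nv.h>
	#include <netinet/in.h>
	#include <stdint.h>

	static nvlist_t *
	make_peer_nvl(const uint8_t pubkey[32], struct in_addr ip4, uint64_t cidr)
	{
		nvlist_t *peer, *aip;

		peer = nvlist_create(0);
		aip = nvlist_create(0);

		nvlist_add_binary(peer, "public-key", pubkey, 32);	// WG_KEY_SIZE
		nvlist_add_number(peer, "persistent-keepalive-interval", 25);

		nvlist_add_number(aip, "cidr", cidr);		// must be <= 32 for v4
		nvlist_add_binary(aip, "ipv4", &ip4, sizeof(ip4));

		// nvlist_add_nvlist_array() copies its elements
		nvlist_add_nvlist_array(peer, "allowed-ips",
		    (const nvlist_t *const *)&aip, 1);
		nvlist_destroy(aip);
		return (peer);
	}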
*/ - wg_peer_destroy(peer); - return (err); -} - -static int -wgc_set(struct wg_softc *sc, struct wg_data_io *wgd) -{ - uint8_t public[WG_KEY_SIZE], private[WG_KEY_SIZE]; - struct ifnet *ifp; - void *nvlpacked; - nvlist_t *nvl; - ssize_t size; - int err; - - ifp = sc->sc_ifp; - if (wgd->wgd_size == 0 || wgd->wgd_data == NULL) - return (EFAULT); - - sx_xlock(&sc->sc_lock); - - nvlpacked = malloc(wgd->wgd_size, M_TEMP, M_WAITOK); - err = copyin(wgd->wgd_data, nvlpacked, wgd->wgd_size); - if (err) - goto out; - nvl = nvlist_unpack(nvlpacked, wgd->wgd_size, 0); - if (nvl == NULL) { - err = EBADMSG; - goto out; - } - if (nvlist_exists_bool(nvl, "replace-peers") && - nvlist_get_bool(nvl, "replace-peers")) - wg_peer_remove_all(sc); - if (nvlist_exists_number(nvl, "listen-port")) { - uint64_t new_port = nvlist_get_number(nvl, "listen-port"); - if (new_port > UINT16_MAX) { - err = EINVAL; - goto out; - } - if (new_port != sc->sc_socket.so_port) { - if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0) { - if ((err = wg_socket_init(sc, new_port)) != 0) - goto out; - } else - sc->sc_socket.so_port = new_port; - } - } - if (nvlist_exists_binary(nvl, "private-key")) { - const void *key = nvlist_get_binary(nvl, "private-key", &size); - if (size != WG_KEY_SIZE) { - err = EINVAL; - goto out; - } - - if (noise_local_keys(&sc->sc_local, NULL, private) != 0 || - timingsafe_bcmp(private, key, WG_KEY_SIZE) != 0) { - struct noise_local *local; - struct wg_peer *peer; - struct wg_hashtable *ht = &sc->sc_hashtable; - bool has_identity; - - if (curve25519_generate_public(public, key)) { - /* Peer conflict: remove conflicting peer. */ - if ((peer = wg_peer_lookup(sc, public)) != - NULL) { - wg_hashtable_peer_remove(ht, peer); - wg_peer_destroy(peer); - } - } - - /* - * Set the private key and invalidate all existing - * handshakes. - */ - local = &sc->sc_local; - noise_local_lock_identity(local); - /* Note: we might be removing the private key. */ - has_identity = noise_local_set_private(local, key) == 0; - mtx_lock(&ht->h_mtx); - CK_LIST_FOREACH(peer, &ht->h_peers_list, p_entry) { - noise_remote_precompute(&peer->p_remote); - wg_timers_event_reset_handshake_last_sent( - &peer->p_timers); - noise_remote_expire_current(&peer->p_remote); - } - mtx_unlock(&ht->h_mtx); - cookie_checker_update(&sc->sc_cookie, - has_identity ? public : NULL); - noise_local_unlock_identity(local); - } - } - if (nvlist_exists_number(nvl, "user-cookie")) { - uint64_t user_cookie = nvlist_get_number(nvl, "user-cookie"); - if (user_cookie > UINT32_MAX) { - err = EINVAL; - goto out; - } - wg_socket_set_cookie(sc, user_cookie); - } - if (nvlist_exists_nvlist_array(nvl, "peers")) { - size_t peercount; - const nvlist_t * const*nvl_peers; - - nvl_peers = nvlist_get_nvlist_array(nvl, "peers", &peercount); - for (int i = 0; i < peercount; i++) { - err = wg_peer_add(sc, nvl_peers[i]); - if (err != 0) - goto out; - } - } - - nvlist_destroy(nvl); -out: - free(nvlpacked, M_TEMP); - sx_xunlock(&sc->sc_lock); - return (err); -} - -static unsigned int -in_mask2len(struct in_addr *mask) -{ - unsigned int x, y; - uint8_t *p; - - p = (uint8_t *)mask; - for (x = 0; x < sizeof(*mask); x++) { - if (p[x] != 0xff) - break; - } - y = 0; - if (x < sizeof(*mask)) { - for (y = 0; y < NBBY; y++) { - if ((p[x] & (0x80 >> y)) == 0) - break; - } - } - return x * NBBY + y; -} - -static int -wg_peer_to_export(struct wg_peer *peer, struct wg_peer_export *exp) -{ - struct wg_endpoint *ep; - struct wg_aip *rt; - struct noise_remote *remote; - int i; - - /* Non-sleepable context. 
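For completeness, the userspace half of the contract that wgc_set() above implements: pack the nvlist and hand it to the kernel through the SIOCSWG ioctl in a struct wg_data_io (both declared in if_wg.h, removed further down in this diff). A minimal sketch with a hypothetical helper name:

	#include <sys/ioctl.h>
	#include <sys/nv.h>
	#include <stdlib.h>
	#include <string.h>
	// plus if_wg.h for struct wg_data_io and SIOCSWG

	static int
	wg_set(int s, const char *ifname, const nvlist_t *nvl)
	{
		struct wg_data_io wgd;
		int ret;

		memset(&wgd, 0, sizeof(wgd));
		strlcpy(wgd.wgd_name, ifname, sizeof(wgd.wgd_name));
		wgd.wgd_data = nvlist_pack(nvl, &wgd.wgd_size);	// kernel copyin()s this
		if (wgd.wgd_data == NULL)
			return (-1);
		ret = ioctl(s, SIOCSWG, &wgd);			// s: any datagram socket
		free(wgd.wgd_data);
		return (ret);
	}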
*/ - NET_EPOCH_ASSERT(); - - bzero(&exp->endpoint, sizeof(exp->endpoint)); - remote = &peer->p_remote; - ep = &peer->p_endpoint; - if (ep->e_remote.r_sa.sa_family != 0) { - exp->endpoint_sz = (ep->e_remote.r_sa.sa_family == AF_INET) ? - sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6); - - memcpy(&exp->endpoint, &ep->e_remote, exp->endpoint_sz); - } - - /* We always export it. */ - (void)noise_remote_keys(remote, exp->public_key, exp->preshared_key); - exp->persistent_keepalive = - peer->p_timers.t_persistent_keepalive_interval; - wg_timers_get_last_handshake(&peer->p_timers, &exp->last_handshake); - exp->rx_bytes = counter_u64_fetch(peer->p_rx_bytes); - exp->tx_bytes = counter_u64_fetch(peer->p_tx_bytes); - - exp->aip_count = 0; - CK_LIST_FOREACH(rt, &peer->p_aips, r_entry) { - exp->aip_count++; - } - - /* Early success; no allowed-ips to copy out. */ - if (exp->aip_count == 0) - return (0); - - exp->aip = malloc(exp->aip_count * sizeof(*exp->aip), M_TEMP, M_NOWAIT); - if (exp->aip == NULL) - return (ENOMEM); - - i = 0; - CK_LIST_FOREACH(rt, &peer->p_aips, r_entry) { - exp->aip[i].family = rt->r_addr.ss_family; - if (exp->aip[i].family == AF_INET) { - struct sockaddr_in *sin = - (struct sockaddr_in *)&rt->r_addr; - - exp->aip[i].ip4 = sin->sin_addr; - - sin = (struct sockaddr_in *)&rt->r_mask; - exp->aip[i].cidr = in_mask2len(&sin->sin_addr); - } else if (exp->aip[i].family == AF_INET6) { - struct sockaddr_in6 *sin6 = - (struct sockaddr_in6 *)&rt->r_addr; - - exp->aip[i].ip6 = sin6->sin6_addr; - - sin6 = (struct sockaddr_in6 *)&rt->r_mask; - exp->aip[i].cidr = in6_mask2len(&sin6->sin6_addr, NULL); - } - i++; - if (i == exp->aip_count) - break; - } - - /* Again, AllowedIPs might have shrank; update it. */ - exp->aip_count = i; - - return (0); -} - -static nvlist_t * -wg_peer_export_to_nvl(struct wg_softc *sc, struct wg_peer_export *exp) -{ - struct wg_timespec64 ts64; - nvlist_t *nvl, **nvl_aips; - size_t i; - uint16_t family; - - nvl_aips = NULL; - if ((nvl = nvlist_create(0)) == NULL) - return (NULL); - - nvlist_add_binary(nvl, "public-key", exp->public_key, - sizeof(exp->public_key)); - if (wgc_privileged(sc)) - nvlist_add_binary(nvl, "preshared-key", exp->preshared_key, - sizeof(exp->preshared_key)); - if (exp->endpoint_sz != 0) - nvlist_add_binary(nvl, "endpoint", &exp->endpoint, - exp->endpoint_sz); - - if (exp->aip_count != 0) { - nvl_aips = mallocarray(exp->aip_count, sizeof(*nvl_aips), - M_WG, M_WAITOK | M_ZERO); - } - - for (i = 0; i < exp->aip_count; i++) { - nvl_aips[i] = nvlist_create(0); - if (nvl_aips[i] == NULL) - goto err; - family = exp->aip[i].family; - nvlist_add_number(nvl_aips[i], "cidr", exp->aip[i].cidr); - if (family == AF_INET) - nvlist_add_binary(nvl_aips[i], "ipv4", - &exp->aip[i].ip4, sizeof(exp->aip[i].ip4)); - else if (family == AF_INET6) - nvlist_add_binary(nvl_aips[i], "ipv6", - &exp->aip[i].ip6, sizeof(exp->aip[i].ip6)); - } - - if (i != 0) { - nvlist_add_nvlist_array(nvl, "allowed-ips", - (const nvlist_t *const *)nvl_aips, i); - } - - for (i = 0; i < exp->aip_count; ++i) - nvlist_destroy(nvl_aips[i]); - - free(nvl_aips, M_WG); - nvl_aips = NULL; - - ts64.tv_sec = exp->last_handshake.tv_sec; - ts64.tv_nsec = exp->last_handshake.tv_nsec; - nvlist_add_binary(nvl, "last-handshake-time", &ts64, sizeof(ts64)); - - if (exp->persistent_keepalive != 0) - nvlist_add_number(nvl, "persistent-keepalive-interval", - exp->persistent_keepalive); - - if (exp->rx_bytes != 0) - nvlist_add_number(nvl, "rx-bytes", exp->rx_bytes); - if (exp->tx_bytes != 0) - 
nvlist_add_number(nvl, "tx-bytes", exp->tx_bytes); - - return (nvl); -err: - for (i = 0; i < exp->aip_count && nvl_aips[i] != NULL; i++) { - nvlist_destroy(nvl_aips[i]); - } - - free(nvl_aips, M_WG); - nvlist_destroy(nvl); - return (NULL); -} - -static int -wg_marshal_peers(struct wg_softc *sc, nvlist_t **nvlp, nvlist_t ***nvl_arrayp, int *peer_countp) -{ - struct wg_peer *peer; - int err, i, peer_count; - nvlist_t *nvl, **nvl_array; - struct epoch_tracker et; - struct wg_peer_export *wpe; - - nvl = NULL; - nvl_array = NULL; - if (nvl_arrayp) - *nvl_arrayp = NULL; - if (nvlp) - *nvlp = NULL; - if (peer_countp) - *peer_countp = 0; - peer_count = sc->sc_hashtable.h_num_peers; - if (peer_count == 0) { - return (ENOENT); - } - - if (nvlp && (nvl = nvlist_create(0)) == NULL) - return (ENOMEM); - - err = i = 0; - nvl_array = malloc(peer_count*sizeof(void*), M_TEMP, M_WAITOK | M_ZERO); - wpe = malloc(peer_count*sizeof(*wpe), M_TEMP, M_WAITOK | M_ZERO); - - NET_EPOCH_ENTER(et); - CK_LIST_FOREACH(peer, &sc->sc_hashtable.h_peers_list, p_entry) { - if ((err = wg_peer_to_export(peer, &wpe[i])) != 0) { - break; - } - - i++; - if (i == peer_count) - break; - } - NET_EPOCH_EXIT(et); - - if (err != 0) - goto out; - - /* Update the peer count, in case we found fewer entries. */ - *peer_countp = peer_count = i; - if (peer_count == 0) { - err = ENOENT; - goto out; - } - - for (i = 0; i < peer_count; i++) { - int idx; - - /* - * Peers are added to the list in reverse order, effectively, - * because it's simpler/quicker to add at the head every time. - * - * Export them in reverse order. No worries if we fail mid-way - * through, the cleanup below will DTRT. - */ - idx = peer_count - i - 1; - nvl_array[idx] = wg_peer_export_to_nvl(sc, &wpe[i]); - if (nvl_array[idx] == NULL) { - break; - } - } - - if (i < peer_count) { - /* Error! */ - *peer_countp = 0; - err = ENOMEM; - } else if (nvl) { - nvlist_add_nvlist_array(nvl, "peers", - (const nvlist_t * const *)nvl_array, peer_count); - if ((err = nvlist_error(nvl))) { - goto out; - } - *nvlp = nvl; - } - *nvl_arrayp = nvl_array; - out: - if (err != 0) { - /* Note that nvl_array is populated in reverse order. 
*/ - for (i = 0; i < peer_count; i++) { - nvlist_destroy(nvl_array[i]); - } - - free(nvl_array, M_TEMP); - if (nvl != NULL) - nvlist_destroy(nvl); - } - - for (i = 0; i < peer_count; i++) - free(wpe[i].aip, M_TEMP); - free(wpe, M_TEMP); - return (err); -} - -static int -wgc_get(struct wg_softc *sc, struct wg_data_io *wgd) -{ - nvlist_t *nvl, **nvl_array; - void *packed; - size_t size; - int peer_count, err; - - nvl = nvlist_create(0); - if (nvl == NULL) - return (ENOMEM); - - sx_slock(&sc->sc_lock); - - err = 0; - packed = NULL; - if (sc->sc_socket.so_port != 0) - nvlist_add_number(nvl, "listen-port", sc->sc_socket.so_port); - if (sc->sc_socket.so_user_cookie != 0) - nvlist_add_number(nvl, "user-cookie", sc->sc_socket.so_user_cookie); - if (sc->sc_local.l_has_identity) { - nvlist_add_binary(nvl, "public-key", sc->sc_local.l_public, WG_KEY_SIZE); - if (wgc_privileged(sc)) - nvlist_add_binary(nvl, "private-key", sc->sc_local.l_private, WG_KEY_SIZE); - } - if (sc->sc_hashtable.h_num_peers > 0) { - err = wg_marshal_peers(sc, NULL, &nvl_array, &peer_count); - if (err) - goto out_nvl; - nvlist_add_nvlist_array(nvl, "peers", - (const nvlist_t * const *)nvl_array, peer_count); - } - packed = nvlist_pack(nvl, &size); - if (packed == NULL) { - err = ENOMEM; - goto out_nvl; - } - if (wgd->wgd_size == 0) { - wgd->wgd_size = size; - goto out_packed; - } - if (wgd->wgd_size < size) { - err = ENOSPC; - goto out_packed; - } - if (wgd->wgd_data == NULL) { - err = EFAULT; - goto out_packed; - } - err = copyout(packed, wgd->wgd_data, size); - wgd->wgd_size = size; - -out_packed: - free(packed, M_NVLIST); -out_nvl: - nvlist_destroy(nvl); - sx_sunlock(&sc->sc_lock); - return (err); -} - -static int -wg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) -{ - struct wg_data_io *wgd = (struct wg_data_io *)data; - struct ifreq *ifr = (struct ifreq *)data; - struct wg_softc *sc = ifp->if_softc; - int ret = 0; - - switch (cmd) { - case SIOCSWG: - ret = priv_check(curthread, PRIV_NET_WG); - if (ret == 0) - ret = wgc_set(sc, wgd); - break; - case SIOCGWG: - ret = wgc_get(sc, wgd); - break; - /* Interface IOCTLs */ - case SIOCSIFADDR: - /* - * This differs from *BSD norms, but is more uniform with how - * WireGuard behaves elsewhere. - */ - break; - case SIOCSIFFLAGS: - if ((ifp->if_flags & IFF_UP) != 0) - ret = wg_up(sc); - else - wg_down(sc); - break; - case SIOCSIFMTU: - if (ifr->ifr_mtu <= 0 || ifr->ifr_mtu > MAX_MTU) - ret = EINVAL; - else - ifp->if_mtu = ifr->ifr_mtu; - break; - case SIOCADDMULTI: - case SIOCDELMULTI: - break; - default: - ret = ENOTTY; - } - - return ret; -} - -static int -wg_up(struct wg_softc *sc) -{ - struct wg_hashtable *ht = &sc->sc_hashtable; - struct ifnet *ifp = sc->sc_ifp; - struct wg_peer *peer; - int rc = EBUSY; - - sx_xlock(&sc->sc_lock); - /* Jail's being removed, no more wg_up(). */ - if ((sc->sc_flags & WGF_DYING) != 0) - goto out; - - /* Silent success if we're already running. 
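wgc_get() above deliberately supports a size probe: when wgd_size is 0 it reports the packed size and returns without copying anything out. The conventional userspace read is therefore two ioctls, sketched under the same assumptions as the set-side example:

	static nvlist_t *
	wg_get(int s, const char *ifname)
	{
		struct wg_data_io wgd;
		nvlist_t *nvl = NULL;

		memset(&wgd, 0, sizeof(wgd));
		strlcpy(wgd.wgd_name, ifname, sizeof(wgd.wgd_name));
		if (ioctl(s, SIOCGWG, &wgd) != 0)	// probe: only fills wgd_size
			return (NULL);
		if ((wgd.wgd_data = malloc(wgd.wgd_size)) == NULL)
			return (NULL);
		if (ioctl(s, SIOCGWG, &wgd) == 0)	// real transfer
			nvl = nvlist_unpack(wgd.wgd_data, wgd.wgd_size, 0);
		free(wgd.wgd_data);
		return (nvl);
	}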
*/ - rc = 0; - if (ifp->if_drv_flags & IFF_DRV_RUNNING) - goto out; - ifp->if_drv_flags |= IFF_DRV_RUNNING; - - rc = wg_socket_init(sc, sc->sc_socket.so_port); - if (rc == 0) { - mtx_lock(&ht->h_mtx); - CK_LIST_FOREACH(peer, &ht->h_peers_list, p_entry) { - wg_timers_enable(&peer->p_timers); - wg_queue_out(peer); - } - mtx_unlock(&ht->h_mtx); - - if_link_state_change(sc->sc_ifp, LINK_STATE_UP); - } else { - ifp->if_drv_flags &= ~IFF_DRV_RUNNING; - } -out: - sx_xunlock(&sc->sc_lock); - return (rc); -} - -static void -wg_down(struct wg_softc *sc) -{ - struct wg_hashtable *ht = &sc->sc_hashtable; - struct ifnet *ifp = sc->sc_ifp; - struct wg_peer *peer; - - sx_xlock(&sc->sc_lock); - if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) { - sx_xunlock(&sc->sc_lock); - return; - } - ifp->if_drv_flags &= ~IFF_DRV_RUNNING; - - mtx_lock(&ht->h_mtx); - CK_LIST_FOREACH(peer, &ht->h_peers_list, p_entry) { - wg_queue_purge(&peer->p_stage_queue); - wg_timers_disable(&peer->p_timers); - } - mtx_unlock(&ht->h_mtx); - - mbufq_drain(&sc->sc_handshake_queue); - - mtx_lock(&ht->h_mtx); - CK_LIST_FOREACH(peer, &ht->h_peers_list, p_entry) { - noise_remote_clear(&peer->p_remote); - wg_timers_event_reset_handshake_last_sent(&peer->p_timers); - } - mtx_unlock(&ht->h_mtx); - - if_link_state_change(sc->sc_ifp, LINK_STATE_DOWN); - wg_socket_uninit(sc); - - sx_xunlock(&sc->sc_lock); -} - -static void -crypto_taskq_setup(struct wg_softc *sc) -{ - - sc->sc_encrypt = malloc(sizeof(struct grouptask)*mp_ncpus, M_WG, M_WAITOK); - sc->sc_decrypt = malloc(sizeof(struct grouptask)*mp_ncpus, M_WG, M_WAITOK); - - for (int i = 0; i < mp_ncpus; i++) { - GROUPTASK_INIT(&sc->sc_encrypt[i], 0, - (gtask_fn_t *)wg_softc_encrypt, sc); - taskqgroup_attach_cpu(qgroup_if_io_tqg, &sc->sc_encrypt[i], sc, i, NULL, NULL, "wg encrypt"); - GROUPTASK_INIT(&sc->sc_decrypt[i], 0, - (gtask_fn_t *)wg_softc_decrypt, sc); - taskqgroup_attach_cpu(qgroup_if_io_tqg, &sc->sc_decrypt[i], sc, i, NULL, NULL, "wg decrypt"); - } -} - -static void -crypto_taskq_destroy(struct wg_softc *sc) -{ - for (int i = 0; i < mp_ncpus; i++) { - taskqgroup_detach(qgroup_if_io_tqg, &sc->sc_encrypt[i]); - taskqgroup_detach(qgroup_if_io_tqg, &sc->sc_decrypt[i]); - } - free(sc->sc_encrypt, M_WG); - free(sc->sc_decrypt, M_WG); -} - -static int -wg_clone_create(struct if_clone *ifc, int unit, caddr_t params) -{ - struct wg_softc *sc; - struct ifnet *ifp; - struct noise_upcall noise_upcall; - - sc = malloc(sizeof(*sc), M_WG, M_WAITOK | M_ZERO); - sc->sc_ucred = crhold(curthread->td_ucred); - ifp = sc->sc_ifp = if_alloc(IFT_WIREGUARD); - ifp->if_softc = sc; - if_initname(ifp, wgname, unit); - - noise_upcall.u_arg = sc; - noise_upcall.u_remote_get = - (struct noise_remote *(*)(void *, uint8_t *))wg_remote_get; - noise_upcall.u_index_set = - (uint32_t (*)(void *, struct noise_remote *))wg_index_set; - noise_upcall.u_index_drop = - (void (*)(void *, uint32_t))wg_index_drop; - noise_local_init(&sc->sc_local, &noise_upcall); - cookie_checker_init(&sc->sc_cookie, ratelimit_zone); - - sc->sc_socket.so_port = 0; - - atomic_add_int(&clone_count, 1); - ifp->if_capabilities = ifp->if_capenable = WG_CAPS; - - mbufq_init(&sc->sc_handshake_queue, MAX_QUEUED_HANDSHAKES); - sx_init(&sc->sc_lock, "wg softc lock"); - rw_init(&sc->sc_index_lock, "wg index lock"); - sc->sc_peer_count = 0; - sc->sc_encap_ring = buf_ring_alloc(MAX_QUEUED_PKT, M_WG, M_WAITOK, NULL); - sc->sc_decap_ring = buf_ring_alloc(MAX_QUEUED_PKT, M_WG, M_WAITOK, NULL); - GROUPTASK_INIT(&sc->sc_handshake, 0, - (gtask_fn_t 
*)wg_softc_handshake_receive, sc); - taskqgroup_attach(qgroup_if_io_tqg, &sc->sc_handshake, sc, NULL, NULL, "wg tx initiation"); - crypto_taskq_setup(sc); - - wg_hashtable_init(&sc->sc_hashtable); - sc->sc_index = hashinit(HASHTABLE_INDEX_SIZE, M_DEVBUF, &sc->sc_index_mask); - wg_aip_init(&sc->sc_aips); - - if_setmtu(ifp, ETHERMTU - 80); - ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST | IFF_NOARP; - ifp->if_init = wg_init; - ifp->if_reassign = wg_reassign; - ifp->if_qflush = wg_qflush; - ifp->if_transmit = wg_transmit; - ifp->if_output = wg_output; - ifp->if_ioctl = wg_ioctl; - - if_attach(ifp); - bpfattach(ifp, DLT_NULL, sizeof(uint32_t)); - - sx_xlock(&wg_sx); - LIST_INSERT_HEAD(&wg_list, sc, sc_entry); - sx_xunlock(&wg_sx); - - return 0; -} - -static void -wg_clone_destroy(struct ifnet *ifp) -{ - struct wg_softc *sc = ifp->if_softc; - struct ucred *cred; - - sx_xlock(&wg_sx); - sx_xlock(&sc->sc_lock); - sc->sc_flags |= WGF_DYING; - cred = sc->sc_ucred; - sc->sc_ucred = NULL; - sx_xunlock(&sc->sc_lock); - LIST_REMOVE(sc, sc_entry); - sx_xunlock(&wg_sx); - - if_link_state_change(sc->sc_ifp, LINK_STATE_DOWN); - - sx_xlock(&sc->sc_lock); - wg_socket_uninit(sc); - sx_xunlock(&sc->sc_lock); - - /* - * No guarantees that all traffic have passed until the epoch has - * elapsed with the socket closed. - */ - NET_EPOCH_WAIT(); - - taskqgroup_drain_all(qgroup_if_io_tqg); - sx_xlock(&sc->sc_lock); - wg_peer_remove_all(sc); - epoch_drain_callbacks(net_epoch_preempt); - sx_xunlock(&sc->sc_lock); - sx_destroy(&sc->sc_lock); - rw_destroy(&sc->sc_index_lock); - taskqgroup_detach(qgroup_if_io_tqg, &sc->sc_handshake); - crypto_taskq_destroy(sc); - buf_ring_free(sc->sc_encap_ring, M_WG); - buf_ring_free(sc->sc_decap_ring, M_WG); - - wg_aip_destroy(&sc->sc_aips); - wg_hashtable_destroy(&sc->sc_hashtable); - - if (cred != NULL) - crfree(cred); - if_detach(sc->sc_ifp); - if_free(sc->sc_ifp); - /* Ensure any local/private keys are cleaned up */ - explicit_bzero(sc, sizeof(*sc)); - free(sc, M_WG); - - atomic_add_int(&clone_count, -1); -} - -static void -wg_qflush(struct ifnet *ifp __unused) -{ -} - -/* - * Privileged information (private-key, preshared-key) are only exported for - * root and jailed root by default. - */ -static bool -wgc_privileged(struct wg_softc *sc) -{ - struct thread *td; - - td = curthread; - return (priv_check(td, PRIV_NET_WG) == 0); -} - -static void -wg_reassign(struct ifnet *ifp, struct vnet *new_vnet __unused, - char *unused __unused) -{ - struct wg_softc *sc; - - sc = ifp->if_softc; - wg_down(sc); -} - -static void -wg_init(void *xsc) -{ - struct wg_softc *sc; - - sc = xsc; - wg_up(sc); -} - -static void -vnet_wg_init(const void *unused __unused) -{ - - V_wg_cloner = if_clone_simple(wgname, wg_clone_create, wg_clone_destroy, - 0); -} -VNET_SYSINIT(vnet_wg_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, - vnet_wg_init, NULL); - -static void -vnet_wg_uninit(const void *unused __unused) -{ - - if_clone_detach(V_wg_cloner); -} -VNET_SYSUNINIT(vnet_wg_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, - vnet_wg_uninit, NULL); - -static int -wg_prison_remove(void *obj, void *data __unused) -{ - const struct prison *pr = obj; - struct wg_softc *sc; - struct ucred *cred; - bool dying; - - /* - * Do a pass through all if_wg interfaces and release creds on any from - * the jail that are supposed to be going away. This will, in turn, let - * the jail die so that we don't end up with Schrödinger's jail. 
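A note on if_setmtu(ifp, ETHERMTU - 80) in wg_clone_create() above: 80 bytes is WireGuard's worst-case per-packet overhead, taking the larger IPv6 outer header. The breakdown below assumes the standard WireGuard data-header layout, which this hunk does not itself show:

	40  outer IPv6 header (20 for IPv4)
	 8  UDP header
	16  struct wg_pkt_data header: 4 type + 4 receiver index + 8 counter
	16  Poly1305 authentication tag (NOISE_AUTHTAG_LEN)
	--
	80  bytes total, hence ETHERMTU - 80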
- */ - sx_slock(&wg_sx); - LIST_FOREACH(sc, &wg_list, sc_entry) { - cred = NULL; - - sx_xlock(&sc->sc_lock); - dying = (sc->sc_flags & WGF_DYING) != 0; - if (!dying && sc->sc_ucred != NULL && - sc->sc_ucred->cr_prison == pr) { - /* Home jail is going away. */ - cred = sc->sc_ucred; - sc->sc_ucred = NULL; - - sc->sc_flags |= WGF_DYING; - } - - /* - * If this is our foreign vnet going away, we'll also down the - * link and kill the socket because traffic needs to stop. Any - * address will be revoked in the rehoming process. - */ - if (cred != NULL || (!dying && - sc->sc_ifp->if_vnet == pr->pr_vnet)) { - if_link_state_change(sc->sc_ifp, LINK_STATE_DOWN); - /* Have to kill the sockets, as they also hold refs. */ - wg_socket_uninit(sc); - } - - sx_xunlock(&sc->sc_lock); - - if (cred != NULL) { - CURVNET_SET(sc->sc_ifp->if_vnet); - if_purgeaddrs(sc->sc_ifp); - CURVNET_RESTORE(); - crfree(cred); - } - } - sx_sunlock(&wg_sx); - - return (0); -} - -static void -wg_module_init(void) -{ - osd_method_t methods[PR_MAXMETHOD] = { - [PR_METHOD_REMOVE] = wg_prison_remove, - }; - - ratelimit_zone = uma_zcreate("wg ratelimit", sizeof(struct ratelimit), - NULL, NULL, NULL, NULL, 0, 0); - wg_osd_jail_slot = osd_jail_register(NULL, methods); -} - -static void -wg_module_deinit(void) -{ - - uma_zdestroy(ratelimit_zone); - osd_jail_deregister(wg_osd_jail_slot); - - MPASS(LIST_EMPTY(&wg_list)); -} - -static int -wg_module_event_handler(module_t mod, int what, void *arg) -{ - - switch (what) { - case MOD_LOAD: - wg_module_init(); - break; - case MOD_UNLOAD: - if (atomic_load_int(&clone_count) == 0) - wg_module_deinit(); - else - return (EBUSY); - break; - default: - return (EOPNOTSUPP); - } - return (0); -} - -static moduledata_t wg_moduledata = { - "wg", - wg_module_event_handler, - NULL -}; - -DECLARE_MODULE(wg, wg_moduledata, SI_SUB_PSEUDO, SI_ORDER_ANY); -MODULE_VERSION(wg, 1); -MODULE_DEPEND(wg, crypto, 1, 1, 1); diff --git a/sys/dev/if_wg/if_wg.h b/sys/dev/if_wg/if_wg.h deleted file mode 100644 index 2a100456d406..000000000000 --- a/sys/dev/if_wg/if_wg.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (c) 2019 Matt Dunwoodie - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - * - * $FreeBSD$ - */ - -#ifndef __IF_WG_H__ -#define __IF_WG_H__ - -#include -#include - -struct wg_data_io { - char wgd_name[IFNAMSIZ]; - void *wgd_data; - size_t wgd_size; -}; - -#define WG_KEY_SIZE 32 - -#define SIOCSWG _IOWR('i', 210, struct wg_data_io) -#define SIOCGWG _IOWR('i', 211, struct wg_data_io) - -#endif /* __IF_WG_H__ */ diff --git a/sys/dev/if_wg/support.h b/sys/dev/if_wg/support.h deleted file mode 100644 index 412806b465af..000000000000 --- a/sys/dev/if_wg/support.h +++ /dev/null @@ -1,56 +0,0 @@ -/* SPDX-License-Identifier: MIT - * - * Copyright (C) 2021 Jason A. Donenfeld . All Rights Reserved. 
- * Copyright (C) 2021 Matt Dunwoodie - */ - -#ifndef _WG_SUPPORT -#define _WG_SUPPORT - -#include -#include -#include -#include -#include -#include -#include -#include - -/* TODO the following is openbsd compat defines to allow us to copy the wg_* - * files from openbsd (almost) verbatim. this will greatly increase maintenance - * across the platforms. it should be moved to it's own file. the only thing - * we're missing from this is struct pool (freebsd: uma_zone_t), which isn't a - * show stopper, but is something worth considering in the future. - * - md */ - -#define rw_assert_wrlock(x) rw_assert(x, RA_WLOCKED) -#define rw_enter_write rw_wlock -#define rw_exit_write rw_wunlock -#define rw_enter_read rw_rlock -#define rw_exit_read rw_runlock -#define rw_exit rw_unlock - -#define RW_DOWNGRADE 1 -#define rw_enter(x, y) do { \ - CTASSERT(y == RW_DOWNGRADE); \ - rw_downgrade(x); \ -} while (0) - -MALLOC_DECLARE(M_WG); - -#include -typedef struct { - uint64_t k0; - uint64_t k1; -} SIPHASH_KEY; - -static inline uint64_t -siphash24(const SIPHASH_KEY *key, const void *src, size_t len) -{ - SIPHASH_CTX ctx; - - return (SipHashX(&ctx, 2, 4, (const uint8_t *)key, src, len)); -} -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) - -#endif diff --git a/sys/dev/if_wg/wg_cookie.c b/sys/dev/if_wg/wg_cookie.c deleted file mode 100644 index c56b2fb2e75e..000000000000 --- a/sys/dev/if_wg/wg_cookie.c +++ /dev/null @@ -1,438 +0,0 @@ -/* - * Copyright (C) 2015-2020 Jason A. Donenfeld . All Rights Reserved. - * Copyright (C) 2019-2020 Matt Dunwoodie - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
- */ - -#include -#include -#include -#include -#include /* Because systm doesn't include M_NOWAIT, M_DEVBUF */ -#include - -#include "support.h" -#include "wg_cookie.h" - -static void cookie_precompute_key(uint8_t *, - const uint8_t[COOKIE_INPUT_SIZE], const char *); -static void cookie_macs_mac1(struct cookie_macs *, const void *, size_t, - const uint8_t[COOKIE_KEY_SIZE]); -static void cookie_macs_mac2(struct cookie_macs *, const void *, size_t, - const uint8_t[COOKIE_COOKIE_SIZE]); -static int cookie_timer_expired(struct timespec *, time_t, long); -static void cookie_checker_make_cookie(struct cookie_checker *, - uint8_t[COOKIE_COOKIE_SIZE], struct sockaddr *); -static int ratelimit_init(struct ratelimit *, uma_zone_t); -static void ratelimit_deinit(struct ratelimit *); -static void ratelimit_gc(struct ratelimit *, int); -static int ratelimit_allow(struct ratelimit *, struct sockaddr *); - -/* Public Functions */ -void -cookie_maker_init(struct cookie_maker *cp, const uint8_t key[COOKIE_INPUT_SIZE]) -{ - bzero(cp, sizeof(*cp)); - cookie_precompute_key(cp->cp_mac1_key, key, COOKIE_MAC1_KEY_LABEL); - cookie_precompute_key(cp->cp_cookie_key, key, COOKIE_COOKIE_KEY_LABEL); - rw_init(&cp->cp_lock, "cookie_maker"); -} - -int -cookie_checker_init(struct cookie_checker *cc, uma_zone_t zone) -{ - int res; - bzero(cc, sizeof(*cc)); - - rw_init(&cc->cc_key_lock, "cookie_checker_key"); - rw_init(&cc->cc_secret_lock, "cookie_checker_secret"); - - if ((res = ratelimit_init(&cc->cc_ratelimit_v4, zone)) != 0) - return res; -#ifdef INET6 - if ((res = ratelimit_init(&cc->cc_ratelimit_v6, zone)) != 0) { - ratelimit_deinit(&cc->cc_ratelimit_v4); - return res; - } -#endif - return 0; -} - -void -cookie_checker_update(struct cookie_checker *cc, - const uint8_t key[COOKIE_INPUT_SIZE]) -{ - rw_enter_write(&cc->cc_key_lock); - if (key) { - cookie_precompute_key(cc->cc_mac1_key, key, COOKIE_MAC1_KEY_LABEL); - cookie_precompute_key(cc->cc_cookie_key, key, COOKIE_COOKIE_KEY_LABEL); - } else { - bzero(cc->cc_mac1_key, sizeof(cc->cc_mac1_key)); - bzero(cc->cc_cookie_key, sizeof(cc->cc_cookie_key)); - } - rw_exit_write(&cc->cc_key_lock); -} - -void -cookie_checker_deinit(struct cookie_checker *cc) -{ - ratelimit_deinit(&cc->cc_ratelimit_v4); -#ifdef INET6 - ratelimit_deinit(&cc->cc_ratelimit_v6); -#endif -} - -void -cookie_checker_create_payload(struct cookie_checker *cc, - struct cookie_macs *cm, uint8_t nonce[COOKIE_NONCE_SIZE], - uint8_t ecookie[COOKIE_ENCRYPTED_SIZE], struct sockaddr *sa) -{ - uint8_t cookie[COOKIE_COOKIE_SIZE]; - - cookie_checker_make_cookie(cc, cookie, sa); - arc4random_buf(nonce, COOKIE_NONCE_SIZE); - - rw_enter_read(&cc->cc_key_lock); - xchacha20poly1305_encrypt(ecookie, cookie, COOKIE_COOKIE_SIZE, - cm->mac1, COOKIE_MAC_SIZE, nonce, cc->cc_cookie_key); - rw_exit_read(&cc->cc_key_lock); - - explicit_bzero(cookie, sizeof(cookie)); -} - -int -cookie_maker_consume_payload(struct cookie_maker *cp, - uint8_t nonce[COOKIE_NONCE_SIZE], uint8_t ecookie[COOKIE_ENCRYPTED_SIZE]) -{ - int ret = 0; - uint8_t cookie[COOKIE_COOKIE_SIZE]; - - rw_enter_write(&cp->cp_lock); - - if (cp->cp_mac1_valid == 0) { - ret = ETIMEDOUT; - goto error; - } - - if (xchacha20poly1305_decrypt(cookie, ecookie, COOKIE_ENCRYPTED_SIZE, - cp->cp_mac1_last, COOKIE_MAC_SIZE, nonce, cp->cp_cookie_key) == 0) { - ret = EINVAL; - goto error; - } - - memcpy(cp->cp_cookie, cookie, COOKIE_COOKIE_SIZE); - getnanouptime(&cp->cp_birthdate); - cp->cp_mac1_valid = 0; - -error: - rw_exit_write(&cp->cp_lock); - return ret; -} - -void 
-cookie_maker_mac(struct cookie_maker *cp, struct cookie_macs *cm, void *buf, - size_t len) -{ - rw_enter_read(&cp->cp_lock); - - cookie_macs_mac1(cm, buf, len, cp->cp_mac1_key); - - memcpy(cp->cp_mac1_last, cm->mac1, COOKIE_MAC_SIZE); - cp->cp_mac1_valid = 1; - - if (!cookie_timer_expired(&cp->cp_birthdate, - COOKIE_SECRET_MAX_AGE - COOKIE_SECRET_LATENCY, 0)) - cookie_macs_mac2(cm, buf, len, cp->cp_cookie); - else - bzero(cm->mac2, COOKIE_MAC_SIZE); - - rw_exit_read(&cp->cp_lock); -} - -int -cookie_checker_validate_macs(struct cookie_checker *cc, struct cookie_macs *cm, - void *buf, size_t len, int busy, struct sockaddr *sa) -{ - struct cookie_macs our_cm; - uint8_t cookie[COOKIE_COOKIE_SIZE]; - - /* Validate incoming MACs */ - rw_enter_read(&cc->cc_key_lock); - cookie_macs_mac1(&our_cm, buf, len, cc->cc_mac1_key); - rw_exit_read(&cc->cc_key_lock); - - /* If mac1 is invalid, we want to drop the packet */ - if (timingsafe_bcmp(our_cm.mac1, cm->mac1, COOKIE_MAC_SIZE) != 0) - return EINVAL; - - if (busy != 0) { - cookie_checker_make_cookie(cc, cookie, sa); - cookie_macs_mac2(&our_cm, buf, len, cookie); - - /* If the mac2 is invalid, we want to send a cookie response */ - if (timingsafe_bcmp(our_cm.mac2, cm->mac2, COOKIE_MAC_SIZE) != 0) - return EAGAIN; - - /* If the mac2 is valid, we may want to rate limit the peer. - * ratelimit_allow will return either 0 or ECONNREFUSED, - * implying there is no ratelimiting, or we should ratelimit - * (refuse) respectively. */ - if (sa->sa_family == AF_INET) - return ratelimit_allow(&cc->cc_ratelimit_v4, sa); -#ifdef INET6 - else if (sa->sa_family == AF_INET6) - return ratelimit_allow(&cc->cc_ratelimit_v6, sa); -#endif - else - return EAFNOSUPPORT; - } - return 0; -} - -/* Private functions */ -static void -cookie_precompute_key(uint8_t *key, const uint8_t input[COOKIE_INPUT_SIZE], - const char *label) -{ - struct blake2s_state blake; - - blake2s_init(&blake, COOKIE_KEY_SIZE); - blake2s_update(&blake, label, strlen(label)); - blake2s_update(&blake, input, COOKIE_INPUT_SIZE); - /* TODO we shouldn't need to provide outlen to _final. we can align - * this with openbsd after fixing the blake library. */ - blake2s_final(&blake, key); -} - -static void -cookie_macs_mac1(struct cookie_macs *cm, const void *buf, size_t len, - const uint8_t key[COOKIE_KEY_SIZE]) -{ - struct blake2s_state state; - blake2s_init_key(&state, COOKIE_MAC_SIZE, key, COOKIE_KEY_SIZE); - blake2s_update(&state, buf, len); - blake2s_final(&state, cm->mac1); -} - -static void -cookie_macs_mac2(struct cookie_macs *cm, const void *buf, size_t len, - const uint8_t key[COOKIE_COOKIE_SIZE]) -{ - struct blake2s_state state; - blake2s_init_key(&state, COOKIE_MAC_SIZE, key, COOKIE_COOKIE_SIZE); - blake2s_update(&state, buf, len); - blake2s_update(&state, cm->mac1, COOKIE_MAC_SIZE); - blake2s_final(&state, cm->mac2); -} - -static int -cookie_timer_expired(struct timespec *birthdate, time_t sec, long nsec) -{ - struct timespec uptime; - struct timespec expire = { .tv_sec = sec, .tv_nsec = nsec }; - - if (birthdate->tv_sec == 0 && birthdate->tv_nsec == 0) - return ETIMEDOUT; - - getnanouptime(&uptime); - timespecadd(birthdate, &expire, &expire); - return timespeccmp(&uptime, &expire, >) ?
ETIMEDOUT : 0; -} - -static void -cookie_checker_make_cookie(struct cookie_checker *cc, - uint8_t cookie[COOKIE_COOKIE_SIZE], struct sockaddr *sa) -{ - struct blake2s_state state; - - rw_enter_write(&cc->cc_secret_lock); - if (cookie_timer_expired(&cc->cc_secret_birthdate, - COOKIE_SECRET_MAX_AGE, 0)) { - arc4random_buf(cc->cc_secret, COOKIE_SECRET_SIZE); - getnanouptime(&cc->cc_secret_birthdate); - } - blake2s_init_key(&state, COOKIE_COOKIE_SIZE, cc->cc_secret, - COOKIE_SECRET_SIZE); - rw_exit_write(&cc->cc_secret_lock); - - if (sa->sa_family == AF_INET) { - blake2s_update(&state, (uint8_t *)&satosin(sa)->sin_addr, - sizeof(struct in_addr)); - blake2s_update(&state, (uint8_t *)&satosin(sa)->sin_port, - sizeof(in_port_t)); - blake2s_final(&state, cookie); -#ifdef INET6 - } else if (sa->sa_family == AF_INET6) { - blake2s_update(&state, (uint8_t *)&satosin6(sa)->sin6_addr, - sizeof(struct in6_addr)); - blake2s_update(&state, (uint8_t *)&satosin6(sa)->sin6_port, - sizeof(in_port_t)); - blake2s_final(&state, cookie); -#endif - } else { - arc4random_buf(cookie, COOKIE_COOKIE_SIZE); - } -} - -static int -ratelimit_init(struct ratelimit *rl, uma_zone_t zone) -{ - rw_init(&rl->rl_lock, "ratelimit_lock"); - arc4random_buf(&rl->rl_secret, sizeof(rl->rl_secret)); - rl->rl_table = hashinit_flags(RATELIMIT_SIZE, M_DEVBUF, - &rl->rl_table_mask, M_NOWAIT); - rl->rl_zone = zone; - rl->rl_table_num = 0; - return rl->rl_table == NULL ? ENOBUFS : 0; -} - -static void -ratelimit_deinit(struct ratelimit *rl) -{ - rw_enter_write(&rl->rl_lock); - ratelimit_gc(rl, 1); - hashdestroy(rl->rl_table, M_DEVBUF, rl->rl_table_mask); - rw_exit_write(&rl->rl_lock); -} - -static void -ratelimit_gc(struct ratelimit *rl, int force) -{ - size_t i; - struct ratelimit_entry *r, *tr; - struct timespec expiry; - - rw_assert_wrlock(&rl->rl_lock); - - if (force) { - for (i = 0; i < RATELIMIT_SIZE; i++) { - LIST_FOREACH_SAFE(r, &rl->rl_table[i], r_entry, tr) { - rl->rl_table_num--; - LIST_REMOVE(r, r_entry); - uma_zfree(rl->rl_zone, r); - } - } - return; - } - - if ((cookie_timer_expired(&rl->rl_last_gc, ELEMENT_TIMEOUT, 0) && - rl->rl_table_num > 0)) { - getnanouptime(&rl->rl_last_gc); - getnanouptime(&expiry); - expiry.tv_sec -= ELEMENT_TIMEOUT; - - for (i = 0; i < RATELIMIT_SIZE; i++) { - LIST_FOREACH_SAFE(r, &rl->rl_table[i], r_entry, tr) { - if (timespeccmp(&r->r_last_time, &expiry, <)) { - rl->rl_table_num--; - LIST_REMOVE(r, r_entry); - uma_zfree(rl->rl_zone, r); - } - } - } - } -} - -static int -ratelimit_allow(struct ratelimit *rl, struct sockaddr *sa) -{ - uint64_t key, tokens; - struct timespec diff; - struct ratelimit_entry *r; - int ret = ECONNREFUSED; - - if (sa->sa_family == AF_INET) - /* TODO siphash24 is the FreeBSD siphash, OK? */ - key = siphash24(&rl->rl_secret, &satosin(sa)->sin_addr, - IPV4_MASK_SIZE); -#ifdef INET6 - else if (sa->sa_family == AF_INET6) - key = siphash24(&rl->rl_secret, &satosin6(sa)->sin6_addr, - IPV6_MASK_SIZE); -#endif - else - return ret; - - rw_enter_write(&rl->rl_lock); - - LIST_FOREACH(r, &rl->rl_table[key & rl->rl_table_mask], r_entry) { - if (r->r_af != sa->sa_family) - continue; - - if (r->r_af == AF_INET && bcmp(&r->r_in, - &satosin(sa)->sin_addr, IPV4_MASK_SIZE) != 0) - continue; - -#ifdef INET6 - if (r->r_af == AF_INET6 && bcmp(&r->r_in6, - &satosin6(sa)->sin6_addr, IPV6_MASK_SIZE) != 0) - continue; -#endif - - /* If we get to here, we've found an entry for the endpoint. 
- * We apply standard token bucket, by calculating the time - * elapsed since our last_time, adding that, ensuring that we - * cap the tokens at TOKEN_MAX. If the endpoint has no tokens - * left (that is tokens < INITIATION_COST) then we block the - * request, otherwise we subtract INITIATION_COST and - * return OK. With the defaults in wg_cookie.h - * (INITIATIONS_PER_SECOND 20, INITIATIONS_BURSTABLE 5), a peer - * may burst 5 initiations and then sustain 20 per second. */ - diff = r->r_last_time; - getnanouptime(&r->r_last_time); - timespecsub(&r->r_last_time, &diff, &diff); - - tokens = r->r_tokens + diff.tv_sec * NSEC_PER_SEC + diff.tv_nsec; - - if (tokens > TOKEN_MAX) - tokens = TOKEN_MAX; - - if (tokens >= INITIATION_COST) { - r->r_tokens = tokens - INITIATION_COST; - goto ok; - } else { - r->r_tokens = tokens; - goto error; - } - } - - /* If we get to here, we didn't have an entry for the endpoint. */ - ratelimit_gc(rl, 0); - - /* Hard limit on number of entries */ - if (rl->rl_table_num >= RATELIMIT_SIZE_MAX) - goto error; - - /* Goto error if out of memory */ - if ((r = uma_zalloc(rl->rl_zone, M_NOWAIT)) == NULL) - goto error; - - rl->rl_table_num++; - - /* Insert entry into the hashtable and ensure it's initialised */ - LIST_INSERT_HEAD(&rl->rl_table[key & rl->rl_table_mask], r, r_entry); - r->r_af = sa->sa_family; - if (r->r_af == AF_INET) - memcpy(&r->r_in, &satosin(sa)->sin_addr, IPV4_MASK_SIZE); -#ifdef INET6 - else if (r->r_af == AF_INET6) - memcpy(&r->r_in6, &satosin6(sa)->sin6_addr, IPV6_MASK_SIZE); -#endif - - getnanouptime(&r->r_last_time); - r->r_tokens = TOKEN_MAX - INITIATION_COST; -ok: - ret = 0; -error: - rw_exit_write(&rl->rl_lock); - return ret; -} diff --git a/sys/dev/if_wg/wg_cookie.h b/sys/dev/if_wg/wg_cookie.h deleted file mode 100644 index 699f3ebf40c1..000000000000 --- a/sys/dev/if_wg/wg_cookie.h +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (C) 2015-2020 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. - * Copyright (C) 2019-2020 Matt Dunwoodie <ncon@noconroy.net> - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */ - -#ifndef __COOKIE_H__ -#define __COOKIE_H__ - -#include -#include -#include -#include - -#include - -#include "crypto.h" - -#define COOKIE_MAC_SIZE 16 -#define COOKIE_KEY_SIZE 32 -#define COOKIE_NONCE_SIZE XCHACHA20POLY1305_NONCE_SIZE -#define COOKIE_COOKIE_SIZE 16 -#define COOKIE_SECRET_SIZE 32 -#define COOKIE_INPUT_SIZE 32 -#define COOKIE_ENCRYPTED_SIZE (COOKIE_COOKIE_SIZE + COOKIE_MAC_SIZE) - -#define COOKIE_MAC1_KEY_LABEL "mac1----" -#define COOKIE_COOKIE_KEY_LABEL "cookie--" -#define COOKIE_SECRET_MAX_AGE 120 -#define COOKIE_SECRET_LATENCY 5 - -/* Constants for initiation rate limiting */ -#define RATELIMIT_SIZE (1 << 13) -#define RATELIMIT_SIZE_MAX (RATELIMIT_SIZE * 8) -#define NSEC_PER_SEC 1000000000LL -#define INITIATIONS_PER_SECOND 20 -#define INITIATIONS_BURSTABLE 5 -#define INITIATION_COST (NSEC_PER_SEC / INITIATIONS_PER_SECOND) -#define TOKEN_MAX (INITIATION_COST * INITIATIONS_BURSTABLE) -#define ELEMENT_TIMEOUT 1 -#define IPV4_MASK_SIZE 4 /* Use all 4 bytes of IPv4 address */ -#define IPV6_MASK_SIZE 8 /* Use top 8 bytes (/64) of IPv6 address */ - -struct cookie_macs { - uint8_t mac1[COOKIE_MAC_SIZE]; - uint8_t mac2[COOKIE_MAC_SIZE]; -}; - -struct ratelimit_entry { - LIST_ENTRY(ratelimit_entry) r_entry; - sa_family_t r_af; - union { - struct in_addr r_in; -#ifdef INET6 - struct in6_addr r_in6; -#endif - }; - struct timespec r_last_time; /* nanouptime */ - uint64_t r_tokens; -}; - -struct ratelimit { - SIPHASH_KEY rl_secret; - uma_zone_t rl_zone; - - struct rwlock rl_lock; - LIST_HEAD(, ratelimit_entry) *rl_table; - u_long rl_table_mask; - size_t rl_table_num; - struct timespec rl_last_gc; /* nanouptime */ -}; - -struct cookie_maker { - uint8_t cp_mac1_key[COOKIE_KEY_SIZE]; - uint8_t cp_cookie_key[COOKIE_KEY_SIZE]; - - struct rwlock cp_lock; - uint8_t cp_cookie[COOKIE_COOKIE_SIZE]; - struct timespec cp_birthdate; /* nanouptime */ - int cp_mac1_valid; - uint8_t cp_mac1_last[COOKIE_MAC_SIZE]; -}; - -struct cookie_checker { - struct ratelimit cc_ratelimit_v4; -#ifdef INET6 - struct ratelimit cc_ratelimit_v6; -#endif - - struct rwlock cc_key_lock; - uint8_t cc_mac1_key[COOKIE_KEY_SIZE]; - uint8_t cc_cookie_key[COOKIE_KEY_SIZE]; - - struct rwlock cc_secret_lock; - struct timespec cc_secret_birthdate; /* nanouptime */ - uint8_t cc_secret[COOKIE_SECRET_SIZE]; -}; - -void cookie_maker_init(struct cookie_maker *, const uint8_t[COOKIE_INPUT_SIZE]); -int cookie_checker_init(struct cookie_checker *, uma_zone_t); -void cookie_checker_update(struct cookie_checker *, - const uint8_t[COOKIE_INPUT_SIZE]); -void cookie_checker_deinit(struct cookie_checker *); -void cookie_checker_create_payload(struct cookie_checker *, - struct cookie_macs *cm, uint8_t[COOKIE_NONCE_SIZE], - uint8_t [COOKIE_ENCRYPTED_SIZE], struct sockaddr *); -int cookie_maker_consume_payload(struct cookie_maker *, - uint8_t[COOKIE_NONCE_SIZE], uint8_t[COOKIE_ENCRYPTED_SIZE]); -void cookie_maker_mac(struct cookie_maker *, struct cookie_macs *, - void *, size_t); -int cookie_checker_validate_macs(struct cookie_checker *, - struct cookie_macs *, void *, size_t, int, struct sockaddr *); - -#endif /* __COOKIE_H__ */ diff --git a/sys/dev/if_wg/wg_noise.c b/sys/dev/if_wg/wg_noise.c deleted file mode 100644 index ae527eb99bde..000000000000 --- a/sys/dev/if_wg/wg_noise.c +++ /dev/null @@ -1,963 +0,0 @@ -/* - * Copyright (C) 2015-2020 Jason A. Donenfeld . All Rights Reserved. 
- * Copyright (C) 2019-2020 Matt Dunwoodie - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -#include -#include -#include -#include - -#include "support.h" -#include "wg_noise.h" - -/* Private functions */ -static struct noise_keypair * - noise_remote_keypair_allocate(struct noise_remote *); -static void - noise_remote_keypair_free(struct noise_remote *, - struct noise_keypair *); -static uint32_t noise_remote_handshake_index_get(struct noise_remote *); -static void noise_remote_handshake_index_drop(struct noise_remote *); - -static uint64_t noise_counter_send(struct noise_counter *); -static int noise_counter_recv(struct noise_counter *, uint64_t); - -static void noise_kdf(uint8_t *, uint8_t *, uint8_t *, const uint8_t *, - size_t, size_t, size_t, size_t, - const uint8_t [NOISE_HASH_LEN]); -static int noise_mix_dh( - uint8_t [NOISE_HASH_LEN], - uint8_t [NOISE_SYMMETRIC_KEY_LEN], - const uint8_t [NOISE_PUBLIC_KEY_LEN], - const uint8_t [NOISE_PUBLIC_KEY_LEN]); -static int noise_mix_ss( - uint8_t ck[NOISE_HASH_LEN], - uint8_t key[NOISE_SYMMETRIC_KEY_LEN], - const uint8_t ss[NOISE_PUBLIC_KEY_LEN]); -static void noise_mix_hash( - uint8_t [NOISE_HASH_LEN], - const uint8_t *, - size_t); -static void noise_mix_psk( - uint8_t [NOISE_HASH_LEN], - uint8_t [NOISE_HASH_LEN], - uint8_t [NOISE_SYMMETRIC_KEY_LEN], - const uint8_t [NOISE_SYMMETRIC_KEY_LEN]); -static void noise_param_init( - uint8_t [NOISE_HASH_LEN], - uint8_t [NOISE_HASH_LEN], - const uint8_t [NOISE_PUBLIC_KEY_LEN]); - -static void noise_msg_encrypt(uint8_t *, const uint8_t *, size_t, - uint8_t [NOISE_SYMMETRIC_KEY_LEN], - uint8_t [NOISE_HASH_LEN]); -static int noise_msg_decrypt(uint8_t *, const uint8_t *, size_t, - uint8_t [NOISE_SYMMETRIC_KEY_LEN], - uint8_t [NOISE_HASH_LEN]); -static void noise_msg_ephemeral( - uint8_t [NOISE_HASH_LEN], - uint8_t [NOISE_HASH_LEN], - const uint8_t src[NOISE_PUBLIC_KEY_LEN]); - -static void noise_tai64n_now(uint8_t [NOISE_TIMESTAMP_LEN]); -static int noise_timer_expired(struct timespec *, time_t, long); - -/* Set/Get noise parameters */ -void -noise_local_init(struct noise_local *l, struct noise_upcall *upcall) -{ - bzero(l, sizeof(*l)); - rw_init(&l->l_identity_lock, "noise_local_identity"); - l->l_upcall = *upcall; -} - -void -noise_local_lock_identity(struct noise_local *l) -{ - rw_enter_write(&l->l_identity_lock); -} - -void -noise_local_unlock_identity(struct noise_local *l) -{ - rw_exit_write(&l->l_identity_lock); -} - -int -noise_local_set_private(struct noise_local *l, - const uint8_t private[NOISE_PUBLIC_KEY_LEN]) -{ - rw_assert_wrlock(&l->l_identity_lock); - - memcpy(l->l_private, private, NOISE_PUBLIC_KEY_LEN); - curve25519_clamp_secret(l->l_private); - l->l_has_identity = curve25519_generate_public(l->l_public, private); - - return l->l_has_identity ? 
0 : ENXIO; -} - -int -noise_local_keys(struct noise_local *l, uint8_t public[NOISE_PUBLIC_KEY_LEN], - uint8_t private[NOISE_PUBLIC_KEY_LEN]) -{ - int ret = 0; - rw_enter_read(&l->l_identity_lock); - if (l->l_has_identity) { - if (public != NULL) - memcpy(public, l->l_public, NOISE_PUBLIC_KEY_LEN); - if (private != NULL) - memcpy(private, l->l_private, NOISE_PUBLIC_KEY_LEN); - } else { - ret = ENXIO; - } - rw_exit_read(&l->l_identity_lock); - return ret; -} - -void -noise_remote_init(struct noise_remote *r, - const uint8_t public[NOISE_PUBLIC_KEY_LEN], struct noise_local *l) -{ - bzero(r, sizeof(*r)); - memcpy(r->r_public, public, NOISE_PUBLIC_KEY_LEN); - rw_init(&r->r_handshake_lock, "noise_handshake"); - rw_init(&r->r_keypair_lock, "noise_keypair"); - - SLIST_INSERT_HEAD(&r->r_unused_keypairs, &r->r_keypair[0], kp_entry); - SLIST_INSERT_HEAD(&r->r_unused_keypairs, &r->r_keypair[1], kp_entry); - SLIST_INSERT_HEAD(&r->r_unused_keypairs, &r->r_keypair[2], kp_entry); - - KASSERT(l != NULL, ("must provide local")); - r->r_local = l; - - rw_enter_write(&l->l_identity_lock); - noise_remote_precompute(r); - rw_exit_write(&l->l_identity_lock); -} - -int -noise_remote_set_psk(struct noise_remote *r, - const uint8_t psk[NOISE_SYMMETRIC_KEY_LEN]) -{ - int same; - rw_enter_write(&r->r_handshake_lock); - same = !timingsafe_bcmp(r->r_psk, psk, NOISE_SYMMETRIC_KEY_LEN); - if (!same) { - memcpy(r->r_psk, psk, NOISE_SYMMETRIC_KEY_LEN); - } - rw_exit_write(&r->r_handshake_lock); - return same ? EEXIST : 0; -} - -int -noise_remote_keys(struct noise_remote *r, uint8_t public[NOISE_PUBLIC_KEY_LEN], - uint8_t psk[NOISE_SYMMETRIC_KEY_LEN]) -{ - static uint8_t null_psk[NOISE_SYMMETRIC_KEY_LEN]; - int ret; - - if (public != NULL) - memcpy(public, r->r_public, NOISE_PUBLIC_KEY_LEN); - - rw_enter_read(&r->r_handshake_lock); - if (psk != NULL) - memcpy(psk, r->r_psk, NOISE_SYMMETRIC_KEY_LEN); - ret = timingsafe_bcmp(r->r_psk, null_psk, NOISE_SYMMETRIC_KEY_LEN); - rw_exit_read(&r->r_handshake_lock); - - /* If r_psk != null_psk return 0, else ENOENT (no psk) */ - return ret ? 
0 : ENOENT; -} - -void -noise_remote_precompute(struct noise_remote *r) -{ - struct noise_local *l = r->r_local; - rw_assert_wrlock(&l->l_identity_lock); - if (!l->l_has_identity) - bzero(r->r_ss, NOISE_PUBLIC_KEY_LEN); - else if (!curve25519(r->r_ss, l->l_private, r->r_public)) - bzero(r->r_ss, NOISE_PUBLIC_KEY_LEN); - - rw_enter_write(&r->r_handshake_lock); - noise_remote_handshake_index_drop(r); - explicit_bzero(&r->r_handshake, sizeof(r->r_handshake)); - rw_exit_write(&r->r_handshake_lock); -} - -/* Handshake functions */ -int -noise_create_initiation(struct noise_remote *r, uint32_t *s_idx, - uint8_t ue[NOISE_PUBLIC_KEY_LEN], - uint8_t es[NOISE_PUBLIC_KEY_LEN + NOISE_AUTHTAG_LEN], - uint8_t ets[NOISE_TIMESTAMP_LEN + NOISE_AUTHTAG_LEN]) -{ - struct noise_handshake *hs = &r->r_handshake; - struct noise_local *l = r->r_local; - uint8_t key[NOISE_SYMMETRIC_KEY_LEN]; - int ret = EINVAL; - - rw_enter_read(&l->l_identity_lock); - rw_enter_write(&r->r_handshake_lock); - if (!l->l_has_identity) - goto error; - noise_param_init(hs->hs_ck, hs->hs_hash, r->r_public); - - /* e */ - curve25519_generate_secret(hs->hs_e); - if (curve25519_generate_public(ue, hs->hs_e) == 0) - goto error; - noise_msg_ephemeral(hs->hs_ck, hs->hs_hash, ue); - - /* es */ - if (noise_mix_dh(hs->hs_ck, key, hs->hs_e, r->r_public) != 0) - goto error; - - /* s */ - noise_msg_encrypt(es, l->l_public, - NOISE_PUBLIC_KEY_LEN, key, hs->hs_hash); - - /* ss */ - if (noise_mix_ss(hs->hs_ck, key, r->r_ss) != 0) - goto error; - - /* {t} */ - noise_tai64n_now(ets); - noise_msg_encrypt(ets, ets, - NOISE_TIMESTAMP_LEN, key, hs->hs_hash); - - noise_remote_handshake_index_drop(r); - hs->hs_state = CREATED_INITIATION; - hs->hs_local_index = noise_remote_handshake_index_get(r); - *s_idx = hs->hs_local_index; - ret = 0; -error: - rw_exit_write(&r->r_handshake_lock); - rw_exit_read(&l->l_identity_lock); - explicit_bzero(key, NOISE_SYMMETRIC_KEY_LEN); - return ret; -} - -int -noise_consume_initiation(struct noise_local *l, struct noise_remote **rp, - uint32_t s_idx, uint8_t ue[NOISE_PUBLIC_KEY_LEN], - uint8_t es[NOISE_PUBLIC_KEY_LEN + NOISE_AUTHTAG_LEN], - uint8_t ets[NOISE_TIMESTAMP_LEN + NOISE_AUTHTAG_LEN]) -{ - struct noise_remote *r; - struct noise_handshake hs; - uint8_t key[NOISE_SYMMETRIC_KEY_LEN]; - uint8_t r_public[NOISE_PUBLIC_KEY_LEN]; - uint8_t timestamp[NOISE_TIMESTAMP_LEN]; - int ret = EINVAL; - - rw_enter_read(&l->l_identity_lock); - if (!l->l_has_identity) - goto error; - noise_param_init(hs.hs_ck, hs.hs_hash, l->l_public); - - /* e */ - noise_msg_ephemeral(hs.hs_ck, hs.hs_hash, ue); - - /* es */ - if (noise_mix_dh(hs.hs_ck, key, l->l_private, ue) != 0) - goto error; - - /* s */ - if (noise_msg_decrypt(r_public, es, - NOISE_PUBLIC_KEY_LEN + NOISE_AUTHTAG_LEN, key, hs.hs_hash) != 0) - goto error; - - /* Lookup the remote we received from */ - if ((r = l->l_upcall.u_remote_get(l->l_upcall.u_arg, r_public)) == NULL) - goto error; - - /* ss */ - if (noise_mix_ss(hs.hs_ck, key, r->r_ss) != 0) - goto error; - - /* {t} */ - if (noise_msg_decrypt(timestamp, ets, - NOISE_TIMESTAMP_LEN + NOISE_AUTHTAG_LEN, key, hs.hs_hash) != 0) - goto error; - - hs.hs_state = CONSUMED_INITIATION; - hs.hs_local_index = 0; - hs.hs_remote_index = s_idx; - memcpy(hs.hs_e, ue, NOISE_PUBLIC_KEY_LEN); - - /* We have successfully computed the same results, now we ensure that - * this is not an initiation replay, or a flood attack */ - rw_enter_write(&r->r_handshake_lock); - - /* Replay */ - if (memcmp(timestamp, r->r_timestamp, NOISE_TIMESTAMP_LEN) > 0) - 
memcpy(r->r_timestamp, timestamp, NOISE_TIMESTAMP_LEN); - else - goto error_set; - /* Flood attack */ - if (noise_timer_expired(&r->r_last_init, 0, REJECT_INTERVAL)) - getnanouptime(&r->r_last_init); - else - goto error_set; - - /* Ok, we're happy to accept this initiation now */ - noise_remote_handshake_index_drop(r); - r->r_handshake = hs; - *rp = r; - ret = 0; -error_set: - rw_exit_write(&r->r_handshake_lock); -error: - rw_exit_read(&l->l_identity_lock); - explicit_bzero(key, NOISE_SYMMETRIC_KEY_LEN); - explicit_bzero(&hs, sizeof(hs)); - return ret; -} - -int -noise_create_response(struct noise_remote *r, uint32_t *s_idx, uint32_t *r_idx, - uint8_t ue[NOISE_PUBLIC_KEY_LEN], uint8_t en[0 + NOISE_AUTHTAG_LEN]) -{ - struct noise_handshake *hs = &r->r_handshake; - uint8_t key[NOISE_SYMMETRIC_KEY_LEN]; - uint8_t e[NOISE_PUBLIC_KEY_LEN]; - int ret = EINVAL; - - rw_enter_read(&r->r_local->l_identity_lock); - rw_enter_write(&r->r_handshake_lock); - - if (hs->hs_state != CONSUMED_INITIATION) - goto error; - - /* e */ - curve25519_generate_secret(e); - if (curve25519_generate_public(ue, e) == 0) - goto error; - noise_msg_ephemeral(hs->hs_ck, hs->hs_hash, ue); - - /* ee */ - if (noise_mix_dh(hs->hs_ck, NULL, e, hs->hs_e) != 0) - goto error; - - /* se */ - if (noise_mix_dh(hs->hs_ck, NULL, e, r->r_public) != 0) - goto error; - - /* psk */ - noise_mix_psk(hs->hs_ck, hs->hs_hash, key, r->r_psk); - - /* {} */ - noise_msg_encrypt(en, NULL, 0, key, hs->hs_hash); - - hs->hs_state = CREATED_RESPONSE; - hs->hs_local_index = noise_remote_handshake_index_get(r); - *r_idx = hs->hs_remote_index; - *s_idx = hs->hs_local_index; - ret = 0; -error: - rw_exit_write(&r->r_handshake_lock); - rw_exit_read(&r->r_local->l_identity_lock); - explicit_bzero(key, NOISE_SYMMETRIC_KEY_LEN); - explicit_bzero(e, NOISE_PUBLIC_KEY_LEN); - return ret; -} - -int -noise_consume_response(struct noise_remote *r, uint32_t s_idx, uint32_t r_idx, - uint8_t ue[NOISE_PUBLIC_KEY_LEN], uint8_t en[0 + NOISE_AUTHTAG_LEN]) -{ - struct noise_local *l = r->r_local; - struct noise_handshake hs; - uint8_t key[NOISE_SYMMETRIC_KEY_LEN]; - uint8_t preshared_key[NOISE_PUBLIC_KEY_LEN]; - int ret = EINVAL; - - rw_enter_read(&l->l_identity_lock); - if (!l->l_has_identity) - goto error; - - rw_enter_read(&r->r_handshake_lock); - hs = r->r_handshake; - memcpy(preshared_key, r->r_psk, NOISE_SYMMETRIC_KEY_LEN); - rw_exit_read(&r->r_handshake_lock); - - if (hs.hs_state != CREATED_INITIATION || - hs.hs_local_index != r_idx) - goto error; - - /* e */ - noise_msg_ephemeral(hs.hs_ck, hs.hs_hash, ue); - - /* ee */ - if (noise_mix_dh(hs.hs_ck, NULL, hs.hs_e, ue) != 0) - goto error; - - /* se */ - if (noise_mix_dh(hs.hs_ck, NULL, l->l_private, ue) != 0) - goto error; - - /* psk */ - noise_mix_psk(hs.hs_ck, hs.hs_hash, key, preshared_key); - - /* {} */ - if (noise_msg_decrypt(NULL, en, - 0 + NOISE_AUTHTAG_LEN, key, hs.hs_hash) != 0) - goto error; - - hs.hs_remote_index = s_idx; - - rw_enter_write(&r->r_handshake_lock); - if (r->r_handshake.hs_state == hs.hs_state && - r->r_handshake.hs_local_index == hs.hs_local_index) { - r->r_handshake = hs; - r->r_handshake.hs_state = CONSUMED_RESPONSE; - ret = 0; - } - rw_exit_write(&r->r_handshake_lock); -error: - rw_exit_read(&l->l_identity_lock); - explicit_bzero(&hs, sizeof(hs)); - explicit_bzero(key, NOISE_SYMMETRIC_KEY_LEN); - return ret; -} - -int -noise_remote_begin_session(struct noise_remote *r) -{ - struct noise_handshake *hs = &r->r_handshake; - struct noise_keypair kp, *next, *current, *previous; - - 
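- /* Illustrative aside (sketch only, not a real helper): both sides
-  * derive the same two keys from the final chaining key,
-  * (T1, T2) = KDF2(hs_ck), and the only asymmetry below is naming:
-  * the initiator takes kp_send = T1, kp_recv = T2, while the
-  * responder takes kp_recv = T1, kp_send = T2. */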
rw_enter_write(&r->r_handshake_lock); - - /* We now derive the keypair from the handshake */ - if (hs->hs_state == CONSUMED_RESPONSE) { - kp.kp_is_initiator = 1; - noise_kdf(kp.kp_send, kp.kp_recv, NULL, NULL, - NOISE_SYMMETRIC_KEY_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, 0, - hs->hs_ck); - } else if (hs->hs_state == CREATED_RESPONSE) { - kp.kp_is_initiator = 0; - noise_kdf(kp.kp_recv, kp.kp_send, NULL, NULL, - NOISE_SYMMETRIC_KEY_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, 0, - hs->hs_ck); - } else { - rw_exit_write(&r->r_handshake_lock); - return EINVAL; - } - - kp.kp_valid = 1; - kp.kp_local_index = hs->hs_local_index; - kp.kp_remote_index = hs->hs_remote_index; - getnanouptime(&kp.kp_birthdate); - bzero(&kp.kp_ctr, sizeof(kp.kp_ctr)); - rw_init(&kp.kp_ctr.c_lock, "noise_counter"); - - /* Now we need to add_new_keypair */ - rw_enter_write(&r->r_keypair_lock); - next = r->r_next; - current = r->r_current; - previous = r->r_previous; - - if (kp.kp_is_initiator) { - if (next != NULL) { - r->r_next = NULL; - r->r_previous = next; - noise_remote_keypair_free(r, current); - } else { - r->r_previous = current; - } - - noise_remote_keypair_free(r, previous); - - r->r_current = noise_remote_keypair_allocate(r); - *r->r_current = kp; - } else { - noise_remote_keypair_free(r, next); - r->r_previous = NULL; - noise_remote_keypair_free(r, previous); - - r->r_next = noise_remote_keypair_allocate(r); - *r->r_next = kp; - } - rw_exit_write(&r->r_keypair_lock); - - explicit_bzero(&r->r_handshake, sizeof(r->r_handshake)); - rw_exit_write(&r->r_handshake_lock); - - explicit_bzero(&kp, sizeof(kp)); - return 0; -} - -void -noise_remote_clear(struct noise_remote *r) -{ - rw_enter_write(&r->r_handshake_lock); - noise_remote_handshake_index_drop(r); - explicit_bzero(&r->r_handshake, sizeof(r->r_handshake)); - rw_exit_write(&r->r_handshake_lock); - - rw_enter_write(&r->r_keypair_lock); - noise_remote_keypair_free(r, r->r_next); - noise_remote_keypair_free(r, r->r_current); - noise_remote_keypair_free(r, r->r_previous); - r->r_next = NULL; - r->r_current = NULL; - r->r_previous = NULL; - rw_exit_write(&r->r_keypair_lock); -} - -void -noise_remote_expire_current(struct noise_remote *r) -{ - rw_enter_write(&r->r_keypair_lock); - if (r->r_next != NULL) - r->r_next->kp_valid = 0; - if (r->r_current != NULL) - r->r_current->kp_valid = 0; - rw_exit_write(&r->r_keypair_lock); -} - -int -noise_remote_ready(struct noise_remote *r) -{ - struct noise_keypair *kp; - int ret; - - rw_enter_read(&r->r_keypair_lock); - /* kp_ctr isn't locked here, we're happy to accept a racy read. */ - if ((kp = r->r_current) == NULL || - !kp->kp_valid || - noise_timer_expired(&kp->kp_birthdate, REJECT_AFTER_TIME, 0) || - kp->kp_ctr.c_recv >= REJECT_AFTER_MESSAGES || - kp->kp_ctr.c_send >= REJECT_AFTER_MESSAGES) - ret = EINVAL; - else - ret = 0; - rw_exit_read(&r->r_keypair_lock); - return ret; -} - -int -noise_remote_encrypt(struct noise_remote *r, uint32_t *r_idx, uint64_t *nonce, - uint8_t *buf, size_t buflen) -{ - struct noise_keypair *kp; - int ret = EINVAL; - - rw_enter_read(&r->r_keypair_lock); - if ((kp = r->r_current) == NULL) - goto error; - - /* We confirm that our values are within our tolerances. We want: - * - a valid keypair - * - our keypair to be less than REJECT_AFTER_TIME seconds old - * - our receive counter to be less than REJECT_AFTER_MESSAGES - * - our send counter to be less than REJECT_AFTER_MESSAGES - * - * kp_ctr isn't locked here, we're happy to accept a racy read. 
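- *
- * Condensed as a sketch (usable() is illustrative shorthand, not a
- * real function in this file):
- *
- *   usable(kp) = kp->kp_valid &&
- *       age(kp) < REJECT_AFTER_TIME &&
- *       kp->kp_ctr.c_recv < REJECT_AFTER_MESSAGES &&
- *       kp->kp_ctr.c_send < REJECT_AFTER_MESSAGES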
*/ - if (!kp->kp_valid || - noise_timer_expired(&kp->kp_birthdate, REJECT_AFTER_TIME, 0) || - kp->kp_ctr.c_recv >= REJECT_AFTER_MESSAGES || - ((*nonce = noise_counter_send(&kp->kp_ctr)) > REJECT_AFTER_MESSAGES)) - goto error; - - /* We encrypt into the same buffer, so the caller must ensure that buf - * has NOISE_AUTHTAG_LEN bytes to store the MAC. The nonce and index - * are passed back out to the caller through the provided data pointer. */ - *r_idx = kp->kp_remote_index; - chacha20poly1305_encrypt(buf, buf, buflen, - NULL, 0, *nonce, kp->kp_send); - - /* If our values are still within tolerances, but we are approaching - * the tolerances, we notify the caller with ESTALE that they should - * establish a new keypair. The current keypair can continue to be used - * until the tolerances are hit. We notify if: - * - our send counter is valid and not less than REKEY_AFTER_MESSAGES - * - we're the initiator and our keypair is older than - * REKEY_AFTER_TIME seconds */ - ret = ESTALE; - if ((kp->kp_valid && *nonce >= REKEY_AFTER_MESSAGES) || - (kp->kp_is_initiator && - noise_timer_expired(&kp->kp_birthdate, REKEY_AFTER_TIME, 0))) - goto error; - - ret = 0; -error: - rw_exit_read(&r->r_keypair_lock); - return ret; -} - -int -noise_remote_decrypt(struct noise_remote *r, uint32_t r_idx, uint64_t nonce, - uint8_t *buf, size_t buflen) -{ - struct noise_keypair *kp; - int ret = EINVAL; - - /* We retrieve the keypair corresponding to the provided index. We - * attempt the current keypair first as that is most likely. We also - * want to make sure that the keypair is valid as it would be - * catastrophic to decrypt against a zero'ed keypair. */ - rw_enter_read(&r->r_keypair_lock); - - if (r->r_current != NULL && r->r_current->kp_local_index == r_idx) { - kp = r->r_current; - } else if (r->r_previous != NULL && r->r_previous->kp_local_index == r_idx) { - kp = r->r_previous; - } else if (r->r_next != NULL && r->r_next->kp_local_index == r_idx) { - kp = r->r_next; - } else { - goto error; - } - - /* We confirm that our values are within our tolerances. These values - * are the same as the encrypt routine. - * - * kp_ctr isn't locked here, we're happy to accept a racy read. */ - if (noise_timer_expired(&kp->kp_birthdate, REJECT_AFTER_TIME, 0) || - kp->kp_ctr.c_recv >= REJECT_AFTER_MESSAGES) - goto error; - - /* Decrypt, then validate the counter. We don't want to validate the - * counter before decrypting as we do not know the message is authentic - * prior to decryption. */ - if (chacha20poly1305_decrypt(buf, buf, buflen, - NULL, 0, nonce, kp->kp_recv) == 0) - goto error; - - if (noise_counter_recv(&kp->kp_ctr, nonce) != 0) - goto error; - - /* If we've received the handshake confirming data packet then move the - * next keypair into current. If we do slide the next keypair in, then - * we skip the REKEY_AFTER_TIME_RECV check. This is safe to do as a - * data packet can't confirm a session that we are an INITIATOR of. */ - if (kp == r->r_next) { - rw_exit_read(&r->r_keypair_lock); - rw_enter_write(&r->r_keypair_lock); - if (kp == r->r_next && kp->kp_local_index == r_idx) { - noise_remote_keypair_free(r, r->r_previous); - r->r_previous = r->r_current; - r->r_current = r->r_next; - r->r_next = NULL; - - ret = ECONNRESET; - goto error; - } - rw_enter(&r->r_keypair_lock, RW_DOWNGRADE); - } - - /* Similar to when we encrypt, we want to notify the caller when we - * are approaching our tolerances. We notify if: - * - we're the initiator and the current keypair is older than - * REKEY_AFTER_TIME_RECV seconds. 
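- *
- * With the constants from wg_noise.h this yields a worked timeline
- * for the initiator: start rekeying on send traffic at 120s
- * (REKEY_AFTER_TIME), rekey on receive-only traffic from 165s
- * (REKEY_AFTER_TIME_RECV), and refuse the keypair outright at 180s
- * (REJECT_AFTER_TIME).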
*/ - ret = ESTALE; - kp = r->r_current; - if (kp != NULL && - kp->kp_valid && - kp->kp_is_initiator && - noise_timer_expired(&kp->kp_birthdate, REKEY_AFTER_TIME_RECV, 0)) - goto error; - - ret = 0; - -error: - rw_exit(&r->r_keypair_lock); - return ret; -} - -/* Private functions - these should not be called outside this file under any - * circumstances. */ -static struct noise_keypair * -noise_remote_keypair_allocate(struct noise_remote *r) -{ - struct noise_keypair *kp; - kp = SLIST_FIRST(&r->r_unused_keypairs); - SLIST_REMOVE_HEAD(&r->r_unused_keypairs, kp_entry); - return kp; -} - -static void -noise_remote_keypair_free(struct noise_remote *r, struct noise_keypair *kp) -{ - struct noise_upcall *u = &r->r_local->l_upcall; - if (kp != NULL) { - SLIST_INSERT_HEAD(&r->r_unused_keypairs, kp, kp_entry); - u->u_index_drop(u->u_arg, kp->kp_local_index); - bzero(kp->kp_send, sizeof(kp->kp_send)); - bzero(kp->kp_recv, sizeof(kp->kp_recv)); - } -} - -static uint32_t -noise_remote_handshake_index_get(struct noise_remote *r) -{ - struct noise_upcall *u = &r->r_local->l_upcall; - return u->u_index_set(u->u_arg, r); -} - -static void -noise_remote_handshake_index_drop(struct noise_remote *r) -{ - struct noise_handshake *hs = &r->r_handshake; - struct noise_upcall *u = &r->r_local->l_upcall; - rw_assert_wrlock(&r->r_handshake_lock); - if (hs->hs_state != HS_ZEROED) - u->u_index_drop(u->u_arg, hs->hs_local_index); -} - -static uint64_t -noise_counter_send(struct noise_counter *ctr) -{ - uint64_t ret; - rw_enter_write(&ctr->c_lock); - ret = ctr->c_send++; - rw_exit_write(&ctr->c_lock); - return ret; -} - -static int -noise_counter_recv(struct noise_counter *ctr, uint64_t recv) -{ - uint64_t i, top, index_recv, index_ctr; - unsigned long bit; - int ret = EEXIST; - - rw_enter_write(&ctr->c_lock); - - /* Check that the recv counter is valid */ - if (ctr->c_recv >= REJECT_AFTER_MESSAGES || - recv >= REJECT_AFTER_MESSAGES) - goto error; - - /* If the packet is out of the window, invalid */ - if (recv + COUNTER_WINDOW_SIZE < ctr->c_recv) - goto error; - - /* If the new counter is ahead of the current counter, we'll need to - * zero out the bitmap that has previously been used */ - index_recv = recv / COUNTER_BITS; - index_ctr = ctr->c_recv / COUNTER_BITS; - - if (recv > ctr->c_recv) { - top = MIN(index_recv - index_ctr, COUNTER_NUM); - for (i = 1; i <= top; i++) - ctr->c_backtrack[ - (i + index_ctr) & (COUNTER_NUM - 1)] = 0; - ctr->c_recv = recv; - } - - index_recv %= COUNTER_NUM; - bit = 1ul << (recv % COUNTER_BITS); - - if (ctr->c_backtrack[index_recv] & bit) - goto error; - - ctr->c_backtrack[index_recv] |= bit; - - ret = 0; -error: - rw_exit_write(&ctr->c_lock); - return ret; -} - -static void -noise_kdf(uint8_t *a, uint8_t *b, uint8_t *c, const uint8_t *x, - size_t a_len, size_t b_len, size_t c_len, size_t x_len, - const uint8_t ck[NOISE_HASH_LEN]) -{ - uint8_t out[BLAKE2S_HASH_SIZE + 1]; - uint8_t sec[BLAKE2S_HASH_SIZE]; - -#ifdef DIAGNOSTIC - MPASS(a_len <= BLAKE2S_HASH_SIZE && b_len <= BLAKE2S_HASH_SIZE && - c_len <= BLAKE2S_HASH_SIZE); - MPASS(!(b || b_len || c || c_len) || (a && a_len)); - MPASS(!(c || c_len) || (b && b_len)); -#endif - - /* Extract entropy from "x" into sec */ - blake2s_hmac(sec, x, ck, BLAKE2S_HASH_SIZE, x_len, NOISE_HASH_LEN); - - if (a == NULL || a_len == 0) - goto out; - - /* Expand first key: key = sec, data = 0x1 */ - out[0] = 1; - blake2s_hmac(out, out, sec, BLAKE2S_HASH_SIZE, 1, BLAKE2S_HASH_SIZE); - memcpy(a, out, a_len); - - if (b == NULL || b_len == 0) - goto out; - - /* 
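- * (For reference, the expand steps here follow the HKDF pattern:
- * T1 = HMAC(sec, 0x1), T2 = HMAC(sec, T1 || 0x2),
- * T3 = HMAC(sec, T2 || 0x3), each output key being a prefix of its
- * Ti.)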
Expand second key: key = sec, data = "a" || 0x2 */ - out[BLAKE2S_HASH_SIZE] = 2; - blake2s_hmac(out, out, sec, BLAKE2S_HASH_SIZE, BLAKE2S_HASH_SIZE + 1, - BLAKE2S_HASH_SIZE); - memcpy(b, out, b_len); - - if (c == NULL || c_len == 0) - goto out; - - /* Expand third key: key = sec, data = "b" || 0x3 */ - out[BLAKE2S_HASH_SIZE] = 3; - blake2s_hmac(out, out, sec, BLAKE2S_HASH_SIZE, BLAKE2S_HASH_SIZE + 1, - BLAKE2S_HASH_SIZE); - memcpy(c, out, c_len); - -out: - /* Clear sensitive data from stack */ - explicit_bzero(sec, BLAKE2S_HASH_SIZE); - explicit_bzero(out, BLAKE2S_HASH_SIZE + 1); -} - -static int -noise_mix_dh(uint8_t ck[NOISE_HASH_LEN], uint8_t key[NOISE_SYMMETRIC_KEY_LEN], - const uint8_t private[NOISE_PUBLIC_KEY_LEN], - const uint8_t public[NOISE_PUBLIC_KEY_LEN]) -{ - uint8_t dh[NOISE_PUBLIC_KEY_LEN]; - - if (!curve25519(dh, private, public)) - return EINVAL; - noise_kdf(ck, key, NULL, dh, - NOISE_HASH_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, ck); - explicit_bzero(dh, NOISE_PUBLIC_KEY_LEN); - return 0; -} - -static int -noise_mix_ss(uint8_t ck[NOISE_HASH_LEN], uint8_t key[NOISE_SYMMETRIC_KEY_LEN], - const uint8_t ss[NOISE_PUBLIC_KEY_LEN]) -{ - static uint8_t null_point[NOISE_PUBLIC_KEY_LEN]; - if (timingsafe_bcmp(ss, null_point, NOISE_PUBLIC_KEY_LEN) == 0) - return ENOENT; - noise_kdf(ck, key, NULL, ss, - NOISE_HASH_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, ck); - return 0; -} - -static void -noise_mix_hash(uint8_t hash[NOISE_HASH_LEN], const uint8_t *src, - size_t src_len) -{ - struct blake2s_state blake; - - blake2s_init(&blake, NOISE_HASH_LEN); - blake2s_update(&blake, hash, NOISE_HASH_LEN); - blake2s_update(&blake, src, src_len); - blake2s_final(&blake, hash); -} - -static void -noise_mix_psk(uint8_t ck[NOISE_HASH_LEN], uint8_t hash[NOISE_HASH_LEN], - uint8_t key[NOISE_SYMMETRIC_KEY_LEN], - const uint8_t psk[NOISE_SYMMETRIC_KEY_LEN]) -{ - uint8_t tmp[NOISE_HASH_LEN]; - - noise_kdf(ck, tmp, key, psk, - NOISE_HASH_LEN, NOISE_HASH_LEN, NOISE_SYMMETRIC_KEY_LEN, - NOISE_SYMMETRIC_KEY_LEN, ck); - noise_mix_hash(hash, tmp, NOISE_HASH_LEN); - explicit_bzero(tmp, NOISE_HASH_LEN); -} - -static void -noise_param_init(uint8_t ck[NOISE_HASH_LEN], uint8_t hash[NOISE_HASH_LEN], - const uint8_t s[NOISE_PUBLIC_KEY_LEN]) -{ - struct blake2s_state blake; - - blake2s(ck, (uint8_t *)NOISE_HANDSHAKE_NAME, NULL, - NOISE_HASH_LEN, strlen(NOISE_HANDSHAKE_NAME), 0); - blake2s_init(&blake, NOISE_HASH_LEN); - blake2s_update(&blake, ck, NOISE_HASH_LEN); - blake2s_update(&blake, (uint8_t *)NOISE_IDENTIFIER_NAME, - strlen(NOISE_IDENTIFIER_NAME)); - blake2s_final(&blake, hash); - - noise_mix_hash(hash, s, NOISE_PUBLIC_KEY_LEN); -} - -static void -noise_msg_encrypt(uint8_t *dst, const uint8_t *src, size_t src_len, - uint8_t key[NOISE_SYMMETRIC_KEY_LEN], uint8_t hash[NOISE_HASH_LEN]) -{ - /* Nonce always zero for Noise_IK */ - chacha20poly1305_encrypt(dst, src, src_len, - hash, NOISE_HASH_LEN, 0, key); - noise_mix_hash(hash, dst, src_len + NOISE_AUTHTAG_LEN); -} - -static int -noise_msg_decrypt(uint8_t *dst, const uint8_t *src, size_t src_len, - uint8_t key[NOISE_SYMMETRIC_KEY_LEN], uint8_t hash[NOISE_HASH_LEN]) -{ - /* Nonce always zero for Noise_IK */ - if (!chacha20poly1305_decrypt(dst, src, src_len, - hash, NOISE_HASH_LEN, 0, key)) - return EINVAL; - noise_mix_hash(hash, src, src_len); - return 0; -} - -static void -noise_msg_ephemeral(uint8_t ck[NOISE_HASH_LEN], uint8_t hash[NOISE_HASH_LEN], - const uint8_t src[NOISE_PUBLIC_KEY_LEN]) -{ - noise_mix_hash(hash, src, 
NOISE_PUBLIC_KEY_LEN); - noise_kdf(ck, NULL, NULL, src, NOISE_HASH_LEN, 0, 0, - NOISE_PUBLIC_KEY_LEN, ck); -} - -static void -noise_tai64n_now(uint8_t output[NOISE_TIMESTAMP_LEN]) -{ - struct timespec time; - uint64_t sec; - uint32_t nsec; - - getnanotime(&time); - - /* Round down the nsec counter to limit precise timing leak. */ - time.tv_nsec &= REJECT_INTERVAL_MASK; - - /* https://cr.yp.to/libtai/tai64.html */ - sec = htobe64(0x400000000000000aULL + time.tv_sec); - nsec = htobe32(time.tv_nsec); - - /* memcpy to output buffer, assuming output could be unaligned. */ - memcpy(output, &sec, sizeof(sec)); - memcpy(output + sizeof(sec), &nsec, sizeof(nsec)); -} - -static int -noise_timer_expired(struct timespec *birthdate, time_t sec, long nsec) -{ - struct timespec uptime; - struct timespec expire = { .tv_sec = sec, .tv_nsec = nsec }; - - /* We don't really worry about a zeroed birthdate, to avoid the extra - * check on every encrypt/decrypt. This does mean that r_last_init - * check may fail if getnanouptime is < REJECT_INTERVAL from 0. */ - - getnanouptime(&uptime); - timespecadd(birthdate, &expire, &expire); - return timespeccmp(&uptime, &expire, >) ? ETIMEDOUT : 0; -} diff --git a/sys/dev/if_wg/wg_noise.h b/sys/dev/if_wg/wg_noise.h deleted file mode 100644 index 95617ae9bfef..000000000000 --- a/sys/dev/if_wg/wg_noise.h +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Copyright (C) 2015-2020 Jason A. Donenfeld . All Rights Reserved. - * Copyright (C) 2019-2020 Matt Dunwoodie - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
- */ - -#ifndef __NOISE_H__ -#define __NOISE_H__ - -#include -#include -#include - -#include "crypto.h" - -#define NOISE_PUBLIC_KEY_LEN CURVE25519_KEY_SIZE -#define NOISE_SYMMETRIC_KEY_LEN CHACHA20POLY1305_KEY_SIZE -#define NOISE_TIMESTAMP_LEN (sizeof(uint64_t) + sizeof(uint32_t)) -#define NOISE_AUTHTAG_LEN CHACHA20POLY1305_AUTHTAG_SIZE -#define NOISE_HASH_LEN BLAKE2S_HASH_SIZE - -/* Protocol string constants */ -#define NOISE_HANDSHAKE_NAME "Noise_IKpsk2_25519_ChaChaPoly_BLAKE2s" -#define NOISE_IDENTIFIER_NAME "WireGuard v1 zx2c4 Jason@zx2c4.com" - -/* Constants for the counter */ -#define COUNTER_BITS_TOTAL 8192 -#define COUNTER_BITS (sizeof(unsigned long) * 8) -#define COUNTER_NUM (COUNTER_BITS_TOTAL / COUNTER_BITS) -#define COUNTER_WINDOW_SIZE (COUNTER_BITS_TOTAL - COUNTER_BITS) - -/* Constants for the keypair */ -#define REKEY_AFTER_MESSAGES (1ull << 60) -#define REJECT_AFTER_MESSAGES (UINT64_MAX - COUNTER_WINDOW_SIZE - 1) -#define REKEY_AFTER_TIME 120 -#define REKEY_AFTER_TIME_RECV 165 -#define REJECT_AFTER_TIME 180 -#define REJECT_INTERVAL (1000000000 / 50) /* fifty times per sec */ -/* 24 = floor(log2(REJECT_INTERVAL)) */ -#define REJECT_INTERVAL_MASK (~((1ull<<24)-1)) - -enum noise_state_hs { - HS_ZEROED = 0, - CREATED_INITIATION, - CONSUMED_INITIATION, - CREATED_RESPONSE, - CONSUMED_RESPONSE, -}; - -struct noise_handshake { - enum noise_state_hs hs_state; - uint32_t hs_local_index; - uint32_t hs_remote_index; - uint8_t hs_e[NOISE_PUBLIC_KEY_LEN]; - uint8_t hs_hash[NOISE_HASH_LEN]; - uint8_t hs_ck[NOISE_HASH_LEN]; -}; - -struct noise_counter { - struct rwlock c_lock; - uint64_t c_send; - uint64_t c_recv; - unsigned long c_backtrack[COUNTER_NUM]; -}; - -struct noise_keypair { - SLIST_ENTRY(noise_keypair) kp_entry; - int kp_valid; - int kp_is_initiator; - uint32_t kp_local_index; - uint32_t kp_remote_index; - uint8_t kp_send[NOISE_SYMMETRIC_KEY_LEN]; - uint8_t kp_recv[NOISE_SYMMETRIC_KEY_LEN]; - struct timespec kp_birthdate; /* nanouptime */ - struct noise_counter kp_ctr; -}; - -struct noise_remote { - uint8_t r_public[NOISE_PUBLIC_KEY_LEN]; - struct noise_local *r_local; - uint8_t r_ss[NOISE_PUBLIC_KEY_LEN]; - - struct rwlock r_handshake_lock; - struct noise_handshake r_handshake; - uint8_t r_psk[NOISE_SYMMETRIC_KEY_LEN]; - uint8_t r_timestamp[NOISE_TIMESTAMP_LEN]; - struct timespec r_last_init; /* nanouptime */ - - struct rwlock r_keypair_lock; - SLIST_HEAD(,noise_keypair) r_unused_keypairs; - struct noise_keypair *r_next, *r_current, *r_previous; - struct noise_keypair r_keypair[3]; /* 3: next, current, previous. 
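- * A keypair created as responder waits in r_next until the first
- * data packet confirms the session, at which point
- * noise_remote_decrypt() slides it into r_current (and the old
- * current into r_previous); one created as initiator is installed
- * directly as r_current by noise_remote_begin_session().  The
- * r_unused_keypairs list simply recycles these three slots.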
*/ - -}; - -struct noise_local { - struct rwlock l_identity_lock; - int l_has_identity; - uint8_t l_public[NOISE_PUBLIC_KEY_LEN]; - uint8_t l_private[NOISE_PUBLIC_KEY_LEN]; - - struct noise_upcall { - void *u_arg; - struct noise_remote * - (*u_remote_get)(void *, uint8_t[NOISE_PUBLIC_KEY_LEN]); - uint32_t - (*u_index_set)(void *, struct noise_remote *); - void (*u_index_drop)(void *, uint32_t); - } l_upcall; -}; - -/* Set/Get noise parameters */ -void noise_local_init(struct noise_local *, struct noise_upcall *); -void noise_local_lock_identity(struct noise_local *); -void noise_local_unlock_identity(struct noise_local *); -int noise_local_set_private(struct noise_local *, - const uint8_t[NOISE_PUBLIC_KEY_LEN]); -int noise_local_keys(struct noise_local *, uint8_t[NOISE_PUBLIC_KEY_LEN], - uint8_t[NOISE_PUBLIC_KEY_LEN]); - -void noise_remote_init(struct noise_remote *, - const uint8_t[NOISE_PUBLIC_KEY_LEN], struct noise_local *); -int noise_remote_set_psk(struct noise_remote *, - const uint8_t[NOISE_SYMMETRIC_KEY_LEN]); -int noise_remote_keys(struct noise_remote *, uint8_t[NOISE_PUBLIC_KEY_LEN], - uint8_t[NOISE_SYMMETRIC_KEY_LEN]); - -/* Should be called anytime noise_local_set_private is called */ -void noise_remote_precompute(struct noise_remote *); - -/* Cryptographic functions */ -int noise_create_initiation( - struct noise_remote *, - uint32_t *s_idx, - uint8_t ue[NOISE_PUBLIC_KEY_LEN], - uint8_t es[NOISE_PUBLIC_KEY_LEN + NOISE_AUTHTAG_LEN], - uint8_t ets[NOISE_TIMESTAMP_LEN + NOISE_AUTHTAG_LEN]); - -int noise_consume_initiation( - struct noise_local *, - struct noise_remote **, - uint32_t s_idx, - uint8_t ue[NOISE_PUBLIC_KEY_LEN], - uint8_t es[NOISE_PUBLIC_KEY_LEN + NOISE_AUTHTAG_LEN], - uint8_t ets[NOISE_TIMESTAMP_LEN + NOISE_AUTHTAG_LEN]); - -int noise_create_response( - struct noise_remote *, - uint32_t *s_idx, - uint32_t *r_idx, - uint8_t ue[NOISE_PUBLIC_KEY_LEN], - uint8_t en[0 + NOISE_AUTHTAG_LEN]); - -int noise_consume_response( - struct noise_remote *, - uint32_t s_idx, - uint32_t r_idx, - uint8_t ue[NOISE_PUBLIC_KEY_LEN], - uint8_t en[0 + NOISE_AUTHTAG_LEN]); - -int noise_remote_begin_session(struct noise_remote *); -void noise_remote_clear(struct noise_remote *); -void noise_remote_expire_current(struct noise_remote *); - -int noise_remote_ready(struct noise_remote *); - -int noise_remote_encrypt( - struct noise_remote *, - uint32_t *r_idx, - uint64_t *nonce, - uint8_t *buf, - size_t buflen); -int noise_remote_decrypt( - struct noise_remote *, - uint32_t r_idx, - uint64_t nonce, - uint8_t *buf, - size_t buflen); - -#endif /* __NOISE_H__ */ diff --git a/sys/kern/kern_jail.c b/sys/kern/kern_jail.c index 30499dce729c..b5c8f6ebf9be 100644 --- a/sys/kern/kern_jail.c +++ b/sys/kern/kern_jail.c @@ -1,4596 +1,4595 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1999 Poul-Henning Kamp. * Copyright (c) 2008 Bjoern A. Zeeb. * Copyright (c) 2009 James Gritton. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif /* DDB */ #include #define DEFAULT_HOSTUUID "00000000-0000-0000-0000-000000000000" #define PRISON0_HOSTUUID_MODULE "hostuuid" MALLOC_DEFINE(M_PRISON, "prison", "Prison structures"); static MALLOC_DEFINE(M_PRISON_RACCT, "prison_racct", "Prison racct structures"); /* Keep struct prison prison0 and some code in kern_jail_set() readable. */ #ifdef INET #ifdef INET6 #define _PR_IP_SADDRSEL PR_IP4_SADDRSEL|PR_IP6_SADDRSEL #else #define _PR_IP_SADDRSEL PR_IP4_SADDRSEL #endif #else /* !INET */ #ifdef INET6 #define _PR_IP_SADDRSEL PR_IP6_SADDRSEL #else #define _PR_IP_SADDRSEL 0 #endif #endif /* prison0 describes what is "real" about the system. */ struct prison prison0 = { .pr_id = 0, .pr_name = "0", .pr_ref = 1, .pr_uref = 1, .pr_path = "/", .pr_securelevel = -1, .pr_devfs_rsnum = 0, .pr_state = PRISON_STATE_ALIVE, .pr_childmax = JAIL_MAX, .pr_hostuuid = DEFAULT_HOSTUUID, .pr_children = LIST_HEAD_INITIALIZER(prison0.pr_children), #ifdef VIMAGE .pr_flags = PR_HOST|PR_VNET|_PR_IP_SADDRSEL, #else .pr_flags = PR_HOST|_PR_IP_SADDRSEL, #endif .pr_allow = PR_ALLOW_ALL_STATIC, }; MTX_SYSINIT(prison0, &prison0.pr_mtx, "jail mutex", MTX_DEF); struct bool_flags { const char *name; const char *noname; volatile u_int flag; }; struct jailsys_flags { const char *name; unsigned disable; unsigned new; }; /* allprison, allprison_racct and lastprid are protected by allprison_lock. 
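* Readers take the lock shared and list modifiers take it exclusive;
* the usual pattern is a TAILQ_FOREACH over allprison under the lock,
* as kern_jail_set() below does (exclusively) when probing for a jid.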
*/ struct sx allprison_lock; SX_SYSINIT(allprison_lock, &allprison_lock, "allprison"); struct prisonlist allprison = TAILQ_HEAD_INITIALIZER(allprison); LIST_HEAD(, prison_racct) allprison_racct; int lastprid = 0; static int get_next_prid(struct prison **insprp); static int do_jail_attach(struct thread *td, struct prison *pr, int drflags); static void prison_complete(void *context, int pending); static void prison_deref(struct prison *pr, int flags); static void prison_deref_kill(struct prison *pr, struct prisonlist *freeprison); static int prison_lock_xlock(struct prison *pr, int flags); static void prison_free_not_last(struct prison *pr); static void prison_proc_free_not_last(struct prison *pr); static void prison_set_allow_locked(struct prison *pr, unsigned flag, int enable); static char *prison_path(struct prison *pr1, struct prison *pr2); #ifdef RACCT static void prison_racct_attach(struct prison *pr); static void prison_racct_modify(struct prison *pr); static void prison_racct_detach(struct prison *pr); #endif /* Flags for prison_deref */ #define PD_DEREF 0x01 /* Decrement pr_ref */ #define PD_DEUREF 0x02 /* Decrement pr_uref */ #define PD_KILL 0x04 /* Remove jail, kill processes, etc */ #define PD_LOCKED 0x10 /* pr_mtx is held */ #define PD_LIST_SLOCKED 0x20 /* allprison_lock is held shared */ #define PD_LIST_XLOCKED 0x40 /* allprison_lock is held exclusive */ #define PD_OP_FLAGS 0x07 /* Operation flags */ #define PD_LOCK_FLAGS 0x70 /* Lock status flags */ /* * Parameter names corresponding to PR_* flag values. Size values are for kvm * as we cannot figure out the size of a sparse array, or an array without a * terminating entry. */ static struct bool_flags pr_flag_bool[] = { {"persist", "nopersist", PR_PERSIST}, #ifdef INET {"ip4.saddrsel", "ip4.nosaddrsel", PR_IP4_SADDRSEL}, #endif #ifdef INET6 {"ip6.saddrsel", "ip6.nosaddrsel", PR_IP6_SADDRSEL}, #endif }; const size_t pr_flag_bool_size = sizeof(pr_flag_bool); static struct jailsys_flags pr_flag_jailsys[] = { {"host", 0, PR_HOST}, #ifdef VIMAGE {"vnet", 0, PR_VNET}, #endif #ifdef INET {"ip4", PR_IP4_USER, PR_IP4_USER}, #endif #ifdef INET6 {"ip6", PR_IP6_USER, PR_IP6_USER}, #endif }; const size_t pr_flag_jailsys_size = sizeof(pr_flag_jailsys); /* * Make this array full-size so dynamic parameters can be added. * It is protected by prison0.mtx, but lockless reading is allowed * with an atomic check of the flag values. 
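*
* Concretely, the loops in this file iterate
*
*   for (bf = pr_flag_allow;
*       bf < pr_flag_allow + nitems(pr_flag_allow) &&
*       atomic_load_int(&bf->flag) != 0; bf++)
*
* so a zero flag acts as the terminator, and a dynamically added
* entry only becomes visible once its flag value is written.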
*/ static struct bool_flags pr_flag_allow[NBBY * NBPW] = { {"allow.set_hostname", "allow.noset_hostname", PR_ALLOW_SET_HOSTNAME}, {"allow.sysvipc", "allow.nosysvipc", PR_ALLOW_SYSVIPC}, {"allow.raw_sockets", "allow.noraw_sockets", PR_ALLOW_RAW_SOCKETS}, {"allow.chflags", "allow.nochflags", PR_ALLOW_CHFLAGS}, {"allow.mount", "allow.nomount", PR_ALLOW_MOUNT}, {"allow.quotas", "allow.noquotas", PR_ALLOW_QUOTAS}, {"allow.socket_af", "allow.nosocket_af", PR_ALLOW_SOCKET_AF}, {"allow.mlock", "allow.nomlock", PR_ALLOW_MLOCK}, {"allow.reserved_ports", "allow.noreserved_ports", PR_ALLOW_RESERVED_PORTS}, {"allow.read_msgbuf", "allow.noread_msgbuf", PR_ALLOW_READ_MSGBUF}, {"allow.unprivileged_proc_debug", "allow.nounprivileged_proc_debug", PR_ALLOW_UNPRIV_DEBUG}, {"allow.suser", "allow.nosuser", PR_ALLOW_SUSER}, }; static unsigned pr_allow_all = PR_ALLOW_ALL_STATIC; const size_t pr_flag_allow_size = sizeof(pr_flag_allow); #define JAIL_DEFAULT_ALLOW (PR_ALLOW_SET_HOSTNAME | \ PR_ALLOW_RESERVED_PORTS | \ PR_ALLOW_UNPRIV_DEBUG | \ PR_ALLOW_SUSER) #define JAIL_DEFAULT_ENFORCE_STATFS 2 #define JAIL_DEFAULT_DEVFS_RSNUM 0 static unsigned jail_default_allow = JAIL_DEFAULT_ALLOW; static int jail_default_enforce_statfs = JAIL_DEFAULT_ENFORCE_STATFS; static int jail_default_devfs_rsnum = JAIL_DEFAULT_DEVFS_RSNUM; #if defined(INET) || defined(INET6) static unsigned jail_max_af_ips = 255; #endif /* * Initialize the parts of prison0 that can't be static-initialized with * constants. This is called from proc0_init() after creating thread0 cpuset. */ void prison0_init(void) { uint8_t *file, *data; size_t size; prison0.pr_cpuset = cpuset_ref(thread0.td_cpuset); prison0.pr_osreldate = osreldate; strlcpy(prison0.pr_osrelease, osrelease, sizeof(prison0.pr_osrelease)); /* If we have a preloaded hostuuid, use it. */ file = preload_search_by_type(PRISON0_HOSTUUID_MODULE); if (file != NULL) { data = preload_fetch_addr(file); size = preload_fetch_size(file); if (data != NULL) { /* * The preloaded data may include trailing whitespace, almost * certainly a newline; skip over any whitespace or * non-printable characters to be safe. */ while (size > 0 && data[size - 1] <= 0x20) { data[size--] = '\0'; } if (validate_uuid(data, size, NULL, 0) == 0) { (void)strlcpy(prison0.pr_hostuuid, data, size + 1); } else if (bootverbose) { printf("hostuuid: preload data malformed: '%s'", data); } } } if (bootverbose) printf("hostuuid: using %s\n", prison0.pr_hostuuid); } /* * struct jail_args { * struct jail *jail; * }; */ int sys_jail(struct thread *td, struct jail_args *uap) { uint32_t version; int error; struct jail j; error = copyin(uap->jail, &version, sizeof(uint32_t)); if (error) return (error); switch (version) { case 0: { struct jail_v0 j0; /* FreeBSD single IPv4 jails. */ bzero(&j, sizeof(struct jail)); error = copyin(uap->jail, &j0, sizeof(struct jail_v0)); if (error) return (error); j.version = j0.version; j.path = j0.path; j.hostname = j0.hostname; j.ip4s = htonl(j0.ip_number); /* jail_v0 is host order */ break; } case 1: /* * Version 1 was used by multi-IPv4 jail implementations * that never made it into the official kernel. */ return (EINVAL); case 2: /* JAIL_API_VERSION */ /* FreeBSD multi-IPv4/IPv6,noIP jails. */ error = copyin(uap->jail, &j, sizeof(struct jail)); if (error) return (error); break; default: /* Sci-Fi jails are not supported, sorry. 
*/ return (EINVAL); } return (kern_jail(td, &j)); } int kern_jail(struct thread *td, struct jail *j) { struct iovec optiov[2 * (4 + nitems(pr_flag_allow) #ifdef INET + 1 #endif #ifdef INET6 + 1 #endif )]; struct uio opt; char *u_path, *u_hostname, *u_name; struct bool_flags *bf; #ifdef INET uint32_t ip4s; struct in_addr *u_ip4; #endif #ifdef INET6 struct in6_addr *u_ip6; #endif size_t tmplen; int error, enforce_statfs; bzero(&optiov, sizeof(optiov)); opt.uio_iov = optiov; opt.uio_iovcnt = 0; opt.uio_offset = -1; opt.uio_resid = -1; opt.uio_segflg = UIO_SYSSPACE; opt.uio_rw = UIO_READ; opt.uio_td = td; /* Set permissions for top-level jails from sysctls. */ if (!jailed(td->td_ucred)) { for (bf = pr_flag_allow; bf < pr_flag_allow + nitems(pr_flag_allow) && atomic_load_int(&bf->flag) != 0; bf++) { optiov[opt.uio_iovcnt].iov_base = __DECONST(char *, (jail_default_allow & bf->flag) ? bf->name : bf->noname); optiov[opt.uio_iovcnt].iov_len = strlen(optiov[opt.uio_iovcnt].iov_base) + 1; opt.uio_iovcnt += 2; } optiov[opt.uio_iovcnt].iov_base = "enforce_statfs"; optiov[opt.uio_iovcnt].iov_len = sizeof("enforce_statfs"); opt.uio_iovcnt++; enforce_statfs = jail_default_enforce_statfs; optiov[opt.uio_iovcnt].iov_base = &enforce_statfs; optiov[opt.uio_iovcnt].iov_len = sizeof(enforce_statfs); opt.uio_iovcnt++; } tmplen = MAXPATHLEN + MAXHOSTNAMELEN + MAXHOSTNAMELEN; #ifdef INET ip4s = (j->version == 0) ? 1 : j->ip4s; if (ip4s > jail_max_af_ips) return (EINVAL); tmplen += ip4s * sizeof(struct in_addr); #else if (j->ip4s > 0) return (EINVAL); #endif #ifdef INET6 if (j->ip6s > jail_max_af_ips) return (EINVAL); tmplen += j->ip6s * sizeof(struct in6_addr); #else if (j->ip6s > 0) return (EINVAL); #endif u_path = malloc(tmplen, M_TEMP, M_WAITOK); u_hostname = u_path + MAXPATHLEN; u_name = u_hostname + MAXHOSTNAMELEN; #ifdef INET u_ip4 = (struct in_addr *)(u_name + MAXHOSTNAMELEN); #endif #ifdef INET6 #ifdef INET u_ip6 = (struct in6_addr *)(u_ip4 + ip4s); #else u_ip6 = (struct in6_addr *)(u_name + MAXHOSTNAMELEN); #endif #endif optiov[opt.uio_iovcnt].iov_base = "path"; optiov[opt.uio_iovcnt].iov_len = sizeof("path"); opt.uio_iovcnt++; optiov[opt.uio_iovcnt].iov_base = u_path; error = copyinstr(j->path, u_path, MAXPATHLEN, &optiov[opt.uio_iovcnt].iov_len); if (error) { free(u_path, M_TEMP); return (error); } opt.uio_iovcnt++; optiov[opt.uio_iovcnt].iov_base = "host.hostname"; optiov[opt.uio_iovcnt].iov_len = sizeof("host.hostname"); opt.uio_iovcnt++; optiov[opt.uio_iovcnt].iov_base = u_hostname; error = copyinstr(j->hostname, u_hostname, MAXHOSTNAMELEN, &optiov[opt.uio_iovcnt].iov_len); if (error) { free(u_path, M_TEMP); return (error); } opt.uio_iovcnt++; if (j->jailname != NULL) { optiov[opt.uio_iovcnt].iov_base = "name"; optiov[opt.uio_iovcnt].iov_len = sizeof("name"); opt.uio_iovcnt++; optiov[opt.uio_iovcnt].iov_base = u_name; error = copyinstr(j->jailname, u_name, MAXHOSTNAMELEN, &optiov[opt.uio_iovcnt].iov_len); if (error) { free(u_path, M_TEMP); return (error); } opt.uio_iovcnt++; } #ifdef INET optiov[opt.uio_iovcnt].iov_base = "ip4.addr"; optiov[opt.uio_iovcnt].iov_len = sizeof("ip4.addr"); opt.uio_iovcnt++; optiov[opt.uio_iovcnt].iov_base = u_ip4; optiov[opt.uio_iovcnt].iov_len = ip4s * sizeof(struct in_addr); if (j->version == 0) u_ip4->s_addr = j->ip4s; else { error = copyin(j->ip4, u_ip4, optiov[opt.uio_iovcnt].iov_len); if (error) { free(u_path, M_TEMP); return (error); } } opt.uio_iovcnt++; #endif #ifdef INET6 optiov[opt.uio_iovcnt].iov_base = "ip6.addr"; optiov[opt.uio_iovcnt].iov_len = 
sizeof("ip6.addr"); opt.uio_iovcnt++; optiov[opt.uio_iovcnt].iov_base = u_ip6; optiov[opt.uio_iovcnt].iov_len = j->ip6s * sizeof(struct in6_addr); error = copyin(j->ip6, u_ip6, optiov[opt.uio_iovcnt].iov_len); if (error) { free(u_path, M_TEMP); return (error); } opt.uio_iovcnt++; #endif KASSERT(opt.uio_iovcnt <= nitems(optiov), ("kern_jail: too many iovecs (%d)", opt.uio_iovcnt)); error = kern_jail_set(td, &opt, JAIL_CREATE | JAIL_ATTACH); free(u_path, M_TEMP); return (error); } /* * struct jail_set_args { * struct iovec *iovp; * unsigned int iovcnt; * int flags; * }; */ int sys_jail_set(struct thread *td, struct jail_set_args *uap) { struct uio *auio; int error; /* Check that we have an even number of iovecs. */ if (uap->iovcnt & 1) return (EINVAL); error = copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_jail_set(td, auio, uap->flags); free(auio, M_IOV); return (error); } int kern_jail_set(struct thread *td, struct uio *optuio, int flags) { struct nameidata nd; #ifdef INET struct in_addr *ip4; #endif #ifdef INET6 struct in6_addr *ip6; #endif struct vfsopt *opt; struct vfsoptlist *opts; struct prison *pr, *deadpr, *inspr, *mypr, *ppr, *tpr; struct vnode *root; char *domain, *errmsg, *host, *name, *namelc, *p, *path, *uuid; char *g_path, *osrelstr; struct bool_flags *bf; struct jailsys_flags *jsf; #if defined(INET) || defined(INET6) struct prison *tppr; void *op; #endif unsigned long hid; size_t namelen, onamelen, pnamelen; int born, created, cuflags, descend, drflags, enforce; int error, errmsg_len, errmsg_pos; int gotchildmax, gotenforce, gothid, gotrsnum, gotslevel; int jid, jsys, len, level; int childmax, osreldt, rsnum, slevel; #if defined(INET) || defined(INET6) int ii, ij; #endif #ifdef INET int ip4s, redo_ip4; #endif #ifdef INET6 int ip6s, redo_ip6; #endif uint64_t pr_allow, ch_allow, pr_flags, ch_flags; uint64_t pr_allow_diff; unsigned tallow; char numbuf[12]; error = priv_check(td, PRIV_JAIL_SET); if (!error && (flags & JAIL_ATTACH)) error = priv_check(td, PRIV_JAIL_ATTACH); if (error) return (error); mypr = td->td_ucred->cr_prison; if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0) return (EPERM); if (flags & ~JAIL_SET_MASK) return (EINVAL); /* * Check all the parameters before committing to anything. Not all * errors can be caught early, but we may as well try. Also, this * takes care of some expensive stuff (path lookup) before getting * the allprison lock. * * XXX Jails are not filesystems, and jail parameters are not mount * options. But it makes more sense to re-use the vfsopt code * than duplicate it under a different name. 
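*
* The visible consequence for callers is that jail_set(2) consumes
* name/value iovec pairs (hence the evenness check on iovcnt in
* sys_jail_set() above).  A minimal userland sketch, assuming just
* the documented jail_set(2) interface:
*
*   struct iovec iov[4] = {
*       { "name", sizeof("name") }, { "myjail", sizeof("myjail") },
*       { "persist", sizeof("persist") }, { NULL, 0 },
*   };
*   int jid = jail_set(iov, 4, JAIL_CREATE);
*
* where a boolean parameter like "persist" takes an empty value.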
*/ error = vfs_buildopts(optuio, &opts); if (error) return (error); #ifdef INET ip4 = NULL; #endif #ifdef INET6 ip6 = NULL; #endif g_path = NULL; cuflags = flags & (JAIL_CREATE | JAIL_UPDATE); if (!cuflags) { error = EINVAL; vfs_opterror(opts, "no valid operation (create or update)"); goto done_errmsg; } error = vfs_copyopt(opts, "jid", &jid, sizeof(jid)); if (error == ENOENT) jid = 0; else if (error != 0) goto done_free; error = vfs_copyopt(opts, "securelevel", &slevel, sizeof(slevel)); if (error == ENOENT) gotslevel = 0; else if (error != 0) goto done_free; else gotslevel = 1; error = vfs_copyopt(opts, "children.max", &childmax, sizeof(childmax)); if (error == ENOENT) gotchildmax = 0; else if (error != 0) goto done_free; else gotchildmax = 1; error = vfs_copyopt(opts, "enforce_statfs", &enforce, sizeof(enforce)); if (error == ENOENT) gotenforce = 0; else if (error != 0) goto done_free; else if (enforce < 0 || enforce > 2) { error = EINVAL; goto done_free; } else gotenforce = 1; error = vfs_copyopt(opts, "devfs_ruleset", &rsnum, sizeof(rsnum)); if (error == ENOENT) gotrsnum = 0; else if (error != 0) goto done_free; else gotrsnum = 1; pr_flags = ch_flags = 0; for (bf = pr_flag_bool; bf < pr_flag_bool + nitems(pr_flag_bool); bf++) { vfs_flagopt(opts, bf->name, &pr_flags, bf->flag); vfs_flagopt(opts, bf->noname, &ch_flags, bf->flag); } ch_flags |= pr_flags; for (jsf = pr_flag_jailsys; jsf < pr_flag_jailsys + nitems(pr_flag_jailsys); jsf++) { error = vfs_copyopt(opts, jsf->name, &jsys, sizeof(jsys)); if (error == ENOENT) continue; if (error != 0) goto done_free; switch (jsys) { case JAIL_SYS_DISABLE: if (!jsf->disable) { error = EINVAL; goto done_free; } pr_flags |= jsf->disable; break; case JAIL_SYS_NEW: pr_flags |= jsf->new; break; case JAIL_SYS_INHERIT: break; default: error = EINVAL; goto done_free; } ch_flags |= jsf->new | jsf->disable; } if ((flags & (JAIL_CREATE | JAIL_ATTACH)) == JAIL_CREATE && !(pr_flags & PR_PERSIST)) { error = EINVAL; vfs_opterror(opts, "new jail must persist or attach"); goto done_errmsg; } #ifdef VIMAGE if ((flags & JAIL_UPDATE) && (ch_flags & PR_VNET)) { error = EINVAL; vfs_opterror(opts, "vnet cannot be changed after creation"); goto done_errmsg; } #endif #ifdef INET if ((flags & JAIL_UPDATE) && (ch_flags & PR_IP4_USER)) { error = EINVAL; vfs_opterror(opts, "ip4 cannot be changed after creation"); goto done_errmsg; } #endif #ifdef INET6 if ((flags & JAIL_UPDATE) && (ch_flags & PR_IP6_USER)) { error = EINVAL; vfs_opterror(opts, "ip6 cannot be changed after creation"); goto done_errmsg; } #endif pr_allow = ch_allow = 0; for (bf = pr_flag_allow; bf < pr_flag_allow + nitems(pr_flag_allow) && atomic_load_int(&bf->flag) != 0; bf++) { vfs_flagopt(opts, bf->name, &pr_allow, bf->flag); vfs_flagopt(opts, bf->noname, &ch_allow, bf->flag); } ch_allow |= pr_allow; error = vfs_getopt(opts, "name", (void **)&name, &len); if (error == ENOENT) name = NULL; else if (error != 0) goto done_free; else { if (len == 0 || name[len - 1] != '\0') { error = EINVAL; goto done_free; } if (len > MAXHOSTNAMELEN) { error = ENAMETOOLONG; goto done_free; } } error = vfs_getopt(opts, "host.hostname", (void **)&host, &len); if (error == ENOENT) host = NULL; else if (error != 0) goto done_free; else { ch_flags |= PR_HOST; pr_flags |= PR_HOST; if (len == 0 || host[len - 1] != '\0') { error = EINVAL; goto done_free; } if (len > MAXHOSTNAMELEN) { error = ENAMETOOLONG; goto done_free; } } error = vfs_getopt(opts, "host.domainname", (void **)&domain, &len); if (error == ENOENT) domain = NULL; else if 
(error != 0) goto done_free; else { ch_flags |= PR_HOST; pr_flags |= PR_HOST; if (len == 0 || domain[len - 1] != '\0') { error = EINVAL; goto done_free; } if (len > MAXHOSTNAMELEN) { error = ENAMETOOLONG; goto done_free; } } error = vfs_getopt(opts, "host.hostuuid", (void **)&uuid, &len); if (error == ENOENT) uuid = NULL; else if (error != 0) goto done_free; else { ch_flags |= PR_HOST; pr_flags |= PR_HOST; if (len == 0 || uuid[len - 1] != '\0') { error = EINVAL; goto done_free; } if (len > HOSTUUIDLEN) { error = ENAMETOOLONG; goto done_free; } } #ifdef COMPAT_FREEBSD32 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { uint32_t hid32; error = vfs_copyopt(opts, "host.hostid", &hid32, sizeof(hid32)); hid = hid32; } else #endif error = vfs_copyopt(opts, "host.hostid", &hid, sizeof(hid)); if (error == ENOENT) gothid = 0; else if (error != 0) goto done_free; else { gothid = 1; ch_flags |= PR_HOST; pr_flags |= PR_HOST; } #ifdef INET error = vfs_getopt(opts, "ip4.addr", &op, &ip4s); if (error == ENOENT) ip4s = 0; else if (error != 0) goto done_free; else if (ip4s & (sizeof(*ip4) - 1)) { error = EINVAL; goto done_free; } else { ch_flags |= PR_IP4_USER; pr_flags |= PR_IP4_USER; if (ip4s > 0) { ip4s /= sizeof(*ip4); if (ip4s > jail_max_af_ips) { error = EINVAL; vfs_opterror(opts, "too many IPv4 addresses"); goto done_errmsg; } ip4 = malloc(ip4s * sizeof(*ip4), M_PRISON, M_WAITOK); bcopy(op, ip4, ip4s * sizeof(*ip4)); /* * IP addresses are all sorted but ip[0] to preserve * the primary IP address as given from userland. * This special IP is used for unbound outgoing * connections as well as for "loopback" traffic in case * source address selection cannot find a more fitting * address to connect from. */ if (ip4s > 1) qsort(ip4 + 1, ip4s - 1, sizeof(*ip4), prison_qcmp_v4); /* * Check for duplicate addresses and do some simple * zero and broadcast checks. If users give other bogus * addresses it is their problem. * * We do not have to care about byte order for these * checks so we will do them in NBO. 
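*
* (Concretely: INADDR_ANY is all-zero bits and INADDR_BROADCAST is
* all-one bits, so both patterns read the same in either byte order,
* and pure equality comparisons never depend on byte order at all.)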
*/ for (ii = 0; ii < ip4s; ii++) { if (ip4[ii].s_addr == INADDR_ANY || ip4[ii].s_addr == INADDR_BROADCAST) { error = EINVAL; goto done_free; } if ((ii+1) < ip4s && (ip4[0].s_addr == ip4[ii+1].s_addr || ip4[ii].s_addr == ip4[ii+1].s_addr)) { error = EINVAL; goto done_free; } } } } #endif #ifdef INET6 error = vfs_getopt(opts, "ip6.addr", &op, &ip6s); if (error == ENOENT) ip6s = 0; else if (error != 0) goto done_free; else if (ip6s & (sizeof(*ip6) - 1)) { error = EINVAL; goto done_free; } else { ch_flags |= PR_IP6_USER; pr_flags |= PR_IP6_USER; if (ip6s > 0) { ip6s /= sizeof(*ip6); if (ip6s > jail_max_af_ips) { error = EINVAL; vfs_opterror(opts, "too many IPv6 addresses"); goto done_errmsg; } ip6 = malloc(ip6s * sizeof(*ip6), M_PRISON, M_WAITOK); bcopy(op, ip6, ip6s * sizeof(*ip6)); if (ip6s > 1) qsort(ip6 + 1, ip6s - 1, sizeof(*ip6), prison_qcmp_v6); for (ii = 0; ii < ip6s; ii++) { if (IN6_IS_ADDR_UNSPECIFIED(&ip6[ii])) { error = EINVAL; goto done_free; } if ((ii+1) < ip6s && (IN6_ARE_ADDR_EQUAL(&ip6[0], &ip6[ii+1]) || IN6_ARE_ADDR_EQUAL(&ip6[ii], &ip6[ii+1]))) { error = EINVAL; goto done_free; } } } } #endif #if defined(VIMAGE) && (defined(INET) || defined(INET6)) if ((ch_flags & PR_VNET) && (ch_flags & (PR_IP4_USER | PR_IP6_USER))) { error = EINVAL; vfs_opterror(opts, "vnet jails cannot have IP address restrictions"); goto done_errmsg; } #endif error = vfs_getopt(opts, "osrelease", (void **)&osrelstr, &len); if (error == ENOENT) osrelstr = NULL; else if (error != 0) goto done_free; else { if (flags & JAIL_UPDATE) { error = EINVAL; vfs_opterror(opts, "osrelease cannot be changed after creation"); goto done_errmsg; } if (len == 0 || osrelstr[len - 1] != '\0') { error = EINVAL; goto done_free; } if (len >= OSRELEASELEN) { error = ENAMETOOLONG; vfs_opterror(opts, "osrelease string must be 1-%d bytes long", OSRELEASELEN - 1); goto done_errmsg; } } error = vfs_copyopt(opts, "osreldate", &osreldt, sizeof(osreldt)); if (error == ENOENT) osreldt = 0; else if (error != 0) goto done_free; else { if (flags & JAIL_UPDATE) { error = EINVAL; vfs_opterror(opts, "osreldate cannot be changed after creation"); goto done_errmsg; } if (osreldt == 0) { error = EINVAL; vfs_opterror(opts, "osreldate cannot be 0"); goto done_errmsg; } } root = NULL; error = vfs_getopt(opts, "path", (void **)&path, &len); if (error == ENOENT) path = NULL; else if (error != 0) goto done_free; else { if (flags & JAIL_UPDATE) { error = EINVAL; vfs_opterror(opts, "path cannot be changed after creation"); goto done_errmsg; } if (len == 0 || path[len - 1] != '\0') { error = EINVAL; goto done_free; } NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, path, td); error = namei(&nd); if (error) goto done_free; root = nd.ni_vp; NDFREE(&nd, NDF_ONLY_PNBUF); g_path = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); strlcpy(g_path, path, MAXPATHLEN); error = vn_path_to_global_path(td, root, g_path, MAXPATHLEN); if (error == 0) { path = g_path; } else { /* exit on other errors */ goto done_free; } if (root->v_type != VDIR) { error = ENOTDIR; vput(root); goto done_free; } VOP_UNLOCK(root); } /* * Find the specified jail, or at least its parent. * This abuses the file error codes ENOENT and EEXIST. */ pr = NULL; inspr = NULL; if (cuflags == JAIL_CREATE && jid == 0 && name != NULL) { namelc = strrchr(name, '.'); jid = strtoul(namelc != NULL ? namelc + 1 : name, &p, 10); if (*p != '\0') jid = 0; } sx_xlock(&allprison_lock); drflags = PD_LIST_XLOCKED; ppr = mypr; if (!prison_isalive(ppr)) { /* This jail is dying. This process will surely follow. 
*/ error = EAGAIN; goto done_deref; } if (jid != 0) { if (jid < 0) { error = EINVAL; vfs_opterror(opts, "negative jid"); goto done_deref; } /* * See if a requested jid already exists. Keep track of * where it can be inserted later. */ TAILQ_FOREACH(inspr, &allprison, pr_list) { if (inspr->pr_id < jid) continue; if (inspr->pr_id > jid) break; pr = inspr; mtx_lock(&pr->pr_mtx); drflags |= PD_LOCKED; inspr = NULL; break; } if (pr != NULL) { /* Create: jid must not exist. */ if (cuflags == JAIL_CREATE) { /* * Even creators that cannot see the jail will * get EEXIST. */ error = EEXIST; vfs_opterror(opts, "jail %d already exists", jid); goto done_deref; } if (!prison_ischild(mypr, pr)) { /* * Updaters get ENOENT if they cannot see the * jail. This is true even for CREATE | UPDATE, * which normally cannot give this error. */ error = ENOENT; vfs_opterror(opts, "jail %d not found", jid); goto done_deref; } ppr = pr->pr_parent; if (!prison_isalive(ppr)) { error = ENOENT; vfs_opterror(opts, "jail %d is dying", ppr->pr_id); goto done_deref; } if (!prison_isalive(pr)) { if (!(flags & JAIL_DYING)) { error = ENOENT; vfs_opterror(opts, "jail %d is dying", jid); goto done_deref; } if ((flags & JAIL_ATTACH) || (pr_flags & PR_PERSIST)) { /* * A dying jail might be resurrected * (via attach or persist), but first * it must determine if another jail * has claimed its name. Accomplish * this by implicitly re-setting the * name. */ if (name == NULL) name = prison_name(mypr, pr); } } } else { /* Update: jid must exist. */ if (cuflags == JAIL_UPDATE) { error = ENOENT; vfs_opterror(opts, "jail %d not found", jid); goto done_deref; } } } /* * If the caller provided a name, look for a jail by that name. * This has different semantics for creates and updates keyed by jid * (where the name must not already exist in a different jail), * and updates keyed by the name itself (where the name must exist * because that is the jail being updated). */ namelc = NULL; if (name != NULL) { namelc = strrchr(name, '.'); if (namelc == NULL) namelc = name; else { /* * This is a hierarchical name. Split it into the * parent and child names, and make sure the parent * exists or matches an already found jail. */ if (pr != NULL) { if (strncmp(name, ppr->pr_name, namelc - name) || ppr->pr_name[namelc - name] != '\0') { error = EINVAL; vfs_opterror(opts, "cannot change jail's parent"); goto done_deref; } } else { *namelc = '\0'; ppr = prison_find_name(mypr, name); if (ppr == NULL) { error = ENOENT; vfs_opterror(opts, "jail \"%s\" not found", name); goto done_deref; } mtx_unlock(&ppr->pr_mtx); if (!prison_isalive(ppr)) { error = ENOENT; vfs_opterror(opts, "jail \"%s\" is dying", name); goto done_deref; } *namelc = '.'; } namelc++; } if (namelc[0] != '\0') { pnamelen = (ppr == &prison0) ? 0 : strlen(ppr->pr_name) + 1; deadpr = NULL; FOREACH_PRISON_CHILD(ppr, tpr) { if (tpr != pr && !strcmp(tpr->pr_name + pnamelen, namelc)) { if (prison_isalive(tpr)) { if (pr == NULL && cuflags != JAIL_CREATE) { /* * Use this jail * for updates. */ pr = tpr; mtx_lock(&pr->pr_mtx); drflags |= PD_LOCKED; break; } /* * Create, or update(jid): * name must not exist in an * active sibling jail. */ error = EEXIST; vfs_opterror(opts, "jail \"%s\" already exists", name); goto done_deref; } if (pr == NULL && cuflags != JAIL_CREATE) { deadpr = tpr; } } } /* If no active jail is found, use a dying one. 
*/ if (deadpr != NULL && pr == NULL) { if (flags & JAIL_DYING) { pr = deadpr; mtx_lock(&pr->pr_mtx); drflags |= PD_LOCKED; } else if (cuflags == JAIL_UPDATE) { error = ENOENT; vfs_opterror(opts, "jail \"%s\" is dying", name); goto done_deref; } } /* Update: name must exist if no jid. */ else if (cuflags == JAIL_UPDATE && pr == NULL) { error = ENOENT; vfs_opterror(opts, "jail \"%s\" not found", name); goto done_deref; } } } /* Update: must provide a jid or name. */ else if (cuflags == JAIL_UPDATE && pr == NULL) { error = ENOENT; vfs_opterror(opts, "update specified no jail"); goto done_deref; } /* If there's no prison to update, create a new one and link it in. */ created = pr == NULL; if (created) { for (tpr = mypr; tpr != NULL; tpr = tpr->pr_parent) if (tpr->pr_childcount >= tpr->pr_childmax) { error = EPERM; vfs_opterror(opts, "prison limit exceeded"); goto done_deref; } if (jid == 0 && (jid = get_next_prid(&inspr)) == 0) { error = EAGAIN; vfs_opterror(opts, "no available jail IDs"); goto done_deref; } pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO); pr->pr_state = PRISON_STATE_INVALID; refcount_init(&pr->pr_ref, 1); refcount_init(&pr->pr_uref, 0); drflags |= PD_DEREF; LIST_INIT(&pr->pr_children); mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF | MTX_DUPOK); TASK_INIT(&pr->pr_task, 0, prison_complete, pr); pr->pr_id = jid; if (inspr != NULL) TAILQ_INSERT_BEFORE(inspr, pr, pr_list); else TAILQ_INSERT_TAIL(&allprison, pr, pr_list); pr->pr_parent = ppr; prison_hold(ppr); prison_proc_hold(ppr); LIST_INSERT_HEAD(&ppr->pr_children, pr, pr_sibling); for (tpr = ppr; tpr != NULL; tpr = tpr->pr_parent) tpr->pr_childcount++; /* Set some default values, and inherit some from the parent. */ if (namelc == NULL) namelc = ""; if (path == NULL) { path = "/"; root = mypr->pr_root; vref(root); } strlcpy(pr->pr_hostuuid, DEFAULT_HOSTUUID, HOSTUUIDLEN); pr->pr_flags |= PR_HOST; #if defined(INET) || defined(INET6) #ifdef VIMAGE if (!(pr_flags & PR_VNET)) #endif { #ifdef INET if (!(ch_flags & PR_IP4_USER)) pr->pr_flags |= PR_IP4 | PR_IP4_USER; else if (!(pr_flags & PR_IP4_USER)) { pr->pr_flags |= ppr->pr_flags & PR_IP4; if (ppr->pr_ip4 != NULL) { pr->pr_ip4s = ppr->pr_ip4s; pr->pr_ip4 = malloc(pr->pr_ip4s * sizeof(struct in_addr), M_PRISON, M_WAITOK); bcopy(ppr->pr_ip4, pr->pr_ip4, pr->pr_ip4s * sizeof(*pr->pr_ip4)); } } #endif #ifdef INET6 if (!(ch_flags & PR_IP6_USER)) pr->pr_flags |= PR_IP6 | PR_IP6_USER; else if (!(pr_flags & PR_IP6_USER)) { pr->pr_flags |= ppr->pr_flags & PR_IP6; if (ppr->pr_ip6 != NULL) { pr->pr_ip6s = ppr->pr_ip6s; pr->pr_ip6 = malloc(pr->pr_ip6s * sizeof(struct in6_addr), M_PRISON, M_WAITOK); bcopy(ppr->pr_ip6, pr->pr_ip6, pr->pr_ip6s * sizeof(*pr->pr_ip6)); } } #endif } #endif /* Source address selection is always on by default. */ pr->pr_flags |= _PR_IP_SADDRSEL; pr->pr_securelevel = ppr->pr_securelevel; pr->pr_allow = JAIL_DEFAULT_ALLOW & ppr->pr_allow; pr->pr_enforce_statfs = jail_default_enforce_statfs; pr->pr_devfs_rsnum = ppr->pr_devfs_rsnum; pr->pr_osreldate = osreldt ? osreldt : ppr->pr_osreldate; if (osrelstr == NULL) strlcpy(pr->pr_osrelease, ppr->pr_osrelease, sizeof(pr->pr_osrelease)); else strlcpy(pr->pr_osrelease, osrelstr, sizeof(pr->pr_osrelease)); #ifdef VIMAGE /* Allocate a new vnet if specified. */ pr->pr_vnet = (pr_flags & PR_VNET) ? vnet_alloc() : ppr->pr_vnet; #endif /* * Allocate a dedicated cpuset for each jail. * Unlike other initial settings, this may return an error. 
*/ error = cpuset_create_root(ppr, &pr->pr_cpuset); if (error) goto done_deref; mtx_lock(&pr->pr_mtx); drflags |= PD_LOCKED; } else { /* * Grab a reference for existing prisons, to ensure they * continue to exist for the duration of the call. */ prison_hold(pr); drflags |= PD_DEREF; #if defined(VIMAGE) && (defined(INET) || defined(INET6)) if ((pr->pr_flags & PR_VNET) && (ch_flags & (PR_IP4_USER | PR_IP6_USER))) { error = EINVAL; vfs_opterror(opts, "vnet jails cannot have IP address restrictions"); goto done_deref; } #endif #ifdef INET if (PR_IP4_USER & ch_flags & (pr_flags ^ pr->pr_flags)) { error = EINVAL; vfs_opterror(opts, "ip4 cannot be changed after creation"); goto done_deref; } #endif #ifdef INET6 if (PR_IP6_USER & ch_flags & (pr_flags ^ pr->pr_flags)) { error = EINVAL; vfs_opterror(opts, "ip6 cannot be changed after creation"); goto done_deref; } #endif } /* Do final error checking before setting anything. */ if (gotslevel) { if (slevel < ppr->pr_securelevel) { error = EPERM; goto done_deref; } } if (gotchildmax) { if (childmax >= ppr->pr_childmax) { error = EPERM; goto done_deref; } } if (gotenforce) { if (enforce < ppr->pr_enforce_statfs) { error = EPERM; goto done_deref; } } if (gotrsnum) { /* * devfs_rsnum is a uint16_t */ if (rsnum < 0 || rsnum > 65535) { error = EINVAL; goto done_deref; } /* * Nested jails always inherit parent's devfs ruleset */ if (jailed(td->td_ucred)) { if (rsnum > 0 && rsnum != ppr->pr_devfs_rsnum) { error = EPERM; goto done_deref; } else rsnum = ppr->pr_devfs_rsnum; } } #ifdef INET if (ip4s > 0) { if (ppr->pr_flags & PR_IP4) { /* * Make sure the new set of IP addresses is a * subset of the parent's list. Don't worry * about the parent being unlocked, as any * setting is done with allprison_lock held. */ for (ij = 0; ij < ppr->pr_ip4s; ij++) if (ip4[0].s_addr == ppr->pr_ip4[ij].s_addr) break; if (ij == ppr->pr_ip4s) { error = EPERM; goto done_deref; } if (ip4s > 1) { for (ii = ij = 1; ii < ip4s; ii++) { if (ip4[ii].s_addr == ppr->pr_ip4[0].s_addr) continue; for (; ij < ppr->pr_ip4s; ij++) if (ip4[ii].s_addr == ppr->pr_ip4[ij].s_addr) break; if (ij == ppr->pr_ip4s) break; } if (ij == ppr->pr_ip4s) { error = EPERM; goto done_deref; } } } /* * Check for conflicting IP addresses. We permit them * if there is no more than one IP on each jail. If * there is a duplicate on a jail with more than one * IP, stop checking and return an error. */ #ifdef VIMAGE for (tppr = ppr; tppr != &prison0; tppr = tppr->pr_parent) if (tppr->pr_flags & PR_VNET) break; #else tppr = &prison0; #endif FOREACH_PRISON_DESCENDANT(tppr, tpr, descend) { if (tpr == pr || #ifdef VIMAGE (tpr != tppr && (tpr->pr_flags & PR_VNET)) || #endif !prison_isalive(tpr)) { descend = 0; continue; } if (!(tpr->pr_flags & PR_IP4_USER)) continue; descend = 0; if (tpr->pr_ip4 == NULL || (ip4s == 1 && tpr->pr_ip4s == 1)) continue; for (ii = 0; ii < ip4s; ii++) { if (prison_check_ip4_locked(tpr, &ip4[ii]) == 0) { error = EADDRINUSE; vfs_opterror(opts, "IPv4 addresses clash"); goto done_deref; } } } } #endif #ifdef INET6 if (ip6s > 0) { if (ppr->pr_flags & PR_IP6) { /* * Make sure the new set of IP addresses is a * subset of the parent's list.
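* Both lists keep the primary address first and the remainder sorted, so a single forward pass of the ij cursor suffices; e.g. a parent list {A, B, D} accepts a child list {B, D} but rejects {B, C}.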
*/ for (ij = 0; ij < ppr->pr_ip6s; ij++) if (IN6_ARE_ADDR_EQUAL(&ip6[0], &ppr->pr_ip6[ij])) break; if (ij == ppr->pr_ip6s) { error = EPERM; goto done_deref; } if (ip6s > 1) { for (ii = ij = 1; ii < ip6s; ii++) { if (IN6_ARE_ADDR_EQUAL(&ip6[ii], &ppr->pr_ip6[0])) continue; for (; ij < ppr->pr_ip6s; ij++) if (IN6_ARE_ADDR_EQUAL( &ip6[ii], &ppr->pr_ip6[ij])) break; if (ij == ppr->pr_ip6s) break; } if (ij == ppr->pr_ip6s) { error = EPERM; goto done_deref; } } } /* Check for conflicting IP addresses. */ #ifdef VIMAGE for (tppr = ppr; tppr != &prison0; tppr = tppr->pr_parent) if (tppr->pr_flags & PR_VNET) break; #else tppr = &prison0; #endif FOREACH_PRISON_DESCENDANT(tppr, tpr, descend) { if (tpr == pr || #ifdef VIMAGE (tpr != tppr && (tpr->pr_flags & PR_VNET)) || #endif !prison_isalive(tpr)) { descend = 0; continue; } if (!(tpr->pr_flags & PR_IP6_USER)) continue; descend = 0; if (tpr->pr_ip6 == NULL || (ip6s == 1 && tpr->pr_ip6s == 1)) continue; for (ii = 0; ii < ip6s; ii++) { if (prison_check_ip6_locked(tpr, &ip6[ii]) == 0) { error = EADDRINUSE; vfs_opterror(opts, "IPv6 addresses clash"); goto done_deref; } } } } #endif onamelen = namelen = 0; if (namelc != NULL) { /* Give a default name of the jid. Also allow the name to be * explicitly the jid - but not any other number, and only in * normal form (no leading zero/etc). */ if (namelc[0] == '\0') snprintf(namelc = numbuf, sizeof(numbuf), "%d", jid); else if ((strtoul(namelc, &p, 10) != jid || namelc[0] < '1' || namelc[0] > '9') && *p == '\0') { error = EINVAL; vfs_opterror(opts, "name cannot be numeric (unless it is the jid)"); goto done_deref; } /* * Make sure the name isn't too long for the prison or its * children. */ pnamelen = (ppr == &prison0) ? 0 : strlen(ppr->pr_name) + 1; onamelen = strlen(pr->pr_name + pnamelen); namelen = strlen(namelc); if (pnamelen + namelen + 1 > sizeof(pr->pr_name)) { error = ENAMETOOLONG; goto done_deref; } FOREACH_PRISON_DESCENDANT(pr, tpr, descend) { if (strlen(tpr->pr_name) + (namelen - onamelen) >= sizeof(pr->pr_name)) { error = ENAMETOOLONG; goto done_deref; } } } pr_allow_diff = pr_allow & ~ppr->pr_allow; if (pr_allow_diff & ~PR_ALLOW_DIFFERENCES) { error = EPERM; goto done_deref; } /* * Let modules check their parameters. This requires unlocking and * then re-locking the prison, but this is still a valid state as long * as allprison_lock remains xlocked. */ mtx_unlock(&pr->pr_mtx); drflags &= ~PD_LOCKED; error = osd_jail_call(pr, PR_METHOD_CHECK, opts); if (error != 0) goto done_deref; mtx_lock(&pr->pr_mtx); drflags |= PD_LOCKED; /* At this point, all valid parameters should have been noted. */ TAILQ_FOREACH(opt, opts, link) { if (!opt->seen && strcmp(opt->name, "errmsg")) { error = EINVAL; vfs_opterror(opts, "unknown parameter: %s", opt->name); goto done_deref; } } /* Set the parameters of the prison. 
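* For reference, a minimal userland sketch of driving this path with jail_set(2) (illustrative only; error handling is omitted and the name and path values are made up):
*
*	struct iovec iov[6];
*	int jid;
*
*	iov[0].iov_base = "name";    iov[0].iov_len = sizeof("name");
*	iov[1].iov_base = "myjail";  iov[1].iov_len = sizeof("myjail");
*	iov[2].iov_base = "path";    iov[2].iov_len = sizeof("path");
*	iov[3].iov_base = "/";       iov[3].iov_len = sizeof("/");
*	iov[4].iov_base = "persist"; iov[4].iov_len = sizeof("persist");
*	iov[5].iov_base = NULL;      iov[5].iov_len = 0;
*	jid = jail_set(iov, 6, JAIL_CREATE);
*
* The string lengths include the terminating NUL, and a boolean parameter like "persist" takes a NULL value of zero length.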
*/ #ifdef INET redo_ip4 = 0; if (pr_flags & PR_IP4_USER) { pr->pr_flags |= PR_IP4; free(pr->pr_ip4, M_PRISON); pr->pr_ip4s = ip4s; pr->pr_ip4 = ip4; ip4 = NULL; FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { #ifdef VIMAGE if (tpr->pr_flags & PR_VNET) { descend = 0; continue; } #endif if (prison_restrict_ip4(tpr, NULL)) { redo_ip4 = 1; descend = 0; } } } #endif #ifdef INET6 redo_ip6 = 0; if (pr_flags & PR_IP6_USER) { pr->pr_flags |= PR_IP6; free(pr->pr_ip6, M_PRISON); pr->pr_ip6s = ip6s; pr->pr_ip6 = ip6; ip6 = NULL; FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { #ifdef VIMAGE if (tpr->pr_flags & PR_VNET) { descend = 0; continue; } #endif if (prison_restrict_ip6(tpr, NULL)) { redo_ip6 = 1; descend = 0; } } } #endif if (gotslevel) { pr->pr_securelevel = slevel; /* Set all child jails to be at least this level. */ FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) if (tpr->pr_securelevel < slevel) tpr->pr_securelevel = slevel; } if (gotchildmax) { pr->pr_childmax = childmax; /* Set all child jails to under this limit. */ FOREACH_PRISON_DESCENDANT_LOCKED_LEVEL(pr, tpr, descend, level) if (tpr->pr_childmax > childmax - level) tpr->pr_childmax = childmax > level ? childmax - level : 0; } if (gotenforce) { pr->pr_enforce_statfs = enforce; /* Pass this restriction on to the children. */ FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) if (tpr->pr_enforce_statfs < enforce) tpr->pr_enforce_statfs = enforce; } if (gotrsnum) { pr->pr_devfs_rsnum = rsnum; /* Pass this restriction on to the children. */ FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) tpr->pr_devfs_rsnum = rsnum; } if (namelc != NULL) { if (ppr == &prison0) strlcpy(pr->pr_name, namelc, sizeof(pr->pr_name)); else snprintf(pr->pr_name, sizeof(pr->pr_name), "%s.%s", ppr->pr_name, namelc); /* Change this component of child names. */ FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { bcopy(tpr->pr_name + onamelen, tpr->pr_name + namelen, strlen(tpr->pr_name + onamelen) + 1); bcopy(pr->pr_name, tpr->pr_name, namelen); } } if (path != NULL) { /* Try to keep a real-rooted full pathname. */ strlcpy(pr->pr_path, path, sizeof(pr->pr_path)); pr->pr_root = root; root = NULL; } if (PR_HOST & ch_flags & ~pr_flags) { if (pr->pr_flags & PR_HOST) { /* * Copy the parent's host info. As with pr_ip4 above, * the lack of a lock on the parent is not a problem; * it is always set with allprison_lock at least * shared, and is held exclusively here. */ strlcpy(pr->pr_hostname, pr->pr_parent->pr_hostname, sizeof(pr->pr_hostname)); strlcpy(pr->pr_domainname, pr->pr_parent->pr_domainname, sizeof(pr->pr_domainname)); strlcpy(pr->pr_hostuuid, pr->pr_parent->pr_hostuuid, sizeof(pr->pr_hostuuid)); pr->pr_hostid = pr->pr_parent->pr_hostid; } } else if (host != NULL || domain != NULL || uuid != NULL || gothid) { /* Set this prison, and any descendants without PR_HOST. 
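* (Descendants that set their own host info carry PR_HOST; clearing descend below prunes each such subtree from the update.)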
*/ if (host != NULL) strlcpy(pr->pr_hostname, host, sizeof(pr->pr_hostname)); if (domain != NULL) strlcpy(pr->pr_domainname, domain, sizeof(pr->pr_domainname)); if (uuid != NULL) strlcpy(pr->pr_hostuuid, uuid, sizeof(pr->pr_hostuuid)); if (gothid) pr->pr_hostid = hid; FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { if (tpr->pr_flags & PR_HOST) descend = 0; else { if (host != NULL) strlcpy(tpr->pr_hostname, pr->pr_hostname, sizeof(tpr->pr_hostname)); if (domain != NULL) strlcpy(tpr->pr_domainname, pr->pr_domainname, sizeof(tpr->pr_domainname)); if (uuid != NULL) strlcpy(tpr->pr_hostuuid, pr->pr_hostuuid, sizeof(tpr->pr_hostuuid)); if (gothid) tpr->pr_hostid = hid; } } } pr->pr_allow = (pr->pr_allow & ~ch_allow) | pr_allow; if ((tallow = ch_allow & ~pr_allow)) prison_set_allow_locked(pr, tallow, 0); /* * Persistent prisons get an extra reference, and prisons losing their * persist flag lose that reference. */ born = !prison_isalive(pr); if (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags)) { if (pr_flags & PR_PERSIST) { prison_hold(pr); /* * This may make a dead prison alive again, but wait * to label it as such until after OSD calls have had * a chance to run (and perhaps to fail). */ refcount_acquire(&pr->pr_uref); } else { drflags |= PD_DEUREF; prison_free_not_last(pr); } } pr->pr_flags = (pr->pr_flags & ~ch_flags) | pr_flags; mtx_unlock(&pr->pr_mtx); drflags &= ~PD_LOCKED; /* * Any errors past this point will need to de-persist newly created * prisons, as well as call remove methods. */ if (born) drflags |= PD_KILL; #ifdef RACCT if (racct_enable && created) prison_racct_attach(pr); #endif /* Locks may have prevented a complete restriction of child IP * addresses. If so, allocate some more memory and try again. */ #ifdef INET while (redo_ip4) { ip4s = pr->pr_ip4s; ip4 = malloc(ip4s * sizeof(*ip4), M_PRISON, M_WAITOK); mtx_lock(&pr->pr_mtx); redo_ip4 = 0; FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { #ifdef VIMAGE if (tpr->pr_flags & PR_VNET) { descend = 0; continue; } #endif if (prison_restrict_ip4(tpr, ip4)) { if (ip4 != NULL) ip4 = NULL; else redo_ip4 = 1; } } mtx_unlock(&pr->pr_mtx); } #endif #ifdef INET6 while (redo_ip6) { ip6s = pr->pr_ip6s; ip6 = malloc(ip6s * sizeof(*ip6), M_PRISON, M_WAITOK); mtx_lock(&pr->pr_mtx); redo_ip6 = 0; FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) { #ifdef VIMAGE if (tpr->pr_flags & PR_VNET) { descend = 0; continue; } #endif if (prison_restrict_ip6(tpr, ip6)) { if (ip6 != NULL) ip6 = NULL; else redo_ip6 = 1; } } mtx_unlock(&pr->pr_mtx); } #endif /* Let the modules do their work. */ if (born) { error = osd_jail_call(pr, PR_METHOD_CREATE, opts); if (error) goto done_deref; } error = osd_jail_call(pr, PR_METHOD_SET, opts); if (error) goto done_deref; /* * A new prison is now ready to be seen; either it has gained a user * reference via persistence, or is about to gain one via attachment. */ if (born) { drflags = prison_lock_xlock(pr, drflags); pr->pr_state = PRISON_STATE_ALIVE; } /* Attach this process to the prison if requested. 
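* Passing JAIL_CREATE | JAIL_ATTACH to jail_set(2) reaches this code, creating and entering the jail in a single system call rather than via a separate jail_attach(2) step.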
*/ if (flags & JAIL_ATTACH) { error = do_jail_attach(td, pr, prison_lock_xlock(pr, drflags & PD_LOCK_FLAGS)); drflags &= ~(PD_LOCKED | PD_LIST_XLOCKED); if (error) { vfs_opterror(opts, "attach failed"); goto done_deref; } } #ifdef RACCT if (racct_enable && !created) { if (drflags & PD_LOCKED) { mtx_unlock(&pr->pr_mtx); drflags &= ~PD_LOCKED; } if (drflags & PD_LIST_XLOCKED) { sx_xunlock(&allprison_lock); drflags &= ~PD_LIST_XLOCKED; } prison_racct_modify(pr); } #endif drflags &= ~PD_KILL; td->td_retval[0] = pr->pr_id; done_deref: /* Release any temporary prison holds and/or locks. */ if (pr != NULL) prison_deref(pr, drflags); else if (drflags & PD_LIST_SLOCKED) sx_sunlock(&allprison_lock); else if (drflags & PD_LIST_XLOCKED) sx_xunlock(&allprison_lock); if (root != NULL) vrele(root); done_errmsg: if (error) { /* Write the error message back to userspace. */ if (vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len) == 0 && errmsg_len > 0) { errmsg_pos = 2 * vfs_getopt_pos(opts, "errmsg") + 1; if (optuio->uio_segflg == UIO_SYSSPACE) bcopy(errmsg, optuio->uio_iov[errmsg_pos].iov_base, errmsg_len); else copyout(errmsg, optuio->uio_iov[errmsg_pos].iov_base, errmsg_len); } } done_free: #ifdef INET free(ip4, M_PRISON); #endif #ifdef INET6 free(ip6, M_PRISON); #endif if (g_path != NULL) free(g_path, M_TEMP); vfs_freeopts(opts); return (error); } /* * Find the next available prison ID. Return the ID on success, or zero * on failure. Also set a pointer to the allprison list entry the prison * should be inserted before. */ static int get_next_prid(struct prison **insprp) { struct prison *inspr; int jid, maxid; jid = lastprid % JAIL_MAX + 1; if (TAILQ_EMPTY(&allprison) || TAILQ_LAST(&allprison, prisonlist)->pr_id < jid) { /* * A common case is for all jails to be implicitly numbered, * which means they'll go on the end of the list, at least * for the first JAIL_MAX times. */ inspr = NULL; } else { /* * Take two passes through the allprison list: first starting * with the proposed jid, then ending with it. */ for (maxid = JAIL_MAX; maxid != 0; ) { TAILQ_FOREACH(inspr, &allprison, pr_list) { if (inspr->pr_id < jid) continue; if (inspr->pr_id > jid) { /* Found an opening. */ maxid = 0; break; } if (++jid > maxid) { if (lastprid == maxid || lastprid == 0) { /* * The entire legal range * has been traversed */ return 0; } /* Try again from the start. */ jid = 1; maxid = lastprid; break; } } if (inspr == NULL) { /* Found room at the end of the list. */ break; } } } *insprp = inspr; lastprid = jid; return (jid); } /* * struct jail_get_args { * struct iovec *iovp; * unsigned int iovcnt; * int flags; * }; */ int sys_jail_get(struct thread *td, struct jail_get_args *uap) { struct uio *auio; int error; /* Check that we have an even number of iovecs. */ if (uap->iovcnt & 1) return (EINVAL); error = copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_jail_get(td, auio, uap->flags); if (error == 0) error = copyout(auio->uio_iov, uap->iovp, uap->iovcnt * sizeof (struct iovec)); free(auio, M_IOV); return (error); } int kern_jail_get(struct thread *td, struct uio *optuio, int flags) { struct bool_flags *bf; struct jailsys_flags *jsf; struct prison *pr, *mypr; struct vfsopt *opt; struct vfsoptlist *opts; char *errmsg, *name; int drflags, error, errmsg_len, errmsg_pos, i, jid, len, pos; unsigned f; if (flags & ~JAIL_GET_MASK) return (EINVAL); /* Get the parameter list. 
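* The "lastjid" parameter handled below supports the usual enumeration idiom from userland, sketched here (jls(8) works along these lines):
*
*	struct iovec iov[2];
*	int jid, lastjid = 0;
*
*	for (;;) {
*		iov[0].iov_base = "lastjid";
*		iov[0].iov_len = sizeof("lastjid");
*		iov[1].iov_base = &lastjid;
*		iov[1].iov_len = sizeof(lastjid);
*		jid = jail_get(iov, 2, 0);
*		if (jid == -1)
*			break;		/* ENOENT once every jail was seen */
*		lastjid = jid;
*	}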
*/ error = vfs_buildopts(optuio, &opts); if (error) return (error); errmsg_pos = vfs_getopt_pos(opts, "errmsg"); mypr = td->td_ucred->cr_prison; pr = NULL; /* * Find the prison specified by one of: lastjid, jid, name. */ sx_slock(&allprison_lock); drflags = PD_LIST_SLOCKED; error = vfs_copyopt(opts, "lastjid", &jid, sizeof(jid)); if (error == 0) { TAILQ_FOREACH(pr, &allprison, pr_list) { if (pr->pr_id > jid && ((flags & JAIL_DYING) || prison_isalive(pr)) && prison_ischild(mypr, pr)) { mtx_lock(&pr->pr_mtx); drflags |= PD_LOCKED; goto found_prison; } } error = ENOENT; vfs_opterror(opts, "no jail after %d", jid); goto done; } else if (error != ENOENT) goto done; error = vfs_copyopt(opts, "jid", &jid, sizeof(jid)); if (error == 0) { if (jid != 0) { pr = prison_find_child(mypr, jid); if (pr != NULL) { drflags |= PD_LOCKED; if (!(prison_isalive(pr) || (flags & JAIL_DYING))) { error = ENOENT; vfs_opterror(opts, "jail %d is dying", jid); goto done; } goto found_prison; } error = ENOENT; vfs_opterror(opts, "jail %d not found", jid); goto done; } } else if (error != ENOENT) goto done; error = vfs_getopt(opts, "name", (void **)&name, &len); if (error == 0) { if (len == 0 || name[len - 1] != '\0') { error = EINVAL; goto done; } pr = prison_find_name(mypr, name); if (pr != NULL) { drflags |= PD_LOCKED; if (!(prison_isalive(pr) || (flags & JAIL_DYING))) { error = ENOENT; vfs_opterror(opts, "jail \"%s\" is dying", name); goto done; } goto found_prison; } error = ENOENT; vfs_opterror(opts, "jail \"%s\" not found", name); goto done; } else if (error != ENOENT) goto done; vfs_opterror(opts, "no jail specified"); error = ENOENT; goto done; found_prison: /* Get the parameters of the prison. */ prison_hold(pr); drflags |= PD_DEREF; td->td_retval[0] = pr->pr_id; error = vfs_setopt(opts, "jid", &pr->pr_id, sizeof(pr->pr_id)); if (error != 0 && error != ENOENT) goto done; i = (pr->pr_parent == mypr) ? 
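/* The parent is reported as jid 0 when it is the caller's own prison, matching the "jails see themselves as 0" convention of prison_name(). */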
0 : pr->pr_parent->pr_id; error = vfs_setopt(opts, "parent", &i, sizeof(i)); if (error != 0 && error != ENOENT) goto done; error = vfs_setopts(opts, "name", prison_name(mypr, pr)); if (error != 0 && error != ENOENT) goto done; error = vfs_setopt(opts, "cpuset.id", &pr->pr_cpuset->cs_id, sizeof(pr->pr_cpuset->cs_id)); if (error != 0 && error != ENOENT) goto done; error = vfs_setopts(opts, "path", prison_path(mypr, pr)); if (error != 0 && error != ENOENT) goto done; #ifdef INET error = vfs_setopt_part(opts, "ip4.addr", pr->pr_ip4, pr->pr_ip4s * sizeof(*pr->pr_ip4)); if (error != 0 && error != ENOENT) goto done; #endif #ifdef INET6 error = vfs_setopt_part(opts, "ip6.addr", pr->pr_ip6, pr->pr_ip6s * sizeof(*pr->pr_ip6)); if (error != 0 && error != ENOENT) goto done; #endif error = vfs_setopt(opts, "securelevel", &pr->pr_securelevel, sizeof(pr->pr_securelevel)); if (error != 0 && error != ENOENT) goto done; error = vfs_setopt(opts, "children.cur", &pr->pr_childcount, sizeof(pr->pr_childcount)); if (error != 0 && error != ENOENT) goto done; error = vfs_setopt(opts, "children.max", &pr->pr_childmax, sizeof(pr->pr_childmax)); if (error != 0 && error != ENOENT) goto done; error = vfs_setopts(opts, "host.hostname", pr->pr_hostname); if (error != 0 && error != ENOENT) goto done; error = vfs_setopts(opts, "host.domainname", pr->pr_domainname); if (error != 0 && error != ENOENT) goto done; error = vfs_setopts(opts, "host.hostuuid", pr->pr_hostuuid); if (error != 0 && error != ENOENT) goto done; #ifdef COMPAT_FREEBSD32 if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { uint32_t hid32 = pr->pr_hostid; error = vfs_setopt(opts, "host.hostid", &hid32, sizeof(hid32)); } else #endif error = vfs_setopt(opts, "host.hostid", &pr->pr_hostid, sizeof(pr->pr_hostid)); if (error != 0 && error != ENOENT) goto done; error = vfs_setopt(opts, "enforce_statfs", &pr->pr_enforce_statfs, sizeof(pr->pr_enforce_statfs)); if (error != 0 && error != ENOENT) goto done; error = vfs_setopt(opts, "devfs_ruleset", &pr->pr_devfs_rsnum, sizeof(pr->pr_devfs_rsnum)); if (error != 0 && error != ENOENT) goto done; for (bf = pr_flag_bool; bf < pr_flag_bool + nitems(pr_flag_bool); bf++) { i = (pr->pr_flags & bf->flag) ? 1 : 0; error = vfs_setopt(opts, bf->name, &i, sizeof(i)); if (error != 0 && error != ENOENT) goto done; i = !i; error = vfs_setopt(opts, bf->noname, &i, sizeof(i)); if (error != 0 && error != ENOENT) goto done; } for (jsf = pr_flag_jailsys; jsf < pr_flag_jailsys + nitems(pr_flag_jailsys); jsf++) { f = pr->pr_flags & (jsf->disable | jsf->new); i = (f != 0 && f == jsf->disable) ? JAIL_SYS_DISABLE : (f == jsf->new) ? JAIL_SYS_NEW : JAIL_SYS_INHERIT; error = vfs_setopt(opts, jsf->name, &i, sizeof(i)); if (error != 0 && error != ENOENT) goto done; } for (bf = pr_flag_allow; bf < pr_flag_allow + nitems(pr_flag_allow) && atomic_load_int(&bf->flag) != 0; bf++) { i = (pr->pr_allow & bf->flag) ? 
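/* Each boolean flag is reported twice: once under its own name and once inverted under its "no" counterpart. */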
1 : 0; error = vfs_setopt(opts, bf->name, &i, sizeof(i)); if (error != 0 && error != ENOENT) goto done; i = !i; error = vfs_setopt(opts, bf->noname, &i, sizeof(i)); if (error != 0 && error != ENOENT) goto done; } i = !prison_isalive(pr); error = vfs_setopt(opts, "dying", &i, sizeof(i)); if (error != 0 && error != ENOENT) goto done; i = !i; error = vfs_setopt(opts, "nodying", &i, sizeof(i)); if (error != 0 && error != ENOENT) goto done; error = vfs_setopt(opts, "osreldate", &pr->pr_osreldate, sizeof(pr->pr_osreldate)); if (error != 0 && error != ENOENT) goto done; error = vfs_setopts(opts, "osrelease", pr->pr_osrelease); if (error != 0 && error != ENOENT) goto done; /* Get the module parameters. */ mtx_unlock(&pr->pr_mtx); drflags &= ~PD_LOCKED; error = osd_jail_call(pr, PR_METHOD_GET, opts); if (error) goto done; prison_deref(pr, drflags); pr = NULL; drflags = 0; /* By now, all parameters should have been noted. */ TAILQ_FOREACH(opt, opts, link) { if (!opt->seen && strcmp(opt->name, "errmsg")) { error = EINVAL; vfs_opterror(opts, "unknown parameter: %s", opt->name); goto done; } } /* Write the fetched parameters back to userspace. */ error = 0; TAILQ_FOREACH(opt, opts, link) { if (opt->pos >= 0 && opt->pos != errmsg_pos) { pos = 2 * opt->pos + 1; optuio->uio_iov[pos].iov_len = opt->len; if (opt->value != NULL) { if (optuio->uio_segflg == UIO_SYSSPACE) { bcopy(opt->value, optuio->uio_iov[pos].iov_base, opt->len); } else { error = copyout(opt->value, optuio->uio_iov[pos].iov_base, opt->len); if (error) break; } } } } done: /* Release any temporary prison holds and/or locks. */ if (pr != NULL) prison_deref(pr, drflags); else if (drflags & PD_LIST_SLOCKED) sx_sunlock(&allprison_lock); if (error && errmsg_pos >= 0) { /* Write the error message back to userspace. */ vfs_getopt(opts, "errmsg", (void **)&errmsg, &errmsg_len); errmsg_pos = 2 * errmsg_pos + 1; if (errmsg_len > 0) { if (optuio->uio_segflg == UIO_SYSSPACE) bcopy(errmsg, optuio->uio_iov[errmsg_pos].iov_base, errmsg_len); else copyout(errmsg, optuio->uio_iov[errmsg_pos].iov_base, errmsg_len); } } vfs_freeopts(opts); return (error); } /* * struct jail_remove_args { * int jid; * }; */ int sys_jail_remove(struct thread *td, struct jail_remove_args *uap) { struct prison *pr; int error; error = priv_check(td, PRIV_JAIL_REMOVE); if (error) return (error); sx_xlock(&allprison_lock); pr = prison_find_child(td->td_ucred->cr_prison, uap->jid); if (pr == NULL) { sx_xunlock(&allprison_lock); return (EINVAL); } if (!prison_isalive(pr)) { /* Silently ignore already-dying prisons. */ mtx_unlock(&pr->pr_mtx); sx_xunlock(&allprison_lock); return (0); } prison_deref(pr, PD_KILL | PD_LOCKED | PD_LIST_XLOCKED); return (0); } /* * struct jail_attach_args { * int jid; * }; */ int sys_jail_attach(struct thread *td, struct jail_attach_args *uap) { struct prison *pr; int error; error = priv_check(td, PRIV_JAIL_ATTACH); if (error) return (error); sx_slock(&allprison_lock); pr = prison_find_child(td->td_ucred->cr_prison, uap->jid); if (pr == NULL) { sx_sunlock(&allprison_lock); return (EINVAL); } /* Do not allow a process to attach to a prison that is not alive. 
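* From userland, the attach step alone is simply (sketch):
*
*	if (jail_attach(jid) == -1)
*		err(1, "jail_attach");
*
* after which the process is chrooted to the prison's root and its credential points at the target prison.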
*/ if (!prison_isalive(pr)) { mtx_unlock(&pr->pr_mtx); sx_sunlock(&allprison_lock); return (EINVAL); } return (do_jail_attach(td, pr, PD_LOCKED | PD_LIST_SLOCKED)); } static int do_jail_attach(struct thread *td, struct prison *pr, int drflags) { struct proc *p; struct ucred *newcred, *oldcred; int error; mtx_assert(&pr->pr_mtx, MA_OWNED); sx_assert(&allprison_lock, SX_LOCKED); drflags &= PD_LOCK_FLAGS; /* * XXX: Note that there is a slight race here if two threads * in the same privileged process attempt to attach to two * different jails at the same time. It is important for * user processes not to do this, or they might end up with * a process root from one prison, but attached to the jail * of another. */ prison_hold(pr); refcount_acquire(&pr->pr_uref); drflags |= PD_DEREF | PD_DEUREF; mtx_unlock(&pr->pr_mtx); drflags &= ~PD_LOCKED; /* Let modules do whatever they need to prepare for attaching. */ error = osd_jail_call(pr, PR_METHOD_ATTACH, td); if (error) { prison_deref(pr, drflags); return (error); } sx_unlock(&allprison_lock); drflags &= ~(PD_LIST_SLOCKED | PD_LIST_XLOCKED); /* * Reparent the newly attached process to this jail. */ p = td->td_proc; error = cpuset_setproc_update_set(p, pr->pr_cpuset); if (error) goto e_revert_osd; vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY); if ((error = change_dir(pr->pr_root, td)) != 0) goto e_unlock; #ifdef MAC if ((error = mac_vnode_check_chroot(td->td_ucred, pr->pr_root))) goto e_unlock; #endif VOP_UNLOCK(pr->pr_root); if ((error = pwd_chroot_chdir(td, pr->pr_root))) goto e_revert_osd; newcred = crget(); PROC_LOCK(p); oldcred = crcopysafe(p, newcred); newcred->cr_prison = pr; proc_set_cred(p, newcred); setsugid(p); #ifdef RACCT racct_proc_ucred_changed(p, oldcred, newcred); crhold(newcred); #endif PROC_UNLOCK(p); #ifdef RCTL rctl_proc_ucred_changed(p, newcred); crfree(newcred); #endif prison_deref(oldcred->cr_prison, drflags); crfree(oldcred); /* * If the prison was killed while changing credentials, die along * with it. */ if (!prison_isalive(pr)) { PROC_LOCK(p); kern_psignal(p, SIGKILL); PROC_UNLOCK(p); } return (0); e_unlock: VOP_UNLOCK(pr->pr_root); e_revert_osd: /* Tell modules this thread is still in its old jail after all. */ sx_slock(&allprison_lock); drflags |= PD_LIST_SLOCKED; (void)osd_jail_call(td->td_ucred->cr_prison, PR_METHOD_ATTACH, td); prison_deref(pr, drflags); return (error); } /* * Returns a locked prison instance, or NULL on failure. */ struct prison * prison_find(int prid) { struct prison *pr; sx_assert(&allprison_lock, SX_LOCKED); TAILQ_FOREACH(pr, &allprison, pr_list) { if (pr->pr_id < prid) continue; if (pr->pr_id > prid) break; KASSERT(prison_isvalid(pr), ("Found invalid prison %p", pr)); mtx_lock(&pr->pr_mtx); return (pr); } return (NULL); } /* * Find a prison that is a descendant of mypr. Returns a locked prison or NULL. */ struct prison * prison_find_child(struct prison *mypr, int prid) { struct prison *pr; int descend; sx_assert(&allprison_lock, SX_LOCKED); FOREACH_PRISON_DESCENDANT(mypr, pr, descend) { if (pr->pr_id == prid) { KASSERT(prison_isvalid(pr), ("Found invalid prison %p", pr)); mtx_lock(&pr->pr_mtx); return (pr); } } return (NULL); } /* * Look for the name relative to mypr. Returns a locked prison or NULL. */ struct prison * prison_find_name(struct prison *mypr, const char *name) { struct prison *pr, *deadpr; size_t mylen; int descend; sx_assert(&allprison_lock, SX_LOCKED); mylen = (mypr == &prison0) ? 
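/* prison0 has the empty name, so there is no "parent." prefix to skip. */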
0 : strlen(mypr->pr_name) + 1; deadpr = NULL; FOREACH_PRISON_DESCENDANT(mypr, pr, descend) { if (!strcmp(pr->pr_name + mylen, name)) { KASSERT(prison_isvalid(pr), ("Found invalid prison %p", pr)); if (prison_isalive(pr)) { mtx_lock(&pr->pr_mtx); return (pr); } deadpr = pr; } } /* There was no valid prison - perhaps there was a dying one. */ if (deadpr != NULL) mtx_lock(&deadpr->pr_mtx); return (deadpr); } /* * See if a prison has the specific flag set. The prison should be locked, * unless checking for flags that are only set at jail creation (such as * PR_IP4 and PR_IP6), or when only a single bit is examined, without regard * to any other prison data. */ int prison_flag(struct ucred *cred, unsigned flag) { return (cred->cr_prison->pr_flags & flag); } int prison_allow(struct ucred *cred, unsigned flag) { return ((cred->cr_prison->pr_allow & flag) != 0); } /* * Hold a prison reference, by incrementing pr_ref. It is generally * an error to hold a prison that does not already have a reference. * A prison record will remain valid as long as it has at least one * reference, and will not be removed as long as either the prison * mutex or the allprison lock is held (allprison_lock may be shared). */ void prison_hold_locked(struct prison *pr) { /* Locking is no longer required. */ prison_hold(pr); } void prison_hold(struct prison *pr) { #ifdef INVARIANTS int was_valid = refcount_acquire_if_not_zero(&pr->pr_ref); KASSERT(was_valid, ("Trying to hold dead prison %p (jid=%d).", pr, pr->pr_id)); #else refcount_acquire(&pr->pr_ref); #endif } /* * Remove a prison reference. If that was the last reference, the * prison will be removed (at a later time). */ void prison_free_locked(struct prison *pr) { mtx_assert(&pr->pr_mtx, MA_OWNED); /* * Locking is no longer required, but unlock because the caller * expects it. */ mtx_unlock(&pr->pr_mtx); prison_free(pr); } void prison_free(struct prison *pr) { KASSERT(refcount_load(&pr->pr_ref) > 0, ("Trying to free dead prison %p (jid=%d).", pr, pr->pr_id)); if (!refcount_release_if_not_last(&pr->pr_ref)) { /* * Don't remove the last reference in this context, * in case there are locks held. */ taskqueue_enqueue(taskqueue_thread, &pr->pr_task); } } static void prison_free_not_last(struct prison *pr) { #ifdef INVARIANTS int lastref; KASSERT(refcount_load(&pr->pr_ref) > 0, ("Trying to free dead prison %p (jid=%d).", pr, pr->pr_id)); lastref = refcount_release(&pr->pr_ref); KASSERT(!lastref, ("prison_free_not_last freed last ref on prison %p (jid=%d).", pr, pr->pr_id)); #else refcount_release(&pr->pr_ref); #endif } /* * Hold a prison for user visibility, by incrementing pr_uref. * It is generally an error to hold a prison that isn't already * user-visible, except through the jail system calls. It is also * an error to hold an invalid prison. A prison record will remain * alive as long as it has at least one user reference, and will not * be set to the dying state until the prison mutex and allprison_lock * are both freed. */ void prison_proc_hold(struct prison *pr) { #ifdef INVARIANTS int was_alive = refcount_acquire_if_not_zero(&pr->pr_uref); KASSERT(was_alive, ("Cannot add a process to a non-alive prison (jid=%d)", pr->pr_id)); #else refcount_acquire(&pr->pr_uref); #endif } /* * Remove a prison user reference. If it was the last reference, the * prison will be considered "dying", and may be removed once all of * its references are dropped. */ void prison_proc_free(struct prison *pr) { /* * Locking is only required when releasing the last reference.
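* (Typically the last user reference goes away when the final process in a non-persistent jail exits; the task queued below then moves the prison to the dying state.)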
* This ensures that a locked prison will remain alive * until it is unlocked. */ KASSERT(refcount_load(&pr->pr_uref) > 0, ("Trying to kill a process in a dead prison (jid=%d)", pr->pr_id)); if (!refcount_release_if_not_last(&pr->pr_uref)) { /* * Don't remove the last user reference in this context, * which is expected to be a process that is not only locked, * but also half dead. Add a reference so any calls to * prison_free() won't re-submit the task. */ prison_hold(pr); mtx_lock(&pr->pr_mtx); KASSERT(!(pr->pr_flags & PR_COMPLETE_PROC), ("Redundant last reference in prison_proc_free (jid=%d)", pr->pr_id)); pr->pr_flags |= PR_COMPLETE_PROC; mtx_unlock(&pr->pr_mtx); taskqueue_enqueue(taskqueue_thread, &pr->pr_task); } } static void prison_proc_free_not_last(struct prison *pr) { #ifdef INVARIANTS int lastref; KASSERT(refcount_load(&pr->pr_uref) > 0, ("Trying to free dead prison %p (jid=%d).", pr, pr->pr_id)); lastref = refcount_release(&pr->pr_uref); KASSERT(!lastref, ("prison_proc_free_not_last freed last uref on prison %p (jid=%d).", pr, pr->pr_id)); #else refcount_release(&pr->pr_uref); #endif } /* * Complete a call to either prison_free or prison_proc_free. */ static void prison_complete(void *context, int pending) { struct prison *pr = context; int drflags; /* * This could be called to release the last reference, or the last * user reference (plus the reference held in prison_proc_free). */ drflags = prison_lock_xlock(pr, PD_DEREF); if (pr->pr_flags & PR_COMPLETE_PROC) { pr->pr_flags &= ~PR_COMPLETE_PROC; drflags |= PD_DEUREF; } prison_deref(pr, drflags); } /* * Remove a prison reference and/or user reference (usually). * This assumes context that allows sleeping (for allprison_lock), * with no non-sleeping locks held, except perhaps the prison itself. * If there are no more references, release and delist the prison. * On completion, the prison lock and the allprison lock are both * unlocked. */ static void prison_deref(struct prison *pr, int flags) { struct prisonlist freeprison; struct prison *killpr, *rpr, *ppr, *tpr; struct proc *p; killpr = NULL; TAILQ_INIT(&freeprison); /* * Release this prison as requested, which may cause its parent * to be released, and then maybe its grandparent, etc. */ for (;;) { if (flags & PD_KILL) { /* Kill the prison and its descendants. */ KASSERT(pr != &prison0, ("prison_deref trying to kill prison0")); if (!(flags & PD_DEREF)) { prison_hold(pr); flags |= PD_DEREF; } flags = prison_lock_xlock(pr, flags); prison_deref_kill(pr, &freeprison); } if (flags & PD_DEUREF) { /* Drop a user reference. */ KASSERT(refcount_load(&pr->pr_uref) > 0, ("prison_deref PD_DEUREF on a dead prison (jid=%d)", pr->pr_id)); if (!refcount_release_if_not_last(&pr->pr_uref)) { if (!(flags & PD_DEREF)) { prison_hold(pr); flags |= PD_DEREF; } flags = prison_lock_xlock(pr, flags); if (refcount_release(&pr->pr_uref) && pr->pr_state == PRISON_STATE_ALIVE) { /* * When the last user reference goes, * this becomes a dying prison. */ KASSERT( refcount_load(&prison0.pr_uref) > 0, ("prison0 pr_uref=0")); pr->pr_state = PRISON_STATE_DYING; mtx_unlock(&pr->pr_mtx); flags &= ~PD_LOCKED; (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL); } } } if (flags & PD_KILL) { /* * Any remaining user references are probably processes * that need to be killed, either in this prison or its * descendants. */ if (refcount_load(&pr->pr_uref) > 0) killpr = pr; /* Make sure the parent prison doesn't get killed. */ flags &= ~PD_KILL; } if (flags & PD_DEREF) { /* Drop a reference.
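* (When the drop below unlinks a prison, the loop continues into its parent with PD_DEREF | PD_DEUREF set, releasing the references the child held on it.)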
*/ KASSERT(refcount_load(&pr->pr_ref) > 0, ("prison_deref PD_DEREF on a dead prison (jid=%d)", pr->pr_id)); if (!refcount_release_if_not_last(&pr->pr_ref)) { flags = prison_lock_xlock(pr, flags); if (refcount_release(&pr->pr_ref)) { /* * When the last reference goes, * unlink the prison and set it aside. */ KASSERT( refcount_load(&pr->pr_uref) == 0, ("prison_deref: last ref, " "but still has %d urefs (jid=%d)", pr->pr_uref, pr->pr_id)); KASSERT( refcount_load(&prison0.pr_ref) != 0, ("prison0 pr_ref=0")); pr->pr_state = PRISON_STATE_INVALID; TAILQ_REMOVE(&allprison, pr, pr_list); LIST_REMOVE(pr, pr_sibling); TAILQ_INSERT_TAIL(&freeprison, pr, pr_list); for (ppr = pr->pr_parent; ppr != NULL; ppr = ppr->pr_parent) ppr->pr_childcount--; /* * Removing a prison frees references * from its parent. */ mtx_unlock(&pr->pr_mtx); flags &= ~PD_LOCKED; pr = pr->pr_parent; flags |= PD_DEREF | PD_DEUREF; continue; } } } break; } /* Release all the prison locks. */ if (flags & PD_LOCKED) mtx_unlock(&pr->pr_mtx); if (flags & PD_LIST_SLOCKED) sx_sunlock(&allprison_lock); else if (flags & PD_LIST_XLOCKED) sx_xunlock(&allprison_lock); /* Kill any processes attached to a killed prison. */ if (killpr != NULL) { sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); if (p->p_state != PRS_NEW && p->p_ucred != NULL) { for (ppr = p->p_ucred->cr_prison; ppr != &prison0; ppr = ppr->pr_parent) if (ppr == killpr) { kern_psignal(p, SIGKILL); break; } } PROC_UNLOCK(p); } sx_sunlock(&allproc_lock); } /* * Finish removing any unreferenced prisons, which couldn't happen * while allprison_lock was held (to avoid a LOR on vrele). */ TAILQ_FOREACH_SAFE(rpr, &freeprison, pr_list, tpr) { #ifdef VIMAGE if (rpr->pr_vnet != rpr->pr_parent->pr_vnet) vnet_destroy(rpr->pr_vnet); #endif if (rpr->pr_root != NULL) vrele(rpr->pr_root); mtx_destroy(&rpr->pr_mtx); #ifdef INET free(rpr->pr_ip4, M_PRISON); #endif #ifdef INET6 free(rpr->pr_ip6, M_PRISON); #endif if (rpr->pr_cpuset != NULL) cpuset_rel(rpr->pr_cpuset); osd_jail_exit(rpr); #ifdef RACCT if (racct_enable) prison_racct_detach(rpr); #endif TAILQ_REMOVE(&freeprison, rpr, pr_list); free(rpr, M_PRISON); } } /* * Kill the prison and its descendants. Mark them as dying, clear the * persist flag, and call module remove methods. */ static void prison_deref_kill(struct prison *pr, struct prisonlist *freeprison) { struct prison *cpr, *ppr, *rpr; bool descend; /* * Unlike the descendants, the target prison can be killed * even if it is currently dying. This is useful for failed * creation in jail_set(2). */ KASSERT(refcount_load(&pr->pr_ref) > 0, ("Trying to kill dead prison %p (jid=%d).", pr, pr->pr_id)); refcount_acquire(&pr->pr_uref); pr->pr_state = PRISON_STATE_DYING; mtx_unlock(&pr->pr_mtx); rpr = NULL; FOREACH_PRISON_DESCENDANT_PRE_POST(pr, cpr, descend) { if (descend) { if (!prison_isalive(cpr)) { descend = false; continue; } prison_hold(cpr); prison_proc_hold(cpr); mtx_lock(&cpr->pr_mtx); cpr->pr_state = PRISON_STATE_DYING; cpr->pr_flags |= PR_REMOVE; mtx_unlock(&cpr->pr_mtx); continue; } if (!(cpr->pr_flags & PR_REMOVE)) continue; (void)osd_jail_call(cpr, PR_METHOD_REMOVE, NULL); mtx_lock(&cpr->pr_mtx); cpr->pr_flags &= ~PR_REMOVE; if (cpr->pr_flags & PR_PERSIST) { cpr->pr_flags &= ~PR_PERSIST; prison_proc_free_not_last(cpr); prison_free_not_last(cpr); } (void)refcount_release(&cpr->pr_uref); if (refcount_release(&cpr->pr_ref)) { /* * When the last reference goes, unlink the prison * and set it aside for prison_deref() to handle. 
* Delay unlinking the sibling list to keep the loop * safe. */ if (rpr != NULL) LIST_REMOVE(rpr, pr_sibling); rpr = cpr; rpr->pr_state = PRISON_STATE_INVALID; TAILQ_REMOVE(&allprison, rpr, pr_list); TAILQ_INSERT_TAIL(freeprison, rpr, pr_list); /* * Removing a prison frees references from its parent. */ ppr = rpr->pr_parent; prison_proc_free_not_last(ppr); prison_free_not_last(ppr); for (; ppr != NULL; ppr = ppr->pr_parent) ppr->pr_childcount--; } mtx_unlock(&cpr->pr_mtx); } if (rpr != NULL) LIST_REMOVE(rpr, pr_sibling); (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL); mtx_lock(&pr->pr_mtx); if (pr->pr_flags & PR_PERSIST) { pr->pr_flags &= ~PR_PERSIST; prison_proc_free_not_last(pr); prison_free_not_last(pr); } (void)refcount_release(&pr->pr_uref); } /* * Given the current locking state in the flags, make sure allprison_lock * is held exclusive, and the prison is locked. Return flags indicating * the new state. */ static int prison_lock_xlock(struct prison *pr, int flags) { if (!(flags & PD_LIST_XLOCKED)) { /* * Get allprison_lock, which may be an upgrade, * and may require unlocking the prison. */ if (flags & PD_LOCKED) { mtx_unlock(&pr->pr_mtx); flags &= ~PD_LOCKED; } if (flags & PD_LIST_SLOCKED) { if (!sx_try_upgrade(&allprison_lock)) { sx_sunlock(&allprison_lock); sx_xlock(&allprison_lock); } flags &= ~PD_LIST_SLOCKED; } else sx_xlock(&allprison_lock); flags |= PD_LIST_XLOCKED; } if (!(flags & PD_LOCKED)) { /* Lock the prison mutex. */ mtx_lock(&pr->pr_mtx); flags |= PD_LOCKED; } return flags; } /* * Set or clear a permission bit in the pr_allow field, passing restrictions * (cleared permission) down to child jails. */ void prison_set_allow(struct ucred *cred, unsigned flag, int enable) { struct prison *pr; pr = cred->cr_prison; sx_slock(&allprison_lock); mtx_lock(&pr->pr_mtx); prison_set_allow_locked(pr, flag, enable); mtx_unlock(&pr->pr_mtx); sx_sunlock(&allprison_lock); } static void prison_set_allow_locked(struct prison *pr, unsigned flag, int enable) { struct prison *cpr; int descend; if (enable != 0) pr->pr_allow |= flag; else { pr->pr_allow &= ~flag; FOREACH_PRISON_DESCENDANT_LOCKED(pr, cpr, descend) cpr->pr_allow &= ~flag; } } /* * Check if a jail supports the given address family. * * Returns 0 if not jailed or the address family is supported, EAFNOSUPPORT * if not. */ int prison_check_af(struct ucred *cred, int af) { struct prison *pr; int error; KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); pr = cred->cr_prison; #ifdef VIMAGE /* Prisons with their own network stack are not limited. */ if (prison_owns_vnet(cred)) return (0); #endif error = 0; switch (af) { #ifdef INET case AF_INET: if (pr->pr_flags & PR_IP4) { mtx_lock(&pr->pr_mtx); if ((pr->pr_flags & PR_IP4) && pr->pr_ip4 == NULL) error = EAFNOSUPPORT; mtx_unlock(&pr->pr_mtx); } break; #endif #ifdef INET6 case AF_INET6: if (pr->pr_flags & PR_IP6) { mtx_lock(&pr->pr_mtx); if ((pr->pr_flags & PR_IP6) && pr->pr_ip6 == NULL) error = EAFNOSUPPORT; mtx_unlock(&pr->pr_mtx); } break; #endif case AF_LOCAL: case AF_ROUTE: break; default: if (!(pr->pr_allow & PR_ALLOW_SOCKET_AF)) error = EAFNOSUPPORT; } return (error); } /* * Check if given address belongs to the jail referenced by cred (wrapper to * prison_check_ip[46]). * * Returns 0 if jail doesn't restrict the address family or if address belongs * to jail, EADDRNOTAVAIL if the address doesn't belong, or EAFNOSUPPORT if * the jail doesn't allow the address family. IPv4 addresses are passed in NBO.
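* Used, for example, by the routing socket's interface-list sysctl to filter which interface addresses a jailed process is shown; a non-zero return hides the address.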
*/ int prison_if(struct ucred *cred, const struct sockaddr *sa) { #ifdef INET const struct sockaddr_in *sai; #endif #ifdef INET6 const struct sockaddr_in6 *sai6; #endif int error; KASSERT(cred != NULL, ("%s: cred is NULL", __func__)); KASSERT(sa != NULL, ("%s: sa is NULL", __func__)); #ifdef VIMAGE if (prison_owns_vnet(cred)) return (0); #endif error = 0; switch (sa->sa_family) { #ifdef INET case AF_INET: sai = (const struct sockaddr_in *)sa; error = prison_check_ip4(cred, &sai->sin_addr); break; #endif #ifdef INET6 case AF_INET6: sai6 = (const struct sockaddr_in6 *)sa; error = prison_check_ip6(cred, &sai6->sin6_addr); break; #endif default: if (!(cred->cr_prison->pr_allow & PR_ALLOW_SOCKET_AF)) error = EAFNOSUPPORT; } return (error); } /* * Return 0 if jails permit p1 to frob p2, otherwise ESRCH. */ int prison_check(struct ucred *cred1, struct ucred *cred2) { return ((cred1->cr_prison == cred2->cr_prison || prison_ischild(cred1->cr_prison, cred2->cr_prison)) ? 0 : ESRCH); } /* * Return 1 if p2 is a child of p1, otherwise 0. */ int prison_ischild(struct prison *pr1, struct prison *pr2) { for (pr2 = pr2->pr_parent; pr2 != NULL; pr2 = pr2->pr_parent) if (pr1 == pr2) return (1); return (0); } /* * Return true if the prison is currently alive. A prison is alive if it * holds user references and it isn't being removed. */ bool prison_isalive(struct prison *pr) { if (__predict_false(pr->pr_state != PRISON_STATE_ALIVE)) return (false); return (true); } /* * Return true if the prison is currently valid. A prison is valid if it has * been fully created, and is not being destroyed. Note that dying prisons * are still considered valid. Invalid prisons won't be found under normal * circumstances, as they're only put in that state by functions that have * an exclusive hold on allprison_lock. */ bool prison_isvalid(struct prison *pr) { if (__predict_false(pr->pr_state == PRISON_STATE_INVALID)) return (false); if (__predict_false(refcount_load(&pr->pr_ref) == 0)) return (false); return (true); } /* * Return 1 if the passed credential is in a jail and that jail does not * have its own virtual network stack, otherwise 0. */ int jailed_without_vnet(struct ucred *cred) { if (!jailed(cred)) return (0); #ifdef VIMAGE if (prison_owns_vnet(cred)) return (0); #endif return (1); } /* * Return the correct hostname (domainname, et al) for the passed credential. */ void getcredhostname(struct ucred *cred, char *buf, size_t size) { struct prison *pr; /* * A NULL credential can be used to shortcut to the physical * system's hostname. */ pr = (cred != NULL) ? 
cred->cr_prison : &prison0; mtx_lock(&pr->pr_mtx); strlcpy(buf, pr->pr_hostname, size); mtx_unlock(&pr->pr_mtx); } void getcreddomainname(struct ucred *cred, char *buf, size_t size) { mtx_lock(&cred->cr_prison->pr_mtx); strlcpy(buf, cred->cr_prison->pr_domainname, size); mtx_unlock(&cred->cr_prison->pr_mtx); } void getcredhostuuid(struct ucred *cred, char *buf, size_t size) { mtx_lock(&cred->cr_prison->pr_mtx); strlcpy(buf, cred->cr_prison->pr_hostuuid, size); mtx_unlock(&cred->cr_prison->pr_mtx); } void getcredhostid(struct ucred *cred, unsigned long *hostid) { mtx_lock(&cred->cr_prison->pr_mtx); *hostid = cred->cr_prison->pr_hostid; mtx_unlock(&cred->cr_prison->pr_mtx); } void getjailname(struct ucred *cred, char *name, size_t len) { mtx_lock(&cred->cr_prison->pr_mtx); strlcpy(name, cred->cr_prison->pr_name, len); mtx_unlock(&cred->cr_prison->pr_mtx); } #ifdef VIMAGE /* * Determine whether the prison represented by cred owns * its vnet rather than having it inherited. * * Returns 1 if the prison owns the vnet, 0 otherwise. */ int prison_owns_vnet(struct ucred *cred) { /* * vnets cannot be added/removed after jail creation, * so no need to lock here. */ return (cred->cr_prison->pr_flags & PR_VNET ? 1 : 0); } #endif /* * Determine whether the subject represented by cred can "see" the * status of a mount point. * Returns: 0 for permitted, ENOENT otherwise. * XXX: This function should be called cr_canseemount() and should be * placed in kern_prot.c. */ int prison_canseemount(struct ucred *cred, struct mount *mp) { struct prison *pr; struct statfs *sp; size_t len; pr = cred->cr_prison; if (pr->pr_enforce_statfs == 0) return (0); if (pr->pr_root->v_mount == mp) return (0); if (pr->pr_enforce_statfs == 2) return (ENOENT); /* * If the jail's chroot directory is set to "/", we should be able to * see all mount points from inside the jail. * This is an ugly check, but it is the only situation where the * jail's directory ends with '/'. */ if (strcmp(pr->pr_path, "/") == 0) return (0); len = strlen(pr->pr_path); sp = &mp->mnt_stat; if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0) return (ENOENT); /* * Be sure that we don't have a situation where the jail's root * directory is "/some/path" and the mount point is "/some/pathpath". */ if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/') return (ENOENT); return (0); } void prison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp) { char jpath[MAXPATHLEN]; struct prison *pr; size_t len; pr = cred->cr_prison; if (pr->pr_enforce_statfs == 0) return; if (prison_canseemount(cred, mp) != 0) { bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); strlcpy(sp->f_mntonname, "[restricted]", sizeof(sp->f_mntonname)); return; } if (pr->pr_root->v_mount == mp) { /* * Clear current buffer data, so we are sure nothing from * the valid path is left there. */ bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); *sp->f_mntonname = '/'; return; } /* * If the jail's chroot directory is set to "/", we should be able to * see all mount points from inside the jail. */ if (strcmp(pr->pr_path, "/") == 0) return; len = strlen(pr->pr_path); strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath)); /* * Clear current buffer data, so we are sure nothing from * the valid path is left there. */ bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); if (*jpath == '\0') { /* Should never happen. */ *sp->f_mntonname = '/'; } else { strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname)); } } /* * Check whether a specific privilege is granted within jail.
We * have a specific list of accepted privileges; the rest are denied. */ int prison_priv_check(struct ucred *cred, int priv) { struct prison *pr; int error; /* * Some policies have custom handlers. This routine should not be * called for them. See priv_check_cred(). */ switch (priv) { case PRIV_VFS_LOOKUP: case PRIV_VFS_GENERATION: KASSERT(0, ("prison_priv_check instead of a custom handler " "called for %d\n", priv)); } if (!jailed(cred)) return (0); #ifdef VIMAGE /* * Privileges specific to prisons with a virtual network stack. * There might be a duplicate entry here in case the privilege * is only granted conditionally in the legacy jail case. */ switch (priv) { #ifdef notyet /* * NFS-specific privileges. */ case PRIV_NFS_DAEMON: case PRIV_NFS_LOCKD: #endif /* * Network stack privileges. */ case PRIV_NET_BRIDGE: case PRIV_NET_GRE: case PRIV_NET_BPF: case PRIV_NET_RAW: /* Dup, cond. in legacy jail case. */ case PRIV_NET_ROUTE: case PRIV_NET_TAP: case PRIV_NET_SETIFMTU: case PRIV_NET_SETIFFLAGS: case PRIV_NET_SETIFCAP: case PRIV_NET_SETIFDESCR: case PRIV_NET_SETIFNAME: case PRIV_NET_SETIFMETRIC: case PRIV_NET_SETIFPHYS: case PRIV_NET_SETIFMAC: case PRIV_NET_SETLANPCP: case PRIV_NET_ADDMULTI: case PRIV_NET_DELMULTI: case PRIV_NET_HWIOCTL: case PRIV_NET_SETLLADDR: case PRIV_NET_ADDIFGROUP: case PRIV_NET_DELIFGROUP: case PRIV_NET_IFCREATE: case PRIV_NET_IFDESTROY: case PRIV_NET_ADDIFADDR: case PRIV_NET_DELIFADDR: case PRIV_NET_LAGG: case PRIV_NET_GIF: case PRIV_NET_SETIFVNET: case PRIV_NET_SETIFFIB: - case PRIV_NET_WG: /* * 802.11-related privileges. */ case PRIV_NET80211_VAP_GETKEY: case PRIV_NET80211_VAP_MANAGE: #ifdef notyet /* * ATM privileges. */ case PRIV_NETATM_CFG: case PRIV_NETATM_ADD: case PRIV_NETATM_DEL: case PRIV_NETATM_SET: /* * Bluetooth privileges. */ case PRIV_NETBLUETOOTH_RAW: #endif /* * Netgraph and netgraph module privileges. */ case PRIV_NETGRAPH_CONTROL: #ifdef notyet case PRIV_NETGRAPH_TTY: #endif /* * IPv4 and IPv6 privileges. */ case PRIV_NETINET_IPFW: case PRIV_NETINET_DIVERT: case PRIV_NETINET_PF: case PRIV_NETINET_DUMMYNET: case PRIV_NETINET_CARP: case PRIV_NETINET_MROUTE: case PRIV_NETINET_RAW: case PRIV_NETINET_ADDRCTRL6: case PRIV_NETINET_ND6: case PRIV_NETINET_SCOPE6: case PRIV_NETINET_ALIFETIME6: case PRIV_NETINET_IPSEC: case PRIV_NETINET_BINDANY: #ifdef notyet /* * NCP privileges. */ case PRIV_NETNCP: /* * SMB privileges. */ case PRIV_NETSMB: #endif /* * No default: or deny here. * In case of no permit, fall through to the next switch(). */ if (cred->cr_prison->pr_flags & PR_VNET) return (0); } #endif /* VIMAGE */ switch (priv) { /* * Allow ktrace privileges for root in jail. */ case PRIV_KTRACE: #if 0 /* * Allow jailed processes to configure audit identity and * submit audit records (login, etc). In the future we may * want to further refine the relationship between audit and * jail. */ case PRIV_AUDIT_GETAUDIT: case PRIV_AUDIT_SETAUDIT: case PRIV_AUDIT_SUBMIT: #endif /* * Allow jailed processes to manipulate process UNIX * credentials in any way they see fit. */ case PRIV_CRED_SETUID: case PRIV_CRED_SETEUID: case PRIV_CRED_SETGID: case PRIV_CRED_SETEGID: case PRIV_CRED_SETGROUPS: case PRIV_CRED_SETREUID: case PRIV_CRED_SETREGID: case PRIV_CRED_SETRESUID: case PRIV_CRED_SETRESGID: /* * Jail implements visibility constraints already, so allow * jailed root to override uid/gid-based constraints. */ case PRIV_SEEOTHERGIDS: case PRIV_SEEOTHERUIDS: /* * Jail implements inter-process debugging limits already, so * allow jailed root various debugging privileges.
*/ case PRIV_DEBUG_DIFFCRED: case PRIV_DEBUG_SUGID: case PRIV_DEBUG_UNPRIV: /* * Allow jail to set various resource limits and login * properties, and for now, exceed process resource limits. */ case PRIV_PROC_LIMIT: case PRIV_PROC_SETLOGIN: case PRIV_PROC_SETRLIMIT: /* * System V and POSIX IPC privileges are granted in jail. */ case PRIV_IPC_READ: case PRIV_IPC_WRITE: case PRIV_IPC_ADMIN: case PRIV_IPC_MSGSIZE: case PRIV_MQ_ADMIN: /* * Jail operations within a jail work on child jails. */ case PRIV_JAIL_ATTACH: case PRIV_JAIL_SET: case PRIV_JAIL_REMOVE: /* * Jail implements its own inter-process limits, so allow * root processes in jail to change scheduling on other * processes in the same jail. Likewise for signalling. */ case PRIV_SCHED_DIFFCRED: case PRIV_SCHED_CPUSET: case PRIV_SIGNAL_DIFFCRED: case PRIV_SIGNAL_SUGID: /* * Allow jailed processes to write to sysctls marked as jail * writable. */ case PRIV_SYSCTL_WRITEJAIL: /* * Allow root in jail to manage a variety of quota * properties. These should likely be conditional on a * configuration option. */ case PRIV_VFS_GETQUOTA: case PRIV_VFS_SETQUOTA: /* * Since jail relies on chroot() to implement file system * protections, grant many VFS privileges to root in jail. * Be careful to exclude mount-related and NFS-related * privileges. */ case PRIV_VFS_READ: case PRIV_VFS_WRITE: case PRIV_VFS_ADMIN: case PRIV_VFS_EXEC: case PRIV_VFS_BLOCKRESERVE: /* XXXRW: Slightly surprising. */ case PRIV_VFS_CHFLAGS_DEV: case PRIV_VFS_CHOWN: case PRIV_VFS_CHROOT: case PRIV_VFS_RETAINSUGID: case PRIV_VFS_FCHROOT: case PRIV_VFS_LINK: case PRIV_VFS_SETGID: case PRIV_VFS_STAT: case PRIV_VFS_STICKYFILE: /* * As in the non-jail case, non-root users are expected to be * able to read kernel/physical memory (provided /dev/[k]mem * exists in the jail and they have permission to access it). */ case PRIV_KMEM_READ: return (0); /* * Depending on the global setting, allow the privilege of * setting system flags. */ case PRIV_VFS_SYSFLAGS: if (cred->cr_prison->pr_allow & PR_ALLOW_CHFLAGS) return (0); else return (EPERM); /* * Depending on the global setting, allow the privilege of * mounting/unmounting file systems. */ case PRIV_VFS_MOUNT: case PRIV_VFS_UNMOUNT: case PRIV_VFS_MOUNT_NONUSER: case PRIV_VFS_MOUNT_OWNER: pr = cred->cr_prison; prison_lock(pr); if (pr->pr_allow & PR_ALLOW_MOUNT && pr->pr_enforce_statfs < 2) error = 0; else error = EPERM; prison_unlock(pr); return (error); /* * Jails should hold no disposition on the PRIV_VFS_READ_DIR * policy. priv_check_cred will not specifically allow it, and * we may want a MAC policy to allow it. */ case PRIV_VFS_READ_DIR: return (0); /* * Conditionally allow locking (unlocking) physical pages * in memory. */ case PRIV_VM_MLOCK: case PRIV_VM_MUNLOCK: if (cred->cr_prison->pr_allow & PR_ALLOW_MLOCK) return (0); else return (EPERM); /* * Conditionally allow jailed root to bind reserved ports. */ case PRIV_NETINET_RESERVEDPORT: if (cred->cr_prison->pr_allow & PR_ALLOW_RESERVED_PORTS) return (0); else return (EPERM); /* * Allow jailed root to reuse in-use ports. */ case PRIV_NETINET_REUSEPORT: return (0); /* * Allow jailed root to set certain IPv4/6 (option) headers. */ case PRIV_NETINET_SETHDROPTS: return (0); /* * Conditionally allow creating raw sockets in jail. */ case PRIV_NETINET_RAW: if (cred->cr_prison->pr_allow & PR_ALLOW_RAW_SOCKETS) return (0); else return (EPERM); /* * Since jail implements its own visibility limits on netstat * sysctls, allow getcred. This allows identd to work in * jail.
*/ case PRIV_NETINET_GETCRED: return (0); /* * Allow jailed root to set loginclass. */ case PRIV_PROC_SETLOGINCLASS: return (0); /* * Do not allow a process inside a jail to read the kernel * message buffer unless explicitly permitted. */ case PRIV_MSGBUF: if (cred->cr_prison->pr_allow & PR_ALLOW_READ_MSGBUF) return (0); return (EPERM); default: /* * In all remaining cases, deny the privilege request. This * includes almost all network privileges and many system * configuration privileges. */ return (EPERM); } } /* * Return the part of pr2's name that is relative to pr1, or the whole name * if it does not directly follow. */ char * prison_name(struct prison *pr1, struct prison *pr2) { char *name; /* Jails see themselves as "0" (if they see themselves at all). */ if (pr1 == pr2) return "0"; name = pr2->pr_name; if (prison_ischild(pr1, pr2)) { /* * pr1 isn't locked (and allprison_lock may not be either) * so its length can't be counted on. But the number of dots * can be counted on - and counted. */ for (; pr1 != &prison0; pr1 = pr1->pr_parent) name = strchr(name, '.') + 1; } return (name); } /* * Return the part of pr2's path that is relative to pr1, or the whole path * if it does not directly follow. */ static char * prison_path(struct prison *pr1, struct prison *pr2) { char *path1, *path2; int len1; path1 = pr1->pr_path; path2 = pr2->pr_path; if (!strcmp(path1, "/")) return (path2); len1 = strlen(path1); if (strncmp(path1, path2, len1)) return (path2); if (path2[len1] == '\0') return "/"; if (path2[len1] == '/') return (path2 + len1); return (path2); } /* * Jail-related sysctls. */ static SYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Jails"); static int sysctl_jail_list(SYSCTL_HANDLER_ARGS) { struct xprison *xp; struct prison *pr, *cpr; #ifdef INET struct in_addr *ip4 = NULL; int ip4s = 0; #endif #ifdef INET6 struct in6_addr *ip6 = NULL; int ip6s = 0; #endif int descend, error; xp = malloc(sizeof(*xp), M_TEMP, M_WAITOK); pr = req->td->td_ucred->cr_prison; error = 0; sx_slock(&allprison_lock); FOREACH_PRISON_DESCENDANT(pr, cpr, descend) { #if defined(INET) || defined(INET6) again: #endif mtx_lock(&cpr->pr_mtx); #ifdef INET if (cpr->pr_ip4s > 0) { if (ip4s < cpr->pr_ip4s) { ip4s = cpr->pr_ip4s; mtx_unlock(&cpr->pr_mtx); ip4 = realloc(ip4, ip4s * sizeof(struct in_addr), M_TEMP, M_WAITOK); goto again; } bcopy(cpr->pr_ip4, ip4, cpr->pr_ip4s * sizeof(struct in_addr)); } #endif #ifdef INET6 if (cpr->pr_ip6s > 0) { if (ip6s < cpr->pr_ip6s) { ip6s = cpr->pr_ip6s; mtx_unlock(&cpr->pr_mtx); ip6 = realloc(ip6, ip6s * sizeof(struct in6_addr), M_TEMP, M_WAITOK); goto again; } bcopy(cpr->pr_ip6, ip6, cpr->pr_ip6s * sizeof(struct in6_addr)); } #endif bzero(xp, sizeof(*xp)); xp->pr_version = XPRISON_VERSION; xp->pr_id = cpr->pr_id; xp->pr_state = cpr->pr_state; strlcpy(xp->pr_path, prison_path(pr, cpr), sizeof(xp->pr_path)); strlcpy(xp->pr_host, cpr->pr_hostname, sizeof(xp->pr_host)); strlcpy(xp->pr_name, prison_name(pr, cpr), sizeof(xp->pr_name)); #ifdef INET xp->pr_ip4s = cpr->pr_ip4s; #endif #ifdef INET6 xp->pr_ip6s = cpr->pr_ip6s; #endif mtx_unlock(&cpr->pr_mtx); error = SYSCTL_OUT(req, xp, sizeof(*xp)); if (error) break; #ifdef INET if (xp->pr_ip4s > 0) { error = SYSCTL_OUT(req, ip4, xp->pr_ip4s * sizeof(struct in_addr)); if (error) break; } #endif #ifdef INET6 if (xp->pr_ip6s > 0) { error = SYSCTL_OUT(req, ip6, xp->pr_ip6s * sizeof(struct in6_addr)); if (error) break; } #endif } sx_sunlock(&allprison_lock); free(xp, M_TEMP); #ifdef INET free(ip4, M_TEMP); #endif #ifdef INET6
free(ip6, M_TEMP); #endif return (error); } SYSCTL_OID(_security_jail, OID_AUTO, list, CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_jail_list, "S", "List of active jails"); static int sysctl_jail_jailed(SYSCTL_HANDLER_ARGS) { int error, injail; injail = jailed(req->td->td_ucred); error = SYSCTL_OUT(req, &injail, sizeof(injail)); return (error); } SYSCTL_PROC(_security_jail, OID_AUTO, jailed, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_jail_jailed, "I", "Process in jail?"); static int sysctl_jail_vnet(SYSCTL_HANDLER_ARGS) { int error, havevnet; #ifdef VIMAGE struct ucred *cred = req->td->td_ucred; havevnet = jailed(cred) && prison_owns_vnet(cred); #else havevnet = 0; #endif error = SYSCTL_OUT(req, &havevnet, sizeof(havevnet)); return (error); } SYSCTL_PROC(_security_jail, OID_AUTO, vnet, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_jail_vnet, "I", "Jail owns vnet?"); #if defined(INET) || defined(INET6) SYSCTL_UINT(_security_jail, OID_AUTO, jail_max_af_ips, CTLFLAG_RW, &jail_max_af_ips, 0, "Number of IP addresses a jail may have at most per address family (deprecated)"); #endif /* * Default parameters for jail(2) compatibility. For historical reasons, * the sysctl names have varying similarity to the parameter names. Prisons * just see their own parameters, and can't change them. */ static int sysctl_jail_default_allow(SYSCTL_HANDLER_ARGS) { int error, i; /* Get the current flag value, and convert it to a boolean. */ if (req->td->td_ucred->cr_prison == &prison0) { mtx_lock(&prison0.pr_mtx); i = (jail_default_allow & arg2) != 0; mtx_unlock(&prison0.pr_mtx); } else i = prison_allow(req->td->td_ucred, arg2); if (arg1 != NULL) i = !i; error = sysctl_handle_int(oidp, &i, 0, req); if (error || !req->newptr) return (error); i = i ? arg2 : 0; if (arg1 != NULL) i ^= arg2; /* * The sysctls don't have CTLFLAGS_PRISON, so assume prison0 * for writing. 
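 *
 * For illustration (assumed typical usage, not part of this change):
 * the deprecated globals backed by this handler are flipped from
 * userland with sysctl(8), e.g.
 *
 *	sysctl security.jail.chflags_allowed=1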
*/ mtx_lock(&prison0.pr_mtx); jail_default_allow = (jail_default_allow & ~arg2) | i; mtx_unlock(&prison0.pr_mtx); return (0); } SYSCTL_PROC(_security_jail, OID_AUTO, set_hostname_allowed, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, PR_ALLOW_SET_HOSTNAME, sysctl_jail_default_allow, "I", "Processes in jail can set their hostnames (deprecated)"); SYSCTL_PROC(_security_jail, OID_AUTO, socket_unixiproute_only, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, (void *)1, PR_ALLOW_SOCKET_AF, sysctl_jail_default_allow, "I", "Processes in jail are limited to creating UNIX/IP/route sockets only (deprecated)"); SYSCTL_PROC(_security_jail, OID_AUTO, sysvipc_allowed, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, PR_ALLOW_SYSVIPC, sysctl_jail_default_allow, "I", "Processes in jail can use System V IPC primitives (deprecated)"); SYSCTL_PROC(_security_jail, OID_AUTO, allow_raw_sockets, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, PR_ALLOW_RAW_SOCKETS, sysctl_jail_default_allow, "I", "Prison root can create raw sockets (deprecated)"); SYSCTL_PROC(_security_jail, OID_AUTO, chflags_allowed, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, PR_ALLOW_CHFLAGS, sysctl_jail_default_allow, "I", "Processes in jail can alter system file flags (deprecated)"); SYSCTL_PROC(_security_jail, OID_AUTO, mount_allowed, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, PR_ALLOW_MOUNT, sysctl_jail_default_allow, "I", "Processes in jail can mount/unmount jail-friendly file systems (deprecated)"); static int sysctl_jail_default_level(SYSCTL_HANDLER_ARGS) { struct prison *pr; int level, error; pr = req->td->td_ucred->cr_prison; level = (pr == &prison0) ? *(int *)arg1 : *(int *)((char *)pr + arg2); error = sysctl_handle_int(oidp, &level, 0, req); if (error || !req->newptr) return (error); *(int *)arg1 = level; return (0); } SYSCTL_PROC(_security_jail, OID_AUTO, enforce_statfs, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &jail_default_enforce_statfs, offsetof(struct prison, pr_enforce_statfs), sysctl_jail_default_level, "I", "Processes in jail cannot see all mounted file systems (deprecated)"); SYSCTL_PROC(_security_jail, OID_AUTO, devfs_ruleset, CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, &jail_default_devfs_rsnum, offsetof(struct prison, pr_devfs_rsnum), sysctl_jail_default_level, "I", "Ruleset for the devfs filesystem in jail (deprecated)"); /* * Nodes to describe jail parameters. Maximum length of string parameters * is returned in the string itself, and the other parameters exist merely * to make themselves and their types known. */ SYSCTL_NODE(_security_jail, OID_AUTO, param, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "Jail parameters"); int sysctl_jail_param(SYSCTL_HANDLER_ARGS) { int i; long l; size_t s; char numbuf[12]; switch (oidp->oid_kind & CTLTYPE) { case CTLTYPE_LONG: case CTLTYPE_ULONG: l = 0; #ifdef SCTL_MASK32 if (!(req->flags & SCTL_MASK32)) #endif return (SYSCTL_OUT(req, &l, sizeof(l))); case CTLTYPE_INT: case CTLTYPE_UINT: i = 0; return (SYSCTL_OUT(req, &i, sizeof(i))); case CTLTYPE_STRING: snprintf(numbuf, sizeof(numbuf), "%jd", (intmax_t)arg2); return (sysctl_handle_string(oidp, numbuf, sizeof(numbuf), req)); case CTLTYPE_STRUCT: s = (size_t)arg2; return (SYSCTL_OUT(req, &s, sizeof(s))); } return (0); } /* * CTLFLAG_RDTUN in the following indicates jail parameters that can be set at * jail creation time but cannot be changed in an existing jail. 
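 *
 * For example (illustrative only): "path" below is CTLFLAG_RDTUN, so
 * it can be supplied when a jail is created but not updated afterwards:
 *
 *	jail -c name=test path=/jails/test persist
 *	jail -m name=test path=/elsewhere	<- expected to be rejected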
*/ SYSCTL_JAIL_PARAM(, jid, CTLTYPE_INT | CTLFLAG_RDTUN, "I", "Jail ID"); SYSCTL_JAIL_PARAM(, parent, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail parent ID"); SYSCTL_JAIL_PARAM_STRING(, name, CTLFLAG_RW, MAXHOSTNAMELEN, "Jail name"); SYSCTL_JAIL_PARAM_STRING(, path, CTLFLAG_RDTUN, MAXPATHLEN, "Jail root path"); SYSCTL_JAIL_PARAM(, securelevel, CTLTYPE_INT | CTLFLAG_RW, "I", "Jail secure level"); SYSCTL_JAIL_PARAM(, osreldate, CTLTYPE_INT | CTLFLAG_RDTUN, "I", "Jail value for kern.osreldate and uname -K"); SYSCTL_JAIL_PARAM_STRING(, osrelease, CTLFLAG_RDTUN, OSRELEASELEN, "Jail value for kern.osrelease and uname -r"); SYSCTL_JAIL_PARAM(, enforce_statfs, CTLTYPE_INT | CTLFLAG_RW, "I", "Jail cannot see all mounted file systems"); SYSCTL_JAIL_PARAM(, devfs_ruleset, CTLTYPE_INT | CTLFLAG_RW, "I", "Ruleset for in-jail devfs mounts"); SYSCTL_JAIL_PARAM(, persist, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail persistence"); #ifdef VIMAGE SYSCTL_JAIL_PARAM(, vnet, CTLTYPE_INT | CTLFLAG_RDTUN, "E,jailsys", "Virtual network stack"); #endif SYSCTL_JAIL_PARAM(, dying, CTLTYPE_INT | CTLFLAG_RD, "B", "Jail is in the process of shutting down"); SYSCTL_JAIL_PARAM_NODE(children, "Number of child jails"); SYSCTL_JAIL_PARAM(_children, cur, CTLTYPE_INT | CTLFLAG_RD, "I", "Current number of child jails"); SYSCTL_JAIL_PARAM(_children, max, CTLTYPE_INT | CTLFLAG_RW, "I", "Maximum number of child jails"); SYSCTL_JAIL_PARAM_SYS_NODE(host, CTLFLAG_RW, "Jail host info"); SYSCTL_JAIL_PARAM_STRING(_host, hostname, CTLFLAG_RW, MAXHOSTNAMELEN, "Jail hostname"); SYSCTL_JAIL_PARAM_STRING(_host, domainname, CTLFLAG_RW, MAXHOSTNAMELEN, "Jail NIS domainname"); SYSCTL_JAIL_PARAM_STRING(_host, hostuuid, CTLFLAG_RW, HOSTUUIDLEN, "Jail host UUID"); SYSCTL_JAIL_PARAM(_host, hostid, CTLTYPE_ULONG | CTLFLAG_RW, "LU", "Jail host ID"); SYSCTL_JAIL_PARAM_NODE(cpuset, "Jail cpuset"); SYSCTL_JAIL_PARAM(_cpuset, id, CTLTYPE_INT | CTLFLAG_RD, "I", "Jail cpuset ID"); #ifdef INET SYSCTL_JAIL_PARAM_SYS_NODE(ip4, CTLFLAG_RDTUN, "Jail IPv4 address virtualization"); SYSCTL_JAIL_PARAM_STRUCT(_ip4, addr, CTLFLAG_RW, sizeof(struct in_addr), "S,in_addr,a", "Jail IPv4 addresses"); SYSCTL_JAIL_PARAM(_ip4, saddrsel, CTLTYPE_INT | CTLFLAG_RW, "B", "Do (not) use IPv4 source address selection rather than the " "primary jail IPv4 address."); #endif #ifdef INET6 SYSCTL_JAIL_PARAM_SYS_NODE(ip6, CTLFLAG_RDTUN, "Jail IPv6 address virtualization"); SYSCTL_JAIL_PARAM_STRUCT(_ip6, addr, CTLFLAG_RW, sizeof(struct in6_addr), "S,in6_addr,a", "Jail IPv6 addresses"); SYSCTL_JAIL_PARAM(_ip6, saddrsel, CTLTYPE_INT | CTLFLAG_RW, "B", "Do (not) use IPv6 source address selection rather than the " "primary jail IPv6 address."); #endif SYSCTL_JAIL_PARAM_NODE(allow, "Jail permission flags"); SYSCTL_JAIL_PARAM(_allow, set_hostname, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may set hostname"); SYSCTL_JAIL_PARAM(_allow, sysvipc, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may use SYSV IPC"); SYSCTL_JAIL_PARAM(_allow, raw_sockets, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may create raw sockets"); SYSCTL_JAIL_PARAM(_allow, chflags, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may alter system file flags"); SYSCTL_JAIL_PARAM(_allow, quotas, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may set file quotas"); SYSCTL_JAIL_PARAM(_allow, socket_af, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may create sockets other than just UNIX/IPv4/IPv6/route"); SYSCTL_JAIL_PARAM(_allow, mlock, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may lock (unlock) physical pages in memory"); SYSCTL_JAIL_PARAM(_allow, reserved_ports, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail 
may bind sockets to reserved ports"); SYSCTL_JAIL_PARAM(_allow, read_msgbuf, CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may read the kernel message buffer"); SYSCTL_JAIL_PARAM(_allow, unprivileged_proc_debug, CTLTYPE_INT | CTLFLAG_RW, "B", "Unprivileged processes may use process debugging facilities"); SYSCTL_JAIL_PARAM(_allow, suser, CTLTYPE_INT | CTLFLAG_RW, "B", "Processes in jail with uid 0 have privilege"); SYSCTL_JAIL_PARAM_SUBNODE(allow, mount, "Jail mount/unmount permission flags"); SYSCTL_JAIL_PARAM(_allow_mount, , CTLTYPE_INT | CTLFLAG_RW, "B", "Jail may mount/unmount jail-friendly file systems in general"); /* * Add a dynamic parameter allow.<name>, or allow.<prefix>.<name>. Return * its associated bit in the pr_allow bitmask, or zero if the parameter was * not created. */ unsigned prison_add_allow(const char *prefix, const char *name, const char *prefix_descr, const char *descr) { struct bool_flags *bf; struct sysctl_oid *parent; char *allow_name, *allow_noname, *allowed; #ifndef NO_SYSCTL_DESCR char *descr_deprecated; #endif u_int allow_flag; if (prefix ? asprintf(&allow_name, M_PRISON, "allow.%s.%s", prefix, name) < 0 || asprintf(&allow_noname, M_PRISON, "allow.%s.no%s", prefix, name) < 0 : asprintf(&allow_name, M_PRISON, "allow.%s", name) < 0 || asprintf(&allow_noname, M_PRISON, "allow.no%s", name) < 0) { free(allow_name, M_PRISON); return 0; } /* * See if this parameter has already been added, i.e. a module was * previously loaded/unloaded. */ mtx_lock(&prison0.pr_mtx); for (bf = pr_flag_allow; bf < pr_flag_allow + nitems(pr_flag_allow) && atomic_load_int(&bf->flag) != 0; bf++) { if (strcmp(bf->name, allow_name) == 0) { allow_flag = bf->flag; goto no_add; } } /* * Find a free bit in pr_allow_all, failing if there are none * (which shouldn't happen as long as we keep track of how many * potential dynamic flags exist). */ for (allow_flag = 1;; allow_flag <<= 1) { if (allow_flag == 0) goto no_add; if ((pr_allow_all & allow_flag) == 0) break; } /* Note the parameter in the next open slot in pr_flag_allow. */ for (bf = pr_flag_allow; ; bf++) { if (bf == pr_flag_allow + nitems(pr_flag_allow)) { /* This should never happen, but is not fatal. */ allow_flag = 0; goto no_add; } if (atomic_load_int(&bf->flag) == 0) break; } bf->name = allow_name; bf->noname = allow_noname; pr_allow_all |= allow_flag; /* * prison0 always has permission for the new parameter. * Other jails must have it granted to them. */ prison0.pr_allow |= allow_flag; /* The flag indicates a valid entry, so make sure it is set last. */ atomic_store_rel_int(&bf->flag, allow_flag); mtx_unlock(&prison0.pr_mtx); /* * Create sysctls for the parameter, and the back-compat global * permission. */ parent = prefix ? SYSCTL_ADD_NODE(NULL, SYSCTL_CHILDREN(&sysctl___security_jail_param_allow), OID_AUTO, prefix, CTLFLAG_MPSAFE, 0, prefix_descr) : &sysctl___security_jail_param_allow; (void)SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(parent), OID_AUTO, name, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0, sysctl_jail_param, "B", descr); if ((prefix ?
asprintf(&allowed, M_TEMP, "%s_%s_allowed", prefix, name) : asprintf(&allowed, M_TEMP, "%s_allowed", name)) >= 0) { #ifndef NO_SYSCTL_DESCR (void)asprintf(&descr_deprecated, M_TEMP, "%s (deprecated)", descr); #endif (void)SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(&sysctl___security_jail), OID_AUTO, allowed, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, allow_flag, sysctl_jail_default_allow, "I", descr_deprecated); #ifndef NO_SYSCTL_DESCR free(descr_deprecated, M_TEMP); #endif free(allowed, M_TEMP); } return allow_flag; no_add: mtx_unlock(&prison0.pr_mtx); free(allow_name, M_PRISON); free(allow_noname, M_PRISON); return allow_flag; } /* * The VFS system will register jail-aware filesystems here. They each get * a parameter allow.mount.xxxfs and a flag to check when a jailed user * attempts to mount. */ void prison_add_vfs(struct vfsconf *vfsp) { #ifdef NO_SYSCTL_DESCR vfsp->vfc_prison_flag = prison_add_allow("mount", vfsp->vfc_name, NULL, NULL); #else char *descr; (void)asprintf(&descr, M_TEMP, "Jail may mount the %s file system", vfsp->vfc_name); vfsp->vfc_prison_flag = prison_add_allow("mount", vfsp->vfc_name, NULL, descr); free(descr, M_TEMP); #endif } #ifdef RACCT void prison_racct_foreach(void (*callback)(struct racct *racct, void *arg2, void *arg3), void (*pre)(void), void (*post)(void), void *arg2, void *arg3) { struct prison_racct *prr; ASSERT_RACCT_ENABLED(); sx_slock(&allprison_lock); if (pre != NULL) (pre)(); LIST_FOREACH(prr, &allprison_racct, prr_next) (callback)(prr->prr_racct, arg2, arg3); if (post != NULL) (post)(); sx_sunlock(&allprison_lock); } static struct prison_racct * prison_racct_find_locked(const char *name) { struct prison_racct *prr; ASSERT_RACCT_ENABLED(); sx_assert(&allprison_lock, SA_XLOCKED); if (name[0] == '\0' || strlen(name) >= MAXHOSTNAMELEN) return (NULL); LIST_FOREACH(prr, &allprison_racct, prr_next) { if (strcmp(name, prr->prr_name) != 0) continue; /* Found prison_racct with a matching name? */ prison_racct_hold(prr); return (prr); } /* Add new prison_racct. */ prr = malloc(sizeof(*prr), M_PRISON_RACCT, M_ZERO | M_WAITOK); racct_create(&prr->prr_racct); strcpy(prr->prr_name, name); refcount_init(&prr->prr_refcount, 1); LIST_INSERT_HEAD(&allprison_racct, prr, prr_next); return (prr); } struct prison_racct * prison_racct_find(const char *name) { struct prison_racct *prr; ASSERT_RACCT_ENABLED(); sx_xlock(&allprison_lock); prr = prison_racct_find_locked(name); sx_xunlock(&allprison_lock); return (prr); } void prison_racct_hold(struct prison_racct *prr) { ASSERT_RACCT_ENABLED(); refcount_acquire(&prr->prr_refcount); } static void prison_racct_free_locked(struct prison_racct *prr) { ASSERT_RACCT_ENABLED(); sx_assert(&allprison_lock, SA_XLOCKED); if (refcount_release(&prr->prr_refcount)) { racct_destroy(&prr->prr_racct); LIST_REMOVE(prr, prr_next); free(prr, M_PRISON_RACCT); } } void prison_racct_free(struct prison_racct *prr) { ASSERT_RACCT_ENABLED(); sx_assert(&allprison_lock, SA_UNLOCKED); if (refcount_release_if_not_last(&prr->prr_refcount)) return; sx_xlock(&allprison_lock); prison_racct_free_locked(prr); sx_xunlock(&allprison_lock); } static void prison_racct_attach(struct prison *pr) { struct prison_racct *prr; ASSERT_RACCT_ENABLED(); sx_assert(&allprison_lock, SA_XLOCKED); prr = prison_racct_find_locked(pr->pr_name); KASSERT(prr != NULL, ("cannot find prison_racct")); pr->pr_prison_racct = prr; } /* * Handle jail renaming. From the racct point of view, renaming means * moving from one prison_racct to another. 
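 *
 * (Illustrative note: the racct_move() call below transfers the
 * accumulated usage from the old prison_racct to the new one, so
 * accounting totals survive the rename.)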
*/ static void prison_racct_modify(struct prison *pr) { #ifdef RCTL struct proc *p; struct ucred *cred; #endif struct prison_racct *oldprr; ASSERT_RACCT_ENABLED(); sx_slock(&allproc_lock); sx_xlock(&allprison_lock); if (strcmp(pr->pr_name, pr->pr_prison_racct->prr_name) == 0) { sx_xunlock(&allprison_lock); sx_sunlock(&allproc_lock); return; } oldprr = pr->pr_prison_racct; pr->pr_prison_racct = NULL; prison_racct_attach(pr); /* * Move resource utilisation records. */ racct_move(pr->pr_prison_racct->prr_racct, oldprr->prr_racct); #ifdef RCTL /* * Force rctl to reattach rules to processes. */ FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); cred = crhold(p->p_ucred); PROC_UNLOCK(p); rctl_proc_ucred_changed(p, cred); crfree(cred); } #endif sx_sunlock(&allproc_lock); prison_racct_free_locked(oldprr); sx_xunlock(&allprison_lock); } static void prison_racct_detach(struct prison *pr) { ASSERT_RACCT_ENABLED(); sx_assert(&allprison_lock, SA_UNLOCKED); if (pr->pr_prison_racct == NULL) return; prison_racct_free(pr->pr_prison_racct); pr->pr_prison_racct = NULL; } #endif /* RACCT */ #ifdef DDB static void db_show_prison(struct prison *pr) { struct bool_flags *bf; struct jailsys_flags *jsf; #if defined(INET) || defined(INET6) int ii; #endif unsigned f; #ifdef INET char ip4buf[INET_ADDRSTRLEN]; #endif #ifdef INET6 char ip6buf[INET6_ADDRSTRLEN]; #endif db_printf("prison %p:\n", pr); db_printf(" jid = %d\n", pr->pr_id); db_printf(" name = %s\n", pr->pr_name); db_printf(" parent = %p\n", pr->pr_parent); db_printf(" ref = %d\n", pr->pr_ref); db_printf(" uref = %d\n", pr->pr_uref); db_printf(" state = %s\n", pr->pr_state == PRISON_STATE_ALIVE ? "alive" : pr->pr_state == PRISON_STATE_DYING ? "dying" : "invalid"); db_printf(" path = %s\n", pr->pr_path); db_printf(" cpuset = %d\n", pr->pr_cpuset ? pr->pr_cpuset->cs_id : -1); #ifdef VIMAGE db_printf(" vnet = %p\n", pr->pr_vnet); #endif db_printf(" root = %p\n", pr->pr_root); db_printf(" securelevel = %d\n", pr->pr_securelevel); db_printf(" devfs_rsnum = %d\n", pr->pr_devfs_rsnum); db_printf(" children.max = %d\n", pr->pr_childmax); db_printf(" children.cur = %d\n", pr->pr_childcount); db_printf(" child = %p\n", LIST_FIRST(&pr->pr_children)); db_printf(" sibling = %p\n", LIST_NEXT(pr, pr_sibling)); db_printf(" flags = 0x%x", pr->pr_flags); for (bf = pr_flag_bool; bf < pr_flag_bool + nitems(pr_flag_bool); bf++) if (pr->pr_flags & bf->flag) db_printf(" %s", bf->name); for (jsf = pr_flag_jailsys; jsf < pr_flag_jailsys + nitems(pr_flag_jailsys); jsf++) { f = pr->pr_flags & (jsf->disable | jsf->new); db_printf(" %-16s= %s\n", jsf->name, (f != 0 && f == jsf->disable) ? "disable" : (f == jsf->new) ? "new" : "inherit"); } db_printf(" allow = 0x%x", pr->pr_allow); for (bf = pr_flag_allow; bf < pr_flag_allow + nitems(pr_flag_allow) && atomic_load_int(&bf->flag) != 0; bf++) if (pr->pr_allow & bf->flag) db_printf(" %s", bf->name); db_printf("\n"); db_printf(" enforce_statfs = %d\n", pr->pr_enforce_statfs); db_printf(" host.hostname = %s\n", pr->pr_hostname); db_printf(" host.domainname = %s\n", pr->pr_domainname); db_printf(" host.hostuuid = %s\n", pr->pr_hostuuid); db_printf(" host.hostid = %lu\n", pr->pr_hostid); #ifdef INET db_printf(" ip4s = %d\n", pr->pr_ip4s); for (ii = 0; ii < pr->pr_ip4s; ii++) db_printf(" %s %s\n", ii == 0 ? "ip4.addr =" : " ", inet_ntoa_r(pr->pr_ip4[ii], ip4buf)); #endif #ifdef INET6 db_printf(" ip6s = %d\n", pr->pr_ip6s); for (ii = 0; ii < pr->pr_ip6s; ii++) db_printf(" %s %s\n", ii == 0 ? 
"ip6.addr =" : " ", ip6_sprintf(ip6buf, &pr->pr_ip6[ii])); #endif } DB_SHOW_COMMAND(prison, db_show_prison_command) { struct prison *pr; if (!have_addr) { /* * Show all prisons in the list, and prison0 which is not * listed. */ db_show_prison(&prison0); if (!db_pager_quit) { TAILQ_FOREACH(pr, &allprison, pr_list) { db_show_prison(pr); if (db_pager_quit) break; } } return; } if (addr == 0) pr = &prison0; else { /* Look for a prison with the ID and with references. */ TAILQ_FOREACH(pr, &allprison, pr_list) if (pr->pr_id == addr && pr->pr_ref > 0) break; if (pr == NULL) /* Look again, without requiring a reference. */ TAILQ_FOREACH(pr, &allprison, pr_list) if (pr->pr_id == addr) break; if (pr == NULL) /* Assume address points to a valid prison. */ pr = (struct prison *)addr; } db_show_prison(pr); } #endif /* DDB */ diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index eb748928cd91..7f06b51cf096 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -1,4426 +1,4415 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1988, 1990, 1993 * The Regents of the University of California. * Copyright (c) 2004 The FreeBSD Foundation * Copyright (c) 2004-2008 Robert N. M. Watson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94 */ /* * Comments on the socket life cycle: * * soalloc() sets of socket layer state for a socket, called only by * socreate() and sonewconn(). Socket layer private. * * sodealloc() tears down socket layer state for a socket, called only by * sofree() and sonewconn(). Socket layer private. * * pru_attach() associates protocol layer state with an allocated socket; * called only once, may fail, aborting socket allocation. This is called * from socreate() and sonewconn(). Socket layer private. * * pru_detach() disassociates protocol layer state from an attached socket, * and will be called exactly once for sockets in which pru_attach() has * been successfully called. If pru_attach() returned an error, * pru_detach() will not be called. Socket layer private. 
* * pru_abort() and pru_close() notify the protocol layer that the last * consumer of a socket is starting to tear down the socket, and that the * protocol should terminate the connection. Historically, pru_abort() also * detached protocol state from the socket state, but this is no longer the * case. * * socreate() creates a socket and attaches protocol state. This is a public * interface that may be used by socket layer consumers to create new * sockets. * * sonewconn() creates a socket and attaches protocol state. This is a * public interface that may be used by protocols to create new sockets when * a new connection is received and will be available for accept() on a * listen socket. * * soclose() destroys a socket after possibly waiting for it to disconnect. * This is a public interface that socket consumers should use to close and * release a socket when done with it. * * soabort() destroys a socket without waiting for it to disconnect (used * only for incoming connections that are already partially or fully * connected). This is used internally by the socket layer when clearing * listen socket queues (due to overflow or close on the listen socket), but * is also a public interface protocols may use to abort connections in * their incomplete listen queues should they no longer be required. Sockets * placed in completed connection listen queues should not be aborted for * reasons described in the comment above the soclose() implementation. This * is not a general purpose close routine, and except in the specific * circumstances described here, should not be used. * * sofree() will free a socket and its protocol state if all references on * the socket have been released, and is the public interface to attempt to * free a socket when a reference is removed. This is a socket layer private * interface. * * NOTE: In addition to socreate() and soclose(), which provide a single * socket reference to the consumer to be managed as required, there are two * calls to explicitly manage socket references, soref(), and sorele(). * Currently, these are generally required only when transitioning a socket * from a listen queue to a file descriptor, in order to prevent garbage * collection of the socket at an untimely moment. For a number of reasons, * these interfaces are not preferred, and should be avoided. * * NOTE: With regard to VNETs the general rule is that callers do not set * curvnet. Exceptions to this rule include soabort(), sodisconnect(), * sofree() (and with that sorele(), sotryfree()), as well as sonewconn() * and sorflush(), which are usually called from a pre-set VNET context. * sopoll() currently does not need a VNET context to be set. 
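 *
 * Illustrative consumer-side usage of the public interfaces above (a
 * sketch only; error handling elided):
 *
 *	struct socket *so;
 *	error = socreate(AF_INET, &so, SOCK_STREAM, 0, cred, td);
 *	if (error == 0)
 *		error = soclose(so);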
*/ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_kern_tls.h" #include "opt_sctp.h" #include #include #include #include #include #include #include #include #include #include #include /* for struct knote */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef COMPAT_FREEBSD32 #include #include #include #endif static int soreceive_rcvoob(struct socket *so, struct uio *uio, int flags); static void so_rdknl_lock(void *); static void so_rdknl_unlock(void *); static void so_rdknl_assert_lock(void *, int); static void so_wrknl_lock(void *); static void so_wrknl_unlock(void *); static void so_wrknl_assert_lock(void *, int); static void filt_sordetach(struct knote *kn); static int filt_soread(struct knote *kn, long hint); static void filt_sowdetach(struct knote *kn); static int filt_sowrite(struct knote *kn, long hint); static int filt_soempty(struct knote *kn, long hint); static int inline hhook_run_socket(struct socket *so, void *hctx, int32_t h_id); fo_kqfilter_t soo_kqfilter; static struct filterops soread_filtops = { .f_isfd = 1, .f_detach = filt_sordetach, .f_event = filt_soread, }; static struct filterops sowrite_filtops = { .f_isfd = 1, .f_detach = filt_sowdetach, .f_event = filt_sowrite, }; static struct filterops soempty_filtops = { .f_isfd = 1, .f_detach = filt_sowdetach, .f_event = filt_soempty, }; so_gen_t so_gencnt; /* generation count for sockets */ MALLOC_DEFINE(M_SONAME, "soname", "socket name"); MALLOC_DEFINE(M_PCB, "pcb", "protocol control block"); #define VNET_SO_ASSERT(so) \ VNET_ASSERT(curvnet != NULL, \ ("%s:%d curvnet is NULL, so=%p", __func__, __LINE__, (so))); VNET_DEFINE(struct hhook_head *, socket_hhh[HHOOK_SOCKET_LAST + 1]); #define V_socket_hhh VNET(socket_hhh) /* * Limit on the number of connections in the listen queue waiting * for accept(2). * NB: The original sysctl somaxconn is still available but hidden * to prevent confusion about the actual purpose of this number. */ static u_int somaxconn = SOMAXCONN; static int sysctl_somaxconn(SYSCTL_HANDLER_ARGS) { int error; int val; val = somaxconn; error = sysctl_handle_int(oidp, &val, 0, req); if (error || !req->newptr ) return (error); /* * The purpose of the UINT_MAX / 3 limit, is so that the formula * 3 * so_qlimit / 2 * below, will not overflow. */ if (val < 1 || val > UINT_MAX / 3) return (EINVAL); somaxconn = val; return (0); } SYSCTL_PROC(_kern_ipc, OID_AUTO, soacceptqueue, CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, 0, sizeof(int), sysctl_somaxconn, "I", "Maximum listen socket pending connection accept queue size"); SYSCTL_PROC(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_SKIP | CTLFLAG_NEEDGIANT, 0, sizeof(int), sysctl_somaxconn, "I", "Maximum listen socket pending connection accept queue size (compat)"); static int numopensockets; SYSCTL_INT(_kern_ipc, OID_AUTO, numopensockets, CTLFLAG_RD, &numopensockets, 0, "Number of open sockets"); /* * accept_mtx locks down per-socket fields relating to accept queues. See * socketvar.h for an annotation of the protected fields of struct socket. */ struct mtx accept_mtx; MTX_SYSINIT(accept_mtx, &accept_mtx, "accept", MTX_DEF); /* * so_global_mtx protects so_gencnt, numopensockets, and the per-socket * so_gencnt field. 
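 *
 * (Illustrative: numopensockets is exported read-only above, so the
 * current count can be inspected with "sysctl kern.ipc.numopensockets".)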
*/ static struct mtx so_global_mtx; MTX_SYSINIT(so_global_mtx, &so_global_mtx, "so_glabel", MTX_DEF); /* * General IPC sysctl name space, used by sockets and a variety of other IPC * types. */ SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "IPC"); /* * Initialize the socket subsystem and set up the socket * memory allocator. */ static uma_zone_t socket_zone; int maxsockets; static void socket_zone_change(void *tag) { maxsockets = uma_zone_set_max(socket_zone, maxsockets); } static void socket_hhook_register(int subtype) { if (hhook_head_register(HHOOK_TYPE_SOCKET, subtype, &V_socket_hhh[subtype], HHOOK_NOWAIT|HHOOK_HEADISINVNET) != 0) printf("%s: WARNING: unable to register hook\n", __func__); } static void socket_hhook_deregister(int subtype) { if (hhook_head_deregister(V_socket_hhh[subtype]) != 0) printf("%s: WARNING: unable to deregister hook\n", __func__); } static void socket_init(void *tag) { socket_zone = uma_zcreate("socket", sizeof(struct socket), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); maxsockets = uma_zone_set_max(socket_zone, maxsockets); uma_zone_set_warning(socket_zone, "kern.ipc.maxsockets limit reached"); EVENTHANDLER_REGISTER(maxsockets_change, socket_zone_change, NULL, EVENTHANDLER_PRI_FIRST); } SYSINIT(socket, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, socket_init, NULL); static void socket_vnet_init(const void *unused __unused) { int i; /* We expect a contiguous range */ for (i = 0; i <= HHOOK_SOCKET_LAST; i++) socket_hhook_register(i); } VNET_SYSINIT(socket_vnet_init, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, socket_vnet_init, NULL); static void socket_vnet_uninit(const void *unused __unused) { int i; for (i = 0; i <= HHOOK_SOCKET_LAST; i++) socket_hhook_deregister(i); } VNET_SYSUNINIT(socket_vnet_uninit, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, socket_vnet_uninit, NULL); /* * Initialise maxsockets. This SYSINIT must be run after * tunable_mbinit(). */ static void init_maxsockets(void *ignored) { TUNABLE_INT_FETCH("kern.ipc.maxsockets", &maxsockets); maxsockets = imax(maxsockets, maxfiles); } SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_maxsockets, NULL); /* * Sysctl to get and set the maximum global sockets limit. Notify protocols * of the change so that they can update their dependent limits as required. */ static int sysctl_maxsockets(SYSCTL_HANDLER_ARGS) { int error, newmaxsockets; newmaxsockets = maxsockets; error = sysctl_handle_int(oidp, &newmaxsockets, 0, req); if (error == 0 && req->newptr) { if (newmaxsockets > maxsockets && newmaxsockets <= maxfiles) { maxsockets = newmaxsockets; EVENTHANDLER_INVOKE(maxsockets_change); } else error = EINVAL; } return (error); } SYSCTL_PROC(_kern_ipc, OID_AUTO, maxsockets, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &maxsockets, 0, sysctl_maxsockets, "IU", "Maximum number of sockets available"); /* * Socket operation routines. These routines are called by the routines in * sys_socket.c or from a system process, and implement the semantics of * socket operations by switching out to the protocol specific routines. */ /* * Get a socket structure from our zone, and initialize it. Note that it * would probably be better to allocate socket and PCB at the same time, but * I'm not convinced that all the protocols can be easily modified to do * this. * * soalloc() returns a socket with a ref count of 0. 
*/ static struct socket * soalloc(struct vnet *vnet) { struct socket *so; so = uma_zalloc(socket_zone, M_NOWAIT | M_ZERO); if (so == NULL) return (NULL); #ifdef MAC if (mac_socket_init(so, M_NOWAIT) != 0) { uma_zfree(socket_zone, so); return (NULL); } #endif if (khelp_init_osd(HELPER_CLASS_SOCKET, &so->osd)) { uma_zfree(socket_zone, so); return (NULL); } /* * The socket locking protocol allows locking two sockets at a time, * however, the first one must be a listening socket. WITNESS lacks * a feature to change class of an existing lock, so we use DUPOK. */ mtx_init(&so->so_lock, "socket", NULL, MTX_DEF | MTX_DUPOK); SOCKBUF_LOCK_INIT(&so->so_snd, "so_snd"); SOCKBUF_LOCK_INIT(&so->so_rcv, "so_rcv"); so->so_rcv.sb_sel = &so->so_rdsel; so->so_snd.sb_sel = &so->so_wrsel; sx_init(&so->so_snd.sb_sx, "so_snd_sx"); sx_init(&so->so_rcv.sb_sx, "so_rcv_sx"); TAILQ_INIT(&so->so_snd.sb_aiojobq); TAILQ_INIT(&so->so_rcv.sb_aiojobq); TASK_INIT(&so->so_snd.sb_aiotask, 0, soaio_snd, so); TASK_INIT(&so->so_rcv.sb_aiotask, 0, soaio_rcv, so); #ifdef VIMAGE VNET_ASSERT(vnet != NULL, ("%s:%d vnet is NULL, so=%p", __func__, __LINE__, so)); so->so_vnet = vnet; #endif /* We shouldn't need the so_global_mtx */ if (hhook_run_socket(so, NULL, HHOOK_SOCKET_CREATE)) { /* Do we need more comprehensive error returns? */ uma_zfree(socket_zone, so); return (NULL); } mtx_lock(&so_global_mtx); so->so_gencnt = ++so_gencnt; ++numopensockets; #ifdef VIMAGE vnet->vnet_sockcnt++; #endif mtx_unlock(&so_global_mtx); return (so); } /* * Free the storage associated with a socket at the socket layer, tear down * locks, labels, etc. All protocol state is assumed already to have been * torn down (and possibly never set up) by the caller. */ static void sodealloc(struct socket *so) { KASSERT(so->so_count == 0, ("sodealloc(): so_count %d", so->so_count)); KASSERT(so->so_pcb == NULL, ("sodealloc(): so_pcb != NULL")); mtx_lock(&so_global_mtx); so->so_gencnt = ++so_gencnt; --numopensockets; /* Could be below, but faster here. */ #ifdef VIMAGE VNET_ASSERT(so->so_vnet != NULL, ("%s:%d so_vnet is NULL, so=%p", __func__, __LINE__, so)); so->so_vnet->vnet_sockcnt--; #endif mtx_unlock(&so_global_mtx); #ifdef MAC mac_socket_destroy(so); #endif hhook_run_socket(so, NULL, HHOOK_SOCKET_CLOSE); crfree(so->so_cred); khelp_destroy_osd(&so->osd); if (SOLISTENING(so)) { if (so->sol_accept_filter != NULL) accept_filt_setopt(so, NULL); } else { if (so->so_rcv.sb_hiwat) (void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY); if (so->so_snd.sb_hiwat) (void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.sb_hiwat, 0, RLIM_INFINITY); sx_destroy(&so->so_snd.sb_sx); sx_destroy(&so->so_rcv.sb_sx); SOCKBUF_LOCK_DESTROY(&so->so_snd); SOCKBUF_LOCK_DESTROY(&so->so_rcv); } mtx_destroy(&so->so_lock); uma_zfree(socket_zone, so); } /* * socreate returns a socket with a ref count of 1. The socket should be * closed with soclose(). */ int socreate(int dom, struct socket **aso, int type, int proto, struct ucred *cred, struct thread *td) { struct protosw *prp; struct socket *so; int error; if (proto) prp = pffindproto(dom, proto, type); else prp = pffindtype(dom, type); if (prp == NULL) { /* No support for domain. */ if (pffinddomain(dom) == NULL) return (EAFNOSUPPORT); /* No support for socket type.
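 Reaching this point with
 * proto == 0 and a nonzero type means the domain exists but lacks the
 * requested socket type, hence EPROTOTYPE rather than EPROTONOSUPPORT.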
*/ if (proto == 0 && type != 0) return (EPROTOTYPE); return (EPROTONOSUPPORT); } if (prp->pr_usrreqs->pru_attach == NULL || prp->pr_usrreqs->pru_attach == pru_attach_notsupp) return (EPROTONOSUPPORT); if (prison_check_af(cred, prp->pr_domain->dom_family) != 0) return (EPROTONOSUPPORT); if (prp->pr_type != type) return (EPROTOTYPE); so = soalloc(CRED_TO_VNET(cred)); if (so == NULL) return (ENOBUFS); so->so_type = type; so->so_cred = crhold(cred); if ((prp->pr_domain->dom_family == PF_INET) || (prp->pr_domain->dom_family == PF_INET6) || (prp->pr_domain->dom_family == PF_ROUTE)) so->so_fibnum = td->td_proc->p_fibnum; else so->so_fibnum = 0; so->so_proto = prp; #ifdef MAC mac_socket_create(cred, so); #endif knlist_init(&so->so_rdsel.si_note, so, so_rdknl_lock, so_rdknl_unlock, so_rdknl_assert_lock); knlist_init(&so->so_wrsel.si_note, so, so_wrknl_lock, so_wrknl_unlock, so_wrknl_assert_lock); /* * Auto-sizing of socket buffers is managed by the protocols and * the appropriate flags must be set in the pru_attach function. */ CURVNET_SET(so->so_vnet); error = (*prp->pr_usrreqs->pru_attach)(so, proto, td); CURVNET_RESTORE(); if (error) { sodealloc(so); return (error); } soref(so); *aso = so; return (0); } #ifdef REGRESSION static int regression_sonewconn_earlytest = 1; SYSCTL_INT(_regression, OID_AUTO, sonewconn_earlytest, CTLFLAG_RW, ®ression_sonewconn_earlytest, 0, "Perform early sonewconn limit test"); #endif static struct timeval overinterval = { 60, 0 }; SYSCTL_TIMEVAL_SEC(_kern_ipc, OID_AUTO, sooverinterval, CTLFLAG_RW, &overinterval, "Delay in seconds between warnings for listen socket overflows"); /* * When an attempt at a new connection is noted on a socket which accepts * connections, sonewconn is called. If the connection is possible (subject * to space constraints, etc.) then we allocate a new structure, properly * linked into the data structure of the original socket, and return this. * Connstatus may be 0, or SS_ISCONFIRMING, or SS_ISCONNECTED. * * Note: the ref count on the socket is 0 on return. */ struct socket * sonewconn(struct socket *head, int connstatus) { struct sbuf descrsb; struct socket *so; int len, overcount; u_int qlen; const char localprefix[] = "local:"; char descrbuf[SUNPATHLEN + sizeof(localprefix)]; #if defined(INET6) char addrbuf[INET6_ADDRSTRLEN]; #elif defined(INET) char addrbuf[INET_ADDRSTRLEN]; #endif bool dolog, over; SOLISTEN_LOCK(head); over = (head->sol_qlen > 3 * head->sol_qlimit / 2); #ifdef REGRESSION if (regression_sonewconn_earlytest && over) { #else if (over) { #endif head->sol_overcount++; dolog = !!ratecheck(&head->sol_lastover, &overinterval); /* * If we're going to log, copy the overflow count and queue * length from the listen socket before dropping the lock. * Also, reset the overflow count. */ if (dolog) { overcount = head->sol_overcount; head->sol_overcount = 0; qlen = head->sol_qlen; } SOLISTEN_UNLOCK(head); if (dolog) { /* * Try to print something descriptive about the * socket for the error message. 
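 *
 * For orientation, a minimal sbuf(9) sketch of the pattern used below
 * with descrsb (illustrative; buf, addr and port are hypothetical):
 *
 *	char buf[64];
 *	struct sbuf sb;
 *
 *	sbuf_new(&sb, buf, sizeof(buf), SBUF_FIXEDLEN);
 *	sbuf_printf(&sb, "[%s]:%hu", addr, port);
 *	sbuf_finish(&sb);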
*/ sbuf_new(&descrsb, descrbuf, sizeof(descrbuf), SBUF_FIXEDLEN); switch (head->so_proto->pr_domain->dom_family) { #if defined(INET) || defined(INET6) #ifdef INET case AF_INET: #endif #ifdef INET6 case AF_INET6: if (head->so_proto->pr_domain->dom_family == AF_INET6 || (sotoinpcb(head)->inp_inc.inc_flags & INC_ISIPV6)) { ip6_sprintf(addrbuf, &sotoinpcb(head)->inp_inc.inc6_laddr); sbuf_printf(&descrsb, "[%s]", addrbuf); } else #endif { #ifdef INET inet_ntoa_r( sotoinpcb(head)->inp_inc.inc_laddr, addrbuf); sbuf_cat(&descrsb, addrbuf); #endif } sbuf_printf(&descrsb, ":%hu (proto %u)", ntohs(sotoinpcb(head)->inp_inc.inc_lport), head->so_proto->pr_protocol); break; #endif /* INET || INET6 */ case AF_UNIX: sbuf_cat(&descrsb, localprefix); if (sotounpcb(head)->unp_addr != NULL) len = sotounpcb(head)->unp_addr->sun_len - offsetof(struct sockaddr_un, sun_path); else len = 0; if (len > 0) sbuf_bcat(&descrsb, sotounpcb(head)->unp_addr->sun_path, len); else sbuf_cat(&descrsb, "(unknown)"); break; } /* * If we can't print something more specific, at least * print the domain name. */ if (sbuf_finish(&descrsb) != 0 || sbuf_len(&descrsb) <= 0) { sbuf_clear(&descrsb); sbuf_cat(&descrsb, head->so_proto->pr_domain->dom_name ?: "unknown"); sbuf_finish(&descrsb); } KASSERT(sbuf_len(&descrsb) > 0, ("%s: sbuf creation failed", __func__)); log(LOG_DEBUG, "%s: pcb %p (%s): Listen queue overflow: " "%i already in queue awaiting acceptance " "(%d occurrences)\n", __func__, head->so_pcb, sbuf_data(&descrsb), qlen, overcount); sbuf_delete(&descrsb); overcount = 0; } return (NULL); } SOLISTEN_UNLOCK(head); VNET_ASSERT(head->so_vnet != NULL, ("%s: so %p vnet is NULL", __func__, head)); so = soalloc(head->so_vnet); if (so == NULL) { log(LOG_DEBUG, "%s: pcb %p: New socket allocation failure: " "limit reached or out of memory\n", __func__, head->so_pcb); return (NULL); } so->so_listen = head; so->so_type = head->so_type; so->so_options = head->so_options & ~SO_ACCEPTCONN; so->so_linger = head->so_linger; so->so_state = head->so_state | SS_NOFDREF; so->so_fibnum = head->so_fibnum; so->so_proto = head->so_proto; so->so_cred = crhold(head->so_cred); #ifdef MAC mac_socket_newconn(head, so); #endif knlist_init(&so->so_rdsel.si_note, so, so_rdknl_lock, so_rdknl_unlock, so_rdknl_assert_lock); knlist_init(&so->so_wrsel.si_note, so, so_wrknl_lock, so_wrknl_unlock, so_wrknl_assert_lock); VNET_SO_ASSERT(head); if (soreserve(so, head->sol_sbsnd_hiwat, head->sol_sbrcv_hiwat)) { sodealloc(so); log(LOG_DEBUG, "%s: pcb %p: soreserve() failed\n", __func__, head->so_pcb); return (NULL); } if ((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) { sodealloc(so); log(LOG_DEBUG, "%s: pcb %p: pru_attach() failed\n", __func__, head->so_pcb); return (NULL); } so->so_rcv.sb_lowat = head->sol_sbrcv_lowat; so->so_snd.sb_lowat = head->sol_sbsnd_lowat; so->so_rcv.sb_timeo = head->sol_sbrcv_timeo; so->so_snd.sb_timeo = head->sol_sbsnd_timeo; so->so_rcv.sb_flags |= head->sol_sbrcv_flags & SB_AUTOSIZE; so->so_snd.sb_flags |= head->sol_sbsnd_flags & SB_AUTOSIZE; SOLISTEN_LOCK(head); if (head->sol_accept_filter != NULL) connstatus = 0; so->so_state |= connstatus; soref(head); /* A socket on (in)complete queue refs head. */ if (connstatus) { TAILQ_INSERT_TAIL(&head->sol_comp, so, so_list); so->so_qstate = SQ_COMP; head->sol_qlen++; solisten_wakeup(head); /* unlocks */ } else { /* * Keep removing sockets from the head until there's room for * us to insert on the tail. 
In pre-locking revisions, this * was a simple if(), but as we could be racing with other * threads and soabort() requires dropping locks, we must * loop waiting for the condition to be true. */ while (head->sol_incqlen > head->sol_qlimit) { struct socket *sp; sp = TAILQ_FIRST(&head->sol_incomp); TAILQ_REMOVE(&head->sol_incomp, sp, so_list); head->sol_incqlen--; SOCK_LOCK(sp); sp->so_qstate = SQ_NONE; sp->so_listen = NULL; SOCK_UNLOCK(sp); sorele(head); /* does SOLISTEN_UNLOCK, head stays */ soabort(sp); SOLISTEN_LOCK(head); } TAILQ_INSERT_TAIL(&head->sol_incomp, so, so_list); so->so_qstate = SQ_INCOMP; head->sol_incqlen++; SOLISTEN_UNLOCK(head); } return (so); } #if defined(SCTP) || defined(SCTP_SUPPORT) /* * Socket part of sctp_peeloff(). Detach a new socket from an * association. The new socket is returned with a reference. */ struct socket * sopeeloff(struct socket *head) { struct socket *so; VNET_ASSERT(head->so_vnet != NULL, ("%s:%d so_vnet is NULL, head=%p", __func__, __LINE__, head)); so = soalloc(head->so_vnet); if (so == NULL) { log(LOG_DEBUG, "%s: pcb %p: New socket allocation failure: " "limit reached or out of memory\n", __func__, head->so_pcb); return (NULL); } so->so_type = head->so_type; so->so_options = head->so_options; so->so_linger = head->so_linger; so->so_state = (head->so_state & SS_NBIO) | SS_ISCONNECTED; so->so_fibnum = head->so_fibnum; so->so_proto = head->so_proto; so->so_cred = crhold(head->so_cred); #ifdef MAC mac_socket_newconn(head, so); #endif knlist_init(&so->so_rdsel.si_note, so, so_rdknl_lock, so_rdknl_unlock, so_rdknl_assert_lock); knlist_init(&so->so_wrsel.si_note, so, so_wrknl_lock, so_wrknl_unlock, so_wrknl_assert_lock); VNET_SO_ASSERT(head); if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) { sodealloc(so); log(LOG_DEBUG, "%s: pcb %p: soreserve() failed\n", __func__, head->so_pcb); return (NULL); } if ((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) { sodealloc(so); log(LOG_DEBUG, "%s: pcb %p: pru_attach() failed\n", __func__, head->so_pcb); return (NULL); } so->so_rcv.sb_lowat = head->so_rcv.sb_lowat; so->so_snd.sb_lowat = head->so_snd.sb_lowat; so->so_rcv.sb_timeo = head->so_rcv.sb_timeo; so->so_snd.sb_timeo = head->so_snd.sb_timeo; so->so_rcv.sb_flags |= head->so_rcv.sb_flags & SB_AUTOSIZE; so->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE; soref(so); return (so); } #endif /* SCTP */ -int -sogetsockaddr(struct socket *so, struct sockaddr **nam) -{ - int error; - - CURVNET_SET(so->so_vnet); - error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, nam); - CURVNET_RESTORE(); - return (error); -} - int sobind(struct socket *so, struct sockaddr *nam, struct thread *td) { int error; CURVNET_SET(so->so_vnet); error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, td); CURVNET_RESTORE(); return (error); } int sobindat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td) { int error; CURVNET_SET(so->so_vnet); error = (*so->so_proto->pr_usrreqs->pru_bindat)(fd, so, nam, td); CURVNET_RESTORE(); return (error); } /* * solisten() transitions a socket from a non-listening state to a listening * state, but can also be used to update the listen queue depth on an * existing listen socket. The protocol will call back into the sockets * layer using solisten_proto_check() and solisten_proto() to check and set * socket-layer listen state. Call backs are used so that the protocol can * acquire both protocol and socket layer locks in whatever order is required * by the protocol. 
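 *
 * A sketch of the expected callback sequence (illustrative; the
 * protocol's own locking is elided):
 *
 *	SOCK_LOCK(so);
 *	error = solisten_proto_check(so);
 *	if (error == 0)
 *		solisten_proto(so, backlog);
 *	SOCK_UNLOCK(so);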
* * Protocol implementors are advised to hold the socket lock across * the socket-layer test and set to avoid races at the socket layer. */ int solisten(struct socket *so, int backlog, struct thread *td) { int error; CURVNET_SET(so->so_vnet); error = (*so->so_proto->pr_usrreqs->pru_listen)(so, backlog, td); CURVNET_RESTORE(); return (error); } int solisten_proto_check(struct socket *so) { SOCK_LOCK_ASSERT(so); if (so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) return (EINVAL); return (0); } void solisten_proto(struct socket *so, int backlog) { int sbrcv_lowat, sbsnd_lowat; u_int sbrcv_hiwat, sbsnd_hiwat; short sbrcv_flags, sbsnd_flags; sbintime_t sbrcv_timeo, sbsnd_timeo; SOCK_LOCK_ASSERT(so); if (SOLISTENING(so)) goto listening; /* * Change this socket to listening state. */ sbrcv_lowat = so->so_rcv.sb_lowat; sbsnd_lowat = so->so_snd.sb_lowat; sbrcv_hiwat = so->so_rcv.sb_hiwat; sbsnd_hiwat = so->so_snd.sb_hiwat; sbrcv_flags = so->so_rcv.sb_flags; sbsnd_flags = so->so_snd.sb_flags; sbrcv_timeo = so->so_rcv.sb_timeo; sbsnd_timeo = so->so_snd.sb_timeo; sbdestroy(&so->so_snd, so); sbdestroy(&so->so_rcv, so); sx_destroy(&so->so_snd.sb_sx); sx_destroy(&so->so_rcv.sb_sx); SOCKBUF_LOCK_DESTROY(&so->so_snd); SOCKBUF_LOCK_DESTROY(&so->so_rcv); #ifdef INVARIANTS bzero(&so->so_rcv, sizeof(struct socket) - offsetof(struct socket, so_rcv)); #endif so->sol_sbrcv_lowat = sbrcv_lowat; so->sol_sbsnd_lowat = sbsnd_lowat; so->sol_sbrcv_hiwat = sbrcv_hiwat; so->sol_sbsnd_hiwat = sbsnd_hiwat; so->sol_sbrcv_flags = sbrcv_flags; so->sol_sbsnd_flags = sbsnd_flags; so->sol_sbrcv_timeo = sbrcv_timeo; so->sol_sbsnd_timeo = sbsnd_timeo; so->sol_qlen = so->sol_incqlen = 0; TAILQ_INIT(&so->sol_incomp); TAILQ_INIT(&so->sol_comp); so->sol_accept_filter = NULL; so->sol_accept_filter_arg = NULL; so->sol_accept_filter_str = NULL; so->sol_upcall = NULL; so->sol_upcallarg = NULL; so->so_options |= SO_ACCEPTCONN; listening: if (backlog < 0 || backlog > somaxconn) backlog = somaxconn; so->sol_qlimit = backlog; } /* * Wakeup listeners/subsystems once we have a complete connection. * Enters with lock, returns unlocked. */ void solisten_wakeup(struct socket *sol) { if (sol->sol_upcall != NULL) (void)sol->sol_upcall(sol, sol->sol_upcallarg, M_NOWAIT); else { selwakeuppri(&sol->so_rdsel, PSOCK); KNOTE_LOCKED(&sol->so_rdsel.si_note, 0); } SOLISTEN_UNLOCK(sol); wakeup_one(&sol->sol_comp); if ((sol->so_state & SS_ASYNC) && sol->so_sigio != NULL) pgsigio(&sol->so_sigio, SIGIO, 0); } /* * Return a single connection off a listening socket queue. The main * consumer of the function is kern_accept4(). Some modules that do their * own accept management also use the function. * * Listening socket must be locked on entry and is returned unlocked on * return. * The flags argument is a set of accept4(2) flags and ACCEPT4_INHERIT.
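 *
 * Illustrative call pattern (modeled on kern_accept4(); error handling
 * elided):
 *
 *	SOLISTEN_LOCK(head);
 *	error = solisten_dequeue(head, &so, flags);
 *
 * after which head is unlocked and, on success, so carries a reference.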
*/ int solisten_dequeue(struct socket *head, struct socket **ret, int flags) { struct socket *so; int error; SOLISTEN_LOCK_ASSERT(head); while (!(head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->sol_comp) && head->so_error == 0) { error = msleep(&head->sol_comp, &head->so_lock, PSOCK | PCATCH, "accept", 0); if (error != 0) { SOLISTEN_UNLOCK(head); return (error); } } if (head->so_error) { error = head->so_error; head->so_error = 0; } else if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->sol_comp)) error = EWOULDBLOCK; else error = 0; if (error) { SOLISTEN_UNLOCK(head); return (error); } so = TAILQ_FIRST(&head->sol_comp); SOCK_LOCK(so); KASSERT(so->so_qstate == SQ_COMP, ("%s: so %p not SQ_COMP", __func__, so)); soref(so); head->sol_qlen--; so->so_qstate = SQ_NONE; so->so_listen = NULL; TAILQ_REMOVE(&head->sol_comp, so, so_list); if (flags & ACCEPT4_INHERIT) so->so_state |= (head->so_state & SS_NBIO); else so->so_state |= (flags & SOCK_NONBLOCK) ? SS_NBIO : 0; SOCK_UNLOCK(so); sorele(head); *ret = so; return (0); } /* * Evaluate the reference count and named references on a socket; if no * references remain, free it. This should be called whenever a reference is * released, such as in sorele(), but also when named reference flags are * cleared in socket or protocol code. * * sofree() will free the socket if: * * - There are no outstanding file descriptor references or related consumers * (so_count == 0). * * - The socket has been closed by user space, if ever open (SS_NOFDREF). * * - The protocol does not have an outstanding strong reference on the socket * (SS_PROTOREF). * * - The socket is not in a completed connection queue, so a process has been * notified that it is present. If it is removed, the user process may * block in accept() despite select() saying the socket was ready. */ void sofree(struct socket *so) { struct protosw *pr = so->so_proto; SOCK_LOCK_ASSERT(so); if ((so->so_state & SS_NOFDREF) == 0 || so->so_count != 0 || (so->so_state & SS_PROTOREF) || (so->so_qstate == SQ_COMP)) { SOCK_UNLOCK(so); return; } if (!SOLISTENING(so) && so->so_qstate == SQ_INCOMP) { struct socket *sol; sol = so->so_listen; KASSERT(sol, ("%s: so %p on incomp of NULL", __func__, so)); /* * To solve the race between close of a listening socket and * a socket on its incomplete queue, we need to lock both. * The order is first listening socket, then regular. * Since we have neither SS_NOFDREF nor SS_PROTOREF, this * function and the listening socket are the only pointers * to so. To preserve so and sol, we reference both and then * relock. * After the relock the socket may not move to so_comp since it * doesn't have a PCB already, but it may be removed from * so_incomp. If that happens, we share responsibility for * freeing the socket, but soclose() has already removed * it from the queue. */ soref(sol); soref(so); SOCK_UNLOCK(so); SOLISTEN_LOCK(sol); SOCK_LOCK(so); if (so->so_qstate == SQ_INCOMP) { KASSERT(so->so_listen == sol, ("%s: so %p migrated out of sol %p", __func__, so, sol)); TAILQ_REMOVE(&sol->sol_incomp, so, so_list); sol->sol_incqlen--; /* This is guaranteed not to be the last.
*/ refcount_release(&sol->so_count); so->so_qstate = SQ_NONE; so->so_listen = NULL; } else KASSERT(so->so_listen == NULL, ("%s: so %p not on (in)comp with so_listen", __func__, so)); sorele(sol); KASSERT(so->so_count == 1, ("%s: so %p count %u", __func__, so, so->so_count)); so->so_count = 0; } if (SOLISTENING(so)) so->so_error = ECONNABORTED; SOCK_UNLOCK(so); if (so->so_dtor != NULL) so->so_dtor(so); VNET_SO_ASSERT(so); if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL) (*pr->pr_domain->dom_dispose)(so); if (pr->pr_usrreqs->pru_detach != NULL) (*pr->pr_usrreqs->pru_detach)(so); /* * From this point on, we assume that no other references to this * socket exist anywhere else in the stack. Therefore, no locks need * to be acquired or held. * * We used to do a lot of socket buffer and socket locking here, as * well as invoke sorflush() and perform wakeups. The direct calls to * dom_dispose() and sbdestroy() are an inlining of what was * necessary from sorflush(). * * Notice that the socket buffer and kqueue state are torn down * before calling pru_detach. This means that protocols should not * assume they can perform socket wakeups, etc, in their detach code. */ if (!SOLISTENING(so)) { sbdestroy(&so->so_snd, so); sbdestroy(&so->so_rcv, so); } seldrain(&so->so_rdsel); seldrain(&so->so_wrsel); knlist_destroy(&so->so_rdsel.si_note); knlist_destroy(&so->so_wrsel.si_note); sodealloc(so); } /* * Close a socket on last file table reference removal. Initiate disconnect * if connected. Free socket when disconnect complete. * * This function will sorele() the socket. Note that soclose() may be called * prior to the ref count reaching zero. The actual socket structure will * not be freed until the ref count reaches zero. */ int soclose(struct socket *so) { struct accept_queue lqueue; bool listening; int error = 0; KASSERT(!(so->so_state & SS_NOFDREF), ("soclose: SS_NOFDREF on enter")); CURVNET_SET(so->so_vnet); funsetown(&so->so_sigio); if (so->so_state & SS_ISCONNECTED) { if ((so->so_state & SS_ISDISCONNECTING) == 0) { error = sodisconnect(so); if (error) { if (error == ENOTCONN) error = 0; goto drop; } } if ((so->so_options & SO_LINGER) != 0 && so->so_linger != 0) { if ((so->so_state & SS_ISDISCONNECTING) && (so->so_state & SS_NBIO)) goto drop; while (so->so_state & SS_ISCONNECTED) { error = tsleep(&so->so_timeo, PSOCK | PCATCH, "soclos", so->so_linger * hz); if (error) break; } } } drop: if (so->so_proto->pr_usrreqs->pru_close != NULL) (*so->so_proto->pr_usrreqs->pru_close)(so); SOCK_LOCK(so); if ((listening = (so->so_options & SO_ACCEPTCONN))) { struct socket *sp; TAILQ_INIT(&lqueue); TAILQ_SWAP(&lqueue, &so->sol_incomp, socket, so_list); TAILQ_CONCAT(&lqueue, &so->sol_comp, so_list); so->sol_qlen = so->sol_incqlen = 0; TAILQ_FOREACH(sp, &lqueue, so_list) { SOCK_LOCK(sp); sp->so_qstate = SQ_NONE; sp->so_listen = NULL; SOCK_UNLOCK(sp); /* Guaranteed not to be the last. */ refcount_release(&so->so_count); } } KASSERT((so->so_state & SS_NOFDREF) == 0, ("soclose: NOFDREF")); so->so_state |= SS_NOFDREF; sorele(so); if (listening) { struct socket *sp, *tsp; TAILQ_FOREACH_SAFE(sp, &lqueue, so_list, tsp) { SOCK_LOCK(sp); if (sp->so_count == 0) { SOCK_UNLOCK(sp); soabort(sp); } else /* sp is now in sofree() */ SOCK_UNLOCK(sp); } } CURVNET_RESTORE(); return (error); } /* * soabort() is used to abruptly tear down a connection, such as when a * resource limit is reached (listen queue depth exceeded), or if a listen * socket is closed while there are sockets waiting to be accepted.
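 *
 * (Illustrative: the incomplete-queue drain in sonewconn() above shows
 * the expected call pattern: remove the socket from its queue, drop the
 * listen lock, then soabort() it.)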
* * This interface is tricky, because it is called on an unreferenced socket, * and must be called only by a thread that has actually removed the socket * from the listen queue it was on, or races with other threads are risked. * * This interface will call into the protocol code, so must not be called * with any socket locks held. Protocols do call it while holding their own * recursible protocol mutexes, but this is something that should be subject * to review in the future. */ void soabort(struct socket *so) { /* * In as much as is possible, assert that no references to this * socket are held. This is not quite the same as asserting that the * current thread is responsible for arranging for no references, but * is as close as we can get for now. */ KASSERT(so->so_count == 0, ("soabort: so_count")); KASSERT((so->so_state & SS_PROTOREF) == 0, ("soabort: SS_PROTOREF")); KASSERT(so->so_state & SS_NOFDREF, ("soabort: !SS_NOFDREF")); VNET_SO_ASSERT(so); if (so->so_proto->pr_usrreqs->pru_abort != NULL) (*so->so_proto->pr_usrreqs->pru_abort)(so); SOCK_LOCK(so); sofree(so); } int soaccept(struct socket *so, struct sockaddr **nam) { int error; SOCK_LOCK(so); KASSERT((so->so_state & SS_NOFDREF) != 0, ("soaccept: !NOFDREF")); so->so_state &= ~SS_NOFDREF; SOCK_UNLOCK(so); CURVNET_SET(so->so_vnet); error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam); CURVNET_RESTORE(); return (error); } int soconnect(struct socket *so, struct sockaddr *nam, struct thread *td) { return (soconnectat(AT_FDCWD, so, nam, td)); } int soconnectat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td) { int error; if (so->so_options & SO_ACCEPTCONN) return (EOPNOTSUPP); CURVNET_SET(so->so_vnet); /* * If protocol is connection-based, can only connect once. * Otherwise, if connected, try to disconnect first. This allows * user to disconnect by connecting to, e.g., a null address. */ if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && ((so->so_proto->pr_flags & PR_CONNREQUIRED) || (error = sodisconnect(so)))) { error = EISCONN; } else { /* * Prevent accumulated error from previous connection from * biting us. */ so->so_error = 0; if (fd == AT_FDCWD) { error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, td); } else { error = (*so->so_proto->pr_usrreqs->pru_connectat)(fd, so, nam, td); } } CURVNET_RESTORE(); return (error); } int soconnect2(struct socket *so1, struct socket *so2) { int error; CURVNET_SET(so1->so_vnet); error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2); CURVNET_RESTORE(); return (error); } int sodisconnect(struct socket *so) { int error; if ((so->so_state & SS_ISCONNECTED) == 0) return (ENOTCONN); if (so->so_state & SS_ISDISCONNECTING) return (EALREADY); VNET_SO_ASSERT(so); error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so); return (error); } #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT) int sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td) { long space; ssize_t resid; int clen = 0, error, dontroute; KASSERT(so->so_type == SOCK_DGRAM, ("sosend_dgram: !SOCK_DGRAM")); KASSERT(so->so_proto->pr_flags & PR_ATOMIC, ("sosend_dgram: !PR_ATOMIC")); if (uio != NULL) resid = uio->uio_resid; else resid = top->m_pkthdr.len; /* * In theory resid should be unsigned. However, space must be * signed, as it might be less than 0 if we over-committed, and we * must use a signed comparison of space and resid. 
On the other * hand, a negative resid causes us to loop sending 0-length * segments to the protocol. */ if (resid < 0) { error = EINVAL; goto out; } dontroute = (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0; if (td != NULL) td->td_ru.ru_msgsnd++; if (control != NULL) clen = control->m_len; SOCKBUF_LOCK(&so->so_snd); if (so->so_snd.sb_state & SBS_CANTSENDMORE) { SOCKBUF_UNLOCK(&so->so_snd); error = EPIPE; goto out; } if (so->so_error) { error = so->so_error; so->so_error = 0; SOCKBUF_UNLOCK(&so->so_snd); goto out; } if ((so->so_state & SS_ISCONNECTED) == 0) { /* * `sendto' and `sendmsg' is allowed on a connection-based * socket if it supports implied connect. Return ENOTCONN if * not connected and no address is supplied. */ if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { if ((so->so_state & SS_ISCONFIRMING) == 0 && !(resid == 0 && clen != 0)) { SOCKBUF_UNLOCK(&so->so_snd); error = ENOTCONN; goto out; } } else if (addr == NULL) { if (so->so_proto->pr_flags & PR_CONNREQUIRED) error = ENOTCONN; else error = EDESTADDRREQ; SOCKBUF_UNLOCK(&so->so_snd); goto out; } } /* * Do we need MSG_OOB support in SOCK_DGRAM? Signs here may be a * problem and need fixing. */ space = sbspace(&so->so_snd); if (flags & MSG_OOB) space += 1024; space -= clen; SOCKBUF_UNLOCK(&so->so_snd); if (resid > space) { error = EMSGSIZE; goto out; } if (uio == NULL) { resid = 0; if (flags & MSG_EOR) top->m_flags |= M_EOR; } else { /* * Copy the data from userland into a mbuf chain. * If no data is to be copied in, a single empty mbuf * is returned. */ top = m_uiotombuf(uio, M_WAITOK, space, max_hdr, (M_PKTHDR | ((flags & MSG_EOR) ? M_EOR : 0))); if (top == NULL) { error = EFAULT; /* only possible error */ goto out; } space -= resid - uio->uio_resid; resid = uio->uio_resid; } KASSERT(resid == 0, ("sosend_dgram: resid != 0")); /* * XXXRW: Frobbing SO_DONTROUTE here is even worse without sblock * than with. */ if (dontroute) { SOCK_LOCK(so); so->so_options |= SO_DONTROUTE; SOCK_UNLOCK(so); } /* * XXX all the SBS_CANTSENDMORE checks previously done could be out * of date. We could have received a reset packet in an interrupt or * maybe we slept while doing page faults in uiomove() etc. We could * probably recheck again inside the locking protection here, but * there are probably other places that this also happens. We must * rethink this. */ VNET_SO_ASSERT(so); error = (*so->so_proto->pr_usrreqs->pru_send)(so, (flags & MSG_OOB) ? PRUS_OOB : /* * If the user set MSG_EOF, the protocol understands this flag and * nothing left to send then use PRU_SEND_EOF instead of PRU_SEND. */ ((flags & MSG_EOF) && (so->so_proto->pr_flags & PR_IMPLOPCL) && (resid <= 0)) ? PRUS_EOF : /* If there is more to send set PRUS_MORETOCOME */ (flags & MSG_MORETOCOME) || (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0, top, addr, control, td); if (dontroute) { SOCK_LOCK(so); so->so_options &= ~SO_DONTROUTE; SOCK_UNLOCK(so); } clen = 0; control = NULL; top = NULL; out: if (top != NULL) m_freem(top); if (control != NULL) m_freem(control); return (error); } /* * Send on a socket. If send must go all at once and message is larger than * send buffering, then hard error. Lock against other senders. If must go * all at once and not enough room now, then inform user that this would * block and do nothing. Otherwise, if nonblocking, send as much as * possible. The data to be sent is described by "uio" if nonzero, otherwise * by the mbuf chain "top" (which must be null if uio is not). 
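 *
 * As a userland illustration of this contract (a hypothetical sketch,
 * not part of this file; fd, buf and len are assumed to exist):
 *
 *	ssize_t n = send(fd, buf, len, MSG_DONTWAIT);
 *	if (n == -1 && errno == EMSGSIZE)
 *		;	// atomic send larger than the buffer: hard error
 *	else if (n == -1 && errno == EWOULDBLOCK)
 *		;	// no room right now and we were told not to block
 *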
Data provided * in mbuf chain must be small enough to send all at once. * * Returns nonzero on error, timeout or signal; callers must check for short * counts if EINTR/ERESTART are returned. Data and control buffers are freed * on return. */ int sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td) { long space; ssize_t resid; int clen = 0, error, dontroute; int atomic = sosendallatonce(so) || top; int pru_flag; #ifdef KERN_TLS struct ktls_session *tls; int tls_enq_cnt, tls_pruflag; uint8_t tls_rtype; tls = NULL; tls_rtype = TLS_RLTYPE_APP; #endif if (uio != NULL) resid = uio->uio_resid; else if ((top->m_flags & M_PKTHDR) != 0) resid = top->m_pkthdr.len; else resid = m_length(top, NULL); /* * In theory resid should be unsigned. However, space must be * signed, as it might be less than 0 if we over-committed, and we * must use a signed comparison of space and resid. On the other * hand, a negative resid causes us to loop sending 0-length * segments to the protocol. * * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM * type sockets since that's an error. */ if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) { error = EINVAL; goto out; } dontroute = (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && (so->so_proto->pr_flags & PR_ATOMIC); if (td != NULL) td->td_ru.ru_msgsnd++; if (control != NULL) clen = control->m_len; error = sblock(&so->so_snd, SBLOCKWAIT(flags)); if (error) goto out; #ifdef KERN_TLS tls_pruflag = 0; tls = ktls_hold(so->so_snd.sb_tls_info); if (tls != NULL) { if (tls->mode == TCP_TLS_MODE_SW) tls_pruflag = PRUS_NOTREADY; if (control != NULL) { struct cmsghdr *cm = mtod(control, struct cmsghdr *); if (clen >= sizeof(*cm) && cm->cmsg_type == TLS_SET_RECORD_TYPE) { tls_rtype = *((uint8_t *)CMSG_DATA(cm)); clen = 0; m_freem(control); control = NULL; atomic = 1; } } } #endif restart: do { SOCKBUF_LOCK(&so->so_snd); if (so->so_snd.sb_state & SBS_CANTSENDMORE) { SOCKBUF_UNLOCK(&so->so_snd); error = EPIPE; goto release; } if (so->so_error) { error = so->so_error; so->so_error = 0; SOCKBUF_UNLOCK(&so->so_snd); goto release; } if ((so->so_state & SS_ISCONNECTED) == 0) { /* * `sendto' and `sendmsg' is allowed on a connection- * based socket if it supports implied connect. * Return ENOTCONN if not connected and no address is * supplied. 
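 *
 * Seen from userland (illustrative sketch; tcp_fd and udp_fd are
 * assumed descriptors):
 *
 *	send(tcp_fd, "x", 1, 0);	// -1/ENOTCONN before connect(2)
 *	send(udp_fd, "x", 1, 0);	// -1/EDESTADDRREQ with no peer set
 *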
*/ if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { if ((so->so_state & SS_ISCONFIRMING) == 0 && !(resid == 0 && clen != 0)) { SOCKBUF_UNLOCK(&so->so_snd); error = ENOTCONN; goto release; } } else if (addr == NULL) { SOCKBUF_UNLOCK(&so->so_snd); if (so->so_proto->pr_flags & PR_CONNREQUIRED) error = ENOTCONN; else error = EDESTADDRREQ; goto release; } } space = sbspace(&so->so_snd); if (flags & MSG_OOB) space += 1024; if ((atomic && resid > so->so_snd.sb_hiwat) || clen > so->so_snd.sb_hiwat) { SOCKBUF_UNLOCK(&so->so_snd); error = EMSGSIZE; goto release; } if (space < resid + clen && (atomic || space < so->so_snd.sb_lowat || space < clen)) { if ((so->so_state & SS_NBIO) || (flags & (MSG_NBIO | MSG_DONTWAIT)) != 0) { SOCKBUF_UNLOCK(&so->so_snd); error = EWOULDBLOCK; goto release; } error = sbwait(&so->so_snd); SOCKBUF_UNLOCK(&so->so_snd); if (error) goto release; goto restart; } SOCKBUF_UNLOCK(&so->so_snd); space -= clen; do { if (uio == NULL) { resid = 0; if (flags & MSG_EOR) top->m_flags |= M_EOR; #ifdef KERN_TLS if (tls != NULL) { ktls_frame(top, tls, &tls_enq_cnt, tls_rtype); tls_rtype = TLS_RLTYPE_APP; } #endif } else { /* * Copy the data from userland into a mbuf * chain. If resid is 0, which can happen * only if we have control to send, then * a single empty mbuf is returned. This * is a workaround to prevent protocol send * methods from panicking. */ #ifdef KERN_TLS if (tls != NULL) { top = m_uiotombuf(uio, M_WAITOK, space, tls->params.max_frame_len, M_EXTPG | ((flags & MSG_EOR) ? M_EOR : 0)); if (top != NULL) { ktls_frame(top, tls, &tls_enq_cnt, tls_rtype); } tls_rtype = TLS_RLTYPE_APP; } else #endif top = m_uiotombuf(uio, M_WAITOK, space, (atomic ? max_hdr : 0), (atomic ? M_PKTHDR : 0) | ((flags & MSG_EOR) ? M_EOR : 0)); if (top == NULL) { error = EFAULT; /* only possible error */ goto release; } space -= resid - uio->uio_resid; resid = uio->uio_resid; } if (dontroute) { SOCK_LOCK(so); so->so_options |= SO_DONTROUTE; SOCK_UNLOCK(so); } /* * XXX all the SBS_CANTSENDMORE checks previously * done could be out of date. We could have received * a reset packet in an interrupt or maybe we slept * while doing page faults in uiomove() etc. We * could probably recheck again inside the locking * protection here, but there are probably other * places that this also happens. We must rethink * this. */ VNET_SO_ASSERT(so); pru_flag = (flags & MSG_OOB) ? PRUS_OOB : /* * If the user set MSG_EOF, the protocol understands * this flag and nothing left to send then use * PRU_SEND_EOF instead of PRU_SEND. */ ((flags & MSG_EOF) && (so->so_proto->pr_flags & PR_IMPLOPCL) && (resid <= 0)) ? PRUS_EOF : /* If there is more to send set PRUS_MORETOCOME. */ (flags & MSG_MORETOCOME) || (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0; #ifdef KERN_TLS pru_flag |= tls_pruflag; #endif error = (*so->so_proto->pr_usrreqs->pru_send)(so, pru_flag, top, addr, control, td); if (dontroute) { SOCK_LOCK(so); so->so_options &= ~SO_DONTROUTE; SOCK_UNLOCK(so); } #ifdef KERN_TLS if (tls != NULL && tls->mode == TCP_TLS_MODE_SW) { /* * Note that error is intentionally * ignored. * * Like sendfile(), we rely on the * completion routine (pru_ready()) * to free the mbufs in the event that * pru_send() encountered an error and * did not append them to the sockbuf.
*/ soref(so); ktls_enqueue(top, so, tls_enq_cnt); } #endif clen = 0; control = NULL; top = NULL; if (error) goto release; } while (resid && space > 0); } while (resid); release: sbunlock(&so->so_snd); out: #ifdef KERN_TLS if (tls != NULL) ktls_free(tls); #endif if (top != NULL) m_freem(top); if (control != NULL) m_freem(control); return (error); } int sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td) { int error; CURVNET_SET(so->so_vnet); if (!SOLISTENING(so)) error = so->so_proto->pr_usrreqs->pru_sosend(so, addr, uio, top, control, flags, td); else { m_freem(top); m_freem(control); error = ENOTCONN; } CURVNET_RESTORE(); return (error); } /* * The part of soreceive() that implements reading non-inline out-of-band * data from a socket. For more complete comments, see soreceive(), from * which this code originated. * * Note that soreceive_rcvoob(), unlike the remainder of soreceive(), is * unable to return an mbuf chain to the caller. */ static int soreceive_rcvoob(struct socket *so, struct uio *uio, int flags) { struct protosw *pr = so->so_proto; struct mbuf *m; int error; KASSERT(flags & MSG_OOB, ("soreceive_rcvoob: (flags & MSG_OOB) == 0")); VNET_SO_ASSERT(so); m = m_get(M_WAITOK, MT_DATA); error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK); if (error) goto bad; do { error = uiomove(mtod(m, void *), (int) min(uio->uio_resid, m->m_len), uio); m = m_free(m); } while (uio->uio_resid && error == 0 && m); bad: if (m != NULL) m_freem(m); return (error); } /* * Following replacement or removal of the first mbuf on the first mbuf chain * of a socket buffer, push necessary state changes back into the socket * buffer so that other consumers see the values consistently. 'nextrecord' * is the callers locally stored value of the original value of * sb->sb_mb->m_nextpkt which must be restored when the lead mbuf changes. * NOTE: 'nextrecord' may be NULL. */ static __inline void sockbuf_pushsync(struct sockbuf *sb, struct mbuf *nextrecord) { SOCKBUF_LOCK_ASSERT(sb); /* * First, update for the new value of nextrecord. If necessary, make * it the first record. */ if (sb->sb_mb != NULL) sb->sb_mb->m_nextpkt = nextrecord; else sb->sb_mb = nextrecord; /* * Now update any dependent socket buffer fields to reflect the new * state. This is an expanded inline of SB_EMPTY_FIXUP(), with the * addition of a second clause that takes care of the case where * sb_mb has been updated, but remains the last record. */ if (sb->sb_mb == NULL) { sb->sb_mbtail = NULL; sb->sb_lastrecord = NULL; } else if (sb->sb_mb->m_nextpkt == NULL) sb->sb_lastrecord = sb->sb_mb; } /* * Implement receive operations on a socket. We depend on the way that * records are added to the sockbuf by sbappend. In particular, each record * (mbufs linked through m_next) must begin with an address if the protocol * so specifies, followed by an optional mbuf or mbufs containing ancillary * data, and then zero or more mbufs of data. In order to allow parallelism * between network receive and copying to user space, as well as avoid * sleeping with a mutex held, we release the socket buffer mutex during the * user space copy. Although the sockbuf is locked, new data may still be * appended, and thus we must maintain consistency of the sockbuf during that * time. * * The caller may receive the data as a single mbuf chain by supplying an * mbuf **mp0 for use in returning the chain. The uio is then used only for * the count in uio_resid. 
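 *
 * A hypothetical in-kernel caller that wants the chain itself might
 * look like this (sketch only; so, len and error are assumed to exist):
 *
 *	struct uio auio;
 *	struct mbuf *mp = NULL;
 *
 *	bzero(&auio, sizeof(auio));
 *	auio.uio_resid = len;
 *	error = soreceive(so, NULL, &auio, &mp, NULL, NULL);
 *	// on success, up to len bytes come back in 'mp', not copied out
 *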
*/ int soreceive_generic(struct socket *so, struct sockaddr **psa, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { struct mbuf *m, **mp; int flags, error, offset; ssize_t len; struct protosw *pr = so->so_proto; struct mbuf *nextrecord; int moff, type = 0; ssize_t orig_resid = uio->uio_resid; mp = mp0; if (psa != NULL) *psa = NULL; if (controlp != NULL) *controlp = NULL; if (flagsp != NULL) flags = *flagsp &~ MSG_EOR; else flags = 0; if (flags & MSG_OOB) return (soreceive_rcvoob(so, uio, flags)); if (mp != NULL) *mp = NULL; if ((pr->pr_flags & PR_WANTRCVD) && (so->so_state & SS_ISCONFIRMING) && uio->uio_resid) { VNET_SO_ASSERT(so); (*pr->pr_usrreqs->pru_rcvd)(so, 0); } error = sblock(&so->so_rcv, SBLOCKWAIT(flags)); if (error) return (error); restart: SOCKBUF_LOCK(&so->so_rcv); m = so->so_rcv.sb_mb; /* * If we have less data than requested, block awaiting more (subject * to any timeout) if: * 1. the current count is less than the low water mark, or * 2. MSG_DONTWAIT is not set */ if (m == NULL || (((flags & MSG_DONTWAIT) == 0 && sbavail(&so->so_rcv) < uio->uio_resid) && sbavail(&so->so_rcv) < so->so_rcv.sb_lowat && m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) { KASSERT(m != NULL || !sbavail(&so->so_rcv), ("receive: m == %p sbavail == %u", m, sbavail(&so->so_rcv))); if (so->so_error) { if (m != NULL) goto dontblock; error = so->so_error; if ((flags & MSG_PEEK) == 0) so->so_error = 0; SOCKBUF_UNLOCK(&so->so_rcv); goto release; } SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { if (m != NULL) goto dontblock; #ifdef KERN_TLS else if (so->so_rcv.sb_tlsdcc == 0 && so->so_rcv.sb_tlscc == 0) { #else else { #endif SOCKBUF_UNLOCK(&so->so_rcv); goto release; } } for (; m != NULL; m = m->m_next) if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { m = so->so_rcv.sb_mb; goto dontblock; } if ((so->so_state & (SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING | SS_ISDISCONNECTED)) == 0 && (so->so_proto->pr_flags & PR_CONNREQUIRED) != 0) { SOCKBUF_UNLOCK(&so->so_rcv); error = ENOTCONN; goto release; } if (uio->uio_resid == 0) { SOCKBUF_UNLOCK(&so->so_rcv); goto release; } if ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO))) { SOCKBUF_UNLOCK(&so->so_rcv); error = EWOULDBLOCK; goto release; } SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); error = sbwait(&so->so_rcv); SOCKBUF_UNLOCK(&so->so_rcv); if (error) goto release; goto restart; } dontblock: /* * From this point onward, we maintain 'nextrecord' as a cache of the * pointer to the next record in the socket buffer. We must keep the * various socket buffer pointers and local stack versions of the * pointers in sync, pushing out modifications before dropping the * socket buffer mutex, and re-reading them when picking it up. * * Otherwise, we will race with the network stack appending new data * or records onto the socket buffer by using inconsistent/stale * versions of the field, possibly resulting in socket buffer * corruption. * * By holding the high-level sblock(), we prevent simultaneous * readers from pulling off the front of the socket buffer. 
*/ SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (uio->uio_td) uio->uio_td->td_ru.ru_msgrcv++; KASSERT(m == so->so_rcv.sb_mb, ("soreceive: m != so->so_rcv.sb_mb")); SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); nextrecord = m->m_nextpkt; if (pr->pr_flags & PR_ADDR) { KASSERT(m->m_type == MT_SONAME, ("m->m_type == %d", m->m_type)); orig_resid = 0; if (psa != NULL) *psa = sodupsockaddr(mtod(m, struct sockaddr *), M_NOWAIT); if (flags & MSG_PEEK) { m = m->m_next; } else { sbfree(&so->so_rcv, m); so->so_rcv.sb_mb = m_free(m); m = so->so_rcv.sb_mb; sockbuf_pushsync(&so->so_rcv, nextrecord); } } /* * Process one or more MT_CONTROL mbufs present before any data mbufs * in the first mbuf chain on the socket buffer. If MSG_PEEK, we * just copy the data; if !MSG_PEEK, we call into the protocol to * perform externalization (or freeing if controlp == NULL). */ if (m != NULL && m->m_type == MT_CONTROL) { struct mbuf *cm = NULL, *cmn; struct mbuf **cme = &cm; #ifdef KERN_TLS struct cmsghdr *cmsg; struct tls_get_record tgr; /* * For MSG_TLSAPPDATA, check for a non-application data * record. If found, return ENXIO without removing * it from the receive queue. This allows a subsequent * call without MSG_TLSAPPDATA to receive it. * Note that, for TLS, there should only be a single * control mbuf with the TLS_GET_RECORD message in it. */ if (flags & MSG_TLSAPPDATA) { cmsg = mtod(m, struct cmsghdr *); if (cmsg->cmsg_type == TLS_GET_RECORD && cmsg->cmsg_len == CMSG_LEN(sizeof(tgr))) { memcpy(&tgr, CMSG_DATA(cmsg), sizeof(tgr)); /* This will need to change for TLS 1.3. */ if (tgr.tls_type != TLS_RLTYPE_APP) { SOCKBUF_UNLOCK(&so->so_rcv); error = ENXIO; goto release; } } } #endif do { if (flags & MSG_PEEK) { if (controlp != NULL) { *controlp = m_copym(m, 0, m->m_len, M_NOWAIT); controlp = &(*controlp)->m_next; } m = m->m_next; } else { sbfree(&so->so_rcv, m); so->so_rcv.sb_mb = m->m_next; m->m_next = NULL; *cme = m; cme = &(*cme)->m_next; m = so->so_rcv.sb_mb; } } while (m != NULL && m->m_type == MT_CONTROL); if ((flags & MSG_PEEK) == 0) sockbuf_pushsync(&so->so_rcv, nextrecord); while (cm != NULL) { cmn = cm->m_next; cm->m_next = NULL; if (pr->pr_domain->dom_externalize != NULL) { SOCKBUF_UNLOCK(&so->so_rcv); VNET_SO_ASSERT(so); error = (*pr->pr_domain->dom_externalize) (cm, controlp, flags); SOCKBUF_LOCK(&so->so_rcv); } else if (controlp != NULL) *controlp = cm; else m_freem(cm); if (controlp != NULL) { orig_resid = 0; while (*controlp != NULL) controlp = &(*controlp)->m_next; } cm = cmn; } if (m != NULL) nextrecord = so->so_rcv.sb_mb->m_nextpkt; else nextrecord = so->so_rcv.sb_mb; orig_resid = 0; } if (m != NULL) { if ((flags & MSG_PEEK) == 0) { KASSERT(m->m_nextpkt == nextrecord, ("soreceive: post-control, nextrecord !sync")); if (nextrecord == NULL) { KASSERT(so->so_rcv.sb_mb == m, ("soreceive: post-control, sb_mb!=m")); KASSERT(so->so_rcv.sb_lastrecord == m, ("soreceive: post-control, lastrecord!=m")); } } type = m->m_type; if (type == MT_OOBDATA) flags |= MSG_OOB; } else { if ((flags & MSG_PEEK) == 0) { KASSERT(so->so_rcv.sb_mb == nextrecord, ("soreceive: sb_mb != nextrecord")); if (so->so_rcv.sb_mb == NULL) { KASSERT(so->so_rcv.sb_lastrecord == NULL, ("soreceive: sb_lastrecord != NULL")); } } } SOCKBUF_LOCK_ASSERT(&so->so_rcv); SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); /* * Now continue to read any data mbufs off of the head of the socket * buffer until the read request is satisfied.
Note that 'type' is * used to store the type of any mbuf reads that have happened so far * such that soreceive() can stop reading if the type changes, which * causes soreceive() to return only one of regular data and inline * out-of-band data in a single socket receive operation. */ moff = 0; offset = 0; while (m != NULL && !(m->m_flags & M_NOTAVAIL) && uio->uio_resid > 0 && error == 0) { /* * If the type of mbuf has changed since the last mbuf * examined ('type'), end the receive operation. */ SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (m->m_type == MT_OOBDATA || m->m_type == MT_CONTROL) { if (type != m->m_type) break; } else if (type == MT_OOBDATA) break; else KASSERT(m->m_type == MT_DATA, ("m->m_type == %d", m->m_type)); so->so_rcv.sb_state &= ~SBS_RCVATMARK; len = uio->uio_resid; if (so->so_oobmark && len > so->so_oobmark - offset) len = so->so_oobmark - offset; if (len > m->m_len - moff) len = m->m_len - moff; /* * If mp is set, just pass back the mbufs. Otherwise copy * them out via the uio, then free. Sockbuf must be * consistent here (points to current mbuf, it points to next * record) when we drop priority; we must note any additions * to the sockbuf when we block interrupts again. */ if (mp == NULL) { SOCKBUF_LOCK_ASSERT(&so->so_rcv); SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); SOCKBUF_UNLOCK(&so->so_rcv); if ((m->m_flags & M_EXTPG) != 0) error = m_unmappedtouio(m, moff, uio, (int)len); else error = uiomove(mtod(m, char *) + moff, (int)len, uio); SOCKBUF_LOCK(&so->so_rcv); if (error) { /* * The MT_SONAME mbuf has already been removed * from the record, so it is necessary to * remove the data mbufs, if any, to preserve * the invariant in the case of PR_ADDR that * requires MT_SONAME mbufs at the head of * each record. */ if (pr->pr_flags & PR_ATOMIC && ((flags & MSG_PEEK) == 0)) (void)sbdroprecord_locked(&so->so_rcv); SOCKBUF_UNLOCK(&so->so_rcv); goto release; } } else uio->uio_resid -= len; SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (len == m->m_len - moff) { if (m->m_flags & M_EOR) flags |= MSG_EOR; if (flags & MSG_PEEK) { m = m->m_next; moff = 0; } else { nextrecord = m->m_nextpkt; sbfree(&so->so_rcv, m); if (mp != NULL) { m->m_nextpkt = NULL; *mp = m; mp = &m->m_next; so->so_rcv.sb_mb = m = m->m_next; *mp = NULL; } else { so->so_rcv.sb_mb = m_free(m); m = so->so_rcv.sb_mb; } sockbuf_pushsync(&so->so_rcv, nextrecord); SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); } } else { if (flags & MSG_PEEK) moff += len; else { if (mp != NULL) { if (flags & MSG_DONTWAIT) { *mp = m_copym(m, 0, len, M_NOWAIT); if (*mp == NULL) { /* * m_copym() couldn't * allocate an mbuf. * Adjust uio_resid back * (it was adjusted * down by len bytes, * which we didn't end * up "copying" over). */ uio->uio_resid += len; break; } } else { SOCKBUF_UNLOCK(&so->so_rcv); *mp = m_copym(m, 0, len, M_WAITOK); SOCKBUF_LOCK(&so->so_rcv); } } sbcut_locked(&so->so_rcv, len); } } SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (so->so_oobmark) { if ((flags & MSG_PEEK) == 0) { so->so_oobmark -= len; if (so->so_oobmark == 0) { so->so_rcv.sb_state |= SBS_RCVATMARK; break; } } else { offset += len; if (offset == so->so_oobmark) break; } } if (flags & MSG_EOR) break; /* * If the MSG_WAITALL flag is set (for non-atomic socket), we * must not quit until "uio->uio_resid == 0" or an error * termination. If a signal/timeout occurs, return with a * short count but without error. Keep sockbuf locked * against other readers. 
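 *
 * The userland-visible effect (illustrative sketch; fd, buf and want
 * are assumed): the call below returns only once 'want' bytes have
 * arrived, on EOF, or on a signal/timeout, in which case a short count
 * comes back without error:
 *
 *	ssize_t n = recv(fd, buf, want, MSG_WAITALL);
 *	if (n > 0 && (size_t)n < want)
 *		;	// interrupted or timed out: short count, no error
 *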
*/ while (flags & MSG_WAITALL && m == NULL && uio->uio_resid > 0 && !sosendallatonce(so) && nextrecord == NULL) { SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (so->so_error || so->so_rcv.sb_state & SBS_CANTRCVMORE) break; /* * Notify the protocol that some data has been * drained before blocking. */ if (pr->pr_flags & PR_WANTRCVD) { SOCKBUF_UNLOCK(&so->so_rcv); VNET_SO_ASSERT(so); (*pr->pr_usrreqs->pru_rcvd)(so, flags); SOCKBUF_LOCK(&so->so_rcv); } SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); /* * We could receive some data while we were notifying * the protocol. Skip blocking in this case. */ if (so->so_rcv.sb_mb == NULL) { error = sbwait(&so->so_rcv); if (error) { SOCKBUF_UNLOCK(&so->so_rcv); goto release; } } m = so->so_rcv.sb_mb; if (m != NULL) nextrecord = m->m_nextpkt; } } SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (m != NULL && pr->pr_flags & PR_ATOMIC) { flags |= MSG_TRUNC; if ((flags & MSG_PEEK) == 0) (void) sbdroprecord_locked(&so->so_rcv); } if ((flags & MSG_PEEK) == 0) { if (m == NULL) { /* * First part is an inline SB_EMPTY_FIXUP(). Second * part makes sure sb_lastrecord is up-to-date if * there is still data in the socket buffer. */ so->so_rcv.sb_mb = nextrecord; if (so->so_rcv.sb_mb == NULL) { so->so_rcv.sb_mbtail = NULL; so->so_rcv.sb_lastrecord = NULL; } else if (nextrecord->m_nextpkt == NULL) so->so_rcv.sb_lastrecord = nextrecord; } SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); /* * If soreceive() is being done from the socket callback, * then we don't need to generate an ACK to the peer to * update the window, since the ACK will be generated on * return to TCP. */ if (!(flags & MSG_SOCALLBCK) && (pr->pr_flags & PR_WANTRCVD)) { SOCKBUF_UNLOCK(&so->so_rcv); VNET_SO_ASSERT(so); (*pr->pr_usrreqs->pru_rcvd)(so, flags); SOCKBUF_LOCK(&so->so_rcv); } } SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (orig_resid == uio->uio_resid && orig_resid && (flags & MSG_EOR) == 0 && (so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0) { SOCKBUF_UNLOCK(&so->so_rcv); goto restart; } SOCKBUF_UNLOCK(&so->so_rcv); if (flagsp != NULL) *flagsp |= flags; release: sbunlock(&so->so_rcv); return (error); } /* * Optimized version of soreceive() for stream (TCP) sockets. */ int soreceive_stream(struct socket *so, struct sockaddr **psa, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { int len = 0, error = 0, flags, oresid; struct sockbuf *sb; struct mbuf *m, *n = NULL; /* We only do stream sockets. */ if (so->so_type != SOCK_STREAM) return (EINVAL); if (psa != NULL) *psa = NULL; if (flagsp != NULL) flags = *flagsp &~ MSG_EOR; else flags = 0; if (controlp != NULL) *controlp = NULL; if (flags & MSG_OOB) return (soreceive_rcvoob(so, uio, flags)); if (mp0 != NULL) *mp0 = NULL; sb = &so->so_rcv; #ifdef KERN_TLS /* * KTLS stores TLS records as records with a control message to * describe the framing. * * We check once here before acquiring locks to optimize the * common case. */ if (sb->sb_tls_info != NULL) return (soreceive_generic(so, psa, uio, mp0, controlp, flagsp)); #endif /* Prevent other readers from entering the socket. */ error = sblock(sb, SBLOCKWAIT(flags)); if (error) return (error); SOCKBUF_LOCK(sb); #ifdef KERN_TLS if (sb->sb_tls_info != NULL) { SOCKBUF_UNLOCK(sb); sbunlock(sb); return (soreceive_generic(so, psa, uio, mp0, controlp, flagsp)); } #endif /* Easy one, no space to copyout anything. */ if (uio->uio_resid == 0) { error = EINVAL; goto out; } oresid = uio->uio_resid; /* We will never ever get anything unless we are or were connected.
*/ if (!(so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED))) { error = ENOTCONN; goto out; } restart: SOCKBUF_LOCK_ASSERT(&so->so_rcv); /* Abort if socket has reported problems. */ if (so->so_error) { if (sbavail(sb) > 0) goto deliver; if (oresid > uio->uio_resid) goto out; error = so->so_error; if (!(flags & MSG_PEEK)) so->so_error = 0; goto out; } /* Door is closed. Deliver what is left, if any. */ if (sb->sb_state & SBS_CANTRCVMORE) { if (sbavail(sb) > 0) goto deliver; else goto out; } /* Socket buffer is empty and we shall not block. */ if (sbavail(sb) == 0 && ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) { error = EAGAIN; goto out; } /* Socket buffer got some data that we shall deliver now. */ if (sbavail(sb) > 0 && !(flags & MSG_WAITALL) && ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)) || sbavail(sb) >= sb->sb_lowat || sbavail(sb) >= uio->uio_resid || sbavail(sb) >= sb->sb_hiwat) ) { goto deliver; } /* On MSG_WAITALL we must wait until all data or error arrives. */ if ((flags & MSG_WAITALL) && (sbavail(sb) >= uio->uio_resid || sbavail(sb) >= sb->sb_hiwat)) goto deliver; /* * Wait and block until (more) data comes in. * NB: Drops the sockbuf lock during wait. */ error = sbwait(sb); if (error) goto out; goto restart; deliver: SOCKBUF_LOCK_ASSERT(&so->so_rcv); KASSERT(sbavail(sb) > 0, ("%s: sockbuf empty", __func__)); KASSERT(sb->sb_mb != NULL, ("%s: sb_mb == NULL", __func__)); /* Statistics. */ if (uio->uio_td) uio->uio_td->td_ru.ru_msgrcv++; /* Fill uio until full or current end of socket buffer is reached. */ len = min(uio->uio_resid, sbavail(sb)); if (mp0 != NULL) { /* Dequeue as many mbufs as possible. */ if (!(flags & MSG_PEEK) && len >= sb->sb_mb->m_len) { if (*mp0 == NULL) *mp0 = sb->sb_mb; else m_cat(*mp0, sb->sb_mb); for (m = sb->sb_mb; m != NULL && m->m_len <= len; m = m->m_next) { KASSERT(!(m->m_flags & M_NOTAVAIL), ("%s: m %p not available", __func__, m)); len -= m->m_len; uio->uio_resid -= m->m_len; sbfree(sb, m); n = m; } n->m_next = NULL; sb->sb_mb = m; sb->sb_lastrecord = sb->sb_mb; if (sb->sb_mb == NULL) SB_EMPTY_FIXUP(sb); } /* Copy the remainder. */ if (len > 0) { KASSERT(sb->sb_mb != NULL, ("%s: len > 0 && sb->sb_mb empty", __func__)); m = m_copym(sb->sb_mb, 0, len, M_NOWAIT); if (m == NULL) len = 0; /* Don't flush data from sockbuf. */ else uio->uio_resid -= len; if (*mp0 != NULL) m_cat(*mp0, m); else *mp0 = m; if (*mp0 == NULL) { error = ENOBUFS; goto out; } } } else { /* NB: Must unlock socket buffer as uiomove may sleep. */ SOCKBUF_UNLOCK(sb); error = m_mbuftouio(uio, sb->sb_mb, len); SOCKBUF_LOCK(sb); if (error) goto out; } SBLASTRECORDCHK(sb); SBLASTMBUFCHK(sb); /* * Remove the delivered data from the socket buffer unless we * were only peeking. */ if (!(flags & MSG_PEEK)) { if (len > 0) sbdrop_locked(sb, len); /* Notify protocol that we drained some data. */ if ((so->so_proto->pr_flags & PR_WANTRCVD) && (((flags & MSG_WAITALL) && uio->uio_resid > 0) || !(flags & MSG_SOCALLBCK))) { SOCKBUF_UNLOCK(sb); VNET_SO_ASSERT(so); (*so->so_proto->pr_usrreqs->pru_rcvd)(so, flags); SOCKBUF_LOCK(sb); } } /* * For MSG_WAITALL we may have to loop again and wait for * more data to come in. */ if ((flags & MSG_WAITALL) && uio->uio_resid > 0) goto restart; out: SOCKBUF_LOCK_ASSERT(sb); SBLASTRECORDCHK(sb); SBLASTMBUFCHK(sb); SOCKBUF_UNLOCK(sb); sbunlock(sb); return (error); } /* * Optimized version of soreceive() for simple datagram cases from userspace. 
* Unlike in the stream case, we're able to drop a datagram if copyout() * fails, and because we handle datagrams atomically, we don't need to use a * sleep lock to prevent I/O interlacing. */ int soreceive_dgram(struct socket *so, struct sockaddr **psa, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { struct mbuf *m, *m2; int flags, error; ssize_t len; struct protosw *pr = so->so_proto; struct mbuf *nextrecord; if (psa != NULL) *psa = NULL; if (controlp != NULL) *controlp = NULL; if (flagsp != NULL) flags = *flagsp &~ MSG_EOR; else flags = 0; /* * For any complicated cases, fall back to the full * soreceive_generic(). */ if (mp0 != NULL || (flags & MSG_PEEK) || (flags & MSG_OOB)) return (soreceive_generic(so, psa, uio, mp0, controlp, flagsp)); /* * Enforce restrictions on use. */ KASSERT((pr->pr_flags & PR_WANTRCVD) == 0, ("soreceive_dgram: wantrcvd")); KASSERT(pr->pr_flags & PR_ATOMIC, ("soreceive_dgram: !atomic")); KASSERT((so->so_rcv.sb_state & SBS_RCVATMARK) == 0, ("soreceive_dgram: SBS_RCVATMARK")); KASSERT((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0, ("soreceive_dgram: P_CONNREQUIRED")); /* * Loop blocking while waiting for a datagram. */ SOCKBUF_LOCK(&so->so_rcv); while ((m = so->so_rcv.sb_mb) == NULL) { KASSERT(sbavail(&so->so_rcv) == 0, ("soreceive_dgram: sb_mb NULL but sbavail %u", sbavail(&so->so_rcv))); if (so->so_error) { error = so->so_error; so->so_error = 0; SOCKBUF_UNLOCK(&so->so_rcv); return (error); } if (so->so_rcv.sb_state & SBS_CANTRCVMORE || uio->uio_resid == 0) { SOCKBUF_UNLOCK(&so->so_rcv); return (0); } if ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO))) { SOCKBUF_UNLOCK(&so->so_rcv); return (EWOULDBLOCK); } SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); error = sbwait(&so->so_rcv); if (error) { SOCKBUF_UNLOCK(&so->so_rcv); return (error); } } SOCKBUF_LOCK_ASSERT(&so->so_rcv); if (uio->uio_td) uio->uio_td->td_ru.ru_msgrcv++; SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); nextrecord = m->m_nextpkt; if (nextrecord == NULL) { KASSERT(so->so_rcv.sb_lastrecord == m, ("soreceive_dgram: lastrecord != m")); } KASSERT(so->so_rcv.sb_mb->m_nextpkt == nextrecord, ("soreceive_dgram: m_nextpkt != nextrecord")); /* * Pull 'm' and its chain off the front of the packet queue. */ so->so_rcv.sb_mb = NULL; sockbuf_pushsync(&so->so_rcv, nextrecord); /* * Walk 'm's chain and free that many bytes from the socket buffer. */ for (m2 = m; m2 != NULL; m2 = m2->m_next) sbfree(&so->so_rcv, m2); /* * Do a few last checks before we let go of the lock. */ SBLASTRECORDCHK(&so->so_rcv); SBLASTMBUFCHK(&so->so_rcv); SOCKBUF_UNLOCK(&so->so_rcv); if (pr->pr_flags & PR_ADDR) { KASSERT(m->m_type == MT_SONAME, ("m->m_type == %d", m->m_type)); if (psa != NULL) *psa = sodupsockaddr(mtod(m, struct sockaddr *), M_NOWAIT); m = m_free(m); } if (m == NULL) { /* XXXRW: Can this happen? */ return (0); } /* * Packet to copyout() is now in 'm' and it is disconnected from the * queue. * * Process one or more MT_CONTROL mbufs present before any data mbufs * in the first mbuf chain on the socket buffer. We call into the * protocol to perform externalization (or freeing if controlp == * NULL). In some cases there can be only MT_CONTROL mbufs without * MT_DATA mbufs. 
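 *
 * From userland such a record surfaces purely as ancillary data
 * (illustrative sketch, not part of this file; fd is an assumed
 * descriptor):
 *
 *	struct msghdr msg;
 *	char cbuf[CMSG_SPACE(sizeof(int))];
 *
 *	bzero(&msg, sizeof(msg));
 *	msg.msg_control = cbuf;
 *	msg.msg_controllen = sizeof(cbuf);
 *	recvmsg(fd, &msg, 0);	// may return e.g. SCM_RIGHTS and no data
 *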
*/ if (m->m_type == MT_CONTROL) { struct mbuf *cm = NULL, *cmn; struct mbuf **cme = &cm; do { m2 = m->m_next; m->m_next = NULL; *cme = m; cme = &(*cme)->m_next; m = m2; } while (m != NULL && m->m_type == MT_CONTROL); while (cm != NULL) { cmn = cm->m_next; cm->m_next = NULL; if (pr->pr_domain->dom_externalize != NULL) { error = (*pr->pr_domain->dom_externalize) (cm, controlp, flags); } else if (controlp != NULL) *controlp = cm; else m_freem(cm); if (controlp != NULL) { while (*controlp != NULL) controlp = &(*controlp)->m_next; } cm = cmn; } } KASSERT(m == NULL || m->m_type == MT_DATA, ("soreceive_dgram: !data")); while (m != NULL && uio->uio_resid > 0) { len = uio->uio_resid; if (len > m->m_len) len = m->m_len; error = uiomove(mtod(m, char *), (int)len, uio); if (error) { m_freem(m); return (error); } if (len == m->m_len) m = m_free(m); else { m->m_data += len; m->m_len -= len; } } if (m != NULL) { flags |= MSG_TRUNC; m_freem(m); } if (flagsp != NULL) *flagsp |= flags; return (0); } int soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { int error; CURVNET_SET(so->so_vnet); if (!SOLISTENING(so)) error = (so->so_proto->pr_usrreqs->pru_soreceive(so, psa, uio, mp0, controlp, flagsp)); else error = ENOTCONN; CURVNET_RESTORE(); return (error); } int soshutdown(struct socket *so, int how) { struct protosw *pr = so->so_proto; int error, soerror_enotconn; if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR)) return (EINVAL); soerror_enotconn = 0; if ((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING | SS_ISDISCONNECTING)) == 0) { /* * POSIX mandates us to return ENOTCONN when shutdown(2) is * invoked on a datagram socket, however historically we would * actually tear the socket down. This is known to be leveraged * by some applications to unblock a process waiting in * recvXXX(2) by another process that it shares that socket * with. Try to meet both backward-compatibility and POSIX * requirements by forcing ENOTCONN but still asking the * protocol to perform pru_shutdown(). */ if (so->so_type != SOCK_DGRAM && !SOLISTENING(so)) return (ENOTCONN); soerror_enotconn = 1; } if (SOLISTENING(so)) { if (how != SHUT_WR) { SOLISTEN_LOCK(so); so->so_error = ECONNABORTED; solisten_wakeup(so); /* unlocks so */ } goto done; } CURVNET_SET(so->so_vnet); if (pr->pr_usrreqs->pru_flush != NULL) (*pr->pr_usrreqs->pru_flush)(so, how); if (how != SHUT_WR) sorflush(so); if (how != SHUT_RD) { error = (*pr->pr_usrreqs->pru_shutdown)(so); wakeup(&so->so_timeo); CURVNET_RESTORE(); return ((error == 0 && soerror_enotconn) ? ENOTCONN : error); } wakeup(&so->so_timeo); CURVNET_RESTORE(); done: return (soerror_enotconn ? ENOTCONN : 0); } void sorflush(struct socket *so) { struct sockbuf *sb = &so->so_rcv; struct protosw *pr = so->so_proto; struct socket aso; VNET_SO_ASSERT(so); /* * In order to avoid calling dom_dispose with the socket buffer mutex * held, and in order to generally avoid holding the lock for a long * time, we make a copy of the socket buffer and clear the original * (except locks, state). The new socket buffer copy won't have * initialized locks so we can only call routines that won't use or * assert those locks. * * Dislodge threads currently blocked in receive and wait to acquire * a lock against other simultaneous readers before clearing the * socket buffer. Don't let our acquire be interrupted by a signal * despite any existing socket disposition on interruptible waiting.
*/ socantrcvmore(so); (void) sblock(sb, SBL_WAIT | SBL_NOINTR); /* * Invalidate/clear most of the sockbuf structure, but leave selinfo * and mutex data unchanged. */ SOCKBUF_LOCK(sb); bzero(&aso, sizeof(aso)); aso.so_pcb = so->so_pcb; bcopy(&sb->sb_startzero, &aso.so_rcv.sb_startzero, sizeof(*sb) - offsetof(struct sockbuf, sb_startzero)); bzero(&sb->sb_startzero, sizeof(*sb) - offsetof(struct sockbuf, sb_startzero)); SOCKBUF_UNLOCK(sb); sbunlock(sb); /* * Dispose of special rights and flush the copied socket. Don't call * any unsafe routines (that rely on locks being initialized) on aso. */ if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL) (*pr->pr_domain->dom_dispose)(&aso); sbrelease_internal(&aso.so_rcv, so); } /* * Wrapper for Socket established helper hook. * Parameters: socket, context of the hook point, hook id. */ static int inline hhook_run_socket(struct socket *so, void *hctx, int32_t h_id) { struct socket_hhook_data hhook_data = { .so = so, .hctx = hctx, .m = NULL, .status = 0 }; CURVNET_SET(so->so_vnet); HHOOKS_RUN_IF(V_socket_hhh[h_id], &hhook_data, &so->osd); CURVNET_RESTORE(); /* Ugly but needed, since hhooks return void for now */ return (hhook_data.status); } /* * Perhaps this routine, and sooptcopyout(), below, ought to come in an * additional variant to handle the case where the option value needs to be * some kind of integer, but not a specific size. In addition to their use * here, these functions are also called by the protocol-level pr_ctloutput() * routines. */ int sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen) { size_t valsize; /* * If the user gives us more than we wanted, we ignore it, but if we * don't get the minimum length the caller wants, we return EINVAL. * On success, sopt->sopt_valsize is set to however much we actually * retrieved. */ if ((valsize = sopt->sopt_valsize) < minlen) return EINVAL; if (valsize > len) sopt->sopt_valsize = valsize = len; if (sopt->sopt_td != NULL) return (copyin(sopt->sopt_val, buf, valsize)); bcopy(sopt->sopt_val, buf, valsize); return (0); } /* * Kernel version of setsockopt(2). 
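 *
 * A minimal in-kernel use might look like this (sketch only; 'so' and
 * 'error' are assumed to exist, error handling elided):
 *
 *	int on = 1;
 *
 *	error = so_setsockopt(so, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
 *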
* * XXX: optlen is size_t, not socklen_t */ int so_setsockopt(struct socket *so, int level, int optname, void *optval, size_t optlen) { struct sockopt sopt; sopt.sopt_level = level; sopt.sopt_name = optname; sopt.sopt_dir = SOPT_SET; sopt.sopt_val = optval; sopt.sopt_valsize = optlen; sopt.sopt_td = NULL; return (sosetopt(so, &sopt)); } int sosetopt(struct socket *so, struct sockopt *sopt) { int error, optval; struct linger l; struct timeval tv; sbintime_t val; uint32_t val32; #ifdef MAC struct mac extmac; #endif CURVNET_SET(so->so_vnet); error = 0; if (sopt->sopt_level != SOL_SOCKET) { if (so->so_proto->pr_ctloutput != NULL) error = (*so->so_proto->pr_ctloutput)(so, sopt); else error = ENOPROTOOPT; } else { switch (sopt->sopt_name) { case SO_ACCEPTFILTER: error = accept_filt_setopt(so, sopt); if (error) goto bad; break; case SO_LINGER: error = sooptcopyin(sopt, &l, sizeof l, sizeof l); if (error) goto bad; if (l.l_linger < 0 || l.l_linger > USHRT_MAX || l.l_linger > (INT_MAX / hz)) { error = EDOM; goto bad; } SOCK_LOCK(so); so->so_linger = l.l_linger; if (l.l_onoff) so->so_options |= SO_LINGER; else so->so_options &= ~SO_LINGER; SOCK_UNLOCK(so); break; case SO_DEBUG: case SO_KEEPALIVE: case SO_DONTROUTE: case SO_USELOOPBACK: case SO_BROADCAST: case SO_REUSEADDR: case SO_REUSEPORT: case SO_REUSEPORT_LB: case SO_OOBINLINE: case SO_TIMESTAMP: case SO_BINTIME: case SO_NOSIGPIPE: case SO_NO_DDP: case SO_NO_OFFLOAD: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) goto bad; SOCK_LOCK(so); if (optval) so->so_options |= sopt->sopt_name; else so->so_options &= ~sopt->sopt_name; SOCK_UNLOCK(so); break; case SO_SETFIB: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) goto bad; if (optval < 0 || optval >= rt_numfibs) { error = EINVAL; goto bad; } if (((so->so_proto->pr_domain->dom_family == PF_INET) || (so->so_proto->pr_domain->dom_family == PF_INET6) || (so->so_proto->pr_domain->dom_family == PF_ROUTE))) so->so_fibnum = optval; else so->so_fibnum = 0; break; case SO_USER_COOKIE: error = sooptcopyin(sopt, &val32, sizeof val32, sizeof val32); if (error) goto bad; so->so_user_cookie = val32; break; case SO_SNDBUF: case SO_RCVBUF: case SO_SNDLOWAT: case SO_RCVLOWAT: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) goto bad; /* * Values < 1 make no sense for any of these options, * so disallow them. 
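 *
 * E.g., from userland (illustrative sketch; fd is an assumed
 * descriptor), this is rejected with EINVAL:
 *
 *	int sz = 0;
 *	setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &sz, sizeof(sz));
 *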
*/ if (optval < 1) { error = EINVAL; goto bad; } error = sbsetopt(so, sopt->sopt_name, optval); break; case SO_SNDTIMEO: case SO_RCVTIMEO: #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) { struct timeval32 tv32; error = sooptcopyin(sopt, &tv32, sizeof tv32, sizeof tv32); CP(tv32, tv, tv_sec); CP(tv32, tv, tv_usec); } else #endif error = sooptcopyin(sopt, &tv, sizeof tv, sizeof tv); if (error) goto bad; if (tv.tv_sec < 0 || tv.tv_usec < 0 || tv.tv_usec >= 1000000) { error = EDOM; goto bad; } if (tv.tv_sec > INT32_MAX) val = SBT_MAX; else val = tvtosbt(tv); switch (sopt->sopt_name) { case SO_SNDTIMEO: so->so_snd.sb_timeo = val; break; case SO_RCVTIMEO: so->so_rcv.sb_timeo = val; break; } break; case SO_LABEL: #ifdef MAC error = sooptcopyin(sopt, &extmac, sizeof extmac, sizeof extmac); if (error) goto bad; error = mac_setsockopt_label(sopt->sopt_td->td_ucred, so, &extmac); #else error = EOPNOTSUPP; #endif break; case SO_TS_CLOCK: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); if (error) goto bad; if (optval < 0 || optval > SO_TS_CLOCK_MAX) { error = EINVAL; goto bad; } so->so_ts_clock = optval; break; case SO_MAX_PACING_RATE: error = sooptcopyin(sopt, &val32, sizeof(val32), sizeof(val32)); if (error) goto bad; so->so_max_pacing_rate = val32; break; default: if (V_socket_hhh[HHOOK_SOCKET_OPT]->hhh_nhooks > 0) error = hhook_run_socket(so, sopt, HHOOK_SOCKET_OPT); else error = ENOPROTOOPT; break; } if (error == 0 && so->so_proto->pr_ctloutput != NULL) (void)(*so->so_proto->pr_ctloutput)(so, sopt); } bad: CURVNET_RESTORE(); return (error); } /* * Helper routine for getsockopt. */ int sooptcopyout(struct sockopt *sopt, const void *buf, size_t len) { int error; size_t valsize; error = 0; /* * Documented get behavior is that we always return a value, possibly * truncated to fit in the user's buffer. Traditional behavior is * that we always tell the user precisely how much we copied, rather * than something useful like the total amount we had available for * her. Note that this interface is not idempotent; the entire * answer must be generated ahead of time. 
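 *
 * The userland consequence (illustrative sketch): getsockopt(2) with a
 * buffer that is too small succeeds and reports back the truncated
 * size rather than the full size:
 *
 *	char small[2];
 *	socklen_t optlen = sizeof(small);
 *
 *	getsockopt(fd, SOL_SOCKET, SO_LINGER, small, &optlen);
 *	// optlen is now 2, not sizeof(struct linger)
 *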
*/ valsize = min(len, sopt->sopt_valsize); sopt->sopt_valsize = valsize; if (sopt->sopt_val != NULL) { if (sopt->sopt_td != NULL) error = copyout(buf, sopt->sopt_val, valsize); else bcopy(buf, sopt->sopt_val, valsize); } return (error); } int sogetopt(struct socket *so, struct sockopt *sopt) { int error, optval; struct linger l; struct timeval tv; #ifdef MAC struct mac extmac; #endif CURVNET_SET(so->so_vnet); error = 0; if (sopt->sopt_level != SOL_SOCKET) { if (so->so_proto->pr_ctloutput != NULL) error = (*so->so_proto->pr_ctloutput)(so, sopt); else error = ENOPROTOOPT; CURVNET_RESTORE(); return (error); } else { switch (sopt->sopt_name) { case SO_ACCEPTFILTER: error = accept_filt_getopt(so, sopt); break; case SO_LINGER: SOCK_LOCK(so); l.l_onoff = so->so_options & SO_LINGER; l.l_linger = so->so_linger; SOCK_UNLOCK(so); error = sooptcopyout(sopt, &l, sizeof l); break; case SO_USELOOPBACK: case SO_DONTROUTE: case SO_DEBUG: case SO_KEEPALIVE: case SO_REUSEADDR: case SO_REUSEPORT: case SO_REUSEPORT_LB: case SO_BROADCAST: case SO_OOBINLINE: case SO_ACCEPTCONN: case SO_TIMESTAMP: case SO_BINTIME: case SO_NOSIGPIPE: case SO_NO_DDP: case SO_NO_OFFLOAD: optval = so->so_options & sopt->sopt_name; integer: error = sooptcopyout(sopt, &optval, sizeof optval); break; case SO_DOMAIN: optval = so->so_proto->pr_domain->dom_family; goto integer; case SO_TYPE: optval = so->so_type; goto integer; case SO_PROTOCOL: optval = so->so_proto->pr_protocol; goto integer; case SO_ERROR: SOCK_LOCK(so); optval = so->so_error; so->so_error = 0; SOCK_UNLOCK(so); goto integer; case SO_SNDBUF: optval = SOLISTENING(so) ? so->sol_sbsnd_hiwat : so->so_snd.sb_hiwat; goto integer; case SO_RCVBUF: optval = SOLISTENING(so) ? so->sol_sbrcv_hiwat : so->so_rcv.sb_hiwat; goto integer; case SO_SNDLOWAT: optval = SOLISTENING(so) ? so->sol_sbsnd_lowat : so->so_snd.sb_lowat; goto integer; case SO_RCVLOWAT: optval = SOLISTENING(so) ? so->sol_sbrcv_lowat : so->so_rcv.sb_lowat; goto integer; case SO_SNDTIMEO: case SO_RCVTIMEO: tv = sbttotv(sopt->sopt_name == SO_SNDTIMEO ? so->so_snd.sb_timeo : so->so_rcv.sb_timeo); #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) { struct timeval32 tv32; CP(tv, tv32, tv_sec); CP(tv, tv32, tv_usec); error = sooptcopyout(sopt, &tv32, sizeof tv32); } else #endif error = sooptcopyout(sopt, &tv, sizeof tv); break; case SO_LABEL: #ifdef MAC error = sooptcopyin(sopt, &extmac, sizeof(extmac), sizeof(extmac)); if (error) goto bad; error = mac_getsockopt_label(sopt->sopt_td->td_ucred, so, &extmac); if (error) goto bad; error = sooptcopyout(sopt, &extmac, sizeof extmac); #else error = EOPNOTSUPP; #endif break; case SO_PEERLABEL: #ifdef MAC error = sooptcopyin(sopt, &extmac, sizeof(extmac), sizeof(extmac)); if (error) goto bad; error = mac_getsockopt_peerlabel( sopt->sopt_td->td_ucred, so, &extmac); if (error) goto bad; error = sooptcopyout(sopt, &extmac, sizeof extmac); #else error = EOPNOTSUPP; #endif break; case SO_LISTENQLIMIT: optval = SOLISTENING(so) ? so->sol_qlimit : 0; goto integer; case SO_LISTENQLEN: optval = SOLISTENING(so) ? so->sol_qlen : 0; goto integer; case SO_LISTENINCQLEN: optval = SOLISTENING(so) ? 
so->sol_incqlen : 0; goto integer; case SO_TS_CLOCK: optval = so->so_ts_clock; goto integer; case SO_MAX_PACING_RATE: optval = so->so_max_pacing_rate; goto integer; default: if (V_socket_hhh[HHOOK_SOCKET_OPT]->hhh_nhooks > 0) error = hhook_run_socket(so, sopt, HHOOK_SOCKET_OPT); else error = ENOPROTOOPT; break; } } #ifdef MAC bad: #endif CURVNET_RESTORE(); return (error); } int soopt_getm(struct sockopt *sopt, struct mbuf **mp) { struct mbuf *m, *m_prev; int sopt_size = sopt->sopt_valsize; MGET(m, sopt->sopt_td ? M_WAITOK : M_NOWAIT, MT_DATA); if (m == NULL) return ENOBUFS; if (sopt_size > MLEN) { MCLGET(m, sopt->sopt_td ? M_WAITOK : M_NOWAIT); if ((m->m_flags & M_EXT) == 0) { m_free(m); return ENOBUFS; } m->m_len = min(MCLBYTES, sopt_size); } else { m->m_len = min(MLEN, sopt_size); } sopt_size -= m->m_len; *mp = m; m_prev = m; while (sopt_size) { MGET(m, sopt->sopt_td ? M_WAITOK : M_NOWAIT, MT_DATA); if (m == NULL) { m_freem(*mp); return ENOBUFS; } if (sopt_size > MLEN) { MCLGET(m, sopt->sopt_td != NULL ? M_WAITOK : M_NOWAIT); if ((m->m_flags & M_EXT) == 0) { m_freem(m); m_freem(*mp); return ENOBUFS; } m->m_len = min(MCLBYTES, sopt_size); } else { m->m_len = min(MLEN, sopt_size); } sopt_size -= m->m_len; m_prev->m_next = m; m_prev = m; } return (0); } int soopt_mcopyin(struct sockopt *sopt, struct mbuf *m) { struct mbuf *m0 = m; if (sopt->sopt_val == NULL) return (0); while (m != NULL && sopt->sopt_valsize >= m->m_len) { if (sopt->sopt_td != NULL) { int error; error = copyin(sopt->sopt_val, mtod(m, char *), m->m_len); if (error != 0) { m_freem(m0); return(error); } } else bcopy(sopt->sopt_val, mtod(m, char *), m->m_len); sopt->sopt_valsize -= m->m_len; sopt->sopt_val = (char *)sopt->sopt_val + m->m_len; m = m->m_next; } if (m != NULL) /* should have been allocated with enough space by ip6_sooptmcopyin() */ panic("ip6_sooptmcopyin"); return (0); } int soopt_mcopyout(struct sockopt *sopt, struct mbuf *m) { struct mbuf *m0 = m; size_t valsize = 0; if (sopt->sopt_val == NULL) return (0); while (m != NULL && sopt->sopt_valsize >= m->m_len) { if (sopt->sopt_td != NULL) { int error; error = copyout(mtod(m, char *), sopt->sopt_val, m->m_len); if (error != 0) { m_freem(m0); return(error); } } else bcopy(mtod(m, char *), sopt->sopt_val, m->m_len); sopt->sopt_valsize -= m->m_len; sopt->sopt_val = (char *)sopt->sopt_val + m->m_len; valsize += m->m_len; m = m->m_next; } if (m != NULL) { /* a large enough soopt buffer should be given from userland */ m_freem(m0); return(EINVAL); } sopt->sopt_valsize = valsize; return (0); } /* * sohasoutofband(): protocol notifies socket layer of the arrival of new * out-of-band data, which will then notify socket consumers. */ void sohasoutofband(struct socket *so) { if (so->so_sigio != NULL) pgsigio(&so->so_sigio, SIGURG, 0); selwakeuppri(&so->so_rdsel, PSOCK); } int sopoll(struct socket *so, int events, struct ucred *active_cred, struct thread *td) { /* * We do not need to set or assert curvnet as long as everyone uses * sopoll_generic().
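 *
 * For reference, the userland-visible behavior implemented by
 * sopoll_generic() below (illustrative sketch; fd is an assumed
 * descriptor): once both directions have been shut down, POLLHUP is
 * reported alongside readability:
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN };
 *
 *	poll(&pfd, 1, -1);
 *	// pfd.revents may contain POLLIN | POLLHUP after peer close
 *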
*/ return (so->so_proto->pr_usrreqs->pru_sopoll(so, events, active_cred, td)); } int sopoll_generic(struct socket *so, int events, struct ucred *active_cred, struct thread *td) { int revents; SOCK_LOCK(so); if (SOLISTENING(so)) { if (!(events & (POLLIN | POLLRDNORM))) revents = 0; else if (!TAILQ_EMPTY(&so->sol_comp)) revents = events & (POLLIN | POLLRDNORM); else if ((events & POLLINIGNEOF) == 0 && so->so_error) revents = (events & (POLLIN | POLLRDNORM)) | POLLHUP; else { selrecord(td, &so->so_rdsel); revents = 0; } } else { revents = 0; SOCKBUF_LOCK(&so->so_snd); SOCKBUF_LOCK(&so->so_rcv); if (events & (POLLIN | POLLRDNORM)) if (soreadabledata(so)) revents |= events & (POLLIN | POLLRDNORM); if (events & (POLLOUT | POLLWRNORM)) if (sowriteable(so)) revents |= events & (POLLOUT | POLLWRNORM); if (events & (POLLPRI | POLLRDBAND)) if (so->so_oobmark || (so->so_rcv.sb_state & SBS_RCVATMARK)) revents |= events & (POLLPRI | POLLRDBAND); if ((events & POLLINIGNEOF) == 0) { if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { revents |= events & (POLLIN | POLLRDNORM); if (so->so_snd.sb_state & SBS_CANTSENDMORE) revents |= POLLHUP; } } if (revents == 0) { if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) { selrecord(td, &so->so_rdsel); so->so_rcv.sb_flags |= SB_SEL; } if (events & (POLLOUT | POLLWRNORM)) { selrecord(td, &so->so_wrsel); so->so_snd.sb_flags |= SB_SEL; } } SOCKBUF_UNLOCK(&so->so_rcv); SOCKBUF_UNLOCK(&so->so_snd); } SOCK_UNLOCK(so); return (revents); } int soo_kqfilter(struct file *fp, struct knote *kn) { struct socket *so = kn->kn_fp->f_data; struct sockbuf *sb; struct knlist *knl; switch (kn->kn_filter) { case EVFILT_READ: kn->kn_fop = &soread_filtops; knl = &so->so_rdsel.si_note; sb = &so->so_rcv; break; case EVFILT_WRITE: kn->kn_fop = &sowrite_filtops; knl = &so->so_wrsel.si_note; sb = &so->so_snd; break; case EVFILT_EMPTY: kn->kn_fop = &soempty_filtops; knl = &so->so_wrsel.si_note; sb = &so->so_snd; break; default: return (EINVAL); } SOCK_LOCK(so); if (SOLISTENING(so)) { knlist_add(knl, kn, 1); } else { SOCKBUF_LOCK(sb); knlist_add(knl, kn, 1); sb->sb_flags |= SB_KNOTE; SOCKBUF_UNLOCK(sb); } SOCK_UNLOCK(so); return (0); } /* * Some routines that return EOPNOTSUPP for entry points that are not * supported by a protocol. Fill in as needed. 
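 *
 * A protocol that does not implement a given entry point can wire the
 * matching stub into its pr_usrreqs (hypothetical fragment; the foo_*
 * names are invented):
 *
 *	static struct pr_usrreqs foo_usrreqs = {
 *		.pru_attach =	foo_attach,
 *		.pru_send =	foo_send,
 *		.pru_rcvoob =	pru_rcvoob_notsupp,
 *		.pru_connect2 =	pru_connect2_notsupp,
 *	};
 *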
*/ int pru_accept_notsupp(struct socket *so, struct sockaddr **nam) { return EOPNOTSUPP; } int pru_aio_queue_notsupp(struct socket *so, struct kaiocb *job) { return EOPNOTSUPP; } int pru_attach_notsupp(struct socket *so, int proto, struct thread *td) { return EOPNOTSUPP; } int pru_bind_notsupp(struct socket *so, struct sockaddr *nam, struct thread *td) { return EOPNOTSUPP; } int pru_bindat_notsupp(int fd, struct socket *so, struct sockaddr *nam, struct thread *td) { return EOPNOTSUPP; } int pru_connect_notsupp(struct socket *so, struct sockaddr *nam, struct thread *td) { return EOPNOTSUPP; } int pru_connectat_notsupp(int fd, struct socket *so, struct sockaddr *nam, struct thread *td) { return EOPNOTSUPP; } int pru_connect2_notsupp(struct socket *so1, struct socket *so2) { return EOPNOTSUPP; } int pru_control_notsupp(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td) { return EOPNOTSUPP; } int pru_disconnect_notsupp(struct socket *so) { return EOPNOTSUPP; } int pru_listen_notsupp(struct socket *so, int backlog, struct thread *td) { return EOPNOTSUPP; } int pru_peeraddr_notsupp(struct socket *so, struct sockaddr **nam) { return EOPNOTSUPP; } int pru_rcvd_notsupp(struct socket *so, int flags) { return EOPNOTSUPP; } int pru_rcvoob_notsupp(struct socket *so, struct mbuf *m, int flags) { return EOPNOTSUPP; } int pru_send_notsupp(struct socket *so, int flags, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct thread *td) { return EOPNOTSUPP; } int pru_ready_notsupp(struct socket *so, struct mbuf *m, int count) { return (EOPNOTSUPP); } /* * This isn't really a ``null'' operation, but it's the default one and * doesn't do anything destructive. */ int pru_sense_null(struct socket *so, struct stat *sb) { sb->st_blksize = so->so_snd.sb_hiwat; return 0; } int pru_shutdown_notsupp(struct socket *so) { return EOPNOTSUPP; } int pru_sockaddr_notsupp(struct socket *so, struct sockaddr **nam) { return EOPNOTSUPP; } int pru_sosend_notsupp(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td) { return EOPNOTSUPP; } int pru_soreceive_notsupp(struct socket *so, struct sockaddr **paddr, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { return EOPNOTSUPP; } int pru_sopoll_notsupp(struct socket *so, int events, struct ucred *cred, struct thread *td) { return EOPNOTSUPP; } static void filt_sordetach(struct knote *kn) { struct socket *so = kn->kn_fp->f_data; so_rdknl_lock(so); knlist_remove(&so->so_rdsel.si_note, kn, 1); if (!SOLISTENING(so) && knlist_empty(&so->so_rdsel.si_note)) so->so_rcv.sb_flags &= ~SB_KNOTE; so_rdknl_unlock(so); } /*ARGSUSED*/ static int filt_soread(struct knote *kn, long hint) { struct socket *so; so = kn->kn_fp->f_data; if (SOLISTENING(so)) { SOCK_LOCK_ASSERT(so); kn->kn_data = so->sol_qlen; if (so->so_error) { kn->kn_flags |= EV_EOF; kn->kn_fflags = so->so_error; return (1); } return (!TAILQ_EMPTY(&so->sol_comp)); } SOCKBUF_LOCK_ASSERT(&so->so_rcv); kn->kn_data = sbavail(&so->so_rcv) - so->so_rcv.sb_ctl; if (so->so_rcv.sb_state & SBS_CANTRCVMORE) { kn->kn_flags |= EV_EOF; kn->kn_fflags = so->so_error; return (1); } else if (so->so_error) /* temporary udp error */ return (1); if (kn->kn_sfflags & NOTE_LOWAT) { if (kn->kn_data >= kn->kn_sdata) return (1); } else if (sbavail(&so->so_rcv) >= so->so_rcv.sb_lowat) return (1); /* This hook returning non-zero indicates an event, not error */ return (hhook_run_socket(so, NULL, HHOOK_FILT_SOREAD)); } 
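/*
 * Userland counterpart of filt_soread() above (an illustrative sketch,
 * not part of this file; kq and fd are assumed to exist): registering
 * EVFILT_READ with NOTE_LOWAT makes the event fire only once at least
 * that many bytes are buffered, mirroring the kn_sdata check above.
 *
 *	struct kevent kev;
 *
 *	EV_SET(&kev, fd, EVFILT_READ, EV_ADD, NOTE_LOWAT, 4096, NULL);
 *	kevent(kq, &kev, 1, NULL, 0, NULL);
 */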
static void filt_sowdetach(struct knote *kn) { struct socket *so = kn->kn_fp->f_data; so_wrknl_lock(so); knlist_remove(&so->so_wrsel.si_note, kn, 1); if (!SOLISTENING(so) && knlist_empty(&so->so_wrsel.si_note)) so->so_snd.sb_flags &= ~SB_KNOTE; so_wrknl_unlock(so); } /*ARGSUSED*/ static int filt_sowrite(struct knote *kn, long hint) { struct socket *so; so = kn->kn_fp->f_data; if (SOLISTENING(so)) return (0); SOCKBUF_LOCK_ASSERT(&so->so_snd); kn->kn_data = sbspace(&so->so_snd); hhook_run_socket(so, kn, HHOOK_FILT_SOWRITE); if (so->so_snd.sb_state & SBS_CANTSENDMORE) { kn->kn_flags |= EV_EOF; kn->kn_fflags = so->so_error; return (1); } else if (so->so_error) /* temporary udp error */ return (1); else if (((so->so_state & SS_ISCONNECTED) == 0) && (so->so_proto->pr_flags & PR_CONNREQUIRED)) return (0); else if (kn->kn_sfflags & NOTE_LOWAT) return (kn->kn_data >= kn->kn_sdata); else return (kn->kn_data >= so->so_snd.sb_lowat); } static int filt_soempty(struct knote *kn, long hint) { struct socket *so; so = kn->kn_fp->f_data; if (SOLISTENING(so)) return (1); SOCKBUF_LOCK_ASSERT(&so->so_snd); kn->kn_data = sbused(&so->so_snd); if (kn->kn_data == 0) return (1); else return (0); } int socheckuid(struct socket *so, uid_t uid) { if (so == NULL) return (EPERM); if (so->so_cred->cr_uid != uid) return (EPERM); return (0); } /* * These functions are used by protocols to notify the socket layer (and its * consumers) of state changes in the sockets driven by protocol-side events. */ /* * Procedures to manipulate state flags of socket and do appropriate wakeups. * * Normal sequence from the active (originating) side is that * soisconnecting() is called during processing of connect() call, resulting * in an eventual call to soisconnected() if/when the connection is * established. When the connection is torn down soisdisconnecting() is * called during processing of disconnect() call, and soisdisconnected() is * called when the connection to the peer is totally severed. The semantics * of these routines are such that connectionless protocols can call * soisconnected() and soisdisconnected() only, bypassing the in-progress * calls when setting up a ``connection'' takes no time. * * From the passive side, a socket is created with two queues of sockets: * so_incomp for connections in progress and so_comp for connections already * made and awaiting user acceptance. As a protocol is preparing incoming * connections, it creates a socket structure queued on so_incomp by calling * sonewconn(). When the connection is established, soisconnected() is * called, and transfers the socket structure to so_comp, making it available * to accept(). * * If a socket is closed with sockets on either so_incomp or so_comp, these * sockets are dropped. * * If higher-level protocols are implemented in the kernel, the wakeups done * here will sometimes cause software-interrupt process scheduling. */ void soisconnecting(struct socket *so) { SOCK_LOCK(so); so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING); so->so_state |= SS_ISCONNECTING; SOCK_UNLOCK(so); } void soisconnected(struct socket *so) { SOCK_LOCK(so); so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING); so->so_state |= SS_ISCONNECTED; if (so->so_qstate == SQ_INCOMP) { struct socket *head = so->so_listen; int ret; KASSERT(head, ("%s: so %p on incomp of NULL", __func__, so)); /* * Promoting a socket from incomplete queue to complete, we * need to go through reverse order of locking. 
We first do * trylock, and if that doesn't succeed, we go the hard way * leaving a reference and rechecking consistency after proper * locking. */ if (__predict_false(SOLISTEN_TRYLOCK(head) == 0)) { soref(head); SOCK_UNLOCK(so); SOLISTEN_LOCK(head); SOCK_LOCK(so); if (__predict_false(head != so->so_listen)) { /* * The socket went off the listen queue, * should be lost race to close(2) of sol. * The socket is about to soabort(). */ SOCK_UNLOCK(so); sorele(head); return; } /* Not the last one, as so holds a ref. */ refcount_release(&head->so_count); } again: if ((so->so_options & SO_ACCEPTFILTER) == 0) { TAILQ_REMOVE(&head->sol_incomp, so, so_list); head->sol_incqlen--; TAILQ_INSERT_TAIL(&head->sol_comp, so, so_list); head->sol_qlen++; so->so_qstate = SQ_COMP; SOCK_UNLOCK(so); solisten_wakeup(head); /* unlocks */ } else { SOCKBUF_LOCK(&so->so_rcv); soupcall_set(so, SO_RCV, head->sol_accept_filter->accf_callback, head->sol_accept_filter_arg); so->so_options &= ~SO_ACCEPTFILTER; ret = head->sol_accept_filter->accf_callback(so, head->sol_accept_filter_arg, M_NOWAIT); if (ret == SU_ISCONNECTED) { soupcall_clear(so, SO_RCV); SOCKBUF_UNLOCK(&so->so_rcv); goto again; } SOCKBUF_UNLOCK(&so->so_rcv); SOCK_UNLOCK(so); SOLISTEN_UNLOCK(head); } return; } SOCK_UNLOCK(so); wakeup(&so->so_timeo); sorwakeup(so); sowwakeup(so); } void soisdisconnecting(struct socket *so) { SOCK_LOCK(so); so->so_state &= ~SS_ISCONNECTING; so->so_state |= SS_ISDISCONNECTING; if (!SOLISTENING(so)) { SOCKBUF_LOCK(&so->so_rcv); socantrcvmore_locked(so); SOCKBUF_LOCK(&so->so_snd); socantsendmore_locked(so); } SOCK_UNLOCK(so); wakeup(&so->so_timeo); } void soisdisconnected(struct socket *so) { SOCK_LOCK(so); /* * There is at least one reader of so_state that does not * acquire socket lock, namely soreceive_generic(). Ensure * that it never sees all flags that track connection status * cleared, by ordering the update with a barrier semantic of * our release thread fence. */ so->so_state |= SS_ISDISCONNECTED; atomic_thread_fence_rel(); so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING); if (!SOLISTENING(so)) { SOCK_UNLOCK(so); SOCKBUF_LOCK(&so->so_rcv); socantrcvmore_locked(so); SOCKBUF_LOCK(&so->so_snd); sbdrop_locked(&so->so_snd, sbused(&so->so_snd)); socantsendmore_locked(so); } else SOCK_UNLOCK(so); wakeup(&so->so_timeo); } /* * Make a copy of a sockaddr in a malloced buffer of type M_SONAME. */ struct sockaddr * sodupsockaddr(const struct sockaddr *sa, int mflags) { struct sockaddr *sa2; sa2 = malloc(sa->sa_len, M_SONAME, mflags); if (sa2) bcopy(sa, sa2, sa->sa_len); return sa2; } /* * Register per-socket destructor. */ void sodtor_set(struct socket *so, so_dtor_t *func) { SOCK_LOCK_ASSERT(so); so->so_dtor = func; } /* * Register per-socket buffer upcalls. 
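* Once registered, the upcall is invoked from the sowakeup() path whenever
* the buffer sees activity, for as long as SB_UPCALL stays set on it.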
*/ void soupcall_set(struct socket *so, int which, so_upcall_t func, void *arg) { struct sockbuf *sb; KASSERT(!SOLISTENING(so), ("%s: so %p listening", __func__, so)); switch (which) { case SO_RCV: sb = &so->so_rcv; break; case SO_SND: sb = &so->so_snd; break; default: panic("soupcall_set: bad which"); } SOCKBUF_LOCK_ASSERT(sb); sb->sb_upcall = func; sb->sb_upcallarg = arg; sb->sb_flags |= SB_UPCALL; } void soupcall_clear(struct socket *so, int which) { struct sockbuf *sb; KASSERT(!SOLISTENING(so), ("%s: so %p listening", __func__, so)); switch (which) { case SO_RCV: sb = &so->so_rcv; break; case SO_SND: sb = &so->so_snd; break; default: panic("soupcall_clear: bad which"); } SOCKBUF_LOCK_ASSERT(sb); KASSERT(sb->sb_upcall != NULL, ("%s: so %p no upcall to clear", __func__, so)); sb->sb_upcall = NULL; sb->sb_upcallarg = NULL; sb->sb_flags &= ~SB_UPCALL; } void solisten_upcall_set(struct socket *so, so_upcall_t func, void *arg) { SOLISTEN_LOCK_ASSERT(so); so->sol_upcall = func; so->sol_upcallarg = arg; } static void so_rdknl_lock(void *arg) { struct socket *so = arg; if (SOLISTENING(so)) SOCK_LOCK(so); else SOCKBUF_LOCK(&so->so_rcv); } static void so_rdknl_unlock(void *arg) { struct socket *so = arg; if (SOLISTENING(so)) SOCK_UNLOCK(so); else SOCKBUF_UNLOCK(&so->so_rcv); } static void so_rdknl_assert_lock(void *arg, int what) { struct socket *so = arg; if (what == LA_LOCKED) { if (SOLISTENING(so)) SOCK_LOCK_ASSERT(so); else SOCKBUF_LOCK_ASSERT(&so->so_rcv); } else { if (SOLISTENING(so)) SOCK_UNLOCK_ASSERT(so); else SOCKBUF_UNLOCK_ASSERT(&so->so_rcv); } } static void so_wrknl_lock(void *arg) { struct socket *so = arg; if (SOLISTENING(so)) SOCK_LOCK(so); else SOCKBUF_LOCK(&so->so_snd); } static void so_wrknl_unlock(void *arg) { struct socket *so = arg; if (SOLISTENING(so)) SOCK_UNLOCK(so); else SOCKBUF_UNLOCK(&so->so_snd); } static void so_wrknl_assert_lock(void *arg, int what) { struct socket *so = arg; if (what == LA_LOCKED) { if (SOLISTENING(so)) SOCK_LOCK_ASSERT(so); else SOCKBUF_LOCK_ASSERT(&so->so_snd); } else { if (SOLISTENING(so)) SOCK_UNLOCK_ASSERT(so); else SOCKBUF_UNLOCK_ASSERT(&so->so_snd); } } /* * Create an external-format (``xsocket'') structure using the information in * the kernel-format socket structure pointed to by so. This is done to * reduce the spew of irrelevant information over this interface, to isolate * user code from changes in the kernel structure, and potentially to provide * information-hiding if we decide that some of this information should be * hidden from users. */ void sotoxsocket(struct socket *so, struct xsocket *xso) { bzero(xso, sizeof(*xso)); xso->xso_len = sizeof *xso; xso->xso_so = (uintptr_t)so; xso->so_type = so->so_type; xso->so_options = so->so_options; xso->so_linger = so->so_linger; xso->so_state = so->so_state; xso->so_pcb = (uintptr_t)so->so_pcb; xso->xso_protocol = so->so_proto->pr_protocol; xso->xso_family = so->so_proto->pr_domain->dom_family; xso->so_timeo = so->so_timeo; xso->so_error = so->so_error; xso->so_uid = so->so_cred->cr_uid; xso->so_pgid = so->so_sigio ? 
so->so_sigio->sio_pgid : 0; if (SOLISTENING(so)) { xso->so_qlen = so->sol_qlen; xso->so_incqlen = so->sol_incqlen; xso->so_qlimit = so->sol_qlimit; xso->so_oobmark = 0; } else { xso->so_state |= so->so_qstate; xso->so_qlen = xso->so_incqlen = xso->so_qlimit = 0; xso->so_oobmark = so->so_oobmark; sbtoxsockbuf(&so->so_snd, &xso->so_snd); sbtoxsockbuf(&so->so_rcv, &xso->so_rcv); } } struct sockbuf * so_sockbuf_rcv(struct socket *so) { return (&so->so_rcv); } struct sockbuf * so_sockbuf_snd(struct socket *so) { return (&so->so_snd); } int so_state_get(const struct socket *so) { return (so->so_state); } void so_state_set(struct socket *so, int val) { so->so_state = val; } int so_options_get(const struct socket *so) { return (so->so_options); } void so_options_set(struct socket *so, int val) { so->so_options = val; } int so_error_get(const struct socket *so) { return (so->so_error); } void so_error_set(struct socket *so, int val) { so->so_error = val; } int so_linger_get(const struct socket *so) { return (so->so_linger); } void so_linger_set(struct socket *so, int val) { KASSERT(val >= 0 && val <= USHRT_MAX && val <= (INT_MAX / hz), ("%s: val %d out of range", __func__, val)); so->so_linger = val; } struct protosw * so_protosw_get(const struct socket *so) { return (so->so_proto); } void so_protosw_set(struct socket *so, struct protosw *val) { so->so_proto = val; } void so_sorwakeup(struct socket *so) { sorwakeup(so); } void so_sowwakeup(struct socket *so) { sowwakeup(so); } void so_sorwakeup_locked(struct socket *so) { sorwakeup_locked(so); } void so_sowwakeup_locked(struct socket *so) { sowwakeup_locked(so); } void so_lock(struct socket *so) { SOCK_LOCK(so); } void so_unlock(struct socket *so) { SOCK_UNLOCK(so); } diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c index b9c2630561cb..23fae343924a 100644 --- a/sys/kern/uipc_syscalls.c +++ b/sys/kern/uipc_syscalls.c @@ -1,1630 +1,1632 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_capsicum.h" #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #ifdef COMPAT_FREEBSD32 #include #endif #include #include #include static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp); static int accept1(struct thread *td, int s, struct sockaddr *uname, socklen_t *anamelen, int flags); static int getsockname1(struct thread *td, struct getsockname_args *uap, int compat); static int getpeername1(struct thread *td, struct getpeername_args *uap, int compat); static int sockargs(struct mbuf **, char *, socklen_t, int); /* * Convert a user file descriptor to a kernel file entry and check if required * capability rights are present. * If required copy of current set of capability rights is returned. * A reference on the file entry is held upon returning. */ int getsock_cap(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp, u_int *fflagp, struct filecaps *havecapsp) { struct file *fp; int error; error = fget_cap(td, fd, rightsp, &fp, havecapsp); if (error != 0) return (error); if (fp->f_type != DTYPE_SOCKET) { fdrop(fp, td); if (havecapsp != NULL) filecaps_free(havecapsp); return (ENOTSOCK); } if (fflagp != NULL) *fflagp = fp->f_flag; *fpp = fp; return (0); } /* * System call interface to the socket abstraction. */ #if defined(COMPAT_43) #define COMPAT_OLDSOCK #endif int sys_socket(struct thread *td, struct socket_args *uap) { return (kern_socket(td, uap->domain, uap->type, uap->protocol)); } int kern_socket(struct thread *td, int domain, int type, int protocol) { struct socket *so; struct file *fp; int fd, error, oflag, fflag; AUDIT_ARG_SOCKET(domain, type, protocol); oflag = 0; fflag = 0; if ((type & SOCK_CLOEXEC) != 0) { type &= ~SOCK_CLOEXEC; oflag |= O_CLOEXEC; } if ((type & SOCK_NONBLOCK) != 0) { type &= ~SOCK_NONBLOCK; fflag |= FNONBLOCK; } #ifdef MAC error = mac_socket_check_create(td->td_ucred, domain, type, protocol); if (error != 0) return (error); #endif error = falloc(td, &fp, &fd, oflag); if (error != 0) return (error); /* An extra reference on `fp' has been held for us by falloc(). 
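* It keeps `fp' alive across socreate(); on failure fdclose() backs the new
* descriptor out of the file table, and in both cases the extra hold is
* released by the fdrop() below.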
*/ error = socreate(domain, &so, type, protocol, td->td_ucred, td); if (error != 0) { fdclose(td, fp, fd); } else { finit(fp, FREAD | FWRITE | fflag, DTYPE_SOCKET, so, &socketops); if ((fflag & FNONBLOCK) != 0) (void) fo_ioctl(fp, FIONBIO, &fflag, td->td_ucred, td); td->td_retval[0] = fd; } fdrop(fp, td); return (error); } int sys_bind(struct thread *td, struct bind_args *uap) { struct sockaddr *sa; int error; error = getsockaddr(&sa, uap->name, uap->namelen); if (error == 0) { error = kern_bindat(td, AT_FDCWD, uap->s, sa); free(sa, M_SONAME); } return (error); } int kern_bindat(struct thread *td, int dirfd, int fd, struct sockaddr *sa) { struct socket *so; struct file *fp; int error; #ifdef CAPABILITY_MODE if (IN_CAPABILITY_MODE(td) && (dirfd == AT_FDCWD)) return (ECAPMODE); #endif AUDIT_ARG_FD(fd); AUDIT_ARG_SOCKADDR(td, dirfd, sa); error = getsock_cap(td, fd, &cap_bind_rights, &fp, NULL, NULL); if (error != 0) return (error); so = fp->f_data; #ifdef KTRACE if (KTRPOINT(td, KTR_STRUCT)) ktrsockaddr(sa); #endif #ifdef MAC error = mac_socket_check_bind(td->td_ucred, so, sa); if (error == 0) { #endif if (dirfd == AT_FDCWD) error = sobind(so, sa, td); else error = sobindat(dirfd, so, sa, td); #ifdef MAC } #endif fdrop(fp, td); return (error); } int sys_bindat(struct thread *td, struct bindat_args *uap) { struct sockaddr *sa; int error; error = getsockaddr(&sa, uap->name, uap->namelen); if (error == 0) { error = kern_bindat(td, uap->fd, uap->s, sa); free(sa, M_SONAME); } return (error); } int sys_listen(struct thread *td, struct listen_args *uap) { return (kern_listen(td, uap->s, uap->backlog)); } int kern_listen(struct thread *td, int s, int backlog) { struct socket *so; struct file *fp; int error; AUDIT_ARG_FD(s); error = getsock_cap(td, s, &cap_listen_rights, &fp, NULL, NULL); if (error == 0) { so = fp->f_data; #ifdef MAC error = mac_socket_check_listen(td->td_ucred, so); if (error == 0) #endif error = solisten(so, backlog, td); fdrop(fp, td); } return (error); } /* * accept1() */ static int accept1(td, s, uname, anamelen, flags) struct thread *td; int s; struct sockaddr *uname; socklen_t *anamelen; int flags; { struct sockaddr *name; socklen_t namelen; struct file *fp; int error; if (uname == NULL) return (kern_accept4(td, s, NULL, NULL, flags, NULL)); error = copyin(anamelen, &namelen, sizeof (namelen)); if (error != 0) return (error); error = kern_accept4(td, s, &name, &namelen, flags, &fp); if (error != 0) return (error); if (error == 0 && uname != NULL) { #ifdef COMPAT_OLDSOCK if (SV_PROC_FLAG(td->td_proc, SV_AOUT) && (flags & ACCEPT4_COMPAT) != 0) ((struct osockaddr *)name)->sa_family = name->sa_family; #endif error = copyout(name, uname, namelen); } if (error == 0) error = copyout(&namelen, anamelen, sizeof(namelen)); if (error != 0) fdclose(td, fp, td->td_retval[0]); fdrop(fp, td); free(name, M_SONAME); return (error); } int kern_accept(struct thread *td, int s, struct sockaddr **name, socklen_t *namelen, struct file **fp) { return (kern_accept4(td, s, name, namelen, ACCEPT4_INHERIT, fp)); } int kern_accept4(struct thread *td, int s, struct sockaddr **name, socklen_t *namelen, int flags, struct file **fp) { struct file *headfp, *nfp = NULL; struct sockaddr *sa = NULL; struct socket *head, *so; struct filecaps fcaps; u_int fflag; pid_t pgid; int error, fd, tmp; if (name != NULL) *name = NULL; AUDIT_ARG_FD(s); error = getsock_cap(td, s, &cap_accept_rights, &headfp, &fflag, &fcaps); if (error != 0) return (error); head = headfp->f_data; if ((head->so_options & SO_ACCEPTCONN) == 0) { 
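/*
 * accept(2) on a socket that was never put into the listening state
 * with listen(2) ends up here and fails with EINVAL.
 */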
error = EINVAL; goto done; } #ifdef MAC error = mac_socket_check_accept(td->td_ucred, head); if (error != 0) goto done; #endif error = falloc_caps(td, &nfp, &fd, (flags & SOCK_CLOEXEC) ? O_CLOEXEC : 0, &fcaps); if (error != 0) goto done; SOCK_LOCK(head); if (!SOLISTENING(head)) { SOCK_UNLOCK(head); error = EINVAL; goto noconnection; } error = solisten_dequeue(head, &so, flags); if (error != 0) goto noconnection; /* An extra reference on `nfp' has been held for us by falloc(). */ td->td_retval[0] = fd; /* Connection has been removed from the listen queue. */ KNOTE_UNLOCKED(&head->so_rdsel.si_note, 0); if (flags & ACCEPT4_INHERIT) { pgid = fgetown(&head->so_sigio); if (pgid != 0) fsetown(pgid, &so->so_sigio); } else { fflag &= ~(FNONBLOCK | FASYNC); if (flags & SOCK_NONBLOCK) fflag |= FNONBLOCK; } finit(nfp, fflag, DTYPE_SOCKET, so, &socketops); /* Sync socket nonblocking/async state with file flags */ tmp = fflag & FNONBLOCK; (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td); tmp = fflag & FASYNC; (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td); error = soaccept(so, &sa); if (error != 0) goto noconnection; if (sa == NULL) { if (name) *namelen = 0; goto done; } AUDIT_ARG_SOCKADDR(td, AT_FDCWD, sa); if (name) { /* check sa_len before it is destroyed */ if (*namelen > sa->sa_len) *namelen = sa->sa_len; #ifdef KTRACE if (KTRPOINT(td, KTR_STRUCT)) ktrsockaddr(sa); #endif *name = sa; sa = NULL; } noconnection: free(sa, M_SONAME); /* * close the new descriptor, assuming someone hasn't ripped it * out from under us. */ if (error != 0) fdclose(td, nfp, fd); /* * Release explicitly held references before returning. We return * a reference on nfp to the caller on success if they request it. */ done: if (nfp == NULL) filecaps_free(&fcaps); if (fp != NULL) { if (error == 0) { *fp = nfp; nfp = NULL; } else *fp = NULL; } if (nfp != NULL) fdrop(nfp, td); fdrop(headfp, td); return (error); } int sys_accept(td, uap) struct thread *td; struct accept_args *uap; { return (accept1(td, uap->s, uap->name, uap->anamelen, ACCEPT4_INHERIT)); } int sys_accept4(td, uap) struct thread *td; struct accept4_args *uap; { if (uap->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) return (EINVAL); return (accept1(td, uap->s, uap->name, uap->anamelen, uap->flags)); } #ifdef COMPAT_OLDSOCK int oaccept(struct thread *td, struct oaccept_args *uap) { return (accept1(td, uap->s, uap->name, uap->anamelen, ACCEPT4_INHERIT | ACCEPT4_COMPAT)); } #endif /* COMPAT_OLDSOCK */ int sys_connect(struct thread *td, struct connect_args *uap) { struct sockaddr *sa; int error; error = getsockaddr(&sa, uap->name, uap->namelen); if (error == 0) { error = kern_connectat(td, AT_FDCWD, uap->s, sa); free(sa, M_SONAME); } return (error); } int kern_connectat(struct thread *td, int dirfd, int fd, struct sockaddr *sa) { struct socket *so; struct file *fp; int error, interrupted = 0; #ifdef CAPABILITY_MODE if (IN_CAPABILITY_MODE(td) && (dirfd == AT_FDCWD)) return (ECAPMODE); #endif AUDIT_ARG_FD(fd); AUDIT_ARG_SOCKADDR(td, dirfd, sa); error = getsock_cap(td, fd, &cap_connect_rights, &fp, NULL, NULL); if (error != 0) return (error); so = fp->f_data; if (so->so_state & SS_ISCONNECTING) { error = EALREADY; goto done1; } #ifdef KTRACE if (KTRPOINT(td, KTR_STRUCT)) ktrsockaddr(sa); #endif #ifdef MAC error = mac_socket_check_connect(td->td_ucred, so, sa); if (error != 0) goto bad; #endif if (dirfd == AT_FDCWD) error = soconnect(so, sa, td); else error = soconnectat(dirfd, so, sa, td); if (error != 0) goto bad; if ((so->so_state & SS_NBIO) && (so->so_state & 
SS_ISCONNECTING)) { error = EINPROGRESS; goto done1; } SOCK_LOCK(so); while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { error = msleep(&so->so_timeo, &so->so_lock, PSOCK | PCATCH, "connec", 0); if (error != 0) { if (error == EINTR || error == ERESTART) interrupted = 1; break; } } if (error == 0) { error = so->so_error; so->so_error = 0; } SOCK_UNLOCK(so); bad: if (!interrupted) so->so_state &= ~SS_ISCONNECTING; if (error == ERESTART) error = EINTR; done1: fdrop(fp, td); return (error); } int sys_connectat(struct thread *td, struct connectat_args *uap) { struct sockaddr *sa; int error; error = getsockaddr(&sa, uap->name, uap->namelen); if (error == 0) { error = kern_connectat(td, uap->fd, uap->s, sa); free(sa, M_SONAME); } return (error); } int kern_socketpair(struct thread *td, int domain, int type, int protocol, int *rsv) { struct file *fp1, *fp2; struct socket *so1, *so2; int fd, error, oflag, fflag; AUDIT_ARG_SOCKET(domain, type, protocol); oflag = 0; fflag = 0; if ((type & SOCK_CLOEXEC) != 0) { type &= ~SOCK_CLOEXEC; oflag |= O_CLOEXEC; } if ((type & SOCK_NONBLOCK) != 0) { type &= ~SOCK_NONBLOCK; fflag |= FNONBLOCK; } #ifdef MAC /* We might want to have a separate check for socket pairs. */ error = mac_socket_check_create(td->td_ucred, domain, type, protocol); if (error != 0) return (error); #endif error = socreate(domain, &so1, type, protocol, td->td_ucred, td); if (error != 0) return (error); error = socreate(domain, &so2, type, protocol, td->td_ucred, td); if (error != 0) goto free1; /* On success extra reference to `fp1' and 'fp2' is set by falloc. */ error = falloc(td, &fp1, &fd, oflag); if (error != 0) goto free2; rsv[0] = fd; fp1->f_data = so1; /* so1 already has ref count */ error = falloc(td, &fp2, &fd, oflag); if (error != 0) goto free3; fp2->f_data = so2; /* so2 already has ref count */ rsv[1] = fd; error = soconnect2(so1, so2); if (error != 0) goto free4; if (type == SOCK_DGRAM) { /* * Datagram socket connection is asymmetric. */ error = soconnect2(so2, so1); if (error != 0) goto free4; } else if (so1->so_proto->pr_flags & PR_CONNREQUIRED) { struct unpcb *unp, *unp2; unp = sotounpcb(so1); unp2 = sotounpcb(so2); /* * No need to lock the unps, because the sockets are brand-new. 
* No other threads can be using them yet */ unp_copy_peercred(td, unp, unp2, unp); } finit(fp1, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp1->f_data, &socketops); finit(fp2, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp2->f_data, &socketops); if ((fflag & FNONBLOCK) != 0) { (void) fo_ioctl(fp1, FIONBIO, &fflag, td->td_ucred, td); (void) fo_ioctl(fp2, FIONBIO, &fflag, td->td_ucred, td); } fdrop(fp1, td); fdrop(fp2, td); return (0); free4: fdclose(td, fp2, rsv[1]); fdrop(fp2, td); free3: fdclose(td, fp1, rsv[0]); fdrop(fp1, td); free2: if (so2 != NULL) (void)soclose(so2); free1: if (so1 != NULL) (void)soclose(so1); return (error); } int sys_socketpair(struct thread *td, struct socketpair_args *uap) { int error, sv[2]; error = kern_socketpair(td, uap->domain, uap->type, uap->protocol, sv); if (error != 0) return (error); error = copyout(sv, uap->rsv, 2 * sizeof(int)); if (error != 0) { (void)kern_close(td, sv[0]); (void)kern_close(td, sv[1]); } return (error); } static int sendit(struct thread *td, int s, struct msghdr *mp, int flags) { struct mbuf *control; struct sockaddr *to; int error; #ifdef CAPABILITY_MODE if (IN_CAPABILITY_MODE(td) && (mp->msg_name != NULL)) return (ECAPMODE); #endif if (mp->msg_name != NULL) { error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); if (error != 0) { to = NULL; goto bad; } mp->msg_name = to; } else { to = NULL; } if (mp->msg_control) { if (mp->msg_controllen < sizeof(struct cmsghdr) #ifdef COMPAT_OLDSOCK && (mp->msg_flags != MSG_COMPAT || !SV_PROC_FLAG(td->td_proc, SV_AOUT)) #endif ) { error = EINVAL; goto bad; } error = sockargs(&control, mp->msg_control, mp->msg_controllen, MT_CONTROL); if (error != 0) goto bad; #ifdef COMPAT_OLDSOCK if (mp->msg_flags == MSG_COMPAT && SV_PROC_FLAG(td->td_proc, SV_AOUT)) { struct cmsghdr *cm; M_PREPEND(control, sizeof(*cm), M_WAITOK); cm = mtod(control, struct cmsghdr *); cm->cmsg_len = control->m_len; cm->cmsg_level = SOL_SOCKET; cm->cmsg_type = SCM_RIGHTS; } #endif } else { control = NULL; } error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE); bad: free(to, M_SONAME); return (error); } int kern_sendit(struct thread *td, int s, struct msghdr *mp, int flags, struct mbuf *control, enum uio_seg segflg) { struct file *fp; struct uio auio; struct iovec *iov; struct socket *so; cap_rights_t *rights; #ifdef KTRACE struct uio *ktruio = NULL; #endif ssize_t len; int i, error; AUDIT_ARG_FD(s); rights = &cap_send_rights; if (mp->msg_name != NULL) { AUDIT_ARG_SOCKADDR(td, AT_FDCWD, mp->msg_name); rights = &cap_send_connect_rights; } error = getsock_cap(td, s, rights, &fp, NULL, NULL); if (error != 0) { m_freem(control); return (error); } so = (struct socket *)fp->f_data; #ifdef KTRACE if (mp->msg_name != NULL && KTRPOINT(td, KTR_STRUCT)) ktrsockaddr(mp->msg_name); #endif #ifdef MAC if (mp->msg_name != NULL) { error = mac_socket_check_connect(td->td_ucred, so, mp->msg_name); if (error != 0) { m_freem(control); goto bad; } } error = mac_socket_check_send(td->td_ucred, so); if (error != 0) { m_freem(control); goto bad; } #endif auio.uio_iov = mp->msg_iov; auio.uio_iovcnt = mp->msg_iovlen; auio.uio_segflg = segflg; auio.uio_rw = UIO_WRITE; auio.uio_td = td; auio.uio_offset = 0; /* XXX */ auio.uio_resid = 0; iov = mp->msg_iov; for (i = 0; i < mp->msg_iovlen; i++, iov++) { if ((auio.uio_resid += iov->iov_len) < 0) { error = EINVAL; m_freem(control); goto bad; } } #ifdef KTRACE if (KTRPOINT(td, KTR_GENIO)) ktruio = cloneuio(&auio); #endif len = auio.uio_resid; error = sosend(so, mp->msg_name, &auio, 0, control, flags, td); if (error 
!= 0) { if (auio.uio_resid != len && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) error = 0; /* Generation of SIGPIPE can be controlled per socket */ if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && !(flags & MSG_NOSIGNAL)) { PROC_LOCK(td->td_proc); tdsignal(td, SIGPIPE); PROC_UNLOCK(td->td_proc); } } if (error == 0) td->td_retval[0] = len - auio.uio_resid; #ifdef KTRACE if (ktruio != NULL) { ktruio->uio_resid = td->td_retval[0]; ktrgenio(s, UIO_WRITE, ktruio, error); } #endif bad: fdrop(fp, td); return (error); } int sys_sendto(struct thread *td, struct sendto_args *uap) { struct msghdr msg; struct iovec aiov; msg.msg_name = __DECONST(void *, uap->to); msg.msg_namelen = uap->tolen; msg.msg_iov = &aiov; msg.msg_iovlen = 1; msg.msg_control = 0; #ifdef COMPAT_OLDSOCK if (SV_PROC_FLAG(td->td_proc, SV_AOUT)) msg.msg_flags = 0; #endif aiov.iov_base = __DECONST(void *, uap->buf); aiov.iov_len = uap->len; return (sendit(td, uap->s, &msg, uap->flags)); } #ifdef COMPAT_OLDSOCK int osend(struct thread *td, struct osend_args *uap) { struct msghdr msg; struct iovec aiov; msg.msg_name = 0; msg.msg_namelen = 0; msg.msg_iov = &aiov; msg.msg_iovlen = 1; aiov.iov_base = __DECONST(void *, uap->buf); aiov.iov_len = uap->len; msg.msg_control = 0; msg.msg_flags = 0; return (sendit(td, uap->s, &msg, uap->flags)); } int osendmsg(struct thread *td, struct osendmsg_args *uap) { struct msghdr msg; struct iovec *iov; int error; error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); if (error != 0) return (error); error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); if (error != 0) return (error); msg.msg_iov = iov; msg.msg_flags = MSG_COMPAT; error = sendit(td, uap->s, &msg, uap->flags); free(iov, M_IOV); return (error); } #endif int sys_sendmsg(struct thread *td, struct sendmsg_args *uap) { struct msghdr msg; struct iovec *iov; int error; error = copyin(uap->msg, &msg, sizeof (msg)); if (error != 0) return (error); error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); if (error != 0) return (error); msg.msg_iov = iov; #ifdef COMPAT_OLDSOCK if (SV_PROC_FLAG(td->td_proc, SV_AOUT)) msg.msg_flags = 0; #endif error = sendit(td, uap->s, &msg, uap->flags); free(iov, M_IOV); return (error); } int kern_recvit(struct thread *td, int s, struct msghdr *mp, enum uio_seg fromseg, struct mbuf **controlp) { struct uio auio; struct iovec *iov; struct mbuf *control, *m; caddr_t ctlbuf; struct file *fp; struct socket *so; struct sockaddr *fromsa = NULL; #ifdef KTRACE struct uio *ktruio = NULL; #endif ssize_t len; int error, i; if (controlp != NULL) *controlp = NULL; AUDIT_ARG_FD(s); error = getsock_cap(td, s, &cap_recv_rights, &fp, NULL, NULL); if (error != 0) return (error); so = fp->f_data; #ifdef MAC error = mac_socket_check_receive(td->td_ucred, so); if (error != 0) { fdrop(fp, td); return (error); } #endif auio.uio_iov = mp->msg_iov; auio.uio_iovcnt = mp->msg_iovlen; auio.uio_segflg = UIO_USERSPACE; auio.uio_rw = UIO_READ; auio.uio_td = td; auio.uio_offset = 0; /* XXX */ auio.uio_resid = 0; iov = mp->msg_iov; for (i = 0; i < mp->msg_iovlen; i++, iov++) { if ((auio.uio_resid += iov->iov_len) < 0) { fdrop(fp, td); return (EINVAL); } } #ifdef KTRACE if (KTRPOINT(td, KTR_GENIO)) ktruio = cloneuio(&auio); #endif control = NULL; len = auio.uio_resid; error = soreceive(so, &fromsa, &auio, NULL, (mp->msg_control || controlp) ? 
&control : NULL, &mp->msg_flags); if (error != 0) { if (auio.uio_resid != len && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) error = 0; } if (fromsa != NULL) AUDIT_ARG_SOCKADDR(td, AT_FDCWD, fromsa); #ifdef KTRACE if (ktruio != NULL) { ktruio->uio_resid = len - auio.uio_resid; ktrgenio(s, UIO_READ, ktruio, error); } #endif if (error != 0) goto out; td->td_retval[0] = len - auio.uio_resid; if (mp->msg_name) { len = mp->msg_namelen; if (len <= 0 || fromsa == NULL) len = 0; else { /* save sa_len before it is destroyed by MSG_COMPAT */ len = MIN(len, fromsa->sa_len); #ifdef COMPAT_OLDSOCK if ((mp->msg_flags & MSG_COMPAT) != 0 && SV_PROC_FLAG(td->td_proc, SV_AOUT)) ((struct osockaddr *)fromsa)->sa_family = fromsa->sa_family; #endif if (fromseg == UIO_USERSPACE) { error = copyout(fromsa, mp->msg_name, (unsigned)len); if (error != 0) goto out; } else bcopy(fromsa, mp->msg_name, len); } mp->msg_namelen = len; } if (mp->msg_control && controlp == NULL) { #ifdef COMPAT_OLDSOCK /* * We assume that old recvmsg calls won't receive access * rights and other control info, esp. as control info * is always optional and those options didn't exist in 4.3. * If we receive rights, trim the cmsghdr; anything else * is tossed. */ if (control && (mp->msg_flags & MSG_COMPAT) != 0 && SV_PROC_FLAG(td->td_proc, SV_AOUT)) { if (mtod(control, struct cmsghdr *)->cmsg_level != SOL_SOCKET || mtod(control, struct cmsghdr *)->cmsg_type != SCM_RIGHTS) { mp->msg_controllen = 0; goto out; } control->m_len -= sizeof (struct cmsghdr); control->m_data += sizeof (struct cmsghdr); } #endif ctlbuf = mp->msg_control; len = mp->msg_controllen; mp->msg_controllen = 0; for (m = control; m != NULL && len >= m->m_len; m = m->m_next) { if ((error = copyout(mtod(m, caddr_t), ctlbuf, m->m_len)) != 0) goto out; ctlbuf += m->m_len; len -= m->m_len; mp->msg_controllen += m->m_len; } if (m != NULL) { mp->msg_flags |= MSG_CTRUNC; m_dispose_extcontrolm(m); } } out: fdrop(fp, td); #ifdef KTRACE if (fromsa && KTRPOINT(td, KTR_STRUCT)) ktrsockaddr(fromsa); #endif free(fromsa, M_SONAME); if (error == 0 && controlp != NULL) *controlp = control; else if (control != NULL) { if (error != 0) m_dispose_extcontrolm(control); m_freem(control); } return (error); } static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp) { int error; error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL); if (error != 0) return (error); if (namelenp != NULL) { error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t)); #ifdef COMPAT_OLDSOCK if ((mp->msg_flags & MSG_COMPAT) != 0 && SV_PROC_FLAG(td->td_proc, SV_AOUT)) error = 0; /* old recvfrom didn't check */ #endif } return (error); } int sys_recvfrom(struct thread *td, struct recvfrom_args *uap) { struct msghdr msg; struct iovec aiov; int error; if (uap->fromlenaddr) { error = copyin(uap->fromlenaddr, &msg.msg_namelen, sizeof (msg.msg_namelen)); if (error != 0) goto done2; } else { msg.msg_namelen = 0; } msg.msg_name = uap->from; msg.msg_iov = &aiov; msg.msg_iovlen = 1; aiov.iov_base = uap->buf; aiov.iov_len = uap->len; msg.msg_control = 0; msg.msg_flags = uap->flags; error = recvit(td, uap->s, &msg, uap->fromlenaddr); done2: return (error); } #ifdef COMPAT_OLDSOCK int orecvfrom(struct thread *td, struct recvfrom_args *uap) { uap->flags |= MSG_COMPAT; return (sys_recvfrom(td, uap)); } #endif #ifdef COMPAT_OLDSOCK int orecv(struct thread *td, struct orecv_args *uap) { struct msghdr msg; struct iovec aiov; msg.msg_name = 0; msg.msg_namelen = 0; msg.msg_iov = &aiov; msg.msg_iovlen = 1; 
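/*
 * orecv() is the old 4.3BSD recv(2): it wraps the caller's buffer in a
 * single-element iovec and funnels into the common recvit() path with
 * no source address and no control data.
 */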
aiov.iov_base = uap->buf; aiov.iov_len = uap->len; msg.msg_control = 0; msg.msg_flags = uap->flags; return (recvit(td, uap->s, &msg, NULL)); } /* * Old recvmsg. This code takes advantage of the fact that the old msghdr * overlays the new one, missing only the flags, and with the (old) access * rights where the control fields are now. */ int orecvmsg(struct thread *td, struct orecvmsg_args *uap) { struct msghdr msg; struct iovec *iov; int error; error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); if (error != 0) return (error); error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); if (error != 0) return (error); msg.msg_flags = uap->flags | MSG_COMPAT; msg.msg_iov = iov; error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen); if (msg.msg_controllen && error == 0) error = copyout(&msg.msg_controllen, &uap->msg->msg_accrightslen, sizeof (int)); free(iov, M_IOV); return (error); } #endif int sys_recvmsg(struct thread *td, struct recvmsg_args *uap) { struct msghdr msg; struct iovec *uiov, *iov; int error; error = copyin(uap->msg, &msg, sizeof (msg)); if (error != 0) return (error); error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); if (error != 0) return (error); msg.msg_flags = uap->flags; #ifdef COMPAT_OLDSOCK if (SV_PROC_FLAG(td->td_proc, SV_AOUT)) msg.msg_flags &= ~MSG_COMPAT; #endif uiov = msg.msg_iov; msg.msg_iov = iov; error = recvit(td, uap->s, &msg, NULL); if (error == 0) { msg.msg_iov = uiov; error = copyout(&msg, uap->msg, sizeof(msg)); } free(iov, M_IOV); return (error); } int sys_shutdown(struct thread *td, struct shutdown_args *uap) { return (kern_shutdown(td, uap->s, uap->how)); } int kern_shutdown(struct thread *td, int s, int how) { struct socket *so; struct file *fp; int error; AUDIT_ARG_FD(s); error = getsock_cap(td, s, &cap_shutdown_rights, &fp, NULL, NULL); if (error == 0) { so = fp->f_data; error = soshutdown(so, how); /* * Previous versions did not return ENOTCONN, but 0 in * case the socket was not connected. Some important * programs like syslogd up to r279016, 2015-02-19, * still depend on this behavior. 
*/ if (error == ENOTCONN && td->td_proc->p_osrel < P_OSREL_SHUTDOWN_ENOTCONN) error = 0; fdrop(fp, td); } return (error); } int sys_setsockopt(struct thread *td, struct setsockopt_args *uap) { return (kern_setsockopt(td, uap->s, uap->level, uap->name, uap->val, UIO_USERSPACE, uap->valsize)); } int kern_setsockopt(struct thread *td, int s, int level, int name, const void *val, enum uio_seg valseg, socklen_t valsize) { struct socket *so; struct file *fp; struct sockopt sopt; int error; if (val == NULL && valsize != 0) return (EFAULT); if ((int)valsize < 0) return (EINVAL); sopt.sopt_dir = SOPT_SET; sopt.sopt_level = level; sopt.sopt_name = name; sopt.sopt_val = __DECONST(void *, val); sopt.sopt_valsize = valsize; switch (valseg) { case UIO_USERSPACE: sopt.sopt_td = td; break; case UIO_SYSSPACE: sopt.sopt_td = NULL; break; default: panic("kern_setsockopt called with bad valseg"); } AUDIT_ARG_FD(s); error = getsock_cap(td, s, &cap_setsockopt_rights, &fp, NULL, NULL); if (error == 0) { so = fp->f_data; error = sosetopt(so, &sopt); fdrop(fp, td); } return(error); } int sys_getsockopt(struct thread *td, struct getsockopt_args *uap) { socklen_t valsize; int error; if (uap->val) { error = copyin(uap->avalsize, &valsize, sizeof (valsize)); if (error != 0) return (error); } error = kern_getsockopt(td, uap->s, uap->level, uap->name, uap->val, UIO_USERSPACE, &valsize); if (error == 0) error = copyout(&valsize, uap->avalsize, sizeof (valsize)); return (error); } /* * Kernel version of getsockopt. * optval can be a userland or kernel pointer. optlen is always a kernel pointer. */ int kern_getsockopt(struct thread *td, int s, int level, int name, void *val, enum uio_seg valseg, socklen_t *valsize) { struct socket *so; struct file *fp; struct sockopt sopt; int error; if (val == NULL) *valsize = 0; if ((int)*valsize < 0) return (EINVAL); sopt.sopt_dir = SOPT_GET; sopt.sopt_level = level; sopt.sopt_name = name; sopt.sopt_val = val; sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */ switch (valseg) { case UIO_USERSPACE: sopt.sopt_td = td; break; case UIO_SYSSPACE: sopt.sopt_td = NULL; break; default: panic("kern_getsockopt called with bad valseg"); } AUDIT_ARG_FD(s); error = getsock_cap(td, s, &cap_getsockopt_rights, &fp, NULL, NULL); if (error == 0) { so = fp->f_data; error = sogetopt(so, &sopt); *valsize = sopt.sopt_valsize; fdrop(fp, td); } return (error); } /* * getsockname1() - Get socket name.
*/ static int getsockname1(struct thread *td, struct getsockname_args *uap, int compat) { struct sockaddr *sa; socklen_t len; int error; error = copyin(uap->alen, &len, sizeof(len)); if (error != 0) return (error); error = kern_getsockname(td, uap->fdes, &sa, &len); if (error != 0) return (error); if (len != 0) { #ifdef COMPAT_OLDSOCK if (compat && SV_PROC_FLAG(td->td_proc, SV_AOUT)) ((struct osockaddr *)sa)->sa_family = sa->sa_family; #endif error = copyout(sa, uap->asa, (u_int)len); } free(sa, M_SONAME); if (error == 0) error = copyout(&len, uap->alen, sizeof(len)); return (error); } int kern_getsockname(struct thread *td, int fd, struct sockaddr **sa, socklen_t *alen) { struct socket *so; struct file *fp; socklen_t len; int error; AUDIT_ARG_FD(fd); error = getsock_cap(td, fd, &cap_getsockname_rights, &fp, NULL, NULL); if (error != 0) return (error); so = fp->f_data; *sa = NULL; - error = sogetsockaddr(so, sa); + CURVNET_SET(so->so_vnet); + error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa); + CURVNET_RESTORE(); if (error != 0) goto bad; if (*sa == NULL) len = 0; else len = MIN(*alen, (*sa)->sa_len); *alen = len; #ifdef KTRACE if (KTRPOINT(td, KTR_STRUCT)) ktrsockaddr(*sa); #endif bad: fdrop(fp, td); if (error != 0 && *sa != NULL) { free(*sa, M_SONAME); *sa = NULL; } return (error); } int sys_getsockname(struct thread *td, struct getsockname_args *uap) { return (getsockname1(td, uap, 0)); } #ifdef COMPAT_OLDSOCK int ogetsockname(struct thread *td, struct getsockname_args *uap) { return (getsockname1(td, uap, 1)); } #endif /* COMPAT_OLDSOCK */ /* * getpeername1() - Get name of peer for connected socket. */ static int getpeername1(struct thread *td, struct getpeername_args *uap, int compat) { struct sockaddr *sa; socklen_t len; int error; error = copyin(uap->alen, &len, sizeof (len)); if (error != 0) return (error); error = kern_getpeername(td, uap->fdes, &sa, &len); if (error != 0) return (error); if (len != 0) { #ifdef COMPAT_OLDSOCK if (compat && SV_PROC_FLAG(td->td_proc, SV_AOUT)) ((struct osockaddr *)sa)->sa_family = sa->sa_family; #endif error = copyout(sa, uap->asa, (u_int)len); } free(sa, M_SONAME); if (error == 0) error = copyout(&len, uap->alen, sizeof(len)); return (error); } int kern_getpeername(struct thread *td, int fd, struct sockaddr **sa, socklen_t *alen) { struct socket *so; struct file *fp; socklen_t len; int error; AUDIT_ARG_FD(fd); error = getsock_cap(td, fd, &cap_getpeername_rights, &fp, NULL, NULL); if (error != 0) return (error); so = fp->f_data; if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { error = ENOTCONN; goto done; } *sa = NULL; CURVNET_SET(so->so_vnet); error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa); CURVNET_RESTORE(); if (error != 0) goto bad; if (*sa == NULL) len = 0; else len = MIN(*alen, (*sa)->sa_len); *alen = len; #ifdef KTRACE if (KTRPOINT(td, KTR_STRUCT)) ktrsockaddr(*sa); #endif bad: if (error != 0 && *sa != NULL) { free(*sa, M_SONAME); *sa = NULL; } done: fdrop(fp, td); return (error); } int sys_getpeername(struct thread *td, struct getpeername_args *uap) { return (getpeername1(td, uap, 0)); } #ifdef COMPAT_OLDSOCK int ogetpeername(struct thread *td, struct ogetpeername_args *uap) { /* XXX uap should have type `getpeername_args *' to begin with. 
*/ return (getpeername1(td, (struct getpeername_args *)uap, 1)); } #endif /* COMPAT_OLDSOCK */ static int sockargs(struct mbuf **mp, char *buf, socklen_t buflen, int type) { struct sockaddr *sa; struct mbuf *m; int error; if (buflen > MLEN) { #ifdef COMPAT_OLDSOCK if (type == MT_SONAME && buflen <= 112 && SV_CURPROC_FLAG(SV_AOUT)) buflen = MLEN; /* unix domain compat. hack */ else #endif if (buflen > MCLBYTES) return (EINVAL); } m = m_get2(buflen, M_WAITOK, type, 0); m->m_len = buflen; error = copyin(buf, mtod(m, void *), buflen); if (error != 0) (void) m_free(m); else { *mp = m; if (type == MT_SONAME) { sa = mtod(m, struct sockaddr *); #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN if (sa->sa_family == 0 && sa->sa_len < AF_MAX && SV_CURPROC_FLAG(SV_AOUT)) sa->sa_family = sa->sa_len; #endif sa->sa_len = buflen; } } return (error); } int getsockaddr(struct sockaddr **namp, const struct sockaddr *uaddr, size_t len) { struct sockaddr *sa; int error; if (len > SOCK_MAXADDRLEN) return (ENAMETOOLONG); if (len < offsetof(struct sockaddr, sa_data[0])) return (EINVAL); sa = malloc(len, M_SONAME, M_WAITOK); error = copyin(uaddr, sa, len); if (error != 0) { free(sa, M_SONAME); } else { #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN if (sa->sa_family == 0 && sa->sa_len < AF_MAX && SV_CURPROC_FLAG(SV_AOUT)) sa->sa_family = sa->sa_len; #endif sa->sa_len = len; *namp = sa; } return (error); } /* * Dispose of externalized rights from an SCM_RIGHTS message. This function * should be used in error or truncation cases to avoid leaking file descriptors * into the recipient's (the current thread's) table. */ void m_dispose_extcontrolm(struct mbuf *m) { struct cmsghdr *cm; struct file *fp; struct thread *td; socklen_t clen, datalen; int error, fd, *fds, nfd; td = curthread; for (; m != NULL; m = m->m_next) { if (m->m_type != MT_EXTCONTROL) continue; cm = mtod(m, struct cmsghdr *); clen = m->m_len; while (clen > 0) { if (clen < sizeof(*cm)) panic("%s: truncated mbuf %p", __func__, m); datalen = CMSG_SPACE(cm->cmsg_len - CMSG_SPACE(0)); if (clen < datalen) panic("%s: truncated mbuf %p", __func__, m); if (cm->cmsg_level == SOL_SOCKET && cm->cmsg_type == SCM_RIGHTS) { fds = (int *)CMSG_DATA(cm); nfd = (cm->cmsg_len - CMSG_SPACE(0)) / sizeof(int); while (nfd-- > 0) { fd = *fds++; error = fget(td, fd, &cap_no_rights, &fp); if (error == 0) { fdclose(td, fp, fd); fdrop(fp, td); } } } clen -= datalen; cm = (struct cmsghdr *)((uint8_t *)cm + datalen); } m_chtype(m, MT_CONTROL); } } diff --git a/sys/modules/Makefile b/sys/modules/Makefile index 32919674901b..0195ce5064d2 100644 --- a/sys/modules/Makefile +++ b/sys/modules/Makefile @@ -1,847 +1,846 @@ # $FreeBSD$ SYSDIR?=${SRCTOP}/sys .include "${SYSDIR}/conf/kern.opts.mk" SUBDIR_PARALLEL= # Modules that include binary-only blobs of microcode should be selectable by # MK_SOURCELESS_UCODE option (see below). 
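# For example (hypothetical invocations), the override knobs handled below
# can trim the module list from the command line:
#	make -C sys/modules MODULES_OVERRIDE="ipfw mac_biba"
#	make buildkernel WITHOUT_MODULES="zfs"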
.include "${SYSDIR}/conf/config.mk" .if defined(MODULES_OVERRIDE) && !defined(ALL_MODULES) SUBDIR=${MODULES_OVERRIDE} .else SUBDIR= \ ${_3dfx} \ ${_3dfx_linux} \ ${_aac} \ ${_aacraid} \ accf_data \ accf_dns \ accf_http \ acl_nfs4 \ acl_posix1e \ ${_acpi} \ ae \ ${_aesni} \ age \ ${_agp} \ ahci \ aic7xxx \ alc \ ale \ alq \ ${_amd_ecc_inject} \ ${_amdgpio} \ ${_amdsbwd} \ ${_amdsmn} \ ${_amdtemp} \ amr \ ${_an} \ ${_aout} \ ${_arcmsr} \ ${_allwinner} \ ${_armv8crypto} \ ${_asmc} \ ata \ ath \ ath_dfs \ ath_hal \ ath_hal_ar5210 \ ath_hal_ar5211 \ ath_hal_ar5212 \ ath_hal_ar5416 \ ath_hal_ar9300 \ ath_main \ ath_rate \ ath_pci \ ${_autofs} \ axgbe \ backlight \ ${_bce} \ ${_bcm283x_clkman} \ ${_bcm283x_pwm} \ bfe \ bge \ bhnd \ ${_bxe} \ ${_bios} \ ${_blake2} \ bnxt \ bridgestp \ bwi \ bwn \ ${_bytgpio} \ ${_chvgpio} \ cam \ ${_cardbus} \ ${_carp} \ cas \ ${_cbb} \ cc \ ${_ccp} \ cd9660 \ cd9660_iconv \ ${_ce} \ ${_cfi} \ ${_chromebook_platform} \ ${_ciss} \ cloudabi \ ${_cloudabi32} \ ${_cloudabi64} \ ${_coretemp} \ ${_cp} \ ${_cpsw} \ ${_cpuctl} \ ${_cpufreq} \ ${_crypto} \ ${_cryptodev} \ ctl \ ${_cxgb} \ ${_cxgbe} \ dc \ dcons \ dcons_crom \ ${_dpdk_lpm4} \ ${_dpdk_lpm6} \ ${_dpms} \ dummynet \ ${_dwwdt} \ ${_efirt} \ ${_em} \ ${_ena} \ esp \ ${_et} \ evdev \ ${_exca} \ ext2fs \ fdc \ fdescfs \ ${_ffec} \ filemon \ firewire \ firmware \ ${_ftwd} \ fusefs \ ${_fxp} \ gem \ geom \ ${_glxiic} \ ${_glxsb} \ gpio \ hid \ hifn \ ${_hpt27xx} \ ${_hptiop} \ ${_hptmv} \ ${_hptnr} \ ${_hptrr} \ hwpmc \ ${_hwpmc_mips24k} \ ${_hwpmc_mips74k} \ ${_hyperv} \ i2c \ ${_iavf} \ ${_ibcore} \ ${_ichwd} \ ${_ice} \ ${_ice_ddp} \ ${_ida} \ if_bridge \ if_disc \ if_edsc \ ${_if_enc} \ if_epair \ ${_if_gif} \ ${_if_gre} \ ${_if_me} \ if_infiniband \ if_lagg \ ${_if_stf} \ if_tuntap \ if_vlan \ if_vxlan \ - ${_if_wg} \ iflib \ ${_iir} \ imgact_binmisc \ ${_intelspi} \ ${_io} \ ${_ioat} \ ${_ipoib} \ ${_ipdivert} \ ${_ipfilter} \ ${_ipfw} \ ipfw_nat \ ${_ipfw_nat64} \ ${_ipfw_nptv6} \ ${_ipfw_pmod} \ ${_ipmi} \ ip6_mroute_mod \ ip_mroute_mod \ ${_ips} \ ${_ipsec} \ ${_ipw} \ ${_ipwfw} \ ${_isci} \ ${_iser} \ isp \ ${_ispfw} \ ${_itwd} \ ${_iwi} \ ${_iwifw} \ ${_iwm} \ ${_iwmfw} \ ${_iwn} \ ${_iwnfw} \ ${_ix} \ ${_ixv} \ ${_ixl} \ jme \ kbdmux \ kgssapi \ kgssapi_krb5 \ khelp \ krpc \ ksyms \ ${_ktls_ocf} \ le \ lge \ libalias \ libiconv \ libmchain \ lindebugfs \ linuxkpi \ ${_lio} \ lpt \ mac_biba \ mac_bsdextended \ mac_ifoff \ mac_lomac \ mac_mls \ mac_none \ mac_ntpd \ mac_partition \ mac_portacl \ mac_seeotheruids \ mac_stub \ mac_test \ ${_malo} \ md \ mdio \ mem \ mfi \ mii \ mlx \ mlxfw \ ${_mlx4} \ ${_mlx4ib} \ ${_mlx4en} \ ${_mlx5} \ ${_mlx5en} \ ${_mlx5ib} \ ${_mly} \ mmc \ mmcsd \ ${_mpr} \ ${_mps} \ mpt \ mqueue \ mrsas \ msdosfs \ msdosfs_iconv \ msk \ ${_mthca} \ mvs \ mwl \ ${_mwlfw} \ mxge \ my \ ${_nctgpio} \ ${_netgraph} \ ${_nfe} \ nfscl \ nfscommon \ nfsd \ nfslockd \ nfssvc \ nge \ nmdm \ nullfs \ ${_ntb} \ ${_nvd} \ ${_nvdimm} \ ${_nvme} \ ${_nvram} \ oce \ ${_ocs_fc} \ ${_ossl} \ otus \ ${_otusfw} \ ow \ ${_padlock} \ ${_padlock_rng} \ ${_pccard} \ ${_pchtherm} \ ${_pcfclock} \ ${_pf} \ ${_pflog} \ ${_pfsync} \ plip \ ${_pms} \ ppbus \ ppc \ ppi \ pps \ procfs \ proto \ pseudofs \ ${_pst} \ pty \ puc \ pwm \ ${_qat} \ ${_qatfw} \ ${_qlxge} \ ${_qlxgb} \ ${_qlxgbe} \ ${_qlnx} \ ral \ ${_ralfw} \ ${_random_fortuna} \ ${_random_other} \ rc4 \ ${_rdma} \ ${_rdrand_rng} \ re \ rl \ ${_rockchip} \ rtsx \ rtwn \ rtwn_pci \ rtwn_usb \ ${_rtwnfw} \ ${_s3} \ ${_safe} \ safexcel \ ${_sbni} \ scc \ 
${_sctp} \ sdhci \ ${_sdhci_acpi} \ sdhci_pci \ sdio \ sem \ send \ ${_sfxge} \ sge \ ${_sgx} \ ${_sgx_linux} \ siftr \ siis \ sis \ sk \ ${_smartpqi} \ smbfs \ snp \ sound \ ${_speaker} \ spi \ ${_splash} \ ${_sppp} \ ste \ stge \ ${_sume} \ ${_superio} \ ${_sym} \ ${_syscons} \ sysvipc \ tcp \ ${_ti} \ tmpfs \ ${_toecore} \ ${_tpm} \ ${_twa} \ twe \ tws \ uart \ udf \ udf_iconv \ ufs \ uinput \ unionfs \ usb \ ${_vesa} \ virtio \ vge \ ${_viawd} \ videomode \ vkbd \ ${_vmd} \ ${_vmm} \ ${_vmware} \ vr \ vte \ ${_wbwd} \ wlan \ wlan_acl \ wlan_amrr \ wlan_ccmp \ wlan_rssadapt \ wlan_tkip \ wlan_wep \ wlan_xauth \ ${_wpi} \ ${_wpifw} \ ${_x86bios} \ xdr \ xl \ xz \ zlib .if ${MK_AUTOFS} != "no" || defined(ALL_MODULES) _autofs= autofs .endif .if ${MK_CDDL} != "no" || defined(ALL_MODULES) .if (${MACHINE_CPUARCH} != "arm" || ${MACHINE_ARCH:Marmv[67]*} != "") && \ ${MACHINE_CPUARCH} != "mips" .if ${KERN_OPTS:MKDTRACE_HOOKS} SUBDIR+= dtrace .endif .endif SUBDIR+= opensolaris .endif .if ${MK_CRYPT} != "no" || defined(ALL_MODULES) .if exists(${SRCTOP}/sys/opencrypto) _crypto= crypto _cryptodev= cryptodev _random_fortuna=random_fortuna _random_other= random_other _ktls_ocf= ktls_ocf .endif .endif .if ${MK_CUSE} != "no" || defined(ALL_MODULES) SUBDIR+= cuse .endif .if ${MK_EFI} != "no" .if ${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "amd64" _efirt= efirt .endif .endif .if (${MK_INET_SUPPORT} != "no" || ${MK_INET6_SUPPORT} != "no") || \ defined(ALL_MODULES) _carp= carp _toecore= toecore _if_enc= if_enc _if_gif= if_gif _if_gre= if_gre _ipfw_pmod= ipfw_pmod .if ${KERN_OPTS:MIPSEC_SUPPORT} && !${KERN_OPTS:MIPSEC} _ipsec= ipsec .endif .if ${KERN_OPTS:MSCTP_SUPPORT} || ${KERN_OPTS:MSCTP} _sctp= sctp .endif .endif .if (${MK_INET_SUPPORT} != "no" && ${MK_INET6_SUPPORT} != "no") || \ defined(ALL_MODULES) _if_stf= if_stf .endif .if ${MK_INET_SUPPORT} != "no" || defined(ALL_MODULES) _if_wg= if_wg .endif .if ${MK_INET_SUPPORT} != "no" || defined(ALL_MODULES) _if_me= if_me _ipdivert= ipdivert _ipfw= ipfw .if ${MK_INET6_SUPPORT} != "no" || defined(ALL_MODULES) _ipfw_nat64= ipfw_nat64 .endif .endif .if ${MK_INET6_SUPPORT} != "no" || defined(ALL_MODULES) _ipfw_nptv6= ipfw_nptv6 .endif .if ${MK_IPFILTER} != "no" || defined(ALL_MODULES) _ipfilter= ipfilter .endif .if ${MK_INET_SUPPORT} != "no" && ${KERN_OPTS:MFIB_ALGO} _dpdk_lpm4= dpdk_lpm4 .endif .if ${MK_INET6_SUPPORT} != "no" && ${KERN_OPTS:MFIB_ALGO} _dpdk_lpm6= dpdk_lpm6 .endif .if ${MK_ISCSI} != "no" || defined(ALL_MODULES) SUBDIR+= cfiscsi SUBDIR+= iscsi SUBDIR+= iscsi_initiator .endif .if !empty(OPT_FDT) SUBDIR+= fdt .endif # Linuxulator .if ${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "amd64" || \ ${MACHINE_CPUARCH} == "i386" SUBDIR+= linprocfs SUBDIR+= linsysfs .endif .if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "i386" SUBDIR+= linux .endif .if ${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "amd64" SUBDIR+= linux64 SUBDIR+= linux_common .endif .if ${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "amd64" || \ ${MACHINE_CPUARCH} == "i386" _ena= ena .if ${MK_OFED} != "no" || defined(ALL_MODULES) _ibcore= ibcore _ipoib= ipoib _iser= iser .endif _ipmi= ipmi _mlx4= mlx4 _mlx5= mlx5 .if (${MK_INET_SUPPORT} != "no" && ${MK_INET6_SUPPORT} != "no") || \ defined(ALL_MODULES) _mlx4en= mlx4en _mlx5en= mlx5en .endif .if ${MK_OFED} != "no" || defined(ALL_MODULES) _mthca= mthca _mlx4ib= mlx4ib _mlx5ib= mlx5ib .endif _ossl= ossl _vmware= vmware .endif .if ${MK_NETGRAPH} != "no" || defined(ALL_MODULES) _netgraph= 
netgraph .endif .if (${MK_PF} != "no" && (${MK_INET_SUPPORT} != "no" || \ ${MK_INET6_SUPPORT} != "no")) || defined(ALL_MODULES) _pf= pf _pflog= pflog .if ${MK_INET_SUPPORT} != "no" _pfsync= pfsync .endif .endif .if ${MK_SOURCELESS_UCODE} != "no" _bce= bce _fxp= fxp _ispfw= ispfw _ti= ti .if ${MACHINE_CPUARCH} != "mips" _mwlfw= mwlfw _otusfw= otusfw _ralfw= ralfw _rtwnfw= rtwnfw .endif .endif .if ${MK_SOURCELESS_UCODE} != "no" && ${MACHINE_CPUARCH} != "arm" && \ ${MACHINE_CPUARCH} != "mips" && \ ${MACHINE_ARCH} != "powerpc" && ${MACHINE_ARCH} != "powerpcspe" && \ ${MACHINE_CPUARCH} != "riscv" _cxgbe= cxgbe .endif .if ${MACHINE_ARCH} == "amd64" || ${MACHINE_ARCH} == "arm64" _ice= ice .if ${MK_SOURCELESS_UCODE} != "no" _ice_ddp= ice_ddp .endif .endif # These rely on 64bit atomics .if ${MACHINE_ARCH} != "powerpc" && ${MACHINE_ARCH} != "powerpcspe" && \ ${MACHINE_CPUARCH} != "mips" _mps= mps _mpr= mpr .endif .if ${MK_TESTS} != "no" || defined(ALL_MODULES) SUBDIR+= tests .endif .if ${MK_ZFS} != "no" || (defined(ALL_MODULES) && ${MACHINE_CPUARCH} != "powerpc") SUBDIR+= zfs .endif .if (${MACHINE_CPUARCH} == "mips" && ${MACHINE_ARCH:Mmips64} == "") _hwpmc_mips24k= hwpmc_mips24k _hwpmc_mips74k= hwpmc_mips74k .endif .if ${MACHINE_CPUARCH} != "aarch64" && ${MACHINE_CPUARCH} != "arm" && \ ${MACHINE_CPUARCH} != "mips" && ${MACHINE_CPUARCH} != "powerpc" && \ ${MACHINE_CPUARCH} != "riscv" _syscons= syscons .endif .if ${MACHINE_CPUARCH} != "mips" # no BUS_SPACE_UNSPECIFIED # No barrier instruction support (specific to this driver) _sym= sym # intr_disable() is a macro, causes problems .if ${MK_SOURCELESS_UCODE} != "no" _cxgb= cxgb .endif .endif .if ${MACHINE_CPUARCH} == "aarch64" _allwinner= allwinner _armv8crypto= armv8crypto _dwwdt= dwwdt _em= em _rockchip= rockchip .endif .if ${MACHINE_CPUARCH} == "i386" || ${MACHINE_CPUARCH} == "amd64" _agp= agp _an= an _aout= aout _bios= bios .if ${MK_SOURCELESS_UCODE} != "no" _bxe= bxe .endif _cardbus= cardbus _cbb= cbb _cpuctl= cpuctl _cpufreq= cpufreq _dpms= dpms _em= em _et= et _ftwd= ftwd _exca= exca _io= io _itwd= itwd _ix= ix _ixv= ixv .if ${MK_SOURCELESS_UCODE} != "no" _lio= lio .endif _nctgpio= nctgpio _ntb= ntb _ocs_fc= ocs_fc _pccard= pccard _qat= qat _qatfw= qatfw .if ${MK_OFED} != "no" || defined(ALL_MODULES) _rdma= rdma .endif _safe= safe _speaker= speaker _splash= splash _sppp= sppp _wbwd= wbwd _aac= aac _aacraid= aacraid _acpi= acpi .if ${MK_CRYPT} != "no" || defined(ALL_MODULES) _aesni= aesni .endif _amd_ecc_inject=amd_ecc_inject _amdsbwd= amdsbwd _amdsmn= amdsmn _amdtemp= amdtemp _arcmsr= arcmsr _asmc= asmc .if ${MK_CRYPT} != "no" || defined(ALL_MODULES) _blake2= blake2 .endif _bytgpio= bytgpio _chvgpio= chvgpio _ciss= ciss _chromebook_platform= chromebook_platform _coretemp= coretemp .if ${MK_SOURCELESS_HOST} != "no" && empty(KCSAN_ENABLED) _hpt27xx= hpt27xx .endif _hptiop= hptiop .if ${MK_SOURCELESS_HOST} != "no" && empty(KCSAN_ENABLED) _hptmv= hptmv _hptnr= hptnr _hptrr= hptrr .endif _hyperv= hyperv _ichwd= ichwd _ida= ida _iir= iir _intelspi= intelspi _ips= ips _isci= isci _ipw= ipw _iwi= iwi _iwm= iwm _iwn= iwn .if ${MK_SOURCELESS_UCODE} != "no" _ipwfw= ipwfw _iwifw= iwifw _iwmfw= iwmfw _iwnfw= iwnfw .endif _mly= mly _nfe= nfe _nvd= nvd _nvme= nvme _nvram= nvram .if ${MK_CRYPT} != "no" || defined(ALL_MODULES) _padlock= padlock _padlock_rng= padlock_rng _rdrand_rng= rdrand_rng .endif _pchtherm = pchtherm _s3= s3 _sdhci_acpi= sdhci_acpi _superio= superio _tpm= tpm _twa= twa _vesa= vesa _viawd= viawd _wpi= wpi .if ${MK_SOURCELESS_UCODE} != "no" 
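# wpi(4) firmware is a binary-only microcode image, hence gated by
# MK_SOURCELESS_UCODE (see the comment at the top of this Makefile).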
_wpifw= wpifw .endif _x86bios= x86bios .endif .if ${MACHINE_CPUARCH} == "amd64" _amdgpio= amdgpio _ccp= ccp _iavf= iavf _ioat= ioat _ixl= ixl _nvdimm= nvdimm _pms= pms _qlxge= qlxge _qlxgb= qlxgb _sume= sume _vmd= vmd .if ${MK_SOURCELESS_UCODE} != "no" _qlxgbe= qlxgbe _qlnx= qlnx .endif _sfxge= sfxge _sgx= sgx _sgx_linux= sgx_linux _smartpqi= smartpqi .if ${MK_BHYVE} != "no" || defined(ALL_MODULES) .if ${KERN_OPTS:MSMP} _vmm= vmm .endif .endif .endif .if ${MACHINE_CPUARCH} == "i386" # XXX some of these can move to the general case when de-i386'ed # XXX some of these can move now, but are untested on other architectures. _3dfx= 3dfx _3dfx_linux= 3dfx_linux .if ${MK_SOURCELESS_HOST} != "no" _ce= ce .endif .if ${MK_SOURCELESS_HOST} != "no" _cp= cp .endif _glxiic= glxiic _glxsb= glxsb _pcfclock= pcfclock _pst= pst _sbni= sbni .endif .if ${MACHINE_ARCH} == "armv7" _cfi= cfi _cpsw= cpsw .endif .if ${MACHINE_CPUARCH} == "powerpc" _aacraid= aacraid _agp= agp _an= an _cardbus= cardbus _cbb= cbb _cfi= cfi _cpufreq= cpufreq _exca= exca _ffec= ffec _nvd= nvd _nvme= nvme _pccard= pccard .endif .if ${MACHINE_ARCH:Mpowerpc64*} != "" _ipmi= ipmi _ixl= ixl _nvram= opal_nvram .endif .if ${MACHINE_CPUARCH} == "powerpc" && ${MACHINE_ARCH} != "powerpcspe" # Don't build powermac_nvram for powerpcspe, it's never supported. _nvram+= powermac_nvram .endif .if (${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "amd64" || \ ${MACHINE_ARCH:Marmv[67]*} != "" || ${MACHINE_CPUARCH} == "i386") _cloudabi32= cloudabi32 .endif .if ${MACHINE_CPUARCH} == "aarch64" || ${MACHINE_CPUARCH} == "amd64" _cloudabi64= cloudabi64 .endif .endif .if ${MACHINE_ARCH:Marmv[67]*} != "" || ${MACHINE_CPUARCH} == "aarch64" _bcm283x_clkman= bcm283x_clkman _bcm283x_pwm= bcm283x_pwm .endif .if !(${COMPILER_TYPE} == "clang" && ${COMPILER_VERSION} < 110000) # LLVM 10 crashes when building if_malo_pci.c, fixed in LLVM11: # https://bugs.llvm.org/show_bug.cgi?id=44351 _malo= malo .endif SUBDIR+=${MODULES_EXTRA} .for reject in ${WITHOUT_MODULES} SUBDIR:= ${SUBDIR:N${reject}} .endfor # Calling kldxref(8) for each module is expensive. .if !defined(NO_XREF) .MAKEFLAGS+= -DNO_XREF afterinstall: .PHONY @if type kldxref >/dev/null 2>&1; then \ ${ECHO} ${KLDXREF_CMD} ${DESTDIR}${KMODDIR}; \ ${KLDXREF_CMD} ${DESTDIR}${KMODDIR}; \ fi .endif SUBDIR:= ${SUBDIR:u:O} .include diff --git a/sys/modules/if_wg/Makefile b/sys/modules/if_wg/Makefile deleted file mode 100644 index 851c55673738..000000000000 --- a/sys/modules/if_wg/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -# $FreeBSD$ - - -KMOD= if_wg - -.PATH: ${SRCTOP}/sys/dev/if_wg - -SRCS= opt_inet.h opt_inet6.h device_if.h bus_if.h ifdi_if.h - -SRCS+= if_wg.c wg_noise.c wg_cookie.c crypto.c - -.include diff --git a/sys/net/if_types.h b/sys/net/if_types.h index 030f773d5654..1103d5f90928 100644 --- a/sys/net/if_types.h +++ b/sys/net/if_types.h @@ -1,277 +1,276 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1989, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)if_types.h	8.3 (Berkeley) 4/28/95
 * $FreeBSD$
 * $NetBSD: if_types.h,v 1.16 2000/04/19 06:30:53 itojun Exp $
 */

#ifndef _NET_IF_TYPES_H_
#define _NET_IF_TYPES_H_

/*
 * Interface types for benefit of parsing media address headers.
 * This list is derived from the SNMP list of ifTypes, originally
 * documented in RFC1573, now maintained as:
 *
 *	http://www.iana.org/assignments/smi-numbers
 */

typedef enum {
	IFT_OTHER = 0x1,		/* none of the following */
	IFT_1822 = 0x2,			/* old-style arpanet imp */
	IFT_HDH1822 = 0x3,		/* HDH arpanet imp */
	IFT_X25DDN = 0x4,		/* x25 to imp */
	IFT_X25 = 0x5,			/* PDN X25 interface (RFC877) */
	IFT_ETHER = 0x6,		/* Ethernet CSMA/CD */
	IFT_ISO88023 = 0x7,		/* CSMA/CD */
	IFT_ISO88024 = 0x8,		/* Token Bus */
	IFT_ISO88025 = 0x9,		/* Token Ring */
	IFT_ISO88026 = 0xa,		/* MAN */
	IFT_STARLAN = 0xb,
	IFT_P10 = 0xc,			/* Proteon 10MBit ring */
	IFT_P80 = 0xd,			/* Proteon 80MBit ring */
	IFT_HY = 0xe,			/* Hyperchannel */
	IFT_FDDI = 0xf,
	IFT_LAPB = 0x10,
	IFT_SDLC = 0x11,
	IFT_T1 = 0x12,
	IFT_CEPT = 0x13,		/* E1 - european T1 */
	IFT_ISDNBASIC = 0x14,
	IFT_ISDNPRIMARY = 0x15,
	IFT_PTPSERIAL = 0x16,		/* Proprietary PTP serial */
	IFT_PPP = 0x17,			/* RFC 1331 */
	IFT_LOOP = 0x18,		/* loopback */
	IFT_EON = 0x19,			/* ISO over IP */
	IFT_XETHER = 0x1a,		/* obsolete 3MB experimental ethernet */
	IFT_NSIP = 0x1b,		/* XNS over IP */
	IFT_SLIP = 0x1c,		/* IP over generic TTY */
	IFT_ULTRA = 0x1d,		/* Ultra Technologies */
	IFT_DS3 = 0x1e,			/* Generic T3 */
	IFT_SIP = 0x1f,			/* SMDS */
	IFT_FRELAY = 0x20,		/* Frame Relay DTE only */
	IFT_RS232 = 0x21,
	IFT_PARA = 0x22,		/* parallel-port */
	IFT_ARCNET = 0x23,
	IFT_ARCNETPLUS = 0x24,
	IFT_ATM = 0x25,			/* ATM cells */
	IFT_MIOX25 = 0x26,
	IFT_SONET = 0x27,		/* SONET or SDH */
	IFT_X25PLE = 0x28,
	IFT_ISO88022LLC = 0x29,
	IFT_LOCALTALK = 0x2a,
	IFT_SMDSDXI = 0x2b,
	IFT_FRELAYDCE = 0x2c,		/* Frame Relay DCE */
	IFT_V35 = 0x2d,
	IFT_HSSI = 0x2e,
	IFT_HIPPI = 0x2f,
	IFT_MODEM = 0x30,		/* Generic Modem */
	IFT_AAL5 = 0x31,		/* AAL5 over ATM */
	IFT_SONETPATH = 0x32,
	IFT_SONETVT = 0x33,
	IFT_SMDSICIP = 0x34,		/* SMDS InterCarrier Interface */
	IFT_PROPVIRTUAL = 0x35,		/* Proprietary Virtual/internal */
	IFT_PROPMUX = 0x36,		/* Proprietary Multiplexing */
	IFT_IEEE80212 = 0x37,		/* 100BaseVG */
	IFT_FIBRECHANNEL = 0x38,	/* Fibre Channel */
	IFT_HIPPIINTERFACE = 0x39,	/* HIPPI interfaces */
	IFT_FRAMERELAYINTERCONNECT = 0x3a, /* Obsolete; use either 0x20 or 0x2c */
	IFT_AFLANE8023 = 0x3b,		/* ATM Emulated LAN for 802.3 */
	IFT_AFLANE8025 = 0x3c,		/* ATM Emulated LAN for 802.5 */
	IFT_CCTEMUL = 0x3d,		/* ATM Emulated circuit */
	IFT_FASTETHER = 0x3e,		/* Fast Ethernet (100BaseT) */
	IFT_ISDN = 0x3f,		/* ISDN and X.25 */
	IFT_V11 = 0x40,			/* CCITT V.11/X.21 */
	IFT_V36 = 0x41,			/* CCITT V.36 */
	IFT_G703AT64K = 0x42,		/* CCITT G703 at 64Kbps */
	IFT_G703AT2MB = 0x43,		/* Obsolete, see DS1-MIB */
	IFT_QLLC = 0x44,		/* SNA QLLC */
	IFT_FASTETHERFX = 0x45,		/* Fast Ethernet (100BaseFX) */
	IFT_CHANNEL = 0x46,		/* channel */
	IFT_IEEE80211 = 0x47,		/* radio spread spectrum (unused) */
	IFT_IBM370PARCHAN = 0x48,	/* IBM System 360/370 OEMI Channel */
	IFT_ESCON = 0x49,		/* IBM Enterprise Systems Connection */
	IFT_DLSW = 0x4a,		/* Data Link Switching */
	IFT_ISDNS = 0x4b,		/* ISDN S/T interface */
	IFT_ISDNU = 0x4c,		/* ISDN U interface */
	IFT_LAPD = 0x4d,		/* Link Access Protocol D */
	IFT_IPSWITCH = 0x4e,		/* IP Switching Objects */
	IFT_RSRB = 0x4f,		/* Remote Source Route Bridging */
	IFT_ATMLOGICAL = 0x50,		/* ATM Logical Port */
	IFT_DS0 = 0x51,			/* Digital Signal Level 0 */
	IFT_DS0BUNDLE = 0x52,		/* group of ds0s on the same ds1 */
	IFT_BSC = 0x53,			/* Bisynchronous Protocol */
	IFT_ASYNC = 0x54,		/* Asynchronous Protocol */
	IFT_CNR = 0x55,			/* Combat Net Radio */
	IFT_ISO88025DTR = 0x56,		/* ISO 802.5r DTR */
	IFT_EPLRS = 0x57,		/* Ext Pos Loc Report Sys */
	IFT_ARAP = 0x58,		/* Appletalk Remote Access Protocol */
	IFT_PROPCNLS = 0x59,		/* Proprietary Connectionless Protocol*/
	IFT_HOSTPAD = 0x5a,		/* CCITT-ITU X.29 PAD Protocol */
	IFT_TERMPAD = 0x5b,		/* CCITT-ITU X.3 PAD Facility */
	IFT_FRAMERELAYMPI = 0x5c,	/* Multiproto Interconnect over FR */
	IFT_X213 = 0x5d,		/* CCITT-ITU X213 */
	IFT_ADSL = 0x5e,		/* Asymmetric Digital Subscriber Loop */
	IFT_RADSL = 0x5f,		/* Rate-Adapt. Digital Subscriber Loop*/
	IFT_SDSL = 0x60,		/* Symmetric Digital Subscriber Loop */
	IFT_VDSL = 0x61,		/* Very H-Speed Digital Subscrib. Loop*/
	IFT_ISO88025CRFPINT = 0x62,	/* ISO 802.5 CRFP */
	IFT_MYRINET = 0x63,		/* Myricom Myrinet */
	IFT_VOICEEM = 0x64,		/* voice recEive and transMit */
	IFT_VOICEFXO = 0x65,		/* voice Foreign Exchange Office */
	IFT_VOICEFXS = 0x66,		/* voice Foreign Exchange Station */
	IFT_VOICEENCAP = 0x67,		/* voice encapsulation */
	IFT_VOICEOVERIP = 0x68,		/* voice over IP encapsulation */
	IFT_ATMDXI = 0x69,		/* ATM DXI */
	IFT_ATMFUNI = 0x6a,		/* ATM FUNI */
	IFT_ATMIMA = 0x6b,		/* ATM IMA */
	IFT_PPPMULTILINKBUNDLE = 0x6c,	/* PPP Multilink Bundle */
	IFT_IPOVERCDLC = 0x6d,		/* IBM ipOverCdlc */
	IFT_IPOVERCLAW = 0x6e,		/* IBM Common Link Access to Workstn */
	IFT_STACKTOSTACK = 0x6f,	/* IBM stackToStack */
	IFT_VIRTUALIPADDRESS = 0x70,	/* IBM VIPA */
	IFT_MPC = 0x71,			/* IBM multi-protocol channel support */
	IFT_IPOVERATM = 0x72,		/* IBM ipOverAtm */
	IFT_ISO88025FIBER = 0x73,	/* ISO 802.5j Fiber Token Ring */
	IFT_TDLC = 0x74,		/* IBM twinaxial data link control */
	IFT_GIGABITETHERNET = 0x75,	/* Gigabit Ethernet */
	IFT_HDLC = 0x76,		/* HDLC */
	IFT_LAPF = 0x77,		/* LAP F */
	IFT_V37 = 0x78,			/* V.37 */
	IFT_X25MLP = 0x79,		/* Multi-Link Protocol */
	IFT_X25HUNTGROUP = 0x7a,	/* X25 Hunt Group */
	IFT_TRANSPHDLC = 0x7b,		/* Transp HDLC */
	IFT_INTERLEAVE = 0x7c,		/* Interleave channel */
	IFT_FAST = 0x7d,		/* Fast channel */
	IFT_IP = 0x7e,			/* IP (for APPN HPR in IP networks) */
	IFT_DOCSCABLEMACLAYER = 0x7f,	/* CATV Mac Layer */
	IFT_DOCSCABLEDOWNSTREAM = 0x80,	/* CATV Downstream interface */
	IFT_DOCSCABLEUPSTREAM = 0x81,	/* CATV Upstream interface */
	IFT_A12MPPSWITCH = 0x82,	/* Avalon Parallel Processor */
	IFT_TUNNEL = 0x83,		/* Encapsulation interface */
	IFT_COFFEE = 0x84,		/* coffee pot */
	IFT_CES = 0x85,			/* Circuit Emulation Service */
	IFT_ATMSUBINTERFACE = 0x86,	/* (x) ATM Sub Interface */
	IFT_L2VLAN = 0x87,		/* Layer 2 Virtual LAN using 802.1Q */
	IFT_L3IPVLAN = 0x88,		/* Layer 3 Virtual LAN - IP Protocol */
	IFT_L3IPXVLAN = 0x89,		/* Layer 3 Virtual LAN - IPX Prot. */
	IFT_DIGITALPOWERLINE = 0x8a,	/* IP over Power Lines */
	IFT_MEDIAMAILOVERIP = 0x8b,	/* (xxx) Multimedia Mail over IP */
	IFT_DTM = 0x8c,			/* Dynamic synchronous Transfer Mode */
	IFT_DCN = 0x8d,			/* Data Communications Network */
	IFT_IPFORWARD = 0x8e,		/* IP Forwarding Interface */
	IFT_MSDSL = 0x8f,		/* Multi-rate Symmetric DSL */
	IFT_IEEE1394 = 0x90,		/* IEEE1394 High Performance SerialBus*/
	IFT_IFGSN = 0x91,		/* HIPPI-6400 */
	IFT_DVBRCCMACLAYER = 0x92,	/* DVB-RCC MAC Layer */
	IFT_DVBRCCDOWNSTREAM = 0x93,	/* DVB-RCC Downstream Channel */
	IFT_DVBRCCUPSTREAM = 0x94,	/* DVB-RCC Upstream Channel */
	IFT_ATMVIRTUAL = 0x95,		/* ATM Virtual Interface */
	IFT_MPLSTUNNEL = 0x96,		/* MPLS Tunnel Virtual Interface */
	IFT_SRP = 0x97,			/* Spatial Reuse Protocol */
	IFT_VOICEOVERATM = 0x98,	/* Voice over ATM */
	IFT_VOICEOVERFRAMERELAY = 0x99,	/* Voice Over Frame Relay */
	IFT_IDSL = 0x9a,		/* Digital Subscriber Loop over ISDN */
	IFT_COMPOSITELINK = 0x9b,	/* Avici Composite Link Interface */
	IFT_SS7SIGLINK = 0x9c,		/* SS7 Signaling Link */
	IFT_PROPWIRELESSP2P = 0x9d,	/* Prop. P2P wireless interface */
	IFT_FRFORWARD = 0x9e,		/* Frame forward Interface */
	IFT_RFC1483 = 0x9f,		/* Multiprotocol over ATM AAL5 */
	IFT_USB = 0xa0,			/* USB Interface */
	IFT_IEEE8023ADLAG = 0xa1,	/* IEEE 802.3ad Link Aggregate*/
	IFT_BGPPOLICYACCOUNTING = 0xa2,	/* BGP Policy Accounting */
	IFT_FRF16MFRBUNDLE = 0xa3,	/* FRF.16 Multilink Frame Relay*/
	IFT_H323GATEKEEPER = 0xa4,	/* H323 Gatekeeper */
	IFT_H323PROXY = 0xa5,		/* H323 Voice and Video Proxy */
	IFT_MPLS = 0xa6,		/* MPLS */
	IFT_MFSIGLINK = 0xa7,		/* Multi-frequency signaling link */
	IFT_HDSL2 = 0xa8,		/* High Bit-Rate DSL, 2nd gen. */
	IFT_SHDSL = 0xa9,		/* Multirate HDSL2 */
	IFT_DS1FDL = 0xaa,		/* Facility Data Link (4Kbps) on a DS1*/
	IFT_POS = 0xab,			/* Packet over SONET/SDH Interface */
	IFT_DVBASILN = 0xac,		/* DVB-ASI Input */
	IFT_DVBASIOUT = 0xad,		/* DVB-ASI Output */
	IFT_PLC = 0xae,			/* Power Line Communications */
	IFT_NFAS = 0xaf,		/* Non-Facility Associated Signaling */
	IFT_TR008 = 0xb0,		/* TR008 */
	IFT_GR303RDT = 0xb1,		/* Remote Digital Terminal */
	IFT_GR303IDT = 0xb2,		/* Integrated Digital Terminal */
	IFT_ISUP = 0xb3,		/* ISUP */
	IFT_PROPDOCSWIRELESSMACLAYER = 0xb4,	/* prop/Wireless MAC Layer */
	IFT_PROPDOCSWIRELESSDOWNSTREAM = 0xb5,	/* prop/Wireless Downstream */
	IFT_PROPDOCSWIRELESSUPSTREAM = 0xb6,	/* prop/Wireless Upstream */
	IFT_HIPERLAN2 = 0xb7,		/* HIPERLAN Type 2 Radio Interface */
	IFT_PROPBWAP2MP = 0xb8,		/* PropBroadbandWirelessAccess P2MP*/
	IFT_SONETOVERHEADCHANNEL = 0xb9,	/* SONET Overhead Channel */
	IFT_DIGITALWRAPPEROVERHEADCHANNEL = 0xba, /* Digital Wrapper Overhead */
	IFT_AAL2 = 0xbb,		/* ATM adaptation layer 2 */
	IFT_RADIOMAC = 0xbc,		/* MAC layer over radio links */
	IFT_ATMRADIO = 0xbd,		/* ATM over radio links */
	IFT_IMT = 0xbe,			/* Inter-Machine Trunks */
	IFT_MVL = 0xbf,			/* Multiple Virtual Lines DSL */
	IFT_REACHDSL = 0xc0,		/* Long Reach DSL */
	IFT_FRDLCIENDPT = 0xc1,		/* Frame Relay DLCI End Point */
	IFT_ATMVCIENDPT = 0xc2,		/* ATM VCI End Point */
	IFT_OPTICALCHANNEL = 0xc3,	/* Optical Channel */
	IFT_OPTICALTRANSPORT = 0xc4,	/* Optical Transport */
	IFT_INFINIBAND = 0xc7,		/* Infiniband */
	IFT_INFINIBANDLAG = 0xc8,	/* Infiniband Link Aggregate */
	IFT_BRIDGE = 0xd1,		/* Transparent bridge interface */
	IFT_STF = 0xd7,			/* 6to4 interface */

	/*
	 * Not based on IANA assignments. Conflicting with IANA assignments.
	 * We should probably make them negative.
	 * This requires changes to struct if_data.
	 */
	IFT_GIF = 0xf0,			/* Generic tunnel interface */
	IFT_PVC = 0xf1,			/* Unused */
	IFT_ENC = 0xf4,			/* Encapsulating interface */
	IFT_PFLOG = 0xf6,		/* PF packet filter logging */
	IFT_PFSYNC = 0xf7,		/* PF packet filter synchronization */
-	IFT_WIREGUARD = 0xf8,		/* WireGuard tunnel */
} ifType;

/*
 * Some (broken) software uses #ifdef IFT_TYPE to check whether
 * an operating system supports a certain interface type. Lack of the
 * #define leads to that piece of functionality being compiled out.
 */
#ifndef BURN_BRIDGES
#define	IFT_BRIDGE	IFT_BRIDGE
#define	IFT_PPP		IFT_PPP
#define	IFT_PROPVIRTUAL	IFT_PROPVIRTUAL
#define	IFT_L2VLAN	IFT_L2VLAN
#define	IFT_L3IPVLAN	IFT_L3IPVLAN
#define	IFT_IEEE1394	IFT_IEEE1394
#define	IFT_INFINIBAND	IFT_INFINIBAND
#endif

#endif /* !_NET_IF_TYPES_H_ */
diff --git a/sys/netinet6/nd6.c b/sys/netinet6/nd6.c
index 7937749c1299..62f0ac733a23 100644
--- a/sys/netinet6/nd6.c
+++ b/sys/netinet6/nd6.c
@@ -1,2662 +1,2662 @@
/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the project nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
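 *
 * An aside on the BURN_BRIDGES block in if_types.h above, as a hedged,
 * standalone illustration (the EX_* names are hypothetical and not part
 * of this change): enum constants are invisible to the preprocessor, so
 * each macro is defined to expand to itself, which keeps "#ifdef IFT_FOO"
 * probes in third-party software working while C code still sees the enum.
 *
 *	#include <stdio.h>
 *
 *	typedef enum { EX_IFT_PPP = 0x17, EX_IFT_BRIDGE = 0xd1 } ex_ifType;
 *	#define EX_IFT_BRIDGE EX_IFT_BRIDGE	// makes #ifdef succeed
 *
 *	int
 *	main(void)
 *	{
 *	#ifdef EX_IFT_BRIDGE
 *		printf("bridge known: %#x\n", (unsigned)EX_IFT_BRIDGE);
 *	#endif
 *	#ifndef EX_IFT_PPP	// no self-define: cpp cannot see it
 *		printf("still usable in C: %#x\n", (unsigned)EX_IFT_PPP);
 *	#endif
 *		return (0);
 *	}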
* * $KAME: nd6.c,v 1.144 2001/05/24 07:44:00 itojun Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_route.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour */ #define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */ #define SIN6(s) ((const struct sockaddr_in6 *)(s)) MALLOC_DEFINE(M_IP6NDP, "ip6ndp", "IPv6 Neighbor Discovery"); /* timer values */ VNET_DEFINE(int, nd6_prune) = 1; /* walk list every 1 seconds */ VNET_DEFINE(int, nd6_delay) = 5; /* delay first probe time 5 second */ VNET_DEFINE(int, nd6_umaxtries) = 3; /* maximum unicast query */ VNET_DEFINE(int, nd6_mmaxtries) = 3; /* maximum multicast query */ VNET_DEFINE(int, nd6_useloopback) = 1; /* use loopback interface for * local traffic */ VNET_DEFINE(int, nd6_gctimer) = (60 * 60 * 24); /* 1 day: garbage * collection timer */ /* preventing too many loops in ND option parsing */ VNET_DEFINE_STATIC(int, nd6_maxndopt) = 10; /* max # of ND options allowed */ VNET_DEFINE(int, nd6_maxnudhint) = 0; /* max # of subsequent upper * layer hints */ VNET_DEFINE_STATIC(int, nd6_maxqueuelen) = 16; /* max pkts cached in unresolved * ND entries */ #define V_nd6_maxndopt VNET(nd6_maxndopt) #define V_nd6_maxqueuelen VNET(nd6_maxqueuelen) #ifdef ND6_DEBUG VNET_DEFINE(int, nd6_debug) = 1; #else VNET_DEFINE(int, nd6_debug) = 0; #endif static eventhandler_tag lle_event_eh, iflladdr_event_eh, ifnet_link_event_eh; VNET_DEFINE(struct nd_prhead, nd_prefix); VNET_DEFINE(struct rwlock, nd6_lock); VNET_DEFINE(uint64_t, nd6_list_genid); VNET_DEFINE(struct mtx, nd6_onlink_mtx); VNET_DEFINE(int, nd6_recalc_reachtm_interval) = ND6_RECALC_REACHTM_INTERVAL; #define V_nd6_recalc_reachtm_interval VNET(nd6_recalc_reachtm_interval) int (*send_sendso_input_hook)(struct mbuf *, struct ifnet *, int, int); static int nd6_is_new_addr_neighbor(const struct sockaddr_in6 *, struct ifnet *); static void nd6_setmtu0(struct ifnet *, struct nd_ifinfo *); static void nd6_slowtimo(void *); static int regen_tmpaddr(struct in6_ifaddr *); static void nd6_free(struct llentry **, int); static void nd6_free_redirect(const struct llentry *); static void nd6_llinfo_timer(void *); static void nd6_llinfo_settimer_locked(struct llentry *, long); static void clear_llinfo_pqueue(struct llentry *); static int nd6_resolve_slow(struct ifnet *, int, struct mbuf *, const struct sockaddr_in6 *, u_char *, uint32_t *, struct llentry **); static int nd6_need_cache(struct ifnet *); VNET_DEFINE_STATIC(struct callout, nd6_slowtimo_ch); #define V_nd6_slowtimo_ch VNET(nd6_slowtimo_ch) VNET_DEFINE_STATIC(struct callout, nd6_timer_ch); #define V_nd6_timer_ch VNET(nd6_timer_ch) SYSCTL_DECL(_net_inet6_icmp6); static void nd6_lle_event(void *arg __unused, struct llentry *lle, int evt) { struct rt_addrinfo rtinfo; struct sockaddr_in6 dst; struct sockaddr_dl gw; struct ifnet *ifp; int type; int fibnum; LLE_WLOCK_ASSERT(lle); if (lltable_get_af(lle->lle_tbl) != AF_INET6) return; switch (evt) { case LLENTRY_RESOLVED: type = RTM_ADD; KASSERT(lle->la_flags & LLE_VALID, ("%s: %p resolved but not valid?", __func__, lle)); break; case LLENTRY_EXPIRED: type = RTM_DELETE; break; default: return; } ifp = 
lltable_get_ifp(lle->lle_tbl); bzero(&dst, sizeof(dst)); bzero(&gw, sizeof(gw)); bzero(&rtinfo, sizeof(rtinfo)); lltable_fill_sa_entry(lle, (struct sockaddr *)&dst); dst.sin6_scope_id = in6_getscopezone(ifp, in6_addrscope(&dst.sin6_addr)); gw.sdl_len = sizeof(struct sockaddr_dl); gw.sdl_family = AF_LINK; gw.sdl_alen = ifp->if_addrlen; gw.sdl_index = ifp->if_index; gw.sdl_type = ifp->if_type; if (evt == LLENTRY_RESOLVED) bcopy(lle->ll_addr, gw.sdl_data, ifp->if_addrlen); rtinfo.rti_info[RTAX_DST] = (struct sockaddr *)&dst; rtinfo.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&gw; rtinfo.rti_addrs = RTA_DST | RTA_GATEWAY; fibnum = V_rt_add_addr_allfibs ? RT_ALL_FIBS : ifp->if_fib; rt_missmsg_fib(type, &rtinfo, RTF_HOST | RTF_LLDATA | ( type == RTM_ADD ? RTF_UP: 0), 0, fibnum); } /* * A handler for interface link layer address change event. */ static void nd6_iflladdr(void *arg __unused, struct ifnet *ifp) { if (ifp->if_afdata[AF_INET6] == NULL) return; lltable_update_ifaddr(LLTABLE6(ifp)); } void nd6_init(void) { mtx_init(&V_nd6_onlink_mtx, "nd6 onlink", NULL, MTX_DEF); rw_init(&V_nd6_lock, "nd6 list"); LIST_INIT(&V_nd_prefix); nd6_defrouter_init(); /* Start timers. */ callout_init(&V_nd6_slowtimo_ch, 0); callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz, nd6_slowtimo, curvnet); callout_init(&V_nd6_timer_ch, 0); callout_reset(&V_nd6_timer_ch, hz, nd6_timer, curvnet); nd6_dad_init(); if (IS_DEFAULT_VNET(curvnet)) { lle_event_eh = EVENTHANDLER_REGISTER(lle_event, nd6_lle_event, NULL, EVENTHANDLER_PRI_ANY); iflladdr_event_eh = EVENTHANDLER_REGISTER(iflladdr_event, nd6_iflladdr, NULL, EVENTHANDLER_PRI_ANY); ifnet_link_event_eh = EVENTHANDLER_REGISTER(ifnet_link_event, nd6_ifnet_link_event, NULL, EVENTHANDLER_PRI_ANY); } } #ifdef VIMAGE void nd6_destroy() { callout_drain(&V_nd6_slowtimo_ch); callout_drain(&V_nd6_timer_ch); if (IS_DEFAULT_VNET(curvnet)) { EVENTHANDLER_DEREGISTER(ifnet_link_event, ifnet_link_event_eh); EVENTHANDLER_DEREGISTER(lle_event, lle_event_eh); EVENTHANDLER_DEREGISTER(iflladdr_event, iflladdr_event_eh); } rw_destroy(&V_nd6_lock); mtx_destroy(&V_nd6_onlink_mtx); } #endif struct nd_ifinfo * nd6_ifattach(struct ifnet *ifp) { struct nd_ifinfo *nd; nd = malloc(sizeof(*nd), M_IP6NDP, M_WAITOK | M_ZERO); nd->initialized = 1; nd->chlim = IPV6_DEFHLIM; nd->basereachable = REACHABLE_TIME; nd->reachable = ND_COMPUTE_RTIME(nd->basereachable); nd->retrans = RETRANS_TIMER; nd->flags = ND6_IFF_PERFORMNUD; /* Set IPv6 disabled on all interfaces but loopback by default. */ if ((ifp->if_flags & IFF_LOOPBACK) == 0) nd->flags |= ND6_IFF_IFDISABLED; /* A loopback interface always has ND6_IFF_AUTO_LINKLOCAL. * XXXHRS: Clear ND6_IFF_AUTO_LINKLOCAL on an IFT_BRIDGE interface by * default regardless of the V_ip6_auto_linklocal configuration to * give a reasonable default behavior. */ - if ((V_ip6_auto_linklocal && ifp->if_type != IFT_BRIDGE && - ifp->if_type != IFT_WIREGUARD) || (ifp->if_flags & IFF_LOOPBACK)) + if ((V_ip6_auto_linklocal && ifp->if_type != IFT_BRIDGE) || + (ifp->if_flags & IFF_LOOPBACK)) nd->flags |= ND6_IFF_AUTO_LINKLOCAL; /* * A loopback interface does not need to accept RTADV. * XXXHRS: Clear ND6_IFF_ACCEPT_RTADV on an IFT_BRIDGE interface by * default regardless of the V_ip6_accept_rtadv configuration to * prevent the interface from accepting RA messages arrived * on one of the member interfaces with ND6_IFF_ACCEPT_RTADV. 
*/ if (V_ip6_accept_rtadv && !(ifp->if_flags & IFF_LOOPBACK) && (ifp->if_type != IFT_BRIDGE)) { nd->flags |= ND6_IFF_ACCEPT_RTADV; /* If we globally accept rtadv, assume IPv6 on. */ nd->flags &= ~ND6_IFF_IFDISABLED; } if (V_ip6_no_radr && !(ifp->if_flags & IFF_LOOPBACK)) nd->flags |= ND6_IFF_NO_RADR; /* XXX: we cannot call nd6_setmtu since ifp is not fully initialized */ nd6_setmtu0(ifp, nd); return nd; } void nd6_ifdetach(struct ifnet *ifp, struct nd_ifinfo *nd) { struct epoch_tracker et; struct ifaddr *ifa, *next; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; /* stop DAD processing */ nd6_dad_stop(ifa); } NET_EPOCH_EXIT(et); free(nd, M_IP6NDP); } /* * Reset ND level link MTU. This function is called when the physical MTU * changes, which means we might have to adjust the ND level MTU. */ void nd6_setmtu(struct ifnet *ifp) { if (ifp->if_afdata[AF_INET6] == NULL) return; nd6_setmtu0(ifp, ND_IFINFO(ifp)); } /* XXX todo: do not maintain copy of ifp->if_mtu in ndi->maxmtu */ void nd6_setmtu0(struct ifnet *ifp, struct nd_ifinfo *ndi) { u_int32_t omaxmtu; omaxmtu = ndi->maxmtu; ndi->maxmtu = ifp->if_mtu; /* * Decreasing the interface MTU under IPV6 minimum MTU may cause * undesirable situation. We thus notify the operator of the change * explicitly. The check for omaxmtu is necessary to restrict the * log to the case of changing the MTU, not initializing it. */ if (omaxmtu >= IPV6_MMTU && ndi->maxmtu < IPV6_MMTU) { log(LOG_NOTICE, "nd6_setmtu0: " "new link MTU on %s (%lu) is too small for IPv6\n", if_name(ifp), (unsigned long)ndi->maxmtu); } if (ndi->maxmtu > V_in6_maxmtu) in6_setmaxmtu(); /* check all interfaces just in case */ } void nd6_option_init(void *opt, int icmp6len, union nd_opts *ndopts) { bzero(ndopts, sizeof(*ndopts)); ndopts->nd_opts_search = (struct nd_opt_hdr *)opt; ndopts->nd_opts_last = (struct nd_opt_hdr *)(((u_char *)opt) + icmp6len); if (icmp6len == 0) { ndopts->nd_opts_done = 1; ndopts->nd_opts_search = NULL; } } /* * Take one ND option. */ struct nd_opt_hdr * nd6_option(union nd_opts *ndopts) { struct nd_opt_hdr *nd_opt; int olen; KASSERT(ndopts != NULL, ("%s: ndopts == NULL", __func__)); KASSERT(ndopts->nd_opts_last != NULL, ("%s: uninitialized ndopts", __func__)); if (ndopts->nd_opts_search == NULL) return NULL; if (ndopts->nd_opts_done) return NULL; nd_opt = ndopts->nd_opts_search; /* make sure nd_opt_len is inside the buffer */ if ((caddr_t)&nd_opt->nd_opt_len >= (caddr_t)ndopts->nd_opts_last) { bzero(ndopts, sizeof(*ndopts)); return NULL; } olen = nd_opt->nd_opt_len << 3; if (olen == 0) { /* * Message validation requires that all included * options have a length that is greater than zero. */ bzero(ndopts, sizeof(*ndopts)); return NULL; } ndopts->nd_opts_search = (struct nd_opt_hdr *)((caddr_t)nd_opt + olen); if (ndopts->nd_opts_search > ndopts->nd_opts_last) { /* option overruns the end of buffer, invalid */ bzero(ndopts, sizeof(*ndopts)); return NULL; } else if (ndopts->nd_opts_search == ndopts->nd_opts_last) { /* reached the end of options chain */ ndopts->nd_opts_done = 1; ndopts->nd_opts_search = NULL; } return nd_opt; } /* * Parse multiple ND options. * This function is much easier to use, for ND routines that do not need * multiple options of the same type. 
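 *
 * As an aside, a self-contained sketch (hypothetical ex_* names) of the
 * bounds rule that nd6_option() enforces above: nd_opt_len counts
 * 8-octet units, so a zero length is invalid and "len << 3" advances
 * the cursor.
 *
 *	#include <stddef.h>
 *	#include <stdint.h>
 *
 *	struct ex_opt_hdr {
 *		uint8_t type;
 *		uint8_t len;	// in units of 8 octets (RFC 4861)
 *	};
 *
 *	// Return the option at *cur and advance, or NULL when invalid/done.
 *	static const struct ex_opt_hdr *
 *	ex_next_opt(const uint8_t **cur, const uint8_t *end)
 *	{
 *		const struct ex_opt_hdr *opt;
 *		size_t olen;
 *
 *		if (*cur == NULL || (size_t)(end - *cur) < sizeof(*opt))
 *			return (NULL);
 *		opt = (const struct ex_opt_hdr *)*cur;
 *		olen = (size_t)opt->len << 3;
 *		if (olen == 0 || olen > (size_t)(end - *cur))
 *			return (NULL);	// zero-length or overruns buffer
 *		*cur += olen;
 *		return (opt);
 *	}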
*/ int nd6_options(union nd_opts *ndopts) { struct nd_opt_hdr *nd_opt; int i = 0; KASSERT(ndopts != NULL, ("%s: ndopts == NULL", __func__)); KASSERT(ndopts->nd_opts_last != NULL, ("%s: uninitialized ndopts", __func__)); if (ndopts->nd_opts_search == NULL) return 0; while (1) { nd_opt = nd6_option(ndopts); if (nd_opt == NULL && ndopts->nd_opts_last == NULL) { /* * Message validation requires that all included * options have a length that is greater than zero. */ ICMP6STAT_INC(icp6s_nd_badopt); bzero(ndopts, sizeof(*ndopts)); return -1; } if (nd_opt == NULL) goto skip1; switch (nd_opt->nd_opt_type) { case ND_OPT_SOURCE_LINKADDR: case ND_OPT_TARGET_LINKADDR: case ND_OPT_MTU: case ND_OPT_REDIRECTED_HEADER: case ND_OPT_NONCE: if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) { nd6log((LOG_INFO, "duplicated ND6 option found (type=%d)\n", nd_opt->nd_opt_type)); /* XXX bark? */ } else { ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt; } break; case ND_OPT_PREFIX_INFORMATION: if (ndopts->nd_opt_array[nd_opt->nd_opt_type] == 0) { ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt; } ndopts->nd_opts_pi_end = (struct nd_opt_prefix_info *)nd_opt; break; /* What about ND_OPT_ROUTE_INFO? RFC 4191 */ case ND_OPT_RDNSS: /* RFC 6106 */ case ND_OPT_DNSSL: /* RFC 6106 */ /* * Silently ignore options we know and do not care about * in the kernel. */ break; default: /* * Unknown options must be silently ignored, * to accommodate future extension to the protocol. */ nd6log((LOG_DEBUG, "nd6_options: unsupported option %d - " "option ignored\n", nd_opt->nd_opt_type)); } skip1: i++; if (i > V_nd6_maxndopt) { ICMP6STAT_INC(icp6s_nd_toomanyopt); nd6log((LOG_INFO, "too many loop in nd opt\n")); break; } if (ndopts->nd_opts_done) break; } return 0; } /* * ND6 timer routine to handle ND6 entries */ static void nd6_llinfo_settimer_locked(struct llentry *ln, long tick) { int canceled; LLE_WLOCK_ASSERT(ln); if (tick < 0) { ln->la_expire = 0; ln->ln_ntick = 0; canceled = callout_stop(&ln->lle_timer); } else { ln->la_expire = time_uptime + tick / hz; LLE_ADDREF(ln); if (tick > INT_MAX) { ln->ln_ntick = tick - INT_MAX; canceled = callout_reset(&ln->lle_timer, INT_MAX, nd6_llinfo_timer, ln); } else { ln->ln_ntick = 0; canceled = callout_reset(&ln->lle_timer, tick, nd6_llinfo_timer, ln); } } if (canceled > 0) LLE_REMREF(ln); } /* * Gets source address of the first packet in hold queue * and stores it in @src. * Returns pointer to @src (if hold queue is not empty) or NULL. * * Set noinline to be dtrace-friendly */ static __noinline struct in6_addr * nd6_llinfo_get_holdsrc(struct llentry *ln, struct in6_addr *src) { struct ip6_hdr hdr; struct mbuf *m; if (ln->la_hold == NULL) return (NULL); /* * assume every packet in la_hold has the same IP header */ m = ln->la_hold; if (sizeof(hdr) > m->m_len) return (NULL); m_copydata(m, 0, sizeof(hdr), (caddr_t)&hdr); *src = hdr.ip6_src; return (src); } /* * Checks if we need to switch from STALE state. * * RFC 4861 requires switching from STALE to DELAY state * on first packet matching entry, waiting V_nd6_delay and * transition to PROBE state (if upper layer confirmation was * not received). * * This code performs a bit differently: * On packet hit we don't change state (but desired state * can be guessed by control plane). However, after V_nd6_delay * seconds code will transition to PROBE state (so DELAY state * is kinda skipped in most situations). 
 *
 * Typically, V_nd6_gctimer is bigger than V_nd6_delay, so
 * we perform the following upon entering STALE state:
 *
 * 1) Arm timer to run each V_nd6_delay seconds to make sure that
 *    if packet was transmitted at the start of given interval, we
 *    would be able to switch to PROBE state in V_nd6_delay seconds
 *    as user expects.
 *
 * 2) Reschedule timer until original V_nd6_gctimer expires keeping
 *    lle in STALE state (remaining timer value stored in lle_remtime).
 *
 * 3) Reschedule timer if packet was transmitted less than V_nd6_delay
 *    seconds ago.
 *
 * Returns non-zero value if the entry is still STALE (storing
 * the next timer interval in @pdelay).
 *
 * Returns zero value if original timer expired or we need to switch to
 * PROBE (store that in @do_switch variable).
 */
static int
nd6_is_stale(struct llentry *lle, long *pdelay, int *do_switch)
{
	int nd_delay, nd_gctimer, r_skip_req;
	time_t lle_hittime;
	long delay;

	*do_switch = 0;
	nd_gctimer = V_nd6_gctimer;
	nd_delay = V_nd6_delay;

	LLE_REQ_LOCK(lle);
	r_skip_req = lle->r_skip_req;
	lle_hittime = lle->lle_hittime;
	LLE_REQ_UNLOCK(lle);

	if (r_skip_req > 0) {
		/*
		 * Nonzero r_skip_req value was set upon entering
		 * STALE state. Since value was not changed, no
		 * packets were passed using this lle. Ask for
		 * timer reschedule and keep STALE state.
		 */
		delay = (long)(MIN(nd_gctimer, nd_delay));
		delay *= hz;
		if (lle->lle_remtime > delay)
			lle->lle_remtime -= delay;
		else {
			delay = lle->lle_remtime;
			lle->lle_remtime = 0;
		}
		if (delay == 0) {
			/*
			 * The original nd6_gctimer timeout ended,
			 * no more rescheduling.
			 */
			return (0);
		}
		*pdelay = delay;
		return (1);
	}

	/*
	 * Packet received. Verify timestamp
	 */
	delay = (long)(time_uptime - lle_hittime);
	if (delay < nd_delay) {
		/*
		 * V_nd6_delay still not passed since the first
		 * hit in STALE state.
		 * Reschedule timer and return.
		 */
		*pdelay = (long)(nd_delay - delay) * hz;
		return (1);
	}

	/* Request switching to probe */
	*do_switch = 1;
	return (0);
}

/*
 * Switch @lle state to new state optionally arming timers.
 *
 * Set noinline to be dtrace-friendly
 */
__noinline void
nd6_llinfo_setstate(struct llentry *lle, int newstate)
{
	struct ifnet *ifp;
	int nd_gctimer, nd_delay;
	long delay, remtime;

	delay = 0;
	remtime = 0;

	switch (newstate) {
	case ND6_LLINFO_INCOMPLETE:
		ifp = lle->lle_tbl->llt_ifp;
		delay = (long)ND_IFINFO(ifp)->retrans * hz / 1000;
		break;
	case ND6_LLINFO_REACHABLE:
		if (!ND6_LLINFO_PERMANENT(lle)) {
			ifp = lle->lle_tbl->llt_ifp;
			delay = (long)ND_IFINFO(ifp)->reachable * hz;
		}
		break;
	case ND6_LLINFO_STALE:
		/*
		 * Notify fast path that we want to know if any packet
		 * is transmitted by setting r_skip_req.
		 */
		LLE_REQ_LOCK(lle);
		lle->r_skip_req = 1;
		LLE_REQ_UNLOCK(lle);
		nd_delay = V_nd6_delay;
		nd_gctimer = V_nd6_gctimer;

		delay = (long)(MIN(nd_gctimer, nd_delay)) * hz;
		remtime = (long)nd_gctimer * hz - delay;
		break;
	case ND6_LLINFO_DELAY:
		lle->la_asked = 0;
		delay = (long)V_nd6_delay * hz;
		break;
	}

	if (delay > 0)
		nd6_llinfo_settimer_locked(lle, delay);

	lle->lle_remtime = remtime;
	lle->ln_state = newstate;
}

/*
 * Timer-dependent part of nd state machine.
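 *
 * As an aside, the STALE rescheduling arithmetic from nd6_is_stale()
 * above in isolation (hypothetical names, intervals in seconds rather
 * than ticks): the timer fires every min(nd6_gctimer, nd6_delay)
 * seconds while the remaining nd6_gctimer budget is carried in
 * *remtime; a zero return means the GC budget is exhausted.
 *
 *	#define EX_MIN(a, b)	((a) < (b) ? (a) : (b))
 *
 *	static long
 *	ex_stale_next_tick(long *remtime, long nd6_delay, long nd6_gctimer)
 *	{
 *		long delay = EX_MIN(nd6_gctimer, nd6_delay);
 *
 *		if (*remtime > delay)
 *			*remtime -= delay;
 *		else {
 *			delay = *remtime;	// budget tail, then 0
 *			*remtime = 0;
 *		}
 *		return (delay);
 *	}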
 *
 * Set noinline to be dtrace-friendly
 */
static __noinline void
nd6_llinfo_timer(void *arg)
{
	struct epoch_tracker et;
	struct llentry *ln;
	struct in6_addr *dst, *pdst, *psrc, src;
	struct ifnet *ifp;
	struct nd_ifinfo *ndi;
	int do_switch, send_ns;
	long delay;

	KASSERT(arg != NULL, ("%s: arg NULL", __func__));
	ln = (struct llentry *)arg;
	ifp = lltable_get_ifp(ln->lle_tbl);
	CURVNET_SET(ifp->if_vnet);
	ND6_RLOCK();
	LLE_WLOCK(ln);
	if (callout_pending(&ln->lle_timer)) {
		/*
		 * Here we are a bit odd in the treatment of
		 * active/pending. If the pending bit is set, it got
		 * rescheduled before I ran. The active
		 * bit we ignore, since if it was stopped
		 * in ll_tablefree() and was currently running
		 * it would have returned 0 so the code would
		 * not have deleted it since the callout could
		 * not be stopped so we want to go through
		 * with the delete here now. If the callout
		 * was restarted, the pending bit will be back on and
		 * we just want to bail since the callout_reset would
		 * return 1 and our reference would have been removed
		 * by nd6_llinfo_settimer_locked above since canceled
		 * would have been 1.
		 */
		LLE_WUNLOCK(ln);
		ND6_RUNLOCK();
		CURVNET_RESTORE();
		return;
	}
	NET_EPOCH_ENTER(et);
	ndi = ND_IFINFO(ifp);
	send_ns = 0;
	dst = &ln->r_l3addr.addr6;
	pdst = dst;

	if (ln->ln_ntick > 0) {
		if (ln->ln_ntick > INT_MAX) {
			ln->ln_ntick -= INT_MAX;
			nd6_llinfo_settimer_locked(ln, INT_MAX);
		} else {
			ln->ln_ntick = 0;
			nd6_llinfo_settimer_locked(ln, ln->ln_ntick);
		}
		goto done;
	}

	if (ln->la_flags & LLE_STATIC) {
		goto done;
	}

	if (ln->la_flags & LLE_DELETED) {
		nd6_free(&ln, 0);
		goto done;
	}

	switch (ln->ln_state) {
	case ND6_LLINFO_INCOMPLETE:
		if (ln->la_asked < V_nd6_mmaxtries) {
			ln->la_asked++;
			send_ns = 1;
			/* Send NS to multicast address */
			pdst = NULL;
		} else {
			struct mbuf *m = ln->la_hold;
			if (m) {
				struct mbuf *m0;

				/*
				 * assuming every packet in la_hold has the
				 * same IP header. Send error after unlock.
				 */
				m0 = m->m_nextpkt;
				m->m_nextpkt = NULL;
				ln->la_hold = m0;
				clear_llinfo_pqueue(ln);
			}
			nd6_free(&ln, 0);
			if (m != NULL) {
				struct mbuf *n = m;

				/*
				 * if there are any unmapped mbufs, we
				 * must free them, rather than using
				 * them for an ICMP, as they cannot be
				 * checksummed.
				 */
				while ((n = n->m_next) != NULL) {
					if (n->m_flags & M_EXTPG)
						break;
				}
				if (n != NULL) {
					m_freem(m);
					m = NULL;
				} else {
					icmp6_error2(m, ICMP6_DST_UNREACH,
					    ICMP6_DST_UNREACH_ADDR, 0, ifp);
				}
			}
		}
		break;
	case ND6_LLINFO_REACHABLE:
		if (!ND6_LLINFO_PERMANENT(ln))
			nd6_llinfo_setstate(ln, ND6_LLINFO_STALE);
		break;

	case ND6_LLINFO_STALE:
		if (nd6_is_stale(ln, &delay, &do_switch) != 0) {
			/*
			 * No packet has used this entry and GC timeout
			 * has not been passed. Reschedule timer and
			 * return.
			 */
			nd6_llinfo_settimer_locked(ln, delay);
			break;
		}

		if (do_switch == 0) {
			/*
			 * GC timer has ended and entry hasn't been used.
			 * Run Garbage collector (RFC 4861, 5.3)
			 */
			if (!ND6_LLINFO_PERMANENT(ln))
				nd6_free(&ln, 1);
			break;
		}

		/* Entry has been used AND delay timer has ended.
*/ /* FALLTHROUGH */ case ND6_LLINFO_DELAY: if (ndi && (ndi->flags & ND6_IFF_PERFORMNUD) != 0) { /* We need NUD */ ln->la_asked = 1; nd6_llinfo_setstate(ln, ND6_LLINFO_PROBE); send_ns = 1; } else nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); /* XXX */ break; case ND6_LLINFO_PROBE: if (ln->la_asked < V_nd6_umaxtries) { ln->la_asked++; send_ns = 1; } else { nd6_free(&ln, 0); } break; default: panic("%s: paths in a dark night can be confusing: %d", __func__, ln->ln_state); } done: if (ln != NULL) ND6_RUNLOCK(); if (send_ns != 0) { nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000); psrc = nd6_llinfo_get_holdsrc(ln, &src); LLE_FREE_LOCKED(ln); ln = NULL; nd6_ns_output(ifp, psrc, pdst, dst, NULL); } if (ln != NULL) LLE_FREE_LOCKED(ln); NET_EPOCH_EXIT(et); CURVNET_RESTORE(); } /* * ND6 timer routine to expire default route list and prefix list */ void nd6_timer(void *arg) { CURVNET_SET((struct vnet *) arg); struct epoch_tracker et; struct nd_prhead prl; struct nd_prefix *pr, *npr; struct ifnet *ifp; struct in6_ifaddr *ia6, *nia6; uint64_t genid; LIST_INIT(&prl); NET_EPOCH_ENTER(et); nd6_defrouter_timer(); /* * expire interface addresses. * in the past the loop was inside prefix expiry processing. * However, from a stricter speci-confrmance standpoint, we should * rather separate address lifetimes and prefix lifetimes. * * XXXRW: in6_ifaddrhead locking. */ addrloop: CK_STAILQ_FOREACH_SAFE(ia6, &V_in6_ifaddrhead, ia_link, nia6) { /* check address lifetime */ if (IFA6_IS_INVALID(ia6)) { int regen = 0; /* * If the expiring address is temporary, try * regenerating a new one. This would be useful when * we suspended a laptop PC, then turned it on after a * period that could invalidate all temporary * addresses. Although we may have to restart the * loop (see below), it must be after purging the * address. Otherwise, we'd see an infinite loop of * regeneration. */ if (V_ip6_use_tempaddr && (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0) { if (regen_tmpaddr(ia6) == 0) regen = 1; } in6_purgeaddr(&ia6->ia_ifa); if (regen) goto addrloop; /* XXX: see below */ } else if (IFA6_IS_DEPRECATED(ia6)) { int oldflags = ia6->ia6_flags; ia6->ia6_flags |= IN6_IFF_DEPRECATED; /* * If a temporary address has just become deprecated, * regenerate a new one if possible. */ if (V_ip6_use_tempaddr && (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && (oldflags & IN6_IFF_DEPRECATED) == 0) { if (regen_tmpaddr(ia6) == 0) { /* * A new temporary address is * generated. * XXX: this means the address chain * has changed while we are still in * the loop. Although the change * would not cause disaster (because * it's not a deletion, but an * addition,) we'd rather restart the * loop just for safety. Or does this * significantly reduce performance?? */ goto addrloop; } } } else if ((ia6->ia6_flags & IN6_IFF_TENTATIVE) != 0) { /* * Schedule DAD for a tentative address. This happens * if the interface was down or not running * when the address was configured. */ int delay; delay = arc4random() % (MAX_RTR_SOLICITATION_DELAY * hz); nd6_dad_start((struct ifaddr *)ia6, delay); } else { /* * Check status of the interface. If it is down, * mark the address as tentative for future DAD. */ ifp = ia6->ia_ifp; if ((ND_IFINFO(ifp)->flags & ND6_IFF_NO_DAD) == 0 && ((ifp->if_flags & IFF_UP) == 0 || (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || (ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) != 0)){ ia6->ia6_flags &= ~IN6_IFF_DUPLICATED; ia6->ia6_flags |= IN6_IFF_TENTATIVE; } /* * A new RA might have made a deprecated address * preferred. 
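 *
 * An aside: the lifetime tests behind IFA6_IS_DEPRECATED() and
 * IFA6_IS_INVALID() modeled standalone (assumed field names, not the
 * kernel structures). An address becomes deprecated once its preferred
 * lifetime passes and invalid once its valid lifetime passes, both
 * measured against time_uptime.
 *
 *	#include <stdbool.h>
 *	#include <stdint.h>
 *
 *	#define EX_INFINITE_LIFETIME 0xffffffffU
 *
 *	struct ex_lifetime {
 *		uint64_t updatetime;	// time_uptime at last update
 *		uint32_t pltime;	// preferred lifetime, seconds
 *		uint32_t vltime;	// valid lifetime, seconds
 *	};
 *
 *	static bool
 *	ex_is_deprecated(const struct ex_lifetime *lt, uint64_t now)
 *	{
 *		return (lt->pltime != EX_INFINITE_LIFETIME &&
 *		    now - lt->updatetime > lt->pltime);
 *	}
 *
 *	static bool
 *	ex_is_invalid(const struct ex_lifetime *lt, uint64_t now)
 *	{
 *		return (lt->vltime != EX_INFINITE_LIFETIME &&
 *		    now - lt->updatetime > lt->vltime);
 *	}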
*/ ia6->ia6_flags &= ~IN6_IFF_DEPRECATED; } } NET_EPOCH_EXIT(et); ND6_WLOCK(); restart: LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, npr) { /* * Expire prefixes. Since the pltime is only used for * autoconfigured addresses, pltime processing for prefixes is * not necessary. * * Only unlink after all derived addresses have expired. This * may not occur until two hours after the prefix has expired * per RFC 4862. If the prefix expires before its derived * addresses, mark it off-link. This will be done automatically * after unlinking if no address references remain. */ if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME || time_uptime - pr->ndpr_lastupdate <= pr->ndpr_vltime) continue; if (pr->ndpr_addrcnt == 0) { nd6_prefix_unlink(pr, &prl); continue; } if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0) { genid = V_nd6_list_genid; nd6_prefix_ref(pr); ND6_WUNLOCK(); ND6_ONLINK_LOCK(); (void)nd6_prefix_offlink(pr); ND6_ONLINK_UNLOCK(); ND6_WLOCK(); nd6_prefix_rele(pr); if (genid != V_nd6_list_genid) goto restart; } } ND6_WUNLOCK(); while ((pr = LIST_FIRST(&prl)) != NULL) { LIST_REMOVE(pr, ndpr_entry); nd6_prefix_del(pr); } callout_reset(&V_nd6_timer_ch, V_nd6_prune * hz, nd6_timer, curvnet); CURVNET_RESTORE(); } /* * ia6 - deprecated/invalidated temporary address */ static int regen_tmpaddr(struct in6_ifaddr *ia6) { struct ifaddr *ifa; struct ifnet *ifp; struct in6_ifaddr *public_ifa6 = NULL; NET_EPOCH_ASSERT(); ifp = ia6->ia_ifa.ifa_ifp; CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { struct in6_ifaddr *it6; if (ifa->ifa_addr->sa_family != AF_INET6) continue; it6 = (struct in6_ifaddr *)ifa; /* ignore no autoconf addresses. */ if ((it6->ia6_flags & IN6_IFF_AUTOCONF) == 0) continue; /* ignore autoconf addresses with different prefixes. */ if (it6->ia6_ndpr == NULL || it6->ia6_ndpr != ia6->ia6_ndpr) continue; /* * Now we are looking at an autoconf address with the same * prefix as ours. If the address is temporary and is still * preferred, do not create another one. It would be rare, but * could happen, for example, when we resume a laptop PC after * a long period. */ if ((it6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && !IFA6_IS_DEPRECATED(it6)) { public_ifa6 = NULL; break; } /* * This is a public autoconf address that has the same prefix * as ours. If it is preferred, keep it. We can't break the * loop here, because there may be a still-preferred temporary * address with the prefix. */ if (!IFA6_IS_DEPRECATED(it6)) public_ifa6 = it6; } if (public_ifa6 != NULL) ifa_ref(&public_ifa6->ia_ifa); if (public_ifa6 != NULL) { int e; if ((e = in6_tmpifadd(public_ifa6, 0, 0)) != 0) { ifa_free(&public_ifa6->ia_ifa); log(LOG_NOTICE, "regen_tmpaddr: failed to create a new" " tmp addr,errno=%d\n", e); return (-1); } ifa_free(&public_ifa6->ia_ifa); return (0); } return (-1); } /* * Remove prefix and default router list entries corresponding to ifp. Neighbor * cache entries are freed in in6_domifdetach(). */ void nd6_purge(struct ifnet *ifp) { struct nd_prhead prl; struct nd_prefix *pr, *npr; LIST_INIT(&prl); /* Purge default router list entries toward ifp. */ nd6_defrouter_purge(ifp); ND6_WLOCK(); /* * Remove prefixes on ifp. We should have already removed addresses on * this interface, so no addresses should be referencing these prefixes. */ LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, npr) { if (pr->ndpr_ifp == ifp) nd6_prefix_unlink(pr, &prl); } ND6_WUNLOCK(); /* Delete the unlinked prefix objects. 
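 *
 * The unlink-then-free pattern used here (and in nd6_timer() above),
 * sketched standalone with hypothetical names: dead entries are moved
 * to a private list while the lock is held and destroyed only after it
 * is dropped, keeping lock hold times short.
 *
 *	#include <pthread.h>
 *	#include <stdlib.h>
 *
 *	struct ex_pfx { struct ex_pfx *next; int dead; };
 *
 *	static pthread_mutex_t ex_lock = PTHREAD_MUTEX_INITIALIZER;
 *	static struct ex_pfx *ex_list;
 *
 *	static void
 *	ex_purge_dead(void)
 *	{
 *		struct ex_pfx *reap = NULL, **pp, *p;
 *
 *		pthread_mutex_lock(&ex_lock);
 *		for (pp = &ex_list; (p = *pp) != NULL; ) {
 *			if (p->dead) {
 *				*pp = p->next;	// unlink under the lock
 *				p->next = reap;
 *				reap = p;
 *			} else
 *				pp = &p->next;
 *		}
 *		pthread_mutex_unlock(&ex_lock);
 *		while ((p = reap) != NULL) {	// free after unlocking
 *			reap = p->next;
 *			free(p);
 *		}
 *	}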
*/ while ((pr = LIST_FIRST(&prl)) != NULL) { LIST_REMOVE(pr, ndpr_entry); nd6_prefix_del(pr); } /* cancel default outgoing interface setting */ if (V_nd6_defifindex == ifp->if_index) nd6_setdefaultiface(0); if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) { /* Refresh default router list. */ defrouter_select_fib(ifp->if_fib); } } /* * the caller acquires and releases the lock on the lltbls * Returns the llentry locked */ struct llentry * nd6_lookup(const struct in6_addr *addr6, int flags, struct ifnet *ifp) { struct sockaddr_in6 sin6; struct llentry *ln; bzero(&sin6, sizeof(sin6)); sin6.sin6_len = sizeof(struct sockaddr_in6); sin6.sin6_family = AF_INET6; sin6.sin6_addr = *addr6; IF_AFDATA_LOCK_ASSERT(ifp); ln = lla_lookup(LLTABLE6(ifp), flags, (struct sockaddr *)&sin6); return (ln); } static struct llentry * nd6_alloc(const struct in6_addr *addr6, int flags, struct ifnet *ifp) { struct sockaddr_in6 sin6; struct llentry *ln; bzero(&sin6, sizeof(sin6)); sin6.sin6_len = sizeof(struct sockaddr_in6); sin6.sin6_family = AF_INET6; sin6.sin6_addr = *addr6; ln = lltable_alloc_entry(LLTABLE6(ifp), 0, (struct sockaddr *)&sin6); if (ln != NULL) ln->ln_state = ND6_LLINFO_NOSTATE; return (ln); } /* * Test whether a given IPv6 address is a neighbor or not, ignoring * the actual neighbor cache. The neighbor cache is ignored in order * to not reenter the routing code from within itself. */ static int nd6_is_new_addr_neighbor(const struct sockaddr_in6 *addr, struct ifnet *ifp) { struct nd_prefix *pr; struct ifaddr *ifa; struct rt_addrinfo info; struct sockaddr_in6 rt_key; const struct sockaddr *dst6; uint64_t genid; int error, fibnum; /* * A link-local address is always a neighbor. * XXX: a link does not necessarily specify a single interface. */ if (IN6_IS_ADDR_LINKLOCAL(&addr->sin6_addr)) { struct sockaddr_in6 sin6_copy; u_int32_t zone; /* * We need sin6_copy since sa6_recoverscope() may modify the * content (XXX). */ sin6_copy = *addr; if (sa6_recoverscope(&sin6_copy)) return (0); /* XXX: should be impossible */ if (in6_setscope(&sin6_copy.sin6_addr, ifp, &zone)) return (0); if (sin6_copy.sin6_scope_id == zone) return (1); else return (0); } bzero(&rt_key, sizeof(rt_key)); bzero(&info, sizeof(info)); info.rti_info[RTAX_DST] = (struct sockaddr *)&rt_key; /* * If the address matches one of our addresses, * it should be a neighbor. * If the address matches one of our on-link prefixes, it should be a * neighbor. */ ND6_RLOCK(); restart: LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) { if (pr->ndpr_ifp != ifp) continue; if ((pr->ndpr_stateflags & NDPRF_ONLINK) == 0) { dst6 = (const struct sockaddr *)&pr->ndpr_prefix; /* * We only need to check all FIBs if add_addr_allfibs * is unset. If set, checking any FIB will suffice. */ fibnum = V_rt_add_addr_allfibs ? rt_numfibs - 1 : 0; for (; fibnum < rt_numfibs; fibnum++) { genid = V_nd6_list_genid; ND6_RUNLOCK(); /* * Restore length field before * retrying lookup */ rt_key.sin6_len = sizeof(rt_key); error = rib_lookup_info(fibnum, dst6, 0, 0, &info); ND6_RLOCK(); if (genid != V_nd6_list_genid) goto restart; if (error == 0) break; } if (error != 0) continue; /* * This is the case where multiple interfaces * have the same prefix, but only one is installed * into the routing table and that prefix entry * is not the one being examined here. 
*/ if (!IN6_ARE_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr, &rt_key.sin6_addr)) continue; } if (IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr, &addr->sin6_addr, &pr->ndpr_mask)) { ND6_RUNLOCK(); return (1); } } ND6_RUNLOCK(); /* * If the address is assigned on the node of the other side of * a p2p interface, the address should be a neighbor. */ if (ifp->if_flags & IFF_POINTOPOINT) { struct epoch_tracker et; NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != addr->sin6_family) continue; if (ifa->ifa_dstaddr != NULL && sa_equal(addr, ifa->ifa_dstaddr)) { NET_EPOCH_EXIT(et); return 1; } } NET_EPOCH_EXIT(et); } /* * If the default router list is empty, all addresses are regarded * as on-link, and thus, as a neighbor. */ if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV && nd6_defrouter_list_empty() && V_nd6_defifindex == ifp->if_index) { return (1); } return (0); } /* * Detect if a given IPv6 address identifies a neighbor on a given link. * XXX: should take care of the destination of a p2p link? */ int nd6_is_addr_neighbor(const struct sockaddr_in6 *addr, struct ifnet *ifp) { struct llentry *lle; int rc = 0; NET_EPOCH_ASSERT(); IF_AFDATA_UNLOCK_ASSERT(ifp); if (nd6_is_new_addr_neighbor(addr, ifp)) return (1); /* * Even if the address matches none of our addresses, it might be * in the neighbor cache. */ if ((lle = nd6_lookup(&addr->sin6_addr, 0, ifp)) != NULL) { LLE_RUNLOCK(lle); rc = 1; } return (rc); } /* * Free an nd6 llinfo entry. * Since the function would cause significant changes in the kernel, DO NOT * make it global, unless you have a strong reason for the change, and are sure * that the change is safe. * * Set noinline to be dtrace-friendly */ static __noinline void nd6_free(struct llentry **lnp, int gc) { struct ifnet *ifp; struct llentry *ln; struct nd_defrouter *dr; ln = *lnp; *lnp = NULL; LLE_WLOCK_ASSERT(ln); ND6_RLOCK_ASSERT(); ifp = lltable_get_ifp(ln->lle_tbl); if ((ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) != 0) dr = defrouter_lookup_locked(&ln->r_l3addr.addr6, ifp); else dr = NULL; ND6_RUNLOCK(); if ((ln->la_flags & LLE_DELETED) == 0) EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_EXPIRED); /* * we used to have pfctlinput(PRC_HOSTDEAD) here. * even though it is not harmful, it was not really necessary. */ /* cancel timer */ nd6_llinfo_settimer_locked(ln, -1); if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) { if (dr != NULL && dr->expire && ln->ln_state == ND6_LLINFO_STALE && gc) { /* * If the reason for the deletion is just garbage * collection, and the neighbor is an active default * router, do not delete it. Instead, reset the GC * timer using the router's lifetime. * Simply deleting the entry would affect default * router selection, which is not necessarily a good * thing, especially when we're using router preference * values. * XXX: the check for ln_state would be redundant, * but we intentionally keep it just in case. */ if (dr->expire > time_uptime) nd6_llinfo_settimer_locked(ln, (dr->expire - time_uptime) * hz); else nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz); LLE_REMREF(ln); LLE_WUNLOCK(ln); defrouter_rele(dr); return; } if (dr) { /* * Unreachablity of a router might affect the default * router selection and on-link detection of advertised * prefixes. */ /* * Temporarily fake the state to choose a new default * router and to perform on-link determination of * prefixes correctly. * Below the state will be set correctly, * or the entry itself will be deleted. 
*/ ln->ln_state = ND6_LLINFO_INCOMPLETE; } if (ln->ln_router || dr) { /* * We need to unlock to avoid a LOR with rt6_flush() with the * rnh and for the calls to pfxlist_onlink_check() and * defrouter_select_fib() in the block further down for calls * into nd6_lookup(). We still hold a ref. */ LLE_WUNLOCK(ln); /* * rt6_flush must be called whether or not the neighbor * is in the Default Router List. * See a corresponding comment in nd6_na_input(). */ rt6_flush(&ln->r_l3addr.addr6, ifp); } if (dr) { /* * Since defrouter_select_fib() does not affect the * on-link determination and MIP6 needs the check * before the default router selection, we perform * the check now. */ pfxlist_onlink_check(); /* * Refresh default router list. */ defrouter_select_fib(dr->ifp->if_fib); } /* * If this entry was added by an on-link redirect, remove the * corresponding host route. */ if (ln->la_flags & LLE_REDIRECT) nd6_free_redirect(ln); if (ln->ln_router || dr) LLE_WLOCK(ln); } /* * Save to unlock. We still hold an extra reference and will not * free(9) in llentry_free() if someone else holds one as well. */ LLE_WUNLOCK(ln); IF_AFDATA_LOCK(ifp); LLE_WLOCK(ln); /* Guard against race with other llentry_free(). */ if (ln->la_flags & LLE_LINKED) { /* Remove callout reference */ LLE_REMREF(ln); lltable_unlink_entry(ln->lle_tbl, ln); } IF_AFDATA_UNLOCK(ifp); llentry_free(ln); if (dr != NULL) defrouter_rele(dr); } static int nd6_isdynrte(const struct rtentry *rt, const struct nhop_object *nh, void *xap) { if (nh->nh_flags & NHF_REDIRECT) return (1); return (0); } /* * Remove the rtentry for the given llentry, * both of which were installed by a redirect. */ static void nd6_free_redirect(const struct llentry *ln) { int fibnum; struct sockaddr_in6 sin6; struct rt_addrinfo info; struct rib_cmd_info rc; struct epoch_tracker et; lltable_fill_sa_entry(ln, (struct sockaddr *)&sin6); memset(&info, 0, sizeof(info)); info.rti_info[RTAX_DST] = (struct sockaddr *)&sin6; info.rti_filter = nd6_isdynrte; NET_EPOCH_ENTER(et); for (fibnum = 0; fibnum < rt_numfibs; fibnum++) rib_action(fibnum, RTM_DELETE, &info, &rc); NET_EPOCH_EXIT(et); } /* * Updates status of the default router route. */ static void check_release_defrouter(struct rib_cmd_info *rc, void *_cbdata) { struct nd_defrouter *dr; struct nhop_object *nh; nh = rc->rc_nh_old; if ((nh != NULL) && (nh->nh_flags & NHF_DEFAULT)) { dr = defrouter_lookup(&nh->gw6_sa.sin6_addr, nh->nh_ifp); if (dr != NULL) { dr->installed = 0; defrouter_rele(dr); } } } void nd6_subscription_cb(struct rib_head *rnh, struct rib_cmd_info *rc, void *arg) { #ifdef ROUTE_MPATH rib_decompose_notification(rc, check_release_defrouter, NULL); #else check_release_defrouter(rc, NULL); #endif } int nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) { struct in6_ndireq *ndi = (struct in6_ndireq *)data; struct in6_nbrinfo *nbi = (struct in6_nbrinfo *)data; struct in6_ndifreq *ndif = (struct in6_ndifreq *)data; struct epoch_tracker et; int error = 0; if (ifp->if_afdata[AF_INET6] == NULL) return (EPFNOSUPPORT); switch (cmd) { case OSIOCGIFINFO_IN6: #define ND ndi->ndi /* XXX: old ndp(8) assumes a positive value for linkmtu. 
*/ bzero(&ND, sizeof(ND)); ND.linkmtu = IN6_LINKMTU(ifp); ND.maxmtu = ND_IFINFO(ifp)->maxmtu; ND.basereachable = ND_IFINFO(ifp)->basereachable; ND.reachable = ND_IFINFO(ifp)->reachable; ND.retrans = ND_IFINFO(ifp)->retrans; ND.flags = ND_IFINFO(ifp)->flags; ND.recalctm = ND_IFINFO(ifp)->recalctm; ND.chlim = ND_IFINFO(ifp)->chlim; break; case SIOCGIFINFO_IN6: ND = *ND_IFINFO(ifp); break; case SIOCSIFINFO_IN6: /* * used to change host variables from userland. * intended for a use on router to reflect RA configurations. */ /* 0 means 'unspecified' */ if (ND.linkmtu != 0) { if (ND.linkmtu < IPV6_MMTU || ND.linkmtu > IN6_LINKMTU(ifp)) { error = EINVAL; break; } ND_IFINFO(ifp)->linkmtu = ND.linkmtu; } if (ND.basereachable != 0) { int obasereachable = ND_IFINFO(ifp)->basereachable; ND_IFINFO(ifp)->basereachable = ND.basereachable; if (ND.basereachable != obasereachable) ND_IFINFO(ifp)->reachable = ND_COMPUTE_RTIME(ND.basereachable); } if (ND.retrans != 0) ND_IFINFO(ifp)->retrans = ND.retrans; if (ND.chlim != 0) ND_IFINFO(ifp)->chlim = ND.chlim; /* FALLTHROUGH */ case SIOCSIFINFO_FLAGS: { struct ifaddr *ifa; struct in6_ifaddr *ia; if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) && !(ND.flags & ND6_IFF_IFDISABLED)) { /* ifdisabled 1->0 transision */ /* * If the interface is marked as ND6_IFF_IFDISABLED and * has an link-local address with IN6_IFF_DUPLICATED, * do not clear ND6_IFF_IFDISABLED. * See RFC 4862, Section 5.4.5. */ NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ia = (struct in6_ifaddr *)ifa; if ((ia->ia6_flags & IN6_IFF_DUPLICATED) && IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia))) break; } NET_EPOCH_EXIT(et); if (ifa != NULL) { /* LLA is duplicated. */ ND.flags |= ND6_IFF_IFDISABLED; log(LOG_ERR, "Cannot enable an interface" " with a link-local address marked" " duplicate.\n"); } else { ND_IFINFO(ifp)->flags &= ~ND6_IFF_IFDISABLED; if (ifp->if_flags & IFF_UP) in6_if_up(ifp); } } else if (!(ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) && (ND.flags & ND6_IFF_IFDISABLED)) { /* ifdisabled 0->1 transision */ /* Mark all IPv6 address as tentative. */ ND_IFINFO(ifp)->flags |= ND6_IFF_IFDISABLED; if (V_ip6_dad_count > 0 && (ND_IFINFO(ifp)->flags & ND6_IFF_NO_DAD) == 0) { NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ia = (struct in6_ifaddr *)ifa; ia->ia6_flags |= IN6_IFF_TENTATIVE; } NET_EPOCH_EXIT(et); } } if (ND.flags & ND6_IFF_AUTO_LINKLOCAL) { if (!(ND_IFINFO(ifp)->flags & ND6_IFF_AUTO_LINKLOCAL)) { /* auto_linklocal 0->1 transision */ /* If no link-local address on ifp, configure */ ND_IFINFO(ifp)->flags |= ND6_IFF_AUTO_LINKLOCAL; in6_ifattach(ifp, NULL); } else if (!(ND.flags & ND6_IFF_IFDISABLED) && ifp->if_flags & IFF_UP) { /* * When the IF already has * ND6_IFF_AUTO_LINKLOCAL, no link-local * address is assigned, and IFF_UP, try to * assign one. */ NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ia = (struct in6_ifaddr *)ifa; if (IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia))) break; } NET_EPOCH_EXIT(et); if (ifa != NULL) /* No LLA is configured. */ in6_ifattach(ifp, NULL); } } ND_IFINFO(ifp)->flags = ND.flags; break; } #undef ND case SIOCSNDFLUSH_IN6: /* XXX: the ioctl name is confusing... 
*/ /* sync kernel routing table with the default router list */ defrouter_reset(); defrouter_select_fib(RT_ALL_FIBS); break; case SIOCSPFXFLUSH_IN6: { /* flush all the prefix advertised by routers */ struct in6_ifaddr *ia, *ia_next; struct nd_prefix *pr, *next; struct nd_prhead prl; LIST_INIT(&prl); ND6_WLOCK(); LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, next) { if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr)) continue; /* XXX */ nd6_prefix_unlink(pr, &prl); } ND6_WUNLOCK(); while ((pr = LIST_FIRST(&prl)) != NULL) { LIST_REMOVE(pr, ndpr_entry); /* XXXRW: in6_ifaddrhead locking. */ CK_STAILQ_FOREACH_SAFE(ia, &V_in6_ifaddrhead, ia_link, ia_next) { if ((ia->ia6_flags & IN6_IFF_AUTOCONF) == 0) continue; if (ia->ia6_ndpr == pr) in6_purgeaddr(&ia->ia_ifa); } nd6_prefix_del(pr); } break; } case SIOCSRTRFLUSH_IN6: { /* flush all the default routers */ defrouter_reset(); nd6_defrouter_flush_all(); defrouter_select_fib(RT_ALL_FIBS); break; } case SIOCGNBRINFO_IN6: { struct llentry *ln; struct in6_addr nb_addr = nbi->addr; /* make local for safety */ if ((error = in6_setscope(&nb_addr, ifp, NULL)) != 0) return (error); NET_EPOCH_ENTER(et); ln = nd6_lookup(&nb_addr, 0, ifp); NET_EPOCH_EXIT(et); if (ln == NULL) { error = EINVAL; break; } nbi->state = ln->ln_state; nbi->asked = ln->la_asked; nbi->isrouter = ln->ln_router; if (ln->la_expire == 0) nbi->expire = 0; else nbi->expire = ln->la_expire + ln->lle_remtime / hz + (time_second - time_uptime); LLE_RUNLOCK(ln); break; } case SIOCGDEFIFACE_IN6: /* XXX: should be implemented as a sysctl? */ ndif->ifindex = V_nd6_defifindex; break; case SIOCSDEFIFACE_IN6: /* XXX: should be implemented as a sysctl? */ return (nd6_setdefaultiface(ndif->ifindex)); } return (error); } /* * Calculates new isRouter value based on provided parameters and * returns it. */ static int nd6_is_router(int type, int code, int is_new, int old_addr, int new_addr, int ln_router) { /* * ICMP6 type dependent behavior. * * NS: clear IsRouter if new entry * RS: clear IsRouter * RA: set IsRouter if there's lladdr * redir: clear IsRouter if new entry * * RA case, (1): * The spec says that we must set IsRouter in the following cases: * - If lladdr exist, set IsRouter. This means (1-5). * - If it is old entry (!newentry), set IsRouter. This means (7). * So, based on the spec, in (1-5) and (7) cases we must set IsRouter. * A quetion arises for (1) case. (1) case has no lladdr in the * neighbor cache, this is similar to (6). * This case is rare but we figured that we MUST NOT set IsRouter. * * is_new old_addr new_addr NS RS RA redir * D R * 0 n n (1) c ? s * 0 y n (2) c s s * 0 n y (3) c s s * 0 y y (4) c s s * 0 y y (5) c s s * 1 -- n (6) c c c s * 1 -- y (7) c c s c s * * (c=clear s=set) */ switch (type & 0xff) { case ND_NEIGHBOR_SOLICIT: /* * New entry must have is_router flag cleared. */ if (is_new) /* (6-7) */ ln_router = 0; break; case ND_REDIRECT: /* * If the icmp is a redirect to a better router, always set the * is_router flag. Otherwise, if the entry is newly created, * clear the flag. [RFC 2461, sec 8.3] */ if (code == ND_REDIRECT_ROUTER) ln_router = 1; else { if (is_new) /* (6-7) */ ln_router = 0; } break; case ND_ROUTER_SOLICIT: /* * is_router flag must always be cleared. */ ln_router = 0; break; case ND_ROUTER_ADVERT: /* * Mark an entry with lladdr as a router. 
*/ if ((!is_new && (old_addr || new_addr)) || /* (2-5) */ (is_new && new_addr)) { /* (7) */ ln_router = 1; } break; } return (ln_router); } /* * Create neighbor cache entry and cache link-layer address, * on reception of inbound ND6 packets. (RS/RA/NS/redirect) * * type - ICMP6 type * code - type dependent information * */ void nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr, int lladdrlen, int type, int code) { struct llentry *ln = NULL, *ln_tmp; int is_newentry; int do_update; int olladdr; int llchange; int flags; uint16_t router = 0; struct sockaddr_in6 sin6; struct mbuf *chain = NULL; u_char linkhdr[LLE_MAX_LINKHDR]; size_t linkhdrsize; int lladdr_off; NET_EPOCH_ASSERT(); IF_AFDATA_UNLOCK_ASSERT(ifp); KASSERT(ifp != NULL, ("%s: ifp == NULL", __func__)); KASSERT(from != NULL, ("%s: from == NULL", __func__)); /* nothing must be updated for unspecified address */ if (IN6_IS_ADDR_UNSPECIFIED(from)) return; /* * Validation about ifp->if_addrlen and lladdrlen must be done in * the caller. * * XXX If the link does not have link-layer adderss, what should * we do? (ifp->if_addrlen == 0) * Spec says nothing in sections for RA, RS and NA. There's small * description on it in NS section (RFC 2461 7.2.3). */ flags = lladdr ? LLE_EXCLUSIVE : 0; ln = nd6_lookup(from, flags, ifp); is_newentry = 0; if (ln == NULL) { flags |= LLE_EXCLUSIVE; ln = nd6_alloc(from, 0, ifp); if (ln == NULL) return; /* * Since we already know all the data for the new entry, * fill it before insertion. */ if (lladdr != NULL) { linkhdrsize = sizeof(linkhdr); if (lltable_calc_llheader(ifp, AF_INET6, lladdr, linkhdr, &linkhdrsize, &lladdr_off) != 0) return; lltable_set_entry_addr(ifp, ln, linkhdr, linkhdrsize, lladdr_off); } IF_AFDATA_WLOCK(ifp); LLE_WLOCK(ln); /* Prefer any existing lle over newly-created one */ ln_tmp = nd6_lookup(from, LLE_EXCLUSIVE, ifp); if (ln_tmp == NULL) lltable_link_entry(LLTABLE6(ifp), ln); IF_AFDATA_WUNLOCK(ifp); if (ln_tmp == NULL) { /* No existing lle, mark as new entry (6,7) */ is_newentry = 1; if (lladdr != NULL) { /* (7) */ nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED); } } else { lltable_free_entry(LLTABLE6(ifp), ln); ln = ln_tmp; ln_tmp = NULL; } } /* do nothing if static ndp is set */ if ((ln->la_flags & LLE_STATIC)) { if (flags & LLE_EXCLUSIVE) LLE_WUNLOCK(ln); else LLE_RUNLOCK(ln); return; } olladdr = (ln->la_flags & LLE_VALID) ? 1 : 0; if (olladdr && lladdr) { llchange = bcmp(lladdr, ln->ll_addr, ifp->if_addrlen); } else if (!olladdr && lladdr) llchange = 1; else llchange = 0; /* * newentry olladdr lladdr llchange (*=record) * 0 n n -- (1) * 0 y n -- (2) * 0 n y y (3) * STALE * 0 y y n (4) * * 0 y y y (5) * STALE * 1 -- n -- (6) NOSTATE(= PASSIVE) * 1 -- y -- (7) * STALE */ do_update = 0; if (is_newentry == 0 && llchange != 0) { do_update = 1; /* (3,5) */ /* * Record source link-layer address * XXX is it dependent to ifp->if_type? */ linkhdrsize = sizeof(linkhdr); if (lltable_calc_llheader(ifp, AF_INET6, lladdr, linkhdr, &linkhdrsize, &lladdr_off) != 0) return; if (lltable_try_set_entry_addr(ifp, ln, linkhdr, linkhdrsize, lladdr_off) == 0) { /* Entry was deleted */ return; } nd6_llinfo_setstate(ln, ND6_LLINFO_STALE); EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED); if (ln->la_hold != NULL) nd6_grab_holdchain(ln, &chain, &sin6); } /* Calculates new router status */ router = nd6_is_router(type, code, is_newentry, olladdr, lladdr != NULL ? 
1 : 0, ln->ln_router); ln->ln_router = router; /* Mark non-router redirects with special flag */ if ((type & 0xFF) == ND_REDIRECT && code != ND_REDIRECT_ROUTER) ln->la_flags |= LLE_REDIRECT; if (flags & LLE_EXCLUSIVE) LLE_WUNLOCK(ln); else LLE_RUNLOCK(ln); if (chain != NULL) nd6_flush_holdchain(ifp, chain, &sin6); /* * When the link-layer address of a router changes, select the * best router again. In particular, when the neighbor entry is newly * created, it might affect the selection policy. * Question: can we restrict the first condition to the "is_newentry" * case? * XXX: when we hear an RA from a new router with the link-layer * address option, defrouter_select_fib() is called twice, since * defrtrlist_update called the function as well. However, I believe * we can compromise the overhead, since it only happens the first * time. * XXX: although defrouter_select_fib() should not have a bad effect * for those are not autoconfigured hosts, we explicitly avoid such * cases for safety. */ if ((do_update || is_newentry) && router && ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) { /* * guaranteed recursion */ defrouter_select_fib(ifp->if_fib); } } static void nd6_slowtimo(void *arg) { struct epoch_tracker et; CURVNET_SET((struct vnet *) arg); struct nd_ifinfo *nd6if; struct ifnet *ifp; callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz, nd6_slowtimo, curvnet); NET_EPOCH_ENTER(et); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (ifp->if_afdata[AF_INET6] == NULL) continue; nd6if = ND_IFINFO(ifp); if (nd6if->basereachable && /* already initialized */ (nd6if->recalctm -= ND6_SLOWTIMER_INTERVAL) <= 0) { /* * Since reachable time rarely changes by router * advertisements, we SHOULD insure that a new random * value gets recomputed at least once every few hours. * (RFC 2461, 6.3.4) */ nd6if->recalctm = V_nd6_recalc_reachtm_interval; nd6if->reachable = ND_COMPUTE_RTIME(nd6if->basereachable); } } NET_EPOCH_EXIT(et); CURVNET_RESTORE(); } void nd6_grab_holdchain(struct llentry *ln, struct mbuf **chain, struct sockaddr_in6 *sin6) { LLE_WLOCK_ASSERT(ln); *chain = ln->la_hold; ln->la_hold = NULL; lltable_fill_sa_entry(ln, (struct sockaddr *)sin6); if (ln->ln_state == ND6_LLINFO_STALE) { /* * The first time we send a packet to a * neighbor whose entry is STALE, we have * to change the state to DELAY and a sets * a timer to expire in DELAY_FIRST_PROBE_TIME * seconds to ensure do neighbor unreachability * detection on expiration. * (RFC 2461 7.3.3) */ nd6_llinfo_setstate(ln, ND6_LLINFO_DELAY); } } int nd6_output_ifp(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m, struct sockaddr_in6 *dst, struct route *ro) { int error; int ip6len; struct ip6_hdr *ip6; struct m_tag *mtag; #ifdef MAC mac_netinet6_nd6_send(ifp, m); #endif /* * If called from nd6_ns_output() (NS), nd6_na_output() (NA), * icmp6_redirect_output() (REDIRECT) or from rip6_output() (RS, RA * as handled by rtsol and rtadvd), mbufs will be tagged for SeND * to be diverted to user space. When re-injected into the kernel, * send_output() will directly dispatch them to the outgoing interface. */ if (send_sendso_input_hook != NULL) { mtag = m_tag_find(m, PACKET_TAG_ND_OUTGOING, NULL); if (mtag != NULL) { ip6 = mtod(m, struct ip6_hdr *); ip6len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen); /* Use the SEND socket */ error = send_sendso_input_hook(m, ifp, SND_OUT, ip6len); /* -1 == no app on SEND socket */ if (error == 0 || error != -1) return (error); } } m_clrprotoflags(m); /* Avoid confusing lower layers. 
*/ IP_PROBE(send, NULL, NULL, mtod(m, struct ip6_hdr *), ifp, NULL, mtod(m, struct ip6_hdr *)); if ((ifp->if_flags & IFF_LOOPBACK) == 0) origifp = ifp; error = (*ifp->if_output)(origifp, m, (struct sockaddr *)dst, ro); return (error); } /* * Look up the link header for the @sa_dst address. Stores the found * data in the @desten buffer. A copy of the lle ln_flags can also be * saved in @pflags if @pflags is non-NULL. * * If the destination LLE does not exist or an lle state modification * is required, the "slow" version is called. * * Return values: * - 0 on success (address copied to buffer). * - EWOULDBLOCK (no local error, but the address is still unresolved) * - other errors (alloc failure, etc) */ int nd6_resolve(struct ifnet *ifp, int is_gw, struct mbuf *m, const struct sockaddr *sa_dst, u_char *desten, uint32_t *pflags, struct llentry **plle) { struct llentry *ln = NULL; const struct sockaddr_in6 *dst6; NET_EPOCH_ASSERT(); if (pflags != NULL) *pflags = 0; dst6 = (const struct sockaddr_in6 *)sa_dst; /* discard the packet if IPv6 operation is disabled on the interface */ if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)) { m_freem(m); return (ENETDOWN); /* better error? */ } if (m != NULL && m->m_flags & M_MCAST) { switch (ifp->if_type) { case IFT_ETHER: case IFT_L2VLAN: case IFT_BRIDGE: ETHER_MAP_IPV6_MULTICAST(&dst6->sin6_addr, desten); return (0); default: m_freem(m); return (EAFNOSUPPORT); } } ln = nd6_lookup(&dst6->sin6_addr, plle ? LLE_EXCLUSIVE : LLE_UNLOCKED, ifp); if (ln != NULL && (ln->r_flags & RLLE_VALID) != 0) { /* Entry found, let's copy lle info */ bcopy(ln->r_linkdata, desten, ln->r_hdrlen); if (pflags != NULL) *pflags = LLE_VALID | (ln->r_flags & RLLE_IFADDR); /* Check if we have a feedback request from the nd6 timer */ if (ln->r_skip_req != 0) { LLE_REQ_LOCK(ln); ln->r_skip_req = 0; /* Notify that entry was used */ ln->lle_hittime = time_uptime; LLE_REQ_UNLOCK(ln); } if (plle) { LLE_ADDREF(ln); *plle = ln; LLE_WUNLOCK(ln); } return (0); } else if (plle && ln) LLE_WUNLOCK(ln); return (nd6_resolve_slow(ifp, 0, m, dst6, desten, pflags, plle)); } /* * Do L2 address resolution for the @sa_dst address. Stores the found * address in the @desten buffer. A copy of the lle ln_flags can also be * saved in @pflags if @pflags is non-NULL. * * Heavy version. * The function assumes that the destination LLE does not exist, * or is invalid or stale, so the LLE_EXCLUSIVE lock needs to be acquired. * * Set noinline to be dtrace-friendly */ static __noinline int nd6_resolve_slow(struct ifnet *ifp, int flags, struct mbuf *m, const struct sockaddr_in6 *dst, u_char *desten, uint32_t *pflags, struct llentry **plle) { struct llentry *lle = NULL, *lle_tmp; struct in6_addr *psrc, src; int send_ns, ll_len; char *lladdr; NET_EPOCH_ASSERT(); /* * Address resolution or Neighbor Unreachability Detection * for the next hop. * At this point, the destination of the packet must be a unicast * or an anycast address (i.e., not a multicast). */ if (lle == NULL) { lle = nd6_lookup(&dst->sin6_addr, LLE_EXCLUSIVE, ifp); if ((lle == NULL) && nd6_is_addr_neighbor(dst, ifp)) { /* * Since nd6_is_addr_neighbor() internally calls nd6_lookup(), * the condition below is not very efficient. But we believe * it is tolerable, because this should be a rare case.
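
For orientation, here is a hedged sketch of how an L2 output path might consume nd6_resolve() as defined above; the wrapper function is invented for illustration, while nd6_resolve() and LLE_MAX_LINKHDR come from this file. The key contract is that EWOULDBLOCK is not a failure: the mbuf has been queued on the llentry pending neighbor discovery, so the caller must neither free nor retransmit it.

static int
example_ip6_transmit(struct ifnet *ifp, struct mbuf *m,
    const struct sockaddr_in6 *dst6)
{
	u_char linkhdr[LLE_MAX_LINKHDR];	/* filled in by nd6_resolve() */
	uint32_t pflags;
	int error;

	error = nd6_resolve(ifp, 0, m, (const struct sockaddr *)dst6,
	    linkhdr, &pflags, NULL);
	if (error == EWOULDBLOCK)
		return (0);	/* resolution in progress; m is now held by the lle */
	if (error != 0)
		return (error);	/* m was already freed on the error paths */
	/* linkhdr now holds the prebuilt L2 header; prepend it and transmit. */
	return (0);
}
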
*/ lle = nd6_alloc(&dst->sin6_addr, 0, ifp); if (lle == NULL) { char ip6buf[INET6_ADDRSTRLEN]; log(LOG_DEBUG, "nd6_output: can't allocate llinfo for %s " "(ln=%p)\n", ip6_sprintf(ip6buf, &dst->sin6_addr), lle); m_freem(m); return (ENOBUFS); } IF_AFDATA_WLOCK(ifp); LLE_WLOCK(lle); /* Prefer any existing entry over the newly-created one */ lle_tmp = nd6_lookup(&dst->sin6_addr, LLE_EXCLUSIVE, ifp); if (lle_tmp == NULL) lltable_link_entry(LLTABLE6(ifp), lle); IF_AFDATA_WUNLOCK(ifp); if (lle_tmp != NULL) { lltable_free_entry(LLTABLE6(ifp), lle); lle = lle_tmp; lle_tmp = NULL; } } } if (lle == NULL) { m_freem(m); return (ENOBUFS); } LLE_WLOCK_ASSERT(lle); /* * The first time we send a packet to a neighbor whose entry is * STALE, we have to change the state to DELAY and set a timer to * expire in DELAY_FIRST_PROBE_TIME seconds so that * neighbor unreachability detection is performed on expiration. * (RFC 2461 7.3.3) */ if (lle->ln_state == ND6_LLINFO_STALE) nd6_llinfo_setstate(lle, ND6_LLINFO_DELAY); /* * If the neighbor cache entry has a state other than INCOMPLETE * (i.e. its link-layer address is already resolved), just * send the packet. */ if (lle->ln_state > ND6_LLINFO_INCOMPLETE) { if (flags & LLE_ADDRONLY) { lladdr = lle->ll_addr; ll_len = ifp->if_addrlen; } else { lladdr = lle->r_linkdata; ll_len = lle->r_hdrlen; } bcopy(lladdr, desten, ll_len); if (pflags != NULL) *pflags = lle->la_flags; if (plle) { LLE_ADDREF(lle); *plle = lle; } LLE_WUNLOCK(lle); return (0); } /* * There is a neighbor cache entry, but no Ethernet address * response yet. Append this latest packet to the end of the * packet queue held on the entry. When it exceeds nd6_maxqueuelen, * the oldest packet in the queue will be removed. */ if (lle->la_hold != NULL) { struct mbuf *m_hold; int i; i = 0; for (m_hold = lle->la_hold; m_hold; m_hold = m_hold->m_nextpkt){ i++; if (m_hold->m_nextpkt == NULL) { m_hold->m_nextpkt = m; break; } } while (i >= V_nd6_maxqueuelen) { m_hold = lle->la_hold; lle->la_hold = lle->la_hold->m_nextpkt; m_freem(m_hold); i--; } } else { lle->la_hold = m; } /* * If there has been no NS for the neighbor after entering the * INCOMPLETE state, send the first solicitation. * Note that for a newly-created lle la_asked will be 0, * so we will transition from the ND6_LLINFO_NOSTATE to the * ND6_LLINFO_INCOMPLETE state here. */ psrc = NULL; send_ns = 0; if (lle->la_asked == 0) { lle->la_asked++; send_ns = 1; psrc = nd6_llinfo_get_holdsrc(lle, &src); nd6_llinfo_setstate(lle, ND6_LLINFO_INCOMPLETE); } LLE_WUNLOCK(lle); if (send_ns != 0) nd6_ns_output(ifp, psrc, NULL, &dst->sin6_addr, NULL); return (EWOULDBLOCK); } /* * Do L2 address resolution for the @sa_dst address. Stores the found * address in the @desten buffer. A copy of the lle ln_flags can also be * saved in @pflags if @pflags is non-NULL. * * Return values: * - 0 on success (address copied to buffer).
* - EWOULDBLOCK (no local error, but the address is still unresolved) * - other errors (alloc failure, etc) */ int nd6_resolve_addr(struct ifnet *ifp, int flags, const struct sockaddr *dst, char *desten, uint32_t *pflags) { int error; flags |= LLE_ADDRONLY; error = nd6_resolve_slow(ifp, flags, NULL, (const struct sockaddr_in6 *)dst, desten, pflags, NULL); return (error); } int nd6_flush_holdchain(struct ifnet *ifp, struct mbuf *chain, struct sockaddr_in6 *dst) { struct mbuf *m, *m_head; int error = 0; m_head = chain; while (m_head) { m = m_head; m_head = m_head->m_nextpkt; m->m_nextpkt = NULL; error = nd6_output_ifp(ifp, ifp, m, dst, NULL); } /* * XXX * note that intermediate errors are blindly ignored */ return (error); } static int nd6_need_cache(struct ifnet *ifp) { /* * XXX: we currently do not make a neighbor cache on any interface * other than Ethernet and GIF. * * RFC2893 says: * - unidirectional tunnels need no ND */ switch (ifp->if_type) { case IFT_ETHER: case IFT_IEEE1394: case IFT_L2VLAN: case IFT_INFINIBAND: case IFT_BRIDGE: case IFT_PROPVIRTUAL: return (1); default: return (0); } } /* * Add a permanent ND6 link-layer record for the given * interface address. * * Very similar to IPv4 arp_ifinit(), but: * 1) IPv6 DAD is performed in a different place * 2) It is called by the IPv6 protocol stack, in contrast to * arp_ifinit(), which is typically called in the SIOCSIFADDR * driver ioctl handler. * */ int nd6_add_ifa_lle(struct in6_ifaddr *ia) { struct ifnet *ifp; struct llentry *ln, *ln_tmp; struct sockaddr *dst; ifp = ia->ia_ifa.ifa_ifp; if (nd6_need_cache(ifp) == 0) return (0); dst = (struct sockaddr *)&ia->ia_addr; ln = lltable_alloc_entry(LLTABLE6(ifp), LLE_IFADDR, dst); if (ln == NULL) return (ENOBUFS); IF_AFDATA_WLOCK(ifp); LLE_WLOCK(ln); /* Unlink any existing entry */ ln_tmp = lla_lookup(LLTABLE6(ifp), LLE_EXCLUSIVE, dst); if (ln_tmp != NULL) lltable_unlink_entry(LLTABLE6(ifp), ln_tmp); lltable_link_entry(LLTABLE6(ifp), ln); IF_AFDATA_WUNLOCK(ifp); if (ln_tmp != NULL) EVENTHANDLER_INVOKE(lle_event, ln_tmp, LLENTRY_EXPIRED); EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED); LLE_WUNLOCK(ln); if (ln_tmp != NULL) llentry_free(ln_tmp); return (0); } /* * Removes either all lle entries for the given @ia, or the lle * corresponding to the @ia address.
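
nd6_resolve_slow() and nd6_cache_lladdr() above share an insertion idiom with nd6_add_ifa_lle(): allocate the entry without holding the table lock, redo the lookup under the lock, and then either keep the existing entry (discarding the loser of the race) or, for LLE_IFADDR records, unlink the old one. A generic userland sketch of the prefer-existing variant follows, with invented names and a pthread mutex standing in for the kernel locks:

#include <pthread.h>
#include <stdlib.h>

struct entry {
	int key;
	struct entry *next;
};

static struct entry *table_head;
static pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;

static struct entry *
table_lookup_locked(int key)
{
	struct entry *e;

	for (e = table_head; e != NULL; e = e->next)
		if (e->key == key)
			return (e);
	return (NULL);
}

/* Prefer-existing insertion: allocate unlocked, re-check under the lock. */
static struct entry *
table_obtain(int key)
{
	struct entry *e, *cur;

	e = calloc(1, sizeof(*e));	/* may block; no lock held */
	if (e == NULL)
		return (NULL);
	e->key = key;

	pthread_mutex_lock(&table_lock);
	cur = table_lookup_locked(key);	/* did a racing thread insert it? */
	if (cur == NULL) {
		e->next = table_head;	/* we won the race: publish ours */
		table_head = e;
	}
	pthread_mutex_unlock(&table_lock);

	if (cur != NULL) {
		free(e);		/* we lost: keep the existing entry */
		e = cur;
	}
	return (e);
}
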
*/ void nd6_rem_ifa_lle(struct in6_ifaddr *ia, int all) { struct sockaddr_in6 mask, addr; struct sockaddr *saddr, *smask; struct ifnet *ifp; ifp = ia->ia_ifa.ifa_ifp; memcpy(&addr, &ia->ia_addr, sizeof(ia->ia_addr)); memcpy(&mask, &ia->ia_prefixmask, sizeof(ia->ia_prefixmask)); saddr = (struct sockaddr *)&addr; smask = (struct sockaddr *)&mask; if (all != 0) lltable_prefix_free(AF_INET6, saddr, smask, LLE_STATIC); else lltable_delete_addr(LLTABLE6(ifp), LLE_IFADDR, saddr); } static void clear_llinfo_pqueue(struct llentry *ln) { struct mbuf *m_hold, *m_hold_next; for (m_hold = ln->la_hold; m_hold; m_hold = m_hold_next) { m_hold_next = m_hold->m_nextpkt; m_freem(m_hold); } ln->la_hold = NULL; } static int nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS) { struct in6_prefix p; struct sockaddr_in6 s6; struct nd_prefix *pr; struct nd_pfxrouter *pfr; time_t maxexpire; int error; char ip6buf[INET6_ADDRSTRLEN]; if (req->newptr) return (EPERM); error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); bzero(&p, sizeof(p)); p.origin = PR_ORIG_RA; bzero(&s6, sizeof(s6)); s6.sin6_family = AF_INET6; s6.sin6_len = sizeof(s6); ND6_RLOCK(); LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) { p.prefix = pr->ndpr_prefix; if (sa6_recoverscope(&p.prefix)) { log(LOG_ERR, "scope error in prefix list (%s)\n", ip6_sprintf(ip6buf, &p.prefix.sin6_addr)); /* XXX: press on... */ } p.raflags = pr->ndpr_raf; p.prefixlen = pr->ndpr_plen; p.vltime = pr->ndpr_vltime; p.pltime = pr->ndpr_pltime; p.if_index = pr->ndpr_ifp->if_index; if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME) p.expire = 0; else { /* * XXX: we assume time_t is signed. Setting all bits * but the sign bit yields the maximum positive * time_t, used below to guard against overflow. */ maxexpire = (-1) & ~((time_t)1 << ((sizeof(maxexpire) * 8) - 1)); if (pr->ndpr_vltime < maxexpire - pr->ndpr_lastupdate) p.expire = pr->ndpr_lastupdate + pr->ndpr_vltime + (time_second - time_uptime); else p.expire = maxexpire; } p.refcnt = pr->ndpr_addrcnt; p.flags = pr->ndpr_stateflags; p.advrtrs = 0; LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) p.advrtrs++; error = SYSCTL_OUT(req, &p, sizeof(p)); if (error != 0) break; LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) { s6.sin6_addr = pfr->router->rtaddr; if (sa6_recoverscope(&s6)) log(LOG_ERR, "scope error in prefix list (%s)\n", ip6_sprintf(ip6buf, &pfr->router->rtaddr)); error = SYSCTL_OUT(req, &s6, sizeof(s6)); if (error != 0) goto out; } } out: ND6_RUNLOCK(); return (error); } SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist, CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, nd6_sysctl_prlist, "S,in6_prefix", "NDP prefix list"); SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXQLEN, nd6_maxqueuelen, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_maxqueuelen), 1, ""); SYSCTL_INT(_net_inet6_icmp6, OID_AUTO, nd6_gctimer, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_gctimer), (60 * 60 * 24), ""); diff --git a/sys/sys/priv.h b/sys/sys/priv.h index 9d8a3204add5..7ef54782a60d 100644 --- a/sys/sys/priv.h +++ b/sys/sys/priv.h @@ -1,545 +1,544 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2006 nCircle Network Security, Inc. * All rights reserved. * * This software was developed by Robert N. M. Watson for the TrustedBSD * Project under contract to nCircle Network Security, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR, NCIRCLE NETWORK SECURITY, * INC., OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ /* * Privilege checking interface for BSD kernel. */ #ifndef _SYS_PRIV_H_ #define _SYS_PRIV_H_ /* * Privilege list, sorted loosely by kernel subsystem. * * Think carefully before adding or reusing one of these privileges -- are * there existing instances referring to the same privilege? Third party * vendors may request the assignment of privileges to be used in loadable * modules. Particular numeric privilege assignments are part of the * loadable kernel module ABI, and should not be changed across minor * releases. * * When adding a new privilege, remember to determine if it's appropriate * for use in jail, and update the privilege switch in prison_priv_check() * in kern_jail.c as necessary. */ /* * Track beginning of privilege list. */ #define _PRIV_LOWEST 1 /* * The remaining privileges typically correspond to one or a small * number of specific privilege checks, and have (relatively) precise * meanings. They are loosely sorted into a set of base system * privileges, such as the ability to reboot, and then loosely by * subsystem, indicated by a subsystem name. */ #define _PRIV_ROOT 1 /* Removed. */ #define PRIV_ACCT 2 /* Manage process accounting. */ #define PRIV_MAXFILES 3 /* Exceed system open files limit. */ #define PRIV_MAXPROC 4 /* Exceed system processes limit. */ #define PRIV_KTRACE 5 /* Set/clear KTRFAC_ROOT on ktrace. */ #define PRIV_SETDUMPER 6 /* Configure dump device. */ #define PRIV_REBOOT 8 /* Can reboot system. */ #define PRIV_SWAPON 9 /* Can swapon(). */ #define PRIV_SWAPOFF 10 /* Can swapoff(). */ #define PRIV_MSGBUF 11 /* Can read kernel message buffer. */ #define PRIV_IO 12 /* Can perform low-level I/O. */ #define PRIV_KEYBOARD 13 /* Reprogram keyboard. */ #define PRIV_DRIVER 14 /* Low-level driver privilege. */ #define PRIV_ADJTIME 15 /* Set time adjustment. */ #define PRIV_NTP_ADJTIME 16 /* Set NTP time adjustment. */ #define PRIV_CLOCK_SETTIME 17 /* Can call clock_settime. */ #define PRIV_SETTIMEOFDAY 18 /* Can call settimeofday. */ #define _PRIV_SETHOSTID 19 /* Removed. */ #define _PRIV_SETDOMAINNAME 20 /* Removed. */ /* * Audit subsystem privileges. */ #define PRIV_AUDIT_CONTROL 40 /* Can configure audit. */ #define PRIV_AUDIT_FAILSTOP 41 /* Can run during audit fail stop. */ #define PRIV_AUDIT_GETAUDIT 42 /* Can get proc audit properties. */ #define PRIV_AUDIT_SETAUDIT 43 /* Can set proc audit properties. */ #define PRIV_AUDIT_SUBMIT 44 /* Can submit an audit record. */ /* * Credential management privileges. */ #define PRIV_CRED_SETUID 50 /* setuid. 
*/ #define PRIV_CRED_SETEUID 51 /* seteuid to !ruid and !svuid. */ #define PRIV_CRED_SETGID 52 /* setgid. */ #define PRIV_CRED_SETEGID 53 /* setegid to !rgid and !svgid. */ #define PRIV_CRED_SETGROUPS 54 /* Set process additional groups. */ #define PRIV_CRED_SETREUID 55 /* setreuid. */ #define PRIV_CRED_SETREGID 56 /* setregid. */ #define PRIV_CRED_SETRESUID 57 /* setresuid. */ #define PRIV_CRED_SETRESGID 58 /* setresgid. */ #define PRIV_SEEOTHERGIDS 59 /* Exempt bsd.seeothergids. */ #define PRIV_SEEOTHERUIDS 60 /* Exempt bsd.seeotheruids. */ /* * Debugging privileges. */ #define PRIV_DEBUG_DIFFCRED 80 /* Exempt debugging other users. */ #define PRIV_DEBUG_SUGID 81 /* Exempt debugging setuid proc. */ #define PRIV_DEBUG_UNPRIV 82 /* Exempt unprivileged debug limit. */ #define PRIV_DEBUG_DENIED 83 /* Exempt P2_NOTRACE. */ /* * DTrace privileges. */ #define PRIV_DTRACE_KERNEL 90 /* Allow use of DTrace on the kernel. */ #define PRIV_DTRACE_PROC 91 /* Allow attaching DTrace to process. */ #define PRIV_DTRACE_USER 92 /* Process may submit DTrace events. */ /* * Firmware privileges. */ #define PRIV_FIRMWARE_LOAD 100 /* Can load firmware. */ /* * Jail privileges. */ #define PRIV_JAIL_ATTACH 110 /* Attach to a jail. */ #define PRIV_JAIL_SET 111 /* Set jail parameters. */ #define PRIV_JAIL_REMOVE 112 /* Remove a jail. */ /* * Kernel environment privileges. */ #define PRIV_KENV_SET 120 /* Set kernel env. variables. */ #define PRIV_KENV_UNSET 121 /* Unset kernel env. variables. */ /* * Loadable kernel module privileges. */ #define PRIV_KLD_LOAD 130 /* Load a kernel module. */ #define PRIV_KLD_UNLOAD 131 /* Unload a kernel module. */ /* * Privileges associated with the MAC Framework and specific MAC policy * modules. */ #define PRIV_MAC_PARTITION 140 /* Privilege in mac_partition policy. */ #define PRIV_MAC_PRIVS 141 /* Privilege in the mac_privs policy. */ /* * Process-related privileges. */ #define PRIV_PROC_LIMIT 160 /* Exceed user process limit. */ #define PRIV_PROC_SETLOGIN 161 /* Can call setlogin. */ #define PRIV_PROC_SETRLIMIT 162 /* Can raise resource limits. */ #define PRIV_PROC_SETLOGINCLASS 163 /* Can call setloginclass(2). */ /* * System V IPC privileges. */ #define PRIV_IPC_READ 170 /* Can override IPC read perm. */ #define PRIV_IPC_WRITE 171 /* Can override IPC write perm. */ #define PRIV_IPC_ADMIN 172 /* Can override IPC owner-only perm. */ #define PRIV_IPC_MSGSIZE 173 /* Exempt IPC message queue limit. */ /* * POSIX message queue privileges. */ #define PRIV_MQ_ADMIN 180 /* Can override msgq owner-only perm. */ /* * Performance monitoring counter privileges. */ #define PRIV_PMC_MANAGE 190 /* Can administer PMC. */ #define PRIV_PMC_SYSTEM 191 /* Can allocate a system-wide PMC. */ /* * Scheduling privileges. */ #define PRIV_SCHED_DIFFCRED 200 /* Exempt scheduling other users. */ #define PRIV_SCHED_SETPRIORITY 201 /* Can set lower nice value for proc. */ #define PRIV_SCHED_RTPRIO 202 /* Can set real time scheduling. */ #define PRIV_SCHED_SETPOLICY 203 /* Can set scheduler policy. */ #define PRIV_SCHED_SET 204 /* Can set thread scheduler. */ #define PRIV_SCHED_SETPARAM 205 /* Can set thread scheduler params. */ #define PRIV_SCHED_CPUSET 206 /* Can manipulate cpusets. */ #define PRIV_SCHED_CPUSET_INTR 207 /* Can adjust IRQ to CPU binding. */ /* * POSIX semaphore privileges. */ #define PRIV_SEM_WRITE 220 /* Can override sem write perm. */ /* * Signal privileges. */ #define PRIV_SIGNAL_DIFFCRED 230 /* Exempt signalling other users.
*/ #define PRIV_SIGNAL_SUGID 231 /* Non-conserv signal setuid proc. */ /* * Sysctl privileges. */ #define PRIV_SYSCTL_DEBUG 240 /* Can invoke sysctl.debug. */ #define PRIV_SYSCTL_WRITE 241 /* Can write sysctls. */ #define PRIV_SYSCTL_WRITEJAIL 242 /* Can write sysctls, jail permitted. */ /* * TTY privileges. */ #define PRIV_TTY_CONSOLE 250 /* Set console to tty. */ #define PRIV_TTY_DRAINWAIT 251 /* Set tty drain wait time. */ #define PRIV_TTY_DTRWAIT 252 /* Set DTR wait on tty. */ #define PRIV_TTY_EXCLUSIVE 253 /* Override tty exclusive flag. */ #define _PRIV_TTY_PRISON 254 /* Removed. */ #define PRIV_TTY_STI 255 /* Simulate input on another tty. */ #define PRIV_TTY_SETA 256 /* Set tty termios structure. */ /* * UFS-specific privileges. */ #define PRIV_UFS_EXTATTRCTL 270 /* Can configure EAs on UFS1. */ #define PRIV_UFS_QUOTAOFF 271 /* quotaoff(). */ #define PRIV_UFS_QUOTAON 272 /* quotaon(). */ #define PRIV_UFS_SETUSE 273 /* setuse(). */ /* * ZFS-specific privileges. */ #define PRIV_ZFS_POOL_CONFIG 280 /* Can configure ZFS pools. */ #define PRIV_ZFS_INJECT 281 /* Can inject faults in the ZFS fault injection framework. */ #define PRIV_ZFS_JAIL 282 /* Can attach/detach ZFS file systems to/from jails. */ /* * NFS-specific privileges. */ #define PRIV_NFS_DAEMON 290 /* Can become the NFS daemon. */ #define PRIV_NFS_LOCKD 291 /* Can become NFS lock daemon. */ /* * VFS privileges. */ #define PRIV_VFS_READ 310 /* Override vnode DAC read perm. */ #define PRIV_VFS_WRITE 311 /* Override vnode DAC write perm. */ #define PRIV_VFS_ADMIN 312 /* Override vnode DAC admin perm. */ #define PRIV_VFS_EXEC 313 /* Override vnode DAC exec perm. */ #define PRIV_VFS_LOOKUP 314 /* Override vnode DAC lookup perm. */ #define PRIV_VFS_BLOCKRESERVE 315 /* Can use free block reserve. */ #define PRIV_VFS_CHFLAGS_DEV 316 /* Can chflags() a device node. */ #define PRIV_VFS_CHOWN 317 /* Can set user; group to non-member. */ #define PRIV_VFS_CHROOT 318 /* chroot(). */ #define PRIV_VFS_RETAINSUGID 319 /* Can retain sugid bits on change. */ #define PRIV_VFS_EXCEEDQUOTA 320 /* Exempt from quota restrictions. */ #define PRIV_VFS_EXTATTR_SYSTEM 321 /* Operate on system EA namespace. */ #define PRIV_VFS_FCHROOT 322 /* fchroot(). */ #define PRIV_VFS_FHOPEN 323 /* Can fhopen(). */ #define PRIV_VFS_FHSTAT 324 /* Can fhstat(). */ #define PRIV_VFS_FHSTATFS 325 /* Can fhstatfs(). */ #define PRIV_VFS_GENERATION 326 /* stat() returns generation number. */ #define PRIV_VFS_GETFH 327 /* Can retrieve file handles. */ #define PRIV_VFS_GETQUOTA 328 /* getquota(). */ #define PRIV_VFS_LINK 329 /* bsd.hardlink_check_uid */ #define PRIV_VFS_MKNOD_BAD 330 /* Was: mknod() can mark bad inodes. */ #define PRIV_VFS_MKNOD_DEV 331 /* Can mknod() to create dev nodes. */ #define PRIV_VFS_MKNOD_WHT 332 /* Can mknod() to create whiteout. */ #define PRIV_VFS_MOUNT 333 /* Can mount(). */ #define PRIV_VFS_MOUNT_OWNER 334 /* Can manage other users' file systems. */ #define PRIV_VFS_MOUNT_EXPORTED 335 /* Can set MNT_EXPORTED on mount. */ #define PRIV_VFS_MOUNT_PERM 336 /* Override dev node perms at mount. */ #define PRIV_VFS_MOUNT_SUIDDIR 337 /* Can set MNT_SUIDDIR on mount. */ #define PRIV_VFS_MOUNT_NONUSER 338 /* Can perform a non-user mount. */ #define PRIV_VFS_SETGID 339 /* Can setgid if not in group. */ #define PRIV_VFS_SETQUOTA 340 /* setquota(). */ #define PRIV_VFS_STICKYFILE 341 /* Can set sticky bit on file. */ #define PRIV_VFS_SYSFLAGS 342 /* Can modify system flags. */ #define PRIV_VFS_UNMOUNT 343 /* Can unmount(). 
*/ #define PRIV_VFS_STAT 344 /* Override vnode MAC stat perm. */ #define PRIV_VFS_READ_DIR 345 /* Can read(2) a dirfd, needs sysctl. */ /* * Virtual memory privileges. */ #define PRIV_VM_MADV_PROTECT 360 /* Can set MADV_PROTECT. */ #define PRIV_VM_MLOCK 361 /* Can mlock(), mlockall(). */ #define PRIV_VM_MUNLOCK 362 /* Can munlock(), munlockall(). */ #define PRIV_VM_SWAP_NOQUOTA 363 /* * Can override the global * swap reservation limits. */ #define PRIV_VM_SWAP_NORLIMIT 364 /* * Can override the per-uid * swap reservation limits. */ /* * Device file system privileges. */ #define PRIV_DEVFS_RULE 370 /* Can manage devfs rules. */ #define PRIV_DEVFS_SYMLINK 371 /* Can create symlinks in devfs. */ /* * Random number generator privileges. */ #define PRIV_RANDOM_RESEED 380 /* Closing /dev/random reseeds. */ /* * Network stack privileges. */ #define PRIV_NET_BRIDGE 390 /* Administer bridge. */ #define PRIV_NET_GRE 391 /* Administer GRE. */ #define _PRIV_NET_PPP 392 /* Removed. */ #define _PRIV_NET_SLIP 393 /* Removed. */ #define PRIV_NET_BPF 394 /* Monitor BPF. */ #define PRIV_NET_RAW 395 /* Open raw socket. */ #define PRIV_NET_ROUTE 396 /* Administer routing. */ #define PRIV_NET_TAP 397 /* Can open tap device. */ #define PRIV_NET_SETIFMTU 398 /* Set interface MTU. */ #define PRIV_NET_SETIFFLAGS 399 /* Set interface flags. */ #define PRIV_NET_SETIFCAP 400 /* Set interface capabilities. */ #define PRIV_NET_SETIFNAME 401 /* Set interface name. */ #define PRIV_NET_SETIFMETRIC 402 /* Set interface metrics. */ #define PRIV_NET_SETIFPHYS 403 /* Set interface physical layer prop. */ #define PRIV_NET_SETIFMAC 404 /* Set interface MAC label. */ #define PRIV_NET_ADDMULTI 405 /* Add multicast addr. to ifnet. */ #define PRIV_NET_DELMULTI 406 /* Delete multicast addr. from ifnet. */ #define PRIV_NET_HWIOCTL 407 /* Issue hardware ioctl on ifnet. */ #define PRIV_NET_SETLLADDR 408 /* Set interface link-level address. */ #define PRIV_NET_ADDIFGROUP 409 /* Add new interface group. */ #define PRIV_NET_DELIFGROUP 410 /* Delete interface group. */ #define PRIV_NET_IFCREATE 411 /* Create cloned interface. */ #define PRIV_NET_IFDESTROY 412 /* Destroy cloned interface. */ #define PRIV_NET_ADDIFADDR 413 /* Add protocol addr to interface. */ #define PRIV_NET_DELIFADDR 414 /* Delete protocol addr on interface. */ #define PRIV_NET_LAGG 415 /* Administer lagg interface. */ #define PRIV_NET_GIF 416 /* Administer gif interface. */ #define PRIV_NET_SETIFVNET 417 /* Move interface to vnet. */ #define PRIV_NET_SETIFDESCR 418 /* Set interface description. */ #define PRIV_NET_SETIFFIB 419 /* Set interface fib. */ #define PRIV_NET_VXLAN 420 /* Administer vxlan. */ #define PRIV_NET_SETLANPCP 421 /* Set LAN priority. */ #define PRIV_NET_SETVLANPCP PRIV_NET_SETLANPCP /* Alias Set VLAN priority */ -#define PRIV_NET_WG 422 /* Administrate if_wg. */ /* * 802.11-related privileges. */ #define PRIV_NET80211_VAP_GETKEY 440 /* Query VAP 802.11 keys. */ #define PRIV_NET80211_VAP_MANAGE 441 /* Administer 802.11 VAP */ #define PRIV_NET80211_VAP_SETMAC 442 /* Set VAP MAC address */ #define PRIV_NET80211_CREATE_VAP 443 /* Create a new VAP */ /* * Placeholder for AppleTalk privileges, not supported anymore. */ #define _PRIV_NETATALK_RESERVEDPORT 450 /* Bind low port number. */ /* * ATM privileges. */ #define PRIV_NETATM_CFG 460 #define PRIV_NETATM_ADD 461 #define PRIV_NETATM_DEL 462 #define PRIV_NETATM_SET 463 /* * Bluetooth privileges. */ #define PRIV_NETBLUETOOTH_RAW 470 /* Open raw bluetooth socket. 
*/ /* * Netgraph and netgraph module privileges. */ #define PRIV_NETGRAPH_CONTROL 480 /* Open netgraph control socket. */ #define PRIV_NETGRAPH_TTY 481 /* Configure tty for netgraph. */ /* * IPv4 and IPv6 privileges. */ #define PRIV_NETINET_RESERVEDPORT 490 /* Bind low port number. */ #define PRIV_NETINET_IPFW 491 /* Administer IPFW firewall. */ #define PRIV_NETINET_DIVERT 492 /* Open IP divert socket. */ #define PRIV_NETINET_PF 493 /* Administer pf firewall. */ #define PRIV_NETINET_DUMMYNET 494 /* Administer DUMMYNET. */ #define PRIV_NETINET_CARP 495 /* Administer CARP. */ #define PRIV_NETINET_MROUTE 496 /* Administer multicast routing. */ #define PRIV_NETINET_RAW 497 /* Open netinet raw socket. */ #define PRIV_NETINET_GETCRED 498 /* Query netinet pcb credentials. */ #define PRIV_NETINET_ADDRCTRL6 499 /* Administer IPv6 address scopes. */ #define PRIV_NETINET_ND6 500 /* Administer IPv6 neighbor disc. */ #define PRIV_NETINET_SCOPE6 501 /* Administer IPv6 address scopes. */ #define PRIV_NETINET_ALIFETIME6 502 /* Administer IPv6 address lifetimes. */ #define PRIV_NETINET_IPSEC 503 /* Administer IPSEC. */ #define PRIV_NETINET_REUSEPORT 504 /* Allow [rapid] port/address reuse. */ #define PRIV_NETINET_SETHDROPTS 505 /* Set certain IPv4/6 header options. */ #define PRIV_NETINET_BINDANY 506 /* Allow bind to any address. */ #define PRIV_NETINET_HASHKEY 507 /* Get and set hash keys for IPv4/6. */ /* * Placeholders for IPX/SPX privileges, not supported any more. */ #define _PRIV_NETIPX_RESERVEDPORT 520 /* Bind low port number. */ #define _PRIV_NETIPX_RAW 521 /* Open netipx raw socket. */ /* * NCP privileges. */ #define PRIV_NETNCP 530 /* Use another user's connection. */ /* * SMB privileges. */ #define PRIV_NETSMB 540 /* Use another user's connection. */ /* * VM86 privileges. */ #define PRIV_VM86_INTCALL 550 /* Allow invoking vm86 int handlers. */ /* * Set of reserved privilege values, which will be allocated to code as * needed, in order to avoid renumbering later privileges due to insertion. */ #define _PRIV_RESERVED0 560 #define _PRIV_RESERVED1 561 #define _PRIV_RESERVED2 562 #define _PRIV_RESERVED3 563 #define _PRIV_RESERVED4 564 #define _PRIV_RESERVED5 565 #define _PRIV_RESERVED6 566 #define _PRIV_RESERVED7 567 #define _PRIV_RESERVED8 568 #define _PRIV_RESERVED9 569 #define _PRIV_RESERVED10 570 #define _PRIV_RESERVED11 571 #define _PRIV_RESERVED12 572 #define _PRIV_RESERVED13 573 #define _PRIV_RESERVED14 574 #define _PRIV_RESERVED15 575 /* * Define a set of valid privilege numbers that can be used by loadable * modules that don't yet have privilege reservations. Ideally, these should * not be used, since their meaning is opaque to any policies that are aware * of specific privileges, such as jail, and as such may be arbitrarily * denied. */ #define PRIV_MODULE0 600 #define PRIV_MODULE1 601 #define PRIV_MODULE2 602 #define PRIV_MODULE3 603 #define PRIV_MODULE4 604 #define PRIV_MODULE5 605 #define PRIV_MODULE6 606 #define PRIV_MODULE7 607 #define PRIV_MODULE8 608 #define PRIV_MODULE9 609 #define PRIV_MODULE10 610 #define PRIV_MODULE11 611 #define PRIV_MODULE12 612 #define PRIV_MODULE13 613 #define PRIV_MODULE14 614 #define PRIV_MODULE15 615 /* * DDB(4) privileges. */ #define PRIV_DDB_CAPTURE 620 /* Allow reading of DDB capture log. */ /* * Arla/nnpfs privileges. */ #define PRIV_NNPFS_DEBUG 630 /* Perform ARLA_VIOC_NNPFSDEBUG. */ /* * cpuctl(4) privileges. */ #define PRIV_CPUCTL_WRMSR 640 /* Write model-specific register. */ #define PRIV_CPUCTL_UPDATE 641 /* Update cpu microcode.
*/ /* * Capi4BSD privileges. */ #define PRIV_C4B_RESET_CTLR 650 /* Load firmware, reset controller. */ #define PRIV_C4B_TRACE 651 /* Unrestricted CAPI message tracing. */ /* * OpenAFS privileges. */ #define PRIV_AFS_ADMIN 660 /* Can change AFS client settings. */ #define PRIV_AFS_DAEMON 661 /* Can become the AFS daemon. */ /* * Resource Limits privileges. */ #define PRIV_RCTL_GET_RACCT 670 #define PRIV_RCTL_GET_RULES 671 #define PRIV_RCTL_GET_LIMITS 672 #define PRIV_RCTL_ADD_RULE 673 #define PRIV_RCTL_REMOVE_RULE 674 /* * mem(4) privileges. */ #define PRIV_KMEM_READ 680 /* Open mem/kmem for reading. */ #define PRIV_KMEM_WRITE 681 /* Open mem/kmem for writing. */ /* * Track end of privilege list. */ #define _PRIV_HIGHEST 682 /* * Validate that a named privilege is known by the privilege system. Invalid * privileges presented to the privilege system by a priv_check interface * will result in a panic. This is only approximate due to sparse allocation * of the privilege space. */ #define PRIV_VALID(x) ((x) > _PRIV_LOWEST && (x) < _PRIV_HIGHEST) #ifdef _KERNEL /* * Privilege check interfaces, modeled after historic suser() interfaces, but * with the addition of a specific privilege name. No flags are currently * defined for the API. Historically, flags specified whether to check against * the real uid instead of the effective uid, and whether or not the check * should be allowed in jail. */ struct thread; struct ucred; int priv_check(struct thread *td, int priv); int priv_check_cred(struct ucred *cred, int priv); int priv_check_cred_vfs_lookup(struct ucred *cred); int priv_check_cred_vfs_lookup_nomac(struct ucred *cred); int priv_check_cred_vfs_generation(struct ucred *cred); #endif #endif /* !_SYS_PRIV_H_ */ diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h index a5bb8a2587ea..295a1cf3d37f 100644 --- a/sys/sys/socketvar.h +++ b/sys/sys/socketvar.h @@ -1,548 +1,547 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE.
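
As a usage illustration for the check interfaces declared above: a typical consumer calls priv_check() with one specific privilege and fails closed. The device ioctl handler below is invented for the example; only priv_check() and PRIV_DRIVER come from this header.

static int
example_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{
	int error;

	/* Refuse the low-level operation without the named privilege. */
	error = priv_check(td, PRIV_DRIVER);
	if (error != 0)
		return (error);		/* typically EPERM */
	/* ... perform the privileged work ... */
	return (0);
}
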
* * @(#)socketvar.h 8.3 (Berkeley) 2/19/95 * * $FreeBSD$ */ #ifndef _SYS_SOCKETVAR_H_ #define _SYS_SOCKETVAR_H_ /* * Socket generation count type. Also used in xinpcb, xtcpcb, xunpcb. */ typedef uint64_t so_gen_t; #if defined(_KERNEL) || defined(_WANT_SOCKET) #include /* for TAILQ macros */ #include /* for struct selinfo */ #include #include #include #include #include #ifdef _KERNEL #include #include #endif struct vnet; /* * Kernel structure per socket. * Contains send and receive buffer queues, * handle on protocol and pointer to protocol * private data and error information. */ typedef int so_upcall_t(struct socket *, void *, int); typedef void so_dtor_t(struct socket *); struct socket; enum socket_qstate { SQ_NONE = 0, SQ_INCOMP = 0x0800, /* on sol_incomp */ SQ_COMP = 0x1000, /* on sol_comp */ }; /*- * Locking key to struct socket: * (a) constant after allocation, no locking required. * (b) locked by SOCK_LOCK(so). * (cr) locked by SOCKBUF_LOCK(&so->so_rcv). * (cs) locked by SOCKBUF_LOCK(&so->so_snd). * (e) locked by SOLISTEN_LOCK() of corresponding listening socket. * (f) not locked since integer reads/writes are atomic. * (g) used only as a sleep/wakeup address, no value. * (h) locked by global mutex so_global_mtx. * (k) locked by KTLS workqueue mutex */ TAILQ_HEAD(accept_queue, socket); struct socket { struct mtx so_lock; volatile u_int so_count; /* (b / refcount) */ struct selinfo so_rdsel; /* (b/cr) for so_rcv/so_comp */ struct selinfo so_wrsel; /* (b/cs) for so_snd */ short so_type; /* (a) generic type, see socket.h */ int so_options; /* (b) from socket call, see socket.h */ short so_linger; /* time to linger close(2) */ short so_state; /* (b) internal state flags SS_* */ void *so_pcb; /* protocol control block */ struct vnet *so_vnet; /* (a) network stack instance */ struct protosw *so_proto; /* (a) protocol handle */ short so_timeo; /* (g) connection timeout */ u_short so_error; /* (f) error affecting connection */ struct sigio *so_sigio; /* [sg] information for async I/O or out of band data (SIGURG) */ struct ucred *so_cred; /* (a) user credentials */ struct label *so_label; /* (b) MAC label for socket */ /* NB: generation count must not be first. */ so_gen_t so_gencnt; /* (h) generation count */ void *so_emuldata; /* (b) private data for emulators */ so_dtor_t *so_dtor; /* (b) optional destructor */ struct osd osd; /* Object Specific extensions */ /* * so_fibnum, so_user_cookie and friends can be used to attach * some user-specified metadata to a socket, which then can be * used by the kernel for various actions. * so_user_cookie is used by ipfw/dummynet. */ int so_fibnum; /* routing domain for this socket */ uint32_t so_user_cookie; int so_ts_clock; /* type of the clock used for timestamps */ uint32_t so_max_pacing_rate; /* (f) TX rate limit in bytes/s */ union { /* Regular (data flow) socket. */ struct { /* (cr, cs) Receive and send buffers. */ struct sockbuf so_rcv, so_snd; /* (e) Our place on accept queue. */ TAILQ_ENTRY(socket) so_list; struct socket *so_listen; /* (b) */ enum socket_qstate so_qstate; /* (b) */ /* (b) cached MAC label for peer */ struct label *so_peerlabel; u_long so_oobmark; /* chars to oob mark */ /* (k) Our place on KTLS RX work queue. */ STAILQ_ENTRY(socket) so_ktls_rx_list; }; /* * Listening socket, where accepts occur, is so_listen in all * subsidiary sockets. If so_listen is NULL, socket is not * related to an accept. 
For a listening socket itself * sol_incomp queues partially completed connections, while * sol_comp is a queue of connections ready to be accepted. * If a connection is aborted and it has so_listen set, then * it has to be pulled out of either sol_incomp or sol_comp. * We allow connections to queue up based on current queue * lengths and limit on number of queued connections for this * socket. */ struct { /* (e) queue of partial unaccepted connections */ struct accept_queue sol_incomp; /* (e) queue of complete unaccepted connections */ struct accept_queue sol_comp; u_int sol_qlen; /* (e) sol_comp length */ u_int sol_incqlen; /* (e) sol_incomp length */ u_int sol_qlimit; /* (e) queue limit */ /* accept_filter(9) optional data */ struct accept_filter *sol_accept_filter; void *sol_accept_filter_arg; /* saved filter args */ char *sol_accept_filter_str; /* saved user args */ /* Optional upcall, for kernel socket. */ so_upcall_t *sol_upcall; /* (e) */ void *sol_upcallarg; /* (e) */ /* Socket buffer parameters, to be copied to * dataflow sockets, accepted from this one. */ int sol_sbrcv_lowat; int sol_sbsnd_lowat; u_int sol_sbrcv_hiwat; u_int sol_sbsnd_hiwat; short sol_sbrcv_flags; short sol_sbsnd_flags; sbintime_t sol_sbrcv_timeo; sbintime_t sol_sbsnd_timeo; /* Information tracking listen queue overflows. */ struct timeval sol_lastover; /* (e) */ int sol_overcount; /* (e) */ }; }; }; #endif /* defined(_KERNEL) || defined(_WANT_SOCKET) */ /* * Socket state bits. * * Historically, these bits were all kept in the so_state field. * They are now split into separate, lock-specific fields. * so_state maintains basic socket state protected by the socket lock. * so_qstate holds information about the socket accept queues. * Each socket buffer also has a state field holding information * relevant to that socket buffer (can't send, rcv). * Many fields will be read without locks to improve performance and avoid * lock order issues. However, this approach must be used with caution. */ #define SS_NOFDREF 0x0001 /* no file table ref any more */ #define SS_ISCONNECTED 0x0002 /* socket connected to a peer */ #define SS_ISCONNECTING 0x0004 /* in process of connecting to peer */ #define SS_ISDISCONNECTING 0x0008 /* in process of disconnecting */ #define SS_NBIO 0x0100 /* non-blocking ops */ #define SS_ASYNC 0x0200 /* async i/o notify */ #define SS_ISCONFIRMING 0x0400 /* deciding to accept connection req */ #define SS_ISDISCONNECTED 0x2000 /* socket disconnected from peer */ /* * Protocols can mark a socket as SS_PROTOREF to indicate that, following * pru_detach, they still want the socket to persist, and will free it * themselves when they are done. Protocols should only ever call sofree() * following setting this flag in pru_detach(), and never otherwise, as * sofree() bypasses socket reference counting. 
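
Tying the locking key to the state bits above, a hedged sketch (the helper is invented for illustration): so_state is keyed (b), so a correct reader takes SOCK_LOCK(), defined just below, around the test, whereas a field keyed (f) such as so_error may be read without any lock.

static int
example_sock_is_connected(struct socket *so)
{
	int connected;

	SOCK_LOCK(so);		/* so_state is (b): needs the socket lock */
	connected = (so->so_state & SS_ISCONNECTED) != 0;
	SOCK_UNLOCK(so);
	return (connected);
}
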
*/ #define SS_PROTOREF 0x4000 /* strong protocol reference */ #ifdef _KERNEL #define SOCK_MTX(so) &(so)->so_lock #define SOCK_LOCK(so) mtx_lock(&(so)->so_lock) #define SOCK_OWNED(so) mtx_owned(&(so)->so_lock) #define SOCK_UNLOCK(so) mtx_unlock(&(so)->so_lock) #define SOCK_LOCK_ASSERT(so) mtx_assert(&(so)->so_lock, MA_OWNED) #define SOCK_UNLOCK_ASSERT(so) mtx_assert(&(so)->so_lock, MA_NOTOWNED) #define SOLISTENING(sol) (((sol)->so_options & SO_ACCEPTCONN) != 0) #define SOLISTEN_LOCK(sol) do { \ mtx_lock(&(sol)->so_lock); \ KASSERT(SOLISTENING(sol), \ ("%s: %p not listening", __func__, (sol))); \ } while (0) #define SOLISTEN_TRYLOCK(sol) mtx_trylock(&(sol)->so_lock) #define SOLISTEN_UNLOCK(sol) do { \ KASSERT(SOLISTENING(sol), \ ("%s: %p not listening", __func__, (sol))); \ mtx_unlock(&(sol)->so_lock); \ } while (0) #define SOLISTEN_LOCK_ASSERT(sol) do { \ mtx_assert(&(sol)->so_lock, MA_OWNED); \ KASSERT(SOLISTENING(sol), \ ("%s: %p not listening", __func__, (sol))); \ } while (0) /* * Macros for sockets and socket buffering. */ /* * Flags to sblock(). */ #define SBL_WAIT 0x00000001 /* Wait if not immediately available. */ #define SBL_NOINTR 0x00000002 /* Force non-interruptible sleep. */ #define SBL_VALID (SBL_WAIT | SBL_NOINTR) /* * Do we need to notify the other side when I/O is possible? */ #define sb_notify(sb) (((sb)->sb_flags & (SB_WAIT | SB_SEL | SB_ASYNC | \ SB_UPCALL | SB_AIO | SB_KNOTE)) != 0) /* do we have to send all at once on a socket? */ #define sosendallatonce(so) \ ((so)->so_proto->pr_flags & PR_ATOMIC) /* can we read something from so? */ #define soreadabledata(so) \ (sbavail(&(so)->so_rcv) >= (so)->so_rcv.sb_lowat || (so)->so_error) #define soreadable(so) \ (soreadabledata(so) || ((so)->so_rcv.sb_state & SBS_CANTRCVMORE)) /* can we write something to so? */ #define sowriteable(so) \ ((sbspace(&(so)->so_snd) >= (so)->so_snd.sb_lowat && \ (((so)->so_state&SS_ISCONNECTED) || \ ((so)->so_proto->pr_flags&PR_CONNREQUIRED)==0)) || \ ((so)->so_snd.sb_state & SBS_CANTSENDMORE) || \ (so)->so_error) /* * soref()/sorele() ref-count the socket structure. * soref() may be called without owning the socket lock, but in that case a * caller must own something that holds the socket, and so_count must not be 0. * Note that you must still explicitly close the socket, but the last ref * count will free the structure. */ #define soref(so) refcount_acquire(&(so)->so_count) #define sorele(so) do { \ SOCK_LOCK_ASSERT(so); \ if (refcount_release(&(so)->so_count)) \ sofree(so); \ else \ SOCK_UNLOCK(so); \ } while (0) /* * In sorwakeup() and sowwakeup(), acquire the socket buffer lock to * avoid a non-atomic test-and-wakeup. However, sowakeup is * responsible for releasing the lock if it is called. We unlock only * if we don't call into sowakeup. If any code is introduced that * directly invokes the underlying sowakeup() primitives, it must * maintain the same semantics.
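
A short usage sketch for the reference-count macros above; the function is invented for illustration. soref() may be taken without the socket lock while something else is known to pin the socket, and sorele() must be called with the lock held: it either drops the lock or calls sofree() on the final release.

static void
example_socket_handoff(struct socket *so)
{
	soref(so);	/* legal unlocked: caller knows so_count > 0 here */
	/* ... pass the socket to another subsystem and use it ... */
	SOCK_LOCK(so);
	sorele(so);	/* unlocks the socket, or frees it on the last ref */
}
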
*/ #define sorwakeup_locked(so) do { \ SOCKBUF_LOCK_ASSERT(&(so)->so_rcv); \ if (sb_notify(&(so)->so_rcv)) \ sowakeup((so), &(so)->so_rcv); \ else \ SOCKBUF_UNLOCK(&(so)->so_rcv); \ } while (0) #define sorwakeup(so) do { \ SOCKBUF_LOCK(&(so)->so_rcv); \ sorwakeup_locked(so); \ } while (0) #define sowwakeup_locked(so) do { \ SOCKBUF_LOCK_ASSERT(&(so)->so_snd); \ if (sb_notify(&(so)->so_snd)) \ sowakeup((so), &(so)->so_snd); \ else \ SOCKBUF_UNLOCK(&(so)->so_snd); \ } while (0) #define sowwakeup(so) do { \ SOCKBUF_LOCK(&(so)->so_snd); \ sowwakeup_locked(so); \ } while (0) struct accept_filter { char accf_name[16]; int (*accf_callback) (struct socket *so, void *arg, int waitflag); void * (*accf_create) (struct socket *so, char *arg); void (*accf_destroy) (struct socket *so); SLIST_ENTRY(accept_filter) accf_next; }; #define ACCEPT_FILTER_DEFINE(modname, filtname, cb, create, destroy, ver) \ static struct accept_filter modname##_filter = { \ .accf_name = filtname, \ .accf_callback = cb, \ .accf_create = create, \ .accf_destroy = destroy, \ }; \ static moduledata_t modname##_mod = { \ .name = __XSTRING(modname), \ .evhand = accept_filt_generic_mod_event, \ .priv = &modname##_filter, \ }; \ DECLARE_MODULE(modname, modname##_mod, SI_SUB_DRIVERS, \ SI_ORDER_MIDDLE); \ MODULE_VERSION(modname, ver) #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_ACCF); MALLOC_DECLARE(M_PCB); MALLOC_DECLARE(M_SONAME); #endif /* * Socket specific helper hook point identifiers * Do not leave holes in the sequence, hook registration is a loop. */ #define HHOOK_SOCKET_OPT 0 #define HHOOK_SOCKET_CREATE 1 #define HHOOK_SOCKET_RCV 2 #define HHOOK_SOCKET_SND 3 #define HHOOK_FILT_SOREAD 4 #define HHOOK_FILT_SOWRITE 5 #define HHOOK_SOCKET_CLOSE 6 #define HHOOK_SOCKET_LAST HHOOK_SOCKET_CLOSE struct socket_hhook_data { struct socket *so; struct mbuf *m; void *hctx; /* hook point specific data*/ int status; }; extern int maxsockets; extern u_long sb_max; extern so_gen_t so_gencnt; struct file; struct filecaps; struct filedesc; struct mbuf; struct sockaddr; struct ucred; struct uio; /* 'which' values for socket upcalls. */ #define SO_RCV 1 #define SO_SND 2 /* Return values for socket upcalls. 
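
To make the ACCEPT_FILTER_DEFINE() glue above concrete, here is a hedged sketch modeled on the in-tree accf_data(9) filter; the "exampleready" name and the callback body are invented. The callback keeps a connection on the listen queue (SU_OK) until the receive buffer has data, then lets accept(2) complete (SU_ISCONNECTED); the SU_* values are defined just below.

static int
example_accf_callback(struct socket *so, void *arg, int waitflag)
{

	if (sbavail(&so->so_rcv) == 0)
		return (SU_OK);		/* not ready: keep it queued */
	return (SU_ISCONNECTED);	/* data arrived: complete the accept */
}

ACCEPT_FILTER_DEFINE(accf_example, "exampleready",
    example_accf_callback, NULL, NULL, 1);
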
*/ #define SU_OK 0 #define SU_ISCONNECTED 1 /* * From uipc_socket and friends */ int getsockaddr(struct sockaddr **namp, const struct sockaddr *uaddr, size_t len); int getsock_cap(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp, u_int *fflagp, struct filecaps *havecaps); void soabort(struct socket *so); int soaccept(struct socket *so, struct sockaddr **nam); void soaio_enqueue(struct task *task); void soaio_rcv(void *context, int pending); void soaio_snd(void *context, int pending); int socheckuid(struct socket *so, uid_t uid); -int sogetsockaddr(struct socket *so, struct sockaddr **nam); int sobind(struct socket *so, struct sockaddr *nam, struct thread *td); int sobindat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td); int soclose(struct socket *so); int soconnect(struct socket *so, struct sockaddr *nam, struct thread *td); int soconnectat(int fd, struct socket *so, struct sockaddr *nam, struct thread *td); int soconnect2(struct socket *so1, struct socket *so2); int socreate(int dom, struct socket **aso, int type, int proto, struct ucred *cred, struct thread *td); int sodisconnect(struct socket *so); void sodtor_set(struct socket *, so_dtor_t *); struct sockaddr *sodupsockaddr(const struct sockaddr *sa, int mflags); void sofree(struct socket *so); void sohasoutofband(struct socket *so); int solisten(struct socket *so, int backlog, struct thread *td); void solisten_proto(struct socket *so, int backlog); int solisten_proto_check(struct socket *so); int solisten_dequeue(struct socket *, struct socket **, int); struct socket * sonewconn(struct socket *head, int connstatus); struct socket * sopeeloff(struct socket *); int sopoll(struct socket *so, int events, struct ucred *active_cred, struct thread *td); int sopoll_generic(struct socket *so, int events, struct ucred *active_cred, struct thread *td); int soreceive(struct socket *so, struct sockaddr **paddr, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp); int soreceive_stream(struct socket *so, struct sockaddr **paddr, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp); int soreceive_dgram(struct socket *so, struct sockaddr **paddr, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp); int soreceive_generic(struct socket *so, struct sockaddr **paddr, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp); int soreserve(struct socket *so, u_long sndcc, u_long rcvcc); void sorflush(struct socket *so); int sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td); int sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td); int sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td); int soshutdown(struct socket *so, int how); void soupcall_clear(struct socket *, int); void soupcall_set(struct socket *, int, so_upcall_t, void *); void solisten_upcall_set(struct socket *, so_upcall_t, void *); void sowakeup(struct socket *so, struct sockbuf *sb); void sowakeup_aio(struct socket *so, struct sockbuf *sb); void solisten_wakeup(struct socket *); int selsocket(struct socket *so, int events, struct timeval *tv, struct thread *td); void soisconnected(struct socket *so); void soisconnecting(struct socket *so); void soisdisconnected(struct socket *so); void soisdisconnecting(struct 
socket *so); void socantrcvmore(struct socket *so); void socantrcvmore_locked(struct socket *so); void socantsendmore(struct socket *so); void socantsendmore_locked(struct socket *so); /* * Accept filter functions (duh). */ int accept_filt_add(struct accept_filter *filt); int accept_filt_del(char *name); struct accept_filter *accept_filt_get(char *name); #ifdef ACCEPT_FILTER_MOD #ifdef SYSCTL_DECL SYSCTL_DECL(_net_inet_accf); #endif int accept_filt_generic_mod_event(module_t mod, int event, void *data); #endif #endif /* _KERNEL */ /* * Structure to export socket from kernel to utilities, via sysctl(3). */ struct xsocket { ksize_t xso_len; /* length of this structure */ kvaddr_t xso_so; /* kernel address of struct socket */ kvaddr_t so_pcb; /* kernel address of struct inpcb */ uint64_t so_oobmark; int64_t so_spare64[8]; int32_t xso_protocol; int32_t xso_family; uint32_t so_qlen; uint32_t so_incqlen; uint32_t so_qlimit; pid_t so_pgid; uid_t so_uid; int32_t so_spare32[8]; int16_t so_type; int16_t so_options; int16_t so_linger; int16_t so_state; int16_t so_timeo; uint16_t so_error; struct xsockbuf { uint32_t sb_cc; uint32_t sb_hiwat; uint32_t sb_mbcnt; uint32_t sb_mcnt; uint32_t sb_ccnt; uint32_t sb_mbmax; int32_t sb_lowat; int32_t sb_timeo; int16_t sb_flags; } so_rcv, so_snd; }; #ifdef _KERNEL void sotoxsocket(struct socket *so, struct xsocket *xso); void sbtoxsockbuf(struct sockbuf *sb, struct xsockbuf *xsb); #endif /* * Socket buffer state bits. Exported via libprocstat(3). */ #define SBS_CANTSENDMORE 0x0010 /* can't send more data to peer */ #define SBS_CANTRCVMORE 0x0020 /* can't receive more data from peer */ #define SBS_RCVATMARK 0x0040 /* at mark on input */ #endif /* !_SYS_SOCKETVAR_H_ */ diff --git a/tests/sys/netinet/Makefile b/tests/sys/netinet/Makefile index 95cd044f7031..56a1cf877135 100644 --- a/tests/sys/netinet/Makefile +++ b/tests/sys/netinet/Makefile @@ -1,32 +1,24 @@ # $FreeBSD$ PACKAGE= tests TESTSDIR= ${TESTSBASE}/sys/netinet BINDIR= ${TESTSDIR} ATF_TESTS_C= ip_reass_test \ so_reuseport_lb_test \ socket_afinet \ tcp_connect_port_test -ATF_TESTS_SH= carp fibs \ - fibs_test \ - redirect \ - divert \ - forward \ - output \ - lpm \ - arp \ - if_wg_test +ATF_TESTS_SH= carp fibs fibs_test redirect divert forward output lpm arp TEST_METADATA.output+= required_programs="python" PROGS= udp_dontroute tcp_user_cookie ${PACKAGE}FILES+= redirect.py ${PACKAGE}FILESMODE_redirect.py=0555 MAN= .include diff --git a/tests/sys/netinet/if_wg_test.sh b/tests/sys/netinet/if_wg_test.sh deleted file mode 100644 index b0ab70108cf4..000000000000 --- a/tests/sys/netinet/if_wg_test.sh +++ /dev/null @@ -1,188 +0,0 @@ -# $FreeBSD$ -# -# SPDX-License-Identifier: BSD-2-Clause-FreeBSD -# -# Copyright (c) 2021 The FreeBSD Foundation -# -# This software was developed by Mark Johnston under sponsorship -# from the FreeBSD Foundation. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. 
-# -# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -# SUCH DAMAGE. - -. $(atf_get_srcdir)/../common/vnet.subr - -atf_test_case "wg_basic" "cleanup" -wg_basic_head() -{ - atf_set descr 'Create a wg(4) tunnel over an epair and pass traffic between jails' - atf_set require.user root -} - -wg_basic_body() -{ - local epair pri1 pri2 pub1 pub2 wg1 wg2 - local endpoint1 endpoint2 tunnel1 tunnel2 - - kldload -n if_wg - - pri1=$(openssl rand -base64 32) - pri2=$(openssl rand -base64 32) - - endpoint1=192.168.2.1 - endpoint2=192.168.2.2 - tunnel1=169.254.0.1 - tunnel2=169.254.0.2 - - epair=$(vnet_mkepair) - - vnet_init - - vnet_mkjail wgtest1 ${epair}a - vnet_mkjail wgtest2 ${epair}b - - # Workaround for PR 254212. - jexec wgtest1 ifconfig lo0 up - jexec wgtest2 ifconfig lo0 up - - jexec wgtest1 ifconfig ${epair}a $endpoint1 up - jexec wgtest2 ifconfig ${epair}b $endpoint2 up - - wg1=$(jexec wgtest1 ifconfig wg create listen-port 12345 private-key "$pri1") - pub1=$(jexec wgtest1 ifconfig $wg1 | awk '/public-key:/ {print $2}') - wg2=$(jexec wgtest2 ifconfig wg create listen-port 12345 private-key "$pri2") - pub2=$(jexec wgtest2 ifconfig $wg2 | awk '/public-key:/ {print $2}') - - atf_check -s exit:0 -o ignore \ - jexec wgtest1 ifconfig $wg1 peer public-key "$pub2" \ - endpoint ${endpoint2}:12345 allowed-ips ${tunnel2}/32 - atf_check -s exit:0 \ - jexec wgtest1 ifconfig $wg1 inet $tunnel1 up - - atf_check -s exit:0 -o ignore \ - jexec wgtest2 ifconfig $wg2 peer public-key "$pub1" \ - endpoint ${endpoint1}:12345 allowed-ips ${tunnel1}/32 - atf_check -s exit:0 \ - jexec wgtest2 ifconfig $wg2 inet $tunnel2 up - - # Generous timeout since the handshake takes some time. - atf_check -s exit:0 -o ignore jexec wgtest1 ping -o -t 5 -i 0.25 $tunnel2 - atf_check -s exit:0 -o ignore jexec wgtest2 ping -o -t 5 -i 0.25 $tunnel1 -} - -wg_basic_cleanup() -{ - vnet_cleanup -} - -# The kernel is expected to silently ignore any attempt to add a peer with a -# public key identical to the host's.
-atf_test_case "wg_key_peerdev_shared" "cleanup" -wg_key_peerdev_shared_head() -{ - atf_set descr 'Create a wg(4) interface with a shared pubkey between device and a peer' - atf_set require.user root -} - -wg_key_peerdev_shared_body() -{ - local epair pri1 pub1 wg1 - local endpoint1 tunnel1 - - kldload -n if_wg - - pri1=$(openssl rand -base64 32) - - endpoint1=192.168.2.1 - tunnel1=169.254.0.1 - - vnet_mkjail wgtest1 - - wg1=$(jexec wgtest1 ifconfig wg create listen-port 12345 private-key "$pri1") - pub1=$(jexec wgtest1 ifconfig $wg1 | awk '/public-key:/ {print $2}') - - atf_check -s exit:0 \ - jexec wgtest1 ifconfig ${wg1} peer public-key "${pub1}" \ - allowed-ips "${tunnel1}/32" - - atf_check -o empty jexec wgtest1 ifconfig ${wg1} peers -} - -wg_key_peerdev_shared_cleanup() -{ - vnet_cleanup -} - -# When a wg(8) interface has a private key reassigned that corresponds to the -# public key already on a peer, the kernel is expected to deconfigure the peer -# to resolve the conflict. -atf_test_case "wg_key_peerdev_makeshared" "cleanup" -wg_key_peerdev_makeshared_head() -{ - atf_set descr 'Create a wg(4) interface and assign peer key to device' - atf_set require.progs wg -} - -wg_key_peerdev_makeshared_body() -{ - local epair pri1 pub1 pri2 wg1 wg2 - local endpoint1 tunnel1 - - kldload -n if_wg - - pri1=$(openssl rand -base64 32) - pri2=$(openssl rand -base64 32) - - endpoint1=192.168.2.1 - tunnel1=169.254.0.1 - - vnet_mkjail wgtest1 - - wg1=$(jexec wgtest1 ifconfig wg create listen-port 12345 private-key "$pri1") - pub1=$(jexec wgtest1 ifconfig $wg1 | awk '/public-key:/ {print $2}') - - wg2=$(jexec wgtest1 ifconfig wg create listen-port 12345 private-key "$pri2") - - atf_check -s exit:0 -o ignore \ - jexec wgtest1 ifconfig ${wg2} peer public-key "${pub1}" \ - allowed-ips "${tunnel1}/32" - - atf_check -o not-empty jexec wgtest1 ifconfig ${wg2} peers - - jexec wgtest1 sh -c "echo '${pri1}' > pri1" - - atf_check -s exit:0 \ - jexec wgtest1 wg set ${wg2} private-key pri1 - - atf_check -o empty jexec wgtest1 ifconfig ${wg2} peers -} - -wg_key_peerdev_makeshared_cleanup() -{ - vnet_cleanup -} - -atf_init_test_cases() -{ - atf_add_test_case "wg_basic" - atf_add_test_case "wg_key_peerdev_shared" - atf_add_test_case "wg_key_peerdev_makeshared" -}