Index: head/include/unistd.h
===================================================================
--- head/include/unistd.h	(revision 345981)
+++ head/include/unistd.h	(revision 345982)
@@ -1,596 +1,597 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1991, 1993, 1994
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)unistd.h	8.12 (Berkeley) 4/27/95
  * $FreeBSD$
  */
 
 #ifndef _UNISTD_H_
 #define	_UNISTD_H_
 
 #include <sys/cdefs.h>
 #include <sys/types.h>			/* XXX adds too much pollution. */
 #include <sys/unistd.h>
 #include <sys/_null.h>
 #include <sys/_types.h>
 
 #ifndef _GID_T_DECLARED
 typedef	__gid_t		gid_t;
 #define	_GID_T_DECLARED
 #endif
 
 #ifndef _OFF_T_DECLARED
 typedef	__off_t		off_t;
 #define	_OFF_T_DECLARED
 #endif
 
 #ifndef _PID_T_DECLARED
 typedef	__pid_t		pid_t;
 #define	_PID_T_DECLARED
 #endif
 
 #ifndef _SIZE_T_DECLARED
 typedef	__size_t	size_t;
 #define	_SIZE_T_DECLARED
 #endif
 
 #ifndef _SSIZE_T_DECLARED
 typedef	__ssize_t	ssize_t;
 #define	_SSIZE_T_DECLARED
 #endif
 
 #ifndef _UID_T_DECLARED
 typedef	__uid_t		uid_t;
 #define	_UID_T_DECLARED
 #endif
 
 #ifndef _USECONDS_T_DECLARED
 typedef	__useconds_t	useconds_t;
 #define	_USECONDS_T_DECLARED
 #endif
 
 #define	STDIN_FILENO	0	/* standard input file descriptor */
 #define	STDOUT_FILENO	1	/* standard output file descriptor */
 #define	STDERR_FILENO	2	/* standard error file descriptor */
 
 #if __XSI_VISIBLE || __POSIX_VISIBLE >= 200112
 #define	F_ULOCK		0	/* unlock locked section */
 #define	F_LOCK		1	/* lock a section for exclusive use */
 #define	F_TLOCK		2	/* test and lock a section for exclusive use */
 #define	F_TEST		3	/* test a section for locks by other procs */
 #endif
 
 /*
  * POSIX options and option groups we unconditionally do or don't
  * implement.  This list includes those options which are exclusively
  * implemented (or not) in user mode.  Please keep this list in
  * alphabetical order.
  *
  * Anything which is defined as zero below **must** have an
  * implementation for the corresponding sysconf() which is able to
  * determine conclusively whether or not the feature is supported.
  * Anything which is defined as other than -1 below **must** have
  * complete headers, types, and function declarations as specified by
  * the POSIX standard; however, if the relevant sysconf() function
  * returns -1, the functions may be stubbed out.
  */
 #define	_POSIX_BARRIERS			200112L
 #define	_POSIX_CPUTIME			200112L
 #define	_POSIX_READER_WRITER_LOCKS	200112L
 #define	_POSIX_REGEXP			1
 #define	_POSIX_SHELL			1
 #define	_POSIX_SPAWN			200112L
 #define	_POSIX_SPIN_LOCKS		200112L
 #define	_POSIX_THREAD_ATTR_STACKADDR	200112L
 #define	_POSIX_THREAD_ATTR_STACKSIZE	200112L
 #define	_POSIX_THREAD_CPUTIME		200112L
 #define	_POSIX_THREAD_PRIO_INHERIT	200112L
 #define	_POSIX_THREAD_PRIO_PROTECT	200112L
 #define	_POSIX_THREAD_PRIORITY_SCHEDULING 200112L
 #define	_POSIX_THREAD_PROCESS_SHARED	200112L
 #define	_POSIX_THREAD_SAFE_FUNCTIONS	-1
 #define	_POSIX_THREAD_SPORADIC_SERVER	-1
 #define	_POSIX_THREADS			200112L
 #define	_POSIX_TRACE			-1
 #define	_POSIX_TRACE_EVENT_FILTER	-1
 #define	_POSIX_TRACE_INHERIT		-1
 #define	_POSIX_TRACE_LOG		-1
 #define	_POSIX2_C_BIND			200112L	/* mandatory */
 #define	_POSIX2_C_DEV			-1 /* need c99 utility */
 #define	_POSIX2_CHAR_TERM		1
 #define	_POSIX2_FORT_DEV		-1 /* need fort77 utility */
 #define	_POSIX2_FORT_RUN		200112L
 #define	_POSIX2_LOCALEDEF		-1
 #define	_POSIX2_PBS			-1
 #define	_POSIX2_PBS_ACCOUNTING		-1
 #define	_POSIX2_PBS_CHECKPOINT		-1
 #define	_POSIX2_PBS_LOCATE		-1
 #define	_POSIX2_PBS_MESSAGE		-1
 #define	_POSIX2_PBS_TRACK		-1
 #define	_POSIX2_SW_DEV			-1 /* XXX ??? */
 #define	_POSIX2_UPE			200112L
 #define	_V6_ILP32_OFF32			-1
 #define	_V6_ILP32_OFFBIG		0
 #define	_V6_LP64_OFF64			0
 #define	_V6_LPBIG_OFFBIG		-1
 
 #if __XSI_VISIBLE
 #define	_XOPEN_CRYPT			-1 /* XXX ??? */
 #define	_XOPEN_ENH_I18N			-1 /* mandatory in XSI */
 #define	_XOPEN_LEGACY			-1
 #define	_XOPEN_REALTIME			-1
 #define	_XOPEN_REALTIME_THREADS		-1
 #define	_XOPEN_UNIX			-1
 #endif
 
 /* Define the POSIX.2 version we target for compliance. */
 #define	_POSIX2_VERSION		199212L
 
 /*
  * POSIX-style system configuration variable accessors (for the
  * sysconf function).  The kernel does not directly implement the
  * sysconf() interface; rather, a C library stub translates references
  * to sysconf() into calls to sysctl() using a giant switch statement.
  * Those that are marked `user' are implemented entirely in the C
  * library and never query the kernel.  pathconf() is implemented
  * directly by the kernel so those are not defined here.
  */
 #define	_SC_ARG_MAX		 1
 #define	_SC_CHILD_MAX		 2
 #define	_SC_CLK_TCK		 3
 #define	_SC_NGROUPS_MAX		 4
 #define	_SC_OPEN_MAX		 5
 #define	_SC_JOB_CONTROL		 6
 #define	_SC_SAVED_IDS		 7
 #define	_SC_VERSION		 8
 #define	_SC_BC_BASE_MAX		 9 /* user */
 #define	_SC_BC_DIM_MAX		10 /* user */
 #define	_SC_BC_SCALE_MAX	11 /* user */
 #define	_SC_BC_STRING_MAX	12 /* user */
 #define	_SC_COLL_WEIGHTS_MAX	13 /* user */
 #define	_SC_EXPR_NEST_MAX	14 /* user */
 #define	_SC_LINE_MAX		15 /* user */
 #define	_SC_RE_DUP_MAX		16 /* user */
 #define	_SC_2_VERSION		17 /* user */
 #define	_SC_2_C_BIND		18 /* user */
 #define	_SC_2_C_DEV		19 /* user */
 #define	_SC_2_CHAR_TERM		20 /* user */
 #define	_SC_2_FORT_DEV		21 /* user */
 #define	_SC_2_FORT_RUN		22 /* user */
 #define	_SC_2_LOCALEDEF		23 /* user */
 #define	_SC_2_SW_DEV		24 /* user */
 #define	_SC_2_UPE		25 /* user */
 #define	_SC_STREAM_MAX		26 /* user */
 #define	_SC_TZNAME_MAX		27 /* user */
 
 #if __POSIX_VISIBLE >= 199309
 #define	_SC_ASYNCHRONOUS_IO	28
 #define	_SC_MAPPED_FILES	29
 #define	_SC_MEMLOCK		30
 #define	_SC_MEMLOCK_RANGE	31
 #define	_SC_MEMORY_PROTECTION	32
 #define	_SC_MESSAGE_PASSING	33
 #define	_SC_PRIORITIZED_IO	34
 #define	_SC_PRIORITY_SCHEDULING	35
 #define	_SC_REALTIME_SIGNALS	36
 #define	_SC_SEMAPHORES		37
 #define	_SC_FSYNC		38
 #define	_SC_SHARED_MEMORY_OBJECTS 39
 #define	_SC_SYNCHRONIZED_IO	40
 #define	_SC_TIMERS		41
 #define	_SC_AIO_LISTIO_MAX	42
 #define	_SC_AIO_MAX		43
 #define	_SC_AIO_PRIO_DELTA_MAX	44
 #define	_SC_DELAYTIMER_MAX	45
 #define	_SC_MQ_OPEN_MAX		46
 #define	_SC_PAGESIZE		47
 #define	_SC_RTSIG_MAX		48
 #define	_SC_SEM_NSEMS_MAX	49
 #define	_SC_SEM_VALUE_MAX	50
 #define	_SC_SIGQUEUE_MAX	51
 #define	_SC_TIMER_MAX		52
 #endif
 
 #if __POSIX_VISIBLE >= 200112
 #define	_SC_2_PBS		59 /* user */
 #define	_SC_2_PBS_ACCOUNTING	60 /* user */
 #define	_SC_2_PBS_CHECKPOINT	61 /* user */
 #define	_SC_2_PBS_LOCATE	62 /* user */
 #define	_SC_2_PBS_MESSAGE	63 /* user */
 #define	_SC_2_PBS_TRACK		64 /* user */
 #define	_SC_ADVISORY_INFO	65
 #define	_SC_BARRIERS		66 /* user */
 #define	_SC_CLOCK_SELECTION	67
 #define	_SC_CPUTIME		68
 #define	_SC_FILE_LOCKING	69
 #define	_SC_GETGR_R_SIZE_MAX	70 /* user */
 #define	_SC_GETPW_R_SIZE_MAX	71 /* user */
 #define	_SC_HOST_NAME_MAX	72
 #define	_SC_LOGIN_NAME_MAX	73
 #define	_SC_MONOTONIC_CLOCK	74
 #define	_SC_MQ_PRIO_MAX		75
 #define	_SC_READER_WRITER_LOCKS	76 /* user */
 #define	_SC_REGEXP		77 /* user */
 #define	_SC_SHELL		78 /* user */
 #define	_SC_SPAWN		79 /* user */
 #define	_SC_SPIN_LOCKS		80 /* user */
 #define	_SC_SPORADIC_SERVER	81
 #define	_SC_THREAD_ATTR_STACKADDR 82 /* user */
 #define	_SC_THREAD_ATTR_STACKSIZE 83 /* user */
 #define	_SC_THREAD_CPUTIME	84 /* user */
 #define	_SC_THREAD_DESTRUCTOR_ITERATIONS 85 /* user */
 #define	_SC_THREAD_KEYS_MAX	86 /* user */
 #define	_SC_THREAD_PRIO_INHERIT	87 /* user */
 #define	_SC_THREAD_PRIO_PROTECT	88 /* user */
 #define	_SC_THREAD_PRIORITY_SCHEDULING 89 /* user */
 #define	_SC_THREAD_PROCESS_SHARED 90 /* user */
 #define	_SC_THREAD_SAFE_FUNCTIONS 91 /* user */
 #define	_SC_THREAD_SPORADIC_SERVER 92 /* user */
 #define	_SC_THREAD_STACK_MIN	93 /* user */
 #define	_SC_THREAD_THREADS_MAX	94 /* user */
 #define	_SC_TIMEOUTS		95 /* user */
 #define	_SC_THREADS		96 /* user */
 #define	_SC_TRACE		97 /* user */
 #define	_SC_TRACE_EVENT_FILTER	98 /* user */
 #define	_SC_TRACE_INHERIT	99 /* user */
 #define	_SC_TRACE_LOG		100 /* user */
 #define	_SC_TTY_NAME_MAX	101 /* user */
 #define	_SC_TYPED_MEMORY_OBJECTS 102
 #define	_SC_V6_ILP32_OFF32	103 /* user */
 #define	_SC_V6_ILP32_OFFBIG	104 /* user */
 #define	_SC_V6_LP64_OFF64	105 /* user */
 #define	_SC_V6_LPBIG_OFFBIG	106 /* user */
 #define	_SC_IPV6		118
 #define	_SC_RAW_SOCKETS		119
 #define	_SC_SYMLOOP_MAX		120
 #endif
 
 #if __XSI_VISIBLE
 #define	_SC_ATEXIT_MAX		107 /* user */
 #define	_SC_IOV_MAX		56
 #define	_SC_PAGE_SIZE		_SC_PAGESIZE
 #define	_SC_XOPEN_CRYPT		108 /* user */
 #define	_SC_XOPEN_ENH_I18N	109 /* user */
 #define	_SC_XOPEN_LEGACY	110 /* user */
 #define	_SC_XOPEN_REALTIME	111
 #define	_SC_XOPEN_REALTIME_THREADS 112
 #define	_SC_XOPEN_SHM		113
 #define	_SC_XOPEN_STREAMS	114
 #define	_SC_XOPEN_UNIX		115
 #define	_SC_XOPEN_VERSION	116
 #define	_SC_XOPEN_XCU_VERSION	117 /* user */
 #endif
 
 #if __BSD_VISIBLE
 #define	_SC_NPROCESSORS_CONF	57
 #define	_SC_NPROCESSORS_ONLN	58
 #define	_SC_CPUSET_SIZE		122
 #endif
 
 /* Extensions found in Solaris and Linux. */
 #define	_SC_PHYS_PAGES		121
 
 /* Keys for the confstr(3) function. */
 #if __POSIX_VISIBLE >= 199209
 #define	_CS_PATH		1	/* default value of PATH */
 #endif
 
 #if __POSIX_VISIBLE >= 200112
 #define	_CS_POSIX_V6_ILP32_OFF32_CFLAGS		2
 #define	_CS_POSIX_V6_ILP32_OFF32_LDFLAGS	3
 #define	_CS_POSIX_V6_ILP32_OFF32_LIBS		4
 #define	_CS_POSIX_V6_ILP32_OFFBIG_CFLAGS	5
 #define	_CS_POSIX_V6_ILP32_OFFBIG_LDFLAGS	6
 #define	_CS_POSIX_V6_ILP32_OFFBIG_LIBS		7
 #define	_CS_POSIX_V6_LP64_OFF64_CFLAGS		8
 #define	_CS_POSIX_V6_LP64_OFF64_LDFLAGS		9
 #define	_CS_POSIX_V6_LP64_OFF64_LIBS		10
 #define	_CS_POSIX_V6_LPBIG_OFFBIG_CFLAGS	11
 #define	_CS_POSIX_V6_LPBIG_OFFBIG_LDFLAGS	12
 #define	_CS_POSIX_V6_LPBIG_OFFBIG_LIBS		13
 #define	_CS_POSIX_V6_WIDTH_RESTRICTED_ENVS	14
 #endif
 
 __BEGIN_DECLS
 /* 1003.1-1990 */
 void	 _exit(int) __dead2;
 int	 access(const char *, int);
 unsigned int	 alarm(unsigned int);
 int	 chdir(const char *);
 int	 chown(const char *, uid_t, gid_t);
 int	 close(int);
 void	 closefrom(int);
 int	 dup(int);
 int	 dup2(int, int);
 int	 execl(const char *, const char *, ...) __null_sentinel;
 int	 execle(const char *, const char *, ...);
 int	 execlp(const char *, const char *, ...) __null_sentinel;
 int	 execv(const char *, char * const *);
 int	 execve(const char *, char * const *, char * const *);
 int	 execvp(const char *, char * const *);
 pid_t	 fork(void);
 long	 fpathconf(int, int);
 char	*getcwd(char *, size_t);
 gid_t	 getegid(void);
 uid_t	 geteuid(void);
 gid_t	 getgid(void);
 int	 getgroups(int, gid_t []);
 char	*getlogin(void);
 pid_t	 getpgrp(void);
 pid_t	 getpid(void);
 pid_t	 getppid(void);
 uid_t	 getuid(void);
 int	 isatty(int);
 int	 link(const char *, const char *);
 #ifndef _LSEEK_DECLARED
 #define	_LSEEK_DECLARED
 off_t	 lseek(int, off_t, int);
 #endif
 long	 pathconf(const char *, int);
 int	 pause(void);
 int	 pipe(int *);
 ssize_t	 read(int, void *, size_t);
 int	 rmdir(const char *);
 int	 setgid(gid_t);
 int	 setpgid(pid_t, pid_t);
 pid_t	 setsid(void);
 int	 setuid(uid_t);
 unsigned int	 sleep(unsigned int);
 long	 sysconf(int);
 pid_t	 tcgetpgrp(int);
 int	 tcsetpgrp(int, pid_t);
 char	*ttyname(int);
 int	ttyname_r(int, char *, size_t);
 int	 unlink(const char *);
 ssize_t	 write(int, const void *, size_t);
 
 /* 1003.2-1992 */
 #if __POSIX_VISIBLE >= 199209 || __XSI_VISIBLE
 size_t	 confstr(int, char *, size_t);
 #ifndef _GETOPT_DECLARED
 #define	_GETOPT_DECLARED
 int	 getopt(int, char * const [], const char *);
 
 extern char *optarg;			/* getopt(3) external variables */
 extern int optind, opterr, optopt;
 #endif /* _GETOPT_DECLARED */
 #endif
 
 /* ISO/IEC 9945-1: 1996 */
 #if __POSIX_VISIBLE >= 199506 || __XSI_VISIBLE
 int	 fsync(int);
 int	 fdatasync(int);
 
 /*
  * ftruncate() was in the POSIX Realtime Extension (it's used for shared
  * memory), but truncate() was not.
  */
 #ifndef _FTRUNCATE_DECLARED
 #define	_FTRUNCATE_DECLARED
 int	 ftruncate(int, off_t);
 #endif
 #endif
 
 #if __POSIX_VISIBLE >= 199506
 int	 getlogin_r(char *, int);
 #endif
 
 /* 1003.1-2001 */
 #if __POSIX_VISIBLE >= 200112 || __XSI_VISIBLE
 int	 fchown(int, uid_t, gid_t);
 ssize_t	 readlink(const char * __restrict, char * __restrict, size_t);
 #endif
 #if __POSIX_VISIBLE >= 200112
 int	 gethostname(char *, size_t);
 int	 setegid(gid_t);
 int	 seteuid(uid_t);
 #endif
 
 /* 1003.1-2008 */
 #if __POSIX_VISIBLE >= 200809 || __XSI_VISIBLE
 int	 getsid(pid_t _pid);
 int	 fchdir(int);
 int	 getpgid(pid_t _pid);
 int	 lchown(const char *, uid_t, gid_t);
 ssize_t	 pread(int, void *, size_t, off_t);
 ssize_t	 pwrite(int, const void *, size_t, off_t);
 
 /* See comment at ftruncate() above. */
 #ifndef _TRUNCATE_DECLARED
 #define	_TRUNCATE_DECLARED
 int	 truncate(const char *, off_t);
 #endif
 #endif /* __POSIX_VISIBLE >= 200809 || __XSI_VISIBLE */
 
 #if __POSIX_VISIBLE >= 200809
 int	faccessat(int, const char *, int, int);
 int	fchownat(int, const char *, uid_t, gid_t, int);
 int	fexecve(int, char *const [], char *const []);
 int	linkat(int, const char *, int, const char *, int);
 ssize_t	readlinkat(int, const char * __restrict, char * __restrict, size_t);
 int	symlinkat(const char *, int, const char *);
 int	unlinkat(int, const char *, int);
 #endif /* __POSIX_VISIBLE >= 200809 */
 
 /*
  * symlink() was originally in POSIX.1a, which was withdrawn after
  * being overtaken by events (1003.1-2001).  It was in XPG4.2, and of
  * course has been in BSD since 4.2.
  */
 #if __POSIX_VISIBLE >= 200112 || __XSI_VISIBLE >= 402
 int	 symlink(const char * __restrict, const char * __restrict);
 #endif
 
 /* X/Open System Interfaces */
 #if __XSI_VISIBLE
 char	*crypt(const char *, const char *);
 long	 gethostid(void);
 int	 lockf(int, int, off_t);
 int	 nice(int);
 int	 setregid(gid_t, gid_t);
 int	 setreuid(uid_t, uid_t);
 
 #ifndef _SWAB_DECLARED
 #define _SWAB_DECLARED
 void	 swab(const void * __restrict, void * __restrict, ssize_t);
 #endif /* _SWAB_DECLARED */
 
 void	 sync(void);
 
 #endif /* __XSI_VISIBLE */
 
 #if (__XSI_VISIBLE && __XSI_VISIBLE <= 500) || __BSD_VISIBLE
 int	 brk(const void *);
 int	 chroot(const char *);
 int	 getdtablesize(void);
 int	 getpagesize(void) __pure2;
 char	*getpass(const char *);
 void	*sbrk(intptr_t);
 #endif
 
 #if (__XSI_VISIBLE && __XSI_VISIBLE <= 600) || __BSD_VISIBLE
 char	*getwd(char *);			/* obsoleted by getcwd() */
 useconds_t
 	 ualarm(useconds_t, useconds_t);
 int	 usleep(useconds_t);
 pid_t	 vfork(void) __returns_twice;
 #endif
 
 #if __BSD_VISIBLE
 struct timeval;				/* select(2) */
 
 struct crypt_data {
 	int	initialized;	/* For compatibility with glibc. */
 	char	__buf[256];	/* Buffer returned by crypt_r(). */
 };
 
 int	 acct(const char *);
 int	 async_daemon(void);
 int	 check_utility_compat(const char *);
 const char *
 	 crypt_get_format(void);
 char	*crypt_r(const char *, const char *, struct crypt_data *);
 int	 crypt_set_format(const char *);
 int	 dup3(int, int, int);
 int	 eaccess(const char *, int);
 void	 endusershell(void);
 int	 exect(const char *, char * const *, char * const *);
 int	 execvP(const char *, const char *, char * const *);
 int	 feature_present(const char *);
 char	*fflagstostr(u_long);
 int	 getdomainname(char *, int);
 int	 getentropy(void *, size_t);
 int	 getgrouplist(const char *, gid_t, gid_t *, int *);
 int	 getloginclass(char *, size_t);
 mode_t	 getmode(const void *, mode_t);
 int	 getosreldate(void);
 int	 getpeereid(int, uid_t *, gid_t *);
 int	 getresgid(gid_t *, gid_t *, gid_t *);
 int	 getresuid(uid_t *, uid_t *, uid_t *);
 char	*getusershell(void);
 int	 initgroups(const char *, gid_t);
 int	 iruserok(unsigned long, int, const char *, const char *);
 int	 iruserok_sa(const void *, int, int, const char *, const char *);
 int	 issetugid(void);
 void	__FreeBSD_libc_enter_restricted_mode(void);
 long	 lpathconf(const char *, int);
 #ifndef _MKDTEMP_DECLARED
 char	*mkdtemp(char *);
 #define	_MKDTEMP_DECLARED
 #endif
 #ifndef	_MKNOD_DECLARED
 int	 mknod(const char *, mode_t, dev_t);
 #define	_MKNOD_DECLARED
 #endif
 #ifndef _MKSTEMP_DECLARED
 int	 mkstemp(char *);
 #define	_MKSTEMP_DECLARED
 #endif
 int	 mkstemps(char *, int);
 #ifndef _MKTEMP_DECLARED
 char	*mktemp(char *);
 #define	_MKTEMP_DECLARED
 #endif
 int	 nfssvc(int, void *);
 int	 nlm_syscall(int, int, int, char **);
 int	 pipe2(int *, int);
 int	 profil(char *, size_t, vm_offset_t, int);
 int	 rcmd(char **, int, const char *, const char *, const char *, int *);
 int	 rcmd_af(char **, int, const char *,
 		const char *, const char *, int *, int);
 int	 rcmdsh(char **, int, const char *,
 		const char *, const char *, const char *);
 char	*re_comp(const char *);
 int	 re_exec(const char *);
 int	 reboot(int);
 int	 revoke(const char *);
 pid_t	 rfork(int);
 pid_t	 rfork_thread(int, void *, int (*)(void *), void *);
 int	 rresvport(int *);
 int	 rresvport_af(int *, int);
 int	 ruserok(const char *, int, const char *, const char *);
 #if __BSD_VISIBLE
 #ifndef _SELECT_DECLARED
 #define	_SELECT_DECLARED
 int	 select(int, fd_set *, fd_set *, fd_set *, struct timeval *);
 #endif
 #endif
 int	 setdomainname(const char *, int);
 int	 setgroups(int, const gid_t *);
 void	 sethostid(long);
 int	 sethostname(const char *, int);
 int	 setlogin(const char *);
 int	 setloginclass(const char *);
 void	*setmode(const char *);
 int	 setpgrp(pid_t, pid_t);			/* obsoleted by setpgid() */
 void	 setproctitle(const char *_fmt, ...) __printf0like(1, 2);
 void	 setproctitle_fast(const char *_fmt, ...) __printf0like(1, 2);
 int	 setresgid(gid_t, gid_t, gid_t);
 int	 setresuid(uid_t, uid_t, uid_t);
 int	 setrgid(gid_t);
 int	 setruid(uid_t);
 void	 setusershell(void);
 int	 strtofflags(char **, u_long *, u_long *);
 int	 swapon(const char *);
 int	 swapoff(const char *);
 int	 syscall(int, ...);
 off_t	 __syscall(quad_t, ...);
 int	 undelete(const char *);
 int	 unwhiteout(const char *);
 void	*valloc(size_t);			/* obsoleted by malloc() */
+int	 funlinkat(int, const char *, int, int);	/* unlink(2) */
 
 #ifndef _OPTRESET_DECLARED
 #define	_OPTRESET_DECLARED
 extern int optreset;			/* getopt(3) external variable */
 #endif
 #endif /* __BSD_VISIBLE */
 __END_DECLS
 
 #endif /* !_UNISTD_H_ */
Index: head/lib/libc/sys/Makefile.inc
===================================================================
--- head/lib/libc/sys/Makefile.inc	(revision 345981)
+++ head/lib/libc/sys/Makefile.inc	(revision 345982)
@@ -1,499 +1,500 @@
 #	@(#)Makefile.inc	8.3 (Berkeley) 10/24/94
 # $FreeBSD$
 
 # sys sources
 .PATH: ${LIBC_SRCTOP}/${LIBC_ARCH}/sys ${LIBC_SRCTOP}/sys
 
 # Include the generated makefile containing the *complete* list
 # of syscall names in MIASM.
 .include "${SRCTOP}/sys/sys/syscall.mk"
 
 # Include machine dependent definitions.
 #
 # MDASM names override the default syscall names in MIASM.
 # NOASM will prevent the default syscall code from being generated.
 # PSEUDO generates _<sys>() and __sys_<sys>() symbols, but not <sys>().
 #
 # While historically machine dependent, all architectures have the following
 # declarations in common:
 #
 NOASM=	exit.o \
 	getlogin.o \
 	sstk.o \
 	yield.o
 PSEUDO=	_exit.o \
 	_getlogin.o
 .sinclude "${LIBC_SRCTOP}/${LIBC_ARCH}/sys/Makefile.inc"
 
 SRCS+= clock_gettime.c gettimeofday.c __vdso_gettimeofday.c
 NOASM+=  clock_gettime.o gettimeofday.o
 PSEUDO+= _clock_gettime.o _gettimeofday.o
 
 # Sources common to both syscall interfaces:
 SRCS+=	\
 	__error.c \
 	interposing_table.c
 
 SRCS+= getdents.c lstat.c mknod.c stat.c
 
 SRCS+= fstat.c fstatat.c fstatfs.c getfsstat.c statfs.c
 NOASM+= fstat.o fstatat.o fstatfs.o getfsstat.o statfs.o
 PSEUDO+= _fstat.o _fstatat.o _fstatfs.o _getfsstat.o _statfs.o
 
 SRCS+= getdirentries.c
 NOASM+= getdirentries.o
 PSEUDO+= _getdirentries.o
 
 SRCS+= brk.c
 SRCS+= pipe.c
 SRCS+= vadvise.c
 
 SRCS+=	compat-stub.c
 
 INTERPOSED = \
 	accept \
 	accept4 \
 	aio_suspend \
 	clock_nanosleep \
 	close \
 	connect \
 	fcntl \
 	fdatasync \
 	fsync \
 	fork \
 	kevent \
 	msync \
 	nanosleep \
 	open \
 	openat \
 	poll \
 	ppoll \
 	pselect \
 	ptrace \
 	read \
 	readv \
 	recvfrom \
 	recvmsg \
 	select \
 	sendmsg \
 	sendto \
 	setcontext \
 	sigprocmask \
 	sigsuspend \
 	sigtimedwait \
 	sigwait \
 	sigwaitinfo \
 	swapcontext \
 	wait4 \
 	wait6 \
 	write \
 	writev
 
 .if ${MACHINE_CPUARCH} == "sparc64"
 SRCS+=	sigaction.c
 NOASM+=	sigaction.o
 .else
 INTERPOSED+= sigaction
 .endif
 
 SRCS+=	${INTERPOSED:S/$/.c/}
 NOASM+=	${INTERPOSED:S/$/.o/}
 PSEUDO+=	${INTERPOSED:C/^.*$/_&.o/}
 
 # Add machine dependent asm sources:
 SRCS+=${MDASM}
 
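-# Look though the complete list of syscalls (MIASM) for names that are
+# Look through the complete list of syscalls (MIASM) for names that are
 # not defined with machine dependent implementations (MDASM) and are
-# not declared for no generation of default code (NOASM).  Add each
+# not excluded from default code generation (NOASM).  Add each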
 # syscall that satisfies these conditions to the ASM list.
 .for _asm in ${MIASM}
 .if (${MDASM:R:M${_asm:R}} == "")
 .if (${NOASM:R:M${_asm:R}} == "")
 ASM+=$(_asm)
 .endif
 .endif
 .endfor
 
 SASM=	${ASM:S/.o/.S/}
 
 SPSEUDO= ${PSEUDO:S/.o/.S/}
 
 SRCS+=	${SASM} ${SPSEUDO}
 
 SYM_MAPS+=	${LIBC_SRCTOP}/sys/Symbol.map
 
 # Generated files
 CLEANFILES+=	${SASM} ${SPSEUDO}
 
 .if ${MACHINE_CPUARCH} == "amd64" || ${MACHINE_CPUARCH} == "i386" || \
     ${MACHINE_CPUARCH} == "powerpc" || ${MACHINE_ARCH:Marmv[67]*}
 NOTE_GNU_STACK='\t.section .note.GNU-stack,"",%%progbits\n'
 .else
 NOTE_GNU_STACK=''
 .endif
 
 ${SASM}:
 	printf '#include "compat.h"\n' > ${.TARGET}
 	printf '#include "SYS.h"\nRSYSCALL(${.PREFIX})\n' >> ${.TARGET}
 	printf  ${NOTE_GNU_STACK} >>${.TARGET}
 
 ${SPSEUDO}:
 	printf '#include "compat.h"\n' > ${.TARGET}
 	printf '#include "SYS.h"\nPSEUDO(${.PREFIX:S/_//})\n' \
 	    >> ${.TARGET}
 	printf ${NOTE_GNU_STACK} >>${.TARGET}
 
 MAN+=	abort2.2 \
 	accept.2 \
 	access.2 \
 	acct.2 \
 	adjtime.2 \
 	aio_cancel.2 \
 	aio_error.2 \
 	aio_fsync.2 \
 	aio_mlock.2 \
 	aio_read.2 \
 	aio_return.2 \
 	aio_suspend.2 \
 	aio_waitcomplete.2 \
 	aio_write.2 \
 	bind.2 \
 	bindat.2 \
 	brk.2 \
 	cap_enter.2 \
 	cap_fcntls_limit.2 \
 	cap_ioctls_limit.2 \
 	cap_rights_limit.2 \
 	chdir.2 \
 	chflags.2 \
 	chmod.2 \
 	chown.2 \
 	chroot.2 \
 	clock_gettime.2 \
 	close.2 \
 	closefrom.2 \
 	connect.2 \
 	connectat.2 \
 	cpuset.2 \
 	cpuset_getaffinity.2 \
 	cpuset_getdomain.2 \
 	dup.2 \
 	execve.2 \
 	_exit.2 \
 	extattr_get_file.2 \
 	fcntl.2 \
 	ffclock.2 \
 	fhlink.2 \
 	fhopen.2 \
 	fhreadlink.2 \
 	flock.2 \
 	fork.2 \
 	fsync.2 \
 	getdirentries.2 \
 	getdtablesize.2 \
 	getfh.2 \
 	getfsstat.2 \
 	getgid.2 \
 	getgroups.2 \
 	getitimer.2 \
 	getlogin.2 \
 	getloginclass.2 \
 	getpeername.2 \
 	getpgrp.2 \
 	getpid.2 \
 	getpriority.2 \
 	getrandom.2 \
 	getrlimit.2 \
 	getrusage.2 \
 	getsid.2 \
 	getsockname.2 \
 	getsockopt.2 \
 	gettimeofday.2 \
 	getuid.2 \
 	intro.2 \
 	ioctl.2 \
 	issetugid.2 \
 	jail.2 \
 	kenv.2 \
 	kill.2 \
 	kldfind.2 \
 	kldfirstmod.2 \
 	kldload.2 \
 	kldnext.2 \
 	kldstat.2 \
 	kldsym.2 \
 	kldunload.2 \
 	kqueue.2 \
 	ktrace.2 \
 	link.2 \
 	lio_listio.2 \
 	listen.2 \
 	lseek.2 \
 	madvise.2 \
 	mincore.2 \
 	minherit.2 \
 	mkdir.2 \
 	mkfifo.2 \
 	mknod.2 \
 	mlock.2 \
 	mlockall.2 \
 	mmap.2 \
 	modfind.2 \
 	modnext.2 \
 	modstat.2 \
 	mount.2 \
 	mprotect.2 \
 	mq_close.2 \
 	mq_getattr.2 \
 	mq_notify.2 \
 	mq_open.2 \
 	mq_receive.2 \
 	mq_send.2 \
 	mq_setattr.2 \
 	msgctl.2 \
 	msgget.2 \
 	msgrcv.2 \
 	msgsnd.2 \
 	msync.2 \
 	munmap.2 \
 	nanosleep.2 \
 	nfssvc.2 \
 	ntp_adjtime.2 \
 	open.2 \
 	pathconf.2 \
 	pdfork.2 \
 	pipe.2 \
 	poll.2 \
 	posix_fadvise.2 \
 	posix_fallocate.2 \
 	posix_openpt.2 \
 	procctl.2 \
 	profil.2 \
 	pselect.2 \
 	ptrace.2 \
 	quotactl.2 \
 	rctl_add_rule.2 \
 	read.2 \
 	readlink.2 \
 	reboot.2 \
 	recv.2 \
 	rename.2 \
 	revoke.2 \
 	rfork.2 \
 	rmdir.2 \
 	rtprio.2
 .if !defined(NO_P1003_1B)
 MAN+=	sched_get_priority_max.2 \
 	sched_setparam.2 \
 	sched_setscheduler.2 \
 	sched_yield.2
 .endif
 MAN+=	sctp_generic_recvmsg.2 \
 	sctp_generic_sendmsg.2 \
 	sctp_peeloff.2 \
 	select.2 \
 	semctl.2 \
 	semget.2 \
 	semop.2 \
 	send.2 \
 	setfib.2 \
 	sendfile.2 \
 	setgroups.2 \
 	setpgid.2 \
 	setregid.2 \
 	setresuid.2 \
 	setreuid.2 \
 	setsid.2 \
 	setuid.2 \
 	shmat.2 \
 	shmctl.2 \
 	shmget.2 \
 	shm_open.2 \
 	shutdown.2 \
 	sigaction.2 \
 	sigaltstack.2 \
 	sigpending.2 \
 	sigprocmask.2 \
 	sigqueue.2 \
 	sigreturn.2 \
 	sigstack.2 \
 	sigsuspend.2 \
 	sigwait.2 \
 	sigwaitinfo.2 \
 	socket.2 \
 	socketpair.2 \
 	stat.2 \
 	statfs.2 \
 	swapon.2 \
 	symlink.2 \
 	sync.2 \
 	sysarch.2 \
 	syscall.2 \
 	thr_exit.2 \
 	thr_kill.2 \
 	thr_new.2 \
 	thr_self.2 \
 	thr_set_name.2 \
 	thr_suspend.2 \
 	thr_wake.2 \
 	timer_create.2 \
 	timer_delete.2 \
 	timer_settime.2 \
 	truncate.2 \
 	umask.2 \
 	undelete.2 \
 	unlink.2 \
 	utimensat.2 \
 	utimes.2 \
 	utrace.2 \
 	uuidgen.2 \
 	vfork.2 \
 	wait.2 \
 	write.2 \
 	_umtx_op.2
 
 MLINKS+=accept.2 accept4.2
 MLINKS+=access.2 eaccess.2 \
 	access.2 faccessat.2
 MLINKS+=brk.2 sbrk.2
 MLINKS+=cap_enter.2 cap_getmode.2
 MLINKS+=cap_fcntls_limit.2 cap_fcntls_get.2
 MLINKS+=cap_ioctls_limit.2 cap_ioctls_get.2
 MLINKS+=chdir.2 fchdir.2
 MLINKS+=chflags.2 chflagsat.2 \
 	chflags.2 fchflags.2 \
 	chflags.2 lchflags.2
 MLINKS+=chmod.2 fchmod.2 \
 	chmod.2 fchmodat.2 \
 	chmod.2 lchmod.2
 MLINKS+=chown.2 fchown.2 \
 	chown.2 fchownat.2 \
 	chown.2 lchown.2
 MLINKS+=clock_gettime.2 clock_getres.2 \
 	clock_gettime.2 clock_settime.2
 MLINKS+=nanosleep.2 clock_nanosleep.2
 MLINKS+=cpuset.2 cpuset_getid.2 \
 	cpuset.2 cpuset_setid.2
 MLINKS+=cpuset_getaffinity.2 cpuset_setaffinity.2
 MLINKS+=cpuset_getdomain.2 cpuset_setdomain.2
 MLINKS+=dup.2 dup2.2
 MLINKS+=execve.2 fexecve.2
 MLINKS+=extattr_get_file.2 extattr.2 \
 	extattr_get_file.2 extattr_delete_fd.2 \
 	extattr_get_file.2 extattr_delete_file.2 \
 	extattr_get_file.2 extattr_delete_link.2 \
 	extattr_get_file.2 extattr_get_fd.2 \
 	extattr_get_file.2 extattr_get_link.2 \
 	extattr_get_file.2 extattr_list_fd.2 \
 	extattr_get_file.2 extattr_list_file.2 \
 	extattr_get_file.2 extattr_list_link.2 \
 	extattr_get_file.2 extattr_set_fd.2 \
 	extattr_get_file.2 extattr_set_file.2 \
 	extattr_get_file.2 extattr_set_link.2
 MLINKS+=ffclock.2 ffclock_getcounter.2 \
 	ffclock.2 ffclock_getestimate.2 \
 	ffclock.2 ffclock_setestimate.2
 MLINKS+=fhopen.2 fhstat.2 fhopen.2 fhstatfs.2
 MLINKS+=fsync.2 fdatasync.2
 MLINKS+=getdirentries.2 getdents.2
 MLINKS+=getfh.2 lgetfh.2 \
 	getfh.2 getfhat.2
 MLINKS+=getgid.2 getegid.2
 MLINKS+=getitimer.2 setitimer.2
 MLINKS+=getlogin.2 getlogin_r.3
 MLINKS+=getlogin.2 setlogin.2
 MLINKS+=getloginclass.2 setloginclass.2
 MLINKS+=getpgrp.2 getpgid.2
 MLINKS+=getpid.2 getppid.2
 MLINKS+=getpriority.2 setpriority.2
 MLINKS+=getrlimit.2 setrlimit.2
 MLINKS+=getsockopt.2 setsockopt.2
 MLINKS+=gettimeofday.2 settimeofday.2
 MLINKS+=getuid.2 geteuid.2
 MLINKS+=intro.2 errno.2
 MLINKS+=jail.2 jail_attach.2 \
 	jail.2 jail_get.2 \
 	jail.2 jail_remove.2 \
 	jail.2 jail_set.2
 MLINKS+=kldunload.2 kldunloadf.2
 MLINKS+=kqueue.2 kevent.2 \
 	kqueue.2 EV_SET.3
 MLINKS+=link.2 linkat.2
 MLINKS+=madvise.2 posix_madvise.2
 MLINKS+=mkdir.2 mkdirat.2
 MLINKS+=mkfifo.2 mkfifoat.2
 MLINKS+=mknod.2 mknodat.2
 MLINKS+=mlock.2 munlock.2
 MLINKS+=mlockall.2 munlockall.2
 MLINKS+=modnext.2 modfnext.2
 MLINKS+=mount.2 nmount.2 \
 	mount.2 unmount.2
 MLINKS+=mq_receive.2 mq_timedreceive.2
 MLINKS+=mq_send.2 mq_timedsend.2
 MLINKS+=ntp_adjtime.2 ntp_gettime.2
 MLINKS+=open.2 openat.2
 MLINKS+=pathconf.2 fpathconf.2
 MLINKS+=pathconf.2 lpathconf.2
 MLINKS+=pdfork.2 pdgetpid.2\
 	pdfork.2 pdkill.2
 MLINKS+=pipe.2 pipe2.2
 MLINKS+=poll.2 ppoll.2
 MLINKS+=rctl_add_rule.2 rctl_get_limits.2 \
 	rctl_add_rule.2 rctl_get_racct.2 \
 	rctl_add_rule.2 rctl_get_rules.2 \
 	rctl_add_rule.2 rctl_remove_rule.2
 MLINKS+=read.2 pread.2 \
 	read.2 preadv.2 \
 	read.2 readv.2
 MLINKS+=readlink.2 readlinkat.2
 MLINKS+=recv.2 recvfrom.2 \
 	recv.2 recvmsg.2
 MLINKS+=rename.2 renameat.2
 MLINKS+=rtprio.2 rtprio_thread.2
 .if !defined(NO_P1003_1B)
 MLINKS+=sched_get_priority_max.2 sched_get_priority_min.2 \
 	sched_get_priority_max.2 sched_rr_get_interval.2
 MLINKS+=sched_setparam.2 sched_getparam.2
 MLINKS+=sched_setscheduler.2 sched_getscheduler.2
 .endif
 MLINKS+=select.2 FD_CLR.3 \
 	select.2 FD_ISSET.3 \
 	select.2 FD_SET.3 \
 	select.2 FD_ZERO.3
 MLINKS+=send.2 sendmsg.2 \
 	send.2 sendto.2
 MLINKS+=setpgid.2 setpgrp.2
 MLINKS+=setresuid.2 getresgid.2 \
 	setresuid.2 getresuid.2 \
 	setresuid.2 setresgid.2
 MLINKS+=setuid.2 setegid.2 \
 	setuid.2 seteuid.2 \
 	setuid.2 setgid.2
 MLINKS+=shmat.2 shmdt.2
 MLINKS+=shm_open.2 shm_unlink.2
 MLINKS+=sigwaitinfo.2 sigtimedwait.2
 MLINKS+=stat.2 fstat.2 \
 	stat.2 fstatat.2 \
 	stat.2 lstat.2
 MLINKS+=statfs.2 fstatfs.2
 MLINKS+=swapon.2 swapoff.2
 MLINKS+=symlink.2 symlinkat.2
 MLINKS+=syscall.2 __syscall.2
 MLINKS+=timer_settime.2 timer_getoverrun.2 \
 	timer_settime.2 timer_gettime.2
 MLINKS+=thr_kill.2 thr_kill2.2
 MLINKS+=truncate.2 ftruncate.2
 MLINKS+=unlink.2 unlinkat.2
+MLINKS+=unlink.2 funlinkat.2
 MLINKS+=utimensat.2 futimens.2
 MLINKS+=utimes.2 futimes.2 \
 	utimes.2 futimesat.2 \
 	utimes.2 lutimes.2
 MLINKS+=wait.2 wait3.2 \
 	wait.2 wait4.2 \
 	wait.2 waitpid.2 \
 	wait.2 waitid.2 \
 	wait.2 wait6.2
 MLINKS+=write.2 pwrite.2 \
 	write.2 pwritev.2 \
 	write.2 writev.2
Index: head/lib/libc/sys/Symbol.map
===================================================================
--- head/lib/libc/sys/Symbol.map	(revision 345981)
+++ head/lib/libc/sys/Symbol.map	(revision 345982)
@@ -1,1031 +1,1032 @@
 /*
  * $FreeBSD$
  */
 
 /*
  * It'd be nice to automatically generate the syscall symbols, but we
  * don't know to what version they will eventually belong to, so for now
  * it has to be manual.
  */
 FBSD_1.0 {
 	__acl_aclcheck_fd;
 	__acl_aclcheck_file;
 	__acl_aclcheck_link;
 	__acl_delete_fd;
 	__acl_delete_file;
 	__acl_delete_link;
 	__acl_get_fd;
 	__acl_get_file;
 	__acl_get_link;
 	__acl_set_fd;
 	__acl_set_file;
 	__acl_set_link;
 	__getcwd;
 	__mac_execve;
 	__mac_get_fd;
 	__mac_get_file;
 	__mac_get_link;
 	__mac_get_pid;
 	__mac_get_proc;
 	__mac_set_fd;
 	__mac_set_file;
 	__mac_set_link;
 	__mac_set_proc;
 	__setugid;
 	__syscall;
 	__sysctl;
 	_umtx_op;
 	abort2;
 	accept;
 	access;
 	acct;
 	adjtime;
 	aio_cancel;
 	aio_error;
 	aio_fsync;
 	aio_read;
 	aio_return;
 	aio_suspend;
 	aio_waitcomplete;
 	aio_write;
 	audit;
 	auditctl;
 	auditon;
 	bind;
 	chdir;
 	chflags;
 	chmod;
 	chown;
 	chroot;
 	clock_getres;
 	clock_gettime;
 	clock_settime;
 	close;
 	connect;
 	dup;
 	dup2;
 	eaccess;
 	execve;
 	extattr_delete_fd;
 	extattr_delete_file;
 	extattr_delete_link;
 	extattr_get_fd;
 	extattr_get_file;
 	extattr_get_link;
 	extattr_list_fd;
 	extattr_list_file;
 	extattr_list_link;
 	extattr_set_fd;
 	extattr_set_file;
 	extattr_set_link;
 	extattrctl;
 	fchdir;
 	fchflags;
 	fchmod;
 	fchown;
 	fcntl;
 	fhopen;
 	flock;
 	fork;
 	fpathconf;
 	fsync;
 	futimes;
 	getaudit;
 	getaudit_addr;
 	getauid;
 	getcontext;
 	getdtablesize;
 	getegid;
 	geteuid;
 	getfh;
 	getgid;
 	getgroups;
 	getitimer;
 	getpeername;
 	getpgid;
 	getpgrp;
 	getpid;
 	getppid;
 	getpriority;
 	getresgid;
 	getresuid;
 	getrlimit;
 	getrusage;
 	getsid;
 	getsockname;
 	getsockopt;
 	gettimeofday;
 	getuid;
 	ioctl;
 	issetugid;
 	jail;
 	jail_attach;
 	kenv;
 	kill;
 	kldfind;
 	kldfirstmod;
 	kldload;
 	kldnext;
 	kldstat;
 	kldsym;
 	kldunload;
 	kldunloadf;
 	kqueue;
 	kmq_notify;		/* Do we want these to be public interfaces? */
 	kmq_open;		/* librt uses them to provide mq_xxx. */
 	kmq_setattr;
 	kmq_timedreceive;
 	kmq_timedsend;
 	kmq_unlink;
 	ksem_close;
 	ksem_destroy;
 	ksem_getvalue;
 	ksem_init;
 	ksem_open;
 	ksem_post;
 	ksem_timedwait;
 	ksem_trywait;
 	ksem_unlink;
 	ksem_wait;
 	ktrace;
 	lchflags;
 	lchmod;
 	lchown;
 	lgetfh;
 	link;
 	lio_listio;
 	listen;
 	lutimes;
 	mac_syscall;
 	madvise;
 	mincore;
 	minherit;
 	mkdir;
 	mkfifo;
 	mlock;
 	mlockall;
 	modfind;
 	modfnext;
 	modnext;
 	modstat;
 	mount;
 	mprotect;
 	msgget;
 	msgrcv;
 	msgsnd;
 	msgsys;
 	msync;
 	munlock;
 	munlockall;
 	munmap;
 	nanosleep;
 	nfssvc;
 	nmount;
 	ntp_adjtime;
 	ntp_gettime;
 	open;
 	pathconf;
 	pipe;
 	poll;
 	posix_openpt;
 	preadv;
 	profil;
 	pselect;
 	ptrace;
 	pwritev;
 	quotactl;
 	read;
 	readlink;
 	readv;
 	reboot;
 	recvfrom;
 	recvmsg;
 	rename;
 	revoke;
 	rfork;
 	rmdir;
 	rtprio;
 	rtprio_thread;
 	sched_get_priority_max;
 	sched_get_priority_min;
 	sched_getparam;
 	sched_getscheduler;
 	sched_rr_get_interval;
 	sched_setparam;
 	sched_setscheduler;
 	sched_yield;
 	select;
 	semget;
 	semop;
 	semsys;
 	sendfile;
 	sendmsg;
 	sendto;
 	setaudit;
 	setaudit_addr;
 	setauid;
 	setegid;
 	seteuid;
 	setgid;
 	setgroups;
 	setitimer;
 	setlogin;
 	setpgid;
 	setpriority;
 	setregid;
 	setresgid;
 	setresuid;
 	setreuid;
 	setrlimit;
 	setsid;
 	setsockopt;
 	settimeofday;
 	setuid;
 	shm_open;
 	shm_unlink;
 	shmat;
 	shmdt;
 	shmget;
 	shmsys;
 	shutdown;
 	sigaction;
 	sigaltstack;
 	sigpending;
 	sigprocmask;
 	sigqueue;
 	sigreturn;
 	sigsuspend;
 	sigtimedwait;
 	sigwait;
 	sigwaitinfo;
 	socket;
 	socketpair;
 	swapoff;
 	swapon;
 	symlink;
 	sync;
 	sysarch;
 	syscall;
 	thr_create;
 	thr_exit;
 	thr_kill;
 	thr_kill2;
 	thr_new;
 	thr_self;
 	thr_set_name;
 	thr_suspend;
 	thr_wake;
 	ktimer_create;		/* Do we want these to be public interfaces? */
 	ktimer_delete;		/* librt uses them to provide timer_xxx. */
 	ktimer_getoverrun;
 	ktimer_gettime;
 	ktimer_settime;
 	umask;
 	undelete;
 	unlink;
 	unmount;
 	utimes;
 	utrace;
 	uuidgen;
 	vadvise;
 	wait4;
 	write;
 	writev;
 
 	__error;
 	ftruncate;
 	lseek;
 	mmap;
 	pread;
 	pwrite;
 	truncate;
 };
 
 FBSD_1.1 {
 	__semctl;
 	closefrom;
 	cpuset;
 	cpuset_getid;
 	cpuset_setid;
 	cpuset_getaffinity;
 	cpuset_setaffinity;
 	faccessat;
 	fchmodat;
 	fchownat;
 	fexecve;
 	futimesat;
 	jail_get;
 	jail_set;
 	jail_remove;
 	linkat;
 	lpathconf;
 	mkdirat;
 	mkfifoat;
 	msgctl;
 	readlinkat;
 	renameat;
 	setfib;
 	shmctl;
 	symlinkat;
 	unlinkat;
 };
 
 FBSD_1.2 {
 	cap_enter;
 	cap_getmode;
 	getloginclass;
 	pdfork;
 	pdgetpid;
 	pdkill;
 	posix_fallocate;
 	rctl_get_racct;
 	rctl_get_rules;
 	rctl_get_limits;
 	rctl_add_rule;
 	rctl_remove_rule;
 	setloginclass;
 };
 
 FBSD_1.3 {
 	accept4;
 	aio_mlock;
 	bindat;
 	cap_fcntls_get;
 	cap_fcntls_limit;
 	cap_ioctls_get;
 	cap_ioctls_limit;
 	__cap_rights_get;
 	cap_rights_limit;
 	cap_sandboxed;
 	chflagsat;
 	clock_getcpuclockid2;
 	connectat;
 	ffclock_getcounter;
 	ffclock_getestimate;
 	ffclock_setestimate;
 	pipe2;
 	posix_fadvise;
 	procctl;
 	wait6;
 };
 
 FBSD_1.4 {
 	futimens;
 	ppoll;
 	utimensat;
 	numa_setaffinity;
 	numa_getaffinity;
 	sendmmsg;
 	recvmmsg;
 };
 
 FBSD_1.5 {
 	clock_nanosleep;
 	fdatasync;
 	fhstat;
 	fhstatfs;
 	fstat;
 	fstatat;
 	fstatfs;
 	getdents;
 	getdirentries;
 	getfsstat;
 	getrandom;
 	kevent;
 	lstat;
 	mknod;
 	mknodat;
 	stat;
 	statfs;
 	cpuset_getdomain;
 	cpuset_setdomain;
 };
 
 FBSD_1.6 {
 	fhlink;
 	fhlinkat;
 	fhreadlink;
 	getfhat;
+	funlinkat;
 };
 
 FBSDprivate_1.0 {
 	___acl_aclcheck_fd;
 	__sys___acl_aclcheck_fd;
 	___acl_aclcheck_file;
 	__sys___acl_aclcheck_file;
 	___acl_aclcheck_link;
 	__sys___acl_aclcheck_link;
 	___acl_delete_fd;
 	__sys___acl_delete_fd;
 	___acl_delete_file;
 	__sys___acl_delete_file;
 	___acl_delete_link;
 	__sys___acl_delete_link;
 	___acl_get_fd;
 	__sys___acl_get_fd;
 	___acl_get_file;
 	__sys___acl_get_file;
 	___acl_get_link;
 	__sys___acl_get_link;
 	___acl_set_fd;
 	__sys___acl_set_fd;
 	___acl_set_file;
 	__sys___acl_set_file;
 	___acl_set_link;
 	__sys___acl_set_link;
 	___getcwd;
 	__sys___getcwd;
 	___mac_execve;
 	__sys___mac_execve;
 	___mac_get_fd;
 	__sys___mac_get_fd;
 	___mac_get_file;
 	__sys___mac_get_file;
 	___mac_get_link;
 	__sys___mac_get_link;
 	___mac_get_pid;
 	__sys___mac_get_pid;
 	___mac_get_proc;
 	__sys___mac_get_proc;
 	___mac_set_fd;
 	__sys___mac_set_fd;
 	___mac_set_file;
 	__sys___mac_set_file;
 	___mac_set_link;
 	__sys___mac_set_link;
 	___mac_set_proc;
 	__sys___mac_set_proc;
 	___semctl;
 	__sys___semctl;
 	___setugid;
 	__sys___setugid;
 	___syscall;
 	__sys___syscall;
 	___sysctl;
 	__sys___sysctl;
 	__umtx_op;
 	__sys__umtx_op;
 	_abort2;
 	__sys_abort2;
 	_accept;
 	__sys_accept;
 	_accept4;
 	__sys_accept4;
 	_access;
 	__sys_access;
 	_acct;
 	__sys_acct;
 	_adjtime;
 	__sys_adjtime;
 	__sys_aio_cancel;
 	__sys_aio_error;
 	__sys_aio_fsync;
 	__sys_aio_read;
 	__sys_aio_return;
 	__sys_aio_suspend;
 	__sys_aio_waitcomplete;
 	__sys_aio_write;
 	_audit;
 	__sys_audit;
 	_auditctl;
 	__sys_auditctl;
 	_auditon;
 	__sys_auditon;
 	_bind;
 	__sys_bind;
 	_chdir;
 	__sys_chdir;
 	_chflags;
 	__sys_chflags;
 	_chmod;
 	__sys_chmod;
 	_chown;
 	__sys_chown;
 	_chroot;
 	__sys_chroot;
 	_clock_getcpuclockid2;
 	__sys_clock_getcpuclockid2;
 	_clock_getres;
 	__sys_clock_getres;
 	_clock_gettime;
 	__sys_clock_gettime;
 	__sys_clock_nanosleep;
 	_clock_settime;
 	__sys_clock_settime;
 	_close;
 	__sys_close;
 	_closefrom;
 	__sys_closefrom;
 	_connect;
 	__sys_connect;
 	_cpuset;
 	__sys_cpuset;
 	_cpuset_getid;
 	__sys_cpuset_getid;
 	_cpuset_setid;
 	__sys_cpuset_setid;
 	_cpuset_getaffinity;
 	__sys_cpuset_getaffinity;
 	_cpuset_setaffinity;
 	__sys_cpuset_setaffinity;
 	_dup;
 	__sys_dup;
 	_dup2;
 	__sys_dup2;
 	_eaccess;
 	__sys_eaccess;
 	_execve;
 	__sys_execve;
 	_extattr_delete_fd;
 	__sys_extattr_delete_fd;
 	_extattr_delete_file;
 	__sys_extattr_delete_file;
 	_extattr_delete_link;
 	__sys_extattr_delete_link;
 	_extattr_get_fd;
 	__sys_extattr_get_fd;
 	_extattr_get_file;
 	__sys_extattr_get_file;
 	_extattr_get_link;
 	__sys_extattr_get_link;
 	_extattr_list_fd;
 	__sys_extattr_list_fd;
 	_extattr_list_file;
 	__sys_extattr_list_file;
 	_extattr_list_link;
 	__sys_extattr_list_link;
 	_extattr_set_fd;
 	__sys_extattr_set_fd;
 	_extattr_set_file;
 	__sys_extattr_set_file;
 	_extattr_set_link;
 	__sys_extattr_set_link;
 	_extattrctl;
 	__sys_extattrctl;
 	_fchdir;
 	__sys_fchdir;
 	_fchflags;
 	__sys_fchflags;
 	_fchmod;
 	__sys_fchmod;
 	_fchown;
 	__sys_fchown;
 	_fcntl;
 	__sys_fcntl;
 	__fcntl_compat;
 	_fhopen;
 	__sys_fhopen;
 	_fhstat;
 	__sys_fhstat;
 	_fhstatfs;
 	__sys_fhstatfs;
 	_flock;
 	__sys_flock;
 	_fork;
 	__sys_fork;
 	_fpathconf;
 	__sys_fpathconf;
 	_fstat;
 	__sys_fstat;
 	_fstatfs;
 	__sys_fstatfs;
 	_fsync;
 	__sys_fsync;
 	_fdatasync;
 	__sys_fdatasync;
 	_futimes;
 	__sys_futimes;
 	_getaudit;
 	__sys_getaudit;
 	_getaudit_addr;
 	__sys_getaudit_addr;
 	_getauid;
 	__sys_getauid;
 	_getcontext;
 	__sys_getcontext;
 	_getdirentries;
 	__sys_getdirentries;
 	_getdtablesize;
 	__sys_getdtablesize;
 	_getegid;
 	__sys_getegid;
 	_geteuid;
 	__sys_geteuid;
 	_getfh;
 	__sys_getfh;
 	_getfsstat;
 	__sys_getfsstat;
 	_getgid;
 	__sys_getgid;
 	_getgroups;
 	__sys_getgroups;
 	_getitimer;
 	__sys_getitimer;
 	_getpeername;
 	__sys_getpeername;
 	_getpgid;
 	__sys_getpgid;
 	_getpgrp;
 	__sys_getpgrp;
 	_getpid;
 	__sys_getpid;
 	_getppid;
 	__sys_getppid;
 	_getpriority;
 	__sys_getpriority;
 	_getresgid;
 	__sys_getresgid;
 	_getresuid;
 	__sys_getresuid;
 	_getrlimit;
 	__sys_getrlimit;
 	_getrusage;
 	__sys_getrusage;
 	_getsid;
 	__sys_getsid;
 	_getsockname;
 	__sys_getsockname;
 	_getsockopt;
 	__sys_getsockopt;
 	_gettimeofday;
 	__sys_gettimeofday;
 	_getuid;
 	__sys_getuid;
 	_ioctl;
 	__sys_ioctl;
 	_issetugid;
 	__sys_issetugid;
 	_jail;
 	__sys_jail;
 	_jail_attach;
 	__sys_jail_attach;
 	_kenv;
 	__sys_kenv;
 	_kevent;
 	__sys_kevent;
 	_kill;
 	__sys_kill;
 	_kldfind;
 	__sys_kldfind;
 	_kldfirstmod;
 	__sys_kldfirstmod;
 	_kldload;
 	__sys_kldload;
 	_kldnext;
 	__sys_kldnext;
 	_kldstat;
 	__sys_kldstat;
 	_kldsym;
 	__sys_kldsym;
 	_kldunload;
 	__sys_kldunload;
 	_kldunloadf;
 	__sys_kldunloadf;
 	_kmq_notify;
 	__sys_kmq_notify;
 	_kmq_open;
 	__sys_kmq_open;
 	_kmq_setattr;
 	__sys_kmq_setattr;
 	_kmq_timedreceive;
 	__sys_kmq_timedreceive;
 	_kmq_timedsend;
 	__sys_kmq_timedsend;
 	_kmq_unlink;
 	__sys_kmq_unlink;
 	_kqueue;
 	__sys_kqueue;
 	_ksem_close;
 	__sys_ksem_close;
 	_ksem_destroy;
 	__sys_ksem_destroy;
 	_ksem_getvalue;
 	__sys_ksem_getvalue;
 	_ksem_init;
 	__sys_ksem_init;
 	_ksem_open;
 	__sys_ksem_open;
 	_ksem_post;
 	__sys_ksem_post;
 	_ksem_timedwait;
 	__sys_ksem_timedwait;
 	_ksem_trywait;
 	__sys_ksem_trywait;
 	_ksem_unlink;
 	__sys_ksem_unlink;
 	_ksem_wait;
 	__sys_ksem_wait;
 	_ktrace;
 	__sys_ktrace;
 	_lchflags;
 	__sys_lchflags;
 	_lchmod;
 	__sys_lchmod;
 	_lchown;
 	__sys_lchown;
 	_lgetfh;
 	__sys_lgetfh;
 	_link;
 	__sys_link;
 	__sys_lio_listio;
 	_listen;
 	__sys_listen;
 	_lutimes;
 	__sys_lutimes;
 	_mac_syscall;
 	__sys_mac_syscall;
 	_madvise;
 	__sys_madvise;
 	_mincore;
 	__sys_mincore;
 	_minherit;
 	__sys_minherit;
 	_mkdir;
 	__sys_mkdir;
 	_mkfifo;
 	__sys_mkfifo;
 	_mknod;
 	__sys_mknod;
 	_mlock;
 	__sys_mlock;
 	_mlockall;
 	__sys_mlockall;
 	_modfind;
 	__sys_modfind;
 	_modfnext;
 	__sys_modfnext;
 	_modnext;
 	__sys_modnext;
 	_modstat;
 	__sys_modstat;
 	_mount;
 	__sys_mount;
 	_mprotect;
 	__sys_mprotect;
 	_msgctl;
 	__sys_msgctl;
 	_msgget;
 	__sys_msgget;
 	_msgrcv;
 	__sys_msgrcv;
 	_msgsnd;
 	__sys_msgsnd;
 	_msgsys;
 	__sys_msgsys;
 	_msync;
 	__sys_msync;
 	_munlock;
 	__sys_munlock;
 	_munlockall;
 	__sys_munlockall;
 	_munmap;
 	__sys_munmap;
 	_nanosleep;
 	__sys_nanosleep;
 	_nfssvc;
 	__sys_nfssvc;
 	_nmount;
 	__sys_nmount;
 	_ntp_adjtime;
 	__sys_ntp_adjtime;
 	_ntp_gettime;
 	__sys_ntp_gettime;
 	_open;
 	__sys_open;
 	_openat;
 	__sys_openat;
 	_pathconf;
 	__sys_pathconf;
 	_pipe;
 	__sys_pipe;
 	_poll;
 	__sys_poll;
 	_ppoll;
 	__sys_ppoll;
 	_preadv;
 	__sys_preadv;
 	_procctl;
 	__sys_procctl;
 	_profil;
 	__sys_profil;
 	_pselect;
 	__sys_pselect;
 	_ptrace;
 	__sys_ptrace;
 	_pwritev;
 	__sys_pwritev;
 	_quotactl;
 	__sys_quotactl;
 	_read;
 	__sys_read;
 	_readlink;
 	__sys_readlink;
 	_readv;
 	__sys_readv;
 	_reboot;
 	__sys_reboot;
 	_recvfrom;
 	__sys_recvfrom;
 	_recvmsg;
 	__sys_recvmsg;
 	_rename;
 	__sys_rename;
 	_revoke;
 	__sys_revoke;
 	_rfork;
 	__sys_rfork;
 	_rmdir;
 	__sys_rmdir;
 	_rtprio;
 	__sys_rtprio;
 	_rtprio_thread;
 	__sys_rtprio_thread;
 	_sched_get_priority_max;
 	__sys_sched_get_priority_max;
 	_sched_get_priority_min;
 	__sys_sched_get_priority_min;
 	_sched_getparam;
 	__sys_sched_getparam;
 	_sched_getscheduler;
 	__sys_sched_getscheduler;
 	_sched_rr_get_interval;
 	__sys_sched_rr_get_interval;
 	_sched_setparam;
 	__sys_sched_setparam;
 	_sched_setscheduler;
 	__sys_sched_setscheduler;
 	_sched_yield;
 	__sys_sched_yield;
 	_select;
 	__sys_select;
 	_semget;
 	__sys_semget;
 	_semop;
 	__sys_semop;
 	_semsys;
 	__sys_semsys;
 	_sendfile;
 	__sys_sendfile;
 	_sendmsg;
 	__sys_sendmsg;
 	_sendto;
 	__sys_sendto;
 	_setaudit;
 	__sys_setaudit;
 	_setaudit_addr;
 	__sys_setaudit_addr;
 	_setauid;
 	__sys_setauid;
 	_setcontext;
 	__sys_setcontext;
 	_setegid;
 	__sys_setegid;
 	_seteuid;
 	__sys_seteuid;
 	_setgid;
 	__sys_setgid;
 	_setgroups;
 	__sys_setgroups;
 	_setitimer;
 	__sys_setitimer;
 	_setlogin;
 	__sys_setlogin;
 	_setpgid;
 	__sys_setpgid;
 	_setpriority;
 	__sys_setpriority;
 	_setregid;
 	__sys_setregid;
 	_setresgid;
 	__sys_setresgid;
 	_setresuid;
 	__sys_setresuid;
 	_setreuid;
 	__sys_setreuid;
 	_setrlimit;
 	__sys_setrlimit;
 	_setsid;
 	__sys_setsid;
 	_setsockopt;
 	__sys_setsockopt;
 	_settimeofday;
 	__sys_settimeofday;
 	_setuid;
 	__sys_setuid;
 	_shm_open;
 	__sys_shm_open;
 	_shm_unlink;
 	__sys_shm_unlink;
 	_shmat;
 	__sys_shmat;
 	_shmctl;
 	__sys_shmctl;
 	_shmdt;
 	__sys_shmdt;
 	_shmget;
 	__sys_shmget;
 	_shmsys;
 	__sys_shmsys;
 	_shutdown;
 	__sys_shutdown;
 	_sigaction;
 	__sys_sigaction;
 	_sigaltstack;
 	__sys_sigaltstack;
 	_sigpending;
 	__sys_sigpending;
 	_sigprocmask;
 	__sys_sigprocmask;
 	_sigqueue;
 	__sys_sigqueue;
 	_sigreturn;
 	__sys_sigreturn;
 	_sigsuspend;
 	__sys_sigsuspend;
 	_sigtimedwait;
 	__sys_sigtimedwait;
 	_sigwait;
 	__sigwait;
 	__sys_sigwait;
 	_sigwaitinfo;
 	__sys_sigwaitinfo;
 	_socket;
 	__sys_socket;
 	_socketpair;
 	__sys_socketpair;
 	_statfs;
 	__sys_statfs;
 	_swapcontext;
 	__sys_swapcontext;
 	_swapoff;
 	__sys_swapoff;
 	_swapon;
 	__sys_swapon;
 	_symlink;
 	__sys_symlink;
 	_sync;
 	__sys_sync;
 	_sysarch;
 	__sys_sysarch;
 	_syscall;
 	__sys_syscall;
 	_thr_create;
 	__sys_thr_create;
 	_thr_exit;
 	__sys_thr_exit;
 	_thr_kill;
 	__sys_thr_kill;
 	_thr_kill2;
 	__sys_thr_kill2;
 	_thr_new;
 	__sys_thr_new;
 	_thr_self;
 	__sys_thr_self;
 	_thr_set_name;
 	__sys_thr_set_name;
 	_thr_suspend;
 	__sys_thr_suspend;
 	_thr_wake;
 	__sys_thr_wake;
 	_ktimer_create;
 	__sys_ktimer_create;
 	_ktimer_delete;
 	__sys_ktimer_delete;
 	_ktimer_getoverrun;
 	__sys_ktimer_getoverrun;
 	_ktimer_gettime;
 	__sys_ktimer_gettime;
 	_ktimer_settime;
 	__sys_ktimer_settime;
 	_umask;
 	__sys_umask;
 	_undelete;
 	__sys_undelete;
 	_unlink;
 	__sys_unlink;
 	_unmount;
 	__sys_unmount;
 	_utimes;
 	__sys_utimes;
 	_utrace;
 	__sys_utrace;
 	_uuidgen;
 	__sys_uuidgen;
 	_wait4;
 	__sys_wait4;
 	_wait6;
 	__sys_wait6;
 	_write;
 	__sys_write;
 	_writev;
 	__sys_writev;
 	__set_error_selector;
 	nlm_syscall;
 	gssd_syscall;
 	__libc_interposing_slot;
 	__libc_sigwait;
 	_cpuset_getdomain;
 	__sys_cpuset_getdomain;
 	_cpuset_setdomain;
 	__sys_cpuset_setdomain;
 };
Index: head/lib/libc/sys/unlink.2
===================================================================
--- head/lib/libc/sys/unlink.2	(revision 345981)
+++ head/lib/libc/sys/unlink.2	(revision 345982)
@@ -1,254 +1,289 @@
 .\" Copyright (c) 1980, 1991, 1993
 .\"	The Regents of the University of California.  All rights reserved.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
 .\" 1. Redistributions of source code must retain the above copyright
 .\"    notice, this list of conditions and the following disclaimer.
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
 .\" 3. Neither the name of the University nor the names of its contributors
 .\"    may be used to endorse or promote products derived from this software
 .\"    without specific prior written permission.
 .\"
 .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 .\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
 .\"     @(#)unlink.2	8.1 (Berkeley) 6/4/93
 .\" $FreeBSD$
 .\"
-.Dd November 11, 2018
+.Dd April 6, 2019
 .Dt UNLINK 2
 .Os
 .Sh NAME
 .Nm unlink ,
 .Nm unlinkat
 .Nd remove directory entry
 .Sh LIBRARY
 .Lb libc
 .Sh SYNOPSIS
 .In unistd.h
 .Ft int
 .Fn unlink "const char *path"
 .Ft int
-.Fn unlinkat "int fd" "const char *path" "int flag"
+.Fn unlinkat "int dfd" "const char *path" "int flag"
+.Ft int
+.Fn funlinkat "int dfd" "const char *path" "int fd" "int flag"
 .Sh DESCRIPTION
 The
 .Fn unlink
 system call
 removes the link named by
 .Fa path
 from its directory and decrements the link count of the
 file which was referenced by the link.
 If that decrement reduces the link count of the file
 to zero,
 and no process has the file open, then
 all resources associated with the file are reclaimed.
 If one or more process have the file open when the last link is removed,
 the link is removed, but the removal of the file is delayed until
 all references to it have been closed.
 The
 .Fa path
 argument
 may not be a directory.
 .Pp
 The
 .Fn unlinkat
 system call is equivalent to
 .Fn unlink
 or
 .Fn rmdir
 except in the case where
 .Fa path
 specifies a relative path.
 In this case the directory entry to be removed is determined
 relative to the directory associated with the file descriptor
-.Fa fd
+.Fa dfd
 instead of the current working directory.
 .Pp
 The values for
 .Fa flag
 are constructed by a bitwise-inclusive OR of flags from the following list,
 defined in
 .In fcntl.h :
 .Bl -tag -width indent
 .It Dv AT_REMOVEDIR
 Remove the directory entry specified by
 .Fa fd
 and
 .Fa path
 as a directory, not a normal file.
 .It Dv AT_BENEATH
 Only unlink files and directories which are beneath of the topping
 directory.
 See the description of the
 .Dv O_BENEATH
 flag in the
 .Xr open 2
 manual page.
 .El
 .Pp
 If
 .Fn unlinkat
 is passed the special value
 .Dv AT_FDCWD
 in the
 .Fa fd
 parameter, the current working directory is used and the behavior is
 identical to a call to
 .Fa unlink
 or
 .Fa rmdir
 respectively, depending on whether or not the
 .Dv AT_REMOVEDIR
 bit is set in flag.
+.Pp
+The
+.Fn funlinkat
+system call can be used to unlink an already-opened file, unless that
+file has been replaced since it was opened.
+It is equivalent to
+.Fn unlinkat
+in the case where
+.Fa path
+is already open as the file descriptor
+.Fa fd .
+Otherwise, the path will not be removed and an error will be returned.
+The
+.Fa fd
+can be set to
+.Dv FD_NONE .
+In that case
+.Fn funlinkat
+behaves exactly like
+.Fn unlinkat .
 .Sh RETURN VALUES
 .Rv -std unlink
 .Sh ERRORS
 The
 .Fn unlink
 succeeds unless:
 .Bl -tag -width Er
 .It Bq Er ENOTDIR
 A component of the path prefix is not a directory.
 .It Bq Er EISDIR
 The named file is a directory.
 .It Bq Er ENAMETOOLONG
 A component of a pathname exceeded 255 characters,
 or an entire path name exceeded 1023 characters.
 .It Bq Er ENOENT
 The named file does not exist.
 .It Bq Er EACCES
 Search permission is denied for a component of the path prefix.
 .It Bq Er EACCES
 Write permission is denied on the directory containing the link
 to be removed.
 .It Bq Er ELOOP
 Too many symbolic links were encountered in translating the pathname.
 .It Bq Er EPERM
 The named file is a directory.
 .It Bq Er EPERM
 The named file has its immutable, undeletable or append-only flag set, see the
 .Xr chflags 2
 manual page for more information.
 .It Bq Er EPERM
 The parent directory of the named file has its immutable or append-only flag
 set.
 .It Bq Er EPERM
 The directory containing the file is marked sticky,
 and neither the containing directory nor the file to be removed
 are owned by the effective user ID.
 .It Bq Er EIO
 An I/O error occurred while deleting the directory entry
 or deallocating the inode.
 .It Bq Er EROFS
 The named file resides on a read-only file system.
 .It Bq Er EFAULT
 The
 .Fa path
 argument
 points outside the process's allocated address space.
 .It Bq Er ENOSPC
 On file systems supporting copy-on-write or snapshots, there was not enough
 free space to record metadata for the delete operation of the file.
 .El
 .Pp
 In addition to the errors returned by the
 .Fn unlink ,
 the
 .Fn unlinkat
 may fail if:
 .Bl -tag -width Er
 .It Bq Er EBADF
 The
 .Fa path
 argument does not specify an absolute path and the
 .Fa fd
 argument is neither
 .Dv AT_FDCWD
 nor a valid file descriptor open for searching.
 .It Bq Er ENOTEMPTY
 The
 .Fa flag
 parameter has the
 .Dv AT_REMOVEDIR
 bit set and the
 .Fa path
 argument names a directory that is not an empty directory,
 or there are hard links to the directory other than dot or
 a single entry in dot-dot.
 .It Bq Er ENOTDIR
 The
 .Fa flag
 parameter has the
 .Dv AT_REMOVEDIR
 bit set and
 .Fa path
 does not name a directory.
 .It Bq Er EINVAL
 The value of the
 .Fa flag
 argument is not valid.
 .It Bq Er ENOTDIR
 The
 .Fa path
 argument is not an absolute path and
 .Fa fd
 is neither
 .Dv AT_FDCWD
 nor a file descriptor associated with a directory.
 .It Bq Er ENOTCAPABLE
 .Fa path
 is an absolute path,
 or contained a ".." component leading to a
 directory outside of the directory hierarchy specified by
 .Fa fd ,
 and the process is in capability mode.
 .It Bq Er ENOTCAPABLE
 The
 .Dv AT_BENEATH
 flag was provided to
 .Fn unlinkat ,
 and the absolute
 .Fa path
 does not have its tail fully contained under the topping directory,
 or the relative
 .Fa path
 escapes it.
 .El
+.Pp
+In addition to the errors returned by
+.Fn unlinkat ,
+.Fn funlinkat
+may fail if:
+.Bl -tag -width Er
+.It Bq Er EDEADLK
+The file descriptor is not associated with the path.
+.El
 .Sh SEE ALSO
 .Xr chflags 2 ,
 .Xr close 2 ,
 .Xr link 2 ,
 .Xr rmdir 2 ,
 .Xr symlink 7
 .Sh STANDARDS
 The
 .Fn unlinkat
 system call follows The Open Group Extended API Set 2 specification.
 .Sh HISTORY
 The
 .Fn unlink
 function appeared in
 .At v1 .
 The
 .Fn unlinkat
 system call appeared in
 .Fx 8.0 .
+The
+.Fn funlinkat
+system call appeared in
+.Fx 13.0 .
 .Pp
 The
 .Fn unlink
 system call traditionally allows the super-user to unlink directories which
 can damage the file system integrity.
 This implementation no longer permits it.
Index: head/sys/cddl/compat/opensolaris/sys/vnode.h
===================================================================
--- head/sys/cddl/compat/opensolaris/sys/vnode.h	(revision 345981)
+++ head/sys/cddl/compat/opensolaris/sys/vnode.h	(revision 345982)
@@ -1,286 +1,287 @@
 /*-
  * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _OPENSOLARIS_SYS_VNODE_H_
 #define	_OPENSOLARIS_SYS_VNODE_H_
 
 #ifdef _KERNEL
 
 struct vnode;
 struct vattr;
 
 typedef	struct vnode	vnode_t;
 typedef	struct vattr	vattr_t;
 typedef enum vtype vtype_t;
 
 #include <sys/namei.h>
 enum symfollow { NO_FOLLOW = NOFOLLOW };
 
 #include <sys/proc.h>
 #include_next <sys/vnode.h>
 #include <sys/mount.h>
 #include <sys/cred.h>
 #include <sys/fcntl.h>
 #include <sys/file.h>
 #include <sys/filedesc.h>
 #include <sys/syscallsubr.h>
 
 typedef	struct vop_vector	vnodeops_t;
 #define	VOP_FID		VOP_VPTOFH
 #define	vop_fid		vop_vptofh
 #define	vop_fid_args	vop_vptofh_args
 #define	a_fid		a_fhp
 
 #define	IS_XATTRDIR(dvp)	(0)
 
 #define	v_count	v_usecount
 
 #define	V_APPEND	VAPPEND
 
 #define	rootvfs		(rootvnode == NULL ? NULL : rootvnode->v_mount)
 
 static __inline int
 vn_is_readonly(vnode_t *vp)
 {
 	return (vp->v_mount->mnt_flag & MNT_RDONLY);
 }
 #define	vn_vfswlock(vp)		(0)
 #define	vn_vfsunlock(vp)	do { } while (0)
 #define	vn_ismntpt(vp)		((vp)->v_type == VDIR && (vp)->v_mountedhere != NULL)
 #define	vn_mountedvfs(vp)	((vp)->v_mountedhere)
 #define	vn_has_cached_data(vp)	\
 	((vp)->v_object != NULL && \
 	 (vp)->v_object->resident_page_count > 0)
 #define	vn_exists(vp)		do { } while (0)
 #define	vn_invalid(vp)		do { } while (0)
 #define	vn_renamepath(tdvp, svp, tnm, lentnm)	do { } while (0)
 #define	vn_free(vp)		do { } while (0)
 #define	vn_matchops(vp, vops)	((vp)->v_op == &(vops))
 
 #define	VN_HOLD(v)	vref(v)
 #define	VN_RELE(v)	vrele(v)
 #define	VN_URELE(v)	vput(v)
 
 #define	vnevent_create(vp, ct)			do { } while (0)
 #define	vnevent_link(vp, ct)			do { } while (0)
 #define	vnevent_remove(vp, dvp, name, ct)	do { } while (0)
 #define	vnevent_rmdir(vp, dvp, name, ct)	do { } while (0)
 #define	vnevent_rename_src(vp, dvp, name, ct)	do { } while (0)
 #define	vnevent_rename_dest(vp, dvp, name, ct)	do { } while (0)
 #define	vnevent_rename_dest_dir(vp, ct)		do { } while (0)
 
 #define	specvp(vp, rdev, type, cr)	(VN_HOLD(vp), (vp))
 #define	MANDMODE(mode)		(0)
 #define	MANDLOCK(vp, mode)	(0)
 #define	chklock(vp, op, offset, size, mode, ct)	(0)
 #define	cleanlocks(vp, pid, foo)	do { } while (0)
 #define	cleanshares(vp, pid)		do { } while (0)
 
 /*
  * We will use va_spare is place of Solaris' va_mask.
  * This field is initialized in zfs_setattr().
  */
 #define	va_mask		va_spare
 /* TODO: va_fileid is shorter than va_nodeid !!! */
 #define	va_nodeid	va_fileid
 /* TODO: This field needs conversion! */
 #define	va_nblocks	va_bytes
 #define	va_blksize	va_blocksize
 #define	va_seq		va_gen
 
 #define	MAXOFFSET_T	OFF_MAX
 #define	EXCL		0
 
 #define	ACCESSED		(AT_ATIME)
 #define	STATE_CHANGED		(AT_CTIME)
 #define	CONTENT_MODIFIED	(AT_MTIME | AT_CTIME)
 
 static __inline void
 vattr_init_mask(vattr_t *vap)
 {
 
 	vap->va_mask = 0;
 
 	if (vap->va_type != VNON)
 		vap->va_mask |= AT_TYPE;
 	if (vap->va_uid != (uid_t)VNOVAL)
 		vap->va_mask |= AT_UID;
 	if (vap->va_gid != (gid_t)VNOVAL)
 		vap->va_mask |= AT_GID;
 	if (vap->va_size != (u_quad_t)VNOVAL)
 		vap->va_mask |= AT_SIZE;
 	if (vap->va_atime.tv_sec != VNOVAL)
 		vap->va_mask |= AT_ATIME;
 	if (vap->va_mtime.tv_sec != VNOVAL)
 		vap->va_mask |= AT_MTIME;
 	if (vap->va_mode != (u_short)VNOVAL)
 		vap->va_mask |= AT_MODE;
 	if (vap->va_flags != VNOVAL)
 		vap->va_mask |= AT_XVATTR;
 }
 
 #define	FCREAT		O_CREAT
 #define	FTRUNC		O_TRUNC
 #define	FEXCL		O_EXCL
 #define	FDSYNC		FFSYNC
 #define	FRSYNC		FFSYNC
 #define	FSYNC		FFSYNC
 #define	FOFFMAX		0x00
 #define	FIGNORECASE	0x00
 
 static __inline int
 vn_openat(char *pnamep, enum uio_seg seg, int filemode, int createmode,
     vnode_t **vpp, enum create crwhy, mode_t umask, struct vnode *startvp,
     int fd)
 {
 	struct thread *td = curthread;
 	struct nameidata nd;
 	int error, operation;
 
 	ASSERT(seg == UIO_SYSSPACE);
 	if ((filemode & FCREAT) != 0) {
 		ASSERT(filemode == (FWRITE | FCREAT | FTRUNC | FOFFMAX));
 		ASSERT(crwhy == CRCREAT);
 		operation = CREATE;
 	} else {
 		ASSERT(filemode == (FREAD | FOFFMAX) ||
 		    filemode == (FREAD | FWRITE | FOFFMAX));
 		ASSERT(crwhy == 0);
 		operation = LOOKUP;
 	}
 	ASSERT(umask == 0);
 
 	pwd_ensure_dirs();
 
 	if (startvp != NULL)
 		vref(startvp);
 	NDINIT_ATVP(&nd, operation, 0, UIO_SYSSPACE, pnamep, startvp, td);
 	filemode |= O_NOFOLLOW;
 	error = vn_open_cred(&nd, &filemode, createmode, 0, td->td_ucred, NULL);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	if (error == 0) {
 		/* We just unlock so we hold a reference. */
 		VOP_UNLOCK(nd.ni_vp, 0);
 		*vpp = nd.ni_vp;
 	}
 	return (error);
 }
 
 static __inline int
 zfs_vn_open(char *pnamep, enum uio_seg seg, int filemode, int createmode,
     vnode_t **vpp, enum create crwhy, mode_t umask)
 {
 
 	return (vn_openat(pnamep, seg, filemode, createmode, vpp, crwhy,
 	    umask, NULL, -1));
 }
 #define	vn_open(pnamep, seg, filemode, createmode, vpp, crwhy, umask)	\
 	zfs_vn_open((pnamep), (seg), (filemode), (createmode), (vpp), (crwhy), (umask))
 
 #define	RLIM64_INFINITY	0
 static __inline int
 zfs_vn_rdwr(enum uio_rw rw, vnode_t *vp, caddr_t base, ssize_t len,
     offset_t offset, enum uio_seg seg, int ioflag, int ulimit, cred_t *cr,
     ssize_t *residp)
 {
 	struct thread *td = curthread;
 	int error;
 	ssize_t resid;
 
 	ASSERT(ioflag == 0);
 	ASSERT(ulimit == RLIM64_INFINITY);
 
 	if (rw == UIO_WRITE) {
 		ioflag = IO_SYNC;
 	} else {
 		ioflag = IO_DIRECT;
 	}
 	error = vn_rdwr(rw, vp, base, len, offset, seg, ioflag, cr, NOCRED,
 	    &resid, td);
 	if (residp != NULL)
 		*residp = (ssize_t)resid;
 	return (error);
 }
 #define	vn_rdwr(rw, vp, base, len, offset, seg, ioflag, ulimit, cr, residp) \
 	zfs_vn_rdwr((rw), (vp), (base), (len), (offset), (seg), (ioflag), (ulimit), (cr), (residp))
 
 static __inline int
 zfs_vop_fsync(vnode_t *vp, int flag, cred_t *cr)
 {
 	struct mount *mp;
 	int error;
 
 	ASSERT(flag == FSYNC);
 
 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 		goto drop;
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	error = VOP_FSYNC(vp, MNT_WAIT, curthread);
 	VOP_UNLOCK(vp, 0);
 	vn_finished_write(mp);
 drop:
 	return (error);
 }
 #define	VOP_FSYNC(vp, flag, cr, ct)	zfs_vop_fsync((vp), (flag), (cr))
 
 static __inline int
 zfs_vop_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *cr)
 {
 	int error;
 
 	ASSERT(count == 1);
 	ASSERT(offset == 0);
 
 	error = vn_close(vp, flag, cr, curthread);
 	return (error);
 }
 #define	VOP_CLOSE(vp, oflags, count, offset, cr, ct)			\
 	zfs_vop_close((vp), (oflags), (count), (offset), (cr))
 
 static __inline int
 vn_rename(char *from, char *to, enum uio_seg seg)
 {
 
 	ASSERT(seg == UIO_SYSSPACE);
 
 	return (kern_renameat(curthread, AT_FDCWD, from, AT_FDCWD, to, seg));
 }
 
 static __inline int
 vn_remove(char *fnamep, enum uio_seg seg, enum rm dirflag)
 {
 
 	ASSERT(seg == UIO_SYSSPACE);
 	ASSERT(dirflag == RMFILE);
 
-	return (kern_unlinkat(curthread, AT_FDCWD, fnamep, seg, 0, 0));
+	return (kern_funlinkat(curthread, AT_FDCWD, fnamep, FD_NONE, seg, 0,
+	    0));
 }
 
 #endif	/* _KERNEL */
 
 #endif	/* _OPENSOLARIS_SYS_VNODE_H_ */
Index: head/sys/compat/cloudabi/cloudabi_file.c
===================================================================
--- head/sys/compat/cloudabi/cloudabi_file.c	(revision 345981)
+++ head/sys/compat/cloudabi/cloudabi_file.c	(revision 345982)
@@ -1,760 +1,762 @@
 /*-
  * Copyright (c) 2015 Nuxi, https://nuxi.nl/
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/capsicum.h>
 #include <sys/dirent.h>
 #include <sys/fcntl.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/namei.h>
 #include <sys/proc.h>
 #include <sys/stat.h>
 #include <sys/syscallsubr.h>
 #include <sys/uio.h>
 #include <sys/vnode.h>
 
 #include <contrib/cloudabi/cloudabi_types_common.h>
 
 #include <compat/cloudabi/cloudabi_proto.h>
 #include <compat/cloudabi/cloudabi_util.h>
 
 #include <security/mac/mac_framework.h>
 
 static MALLOC_DEFINE(M_CLOUDABI_PATH, "cloudabipath", "CloudABI pathnames");
 
 /*
  * Copying pathnames from userspace to kernelspace.
  *
  * Unlike most operating systems, CloudABI doesn't use null-terminated
  * pathname strings. Processes always pass pathnames to the kernel by
  * providing a base pointer and a length. This has a couple of reasons:
  *
  * - It makes it easier to use CloudABI in combination with programming
  *   languages other than C, that may use non-null terminated strings.
  * - It allows for calling system calls on individual components of the
  *   pathname without modifying the input string.
  *
  * The function below copies in pathname strings and null-terminates it.
  * It also ensure that the string itself does not contain any null
  * bytes.
  *
  * TODO(ed): Add an abstraction to vfs_lookup.c that allows us to pass
  *           in unterminated pathname strings, so we can do away with
  *           the copying.
  */
 
 static int
 copyin_path(const char *uaddr, size_t len, char **result)
 {
 	char *buf;
 	int error;
 
 	if (len >= PATH_MAX)
 		return (ENAMETOOLONG);
 	buf = malloc(len + 1, M_CLOUDABI_PATH, M_WAITOK);
 	error = copyin(uaddr, buf, len);
 	if (error != 0) {
 		free(buf, M_CLOUDABI_PATH);
 		return (error);
 	}
 	if (memchr(buf, '\0', len) != NULL) {
 		free(buf, M_CLOUDABI_PATH);
 		return (EINVAL);
 	}
 	buf[len] = '\0';
 	*result = buf;
 	return (0);
 }
 
 static void
 cloudabi_freestr(char *buf)
 {
 
 	free(buf, M_CLOUDABI_PATH);
 }
 
 int
 cloudabi_sys_file_advise(struct thread *td,
     struct cloudabi_sys_file_advise_args *uap)
 {
 	int advice;
 
 	switch (uap->advice) {
 	case CLOUDABI_ADVICE_DONTNEED:
 		advice = POSIX_FADV_DONTNEED;
 		break;
 	case CLOUDABI_ADVICE_NOREUSE:
 		advice = POSIX_FADV_NOREUSE;
 		break;
 	case CLOUDABI_ADVICE_NORMAL:
 		advice = POSIX_FADV_NORMAL;
 		break;
 	case CLOUDABI_ADVICE_RANDOM:
 		advice = POSIX_FADV_RANDOM;
 		break;
 	case CLOUDABI_ADVICE_SEQUENTIAL:
 		advice = POSIX_FADV_SEQUENTIAL;
 		break;
 	case CLOUDABI_ADVICE_WILLNEED:
 		advice = POSIX_FADV_WILLNEED;
 		break;
 	default:
 		return (EINVAL);
 	}
 
 	return (kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, advice));
 }
 
 int
 cloudabi_sys_file_allocate(struct thread *td,
     struct cloudabi_sys_file_allocate_args *uap)
 {
 
 	return (kern_posix_fallocate(td, uap->fd, uap->offset, uap->len));
 }
 
 int
 cloudabi_sys_file_create(struct thread *td,
     struct cloudabi_sys_file_create_args *uap)
 {
 	char *path;
 	int error;
 
 	error = copyin_path(uap->path, uap->path_len, &path);
 	if (error != 0)
 		return (error);
 
 	/*
 	 * CloudABI processes cannot interact with UNIX credentials and
 	 * permissions. Depend on the umask that is set prior to
 	 * execution to restrict the file permissions.
 	 */
 	switch (uap->type) {
 	case CLOUDABI_FILETYPE_DIRECTORY:
 		error = kern_mkdirat(td, uap->fd, path, UIO_SYSSPACE, 0777);
 		break;
 	default:
 		error = EINVAL;
 		break;
 	}
 	cloudabi_freestr(path);
 	return (error);
 }
 
 int
 cloudabi_sys_file_link(struct thread *td,
     struct cloudabi_sys_file_link_args *uap)
 {
 	char *path1, *path2;
 	int error;
 
 	error = copyin_path(uap->path1, uap->path1_len, &path1);
 	if (error != 0)
 		return (error);
 	error = copyin_path(uap->path2, uap->path2_len, &path2);
 	if (error != 0) {
 		cloudabi_freestr(path1);
 		return (error);
 	}
 
 	error = kern_linkat(td, uap->fd1.fd, uap->fd2, path1, path2,
 	    UIO_SYSSPACE, (uap->fd1.flags & CLOUDABI_LOOKUP_SYMLINK_FOLLOW) ?
 	    FOLLOW : NOFOLLOW);
 	cloudabi_freestr(path1);
 	cloudabi_freestr(path2);
 	return (error);
 }
 
 int
 cloudabi_sys_file_open(struct thread *td,
     struct cloudabi_sys_file_open_args *uap)
 {
 	cloudabi_fdstat_t fds;
 	cap_rights_t rights;
 	struct filecaps fcaps = {};
 	struct nameidata nd;
 	struct file *fp;
 	struct vnode *vp;
 	char *path;
 	int error, fd, fflags;
 	bool read, write;
 
 	error = copyin(uap->fds, &fds, sizeof(fds));
 	if (error != 0)
 		return (error);
 
 	/* All the requested rights should be set on the descriptor. */
 	error = cloudabi_convert_rights(
 	    fds.fs_rights_base | fds.fs_rights_inheriting, &rights);
 	if (error != 0)
 		return (error);
 	cap_rights_set(&rights, CAP_LOOKUP);
 
 	/* Convert rights to corresponding access mode. */
 	read = (fds.fs_rights_base & (CLOUDABI_RIGHT_FD_READ |
 	    CLOUDABI_RIGHT_FILE_READDIR | CLOUDABI_RIGHT_MEM_MAP_EXEC)) != 0;
 	write = (fds.fs_rights_base & (CLOUDABI_RIGHT_FD_DATASYNC |
 	    CLOUDABI_RIGHT_FD_WRITE | CLOUDABI_RIGHT_FILE_ALLOCATE |
 	    CLOUDABI_RIGHT_FILE_STAT_FPUT_SIZE)) != 0;
 	fflags = write ? read ? FREAD | FWRITE : FWRITE : FREAD;
 
 	/* Convert open flags. */
 	if ((uap->oflags & CLOUDABI_O_CREAT) != 0) {
 		fflags |= O_CREAT;
 		cap_rights_set(&rights, CAP_CREATE);
 	}
 	if ((uap->oflags & CLOUDABI_O_DIRECTORY) != 0)
 		fflags |= O_DIRECTORY;
 	if ((uap->oflags & CLOUDABI_O_EXCL) != 0)
 		fflags |= O_EXCL;
 	if ((uap->oflags & CLOUDABI_O_TRUNC) != 0) {
 		fflags |= O_TRUNC;
 		cap_rights_set(&rights, CAP_FTRUNCATE);
 	}
 	if ((fds.fs_flags & CLOUDABI_FDFLAG_APPEND) != 0)
 		fflags |= O_APPEND;
 	if ((fds.fs_flags & CLOUDABI_FDFLAG_NONBLOCK) != 0)
 		fflags |= O_NONBLOCK;
 	if ((fds.fs_flags & (CLOUDABI_FDFLAG_SYNC | CLOUDABI_FDFLAG_DSYNC |
 	    CLOUDABI_FDFLAG_RSYNC)) != 0) {
 		fflags |= O_SYNC;
 		cap_rights_set(&rights, CAP_FSYNC);
 	}
 	if ((uap->dirfd.flags & CLOUDABI_LOOKUP_SYMLINK_FOLLOW) == 0)
 		fflags |= O_NOFOLLOW;
 	if (write && (fflags & (O_APPEND | O_TRUNC)) == 0)
 		cap_rights_set(&rights, CAP_SEEK);
 
 	/* Allocate new file descriptor. */
 	error = falloc_noinstall(td, &fp);
 	if (error != 0)
 		return (error);
 	fp->f_flag = fflags & FMASK;
 
 	/* Open path. */
 	error = copyin_path(uap->path, uap->path_len, &path);
 	if (error != 0) {
 		fdrop(fp, td);
 		return (error);
 	}
 	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, uap->dirfd.fd,
 	    &rights, td);
 	error = vn_open(&nd, &fflags, 0777 & ~td->td_proc->p_fd->fd_cmask, fp);
 	cloudabi_freestr(path);
 	if (error != 0) {
 		/* Custom operations provided. */
 		if (error == ENXIO && fp->f_ops != &badfileops)
 			goto success;
 
 		/*
 		 * POSIX compliance: return ELOOP in case openat() is
 		 * called on a symbolic link and O_NOFOLLOW is set.
 		 */
 		if (error == EMLINK)
 			error = ELOOP;
 		fdrop(fp, td);
 		return (error);
 	}
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	filecaps_free(&nd.ni_filecaps);
 	fp->f_vnode = vp = nd.ni_vp;
 
 	/* Install vnode operations if no custom operations are provided. */
 	if (fp->f_ops == &badfileops) {
 		fp->f_seqcount = 1;
 		finit(fp, (fflags & FMASK) | (fp->f_flag & FHASLOCK),
 		    DTYPE_VNODE, vp, &vnops);
 	}
 	VOP_UNLOCK(vp, 0);
 
 	/* Truncate file. */
 	if (fflags & O_TRUNC) {
 		error = fo_truncate(fp, 0, td->td_ucred, td);
 		if (error != 0) {
 			fdrop(fp, td);
 			return (error);
 		}
 	}
 
 success:
 	/* Determine which Capsicum rights to set on the file descriptor. */
 	cloudabi_remove_conflicting_rights(cloudabi_convert_filetype(fp),
 	    &fds.fs_rights_base, &fds.fs_rights_inheriting);
 	cloudabi_convert_rights(fds.fs_rights_base | fds.fs_rights_inheriting,
 	    &fcaps.fc_rights);
 	if (cap_rights_is_set(&fcaps.fc_rights))
 		fcaps.fc_fcntls = CAP_FCNTL_SETFL;
 
 	error = finstall(td, fp, &fd, fflags, &fcaps);
 	fdrop(fp, td);
 	if (error != 0)
 		return (error);
 	td->td_retval[0] = fd;
 	return (0);
 }
 
 /* Converts a FreeBSD directory entry structure and writes it to userspace. */
 static int
 write_dirent(struct dirent *bde, cloudabi_dircookie_t cookie, struct uio *uio)
 {
 	cloudabi_dirent_t cde = {
 		.d_next = cookie,
 		.d_ino = bde->d_fileno,
 		.d_namlen = bde->d_namlen,
 	};
 	size_t len;
 	int error;
 
 	/* Convert file type. */
 	switch (bde->d_type) {
 	case DT_BLK:
 		cde.d_type = CLOUDABI_FILETYPE_BLOCK_DEVICE;
 		break;
 	case DT_CHR:
 		cde.d_type = CLOUDABI_FILETYPE_CHARACTER_DEVICE;
 		break;
 	case DT_DIR:
 		cde.d_type = CLOUDABI_FILETYPE_DIRECTORY;
 		break;
 	case DT_FIFO:
 		cde.d_type = CLOUDABI_FILETYPE_SOCKET_STREAM;
 		break;
 	case DT_LNK:
 		cde.d_type = CLOUDABI_FILETYPE_SYMBOLIC_LINK;
 		break;
 	case DT_REG:
 		cde.d_type = CLOUDABI_FILETYPE_REGULAR_FILE;
 		break;
 	case DT_SOCK:
 		/* The exact socket type cannot be derived. */
 		cde.d_type = CLOUDABI_FILETYPE_SOCKET_STREAM;
 		break;
 	default:
 		cde.d_type = CLOUDABI_FILETYPE_UNKNOWN;
 		break;
 	}
 
 	/* Write directory entry structure. */
 	len = sizeof(cde) < uio->uio_resid ? sizeof(cde) : uio->uio_resid;
 	error = uiomove(&cde, len, uio);
 	if (error != 0)
 		return (error);
 
 	/* Write filename. */
 	len = bde->d_namlen < uio->uio_resid ? bde->d_namlen : uio->uio_resid;
 	return (uiomove(bde->d_name, len, uio));
 }
 
 int
 cloudabi_sys_file_readdir(struct thread *td,
     struct cloudabi_sys_file_readdir_args *uap)
 {
 	struct iovec iov = {
 		.iov_base = uap->buf,
 		.iov_len = uap->buf_len
 	};
 	struct uio uio = {
 		.uio_iov = &iov,
 		.uio_iovcnt = 1,
 		.uio_resid = iov.iov_len,
 		.uio_segflg = UIO_USERSPACE,
 		.uio_rw = UIO_READ,
 		.uio_td = td
 	};
 	struct file *fp;
 	struct vnode *vp;
 	void *readbuf;
 	cloudabi_dircookie_t offset;
 	int error;
 
 	/* Obtain directory vnode. */
 	error = getvnode(td, uap->fd, &cap_read_rights, &fp);
 	if (error != 0) {
 		if (error == EINVAL)
 			return (ENOTDIR);
 		return (error);
 	}
 	if ((fp->f_flag & FREAD) == 0) {
 		fdrop(fp, td);
 		return (EBADF);
 	}
 
 	/*
 	 * Call VOP_READDIR() and convert resulting data until the user
 	 * provided buffer is filled.
 	 */
 	readbuf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);
 	offset = uap->cookie;
 	vp = fp->f_vnode;
 	while (uio.uio_resid > 0) {
 		struct iovec readiov = {
 			.iov_base = readbuf,
 			.iov_len = MAXBSIZE
 		};
 		struct uio readuio = {
 			.uio_iov = &readiov,
 			.uio_iovcnt = 1,
 			.uio_rw = UIO_READ,
 			.uio_segflg = UIO_SYSSPACE,
 			.uio_td = td,
 			.uio_resid = MAXBSIZE,
 			.uio_offset = offset
 		};
 		struct dirent *bde;
 		unsigned long *cookies, *cookie;
 		size_t readbuflen;
 		int eof, ncookies;
 
 		/* Validate file type. */
 		vn_lock(vp, LK_SHARED | LK_RETRY);
 		if (vp->v_type != VDIR) {
 			VOP_UNLOCK(vp, 0);
 			error = ENOTDIR;
 			goto done;
 		}
 #ifdef MAC
 		error = mac_vnode_check_readdir(td->td_ucred, vp);
 		if (error != 0) {
 			VOP_UNLOCK(vp, 0);
 			goto done;
 		}
 #endif /* MAC */
 
 		/* Read new directory entries. */
 		cookies = NULL;
 		ncookies = 0;
 		error = VOP_READDIR(vp, &readuio, fp->f_cred, &eof,
 		    &ncookies, &cookies);
 		VOP_UNLOCK(vp, 0);
 		if (error != 0)
 			goto done;
 
 		/* Convert entries to CloudABI's format. */
 		readbuflen = MAXBSIZE - readuio.uio_resid;
 		bde = readbuf;
 		cookie = cookies;
 		while (readbuflen >= offsetof(struct dirent, d_name) &&
 		    uio.uio_resid > 0 && ncookies > 0) {
 			/* Ensure that the returned offset always increases. */
 			if (readbuflen >= bde->d_reclen && bde->d_fileno != 0 &&
 			    *cookie > offset) {
 				error = write_dirent(bde, *cookie, &uio);
 				if (error != 0) {
 					free(cookies, M_TEMP);
 					goto done;
 				}
 			}
 
 			if (offset < *cookie)
 				offset = *cookie;
 			++cookie;
 			--ncookies;
 			readbuflen -= bde->d_reclen;
 			bde = (struct dirent *)((char *)bde + bde->d_reclen);
 		}
 		free(cookies, M_TEMP);
 		if (eof)
 			break;
 	}
 
 done:
 	fdrop(fp, td);
 	free(readbuf, M_TEMP);
 	if (error != 0)
 		return (error);
 
 	/* Return number of bytes copied to userspace. */
 	td->td_retval[0] = uap->buf_len - uio.uio_resid;
 	return (0);
 }
 
 int
 cloudabi_sys_file_readlink(struct thread *td,
     struct cloudabi_sys_file_readlink_args *uap)
 {
 	char *path;
 	int error;
 
 	error = copyin_path(uap->path, uap->path_len, &path);
 	if (error != 0)
 		return (error);
 
 	error = kern_readlinkat(td, uap->fd, path, UIO_SYSSPACE,
 	    uap->buf, UIO_USERSPACE, uap->buf_len);
 	cloudabi_freestr(path);
 	return (error);
 }
 
 int
 cloudabi_sys_file_rename(struct thread *td,
     struct cloudabi_sys_file_rename_args *uap)
 {
 	char *old, *new;
 	int error;
 
 	error = copyin_path(uap->path1, uap->path1_len, &old);
 	if (error != 0)
 		return (error);
 	error = copyin_path(uap->path2, uap->path2_len, &new);
 	if (error != 0) {
 		cloudabi_freestr(old);
 		return (error);
 	}
 
 	error = kern_renameat(td, uap->fd1, old, uap->fd2, new,
 	    UIO_SYSSPACE);
 	cloudabi_freestr(old);
 	cloudabi_freestr(new);
 	return (error);
 }
 
 /* Converts a FreeBSD stat structure to a CloudABI stat structure. */
 static void
 convert_stat(const struct stat *sb, cloudabi_filestat_t *csb)
 {
 	cloudabi_filestat_t res = {
 		.st_dev		= sb->st_dev,
 		.st_ino		= sb->st_ino,
 		.st_nlink	= sb->st_nlink,
 		.st_size	= sb->st_size,
 	};
 
 	cloudabi_convert_timespec(&sb->st_atim, &res.st_atim);
 	cloudabi_convert_timespec(&sb->st_mtim, &res.st_mtim);
 	cloudabi_convert_timespec(&sb->st_ctim, &res.st_ctim);
 	*csb = res;
 }
 
 int
 cloudabi_sys_file_stat_fget(struct thread *td,
     struct cloudabi_sys_file_stat_fget_args *uap)
 {
 	struct stat sb;
 	cloudabi_filestat_t csb;
 	struct file *fp;
 	cloudabi_filetype_t filetype;
 	int error;
 
 	memset(&csb, 0, sizeof(csb));
 
 	/* Fetch file descriptor attributes. */
 	error = fget(td, uap->fd, &cap_fstat_rights, &fp);
 	if (error != 0)
 		return (error);
 	error = fo_stat(fp, &sb, td->td_ucred, td);
 	if (error != 0) {
 		fdrop(fp, td);
 		return (error);
 	}
 	filetype = cloudabi_convert_filetype(fp);
 	fdrop(fp, td);
 
 	/* Convert attributes to CloudABI's format. */
 	convert_stat(&sb, &csb);
 	csb.st_filetype = filetype;
 	return (copyout(&csb, uap->buf, sizeof(csb)));
 }
 
 /* Converts timestamps to arguments to futimens() and utimensat(). */
 static void
 convert_utimens_arguments(const cloudabi_filestat_t *fs,
     cloudabi_fsflags_t flags, struct timespec *ts)
 {
 
 	if ((flags & CLOUDABI_FILESTAT_ATIM_NOW) != 0) {
 		ts[0].tv_nsec = UTIME_NOW;
 	} else if ((flags & CLOUDABI_FILESTAT_ATIM) != 0) {
 		ts[0].tv_sec = fs->st_atim / 1000000000;
 		ts[0].tv_nsec = fs->st_atim % 1000000000;
 	} else {
 		ts[0].tv_nsec = UTIME_OMIT;
 	}
 
 	if ((flags & CLOUDABI_FILESTAT_MTIM_NOW) != 0) {
 		ts[1].tv_nsec = UTIME_NOW;
 	} else if ((flags & CLOUDABI_FILESTAT_MTIM) != 0) {
 		ts[1].tv_sec = fs->st_mtim / 1000000000;
 		ts[1].tv_nsec = fs->st_mtim % 1000000000;
 	} else {
 		ts[1].tv_nsec = UTIME_OMIT;
 	}
 }
 
 int
 cloudabi_sys_file_stat_fput(struct thread *td,
     struct cloudabi_sys_file_stat_fput_args *uap)
 {
 	cloudabi_filestat_t fs;
 	struct timespec ts[2];
 	int error;
 
 	error = copyin(uap->buf, &fs, sizeof(fs));
 	if (error != 0)
 		return (error);
 
 	/*
 	 * Only support truncation and timestamp modification separately
 	 * for now, to prevent unnecessary code duplication.
 	 */
 	if ((uap->flags & CLOUDABI_FILESTAT_SIZE) != 0) {
 		/* Call into kern_ftruncate() for file truncation. */
 		if ((uap->flags & ~CLOUDABI_FILESTAT_SIZE) != 0)
 			return (EINVAL);
 		return (kern_ftruncate(td, uap->fd, fs.st_size));
 	} else if ((uap->flags & (CLOUDABI_FILESTAT_ATIM |
 	    CLOUDABI_FILESTAT_ATIM_NOW | CLOUDABI_FILESTAT_MTIM |
 	    CLOUDABI_FILESTAT_MTIM_NOW)) != 0) {
 		/* Call into kern_futimens() for timestamp modification. */
 		if ((uap->flags & ~(CLOUDABI_FILESTAT_ATIM |
 		    CLOUDABI_FILESTAT_ATIM_NOW | CLOUDABI_FILESTAT_MTIM |
 		    CLOUDABI_FILESTAT_MTIM_NOW)) != 0)
 			return (EINVAL);
 		convert_utimens_arguments(&fs, uap->flags, ts);
 		return (kern_futimens(td, uap->fd, ts, UIO_SYSSPACE));
 	}
 	return (EINVAL);
 }
 
 int
 cloudabi_sys_file_stat_get(struct thread *td,
     struct cloudabi_sys_file_stat_get_args *uap)
 {
 	struct stat sb;
 	cloudabi_filestat_t csb;
 	char *path;
 	int error;
 
 	memset(&csb, 0, sizeof(csb));
 
 	error = copyin_path(uap->path, uap->path_len, &path);
 	if (error != 0)
 		return (error);
 
 	error = kern_statat(td,
 	    (uap->fd.flags & CLOUDABI_LOOKUP_SYMLINK_FOLLOW) != 0 ? 0 :
 	    AT_SYMLINK_NOFOLLOW, uap->fd.fd, path, UIO_SYSSPACE, &sb, NULL);
 	cloudabi_freestr(path);
 	if (error != 0)
 		return (error);
 
 	/* Convert results and return them. */
 	convert_stat(&sb, &csb);
 	if (S_ISBLK(sb.st_mode))
 		csb.st_filetype = CLOUDABI_FILETYPE_BLOCK_DEVICE;
 	else if (S_ISCHR(sb.st_mode))
 		csb.st_filetype = CLOUDABI_FILETYPE_CHARACTER_DEVICE;
 	else if (S_ISDIR(sb.st_mode))
 		csb.st_filetype = CLOUDABI_FILETYPE_DIRECTORY;
 	else if (S_ISFIFO(sb.st_mode))
 		csb.st_filetype = CLOUDABI_FILETYPE_SOCKET_STREAM;
 	else if (S_ISREG(sb.st_mode))
 		csb.st_filetype = CLOUDABI_FILETYPE_REGULAR_FILE;
 	else if (S_ISSOCK(sb.st_mode)) {
 		/* Inaccurate, but the best that we can do. */
 		csb.st_filetype = CLOUDABI_FILETYPE_SOCKET_STREAM;
 	} else if (S_ISLNK(sb.st_mode))
 		csb.st_filetype = CLOUDABI_FILETYPE_SYMBOLIC_LINK;
 	else
 		csb.st_filetype = CLOUDABI_FILETYPE_UNKNOWN;
 	return (copyout(&csb, uap->buf, sizeof(csb)));
 }
 
 int
 cloudabi_sys_file_stat_put(struct thread *td,
     struct cloudabi_sys_file_stat_put_args *uap)
 {
 	cloudabi_filestat_t fs;
 	struct timespec ts[2];
 	char *path;
 	int error;
 
 	/*
 	 * Only support timestamp modification for now, as there is no
 	 * truncateat().
 	 */
 	if ((uap->flags & ~(CLOUDABI_FILESTAT_ATIM |
 	    CLOUDABI_FILESTAT_ATIM_NOW | CLOUDABI_FILESTAT_MTIM |
 	    CLOUDABI_FILESTAT_MTIM_NOW)) != 0)
 		return (EINVAL);
 
 	error = copyin(uap->buf, &fs, sizeof(fs));
 	if (error != 0)
 		return (error);
 	error = copyin_path(uap->path, uap->path_len, &path);
 	if (error != 0)
 		return (error);
 
 	convert_utimens_arguments(&fs, uap->flags, ts);
 	error = kern_utimensat(td, uap->fd.fd, path, UIO_SYSSPACE, ts,
 	    UIO_SYSSPACE, (uap->fd.flags & CLOUDABI_LOOKUP_SYMLINK_FOLLOW) ?
 	    0 : AT_SYMLINK_NOFOLLOW);
 	cloudabi_freestr(path);
 	return (error);
 }
 
 int
 cloudabi_sys_file_symlink(struct thread *td,
     struct cloudabi_sys_file_symlink_args *uap)
 {
 	char *path1, *path2;
 	int error;
 
 	error = copyin_path(uap->path1, uap->path1_len, &path1);
 	if (error != 0)
 		return (error);
 	error = copyin_path(uap->path2, uap->path2_len, &path2);
 	if (error != 0) {
 		cloudabi_freestr(path1);
 		return (error);
 	}
 
 	error = kern_symlinkat(td, path1, uap->fd, path2, UIO_SYSSPACE);
 	cloudabi_freestr(path1);
 	cloudabi_freestr(path2);
 	return (error);
 }
 
 int
 cloudabi_sys_file_unlink(struct thread *td,
     struct cloudabi_sys_file_unlink_args *uap)
 {
 	char *path;
 	int error;
 
 	error = copyin_path(uap->path, uap->path_len, &path);
 	if (error != 0)
 		return (error);
 
 	if (uap->flags & CLOUDABI_UNLINK_REMOVEDIR)
-		error = kern_rmdirat(td, uap->fd, path, UIO_SYSSPACE, 0);
+		error = kern_frmdirat(td, uap->fd, path, FD_NONE,
+		    UIO_SYSSPACE, 0);
 	else
-		error = kern_unlinkat(td, uap->fd, path, UIO_SYSSPACE, 0, 0);
+		error = kern_funlinkat(td, uap->fd, path, FD_NONE,
+		    UIO_SYSSPACE, 0, 0);
 	cloudabi_freestr(path);
 	return (error);
 }
Index: head/sys/compat/freebsd32/syscalls.master
===================================================================
--- head/sys/compat/freebsd32/syscalls.master	(revision 345981)
+++ head/sys/compat/freebsd32/syscalls.master	(revision 345982)
@@ -1,1149 +1,1151 @@
  $FreeBSD$
 ;	from: @(#)syscalls.master	8.2 (Berkeley) 1/13/94
 ;	from: src/sys/kern/syscalls.master 1.107
 ;
 ; System call name/number master file.
 ; Processed to create init_sysent.c, syscalls.c and syscall.h.
 
 ; Columns: number audit type name alt{name,tag,rtyp}/comments
 ;	number	system call number, must be in order
 ;	audit	the audit event associated with the system call
 ;		A value of AUE_NULL means no auditing, but it also means that
 ;		there is no audit event for the call at this time. For the
 ;		case where the event exists, but we don't want auditing, the
 ;		event should be #defined to AUE_NULL in audit_kevents.h.
 ;	type	one of STD, OBSOL, UNIMPL, COMPAT, COMPAT4, COMPAT6,
 ;		COMPAT7, COMPAT11, NODEF, NOARGS, NOPROTO, NOSTD
 ;		The COMPAT* options may be combined with one or more NO*
 ;		options separated by '|' with no spaces (e.g. COMPAT|NOARGS)
 ;	name	pseudo-prototype of syscall routine
 ;		If one of the following alts is different, then all appear:
 ;	altname	name of system call if different
 ;	alttag	name of args struct tag if different from [o]`name'"_args"
 ;	altrtyp	return type if not int (bogus - syscalls always return int)
 ;		for UNIMPL/OBSOL, name continues with comments
 
 ; types:
 ;	STD	always included
 ;	COMPAT	included on COMPAT #ifdef
 ;	COMPAT4	included on COMPAT_FREEBSD4 #ifdef (FreeBSD 4 compat)
 ;	COMPAT6	included on COMPAT_FREEBSD6 #ifdef (FreeBSD 6 compat)
 ;	COMPAT7	included on COMPAT_FREEBSD7 #ifdef (FreeBSD 7 compat)
 ;	COMPAT10 included on COMPAT_FREEBSD10 #ifdef (FreeBSD 10 compat)
 ;	COMPAT11 included on COMPAT_FREEBSD11 #ifdef (FreeBSD 11 compat)
 ;	OBSOL	obsolete, not included in system, only specifies name
 ;	UNIMPL	not implemented, placeholder only
 ;	NOSTD	implemented but as a lkm that can be statically
 ;		compiled in; sysent entry will be filled with lkmressys
 ;		so the SYSCALL_MODULE macro works
 ;	NOARGS	same as STD except do not create structure in sys/sysproto.h
 ;	NODEF	same as STD except only have the entry in the syscall table
 ;		added.  Meaning - do not create structure or function
 ;		prototype in sys/sysproto.h
 ;	NOPROTO	same as STD except do not create structure or
 ;		function prototype in sys/sysproto.h.  Does add a
 ;		definition to syscall.h besides adding a sysent.
 
 ; #ifdef's, etc. may be included, and are copied to the output files.
 
 #include <sys/param.h>
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
 #include <sys/mount.h>
 #include <sys/socket.h>
 #include <compat/freebsd32/freebsd32.h>
 #include <compat/freebsd32/freebsd32_proto.h>
 
 #if !defined(PAD64_REQUIRED) && !defined(__amd64__)
 #define PAD64_REQUIRED
 #endif
 
 ; Reserved/unimplemented system calls in the range 0-150 inclusive
 ; are reserved for use in future Berkeley releases.
 ; Additional system calls implemented in vendor and other
 ; redistributions should be placed in the reserved range at the end
 ; of the current calls.
 
 0	AUE_NULL	NOPROTO	{ int nosys(void); } syscall nosys_args int
 1	AUE_EXIT	NOPROTO	{ void sys_exit(int rval); } exit \
 				    sys_exit_args void
 2	AUE_FORK	NOPROTO	{ int fork(void); }
 3	AUE_READ	NOPROTO	{ ssize_t read(int fd, void *buf, \
 				    size_t nbyte); }
 4	AUE_WRITE	NOPROTO	{ ssize_t write(int fd, const void *buf, \
 				    size_t nbyte); }
 5	AUE_OPEN_RWTC	NOPROTO	{ int open(const char *path, int flags, \
 				    mode_t mode); }
 6	AUE_CLOSE	NOPROTO	{ int close(int fd); }
 7	AUE_WAIT4	STD	{ int freebsd32_wait4(int pid, int *status, \
 				    int options, struct rusage32 *rusage); }
 8	AUE_CREAT	OBSOL	old creat
 9	AUE_LINK	NOPROTO	{ int link(const char *path, \
 				    const char *link); }
 10	AUE_UNLINK	NOPROTO	{ int unlink(const char *path); }
 11	AUE_NULL	OBSOL	execv
 12	AUE_CHDIR	NOPROTO	{ int chdir(const char *path); }
 13	AUE_FCHDIR	NOPROTO	{ int fchdir(int fd); }
 14	AUE_MKNOD	COMPAT11|NOPROTO { int mknod(const char *path, \
 					int mode, uint32_t dev); }
 15	AUE_CHMOD	NOPROTO	{ int chmod(const char *path, mode_t mode); }
 16	AUE_CHOWN	NOPROTO	{ int chown(const char *path, int uid, int gid); }
 17	AUE_NULL	NOPROTO	{ void *break(char *nsize); }
 18	AUE_GETFSSTAT	COMPAT4	{ int freebsd32_getfsstat( \
 				    struct statfs32 *buf, long bufsize, \
 				    int mode); }
 19	AUE_LSEEK	COMPAT	{ int freebsd32_lseek(int fd, int offset, \
 				    int whence); }
 20	AUE_GETPID	NOPROTO	{ pid_t getpid(void); }
 21	AUE_MOUNT	NOPROTO	{ int mount(const char *type, \
 				    const char *path, \
 				    int flags, void *data); }
 22	AUE_UMOUNT	NOPROTO	{ int unmount(const char *path, int flags); }
 23	AUE_SETUID	NOPROTO	{ int setuid(uid_t uid); }
 24	AUE_GETUID	NOPROTO	{ uid_t getuid(void); }
 25	AUE_GETEUID	NOPROTO	{ uid_t geteuid(void); }
 26	AUE_PTRACE	NOPROTO	{ int ptrace(int req, pid_t pid, \
 				    caddr_t addr, int data); }
 27	AUE_RECVMSG	STD	{ int freebsd32_recvmsg(int s, struct msghdr32 *msg, \
 				    int flags); }
 28	AUE_SENDMSG	STD	{ int freebsd32_sendmsg(int s, struct msghdr32 *msg, \
 				    int flags); }
 29	AUE_RECVFROM	STD	{ int freebsd32_recvfrom(int s, void *buf, \
 				    uint32_t len, int flags, \
 				    struct sockaddr *from, \
 				    uint32_t fromlenaddr); }
 30	AUE_ACCEPT	NOPROTO	{ int accept(int s, struct sockaddr *name, \
 				    int *anamelen); }
 31	AUE_GETPEERNAME	NOPROTO	{ int getpeername(int fdes, \
 				    struct sockaddr *asa, \
 				    int *alen); }
 32	AUE_GETSOCKNAME	NOPROTO	{ int getsockname(int fdes, \
 				    struct sockaddr *asa, \
 				    int *alen); }
 33	AUE_ACCESS	NOPROTO	{ int access(const char *path, int amode); }
 34	AUE_CHFLAGS	NOPROTO	{ int chflags(const char *path, u_long flags); }
 35	AUE_FCHFLAGS	NOPROTO	{ int fchflags(int fd, u_long flags); }
 36	AUE_SYNC	NOPROTO	{ int sync(void); }
 37	AUE_KILL	NOPROTO	{ int kill(int pid, int signum); }
 38	AUE_STAT	COMPAT	{ int freebsd32_stat(const char *path, \
 				    struct ostat32 *ub); }
 39	AUE_GETPPID	NOPROTO	{ pid_t getppid(void); }
 40	AUE_LSTAT	COMPAT	{ int freebsd32_lstat(const char *path, \
 				    struct ostat *ub); }
 41	AUE_DUP		NOPROTO	{ int dup(u_int fd); }
 42	AUE_PIPE	COMPAT10	{ int freebsd32_pipe(void); }
 43	AUE_GETEGID	NOPROTO	{ gid_t getegid(void); }
 44	AUE_PROFILE	NOPROTO	{ int profil(char *samples, size_t size, \
 				    size_t offset, u_int scale); }
 45	AUE_KTRACE	NOPROTO	{ int ktrace(const char *fname, int ops, \
 				    int facs, int pid); }
 46	AUE_SIGACTION	COMPAT	{ int freebsd32_sigaction( int signum, \
 				   struct osigaction32 *nsa, \
 				   struct osigaction32 *osa); }
 47	AUE_GETGID	NOPROTO	{ gid_t getgid(void); }
 48	AUE_SIGPROCMASK	COMPAT	{ int freebsd32_sigprocmask(int how, \
 				   osigset_t mask); }
 49	AUE_GETLOGIN	NOPROTO	{ int getlogin(char *namebuf, \
 				    u_int namelen); }
 50	AUE_SETLOGIN	NOPROTO	{ int setlogin(const char *namebuf); }
 51	AUE_ACCT	NOPROTO	{ int acct(const char *path); }
 52	AUE_SIGPENDING	COMPAT	{ int freebsd32_sigpending(void); }
 53	AUE_SIGALTSTACK	STD	{ int freebsd32_sigaltstack( \
 				    struct sigaltstack32 *ss, \
 				    struct sigaltstack32 *oss); }
 54	AUE_IOCTL	STD	{ int freebsd32_ioctl(int fd, uint32_t com, \
 				    struct md_ioctl32 *data); }
 55	AUE_REBOOT	NOPROTO	{ int reboot(int opt); }
 56	AUE_REVOKE	NOPROTO	{ int revoke(const char *path); }
 57	AUE_SYMLINK	NOPROTO	{ int symlink(const char *path, \
 				    const char *link); }
 58	AUE_READLINK	NOPROTO	{ ssize_t readlink(const char *path, char *buf, \
 				    size_t count); }
 59	AUE_EXECVE	STD	{ int freebsd32_execve(const char *fname, \
 				    uint32_t *argv, uint32_t *envv); }
 60	AUE_UMASK	NOPROTO	{ int umask(mode_t newmask); }
 61	AUE_CHROOT	NOPROTO	{ int chroot(const char *path); }
 62	AUE_FSTAT	COMPAT	{ int freebsd32_fstat(int fd, \
 				    struct ostat32 *ub); }
 63	AUE_NULL	OBSOL	ogetkerninfo
 64	AUE_NULL	COMPAT	{ int freebsd32_getpagesize( \
 				    int32_t dummy); }
 65	AUE_MSYNC	NOPROTO	{ int msync(void *addr, size_t len, \
 				    int flags); }
 66	AUE_VFORK	NOPROTO	{ int vfork(void); }
 67	AUE_NULL	OBSOL	vread
 68	AUE_NULL	OBSOL	vwrite
 69	AUE_SBRK	NOPROTO	{ int sbrk(int incr); }
 70	AUE_SSTK	NOPROTO	{ int sstk(int incr); }
 71	AUE_MMAP	COMPAT|NOPROTO	{ void *mmap(void *addr, int len, \
 				    int prot, int flags, int fd, int pos); }
 72	AUE_O_VADVISE	COMPAT11|NOPROTO	{ int vadvise(int anom); }
 73	AUE_MUNMAP	NOPROTO	{ int munmap(void *addr, size_t len); }
 74	AUE_MPROTECT	STD	{ int freebsd32_mprotect(void *addr, \
 				    size_t len, int prot); }
 75	AUE_MADVISE	NOPROTO	{ int madvise(void *addr, size_t len, \
 				    int behav); }
 76	AUE_NULL	OBSOL	vhangup
 77	AUE_NULL	OBSOL	vlimit
 78	AUE_MINCORE	NOPROTO	{ int mincore(const void *addr, size_t len, \
 				    char *vec); }
 79	AUE_GETGROUPS	NOPROTO	{ int getgroups(u_int gidsetsize, \
 				    gid_t *gidset); }
 80	AUE_SETGROUPS	NOPROTO	{ int setgroups(u_int gidsetsize, \
 				    gid_t *gidset); }
 81	AUE_GETPGRP	NOPROTO	{ int getpgrp(void); }
 82	AUE_SETPGRP	NOPROTO	{ int setpgid(int pid, int pgid); }
 83	AUE_SETITIMER	STD	{ int freebsd32_setitimer(u_int which, \
 				    struct itimerval32 *itv, \
 				    struct itimerval32 *oitv); }
 84	AUE_NULL	OBSOL	owait
 ; XXX implement
 85	AUE_SWAPON	NOPROTO	{ int swapon(const char *name); }
 86	AUE_GETITIMER	STD	{ int freebsd32_getitimer(u_int which, \
 				    struct itimerval32 *itv); }
 87	AUE_O_GETHOSTNAME	OBSOL	ogethostname
 88	AUE_O_SETHOSTNAME	OBSOL	osethostname
 89	AUE_GETDTABLESIZE	NOPROTO	{ int getdtablesize(void); }
 90	AUE_DUP2	NOPROTO	{ int dup2(u_int from, u_int to); }
 91	AUE_NULL	UNIMPL	getdopt
 92	AUE_FCNTL	STD	{ int freebsd32_fcntl(int fd, int cmd, \
 				    int arg); }
 93	AUE_SELECT	STD	{ int freebsd32_select(int nd, fd_set *in, \
 				    fd_set *ou, fd_set *ex, \
 				    struct timeval32 *tv); }
 94	AUE_NULL	UNIMPL	setdopt
 95	AUE_FSYNC	NOPROTO	{ int fsync(int fd); }
 96	AUE_SETPRIORITY	NOPROTO	{ int setpriority(int which, int who, \
 				    int prio); }
 97	AUE_SOCKET	NOPROTO	{ int socket(int domain, int type, \
 				    int protocol); }
 98	AUE_CONNECT	NOPROTO	{ int connect(int s, \
 				    const struct sockaddr *name, \
 				    int namelen); }
 99	AUE_NULL	OBSOL	oaccept
 100	AUE_GETPRIORITY	NOPROTO	{ int getpriority(int which, int who); }
 101	AUE_NULL	OBSOL	osend
 102	AUE_NULL	OBSOL	orecv
 103	AUE_SIGRETURN	COMPAT	{ int freebsd32_sigreturn( \
 				    struct ia32_sigcontext3 *sigcntxp); }
 104	AUE_BIND	NOPROTO	{ int bind(int s, const struct sockaddr *name, \
 				    int namelen); }
 105	AUE_SETSOCKOPT	NOPROTO	{ int setsockopt(int s, int level, \
 				    int name, const void *val, int valsize); }
 106	AUE_LISTEN	NOPROTO	{ int listen(int s, int backlog); }
 107	AUE_NULL	OBSOL	vtimes
 108	AUE_O_SIGVEC	COMPAT	{ int freebsd32_sigvec(int signum, \
 				     struct sigvec32 *nsv, \
 				     struct sigvec32 *osv); }
 109	AUE_O_SIGBLOCK	COMPAT	{ int freebsd32_sigblock(int mask); }
 110	AUE_O_SIGSETMASK	COMPAT	{ int freebsd32_sigsetmask( int mask); }
 111	AUE_SIGSUSPEND	COMPAT	{ int freebsd32_sigsuspend( int mask); }
 112	AUE_O_SIGSTACK	COMPAT	{ int freebsd32_sigstack( \
 				     struct sigstack32 *nss, \
 				     struct sigstack32 *oss); }
 113	AUE_NULL	OBSOL	orecvmsg
 114	AUE_NULL	OBSOL	osendmsg
 115	AUE_NULL	OBSOL	vtrace
 116	AUE_GETTIMEOFDAY	STD	{ int freebsd32_gettimeofday( \
 				    struct timeval32 *tp, \
 				    struct timezone *tzp); }
 117	AUE_GETRUSAGE	STD	{ int freebsd32_getrusage(int who, \
 				    struct rusage32 *rusage); }
 118	AUE_GETSOCKOPT	NOPROTO	{ int getsockopt(int s, int level, \
 				    int name, void *val, int *avalsize); }
 119	AUE_NULL	UNIMPL	resuba (BSD/OS 2.x)
 120	AUE_READV	STD	{ int freebsd32_readv(int fd, \
 				    struct iovec32 *iovp, u_int iovcnt); }
 121	AUE_WRITEV	STD	{ int freebsd32_writev(int fd, \
 				    struct iovec32 *iovp, u_int iovcnt); }
 122	AUE_SETTIMEOFDAY	STD	{ int freebsd32_settimeofday( \
 				    struct timeval32 *tv, \
 				    struct timezone *tzp); }
 123	AUE_FCHOWN	NOPROTO	{ int fchown(int fd, int uid, int gid); }
 124	AUE_FCHMOD	NOPROTO	{ int fchmod(int fd, mode_t mode); }
 125	AUE_RECVFROM	OBSOL	orecvfrom
 126	AUE_SETREUID	NOPROTO	{ int setreuid(int ruid, int euid); }
 127	AUE_SETREGID	NOPROTO	{ int setregid(int rgid, int egid); }
 128	AUE_RENAME	NOPROTO	{ int rename(const char *from, \
 				    const char *to); }
 129	AUE_TRUNCATE	COMPAT|NOPROTO	{ int truncate(const char *path, \
 					    int length); }
 130	AUE_FTRUNCATE	COMPAT|NOPROTO	{ int ftruncate(int fd, int length); }
 131	AUE_FLOCK	NOPROTO	{ int flock(int fd, int how); }
 132	AUE_MKFIFO	NOPROTO	{ int mkfifo(const char *path, mode_t mode); }
 133	AUE_SENDTO	NOPROTO	{ int sendto(int s, const void *buf, \
 				    size_t len, int flags, \
 				    const struct sockaddr *to, \
 				    int tolen); }
 134	AUE_SHUTDOWN	NOPROTO	{ int shutdown(int s, int how); }
 135	AUE_SOCKETPAIR	NOPROTO	{ int socketpair(int domain, int type, \
 				    int protocol, int *rsv); }
 136	AUE_MKDIR	NOPROTO	{ int mkdir(const char *path, mode_t mode); }
 137	AUE_RMDIR	NOPROTO	{ int rmdir(const char *path); }
 138	AUE_UTIMES	STD	{ int freebsd32_utimes(const char *path, \
 				    struct timeval32 *tptr); }
 139	AUE_NULL	OBSOL	4.2 sigreturn
 140	AUE_ADJTIME	STD	{ int freebsd32_adjtime( \
 				    struct timeval32 *delta, \
 				    struct timeval32 *olddelta); }
 141	AUE_GETPEERNAME	OBSOL	ogetpeername
 142	AUE_SYSCTL	OBSOL	ogethostid
 143	AUE_SYSCTL	OBSOL	sethostid
 144	AUE_GETRLIMIT	OBSOL	getrlimit
 145	AUE_SETRLIMIT	OBSOL	setrlimit
 146	AUE_KILLPG	OBSOL	killpg
 147	AUE_SETSID	NOPROTO	{ int setsid(void); }
 148	AUE_QUOTACTL	NOPROTO	{ int quotactl(const char *path, int cmd, \
 				    int uid, void *arg); }
 149	AUE_O_QUOTA	OBSOL oquota
 150	AUE_GETSOCKNAME	OBSOL ogetsockname
 
 ; Syscalls 151-180 inclusive are reserved for vendor-specific
 ; system calls.  (This includes various calls added for compatibility
 ; with other Unix variants.)
 ; Some of these calls are now supported by BSD...
 151	AUE_NULL	UNIMPL	sem_lock (BSD/OS 2.x)
 152	AUE_NULL	UNIMPL	sem_wakeup (BSD/OS 2.x)
 153	AUE_NULL	UNIMPL	asyncdaemon (BSD/OS 2.x)
 ; 154 is initialised by the NLM code, if present.
 154	AUE_NULL	UNIMPL	nlm_syscall
 ; 155 is initialized by the NFS code, if present.
 ; XXX this is a problem!!!
 155	AUE_NFS_SVC	UNIMPL	nfssvc
 156	AUE_GETDIRENTRIES COMPAT { int freebsd32_getdirentries(int fd, \
 				    char *buf, u_int count, uint32_t *basep); }
 157	AUE_STATFS	COMPAT4	{ int freebsd32_statfs(const char *path, \
 				    struct statfs32 *buf); }
 158	AUE_FSTATFS	COMPAT4	{ int freebsd32_fstatfs(int fd, \
 				    struct statfs32 *buf); }
 159	AUE_NULL	UNIMPL	nosys
 160	AUE_LGETFH	UNIMPL	lgetfh
 161	AUE_NFS_GETFH	NOPROTO	{ int getfh(const char *fname, \
 				    struct fhandle *fhp); }
 162	AUE_SYSCTL	OBSOL	getdomainname
 163	AUE_SYSCTL	OBSOL	setdomainname
 164	AUE_NULL	OBSOL	uname
 165	AUE_SYSARCH	STD	{ int freebsd32_sysarch(int op, char *parms); }
 166	AUE_RTPRIO	NOPROTO	{ int rtprio(int function, pid_t pid, \
 				    struct rtprio *rtp); }
 167	AUE_NULL	UNIMPL	nosys
 168	AUE_NULL	UNIMPL	nosys
 169	AUE_SEMSYS	NOSTD	{ int freebsd32_semsys(int which, int a2, \
 				    int a3, int a4, int a5); }
 170	AUE_MSGSYS	NOSTD	{ int freebsd32_msgsys(int which, int a2, \
 				    int a3, int a4, int a5, int a6); }
 171	AUE_SHMSYS	NOSTD	{ int freebsd32_shmsys(uint32_t which, uint32_t a2, \
 				    uint32_t a3, uint32_t a4); }
 172	AUE_NULL	UNIMPL	nosys
 173	AUE_PREAD	COMPAT6	{ ssize_t freebsd32_pread(int fd, void *buf, \
 				    size_t nbyte, int pad, \
 				    uint32_t offset1, uint32_t offset2); }
 174	AUE_PWRITE	COMPAT6	{ ssize_t freebsd32_pwrite(int fd, \
 				    const void *buf, size_t nbyte, int pad, \
 				    uint32_t offset1, uint32_t offset2); }
 175	AUE_NULL	UNIMPL	nosys
 176	AUE_NTP_ADJTIME	NOPROTO	{ int ntp_adjtime(struct timex *tp); }
 177	AUE_NULL	UNIMPL	sfork (BSD/OS 2.x)
 178	AUE_NULL	UNIMPL	getdescriptor (BSD/OS 2.x)
 179	AUE_NULL	UNIMPL	setdescriptor (BSD/OS 2.x)
 180	AUE_NULL	UNIMPL	nosys
 
 ; Syscalls 181-199 are used by/reserved for BSD
 181	AUE_SETGID	NOPROTO	{ int setgid(gid_t gid); }
 182	AUE_SETEGID	NOPROTO	{ int setegid(gid_t egid); }
 183	AUE_SETEUID	NOPROTO	{ int seteuid(uid_t euid); }
 184	AUE_NULL	OBSOL	lfs_bmapv
 185	AUE_NULL	OBSOL	lfs_markv
 186	AUE_NULL	OBSOL	lfs_segclean
 187	AUE_NULL	OBSOL	lfs_segwait
 188	AUE_STAT	COMPAT11 { int freebsd32_stat(const char *path, \
 				    struct freebsd11_stat32 *ub); }
 189	AUE_FSTAT	COMPAT11 { int freebsd32_fstat(int fd, \
 				    struct freebsd11_stat32 *ub); }
 190	AUE_LSTAT	COMPAT11 { int freebsd32_lstat(const char *path, \
 				    struct freebsd11_stat32 *ub); }
 191	AUE_PATHCONF	NOPROTO	{ int pathconf(const char *path, int name); }
 192	AUE_FPATHCONF	NOPROTO	{ int fpathconf(int fd, int name); }
 193	AUE_NULL	UNIMPL	nosys
 194	AUE_GETRLIMIT	NOPROTO	{ int getrlimit(u_int which, \
 				    struct rlimit *rlp); } getrlimit \
 				    __getrlimit_args int
 195	AUE_SETRLIMIT	NOPROTO	{ int setrlimit(u_int which, \
 				    struct rlimit *rlp); } setrlimit \
 				    __setrlimit_args int
 196	AUE_GETDIRENTRIES COMPAT11 { int freebsd32_getdirentries(int fd, \
 				    char *buf, u_int count, int32_t *basep); }
 197	AUE_MMAP	COMPAT6	{ void *freebsd32_mmap(void *addr, \
 				    size_t len, int prot, int flags, int fd, \
 				    int pad, uint32_t pos1, uint32_t pos2); }
 198	AUE_NULL	NOPROTO	{ int nosys(void); } __syscall \
 				    __syscall_args int
 199	AUE_LSEEK	COMPAT6	{ off_t freebsd32_lseek(int fd, int pad, \
 				    uint32_t offset1, uint32_t offset2, \
 				    int whence); }
 200	AUE_TRUNCATE	COMPAT6	{ int freebsd32_truncate(const char *path, \
 				    int pad, uint32_t length1, \
 				    uint32_t length2); }
 201	AUE_FTRUNCATE	COMPAT6	{ int freebsd32_ftruncate(int fd, int pad, \
 				    uint32_t length1, uint32_t length2); }
 202	AUE_SYSCTL	STD	{ int freebsd32___sysctl(int *name, \
 				    u_int namelen, void *old, \
 				    uint32_t *oldlenp, const void *new, \
 				    uint32_t newlen); }
 203	AUE_MLOCK	NOPROTO	{ int mlock(const void *addr, \
 				    size_t len); }
 204	AUE_MUNLOCK	NOPROTO	{ int munlock(const void *addr, \
 				    size_t len); }
 205	AUE_UNDELETE	NOPROTO	{ int undelete(const char *path); }
 206	AUE_FUTIMES	STD	{ int freebsd32_futimes(int fd, \
 				    struct timeval32 *tptr); }
 207	AUE_GETPGID	NOPROTO	{ int getpgid(pid_t pid); }
 208	AUE_NULL	UNIMPL	nosys
 209	AUE_POLL	NOPROTO	{ int poll(struct pollfd *fds, u_int nfds, \
 				    int timeout); }
 
 ;
 ; The following are reserved for loadable syscalls
 ;
 210	AUE_NULL	NODEF|NOTSTATIC	lkmnosys lkmnosys nosys_args int
 211	AUE_NULL	NODEF|NOTSTATIC	lkmnosys lkmnosys nosys_args int
 212	AUE_NULL	NODEF|NOTSTATIC	lkmnosys lkmnosys nosys_args int
 213	AUE_NULL	NODEF|NOTSTATIC	lkmnosys lkmnosys nosys_args int
 214	AUE_NULL	NODEF|NOTSTATIC	lkmnosys lkmnosys nosys_args int
 215	AUE_NULL	NODEF|NOTSTATIC	lkmnosys lkmnosys nosys_args int
 216	AUE_NULL	NODEF|NOTSTATIC	lkmnosys lkmnosys nosys_args int
 217	AUE_NULL	NODEF|NOTSTATIC	lkmnosys lkmnosys nosys_args int
 218	AUE_NULL	NODEF|NOTSTATIC	lkmnosys lkmnosys nosys_args int
 219	AUE_NULL	NODEF|NOTSTATIC	lkmnosys lkmnosys nosys_args int
 
 220	AUE_SEMCTL	COMPAT7|NOSTD	{ int freebsd32_semctl( \
 				    int semid, int semnum, \
 				    int cmd, union semun32 *arg); }
 221	AUE_SEMGET	NOSTD|NOPROTO	{ int semget(key_t key, int nsems, \
 				    int semflg); }
 222	AUE_SEMOP	NOSTD|NOPROTO	{ int semop(int semid, \
 				    struct sembuf *sops, u_int nsops); }
 223	AUE_NULL	OBSOL	semconfig
 224	AUE_MSGCTL	COMPAT7|NOSTD	{ int freebsd32_msgctl( \
 				    int msqid, int cmd, \
 				    struct msqid_ds32_old *buf); }
 225	AUE_MSGGET	NOSTD|NOPROTO	{ int msgget(key_t key, int msgflg); }
 226	AUE_MSGSND	NOSTD	{ int freebsd32_msgsnd(int msqid, void *msgp, \
 				    size_t msgsz, int msgflg); }
 227	AUE_MSGRCV	NOSTD	{ int freebsd32_msgrcv(int msqid, void *msgp, \
 				    size_t msgsz, long msgtyp, int msgflg); }
 228	AUE_SHMAT	NOSTD|NOPROTO	{ void *shmat(int shmid, void *shmaddr, \
 				    int shmflg); }
 229	AUE_SHMCTL	COMPAT7|NOSTD	{ int freebsd32_shmctl( \
 				    int shmid, int cmd, \
 				    struct shmid_ds32_old *buf); }
 230	AUE_SHMDT	NOSTD|NOPROTO	{ int shmdt(void *shmaddr); }
 231	AUE_SHMGET	NOSTD|NOPROTO	{ int shmget(key_t key, int size, \
 				    int shmflg); }
 ;
 232	AUE_NULL	STD 	{ int freebsd32_clock_gettime(clockid_t clock_id, \
 				    struct timespec32 *tp); }
 233	AUE_CLOCK_SETTIME	STD	{ int freebsd32_clock_settime(clockid_t clock_id, \
 				    const struct timespec32 *tp); }
 234	AUE_NULL	STD	{ int freebsd32_clock_getres(clockid_t clock_id, \
 				    struct timespec32 *tp); }
 235	AUE_NULL	STD	{ int freebsd32_ktimer_create(\
 				    clockid_t clock_id, \
 				    struct sigevent32 *evp, int *timerid); }
 236	AUE_NULL	NOPROTO	{ int ktimer_delete(int timerid); }
 237	AUE_NULL	STD	{ int freebsd32_ktimer_settime(int timerid,\
 				    int flags, \
 				    const struct itimerspec32 *value, \
 				    struct itimerspec32 *ovalue); }
 238	AUE_NULL	STD	{ int freebsd32_ktimer_gettime(int timerid,\
 				    struct itimerspec32 *value); }
 239	AUE_NULL	NOPROTO	{ int ktimer_getoverrun(int timerid); }
 240	AUE_NULL	STD	{ int freebsd32_nanosleep( \
 				    const struct timespec32 *rqtp, \
 				    struct timespec32 *rmtp); }
 241	AUE_NULL	NOPROTO	{ int ffclock_getcounter(ffcounter *ffcount); }
 242	AUE_NULL	NOPROTO	{ int ffclock_setestimate( \
 				    struct ffclock_estimate *cest); }
 243	AUE_NULL	NOPROTO	{ int ffclock_getestimate( \
 				    struct ffclock_estimate *cest); }
 244	AUE_NULL	STD	{ int freebsd32_clock_nanosleep( \
 				    clockid_t clock_id, int flags, \
 				    const struct timespec32 *rqtp, \
 				    struct timespec32 *rmtp); }
 245	AUE_NULL	UNIMPL	nosys
 246	AUE_NULL	UNIMPL	nosys
 247	AUE_NULL	STD	{ int freebsd32_clock_getcpuclockid2(\
 				    uint32_t id1, uint32_t id2,\
 				    int which, clockid_t *clock_id); }
 248	AUE_NULL	UNIMPL	ntp_gettime
 249	AUE_NULL	UNIMPL	nosys
 250	AUE_MINHERIT	NOPROTO	{ int minherit(void *addr, size_t len, \
 				    int inherit); }
 251	AUE_RFORK	NOPROTO	{ int rfork(int flags); }
 252	AUE_POLL	OBSOL	openbsd_poll
 253	AUE_ISSETUGID	NOPROTO	{ int issetugid(void); }
 254	AUE_LCHOWN	NOPROTO	{ int lchown(const char *path, int uid, \
 				    int gid); }
 255	AUE_AIO_READ	STD	{ int freebsd32_aio_read( \
 				    struct aiocb32 *aiocbp); }
 256	AUE_AIO_WRITE	STD	{ int freebsd32_aio_write( \
 				    struct aiocb32 *aiocbp); }
 257	AUE_LIO_LISTIO	STD	{ int freebsd32_lio_listio(int mode, \
 				    struct aiocb32 * const *acb_list, \
 				    int nent, struct sigevent32 *sig); }
 258	AUE_NULL	UNIMPL	nosys
 259	AUE_NULL	UNIMPL	nosys
 260	AUE_NULL	UNIMPL	nosys
 261	AUE_NULL	UNIMPL	nosys
 262	AUE_NULL	UNIMPL	nosys
 263	AUE_NULL	UNIMPL	nosys
 264	AUE_NULL	UNIMPL	nosys
 265	AUE_NULL	UNIMPL	nosys
 266	AUE_NULL	UNIMPL	nosys
 267	AUE_NULL	UNIMPL	nosys
 268	AUE_NULL	UNIMPL	nosys
 269	AUE_NULL	UNIMPL	nosys
 270	AUE_NULL	UNIMPL	nosys
 271	AUE_NULL	UNIMPL	nosys
 272	AUE_O_GETDENTS	COMPAT11 { int freebsd32_getdents(int fd, char *buf, \
 				    int count); }
 273	AUE_NULL	UNIMPL	nosys
 274	AUE_LCHMOD	NOPROTO	{ int lchmod(const char *path, mode_t mode); }
 275	AUE_NULL	OBSOL	netbsd_lchown
 276	AUE_LUTIMES	STD	{ int freebsd32_lutimes(const char *path, \
 				    struct timeval32 *tptr); }
 277	AUE_NULL	OBSOL	netbsd_msync
 278	AUE_STAT  COMPAT11|NOPROTO { int nstat(const char *path, \
 				    struct nstat *ub); }
 279	AUE_FSTAT COMPAT11|NOPROTO { int nfstat(int fd, struct nstat *sb); }
 280	AUE_LSTAT COMPAT11|NOPROTO { int nlstat(const char *path, \
 				    struct nstat *ub); }
 281	AUE_NULL	UNIMPL	nosys
 282	AUE_NULL	UNIMPL	nosys
 283	AUE_NULL	UNIMPL	nosys
 284	AUE_NULL	UNIMPL	nosys
 285	AUE_NULL	UNIMPL	nosys
 286	AUE_NULL	UNIMPL	nosys
 287	AUE_NULL	UNIMPL	nosys
 288	AUE_NULL	UNIMPL	nosys
 289	AUE_PREADV	STD	{ ssize_t freebsd32_preadv(int fd, \
 					struct iovec32 *iovp, \
 					u_int iovcnt, \
 					uint32_t offset1, uint32_t offset2); }
 290	AUE_PWRITEV	STD	{ ssize_t freebsd32_pwritev(int fd, \
 					struct iovec32 *iovp, \
 					u_int iovcnt, \
 					uint32_t offset1, uint32_t offset2); }
 291	AUE_NULL	UNIMPL	nosys
 292	AUE_NULL	UNIMPL	nosys
 293	AUE_NULL	UNIMPL	nosys
 294	AUE_NULL	UNIMPL	nosys
 295	AUE_NULL	UNIMPL	nosys
 296	AUE_NULL	UNIMPL	nosys
 297	AUE_FHSTATFS	COMPAT4	{ int freebsd32_fhstatfs( \
 				    const struct fhandle *u_fhp, \
 				    struct statfs32 *buf); }
 298	AUE_FHOPEN	NOPROTO	{ int fhopen(const struct fhandle *u_fhp, \
 			 	    int flags); }
 299	AUE_FHSTAT	COMPAT11 { int freebsd32_fhstat( \
 				    const struct fhandle *u_fhp, \
 				    struct freebsd11_stat32 *sb); }
 ; syscall numbers for FreeBSD
 300	AUE_NULL	NOPROTO	{ int modnext(int modid); }
 301	AUE_NULL	STD	{ int freebsd32_modstat(int modid, \
 				    struct module_stat32* stat); }
 302	AUE_NULL	NOPROTO	{ int modfnext(int modid); }
 303	AUE_NULL	NOPROTO	{ int modfind(const char *name); }
 304	AUE_MODLOAD	NOPROTO	{ int kldload(const char *file); }
 305	AUE_MODUNLOAD	NOPROTO	{ int kldunload(int fileid); }
 306	AUE_NULL	NOPROTO	{ int kldfind(const char *file); }
 307	AUE_NULL	NOPROTO	{ int kldnext(int fileid); }
 308	AUE_NULL	STD	{ int freebsd32_kldstat(int fileid, \
 				    struct kld32_file_stat* stat); }
 309	AUE_NULL	NOPROTO	{ int kldfirstmod(int fileid); }
 310	AUE_GETSID	NOPROTO	{ int getsid(pid_t pid); }
 311	AUE_SETRESUID	NOPROTO	{ int setresuid(uid_t ruid, uid_t euid, \
 				    uid_t suid); }
 312	AUE_SETRESGID	NOPROTO	{ int setresgid(gid_t rgid, gid_t egid, \
 				    gid_t sgid); }
 313	AUE_NULL	OBSOL	signanosleep
 314	AUE_AIO_RETURN	STD	{ int freebsd32_aio_return( \
 				    struct aiocb32 *aiocbp); }
 315	AUE_AIO_SUSPEND	STD	{ int freebsd32_aio_suspend( \
 				    struct aiocb32 * const * aiocbp, int nent, \
 				    const struct timespec32 *timeout); }
 316	AUE_AIO_CANCEL	NOPROTO	{ int aio_cancel(int fd, \
 				    struct aiocb *aiocbp); }
 317	AUE_AIO_ERROR	STD	{ int freebsd32_aio_error( \
 				    struct aiocb32 *aiocbp); }
 318	AUE_AIO_READ	COMPAT6	{ int freebsd32_aio_read( \
 				    struct oaiocb32 *aiocbp); }
 319	AUE_AIO_WRITE	COMPAT6	{ int freebsd32_aio_write( \
 				    struct oaiocb32 *aiocbp); }
 320	AUE_LIO_LISTIO	COMPAT6	{ int freebsd32_lio_listio(int mode, \
 				    struct oaiocb32 * const *acb_list, \
 				    int nent, struct osigevent32 *sig); }
 321	AUE_NULL	NOPROTO	{ int yield(void); }
 322	AUE_NULL	OBSOL	thr_sleep
 323	AUE_NULL	OBSOL	thr_wakeup
 324	AUE_MLOCKALL	NOPROTO	{ int mlockall(int how); }
 325	AUE_MUNLOCKALL	NOPROTO	{ int munlockall(void); }
 326	AUE_GETCWD	NOPROTO	{ int __getcwd(char *buf, size_t buflen); }
 
 327	AUE_NULL	NOPROTO	{ int sched_setparam (pid_t pid, \
 				    const struct sched_param *param); }
 328	AUE_NULL	NOPROTO	{ int sched_getparam (pid_t pid, \
 				    struct sched_param *param); }
 
 329	AUE_NULL	NOPROTO	{ int sched_setscheduler (pid_t pid, \
 				    int policy, \
 				    const struct sched_param *param); }
 330	AUE_NULL	NOPROTO	{ int sched_getscheduler (pid_t pid); }
 
 331	AUE_NULL	NOPROTO	{ int sched_yield (void); }
 332	AUE_NULL	NOPROTO	{ int sched_get_priority_max (int policy); }
 333	AUE_NULL	NOPROTO	{ int sched_get_priority_min (int policy); }
 334	AUE_NULL	STD	{ int freebsd32_sched_rr_get_interval ( \
 				    pid_t pid, \
 				    struct timespec32 *interval); }
 335	AUE_NULL	NOPROTO	{ int utrace(const void *addr, size_t len); }
 336	AUE_SENDFILE	COMPAT4	{ int freebsd32_sendfile(int fd, int s, \
 				    uint32_t offset1, uint32_t offset2, \
 				    size_t nbytes, struct sf_hdtr32 *hdtr, \
 				    off_t *sbytes, int flags); }
 337	AUE_NULL	NOPROTO	{ int kldsym(int fileid, int cmd, \
 				    void *data); }
 338	AUE_JAIL	STD	{ int freebsd32_jail(struct jail32 *jail); }
 339	AUE_NULL	UNIMPL	pioctl
 340	AUE_SIGPROCMASK	NOPROTO	{ int sigprocmask(int how, \
 				    const sigset_t *set, sigset_t *oset); }
 341	AUE_SIGSUSPEND	NOPROTO	{ int sigsuspend(const sigset_t *sigmask); }
 342	AUE_SIGACTION	COMPAT4	{ int freebsd32_sigaction(int sig, \
 				    struct sigaction32 *act, \
 				    struct sigaction32 *oact); }
 343	AUE_SIGPENDING	NOPROTO	{ int sigpending(sigset_t *set); }
 344	AUE_SIGRETURN	COMPAT4	{ int freebsd32_sigreturn( \
 		    const struct freebsd4_freebsd32_ucontext *sigcntxp); }
 345	AUE_SIGWAIT	STD	{ int freebsd32_sigtimedwait(const sigset_t *set, \
 				    siginfo_t *info, \
 				    const struct timespec *timeout); }
 346	AUE_NULL	STD	{ int freebsd32_sigwaitinfo(const sigset_t *set, \
 				    siginfo_t *info); }
 347	AUE_ACL_GET_FILE	NOPROTO	{ int __acl_get_file(const char *path, \
 				    acl_type_t type, struct acl *aclp); }
 348	AUE_ACL_SET_FILE	NOPROTO	{ int __acl_set_file(const char *path, \
 				    acl_type_t type, struct acl *aclp); }
 349	AUE_ACL_GET_FD	NOPROTO	{ int __acl_get_fd(int filedes, \
 				    acl_type_t type, struct acl *aclp); }
 350	AUE_ACL_SET_FD	NOPROTO	{ int __acl_set_fd(int filedes, \
 				    acl_type_t type, struct acl *aclp); }
 351	AUE_ACL_DELETE_FILE	NOPROTO	{ int __acl_delete_file(const char *path, \
 				    acl_type_t type); }
 352	AUE_ACL_DELETE_FD	NOPROTO	{ int __acl_delete_fd(int filedes, \
 				    acl_type_t type); }
 353	AUE_ACL_CHECK_FILE	NOPROTO	{ int __acl_aclcheck_file(const char *path, \
 				    acl_type_t type, struct acl *aclp); }
 354	AUE_ACL_CHECK_FD	NOPROTO	{ int __acl_aclcheck_fd(int filedes, \
 				    acl_type_t type, struct acl *aclp); }
 355	AUE_EXTATTRCTL	NOPROTO	{ int extattrctl(const char *path, int cmd, \
 				    const char *filename, int attrnamespace, \
 				    const char *attrname); }
 356	AUE_EXTATTR_SET_FILE	NOPROTO	{ ssize_t extattr_set_file( \
 				    const char *path, int attrnamespace, \
 				    const char *attrname, void *data, \
 				    size_t nbytes); }
 357	AUE_EXTATTR_GET_FILE	NOPROTO	{ ssize_t extattr_get_file( \
 				    const char *path, int attrnamespace, \
 				    const char *attrname, void *data, \
 				    size_t nbytes); }
 358	AUE_EXTATTR_DELETE_FILE	NOPROTO	{ int extattr_delete_file( \
 				    const char *path, int attrnamespace, \
 				    const char *attrname); }
 359	AUE_AIO_WAITCOMPLETE	STD	{ int freebsd32_aio_waitcomplete( \
 				    struct aiocb32 **aiocbp, \
 				    struct timespec32 *timeout); }
 360	AUE_GETRESUID	NOPROTO	{ int getresuid(uid_t *ruid, uid_t *euid, \
 				    uid_t *suid); }
 361	AUE_GETRESGID	NOPROTO	{ int getresgid(gid_t *rgid, gid_t *egid, \
 				    gid_t *sgid); }
 362	AUE_KQUEUE	NOPROTO	{ int kqueue(void); }
 363	AUE_KEVENT	COMPAT11 { int freebsd32_kevent(int fd, \
 				    const struct kevent32_freebsd11 * \
 				    changelist, \
 				    int nchanges, \
 				    struct kevent32_freebsd11 *eventlist, \
 				    int nevents, \
 				    const struct timespec32 *timeout); }
 364	AUE_NULL	OBSOL	__cap_get_proc
 365	AUE_NULL	OBSOL	__cap_set_proc
 366	AUE_NULL	OBSOL	__cap_get_fd
 367	AUE_NULL	OBSOL	__cap_get_file
 368	AUE_NULL	OBSOL	__cap_set_fd
 369	AUE_NULL	OBSOL	__cap_set_file
 370	AUE_NULL	UNIMPL	nosys
 371	AUE_EXTATTR_SET_FD	NOPROTO	{ ssize_t extattr_set_fd(int fd, \
 				    int attrnamespace, const char *attrname, \
 				    void *data, size_t nbytes); }
 372	AUE_EXTATTR_GET_FD	NOPROTO	{ ssize_t extattr_get_fd(int fd, \
 				    int attrnamespace, const char *attrname, \
 				    void *data, size_t nbytes); }
 373	AUE_EXTATTR_DELETE_FD	NOPROTO	{ int extattr_delete_fd(int fd, \
 				    int attrnamespace, \
 				    const char *attrname); }
 374	AUE_SETUGID	NOPROTO	{ int __setugid(int flag); }
 375	AUE_NULL	OBSOL	nfsclnt
 376	AUE_EACCESS	NOPROTO	{ int eaccess(const char *path, int amode); }
 377	AUE_NULL	UNIMPL	afs_syscall
 378	AUE_NMOUNT	STD	{ int freebsd32_nmount(struct iovec32 *iovp, \
 				    unsigned int iovcnt, int flags); }
 379	AUE_NULL	OBSOL	kse_exit
 380	AUE_NULL	OBSOL	kse_wakeup
 381	AUE_NULL	OBSOL	kse_create
 382	AUE_NULL	OBSOL	kse_thr_interrupt
 383	AUE_NULL	OBSOL	kse_release
 384	AUE_NULL	UNIMPL	__mac_get_proc
 385	AUE_NULL	UNIMPL	__mac_set_proc
 386	AUE_NULL	UNIMPL	__mac_get_fd
 387	AUE_NULL	UNIMPL	__mac_get_file
 388	AUE_NULL	UNIMPL	__mac_set_fd
 389	AUE_NULL	UNIMPL	__mac_set_file
 390	AUE_NULL	NOPROTO	{ int kenv(int what, const char *name, \
 				    char *value, int len); }
 391	AUE_LCHFLAGS	NOPROTO	{ int lchflags(const char *path, \
 				    u_long flags); }
 392	AUE_NULL	NOPROTO	{ int uuidgen(struct uuid *store, \
 				    int count); }
 393	AUE_SENDFILE	STD	{ int freebsd32_sendfile(int fd, int s, \
 				    uint32_t offset1, uint32_t offset2, \
 				    size_t nbytes, struct sf_hdtr32 *hdtr, \
 				    off_t *sbytes, int flags); }
 394	AUE_NULL	UNIMPL	mac_syscall
 395	AUE_GETFSSTAT	COMPAT11|NOPROTO	{ int getfsstat( \
 				    struct freebsd11_statfs *buf, \
 				    long bufsize, int mode); }
 396	AUE_STATFS	COMPAT11|NOPROTO	{ int statfs(const char *path, \
 				    struct statfs *buf); }
 397	AUE_FSTATFS	COMPAT11|NOPROTO	{ int fstatfs(int fd, \
 				    struct freebsd11_statfs *buf); }
 398	AUE_FHSTATFS	COMPAT11|NOPROTO	{ int fhstatfs( \
 				    const struct fhandle *u_fhp, \
 				    struct freebsd11_statfs *buf); }
 399	AUE_NULL	UNIMPL	nosys
 400	AUE_SEMCLOSE	NOSTD|NOPROTO	{ int ksem_close(semid_t id); }
 401	AUE_SEMPOST	NOSTD|NOPROTO	{ int ksem_post(semid_t id); }
 402	AUE_SEMWAIT	NOSTD|NOPROTO	{ int ksem_wait(semid_t id); }
 403	AUE_SEMTRYWAIT	NOSTD|NOPROTO	{ int ksem_trywait(semid_t id); }
 404	AUE_SEMINIT	NOSTD	{ int freebsd32_ksem_init(semid_t *idp, \
 				    unsigned int value); }
 405	AUE_SEMOPEN	NOSTD	{ int freebsd32_ksem_open(semid_t *idp, \
 				    const char *name, int oflag, \
 				    mode_t mode, unsigned int value); }
 406	AUE_SEMUNLINK	NOSTD|NOPROTO	{ int ksem_unlink(const char *name); }
 407	AUE_SEMGETVALUE	NOSTD|NOPROTO	{ int ksem_getvalue(semid_t id, \
 				    int *val); }
 408	AUE_SEMDESTROY	NOSTD|NOPROTO	{ int ksem_destroy(semid_t id); }
 409	AUE_NULL	UNIMPL	__mac_get_pid
 410	AUE_NULL	UNIMPL	__mac_get_link
 411	AUE_NULL	UNIMPL	__mac_set_link
 412	AUE_EXTATTR_SET_LINK	NOPROTO	{ ssize_t extattr_set_link( \
 				    const char *path, int attrnamespace, \
 				    const char *attrname, void *data, \
 				    size_t nbytes); }
 413	AUE_EXTATTR_GET_LINK	NOPROTO	{ ssize_t extattr_get_link( \
 				    const char *path, int attrnamespace, \
 				    const char *attrname, void *data, \
 				    size_t nbytes); }
 414	AUE_EXTATTR_DELETE_LINK	NOPROTO	{ int extattr_delete_link( \
 				    const char *path, int attrnamespace, \
 				    const char *attrname); }
 415	AUE_NULL	UNIMPL	__mac_execve
 416	AUE_SIGACTION	STD	{ int freebsd32_sigaction(int sig, \
 				    struct sigaction32 *act, \
 				    struct sigaction32 *oact); }
 417	AUE_SIGRETURN	STD	{ int freebsd32_sigreturn( \
 		    const struct freebsd32_ucontext *sigcntxp); }
 418	AUE_NULL	UNIMPL	__xstat
 419	AUE_NULL	UNIMPL	__xfstat
 420	AUE_NULL	UNIMPL	__xlstat
 421	AUE_NULL	STD	{ int freebsd32_getcontext( \
 				    struct freebsd32_ucontext *ucp); }
 422	AUE_NULL	STD	{ int freebsd32_setcontext( \
 				    const struct freebsd32_ucontext *ucp); }
 423	AUE_NULL	STD	{ int freebsd32_swapcontext( \
 				    struct freebsd32_ucontext *oucp, \
 				    const struct freebsd32_ucontext *ucp); }
 424	AUE_SWAPOFF	UNIMPL	swapoff
 425	AUE_ACL_GET_LINK	NOPROTO	{ int __acl_get_link(const char *path, \
 				    acl_type_t type, struct acl *aclp); }
 426	AUE_ACL_SET_LINK	NOPROTO	{ int __acl_set_link(const char *path, \
 				    acl_type_t type, struct acl *aclp); }
 427	AUE_ACL_DELETE_LINK	NOPROTO	{ int __acl_delete_link(const char *path, \
 				    acl_type_t type); }
 428	AUE_ACL_CHECK_LINK	NOPROTO	{ int __acl_aclcheck_link(const char *path, \
 				    acl_type_t type, struct acl *aclp); }
 429	AUE_SIGWAIT	NOPROTO	{ int sigwait(const sigset_t *set, \
 				    int *sig); }
 430	AUE_THR_CREATE	UNIMPL	thr_create;
 431	AUE_THR_EXIT	NOPROTO	{ void thr_exit(long *state); }
 432	AUE_NULL	NOPROTO	{ int thr_self(long *id); }
 433	AUE_THR_KILL	NOPROTO	{ int thr_kill(long id, int sig); }
 434	AUE_NULL	UNIMPL	nosys
 435	AUE_NULL	UNIMPL	nosys
 436	AUE_JAIL_ATTACH	NOPROTO	{ int jail_attach(int jid); }
 437	AUE_EXTATTR_LIST_FD	NOPROTO	{ ssize_t extattr_list_fd(int fd, \
 				    int attrnamespace, void *data, \
 				    size_t nbytes); }
 438	AUE_EXTATTR_LIST_FILE	NOPROTO	{ ssize_t extattr_list_file( \
 				    const char *path, int attrnamespace, \
 				    void *data, size_t nbytes); }
 439	AUE_EXTATTR_LIST_LINK	NOPROTO	{ ssize_t extattr_list_link( \
 				    const char *path, int attrnamespace, \
 				    void *data, size_t nbytes); }
 440	AUE_NULL	OBSOL	kse_switchin
 441	AUE_SEMWAIT	NOSTD	{ int freebsd32_ksem_timedwait(semid_t id, \
 				    const struct timespec32 *abstime); }
 442	AUE_NULL	STD	{ int freebsd32_thr_suspend( \
 				    const struct timespec32 *timeout); }
 443	AUE_NULL	NOPROTO	{ int thr_wake(long id); }
 444	AUE_MODUNLOAD	NOPROTO	{ int kldunloadf(int fileid, int flags); }
 445	AUE_AUDIT	NOPROTO	{ int audit(const void *record, \
 				    u_int length); }
 446	AUE_AUDITON	NOPROTO	{ int auditon(int cmd, void *data, \
 				    u_int length); }
 447	AUE_GETAUID	NOPROTO	{ int getauid(uid_t *auid); }
 448	AUE_SETAUID	NOPROTO	{ int setauid(uid_t *auid); }
 449	AUE_GETAUDIT	NOPROTO	{ int getaudit(struct auditinfo *auditinfo); }
 450	AUE_SETAUDIT	NOPROTO	{ int setaudit(struct auditinfo *auditinfo); }
 451	AUE_GETAUDIT_ADDR	NOPROTO	{ int getaudit_addr( \
 				    struct auditinfo_addr *auditinfo_addr, \
 				    u_int length); }
 452	AUE_SETAUDIT_ADDR	NOPROTO	{ int setaudit_addr( \
 				    struct auditinfo_addr *auditinfo_addr, \
 				    u_int length); }
 453	AUE_AUDITCTL	NOPROTO	{ int auditctl(const char *path); }
 454	AUE_NULL	STD	{ int freebsd32__umtx_op(void *obj, int op,\
 				    u_long val, void *uaddr, \
 				    void *uaddr2); }
 455	AUE_THR_NEW	STD	{ int freebsd32_thr_new(	\
 				    struct thr_param32 *param,	\
 				    int param_size); }
 456	AUE_NULL	STD	{ int freebsd32_sigqueue(pid_t pid, \
 				    int signum, int value); }
 457	AUE_MQ_OPEN	NOSTD	{ int freebsd32_kmq_open( \
 				    const char *path, int flags, mode_t mode, \
 				    const struct mq_attr32 *attr); }
 458	AUE_MQ_SETATTR	NOSTD	{ int freebsd32_kmq_setattr(int mqd, \
 				    const struct mq_attr32 *attr,	\
 				    struct mq_attr32 *oattr); }
 459	AUE_MQ_TIMEDRECEIVE	NOSTD	{ int freebsd32_kmq_timedreceive(int mqd, \
 				    char *msg_ptr, size_t msg_len,	\
 				    unsigned *msg_prio,			\
 				    const struct timespec32 *abs_timeout); }
 460	AUE_MQ_TIMEDSEND	NOSTD	{ int freebsd32_kmq_timedsend(int mqd,	\
 				    const char *msg_ptr, size_t msg_len,\
 				    unsigned msg_prio,			\
 				    const struct timespec32 *abs_timeout);}
 461	AUE_MQ_NOTIFY	NOSTD	{ int freebsd32_kmq_notify(int mqd,	\
 				    const struct sigevent32 *sigev); }
 462	AUE_MQ_UNLINK	NOPROTO|NOSTD	{ int kmq_unlink(const char *path); }
 463	AUE_NULL	NOPROTO	{ int abort2(const char *why, int nargs, void **args); }
 464	AUE_NULL 	NOPROTO	{ int thr_set_name(long id, const char *name); }
 465	AUE_AIO_FSYNC	STD	{ int freebsd32_aio_fsync(int op, \
 				    struct aiocb32 *aiocbp); }
 466	AUE_RTPRIO	NOPROTO	{ int rtprio_thread(int function, \
 				    lwpid_t lwpid, struct rtprio *rtp); }
 467	AUE_NULL	UNIMPL	nosys
 468	AUE_NULL	UNIMPL	nosys
 469	AUE_NULL	UNIMPL	__getpath_fromfd
 470	AUE_NULL	UNIMPL	__getpath_fromaddr
 471	AUE_SCTP_PEELOFF	NOPROTO|NOSTD	{ int sctp_peeloff(int sd, uint32_t name); }
 472	AUE_SCTP_GENERIC_SENDMSG	NOPROTO|NOSTD	{ int sctp_generic_sendmsg( \
 				    int sd, void *msg, int mlen, \
 				    struct sockaddr *to, __socklen_t tolen, \
 				    struct sctp_sndrcvinfo *sinfo, int flags); }
 473	AUE_SCTP_GENERIC_SENDMSG_IOV	NOPROTO|NOSTD	{ int sctp_generic_sendmsg_iov(int sd, struct iovec *iov, int iovlen, \
 				    struct sockaddr *to, __socklen_t tolen, \
 				    struct sctp_sndrcvinfo *sinfo, int flags); }
 474	AUE_SCTP_GENERIC_RECVMSG	NOPROTO|NOSTD	{ int sctp_generic_recvmsg(int sd, struct iovec *iov, int iovlen, \
 				    struct sockaddr * from, __socklen_t *fromlenaddr, \
 				    struct sctp_sndrcvinfo *sinfo, int *msg_flags); }
 #ifdef PAD64_REQUIRED
 475	AUE_PREAD	STD	{ ssize_t freebsd32_pread(int fd, \
 				    void *buf,size_t nbyte, \
 				    int pad, \
 				    uint32_t offset1, uint32_t offset2); }
 476	AUE_PWRITE	STD	{ ssize_t freebsd32_pwrite(int fd, \
 				    const void *buf, size_t nbyte, \
 				    int pad, \
 				    uint32_t offset1, uint32_t offset2); }
 477	AUE_MMAP	STD 	{ void *freebsd32_mmap(void *addr, \
 				    size_t len, int prot, int flags, int fd, \
 				    int pad, \
 				    uint32_t pos1, uint32_t pos2); }
 478	AUE_LSEEK	STD	{ off_t freebsd32_lseek(int fd, \
 				    int pad, \
 				    uint32_t offset1, uint32_t offset2, \
 				    int whence); }
 479	AUE_TRUNCATE	STD	{ int freebsd32_truncate(const char *path, \
 				    int pad, \
 				    uint32_t length1, uint32_t length2); }
 480	AUE_FTRUNCATE	STD	{ int freebsd32_ftruncate(int fd, \
 				    int pad, \
 				    uint32_t length1, uint32_t length2); }
 #else
 475	AUE_PREAD	STD	{ ssize_t freebsd32_pread(int fd, \
 				    void *buf,size_t nbyte, \
 				    uint32_t offset1, uint32_t offset2); }
 476	AUE_PWRITE	STD	{ ssize_t freebsd32_pwrite(int fd, \
 				    const void *buf, size_t nbyte, \
 				    uint32_t offset1, uint32_t offset2); }
 477	AUE_MMAP	STD 	{ void *freebsd32_mmap(void *addr, \
 				    size_t len, int prot, int flags, int fd, \
 				    uint32_t pos1, uint32_t pos2); }
 478	AUE_LSEEK	STD	{ off_t freebsd32_lseek(int fd, \
 				    uint32_t offset1, uint32_t offset2, \
 				    int whence); }
 479	AUE_TRUNCATE	STD	{ int freebsd32_truncate(const char *path, \
 				    uint32_t length1, uint32_t length2); }
 480	AUE_FTRUNCATE	STD	{ int freebsd32_ftruncate(int fd, \
 				    uint32_t length1, uint32_t length2); }
 #endif
 481	AUE_THR_KILL2	NOPROTO	{ int thr_kill2(pid_t pid, long id, int sig); }
 482	AUE_SHMOPEN	NOPROTO	{ int shm_open(const char *path, int flags, \
 				    mode_t mode); }
 483	AUE_SHMUNLINK	NOPROTO	{ int shm_unlink(const char *path); }
 484	AUE_NULL	NOPROTO	{ int cpuset(cpusetid_t *setid); }
 #ifdef PAD64_REQUIRED
 485	AUE_NULL	STD	{ int freebsd32_cpuset_setid(cpuwhich_t which, \
 				    int pad, \
 				    uint32_t id1, uint32_t id2, \
 				    cpusetid_t setid); }
 #else
 485	AUE_NULL	STD	{ int freebsd32_cpuset_setid(cpuwhich_t which, \
 				    uint32_t id1, uint32_t id2, \
 				    cpusetid_t setid); }
 #endif
 486	AUE_NULL	STD	{ int freebsd32_cpuset_getid(cpulevel_t level, \
 				    cpuwhich_t which, \
 				    uint32_t id1, uint32_t id2, \
 				    cpusetid_t *setid); }
 487	AUE_NULL	STD	{ int freebsd32_cpuset_getaffinity( \
 				    cpulevel_t level, cpuwhich_t which, \
 				    uint32_t id1, uint32_t id2, \
 				    size_t cpusetsize, \
 				    cpuset_t *mask); }
 488	AUE_NULL	STD	{ int freebsd32_cpuset_setaffinity( \
 				    cpulevel_t level, cpuwhich_t which, \
 				    uint32_t id1, uint32_t id2, \
 				    size_t cpusetsize, \
 				    const cpuset_t *mask); }
 489	AUE_FACCESSAT	NOPROTO	{ int faccessat(int fd, const char *path, \
 				    int amode, int flag); }
 490	AUE_FCHMODAT	NOPROTO	{ int fchmodat(int fd, const char *path, \
 				    mode_t mode, int flag); }
 491	AUE_FCHOWNAT	NOPROTO	{ int fchownat(int fd, const char *path, \
 				    uid_t uid, gid_t gid, int flag); }
 492	AUE_FEXECVE	STD	{ int freebsd32_fexecve(int fd, \
 				    uint32_t *argv, uint32_t *envv); }
 493	AUE_FSTATAT	COMPAT11 { int freebsd32_fstatat(int fd, \
 				    const char *path, \
 				    struct freebsd11_stat32 *buf, \
 				    int flag); }
 494	AUE_FUTIMESAT	STD	{ int freebsd32_futimesat(int fd, \
 				    const char *path, \
 				    struct timeval *times); }
 495	AUE_LINKAT	NOPROTO	{ int linkat(int fd1, const char *path1, \
 				    int fd2, const char *path2, int flag); }
 496	AUE_MKDIRAT	NOPROTO	{ int mkdirat(int fd, const char *path, \
 				    mode_t mode); }
 497	AUE_MKFIFOAT	NOPROTO	{ int mkfifoat(int fd, const char *path, \
 				    mode_t mode); }
 498	AUE_MKNODAT	COMPAT11|NOPROTO { int mknodat(int fd, \
 				    const char *path, mode_t mode, \
 				    uint32_t dev); }
 499	AUE_OPENAT_RWTC	NOPROTO	{ int openat(int fd, const char *path, \
 				    int flag, mode_t mode); }
 500	AUE_READLINKAT	NOPROTO	{ int readlinkat(int fd, const char *path, \
 				    char *buf, size_t bufsize); }
 501	AUE_RENAMEAT	NOPROTO	{ int renameat(int oldfd, const char *old, \
 				    int newfd, const char *new); }
 502	AUE_SYMLINKAT	NOPROTO	{ int symlinkat(const char *path1, int fd, \
 				    const char *path2); }
 503	AUE_UNLINKAT	NOPROTO	{ int unlinkat(int fd, const char *path, \
 				    int flag); }
 504	AUE_POSIX_OPENPT	NOPROTO	{ int posix_openpt(int flags); }
 ; 505 is initialised by the kgssapi code, if present.
 505	AUE_NULL	UNIMPL	gssd_syscall
 506	AUE_JAIL_GET	STD	{ int freebsd32_jail_get(struct iovec32 *iovp, \
 				    unsigned int iovcnt, int flags); }
 507	AUE_JAIL_SET	STD	{ int freebsd32_jail_set(struct iovec32 *iovp, \
 				    unsigned int iovcnt, int flags); }
 508	AUE_JAIL_REMOVE	NOPROTO	{ int jail_remove(int jid); }
 509	AUE_CLOSEFROM	NOPROTO	{ int closefrom(int lowfd); }
 510	AUE_SEMCTL	NOSTD { int freebsd32_semctl(int semid, int semnum, \
 				    int cmd, union semun32 *arg); }
 511	AUE_MSGCTL	NOSTD	{ int freebsd32_msgctl(int msqid, int cmd, \
 				    struct msqid_ds32 *buf); }
 512	AUE_SHMCTL	NOSTD	{ int freebsd32_shmctl(int shmid, int cmd, \
 				    struct shmid_ds32 *buf); }
 513	AUE_LPATHCONF	NOPROTO	{ int lpathconf(const char *path, int name); }
 514	AUE_NULL	OBSOL	cap_new
 515	AUE_CAP_RIGHTS_GET	NOPROTO	{ int __cap_rights_get(int version, \
 				    int fd, cap_rights_t *rightsp); }
 516	AUE_CAP_ENTER	NOPROTO	{ int cap_enter(void); }
 517	AUE_CAP_GETMODE	NOPROTO	{ int cap_getmode(u_int *modep); }
 518	AUE_PDFORK	NOPROTO	{ int pdfork(int *fdp, int flags); }
 519	AUE_PDKILL	NOPROTO	{ int pdkill(int fd, int signum); }
 520	AUE_PDGETPID	NOPROTO	{ int pdgetpid(int fd, pid_t *pidp); }
 521	AUE_PDWAIT	UNIMPL	pdwait4
 522	AUE_SELECT	STD	{ int freebsd32_pselect(int nd, fd_set *in, \
 				    fd_set *ou, fd_set *ex, \
 				    const struct timespec32 *ts, \
 				    const sigset_t *sm); }
 523	AUE_GETLOGINCLASS	NOPROTO	{ int getloginclass(char *namebuf, \
 				    size_t namelen); }
 524	AUE_SETLOGINCLASS	NOPROTO	{ int setloginclass(const char *namebuf); }
 525	AUE_NULL	NOPROTO	{ int rctl_get_racct(const void *inbufp, \
 				    size_t inbuflen, void *outbufp, \
 				    size_t outbuflen); }
 526	AUE_NULL	NOPROTO	{ int rctl_get_rules(const void *inbufp, \
 				    size_t inbuflen, void *outbufp, \
 				    size_t outbuflen); }
 527	AUE_NULL	NOPROTO	{ int rctl_get_limits(const void *inbufp, \
 				    size_t inbuflen, void *outbufp, \
 				    size_t outbuflen); }
 528	AUE_NULL	NOPROTO	{ int rctl_add_rule(const void *inbufp, \
 				    size_t inbuflen, void *outbufp, \
 				    size_t outbuflen); }
 529	AUE_NULL	NOPROTO	{ int rctl_remove_rule(const void *inbufp, \
 				    size_t inbuflen, void *outbufp, \
 				    size_t outbuflen); }
 #ifdef PAD64_REQUIRED
 530	AUE_POSIX_FALLOCATE	STD	{ int freebsd32_posix_fallocate(int fd, \
 				    int pad, \
 				    uint32_t offset1, uint32_t offset2,\
 				    uint32_t len1, uint32_t len2); }
 531	AUE_POSIX_FADVISE	STD	{ int freebsd32_posix_fadvise(int fd, \
 				    int pad, \
 				    uint32_t offset1, uint32_t offset2,\
 				    uint32_t len1, uint32_t len2, \
 				    int advice); }
 532	AUE_WAIT6	STD	{ int freebsd32_wait6(int idtype, int pad, \
 				    uint32_t id1, uint32_t id2, \
 				    int *status, int options, \
 				    struct wrusage32 *wrusage, \
 				    siginfo_t *info); }
 #else
 530	AUE_POSIX_FALLOCATE	STD	{ int freebsd32_posix_fallocate(int fd,\
 				    uint32_t offset1, uint32_t offset2,\
 				    uint32_t len1, uint32_t len2); }
 531	AUE_POSIX_FADVISE	STD	{ int freebsd32_posix_fadvise(int fd, \
 				    uint32_t offset1, uint32_t offset2,\
 				    uint32_t len1, uint32_t len2, \
 				    int advice); }
 532	AUE_WAIT6	STD	{ int freebsd32_wait6(int idtype, \
 				    uint32_t id1, uint32_t id2, \
 				    int *status, int options, \
 				    struct wrusage32 *wrusage, \
 				    siginfo_t *info); }
 #endif
 533	AUE_CAP_RIGHTS_LIMIT	NOPROTO	{ \
 				    int cap_rights_limit(int fd, \
 				    cap_rights_t *rightsp); }
 534	AUE_CAP_IOCTLS_LIMIT	STD	{ \
 				    int freebsd32_cap_ioctls_limit(int fd, \
 				    const uint32_t *cmds, size_t ncmds); }
 535	AUE_CAP_IOCTLS_GET	STD	{ \
 				    ssize_t freebsd32_cap_ioctls_get(int fd, \
 				    uint32_t *cmds, size_t maxcmds); }
 536	AUE_CAP_FCNTLS_LIMIT	NOPROTO	{ int cap_fcntls_limit(int fd, \
 				    uint32_t fcntlrights); }
 537	AUE_CAP_FCNTLS_GET	NOPROTO	{ int cap_fcntls_get(int fd, \
 				    uint32_t *fcntlrightsp); }
 538	AUE_BINDAT	NOPROTO	{ int bindat(int fd, int s, \
 				    const struct sockaddr *name, \
 				    int namelen); }
 539	AUE_CONNECTAT	NOPROTO	{ int connectat(int fd, int s, \
 				    const struct sockaddr *name, \
 				    int namelen); }
 540	AUE_CHFLAGSAT	NOPROTO	{ int chflagsat(int fd, const char *path, \
 				    u_long flags, int atflag); }
 541	AUE_ACCEPT	NOPROTO	{ int accept4(int s, \
 				    struct sockaddr *name, \
 				    __socklen_t *anamelen, \
 				    int flags); }
 542	AUE_PIPE	NOPROTO	{ int pipe2(int *fildes, int flags); }
 543	AUE_AIO_MLOCK	STD	{ int freebsd32_aio_mlock( \
 				    struct aiocb32 *aiocbp); }
 #ifdef PAD64_REQUIRED
 544	AUE_PROCCTL	STD	{ int freebsd32_procctl(int idtype, int pad, \
 				    uint32_t id1, uint32_t id2, int com, \
 				    void *data); }
 #else
 544	AUE_PROCCTL	STD	{ int freebsd32_procctl(int idtype, \
 				    uint32_t id1, uint32_t id2, int com, \
 				    void *data); }
 #endif
 545	AUE_POLL	STD	{ int freebsd32_ppoll(struct pollfd *fds, \
 				    u_int nfds, const struct timespec32 *ts, \
 				    const sigset_t *set); }
 546	AUE_FUTIMES	STD	{ int freebsd32_futimens(int fd, \
 				    struct timespec *times); }
 547	AUE_FUTIMESAT	STD	{ int freebsd32_utimensat(int fd, \
 				    const char *path, \
 				    struct timespec *times, int flag); }
 548	AUE_NULL	OBSOL	numa_getaffinity
 549	AUE_NULL	OBSOL	numa_setaffinity
 550	AUE_FSYNC	NOPROTO	{ int fdatasync(int fd); }
 551	AUE_FSTAT	STD	{ int freebsd32_fstat(int fd, \
 				    struct stat32 *ub); }
 552	AUE_FSTATAT	STD	{ int freebsd32_fstatat(int fd, \
 				    const char *path, struct stat32 *buf, \
 				    int flag); }
 553	AUE_FHSTAT	STD	{ int freebsd32_fhstat( \
 				    const struct fhandle *u_fhp, \
 				    struct stat32 *sb); }
 554	AUE_GETDIRENTRIES NOPROTO	{ ssize_t getdirentries( \
 				    int fd, char *buf, size_t count, \
 				    off_t *basep); }
 555	AUE_STATFS	NOPROTO	{ int statfs(const char *path, \
 				    struct statfs32 *buf); }
 556	AUE_FSTATFS	NOPROTO	{ int fstatfs(int fd, struct statfs32 *buf); }
 557	AUE_GETFSSTAT	NOPROTO	{ int getfsstat(struct statfs32 *buf, \
 				    long bufsize, int mode); }
 558	AUE_FHSTATFS	NOPROTO	{ int fhstatfs(const struct fhandle *u_fhp, \
 				    struct statfs32 *buf); }
 #ifdef PAD64_REQUIRED
 559	AUE_MKNODAT	STD	{ int freebsd32_mknodat(int fd, \
 				    const char *path, mode_t mode, \
 				    int pad, uint32_t dev1, uint32_t dev2); }
 #else
 559	AUE_MKNODAT	STD	{ int freebsd32_mknodat(int fd, \
 				    const char *path, mode_t mode, \
 				    uint32_t dev1, uint32_t dev2); }
 #endif
 560	AUE_KEVENT	STD	{ int freebsd32_kevent(int fd, \
 				    const struct kevent32 *changelist, \
 				    int nchanges, \
 				    struct kevent32 *eventlist, \
 				    int nevents, \
 				    const struct timespec32 *timeout); }
 561	AUE_NULL	STD	{ int freebsd32_cpuset_getdomain(cpulevel_t level, \
 				    cpuwhich_t which, uint32_t id1, uint32_t id2, \
 				    size_t domainsetsize, domainset_t *mask, \
 				    int *policy); }
 562	AUE_NULL	STD	{ int freebsd32_cpuset_setdomain(cpulevel_t level, \
 				    cpuwhich_t which, uint32_t id1, uint32_t id2, \
 				    size_t domainsetsize, domainset_t *mask, \
 				    int policy); }
 563	AUE_NULL	NOPROTO	{ int getrandom(void *buf, size_t buflen, \
 				    unsigned int flags); }
 564	AUE_NULL	NOPROTO { int getfhat( int fd, char *path, \
 				    struct fhandle *fhp, int flags); }
 565	AUE_NULL	NOPROTO { int fhlink( struct fhandle *fhp, const char *to ); }
 566	AUE_NULL	NOPROTO { int fhlinkat( struct fhandle *fhp, int tofd, \
 				    const char *to); }
 567	AUE_NULL	NOPROTO { int fhreadlink( struct fhandle *fhp, char *buf, \
 				    size_t bufsize); }
+568	AUE_UNLINKAT	NOPROTO { int funlinkat(int dfd, const char *path, int fd, \
+				    int flag); }
 
 ; vim: syntax=off
Index: head/sys/compat/linux/linux_file.c
===================================================================
--- head/sys/compat/linux/linux_file.c	(revision 345981)
+++ head/sys/compat/linux/linux_file.c	(revision 345982)
@@ -1,1690 +1,1691 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 1994-1995 Søren Schmidt
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_compat.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/capsicum.h>
 #include <sys/conf.h>
 #include <sys/dirent.h>
 #include <sys/fcntl.h>
 #include <sys/file.h>
 #include <sys/filedesc.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
 #include <sys/proc.h>
 #include <sys/stat.h>
 #include <sys/sx.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysproto.h>
 #include <sys/tty.h>
 #include <sys/unistd.h>
 #include <sys/vnode.h>
 
 #ifdef COMPAT_LINUX32
 #include <machine/../linux32/linux.h>
 #include <machine/../linux32/linux32_proto.h>
 #else
 #include <machine/../linux/linux.h>
 #include <machine/../linux/linux_proto.h>
 #endif
 #include <compat/linux/linux_misc.h>
 #include <compat/linux/linux_util.h>
 #include <compat/linux/linux_file.h>
 
 static int	linux_common_open(struct thread *, int, char *, int, int);
 static int	linux_getdents_error(struct thread *, int, int);
 
 
 #ifdef LINUX_LEGACY_SYSCALLS
 /*
  * Linux creat(2): open the translated path write-only, creating it if
  * needed and truncating it, by way of kern_openat() relative to AT_FDCWD.
  * The kernel copy of the path is released before returning.
  */
 int
 linux_creat(struct thread *td, struct linux_creat_args *args)
 {
 	const int oflags = O_WRONLY | O_CREAT | O_TRUNC;
 	char *kpath;
 	int rv;
 
 	LCONVPATHEXIST(td, args->path, &kpath);
 #ifdef DEBUG
 	if (ldebug(creat))
 		printf(ARGS(creat, "%s, %d"), kpath, args->mode);
 #endif
 	rv = kern_openat(td, AT_FDCWD, kpath, UIO_SYSSPACE, oflags,
 	    args->mode);
 	LFREEPATH(kpath);
 	return (rv);
 }
 #endif
 
 /*
  * Shared back end for linux_open() and linux_openat().
  *
  * Translates the Linux open flags in l_flags into their native
  * equivalents and opens `path' (a kernel-space string, see UIO_SYSSPACE
  * below) relative to `dirfd' with kern_openat().  When the open succeeds
  * and O_NOCTTY was not requested, the new descriptor is looked up again
  * and, if it refers to a vnode and the calling process is a session
  * leader without a controlling terminal, a TIOCSCTTY ioctl is issued on
  * it; the result of that ioctl is ignored.
  *
  * `path' is released with LFREEPATH() on every exit path, so the caller
  * must not use or free it afterwards.
  *
  * Returns the error code produced by kern_openat().
  */
 static int
 linux_common_open(struct thread *td, int dirfd, char *path, int l_flags, int mode)
 {
 	struct proc *p = td->td_proc;
 	struct file *fp;
 	int fd;
 	int bsd_flags, error;
 
 	/* Access mode: anything other than WRONLY/RDWR is read-only. */
 	bsd_flags = 0;
 	switch (l_flags & LINUX_O_ACCMODE) {
 	case LINUX_O_WRONLY:
 		bsd_flags |= O_WRONLY;
 		break;
 	case LINUX_O_RDWR:
 		bsd_flags |= O_RDWR;
 		break;
 	default:
 		bsd_flags |= O_RDONLY;
 	}
 	/* LINUX_O_NDELAY and LINUX_O_NONBLOCK both map to O_NONBLOCK. */
 	if (l_flags & LINUX_O_NDELAY)
 		bsd_flags |= O_NONBLOCK;
 	if (l_flags & LINUX_O_APPEND)
 		bsd_flags |= O_APPEND;
 	if (l_flags & LINUX_O_SYNC)
 		bsd_flags |= O_FSYNC;
 	if (l_flags & LINUX_O_NONBLOCK)
 		bsd_flags |= O_NONBLOCK;
 	if (l_flags & LINUX_FASYNC)
 		bsd_flags |= O_ASYNC;
 	if (l_flags & LINUX_O_CREAT)
 		bsd_flags |= O_CREAT;
 	if (l_flags & LINUX_O_TRUNC)
 		bsd_flags |= O_TRUNC;
 	if (l_flags & LINUX_O_EXCL)
 		bsd_flags |= O_EXCL;
 	if (l_flags & LINUX_O_NOCTTY)
 		bsd_flags |= O_NOCTTY;
 	if (l_flags & LINUX_O_DIRECT)
 		bsd_flags |= O_DIRECT;
 	if (l_flags & LINUX_O_NOFOLLOW)
 		bsd_flags |= O_NOFOLLOW;
 	if (l_flags & LINUX_O_DIRECTORY)
 		bsd_flags |= O_DIRECTORY;
 	/* XXX LINUX_O_NOATIME: unable to be easily implemented. */
 
 	error = kern_openat(td, dirfd, path, UIO_SYSSPACE, bsd_flags, mode);
 	if (error != 0)
 		goto done;
 	/* The caller asked not to acquire a controlling terminal. */
 	if (bsd_flags & O_NOCTTY)
 		goto done;
 
 	/*
 	 * XXX In between kern_openat() and fget(), another process
 	 * having the same filedesc could use that fd without
 	 * checking below.
 	 */
 	/* The descriptor just opened is read back from td_retval[0]. */
 	fd = td->td_retval[0];
 	if (fget(td, fd, &cap_ioctl_rights, &fp) == 0) {
 		/* Only vnode-backed files are considered for TIOCSCTTY. */
 		if (fp->f_type != DTYPE_VNODE) {
 			fdrop(fp, td);
 			goto done;
 		}
 		sx_slock(&proctree_lock);
 		PROC_LOCK(p);
 		if (SESS_LEADER(p) && !(p->p_flag & P_CONTROLT)) {
 			/* Both locks are dropped before issuing the ioctl. */
 			PROC_UNLOCK(p);
 			sx_sunlock(&proctree_lock);
 			/* XXXPJD: Verify if TIOCSCTTY is allowed. */
 			(void) fo_ioctl(fp, TIOCSCTTY, (caddr_t) 0,
 			    td->td_ucred, td);
 		} else {
 			PROC_UNLOCK(p);
 			sx_sunlock(&proctree_lock);
 		}
 		fdrop(fp, td);
 	}
 
 done:
 #ifdef DEBUG
 #ifdef LINUX_LEGACY_SYSCALLS
 	if (ldebug(open))
 #else
 	if (ldebug(openat))
 #endif
 		printf(LMSG("open returns error %d"), error);
 #endif
 	LFREEPATH(path);
 	return (error);
 }
 
 /*
  * Linux openat(2): translate the directory descriptor and the path, then
  * hand both to linux_common_open(), which performs the open and releases
  * the translated path.
  */
 int
 linux_openat(struct thread *td, struct linux_openat_args *args)
 {
 	char *kpath;
 	int creating, dirfd;
 
 	/* Map the Linux "current directory" sentinel onto the native one. */
 	if (args->dfd == LINUX_AT_FDCWD)
 		dirfd = AT_FDCWD;
 	else
 		dirfd = args->dfd;
 
 	/* The path conversion is told whether the file may be created. */
 	creating = (args->flags & LINUX_O_CREAT) ? 1 : 0;
 	LCONVPATH_AT(td, args->filename, &kpath, creating, dirfd);
 #ifdef DEBUG
 	if (ldebug(openat))
 		printf(ARGS(openat, "%i, %s, 0x%x, 0x%x"), args->dfd,
 		    kpath, args->flags, args->mode);
 #endif
 	return (linux_common_open(td, dirfd, kpath, args->flags,
 	    args->mode));
 }
 
 #ifdef LINUX_LEGACY_SYSCALLS
 /*
  * Linux open(2): translate the path (using the "create" or the "exist"
  * conversion depending on LINUX_O_CREAT) and open it relative to
  * AT_FDCWD through linux_common_open(), which also releases the
  * translated path.
  */
 int
 linux_open(struct thread *td, struct linux_open_args *args)
 {
 	char *kpath;
 
 	if ((args->flags & LINUX_O_CREAT) == 0)
 		LCONVPATHEXIST(td, args->path, &kpath);
 	else
 		LCONVPATHCREAT(td, args->path, &kpath);
 #ifdef DEBUG
 	if (ldebug(open))
 		printf(ARGS(open, "%s, 0x%x, 0x%x"),
 		    kpath, args->flags, args->mode);
 #endif
 	return (linux_common_open(td, AT_FDCWD, kpath, args->flags,
 	    args->mode));
 }
 #endif
 
 /*
  * Linux lseek(2): forwards descriptor, offset and whence unchanged to
  * kern_lseek() and returns its result.
  */
 int
 linux_lseek(struct thread *td, struct linux_lseek_args *args)
 {
 	int rv;
 
 #ifdef DEBUG
 	if (ldebug(lseek))
 		printf(ARGS(lseek, "%d, %ld, %d"),
 		    args->fdes, (long)args->off, args->whence);
 #endif
 	rv = kern_lseek(td, args->fdes, args->off, args->whence);
 	return (rv);
 }
 
 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
 int
 linux_llseek(struct thread *td, struct linux_llseek_args *args)
 {
 	int error;
 	off_t off;
 
 #ifdef DEBUG
 	if (ldebug(llseek))
 		printf(ARGS(llseek, "%d, %d:%d, %d"),
 		    args->fd, args->ohigh, args->olow, args->whence);
 #endif
 	off = (args->olow) | (((off_t) args->ohigh) << 32);
 
 	error = kern_lseek(td, args->fd, off, args->whence);
 	if (error != 0)
 		return (error);
 
 	error = copyout(td->td_retval, args->res, sizeof(off_t));
 	if (error != 0)
 		return (error);
 
 	td->td_retval[0] = 0;
 	return (0);
 }
 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
 
 /*
  * Note that linux_getdents(2) and linux_getdents64(2) have the same
  * arguments. They only differ in the definition of struct dirent they
  * operate on.
  * Note that linux_readdir(2) is a special case of linux_getdents(2)
  * where count always equals 1, meaning that the buffer is one
  * dirent-structure in size and that the code can't handle more anyway.
  * Note that linux_readdir(2) can't be implemented by means of
  * linux_getdents(2): when the *dent buffer size is equal to 1,
  * linux_getdents(2) would trash the user stack.
  */
 
 /*
  * Pick the error to report after a failed directory read on `fd'.
  *
  * Linux reports ENOTDIR when the descriptor does not refer to a
  * directory, so the vnode type is checked first: a failure to obtain the
  * vnode is returned as is, a non-directory yields ENOTDIR, and otherwise
  * the caller's original error `err' is passed through.
  */
 static int
 linux_getdents_error(struct thread *td, int fd, int err)
 {
 	struct file *fp;
 	int isdir, rv;
 
 	rv = getvnode(td, fd, &cap_read_rights, &fp);
 	if (rv != 0)
 		return (rv);
 
 	/* Sample the vnode type while the file reference is still held. */
 	isdir = (fp->f_vnode->v_type == VDIR);
 	fdrop(fp, td);
 
 	return (isdir ? err : ENOTDIR);
 }
 
 /*
  * Directory record in the format copied out to user space by
  * linux_getdents().  Records are variable length: only the first
  * LINUX_RECLEN(namelen) bytes of the structure are copied out.
  */
 struct l_dirent {
 	l_ulong		d_ino;		/* filled from the native d_fileno */
 	l_off_t		d_off;		/* directory offset after this record */
 	l_ushort	d_reclen;	/* length of this record in bytes */
 	char		d_name[LINUX_NAME_MAX + 1];	/* NUL-terminated name */
 };
 
 /*
  * Directory record in the format copied out to user space by
  * linux_getdents64().  Unlike struct l_dirent it has an explicit d_type
  * member.  Only the first LINUX_RECLEN64(namelen) bytes are copied out.
  */
 struct l_dirent64 {
 	uint64_t	d_ino;		/* filled from the native d_fileno */
 	int64_t		d_off;		/* directory offset after this record */
 	l_ushort	d_reclen;	/* length of this record in bytes */
 	u_char		d_type;		/* filled from the native d_type */
 	char		d_name[LINUX_NAME_MAX + 1];	/* NUL-terminated name */
 };
 
 /*
  * Linux uses the last byte in the dirent buffer to store d_type,
  * at least glibc-2.7 requires it. That is why l_dirent is padded with 2 bytes.
  *
  * LINUX_RECLEN(namlen) is the size of a struct l_dirent record holding a
  * name of `namlen' characters, rounded up to a multiple of
  * sizeof(l_ulong).
  */
 #define LINUX_RECLEN(namlen)						\
     roundup(offsetof(struct l_dirent, d_name) + (namlen) + 2, sizeof(l_ulong))
 
 /*
  * Same for struct l_dirent64, rounded up to a multiple of
  * sizeof(uint64_t).  Only one extra byte is added here, presumably for
  * the terminating NUL, since d_type is a regular member of l_dirent64.
  */
 #define LINUX_RECLEN64(namlen)						\
     roundup(offsetof(struct l_dirent64, d_name) + (namlen) + 1,		\
     sizeof(uint64_t))
 
 #ifdef LINUX_LEGACY_SYSCALLS
 /*
  * Linux getdents(2): read native directory entries from args->fd and
  * copy them out to args->dent converted to struct l_dirent records.
  *
  * At most min(args->count, MAXBSIZE) bytes of native entries are read
  * into a kernel buffer with kern_getdirentries().  Each entry is then
  * rebuilt in a single-record staging buffer and copied out.  On success
  * td_retval[0] holds the number of bytes written to the user buffer.
  *
  * Returns 0 on success; the (possibly ENOTDIR-adjusted) error from
  * kern_getdirentries(); EINVAL when the remaining user buffer space is
  * smaller than the next record; or the error from copyout().
  */
 int
 linux_getdents(struct thread *td, struct linux_getdents_args *args)
 {
 	struct dirent *bdp;
 	caddr_t inp, buf;		/* BSD-format */
 	int len, reclen;		/* BSD-format */
 	caddr_t outp;			/* Linux-format */
 	int resid, linuxreclen;		/* Linux-format */
 	caddr_t lbuf;			/* Linux-format */
 	off_t base;
 	struct l_dirent *linux_dirent;
 	int buflen, error;
 	size_t retval;
 
 #ifdef DEBUG
 	if (ldebug(getdents))
 		printf(ARGS(getdents, "%d, *, %d"), args->fd, args->count);
 #endif
 	buflen = min(args->count, MAXBSIZE);
 	buf = malloc(buflen, M_TEMP, M_WAITOK);
 
 	error = kern_getdirentries(td, args->fd, buf, buflen,
 	    &base, NULL, UIO_SYSSPACE);
 	if (error != 0) {
 		/* Report ENOTDIR instead if fd is not a directory. */
 		error = linux_getdents_error(td, args->fd, error);
 		goto out1;
 	}
 
 	/*
 	 * Staging buffer large enough for one record with the longest name.
 	 * NOTE(review): it is zeroed only here and reused for every record,
 	 * so bytes between the end of a shorter name and the d_type byte may
 	 * still hold data from an earlier record -- confirm this is
 	 * acceptable.
 	 */
 	lbuf = malloc(LINUX_RECLEN(LINUX_NAME_MAX), M_TEMP, M_WAITOK | M_ZERO);
 
 	/* kern_getdirentries() left the number of bytes read in td_retval[0]. */
 	len = td->td_retval[0];
 	inp = buf;
 	outp = (caddr_t)args->dent;
 	resid = args->count;
 	retval = 0;
 
 	while (len > 0) {
 		bdp = (struct dirent *) inp;
 		reclen = bdp->d_reclen;
 		linuxreclen = LINUX_RECLEN(bdp->d_namlen);
 		/*
 		 * No more space in the user supplied dirent buffer.
 		 * Return EINVAL.
 		 *
 		 * NOTE(review): EINVAL is returned even when earlier
 		 * records were already copied out; td_retval[0] is not
 		 * updated in that case.
 		 */
 		if (resid < linuxreclen) {
 			error = EINVAL;
 			goto out;
 		}
 
 		linux_dirent = (struct l_dirent*)lbuf;
 		linux_dirent->d_ino = bdp->d_fileno;
 		linux_dirent->d_off = base + reclen;
 		linux_dirent->d_reclen = linuxreclen;
 		/*
 		 * Copy d_type to last byte of l_dirent buffer
 		 */
 		lbuf[linuxreclen - 1] = bdp->d_type;
 		/* The size limit keeps the name clear of the d_type byte. */
 		strlcpy(linux_dirent->d_name, bdp->d_name,
 		    linuxreclen - offsetof(struct l_dirent, d_name)-1);
 		error = copyout(linux_dirent, outp, linuxreclen);
 		if (error != 0)
 			goto out;
 
 		/* Advance past the consumed native entry ... */
 		inp += reclen;
 		base += reclen;
 		len -= reclen;
 
 		/* ... and past the Linux record just written. */
 		retval += linuxreclen;
 		outp += linuxreclen;
 		resid -= linuxreclen;
 	}
 	td->td_retval[0] = retval;
 
 out:
 	free(lbuf, M_TEMP);
 out1:
 	free(buf, M_TEMP);
 	return (error);
 }
 #endif
 
 /*
  * Linux getdents64(2): read native directory entries from args->fd and
  * copy them out to args->dirent converted to struct l_dirent64 records.
  *
  * At most min(args->count, MAXBSIZE) bytes of native entries are read
  * into a kernel buffer with kern_getdirentries().  Each entry is then
  * rebuilt in a single-record staging buffer and copied out.  On success
  * td_retval[0] holds the number of bytes written to the user buffer.
  *
  * Returns 0 on success; the (possibly ENOTDIR-adjusted) error from
  * kern_getdirentries(); EINVAL when the remaining user buffer space is
  * smaller than the next record; or the error from copyout().
  */
 int
 linux_getdents64(struct thread *td, struct linux_getdents64_args *args)
 {
 	struct dirent *bdp;
 	caddr_t inp, buf;		/* BSD-format */
 	int len, reclen;		/* BSD-format */
 	caddr_t outp;			/* Linux-format */
 	int resid, linuxreclen;		/* Linux-format */
 	caddr_t lbuf;			/* Linux-format */
 	off_t base;
 	struct l_dirent64 *linux_dirent64;
 	int buflen, error;
 	size_t retval;
 
 #ifdef DEBUG
 	/* NOTE(review): uses uprintf() where linux_getdents() uses printf(). */
 	if (ldebug(getdents64))
 		uprintf(ARGS(getdents64, "%d, *, %d"), args->fd, args->count);
 #endif
 	buflen = min(args->count, MAXBSIZE);
 	buf = malloc(buflen, M_TEMP, M_WAITOK);
 
 	error = kern_getdirentries(td, args->fd, buf, buflen,
 	    &base, NULL, UIO_SYSSPACE);
 	if (error != 0) {
 		/* Report ENOTDIR instead if fd is not a directory. */
 		error = linux_getdents_error(td, args->fd, error);
 		goto out1;
 	}
 
 	/*
 	 * Staging buffer large enough for one record with the longest name.
 	 * NOTE(review): it is zeroed only here and reused for every record,
 	 * so padding bytes after a shorter name may still hold data from an
 	 * earlier record -- confirm this is acceptable.
 	 */
 	lbuf = malloc(LINUX_RECLEN64(LINUX_NAME_MAX), M_TEMP, M_WAITOK | M_ZERO);
 
 	/* kern_getdirentries() left the number of bytes read in td_retval[0]. */
 	len = td->td_retval[0];
 	inp = buf;
 	outp = (caddr_t)args->dirent;
 	resid = args->count;
 	retval = 0;
 
 	while (len > 0) {
 		bdp = (struct dirent *) inp;
 		reclen = bdp->d_reclen;
 		linuxreclen = LINUX_RECLEN64(bdp->d_namlen);
 		/*
 		 * No more space in the user supplied dirent buffer.
 		 * Return EINVAL.
 		 *
 		 * NOTE(review): EINVAL is returned even when earlier
 		 * records were already copied out; td_retval[0] is not
 		 * updated in that case.
 		 */
 		if (resid < linuxreclen) {
 			error = EINVAL;
 			goto out;
 		}
 
 		linux_dirent64 = (struct l_dirent64*)lbuf;
 		linux_dirent64->d_ino = bdp->d_fileno;
 		linux_dirent64->d_off = base + reclen;
 		linux_dirent64->d_reclen = linuxreclen;
 		linux_dirent64->d_type = bdp->d_type;
 		strlcpy(linux_dirent64->d_name, bdp->d_name,
 		    linuxreclen - offsetof(struct l_dirent64, d_name));
 		error = copyout(linux_dirent64, outp, linuxreclen);
 		if (error != 0)
 			goto out;
 
 		/* Advance past the consumed native entry ... */
 		inp += reclen;
 		base += reclen;
 		len -= reclen;
 
 		/* ... and past the Linux record just written. */
 		retval += linuxreclen;
 		outp += linuxreclen;
 		resid -= linuxreclen;
 	}
 	td->td_retval[0] = retval;
 
 out:
 	free(lbuf, M_TEMP);
 out1:
 	free(buf, M_TEMP);
 	return (error);
 }
 
 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
 /*
  * Old-style readdir emulation: read native directory data from args->fd,
  * convert the first entry found in the buffer to a struct l_dirent and copy
  * it out to args->dent.
  *
  * On success td->td_retval[0] is set to the length of the converted record;
  * when the read produced no data the function returns with td_retval[0]
  * still 0.  Errors from kern_getdirentries() are passed through
  * linux_getdents_error(); copyout() errors are returned unchanged.
  */
 int
 linux_readdir(struct thread *td, struct linux_readdir_args *args)
 {
 	struct dirent *bdp;
 	caddr_t buf;			/* BSD-format */
 	int linuxreclen;		/* Linux-format */
 	caddr_t lbuf;			/* Linux-format */
 	off_t base;
 	struct l_dirent *linux_dirent;
 	int buflen, error;
 
 #ifdef DEBUG
 	if (ldebug(readdir))
 		printf(ARGS(readdir, "%d, *"), args->fd);
 #endif
 	/*
 	 * NOTE(review): the native read buffer is sized with the Linux record
 	 * length macro; only the first entry placed in it is used below.
 	 * Confirm that any further entries returned by the same read are not
 	 * skipped for the caller.
 	 */
 	buflen = LINUX_RECLEN(LINUX_NAME_MAX);
 	buf = malloc(buflen, M_TEMP, M_WAITOK);
 
 	error = kern_getdirentries(td, args->fd, buf, buflen,
 	    &base, NULL, UIO_SYSSPACE);
 	if (error != 0) {
 		error = linux_getdents_error(td, args->fd, error);
 		goto out;
 	}
 	/* Nothing was read: return success with a zero result. */
 	if (td->td_retval[0] == 0)
 		goto out;
 
 	lbuf = malloc(LINUX_RECLEN(LINUX_NAME_MAX), M_TEMP, M_WAITOK | M_ZERO);
 
 	bdp = (struct dirent *) buf;
 	linuxreclen = LINUX_RECLEN(bdp->d_namlen);
 
 	linux_dirent = (struct l_dirent*)lbuf;
 	linux_dirent->d_ino = bdp->d_fileno;
 	/* d_off receives the record length and d_reclen the name length. */
 	linux_dirent->d_off = linuxreclen;
 	linux_dirent->d_reclen = bdp->d_namlen;
 	strlcpy(linux_dirent->d_name, bdp->d_name,
 	    linuxreclen - offsetof(struct l_dirent, d_name));
 	error = copyout(linux_dirent, args->dent, linuxreclen);
 	if (error == 0)
 		td->td_retval[0] = linuxreclen;
 
 	free(lbuf, M_TEMP);
 out:
 	free(buf, M_TEMP);
 	return (error);
 }
 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
 
 
 /*
  * These exist mainly for hooks for doing /compat/linux translation.
  */
 
 #ifdef LINUX_LEGACY_SYSCALLS
 /*
  * access(2) for Linux binaries.  Mode bits outside of F_OK, X_OK, W_OK and
  * R_OK are rejected with EINVAL; otherwise the converted path is checked
  * with kern_accessat() relative to the current directory.
  */
 int
 linux_access(struct thread *td, struct linux_access_args *args)
 {
 	int rc;
 	char *lpath;
 
 	/* Linux convention. */
 	if ((args->amode & ~(F_OK | X_OK | W_OK | R_OK)) != 0)
 		return (EINVAL);
 
 	LCONVPATHEXIST(td, args->path, &lpath);
 
 #ifdef DEBUG
 	if (ldebug(access))
 		printf(ARGS(access, "%s, %d"), lpath, args->amode);
 #endif
 	rc = kern_accessat(td, AT_FDCWD, lpath, UIO_SYSSPACE, 0, args->amode);
 	LFREEPATH(lpath);
 
 	return (rc);
 }
 #endif
 
 /*
  * faccessat(2) for Linux binaries.  Mode bits outside of F_OK, X_OK, W_OK
  * and R_OK are rejected with EINVAL.  LINUX_AT_FDCWD is mapped to the native
  * AT_FDCWD; any other directory descriptor is passed through unchanged.
  * The converted path is then checked with kern_accessat() and its result
  * returned.
  */
 int
 linux_faccessat(struct thread *td, struct linux_faccessat_args *args)
 {
 	char *path;
 	int error, dfd;
 
 	/* Linux convention. */
 	if (args->amode & ~(F_OK | X_OK | W_OK | R_OK))
 		return (EINVAL);
 
 	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
 	LCONVPATHEXIST_AT(td, args->filename, &path, dfd);
 
 #ifdef DEBUG
 	/* Trace under this syscall's own name, matching the ldebug() key. */
 	if (ldebug(faccessat))
 		printf(ARGS(faccessat, "%s, %d"), path, args->amode);
 #endif
 
 	error = kern_accessat(td, dfd, path, UIO_SYSSPACE, 0, args->amode);
 	LFREEPATH(path);
 
 	return (error);
 }
 
 #ifdef LINUX_LEGACY_SYSCALLS
 /*
  * unlink(2) for Linux binaries: remove the converted path relative to the
  * current directory.  When the removal fails with EPERM and a follow-up
  * stat shows the path to be a directory, EISDIR is returned instead.
  */
 int
 linux_unlink(struct thread *td, struct linux_unlink_args *args)
 {
 	char *path;
 	int error;
 	struct stat st;
 
 	LCONVPATHEXIST(td, args->path, &path);
 
 #ifdef DEBUG
 	if (ldebug(unlink))
 		printf(ARGS(unlink, "%s"), path);
 #endif
 
-	error = kern_unlinkat(td, AT_FDCWD, path, UIO_SYSSPACE, 0, 0);
+	error = kern_funlinkat(td, AT_FDCWD, path, FD_NONE, UIO_SYSSPACE, 0, 0);
 	if (error == EPERM) {
 		/* Introduce POSIX noncompliant behaviour of Linux */
 		/*
 		 * NOTE(review): this stat is issued with flag 0, whereas
 		 * linux_unlinkat() passes AT_SYMLINK_NOFOLLOW for the same
 		 * check; confirm whether following symlinks is intended here.
 		 */
 		if (kern_statat(td, 0, AT_FDCWD, path, UIO_SYSSPACE, &st,
 		    NULL) == 0) {
 			if (S_ISDIR(st.st_mode))
 				error = EISDIR;
 		}
 	}
 	LFREEPATH(path);
 	return (error);
 }
 #endif
 
 /*
  * unlinkat(2) for Linux binaries.  Only LINUX_AT_REMOVEDIR is accepted in
  * args->flag (anything else yields EINVAL).  With that flag the converted
  * path is removed as a directory, otherwise as a file.  For the file case,
  * an EPERM failure on something that a no-follow stat reports as a
  * directory is turned into EISDIR.
  */
 int
 linux_unlinkat(struct thread *td, struct linux_unlinkat_args *args)
 {
 	char *path;
 	int error, dfd;
 	struct stat st;
 
 	if (args->flag & ~LINUX_AT_REMOVEDIR)
 		return (EINVAL);
 
 	/* Map the Linux "current directory" descriptor to the native one. */
 	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
 	LCONVPATHEXIST_AT(td, args->pathname, &path, dfd);
 
 #ifdef DEBUG
 	if (ldebug(unlinkat))
 		printf(ARGS(unlinkat, "%s"), path);
 #endif
 
 	if (args->flag & LINUX_AT_REMOVEDIR)
-		error = kern_rmdirat(td, dfd, path, UIO_SYSSPACE, 0);
+		error = kern_frmdirat(td, dfd, path, FD_NONE, UIO_SYSSPACE, 0);
 	else
-		error = kern_unlinkat(td, dfd, path, UIO_SYSSPACE, 0, 0);
+		error = kern_funlinkat(td, dfd, path, FD_NONE, UIO_SYSSPACE, 0,
+		    0);
 	if (error == EPERM && !(args->flag & LINUX_AT_REMOVEDIR)) {
 		/* Introduce POSIX noncompliant behaviour of Linux */
 		if (kern_statat(td, AT_SYMLINK_NOFOLLOW, dfd, path,
 		    UIO_SYSSPACE, &st, NULL) == 0 && S_ISDIR(st.st_mode))
 			error = EISDIR;
 	}
 	LFREEPATH(path);
 	return (error);
 }
 /*
  * chdir(2) for Linux binaries: convert the user path and change the
  * working directory through kern_chdir().
  */
 int
 linux_chdir(struct thread *td, struct linux_chdir_args *args)
 {
 	int rc;
 	char *lpath;
 
 	LCONVPATHEXIST(td, args->path, &lpath);
 
 #ifdef DEBUG
 	if (ldebug(chdir))
 		printf(ARGS(chdir, "%s"), lpath);
 #endif
 	rc = kern_chdir(td, lpath, UIO_SYSSPACE);
 	LFREEPATH(lpath);
 
 	return (rc);
 }
 
 #ifdef LINUX_LEGACY_SYSCALLS
 /*
  * chmod(2) for Linux binaries: convert the user path and apply args->mode
  * with kern_fchmodat() relative to the current directory.
  */
 int
 linux_chmod(struct thread *td, struct linux_chmod_args *args)
 {
 	int rc;
 	char *lpath;
 
 	LCONVPATHEXIST(td, args->path, &lpath);
 
 #ifdef DEBUG
 	if (ldebug(chmod))
 		printf(ARGS(chmod, "%s, %d"), lpath, args->mode);
 #endif
 	rc = kern_fchmodat(td, AT_FDCWD, lpath, UIO_SYSSPACE, args->mode, 0);
 	LFREEPATH(lpath);
 
 	return (rc);
 }
 #endif
 
 /*
  * fchmodat(2) for Linux binaries: map LINUX_AT_FDCWD to AT_FDCWD, convert
  * the user path relative to that descriptor and apply args->mode with
  * kern_fchmodat().
  */
 int
 linux_fchmodat(struct thread *td, struct linux_fchmodat_args *args)
 {
 	int dirfd, rc;
 	char *lpath;
 
 	if (args->dfd == LINUX_AT_FDCWD)
 		dirfd = AT_FDCWD;
 	else
 		dirfd = args->dfd;
 	LCONVPATHEXIST_AT(td, args->filename, &lpath, dirfd);
 
 #ifdef DEBUG
 	if (ldebug(fchmodat))
 		printf(ARGS(fchmodat, "%s, %d"), lpath, args->mode);
 #endif
 
 	rc = kern_fchmodat(td, dirfd, lpath, UIO_SYSSPACE, args->mode, 0);
 	LFREEPATH(lpath);
 
 	return (rc);
 }
 
 #ifdef LINUX_LEGACY_SYSCALLS
 /*
  * mkdir(2) for Linux binaries: convert the user path for creation and
  * create the directory with kern_mkdirat() relative to the current
  * directory.
  */
 int
 linux_mkdir(struct thread *td, struct linux_mkdir_args *args)
 {
 	int rc;
 	char *lpath;
 
 	LCONVPATHCREAT(td, args->path, &lpath);
 
 #ifdef DEBUG
 	if (ldebug(mkdir))
 		printf(ARGS(mkdir, "%s, %d"), lpath, args->mode);
 #endif
 	rc = kern_mkdirat(td, AT_FDCWD, lpath, UIO_SYSSPACE, args->mode);
 	LFREEPATH(lpath);
 
 	return (rc);
 }
 #endif
 
 /*
  * mkdirat(2) for Linux binaries: map LINUX_AT_FDCWD to AT_FDCWD, convert
  * the user path for creation relative to that descriptor and create the
  * directory with kern_mkdirat().
  */
 int
 linux_mkdirat(struct thread *td, struct linux_mkdirat_args *args)
 {
 	int dirfd, rc;
 	char *lpath;
 
 	if (args->dfd == LINUX_AT_FDCWD)
 		dirfd = AT_FDCWD;
 	else
 		dirfd = args->dfd;
 	LCONVPATHCREAT_AT(td, args->pathname, &lpath, dirfd);
 
 #ifdef DEBUG
 	if (ldebug(mkdirat))
 		printf(ARGS(mkdirat, "%s, %d"), lpath, args->mode);
 #endif
 	rc = kern_mkdirat(td, dirfd, lpath, UIO_SYSSPACE, args->mode);
 	LFREEPATH(lpath);
 
 	return (rc);
 }
 
 #ifdef LINUX_LEGACY_SYSCALLS
 /*
  * rmdir(2) for Linux binaries: convert the user path and remove the
  * directory relative to the current directory, returning the result of the
  * removal call.
  */
 int
 linux_rmdir(struct thread *td, struct linux_rmdir_args *args)
 {
 	char *path;
 	int error;
 
 	LCONVPATHEXIST(td, args->path, &path);
 
 #ifdef DEBUG
 	if (ldebug(rmdir))
 		printf(ARGS(rmdir, "%s"), path);
 #endif
-	error = kern_rmdirat(td, AT_FDCWD, path, UIO_SYSSPACE, 0);
+	error = kern_frmdirat(td, AT_FDCWD, path, FD_NONE, UIO_SYSSPACE, 0);
 	LFREEPATH(path);
 	return (error);
 }
 
 /*
  * rename(2) for Linux binaries: convert both user paths and rename with
  * kern_renameat(), both names being relative to the current directory.
  */
 int
 linux_rename(struct thread *td, struct linux_rename_args *args)
 {
 	int rc;
 	char *src, *dst;
 
 	LCONVPATHEXIST(td, args->from, &src);
 
 	/*
 	 * The destination is converted by hand rather than with the
 	 * creation macro so that the source path can be released when the
 	 * conversion fails.
 	 */
 	rc = linux_emul_convpath(td, args->to, UIO_USERSPACE, &dst, 1,
 	    AT_FDCWD);
 	if (dst == NULL) {
 		LFREEPATH(src);
 		return (rc);
 	}
 
 #ifdef DEBUG
 	if (ldebug(rename))
 		printf(ARGS(rename, "%s, %s"), src, dst);
 #endif
 	rc = kern_renameat(td, AT_FDCWD, src, AT_FDCWD, dst, UIO_SYSSPACE);
 	LFREEPATH(src);
 	LFREEPATH(dst);
 
 	return (rc);
 }
 #endif
 
 /*
  * renameat(2) for Linux binaries: map LINUX_AT_FDCWD to AT_FDCWD for both
  * directory descriptors, convert both user paths and rename with
  * kern_renameat().
  */
 int
 linux_renameat(struct thread *td, struct linux_renameat_args *args)
 {
 	int rc, srcfd, dstfd;
 	char *src, *dst;
 
 	srcfd = args->olddfd;
 	if (srcfd == LINUX_AT_FDCWD)
 		srcfd = AT_FDCWD;
 	dstfd = args->newdfd;
 	if (dstfd == LINUX_AT_FDCWD)
 		dstfd = AT_FDCWD;
 
 	LCONVPATHEXIST_AT(td, args->oldname, &src, srcfd);
 
 	/*
 	 * The destination is converted by hand rather than with the
 	 * creation macro so that the source path can be released when the
 	 * conversion fails.
 	 */
 	rc = linux_emul_convpath(td, args->newname, UIO_USERSPACE, &dst, 1,
 	    dstfd);
 	if (dst == NULL) {
 		LFREEPATH(src);
 		return (rc);
 	}
 
 #ifdef DEBUG
 	if (ldebug(renameat))
 		printf(ARGS(renameat, "%s, %s"), src, dst);
 #endif
 	rc = kern_renameat(td, srcfd, src, dstfd, dst, UIO_SYSSPACE);
 	LFREEPATH(src);
 	LFREEPATH(dst);
 
 	return (rc);
 }
 
 #ifdef LINUX_LEGACY_SYSCALLS
 /*
  * symlink(2) for Linux binaries: convert the target and the link name and
  * create the link with kern_symlinkat() relative to the current directory.
  */
 int
 linux_symlink(struct thread *td, struct linux_symlink_args *args)
 {
 	int rc;
 	char *target, *linkname;
 
 	LCONVPATHEXIST(td, args->path, &target);
 
 	/*
 	 * The link name is converted by hand rather than with the creation
 	 * macro so that the target path can be released when the conversion
 	 * fails.
 	 */
 	rc = linux_emul_convpath(td, args->to, UIO_USERSPACE, &linkname, 1,
 	    AT_FDCWD);
 	if (linkname == NULL) {
 		LFREEPATH(target);
 		return (rc);
 	}
 
 #ifdef DEBUG
 	if (ldebug(symlink))
 		printf(ARGS(symlink, "%s, %s"), target, linkname);
 #endif
 	rc = kern_symlinkat(td, target, AT_FDCWD, linkname, UIO_SYSSPACE);
 	LFREEPATH(target);
 	LFREEPATH(linkname);
 
 	return (rc);
 }
 #endif
 
 /*
  * symlinkat(2) for Linux binaries: map LINUX_AT_FDCWD to AT_FDCWD, convert
  * the target and the link name (the latter relative to the directory
  * descriptor) and create the link with kern_symlinkat().
  */
 int
 linux_symlinkat(struct thread *td, struct linux_symlinkat_args *args)
 {
 	int dirfd, rc;
 	char *target, *linkname;
 
 	if (args->newdfd == LINUX_AT_FDCWD)
 		dirfd = AT_FDCWD;
 	else
 		dirfd = args->newdfd;
 
 	LCONVPATHEXIST(td, args->oldname, &target);
 
 	/*
 	 * The link name is converted by hand rather than with the creation
 	 * macro so that the target path can be released when the conversion
 	 * fails.
 	 */
 	rc = linux_emul_convpath(td, args->newname, UIO_USERSPACE, &linkname,
 	    1, dirfd);
 	if (linkname == NULL) {
 		LFREEPATH(target);
 		return (rc);
 	}
 
 #ifdef DEBUG
 	if (ldebug(symlinkat))
 		printf(ARGS(symlinkat, "%s, %s"), target, linkname);
 #endif
 
 	rc = kern_symlinkat(td, target, dirfd, linkname, UIO_SYSSPACE);
 	LFREEPATH(target);
 	LFREEPATH(linkname);
 
 	return (rc);
 }
 
 #ifdef LINUX_LEGACY_SYSCALLS
 /*
  * readlink(2) for Linux binaries: convert the user path and read the link
  * contents into the user buffer args->buf (at most args->count bytes) with
  * kern_readlinkat() relative to the current directory.
  */
 int
 linux_readlink(struct thread *td, struct linux_readlink_args *args)
 {
 	int rc;
 	char *lpath;
 
 	LCONVPATHEXIST(td, args->name, &lpath);
 
 #ifdef DEBUG
 	if (ldebug(readlink))
 		printf(ARGS(readlink, "%s, %p, %d"), lpath, (void *)args->buf,
 		    args->count);
 #endif
 	rc = kern_readlinkat(td, AT_FDCWD, lpath, UIO_SYSSPACE, args->buf,
 	    UIO_USERSPACE, args->count);
 	LFREEPATH(lpath);
 
 	return (rc);
 }
 #endif
 
 /*
  * readlinkat(2) for Linux binaries: map LINUX_AT_FDCWD to AT_FDCWD, convert
  * the user path relative to that descriptor and read the link contents into
  * the user buffer args->buf (at most args->bufsiz bytes).
  */
 int
 linux_readlinkat(struct thread *td, struct linux_readlinkat_args *args)
 {
 	int dirfd, rc;
 	char *lpath;
 
 	if (args->dfd == LINUX_AT_FDCWD)
 		dirfd = AT_FDCWD;
 	else
 		dirfd = args->dfd;
 	LCONVPATHEXIST_AT(td, args->path, &lpath, dirfd);
 
 #ifdef DEBUG
 	if (ldebug(readlinkat))
 		printf(ARGS(readlinkat, "%s, %p, %d"), lpath, (void *)args->buf,
 		    args->bufsiz);
 #endif
 
 	rc = kern_readlinkat(td, dirfd, lpath, UIO_SYSSPACE, args->buf,
 	    UIO_USERSPACE, args->bufsiz);
 	LFREEPATH(lpath);
 
 	return (rc);
 }
 
 /*
  * truncate(2) for Linux binaries: convert the user path and set the file
  * length to args->length with kern_truncate().
  */
 int
 linux_truncate(struct thread *td, struct linux_truncate_args *args)
 {
 	int rc;
 	char *lpath;
 
 	LCONVPATHEXIST(td, args->path, &lpath);
 
 #ifdef DEBUG
 	if (ldebug(truncate))
 		printf(ARGS(truncate, "%s, %ld"), lpath, (long)args->length);
 #endif
 
 	rc = kern_truncate(td, lpath, UIO_SYSSPACE, args->length);
 	LFREEPATH(lpath);
 
 	return (rc);
 }
 
 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
 /*
  * truncate64 for Linux binaries: convert the user path and set the file
  * length to args->length with kern_truncate().
  */
 int
 linux_truncate64(struct thread *td, struct linux_truncate64_args *args)
 {
 	int rc;
 	char *lpath;
 
 	LCONVPATHEXIST(td, args->path, &lpath);
 
 #ifdef DEBUG
 	if (ldebug(truncate64))
 		printf(ARGS(truncate64, "%s, %jd"), lpath, args->length);
 #endif
 
 	rc = kern_truncate(td, lpath, UIO_SYSSPACE, args->length);
 	LFREEPATH(lpath);
 
 	return (rc);
 }
 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
 
 /*
  * ftruncate(2) for Linux binaries: forward the descriptor and the new
  * length to kern_ftruncate() and return its result.
  */
 int
 linux_ftruncate(struct thread *td, struct linux_ftruncate_args *args)
 {
 	int rc;
 
 	rc = kern_ftruncate(td, args->fd, args->length);
 	return (rc);
 }
 
 #ifdef LINUX_LEGACY_SYSCALLS
 /*
  * link(2) for Linux binaries: convert the existing path and the new name
  * and create the hard link with kern_linkat(), following symlinks, with
  * both names relative to the current directory.
  */
 int
 linux_link(struct thread *td, struct linux_link_args *args)
 {
 	int rc;
 	char *existing, *newname;
 
 	LCONVPATHEXIST(td, args->path, &existing);
 
 	/*
 	 * The new name is converted by hand rather than with the creation
 	 * macro so that the existing path can be released when the
 	 * conversion fails.
 	 */
 	rc = linux_emul_convpath(td, args->to, UIO_USERSPACE, &newname, 1,
 	    AT_FDCWD);
 	if (newname == NULL) {
 		LFREEPATH(existing);
 		return (rc);
 	}
 
 #ifdef DEBUG
 	if (ldebug(link))
 		printf(ARGS(link, "%s, %s"), existing, newname);
 #endif
 	rc = kern_linkat(td, AT_FDCWD, AT_FDCWD, existing, newname,
 	    UIO_SYSSPACE, FOLLOW);
 	LFREEPATH(existing);
 	LFREEPATH(newname);
 
 	return (rc);
 }
 #endif
 
 /*
  * linkat(2) for Linux binaries.  Only LINUX_AT_SYMLINK_FOLLOW is accepted
  * in args->flag (anything else yields EINVAL).  Both directory descriptors
  * have LINUX_AT_FDCWD mapped to AT_FDCWD, both paths are converted, and the
  * link is created with kern_linkat(), following symlinks only when the
  * flag is set.
  */
 int
 linux_linkat(struct thread *td, struct linux_linkat_args *args)
 {
 	int rc, srcfd, dstfd, lookup;
 	char *existing, *newname;
 
 	if ((args->flag & ~LINUX_AT_SYMLINK_FOLLOW) != 0)
 		return (EINVAL);
 
 	srcfd = args->olddfd;
 	if (srcfd == LINUX_AT_FDCWD)
 		srcfd = AT_FDCWD;
 	dstfd = args->newdfd;
 	if (dstfd == LINUX_AT_FDCWD)
 		dstfd = AT_FDCWD;
 
 	LCONVPATHEXIST_AT(td, args->oldname, &existing, srcfd);
 
 	/*
 	 * The new name is converted by hand rather than with the creation
 	 * macro so that the existing path can be released when the
 	 * conversion fails.
 	 */
 	rc = linux_emul_convpath(td, args->newname, UIO_USERSPACE, &newname,
 	    1, dstfd);
 	if (newname == NULL) {
 		LFREEPATH(existing);
 		return (rc);
 	}
 
 #ifdef DEBUG
 	if (ldebug(linkat))
 		printf(ARGS(linkat, "%i, %s, %i, %s, %i"), args->olddfd,
 		    existing, args->newdfd, newname, args->flag);
 #endif
 
 	if ((args->flag & LINUX_AT_SYMLINK_FOLLOW) != 0)
 		lookup = FOLLOW;
 	else
 		lookup = NOFOLLOW;
 	rc = kern_linkat(td, srcfd, dstfd, existing, newname, UIO_SYSSPACE,
 	    lookup);
 	LFREEPATH(existing);
 	LFREEPATH(newname);
 
 	return (rc);
 }
 
 /*
  * fdatasync(2) for Linux binaries: sync the descriptor through
  * kern_fsync() with its last argument set to false.
  */
 int
 linux_fdatasync(struct thread *td, struct linux_fdatasync_args *uap)
 {
 	int rc;
 
 	rc = kern_fsync(td, uap->fd, false);
 	return (rc);
 }
 
 /*
  * pread(2) for Linux binaries.  The read itself is done by kern_pread();
  * after a successful read the descriptor is looked up again and EISDIR is
  * returned if it refers to a directory.
  */
 int
 linux_pread(struct thread *td, struct linux_pread_args *uap)
 {
 	struct vnode *vp;
 	int error, isdir;
 
 	error = kern_pread(td, uap->fd, uap->buf, uap->nbyte, uap->offset);
 	if (error != 0)
 		return (error);
 
 	/* This seems to violate POSIX but Linux does it. */
 	error = fgetvp(td, uap->fd, &cap_pread_rights, &vp);
 	if (error != 0)
 		return (error);
 
 	/* Sample the type before the vnode reference is dropped. */
 	isdir = (vp->v_type == VDIR);
 	vrele(vp);
 
 	return (isdir ? EISDIR : 0);
 }
 
 /*
  * pwrite(2) for Linux binaries: forward descriptor, buffer, length and
  * offset to kern_pwrite() and return its result.
  */
 int
 linux_pwrite(struct thread *td, struct linux_pwrite_args *uap)
 {
 	int rc;
 
 	rc = kern_pwrite(td, uap->fd, uap->buf, uap->nbyte, uap->offset);
 	return (rc);
 }
 
 /*
  * preadv(2) for Linux binaries: rebuild the file offset from its two
  * halves, copy in the iovec array and perform the read with kern_preadv().
  * A negative offset yields EINVAL; copy-in and read errors are returned
  * unchanged.
  */
 int
 linux_preadv(struct thread *td, struct linux_preadv_args *uap)
 {
 	struct uio *auio;
 	int error;
 	off_t offset;
 
 	/*
 	 * According to
 	 * http://man7.org/linux/man-pages/man2/preadv.2.html#NOTES
 	 * pos_l and pos_h, respectively, contain the low order and high
 	 * order halves of the offset, each passed as a Linux `long'.
 	 *
 	 * The shift is therefore half the width of the argument, applied
 	 * twice, instead of half the width of the 64-bit off_t: with a
 	 * 32-bit argument pos_h lands in bits 32-63, with a 64-bit argument
 	 * it is shifted out completely as before.  Shifting in two steps
 	 * keeps each shift count below the width of off_t.
 	 *
 	 * NOTE(review): assumes pos_l is declared with the Linux ABI's
 	 * unsigned long type - confirm against the syscall argument
 	 * structure.
 	 */
 	offset = (((off_t)uap->pos_h << (sizeof(uap->pos_l) * 4)) <<
 	    (sizeof(uap->pos_l) * 4)) | uap->pos_l;
 	if (offset < 0)
 		return (EINVAL);
 #ifdef COMPAT_LINUX32
 	error = linux32_copyinuio(PTRIN(uap->vec), uap->vlen, &auio);
 #else
 	error = copyinuio(uap->vec, uap->vlen, &auio);
 #endif
 	if (error != 0)
 		return (error);
 	error = kern_preadv(td, uap->fd, auio, offset);
 	free(auio, M_IOV);
 	return (error);
 }
 
 /*
  * pwritev(2) for Linux binaries: rebuild the file offset from its two
  * halves, copy in the iovec array and perform the write with
  * kern_pwritev().  A negative offset yields EINVAL; copy-in and write
  * errors are returned unchanged.
  */
 int
 linux_pwritev(struct thread *td, struct linux_pwritev_args *uap)
 {
 	struct uio *auio;
 	int error;
 	off_t offset;
 
 	/*
 	 * According to
 	 * http://man7.org/linux/man-pages/man2/pwritev.2.html#NOTES
 	 * pos_l and pos_h, respectively, contain the low order and high
 	 * order halves of the offset, each passed as a Linux `long'.
 	 *
 	 * The shift is therefore half the width of the argument, applied
 	 * twice, instead of half the width of the 64-bit off_t: with a
 	 * 32-bit argument pos_h lands in bits 32-63, with a 64-bit argument
 	 * it is shifted out completely as before.  Shifting in two steps
 	 * keeps each shift count below the width of off_t.
 	 *
 	 * NOTE(review): assumes pos_l is declared with the Linux ABI's
 	 * unsigned long type - confirm against the syscall argument
 	 * structure.
 	 */
 	offset = (((off_t)uap->pos_h << (sizeof(uap->pos_l) * 4)) <<
 	    (sizeof(uap->pos_l) * 4)) | uap->pos_l;
 	if (offset < 0)
 		return (EINVAL);
 #ifdef COMPAT_LINUX32
 	error = linux32_copyinuio(PTRIN(uap->vec), uap->vlen, &auio);
 #else
 	error = copyinuio(uap->vec, uap->vlen, &auio);
 #endif
 	if (error != 0)
 		return (error);
 	error = kern_pwritev(td, uap->fd, auio, offset);
 	free(auio, M_IOV);
 	return (error);
 }
 
 int
 linux_mount(struct thread *td, struct linux_mount_args *args)
 {
 	char fstypename[MFSNAMELEN];
 	char *mntonname, *mntfromname;
 	int error, fsflags;
 
 	mntonname = malloc(MNAMELEN, M_TEMP, M_WAITOK);
 	mntfromname = malloc(MNAMELEN, M_TEMP, M_WAITOK);
 	error = copyinstr(args->filesystemtype, fstypename, MFSNAMELEN - 1,
 	    NULL);
 	if (error != 0)
 		goto out;
 	error = copyinstr(args->specialfile, mntfromname, MNAMELEN - 1, NULL);
 	if (error != 0)
 		goto out;
 	error = copyinstr(args->dir, mntonname, MNAMELEN - 1, NULL);
 	if (error != 0)
 		goto out;
 
 #ifdef DEBUG
 	if (ldebug(mount))
 		printf(ARGS(mount, "%s, %s, %s"),
 		    fstypename, mntfromname, mntonname);
 #endif
 
 	if (strcmp(fstypename, "ext2") == 0) {
 		strcpy(fstypename, "ext2fs");
 	} else if (strcmp(fstypename, "proc") == 0) {
 		strcpy(fstypename, "linprocfs");
 	} else if (strcmp(fstypename, "vfat") == 0) {
 		strcpy(fstypename, "msdosfs");
 	}
 
 	fsflags = 0;
 
 	if ((args->rwflag & 0xffff0000) == 0xc0ed0000) {
 		/*
 		 * Linux SYNC flag is not included; the closest equivalent
 		 * FreeBSD has is !ASYNC, which is our default.
 		 */
 		if (args->rwflag & LINUX_MS_RDONLY)
 			fsflags |= MNT_RDONLY;
 		if (args->rwflag & LINUX_MS_NOSUID)
 			fsflags |= MNT_NOSUID;
 		if (args->rwflag & LINUX_MS_NOEXEC)
 			fsflags |= MNT_NOEXEC;
 		if (args->rwflag & LINUX_MS_REMOUNT)
 			fsflags |= MNT_UPDATE;
 	}
 
 	error = kernel_vmount(fsflags,
 	    "fstype", fstypename,
 	    "fspath", mntonname,
 	    "from", mntfromname,
 	    NULL);
 out:
 	free(mntonname, M_TEMP);
 	free(mntfromname, M_TEMP);
 	return (error);
 }
 
 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
 int
 linux_oldumount(struct thread *td, struct linux_oldumount_args *args)
 {
 	struct linux_umount_args args2;
 
 	args2.path = args->path;
 	args2.flags = 0;
 	return (linux_umount(td, &args2));
 }
 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
 
 #ifdef LINUX_LEGACY_SYSCALLS
 int
 linux_umount(struct thread *td, struct linux_umount_args *args)
 {
 	struct unmount_args bsd;
 
 	bsd.path = args->path;
 	bsd.flags = args->flags;	/* XXX correct? */
 	return (sys_unmount(td, &bsd));
 }
 #endif
 
 /*
  * fcntl family of syscalls
  */
 
 /*
  * Linux-format record lock description, copied in from and out to user
  * space by the F_GETLK/F_SETLK/F_SETLKW handling and converted to and from
  * the native struct flock field by field.  The structure is packed only
  * for the 32-bit compat build on amd64.
  */
 struct l_flock {
 	l_short		l_type;		/* LINUX_F_RDLCK, _WRLCK or _UNLCK */
 	l_short		l_whence;	/* copied to/from flock.l_whence */
 	l_off_t		l_start;	/* copied to/from flock.l_start */
 	l_off_t		l_len;		/* copied to/from flock.l_len */
 	l_pid_t		l_pid;		/* copied to/from flock.l_pid */
 }
 #if defined(__amd64__) && defined(COMPAT_LINUX32)
 __packed
 #endif
 ;
 
 /*
  * Fill a native struct flock from a Linux struct l_flock.  Lock types other
  * than read, write and unlock are translated to -1; l_sysid is set to 0.
  */
 static void
 linux_to_bsd_flock(struct l_flock *linux_flock, struct flock *bsd_flock)
 {
 	if (linux_flock->l_type == LINUX_F_RDLCK)
 		bsd_flock->l_type = F_RDLCK;
 	else if (linux_flock->l_type == LINUX_F_WRLCK)
 		bsd_flock->l_type = F_WRLCK;
 	else if (linux_flock->l_type == LINUX_F_UNLCK)
 		bsd_flock->l_type = F_UNLCK;
 	else
 		bsd_flock->l_type = -1;
 
 	bsd_flock->l_whence = linux_flock->l_whence;
 	bsd_flock->l_start = (off_t)linux_flock->l_start;
 	bsd_flock->l_len = (off_t)linux_flock->l_len;
 	bsd_flock->l_pid = (pid_t)linux_flock->l_pid;
 	bsd_flock->l_sysid = 0;
 }
 
 /*
  * Fill a Linux struct l_flock from a native struct flock.  The Linux lock
  * type is only written for read, write and unlock; for any other native
  * type it is left as it was.
  */
 static void
 bsd_to_linux_flock(struct flock *bsd_flock, struct l_flock *linux_flock)
 {
 	if (bsd_flock->l_type == F_RDLCK)
 		linux_flock->l_type = LINUX_F_RDLCK;
 	else if (bsd_flock->l_type == F_WRLCK)
 		linux_flock->l_type = LINUX_F_WRLCK;
 	else if (bsd_flock->l_type == F_UNLCK)
 		linux_flock->l_type = LINUX_F_UNLCK;
 
 	linux_flock->l_whence = bsd_flock->l_whence;
 	linux_flock->l_start = (l_off_t)bsd_flock->l_start;
 	linux_flock->l_len = (l_off_t)bsd_flock->l_len;
 	linux_flock->l_pid = (l_pid_t)bsd_flock->l_pid;
 }
 
 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
 /*
  * Linux-format record lock description used by the F_GETLK64/F_SETLK64/
  * F_SETLKW64 commands; it differs from struct l_flock in using l_loff_t
  * for the start and length fields.  The structure is packed only for the
  * 32-bit compat build on amd64.
  */
 struct l_flock64 {
 	l_short		l_type;		/* LINUX_F_RDLCK, _WRLCK or _UNLCK */
 	l_short		l_whence;	/* copied to/from flock.l_whence */
 	l_loff_t	l_start;	/* copied to/from flock.l_start */
 	l_loff_t	l_len;		/* copied to/from flock.l_len */
 	l_pid_t		l_pid;		/* copied to/from flock.l_pid */
 }
 #if defined(__amd64__) && defined(COMPAT_LINUX32)
 __packed
 #endif
 ;
 
 /*
  * Fill a native struct flock from a Linux struct l_flock64.  Lock types
  * other than read, write and unlock are translated to -1; l_sysid is set
  * to 0.
  */
 static void
 linux_to_bsd_flock64(struct l_flock64 *linux_flock, struct flock *bsd_flock)
 {
 	if (linux_flock->l_type == LINUX_F_RDLCK)
 		bsd_flock->l_type = F_RDLCK;
 	else if (linux_flock->l_type == LINUX_F_WRLCK)
 		bsd_flock->l_type = F_WRLCK;
 	else if (linux_flock->l_type == LINUX_F_UNLCK)
 		bsd_flock->l_type = F_UNLCK;
 	else
 		bsd_flock->l_type = -1;
 
 	bsd_flock->l_whence = linux_flock->l_whence;
 	bsd_flock->l_start = (off_t)linux_flock->l_start;
 	bsd_flock->l_len = (off_t)linux_flock->l_len;
 	bsd_flock->l_pid = (pid_t)linux_flock->l_pid;
 	bsd_flock->l_sysid = 0;
 }
 
 /*
  * Fill a Linux struct l_flock64 from a native struct flock.  The Linux lock
  * type is only written for read, write and unlock; for any other native
  * type it is left as it was.
  */
 static void
 bsd_to_linux_flock64(struct flock *bsd_flock, struct l_flock64 *linux_flock)
 {
 	if (bsd_flock->l_type == F_RDLCK)
 		linux_flock->l_type = LINUX_F_RDLCK;
 	else if (bsd_flock->l_type == F_WRLCK)
 		linux_flock->l_type = LINUX_F_WRLCK;
 	else if (bsd_flock->l_type == F_UNLCK)
 		linux_flock->l_type = LINUX_F_UNLCK;
 
 	linux_flock->l_whence = bsd_flock->l_whence;
 	linux_flock->l_start = (l_loff_t)bsd_flock->l_start;
 	linux_flock->l_len = (l_loff_t)bsd_flock->l_len;
 	linux_flock->l_pid = (l_pid_t)bsd_flock->l_pid;
 }
 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
 
 /*
  * Shared implementation of the Linux fcntl commands.  Each supported
  * command is translated to the matching native command and executed with
  * kern_fcntl(); flag words and struct l_flock arguments are converted
  * between the Linux and native representations on the way in and out.
  *
  * Returns the error from kern_fcntl(), copyin(), copyout() or fget(), or
  * EINVAL for a command that is not handled here.
  */
 static int
 fcntl_common(struct thread *td, struct linux_fcntl_args *args)
 {
 	struct l_flock linux_flock;
 	struct flock bsd_flock;
 	struct file *fp;
 	long arg;
 	int error, result;
 
 	switch (args->cmd) {
 	case LINUX_F_DUPFD:
 		return (kern_fcntl(td, args->fd, F_DUPFD, args->arg));
 
 	case LINUX_F_GETFD:
 		return (kern_fcntl(td, args->fd, F_GETFD, 0));
 
 	case LINUX_F_SETFD:
 		return (kern_fcntl(td, args->fd, F_SETFD, args->arg));
 
 	case LINUX_F_GETFL:
 		/*
 		 * Fetch the native flags, then rebuild td_retval[0] from
 		 * scratch using the Linux flag values.  The translation is
 		 * done regardless of the value of error, which is returned
 		 * afterwards.
 		 */
 		error = kern_fcntl(td, args->fd, F_GETFL, 0);
 		result = td->td_retval[0];
 		td->td_retval[0] = 0;
 		if (result & O_RDONLY)
 			td->td_retval[0] |= LINUX_O_RDONLY;
 		if (result & O_WRONLY)
 			td->td_retval[0] |= LINUX_O_WRONLY;
 		if (result & O_RDWR)
 			td->td_retval[0] |= LINUX_O_RDWR;
 		if (result & O_NDELAY)
 			td->td_retval[0] |= LINUX_O_NONBLOCK;
 		if (result & O_APPEND)
 			td->td_retval[0] |= LINUX_O_APPEND;
 		if (result & O_FSYNC)
 			td->td_retval[0] |= LINUX_O_SYNC;
 		if (result & O_ASYNC)
 			td->td_retval[0] |= LINUX_FASYNC;
 #ifdef LINUX_O_NOFOLLOW
 		if (result & O_NOFOLLOW)
 			td->td_retval[0] |= LINUX_O_NOFOLLOW;
 #endif
 #ifdef LINUX_O_DIRECT
 		if (result & O_DIRECT)
 			td->td_retval[0] |= LINUX_O_DIRECT;
 #endif
 		return (error);
 
 	case LINUX_F_SETFL:
 		/* Translate the Linux flag bits that have a native match. */
 		arg = 0;
 		if (args->arg & LINUX_O_NDELAY)
 			arg |= O_NONBLOCK;
 		if (args->arg & LINUX_O_APPEND)
 			arg |= O_APPEND;
 		if (args->arg & LINUX_O_SYNC)
 			arg |= O_FSYNC;
 		if (args->arg & LINUX_FASYNC)
 			arg |= O_ASYNC;
 #ifdef LINUX_O_NOFOLLOW
 		if (args->arg & LINUX_O_NOFOLLOW)
 			arg |= O_NOFOLLOW;
 #endif
 #ifdef LINUX_O_DIRECT
 		if (args->arg & LINUX_O_DIRECT)
 			arg |= O_DIRECT;
 #endif
 		return (kern_fcntl(td, args->fd, F_SETFL, arg));
 
 	case LINUX_F_GETLK:
 		/* Round trip: user l_flock -> flock -> query -> l_flock. */
 		error = copyin((void *)args->arg, &linux_flock,
 		    sizeof(linux_flock));
 		if (error)
 			return (error);
 		linux_to_bsd_flock(&linux_flock, &bsd_flock);
 		error = kern_fcntl(td, args->fd, F_GETLK, (intptr_t)&bsd_flock);
 		if (error)
 			return (error);
 		bsd_to_linux_flock(&bsd_flock, &linux_flock);
 		return (copyout(&linux_flock, (void *)args->arg,
 		    sizeof(linux_flock)));
 
 	case LINUX_F_SETLK:
 		error = copyin((void *)args->arg, &linux_flock,
 		    sizeof(linux_flock));
 		if (error)
 			return (error);
 		linux_to_bsd_flock(&linux_flock, &bsd_flock);
 		return (kern_fcntl(td, args->fd, F_SETLK,
 		    (intptr_t)&bsd_flock));
 
 	case LINUX_F_SETLKW:
 		error = copyin((void *)args->arg, &linux_flock,
 		    sizeof(linux_flock));
 		if (error)
 			return (error);
 		linux_to_bsd_flock(&linux_flock, &bsd_flock);
 		return (kern_fcntl(td, args->fd, F_SETLKW,
 		     (intptr_t)&bsd_flock));
 
 	case LINUX_F_GETOWN:
 		return (kern_fcntl(td, args->fd, F_GETOWN, 0));
 
 	case LINUX_F_SETOWN:
 		/*
 		 * XXX some Linux applications depend on F_SETOWN having no
 		 * significant effect for pipes (SIGIO is not delivered for
 		 * pipes under Linux-2.2.35 at least).
 		 */
 		error = fget(td, args->fd,
 		    &cap_fcntl_rights, &fp);
 		if (error)
 			return (error);
 		/* Pipes are refused with EINVAL instead of being set up. */
 		if (fp->f_type == DTYPE_PIPE) {
 			fdrop(fp, td);
 			return (EINVAL);
 		}
 		fdrop(fp, td);
 
 		return (kern_fcntl(td, args->fd, F_SETOWN, args->arg));
 
 	case LINUX_F_DUPFD_CLOEXEC:
 		return (kern_fcntl(td, args->fd, F_DUPFD_CLOEXEC, args->arg));
 	}
 
 	/* Any command not handled above. */
 	return (EINVAL);
 }
 
 /*
  * fcntl(2) for Linux binaries: optionally trace the call, then let
  * fcntl_common() do the work.
  */
 int
 linux_fcntl(struct thread *td, struct linux_fcntl_args *args)
 {
 	int rc;
 
 #ifdef DEBUG
 	if (ldebug(fcntl))
 		printf(ARGS(fcntl, "%d, %08x, *"), args->fd, args->cmd);
 #endif
 
 	rc = fcntl_common(td, args);
 	return (rc);
 }
 
 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
 int
 linux_fcntl64(struct thread *td, struct linux_fcntl64_args *args)
 {
 	struct l_flock64 linux_flock;
 	struct flock bsd_flock;
 	struct linux_fcntl_args fcntl_args;
 	int error;
 
 #ifdef DEBUG
 	if (ldebug(fcntl64))
 		printf(ARGS(fcntl64, "%d, %08x, *"), args->fd, args->cmd);
 #endif
 
 	switch (args->cmd) {
 	case LINUX_F_GETLK64:
 		error = copyin((void *)args->arg, &linux_flock,
 		    sizeof(linux_flock));
 		if (error)
 			return (error);
 		linux_to_bsd_flock64(&linux_flock, &bsd_flock);
 		error = kern_fcntl(td, args->fd, F_GETLK, (intptr_t)&bsd_flock);
 		if (error)
 			return (error);
 		bsd_to_linux_flock64(&bsd_flock, &linux_flock);
 		return (copyout(&linux_flock, (void *)args->arg,
 			    sizeof(linux_flock)));
 
 	case LINUX_F_SETLK64:
 		error = copyin((void *)args->arg, &linux_flock,
 		    sizeof(linux_flock));
 		if (error)
 			return (error);
 		linux_to_bsd_flock64(&linux_flock, &bsd_flock);
 		return (kern_fcntl(td, args->fd, F_SETLK,
 		    (intptr_t)&bsd_flock));
 
 	case LINUX_F_SETLKW64:
 		error = copyin((void *)args->arg, &linux_flock,
 		    sizeof(linux_flock));
 		if (error)
 			return (error);
 		linux_to_bsd_flock64(&linux_flock, &bsd_flock);
 		return (kern_fcntl(td, args->fd, F_SETLKW,
 		    (intptr_t)&bsd_flock));
 	}
 
 	fcntl_args.fd = args->fd;
 	fcntl_args.cmd = args->cmd;
 	fcntl_args.arg = args->arg;
 	return (fcntl_common(td, &fcntl_args));
 }
 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
 
 #ifdef LINUX_LEGACY_SYSCALLS
 /*
  * chown(2) for Linux binaries: convert the user path and change ownership
  * with kern_fchownat() relative to the current directory, with no flags.
  */
 int
 linux_chown(struct thread *td, struct linux_chown_args *args)
 {
 	int rc;
 	char *lpath;
 
 	LCONVPATHEXIST(td, args->path, &lpath);
 
 #ifdef DEBUG
 	if (ldebug(chown))
 		printf(ARGS(chown, "%s, %d, %d"), lpath, args->uid, args->gid);
 #endif
 	rc = kern_fchownat(td, AT_FDCWD, lpath, UIO_SYSSPACE, args->uid,
 	    args->gid, 0);
 	LFREEPATH(lpath);
 
 	return (rc);
 }
 #endif
 
 /*
  * fchownat(2) for Linux binaries.  Only LINUX_AT_SYMLINK_NOFOLLOW is
  * accepted in args->flag (anything else yields EINVAL).  LINUX_AT_FDCWD is
  * mapped to AT_FDCWD, the path is converted, and ownership is changed with
  * kern_fchownat(), passing AT_SYMLINK_NOFOLLOW when the Linux flag is set.
  */
 int
 linux_fchownat(struct thread *td, struct linux_fchownat_args *args)
 {
 	int dirfd, atflag, rc;
 	char *lpath;
 
 	if ((args->flag & ~LINUX_AT_SYMLINK_NOFOLLOW) != 0)
 		return (EINVAL);
 
 	if (args->dfd == LINUX_AT_FDCWD)
 		dirfd = AT_FDCWD;
 	else
 		dirfd = args->dfd;
 	LCONVPATHEXIST_AT(td, args->filename, &lpath, dirfd);
 
 #ifdef DEBUG
 	if (ldebug(fchownat))
 		printf(ARGS(fchownat, "%s, %d, %d"), lpath, args->uid, args->gid);
 #endif
 
 	atflag = 0;
 	if ((args->flag & LINUX_AT_SYMLINK_NOFOLLOW) != 0)
 		atflag = AT_SYMLINK_NOFOLLOW;
 	rc = kern_fchownat(td, dirfd, lpath, UIO_SYSSPACE, args->uid,
 	    args->gid, atflag);
 	LFREEPATH(lpath);
 
 	return (rc);
 }
 
 #ifdef LINUX_LEGACY_SYSCALLS
 /*
  * lchown(2) for Linux binaries: convert the user path and change ownership
  * with kern_fchownat() relative to the current directory, passing
  * AT_SYMLINK_NOFOLLOW.
  */
 int
 linux_lchown(struct thread *td, struct linux_lchown_args *args)
 {
 	int rc;
 	char *lpath;
 
 	LCONVPATHEXIST(td, args->path, &lpath);
 
 #ifdef DEBUG
 	if (ldebug(lchown))
 		printf(ARGS(lchown, "%s, %d, %d"), lpath, args->uid, args->gid);
 #endif
 	rc = kern_fchownat(td, AT_FDCWD, lpath, UIO_SYSSPACE, args->uid,
 	    args->gid, AT_SYMLINK_NOFOLLOW);
 	LFREEPATH(lpath);
 
 	return (rc);
 }
 #endif
 
 /*
  * Map a Linux posix_fadvise() advice value to the native one.  Returns -1
  * for a value that has no mapping.
  */
 static int
 convert_fadvice(int advice)
 {
 	int native;
 
 	switch (advice) {
 	case LINUX_POSIX_FADV_NORMAL:
 		native = POSIX_FADV_NORMAL;
 		break;
 	case LINUX_POSIX_FADV_RANDOM:
 		native = POSIX_FADV_RANDOM;
 		break;
 	case LINUX_POSIX_FADV_SEQUENTIAL:
 		native = POSIX_FADV_SEQUENTIAL;
 		break;
 	case LINUX_POSIX_FADV_WILLNEED:
 		native = POSIX_FADV_WILLNEED;
 		break;
 	case LINUX_POSIX_FADV_DONTNEED:
 		native = POSIX_FADV_DONTNEED;
 		break;
 	case LINUX_POSIX_FADV_NOREUSE:
 		native = POSIX_FADV_NOREUSE;
 		break;
 	default:
 		native = -1;
 		break;
 	}
 
 	return (native);
 }
 
 /*
  * fadvise64 for Linux binaries: translate the advice value (EINVAL if it
  * has no native mapping) and apply it with kern_posix_fadvise().
  */
 int
 linux_fadvise64(struct thread *td, struct linux_fadvise64_args *args)
 {
 	int native;
 
 	native = convert_fadvice(args->advice);
 	if (native != -1)
 		return (kern_posix_fadvise(td, args->fd, args->offset,
 		    args->len, native));
 
 	return (EINVAL);
 }
 
 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
 /*
  * fadvise64_64 for Linux binaries: translate the advice value (EINVAL if
  * it has no native mapping) and apply it with kern_posix_fadvise().
  */
 int
 linux_fadvise64_64(struct thread *td, struct linux_fadvise64_64_args *args)
 {
 	int native;
 
 	native = convert_fadvice(args->advice);
 	if (native != -1)
 		return (kern_posix_fadvise(td, args->fd, args->offset,
 		    args->len, native));
 
 	return (EINVAL);
 }
 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */
 
 #ifdef LINUX_LEGACY_SYSCALLS
 /*
  * pipe(2) for Linux binaries: create a pipe with kern_pipe() and copy the
  * two descriptors out to args->pipefds.  If the copy-out fails both
  * descriptors are closed again and the copy-out error is returned.
  */
 int
 linux_pipe(struct thread *td, struct linux_pipe_args *args)
 {
 	int fds[2];
 	int rc;
 
 #ifdef DEBUG
 	if (ldebug(pipe))
 		printf(ARGS(pipe, "*"));
 #endif
 
 	rc = kern_pipe(td, fds, 0, NULL, NULL);
 	if (rc != 0)
 		return (rc);
 
 	rc = copyout(fds, args->pipefds, sizeof(fds));
 	if (rc == 0)
 		return (0);
 
 	/* The caller never learned the descriptors; do not leave them open. */
 	(void)kern_close(td, fds[0]);
 	(void)kern_close(td, fds[1]);
 
 	return (rc);
 }
 #endif
 
 /*
  * pipe2(2) for Linux binaries.  Only LINUX_O_NONBLOCK and LINUX_O_CLOEXEC
  * are accepted in args->flags (anything else yields EINVAL); they are
  * translated to O_NONBLOCK and O_CLOEXEC for kern_pipe().  The two
  * descriptors are copied out to args->pipefds; if that fails both are
  * closed again and the copy-out error is returned.
  */
 int
 linux_pipe2(struct thread *td, struct linux_pipe2_args *args)
 {
 	int fds[2];
 	int rc, bsd_flags;
 
 #ifdef DEBUG
 	if (ldebug(pipe2))
 		printf(ARGS(pipe2, "*, %d"), args->flags);
 #endif
 
 	if ((args->flags & ~(LINUX_O_NONBLOCK | LINUX_O_CLOEXEC)) != 0)
 		return (EINVAL);
 
 	bsd_flags = ((args->flags & LINUX_O_NONBLOCK) != 0) ? O_NONBLOCK : 0;
 	if ((args->flags & LINUX_O_CLOEXEC) != 0)
 		bsd_flags |= O_CLOEXEC;
 
 	rc = kern_pipe(td, fds, bsd_flags, NULL, NULL);
 	if (rc != 0)
 		return (rc);
 
 	rc = copyout(fds, args->pipefds, sizeof(fds));
 	if (rc == 0)
 		return (0);
 
 	/* The caller never learned the descriptors; do not leave them open. */
 	(void)kern_close(td, fds[0]);
 	(void)kern_close(td, fds[1]);
 
 	return (rc);
 }
 
 /*
  * dup3(2) for Linux binaries.  EINVAL is returned when both descriptors
  * are equal or when args->flags has bits other than LINUX_O_CLOEXEC set.
  * Otherwise the duplication is done through kern_fcntl() with
  * F_DUP2FD_CLOEXEC or F_DUP2FD, depending on the flag.
  */
 int
 linux_dup3(struct thread *td, struct linux_dup3_args *args)
 {
 	int cmd;
 
 	if (args->oldfd == args->newfd ||
 	    (args->flags & ~LINUX_O_CLOEXEC) != 0)
 		return (EINVAL);
 
 	cmd = (args->flags & LINUX_O_CLOEXEC) ? F_DUP2FD_CLOEXEC : F_DUP2FD;
 
 	return (kern_fcntl(td, args->oldfd, cmd, (intptr_t)args->newfd));
 }
 
 /*
  * fallocate(2) for Linux binaries.  Only the plain allocation request
  * (mode 0) is emulated, through kern_posix_fallocate(), whose result is
  * returned.  Any other mode is refused with EOPNOTSUPP.
  */
 int
 linux_fallocate(struct thread *td, struct linux_fallocate_args *args)
 {
 
 	/*
 	 * We emulate only posix_fallocate system call for which
 	 * mode should be 0.  The system call itself exists, so an
 	 * unsupported mode is reported as "operation not supported"
 	 * rather than as a missing system call (ENOSYS).
 	 */
 	if (args->mode != 0)
 		return (EOPNOTSUPP);
 
 	return (kern_posix_fallocate(td, args->fd, args->offset,
 	    args->len));
 }
Index: head/sys/kern/capabilities.conf
===================================================================
--- head/sys/kern/capabilities.conf	(revision 345981)
+++ head/sys/kern/capabilities.conf	(revision 345982)
@@ -1,752 +1,753 @@
 ##
 ## Copyright (c) 2008-2010 Robert N. M. Watson
 ## All rights reserved.
 ##
 ## This software was developed at the University of Cambridge Computer
 ## Laboratory with support from a grant from Google, Inc.
 ##
 ## Redistribution and use in source and binary forms, with or without
 ## modification, are permitted provided that the following conditions
 ## are met:
 ## 1. Redistributions of source code must retain the above copyright
 ##    notice, this list of conditions and the following disclaimer.
 ## 2. Redistributions in binary form must reproduce the above copyright
 ##    notice, this list of conditions and the following disclaimer in the
 ##    documentation and/or other materials provided with the distribution.
 ##
 ## THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 ## ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 ## IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 ## ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 ## FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 ## DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 ## OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 ## HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 ## LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 ## OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 ## SUCH DAMAGE.
 ##
 ## List of system calls enabled in capability mode, one name per line.
 ##
 ## Notes:
 ## - sys_exit(2), abort2(2) and close(2) are very important.
 ## - Sorted alphabetically, please keep it that way.
 ##
 ## $FreeBSD$
 ##
 
 ##
 ## Allow ACL and MAC label operations by file descriptor, subject to
 ## capability rights.  Allow MAC label operations on the current process but
 ## we will need to scope __mac_get_pid(2).
 ##
 __acl_aclcheck_fd
 __acl_delete_fd
 __acl_get_fd
 __acl_set_fd
 __mac_get_fd
 #__mac_get_pid
 __mac_get_proc
 __mac_set_fd
 __mac_set_proc
 
 ##
 ## Allow sysctl(2) as we scope internal to the call; this is a global
 ## namespace, but there are several critical sysctls required for almost
 ## anything to run, such as hw.pagesize.  For now that policy lives in the
 ## kernel for performance and simplicity, but perhaps it could move to a
 ## proxying daemon in userspace.
 ##
 __sysctl
 
 ##
 ## Allow umtx operations as these are scoped by address space.
 ##
 ## XXXRW: Need to check this very carefully.
 ##
 _umtx_op
 
 ##
 ## Allow process termination using abort2(2).
 ##
 abort2
 
 ##
 ## Allow accept(2) since it doesn't manipulate namespaces directly, rather
 ## relies on existing bindings on a socket, subject to capability rights.
 ##
 accept
 accept4
 
 ##
 ## Allow AIO operations by file descriptor, subject to capability rights.
 ##
 aio_cancel
 aio_error
 aio_fsync
 aio_read
 aio_return
 aio_suspend
 aio_waitcomplete
 aio_write
 
 ##
 ## audit(2) is a global operation, submitting to the global trail, but it is
 ## controlled by privilege, and it might be useful to be able to submit
 ## records from sandboxes.  For now, disallow, but we may want to think about
 ## providing some sort of proxy service for this.
 ##
 #audit
 
 ##
 ## Allow bindat(2).
 ##
 bindat
 
 ##
 ## Allow capability mode and capability system calls.
 ##
 cap_enter
 cap_fcntls_get
 cap_fcntls_limit
 cap_getmode
 cap_ioctls_get
 cap_ioctls_limit
 __cap_rights_get
 cap_rights_limit
 
 ##
 ## Allow read-only clock operations.
 ##
 clock_getres
 clock_gettime
 
 ##
 ## Always allow file descriptor close(2).
 ##
 close
 closefrom
 
 ##
 ## Allow connectat(2).
 ##
 connectat
 
 ##
 ## cpuset(2) and related calls are limited to caller's own process/thread.
 ##
 #cpuset
 cpuset_getaffinity
 #cpuset_getid
 cpuset_setaffinity
 #cpuset_setid
 
 ##
 ## Always allow dup(2) and dup2(2) manipulation of the file descriptor table.
 ##
 dup
 dup2
 
 ##
 ## Allow extended attribute operations by file descriptor, subject to
 ## capability rights.
 ##
 extattr_delete_fd
 extattr_get_fd
 extattr_list_fd
 extattr_set_fd
 
 ##
 ## Allow changing file flags, mode, and owner by file descriptor, subject to
 ## capability rights.
 ##
 fchflags
 fchmod
 fchown
 
 ##
 ## For now, allow fcntl(2), subject to capability rights, but this probably
 ## needs additional scoping.
 ##
 fcntl
 
 ##
 ## Allow fexecve(2), subject to capability rights.  We perform some scoping,
 ## such as disallowing privilege escalation.
 ##
 fexecve
 
 ##
 ## Allow flock(2), subject to capability rights.
 ##
 flock
 
 ##
 ## Allow fork(2), even though it returns pids -- some applications seem to
 ## prefer this interface.
 ##
 fork
 
 ##
 ## Allow fpathconf(2), subject to capability rights.
 ##
 fpathconf
 
 ##
 ## Allow various file descriptor-based I/O operations, subject to capability
 ## rights.
 ##
 freebsd11_fstat
 freebsd11_fstatat
 freebsd11_getdirentries
 freebsd11_fstatfs
 freebsd11_mknodat
 freebsd6_ftruncate
 freebsd6_lseek
 freebsd6_mmap
 freebsd6_pread
 freebsd6_pwrite
 
 ##
 ## Allow querying file and file system state with fstat(2) and fstatfs(2),
 ## subject to capability rights.
 ##
 fstat
 fstatfs
 
 ##
 ## Allow further file descriptor-based I/O operations, subject to capability
 ## rights.
 ##
 fsync
 ftruncate
 
 ##
 ## Allow futimens(2) and futimes(2), subject to capability rights.
 ##
 futimens
 futimes
 
 ##
 ## Allow querying process audit state, subject to normal access control.
 ##
 getaudit
 getaudit_addr
 getauid
 
 ##
 ## Allow thread context management with getcontext(2).
 ##
 getcontext
 
 ##
 ## Allow directory I/O on a file descriptor, subject to capability rights.
 ## Originally we had separate capabilities for directory-specific read
 ## operations, but on BSD we allow reading the raw directory data, so we just
 ## rely on CAP_READ now.
 ##
 getdents
 getdirentries
 
 ##
 ## Allow querying certain trivial global state.
 ##
 getdomainname
 
 ##
 ## Allow querying certain per-process resource limit state.
 ##
 getdtablesize
 
 ##
 ## Allow querying current process credential state.
 ##
 getegid
 geteuid
 
 ##
 ## Allow querying certain trivial global state.
 ##
 gethostid
 gethostname
 
 ##
 ## Allow querying per-process timer.
 ##
 getitimer
 
 ##
 ## Allow querying current process credential state.
 ##
 getgid
 getgroups
 getlogin
 
 ##
 ## Allow querying certain trivial global state.
 ##
 getpagesize
 getpeername
 
 ##
 ## Allow querying certain per-process scheduling, resource limit, and
 ## credential state.
 ##
 ## XXXRW: getpgid(2) needs scoping.  It's not clear if it's worth scoping
 ## getppid(2).  getpriority(2) needs scoping.  getrusage(2) needs scoping.
 ## getsid(2) needs scoping.
 ##
 getpgid
 getpgrp
 getpid
 getppid
 getpriority
 getresgid
 getresuid
 getrlimit
 getrusage
 getsid
 
 ##
 ## Allow getrandom
 ##
 getrandom
 
 ##
 ## Allow querying socket state, subject to capability rights.
 ##
 ## XXXRW: getsockopt(2) may need more attention.
 ##
 getsockname
 getsockopt
 
 ##
 ## Allow querying the global clock.
 ##
 gettimeofday
 
 ##
 ## Allow querying current process credential state.
 ##
 getuid
 
 ##
 ## Allow ioctl(2), which hopefully will be limited by applications only to
 ## required commands with cap_ioctls_limit(2) syscall.
 ##
 ioctl
 
 ##
 ## Allow querying current process credential state.
 ##
 issetugid
 
 ##
 ## Allow kevent(2), as we will authorize based on capability rights on the
 ## target descriptor.
 ##
 kevent
 
 ##
 ## Allow kill(2), as we allow the process to send signals only to itself.
 ##
 kill
 
 ##
 ## Allow message queue operations on file descriptors, subject to capability
 ## rights.
 ## NOTE: Corresponding sysents are initialized in sys/kern/uipc_mqueue.c with
 ## SYF_CAPENABLED.
 ##
 kmq_notify
 kmq_setattr
 kmq_timedreceive
 kmq_timedsend
 
 ##
 ## Allow kqueue(2), we will control use.
 ##
 kqueue
 
 ##
 ## Allow managing per-process timers.
 ##
 ktimer_create
 ktimer_delete
 ktimer_getoverrun
 ktimer_gettime
 ktimer_settime
 
 ##
 ## We can't allow ktrace(2) because it relies on a global namespace, but we
 ## might want to introduce an fktrace(2) of some sort.
 ##
 #ktrace
 
 ##
 ## Allow AIO operations by file descriptor, subject to capability rights.
 ##
 lio_listio
 
 ##
 ## Allow listen(2), subject to capability rights.
 ##
 ## XXXRW: One might argue this manipulates a global namespace.
 ##
 listen
 
 ##
 ## Allow I/O-related file descriptors, subject to capability rights.
 ##
 lseek
 
 ##
 ## Allow simple VM operations on the current process.
 ##
 madvise
 mincore
 minherit
 mlock
 mlockall
 
 ##
 ## Allow memory mapping a file descriptor, and updating protections, subject
 ## to capability rights.
 ##
 mmap
 mprotect
 
 ##
 ## Allow simple VM operations on the current process.
 ##
 msync
 munlock
 munlockall
 munmap
 
 ##
 ## Allow the current process to sleep.
 ##
 nanosleep
 
 ##
 ## Allow querying the global clock.
 ##
 ntp_gettime
 
 ##
 ## Allow AIO operations by file descriptor, subject to capability rights.
 ##
 oaio_read
 oaio_write
 
 ##
 ## Allow simple VM operations on the current process.
 ##
 break
 
 ##
 ## Allow AIO operations by file descriptor, subject to capability rights.
 ##
 olio_listio
 
 ##
 ## Operations relative to directory capabilities.
 ##
 chflagsat
 faccessat
 fchmodat
 fchownat
 fstatat
 futimesat
 linkat
 mkdirat
 mkfifoat
 mknodat
 openat
 readlinkat
 renameat
 symlinkat
 unlinkat
+funlinkat
 utimensat
 
 ##
 ## Process descriptor-related system calls are allowed.
 ##
 pdfork
 pdgetpid
 pdkill
 #pdwait4	# not yet implemented
 
 ##
 ## Allow pipe(2).
 ##
 pipe
 pipe2
 
 ##
 ## Allow poll(2), which will be scoped by capability rights.
 ##
 poll
 ppoll
 
 ##
 ## Allow I/O-related file descriptors, subject to capability rights.
 ##
 posix_fallocate
 pread
 preadv
 
 ##
 ## Allow access to profiling state on the current process.
 ##
 profil
 
 ##
 ## Disallow ptrace(2) for now, but we do need debugging facilities in
 ## capability mode, so we will want to revisit this, possibly by scoping its
 ## operation.
 ##
 #ptrace
 
 ##
 ## Allow I/O-related file descriptors, subject to capability rights.
 ##
 pwrite
 pwritev
 read
 readv
 recv
 recvfrom
 recvmsg
 
 ##
 ## Allow real-time scheduling primitives to be used.
 ##
 ## XXXRW: These require scoping.
 ##
 rtprio
 rtprio_thread
 
 ##
 ## Allow simple VM operations on the current process.
 ##
 sbrk
 
 ##
 ## Allow querying trivial global scheduler state.
 ##
 sched_get_priority_max
 sched_get_priority_min
 
 ##
 ## Allow various thread/process scheduler operations.
 ##
 ## XXXRW: Some of these require further scoping.
 ##
 sched_getparam
 sched_getscheduler
 sched_rr_get_interval
 sched_setparam
 sched_setscheduler
 sched_yield
 
 ##
 ## Allow I/O-related file descriptors, subject to capability rights.
 ## NOTE: Corresponding sysents are initialized in sys/netinet/sctp_syscalls.c
 ## with SYF_CAPENABLED.
 ##
 sctp_generic_recvmsg
 sctp_generic_sendmsg
 sctp_generic_sendmsg_iov
 sctp_peeloff
 
 ##
 ## Allow pselect(2) and select(2), which will be scoped by capability rights.
 ##
 ## XXXRW: But is it?
 ##
 pselect
 select
 
 ##
 ## Allow I/O-related file descriptors, subject to capability rights.  Use of
 ## explicit addresses here is restricted by the system calls themselves.
 ##
 send
 sendfile
 sendmsg
 sendto
 
 ##
 ## Allow setting per-process audit state, which is controlled separately by
 ## privileges.
 ##
 setaudit
 setaudit_addr
 setauid
 
 ##
 ## Allow setting thread context.
 ##
 setcontext
 
 ##
 ## Allow setting current process credential state, which is controlled
 ## separately by privilege.
 ##
 setegid
 seteuid
 setgid
 
 ##
 ## Allow use of the process interval timer.
 ##
 setitimer
 
 ##
 ## Allow setpriority(2).
 ##
 ## XXXRW: Requires scoping.
 ##
 setpriority
 
 ##
 ## Allow setting current process credential state, which is controlled
 ## separately by privilege.
 ##
 setregid
 setresgid
 setresuid
 setreuid
 
 ##
 ## Allow setting process resource limits with setrlimit(2).
 ##
 setrlimit
 
 ##
 ## Allow creating a new session with setsid(2).
 ##
 setsid
 
 ##
 ## Allow setting socket options with setsockopt(2), subject to capability
 ## rights.
 ##
 ## XXXRW: Might require scoping.
 ##
 setsockopt
 
 ##
 ## Allow setting current process credential state, which is controlled
 ## separately by privilege.
 ##
 setuid
 
 ##
 ## shm_open(2) is scoped so as to allow only access to new anonymous objects.
 ##
 shm_open
 
 ##
 ## Allow I/O-related file descriptors, subject to capability rights.
 ##
 shutdown
 
 ##
 ## Allow signal control on current process.
 ##
 sigaction
 sigaltstack
 sigblock
 sigpending
 sigprocmask
 sigqueue
 sigreturn
 sigsetmask
 sigstack
 sigsuspend
 sigtimedwait
 sigvec
 sigwaitinfo
 sigwait
 
 ##
 ## Allow creating new socket pairs with socket(2) and socketpair(2).
 ##
 socket
 socketpair
 
 ##
 ## Allow simple VM operations on the current process.
 ##
 ## XXXRW: Kernel doesn't implement this, so drop?
 ##
 sstk
 
 ##
 ## Do allow sync(2) for now, but possibly shouldn't.
 ##
 sync
 
 ##
 ## Always allow process termination with sys_exit(2).
 ##
 sys_exit
 
 ##
 ## sysarch(2) does rather diverse things, but is required on at least i386
 ## in order to configure per-thread data.  As such, it's scoped on each
 ## architecture.
 ##
 sysarch
 
 ##
 ## Allow thread operations operating only on current process.
 ##
 thr_create
 thr_exit
 thr_kill
 
 ##
 ## Disallow thr_kill2(2), as it may operate beyond the current process.
 ##
 ## XXXRW: Requires scoping.
 ##
 #thr_kill2
 
 ##
 ## Allow thread operations operating only on current process.
 ##
 thr_new
 thr_self
 thr_set_name
 thr_suspend
 thr_wake
 
 ##
 ## Allow manipulation of the current process umask with umask(2).
 ##
 umask
 
 ##
 ## Allow submitting of process trace entries with utrace(2).
 ##
 utrace
 
 ##
 ## Allow generating UUIDs with uuidgen(2).
 ##
 uuidgen
 
 ##
 ## Allow I/O-related file descriptors, subject to capability rights.
 ##
 write
 writev
 
 ##
 ## Allow processes to yield(2).
 ##
 yield
Index: head/sys/kern/syscalls.master
===================================================================
--- head/sys/kern/syscalls.master	(revision 345981)
+++ head/sys/kern/syscalls.master	(revision 345982)
@@ -1,3173 +1,3181 @@
  $FreeBSD$
 ;	from: @(#)syscalls.master	8.2 (Berkeley) 1/13/94
 ;
 ; System call name/number master file.
 ; Processed to create init_sysent.c, syscalls.c and syscall.h.
 
 ; Columns: number audit type name alt{name,tag,rtyp}/comments
 ;	number	system call number, must be in order
 ;	audit	the audit event associated with the system call
 ;		A value of AUE_NULL means no auditing, but it also means that
 ;		there is no audit event for the call at this time. For the
 ;		case where the event exists, but we don't want auditing, the
 ;		event should be #defined to AUE_NULL in audit_kevents.h.
 ;	type	one of STD, OBSOL, UNIMPL, COMPAT, COMPAT4, COMPAT6,
 ;		COMPAT7, COMPAT10, COMPAT11, NODEF, NOARGS, NOPROTO, NOSTD
 ;		The COMPAT* options may be combined with one or more NO*
 ;		options separated by '|' with no spaces (e.g. COMPAT|NOARGS)
 ;	name	pseudo-prototype of syscall routine
 ;		If one of the following alts is different, then all appear:
 ;	altname	name of system call if different
 ;	alttag	name of args struct tag if different from [o]`name'"_args"
 ;	altrtyp	return type if not int (bogus - syscalls always return int)
 ;		for UNIMPL/OBSOL, name continues with comments
 
 ; types:
 ;	STD	always included
 ;	COMPAT	included on COMPAT #ifdef
 ;	COMPAT4	included on COMPAT_FREEBSD4 #ifdef (FreeBSD 4 compat)
 ;	COMPAT6	included on COMPAT_FREEBSD6 #ifdef (FreeBSD 6 compat)
 ;	COMPAT7	included on COMPAT_FREEBSD7 #ifdef (FreeBSD 7 compat)
 ;	COMPAT10 included on COMPAT_FREEBSD10 #ifdef (FreeBSD 10 compat)
 ;	COMPAT11 included on COMPAT_FREEBSD11 #ifdef (FreeBSD 11 compat)
 ;	OBSOL	obsolete, not included in system, only specifies name
 ;	UNIMPL	not implemented, placeholder only
 ;	NOSTD	implemented but as a lkm that can be statically
 ;		compiled in; sysent entry will be filled with lkmressys
 ;		so the SYSCALL_MODULE macro works
 ;	NOARGS	same as STD except do not create structure in sys/sysproto.h
 ;	NODEF	same as STD except only have the entry in the syscall table
 ;		added.  Meaning - do not create structure or function
 ;		prototype in sys/sysproto.h
 ;	NOPROTO	same as STD except do not create structure or
 ;		function prototype in sys/sysproto.h.  Does add a
 ;		definition to syscall.h besides adding a sysent.
 ;	NOTSTATIC syscall is loadable
 
 ; annotations:
 ;	SAL 2.0 annotations are used to specify how system calls treat
 ;	arguments that are passed using pointers. There are three basic
 ;	annotations.
 ;
 ;	_In_    Object pointed to will be read and not modified.
 ;	_Out_   Object pointed to will be written and not read.
 ;	_Inout_ Object pointed to will be written and read.
 ;
 ;	These annotations are used alone when the pointer refers to a single
 ;	object i.e. scalar types, structs, and pointers, and not NULL. Adding
 ;	the _opt_ suffix, e.g. _In_opt_, implies that the pointer may also
 ;	refer to NULL.
 ;
 ;	For pointers to arrays, additional suffixes are added:
 ;
 ;	_In_z_, _Out_z_, _Inout_z_:
 ;	    for a NUL terminated array e.g. a string.
 ;	_In_reads_z_(n),_Out_writes_z_(n), _Inout_updates_z_(n):
 ;	    for a NUL terminated array e.g. a string, of known length n bytes.
 ;	_In_reads_(n),_Out_writes_(n),_Inout_updates_(n):
 ;	    for an array of n elements.
 ;	_In_reads_bytes_(n), _Out_writes_bytes_(n), _Inout_updates_bytes_(n):
 ;	    for a buffer of n-bytes.
 
 ; Please copy any additions and changes to the following compatibility tables:
 ; sys/compat/freebsd32/syscalls.master
 
 ; #ifdef's, etc. may be included, and are copied to the output files.
 
 #include <sys/param.h>
 #include <sys/sysent.h>
 #include <sys/sysproto.h>
 
 ; Reserved/unimplemented system calls in the range 0-150 inclusive
 ; are reserved for use in future Berkeley releases.
 ; Additional system calls implemented in vendor and other
 ; redistributions should be placed in the reserved range at the end
 ; of the current calls.
 
 0	AUE_NULL	STD {
 		int nosys(void);
 	} syscall nosys_args int
 1	AUE_EXIT	STD {
 		void sys_exit(
 		    int rval
 		);
 	} exit sys_exit_args void
 2	AUE_FORK	STD {
 		int fork(void);
 	}
 3	AUE_READ	STD {
 		ssize_t read(
 		    int fd,
 		    _Out_writes_bytes_(nbyte) void *buf,
 		    size_t nbyte
 		);
 	}
 4	AUE_WRITE	STD {
 		ssize_t write(
 		    int fd,
 		    _In_reads_bytes_(nbyte) const void *buf,
 		    size_t nbyte
 		);
 	}
 5	AUE_OPEN_RWTC	STD {
 		int open(
 		    _In_z_ const char *path,
 		    int flags,
 		    mode_t mode
 		);
 	}
 ; XXX should be		{ int open(const char *path, int flags, ...); }
 ; but we're not ready for varargs.
 6	AUE_CLOSE	STD {
 		int close(
 		    int fd
 		);
 	}
 7	AUE_WAIT4	STD {
 		int wait4(
 		    int pid,
 		    _Out_opt_ int *status,
 		    int options,
 		    _Out_opt_ struct rusage *rusage
 		);
 	}
 8	AUE_CREAT	COMPAT {
 		int creat(
 		    _In_z_ const char *path,
 		    int mode
 		);
 	}
 9	AUE_LINK	STD {
 		int link(
 		    _In_z_ const char *path,
 		    _In_z_ const char *link
 		);
 	}
 10	AUE_UNLINK	STD {
 		int unlink(
 		    _In_z_ const char *path
 		);
 	}
 11	AUE_NULL	OBSOL	execv
 12	AUE_CHDIR	STD {
 		int chdir(
 		    _In_z_ const char *path
 		);
 	}
 13	AUE_FCHDIR	STD {
 		int fchdir(
 		    int fd
 		);
 	}
 14	AUE_MKNOD	COMPAT11 {
 		int mknod(
 		    _In_z_ const char *path,
 		    int mode,
 		    uint32_t dev
 		);
 	}
 15	AUE_CHMOD	STD {
 		int chmod(
 		    _In_z_ const char *path,
 		    mode_t mode
 		);
 	}
 16	AUE_CHOWN	STD {
 		int chown(
 		    _In_z_ const char *path,
 		    int uid,
 		    int gid
 		);
 	}
 17	AUE_NULL	STD {
 		void *break(
 		    _In_ char *nsize
 		);
 	}
 18	AUE_GETFSSTAT	COMPAT4 {
 		int getfsstat(
 		    _Out_writes_bytes_opt_(bufsize) struct ostatfs *buf,
 		    long bufsize,
 		    int mode
 		);
 	}
 19	AUE_LSEEK	COMPAT {
 		long lseek(
 		    int fd,
 		    long offset,
 		    int whence
 		);
 	}
 20	AUE_GETPID	STD {
 		pid_t getpid(void);
 	}
 21	AUE_MOUNT	STD {
 		int mount(
 		    _In_z_ const char *type,
 		    _In_z_ const char *path,
 		    int flags,
 		    _In_opt_ void *data
 		);
 	}
 22	AUE_UMOUNT	STD {
 		int unmount(
 		    _In_z_ const char *path,
 		    int flags
 		);
 	}
 23	AUE_SETUID	STD {
 		int setuid(
 		    uid_t uid
 		);
 	}
 24	AUE_GETUID	STD {
 		uid_t getuid(void);
 	}
 25	AUE_GETEUID	STD {
 		uid_t geteuid(void);
 	}
 26	AUE_PTRACE	STD {
 		int ptrace(
 		    int req,
 		    pid_t pid,
 		    _Inout_opt_ caddr_t addr,
 		    int data
 		);
 	}
 27	AUE_RECVMSG	STD {
 		int recvmsg(
 		    int s,
 		    _Inout_ struct msghdr *msg,
 		    int flags
 		);
 	}
 28	AUE_SENDMSG	STD {
 		int sendmsg(
 		    int s,
 		    _In_ struct msghdr *msg,
 		    int flags
 		);
 	}
 29	AUE_RECVFROM	STD {
 		int recvfrom(
 		    int s,
 		    _Out_writes_bytes_(len) void *buf,
 		    size_t len,
 		    int flags,
 		    _Out_writes_bytes_opt_(*fromlenaddr) struct sockaddr *from,
 		    _Inout_opt_ __socklen_t *fromlenaddr
 		);
 	}
 30	AUE_ACCEPT	STD {
 		int accept(
 		    int s,
 		    _Out_writes_bytes_opt_(*anamelen) struct sockaddr *name,
 		    _Inout_opt_ __socklen_t *anamelen
 		);
 	}
 31	AUE_GETPEERNAME	STD {
 		int getpeername(
 		    int fdes,
 		    _Out_writes_bytes_(*alen) struct sockaddr *asa,
 		    _Inout_opt_ __socklen_t *alen
 		);
 	}
 32	AUE_GETSOCKNAME	STD {
 		int getsockname(
 		    int fdes,
 		    _Out_writes_bytes_(*alen) struct sockaddr *asa,
 		    _Inout_ __socklen_t *alen
 		);
 	}
 33	AUE_ACCESS	STD {
 		int access(
 		    _In_z_ const char *path,
 		    int amode
 		);
 	}
 34	AUE_CHFLAGS	STD {
 		int chflags(
 		    _In_z_ const char *path,
 		    u_long flags
 		);
 	}
 35	AUE_FCHFLAGS	STD {
 		int fchflags(
 		    int fd,
 		    u_long flags
 		);
 	}
 36	AUE_SYNC	STD {
 		int sync(void);
 	}
 37	AUE_KILL	STD {
 		int kill(
 		    int pid,
 		    int signum
 		);
 	}
 38	AUE_STAT	COMPAT {
 		int stat(
 		    _In_z_ const char *path,
 		    _Out_ struct ostat *ub
 		);
 	}
 39	AUE_GETPPID	STD {
 		pid_t getppid(void);
 	}
 40	AUE_LSTAT	COMPAT {
 		int lstat(
 		    _In_z_ const char *path,
 		    _Out_ struct ostat *ub
 		);
 	}
 41	AUE_DUP		STD {
 		int dup(
 		    u_int fd
 		);
 	}
 42	AUE_PIPE	COMPAT10 {
 		int pipe(void);
 	}
 43	AUE_GETEGID	STD {
 		gid_t getegid(void);
 	}
 44	AUE_PROFILE	STD {
 		int profil(
 		    _Out_writes_bytes_(size) char *samples,
 		    size_t size,
 		    size_t offset,
 		    u_int scale
 		);
 	}
 45	AUE_KTRACE	STD {
 		int ktrace(
 		    _In_z_ const char *fname,
 		    int ops,
 		    int facs,
 		    int pid
 		);
 	}
 46	AUE_SIGACTION	COMPAT {
 		int sigaction(
 		    int signum,
 		    _In_opt_ struct osigaction *nsa,
 		    _Out_opt_ struct osigaction *osa
 		);
 	}
 47	AUE_GETGID	STD {
 		gid_t getgid(void);
 	}
 48	AUE_SIGPROCMASK	COMPAT {
 		int sigprocmask(
 		    int how,
 		    osigset_t mask
 		);
 	}
 ; XXX note nonstandard (bogus) calling convention - the libc stub passes
 ; us the mask, not a pointer to it, and we return the old mask as the
 ; (int) return value.
 49	AUE_GETLOGIN	STD {
 		int getlogin(
 		    _Out_writes_z_(namelen) char *namebuf,
 		    u_int namelen
 		);
 	}
 50	AUE_SETLOGIN	STD {
 		int setlogin(
 		    _In_z_ const char *namebuf
 		);
 	}
 51	AUE_ACCT	STD {
 		int acct(
 		    _In_z_ const char *path
 		);
 	}
 52	AUE_SIGPENDING	COMPAT {
 		int sigpending(void);
 	}
 53	AUE_SIGALTSTACK	STD {
 		int sigaltstack(
 		    _In_opt_ stack_t *ss,
 		    _Out_opt_ stack_t *oss
 		);
 	}
 54	AUE_IOCTL	STD {
 		int ioctl(
 		    int fd,
 		    u_long com,
 		    _Inout_opt_ char *data
 		);
 	}
 55	AUE_REBOOT	STD {
 		int reboot(
 		    int opt
 		);
 	}
 56	AUE_REVOKE	STD {
 		int revoke(
 		    _In_z_ const char *path
 		);
 	}
 57	AUE_SYMLINK	STD {
 		int symlink(
 		    _In_z_ const char *path,
 		    _In_z_ const char *link
 		);
 	}
 58	AUE_READLINK	STD {
 		ssize_t readlink(
 		    _In_z_ const char *path,
 		    _Out_writes_z_(count) char *buf,
 		    size_t count
 		);
 	}
 59	AUE_EXECVE	STD {
 		int execve(
 		    _In_z_ const char *fname,
 		    _In_z_ char **argv,
 		    _In_z_ char **envv
 		);
 	}
 60	AUE_UMASK	STD {
 		int umask(
 		    mode_t newmask
 		);
 	}
 61	AUE_CHROOT	STD {
 		int chroot(
 		    _In_z_ const char *path
 		);
 	}
 62	AUE_FSTAT	COMPAT {
 		int fstat(
 		    int fd,
 		    _Out_ struct ostat *sb
 		);
 	}
 63	AUE_NULL	COMPAT {
 		int getkerninfo(
 		    int op,
 		    _Out_writes_bytes_opt(
 		    *size) char *where,
 		    _Inout_opt_ size_t *size,
 		    int arg
 		);
 	}
 64	AUE_NULL	COMPAT {
 		int getpagesize(void);
 	}
 65	AUE_MSYNC	STD {
 		int msync(
 		    _In_ void *addr,
 		    size_t len,
 		    int flags
 		);
 	}
 66	AUE_VFORK	STD {
 		int vfork(void);
 	}
 67	AUE_NULL	OBSOL	vread
 68	AUE_NULL	OBSOL	vwrite
 69	AUE_SBRK	STD {
 		int sbrk(
 		    int incr
 		);
 	}
 70	AUE_SSTK	STD {
 		int sstk(
 		    int incr
 		);
 	}
 71	AUE_MMAP	COMPAT {
 		void *mmap(
 		    _In_ void *addr,
 		    int len,
 		    int prot,
 		    int flags,
 		    int fd,
 		    long pos
 		);
 	}
 72	AUE_O_VADVISE	COMPAT11 {
 		int vadvise(
 		    int anom
 		);
 	}
 73	AUE_MUNMAP	STD {
 		int munmap(
 		    _In_ void *addr,
 		    size_t len
 		);
 	}
 74	AUE_MPROTECT	STD {
 		int mprotect(
 		    _In_ void *addr,
 		    size_t len,
 		    int prot
 		);
 	}
 75	AUE_MADVISE	STD {
 		int madvise(
 		    _In_ void *addr,
 		    size_t len,
 		    int behav
 		);
 	}
 76	AUE_NULL	OBSOL	vhangup
 77	AUE_NULL	OBSOL	vlimit
 78	AUE_MINCORE	STD {
 		int mincore(
 		    _In_ const void *addr,
 		    size_t len,
 		    _Out_writes_bytes_(len/PAGE_SIZE) char *vec
 		);
 	}
 79	AUE_GETGROUPS	STD {
 		int getgroups(
 		    u_int gidsetsize,
 		    _Out_writes_opt_(gidsetsize) gid_t *gidset
 		);
 	}
 80	AUE_SETGROUPS	STD {
 		int setgroups(
 		    u_int gidsetsize,
 		    _In_reads_(gidsetsize) gid_t *gidset
 		);
 	}
 81	AUE_GETPGRP	STD {
 		int getpgrp(void);
 	}
 82	AUE_SETPGRP	STD {
 		int setpgid(
 		    int pid,
 		    int pgid
 		);
 	}
 83	AUE_SETITIMER	STD {
 		int setitimer(
 		    u_int which,
 		    _In_ struct itimerval *itv,
 		    _Out_opt_ struct itimerval *oitv
 		);
 	}
 84	AUE_WAIT4	COMPAT {
 		int wait(void);
 	}
 85	AUE_SWAPON	STD {
 		int swapon(
 		    _In_z_ const char *name
 		);
 	}
 86	AUE_GETITIMER	STD {
 		int getitimer(
 		    u_int which,
 		    _Out_ struct itimerval *itv
 		);
 	}
 87	AUE_SYSCTL	COMPAT {
 		int gethostname(
 		    _Out_writes_z_(len) char *hostname,
 		    u_int len
 		);
 	}
 88	AUE_SYSCTL	COMPAT {
 		int sethostname(
 		    _In_reads_z_(len) char *hostname,
 		    u_int len
 		);
 	}
 89	AUE_GETDTABLESIZE	STD {
 		int getdtablesize(void);
 	}
 90	AUE_DUP2	STD {
 		int dup2(
 		    u_int from,
 		    u_int to
 		);
 	}
 91	AUE_NULL	UNIMPL	getdopt
 92	AUE_FCNTL	STD {
 		int fcntl(
 		    int fd,
 		    int cmd,
 		    long arg
 		);
 	}
 ; XXX should be { int fcntl(int fd, int cmd, ...); }
 ; but we're not ready for varargs.
 93	AUE_SELECT	STD {
 		int select(
 		    int nd,
 		    _Inout_opt_ fd_set *in,
 		    _Inout_opt_ fd_set *ou,
 		    _Inout_opt_ fd_set *ex,
 		    _In_opt_ struct timeval *tv
 		);
 	}
 94	AUE_NULL	UNIMPL	setdopt
 95	AUE_FSYNC	STD {
 		int fsync(
 		    int fd
 		);
 	}
 96	AUE_SETPRIORITY	STD {
 		int setpriority(
 		    int which,
 		    int who,
 		    int prio
 		);
 	}
 97	AUE_SOCKET	STD {
 		int socket(
 		    int domain,
 		    int type,
 		    int protocol
 		);
 	}
 98	AUE_CONNECT	STD {
 		int connect(
 		    int s,
 		    _In_reads_bytes_(namelen) const struct sockaddr *name,
 		    int namelen
 		);
 	}
 99	AUE_ACCEPT	COMPAT {
 		int accept(
 		    int s,
 		    _Out_writes_bytes_opt_(*anamelen) struct sockaddr *name,
 		    int *anamelen
 		);
 	}
 100	AUE_GETPRIORITY	STD {
 		int getpriority(
 		    int which,
 		    int who
 		);
 	}
 101	AUE_SEND	COMPAT {
 		int send(
 		    int s,
 		    _In_reads_bytes_(len) const void *buf,
 		    int len,
 		    int flags
 		);
 	}
 102	AUE_RECV	COMPAT {
 		int recv(
 		    int s,
 		    _Out_writes_bytes_(len) void *buf,
 		    int len,
 		    int flags
 		);
 	}
 103	AUE_SIGRETURN	COMPAT {
 		int sigreturn(
 		    _In_ struct osigcontext *sigcntxp
 		);
 	}
 104	AUE_BIND	STD {
 		int bind(
 		    int s,
 		    _In_reads_bytes_(namelen) const struct sockaddr *name,
 		    int namelen
 		);
 	}
 105	AUE_SETSOCKOPT	STD {
 		int setsockopt(
 		    int s,
 		    int level,
 		    int name,
 		    _In_reads_bytes_opt_(valsize) const void *val,
 		    int valsize
 		);
 	}
 106	AUE_LISTEN	STD {
 		int listen(
 		    int s,
 		    int backlog
 		);
 	}
 107	AUE_NULL	OBSOL	vtimes
 108	AUE_NULL	COMPAT {
 		int sigvec(
 		    int signum,
 		    _In_opt_ struct sigvec *nsv,
 		    _Out_opt_ struct sigvec *osv
 		);
 	}
 109	AUE_NULL	COMPAT {
 		int sigblock(
 		    int mask
 		);
 	}
 110	AUE_NULL	COMPAT {
 		int sigsetmask(
 		    int mask
 		);
 	}
 111	AUE_NULL	COMPAT {
 		int sigsuspend(
 		    osigset_t mask
 		);
 	}
 ; XXX note nonstandard (bogus) calling convention - the libc stub passes
 ; us the mask, not a pointer to it.
 112	AUE_NULL	COMPAT {
 		int sigstack(
 		    _In_opt_ struct sigstack *nss,
 		    _Out_opt_ struct sigstack *oss
 		);
 	}
 113	AUE_RECVMSG	COMPAT {
 		int recvmsg(
 		    int s,
 		    _Inout_ struct omsghdr *msg,
 		    int flags
 		);
 	}
 114	AUE_SENDMSG	COMPAT {
 		int sendmsg(
 		    int s,
 		    _In_ const void *msg,
 		    int flags
 		);
 	}
 115	AUE_NULL	OBSOL	vtrace
 116	AUE_GETTIMEOFDAY	STD {
 		int gettimeofday(
 		    _Out_ struct timeval *tp,
 		    _Out_opt_ struct timezone *tzp
 		);
 	}
 117	AUE_GETRUSAGE	STD {
 		int getrusage(
 		    int who,
 		    _Out_ struct rusage *rusage
 		);
 	}
 118	AUE_GETSOCKOPT	STD {
 		int getsockopt(
 		    int s,
 		    int level,
 		    int name,
 		    _Out_writes_bytes_opt_(*avalsize) void *val,
 		    _Inout_  int *avalsize
 		);
 	}
 119	AUE_NULL	UNIMPL	resuba (BSD/OS 2.x)
 120	AUE_READV	STD {
 		int readv(
 		    int fd,
 		    _Inout_updates_(iovcnt) struct iovec *iovp,
 		    u_int iovcnt
 		);
 	}
 121	AUE_WRITEV	STD {
 		int writev(
 		    int fd,
 		    _In_reads_opt_(iovcnt) struct iovec *iovp,
 		    u_int iovcnt
 		);
 	}
 122	AUE_SETTIMEOFDAY	STD {
 		int settimeofday(
 		    _In_ struct timeval *tv,
 		    _In_opt_ struct timezone *tzp
 		);
 	}
 123	AUE_FCHOWN	STD {
 		int fchown(
 		    int fd,
 		    int uid,
 		    int gid
 		);
 	}
 124	AUE_FCHMOD	STD {
 		int fchmod(
 		    int fd,
 		    mode_t mode
 		);
 	}
 125	AUE_RECVFROM	COMPAT|NOARGS {
 		int recvfrom(
 		    int s,
 		    _Out_writes_(len) void *buf,
 		    size_t len,
 		    int flags,
 		    _Out_writes_bytes_(*fromlenaddr) struct sockaddr *from,
 		    _Inout_ int *fromlenaddr
 		);
 	} recvfrom recvfrom_args int
 126	AUE_SETREUID	STD {
 		int setreuid(
 		    int ruid,
 		    int euid
 		);
 	}
 127	AUE_SETREGID	STD {
 		int setregid(
 		    int rgid,
 		    int egid
 		);
 	}
 128	AUE_RENAME	STD {
 		int rename(
 		    _In_z_ const char *from,
 		    _In_z_ const char *to
 		);
 	}
 129	AUE_TRUNCATE	COMPAT {
 		int truncate(
 		    _In_z_ const char *path,
 		    long length
 		);
 	}
 130	AUE_FTRUNCATE	COMPAT {
 		int ftruncate(
 		    int fd,
 		    long length
 		);
 	}
 131	AUE_FLOCK	STD {
 		int flock(
 		    int fd,
 		    int how
 		);
 	}
 132	AUE_MKFIFO	STD {
 		int mkfifo(
 		    _In_z_ const char *path,
 		    mode_t mode
 		);
 	}
 133	AUE_SENDTO	STD {
 		int sendto(
 		    int s,
 		    _In_reads_bytes_(len) const void *buf,
 		    size_t len,
 		    int flags,
 		    _In_reads_bytes_opt_(tolen) const struct sockaddr *to,
 		    int tolen
 		);
 	}
 134	AUE_SHUTDOWN	STD {
 		int shutdown(
 		    int s,
 		    int how
 		);
 	}
 135	AUE_SOCKETPAIR	STD {
 		int socketpair(
 		    int domain,
 		    int type,
 		    int protocol,
 		    _Out_writes_(2) int *rsv
 		);
 	}
 136	AUE_MKDIR	STD {
 		int mkdir(
 		    _In_z_ const char *path,
 		    mode_t mode
 		);
 	}
 137	AUE_RMDIR	STD {
 		int rmdir(
 		    _In_z_ const char *path
 		);
 	}
 138	AUE_UTIMES	STD {
 		int utimes(
 		    _In_z_ const char *path,
 		    _In_ struct timeval *tptr
 		);
 	}
 139	AUE_NULL	OBSOL	4.2 sigreturn
 140	AUE_ADJTIME	STD {
 		int adjtime(
 		    _In_ struct timeval *delta,
 		    _Out_opt_ struct timeval *olddelta
 		);
 	}
 141	AUE_GETPEERNAME	COMPAT {
 		int getpeername(
 		    int fdes,
 		    _Out_writes_bytes_(*alen) struct sockaddr *asa,
 		    _Inout_opt_ int *alen
 		);
 	}
 142	AUE_SYSCTL	COMPAT {
 		long gethostid(void);
 	}
 143	AUE_SYSCTL	COMPAT {
 		int sethostid(
 		    long hostid
 		);
 	}
 144	AUE_GETRLIMIT	COMPAT {
 		int getrlimit(
 		    u_int which,
 		    _Out_ struct orlimit *rlp
 		);
 	}
 ; COMPAT setrlimit: *rlp supplies the new limit and is only read, so it
 ; is annotated _In_ (getrlimit, entry 144, is the one that writes it).
 145	AUE_SETRLIMIT	COMPAT {
 		int setrlimit(
 		    u_int which,
 		    _In_ struct orlimit *rlp
 		);
 	}
 146	AUE_KILLPG	COMPAT {
 		int killpg(
 		    int pgid,
 		    int signum
 		);
 	}
 147	AUE_SETSID	STD {
 		int setsid(void);
 	}
 148	AUE_QUOTACTL	STD {
 		int quotactl(
 		    _In_z_ const char *path,
 		    int cmd,
 		    int uid,
 		    _In_ void *arg
 		);
 	}
 149	AUE_O_QUOTA	COMPAT {
 		int quota(void);
 	}
 150	AUE_GETSOCKNAME	COMPAT|NOARGS {
 		int getsockname(
 		    int fdec,
 		    _Out_writes_bytes_(*alen) struct sockaddr *asa,
 		    _Inout_ int *alen
 		);
 	} getsockname getsockname_args int
 
 ; Syscalls 151-180 inclusive are reserved for vendor-specific
 ; system calls.  (This includes various calls added for compatibility
 ; with other Unix variants.)
 ; Some of these calls are now supported by BSD...
 151	AUE_NULL	UNIMPL	sem_lock (BSD/OS 2.x)
 152	AUE_NULL	UNIMPL	sem_wakeup (BSD/OS 2.x)
 153	AUE_NULL	UNIMPL	asyncdaemon (BSD/OS 2.x)
 ; 154 is initialized by the NLM code, if present.
 154	AUE_NULL	NOSTD {
 		int nlm_syscall(
 		    int debug_level,
 		    int grace_period,
 		    int addr_count,
 		    _In_reads_(addr_count) char **addrs
 		);
 	}
 ; 155 is initialized by the NFS code, if present.
 155	AUE_NFS_SVC	NOSTD {
 		int nfssvc(
 		    int flag,
 		    _In_ void *argp
 		);
 	}
 156	AUE_GETDIRENTRIES	COMPAT {
 		int getdirentries(
 		    int fd,
 		    _Out_writes_bytes_(count) char *buf,
 		    u_int count,
 		    _Out_ long *basep
 		);
 	}
 157	AUE_STATFS	COMPAT4 {
 		int statfs(
 		    _In_z_ const char *path,
 		    _Out_ struct ostatfs *buf
 		);
 	}
 158	AUE_FSTATFS	COMPAT4 {
 		int fstatfs(
 		    int fd,
 		    _Out_ struct ostatfs *buf
 		);
 	}
 159	AUE_NULL	UNIMPL	nosys
 160	AUE_LGETFH	STD {
 		int lgetfh(
 		    _In_z_ const char *fname,
 		    _Out_ struct fhandle *fhp
 		);
 	}
 161	AUE_NFS_GETFH	STD {
 		int getfh(
 		    _In_z_ const char *fname,
 		    _Out_ struct fhandle *fhp
 		);
 	}
 162	AUE_SYSCTL	COMPAT4 {
 		int getdomainname(
 		    _Out_writes_z_(len) char *domainname,
 		    int len
 		);
 	}
 163	AUE_SYSCTL	COMPAT4 {
 		int setdomainname(
 		    _In_reads_z_(len) char *domainname,
 		    int len
 		);
 	}
 164	AUE_NULL	COMPAT4 {
 		int uname(
 		    _Out_ struct utsname *name
 		);
 	}
 165	AUE_SYSARCH	STD {
 		int sysarch(
 		    int op,
 		    _In_z_ char *parms
 		);
 	}
 166	AUE_RTPRIO	STD {
 		int rtprio(
 		    int function,
 		    pid_t pid,
 		    _Inout_ struct rtprio *rtp
 		);
 	}
 167	AUE_NULL	UNIMPL	nosys
 168	AUE_NULL	UNIMPL	nosys
 169	AUE_SEMSYS	NOSTD {
 		int semsys(
 		    int which,
 		    int a2,
 		    int a3,
 		    int a4,
 		    int a5
 		);
 	}
 ; XXX should be { int semsys(int which, ...); }
 170	AUE_MSGSYS	NOSTD {
 		int msgsys(
 		    int which,
 		    int a2,
 		    int a3,
 		    int a4,
 		    int a5,
 		    int a6
 		);
 	}
 ; XXX should be { int msgsys(int which, ...); }
 171	AUE_SHMSYS	NOSTD {
 		int shmsys(
 		    int which,
 		    int a2,
 		    int a3,
 		    int a4
 		);
 	}
 ; XXX should be { int shmsys(int which, ...); }
 172	AUE_NULL	UNIMPL	nosys
 ; COMPAT6 pread: same arguments as the STD pread (entry 475) plus an
 ; explicit 'int pad' slot between nbyte and the off_t offset.
 173	AUE_PREAD	COMPAT6 {
 		ssize_t pread(
 		    int fd,
 		    _Out_writes_bytes_(nbyte) void *buf,
 		    size_t nbyte,
 		    int pad,
 		    off_t offset
 		);
 	}
 ; COMPAT6 pwrite: same arguments as the STD pwrite (entry 476) plus an
 ; explicit 'int pad' slot between nbyte and the off_t offset.
 174	AUE_PWRITE	COMPAT6 {
 		ssize_t pwrite(
 		    int fd,
 		    _In_reads_bytes_(nbyte) const void *buf,
 		    size_t nbyte,
 		    int pad,
 		    off_t offset
 		);
 	}
 175	AUE_SETFIB	STD {
 		int setfib(
 		    int fibnum
 		);
 	}
 176	AUE_NTP_ADJTIME	STD {
 		int ntp_adjtime(
 		    _Inout_ struct timex *tp
 		);
 	}
 177	AUE_NULL	UNIMPL	sfork (BSD/OS 2.x)
 178	AUE_NULL	UNIMPL	getdescriptor (BSD/OS 2.x)
 179	AUE_NULL	UNIMPL	setdescriptor (BSD/OS 2.x)
 180	AUE_NULL	UNIMPL	nosys
 
 ; Syscalls 181-199 are used by/reserved for BSD
 181	AUE_SETGID	STD {
 		int setgid(
 		    gid_t gid
 		);
 	}
 182	AUE_SETEGID	STD {
 		int setegid(
 		    gid_t egid
 		);
 	}
 183	AUE_SETEUID	STD {
 		int seteuid(
 		    uid_t euid
 		);
 	}
 184	AUE_NULL	OBSOL	lfs_bmapv
 185	AUE_NULL	OBSOL	lfs_markv
 186	AUE_NULL	OBSOL	lfs_segclean
 187	AUE_NULL	OBSOL	lfs_segwait
 188	AUE_STAT	COMPAT11 {
 		int stat(
 		    _In_z_ const char *path,
 		    _Out_ struct freebsd11_stat *ub
 		);
 	}
 189	AUE_FSTAT	COMPAT11 {
 		int fstat(
 		    int fd,
 		    _Out_ struct freebsd11_stat *sb
 		);
 	}
 190	AUE_LSTAT	COMPAT11 {
 		int lstat(
 		    _In_z_ const char *path,
 		    _Out_ struct freebsd11_stat *ub
 		);
 	}
 191	AUE_PATHCONF	STD {
 		int pathconf(
 		    _In_z_ const char *path,
 		    int name
 		);
 	}
 192	AUE_FPATHCONF	STD {
 		int fpathconf(
 		    int fd,
 		    int name
 		);
 	}
 193	AUE_NULL	UNIMPL	nosys
 194	AUE_GETRLIMIT	STD {
 		int getrlimit(
 		    u_int which,
 		    _Out_ struct rlimit *rlp
 		);
 	} getrlimit __getrlimit_args int
 195	AUE_SETRLIMIT	STD {
 		int setrlimit(
 		    u_int which,
 		    _In_ struct rlimit *rlp
 		);
 	} setrlimit __setrlimit_args int
 196	AUE_GETDIRENTRIES	COMPAT11 {
 		int getdirentries(
 		    int fd,
 		    _Out_writes_bytes_(count) char *buf,
 		    u_int count,
 		    _Out_ long *basep
 		);
 	}
 197	AUE_MMAP	COMPAT6 {
 		void *mmap(
 		    _In_ void *addr,
 		    size_t len,
 		    int prot,
 		    int flags,
 		    int fd,
 		    int pad,
 		    off_t pos
 		);
 	}
 ; 198 is declared NOPROTO with nosys(void) as its prototype, but the
 ; trailing fields name it __syscall with argument struct __syscall_args
 ; and return type int.  NOTE(review): presumably the indirect-syscall
 ; slot -- confirm against the dispatcher.
 198	AUE_NULL	NOPROTO {
 		int nosys(void);
 	} __syscall __syscall_args int
 199	AUE_LSEEK	COMPAT6 {
 		off_t lseek(
 		    int fd,
 		    int pad,
 		    off_t offset,
 		    int whence
 		);
 	}
 200	AUE_TRUNCATE	COMPAT6 {
 		int truncate(
 		    _In_z_ const char *path,
 		    int pad,
 		    off_t length
 		);
 	}
 201	AUE_FTRUNCATE	COMPAT6 {
 		int ftruncate(
 		    int fd,
 		    int pad,
 		    off_t length
 		);
 	}
 202	AUE_SYSCTL	STD {
 		int __sysctl(
 		    _In_reads_(namelen) int *name,
 		    u_int namelen,
 		    _Out_writes_bytes_opt_(*oldlenp) void *old,
 		    _Inout_opt_ size_t *oldlenp,
 		    _In_reads_bytes_opt_(newlen) const void *new,
 		    size_t newlen
 		);
 	} __sysctl sysctl_args int
 203	AUE_MLOCK	STD {
 		int mlock(
 		    _In_ const void *addr,
 		    size_t len
 		);
 	}
 204	AUE_MUNLOCK	STD {
 		int munlock(
 		    _In_ const void *addr,
 		    size_t len
 		);
 	}
 205	AUE_UNDELETE	STD {
 		int undelete(
 		    _In_z_ const char *path
 		);
 	}
 206	AUE_FUTIMES	STD {
 		int futimes(
 		    int fd,
 		    _In_reads_(2) struct timeval *tptr
 		);
 	}
 207	AUE_GETPGID	STD {
 		int getpgid(
 		    pid_t pid
 		);
 	}
 208	AUE_NULL	UNIMPL	nosys
 209	AUE_POLL	STD {
 		int poll(
 		    _Inout_updates_(nfds) struct pollfd *fds,
 		    u_int nfds,
 		    int timeout
 		);
 	}
 ;
 ; The following are reserved for loadable syscalls
 ;
 210	AUE_NULL	NODEF|NOTSTATIC	lkmnosys lkmnosys nosys_args int
 211	AUE_NULL	NODEF|NOTSTATIC	lkmnosys lkmnosys nosys_args int
 212	AUE_NULL	NODEF|NOTSTATIC	lkmnosys lkmnosys nosys_args int
 213	AUE_NULL	NODEF|NOTSTATIC	lkmnosys lkmnosys nosys_args int
 214	AUE_NULL	NODEF|NOTSTATIC	lkmnosys lkmnosys nosys_args int
 215	AUE_NULL	NODEF|NOTSTATIC	lkmnosys lkmnosys nosys_args int
 216	AUE_NULL	NODEF|NOTSTATIC	lkmnosys lkmnosys nosys_args int
 217	AUE_NULL	NODEF|NOTSTATIC	lkmnosys lkmnosys nosys_args int
 218	AUE_NULL	NODEF|NOTSTATIC	lkmnosys lkmnosys nosys_args int
 219	AUE_NULL	NODEF|NOTSTATIC	lkmnosys lkmnosys nosys_args int
 
 220	AUE_SEMCTL	COMPAT7|NOSTD {
 		int __semctl(
 		    int semid,
 		    int semnum,
 		    int cmd,
 		    union semun_old *arg
 		);
 	}
 221	AUE_SEMGET	NOSTD {
 		int semget(
 		    key_t key,
 		    int nsems,
 		    int semflg
 		);
 	}
 222	AUE_SEMOP	NOSTD {
 		int semop(
 		    int semid,
 		    _In_reads_(nsops) struct sembuf *sops,
 		    size_t nsops
 		);
 	}
 223	AUE_NULL	OBSOL	semconfig
 224	AUE_MSGCTL	COMPAT7|NOSTD {
 		int msgctl(
 		    int msqid,
 		    int cmd,
 		    struct msqid_ds_old *buf
 		);
 	}
 225	AUE_MSGGET	NOSTD {
 		int msgget(
 		    key_t key,
 		    int msgflg
 		);
 	}
 226	AUE_MSGSND	NOSTD {
 		int msgsnd(
 		    int msqid,
 		    _In_reads_bytes_(msgsz) const void *msgp,
 		    size_t msgsz,
 		    int msgflg
 		);
 	}
 227	AUE_MSGRCV	NOSTD {
 		ssize_t msgrcv(
 		    int msqid,
 		    _Out_writes_bytes_(msgsz) void *msgp,
 		    size_t msgsz,
 		    long msgtyp,
 		    int msgflg
 		);
 	}
 228	AUE_SHMAT	NOSTD {
 		void *shmat(
 		    int shmid,
 		    _In_ const void *shmaddr,
 		    int shmflg
 		);
 	}
 229	AUE_SHMCTL	COMPAT7|NOSTD {
 		int shmctl(
 		    int shmid,
 		    int cmd,
 		    struct shmid_ds_old *buf
 		);
 	}
 230	AUE_SHMDT	NOSTD {
 		int shmdt(
 		    _In_ const void *shmaddr
 		);
 	}
 231	AUE_SHMGET	NOSTD {
 		int shmget(
 		    key_t key,
 		    size_t size,
 		    int shmflg
 		);
 	}
 232	AUE_NULL	STD {
 		int clock_gettime(
 		    clockid_t clock_id,
 		    _Out_ struct timespec *tp
 		);
 	}
 233	AUE_CLOCK_SETTIME	STD {
 		int clock_settime(
 		    clockid_t clock_id,
 		    _In_ const struct timespec *tp
 		);
 	}
 234	AUE_NULL	STD {
 		int clock_getres(
 		    clockid_t clock_id,
 		    _Out_ struct timespec *tp
 		);
 	}
 235	AUE_NULL	STD {
 		int ktimer_create(
 		    clockid_t clock_id,
 		    _In_ struct sigevent *evp,
 		    _Out_ int *timerid
 		);
 	}
 236	AUE_NULL	STD {
 		int ktimer_delete(
 		    int timerid
 		);
 	}
 237	AUE_NULL	STD {
 		int ktimer_settime(
 		    int timerid,
 		    int flags,
 		    _In_ const struct itimerspec *value,
 		    _Out_opt_ struct itimerspec *ovalue
 		);
 	}
 238	AUE_NULL	STD {
 		int ktimer_gettime(
 		    int timerid,
 		    _Out_ struct itimerspec *value
 		);
 	}
 239	AUE_NULL	STD {
 		int ktimer_getoverrun(
 		    int timerid
 		);
 	}
 240	AUE_NULL	STD {
 		int nanosleep(
 		    _In_ const struct timespec *rqtp,
 		    _Out_opt_ struct timespec *rmtp
 		);
 	}
 241	AUE_NULL	STD {
 		int ffclock_getcounter(
 		    _Out_ ffcounter *ffcount
 		);
 	}
 242	AUE_NULL	STD {
 		int ffclock_setestimate(
 		    _In_ struct ffclock_estimate *cest
 		);
 	}
 243	AUE_NULL	STD {
 		int ffclock_getestimate(
 		    _Out_ struct ffclock_estimate *cest
 		);
 	}
 244	AUE_NULL	STD {
 		int clock_nanosleep(
 		    clockid_t clock_id,
 		    int flags,
 		    _In_ const struct timespec *rqtp,
 		    _Out_opt_ struct timespec *rmtp
 		);
 	}
 245-246	AUE_NULL	UNIMPL	nosys
 247	AUE_NULL	STD {
 		int clock_getcpuclockid2(
 		    id_t id,
 		    int which,
 		    _Out_ clockid_t *clock_id
 		);
 	}
 248	AUE_NULL	STD {
 		int ntp_gettime(
 		    _Out_ struct ntptimeval *ntvp
 		);
 	}
 249	AUE_NULL	UNIMPL	nosys
 ; syscall numbers initially used in OpenBSD
 250	AUE_MINHERIT	STD {
 		int minherit(
 		    _In_ void *addr,
 		    size_t len,
 		    int inherit
 		);
 	}
 251	AUE_RFORK	STD {
 		int rfork(
 		    int flags
 		);
 	}
 252	AUE_POLL	OBSOL	openbsd_poll
 253	AUE_ISSETUGID	STD {
 		int issetugid(void);
 	}
 254	AUE_LCHOWN	STD {
 		int lchown(
 		    _In_z_ const char *path,
 		    int uid,
 		    int gid
 		);
 	}
 255	AUE_AIO_READ	STD {
 		int aio_read(
 		    _Inout_ struct aiocb *aiocbp
 		);
 	}
 256	AUE_AIO_WRITE	STD {
 		int aio_write(
 		    _Inout_ struct aiocb *aiocbp
 		);
 	}
 ; lio_listio: acb_list is an array of nent aiocb pointers; sig is an
 ; optional input.
 257	AUE_LIO_LISTIO	STD {
 		int lio_listio(
 		    int mode,
 		    _Inout_updates_(nent) struct aiocb * const *acb_list,
 		    int nent,
 		    _In_opt_ struct sigevent *sig
 		);
 	}
 258-271	AUE_NULL	UNIMPL	nosys
 272	AUE_O_GETDENTS	COMPAT11 {
 		int getdents(
 		    int fd,
 		    _Out_writes_bytes_(count) char *buf,
 		    size_t count
 		);
 	}
 273	AUE_NULL	UNIMPL	nosys
 274	AUE_LCHMOD	STD {
 		int lchmod(
 		    _In_z_ const char *path,
 		    mode_t mode
 		);
 	}
 275	AUE_NULL	OBSOL	netbsd_lchown
 276	AUE_LUTIMES	STD {
 		int lutimes(
 		    _In_z_ const char *path,
 		    _In_ struct timeval *tptr
 		);
 	}
 277	AUE_NULL	OBSOL	netbsd_msync
 278	AUE_STAT	COMPAT11 {
 		int nstat(
 		    _In_z_ const char *path,
 		    _Out_ struct nstat *ub
 		);
 	}
 279	AUE_FSTAT	COMPAT11 {
 		int nfstat(
 		    int fd,
 		    _Out_ struct nstat *sb
 		);
 	}
 280	AUE_LSTAT	COMPAT11 {
 		int nlstat(
 		    _In_z_ const char *path,
 		    _Out_ struct nstat *ub
 		);
 	}
 281-288	AUE_NULL	UNIMPL	nosys
 289	AUE_PREADV	STD {
 		ssize_t preadv(
 		    int fd,
 		    _In_reads_(iovcnt) struct iovec *iovp,
 		    u_int iovcnt,
 		    off_t offset
 		);
 	}
 290	AUE_PWRITEV	STD {
 		ssize_t pwritev(
 		    int fd,
 		    _In_reads_(iovcnt) struct iovec *iovp,
 		    u_int iovcnt,
 		    off_t offset
 		);
 	}
 291-296	AUE_NULL	UNIMPL	nosys
 297	AUE_FHSTATFS	COMPAT4 {
 		int fhstatfs(
 		    _In_ const struct fhandle *u_fhp,
 		    _Out_ struct ostatfs *buf
 		);
 	}
 298	AUE_FHOPEN	STD {
 		int fhopen(
 		    _In_ const struct fhandle *u_fhp,
 		    int flags
 		);
 	}
 299	AUE_FHSTAT	COMPAT11 {
 		int fhstat(
 		    _In_ const struct fhandle *u_fhp,
 		    _Out_ struct freebsd11_stat *sb
 		);
 	}
 300	AUE_NULL	STD {
 		int modnext(
 		    int modid
 		);
 	}
 ; modstat: stat is an output structure for the module modid.
 301	AUE_NULL	STD {
 		int modstat(
 		    int modid,
 		    _Out_ struct module_stat *stat
 		);
 	}
 302	AUE_NULL	STD {
 		int modfnext(
 		    int modid
 		);
 	}
 303	AUE_NULL	STD {
 		int modfind(
 		    _In_z_ const char *name
 		);
 	}
 304	AUE_MODLOAD	STD {
 		int kldload(
 		    _In_z_ const char *file
 		);
 	}
 305	AUE_MODUNLOAD	STD {
 		int kldunload(
 		    int fileid
 		);
 	}
 306	AUE_NULL	STD {
 		int kldfind(
 		    _In_z_ const char *file
 		);
 	}
 307	AUE_NULL	STD {
 		int kldnext(
 		    int fileid
 		);
 	}
 308	AUE_NULL	STD {
 		int kldstat(
 		    int fileid,
 		    _Out_ struct kld_file_stat *stat
 		);
 	}
 309	AUE_NULL	STD {
 		int kldfirstmod(
 		    int fileid
 		);
 	}
 310	AUE_GETSID	STD {
 		int getsid(
 		    pid_t pid
 		);
 	}
 311	AUE_SETRESUID	STD {
 		int setresuid(
 		    uid_t ruid,
 		    uid_t euid,
 		    uid_t suid
 		);
 	}
 312	AUE_SETRESGID	STD {
 		int setresgid(
 		    gid_t rgid,
 		    gid_t egid,
 		    gid_t sgid
 		);
 	}
 313	AUE_NULL	OBSOL	signanosleep
 314	AUE_AIO_RETURN	STD {
 		ssize_t aio_return(
 		    _Inout_ struct aiocb *aiocbp
 		);
 	}
 315	AUE_AIO_SUSPEND	STD {
 		int aio_suspend(
 		    _Inout_updates_(nent) struct aiocb * const * aiocbp,
 		    int nent,
 		    _In_opt_ const struct timespec *timeout
 		);
 	}
 316	AUE_AIO_CANCEL	STD {
 		int aio_cancel(
 		    int fd,
 		    _In_opt_ struct aiocb *aiocbp
 		);
 	}
 317	AUE_AIO_ERROR	STD {
 		int aio_error(
 		    _In_ struct aiocb *aiocbp
 		);
 	}
 ; COMPAT6 aio_read: takes a struct oaiocb rather than the struct aiocb
 ; used by the STD entry (255).
 318	AUE_AIO_READ	COMPAT6 {
 		int aio_read(
 		    _Inout_ struct oaiocb *aiocbp
 		);
 	}
 319	AUE_AIO_WRITE	COMPAT6 {
 		int aio_write(
 		    _Inout_ struct oaiocb *aiocbp
 		);
 	}
 320	AUE_LIO_LISTIO	COMPAT6 {
 		int lio_listio(
 		    int mode,
 		    _Inout_updates_(nent) struct oaiocb * const *acb_list,
 		    int nent,
 		    _In_opt_ struct osigevent *sig
 		);
 	}
 321	AUE_NULL	STD {
 		int yield(void);
 	}
 322	AUE_NULL	OBSOL	thr_sleep
 323	AUE_NULL	OBSOL	thr_wakeup
 324	AUE_MLOCKALL	STD {
 		int mlockall(
 		    int how
 		);
 	}
 ; munlockall takes no arguments.
 325	AUE_MUNLOCKALL	STD {
 		int munlockall(void);
 	}
 326	AUE_GETCWD	STD {
 		int __getcwd(
 		    _Out_writes_z_(buflen) char *buf,
 		    size_t buflen
 		);
 	}
 327	AUE_NULL	STD {
 		int sched_setparam(
 		    pid_t pid,
 		    _In_ const struct sched_param *param
 		);
 	}
 328	AUE_NULL	STD {
 		int sched_getparam(
 		    pid_t pid,
 		    _Out_ struct sched_param *param
 		);
 	}
 329	AUE_NULL	STD {
 		int sched_setscheduler(
 		    pid_t pid,
 		    int policy,
 		    _In_ const struct sched_param *param
 		);
 	}
 330	AUE_NULL	STD {
 		int sched_getscheduler(
 		    pid_t pid
 		);
 	}
 331	AUE_NULL	STD {
 		int sched_yield(void);
 	}
 332	AUE_NULL	STD {
 		int sched_get_priority_max(
 		    int policy
 		);
 	}
 333	AUE_NULL	STD {
 		int sched_get_priority_min(
 		    int policy
 		);
 	}
 334	AUE_NULL	STD {
 		int sched_rr_get_interval(
 		    pid_t pid,
 		    _Out_ struct timespec *interval
 		);
 	}
 ; utrace: addr points to len bytes that are only read.
 335	AUE_NULL	STD {
 		int utrace(
 		    _In_reads_bytes_(len) const void *addr,
 		    size_t len
 		);
 	}
 336	AUE_SENDFILE	COMPAT4 {
 		int sendfile(
 		    int fd,
 		    int s,
 		    off_t offset,
 		    size_t nbytes,
 		    _In_opt_ struct sf_hdtr *hdtr,
 		    _Out_opt_ off_t *sbytes,
 		    int flags
 		);
 	}
 337	AUE_NULL	STD {
 		int kldsym(
 		    int fileid,
 		    int cmd,
 		    _In_ void *data
 		);
 	}
 338	AUE_JAIL	STD {
 		int jail(
 		    _In_ struct jail *jail
 		);
 	}
 339	AUE_NULL	NOSTD|NOTSTATIC {
 		int nnpfs_syscall(
 		    int operation,
 		    char *a_pathP,
 		    int a_opcode,
 		    void *a_paramsP,
 		    int a_followSymlinks
 		);
 	}
 340	AUE_SIGPROCMASK	STD {
 		int sigprocmask(
 		    int how,
 		    _In_opt_ const sigset_t *set,
 		    _Out_opt_ sigset_t *oset
 		);
 	}
 341	AUE_SIGSUSPEND	STD {
 		int sigsuspend(
 		    _In_ const sigset_t *sigmask
 		);
 	}
 342	AUE_SIGACTION	COMPAT4 {
 		int sigaction(
 		    int sig,
 		    _In_opt_ const struct sigaction *act,
 		    _Out_opt_ struct sigaction *oact
 		);
 	}
 ; sigpending stores the set of pending signals into *set, so the
 ; pointer is an output (_Out_), not an input.
 343	AUE_SIGPENDING	STD {
 		int sigpending(
 		    _Out_ sigset_t *set
 		);
 	}
 344	AUE_SIGRETURN	COMPAT4 {
 		int sigreturn(
 		    _In_ const struct ucontext4 *sigcntxp
 		);
 	}
 345	AUE_SIGWAIT	STD {
 		int sigtimedwait(
 		    _In_ const sigset_t *set,
 		    _Out_opt_ siginfo_t *info,
 		    _In_opt_ const struct timespec *timeout
 		);
 	}
 346	AUE_NULL	STD {
 		int sigwaitinfo(
 		    _In_ const sigset_t *set,
 		    _Out_opt_ siginfo_t *info
 		);
 	}
 347	AUE_ACL_GET_FILE	STD {
 		int __acl_get_file(
 		    _In_z_ const char *path,
 		    acl_type_t type,
 		    _Out_ struct acl *aclp
 		);
 	}
 348	AUE_ACL_SET_FILE	STD {
 		int __acl_set_file(
 		    _In_z_ const char *path,
 		    acl_type_t type,
 		    _In_ struct acl *aclp
 		);
 	}
 349	AUE_ACL_GET_FD	STD {
 		int __acl_get_fd(
 		    int filedes,
 		    acl_type_t type,
 		    _Out_ struct acl *aclp
 		);
 	}
 350	AUE_ACL_SET_FD	STD {
 		int __acl_set_fd(
 		    int filedes,
 		    acl_type_t type,
 		    _In_ struct acl *aclp
 		);
 	}
 351	AUE_ACL_DELETE_FILE	STD {
 		int __acl_delete_file(
 		    _In_z_ const char *path,
 		    acl_type_t type
 		);
 	}
 352	AUE_ACL_DELETE_FD	STD {
 		int __acl_delete_fd(
 		    int filedes,
 		    acl_type_t type
 		);
 	}
 353	AUE_ACL_CHECK_FILE	STD {
 		int __acl_aclcheck_file(
 		    _In_z_ const char *path,
 		    acl_type_t type,
 		    _In_ struct acl *aclp
 		);
 	}
 354	AUE_ACL_CHECK_FD	STD {
 		int __acl_aclcheck_fd(
 		    int filedes,
 		    acl_type_t type,
 		    _In_ struct acl *aclp
 		);
 	}
 355	AUE_EXTATTRCTL	STD {
 		int extattrctl(
 		    _In_z_ const char *path,
 		    int cmd,
 		    _In_z_opt_ const char *filename,
 		    int attrnamespace,
 		    _In_z_ const char *attrname
 		);
 	}
 356	AUE_EXTATTR_SET_FILE	STD {
 		ssize_t extattr_set_file(
 		    _In_z_ const char *path,
 		    int attrnamespace,
 		    _In_z_ const char *attrname,
 		    _In_reads_bytes_(nbytes) void *data,
 		    size_t nbytes
 		);
 	}
 357	AUE_EXTATTR_GET_FILE	STD {
 		ssize_t extattr_get_file(
 		    _In_z_ const char *path,
 		    int attrnamespace,
 		    _In_z_ const char *attrname,
 		    _Out_writes_bytes_(nbytes) void *data,
 		    size_t nbytes
 		);
 	}
 358	AUE_EXTATTR_DELETE_FILE	STD {
 		int extattr_delete_file(
 		    _In_z_ const char *path,
 		    int attrnamespace,
 		    _In_z_ const char *attrname
 		);
 	}
 359	AUE_AIO_WAITCOMPLETE	STD {
 		ssize_t aio_waitcomplete(
 		    _Outptr_result_maybenull_ struct aiocb **aiocbp,
 		    _In_opt_ struct timespec *timeout
 		);
 	}
 360	AUE_GETRESUID	STD {
 		int getresuid(
 		    _Out_opt_ uid_t *ruid,
 		    _Out_opt_ uid_t *euid,
 		    _Out_opt_ uid_t *suid
 		);
 	}
 361	AUE_GETRESGID	STD {
 		int getresgid(
 		    _Out_opt_ gid_t *rgid,
 		    _Out_opt_ gid_t *egid,
 		    _Out_opt_ gid_t *sgid
 		);
 	}
 362	AUE_KQUEUE	STD {
 		int kqueue(void);
 	}
 363	AUE_KEVENT	COMPAT11 {
 		int kevent(
 		    int fd,
 		    _In_reads_opt_(nchanges) struct kevent_freebsd11 *changelist,
 		    int nchanges,
 		    _Out_writes_opt_(nevents) struct kevent_freebsd11 *eventlist,
 		    int nevents,
 		    _In_opt_ const struct timespec *timeout
 		);
 	}
 364	AUE_NULL	OBSOL	__cap_get_proc
 365	AUE_NULL	OBSOL	__cap_set_proc
 366	AUE_NULL	OBSOL	__cap_get_fd
 367	AUE_NULL	OBSOL	__cap_get_file
 368	AUE_NULL	OBSOL	__cap_set_fd
 369	AUE_NULL	OBSOL	__cap_set_file
 370	AUE_NULL	UNIMPL	nosys
 371	AUE_EXTATTR_SET_FD	STD {
 		ssize_t extattr_set_fd(
 		    int fd,
 		    int attrnamespace,
 		    _In_z_ const char *attrname,
 		    _In_reads_bytes_(nbytes) void *data,
 		    size_t nbytes
 		);
 	}
 372	AUE_EXTATTR_GET_FD	STD {
 		ssize_t extattr_get_fd(
 		    int fd,
 		    int attrnamespace,
 		    _In_z_ const char *attrname,
 		    _Out_writes_bytes_(nbytes) void *data,
 		    size_t nbytes
 		);
 	}
 373	AUE_EXTATTR_DELETE_FD	STD {
 		int extattr_delete_fd(
 		    int fd,
 		    int attrnamespace,
 		    _In_z_ const char *attrname
 		);
 	}
 374	AUE_SETUGID	STD {
 		int __setugid(
 		    int flag
 		);
 	}
 375	AUE_NULL	OBSOL	nfsclnt
 376	AUE_EACCESS	STD {
 		int eaccess(
 		    _In_z_ const char *path,
 		    int amode
 		);
 	}
 377	AUE_NULL	NOSTD|NOTSTATIC {
 		int afs3_syscall(
 		    long syscall,
 		    long parm1,
 		    long parm2,
 		    long parm3,
 		    long parm4,
 		    long parm5,
 		    long parm6
 		);
 	}
 378	AUE_NMOUNT	STD {
 		int nmount(
 		    _In_reads_(iovcnt) struct iovec *iovp,
 		    unsigned int iovcnt,
 		    int flags
 		);
 	}
 379	AUE_NULL	OBSOL	kse_exit
 380	AUE_NULL	OBSOL	kse_wakeup
 381	AUE_NULL	OBSOL	kse_create
 382	AUE_NULL	OBSOL	kse_thr_interrupt
 383	AUE_NULL	OBSOL	kse_release
 384	AUE_NULL	STD {
 		int __mac_get_proc(
 		    _In_ struct mac *mac_p
 		);
 	}
 385	AUE_NULL	STD {
 		int __mac_set_proc(
 		    _In_ struct mac *mac_p
 		);
 	}
 386	AUE_NULL	STD {
 		int __mac_get_fd(
 		    int fd,
 		    _In_ struct mac *mac_p
 		);
 	}
 387	AUE_NULL	STD {
 		int __mac_get_file(
 		    _In_z_ const char *path_p,
 		    _In_ struct mac *mac_p
 		);
 	}
 388	AUE_NULL	STD {
 		int __mac_set_fd(
 		    int fd,
 		    _In_ struct mac *mac_p
 		);
 	}
 389	AUE_NULL	STD {
 		int __mac_set_file(
 		    _In_z_ const char *path_p,
 		    _In_ struct mac *mac_p
 		);
 	}
 390	AUE_NULL	STD {
 		int kenv(
 		    int what,
 		    _In_z_opt_ const char *name,
 		    _Inout_updates_opt_(len) char *value,
 		    int len
 		);
 	}
 391	AUE_LCHFLAGS	STD {
 		int lchflags(
 		    _In_z_ const char *path,
 		    u_long flags
 		);
 	}
 392	AUE_NULL	STD {
 		int uuidgen(
 		    _Out_writes_(count) struct uuid *store,
 		    int count
 		);
 	}
 393	AUE_SENDFILE	STD {
 		int sendfile(
 		    int fd,
 		    int s,
 		    off_t offset,
 		    size_t nbytes,
 		    _In_opt_ struct sf_hdtr *hdtr,
 		    _Out_opt_ off_t *sbytes,
 		    int flags
 		);
 	}
 394	AUE_NULL	STD {
 		int mac_syscall(
 		    _In_z_ const char *policy,
 		    int call,
 		    _In_opt_ void *arg
 		);
 	}
 395	AUE_GETFSSTAT	COMPAT11 {
 		int getfsstat(
 		    _Out_writes_bytes_opt_(bufsize) struct freebsd11_statfs *buf,
 		    long bufsize,
 		    int mode
 		);
 	}
 396	AUE_STATFS	COMPAT11 {
 		int statfs(
 		    _In_z_ const char *path,
 		    _Out_ struct freebsd11_statfs *buf
 		);
 	}
 397	AUE_FSTATFS	COMPAT11 {
 		int fstatfs(
 		    int fd,
 		    _Out_ struct freebsd11_statfs *buf
 		);
 	}
 398	AUE_FHSTATFS	COMPAT11 {
 		int fhstatfs(
 		    _In_ const struct fhandle *u_fhp,
 		    _Out_ struct freebsd11_statfs *buf
 		);
 	}
 399	AUE_NULL	UNIMPL	nosys
 400	AUE_SEMCLOSE	NOSTD {
 		int ksem_close(
 		    semid_t id
 		);
 	}
 401	AUE_SEMPOST	NOSTD {
 		int ksem_post(
 		    semid_t id
 		);
 	}
 402	AUE_SEMWAIT	NOSTD {
 		int ksem_wait(
 		    semid_t id
 		);
 	}
 403	AUE_SEMTRYWAIT	NOSTD {
 		int ksem_trywait(
 		    semid_t id
 		);
 	}
 404	AUE_SEMINIT	NOSTD {
 		int ksem_init(
 		    _Out_ semid_t *idp,
 		    unsigned int value
 		);
 	}
 405	AUE_SEMOPEN	NOSTD {
 		int ksem_open(
 		    _Out_ semid_t *idp,
 		    _In_z_ const char *name,
 		    int oflag,
 		    mode_t mode,
 		    unsigned int value
 		);
 	}
 406	AUE_SEMUNLINK	NOSTD {
 		int ksem_unlink(
 		    _In_z_ const char *name
 		);
 	}
 407	AUE_SEMGETVALUE	NOSTD {
 		int ksem_getvalue(
 		    semid_t id,
 		    _Out_ int *val
 		);
 	}
 408	AUE_SEMDESTROY	NOSTD {
 		int ksem_destroy(
 		    semid_t id
 		);
 	}
 409	AUE_NULL	STD {
 		int __mac_get_pid(
 		    pid_t pid,
 		    _In_ struct mac *mac_p
 		);
 	}
 410	AUE_NULL	STD {
 		int __mac_get_link(
 		    _In_z_ const char *path_p,
 		    _In_ struct mac *mac_p
 		);
 	}
 411	AUE_NULL	STD {
 		int __mac_set_link(
 		    _In_z_ const char *path_p,
 		    _In_ struct mac *mac_p
 		);
 	}
 412	AUE_EXTATTR_SET_LINK	STD {
 		ssize_t extattr_set_link(
 		    _In_z_ const char *path,
 		    int attrnamespace,
 		    _In_z_ const char *attrname,
 		    _In_reads_bytes_(nbytes) void *data,
 		    size_t nbytes
 		);
 	}
 413	AUE_EXTATTR_GET_LINK	STD {
 		ssize_t extattr_get_link(
 		    _In_z_ const char *path,
 		    int attrnamespace,
 		    _In_z_ const char *attrname,
 		    _Out_writes_bytes_(nbytes) void *data,
 		    size_t nbytes
 		);
 	}
 414	AUE_EXTATTR_DELETE_LINK	STD {
 		int extattr_delete_link(
 		    _In_z_ const char *path,
 		    int attrnamespace,
 		    _In_z_ const char *attrname
 		);
 	}
 415	AUE_NULL	STD {
 		int __mac_execve(
 		    _In_z_ const char *fname,
 		    _In_ char **argv,
 		    _In_ char **envv,
 		    _In_ struct mac *mac_p
 		);
 	}
 416	AUE_SIGACTION	STD {
 		int sigaction(
 		    int sig,
 		    _In_opt_ const struct sigaction *act,
 		    _Out_opt_ struct sigaction *oact
 		);
 	}
 417	AUE_SIGRETURN	STD {
 		int sigreturn(
 		    _In_ const struct __ucontext *sigcntxp
 		);
 	}
 418	AUE_NULL	UNIMPL	__xstat
 419	AUE_NULL	UNIMPL	__xfstat
 420	AUE_NULL	UNIMPL	__xlstat
 421	AUE_NULL	STD {
 		int getcontext(
 		    _Out_ struct __ucontext *ucp
 		);
 	}
 422	AUE_NULL	STD {
 		int setcontext(
 		    _In_ const struct __ucontext *ucp
 		);
 	}
 423	AUE_NULL	STD {
 		int swapcontext(
 		    _Out_ struct __ucontext *oucp,
 		    _In_ const struct __ucontext *ucp
 		);
 	}
 424	AUE_SWAPOFF	STD {
 		int swapoff(
 		    _In_z_ const char *name
 		);
 	}
 425	AUE_ACL_GET_LINK	STD {
 		int __acl_get_link(
 		    _In_z_ const char *path,
 		    acl_type_t type,
 		    _Out_ struct acl *aclp
 		);
 	}
 426	AUE_ACL_SET_LINK	STD {
 		int __acl_set_link(
 		    _In_z_ const char *path,
 		    acl_type_t type,
 		    _In_ struct acl *aclp
 		);
 	}
 427	AUE_ACL_DELETE_LINK	STD {
 		int __acl_delete_link(
 		    _In_z_ const char *path,
 		    acl_type_t type
 		);
 	}
 428	AUE_ACL_CHECK_LINK	STD {
 		int __acl_aclcheck_link(
 		    _In_z_ const char *path,
 		    acl_type_t type,
 		    _In_ struct acl *aclp
 		);
 	}
 429	AUE_SIGWAIT	STD {
 		int sigwait(
 		    _In_ const sigset_t *set,
 		    _Out_ int *sig
 		);
 	}
 430	AUE_THR_CREATE	STD {
 		int thr_create(
 		    _In_ ucontext_t *ctx,
 		    _Out_ long *id,
 		    int flags
 		);
 	}
 431	AUE_THR_EXIT	STD {
 		void thr_exit(
 		    _Out_opt_ long *state
 		);
 	}
 432	AUE_NULL	STD {
 		int thr_self(
 		    _Out_ long *id
 		);
 	}
 433	AUE_THR_KILL	STD {
 		int thr_kill(
 		    long id,
 		    int sig
 		);
 	}
 434-435	AUE_NULL	UNIMPL	nosys
 436	AUE_JAIL_ATTACH	STD {
 		int jail_attach(
 		    int jid
 		);
 	}
 437	AUE_EXTATTR_LIST_FD	STD {
 		ssize_t extattr_list_fd(
 		    int fd,
 		    int attrnamespace,
 		    _Out_writes_bytes_opt_(nbytes) void *data,
 		    size_t nbytes
 		);
 	}
 438	AUE_EXTATTR_LIST_FILE	STD {
 		ssize_t extattr_list_file(
 		    _In_z_ const char *path,
 		    int attrnamespace,
 		    _Out_writes_bytes_opt_(nbytes) void *data,
 		    size_t nbytes
 		);
 	}
 ; extattr_list_link: data is an optional output buffer of nbytes bytes.
 439	AUE_EXTATTR_LIST_LINK	STD {
 		ssize_t extattr_list_link(
 		    _In_z_ const char *path,
 		    int attrnamespace,
 		    _Out_writes_bytes_opt_(nbytes) void *data,
 		    size_t nbytes
 		);
 	}
 440	AUE_NULL	OBSOL	kse_switchin
 441	AUE_SEMWAIT	NOSTD {
 		int ksem_timedwait(
 		    semid_t id,
 		    _In_opt_ const struct timespec *abstime
 		);
 	}
 442	AUE_NULL	STD {
 		int thr_suspend(
 		    _In_opt_ const struct timespec *timeout
 		);
 	}
 443	AUE_NULL	STD {
 		int thr_wake(
 		    long id
 		);
 	}
 444	AUE_MODUNLOAD	STD {
 		int kldunloadf(
 		    int fileid,
 		    int flags
 		);
 	}
 445	AUE_AUDIT	STD {
 		int audit(
 		    _In_reads_bytes_(length) const void *record,
 		    u_int length
 		);
 	}
 446	AUE_AUDITON	STD {
 		int auditon(
 		    int cmd,
 		    _In_opt_ void *data,
 		    u_int length
 		);
 	}
 447	AUE_GETAUID	STD {
 		int getauid(
 		    _Out_ uid_t *auid
 		);
 	}
 448	AUE_SETAUID	STD {
 		int setauid(
 		    _In_ uid_t *auid
 		);
 	}
 449	AUE_GETAUDIT	STD {
 		int getaudit(
 		    _Out_ struct auditinfo *auditinfo
 		);
 	}
 450	AUE_SETAUDIT	STD {
 		int setaudit(
 		    _In_ struct auditinfo *auditinfo
 		);
 	}
 451	AUE_GETAUDIT_ADDR	STD {
 		int getaudit_addr(
 		    _Out_writes_bytes_(length) struct auditinfo_addr *auditinfo_addr,
 		    u_int length
 		);
 	}
 452	AUE_SETAUDIT_ADDR	STD {
 		int setaudit_addr(
 		    _In_reads_bytes_(length) struct auditinfo_addr *auditinfo_addr,
 		    u_int length
 		);
 	}
 453	AUE_AUDITCTL	STD {
 		int auditctl(
 		    _In_z_ const char *path
 		);
 	}
 454	AUE_NULL	STD {
 		int _umtx_op(
 		    _Inout_ void *obj,
 		    int op,
 		    u_long val,
 		    _In_ void *uaddr1,
 		    _In_ void *uaddr2
 		);
 	}
 455	AUE_THR_NEW	STD {
 		int thr_new(
 		    _In_ struct thr_param *param,
 		    int param_size
 		);
 	}
 456	AUE_NULL	STD {
 		int sigqueue(
 		    pid_t pid,
 		    int signum,
 		    _In_ void *value
 		);
 	}
 
 457	AUE_MQ_OPEN	NOSTD {
 		int kmq_open(
 		    _In_z_ const char *path,
 		    int flags,
 		    mode_t mode,
 		    _In_opt_ const struct mq_attr *attr
 		);
 	}
 458	AUE_MQ_SETATTR	NOSTD {
 		int kmq_setattr(
 		    int mqd,
 		    _In_opt_ const struct mq_attr *attr,
 		    _Out_opt_ struct mq_attr *oattr
 		);
 	}
 459	AUE_MQ_TIMEDRECEIVE	NOSTD {
 		int kmq_timedreceive(
 		    int mqd,
 		    _Out_writes_bytes_(msg_len) char *msg_ptr,
 		    size_t msg_len,
 		    _Out_opt_ unsigned *msg_prio,
 		    _In_opt_ const struct timespec *abs_timeout
 		);
 	}
 460	AUE_MQ_TIMEDSEND	NOSTD {
 		int kmq_timedsend(
 		    int mqd,
 		    _In_reads_bytes_(msg_len) const char *msg_ptr,
 		    size_t msg_len,
 		    unsigned msg_prio,
 		    _In_opt_ const struct timespec *abs_timeout
 		);
 	}
 461	AUE_MQ_NOTIFY	NOSTD {
 		int kmq_notify(
 		    int mqd,
 		    _In_opt_ const struct sigevent *sigev
 		);
 	}
 462	AUE_MQ_UNLINK	NOSTD {
 		int kmq_unlink(
 		    _In_z_ const char *path
 		);
 	}
 463	AUE_NULL	STD {
 		int abort2(
 		    _In_z_ const char *why,
 		    int nargs,
 		    _In_reads_(nargs) void **args
 		);
 	}
 464	AUE_NULL	STD {
 		int thr_set_name(
 		    long id,
 		    _In_z_ const char *name
 		);
 	}
 465	AUE_AIO_FSYNC	STD {
 		int aio_fsync(
 		    int op,
 		    _In_ struct aiocb *aiocbp
 		);
 	}
 466	AUE_RTPRIO	STD {
 		int rtprio_thread(
 		    int function,
 		    lwpid_t lwpid,
 		    _Inout_ struct rtprio *rtp
 		);
 	}
 467-468	AUE_NULL	UNIMPL	nosys
 469	AUE_NULL	UNIMPL	__getpath_fromfd
 470	AUE_NULL	UNIMPL	__getpath_fromaddr
 471	AUE_SCTP_PEELOFF	NOSTD {
 		int sctp_peeloff(
 		    int sd,
 		    uint32_t name
 		);
 	}
 472	AUE_SCTP_GENERIC_SENDMSG	NOSTD {
 		int sctp_generic_sendmsg(
 		    int sd,
 		    _In_reads_bytes_(mlen) void *msg,
 		    int mlen,
 		    _In_reads_bytes_(tolen) struct sockaddr *to,
 		    __socklen_t tolen,
 		    _In_opt_ struct sctp_sndrcvinfo *sinfo,
 		    int flags
 		);
 	}
 473	AUE_SCTP_GENERIC_SENDMSG_IOV	NOSTD {
 		int sctp_generic_sendmsg_iov(
 		    int sd,
 		    _In_reads_(iovlen) struct iovec *iov,
 		    int iovlen,
 		    _In_reads_bytes_(tolen) struct sockaddr *to,
 		    __socklen_t tolen,
 		    _In_opt_ struct sctp_sndrcvinfo *sinfo,
 		    int flags
 		);
 	}
 474	AUE_SCTP_GENERIC_RECVMSG	NOSTD {
 		int sctp_generic_recvmsg(
 		    int sd,
 		    _In_reads_(iovlen) struct iovec *iov,
 		    int iovlen,
 		    _Out_writes_bytes_(*fromlenaddr) struct sockaddr *from,
 		    _Out_ __socklen_t *fromlenaddr,
 		    _In_opt_ struct sctp_sndrcvinfo *sinfo,
 		    _Out_opt_ int *msg_flags
 		);
 	}
 475	AUE_PREAD	STD {
 		ssize_t pread(
 		    int fd,
 		    _Out_writes_bytes_(nbyte) void *buf,
 		    size_t nbyte,
 		    off_t offset
 		);
 	}
 476	AUE_PWRITE	STD {
 		ssize_t pwrite(
 		    int fd,
 		    _In_reads_bytes_(nbyte) const void *buf,
 		    size_t nbyte,
 		    off_t offset
 		);
 	}
 477	AUE_MMAP	STD {
 		void *mmap(
 		    _In_ void *addr,
 		    size_t len,
 		    int prot,
 		    int flags,
 		    int fd,
 		    off_t pos
 		);
 	}
 478	AUE_LSEEK	STD {
 		off_t lseek(
 		    int fd,
 		    off_t offset,
 		    int whence
 		);
 	}
 479	AUE_TRUNCATE	STD {
 		int truncate(
 		    _In_z_ const char *path,
 		    off_t length
 		);
 	}
 480	AUE_FTRUNCATE	STD {
 		int ftruncate(
 		    int fd,
 		    off_t length
 		);
 	}
 481	AUE_THR_KILL2	STD {
 		int thr_kill2(
 		    pid_t pid,
 		    long id,
 		    int sig
 		);
 	}
 482	AUE_SHMOPEN	STD {
 		int shm_open(
 		    _In_z_ const char *path,
 		    int flags,
 		    mode_t mode
 		);
 	}
 483	AUE_SHMUNLINK	STD {
 		int shm_unlink(
 		    _In_z_ const char *path
 		);
 	}
 484	AUE_NULL	STD {
 		int cpuset(
 		    _Out_ cpusetid_t *setid
 		);
 	}
 485	AUE_NULL	STD {
 		int cpuset_setid(
 		    cpuwhich_t which,
 		    id_t id,
 		    cpusetid_t setid
 		);
 	}
 486	AUE_NULL	STD {
 		int cpuset_getid(
 		    cpulevel_t level,
 		    cpuwhich_t which,
 		    id_t id,
 		    _Out_ cpusetid_t *setid
 		);
 	}
 487	AUE_NULL	STD {
 		int cpuset_getaffinity(
 		    cpulevel_t level,
 		    cpuwhich_t which,
 		    id_t id,
 		    size_t cpusetsize,
 		    _Out_ cpuset_t *mask
 		);
 	}
 ; cpuset_setaffinity only reads the caller-supplied mask (it is const),
 ; so the pointer is annotated as an input rather than an output.
 488	AUE_NULL	STD {
 		int cpuset_setaffinity(
 		    cpulevel_t level,
 		    cpuwhich_t which,
 		    id_t id,
 		    size_t cpusetsize,
 		    _In_ const cpuset_t *mask
 		);
 	}
 489	AUE_FACCESSAT	STD {
 		int faccessat(
 		    int fd,
 		    _In_z_ const char *path,
 		    int amode,
 		    int flag
 		);
 	}
 490	AUE_FCHMODAT	STD {
 		int fchmodat(
 		    int fd,
 		    _In_z_ const char *path,
 		    mode_t mode,
 		    int flag
 		);
 	}
 491	AUE_FCHOWNAT	STD {
 		int fchownat(
 		    int fd,
 		    _In_z_ const char *path,
 		    uid_t uid,
 		    gid_t gid,
 		    int flag
 		);
 	}
 492	AUE_FEXECVE	STD {
 		int fexecve(
 		    int fd,
 		    _In_ char **argv,
 		    _In_ char **envv
 		);
 	}
 493	AUE_FSTATAT	COMPAT11 {
 		int fstatat(
 		    int fd,
 		    _In_z_ const char *path,
 		    _Out_ struct freebsd11_stat *buf,
 		    int flag
 		);
 	}
 494	AUE_FUTIMESAT	STD {
 		int futimesat(
 		    int fd,
 		    _In_z_ const char *path,
 		    _In_reads_(2) struct timeval *times
 		);
 	}
 495	AUE_LINKAT	STD {
 		int linkat(
 		    int fd1,
 		    _In_z_ const char *path1,
 		    int fd2,
 		    _In_z_ const char *path2,
 		    int flag
 		);
 	}
 496	AUE_MKDIRAT	STD {
 		int mkdirat(
 		    int fd,
 		    _In_z_ const char *path,
 		    mode_t mode
 		);
 	}
 497	AUE_MKFIFOAT	STD {
 		int mkfifoat(
 		    int fd,
 		    _In_z_ const char *path,
 		    mode_t mode
 		);
 	}
 498	AUE_MKNODAT	COMPAT11 {
 		int mknodat(
 		    int fd,
 		    _In_z_ const char *path,
 		    mode_t mode,
 		    uint32_t dev
 		);
 	}
 ; XXX: see the comment for open
 499	AUE_OPENAT_RWTC	STD {
 		int openat(
 		    int fd,
 		    _In_z_ const char *path,
 		    int flag,
 		    mode_t mode
 		);
 	}
 500	AUE_READLINKAT	STD {
 		int readlinkat(
 		    int fd,
 		    _In_z_ const char *path,
 		    _Out_writes_bytes_(bufsize) char *buf,
 		    size_t bufsize
 		);
 	}
 501	AUE_RENAMEAT	STD {
 		int renameat(
 		    int oldfd,
 		    _In_z_ const char *old,
 		    int newfd,
 		    _In_z_ const char *new
 		);
 	}
 502	AUE_SYMLINKAT	STD {
 		int symlinkat(
 		    _In_z_ const char *path1,
 		    int fd,
 		    _In_z_ const char *path2
 		);
 	}
 503	AUE_UNLINKAT	STD {
 		int unlinkat(
 		    int fd,
 		    _In_z_ const char *path,
 		    int flag
 		);
 	}
 504	AUE_POSIX_OPENPT	STD {
 		int posix_openpt(
 		    int flags
 		);
 	}
 ; 505 is initialised by the kgssapi code, if present.
 505	AUE_NULL	NOSTD {
 		int gssd_syscall(
 		    _In_z_ const char *path
 		);
 	}
 506	AUE_JAIL_GET	STD {
 		int jail_get(
 		    _In_reads_(iovcnt) struct iovec *iovp,
 		    unsigned int iovcnt,
 		    int flags
 		);
 	}
 507	AUE_JAIL_SET	STD {
 		int jail_set(
 		    _In_reads_(iovcnt) struct iovec *iovp,
 		    unsigned int iovcnt,
 		    int flags
 		);
 	}
 508	AUE_JAIL_REMOVE	STD {
 		int jail_remove(
 		    int jid
 		);
 	}
 509	AUE_CLOSEFROM	STD {
 		int closefrom(
 		    int lowfd
 		);
 	}
 510	AUE_SEMCTL	NOSTD {
 		int __semctl(
 		    int semid,
 		    int semnum,
 		    int cmd,
 		    _Inout_ union semun *arg
 		);
 	}
 511	AUE_MSGCTL	NOSTD {
 		int msgctl(
 		    int msqid,
 		    int cmd,
 		    _Inout_opt_ struct msqid_ds *buf
 		);
 	}
 512	AUE_SHMCTL	NOSTD {
 		int shmctl(
 		    int shmid,
 		    int cmd,
 		    _Inout_opt_ struct shmid_ds *buf
 		);
 	}
 513	AUE_LPATHCONF	STD {
 		int lpathconf(
 		    _In_z_ const char *path,
 		    int name
 		);
 	}
 514	AUE_NULL	OBSOL	cap_new
 515	AUE_CAP_RIGHTS_GET	STD {
 		int __cap_rights_get(
 		    int version,
 		    int fd,
 		    _Out_ cap_rights_t *rightsp
 		);
 	}
 516	AUE_CAP_ENTER	STD {
 		int cap_enter(void);
 	}
 517	AUE_CAP_GETMODE	STD {
 		int cap_getmode(
 		    _Out_ u_int *modep
 		);
 	}
 518	AUE_PDFORK	STD {
 		int pdfork(
 		    _Out_ int *fdp,
 		    int flags
 		);
 	}
 519	AUE_PDKILL	STD {
 		int pdkill(
 		    int fd,
 		    int signum
 		);
 	}
 520	AUE_PDGETPID	STD {
 		int pdgetpid(
 		    int fd,
 		    _Out_ pid_t *pidp
 		);
 	}
 521	AUE_PDWAIT	UNIMPL	pdwait4
 522	AUE_SELECT	STD {
 		int pselect(
 		    int nd,
 		    _Inout_opt_ fd_set *in,
 		    _Inout_opt_ fd_set *ou,
 		    _Inout_opt_ fd_set *ex,
 		    _In_opt_ const struct timespec *ts,
 		    _In_opt_ const sigset_t *sm
 		);
 	}
 523	AUE_GETLOGINCLASS	STD {
 		int getloginclass(
 		    _Out_writes_z_(namelen) char *namebuf,
 		    size_t namelen
 		);
 	}
 524	AUE_SETLOGINCLASS	STD {
 		int setloginclass(
 		    _In_z_ const char *namebuf
 		);
 	}
 525	AUE_NULL	STD {
 		int rctl_get_racct(
 		    _In_reads_bytes_(inbuflen) const void *inbufp,
 		    size_t inbuflen,
 		    _Out_writes_bytes_(outbuflen) void *outbufp,
 		    size_t outbuflen
 		);
 	}
 526	AUE_NULL	STD {
 		int rctl_get_rules(
 		    _In_reads_bytes_(inbuflen) const void *inbufp,
 		    size_t inbuflen,
 		    _Out_writes_bytes_(outbuflen) void *outbufp,
 		    size_t outbuflen
 		);
 	}
 527	AUE_NULL	STD {
 		int rctl_get_limits(
 		    _In_reads_bytes_(inbuflen) const void *inbufp,
 		    size_t inbuflen,
 		    _Out_writes_bytes_(outbuflen) void *outbufp,
 		    size_t outbuflen
 		);
 	}
 528	AUE_NULL	STD {
 		int rctl_add_rule(
 		    _In_reads_bytes_(inbuflen) const void *inbufp,
 		    size_t inbuflen,
 		    _Out_writes_bytes_(outbuflen) void *outbufp,
 		    size_t outbuflen
 		);
 	}
 529	AUE_NULL	STD {
 		int rctl_remove_rule(
 		    _In_reads_bytes_(inbuflen) const void *inbufp,
 		    size_t inbuflen,
 		    _Out_writes_bytes_(outbuflen) void *outbufp,
 		    size_t outbuflen
 		);
 	}
 530	AUE_POSIX_FALLOCATE	STD {
 		int posix_fallocate(
 		    int fd,
 		    off_t offset,
 		    off_t len
 		);
 	}
 531	AUE_POSIX_FADVISE	STD {
 		int posix_fadvise(
 		    int fd,
 		    off_t offset,
 		    off_t len,
 		    int advice
 		);
 	}
 532	AUE_WAIT6	STD {
 		int wait6(
 		    idtype_t idtype,
 		    id_t id,
 		    _Out_opt_ int *status,
 		    int options,
 		    _Out_opt_ struct __wrusage *wrusage,
 		    _Out_opt_ siginfo_t *info
 		);
 	}
 533	AUE_CAP_RIGHTS_LIMIT	STD {
 		int cap_rights_limit(
 		    int fd,
 		    _In_ cap_rights_t *rightsp
 		);
 	}
 534	AUE_CAP_IOCTLS_LIMIT	STD {
 		int cap_ioctls_limit(
 		    int fd,
 		    _In_reads_(ncmds) const u_long *cmds,
 		    size_t ncmds
 		);
 	}
 535	AUE_CAP_IOCTLS_GET	STD {
 		ssize_t cap_ioctls_get(
 		    int fd,
 		    _Out_writes_(maxcmds) u_long *cmds,
 		    size_t maxcmds
 		);
 	}
 536	AUE_CAP_FCNTLS_LIMIT	STD {
 		int cap_fcntls_limit(
 		    int fd,
 		    uint32_t fcntlrights
 		);
 	}
 537	AUE_CAP_FCNTLS_GET	STD {
 		int cap_fcntls_get(
 		    int fd,
 		    _Out_ uint32_t *fcntlrightsp
 		);
 	}
 538	AUE_BINDAT	STD {
 		int bindat(
 		    int fd,
 		    int s,
 		    _In_reads_bytes_(namelen) const struct sockaddr *name,
 		    int namelen
 		);
 	}
 539	AUE_CONNECTAT	STD {
 		int connectat(
 		    int fd,
 		    int s,
 		    _In_reads_bytes_(namelen) const struct sockaddr *name,
 		    int namelen
 		);
 	}
 540	AUE_CHFLAGSAT	STD {
 		int chflagsat(
 		    int fd,
 		    _In_z_ const char *path,
 		    u_long flags,
 		    int atflag
 		);
 	}
 541	AUE_ACCEPT	STD {
 		int accept4(
 		    int s,
 		    _Out_writes_bytes_opt_(*anamelen) struct sockaddr *name,
 		    _Inout_opt_ __socklen_t *anamelen,
 		    int flags
 		);
 	}
 542	AUE_PIPE	STD {
 		int pipe2(
 		    _Out_writes_(2) int *fildes,
 		    int flags
 		);
 	}
 543	AUE_AIO_MLOCK	STD {
 		int aio_mlock(
 		    _In_ struct aiocb *aiocbp
 		);
 	}
 544	AUE_PROCCTL	STD {
 		int procctl(
 		    idtype_t idtype,
 		    id_t id,
 		    int com,
 		    _In_opt_ void *data
 		);
 	}
 545	AUE_POLL	STD {
 		int ppoll(
 		    _Inout_updates_(nfds) struct pollfd *fds,
 		    u_int nfds,
 		    _In_opt_ const struct timespec *ts,
 		    _In_opt_ const sigset_t *set
 		);
 	}
 546	AUE_FUTIMES	STD {
 		int futimens(
 		    int fd,
 		    _In_reads_(2) struct timespec *times
 		);
 	}
 547	AUE_FUTIMESAT	STD {
 		int utimensat(
 		    int fd,
 		    _In_z_ const char *path,
 		    _In_reads_(2) struct timespec *times,
 		    int flag
 		);
 	}
 548	AUE_NULL	OBSOL	numa_getaffinity
 549	AUE_NULL	OBSOL	numa_setaffinity
 550	AUE_FSYNC	STD {
 		int fdatasync(
 		    int fd
 		);
 	}
 551	AUE_FSTAT	STD {
 		int fstat(
 		    int fd,
 		    _Out_ struct stat *sb
 		);
 	}
 552	AUE_FSTATAT	STD {
 		int fstatat(
 		    int fd,
 		    _In_z_ const char *path,
 		    _Out_ struct stat *buf,
 		    int flag
 		);
 	}
 553	AUE_FHSTAT	STD {
 		int fhstat(
 		    _In_ const struct fhandle *u_fhp,
 		    _Out_ struct stat *sb
 		);
 	}
 554	AUE_GETDIRENTRIES STD {
 		ssize_t getdirentries(
 		    int fd,
 		    _Out_writes_bytes_(count) char *buf,
 		    size_t count,
 		    _Out_ off_t *basep
 		);
 	}
 555	AUE_STATFS	STD {
 		int statfs(
 		    _In_z_ const char *path,
 		    _Out_ struct statfs *buf
 		);
 	}
 556	AUE_FSTATFS	STD {
 		int fstatfs(
 		    int fd,
 		    _Out_ struct statfs *buf
 		);
 	}
 557	AUE_GETFSSTAT	STD {
 		int getfsstat(
 		    _Out_writes_bytes_opt_(bufsize) struct statfs *buf,
 		    long bufsize,
 		    int mode
 		);
 	}
 558	AUE_FHSTATFS	STD {
 		int fhstatfs(
 		    _In_ const struct fhandle *u_fhp,
 		    _Out_ struct statfs *buf
 		);
 	}
 559	AUE_MKNODAT	STD {
 		int mknodat(
 		    int fd,
 		    _In_z_ const char *path,
 		    mode_t mode,
 		    dev_t dev
 		);
 	}
 560	AUE_KEVENT	STD {
 		int kevent(
 		    int fd,
 		    _In_reads_opt_(nchanges) struct kevent *changelist,
 		    int nchanges,
 		    _Out_writes_opt_(nevents) struct kevent *eventlist,
 		    int nevents,
 		    _In_opt_ const struct timespec *timeout
 		);
 	}
 561	AUE_NULL	STD {
 		int cpuset_getdomain(
 		    cpulevel_t level,
 		    cpuwhich_t which,
 		    id_t id,
 		    size_t domainsetsize,
 		    _Out_writes_bytes_(domainsetsize) domainset_t *mask,
 		    _Out_ int *policy
 		);
 	}
 562	AUE_NULL	STD {
 		int cpuset_setdomain(
 		    cpulevel_t level,
 		    cpuwhich_t which,
 		    id_t id,
 		    size_t domainsetsize,
 		    _In_ domainset_t *mask,
 		    int policy
 		);
 	}
 563	AUE_NULL	STD {
 		int getrandom(
 		    _Out_writes_bytes_(buflen) void *buf,
 		    size_t buflen,
 		    unsigned int flags
 		);
 	}
 564	AUE_NULL	STD {
 		int getfhat(
 		    int fd,
 		    _In_z_ char *path,
 		    _Out_ struct fhandle *fhp,
 		    int flags
 		);
 	}
 565	AUE_NULL	STD {
 		int fhlink(
 		    _In_ struct fhandle *fhp,
 		    _In_z_ const char *to
 		);
 	}
 ; fhlinkat: link the file identified by the handle fhp to the path `to',
 ; taken together with the descriptor argument tofd.
 566	AUE_NULL	STD {
 		int fhlinkat(
 		    _In_ struct fhandle *fhp,
 		    int tofd,
 		    _In_z_ const char *to
 		);
 	}
 567	AUE_NULL	STD {
 		int fhreadlink(
 		    _In_ struct fhandle *fhp,
 		    _Out_writes_(bufsize) char *buf,
 		    size_t bufsize
 		);
 	}
+568	AUE_UNLINKAT	STD {
+		int funlinkat(
+		    int dfd,
+		    _In_z_ const char *path,
+		    int fd,
+		    int flag
+		);
+	}
 
 ; Please copy any additions and changes to the following compatibility tables:
 ; sys/compat/freebsd32/syscalls.master
 ; vim: syntax=off
Index: head/sys/kern/vfs_mountroot.c
===================================================================
--- head/sys/kern/vfs_mountroot.c	(revision 345981)
+++ head/sys/kern/vfs_mountroot.c	(revision 345982)
@@ -1,1132 +1,1132 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 2010 Marcel Moolenaar
  * Copyright (c) 1999-2004 Poul-Henning Kamp
  * Copyright (c) 1999 Michael Smith
  * Copyright (c) 1989, 1993
  *      The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include "opt_rootdevname.h"
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/conf.h>
 #include <sys/cons.h>
 #include <sys/fcntl.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/mdioctl.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/filedesc.h>
 #include <sys/reboot.h>
 #include <sys/sbuf.h>
 #include <sys/stat.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysproto.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/sysent.h>
 #include <sys/systm.h>
 #include <sys/vnode.h>
 
 #include <geom/geom.h>
 
 /*
  * The root filesystem is detailed in the kernel environment variable
  * vfs.root.mountfrom, which is expected to be in the general format
  *
  * <vfsname>:[<path>][	<vfsname>:[<path>] ...]
  * vfsname   := the name of a VFS known to the kernel and capable
  *              of being mounted as root
  * path      := disk device name or other data used by the filesystem
  *              to locate its physical store
  *
  * If the environment variable vfs.root.mountfrom is a space separated list,
  * each list element is tried in turn and the root filesystem will be mounted
  * from the first one that succeeds.
  *
  * The environment variable vfs.root.mountfrom.options is a comma delimited
  * set of string mount options.  These mount options must be parseable
  * by nmount() in the kernel.
  */
 
 static int parse_mount(char **);
 static struct mntarg *parse_mountroot_options(struct mntarg *, const char *);
 static int sysctl_vfs_root_mount_hold(SYSCTL_HANDLER_ARGS);
 static void vfs_mountroot_wait(void);
 static int vfs_mountroot_wait_if_neccessary(const char *fs, const char *dev);
 
 /*
  * The vnode of the system's root (/ in the filesystem, without chroot
  * active.)
  */
 struct vnode *rootvnode;
 
 /*
  * Mount of the system's /dev.
  */
 struct mount *rootdevmp;
 
 char *rootdevnames[2] = {NULL, NULL};
 
 struct mtx root_holds_mtx;
 MTX_SYSINIT(root_holds, &root_holds_mtx, "root_holds", MTX_DEF);
 
 /*
  * One outstanding root mount hold.  Tokens are created by root_mount_hold()
  * and destroyed by root_mount_rel().
  */
 struct root_hold_token {
 	/* Identifier passed to root_mount_hold(); the pointer is stored as-is. */
 	const char			*who;
 	/* Linkage on the root_holds list. */
 	LIST_ENTRY(root_hold_token)	list;
 };
 
 /* All outstanding holds; accessed with root_holds_mtx held. */
 static LIST_HEAD(, root_hold_token)	root_holds =
     LIST_HEAD_INITIALIZER(root_holds);
 
 /*
  * Failure policy selectable with the ".onfail" directive; the names map
  * to the directive arguments "continue", "panic", "reboot" and "retry".
  */
 enum action {
 	A_CONTINUE,
 	A_PANIC,
 	A_REBOOT,
 	A_RETRY
 };
 
 /* Currently selected failure policy. */
 static enum action root_mount_onfail = A_CONTINUE;
 
 /* Unit of the md device attached by the ".md" directive, or -1 if none. */
 static int root_mount_mddev;
 /* Value reported by root_mounted(). */
 static int root_mount_complete;
 
 /* By default wait up to 3 seconds for devices to appear. */
 static int root_mount_timeout = 3;
 TUNABLE_INT("vfs.mountroot.timeout", &root_mount_timeout);
 
 static int root_mount_always_wait = 0;
 SYSCTL_INT(_vfs, OID_AUTO, root_mount_always_wait, CTLFLAG_RDTUN,
     &root_mount_always_wait, 0,
     "Wait for root mount holds even if the root device already exists");
 
 SYSCTL_PROC(_vfs, OID_AUTO, root_mount_hold,
     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
     NULL, 0, sysctl_vfs_root_mount_hold, "A",
     "List of root mount hold tokens");
 
 static int
 sysctl_vfs_root_mount_hold(SYSCTL_HANDLER_ARGS)
 {
 	struct sbuf sb;
 	struct root_hold_token *h;
 	int error;
 
 	sbuf_new(&sb, NULL, 256, SBUF_AUTOEXTEND | SBUF_INCLUDENUL);
 
 	mtx_lock(&root_holds_mtx);
 	LIST_FOREACH(h, &root_holds, list) {
 		if (h != LIST_FIRST(&root_holds))
 			sbuf_putc(&sb, ' ');
 		sbuf_printf(&sb, "%s", h->who);
 	}
 	mtx_unlock(&root_holds_mtx);
 
 	error = sbuf_finish(&sb);
 	if (error == 0)
 		error = SYSCTL_OUT(req, sbuf_data(&sb), sbuf_len(&sb));
 	sbuf_delete(&sb);
 	return (error);
 }
 
 /*
  * Allocate a hold token labelled with `identifier' and put it on the
  * root_holds list.  The identifier pointer is stored, not copied.  The
  * returned token is what root_mount_rel() takes to drop the hold.
  */
 struct root_hold_token *
 root_mount_hold(const char *identifier)
 {
 	struct root_hold_token *token;
 
 	token = malloc(sizeof(*token), M_DEVBUF, M_WAITOK | M_ZERO);
 	token->who = identifier;
 
 	mtx_lock(&root_holds_mtx);
 	TSHOLD("root mount");
 	LIST_INSERT_HEAD(&root_holds, token, list);
 	mtx_unlock(&root_holds_mtx);
 
 	return (token);
 }
 
 /*
  * Drop a hold obtained from root_mount_hold(): unlink the token, wake up
  * anybody sleeping on the hold list and free the token.  A NULL token is
  * accepted and ignored.
  */
 void
 root_mount_rel(struct root_hold_token *h)
 {
 
 	if (h != NULL) {
 		mtx_lock(&root_holds_mtx);
 		LIST_REMOVE(h, list);
 		TSRELEASE("root mount");
 		wakeup(&root_holds);
 		mtx_unlock(&root_holds_mtx);
 
 		free(h, M_DEVBUF);
 	}
 }
 
 /*
  * Report the current value of the root_mount_complete flag.
  */
 int
 root_mounted(void)
 {
 	int complete;
 
 	/* Int stores are atomic, so the flag is read without a mutex. */
 	complete = root_mount_complete;
 	return (complete);
 }
 
 static void
 set_rootvnode(void)
 {
 	struct proc *p;
 
 	if (VFS_ROOT(TAILQ_FIRST(&mountlist), LK_EXCLUSIVE, &rootvnode))
 		panic("set_rootvnode: Cannot find root vnode");
 
 	VOP_UNLOCK(rootvnode, 0);
 
 	p = curthread->td_proc;
 	FILEDESC_XLOCK(p->p_fd);
 
 	if (p->p_fd->fd_cdir != NULL)
 		vrele(p->p_fd->fd_cdir);
 	p->p_fd->fd_cdir = rootvnode;
 	VREF(rootvnode);
 
 	if (p->p_fd->fd_rdir != NULL)
 		vrele(p->p_fd->fd_rdir);
 	p->p_fd->fd_rdir = rootvnode;
 	VREF(rootvnode);
 
 	FILEDESC_XUNLOCK(p->p_fd);
 }
 
 /*
  * Make devfs the (temporary) root: reuse the existing devfs mount when
  * one is recorded in rootdevmp, otherwise allocate and mount a new one
  * and insert it at the head of the mount list.  Afterwards rootvnode is
  * re-established and a "dev" symlink pointing at "/" is created.
  *
  * On return *mpp is the devfs mount, or NULL if an error occurred before
  * it was obtained.  Returns 0 or an errno value; a failure to create the
  * symlink is reported and returned with *mpp still set.
  */
 static int
 vfs_mountroot_devfs(struct thread *td, struct mount **mpp)
 {
 	struct vfsoptlist *opts;
 	struct vfsconf *vfsp;
 	struct mount *mp;
 	int error;
 
 	*mpp = NULL;
 
 	if (rootdevmp != NULL) {
 		/*
 		 * Already have /dev; this happens during rerooting.
 		 */
 		error = vfs_busy(rootdevmp, 0);
 		if (error != 0)
 			return (error);
 		*mpp = rootdevmp;
 	} else {
 		vfsp = vfs_byname("devfs");
 		KASSERT(vfsp != NULL, ("Could not find devfs by name"));
 		if (vfsp == NULL)
 			return (ENOENT);
 
 		mp = vfs_mount_alloc(NULLVP, vfsp, "/dev", td->td_ucred);
 
 		error = VFS_MOUNT(mp);
 		KASSERT(error == 0, ("VFS_MOUNT(devfs) failed %d", error));
 		/* NOTE(review): mp is not released on this path -- confirm. */
 		if (error)
 			return (error);
 
 		/* Give the new mount an empty option list. */
 		opts = malloc(sizeof(struct vfsoptlist), M_MOUNT, M_WAITOK);
 		TAILQ_INIT(opts);
 		mp->mnt_opt = opts;
 
 		mtx_lock(&mountlist_mtx);
 		TAILQ_INSERT_HEAD(&mountlist, mp, mnt_list);
 		mtx_unlock(&mountlist_mtx);
 
 		*mpp = mp;
 		rootdevmp = mp;
 	}
 
 	/* The head of the mount list now provides the root vnode. */
 	set_rootvnode();
 
 	error = kern_symlinkat(td, "/", AT_FDCWD, "dev", UIO_SYSSPACE);
 	if (error)
 		printf("kern_symlink /dev -> / returns %d\n", error);
 
 	return (error);
 }
 
 /*
  * Install the newly mounted root.  The mount following devfs on the mount
  * list (the new root) is moved to the head of the list and devfs to the
  * tail, the old root is detached from the vnode it covered, rootvnode is
  * re-established, and then the old root (if it was not devfs itself) is
  * re-attached under /.mount or /mnt and devfs under /dev.
  *
  * Failures to re-attach are reported with printf() and otherwise ignored.
  */
 static void
 vfs_mountroot_shuffle(struct thread *td, struct mount *mpdevfs)
 {
 	struct nameidata nd;
 	struct mount *mporoot, *mpnroot;
 	struct vnode *vp, *vporoot, *vpdevfs;
 	char *fspath;
 	int error;
 
 	/* The new root is whatever was mounted right after devfs. */
 	mpnroot = TAILQ_NEXT(mpdevfs, mnt_list);
 
 	/* Shuffle the mountlist. */
 	mtx_lock(&mountlist_mtx);
 	mporoot = TAILQ_FIRST(&mountlist);
 	TAILQ_REMOVE(&mountlist, mpdevfs, mnt_list);
 	if (mporoot != mpdevfs) {
 		/* Rerooting: the new root must be moved to the head explicitly. */
 		TAILQ_REMOVE(&mountlist, mpnroot, mnt_list);
 		TAILQ_INSERT_HEAD(&mountlist, mpnroot, mnt_list);
 	}
 	TAILQ_INSERT_TAIL(&mountlist, mpdevfs, mnt_list);
 	mtx_unlock(&mountlist_mtx);
 
 	cache_purgevfs(mporoot, true);
 	if (mporoot != mpdevfs)
 		cache_purgevfs(mpdevfs, true);
 
 	if (VFS_ROOT(mporoot, LK_EXCLUSIVE, &vporoot))
 		panic("vfs_mountroot_shuffle: Cannot find root vnode");
 
 	/* Detach the old root: it no longer covers anything and is not "/". */
 	VI_LOCK(vporoot);
 	vporoot->v_iflag &= ~VI_MOUNT;
 	VI_UNLOCK(vporoot);
 	vporoot->v_mountedhere = NULL;
 	mporoot->mnt_flag &= ~MNT_ROOTFS;
 	mporoot->mnt_vnodecovered = NULL;
 	vput(vporoot);
 
 	/* Set up the new rootvnode, and purge the cache */
 	mpnroot->mnt_vnodecovered = NULL;
 	set_rootvnode();
 	cache_purgevfs(rootvnode->v_mount, true);
 
 	if (mporoot != mpdevfs) {
 		/* Remount old root under /.mount or /mnt */
 		fspath = "/.mount";
 		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
 		    fspath, td);
 		error = namei(&nd);
 		if (error) {
 			/* /.mount is unavailable; fall back to /mnt. */
 			NDFREE(&nd, NDF_ONLY_PNBUF);
 			fspath = "/mnt";
 			NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,
 			    fspath, td);
 			error = namei(&nd);
 		}
 		if (!error) {
 			vp = nd.ni_vp;
 			error = (vp->v_type == VDIR) ? 0 : ENOTDIR;
 			if (!error)
 				error = vinvalbuf(vp, V_SAVE, 0, 0);
 			if (!error) {
 				/* Hook the old root onto the directory vnode. */
 				cache_purge(vp);
 				mporoot->mnt_vnodecovered = vp;
 				vp->v_mountedhere = mporoot;
 				strlcpy(mporoot->mnt_stat.f_mntonname,
 				    fspath, MNAMELEN);
 				VOP_UNLOCK(vp, 0);
 			} else
 				vput(vp);
 		}
 		NDFREE(&nd, NDF_ONLY_PNBUF);
 
 		if (error)
 			printf("mountroot: unable to remount previous root "
 			    "under /.mount or /mnt (error %d)\n", error);
 	}
 
 	/* Remount devfs under /dev */
 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, "/dev", td);
 	error = namei(&nd);
 	if (!error) {
 		vp = nd.ni_vp;
 		error = (vp->v_type == VDIR) ? 0 : ENOTDIR;
 		if (!error)
 			error = vinvalbuf(vp, V_SAVE, 0, 0);
 		if (!error) {
 			/* Release the vnode devfs covered before, if any. */
 			vpdevfs = mpdevfs->mnt_vnodecovered;
 			if (vpdevfs != NULL) {
 				cache_purge(vpdevfs);
 				vpdevfs->v_mountedhere = NULL;
 				vrele(vpdevfs);
 			}
 			mpdevfs->mnt_vnodecovered = vp;
 			vp->v_mountedhere = mpdevfs;
 			VOP_UNLOCK(vp, 0);
 		} else
 			vput(vp);
 	}
 	if (error)
 		printf("mountroot: unable to remount devfs under /dev "
 		    "(error %d)\n", error);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 
 	if (mporoot == mpdevfs) {
 		/* devfs was the old root: drop the busy reference on it. */
 		vfs_unbusy(mpdevfs);
 		/* Unlink the no longer needed /dev/dev -> / symlink */
-		error = kern_unlinkat(td, AT_FDCWD, "/dev/dev",
+		error = kern_funlinkat(td, AT_FDCWD, "/dev/dev", FD_NONE,
 		    UIO_SYSSPACE, 0, 0);
 		if (error)
 			printf("mountroot: unable to unlink /dev/dev "
 			    "(error %d)\n", error);
 	}
 }
 
 /*
  * Configuration parser.
  */
 
 /* Parser character classes. */
 #define	CC_WHITESPACE		-1
 #define	CC_NONWHITESPACE	-2
 
 /* Parse errors. */
 #define	PE_EOF			-1
 #define	PE_EOL			-2
 
 /* Return the character at the current parse position without consuming it. */
 static __inline int
 parse_peek(char **conf)
 {
 	const char *cursor;
 
 	cursor = *conf;
 	return (*cursor);
 }
 
 /* Overwrite the character at the current parse position with `c'. */
 static __inline void
 parse_poke(char **conf, int c)
 {
 	char *cursor;
 
 	cursor = *conf;
 	*cursor = c;
 }
 
 /* Move the parse position forward by one character. */
 static __inline void
 parse_advance(char **conf)
 {
 
 	*conf = *conf + 1;
 }
 
 /*
  * Advance the parse position until the current character matches `mc',
  * which is either a literal character or one of the CC_* classes.
  *
  * Returns 0 with the position left on the matching character, PE_EOF if
  * the terminating NUL is reached first, or (for CC_NONWHITESPACE only)
  * PE_EOL if a newline is reached first.
  */
 static int
 parse_skipto(char **conf, int mc)
 {
 	int ch, found;
 
 	for (;;) {
 		ch = parse_peek(conf);
 		if (ch == 0)
 			return (PE_EOF);
 
 		if (mc == CC_WHITESPACE) {
 			found = (ch == ' ' || ch == '\t' || ch == '\n');
 		} else if (mc == CC_NONWHITESPACE) {
 			/* A newline ends the search instead of being skipped. */
 			if (ch == '\n')
 				return (PE_EOL);
 			found = !(ch == ' ' || ch == '\t');
 		} else {
 			found = (ch == mc);
 		}
 
 		if (found)
 			return (0);
 		parse_advance(conf);
 	}
 }
 
 /*
  * Extract the next whitespace-delimited token on the current line.
  *
  * On success 0 is returned and *tok is a NUL-terminated copy of the token
  * allocated from M_TEMP, which the caller must free.  If no token is
  * found, *tok is NULL and the PE_* code from parse_skipto() is returned.
  */
 static int
 parse_token(char **conf, char **tok)
 {
 	char *start;
 	size_t toklen;
 	int error;
 
 	*tok = NULL;
 
 	error = parse_skipto(conf, CC_NONWHITESPACE);
 	if (error != 0)
 		return (error);
 
 	start = *conf;
 	/* Reaching the end of input simply ends the token. */
 	(void)parse_skipto(conf, CC_WHITESPACE);
 	toklen = *conf - start;
 
 	/* M_ZERO supplies the terminating NUL. */
 	*tok = malloc(toklen + 1, M_TEMP, M_WAITOK | M_ZERO);
 	bcopy(start, *tok, toklen);
 	return (0);
 }
 
 /*
  * Print the kernel environment variable `var' as "  name=value", or
  * nothing at all if it is not set.
  */
 static void
 parse_dir_ask_printenv(const char *var)
 {
 	char *value;
 
 	value = kern_getenv(var);
 	if (value == NULL)
 		return;
 
 	printf("  %s=%s\n", var, value);
 	freeenv(value);
 }
 
 /*
  * Handle the ".ask" directive: print the loader variables and a short
  * help text, then prompt on the console for a root filesystem
  * specification until one mounts successfully or an empty line is given.
  *
  * The `conf' argument is not used.  Returns 0 if a mount succeeded,
  * EINVAL if the prompt was aborted with an empty line.
  */
 static int
 parse_dir_ask(char **conf)
 {
 	char name[80];
 	char *mnt;
 	int error;
 
 	vfs_mountroot_wait();
 
 	printf("\nLoader variables:\n");
 	parse_dir_ask_printenv("vfs.root.mountfrom");
 	parse_dir_ask_printenv("vfs.root.mountfrom.options");
 
 	printf("\nManual root filesystem specification:\n");
 	printf("  <fstype>:<device> [options]\n");
 	printf("      Mount <device> using filesystem <fstype>\n");
 	printf("      and with the specified (optional) option list.\n");
 	printf("\n");
 	printf("    eg. ufs:/dev/da0s1a\n");
 	printf("        zfs:zroot/ROOT/default\n");
 	printf("        cd9660:/dev/cd0 ro\n");
 	printf("          (which is equivalent to: ");
 	printf("mount -t cd9660 -o ro /dev/cd0 /)\n");
 	printf("\n");
 	printf("  ?               List valid disk boot devices\n");
 	printf("  .               Yield 1 second (for background tasks)\n");
 	printf("  <empty line>    Abort manual input\n");
 
 	do {
 		/*
 		 * Preset the error so that both `break' (abort) and
 		 * `continue' (which re-tests the loop condition) see a
 		 * non-zero value.
 		 */
 		error = EINVAL;
 		printf("\nmountroot> ");
 		cngets(name, sizeof(name), GETS_ECHO);
 		if (name[0] == '\0')
 			break;
 		if (name[0] == '?' && name[1] == '\0') {
 			printf("\nList of GEOM managed disk devices:\n  ");
 			g_dev_print();
 			continue;
 		}
 		if (name[0] == '.' && name[1] == '\0') {
 			pause("rmask", hz);
 			continue;
 		}
 		mnt = name;
 		error = parse_mount(&mnt);
 		/* -1 is PE_EOF: the input had no token or no ':' separator. */
 		if (error == -1)
 			printf("Invalid file system specification.\n");
 	} while (error != 0);
 
 	return (error);
 }
 
 static int
 parse_dir_md(char **conf)
 {
 	struct stat sb;
 	struct thread *td;
 	struct md_ioctl *mdio;
 	char *path, *tok;
 	int error, fd, len;
 
 	td = curthread;
 
 	error = parse_token(conf, &tok);
 	if (error)
 		return (error);
 
 	len = strlen(tok);
 	mdio = malloc(sizeof(*mdio) + len + 1, M_TEMP, M_WAITOK | M_ZERO);
 	path = (void *)(mdio + 1);
 	bcopy(tok, path, len);
 	free(tok, M_TEMP);
 
 	/* Get file status. */
 	error = kern_statat(td, 0, AT_FDCWD, path, UIO_SYSSPACE, &sb, NULL);
 	if (error)
 		goto out;
 
 	/* Open /dev/mdctl so that we can attach/detach. */
 	error = kern_openat(td, AT_FDCWD, "/dev/" MDCTL_NAME, UIO_SYSSPACE,
 	    O_RDWR, 0);
 	if (error)
 		goto out;
 
 	fd = td->td_retval[0];
 	mdio->md_version = MDIOVERSION;
 	mdio->md_type = MD_VNODE;
 
 	if (root_mount_mddev != -1) {
 		mdio->md_unit = root_mount_mddev;
 		(void)kern_ioctl(td, fd, MDIOCDETACH, (void *)mdio);
 		/* Ignore errors. We don't care. */
 		root_mount_mddev = -1;
 	}
 
 	mdio->md_file = (void *)(mdio + 1);
 	mdio->md_options = MD_AUTOUNIT | MD_READONLY;
 	mdio->md_mediasize = sb.st_size;
 	mdio->md_unit = 0;
 	error = kern_ioctl(td, fd, MDIOCATTACH, (void *)mdio);
 	if (error)
 		goto out;
 
 	if (mdio->md_unit > 9) {
 		printf("rootmount: too many md units\n");
 		mdio->md_file = NULL;
 		mdio->md_options = 0;
 		mdio->md_mediasize = 0;
 		error = kern_ioctl(td, fd, MDIOCDETACH, (void *)mdio);
 		/* Ignore errors. We don't care. */
 		error = ERANGE;
 		goto out;
 	}
 
 	root_mount_mddev = mdio->md_unit;
 	printf(MD_NAME "%u attached to %s\n", root_mount_mddev, mdio->md_file);
 
 	error = kern_close(td, fd);
 
  out:
 	free(mdio, M_TEMP);
 	return (error);
 }
 
 /*
  * Handle the ".onfail <action>" directive, which selects the failure
  * policy stored in root_mount_onfail.  Recognised actions are "continue",
  * "panic", "reboot" and "retry".
  *
  * Returns 0 on success, the PE_* code from parse_token() if the action
  * is missing, or EINVAL for an unknown action (the current policy is
  * left unchanged in that case).
  */
 static int
 parse_dir_onfail(char **conf)
 {
 	char *action;
 	int error;
 
 	error = parse_token(conf, &action);
 	if (error)
 		return (error);
 
 	if (!strcmp(action, "continue"))
 		root_mount_onfail = A_CONTINUE;
 	else if (!strcmp(action, "panic"))
 		root_mount_onfail = A_PANIC;
 	else if (!strcmp(action, "reboot"))
 		root_mount_onfail = A_REBOOT;
 	else if (!strcmp(action, "retry"))
 		root_mount_onfail = A_RETRY;
 	else {
 		printf("rootmount: %s: unknown action\n", action);
 		error = EINVAL;
 	}
 
 	free(action, M_TEMP);
 	/* Propagate EINVAL, like the other directive handlers do. */
 	return (error);
 }
 
 static int
 parse_dir_timeout(char **conf)
 {
 	char *tok, *endtok;
 	long secs;
 	int error;
 
 	error = parse_token(conf, &tok);
 	if (error)
 		return (error);
 
 	secs = strtol(tok, &endtok, 0);
 	error = (secs < 0 || *endtok != '\0') ? EINVAL : 0;
 	if (!error)
 		root_mount_timeout = secs;
 	free(tok, M_TEMP);
 	return (error);
 }
 
 static int
 parse_directive(char **conf)
 {
 	char *dir;
 	int error;
 
 	error = parse_token(conf, &dir);
 	if (error)
 		return (error);
 
 	if (strcmp(dir, ".ask") == 0)
 		error = parse_dir_ask(conf);
 	else if (strcmp(dir, ".md") == 0)
 		error = parse_dir_md(conf);
 	else if (strcmp(dir, ".onfail") == 0)
 		error = parse_dir_onfail(conf);
 	else if (strcmp(dir, ".timeout") == 0)
 		error = parse_dir_timeout(conf);
 	else {
 		printf("mountroot: invalid directive `%s'\n", dir);
 		/* Ignore the rest of the line. */
 		(void)parse_skipto(conf, '\n');
 		error = EINVAL;
 	}
 	free(dir, M_TEMP);
 	return (error);
 }
 
 /*
  * Check whether the path `dev' can be looked up.  Returns 1 if the
  * lookup succeeds and 0 otherwise.
  */
 static int
 parse_mount_dev_present(const char *dev)
 {
 	struct nameidata nd;
 	int found;
 
 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, dev, curthread);
 	found = (namei(&nd) == 0);
 	if (found)
 		vput(nd.ni_vp);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 
 	return (found);
 }
 
 /* Size of the buffer handed to the mount code for its error message. */
 #define	ERRMSGL	255
 /*
  * Parse one "<fstype>:<device> [options]" specification at the current
  * position and try to mount it as the root filesystem, retrying every
  * hz / 10 ticks until it succeeds or root_mount_timeout seconds have
  * passed.
  *
  * Returns 0 on success, a negative PE_* code if the specification could
  * not be parsed, or an errno value (EDOOFUS if the mount attempt itself
  * reported a negative error).
  */
 static int
 parse_mount(char **conf)
 {
 	char *errmsg;
 	struct mntarg *ma;
 	char *dev, *fs, *opts, *tok;
 	int delay, error, timeout;
 
 	error = parse_token(conf, &tok);
 	if (error)
 		return (error);
 	/*
 	 * Split the token in place at the ':'.  fs and dev point into the
 	 * same allocation, so freeing fs releases both.
 	 */
 	fs = tok;
 	error = parse_skipto(&tok, ':');
 	if (error) {
 		free(fs, M_TEMP);
 		return (error);
 	}
 	parse_poke(&tok, '\0');
 	parse_advance(&tok);
 	dev = tok;
 
 	if (root_mount_mddev != -1) {
 		/* Handle substitution for the md unit number. */
 		tok = strstr(dev, "md#");
 		if (tok != NULL)
 			tok[2] = '0' + root_mount_mddev;
 	}
 
 	/* Parse options. */
 	error = parse_token(conf, &tok);
 	opts = (error == 0) ? tok : NULL;
 
 	printf("Trying to mount root from %s:%s [%s]...\n", fs, dev,
 	    (opts != NULL) ? opts : "");
 
 	errmsg = malloc(ERRMSGL, M_TEMP, M_WAITOK | M_ZERO);
 
 	if (vfs_byname(fs) == NULL) {
 		strlcpy(errmsg, "unknown file system", ERRMSGL);
 		error = ENOENT;
 		goto out;
 	}
 
 	error = vfs_mountroot_wait_if_neccessary(fs, dev);
 	if (error != 0)
 		goto out;
 
 	/* Both values are in ticks. */
 	delay = hz / 10;
 	timeout = root_mount_timeout * hz;
 
 	for (;;) {
 		/* The argument list is built afresh for every attempt. */
 		ma = NULL;
 		ma = mount_arg(ma, "fstype", fs, -1);
 		ma = mount_arg(ma, "fspath", "/", -1);
 		ma = mount_arg(ma, "from", dev, -1);
 		ma = mount_arg(ma, "errmsg", errmsg, ERRMSGL);
 		ma = mount_arg(ma, "ro", NULL, 0);
 		ma = parse_mountroot_options(ma, opts);
 
 		error = kernel_mount(ma, MNT_ROOTFS);
 		if (error == 0 || timeout <= 0)
 			break;
 
 		/*
 		 * Report the first failure, and with bootverbose also each
 		 * time the remaining timeout is a whole number of seconds.
 		 */
 		if (root_mount_timeout * hz == timeout ||
 		    (bootverbose && timeout % hz == 0)) {
 			printf("Mounting from %s:%s failed with error %d; "
 			    "retrying for %d more second%s\n", fs, dev, error,
 			    timeout / hz, (timeout / hz > 1) ? "s" : "");
 		}
 		pause("rmretry", delay);
 		timeout -= delay;
 	}
  out:
 	if (error) {
 		printf("Mounting from %s:%s failed with error %d",
 		    fs, dev, error);
 		if (errmsg[0] != '\0')
 			printf(": %s", errmsg);
 		printf(".\n");
 	}
 	free(fs, M_TEMP);
 	free(errmsg, M_TEMP);
 	if (opts != NULL)
 		free(opts, M_TEMP);
 	/* kernel_mount can return -1 on error. */
 	return ((error < 0) ? EDOOFUS : error);
 }
 #undef ERRMSGL
 
 /*
  * Execute the mount script held in `sb' until a file system shows up in the
  * mount list after `mpdevfs'.
  *
  * Returns 0 once such a mount exists.  If the script is exhausted (or a
  * parser condition stops it) without a new mount, the action selected by the
  * last ".onfail" directive is carried out: continue (return the last error),
  * panic, reboot, or rerun the whole script from the start.
  */
 static int
 vfs_mountroot_parse(struct sbuf *sb, struct mount *mpdevfs)
 {
 	struct mount *mp;
 	char *conf;
 	int error;
 
 	root_mount_mddev = -1;
 
 retry:
 	/* Every pass restarts at the top of the script with default onfail. */
 	conf = sbuf_data(sb);
 	mp = TAILQ_NEXT(mpdevfs, mnt_list);
 	error = (mp == NULL) ? 0 : EDOOFUS;
 	root_mount_onfail = A_CONTINUE;
 	while (mp == NULL) {
 		error = parse_skipto(&conf, CC_NONWHITESPACE);
 		if (error == PE_EOL) {
 			/* Blank line: step over the newline and keep going. */
 			parse_advance(&conf);
 			continue;
 		}
 		/* Negative values are parser conditions that end the script. */
 		if (error < 0)
 			break;
 		/* The first non-blank character selects the kind of line. */
 		switch (parse_peek(&conf)) {
 		case '#':
 			error = parse_skipto(&conf, '\n');
 			break;
 		case '.':
 			error = parse_directive(&conf);
 			break;
 		default:
 			error = parse_mount(&conf);
 			if (error == -1) {
 				printf("mountroot: invalid file system "
 				    "specification.\n");
 				error = 0;
 			}
 			break;
 		}
 		if (error < 0)
 			break;
 		/* Ignore any trailing garbage on the line. */
 		if (parse_peek(&conf) != '\n') {
 			printf("mountroot: advancing to next directive...\n");
 			(void)parse_skipto(&conf, '\n');
 		}
 		/* Re-check whether the line just processed produced a mount. */
 		mp = TAILQ_NEXT(mpdevfs, mnt_list);
 	}
 	if (mp != NULL)
 		return (0);
 
 	/*
 	 * We failed to mount (a new) root.
 	 */
 	switch (root_mount_onfail) {
 	case A_CONTINUE:
 		break;
 	case A_PANIC:
 		panic("mountroot: unable to (re-)mount root.");
 		/* NOTREACHED */
 	case A_RETRY:
 		goto retry;
 	case A_REBOOT:
 		kern_reboot(RB_NOSYNC);
 		/* NOTREACHED */
 	}
 
 	return (error);
 }
 
 /*
  * Generate the built-in mount script into `sb'.
  *
  * The script is emitted in priority order: failure policy and timeout,
  * an early ".ask" when RB_ASKNAME is set, the compiled-in ROOTDEVNAME when
  * RB_DFLTROOT is set, CD-ROM candidates for RB_CDROM, every entry of the
  * "vfs.root.mountfrom" environment variable, the rootdevnames[] entries,
  * ROOTDEVNAME as a fallback, and finally ".ask" if it was not emitted first.
  */
 static void
 vfs_mountroot_conf0(struct sbuf *sb)
 {
 	char *s, *tok, *mnt, *opt;
 	int error;
 
 	sbuf_printf(sb, ".onfail panic\n");
 	sbuf_printf(sb, ".timeout %d\n", root_mount_timeout);
 	if (boothowto & RB_ASKNAME)
 		sbuf_printf(sb, ".ask\n");
 #ifdef ROOTDEVNAME
 	if (boothowto & RB_DFLTROOT)
 		sbuf_printf(sb, "%s\n", ROOTDEVNAME);
 #endif
 	if (boothowto & RB_CDROM) {
 		/*
 		 * cd0 is tried with the configured timeout, cd1 with none;
 		 * the configured timeout is restored afterwards.
 		 */
 		sbuf_printf(sb, "cd9660:/dev/cd0 ro\n");
 		sbuf_printf(sb, ".timeout 0\n");
 		sbuf_printf(sb, "cd9660:/dev/cd1 ro\n");
 		sbuf_printf(sb, ".timeout %d\n", root_mount_timeout);
 	}
 	s = kern_getenv("vfs.root.mountfrom");
 	if (s != NULL) {
 		opt = kern_getenv("vfs.root.mountfrom.options");
 		tok = s;
 		/* One script line per token; all share the same options. */
 		error = parse_token(&tok, &mnt);
 		while (!error) {
 			sbuf_printf(sb, "%s %s\n", mnt,
 			    (opt != NULL) ? opt : "");
 			free(mnt, M_TEMP);
 			error = parse_token(&tok, &mnt);
 		}
 		if (opt != NULL)
 			freeenv(opt);
 		freeenv(s);
 	}
 	if (rootdevnames[0] != NULL)
 		sbuf_printf(sb, "%s\n", rootdevnames[0]);
 	if (rootdevnames[1] != NULL)
 		sbuf_printf(sb, "%s\n", rootdevnames[1]);
 #ifdef ROOTDEVNAME
 	if (!(boothowto & RB_DFLTROOT))
 		sbuf_printf(sb, "%s\n", ROOTDEVNAME);
 #endif
 	/* Last resort: prompt, unless the prompt was already placed first. */
 	if (!(boothowto & RB_ASKNAME))
 		sbuf_printf(sb, ".ask\n");
 }
 
 static int
 vfs_mountroot_readconf(struct thread *td, struct sbuf *sb)
 {
 	static char buf[128];
 	struct nameidata nd;
 	off_t ofs;
 	ssize_t resid;
 	int error, flags, len;
 
 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, "/.mount.conf", td);
 	flags = FREAD;
 	error = vn_open(&nd, &flags, 0, NULL);
 	if (error)
 		return (error);
 
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	ofs = 0;
 	len = sizeof(buf) - 1;
 	while (1) {
 		error = vn_rdwr(UIO_READ, nd.ni_vp, buf, len, ofs,
 		    UIO_SYSSPACE, IO_NODELOCKED, td->td_ucred,
 		    NOCRED, &resid, td);
 		if (error)
 			break;
 		if (resid == len)
 			break;
 		buf[len - resid] = 0;
 		sbuf_printf(sb, "%s", buf);
 		ofs += len - resid;
 	}
 
 	VOP_UNLOCK(nd.ni_vp, 0);
 	vn_close(nd.ni_vp, FREAD, td->td_ucred, td);
 	return (error);
 }
 
 /*
  * Block until the root_holds list is empty.
  *
  * While holds remain, the names of the holders are printed, rate-limited
  * through ppsratecheck(), and the thread sleeps on root_holds for up to hz
  * ticks before checking again.
  */
 static void
 vfs_mountroot_wait(void)
 {
 	struct root_hold_token *h;
 	struct timeval lastfail;
 	int curfail;
 
 	TSENTER();
 
 	/*
 	 * ppsratecheck() reads its state arguments before writing them, so
 	 * both must start from a known value rather than stack garbage.
 	 */
 	curfail = 0;
 	lastfail.tv_sec = 0;
 	lastfail.tv_usec = 0;
 	while (1) {
 		g_waitidle();
 		mtx_lock(&root_holds_mtx);
 		if (LIST_EMPTY(&root_holds)) {
 			mtx_unlock(&root_holds_mtx);
 			break;
 		}
 		if (ppsratecheck(&lastfail, &curfail, 1)) {
 			printf("Root mount waiting for:");
 			LIST_FOREACH(h, &root_holds, list)
 				printf(" %s", h->who);
 			printf("\n");
 		}
 		TSWAIT("root mount");
 		/* PDROP: the mutex is released by msleep() and stays released. */
 		msleep(&root_holds, &root_holds_mtx, PZERO | PDROP, "roothold",
 		    hz);
 		TSUNWAIT("root mount");
 	}
 
 	TSEXIT();
 }
 
 /*
  * Wait, if needed, for the device backing the root file system.  Returns 0
  * when the mount may be attempted and ENODEV when the device did not show
  * up within root_mount_timeout seconds.
  */
 static int
 vfs_mountroot_wait_if_neccessary(const char *fs, const char *dev)
 {
 	int remaining, step;
 
 	/*
 	 * In case of ZFS and NFS we don't have a way to wait for
 	 * specific device.  Also do the wait if the user forced that
 	 * behaviour by setting vfs.root_mount_always_wait=1.
 	 */
 	if (root_mount_always_wait != 0 || dev[0] == '\0' ||
 	    strcmp(fs, "zfs") == 0 || strstr(fs, "nfs") != NULL) {
 		vfs_mountroot_wait();
 		return (0);
 	}
 
 	/*
 	 * Otherwise, no point in waiting if the device is already there.
 	 * Note that we must wait for GEOM to finish reconfiguring itself,
 	 * eg for geom_part(4) to finish tasting.
 	 */
 	g_waitidle();
 	if (parse_mount_dev_present(dev))
 		return (0);
 
 	/*
 	 * No luck.  Let's wait.  This code looks weird, but it's that way
 	 * to behave exactly as it used to work before.
 	 */
 	vfs_mountroot_wait();
 	printf("mountroot: waiting for device %s...\n", dev);
 	step = hz / 10;
 	remaining = root_mount_timeout * hz;
 	for (;;) {
 		pause("rmdev", step);
 		remaining -= step;
 		/* Once the budget is spent the device is not probed again. */
 		if (remaining <= 0)
 			return (ENODEV);
 		if (parse_mount_dev_present(dev))
 			return (0);
 	}
 }
 
 /*
  * Mount the root file system.
  *
  * Builds the default mount script, mounts devfs, then repeatedly runs the
  * current script.  After each successful run the new root is shuffled into
  * place and "/.mount.conf" is read from it as the next script; the loop ends
  * at the first error from any of these steps.  Afterwards the clock is
  * initialized from the newest mount timestamp, prison0's root is updated,
  * root_mount_complete is set and the mountroot event handlers are invoked.
  */
 void
 vfs_mountroot(void)
 {
 	struct mount *mp;
 	struct sbuf *sb;
 	struct thread *td;
 	time_t timebase;
 	int error;
 	
 	mtx_assert(&Giant, MA_NOTOWNED);
 
 	TSENTER();
 
 	td = curthread;
 
 	sb = sbuf_new_auto();
 	vfs_mountroot_conf0(sb);
 	sbuf_finish(sb);
 
 	error = vfs_mountroot_devfs(td, &mp);
 	while (!error) {
 		error = vfs_mountroot_parse(sb, mp);
 		if (!error) {
 			vfs_mountroot_shuffle(td, mp);
 			/* Reuse the sbuf for the script found on the new root. */
 			sbuf_clear(sb);
 			error = vfs_mountroot_readconf(td, sb);
 			sbuf_finish(sb);
 		}
 	}
 
 	sbuf_delete(sb);
 
 	/*
 	 * Iterate over all currently mounted file systems and use
 	 * the time stamp found to check and/or initialize the RTC.
 	 * Call inittodr() only once and pass it the largest of the
 	 * timestamps we encounter.
 	 */
 	timebase = 0;
 	mtx_lock(&mountlist_mtx);
 	mp = TAILQ_FIRST(&mountlist);
 	while (mp != NULL) {
 		if (mp->mnt_time > timebase)
 			timebase = mp->mnt_time;
 		mp = TAILQ_NEXT(mp, mnt_list);
 	}
 	mtx_unlock(&mountlist_mtx);
 	inittodr(timebase);
 
 	/* Keep prison0's root in sync with the global rootvnode. */
 	mtx_lock(&prison0.pr_mtx);
 	prison0.pr_root = rootvnode;
 	vref(prison0.pr_root);
 	mtx_unlock(&prison0.pr_mtx);
 
 	/* Publish completion and wake threads sleeping on the flag. */
 	mtx_lock(&root_holds_mtx);
 	atomic_store_rel_int(&root_mount_complete, 1);
 	wakeup(&root_mount_complete);
 	mtx_unlock(&root_holds_mtx);
 
 	EVENTHANDLER_INVOKE(mountroot);
 
 	TSEXIT();
 }
 
 /*
  * Append the comma-separated mount options in `options' to the argument
  * list `ma' and return the resulting list.  Each option is either "name" or
  * "name=value".  "rw" and "noro" are dropped, and an empty option name ends
  * processing.
  */
 static struct mntarg *
 parse_mountroot_options(struct mntarg *ma, const char *options)
 {
 	char *copy, *cursor, *eq, *key;
 	char *key_dup, *val_dup;
 
 	if (options == NULL || options[0] == '\0')
 		return (ma);
 
 	/* strsep() modifies its input, so tokenize a private copy. */
 	copy = strdup(options, M_MOUNT);
 	if (copy == NULL)
 		return (ma);
 
 	for (cursor = copy, key = strsep(&cursor, ",");
 	    key != NULL && key[0] != '\0'; key = strsep(&cursor, ",")) {
 		/* Split "name=value" in place; eq ends up at the value. */
 		eq = strchr(key, '=');
 		if (eq != NULL)
 			*eq++ = '\0';
 
 		/*
 		 * The first time we mount the root file system it must be
 		 * mounted 'ro', so 'rw' and 'noro' are ignored.
 		 */
 		if (!strcmp(key, "rw") || !strcmp(key, "noro"))
 			continue;
 
 		/* Hand duplicates to mount_arg(); `copy' is freed below. */
 		key_dup = strdup(key, M_MOUNT);
 		val_dup = (eq != NULL) ? strdup(eq, M_MOUNT) : NULL;
 		ma = mount_arg(ma, key_dup, val_dup,
 		    (val_dup != NULL) ? -1 : 0);
 	}
 
 	free(copy, M_MOUNT);
 	return (ma);
 }
Index: head/sys/kern/vfs_syscalls.c
===================================================================
--- head/sys/kern/vfs_syscalls.c	(revision 345981)
+++ head/sys/kern/vfs_syscalls.c	(revision 345982)
@@ -1,4753 +1,4818 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_capsicum.h"
 #include "opt_ktrace.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/capsicum.h>
 #include <sys/disk.h>
 #include <sys/sysent.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/sysproto.h>
 #include <sys/namei.h>
 #include <sys/filedesc.h>
 #include <sys/kernel.h>
 #include <sys/fcntl.h>
 #include <sys/file.h>
 #include <sys/filio.h>
 #include <sys/limits.h>
 #include <sys/linker.h>
 #include <sys/rwlock.h>
 #include <sys/sdt.h>
 #include <sys/stat.h>
 #include <sys/sx.h>
 #include <sys/unistd.h>
 #include <sys/vnode.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/dirent.h>
 #include <sys/jail.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #ifdef KTRACE
 #include <sys/ktrace.h>
 #endif
 
 #include <machine/stdarg.h>
 
 #include <security/audit/audit.h>
 #include <security/mac/mac_framework.h>
 
 #include <vm/vm.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/uma.h>
 
 #include <ufs/ufs/quota.h>
 
 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information");
 
 SDT_PROVIDER_DEFINE(vfs);
 SDT_PROBE_DEFINE2(vfs, , stat, mode, "char *", "int");
 SDT_PROBE_DEFINE2(vfs, , stat, reg, "char *", "int");
 
 /* Prototypes for file-local (static) helper functions. */
 static int kern_chflagsat(struct thread *td, int fd, const char *path,
     enum uio_seg pathseg, u_long flags, int atflag);
 static int setfflags(struct thread *td, struct vnode *, u_long);
 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *);
 static int getutimens(const struct timespec *, enum uio_seg,
     struct timespec *, int *);
 static int setutimes(struct thread *td, struct vnode *,
     const struct timespec *, int, int);
 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
     struct thread *td);
 static int kern_fhlinkat(struct thread *td, int fd, const char *path,
     enum uio_seg pathseg, fhandle_t *fhp);
 static int kern_getfhat(struct thread *td, int flags, int fd,
     const char *path, enum uio_seg pathseg, fhandle_t *fhp);
 static int kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg,
     size_t count, struct thread *td);
 static int kern_linkat_vp(struct thread *td, struct vnode *vp, int fd,
     const char *path, enum uio_seg segflag);
 
 /*
  * Sync each mounted filesystem.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct sync_args {
 	int     dummy;
 };
 #endif
 /* ARGSUSED */
 /*
  * sync(2): start a non-blocking sync on every mount that can be busied and
  * is not read-only.  Mounts that cannot be busied without waiting, or for
  * which a write cannot be started without waiting, are skipped.  Always
  * returns 0.
  */
 int
 sys_sync(struct thread *td, struct sync_args *uap)
 {
 	struct mount *mp, *nmp;
 	int save;
 
 	mtx_lock(&mountlist_mtx);
 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
 		if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
 			/* Could not busy the mount; the list lock is still held. */
 			nmp = TAILQ_NEXT(mp, mnt_list);
 			continue;
 		}
 		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
 		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
 			save = curthread_pflags_set(TDP_SYNCIO);
 			vfs_msync(mp, MNT_NOWAIT);
 			VFS_SYNC(mp, MNT_NOWAIT);
 			curthread_pflags_restore(save);
 			vn_finished_write(mp);
 		}
 		/*
 		 * Retake the list lock before reading the successor, and
 		 * only then drop the busy reference on this mount.
 		 */
 		mtx_lock(&mountlist_mtx);
 		nmp = TAILQ_NEXT(mp, mnt_list);
 		vfs_unbusy(mp);
 	}
 	mtx_unlock(&mountlist_mtx);
 	return (0);
 }
 
 /*
  * Change filesystem quotas.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct quotactl_args {
 	char *path;
 	int cmd;
 	int uid;
 	caddr_t arg;
 };
 #endif
 /*
  * quotactl(2): look up the mount containing uap->path and pass the quota
  * command to the file system.  Returns EPERM when the caller's prison does
  * not allow quota operations, a lookup or vfs_busy() error, or the result of
  * VFS_QUOTACTL().
  */
 int
 sys_quotactl(struct thread *td, struct quotactl_args *uap)
 {
 	struct mount *mp;
 	struct nameidata nd;
 	int error;
 
 	AUDIT_ARG_CMD(uap->cmd);
 	AUDIT_ARG_UID(uap->uid);
 	if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS))
 		return (EPERM);
 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
 	    uap->path, td);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	/*
 	 * Take a reference on the mount before releasing the vnode, then
 	 * trade that reference for a busy hold.
 	 */
 	mp = nd.ni_vp->v_mount;
 	vfs_ref(mp);
 	vput(nd.ni_vp);
 	error = vfs_busy(mp, 0);
 	vfs_rel(mp);
 	if (error != 0)
 		return (error);
 	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg);
 
 	/*
 	 * Since quota on operation typically needs to open quota
 	 * file, the Q_QUOTAON handler needs to unbusy the mount point
 	 * before calling into namei.  Otherwise, unmount might be
 	 * started between two vfs_busy() invocations (first is our,
 	 * second is from mount point cross-walk code in lookup()),
 	 * causing deadlock.
 	 *
 	 * Require that the Q_QUOTAON and Q_QUOTAOFF handlers deal with
 	 * the vfs_busy() reference on their own, always returning with
 	 * an unbusied mount point.
 	 */
 	if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON &&
 	    (uap->cmd >> SUBCMDSHIFT) != Q_QUOTAOFF)
 		vfs_unbusy(mp);
 	return (error);
 }
 
 /*
  * Helper for the statfs conversion routines: when any block count exceeds
  * 'max_size', grow f_bsize and shrink the block counts by the same power of
  * two so that they all fit.  'max_size' must be a bitmask, i.e. 2^n - 1 for
  * some non-zero 'n'.
  */
 void
 statfs_scale_blocks(struct statfs *sf, long max_size)
 {
 	uint64_t largest, rest;
 	int bits;
 
 	KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__));
 
 	/*
 	 * Scaling all counts together keeps the ratio of free to used
 	 * space that userland sees accurate.  Start from the largest
 	 * magnitude among the three block counts.
 	 */
 	largest = (sf->f_bavail < 0) ? -sf->f_bavail : sf->f_bavail;
 	largest = MAX(sf->f_blocks, MAX(sf->f_bfree, largest));
 	if (largest <= max_size)
 		return;
 
 	/* Count the significant bits that lie above the width of max_size. */
 	bits = 0;
 	for (rest = largest >> flsl(max_size); rest > 0; rest >>= 1)
 		bits++;
 
 	sf->f_bsize <<= bits;
 	sf->f_blocks >>= bits;
 	sf->f_bfree >>= bits;
 	sf->f_bavail >>= bits;
 }
 
 /*
  * Common back end for kern_statfs() and kern_fstatfs(): refresh the
  * statistics of `mp' and copy them into `buf'.
  *
  * A NULL `mp' yields EBADF.  Otherwise vfs_rel() is called on `mp' after
  * the vfs_busy() attempt, whether or not it succeeded; both callers take
  * that reference with vfs_ref().  Returns 0 or the error from vfs_busy(),
  * the MAC check or VFS_STATFS().
  */
 static int
 kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf)
 {
 	struct statfs *sp;
 	int error;
 
 	if (mp == NULL)
 		return (EBADF);
 	error = vfs_busy(mp, 0);
 	vfs_rel(mp);
 	if (error != 0)
 		return (error);
 #ifdef MAC
 	error = mac_mount_check_stat(td->td_ucred, mp);
 	if (error != 0)
 		goto out;
 #endif
 	/*
 	 * Set these in case the underlying filesystem fails to do so.
 	 */
 	sp = &mp->mnt_stat;
 	sp->f_version = STATFS_VERSION;
 	sp->f_namemax = NAME_MAX;
 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
 	error = VFS_STATFS(mp, sp);
 	if (error != 0)
 		goto out;
 	*buf = *sp;
 	/*
 	 * A non-zero priv_check() result means the caller lacks
 	 * PRIV_VFS_GENERATION: hide the fsid and apply the prison's statfs
 	 * restrictions to the caller's copy only.
 	 */
 	if (priv_check(td, PRIV_VFS_GENERATION)) {
 		buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
 		prison_enforce_statfs(td->td_ucred, mp, buf);
 	}
 out:
 	vfs_unbusy(mp);
 	return (error);
 }
 
 /*
  * Get filesystem statistics.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct statfs_args {
 	char *path;
 	struct statfs *buf;
 };
 #endif
 int
 sys_statfs(struct thread *td, struct statfs_args *uap)
 {
 	struct statfs *kbuf;
 	int rc;
 
 	/* Collect the result in a kernel buffer, then export it to the user. */
 	kbuf = malloc(sizeof(*kbuf), M_STATFS, M_WAITOK);
 	rc = kern_statfs(td, uap->path, UIO_USERSPACE, kbuf);
 	if (rc != 0)
 		goto done;
 	rc = copyout(kbuf, uap->buf, sizeof(*kbuf));
 done:
 	free(kbuf, M_STATFS);
 	return (rc);
 }
 
 int
 kern_statfs(struct thread *td, const char *path, enum uio_seg pathseg,
     struct statfs *buf)
 {
 	struct mount *mp;
 	struct nameidata nd;
 	int error;
 
 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
 	    pathseg, path, td);
 	error = namei(&nd);
 	if (error != 0)
 		return (error);
 	mp = nd.ni_vp->v_mount;
 	vfs_ref(mp);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vput(nd.ni_vp);
 	return (kern_do_statfs(td, mp, buf));
 }
 
 /*
  * Get filesystem statistics.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct fstatfs_args {
 	int fd;
 	struct statfs *buf;
 };
 #endif
 int
 sys_fstatfs(struct thread *td, struct fstatfs_args *uap)
 {
 	struct statfs *kbuf;
 	int rc;
 
 	/* Collect the result in a kernel buffer, then export it to the user. */
 	kbuf = malloc(sizeof(*kbuf), M_STATFS, M_WAITOK);
 	rc = kern_fstatfs(td, uap->fd, kbuf);
 	if (rc != 0)
 		goto done;
 	rc = copyout(kbuf, uap->buf, sizeof(*kbuf));
 done:
 	free(kbuf, M_STATFS);
 	return (rc);
 }
 
 /*
  * Fill `buf' with the statistics of the file system that the vnode behind
  * descriptor `fd' belongs to.  Returns the getvnode() error or the result of
  * kern_do_statfs(), which reports EBADF when the vnode has no mount.
  */
 int
 kern_fstatfs(struct thread *td, int fd, struct statfs *buf)
 {
 	struct file *filep;
 	struct vnode *node;
 	struct mount *fsmp;
 	int rc;
 
 	AUDIT_ARG_FD(fd);
 	rc = getvnode(td, fd, &cap_fstatfs_rights, &filep);
 	if (rc != 0)
 		return (rc);
 
 	node = filep->f_vnode;
 	vn_lock(node, LK_SHARED | LK_RETRY);
 #ifdef AUDIT
 	AUDIT_ARG_VNODE1(node);
 #endif
 	/* Reference the mount, if any, while the vnode is still locked. */
 	if ((fsmp = node->v_mount) != NULL)
 		vfs_ref(fsmp);
 	VOP_UNLOCK(node, 0);
 	fdrop(filep, td);
 
 	return (kern_do_statfs(td, fsmp, buf));
 }
 
 /*
  * Get statistics on all filesystems.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct getfsstat_args {
 	struct statfs *buf;
 	long bufsize;
 	int mode;
 };
 #endif
 int
 sys_getfsstat(struct thread *td, struct getfsstat_args *uap)
 {
 	size_t nfs;
 	int rc;
 
 	/* Reject sizes that cannot be represented as a size_t. */
 	if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX)
 		return (EINVAL);
 
 	rc = kern_getfsstat(td, &uap->buf, uap->bufsize, &nfs,
 	    UIO_USERSPACE, uap->mode);
 	if (rc != 0)
 		return (rc);
 
 	/* The system call returns the number of entries. */
 	td->td_retval[0] = nfs;
 	return (0);
 }
 
 /*
  * Collect statistics for the mounted file systems visible to the caller.
  *
  * `mode' must be MNT_WAIT or MNT_NOWAIT, otherwise EINVAL is returned.
  * With `bufsize' == 0 nothing is copied and only the number of matching
  * mounts is counted.  On success *countp receives the number of entries
  * stored, or the number of matching mounts when no buffer was supplied.
  *
  * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
  *	The caller is responsible for freeing memory which will be allocated
  *	in '*buf'.
  */
 int
 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
     size_t *countp, enum uio_seg bufseg, int mode)
 {
 	struct mount *mp, *nmp;
 	struct statfs *sfsp, *sp, *sptmp, *tofree;
 	size_t count, maxcount;
 	int error;
 
 	switch (mode) {
 	case MNT_WAIT:
 	case MNT_NOWAIT:
 		break;
 	default:
 		if (bufseg == UIO_SYSSPACE)
 			*buf = NULL;
 		return (EINVAL);
 	}
 restart:
 	maxcount = bufsize / sizeof(struct statfs);
 	if (bufsize == 0) {
 		sfsp = NULL;
 		tofree = NULL;
 	} else if (bufseg == UIO_USERSPACE) {
 		sfsp = *buf;
 		tofree = NULL;
 	} else /* if (bufseg == UIO_SYSSPACE) */ {
 		/*
 		 * Size the kernel buffer by the current number of mounts,
 		 * capped at what the caller asked for.
 		 */
 		count = 0;
 		mtx_lock(&mountlist_mtx);
 		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
 			count++;
 		}
 		mtx_unlock(&mountlist_mtx);
 		if (maxcount > count)
 			maxcount = count;
 		tofree = sfsp = *buf = malloc(maxcount * sizeof(struct statfs),
 		    M_STATFS, M_WAITOK);
 	}
 	count = 0;
 	mtx_lock(&mountlist_mtx);
 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
 		/* Skip mounts the caller's credentials may not see. */
 		if (prison_canseemount(td->td_ucred, mp) != 0) {
 			nmp = TAILQ_NEXT(mp, mnt_list);
 			continue;
 		}
 #ifdef MAC
 		if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
 			nmp = TAILQ_NEXT(mp, mnt_list);
 			continue;
 		}
 #endif
 		if (mode == MNT_WAIT) {
 			if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) {
 				/*
 				 * If vfs_busy() failed, and MBF_NOWAIT
 				 * wasn't passed, then the mp is gone.
 				 * Furthermore, because of MBF_MNTLSTLOCK,
 				 * the mountlist_mtx was dropped.  We have
 				 * no other choice than to start over.
 				 */
 				mtx_unlock(&mountlist_mtx);
 				free(tofree, M_STATFS);
 				goto restart;
 			}
 		} else {
 			if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) {
 				nmp = TAILQ_NEXT(mp, mnt_list);
 				continue;
 			}
 		}
 		/* Mounts beyond the buffer capacity are counted but not copied. */
 		if (sfsp != NULL && count < maxcount) {
 			sp = &mp->mnt_stat;
 			/*
 			 * Set these in case the underlying filesystem
 			 * fails to do so.
 			 */
 			sp->f_version = STATFS_VERSION;
 			sp->f_namemax = NAME_MAX;
 			sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
 			/*
 			 * If MNT_NOWAIT is specified, do not refresh
 			 * the fsstat cache.
 			 */
 			if (mode != MNT_NOWAIT) {
 				error = VFS_STATFS(mp, sp);
 				if (error != 0) {
 					/* Leave this mount out of the result. */
 					mtx_lock(&mountlist_mtx);
 					nmp = TAILQ_NEXT(mp, mnt_list);
 					vfs_unbusy(mp);
 					continue;
 				}
 			}
 			/*
 			 * Callers without PRIV_VFS_GENERATION get a scrubbed
 			 * temporary copy so the cached mnt_stat is untouched.
 			 */
 			if (priv_check(td, PRIV_VFS_GENERATION)) {
 				sptmp = malloc(sizeof(struct statfs), M_STATFS,
 				    M_WAITOK);
 				*sptmp = *sp;
 				sptmp->f_fsid.val[0] = sptmp->f_fsid.val[1] = 0;
 				prison_enforce_statfs(td->td_ucred, mp, sptmp);
 				sp = sptmp;
 			} else
 				sptmp = NULL;
 			if (bufseg == UIO_SYSSPACE) {
 				bcopy(sp, sfsp, sizeof(*sp));
 				free(sptmp, M_STATFS);
 			} else /* if (bufseg == UIO_USERSPACE) */ {
 				error = copyout(sp, sfsp, sizeof(*sp));
 				free(sptmp, M_STATFS);
 				if (error != 0) {
 					vfs_unbusy(mp);
 					return (error);
 				}
 			}
 			sfsp++;
 		}
 		count++;
 		/*
 		 * Retake the list lock before reading the successor, and
 		 * only then drop the busy reference on this mount.
 		 */
 		mtx_lock(&mountlist_mtx);
 		nmp = TAILQ_NEXT(mp, mnt_list);
 		vfs_unbusy(mp);
 	}
 	mtx_unlock(&mountlist_mtx);
 	/* With a buffer, never report more entries than were stored. */
 	if (sfsp != NULL && count > maxcount)
 		*countp = maxcount;
 	else
 		*countp = count;
 	return (0);
 }
 
 #ifdef COMPAT_FREEBSD4
 /*
  * Get old format filesystem statistics.
  */
 static void freebsd4_cvtstatfs(struct statfs *, struct ostatfs *);
 
 #ifndef _SYS_SYSPROTO_H_
 struct freebsd4_statfs_args {
 	char *path;
 	struct ostatfs *buf;
 };
 #endif
 int
 freebsd4_statfs(struct thread *td, struct freebsd4_statfs_args *uap)
 {
 	struct ostatfs osb;
 	struct statfs *sfp;
 	int error;
 
 	sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
 	error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp);
 	if (error == 0) {
 		freebsd4_cvtstatfs(sfp, &osb);
 		error = copyout(&osb, uap->buf, sizeof(osb));
 	}
 	free(sfp, M_STATFS);
 	return (error);
 }
 
 /*
  * Get filesystem statistics.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct freebsd4_fstatfs_args {
 	int fd;
 	struct ostatfs *buf;
 };
 #endif
 int
 freebsd4_fstatfs(struct thread *td, struct freebsd4_fstatfs_args *uap)
 {
 	struct ostatfs osb;
 	struct statfs *sfp;
 	int error;
 
 	sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
 	error = kern_fstatfs(td, uap->fd, sfp);
 	if (error == 0) {
 		freebsd4_cvtstatfs(sfp, &osb);
 		error = copyout(&osb, uap->buf, sizeof(osb));
 	}
 	free(sfp, M_STATFS);
 	return (error);
 }
 
 /*
  * Get statistics on all filesystems.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct freebsd4_getfsstat_args {
 	struct ostatfs *buf;
 	long bufsize;
 	int mode;
 };
 #endif
 int
 freebsd4_getfsstat(struct thread *td, struct freebsd4_getfsstat_args *uap)
 {
 	struct statfs *buf, *sp;
 	struct ostatfs osb;
 	size_t count, size;
 	int error;
 
 	if (uap->bufsize < 0)
 		return (EINVAL);
 	count = uap->bufsize / sizeof(struct ostatfs);
 	if (count > SIZE_MAX / sizeof(struct statfs))
 		return (EINVAL);
 	size = count * sizeof(struct statfs);
 	error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE,
 	    uap->mode);
 	if (error == 0)
 		td->td_retval[0] = count;
 	if (size != 0) {
 		sp = buf;
 		while (count != 0 && error == 0) {
 			freebsd4_cvtstatfs(sp, &osb);
 			error = copyout(&osb, uap->buf, sizeof(osb));
 			sp++;
 			uap->buf++;
 			count--;
 		}
 		free(buf, M_STATFS);
 	}
 	return (error);
 }
 
 /*
  * Implement fstatfs() for (NFS) file handles.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct freebsd4_fhstatfs_args {
 	struct fhandle *u_fhp;
 	struct ostatfs *buf;
 };
 #endif
 int
 freebsd4_fhstatfs(struct thread *td, struct freebsd4_fhstatfs_args *uap)
 {
 	struct ostatfs osb;
 	struct statfs *sfp;
 	fhandle_t fh;
 	int error;
 
 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
 	if (error != 0)
 		return (error);
 	sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
 	error = kern_fhstatfs(td, fh, sfp);
 	if (error == 0) {
 		freebsd4_cvtstatfs(sfp, &osb);
 		error = copyout(&osb, uap->buf, sizeof(osb));
 	}
 	free(sfp, M_STATFS);
 	return (error);
 }
 
 /*
  * Fill the old format statfs structure `osp' from the new format structure
  * `nsp'.  Note that `nsp' is modified: its block counts are first scaled by
  * statfs_scale_blocks() with LONG_MAX as the limit.
  */
 static void
 freebsd4_cvtstatfs(struct statfs *nsp, struct ostatfs *osp)
 {
 
 	/* Must run before any block count is copied. */
 	statfs_scale_blocks(nsp, LONG_MAX);
 	bzero(osp, sizeof(*osp));
 
 	/* Identification. */
 	osp->f_fsid = nsp->f_fsid;
 	osp->f_owner = nsp->f_owner;
 	osp->f_type = nsp->f_type;
 	osp->f_flags = nsp->f_flags;
 
 	/* Block accounting (already scaled above). */
 	osp->f_bsize = nsp->f_bsize;
 	osp->f_blocks = nsp->f_blocks;
 	osp->f_bfree = nsp->f_bfree;
 	osp->f_bavail = nsp->f_bavail;
 
 	/* Remaining counters are clamped to LONG_MAX. */
 	osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX);
 	osp->f_files = MIN(nsp->f_files, LONG_MAX);
 	osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX);
 	osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX);
 	osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX);
 	osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX);
 	osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX);
 
 	/* Names are truncated to the shorter of the two field sizes. */
 	strlcpy(osp->f_fstypename, nsp->f_fstypename,
 	    MIN(MFSNAMELEN, OMFSNAMELEN));
 	strlcpy(osp->f_mntonname, nsp->f_mntonname,
 	    MIN(MNAMELEN, OMNAMELEN));
 	strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
 	    MIN(MNAMELEN, OMNAMELEN));
 }
 #endif /* COMPAT_FREEBSD4 */
 
 #if defined(COMPAT_FREEBSD11)
 /*
  * Get old format filesystem statistics.
  */
 static void freebsd11_cvtstatfs(struct statfs *, struct freebsd11_statfs *);
 
 int
 freebsd11_statfs(struct thread *td, struct freebsd11_statfs_args *uap)
 {
 	struct freebsd11_statfs osb;
 	struct statfs *sfp;
 	int error;
 
 	sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
 	error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp);
 	if (error == 0) {
 		freebsd11_cvtstatfs(sfp, &osb);
 		error = copyout(&osb, uap->buf, sizeof(osb));
 	}
 	free(sfp, M_STATFS);
 	return (error);
 }
 
 /*
  * Get filesystem statistics.
  */
 int
 freebsd11_fstatfs(struct thread *td, struct freebsd11_fstatfs_args *uap)
 {
 	struct freebsd11_statfs osb;
 	struct statfs *sfp;
 	int error;
 
 	sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
 	error = kern_fstatfs(td, uap->fd, sfp);
 	if (error == 0) {
 		freebsd11_cvtstatfs(sfp, &osb);
 		error = copyout(&osb, uap->buf, sizeof(osb));
 	}
 	free(sfp, M_STATFS);
 	return (error);
 }
 
 /*
  * Get statistics on all filesystems.
  */
 int
 freebsd11_getfsstat(struct thread *td, struct freebsd11_getfsstat_args *uap)
 {
 	struct freebsd11_statfs osb;
 	struct statfs *buf, *sp;
 	size_t count, size;
 	int error;
 
 	count = uap->bufsize / sizeof(struct ostatfs);
 	size = count * sizeof(struct statfs);
 	error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE,
 	    uap->mode);
 	if (error == 0)
 		td->td_retval[0] = count;
 	if (size > 0) {
 		sp = buf;
 		while (count > 0 && error == 0) {
 			freebsd11_cvtstatfs(sp, &osb);
 			error = copyout(&osb, uap->buf, sizeof(osb));
 			sp++;
 			uap->buf++;
 			count--;
 		}
 		free(buf, M_STATFS);
 	}
 	return (error);
 }
 
 /*
  * Implement fstatfs() for (NFS) file handles.
  */
 int
 freebsd11_fhstatfs(struct thread *td, struct freebsd11_fhstatfs_args *uap)
 {
 	struct freebsd11_statfs osb;
 	struct statfs *sfp;
 	fhandle_t fh;
 	int error;
 
 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
 	if (error)
 		return (error);
 	sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
 	error = kern_fhstatfs(td, fh, sfp);
 	if (error == 0) {
 		freebsd11_cvtstatfs(sfp, &osb);
 		error = copyout(&osb, uap->buf, sizeof(osb));
 	}
 	free(sfp, M_STATFS);
 	return (error);
 }
 
 /*
  * Fill the FreeBSD 11 format statfs structure `osp' from the new format
  * structure `nsp'.  `osp' is zeroed first; the three name strings are
  * truncated to fit the destination fields.
  */
 static void
 freebsd11_cvtstatfs(struct statfs *nsp, struct freebsd11_statfs *osp)
 {
 
 	bzero(osp, sizeof(*osp));
 
 	/* Identification. */
 	osp->f_version = FREEBSD11_STATFS_VERSION;
 	osp->f_fsid = nsp->f_fsid;
 	osp->f_owner = nsp->f_owner;
 	osp->f_type = nsp->f_type;
 	osp->f_flags = nsp->f_flags;
 	osp->f_namemax = nsp->f_namemax;
 
 	/* Space and inode accounting. */
 	osp->f_bsize = nsp->f_bsize;
 	osp->f_iosize = nsp->f_iosize;
 	osp->f_blocks = nsp->f_blocks;
 	osp->f_bfree = nsp->f_bfree;
 	osp->f_bavail = nsp->f_bavail;
 	osp->f_files = nsp->f_files;
 	osp->f_ffree = nsp->f_ffree;
 
 	/* I/O counters. */
 	osp->f_syncwrites = nsp->f_syncwrites;
 	osp->f_asyncwrites = nsp->f_asyncwrites;
 	osp->f_syncreads = nsp->f_syncreads;
 	osp->f_asyncreads = nsp->f_asyncreads;
 
 	/* Names: bounded by the smaller of source and destination sizes. */
 	strlcpy(osp->f_fstypename, nsp->f_fstypename,
 	    MIN(MFSNAMELEN, sizeof(osp->f_fstypename)));
 	strlcpy(osp->f_mntonname, nsp->f_mntonname,
 	    MIN(MNAMELEN, sizeof(osp->f_mntonname)));
 	strlcpy(osp->f_mntfromname, nsp->f_mntfromname,
 	    MIN(MNAMELEN, sizeof(osp->f_mntfromname)));
 }
 #endif /* COMPAT_FREEBSD11 */
 
 /*
  * Change current working directory to a given file descriptor.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct fchdir_args {
 	int	fd;
 };
 #endif
 /*
  * fchdir(2): make the directory referenced by uap->fd the current working
  * directory.  If a file system is mounted on that directory, the root of
  * the mounted file system is used instead, repeating while the resulting
  * vnode is itself covered.  Returns 0 or the error from getvnode(),
  * change_dir() or VFS_ROOT().
  */
 int
 sys_fchdir(struct thread *td, struct fchdir_args *uap)
 {
 	struct vnode *vp, *tdp;
 	struct mount *mp;
 	struct file *fp;
 	int error;
 
 	AUDIT_ARG_FD(uap->fd);
 	error = getvnode(td, uap->fd, &cap_fchdir_rights,
 	    &fp);
 	if (error != 0)
 		return (error);
 	/* Take our own vnode reference so the file can be dropped now. */
 	vp = fp->f_vnode;
 	vrefact(vp);
 	fdrop(fp, td);
 	vn_lock(vp, LK_SHARED | LK_RETRY);
 	AUDIT_ARG_VNODE1(vp);
 	error = change_dir(vp, td);
 	while (!error && (mp = vp->v_mountedhere) != NULL) {
 		/* If the mount cannot be busied, re-read v_mountedhere. */
 		if (vfs_busy(mp, 0))
 			continue;
 		error = VFS_ROOT(mp, LK_SHARED, &tdp);
 		vfs_unbusy(mp);
 		if (error != 0)
 			break;
 		/* Swap the covered vnode for the root of the mount on it. */
 		vput(vp);
 		vp = tdp;
 	}
 	if (error != 0) {
 		vput(vp);
 		return (error);
 	}
 	/* Unlock but keep the reference, which is passed to pwd_chdir(). */
 	VOP_UNLOCK(vp, 0);
 	pwd_chdir(td, vp);
 	return (0);
 }
 
 /*
  * Change current working directory (``.'').
  */
 #ifndef _SYS_SYSPROTO_H_
 struct chdir_args {
 	char	*path;
 };
 #endif
 int
 sys_chdir(struct thread *td, struct chdir_args *uap)
 {
 
 	return (kern_chdir(td, uap->path, UIO_USERSPACE));
 }
 
 int
 kern_chdir(struct thread *td, const char *path, enum uio_seg pathseg)
 {
 	struct nameidata nd;
 	int error;
 
 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
 	    pathseg, path, td);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	if ((error = change_dir(nd.ni_vp, td)) != 0) {
 		vput(nd.ni_vp);
 		NDFREE(&nd, NDF_ONLY_PNBUF);
 		return (error);
 	}
 	VOP_UNLOCK(nd.ni_vp, 0);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	pwd_chdir(td, nd.ni_vp);
 	return (0);
 }
 
 /*
  * Change notion of root (``/'') directory.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct chroot_args {
 	char	*path;
 };
 #endif
 /*
  * chroot(2) entry point.
  *
  * Requires PRIV_VFS_CHROOT, looks up uap->path, validates the vnode with
  * change_dir() (plus mac_vnode_check_chroot() when MAC is compiled in) and
  * then passes it to pwd_chroot().
  *
  * Returns 0 on success, otherwise the first errno encountered.
  */
 int
 sys_chroot(struct thread *td, struct chroot_args *uap)
 {
 	struct nameidata nd;
 	int error;
 
 	error = priv_check(td, PRIV_VFS_CHROOT);
 	if (error != 0)
 		return (error);
 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
 	    UIO_USERSPACE, uap->path, td);
 	error = namei(&nd);
 	if (error != 0)
 		goto error;
 	error = change_dir(nd.ni_vp, td);
 	if (error != 0)
 		goto e_vunlock;
 #ifdef MAC
 	error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp);
 	if (error != 0)
 		goto e_vunlock;
 #endif
 	/* Checks passed: unlock, call pwd_chroot(), then drop our reference. */
 	VOP_UNLOCK(nd.ni_vp, 0);
 	error = pwd_chroot(td, nd.ni_vp);
 	vrele(nd.ni_vp);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	return (error);
 e_vunlock:
 	/* The vnode is still locked on this path, so vput() is used. */
 	vput(nd.ni_vp);
 error:
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	return (error);
 }
 
 /*
  * Common routine for chroot and chdir.  Callers must provide a locked vnode
  * instance.
  */
 int
 change_dir(struct vnode *vp, struct thread *td)
 {
 #ifdef MAC
 	int error;
 #endif
 
 	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
 	if (vp->v_type != VDIR)
 		return (ENOTDIR);
 #ifdef MAC
 	error = mac_vnode_check_chdir(td->td_ucred, vp);
 	if (error != 0)
 		return (error);
 #endif
 	return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td));
 }
 
 /*
  * Add to *rightsp the capability rights implied by the open(2) flags.
  *
  * O_EXEC selects CAP_FEXECVE; otherwise the access mode selects CAP_READ
  * and/or CAP_WRITE, with CAP_SEEK added for writers that use neither
  * O_APPEND nor O_TRUNC.  O_CREAT, O_TRUNC, the sync flags and the lock
  * flags each add their own right.
  */
 static __inline void
 flags_to_rights(int flags, cap_rights_t *rightsp)
 {
 	int accmode;
 
 	if ((flags & O_EXEC) != 0) {
 		cap_rights_set(rightsp, CAP_FEXECVE);
 	} else {
 		accmode = flags & O_ACCMODE;
 		if (accmode == O_RDONLY || accmode == O_RDWR)
 			cap_rights_set(rightsp, CAP_READ);
 		if (accmode == O_RDWR || accmode == O_WRONLY) {
 			cap_rights_set(rightsp, CAP_WRITE);
 			if ((flags & (O_APPEND | O_TRUNC)) == 0)
 				cap_rights_set(rightsp, CAP_SEEK);
 		}
 	}
 
 	if ((flags & O_CREAT) != 0)
 		cap_rights_set(rightsp, CAP_CREATE);
 	if ((flags & O_TRUNC) != 0)
 		cap_rights_set(rightsp, CAP_FTRUNCATE);
 	if ((flags & (O_SYNC | O_FSYNC)) != 0)
 		cap_rights_set(rightsp, CAP_FSYNC);
 	if ((flags & (O_EXLOCK | O_SHLOCK)) != 0)
 		cap_rights_set(rightsp, CAP_FLOCK);
 }
 
 /*
  * Check permissions, allocate an open file structure, and call the device
  * open routine if any.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct open_args {
 	char	*path;
 	int	flags;
 	int	mode;
 };
 #endif
 int
 sys_open(struct thread *td, struct open_args *uap)
 {
 
 	return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    uap->flags, uap->mode));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct openat_args {
 	int	fd;
 	char	*path;
 	int	flag;
 	int	mode;
 };
 #endif
 int
 sys_openat(struct thread *td, struct openat_args *uap)
 {
 
 	AUDIT_ARG_FD(uap->fd);
 	return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
 	    uap->mode));
 }
 
 /*
  * Common implementation behind open(2), openat(2) and ocreat().
  *
  * td      - calling thread
  * fd      - descriptor handed to the lookup as its starting point
  *           (AT_FDCWD from sys_open() and ocreat())
  * path    - pathname, located in the address space named by pathseg
  * flags   - open(2) style O_* flags
  * mode    - creation mode; bits set in fd_cmask and S_ISTXT are removed
  *
  * On success the new descriptor index is stored in td->td_retval[0] and 0
  * is returned.  On failure the file structure is released and an errno
  * value is returned.
  */
 int
 kern_openat(struct thread *td, int fd, const char *path, enum uio_seg pathseg,
     int flags, int mode)
 {
 	struct proc *p = td->td_proc;
 	struct filedesc *fdp = p->p_fd;
 	struct file *fp;
 	struct vnode *vp;
 	struct nameidata nd;
 	cap_rights_t rights;
 	int cmode, error, indx;
 
 	/* -1 means "no descriptor installed yet"; see the checks below. */
 	indx = -1;
 
 	AUDIT_ARG_FFLAGS(flags);
 	AUDIT_ARG_MODE(mode);
 	/* Rights needed on the starting directory: lookup plus flag-derived. */
 	cap_rights_init(&rights, CAP_LOOKUP);
 	flags_to_rights(flags, &rights);
 	/*
 	 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
 	 * may be specified.
 	 */
 	if (flags & O_EXEC) {
 		if (flags & O_ACCMODE)
 			return (EINVAL);
 	} else if ((flags & O_ACCMODE) == O_ACCMODE) {
 		return (EINVAL);
 	} else {
 		flags = FFLAGS(flags);
 	}
 
 	/*
 	 * Allocate a file structure. The descriptor to reference it
 	 * is allocated and set by finstall() below.
 	 */
 	error = falloc_noinstall(td, &fp);
 	if (error != 0)
 		return (error);
 	/*
 	 * An extra reference on `fp' has been held for us by
 	 * falloc_noinstall().
 	 */
 	/* Set the flags early so the finit in devfs can pick them up. */
 	fp->f_flag = flags & FMASK;
 	cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
 	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd,
 	    &rights, td);
 	td->td_dupfd = -1;		/* XXX check for fdopen */
 	error = vn_open(&nd, &flags, cmode, fp);
 	if (error != 0) {
 		/*
 		 * If the vn_open replaced the method vector, something
 		 * wonderous happened deep below and we just pass it up
 		 * pretending we know what we do.
 		 */
 		if (error == ENXIO && fp->f_ops != &badfileops)
 			goto success;
 
 		/*
 		 * Handle special fdopen() case. bleh.
 		 *
 		 * Don't do this for relative (capability) lookups; we don't
 		 * understand exactly what would happen, and we don't think
 		 * that it ever should.
 		 */
 		if ((nd.ni_lcf & NI_LCF_STRICTRELATIVE) == 0 &&
 		    (error == ENODEV || error == ENXIO) &&
 		    td->td_dupfd >= 0) {
 			error = dupfdopen(td, fdp, td->td_dupfd, flags, error,
 			    &indx);
 			if (error == 0)
 				goto success;
 		}
 
 		goto bad;
 	}
 	td->td_dupfd = 0;
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vp = nd.ni_vp;
 
 	/*
 	 * Store the vnode, for any f_type. Typically, the vnode use
 	 * count is decremented by direct call to vn_closefile() for
 	 * files that switched type in the cdevsw fdopen() method.
 	 */
 	fp->f_vnode = vp;
 	/*
 	 * If the file wasn't claimed by devfs bind it to the normal
 	 * vnode operations here.
 	 */
 	if (fp->f_ops == &badfileops) {
 		KASSERT(vp->v_type != VFIFO, ("Unexpected fifo."));
 		fp->f_seqcount = 1;
 		finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK),
 		    DTYPE_VNODE, vp, &vnops);
 	}
 
 	VOP_UNLOCK(vp, 0);
 	/* O_TRUNC is applied through the file operations after unlocking. */
 	if (flags & O_TRUNC) {
 		error = fo_truncate(fp, 0, td->td_ucred, td);
 		if (error != 0)
 			goto bad;
 	}
 success:
 	/*
 	 * If we haven't already installed the FD (for dupfdopen), do so now.
 	 */
 	if (indx == -1) {
 		struct filecaps *fcaps;
 
 #ifdef CAPABILITIES
 		if ((nd.ni_lcf & NI_LCF_STRICTRELATIVE) != 0)
 			fcaps = &nd.ni_filecaps;
 		else
 #endif
 			fcaps = NULL;
 		error = finstall(td, fp, &indx, flags, fcaps);
 		/* On success finstall() consumes fcaps. */
 		if (error != 0) {
 			filecaps_free(&nd.ni_filecaps);
 			goto bad;
 		}
 	} else {
 		filecaps_free(&nd.ni_filecaps);
 	}
 
 	/*
 	 * Release our private reference, leaving the one associated with
 	 * the descriptor table intact.
 	 */
 	fdrop(fp, td);
 	td->td_retval[0] = indx;
 	return (0);
 bad:
 	/* No descriptor may have been installed when failing. */
 	KASSERT(indx == -1, ("indx=%d, should be -1", indx));
 	fdrop(fp, td);
 	return (error);
 }
 
 #ifdef COMPAT_43
 /*
  * Create a file.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct ocreat_args {
 	char	*path;
 	int	mode;
 };
 #endif
 int
 ocreat(struct thread *td, struct ocreat_args *uap)
 {
 
 	return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
 }
 #endif /* COMPAT_43 */
 
 /*
  * Create a special file.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct mknodat_args {
 	int	fd;
 	char	*path;
 	mode_t	mode;
 	dev_t	dev;
 };
 #endif
 int
 sys_mknodat(struct thread *td, struct mknodat_args *uap)
 {
 
 	return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
 	    uap->dev));
 }
 
 #if defined(COMPAT_FREEBSD11)
 int
 freebsd11_mknod(struct thread *td,
     struct freebsd11_mknod_args *uap)
 {
 
 	return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    uap->mode, uap->dev));
 }
 
 int
 freebsd11_mknodat(struct thread *td,
     struct freebsd11_mknodat_args *uap)
 {
 
 	return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
 	    uap->dev));
 }
 #endif /* COMPAT_FREEBSD11 */
 
 /*
  * Create a character device, block device or whiteout entry at path,
  * resolved starting from fd.  A FIFO request with dev == 0 is delegated to
  * kern_mkfifoat(); every other file type yields EINVAL.
  *
  * Returns 0 on success, EEXIST when the name already exists, or the errno
  * from the privilege check, the lookup, the MAC check or the VOP call.
  */
 int
 kern_mknodat(struct thread *td, int fd, const char *path, enum uio_seg pathseg,
     int mode, dev_t dev)
 {
 	struct vnode *vp;
 	struct mount *mp;
 	struct vattr vattr;
 	struct nameidata nd;
 	int error, whiteout = 0;
 
 	AUDIT_ARG_MODE(mode);
 	AUDIT_ARG_DEV(dev);
 	/* Privilege and argument validation, keyed on the file type bits. */
 	switch (mode & S_IFMT) {
 	case S_IFCHR:
 	case S_IFBLK:
 		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
 		if (error == 0 && dev == VNOVAL)
 			error = EINVAL;
 		break;
 	case S_IFWHT:
 		error = priv_check(td, PRIV_VFS_MKNOD_WHT);
 		break;
 	case S_IFIFO:
 		if (dev == 0)
 			return (kern_mkfifoat(td, fd, path, pathseg, mode));
 		/* FALLTHROUGH */
 	default:
 		error = EINVAL;
 		break;
 	}
 	if (error != 0)
 		return (error);
 restart:
 	bwillwrite();
 	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
 	    NOCACHE, pathseg, path, fd, &cap_mknodat_rights,
 	    td);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	vp = nd.ni_vp;
 	if (vp != NULL) {
 		/* The name already exists: release everything and fail. */
 		NDFREE(&nd, NDF_ONLY_PNBUF);
 		if (vp == nd.ni_dvp)
 			vrele(nd.ni_dvp);
 		else
 			vput(nd.ni_dvp);
 		vrele(vp);
 		return (EEXIST);
 	} else {
 		VATTR_NULL(&vattr);
 		vattr.va_mode = (mode & ALLPERMS) &
 		    ~td->td_proc->p_fd->fd_cmask;
 		vattr.va_rdev = dev;
 		whiteout = 0;
 
 		switch (mode & S_IFMT) {
 		case S_IFCHR:
 			vattr.va_type = VCHR;
 			break;
 		case S_IFBLK:
 			vattr.va_type = VBLK;
 			break;
 		case S_IFWHT:
 			whiteout = 1;
 			break;
 		default:
 			/* Other types were rejected by the first switch. */
 			panic("kern_mknod: invalid mode");
 		}
 	}
 	/*
 	 * If write access cannot be started without waiting, release the
 	 * parent, call vn_start_write() again with V_XSLEEP | PCATCH and
 	 * redo the lookup from the top.
 	 */
 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 		NDFREE(&nd, NDF_ONLY_PNBUF);
 		vput(nd.ni_dvp);
 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 			return (error);
 		goto restart;
 	}
 #ifdef MAC
 	if (error == 0 && !whiteout)
 		error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp,
 		    &nd.ni_cnd, &vattr);
 #endif
 	if (error == 0) {
 		if (whiteout)
 			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
 		else {
 			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
 						&nd.ni_cnd, &vattr);
 			/* The new vnode is not needed here; release it. */
 			if (error == 0)
 				vput(nd.ni_vp);
 		}
 	}
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vput(nd.ni_dvp);
 	vn_finished_write(mp);
 	return (error);
 }
 
 /*
  * Create a named pipe.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct mkfifo_args {
 	char	*path;
 	int	mode;
 };
 #endif
 int
 sys_mkfifo(struct thread *td, struct mkfifo_args *uap)
 {
 
 	return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    uap->mode));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct mkfifoat_args {
 	int	fd;
 	char	*path;
 	mode_t	mode;
 };
 #endif
 int
 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap)
 {
 
 	return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE,
 	    uap->mode));
 }
 
 /*
  * Create a FIFO (VFIFO node) at path, resolved starting from fd.  The
  * permission bits of mode are filtered through the process fd_cmask.
  *
  * Returns 0 on success, EEXIST when the name already exists, or the errno
  * from the lookup, the MAC check or VOP_MKNOD().
  */
 int
 kern_mkfifoat(struct thread *td, int fd, const char *path,
     enum uio_seg pathseg, int mode)
 {
 	struct mount *mp;
 	struct vattr vattr;
 	struct nameidata nd;
 	int error;
 
 	AUDIT_ARG_MODE(mode);
 restart:
 	bwillwrite();
 	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
 	    NOCACHE, pathseg, path, fd, &cap_mkfifoat_rights,
 	    td);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	if (nd.ni_vp != NULL) {
 		/* The name already exists: release everything and fail. */
 		NDFREE(&nd, NDF_ONLY_PNBUF);
 		if (nd.ni_vp == nd.ni_dvp)
 			vrele(nd.ni_dvp);
 		else
 			vput(nd.ni_dvp);
 		vrele(nd.ni_vp);
 		return (EEXIST);
 	}
 	/*
 	 * If write access cannot be started without waiting, release the
 	 * parent, call vn_start_write() again with V_XSLEEP | PCATCH and
 	 * redo the lookup from the top.
 	 */
 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 		NDFREE(&nd, NDF_ONLY_PNBUF);
 		vput(nd.ni_dvp);
 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 			return (error);
 		goto restart;
 	}
 	VATTR_NULL(&vattr);
 	vattr.va_type = VFIFO;
 	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
 #ifdef MAC
 	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 	    &vattr);
 	if (error != 0)
 		goto out;
 #endif
 	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 	/* The new vnode is not needed here; release it. */
 	if (error == 0)
 		vput(nd.ni_vp);
 #ifdef MAC
 out:
 #endif
 	vput(nd.ni_dvp);
 	vn_finished_write(mp);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	return (error);
 }
 
 /*
  * Make a hard file link.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct link_args {
 	char	*path;
 	char	*link;
 };
 #endif
 int
 sys_link(struct thread *td, struct link_args *uap)
 {
 
 	return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link,
 	    UIO_USERSPACE, FOLLOW));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct linkat_args {
 	int	fd1;
 	char	*path1;
 	int	fd2;
 	char	*path2;
 	int	flag;
 };
 #endif
 /*
  * linkat(2) entry point.  Only AT_SYMLINK_FOLLOW and AT_BENEATH are
  * accepted in uap->flag (anything else is EINVAL); they are translated to
  * the FOLLOW/NOFOLLOW and BENEATH lookup flags passed to kern_linkat().
  */
 int
 sys_linkat(struct thread *td, struct linkat_args *uap)
 {
 	int flag, lookup_flags;
 
 	flag = uap->flag;
 	if ((flag & ~(AT_SYMLINK_FOLLOW | AT_BENEATH)) != 0)
 		return (EINVAL);
 
 	if ((flag & AT_SYMLINK_FOLLOW) != 0)
 		lookup_flags = FOLLOW;
 	else
 		lookup_flags = NOFOLLOW;
 	if ((flag & AT_BENEATH) != 0)
 		lookup_flags |= BENEATH;
 
 	return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2,
 	    UIO_USERSPACE, lookup_flags));
 }
 
 /*
  * Read-write knob under _security_bsd.  When non-zero, can_hardlink()
  * requires PRIV_VFS_LINK to link a file whose owner (va_uid) differs from
  * the caller's cr_uid.
  */
 int hardlink_check_uid = 0;
 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
     &hardlink_check_uid, 0,
     "Unprivileged processes cannot create hard links to files owned by other "
     "users");
 /*
  * Group counterpart: when non-zero, can_hardlink() requires PRIV_VFS_LINK
  * to link a file whose group (va_gid) the caller is not a member of.
  */
 static int hardlink_check_gid = 0;
 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
     &hardlink_check_gid, 0,
     "Unprivileged processes cannot create hard links to files owned by other "
     "groups");
 
 /*
  * Apply the hardlink_check_uid / hardlink_check_gid policy to vp.
  *
  * Returns 0 when linking is allowed, otherwise the errno from
  * VOP_GETATTR() or from the PRIV_VFS_LINK privilege check.
  */
 static int
 can_hardlink(struct vnode *vp, struct ucred *cred)
 {
 	struct vattr va;
 	int error;
 
 	/* Nothing to check when both knobs are off. */
 	if (hardlink_check_uid == 0 && hardlink_check_gid == 0)
 		return (0);
 
 	error = VOP_GETATTR(vp, &va, cred);
 	if (error != 0)
 		return (error);
 
 	/* Owner mismatch needs the privilege. */
 	if (hardlink_check_uid != 0 && va.va_uid != cred->cr_uid) {
 		error = priv_check_cred(cred, PRIV_VFS_LINK);
 		if (error != 0)
 			return (error);
 	}
 
 	/* So does not being a member of the file's group. */
 	if (hardlink_check_gid != 0 && !groupmember(va.va_gid, cred))
 		return (priv_check_cred(cred, PRIV_VFS_LINK));
 
 	return (0);
 }
 
 int
 kern_linkat(struct thread *td, int fd1, int fd2, const char *path1,
     const char *path2, enum uio_seg segflag, int follow)
 {
 	struct nameidata nd;
 	int error;
 
 	do {
 		bwillwrite();
 		NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, segflag,
 		    path1, fd1, &cap_linkat_source_rights, td);
 		if ((error = namei(&nd)) != 0)
 			return (error);
 		NDFREE(&nd, NDF_ONLY_PNBUF);
 		error = kern_linkat_vp(td, nd.ni_vp, fd2, path2, segflag);
 	} while (error ==  EAGAIN);
 	return (error);
 }
 
 /*
  * Create a hard link named path (resolved starting from fd) that refers to
  * the already looked-up vnode vp.
  *
  * Every return path releases vp with vrele() or vput(), so the caller must
  * not use vp after the call.
  *
  * Returns 0 on success; EPERM when vp is a directory; EEXIST when the
  * target name exists; EXDEV when the target directory is on a different
  * mount; EAGAIN when the caller should redo its lookup and call again;
  * otherwise the errno from the lookup, policy checks or VOP_LINK().
  */
 static int
 kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, const char *path,
     enum uio_seg segflag)
 {
 	struct nameidata nd;
 	struct mount *mp;
 	int error;
 
 	if (vp->v_type == VDIR) {
 		vrele(vp);
 		return (EPERM);		/* POSIX */
 	}
 	NDINIT_ATRIGHTS(&nd, CREATE,
 	    LOCKPARENT | SAVENAME | AUDITVNODE2 | NOCACHE, segflag, path, fd,
 	    &cap_linkat_target_rights, td);
 	if ((error = namei(&nd)) == 0) {
 		if (nd.ni_vp != NULL) {
 			/* The target name already exists. */
 			NDFREE(&nd, NDF_ONLY_PNBUF);
 			if (nd.ni_dvp == nd.ni_vp)
 				vrele(nd.ni_dvp);
 			else
 				vput(nd.ni_dvp);
 			vrele(nd.ni_vp);
 			vrele(vp);
 			return (EEXIST);
 		} else if (nd.ni_dvp->v_mount != vp->v_mount) {
 			/*
 			 * Cross-device link.  No need to recheck
 			 * vp->v_type, since it cannot change, except
 			 * to VBAD.
 			 */
 			NDFREE(&nd, NDF_ONLY_PNBUF);
 			vput(nd.ni_dvp);
 			vrele(vp);
 			return (EXDEV);
 		} else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) {
 			/* vp is locked from here on; error paths use vput(). */
 			error = can_hardlink(vp, td->td_ucred);
 #ifdef MAC
 			if (error == 0)
 				error = mac_vnode_check_link(td->td_ucred,
 				    nd.ni_dvp, vp, &nd.ni_cnd);
 #endif
 			if (error != 0) {
 				vput(vp);
 				vput(nd.ni_dvp);
 				NDFREE(&nd, NDF_ONLY_PNBUF);
 				return (error);
 			}
 			error = vn_start_write(vp, &mp, V_NOWAIT);
 			if (error != 0) {
 				/*
 				 * Could not start the write without waiting:
 				 * release everything, call vn_start_write()
 				 * with V_XSLEEP | PCATCH and ask the caller
 				 * to retry.
 				 */
 				vput(vp);
 				vput(nd.ni_dvp);
 				NDFREE(&nd, NDF_ONLY_PNBUF);
 				error = vn_start_write(NULL, &mp,
 				    V_XSLEEP | PCATCH);
 				if (error != 0)
 					return (error);
 				return (EAGAIN);
 			}
 			error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
 			VOP_UNLOCK(vp, 0);
 			vput(nd.ni_dvp);
 			vn_finished_write(mp);
 			NDFREE(&nd, NDF_ONLY_PNBUF);
 		} else {
 			/* Locking vp failed; have the caller retry. */
 			vput(nd.ni_dvp);
 			NDFREE(&nd, NDF_ONLY_PNBUF);
 			vrele(vp);
 			return (EAGAIN);
 		}
 	}
 	/* Reached after a failed lookup or after VOP_LINK() was attempted. */
 	vrele(vp);
 	return (error);
 }
 
 /*
  * Make a symbolic link.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct symlink_args {
 	char	*path;
 	char	*link;
 };
 #endif
 int
 sys_symlink(struct thread *td, struct symlink_args *uap)
 {
 
 	return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link,
 	    UIO_USERSPACE));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct symlinkat_args {
 	char	*path1;
 	int	fd;
 	char	*path2;
 };
 #endif
 /*
  * symlinkat(2) entry point: uap->path1 is the link contents and uap->path2
  * the new name, resolved relative to uap->fd.  All work is delegated to
  * kern_symlinkat() with user-space paths.
  *
  * The reference structure above names its first member path1 so that it
  * agrees with the uap->path1 access below.
  */
 int
 sys_symlinkat(struct thread *td, struct symlinkat_args *uap)
 {
 
 	return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2,
 	    UIO_USERSPACE));
 }
 
 /*
  * Create a symbolic link named path2 (resolved starting from fd) whose
  * contents are the string path1.
  *
  * When segflg is not UIO_SYSSPACE, path1 is first copied in with
  * copyinstr() into a buffer from namei_zone, which is freed at "out".
  *
  * Returns 0 on success, EEXIST when path2 already exists, or the errno
  * from copyinstr(), the lookup, the MAC check or VOP_SYMLINK().
  */
 int
 kern_symlinkat(struct thread *td, const char *path1, int fd, const char *path2,
     enum uio_seg segflg)
 {
 	struct mount *mp;
 	struct vattr vattr;
 	const char *syspath;
 	char *tmppath;
 	struct nameidata nd;
 	int error;
 
 	if (segflg == UIO_SYSSPACE) {
 		syspath = path1;
 	} else {
 		/* tmppath is only set, and only freed, on this branch. */
 		tmppath = uma_zalloc(namei_zone, M_WAITOK);
 		if ((error = copyinstr(path1, tmppath, MAXPATHLEN, NULL)) != 0)
 			goto out;
 		syspath = tmppath;
 	}
 	AUDIT_ARG_TEXT(syspath);
 restart:
 	bwillwrite();
 	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
 	    NOCACHE, segflg, path2, fd, &cap_symlinkat_rights,
 	    td);
 	if ((error = namei(&nd)) != 0)
 		goto out;
 	if (nd.ni_vp) {
 		/* The name already exists: release everything and fail. */
 		NDFREE(&nd, NDF_ONLY_PNBUF);
 		if (nd.ni_vp == nd.ni_dvp)
 			vrele(nd.ni_dvp);
 		else
 			vput(nd.ni_dvp);
 		vrele(nd.ni_vp);
 		error = EEXIST;
 		goto out;
 	}
 	/*
 	 * If write access cannot be started without waiting, release the
 	 * parent, call vn_start_write() again with V_XSLEEP | PCATCH and
 	 * redo the lookup.
 	 */
 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 		NDFREE(&nd, NDF_ONLY_PNBUF);
 		vput(nd.ni_dvp);
 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 			goto out;
 		goto restart;
 	}
 	VATTR_NULL(&vattr);
 	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
 #ifdef MAC
 	vattr.va_type = VLNK;
 	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 	    &vattr);
 	if (error != 0)
 		goto out2;
 #endif
 	error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath);
 	/* The new vnode is not needed here; release it. */
 	if (error == 0)
 		vput(nd.ni_vp);
 #ifdef MAC
 out2:
 #endif
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vput(nd.ni_dvp);
 	vn_finished_write(mp);
 out:
 	if (segflg != UIO_SYSSPACE)
 		uma_zfree(namei_zone, tmppath);
 	return (error);
 }
 
 /*
  * Delete a whiteout from the filesystem.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct undelete_args {
 	char *path;
 };
 #endif
 /*
  * undelete(2) entry point: remove the whiteout entry named by uap->path
  * via VOP_WHITEOUT(..., DELETE).
  *
  * Returns 0 on success; EEXIST when the lookup found a vnode or the name
  * is not marked ISWHITEOUT; otherwise the errno from the lookup or from
  * VOP_WHITEOUT().
  */
 int
 sys_undelete(struct thread *td, struct undelete_args *uap)
 {
 	struct mount *mp;
 	struct nameidata nd;
 	int error;
 
 restart:
 	bwillwrite();
 	NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1,
 	    UIO_USERSPACE, uap->path, td);
 	error = namei(&nd);
 	if (error != 0)
 		return (error);
 
 	/* Only a name that resolved to a whiteout (and no vnode) qualifies. */
 	if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) {
 		NDFREE(&nd, NDF_ONLY_PNBUF);
 		if (nd.ni_vp == nd.ni_dvp)
 			vrele(nd.ni_dvp);
 		else
 			vput(nd.ni_dvp);
 		if (nd.ni_vp)
 			vrele(nd.ni_vp);
 		return (EEXIST);
 	}
 	/*
 	 * If write access cannot be started without waiting, release the
 	 * parent, call vn_start_write() again with V_XSLEEP | PCATCH and
 	 * redo the lookup.
 	 */
 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 		NDFREE(&nd, NDF_ONLY_PNBUF);
 		vput(nd.ni_dvp);
 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 			return (error);
 		goto restart;
 	}
 	error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vput(nd.ni_dvp);
 	vn_finished_write(mp);
 	return (error);
 }
 
 /*
  * Delete a name from the filesystem.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct unlink_args {
 	char	*path;
 };
 #endif
 /*
  * unlink(2) entry point.  This hunk switches it from kern_unlinkat() to
  * kern_funlinkat(), called relative to AT_FDCWD with FD_NONE (no
  * descriptor to compare the target against) and with flag and oldinum
  * both 0.
  */
 int
 sys_unlink(struct thread *td, struct unlink_args *uap)
 {
 
-	return (kern_unlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 0, 0));
+	return (kern_funlinkat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE,
+	    0, 0));
 }
 
+/*
+ * Common back end for unlinkat(2) and funlinkat(2).
+ *
+ * td      - calling thread
+ * dfd     - directory descriptor the lookup of path starts from
+ * path    - name to remove, located in the address space named by pathseg
+ * fd      - descriptor the target must match, or FD_NONE
+ * flag    - AT_REMOVEDIR and/or AT_BENEATH; any other bit yields EINVAL
+ * oldinum - forwarded to kern_funlinkat() (not used for AT_REMOVEDIR)
+ *
+ * AT_REMOVEDIR selects kern_frmdirat(), otherwise kern_funlinkat() is
+ * called.  AT_BENEATH is accepted and forwarded, as the unlinkat() code
+ * replaced by this function did, and pathseg/oldinum are passed through
+ * instead of being replaced by constants.
+ *
+ * NOTE(review): kern_frmdirat() is assumed to interpret AT_BENEATH in its
+ * flag argument the same way kern_funlinkat() does; confirm against its
+ * definition.
+ *
+ * Returns EINVAL for unsupported flag bits, otherwise the result of the
+ * selected helper.
+ */
+static int
+kern_funlinkat_ex(struct thread *td, int dfd, const char *path, int fd,
+    int flag, enum uio_seg pathseg, ino_t oldinum)
+{
+
+	if ((flag & ~(AT_REMOVEDIR | AT_BENEATH)) != 0)
+		return (EINVAL);
+
+	if ((flag & AT_REMOVEDIR) != 0)
+		return (kern_frmdirat(td, dfd, path, fd, pathseg,
+		    flag & AT_BENEATH));
+
+	return (kern_funlinkat(td, dfd, path, fd, pathseg, flag & AT_BENEATH,
+	    oldinum));
+}
+
 #ifndef _SYS_SYSPROTO_H_
 struct unlinkat_args {
 	int	fd;
 	char	*path;
 	int	flag;
 };
 #endif
 /*
  * unlinkat(2) entry point, followed in this hunk by the newly added
  * funlinkat(2) argument structure and entry point.  After the change both
  * entry points call kern_funlinkat_ex(); sys_unlinkat() passes FD_NONE
  * where sys_funlinkat() passes the caller-supplied uap->fd.  The removed
  * lines show the previous flag validation and dispatch that used to be
  * done inline.
  */
 int
 sys_unlinkat(struct thread *td, struct unlinkat_args *uap)
 {
-	int fd, flag;
-	const char *path;
 
-	flag = uap->flag;
-	fd = uap->fd;
-	path = uap->path;
+	return (kern_funlinkat_ex(td, uap->fd, uap->path, FD_NONE, uap->flag,
+	    UIO_USERSPACE, 0));
+}
 
-	if ((flag & ~(AT_REMOVEDIR | AT_BENEATH)) != 0)
-		return (EINVAL);
+#ifndef _SYS_SYSPROTO_H_
+struct funlinkat_args {
+	int		dfd;
+	const char	*path;
+	int		fd;
+	int		flag;
+};
+#endif
+int
+sys_funlinkat(struct thread *td, struct funlinkat_args *uap)
+{
 
-	if ((uap->flag & AT_REMOVEDIR) != 0)
-		return (kern_rmdirat(td, fd, path, UIO_USERSPACE, flag));
-	else
-		return (kern_unlinkat(td, fd, path, UIO_USERSPACE, flag, 0));
+	return (kern_funlinkat_ex(td, uap->dfd, uap->path, uap->fd, uap->flag,
+	    UIO_USERSPACE, 0));
 }
 
 /*
  * Remove the name path, resolved starting from dfd (this hunk renames
  * kern_unlinkat() to kern_funlinkat() and adds the fd argument).
  *
  * fd      - FD_NONE, or a descriptor whose vnode must be the one the name
  *           resolves to; a mismatch yields EBADF when the descriptor's
  *           vnode has VI_DOOMED set and EDEADLK otherwise
  * flag    - only AT_BENEATH is examined here
  * oldinum - when non-zero, the inode number the target must have (EIDRM
  *           otherwise); when zero, directories are refused with EPERM
  *
  * Returns 0 on success, otherwise an errno value; EINVAL from the lookup
  * is reported as EPERM.  The file reference taken for fd is dropped at
  * "fdout" on every path that obtained it.
  */
 int
-kern_unlinkat(struct thread *td, int fd, const char *path,
+kern_funlinkat(struct thread *td, int dfd, const char *path, int fd,
     enum uio_seg pathseg, int flag, ino_t oldinum)
 {
 	struct mount *mp;
+	struct file *fp;
 	struct vnode *vp;
 	struct nameidata nd;
 	struct stat sb;
+	cap_rights_t rights;
 	int error;
 
+	fp = NULL;
+	if (fd != FD_NONE) {
+		error = getvnode(td, fd, cap_rights_init(&rights, CAP_LOOKUP),
+		    &fp);
+		if (error != 0)
+			return (error);
+	}
+
 restart:
 	bwillwrite();
 	NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 |
 	    ((flag & AT_BENEATH) != 0 ? BENEATH : 0),
-	    pathseg, path, fd, &cap_unlinkat_rights, td);
-	if ((error = namei(&nd)) != 0)
-		return (error == EINVAL ? EPERM : error);
+	    pathseg, path, dfd, &cap_unlinkat_rights, td);
+	if ((error = namei(&nd)) != 0) {
+		if (error == EINVAL)
+			error = EPERM;
+		goto fdout;
+	}
 	vp = nd.ni_vp;
 	if (vp->v_type == VDIR && oldinum == 0) {
 		error = EPERM;		/* POSIX */
 	} else if (oldinum != 0 &&
 		  ((error = vn_stat(vp, &sb, td->td_ucred, NOCRED, td)) == 0) &&
 		  sb.st_ino != oldinum) {
-			error = EIDRM;	/* Identifier removed */
+		error = EIDRM;	/* Identifier removed */
+	} else if (fp != NULL && fp->f_vnode != vp) {
+		if ((fp->f_vnode->v_iflag & VI_DOOMED) != 0)
+			error = EBADF;
+		else
+			error = EDEADLK;
 	} else {
 		/*
 		 * The root of a mounted filesystem cannot be deleted.
 		 *
 		 * XXX: can this only be a VDIR case?
 		 */
 		if (vp->v_vflag & VV_ROOT)
 			error = EBUSY;
 	}
 	if (error == 0) {
 		/*
 		 * If write access cannot be started without waiting, release
 		 * both vnodes, call vn_start_write() again with
 		 * V_XSLEEP | PCATCH and redo the lookup.
 		 */
 		if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 			NDFREE(&nd, NDF_ONLY_PNBUF);
 			vput(nd.ni_dvp);
 			if (vp == nd.ni_dvp)
 				vrele(vp);
 			else
 				vput(vp);
 			if ((error = vn_start_write(NULL, &mp,
-			    V_XSLEEP | PCATCH)) != 0)
-				return (error);
+			    V_XSLEEP | PCATCH)) != 0) {
+				goto fdout;
+			}
 			goto restart;
 		}
 #ifdef MAC
 		error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
 		    &nd.ni_cnd);
 		if (error != 0)
 			goto out;
 #endif
 		vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
 		error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd);
 #ifdef MAC
 out:
 #endif
 		vn_finished_write(mp);
 	}
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vput(nd.ni_dvp);
 	if (vp == nd.ni_dvp)
 		vrele(vp);
 	else
 		vput(vp);
+fdout:
+	if (fp != NULL)
+		fdrop(fp, td);
 	return (error);
 }
 
 /*
  * Reposition read/write file offset.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct lseek_args {
 	int	fd;
 	int	pad;
 	off_t	offset;
 	int	whence;
 };
 #endif
 int
 sys_lseek(struct thread *td, struct lseek_args *uap)
 {
 
 	return (kern_lseek(td, uap->fd, uap->offset, uap->whence));
 }
 
 /*
  * Reposition the offset of descriptor fd through the file's fo_seek
  * operation.
  *
  * Returns the errno from fget(), ESPIPE when the file's operations do not
  * carry DFLAG_SEEKABLE, or the result of fo_seek().
  */
 int
 kern_lseek(struct thread *td, int fd, off_t offset, int whence)
 {
 	struct file *fp;
 	int error;
 
 	AUDIT_ARG_FD(fd);
 	error = fget(td, fd, &cap_seek_rights, &fp);
 	if (error != 0)
 		return (error);
 	if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0)
 		error = ESPIPE;
 	else
 		error = fo_seek(fp, offset, whence, td);
 	fdrop(fp, td);
 	return (error);
 }
 
 #if defined(COMPAT_43)
 /*
  * Reposition read/write file offset.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct olseek_args {
 	int	fd;
 	long	offset;
 	int	whence;
 };
 #endif
 /*
  * COMPAT_43 lseek(): same as lseek(2) but with a long offset argument.
  */
 int
 olseek(struct thread *td, struct olseek_args *uap)
 {
 	int error;
 
 	error = kern_lseek(td, uap->fd, uap->offset, uap->whence);
 	return (error);
 }
 #endif /* COMPAT_43 */
 
 #if defined(COMPAT_FREEBSD6)
 /* Version with the 'pad' argument */
 int
 freebsd6_lseek(struct thread *td, struct freebsd6_lseek_args *uap)
 {
 
 	return (kern_lseek(td, uap->fd, uap->offset, uap->whence));
 }
 #endif
 
 /*
  * Check access permissions using passed credentials.
  */
 static int
 vn_access(struct vnode *vp, int user_flags, struct ucred *cred,
      struct thread *td)
 {
 	accmode_t accmode;
 	int error;
 
 	/* Flags == 0 means only check for existence. */
 	if (user_flags == 0)
 		return (0);
 
 	accmode = 0;
 	if (user_flags & R_OK)
 		accmode |= VREAD;
 	if (user_flags & W_OK)
 		accmode |= VWRITE;
 	if (user_flags & X_OK)
 		accmode |= VEXEC;
 #ifdef MAC
 	error = mac_vnode_check_access(cred, vp, accmode);
 	if (error != 0)
 		return (error);
 #endif
 	if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
 		error = VOP_ACCESS(vp, accmode, cred, td);
 	return (error);
 }
 
 /*
  * Check access permissions using "real" credentials.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct access_args {
 	char	*path;
 	int	amode;
 };
 #endif
 int
 sys_access(struct thread *td, struct access_args *uap)
 {
 
 	return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    0, uap->amode));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct faccessat_args {
 	int	fd;
 	char	*path;
 	int	amode;
 	int	flag;
 };
 #endif
 /*
  * faccessat(2) entry point: forwards the directory descriptor, path, flag
  * and access mode to kern_accessat() with a user-space path.
  *
  * The reference structure above names the descriptor member fd, matching
  * the uap->fd access below, and is terminated with a semicolon so that it
  * is a valid declaration when _SYS_SYSPROTO_H_ is not defined.
  */
 int
 sys_faccessat(struct thread *td, struct faccessat_args *uap)
 {
 
 	return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
 	    uap->amode));
 }
 
 /*
  * Check whether path (resolved starting from fd) is accessible with the
  * access mode amode.
  *
  * flag  - AT_EACCESS and/or AT_BENEATH; any other bit yields EINVAL
  * amode - F_OK, or a combination of R_OK, W_OK and X_OK; anything else
  *         yields EINVAL
  *
  * Unless AT_EACCESS is given, the check is made with a duplicated
  * credential whose cr_uid and cr_groups[0] are replaced by the real uid
  * and gid (only when they differ from the current ones).  That credential
  * is installed in td->td_ucred for the duration of the lookup and restored
  * before returning.
  *
  * Returns 0 when access is granted, otherwise an errno value.
  */
 int
 kern_accessat(struct thread *td, int fd, const char *path,
     enum uio_seg pathseg, int flag, int amode)
 {
 	struct ucred *cred, *usecred;
 	struct vnode *vp;
 	struct nameidata nd;
 	int error;
 
 	if ((flag & ~(AT_EACCESS | AT_BENEATH)) != 0)
 		return (EINVAL);
 	if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0)
 		return (EINVAL);
 
 	/*
 	 * Create and modify a temporary credential instead of one that
 	 * is potentially shared (if we need one).
 	 */
 	cred = td->td_ucred;
 	if ((flag & AT_EACCESS) == 0 &&
 	    ((cred->cr_uid != cred->cr_ruid ||
 	    cred->cr_rgid != cred->cr_groups[0]))) {
 		usecred = crdup(cred);
 		usecred->cr_uid = cred->cr_ruid;
 		usecred->cr_groups[0] = cred->cr_rgid;
 		td->td_ucred = usecred;
 	} else
 		usecred = cred;
 	AUDIT_ARG_VALUE(amode);
 	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF |
 	    AUDITVNODE1 | ((flag & AT_BENEATH) != 0 ? BENEATH : 0),
 	    pathseg, path, fd, &cap_fstat_rights, td);
 	if ((error = namei(&nd)) != 0)
 		goto out;
 	vp = nd.ni_vp;
 
 	error = vn_access(vp, amode, usecred, td);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vput(vp);
 out:
 	/* Restore the original credential and free the temporary one. */
 	if (usecred != cred) {
 		td->td_ucred = cred;
 		crfree(usecred);
 	}
 	return (error);
 }
 
 /*
  * Check access permissions using "effective" credentials.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct eaccess_args {
 	char	*path;
 	int	amode;
 };
 #endif
 int
 sys_eaccess(struct thread *td, struct eaccess_args *uap)
 {
 
 	return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    AT_EACCESS, uap->amode));
 }
 
 #if defined(COMPAT_43)
 /*
  * Get file status; this version follows links.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct ostat_args {
 	char	*path;
 	struct ostat *ub;
 };
 #endif
 int
 ostat(struct thread *td, struct ostat_args *uap)
 {
 	struct stat sb;
 	struct ostat osb;
 	int error;
 
 	error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    &sb, NULL);
 	if (error != 0)
 		return (error);
 	cvtstat(&sb, &osb);
 	return (copyout(&osb, uap->ub, sizeof (osb)));
 }
 
 /*
  * Get file status; this version does not follow links.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct olstat_args {
 	char	*path;
 	struct ostat *ub;
 };
 #endif
 int
 olstat(struct thread *td, struct olstat_args *uap)
 {
 	struct stat sb;
 	struct ostat osb;
 	int error;
 
 	error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path,
 	    UIO_USERSPACE, &sb, NULL);
 	if (error != 0)
 		return (error);
 	cvtstat(&sb, &osb);
 	return (copyout(&osb, uap->ub, sizeof (osb)));
 }
 
 /*
  * Convert from a new (struct stat) to an old (struct ostat) stat structure.
  * XXX: many values are blindly truncated.
  */
 void
 cvtstat(struct stat *st, struct ostat *ost)
 {
 
 	/* Start from an all-zero destination. */
 	bzero(ost, sizeof(*ost));
 
 	/* Identity and type. */
 	ost->st_dev = st->st_dev;
 	ost->st_ino = st->st_ino;
 	ost->st_rdev = st->st_rdev;
 	ost->st_mode = st->st_mode;
 	ost->st_nlink = st->st_nlink;
 	ost->st_gen = st->st_gen;
 	ost->st_flags = st->st_flags;
 
 	/* Ownership. */
 	ost->st_uid = st->st_uid;
 	ost->st_gid = st->st_gid;
 
 	/* Size information; the size is capped at INT32_MAX. */
 	ost->st_size = MIN(st->st_size, INT32_MAX);
 	ost->st_blksize = st->st_blksize;
 	ost->st_blocks = st->st_blocks;
 
 	/* Timestamps. */
 	ost->st_atim = st->st_atim;
 	ost->st_mtim = st->st_mtim;
 	ost->st_ctim = st->st_ctim;
 }
 #endif /* COMPAT_43 */
 
 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11)
 /*
  * Read-write knob under _vfs selecting what freebsd11_cvtstat() does when
  * a value does not fit the old structure: 1 makes the conversion fail with
  * EOVERFLOW; 2 clamps st_ino and st_nlink to their maximum (st_dev and
  * st_rdev are left truncated); any other value lets the truncated value
  * through.
  */
 int ino64_trunc_error;
 SYSCTL_INT(_vfs, OID_AUTO, ino64_trunc_error, CTLFLAG_RW,
     &ino64_trunc_error, 0,
     "Error on truncation of device, file or inode number, or link count");
 
 /*
  * Fill the FreeBSD 11 layout *ost from the current struct stat *st.
  *
  * For st_dev, st_ino, st_nlink and st_rdev the value is assigned and then
  * compared with the source; a mismatch means the destination field could
  * not hold it, and ino64_trunc_error decides what happens next.
  *
  * Returns 0 on success or EOVERFLOW when a value was truncated and
  * ino64_trunc_error is 1.  *ost may be partially filled in that case.
  */
 int
 freebsd11_cvtstat(struct stat *st, struct freebsd11_stat *ost)
 {
 
 	ost->st_dev = st->st_dev;
 	if (ost->st_dev != st->st_dev) {
 		switch (ino64_trunc_error) {
 		default:
 			/*
 			 * Since dev_t is almost raw, don't clamp to the
 			 * maximum for case 2, but ignore the error.
 			 */
 			break;
 		case 1:
 			return (EOVERFLOW);
 		}
 	}
 	ost->st_ino = st->st_ino;
 	if (ost->st_ino != st->st_ino) {
 		switch (ino64_trunc_error) {
 		default:
 		case 0:
 			break;
 		case 1:
 			return (EOVERFLOW);
 		case 2:
 			/* Clamp instead of reporting a wrapped value. */
 			ost->st_ino = UINT32_MAX;
 			break;
 		}
 	}
 	ost->st_mode = st->st_mode;
 	ost->st_nlink = st->st_nlink;
 	if (ost->st_nlink != st->st_nlink) {
 		switch (ino64_trunc_error) {
 		default:
 		case 0:
 			break;
 		case 1:
 			return (EOVERFLOW);
 		case 2:
 			/* Clamp instead of reporting a wrapped value. */
 			ost->st_nlink = UINT16_MAX;
 			break;
 		}
 	}
 	ost->st_uid = st->st_uid;
 	ost->st_gid = st->st_gid;
 	ost->st_rdev = st->st_rdev;
 	if (ost->st_rdev != st->st_rdev) {
 		switch (ino64_trunc_error) {
 		default:
 			break;
 		case 1:
 			return (EOVERFLOW);
 		}
 	}
 	ost->st_atim = st->st_atim;
 	ost->st_mtim = st->st_mtim;
 	ost->st_ctim = st->st_ctim;
 	ost->st_size = st->st_size;
 	ost->st_blocks = st->st_blocks;
 	ost->st_blksize = st->st_blksize;
 	ost->st_flags = st->st_flags;
 	ost->st_gen = st->st_gen;
 	ost->st_lspare = 0;
 	ost->st_birthtim = st->st_birthtim;
 	/* Zero whatever follows st_birthtim up to the end of *ost. */
 	bzero((char *)&ost->st_birthtim + sizeof(ost->st_birthtim),
 	    sizeof(*ost) - offsetof(struct freebsd11_stat,
 	    st_birthtim) - sizeof(ost->st_birthtim));
 	return (0);
 }
 
 /*
  * FreeBSD 11 compatible stat(): obtain the status with kern_statat()
  * (flag 0, relative to AT_FDCWD), convert it with freebsd11_cvtstat() and
  * copy the result out to uap->ub.
  */
 int
 freebsd11_stat(struct thread *td, struct freebsd11_stat_args* uap)
 {
 	struct stat sb;
 	struct freebsd11_stat osb;
 	int error;
 
 	error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    &sb, NULL);
 	if (error == 0)
 		error = freebsd11_cvtstat(&sb, &osb);
 	if (error == 0)
 		error = copyout(&osb, uap->ub, sizeof(osb));
 	return (error);
 }
 
 /*
  * FreeBSD 11 compatible lstat(): like freebsd11_stat() but passes
  * AT_SYMLINK_NOFOLLOW to kern_statat().
  */
 int
 freebsd11_lstat(struct thread *td, struct freebsd11_lstat_args* uap)
 {
 	struct stat sb;
 	struct freebsd11_stat osb;
 	int error;
 
 	error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path,
 	    UIO_USERSPACE, &sb, NULL);
 	if (error == 0)
 		error = freebsd11_cvtstat(&sb, &osb);
 	if (error == 0)
 		error = copyout(&osb, uap->ub, sizeof(osb));
 	return (error);
 }
 
 /*
  * FreeBSD 11 compatible fhstat(): copy the file handle in from
  * uap->u_fhp, obtain the status with kern_fhstat(), convert it with
  * freebsd11_cvtstat() and copy the result out to uap->sb.
  */
 int
 freebsd11_fhstat(struct thread *td, struct freebsd11_fhstat_args* uap)
 {
 	struct fhandle fh;
 	struct stat sb;
 	struct freebsd11_stat osb;
 	int error;
 
 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
 	if (error == 0)
 		error = kern_fhstat(td, fh, &sb);
 	if (error == 0)
 		error = freebsd11_cvtstat(&sb, &osb);
 	if (error == 0)
 		error = copyout(&osb, uap->sb, sizeof(osb));
 	return (error);
 }
 
 int
 freebsd11_fstatat(struct thread *td, struct freebsd11_fstatat_args* uap)
 {
 	struct stat sb;
 	struct freebsd11_stat osb;
 	int error;
 
 	error = kern_statat(td, uap->flag, uap->fd, uap->path,
 	    UIO_USERSPACE, &sb, NULL);
 	if (error != 0)
 		return (error);
 	error = freebsd11_cvtstat(&sb, &osb);
 	if (error == 0)
 		error = copyout(&osb, uap->buf, sizeof(osb));
 	return (error);
 }
 #endif	/* COMPAT_FREEBSD11 */
 
 /*
  * Get file status
  *
  * fstatat(): look up uap->path relative to uap->fd, honouring uap->flag,
  * and copy the resulting struct stat out to uap->buf.  Returns 0 or the
  * error from kern_statat()/copyout().
  */
 #ifndef _SYS_SYSPROTO_H_
 struct fstatat_args {
 	int	fd;
 	char	*path;
 	struct stat	*buf;
 	int	flag;
 };
 #endif
 int
 sys_fstatat(struct thread *td, struct fstatat_args *uap)
 {
 	struct stat sb;
 	int error;
 
 	error = kern_statat(td, uap->flag, uap->fd, uap->path,
 	    UIO_USERSPACE, &sb, NULL);
 	/* Only touch the user buffer when the lookup succeeded. */
 	if (error == 0)
 		error = copyout(&sb, uap->buf, sizeof (sb));
 	return (error);
 }
 
 /*
  * Look up "path" (relative to "fd", path taken from address space
  * "pathseg") and store the file's status in *sbp.
  *
  * "flag" may contain only AT_SYMLINK_NOFOLLOW and AT_BENEATH; any other
  * bit yields EINVAL.  When "hook" is not NULL it is called with the
  * looked-up vnode and sbp after a successful vn_stat() and before the
  * vnode is released with vput().
  *
  * Returns 0 on success, otherwise EINVAL or the error reported by namei()
  * or vn_stat().
  */
 int
 kern_statat(struct thread *td, int flag, int fd, const char *path,
     enum uio_seg pathseg, struct stat *sbp,
     void (*hook)(struct vnode *vp, struct stat *sbp))
 {
 	struct nameidata nd;
 	int error;
 
 	if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_BENEATH)) != 0)
 		return (EINVAL);
 
 	/* Translate the AT_* bits into the matching lookup flags. */
 	NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) != 0 ?
 	    NOFOLLOW : FOLLOW) | ((flag & AT_BENEATH) != 0 ? BENEATH : 0) |
 	    LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd,
 	    &cap_fstat_rights, td);
 
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	error = vn_stat(nd.ni_vp, sbp, td->td_ucred, NOCRED, td);
 	if (error == 0) {
 		/* Report the mode, and regular files separately, via SDT. */
 		SDT_PROBE2(vfs, , stat, mode, path, sbp->st_mode);
 		if (S_ISREG(sbp->st_mode))
 			SDT_PROBE2(vfs, , stat, reg, path, pathseg);
 		if (__predict_false(hook != NULL))
 			hook(nd.ni_vp, sbp);
 	}
 	/* The lookup results are dropped on both success and failure. */
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vput(nd.ni_vp);
 	if (error != 0)
 		return (error);
 #ifdef __STAT_TIME_T_EXT
 	/* Clear the timestamp extension fields before returning them. */
 	sbp->st_atim_ext = 0;
 	sbp->st_mtim_ext = 0;
 	sbp->st_ctim_ext = 0;
 	sbp->st_btim_ext = 0;
 #endif
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_STRUCT))
 		ktrstat(sbp);
 #endif
 	return (0);
 }
 
 #if defined(COMPAT_FREEBSD11)
 /*
  * Implementation of the NetBSD [l]stat() functions.
  *
  * freebsd11_cvtnstat() fills *nsb from *sb: the destination is zeroed
  * first and every field is then copied by plain assignment, without any
  * range checking.
  */
 void
 freebsd11_cvtnstat(struct stat *sb, struct nstat *nsb)
 {
 
 	bzero(nsb, sizeof(*nsb));
 
 	/* Identity of the file. */
 	nsb->st_dev = sb->st_dev;
 	nsb->st_ino = sb->st_ino;
 	nsb->st_rdev = sb->st_rdev;
 	nsb->st_gen = sb->st_gen;
 
 	/* Ownership, permissions and link count. */
 	nsb->st_mode = sb->st_mode;
 	nsb->st_nlink = sb->st_nlink;
 	nsb->st_uid = sb->st_uid;
 	nsb->st_gid = sb->st_gid;
 	nsb->st_flags = sb->st_flags;
 
 	/* Size information. */
 	nsb->st_size = sb->st_size;
 	nsb->st_blocks = sb->st_blocks;
 	nsb->st_blksize = sb->st_blksize;
 
 	/* Timestamps. */
 	nsb->st_atim = sb->st_atim;
 	nsb->st_mtim = sb->st_mtim;
 	nsb->st_ctim = sb->st_ctim;
 	nsb->st_birthtim = sb->st_birthtim;
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct freebsd11_nstat_args {
 	char	*path;
 	struct nstat *ub;
 };
 #endif
 int
 freebsd11_nstat(struct thread *td, struct freebsd11_nstat_args *uap)
 {
 	struct stat sb;
 	struct nstat nsb;
 	int error;
 
 	error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    &sb, NULL);
 	if (error != 0)
 		return (error);
 	freebsd11_cvtnstat(&sb, &nsb);
 	return (copyout(&nsb, uap->ub, sizeof (nsb)));
 }
 
 /*
  * NetBSD lstat.  Get file status; this version does not follow links.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct freebsd11_nlstat_args {
 	char	*path;
 	struct nstat *ub;
 };
 #endif
 int
 freebsd11_nlstat(struct thread *td, struct freebsd11_nlstat_args *uap)
 {
 	struct stat sb;
 	struct nstat nsb;
 	int error;
 
 	error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path,
 	    UIO_USERSPACE, &sb, NULL);
 	if (error != 0)
 		return (error);
 	freebsd11_cvtnstat(&sb, &nsb);
 	return (copyout(&nsb, uap->ub, sizeof (nsb)));
 }
 #endif /* COMPAT_FREEBSD11 */
 
 /*
  * Get configurable pathname variables.
  *
  * pathconf(): query variable uap->name for the file at uap->path, with
  * the FOLLOW lookup flag, and hand the value back in td_retval[0].
  */
 #ifndef _SYS_SYSPROTO_H_
 struct pathconf_args {
 	char	*path;
 	int	name;
 };
 #endif
 int
 sys_pathconf(struct thread *td, struct pathconf_args *uap)
 {
 	long result;
 	int rc;
 
 	rc = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW,
 	    &result);
 	if (rc != 0)
 		return (rc);
 	td->td_retval[0] = result;
 	return (0);
 }
 
 /*
  * lpathconf(): like pathconf() but the lookup uses the NOFOLLOW flag.
  * The value is handed back in td_retval[0].
  */
 #ifndef _SYS_SYSPROTO_H_
 struct lpathconf_args {
 	char	*path;
 	int	name;
 };
 #endif
 int
 sys_lpathconf(struct thread *td, struct lpathconf_args *uap)
 {
 	long result;
 	int rc;
 
 	rc = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, NOFOLLOW,
 	    &result);
 	if (rc != 0)
 		return (rc);
 	td->td_retval[0] = result;
 	return (0);
 }
 
 /*
  * Look up "path" (with the extra lookup "flags" supplied by the caller)
  * and ask the file system for configurable variable "name" through
  * VOP_PATHCONF(); the answer is stored in *valuep.  Returns 0 or the
  * error from namei()/VOP_PATHCONF().
  */
 int
 kern_pathconf(struct thread *td, const char *path, enum uio_seg pathseg,
     int name, u_long flags, long *valuep)
 {
 	struct nameidata nd;
 	struct vnode *vp;
 	int rc;
 
 	NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags,
 	    pathseg, path, td);
 	rc = namei(&nd);
 	if (rc != 0)
 		return (rc);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vp = nd.ni_vp;
 
 	rc = VOP_PATHCONF(vp, name, valuep);
 	vput(vp);
 	return (rc);
 }
 
 /*
  * Return target name of a symbolic link.
  *
  * readlink(): thin wrapper around kern_readlinkat() that resolves
  * uap->path relative to the current working directory; both the path and
  * the output buffer live in user space.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct readlink_args {
 	char	*path;
 	char	*buf;
 	size_t	count;
 };
 #endif
 int
 sys_readlink(struct thread *td, struct readlink_args *uap)
 {
 	int rc;
 
 	rc = kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->buf,
 	    UIO_USERSPACE, uap->count);
 	return (rc);
 }
 /*
  * readlinkat(): thin wrapper around kern_readlinkat() that resolves
  * uap->path relative to uap->fd; both the path and the output buffer
  * live in user space.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct readlinkat_args {
 	int	fd;
 	char	*path;
 	char	*buf;
 	size_t	bufsize;
 };
 #endif
 int
 sys_readlinkat(struct thread *td, struct readlinkat_args *uap)
 {
 	int rc;
 
 	rc = kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, uap->buf,
 	    UIO_USERSPACE, uap->bufsize);
 	return (rc);
 }
 
 /*
  * Look up "path" relative to "fd" without following a trailing symbolic
  * link and read the link contents into "buf" (at most "count" bytes, in
  * address space "bufseg").  Counts above IOSIZE_MAX are rejected with
  * EINVAL before any lookup is done.
  */
 int
 kern_readlinkat(struct thread *td, int fd, const char *path,
     enum uio_seg pathseg, char *buf, enum uio_seg bufseg, size_t count)
 {
 	struct nameidata nd;
 	int rc;
 
 	if (count > IOSIZE_MAX)
 		return (EINVAL);
 
 	NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
 	    pathseg, path, fd, td);
 	rc = namei(&nd);
 	if (rc != 0)
 		return (rc);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 
 	/* The helper does the actual read; the vnode is dropped afterwards. */
 	rc = kern_readlink_vp(nd.ni_vp, buf, bufseg, count, td);
 	vput(nd.ni_vp);
 	return (rc);
 }
 
 /*
  * Helper function to readlink from a vnode
  *
  * The vnode must be locked by the caller (asserted).  Vnodes that are
  * neither VLNK nor flagged VV_READLINK yield EINVAL.  The number of bytes
  * transferred is stored in td_retval[0] whatever VOP_READLINK() returns.
  */
 static int
 kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, size_t count,
     struct thread *td)
 {
 	struct uio io;
 	struct iovec vec;
 	int rc;
 
 	ASSERT_VOP_LOCKED(vp, "kern_readlink_vp(): vp not locked");
 #ifdef MAC
 	rc = mac_vnode_check_readlink(td->td_ucred, vp);
 	if (rc != 0)
 		return (rc);
 #endif
 	if (!(vp->v_type == VLNK || (vp->v_vflag & VV_READLINK) != 0))
 		return (EINVAL);
 
 	/* Single-segment read of "count" bytes starting at offset 0. */
 	vec.iov_base = buf;
 	vec.iov_len = count;
 
 	io.uio_iov = &vec;
 	io.uio_iovcnt = 1;
 	io.uio_resid = count;
 	io.uio_offset = 0;
 	io.uio_segflg = bufseg;
 	io.uio_rw = UIO_READ;
 	io.uio_td = td;
 
 	rc = VOP_READLINK(vp, &io, td->td_ucred);
 	td->td_retval[0] = count - io.uio_resid;
 	return (rc);
 }
 
 /*
  * Common implementation code for chflags() and fchflags().
  *
  * Sets the file flags of "vp" to "flags" through VOP_SETATTR().  The
  * vnode is locked exclusively here and unlocked again before returning.
  * Returns 0, EOPNOTSUPP when "flags" equals VNOVAL, or the error from
  * priv_check(), vn_start_write(), the MAC check or VOP_SETATTR().
  */
 static int
 setfflags(struct thread *td, struct vnode *vp, u_long flags)
 {
 	struct mount *mp;
 	struct vattr vattr;
 	int error;
 
 	/* We can't support the value matching VNOVAL. */
 	if (flags == VNOVAL)
 		return (EOPNOTSUPP);
 
 	/*
 	 * Prevent non-root users from setting flags on devices.  When
 	 * a device is reused, users can retain ownership of the device
 	 * if they are allowed to set flags and programs assume that
 	 * chown can't fail when done as root.
 	 */
 	if (vp->v_type == VCHR || vp->v_type == VBLK) {
 		error = priv_check(td, PRIV_VFS_CHFLAGS_DEV);
 		if (error != 0)
 			return (error);
 	}
 
 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 		return (error);
 	/* Only va_flags is filled in; VATTR_NULL() resets the rest. */
 	VATTR_NULL(&vattr);
 	vattr.va_flags = flags;
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 #ifdef MAC
 	/* With MAC, VOP_SETATTR() below runs only if the check passes. */
 	error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags);
 	if (error == 0)
 #endif
 		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 	VOP_UNLOCK(vp, 0);
 	vn_finished_write(mp);
 	return (error);
 }
 
 /*
  * Change flags of a file given a path name.
  *
  * chflags(): wrapper around kern_chflagsat() using the current working
  * directory and no AT_* flags.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct chflags_args {
 	const char *path;
 	u_long	flags;
 };
 #endif
 int
 sys_chflags(struct thread *td, struct chflags_args *uap)
 {
 	int rc;
 
 	rc = kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    uap->flags, 0);
 	return (rc);
 }
 
 /*
  * chflagsat(): change the flags of uap->path, looked up relative to
  * uap->fd.  Only AT_SYMLINK_NOFOLLOW and AT_BENEATH are accepted in
  * uap->atflag; any other bit yields EINVAL.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct chflagsat_args {
 	int	fd;
 	const char *path;
 	u_long	flags;
 	int	atflag;
 };
 #endif
 int
 sys_chflagsat(struct thread *td, struct chflagsat_args *uap)
 {
 
 	if ((uap->atflag & ~(AT_SYMLINK_NOFOLLOW | AT_BENEATH)) != 0)
 		return (EINVAL);
 
 	return (kern_chflagsat(td, uap->fd, uap->path, UIO_USERSPACE,
 	    uap->flags, uap->atflag));
 }
 
 /*
  * Same as chflags() but doesn't follow symlinks.
  *
  * lchflags(): wrapper around kern_chflagsat() using the current working
  * directory and AT_SYMLINK_NOFOLLOW.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct lchflags_args {
 	const char *path;
 	u_long flags;
 };
 #endif
 int
 sys_lchflags(struct thread *td, struct lchflags_args *uap)
 {
 	int rc;
 
 	rc = kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
 	    uap->flags, AT_SYMLINK_NOFOLLOW);
 	return (rc);
 }
 
 /*
  * Look up "path" relative to "fd" and set its file flags via setfflags().
  * AT_SYMLINK_NOFOLLOW and AT_BENEATH in "atflag" select the NOFOLLOW and
  * BENEATH lookup flags respectively; other bits are ignored here.
  */
 static int
 kern_chflagsat(struct thread *td, int fd, const char *path,
     enum uio_seg pathseg, u_long flags, int atflag)
 {
 	struct nameidata nd;
 	int lookup, rc;
 
 	AUDIT_ARG_FFLAGS(flags);
 	if (atflag & AT_SYMLINK_NOFOLLOW)
 		lookup = NOFOLLOW;
 	else
 		lookup = FOLLOW;
 	if ((atflag & AT_BENEATH) != 0)
 		lookup |= BENEATH;
 	NDINIT_ATRIGHTS(&nd, LOOKUP, lookup | AUDITVNODE1, pathseg, path, fd,
 	    &cap_fchflags_rights, td);
 	rc = namei(&nd);
 	if (rc != 0)
 		return (rc);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	rc = setfflags(td, nd.ni_vp, flags);
 	vrele(nd.ni_vp);
 	return (rc);
 }
 
 /*
  * Change flags of a file given a file descriptor.
  *
  * fchflags(): resolve uap->fd to its vnode with getvnode() and apply
  * uap->flags through setfflags().  With AUDIT compiled in, the vnode is
  * briefly share-locked so it can be recorded for auditing.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct fchflags_args {
 	int	fd;
 	u_long	flags;
 };
 #endif
 int
 sys_fchflags(struct thread *td, struct fchflags_args *uap)
 {
 	struct vnode *vp;
 	struct file *fp;
 	int rc;
 
 	AUDIT_ARG_FD(uap->fd);
 	AUDIT_ARG_FFLAGS(uap->flags);
 	rc = getvnode(td, uap->fd, &cap_fchflags_rights, &fp);
 	if (rc != 0)
 		return (rc);
 	vp = fp->f_vnode;
 #ifdef AUDIT
 	vn_lock(vp, LK_SHARED | LK_RETRY);
 	AUDIT_ARG_VNODE1(vp);
 	VOP_UNLOCK(vp, 0);
 #endif
 	rc = setfflags(td, vp, uap->flags);
 	fdrop(fp, td);
 	return (rc);
 }
 
 /*
  * Common implementation code for chmod(), lchmod() and fchmod().
  */
 int
 setfmode(struct thread *td, struct ucred *cred, struct vnode *vp, int mode)
 {
 	struct mount *mp;
 	struct vattr vattr;
 	int error;
 
 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 		return (error);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	VATTR_NULL(&vattr);
 	vattr.va_mode = mode & ALLPERMS;
 #ifdef MAC
 	error = mac_vnode_check_setmode(cred, vp, vattr.va_mode);
 	if (error == 0)
 #endif
 		error = VOP_SETATTR(vp, &vattr, cred);
 	VOP_UNLOCK(vp, 0);
 	vn_finished_write(mp);
 	return (error);
 }
 
 /*
  * Change mode of a file given path name.
  *
  * chmod(): wrapper around kern_fchmodat() using the current working
  * directory and no AT_* flags.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct chmod_args {
 	char	*path;
 	int	mode;
 };
 #endif
 int
 sys_chmod(struct thread *td, struct chmod_args *uap)
 {
 	int rc;
 
 	rc = kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->mode,
 	    0);
 	return (rc);
 }
 
 /*
  * fchmodat(): change the mode of uap->path, looked up relative to
  * uap->fd.  Only AT_SYMLINK_NOFOLLOW and AT_BENEATH are accepted in
  * uap->flag; any other bit yields EINVAL.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct fchmodat_args {
 	int	fd;
 	char	*path;
 	mode_t	mode;
 	int	flag;
 };
 #endif
 int
 sys_fchmodat(struct thread *td, struct fchmodat_args *uap)
 {
 
 	if ((uap->flag & ~(AT_SYMLINK_NOFOLLOW | AT_BENEATH)) != 0)
 		return (EINVAL);
 
 	return (kern_fchmodat(td, uap->fd, uap->path, UIO_USERSPACE,
 	    uap->mode, uap->flag));
 }
 
 /*
  * Change mode of a file given path name (don't follow links.)
  *
  * lchmod(): wrapper around kern_fchmodat() using the current working
  * directory and AT_SYMLINK_NOFOLLOW.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct lchmod_args {
 	char	*path;
 	int	mode;
 };
 #endif
 int
 sys_lchmod(struct thread *td, struct lchmod_args *uap)
 {
 	int rc;
 
 	rc = kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->mode,
 	    AT_SYMLINK_NOFOLLOW);
 	return (rc);
 }
 
 /*
  * Look up "path" relative to "fd" and change its mode via setfmode(),
  * using the thread's credentials.  AT_SYMLINK_NOFOLLOW and AT_BENEATH in
  * "flag" select the NOFOLLOW and BENEATH lookup flags respectively; other
  * bits are ignored here.
  */
 int
 kern_fchmodat(struct thread *td, int fd, const char *path,
     enum uio_seg pathseg, mode_t mode, int flag)
 {
 	struct nameidata nd;
 	int lookup, rc;
 
 	AUDIT_ARG_MODE(mode);
 	if ((flag & AT_SYMLINK_NOFOLLOW) != 0)
 		lookup = NOFOLLOW;
 	else
 		lookup = FOLLOW;
 	if ((flag & AT_BENEATH) != 0)
 		lookup |= BENEATH;
 	NDINIT_ATRIGHTS(&nd, LOOKUP, lookup | AUDITVNODE1, pathseg, path, fd,
 	    &cap_fchmod_rights, td);
 	rc = namei(&nd);
 	if (rc != 0)
 		return (rc);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	rc = setfmode(td, td->td_ucred, nd.ni_vp, mode);
 	vrele(nd.ni_vp);
 	return (rc);
 }
 
 /*
  * Change mode of a file given a file descriptor.
  *
  * fchmod(): obtain the file for uap->fd with fget() and delegate the mode
  * change to the file's fo_chmod() operation.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct fchmod_args {
 	int	fd;
 	int	mode;
 };
 #endif
 int
 sys_fchmod(struct thread *td, struct fchmod_args *uap)
 {
 	struct file *fp;
 	int rc;
 
 	AUDIT_ARG_FD(uap->fd);
 	AUDIT_ARG_MODE(uap->mode);
 
 	rc = fget(td, uap->fd, &cap_fchmod_rights, &fp);
 	if (rc == 0) {
 		rc = fo_chmod(fp, uap->mode, td->td_ucred, td);
 		fdrop(fp, td);
 	}
 	return (rc);
 }
 
 /*
  * Common implementation for chown(), lchown(), and fchown()
  *
  * Sets the owner and group of "vp" to "uid" and "gid", using "cred" for
  * the MAC check (when compiled in) and for VOP_SETATTR().  The vnode is
  * locked exclusively for the duration of the update.
  */
 int
 setfown(struct thread *td, struct ucred *cred, struct vnode *vp, uid_t uid,
     gid_t gid)
 {
 	struct vattr va;
 	struct mount *mp;
 	int rc;
 
 	rc = vn_start_write(vp, &mp, V_WAIT | PCATCH);
 	if (rc != 0)
 		return (rc);
 
 	/* Only the ownership is being changed. */
 	VATTR_NULL(&va);
 	va.va_uid = uid;
 	va.va_gid = gid;
 
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 #ifdef MAC
 	rc = mac_vnode_check_setowner(cred, vp, va.va_uid, va.va_gid);
 	if (rc == 0)
 #endif
 		rc = VOP_SETATTR(vp, &va, cred);
 	VOP_UNLOCK(vp, 0);
 	vn_finished_write(mp);
 	return (rc);
 }
 
 /*
  * Set ownership given a path name.
  *
  * chown(): wrapper around kern_fchownat() using the current working
  * directory and no AT_* flags.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct chown_args {
 	char	*path;
 	int	uid;
 	int	gid;
 };
 #endif
 int
 sys_chown(struct thread *td, struct chown_args *uap)
 {
 	int rc;
 
 	rc = kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid,
 	    uap->gid, 0);
 	return (rc);
 }
 
 /*
  * fchownat(): change the ownership of uap->path, looked up relative to
  * uap->fd.  Only AT_SYMLINK_NOFOLLOW and AT_BENEATH are accepted in
  * uap->flag; any other bit yields EINVAL.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct fchownat_args {
 	int fd;
 	const char * path;
 	uid_t uid;
 	gid_t gid;
 	int flag;
 };
 #endif
 int
 sys_fchownat(struct thread *td, struct fchownat_args *uap)
 {
 	int rc;
 
 	if ((uap->flag & ~(AT_SYMLINK_NOFOLLOW | AT_BENEATH)) != 0) {
 		rc = EINVAL;
 	} else {
 		rc = kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE,
 		    uap->uid, uap->gid, uap->flag);
 	}
 	return (rc);
 }
 
 /*
  * Look up "path" relative to "fd" and change its ownership via setfown(),
  * using the thread's credentials.  AT_SYMLINK_NOFOLLOW and AT_BENEATH in
  * "flag" select the NOFOLLOW and BENEATH lookup flags respectively; other
  * bits are ignored here.
  */
 int
 kern_fchownat(struct thread *td, int fd, const char *path,
     enum uio_seg pathseg, int uid, int gid, int flag)
 {
 	struct nameidata nd;
 	int lookup, rc;
 
 	AUDIT_ARG_OWNER(uid, gid);
 	if (flag & AT_SYMLINK_NOFOLLOW)
 		lookup = NOFOLLOW;
 	else
 		lookup = FOLLOW;
 	if ((flag & AT_BENEATH) != 0)
 		lookup |= BENEATH;
 	NDINIT_ATRIGHTS(&nd, LOOKUP, lookup | AUDITVNODE1, pathseg, path, fd,
 	    &cap_fchown_rights, td);
 
 	rc = namei(&nd);
 	if (rc != 0)
 		return (rc);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	rc = setfown(td, td->td_ucred, nd.ni_vp, uid, gid);
 	vrele(nd.ni_vp);
 	return (rc);
 }
 
 /*
  * Set ownership given a path name, do not cross symlinks.
  *
  * lchown(): wrapper around kern_fchownat() using the current working
  * directory and AT_SYMLINK_NOFOLLOW.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct lchown_args {
 	char	*path;
 	int	uid;
 	int	gid;
 };
 #endif
 int
 sys_lchown(struct thread *td, struct lchown_args *uap)
 {
 	int rc;
 
 	rc = kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid,
 	    uap->gid, AT_SYMLINK_NOFOLLOW);
 	return (rc);
 }
 
 /*
  * Set ownership given a file descriptor.
  *
  * fchown(): obtain the file for uap->fd with fget() and delegate the
  * ownership change to the file's fo_chown() operation.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct fchown_args {
 	int	fd;
 	int	uid;
 	int	gid;
 };
 #endif
 int
 sys_fchown(struct thread *td, struct fchown_args *uap)
 {
 	struct file *fp;
 	int rc;
 
 	AUDIT_ARG_FD(uap->fd);
 	AUDIT_ARG_OWNER(uap->uid, uap->gid);
 	rc = fget(td, uap->fd, &cap_fchown_rights, &fp);
 	if (rc == 0) {
 		rc = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td);
 		fdrop(fp, td);
 	}
 	return (rc);
 }
 
 /*
  * Common implementation code for utimes(), lutimes(), and futimes().
  *
  * Produces two timespecs in tsp[0..1].  A NULL "usrtvp" means "now" for
  * both.  Otherwise the two timevals are read (copied in unless "tvpseg"
  * is UIO_SYSSPACE), their tv_usec fields are required to lie in
  * [0, 1000000) and they are converted to timespecs.  Returns 0, EINVAL or
  * the copyin() error.
  */
 static int
 getutimes(const struct timeval *usrtvp, enum uio_seg tvpseg,
     struct timespec *tsp)
 {
 	struct timeval kbuf[2];
 	const struct timeval *src;
 	int i, rc;
 
 	if (usrtvp == NULL) {
 		vfs_timestamp(&tsp[0]);
 		tsp[1] = tsp[0];
 		return (0);
 	}
 
 	src = usrtvp;
 	if (tvpseg != UIO_SYSSPACE) {
 		rc = copyin(usrtvp, kbuf, sizeof(kbuf));
 		if (rc != 0)
 			return (rc);
 		src = kbuf;
 	}
 
 	/* Validate both entries before writing anything to tsp. */
 	for (i = 0; i < 2; i++) {
 		if (src[i].tv_usec < 0 || src[i].tv_usec >= 1000000)
 			return (EINVAL);
 	}
 	for (i = 0; i < 2; i++)
 		TIMEVAL_TO_TIMESPEC(&src[i], &tsp[i]);
 	return (0);
 }
 
 /*
  * Common implementation code for futimens(), utimensat().
  *
  * Produces two timespecs in tsp[0..1] from "usrtsp" (copied in unless
  * "tspseg" is UIO_SYSSPACE) and reports extra information in *retflags:
  *
  *   UTIMENS_NULL  usrtsp was NULL, or both tv_nsec fields were UTIME_NOW.
  *   UTIMENS_EXIT  both tv_nsec fields were UTIME_OMIT.
  *
  * Per entry, UTIME_OMIT sets tv_sec to VNOVAL, UTIME_NOW substitutes the
  * timestamp taken at function entry, and any other tv_nsec outside
  * [0, 1000000000) yields EINVAL.  Returns 0, EINVAL or the copyin() error.
  */
 #define	UTIMENS_NULL	0x1
 #define	UTIMENS_EXIT	0x2
 static int
 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg,
     struct timespec *tsp, int *retflags)
 {
 	struct timespec tsnow;
 	int error;
 
 	/* One timestamp is taken up front and reused for every "now". */
 	vfs_timestamp(&tsnow);
 	*retflags = 0;
 	if (usrtsp == NULL) {
 		tsp[0] = tsnow;
 		tsp[1] = tsnow;
 		*retflags |= UTIMENS_NULL;
 		return (0);
 	}
 	if (tspseg == UIO_SYSSPACE) {
 		tsp[0] = usrtsp[0];
 		tsp[1] = usrtsp[1];
 	} else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0)
 		return (error);
 	/*
 	 * The flags must be derived here, before the entries are rewritten
 	 * in place below.
 	 */
 	if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT)
 		*retflags |= UTIMENS_EXIT;
 	if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW)
 		*retflags |= UTIMENS_NULL;
 	/* First entry. */
 	if (tsp[0].tv_nsec == UTIME_OMIT)
 		tsp[0].tv_sec = VNOVAL;
 	else if (tsp[0].tv_nsec == UTIME_NOW)
 		tsp[0] = tsnow;
 	else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L)
 		return (EINVAL);
 	/* Second entry, same rules. */
 	if (tsp[1].tv_nsec == UTIME_OMIT)
 		tsp[1].tv_sec = VNOVAL;
 	else if (tsp[1].tv_nsec == UTIME_NOW)
 		tsp[1] = tsnow;
 	else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L)
 		return (EINVAL);
 
 	return (0);
 }
 
 /*
  * Common implementation code for utimes(), lutimes(), futimes(), futimens(),
  * and utimensat().
  *
  * Applies ts[0] as the access time and ts[1] as the modification time of
  * "vp".  When "numtimes" is greater than 2, ts[2] is applied as the birth
  * time.  When it is less than 3 and the new modification time is earlier
  * than the current birth time, the birth time is set to ts[1] as well.
  * A non-zero "nullflag" sets VA_UTIMES_NULL in the attribute flags.
  */
 static int
 setutimes(struct thread *td, struct vnode *vp, const struct timespec *ts,
     int numtimes, int nullflag)
 {
 	struct mount *mp;
 	struct vattr vattr;
 	int error, setbirthtime;
 
 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 		return (error);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	setbirthtime = 0;
 	/*
 	 * The current attributes are fetched only to compare the birth
 	 * time; a VOP_GETATTR() failure simply leaves setbirthtime clear.
 	 */
 	if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) &&
 	    timespeccmp(&ts[1], &vattr.va_birthtime, < ))
 		setbirthtime = 1;
 	/* vattr is reused from here on for the attributes to set. */
 	VATTR_NULL(&vattr);
 	vattr.va_atime = ts[0];
 	vattr.va_mtime = ts[1];
 	if (setbirthtime)
 		vattr.va_birthtime = ts[1];
 	if (numtimes > 2)
 		vattr.va_birthtime = ts[2];
 	if (nullflag)
 		vattr.va_vaflags |= VA_UTIMES_NULL;
 #ifdef MAC
 	error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime,
 	    vattr.va_mtime);
 #endif
 	/*
 	 * Without MAC, error is still 0 here because vn_start_write()
 	 * succeeded above.
 	 */
 	if (error == 0)
 		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 	VOP_UNLOCK(vp, 0);
 	vn_finished_write(mp);
 	return (error);
 }
 
 /*
  * Set the access and modification times of a file.
  *
  * utimes(): wrapper around kern_utimesat() using the current working
  * directory; both the path and the times live in user space.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct utimes_args {
 	char	*path;
 	struct	timeval *tptr;
 };
 #endif
 int
 sys_utimes(struct thread *td, struct utimes_args *uap)
 {
 	int rc;
 
 	rc = kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->tptr,
 	    UIO_USERSPACE);
 	return (rc);
 }
 
 /*
  * futimesat(): wrapper around kern_utimesat() that resolves uap->path
  * relative to uap->fd; both the path and the times live in user space.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct futimesat_args {
 	int fd;
 	const char * path;
 	const struct timeval * times;
 };
 #endif
 int
 sys_futimesat(struct thread *td, struct futimesat_args *uap)
 {
 	int rc;
 
 	rc = kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, uap->times,
 	    UIO_USERSPACE);
 	return (rc);
 }
 
 /*
  * Set the access and modification times of "path" (looked up relative to
  * "fd", following a trailing symbolic link) from the two timevals at
  * "tptr".  The times are fetched and validated by getutimes() before the
  * lookup; a NULL "tptr" is passed on to setutimes() as its nullflag.
  */
 int
 kern_utimesat(struct thread *td, int fd, const char *path,
     enum uio_seg pathseg, struct timeval *tptr, enum uio_seg tptrseg)
 {
 	struct timespec ts[2];
 	struct nameidata nd;
 	int rc;
 
 	rc = getutimes(tptr, tptrseg, ts);
 	if (rc != 0)
 		return (rc);
 	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd,
 	    &cap_futimes_rights, td);
 
 	rc = namei(&nd);
 	if (rc != 0)
 		return (rc);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	rc = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
 	vrele(nd.ni_vp);
 	return (rc);
 }
 
 /*
  * Set the access and modification times of a file.
  *
  * lutimes(): wrapper around kern_lutimes(), which performs the lookup
  * with NOFOLLOW; both the path and the times live in user space.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct lutimes_args {
 	char	*path;
 	struct	timeval *tptr;
 };
 #endif
 int
 sys_lutimes(struct thread *td, struct lutimes_args *uap)
 {
 	int rc;
 
 	rc = kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr,
 	    UIO_USERSPACE);
 	return (rc);
 }
 
 /*
  * Set the access and modification times of "path" without following a
  * trailing symbolic link.  The times are fetched and validated by
  * getutimes() before the lookup; a NULL "tptr" is passed on to
  * setutimes() as its nullflag.
  */
 int
 kern_lutimes(struct thread *td, const char *path, enum uio_seg pathseg,
     struct timeval *tptr, enum uio_seg tptrseg)
 {
 	struct nameidata nd;
 	struct timespec ts[2];
 	int rc;
 
 	rc = getutimes(tptr, tptrseg, ts);
 	if (rc != 0)
 		return (rc);
 	NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td);
 	rc = namei(&nd);
 	if (rc != 0)
 		return (rc);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	rc = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL);
 	vrele(nd.ni_vp);
 	return (rc);
 }
 
 /*
  * Set the access and modification times of a file.
  *
  * futimes(): wrapper around kern_futimes() for the open file uap->fd;
  * the times live in user space.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct futimes_args {
 	int	fd;
 	struct	timeval *tptr;
 };
 #endif
 int
 sys_futimes(struct thread *td, struct futimes_args *uap)
 {
 	int rc;
 
 	rc = kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE);
 	return (rc);
 }
 
 /*
  * Set the access and modification times of the file open on "fd" from
  * the two timevals at "tptr".  The times are fetched and validated by
  * getutimes() before the descriptor is resolved; a NULL "tptr" is passed
  * on to setutimes() as its nullflag.
  */
 int
 kern_futimes(struct thread *td, int fd, struct timeval *tptr,
     enum uio_seg tptrseg)
 {
 	struct timespec ts[2];
 	struct vnode *vp;
 	struct file *fp;
 	int rc;
 
 	AUDIT_ARG_FD(fd);
 	rc = getutimes(tptr, tptrseg, ts);
 	if (rc != 0)
 		return (rc);
 	rc = getvnode(td, fd, &cap_futimes_rights, &fp);
 	if (rc != 0)
 		return (rc);
 	vp = fp->f_vnode;
 #ifdef AUDIT
 	vn_lock(vp, LK_SHARED | LK_RETRY);
 	AUDIT_ARG_VNODE1(vp);
 	VOP_UNLOCK(vp, 0);
 #endif
 	rc = setutimes(td, vp, ts, 2, tptr == NULL);
 	fdrop(fp, td);
 	return (rc);
 }
 
 /*
  * futimens(): wrapper around kern_futimens() for the open file uap->fd;
  * the times live in user space.
  */
 int
 sys_futimens(struct thread *td, struct futimens_args *uap)
 {
 	int rc;
 
 	rc = kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE);
 	return (rc);
 }
 
 /*
  * Set the access and modification times of the file open on "fd" from
  * the two timespecs at "tptr", as interpreted by getutimens().  When
  * getutimens() reports UTIMENS_EXIT the function returns 0 without
  * resolving the descriptor at all.
  */
 int
 kern_futimens(struct thread *td, int fd, struct timespec *tptr,
     enum uio_seg tptrseg)
 {
 	struct timespec ts[2];
 	struct vnode *vp;
 	struct file *fp;
 	int flags, rc;
 
 	AUDIT_ARG_FD(fd);
 	rc = getutimens(tptr, tptrseg, ts, &flags);
 	if (rc != 0)
 		return (rc);
 	if ((flags & UTIMENS_EXIT) != 0)
 		return (0);
 	rc = getvnode(td, fd, &cap_futimes_rights, &fp);
 	if (rc != 0)
 		return (rc);
 	vp = fp->f_vnode;
 #ifdef AUDIT
 	vn_lock(vp, LK_SHARED | LK_RETRY);
 	AUDIT_ARG_VNODE1(vp);
 	VOP_UNLOCK(vp, 0);
 #endif
 	rc = setutimes(td, vp, ts, 2, flags & UTIMENS_NULL);
 	fdrop(fp, td);
 	return (rc);
 }
 
 /*
  * utimensat(): wrapper around kern_utimensat(); both the path and the
  * times live in user space.
  */
 int
 sys_utimensat(struct thread *td, struct utimensat_args *uap)
 {
 	int rc;
 
 	rc = kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, uap->times,
 	    UIO_USERSPACE, uap->flag);
 	return (rc);
 }
 
 /*
  * Set the access and modification times of "path" (looked up relative to
  * "fd") from the two timespecs at "tptr", as interpreted by getutimens().
  *
  * "flag" may contain only AT_SYMLINK_NOFOLLOW and AT_BENEATH; any other
  * bit yields EINVAL.  The lookup is performed even when both times are
  * to be omitted; in that case setutimes() is skipped and the result of
  * the lookup alone decides the return value.
  */
 int
 kern_utimensat(struct thread *td, int fd, const char *path,
     enum uio_seg pathseg, struct timespec *tptr, enum uio_seg tptrseg,
     int flag)
 {
 	struct nameidata nd;
 	struct timespec ts[2];
 	int error, flags;
 
 	if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_BENEATH)) != 0)
 		return (EINVAL);
 
 	if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0)
 		return (error);
 	NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW :
 	    FOLLOW) |  ((flag & AT_BENEATH) != 0 ? BENEATH : 0) | AUDITVNODE1,
 	    pathseg, path, fd, &cap_futimes_rights, td);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	/*
 	 * We are allowed to call namei() regardless of 2xUTIME_OMIT.
 	 * POSIX states:
 	 * "If both tv_nsec fields are UTIME_OMIT... EACCES may be detected."
 	 * "Search permission is denied by a component of the path prefix."
 	 */
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	/* error is 0 here, so UTIMENS_EXIT makes the call return success. */
 	if ((flags & UTIMENS_EXIT) == 0)
 		error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL);
 	vrele(nd.ni_vp);
 	return (error);
 }
 
 /*
  * Truncate a file given its path name.
  *
  * truncate(): wrapper around kern_truncate() with the path taken from
  * user space.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct truncate_args {
 	char	*path;
 	int	pad;
 	off_t	length;
 };
 #endif
 int
 sys_truncate(struct thread *td, struct truncate_args *uap)
 {
 	int rc;
 
 	rc = kern_truncate(td, uap->path, UIO_USERSPACE, uap->length);
 	return (rc);
 }
 
 /*
  * Set the size of the file named by "path" to "length".
  *
  * Returns EINVAL for a negative length, EISDIR when the path names a
  * directory, or the error from namei(), vn_start_write(), the MAC check,
  * vn_writechk(), VOP_ACCESS() or VOP_SETATTR(); 0 on success.
  */
 int
 kern_truncate(struct thread *td, const char *path, enum uio_seg pathseg,
     off_t length)
 {
 	struct mount *mp;
 	struct vnode *vp;
 	void *rl_cookie;
 	struct vattr vattr;
 	struct nameidata nd;
 	int error;
 
 	if (length < 0)
 		return(EINVAL);
 	NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	vp = nd.ni_vp;
 	/* Write-lock the range [0, OFF_MAX) before starting the write. */
 	rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX);
 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) {
 		vn_rangelock_unlock(vp, rl_cookie);
 		vrele(vp);
 		return (error);
 	}
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	if (vp->v_type == VDIR)
 		error = EISDIR;
 #ifdef MAC
 	/*
 	 * The empty body is intentional: the condition only records the
 	 * MAC verdict in error and, on failure, skips the branch below.
 	 */
 	else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) {
 	}
 #endif
 	/* The size is changed only if both write checks succeed. */
 	else if ((error = vn_writechk(vp)) == 0 &&
 	    (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) {
 		VATTR_NULL(&vattr);
 		vattr.va_size = length;
 		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
 	}
 	/* Tear down in the reverse order of acquisition. */
 	VOP_UNLOCK(vp, 0);
 	vn_finished_write(mp);
 	vn_rangelock_unlock(vp, rl_cookie);
 	vrele(vp);
 	return (error);
 }
 
 #if defined(COMPAT_43)
 /*
  * Truncate a file given its path name.
  *
  * Old-style truncate whose length argument is a long; it is simply
  * forwarded to kern_truncate().
  */
 #ifndef _SYS_SYSPROTO_H_
 struct otruncate_args {
 	char	*path;
 	long	length;
 };
 #endif
 int
 otruncate(struct thread *td, struct otruncate_args *uap)
 {
 	int rc;
 
 	rc = kern_truncate(td, uap->path, UIO_USERSPACE, uap->length);
 	return (rc);
 }
 #endif /* COMPAT_43 */
 
 #if defined(COMPAT_FREEBSD6)
 /* Versions with the pad argument */
 /*
  * FreeBSD 6 compatible truncate(): forwards the path and length to
  * kern_truncate().
  */
 int
 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap)
 {
 	int rc;
 
 	rc = kern_truncate(td, uap->path, UIO_USERSPACE, uap->length);
 	return (rc);
 }
 
 /*
  * FreeBSD 6 compatible ftruncate(): forwards the descriptor and length
  * to kern_ftruncate().
  */
 int
 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap)
 {
 	int rc;
 
 	rc = kern_ftruncate(td, uap->fd, uap->length);
 	return (rc);
 }
 #endif
 
 /*
  * Common implementation of fsync() and fdatasync() for the file open on
  * "fd".  "fullsync" selects VOP_FSYNC() with MNT_WAIT; otherwise
  * VOP_FDATASYNC() is used.  Returns 0 or the error from getvnode(),
  * vn_start_write() or the selected VOP.
  */
 int
 kern_fsync(struct thread *td, int fd, bool fullsync)
 {
 	struct vnode *vp;
 	struct mount *mp;
 	struct file *fp;
 	int error, lock_flags;
 
 	AUDIT_ARG_FD(fd);
 	error = getvnode(td, fd, &cap_fsync_rights, &fp);
 	if (error != 0)
 		return (error);
 	vp = fp->f_vnode;
 #if 0
 	if (!fullsync)
 		/* XXXKIB: complete outstanding aio writes */;
 #endif
 	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
 	if (error != 0)
 		goto drop;
 	/*
 	 * A shared vnode lock is enough when the mount point allows shared
 	 * writes; vp->v_mount is consulted when vn_start_write() returned
 	 * no mount point.
 	 */
 	if (MNT_SHARED_WRITES(mp) ||
 	    ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) {
 		lock_flags = LK_SHARED;
 	} else {
 		lock_flags = LK_EXCLUSIVE;
 	}
 	vn_lock(vp, lock_flags | LK_RETRY);
 	AUDIT_ARG_VNODE1(vp);
 	/* Clean the pages of the backing VM object, if there is one. */
 	if (vp->v_object != NULL) {
 		VM_OBJECT_WLOCK(vp->v_object);
 		vm_object_page_clean(vp->v_object, 0, 0, 0);
 		VM_OBJECT_WUNLOCK(vp->v_object);
 	}
 	error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td);
 	VOP_UNLOCK(vp, 0);
 	vn_finished_write(mp);
 drop:
 	/* The file reference from getvnode() is dropped on every path. */
 	fdrop(fp, td);
 	return (error);
 }
 
 /*
  * Sync an open file.
  *
  * fsync(): calls kern_fsync() with fullsync set to true.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct fsync_args {
 	int	fd;
 };
 #endif
 int
 sys_fsync(struct thread *td, struct fsync_args *uap)
 {
 	int rc;
 
 	rc = kern_fsync(td, uap->fd, true);
 	return (rc);
 }
 
 /*
  * fdatasync(): calls kern_fsync() with fullsync set to false.
  */
 int
 sys_fdatasync(struct thread *td, struct fdatasync_args *uap)
 {
 	int rc;
 
 	rc = kern_fsync(td, uap->fd, false);
 	return (rc);
 }
 
 /*
  * Rename files.  Source and destination must either both be directories, or
  * both not be directories.  If target is a directory, it must be empty.
  *
  * rename(): wrapper around kern_renameat() with both paths resolved
  * relative to the current working directory.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct rename_args {
 	char	*from;
 	char	*to;
 };
 #endif
 int
 sys_rename(struct thread *td, struct rename_args *uap)
 {
 	int rc;
 
 	rc = kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, uap->to,
 	    UIO_USERSPACE);
 	return (rc);
 }
 
 /*
  * renameat(): wrapper around kern_renameat() with the old path resolved
  * relative to uap->oldfd and the new path relative to uap->newfd.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct renameat_args {
 	int	oldfd;
 	char	*old;
 	int	newfd;
 	char	*new;
 };
 #endif
 int
 sys_renameat(struct thread *td, struct renameat_args *uap)
 {
 	int rc;
 
 	rc = kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new,
 	    UIO_USERSPACE);
 	return (rc);
 }
 
 /*
  * Rename "old" (looked up relative to "oldfd") to "new" (looked up
  * relative to "newfd").
  *
  * Both names are looked up, a set of sanity checks is applied and the
  * actual work is handed to VOP_RENAME().  Internally error == -1 means
  * "source and target are the same vnode, nothing to do" and is turned
  * into a 0 return value at the end.
  */
 int
 kern_renameat(struct thread *td, int oldfd, const char *old, int newfd,
     const char *new, enum uio_seg pathseg)
 {
 	struct mount *mp = NULL;
 	struct vnode *tvp, *fvp, *tdvp;
 	struct nameidata fromnd, tond;
 	int error;
 
 	/* Restart point used when the write could not be started at once. */
 again:
 	bwillwrite();
 	/*
 	 * Source lookup.  With MAC the parent and leaf are requested locked
 	 * for the check below and unlocked right after it.
 	 */
 #ifdef MAC
 	NDINIT_ATRIGHTS(&fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART |
 	    AUDITVNODE1, pathseg, old, oldfd,
 	    &cap_renameat_source_rights, td);
 #else
 	NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1,
 	    pathseg, old, oldfd,
 	    &cap_renameat_source_rights, td);
 #endif
 
 	if ((error = namei(&fromnd)) != 0)
 		return (error);
 #ifdef MAC
 	/*
 	 * NOTE(review): the result of this check is overwritten by the
 	 * target namei() below before it is examined - confirm this is
 	 * intended.
 	 */
 	error = mac_vnode_check_rename_from(td->td_ucred, fromnd.ni_dvp,
 	    fromnd.ni_vp, &fromnd.ni_cnd);
 	VOP_UNLOCK(fromnd.ni_dvp, 0);
 	if (fromnd.ni_dvp != fromnd.ni_vp)
 		VOP_UNLOCK(fromnd.ni_vp, 0);
 #endif
 	fvp = fromnd.ni_vp;
 	/* Target lookup; a directory source marks the target WILLBEDIR. */
 	NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE |
 	    SAVESTART | AUDITVNODE2, pathseg, new, newfd,
 	    &cap_renameat_target_rights, td);
 	if (fromnd.ni_vp->v_type == VDIR)
 		tond.ni_cnd.cn_flags |= WILLBEDIR;
 	if ((error = namei(&tond)) != 0) {
 		/* Translate error code for rename("dir1", "dir2/."). */
 		if (error == EISDIR && fvp->v_type == VDIR)
 			error = EINVAL;
 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
 		vrele(fromnd.ni_dvp);
 		vrele(fvp);
 		goto out1;
 	}
 	tdvp = tond.ni_dvp;
 	tvp = tond.ni_vp;
 	error = vn_start_write(fvp, &mp, V_NOWAIT);
 	if (error != 0) {
 		/*
 		 * The write could not be started without waiting: give back
 		 * everything obtained from both lookups, wait with
 		 * V_XSLEEP, then redo the whole operation from the top.
 		 */
 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
 		NDFREE(&tond, NDF_ONLY_PNBUF);
 		if (tvp != NULL)
 			vput(tvp);
 		if (tdvp == tvp)
 			vrele(tdvp);
 		else
 			vput(tdvp);
 		vrele(fromnd.ni_dvp);
 		vrele(fvp);
 		vrele(tond.ni_startdir);
 		if (fromnd.ni_startdir != NULL)
 			vrele(fromnd.ni_startdir);
 		error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
 		if (error != 0)
 			return (error);
 		goto again;
 	}
 	if (tvp != NULL) {
 		/* An existing target must be of the same kind as the source. */
 		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
 			error = ENOTDIR;
 			goto out;
 		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
 			error = EISDIR;
 			goto out;
 		}
 #ifdef CAPABILITIES
 		if (newfd != AT_FDCWD && (tond.ni_resflags & NIRES_ABS) == 0) {
 			/*
 			 * If the target already exists we require CAP_UNLINKAT
 			 * from 'newfd', when newfd was used for the lookup.
 			 */
 			error = cap_check(&tond.ni_filecaps.fc_rights,
 			    &cap_unlinkat_rights);
 			if (error != 0)
 				goto out;
 		}
 #endif
 	}
 	/* The source may not be the directory that is to hold the target. */
 	if (fvp == tdvp) {
 		error = EINVAL;
 		goto out;
 	}
 	/*
 	 * If the source is the same as the destination (that is, if they
 	 * are links to the same vnode), then there is nothing to do.
 	 */
 	if (fvp == tvp)
 		error = -1;
 #ifdef MAC
 	else
 		error = mac_vnode_check_rename_to(td->td_ucred, tdvp,
 		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
 #endif
 out:
 	if (error == 0) {
 		/*
 		 * No vnode is released here after the call; presumably
 		 * VOP_RENAME() consumes the references and locks of all
 		 * four vnodes - confirm against VOP_RENAME(9).
 		 */
 		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
 		    tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
 		NDFREE(&tond, NDF_ONLY_PNBUF);
 	} else {
 		/* Nothing was renamed: release everything by hand. */
 		NDFREE(&fromnd, NDF_ONLY_PNBUF);
 		NDFREE(&tond, NDF_ONLY_PNBUF);
 		if (tvp != NULL)
 			vput(tvp);
 		if (tdvp == tvp)
 			vrele(tdvp);
 		else
 			vput(tdvp);
 		vrele(fromnd.ni_dvp);
 		vrele(fvp);
 	}
 	vrele(tond.ni_startdir);
 	vn_finished_write(mp);
 out1:
 	if (fromnd.ni_startdir)
 		vrele(fromnd.ni_startdir);
 	/* Map the internal "nothing to do" marker to success. */
 	if (error == -1)
 		return (0);
 	return (error);
 }
 
 /*
  * Make a directory file.
  *
  * mkdir(): wrapper around kern_mkdirat() with the path resolved relative
  * to the current working directory.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct mkdir_args {
 	char	*path;
 	int	mode;
 };
 #endif
 int
 sys_mkdir(struct thread *td, struct mkdir_args *uap)
 {
 	int rc;
 
 	rc = kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->mode);
 	return (rc);
 }
 
 /*
  * mkdirat(): wrapper around kern_mkdirat() with the path resolved
  * relative to uap->fd.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct mkdirat_args {
 	int	fd;
 	char	*path;
 	mode_t	mode;
 };
 #endif
 int
 sys_mkdirat(struct thread *td, struct mkdirat_args *uap)
 {
 	int rc;
 
 	rc = kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode);
 	return (rc);
 }
 
 /*
  * Create the directory "path" (looked up relative to "fd") with the
  * permission bits of "mode" (masked with ACCESSPERMS) minus the bits set
  * in the process file creation mask.
  *
  * Returns EEXIST when the name already exists, otherwise 0 or the error
  * from namei(), vn_start_write(), the MAC check or VOP_MKDIR().
  */
 int
 kern_mkdirat(struct thread *td, int fd, const char *path, enum uio_seg segflg,
     int mode)
 {
 	struct mount *mp;
 	struct vnode *vp;
 	struct vattr vattr;
 	struct nameidata nd;
 	int error;
 
 	AUDIT_ARG_MODE(mode);
 	/* Restart point used when the write could not be started at once. */
 restart:
 	bwillwrite();
 	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
 	    NOCACHE, segflg, path, fd, &cap_mkdirat_rights,
 	    td);
 	nd.ni_cnd.cn_flags |= WILLBEDIR;
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	vp = nd.ni_vp;
 	if (vp != NULL) {
 		/* The name already exists: release the lookup results. */
 		NDFREE(&nd, NDF_ONLY_PNBUF);
 		/*
 		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
 		 * the strange behaviour of leaving the vnode unlocked
 		 * if the target is the same vnode as the parent.
 		 */
 		if (vp == nd.ni_dvp)
 			vrele(nd.ni_dvp);
 		else
 			vput(nd.ni_dvp);
 		vrele(vp);
 		return (EEXIST);
 	}
 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 		/*
 		 * The write could not be started without waiting: drop the
 		 * parent, wait with V_XSLEEP, then redo the lookup.
 		 */
 		NDFREE(&nd, NDF_ONLY_PNBUF);
 		vput(nd.ni_dvp);
 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
 			return (error);
 		goto restart;
 	}
 	VATTR_NULL(&vattr);
 	vattr.va_type = VDIR;
 	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
 #ifdef MAC
 	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
 	    &vattr);
 	if (error != 0)
 		goto out;
 #endif
 	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
 	/* The label is only referenced from the MAC-only code above. */
 #ifdef MAC
 out:
 #endif
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vput(nd.ni_dvp);
 	/* The new directory vnode is released only if one was created. */
 	if (error == 0)
 		vput(nd.ni_vp);
 	vn_finished_write(mp);
 	return (error);
 }
 
 /*
  * Remove a directory file.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct rmdir_args {
 	char	*path;
 };
 #endif
 int
 sys_rmdir(struct thread *td, struct rmdir_args *uap)
 {
 
-	return (kern_rmdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 0));
+	return (kern_frmdirat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE,
+	    0));
 }
 
 int
-kern_rmdirat(struct thread *td, int fd, const char *path, enum uio_seg pathseg,
-    int flag)
+kern_frmdirat(struct thread *td, int dfd, const char *path, int fd,
+    enum uio_seg pathseg, int flag)
 {
 	struct mount *mp;
 	struct vnode *vp;
+	struct file *fp;
 	struct nameidata nd;
+	cap_rights_t rights;
 	int error;
 
+	fp = NULL;
+	if (fd != FD_NONE) {
+		error = getvnode(td, fd, cap_rights_init(&rights, CAP_LOOKUP),
+		    &fp);
+		if (error != 0)
+			return (error);
+	}
+
 restart:
 	bwillwrite();
 	NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 |
 	    ((flag & AT_BENEATH) != 0 ? BENEATH : 0),
-	    pathseg, path, fd, &cap_unlinkat_rights, td);
+	    pathseg, path, dfd, &cap_unlinkat_rights, td);
 	if ((error = namei(&nd)) != 0)
-		return (error);
+		goto fdout;
 	vp = nd.ni_vp;
 	if (vp->v_type != VDIR) {
 		error = ENOTDIR;
 		goto out;
 	}
 	/*
 	 * No rmdir "." please.
 	 */
 	if (nd.ni_dvp == vp) {
 		error = EINVAL;
 		goto out;
 	}
 	/*
 	 * The root of a mounted filesystem cannot be deleted.
 	 */
 	if (vp->v_vflag & VV_ROOT) {
 		error = EBUSY;
 		goto out;
 	}
+
+	if (fp != NULL && fp->f_vnode != vp) {
+		if ((fp->f_vnode->v_iflag & VI_DOOMED) != 0)
+			error = EBADF;
+		else
+			error = EDEADLK;
+		goto out;
+	}
+
 #ifdef MAC
 	error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp,
 	    &nd.ni_cnd);
 	if (error != 0)
 		goto out;
 #endif
 	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
 		NDFREE(&nd, NDF_ONLY_PNBUF);
 		vput(vp);
 		if (nd.ni_dvp == vp)
 			vrele(nd.ni_dvp);
 		else
 			vput(nd.ni_dvp);
 		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
-			return (error);
+			goto fdout;
 		goto restart;
 	}
 	vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK);
 	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
 	vn_finished_write(mp);
 out:
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vput(vp);
 	if (nd.ni_dvp == vp)
 		vrele(nd.ni_dvp);
 	else
 		vput(nd.ni_dvp);
+fdout:
+	if (fp != NULL)
+		fdrop(fp, td);
 	return (error);
 }
 
 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11)
 int
 freebsd11_kern_getdirentries(struct thread *td, int fd, char *ubuf, u_int count,
     long *basep, void (*func)(struct freebsd11_dirent *))
 {
 	struct freebsd11_dirent dstdp;
 	struct dirent *dp, *edp;
 	char *dirbuf;
 	off_t base;
 	ssize_t resid, ucount;
 	int error;
 
 	/* XXX arbitrary sanity limit on `count'. */
 	count = min(count, 64 * 1024);
 
 	dirbuf = malloc(count, M_TEMP, M_WAITOK);
 
 	error = kern_getdirentries(td, fd, dirbuf, count, &base, &resid,
 	    UIO_SYSSPACE);
 	if (error != 0)
 		goto done;
 	if (basep != NULL)
 		*basep = base;
 
 	ucount = 0;
 	for (dp = (struct dirent *)dirbuf,
 	    edp = (struct dirent *)&dirbuf[count - resid];
 	    ucount < count && dp < edp; ) {
 		if (dp->d_reclen == 0)
 			break;
 		MPASS(dp->d_reclen >= _GENERIC_DIRLEN(0));
 		if (dp->d_namlen >= sizeof(dstdp.d_name))
 			continue;
 		dstdp.d_type = dp->d_type;
 		dstdp.d_namlen = dp->d_namlen;
 		dstdp.d_fileno = dp->d_fileno;		/* truncate */
 		if (dstdp.d_fileno != dp->d_fileno) {
 			switch (ino64_trunc_error) {
 			default:
 			case 0:
 				break;
 			case 1:
 				error = EOVERFLOW;
 				goto done;
 			case 2:
 				dstdp.d_fileno = UINT32_MAX;
 				break;
 			}
 		}
 		dstdp.d_reclen = sizeof(dstdp) - sizeof(dstdp.d_name) +
 		    ((dp->d_namlen + 1 + 3) &~ 3);
 		bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen);
 		bzero(dstdp.d_name + dstdp.d_namlen,
 		    dstdp.d_reclen - offsetof(struct freebsd11_dirent, d_name) -
 		    dstdp.d_namlen);
 		MPASS(dstdp.d_reclen <= dp->d_reclen);
 		MPASS(ucount + dstdp.d_reclen <= count);
 		if (func != NULL)
 			func(&dstdp);
 		error = copyout(&dstdp, ubuf + ucount, dstdp.d_reclen);
 		if (error != 0)
 			break;
 		dp = (struct dirent *)((char *)dp + dp->d_reclen);
 		ucount += dstdp.d_reclen;
 	}
 
 done:
 	free(dirbuf, M_TEMP);
 	if (error == 0)
 		td->td_retval[0] = ucount;
 	return (error);
 }
 #endif /* COMPAT */
 
 #ifdef COMPAT_43
 static void
 ogetdirentries_cvt(struct freebsd11_dirent *dp)
 {
 #if (BYTE_ORDER == LITTLE_ENDIAN)
 	/*
 	 * The expected low byte of dp->d_namlen is our dp->d_type.
 	 * The high MBZ byte of dp->d_namlen is our dp->d_namlen.
 	 */
 	dp->d_type = dp->d_namlen;
 	dp->d_namlen = 0;
 #else
 	/*
 	 * The dp->d_type is the high byte of the expected dp->d_namlen,
 	 * so must be zero'ed.
 	 */
 	dp->d_type = 0;
 #endif
 }
 
 /*
  * Read a block of directory entries in a filesystem independent format.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct ogetdirentries_args {
 	int	fd;
 	char	*buf;
 	u_int	count;
 	long	*basep;
 };
 #endif
 int
 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap)
 {
 	long loff;
 	int error;
 
 	error = kern_ogetdirentries(td, uap, &loff);
 	if (error == 0)
 		error = copyout(&loff, uap->basep, sizeof(long));
 	return (error);
 }
 
 int
 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap,
     long *ploff)
 {
 	long base;
 	int error;
 
 	/* XXX arbitrary sanity limit on `count'. */
 	if (uap->count > 64 * 1024)
 		return (EINVAL);
 
 	error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count,
 	    &base, ogetdirentries_cvt);
 
 	if (error == 0 && uap->basep != NULL)
 		error = copyout(&base, uap->basep, sizeof(long));
 
 	return (error);
 }
 #endif /* COMPAT_43 */
 
 #if defined(COMPAT_FREEBSD11)
 #ifndef _SYS_SYSPROTO_H_
 struct freebsd11_getdirentries_args {
 	int	fd;
 	char	*buf;
 	u_int	count;
 	long	*basep;
 };
 #endif
 int
 freebsd11_getdirentries(struct thread *td,
     struct freebsd11_getdirentries_args *uap)
 {
 	long base;
 	int error;
 
 	error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count,
 	    &base, NULL);
 
 	if (error == 0 && uap->basep != NULL)
 		error = copyout(&base, uap->basep, sizeof(long));
 	return (error);
 }
 
 int
 freebsd11_getdents(struct thread *td, struct freebsd11_getdents_args *uap)
 {
 	struct freebsd11_getdirentries_args ap;
 
 	ap.fd = uap->fd;
 	ap.buf = uap->buf;
 	ap.count = uap->count;
 	ap.basep = NULL;
 	return (freebsd11_getdirentries(td, &ap));
 }
 #endif /* COMPAT_FREEBSD11 */
 
 /*
  * Read a block of directory entries in a filesystem independent format.
  */
 int
 sys_getdirentries(struct thread *td, struct getdirentries_args *uap)
 {
 	off_t base;
 	int error;
 
 	error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base,
 	    NULL, UIO_USERSPACE);
 	if (error != 0)
 		return (error);
 	if (uap->basep != NULL)
 		error = copyout(&base, uap->basep, sizeof(off_t));
 	return (error);
 }
 
 int
 kern_getdirentries(struct thread *td, int fd, char *buf, size_t count,
     off_t *basep, ssize_t *residp, enum uio_seg bufseg)
 {
 	struct vnode *vp;
 	struct file *fp;
 	struct uio auio;
 	struct iovec aiov;
 	off_t loff;
 	int error, eofflag;
 	off_t foffset;
 
 	AUDIT_ARG_FD(fd);
 	if (count > IOSIZE_MAX)
 		return (EINVAL);
 	auio.uio_resid = count;
 	error = getvnode(td, fd, &cap_read_rights, &fp);
 	if (error != 0)
 		return (error);
 	if ((fp->f_flag & FREAD) == 0) {
 		fdrop(fp, td);
 		return (EBADF);
 	}
 	vp = fp->f_vnode;
 	foffset = foffset_lock(fp, 0);
 unionread:
 	if (vp->v_type != VDIR) {
 		error = EINVAL;
 		goto fail;
 	}
 	aiov.iov_base = buf;
 	aiov.iov_len = count;
 	auio.uio_iov = &aiov;
 	auio.uio_iovcnt = 1;
 	auio.uio_rw = UIO_READ;
 	auio.uio_segflg = bufseg;
 	auio.uio_td = td;
 	vn_lock(vp, LK_SHARED | LK_RETRY);
 	AUDIT_ARG_VNODE1(vp);
 	loff = auio.uio_offset = foffset;
 #ifdef MAC
 	error = mac_vnode_check_readdir(td->td_ucred, vp);
 	if (error == 0)
 #endif
 		error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL,
 		    NULL);
 	foffset = auio.uio_offset;
 	if (error != 0) {
 		VOP_UNLOCK(vp, 0);
 		goto fail;
 	}
 	if (count == auio.uio_resid &&
 	    (vp->v_vflag & VV_ROOT) &&
 	    (vp->v_mount->mnt_flag & MNT_UNION)) {
 		struct vnode *tvp = vp;
 
 		vp = vp->v_mount->mnt_vnodecovered;
 		VREF(vp);
 		fp->f_vnode = vp;
 		fp->f_data = vp;
 		foffset = 0;
 		vput(tvp);
 		goto unionread;
 	}
 	VOP_UNLOCK(vp, 0);
 	*basep = loff;
 	if (residp != NULL)
 		*residp = auio.uio_resid;
 	td->td_retval[0] = count - auio.uio_resid;
 fail:
 	foffset_unlock(fp, foffset, 0);
 	fdrop(fp, td);
 	return (error);
 }
 
 /*
  * Set the mode mask for creation of filesystem nodes.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct umask_args {
 	int	newmask;
 };
 #endif
 int
 sys_umask(struct thread *td, struct umask_args *uap)
 {
 	struct filedesc *fdp;
 
 	fdp = td->td_proc->p_fd;
 	FILEDESC_XLOCK(fdp);
 	td->td_retval[0] = fdp->fd_cmask;
 	fdp->fd_cmask = uap->newmask & ALLPERMS;
 	FILEDESC_XUNLOCK(fdp);
 	return (0);
 }
 
 /*
  * Void all references to file by ripping underlying filesystem away from
  * vnode.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct revoke_args {
 	char	*path;
 };
 #endif
 int
 sys_revoke(struct thread *td, struct revoke_args *uap)
 {
 	struct vnode *vp;
 	struct vattr vattr;
 	struct nameidata nd;
 	int error;
 
 	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
 	    uap->path, td);
 	if ((error = namei(&nd)) != 0)
 		return (error);
 	vp = nd.ni_vp;
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	if (vp->v_type != VCHR || vp->v_rdev == NULL) {
 		error = EINVAL;
 		goto out;
 	}
 #ifdef MAC
 	error = mac_vnode_check_revoke(td->td_ucred, vp);
 	if (error != 0)
 		goto out;
 #endif
 	error = VOP_GETATTR(vp, &vattr, td->td_ucred);
 	if (error != 0)
 		goto out;
 	if (td->td_ucred->cr_uid != vattr.va_uid) {
 		error = priv_check(td, PRIV_VFS_ADMIN);
 		if (error != 0)
 			goto out;
 	}
 	if (vcount(vp) > 1)
 		VOP_REVOKE(vp, REVOKEALL);
 out:
 	vput(vp);
 	return (error);
 }
 
 /*
  * Convert a user file descriptor to a kernel file entry and check that, if it
  * is a capability, the correct rights are present. A reference on the file
  * entry is held upon returning.
  */
 int
 getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp)
 {
 	struct file *fp;
 	int error;
 
 	error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &fp, NULL);
 	if (error != 0)
 		return (error);
 
 	/*
 	 * The file could be not of the vnode type, or it may be not
 	 * yet fully initialized, in which case the f_vnode pointer
 	 * may be set, but f_ops is still badfileops.  E.g.,
 	 * devfs_open() transiently create such situation to
 	 * facilitate csw d_fdopen().
 	 *
 	 * Dupfdopen() handling in kern_openat() installs the
 	 * half-baked file into the process descriptor table, allowing
 	 * other thread to dereference it. Guard against the race by
 	 * checking f_ops.
 	 */
 	if (fp->f_vnode == NULL || fp->f_ops == &badfileops) {
 		fdrop(fp, td);
 		return (EINVAL);
 	}
 	*fpp = fp;
 	return (0);
 }
 
 
 /*
  * Get an (NFS) file handle.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct lgetfh_args {
 	char *fname;
 	fhandle_t *fhp;
 };
 #endif
 int
 sys_lgetfh(struct thread *td, struct lgetfh_args *uap)
 {
 
 	return (kern_getfhat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->fname,
 	    UIO_USERSPACE, uap->fhp));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct getfh_args {
 	char *fname;
 	fhandle_t *fhp;
 };
 #endif
 int
 sys_getfh(struct thread *td, struct getfh_args *uap)
 {
 
 	return (kern_getfhat(td, 0, AT_FDCWD, uap->fname, UIO_USERSPACE,
 	    uap->fhp));
 }
 
 /*
  * syscall for the rpc.lockd to use to translate an open descriptor into
  * a NFS file handle.
  *
  * warning: do not remove the priv_check() call or this becomes one giant
  * security hole.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct getfhat_args {
 	int fd;
 	char *path;
 	fhandle_t *fhp;
 	int flags;
 };
 #endif
 int
 sys_getfhat(struct thread *td, struct getfhat_args *uap)
 {
 
 	if ((uap->flags & ~(AT_SYMLINK_NOFOLLOW | AT_BENEATH)) != 0)
 		return (EINVAL);
 	return (kern_getfhat(td, uap->flags, uap->fd, uap->path, UIO_USERSPACE,
 	    uap->fhp));
 }
 
 static int
 kern_getfhat(struct thread *td, int flags, int fd, const char *path,
     enum uio_seg pathseg, fhandle_t *fhp)
 {
 	struct nameidata nd;
 	fhandle_t fh;
 	struct vnode *vp;
 	int error;
 
 	error = priv_check(td, PRIV_VFS_GETFH);
 	if (error != 0)
 		return (error);
 	NDINIT_AT(&nd, LOOKUP, ((flags & AT_SYMLINK_NOFOLLOW) != 0 ? NOFOLLOW :
 	    FOLLOW) | ((flags & AT_BENEATH) != 0 ? BENEATH : 0) | LOCKLEAF |
 	    AUDITVNODE1, pathseg, path, fd, td);
 	error = namei(&nd);
 	if (error != 0)
 		return (error);
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	vp = nd.ni_vp;
 	bzero(&fh, sizeof(fh));
 	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
 	error = VOP_VPTOFH(vp, &fh.fh_fid);
 	vput(vp);
 	if (error == 0)
 		error = copyout(&fh, fhp, sizeof (fh));
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct fhlink_args {
 	fhandle_t *fhp;
 	const char *to;
 };
 #endif
 int
 sys_fhlink(struct thread *td, struct fhlink_args *uap)
 {
 
 	return (kern_fhlinkat(td, AT_FDCWD, uap->to, UIO_USERSPACE, uap->fhp));
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct fhlinkat_args {
 	fhandle_t *fhp;
 	int tofd;
 	const char *to;
 };
 #endif
 int
 sys_fhlinkat(struct thread *td, struct fhlinkat_args *uap)
 {
 
 	return (kern_fhlinkat(td, uap->tofd, uap->to, UIO_USERSPACE, uap->fhp));
 }
 
 static int
 kern_fhlinkat(struct thread *td, int fd, const char *path,
     enum uio_seg pathseg, fhandle_t *fhp)
 {
 	fhandle_t fh;
 	struct mount *mp;
 	struct vnode *vp;
 	int error;
 
 	error = priv_check(td, PRIV_VFS_GETFH);
 	if (error != 0)
 		return (error);
 	error = copyin(fhp, &fh, sizeof(fh));
 	if (error != 0)
 		return (error);
 	do {
 		bwillwrite();
 		if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 			return (ESTALE);
 		error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp);
 		vfs_unbusy(mp);
 		if (error != 0)
 			return (error);
 		VOP_UNLOCK(vp, 0);
 	} while ((error = kern_linkat_vp(td, vp, fd, path, pathseg)) == EAGAIN);
 	return (error);
 }
 
 #ifndef _SYS_SYSPROTO_H_
 struct fhreadlink_args {
 	fhandle_t *fhp;
 	char *buf;
 	size_t bufsize;
 };
 #endif
 int
 sys_fhreadlink(struct thread *td, struct fhreadlink_args *uap)
 {
 	fhandle_t fh;
 	struct mount *mp;
 	struct vnode *vp;
 	int error;
 
 	error = priv_check(td, PRIV_VFS_GETFH);
 	if (error != 0)
 		return (error);
 	if (uap->bufsize > IOSIZE_MAX)
 		return (EINVAL);
 	error = copyin(uap->fhp, &fh, sizeof(fh));
 	if (error != 0)
 		return (error);
 	if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 		return (ESTALE);
 	error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp);
 	vfs_unbusy(mp);
 	if (error != 0)
 		return (error);
 	error = kern_readlink_vp(vp, uap->buf, UIO_USERSPACE, uap->bufsize, td);
 	vput(vp);
 	return (error);
 }
 
 /*
  * syscall for the rpc.lockd to use to translate a NFS file handle into an
  * open descriptor.
  *
  * warning: do not remove the priv_check() call or this becomes one giant
  * security hole.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct fhopen_args {
 	const struct fhandle *u_fhp;
 	int flags;
 };
 #endif
 int
 sys_fhopen(struct thread *td, struct fhopen_args *uap)
 {
 	struct mount *mp;
 	struct vnode *vp;
 	struct fhandle fhp;
 	struct file *fp;
 	int fmode, error;
 	int indx;
 
 	error = priv_check(td, PRIV_VFS_FHOPEN);
 	if (error != 0)
 		return (error);
 	indx = -1;
 	fmode = FFLAGS(uap->flags);
 	/* why not allow a non-read/write open for our lockd? */
 	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
 		return (EINVAL);
 	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
 	if (error != 0)
 		return(error);
 	/* find the mount point */
 	mp = vfs_busyfs(&fhp.fh_fsid);
 	if (mp == NULL)
 		return (ESTALE);
 	/* now give me my vnode, it gets returned to me locked */
 	error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp);
 	vfs_unbusy(mp);
 	if (error != 0)
 		return (error);
 
 	error = falloc_noinstall(td, &fp);
 	if (error != 0) {
 		vput(vp);
 		return (error);
 	}
 	/*
 	 * An extra reference on `fp' has been held for us by
 	 * falloc_noinstall().
 	 */
 
 #ifdef INVARIANTS
 	td->td_dupfd = -1;
 #endif
 	error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp);
 	if (error != 0) {
 		KASSERT(fp->f_ops == &badfileops,
 		    ("VOP_OPEN in fhopen() set f_ops"));
 		KASSERT(td->td_dupfd < 0,
 		    ("fhopen() encountered fdopen()"));
 
 		vput(vp);
 		goto bad;
 	}
 #ifdef INVARIANTS
 	td->td_dupfd = 0;
 #endif
 	fp->f_vnode = vp;
 	fp->f_seqcount = 1;
 	finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp,
 	    &vnops);
 	VOP_UNLOCK(vp, 0);
 	if ((fmode & O_TRUNC) != 0) {
 		error = fo_truncate(fp, 0, td->td_ucred, td);
 		if (error != 0)
 			goto bad;
 	}
 
 	error = finstall(td, fp, &indx, fmode, NULL);
 bad:
 	fdrop(fp, td);
 	td->td_retval[0] = indx;
 	return (error);
 }
 
 /*
  * Stat an (NFS) file handle.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct fhstat_args {
 	struct fhandle *u_fhp;
 	struct stat *sb;
 };
 #endif
 int
 sys_fhstat(struct thread *td, struct fhstat_args *uap)
 {
 	struct stat sb;
 	struct fhandle fh;
 	int error;
 
 	error = copyin(uap->u_fhp, &fh, sizeof(fh));
 	if (error != 0)
 		return (error);
 	error = kern_fhstat(td, fh, &sb);
 	if (error == 0)
 		error = copyout(&sb, uap->sb, sizeof(sb));
 	return (error);
 }
 
 int
 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb)
 {
 	struct mount *mp;
 	struct vnode *vp;
 	int error;
 
 	error = priv_check(td, PRIV_VFS_FHSTAT);
 	if (error != 0)
 		return (error);
 	if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 		return (ESTALE);
 	error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
 	vfs_unbusy(mp);
 	if (error != 0)
 		return (error);
 	error = vn_stat(vp, sb, td->td_ucred, NOCRED, td);
 	vput(vp);
 	return (error);
 }
 
 /*
  * Implement fstatfs() for (NFS) file handles.
  */
 #ifndef _SYS_SYSPROTO_H_
 struct fhstatfs_args {
 	struct fhandle *u_fhp;
 	struct statfs *buf;
 };
 #endif
 int
 sys_fhstatfs(struct thread *td, struct fhstatfs_args *uap)
 {
 	struct statfs *sfp;
 	fhandle_t fh;
 	int error;
 
 	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
 	if (error != 0)
 		return (error);
 	sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
 	error = kern_fhstatfs(td, fh, sfp);
 	if (error == 0)
 		error = copyout(sfp, uap->buf, sizeof(*sfp));
 	free(sfp, M_STATFS);
 	return (error);
 }
 
 int
 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf)
 {
 	struct statfs *sp;
 	struct mount *mp;
 	struct vnode *vp;
 	int error;
 
 	error = priv_check(td, PRIV_VFS_FHSTATFS);
 	if (error != 0)
 		return (error);
 	if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL)
 		return (ESTALE);
 	error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp);
 	if (error != 0) {
 		vfs_unbusy(mp);
 		return (error);
 	}
 	vput(vp);
 	error = prison_canseemount(td->td_ucred, mp);
 	if (error != 0)
 		goto out;
 #ifdef MAC
 	error = mac_mount_check_stat(td->td_ucred, mp);
 	if (error != 0)
 		goto out;
 #endif
 	/*
 	 * Set these in case the underlying filesystem fails to do so.
 	 */
 	sp = &mp->mnt_stat;
 	sp->f_version = STATFS_VERSION;
 	sp->f_namemax = NAME_MAX;
 	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
 	error = VFS_STATFS(mp, sp);
 	if (error == 0)
 		*buf = *sp;
 out:
 	vfs_unbusy(mp);
 	return (error);
 }
 
 int
 kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len)
 {
 	struct file *fp;
 	struct mount *mp;
 	struct vnode *vp;
 	off_t olen, ooffset;
 	int error;
 #ifdef AUDIT
 	int audited_vnode1 = 0;
 #endif
 
 	AUDIT_ARG_FD(fd);
 	if (offset < 0 || len <= 0)
 		return (EINVAL);
 	/* Check for wrap. */
 	if (offset > OFF_MAX - len)
 		return (EFBIG);
 	AUDIT_ARG_FD(fd);
 	error = fget(td, fd, &cap_pwrite_rights, &fp);
 	if (error != 0)
 		return (error);
 	AUDIT_ARG_FILE(td->td_proc, fp);
 	if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) {
 		error = ESPIPE;
 		goto out;
 	}
 	if ((fp->f_flag & FWRITE) == 0) {
 		error = EBADF;
 		goto out;
 	}
 	if (fp->f_type != DTYPE_VNODE) {
 		error = ENODEV;
 		goto out;
 	}
 	vp = fp->f_vnode;
 	if (vp->v_type != VREG) {
 		error = ENODEV;
 		goto out;
 	}
 
 	/* Allocating blocks may take a long time, so iterate. */
 	for (;;) {
 		olen = len;
 		ooffset = offset;
 
 		bwillwrite();
 		mp = NULL;
 		error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
 		if (error != 0)
 			break;
 		error = vn_lock(vp, LK_EXCLUSIVE);
 		if (error != 0) {
 			vn_finished_write(mp);
 			break;
 		}
 #ifdef AUDIT
 		if (!audited_vnode1) {
 			AUDIT_ARG_VNODE1(vp);
 			audited_vnode1 = 1;
 		}
 #endif
 #ifdef MAC
 		error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp);
 		if (error == 0)
 #endif
 			error = VOP_ALLOCATE(vp, &offset, &len);
 		VOP_UNLOCK(vp, 0);
 		vn_finished_write(mp);
 
 		if (olen + ooffset != offset + len) {
 			panic("offset + len changed from %jx/%jx to %jx/%jx",
 			    ooffset, olen, offset, len);
 		}
 		if (error != 0 || len == 0)
 			break;
 		KASSERT(olen > len, ("Iteration did not make progress?"));
 		maybe_yield();
 	}
  out:
 	fdrop(fp, td);
 	return (error);
 }
 
 int
 sys_posix_fallocate(struct thread *td, struct posix_fallocate_args *uap)
 {
 	int error;
 
 	error = kern_posix_fallocate(td, uap->fd, uap->offset, uap->len);
 	return (kern_posix_error(td, error));
 }
 
 /*
  * Unlike madvise(2), we do not make a best effort to remember every
  * possible caching hint.  Instead, we remember the last setting with
  * the exception that we will allow POSIX_FADV_NORMAL to adjust the
  * region of any current setting.
  */
 int
 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len,
     int advice)
 {
 	struct fadvise_info *fa, *new;
 	struct file *fp;
 	struct vnode *vp;
 	off_t end;
 	int error;
 
 	if (offset < 0 || len < 0 || offset > OFF_MAX - len)
 		return (EINVAL);
 	AUDIT_ARG_VALUE(advice);
 	switch (advice) {
 	case POSIX_FADV_SEQUENTIAL:
 	case POSIX_FADV_RANDOM:
 	case POSIX_FADV_NOREUSE:
 		new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK);
 		break;
 	case POSIX_FADV_NORMAL:
 	case POSIX_FADV_WILLNEED:
 	case POSIX_FADV_DONTNEED:
 		new = NULL;
 		break;
 	default:
 		return (EINVAL);
 	}
 	/* XXX: CAP_POSIX_FADVISE? */
 	AUDIT_ARG_FD(fd);
 	error = fget(td, fd, &cap_no_rights, &fp);
 	if (error != 0)
 		goto out;
 	AUDIT_ARG_FILE(td->td_proc, fp);
 	if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) {
 		error = ESPIPE;
 		goto out;
 	}
 	if (fp->f_type != DTYPE_VNODE) {
 		error = ENODEV;
 		goto out;
 	}
 	vp = fp->f_vnode;
 	if (vp->v_type != VREG) {
 		error = ENODEV;
 		goto out;
 	}
 	if (len == 0)
 		end = OFF_MAX;
 	else
 		end = offset + len - 1;
 	switch (advice) {
 	case POSIX_FADV_SEQUENTIAL:
 	case POSIX_FADV_RANDOM:
 	case POSIX_FADV_NOREUSE:
 		/*
 		 * Try to merge any existing non-standard region with
 		 * this new region if possible, otherwise create a new
 		 * non-standard region for this request.
 		 */
 		mtx_pool_lock(mtxpool_sleep, fp);
 		fa = fp->f_advice;
 		if (fa != NULL && fa->fa_advice == advice &&
 		    ((fa->fa_start <= end && fa->fa_end >= offset) ||
 		    (end != OFF_MAX && fa->fa_start == end + 1) ||
 		    (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) {
 			if (offset < fa->fa_start)
 				fa->fa_start = offset;
 			if (end > fa->fa_end)
 				fa->fa_end = end;
 		} else {
 			new->fa_advice = advice;
 			new->fa_start = offset;
 			new->fa_end = end;
 			fp->f_advice = new;
 			new = fa;
 		}
 		mtx_pool_unlock(mtxpool_sleep, fp);
 		break;
 	case POSIX_FADV_NORMAL:
 		/*
 		 * If a the "normal" region overlaps with an existing
 		 * non-standard region, trim or remove the
 		 * non-standard region.
 		 */
 		mtx_pool_lock(mtxpool_sleep, fp);
 		fa = fp->f_advice;
 		if (fa != NULL) {
 			if (offset <= fa->fa_start && end >= fa->fa_end) {
 				new = fa;
 				fp->f_advice = NULL;
 			} else if (offset <= fa->fa_start &&
 			    end >= fa->fa_start)
 				fa->fa_start = end + 1;
 			else if (offset <= fa->fa_end && end >= fa->fa_end)
 				fa->fa_end = offset - 1;
 			else if (offset >= fa->fa_start && end <= fa->fa_end) {
 				/*
 				 * If the "normal" region is a middle
 				 * portion of the existing
 				 * non-standard region, just remove
 				 * the whole thing rather than picking
 				 * one side or the other to
 				 * preserve.
 				 */
 				new = fa;
 				fp->f_advice = NULL;
 			}
 		}
 		mtx_pool_unlock(mtxpool_sleep, fp);
 		break;
 	case POSIX_FADV_WILLNEED:
 	case POSIX_FADV_DONTNEED:
 		error = VOP_ADVISE(vp, offset, end, advice);
 		break;
 	}
 out:
 	if (fp != NULL)
 		fdrop(fp, td);
 	free(new, M_FADVISE);
 	return (error);
 }
 
 int
 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap)
 {
 	int error;
 
 	error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len,
 	    uap->advice);
 	return (kern_posix_error(td, error));
 }
Index: head/sys/sys/fcntl.h
===================================================================
--- head/sys/sys/fcntl.h	(revision 345981)
+++ head/sys/sys/fcntl.h	(revision 345982)
@@ -1,337 +1,347 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 1983, 1990, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)fcntl.h	8.3 (Berkeley) 1/21/94
  * $FreeBSD$
  */
 
 #ifndef _SYS_FCNTL_H_
 #define	_SYS_FCNTL_H_
 
 /*
  * This file includes the definitions for open and fcntl
  * described by POSIX for <fcntl.h>; it also includes
  * related kernel definitions.
  */
 
 #include <sys/cdefs.h>
 #include <sys/_types.h>
 
 #ifndef _MODE_T_DECLARED
 typedef	__mode_t	mode_t;
 #define	_MODE_T_DECLARED
 #endif
 
 #ifndef _OFF_T_DECLARED
 typedef	__off_t		off_t;
 #define	_OFF_T_DECLARED
 #endif
 
 #ifndef _PID_T_DECLARED
 typedef	__pid_t		pid_t;
 #define	_PID_T_DECLARED
 #endif
 
 /*
  * File status flags: these are used by open(2), fcntl(2).
  * They are also used (indirectly) in the kernel file structure f_flags,
  * which is a superset of the open/fcntl flags.  Open flags and f_flags
  * are inter-convertible using OFLAGS(fflags) and FFLAGS(oflags).
  * Open/fcntl flags begin with O_; kernel-internal flags begin with F.
  */
 /* open-only flags */
 #define	O_RDONLY	0x0000		/* open for reading only */
 #define	O_WRONLY	0x0001		/* open for writing only */
 #define	O_RDWR		0x0002		/* open for reading and writing */
 #define	O_ACCMODE	0x0003		/* mask for above modes */
 
 /*
  * Kernel encoding of open mode; separate read and write bits that are
  * independently testable: 1 greater than the above.
  *
  * XXX
  * FREAD and FWRITE are excluded from the #ifdef _KERNEL so that TIOCFLUSH,
  * which was documented to use FREAD/FWRITE, continues to work.
  */
 #if __BSD_VISIBLE
 #define	FREAD		0x0001
 #define	FWRITE		0x0002
 #endif
 #define	O_NONBLOCK	0x0004		/* no delay */
 #define	O_APPEND	0x0008		/* set append mode */
 #if __BSD_VISIBLE
 #define	O_SHLOCK	0x0010		/* open with shared file lock */
 #define	O_EXLOCK	0x0020		/* open with exclusive file lock */
 #define	O_ASYNC		0x0040		/* signal pgrp when data ready */
 #define	O_FSYNC		0x0080		/* synchronous writes */
 #endif
 #define	O_SYNC		0x0080		/* POSIX synonym for O_FSYNC */
 #if __POSIX_VISIBLE >= 200809
 #define	O_NOFOLLOW	0x0100		/* don't follow symlinks */
 #endif
 #define	O_CREAT		0x0200		/* create if nonexistent */
 #define	O_TRUNC		0x0400		/* truncate to zero length */
 #define	O_EXCL		0x0800		/* error if already exists */
 #ifdef _KERNEL
 #define	FHASLOCK	0x4000		/* descriptor holds advisory lock */
 #endif
 
 /* Defined by POSIX 1003.1; BSD default, but must be distinct from O_RDONLY. */
 #define	O_NOCTTY	0x8000		/* don't assign controlling terminal */
 
 #if __BSD_VISIBLE
 /* Attempt to bypass buffer cache */
 #define	O_DIRECT	0x00010000
 #endif
 
 #if __POSIX_VISIBLE >= 200809
 #define	O_DIRECTORY	0x00020000	/* Fail if not directory */
 #define	O_EXEC		0x00040000	/* Open for execute only */
 #endif
 #ifdef	_KERNEL
 #define	FEXEC		O_EXEC
 #endif
 
 #if __POSIX_VISIBLE >= 200809
 /* Defined by POSIX 1003.1-2008; BSD default, but reserve for future use. */
 #define	O_TTY_INIT	0x00080000	/* Restore default termios attributes */
 
 #define	O_CLOEXEC	0x00100000
 #endif
 
 #if __BSD_VISIBLE
 #define	O_VERIFY	0x00200000	/* open only after verification */
 #define	O_BENEATH	0x00400000	/* Fail if not under cwd */
 #endif
 
 /*
  * XXX missing O_DSYNC, O_RSYNC.
  */
 
 #ifdef _KERNEL
 
 /* Only for devfs d_close() flags. */
 #define	FLASTCLOSE	O_DIRECTORY
 #define	FREVOKE		O_VERIFY
 /* Only for fo_close() from half-succeeded open */
 #define	FOPENFAILED	O_TTY_INIT
 
 /* convert from open() flags to/from fflags; convert O_RD/WR to FREAD/FWRITE */
 #define	FFLAGS(oflags)	((oflags) & O_EXEC ? (oflags) : (oflags) + 1)
 #define	OFLAGS(fflags)	((fflags) & O_EXEC ? (fflags) : (fflags) - 1)
 
 /* bits to save after open */
 #define	FMASK	(FREAD|FWRITE|FAPPEND|FASYNC|FFSYNC|FNONBLOCK|O_DIRECT|FEXEC)
 /* bits settable by fcntl(F_SETFL, ...) */
 #define	FCNTLFLAGS	(FAPPEND|FASYNC|FFSYNC|FNONBLOCK|FRDAHEAD|O_DIRECT)
 
 #if defined(COMPAT_FREEBSD7) || defined(COMPAT_FREEBSD6) || \
     defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4)
 /*
  * Set by shm_open(3) in older libc's to get automatic MAP_ASYNC
  * behavior for POSIX shared memory objects (which are otherwise
  * implemented as plain files).
  */
 #define	FPOSIXSHM	O_NOFOLLOW
 #undef FCNTLFLAGS
 #define	FCNTLFLAGS	(FAPPEND|FASYNC|FFSYNC|FNONBLOCK|FPOSIXSHM|FRDAHEAD| \
 			 O_DIRECT)
 #endif
 #endif
 
 /*
  * The O_* flags used to have only F* names, which were used in the kernel
  * and by fcntl.  We retain the F* names for the kernel f_flag field
  * and for backward compatibility for fcntl.  These flags are deprecated.
  */
 #if __BSD_VISIBLE
 #define	FAPPEND		O_APPEND	/* kernel/compat */
 #define	FASYNC		O_ASYNC		/* kernel/compat */
 #define	FFSYNC		O_FSYNC		/* kernel */
 #define	FNONBLOCK	O_NONBLOCK	/* kernel */
 #define	FNDELAY		O_NONBLOCK	/* compat */
 #define	O_NDELAY	O_NONBLOCK	/* compat */
 #endif
 
 /*
  * We are out of bits in f_flag (which is a short).  However,
  * the flag bits not set in FMASK are only meaningful in the
  * initial open syscall.  Those bits can thus be given a
  * different meaning for fcntl(2).
  */
 #if __BSD_VISIBLE
 /* Read ahead */
 #define	FRDAHEAD	O_CREAT
 #endif
 
 #if __POSIX_VISIBLE >= 200809
 /*
  * Magic value that specify the use of the current working directory
  * to determine the target of relative file paths in the openat() and
  * similar syscalls.
  */
 #define	AT_FDCWD		-100
 
 /*
  * Miscellaneous flags for the *at() syscalls.
  */
 #define	AT_EACCESS		0x0100	/* Check access using effective user
 					   and group ID */
 #define	AT_SYMLINK_NOFOLLOW	0x0200	/* Do not follow symbolic links */
 #define	AT_SYMLINK_FOLLOW	0x0400	/* Follow symbolic link */
 #define	AT_REMOVEDIR		0x0800	/* Remove directory instead of file */
 #define	AT_BENEATH		0x1000	/* Fail if not under dirfd */
 #endif
 
 /*
  * Constants used for fcntl(2)
  */
 
 /* command values */
 #define	F_DUPFD		0		/* duplicate file descriptor */
 #define	F_GETFD		1		/* get file descriptor flags */
 #define	F_SETFD		2		/* set file descriptor flags */
 #define	F_GETFL		3		/* get file status flags */
 #define	F_SETFL		4		/* set file status flags */
 #if __XSI_VISIBLE || __POSIX_VISIBLE >= 200112
 #define	F_GETOWN	5		/* get SIGIO/SIGURG proc/pgrp */
 #define	F_SETOWN	6		/* set SIGIO/SIGURG proc/pgrp */
 #endif
 #if __BSD_VISIBLE
 #define	F_OGETLK	7		/* get record locking information */
 #define	F_OSETLK	8		/* set record locking information */
 #define	F_OSETLKW	9		/* F_SETLK; wait if blocked */
 #define	F_DUP2FD	10		/* duplicate file descriptor to arg */
 #endif
 #define	F_GETLK		11		/* get record locking information */
 #define	F_SETLK		12		/* set record locking information */
 #define	F_SETLKW	13		/* F_SETLK; wait if blocked */
 #if __BSD_VISIBLE
 #define	F_SETLK_REMOTE	14		/* debugging support for remote locks */
 #define	F_READAHEAD	15		/* read ahead */
 #define	F_RDAHEAD	16		/* Darwin compatible read ahead */
 #endif
 #if __POSIX_VISIBLE >= 200809
 #define	F_DUPFD_CLOEXEC	17		/* Like F_DUPFD, but FD_CLOEXEC is set */
 #endif
 #if __BSD_VISIBLE
 #define	F_DUP2FD_CLOEXEC 18		/* Like F_DUP2FD, but FD_CLOEXEC is set */
 #endif
 
 /* file descriptor flags (F_GETFD, F_SETFD) */
 #define	FD_CLOEXEC	1		/* close-on-exec flag */
 
 /* record locking flags (F_GETLK, F_SETLK, F_SETLKW) */
 #define	F_RDLCK		1		/* shared or read lock */
 #define	F_UNLCK		2		/* unlock */
 #define	F_WRLCK		3		/* exclusive or write lock */
 #if __BSD_VISIBLE
 #define	F_UNLCKSYS	4		/* purge locks for a given system ID */ 
 #define	F_CANCEL	5		/* cancel an async lock request */
 #endif
 #ifdef _KERNEL
 #define	F_WAIT		0x010		/* Wait until lock is granted */
 #define	F_FLOCK		0x020	 	/* Use flock(2) semantics for lock */
 #define	F_POSIX		0x040	 	/* Use POSIX semantics for lock */
 #define	F_REMOTE	0x080		/* Lock owner is remote NFS client */
 #define	F_NOINTR	0x100		/* Ignore signals when waiting */
 #endif
 
 /*
  * Advisory file segment locking data type -
  * information passed to system by user
  */
 struct flock {
 	off_t	l_start;	/* starting offset */
 	off_t	l_len;		/* len = 0 means until end of file */
 	pid_t	l_pid;		/* lock owner */
 	short	l_type;		/* lock type: read/write, etc. */
 	short	l_whence;	/* type of l_start */
 	int	l_sysid;	/* remote system id or zero for local */
 };
 
 #if __BSD_VISIBLE
 /*
  * Old advisory file segment locking data type,
  * before adding l_sysid.
  */
 struct __oflock {
 	off_t	l_start;	/* starting offset */
 	off_t	l_len;		/* len = 0 means until end of file */
 	pid_t	l_pid;		/* lock owner */
 	short	l_type;		/* lock type: read/write, etc. */
 	short	l_whence;	/* type of l_start */
 };
 #endif
 
 #if __BSD_VISIBLE
 /* lock operations for flock(2) */
 #define	LOCK_SH		0x01		/* shared file lock */
 #define	LOCK_EX		0x02		/* exclusive file lock */
 #define	LOCK_NB		0x04		/* don't block when locking */
 #define	LOCK_UN		0x08		/* unlock file */
 #endif
 
 #if __POSIX_VISIBLE >= 200112
 /*
  * Advice to posix_fadvise
  */
 #define	POSIX_FADV_NORMAL	0	/* no special treatment */
 #define	POSIX_FADV_RANDOM	1	/* expect random page references */
 #define	POSIX_FADV_SEQUENTIAL	2	/* expect sequential page references */
 #define	POSIX_FADV_WILLNEED	3	/* will need these pages */
 #define	POSIX_FADV_DONTNEED	4	/* dont need these pages */
 #define	POSIX_FADV_NOREUSE	5	/* access data only once */
 #endif
 
+
+#if __BSD_VISIBLE
+/*
+ * Magic value that specifies that the file descriptor corresponding to the
+ * filename is unknown and that the sanity check should be omitted in
+ * funlinkat() and similar syscalls.
+ */
+#define	FD_NONE			-200
+#endif
+
 #ifndef _KERNEL
 __BEGIN_DECLS
 int	open(const char *, int, ...);
 int	creat(const char *, mode_t);
 int	fcntl(int, int, ...);
 #if __BSD_VISIBLE
 int	flock(int, int);
 #endif
 #if __POSIX_VISIBLE >= 200809
 int	openat(int, const char *, int, ...);
 #endif
 #if __POSIX_VISIBLE >= 200112
 int	posix_fadvise(int, off_t, off_t, int);
 int	posix_fallocate(int, off_t, off_t);
 #endif
 __END_DECLS
 #endif
 
 #endif /* !_SYS_FCNTL_H_ */
Index: head/sys/sys/syscallsubr.h
===================================================================
--- head/sys/sys/syscallsubr.h	(revision 345981)
+++ head/sys/sys/syscallsubr.h	(revision 345982)
@@ -1,312 +1,312 @@
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
  *
  * Copyright (c) 2002 Ian Dowse.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #ifndef _SYS_SYSCALLSUBR_H_
 #define _SYS_SYSCALLSUBR_H_
 
 #include <sys/signal.h>
 #include <sys/socket.h>
 #include <sys/mac.h>
 #include <sys/mount.h>
 #include <sys/_cpuset.h>
 #include <sys/_domainset.h>
 #include <sys/_uio.h>
 
 struct __wrusage;
 struct file;
 struct filecaps;
 enum idtype;
 struct itimerval;
 struct image_args;
 struct jail;
 struct kevent;
 struct kevent_copyops;
 struct kld_file_stat;
 struct ksiginfo;
 struct mbuf;
 struct msghdr;
 struct msqid_ds;
 struct pollfd;
 struct ogetdirentries_args;
 struct rlimit;
 struct rusage;
 struct sched_param;
 union semun;
 struct sockaddr;
 struct stat;
 struct thr_param;
 struct uio;
 
 int	kern___getcwd(struct thread *td, char *buf, enum uio_seg bufseg,
 	    size_t buflen, size_t path_max);
 int	kern_accept(struct thread *td, int s, struct sockaddr **name,
 	    socklen_t *namelen, struct file **fp);
 int	kern_accept4(struct thread *td, int s, struct sockaddr **name,
 	    socklen_t *namelen, int flags, struct file **fp);
 int	kern_accessat(struct thread *td, int fd, const char *path,
 	    enum uio_seg pathseg, int flags, int mode);
 int	kern_adjtime(struct thread *td, struct timeval *delta,
 	    struct timeval *olddelta);
 int	kern_alternate_path(struct thread *td, const char *prefix, const char *path,
 	    enum uio_seg pathseg, char **pathbuf, int create, int dirfd);
 int	kern_bindat(struct thread *td, int dirfd, int fd, struct sockaddr *sa);
 int	kern_break(struct thread *td, uintptr_t *addr);
 int	kern_cap_ioctls_limit(struct thread *td, int fd, u_long *cmds,
 	    size_t ncmds);
 int	kern_cap_rights_limit(struct thread *td, int fd, cap_rights_t *rights);
 int	kern_chdir(struct thread *td, const char *path, enum uio_seg pathseg);
 int	kern_clock_getcpuclockid2(struct thread *td, id_t id, int which,
 	    clockid_t *clk_id);
 int	kern_clock_getres(struct thread *td, clockid_t clock_id,
 	    struct timespec *ts);
 int	kern_clock_gettime(struct thread *td, clockid_t clock_id,
 	    struct timespec *ats);
 int	kern_clock_nanosleep(struct thread *td, clockid_t clock_id, int flags,
 	    const struct timespec *rqtp, struct timespec *rmtp);
 int	kern_clock_settime(struct thread *td, clockid_t clock_id,
 	    struct timespec *ats);
 int	kern_close(struct thread *td, int fd);
 int	kern_connectat(struct thread *td, int dirfd, int fd,
 	    struct sockaddr *sa);
 int	kern_cpuset_getaffinity(struct thread *td, cpulevel_t level,
 	    cpuwhich_t which, id_t id, size_t cpusetsize, cpuset_t *maskp);
 int	kern_cpuset_setaffinity(struct thread *td, cpulevel_t level,
 	    cpuwhich_t which, id_t id, size_t cpusetsize,
 	    const cpuset_t *maskp);
 int	kern_cpuset_getdomain(struct thread *td, cpulevel_t level,
 	    cpuwhich_t which, id_t id, size_t domainsetsize,
 	    domainset_t *maskp, int *policyp);
 int	kern_cpuset_setdomain(struct thread *td, cpulevel_t level,
 	    cpuwhich_t which, id_t id, size_t domainsetsize,
 	    const domainset_t *maskp, int policy);
 int	kern_cpuset_getid(struct thread *td, cpulevel_t level,
 	    cpuwhich_t which, id_t id, cpusetid_t *setid);
 int	kern_cpuset_setid(struct thread *td, cpuwhich_t which,
 	    id_t id, cpusetid_t setid);
 int	kern_dup(struct thread *td, u_int mode, int flags, int old, int new);
 int	kern_execve(struct thread *td, struct image_args *args,
 	    struct mac *mac_p);
 int	kern_fchmodat(struct thread *td, int fd, const char *path,
 	    enum uio_seg pathseg, mode_t mode, int flag);
 int	kern_fchownat(struct thread *td, int fd, const char *path,
 	    enum uio_seg pathseg, int uid, int gid, int flag);
 int	kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg);
 int	kern_fcntl_freebsd(struct thread *td, int fd, int cmd, long arg);
 int	kern_fhstat(struct thread *td, fhandle_t fh, struct stat *buf);
 int	kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf);
 int	kern_fpathconf(struct thread *td, int fd, int name, long *valuep);
 int	kern_fstat(struct thread *td, int fd, struct stat *sbp);
 int	kern_fstatfs(struct thread *td, int fd, struct statfs *buf);
 int	kern_fsync(struct thread *td, int fd, bool fullsync);
 int	kern_ftruncate(struct thread *td, int fd, off_t length);
 int	kern_futimes(struct thread *td, int fd, struct timeval *tptr,
 	    enum uio_seg tptrseg);
 int	kern_futimens(struct thread *td, int fd, struct timespec *tptr,
 	    enum uio_seg tptrseg);
 int	kern_getdirentries(struct thread *td, int fd, char *buf, size_t count,
 	    off_t *basep, ssize_t *residp, enum uio_seg bufseg);
 int	kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
 	    size_t *countp, enum uio_seg bufseg, int mode);
 int	kern_getitimer(struct thread *, u_int, struct itimerval *);
 int	kern_getppid(struct thread *);
 int	kern_getpeername(struct thread *td, int fd, struct sockaddr **sa,
 	    socklen_t *alen);
 int	kern_getrusage(struct thread *td, int who, struct rusage *rup);
 int	kern_getsockname(struct thread *td, int fd, struct sockaddr **sa,
 	    socklen_t *alen);
 int	kern_getsockopt(struct thread *td, int s, int level, int name,
 	    void *optval, enum uio_seg valseg, socklen_t *valsize);
 int	kern_ioctl(struct thread *td, int fd, u_long com, caddr_t data);
 int	kern_jail(struct thread *td, struct jail *j);
 int	kern_jail_get(struct thread *td, struct uio *options, int flags);
 int	kern_jail_set(struct thread *td, struct uio *options, int flags);
 int	kern_kevent(struct thread *td, int fd, int nchanges, int nevents,
 	    struct kevent_copyops *k_ops, const struct timespec *timeout);
 int	kern_kevent_anonymous(struct thread *td, int nevents,
 	    struct kevent_copyops *k_ops);
 int	kern_kevent_fp(struct thread *td, struct file *fp, int nchanges,
 	    int nevents, struct kevent_copyops *k_ops,
 	    const struct timespec *timeout);
 int	kern_kqueue(struct thread *td, int flags, struct filecaps *fcaps);
 int	kern_kldload(struct thread *td, const char *file, int *fileid);
 int	kern_kldstat(struct thread *td, int fileid, struct kld_file_stat *stat);
 int	kern_kldunload(struct thread *td, int fileid, int flags);
 int	kern_linkat(struct thread *td, int fd1, int fd2, const char *path1,
 	    const char *path2, enum uio_seg segflg, int follow);
 int	kern_listen(struct thread *td, int s, int backlog);
 int	kern_lseek(struct thread *td, int fd, off_t offset, int whence);
 int	kern_lutimes(struct thread *td, const char *path, enum uio_seg pathseg,
 	    struct timeval *tptr, enum uio_seg tptrseg);
 int	kern_madvise(struct thread *td, uintptr_t addr, size_t len, int behav);
 int	kern_mincore(struct thread *td, uintptr_t addr, size_t len, char *vec);
 int	kern_mkdirat(struct thread *td, int fd, const char *path,
 	    enum uio_seg segflg, int mode);
 int	kern_mkfifoat(struct thread *td, int fd, const char *path,
 	    enum uio_seg pathseg, int mode);
 int	kern_mknodat(struct thread *td, int fd, const char *path,
 	    enum uio_seg pathseg, int mode, dev_t dev);
 int	kern_mlock(struct proc *proc, struct ucred *cred, uintptr_t addr,
 	    size_t len);
 int	kern_mmap(struct thread *td, uintptr_t addr, size_t size, int prot,
 	    int flags, int fd, off_t pos);
 int	kern_mprotect(struct thread *td, uintptr_t addr, size_t size, int prot);
 int	kern_msgctl(struct thread *, int, int, struct msqid_ds *);
 int	kern_msgrcv(struct thread *, int, void *, size_t, long, int, long *);
 int	kern_msgsnd(struct thread *, int, const void *, size_t, int, long);
 int	kern_msync(struct thread *td, uintptr_t addr, size_t size, int flags);
 int	kern_munlock(struct thread *td, uintptr_t addr, size_t size);
 int	kern_munmap(struct thread *td, uintptr_t addr, size_t size);
 int     kern_nanosleep(struct thread *td, struct timespec *rqt,
 	    struct timespec *rmt);
 int	kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap,
 	    long *ploff);
 int	kern_openat(struct thread *td, int fd, const char *path,
 	    enum uio_seg pathseg, int flags, int mode);
 int	kern_pathconf(struct thread *td, const char *path,
 	    enum uio_seg pathseg, int name, u_long flags, long *valuep);
 int	kern_pipe(struct thread *td, int fildes[2], int flags,
 	    struct filecaps *fcaps1, struct filecaps *fcaps2);
 int	kern_poll(struct thread *td, struct pollfd *fds, u_int nfds,
 	    struct timespec *tsp, sigset_t *uset);
 int	kern_posix_error(struct thread *td, int error);
 int	kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len,
 	    int advice);
 int	kern_posix_fallocate(struct thread *td, int fd, off_t offset,
 	    off_t len);
 int	kern_procctl(struct thread *td, enum idtype idtype, id_t id, int com,
 	    void *data);
 int	kern_pread(struct thread *td, int fd, void *buf, size_t nbyte,
 	    off_t offset);
 int	kern_preadv(struct thread *td, int fd, struct uio *auio, off_t offset);
 int	kern_pselect(struct thread *td, int nd, fd_set *in, fd_set *ou,
 	    fd_set *ex, struct timeval *tvp, sigset_t *uset, int abi_nfdbits);
 int	kern_ptrace(struct thread *td, int req, pid_t pid, void *addr,
 	    int data);
 int	kern_pwrite(struct thread *td, int fd, const void *buf, size_t nbyte,
 	    off_t offset);
 int	kern_pwritev(struct thread *td, int fd, struct uio *auio, off_t offset);
 int	kern_readlinkat(struct thread *td, int fd, const char *path,
 	    enum uio_seg pathseg, char *buf, enum uio_seg bufseg, size_t count);
 int	kern_readv(struct thread *td, int fd, struct uio *auio);
 int	kern_recvit(struct thread *td, int s, struct msghdr *mp,
 	    enum uio_seg fromseg, struct mbuf **controlp);
 int	kern_renameat(struct thread *td, int oldfd, const char *old, int newfd,
 	    const char *new, enum uio_seg pathseg);
-int	kern_rmdirat(struct thread *td, int fd, const char *path,
+int	kern_frmdirat(struct thread *td, int dfd, const char *path, int fd,
 	    enum uio_seg pathseg, int flag);
 int	kern_sched_getparam(struct thread *td, struct thread *targettd,
 	    struct sched_param *param);
 int	kern_sched_getscheduler(struct thread *td, struct thread *targettd,
 	    int *policy);
 int	kern_sched_setparam(struct thread *td, struct thread *targettd,
 	    struct sched_param *param);
 int	kern_sched_setscheduler(struct thread *td, struct thread *targettd,
 	    int policy, struct sched_param *param);
 int	kern_sched_rr_get_interval(struct thread *td, pid_t pid,
 	    struct timespec *ts);
 int	kern_sched_rr_get_interval_td(struct thread *td, struct thread *targettd,
 	    struct timespec *ts);
 int	kern_semctl(struct thread *td, int semid, int semnum, int cmd,
 	    union semun *arg, register_t *rval);
 int	kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou,
 	    fd_set *fd_ex, struct timeval *tvp, int abi_nfdbits);
 int	kern_sendit(struct thread *td, int s, struct msghdr *mp, int flags,
 	    struct mbuf *control, enum uio_seg segflg);
 int	kern_setgroups(struct thread *td, u_int ngrp, gid_t *groups);
 int	kern_setitimer(struct thread *, u_int, struct itimerval *,
 	    struct itimerval *);
 int	kern_setrlimit(struct thread *, u_int, struct rlimit *);
 int	kern_setsockopt(struct thread *td, int s, int level, int name,
 	    const void *optval, enum uio_seg valseg, socklen_t valsize);
 int	kern_settimeofday(struct thread *td, struct timeval *tv,
 	    struct timezone *tzp);
 int	kern_shm_open(struct thread *td, const char *userpath, int flags,
 	    mode_t mode, struct filecaps *fcaps);
 int	kern_shmat(struct thread *td, int shmid, const void *shmaddr,
 	    int shmflg);
 int	kern_shmctl(struct thread *td, int shmid, int cmd, void *buf,
 	    size_t *bufsz);
 int	kern_shutdown(struct thread *td, int s, int how);
 int	kern_sigaction(struct thread *td, int sig, const struct sigaction *act,
 	    struct sigaction *oact, int flags);
 int	kern_sigaltstack(struct thread *td, stack_t *ss, stack_t *oss);
 int	kern_sigprocmask(struct thread *td, int how,
 	    sigset_t *set, sigset_t *oset, int flags);
 int	kern_sigsuspend(struct thread *td, sigset_t mask);
 int	kern_sigtimedwait(struct thread *td, sigset_t waitset,
 	    struct ksiginfo *ksi, struct timespec *timeout);
 int	kern_sigqueue(struct thread *td, pid_t pid, int signum,
 	    union sigval *value);
 int	kern_socket(struct thread *td, int domain, int type, int protocol);
 int	kern_statat(struct thread *td, int flag, int fd, const char *path,
 	    enum uio_seg pathseg, struct stat *sbp,
 	    void (*hook)(struct vnode *vp, struct stat *sbp));
 int	kern_statfs(struct thread *td, const char *path, enum uio_seg pathseg,
 	    struct statfs *buf);
 int	kern_symlinkat(struct thread *td, const char *path1, int fd,
 	    const char *path2, enum uio_seg segflg);
 int	kern_ktimer_create(struct thread *td, clockid_t clock_id,
 	    struct sigevent *evp, int *timerid, int preset_id);
 int	kern_ktimer_delete(struct thread *, int);
 int	kern_ktimer_settime(struct thread *td, int timer_id, int flags,
 	    struct itimerspec *val, struct itimerspec *oval);
 int	kern_ktimer_gettime(struct thread *td, int timer_id,
 	    struct itimerspec *val);
 int	kern_ktimer_getoverrun(struct thread *td, int timer_id);
 int	kern_thr_alloc(struct proc *, int pages, struct thread **);
 int	kern_thr_exit(struct thread *td);
 int	kern_thr_new(struct thread *td, struct thr_param *param);
 int	kern_thr_suspend(struct thread *td, struct timespec *tsp);
 int	kern_truncate(struct thread *td, const char *path,
 	    enum uio_seg pathseg, off_t length);
-int	kern_unlinkat(struct thread *td, int fd, const char *path,
+int	kern_funlinkat(struct thread *td, int dfd, const char *path, int fd,
 	    enum uio_seg pathseg, int flag, ino_t oldinum);
 int	kern_utimesat(struct thread *td, int fd, const char *path,
 	    enum uio_seg pathseg, struct timeval *tptr, enum uio_seg tptrseg);
 int	kern_utimensat(struct thread *td, int fd, const char *path,
 	    enum uio_seg pathseg, struct timespec *tptr, enum uio_seg tptrseg,
 	    int follow);
 int	kern_wait(struct thread *td, pid_t pid, int *status, int options,
 	    struct rusage *rup);
 int	kern_wait6(struct thread *td, enum idtype idtype, id_t id, int *status,
 	    int options, struct __wrusage *wrup, siginfo_t *sip);
 int	kern_writev(struct thread *td, int fd, struct uio *auio);
 int	kern_socketpair(struct thread *td, int domain, int type, int protocol,
 	    int *rsv);
 
 /* flags for kern_sigaction */
 #define	KSA_OSIGSET	0x0001	/* uses osigact_t */
 #define	KSA_FREEBSD4	0x0002	/* uses ucontext4 */
 
 struct freebsd11_dirent;
 
 int	freebsd11_kern_getdirentries(struct thread *td, int fd, char *ubuf, u_int
 	    count, long *basep, void (*func)(struct freebsd11_dirent *));
 
 #endif /* !_SYS_SYSCALLSUBR_H_ */
Index: head/sys/ufs/ffs/ffs_alloc.c
===================================================================
--- head/sys/ufs/ffs/ffs_alloc.c	(revision 345981)
+++ head/sys/ufs/ffs/ffs_alloc.c	(revision 345982)
@@ -1,3612 +1,3613 @@
 /*-
  * SPDX-License-Identifier: (BSD-2-Clause-FreeBSD AND BSD-3-Clause)
  *
  * Copyright (c) 2002 Networks Associates Technology, Inc.
  * All rights reserved.
  *
  * This software was developed for the FreeBSD Project by Marshall
  * Kirk McKusick and Network Associates Laboratories, the Security
  * Research Division of Network Associates, Inc. under DARPA/SPAWAR
  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
  * research program
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * Copyright (c) 1982, 1986, 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ffs_alloc.c	8.18 (Berkeley) 5/26/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_quota.h"
 
 #include <sys/param.h>
 #include <sys/capsicum.h>
 #include <sys/systm.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/conf.h>
 #include <sys/fcntl.h>
 #include <sys/file.h>
 #include <sys/filedesc.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/vnode.h>
 #include <sys/mount.h>
 #include <sys/kernel.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/taskqueue.h>
 
 #include <security/audit/audit.h>
 
 #include <geom/geom.h>
 #include <geom/geom_vfs.h>
 
 #include <ufs/ufs/dir.h>
 #include <ufs/ufs/extattr.h>
 #include <ufs/ufs/quota.h>
 #include <ufs/ufs/inode.h>
 #include <ufs/ufs/ufs_extern.h>
 #include <ufs/ufs/ufsmount.h>
 
 #include <ufs/ffs/fs.h>
 #include <ufs/ffs/ffs_extern.h>
 #include <ufs/ffs/softdep.h>
 
 /*
  * Signature of an allocator routine that can be handed to ffs_hashalloc()
  * (see the last parameter of its prototype below).  ffs_alloc() and
  * ffs_realloccg() in this file pass ffs_alloccg() through it.
  */
 typedef ufs2_daddr_t allocfcn_t(struct inode *ip, u_int cg, ufs2_daddr_t bpref,
 				  int size, int rsize);
 
 /*
  * Forward declarations of the file-local helpers.
  */
 static ufs2_daddr_t ffs_alloccg(struct inode *, u_int, ufs2_daddr_t, int, int);
 static ufs2_daddr_t
 	      ffs_alloccgblk(struct inode *, struct buf *, ufs2_daddr_t, int);
 static void	ffs_blkfree_cg(struct ufsmount *, struct fs *,
 		    struct vnode *, ufs2_daddr_t, long, ino_t,
 		    struct workhead *);
 /* ffs_checkblk() exists only in INVARIANTS kernels; its callers are guarded. */
 #ifdef INVARIANTS
 static int	ffs_checkblk(struct inode *, ufs2_daddr_t, long);
 #endif
 static ufs2_daddr_t ffs_clusteralloc(struct inode *, u_int, ufs2_daddr_t, int);
 static ino_t	ffs_dirpref(struct inode *);
 static ufs2_daddr_t ffs_fragextend(struct inode *, u_int, ufs2_daddr_t,
 		    int, int);
 static ufs2_daddr_t	ffs_hashalloc
 		(struct inode *, u_int, ufs2_daddr_t, int, int, allocfcn_t *);
 static ufs2_daddr_t ffs_nodealloccg(struct inode *, u_int, ufs2_daddr_t, int,
 		    int);
 static ufs1_daddr_t ffs_mapsearch(struct fs *, struct cg *, ufs2_daddr_t, int);
 /* Per-format workers that ffs_reallocblks() dispatches to. */
 static int	ffs_reallocblks_ufs1(struct vop_reallocblks_args *);
 static int	ffs_reallocblks_ufs2(struct vop_reallocblks_args *);
 static void	ffs_ckhash_cg(struct buf *);
 
 /*
  * Allocate a block in the filesystem.
  *
  * The size of the requested block is given, which must be some
  * multiple of fs_fsize and <= fs_bsize.
  * A preference may be optionally specified. If a preference is given
  * the following hierarchy is used to allocate a block:
  *   1) allocate the requested block.
  *   2) allocate a rotationally optimal block in the same cylinder.
  *   3) allocate a block in the same cylinder group.
  *   4) quadratically rehash into other cylinder groups, until an
  *      available block is located.
  * If no block preference is given the following hierarchy is used
  * to allocate a block:
  *   1) allocate a block in the cylinder group that contains the
  *      inode for the file.
  *   2) quadratically rehash into other cylinder groups, until an
  *      available block is located.
  *
  * Parameters:
  *   ip     inode the block is allocated for.
  *   lbn    logical block number; not referenced in this function body.
  *   bpref  preferred filesystem block address, or 0 for no preference.
  *   size   number of bytes requested.
  *   flags  IO_EXT and IO_BUFLOCKED are the bits examined here.
  *   cred   credential used for the quota and reserve checks.
  *   bnp    receives the allocated block address; set to 0 on entry.
  *
  * Returns 0 on success, ENOSPC when no space could be found, or the
  * chkdq() error when the quota check fails (QUOTA kernels only).
  *
  * The caller must hold the UFS mutex (asserted below).  Every failure
  * return visible here is reached after UFS_UNLOCK().
  * NOTE(review): the success return does not unlock in this function;
  * presumably the allocator reached through ffs_hashalloc() drops the
  * mutex -- confirm against ffs_alloccg().
  */
 int
 ffs_alloc(ip, lbn, bpref, size, flags, cred, bnp)
 	struct inode *ip;
 	ufs2_daddr_t lbn, bpref;
 	int size, flags;
 	struct ucred *cred;
 	ufs2_daddr_t *bnp;
 {
 	struct fs *fs;
 	struct ufsmount *ump;
 	ufs2_daddr_t bno;
 	u_int cg, reclaimed;
 	/* State for rate limiting the "filesystem full" message below. */
 	static struct timeval lastfail;
 	static int curfail;
 	int64_t delta;
 #ifdef QUOTA
 	int error;
 #endif
 
 	*bnp = 0;
 	ump = ITOUMP(ip);
 	fs = ump->um_fs;
 	mtx_assert(UFS_MTX(ump), MA_OWNED);
 #ifdef INVARIANTS
 	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
 		printf("dev = %s, bsize = %ld, size = %d, fs = %s\n",
 		    devtoname(ump->um_dev), (long)fs->fs_bsize, size,
 		    fs->fs_fsmnt);
 		panic("ffs_alloc: bad size");
 	}
 	if (cred == NOCRED)
 		panic("ffs_alloc: missing credential");
 #endif /* INVARIANTS */
 	/* Becomes 1 once a softdep cleanup has been requested; limits retries. */
 	reclaimed = 0;
 retry:
 #ifdef QUOTA
 	/*
 	 * Charge the quota before attempting the allocation.  The UFS
 	 * mutex is dropped around chkdq() and is not re-taken when the
 	 * charge is refused.
 	 */
 	UFS_UNLOCK(ump);
 	error = chkdq(ip, btodb(size), cred, 0);
 	if (error)
 		return (error);
 	UFS_LOCK(ump);
 #endif
 	/* A full-block request cannot succeed when no free blocks remain. */
 	if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
 		goto nospace;
 	/*
 	 * When priv_check_cred() returns non-zero for PRIV_VFS_BLOCKRESERVE,
 	 * refuse a request that would leave freespace() for the fs_minfree
 	 * reserve negative.
 	 */
 	if (priv_check_cred(cred, PRIV_VFS_BLOCKRESERVE) &&
 	    freespace(fs, fs->fs_minfree) - numfrags(fs, size) < 0)
 		goto nospace;
 	/* An out-of-range preference is treated as no preference. */
 	if (bpref >= fs->fs_size)
 		bpref = 0;
 	/* Start in the inode's cylinder group unless a preference names one. */
 	if (bpref == 0)
 		cg = ino_to_cg(fs, ip->i_number);
 	else
 		cg = dtog(fs, bpref);
 	bno = ffs_hashalloc(ip, cg, bpref, size, size, ffs_alloccg);
 	if (bno > 0) {
 		/* Account for the new space in the inode's block count. */
 		delta = btodb(size);
 		DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + delta);
 		/* With IO_EXT set only IN_CHANGE is flagged, not IN_UPDATE. */
 		if (flags & IO_EXT)
 			ip->i_flag |= IN_CHANGE;
 		else
 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
 		*bnp = bno;
 		return (0);
 	}
 nospace:
 #ifdef QUOTA
 	UFS_UNLOCK(ump);
 	/*
 	 * Restore user's disk quota because allocation failed.
 	 */
 	(void) chkdq(ip, -btodb(size), cred, FORCE);
 	UFS_LOCK(ump);
 #endif
 	/*
 	 * On the first failure, unless the caller passed IO_BUFLOCKED,
 	 * request a softdep cleanup and try the whole sequence once more.
 	 */
 	if (reclaimed == 0 && (flags & IO_BUFLOCKED) == 0) {
 		reclaimed = 1;
 		softdep_request_cleanup(fs, ITOV(ip), cred, FLUSH_BLOCKS_WAIT);
 		goto retry;
 	}
 	UFS_UNLOCK(ump);
 	/* Report only after a cleanup was tried, rate limited by ppsratecheck(). */
 	if (reclaimed > 0 && ppsratecheck(&lastfail, &curfail, 1)) {
 		ffs_fserr(fs, ip->i_number, "filesystem full");
 		uprintf("\n%s: write failed, filesystem is full\n",
 		    fs->fs_fsmnt);
 	}
 	return (ENOSPC);
 }
 
 /*
  * Reallocate a fragment to a bigger size
  *
  * The number and size of the old block is given, and a preference
  * and new size is also specified. The allocator attempts to extend
  * the original block. Failing that, the regular block allocator is
  * invoked to get an appropriate block.
  *
  * Parameters:
  *   ip      inode owning the fragment.
  *   lbprev  logical block number of the fragment; used to read its buffer.
  *   bprev   current filesystem address of the fragment; must be non-zero.
  *   bpref   preferred address for a relocated block, or 0 for none.
  *   osize   current size of the fragment in bytes.
  *   nsize   requested new size in bytes.
  *   flags   BA_UNMAPPED, IO_EXT and IO_BUFLOCKED are the bits examined here.
  *   cred    credential used for the reserve and quota checks.
  *   bpp     on success receives the buffer, resized to nsize with the
  *           newly added tail zeroed.
  *
  * Returns 0 on success, ENOSPC when no space could be found, or the
  * error from bread_gb() / chkdq().
  *
  * The caller must hold the UFS mutex (asserted below).  Every failure
  * return visible here is reached with the mutex dropped.
  * NOTE(review): the two success returns do not unlock in this function;
  * presumably ffs_fragextend() / the allocator behind ffs_hashalloc()
  * drop the mutex when they succeed -- confirm.
  */
 int
 ffs_realloccg(ip, lbprev, bprev, bpref, osize, nsize, flags, cred, bpp)
 	struct inode *ip;
 	ufs2_daddr_t lbprev;
 	ufs2_daddr_t bprev;
 	ufs2_daddr_t bpref;
 	int osize, nsize, flags;
 	struct ucred *cred;
 	struct buf **bpp;
 {
 	struct vnode *vp;
 	struct fs *fs;
 	struct buf *bp;
 	struct ufsmount *ump;
 	u_int cg, request, reclaimed;
 	int error, gbflags;
 	ufs2_daddr_t bno;
 	/* State for rate limiting the "filesystem full" message below. */
 	static struct timeval lastfail;
 	static int curfail;
 	int64_t delta;
 
 	vp = ITOV(ip);
 	ump = ITOUMP(ip);
 	fs = ump->um_fs;
 	bp = NULL;
 	/* Map the caller's BA_UNMAPPED request onto the GB_UNMAPPED read flag. */
 	gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
 
 	mtx_assert(UFS_MTX(ump), MA_OWNED);
 #ifdef INVARIANTS
 	if (vp->v_mount->mnt_kern_flag & MNTK_SUSPENDED)
 		panic("ffs_realloccg: allocation on suspended filesystem");
 	if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
 	    (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
 		printf(
 		"dev = %s, bsize = %ld, osize = %d, nsize = %d, fs = %s\n",
 		    devtoname(ump->um_dev), (long)fs->fs_bsize, osize,
 		    nsize, fs->fs_fsmnt);
 		panic("ffs_realloccg: bad size");
 	}
 	if (cred == NOCRED)
 		panic("ffs_realloccg: missing credential");
 #endif /* INVARIANTS */
 	/* Becomes 1 once a softdep cleanup has been requested; limits retries. */
 	reclaimed = 0;
 retry:
 	/*
 	 * Reserve check: only the growth (nsize - osize) is counted against
 	 * the fs_minfree reserve, and only when priv_check_cred() returns
 	 * non-zero for PRIV_VFS_BLOCKRESERVE.
 	 */
 	if (priv_check_cred(cred, PRIV_VFS_BLOCKRESERVE) &&
 	    freespace(fs, fs->fs_minfree) -  numfrags(fs, nsize - osize) < 0) {
 		goto nospace;
 	}
 	/* This check is unconditional; it is not limited to INVARIANTS. */
 	if (bprev == 0) {
 		printf("dev = %s, bsize = %ld, bprev = %jd, fs = %s\n",
 		    devtoname(ump->um_dev), (long)fs->fs_bsize, (intmax_t)bprev,
 		    fs->fs_fsmnt);
 		panic("ffs_realloccg: bad bprev");
 	}
 	UFS_UNLOCK(ump);
 	/*
 	 * Allocate the extra space in the buffer.
 	 */
 	error = bread_gb(vp, lbprev, osize, NOCRED, gbflags, &bp);
 	if (error) {
 		brelse(bp);
 		return (error);
 	}
 
 	/*
 	 * If the buffer's disk address still equals its logical block
 	 * number, install the address derived from bprev.  This is only
 	 * expected for direct blocks (lbprev < UFS_NDADDR).
 	 * NOTE(review): presumably b_blkno == b_lblkno marks a buffer whose
 	 * physical address has not been resolved yet -- confirm.
 	 */
 	if (bp->b_blkno == bp->b_lblkno) {
 		if (lbprev >= UFS_NDADDR)
 			panic("ffs_realloccg: lbprev out of range");
 		bp->b_blkno = fsbtodb(fs, bprev);
 	}
 
 #ifdef QUOTA
 	/* Charge only the growth; the UFS mutex is not held at this point. */
 	error = chkdq(ip, btodb(nsize - osize), cred, 0);
 	if (error) {
 		brelse(bp);
 		return (error);
 	}
 #endif
 	/*
 	 * Check for extension in the existing location.
 	 */
 	*bpp = NULL;
 	cg = dtog(fs, bprev);
 	UFS_LOCK(ump);
 	bno = ffs_fragextend(ip, cg, bprev, osize, nsize);
 	if (bno) {
 		/* Extended in place: the disk address must not have changed. */
 		if (bp->b_blkno != fsbtodb(fs, bno))
 			panic("ffs_realloccg: bad blockno");
 		delta = btodb(nsize - osize);
 		DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + delta);
 		if (flags & IO_EXT)
 			ip->i_flag |= IN_CHANGE;
 		else
 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
 		/* Grow the buffer and zero the bytes added past osize. */
 		allocbuf(bp, nsize);
 		bp->b_flags |= B_DONE;
 		vfs_bio_bzero_buf(bp, osize, nsize - osize);
 		if ((bp->b_flags & (B_MALLOC | B_VMIO)) == B_VMIO)
 			vfs_bio_set_valid(bp, osize, nsize - osize);
 		*bpp = bp;
 		return (0);
 	}
 	/*
 	 * Allocate a new disk location.
 	 */
 	if (bpref >= fs->fs_size)
 		bpref = 0;
 	/*
 	 * Choose how much to ask for ("request") according to the current
 	 * optimization policy, switching policy when the fragmentation
 	 * thresholds described below are crossed.
 	 */
 	switch ((int)fs->fs_optim) {
 	case FS_OPTSPACE:
 		/*
 		 * Allocate an exact sized fragment. Although this makes
 		 * best use of space, we will waste time relocating it if
 		 * the file continues to grow. If the fragmentation is
 		 * less than half of the minimum free reserve, we choose
 		 * to begin optimizing for time.
 		 */
 		request = nsize;
 		if (fs->fs_minfree <= 5 ||
 		    fs->fs_cstotal.cs_nffree >
 		    (off_t)fs->fs_dsize * fs->fs_minfree / (2 * 100))
 			break;
 		log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n",
 			fs->fs_fsmnt);
 		fs->fs_optim = FS_OPTTIME;
 		break;
 	case FS_OPTTIME:
 		/*
 		 * At this point we have discovered a file that is trying to
 		 * grow a small fragment to a larger fragment. To save time,
 		 * we allocate a full sized block, then free the unused portion.
 		 * If the file continues to grow, the `ffs_fragextend' call
 		 * above will be able to grow it in place without further
 		 * copying. If aberrant programs cause disk fragmentation to
 		 * grow within 2% of the free reserve, we choose to begin
 		 * optimizing for space.
 		 */
 		request = fs->fs_bsize;
 		if (fs->fs_cstotal.cs_nffree <
 		    (off_t)fs->fs_dsize * (fs->fs_minfree - 2) / 100)
 			break;
 		log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n",
 			fs->fs_fsmnt);
 		fs->fs_optim = FS_OPTSPACE;
 		break;
 	default:
 		printf("dev = %s, optim = %ld, fs = %s\n",
 		    devtoname(ump->um_dev), (long)fs->fs_optim, fs->fs_fsmnt);
 		panic("ffs_realloccg: bad optim");
 		/* NOTREACHED */
 	}
 	bno = ffs_hashalloc(ip, cg, bpref, request, nsize, ffs_alloccg);
 	if (bno > 0) {
 		/* Relocated: point the buffer at the new disk address. */
 		bp->b_blkno = fsbtodb(fs, bno);
 		if (!DOINGSOFTDEP(vp))
 			/*
 			 * The usual case is that a smaller fragment that
 			 * was just allocated has been replaced with a bigger
 			 * fragment or a full-size block. If it is marked as
 			 * B_DELWRI, the current contents have not been written
 			 * to disk. It is possible that the block was written
 			 * earlier, but very uncommon. If the block has never
 			 * been written, there is no need to send a BIO_DELETE
 			 * for it when it is freed. The gain from avoiding the
 			 * TRIMs for the common case of unwritten blocks far
 			 * exceeds the cost of the write amplification for the
 			 * uncommon case of failing to send a TRIM for a block
 			 * that had been written.
 			 */
 			ffs_blkfree(ump, fs, ump->um_devvp, bprev, (long)osize,
 			    ip->i_number, vp->v_type, NULL,
 			    (bp->b_flags & B_DELWRI) != 0 ?
 			    NOTRIM_KEY : SINGLETON_KEY);
 		delta = btodb(nsize - osize);
 		DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + delta);
 		if (flags & IO_EXT)
 			ip->i_flag |= IN_CHANGE;
 		else
 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
 		/* Grow the buffer and zero the bytes added past osize. */
 		allocbuf(bp, nsize);
 		bp->b_flags |= B_DONE;
 		vfs_bio_bzero_buf(bp, osize, nsize - osize);
 		if ((bp->b_flags & (B_MALLOC | B_VMIO)) == B_VMIO)
 			vfs_bio_set_valid(bp, osize, nsize - osize);
 		*bpp = bp;
 		return (0);
 	}
 #ifdef QUOTA
 	UFS_UNLOCK(ump);
 	/*
 	 * Restore user's disk quota because allocation failed.
 	 */
 	(void) chkdq(ip, -btodb(nsize - osize), cred, FORCE);
 	UFS_LOCK(ump);
 #endif
 	/*
 	 * The label sits after the quota refund because the reserve check
 	 * at "retry" jumps here before any quota has been charged.
 	 */
 nospace:
 	/*
 	 * no space available
 	 */
 	if (reclaimed == 0 && (flags & IO_BUFLOCKED) == 0) {
 		reclaimed = 1;
 		/* Release the buffer with the mutex dropped; retry re-reads it. */
 		UFS_UNLOCK(ump);
 		if (bp) {
 			brelse(bp);
 			bp = NULL;
 		}
 		UFS_LOCK(ump);
 		softdep_request_cleanup(fs, vp, cred, FLUSH_BLOCKS_WAIT);
 		goto retry;
 	}
 	UFS_UNLOCK(ump);
 	if (bp)
 		brelse(bp);
 	/* Report only after a cleanup was tried, rate limited by ppsratecheck(). */
 	if (reclaimed > 0 && ppsratecheck(&lastfail, &curfail, 1)) {
 		ffs_fserr(fs, ip->i_number, "filesystem full");
 		uprintf("\n%s: write failed, filesystem is full\n",
 		    fs->fs_fsmnt);
 	}
 	return (ENOSPC);
 }
 
 /*
  * Reallocate a sequence of blocks into a contiguous sequence of blocks.
  *
  * The vnode and an array of buffer pointers for a range of sequential
  * logical blocks to be made contiguous are given. The allocator attempts
  * to find a range of sequential blocks starting as close as possible
  * to the end of the allocation for the logical block immediately
  * preceding the current range. If successful, the physical block numbers
  * in the buffer pointers and in the inode are changed to reflect the new
  * allocation. If unsuccessful, the allocation is left unchanged. The
  * success in doing the reallocation is returned. Note that the error
  * return is not reflected back to the user. Rather the previous block
  * allocation will be used.
  */
 
 /* Parent node for the vfs.ffs.* tunables declared below. */
 SYSCTL_NODE(_vfs, OID_AUTO, ffs, CTLFLAG_RW, 0, "FFS filesystem");
 
 /*
  * When non-zero, the reallocblks code queues the modified indirect blocks
  * with bdwrite() and does not force ffs_update(); when zero it uses
  * bwrite() and ffs_update(vp, 1) instead.
  */
 static int doasyncfree = 1;
 SYSCTL_INT(_vfs_ffs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0,
 "do not force synchronous writes when blocks are reallocated");
 
 /* When zero, ffs_reallocblks() returns ENOSPC without doing any work. */
 static int doreallocblks = 1;
 SYSCTL_INT(_vfs_ffs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0,
 "enable block reallocation");
 
 /*
  * When zero, ffs_reallocblks() also declines to work on mounts that have
  * UM_CANDELETE set in um_flags.
  */
 static int dotrimcons = 1;
 SYSCTL_INT(_vfs_ffs, OID_AUTO, dotrimcons, CTLFLAG_RWTUN, &dotrimcons, 0,
 "enable BIO_DELETE / TRIM consolidation");
 
 /*
  * Upper bound on the number of cylinder groups the reallocblks search loop
  * will try; the loop uses min(maxclustersearch, fs->fs_ncg).
  */
 static int maxclustersearch = 10;
 SYSCTL_INT(_vfs_ffs, OID_AUTO, maxclustersearch, CTLFLAG_RW, &maxclustersearch,
 0, "max number of cylinder group to search for contigous blocks");
 
 /*
  * DEBUG kernels only: while non-zero, each successful reallocation prints
  * the old and new block lists and then decrements this counter.
  */
 #ifdef DEBUG
 static volatile int prtrealloc = 0;
 #endif
 
 int
 ffs_reallocblks(ap)
 	struct vop_reallocblks_args /* {
 		struct vnode *a_vp;
 		struct cluster_save *a_buflist;
 	} */ *ap;
 {
 	struct ufsmount *ump;
 
 	/*
 	 * We used to skip reallocating the blocks of a file into a
 	 * contiguous sequence if the underlying flash device requested
 	 * BIO_DELETE notifications, because devices that benefit from
 	 * BIO_DELETE also benefit from not moving the data. However,
 	 * the destination for the data is usually moved before the data
 	 * is written to the initially allocated location, so we rarely
 	 * suffer the penalty of extra writes. With the addition of the
 	 * consolidation of contiguous blocks into single BIO_DELETE
 	 * operations, having fewer but larger contiguous blocks reduces
 	 * the number of (slow and expensive) BIO_DELETE operations. So
 	 * when doing BIO_DELETE consolidation, we do block reallocation.
 	 *
 	 * Skip if reallocblks has been disabled globally.
 	 */
 	ump = ap->a_vp->v_mount->mnt_data;
 	if ((((ump->um_flags) & UM_CANDELETE) != 0 && dotrimcons == 0) ||
 	    doreallocblks == 0)
 		return (ENOSPC);
 
 	/*
 	 * We can't wait in softdep prealloc as it may fsync and recurse
 	 * here.  Instead we simply fail to reallocate blocks if this
 	 * rare condition arises.
 	 */
 	if (DOINGSOFTDEP(ap->a_vp))
 		if (softdep_prealloc(ap->a_vp, MNT_NOWAIT) != 0)
 			return (ENOSPC);
 	if (ump->um_fstype == UFS1)
 		return (ffs_reallocblks_ufs1(ap));
 	return (ffs_reallocblks_ufs2(ap));
 }
 	
 static int
 ffs_reallocblks_ufs1(ap)
 	struct vop_reallocblks_args /* {
 		struct vnode *a_vp;
 		struct cluster_save *a_buflist;
 	} */ *ap;
 {
 	struct fs *fs;
 	struct inode *ip;
 	struct vnode *vp;
 	struct buf *sbp, *ebp, *bp;
 	ufs1_daddr_t *bap, *sbap, *ebap;
 	struct cluster_save *buflist;
 	struct ufsmount *ump;
 	ufs_lbn_t start_lbn, end_lbn;
 	ufs1_daddr_t soff, newblk, blkno;
 	ufs2_daddr_t pref;
 	struct indir start_ap[UFS_NIADDR + 1], end_ap[UFS_NIADDR + 1], *idp;
 	int i, cg, len, start_lvl, end_lvl, ssize;
 
 	vp = ap->a_vp;
 	ip = VTOI(vp);
 	ump = ITOUMP(ip);
 	fs = ump->um_fs;
 	/*
 	 * If we are not tracking block clusters or if we have less than 4%
 	 * free blocks left, then do not attempt to cluster. Running with
 	 * less than 5% free block reserve is not recommended and those that
 	 * choose to do so do not expect to have good file layout.
 	 */
 	if (fs->fs_contigsumsize <= 0 || freespace(fs, 4) < 0)
 		return (ENOSPC);
 	buflist = ap->a_buflist;
 	len = buflist->bs_nchildren;
 	start_lbn = buflist->bs_children[0]->b_lblkno;
 	end_lbn = start_lbn + len - 1;
 #ifdef INVARIANTS
 	for (i = 0; i < len; i++)
 		if (!ffs_checkblk(ip,
 		   dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
 			panic("ffs_reallocblks: unallocated block 1");
 	for (i = 1; i < len; i++)
 		if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
 			panic("ffs_reallocblks: non-logical cluster");
 	blkno = buflist->bs_children[0]->b_blkno;
 	ssize = fsbtodb(fs, fs->fs_frag);
 	for (i = 1; i < len - 1; i++)
 		if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize))
 			panic("ffs_reallocblks: non-physical cluster %d", i);
 #endif
 	/*
 	 * If the cluster crosses the boundary for the first indirect
 	 * block, leave space for the indirect block. Indirect blocks
 	 * are initially laid out in a position after the last direct
 	 * block. Block reallocation would usually destroy locality by
 	 * moving the indirect block out of the way to make room for
 	 * data blocks if we didn't compensate here. We should also do
 	 * this for other indirect block boundaries, but it is only
 	 * important for the first one.
 	 */
 	if (start_lbn < UFS_NDADDR && end_lbn >= UFS_NDADDR)
 		return (ENOSPC);
 	/*
 	 * If the latest allocation is in a new cylinder group, assume that
 	 * the filesystem has decided to move and do not force it back to
 	 * the previous cylinder group.
 	 */
 	if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
 	    dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
 		return (ENOSPC);
 	if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
 	    ufs_getlbns(vp, end_lbn, end_ap, &end_lvl))
 		return (ENOSPC);
 	/*
 	 * Get the starting offset and block map for the first block.
 	 */
 	if (start_lvl == 0) {
 		sbap = &ip->i_din1->di_db[0];
 		soff = start_lbn;
 	} else {
 		idp = &start_ap[start_lvl - 1];
 		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) {
 			brelse(sbp);
 			return (ENOSPC);
 		}
 		sbap = (ufs1_daddr_t *)sbp->b_data;
 		soff = idp->in_off;
 	}
 	/*
 	 * If the block range spans two block maps, get the second map.
 	 */
 	ebap = NULL;
 	if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
 		ssize = len;
 	} else {
 #ifdef INVARIANTS
 		if (start_lvl > 0 &&
 		    start_ap[start_lvl - 1].in_lbn == idp->in_lbn)
 			panic("ffs_reallocblk: start == end");
 #endif
 		ssize = len - (idp->in_off + 1);
 		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp))
 			goto fail;
 		ebap = (ufs1_daddr_t *)ebp->b_data;
 	}
 	/*
 	 * Find the preferred location for the cluster. If we have not
 	 * previously failed at this endeavor, then follow our standard
 	 * preference calculation. If we have failed at it, then pick up
 	 * where we last ended our search.
 	 */
 	UFS_LOCK(ump);
 	if (ip->i_nextclustercg == -1)
 		pref = ffs_blkpref_ufs1(ip, start_lbn, soff, sbap);
 	else
 		pref = cgdata(fs, ip->i_nextclustercg);
 	/*
 	 * Search the block map looking for an allocation of the desired size.
 	 * To avoid wasting too much time, we limit the number of cylinder
 	 * groups that we will search.
 	 */
 	cg = dtog(fs, pref);
 	for (i = min(maxclustersearch, fs->fs_ncg); i > 0; i--) {
 		if ((newblk = ffs_clusteralloc(ip, cg, pref, len)) != 0)
 			break;
 		cg += 1;
 		if (cg >= fs->fs_ncg)
 			cg = 0;
 	}
 	/*
 	 * If we have failed in our search, record where we gave up for
 	 * next time. Otherwise, fall back to our usual search citerion.
 	 */
 	if (newblk == 0) {
 		ip->i_nextclustercg = cg;
 		UFS_UNLOCK(ump);
 		goto fail;
 	}
 	ip->i_nextclustercg = -1;
 	/*
 	 * We have found a new contiguous block.
 	 *
 	 * First we have to replace the old block pointers with the new
 	 * block pointers in the inode and indirect blocks associated
 	 * with the file.
 	 */
 #ifdef DEBUG
 	if (prtrealloc)
 		printf("realloc: ino %ju, lbns %jd-%jd\n\told:",
 		    (uintmax_t)ip->i_number,
 		    (intmax_t)start_lbn, (intmax_t)end_lbn);
 #endif
 	blkno = newblk;
 	for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) {
 		if (i == ssize) {
 			bap = ebap;
 			soff = -i;
 		}
 #ifdef INVARIANTS
 		if (!ffs_checkblk(ip,
 		   dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
 			panic("ffs_reallocblks: unallocated block 2");
 		if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap)
 			panic("ffs_reallocblks: alloc mismatch");
 #endif
 #ifdef DEBUG
 		if (prtrealloc)
 			printf(" %d,", *bap);
 #endif
 		if (DOINGSOFTDEP(vp)) {
 			if (sbap == &ip->i_din1->di_db[0] && i < ssize)
 				softdep_setup_allocdirect(ip, start_lbn + i,
 				    blkno, *bap, fs->fs_bsize, fs->fs_bsize,
 				    buflist->bs_children[i]);
 			else
 				softdep_setup_allocindir_page(ip, start_lbn + i,
 				    i < ssize ? sbp : ebp, soff + i, blkno,
 				    *bap, buflist->bs_children[i]);
 		}
 		*bap++ = blkno;
 	}
 	/*
 	 * Next we must write out the modified inode and indirect blocks.
 	 * For strict correctness, the writes should be synchronous since
 	 * the old block values may have been written to disk. In practise
 	 * they are almost never written, but if we are concerned about
 	 * strict correctness, the `doasyncfree' flag should be set to zero.
 	 *
 	 * The test on `doasyncfree' should be changed to test a flag
 	 * that shows whether the associated buffers and inodes have
 	 * been written. The flag should be set when the cluster is
 	 * started and cleared whenever the buffer or inode is flushed.
 	 * We can then check below to see if it is set, and do the
 	 * synchronous write only when it has been cleared.
 	 */
 	if (sbap != &ip->i_din1->di_db[0]) {
 		if (doasyncfree)
 			bdwrite(sbp);
 		else
 			bwrite(sbp);
 	} else {
 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
 		if (!doasyncfree)
 			ffs_update(vp, 1);
 	}
 	if (ssize < len) {
 		if (doasyncfree)
 			bdwrite(ebp);
 		else
 			bwrite(ebp);
 	}
 	/*
 	 * Last, free the old blocks and assign the new blocks to the buffers.
 	 */
 #ifdef DEBUG
 	if (prtrealloc)
 		printf("\n\tnew:");
 #endif
 	for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
 		bp = buflist->bs_children[i];
 		if (!DOINGSOFTDEP(vp))
 			/*
 			 * The usual case is that a set of N-contiguous blocks
 			 * that was just allocated has been replaced with a
 			 * set of N+1-contiguous blocks. If they are marked as
 			 * B_DELWRI, the current contents have not been written
 			 * to disk. It is possible that the blocks were written
 			 * earlier, but very uncommon. If the blocks have never
 			 * been written, there is no need to send a BIO_DELETE
 			 * for them when they are freed. The gain from avoiding
 			 * the TRIMs for the common case of unwritten blocks
 			 * far exceeds the cost of the write amplification for
 			 * the uncommon case of failing to send a TRIM for the
 			 * blocks that had been written.
 			 */
 			ffs_blkfree(ump, fs, ump->um_devvp,
 			    dbtofsb(fs, bp->b_blkno),
 			    fs->fs_bsize, ip->i_number, vp->v_type, NULL,
 			    (bp->b_flags & B_DELWRI) != 0 ?
 			    NOTRIM_KEY : SINGLETON_KEY);
 		bp->b_blkno = fsbtodb(fs, blkno);
 #ifdef INVARIANTS
 		if (!ffs_checkblk(ip, dbtofsb(fs, bp->b_blkno), fs->fs_bsize))
 			panic("ffs_reallocblks: unallocated block 3");
 #endif
 #ifdef DEBUG
 		if (prtrealloc)
 			printf(" %d,", blkno);
 #endif
 	}
 #ifdef DEBUG
 	if (prtrealloc) {
 		prtrealloc--;
 		printf("\n");
 	}
 #endif
 	return (0);
 
 fail:
 	if (ssize < len)
 		brelse(ebp);
 	if (sbap != &ip->i_din1->di_db[0])
 		brelse(sbp);
 	return (ENOSPC);
 }
 
 static int
 ffs_reallocblks_ufs2(ap)
 	struct vop_reallocblks_args /* {
 		struct vnode *a_vp;
 		struct cluster_save *a_buflist;
 	} */ *ap;
 {
 	struct fs *fs;
 	struct inode *ip;
 	struct vnode *vp;
 	struct buf *sbp, *ebp, *bp;
 	ufs2_daddr_t *bap, *sbap, *ebap;
 	struct cluster_save *buflist;
 	struct ufsmount *ump;
 	ufs_lbn_t start_lbn, end_lbn;
 	ufs2_daddr_t soff, newblk, blkno, pref;
 	struct indir start_ap[UFS_NIADDR + 1], end_ap[UFS_NIADDR + 1], *idp;
 	int i, cg, len, start_lvl, end_lvl, ssize;
 
 	vp = ap->a_vp;
 	ip = VTOI(vp);
 	ump = ITOUMP(ip);
 	fs = ump->um_fs;
 	/*
 	 * If we are not tracking block clusters or if we have less than 4%
 	 * free blocks left, then do not attempt to cluster. Running with
 	 * less than 5% free block reserve is not recommended and those that
 	 * choose to do so do not expect to have good file layout.
 	 */
 	if (fs->fs_contigsumsize <= 0 || freespace(fs, 4) < 0)
 		return (ENOSPC);
 	buflist = ap->a_buflist;
 	len = buflist->bs_nchildren;
 	start_lbn = buflist->bs_children[0]->b_lblkno;
 	end_lbn = start_lbn + len - 1;
 #ifdef INVARIANTS
 	for (i = 0; i < len; i++)
 		if (!ffs_checkblk(ip,
 		   dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
 			panic("ffs_reallocblks: unallocated block 1");
 	for (i = 1; i < len; i++)
 		if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
 			panic("ffs_reallocblks: non-logical cluster");
 	blkno = buflist->bs_children[0]->b_blkno;
 	ssize = fsbtodb(fs, fs->fs_frag);
 	for (i = 1; i < len - 1; i++)
 		if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize))
 			panic("ffs_reallocblks: non-physical cluster %d", i);
 #endif
 	/*
 	 * If the cluster crosses the boundary for the first indirect
 	 * block, do not move anything in it. Indirect blocks are
 	 * usually initially laid out in a position between the data
 	 * blocks. Block reallocation would usually destroy locality by
 	 * moving the indirect block out of the way to make room for
 	 * data blocks if we didn't compensate here. We should also do
 	 * this for other indirect block boundaries, but it is only
 	 * important for the first one.
 	 */
 	if (start_lbn < UFS_NDADDR && end_lbn >= UFS_NDADDR)
 		return (ENOSPC);
 	/*
 	 * If the latest allocation is in a new cylinder group, assume that
 	 * the filesystem has decided to move and do not force it back to
 	 * the previous cylinder group.
 	 */
 	if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
 	    dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
 		return (ENOSPC);
 	if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
 	    ufs_getlbns(vp, end_lbn, end_ap, &end_lvl))
 		return (ENOSPC);
 	/*
 	 * Get the starting offset and block map for the first block.
 	 */
 	if (start_lvl == 0) {
 		sbap = &ip->i_din2->di_db[0];
 		soff = start_lbn;
 	} else {
 		idp = &start_ap[start_lvl - 1];
 		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) {
 			brelse(sbp);
 			return (ENOSPC);
 		}
 		sbap = (ufs2_daddr_t *)sbp->b_data;
 		soff = idp->in_off;
 	}
 	/*
 	 * If the block range spans two block maps, get the second map.
 	 */
 	ebap = NULL;
 	if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
 		ssize = len;
 	} else {
 #ifdef INVARIANTS
 		if (start_lvl > 0 &&
 		    start_ap[start_lvl - 1].in_lbn == idp->in_lbn)
 			panic("ffs_reallocblk: start == end");
 #endif
 		ssize = len - (idp->in_off + 1);
 		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp))
 			goto fail;
 		ebap = (ufs2_daddr_t *)ebp->b_data;
 	}
 	/*
 	 * Find the preferred location for the cluster. If we have not
 	 * previously failed at this endeavor, then follow our standard
 	 * preference calculation. If we have failed at it, then pick up
 	 * where we last ended our search.
 	 */
 	UFS_LOCK(ump);
 	if (ip->i_nextclustercg == -1)
 		pref = ffs_blkpref_ufs2(ip, start_lbn, soff, sbap);
 	else
 		pref = cgdata(fs, ip->i_nextclustercg);
 	/*
 	 * Search the block map looking for an allocation of the desired size.
 	 * To avoid wasting too much time, we limit the number of cylinder
 	 * groups that we will search.
 	 */
 	cg = dtog(fs, pref);
 	for (i = min(maxclustersearch, fs->fs_ncg); i > 0; i--) {
 		if ((newblk = ffs_clusteralloc(ip, cg, pref, len)) != 0)
 			break;
 		cg += 1;
 		if (cg >= fs->fs_ncg)
 			cg = 0;
 	}
 	/*
 	 * If we have failed in our search, record where we gave up for
 	 * next time. Otherwise, fall back to our usual search citerion.
 	 */
 	if (newblk == 0) {
 		ip->i_nextclustercg = cg;
 		UFS_UNLOCK(ump);
 		goto fail;
 	}
 	ip->i_nextclustercg = -1;
 	/*
 	 * We have found a new contiguous block.
 	 *
 	 * First we have to replace the old block pointers with the new
 	 * block pointers in the inode and indirect blocks associated
 	 * with the file.
 	 */
 #ifdef DEBUG
 	if (prtrealloc)
 		printf("realloc: ino %ju, lbns %jd-%jd\n\told:", (uintmax_t)ip->i_number,
 		    (intmax_t)start_lbn, (intmax_t)end_lbn);
 #endif
 	blkno = newblk;
 	for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) {
 		if (i == ssize) {
 			bap = ebap;
 			soff = -i;
 		}
 #ifdef INVARIANTS
 		if (!ffs_checkblk(ip,
 		   dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize))
 			panic("ffs_reallocblks: unallocated block 2");
 		if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap)
 			panic("ffs_reallocblks: alloc mismatch");
 #endif
 #ifdef DEBUG
 		if (prtrealloc)
 			printf(" %jd,", (intmax_t)*bap);
 #endif
 		if (DOINGSOFTDEP(vp)) {
 			if (sbap == &ip->i_din2->di_db[0] && i < ssize)
 				softdep_setup_allocdirect(ip, start_lbn + i,
 				    blkno, *bap, fs->fs_bsize, fs->fs_bsize,
 				    buflist->bs_children[i]);
 			else
 				softdep_setup_allocindir_page(ip, start_lbn + i,
 				    i < ssize ? sbp : ebp, soff + i, blkno,
 				    *bap, buflist->bs_children[i]);
 		}
 		*bap++ = blkno;
 	}
 	/*
 	 * Next we must write out the modified inode and indirect blocks.
 	 * For strict correctness, the writes should be synchronous since
 	 * the old block values may have been written to disk. In practise
 	 * they are almost never written, but if we are concerned about
 	 * strict correctness, the `doasyncfree' flag should be set to zero.
 	 *
 	 * The test on `doasyncfree' should be changed to test a flag
 	 * that shows whether the associated buffers and inodes have
 	 * been written. The flag should be set when the cluster is
 	 * started and cleared whenever the buffer or inode is flushed.
 	 * We can then check below to see if it is set, and do the
 	 * synchronous write only when it has been cleared.
 	 */
 	if (sbap != &ip->i_din2->di_db[0]) {
 		if (doasyncfree)
 			bdwrite(sbp);
 		else
 			bwrite(sbp);
 	} else {
 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
 		if (!doasyncfree)
 			ffs_update(vp, 1);
 	}
 	if (ssize < len) {
 		if (doasyncfree)
 			bdwrite(ebp);
 		else
 			bwrite(ebp);
 	}
 	/*
 	 * Last, free the old blocks and assign the new blocks to the buffers.
 	 */
 #ifdef DEBUG
 	if (prtrealloc)
 		printf("\n\tnew:");
 #endif
 	for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
 		bp = buflist->bs_children[i];
 		if (!DOINGSOFTDEP(vp))
 			/*
 			 * The usual case is that a set of N-contiguous blocks
 			 * that was just allocated has been replaced with a
 			 * set of N+1-contiguous blocks. If they are marked as
 			 * B_DELWRI, the current contents have not been written
 			 * to disk. It is possible that the blocks were written
 			 * earlier, but very uncommon. If the blocks have never
 			 * been written, there is no need to send a BIO_DELETE
 			 * for them when they are freed. The gain from avoiding
 			 * the TRIMs for the common case of unwritten blocks
 			 * far exceeds the cost of the write amplification for
 			 * the uncommon case of failing to send a TRIM for the
 			 * blocks that had been written.
 			 */
 			ffs_blkfree(ump, fs, ump->um_devvp,
 			    dbtofsb(fs, bp->b_blkno),
 			    fs->fs_bsize, ip->i_number, vp->v_type, NULL,
 			    (bp->b_flags & B_DELWRI) != 0 ?
 			    NOTRIM_KEY : SINGLETON_KEY);
 		bp->b_blkno = fsbtodb(fs, blkno);
 #ifdef INVARIANTS
 		if (!ffs_checkblk(ip, dbtofsb(fs, bp->b_blkno), fs->fs_bsize))
 			panic("ffs_reallocblks: unallocated block 3");
 #endif
 #ifdef DEBUG
 		if (prtrealloc)
 			printf(" %jd,", (intmax_t)blkno);
 #endif
 	}
 #ifdef DEBUG
 	if (prtrealloc) {
 		prtrealloc--;
 		printf("\n");
 	}
 #endif
 	return (0);
 
 fail:
 	if (ssize < len)
 		brelse(ebp);
 	if (sbap != &ip->i_din2->di_db[0])
 		brelse(sbp);
 	return (ENOSPC);
 }
 
 /*
  * Allocate an inode in the filesystem.
  *
  * If allocating a directory, use ffs_dirpref to select the inode.
  * If allocating in a directory, the following hierarchy is followed:
  *   1) allocate the preferred inode.
  *   2) allocate an inode in the same cylinder group.
  *   3) quadradically rehash into other cylinder groups, until an
  *      available inode is located.
  * If no inode preference is given the following hierarchy is used
  * to allocate an inode:
  *   1) allocate an inode in cylinder group 0.
  *   2) quadradically rehash into other cylinder groups, until an
  *      available inode is located.
  */
 int
 ffs_valloc(pvp, mode, cred, vpp)
 	struct vnode *pvp;
 	int mode;
 	struct ucred *cred;
 	struct vnode **vpp;
 {
 	struct inode *pip;
 	struct fs *fs;
 	struct inode *ip;
 	struct timespec ts;
 	struct ufsmount *ump;
 	ino_t ino, ipref;
 	u_int cg;
 	int error, error1, reclaimed;
 	static struct timeval lastfail;
 	static int curfail;
 
 	*vpp = NULL;
 	pip = VTOI(pvp);
 	ump = ITOUMP(pip);
 	fs = ump->um_fs;
 
 	UFS_LOCK(ump);
 	reclaimed = 0;
 retry:
 	if (fs->fs_cstotal.cs_nifree == 0)
 		goto noinodes;
 
 	if ((mode & IFMT) == IFDIR)
 		ipref = ffs_dirpref(pip);
 	else
 		ipref = pip->i_number;
 	if (ipref >= fs->fs_ncg * fs->fs_ipg)
 		ipref = 0;
 	cg = ino_to_cg(fs, ipref);
 	/*
 	 * Track number of dirs created one after another
 	 * in a same cg without intervening by files.
 	 */
 	if ((mode & IFMT) == IFDIR) {
 		if (fs->fs_contigdirs[cg] < 255)
 			fs->fs_contigdirs[cg]++;
 	} else {
 		if (fs->fs_contigdirs[cg] > 0)
 			fs->fs_contigdirs[cg]--;
 	}
 	ino = (ino_t)ffs_hashalloc(pip, cg, ipref, mode, 0,
 					(allocfcn_t *)ffs_nodealloccg);
 	if (ino == 0)
 		goto noinodes;
 	error = ffs_vget(pvp->v_mount, ino, LK_EXCLUSIVE, vpp);
 	if (error) {
 		error1 = ffs_vgetf(pvp->v_mount, ino, LK_EXCLUSIVE, vpp,
 		    FFSV_FORCEINSMQ);
 		ffs_vfree(pvp, ino, mode);
 		if (error1 == 0) {
 			ip = VTOI(*vpp);
 			if (ip->i_mode)
 				goto dup_alloc;
 			ip->i_flag |= IN_MODIFIED;
 			vput(*vpp);
 		}
 		return (error);
 	}
 	ip = VTOI(*vpp);
 	if (ip->i_mode) {
 dup_alloc:
 		printf("mode = 0%o, inum = %ju, fs = %s\n",
 		    ip->i_mode, (uintmax_t)ip->i_number, fs->fs_fsmnt);
 		panic("ffs_valloc: dup alloc");
 	}
 	if (DIP(ip, i_blocks) && (fs->fs_flags & FS_UNCLEAN) == 0) {  /* XXX */
 		printf("free inode %s/%lu had %ld blocks\n",
 		    fs->fs_fsmnt, (u_long)ino, (long)DIP(ip, i_blocks));
 		DIP_SET(ip, i_blocks, 0);
 	}
 	ip->i_flags = 0;
 	DIP_SET(ip, i_flags, 0);
 	/*
 	 * Set up a new generation number for this inode.
 	 */
 	while (ip->i_gen == 0 || ++ip->i_gen == 0)
 		ip->i_gen = arc4random();
 	DIP_SET(ip, i_gen, ip->i_gen);
 	if (fs->fs_magic == FS_UFS2_MAGIC) {
 		vfs_timestamp(&ts);
 		ip->i_din2->di_birthtime = ts.tv_sec;
 		ip->i_din2->di_birthnsec = ts.tv_nsec;
 	}
 	ufs_prepare_reclaim(*vpp);
 	ip->i_flag = 0;
 	(*vpp)->v_vflag = 0;
 	(*vpp)->v_type = VNON;
 	if (fs->fs_magic == FS_UFS2_MAGIC) {
 		(*vpp)->v_op = &ffs_vnodeops2;
 		ip->i_flag |= IN_UFS2;
 	} else {
 		(*vpp)->v_op = &ffs_vnodeops1;
 	}
 	return (0);
 noinodes:
 	if (reclaimed == 0) {
 		reclaimed = 1;
 		softdep_request_cleanup(fs, pvp, cred, FLUSH_INODES_WAIT);
 		goto retry;
 	}
 	UFS_UNLOCK(ump);
 	if (ppsratecheck(&lastfail, &curfail, 1)) {
 		ffs_fserr(fs, pip->i_number, "out of inodes");
 		uprintf("\n%s: create/symlink failed, no inodes free\n",
 		    fs->fs_fsmnt);
 	}
 	return (ENOSPC);
 }
 
 /*
  * Find a cylinder group to place a directory.
  *
  * The policy implemented by this algorithm is to allocate a
  * directory inode in the same cylinder group as its parent
  * directory, but also to reserve space for its files inodes
  * and data. Restrict the number of directories which may be
  * allocated one after another in the same cylinder group
  * without intervening allocation of files.
  *
  * If we allocate a first level directory then force allocation
  * in another cylinder group.
  */
 static ino_t
 ffs_dirpref(pip)
 	struct inode *pip;
 {
 	struct fs *fs;
 	int cg, prefcg, dirsize, cgsize;
 	u_int avgifree, avgbfree, avgndir, curdirsize;
 	u_int minifree, minbfree, maxndir;
 	u_int mincg, minndir;
 	u_int maxcontigdirs;
 
 	mtx_assert(UFS_MTX(ITOUMP(pip)), MA_OWNED);
 	fs = ITOFS(pip);
 
 	avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
 	avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
 	avgndir = fs->fs_cstotal.cs_ndir / fs->fs_ncg;
 
 	/*
 	 * Force allocation in another cg if creating a first level dir.
 	 */
 	ASSERT_VOP_LOCKED(ITOV(pip), "ffs_dirpref");
 	if (ITOV(pip)->v_vflag & VV_ROOT) {
 		prefcg = arc4random() % fs->fs_ncg;
 		mincg = prefcg;
 		minndir = fs->fs_ipg;
 		for (cg = prefcg; cg < fs->fs_ncg; cg++)
 			if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
 			    fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
 			    fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
 				mincg = cg;
 				minndir = fs->fs_cs(fs, cg).cs_ndir;
 			}
 		for (cg = 0; cg < prefcg; cg++)
 			if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
 			    fs->fs_cs(fs, cg).cs_nifree >= avgifree &&
 			    fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
 				mincg = cg;
 				minndir = fs->fs_cs(fs, cg).cs_ndir;
 			}
 		return ((ino_t)(fs->fs_ipg * mincg));
 	}
 
 	/*
 	 * Count various limits which used for
 	 * optimal allocation of a directory inode.
 	 */
 	maxndir = min(avgndir + fs->fs_ipg / 16, fs->fs_ipg);
 	minifree = avgifree - avgifree / 4;
 	if (minifree < 1)
 		minifree = 1;
 	minbfree = avgbfree - avgbfree / 4;
 	if (minbfree < 1)
 		minbfree = 1;
 	cgsize = fs->fs_fsize * fs->fs_fpg;
 	dirsize = fs->fs_avgfilesize * fs->fs_avgfpdir;
 	curdirsize = avgndir ? (cgsize - avgbfree * fs->fs_bsize) / avgndir : 0;
 	if (dirsize < curdirsize)
 		dirsize = curdirsize;
 	if (dirsize <= 0)
 		maxcontigdirs = 0;		/* dirsize overflowed */
 	else
 		maxcontigdirs = min((avgbfree * fs->fs_bsize) / dirsize, 255);
 	if (fs->fs_avgfpdir > 0)
 		maxcontigdirs = min(maxcontigdirs,
 				    fs->fs_ipg / fs->fs_avgfpdir);
 	if (maxcontigdirs == 0)
 		maxcontigdirs = 1;
 
 	/*
 	 * Limit number of dirs in one cg and reserve space for 
 	 * regular files, but only if we have no deficit in
 	 * inodes or space.
 	 *
 	 * We are trying to find a suitable cylinder group nearby
 	 * our preferred cylinder group to place a new directory.
 	 * We scan from our preferred cylinder group forward looking
 	 * for a cylinder group that meets our criterion. If we get
 	 * to the final cylinder group and do not find anything,
 	 * we start scanning forwards from the beginning of the
 	 * filesystem. While it might seem sensible to start scanning
 	 * backwards or even to alternate looking forward and backward,
 	 * this approach fails badly when the filesystem is nearly full.
 	 * Specifically, we first search all the areas that have no space
 	 * and finally try the one preceding that. We repeat this on
 	 * every request and in the case of the final block end up
 	 * searching the entire filesystem. By jumping to the front
 	 * of the filesystem, our future forward searches always look
 	 * in new cylinder groups so finds every possible block after
 	 * one pass over the filesystem.
 	 */
 	prefcg = ino_to_cg(fs, pip->i_number);
 	for (cg = prefcg; cg < fs->fs_ncg; cg++)
 		if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
 		    fs->fs_cs(fs, cg).cs_nifree >= minifree &&
 		    fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
 			if (fs->fs_contigdirs[cg] < maxcontigdirs)
 				return ((ino_t)(fs->fs_ipg * cg));
 		}
 	for (cg = 0; cg < prefcg; cg++)
 		if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
 		    fs->fs_cs(fs, cg).cs_nifree >= minifree &&
 		    fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
 			if (fs->fs_contigdirs[cg] < maxcontigdirs)
 				return ((ino_t)(fs->fs_ipg * cg));
 		}
 	/*
 	 * This is a backstop when we have deficit in space.
 	 */
 	for (cg = prefcg; cg < fs->fs_ncg; cg++)
 		if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
 			return ((ino_t)(fs->fs_ipg * cg));
 	for (cg = 0; cg < prefcg; cg++)
 		if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
 			break;
 	return ((ino_t)(fs->fs_ipg * cg));
 }
 
 /*
  * Pick the preferred location for the next block of a file (UFS1).
  *
  * A file is treated as a sequence of sections.  The first section covers
  * the direct blocks plus the next fs_maxbpg blocks; every later section
  * covers fs_maxbpg blocks.
  *
  * While the first section is empty, blocks are requested in the cylinder
  * group of the file's inode.  The first indirect block is placed right
  * after the last direct block, and the data blocks it maps follow it.
  *
  * For any other empty section, indirect blocks go to the area reserved
  * for them after the inode blocks of the inode's cylinder group.  Data
  * blocks go to a cylinder group holding at least the average number of
  * free blocks, located by sweeping forward from the group after the one
  * used by the previous allocation.  When the previous allocation is not
  * known (first entry of an indirect block, or the preceding block is a
  * hole) the starting group is guessed from the logical block number.
  *
  * Inside a section that already has blocks, allocation simply continues
  * contiguously after the preceding block.
  *
  * The UFS mutex must be held.  Returns the preferred filesystem block
  * address, or 0 if no cylinder group qualified.
  */
 ufs2_daddr_t
 ffs_blkpref_ufs1(ip, lbn, indx, bap)
 	struct inode *ip;
 	ufs_lbn_t lbn;
 	int indx;
 	ufs1_daddr_t *bap;
 {
 	struct fs *fs;
 	u_int cg, inocg;
 	u_int avgbfree, startcg;
 	ufs2_daddr_t pref;
 
 	KASSERT(indx <= 0 || bap != NULL, ("need non-NULL bap"));
 	mtx_assert(UFS_MTX(ITOUMP(ip)), MA_OWNED);
 	fs = ITOFS(ip);
 	inocg = ino_to_cg(fs, ip->i_number);
 
 	/*
 	 * A negative indx asks for an indirect block: -1 single, -2 double,
 	 * -3 triple.  The first indirect is kept inline with the file data
 	 * when the last direct block exists; all others use the metadata
 	 * zone at the start of the data area of the inode's cylinder group,
 	 * which is set aside (fs_metaspace blocks) to keep indirect blocks
 	 * close together for random access and for fsck.
 	 */
 	if (indx < 0) {
 		if (indx == -1 && lbn < UFS_NDADDR + NINDIR(fs) &&
 		    ip->i_din1->di_db[UFS_NDADDR - 1] != 0)
 			pref = ip->i_din1->di_db[UFS_NDADDR - 1] + fs->fs_frag;
 		else
 			pref = cgmeta(fs, inocg);
 		return (pref);
 	}
 
 	/*
 	 * First data block mapped by the first indirect block: if that
 	 * indirect sits in the data area of the inode's cylinder group,
 	 * put the data block directly behind it.
 	 */
 	if (lbn == UFS_NDADDR) {
 		pref = ip->i_din1->di_ib[0];
 		if (pref != 0 && pref >= cgdata(fs, inocg) &&
 		    pref < cgbase(fs, inocg + 1))
 			return (pref + fs->fs_frag);
 	}
 
 	/*
 	 * Common case: we are inside a section and the preceding block
 	 * is allocated, so continue contiguously after it.
 	 */
 	if (indx % fs->fs_maxbpg != 0 && bap[indx - 1] != 0)
 		return (bap[indx - 1] + fs->fs_frag);
 
 	/*
 	 * Otherwise a new starting point is needed: we are at the start of
 	 * a section or the preceding block is missing.
 	 */
 
 	/* Directory data blocks belong in the metadata area. */
 	if ((ip->i_mode & IFMT) == IFDIR)
 		return (cgmeta(fs, inocg));
 
 	/*
 	 * Stay in the data area of the inode's cylinder group until the
 	 * direct blocks and the first indirect's blocks are used up.
 	 */
 	if (lbn < UFS_NDADDR + NINDIR(fs))
 		return (cgdata(fs, inocg));
 
 	/*
 	 * Sweep for a cylinder group with at least the average number of
 	 * free blocks, from startcg to the end and then from group zero
 	 * up to and including startcg.
 	 */
 	if (indx == 0 || bap[indx - 1] == 0)
 		startcg = inocg + lbn / fs->fs_maxbpg;
 	else
 		startcg = dtog(fs, bap[indx - 1]) + 1;
 	startcg %= fs->fs_ncg;
 	avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
 	for (cg = startcg; cg < fs->fs_ncg; cg++) {
 		if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
 			fs->fs_cgrotor = cg;
 			return (cgdata(fs, cg));
 		}
 	}
 	for (cg = 0; cg <= startcg; cg++) {
 		if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
 			fs->fs_cgrotor = cg;
 			return (cgdata(fs, cg));
 		}
 	}
 	return (0);
 }
 
 /*
  * Pick the preferred location for the next block of a file (UFS2).
  *
  * This applies the same placement policy as ffs_blkpref_ufs1(), reading
  * the UFS2 dinode (i_din2) and taking a ufs2_daddr_t block map.
  *
  * The UFS mutex must be held.  Returns the preferred filesystem block
  * address, or 0 if no cylinder group qualified.
  */
 ufs2_daddr_t
 ffs_blkpref_ufs2(ip, lbn, indx, bap)
 	struct inode *ip;
 	ufs_lbn_t lbn;
 	int indx;
 	ufs2_daddr_t *bap;
 {
 	struct fs *fs;
 	u_int cg, inocg;
 	u_int avgbfree, startcg;
 	ufs2_daddr_t pref;
 
 	KASSERT(indx <= 0 || bap != NULL, ("need non-NULL bap"));
 	mtx_assert(UFS_MTX(ITOUMP(ip)), MA_OWNED);
 	fs = ITOFS(ip);
 	inocg = ino_to_cg(fs, ip->i_number);
 
 	/*
 	 * A negative indx asks for an indirect block: -1 single, -2 double,
 	 * -3 triple.  The first indirect is kept inline with the file data
 	 * when the last direct block exists; all others use the metadata
 	 * zone at the start of the data area of the inode's cylinder group,
 	 * which is set aside (fs_metaspace blocks) to keep indirect blocks
 	 * close together for random access and for fsck.
 	 */
 	if (indx < 0) {
 		if (indx == -1 && lbn < UFS_NDADDR + NINDIR(fs) &&
 		    ip->i_din2->di_db[UFS_NDADDR - 1] != 0)
 			pref = ip->i_din2->di_db[UFS_NDADDR - 1] + fs->fs_frag;
 		else
 			pref = cgmeta(fs, inocg);
 		return (pref);
 	}
 
 	/*
 	 * First data block mapped by the first indirect block: if that
 	 * indirect sits in the data area of the inode's cylinder group,
 	 * put the data block directly behind it.
 	 */
 	if (lbn == UFS_NDADDR) {
 		pref = ip->i_din2->di_ib[0];
 		if (pref != 0 && pref >= cgdata(fs, inocg) &&
 		    pref < cgbase(fs, inocg + 1))
 			return (pref + fs->fs_frag);
 	}
 
 	/*
 	 * Common case: we are inside a section and the preceding block
 	 * is allocated, so continue contiguously after it.
 	 */
 	if (indx % fs->fs_maxbpg != 0 && bap[indx - 1] != 0)
 		return (bap[indx - 1] + fs->fs_frag);
 
 	/*
 	 * Otherwise a new starting point is needed: we are at the start of
 	 * a section or the preceding block is missing.
 	 */
 
 	/* Directory data blocks belong in the metadata area. */
 	if ((ip->i_mode & IFMT) == IFDIR)
 		return (cgmeta(fs, inocg));
 
 	/*
 	 * Stay in the data area of the inode's cylinder group until the
 	 * direct blocks and the first indirect's blocks are used up.
 	 */
 	if (lbn < UFS_NDADDR + NINDIR(fs))
 		return (cgdata(fs, inocg));
 
 	/*
 	 * Sweep for a cylinder group with at least the average number of
 	 * free blocks, from startcg to the end and then from group zero
 	 * up to and including startcg.
 	 */
 	if (indx == 0 || bap[indx - 1] == 0)
 		startcg = inocg + lbn / fs->fs_maxbpg;
 	else
 		startcg = dtog(fs, bap[indx - 1]) + 1;
 	startcg %= fs->fs_ncg;
 	avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
 	for (cg = startcg; cg < fs->fs_ncg; cg++) {
 		if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
 			fs->fs_cgrotor = cg;
 			return (cgdata(fs, cg));
 		}
 	}
 	for (cg = 0; cg <= startcg; cg++) {
 		if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
 			fs->fs_cgrotor = cg;
 			return (cgdata(fs, cg));
 		}
 	}
 	return (0);
 }
 
 /*
  * Implement the cylinder overflow algorithm.
  *
  * The policy implemented by this algorithm is:
  *   1) allocate the block in its requested cylinder group.
  *   2) quadradically rehash on the cylinder group number.
  *   3) brute force search for a free block.
  *
  * Must be called with the UFS lock held.  Will release the lock on success
  * and return with it held on failure.
  */
 /*VARARGS5*/
 static ufs2_daddr_t
 ffs_hashalloc(ip, cg, pref, size, rsize, allocator)
 	struct inode *ip;
 	u_int cg;
 	ufs2_daddr_t pref;
 	int size;	/* Search size for data blocks, mode for inodes */
 	int rsize;	/* Real allocated size. */
 	allocfcn_t *allocator;
 {
 	struct fs *fs;
 	ufs2_daddr_t result;
 	u_int i, icg = cg;
 
 	mtx_assert(UFS_MTX(ITOUMP(ip)), MA_OWNED);
 #ifdef INVARIANTS
 	if (ITOV(ip)->v_mount->mnt_kern_flag & MNTK_SUSPENDED)
 		panic("ffs_hashalloc: allocation on suspended filesystem");
 #endif
 	fs = ITOFS(ip);
 	/*
 	 * 1: preferred cylinder group
 	 */
 	result = (*allocator)(ip, cg, pref, size, rsize);
 	if (result)
 		return (result);
 	/*
 	 * 2: quadratic rehash
 	 */
 	for (i = 1; i < fs->fs_ncg; i *= 2) {
 		cg += i;
 		if (cg >= fs->fs_ncg)
 			cg -= fs->fs_ncg;
 		result = (*allocator)(ip, cg, 0, size, rsize);
 		if (result)
 			return (result);
 	}
 	/*
 	 * 3: brute force search
 	 * Note that we start at i == 2, since 0 was checked initially,
 	 * and 1 is always checked in the quadratic rehash.
 	 */
 	cg = (icg + 2) % fs->fs_ncg;
 	for (i = 2; i < fs->fs_ncg; i++) {
 		result = (*allocator)(ip, cg, 0, size, rsize);
 		if (result)
 			return (result);
 		cg++;
 		if (cg == fs->fs_ncg)
 			cg = 0;
 	}
 	return (0);
 }
 
 /*
  * Determine whether a fragment can be extended.
  *
  * Check to see if the necessary fragments are available, and
  * if they are, allocate them.
  */
 static ufs2_daddr_t
 ffs_fragextend(ip, cg, bprev, osize, nsize)
 	struct inode *ip;
 	u_int cg;
 	ufs2_daddr_t bprev;
 	int osize, nsize;
 {
 	struct fs *fs;
 	struct cg *cgp;
 	struct buf *bp;
 	struct ufsmount *ump;
 	int nffree;
 	long bno;
 	int frags, bbase;
 	int i, error;
 	u_int8_t *blksfree;
 
 	ump = ITOUMP(ip);
 	fs = ump->um_fs;
 	if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
 		return (0);
 	frags = numfrags(fs, nsize);
 	bbase = fragnum(fs, bprev);
 	if (bbase > fragnum(fs, (bprev + frags - 1))) {
 		/* cannot extend across a block boundary */
 		return (0);
 	}
 	UFS_UNLOCK(ump);
 	if ((error = ffs_getcg(fs, ump->um_devvp, cg, &bp, &cgp)) != 0)
 		goto fail;
 	bno = dtogd(fs, bprev);
 	blksfree = cg_blksfree(cgp);
 	for (i = numfrags(fs, osize); i < frags; i++)
 		if (isclr(blksfree, bno + i))
 			goto fail;
 	/*
 	 * the current fragment can be extended
 	 * deduct the count on fragment being extended into
 	 * increase the count on the remaining fragment (if any)
 	 * allocate the extended piece
 	 */
 	for (i = frags; i < fs->fs_frag - bbase; i++)
 		if (isclr(blksfree, bno + i))
 			break;
 	cgp->cg_frsum[i - numfrags(fs, osize)]--;
 	if (i != frags)
 		cgp->cg_frsum[i - frags]++;
 	for (i = numfrags(fs, osize), nffree = 0; i < frags; i++) {
 		clrbit(blksfree, bno + i);
 		cgp->cg_cs.cs_nffree--;
 		nffree++;
 	}
 	UFS_LOCK(ump);
 	fs->fs_cstotal.cs_nffree -= nffree;
 	fs->fs_cs(fs, cg).cs_nffree -= nffree;
 	fs->fs_fmod = 1;
 	ACTIVECLEAR(fs, cg);
 	UFS_UNLOCK(ump);
 	if (DOINGSOFTDEP(ITOV(ip)))
 		softdep_setup_blkmapdep(bp, UFSTOVFS(ump), bprev,
 		    frags, numfrags(fs, osize));
 	bdwrite(bp);
 	return (bprev);
 
 fail:
 	brelse(bp);
 	UFS_LOCK(ump);
 	return (0);
 
 }
 
 /*
  * Determine whether a block can be allocated.
  *
  * Check to see if a block of the appropriate size is available,
  * and if it is, allocate it.
  */
 static ufs2_daddr_t
 ffs_alloccg(ip, cg, bpref, size, rsize)
 	struct inode *ip;
 	u_int cg;
 	ufs2_daddr_t bpref;
 	int size;
 	int rsize;
 {
 	struct fs *fs;
 	struct cg *cgp;
 	struct buf *bp;
 	struct ufsmount *ump;
 	ufs1_daddr_t bno;
 	ufs2_daddr_t blkno;
 	int i, allocsiz, error, frags;
 	u_int8_t *blksfree;
 
 	ump = ITOUMP(ip);
 	fs = ump->um_fs;
 	if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
 		return (0);
 	UFS_UNLOCK(ump);
 	if ((error = ffs_getcg(fs, ump->um_devvp, cg, &bp, &cgp)) != 0 ||
 	   (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize))
 		goto fail;
 	if (size == fs->fs_bsize) {
 		UFS_LOCK(ump);
 		blkno = ffs_alloccgblk(ip, bp, bpref, rsize);
 		ACTIVECLEAR(fs, cg);
 		UFS_UNLOCK(ump);
 		bdwrite(bp);
 		return (blkno);
 	}
 	/*
 	 * check to see if any fragments are already available
 	 * allocsiz is the size which will be allocated, hacking
 	 * it down to a smaller size if necessary
 	 */
 	blksfree = cg_blksfree(cgp);
 	frags = numfrags(fs, size);
 	for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
 		if (cgp->cg_frsum[allocsiz] != 0)
 			break;
 	if (allocsiz == fs->fs_frag) {
 		/*
 		 * no fragments were available, so a block will be
 		 * allocated, and hacked up
 		 */
 		if (cgp->cg_cs.cs_nbfree == 0)
 			goto fail;
 		UFS_LOCK(ump);
 		blkno = ffs_alloccgblk(ip, bp, bpref, rsize);
 		ACTIVECLEAR(fs, cg);
 		UFS_UNLOCK(ump);
 		bdwrite(bp);
 		return (blkno);
 	}
 	KASSERT(size == rsize,
 	    ("ffs_alloccg: size(%d) != rsize(%d)", size, rsize));
 	bno = ffs_mapsearch(fs, cgp, bpref, allocsiz);
 	if (bno < 0)
 		goto fail;
 	for (i = 0; i < frags; i++)
 		clrbit(blksfree, bno + i);
 	cgp->cg_cs.cs_nffree -= frags;
 	cgp->cg_frsum[allocsiz]--;
 	if (frags != allocsiz)
 		cgp->cg_frsum[allocsiz - frags]++;
 	UFS_LOCK(ump);
 	fs->fs_cstotal.cs_nffree -= frags;
 	fs->fs_cs(fs, cg).cs_nffree -= frags;
 	fs->fs_fmod = 1;
 	blkno = cgbase(fs, cg) + bno;
 	ACTIVECLEAR(fs, cg);
 	UFS_UNLOCK(ump);
 	if (DOINGSOFTDEP(ITOV(ip)))
 		softdep_setup_blkmapdep(bp, UFSTOVFS(ump), blkno, frags, 0);
 	bdwrite(bp);
 	return (blkno);
 
 fail:
 	brelse(bp);
 	UFS_LOCK(ump);
 	return (0);
 }
 
 /*
  * Allocate a block in a cylinder group.
  *
  * This algorithm implements the following policy:
  *   1) allocate the requested block.
  *   2) allocate a rotationally optimal block in the same cylinder.
  *   3) allocate the next available block on the block rotor for the
  *      specified cylinder group.
  * Note that this routine only allocates fs_bsize blocks; these
  * blocks may be fragmented by the routine that allocates them.
  *
  * NOTE(review): no rotational placement (step 2) is visible in the code
  * below; when the requested block is taken it goes straight to
  * ffs_mapsearch().
  *
  * Parameters:
  *   ip    - inode the block is allocated for.
  *   bp    - buffer whose data is the cylinder group (struct cg).
  *   bpref - preferred filesystem block address, or 0 to use the rotor.
  *   size  - number of bytes the caller wants; when this is less than a
  *           full block the unused trailing fragments are marked free.
  *
  * Returns the filesystem address of the block, or 0 when ffs_mapsearch()
  * finds no free block.  The UFS mutex must be held on entry and is held
  * again on return, but it is dropped around the soft updates call.
  */
 static ufs2_daddr_t
 ffs_alloccgblk(ip, bp, bpref, size)
 	struct inode *ip;
 	struct buf *bp;
 	ufs2_daddr_t bpref;
 	int size;
 {
 	struct fs *fs;
 	struct cg *cgp;
 	struct ufsmount *ump;
 	ufs1_daddr_t bno;
 	ufs2_daddr_t blkno;
 	u_int8_t *blksfree;
 	int i, cgbpref;
 
 	ump = ITOUMP(ip);
 	fs = ump->um_fs;
 	mtx_assert(UFS_MTX(ump), MA_OWNED);
 	cgp = (struct cg *)bp->b_data;
 	blksfree = cg_blksfree(cgp);
 	if (bpref == 0) {
 		/* No preference: continue one block past the cg rotor. */
 		bpref = cgbase(fs, cgp->cg_cgx) + cgp->cg_rotor + fs->fs_frag;
 	} else if ((cgbpref = dtog(fs, bpref)) != cgp->cg_cgx) {
 		/* map bpref to correct zone in this cg */
 		if (bpref < cgdata(fs, cgbpref))
 			bpref = cgmeta(fs, cgp->cg_cgx);
 		else
 			bpref = cgdata(fs, cgp->cg_cgx);
 	}
 	/*
 	 * if the requested block is available, use it
 	 */
 	bno = dtogd(fs, blknum(fs, bpref));
 	if (ffs_isblock(fs, blksfree, fragstoblks(fs, bno)))
 		goto gotit;
 	/*
 	 * Take the next available block in this cylinder group.
 	 */
 	bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
 	if (bno < 0)
 		return (0);
 	/* Update cg_rotor only if allocated from the data zone */
 	if (bno >= dtogd(fs, cgdata(fs, cgp->cg_cgx)))
 		cgp->cg_rotor = bno;
 gotit:
 	/*
 	 * bno is the fragment offset of the chosen block within the cg.
 	 * Mark the block in use and update the cluster accounting and the
 	 * free block counts in the cg, the filesystem totals and the
 	 * per-cg summary.
 	 */
 	blkno = fragstoblks(fs, bno);
 	ffs_clrblock(fs, blksfree, (long)blkno);
 	ffs_clusteracct(fs, cgp, blkno, -1);
 	cgp->cg_cs.cs_nbfree--;
 	fs->fs_cstotal.cs_nbfree--;
 	fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
 	fs->fs_fmod = 1;
 	/* From here on blkno is the filesystem-wide address of the block. */
 	blkno = cgbase(fs, cgp->cg_cgx) + bno;
 	/*
 	 * If the caller didn't want the whole block free the frags here.
 	 */
 	size = numfrags(fs, size);	/* size is now a fragment count */
 	if (size != fs->fs_frag) {
 		bno = dtogd(fs, blkno);
 		for (i = size; i < fs->fs_frag; i++)
 			setbit(blksfree, bno + i);
 		/* i becomes the number of fragments handed back. */
 		i = fs->fs_frag - size;
 		cgp->cg_cs.cs_nffree += i;
 		fs->fs_cstotal.cs_nffree += i;
 		fs->fs_cs(fs, cgp->cg_cgx).cs_nffree += i;
 		fs->fs_fmod = 1;
 		cgp->cg_frsum[i]++;
 	}
 	/*
 	 * XXX Fixme.
 	 * The UFS lock is released for the soft updates call and retaken
 	 * before returning, so the caller gets it back held.
 	 */
 	UFS_UNLOCK(ump);
 	if (DOINGSOFTDEP(ITOV(ip)))
 		softdep_setup_blkmapdep(bp, UFSTOVFS(ump), blkno, size, 0);
 	UFS_LOCK(ump);
 	return (blkno);
 }
 
 /*
  * Determine whether a cluster can be allocated.
  *
  * We do not currently check for optimal rotational layout if there
  * are multiple choices in the same cylinder group. Instead we just
  * take the first one that we find following bpref.
  *
  * Parameters:
  *   ip    - inode the cluster is allocated for.
  *   cg    - cylinder group to search.
  *   bpref - preferred filesystem block address; the search starts here
  *           when it lies in cg, otherwise at the start of cg's data area.
  *   len   - cluster length in blocks.
  *
  * Returns the filesystem address of the first block of the cluster, or 0
  * if none was found.  The UFS lock is dropped while the cylinder group
  * is examined.  The function returns with the lock held on every failure
  * path and with it released on success.
  */
 static ufs2_daddr_t
 ffs_clusteralloc(ip, cg, bpref, len)
 	struct inode *ip;
 	u_int cg;
 	ufs2_daddr_t bpref;
 	int len;
 {
 	struct fs *fs;
 	struct cg *cgp;
 	struct buf *bp;
 	struct ufsmount *ump;
 	int i, run, bit, map, got, error;
 	ufs2_daddr_t bno;
 	u_char *mapp;
 	int32_t *lp;
 	u_int8_t *blksfree;
 
 	ump = ITOUMP(ip);
 	fs = ump->um_fs;
 	/* The cached maximum cluster size rules this cg out cheaply. */
 	if (fs->fs_maxcluster[cg] < len)
 		return (0);
 	UFS_UNLOCK(ump);
 	if ((error = ffs_getcg(fs, ump->um_devvp, cg, &bp, &cgp)) != 0) {
 		UFS_LOCK(ump);
 		return (0);
 	}
 	/*
 	 * Check to see if a cluster of the needed size (or bigger) is
 	 * available in this cylinder group.
 	 */
 	lp = &cg_clustersum(cgp)[len];
 	for (i = len; i <= fs->fs_contigsumsize; i++)
 		if (*lp++ > 0)
 			break;
 	if (i > fs->fs_contigsumsize) {
 		/*
 		 * This is the first time looking for a cluster in this
 		 * cylinder group. Update the cluster summary information
 		 * to reflect the true maximum sized cluster so that
 		 * future cluster allocation requests can avoid reading
 		 * the cylinder group map only to find no clusters.
 		 */
 		lp = &cg_clustersum(cgp)[len - 1];
 		for (i = len - 1; i > 0; i--)
 			if (*lp-- > 0)
 				break;
 		/* i is the largest cluster size with a non-zero count, or 0. */
 		UFS_LOCK(ump);
 		fs->fs_maxcluster[cg] = i;
 		brelse(bp);
 		return (0);
 	}
 	/*
 	 * Search the cluster map to find a big enough cluster.
 	 * We take the first one that we find, even if it is larger
 	 * than we need as we prefer to get one close to the previous
 	 * block allocation. We do not search before the current
 	 * preference point as we do not want to allocate a block
 	 * that is allocated before the previous one (as we will
 	 * then have to wait for another pass of the elevator
 	 * algorithm before it will be read). We prefer to fail and
 	 * be recalled to try an allocation in the next cylinder group.
 	 */
 	if (dtog(fs, bpref) != cg)
 		bpref = cgdata(fs, cg);
 	else
 		bpref = blknum(fs, bpref);
 	/* bpref becomes a block index relative to the start of the cg. */
 	bpref = fragstoblks(fs, dtogd(fs, bpref));
 	mapp = &cg_clustersfree(cgp)[bpref / NBBY];
 	map = *mapp++;
 	bit = 1 << (bpref % NBBY);
 	/*
 	 * Walk the map one bit per block.  `run' counts consecutive set
 	 * bits and `got' is the block being examined; the loop stops when
 	 * the run reaches len.
 	 */
 	for (run = 0, got = bpref; got < cgp->cg_nclusterblks; got++) {
 		if ((map & bit) == 0) {
 			run = 0;
 		} else {
 			run++;
 			if (run == len)
 				break;
 		}
 		/* Step to the next bit, loading the next byte as needed. */
 		if ((got & (NBBY - 1)) != (NBBY - 1)) {
 			bit <<= 1;
 		} else {
 			map = *mapp++;
 			bit = 1;
 		}
 	}
 	if (got >= cgp->cg_nclusterblks) {
 		/* Ran off the end of the map without a long enough run. */
 		UFS_LOCK(ump);
 		brelse(bp);
 		return (0);
 	}
 	/*
 	 * Allocate the cluster that we have found.
 	 * The run ends at block `got', so it starts at got - run + 1.
 	 */
 	blksfree = cg_blksfree(cgp);
 	for (i = 1; i <= len; i++)
 		if (!ffs_isblock(fs, blksfree, got - run + i))
 			panic("ffs_clusteralloc: map mismatch");
 	bno = cgbase(fs, cg) + blkstofrags(fs, got - run + 1);
 	if (dtog(fs, bno) != cg)
 		panic("ffs_clusteralloc: allocated out of group");
 	/* From here on len is measured in fragments. */
 	len = blkstofrags(fs, len);
 	UFS_LOCK(ump);
 	/* Claim each block; every request must return the exact address. */
 	for (i = 0; i < len; i += fs->fs_frag)
 		if (ffs_alloccgblk(ip, bp, bno + i, fs->fs_bsize) != bno + i)
 			panic("ffs_clusteralloc: lost block");
 	ACTIVECLEAR(fs, cg);
 	UFS_UNLOCK(ump);
 	bdwrite(bp);
 	return (bno);
 }
 
 /*
  * Get the buffer for a filesystem-block-sized chunk of inodes on the
  * device vnode of ip.  The chunk is located from cg and cginoblk, the
  * offset of its first inode within that cylinder group.  gbflags is
  * passed through to getblk().
  */
 static inline struct buf *
 getinobuf(struct inode *ip, u_int cg, u_int32_t cginoblk, int gbflags)
 {
 	struct fs *fs;
 	u_int ino;
 	daddr_t blkno;
 
 	fs = ITOFS(ip);
 	/* Number of the first inode in the requested inode block. */
 	ino = cg * fs->fs_ipg + cginoblk;
 	/* Device block address of the block holding that inode. */
 	blkno = fsbtodb(fs, ino_to_fsba(fs, ino));
 	return (getblk(ITODEVVP(ip), blkno, (int)fs->fs_bsize, 0, 0,
 	    gbflags));
 }
 
 /*
  * Synchronous inode initialization is needed only when barrier writes do not
  * work as advertised, and will impose a heavy cost on file creation in a newly
  * created filesystem.
  *
  * When this knob is nonzero (the default), ffs_nodealloccg() writes a freshly
  * initialized inode block with babarrierwrite(); when it is zero the block is
  * written with bwrite() instead.
  */
 static int doasyncinodeinit = 1;
 SYSCTL_INT(_vfs_ffs, OID_AUTO, doasyncinodeinit, CTLFLAG_RWTUN,
     &doasyncinodeinit, 0,
     "Perform inode block initialization using asynchronous writes");
 
 /*
  * Determine whether an inode can be allocated.
  *
  * Check to see if an inode is available, and if it is,
  * allocate it using the following policy:
  *   1) allocate the requested inode.
  *   2) allocate the next available inode after the requested
  *      inode in the specified cylinder group.
  *
  * Locking protocol, as implemented below: the UFS mount lock is dropped
  * right after the initial summary check, so the function must be entered
  * with that lock held.  On success the nonzero inode number is returned
  * with the lock released; on every failure 0 is returned with the lock
  * held again.
  *
  * ip is the inode on whose behalf the allocation is made, cg the cylinder
  * group to search, ipref the preferred inode (0 for none) and mode the
  * file mode, used only to account for directories.  The last argument is
  * not used.
  */
 static ufs2_daddr_t
 ffs_nodealloccg(ip, cg, ipref, mode, unused)
 	struct inode *ip;
 	u_int cg;
 	ufs2_daddr_t ipref;
 	int mode;
 	int unused;
 {
 	struct fs *fs;
 	struct cg *cgp;
 	struct buf *bp, *ibp;
 	struct ufsmount *ump;
 	u_int8_t *inosused, *loc;
 	struct ufs2_dinode *dp2;
 	int error, start, len, i;
 	u_int32_t old_initediblk;
 
 	ump = ITOUMP(ip);
 	fs = ump->um_fs;
 check_nifree:
 	if (fs->fs_cs(fs, cg).cs_nifree == 0)
 		return (0);
 	UFS_UNLOCK(ump);
 	if ((error = ffs_getcg(fs, ump->um_devvp, cg, &bp, &cgp)) != 0) {
 		UFS_LOCK(ump);
 		return (0);
 	}
 restart:
 	if (cgp->cg_cs.cs_nifree == 0) {
 		brelse(bp);
 		UFS_LOCK(ump);
 		return (0);
 	}
 	inosused = cg_inosused(cgp);
 	if (ipref) {
 		ipref %= fs->fs_ipg;
 		if (isclr(inosused, ipref))
 			goto gotit;
 	}
 	/*
 	 * The preferred inode is taken (or none was given): scan the inode
 	 * map for a byte that is not all ones, first from the rotor to the
 	 * end of the map and then from the start of the map up to the rotor.
 	 */
 	start = cgp->cg_irotor / NBBY;
 	len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY);
 	loc = memcchr(&inosused[start], 0xff, len);
 	if (loc == NULL) {
 		len = start + 1;
 		start = 0;
 		loc = memcchr(&inosused[start], 0xff, len);
 		if (loc == NULL) {
 			printf("cg = %u, irotor = %ld, fs = %s\n",
 			    cg, (long)cgp->cg_irotor, fs->fs_fsmnt);
 			panic("ffs_nodealloccg: map corrupted");
 			/* NOTREACHED */
 		}
 	}
 	ipref = (loc - inosused) * NBBY + ffs(~*loc) - 1;
 gotit:
 	/*
 	 * Check to see if we need to initialize more inodes.
 	 */
 	if (fs->fs_magic == FS_UFS2_MAGIC &&
 	    ipref + INOPB(fs) > cgp->cg_initediblk &&
 	    cgp->cg_initediblk < cgp->cg_niblk) {
 		old_initediblk = cgp->cg_initediblk;
 
 		/*
 		 * Free the cylinder group lock before writing the
 		 * initialized inode block.  Entering the
 		 * babarrierwrite() with the cylinder group lock
 		 * causes lock order violation between the lock and
 		 * snaplk.
 		 *
 		 * Another thread can decide to initialize the same
 		 * inode block, but whichever thread first gets the
 		 * cylinder group lock after writing the newly
 		 * allocated inode block will update it and the other
 		 * will realize that it has lost and leave the
 		 * cylinder group unchanged.
 		 */
 		ibp = getinobuf(ip, cg, old_initediblk, GB_LOCK_NOWAIT);
 		brelse(bp);
 		if (ibp == NULL) {
 			/*
 			 * The inode block buffer is already owned by
 			 * another thread, which must initialize it.
 			 * Wait on the buffer to allow another thread
 			 * to finish the updates, with dropped cg
 			 * buffer lock, then retry.
 			 */
 			ibp = getinobuf(ip, cg, old_initediblk, 0);
 			brelse(ibp);
 			UFS_LOCK(ump);
 			goto check_nifree;
 		}
 		bzero(ibp->b_data, (int)fs->fs_bsize);
 		dp2 = (struct ufs2_dinode *)(ibp->b_data);
 		for (i = 0; i < INOPB(fs); i++) {
 			/* Give every new inode a nonzero generation number. */
 			while (dp2->di_gen == 0)
 				dp2->di_gen = arc4random();
 			dp2++;
 		}
 
 		/*
 		 * Rather than adding a soft updates dependency to ensure
 		 * that the new inode block is written before it is claimed
 		 * by the cylinder group map, we just do a barrier write
 		 * here. The barrier write will ensure that the inode block
 		 * gets written before the updated cylinder group map can be
 		 * written. The barrier write should only slow down bulk
 		 * loading of newly created filesystems.
 		 */
 		if (doasyncinodeinit)
 			babarrierwrite(ibp);
 		else
 			bwrite(ibp);
 
 		/*
 		 * After the inode block is written, try to update the
 		 * cg initediblk pointer.  If another thread beat us
 		 * to it, then leave it unchanged as the other thread
 		 * has already set it correctly.
 		 */
 		error = ffs_getcg(fs, ump->um_devvp, cg, &bp, &cgp);
 		UFS_LOCK(ump);
 		ACTIVECLEAR(fs, cg);
 		if (error != 0) {
 			/*
 			 * The return value is an inode number, so an errno
 			 * must not be handed back.  Fail like every other
 			 * failure path: return 0 with the UFS lock held.
 			 */
 			return (0);
 		}
 		UFS_UNLOCK(ump);
 		if (cgp->cg_initediblk == old_initediblk)
 			cgp->cg_initediblk += INOPB(fs);
 		goto restart;
 	}
 	/* Claim the inode in the map and update all summary counts. */
 	cgp->cg_irotor = ipref;
 	UFS_LOCK(ump);
 	ACTIVECLEAR(fs, cg);
 	setbit(inosused, ipref);
 	cgp->cg_cs.cs_nifree--;
 	fs->fs_cstotal.cs_nifree--;
 	fs->fs_cs(fs, cg).cs_nifree--;
 	fs->fs_fmod = 1;
 	if ((mode & IFMT) == IFDIR) {
 		cgp->cg_cs.cs_ndir++;
 		fs->fs_cstotal.cs_ndir++;
 		fs->fs_cs(fs, cg).cs_ndir++;
 	}
 	UFS_UNLOCK(ump);
 	if (DOINGSOFTDEP(ITOV(ip)))
 		softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref, mode);
 	bdwrite(bp);
 	return ((ino_t)(cg * fs->fs_ipg + ipref));
 }
 
 /*
  * Free a block or fragment.
  *
  * The specified block or fragment is placed back in the
  * free map. If a fragment is deallocated, a possible
  * block reassembly is checked.
  */
 static void
 ffs_blkfree_cg(ump, fs, devvp, bno, size, inum, dephd)
 	struct ufsmount *ump;
 	struct fs *fs;
 	struct vnode *devvp;
 	ufs2_daddr_t bno;
 	long size;
 	ino_t inum;
 	struct workhead *dephd;
 {
 	struct mount *mp;
 	struct cg *cgp;
 	struct buf *bp;
 	ufs1_daddr_t fragno, cgbno;
 	int i, blk, frags, bbase, error;
 	u_int cg;
 	u_int8_t *blksfree;
 	struct cdev *dev;
 
 	cg = dtog(fs, bno);
 	if (devvp->v_type == VREG) {
 		/* devvp is a snapshot */
 		MPASS(devvp->v_mount->mnt_data == ump);
 		dev = ump->um_devvp->v_rdev;
 	} else if (devvp->v_type == VCHR) {
 		/* devvp is a normal disk device */
 		dev = devvp->v_rdev;
 		ASSERT_VOP_LOCKED(devvp, "ffs_blkfree_cg");
 	} else
 		return;
 #ifdef INVARIANTS
 	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 ||
 	    fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) {
 		printf("dev=%s, bno = %jd, bsize = %ld, size = %ld, fs = %s\n",
 		    devtoname(dev), (intmax_t)bno, (long)fs->fs_bsize,
 		    size, fs->fs_fsmnt);
 		panic("ffs_blkfree_cg: bad size");
 	}
 #endif
 	if ((u_int)bno >= fs->fs_size) {
 		printf("bad block %jd, ino %lu\n", (intmax_t)bno,
 		    (u_long)inum);
 		ffs_fserr(fs, inum, "bad block");
 		return;
 	}
 	if ((error = ffs_getcg(fs, devvp, cg, &bp, &cgp)) != 0)
 		return;
 	cgbno = dtogd(fs, bno);
 	blksfree = cg_blksfree(cgp);
 	UFS_LOCK(ump);
 	if (size == fs->fs_bsize) {
 		fragno = fragstoblks(fs, cgbno);
 		if (!ffs_isfreeblock(fs, blksfree, fragno)) {
 			if (devvp->v_type == VREG) {
 				UFS_UNLOCK(ump);
 				/* devvp is a snapshot */
 				brelse(bp);
 				return;
 			}
 			printf("dev = %s, block = %jd, fs = %s\n",
 			    devtoname(dev), (intmax_t)bno, fs->fs_fsmnt);
 			panic("ffs_blkfree_cg: freeing free block");
 		}
 		ffs_setblock(fs, blksfree, fragno);
 		ffs_clusteracct(fs, cgp, fragno, 1);
 		cgp->cg_cs.cs_nbfree++;
 		fs->fs_cstotal.cs_nbfree++;
 		fs->fs_cs(fs, cg).cs_nbfree++;
 	} else {
 		bbase = cgbno - fragnum(fs, cgbno);
 		/*
 		 * decrement the counts associated with the old frags
 		 */
 		blk = blkmap(fs, blksfree, bbase);
 		ffs_fragacct(fs, blk, cgp->cg_frsum, -1);
 		/*
 		 * deallocate the fragment
 		 */
 		frags = numfrags(fs, size);
 		for (i = 0; i < frags; i++) {
 			if (isset(blksfree, cgbno + i)) {
 				printf("dev = %s, block = %jd, fs = %s\n",
 				    devtoname(dev), (intmax_t)(bno + i),
 				    fs->fs_fsmnt);
 				panic("ffs_blkfree_cg: freeing free frag");
 			}
 			setbit(blksfree, cgbno + i);
 		}
 		cgp->cg_cs.cs_nffree += i;
 		fs->fs_cstotal.cs_nffree += i;
 		fs->fs_cs(fs, cg).cs_nffree += i;
 		/*
 		 * add back in counts associated with the new frags
 		 */
 		blk = blkmap(fs, blksfree, bbase);
 		ffs_fragacct(fs, blk, cgp->cg_frsum, 1);
 		/*
 		 * if a complete block has been reassembled, account for it
 		 */
 		fragno = fragstoblks(fs, bbase);
 		if (ffs_isblock(fs, blksfree, fragno)) {
 			cgp->cg_cs.cs_nffree -= fs->fs_frag;
 			fs->fs_cstotal.cs_nffree -= fs->fs_frag;
 			fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
 			ffs_clusteracct(fs, cgp, fragno, 1);
 			cgp->cg_cs.cs_nbfree++;
 			fs->fs_cstotal.cs_nbfree++;
 			fs->fs_cs(fs, cg).cs_nbfree++;
 		}
 	}
 	fs->fs_fmod = 1;
 	ACTIVECLEAR(fs, cg);
 	UFS_UNLOCK(ump);
 	mp = UFSTOVFS(ump);
 	if (MOUNTEDSOFTDEP(mp) && devvp->v_type == VCHR)
 		softdep_setup_blkfree(UFSTOVFS(ump), bp, bno,
 		    numfrags(fs, size), dephd);
 	bdwrite(bp);
 }
 
 /*
  * Structures and routines associated with trim management.
  *
  * The following requests are passed to trim_lookup to indicate
  * the actions that should be taken.
  */
 #define	NEW	1	/* if found, error else allocate and hash it */
 #define	OLD	2	/* if not found, error, else return it */
 #define	REPLACE	3	/* if not found, error else unhash and reallocate it */
 #define	DONE	4	/* if not found, error else unhash and return it */
 #define	SINGLE	5	/* don't look up, just allocate it and don't hash it */
 
 /* Malloc type used for every trim structure and trim buf in this file. */
 MALLOC_DEFINE(M_TRIM, "ufs_trim", "UFS trim structures");
 
 /*
  * Select the hash chain for a key: the key is ANDed with
  * um_trimlisthashsize and the result indexes um_trimhash.
  * NOTE(review): this presumes um_trimlisthashsize holds a mask (table
  * size minus one) rather than a count -- confirm where the table is set up.
  */
 #define	TRIMLIST_HASH(ump, key) \
 	(&(ump)->um_trimhash[(key) & (ump)->um_trimlisthashsize])
 
 /*
  * These structures describe each of the block free requests aggregated
  * together to make up a trim request.
  *
  * One is allocated by ffs_blkfree() for every piece it defers and freed
  * by ffs_blkfree_trim_task() once the piece has been returned to the map.
  */
 struct trim_blkreq {
 	TAILQ_ENTRY(trim_blkreq) blkreqlist;	/* linkage on a request's blklist */
 	ufs2_daddr_t bno;		/* block number given to ffs_blkfree() */
 	long size;			/* size given to ffs_blkfree() */
 	struct workhead *pdephd;	/* NULL, or points at dephd below */
 	struct workhead dephd;		/* takes over the caller's worklist */
 };
 
 /*
  * Description of a trim request.
  *
  * Filled in by trim_lookup(); bno and size are then grown by ffs_blkfree()
  * as adjacent pieces are added, and read by ffs_blkfree_sendtrim() to
  * build the BIO_DELETE.
  */
 struct ffs_blkfree_trim_params {
 	TAILQ_HEAD(, trim_blkreq) blklist;	/* pieces covered by this request */
 	LIST_ENTRY(ffs_blkfree_trim_params) hashlist; /* hash chain linkage */
 	struct task task;		/* queued on um_trim_tq at completion */
 	struct ufsmount *ump;		/* mount the request belongs to */
 	struct vnode *devvp;		/* vnode handed to ffs_blkfree_cg() */
 	ino_t inum;			/* inode number handed to ffs_blkfree_cg() */
 	ufs2_daddr_t bno;		/* first block of the range */
 	long size;			/* size of the range; 0 while empty */
 	long key;			/* key the request is hashed under */
 };
 
 /* Forward declarations of the file-local trim helpers defined below. */
 static void	ffs_blkfree_trim_completed(struct buf *);
 static void	ffs_blkfree_trim_task(void *ctx, int pending __unused);
 static struct	ffs_blkfree_trim_params *trim_lookup(struct ufsmount *,
 		    struct vnode *, ufs2_daddr_t, long, ino_t, u_long, int);
 static void	ffs_blkfree_sendtrim(struct ffs_blkfree_trim_params *);
 
 /*
  * Completion handler installed as b_iodone on the BIO_DELETE buf built by
  * ffs_blkfree_sendtrim().  The buf is released and a task is queued on the
  * mount's trim taskqueue to free the associated block(s).
  */
 static void
 ffs_blkfree_trim_completed(struct buf *bp)
 {
 	struct ffs_blkfree_trim_params *params = bp->b_fsprivate1;
 
 	/* The buf came from malloc(M_TRIM), not from the buffer cache. */
 	free(bp, M_TRIM);
 	TASK_INIT(&params->task, 0, ffs_blkfree_trim_task, params);
 	taskqueue_enqueue(params->ump->um_trim_tq, &params->task);
 }
 
 /*
  * Trim completion task that frees the associated block(s).
  *
  * Every piece queued on the request is handed to ffs_blkfree_cg() and then
  * released.  Afterwards the secondary write started by
  * ffs_blkfree_sendtrim() is finished, the in-flight counters are dropped
  * and the request itself is freed.  The pending count is ignored.
  */
 static void
 ffs_blkfree_trim_task(void *ctx, int pending)
 {
 	struct ffs_blkfree_trim_params *params = ctx;
 	struct ufsmount *ump = params->ump;
 	struct trim_blkreq *req;
 
 	for (;;) {
 		req = TAILQ_FIRST(&params->blklist);
 		if (req == NULL)
 			break;
 		ffs_blkfree_cg(ump, ump->um_fs, params->devvp, req->bno,
 		    req->size, params->inum, req->pdephd);
 		TAILQ_REMOVE(&params->blklist, req, blkreqlist);
 		free(req, M_TRIM);
 	}
 	vn_finished_secondary_write(UFSTOVFS(ump));
 
 	UFS_LOCK(ump);
 	ump->um_trim_inflight -= 1;
 	ump->um_trim_inflight_blks -= numfrags(ump->um_fs, params->size);
 	UFS_UNLOCK(ump);
 
 	free(params, M_TRIM);
 }
 
 /*
  * Look up a trim request by key.
  * Allocate a new request if asked to (NEW, REPLACE, SINGLE).
  *
  * alloctype selects the action:
  *   NEW     - the key must not be hashed yet; allocate a request, hash
  *             it and return it.
  *   OLD     - the key must be hashed; return the existing request.
  *   REPLACE - the key must be hashed; unhash the existing request (the
  *             caller still holds it and disposes of it) and return a
  *             newly allocated request hashed under the same key.
  *   DONE    - the key must be hashed; unhash and return the request.
  *   SINGLE  - no lookup; return a newly allocated, unhashed request.
  *
  * A newly allocated request is initialized from ump, devvp, bno, size,
  * inum and key.  The hash chains are walked and modified under the UFS
  * mount lock, which is taken and released here.
  */
 static struct ffs_blkfree_trim_params *
 trim_lookup(ump, devvp, bno, size, inum, key, alloctype)
 	struct ufsmount *ump;
 	struct vnode *devvp;
 	ufs2_daddr_t bno;
 	long size;
 	ino_t inum;
 	u_long key;
 	int alloctype;
 {
 	struct trimlist_hashhead *tphashhead;
 	struct ffs_blkfree_trim_params *tp, *ntp;
 
 	tphashhead = NULL;
 	tp = NULL;
 	ntp = NULL;
 	/*
 	 * Only NEW, REPLACE and SINGLE hand back a fresh request, so only
 	 * they need an allocation.  It is done before the lock is taken.
 	 * OLD and DONE return the hashed entry and allocate nothing.
 	 */
 	if (alloctype != OLD && alloctype != DONE)
 		ntp = malloc(sizeof(*ntp), M_TRIM, M_WAITOK);
 	if (alloctype != SINGLE) {
 		KASSERT(key >= FIRST_VALID_KEY, ("trim_lookup: invalid key"));
 		UFS_LOCK(ump);
 		tphashhead = TRIMLIST_HASH(ump, key);
 		LIST_FOREACH(tp, tphashhead, hashlist)
 			if (key == tp->key)
 				break;
 	}
 	switch (alloctype) {
 	case NEW:
 		KASSERT(tp == NULL, ("trim_lookup: found trim"));
 		break;
 	case OLD:
 		KASSERT(tp != NULL,
 		    ("trim_lookup: missing call to ffs_blkrelease_start()"));
 		UFS_UNLOCK(ump);
 		return (tp);
 	case REPLACE:
 		KASSERT(tp != NULL, ("trim_lookup: missing REPLACE trim"));
 		LIST_REMOVE(tp, hashlist);
 		/* tp will be freed by caller */
 		break;
 	case DONE:
 		KASSERT(tp != NULL, ("trim_lookup: missing DONE trim"));
 		LIST_REMOVE(tp, hashlist);
 		UFS_UNLOCK(ump);
 		return (tp);
 	default:
 		/* SINGLE: nothing was looked up and the lock is not held. */
 		break;
 	}
 	TAILQ_INIT(&ntp->blklist);
 	ntp->ump = ump;
 	ntp->devvp = devvp;
 	ntp->bno = bno;
 	ntp->size = size;
 	ntp->inum = inum;
 	ntp->key = key;
 	if (alloctype != SINGLE) {
 		LIST_INSERT_HEAD(tphashhead, ntp, hashlist);
 		UFS_UNLOCK(ump);
 	}
 	return (ntp);
 }
 
 /*
  * Dispatch a trim request.
  *
  * A private buf describing the BIO_DELETE for the range in tp is built
  * and handed to g_vfs_strategy(); ffs_blkfree_trim_completed() runs when
  * the request finishes.
  */
 static void
 ffs_blkfree_sendtrim(struct ffs_blkfree_trim_params *tp)
 {
 	struct ufsmount *ump = tp->ump;
 	struct mount *mp;
 	struct buf *bp;
 
 	/*
 	 * Postpone the set of the free bit in the cg bitmap until the
 	 * BIO_DELETE is completed.  Otherwise, due to disk queue
 	 * reordering, TRIM might be issued after we reuse the block
 	 * and write some new data into it.
 	 */
 	bp = malloc(sizeof(*bp), M_TRIM, M_WAITOK | M_ZERO);
 	bp->b_iocmd = BIO_DELETE;
 	bp->b_iooffset = dbtob(fsbtodb(ump->um_fs, tp->bno));
 	bp->b_bcount = tp->size;
 	bp->b_iodone = ffs_blkfree_trim_completed;
 	bp->b_fsprivate1 = tp;
 
 	/* Account for the request in the per-mount trim statistics. */
 	UFS_LOCK(ump);
 	ump->um_trim_total += 1;
 	ump->um_trim_inflight += 1;
 	ump->um_trim_inflight_blks += numfrags(ump->um_fs, tp->size);
 	ump->um_trim_total_blks += numfrags(ump->um_fs, tp->size);
 	UFS_UNLOCK(ump);
 
 	/* Finished by ffs_blkfree_trim_task() once the blocks are freed. */
 	mp = UFSTOVFS(ump);
 	vn_start_secondary_write(NULL, &mp, 0);
 	g_vfs_strategy(ump->um_bo, bp);
 }
 
 /*
  * Allocate a new key to use to identify a range of blocks.
  *
  * SINGLETON_KEY is returned when the device cannot delete or when trim
  * consolidation is switched off.  Otherwise a fresh key is drawn from a
  * global counter and an empty trim request is hashed under it.
  */
 u_long
 ffs_blkrelease_start(struct ufsmount *ump, struct vnode *devvp, ino_t inum)
 {
 	static u_long masterkey;
 	u_long key;
 
 	if (((ump->um_flags & UM_CANDELETE) == 0) || dotrimcons == 0)
 		return (SINGLETON_KEY);
 	/* Keep drawing until the counter yields a key outside the reserved range. */
 	for (;;) {
 		key = atomic_fetchadd_long(&masterkey, 1);
 		if (key >= FIRST_VALID_KEY)
 			break;
 	}
 	(void) trim_lookup(ump, devvp, 0, 0, inum, key, NEW);
 	return (key);
 }
 
 /*
  * Deallocate a key that has been used to identify a range of blocks.
  *
  * Whatever range is still pending under the key is issued as a trim.
  */
 void
 ffs_blkrelease_finish(struct ufsmount *ump, u_long key)
 {
 	struct ffs_blkfree_trim_params *tp;
 
 	if (((ump->um_flags & UM_CANDELETE) == 0) || dotrimcons == 0)
 		return;
 	/*
 	 * The vfs.ffs.dotrimcons sysctl can be switched on between a
 	 * call to ffs_blkrelease_start() and the matching call to
 	 * ffs_blkrelease_finish().  In that case the start routine
 	 * handed out SINGLETON_KEY and never began a collection
 	 * sequence, so there is nothing to finish here.
 	 */
 	if (key == SINGLETON_KEY) {
 #ifdef INVARIANTS
 		printf("%s: vfs.ffs.dotrimcons enabled on active filesystem\n",
 		    ump->um_mountp->mnt_stat.f_mntonname);
 #endif
 		return;
 	}
 	/*
 	 * No more blocks will be sent under this key, so look it up
 	 * with the DONE alloctype to have it unhashed.  A request with
 	 * a non-zero size describes a pending range:
 	 * ffs_blkfree_sendtrim(tp) issues it (and tp is freed later).
 	 * A zero size means the key was never used and tp is simply
 	 * freed.
 	 */
 	tp = trim_lookup(ump, NULL, 0, 0, 0, key, DONE);
 	if (tp->size != 0) {
 		ffs_blkfree_sendtrim(tp);
 		return;
 	}
 	free(tp, M_TRIM);
 }
 
 /*
  * Setup to free a block or fragment.
  *
  * Snapshots get the first chance to claim the block.  When trims are in
  * use the free is deferred behind a trim request, and consecutive blocks
  * freed under the same key are merged into a single request.
  */
 void
 ffs_blkfree(struct ufsmount *ump, struct fs *fs, struct vnode *devvp,
     ufs2_daddr_t bno, long size, ino_t inum, enum vtype vtype,
     struct workhead *dephd, u_long key)
 {
 	struct ffs_blkfree_trim_params *tp, *ntp;
 	struct trim_blkreq *blkelm;
 
 	/*
 	 * A snapshot may claim the block, but only when devvp is the
 	 * real disk device (not itself a snapshot) and that device has
 	 * at least one snapshot associated with it.
 	 */
 	if (devvp->v_type == VCHR &&
 	    (devvp->v_vflag & VV_COPYONWRITE) &&
 	    ffs_snapblkfree(fs, devvp, bno, size, inum, vtype, dephd))
 		return;
 
 	/*
 	 * Free immediately when no TRIM is wanted for this block, when
 	 * the device cannot TRIM, or when operating on a snapshot.
 	 */
 	if (key == NOTRIM_KEY || ((ump->um_flags & UM_CANDELETE) == 0) ||
 	    devvp->v_type == VREG) {
 		ffs_blkfree_cg(ump, fs, devvp, bno, size, inum, dephd);
 		return;
 	}
 
 	/* Record this piece; it is freed once its trim has completed. */
 	blkelm = malloc(sizeof(struct trim_blkreq), M_TRIM, M_WAITOK);
 	blkelm->bno = bno;
 	blkelm->size = size;
 	blkelm->pdephd = NULL;
 	if (dephd != NULL) {
 		/* Take over the caller's dependency list. */
 		LIST_INIT(&blkelm->dephd);
 		LIST_SWAP(dephd, &blkelm->dephd, worklist, wk_list);
 		blkelm->pdephd = &blkelm->dephd;
 	}
 
 	if (key == SINGLETON_KEY) {
 		/*
 		 * A lone piece: the SINGLE alloctype yields a request
 		 * that is never hashed, so issue it right away.
 		 */
 		tp = trim_lookup(ump, devvp, bno, size, inum, key, SINGLE);
 		TAILQ_INSERT_HEAD(&tp->blklist, blkelm, blkreqlist);
 		ffs_blkfree_sendtrim(tp);
 		return;
 	}
 
 	/*
 	 * Callers only say that they are freeing a set of blocks; they
 	 * do not track contiguity.  Working out which pieces are
 	 * actually adjacent is done here, using the entry that
 	 * ffs_blkrelease_start() created for this key.
 	 */
 	tp = trim_lookup(ump, devvp, bno, size, inum, key, OLD);
 	if (tp->size == 0) {
 		/* First piece of a potential range. */
 		tp->bno = bno;
 		tp->size = size;
 		TAILQ_INSERT_HEAD(&tp->blklist, blkelm, blkreqlist);
 		return;
 	}
 
 	/* The piece ends exactly where the range begins: prepend it. */
 	if (bno + numfrags(fs, size) == tp->bno) {
 		TAILQ_INSERT_HEAD(&tp->blklist, blkelm, blkreqlist);
 		tp->bno = bno;
 		tp->size += size;
 		return;
 	}
 	/* The piece begins exactly where the range ends: append it. */
 	if (bno == tp->bno + numfrags(fs, tp->size)) {
 		TAILQ_INSERT_TAIL(&tp->blklist, blkelm, blkreqlist);
 		tp->size += size;
 		return;
 	}
 
 	/*
 	 * Not a continuation.  REPLACE unhashes the old request (still
 	 * in tp) and starts a new range (in ntp) with this piece;
 	 * ffs_blkfree_sendtrim(tp) then issues the old range (and tp is
 	 * freed later).
 	 */
 	ntp = trim_lookup(ump, devvp, bno, size, inum, key, REPLACE);
 	TAILQ_INSERT_HEAD(&ntp->blklist, blkelm, blkreqlist);
 	ffs_blkfree_sendtrim(tp);
 }
 
 #ifdef INVARIANTS
 /*
  * Verify allocation of a block or fragment. Returns true if block or
  * fragment is allocated, false if it is free.
  *
  * Panics on a bad size, an out-of-range block number, a cylinder group
  * read failure, or a fragment run that is only partially free.
  */
 static int
 ffs_checkblk(ip, bno, size)
 	struct inode *ip;
 	ufs2_daddr_t bno;
 	long size;
 {
 	struct fs *fs;
 	struct cg *cgp;
 	struct buf *bp;
 	ufs1_daddr_t cgbno;
 	int i, error, frags, nfree;
 	u_int8_t *blksfree;
 
 	fs = ITOFS(ip);
 	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
 		printf("bsize = %ld, size = %ld, fs = %s\n",
 		    (long)fs->fs_bsize, size, fs->fs_fsmnt);
 		panic("ffs_checkblk: bad size");
 	}
 	/*
 	 * Compare at full 64-bit width; narrowing bno to u_int would hide
 	 * out-of-range block numbers whose low 32 bits are small.
 	 */
 	if ((uint64_t)bno >= fs->fs_size)
 		panic("ffs_checkblk: bad block %jd", (intmax_t)bno);
 	error = ffs_getcg(fs, ITODEVVP(ip), dtog(fs, bno), &bp, &cgp);
 	if (error)
 		panic("ffs_checkblk: cylinder group read failed");
 	blksfree = cg_blksfree(cgp);
 	cgbno = dtogd(fs, bno);
 	if (size == fs->fs_bsize) {
 		nfree = ffs_isblock(fs, blksfree, fragstoblks(fs, cgbno));
 	} else {
 		/* Count how many of the fragments are marked free. */
 		frags = numfrags(fs, size);
 		for (nfree = 0, i = 0; i < frags; i++)
 			if (isset(blksfree, cgbno + i))
 				nfree++;
 		if (nfree != 0 && nfree != frags)
 			panic("ffs_checkblk: partially free fragment");
 	}
 	brelse(bp);
 	return (!nfree);
 }
 #endif /* INVARIANTS */
 
 /*
  * Free an inode.
  *
  * With soft updates active on pvp the free is handed to
  * softdep_freefile() and 0 is returned; otherwise ffs_freefile() is
  * called directly and its result is returned.
  */
 int
 ffs_vfree(struct vnode *pvp, ino_t ino, int mode)
 {
 	struct ufsmount *ump;
 
 	if (!DOINGSOFTDEP(pvp)) {
 		ump = VFSTOUFS(pvp->v_mount);
 		return (ffs_freefile(ump, ump->um_fs, ump->um_devvp, ino,
 		    mode, NULL));
 	}
 	softdep_freefile(pvp, ino, mode);
 	return (0);
 }
 
 /*
  * Do the actual free operation.
  * The specified inode is placed back in the free map.
  *
  * devvp is either the disk device (VCHR) or a snapshot file (VREG); for
  * any other vnode type nothing is done and 0 is returned.  An error from
  * reading the cylinder group is returned to the caller.
  */
 int
 ffs_freefile(struct ufsmount *ump, struct fs *fs, struct vnode *devvp,
     ino_t ino, int mode, struct workhead *wkhd)
 {
 	struct cdev *dev;
 	struct buf *bp;
 	struct cg *cgp;
 	u_int8_t *inosused;
 	u_int cg;
 	int error;
 
 	cg = ino_to_cg(fs, ino);
 	switch (devvp->v_type) {
 	case VREG:
 		/* devvp is a snapshot */
 		MPASS(devvp->v_mount->mnt_data == ump);
 		dev = ump->um_devvp->v_rdev;
 		break;
 	case VCHR:
 		/* devvp is a normal disk device */
 		dev = devvp->v_rdev;
 		break;
 	default:
 		return (0);
 	}
 	if (ino >= fs->fs_ipg * fs->fs_ncg)
 		panic("ffs_freefile: range: dev = %s, ino = %ju, fs = %s",
 		    devtoname(dev), (uintmax_t)ino, fs->fs_fsmnt);
 	error = ffs_getcg(fs, devvp, cg, &bp, &cgp);
 	if (error != 0)
 		return (error);
 
 	/* From here on ino is the index within the cylinder group. */
 	inosused = cg_inosused(cgp);
 	ino %= fs->fs_ipg;
 	if (isclr(inosused, ino)) {
 		printf("dev = %s, ino = %ju, fs = %s\n", devtoname(dev),
 		    (uintmax_t)(ino + cg * fs->fs_ipg), fs->fs_fsmnt);
 		if (fs->fs_ronly == 0)
 			panic("ffs_freefile: freeing free inode");
 	}
 	clrbit(inosused, ino);
 	/* Pull the rotor back so the freed inode is found by the next scan. */
 	if (ino < cgp->cg_irotor)
 		cgp->cg_irotor = ino;
 	cgp->cg_cs.cs_nifree++;
 
 	UFS_LOCK(ump);
 	fs->fs_cstotal.cs_nifree++;
 	fs->fs_cs(fs, cg).cs_nifree++;
 	if ((mode & IFMT) == IFDIR) {
 		cgp->cg_cs.cs_ndir--;
 		fs->fs_cstotal.cs_ndir--;
 		fs->fs_cs(fs, cg).cs_ndir--;
 	}
 	fs->fs_fmod = 1;
 	ACTIVECLEAR(fs, cg);
 	UFS_UNLOCK(ump);
 
 	if (MOUNTEDSOFTDEP(UFSTOVFS(ump)) && devvp->v_type == VCHR)
 		softdep_setup_inofree(UFSTOVFS(ump), bp,
 		    ino + cg * fs->fs_ipg, wkhd);
 	bdwrite(bp);
 	return (0);
 }
 
 /*
  * Check to see if a file is free.
  * Used to check for allocated files in snapshots.
  *
  * Returns nonzero when the inode's bit is clear in the cylinder group
  * inode map.  1 is also returned when devvp is neither a regular file
  * nor a character device, when ino is out of range, or when the
  * cylinder group cannot be read.
  */
 int
 ffs_checkfreefile(struct fs *fs, struct vnode *devvp, ino_t ino)
 {
 	struct buf *bp;
 	struct cg *cgp;
 	u_int8_t *inosused;
 	u_int cg;
 	int isfree;
 
 	cg = ino_to_cg(fs, ino);
 	if ((devvp->v_type != VREG && devvp->v_type != VCHR) ||
 	    ino >= fs->fs_ipg * fs->fs_ncg)
 		return (1);
 	if (ffs_getcg(fs, devvp, cg, &bp, &cgp) != 0)
 		return (1);
 	inosused = cg_inosused(cgp);
 	ino %= fs->fs_ipg;
 	isfree = isclr(inosused, ino);
 	brelse(bp);
 	return (isfree);
 }
 
 /*
  * Find a block of the specified size in the specified cylinder group.
  *
  * It is a panic if a request is made to find a block if none are
  * available.
  *
  * The search starts at bpref when it is nonzero and at the cylinder
  * group's fragment rotor otherwise; allocsiz is the wanted run length in
  * fragments.  The returned value is a fragment number relative to the
  * cylinder group, and the rotor is moved to the byte where the run was
  * found.
  */
 static ufs1_daddr_t
 ffs_mapsearch(struct fs *fs, struct cg *cgp, ufs2_daddr_t bpref, int allocsiz)
 {
 	ufs1_daddr_t bno;
 	int start, len, loc, limit;
 	int blk, field, subfield, pos;
 	u_int8_t *blksfree;
 	u_char mask;
 
 	/*
 	 * find the fragment by searching through the free block
 	 * map for an appropriate bit pattern
 	 */
 	if (bpref == 0)
 		start = cgp->cg_frotor / NBBY;
 	else
 		start = dtogd(fs, bpref) / NBBY;
 	blksfree = cg_blksfree(cgp);
 	mask = (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY)));
 	len = howmany(fs->fs_fpg, NBBY) - start;
 	loc = scanc((u_int)len, (u_char *)(blksfree + start),
 	    fragtbl[fs->fs_frag], mask);
 	if (loc == 0) {
 		/* Nothing from the start point onward: rescan from the top. */
 		len = start + 1;
 		start = 0;
 		loc = scanc((u_int)len, (u_char *)blksfree,
 		    fragtbl[fs->fs_frag], mask);
 		if (loc == 0) {
 			printf("start = %d, len = %d, fs = %s\n",
 			    start, len, fs->fs_fsmnt);
 			panic("ffs_alloccg: map corrupted");
 			/* NOTREACHED */
 		}
 	}
 	/* scanc() reports how many bytes were left, hence len - loc. */
 	bno = (start + len - loc) * NBBY;
 	cgp->cg_frotor = bno;
 	/*
 	 * found the byte in the map
 	 * sift through the bits to find the selected frag
 	 */
 	limit = bno + NBBY;
 	while (bno < limit) {
 		blk = blkmap(fs, blksfree, bno);
 		blk <<= 1;
 		field = around[allocsiz];
 		subfield = inside[allocsiz];
 		pos = 0;
 		while (pos <= fs->fs_frag - allocsiz) {
 			if ((blk & field) == subfield)
 				return (bno + pos);
 			field <<= 1;
 			subfield <<= 1;
 			pos++;
 		}
 		bno += fs->fs_frag;
 	}
 	printf("bno = %lu, fs = %s\n", (u_long)bno, fs->fs_fsmnt);
 	panic("ffs_alloccg: block not in map");
 	return (-1);
 }
 
 /*
  * Return the statfs of the filesystem mounted from the disk behind devvp.
  * When devvp is not a character device, the device vnode of the mount it
  * lives on is followed until a character device is reached.
  */
 static const struct statfs *
 ffs_getmntstat(struct vnode *devvp)
 {
 
 	while (devvp->v_type != VCHR)
 		devvp = VFSTOUFS(devvp->v_mount)->um_devvp;
 	return (&devvp->v_rdev->si_mountpt->mnt_stat);
 }
 
 /*
  * Fetch and verify a cylinder group.
  *
  * On success 0 is returned and *bpp / *cgpp reference the buffer and the
  * cylinder group held in it.  On failure both are left NULL; a read error
  * is returned as is, and a check-hash, magic number or group number
  * mismatch invalidates the buffer and returns EIO.
  */
 int
 ffs_getcg(struct fs *fs, struct vnode *devvp, u_int cg, struct buf **bpp,
     struct cg **cgpp)
 {
 	const struct statfs *sfs;
 	struct buf *bp;
 	struct cg *cgp;
 	int error, gbflags, use_ckhash;
 
 	*bpp = NULL;
 	*cgpp = NULL;
 	use_ckhash = (fs->fs_metackhash & CK_CYLGRP) != 0;
 	gbflags = use_ckhash ? GB_CKHASH : 0;
 	/* A snapshot (VREG) and the disk device address the block differently. */
 	error = breadn_flags(devvp, devvp->v_type == VREG ?
 	    fragstoblks(fs, cgtod(fs, cg)) : fsbtodb(fs, cgtod(fs, cg)),
 	    (int)fs->fs_cgsize, NULL, NULL, 0, NOCRED, gbflags,
 	    ffs_ckhash_cg, &bp);
 	if (error != 0)
 		return (error);
 	cgp = (struct cg *)bp->b_data;
 	if (use_ckhash && (bp->b_flags & B_CKHASH) != 0 &&
 	    cgp->cg_ckhash != bp->b_ckhash) {
 		sfs = ffs_getmntstat(devvp);
 		printf("UFS %s%s (%s) cylinder checksum failed: cg %u, cgp: "
 		    "0x%x != bp: 0x%jx\n",
 		    devvp->v_type == VCHR ? "" : "snapshot of ",
 		    sfs->f_mntfromname, sfs->f_mntonname,
 		    cg, cgp->cg_ckhash, (uintmax_t)bp->b_ckhash);
 		goto invalid;
 	}
 	if (!cg_chkmagic(cgp) || cgp->cg_cgx != cg) {
 		sfs = ffs_getmntstat(devvp);
 		printf("UFS %s%s (%s)",
 		    devvp->v_type == VCHR ? "" : "snapshot of ",
 		    sfs->f_mntfromname, sfs->f_mntonname);
 		if (!cg_chkmagic(cgp))
 			printf(" cg %u: bad magic number 0x%x should be 0x%x\n",
 			    cg, cgp->cg_magic, CG_MAGIC);
 		else
 			printf(": wrong cylinder group cg %u != cgx %u\n", cg,
 			    cgp->cg_cgx);
 		goto invalid;
 	}
 	bp->b_flags &= ~B_CKHASH;
 	bp->b_xflags |= BX_BKGRDWRITE;
 	/*
 	 * With check hashes in use the cylinder group time must only
 	 * change when the group is really about to be written, so that
 	 * the in-memory check hash stays correct.  In that case
 	 * ffs_bufwrite() updates the time as the buffer is queued for
 	 * writing.  Without check hashes the time is updated here, as
 	 * has been done historically.
 	 */
 	if (use_ckhash)
 		bp->b_xflags |= BX_CYLGRP;
 	else
 		cgp->cg_old_time = cgp->cg_time = time_second;
 	*bpp = bp;
 	*cgpp = cgp;
 	return (0);
 
 invalid:
 	/* Throw the bad buffer away so that it is not served from the cache. */
 	bp->b_flags &= ~B_CKHASH;
 	bp->b_flags |= B_INVAL | B_NOCACHE;
 	brelse(bp);
 	return (EIO);
 }
 
 /*
  * Compute the check hash of the cylinder group held in bp and store it
  * in bp->b_ckhash.  The hash covers the buffer with the cg_ckhash field
  * zeroed; the stored value is put back afterwards.
  */
 static void
 ffs_ckhash_cg(struct buf *bp)
 {
 	struct cg *cgp = (struct cg *)bp->b_data;
 	uint32_t saved;
 
 	saved = cgp->cg_ckhash;
 	cgp->cg_ckhash = 0;
 	bp->b_ckhash = calculate_crc32c(~0L, bp->b_data, bp->b_bcount);
 	cgp->cg_ckhash = saved;
 }
 
 /*
  * Fserr logs a filesystem error diagnostic at LOG_ERR priority.
  *
  * The message names the current process (pid and command), its uid,
  * the inode number, the mount point of the filesystem and finally the
  * error text in cp.
  */
 void
 ffs_fserr(struct fs *fs, ino_t inum, char *cp)
 {
 	struct thread *td;
 	struct proc *p;
 
 	td = curthread;	/* XXX */
 	p = td->td_proc;
 	log(LOG_ERR, "pid %d (%s), uid %d inumber %ju on %s: %s\n",
 	    p->p_pid, p->p_comm, td->td_ucred->cr_uid, (uintmax_t)inum,
 	    fs->fs_fsmnt, cp);
 }
 
 /*
  * This function provides the capability for the fsck program to
  * update an active filesystem. Fifteen operations are provided:
  *
  * adjrefcnt(inode, amt) - adjusts the reference count on the
  *	specified inode by the specified amount. Under normal
  *	operation the count should always go down. Decrementing
  *	the count to zero will cause the inode to be freed.
  * adjblkcnt(inode, amt) - adjust the number of blocks used by the
  *	inode by the specified amount.
  * setsize(inode, size) - set the size of the inode to the
  *	specified size.
  * adjndir, adjnbfree, adjnifree, adjnffree, adjnumclusters(amt) -
  *	adjust the superblock summary.
  * freedirs(inode, count) - directory inodes [inode..inode + count - 1]
  *	are marked as free. Inodes should never have to be marked
  *	as in use.
  * freefiles(inode, count) - file inodes [inode..inode + count - 1]
  *	are marked as free. Inodes should never have to be marked
  *	as in use.
  * freeblks(blockno, size) - blocks [blockno..blockno + size - 1]
  *	are marked as free. Blocks should never have to be marked
  *	as in use.
  * setflags(flags, set/clear) - the fs_flags field has the specified
  *	flags set (second parameter +1) or cleared (second parameter -1).
  * setcwd(dirinode) - set the current directory to dirinode in the
  *	filesystem associated with the snapshot.
  * setdotdot(oldvalue, newvalue) - Verify that the inode number for ".."
  *	in the current directory is oldvalue then change it to newvalue.
  * unlink(nameptr, oldvalue) - Verify that the inode number associated
  *	with nameptr in the current directory is oldvalue then unlink it.
  *
  * The following functions may only be used on a quiescent filesystem
  * by the soft updates journal. They are not safe to be run on an active
  * filesystem.
  *
  * setinode(inode, dip) - the specified disk inode is replaced with the
  *	contents pointed to by dip.
  * setbufoutput(fd, flags) - output associated with the specified file
  *	descriptor (which must reference the character device supporting
  *	the filesystem) switches from using physio to running through the
  *	buffer cache when flags is set to 1. The descriptor reverts to
  *	physio for output when flags is set to zero.
  */
 
 /*
  * Handler shared by every fsck operation OID declared below; it tells
  * the operations apart by the OID number it was invoked for.
  */
 static int sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS);
 
 /*
  * One write-only (CTLFLAG_WR) OID per fsck operation.  All of them hang
  * off the _vfs_ffs parent and are routed to sysctl_ffs_fsck(); the FFS_*
  * constant passed as the OID number is the value the handler switches on.
  * Only the first entry is declared with SYSCTL_PROC and an explicit
  * CTLTYPE_STRUCT/"S,fsck" format; the rest use SYSCTL_NODE with the same
  * handler.
  */
 SYSCTL_PROC(_vfs_ffs, FFS_ADJ_REFCNT, adjrefcnt, CTLFLAG_WR|CTLTYPE_STRUCT,
 	0, 0, sysctl_ffs_fsck, "S,fsck", "Adjust Inode Reference Count");
 
 static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_BLKCNT, adjblkcnt, CTLFLAG_WR,
 	sysctl_ffs_fsck, "Adjust Inode Used Blocks Count");
 
 static SYSCTL_NODE(_vfs_ffs, FFS_SET_SIZE, setsize, CTLFLAG_WR,
 	sysctl_ffs_fsck, "Set the inode size");
 
 /* Superblock summary adjustments (fs_cstotal counters). */
 static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NDIR, adjndir, CTLFLAG_WR,
 	sysctl_ffs_fsck, "Adjust number of directories");
 
 static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NBFREE, adjnbfree, CTLFLAG_WR,
 	sysctl_ffs_fsck, "Adjust number of free blocks");
 
 static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NIFREE, adjnifree, CTLFLAG_WR,
 	sysctl_ffs_fsck, "Adjust number of free inodes");
 
 static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NFFREE, adjnffree, CTLFLAG_WR,
 	sysctl_ffs_fsck, "Adjust number of free frags");
 
 static SYSCTL_NODE(_vfs_ffs, FFS_ADJ_NUMCLUSTERS, adjnumclusters, CTLFLAG_WR,
 	sysctl_ffs_fsck, "Adjust number of free clusters");
 
 /* Releasing ranges of inodes and blocks. */
 static SYSCTL_NODE(_vfs_ffs, FFS_DIR_FREE, freedirs, CTLFLAG_WR,
 	sysctl_ffs_fsck, "Free Range of Directory Inodes");
 
 static SYSCTL_NODE(_vfs_ffs, FFS_FILE_FREE, freefiles, CTLFLAG_WR,
 	sysctl_ffs_fsck, "Free Range of File Inodes");
 
 static SYSCTL_NODE(_vfs_ffs, FFS_BLK_FREE, freeblks, CTLFLAG_WR,
 	sysctl_ffs_fsck, "Free Range of Blocks");
 
 static SYSCTL_NODE(_vfs_ffs, FFS_SET_FLAGS, setflags, CTLFLAG_WR,
 	sysctl_ffs_fsck, "Change Filesystem Flags");
 
 /* Directory tree repairs. */
 static SYSCTL_NODE(_vfs_ffs, FFS_SET_CWD, setcwd, CTLFLAG_WR,
 	sysctl_ffs_fsck, "Set Current Working Directory");
 
 static SYSCTL_NODE(_vfs_ffs, FFS_SET_DOTDOT, setdotdot, CTLFLAG_WR,
 	sysctl_ffs_fsck, "Change Value of .. Entry");
 
 static SYSCTL_NODE(_vfs_ffs, FFS_UNLINK, unlink, CTLFLAG_WR,
 	sysctl_ffs_fsck, "Unlink a Duplicate Name");
 
 /*
  * The handler refuses the two operations below with EPERM unless the
  * caller's pid matches the mount's um_fsckpid.
  */
 static SYSCTL_NODE(_vfs_ffs, FFS_SET_INODE, setinode, CTLFLAG_WR,
 	sysctl_ffs_fsck, "Update an On-Disk Inode");
 
 static SYSCTL_NODE(_vfs_ffs, FFS_SET_BUFOUTPUT, setbufoutput, CTLFLAG_WR,
 	sysctl_ffs_fsck, "Set Buffered Writing for Descriptor");
 
 /*
  * DEBUG is defined unconditionally right here, so the #ifdef DEBUG
  * sections that follow are always compiled in.
  */
 #define DEBUG 1
 #ifdef DEBUG
 /*
  * Run-time switch for the debug printf()s below: they are emitted only
  * while fsckcmds is non-zero.  Registered read-write (CTLFLAG_RW) under
  * the _debug parent, with an empty description string.
  */
 static int fsckcmds = 0;
 SYSCTL_INT(_debug, OID_AUTO, fsckcmds, CTLFLAG_RW, &fsckcmds, 0, "");
 #endif /* DEBUG */
 
 /* fo_write replacement installed by the FFS_SET_BUFOUTPUT command. */
 static int buffered_write(struct file *, struct uio *, struct ucred *,
 	int, struct thread *);
 
 /*
  * Common sysctl handler for all of the fsck operation OIDs.
  *
  * The value written to the sysctl is copied into a struct fsck_cmd.
  * cmd.handle is a file descriptor (regular file or directory); the mount
  * that vn_start_write() reports for its vnode must be of type "ufs" and
  * is the filesystem operated on.  oidp->oid_number selects the operation,
  * and the meaning of cmd.value and cmd.size depends on it (see each case).
  *
  * Returns 0 on success, otherwise an errno value:
  *   EBADRPC      the request is larger than a struct fsck_cmd
  *   ERPCMISMATCH cmd.version is not FFS_CMD_VERSION
  *   EINVAL       bad descriptor type, non-"ufs" mount, unknown operation,
  *                or an operation-specific argument check failed
  *   EROFS        read-only mount and the caller is not um_fsckpid
  *   EPERM        FFS_SET_INODE/FFS_SET_BUFOUTPUT from a process other
  *                than um_fsckpid
  * or whatever the copy-in, descriptor lookup or underlying filesystem
  * call returned.
  */
 static int
 sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
 {
 	struct thread *td = curthread;
 	struct fsck_cmd cmd;
 	struct ufsmount *ump;
 	struct vnode *vp, *dvp, *fdvp;
 	struct inode *ip, *dp;
 	struct mount *mp;
 	struct fs *fs;
 	ufs2_daddr_t blkno;
 	long blkcnt, blksize;
 	u_long key;
 	struct file *fp, *vfp;
 	cap_rights_t rights;
 	int filetype, error;
 	/*
 	 * Function-static state for FFS_SET_BUFOUTPUT: origops remembers the
 	 * fileops of the first descriptor ever switched, bufferedops is a copy
 	 * of it with fo_write replaced by buffered_write().
 	 */
 	static struct fileops *origops, bufferedops;
 
 	/* Reject oversized requests before copying the command in. */
 	if (req->newlen > sizeof cmd)
 		return (EBADRPC);
 	if ((error = SYSCTL_IN(req, &cmd, sizeof cmd)) != 0)
 		return (error);
 	if (cmd.version != FFS_CMD_VERSION)
 		return (ERPCMISMATCH);
 	if ((error = getvnode(td, cmd.handle,
 	    cap_rights_init(&rights, CAP_FSCK), &fp)) != 0)
 		return (error);
 	/* From here on every exit path has to fdrop(fp). */
 	vp = fp->f_data;
 	if (vp->v_type != VREG && vp->v_type != VDIR) {
 		fdrop(fp, td);
 		return (EINVAL);
 	}
 	/*
 	 * NOTE(review): the return value of vn_start_write() is ignored; only
 	 * mp == NULL is treated as failure -- confirm that is sufficient.
 	 */
 	vn_start_write(vp, &mp, V_WAIT);
 	if (mp == NULL ||
 	    strncmp(mp->mnt_stat.f_fstypename, "ufs", MFSNAMELEN)) {
 		vn_finished_write(mp);
 		fdrop(fp, td);
 		return (EINVAL);
 	}
 	ump = VFSTOUFS(mp);
 	/*
 	 * A read-only mount may only be modified by the process whose pid is
 	 * recorded in um_fsckpid.
 	 */
 	if ((mp->mnt_flag & MNT_RDONLY) &&
 	    ump->um_fsckpid != td->td_proc->p_pid) {
 		vn_finished_write(mp);
 		fdrop(fp, td);
 		return (EROFS);
 	}
 	fs = ump->um_fs;
 	/* Default for FFS_FILE_FREE; FFS_DIR_FREE overrides it below. */
 	filetype = IFREG;
 
 	switch (oidp->oid_number) {
 
 	/*
 	 * cmd.value: flag bits; cmd.size > 0 sets them in fs_flags,
 	 * otherwise they are cleared.
 	 */
 	case FFS_SET_FLAGS:
 #ifdef DEBUG
 		if (fsckcmds)
 			printf("%s: %s flags\n", mp->mnt_stat.f_mntonname,
 			    cmd.size > 0 ? "set" : "clear");
 #endif /* DEBUG */
 		if (cmd.size > 0)
 			fs->fs_flags |= (long)cmd.value;
 		else
 			fs->fs_flags &= ~(long)cmd.value;
 		break;
 
 	/*
 	 * cmd.value: inode number; cmd.size: delta added to both i_nlink
 	 * and i_effnlink.  Note that vp is reused for the target inode's
 	 * vnode in this and the following cases; fp keeps the reference on
 	 * the descriptor passed in cmd.handle.
 	 */
 	case FFS_ADJ_REFCNT:
 #ifdef DEBUG
 		if (fsckcmds) {
 			printf("%s: adjust inode %jd link count by %jd\n",
 			    mp->mnt_stat.f_mntonname, (intmax_t)cmd.value,
 			    (intmax_t)cmd.size);
 		}
 #endif /* DEBUG */
 		if ((error = ffs_vget(mp, (ino_t)cmd.value, LK_EXCLUSIVE, &vp)))
 			break;
 		ip = VTOI(vp);
 		ip->i_nlink += cmd.size;
 		DIP_SET(ip, i_nlink, ip->i_nlink);
 		ip->i_effnlink += cmd.size;
 		ip->i_flag |= IN_CHANGE | IN_MODIFIED;
 		error = ffs_update(vp, 1);
 		if (DOINGSOFTDEP(vp))
 			softdep_change_linkcnt(ip);
 		vput(vp);
 		break;
 
 	/* cmd.value: inode number; cmd.size: delta added to i_blocks. */
 	case FFS_ADJ_BLKCNT:
 #ifdef DEBUG
 		if (fsckcmds) {
 			printf("%s: adjust inode %jd block count by %jd\n",
 			    mp->mnt_stat.f_mntonname, (intmax_t)cmd.value,
 			    (intmax_t)cmd.size);
 		}
 #endif /* DEBUG */
 		if ((error = ffs_vget(mp, (ino_t)cmd.value, LK_EXCLUSIVE, &vp)))
 			break;
 		ip = VTOI(vp);
 		DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + cmd.size);
 		ip->i_flag |= IN_CHANGE | IN_MODIFIED;
 		error = ffs_update(vp, 1);
 		vput(vp);
 		break;
 
 	/*
 	 * cmd.value: inode number; cmd.size: new i_size.  Only the dinode
 	 * field is written (DIP_SET) before the inode is pushed out.
 	 */
 	case FFS_SET_SIZE:
 #ifdef DEBUG
 		if (fsckcmds) {
 			printf("%s: set inode %jd size to %jd\n",
 			    mp->mnt_stat.f_mntonname, (intmax_t)cmd.value,
 			    (intmax_t)cmd.size);
 		}
 #endif /* DEBUG */
 		if ((error = ffs_vget(mp, (ino_t)cmd.value, LK_EXCLUSIVE, &vp)))
 			break;
 		ip = VTOI(vp);
 		DIP_SET(ip, i_size, cmd.size);
 		ip->i_flag |= IN_CHANGE | IN_MODIFIED;
 		error = ffs_update(vp, 1);
 		vput(vp);
 		break;
 
 	/*
 	 * Free cmd.size consecutive inodes starting at inode cmd.value.
 	 * The two cases differ only in the file type handed to
 	 * ffs_freefile().
 	 */
 	case FFS_DIR_FREE:
 		filetype = IFDIR;
 		/* fall through */
 
 	case FFS_FILE_FREE:
 #ifdef DEBUG
 		if (fsckcmds) {
 			if (cmd.size == 1)
 				printf("%s: free %s inode %ju\n",
 				    mp->mnt_stat.f_mntonname,
 				    filetype == IFDIR ? "directory" : "file",
 				    (uintmax_t)cmd.value);
 			else
 				printf("%s: free %s inodes %ju-%ju\n",
 				    mp->mnt_stat.f_mntonname,
 				    filetype == IFDIR ? "directory" : "file",
 				    (uintmax_t)cmd.value,
 				    (uintmax_t)(cmd.value + cmd.size - 1));
 		}
 #endif /* DEBUG */
 		/*
 		 * cmd itself is used as the loop cursor; the loop stops at
 		 * the first ffs_freefile() failure and reports that error.
 		 */
 		while (cmd.size > 0) {
 			if ((error = ffs_freefile(ump, fs, ump->um_devvp,
 			    cmd.value, filetype, NULL)))
 				break;
 			cmd.size -= 1;
 			cmd.value += 1;
 		}
 		break;
 
 	/*
 	 * Free cmd.size units of fs_fsize bytes starting at address
 	 * cmd.value.  The first chunk runs only up to the next multiple
 	 * of fs_frag, so every later ffs_blkfree() call starts on such a
 	 * boundary and covers at most fs_frag units.
 	 */
 	case FFS_BLK_FREE:
 #ifdef DEBUG
 		if (fsckcmds) {
 			if (cmd.size == 1)
 				printf("%s: free block %jd\n",
 				    mp->mnt_stat.f_mntonname,
 				    (intmax_t)cmd.value);
 			else
 				printf("%s: free blocks %jd-%jd\n",
 				    mp->mnt_stat.f_mntonname, 
 				    (intmax_t)cmd.value,
 				    (intmax_t)cmd.value + cmd.size - 1);
 		}
 #endif /* DEBUG */
 		blkno = cmd.value;
 		blkcnt = cmd.size;
 		blksize = fs->fs_frag - (blkno % fs->fs_frag);
 		key = ffs_blkrelease_start(ump, ump->um_devvp, UFS_ROOTINO);
 		while (blkcnt > 0) {
 			/* Last chunk may be shorter than blksize. */
 			if (blkcnt < blksize)
 				blksize = blkcnt;
 			ffs_blkfree(ump, fs, ump->um_devvp, blkno,
 			    blksize * fs->fs_fsize, UFS_ROOTINO, 
 			    VDIR, NULL, key);
 			blkno += blksize;
 			blkcnt -= blksize;
 			blksize = fs->fs_frag;
 		}
 		ffs_blkrelease_finish(ump, key);
 		break;
 
 	/*
 	 * Adjust superblock summaries.  fsck(8) is expected to
 	 * submit deltas when necessary.
 	 */
 	case FFS_ADJ_NDIR:
 #ifdef DEBUG
 		if (fsckcmds) {
 			printf("%s: adjust number of directories by %jd\n",
 			    mp->mnt_stat.f_mntonname, (intmax_t)cmd.value);
 		}
 #endif /* DEBUG */
 		fs->fs_cstotal.cs_ndir += cmd.value;
 		break;
 
 	case FFS_ADJ_NBFREE:
 #ifdef DEBUG
 		if (fsckcmds) {
 			printf("%s: adjust number of free blocks by %+jd\n",
 			    mp->mnt_stat.f_mntonname, (intmax_t)cmd.value);
 		}
 #endif /* DEBUG */
 		fs->fs_cstotal.cs_nbfree += cmd.value;
 		break;
 
 	case FFS_ADJ_NIFREE:
 #ifdef DEBUG
 		if (fsckcmds) {
 			printf("%s: adjust number of free inodes by %+jd\n",
 			    mp->mnt_stat.f_mntonname, (intmax_t)cmd.value);
 		}
 #endif /* DEBUG */
 		fs->fs_cstotal.cs_nifree += cmd.value;
 		break;
 
 	case FFS_ADJ_NFFREE:
 #ifdef DEBUG
 		if (fsckcmds) {
 			printf("%s: adjust number of free frags by %+jd\n",
 			    mp->mnt_stat.f_mntonname, (intmax_t)cmd.value);
 		}
 #endif /* DEBUG */
 		fs->fs_cstotal.cs_nffree += cmd.value;
 		break;
 
 	case FFS_ADJ_NUMCLUSTERS:
 #ifdef DEBUG
 		if (fsckcmds) {
 			printf("%s: adjust number of free clusters by %+jd\n",
 			    mp->mnt_stat.f_mntonname, (intmax_t)cmd.value);
 		}
 #endif /* DEBUG */
 		fs->fs_cstotal.cs_numclusters += cmd.value;
 		break;
 
 	/* cmd.value: inode number of the directory to change into. */
 	case FFS_SET_CWD:
 #ifdef DEBUG
 		if (fsckcmds) {
 			printf("%s: set current directory to inode %jd\n",
 			    mp->mnt_stat.f_mntonname, (intmax_t)cmd.value);
 		}
 #endif /* DEBUG */
 		if ((error = ffs_vget(mp, (ino_t)cmd.value, LK_SHARED, &vp)))
 			break;
 		AUDIT_ARG_VNODE1(vp);
 		if ((error = change_dir(vp, td)) != 0) {
 			vput(vp);
 			break;
 		}
 		/*
 		 * The vnode is only unlocked, not released: presumably
 		 * pwd_chdir() takes over the reference obtained by
 		 * ffs_vget() -- confirm against pwd_chdir().
 		 */
 		VOP_UNLOCK(vp, 0);
 		pwd_chdir(td, vp);
 		break;
 
 	/*
 	 * Rewrite the ".." entry of the caller's current directory.
 	 * cmd.value: inode ".." points at now; cmd.size: inode number it
 	 * should point at afterwards.
 	 */
 	case FFS_SET_DOTDOT:
 #ifdef DEBUG
 		if (fsckcmds) {
 			printf("%s: change .. in cwd from %jd to %jd\n",
 			    mp->mnt_stat.f_mntonname, (intmax_t)cmd.value,
 			    (intmax_t)cmd.size);
 		}
 #endif /* DEBUG */
 		/*
 		 * First we have to get and lock the parent directory
 		 * to which ".." points.
 		 */
 		error = ffs_vget(mp, (ino_t)cmd.value, LK_EXCLUSIVE, &fdvp);
 		if (error)
 			break;
 		/*
 		 * Now we get and lock the child directory containing "..".
 		 *
 		 * NOTE(review): fd_cdir is read under the filedesc lock but
 		 * no reference is taken before the lock is dropped, so
 		 * vget() operates on a pointer that is not held -- confirm
 		 * the cwd cannot be replaced in between.
 		 */
 		FILEDESC_SLOCK(td->td_proc->p_fd);
 		dvp = td->td_proc->p_fd->fd_cdir;
 		FILEDESC_SUNLOCK(td->td_proc->p_fd);
 		if ((error = vget(dvp, LK_EXCLUSIVE, td)) != 0) {
 			vput(fdvp);
 			break;
 		}
 		dp = VTOI(dvp);
 		dp->i_offset = 12;	/* XXX mastertemplate.dot_reclen */
 		error = ufs_dirrewrite(dp, VTOI(fdvp), (ino_t)cmd.size,
 		    DT_DIR, 0);
 		/* Purge the name cache for both directories. */
 		cache_purge(fdvp);
 		cache_purge(dvp);
 		vput(dvp);
 		vput(fdvp);
 		break;
 
 	/*
 	 * cmd.value: user-space pointer to the name to remove;
 	 * cmd.size: inode number passed along with the unlink request.
 	 */
 	case FFS_UNLINK:
 #ifdef DEBUG
 		if (fsckcmds) {
 			char buf[32];
 
 			if (copyinstr((char *)(intptr_t)cmd.value, buf,32,NULL))
 				strncpy(buf, "Name_too_long", 32);
 			printf("%s: unlink %s (inode %jd)\n",
 			    mp->mnt_stat.f_mntonname, buf, (intmax_t)cmd.size);
 		}
 #endif /* DEBUG */
 		/*
-		 * kern_unlinkat will do its own start/finish writes and
+		 * kern_funlinkat will do its own start/finish writes and
 		 * they do not nest, so drop ours here. Setting mp == NULL
 		 * indicates that vn_finished_write is not needed down below.
 		 */
 		vn_finished_write(mp);
 		mp = NULL;
-		error = kern_unlinkat(td, AT_FDCWD, (char *)(intptr_t)cmd.value,
-		    UIO_USERSPACE, 0, (ino_t)cmd.size);
+		error = kern_funlinkat(td, AT_FDCWD,
+		    (char *)(intptr_t)cmd.value, FD_NONE, UIO_USERSPACE,
+		    0, (ino_t)cmd.size);
 		break;
 
 	/*
 	 * Overwrite an inode's dinode with data supplied by the caller.
 	 * cmd.value: inode number; cmd.size: user-space pointer to the
 	 * replacement dinode, read as a ufs1_dinode or ufs2_dinode
 	 * according to I_IS_UFS1().  Restricted to the um_fsckpid process.
 	 */
 	case FFS_SET_INODE:
 		if (ump->um_fsckpid != td->td_proc->p_pid) {
 			error = EPERM;
 			break;
 		}
 #ifdef DEBUG
 		if (fsckcmds) {
 			printf("%s: update inode %jd\n",
 			    mp->mnt_stat.f_mntonname, (intmax_t)cmd.value);
 		}
 #endif /* DEBUG */
 		if ((error = ffs_vget(mp, (ino_t)cmd.value, LK_EXCLUSIVE, &vp)))
 			break;
 		AUDIT_ARG_VNODE1(vp);
 		ip = VTOI(vp);
 		if (I_IS_UFS1(ip))
 			error = copyin((void *)(intptr_t)cmd.size, ip->i_din1,
 			    sizeof(struct ufs1_dinode));
 		else
 			error = copyin((void *)(intptr_t)cmd.size, ip->i_din2,
 			    sizeof(struct ufs2_dinode));
 		if (error) {
 			vput(vp);
 			break;
 		}
 		ip->i_flag |= IN_CHANGE | IN_MODIFIED;
 		error = ffs_update(vp, 1);
 		vput(vp);
 		break;
 
 	/*
 	 * Switch the write method of another descriptor.
 	 * cmd.value: descriptor to switch (its vnode must be VCHR);
 	 * cmd.size == 1 installs buffered_write(), any other value puts
 	 * the saved original fileops back.  Restricted to the um_fsckpid
 	 * process.
 	 */
 	case FFS_SET_BUFOUTPUT:
 		if (ump->um_fsckpid != td->td_proc->p_pid) {
 			error = EPERM;
 			break;
 		}
 		/* vp is still the vnode behind cmd.handle at this point. */
 		if (ITOUMP(VTOI(vp)) != ump) {
 			error = EINVAL;
 			break;
 		}
 #ifdef DEBUG
 		if (fsckcmds) {
 			printf("%s: %s buffered output for descriptor %jd\n",
 			    mp->mnt_stat.f_mntonname,
 			    cmd.size == 1 ? "enable" : "disable",
 			    (intmax_t)cmd.value);
 		}
 #endif /* DEBUG */
 		if ((error = getvnode(td, cmd.value,
 		    cap_rights_init(&rights, CAP_FSCK), &vfp)) != 0)
 			break;
 		if (vfp->f_vnode->v_type != VCHR) {
 			fdrop(vfp, td);
 			error = EINVAL;
 			break;
 		}
 		/*
 		 * First use: remember this descriptor's fileops and build
 		 * the buffered variant from them.
 		 *
 		 * NOTE(review): the template comes from whichever descriptor
 		 * is switched first and is set up without any visible
 		 * locking -- confirm that all such descriptors share one
 		 * f_ops and that concurrent callers are excluded.
 		 */
 		if (origops == NULL) {
 			origops = vfp->f_ops;
 			bcopy((void *)origops, (void *)&bufferedops,
 			    sizeof(bufferedops));
 			bufferedops.fo_write = buffered_write;
 		}
 		if (cmd.size == 1)
 			atomic_store_rel_ptr((volatile uintptr_t *)&vfp->f_ops,
 			    (uintptr_t)&bufferedops);
 		else
 			atomic_store_rel_ptr((volatile uintptr_t *)&vfp->f_ops,
 			    (uintptr_t)origops);
 		fdrop(vfp, td);
 		break;
 
 	default:
 #ifdef DEBUG
 		if (fsckcmds) {
 			printf("Invalid request %d from fsck\n",
 			    oidp->oid_number);
 		}
 #endif /* DEBUG */
 		error = EINVAL;
 		break;
 
 	}
 	/*
 	 * Common exit: release the descriptor from cmd.handle and end the
 	 * write started above (mp is NULL if FFS_UNLINK already ended it).
 	 */
 	fdrop(fp, td);
 	vn_finished_write(mp);
 	return (error);
 }
 
 /*
  * Write method that stages a descriptor's output through the buffer
  * cache. This is needed so that writes to the filesystem device
  * will give snapshots a chance to copy modified blocks for which it
  * needs to retain copies.
  *
  * fp must reference a disk device, and that device must be the one
  * backing the UFS filesystem that holds the calling process's current
  * directory.  The transfer described by uio has to fit inside a single
  * filesystem block, start on a fragment boundary and be a whole number
  * of fragments long.
  *
  * Returns 0 on success, EINVAL when one of the conditions above is not
  * met, or the error reported by uiomove() or bwrite().  active_cred is
  * not used.
  */
 static int
 buffered_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
     int flags, struct thread *td)
 {
 	struct vnode *devvp, *vp;
 	struct inode *ip;
 	struct buf *bp;
 	struct fs *fs;
 	struct filedesc *fdp;
 	int error;
 	daddr_t lbn;
 
 	/*
 	 * The devvp is associated with the /dev filesystem. To discover
 	 * the filesystem with which the device is associated, we depend
 	 * on the application setting the current directory to a location
 	 * within the filesystem being written. Yes, this is an ugly hack.
 	 */
 	devvp = fp->f_vnode;
 	if (!vn_isdisk(devvp, NULL))
 		return (EINVAL);
 	/*
 	 * Take our own reference on the current directory while the
 	 * filedesc lock is held, then lock the vnode for inspection.
 	 */
 	fdp = td->td_proc->p_fd;
 	FILEDESC_SLOCK(fdp);
 	vp = fdp->fd_cdir;
 	vref(vp);
 	FILEDESC_SUNLOCK(fdp);
 	vn_lock(vp, LK_SHARED | LK_RETRY);
 	/*
 	 * Check that the current directory vnode indeed belongs to
 	 * UFS before trying to dereference UFS-specific v_data fields.
 	 */
 	if (vp->v_op != &ffs_vnodeops1 && vp->v_op != &ffs_vnodeops2) {
 		vput(vp);
 		return (EINVAL);
 	}
 	/* The descriptor must be the device of that very filesystem. */
 	ip = VTOI(vp);
 	if (ITODEVVP(ip) != devvp) {
 		vput(vp);
 		return (EINVAL);
 	}
 	/* Only the superblock pointer is needed from here on. */
 	fs = ITOFS(ip);
 	vput(vp);
 	/*
 	 * From this point every exit goes through "out" so that the
 	 * device vnode lock and the file offset lock are both released.
 	 */
 	foffset_lock_uio(fp, uio, flags);
 	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 #ifdef DEBUG
 	if (fsckcmds) {
 		printf("%s: buffered write for block %jd\n",
 		    fs->fs_fsmnt, (intmax_t)btodb(uio->uio_offset));
 	}
 #endif /* DEBUG */
 	/*
 	 * All I/O must be contained within a filesystem block, start on
 	 * a fragment boundary, and be a multiple of fragments in length.
 	 */
 	if (uio->uio_resid > fs->fs_bsize - (uio->uio_offset % fs->fs_bsize) ||
 	    fragoff(fs, uio->uio_offset) != 0 ||
 	    fragoff(fs, uio->uio_resid) != 0) {
 		error = EINVAL;
 		goto out;
 	}
 	/*
 	 * Get a buffer on the device vnode for the target fragments, fill
 	 * it from the caller's data and write it out.  B_RELBUF is set on
 	 * the buffer before the write.
 	 */
 	lbn = numfrags(fs, uio->uio_offset);
 	bp = getblk(devvp, lbn, uio->uio_resid, 0, 0, 0);
 	bp->b_flags |= B_RELBUF;
 	if ((error = uiomove((char *)bp->b_data, uio->uio_resid, uio)) != 0) {
 		brelse(bp);
 		goto out;
 	}
 	error = bwrite(bp);
 out:
 	VOP_UNLOCK(devvp, 0);
 	foffset_unlock_uio(fp, uio, flags | FOF_NEXTOFF);
 	return (error);
 }